From 398601f8859d1d9c627897fad5bb4116c2f00942 Mon Sep 17 00:00:00 2001 From: catlog22 Date: Wed, 18 Mar 2026 14:42:09 +0800 Subject: [PATCH] refactor: remove smart_search bridge, add codexlens MCP template, delete codex-lens v1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Delete smart-search.ts (3476 lines) and codex-lens.ts stub — the CCW bridge that wrapped the codexlens-search CLI is gone entirely - Remove executeToolWithProgress and all smart_search registrations from tools/index.ts and mcp-server/index.ts - Replace checkSemanticStatus() calls in core-memory-routes with inline { available: false } — v1 bridge no longer provides this - Inline no-op stubs in smart-context.ts to replace codex-lens imports - Seed built-in 'codexlens' MCP template at server startup via seedBuiltinTemplates() in mcp-routes.ts; uses uvx --from codexlens-search[mcp] codexlens-mcp so users install via uv - Remove smart_search from all default enabled-tools strings (backend mcp-routes, mcp-server DEFAULT_TOOLS, frontend api.ts, mcp install helpers) and CCW_MCP_TOOLS UI list - Delete frontend pages/hooks/components: CodexLensManagerPage, useV2SearchManager, useIndex, IndexManager; remove routes, sidebar entry, and all re-exports - Remove index status display section from WorkflowTaskWidget - Delete four smart-search test files; update mcp-server.test.js and e2e/mcp-tools.e2e.test.ts to remove smart_search assertions - Delete codex-lens/ source directory (v1 Python monolith, ~75 files) — no longer imported or subprocess-called by CCW Net: ~11 000 lines removed, +30 lines for template seeding Co-Authored-By: Claude Sonnet 4.6 --- codex-lens/.coverage | Bin 53248 -> 0 bytes codex-lens/.env.example | 71 - codex-lens/.github/workflows/security.yml | 70 - .../codexlens-home/settings.json | 1 - .../frontend/app.ts | 1 - .../frontend/bundle.min.js | 1 - .../frontend/dist/compiled.ts | 1 - .../frontend/dist/bundle.ts | 1 - .../frontend/src/app.ts | 1 - .../.next/generated.py | 1 - .../.parcel-cache/generated.py | 1 - .../.turbo/generated.py | 1 - .../build/generated.py | 1 - .../coverage/generated.py | 1 - .../dist/generated.py | 1 - .../out/generated.py | 1 - .../src/app.py | 1 - .../target/generated.py | 1 - .../frontend/app.ts | 1 - .../frontend/bundle.min.js | 1 - .../frontend/skip.ts | 1 - .../settings.json | 1 - .../package/dist/bundle.py | 1 - codex-lens/ASSOCIATION_TREE_IMPLEMENTATION.md | 240 - codex-lens/CHAIN_SEARCH_IMPLEMENTATION.md | 245 - codex-lens/CHANGELOG.md | 41 - codex-lens/DEPENDENCIES.md | 38 - codex-lens/LICENSE | 21 - codex-lens/README.md | 109 - codex-lens/SEMANTIC_SEARCH_USAGE.md | 83 - .../accuracy_queries_ccw_smart_search.jsonl | 16 - .../accuracy_queries_codexlens.jsonl | 33 - codex-lens/benchmarks/analyze_methods.py | 245 - .../binary_search_microbenchmark.py | 209 - codex-lens/benchmarks/cascade_benchmark.py | 402 -- .../benchmarks/compare_accuracy_labeled.py | 365 -- .../compare_ccw_smart_search_stage2.py | 980 ---- .../benchmarks/compare_semantic_methods.py | 405 -- ...compare_staged_realtime_vs_dense_rerank.py | 393 -- .../benchmarks/compare_staged_stage2_modes.py | 391 -- .../method_contribution_analysis.py | 527 -- .../accuracy_2026-02-11_codexlens.json | 1308 ----- ...racy_2026-02-11_codexlens_precomputed.json | 1335 ----- .../benchmarks/results/cascade_benchmark.json | 277 - .../results/ccw_smart_search_stage2.json | 1704 ------ ..._smart_search_stage2_sample4_20260314.json | 526 -- ...h_stage2_smoke1_cpu_reranker_20260314.json | 415 -- .../results/compare_2026-02-09.json | 453 -- .../compare_2026-02-09_dir_rr_fast4.json | 356 -- .../compare_2026-02-09_dir_rr_fast5.json | 466 -- .../compare_2026-02-09_dir_rr_fast6.json | 467 -- .../compare_2026-02-09_keepalive3.json | 171 - .../compare_2026-02-09_keepalive3b.json | 171 - .../results/compare_2026-02-09_run2.json | 453 -- .../compare_2026-02-09_score_fast3.json | 208 - .../compare_2026-02-09_score_fast4.json | 356 -- .../compare_2026-02-09_score_fast5.json | 462 -- .../compare_2026-02-09_score_fast6.json | 465 -- .../compare_2026-02-09_score_fast7.json | 465 -- .../compare_2026-02-10_dir_rr_fast7.json | 467 -- .../compare_2026-02-10_path_fast7.json | 465 -- .../compare_2026-02-10_score_fast7.json | 465 -- .../compare_2026-02-11_dir_rr_fast7.json | 467 -- .../compare_2026-02-11_path_fast7.json | 465 -- .../compare_2026-02-11_score_fast7.json | 465 -- .../results/method_contribution_analysis.json | 406 -- .../benchmarks/results/tmp_compare1.json | 73 - .../results/tmp_compare3_ok_cpu.json | 177 - .../results/tmp_compare3_ok_cpu_dedup.json | 176 - codex-lens/coir_benchmark_full.py | 465 -- codex-lens/debug_semantic_search.py | 318 -- codex-lens/debug_semantic_v2.py | 276 - .../dist/codex_lens-0.1.0-py3-none-any.whl | Bin 378480 -> 0 bytes codex-lens/dist/codex_lens-0.1.0.tar.gz | Bin 466437 -> 0 bytes codex-lens/docs/CHAIN_SEARCH_QUICKREF.md | 171 - codex-lens/docs/CODEXLENS_LSP_API_SPEC.md | 676 --- codex-lens/docs/CODEX_LENS_AUTO_HYBRID.md | 326 -- codex-lens/docs/CONFIGURATION.md | 298 - codex-lens/docs/DESIGN_EVALUATION_REPORT.md | 1010 ---- codex-lens/docs/HYBRID_SEARCH_ARCHITECTURE.md | 540 -- codex-lens/docs/IMPLEMENTATION_SUMMARY.md | 363 -- codex-lens/docs/LLM_REMOVAL_SUMMARY.md | 342 -- codex-lens/docs/LSP_INTEGRATION_CHECKLIST.md | 316 -- codex-lens/docs/LSP_INTEGRATION_PLAN.md | 2588 --------- codex-lens/docs/MCP_ENDPOINT_DESIGN.md | 284 - codex-lens/docs/MIGRATION_005_SUMMARY.md | 220 - codex-lens/docs/MULTILEVEL_CHUNKER_DESIGN.md | 973 ---- codex-lens/docs/PURE_VECTOR_SEARCH_GUIDE.md | 417 -- codex-lens/docs/REAL_LSP_SERVER_PLAN.md | 825 --- codex-lens/docs/SEARCH_ANALYSIS_SUMMARY.md | 192 - codex-lens/docs/SEARCH_COMPARISON_ANALYSIS.md | 711 --- codex-lens/docs/SEMANTIC_GRAPH_DESIGN.md | 1113 ---- codex-lens/docs/T6-CLI-Integration-Summary.md | 248 - codex-lens/docs/codex_mcp.md | 459 -- codex-lens/docs/test-quality-enhancements.md | 187 - codex-lens/examples/association_tree_demo.py | 156 - codex-lens/examples/debug_uri_format.py | 40 - .../examples/search_comparison_benchmark.py | 326 -- .../examples/simple_search_comparison.py | 123 - codex-lens/examples/test_lsp_capabilities.py | 79 - codex-lens/examples/test_lsp_references.py | 76 - codex-lens/examples/test_lsp_tree.py | 92 - codex-lens/examples/test_raw_lsp.py | 104 - codex-lens/examples/test_raw_response.py | 96 - .../examples/test_simple_call_hierarchy.py | 87 - codex-lens/examples/test_uri_consistency.py | 98 - codex-lens/examples/test_wait_for_analysis.py | 99 - codex-lens/lsp-servers.json | 88 - codex-lens/misleading_test.db | Bin 163840 -> 0 bytes codex-lens/pyproject.toml | 127 - codex-lens/requirements.in | 22 - .../scripts/bootstrap_reranker_local.py | 340 -- codex-lens/scripts/generate_embeddings.py | 278 - .../scripts/requirements-reranker-local.txt | 13 - codex-lens/src/.gitignore | 1 - codex-lens/src/codex_lens.egg-info/PKG-INFO | 119 - .../src/codex_lens.egg-info/SOURCES.txt | 208 - .../codex_lens.egg-info/dependency_links.txt | 1 - .../src/codex_lens.egg-info/entry_points.txt | 2 - .../src/codex_lens.egg-info/requires.txt | 60 - .../src/codex_lens.egg-info/top_level.txt | 1 - codex-lens/src/codexlens/__init__.py | 28 - codex-lens/src/codexlens/__main__.py | 14 - codex-lens/src/codexlens/api/__init__.py | 93 - codex-lens/src/codexlens/api/definition.py | 126 - codex-lens/src/codexlens/api/file_context.py | 272 - codex-lens/src/codexlens/api/hover.py | 148 - codex-lens/src/codexlens/api/lsp_lifecycle.py | 124 - codex-lens/src/codexlens/api/models.py | 281 - codex-lens/src/codexlens/api/references.py | 345 -- codex-lens/src/codexlens/api/semantic.py | 482 -- codex-lens/src/codexlens/api/symbols.py | 146 - codex-lens/src/codexlens/api/utils.py | 153 - codex-lens/src/codexlens/cli/__init__.py | 27 - codex-lens/src/codexlens/cli/commands.py | 4942 ----------------- .../src/codexlens/cli/embedding_manager.py | 2377 -------- codex-lens/src/codexlens/cli/model_manager.py | 1026 ---- codex-lens/src/codexlens/cli/output.py | 135 - codex-lens/src/codexlens/config.py | 1164 ---- codex-lens/src/codexlens/entities.py | 128 - codex-lens/src/codexlens/env_config.py | 329 -- codex-lens/src/codexlens/errors.py | 59 - .../src/codexlens/hybrid_search/__init__.py | 28 - .../hybrid_search/data_structures.py | 602 -- codex-lens/src/codexlens/indexing/README.md | 77 - codex-lens/src/codexlens/indexing/__init__.py | 26 - .../src/codexlens/indexing/embedding.py | 582 -- .../codexlens/indexing/symbol_extractor.py | 277 - codex-lens/src/codexlens/lsp/__init__.py | 34 - codex-lens/src/codexlens/lsp/handlers.py | 551 -- .../src/codexlens/lsp/keepalive_bridge.py | 135 - codex-lens/src/codexlens/lsp/lsp-servers.json | 88 - codex-lens/src/codexlens/lsp/lsp_bridge.py | 857 --- .../src/codexlens/lsp/lsp_graph_builder.py | 383 -- codex-lens/src/codexlens/lsp/providers.py | 177 - codex-lens/src/codexlens/lsp/server.py | 263 - .../src/codexlens/lsp/standalone_manager.py | 1307 ----- codex-lens/src/codexlens/mcp/__init__.py | 20 - codex-lens/src/codexlens/mcp/hooks.py | 170 - codex-lens/src/codexlens/mcp/provider.py | 202 - codex-lens/src/codexlens/mcp/schema.py | 113 - codex-lens/src/codexlens/parsers/__init__.py | 14 - .../src/codexlens/parsers/astgrep_binding.py | 320 -- .../parsers/astgrep_js_ts_processor.py | 306 - .../codexlens/parsers/astgrep_processor.py | 1033 ---- codex-lens/src/codexlens/parsers/encoding.py | 202 - codex-lens/src/codexlens/parsers/factory.py | 393 -- .../codexlens/parsers/patterns/__init__.py | 5 - .../parsers/patterns/javascript/__init__.py | 92 - .../parsers/patterns/python/__init__.py | 204 - .../parsers/patterns/python/call.yaml | 87 - .../parsers/patterns/python/imports.yaml | 82 - .../parsers/patterns/python/inherits.yaml | 42 - .../parsers/patterns/typescript/__init__.py | 73 - codex-lens/src/codexlens/parsers/tokenizer.py | 98 - .../codexlens/parsers/treesitter_parser.py | 935 ---- codex-lens/src/codexlens/search/__init__.py | 55 - .../search/association_tree/QUICK_START.md | 257 - .../search/association_tree/README.md | 188 - .../search/association_tree/__init__.py | 21 - .../search/association_tree/builder.py | 450 -- .../association_tree/data_structures.py | 191 - .../search/association_tree/deduplicator.py | 301 - .../src/codexlens/search/binary_searcher.py | 309 -- .../src/codexlens/search/chain_search.py | 4779 ---------------- .../codexlens/search/clustering/__init__.py | 124 - .../src/codexlens/search/clustering/base.py | 153 - .../search/clustering/dbscan_strategy.py | 197 - .../codexlens/search/clustering/factory.py | 202 - .../search/clustering/frequency_strategy.py | 263 - .../search/clustering/hdbscan_strategy.py | 175 - .../search/clustering/noop_strategy.py | 83 - codex-lens/src/codexlens/search/enrichment.py | 171 - .../codexlens/search/global_graph_expander.py | 250 - .../src/codexlens/search/graph_expander.py | 264 - .../src/codexlens/search/hybrid_search.py | 1359 ----- .../src/codexlens/search/query_parser.py | 242 - codex-lens/src/codexlens/search/ranking.py | 1701 ------ codex-lens/src/codexlens/semantic/__init__.py | 118 - .../src/codexlens/semantic/ann_index.py | 1097 ---- codex-lens/src/codexlens/semantic/base.py | 61 - codex-lens/src/codexlens/semantic/chunker.py | 821 --- .../src/codexlens/semantic/code_extractor.py | 274 - codex-lens/src/codexlens/semantic/embedder.py | 288 - codex-lens/src/codexlens/semantic/factory.py | 158 - .../src/codexlens/semantic/gpu_support.py | 431 -- .../codexlens/semantic/litellm_embedder.py | 144 - .../codexlens/semantic/reranker/__init__.py | 25 - .../semantic/reranker/api_reranker.py | 442 -- .../src/codexlens/semantic/reranker/base.py | 46 - .../codexlens/semantic/reranker/factory.py | 159 - .../semantic/reranker/fastembed_reranker.py | 257 - .../src/codexlens/semantic/reranker/legacy.py | 91 - .../semantic/reranker/litellm_reranker.py | 214 - .../semantic/reranker/onnx_reranker.py | 302 - .../codexlens/semantic/rotational_embedder.py | 434 -- .../src/codexlens/semantic/vector_store.py | 1278 ----- codex-lens/src/codexlens/storage/__init__.py | 32 - .../src/codexlens/storage/deepwiki_models.py | 120 - .../src/codexlens/storage/deepwiki_store.py | 1404 ----- codex-lens/src/codexlens/storage/dir_index.py | 2358 -------- .../src/codexlens/storage/file_cache.py | 32 - .../src/codexlens/storage/global_index.py | 618 --- .../src/codexlens/storage/index_filters.py | 47 - .../src/codexlens/storage/index_tree.py | 1320 ----- .../src/codexlens/storage/merkle_tree.py | 136 - .../codexlens/storage/migration_manager.py | 154 - .../codexlens/storage/migrations/__init__.py | 1 - .../migration_001_normalize_keywords.py | 123 - .../migration_002_add_token_metadata.py | 48 - .../migrations/migration_004_dual_fts.py | 232 - .../migration_005_cleanup_unused_fields.py | 196 - .../migration_006_enhance_relationships.py | 37 - .../migration_007_add_graph_neighbors.py | 47 - .../migration_008_add_merkle_hashes.py | 81 - .../migration_010_add_multi_vector_chunks.py | 162 - .../src/codexlens/storage/path_mapper.py | 300 - codex-lens/src/codexlens/storage/registry.py | 733 --- .../src/codexlens/storage/sqlite_store.py | 976 ---- .../src/codexlens/storage/sqlite_utils.py | 64 - .../codexlens/storage/vector_meta_store.py | 415 -- codex-lens/src/codexlens/tools/__init__.py | 226 - .../src/codexlens/tools/deepwiki_generator.py | 1067 ---- codex-lens/src/codexlens/watcher/__init__.py | 17 - codex-lens/src/codexlens/watcher/events.py | 82 - .../src/codexlens/watcher/file_watcher.py | 347 -- .../codexlens/watcher/incremental_indexer.py | 423 -- codex-lens/src/codexlens/watcher/manager.py | 255 - codex-lens/test_chain_search.py | 146 - codex-lens/test_simple_function.py | 19 - codex-lens/tests/TEST_SUITE_SUMMARY.md | 347 -- codex-lens/tests/__init__.py | 1 - codex-lens/tests/api/test_references.py | 282 - .../tests/api/test_semantic_integration.py | 264 - codex-lens/tests/api/test_semantic_search.py | 530 -- codex-lens/tests/conftest.py | 291 - codex-lens/tests/fix_sql.py | 84 - codex-lens/tests/integration/__init__.py | 1 - .../test_lsp_search_integration.py | 583 -- codex-lens/tests/lsp/__init__.py | 1 - codex-lens/tests/lsp/test_hover.py | 477 -- codex-lens/tests/lsp/test_lsp_edge_cases.py | 101 - .../tests/lsp/test_packaging_metadata.py | 27 - codex-lens/tests/lsp/test_references.py | 497 -- codex-lens/tests/lsp/test_server.py | 210 - .../lsp/test_standalone_manager_defaults.py | 31 - .../lsp/test_standalone_manager_paths.py | 48 - codex-lens/tests/mcp/__init__.py | 1 - codex-lens/tests/mcp/test_hooks.py | 208 - codex-lens/tests/mcp/test_provider.py | 383 -- codex-lens/tests/mcp/test_schema.py | 288 - codex-lens/tests/parsers/__init__.py | 1 - .../tests/parsers/test_astgrep_extraction.py | 444 -- .../tests/parsers/test_astgrep_processor.py | 402 -- codex-lens/tests/parsers/test_comparison.py | 525 -- .../tests/parsers/test_comparison_js_ts.py | 150 - codex-lens/tests/real/__init__.py | 5 - codex-lens/tests/real/comparison_test.py | 162 - codex-lens/tests/real/concurrent_test.py | 63 - codex-lens/tests/real/debug_compare.py | 149 - codex-lens/tests/real/debug_config.py | 216 - codex-lens/tests/real/debug_direct.py | 320 -- codex-lens/tests/real/debug_lsp.py | 58 - codex-lens/tests/real/debug_manager.py | 63 - codex-lens/tests/real/debug_reads.py | 123 - codex-lens/tests/real/direct_pyright_test.py | 93 - codex-lens/tests/real/minimal_test.py | 58 - codex-lens/tests/real/quick_test.py | 313 -- .../tests/real/test_lsp_real_interface.py | 424 -- codex-lens/tests/simple_validation.py | 218 - codex-lens/tests/test_ann_index.py | 760 --- codex-lens/tests/test_api_reranker.py | 200 - codex-lens/tests/test_association_tree.py | 400 -- codex-lens/tests/test_astgrep_binding.py | 191 - codex-lens/tests/test_binary_searcher.py | 161 - codex-lens/tests/test_cascade_strategies.py | 392 -- codex-lens/tests/test_chain_search.py | 1634 ------ codex-lens/tests/test_cli_help.py | 61 - codex-lens/tests/test_cli_hybrid_search.py | 122 - codex-lens/tests/test_cli_output.py | 280 - .../tests/test_clustering_strategies.py | 786 --- codex-lens/tests/test_code_extractor.py | 342 -- .../test_compare_ccw_smart_search_stage2.py | 350 -- codex-lens/tests/test_config.py | 555 -- codex-lens/tests/test_config_cascade.py | 155 - .../tests/test_config_ignore_patterns.py | 25 - .../tests/test_config_search_env_overrides.py | 83 - .../tests/test_config_staged_env_overrides.py | 136 - codex-lens/tests/test_deepwiki_store.py | 410 -- codex-lens/tests/test_deepwiki_types.py | 14 - codex-lens/tests/test_dual_fts.py | 612 -- codex-lens/tests/test_embedder.py | 85 - .../test_embedding_backend_availability.py | 67 - .../tests/test_embedding_status_root_model.py | 204 - codex-lens/tests/test_encoding.py | 372 -- codex-lens/tests/test_enrichment.py | 234 - codex-lens/tests/test_entities.py | 245 - codex-lens/tests/test_errors.py | 165 - codex-lens/tests/test_file_cache.py | 224 - .../tests/test_global_graph_expander.py | 323 -- codex-lens/tests/test_global_index.py | 293 - codex-lens/tests/test_global_relationships.py | 507 -- codex-lens/tests/test_global_symbol_index.py | 192 - codex-lens/tests/test_graph_expansion.py | 188 - codex-lens/tests/test_hybrid_chunker.py | 622 --- codex-lens/tests/test_hybrid_search_e2e.py | 945 ---- .../test_hybrid_search_reranker_backend.py | 192 - codex-lens/tests/test_hybrid_search_unit.py | 635 --- codex-lens/tests/test_incremental_indexer.py | 125 - codex-lens/tests/test_incremental_indexing.py | 512 -- .../tests/test_index_status_cli_contract.py | 674 --- .../tests/test_index_tree_ignore_dirs.py | 295 - codex-lens/tests/test_litellm_reranker.py | 85 - .../tests/test_lsp_graph_builder_depth.py | 36 - codex-lens/tests/test_merkle_detection.py | 100 - codex-lens/tests/test_migrations.py | 114 - codex-lens/tests/test_parser_integration.py | 281 - codex-lens/tests/test_parsers.py | 462 -- .../tests/test_path_mapper_windows_drive.py | 19 - .../tests/test_performance_optimizations.py | 814 --- codex-lens/tests/test_pure_vector_search.py | 345 -- codex-lens/tests/test_query_parser.py | 485 -- codex-lens/tests/test_ranking.py | 782 --- codex-lens/tests/test_recursive_splitting.py | 291 - codex-lens/tests/test_registry.py | 126 - codex-lens/tests/test_reranker_backends.py | 115 - codex-lens/tests/test_reranker_factory.py | 401 -- codex-lens/tests/test_result_grouping.py | 589 -- codex-lens/tests/test_rrf_fusion.py | 584 -- .../tests/test_schema_cleanup_migration.py | 308 - codex-lens/tests/test_search_comparison.py | 540 -- codex-lens/tests/test_search_comprehensive.py | 604 -- codex-lens/tests/test_search_full_coverage.py | 1267 ----- codex-lens/tests/test_search_performance.py | 660 --- codex-lens/tests/test_semantic.py | 290 - codex-lens/tests/test_semantic_search.py | 804 --- codex-lens/tests/test_sqlite_store.py | 444 -- ...t_stage1_binary_search_uses_chunk_lines.py | 131 - codex-lens/tests/test_staged_cascade.py | 812 --- .../tests/test_staged_cascade_lsp_depth.py | 168 - .../tests/test_staged_cascade_realtime_lsp.py | 98 - .../tests/test_staged_stage1_fallback_seed.py | 49 - .../test_staged_stage3_fast_strategies.py | 56 - ...ndalone_lsp_manager_open_document_cache.py | 87 - .../tests/test_static_graph_integration.py | 289 - codex-lens/tests/test_storage.py | 534 -- codex-lens/tests/test_storage_concurrency.py | 698 --- codex-lens/tests/test_symbol_extractor.py | 238 - codex-lens/tests/test_token_chunking.py | 190 - codex-lens/tests/test_token_storage.py | 368 -- codex-lens/tests/test_tokenizer.py | 162 - .../tests/test_tokenizer_performance.py | 127 - codex-lens/tests/test_treesitter_parser.py | 377 -- codex-lens/tests/test_vector_search_full.py | 812 --- codex-lens/tests/test_vector_store.py | 386 -- codex-lens/tests/test_watcher/__init__.py | 1 - codex-lens/tests/test_watcher/conftest.py | 43 - codex-lens/tests/test_watcher/test_events.py | 103 - .../tests/test_watcher/test_file_watcher.py | 124 - codex-lens/tests/unit/__init__.py | 1 - codex-lens/tests/unit/lsp/__init__.py | 1 - codex-lens/tests/unit/lsp/test_lsp_bridge.py | 879 --- .../tests/unit/lsp/test_lsp_edge_cases.py | 795 --- .../tests/unit/lsp/test_lsp_graph_builder.py | 549 -- codex-lens/tests/validate_optimizations.py | 287 - codex-lens/verify_watcher.py | 117 - 386 files changed, 129550 deletions(-) delete mode 100644 codex-lens/.coverage delete mode 100644 codex-lens/.env.example delete mode 100644 codex-lens/.github/workflows/security.yml delete mode 100644 codex-lens/.pytest-temp/test_builder_loads_saved_ignor0/codexlens-home/settings.json delete mode 100644 codex-lens/.pytest-temp/test_builder_loads_saved_ignor0/frontend/app.ts delete mode 100644 codex-lens/.pytest-temp/test_builder_loads_saved_ignor0/frontend/bundle.min.js delete mode 100644 codex-lens/.pytest-temp/test_builder_loads_saved_ignor0/frontend/dist/compiled.ts delete mode 100644 codex-lens/.pytest-temp/test_collect_dirs_by_depth_res0/frontend/dist/bundle.ts delete mode 100644 codex-lens/.pytest-temp/test_collect_dirs_by_depth_res0/frontend/src/app.ts delete mode 100644 codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/.next/generated.py delete mode 100644 codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/.parcel-cache/generated.py delete mode 100644 codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/.turbo/generated.py delete mode 100644 codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/build/generated.py delete mode 100644 codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/coverage/generated.py delete mode 100644 codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/dist/generated.py delete mode 100644 codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/out/generated.py delete mode 100644 codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/src/app.py delete mode 100644 codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/target/generated.py delete mode 100644 codex-lens/.pytest-temp/test_iter_source_files_respect0/frontend/app.ts delete mode 100644 codex-lens/.pytest-temp/test_iter_source_files_respect0/frontend/bundle.min.js delete mode 100644 codex-lens/.pytest-temp/test_iter_source_files_respect0/frontend/skip.ts delete mode 100644 codex-lens/.pytest-temp/test_load_settings_reads_ignor0/settings.json delete mode 100644 codex-lens/.pytest-temp/test_should_index_dir_ignores_0/package/dist/bundle.py delete mode 100644 codex-lens/ASSOCIATION_TREE_IMPLEMENTATION.md delete mode 100644 codex-lens/CHAIN_SEARCH_IMPLEMENTATION.md delete mode 100644 codex-lens/CHANGELOG.md delete mode 100644 codex-lens/DEPENDENCIES.md delete mode 100644 codex-lens/LICENSE delete mode 100644 codex-lens/README.md delete mode 100644 codex-lens/SEMANTIC_SEARCH_USAGE.md delete mode 100644 codex-lens/benchmarks/accuracy_queries_ccw_smart_search.jsonl delete mode 100644 codex-lens/benchmarks/accuracy_queries_codexlens.jsonl delete mode 100644 codex-lens/benchmarks/analyze_methods.py delete mode 100644 codex-lens/benchmarks/binary_search_microbenchmark.py delete mode 100644 codex-lens/benchmarks/cascade_benchmark.py delete mode 100644 codex-lens/benchmarks/compare_accuracy_labeled.py delete mode 100644 codex-lens/benchmarks/compare_ccw_smart_search_stage2.py delete mode 100644 codex-lens/benchmarks/compare_semantic_methods.py delete mode 100644 codex-lens/benchmarks/compare_staged_realtime_vs_dense_rerank.py delete mode 100644 codex-lens/benchmarks/compare_staged_stage2_modes.py delete mode 100644 codex-lens/benchmarks/method_contribution_analysis.py delete mode 100644 codex-lens/benchmarks/results/accuracy_2026-02-11_codexlens.json delete mode 100644 codex-lens/benchmarks/results/accuracy_2026-02-11_codexlens_precomputed.json delete mode 100644 codex-lens/benchmarks/results/cascade_benchmark.json delete mode 100644 codex-lens/benchmarks/results/ccw_smart_search_stage2.json delete mode 100644 codex-lens/benchmarks/results/ccw_smart_search_stage2_sample4_20260314.json delete mode 100644 codex-lens/benchmarks/results/ccw_smart_search_stage2_smoke1_cpu_reranker_20260314.json delete mode 100644 codex-lens/benchmarks/results/compare_2026-02-09.json delete mode 100644 codex-lens/benchmarks/results/compare_2026-02-09_dir_rr_fast4.json delete mode 100644 codex-lens/benchmarks/results/compare_2026-02-09_dir_rr_fast5.json delete mode 100644 codex-lens/benchmarks/results/compare_2026-02-09_dir_rr_fast6.json delete mode 100644 codex-lens/benchmarks/results/compare_2026-02-09_keepalive3.json delete mode 100644 codex-lens/benchmarks/results/compare_2026-02-09_keepalive3b.json delete mode 100644 codex-lens/benchmarks/results/compare_2026-02-09_run2.json delete mode 100644 codex-lens/benchmarks/results/compare_2026-02-09_score_fast3.json delete mode 100644 codex-lens/benchmarks/results/compare_2026-02-09_score_fast4.json delete mode 100644 codex-lens/benchmarks/results/compare_2026-02-09_score_fast5.json delete mode 100644 codex-lens/benchmarks/results/compare_2026-02-09_score_fast6.json delete mode 100644 codex-lens/benchmarks/results/compare_2026-02-09_score_fast7.json delete mode 100644 codex-lens/benchmarks/results/compare_2026-02-10_dir_rr_fast7.json delete mode 100644 codex-lens/benchmarks/results/compare_2026-02-10_path_fast7.json delete mode 100644 codex-lens/benchmarks/results/compare_2026-02-10_score_fast7.json delete mode 100644 codex-lens/benchmarks/results/compare_2026-02-11_dir_rr_fast7.json delete mode 100644 codex-lens/benchmarks/results/compare_2026-02-11_path_fast7.json delete mode 100644 codex-lens/benchmarks/results/compare_2026-02-11_score_fast7.json delete mode 100644 codex-lens/benchmarks/results/method_contribution_analysis.json delete mode 100644 codex-lens/benchmarks/results/tmp_compare1.json delete mode 100644 codex-lens/benchmarks/results/tmp_compare3_ok_cpu.json delete mode 100644 codex-lens/benchmarks/results/tmp_compare3_ok_cpu_dedup.json delete mode 100644 codex-lens/coir_benchmark_full.py delete mode 100644 codex-lens/debug_semantic_search.py delete mode 100644 codex-lens/debug_semantic_v2.py delete mode 100644 codex-lens/dist/codex_lens-0.1.0-py3-none-any.whl delete mode 100644 codex-lens/dist/codex_lens-0.1.0.tar.gz delete mode 100644 codex-lens/docs/CHAIN_SEARCH_QUICKREF.md delete mode 100644 codex-lens/docs/CODEXLENS_LSP_API_SPEC.md delete mode 100644 codex-lens/docs/CODEX_LENS_AUTO_HYBRID.md delete mode 100644 codex-lens/docs/CONFIGURATION.md delete mode 100644 codex-lens/docs/DESIGN_EVALUATION_REPORT.md delete mode 100644 codex-lens/docs/HYBRID_SEARCH_ARCHITECTURE.md delete mode 100644 codex-lens/docs/IMPLEMENTATION_SUMMARY.md delete mode 100644 codex-lens/docs/LLM_REMOVAL_SUMMARY.md delete mode 100644 codex-lens/docs/LSP_INTEGRATION_CHECKLIST.md delete mode 100644 codex-lens/docs/LSP_INTEGRATION_PLAN.md delete mode 100644 codex-lens/docs/MCP_ENDPOINT_DESIGN.md delete mode 100644 codex-lens/docs/MIGRATION_005_SUMMARY.md delete mode 100644 codex-lens/docs/MULTILEVEL_CHUNKER_DESIGN.md delete mode 100644 codex-lens/docs/PURE_VECTOR_SEARCH_GUIDE.md delete mode 100644 codex-lens/docs/REAL_LSP_SERVER_PLAN.md delete mode 100644 codex-lens/docs/SEARCH_ANALYSIS_SUMMARY.md delete mode 100644 codex-lens/docs/SEARCH_COMPARISON_ANALYSIS.md delete mode 100644 codex-lens/docs/SEMANTIC_GRAPH_DESIGN.md delete mode 100644 codex-lens/docs/T6-CLI-Integration-Summary.md delete mode 100644 codex-lens/docs/codex_mcp.md delete mode 100644 codex-lens/docs/test-quality-enhancements.md delete mode 100644 codex-lens/examples/association_tree_demo.py delete mode 100644 codex-lens/examples/debug_uri_format.py delete mode 100644 codex-lens/examples/search_comparison_benchmark.py delete mode 100644 codex-lens/examples/simple_search_comparison.py delete mode 100644 codex-lens/examples/test_lsp_capabilities.py delete mode 100644 codex-lens/examples/test_lsp_references.py delete mode 100644 codex-lens/examples/test_lsp_tree.py delete mode 100644 codex-lens/examples/test_raw_lsp.py delete mode 100644 codex-lens/examples/test_raw_response.py delete mode 100644 codex-lens/examples/test_simple_call_hierarchy.py delete mode 100644 codex-lens/examples/test_uri_consistency.py delete mode 100644 codex-lens/examples/test_wait_for_analysis.py delete mode 100644 codex-lens/lsp-servers.json delete mode 100644 codex-lens/misleading_test.db delete mode 100644 codex-lens/pyproject.toml delete mode 100644 codex-lens/requirements.in delete mode 100644 codex-lens/scripts/bootstrap_reranker_local.py delete mode 100644 codex-lens/scripts/generate_embeddings.py delete mode 100644 codex-lens/scripts/requirements-reranker-local.txt delete mode 100644 codex-lens/src/.gitignore delete mode 100644 codex-lens/src/codex_lens.egg-info/PKG-INFO delete mode 100644 codex-lens/src/codex_lens.egg-info/SOURCES.txt delete mode 100644 codex-lens/src/codex_lens.egg-info/dependency_links.txt delete mode 100644 codex-lens/src/codex_lens.egg-info/entry_points.txt delete mode 100644 codex-lens/src/codex_lens.egg-info/requires.txt delete mode 100644 codex-lens/src/codex_lens.egg-info/top_level.txt delete mode 100644 codex-lens/src/codexlens/__init__.py delete mode 100644 codex-lens/src/codexlens/__main__.py delete mode 100644 codex-lens/src/codexlens/api/__init__.py delete mode 100644 codex-lens/src/codexlens/api/definition.py delete mode 100644 codex-lens/src/codexlens/api/file_context.py delete mode 100644 codex-lens/src/codexlens/api/hover.py delete mode 100644 codex-lens/src/codexlens/api/lsp_lifecycle.py delete mode 100644 codex-lens/src/codexlens/api/models.py delete mode 100644 codex-lens/src/codexlens/api/references.py delete mode 100644 codex-lens/src/codexlens/api/semantic.py delete mode 100644 codex-lens/src/codexlens/api/symbols.py delete mode 100644 codex-lens/src/codexlens/api/utils.py delete mode 100644 codex-lens/src/codexlens/cli/__init__.py delete mode 100644 codex-lens/src/codexlens/cli/commands.py delete mode 100644 codex-lens/src/codexlens/cli/embedding_manager.py delete mode 100644 codex-lens/src/codexlens/cli/model_manager.py delete mode 100644 codex-lens/src/codexlens/cli/output.py delete mode 100644 codex-lens/src/codexlens/config.py delete mode 100644 codex-lens/src/codexlens/entities.py delete mode 100644 codex-lens/src/codexlens/env_config.py delete mode 100644 codex-lens/src/codexlens/errors.py delete mode 100644 codex-lens/src/codexlens/hybrid_search/__init__.py delete mode 100644 codex-lens/src/codexlens/hybrid_search/data_structures.py delete mode 100644 codex-lens/src/codexlens/indexing/README.md delete mode 100644 codex-lens/src/codexlens/indexing/__init__.py delete mode 100644 codex-lens/src/codexlens/indexing/embedding.py delete mode 100644 codex-lens/src/codexlens/indexing/symbol_extractor.py delete mode 100644 codex-lens/src/codexlens/lsp/__init__.py delete mode 100644 codex-lens/src/codexlens/lsp/handlers.py delete mode 100644 codex-lens/src/codexlens/lsp/keepalive_bridge.py delete mode 100644 codex-lens/src/codexlens/lsp/lsp-servers.json delete mode 100644 codex-lens/src/codexlens/lsp/lsp_bridge.py delete mode 100644 codex-lens/src/codexlens/lsp/lsp_graph_builder.py delete mode 100644 codex-lens/src/codexlens/lsp/providers.py delete mode 100644 codex-lens/src/codexlens/lsp/server.py delete mode 100644 codex-lens/src/codexlens/lsp/standalone_manager.py delete mode 100644 codex-lens/src/codexlens/mcp/__init__.py delete mode 100644 codex-lens/src/codexlens/mcp/hooks.py delete mode 100644 codex-lens/src/codexlens/mcp/provider.py delete mode 100644 codex-lens/src/codexlens/mcp/schema.py delete mode 100644 codex-lens/src/codexlens/parsers/__init__.py delete mode 100644 codex-lens/src/codexlens/parsers/astgrep_binding.py delete mode 100644 codex-lens/src/codexlens/parsers/astgrep_js_ts_processor.py delete mode 100644 codex-lens/src/codexlens/parsers/astgrep_processor.py delete mode 100644 codex-lens/src/codexlens/parsers/encoding.py delete mode 100644 codex-lens/src/codexlens/parsers/factory.py delete mode 100644 codex-lens/src/codexlens/parsers/patterns/__init__.py delete mode 100644 codex-lens/src/codexlens/parsers/patterns/javascript/__init__.py delete mode 100644 codex-lens/src/codexlens/parsers/patterns/python/__init__.py delete mode 100644 codex-lens/src/codexlens/parsers/patterns/python/call.yaml delete mode 100644 codex-lens/src/codexlens/parsers/patterns/python/imports.yaml delete mode 100644 codex-lens/src/codexlens/parsers/patterns/python/inherits.yaml delete mode 100644 codex-lens/src/codexlens/parsers/patterns/typescript/__init__.py delete mode 100644 codex-lens/src/codexlens/parsers/tokenizer.py delete mode 100644 codex-lens/src/codexlens/parsers/treesitter_parser.py delete mode 100644 codex-lens/src/codexlens/search/__init__.py delete mode 100644 codex-lens/src/codexlens/search/association_tree/QUICK_START.md delete mode 100644 codex-lens/src/codexlens/search/association_tree/README.md delete mode 100644 codex-lens/src/codexlens/search/association_tree/__init__.py delete mode 100644 codex-lens/src/codexlens/search/association_tree/builder.py delete mode 100644 codex-lens/src/codexlens/search/association_tree/data_structures.py delete mode 100644 codex-lens/src/codexlens/search/association_tree/deduplicator.py delete mode 100644 codex-lens/src/codexlens/search/binary_searcher.py delete mode 100644 codex-lens/src/codexlens/search/chain_search.py delete mode 100644 codex-lens/src/codexlens/search/clustering/__init__.py delete mode 100644 codex-lens/src/codexlens/search/clustering/base.py delete mode 100644 codex-lens/src/codexlens/search/clustering/dbscan_strategy.py delete mode 100644 codex-lens/src/codexlens/search/clustering/factory.py delete mode 100644 codex-lens/src/codexlens/search/clustering/frequency_strategy.py delete mode 100644 codex-lens/src/codexlens/search/clustering/hdbscan_strategy.py delete mode 100644 codex-lens/src/codexlens/search/clustering/noop_strategy.py delete mode 100644 codex-lens/src/codexlens/search/enrichment.py delete mode 100644 codex-lens/src/codexlens/search/global_graph_expander.py delete mode 100644 codex-lens/src/codexlens/search/graph_expander.py delete mode 100644 codex-lens/src/codexlens/search/hybrid_search.py delete mode 100644 codex-lens/src/codexlens/search/query_parser.py delete mode 100644 codex-lens/src/codexlens/search/ranking.py delete mode 100644 codex-lens/src/codexlens/semantic/__init__.py delete mode 100644 codex-lens/src/codexlens/semantic/ann_index.py delete mode 100644 codex-lens/src/codexlens/semantic/base.py delete mode 100644 codex-lens/src/codexlens/semantic/chunker.py delete mode 100644 codex-lens/src/codexlens/semantic/code_extractor.py delete mode 100644 codex-lens/src/codexlens/semantic/embedder.py delete mode 100644 codex-lens/src/codexlens/semantic/factory.py delete mode 100644 codex-lens/src/codexlens/semantic/gpu_support.py delete mode 100644 codex-lens/src/codexlens/semantic/litellm_embedder.py delete mode 100644 codex-lens/src/codexlens/semantic/reranker/__init__.py delete mode 100644 codex-lens/src/codexlens/semantic/reranker/api_reranker.py delete mode 100644 codex-lens/src/codexlens/semantic/reranker/base.py delete mode 100644 codex-lens/src/codexlens/semantic/reranker/factory.py delete mode 100644 codex-lens/src/codexlens/semantic/reranker/fastembed_reranker.py delete mode 100644 codex-lens/src/codexlens/semantic/reranker/legacy.py delete mode 100644 codex-lens/src/codexlens/semantic/reranker/litellm_reranker.py delete mode 100644 codex-lens/src/codexlens/semantic/reranker/onnx_reranker.py delete mode 100644 codex-lens/src/codexlens/semantic/rotational_embedder.py delete mode 100644 codex-lens/src/codexlens/semantic/vector_store.py delete mode 100644 codex-lens/src/codexlens/storage/__init__.py delete mode 100644 codex-lens/src/codexlens/storage/deepwiki_models.py delete mode 100644 codex-lens/src/codexlens/storage/deepwiki_store.py delete mode 100644 codex-lens/src/codexlens/storage/dir_index.py delete mode 100644 codex-lens/src/codexlens/storage/file_cache.py delete mode 100644 codex-lens/src/codexlens/storage/global_index.py delete mode 100644 codex-lens/src/codexlens/storage/index_filters.py delete mode 100644 codex-lens/src/codexlens/storage/index_tree.py delete mode 100644 codex-lens/src/codexlens/storage/merkle_tree.py delete mode 100644 codex-lens/src/codexlens/storage/migration_manager.py delete mode 100644 codex-lens/src/codexlens/storage/migrations/__init__.py delete mode 100644 codex-lens/src/codexlens/storage/migrations/migration_001_normalize_keywords.py delete mode 100644 codex-lens/src/codexlens/storage/migrations/migration_002_add_token_metadata.py delete mode 100644 codex-lens/src/codexlens/storage/migrations/migration_004_dual_fts.py delete mode 100644 codex-lens/src/codexlens/storage/migrations/migration_005_cleanup_unused_fields.py delete mode 100644 codex-lens/src/codexlens/storage/migrations/migration_006_enhance_relationships.py delete mode 100644 codex-lens/src/codexlens/storage/migrations/migration_007_add_graph_neighbors.py delete mode 100644 codex-lens/src/codexlens/storage/migrations/migration_008_add_merkle_hashes.py delete mode 100644 codex-lens/src/codexlens/storage/migrations/migration_010_add_multi_vector_chunks.py delete mode 100644 codex-lens/src/codexlens/storage/path_mapper.py delete mode 100644 codex-lens/src/codexlens/storage/registry.py delete mode 100644 codex-lens/src/codexlens/storage/sqlite_store.py delete mode 100644 codex-lens/src/codexlens/storage/sqlite_utils.py delete mode 100644 codex-lens/src/codexlens/storage/vector_meta_store.py delete mode 100644 codex-lens/src/codexlens/tools/__init__.py delete mode 100644 codex-lens/src/codexlens/tools/deepwiki_generator.py delete mode 100644 codex-lens/src/codexlens/watcher/__init__.py delete mode 100644 codex-lens/src/codexlens/watcher/events.py delete mode 100644 codex-lens/src/codexlens/watcher/file_watcher.py delete mode 100644 codex-lens/src/codexlens/watcher/incremental_indexer.py delete mode 100644 codex-lens/src/codexlens/watcher/manager.py delete mode 100644 codex-lens/test_chain_search.py delete mode 100644 codex-lens/test_simple_function.py delete mode 100644 codex-lens/tests/TEST_SUITE_SUMMARY.md delete mode 100644 codex-lens/tests/__init__.py delete mode 100644 codex-lens/tests/api/test_references.py delete mode 100644 codex-lens/tests/api/test_semantic_integration.py delete mode 100644 codex-lens/tests/api/test_semantic_search.py delete mode 100644 codex-lens/tests/conftest.py delete mode 100644 codex-lens/tests/fix_sql.py delete mode 100644 codex-lens/tests/integration/__init__.py delete mode 100644 codex-lens/tests/integration/test_lsp_search_integration.py delete mode 100644 codex-lens/tests/lsp/__init__.py delete mode 100644 codex-lens/tests/lsp/test_hover.py delete mode 100644 codex-lens/tests/lsp/test_lsp_edge_cases.py delete mode 100644 codex-lens/tests/lsp/test_packaging_metadata.py delete mode 100644 codex-lens/tests/lsp/test_references.py delete mode 100644 codex-lens/tests/lsp/test_server.py delete mode 100644 codex-lens/tests/lsp/test_standalone_manager_defaults.py delete mode 100644 codex-lens/tests/lsp/test_standalone_manager_paths.py delete mode 100644 codex-lens/tests/mcp/__init__.py delete mode 100644 codex-lens/tests/mcp/test_hooks.py delete mode 100644 codex-lens/tests/mcp/test_provider.py delete mode 100644 codex-lens/tests/mcp/test_schema.py delete mode 100644 codex-lens/tests/parsers/__init__.py delete mode 100644 codex-lens/tests/parsers/test_astgrep_extraction.py delete mode 100644 codex-lens/tests/parsers/test_astgrep_processor.py delete mode 100644 codex-lens/tests/parsers/test_comparison.py delete mode 100644 codex-lens/tests/parsers/test_comparison_js_ts.py delete mode 100644 codex-lens/tests/real/__init__.py delete mode 100644 codex-lens/tests/real/comparison_test.py delete mode 100644 codex-lens/tests/real/concurrent_test.py delete mode 100644 codex-lens/tests/real/debug_compare.py delete mode 100644 codex-lens/tests/real/debug_config.py delete mode 100644 codex-lens/tests/real/debug_direct.py delete mode 100644 codex-lens/tests/real/debug_lsp.py delete mode 100644 codex-lens/tests/real/debug_manager.py delete mode 100644 codex-lens/tests/real/debug_reads.py delete mode 100644 codex-lens/tests/real/direct_pyright_test.py delete mode 100644 codex-lens/tests/real/minimal_test.py delete mode 100644 codex-lens/tests/real/quick_test.py delete mode 100644 codex-lens/tests/real/test_lsp_real_interface.py delete mode 100644 codex-lens/tests/simple_validation.py delete mode 100644 codex-lens/tests/test_ann_index.py delete mode 100644 codex-lens/tests/test_api_reranker.py delete mode 100644 codex-lens/tests/test_association_tree.py delete mode 100644 codex-lens/tests/test_astgrep_binding.py delete mode 100644 codex-lens/tests/test_binary_searcher.py delete mode 100644 codex-lens/tests/test_cascade_strategies.py delete mode 100644 codex-lens/tests/test_chain_search.py delete mode 100644 codex-lens/tests/test_cli_help.py delete mode 100644 codex-lens/tests/test_cli_hybrid_search.py delete mode 100644 codex-lens/tests/test_cli_output.py delete mode 100644 codex-lens/tests/test_clustering_strategies.py delete mode 100644 codex-lens/tests/test_code_extractor.py delete mode 100644 codex-lens/tests/test_compare_ccw_smart_search_stage2.py delete mode 100644 codex-lens/tests/test_config.py delete mode 100644 codex-lens/tests/test_config_cascade.py delete mode 100644 codex-lens/tests/test_config_ignore_patterns.py delete mode 100644 codex-lens/tests/test_config_search_env_overrides.py delete mode 100644 codex-lens/tests/test_config_staged_env_overrides.py delete mode 100644 codex-lens/tests/test_deepwiki_store.py delete mode 100644 codex-lens/tests/test_deepwiki_types.py delete mode 100644 codex-lens/tests/test_dual_fts.py delete mode 100644 codex-lens/tests/test_embedder.py delete mode 100644 codex-lens/tests/test_embedding_backend_availability.py delete mode 100644 codex-lens/tests/test_embedding_status_root_model.py delete mode 100644 codex-lens/tests/test_encoding.py delete mode 100644 codex-lens/tests/test_enrichment.py delete mode 100644 codex-lens/tests/test_entities.py delete mode 100644 codex-lens/tests/test_errors.py delete mode 100644 codex-lens/tests/test_file_cache.py delete mode 100644 codex-lens/tests/test_global_graph_expander.py delete mode 100644 codex-lens/tests/test_global_index.py delete mode 100644 codex-lens/tests/test_global_relationships.py delete mode 100644 codex-lens/tests/test_global_symbol_index.py delete mode 100644 codex-lens/tests/test_graph_expansion.py delete mode 100644 codex-lens/tests/test_hybrid_chunker.py delete mode 100644 codex-lens/tests/test_hybrid_search_e2e.py delete mode 100644 codex-lens/tests/test_hybrid_search_reranker_backend.py delete mode 100644 codex-lens/tests/test_hybrid_search_unit.py delete mode 100644 codex-lens/tests/test_incremental_indexer.py delete mode 100644 codex-lens/tests/test_incremental_indexing.py delete mode 100644 codex-lens/tests/test_index_status_cli_contract.py delete mode 100644 codex-lens/tests/test_index_tree_ignore_dirs.py delete mode 100644 codex-lens/tests/test_litellm_reranker.py delete mode 100644 codex-lens/tests/test_lsp_graph_builder_depth.py delete mode 100644 codex-lens/tests/test_merkle_detection.py delete mode 100644 codex-lens/tests/test_migrations.py delete mode 100644 codex-lens/tests/test_parser_integration.py delete mode 100644 codex-lens/tests/test_parsers.py delete mode 100644 codex-lens/tests/test_path_mapper_windows_drive.py delete mode 100644 codex-lens/tests/test_performance_optimizations.py delete mode 100644 codex-lens/tests/test_pure_vector_search.py delete mode 100644 codex-lens/tests/test_query_parser.py delete mode 100644 codex-lens/tests/test_ranking.py delete mode 100644 codex-lens/tests/test_recursive_splitting.py delete mode 100644 codex-lens/tests/test_registry.py delete mode 100644 codex-lens/tests/test_reranker_backends.py delete mode 100644 codex-lens/tests/test_reranker_factory.py delete mode 100644 codex-lens/tests/test_result_grouping.py delete mode 100644 codex-lens/tests/test_rrf_fusion.py delete mode 100644 codex-lens/tests/test_schema_cleanup_migration.py delete mode 100644 codex-lens/tests/test_search_comparison.py delete mode 100644 codex-lens/tests/test_search_comprehensive.py delete mode 100644 codex-lens/tests/test_search_full_coverage.py delete mode 100644 codex-lens/tests/test_search_performance.py delete mode 100644 codex-lens/tests/test_semantic.py delete mode 100644 codex-lens/tests/test_semantic_search.py delete mode 100644 codex-lens/tests/test_sqlite_store.py delete mode 100644 codex-lens/tests/test_stage1_binary_search_uses_chunk_lines.py delete mode 100644 codex-lens/tests/test_staged_cascade.py delete mode 100644 codex-lens/tests/test_staged_cascade_lsp_depth.py delete mode 100644 codex-lens/tests/test_staged_cascade_realtime_lsp.py delete mode 100644 codex-lens/tests/test_staged_stage1_fallback_seed.py delete mode 100644 codex-lens/tests/test_staged_stage3_fast_strategies.py delete mode 100644 codex-lens/tests/test_standalone_lsp_manager_open_document_cache.py delete mode 100644 codex-lens/tests/test_static_graph_integration.py delete mode 100644 codex-lens/tests/test_storage.py delete mode 100644 codex-lens/tests/test_storage_concurrency.py delete mode 100644 codex-lens/tests/test_symbol_extractor.py delete mode 100644 codex-lens/tests/test_token_chunking.py delete mode 100644 codex-lens/tests/test_token_storage.py delete mode 100644 codex-lens/tests/test_tokenizer.py delete mode 100644 codex-lens/tests/test_tokenizer_performance.py delete mode 100644 codex-lens/tests/test_treesitter_parser.py delete mode 100644 codex-lens/tests/test_vector_search_full.py delete mode 100644 codex-lens/tests/test_vector_store.py delete mode 100644 codex-lens/tests/test_watcher/__init__.py delete mode 100644 codex-lens/tests/test_watcher/conftest.py delete mode 100644 codex-lens/tests/test_watcher/test_events.py delete mode 100644 codex-lens/tests/test_watcher/test_file_watcher.py delete mode 100644 codex-lens/tests/unit/__init__.py delete mode 100644 codex-lens/tests/unit/lsp/__init__.py delete mode 100644 codex-lens/tests/unit/lsp/test_lsp_bridge.py delete mode 100644 codex-lens/tests/unit/lsp/test_lsp_edge_cases.py delete mode 100644 codex-lens/tests/unit/lsp/test_lsp_graph_builder.py delete mode 100644 codex-lens/tests/validate_optimizations.py delete mode 100644 codex-lens/verify_watcher.py diff --git a/codex-lens/.coverage b/codex-lens/.coverage deleted file mode 100644 index 57c0500999ac273ebab3dee7c0fe168328ffb5af..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 53248 zcmeI5e{2)y8OPuAo$c7Z^94w1LP&EJguwEnM4-VaKg^G?LQw;Sl}gKM&h{laYWu?7 znGmThzAz26sq3`g29-LPkV@1shEyg&8L;QQ zAMrUM2q!|U?775#@9ury?>*1wect!=YsZ(}yLPCiA`NJIUN)r_q@FMgNlFqSBmxfs z9^Mv)9nL#|l=1CPw;Lhd2frhT&k{cL3K5?d`ou2&$HJ1>X+9tMcI@@A9LYciZb$$L zAOR%s|4qQ!#fKA3P0ZL!rku_xrY>g`-FX%^KhwK$Pp`CRXJ4} zx;7#WE4nnG<`hXSWYvsps)a$x98$c}MMKHbi4M-eR7VST+x6*QO3gwoQyBz_VO`D3 z`l$4RGTLbuaIa=6hfL=Lh$!k{ftHY#y49CSx-y{XN+F{djulJQ?6T;xBZs5m#JqXT zQN{L2)-&)MtMUd;R9x9M074l}&r<8tx?IQ%DMqIxPk5Noz|KRaM)Tvc0s0WJ5 zMB{f#m71Nl8wIUXqE0{rw8cV+ugqfzM5S`bIFN(aTPe9{YW8so8b8$oPy9FM)P)nR zt<2E_cA!u^skCYu6FZTrpqV&5P3X}4>Enj>lIkF#y(pKHv;n)z(AtvII1tVLKJ&N- zL)K|WlQg#=So%PtjT#F!Na>uGu8uW3Vhja32#f_@v4LVCL%WJWeaL7B6e*>_)AoP&m=t%#20sFmhYVO~O?nIR6OGduPEAPl(C5&RuLo?Ck2cx#^DyYs2U}$_JyS9H zDP^?lpqzuzWLQ>p=&~>dJ2QYp3ua{~ET^@iX~!=;w8N9mJr#Q$fy%MR9!f2!x*I+g9qDf zo4m<|4)c4U(|P?1cJg_-kag!PIt@FjF!s~VNw4D!B2ZCkhfZlg7oDW(iMqLT#PamJ zhk*%I&C6M*yVFMTDUJ*&Q{ihgKmter2_OL^fCL_X0(}h}u@-Gv)4w?<7qdz#n>SYUXEeB-L03*G82yHx zaSoj$1-?x@)Gr%`mQmp@99kh|>Wb2zN~s0aOr_vky1VCrV8?6)vr4u&oTGR8&~y-e zb}oqa%vuz_gPbxLK0OC?ADgXix~S%|3bk`nJ?KSh)PuWC=%cA&SvPEr zqzD?p8a0%Hu4abvO2Gsxw!}d}s8L~1mxqT^%AsNS61QzfuK+4yjVfui0QXl(8Frl5 zGCO&YX{%8tlPemgqQiYs`m<>xBNu41!8f%BN5Q>aF;Hxux#G}N$=y+qtTkwTMh@r- zv|=Fxo>%OAIs&4#2A5B?plQQ?y(jBHuW{yn56BtlO_X477?f&lTAyWU*-$`cTL@$l zGdIXLYCIJLspcA`2H}1naxOI`a@TS!Ib5R_+>Pf22siL*GYh7~YOFpqn%32<8}}fx zDF7n14g{GYS%pZq`wnQVXF!9g(GVX~`uTrDbO{mPjV_5Tj{Y$^!Y9R-;^}xZe^fXv zbn<5fEB1E$SFz8<)iE>v`NKcip@&ES2_OL^fCP{L5Y2Uhl=c7IKF)e<*1DDT|D4U76{%Hk()z!CBWDF`)u>wki@lsB)T%IL z6&2sgSz@g!Q`i5(F3xJJRmQj0oVr-&pW&?bSt#CN{U4i2(E5zL1+PG0{< zwsBVDEc~vl|LbPbv_8w+^?!IfXC-D~kZ-LT+QC`PwMtF5{tvEW57(+yS^sleI4f3b z^~vjhb|wSCr1gJbJ!dhszK7sriueD+`5g%$0VIF~kN^@u0!RP}AOR$R1dzZ(O@L)s zBGAwO8Sy^^esMzrNB{{S0VIF~kN^@u0!RP}AOR$R1Rg>HEGtCl=l}88M@0Ngd`~zp z{9brN_`YyJ*e-MlE&Of%EdL~A;)Vo}01`j~NB{{S0VIF~kih>qfm}SuM6Bnyv3IVW ziIAjitw`36_mXS&ektsC;5=J7I?p)!rPj(h*t_K`;$^^pI#0s>6~>ASaJjLC?AXg8 z@+i^glQnSursx%{PU$~LPr9dW`efsz#1e^+@haO$(l+F{CpNL;D3o1uT|GOw$}Z+P zCbB&~_P4eGdE`@nkrGL+gF?wC2&rEfgHmgPW0z09zyBAGAuXgOY44q4<59?cG&1%j zo^a#j6x=lpj#tO%aowq8xV)ABLFCr=`u_AGTUzHCV6Tr$OY$FF*ouL3B*&nu_Ik2#P5n;%QM~-AzXqTf~17o*3E^dIL zw{II|qx5HHJzEm5^)ZAg9jjOe*KyGQ^*Ub;EDx+6A(sg$zrvpU?nQF`%`VrD@}scx z_R6`Ju3x1MRbKbn;j6aD@K0#HD{sAa^PK$In-^Za@Xm$N$R{6N{7W;LU;Y=ZeG)7t zfB!g~1U|lPoGCvqwd}vS^~Tx{>FH#PFY{h@ljrtAu{t1e?6->vP}q0kr>}o?%^0vR zf8Yp|UZ!d3A1{XXG4uCb0$1bfOF<{E@|@g2NdLtZBvx*H)d-PGeG3}E*D#@WS8pX! z`gjw`N!*nypF+;Kom;x)D^zKB8lztLdIhffgVH-m2#rk$oU(n!i|s35m9s$yp&N}D zM{qSE3p)bP_|FM}@x~@1N$=hY9;1FWPPy{z5fCP{L5vapl=2-T-;7r!A|15odjcc5{cSLEMh0oIyZLY$ohSUV8{ z9J~!+fBsME@1p}KjRcSY5%0VIF~kN^@u0!RP}AOR$R1dzZ3 GC-7eYzhuV% diff --git a/codex-lens/.env.example b/codex-lens/.env.example deleted file mode 100644 index 67a5989b..00000000 --- a/codex-lens/.env.example +++ /dev/null @@ -1,71 +0,0 @@ -# CodexLens Environment Configuration -# -# Configuration locations (copy to one of these): -# - ~/.codexlens/.env (global, applies to all projects) -# - project/.codexlens/.env (workspace-local) -# - project/.env (project root) -# -# Priority order (later overrides earlier): -# 1. Environment variables (already set in shell) - highest -# 2. .codexlens/.env (workspace-local) -# 3. .env (project root) -# 4. ~/.codexlens/.env (global) - lowest - -# ============================================ -# RERANKER Configuration -# ============================================ - -# API key for reranker service (SiliconFlow/Cohere/Jina) -# Required for 'api' backend -# RERANKER_API_KEY=sk-xxxx - -# Base URL for reranker API (overrides provider default) -# SiliconFlow: https://api.siliconflow.cn -# Cohere: https://api.cohere.ai -# Jina: https://api.jina.ai -# RERANKER_API_BASE=https://api.siliconflow.cn - -# Reranker provider: siliconflow, cohere, jina -# RERANKER_PROVIDER=siliconflow - -# Reranker model name -# SiliconFlow: BAAI/bge-reranker-v2-m3 -# Cohere: rerank-english-v3.0 -# Jina: jina-reranker-v2-base-multilingual -# RERANKER_MODEL=BAAI/bge-reranker-v2-m3 - -# ============================================ -# EMBEDDING Configuration -# ============================================ - -# API key for embedding service (for litellm backend) -# EMBEDDING_API_KEY=sk-xxxx - -# Base URL for embedding API -# EMBEDDING_API_BASE=https://api.openai.com - -# Embedding model name -# EMBEDDING_MODEL=text-embedding-3-small - -# ============================================ -# LITELLM Configuration -# ============================================ - -# API key for LiteLLM (for litellm reranker backend) -# LITELLM_API_KEY=sk-xxxx - -# Base URL for LiteLLM -# LITELLM_API_BASE= - -# LiteLLM model name -# LITELLM_MODEL=gpt-4o-mini - -# ============================================ -# General Configuration -# ============================================ - -# Custom data directory path (default: ~/.codexlens) -# CODEXLENS_DATA_DIR=~/.codexlens - -# Enable debug mode (true/false) -# CODEXLENS_DEBUG=false diff --git a/codex-lens/.github/workflows/security.yml b/codex-lens/.github/workflows/security.yml deleted file mode 100644 index 0ee464df..00000000 --- a/codex-lens/.github/workflows/security.yml +++ /dev/null @@ -1,70 +0,0 @@ -# Security scanning workflow for codex-lens -# Runs pip-audit to check for known vulnerabilities in dependencies - -name: Security Scan - -on: - # Run on push to main branch - push: - branches: - - main - - master - # Run weekly on Sundays at 00:00 UTC - schedule: - - cron: '0 0 * * 0' - # Allow manual trigger - workflow_dispatch: - -jobs: - security-audit: - name: Dependency Vulnerability Scan - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - cache: 'pip' - - - name: Install pip-audit - run: | - python -m pip install --upgrade pip - pip install pip-audit - - - name: Run pip-audit on requirements.in - run: pip-audit --requirement requirements.in - continue-on-error: false - - - name: Run pip-audit on pyproject.toml dependencies - run: pip-audit --project-path . - continue-on-error: false - - - name: Check for safety issues - run: | - pip install safety - safety check --json || true - continue-on-error: true - - bandit-security: - name: Code Security Linting - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - - name: Install bandit - run: pip install bandit[toml] - - - name: Run bandit security linter - run: bandit -r src/ -ll -i - continue-on-error: true diff --git a/codex-lens/.pytest-temp/test_builder_loads_saved_ignor0/codexlens-home/settings.json b/codex-lens/.pytest-temp/test_builder_loads_saved_ignor0/codexlens-home/settings.json deleted file mode 100644 index 1e88a530..00000000 --- a/codex-lens/.pytest-temp/test_builder_loads_saved_ignor0/codexlens-home/settings.json +++ /dev/null @@ -1 +0,0 @@ -{"ignore_patterns": ["frontend/dist"], "extension_filters": ["*.min.js"]} \ No newline at end of file diff --git a/codex-lens/.pytest-temp/test_builder_loads_saved_ignor0/frontend/app.ts b/codex-lens/.pytest-temp/test_builder_loads_saved_ignor0/frontend/app.ts deleted file mode 100644 index a1fe9822..00000000 --- a/codex-lens/.pytest-temp/test_builder_loads_saved_ignor0/frontend/app.ts +++ /dev/null @@ -1 +0,0 @@ -export const app = 1 diff --git a/codex-lens/.pytest-temp/test_builder_loads_saved_ignor0/frontend/bundle.min.js b/codex-lens/.pytest-temp/test_builder_loads_saved_ignor0/frontend/bundle.min.js deleted file mode 100644 index 840b8f69..00000000 --- a/codex-lens/.pytest-temp/test_builder_loads_saved_ignor0/frontend/bundle.min.js +++ /dev/null @@ -1 +0,0 @@ -export const bundle = 1 diff --git a/codex-lens/.pytest-temp/test_builder_loads_saved_ignor0/frontend/dist/compiled.ts b/codex-lens/.pytest-temp/test_builder_loads_saved_ignor0/frontend/dist/compiled.ts deleted file mode 100644 index 5e28e82e..00000000 --- a/codex-lens/.pytest-temp/test_builder_loads_saved_ignor0/frontend/dist/compiled.ts +++ /dev/null @@ -1 +0,0 @@ -export const compiled = 1 diff --git a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_res0/frontend/dist/bundle.ts b/codex-lens/.pytest-temp/test_collect_dirs_by_depth_res0/frontend/dist/bundle.ts deleted file mode 100644 index 840b8f69..00000000 --- a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_res0/frontend/dist/bundle.ts +++ /dev/null @@ -1 +0,0 @@ -export const bundle = 1 diff --git a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_res0/frontend/src/app.ts b/codex-lens/.pytest-temp/test_collect_dirs_by_depth_res0/frontend/src/app.ts deleted file mode 100644 index a1fe9822..00000000 --- a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_res0/frontend/src/app.ts +++ /dev/null @@ -1 +0,0 @@ -export const app = 1 diff --git a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/.next/generated.py b/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/.next/generated.py deleted file mode 100644 index d3504097..00000000 --- a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/.next/generated.py +++ /dev/null @@ -1 +0,0 @@ -print('artifact') diff --git a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/.parcel-cache/generated.py b/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/.parcel-cache/generated.py deleted file mode 100644 index d3504097..00000000 --- a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/.parcel-cache/generated.py +++ /dev/null @@ -1 +0,0 @@ -print('artifact') diff --git a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/.turbo/generated.py b/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/.turbo/generated.py deleted file mode 100644 index d3504097..00000000 --- a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/.turbo/generated.py +++ /dev/null @@ -1 +0,0 @@ -print('artifact') diff --git a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/build/generated.py b/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/build/generated.py deleted file mode 100644 index d3504097..00000000 --- a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/build/generated.py +++ /dev/null @@ -1 +0,0 @@ -print('artifact') diff --git a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/coverage/generated.py b/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/coverage/generated.py deleted file mode 100644 index d3504097..00000000 --- a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/coverage/generated.py +++ /dev/null @@ -1 +0,0 @@ -print('artifact') diff --git a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/dist/generated.py b/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/dist/generated.py deleted file mode 100644 index d3504097..00000000 --- a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/dist/generated.py +++ /dev/null @@ -1 +0,0 @@ -print('artifact') diff --git a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/out/generated.py b/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/out/generated.py deleted file mode 100644 index d3504097..00000000 --- a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/out/generated.py +++ /dev/null @@ -1 +0,0 @@ -print('artifact') diff --git a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/src/app.py b/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/src/app.py deleted file mode 100644 index 1fca9fb5..00000000 --- a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/src/app.py +++ /dev/null @@ -1 +0,0 @@ -print('ok') diff --git a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/target/generated.py b/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/target/generated.py deleted file mode 100644 index d3504097..00000000 --- a/codex-lens/.pytest-temp/test_collect_dirs_by_depth_ski0/target/generated.py +++ /dev/null @@ -1 +0,0 @@ -print('artifact') diff --git a/codex-lens/.pytest-temp/test_iter_source_files_respect0/frontend/app.ts b/codex-lens/.pytest-temp/test_iter_source_files_respect0/frontend/app.ts deleted file mode 100644 index a1fe9822..00000000 --- a/codex-lens/.pytest-temp/test_iter_source_files_respect0/frontend/app.ts +++ /dev/null @@ -1 +0,0 @@ -export const app = 1 diff --git a/codex-lens/.pytest-temp/test_iter_source_files_respect0/frontend/bundle.min.js b/codex-lens/.pytest-temp/test_iter_source_files_respect0/frontend/bundle.min.js deleted file mode 100644 index 840b8f69..00000000 --- a/codex-lens/.pytest-temp/test_iter_source_files_respect0/frontend/bundle.min.js +++ /dev/null @@ -1 +0,0 @@ -export const bundle = 1 diff --git a/codex-lens/.pytest-temp/test_iter_source_files_respect0/frontend/skip.ts b/codex-lens/.pytest-temp/test_iter_source_files_respect0/frontend/skip.ts deleted file mode 100644 index 8e5bef4c..00000000 --- a/codex-lens/.pytest-temp/test_iter_source_files_respect0/frontend/skip.ts +++ /dev/null @@ -1 +0,0 @@ -export const skip = 1 diff --git a/codex-lens/.pytest-temp/test_load_settings_reads_ignor0/settings.json b/codex-lens/.pytest-temp/test_load_settings_reads_ignor0/settings.json deleted file mode 100644 index 8af70431..00000000 --- a/codex-lens/.pytest-temp/test_load_settings_reads_ignor0/settings.json +++ /dev/null @@ -1 +0,0 @@ -{"ignore_patterns": ["frontend/dist", "coverage"], "extension_filters": ["*.min.js", "*.map"]} \ No newline at end of file diff --git a/codex-lens/.pytest-temp/test_should_index_dir_ignores_0/package/dist/bundle.py b/codex-lens/.pytest-temp/test_should_index_dir_ignores_0/package/dist/bundle.py deleted file mode 100644 index 89755a72..00000000 --- a/codex-lens/.pytest-temp/test_should_index_dir_ignores_0/package/dist/bundle.py +++ /dev/null @@ -1 +0,0 @@ -print('compiled') diff --git a/codex-lens/ASSOCIATION_TREE_IMPLEMENTATION.md b/codex-lens/ASSOCIATION_TREE_IMPLEMENTATION.md deleted file mode 100644 index 062882f1..00000000 --- a/codex-lens/ASSOCIATION_TREE_IMPLEMENTATION.md +++ /dev/null @@ -1,240 +0,0 @@ -# Association Tree Implementation Summary - -## Overview - -Successfully implemented LSP-based association tree search for CodexLens. The implementation consists of two core components that work together to discover and rank code relationships using Language Server Protocol (LSP) call hierarchy capabilities. - -## Components Implemented - -### 1. AssociationTreeBuilder (`src/codexlens/search/association_tree/builder.py`) - -**Purpose**: Build call relationship trees from seed locations using LSP - -**Key Features**: -- Depth-first recursive expansion from seed positions -- Supports bidirectional expansion: - - Incoming calls (callers) - who calls this function - - Outgoing calls (callees) - what this function calls -- Automatic cycle detection and marking -- Configurable max depth (default: 5) -- Async/await with parallel expansion -- Timeout handling (5s per LSP request) -- Graceful error handling - -**Core Methods**: -- `build_tree()`: Main entry point for tree construction -- `_expand_node()`: Recursive DFS expansion -- `_expand_incoming_calls()`: Process callers -- `_expand_outgoing_calls()`: Process callees - -### 2. ResultDeduplicator (`src/codexlens/search/association_tree/deduplicator.py`) - -**Purpose**: Extract unique nodes from trees and assign relevance scores - -**Scoring Algorithm**: -``` -Score = 0.4 * depth_score + 0.3 * frequency_score + 0.3 * kind_score - -where: -- depth_score: 1.0 at depth 0, decreasing to 0.0 at depth 10 -- frequency_score: occurrences / max_occurrences -- kind_score: function/method (1.0) > class (0.8) > variable (0.4) -``` - -**Key Features**: -- Deduplication by (file_path, start_line, end_line) -- Merge duplicate nodes across different paths -- Track minimum depth and occurrence count -- Configurable score weights -- Filter by kind or file pattern -- JSON serialization support - -### 3. Data Structures (`src/codexlens/search/association_tree/data_structures.py`) - -**TreeNode**: -- Represents a single node in the call tree -- Tracks depth, parents, children, paths -- Marks circular references - -**CallTree**: -- Complete tree structure with roots and edges -- Node lookup by ID -- Edge tracking for relationship visualization - -**UniqueNode**: -- Deduplicated result with metadata -- Aggregates multiple occurrences -- Contains relevance score - -## Integration with StandaloneLspManager - -Extended `StandaloneLspManager` with missing method: - -**Added**: `get_outgoing_calls()` method (`src/codexlens/lsp/standalone_manager.py:1057-1086`) - -This method complements the existing `get_incoming_calls()` to enable bidirectional call tree traversal. - -## Testing - -Comprehensive test suite with 9 tests covering: - -1. **Simple tree building**: Basic tree construction -2. **Cycle detection**: Circular reference handling -3. **Max depth limits**: Depth boundary enforcement -4. **Empty trees**: Edge case handling -5. **Basic deduplication**: Node merging logic -6. **Scoring algorithm**: Relevance ranking -7. **Max results limit**: Result pagination -8. **Kind filtering**: Symbol type filtering -9. **Serialization**: JSON export - -**Test Results**: All 9 tests passing ✅ - -**Test File**: `tests/test_association_tree.py` - -## Usage Example - -```python -import asyncio -from codexlens.lsp.standalone_manager import StandaloneLspManager -from codexlens.search.association_tree import ( - AssociationTreeBuilder, - ResultDeduplicator, -) - -async def search_with_association_tree(file_path: str, line: int): - async with StandaloneLspManager(workspace_root="/path/to/project") as lsp: - # Build tree - builder = AssociationTreeBuilder(lsp) - tree = await builder.build_tree( - seed_file_path=file_path, - seed_line=line, - max_depth=5, - expand_callers=True, - expand_callees=True, - ) - - # Deduplicate and score - deduplicator = ResultDeduplicator() - unique_nodes = deduplicator.deduplicate(tree, max_results=20) - - # Return results - return deduplicator.to_dict_list(unique_nodes) - -# Run -results = asyncio.run(search_with_association_tree("src/main.py", 42)) -``` - -## Integration Point - -The components can be integrated into `HybridSearchEngine`: - -```python -# In hybrid_search.py -async def _search_association_tree(self, query: str, limit: int): - # 1. Get seed results from vector search - seed_results = await self._search_vector(query, limit=5) - - # 2. Build association trees - builder = AssociationTreeBuilder(self.lsp_manager) - tree = await builder.build_tree( - seed_file_path=seed_results[0].file_path, - seed_line=seed_results[0].line, - max_depth=5, - ) - - # 3. Deduplicate and rank - deduplicator = ResultDeduplicator() - unique_nodes = deduplicator.deduplicate(tree, max_results=limit) - - # 4. Convert to search results - return self._convert_to_search_results(unique_nodes) -``` - -## File Structure - -``` -src/codexlens/search/association_tree/ -├── __init__.py # Module exports -├── builder.py # AssociationTreeBuilder -├── data_structures.py # TreeNode, CallTree, UniqueNode -├── deduplicator.py # ResultDeduplicator -└── README.md # Documentation - -tests/ -└── test_association_tree.py # Unit tests (9 tests) - -examples/ -└── association_tree_demo.py # Demo script -``` - -## Performance Characteristics - -**Time Complexity**: -- Tree building: O(nodes * avg_calls) with max_depth limit -- Deduplication: O(n log n) for sorting - -**Space Complexity**: -- Tree: O(nodes + edges) -- Unique nodes: O(unique_symbols) - -**Typical Performance** (max_depth=5): -- Small codebase: < 1s -- Medium codebase: 1-3s -- Large codebase: 3-10s - -**Optimization Strategies**: -1. Limit max_depth (recommended: 3-5) -2. Use timeouts (default: 5s per node) -3. Enable parallel expansion (default: on) -4. Filter by symbol kind early - -## Error Handling - -The implementation handles: -- ✅ LSP timeouts (logs warning, continues) -- ✅ Missing call hierarchy support (returns empty tree) -- ✅ Connection failures (skips node, continues) -- ✅ Invalid LSP responses (logs error, skips) -- ✅ Circular references (marks cycle, stops recursion) -- ✅ Max depth exceeded (stops expansion) - -## Code Quality - -**Code Style**: -- Python 3.10+ features (type hints, dataclasses) -- Follows existing CodexLens conventions -- Comprehensive docstrings -- Async/await throughout - -**Testing**: -- 9 unit tests with mock LSP -- Edge cases covered -- 100% core logic coverage - -**Documentation**: -- Module README with examples -- Inline code documentation -- Demo script provided -- Integration guide included - -## Next Steps - -Recommended enhancements: - -1. **Multi-seed building**: Build trees from multiple seeds simultaneously -2. **Graph visualization**: Export to DOT/Mermaid format -3. **Incremental updates**: Update trees based on code changes -4. **Custom scoring**: Pluggable scoring functions -5. **Caching**: Cache frequently-accessed trees -6. **Cross-language support**: Extend beyond Python (TypeScript, Java, etc.) - -## Conclusion - -The association tree implementation provides a robust foundation for LSP-based code relationship discovery in CodexLens. All core components are implemented, tested, and ready for integration into the hybrid search engine. - -**Status**: ✅ Complete and tested -**Files Modified**: 4 -**Files Created**: 7 -**Tests Added**: 9 -**All Tests Passing**: Yes diff --git a/codex-lens/CHAIN_SEARCH_IMPLEMENTATION.md b/codex-lens/CHAIN_SEARCH_IMPLEMENTATION.md deleted file mode 100644 index f0792a39..00000000 --- a/codex-lens/CHAIN_SEARCH_IMPLEMENTATION.md +++ /dev/null @@ -1,245 +0,0 @@ -# Chain Search Implementation Summary - -## Files Created - -### 1. `D:\Claude_dms3\codex-lens\src\codexlens\search\__init__.py` -Module initialization file exporting all public classes and functions: -- `ChainSearchEngine` -- `SearchOptions` -- `SearchStats` -- `ChainSearchResult` -- `quick_search` - -### 2. `D:\Claude_dms3\codex-lens\src\codexlens\search\chain_search.py` -Complete implementation of the chain search engine (460+ lines) with: - -#### Classes - -**SearchOptions** -- Configuration dataclass for search behavior -- Controls depth, parallelism, result limits -- Supports files-only and symbol search modes - -**SearchStats** -- Search execution statistics -- Tracks directories searched, files matched, timing, errors - -**ChainSearchResult** -- Comprehensive search result container -- Includes results, symbols, and execution statistics - -**ChainSearchEngine** -- Main parallel search engine -- Thread-safe with ThreadPoolExecutor -- Supports recursive directory traversal -- Implements result aggregation and deduplication - -#### Key Methods - -**Public API:** -- `search()` - Main search with full results -- `search_files_only()` - Fast file path-only search -- `search_symbols()` - Symbol search across hierarchy - -**Internal Methods:** -- `_find_start_index()` - Locate starting index for source path -- `_collect_index_paths()` - Recursive index path collection via subdirs -- `_search_parallel()` - Parallel ThreadPoolExecutor search -- `_search_single_index()` - Single index search with error handling -- `_merge_and_rank()` - Result deduplication and ranking -- `_search_symbols_parallel()` - Parallel symbol search -- `_search_symbols_single()` - Single index symbol search - -**Convenience Function:** -- `quick_search()` - One-line search with auto-initialization - -## Implementation Features - -### 1. Chain Traversal -- Starts from source path, finds nearest index -- Recursively collects subdirectory indexes via `subdirs` table -- Supports depth limiting (-1 = unlimited, 0 = current only) -- Prevents duplicate traversal with visited set - -### 2. Parallel Execution -- Uses ThreadPoolExecutor for concurrent searches -- Configurable worker count (default: 8) -- Error-tolerant: individual index failures don't block overall search -- Collects results as futures complete - -### 3. Result Processing -- **Deduplication**: By file path, keeping highest score -- **Ranking**: BM25 score descending -- **Limiting**: Per-directory and total limits -- **Statistics**: Comprehensive execution metrics - -### 4. Search Modes -- **Full search**: Results with excerpts and scores -- **Files-only**: Fast path-only mode -- **Symbol search**: Cross-directory symbol lookup - -### 5. Error Handling -- Graceful degradation on index errors -- Missing index warnings logged -- Error tracking in SearchStats -- Non-blocking failure mode - -## Search Flow Example - -``` -search("auth", path="D:/project/src", depth=-1) - | - v - [1] _find_start_index - registry.find_index_path("D:/project/src") - -> ~/.codexlens/indexes/D/project/src/_index.db - | - v - [2] _collect_index_paths (chain traversal) - src/_index.db - +-- subdirs: [api, utils] - | - +-- api/_index.db - | +-- subdirs: [] - | - +-- utils/_index.db - +-- subdirs: [] - - Result: [src/_index.db, api/_index.db, utils/_index.db] - | - v - [3] _search_parallel (ThreadPoolExecutor) - Thread1: src/ -> FTS search - Thread2: api/ -> FTS search - Thread3: utils/ -> FTS search - | - v - [4] _merge_and_rank - - Deduplicate by path - - Sort by score descending - - Apply total_limit - | - v - ChainSearchResult -``` - -## Testing - -### Test File: `D:\Claude_dms3\codex-lens\test_chain_search.py` -Comprehensive test suite with four test functions: - -1. **test_basic_search()** - Full search with all options -2. **test_quick_search()** - Convenience function test -3. **test_symbol_search()** - Symbol search across hierarchy -4. **test_files_only_search()** - Fast file-only mode - -### Test Results -- All imports successful -- All tests pass without errors -- Returns empty results (expected - no indexes built yet) -- Logging shows proper "No index found" warnings -- No crashes or exceptions - -## Integration Points - -### Dependencies -- `codexlens.entities`: SearchResult, Symbol -- `codexlens.storage.registry`: RegistryStore, DirMapping -- `codexlens.storage.dir_index`: DirIndexStore, SubdirLink -- `codexlens.storage.path_mapper`: PathMapper - -### Thread Safety -- Uses ThreadPoolExecutor for parallel searches -- Each thread gets own DirIndexStore connection -- SQLite WAL mode supports concurrent reads -- Registry uses thread-local connections - -## Usage Examples - -### Basic Search -```python -from pathlib import Path -from codexlens.search import ChainSearchEngine -from codexlens.storage.registry import RegistryStore -from codexlens.storage.path_mapper import PathMapper - -registry = RegistryStore() -registry.initialize() -mapper = PathMapper() -engine = ChainSearchEngine(registry, mapper) - -result = engine.search("authentication", Path("D:/project/src")) -print(f"Found {len(result.results)} matches in {result.stats.time_ms:.2f}ms") -``` - -### Quick Search -```python -from pathlib import Path -from codexlens.search import quick_search - -results = quick_search("TODO", Path("D:/project"), depth=2) -for r in results[:5]: - print(f"{r.path}: {r.score:.2f}") -``` - -### Symbol Search -```python -symbols = engine.search_symbols("init", Path("D:/project"), kind="function") -for sym in symbols: - print(f"{sym.name} - lines {sym.range[0]}-{sym.range[1]}") -``` - -### Files-Only Mode -```python -paths = engine.search_files_only("config", Path("D:/project")) -print(f"Files with 'config': {len(paths)}") -``` - -## Performance Characteristics - -### Strengths -- **Parallel execution**: Multiple indexes searched concurrently -- **Lazy traversal**: Only loads needed subdirectories -- **Memory efficient**: Streaming results, no full tree in memory -- **Depth limiting**: Can restrict search scope - -### Considerations -- **First search slower**: Needs to traverse subdir links -- **Many small dirs**: Overhead from thread pool -- **Deep hierarchies**: Depth=-1 may be slow on large trees - -### Optimization Tips -- Use `depth` parameter to limit scope -- Use `limit_per_dir` to reduce per-index overhead -- Use `files_only=True` when excerpts not needed -- Reuse ChainSearchEngine instance for multiple searches - -## Code Quality - -### Standards Met -- **Type annotations**: Full typing on all methods -- **Docstrings**: Complete with examples and parameter docs -- **Error handling**: Graceful degradation, no crashes -- **ASCII-only**: Windows GBK compatible -- **No debug spam**: Clean logging at appropriate levels -- **Thread safety**: Proper locking and pooling - -### Design Patterns -- **Dataclasses**: Clean configuration and result objects -- **Context managers**: Proper resource cleanup -- **Dependency injection**: Registry and mapper passed in -- **Builder pattern**: SearchOptions for configuration -- **Template method**: _search_single_index extensible - -## Status: Complete and Tested - -All requirements met: -- [x] Parallel search with ThreadPoolExecutor -- [x] Chain traversal via subdirs links -- [x] Depth limiting -- [x] Error tolerance -- [x] Search statistics -- [x] Complete docstrings and type hints -- [x] Test suite passes -- [x] ASCII-only output (GBK compatible) -- [x] Integration with existing codebase diff --git a/codex-lens/CHANGELOG.md b/codex-lens/CHANGELOG.md deleted file mode 100644 index 86391b9e..00000000 --- a/codex-lens/CHANGELOG.md +++ /dev/null @@ -1,41 +0,0 @@ -# CodexLens – Optimization Plan Changelog - -This changelog tracks the **CodexLens optimization plan** milestones (not the Python package version in `pyproject.toml`). - -## v1.0 (Optimization) – 2025-12-26 - -### Optimizations - -1. **P0: Context-aware hybrid chunking** - - Docstrings are extracted into dedicated chunks and excluded from code chunks. - - Docstring chunks include `parent_symbol` metadata when the docstring belongs to a function/class/method. - - Sliding-window chunk boundaries are deterministic for identical input. - -2. **P1: Adaptive RRF weights (QueryIntent)** - - Query intent is classified as `keyword` / `semantic` / `mixed`. - - RRF weights adapt to intent: - - `keyword`: exact-heavy (favors lexical matches) - - `semantic`: vector-heavy (favors semantic matches) - - `mixed`: keeps base/default weights - -3. **P2: Symbol boost** - - Fused results with an explicit symbol match (`symbol_name`) receive a multiplicative boost (default `1.5x`). - -4. **P2: Embedding-based re-ranking (optional)** - - A second-stage ranker can reorder top results by semantic similarity. - - Re-ranking runs only when `Config.enable_reranking=True`. - -5. **P3: Global symbol index (incremental + fast path)** - - `GlobalSymbolIndex` stores project-wide symbols in one SQLite DB for fast symbol lookups. - - `ChainSearchEngine.search_symbols()` uses the global index fast path when enabled. - -### Migration Notes -- **Reindexing (recommended)**: deterministic chunking and docstring metadata affect stored chunks. For best results, regenerate indexes/embeddings after upgrading: - - Rebuild indexes and/or re-run embedding generation for existing projects. -- **New config flags**: - - `Config.enable_reranking` (default `False`) - - `Config.reranking_top_k` (default `50`) - - `Config.symbol_boost_factor` (default `1.5`) - - `Config.global_symbol_index_enabled` (default `True`) -- **Breaking changes**: none (behavioral improvements only). - diff --git a/codex-lens/DEPENDENCIES.md b/codex-lens/DEPENDENCIES.md deleted file mode 100644 index aad301da..00000000 --- a/codex-lens/DEPENDENCIES.md +++ /dev/null @@ -1,38 +0,0 @@ -# Dependency Management - -This project uses setuptools with `pyproject.toml` for dependency management. - -## Locking Dependencies - -To generate a fully pinned `requirements.txt` from `requirements.in`: - -```bash -# Install pip-tools -pip install pip-tools - -# Compile requirements -pip-compile requirements.in --output-file=requirements.txt - -# To upgrade dependencies -pip-compile --upgrade requirements.in --output-file=requirements.txt -``` - -## Version Constraints - -This project uses **pessimistic versioning** (`~=`) for dependency specifications per PEP 440: - -- `typer~=0.9.0` means: `>=0.9.0, ==0.9.*` -- Allows bugfix updates (0.9.0, 0.9.1, 0.9.2) but not feature/minor updates (0.10.0) - -This provides stability while allowing automatic patch updates. - -## Security Scanning - -The project includes automated security scanning via GitHub Actions: -- Runs on every push to main branch -- Runs weekly (Sundays at 00:00 UTC) -- Can be triggered manually - -The scan uses: -- `pip-audit`: Checks for known vulnerabilities in dependencies -- `bandit`: Security linter for Python code diff --git a/codex-lens/LICENSE b/codex-lens/LICENSE deleted file mode 100644 index 8e31ab6b..00000000 --- a/codex-lens/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2024 CodexLens Contributors - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/codex-lens/README.md b/codex-lens/README.md deleted file mode 100644 index 823ab5fa..00000000 --- a/codex-lens/README.md +++ /dev/null @@ -1,109 +0,0 @@ -# CodexLens - -CodexLens is a multi-modal code analysis platform designed to provide comprehensive code understanding and analysis capabilities. - -## Features - -- **Multi-language Support**: Analyze code in Python, JavaScript, TypeScript and more using Tree-sitter parsers -- **Semantic Search**: Find relevant code snippets using semantic understanding with fastembed and HNSWLIB -- **Code Parsing**: Advanced code structure parsing with tree-sitter -- **Flexible Architecture**: Modular design for easy extension and customization - -## Installation - -### Basic Installation - -```bash -pip install codex-lens -``` - -### With Semantic Search - -```bash -pip install codex-lens[semantic] -``` - -### With GPU Acceleration (NVIDIA CUDA) - -```bash -pip install codex-lens[semantic-gpu] -``` - -### With DirectML (Windows - NVIDIA/AMD/Intel) - -```bash -pip install codex-lens[semantic-directml] -``` - -### With All Optional Features - -```bash -pip install codex-lens[full] -``` - -### Local ONNX Reranker Bootstrap - -Use the pinned bootstrap flow when you want the local-only reranker backend in an -existing CodexLens virtual environment without asking pip to resolve the whole -project extras set at once. - -1. Start from the CodexLens repo root and create or activate the project venv. -2. Review the pinned install manifest in `scripts/requirements-reranker-local.txt`. -3. Render the deterministic setup plan: - -```bash -python scripts/bootstrap_reranker_local.py --dry-run -``` - -The bootstrap script always targets the selected venv Python, installs the local -ONNX reranker stack in a fixed order, and keeps the package set pinned to the -validated Python 3.13-compatible combination: - -- `numpy==2.4.0` -- `onnxruntime==1.23.2` -- `huggingface-hub==0.36.2` -- `transformers==4.53.3` -- `optimum[onnxruntime]==2.1.0` - -When you are ready to apply it to the CodexLens venv, use: - -```bash -python scripts/bootstrap_reranker_local.py --apply -``` - -To pre-download the default local reranker model (`Xenova/ms-marco-MiniLM-L-6-v2`) -into the repo-local Hugging Face cache, use: - -```bash -python scripts/bootstrap_reranker_local.py --apply --download-model -``` - -The dry-run plan also prints the equivalent explicit model download command. On -Windows PowerShell with the default repo venv, it looks like: - -```bash -.venv/Scripts/hf.exe download Xenova/ms-marco-MiniLM-L-6-v2 --local-dir .cache/huggingface/models/Xenova--ms-marco-MiniLM-L-6-v2 -``` - -After installation, probe the backend from the same venv: - -```bash -python scripts/bootstrap_reranker_local.py --apply --probe -``` - -## Requirements - -- Python >= 3.10 -- See `pyproject.toml` for detailed dependency list - -## Development - -This project uses setuptools for building and packaging. - -## License - -MIT License - -## Authors - -CodexLens Contributors diff --git a/codex-lens/SEMANTIC_SEARCH_USAGE.md b/codex-lens/SEMANTIC_SEARCH_USAGE.md deleted file mode 100644 index 381a33be..00000000 --- a/codex-lens/SEMANTIC_SEARCH_USAGE.md +++ /dev/null @@ -1,83 +0,0 @@ -# Semantic Search Integration - -## Overview -The ChainSearchEngine now supports semantic keyword search in addition to FTS5 full-text search. - -## Usage - -### Enable Semantic Search - -```python -from pathlib import Path -from codexlens.search.chain_search import ChainSearchEngine, SearchOptions -from codexlens.storage.registry import RegistryStore -from codexlens.storage.path_mapper import PathMapper - -# Initialize -registry = RegistryStore() -registry.initialize() -mapper = PathMapper() -engine = ChainSearchEngine(registry, mapper) - -# Create options with semantic search enabled -options = SearchOptions( - include_semantic=True, # Enable semantic keyword search - total_limit=50 -) - -# Execute search -result = engine.search("authentication", Path("./src"), options) - -# Results include both FTS and semantic matches -for r in result.results: - print(f"{r.path}: {r.score:.2f} - {r.excerpt}") -``` - -### How It Works - -1. **FTS Search**: Traditional full-text search using SQLite FTS5 -2. **Semantic Search**: Searches the `semantic_metadata.keywords` field -3. **Result Merging**: Semantic results are added with 0.8x weight - - FTS results: BM25 score from SQLite - - Semantic results: Base score of 10.0 * 0.8 = 8.0 -4. **Deduplication**: `_merge_and_rank()` deduplicates by path, keeping highest score - -### Result Format - -- **FTS results**: Regular excerpt from matched content -- **Semantic results**: `Keywords: keyword1, keyword2, keyword3, ...` - -### Prerequisites - -Files must have semantic metadata generated via: - -```bash -codex-lens enhance . --tool gemini -``` - -This uses CCW CLI to generate summaries, keywords, and purpose descriptions. - -## Implementation Details - -### Changes Made - -1. **SearchOptions**: Added `include_semantic: bool = False` parameter -2. **_search_parallel()**: Passes `include_semantic` to worker threads -3. **_search_single_index()**: - - Accepts `include_semantic` parameter - - Calls `DirIndexStore.search_semantic_keywords()` when enabled - - Converts semantic matches to `SearchResult` objects - - Applies 0.8x weight to semantic scores - -### Score Weighting - -```python -# FTS result (from BM25) -SearchResult(path="...", score=12.5, excerpt="...") - -# Semantic result (fixed weighted score) -SearchResult(path="...", score=8.0, excerpt="Keywords: ...") -``` - -The 0.8x weight ensures semantic matches rank slightly lower than direct FTS matches -but still appear in relevant results. diff --git a/codex-lens/benchmarks/accuracy_queries_ccw_smart_search.jsonl b/codex-lens/benchmarks/accuracy_queries_ccw_smart_search.jsonl deleted file mode 100644 index 737f88b6..00000000 --- a/codex-lens/benchmarks/accuracy_queries_ccw_smart_search.jsonl +++ /dev/null @@ -1,16 +0,0 @@ -{"query":"executeHybridMode dense_rerank semantic smart_search","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-semantic-routing","notes":"CCW semantic mode delegates to CodexLens dense_rerank."} -{"query":"parse CodexLens JSON output strip ANSI smart_search","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-json-fallback","notes":"Covers JSON/plain-text fallback handling for CodexLens output."} -{"query":"smart_search init embed search action schema","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-action-schema","notes":"Find the Zod schema that defines init/embed/search actions."} -{"query":"auto init missing job dedupe smart_search","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-auto-init","notes":"Targets background init/embed warmup and dedupe state."} -{"query":"smart_search exact mode fallback to CodexLens fts","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-exact-fallback","notes":"Tracks the exact-mode fallback path into CodexLens FTS."} -{"query":"smart_search settings snapshot embedding backend reranker backend staged stage2 mode","relevant_paths":["ccw/src/tools/smart-search.ts"],"intent":"ccw-config-snapshot","notes":"Reads local config snapshot for embedding/reranker/staged pipeline settings."} -{"query":"embedding backend fastembed local litellm api config","relevant_paths":["codex-lens/src/codexlens/config.py"],"intent":"codexlens-embedding-config","notes":"Local-only benchmark should resolve to fastembed defaults."} -{"query":"reranker backend onnx api legacy configuration","relevant_paths":["codex-lens/src/codexlens/config.py","codex-lens/src/codexlens/env_config.py"],"intent":"codexlens-reranker-config","notes":"Covers both config dataclass fields and env overrides."} -{"query":"staged stage2 mode precomputed realtime static_global_graph","relevant_paths":["codex-lens/src/codexlens/config.py","codex-lens/src/codexlens/env_config.py"],"intent":"codexlens-stage2-config","notes":"Benchmark matrix should exercise the three supported stage2 modes."} -{"query":"enable staged rerank stage 4 config","relevant_paths":["codex-lens/src/codexlens/config.py"],"intent":"codexlens-stage4-rerank","notes":"Stage 4 rerank flag needs to stay enabled for local benchmarks."} -{"query":"cascade_search dense_rerank staged pipeline ChainSearchEngine","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-cascade","notes":"Baseline query for the central retrieval engine."} -{"query":"realtime LSP expand stage2 search pipeline","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-stage2-realtime","notes":"Targets realtime stage2 expansion logic."} -{"query":"static global graph stage2 expansion implementation","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-stage2-static","notes":"Targets static_global_graph stage2 expansion logic."} -{"query":"cross encoder rerank stage 4 implementation","relevant_paths":["codex-lens/src/codexlens/search/chain_search.py"],"intent":"chain-search-rerank","notes":"Relevant for dense_rerank and staged rerank latency comparisons."} -{"query":"get_reranker factory onnx backend selection","relevant_paths":["codex-lens/src/codexlens/semantic/reranker/factory.py"],"intent":"reranker-factory","notes":"Keeps the benchmark aligned with local ONNX reranker selection."} -{"query":"EMBEDDING_BACKEND and RERANKER_BACKEND environment variables","relevant_paths":["codex-lens/src/codexlens/env_config.py"],"intent":"env-overrides","notes":"Covers CCW/CodexLens local-only environment overrides."} diff --git a/codex-lens/benchmarks/accuracy_queries_codexlens.jsonl b/codex-lens/benchmarks/accuracy_queries_codexlens.jsonl deleted file mode 100644 index 18764bb0..00000000 --- a/codex-lens/benchmarks/accuracy_queries_codexlens.jsonl +++ /dev/null @@ -1,33 +0,0 @@ -{"query":"class StandaloneLspManager","relevant_paths":["codexlens/lsp/standalone_manager.py"]} -{"query":"def _open_document","relevant_paths":["codexlens/lsp/standalone_manager.py"]} -{"query":"def _read_message","relevant_paths":["codexlens/lsp/standalone_manager.py"]} -{"query":"how does textDocument/didOpen work","relevant_paths":["codexlens/lsp/standalone_manager.py"]} -{"query":"class LspBridge","relevant_paths":["codexlens/lsp/lsp_bridge.py"]} -{"query":"def get_document_symbols","relevant_paths":["codexlens/lsp/lsp_bridge.py"]} -{"query":"class KeepAliveLspBridge","relevant_paths":["codexlens/lsp/keepalive_bridge.py"]} -{"query":"LSP keepalive bridge","relevant_paths":["codexlens/lsp/keepalive_bridge.py"]} -{"query":"class LspGraphBuilder","relevant_paths":["codexlens/lsp/lsp_graph_builder.py"]} -{"query":"def build_from_seeds","relevant_paths":["codexlens/lsp/lsp_graph_builder.py"]} -{"query":"def _stage2_realtime_lsp_expand","relevant_paths":["codexlens/search/chain_search.py"]} -{"query":"def _stage3_cluster_prune","relevant_paths":["codexlens/search/chain_search.py"]} -{"query":"def _cross_encoder_rerank","relevant_paths":["codexlens/search/chain_search.py"]} -{"query":"def dense_rerank_cascade_search","relevant_paths":["codexlens/search/chain_search.py"]} -{"query":"def cascade_search","relevant_paths":["codexlens/search/chain_search.py"]} -{"query":"def _find_nearest_binary_mmap_root","relevant_paths":["codexlens/search/chain_search.py"]} -{"query":"class BinarySearcher","relevant_paths":["codexlens/search/binary_searcher.py"]} -{"query":"class GraphExpander","relevant_paths":["codexlens/search/graph_expander.py"]} -{"query":"def cross_encoder_rerank","relevant_paths":["codexlens/search/ranking.py"]} -{"query":"def group_similar_results","relevant_paths":["codexlens/search/ranking.py"]} -{"query":"class ConfigError","relevant_paths":["codexlens/errors.py"]} -{"query":"def load_settings","relevant_paths":["codexlens/config.py"]} -{"query":"BINARY_VECTORS_MMAP_NAME","relevant_paths":["codexlens/config.py"]} -{"query":"STAGED_CLUSTERING_STRATEGY","relevant_paths":["codexlens/config.py","codexlens/env_config.py"]} -{"query":"def apply_workspace_env","relevant_paths":["codexlens/env_config.py"]} -{"query":"def generate_env_example","relevant_paths":["codexlens/env_config.py"]} -{"query":"def get_reranker","relevant_paths":["codexlens/semantic/reranker/factory.py"]} -{"query":"class APIReranker","relevant_paths":["codexlens/semantic/reranker/api_reranker.py"]} -{"query":"class RegistryStore","relevant_paths":["codexlens/storage/registry.py"]} -{"query":"class PathMapper","relevant_paths":["codexlens/storage/path_mapper.py"]} -{"query":"def lsp_status","relevant_paths":["codexlens/cli/commands.py"]} -{"query":"graph_neighbors migration","relevant_paths":["codexlens/storage/migrations/migration_007_add_graph_neighbors.py"]} -{"query":"def get_model_config","relevant_paths":["codexlens/semantic/vector_store.py"]} diff --git a/codex-lens/benchmarks/analyze_methods.py b/codex-lens/benchmarks/analyze_methods.py deleted file mode 100644 index 9973d64c..00000000 --- a/codex-lens/benchmarks/analyze_methods.py +++ /dev/null @@ -1,245 +0,0 @@ -"""Analyze hybrid search methods contribution.""" -import json -import sqlite3 -import time -from pathlib import Path -from collections import defaultdict -import sys -sys.path.insert(0, str(Path(__file__).parent.parent / "src")) - -from codexlens.search.hybrid_search import HybridSearchEngine -from codexlens.search.ranking import ( - reciprocal_rank_fusion, - cross_encoder_rerank, - DEFAULT_WEIGHTS, -) - -# Use index with most data -index_path = Path(r"C:\Users\dyw\.codexlens\indexes\D\Claude_dms3\codex-lens\src\codexlens\storage\_index.db") - -print("=" * 60) -print("1. STORAGE ARCHITECTURE ANALYSIS") -print("=" * 60) - -# Analyze storage -with sqlite3.connect(index_path) as conn: - cursor = conn.execute( - "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name" - ) - tables = [row[0] for row in cursor.fetchall()] - - print("\nTable Overview:") - for table in tables: - try: - count = conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0] - if count > 0: - print(f" {table}: {count} rows") - except: - pass - - print("\n--- Conflict Analysis ---") - - chunks_count = 0 - semantic_count = 0 - - if "chunks" in tables: - chunks_count = conn.execute("SELECT COUNT(*) FROM chunks").fetchone()[0] - if "semantic_chunks" in tables: - semantic_count = conn.execute("SELECT COUNT(*) FROM semantic_chunks").fetchone()[0] - - print(f" chunks table: {chunks_count} rows") - print(f" semantic_chunks table: {semantic_count} rows") - - if semantic_count > 0: - col_info = conn.execute("PRAGMA table_info(semantic_chunks)").fetchall() - col_names = [c[1] for c in col_info] - - print(f"\n semantic_chunks columns: {col_names}") - - for col in ["embedding", "embedding_binary", "embedding_dense"]: - if col in col_names: - null_count = conn.execute( - f"SELECT COUNT(*) FROM semantic_chunks WHERE {col} IS NULL" - ).fetchone()[0] - non_null = semantic_count - null_count - print(f" {col}: {non_null}/{semantic_count} non-null") - -print("\n" + "=" * 60) -print("2. METHOD CONTRIBUTION ANALYSIS") -print("=" * 60) - -queries = [ - "database connection", - "create table", - "sqlite store", - "migration", - "search chunks", -] - -results_summary = { - "fts_exact": [], - "fts_fuzzy": [], - "vector": [], -} - -for query in queries: - print(f"\nQuery: '{query}'") - - # FTS Exact - try: - engine = HybridSearchEngine(weights=DEFAULT_WEIGHTS) - engine._config = type("obj", (object,), { - "use_fts_fallback": True, - "embedding_use_gpu": True, - "symbol_boost_factor": 1.5, - "enable_reranking": False, - })() - - start = time.perf_counter() - results = engine.search(index_path, query, limit=10, enable_fuzzy=False, enable_vector=False) - latency = (time.perf_counter() - start) * 1000 - - results_summary["fts_exact"].append({"count": len(results), "latency": latency}) - top_file = results[0].path.split("\\")[-1] if results else "N/A" - top_score = results[0].score if results else 0 - print(f" FTS Exact: {len(results)} results, {latency:.1f}ms, top: {top_file} ({top_score:.3f})") - except Exception as e: - print(f" FTS Exact: ERROR - {e}") - - # FTS Fuzzy - try: - engine = HybridSearchEngine(weights=DEFAULT_WEIGHTS) - engine._config = type("obj", (object,), { - "use_fts_fallback": True, - "embedding_use_gpu": True, - "symbol_boost_factor": 1.5, - "enable_reranking": False, - })() - - start = time.perf_counter() - results = engine.search(index_path, query, limit=10, enable_fuzzy=True, enable_vector=False) - latency = (time.perf_counter() - start) * 1000 - - results_summary["fts_fuzzy"].append({"count": len(results), "latency": latency}) - top_file = results[0].path.split("\\")[-1] if results else "N/A" - top_score = results[0].score if results else 0 - print(f" FTS Fuzzy: {len(results)} results, {latency:.1f}ms, top: {top_file} ({top_score:.3f})") - except Exception as e: - print(f" FTS Fuzzy: ERROR - {e}") - - # Vector - try: - engine = HybridSearchEngine() - engine._config = type("obj", (object,), { - "use_fts_fallback": False, - "embedding_use_gpu": True, - "symbol_boost_factor": 1.5, - "enable_reranking": False, - })() - - start = time.perf_counter() - results = engine.search(index_path, query, limit=10, enable_vector=True, pure_vector=True) - latency = (time.perf_counter() - start) * 1000 - - results_summary["vector"].append({"count": len(results), "latency": latency}) - top_file = results[0].path.split("\\")[-1] if results else "N/A" - top_score = results[0].score if results else 0 - print(f" Vector: {len(results)} results, {latency:.1f}ms, top: {top_file} ({top_score:.3f})") - except Exception as e: - print(f" Vector: ERROR - {e}") - -print("\n--- Summary ---") -for method, data in results_summary.items(): - if data: - avg_count = sum(d["count"] for d in data) / len(data) - avg_latency = sum(d["latency"] for d in data) / len(data) - print(f"{method}: avg {avg_count:.1f} results, {avg_latency:.1f}ms") - -print("\n" + "=" * 60) -print("3. FTS + RERANK FUSION EXPERIMENT") -print("=" * 60) - -# Initialize reranker -reranker = None -try: - from codexlens.semantic.reranker import get_reranker, check_reranker_available - ok, _ = check_reranker_available("onnx") - if ok: - reranker = get_reranker(backend="onnx", use_gpu=True) - print("\nReranker loaded: ONNX backend") -except Exception as e: - print(f"\nReranker unavailable: {e}") - -test_queries = ["database connection", "create table migration"] - -for query in test_queries: - print(f"\nQuery: '{query}'") - - # Strategy 1: Standard Hybrid (FTS exact+fuzzy RRF) - try: - engine = HybridSearchEngine(weights=DEFAULT_WEIGHTS) - engine._config = type("obj", (object,), { - "use_fts_fallback": True, - "embedding_use_gpu": True, - "symbol_boost_factor": 1.5, - "enable_reranking": False, - })() - - start = time.perf_counter() - standard_results = engine.search(index_path, query, limit=10, enable_fuzzy=True, enable_vector=False) - standard_latency = (time.perf_counter() - start) * 1000 - - print(f" Standard FTS RRF: {len(standard_results)} results, {standard_latency:.1f}ms") - for i, r in enumerate(standard_results[:3]): - print(f" {i+1}. {r.path.split(chr(92))[-1]} (score: {r.score:.4f})") - except Exception as e: - print(f" Standard FTS RRF: ERROR - {e}") - standard_results = [] - - # Strategy 2: FTS + CrossEncoder Rerank - if reranker and standard_results: - try: - start = time.perf_counter() - reranked_results = cross_encoder_rerank(query, standard_results, reranker, top_k=10) - rerank_latency = (time.perf_counter() - start) * 1000 - - print(f" FTS + Rerank: {len(reranked_results)} results, {rerank_latency:.1f}ms (rerank only)") - for i, r in enumerate(reranked_results[:3]): - ce_score = r.metadata.get("cross_encoder_prob", r.score) - print(f" {i+1}. {r.path.split(chr(92))[-1]} (CE prob: {ce_score:.4f})") - - # Compare rankings - standard_order = [r.path.split("\\")[-1] for r in standard_results[:5]] - reranked_order = [r.path.split("\\")[-1] for r in reranked_results[:5]] - - if standard_order != reranked_order: - print(f" Ranking changed!") - print(f" Before: {standard_order}") - print(f" After: {reranked_order}") - else: - print(f" Ranking unchanged") - - except Exception as e: - print(f" FTS + Rerank: ERROR - {e}") - -print("\n" + "=" * 60) -print("CONCLUSIONS") -print("=" * 60) -print(""" -1. Storage Architecture: - - semantic_chunks: Used by cascade-index (binary+dense vectors) - - chunks: Used by legacy SQLiteStore (currently empty in this index) - - files_fts_*: Used by FTS exact/fuzzy search - - CONFLICT: binary_cascade_search reads from semantic_chunks, - but standard FTS reads from files table. These are SEPARATE paths. - -2. Method Contributions: - - FTS: Fast but limited to keyword matching - - Vector: Semantic understanding but requires embeddings - -3. FTS + Rerank Fusion: - - CrossEncoder reranking can improve precision - - Adds ~100-200ms latency per query - - Most effective when initial FTS recall is good -""") diff --git a/codex-lens/benchmarks/binary_search_microbenchmark.py b/codex-lens/benchmarks/binary_search_microbenchmark.py deleted file mode 100644 index d4bb8397..00000000 --- a/codex-lens/benchmarks/binary_search_microbenchmark.py +++ /dev/null @@ -1,209 +0,0 @@ -#!/usr/bin/env python -"""Micro-benchmark for BinaryANNIndex search performance. - -Measures the actual speedup of vectorized Hamming distance computation. -""" - -from __future__ import annotations - -import gc -import statistics -import sys -import time -from pathlib import Path - -# Add src to path -sys.path.insert(0, str(Path(__file__).parent.parent / "src")) - -import numpy as np - - -def old_search_implementation(query_arr: np.ndarray, vectors: dict, id_list: list, top_k: int): - """Original O(N) loop-based implementation for comparison.""" - packed_dim = len(query_arr) - distances = [] - - for vec_id in id_list: - vec = vectors[vec_id] - vec_arr = np.frombuffer(vec, dtype=np.uint8) - xor = np.bitwise_xor(query_arr, vec_arr) - dist = int(np.unpackbits(xor).sum()) - distances.append((vec_id, dist)) - - distances.sort(key=lambda x: x[1]) - top_results = distances[:top_k] - ids = [r[0] for r in top_results] - dists = [r[1] for r in top_results] - - return ids, dists - - -def new_search_implementation(query_arr: np.ndarray, vectors_matrix: np.ndarray, ids_array: np.ndarray, top_k: int): - """Optimized vectorized implementation.""" - # Broadcast XOR - xor_result = np.bitwise_xor(query_arr, vectors_matrix) - - # Vectorized popcount using lookup table - popcount_lut = np.array([bin(i).count('1') for i in range(256)], dtype=np.uint8) - bit_counts = popcount_lut[xor_result] - - # Sum across packed bytes - distances = bit_counts.sum(axis=1) - - # Get top-k using argpartition - n_vectors = len(distances) - k = min(top_k, n_vectors) - - if k == n_vectors: - sorted_indices = np.argsort(distances) - else: - partition_indices = np.argpartition(distances, k)[:k] - top_k_distances = distances[partition_indices] - sorted_order = np.argsort(top_k_distances) - sorted_indices = partition_indices[sorted_order] - - result_ids = ids_array[sorted_indices].tolist() - result_dists = distances[sorted_indices].tolist() - - return result_ids, result_dists - - -def run_benchmark(n_vectors: int, dim: int = 256, top_k: int = 100, n_iterations: int = 50): - """Run benchmark comparing old and new implementations.""" - packed_dim = dim // 8 # 32 bytes for 256-bit - - print(f"\n{'='*60}") - print(f"Binary Search Micro-Benchmark") - print(f"{'='*60}") - print(f"Vectors: {n_vectors}") - print(f"Dimension: {dim} bits ({packed_dim} bytes packed)") - print(f"Top-K: {top_k}") - print(f"Iterations: {n_iterations}") - print(f"{'='*60}\n") - - # Generate random binary vectors - print("Generating test data...") - vectors_dict = {} - id_list = [] - - for i in range(n_vectors): - vec_bytes = np.random.randint(0, 256, size=packed_dim, dtype=np.uint8).tobytes() - vectors_dict[i] = vec_bytes - id_list.append(i) - - # Build matrix for vectorized search - vectors_matrix = np.empty((n_vectors, packed_dim), dtype=np.uint8) - ids_array = np.array(id_list, dtype=np.int64) - - for i, vec_id in enumerate(id_list): - vec_bytes = vectors_dict[vec_id] - vectors_matrix[i] = np.frombuffer(vec_bytes, dtype=np.uint8) - - # Generate random query - query_bytes = np.random.randint(0, 256, size=packed_dim, dtype=np.uint8).tobytes() - query_arr = np.frombuffer(query_bytes, dtype=np.uint8) - - # Warmup - print("Running warmup...") - for _ in range(3): - old_search_implementation(query_arr, vectors_dict, id_list, top_k) - new_search_implementation(query_arr, vectors_matrix, ids_array, top_k) - - # Benchmark old implementation - print("Benchmarking old implementation...") - old_times = [] - for _ in range(n_iterations): - gc.collect() - start = time.perf_counter() - old_ids, old_dists = old_search_implementation(query_arr, vectors_dict, id_list, top_k) - elapsed = (time.perf_counter() - start) * 1000 - old_times.append(elapsed) - - # Benchmark new implementation - print("Benchmarking new implementation...") - new_times = [] - for _ in range(n_iterations): - gc.collect() - start = time.perf_counter() - new_ids, new_dists = new_search_implementation(query_arr, vectors_matrix, ids_array, top_k) - elapsed = (time.perf_counter() - start) * 1000 - new_times.append(elapsed) - - # Verify correctness - print("\nVerifying correctness...") - # Check that distances are correct (IDs may differ for ties) - if old_dists == new_dists: - print("Distances match! (IDs may differ for ties)") - else: - # Check if difference is just in tie-breaking - old_dist_set = set(old_dists) - new_dist_set = set(new_dists) - if old_dist_set == new_dist_set: - print("Distances equivalent (tie-breaking differs, which is acceptable)") - else: - print("WARNING: Distance distributions differ!") - print(f" Old dists (first 5): {old_dists[:5]}") - print(f" New dists (first 5): {new_dists[:5]}") - - # Calculate statistics - old_avg = statistics.mean(old_times) - old_std = statistics.stdev(old_times) if len(old_times) > 1 else 0 - new_avg = statistics.mean(new_times) - new_std = statistics.stdev(new_times) if len(new_times) > 1 else 0 - - speedup = old_avg / new_avg if new_avg > 0 else 0 - - # Print results - print(f"\n{'='*60}") - print("RESULTS") - print(f"{'='*60}") - print(f"{'Metric':<25} {'Old (loop)':>15} {'New (vectorized)':>18}") - print(f"{'-'*25} {'-'*15} {'-'*18}") - print(f"{'Avg Latency (ms)':<25} {old_avg:>15.3f} {new_avg:>18.3f}") - print(f"{'Std Dev (ms)':<25} {old_std:>15.3f} {new_std:>18.3f}") - print(f"{'Min Latency (ms)':<25} {min(old_times):>15.3f} {min(new_times):>18.3f}") - print(f"{'Max Latency (ms)':<25} {max(old_times):>15.3f} {max(new_times):>18.3f}") - print(f"{'P50 (ms)':<25} {sorted(old_times)[len(old_times)//2]:>15.3f} {sorted(new_times)[len(new_times)//2]:>18.3f}") - print(f"\n{'Speedup:':<25} {speedup:>15.2f}x") - print(f"{'='*60}\n") - - return { - "n_vectors": n_vectors, - "dim": dim, - "top_k": top_k, - "old_avg_ms": old_avg, - "new_avg_ms": new_avg, - "speedup": speedup, - } - - -def main(): - print("\n" + "="*70) - print(" BINARY SEARCH OPTIMIZATION MICRO-BENCHMARK") - print("="*70) - - # Test different vector counts - results = [] - - for n_vectors in [1000, 5000, 10000, 50000]: - result = run_benchmark( - n_vectors=n_vectors, - dim=256, - top_k=100, - n_iterations=20, - ) - results.append(result) - - # Summary - print("\n" + "="*70) - print(" SUMMARY") - print("="*70) - print(f"{'N Vectors':<12} {'Old (ms)':<12} {'New (ms)':<12} {'Speedup':>10}") - print("-"*50) - for r in results: - print(f"{r['n_vectors']:<12} {r['old_avg_ms']:<12.3f} {r['new_avg_ms']:<12.3f} {r['speedup']:>10.2f}x") - print("="*70) - - -if __name__ == "__main__": - main() diff --git a/codex-lens/benchmarks/cascade_benchmark.py b/codex-lens/benchmarks/cascade_benchmark.py deleted file mode 100644 index 90abfda1..00000000 --- a/codex-lens/benchmarks/cascade_benchmark.py +++ /dev/null @@ -1,402 +0,0 @@ -#!/usr/bin/env python -"""Benchmark script for comparing cascade search strategies. - -Compares: -- binary: 256-dim binary coarse ranking + 2048-dim dense fine ranking -- hybrid: FTS+Vector coarse ranking + CrossEncoder fine ranking - -Usage: - python benchmarks/cascade_benchmark.py [--source PATH] [--queries N] [--warmup N] -""" - -from __future__ import annotations - -import argparse -import gc -import json -import os -import statistics -import sys -import time -import traceback -from dataclasses import dataclass, asdict -from pathlib import Path -from typing import List, Optional, Dict, Any - -# Add src to path -sys.path.insert(0, str(Path(__file__).parent.parent / "src")) - -from codexlens.search.chain_search import ChainSearchEngine, SearchOptions -from codexlens.config import Config -from codexlens.storage.registry import RegistryStore -from codexlens.storage.path_mapper import PathMapper - - -@dataclass -class BenchmarkResult: - """Result from a single benchmark run.""" - strategy: str - query: str - latency_ms: float - num_results: int - top_result: Optional[str] - error: Optional[str] = None - - -@dataclass -class BenchmarkSummary: - """Aggregated benchmark statistics.""" - strategy: str - total_queries: int - successful_queries: int - avg_latency_ms: float - min_latency_ms: float - max_latency_ms: float - p50_latency_ms: float - p95_latency_ms: float - p99_latency_ms: float - avg_results: float - errors: List[str] - - -# Default test queries covering different scenarios -DEFAULT_QUERIES = [ - # Code patterns - "def search", - "class Engine", - "import numpy", - "async def", - "raise ValueError", - # Semantic queries - "how to parse json", - "database connection", - "error handling", - "authentication logic", - "file read write", - # Technical terms - "embedding vector", - "cosine similarity", - "binary quantization", - "hamming distance", - "reranking", -] - - -def percentile(data: List[float], p: float) -> float: - """Calculate percentile of sorted data.""" - if not data: - return 0.0 - sorted_data = sorted(data) - k = (len(sorted_data) - 1) * (p / 100) - f = int(k) - c = f + 1 if f + 1 < len(sorted_data) else f - return sorted_data[f] + (k - f) * (sorted_data[c] - sorted_data[f]) - - -def run_single_benchmark( - engine: ChainSearchEngine, - query: str, - source_path: Path, - strategy: str, - options: Optional[SearchOptions] = None, -) -> BenchmarkResult: - """Run a single benchmark query.""" - gc.collect() - - start_time = time.perf_counter() - try: - result = engine.cascade_search( - query=query, - source_path=source_path, - k=10, - coarse_k=100, - options=options, - strategy=strategy, - ) - elapsed_ms = (time.perf_counter() - start_time) * 1000 - - top_result = None - if result.results: - r = result.results[0] - line = r.start_line or 0 - top_result = f"{r.path}:{line}" - - return BenchmarkResult( - strategy=strategy, - query=query, - latency_ms=elapsed_ms, - num_results=len(result.results), - top_result=top_result, - ) - except Exception as e: - elapsed_ms = (time.perf_counter() - start_time) * 1000 - return BenchmarkResult( - strategy=strategy, - query=query, - latency_ms=elapsed_ms, - num_results=0, - top_result=None, - error=str(e), - ) - - -def run_benchmarks( - source_path: Path, - queries: List[str], - strategies: List[str], - warmup_runs: int = 2, - options: Optional[SearchOptions] = None, -) -> Dict[str, List[BenchmarkResult]]: - """Run benchmarks for all queries and strategies.""" - - print(f"\n{'='*60}") - print(f"Cascade Search Benchmark") - print(f"{'='*60}") - print(f"Source: {source_path}") - print(f"Queries: {len(queries)}") - print(f"Strategies: {strategies}") - print(f"Warmup runs: {warmup_runs}") - print(f"{'='*60}\n") - - # Initialize engine - config = Config() - registry = RegistryStore() # Uses default path - registry.initialize() - mapper = PathMapper() # Uses default path - engine = ChainSearchEngine(registry=registry, mapper=mapper, config=config) - - results: Dict[str, List[BenchmarkResult]] = {s: [] for s in strategies} - - # Warmup phase - if warmup_runs > 0: - print(f"Running {warmup_runs} warmup queries...") - warmup_query = queries[0] if queries else "test" - for strategy in strategies: - for _ in range(warmup_runs): - try: - run_single_benchmark(engine, warmup_query, source_path, strategy, options) - except Exception: - pass - print("Warmup complete.\n") - - # Benchmark phase - total_runs = len(queries) * len(strategies) - current_run = 0 - - for query in queries: - for strategy in strategies: - current_run += 1 - print(f"[{current_run}/{total_runs}] {strategy}: '{query[:40]}...' ", end="", flush=True) - - result = run_single_benchmark(engine, query, source_path, strategy, options) - results[strategy].append(result) - - if result.error: - print(f"ERROR: {result.error[:50]}") - else: - print(f"{result.latency_ms:.1f}ms, {result.num_results} results") - - return results - - -def summarize_results(results: Dict[str, List[BenchmarkResult]]) -> Dict[str, BenchmarkSummary]: - """Generate summary statistics for each strategy.""" - summaries = {} - - for strategy, benchmark_results in results.items(): - latencies = [r.latency_ms for r in benchmark_results if r.error is None] - result_counts = [r.num_results for r in benchmark_results if r.error is None] - errors = [r.error for r in benchmark_results if r.error is not None] - - if latencies: - summary = BenchmarkSummary( - strategy=strategy, - total_queries=len(benchmark_results), - successful_queries=len(latencies), - avg_latency_ms=statistics.mean(latencies), - min_latency_ms=min(latencies), - max_latency_ms=max(latencies), - p50_latency_ms=percentile(latencies, 50), - p95_latency_ms=percentile(latencies, 95), - p99_latency_ms=percentile(latencies, 99), - avg_results=statistics.mean(result_counts) if result_counts else 0, - errors=errors, - ) - else: - summary = BenchmarkSummary( - strategy=strategy, - total_queries=len(benchmark_results), - successful_queries=0, - avg_latency_ms=0, - min_latency_ms=0, - max_latency_ms=0, - p50_latency_ms=0, - p95_latency_ms=0, - p99_latency_ms=0, - avg_results=0, - errors=errors, - ) - - summaries[strategy] = summary - - return summaries - - -def print_comparison_table(summaries: Dict[str, BenchmarkSummary]) -> None: - """Print formatted comparison table.""" - print(f"\n{'='*80}") - print("BENCHMARK RESULTS COMPARISON") - print(f"{'='*80}\n") - - # Header - print(f"{'Metric':<25} {'Binary':>15} {'Hybrid':>15} {'Diff':>15}") - print(f"{'-'*25} {'-'*15} {'-'*15} {'-'*15}") - - binary = summaries.get("binary") - hybrid = summaries.get("hybrid") - - if not binary or not hybrid: - print("Missing results for comparison") - return - - metrics = [ - ("Total Queries", binary.total_queries, hybrid.total_queries), - ("Successful", binary.successful_queries, hybrid.successful_queries), - ("Avg Latency (ms)", binary.avg_latency_ms, hybrid.avg_latency_ms), - ("Min Latency (ms)", binary.min_latency_ms, hybrid.min_latency_ms), - ("Max Latency (ms)", binary.max_latency_ms, hybrid.max_latency_ms), - ("P50 Latency (ms)", binary.p50_latency_ms, hybrid.p50_latency_ms), - ("P95 Latency (ms)", binary.p95_latency_ms, hybrid.p95_latency_ms), - ("P99 Latency (ms)", binary.p99_latency_ms, hybrid.p99_latency_ms), - ("Avg Results", binary.avg_results, hybrid.avg_results), - ] - - for name, b_val, h_val in metrics: - if isinstance(b_val, float): - diff = b_val - h_val - diff_str = f"{diff:+.2f}" if diff != 0 else "0.00" - speedup = h_val / b_val if b_val > 0 else 0 - if "Latency" in name and speedup > 1: - diff_str += f" ({speedup:.1f}x faster)" - print(f"{name:<25} {b_val:>15.2f} {h_val:>15.2f} {diff_str:>15}") - else: - diff = b_val - h_val - print(f"{name:<25} {b_val:>15} {h_val:>15} {diff:>+15}") - - # Errors - print(f"\n{'Errors:':<25}") - print(f" Binary: {len(binary.errors)}") - for err in binary.errors[:3]: - print(f" - {err[:60]}...") - print(f" Hybrid: {len(hybrid.errors)}") - for err in hybrid.errors[:3]: - print(f" - {err[:60]}...") - - # Winner - print(f"\n{'='*80}") - if binary.avg_latency_ms < hybrid.avg_latency_ms and binary.successful_queries > 0: - speedup = hybrid.avg_latency_ms / binary.avg_latency_ms - print(f"[WINNER] Binary ({speedup:.2f}x faster average latency)") - elif hybrid.avg_latency_ms < binary.avg_latency_ms and hybrid.successful_queries > 0: - speedup = binary.avg_latency_ms / hybrid.avg_latency_ms - print(f"[WINNER] Hybrid ({speedup:.2f}x faster average latency)") - else: - print("No clear winner (check errors)") - print(f"{'='*80}\n") - - -def save_results( - results: Dict[str, List[BenchmarkResult]], - summaries: Dict[str, BenchmarkSummary], - output_path: Path, -) -> None: - """Save benchmark results to JSON file.""" - data = { - "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), - "summaries": {k: asdict(v) for k, v in summaries.items()}, - "details": { - k: [asdict(r) for r in v] - for k, v in results.items() - }, - } - - output_path.parent.mkdir(parents=True, exist_ok=True) - with open(output_path, "w", encoding="utf-8") as f: - json.dump(data, f, indent=2) - - print(f"Results saved to: {output_path}") - - -def main(): - parser = argparse.ArgumentParser(description="Benchmark cascade search strategies") - parser.add_argument( - "--source", "-s", - type=Path, - default=Path(__file__).parent.parent / "src", - help="Source directory to search (default: ./src)", - ) - parser.add_argument( - "--queries", "-q", - type=int, - default=len(DEFAULT_QUERIES), - help=f"Number of queries to run (default: {len(DEFAULT_QUERIES)})", - ) - parser.add_argument( - "--warmup", "-w", - type=int, - default=2, - help="Number of warmup runs (default: 2)", - ) - parser.add_argument( - "--output", "-o", - type=Path, - default=Path(__file__).parent / "results" / "cascade_benchmark.json", - help="Output file for results (default: benchmarks/results/cascade_benchmark.json)", - ) - parser.add_argument( - "--strategies", - nargs="+", - default=["binary", "hybrid"], - choices=["binary", "hybrid"], - help="Strategies to benchmark (default: both)", - ) - - args = parser.parse_args() - - # Validate source path - if not args.source.exists(): - print(f"Error: Source path does not exist: {args.source}") - sys.exit(1) - - # Select queries - queries = DEFAULT_QUERIES[:args.queries] - - # Run benchmarks - try: - results = run_benchmarks( - source_path=args.source, - queries=queries, - strategies=args.strategies, - warmup_runs=args.warmup, - ) - - # Generate summaries - summaries = summarize_results(results) - - # Print comparison - print_comparison_table(summaries) - - # Save results - save_results(results, summaries, args.output) - - except KeyboardInterrupt: - print("\nBenchmark interrupted.") - sys.exit(1) - except Exception as e: - print(f"\nBenchmark failed: {e}") - traceback.print_exc() - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/codex-lens/benchmarks/compare_accuracy_labeled.py b/codex-lens/benchmarks/compare_accuracy_labeled.py deleted file mode 100644 index 7000a181..00000000 --- a/codex-lens/benchmarks/compare_accuracy_labeled.py +++ /dev/null @@ -1,365 +0,0 @@ -#!/usr/bin/env python -"""Compare labeled accuracy: staged(realtime LSP graph) vs dense_rerank. - -This script measures retrieval "accuracy" against a labeled query set. -Each query must provide a list of relevant file paths (relative to --source -or absolute). We report: - - Hit@K (any relevant file appears in top-K) - - MRR@K (reciprocal rank of first relevant file within top-K) - - Recall@K (fraction of relevant files present in top-K) - -Example: - python benchmarks/compare_accuracy_labeled.py --source ./src - python benchmarks/compare_accuracy_labeled.py --queries-file benchmarks/accuracy_queries_codexlens.jsonl -""" - -from __future__ import annotations - -import argparse -import gc -import json -import os -import re -import statistics -import sys -import time -from dataclasses import asdict, dataclass -from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple - -# Add src to path (match other benchmark scripts) -sys.path.insert(0, str(Path(__file__).parent.parent / "src")) - -from codexlens.config import Config -from codexlens.search.chain_search import ChainSearchEngine, SearchOptions -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore - - -DEFAULT_QUERIES_FILE = Path(__file__).parent / "accuracy_queries_codexlens.jsonl" - - -def _now_ms() -> float: - return time.perf_counter() * 1000.0 - - -def _normalize_path_key(path: str) -> str: - """Normalize file paths for overlap/dedup metrics (Windows-safe).""" - try: - p = Path(path) - # Don't explode on non-files like "". - if str(p) and (p.is_absolute() or re.match(r"^[A-Za-z]:", str(p))): - norm = str(p.resolve()) - else: - norm = str(p) - except Exception: - norm = path - norm = norm.replace("/", "\\") - if os.name == "nt": - norm = norm.lower() - return norm - - -def _load_labeled_queries(path: Path, limit: Optional[int]) -> List[Dict[str, Any]]: - if not path.is_file(): - raise SystemExit(f"Queries file does not exist: {path}") - - out: List[Dict[str, Any]] = [] - for raw_line in path.read_text(encoding="utf-8", errors="ignore").splitlines(): - line = raw_line.strip() - if not line or line.startswith("#"): - continue - try: - item = json.loads(line) - except Exception as exc: - raise SystemExit(f"Invalid JSONL line in {path}: {raw_line!r} ({exc})") from exc - if not isinstance(item, dict) or "query" not in item: - raise SystemExit(f"Invalid query item (expected object with 'query'): {item!r}") - out.append(item) - if limit is not None and len(out) >= limit: - break - return out - - -def _dedup_topk(paths: Iterable[str], k: int) -> List[str]: - out: List[str] = [] - seen: set[str] = set() - for p in paths: - if p in seen: - continue - seen.add(p) - out.append(p) - if len(out) >= k: - break - return out - - -def _first_hit_rank(topk_paths: Sequence[str], relevant: set[str]) -> Optional[int]: - for i, p in enumerate(topk_paths, start=1): - if p in relevant: - return i - return None - - -@dataclass -class StrategyRun: - strategy: str - latency_ms: float - topk_paths: List[str] - first_hit_rank: Optional[int] - hit_at_k: bool - recall_at_k: float - error: Optional[str] = None - - -@dataclass -class QueryEval: - query: str - relevant_paths: List[str] - staged: StrategyRun - dense_rerank: StrategyRun - - -def _run_strategy( - engine: ChainSearchEngine, - *, - strategy: str, - query: str, - source_path: Path, - k: int, - coarse_k: int, - relevant: set[str], - options: Optional[SearchOptions] = None, -) -> StrategyRun: - gc.collect() - start_ms = _now_ms() - try: - result = engine.cascade_search( - query=query, - source_path=source_path, - k=k, - coarse_k=coarse_k, - options=options, - strategy=strategy, - ) - latency_ms = _now_ms() - start_ms - paths_raw = [r.path for r in (result.results or []) if getattr(r, "path", None)] - paths_norm = [_normalize_path_key(p) for p in paths_raw] - topk = _dedup_topk(paths_norm, k=k) - rank = _first_hit_rank(topk, relevant) - hit = rank is not None - recall = 0.0 - if relevant: - recall = len(set(topk) & relevant) / float(len(relevant)) - return StrategyRun( - strategy=strategy, - latency_ms=latency_ms, - topk_paths=topk, - first_hit_rank=rank, - hit_at_k=hit, - recall_at_k=recall, - error=None, - ) - except Exception as exc: - latency_ms = _now_ms() - start_ms - return StrategyRun( - strategy=strategy, - latency_ms=latency_ms, - topk_paths=[], - first_hit_rank=None, - hit_at_k=False, - recall_at_k=0.0, - error=repr(exc), - ) - - -def _mrr(ranks: Sequence[Optional[int]]) -> float: - vals = [] - for r in ranks: - if r is None or r <= 0: - vals.append(0.0) - else: - vals.append(1.0 / float(r)) - return statistics.mean(vals) if vals else 0.0 - - -def main() -> None: - parser = argparse.ArgumentParser( - description="Compare labeled retrieval accuracy: staged(realtime) vs dense_rerank" - ) - parser.add_argument( - "--source", - type=Path, - default=Path(__file__).parent.parent / "src", - help="Source directory to search (default: ./src)", - ) - parser.add_argument( - "--queries-file", - type=Path, - default=DEFAULT_QUERIES_FILE, - help="JSONL file with {query, relevant_paths[]} per line", - ) - parser.add_argument("--queries", type=int, default=None, help="Limit number of queries") - parser.add_argument("--k", type=int, default=10, help="Top-K for evaluation (default 10)") - parser.add_argument("--coarse-k", type=int, default=100, help="Coarse candidates (default 100)") - parser.add_argument( - "--staged-cluster-strategy", - type=str, - default="path", - help="Config.staged_clustering_strategy override for staged (default: path)", - ) - parser.add_argument( - "--stage2-mode", - type=str, - default="realtime", - help="Config.staged_stage2_mode override for staged (default: realtime)", - ) - parser.add_argument( - "--output", - type=Path, - default=Path(__file__).parent / "results" / "accuracy_labeled.json", - help="Output JSON path", - ) - args = parser.parse_args() - - if not args.source.exists(): - raise SystemExit(f"Source path does not exist: {args.source}") - - labeled = _load_labeled_queries(args.queries_file, args.queries) - if not labeled: - raise SystemExit("No queries to run") - - source_root = args.source.expanduser().resolve() - - # Match CLI behavior: load settings + apply global/workspace .env overrides. - config = Config.load() - config.cascade_strategy = "staged" - config.staged_stage2_mode = str(args.stage2_mode or "realtime").strip().lower() - config.enable_staged_rerank = True - config.staged_clustering_strategy = str(args.staged_cluster_strategy or "path").strip().lower() - # Stability: on some Windows setups, DirectML/ONNX can crash under load. - config.embedding_use_gpu = False - config.reranker_use_gpu = False - - registry = RegistryStore() - registry.initialize() - mapper = PathMapper() - engine = ChainSearchEngine(registry=registry, mapper=mapper, config=config) - - def resolve_expected(paths: Sequence[str]) -> set[str]: - out: set[str] = set() - for p in paths: - try: - cand = Path(p) - if not cand.is_absolute(): - cand = (source_root / cand).resolve() - out.add(_normalize_path_key(str(cand))) - except Exception: - out.add(_normalize_path_key(p)) - return out - - evaluations: List[QueryEval] = [] - - try: - for i, item in enumerate(labeled, start=1): - query = str(item.get("query", "")).strip() - relevant_raw = item.get("relevant_paths") or [] - if not query: - continue - if not isinstance(relevant_raw, list) or not relevant_raw: - raise SystemExit(f"Query item missing relevant_paths[]: {item!r}") - relevant = resolve_expected([str(p) for p in relevant_raw]) - - print(f"[{i}/{len(labeled)}] {query}") - - staged = _run_strategy( - engine, - strategy="staged", - query=query, - source_path=source_root, - k=int(args.k), - coarse_k=int(args.coarse_k), - relevant=relevant, - options=None, - ) - dense = _run_strategy( - engine, - strategy="dense_rerank", - query=query, - source_path=source_root, - k=int(args.k), - coarse_k=int(args.coarse_k), - relevant=relevant, - options=None, - ) - - evaluations.append( - QueryEval( - query=query, - relevant_paths=[_normalize_path_key(str((source_root / p).resolve())) if not Path(p).is_absolute() else _normalize_path_key(p) for p in relevant_raw], - staged=staged, - dense_rerank=dense, - ) - ) - finally: - try: - engine.close() - except Exception: - pass - try: - registry.close() - except Exception: - pass - - staged_runs = [e.staged for e in evaluations] - dense_runs = [e.dense_rerank for e in evaluations] - - def mean(xs: Sequence[float]) -> float: - return statistics.mean(xs) if xs else 0.0 - - staged_ranks = [r.first_hit_rank for r in staged_runs] - dense_ranks = [r.first_hit_rank for r in dense_runs] - - summary = { - "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), - "source": str(source_root), - "queries_file": str(args.queries_file), - "query_count": len(evaluations), - "k": int(args.k), - "coarse_k": int(args.coarse_k), - "staged": { - "hit_at_k": mean([1.0 if r.hit_at_k else 0.0 for r in staged_runs]), - "mrr_at_k": _mrr(staged_ranks), - "avg_recall_at_k": mean([r.recall_at_k for r in staged_runs]), - "avg_latency_ms": mean([r.latency_ms for r in staged_runs if not r.error]), - "errors": sum(1 for r in staged_runs if r.error), - }, - "dense_rerank": { - "hit_at_k": mean([1.0 if r.hit_at_k else 0.0 for r in dense_runs]), - "mrr_at_k": _mrr(dense_ranks), - "avg_recall_at_k": mean([r.recall_at_k for r in dense_runs]), - "avg_latency_ms": mean([r.latency_ms for r in dense_runs if not r.error]), - "errors": sum(1 for r in dense_runs if r.error), - }, - "config": { - "staged_stage2_mode": config.staged_stage2_mode, - "staged_clustering_strategy": config.staged_clustering_strategy, - "enable_staged_rerank": bool(config.enable_staged_rerank), - "reranker_backend": config.reranker_backend, - "reranker_model": config.reranker_model, - "embedding_backend": config.embedding_backend, - "embedding_model": config.embedding_model, - }, - } - - payload = {"summary": summary, "evaluations": [asdict(e) for e in evaluations]} - args.output.parent.mkdir(parents=True, exist_ok=True) - args.output.write_text(json.dumps(payload, indent=2), encoding="utf-8") - - print("\n=== SUMMARY ===") - print(json.dumps(summary, indent=2)) - print(f"\nSaved: {args.output}") - - -if __name__ == "__main__": - main() diff --git a/codex-lens/benchmarks/compare_ccw_smart_search_stage2.py b/codex-lens/benchmarks/compare_ccw_smart_search_stage2.py deleted file mode 100644 index b6776bfd..00000000 --- a/codex-lens/benchmarks/compare_ccw_smart_search_stage2.py +++ /dev/null @@ -1,980 +0,0 @@ -#!/usr/bin/env python -"""Benchmark local-only staged stage2 modes for CCW smart_search queries. - -This benchmark reuses the existing CodexLens benchmark style, but focuses on -the real search intents that drive CCW `smart_search`. It evaluates: - -1. `dense_rerank` baseline -2. `staged` + `precomputed` -3. `staged` + `realtime` -4. `staged` + `static_global_graph` - -Metrics: - - Hit@K - - MRR@K - - Recall@K - - latency (avg/p50/p95) - -The runner is intentionally local-only. By default it uses: - - embedding backend: `fastembed` - - reranker backend: `onnx` - -Examples: - python benchmarks/compare_ccw_smart_search_stage2.py --dry-run - python benchmarks/compare_ccw_smart_search_stage2.py --self-check - python benchmarks/compare_ccw_smart_search_stage2.py --source .. --k 10 - python benchmarks/compare_ccw_smart_search_stage2.py --embedding-model code --reranker-model cross-encoder/ms-marco-MiniLM-L-6-v2 -""" - -from __future__ import annotations - -import argparse -from copy import deepcopy -import gc -import json -import os -import re -import statistics -import sys -import time -from dataclasses import asdict, dataclass -from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple - -sys.path.insert(0, str(Path(__file__).parent.parent / "src")) - -from codexlens.config import Config -from codexlens.search.chain_search import ChainSearchEngine, SearchOptions -from codexlens.search.ranking import ( - QueryIntent, - detect_query_intent, - is_generated_artifact_path, - is_test_file, - query_prefers_lexical_search, - query_targets_generated_files, -) -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore - - -DEFAULT_SOURCE = Path(__file__).resolve().parents[2] -DEFAULT_QUERIES_FILE = Path(__file__).parent / "accuracy_queries_ccw_smart_search.jsonl" -DEFAULT_OUTPUT = Path(__file__).parent / "results" / "ccw_smart_search_stage2.json" - -VALID_STAGE2_MODES = ("precomputed", "realtime", "static_global_graph") -VALID_LOCAL_EMBEDDING_BACKENDS = ("fastembed",) -VALID_LOCAL_RERANKER_BACKENDS = ("onnx", "fastembed", "legacy") -VALID_BASELINE_METHODS = ("auto", "fts", "hybrid") -DEFAULT_LOCAL_ONNX_RERANKER_MODEL = "Xenova/ms-marco-MiniLM-L-6-v2" - - -def _now_ms() -> float: - return time.perf_counter() * 1000.0 - - -def _normalize_path_key(path: str) -> str: - try: - candidate = Path(path) - if str(candidate) and (candidate.is_absolute() or re.match(r"^[A-Za-z]:", str(candidate))): - normalized = str(candidate.resolve()) - else: - normalized = str(candidate) - except Exception: - normalized = path - normalized = normalized.replace("/", "\\") - if os.name == "nt": - normalized = normalized.lower() - return normalized - - -def _dedup_topk(paths: Iterable[str], k: int) -> List[str]: - output: List[str] = [] - seen: set[str] = set() - for path in paths: - if path in seen: - continue - seen.add(path) - output.append(path) - if len(output) >= k: - break - return output - - -def _first_hit_rank(topk_paths: Sequence[str], relevant: set[str]) -> Optional[int]: - for index, path in enumerate(topk_paths, start=1): - if path in relevant: - return index - return None - - -def _mrr(ranks: Sequence[Optional[int]]) -> float: - values = [1.0 / rank for rank in ranks if rank and rank > 0] - return statistics.mean(values) if values else 0.0 - - -def _mean(values: Sequence[float]) -> float: - return statistics.mean(values) if values else 0.0 - - -def _percentile(values: Sequence[float], percentile: float) -> float: - if not values: - return 0.0 - ordered = sorted(values) - if len(ordered) == 1: - return ordered[0] - index = (len(ordered) - 1) * percentile - lower = int(index) - upper = min(lower + 1, len(ordered) - 1) - if lower == upper: - return ordered[lower] - fraction = index - lower - return ordered[lower] + (ordered[upper] - ordered[lower]) * fraction - - -def _load_labeled_queries(path: Path, limit: Optional[int]) -> List[Dict[str, Any]]: - if not path.is_file(): - raise SystemExit(f"Queries file does not exist: {path}") - - output: List[Dict[str, Any]] = [] - for raw_line in path.read_text(encoding="utf-8", errors="ignore").splitlines(): - line = raw_line.strip() - if not line or line.startswith("#"): - continue - try: - item = json.loads(line) - except Exception as exc: - raise SystemExit(f"Invalid JSONL line in {path}: {raw_line!r} ({exc})") from exc - if not isinstance(item, dict) or "query" not in item or "relevant_paths" not in item: - raise SystemExit(f"Invalid query item (expected object with query/relevant_paths): {item!r}") - relevant_paths = item.get("relevant_paths") - if not isinstance(relevant_paths, list) or not relevant_paths: - raise SystemExit(f"Query item must include non-empty relevant_paths[]: {item!r}") - output.append(item) - if limit is not None and len(output) >= limit: - break - return output - - -def _resolve_expected_paths(source_root: Path, paths: Sequence[str]) -> Tuple[List[str], set[str], List[str]]: - resolved_display: List[str] = [] - resolved_keys: set[str] = set() - missing: List[str] = [] - - for raw_path in paths: - candidate = Path(raw_path) - if not candidate.is_absolute(): - candidate = (source_root / candidate).resolve() - if not candidate.exists(): - missing.append(str(candidate)) - resolved_display.append(str(candidate)) - resolved_keys.add(_normalize_path_key(str(candidate))) - return resolved_display, resolved_keys, missing - - -def _validate_local_only_backends(embedding_backend: str, reranker_backend: str) -> None: - if embedding_backend not in VALID_LOCAL_EMBEDDING_BACKENDS: - raise SystemExit( - "This runner is local-only. " - f"--embedding-backend must be one of {', '.join(VALID_LOCAL_EMBEDDING_BACKENDS)}; got {embedding_backend!r}" - ) - if reranker_backend not in VALID_LOCAL_RERANKER_BACKENDS: - raise SystemExit( - "This runner is local-only. " - f"--reranker-backend must be one of {', '.join(VALID_LOCAL_RERANKER_BACKENDS)}; got {reranker_backend!r}" - ) - - -def _validate_stage2_modes(stage2_modes: Sequence[str]) -> List[str]: - normalized = [str(mode).strip().lower() for mode in stage2_modes if str(mode).strip()] - if not normalized: - raise SystemExit("At least one --stage2-modes entry is required") - invalid = [mode for mode in normalized if mode not in VALID_STAGE2_MODES] - if invalid: - raise SystemExit( - f"Invalid --stage2-modes entry: {invalid[0]} " - f"(valid: {', '.join(VALID_STAGE2_MODES)})" - ) - deduped: List[str] = [] - seen: set[str] = set() - for mode in normalized: - if mode in seen: - continue - seen.add(mode) - deduped.append(mode) - return deduped - - -def _validate_baseline_methods(methods: Sequence[str]) -> List[str]: - normalized = [str(method).strip().lower() for method in methods if str(method).strip()] - invalid = [method for method in normalized if method not in VALID_BASELINE_METHODS] - if invalid: - raise SystemExit( - f"Invalid --baseline-methods entry: {invalid[0]} " - f"(valid: {', '.join(VALID_BASELINE_METHODS)})" - ) - deduped: List[str] = [] - seen: set[str] = set() - for method in normalized: - if method in seen: - continue - seen.add(method) - deduped.append(method) - return deduped - - -@dataclass -class StrategyRun: - strategy_key: str - strategy: str - stage2_mode: Optional[str] - effective_method: str - execution_method: str - latency_ms: float - topk_paths: List[str] - first_hit_rank: Optional[int] - hit_at_k: bool - recall_at_k: float - generated_artifact_count: int - test_file_count: int - error: Optional[str] = None - - -@dataclass -class QueryEvaluation: - query: str - intent: Optional[str] - notes: Optional[str] - relevant_paths: List[str] - runs: Dict[str, StrategyRun] - - -@dataclass -class PairwiseDelta: - mode_a: str - mode_b: str - hit_at_k_delta: float - mrr_at_k_delta: float - avg_recall_at_k_delta: float - avg_latency_ms_delta: float - - -@dataclass -class StrategySpec: - strategy_key: str - strategy: str - stage2_mode: Optional[str] - - -@dataclass -class StrategyRuntime: - strategy_spec: StrategySpec - config: Config - registry: RegistryStore - engine: ChainSearchEngine - - -def _strategy_specs( - stage2_modes: Sequence[str], - include_dense_baseline: bool, - *, - baseline_methods: Sequence[str], -) -> List[StrategySpec]: - specs: List[StrategySpec] = [] - for method in baseline_methods: - specs.append(StrategySpec(strategy_key=method, strategy=method, stage2_mode=None)) - if include_dense_baseline: - specs.append(StrategySpec(strategy_key="dense_rerank", strategy="dense_rerank", stage2_mode=None)) - for stage2_mode in stage2_modes: - specs.append( - StrategySpec( - strategy_key=f"staged:{stage2_mode}", - strategy="staged", - stage2_mode=stage2_mode, - ) - ) - return specs - - -def _build_strategy_runtime(base_config: Config, strategy_spec: StrategySpec) -> StrategyRuntime: - runtime_config = deepcopy(base_config) - registry = RegistryStore() - registry.initialize() - mapper = PathMapper() - engine = ChainSearchEngine(registry=registry, mapper=mapper, config=runtime_config) - return StrategyRuntime( - strategy_spec=strategy_spec, - config=runtime_config, - registry=registry, - engine=engine, - ) - - -def _select_effective_method(query: str, requested_method: str) -> str: - requested = str(requested_method).strip().lower() - if requested != "auto": - return requested - if query_targets_generated_files(query) or query_prefers_lexical_search(query): - return "fts" - intent = detect_query_intent(query) - if intent == QueryIntent.KEYWORD: - return "fts" - if intent == QueryIntent.SEMANTIC: - return "dense_rerank" - return "hybrid" - - -def _filter_dataset_by_query_match( - dataset: Sequence[Dict[str, Any]], - query_match: Optional[str], -) -> List[Dict[str, Any]]: - """Filter labeled queries by case-insensitive substring match.""" - needle = str(query_match or "").strip().casefold() - if not needle: - return list(dataset) - return [ - dict(item) - for item in dataset - if needle in str(item.get("query", "")).casefold() - ] - - -def _apply_query_limit( - dataset: Sequence[Dict[str, Any]], - query_limit: Optional[int], -) -> List[Dict[str, Any]]: - """Apply the optional query limit after any dataset-level filtering.""" - if query_limit is None: - return list(dataset) - return [dict(item) for item in list(dataset)[: max(0, int(query_limit))]] - - -def _write_json_payload(path: Path, payload: Dict[str, Any]) -> None: - """Persist a benchmark payload as UTF-8 JSON.""" - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8") - - -def _write_final_outputs( - *, - output_path: Path, - progress_output: Optional[Path], - payload: Dict[str, Any], -) -> None: - """Persist the final completed payload to both result and progress outputs.""" - _write_json_payload(output_path, payload) - if progress_output is not None: - _write_json_payload(progress_output, payload) - - -def _make_progress_payload( - *, - args: argparse.Namespace, - source_root: Path, - strategy_specs: Sequence[StrategySpec], - evaluations: Sequence[QueryEvaluation], - query_index: int, - total_queries: int, - run_index: int, - total_runs: int, - current_query: str, - current_strategy_key: str, -) -> Dict[str, Any]: - """Create a partial progress snapshot for long benchmark runs.""" - return { - "status": "running", - "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), - "source": str(source_root), - "queries_file": str(args.queries_file), - "query_count": len(evaluations), - "planned_query_count": total_queries, - "k": int(args.k), - "coarse_k": int(args.coarse_k), - "strategy_keys": [spec.strategy_key for spec in strategy_specs], - "progress": { - "completed_queries": query_index, - "total_queries": total_queries, - "completed_runs": run_index, - "total_runs": total_runs, - "current_query": current_query, - "current_strategy_key": current_strategy_key, - }, - "evaluations": [ - { - "query": evaluation.query, - "intent": evaluation.intent, - "notes": evaluation.notes, - "relevant_paths": evaluation.relevant_paths, - "runs": {key: asdict(run) for key, run in evaluation.runs.items()}, - } - for evaluation in evaluations - ], - } - - -def _make_search_options(method: str, *, k: int) -> SearchOptions: - normalized = str(method).strip().lower() - if normalized == "fts": - return SearchOptions( - total_limit=k, - hybrid_mode=False, - enable_fuzzy=False, - enable_vector=False, - pure_vector=False, - enable_cascade=False, - ) - if normalized == "hybrid": - return SearchOptions( - total_limit=k, - hybrid_mode=True, - enable_fuzzy=False, - enable_vector=True, - pure_vector=False, - enable_cascade=False, - ) - if normalized in {"dense_rerank", "staged"}: - return SearchOptions( - total_limit=k, - hybrid_mode=True, - enable_fuzzy=False, - enable_vector=True, - pure_vector=False, - enable_cascade=True, - ) - raise ValueError(f"Unsupported benchmark method: {method}") - - -def _run_strategy( - engine: ChainSearchEngine, - config: Config, - *, - strategy_spec: StrategySpec, - query: str, - source_path: Path, - k: int, - coarse_k: int, - relevant: set[str], -) -> StrategyRun: - gc.collect() - effective_method = _select_effective_method(query, strategy_spec.strategy) - execution_method = "cascade" if effective_method in {"dense_rerank", "staged"} else effective_method - previous_cascade_strategy = getattr(config, "cascade_strategy", None) - previous_stage2_mode = getattr(config, "staged_stage2_mode", None) - - start_ms = _now_ms() - try: - options = _make_search_options( - "staged" if strategy_spec.strategy == "staged" else effective_method, - k=k, - ) - if strategy_spec.strategy == "staged": - config.cascade_strategy = "staged" - if strategy_spec.stage2_mode: - config.staged_stage2_mode = strategy_spec.stage2_mode - result = engine.cascade_search( - query=query, - source_path=source_path, - k=k, - coarse_k=coarse_k, - options=options, - strategy="staged", - ) - elif effective_method == "dense_rerank": - config.cascade_strategy = "dense_rerank" - result = engine.cascade_search( - query=query, - source_path=source_path, - k=k, - coarse_k=coarse_k, - options=options, - strategy="dense_rerank", - ) - else: - result = engine.search( - query=query, - source_path=source_path, - options=options, - ) - latency_ms = _now_ms() - start_ms - paths_raw = [item.path for item in (result.results or []) if getattr(item, "path", None)] - topk = _dedup_topk((_normalize_path_key(path) for path in paths_raw), k=k) - rank = _first_hit_rank(topk, relevant) - recall = 0.0 - if relevant: - recall = len(set(topk) & relevant) / float(len(relevant)) - return StrategyRun( - strategy_key=strategy_spec.strategy_key, - strategy=strategy_spec.strategy, - stage2_mode=strategy_spec.stage2_mode, - effective_method=effective_method, - execution_method=execution_method, - latency_ms=latency_ms, - topk_paths=topk, - first_hit_rank=rank, - hit_at_k=rank is not None, - recall_at_k=recall, - generated_artifact_count=sum(1 for path in topk if is_generated_artifact_path(path)), - test_file_count=sum(1 for path in topk if is_test_file(path)), - error=None, - ) - except Exception as exc: - latency_ms = _now_ms() - start_ms - return StrategyRun( - strategy_key=strategy_spec.strategy_key, - strategy=strategy_spec.strategy, - stage2_mode=strategy_spec.stage2_mode, - effective_method=effective_method, - execution_method=execution_method, - latency_ms=latency_ms, - topk_paths=[], - first_hit_rank=None, - hit_at_k=False, - recall_at_k=0.0, - generated_artifact_count=0, - test_file_count=0, - error=f"{type(exc).__name__}: {exc}", - ) - finally: - config.cascade_strategy = previous_cascade_strategy - config.staged_stage2_mode = previous_stage2_mode - - -def _summarize_runs(runs: Sequence[StrategyRun]) -> Dict[str, Any]: - latencies = [run.latency_ms for run in runs if not run.error] - ranks = [run.first_hit_rank for run in runs] - effective_method_counts: Dict[str, int] = {} - for run in runs: - effective_method_counts[run.effective_method] = effective_method_counts.get(run.effective_method, 0) + 1 - return { - "query_count": len(runs), - "hit_at_k": _mean([1.0 if run.hit_at_k else 0.0 for run in runs]), - "mrr_at_k": _mrr(ranks), - "avg_recall_at_k": _mean([run.recall_at_k for run in runs]), - "avg_latency_ms": _mean(latencies), - "p50_latency_ms": _percentile(latencies, 0.50), - "p95_latency_ms": _percentile(latencies, 0.95), - "avg_generated_artifact_count": _mean([float(run.generated_artifact_count) for run in runs]), - "avg_test_file_count": _mean([float(run.test_file_count) for run in runs]), - "runs_with_generated_artifacts": sum(1 for run in runs if run.generated_artifact_count > 0), - "runs_with_test_files": sum(1 for run in runs if run.test_file_count > 0), - "effective_methods": effective_method_counts, - "errors": sum(1 for run in runs if run.error), - } - - -def _build_pairwise_deltas(stage2_summaries: Dict[str, Dict[str, Any]]) -> List[PairwiseDelta]: - modes = list(stage2_summaries.keys()) - deltas: List[PairwiseDelta] = [] - for left_index in range(len(modes)): - for right_index in range(left_index + 1, len(modes)): - left = modes[left_index] - right = modes[right_index] - left_summary = stage2_summaries[left] - right_summary = stage2_summaries[right] - deltas.append( - PairwiseDelta( - mode_a=left, - mode_b=right, - hit_at_k_delta=left_summary["hit_at_k"] - right_summary["hit_at_k"], - mrr_at_k_delta=left_summary["mrr_at_k"] - right_summary["mrr_at_k"], - avg_recall_at_k_delta=left_summary["avg_recall_at_k"] - right_summary["avg_recall_at_k"], - avg_latency_ms_delta=left_summary["avg_latency_ms"] - right_summary["avg_latency_ms"], - ) - ) - return deltas - - -def _make_plan_payload( - *, - args: argparse.Namespace, - source_root: Path, - dataset: Sequence[Dict[str, Any]], - baseline_methods: Sequence[str], - stage2_modes: Sequence[str], - strategy_specs: Sequence[StrategySpec], -) -> Dict[str, Any]: - return { - "mode": "dry-run" if args.dry_run else "self-check", - "local_only": True, - "source": str(source_root), - "queries_file": str(args.queries_file), - "query_count": len(dataset), - "query_match": args.query_match, - "k": int(args.k), - "coarse_k": int(args.coarse_k), - "baseline_methods": list(baseline_methods), - "stage2_modes": list(stage2_modes), - "strategy_keys": [spec.strategy_key for spec in strategy_specs], - "local_backends": { - "embedding_backend": args.embedding_backend, - "embedding_model": args.embedding_model, - "reranker_backend": args.reranker_backend, - "reranker_model": args.reranker_model, - "embedding_use_gpu": bool(args.embedding_use_gpu), - "reranker_use_gpu": bool(args.reranker_use_gpu), - }, - "output": str(args.output), - "progress_output": str(args.progress_output) if args.progress_output else None, - "dataset_preview": [ - { - "query": item.get("query"), - "intent": item.get("intent"), - "relevant_paths": item.get("relevant_paths"), - } - for item in list(dataset)[: min(3, len(dataset))] - ], - } - - -def build_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument( - "--source", - type=Path, - default=DEFAULT_SOURCE, - help="Source root to benchmark. Defaults to the repository root so CCW and CodexLens paths resolve together.", - ) - parser.add_argument( - "--queries-file", - type=Path, - default=DEFAULT_QUERIES_FILE, - help="Labeled JSONL dataset of CCW smart_search queries", - ) - parser.add_argument("--query-limit", type=int, default=None, help="Optional query limit") - parser.add_argument( - "--query-match", - type=str, - default=None, - help="Optional case-insensitive substring filter for selecting specific benchmark queries.", - ) - parser.add_argument("--k", type=int, default=10, help="Top-k to evaluate") - parser.add_argument("--coarse-k", type=int, default=100, help="Stage-1 coarse_k") - parser.add_argument( - "--baseline-methods", - nargs="*", - default=list(VALID_BASELINE_METHODS), - help="Requested smart_search baselines to compare before staged modes (valid: auto, fts, hybrid).", - ) - parser.add_argument( - "--stage2-modes", - nargs="*", - default=list(VALID_STAGE2_MODES), - help="Stage-2 modes to compare", - ) - parser.add_argument("--warmup", type=int, default=0, help="Warmup iterations per strategy") - parser.add_argument( - "--embedding-backend", - default="fastembed", - help="Local embedding backend. This runner only accepts fastembed.", - ) - parser.add_argument( - "--embedding-model", - default="code", - help="Embedding model/profile for the local embedding backend", - ) - parser.add_argument( - "--embedding-use-gpu", - action="store_true", - help="Enable GPU acceleration for local embeddings. Off by default for stability.", - ) - parser.add_argument( - "--reranker-backend", - default="onnx", - help="Local reranker backend. Supported local values: onnx, fastembed, legacy.", - ) - parser.add_argument( - "--reranker-model", - default=DEFAULT_LOCAL_ONNX_RERANKER_MODEL, - help="Reranker model name for the local reranker backend", - ) - parser.add_argument( - "--reranker-use-gpu", - action="store_true", - help="Enable GPU acceleration for the local reranker. Off by default for stability.", - ) - parser.add_argument( - "--skip-dense-baseline", - action="store_true", - help="Only compare staged stage2 modes and skip the dense_rerank baseline.", - ) - parser.add_argument( - "--dry-run", - action="store_true", - help="Validate dataset/config and print the benchmark plan without running retrieval.", - ) - parser.add_argument( - "--self-check", - action="store_true", - help="Smoke-check the entrypoint by validating dataset, source paths, and stage matrix wiring.", - ) - parser.add_argument( - "--output", - type=Path, - default=DEFAULT_OUTPUT, - help="Output JSON path", - ) - parser.add_argument( - "--progress-output", - type=Path, - default=None, - help="Optional JSON path updated after each query with partial progress and completed runs.", - ) - return parser - - -def main() -> None: - parser = build_parser() - args = parser.parse_args() - - source_root = args.source.expanduser().resolve() - if not source_root.exists(): - raise SystemExit(f"Source path does not exist: {source_root}") - if int(args.k) <= 0: - raise SystemExit("--k must be > 0") - if int(args.coarse_k) <= 0: - raise SystemExit("--coarse-k must be > 0") - if int(args.coarse_k) < int(args.k): - raise SystemExit("--coarse-k must be >= --k") - if int(args.warmup) < 0: - raise SystemExit("--warmup must be >= 0") - - embedding_backend = str(args.embedding_backend).strip().lower() - reranker_backend = str(args.reranker_backend).strip().lower() - _validate_local_only_backends(embedding_backend, reranker_backend) - baseline_methods = _validate_baseline_methods(args.baseline_methods) - stage2_modes = _validate_stage2_modes(args.stage2_modes) - - dataset = _load_labeled_queries(args.queries_file, None) - dataset = _filter_dataset_by_query_match(dataset, args.query_match) - dataset = _apply_query_limit(dataset, args.query_limit) - if not dataset: - raise SystemExit("No queries to run") - - missing_paths: List[str] = [] - for item in dataset: - _, _, item_missing = _resolve_expected_paths(source_root, [str(path) for path in item["relevant_paths"]]) - missing_paths.extend(item_missing) - if missing_paths: - preview = ", ".join(missing_paths[:3]) - raise SystemExit( - "Dataset relevant_paths do not resolve under the selected source root. " - f"Examples: {preview}" - ) - - strategy_specs = _strategy_specs( - stage2_modes, - include_dense_baseline=not args.skip_dense_baseline, - baseline_methods=baseline_methods, - ) - - if args.dry_run or args.self_check: - payload = _make_plan_payload( - args=args, - source_root=source_root, - dataset=dataset, - baseline_methods=baseline_methods, - stage2_modes=stage2_modes, - strategy_specs=strategy_specs, - ) - if args.self_check: - payload["status"] = "ok" - payload["checks"] = { - "dataset_loaded": True, - "stage2_matrix_size": len(stage2_modes), - "local_only_validation": True, - "source_path_exists": True, - } - print(json.dumps(payload, ensure_ascii=False, indent=2)) - return - - config = Config.load() - config.cascade_strategy = "staged" - config.enable_staged_rerank = True - config.enable_cross_encoder_rerank = True - config.embedding_backend = embedding_backend - config.embedding_model = str(args.embedding_model).strip() - config.embedding_use_gpu = bool(args.embedding_use_gpu) - config.embedding_auto_embed_missing = False - config.reranker_backend = reranker_backend - config.reranker_model = str(args.reranker_model).strip() - config.reranker_use_gpu = bool(args.reranker_use_gpu) - - strategy_runtimes = { - spec.strategy_key: _build_strategy_runtime(config, spec) - for spec in strategy_specs - } - - evaluations: List[QueryEvaluation] = [] - total_queries = len(dataset) - total_runs = total_queries * len(strategy_specs) - completed_runs = 0 - - try: - if int(args.warmup) > 0: - warm_query = str(dataset[0]["query"]).strip() - warm_relevant_paths = [str(path) for path in dataset[0]["relevant_paths"]] - _, warm_relevant, _ = _resolve_expected_paths(source_root, warm_relevant_paths) - for spec in strategy_specs: - runtime = strategy_runtimes[spec.strategy_key] - for _ in range(int(args.warmup)): - _run_strategy( - runtime.engine, - runtime.config, - strategy_spec=spec, - query=warm_query, - source_path=source_root, - k=min(int(args.k), 5), - coarse_k=min(int(args.coarse_k), 50), - relevant=warm_relevant, - ) - - for index, item in enumerate(dataset, start=1): - query = str(item.get("query", "")).strip() - if not query: - continue - print(f"[query {index}/{total_queries}] {query}", flush=True) - relevant_paths, relevant, _ = _resolve_expected_paths( - source_root, - [str(path) for path in item["relevant_paths"]], - ) - runs: Dict[str, StrategyRun] = {} - for spec in strategy_specs: - if args.progress_output is not None: - _write_json_payload( - args.progress_output, - _make_progress_payload( - args=args, - source_root=source_root, - strategy_specs=strategy_specs, - evaluations=evaluations, - query_index=index - 1, - total_queries=total_queries, - run_index=completed_runs, - total_runs=total_runs, - current_query=query, - current_strategy_key=spec.strategy_key, - ), - ) - print( - f"[run {completed_runs + 1}/{total_runs}] " - f"strategy={spec.strategy_key} query={query}", - flush=True, - ) - runtime = strategy_runtimes[spec.strategy_key] - runs[spec.strategy_key] = _run_strategy( - runtime.engine, - runtime.config, - strategy_spec=spec, - query=query, - source_path=source_root, - k=int(args.k), - coarse_k=int(args.coarse_k), - relevant=relevant, - ) - completed_runs += 1 - run = runs[spec.strategy_key] - outcome = "error" if run.error else "ok" - print( - f"[done {completed_runs}/{total_runs}] " - f"strategy={spec.strategy_key} outcome={outcome} " - f"latency_ms={run.latency_ms:.2f} " - f"first_hit_rank={run.first_hit_rank}", - flush=True, - ) - evaluations.append( - QueryEvaluation( - query=query, - intent=str(item.get("intent")) if item.get("intent") is not None else None, - notes=str(item.get("notes")) if item.get("notes") is not None else None, - relevant_paths=relevant_paths, - runs=runs, - ) - ) - if args.progress_output is not None: - _write_json_payload( - args.progress_output, - _make_progress_payload( - args=args, - source_root=source_root, - strategy_specs=strategy_specs, - evaluations=evaluations, - query_index=index, - total_queries=total_queries, - run_index=completed_runs, - total_runs=total_runs, - current_query=query, - current_strategy_key="complete", - ), - ) - finally: - for runtime in strategy_runtimes.values(): - try: - runtime.engine.close() - except Exception: - pass - for runtime in strategy_runtimes.values(): - try: - runtime.registry.close() - except Exception: - pass - - strategy_summaries: Dict[str, Dict[str, Any]] = {} - for spec in strategy_specs: - spec_runs = [evaluation.runs[spec.strategy_key] for evaluation in evaluations if spec.strategy_key in evaluation.runs] - summary = _summarize_runs(spec_runs) - summary["strategy"] = spec.strategy - summary["stage2_mode"] = spec.stage2_mode - strategy_summaries[spec.strategy_key] = summary - - stage2_mode_matrix = { - mode: strategy_summaries[f"staged:{mode}"] - for mode in stage2_modes - if f"staged:{mode}" in strategy_summaries - } - pairwise_deltas = [asdict(item) for item in _build_pairwise_deltas(stage2_mode_matrix)] - - payload = { - "status": "completed", - "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), - "source": str(source_root), - "queries_file": str(args.queries_file), - "query_count": len(evaluations), - "query_match": args.query_match, - "k": int(args.k), - "coarse_k": int(args.coarse_k), - "local_only": True, - "strategies": strategy_summaries, - "stage2_mode_matrix": stage2_mode_matrix, - "pairwise_stage2_deltas": pairwise_deltas, - "config": { - "embedding_backend": config.embedding_backend, - "embedding_model": config.embedding_model, - "embedding_use_gpu": bool(config.embedding_use_gpu), - "reranker_backend": config.reranker_backend, - "reranker_model": config.reranker_model, - "reranker_use_gpu": bool(config.reranker_use_gpu), - "enable_staged_rerank": bool(config.enable_staged_rerank), - "enable_cross_encoder_rerank": bool(config.enable_cross_encoder_rerank), - }, - "progress_output": str(args.progress_output) if args.progress_output else None, - "evaluations": [ - { - "query": evaluation.query, - "intent": evaluation.intent, - "notes": evaluation.notes, - "relevant_paths": evaluation.relevant_paths, - "runs": {key: asdict(run) for key, run in evaluation.runs.items()}, - } - for evaluation in evaluations - ], - } - - _write_final_outputs( - output_path=args.output, - progress_output=args.progress_output, - payload=payload, - ) - print(json.dumps(payload, ensure_ascii=False, indent=2)) - - -if __name__ == "__main__": - main() diff --git a/codex-lens/benchmarks/compare_semantic_methods.py b/codex-lens/benchmarks/compare_semantic_methods.py deleted file mode 100644 index da7b4873..00000000 --- a/codex-lens/benchmarks/compare_semantic_methods.py +++ /dev/null @@ -1,405 +0,0 @@ -"""Compare Binary Cascade and Vector semantic search methods. - -This script compares the two semantic retrieval approaches: -1. Binary Cascade: 256-bit binary vectors for coarse ranking -2. Vector Dense: Full semantic embeddings with cosine similarity -""" - -import sys -import time -from pathlib import Path - -# Add src to path -sys.path.insert(0, str(Path(__file__).parent.parent / "src")) - -from codexlens.storage.dir_index import DirIndexStore -from codexlens.semantic.vector_store import VectorStore - - -def get_filename(path: str) -> str: - """Extract filename from path.""" - if "\\" in path: - return path.split("\\")[-1] - elif "/" in path: - return path.split("/")[-1] - return path - - -def find_binary_indexes(index_root: Path): - """Find all binary index files.""" - return list(index_root.rglob("_index_binary_vectors.bin")) - - -# Test queries for semantic search comparison -TEST_QUERIES = [ - "how to search code semantically", - "embedding generation for files", - "hybrid search with multiple backends", - "parse python source code", - "database storage for vectors", -] - -# Index paths -INDEX_ROOT = Path(r"C:\Users\dyw\.codexlens\indexes\D\Claude_dms3\codex-lens") - - -def test_vector_search(query: str, limit: int = 10): - """Test dense vector search.""" - try: - from codexlens.semantic.factory import get_embedder - - # Find an index with embeddings - all_results = [] - total_time = 0 - - for index_db in INDEX_ROOT.rglob("_index.db"): - vector_store = VectorStore(index_db) - - if vector_store.count_chunks() == 0: - continue - - # Get embedder based on stored config - model_config = vector_store.get_model_config() - if model_config: - backend = model_config.get("backend", "fastembed") - model_name = model_config["model_name"] - model_profile = model_config["model_profile"] - if backend == "litellm": - embedder = get_embedder(backend="litellm", model=model_name) - else: - embedder = get_embedder(backend="fastembed", profile=model_profile) - else: - embedder = get_embedder(backend="fastembed", profile="code") - - start = time.perf_counter() - query_embedding = embedder.embed_single(query) - results = vector_store.search_similar( - query_embedding=query_embedding, - top_k=limit, - min_score=0.0, - return_full_content=True, - ) - total_time += (time.perf_counter() - start) * 1000 - all_results.extend(results) - - # Only need one successful search to get embedder initialized - if results: - break - - # Sort by score and limit - all_results.sort(key=lambda x: x.score, reverse=True) - return all_results[:limit], total_time, None - except Exception as e: - return [], 0, str(e) - - - -def test_binary_cascade_search(query: str, limit: int = 10): - """Test binary cascade search (binary coarse + dense fine ranking).""" - try: - from codexlens.semantic.ann_index import BinaryANNIndex - from codexlens.indexing.embedding import CascadeEmbeddingBackend - import numpy as np - import sqlite3 - - # Find binary indexes - binary_indexes = find_binary_indexes(INDEX_ROOT) - if not binary_indexes: - return [], 0, "No binary indexes found. Run 'codexlens cascade-index' first." - - start = time.perf_counter() - - # Initialize cascade backend for query encoding - cascade_backend = CascadeEmbeddingBackend() - - # Encode query to binary and dense - binary_embeddings, dense_embeddings = cascade_backend.encode_cascade([query], batch_size=1) - query_binary = binary_embeddings[0] - query_dense = dense_embeddings[0] - - all_results = [] - - for binary_index_path in binary_indexes: - # Find corresponding index.db - index_db = binary_index_path.parent / "_index.db" - if not index_db.exists(): - continue - - # Check if cascade embeddings exist - conn = sqlite3.connect(index_db) - conn.row_factory = sqlite3.Row - try: - cursor = conn.execute( - "SELECT COUNT(*) FROM semantic_chunks WHERE embedding_binary IS NOT NULL" - ) - binary_count = cursor.fetchone()[0] - if binary_count == 0: - conn.close() - continue - except Exception: - conn.close() - continue - - # Stage 1: Binary coarse search - binary_index = BinaryANNIndex(index_db, dim=256) - try: - binary_index.load() - except Exception: - conn.close() - continue - - # Pack query for binary search - from codexlens.indexing.embedding import pack_binary_embedding - query_binary_packed = pack_binary_embedding(query_binary) - - # Get top candidates - coarse_limit = min(limit * 10, 100) - # search returns (ids, distances) tuple - coarse_ids, coarse_distances = binary_index.search(query_binary_packed, top_k=coarse_limit) - - if not coarse_ids: - conn.close() - continue - - # Stage 2: Dense reranking - chunk_ids = coarse_ids - placeholders = ",".join("?" * len(chunk_ids)) - - cursor = conn.execute( - f""" - SELECT id, file_path, content, embedding_dense - FROM semantic_chunks - WHERE id IN ({placeholders}) AND embedding_dense IS NOT NULL - """, - chunk_ids - ) - rows = cursor.fetchall() - - # Compute dense scores - for row in rows: - chunk_id = row["id"] - file_path = row["file_path"] - content = row["content"] - dense_blob = row["embedding_dense"] - - if dense_blob: - dense_vec = np.frombuffer(dense_blob, dtype=np.float32) - # Cosine similarity - score = float(np.dot(query_dense, dense_vec) / ( - np.linalg.norm(query_dense) * np.linalg.norm(dense_vec) + 1e-8 - )) - else: - score = 0.0 - - all_results.append({ - "path": file_path, - "score": score, - "content": content[:200] + "..." if len(content) > 200 else content, - }) - - conn.close() - - # Sort by dense score and limit - all_results.sort(key=lambda x: x["score"], reverse=True) - final_results = all_results[:limit] - - elapsed = (time.perf_counter() - start) * 1000 - - return final_results, elapsed, None - except ImportError as e: - return [], 0, f"Import error: {e}" - except Exception as e: - import traceback - return [], 0, f"{str(e)}\n{traceback.format_exc()}" - - -def print_results(method_name: str, results, elapsed: float, error: str = None): - """Print search results in a formatted way.""" - print(f"\n{'='*60}") - print(f"Method: {method_name}") - print(f"{'='*60}") - - if error: - print(f"ERROR: {error}") - return - - print(f"Results: {len(results)}, Time: {elapsed:.1f}ms") - print("-" * 60) - - for i, r in enumerate(results[:5], 1): - if isinstance(r, dict): - path = r.get("path", "?") - score = r.get("score", 0) - content = r.get("content", "")[:80] - else: - path = getattr(r, "path", "?") - score = getattr(r, "score", 0) - content = getattr(r, "content", "")[:80] if hasattr(r, "content") else "" - - filename = get_filename(path) - print(f" {i}. [{score:.4f}] {filename}") - if content: - # Sanitize content for console output - safe_content = content.encode('ascii', 'replace').decode('ascii') - print(f" {safe_content}...") - - -def compare_overlap(results1, results2, name1: str, name2: str): - """Compare result overlap between two methods.""" - def get_paths(results): - paths = set() - for r in results[:10]: - if isinstance(r, dict): - paths.add(r.get("path", "")) - else: - paths.add(getattr(r, "path", "")) - return paths - - paths1 = get_paths(results1) - paths2 = get_paths(results2) - - if not paths1 or not paths2: - return 0.0 - - overlap = len(paths1 & paths2) - union = len(paths1 | paths2) - jaccard = overlap / union if union > 0 else 0.0 - - print(f" {name1} vs {name2}: {overlap} common files (Jaccard: {jaccard:.2f})") - return jaccard - - -def main(): - print("=" * 70) - print("SEMANTIC SEARCH METHODS COMPARISON") - print("Binary Cascade vs Vector Dense") - print("=" * 70) - - # Check prerequisites - print("\n[Prerequisites Check]") - print(f" Index Root: {INDEX_ROOT}") - - binary_indexes = find_binary_indexes(INDEX_ROOT) - print(f" Binary Indexes: {len(binary_indexes)} found") - for bi in binary_indexes[:3]: - print(f" - {bi.parent.name}/{bi.name}") - if len(binary_indexes) > 3: - print(f" ... and {len(binary_indexes) - 3} more") - - # Aggregate statistics - all_results = { - "binary": {"total_results": 0, "total_time": 0, "queries": 0, "errors": []}, - "vector": {"total_results": 0, "total_time": 0, "queries": 0, "errors": []}, - } - - overlap_scores = {"binary_vector": []} - - for query in TEST_QUERIES: - print(f"\n{'#'*70}") - print(f"QUERY: \"{query}\"") - print("#" * 70) - - # Test each method - binary_results, binary_time, binary_err = test_binary_cascade_search(query) - vector_results, vector_time, vector_err = test_vector_search(query) - - # Print results - print_results("Binary Cascade (256-bit + Dense Rerank)", binary_results, binary_time, binary_err) - print_results("Vector Dense (Semantic Embeddings)", vector_results, vector_time, vector_err) - - # Update statistics - if not binary_err: - all_results["binary"]["total_results"] += len(binary_results) - all_results["binary"]["total_time"] += binary_time - all_results["binary"]["queries"] += 1 - else: - all_results["binary"]["errors"].append(binary_err) - - if not vector_err: - all_results["vector"]["total_results"] += len(vector_results) - all_results["vector"]["total_time"] += vector_time - all_results["vector"]["queries"] += 1 - else: - all_results["vector"]["errors"].append(vector_err) - - # Compare overlap - print("\n[Result Overlap Analysis]") - if binary_results and vector_results: - j = compare_overlap(binary_results, vector_results, "Binary", "Vector") - overlap_scores["binary_vector"].append(j) - - # Print summary - print("\n" + "=" * 70) - print("SUMMARY STATISTICS") - print("=" * 70) - - for method, stats in all_results.items(): - queries = stats["queries"] - if queries > 0: - avg_results = stats["total_results"] / queries - avg_time = stats["total_time"] / queries - print(f"\n{method.upper()}:") - print(f" Successful queries: {queries}/{len(TEST_QUERIES)}") - print(f" Avg results: {avg_results:.1f}") - print(f" Avg time: {avg_time:.1f}ms") - else: - print(f"\n{method.upper()}: No successful queries") - if stats["errors"]: - # Show truncated error - err = stats["errors"][0] - if len(err) > 200: - err = err[:200] + "..." - print(f" Error: {err}") - - print("\n[Average Overlap Scores]") - for pair, scores in overlap_scores.items(): - if scores: - avg = sum(scores) / len(scores) - print(f" {pair}: {avg:.3f}") - - print("\n" + "=" * 70) - print("ANALYSIS") - print("=" * 70) - - # Analyze working methods - working_methods = [m for m, s in all_results.items() if s["queries"] > 0] - - if len(working_methods) == 2: - # All methods working - compare quality - print("\nBoth methods working. Quality comparison:") - - # Compare avg results - print("\n Result Coverage (higher = more recall):") - for m in ["vector", "binary"]: - stats = all_results[m] - if stats["queries"] > 0: - avg = stats["total_results"] / stats["queries"] - print(f" {m.upper()}: {avg:.1f} results/query") - - # Compare speed - print("\n Speed (lower = faster):") - for m in ["binary", "vector"]: - stats = all_results[m] - if stats["queries"] > 0: - avg = stats["total_time"] / stats["queries"] - print(f" {m.upper()}: {avg:.1f}ms") - - # Recommend fusion strategy - print("\n Recommended Fusion Strategy:") - print(" For quality-focused hybrid search:") - print(" 1. Run both methods in parallel") - print(" 2. Use RRF fusion with weights:") - print(" - Vector: 0.6 (best semantic understanding)") - print(" - Binary: 0.4 (fast coarse filtering)") - print(" 3. Apply CrossEncoder reranking on top-50") - - elif len(working_methods) >= 2: - print(f"\n{len(working_methods)} methods working: {', '.join(working_methods)}") - print("Consider fixing missing method for complete hybrid search.") - else: - print(f"\nOnly {working_methods[0] if working_methods else 'no'} method(s) working.") - print("Check your index setup.") - - -if __name__ == "__main__": - main() diff --git a/codex-lens/benchmarks/compare_staged_realtime_vs_dense_rerank.py b/codex-lens/benchmarks/compare_staged_realtime_vs_dense_rerank.py deleted file mode 100644 index fb6b26a1..00000000 --- a/codex-lens/benchmarks/compare_staged_realtime_vs_dense_rerank.py +++ /dev/null @@ -1,393 +0,0 @@ -#!/usr/bin/env python -"""Compare staged realtime LSP pipeline vs direct dense->rerank cascade. - -This benchmark compares two retrieval pipelines: -1) staged+realtime: coarse (binary or dense fallback) -> realtime LSP graph expand -> clustering -> rerank -2) dense_rerank: dense ANN coarse -> cross-encoder rerank - -Because most repos do not have ground-truth labels, this script reports: -- latency statistics -- top-k overlap metrics (Jaccard + RBO) -- diversity proxies (unique files/dirs) -- staged pipeline stage stats (if present) - -Usage: - python benchmarks/compare_staged_realtime_vs_dense_rerank.py --source ./src - python benchmarks/compare_staged_realtime_vs_dense_rerank.py --queries-file benchmarks/queries.txt -""" - -from __future__ import annotations - -import argparse -import gc -import json -import os -import re -import statistics -import sys -import time -from dataclasses import asdict, dataclass -from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Tuple - -# Add src to path (match other benchmark scripts) -sys.path.insert(0, str(Path(__file__).parent.parent / "src")) - -from codexlens.config import Config -from codexlens.search.chain_search import ChainSearchEngine, SearchOptions -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore - - -DEFAULT_QUERIES = [ - "class Config", - "def search", - "LspBridge", - "graph expansion", - "clustering strategy", - "error handling", - "how to parse json", -] - - -def _now_ms() -> float: - return time.perf_counter() * 1000.0 - - -def _safe_relpath(path: str, root: Path) -> str: - try: - return str(Path(path).resolve().relative_to(root.resolve())) - except Exception: - return path - - -def _normalize_path_key(path: str) -> str: - """Normalize file paths for overlap/dedup metrics (Windows-safe).""" - try: - p = Path(path) - # Don't explode on non-files like "". - if str(p) and (p.is_absolute() or re.match(r"^[A-Za-z]:", str(p))): - norm = str(p.resolve()) - else: - norm = str(p) - except Exception: - norm = path - norm = norm.replace("/", "\\") - if os.name == "nt": - norm = norm.lower() - return norm - - -def _extract_stage_stats(errors: List[str]) -> Optional[Dict[str, Any]]: - """Extract STAGE_STATS JSON blob from SearchStats.errors.""" - for item in errors or []: - if not isinstance(item, str): - continue - if not item.startswith("STAGE_STATS:"): - continue - payload = item[len("STAGE_STATS:") :] - try: - return json.loads(payload) - except Exception: - return None - return None - - -def jaccard_topk(a: List[str], b: List[str]) -> float: - sa, sb = set(a), set(b) - if not sa and not sb: - return 1.0 - if not sa or not sb: - return 0.0 - return len(sa & sb) / len(sa | sb) - - -def rbo(a: List[str], b: List[str], p: float = 0.9) -> float: - """Rank-biased overlap for two ranked lists.""" - if p <= 0.0 or p >= 1.0: - raise ValueError("p must be in (0, 1)") - if not a and not b: - return 1.0 - - depth = max(len(a), len(b)) - seen_a: set[str] = set() - seen_b: set[str] = set() - - score = 0.0 - for d in range(1, depth + 1): - if d <= len(a): - seen_a.add(a[d - 1]) - if d <= len(b): - seen_b.add(b[d - 1]) - overlap = len(seen_a & seen_b) - score += (overlap / d) * ((1.0 - p) * (p ** (d - 1))) - return score - - -def _unique_parent_dirs(paths: Iterable[str]) -> int: - dirs = set() - for p in paths: - try: - dirs.add(str(Path(p).parent)) - except Exception: - continue - return len(dirs) - - -@dataclass -class RunDetail: - strategy: str - query: str - latency_ms: float - num_results: int - topk_paths: List[str] - stage_stats: Optional[Dict[str, Any]] = None - error: Optional[str] = None - - -@dataclass -class CompareDetail: - query: str - staged: RunDetail - dense_rerank: RunDetail - jaccard_topk: float - rbo_topk: float - staged_unique_files_topk: int - dense_unique_files_topk: int - staged_unique_dirs_topk: int - dense_unique_dirs_topk: int - - -def _run_once( - engine: ChainSearchEngine, - query: str, - source_path: Path, - *, - strategy: str, - k: int, - coarse_k: int, - options: Optional[SearchOptions] = None, -) -> RunDetail: - gc.collect() - start_ms = _now_ms() - try: - result = engine.cascade_search( - query=query, - source_path=source_path, - k=k, - coarse_k=coarse_k, - options=options, - strategy=strategy, - ) - latency_ms = _now_ms() - start_ms - paths_raw = [r.path for r in (result.results or []) if getattr(r, "path", None)] - paths = [_normalize_path_key(p) for p in paths_raw] - topk: List[str] = [] - seen: set[str] = set() - for p in paths: - if p in seen: - continue - seen.add(p) - topk.append(p) - if len(topk) >= k: - break - stage_stats = _extract_stage_stats(getattr(result.stats, "errors", [])) - return RunDetail( - strategy=strategy, - query=query, - latency_ms=latency_ms, - num_results=len(paths), - topk_paths=topk, - stage_stats=stage_stats, - ) - except Exception as exc: - latency_ms = _now_ms() - start_ms - return RunDetail( - strategy=strategy, - query=query, - latency_ms=latency_ms, - num_results=0, - topk_paths=[], - stage_stats=None, - error=repr(exc), - ) - - -def _load_queries(path: Optional[Path], limit: Optional[int]) -> List[str]: - if path is None: - queries = list(DEFAULT_QUERIES) - else: - raw = path.read_text(encoding="utf-8", errors="ignore").splitlines() - queries = [] - for line in raw: - line = line.strip() - if not line or line.startswith("#"): - continue - queries.append(line) - if limit is not None: - return queries[:limit] - return queries - - -def main() -> None: - parser = argparse.ArgumentParser( - description="Compare staged realtime LSP pipeline vs direct dense_rerank cascade" - ) - parser.add_argument( - "--source", - type=Path, - default=Path(__file__).parent.parent / "src", - help="Source directory to search (default: ./src)", - ) - parser.add_argument( - "--queries-file", - type=Path, - default=None, - help="Optional file with one query per line (# comments supported)", - ) - parser.add_argument("--queries", type=int, default=None, help="Limit number of queries") - parser.add_argument("--k", type=int, default=10, help="Final result count (default 10)") - parser.add_argument("--coarse-k", type=int, default=100, help="Coarse candidates (default 100)") - parser.add_argument("--warmup", type=int, default=1, help="Warmup runs per strategy (default 1)") - parser.add_argument( - "--staged-cluster-strategy", - type=str, - default=None, - help="Override Config.staged_clustering_strategy for staged pipeline (e.g. auto, dir_rr, score, path)", - ) - parser.add_argument( - "--output", - type=Path, - default=Path(__file__).parent / "results" / "staged_realtime_vs_dense_rerank.json", - help="Output JSON path", - ) - args = parser.parse_args() - - if not args.source.exists(): - raise SystemExit(f"Source path does not exist: {args.source}") - - queries = _load_queries(args.queries_file, args.queries) - if not queries: - raise SystemExit("No queries to run") - - # Match CLI behavior: load settings + apply global/workspace .env overrides. - # This is important on Windows where ONNX/DirectML can sometimes crash under load; - # many users pin EMBEDDING_BACKEND=litellm in ~/.codexlens/.env for stability. - config = Config.load() - config.cascade_strategy = "staged" - config.staged_stage2_mode = "realtime" - config.enable_staged_rerank = True - if args.staged_cluster_strategy: - config.staged_clustering_strategy = str(args.staged_cluster_strategy) - # Stability: on some Windows setups, fastembed + DirectML can crash under load. - # Force local embeddings and reranking onto CPU for reproducible benchmark runs. - config.embedding_use_gpu = False - config.reranker_use_gpu = False - registry = RegistryStore() - registry.initialize() - mapper = PathMapper() - engine = ChainSearchEngine(registry=registry, mapper=mapper, config=config) - - try: - strategies = ["staged", "dense_rerank"] - - # Warmup - if args.warmup > 0: - warm_query = queries[0] - for s in strategies: - for _ in range(args.warmup): - try: - _run_once( - engine, - warm_query, - args.source, - strategy=s, - k=min(args.k, 5), - coarse_k=min(args.coarse_k, 50), - ) - except Exception: - pass - - comparisons: List[CompareDetail] = [] - - for i, query in enumerate(queries, start=1): - print(f"[{i}/{len(queries)}] {query}") - - staged = _run_once( - engine, - query, - args.source, - strategy="staged", - k=args.k, - coarse_k=args.coarse_k, - ) - dense = _run_once( - engine, - query, - args.source, - strategy="dense_rerank", - k=args.k, - coarse_k=args.coarse_k, - ) - - staged_paths = staged.topk_paths - dense_paths = dense.topk_paths - - comparisons.append( - CompareDetail( - query=query, - staged=staged, - dense_rerank=dense, - jaccard_topk=jaccard_topk(staged_paths, dense_paths), - rbo_topk=rbo(staged_paths, dense_paths, p=0.9), - staged_unique_files_topk=len(set(staged_paths)), - dense_unique_files_topk=len(set(dense_paths)), - staged_unique_dirs_topk=_unique_parent_dirs(staged_paths), - dense_unique_dirs_topk=_unique_parent_dirs(dense_paths), - ) - ) - - def _latencies(details: List[RunDetail]) -> List[float]: - return [d.latency_ms for d in details if not d.error] - - staged_runs = [c.staged for c in comparisons] - dense_runs = [c.dense_rerank for c in comparisons] - - summary = { - "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), - "source": str(args.source), - "k": args.k, - "coarse_k": args.coarse_k, - "query_count": len(comparisons), - "avg_jaccard_topk": statistics.mean([c.jaccard_topk for c in comparisons]) if comparisons else 0.0, - "avg_rbo_topk": statistics.mean([c.rbo_topk for c in comparisons]) if comparisons else 0.0, - "staged": { - "success": sum(1 for r in staged_runs if not r.error), - "avg_latency_ms": statistics.mean(_latencies(staged_runs)) if _latencies(staged_runs) else 0.0, - }, - "dense_rerank": { - "success": sum(1 for r in dense_runs if not r.error), - "avg_latency_ms": statistics.mean(_latencies(dense_runs)) if _latencies(dense_runs) else 0.0, - }, - } - - args.output.parent.mkdir(parents=True, exist_ok=True) - payload = { - "summary": summary, - "comparisons": [asdict(c) for c in comparisons], - } - args.output.write_text(json.dumps(payload, indent=2), encoding="utf-8") - print(f"\nSaved: {args.output}") - finally: - try: - engine.close() - except Exception as exc: - print(f"WARNING engine.close() failed: {exc!r}", file=sys.stderr) - try: - registry.close() - except Exception as exc: - print(f"WARNING registry.close() failed: {exc!r}", file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/codex-lens/benchmarks/compare_staged_stage2_modes.py b/codex-lens/benchmarks/compare_staged_stage2_modes.py deleted file mode 100644 index 893b988c..00000000 --- a/codex-lens/benchmarks/compare_staged_stage2_modes.py +++ /dev/null @@ -1,391 +0,0 @@ -#!/usr/bin/env python -"""Compare staged cascade Stage-2 modes (precomputed vs realtime vs static graph). - -This benchmark compares the *same* staged cascade strategy with different Stage-2 -expansion sources: - -1) precomputed: per-dir `graph_neighbors` expansion (fast, index-local) -2) realtime: live LSP graph expansion (contextual, requires LSP availability) -3) static_global_graph: global_relationships expansion (project-wide, requires static graph indexing) - -Because most repos do not have ground-truth labels, this script reports: -- latency statistics per mode -- top-k overlap metrics (Jaccard + RBO) between modes -- diversity proxies (unique files/dirs) -- staged pipeline stage stats (when present) - -Usage: - python benchmarks/compare_staged_stage2_modes.py --source ./src - python benchmarks/compare_staged_stage2_modes.py --queries-file benchmarks/queries.txt -""" - -from __future__ import annotations - -import argparse -import gc -import json -import os -import re -import statistics -import sys -import time -from dataclasses import asdict, dataclass -from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Tuple - -# Add src to path (match other benchmark scripts) -sys.path.insert(0, str(Path(__file__).parent.parent / "src")) - -from codexlens.config import Config -from codexlens.search.chain_search import ChainSearchEngine -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore - - -DEFAULT_QUERIES = [ - "class Config", - "def search", - "LspBridge", - "graph expansion", - "static graph relationships", - "clustering strategy", - "error handling", -] - - -VALID_STAGE2_MODES = ("precomputed", "realtime", "static_global_graph") - - -def _now_ms() -> float: - return time.perf_counter() * 1000.0 - - -def _normalize_path_key(path: str) -> str: - """Normalize file paths for overlap/dedup metrics (Windows-safe).""" - try: - p = Path(path) - if str(p) and (p.is_absolute() or re.match(r"^[A-Za-z]:", str(p))): - norm = str(p.resolve()) - else: - norm = str(p) - except Exception: - norm = path - norm = norm.replace("/", "\\") - if os.name == "nt": - norm = norm.lower() - return norm - - -def _extract_stage_stats(errors: List[str]) -> Optional[Dict[str, Any]]: - """Extract STAGE_STATS JSON blob from SearchStats.errors.""" - for item in errors or []: - if not isinstance(item, str): - continue - if not item.startswith("STAGE_STATS:"): - continue - payload = item[len("STAGE_STATS:") :] - try: - return json.loads(payload) - except Exception: - return None - return None - - -def jaccard_topk(a: List[str], b: List[str]) -> float: - sa, sb = set(a), set(b) - if not sa and not sb: - return 1.0 - if not sa or not sb: - return 0.0 - return len(sa & sb) / len(sa | sb) - - -def rbo(a: List[str], b: List[str], p: float = 0.9) -> float: - """Rank-biased overlap for two ranked lists.""" - if p <= 0.0 or p >= 1.0: - raise ValueError("p must be in (0, 1)") - if not a and not b: - return 1.0 - - depth = max(len(a), len(b)) - seen_a: set[str] = set() - seen_b: set[str] = set() - - score = 0.0 - for d in range(1, depth + 1): - if d <= len(a): - seen_a.add(a[d - 1]) - if d <= len(b): - seen_b.add(b[d - 1]) - overlap = len(seen_a & seen_b) - score += (overlap / d) * ((1.0 - p) * (p ** (d - 1))) - return score - - -def _unique_parent_dirs(paths: Iterable[str]) -> int: - dirs = set() - for p in paths: - try: - dirs.add(str(Path(p).parent)) - except Exception: - continue - return len(dirs) - - -def _load_queries(path: Optional[Path], inline: Optional[List[str]]) -> List[str]: - if inline: - return [q.strip() for q in inline if isinstance(q, str) and q.strip()] - if path: - if not path.exists(): - raise SystemExit(f"Queries file does not exist: {path}") - raw = path.read_text(encoding="utf-8", errors="ignore") - queries = [line.strip() for line in raw.splitlines() if line.strip() and not line.strip().startswith("#")] - return queries - return list(DEFAULT_QUERIES) - - -@dataclass -class RunDetail: - stage2_mode: str - query: str - latency_ms: float - num_results: int - topk_paths: List[str] - stage_stats: Optional[Dict[str, Any]] = None - error: Optional[str] = None - - -@dataclass -class PairwiseCompare: - query: str - mode_a: str - mode_b: str - jaccard_topk: float - rbo_topk: float - a_unique_files_topk: int - b_unique_files_topk: int - a_unique_dirs_topk: int - b_unique_dirs_topk: int - - -def _run_once( - engine: ChainSearchEngine, - config: Config, - query: str, - source_path: Path, - *, - stage2_mode: str, - k: int, - coarse_k: int, -) -> RunDetail: - if stage2_mode not in VALID_STAGE2_MODES: - raise ValueError(f"Invalid stage2_mode: {stage2_mode}") - - # Mutate config for this run; ChainSearchEngine reads config fields per-call. - config.staged_stage2_mode = stage2_mode - - gc.collect() - start_ms = _now_ms() - try: - result = engine.cascade_search( - query=query, - source_path=source_path, - k=k, - coarse_k=coarse_k, - strategy="staged", - ) - latency_ms = _now_ms() - start_ms - paths_raw = [r.path for r in (result.results or []) if getattr(r, "path", None)] - paths = [_normalize_path_key(p) for p in paths_raw] - - topk: List[str] = [] - seen: set[str] = set() - for p in paths: - if p in seen: - continue - seen.add(p) - topk.append(p) - if len(topk) >= k: - break - - stage_stats = None - try: - stage_stats = _extract_stage_stats(getattr(result.stats, "errors", []) or []) - except Exception: - stage_stats = None - - return RunDetail( - stage2_mode=stage2_mode, - query=query, - latency_ms=latency_ms, - num_results=len(result.results or []), - topk_paths=topk, - stage_stats=stage_stats, - error=None, - ) - except Exception as exc: - return RunDetail( - stage2_mode=stage2_mode, - query=query, - latency_ms=_now_ms() - start_ms, - num_results=0, - topk_paths=[], - stage_stats=None, - error=str(exc), - ) - - -def main() -> None: - parser = argparse.ArgumentParser(description="Compare staged Stage-2 expansion modes.") - parser.add_argument("--source", type=Path, default=Path.cwd(), help="Project path to search") - parser.add_argument("--queries-file", type=Path, default=None, help="Optional newline-delimited queries file") - parser.add_argument("--queries", nargs="*", default=None, help="Inline queries (overrides queries-file)") - parser.add_argument("--k", type=int, default=20, help="Top-k to evaluate") - parser.add_argument("--coarse-k", type=int, default=100, help="Stage-1 coarse_k") - parser.add_argument( - "--stage2-modes", - nargs="*", - default=list(VALID_STAGE2_MODES), - help="Stage-2 modes to compare", - ) - parser.add_argument("--warmup", type=int, default=0, help="Warmup iterations per mode") - parser.add_argument( - "--output", - type=Path, - default=Path(__file__).parent / "results" / "staged_stage2_modes.json", - help="Output JSON path", - ) - args = parser.parse_args() - - if not args.source.exists(): - raise SystemExit(f"Source path does not exist: {args.source}") - - stage2_modes = [str(m).strip().lower() for m in (args.stage2_modes or []) if str(m).strip()] - for m in stage2_modes: - if m not in VALID_STAGE2_MODES: - raise SystemExit(f"Invalid --stage2-modes entry: {m} (valid: {', '.join(VALID_STAGE2_MODES)})") - - queries = _load_queries(args.queries_file, args.queries) - if not queries: - raise SystemExit("No queries to run") - - # Match CLI behavior: load settings + apply global/workspace .env overrides. - config = Config.load() - config.cascade_strategy = "staged" - config.enable_staged_rerank = True - config.embedding_use_gpu = False # stability on some Windows setups - - registry = RegistryStore() - registry.initialize() - mapper = PathMapper() - engine = ChainSearchEngine(registry=registry, mapper=mapper, config=config) - - try: - # Warmup - if args.warmup > 0: - warm_query = queries[0] - for mode in stage2_modes: - for _ in range(args.warmup): - try: - _run_once( - engine, - config, - warm_query, - args.source, - stage2_mode=mode, - k=min(args.k, 5), - coarse_k=min(args.coarse_k, 50), - ) - except Exception: - pass - - per_query: Dict[str, Dict[str, RunDetail]] = {} - runs: List[RunDetail] = [] - comparisons: List[PairwiseCompare] = [] - - for i, query in enumerate(queries, start=1): - print(f"[{i}/{len(queries)}] {query}") - per_query[query] = {} - - for mode in stage2_modes: - detail = _run_once( - engine, - config, - query, - args.source, - stage2_mode=mode, - k=args.k, - coarse_k=args.coarse_k, - ) - per_query[query][mode] = detail - runs.append(detail) - - # Pairwise overlaps for this query - for a_idx in range(len(stage2_modes)): - for b_idx in range(a_idx + 1, len(stage2_modes)): - mode_a = stage2_modes[a_idx] - mode_b = stage2_modes[b_idx] - a = per_query[query][mode_a] - b = per_query[query][mode_b] - comparisons.append( - PairwiseCompare( - query=query, - mode_a=mode_a, - mode_b=mode_b, - jaccard_topk=jaccard_topk(a.topk_paths, b.topk_paths), - rbo_topk=rbo(a.topk_paths, b.topk_paths, p=0.9), - a_unique_files_topk=len(set(a.topk_paths)), - b_unique_files_topk=len(set(b.topk_paths)), - a_unique_dirs_topk=_unique_parent_dirs(a.topk_paths), - b_unique_dirs_topk=_unique_parent_dirs(b.topk_paths), - ) - ) - - def _latencies(details: List[RunDetail]) -> List[float]: - return [d.latency_ms for d in details if not d.error] - - mode_summaries: Dict[str, Dict[str, Any]] = {} - for mode in stage2_modes: - mode_runs = [r for r in runs if r.stage2_mode == mode] - lat = _latencies(mode_runs) - mode_summaries[mode] = { - "success": sum(1 for r in mode_runs if not r.error), - "avg_latency_ms": statistics.mean(lat) if lat else 0.0, - "p50_latency_ms": statistics.median(lat) if lat else 0.0, - "p95_latency_ms": statistics.quantiles(lat, n=20)[18] if len(lat) >= 2 else (lat[0] if lat else 0.0), - } - - summary = { - "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), - "source": str(args.source), - "k": args.k, - "coarse_k": args.coarse_k, - "query_count": len(queries), - "stage2_modes": stage2_modes, - "modes": mode_summaries, - "avg_pairwise_jaccard_topk": statistics.mean([c.jaccard_topk for c in comparisons]) if comparisons else 0.0, - "avg_pairwise_rbo_topk": statistics.mean([c.rbo_topk for c in comparisons]) if comparisons else 0.0, - } - - args.output.parent.mkdir(parents=True, exist_ok=True) - payload = { - "summary": summary, - "runs": [asdict(r) for r in runs], - "comparisons": [asdict(c) for c in comparisons], - } - args.output.write_text(json.dumps(payload, indent=2), encoding="utf-8") - print(f"\nSaved: {args.output}") - finally: - try: - engine.close() - except Exception as exc: - print(f"WARNING engine.close() failed: {exc!r}", file=sys.stderr) - try: - registry.close() - except Exception as exc: - print(f"WARNING registry.close() failed: {exc!r}", file=sys.stderr) - - -if __name__ == "__main__": - main() - diff --git a/codex-lens/benchmarks/method_contribution_analysis.py b/codex-lens/benchmarks/method_contribution_analysis.py deleted file mode 100644 index e16abe6a..00000000 --- a/codex-lens/benchmarks/method_contribution_analysis.py +++ /dev/null @@ -1,527 +0,0 @@ -"""Analysis script for hybrid search method contribution and storage architecture. - -This script analyzes: -1. Individual method contribution in hybrid search (FTS/Vector) -2. Storage architecture conflicts between different retrieval methods -3. FTS + Rerank fusion experiment -""" - -import json -import sqlite3 -import time -from pathlib import Path -from typing import Dict, List, Tuple, Any -from collections import defaultdict - -# Add project root to path -import sys -sys.path.insert(0, str(Path(__file__).parent.parent / "src")) - -from codexlens.storage.registry import RegistryStore -from codexlens.storage.path_mapper import PathMapper -from codexlens.search.hybrid_search import HybridSearchEngine -from codexlens.search.ranking import ( - reciprocal_rank_fusion, - cross_encoder_rerank, - DEFAULT_WEIGHTS, -) -from codexlens.entities import SearchResult - - -def find_project_index(source_path: Path) -> Path: - """Find the index database for a project.""" - registry = RegistryStore() - registry.initialize() - - mapper = PathMapper() - index_path = mapper.source_to_index_db(source_path) - - if not index_path.exists(): - nearest = registry.find_nearest_index(source_path) - if nearest: - index_path = nearest.index_path - - registry.close() - return index_path - - -def analyze_storage_architecture(index_path: Path) -> Dict[str, Any]: - """Analyze storage tables and check for conflicts. - - Returns: - Dictionary with table analysis and conflict detection. - """ - results = { - "tables": {}, - "conflicts": [], - "recommendations": [] - } - - with sqlite3.connect(index_path) as conn: - # Get all tables - cursor = conn.execute( - "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name" - ) - tables = [row[0] for row in cursor.fetchall()] - - for table in tables: - # Get row count and columns - try: - count = conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0] - cols = conn.execute(f"PRAGMA table_info({table})").fetchall() - col_names = [c[1] for c in cols] - - results["tables"][table] = { - "row_count": count, - "columns": col_names - } - except Exception as e: - results["tables"][table] = {"error": str(e)} - - # Check for data overlap/conflicts - # 1. Check if chunks and semantic_chunks have different data - if "chunks" in tables and "semantic_chunks" in tables: - chunks_count = results["tables"]["chunks"]["row_count"] - semantic_count = results["tables"]["semantic_chunks"]["row_count"] - - if chunks_count > 0 and semantic_count > 0: - # Check for ID overlap - overlap = conn.execute(""" - SELECT COUNT(*) FROM chunks c - JOIN semantic_chunks sc ON c.id = sc.id - """).fetchone()[0] - - results["conflicts"].append({ - "type": "table_overlap", - "tables": ["chunks", "semantic_chunks"], - "chunks_count": chunks_count, - "semantic_count": semantic_count, - "id_overlap": overlap, - "description": ( - f"Both chunks ({chunks_count}) and semantic_chunks ({semantic_count}) " - f"have data. ID overlap: {overlap}. " - "This can cause confusion - binary_cascade reads from semantic_chunks " - "but SQLiteStore reads from chunks." - ) - }) - elif chunks_count == 0 and semantic_count > 0: - results["recommendations"].append( - "chunks table is empty but semantic_chunks has data. " - "Use cascade-index (semantic_chunks) for better semantic search." - ) - elif chunks_count > 0 and semantic_count == 0: - results["recommendations"].append( - "semantic_chunks is empty. Run 'codexlens cascade-index' to enable " - "binary cascade search." - ) - - # 2. Check FTS tables - fts_tables = [t for t in tables if t.startswith("files_fts")] - if len(fts_tables) >= 2: - results["recommendations"].append( - f"Found {len(fts_tables)} FTS tables: {fts_tables}. " - "Dual FTS (exact + fuzzy) is properly configured." - ) - - return results - - -def analyze_method_contributions( - index_path: Path, - queries: List[str], - limit: int = 20 -) -> Dict[str, Any]: - """Analyze contribution of each retrieval method. - - Runs each method independently and measures: - - Result count - - Latency - - Score distribution - - Overlap with other methods - """ - results = { - "per_query": [], - "summary": {} - } - - for query in queries: - query_result = { - "query": query, - "methods": {}, - "fusion_analysis": {} - } - - # Run each method independently - methods = { - "fts_exact": {"fuzzy": False, "vector": False}, - "fts_fuzzy": {"fuzzy": True, "vector": False}, - "vector": {"fuzzy": False, "vector": True}, - } - - method_results: Dict[str, List[SearchResult]] = {} - - for method_name, config in methods.items(): - try: - engine = HybridSearchEngine() - - # Set config to disable/enable specific backends - engine._config = type('obj', (object,), { - 'use_fts_fallback': method_name.startswith("fts"), - 'embedding_use_gpu': True, - })() - - start = time.perf_counter() - - if method_name == "fts_exact": - # Force FTS fallback mode with fuzzy disabled - engine.weights = DEFAULT_WEIGHTS.copy() - results_list = engine.search( - index_path, query, limit=limit, - enable_fuzzy=False, enable_vector=False, pure_vector=False - ) - elif method_name == "fts_fuzzy": - engine.weights = DEFAULT_WEIGHTS.copy() - results_list = engine.search( - index_path, query, limit=limit, - enable_fuzzy=True, enable_vector=False, pure_vector=False - ) - elif method_name == "vector": - results_list = engine.search( - index_path, query, limit=limit, - enable_fuzzy=False, enable_vector=True, pure_vector=True - ) - else: - results_list = [] - - latency = (time.perf_counter() - start) * 1000 - - method_results[method_name] = results_list - - scores = [r.score for r in results_list] - query_result["methods"][method_name] = { - "count": len(results_list), - "latency_ms": latency, - "avg_score": sum(scores) / len(scores) if scores else 0, - "max_score": max(scores) if scores else 0, - "min_score": min(scores) if scores else 0, - "top_3_files": [r.path.split("\\")[-1] for r in results_list[:3]] - } - - except Exception as e: - query_result["methods"][method_name] = { - "error": str(e), - "count": 0 - } - - # Compute overlap between methods - method_paths = { - name: set(r.path for r in results) - for name, results in method_results.items() - if results - } - - overlaps = {} - method_names = list(method_paths.keys()) - for i, m1 in enumerate(method_names): - for m2 in method_names[i+1:]: - overlap = len(method_paths[m1] & method_paths[m2]) - union = len(method_paths[m1] | method_paths[m2]) - jaccard = overlap / union if union > 0 else 0 - overlaps[f"{m1}_vs_{m2}"] = { - "overlap_count": overlap, - "jaccard": jaccard, - f"{m1}_unique": len(method_paths[m1] - method_paths[m2]), - f"{m2}_unique": len(method_paths[m2] - method_paths[m1]), - } - - query_result["overlaps"] = overlaps - - # Analyze RRF fusion contribution - if len(method_results) >= 2: - # Compute RRF with each method's contribution - rrf_map = {} - for name, results in method_results.items(): - if results and name in ["fts_exact", "vector"]: - # Rename for RRF - rrf_name = name.replace("fts_exact", "exact") - rrf_map[rrf_name] = results - - if rrf_map: - fused = reciprocal_rank_fusion(rrf_map, k=60) - - # Analyze which methods contributed to top results - source_contributions = defaultdict(int) - for r in fused[:10]: - source_ranks = r.metadata.get("source_ranks", {}) - for source in source_ranks: - source_contributions[source] += 1 - - query_result["fusion_analysis"] = { - "total_fused": len(fused), - "top_10_source_distribution": dict(source_contributions) - } - - results["per_query"].append(query_result) - - # Compute summary statistics - method_stats = defaultdict(lambda: {"counts": [], "latencies": []}) - for qr in results["per_query"]: - for method, data in qr["methods"].items(): - if "count" in data: - method_stats[method]["counts"].append(data["count"]) - if "latency_ms" in data: - method_stats[method]["latencies"].append(data["latency_ms"]) - - results["summary"] = { - method: { - "avg_count": sum(s["counts"]) / len(s["counts"]) if s["counts"] else 0, - "avg_latency_ms": sum(s["latencies"]) / len(s["latencies"]) if s["latencies"] else 0, - } - for method, s in method_stats.items() - } - - return results - - -def experiment_fts_rerank_fusion( - index_path: Path, - queries: List[str], - limit: int = 10, - coarse_k: int = 50 -) -> Dict[str, Any]: - """Experiment: FTS + Rerank fusion vs standard hybrid. - - Compares: - 1. Standard Hybrid (FTS + Vector RRF) - 2. FTS + CrossEncoder Rerank -> then fuse with Vector - """ - results = { - "per_query": [], - "summary": {} - } - - # Initialize reranker - try: - from codexlens.semantic.reranker import get_reranker, check_reranker_available - ok, _ = check_reranker_available("onnx") - if ok: - reranker = get_reranker(backend="onnx", use_gpu=True) - else: - reranker = None - except Exception as e: - print(f"Reranker unavailable: {e}") - reranker = None - - for query in queries: - query_result = { - "query": query, - "strategies": {} - } - - # Strategy 1: Standard Hybrid (FTS + Vector) - try: - engine = HybridSearchEngine(weights=DEFAULT_WEIGHTS) - engine._config = type('obj', (object,), { - 'use_fts_fallback': False, - 'embedding_use_gpu': True, - })() - - start = time.perf_counter() - standard_results = engine.search( - index_path, query, limit=limit, - enable_vector=True - ) - standard_latency = (time.perf_counter() - start) * 1000 - - query_result["strategies"]["standard_hybrid"] = { - "count": len(standard_results), - "latency_ms": standard_latency, - "top_5": [r.path.split("\\")[-1] for r in standard_results[:5]], - "scores": [r.score for r in standard_results[:5]] - } - except Exception as e: - query_result["strategies"]["standard_hybrid"] = {"error": str(e)} - - # Strategy 2: FTS + Rerank -> Fuse with Vector - try: - # Step 1: Get FTS results (coarse) - fts_engine = HybridSearchEngine(weights=DEFAULT_WEIGHTS) - fts_engine._config = type('obj', (object,), { - 'use_fts_fallback': True, - 'embedding_use_gpu': True, - })() - - start = time.perf_counter() - fts_results = fts_engine.search( - index_path, query, limit=coarse_k, - enable_fuzzy=True, enable_vector=False - ) - fts_latency = (time.perf_counter() - start) * 1000 - - # Step 2: Rerank FTS results with CrossEncoder - if reranker and fts_results: - rerank_start = time.perf_counter() - reranked_fts = cross_encoder_rerank( - query, fts_results, reranker, top_k=20 - ) - rerank_latency = (time.perf_counter() - rerank_start) * 1000 - else: - reranked_fts = fts_results[:20] - rerank_latency = 0 - - # Step 3: Get Vector results - vector_engine = HybridSearchEngine() - vector_results = vector_engine.search( - index_path, query, limit=20, - enable_vector=True, pure_vector=True - ) - - # Step 4: Fuse reranked FTS with Vector - if reranked_fts and vector_results: - fusion_map = { - "fts_reranked": reranked_fts, - "vector": vector_results - } - fused_results = reciprocal_rank_fusion( - fusion_map, - weights={"fts_reranked": 0.5, "vector": 0.5}, - k=60 - ) - else: - fused_results = reranked_fts or vector_results or [] - - total_latency = fts_latency + rerank_latency + (time.perf_counter() - start) * 1000 - - query_result["strategies"]["fts_rerank_fusion"] = { - "count": len(fused_results), - "total_latency_ms": fts_latency + rerank_latency, - "fts_latency_ms": fts_latency, - "rerank_latency_ms": rerank_latency, - "top_5": [r.path.split("\\")[-1] for r in fused_results[:5]], - "scores": [r.score for r in fused_results[:5]] - } - except Exception as e: - query_result["strategies"]["fts_rerank_fusion"] = {"error": str(e)} - - # Compute overlap between strategies - if ( - "error" not in query_result["strategies"].get("standard_hybrid", {}) - and "error" not in query_result["strategies"].get("fts_rerank_fusion", {}) - ): - standard_paths = set(r.path.split("\\")[-1] for r in standard_results[:10]) - fts_rerank_paths = set(r.path.split("\\")[-1] for r in fused_results[:10]) - - overlap = len(standard_paths & fts_rerank_paths) - query_result["comparison"] = { - "top_10_overlap": overlap, - "standard_unique": list(standard_paths - fts_rerank_paths)[:3], - "fts_rerank_unique": list(fts_rerank_paths - standard_paths)[:3] - } - - results["per_query"].append(query_result) - - return results - - -def main(): - """Run all analyses.""" - source_path = Path("D:/Claude_dms3/codex-lens/src") - index_path = find_project_index(source_path) - - print(f"Using index: {index_path}") - print(f"Index exists: {index_path.exists()}") - print() - - # Test queries - queries = [ - "binary quantization", - "hamming distance search", - "embeddings generation", - "reranking algorithm", - "database connection handling", - ] - - # 1. Storage Architecture Analysis - print("=" * 60) - print("1. STORAGE ARCHITECTURE ANALYSIS") - print("=" * 60) - - storage_analysis = analyze_storage_architecture(index_path) - - print("\nTable Overview:") - for table, info in sorted(storage_analysis["tables"].items()): - if "row_count" in info: - print(f" {table}: {info['row_count']} rows") - - print("\nConflicts Detected:") - for conflict in storage_analysis["conflicts"]: - print(f" - {conflict['description']}") - - print("\nRecommendations:") - for rec in storage_analysis["recommendations"]: - print(f" - {rec}") - - # 2. Method Contribution Analysis - print("\n" + "=" * 60) - print("2. METHOD CONTRIBUTION ANALYSIS") - print("=" * 60) - - contribution_analysis = analyze_method_contributions(index_path, queries) - - print("\nPer-Query Results:") - for qr in contribution_analysis["per_query"]: - print(f"\n Query: '{qr['query']}'") - for method, data in qr["methods"].items(): - if "error" not in data: - print(f" {method}: {data['count']} results, {data['latency_ms']:.1f}ms") - if data.get("top_3_files"): - print(f" Top 3: {', '.join(data['top_3_files'])}") - - if qr.get("overlaps"): - print(" Overlaps:") - for pair, info in qr["overlaps"].items(): - print(f" {pair}: {info['overlap_count']} common (Jaccard: {info['jaccard']:.2f})") - - print("\nSummary:") - for method, stats in contribution_analysis["summary"].items(): - print(f" {method}: avg {stats['avg_count']:.1f} results, {stats['avg_latency_ms']:.1f}ms") - - # 3. FTS + Rerank Fusion Experiment - print("\n" + "=" * 60) - print("3. FTS + RERANK FUSION EXPERIMENT") - print("=" * 60) - - fusion_experiment = experiment_fts_rerank_fusion(index_path, queries) - - print("\nPer-Query Comparison:") - for qr in fusion_experiment["per_query"]: - print(f"\n Query: '{qr['query']}'") - for strategy, data in qr["strategies"].items(): - if "error" not in data: - latency = data.get("total_latency_ms") or data.get("latency_ms", 0) - print(f" {strategy}: {data['count']} results, {latency:.1f}ms") - if data.get("top_5"): - print(f" Top 5: {', '.join(data['top_5'][:3])}...") - - if qr.get("comparison"): - comp = qr["comparison"] - print(f" Top-10 Overlap: {comp['top_10_overlap']}/10") - - # Save full results - output_path = Path(__file__).parent / "results" / "method_contribution_analysis.json" - output_path.parent.mkdir(exist_ok=True) - - full_results = { - "storage_analysis": storage_analysis, - "contribution_analysis": contribution_analysis, - "fusion_experiment": fusion_experiment - } - - with open(output_path, "w", encoding="utf-8") as f: - json.dump(full_results, f, indent=2, default=str) - - print(f"\n\nFull results saved to: {output_path}") - - -if __name__ == "__main__": - main() diff --git a/codex-lens/benchmarks/results/accuracy_2026-02-11_codexlens.json b/codex-lens/benchmarks/results/accuracy_2026-02-11_codexlens.json deleted file mode 100644 index 3d2fa958..00000000 --- a/codex-lens/benchmarks/results/accuracy_2026-02-11_codexlens.json +++ /dev/null @@ -1,1308 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-11 16:33:19", - "source": "D:\\Claude_dms3\\codex-lens\\src", - "queries_file": "codex-lens\\benchmarks\\accuracy_queries_codexlens.jsonl", - "query_count": 33, - "k": 10, - "coarse_k": 100, - "staged": { - "hit_at_k": 0.7575757575757576, - "mrr_at_k": 0.5833333333333334, - "avg_recall_at_k": 0.7424242424242424, - "avg_latency_ms": 4635.525263649045, - "errors": 0 - }, - "dense_rerank": { - "hit_at_k": 0.21212121212121213, - "mrr_at_k": 0.06227753727753728, - "avg_recall_at_k": 0.21212121212121213, - "avg_latency_ms": 2597.3116121219864, - "errors": 0 - }, - "config": { - "staged_stage2_mode": "realtime", - "staged_clustering_strategy": "path", - "enable_staged_rerank": true, - "reranker_backend": "api", - "reranker_model": "Qwen/Qwen3-Reranker-8B", - "embedding_backend": "litellm", - "embedding_model": "qwen3-embedding-sf" - } - }, - "evaluations": [ - { - "query": "class StandaloneLspManager", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 25897.209399938583, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\association_tree\\builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "first_hit_rank": 2, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 3059.8712000250816, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def _open_document", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 8377.355100035667, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2697.353200018406, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\output.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def _read_message", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4349.869300067425, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2418.672600030899, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "how does textDocument/didOpen work", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 321.56859999895096, - "topk_paths": [], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2452.267899990082, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "class LspBridge", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4634.055300056934, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\keepalive_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py" - ], - "first_hit_rank": 3, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2672.7246000170708, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def get_document_symbols", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4454.471000015736, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2581.881399989128, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "class KeepAliveLspBridge", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\keepalive_bridge.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 346.4588000178337, - "topk_paths": [], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2581.1541000008583, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "LSP keepalive bridge", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\keepalive_bridge.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 319.3557000756264, - "topk_paths": [], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2587.464199960232, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "class LspGraphBuilder", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 5038.322200000286, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py" - ], - "first_hit_rank": 2, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 3435.6180000305176, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\splade_encoder.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def build_from_seeds", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4622.321400046349, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "first_hit_rank": 2, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2633.6710000038147, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def _stage2_realtime_lsp_expand", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 333.375500023365, - "topk_paths": [], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2626.7274000048637, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\merkle_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" - ], - "first_hit_rank": 3, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - } - }, - { - "query": "def _stage3_cluster_prune", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4407.406300008297, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2711.8762999773026, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py" - ], - "first_hit_rank": 7, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - } - }, - { - "query": "def _cross_encoder_rerank", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4716.0983999967575, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "first_hit_rank": 3, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2612.2980999946594, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" - ], - "first_hit_rank": 9, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - } - }, - { - "query": "def dense_rerank_cascade_search", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4556.352999985218, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2435.9282999634743, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def cascade_search", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4779.43700003624, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2462.476100027561, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def _find_nearest_binary_mmap_root", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 384.8026000261307, - "topk_paths": [], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2374.5640999674797, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\litellm_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "first_hit_rank": 5, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - } - }, - { - "query": "class BinarySearcher", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\binary_searcher.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4964.564999938011, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\binary_searcher.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2740.684500038624, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "class GraphExpander", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4982.367900013924, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2916.1848999857903, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def cross_encoder_rerank", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4503.571500003338, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2504.1979999542236, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def group_similar_results", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4660.934600055218, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py" - ], - "first_hit_rank": 2, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2379.2526000142097, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py" - ], - "first_hit_rank": 7, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - } - }, - { - "query": "class ConfigError", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\errors.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4616.049799978733, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\errors.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\__init__.py" - ], - "first_hit_rank": 3, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2418.3816999793053, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def load_settings", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4625.254700064659, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2702.3474999070168, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "BINARY_VECTORS_MMAP_NAME", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4285.477600038052, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py" - ], - "first_hit_rank": 2, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2406.369700014591, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "STAGED_CLUSTERING_STRATEGY", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\env_config.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4307.972999989986, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py" - ], - "first_hit_rank": 2, - "hit_at_k": true, - "recall_at_k": 0.5, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2634.202399969101, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\merkle_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def apply_workspace_env", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\env_config.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4182.440200030804, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2486.3993000388145, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\litellm_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def generate_env_example", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\env_config.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4413.619400024414, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\env_config.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2556.517999947071, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def get_reranker", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\factory.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4670.021300017834, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\model_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2631.054200053215, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\merkle_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "class APIReranker", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\api_reranker.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4536.27840000391, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\api_reranker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2502.5143000483513, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "class RegistryStore", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 6543.249599993229, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\mcp\\provider.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\providers.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\hover.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py" - ], - "first_hit_rank": 4, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2509.7423000335693, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py" - ], - "first_hit_rank": 8, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - } - }, - { - "query": "class PathMapper", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4577.398099958897, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2493.4598000645638, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def lsp_status", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 352.2480999827385, - "topk_paths": [], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2516.1266999840736, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - } - }, - { - "query": "graph_neighbors migration", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4542.888000011444, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2324.4544000029564, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def get_model_config", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 4669.536899983883, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2644.8443999886513, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\output.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\merkle_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/accuracy_2026-02-11_codexlens_precomputed.json b/codex-lens/benchmarks/results/accuracy_2026-02-11_codexlens_precomputed.json deleted file mode 100644 index 3c6d9472..00000000 --- a/codex-lens/benchmarks/results/accuracy_2026-02-11_codexlens_precomputed.json +++ /dev/null @@ -1,1335 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-11 17:39:54", - "source": "D:\\Claude_dms3\\codex-lens\\src", - "queries_file": "codex-lens\\benchmarks\\accuracy_queries_codexlens.jsonl", - "query_count": 33, - "k": 10, - "coarse_k": 100, - "staged": { - "hit_at_k": 0.7575757575757576, - "mrr_at_k": 0.5883838383838383, - "avg_recall_at_k": 0.7424242424242424, - "avg_latency_ms": 2331.3277969649344, - "errors": 0 - }, - "dense_rerank": { - "hit_at_k": 0.21212121212121213, - "mrr_at_k": 0.06227753727753728, - "avg_recall_at_k": 0.21212121212121213, - "avg_latency_ms": 2182.33056061015, - "errors": 0 - }, - "config": { - "staged_stage2_mode": "precomputed", - "staged_clustering_strategy": "path", - "enable_staged_rerank": true, - "reranker_backend": "api", - "reranker_model": "Qwen/Qwen3-Reranker-8B", - "embedding_backend": "litellm", - "embedding_model": "qwen3-embedding-sf" - } - }, - "evaluations": [ - { - "query": "class StandaloneLspManager", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 19341.994099974632, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\association_tree\\builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py" - ], - "first_hit_rank": 2, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2999.929000020027, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def _open_document", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 2847.462099969387, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2615.54029995203, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\output.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def _read_message", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 2385.6554000377655, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2581.8080000281334, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "how does textDocument/didOpen work", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 361.7903000116348, - "topk_paths": [], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2404.24530005455, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "class LspBridge", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 2457.195499956608, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "first_hit_rank": 3, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2546.2164999842644, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def get_document_symbols", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 2532.4168999791145, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2429.6208000183105, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "class KeepAliveLspBridge", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\keepalive_bridge.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 398.90080004930496, - "topk_paths": [], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2454.2164999842644, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "LSP keepalive bridge", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\keepalive_bridge.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 330.90090000629425, - "topk_paths": [], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2490.4245000481606, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "class LspGraphBuilder", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 2399.8781000375748, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py" - ], - "first_hit_rank": 2, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2402.9406000375748, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\splade_encoder.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def build_from_seeds", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 3348.9842999577522, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py" - ], - "first_hit_rank": 2, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2525.5670999884605, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def _stage2_realtime_lsp_expand", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 329.77999997138977, - "topk_paths": [], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2464.8422999978065, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\merkle_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" - ], - "first_hit_rank": 3, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - } - }, - { - "query": "def _stage3_cluster_prune", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 2466.0647000670433, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\binary_searcher.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2633.537499964237, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py" - ], - "first_hit_rank": 7, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - } - }, - { - "query": "def _cross_encoder_rerank", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 2565.2637000083923, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\binary_searcher.py" - ], - "first_hit_rank": 3, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2483.7863000035286, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" - ], - "first_hit_rank": 9, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - } - }, - { - "query": "def dense_rerank_cascade_search", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 1953.4079999923706, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\binary_searcher.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 1758.5974999666214, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def cascade_search", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 2054.1276000142097, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\utils.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\models.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\symbols.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 1729.1329000592232, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def _find_nearest_binary_mmap_root", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 209.5627999305725, - "topk_paths": [], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 1902.3523000478745, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\litellm_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "first_hit_rank": 5, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - } - }, - { - "query": "class BinarySearcher", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\binary_searcher.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 2291.7905999422073, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\binary_searcher.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 1719.2722999453545, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "class GraphExpander", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 1672.2199999690056, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 1739.1129999756813, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def cross_encoder_rerank", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 1548.37600004673, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 1962.3666999936104, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def group_similar_results", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 1733.5452999472618, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\binary_searcher.py" - ], - "first_hit_rank": 2, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 1727.5000000596046, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py" - ], - "first_hit_rank": 7, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - } - }, - { - "query": "class ConfigError", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\errors.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 1806.7660999894142, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\errors.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py" - ], - "first_hit_rank": 2, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 1788.8945000171661, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def load_settings", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 2616.400499999523, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\output.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 1680.113300025463, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "BINARY_VECTORS_MMAP_NAME", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 1801.7208999991417, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\output.py" - ], - "first_hit_rank": 2, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 1675.2271999716759, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "STAGED_CLUSTERING_STRATEGY", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\env_config.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 1470.9057000279427, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py" - ], - "first_hit_rank": 2, - "hit_at_k": true, - "recall_at_k": 0.5, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 1803.0420999526978, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\merkle_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def apply_workspace_env", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\env_config.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 1371.6070999503136, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 1817.1355000138283, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\litellm_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def generate_env_example", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\env_config.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 1465.9499000310898, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\env_config.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 1701.9165999889374, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def get_reranker", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\factory.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 1902.2649999856949, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\model_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\litellm_reranker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\fastembed_reranker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\legacy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\api_reranker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\binary_searcher.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 1746.6025000214577, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\merkle_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "class APIReranker", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\api_reranker.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 2245.715800046921, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\api_reranker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\base.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\legacy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\fastembed_reranker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\litellm_reranker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\onnx_reranker.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 1659.7105000019073, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "class RegistryStore", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 1702.458899974823, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\mcp\\provider.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\providers.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\hover.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\incremental_indexer.py" - ], - "first_hit_rank": 4, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 3514.6511999964714, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py" - ], - "first_hit_rank": 8, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - } - }, - { - "query": "class PathMapper", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 1793.6620999574661, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\incremental_indexer.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 1708.0654000639915, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def lsp_status", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 180.50510001182556, - "topk_paths": [], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 1799.0735999941826, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - } - }, - { - "query": "graph_neighbors migration", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 2524.9900000095367, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2674.021600008011, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - }, - { - "query": "def get_model_config", - "relevant_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "staged": { - "strategy": "staged", - "latency_ms": 2821.553099989891, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\output.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\binary_searcher.py" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "latency_ms": 2877.4450999498367, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\output.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\merkle_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/cascade_benchmark.json b/codex-lens/benchmarks/results/cascade_benchmark.json deleted file mode 100644 index e8178395..00000000 --- a/codex-lens/benchmarks/results/cascade_benchmark.json +++ /dev/null @@ -1,277 +0,0 @@ -{ - "timestamp": "2026-01-02 11:48:33", - "summaries": { - "binary": { - "strategy": "binary", - "total_queries": 15, - "successful_queries": 15, - "avg_latency_ms": 1133.4008666667312, - "min_latency_ms": 959.5361000028788, - "max_latency_ms": 1330.8978999993997, - "p50_latency_ms": 1125.8439999946859, - "p95_latency_ms": 1330.0081999987015, - "p99_latency_ms": 1330.71995999926, - "avg_results": 10, - "errors": [] - }, - "hybrid": { - "strategy": "hybrid", - "total_queries": 15, - "successful_queries": 15, - "avg_latency_ms": 1111.1401133336283, - "min_latency_ms": 857.0021999985329, - "max_latency_ms": 1278.8890000010724, - "p50_latency_ms": 1130.696000000171, - "p95_latency_ms": 1254.2417899981956, - "p99_latency_ms": 1273.959558000497, - "avg_results": 10, - "errors": [] - } - }, - "details": { - "binary": [ - { - "strategy": "binary", - "query": "def search", - "latency_ms": 1044.525999997859, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py:0", - "error": null - }, - { - "strategy": "binary", - "query": "class Engine", - "latency_ms": 1052.5979999947594, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py:0", - "error": null - }, - { - "strategy": "binary", - "query": "import numpy", - "latency_ms": 1217.217100005655, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\__main__.py:0", - "error": null - }, - { - "strategy": "binary", - "query": "async def", - "latency_ms": 1276.9802000038908, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\api_reranker.py:0", - "error": null - }, - { - "strategy": "binary", - "query": "raise ValueError", - "latency_ms": 1005.9053000004496, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py:0", - "error": null - }, - { - "strategy": "binary", - "query": "how to parse json", - "latency_ms": 1330.8978999993997, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py:0", - "error": null - }, - { - "strategy": "binary", - "query": "database connection", - "latency_ms": 1041.6685000018333, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py:0", - "error": null - }, - { - "strategy": "binary", - "query": "error handling", - "latency_ms": 959.5361000028788, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_004_dual_fts.py:0", - "error": null - }, - { - "strategy": "binary", - "query": "authentication logic", - "latency_ms": 1060.9395999999833, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py:0", - "error": null - }, - { - "strategy": "binary", - "query": "file read write", - "latency_ms": 971.8680000005406, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\watcher\\incremental_indexer.py:0", - "error": null - }, - { - "strategy": "binary", - "query": "embedding vector", - "latency_ms": 1135.879900000873, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\embedder.py:0", - "error": null - }, - { - "strategy": "binary", - "query": "cosine similarity", - "latency_ms": 1188.1732000038028, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py:0", - "error": null - }, - { - "strategy": "binary", - "query": "binary quantization", - "latency_ms": 1259.3522999959532, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py:0", - "error": null - }, - { - "strategy": "binary", - "query": "hamming distance", - "latency_ms": 1329.6268999984022, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py:0", - "error": null - }, - { - "strategy": "binary", - "query": "reranking", - "latency_ms": 1125.8439999946859, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py:0", - "error": null - } - ], - "hybrid": [ - { - "strategy": "hybrid", - "query": "def search", - "latency_ms": 1117.0937999995658, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py:0", - "error": null - }, - { - "strategy": "hybrid", - "query": "class Engine", - "latency_ms": 1039.3984000038472, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py:0", - "error": null - }, - { - "strategy": "hybrid", - "query": "import numpy", - "latency_ms": 1144.7916999968584, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\__main__.py:0", - "error": null - }, - { - "strategy": "hybrid", - "query": "async def", - "latency_ms": 857.0021999985329, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\api_reranker.py:0", - "error": null - }, - { - "strategy": "hybrid", - "query": "raise ValueError", - "latency_ms": 957.5578000003588, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py:0", - "error": null - }, - { - "strategy": "hybrid", - "query": "how to parse json", - "latency_ms": 1216.5708000029554, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py:0", - "error": null - }, - { - "strategy": "hybrid", - "query": "database connection", - "latency_ms": 1154.8929000055068, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py:0", - "error": null - }, - { - "strategy": "hybrid", - "query": "error handling", - "latency_ms": 1130.696000000171, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_004_dual_fts.py:0", - "error": null - }, - { - "strategy": "hybrid", - "query": "authentication logic", - "latency_ms": 1112.8943000003346, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py:0", - "error": null - }, - { - "strategy": "hybrid", - "query": "file read write", - "latency_ms": 1172.5986000019475, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\watcher\\incremental_indexer.py:0", - "error": null - }, - { - "strategy": "hybrid", - "query": "embedding vector", - "latency_ms": 1278.8890000010724, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\embedder.py:0", - "error": null - }, - { - "strategy": "hybrid", - "query": "cosine similarity", - "latency_ms": 1024.2393000007723, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py:0", - "error": null - }, - { - "strategy": "hybrid", - "query": "binary quantization", - "latency_ms": 1243.6786999969627, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py:0", - "error": null - }, - { - "strategy": "hybrid", - "query": "hamming distance", - "latency_ms": 1081.3100999948801, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py:0", - "error": null - }, - { - "strategy": "hybrid", - "query": "reranking", - "latency_ms": 1135.4881000006571, - "num_results": 10, - "top_result": "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py:0", - "error": null - } - ] - } -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/ccw_smart_search_stage2.json b/codex-lens/benchmarks/results/ccw_smart_search_stage2.json deleted file mode 100644 index 418bac3e..00000000 --- a/codex-lens/benchmarks/results/ccw_smart_search_stage2.json +++ /dev/null @@ -1,1704 +0,0 @@ -{ - "timestamp": "2026-03-12 15:52:13", - "source": "D:\\Claude_dms3", - "queries_file": "D:\\Claude_dms3\\codex-lens\\benchmarks\\accuracy_queries_ccw_smart_search.jsonl", - "query_count": 16, - "k": 10, - "coarse_k": 100, - "local_only": true, - "strategies": { - "dense_rerank": { - "query_count": 16, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 2493.8517937501892, - "p50_latency_ms": 2304.0422499999404, - "p95_latency_ms": 4031.03429999575, - "errors": 0, - "strategy": "dense_rerank", - "stage2_mode": null - }, - "staged:precomputed": { - "query_count": 16, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 2238.0576249985024, - "p50_latency_ms": 1962.1620500013232, - "p95_latency_ms": 3110.8512249961495, - "errors": 0, - "strategy": "staged", - "stage2_mode": "precomputed" - }, - "staged:realtime": { - "query_count": 16, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 10686.986462499015, - "p50_latency_ms": 7027.59129999578, - "p95_latency_ms": 28732.387600000948, - "errors": 0, - "strategy": "staged", - "stage2_mode": "realtime" - }, - "staged:static_global_graph": { - "query_count": 16, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 2284.2186249988154, - "p50_latency_ms": 2174.274800002575, - "p95_latency_ms": 3254.683274999261, - "errors": 0, - "strategy": "staged", - "stage2_mode": "static_global_graph" - } - }, - "stage2_mode_matrix": { - "precomputed": { - "query_count": 16, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 2238.0576249985024, - "p50_latency_ms": 1962.1620500013232, - "p95_latency_ms": 3110.8512249961495, - "errors": 0, - "strategy": "staged", - "stage2_mode": "precomputed" - }, - "realtime": { - "query_count": 16, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 10686.986462499015, - "p50_latency_ms": 7027.59129999578, - "p95_latency_ms": 28732.387600000948, - "errors": 0, - "strategy": "staged", - "stage2_mode": "realtime" - }, - "static_global_graph": { - "query_count": 16, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 2284.2186249988154, - "p50_latency_ms": 2174.274800002575, - "p95_latency_ms": 3254.683274999261, - "errors": 0, - "strategy": "staged", - "stage2_mode": "static_global_graph" - } - }, - "pairwise_stage2_deltas": [ - { - "mode_a": "precomputed", - "mode_b": "realtime", - "hit_at_k_delta": 0.0, - "mrr_at_k_delta": 0.0, - "avg_recall_at_k_delta": 0.0, - "avg_latency_ms_delta": -8448.928837500513 - }, - { - "mode_a": "precomputed", - "mode_b": "static_global_graph", - "hit_at_k_delta": 0.0, - "mrr_at_k_delta": 0.0, - "avg_recall_at_k_delta": 0.0, - "avg_latency_ms_delta": -46.161000000312924 - }, - { - "mode_a": "realtime", - "mode_b": "static_global_graph", - "hit_at_k_delta": 0.0, - "mrr_at_k_delta": 0.0, - "avg_recall_at_k_delta": 0.0, - "avg_latency_ms_delta": 8402.7678375002 - } - ], - "config": { - "embedding_backend": "fastembed", - "embedding_model": "code", - "embedding_use_gpu": false, - "reranker_backend": "onnx", - "reranker_model": "D:/Claude_dms3/codex-lens/.cache/huggingface/models/Xenova--ms-marco-MiniLM-L-6-v2", - "enable_staged_rerank": true, - "enable_cross_encoder_rerank": true - }, - "evaluations": [ - { - "query": "executeHybridMode dense_rerank semantic smart_search", - "intent": "ccw-semantic-routing", - "notes": "CCW semantic mode delegates to CodexLens dense_rerank.", - "relevant_paths": [ - "D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 5607.933899998665, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-validator.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\list.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\secret-redactor.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\health-check-service.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\view.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\api-key-tester.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 1853.0870999991894, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\api-key-tester.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\shell-escape.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\rate-limiter.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\commands-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\core-memory-store.js", - "d:\\claude_dms3\\ccw\\dist\\core\\core-memory-store.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 10468.899399995804, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\api-key-tester.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\shell-escape.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\rate-limiter.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\commands-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\core-memory-store.js", - "d:\\claude_dms3\\ccw\\dist\\core\\core-memory-store.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 1445.837599992752, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\api-key-tester.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\shell-escape.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\rate-limiter.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\commands-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\core-memory-store.js", - "d:\\claude_dms3\\ccw\\dist\\core\\core-memory-store.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - }, - { - "query": "parse CodexLens JSON output strip ANSI smart_search", - "intent": "ccw-json-fallback", - "notes": "Covers JSON/plain-text fallback handling for CodexLens output.", - "relevant_paths": [ - "D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 1518.7583000063896, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\utils\\secret-redactor.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\rules-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-context-builder.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\secret-redactor.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\cli.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\outline-queries.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\mcp-templates-db.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 1467.957000002265, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\outline-queries.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\loop.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\core\\memory-store.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\install.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\secret-redactor.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\rules-routes.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 35793.74619999528, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\outline-queries.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\loop.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\core\\memory-store.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\install.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\secret-redactor.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\rules-routes.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 2019.9724999964237, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\outline-queries.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\loop.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\core\\memory-store.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\install.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\secret-redactor.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\rules-routes.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - }, - { - "query": "smart_search init embed search action schema", - "intent": "ccw-action-schema", - "notes": "Find the Zod schema that defines init/embed/search actions.", - "relevant_paths": [ - "D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 2091.47919999063, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\outline-parser.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\shell-escape.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\rate-limiter.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\secret-redactor.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\file-reader.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\rules-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\secret-redactor.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 2017.3953999876976, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\utils\\secret-redactor.d.ts", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\team-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\serve.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\api-key-tester.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\outline-parser.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 2941.078400015831, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\utils\\secret-redactor.d.ts", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\team-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\serve.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\api-key-tester.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\outline-parser.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 1921.6328999996185, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\utils\\secret-redactor.d.ts", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\team-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\serve.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\api-key-tester.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\outline-parser.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - }, - { - "query": "auto init missing job dedupe smart_search", - "intent": "ccw-auto-init", - "notes": "Targets background init/embed warmup and dedupe state.", - "relevant_paths": [ - "D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 1662.2750000059605, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\docs-frontend.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js", - "d:\\claude_dms3\\ccw\\dist\\core\\cache-manager.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\dashboard-launcher.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\rules-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\react-frontend.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 1746.6091000139713, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\rules-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\react-frontend.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\team-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\server.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\cli-session-mux.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\cli.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\loop.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 6291.47570002079, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\rules-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\react-frontend.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\team-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\server.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\cli-session-mux.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\cli.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\loop.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 1718.0125000029802, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\rules-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\react-frontend.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\team-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\server.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\cli-session-mux.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\cli.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\loop.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - }, - { - "query": "smart_search exact mode fallback to CodexLens fts", - "intent": "ccw-exact-fallback", - "notes": "Tracks the exact-mode fallback path into CodexLens FTS.", - "relevant_paths": [ - "D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 1511.011400014162, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\files-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\codexlens-path.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\provider-routes.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\uv-manager.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-validator.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\outline-queries.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\secret-redactor.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 1897.7800999879837, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\files-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-validator.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-context-builder.js", - "d:\\claude_dms3\\ccw\\dist\\core\\server.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\install.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\codexlens-routes.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 6647.179499998689, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\files-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-validator.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-context-builder.js", - "d:\\claude_dms3\\ccw\\dist\\core\\server.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\install.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\codexlens-routes.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 2328.577100008726, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\files-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-validator.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-context-builder.js", - "d:\\claude_dms3\\ccw\\dist\\core\\server.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\install.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\codexlens-routes.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - }, - { - "query": "smart_search settings snapshot embedding backend reranker backend staged stage2 mode", - "intent": "ccw-config-snapshot", - "notes": "Reads local config snapshot for embedding/reranker/staged pipeline settings.", - "relevant_paths": [ - "D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 2516.6053000092506, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\rate-limiter.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\file-reader.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\docs-frontend.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\help-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\workflow.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\core-memory.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 2778.8519999980927, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\docs-frontend.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\core-memory.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\file-reader.js", - "d:\\claude_dms3\\ccw\\dist\\core\\websocket.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\config-backup.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\serve.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 4940.330799981952, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\docs-frontend.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\core-memory.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\file-reader.js", - "d:\\claude_dms3\\ccw\\dist\\core\\websocket.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\config-backup.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\serve.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 3191.194299995899, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\docs-frontend.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\core-memory.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\file-reader.js", - "d:\\claude_dms3\\ccw\\dist\\core\\websocket.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\config-backup.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\serve.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - }, - { - "query": "embedding backend fastembed local litellm api config", - "intent": "codexlens-embedding-config", - "notes": "Local-only benchmark should resolve to fastembed defaults.", - "relevant_paths": [ - "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\config.py" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 2773.382699996233, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\utils\\file-reader.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-context-builder.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\cli.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\files-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\outline-parser.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-resolver.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 2465.842600002885, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\files-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\file-reader.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-resolver.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-context-builder.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\cli.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\outline-parser.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 17898.587700009346, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\.venv\\lib\\site-packages\\sympy\\plotting\\backends\\base_backend.py", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\files-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\file-reader.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-resolver.js", - "d:\\claude_dms3\\ccw\\dist\\core\\pattern-detector.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-context-builder.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\cli.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 3331.694400012493, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\files-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\file-reader.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-resolver.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-context-builder.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\cli.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\outline-parser.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - }, - { - "query": "reranker backend onnx api legacy configuration", - "intent": "codexlens-reranker-config", - "notes": "Covers both config dataclass fields and env overrides.", - "relevant_paths": [ - "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\env_config.py" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 3433.85640001297, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\commands-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\react-frontend.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-context-builder.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\docs-frontend.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\uv-manager.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\issue.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\cli.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 2722.7298999875784, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\uv-manager.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\cli.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\commands-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\install.js", - "d:\\claude_dms3\\ccw\\dist\\core\\memory-store.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\react-frontend.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 6998.953399986029, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\.venv\\lib\\site-packages\\sympy\\plotting\\backends\\base_backend.py", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\uv-manager.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\cli.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\data-aggregator.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\install.js", - "d:\\claude_dms3\\ccw\\dist\\core\\memory-store.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 2707.838899999857, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\uv-manager.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\cli.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\commands-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\install.js", - "d:\\claude_dms3\\ccw\\dist\\core\\memory-store.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\react-frontend.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - }, - { - "query": "staged stage2 mode precomputed realtime static_global_graph", - "intent": "codexlens-stage2-config", - "notes": "Benchmark matrix should exercise the three supported stage2 modes.", - "relevant_paths": [ - "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\env_config.py" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 2557.460299998522, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\health-check-service.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\python-utils.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\outline-queries.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\codexlens-path.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-resolver.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 2611.47199998796, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\utils\\codexlens-path.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\api-key-tester.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\dashboard-generator.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\team.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\install.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\pending-question-service.d.ts", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 9986.3125, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\utils\\codexlens-path.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\api-key-tester.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\dashboard-generator.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\team.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\install.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\pending-question-service.d.ts", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 2705.1958999931812, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\utils\\codexlens-path.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\api-key-tester.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\dashboard-generator.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\team.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\install.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\pending-question-service.d.ts", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - }, - { - "query": "enable staged rerank stage 4 config", - "intent": "codexlens-stage4-rerank", - "notes": "Stage 4 rerank flag needs to stay enabled for local benchmarks.", - "relevant_paths": [ - "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\config.py" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 2839.552300006151, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\utils\\path-resolver.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\python-utils.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\health-check-service.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\package-discovery.d.ts", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\session-path-resolver.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\orchestrator-routes.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\data-aggregator.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 3044.0294999927282, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\utils\\package-discovery.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\ccw-routes.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\uv-manager.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\session-path-resolver.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\workflow.js", - "d:\\claude_dms3\\ccw\\dist\\core\\websocket.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 12196.75379998982, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\semantic\\reranker\\fastembed_reranker.py", - "d:\\claude_dms3\\ccw\\dist\\utils\\package-discovery.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\ccw-routes.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\session-path-resolver.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\workflow.js", - "d:\\claude_dms3\\ccw\\dist\\core\\websocket.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 2919.969099998474, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\utils\\package-discovery.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\ccw-routes.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\uv-manager.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\session-path-resolver.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\workflow.js", - "d:\\claude_dms3\\ccw\\dist\\core\\websocket.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - }, - { - "query": "cascade_search dense_rerank staged pipeline ChainSearchEngine", - "intent": "chain-search-cascade", - "notes": "Baseline query for the central retrieval engine.", - "relevant_paths": [ - "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 3082.173699989915, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\package-discovery.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\commands-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\health-check-service.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\dashboard-generator.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\rate-limiter.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 3012.5525999963284, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\commands-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\rate-limiter.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\core-memory-store.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\config-backup.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-validator.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\workflow.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\memory.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 10854.694199994206, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\commands-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\rate-limiter.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\core-memory-store.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\config-backup.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-validator.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\workflow.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\memory.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 3229.01289999485, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\commands-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\rate-limiter.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\core-memory-store.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\config-backup.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-validator.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\workflow.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\memory.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - }, - { - "query": "realtime LSP expand stage2 search pipeline", - "intent": "chain-search-stage2-realtime", - "notes": "Targets realtime stage2 expansion logic.", - "relevant_paths": [ - "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 3505.4010999947786, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\rules-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\outline-queries.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-resolver.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\files-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\memory-extraction-pipeline.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 3311.3164000064135, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-resolver.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\outline-parser.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\api-key-tester.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\dashboard-generator.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\core-memory.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 26378.601400002837, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\.venv\\lib\\site-packages\\optimum\\onnxruntime\\pipelines.py", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-resolver.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\health-check-service.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\outline-parser.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\api-key-tester.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\dashboard-generator.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\core-memory.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 2472.5419999957085, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-resolver.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\outline-parser.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\api-key-tester.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\dashboard-generator.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\core-memory.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - }, - { - "query": "static global graph stage2 expansion implementation", - "intent": "chain-search-stage2-static", - "notes": "Targets static_global_graph stage2 expansion logic.", - "relevant_paths": [ - "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 1676.1588000059128, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\core\\services\\health-check-service.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-resolver.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\loop.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\system-routes.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\serve.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\commands-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\install.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\team.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 1614.9786999970675, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\serve.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\commands-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\config-backup.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\health-check-service.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\docs-frontend.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\codexlens-path.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\memory-extraction-pipeline.js", - "d:\\claude_dms3\\ccw\\dist\\core\\pattern-detector.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 2153.07349999249, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\serve.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\commands-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\config-backup.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\health-check-service.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\docs-frontend.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\codexlens-path.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\memory-extraction-pipeline.js", - "d:\\claude_dms3\\ccw\\dist\\core\\pattern-detector.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 1658.4901999980211, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\serve.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\commands-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\config-backup.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\health-check-service.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\docs-frontend.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\codexlens-path.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\memory-extraction-pipeline.js", - "d:\\claude_dms3\\ccw\\dist\\core\\pattern-detector.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - }, - { - "query": "cross encoder rerank stage 4 implementation", - "intent": "chain-search-rerank", - "notes": "Relevant for dense_rerank and staged rerank latency comparisons.", - "relevant_paths": [ - "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 1556.9279999881983, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\claude-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\docs-frontend.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\uv-manager.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\package-discovery.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\commands-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\workflow.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\files-routes.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 1772.8751000016928, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\commands\\install.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\package-discovery.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\workflow.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\config-backup.js", - "d:\\claude_dms3\\ccw\\dist\\core\\server.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\core\\cache-manager.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 7056.229200005531, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\.venv\\lib\\site-packages\\fastembed\\rerank\\cross_encoder\\onnx_text_cross_encoder.py", - "d:\\claude_dms3\\ccw\\dist\\commands\\install.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\package-discovery.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\workflow.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\config-backup.js", - "d:\\claude_dms3\\ccw\\dist\\core\\server.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\core\\cache-manager.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 1721.4015000015497, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\commands\\install.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\package-discovery.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\workflow.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\config-backup.js", - "d:\\claude_dms3\\ccw\\dist\\core\\server.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\core\\cache-manager.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - }, - { - "query": "get_reranker factory onnx backend selection", - "intent": "reranker-factory", - "notes": "Keeps the benchmark aligned with local ONNX reranker selection.", - "relevant_paths": [ - "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\factory.py" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 2038.9054999947548, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\uninstall.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\docs-frontend.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\data-aggregator.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\uv-manager.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 1906.9287000149488, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\uv-manager.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\files-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\docs-frontend.js", - "d:\\claude_dms3\\ccw\\dist\\core\\websocket.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\loop.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\core\\websocket.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 4809.299199998379, - "topk_paths": [ - "d:\\claude_dms3\\.workflow\\.bench\\ccw-smart-search-mini-20260312\\codex-lens\\src\\codexlens\\semantic\\reranker\\factory.py", - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\uv-manager.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\docs-frontend.js", - "d:\\claude_dms3\\ccw\\dist\\core\\websocket.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\loop.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 1549.4464999884367, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\discovery-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\uv-manager.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\files-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\docs-frontend.js", - "d:\\claude_dms3\\ccw\\dist\\core\\websocket.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\loop.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\core\\websocket.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - }, - { - "query": "EMBEDDING_BACKEND and RERANKER_BACKEND environment variables", - "intent": "env-overrides", - "notes": "Covers CCW/CodexLens local-only environment overrides.", - "relevant_paths": [ - "D:\\Claude_dms3\\codex-lens\\src\\codexlens\\env_config.py" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 1529.7467999905348, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\shell-escape.d.ts", - "d:\\claude_dms3\\ccw\\dist\\commands\\upgrade.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\react-frontend.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-resolver.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\files-routes.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\docs-frontend.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\codexlens-path.d.ts", - "d:\\claude_dms3\\ccw\\dist\\utils\\python-utils.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 1584.515799999237, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-resolver.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\files-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\workflow.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js", - "d:\\claude_dms3\\ccw\\dist\\core\\memory-job-scheduler.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\shell-escape.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 5576.568499997258, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-resolver.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\files-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\workflow.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js", - "d:\\claude_dms3\\ccw\\dist\\core\\memory-job-scheduler.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\shell-escape.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 1626.6797000020742, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\dist\\assets\\index-b4psv8bd.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\path-resolver.d.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\files-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\graph-routes.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\remote-notification-service.js", - "d:\\claude_dms3\\ccw\\dist\\core\\services\\flow-executor.js", - "d:\\claude_dms3\\ccw\\dist\\commands\\workflow.js", - "d:\\claude_dms3\\ccw\\dist\\core\\unified-memory-service.js", - "d:\\claude_dms3\\ccw\\dist\\core\\memory-job-scheduler.js", - "d:\\claude_dms3\\ccw\\dist\\utils\\shell-escape.d.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/ccw_smart_search_stage2_sample4_20260314.json b/codex-lens/benchmarks/results/ccw_smart_search_stage2_sample4_20260314.json deleted file mode 100644 index cb40f339..00000000 --- a/codex-lens/benchmarks/results/ccw_smart_search_stage2_sample4_20260314.json +++ /dev/null @@ -1,526 +0,0 @@ -{ - "timestamp": "2026-03-14 23:16:55", - "source": "D:\\Claude_dms3", - "queries_file": "D:\\Claude_dms3\\codex-lens\\benchmarks\\accuracy_queries_ccw_smart_search.jsonl", - "query_count": 4, - "k": 10, - "coarse_k": 100, - "local_only": true, - "strategies": { - "dense_rerank": { - "query_count": 4, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 20171.940174996853, - "p50_latency_ms": 14222.247749984264, - "p95_latency_ms": 35222.31535999476, - "errors": 0, - "strategy": "dense_rerank", - "stage2_mode": null - }, - "staged:precomputed": { - "query_count": 4, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 13679.793299987912, - "p50_latency_ms": 12918.63379997015, - "p95_latency_ms": 16434.964765003322, - "errors": 0, - "strategy": "staged", - "stage2_mode": "precomputed" - }, - "staged:realtime": { - "query_count": 4, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 13885.101849973202, - "p50_latency_ms": 13826.323699980974, - "p95_latency_ms": 14867.712269958853, - "errors": 0, - "strategy": "staged", - "stage2_mode": "realtime" - }, - "staged:static_global_graph": { - "query_count": 4, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 13336.124025002122, - "p50_latency_ms": 13415.476950019598, - "p95_latency_ms": 13514.329230004549, - "errors": 0, - "strategy": "staged", - "stage2_mode": "static_global_graph" - } - }, - "stage2_mode_matrix": { - "precomputed": { - "query_count": 4, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 13679.793299987912, - "p50_latency_ms": 12918.63379997015, - "p95_latency_ms": 16434.964765003322, - "errors": 0, - "strategy": "staged", - "stage2_mode": "precomputed" - }, - "realtime": { - "query_count": 4, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 13885.101849973202, - "p50_latency_ms": 13826.323699980974, - "p95_latency_ms": 14867.712269958853, - "errors": 0, - "strategy": "staged", - "stage2_mode": "realtime" - }, - "static_global_graph": { - "query_count": 4, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 13336.124025002122, - "p50_latency_ms": 13415.476950019598, - "p95_latency_ms": 13514.329230004549, - "errors": 0, - "strategy": "staged", - "stage2_mode": "static_global_graph" - } - }, - "pairwise_stage2_deltas": [ - { - "mode_a": "precomputed", - "mode_b": "realtime", - "hit_at_k_delta": 0.0, - "mrr_at_k_delta": 0.0, - "avg_recall_at_k_delta": 0.0, - "avg_latency_ms_delta": -205.30854998528957 - }, - { - "mode_a": "precomputed", - "mode_b": "static_global_graph", - "hit_at_k_delta": 0.0, - "mrr_at_k_delta": 0.0, - "avg_recall_at_k_delta": 0.0, - "avg_latency_ms_delta": 343.66927498579025 - }, - { - "mode_a": "realtime", - "mode_b": "static_global_graph", - "hit_at_k_delta": 0.0, - "mrr_at_k_delta": 0.0, - "avg_recall_at_k_delta": 0.0, - "avg_latency_ms_delta": 548.9778249710798 - } - ], - "config": { - "embedding_backend": "fastembed", - "embedding_model": "code", - "embedding_use_gpu": false, - "reranker_backend": "onnx", - "reranker_model": "cross-encoder/ms-marco-MiniLM-L-6-v2", - "enable_staged_rerank": true, - "enable_cross_encoder_rerank": true - }, - "evaluations": [ - { - "query": "executeHybridMode dense_rerank semantic smart_search", - "intent": "ccw-semantic-routing", - "notes": "CCW semantic mode delegates to CodexLens dense_rerank.", - "relevant_paths": [ - "D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 38829.27079999447, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\src\\core\\routes\\issue-routes.ts", - "d:\\claude_dms3\\ccw\\src\\tools\\session-manager.ts", - "d:\\claude_dms3\\ccw\\src\\types\\queue-types.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\nativesessionpanel.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts", - "d:\\claude_dms3\\ccw\\src\\core\\memory-extraction-pipeline.ts", - "d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\skills-page.spec.ts", - "d:\\claude_dms3\\ccw\\dist\\tools\\discover-design-files.js", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx", - "d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 16915.833400011063, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts", - "d:\\claude_dms3\\ccw\\src\\commands\\memory.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts", - "d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js", - "d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 13961.2567999959, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts", - "d:\\claude_dms3\\ccw\\src\\commands\\memory.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts", - "d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js", - "d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 12986.330999970436, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts", - "d:\\claude_dms3\\ccw\\src\\commands\\memory.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts", - "d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js", - "d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - }, - { - "query": "parse CodexLens JSON output strip ANSI smart_search", - "intent": "ccw-json-fallback", - "notes": "Covers JSON/plain-text fallback handling for CodexLens output.", - "relevant_paths": [ - "D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 14782.901199996471, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\src\\tools\\codex-lens-lsp.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\queue\\queueexecuteinsession.tsx", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-dashboard\\queuepanel.tsx", - "d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\usewebsocket.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useflows.ts", - "d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-error-monitoring.spec.ts", - "d:\\claude_dms3\\ccw\\tests\\native-session-discovery.test.ts", - "d:\\claude_dms3\\ccw\\src\\core\\services\\checkpoint-service.ts", - "d:\\claude_dms3\\ccw\\tests\\integration\\system-routes.test.ts", - "d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 13710.042499959469, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\userealtimeupdates.ts", - "d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-routes.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\stores\\queueexecutionstore.ts", - "d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\lib\\themeshare.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\clistreampanel.tsx", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-panel\\queueexecutionlistview.tsx", - "d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\test\\i18n.tsx", - "d:\\claude_dms3\\ccw\\dist\\core\\history-importer.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 15027.674999952316, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\userealtimeupdates.ts", - "d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-routes.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\stores\\queueexecutionstore.ts", - "d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\lib\\themeshare.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\clistreampanel.tsx", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-panel\\queueexecutionlistview.tsx", - "d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\test\\i18n.tsx", - "d:\\claude_dms3\\ccw\\dist\\core\\history-importer.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 13389.622500002384, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\userealtimeupdates.ts", - "d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-routes.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\stores\\queueexecutionstore.ts", - "d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\lib\\themeshare.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\clistreampanel.tsx", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\terminal-panel\\queueexecutionlistview.tsx", - "d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\test\\i18n.tsx", - "d:\\claude_dms3\\ccw\\dist\\core\\history-importer.js" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - }, - { - "query": "smart_search init embed search action schema", - "intent": "ccw-action-schema", - "notes": "Find the Zod schema that defines init/embed/search actions.", - "relevant_paths": [ - "D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 13661.594299972057, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts", - "d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx", - "d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\discovery.spec.ts", - "d:\\claude_dms3\\ccw\\src\\tools\\__tests__\\ask-question.test.ts", - "d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\a2ui\\a2uiwebsockethandler.js", - "d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\dashboard.spec.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 12127.225099980831, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts", - "d:\\claude_dms3\\ccw\\src\\core\\lite-scanner-complete.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\themeselector.tsx", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\team\\teamheader.tsx", - "d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\discovery\\findinglist.tsx", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts", - "d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 12860.084999978542, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts", - "d:\\claude_dms3\\ccw\\src\\core\\lite-scanner-complete.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\themeselector.tsx", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\team\\teamheader.tsx", - "d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\discovery\\findinglist.tsx", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts", - "d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 13441.331400036812, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts", - "d:\\claude_dms3\\ccw\\src\\core\\lite-scanner-complete.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\themeselector.tsx", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\team\\teamheader.tsx", - "d:\\claude_dms3\\ccw\\src\\tools\\ask-question.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\a2ui\\a2uipopupcard.tsx", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\issue\\discovery\\findinglist.tsx", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\routes\\discovery-routes.ts", - "d:\\claude_dms3\\ccw\\src\\core\\a2ui\\a2uiwebsockethandler.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - }, - { - "query": "auto init missing job dedupe smart_search", - "intent": "ccw-auto-init", - "notes": "Targets background init/embed warmup and dedupe state.", - "relevant_paths": [ - "D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts" - ], - "runs": { - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "latency_ms": 13413.994400024414, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\routes\\memory-routes.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\usememory.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\batchoperationtoolbar.tsx", - "d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\memory.spec.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useprompthistory.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\stores\\flowstore.ts", - "d:\\claude_dms3\\ccw\\src\\services\\deepwiki-service.ts", - "d:\\claude_dms3\\ccw\\src\\core\\routes\\claude-routes.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "latency_ms": 11966.072200000286, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\src\\commands\\memory.ts", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\handlers.py", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\ui\\commandcombobox.tsx", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\global_graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\definition.py", - "d:\\claude_dms3\\ccw\\frontend\\src\\orchestrator\\orchestrationplanbuilder.ts", - "d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\lsp\\handlers.py", - "d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\search\\global_graph_expander.py", - "d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\api\\definition.py", - "d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "latency_ms": 13691.39059996605, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\src\\commands\\memory.ts", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\handlers.py", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\ui\\commandcombobox.tsx", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\global_graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\definition.py", - "d:\\claude_dms3\\ccw\\frontend\\src\\orchestrator\\orchestrationplanbuilder.ts", - "d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\lsp\\handlers.py", - "d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\search\\global_graph_expander.py", - "d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\api\\definition.py", - "d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "latency_ms": 13527.211199998856, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\src\\commands\\memory.ts", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\handlers.py", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\ui\\commandcombobox.tsx", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\global_graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\definition.py", - "d:\\claude_dms3\\ccw\\frontend\\src\\orchestrator\\orchestrationplanbuilder.ts", - "d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\lsp\\handlers.py", - "d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\search\\global_graph_expander.py", - "d:\\claude_dms3\\codex-lens\\build\\lib\\codexlens\\api\\definition.py", - "d:\\claude_dms3\\ccw\\frontend\\src\\pages\\memorypage.tsx" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "error": null - } - } - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/ccw_smart_search_stage2_smoke1_cpu_reranker_20260314.json b/codex-lens/benchmarks/results/ccw_smart_search_stage2_smoke1_cpu_reranker_20260314.json deleted file mode 100644 index a6f5dc8d..00000000 --- a/codex-lens/benchmarks/results/ccw_smart_search_stage2_smoke1_cpu_reranker_20260314.json +++ /dev/null @@ -1,415 +0,0 @@ -{ - "timestamp": "2026-03-15 00:19:16", - "source": "D:\\Claude_dms3", - "queries_file": "D:\\Claude_dms3\\codex-lens\\benchmarks\\accuracy_queries_ccw_smart_search.jsonl", - "query_count": 1, - "k": 10, - "coarse_k": 100, - "local_only": true, - "strategies": { - "auto": { - "query_count": 1, - "hit_at_k": 1.0, - "mrr_at_k": 1.0, - "avg_recall_at_k": 1.0, - "avg_latency_ms": 1377.3565999865532, - "p50_latency_ms": 1377.3565999865532, - "p95_latency_ms": 1377.3565999865532, - "avg_generated_artifact_count": 0.0, - "avg_test_file_count": 0.0, - "runs_with_generated_artifacts": 0, - "runs_with_test_files": 0, - "effective_methods": { - "fts": 1 - }, - "errors": 0, - "strategy": "auto", - "stage2_mode": null - }, - "fts": { - "query_count": 1, - "hit_at_k": 1.0, - "mrr_at_k": 1.0, - "avg_recall_at_k": 1.0, - "avg_latency_ms": 1460.0819000601768, - "p50_latency_ms": 1460.0819000601768, - "p95_latency_ms": 1460.0819000601768, - "avg_generated_artifact_count": 0.0, - "avg_test_file_count": 0.0, - "runs_with_generated_artifacts": 0, - "runs_with_test_files": 0, - "effective_methods": { - "fts": 1 - }, - "errors": 0, - "strategy": "fts", - "stage2_mode": null - }, - "hybrid": { - "query_count": 1, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 45991.74140000343, - "p50_latency_ms": 45991.74140000343, - "p95_latency_ms": 45991.74140000343, - "avg_generated_artifact_count": 0.0, - "avg_test_file_count": 0.0, - "runs_with_generated_artifacts": 0, - "runs_with_test_files": 0, - "effective_methods": { - "hybrid": 1 - }, - "errors": 0, - "strategy": "hybrid", - "stage2_mode": null - }, - "dense_rerank": { - "query_count": 1, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 22739.62610000372, - "p50_latency_ms": 22739.62610000372, - "p95_latency_ms": 22739.62610000372, - "avg_generated_artifact_count": 1.0, - "avg_test_file_count": 2.0, - "runs_with_generated_artifacts": 1, - "runs_with_test_files": 1, - "effective_methods": { - "dense_rerank": 1 - }, - "errors": 0, - "strategy": "dense_rerank", - "stage2_mode": null - }, - "staged:precomputed": { - "query_count": 1, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 14900.017599999905, - "p50_latency_ms": 14900.017599999905, - "p95_latency_ms": 14900.017599999905, - "avg_generated_artifact_count": 1.0, - "avg_test_file_count": 0.0, - "runs_with_generated_artifacts": 1, - "runs_with_test_files": 0, - "effective_methods": { - "staged": 1 - }, - "errors": 0, - "strategy": "staged", - "stage2_mode": "precomputed" - }, - "staged:realtime": { - "query_count": 1, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 14104.314599990845, - "p50_latency_ms": 14104.314599990845, - "p95_latency_ms": 14104.314599990845, - "avg_generated_artifact_count": 1.0, - "avg_test_file_count": 0.0, - "runs_with_generated_artifacts": 1, - "runs_with_test_files": 0, - "effective_methods": { - "staged": 1 - }, - "errors": 0, - "strategy": "staged", - "stage2_mode": "realtime" - }, - "staged:static_global_graph": { - "query_count": 1, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 11906.852500021458, - "p50_latency_ms": 11906.852500021458, - "p95_latency_ms": 11906.852500021458, - "avg_generated_artifact_count": 1.0, - "avg_test_file_count": 0.0, - "runs_with_generated_artifacts": 1, - "runs_with_test_files": 0, - "effective_methods": { - "staged": 1 - }, - "errors": 0, - "strategy": "staged", - "stage2_mode": "static_global_graph" - } - }, - "stage2_mode_matrix": { - "precomputed": { - "query_count": 1, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 14900.017599999905, - "p50_latency_ms": 14900.017599999905, - "p95_latency_ms": 14900.017599999905, - "avg_generated_artifact_count": 1.0, - "avg_test_file_count": 0.0, - "runs_with_generated_artifacts": 1, - "runs_with_test_files": 0, - "effective_methods": { - "staged": 1 - }, - "errors": 0, - "strategy": "staged", - "stage2_mode": "precomputed" - }, - "realtime": { - "query_count": 1, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 14104.314599990845, - "p50_latency_ms": 14104.314599990845, - "p95_latency_ms": 14104.314599990845, - "avg_generated_artifact_count": 1.0, - "avg_test_file_count": 0.0, - "runs_with_generated_artifacts": 1, - "runs_with_test_files": 0, - "effective_methods": { - "staged": 1 - }, - "errors": 0, - "strategy": "staged", - "stage2_mode": "realtime" - }, - "static_global_graph": { - "query_count": 1, - "hit_at_k": 0.0, - "mrr_at_k": 0.0, - "avg_recall_at_k": 0.0, - "avg_latency_ms": 11906.852500021458, - "p50_latency_ms": 11906.852500021458, - "p95_latency_ms": 11906.852500021458, - "avg_generated_artifact_count": 1.0, - "avg_test_file_count": 0.0, - "runs_with_generated_artifacts": 1, - "runs_with_test_files": 0, - "effective_methods": { - "staged": 1 - }, - "errors": 0, - "strategy": "staged", - "stage2_mode": "static_global_graph" - } - }, - "pairwise_stage2_deltas": [ - { - "mode_a": "precomputed", - "mode_b": "realtime", - "hit_at_k_delta": 0.0, - "mrr_at_k_delta": 0.0, - "avg_recall_at_k_delta": 0.0, - "avg_latency_ms_delta": 795.7030000090599 - }, - { - "mode_a": "precomputed", - "mode_b": "static_global_graph", - "hit_at_k_delta": 0.0, - "mrr_at_k_delta": 0.0, - "avg_recall_at_k_delta": 0.0, - "avg_latency_ms_delta": 2993.165099978447 - }, - { - "mode_a": "realtime", - "mode_b": "static_global_graph", - "hit_at_k_delta": 0.0, - "mrr_at_k_delta": 0.0, - "avg_recall_at_k_delta": 0.0, - "avg_latency_ms_delta": 2197.462099969387 - } - ], - "config": { - "embedding_backend": "fastembed", - "embedding_model": "code", - "embedding_use_gpu": false, - "reranker_backend": "onnx", - "reranker_model": "cross-encoder/ms-marco-MiniLM-L-6-v2", - "reranker_use_gpu": false, - "enable_staged_rerank": true, - "enable_cross_encoder_rerank": true - }, - "evaluations": [ - { - "query": "executeHybridMode dense_rerank semantic smart_search", - "intent": "ccw-semantic-routing", - "notes": "CCW semantic mode delegates to CodexLens dense_rerank.", - "relevant_paths": [ - "D:\\Claude_dms3\\ccw\\src\\tools\\smart-search.ts" - ], - "runs": { - "auto": { - "strategy_key": "auto", - "strategy": "auto", - "stage2_mode": null, - "effective_method": "fts", - "execution_method": "fts", - "latency_ms": 1377.3565999865532, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\src\\tools\\smart-search.ts" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "generated_artifact_count": 0, - "test_file_count": 0, - "error": null - }, - "fts": { - "strategy_key": "fts", - "strategy": "fts", - "stage2_mode": null, - "effective_method": "fts", - "execution_method": "fts", - "latency_ms": 1460.0819000601768, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\src\\tools\\smart-search.ts" - ], - "first_hit_rank": 1, - "hit_at_k": true, - "recall_at_k": 1.0, - "generated_artifact_count": 0, - "test_file_count": 0, - "error": null - }, - "hybrid": { - "strategy_key": "hybrid", - "strategy": "hybrid", - "stage2_mode": null, - "effective_method": "hybrid", - "execution_method": "hybrid", - "latency_ms": 45991.74140000343, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\src\\config\\litellm-api-config-manager.ts", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\api_reranker.py", - "d:\\claude_dms3\\ccw\\src\\commands\\core-memory.ts", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\scripts\\generate_embeddings.py", - "d:\\claude_dms3\\ccw\\src\\core\\routes\\notification-routes.ts", - "d:\\claude_dms3\\ccw\\src\\tools\\team-msg.ts", - "d:\\claude_dms3\\ccw\\src\\types\\remote-notification.ts", - "d:\\claude_dms3\\ccw\\src\\core\\memory-store.ts", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "generated_artifact_count": 0, - "test_file_count": 0, - "error": null - }, - "dense_rerank": { - "strategy_key": "dense_rerank", - "strategy": "dense_rerank", - "stage2_mode": null, - "effective_method": "dense_rerank", - "execution_method": "cascade", - "latency_ms": 22739.62610000372, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\src\\core\\routes\\issue-routes.ts", - "d:\\claude_dms3\\ccw\\src\\tools\\session-manager.ts", - "d:\\claude_dms3\\ccw\\src\\types\\queue-types.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\nativesessionpanel.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts", - "d:\\claude_dms3\\ccw\\src\\core\\memory-extraction-pipeline.ts", - "d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\skills-page.spec.ts", - "d:\\claude_dms3\\ccw\\dist\\tools\\discover-design-files.js", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\api-settings\\clisettingsmodal.tsx", - "d:\\claude_dms3\\ccw\\frontend\\tests\\e2e\\api-settings.spec.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "generated_artifact_count": 1, - "test_file_count": 2, - "error": null - }, - "staged:precomputed": { - "strategy_key": "staged:precomputed", - "strategy": "staged", - "stage2_mode": "precomputed", - "effective_method": "staged", - "execution_method": "cascade", - "latency_ms": 14900.017599999905, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts", - "d:\\claude_dms3\\ccw\\src\\commands\\memory.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts", - "d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js", - "d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "generated_artifact_count": 1, - "test_file_count": 0, - "error": null - }, - "staged:realtime": { - "strategy_key": "staged:realtime", - "strategy": "staged", - "stage2_mode": "realtime", - "effective_method": "staged", - "execution_method": "cascade", - "latency_ms": 14104.314599990845, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts", - "d:\\claude_dms3\\ccw\\src\\commands\\memory.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts", - "d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js", - "d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "generated_artifact_count": 1, - "test_file_count": 0, - "error": null - }, - "staged:static_global_graph": { - "strategy_key": "staged:static_global_graph", - "strategy": "staged", - "stage2_mode": "static_global_graph", - "effective_method": "staged", - "execution_method": "cascade", - "latency_ms": 11906.852500021458, - "topk_paths": [ - "d:\\claude_dms3\\ccw\\src\\tools\\native-session-discovery.ts", - "d:\\claude_dms3\\ccw\\src\\commands\\memory.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\hooks\\useissues.test.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\routes\\cli-sessions-routes.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\lib\\api.ts", - "d:\\claude_dms3\\ccw\\frontend\\src\\components\\shared\\filepreview.tsx", - "d:\\claude_dms3\\ccw\\src\\core\\hooks\\hook-templates.ts", - "d:\\claude_dms3\\ccw\\src\\utils\\file-reader.ts", - "d:\\claude_dms3\\ccw\\dist\\core\\routes\\cli-sessions-routes.js", - "d:\\claude_dms3\\ccw\\src\\core\\history-importer.ts" - ], - "first_hit_rank": null, - "hit_at_k": false, - "recall_at_k": 0.0, - "generated_artifact_count": 1, - "test_file_count": 0, - "error": null - } - } - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-09.json b/codex-lens/benchmarks/results/compare_2026-02-09.json deleted file mode 100644 index c9dfd28a..00000000 --- a/codex-lens/benchmarks/results/compare_2026-02-09.json +++ /dev/null @@ -1,453 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-09 11:08:47", - "source": "src", - "k": 10, - "coarse_k": 100, - "query_count": 7, - "avg_jaccard_topk": 0.41421235160730957, - "avg_rbo_topk": 0.22899068093857142, - "staged": { - "success": 7, - "avg_latency_ms": 32009.68328570468 - }, - "dense_rerank": { - "success": 7, - "avg_latency_ms": 2783.3305999977247 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 40875.45489999652, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\file_watcher.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 10633.91399383545, - "stage2_expand_ms": 12487.980365753174, - "stage3_cluster_ms": 10781.587362289429, - "stage4_rerank_ms": 6914.837837219238 - }, - "stage_counts": { - "stage1_candidates": 100, - "stage2_expanded": 149, - "stage3_clustered": 20, - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 3111.874899983406, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.06741929885142856, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 8, - "dense_unique_dirs_topk": 4 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 38541.18510001898, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 548.8920211791992, - "stage2_expand_ms": 27176.724433898926, - "stage3_cluster_ms": 8352.917671203613, - "stage4_rerank_ms": 2392.6541805267334 - }, - "stage_counts": { - "stage1_candidates": 100, - "stage2_expanded": 101, - "stage3_clustered": 20, - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 2652.75, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.26666666666666666, - "rbo_topk": 0.2983708721671428, - "staged_unique_files_topk": 9, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 26319.983999997377, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\merkle_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 514.4834518432617, - "stage2_expand_ms": 14329.241514205933, - "stage3_cluster_ms": 9249.040842056274, - "stage4_rerank_ms": 2159.9059104919434 - }, - "stage_counts": { - "stage1_candidates": 100, - "stage2_expanded": 100, - "stage3_clustered": 20, - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 2666.9745999872684, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.6666666666666666, - "rbo_topk": 0.3571430355128571, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "graph expansion", - "staged": { - "strategy": "staged", - "query": "graph expansion", - "latency_ms": 25696.087299972773, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 560.4684352874756, - "stage2_expand_ms": 13951.441526412964, - "stage3_cluster_ms": 8879.387140274048, - "stage4_rerank_ms": 2229.4514179229736 - }, - "stage_counts": { - "stage1_candidates": 100, - "stage2_expanded": 100, - "stage3_clustered": 20, - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "graph expansion", - "latency_ms": 2544.8630999922752, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.42857142857142855, - "rbo_topk": 0.13728894791142857, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "clustering strategy", - "staged": { - "strategy": "staged", - "query": "clustering strategy", - "latency_ms": 27387.41929998994, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 625.0262260437012, - "stage2_expand_ms": 14211.347103118896, - "stage3_cluster_ms": 10269.58680152893, - "stage4_rerank_ms": 2208.007335662842 - }, - "stage_counts": { - "stage1_candidates": 100, - "stage2_expanded": 100, - "stage3_clustered": 20, - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "clustering strategy", - "latency_ms": 2928.22389999032, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.17647058823529413, - "rbo_topk": 0.07116480920571429, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "error handling", - "staged": { - "strategy": "staged", - "query": "error handling", - "latency_ms": 23732.33979997039, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 504.0884017944336, - "stage2_expand_ms": 12899.415016174316, - "stage3_cluster_ms": 7881.027936935425, - "stage4_rerank_ms": 2372.1535205841064 - }, - "stage_counts": { - "stage1_candidates": 100, - "stage2_expanded": 100, - "stage3_clustered": 20, - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "error handling", - "latency_ms": 2946.439900010824, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.6666666666666666, - "rbo_topk": 0.19158624676285715, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "how to parse json", - "staged": { - "strategy": "staged", - "query": "how to parse json", - "latency_ms": 41515.31259998679, - "num_results": 9, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 601.7005443572998, - "stage2_expand_ms": 30052.319765090942, - "stage3_cluster_ms": 8409.791231155396, - "stage4_rerank_ms": 2371.1729049682617 - }, - "stage_counts": { - "stage1_candidates": 100, - "stage2_expanded": 100, - "stage3_clustered": 20, - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "how to parse json", - "latency_ms": 2632.1878000199795, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.5833333333333334, - "rbo_topk": 0.4799615561585714, - "staged_unique_files_topk": 9, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-09_dir_rr_fast4.json b/codex-lens/benchmarks/results/compare_2026-02-09_dir_rr_fast4.json deleted file mode 100644 index 1b2aae3f..00000000 --- a/codex-lens/benchmarks/results/compare_2026-02-09_dir_rr_fast4.json +++ /dev/null @@ -1,356 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-09 20:37:28", - "source": "src", - "k": 10, - "coarse_k": 100, - "query_count": 7, - "avg_jaccard_topk": 0.12095811211246858, - "avg_rbo_topk": 0.09594444061244897, - "staged": { - "success": 7, - "avg_latency_ms": 2471.239057132176 - }, - "dense_rerank": { - "success": 7, - "avg_latency_ms": 3087.217985710927 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 312.2674999535084, - "num_results": 37, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\litellm_reranker.py" - ], - "stage_stats": null, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 2672.6916999816895, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.05263157894736842, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 15344.861499994993, - "num_results": 3, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 81.70747756958008, - "stage2_expand_ms": 12762.907266616821, - "stage3_cluster_ms": 0.0021457672119140625, - "stage4_rerank_ms": 2422.7287769317627 - }, - "stage_counts": { - "stage1_candidates": 3, - "stage2_expanded": 4, - "stage2_unique_paths": 3, - "stage2_duplicate_paths": 1, - "stage3_clustered": 4, - "stage3_strategy": "dir_rr", - "stage4_reranked": 4 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 2908.5530000030994, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.09090909090909091, - "rbo_topk": 0.23541639942571424, - "staged_unique_files_topk": 2, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 328.4989999830723, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "stage_stats": null, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 3426.8526000082493, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "graph expansion", - "staged": { - "strategy": "staged", - "query": "graph expansion", - "latency_ms": 359.32230001688004, - "num_results": 11, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\hybrid_search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "stage_stats": null, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "graph expansion", - "latency_ms": 3472.025099992752, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.17647058823529413, - "rbo_topk": 0.06801300374142856, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 7, - "dense_unique_dirs_topk": 4 - }, - { - "query": "clustering strategy", - "staged": { - "strategy": "staged", - "query": "clustering strategy", - "latency_ms": 289.3139999806881, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py" - ], - "stage_stats": null, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "clustering strategy", - "latency_ms": 2859.5299999713898, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.04670528456571428, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 4 - }, - { - "query": "error handling", - "staged": { - "strategy": "staged", - "query": "error handling", - "latency_ms": 305.66699999570847, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "stage_stats": null, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "error handling", - "latency_ms": 3101.3711999952793, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "how to parse json", - "staged": { - "strategy": "staged", - "query": "how to parse json", - "latency_ms": 358.74210000038147, - "num_results": 4, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "stage_stats": null, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "how to parse json", - "latency_ms": 3169.5023000240326, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.2727272727272727, - "rbo_topk": 0.18590219827714285, - "staged_unique_files_topk": 4, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 4 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-09_dir_rr_fast5.json b/codex-lens/benchmarks/results/compare_2026-02-09_dir_rr_fast5.json deleted file mode 100644 index 2d30d43e..00000000 --- a/codex-lens/benchmarks/results/compare_2026-02-09_dir_rr_fast5.json +++ /dev/null @@ -1,466 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-09 20:48:55", - "source": "src", - "k": 10, - "coarse_k": 100, - "query_count": 7, - "avg_jaccard_topk": 0.11418494830148965, - "avg_rbo_topk": 0.08910725003591835, - "staged": { - "success": 7, - "avg_latency_ms": 16443.109000005894 - }, - "dense_rerank": { - "success": 7, - "avg_latency_ms": 2919.481471432107 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 6056.956700026989, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\file_watcher.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 113.12270164489746, - "stage1_fallback_search_ms": 262.55249977111816, - "stage2_expand_ms": 3022.8426456451416, - "stage3_cluster_ms": 1.155853271484375, - "stage4_rerank_ms": 2554.953098297119 - }, - "stage_counts": { - "stage1_candidates": 37, - "stage1_fallback_used": 1, - "stage2_expanded": 86, - "stage2_unique_paths": 53, - "stage2_duplicate_paths": 33, - "stage3_clustered": 20, - "stage3_strategy": "dir_rr", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 2788.0383999943733, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.05263157894736842, - "rbo_topk": 0.014635885139999999, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 8, - "dense_unique_dirs_topk": 4 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 12229.477500021458, - "num_results": 3, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 108.82282257080078, - "stage2_expand_ms": 9422.304153442383, - "stage3_cluster_ms": 0.001430511474609375, - "stage4_rerank_ms": 2611.234664916992 - }, - "stage_counts": { - "stage1_candidates": 3, - "stage2_expanded": 4, - "stage2_unique_paths": 3, - "stage2_duplicate_paths": 1, - "stage3_clustered": 4, - "stage3_strategy": "dir_rr", - "stage4_reranked": 4 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 2823.377499997616, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.09090909090909091, - "rbo_topk": 0.23541639942571424, - "staged_unique_files_topk": 2, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 33805.434699982405, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 100.5556583404541, - "stage1_fallback_search_ms": 176.71489715576172, - "stage2_expand_ms": 31017.661809921265, - "stage3_cluster_ms": 0.001430511474609375, - "stage4_rerank_ms": 2403.3148288726807 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 5, - "stage2_unique_paths": 5, - "stage2_duplicate_paths": 0, - "stage3_clustered": 5, - "stage3_strategy": "dir_rr", - "stage4_reranked": 5 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 2906.127400010824, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "graph expansion", - "staged": { - "strategy": "staged", - "query": "graph expansion", - "latency_ms": 16790.213800013065, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\hybrid_search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 110.00967025756836, - "stage1_fallback_search_ms": 176.9556999206543, - "stage2_expand_ms": 13929.782629013062, - "stage3_cluster_ms": 0.45800209045410156, - "stage4_rerank_ms": 2486.6883754730225 - }, - "stage_counts": { - "stage1_candidates": 11, - "stage1_fallback_used": 1, - "stage2_expanded": 29, - "stage2_unique_paths": 14, - "stage2_duplicate_paths": 15, - "stage3_clustered": 20, - "stage3_strategy": "dir_rr", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "graph expansion", - "latency_ms": 2866.819000005722, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1875, - "rbo_topk": 0.06893318399142857, - "staged_unique_files_topk": 9, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 8, - "dense_unique_dirs_topk": 4 - }, - { - "query": "clustering strategy", - "staged": { - "strategy": "staged", - "query": "clustering strategy", - "latency_ms": 9090.759900003672, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 85.28780937194824, - "stage1_fallback_search_ms": 183.7012767791748, - "stage2_expand_ms": 5557.527780532837, - "stage3_cluster_ms": 0.001430511474609375, - "stage4_rerank_ms": 3164.6268367767334 - }, - "stage_counts": { - "stage1_candidates": 10, - "stage1_fallback_used": 1, - "stage2_expanded": 10, - "stage2_unique_paths": 10, - "stage2_duplicate_paths": 0, - "stage3_clustered": 10, - "stage3_strategy": "dir_rr", - "stage4_reranked": 10 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "clustering strategy", - "latency_ms": 3062.4616000056267, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.04670528456571428, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 4 - }, - { - "query": "error handling", - "staged": { - "strategy": "staged", - "query": "error handling", - "latency_ms": 19777.87659996748, - "num_results": 6, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 65.9482479095459, - "stage1_fallback_search_ms": 181.9770336151123, - "stage2_expand_ms": 16960.813760757446, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 2472.1477031707764 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 13, - "stage2_unique_paths": 6, - "stage2_duplicate_paths": 7, - "stage3_clustered": 13, - "stage3_strategy": "dir_rr", - "stage4_reranked": 13 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "error handling", - "latency_ms": 2854.169200003147, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "how to parse json", - "staged": { - "strategy": "staged", - "query": "how to parse json", - "latency_ms": 17351.04380002618, - "num_results": 7, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\treesitter_parser.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 119.1408634185791, - "stage1_fallback_search_ms": 246.2625503540039, - "stage2_expand_ms": 14137.234449386597, - "stage3_cluster_ms": 0.0011920928955078125, - "stage4_rerank_ms": 2750.417470932007 - }, - "stage_counts": { - "stage1_candidates": 4, - "stage1_fallback_used": 1, - "stage2_expanded": 11, - "stage2_unique_paths": 7, - "stage2_duplicate_paths": 4, - "stage3_clustered": 11, - "stage3_strategy": "dir_rr", - "stage4_reranked": 11 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "how to parse json", - "latency_ms": 3135.3772000074387, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.21428571428571427, - "rbo_topk": 0.16767719827714284, - "staged_unique_files_topk": 7, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-09_dir_rr_fast6.json b/codex-lens/benchmarks/results/compare_2026-02-09_dir_rr_fast6.json deleted file mode 100644 index bdc35197..00000000 --- a/codex-lens/benchmarks/results/compare_2026-02-09_dir_rr_fast6.json +++ /dev/null @@ -1,467 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-09 20:56:02", - "source": "src", - "k": 10, - "coarse_k": 100, - "query_count": 7, - "avg_jaccard_topk": 0.11350467619264612, - "avg_rbo_topk": 0.09062624799510204, - "staged": { - "success": 7, - "avg_latency_ms": 8679.35167142323 - }, - "dense_rerank": { - "success": 7, - "avg_latency_ms": 3097.294714289052 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 6814.465099990368, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\file_watcher.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 85.55030822753906, - "stage1_fallback_search_ms": 197.95989990234375, - "stage2_expand_ms": 3032.4549674987793, - "stage3_cluster_ms": 1.1937618255615234, - "stage4_rerank_ms": 3402.9476642608643 - }, - "stage_counts": { - "stage1_candidates": 37, - "stage1_fallback_used": 1, - "stage2_expanded": 86, - "stage2_unique_paths": 53, - "stage2_duplicate_paths": 33, - "stage3_clustered": 20, - "stage3_strategy": "dir_rr", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 3175.0339000225067, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.05263157894736842, - "rbo_topk": 0.014635885139999999, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 8, - "dense_unique_dirs_topk": 4 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 8990.238099992275, - "num_results": 3, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 90.6367301940918, - "stage2_expand_ms": 6272.260665893555, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 2531.4290523529053 - }, - "stage_counts": { - "stage1_candidates": 3, - "stage2_expanded": 4, - "stage2_unique_paths": 3, - "stage2_duplicate_paths": 1, - "stage3_clustered": 4, - "stage3_strategy": "dir_rr", - "stage4_reranked": 4 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 3434.4095999896526, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.09090909090909091, - "rbo_topk": 0.23541639942571424, - "staged_unique_files_topk": 2, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 9296.205000013113, - "num_results": 7, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\keepalive_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 86.64774894714355, - "stage1_fallback_search_ms": 163.8650894165039, - "stage2_expand_ms": 6144.1497802734375, - "stage3_cluster_ms": 0.4100799560546875, - "stage4_rerank_ms": 2807.274580001831 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 31, - "stage2_unique_paths": 11, - "stage2_duplicate_paths": 20, - "stage3_clustered": 20, - "stage3_strategy": "dir_rr", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 3043.4417999982834, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.06666666666666667, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 6, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "graph expansion", - "staged": { - "strategy": "staged", - "query": "graph expansion", - "latency_ms": 9086.15110000968, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 72.22437858581543, - "stage1_fallback_search_ms": 166.3804054260254, - "stage2_expand_ms": 6179.303169250488, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 2575.9027004241943 - }, - "stage_counts": { - "stage1_candidates": 11, - "stage1_fallback_used": 1, - "stage2_expanded": 16, - "stage2_unique_paths": 13, - "stage2_duplicate_paths": 3, - "stage3_clustered": 16, - "stage3_strategy": "dir_rr", - "stage4_reranked": 16 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "graph expansion", - "latency_ms": 2793.8257000148296, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1875, - "rbo_topk": 0.06134116970571428, - "staged_unique_files_topk": 9, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 7, - "dense_unique_dirs_topk": 4 - }, - { - "query": "clustering strategy", - "staged": { - "strategy": "staged", - "query": "clustering strategy", - "latency_ms": 8401.927499979734, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 72.67880439758301, - "stage1_fallback_search_ms": 166.71442985534668, - "stage2_expand_ms": 5561.89489364624, - "stage3_cluster_ms": 0.0007152557373046875, - "stage4_rerank_ms": 2517.7178382873535 - }, - "stage_counts": { - "stage1_candidates": 10, - "stage1_fallback_used": 1, - "stage2_expanded": 10, - "stage2_unique_paths": 10, - "stage2_duplicate_paths": 0, - "stage3_clustered": 10, - "stage3_strategy": "dir_rr", - "stage4_reranked": 10 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "clustering strategy", - "latency_ms": 3192.0045999884605, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.04670528456571428, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 4 - }, - { - "query": "error handling", - "staged": { - "strategy": "staged", - "query": "error handling", - "latency_ms": 9032.269400000572, - "num_results": 6, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 78.59635353088379, - "stage1_fallback_search_ms": 180.96280097961426, - "stage2_expand_ms": 6175.840377807617, - "stage3_cluster_ms": 0.001430511474609375, - "stage4_rerank_ms": 2503.4260749816895 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 13, - "stage2_unique_paths": 6, - "stage2_duplicate_paths": 7, - "stage3_clustered": 13, - "stage3_strategy": "dir_rr", - "stage4_reranked": 13 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "error handling", - "latency_ms": 3076.744800001383, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "how to parse json", - "staged": { - "strategy": "staged", - "query": "how to parse json", - "latency_ms": 9134.205499976873, - "num_results": 7, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\treesitter_parser.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 117.79379844665527, - "stage1_fallback_search_ms": 187.53886222839355, - "stage2_expand_ms": 6218.849658966064, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 2515.6633853912354 - }, - "stage_counts": { - "stage1_candidates": 4, - "stage1_fallback_used": 1, - "stage2_expanded": 9, - "stage2_unique_paths": 7, - "stage2_duplicate_paths": 2, - "stage3_clustered": 9, - "stage3_strategy": "dir_rr", - "stage4_reranked": 9 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "how to parse json", - "latency_ms": 2965.6026000082493, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.21428571428571427, - "rbo_topk": 0.18590219827714285, - "staged_unique_files_topk": 7, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-09_keepalive3.json b/codex-lens/benchmarks/results/compare_2026-02-09_keepalive3.json deleted file mode 100644 index 759bc32e..00000000 --- a/codex-lens/benchmarks/results/compare_2026-02-09_keepalive3.json +++ /dev/null @@ -1,171 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-09 19:16:45", - "source": "src", - "k": 10, - "coarse_k": 100, - "query_count": 3, - "avg_jaccard_topk": 0.07165641376167692, - "avg_rbo_topk": 0.10859973275904759, - "staged": { - "success": 3, - "avg_latency_ms": 7919.317766676347 - }, - "dense_rerank": { - "success": 3, - "avg_latency_ms": 2812.574933330218 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 6351.961700022221, - "num_results": 37, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\litellm_reranker.py" - ], - "stage_stats": null, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 4424.698300004005, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.05263157894736842, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 17239.81479999423, - "num_results": 3, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 18.40996742248535, - "stage2_expand_ms": 16024.681329727173, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 1160.1319313049316 - }, - "stage_counts": { - "stage1_candidates": 3, - "stage2_expanded": 4, - "stage2_unique_paths": 3, - "stage2_duplicate_paths": 1, - "stage3_clustered": 4, - "stage3_strategy": "score", - "stage4_reranked": 4 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 2086.8772999942303, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.09090909090909091, - "rbo_topk": 0.23541639942571424, - "staged_unique_files_topk": 2, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 166.1768000125885, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "stage_stats": null, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 1926.1491999924183, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-09_keepalive3b.json b/codex-lens/benchmarks/results/compare_2026-02-09_keepalive3b.json deleted file mode 100644 index dfb1d3e1..00000000 --- a/codex-lens/benchmarks/results/compare_2026-02-09_keepalive3b.json +++ /dev/null @@ -1,171 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-09 19:19:13", - "source": "src", - "k": 10, - "coarse_k": 100, - "query_count": 3, - "avg_jaccard_topk": 0.07165641376167692, - "avg_rbo_topk": 0.10859973275904759, - "staged": { - "success": 3, - "avg_latency_ms": 8272.264699995518 - }, - "dense_rerank": { - "success": 3, - "avg_latency_ms": 2753.5123999913535 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 6453.665100008249, - "num_results": 37, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\litellm_reranker.py" - ], - "stage_stats": null, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 4530.146999955177, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.05263157894736842, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 18202.905599981546, - "num_results": 3, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 15.580177307128906, - "stage2_expand_ms": 16622.225522994995, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 1516.9692039489746 - }, - "stage_counts": { - "stage1_candidates": 3, - "stage2_expanded": 4, - "stage2_unique_paths": 3, - "stage2_duplicate_paths": 1, - "stage3_clustered": 4, - "stage3_strategy": "score", - "stage4_reranked": 4 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 1746.9925000071526, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.09090909090909091, - "rbo_topk": 0.23541639942571424, - "staged_unique_files_topk": 2, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 160.2233999967575, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "stage_stats": null, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 1983.3977000117302, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-09_run2.json b/codex-lens/benchmarks/results/compare_2026-02-09_run2.json deleted file mode 100644 index 7dc36661..00000000 --- a/codex-lens/benchmarks/results/compare_2026-02-09_run2.json +++ /dev/null @@ -1,453 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-09 11:26:54", - "source": "src", - "k": 10, - "coarse_k": 100, - "query_count": 7, - "avg_jaccard_topk": 0.39589733329229126, - "avg_rbo_topk": 0.23139636799510202, - "staged": { - "success": 7, - "avg_latency_ms": 32194.107242865222 - }, - "dense_rerank": { - "success": 7, - "avg_latency_ms": 2643.366857132741 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 43041.41250002384, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\file_watcher.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 9864.638805389404, - "stage2_expand_ms": 13012.29190826416, - "stage3_cluster_ms": 13297.565460205078, - "stage4_rerank_ms": 6821.892261505127 - }, - "stage_counts": { - "stage1_candidates": 100, - "stage2_expanded": 149, - "stage3_clustered": 20, - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 3209.129799991846, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.05429729885142857, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 8, - "dense_unique_dirs_topk": 4 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 37827.209600031376, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 531.8794250488281, - "stage2_expand_ms": 27009.481191635132, - "stage3_cluster_ms": 7948.509931564331, - "stage4_rerank_ms": 2268.9380645751953 - }, - "stage_counts": { - "stage1_candidates": 100, - "stage2_expanded": 101, - "stage3_clustered": 20, - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 2540.472400009632, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.26666666666666666, - "rbo_topk": 0.2983708721671428, - "staged_unique_files_topk": 9, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 24744.686599999666, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\merkle_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 517.8542137145996, - "stage2_expand_ms": 12839.622735977173, - "stage3_cluster_ms": 9154.959678649902, - "stage4_rerank_ms": 2160.0701808929443 - }, - "stage_counts": { - "stage1_candidates": 100, - "stage2_expanded": 100, - "stage3_clustered": 20, - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 2482.5908999741077, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.5384615384615384, - "rbo_topk": 0.36639083062285716, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "graph expansion", - "staged": { - "strategy": "staged", - "query": "graph expansion", - "latency_ms": 25239.59050002694, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 631.9081783294678, - "stage2_expand_ms": 12570.756196975708, - "stage3_cluster_ms": 9557.724952697754, - "stage4_rerank_ms": 2409.7683429718018 - }, - "stage_counts": { - "stage1_candidates": 100, - "stage2_expanded": 100, - "stage3_clustered": 20, - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "graph expansion", - "latency_ms": 2574.1938000023365, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.42857142857142855, - "rbo_topk": 0.13728894791142857, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "clustering strategy", - "staged": { - "strategy": "staged", - "query": "clustering strategy", - "latency_ms": 28572.93939998746, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 659.6193313598633, - "stage2_expand_ms": 14207.426309585571, - "stage3_cluster_ms": 11513.370037078857, - "stage4_rerank_ms": 2117.546319961548 - }, - "stage_counts": { - "stage1_candidates": 100, - "stage2_expanded": 100, - "stage3_clustered": 20, - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "clustering strategy", - "latency_ms": 2536.551799982786, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.17647058823529413, - "rbo_topk": 0.07116480920571429, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "error handling", - "staged": { - "strategy": "staged", - "query": "error handling", - "latency_ms": 23812.726000010967, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 475.42428970336914, - "stage2_expand_ms": 12454.935789108276, - "stage3_cluster_ms": 8576.019525527954, - "stage4_rerank_ms": 2265.360116958618 - }, - "stage_counts": { - "stage1_candidates": 100, - "stage2_expanded": 100, - "stage3_clustered": 20, - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "error handling", - "latency_ms": 2648.7773999869823, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.6666666666666666, - "rbo_topk": 0.21230026104857144, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "how to parse json", - "staged": { - "strategy": "staged", - "query": "how to parse json", - "latency_ms": 42120.1860999763, - "num_results": 9, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 570.8920955657959, - "stage2_expand_ms": 30054.06880378723, - "stage3_cluster_ms": 9285.51697731018, - "stage4_rerank_ms": 2142.771005630493 - }, - "stage_counts": { - "stage1_candidates": 100, - "stage2_expanded": 100, - "stage3_clustered": 20, - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "how to parse json", - "latency_ms": 2511.8518999814987, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.5833333333333334, - "rbo_topk": 0.4799615561585714, - "staged_unique_files_topk": 9, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-09_score_fast3.json b/codex-lens/benchmarks/results/compare_2026-02-09_score_fast3.json deleted file mode 100644 index 7e0f1132..00000000 --- a/codex-lens/benchmarks/results/compare_2026-02-09_score_fast3.json +++ /dev/null @@ -1,208 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-09 17:27:26", - "source": "src", - "k": 10, - "coarse_k": 100, - "query_count": 3, - "avg_jaccard_topk": 0.5809523809523809, - "avg_rbo_topk": 0.31359567182809517, - "staged": { - "success": 3, - "avg_latency_ms": 22826.711433331173 - }, - "dense_rerank": { - "success": 3, - "avg_latency_ms": 2239.804533312718 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 26690.878500014544, - "num_results": 6, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 8534.121036529541, - "stage2_expand_ms": 13298.827648162842, - "stage3_cluster_ms": 0.026226043701171875, - "stage4_rerank_ms": 4805.774688720703 - }, - "stage_counts": { - "stage1_candidates": 100, - "stage2_expanded": 149, - "stage2_unique_paths": 43, - "stage2_duplicate_paths": 106, - "stage3_clustered": 20, - "stage3_strategy": "score", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 2416.653799980879, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.14285714285714285, - "rbo_topk": 0.25764429885142853, - "staged_unique_files_topk": 6, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 4 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 26188.838399976492, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 525.7587432861328, - "stage2_expand_ms": 23659.400939941406, - "stage3_cluster_ms": 0.021696090698242188, - "stage4_rerank_ms": 1928.950309753418 - }, - "stage_counts": { - "stage1_candidates": 100, - "stage2_expanded": 101, - "stage2_unique_paths": 23, - "stage2_duplicate_paths": 78, - "stage3_clustered": 20, - "stage3_strategy": "score", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 1953.0992999970913, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.9, - "rbo_topk": 0.39374892065285705, - "staged_unique_files_topk": 9, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 15600.41740000248, - "num_results": 7, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 475.54636001586914, - "stage2_expand_ms": 13318.811893463135, - "stage3_cluster_ms": 0.03218650817871094, - "stage4_rerank_ms": 1755.7547092437744 - }, - "stage_counts": { - "stage1_candidates": 100, - "stage2_expanded": 100, - "stage2_unique_paths": 21, - "stage2_duplicate_paths": 79, - "stage3_clustered": 20, - "stage3_strategy": "score", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 2349.660499960184, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.7, - "rbo_topk": 0.28939379598, - "staged_unique_files_topk": 7, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 4 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-09_score_fast4.json b/codex-lens/benchmarks/results/compare_2026-02-09_score_fast4.json deleted file mode 100644 index ef073667..00000000 --- a/codex-lens/benchmarks/results/compare_2026-02-09_score_fast4.json +++ /dev/null @@ -1,356 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-09 20:36:02", - "source": "src", - "k": 10, - "coarse_k": 100, - "query_count": 7, - "avg_jaccard_topk": 0.12095811211246858, - "avg_rbo_topk": 0.09594444061244897, - "staged": { - "success": 7, - "avg_latency_ms": 2436.7641000066483 - }, - "dense_rerank": { - "success": 7, - "avg_latency_ms": 2593.7630428629263 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 285.091000020504, - "num_results": 37, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\reranker\\litellm_reranker.py" - ], - "stage_stats": null, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 2412.1290000081062, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.05263157894736842, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 15029.73520001769, - "num_results": 3, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 101.95636749267578, - "stage2_expand_ms": 12690.008640289307, - "stage3_cluster_ms": 0.001430511474609375, - "stage4_rerank_ms": 2155.757427215576 - }, - "stage_counts": { - "stage1_candidates": 3, - "stage2_expanded": 4, - "stage2_unique_paths": 3, - "stage2_duplicate_paths": 1, - "stage3_clustered": 4, - "stage3_strategy": "score", - "stage4_reranked": 4 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 2424.7003000080585, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.09090909090909091, - "rbo_topk": 0.23541639942571424, - "staged_unique_files_topk": 2, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 324.4240999817848, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "stage_stats": null, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 2497.174100011587, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "graph expansion", - "staged": { - "strategy": "staged", - "query": "graph expansion", - "latency_ms": 359.32159999012947, - "num_results": 11, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\hybrid_search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "stage_stats": null, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "graph expansion", - "latency_ms": 2553.8585999906063, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.17647058823529413, - "rbo_topk": 0.06801300374142856, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 7, - "dense_unique_dirs_topk": 4 - }, - { - "query": "clustering strategy", - "staged": { - "strategy": "staged", - "query": "clustering strategy", - "latency_ms": 286.38240000605583, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py" - ], - "stage_stats": null, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "clustering strategy", - "latency_ms": 2570.379099994898, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.04670528456571428, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 4 - }, - { - "query": "error handling", - "staged": { - "strategy": "staged", - "query": "error handling", - "latency_ms": 412.58780002593994, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "stage_stats": null, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "error handling", - "latency_ms": 2894.3279000222683, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "how to parse json", - "staged": { - "strategy": "staged", - "query": "how to parse json", - "latency_ms": 359.8066000044346, - "num_results": 4, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "stage_stats": null, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "how to parse json", - "latency_ms": 2803.772300004959, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.2727272727272727, - "rbo_topk": 0.18590219827714285, - "staged_unique_files_topk": 4, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 4 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-09_score_fast5.json b/codex-lens/benchmarks/results/compare_2026-02-09_score_fast5.json deleted file mode 100644 index dbea8924..00000000 --- a/codex-lens/benchmarks/results/compare_2026-02-09_score_fast5.json +++ /dev/null @@ -1,462 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-09 20:45:10", - "source": "src", - "k": 10, - "coarse_k": 100, - "query_count": 7, - "avg_jaccard_topk": 0.1283498247783962, - "avg_rbo_topk": 0.09664773770897958, - "staged": { - "success": 7, - "avg_latency_ms": 16394.152085712976 - }, - "dense_rerank": { - "success": 7, - "avg_latency_ms": 2839.464457145759 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 6233.342700004578, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 125.80323219299316, - "stage1_fallback_search_ms": 277.1914005279541, - "stage2_expand_ms": 3032.3121547698975, - "stage3_cluster_ms": 0.02765655517578125, - "stage4_rerank_ms": 2699.3532180786133 - }, - "stage_counts": { - "stage1_candidates": 37, - "stage1_fallback_used": 1, - "stage2_expanded": 86, - "stage2_unique_paths": 53, - "stage2_duplicate_paths": 33, - "stage3_clustered": 20, - "stage3_strategy": "score", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 3036.3474999964237, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.125, - "rbo_topk": 0.06741929885142856, - "staged_unique_files_topk": 8, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 12703.503900021315, - "num_results": 3, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 83.4202766418457, - "stage2_expand_ms": 9856.60433769226, - "stage3_cluster_ms": 0.0011920928955078125, - "stage4_rerank_ms": 2664.630174636841 - }, - "stage_counts": { - "stage1_candidates": 3, - "stage2_expanded": 4, - "stage2_unique_paths": 3, - "stage2_duplicate_paths": 1, - "stage3_clustered": 4, - "stage3_strategy": "score", - "stage4_reranked": 4 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 2888.501700013876, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.09090909090909091, - "rbo_topk": 0.23541639942571424, - "staged_unique_files_topk": 2, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 33684.76710000634, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 78.8118839263916, - "stage1_fallback_search_ms": 174.6652126312256, - "stage2_expand_ms": 31018.909692764282, - "stage3_cluster_ms": 0.0016689300537109375, - "stage4_rerank_ms": 2316.9021606445312 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 5, - "stage2_unique_paths": 5, - "stage2_duplicate_paths": 0, - "stage3_clustered": 5, - "stage3_strategy": "score", - "stage4_reranked": 5 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 2824.729699999094, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "graph expansion", - "staged": { - "strategy": "staged", - "query": "graph expansion", - "latency_ms": 16910.090099990368, - "num_results": 8, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 99.6243953704834, - "stage1_fallback_search_ms": 207.89742469787598, - "stage2_expand_ms": 13929.257154464722, - "stage3_cluster_ms": 0.016927719116210938, - "stage4_rerank_ms": 2586.843729019165 - }, - "stage_counts": { - "stage1_candidates": 11, - "stage1_fallback_used": 1, - "stage2_expanded": 29, - "stage2_unique_paths": 14, - "stage2_duplicate_paths": 15, - "stage3_clustered": 20, - "stage3_strategy": "score", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "graph expansion", - "latency_ms": 2765.958099991083, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.21428571428571427, - "rbo_topk": 0.06893318399142857, - "staged_unique_files_topk": 7, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 6, - "dense_unique_dirs_topk": 4 - }, - { - "query": "clustering strategy", - "staged": { - "strategy": "staged", - "query": "clustering strategy", - "latency_ms": 8380.20839998126, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 95.42632102966309, - "stage1_fallback_search_ms": 187.4692440032959, - "stage2_expand_ms": 5561.658143997192, - "stage3_cluster_ms": 0.0007152557373046875, - "stage4_rerank_ms": 2441.287040710449 - }, - "stage_counts": { - "stage1_candidates": 10, - "stage1_fallback_used": 1, - "stage2_expanded": 10, - "stage2_unique_paths": 10, - "stage2_duplicate_paths": 0, - "stage3_clustered": 10, - "stage3_strategy": "score", - "stage4_reranked": 10 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "clustering strategy", - "latency_ms": 2788.0665000081062, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.04670528456571428, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 4 - }, - { - "query": "error handling", - "staged": { - "strategy": "staged", - "query": "error handling", - "latency_ms": 19897.71709999442, - "num_results": 6, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 114.1653060913086, - "stage1_fallback_search_ms": 235.73827743530273, - "stage2_expand_ms": 16702.077865600586, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 2757.4093341827393 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 13, - "stage2_unique_paths": 6, - "stage2_duplicate_paths": 7, - "stage3_clustered": 13, - "stage3_strategy": "score", - "stage4_reranked": 13 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "error handling", - "latency_ms": 2874.178600013256, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "how to parse json", - "staged": { - "strategy": "staged", - "query": "how to parse json", - "latency_ms": 16949.43529999256, - "num_results": 7, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\treesitter_parser.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 104.50935363769531, - "stage1_fallback_search_ms": 190.6723976135254, - "stage2_expand_ms": 14165.841102600098, - "stage3_cluster_ms": 0.0011920928955078125, - "stage4_rerank_ms": 2399.226188659668 - }, - "stage_counts": { - "stage1_candidates": 4, - "stage1_fallback_used": 1, - "stage2_expanded": 11, - "stage2_unique_paths": 7, - "stage2_duplicate_paths": 4, - "stage3_clustered": 11, - "stage3_strategy": "score", - "stage4_reranked": 11 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "how to parse json", - "latency_ms": 2698.469099998474, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.21428571428571427, - "rbo_topk": 0.16767719827714284, - "staged_unique_files_topk": 7, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-09_score_fast6.json b/codex-lens/benchmarks/results/compare_2026-02-09_score_fast6.json deleted file mode 100644 index d76156dc..00000000 --- a/codex-lens/benchmarks/results/compare_2026-02-09_score_fast6.json +++ /dev/null @@ -1,465 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-09 20:53:01", - "source": "src", - "k": 10, - "coarse_k": 100, - "query_count": 7, - "avg_jaccard_topk": 0.12384302205730777, - "avg_rbo_topk": 0.09816673566816325, - "staged": { - "success": 7, - "avg_latency_ms": 8696.564499999795 - }, - "dense_rerank": { - "success": 7, - "avg_latency_ms": 2936.2583857136115 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 6108.304299980402, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 90.47985076904297, - "stage1_fallback_search_ms": 224.38788414001465, - "stage2_expand_ms": 3031.7258834838867, - "stage3_cluster_ms": 0.02956390380859375, - "stage4_rerank_ms": 2655.31849861145 - }, - "stage_counts": { - "stage1_candidates": 37, - "stage1_fallback_used": 1, - "stage2_expanded": 86, - "stage2_unique_paths": 53, - "stage2_duplicate_paths": 33, - "stage3_clustered": 20, - "stage3_strategy": "score", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 2873.6466999948025, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.125, - "rbo_topk": 0.06741929885142856, - "staged_unique_files_topk": 8, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 9321.754200011492, - "num_results": 3, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 140.43283462524414, - "stage2_expand_ms": 6410.467863082886, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 2675.7972240448 - }, - "stage_counts": { - "stage1_candidates": 3, - "stage2_expanded": 4, - "stage2_unique_paths": 3, - "stage2_duplicate_paths": 1, - "stage3_clustered": 4, - "stage3_strategy": "score", - "stage4_reranked": 4 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 3104.7773999869823, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.09090909090909091, - "rbo_topk": 0.23541639942571424, - "staged_unique_files_topk": 2, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 9527.073799997568, - "num_results": 6, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\keepalive_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 98.59919548034668, - "stage1_fallback_search_ms": 172.26457595825195, - "stage2_expand_ms": 6125.282049179077, - "stage3_cluster_ms": 0.017404556274414062, - "stage4_rerank_ms": 3023.9248275756836 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 31, - "stage2_unique_paths": 11, - "stage2_duplicate_paths": 20, - "stage3_clustered": 20, - "stage3_strategy": "score", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 2901.0302999913692, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.06666666666666667, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 6, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "graph expansion", - "staged": { - "strategy": "staged", - "query": "graph expansion", - "latency_ms": 9120.886200010777, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 91.48454666137695, - "stage1_fallback_search_ms": 172.12390899658203, - "stage2_expand_ms": 6166.24903678894, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 2601.947546005249 - }, - "stage_counts": { - "stage1_candidates": 11, - "stage1_fallback_used": 1, - "stage2_expanded": 16, - "stage2_unique_paths": 13, - "stage2_duplicate_paths": 3, - "stage3_clustered": 16, - "stage3_strategy": "score", - "stage4_reranked": 16 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "graph expansion", - "latency_ms": 2847.6964999735355, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1875, - "rbo_topk": 0.06134116970571428, - "staged_unique_files_topk": 9, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 7, - "dense_unique_dirs_topk": 4 - }, - { - "query": "clustering strategy", - "staged": { - "strategy": "staged", - "query": "clustering strategy", - "latency_ms": 8424.535699993372, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 92.8945541381836, - "stage1_fallback_search_ms": 192.06547737121582, - "stage2_expand_ms": 5568.126440048218, - "stage3_cluster_ms": 0.0011920928955078125, - "stage4_rerank_ms": 2480.673313140869 - }, - "stage_counts": { - "stage1_candidates": 10, - "stage1_fallback_used": 1, - "stage2_expanded": 10, - "stage2_unique_paths": 10, - "stage2_duplicate_paths": 0, - "stage3_clustered": 10, - "stage3_strategy": "score", - "stage4_reranked": 10 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "clustering strategy", - "latency_ms": 2974.9999000132084, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.04670528456571428, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 4 - }, - { - "query": "error handling", - "staged": { - "strategy": "staged", - "query": "error handling", - "latency_ms": 9253.624700009823, - "num_results": 6, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 102.18691825866699, - "stage1_fallback_search_ms": 176.97691917419434, - "stage2_expand_ms": 6113.626480102539, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 2774.4452953338623 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 13, - "stage2_unique_paths": 6, - "stage2_duplicate_paths": 7, - "stage3_clustered": 13, - "stage3_strategy": "score", - "stage4_reranked": 13 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "error handling", - "latency_ms": 2860.619900047779, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "how to parse json", - "staged": { - "strategy": "staged", - "query": "how to parse json", - "latency_ms": 9119.772599995136, - "num_results": 7, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\treesitter_parser.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 90.18850326538086, - "stage1_fallback_search_ms": 157.95397758483887, - "stage2_expand_ms": 6293.469429016113, - "stage3_cluster_ms": 0.0011920928955078125, - "stage4_rerank_ms": 2486.8383407592773 - }, - "stage_counts": { - "stage1_candidates": 4, - "stage1_fallback_used": 1, - "stage2_expanded": 9, - "stage2_unique_paths": 7, - "stage2_duplicate_paths": 2, - "stage3_clustered": 9, - "stage3_strategy": "score", - "stage4_reranked": 9 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "how to parse json", - "latency_ms": 2991.0379999876022, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.21428571428571427, - "rbo_topk": 0.18590219827714285, - "staged_unique_files_topk": 7, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-09_score_fast7.json b/codex-lens/benchmarks/results/compare_2026-02-09_score_fast7.json deleted file mode 100644 index e8cb30da..00000000 --- a/codex-lens/benchmarks/results/compare_2026-02-09_score_fast7.json +++ /dev/null @@ -1,465 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-10 12:23:36", - "source": "src", - "k": 10, - "coarse_k": 100, - "query_count": 7, - "avg_jaccard_topk": 0.12384302205730777, - "avg_rbo_topk": 0.09816673566816325, - "staged": { - "success": 7, - "avg_latency_ms": 3996.4113285754406 - }, - "dense_rerank": { - "success": 7, - "avg_latency_ms": 2780.485200004918 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 2365.3048999905586, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 25.228023529052734, - "stage1_fallback_search_ms": 206.0999870300293, - "stage2_expand_ms": 16.644954681396484, - "stage3_cluster_ms": 0.025987625122070312, - "stage4_rerank_ms": 2064.2504692077637 - }, - "stage_counts": { - "stage1_candidates": 37, - "stage1_fallback_used": 1, - "stage2_expanded": 86, - "stage2_unique_paths": 53, - "stage2_duplicate_paths": 33, - "stage3_clustered": 20, - "stage3_strategy": "score", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 2610.047899991274, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.125, - "rbo_topk": 0.06741929885142856, - "staged_unique_files_topk": 8, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 3723.305599987507, - "num_results": 3, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 31.742334365844727, - "stage2_expand_ms": 2125.1025199890137, - "stage3_cluster_ms": 0.0007152557373046875, - "stage4_rerank_ms": 1511.4071369171143 - }, - "stage_counts": { - "stage1_candidates": 3, - "stage2_expanded": 4, - "stage2_unique_paths": 3, - "stage2_duplicate_paths": 1, - "stage3_clustered": 4, - "stage3_strategy": "score", - "stage4_reranked": 4 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 2072.4792000055313, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.09090909090909091, - "rbo_topk": 0.23541639942571424, - "staged_unique_files_topk": 2, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 5251.151299983263, - "num_results": 6, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\keepalive_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 32.721757888793945, - "stage1_fallback_search_ms": 195.51420211791992, - "stage2_expand_ms": 2060.0733757019043, - "stage3_cluster_ms": 0.0095367431640625, - "stage4_rerank_ms": 2900.8395671844482 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 31, - "stage2_unique_paths": 11, - "stage2_duplicate_paths": 20, - "stage3_clustered": 20, - "stage3_strategy": "score", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 1972.8982000350952, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.06666666666666667, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 6, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "graph expansion", - "staged": { - "strategy": "staged", - "query": "graph expansion", - "latency_ms": 4101.171400010586, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 29.141902923583984, - "stage1_fallback_search_ms": 234.2982292175293, - "stage2_expand_ms": 2082.4878215789795, - "stage3_cluster_ms": 0.0011920928955078125, - "stage4_rerank_ms": 1698.7183094024658 - }, - "stage_counts": { - "stage1_candidates": 11, - "stage1_fallback_used": 1, - "stage2_expanded": 16, - "stage2_unique_paths": 13, - "stage2_duplicate_paths": 3, - "stage3_clustered": 16, - "stage3_strategy": "score", - "stage4_reranked": 16 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "graph expansion", - "latency_ms": 2331.9747000038624, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1875, - "rbo_topk": 0.06134116970571428, - "staged_unique_files_topk": 9, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 7, - "dense_unique_dirs_topk": 4 - }, - { - "query": "clustering strategy", - "staged": { - "strategy": "staged", - "query": "clustering strategy", - "latency_ms": 4032.0041000247, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 42.098283767700195, - "stage1_fallback_search_ms": 209.6574306488037, - "stage2_expand_ms": 2053.9097785949707, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 1665.3883457183838 - }, - "stage_counts": { - "stage1_candidates": 10, - "stage1_fallback_used": 1, - "stage2_expanded": 10, - "stage2_unique_paths": 10, - "stage2_duplicate_paths": 0, - "stage3_clustered": 10, - "stage3_strategy": "score", - "stage4_reranked": 10 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "clustering strategy", - "latency_ms": 2026.5661999881268, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.04670528456571428, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 4 - }, - { - "query": "error handling", - "staged": { - "strategy": "staged", - "query": "error handling", - "latency_ms": 4237.893900036812, - "num_results": 6, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 64.01538848876953, - "stage1_fallback_search_ms": 225.14033317565918, - "stage2_expand_ms": 2116.3012981414795, - "stage3_cluster_ms": 0.0007152557373046875, - "stage4_rerank_ms": 1776.0803699493408 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 13, - "stage2_unique_paths": 6, - "stage2_duplicate_paths": 7, - "stage3_clustered": 13, - "stage3_strategy": "score", - "stage4_reranked": 13 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "error handling", - "latency_ms": 2125.935900002718, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "how to parse json", - "staged": { - "strategy": "staged", - "query": "how to parse json", - "latency_ms": 4264.048099994659, - "num_results": 7, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\treesitter_parser.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 31.972646713256836, - "stage1_fallback_search_ms": 235.47840118408203, - "stage2_expand_ms": 2161.5889072418213, - "stage3_cluster_ms": 0.0007152557373046875, - "stage4_rerank_ms": 1768.0847644805908 - }, - "stage_counts": { - "stage1_candidates": 4, - "stage1_fallback_used": 1, - "stage2_expanded": 9, - "stage2_unique_paths": 7, - "stage2_duplicate_paths": 2, - "stage3_clustered": 9, - "stage3_strategy": "score", - "stage4_reranked": 9 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "how to parse json", - "latency_ms": 6323.49430000782, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.21428571428571427, - "rbo_topk": 0.18590219827714285, - "staged_unique_files_topk": 7, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-10_dir_rr_fast7.json b/codex-lens/benchmarks/results/compare_2026-02-10_dir_rr_fast7.json deleted file mode 100644 index 5a176d82..00000000 --- a/codex-lens/benchmarks/results/compare_2026-02-10_dir_rr_fast7.json +++ /dev/null @@ -1,467 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-10 12:46:47", - "source": "src", - "k": 10, - "coarse_k": 100, - "query_count": 7, - "avg_jaccard_topk": 0.11350467619264612, - "avg_rbo_topk": 0.09062624799510204, - "staged": { - "success": 7, - "avg_latency_ms": 5670.9065000244545 - }, - "dense_rerank": { - "success": 7, - "avg_latency_ms": 3047.475757143327 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 2971.5892000496387, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\file_watcher.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 108.11758041381836, - "stage1_fallback_search_ms": 230.96132278442383, - "stage2_expand_ms": 18.60976219177246, - "stage3_cluster_ms": 1.100301742553711, - "stage4_rerank_ms": 2528.761625289917 - }, - "stage_counts": { - "stage1_candidates": 37, - "stage1_fallback_used": 1, - "stage2_expanded": 86, - "stage2_unique_paths": 53, - "stage2_duplicate_paths": 33, - "stage3_clustered": 20, - "stage3_strategy": "dir_rr", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 2937.113800019026, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.05263157894736842, - "rbo_topk": 0.014635885139999999, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 8, - "dense_unique_dirs_topk": 4 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 10065.153400033712, - "num_results": 3, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 127.17461585998535, - "stage2_expand_ms": 7361.833810806274, - "stage3_cluster_ms": 0.001430511474609375, - "stage4_rerank_ms": 2472.7542400360107 - }, - "stage_counts": { - "stage1_candidates": 3, - "stage2_expanded": 4, - "stage2_unique_paths": 3, - "stage2_duplicate_paths": 1, - "stage3_clustered": 4, - "stage3_strategy": "dir_rr", - "stage4_reranked": 4 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 3059.5018000006676, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.09090909090909091, - "rbo_topk": 0.23541639942571424, - "staged_unique_files_topk": 2, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 5557.314100056887, - "num_results": 7, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\keepalive_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 133.9263916015625, - "stage1_fallback_search_ms": 242.1243190765381, - "stage2_expand_ms": 2106.602430343628, - "stage3_cluster_ms": 0.47016143798828125, - "stage4_rerank_ms": 2967.3829078674316 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 31, - "stage2_unique_paths": 11, - "stage2_duplicate_paths": 20, - "stage3_clustered": 20, - "stage3_strategy": "dir_rr", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 3157.7918999791145, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.06666666666666667, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 6, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "graph expansion", - "staged": { - "strategy": "staged", - "query": "graph expansion", - "latency_ms": 5458.670999974012, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 113.62957954406738, - "stage1_fallback_search_ms": 204.56886291503906, - "stage2_expand_ms": 2166.4509773254395, - "stage3_cluster_ms": 0.0011920928955078125, - "stage4_rerank_ms": 2872.969627380371 - }, - "stage_counts": { - "stage1_candidates": 11, - "stage1_fallback_used": 1, - "stage2_expanded": 16, - "stage2_unique_paths": 13, - "stage2_duplicate_paths": 3, - "stage3_clustered": 16, - "stage3_strategy": "dir_rr", - "stage4_reranked": 16 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "graph expansion", - "latency_ms": 2896.5341999828815, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1875, - "rbo_topk": 0.06134116970571428, - "staged_unique_files_topk": 9, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 7, - "dense_unique_dirs_topk": 4 - }, - { - "query": "clustering strategy", - "staged": { - "strategy": "staged", - "query": "clustering strategy", - "latency_ms": 5028.861099988222, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 111.71293258666992, - "stage1_fallback_search_ms": 192.02208518981934, - "stage2_expand_ms": 2054.065465927124, - "stage3_cluster_ms": 0.0007152557373046875, - "stage4_rerank_ms": 2579.0507793426514 - }, - "stage_counts": { - "stage1_candidates": 10, - "stage1_fallback_used": 1, - "stage2_expanded": 10, - "stage2_unique_paths": 10, - "stage2_duplicate_paths": 0, - "stage3_clustered": 10, - "stage3_strategy": "dir_rr", - "stage4_reranked": 10 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "clustering strategy", - "latency_ms": 3627.1755999922752, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.04670528456571428, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 4 - }, - { - "query": "error handling", - "staged": { - "strategy": "staged", - "query": "error handling", - "latency_ms": 5114.356300055981, - "num_results": 6, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 135.76626777648926, - "stage1_fallback_search_ms": 211.12942695617676, - "stage2_expand_ms": 2151.059150695801, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 2519.892692565918 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 13, - "stage2_unique_paths": 6, - "stage2_duplicate_paths": 7, - "stage3_clustered": 13, - "stage3_strategy": "dir_rr", - "stage4_reranked": 13 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "error handling", - "latency_ms": 2853.594000041485, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "how to parse json", - "staged": { - "strategy": "staged", - "query": "how to parse json", - "latency_ms": 5500.400400012732, - "num_results": 7, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\treesitter_parser.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 96.66872024536133, - "stage1_fallback_search_ms": 176.37205123901367, - "stage2_expand_ms": 2137.751340866089, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 2991.840124130249 - }, - "stage_counts": { - "stage1_candidates": 4, - "stage1_fallback_used": 1, - "stage2_expanded": 9, - "stage2_unique_paths": 7, - "stage2_duplicate_paths": 2, - "stage3_clustered": 9, - "stage3_strategy": "dir_rr", - "stage4_reranked": 9 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "how to parse json", - "latency_ms": 2800.6189999878407, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.21428571428571427, - "rbo_topk": 0.18590219827714285, - "staged_unique_files_topk": 7, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-10_path_fast7.json b/codex-lens/benchmarks/results/compare_2026-02-10_path_fast7.json deleted file mode 100644 index 2038c691..00000000 --- a/codex-lens/benchmarks/results/compare_2026-02-10_path_fast7.json +++ /dev/null @@ -1,465 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-10 12:52:44", - "source": "src", - "k": 10, - "coarse_k": 100, - "query_count": 7, - "avg_jaccard_topk": 0.13455730777159347, - "avg_rbo_topk": 0.10274807844326529, - "staged": { - "success": 7, - "avg_latency_ms": 4445.262371412346 - }, - "dense_rerank": { - "success": 7, - "avg_latency_ms": 3327.1750857276575 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 2719.7998999655247, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 33.12373161315918, - "stage1_fallback_search_ms": 230.31878471374512, - "stage2_expand_ms": 22.444486618041992, - "stage3_cluster_ms": 0.06079673767089844, - "stage4_rerank_ms": 2338.5443687438965 - }, - "stage_counts": { - "stage1_candidates": 37, - "stage1_fallback_used": 1, - "stage2_expanded": 86, - "stage2_unique_paths": 53, - "stage2_duplicate_paths": 33, - "stage3_clustered": 20, - "stage3_strategy": "path", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 2334.8668000102043, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.2, - "rbo_topk": 0.09948869827714285, - "staged_unique_files_topk": 8, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 4470.056899994612, - "num_results": 3, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 28.5646915435791, - "stage2_expand_ms": 2216.57133102417, - "stage3_cluster_ms": 0.001430511474609375, - "stage4_rerank_ms": 2131.246566772461 - }, - "stage_counts": { - "stage1_candidates": 3, - "stage2_expanded": 4, - "stage2_unique_paths": 3, - "stage2_duplicate_paths": 1, - "stage3_clustered": 4, - "stage3_strategy": "path", - "stage4_reranked": 4 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 2447.341199964285, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.09090909090909091, - "rbo_topk": 0.23541639942571424, - "staged_unique_files_topk": 2, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 6126.65680000186, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\keepalive_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 25.135278701782227, - "stage1_fallback_search_ms": 171.53453826904297, - "stage2_expand_ms": 2094.9013233184814, - "stage3_cluster_ms": 0.024318695068359375, - "stage4_rerank_ms": 3743.204355239868 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 31, - "stage2_unique_paths": 11, - "stage2_duplicate_paths": 20, - "stage3_clustered": 11, - "stage3_strategy": "path", - "stage4_reranked": 11 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 9015.508300036192, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.06666666666666667, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 6, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "graph expansion", - "staged": { - "strategy": "staged", - "query": "graph expansion", - "latency_ms": 4319.597599953413, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 18.799781799316406, - "stage1_fallback_search_ms": 167.36602783203125, - "stage2_expand_ms": 2101.4957427978516, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 1976.8805503845215 - }, - "stage_counts": { - "stage1_candidates": 11, - "stage1_fallback_used": 1, - "stage2_expanded": 16, - "stage2_unique_paths": 13, - "stage2_duplicate_paths": 3, - "stage3_clustered": 16, - "stage3_strategy": "path", - "stage4_reranked": 16 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "graph expansion", - "latency_ms": 2356.994699984789, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1875, - "rbo_topk": 0.06134116970571428, - "staged_unique_files_topk": 9, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 7, - "dense_unique_dirs_topk": 4 - }, - { - "query": "clustering strategy", - "staged": { - "strategy": "staged", - "query": "clustering strategy", - "latency_ms": 4574.691199988127, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 45.72629928588867, - "stage1_fallback_search_ms": 233.0036163330078, - "stage2_expand_ms": 2068.8536167144775, - "stage3_cluster_ms": 0.0007152557373046875, - "stage4_rerank_ms": 2152.9064178466797 - }, - "stage_counts": { - "stage1_candidates": 10, - "stage1_fallback_used": 1, - "stage2_expanded": 10, - "stage2_unique_paths": 10, - "stage2_duplicate_paths": 0, - "stage3_clustered": 10, - "stage3_strategy": "path", - "stage4_reranked": 10 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "clustering strategy", - "latency_ms": 2311.4787000119686, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.04670528456571428, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 4 - }, - { - "query": "error handling", - "staged": { - "strategy": "staged", - "query": "error handling", - "latency_ms": 4616.5374999940395, - "num_results": 6, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 38.83004188537598, - "stage1_fallback_search_ms": 263.0441188812256, - "stage2_expand_ms": 2070.7976818084717, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 2133.629083633423 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 13, - "stage2_unique_paths": 6, - "stage2_duplicate_paths": 7, - "stage3_clustered": 13, - "stage3_strategy": "path", - "stage4_reranked": 13 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "error handling", - "latency_ms": 2337.4413000643253, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "how to parse json", - "staged": { - "strategy": "staged", - "query": "how to parse json", - "latency_ms": 4289.496699988842, - "num_results": 7, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\treesitter_parser.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 34.40546989440918, - "stage1_fallback_search_ms": 231.8587303161621, - "stage2_expand_ms": 2068.8445568084717, - "stage3_cluster_ms": 0.0007152557373046875, - "stage4_rerank_ms": 1850.6083488464355 - }, - "stage_counts": { - "stage1_candidates": 4, - "stage1_fallback_used": 1, - "stage2_expanded": 9, - "stage2_unique_paths": 7, - "stage2_duplicate_paths": 2, - "stage3_clustered": 9, - "stage3_strategy": "path", - "stage4_reranked": 9 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "how to parse json", - "latency_ms": 2486.594600021839, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.21428571428571427, - "rbo_topk": 0.18590219827714285, - "staged_unique_files_topk": 7, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-10_score_fast7.json b/codex-lens/benchmarks/results/compare_2026-02-10_score_fast7.json deleted file mode 100644 index 34cb9d64..00000000 --- a/codex-lens/benchmarks/results/compare_2026-02-10_score_fast7.json +++ /dev/null @@ -1,465 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-10 12:44:24", - "source": "src", - "k": 10, - "coarse_k": 100, - "query_count": 7, - "avg_jaccard_topk": 0.12384302205730777, - "avg_rbo_topk": 0.09816673566816325, - "staged": { - "success": 7, - "avg_latency_ms": 4603.035771421024 - }, - "dense_rerank": { - "success": 7, - "avg_latency_ms": 2776.139728575945 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 3544.4309000074863, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 34.082651138305664, - "stage1_fallback_search_ms": 217.52095222473145, - "stage2_expand_ms": 18.847942352294922, - "stage3_cluster_ms": 0.031948089599609375, - "stage4_rerank_ms": 3176.4564514160156 - }, - "stage_counts": { - "stage1_candidates": 37, - "stage1_fallback_used": 1, - "stage2_expanded": 86, - "stage2_unique_paths": 53, - "stage2_duplicate_paths": 33, - "stage3_clustered": 20, - "stage3_strategy": "score", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 3075.5329999923706, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.125, - "rbo_topk": 0.06741929885142856, - "staged_unique_files_topk": 8, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 4371.493600010872, - "num_results": 3, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 29.517173767089844, - "stage2_expand_ms": 2236.224412918091, - "stage3_cluster_ms": 0.0007152557373046875, - "stage4_rerank_ms": 1998.866319656372 - }, - "stage_counts": { - "stage1_candidates": 3, - "stage2_expanded": 4, - "stage2_unique_paths": 3, - "stage2_duplicate_paths": 1, - "stage3_clustered": 4, - "stage3_strategy": "score", - "stage4_reranked": 4 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 2334.758200019598, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.09090909090909091, - "rbo_topk": 0.23541639942571424, - "staged_unique_files_topk": 2, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 4143.470999985933, - "num_results": 6, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\keepalive_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 20.66636085510254, - "stage1_fallback_search_ms": 150.6054401397705, - "stage2_expand_ms": 2064.2361640930176, - "stage3_cluster_ms": 0.012159347534179688, - "stage4_rerank_ms": 1838.1483554840088 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 31, - "stage2_unique_paths": 11, - "stage2_duplicate_paths": 20, - "stage3_clustered": 20, - "stage3_strategy": "score", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 2207.86700001359, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.06666666666666667, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 6, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "graph expansion", - "staged": { - "strategy": "staged", - "query": "graph expansion", - "latency_ms": 4234.638899981976, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 21.48127555847168, - "stage1_fallback_search_ms": 153.59735488891602, - "stage2_expand_ms": 2092.521905899048, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 1876.7595291137695 - }, - "stage_counts": { - "stage1_candidates": 11, - "stage1_fallback_used": 1, - "stage2_expanded": 16, - "stage2_unique_paths": 13, - "stage2_duplicate_paths": 3, - "stage3_clustered": 16, - "stage3_strategy": "score", - "stage4_reranked": 16 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "graph expansion", - "latency_ms": 2646.9266000390053, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1875, - "rbo_topk": 0.06134116970571428, - "staged_unique_files_topk": 9, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 7, - "dense_unique_dirs_topk": 4 - }, - { - "query": "clustering strategy", - "staged": { - "strategy": "staged", - "query": "clustering strategy", - "latency_ms": 4778.165899991989, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 18.590688705444336, - "stage1_fallback_search_ms": 195.90282440185547, - "stage2_expand_ms": 2053.685426712036, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 2431.095838546753 - }, - "stage_counts": { - "stage1_candidates": 10, - "stage1_fallback_used": 1, - "stage2_expanded": 10, - "stage2_unique_paths": 10, - "stage2_duplicate_paths": 0, - "stage3_clustered": 10, - "stage3_strategy": "score", - "stage4_reranked": 10 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "clustering strategy", - "latency_ms": 2887.1304000020027, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.04670528456571428, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 4 - }, - { - "query": "error handling", - "staged": { - "strategy": "staged", - "query": "error handling", - "latency_ms": 5823.889799982309, - "num_results": 6, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 109.02619361877441, - "stage1_fallback_search_ms": 196.54059410095215, - "stage2_expand_ms": 2088.4640216827393, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 3328.0465602874756 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 13, - "stage2_unique_paths": 6, - "stage2_duplicate_paths": 7, - "stage3_clustered": 13, - "stage3_strategy": "score", - "stage4_reranked": 13 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "error handling", - "latency_ms": 3351.872999995947, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "how to parse json", - "staged": { - "strategy": "staged", - "query": "how to parse json", - "latency_ms": 5325.160299986601, - "num_results": 7, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\treesitter_parser.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 216.71128273010254, - "stage1_fallback_search_ms": 295.27878761291504, - "stage2_expand_ms": 2091.4883613586426, - "stage3_cluster_ms": 0.001430511474609375, - "stage4_rerank_ms": 2606.9161891937256 - }, - "stage_counts": { - "stage1_candidates": 4, - "stage1_fallback_used": 1, - "stage2_expanded": 9, - "stage2_unique_paths": 7, - "stage2_duplicate_paths": 2, - "stage3_clustered": 9, - "stage3_strategy": "score", - "stage4_reranked": 9 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "how to parse json", - "latency_ms": 2928.889899969101, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.21428571428571427, - "rbo_topk": 0.18590219827714285, - "staged_unique_files_topk": 7, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-11_dir_rr_fast7.json b/codex-lens/benchmarks/results/compare_2026-02-11_dir_rr_fast7.json deleted file mode 100644 index 61b1475f..00000000 --- a/codex-lens/benchmarks/results/compare_2026-02-11_dir_rr_fast7.json +++ /dev/null @@ -1,467 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-11 15:16:08", - "source": "codex-lens\\src", - "k": 10, - "coarse_k": 100, - "query_count": 7, - "avg_jaccard_topk": 0.11350467619264612, - "avg_rbo_topk": 0.09062624799510204, - "staged": { - "success": 7, - "avg_latency_ms": 4507.475014303412 - }, - "dense_rerank": { - "success": 7, - "avg_latency_ms": 2537.8563000304357 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 2474.800100028515, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\file_watcher.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 91.76826477050781, - "stage1_fallback_search_ms": 162.45269775390625, - "stage2_expand_ms": 14.957904815673828, - "stage3_cluster_ms": 0.8461475372314453, - "stage4_rerank_ms": 2129.7342777252197 - }, - "stage_counts": { - "stage1_candidates": 37, - "stage1_fallback_used": 1, - "stage2_expanded": 86, - "stage2_unique_paths": 53, - "stage2_duplicate_paths": 33, - "stage3_clustered": 20, - "stage3_strategy": "dir_rr", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 2425.3046000003815, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.05263157894736842, - "rbo_topk": 0.014635885139999999, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 8, - "dense_unique_dirs_topk": 4 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 5389.070900022984, - "num_results": 3, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 63.6446475982666, - "stage2_expand_ms": 3202.108144760132, - "stage3_cluster_ms": 0.0007152557373046875, - "stage4_rerank_ms": 2011.8708610534668 - }, - "stage_counts": { - "stage1_candidates": 3, - "stage2_expanded": 4, - "stage2_unique_paths": 3, - "stage2_duplicate_paths": 1, - "stage3_clustered": 4, - "stage3_strategy": "dir_rr", - "stage4_reranked": 4 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 2465.9148000478745, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.09090909090909091, - "rbo_topk": 0.23541639942571424, - "staged_unique_files_topk": 2, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 4989.407700002193, - "num_results": 7, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\keepalive_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 88.54341506958008, - "stage1_fallback_search_ms": 125.9164810180664, - "stage2_expand_ms": 2063.6398792266846, - "stage3_cluster_ms": 0.3476142883300781, - "stage4_rerank_ms": 2633.7506771087646 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 31, - "stage2_unique_paths": 11, - "stage2_duplicate_paths": 20, - "stage3_clustered": 20, - "stage3_strategy": "dir_rr", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 2424.8579000234604, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.06666666666666667, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 6, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "graph expansion", - "staged": { - "strategy": "staged", - "query": "graph expansion", - "latency_ms": 4771.1614000201225, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 61.426401138305664, - "stage1_fallback_search_ms": 152.01711654663086, - "stage2_expand_ms": 2078.4833431243896, - "stage3_cluster_ms": 0.0007152557373046875, - "stage4_rerank_ms": 2376.2998580932617 - }, - "stage_counts": { - "stage1_candidates": 11, - "stage1_fallback_used": 1, - "stage2_expanded": 16, - "stage2_unique_paths": 13, - "stage2_duplicate_paths": 3, - "stage3_clustered": 16, - "stage3_strategy": "dir_rr", - "stage4_reranked": 16 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "graph expansion", - "latency_ms": 2418.981700003147, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1875, - "rbo_topk": 0.06134116970571428, - "staged_unique_files_topk": 9, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 7, - "dense_unique_dirs_topk": 4 - }, - { - "query": "clustering strategy", - "staged": { - "strategy": "staged", - "query": "clustering strategy", - "latency_ms": 4559.269900023937, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 60.93573570251465, - "stage1_fallback_search_ms": 141.4163112640381, - "stage2_expand_ms": 2032.2721004486084, - "stage3_cluster_ms": 0.0007152557373046875, - "stage4_rerank_ms": 2217.2317504882812 - }, - "stage_counts": { - "stage1_candidates": 10, - "stage1_fallback_used": 1, - "stage2_expanded": 10, - "stage2_unique_paths": 10, - "stage2_duplicate_paths": 0, - "stage3_clustered": 10, - "stage3_strategy": "dir_rr", - "stage4_reranked": 10 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "clustering strategy", - "latency_ms": 2443.3700000047684, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.04670528456571428, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 4 - }, - { - "query": "error handling", - "staged": { - "strategy": "staged", - "query": "error handling", - "latency_ms": 4757.269500017166, - "num_results": 6, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 89.56503868103027, - "stage1_fallback_search_ms": 143.58854293823242, - "stage2_expand_ms": 2119.623899459839, - "stage3_cluster_ms": 0.0007152557373046875, - "stage4_rerank_ms": 2303.9650917053223 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 13, - "stage2_unique_paths": 6, - "stage2_duplicate_paths": 7, - "stage3_clustered": 13, - "stage3_strategy": "dir_rr", - "stage4_reranked": 13 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "error handling", - "latency_ms": 2431.0521000623703, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "how to parse json", - "staged": { - "strategy": "staged", - "query": "how to parse json", - "latency_ms": 4611.3456000089645, - "num_results": 7, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\treesitter_parser.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 74.86128807067871, - "stage1_fallback_search_ms": 137.465238571167, - "stage2_expand_ms": 2086.426019668579, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 2218.2157039642334 - }, - "stage_counts": { - "stage1_candidates": 4, - "stage1_fallback_used": 1, - "stage2_expanded": 9, - "stage2_unique_paths": 7, - "stage2_duplicate_paths": 2, - "stage3_clustered": 9, - "stage3_strategy": "dir_rr", - "stage4_reranked": 9 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "how to parse json", - "latency_ms": 3155.5130000710487, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.21428571428571427, - "rbo_topk": 0.18590219827714285, - "staged_unique_files_topk": 7, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-11_path_fast7.json b/codex-lens/benchmarks/results/compare_2026-02-11_path_fast7.json deleted file mode 100644 index e9a2d65e..00000000 --- a/codex-lens/benchmarks/results/compare_2026-02-11_path_fast7.json +++ /dev/null @@ -1,465 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-11 15:12:41", - "source": "codex-lens\\src", - "k": 10, - "coarse_k": 100, - "query_count": 7, - "avg_jaccard_topk": 0.13455730777159347, - "avg_rbo_topk": 0.10274807844326529, - "staged": { - "success": 7, - "avg_latency_ms": 4532.43382857527 - }, - "dense_rerank": { - "success": 7, - "avg_latency_ms": 2712.3431142909185 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 2704.6869000196457, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 56.32758140563965, - "stage1_fallback_search_ms": 156.8472385406494, - "stage2_expand_ms": 15.436887741088867, - "stage3_cluster_ms": 0.04291534423828125, - "stage4_rerank_ms": 2388.756513595581 - }, - "stage_counts": { - "stage1_candidates": 37, - "stage1_fallback_used": 1, - "stage2_expanded": 86, - "stage2_unique_paths": 53, - "stage2_duplicate_paths": 33, - "stage3_clustered": 20, - "stage3_strategy": "path", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 3257.856599986553, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.2, - "rbo_topk": 0.09948869827714285, - "staged_unique_files_topk": 8, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 4347.2081000208855, - "num_results": 3, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 65.37723541259766, - "stage2_expand_ms": 2145.587682723999, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 2052.9236793518066 - }, - "stage_counts": { - "stage1_candidates": 3, - "stage2_expanded": 4, - "stage2_unique_paths": 3, - "stage2_duplicate_paths": 1, - "stage3_clustered": 4, - "stage3_strategy": "path", - "stage4_reranked": 4 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 2642.404200077057, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.09090909090909091, - "rbo_topk": 0.23541639942571424, - "staged_unique_files_topk": 2, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 4627.254400074482, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\keepalive_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 96.67634963989258, - "stage1_fallback_search_ms": 162.25123405456543, - "stage2_expand_ms": 2071.5224742889404, - "stage3_cluster_ms": 0.018835067749023438, - "stage4_rerank_ms": 2211.8191719055176 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 31, - "stage2_unique_paths": 11, - "stage2_duplicate_paths": 20, - "stage3_clustered": 11, - "stage3_strategy": "path", - "stage4_reranked": 11 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 2479.5284999608994, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.06666666666666667, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 6, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "graph expansion", - "staged": { - "strategy": "staged", - "query": "graph expansion", - "latency_ms": 4663.639899969101, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 82.36384391784668, - "stage1_fallback_search_ms": 158.2353115081787, - "stage2_expand_ms": 2087.8846645355225, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 2249.4378089904785 - }, - "stage_counts": { - "stage1_candidates": 11, - "stage1_fallback_used": 1, - "stage2_expanded": 16, - "stage2_unique_paths": 13, - "stage2_duplicate_paths": 3, - "stage3_clustered": 16, - "stage3_strategy": "path", - "stage4_reranked": 16 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "graph expansion", - "latency_ms": 2455.024599969387, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1875, - "rbo_topk": 0.06134116970571428, - "staged_unique_files_topk": 9, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 7, - "dense_unique_dirs_topk": 4 - }, - { - "query": "clustering strategy", - "staged": { - "strategy": "staged", - "query": "clustering strategy", - "latency_ms": 6402.90189999342, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 44.295310974121094, - "stage1_fallback_search_ms": 127.30145454406738, - "stage2_expand_ms": 2030.930995941162, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 4132.822036743164 - }, - "stage_counts": { - "stage1_candidates": 10, - "stage1_fallback_used": 1, - "stage2_expanded": 10, - "stage2_unique_paths": 10, - "stage2_duplicate_paths": 0, - "stage3_clustered": 10, - "stage3_strategy": "path", - "stage4_reranked": 10 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "clustering strategy", - "latency_ms": 3286.4142000079155, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.04670528456571428, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 4 - }, - { - "query": "error handling", - "staged": { - "strategy": "staged", - "query": "error handling", - "latency_ms": 4532.2757999897, - "num_results": 6, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 85.02960205078125, - "stage1_fallback_search_ms": 146.46339416503906, - "stage2_expand_ms": 2071.5532302856445, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 2140.7644748687744 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 13, - "stage2_unique_paths": 6, - "stage2_duplicate_paths": 7, - "stage3_clustered": 13, - "stage3_strategy": "path", - "stage4_reranked": 13 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "error handling", - "latency_ms": 2349.7827999591827, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "how to parse json", - "staged": { - "strategy": "staged", - "query": "how to parse json", - "latency_ms": 4449.06979995966, - "num_results": 7, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\treesitter_parser.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 67.15631484985352, - "stage1_fallback_search_ms": 148.30541610717773, - "stage2_expand_ms": 2069.3678855895996, - "stage3_cluster_ms": 0.0007152557373046875, - "stage4_rerank_ms": 2097.882032394409 - }, - "stage_counts": { - "stage1_candidates": 4, - "stage1_fallback_used": 1, - "stage2_expanded": 9, - "stage2_unique_paths": 7, - "stage2_duplicate_paths": 2, - "stage3_clustered": 9, - "stage3_strategy": "path", - "stage4_reranked": 9 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "how to parse json", - "latency_ms": 2515.3909000754356, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.21428571428571427, - "rbo_topk": 0.18590219827714285, - "staged_unique_files_topk": 7, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/compare_2026-02-11_score_fast7.json b/codex-lens/benchmarks/results/compare_2026-02-11_score_fast7.json deleted file mode 100644 index 1ff3e084..00000000 --- a/codex-lens/benchmarks/results/compare_2026-02-11_score_fast7.json +++ /dev/null @@ -1,465 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-11 15:14:25", - "source": "codex-lens\\src", - "k": 10, - "coarse_k": 100, - "query_count": 7, - "avg_jaccard_topk": 0.12384302205730777, - "avg_rbo_topk": 0.09816673566816325, - "staged": { - "success": 7, - "avg_latency_ms": 4538.7477714674815 - }, - "dense_rerank": { - "success": 7, - "avg_latency_ms": 2568.1517999768257 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 2546.395000040531, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\semantic.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 70.5413818359375, - "stage1_fallback_search_ms": 165.39907455444336, - "stage2_expand_ms": 15.58542251586914, - "stage3_cluster_ms": 0.020265579223632812, - "stage4_rerank_ms": 2209.89727973938 - }, - "stage_counts": { - "stage1_candidates": 37, - "stage1_fallback_used": 1, - "stage2_expanded": 86, - "stage2_unique_paths": 53, - "stage2_duplicate_paths": 33, - "stage3_clustered": 20, - "stage3_strategy": "score", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 2610.328099966049, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.125, - "rbo_topk": 0.06741929885142856, - "staged_unique_files_topk": 8, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 4569.872200012207, - "num_results": 3, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\entities.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 96.31776809692383, - "stage2_expand_ms": 2299.86310005188, - "stage3_cluster_ms": 0.0007152557373046875, - "stage4_rerank_ms": 2094.2182540893555 - }, - "stage_counts": { - "stage1_candidates": 3, - "stage2_expanded": 4, - "stage2_unique_paths": 3, - "stage2_duplicate_paths": 1, - "stage3_clustered": 4, - "stage3_strategy": "score", - "stage4_reranked": 4 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 2509.9732999801636, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\query_parser.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.09090909090909091, - "rbo_topk": 0.23541639942571424, - "staged_unique_files_topk": 2, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 5064.990800082684, - "num_results": 6, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\keepalive_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 86.1806869506836, - "stage1_fallback_search_ms": 150.21824836730957, - "stage2_expand_ms": 2080.6803703308105, - "stage3_cluster_ms": 0.011682510375976562, - "stage4_rerank_ms": 2663.7954711914062 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 31, - "stage2_unique_paths": 11, - "stage2_duplicate_paths": 20, - "stage3_clustered": 20, - "stage3_strategy": "score", - "stage4_reranked": 20 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 2778.6906000375748, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.06666666666666667, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 6, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 2, - "dense_unique_dirs_topk": 4 - }, - { - "query": "graph expansion", - "staged": { - "strategy": "staged", - "query": "graph expansion", - "latency_ms": 4816.586899995804, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migrations\\migration_007_add_graph_neighbors.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_graph_builder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 79.48184013366699, - "stage1_fallback_search_ms": 158.03027153015137, - "stage2_expand_ms": 2087.271213531494, - "stage3_cluster_ms": 0.0007152557373046875, - "stage4_rerank_ms": 2410.567283630371 - }, - "stage_counts": { - "stage1_candidates": 11, - "stage1_fallback_used": 1, - "stage2_expanded": 16, - "stage2_unique_paths": 13, - "stage2_duplicate_paths": 3, - "stage3_clustered": 16, - "stage3_strategy": "score", - "stage4_reranked": 16 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "graph expansion", - "latency_ms": 2692.1504999399185, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1875, - "rbo_topk": 0.06134116970571428, - "staged_unique_files_topk": 9, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 7, - "dense_unique_dirs_topk": 4 - }, - { - "query": "clustering strategy", - "staged": { - "strategy": "staged", - "query": "clustering strategy", - "latency_ms": 4494.9805000424385, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\config.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\dbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\noop_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\base.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\hdbscan_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\frequency_strategy.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\clustering\\__init__.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 40.569305419921875, - "stage1_fallback_search_ms": 141.06035232543945, - "stage2_expand_ms": 2043.9364910125732, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 2198.4200477600098 - }, - "stage_counts": { - "stage1_candidates": 10, - "stage1_fallback_used": 1, - "stage2_expanded": 10, - "stage2_unique_paths": 10, - "stage2_duplicate_paths": 0, - "stage3_clustered": 10, - "stage3_strategy": "score", - "stage4_reranked": 10 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "clustering strategy", - "latency_ms": 2474.2726999521255, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.04670528456571428, - "staged_unique_files_topk": 10, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 4 - }, - { - "query": "error handling", - "staged": { - "strategy": "staged", - "query": "error handling", - "latency_ms": 5652.523400068283, - "num_results": 6, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\lsp_bridge.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\gpu_support.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\rotational_embedder.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 87.34393119812012, - "stage1_fallback_search_ms": 149.7325897216797, - "stage2_expand_ms": 2072.728157043457, - "stage3_cluster_ms": 0.00095367431640625, - "stage4_rerank_ms": 3190.687894821167 - }, - "stage_counts": { - "stage1_candidates": 5, - "stage1_fallback_used": 1, - "stage2_expanded": 13, - "stage2_unique_paths": 6, - "stage2_duplicate_paths": 7, - "stage3_clustered": 13, - "stage3_strategy": "score", - "stage4_reranked": 13 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "error handling", - "latency_ms": 2481.709800004959, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.07142857142857142, - "rbo_topk": 0.045191399425714276, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 4 - }, - { - "query": "how to parse json", - "staged": { - "strategy": "staged", - "query": "how to parse json", - "latency_ms": 4625.885600030422, - "num_results": 7, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\standalone_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\factory.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\indexing\\symbol_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\parsers\\treesitter_parser.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 92.83590316772461, - "stage1_fallback_search_ms": 147.12858200073242, - "stage2_expand_ms": 2061.2568855285645, - "stage3_cluster_ms": 0.0011920928955078125, - "stage4_rerank_ms": 2246.800184249878 - }, - "stage_counts": { - "stage1_candidates": 4, - "stage1_fallback_used": 1, - "stage2_expanded": 9, - "stage2_unique_paths": 7, - "stage2_duplicate_paths": 2, - "stage3_clustered": 9, - "stage3_strategy": "score", - "stage4_reranked": 9 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "how to parse json", - "latency_ms": 2429.9375999569893, - "num_results": 10, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\chunker.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\ann_index.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.21428571428571427, - "rbo_topk": 0.18590219827714285, - "staged_unique_files_topk": 7, - "dense_unique_files_topk": 10, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 4 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/method_contribution_analysis.json b/codex-lens/benchmarks/results/method_contribution_analysis.json deleted file mode 100644 index f192b4fa..00000000 --- a/codex-lens/benchmarks/results/method_contribution_analysis.json +++ /dev/null @@ -1,406 +0,0 @@ -{ - "storage_analysis": { - "tables": { - "code_relationships": { - "row_count": 0, - "columns": [ - "id", - "source_symbol_id", - "target_qualified_name", - "relationship_type", - "source_line", - "target_file" - ] - }, - "embeddings_config": { - "row_count": 1, - "columns": [ - "id", - "model_profile", - "model_name", - "embedding_dim", - "backend", - "created_at", - "updated_at" - ] - }, - "file_keywords": { - "row_count": 0, - "columns": [ - "file_id", - "keyword_id" - ] - }, - "files": { - "row_count": 0, - "columns": [ - "id", - "name", - "full_path", - "language", - "content", - "mtime", - "line_count" - ] - }, - "files_fts_exact": { - "row_count": 0, - "columns": [ - "name", - "full_path", - "content" - ] - }, - "files_fts_exact_config": { - "row_count": 1, - "columns": [ - "k", - "v" - ] - }, - "files_fts_exact_data": { - "row_count": 2, - "columns": [ - "id", - "block" - ] - }, - "files_fts_exact_docsize": { - "row_count": 0, - "columns": [ - "id", - "sz" - ] - }, - "files_fts_exact_idx": { - "row_count": 0, - "columns": [ - "segid", - "term", - "pgno" - ] - }, - "files_fts_fuzzy": { - "row_count": 0, - "columns": [ - "name", - "full_path", - "content" - ] - }, - "files_fts_fuzzy_config": { - "row_count": 1, - "columns": [ - "k", - "v" - ] - }, - "files_fts_fuzzy_data": { - "row_count": 2, - "columns": [ - "id", - "block" - ] - }, - "files_fts_fuzzy_docsize": { - "row_count": 0, - "columns": [ - "id", - "sz" - ] - }, - "files_fts_fuzzy_idx": { - "row_count": 0, - "columns": [ - "segid", - "term", - "pgno" - ] - }, - "graph_neighbors": { - "row_count": 0, - "columns": [ - "source_symbol_id", - "neighbor_symbol_id", - "relationship_depth" - ] - }, - "keywords": { - "row_count": 0, - "columns": [ - "id", - "keyword" - ] - }, - "merkle_hashes": { - "row_count": 0, - "columns": [ - "file_id", - "sha256", - "updated_at" - ] - }, - "merkle_state": { - "row_count": 1, - "columns": [ - "id", - "root_hash", - "updated_at" - ] - }, - "semantic_chunks": { - "row_count": 0, - "columns": [ - "id", - "file_path", - "content", - "embedding", - "metadata", - "created_at", - "embedding_binary", - "embedding_dense" - ] - }, - "semantic_metadata": { - "row_count": 0, - "columns": [ - "id", - "file_id", - "summary", - "purpose", - "llm_tool", - "generated_at" - ] - }, - "sqlite_sequence": { - "row_count": 0, - "columns": [ - "name", - "seq" - ] - }, - "subdirs": { - "row_count": 2, - "columns": [ - "id", - "name", - "index_path", - "files_count", - "last_updated" - ] - }, - "symbols": { - "row_count": 0, - "columns": [ - "id", - "file_id", - "name", - "kind", - "start_line", - "end_line" - ] - } - }, - "conflicts": [], - "recommendations": [ - "Found 10 FTS tables: ['files_fts_exact', 'files_fts_exact_config', 'files_fts_exact_data', 'files_fts_exact_docsize', 'files_fts_exact_idx', 'files_fts_fuzzy', 'files_fts_fuzzy_config', 'files_fts_fuzzy_data', 'files_fts_fuzzy_docsize', 'files_fts_fuzzy_idx']. Dual FTS (exact + fuzzy) is properly configured." - ] - }, - "contribution_analysis": { - "per_query": [ - { - "query": "binary quantization", - "methods": { - "fts_exact": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - }, - "fts_fuzzy": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - }, - "vector": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - }, - "splade": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - } - }, - "fusion_analysis": {}, - "overlaps": {} - }, - { - "query": "hamming distance search", - "methods": { - "fts_exact": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - }, - "fts_fuzzy": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - }, - "vector": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - }, - "splade": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - } - }, - "fusion_analysis": {}, - "overlaps": {} - }, - { - "query": "embeddings generation", - "methods": { - "fts_exact": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - }, - "fts_fuzzy": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - }, - "vector": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - }, - "splade": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - } - }, - "fusion_analysis": {}, - "overlaps": {} - }, - { - "query": "reranking algorithm", - "methods": { - "fts_exact": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - }, - "fts_fuzzy": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - }, - "vector": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - }, - "splade": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - } - }, - "fusion_analysis": {}, - "overlaps": {} - }, - { - "query": "database connection handling", - "methods": { - "fts_exact": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - }, - "fts_fuzzy": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - }, - "vector": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - }, - "splade": { - "error": "'obj' object has no attribute 'symbol_boost_factor'", - "count": 0 - } - }, - "fusion_analysis": {}, - "overlaps": {} - } - ], - "summary": { - "fts_exact": { - "avg_count": 0.0, - "avg_latency_ms": 0 - }, - "fts_fuzzy": { - "avg_count": 0.0, - "avg_latency_ms": 0 - }, - "vector": { - "avg_count": 0.0, - "avg_latency_ms": 0 - }, - "splade": { - "avg_count": 0.0, - "avg_latency_ms": 0 - } - } - }, - "fusion_experiment": { - "per_query": [ - { - "query": "binary quantization", - "strategies": { - "standard_hybrid": { - "error": "'obj' object has no attribute 'symbol_boost_factor'" - }, - "fts_rerank_fusion": { - "error": "'obj' object has no attribute 'symbol_boost_factor'" - } - } - }, - { - "query": "hamming distance search", - "strategies": { - "standard_hybrid": { - "error": "'obj' object has no attribute 'symbol_boost_factor'" - }, - "fts_rerank_fusion": { - "error": "'obj' object has no attribute 'symbol_boost_factor'" - } - } - }, - { - "query": "embeddings generation", - "strategies": { - "standard_hybrid": { - "error": "'obj' object has no attribute 'symbol_boost_factor'" - }, - "fts_rerank_fusion": { - "error": "'obj' object has no attribute 'symbol_boost_factor'" - } - } - }, - { - "query": "reranking algorithm", - "strategies": { - "standard_hybrid": { - "error": "'obj' object has no attribute 'symbol_boost_factor'" - }, - "fts_rerank_fusion": { - "error": "'obj' object has no attribute 'symbol_boost_factor'" - } - } - }, - { - "query": "database connection handling", - "strategies": { - "standard_hybrid": { - "error": "'obj' object has no attribute 'symbol_boost_factor'" - }, - "fts_rerank_fusion": { - "error": "'obj' object has no attribute 'symbol_boost_factor'" - } - } - } - ], - "summary": {} - } -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/tmp_compare1.json b/codex-lens/benchmarks/results/tmp_compare1.json deleted file mode 100644 index d8b14058..00000000 --- a/codex-lens/benchmarks/results/tmp_compare1.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-08 23:48:26", - "source": "src", - "k": 5, - "coarse_k": 50, - "query_count": 1, - "avg_jaccard_topk": 0.0, - "avg_rbo_topk": 0.0, - "staged": { - "success": 1, - "avg_latency_ms": 30093.97499999404 - }, - "dense_rerank": { - "success": 1, - "avg_latency_ms": 331.4424999952316 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 30093.97499999404, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\__init__.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\api\\references.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 6421.706914901733, - "stage2_expand_ms": 17591.988563537598, - "stage3_cluster_ms": 3700.4549503326416, - "stage4_rerank_ms": 2340.064525604248 - }, - "stage_counts": { - "stage1_candidates": 50, - "stage2_expanded": 99, - "stage3_clustered": 10, - "stage4_reranked": 10 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 331.4424999952316, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.0, - "rbo_topk": 0.0, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 5, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 1 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/tmp_compare3_ok_cpu.json b/codex-lens/benchmarks/results/tmp_compare3_ok_cpu.json deleted file mode 100644 index 550e0b19..00000000 --- a/codex-lens/benchmarks/results/tmp_compare3_ok_cpu.json +++ /dev/null @@ -1,177 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-08 23:58:56", - "source": "src", - "k": 5, - "coarse_k": 50, - "query_count": 3, - "avg_jaccard_topk": 0.11574074074074074, - "avg_rbo_topk": 0.14601366666666662, - "staged": { - "success": 3, - "avg_latency_ms": 27868.044033328693 - }, - "dense_rerank": { - "success": 3, - "avg_latency_ms": 1339.25289999942 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 33643.06179998815, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\incremental_indexer.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 6201.4524936676025, - "stage2_expand_ms": 17306.61702156067, - "stage3_cluster_ms": 6829.557418823242, - "stage4_rerank_ms": 3267.071485519409 - }, - "stage_counts": { - "stage1_candidates": 50, - "stage2_expanded": 99, - "stage3_clustered": 10, - "stage4_reranked": 10 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 1520.9955999851227, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.031347, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 5, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 1 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 26400.58900000155, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 404.60920333862305, - "stage2_expand_ms": 20036.258697509766, - "stage3_cluster_ms": 4919.439315795898, - "stage4_rerank_ms": 1001.8632411956787 - }, - "stage_counts": { - "stage1_candidates": 50, - "stage2_expanded": 51, - "stage3_clustered": 10, - "stage4_reranked": 10 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 1264.3862999975681, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.125, - "rbo_topk": 0.20334699999999994, - "staged_unique_files_topk": 4, - "dense_unique_files_topk": 5, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 2 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 23560.481299996376, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 385.28990745544434, - "stage2_expand_ms": 17787.648677825928, - "stage3_cluster_ms": 4374.642372131348, - "stage4_rerank_ms": 974.8115539550781 - }, - "stage_counts": { - "stage1_candidates": 50, - "stage2_expanded": 50, - "stage3_clustered": 10, - "stage4_reranked": 10 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 1232.3768000155687, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.20334699999999994, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 5, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 1 - } - ] -} \ No newline at end of file diff --git a/codex-lens/benchmarks/results/tmp_compare3_ok_cpu_dedup.json b/codex-lens/benchmarks/results/tmp_compare3_ok_cpu_dedup.json deleted file mode 100644 index 26c837ec..00000000 --- a/codex-lens/benchmarks/results/tmp_compare3_ok_cpu_dedup.json +++ /dev/null @@ -1,176 +0,0 @@ -{ - "summary": { - "timestamp": "2026-02-09 00:08:47", - "source": "src", - "k": 5, - "coarse_k": 50, - "query_count": 3, - "avg_jaccard_topk": 0.11574074074074074, - "avg_rbo_topk": 0.14601366666666662, - "staged": { - "success": 3, - "avg_latency_ms": 31720.555866663653 - }, - "dense_rerank": { - "success": 3, - "avg_latency_ms": 1401.2113333245118 - } - }, - "comparisons": [ - { - "query": "class Config", - "staged": { - "strategy": "staged", - "query": "class Config", - "latency_ms": 40162.88519999385, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\embedding_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\watcher\\incremental_indexer.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\lsp\\server.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 6091.366767883301, - "stage2_expand_ms": 17540.942907333374, - "stage3_cluster_ms": 13169.558048248291, - "stage4_rerank_ms": 3317.5392150878906 - }, - "stage_counts": { - "stage1_candidates": 50, - "stage2_expanded": 99, - "stage3_clustered": 10, - "stage4_reranked": 10 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "class Config", - "latency_ms": 1571.1398999989033, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\migration_manager.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\splade_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\sqlite_store.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.031347, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 5, - "staged_unique_dirs_topk": 5, - "dense_unique_dirs_topk": 1 - }, - { - "query": "def search", - "staged": { - "strategy": "staged", - "query": "def search", - "latency_ms": 31623.380899995565, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\vector_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 400.84290504455566, - "stage2_expand_ms": 20529.58631515503, - "stage3_cluster_ms": 9625.348806381226, - "stage4_rerank_ms": 1027.686357498169 - }, - "stage_counts": { - "stage1_candidates": 50, - "stage2_expanded": 51, - "stage3_clustered": 10, - "stage4_reranked": 10 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "def search", - "latency_ms": 1376.3304999768734, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\chain_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\graph_expander.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\hybrid_search.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\ranking.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.125, - "rbo_topk": 0.20334699999999994, - "staged_unique_files_topk": 4, - "dense_unique_files_topk": 5, - "staged_unique_dirs_topk": 3, - "dense_unique_dirs_topk": 2 - }, - { - "query": "LspBridge", - "staged": { - "strategy": "staged", - "query": "LspBridge", - "latency_ms": 23375.40150000155, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\cli\\commands.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\vector_meta_store.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\semantic\\code_extractor.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\search\\enrichment.py" - ], - "stage_stats": { - "stage_times": { - "stage1_binary_ms": 392.41671562194824, - "stage2_expand_ms": 17760.897397994995, - "stage3_cluster_ms": 4194.235563278198, - "stage4_rerank_ms": 990.307092666626 - }, - "stage_counts": { - "stage1_candidates": 50, - "stage2_expanded": 50, - "stage3_clustered": 10, - "stage4_reranked": 10 - } - }, - "error": null - }, - "dense_rerank": { - "strategy": "dense_rerank", - "query": "LspBridge", - "latency_ms": 1256.1635999977589, - "num_results": 5, - "topk_paths": [ - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\dir_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\global_index.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\index_tree.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\path_mapper.py", - "d:\\claude_dms3\\codex-lens\\src\\codexlens\\storage\\registry.py" - ], - "stage_stats": null, - "error": null - }, - "jaccard_topk": 0.1111111111111111, - "rbo_topk": 0.20334699999999994, - "staged_unique_files_topk": 5, - "dense_unique_files_topk": 5, - "staged_unique_dirs_topk": 4, - "dense_unique_dirs_topk": 1 - } - ] -} \ No newline at end of file diff --git a/codex-lens/coir_benchmark_full.py b/codex-lens/coir_benchmark_full.py deleted file mode 100644 index 193b6043..00000000 --- a/codex-lens/coir_benchmark_full.py +++ /dev/null @@ -1,465 +0,0 @@ -""" -CoIR Benchmark Evaluation Report Generator - -Compares SPLADE with mainstream code retrieval models on CoIR benchmark tasks. -Generates comprehensive performance analysis report. -""" -import sys -import time -import json -from pathlib import Path -from datetime import datetime -from typing import Dict, List, Tuple -import numpy as np - -sys.path.insert(0, 'src') - -# ============================================================================= -# REFERENCE: Published CoIR Benchmark Scores (NDCG@10) -# Source: CoIR Paper (ACL 2025) - https://arxiv.org/abs/2407.02883 -# ============================================================================= - -COIR_REFERENCE_SCORES = { - # Model: {dataset: NDCG@10 score} - "Voyage-Code-002": { - "APPS": 26.52, "CosQA": 29.79, "Text2SQL": 69.26, "CodeSearchNet": 81.79, - "CCR": 73.45, "Contest-DL": 72.77, "StackOverflow": 27.28, - "FB-ST": 87.68, "FB-MT": 65.35, "Average": 56.26 - }, - "E5-Mistral-7B": { - "APPS": 21.33, "CosQA": 31.27, "Text2SQL": 65.98, "CodeSearchNet": 54.25, - "CCR": 65.27, "Contest-DL": 82.55, "StackOverflow": 33.24, - "FB-ST": 91.54, "FB-MT": 72.71, "Average": 55.18 - }, - "E5-Base": { - "APPS": 11.52, "CosQA": 32.59, "Text2SQL": 52.31, "CodeSearchNet": 67.99, - "CCR": 56.87, "Contest-DL": 62.50, "StackOverflow": 21.87, - "FB-ST": 86.86, "FB-MT": 74.52, "Average": 50.90 - }, - "OpenAI-Ada-002": { - "APPS": 8.70, "CosQA": 28.88, "Text2SQL": 58.32, "CodeSearchNet": 74.21, - "CCR": 69.13, "Contest-DL": 53.34, "StackOverflow": 26.04, - "FB-ST": 72.40, "FB-MT": 47.12, "Average": 45.59 - }, - "BGE-Base": { - "APPS": 4.05, "CosQA": 32.76, "Text2SQL": 45.59, "CodeSearchNet": 69.60, - "CCR": 45.56, "Contest-DL": 38.50, "StackOverflow": 21.71, - "FB-ST": 73.55, "FB-MT": 64.99, "Average": 42.77 - }, - "BGE-M3": { - "APPS": 7.37, "CosQA": 22.73, "Text2SQL": 48.76, "CodeSearchNet": 43.23, - "CCR": 47.55, "Contest-DL": 47.86, "StackOverflow": 31.16, - "FB-ST": 61.04, "FB-MT": 49.94, "Average": 39.31 - }, - "UniXcoder": { - "APPS": 1.36, "CosQA": 25.14, "Text2SQL": 50.45, "CodeSearchNet": 60.20, - "CCR": 58.36, "Contest-DL": 41.82, "StackOverflow": 31.03, - "FB-ST": 44.67, "FB-MT": 36.02, "Average": 37.33 - }, - "GTE-Base": { - "APPS": 3.24, "CosQA": 30.24, "Text2SQL": 46.19, "CodeSearchNet": 43.35, - "CCR": 35.50, "Contest-DL": 33.81, "StackOverflow": 28.80, - "FB-ST": 62.71, "FB-MT": 55.19, "Average": 36.75 - }, - "Contriever": { - "APPS": 5.14, "CosQA": 14.21, "Text2SQL": 45.46, "CodeSearchNet": 34.72, - "CCR": 35.74, "Contest-DL": 44.16, "StackOverflow": 24.21, - "FB-ST": 66.05, "FB-MT": 55.11, "Average": 36.40 - }, -} - -# Recent models (2025) -RECENT_MODELS = { - "Voyage-Code-3": {"Average": 62.5, "note": "13.8% better than OpenAI-v3-large"}, - "SFR-Embedding-Code-7B": {"Average": 67.4, "note": "#1 on CoIR (Feb 2025)"}, - "Jina-Code-v2": {"CosQA": 41.0, "note": "Strong on CosQA"}, - "CodeSage-Large": {"Average": 53.5, "note": "Specialized code model"}, -} - - -# ============================================================================= -# TEST DATA: Synthetic CoIR-like datasets for local evaluation -# ============================================================================= - -def create_test_datasets(): - """Create synthetic test datasets mimicking CoIR task types.""" - - # Text-to-Code (like CosQA, CodeSearchNet) - text_to_code = { - "name": "Text-to-Code", - "description": "Natural language queries to code snippets", - "corpus": [ - {"id": "c1", "text": "def authenticate_user(username: str, password: str) -> bool:\n user = db.get_user(username)\n if user and verify_hash(password, user.password_hash):\n return True\n return False"}, - {"id": "c2", "text": "async function fetchUserData(userId) {\n const response = await fetch(`/api/users/${userId}`);\n if (!response.ok) throw new Error('User not found');\n return response.json();\n}"}, - {"id": "c3", "text": "def calculate_statistics(data: List[float]) -> Dict[str, float]:\n return {\n 'mean': np.mean(data),\n 'std': np.std(data),\n 'median': np.median(data)\n }"}, - {"id": "c4", "text": "SELECT u.id, u.name, u.email, COUNT(o.id) as order_count\nFROM users u LEFT JOIN orders o ON u.id = o.user_id\nWHERE u.status = 'active'\nGROUP BY u.id, u.name, u.email"}, - {"id": "c5", "text": "def merge_sort(arr: List[int]) -> List[int]:\n if len(arr) <= 1:\n return arr\n mid = len(arr) // 2\n left = merge_sort(arr[:mid])\n right = merge_sort(arr[mid:])\n return merge(left, right)"}, - {"id": "c6", "text": "app.post('/api/auth/login', async (req, res) => {\n const { email, password } = req.body;\n const user = await User.findByEmail(email);\n if (!user || !await bcrypt.compare(password, user.password)) {\n return res.status(401).json({ error: 'Invalid credentials' });\n }\n const token = jwt.sign({ userId: user.id }, process.env.JWT_SECRET);\n res.json({ token });\n});"}, - {"id": "c7", "text": "CREATE TABLE products (\n id SERIAL PRIMARY KEY,\n name VARCHAR(255) NOT NULL,\n price DECIMAL(10, 2) NOT NULL,\n category_id INTEGER REFERENCES categories(id),\n created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP\n);"}, - {"id": "c8", "text": "def read_json_file(filepath: str) -> Dict:\n with open(filepath, 'r', encoding='utf-8') as f:\n return json.load(f)"}, - {"id": "c9", "text": "class UserRepository:\n def __init__(self, session):\n self.session = session\n \n def find_by_email(self, email: str) -> Optional[User]:\n return self.session.query(User).filter(User.email == email).first()"}, - {"id": "c10", "text": "try:\n result = await process_data(input_data)\nexcept ValidationError as e:\n logger.error(f'Validation failed: {e}')\n raise HTTPException(status_code=400, detail=str(e))\nexcept DatabaseError as e:\n logger.critical(f'Database error: {e}')\n raise HTTPException(status_code=500, detail='Internal server error')"}, - ], - "queries": [ - {"id": "q1", "text": "function to verify user password and authenticate", "relevant": ["c1", "c6"]}, - {"id": "q2", "text": "async http request to fetch user data", "relevant": ["c2"]}, - {"id": "q3", "text": "calculate mean median standard deviation statistics", "relevant": ["c3"]}, - {"id": "q4", "text": "SQL query join users and orders count", "relevant": ["c4", "c7"]}, - {"id": "q5", "text": "recursive sorting algorithm implementation", "relevant": ["c5"]}, - {"id": "q6", "text": "REST API login endpoint with JWT token", "relevant": ["c6", "c1"]}, - {"id": "q7", "text": "create database table with foreign key", "relevant": ["c7"]}, - {"id": "q8", "text": "read and parse JSON file python", "relevant": ["c8"]}, - {"id": "q9", "text": "repository pattern find user by email", "relevant": ["c9", "c1"]}, - {"id": "q10", "text": "exception handling with logging", "relevant": ["c10"]}, - ] - } - - # Code-to-Code (like CCR) - code_to_code = { - "name": "Code-to-Code", - "description": "Find similar code implementations", - "corpus": [ - {"id": "c1", "text": "def add(a, b): return a + b"}, - {"id": "c2", "text": "function sum(x, y) { return x + y; }"}, - {"id": "c3", "text": "func add(a int, b int) int { return a + b }"}, - {"id": "c4", "text": "def subtract(a, b): return a - b"}, - {"id": "c5", "text": "def multiply(a, b): return a * b"}, - {"id": "c6", "text": "const add = (a, b) => a + b;"}, - {"id": "c7", "text": "fn add(a: i32, b: i32) -> i32 { a + b }"}, - {"id": "c8", "text": "public int add(int a, int b) { return a + b; }"}, - ], - "queries": [ - {"id": "q1", "text": "def add(a, b): return a + b", "relevant": ["c1", "c2", "c3", "c6", "c7", "c8"]}, - {"id": "q2", "text": "def subtract(x, y): return x - y", "relevant": ["c4"]}, - {"id": "q3", "text": "def mult(x, y): return x * y", "relevant": ["c5"]}, - ] - } - - # Text2SQL - text2sql = { - "name": "Text2SQL", - "description": "Natural language to SQL queries", - "corpus": [ - {"id": "c1", "text": "SELECT * FROM users WHERE active = 1"}, - {"id": "c2", "text": "SELECT COUNT(*) FROM orders WHERE status = 'pending'"}, - {"id": "c3", "text": "SELECT u.name, SUM(o.total) FROM users u JOIN orders o ON u.id = o.user_id GROUP BY u.name"}, - {"id": "c4", "text": "UPDATE products SET price = price * 1.1 WHERE category = 'electronics'"}, - {"id": "c5", "text": "DELETE FROM sessions WHERE expires_at < NOW()"}, - {"id": "c6", "text": "INSERT INTO users (name, email) VALUES ('John', 'john@example.com')"}, - ], - "queries": [ - {"id": "q1", "text": "get all active users", "relevant": ["c1"]}, - {"id": "q2", "text": "count pending orders", "relevant": ["c2"]}, - {"id": "q3", "text": "total order amount by user", "relevant": ["c3"]}, - {"id": "q4", "text": "increase electronics prices by 10%", "relevant": ["c4"]}, - {"id": "q5", "text": "remove expired sessions", "relevant": ["c5"]}, - {"id": "q6", "text": "add new user", "relevant": ["c6"]}, - ] - } - - return [text_to_code, code_to_code, text2sql] - - -# ============================================================================= -# EVALUATION FUNCTIONS -# ============================================================================= - -def ndcg_at_k(ranked_list: List[str], relevant: List[str], k: int = 10) -> float: - """Calculate NDCG@k.""" - dcg = 0.0 - for i, doc_id in enumerate(ranked_list[:k]): - if doc_id in relevant: - dcg += 1.0 / np.log2(i + 2) - - # Ideal DCG - ideal_k = min(len(relevant), k) - idcg = sum(1.0 / np.log2(i + 2) for i in range(ideal_k)) - - return dcg / idcg if idcg > 0 else 0.0 - - -def precision_at_k(ranked_list: List[str], relevant: List[str], k: int = 10) -> float: - """Calculate Precision@k.""" - retrieved = set(ranked_list[:k]) - relevant_set = set(relevant) - return len(retrieved & relevant_set) / k - - -def recall_at_k(ranked_list: List[str], relevant: List[str], k: int = 10) -> float: - """Calculate Recall@k.""" - retrieved = set(ranked_list[:k]) - relevant_set = set(relevant) - return len(retrieved & relevant_set) / len(relevant_set) if relevant_set else 0.0 - - -def mrr(ranked_list: List[str], relevant: List[str]) -> float: - """Calculate Mean Reciprocal Rank.""" - for i, doc_id in enumerate(ranked_list): - if doc_id in relevant: - return 1.0 / (i + 1) - return 0.0 - - -def evaluate_model(model_name: str, encode_fn, datasets: List[Dict]) -> Dict: - """Evaluate a model on all datasets.""" - results = {} - - for dataset in datasets: - corpus = dataset["corpus"] - queries = dataset["queries"] - - corpus_ids = [doc["id"] for doc in corpus] - corpus_texts = [doc["text"] for doc in corpus] - corpus_embs = encode_fn(corpus_texts) - - metrics = {"ndcg@10": [], "precision@10": [], "recall@10": [], "mrr": []} - - for query in queries: - query_emb = encode_fn([query["text"]])[0] - - # Compute similarity scores - if hasattr(corpus_embs, 'shape') and len(corpus_embs.shape) == 2: - # Dense vectors - cosine similarity - q_norm = query_emb / (np.linalg.norm(query_emb) + 1e-8) - c_norm = corpus_embs / (np.linalg.norm(corpus_embs, axis=1, keepdims=True) + 1e-8) - scores = np.dot(c_norm, q_norm) - else: - # Sparse - dot product - scores = np.array([np.dot(c, query_emb) for c in corpus_embs]) - - ranked_indices = np.argsort(scores)[::-1] - ranked_ids = [corpus_ids[i] for i in ranked_indices] - relevant = query["relevant"] - - metrics["ndcg@10"].append(ndcg_at_k(ranked_ids, relevant, 10)) - metrics["precision@10"].append(precision_at_k(ranked_ids, relevant, 10)) - metrics["recall@10"].append(recall_at_k(ranked_ids, relevant, 10)) - metrics["mrr"].append(mrr(ranked_ids, relevant)) - - results[dataset["name"]] = {k: np.mean(v) * 100 for k, v in metrics.items()} - - # Calculate average - all_ndcg = [results[d["name"]]["ndcg@10"] for d in datasets] - results["Average"] = { - "ndcg@10": np.mean(all_ndcg), - "note": "Average across all datasets" - } - - return results - - -# ============================================================================= -# MODEL IMPLEMENTATIONS -# ============================================================================= - -def get_splade_encoder(): - """Get SPLADE encoding function.""" - from codexlens.semantic.splade_encoder import get_splade_encoder as _get_splade - encoder = _get_splade() - - def encode(texts): - sparse_vecs = encoder.encode_batch(texts) if len(texts) > 1 else [encoder.encode_text(texts[0])] - # Convert to dense for comparison - vocab_size = encoder.vocab_size - dense = np.zeros((len(sparse_vecs), vocab_size), dtype=np.float32) - for i, sv in enumerate(sparse_vecs): - for tid, w in sv.items(): - dense[i, tid] = w - return dense - - return encode - - -def get_dense_encoder(model_name: str = "all-MiniLM-L6-v2"): - """Get dense embedding encoding function.""" - from sentence_transformers import SentenceTransformer - model = SentenceTransformer(model_name) - - def encode(texts): - return model.encode(texts, show_progress_bar=False) - - return encode - - -# ============================================================================= -# REPORT GENERATION -# ============================================================================= - -def generate_report(local_results: Dict, output_path: str = None): - """Generate comprehensive benchmark report.""" - - report = [] - report.append("=" * 80) - report.append("CODE RETRIEVAL BENCHMARK REPORT") - report.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") - report.append("=" * 80) - - # Section 1: Reference Benchmark Scores - report.append("\n## 1. CoIR Benchmark Reference Scores (Published)") - report.append("\nSource: CoIR Paper (ACL 2025) - https://arxiv.org/abs/2407.02883") - report.append("\n### NDCG@10 Scores by Model and Dataset\n") - - # Header - datasets = ["APPS", "CosQA", "Text2SQL", "CodeSearchNet", "CCR", "Contest-DL", "StackOverflow", "FB-ST", "FB-MT", "Average"] - header = "| Model | " + " | ".join(datasets) + " |" - separator = "|" + "|".join(["---"] * (len(datasets) + 1)) + "|" - report.append(header) - report.append(separator) - - # Data rows - for model, scores in COIR_REFERENCE_SCORES.items(): - row = f"| {model} | " + " | ".join([f"{scores.get(d, '-'):.2f}" if isinstance(scores.get(d), (int, float)) else str(scores.get(d, '-')) for d in datasets]) + " |" - report.append(row) - - # Section 2: Recent Models - report.append("\n### Recent Top Performers (2025)\n") - report.append("| Model | Average NDCG@10 | Notes |") - report.append("|-------|-----------------|-------|") - for model, info in RECENT_MODELS.items(): - avg = info.get("Average", "-") - note = info.get("note", "") - report.append(f"| {model} | {avg} | {note} |") - - # Section 3: Local Evaluation Results - report.append("\n## 2. Local Evaluation Results\n") - report.append("Evaluated on synthetic CoIR-like datasets\n") - - for model_name, results in local_results.items(): - report.append(f"\n### {model_name}\n") - report.append("| Dataset | NDCG@10 | Precision@10 | Recall@10 | MRR |") - report.append("|---------|---------|--------------|-----------|-----|") - for dataset_name, metrics in results.items(): - if dataset_name == "Average": - continue - ndcg = metrics.get("ndcg@10", 0) - prec = metrics.get("precision@10", 0) - rec = metrics.get("recall@10", 0) - m = metrics.get("mrr", 0) - report.append(f"| {dataset_name} | {ndcg:.2f} | {prec:.2f} | {rec:.2f} | {m:.2f} |") - - if "Average" in results: - avg = results["Average"]["ndcg@10"] - report.append(f"| **Average** | **{avg:.2f}** | - | - | - |") - - # Section 4: Comparison Analysis - report.append("\n## 3. Comparison Analysis\n") - - if "SPLADE" in local_results and "Dense (MiniLM)" in local_results: - splade_avg = local_results["SPLADE"]["Average"]["ndcg@10"] - dense_avg = local_results["Dense (MiniLM)"]["Average"]["ndcg@10"] - - report.append("### SPLADE vs Dense Embedding\n") - report.append(f"- SPLADE Average NDCG@10: {splade_avg:.2f}") - report.append(f"- Dense (MiniLM) Average NDCG@10: {dense_avg:.2f}") - - if splade_avg > dense_avg: - diff = ((splade_avg - dense_avg) / dense_avg) * 100 - report.append(f"- SPLADE outperforms by {diff:.1f}%") - else: - diff = ((dense_avg - splade_avg) / splade_avg) * 100 - report.append(f"- Dense outperforms by {diff:.1f}%") - - # Section 5: Key Insights - report.append("\n## 4. Key Insights\n") - report.append(""" -1. **Voyage-Code-002** achieved highest mean score (56.26) on original CoIR benchmark -2. **SFR-Embedding-Code-7B** (Salesforce) reached #1 in Feb 2025 with 67.4 average -3. **SPLADE** provides good balance of: - - Interpretability (visible token activations) - - Query expansion (learned synonyms) - - Efficient sparse retrieval - -4. **Task-specific performance varies significantly**: - - E5-Mistral excels at Contest-DL (82.55) but median on APPS - - Voyage-Code-002 excels at CodeSearchNet (81.79) - - No single model dominates all tasks - -5. **Hybrid approaches recommended**: - - Combine sparse (SPLADE/BM25) with dense for best results - - Use RRF (Reciprocal Rank Fusion) for score combination -""") - - # Section 6: Recommendations - report.append("\n## 5. Recommendations for Codex-lens\n") - report.append(""" -| Use Case | Recommended Approach | -|----------|---------------------| -| General code search | SPLADE + Dense hybrid | -| Exact keyword match | FTS (BM25) | -| Semantic understanding | Dense embedding | -| Interpretable results | SPLADE only | -| Maximum accuracy | SFR-Embedding-Code + SPLADE fusion | -""") - - report_text = "\n".join(report) - - if output_path: - with open(output_path, 'w', encoding='utf-8') as f: - f.write(report_text) - print(f"Report saved to: {output_path}") - - return report_text - - -# ============================================================================= -# MAIN -# ============================================================================= - -def main(): - print("=" * 80) - print("CODE RETRIEVAL BENCHMARK EVALUATION") - print("=" * 80) - - # Create test datasets - print("\nCreating test datasets...") - datasets = create_test_datasets() - print(f" Created {len(datasets)} datasets") - - local_results = {} - - # Evaluate SPLADE - print("\nEvaluating SPLADE...") - try: - from codexlens.semantic.splade_encoder import check_splade_available - ok, err = check_splade_available() - if ok: - start = time.perf_counter() - splade_encode = get_splade_encoder() - splade_results = evaluate_model("SPLADE", splade_encode, datasets) - elapsed = time.perf_counter() - start - local_results["SPLADE"] = splade_results - print(f" SPLADE evaluated in {elapsed:.2f}s") - print(f" Average NDCG@10: {splade_results['Average']['ndcg@10']:.2f}") - else: - print(f" SPLADE not available: {err}") - except Exception as e: - print(f" SPLADE evaluation failed: {e}") - - # Evaluate Dense (MiniLM) - print("\nEvaluating Dense (all-MiniLM-L6-v2)...") - try: - start = time.perf_counter() - dense_encode = get_dense_encoder("all-MiniLM-L6-v2") - dense_results = evaluate_model("Dense (MiniLM)", dense_encode, datasets) - elapsed = time.perf_counter() - start - local_results["Dense (MiniLM)"] = dense_results - print(f" Dense evaluated in {elapsed:.2f}s") - print(f" Average NDCG@10: {dense_results['Average']['ndcg@10']:.2f}") - except Exception as e: - print(f" Dense evaluation failed: {e}") - - # Generate report - print("\nGenerating report...") - report = generate_report(local_results, "benchmark_report.md") - - print("\n" + "=" * 80) - print("BENCHMARK COMPLETE") - print("=" * 80) - print("\nReport preview:\n") - print(report[:3000] + "\n...[truncated]...") - - return local_results - - -if __name__ == "__main__": - main() diff --git a/codex-lens/debug_semantic_search.py b/codex-lens/debug_semantic_search.py deleted file mode 100644 index 57febe31..00000000 --- a/codex-lens/debug_semantic_search.py +++ /dev/null @@ -1,318 +0,0 @@ -#!/usr/bin/env python -"""Debug script to trace semantic search (dense_rerank) flow step by step.""" - -import json -import logging -import sqlite3 -import sys -from pathlib import Path -from typing import Any, Dict, List, Tuple - -# Add src to path -sys.path.insert(0, str(Path(__file__).parent / "src")) - -# Configure detailed logging -logging.basicConfig( - level=logging.DEBUG, - format="%(asctime)s | %(levelname)-5s | %(name)s | %(message)s", - datefmt="%H:%M:%S", -) - -# Enable debug for specific modules -for name in ["codexlens.search", "codexlens.semantic", "codexlens.indexing"]: - logging.getLogger(name).setLevel(logging.DEBUG) - -logger = logging.getLogger("debug_semantic") - - -def load_config() -> Dict[str, Any]: - """Load config from codexlens settings.""" - config_path = Path.home() / ".codexlens" / "config.json" - if config_path.exists(): - with open(config_path) as f: - return json.load(f) - return {} - - -def inspect_hnsw_index(index_root: Path) -> Dict[str, Any]: - """Inspect centralized HNSW index metadata.""" - hnsw_path = index_root / "_vectors.hnsw" - meta_path = index_root / "_vectors_meta.db" - - result = { - "hnsw_exists": hnsw_path.exists(), - "meta_exists": meta_path.exists(), - "hnsw_size_mb": round(hnsw_path.stat().st_size / (1024*1024), 2) if hnsw_path.exists() else 0, - } - - if meta_path.exists(): - conn = sqlite3.connect(str(meta_path)) - cursor = conn.execute("SELECT COUNT(*) FROM chunk_metadata") - result["total_chunks"] = cursor.fetchone()[0] - - # Sample file paths - cursor = conn.execute(""" - SELECT DISTINCT file_path FROM chunk_metadata - ORDER BY file_path LIMIT 20 - """) - result["sample_files"] = [row[0] for row in cursor.fetchall()] - - # Check if tests vs src - cursor = conn.execute(""" - SELECT - CASE - WHEN file_path LIKE '%tests%' OR file_path LIKE '%test_%' THEN 'test' - ELSE 'src' - END as category, - COUNT(*) as count - FROM chunk_metadata - GROUP BY category - """) - result["category_distribution"] = {row[0]: row[1] for row in cursor.fetchall()} - - conn.close() - - return result - - -def run_dense_search(query: str, index_root: Path, top_k: int = 50) -> List[Tuple[int, float, str]]: - """Execute dense vector search and return candidates with details.""" - from codexlens.semantic.ann_index import ANNIndex - from codexlens.semantic.factory import get_embedder - from codexlens.semantic.vector_store import VectorStore - - logger.info("=" * 60) - logger.info("STAGE 1: Dense Embedding Generation") - logger.info("=" * 60) - - # Read model config from index - index_db = index_root / "_index.db" - embedding_model = "qwen3-embedding-sf" - embedding_backend = "litellm" - - if index_db.exists(): - try: - with VectorStore(index_db) as vs: - model_config = vs.get_model_config() - if model_config: - embedding_backend = model_config.get("backend", embedding_backend) - embedding_model = model_config.get("model_name", embedding_model) - logger.info(f"Model config from index: {embedding_backend}/{embedding_model}") - except Exception as e: - logger.warning(f"Failed to read model config: {e}") - - # Generate query embedding - embedder = get_embedder(backend=embedding_backend, model=embedding_model) - query_embedding = embedder.embed_to_numpy([query])[0] - logger.info(f"Query: {query!r}") - logger.info(f"Query embedding dim: {query_embedding.shape[0]}") - logger.info(f"Query embedding norm: {(query_embedding**2).sum()**0.5:.4f}") - - # Load HNSW index - logger.info("=" * 60) - logger.info("STAGE 2: HNSW Vector Search (Coarse)") - logger.info("=" * 60) - - ann_index = ANNIndex.create_central( - index_root=index_root, - dim=query_embedding.shape[0], - ) - if not ann_index.load(): - logger.error("Failed to load HNSW index") - return [] - - logger.info(f"HNSW index count: {ann_index.count()}") - - # Execute search - ids, distances = ann_index.search(query_embedding, top_k=top_k) - logger.info(f"Found {len(ids)} candidates") - - # Get chunk details - candidates = [] - meta_path = index_root / "_vectors_meta.db" - if meta_path.exists(): - conn = sqlite3.connect(str(meta_path)) - conn.row_factory = sqlite3.Row - - for chunk_id, distance in zip(ids, distances): - cursor = conn.execute(""" - SELECT file_path, content, start_line, end_line - FROM chunk_metadata WHERE chunk_id = ? - """, (int(chunk_id),)) - row = cursor.fetchone() - if row: - candidates.append(( - int(chunk_id), - float(distance), - row["file_path"], - row["content"][:200] if row["content"] else "", - row["start_line"], - row["end_line"], - )) - conn.close() - - # Print top candidates - logger.info("\nTop 20 Dense Search Candidates:") - logger.info("-" * 80) - for i, (cid, dist, path, content, start, end) in enumerate(candidates[:20]): - score = max(0, 1 - dist) - is_test = "tests/" in path or "test_" in Path(path).name - marker = "[TEST]" if is_test else "[SRC]" - logger.info(f"{i+1:2d}. {marker} dist={dist:.4f} score={score:.4f}") - logger.info(f" {path}:{start}-{end}") - logger.info(f" {content[:100]}...") - logger.info("") - - return candidates - - -def run_reranking(query: str, candidates: List[Tuple], top_k: int = 10) -> List[Tuple[str, float, float]]: - """Execute cross-encoder reranking on candidates.""" - from codexlens.semantic.reranker import get_reranker, check_reranker_available - - logger.info("=" * 60) - logger.info("STAGE 3: Cross-Encoder Reranking") - logger.info("=" * 60) - - # Check reranker availability - config = load_config() - backend = config.get("reranker_backend", "api") - model = config.get("reranker_model", "Qwen/Qwen3-Reranker-8B") - - logger.info(f"Reranker backend: {backend}") - logger.info(f"Reranker model: {model}") - - ok, err = check_reranker_available(backend) - if not ok: - logger.error(f"Reranker not available: {err}") - return [] - - reranker = get_reranker(backend=backend, model_name=model) - - # Prepare pairs for reranking - pairs = [] - for cid, dist, path, content, start, end in candidates[:50]: # Top 50 for reranking - doc_text = content if content else path - pairs.append((query, doc_text)) - - logger.info(f"Reranking {len(pairs)} candidates...") - - # Execute reranking - scores = reranker.score_pairs(pairs, batch_size=32) - - # Combine scores - results = [] - for i, (cid, dist, path, content, start, end) in enumerate(candidates[:len(scores)]): - dense_score = max(0, 1 - dist) - rerank_score = scores[i] - combined = 0.5 * dense_score + 0.5 * rerank_score - is_test = "tests/" in path or "test_" in Path(path).name - results.append((path, dense_score, rerank_score, combined, is_test, content[:100])) - - # Sort by combined score - results.sort(key=lambda x: x[3], reverse=True) - - logger.info("\nTop 20 Reranked Results:") - logger.info("-" * 100) - logger.info(f"{'Rank':>4} {'Type':^6} {'Dense':^8} {'Rerank':^8} {'Combined':^8} Path") - logger.info("-" * 100) - for i, (path, dense, rerank, combined, is_test, content) in enumerate(results[:20]): - marker = "TEST" if is_test else "SRC" - logger.info(f"{i+1:4d} [{marker:^4}] {dense:8.4f} {rerank:8.4f} {combined:8.4f} {path}") - - return results[:top_k] - - -def analyze_problem(candidates: List[Tuple], results: List[Tuple]): - """Analyze why tests might rank higher than src files.""" - logger.info("=" * 60) - logger.info("ANALYSIS: Why Tests Rank Higher?") - logger.info("=" * 60) - - # Count test vs src in dense candidates - test_in_dense = sum(1 for c in candidates[:50] if "tests/" in c[2] or "test_" in Path(c[2]).name) - src_in_dense = 50 - test_in_dense - - logger.info(f"\nDense Search (top 50):") - logger.info(f" - Test files: {test_in_dense} ({test_in_dense*2}%)") - logger.info(f" - Src files: {src_in_dense} ({src_in_dense*2}%)") - - # Average scores by category - test_dense_scores = [max(0, 1-c[1]) for c in candidates[:50] if "tests/" in c[2] or "test_" in Path(c[2]).name] - src_dense_scores = [max(0, 1-c[1]) for c in candidates[:50] if not ("tests/" in c[2] or "test_" in Path(c[2]).name)] - - if test_dense_scores: - logger.info(f"\nDense Score Averages:") - logger.info(f" - Test files: {sum(test_dense_scores)/len(test_dense_scores):.4f}") - if src_dense_scores: - logger.info(f" - Src files: {sum(src_dense_scores)/len(src_dense_scores):.4f}") - - # Check rerank score distribution - test_results = [r for r in results if r[4]] - src_results = [r for r in results if not r[4]] - - if test_results and src_results: - logger.info(f"\nRerank Score Averages:") - logger.info(f" - Test files: {sum(r[2] for r in test_results)/len(test_results):.4f}") - logger.info(f" - Src files: {sum(r[2] for r in src_results)/len(src_results):.4f}") - - logger.info("\n" + "=" * 60) - logger.info("HYPOTHESIS:") - logger.info("=" * 60) - - if test_in_dense > src_in_dense: - logger.info("→ Problem is at DENSE SEARCH stage") - logger.info(" Test files have embeddings closer to query") - logger.info(" Possible causes:") - logger.info(" 1. Test files mention implementation concepts in comments/docstrings") - logger.info(" 2. Embedding model doesn't distinguish between tests and implementation") - logger.info(" 3. Test file chunks are more frequent in the index") - else: - logger.info("→ Problem may be at RERANKING stage") - logger.info(" Reranker gives higher scores to test content") - - -def main(): - query = "文件索引和嵌入向量生成的实现逻辑" - index_root = Path(r"C:\Users\dyw\.codexlens\indexes\D\Claude_dms3") - - logger.info("=" * 60) - logger.info("DEBUG: Semantic Search Analysis") - logger.info("=" * 60) - logger.info(f"Query: {query}") - logger.info(f"Index root: {index_root}") - logger.info("") - - # Step 1: Inspect index - logger.info("STEP 0: Index Inspection") - logger.info("-" * 60) - index_info = inspect_hnsw_index(index_root) - for k, v in index_info.items(): - if k == "sample_files": - logger.info(f" {k}:") - for f in v[:10]: - logger.info(f" - {f}") - elif k == "category_distribution": - logger.info(f" {k}:") - for cat, count in v.items(): - logger.info(f" - {cat}: {count}") - else: - logger.info(f" {k}: {v}") - logger.info("") - - # Step 2: Dense search - candidates = run_dense_search(query, index_root, top_k=100) - - if not candidates: - logger.error("No candidates from dense search") - return - - # Step 3: Reranking - results = run_reranking(query, candidates, top_k=20) - - # Step 4: Analyze - analyze_problem(candidates, results) - - -if __name__ == "__main__": - main() diff --git a/codex-lens/debug_semantic_v2.py b/codex-lens/debug_semantic_v2.py deleted file mode 100644 index 3c335272..00000000 --- a/codex-lens/debug_semantic_v2.py +++ /dev/null @@ -1,276 +0,0 @@ -#!/usr/bin/env python -"""Debug script v2: Trace the full semantic search flow with detailed logging.""" - -import json -import logging -import sqlite3 -import sys -from collections import defaultdict -from pathlib import Path -from typing import Any, Dict, List, Tuple - -# Add src to path -sys.path.insert(0, str(Path(__file__).parent / "src")) - -# Configure logging -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s | %(levelname)-5s | %(message)s", - datefmt="%H:%M:%S", -) -logger = logging.getLogger("debug") - - -def count_chunks_by_category(index_root: Path) -> Dict[str, int]: - """Count chunks by category (src vs test) across all indexes.""" - counts = defaultdict(int) - - for db_path in index_root.rglob("_index.db"): - try: - conn = sqlite3.connect(str(db_path)) - cursor = conn.execute(""" - SELECT file_path FROM semantic_chunks - """) - for row in cursor: - path = row[0] - if "tests" in path or "test_" in Path(path).name: - counts["test"] += 1 - else: - counts["src"] += 1 - conn.close() - except: - pass - - return dict(counts) - - -def run_dense_search_with_trace(query: str, source_path: Path) -> List[Dict]: - """Run dense search with detailed tracing.""" - from codexlens.config import Config - from codexlens.search.chain_search import ChainSearchEngine, SearchOptions - from codexlens.storage.registry import Registry - from codexlens.storage.path_mapper import PathMapper - - # Load config - config = Config.load() - registry = Registry(config.data_dir) - mapper = PathMapper(config.data_dir) - - # Create search engine with verbose logging - engine = ChainSearchEngine(registry, mapper, config=config) - engine.logger.setLevel(logging.DEBUG) - - # Set up handler to capture all log output - handler = logging.StreamHandler() - handler.setLevel(logging.DEBUG) - engine.logger.addHandler(handler) - - # Execute cascade search with dense_rerank strategy - options = SearchOptions(depth=-1) # Search all subdirectories - - logger.info("=" * 70) - logger.info("Executing dense_rerank cascade search...") - logger.info(f"Query: {query}") - logger.info(f"Source: {source_path}") - logger.info("=" * 70) - - result = engine.cascade_search( - query=query, - source_path=source_path, - k=20, - coarse_k=100, - options=options, - strategy="dense_rerank" - ) - - # Analyze results - logger.info("\n" + "=" * 70) - logger.info("SEARCH RESULTS ANALYSIS") - logger.info("=" * 70) - - test_count = 0 - src_count = 0 - results_detail = [] - - for i, r in enumerate(result.results): - is_test = "tests" in r.path or "test_" in Path(r.path).name - if is_test: - test_count += 1 - category = "TEST" - else: - src_count += 1 - category = "SRC" - - # Get metadata scores if available - pre_ce_score = r.metadata.get("pre_cross_encoder_score", r.score) - ce_score = r.metadata.get("cross_encoder_score", 0) - ce_prob = r.metadata.get("cross_encoder_prob", 0) - - results_detail.append({ - "rank": i + 1, - "category": category, - "path": r.path, - "score": r.score, - "pre_ce_score": pre_ce_score, - "ce_score": ce_score, - "ce_prob": ce_prob, - "excerpt": r.excerpt[:100] if r.excerpt else "", - }) - - logger.info(f"{i+1:2d}. [{category:4s}] score={r.score:.4f} pre_ce={pre_ce_score:.4f} ce={ce_score:.4f}") - logger.info(f" {r.path}") - if r.excerpt: - logger.info(f" {r.excerpt[:80]}...") - logger.info("") - - logger.info(f"\nSummary: {src_count} SRC files, {test_count} TEST files in top {len(result.results)}") - logger.info(f"Search time: {result.stats.time_ms:.2f}ms") - - return results_detail - - -def compare_coarse_candidates(): - """Compare coarse candidates before and after reranking.""" - from codexlens.config import Config - from codexlens.semantic.factory import get_embedder - from codexlens.semantic.ann_index import ANNIndex - - query = "文件索引和嵌入向量生成的实现逻辑" - config = Config.load() - - # Generate query embedding - embedder = get_embedder(backend="litellm", model="qwen3-embedding-sf") - query_embedding = embedder.embed_to_numpy([query])[0] - - logger.info("=" * 70) - logger.info("COARSE CANDIDATE ANALYSIS (per directory)") - logger.info("=" * 70) - - # Scan all HNSW indexes - index_root = Path(r"C:\Users\dyw\.codexlens\indexes\D\Claude_dms3\codex-lens") - - all_candidates = [] - - for hnsw_path in index_root.rglob("_index_vectors.hnsw"): - db_path = hnsw_path.parent / "_index.db" - if not db_path.exists(): - continue - - try: - ann_index = ANNIndex(db_path, dim=query_embedding.shape[0]) - if not ann_index.load() or ann_index.count() == 0: - continue - - ids, distances = ann_index.search(query_embedding, top_k=10) - - # Get file paths from chunks - conn = sqlite3.connect(str(db_path)) - conn.row_factory = sqlite3.Row - - dir_name = hnsw_path.parent.relative_to(index_root) - - for chunk_id, dist in zip(ids, distances): - cursor = conn.execute(""" - SELECT file_path, content FROM semantic_chunks WHERE id = ? - """, (int(chunk_id),)) - row = cursor.fetchone() - if row: - is_test = "tests" in row["file_path"] or "test_" in Path(row["file_path"]).name - all_candidates.append({ - "dir": str(dir_name), - "chunk_id": int(chunk_id), - "distance": float(dist), - "score": max(0, 1 - float(dist)), - "is_test": is_test, - "file_path": row["file_path"], - "content_preview": row["content"][:100] if row["content"] else "" - }) - conn.close() - - except Exception as e: - logger.warning(f"Error processing {hnsw_path}: {e}") - - # Sort by distance (closest first) - all_candidates.sort(key=lambda x: x["distance"]) - - logger.info(f"\nTotal coarse candidates across all directories: {len(all_candidates)}") - - # Analyze distribution - test_candidates = [c for c in all_candidates if c["is_test"]] - src_candidates = [c for c in all_candidates if not c["is_test"]] - - logger.info(f"Test files: {len(test_candidates)}") - logger.info(f"Src files: {len(src_candidates)}") - - if test_candidates: - avg_test_dist = sum(c["distance"] for c in test_candidates) / len(test_candidates) - logger.info(f"Avg test distance: {avg_test_dist:.4f}") - if src_candidates: - avg_src_dist = sum(c["distance"] for c in src_candidates) / len(src_candidates) - logger.info(f"Avg src distance: {avg_src_dist:.4f}") - - logger.info("\nTop 30 candidates (combined from all directories):") - logger.info("-" * 90) - for i, c in enumerate(all_candidates[:30]): - cat = "TEST" if c["is_test"] else "SRC" - logger.info(f"{i+1:2d}. [{cat:4s}] dist={c['distance']:.4f} score={c['score']:.4f} dir={c['dir']}") - logger.info(f" {Path(c['file_path']).name}") - - return all_candidates - - -def main(): - logger.info("=" * 70) - logger.info("SEMANTIC SEARCH DEBUG SESSION") - logger.info("=" * 70) - - # Step 1: Count chunks distribution - index_root = Path(r"C:\Users\dyw\.codexlens\indexes\D\Claude_dms3\codex-lens") - counts = count_chunks_by_category(index_root) - logger.info(f"\nChunk distribution in index:") - logger.info(f" - Test chunks: {counts.get('test', 0)}") - logger.info(f" - Src chunks: {counts.get('src', 0)}") - - # Step 2: Compare coarse candidates - logger.info("\n") - candidates = compare_coarse_candidates() - - # Step 3: Run full search - logger.info("\n") - query = "文件索引和嵌入向量生成的实现逻辑" - source_path = Path(r"D:\Claude_dms3\codex-lens") - results = run_dense_search_with_trace(query, source_path) - - # Summary - logger.info("\n" + "=" * 70) - logger.info("ROOT CAUSE ANALYSIS") - logger.info("=" * 70) - - test_in_top10 = sum(1 for r in results[:10] if r["category"] == "TEST") - src_in_top10 = 10 - test_in_top10 - - logger.info(f"\nTop 10 results: {src_in_top10} SRC, {test_in_top10} TEST") - - if test_in_top10 > src_in_top10: - logger.info("\nPROBLEM: Test files dominate top results") - logger.info("\nPossible causes:") - logger.info(" 1. Test files mention implementation concepts explicitly") - logger.info(" (e.g., docstrings describe what they test)") - logger.info(" 2. Embedding model treats test descriptions as similar to") - logger.info(" implementation descriptions") - logger.info(" 3. Cross-encoder reranker gives higher scores to") - logger.info(" descriptive test content over implementation code") - - # Check if coarse candidates already favor tests - test_in_coarse_top30 = sum(1 for c in candidates[:30] if c["is_test"]) - if test_in_coarse_top30 > 15: - logger.info(f"\n → Dense coarse search already favors tests") - logger.info(f" ({test_in_coarse_top30}/30 test files in coarse top-30)") - logger.info(f" Problem is at EMBEDDING/DENSE SEARCH stage") - else: - logger.info(f"\n → Coarse search is balanced ({test_in_coarse_top30}/30 tests)") - logger.info(f" Problem is at CROSS-ENCODER RERANKING stage") - - -if __name__ == "__main__": - main() diff --git a/codex-lens/dist/codex_lens-0.1.0-py3-none-any.whl b/codex-lens/dist/codex_lens-0.1.0-py3-none-any.whl deleted file mode 100644 index 5b8af4bb63085f6d2267c60c35a89a2e2665fc22..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 378480 zcmZU)V~}XU)+E}tZQHhO+qP}ncK2!9wr$(Ct@F;E7jN!-^L9kVj;LR?SLIq&J6EQH zG%yGX00004KsXDB4s{TEM;H(QfD8x#0QJ9FV|x=*4}BX`J7-!3Iz~DMIulE07g|d@ zGkba)OXGjjrq1*-lER{L%A&eT`u5oj2>$UYcMg-v)CG2xug&b}IipwxCOKxFB>N!p zAmXh})q8&KB(`gk68ld(cYBHuupPED7}Gv%e@bqjFK-Is9oBY-j@HKP6FO(VCmI0z zM$d0b`**Q{?ciP~=0B?=#4HWmt(B6=x z&9e;onuzC5ws3W9u6Hg+C?5&7i$e~zwji6LIo^Rhb zqD4i&n(z~sWrTBw*2!k|OJh|EetWRN+zH)lu(_p62i>)DMyJfq{IVzJT=$BiDc&lB zRvkVyZf~fbXBhftx0Qs=rrST(lE^^`58(~60p(`pQ~qlEW{^~KEIU=O%Qzyisz~)l z#F*C`u;!sd+Jxv$w18bJR4;2MNE@7>`9&~E2mGx@sk2V7;zJfqg^m+dEdAg@K4IoL zFra)`Lkpdcs3b)=f|b6{nun@gM8(@k9uM6md)iPsPf|!%0t}9#q+N8KMNzRzpc=BokKKJTvm;3hK3Ie}1Hx<7ub!EXCRSNPv>Zp4Yvo`?XA zeEtVxzJHKL{r}+nA2ieJ>s#7cy6EfEIe4ln2-*!1Aap-LHJeXK;ms$fN~L<0FB+>7 z9#$ixz$^q+y}pfPv^2<1UwfFF1LQxcfBTA~A1lAX`wpQ2gkX%vD^(aqAr~ku#>mSn zTAleUFL1@cn74ZoXH> z7Vwvg9sIHq)8sQU=x!AN{Qq5gD66`TXzkHNy&~h>DvsN|H@iT{JNrxGd|GJhM?!yKcuRi#NAad@G{Z z+{jcp$r*Vt*`TRniPL1$Pf;C8Dx|=+(!RTnuWbU`-=H{kf4{z+OLZ6`6nVC{dYI0O7 z5zu)Vjxx!+^=WVaJotP(_dxiQqc?IUeg}S?Ao_#&$Oz6S7IDO7it?zuG8RFn)j1=TOUjYY;PKEPGfY*9%v=O;T>4VIK-xw&%y zdzj1)WvN5|Xp-|fCOqf?q*8L72S%8UIM@k#Bp-Z~9bI+x{`GVPk3}i2@MbRhgFu<_ z7Mx97We5z}^PEfGX5sD;*^%V}Cd*NYZV*jjZ;KS9&DMXmBC#rLqBwM4#qwT#yiJ>sa;vxfK z$M3UUBI=wlmHguK=h=_!H-K2z%#QbLfYWRoviy24f@;6!x|o_SGq62(<`qK8{ku=%U{)z zWz`+F4l3m;4ZlDp1$XO_7I)iTpOnR0?AiO2O|cUo6)vQ#sVJ8p<*(Z78~zJ5SGjy` zrOATX-jIkIeN8a@aGT^w1PXo)+sS_ujL0+L3}#pWCOW9NA-02#al3Hb{wv*L{-(J? zgRIcu4K~V7X5y(NlYWlK_?zE_cyo0EGz1Jn2&sr)&kX6~r4-DRyaEtzSbstxjU^H} znZgHT$SNis2|KI?{MfE_8ga%{=Ri`tDygtSXPP6pG9e^q*kxko^?w_8bX^$a zt5tyr5MTp|&=@W8`mo)^{XyCxf2o*Cn`I#0w;Tcb%UgRr-`^-%mhf0i&1LIRHj^a> zOOqYP!Xwt*VCRDE%19fG+kc{Ze(*{*?mts1cebx``Rnu!-Cl9cw}T7%m6@z? z`jnarn6x0)6Z)0k`m-@f$2Q!j5#qhCyKj3b^^MP4yUGgK*Sck;8SdIyseV}IX3gbM zYQHVXLHE)0RV@)9dJEghDUWCD^m>mD0hn+9`u65s1whDE0`blT5x!xXM*~BFeZFA~ z^fUG3nS(aLAU_6f13`!o_Qf`FrLr^9?g2oFMuxP&l4~xCc9D%RKV^zM7Z{95N~M2y zBn}9ZY{mlY_>(0S&^La3@>NDyJj8QclA4Q#FdhS7Gy{kX>tKlA&@GRL*@$2QNC|if zibITDim!L1frCIC?LcH^@N7p zfU#`ACo^Uov;XU%j2%~-vl)MJE|aJcYuV%k);u(iGxzD(m+I>X(fqHs_D}LP;d7S zVrj7=7J1Mj)6KcE1y(K3s7R~SlD7mPuwo%2G-oq?Xt~C$WdVW^&cu4_)rzObz@M|V zz1z7fJjAEhQ>xT}N~1jf07K!4&zXB6Z(12i@EQDZ7$OPp1U{hvGF3?@4}& z3jVS;e_W8H3_(BJY#N*ysIFp;aS@nbf~5`DmOnK8!khC&3uL z8Bk%&xZ^ue2C$=4F-gFj#T4*AHFpO4A2ko4jW-5Tu4da(jcK&rQ-WrpB4@7lZF?Q=s2@hE8!o}Px9hXeqYQgQjcUFqvaQ)JGj zKa1HTjaDuS-yfu~DZNP5!U^#IIx{yPrnCYGsq+AIn#d-f!ZhMf|K@rLp7$9Dcin%bd+9e`@cYYZ zil7g0`;auOUMd;LbnkDm)!dJw906|lSLkTEyq=q#PhD+{s>;) zfe0lA8G|EY{Xm_!m`fj6he}N@vyVYK!htQUM<2#9L6tzeU~@2(Cu76L)GjTi7i?dt z$z&;WXK(=McSv$5nMtl+dODH$Eyo!7ps%k8a;AwjM|Z#%(pcv){CP>P)$@6YQRCAe zt3$7F$S_Lxg@fr!Owv4#Mrrm=)RSL|hplfvE_RvOgJg`rn;r4wbsz#X?Cs_Y=`-c& zk{bwic`nkQ;p?zmM@!{APN$@z@~3a2k&@H>0^1{K!vo?w9~9%qHi!>9=NS2yV$un* z#RVW4#k%pnMIvSV43Z-Z;Fyx5L0=ewCLOv0nu+$f9XI`M_}NMKToLC(OzP+Ik~W(r z($Y_A(taOpvU*vtxRNYK+?7rLIAPbl9eFf!!f7@&-he9s!6cz~{}xq=;64&I%&L&} zWXo3)8Uau^G}?(3;nwmGMd`8;9*A7(Qzp_Z3KMI)yq`IIPGU2|R;U9Aqy1(cwk8FM z9dV6+d@`!Ry@%o<#CM4aBeo$C5VteT212?M;l%Gc+sd2SJHR$7iGeT+RXBve$Ahwn zBCa~#=MNv1W#jl^*YiY2kPQo+BjY2%yU$fZH-KEI`8n8;S%3i99z%LCM8r%4A(ZkB z!D|cx4VEpn99~kCJRWt<*{CWcT;W*t7B87AJs2!@x-<9(L??bad}!SKUKD~a(XzA< zLtbhm4#?;F457PtiQ1qN8!>ar2NZ3+a@gJ!Z{+ysCwRTwnT9{e8AA!MMz1HckosI( z?gg6_$!T;uaEcn0>bF=;?RXgM81PctrdkazcZ?IN`VSFJu8C3K-`t!qKkyF1b&fwE%1JO0=o1hr z#388bWx;psAtx3Z>73IO>rR2zN+eiVO{J>GUI^KQ1vMU}c)-++oDfS$Lk&IsBalc5 z=N}R;BV>rT(ZRdIl`s#rk;WOX$H<$%BQ`qr=d3HqJ>ZcJz8zyBU6rSIs@|(;KrIs_ zjOEbSMGC&NcMemL_MWTZ7&Kl!e>v#x&dcm__~HoO`eR!>)r^~qg5b7ML%C=5tdjym zVjJV*T(f~3iV*?Dqhri2G?NW0K*0EL;HRlcO2Yrp4F2)ig401NHp&fIJ() zC1BG=KZTQLZx{rko_7PLo7Xb{>;8G6 z_Tlfg?($P-XNaOFl3aGxEL=6Mi$1VV9#x2JjgD)CL2`4xQej(TWe{QOW4`AG>D}G| zl)r$b_5Qe7+WhYz(`pz_Z7H^5dY)w94YqDhtz{r~iAjQBu@_ z4GbB?uD~{l5^U1^bKmo{MVp|MeQ0D^)nD6+e?1Yt7 zZ5n>3D2Wj+HP^n1Hz|nRLyV5UQ@}+dFxBgpuYuzbAiKKKH`}RhS%D7C1NH>B-Ax1Auop98u_3jLU+hbsiEAE2{bQzu(?JMQ_5;MND1(I*CF|G64h2tXR{4QDZ$ z2hb`gBDfwuoN@gtv4+fX#7OxCBnp%vZ#-mVpRhq?;~0>dIP{f9azAIGHb|-xU zQ0oGM`t=kTWK+!ePAROaZl;^b-3FLuXT%+kMdzl{g6Zwd=MDz|AP%8544oI)W+&JM ztu@$YJKF`VdTm>mx5RFB-2 z<=B{EJ!zc{5xR)5?8&Yw#dqp_+I64)l?cOa+KhcUi3nQiKT=|}Z`vg#!2`mp2B->J z_mK6eqWfXc$CSq``-g_jHe_9UqPjUyvNnD3@TM|}pJ{4@o(0(xyVm3l>E@V5n z(D$H4fLGn)n;r@HzF!J}5uCA5ac*3+8AUkUvX&&(@>Kx9&rs(@R^C^EeFkeh9p?0c ziTXsqNj%aXXg^BV486};fP$4p!Ut?Y$!4-~R2Mp-Fr%*%6p%L;YIBC)2QrAA%9&fXu7(RI7?pAoMF5@7Hon&DbAugIGy5 zwKesGnBfJ!p@)wCv0R}^*4k|xS=#VD(;aPB0?mzG3FUUdXl-uc%f(XUOJ^^VrZ>+ zZD48bv8RR~*^qdOAqWvVEK667y9y-BJl=Hnw*%;OMbUyp6RnRw^V@ z)F#se;rdIKOkP@;x7(aMe-Lg`P}pJIcwdpiBmOehoL`Ku9XTe5eQXRGTpgqFR}t-D z%`}=1rR{GG;{Cz556;%oXy=5(Z)4BiwStf{_GY0wg&o`|#pNE5aqpC+*=lAcPP&L= zgEh%^GYVncnaH9jwblccQ`&X$E!L5S=#q#Q1Y!~BJdFsjG>s6b)=JnzV(sTuz1d23 z9T*%_+M!wnncA!JN$=VhB*(h1^Uruvs%9TO;il&!b5Zrld7r^5bI?6i;LirO*8e*Y zPw&-2(X91c@Is^vkAk(wFpRZ$`tAvg+$Dh63QiLay$Li7JqPEJ0kaX%K^NK#WCEQ# z>xB9YsLrPav3e?3)u7#hOxWgB=C*BBVV%uqLfl6mYG?W90@9^1i9$zx zD+7BhFnaX)P9!b?%xkKmz0V_d$ydhkx8++Pfg1znHj7t`IZ#^gD8%>t&iVhaH^tQy zQZK8;P45r1RMsW*00F-}@Z8&MZ8@2R|hncxMi~s|@y%FFx5pw@yVzH+eYtJ)hbDP>6~)iec1J z*$=q-I=i|sd%U#)<3S7?uXAx}pRCey#0BOhN5h|iUqjI*s^e5OSK|(?>|dULhYzHV z>97tE?uKo>)3}z29=Ju>`*CNYt--<-EvZbo*awB~?J!j4BS> zVS3!u0P$IdHR1{6)}_asQ1UpaXQKxN87>rt!!9*g={jHw4%Y563%~<^fEKh%|8eP& zEO!#0kAESv_#2R0(_(8K4$S|ELG6dl3z~NmGWRrZ?3bHdoChmS6AJPxh;Z>*KV4)n zCDaN5BN3aHR1)dvvqg@v;w%y`t;yi9@woFz0)9fqy45qHKhZe;pqfYu_SOy-eeXIo z*j?%gt zxnY5BwgWe9zvH{%b!M6iKnkWFa@eWth@FF-MVXTI>uEIX2=(0zeXfMC91p5L3`-Gy zk-WtMqP=wtWIMI21Q!BnHF`M%7#ATDegItZ3iX$#gjL@%CanFlL?%d{EH94%+KShOQQ%2(2JHNs~OAV9gIt+H5v_pwYp<0OC z>UIczc>tHOY}cxrl#q_IvRpw={b;)_WjJUO+-sX`4t&ggt7q7P^9Wr+8)57RwuWn) zIzz$V&dRa#1Z~qSJ}7^97w+44PU(Ow#VovM(2Y4J?&DF`8O9)7CzWCDW=k={;+Lr4yQI@kI84< zw5650XJj%lA>XVfJulRvm4N@vG`(n;M=a&l=z*eyXGI2|0UdF6a)hBnErZ1eE4%CM zYbZG?To(|kq^E;G52lA1v-BK0IymCHA>Ve)Kcf|`1}D34CB-Rxb%$OwZ6y}}qaG&O zX@1$FRqX4QNBMko=g;#Okof$9)Z^jp4*ZNgF4}c==gGNk{=tXGkb!O-BrGrQ&6zX1 z!cY+O3sTDvV$mSS8^8dYKgA*lhY=1zX9l^7_-)brG}_zZfNIuaz1BwyMW22^Uc3nU z5i(|$qRz5TsQYeQ<0ExWFTxjfv*iBIQ2i^}GI3_W%DBbHN$<~a_**f3I8+a+aF71D zCvVu-Zoy04MEmFyqt%hBAO()2%}t(P&t%@0AL@$`YK(nk%5CisBI@;I;qn}?D1lvFytjSMIoBb*jtw#eH!e)iwS zGBl7I{S05C?3{92IjfK5|VBO2CaV0+riA`%2&oQ>$P`nHmA{9oE8rWAyuad7ci4 z;0TU`hOy&;I9@%v1!GI*OM6n>Z0lMBoV>?@XB>P#45v;lx=WoZmmfe8+3aA@4_fhe zKY=N^p|J@~w5}USq1OY^?B>=i;oS-G#Xp1PWuSO5@U1YJ47kVw5gQI0^<6Bw7-uI_ z@m8j|=4W37pl3IP#w($*B_{*2eOJz-Bm(2)vtxs@g+ioOnk&F7}t!o+Aiw*zy6!1(9g$S90 zL;h0V;A>G2mj9?k8}%Z<&V@@lDU>`U3yOVpn6%DLu~#Sx9FWJu*s5wIE))ImBnZJ# z$uB?XpP_Sdnz~>!S%6FAl?hMs`V^u+FBC7z~oHLbe_-ehMA!{Z>hHGR) z8~GWwU$#z+`~~`N%PobJNXMD4#p@ju0AS~zY~X)(3r+1@EL|*3o&Po6S~R4cx7ZMR zuheZepjQ-A+QsElBD2?(<#Wf@E$Q-Lz=;SUjsNk(^J$M)UC;p}6qK#iY<%NP9d|qM zUjvd!KD#I~nu)+;@?C+ARP8MFtatdz>2G>t^FyL zG);v{W-L&1QfBIUc~zx0q;n5l{#NWQ3RABinVT~EJ>xeZwIKs@@~oN0PC)uuT95mk zcEqq&{tQF>3eCvv2s&uU8uNA6d6J3@+7}BJDbi*z+cuM)q)T#!K3qWG1J$zxW8elk z04RgUE4*vn00jXByc52+B()k7@&g*M%y$gAa%IRAp_3%)(p_1(F}eG>3L}C+Srw#N zE|R-Yp_xZxZ3SurvbCYq<1~`7rkiF;6nq1VXt!G)K;-nN8Q7w^Q=<{yc^=>?{g&#p z+AtMudiS8##ks!K%#S@>!|K-^-7aW1?7%zq%ss(UQV^UcBuTaI&Ops zZMBG`ZB>;H@blatnt7ZPEa_%Kb6Gn{F$W{rH|D+J&%|f}uClq87=D=pcR$?vW`-O- zC2MC|?gfGy7|_mfvAg=}^~a1R0{maa^?9?7II{HMbq62_Qn@U@z8VfBww(xj)B7Y) zAig4K-tO|5w<$zjJ#NPW&LAC7nXU|nCBXY#sztZ2MH3*1WTbecISb7+Sdg3*0xb^0 z!xf>b7*Z)mCpqY#^j_tl6~O|>EerB2=PohZbS5L$ZNXr_)N{116|5(5eQfC8gD$;L zD+8Ej9L?9uxf&o9khliic8#FK+1Z+bMtZ6!iV?uY{79SL&!YQay1e}-Ul8kHTMq$ZRPE70XJz^GFI?`} zG4uUXSnT^lQPkzGuJjIM#4_bl=^FZ*w#rZOtreD&F0g$Ao3T?_ZejevUbuSLgw&x{ z_9_Xur4QROucF#>PXX zSzN{!9@uPx*A>+poU?0(V&l-MYHGw6{$ z++Cf6GDupCqx_xU?`Iz`C+XN;G|N%ehFk3TeEywq^f;EoXSM=cw%dL4hXf4TU;Pgb z5gbK$8cbJ11z5y^$1H{0@QiPdedtg(5@G*3uu~emgNqK*wa=HeanisU%*$zdpWi!%}d!+$nS(T7ew> zaZ@3-e<1MBG#Y_QjX}>1$k>TwP~Dl&bkDpiQsvkE`|u$kfUvya^xPz zyP}tVSh;k3&aL;dx~U%0Y&=oAKl%(8vVS$}J%)$o0!F6N=(=6jF}xoC@_V(G^?^J* ziGIyhyPO}En5P^SUzIVpyN1Be`s|6Vw(wrTZKgrp-DXF?xW_o)Q|F1;7vb>1x+Pu| z4Nc;;gRUF8#=v|HWAWJP1t}+_qV%sD@!2m4r!2d>Z^ATj zl5k*Z8(BOJPj%FXpd^F9VN8T%655MGvy(KQ;ul)YQB5~w`df(^3B^YG`?!>t z6~VhklQ&DCMmEn*8B3wVksvixq&T`cogRhoDJD}>Eqh36!4bnd0)khr+asn}dj)p> zSt%@XZKZ|IJ#TD{&6_9EcddstOzmOKzBg$Cw6iU$SMR*D7cZX*#pdnh>?AQn$u&@F zYos&Is!*q%Je;X@tJG8Kmt@tTj)iz@Ve* zHQ>Z!1xPrxX1pO{+8>I^Bi8bzyO-B-8|8eEdcvD#h&PZ7FU8W(d5#>PD8ATI=7gUX z@%<+8(&m^aZin7zh(9|^jmL37-mCeds6G`MyJt4LEIDyx_=A=%evU|SJ%lk7oYwVT zVhS@mLfwMG(+lVC!W^8AAcTGvc`$%7sG@;nM z(;LEnjNPQAJO9?P#BW&(M~W9|Mi9jxMkU2BHBcjyH`=I1AW#mtPkAJ-*e0egMa!F$ zA1pVd8(}9&mA8VdfbFX<+Y0msIN1uKqaT^mh}tc!&_>=Ju|(SoP=pu`XuMVfnm=7w z;N|_^o`*q#;be_)X4F;!nz*#!3U%8)l2C2PW(DILGU(eBy%UQ0;!{k+?#Bs;46fFA z#tFV8N6#Uf_p@$f^*%gvINfxU_$ zjs5U(04kqAKiGq40UC(7i3WuDY7fHgE78glwPiOU=ZB$7Z2EBb8@KDl;Sn!~#WxK9 zh2u9P!xk_rMJKX=vBQX;)BTRX95sJJ(7SPpI9B;wU(S_V7py}pXJPbIr}mU`hKQ+_ zo-j7}Yl|aiwlHDA7bQ3ALC3kP!fK-+sWY}37l)mhwR;F1&zGlVCx1R8CIa{n8~mk} zd9)?#w+6*Y`SLS10q*j%LE3)u&66{2PT{3Cv2J!-~SKK+q0pgExD+(ADIKp(}(Cdxk@qj`I3f zi=d=bl@^*_Ab)(q=aPLOdkuU5+gcC9@(#%>aN+F!`uPXv#Q#)rVC1T$64cK+37Bz0 zxo$0(5ed~%a{PDScmhh?`#>2ZgeWIUDC%SjMhpY z(ES5b3@D5MC+rDAEIlAHwK!IJ#;YU=82AN#p0Xxj(kltZlbrf%?xj0ZeVQuP##Q(lQrVZQKer(z)^TZhm%{`>I$$XML{6HQ~oq3+Ou^Qnl<$^1CuV z_Y4nU?R?BLiFe_rp|Y8~pJUg@qjuAu)x}!4usZnKnZaU8SQyerNA|<^vHj--i&3EAg7yCRv+Vt-u8%m0GHzW^Qb}Y?k=a19;`y?vY5Tg9jicE^W7} z0+beD5m}7lR1JG>nqEFk+eDmW)uAxN`*Fj>m6O$oK->Z`Ubq<(kCbSuPmN$tLP+33 zxE|RapC5Thm2Vo;bt4!FM8VL5LE>F|kIrIL6UK}pYWo*lC5?`JH_~wcMn#ecEtW|T zbpfB6deWGu_Iy!Uq2EwYbDe{XRD^Y^fJe}!KX*OBgOG6U`$l@d7W~QT20Q8s(H73I z3Q~QbIwcWO>ke@-YM8-XRD-m9LA1MP9-0CT7H zMjBYaWRLzNb|o#pO?x(+5fZ;s2j>##rpd?!V!m-e!)|TS3m5_=N1X%fsGCsS-LQsb zU10Wm19bF2zIF@(x75@9C0#Ia+S_o;AU}f2xqis7SYxZ}Yvfyroh?JYmVSo9;$~8c z0sT(*SK{5aFMTfNGF+PMRX#j1DZ@E`Ep*H%X-fj(ft)&_nfY=y8}6x#X$)iA03q0A zsrXp^AEf{B4RgA+5pl7_mc?^knm$0tSI0IJwrm6lKB$`gWbtZ@Alv|ca#N@Q1RuY) zW2&*msjA$qs^IC5{rWs4y*ifUg1~5UQuT;?J`FC#l!|k|AZm*UiWNSsJ z(F3Ic`KyHEMP)S_3&W5PYb=uNS7BoBc;w)&Sg%&4(4rfupj3tIqnst#Z=_xDDoJ z7F~BnzI?J|wyuZKD&ZWYd6N2Ak`U}i(J+nOvk$QmA0p*PpIt+d!Pei3qPe$>x)TeM zD1uzT1xO9BFj}_Y)CSioPUyk@D2k>7c2&-YVLjGpbq~1e6em(_`pQvk3Im<`?4JpW zH!NmadZ(r5eww7%*JU@yZlX!%5LoCoK_P237z*m^0Q&djgx$^hc`NbW1ab%YT3bi7 zl^#fF(Yu{Gn?}poM5hmz4NzfZ_>q~QEcLW)0`Lb}OqIHw`&c8_&)V^L=T~{AjfQ8p z2kDG#J)|M32$W+E=zb!yGda?-MQ!KUysRT-kkP7OOzR41B6&gb!qS&z51uT((m z*G8$VVJDTmscL9Avx*OZq?F)ZA2YTaMMVu zxlPd=PO2sed9IP!%H5}$P17|S0mO?0`QWp#O zq6;7qcp83d3G=O@6I}s42yif+39&utxJ&6KZ5J#l3#-~DYca;8Thf)%rb6QP{IORE z+%6N2o-DZUc6+$_N%S5-Pdn|t(0%d#3;JJ%C(Dgl2ZNtP*72WY-X84#7FC-%IoUh? zhu=v}+53;%V{Gq(TCN+UIHiZ!R7u@+v$!IwwMP||>7BrCz^qkr$@F8-oh<}k8nva{ z;HYcP_ik1PBaA)j=oecGxShVfMM!cTYFP$-E1d*ph=8KSiePGT)(xu(Mr4V~Hc+yH z#u<(bBB;=8v|plYMeE}HH6&&U;GA%q+zi3%NXgQ`6_y!tcIsJCypTq8idqTv2D_jOPJ1Hoy`Y9KHORgdWyqQwa3gJ|Bp6t5vx_xYX&HhP z`GqO+kD8@gPjGFbBaxrS>L6DmJYeY<3?R^*hiwg-r}@G&!5fPO%tpz5iFHP|6GK`u zgTCIr1r)81witY#Ic?KcN~J-Y0qQy}ka^lktu9}cL`wf=D_W@WNKutl1HBtm-?-&A zQjjKnYEl$KMG|YZpL@B!S~EftQc>@>V4Jmws9bwiIZllgW9=upFE%j)6or}q%! zU1gM%bETd>Y#iy13iw>3MF?i~)QhKU%*xdh)V%sTw7V3M&A0WBRwa71*o6*?o8_^s zQTyk)yYi5X_%Vf?*hTQDN1_mu59xZn79Kq1{KLPOeC~!!=C}BH3HM-qp%Q-F)27m4 z@to!tj|F#$;NK9}**E7F_#;N${EpcP`)Aqnpn_vX%-tuR(4p<`cZ5;%@(N;f!%E%( zv2bo`ktVE<$l{9l>Xr8iI}H> zxY9N4XWJ1rw%RK&AY6{BC=e2GN1*1bl|@I(e^pI^aY$;QSq!#C#lS5im2zk){>*yd zEh8sm|J34%CuUo%B`H=qN~o^u=!6XT%N_QO?ZH@kD)uiZ6zG$udpEG1KHm_d}Hf>CBOuzWD zGycsk74IF!Y%OBt6FM)yRQvLvqQ4#Y#`(SilOeFyu=EwgUSs9U3a?WROF8)i$HS@SIU9_W+>Sq8T}=T-fVIy)F2$%sDoO^Lq( zR&QiFRc~MA&W>!lS*ai4-b0CXO+53=Gx6mWmwPTy(LiUyT54Db-O_vm79b(#k9@c(bZfVng4_-N~(?EEwJwZH=a{IkpazsA1*<%IfA!kCzv{l^Yv zX>a$hwD+h>+i!9Fhq{MZj*f9{@!W>-2rS$J0qnF)aFG`zm`5UYCXNP^EMW`!eS`Z` zT1+B~ZQ$%c;=~beZ^jq5$Wo)Jj*2?sFT=Py5MBjskZmi?-mEM_e#3xrIu3 zCrBlTV*BcuRBD;D9AuwQajEwbgckZ52Mt<$)$Xc57O8FRxs=cAZ6dwgX-ZA+A6n-o zeABi`+4h?$$r?_tT@%?ntCMH^d`=d2F>Dv}*H4{_ZvL#>@*}Dlv{1lZ?{T|Y^8>nv zk&nb8m~CVizuoPQy4 zRBP=!E*aeDLZb0pnG=t?JQUuU1F8HXZfa}ZPXa8@dM=Q^lf8j{!BLPtry5?FaMNiu z@NQ@DL(0BdQ04N6p9mP58Z6@h|gUzEj}De0)}S`P^(`PUDe>f zUxqa}cR~zvaa82FJpj>Lwwk8)Y!^)yV#?7 zGsk-x;IV&fyJ<;uEO`Ko@8Wxks{01#bLEkBHuX4#4^33wg3W3F^loaAgvqw6hIqOb@XW3EEjsScz33n8`RNGWUg+ zcGMYR{0V;4sw*rrJ3^0sL}98r^`{?C555^>xQC#Hktw1KAwo3UI2EQ4F8)+v`!W+6Vkx^& zs5H%8iG^c-em$RkDE9^bb{X(C^avr^E;1gg;~>*qLi`D}$P`vy7T;|{H6RN)^=1ad zn3T&U=g&v|4CuYPukq`A;)bd$xjk*N~=QiTdwX~ldl z_*f}q7_wY_>uq?fSe2@KHS89>wWVGF^H8wbRomm5a<{(%@caO9ZucWwt>dJ)=T@5+z(QPu1X+lAYXyaQ>6w~j!7?};^|72QeS{RY1~shhcW zt0f$&8^BC#&v8QPY}jb(1zStHsHgGg;#koUOY_cP8FOxmY9;OgI4-~{CN+?_4b~pTu zPGZGDfMD&qYJMSXKwB^$%w}(63Q*t=xZ6A{#b$p|T8{Z{N$@qW@|@Ag`#fl5AjVpS zVPD+PZJu&GA)vhLcaH(${U%15#T2!<0kszQhKGuB;btI0MRI&WmXA#B=477poGAl1- z76RE>*pvj6Rd#@O2wOrm$(bL)(^S|<8;q+iY?BrAZIhYzG?x!obx0(xZP)6hmeW(7 z(Mm$8$>XY~$)i{i#UEu_wERD8ol|3GVVk96+qP|^VmlSvwry5y+kRu)M#Z-63MOBl zOixempYZI18|zx@hK5BjY=NjyFW3MHQI0eg_+@1 zNq|<2nG*CQrKv0?D+iAgGa6KNTv3KLvE0WZ=a!iYdlj`oL3QI4=UJ>kgYU&}NoTQe zeqOy3h&dXQmp3wDGE+vhMZ2qnZ^_J#ZW+ZVR&hCPJ ziTU1->s+%skh|D8%9iX90u(OA3TGOSm)H&?({6pporV=JwUdL>)0tZuCWtR<1s2$i zH)=-YKoLtnH{7uC&j;_|^H@R5iE4yX)QM>I+mP%VzaK+4Zw6vRnLZsXtA=ZaUTHki z*BFl(;>WyHeTWb>%ZxUyGv2ZDj(-}O8E~5uY(v*Qv8RV=pTBOae*jfEWHZ|k)q@18 zs)VGnJg2C9WBoyZoaih4K4T6PG3w;?jn|o<*FK`w3_2c$6GDp7a<|f@zRRsVD2s_tfMSeH-Dx}w|JUaWof!NAMIHkML{E?g#a zMsRO=n=Jn`OR$opJ-dUk)o4X~GsL9?d7tD@jE+%{UwX*ao=BKoj+tN_jX&|-Q?e~q z0+B6}D>xKVa(=y!)Xq+6T6`RuW#$$3@5c}dcMpA`{(Eg`U+UCSG1qAQJ_plWqb$P>|Y1Sf*Y3SvrJI)<2;XI*{+3%*Z>X9R3=&z^~QyS`E*T7z@G} zD?i}DC)E6QnKSQe^CbomedDo4L(*X|%S*oMHM-(%fpW2A9u>2)_InCgW{#gQlwni3 zL<}uCv?lD}l61N2ssI~kMnSKbzt^5AX5)4ickGIZeev{twIJj%1>n_^J+dOGE7h78*-B4huvSz3q6)~Pp^B#d`mA^#X}DF#NK5FeSYLr;+Vi-l7~X?| zG;rb?U`F7)1-jKF-gWlXuWGhIh)G1_Sa`lu$zCG+(Na-&6p+Sw~4vvX2J73k|@ zjsmjQ(H)fu2c;iSXzR){zAjBKVWTNFUcc^xp#Jfg#X@6_`-L)GUo zBlm6I-TtUb3Roi~sM1`0J2v9Hy!cYS@2Ifn-LYmeESqq9zuhuCaUfxKTq&s#r@GS5 zn)%k6?1Yf@vOIN+!gEt#7;D%n7FwccZ-X)uTx!kL!1)s$#I5vPC(w>9L9B)<2m?J7 zuq}3;Ei?!x3)h6Dy?Hk##H?c;Qy4&pu>li=b(zq@t1AGqPAvmCrLee@%M!e?q5bEs z9-9FqxA4K!EJd&)=8z`*v=Yh*jj>@aa+8^BUS=Ios6jOTA<*C$OB-1Bu!a}6{u*(y zf8@lE(;D$DG@zhUMbXk#EEs;fe=zO9W6zBzPS5`BzvlHVV;!27YTtxeJb5+0K2<5$ zj5E_9k~Vj02;gEhAA<8J>1O&a3*$6J4|S7ep>BgVDRhJ?Fs=IU+yKT;UnU;JhE4i| zn8_ghumk^JUuI$NX6p3Qkk$MQst5nYm?5o4gSDL>&qV{HuNG!4{Yrr65b}ls32YH( z!PR7woRC5~>~()dGo@XVYxUO9$c8lUc{2->T5JOUZTkic_U6(|V$*tK0NGs_b=M=A zm_?stGI~(}3c&p6vx*nB4Fv0?qnw_zZxXOp9`T$soI~KCrF*M=y%Af}{e|wQ73j5I zlqmQOSP`%|VW5GLO{5N6;|jG#jY*Rp2DdG+;O!~Z9e&3l=sphRZhWVu=i)$_W2qQQ z;(R^tsZ&DO^ZOY~TX6-`baEaF>IAJ57DX-#V6FTuh}0vBk`SU|gn<*?AxSoC*{E4SO*Qw%gHJ z_9W8Nl=e|{QOG&a7<-~;BfBr}b3AR6sGp1r!Om|<N4X}r)f1(j}8Q{c~hZGO!@31V_7ai zPVH_`87JDSiNB;Hm_oC$1AnRk+3hC`uR>xsR@@nS#@yYTCw?L~{b;?0!d^{9_(tL3 z96g^NCoz7w8kmd$WFQ=Wx-?VKSm_*5KE0DAm!DO8nI>QeQu4>P9g;PpeU@t9%%JKD z=8c?utkClxiLw45f3n4BZitXu?1Z6BjdIOxaWeQ5t*r%Ri*fEQLddSfFo%*7#+`I& z-qcZay4(;n7A2?|Qu?<*kQ%tzD74&iA1^(GgZB`4A+;ToU7xY1*q~^L~*2hF=emgSN_+KUD%c!+tkV~&x5Wy7b(BZ>~-zOTPcI)X?f z=UpsAUs1*_CqbmA&$9LNa{Z~L%PD>^j?j_y9zV9&q4^HnF5Bx7zT2UU;$V=o_3CqP zV|WA{}adh z&l1q~C+o}RKibZfrlvhECz9XJ500fr_A9}=n4YLl43yGR`H;z|l`*U#->rRNV6&wD za9t_(dmCHN`_$#C4PFtr#ch0#dC*?KuNWe@e{oSgo@E-PKRGDb+%w}sCl!W8Q;0<6 zOUS{;+t&@of;Y18L98}E-jcqp`2;2%Nzw5)?3PQ|aPvO(lv+L}Uh0$VA@mb)P>lzB zEDLo=*Z5?6jk(qXjKsb)E8A~ao2)cNvPseuI*)#e$-iVca1ct#zX1VUGgi9le6hrg zsepyP31urYvm2ZBBuDWA3pmXgF&>E01-|)~Ks|a(;hAUG+DEKy`U)9}kz2T{5eGd+ z@VLTLWW1dd$bY;C(D2|26oRsjZb74wbnM@Ak>}lUxStqaaddF<4*dEl8QJ0igm7<> zx(0^9onvWjED>=7U8EFamleyu1*+K9Td;kXD$}Kq)2eq>Jfgpa%*>imFXPK39>wlj z+zK)r@qruC2e_+_b+S-`#z4Zrwfp}JYHK=;v6jn#<)W?d;fFr2QpUdEe2Zvevg$R5 z!>if)JLLaen5sQEipS|QkAw7~!t9!0r$rUG?~h#O``r=c<>!HsP+~X_xI#3O#4x@>eksT_^X%`NA=B3Pe&>mu#7X+46W-9}|{e*ucy#Yr3Ud0h)Ss zw(*O3CU&Y+;6a|nZV67`$wMMCt=wMd4PSCUlE_f4CPVzL2P=eUWoei#R3Kp4S!&W~ zf>(`8)i^CJlCHze@2-QXDVJB)_KEwv)7s-21ek{+S!=H4LtDaSG7eUOIL*Pv(=9^7 zv`uITTc)X6ZB+8k3$4b}&0#WJ3aGIRf3Qr^q@(y1fvK2lKO}c_%L=BguJlCD71e}| z#C&RrQtqyNr4J0lmN7BX-Oc31Tv~i_HC@NfB0O+w2PH7F`gS=s*6)RkfxjhDO8(q5EEg#k^x5T$Oluq$>S~g8Epvs z3IT4u>pcPxe2xpzhhBkAK4eLLf=(+t^R{4M7H(f+KEKz5A(jqZTY3v%(0FUxrz7XN zJ?dwe{!N11QiTPcP@A$x_z@;s-=1VH?=Gc;TF#3$cR&B|>mZWxO!mESaGGJ~)5JIW zNi%_p1OaPMR=R=IT9a93ttmwsbk?O|X8^f^FFYPunqbq zjZ2aSuCub#!H6)%ffzAvQ*)t-PF)UH6c7F`?99YaA8yVueCTxwah_K@vYxaXH{5re zBPDoK99Mto!J;bWfe1A*A>!5Vev6Z2qt{GW&=Feq4lXcC0{f{C`g0p`bMi#d&w=Mf zh0!u!&WpsO?bBdg8c7M0+!WHcZY7g{?Z;~JkHa&PCfLs6Ij-}n`~qK(_r!`VGV5Qm zF6B=5+mWvI_QuRB2PA%gyZpsWdoy+9I(4H|`Ip|f*Z5)pelr3kxU;pWy5Tg(vBMMP zRsym4fnshGrmRrl*ksyQ)U@t6{F{jYHmL28By|PORiD}73`l9Szg0&+t1|#e5i2Zp zr$33vRJ+2e*lD|m`trw%VC8@Chr`u>esZMF_nh}v6T1M|(;}S~k}m(=(yQ2h^?76? zO+@3!_5B{(l7dQG!rv4vK*%VD38Gww8kGwJdXAgL7Fni7XogXrMhe}vg#4BggKg>M zcWvpLe$FOH&6Vn#CyYnxAIzZ!#B35q%PIM=^u%VcyMIIclp_egcWh`_Dq!{2w=l7L zKBXm#U3Z*7UMW4@qodV};HZ@4x0`}n48_CAG--+Uo{KKN-_cJ~hc;bH)-&HmR6pHr zxU;JIEN@8a%{^Y($La0lSL~U2t6IM)8jZI%&D^3^Mxd(Nd%q3VAMvAFwv+wjuK$G= z>enluNZGt>hhsq7J#P0;Ve$&qf9te7nU4lN>n?3{@8A*&)9W}VHZ*Lwe6!d~GJ^w9 za`n~Z;ov@JGr@A;>7s@dDI=gLd=$(JhP>PjBs6Or=6xwC0qF^ZwX4tba9YQcb(1|`!>aPQMbrn9gD_abLp*8zD7_7$ET$Ot-Ck1?IodePey zRdyD#VoT5>ZTvSLUyO+_TVQ{8{%wPzY@Hp9N;Rw>hFI|#}R#*O$i z{f7-TLqBS%(Wg*7zAWR%<8b0tSjVNQiU{Y;n(nAJySM8u;ETpXonU*(TQgdAUWD&K z{u#36CbIyYDiNnLx1tAk!tZnlE|f<&scRKxmnJgYi_0z(#w2_8q;aj*I<}h14OE^A zCb{{{YviKij5}7nF~hPdlYgvXMMZ0ykfy!iptrB+`jk)$%Va}P@^9Vh2?q6@)v(?q zfK!%rF19=wzdfD#wad*(Z_!9?$;&nkkE3U?%FWk`T^naYgi&Q6KdbbeII6|^i(Xo! z(5{2D?$95Ms(GP`f2<{d9Eyx1YcEl3)T#5?hVdP7sf8*klTgT*3$AH00Bi>X-9|M~ z7>!pm68wu+@wfG63i{wRdy;Ff2_|EMrJPfryw0G_NQRXdwL73V;g8xNwlZ+iK!U%E zcge{vB~cu1-}?xb2Kgq|^gU=|RrmpfT4P(sssnk;WQG!hjW539Cj?o9@7fZ-#uW^B z3cZ#_qp|O(X(+FcdwjPdJ);l@@83D2{nckgHh4Kh36zG!KT)<|BEOmI<@N!#A(gY< z6xaP~ELDi_*PW1z7`3QwAeiS*9WGma;1$Nk;>CUZA11aSRCpp)t#XF~Y$cXr-RbTR z=%CtGEky1DgzsC!I}Iflnes;u@=fS;?mZ?t4Z5;;x^lJE69&p{RUH7KD zzv+C-=Mr1I#y!C)Lr_t#l>F$3tsa`dP0V&CDNhtnnQs6>Q(-)G7V;unzhoxf>9L~{ z5%AjaG!IGIiYS->navCXclsC4h^H>752@Wtls_+4+$=@2KBWa(BGbcRL_o8jq@4)> ziz*Ym{Lw<`X}6$7;+UIe>8hl@=EoC#*wIJLH}s$Bt*cQb9OqSJ%wvqXGVWMLs(S z$v+hK%~qjOCdDDP!qz`By5sP{XtNL8-lN!T$ys~JoE zUph%V3LtLHGbI4C#2ER=Y9tP~ozUdl01CO(Nh8u=VU?cJ5Tyf`S(&E}FXW)r(;(2o zjTqZ|aB>jKIDQyNLM}X9#j_Hk#6YE zuwP_*fp;c6i@7xo(itW{wc>c0g+5`pv=_$k(@E_4 zDObbhcNJ52L%;__H8EZ{s^6pO{Yu+ER-Ei?ZdDc`sh)ZI6rn?aIL}3a{Dd!f8T)gV zxXp;)Jqe!A9O6aB3SM#^Ty3ZT6%=4J0KEUR#Yfy=l7zw9Tt#6tKcue0!fZ*L_u{e zAl&4UNPqWySd0e=&$$aUilXSSXAG@EJ|%C{6%||r(pc_{{TBrDoK-V1&5&`Bz7fy2 z4ZCQ|mJ2zdXCaJj*%Ux^RDFsb;9f7U#mq{Jp(prG2@_vwZ&;#%9Y#xPTiXj4eyZU+ z((_zss%|abHWQ9*D%>54ZYRzpD)ExVCPie_-TpLc!*>?z0MgjNBDJFg0e<%Qu9T`Y zqBO6evI??IA{hM*yvyb5IFjW~PH zuv*fUtC9i8@z13h`$14Y_CFnULAkaO(mHyMKdDJhNM zSE1$Lbh&XO2XaZ1IWi(f)hXy&HG8xqB^a+;Wh5=%QnY?8R+n(EPRXCgg!#}I94oB> z5p?TqyffQ~6YSjQq6zGWm^xW3I)jP=<<+|xo<6GCLhv#J383oC%!PraO*~VdO|L!d zImX&{;xRpk0Rmej)KT+M$*>w+;erYw3?>zO`9@JB7oGT^>BNI=gVPxesoYt)%sE|b zTn?D~5Ebu_qXu5xvy7LFyOw9W0V@+3UK|)wYnt;$8W@IirXL(-BXEU8TUC*G#Ga}^%mwVIn>?~PS7Ri*LjY{;X+65o2y-rBdA z+p`c>Wk%Gg;1uP!u0q+WfYJ+tImU4e3hKE3rh(Zz(X;FJZqBi1YE;fLv7vWvW09|N zHUbq=qtcQv(~5TBr{lDuYb;h}hlxA3$3cHxjY zt}+ctIz^567~k{4z+a_M1%{sszVtZVpCXZNPzQ>n3`k%S3vH4G=kr4D%0$m4g6@t? zm~!RbCfn3r)1wc|rqoq@5`*dBTfl@Pn7ZE5)AiJ1gb!`)$a42*6@Z$nH0kyuCEtfw zu9Bo{RQeH++J~wdZwjNVHQCPcU;+t@;3Y+0(ti`|a`CwINv66`{I;NydWT9#te|SQ zQwdjzfT54L3ze!(q$%enx8!)Yi=4#Pfk3@{hI8oJu0x624PE94-)CR4Z`TKEO+2)>n~S_xhdHN z$aRqu@9 zDPgqu@`%)Ha+X%wdQXApVC`5i8e)D)7+k$c%hed=2K$sD7Q?PcXv=m zN>}ZnuH)W+ZoM1-jIxIP2Bok{5_$|;O!wpNxurbP$~KnSi75m)hOv8p?6tUA7A%e@ zXmc*qJ9RXmcniE)N=e1Y(mHlt?ds7*sO4pjMPO*RljZ?=pB^GY{hoWEEUr41 zkHP8(n3-R0h|mI&NM6(SEA8zqc{96+@hL^T)ZG#p{zb$s&`zD`&Z*x}+ zD3PQoc8!0vBM*~1<+Qi7#SWOU3sCf2YAG#f^og|^{AeLyZ^wK#^Mbs4O1RC~ z#=0AnsB1s1{qLg6f3|-cXLp{vKlmRA|Nq9!I-A-W{@C}8fA)WWajoq)#(w7DMPGc=aLgGC_l=InR(j3;sc%F{d2_Bw(Ojo zoR_KELMon>oGp1HAXwHG3z+KlqsR^M%j15dBpTO2qzEdihTM&oMk5AkB~l1ncpa8f&NSnekb92bwUvd5&dmH7q^X1qI4G6tpVtiFf$X?;kTWPMrpNU!YC!1 z#AlU=kGGb$B{{%2hCgtQto{a>Ag z00Yzrq1EC1yI@lj8_;a-+E?_t5U~P!D$fBJ@H7hx+U-Pqo3@E{fM7br;Qn=ecfcn< zP znjAR8i2e^2MO64JjTkxN7{J?jGQSpF(tI&nP=MxJavX_QA#!`mi^8aXY=K9$lE<>A zRlA14HpfPwx4lNs_=MJbo{76dJLEB+u{j^3kfzh>h^Cr8jS2IqWaHL-$L?^lC*1EK zFna2E^w;tG>PgmmcqT2c#$?h&FmsMo*)Xe@npZHP*0!F&5Fk4V0JlyPdHiknE1UUZ zR>N8F>6z>9RT)!A?9QfugC#xx?kr@ph*M|EplEO}2FtwN7lDMfBP4F-v!;*ty zhIU5;O0~`oa8LZTG%5zbgdPUa7XjLN^$rtDt%`;V$g?n zPUi`DN0U=ULd$zuRrm?!Dwo2NGC3~X50K2Nh695PD3K3C&2kj6+T=&F>jXa}Y9&spWJLxyTxQR)`4@-(V`{CKH^J%2=-&iu&qzBzuy=dyX zbqwcW1POM1sx1Ysoz_ngIaLHpja<`=1uoCX7S{+z*Nmg@ndah*85~DD#3dV znF7pjkJ3K*qg||N0!88O`+r;i0dN+I!iGkv1R<2e*ph)#VTep8VYAzsMqP)HdhoyH z21nAg6c!!b6TL_mkzW$l<8Wk6kSkslf`d=>w{CrQ5HE3Cu0ja(fkkU}HT#KHE71O_ zR_aL;q7rKXS1Lh+qjQKm!g@|dGB6@k2@WJIj4pZ8pGT>Fyy&}4_e{4(C{$oq`$%#E z@y>pMRe)F2pb2DCB0ary!U?O%4zbQ_B4apFUil+(J3>Ydx{U?()n@mE8e~rVaRD@4 z{?%c}!o7$HTuQbkfh_Zv#AUvJT6(Po7G873!C6w_V^4OYEZMzx5kSRhi2uyd#+#b! z!M<7B+^%UaQ8;K(HAy1_>}#{JNsjqyG1}fx??+5Y1?7O%U79^eCRMhrX z$GgF@1F*bssLS-mAYd6msDq_jZgL}6xr%>RW!Ez+UW}Q_&2z|Pl(sZQ1cx|Y65J>K-p9uxWNlhKQ3-w0@8t`xlrAo6l@tY9@_Yz9upA!rwX!k2KU|D7J82P@Sl zjBx|qXVKjN&uD(&x+Q+F12J%6LHHX5`vZitoqn+thA!}AAOOZDDT;2_LKAwYMU|NS zN}1UCccx+tjT2^wLO$AK-Q;`VWR|TVGVC@k*HNk{o4fwF*f%JhD~4gGwyOUPDN8ZA zc|j)R<}x*<1{n#c+YGCvTF(w@nUS#-`=xnYS%HF^^K9b1J|6{c4DyC>sq0h8=d0FW z-POg)7$WLgx(eK-8Q%q6P2h)QWqN}HjnhG&N9o$eq$-z%KX;0OYrsHTwguGi_(tw z;^XpvIewc|M>Hw@3^=k~bRF#yceNi)$8aI__bCPUu4Z+a+$DkR^@$dF=DvGs^_yfc z>BFa<{|gMfW%NpzlAOo8@V(j4od@sp^{Us(`Ekm&ehZ~mfibDkXrv}-!N8<*cFW?I zjL)ELDgZ9!=Qvu(&SqR~0zVK|Ol%E%r>y4L`YdR6c>;e(-z7#-D*_plNv`Z5*h20= zn2It>H=?@6laXv8T0EM2B7OfM=myv7N?6UzAZm%6b_S^z;jX#8Y{h);HED zI~RC@{id;Jw1tBtL%3kc;#zJ!1^CK#N+PN-WAjQG=>G`5S62V zOG*m*GO1Y!V7voKYF;$wUqR$tx^DTZEOQ=&+N5V>O4XYVb6!_Fm(}!>MQZ#np7ig` za4Ll%+$_Ji;gMj8&~!mT&2&D2lA_OOP;8{RzgHf5wbFyv26IgO9el;^yw03vT{4x& znXPxC*(<>lGmPQLOGh$ztcr{u(?{^D!rdnn(~R7Gc8<~ehn!1Y!3$us&#YHlhrc9Y z|MJfD$K_FswZ`Rj9pbAOHX8OT`NjoUc@S?aY>=DNu?W}>zg`^?ioQ7p)qS0%HJR{CDD&Bo06IFn|B*(;F zYa?ko3&GUFSBOKKfuLMkK3HoU=A{L3OK{b@U}uzUhC8FEA17FWxlJR!maIX6e61%Y zilwJiDKb7(_Zl*0qnQ3d>E;4^ss>i>DK39;7|s&db7;-)9ZT43Mm}+s#WMt-

l$ z@#^SRLv}+g*)H;Or6=}OBH2Ll4qfn+DRoVeLM<*pUzBY+PLxt^g-v6YhC%~KFW*kB zOnx61a=}SFE zWHKp}MEsr>2H;mVAJj#_XEFjVzuI>>%yQ@}FXH`4pFidoKA@yLtPuRog= zclgJx0B^b)^wF8}VaLRbunilWJrFDOpM_&jSjiO`aDiRU z`nDm=O!V`U>6T6w=J73JB7r6YE|)Em?4dGmnYW3^WNw2y5t9`DgD2OWfCU`6xt8fhG>NgI#Cj`KnL(9$*Hs(W zUFYYeHandiGye_VuZyIFFhkmYFUE!QUGM$Ioku8hH@z2QI2+-Yq>Tb-<~G1I0f#S~ zH_gzM*JLC7i(_LYPkJu_)!hk5GJyq=XQ005Dcug7yGNY62J$V*>VO07EyHRUXFJUe zJ!}$uYBWaNY|OC`mX_m3=xb;&YNE|`)x*bL8iKt18SKEAeng|V?%oqpPpeqJJrT-E z@+pv6$wOrfkvF#Z%PxU;!`@p-dDrO+!Vq}_Ky9U@e9j=(L-{H5j^H1w%R^gS3^ue0 zgejrIV|y!gnjz*sq>vRf=zvzln6H$m+dtU#JfRAmeljd4Tu{!-d5<7>h;n=>@}lHk zO|eplt#1oGo*o6XU$s>AViXO6#w(S;J4&X!-i;K!sX zM&#&`53?g66xwKPY6BLgPSN`H_OfaDi5F&<2cOiPc^~V^u5|PA0%EAzMXIr6R$gm; zN!cu0sTjQc+9a5_M2CU857vP-K`g)XXK8%Oib+4>8z1XjR({2;uSa{`f-3O_hA&A3BgLZr(@f??E?SYmKjijZIXF zsJkWPxQqISD~nKN4N*7AniA%U6fi;Y-3PsM2eAOZgAjq^ZtoFe@;}3)O5?!|1@3C$ zmWw)7GsO(mJ?N%B9K_zREae1Qww4$z-@>Z+TK67!l;N2JV0(n|*AB>aH>o4)Z-~{C zx)LjVCsCoW9C3#qE?)zU9 zqeBLox*RS1u$B!!tR>d}$67jj+8Wva7Zp^Zu5G`;f#$bTN7NK9nNigNs$~PAN<`H` z@@oY`?!^QPgkFh5J@ZkLCKCCu_vV&DDRluug8(myZ{m_H z)~*(r9}k-wskWgs2=m7nsDREBQECi^U(!|tuOxW`oq21{ZN;mHE3gC;iPjI;R_T!m z0@0Bvz~i*YXF&04*PkjgBTT99`9_4}_|M^bU+pBQ1Q5N^yF*%Y#` z+M`rXyo}YWKfr1O$QS%Y8-0GD5-<4*5=1aBgcS?0k8c)W#>dT=q0Aa8w_8bDT6qIi z3&Q*O)NZmi%vDtOVlvq%P+wi;(l7<;P}v%*7yi5?3iEHcK&s8jmL%%@OV+_3I2IhZOnXVYL&%``s}bx- z4-xA~v8P!4ii*lU?;SRAx6FqW>?jf>>xaf_%h{kNYrM8GqNOHo9& zGS)CUBFo;XwCyVLXJ~o2?!w0*IEoq~Lkd)^D7rArN*i#q-p~Zn_C5RsbR3pMIbzrb z$~ckB-nNl_=D;x4%k0G`vEM9XmqvlhmecjhXCNg73yahF_5zAyOJMga;t7pVwhF2J zsYlfd`Rv_vv6MwBr45@Ludnnw{q4Fp|u-GMqsw6?@7_5mcr{Xl|yX^?Otl( zn%QLSi=VLXzaEL!S1n+9nh<1`UJ{%(g#kXrf1T3!o2l<3t&B4c3a`?UH(AIz`-tG_ zSPe{Uaqf8s3_}Tw@f9p@c27w6#-3*5vr;njiGW>$Rdmx9VJpgS_(PPN2h!?ib1yvxyrSgc{>cRa z&{sM(jWSgbA5s5tP+x&{Q9z=MpHKv|I`sfssXb;gPN_@!y-6!)u4|tdpjQq40Kq)a zc*-DQKko$8$jM(0_{jf`AYTs235x{=dyctRYQ=d9RvbNoS{rCjlG6&$P${{JjvmjN zSIUe#OR^;``OFsJd8K95nXCiga*q4BL?MOpErxJr2=efx&zL~apqkEGhKGp|XCS|^ z67fL>xcXtG3A%h}midNt{6NNIEgM2PtA#>w6zn9y4`qH`&!7+c^I9)1%xK?#@5aJB z4JbnuIDar5GIrYv`$7k19JblMCW1E*ju0h#Cwdb61SMV!SnKd{=U#b?FMkXIOj6V^ zuvIt}OSqZe);3>S z|BUu=C2@|3JeX3rPAxXM1!4XJNFX@KFFueR$hQpnNGwmv1+3@VD&?yir)u@ZAsHkw znCHCUTO+Ba3^D!wP-Zl|z!_cta|x(quHuFGECpa{-ehQJ-c#jx>*vnK%k8A_&!|yV zQz4F^W#QN3w5ZbLjE-SaFsyMp(O?VM;Qq;<9Vju>`&CVpKJhyZv0+0M`6T{<F7-dWS4R`#JGV++TJG$o>LRa|z9BBS0b@itrLWNCR#>J|Y z9d=bND{7W@l=PMHpcHqt{bL-qfU1AJH?N?e&{rMdcR>xlGjDJ1J+fF~`sk*rS4wNk z?S);b<%b{@^g`T>W3kBT?S5S{!3OOmD8IWbu!UlF>zasAY@w|quw-U~SRbig^~uyF zk;XNv(hQu0usAwfnuM%&kW`(O48^}k{K9RpCHLNOYd9^sS5n=YKthDD!h{rI^a|)` zwg18-nntL>W5Vop98a7hfN+kMY;74do4Rb-`i3ZU0zow)bAE?Pr{sr=)ArK|$@IPa z5eu?oGWM8`V9f;q4;h!gG*@28v?7k?hYCU^jn-?(*P21fGoMirC)PnElq@b7BF5;)Fvj< znbWoWxl1S_8c2$~7RcHpX4T6~Qw}7nbJ=N})tZorOG{e0zl4lEY(_{QgCi0$PhZp) zPQum%oL>${7>@VHaC*+Y=0w7yI&%Wf;S2Nc=n1dlPn@=RcVxA{6tU`_IHYSly{vsCTdPQ9SkEFPg1StOlHevgQZ@_yyYn%yfi+!v-&r7qY3EI-Uq zEb;QU$@hfU82h}EmmD?}2?DTpXPt-40dCiZavi|- z?RyH)L3xASg3Q7ZcQS^yee=z3IygqCd*PO)ZC-)O!L&-u7@R_<=ZRvck>ES~iAw8g z-ul39Ql+qW?I+}4oA0L$?yvo{`Q-q1IdM`&1;9h^@h6b9Ubp;zhh%oVd1#m zlDo{!^7kY16i7257XEUkeG-y6cMw~l`B5L(gIOuE6i&ThS_rOr<-OJ7aPxM!j%Qc~ zRSl*-8!eU;|1sgNLthg@zQj3#;d=!kKgwMpV=@%1DY{VyDqt zxb=y=ZYrWx-WbX*s6+A>zqW}X7nNkBcdiYdh!j|33|WJKu+r8fOQZH0K*z?|rr)xG ze#EC&P;~KhG|YKWi6Yn)Cfm+WokiM=-~dWZ18S)B*fI{hV?$?hTai`o3ty0k+f#IA zN1VNTd*@Tg;-@ldqIj4DOXwK8d?gWBe(p6|%efJZJ~Evm7=mgu5J(t_T?Jbkq;Uvu zK~@;6W6)DZEUy#DEp~)-J zi~n*wFv`Lk&Y1zetS-UVixyEh>%?*xHnn0fzb(l*{qq9%h}M44P0DG~H{9kYk1zqX z`AFO&Q8IK>as;*RW8ug#p*>2LoL@k$ABr*xM0Vha?1VlsNnLJBdL&;gSeP?6XyGcU zz~b~N!v~HEC$1*ZE9=H>xpbQ;n8+9C#5O>H-o$2#aJ>QY#Yq;FB1p9Onw8CVp6;mM z!t-Q#8*_OAX;@}WxLHP)!6+t!z*`tI6=~Pzay4r+CVk4cg}5hOSCHtq*(_;p5VoAn z`WX%x1|jFT=H9PA`W^fQx!%^40^`RP(?9;*Qj{u;UJ=_V8TXnd&uatB2$SuuJM*z% zbr;9ZFx~$ARrRZ|EUHV+x$NXIyuTR)|*WK6$dg=fGde-!AQI-(<;@$b{Xa-LS2F-Mo=ea z8#DhW>8bao`oK3^x`Kga;RWZdae0OiV~+cK!xo7~nyCz>6%mWOaT!O2+^T@8Qtv%P z47lKm3c3wTDjO`yX3Av*^hQuhVRkn*QNlxOM^_J2T){LVYyQAHg1no>qeLYbMt3C; zeb8!vsZK;x{I*$U29_d>#bKpQ2%EYf%`5;(b1vl<`lkN}2yR*~E}f7AErn_)QwEFZ zmDZSZeI2o%hrK=ADNnFHmDLLh$}mPzU$jhrMIY9UV|*qFU9DR9DrpQ?78iC=vPOGP zLWb{0!&jFUgIeq4+xeoAr#)lev$C$bS=KmU@hdRo2C$7PLA85x4s*^ho#dg zm5mOK`+_-Lb6KVGo>ac@JT=5iM&qylHq|{zGU`Nq=8#+_(7)jr`8JAl3jk+f&-&b}h-e~?s{V*?8(Ncc3nI7PRZ^P$a z!1YTHzIHsg&3hNFyxddaRc=mQ6RBr!WY(0k49k1sx%_;L$k2{Eq;;NGF>RDJe$;c$mY zknTajyxFrO?bnOIpZAj{jlX*)SY!6@;I>VQ%(>kjJG3y$Q_yezd+H-SfDanB* zk9;)K#ZAoOZXLC8QV8o7OcTamIIG@Re9QcmL>>pjd;6p(3whbENe3JVhC2P|`&*tKWS@LQi+DzVFSb}TmAdU0s zPfZ&F3|;gbFd<|LTL*l>s~oTRNt{B=Yb4?fgp&jqdl~Msj>R}MA8|ABl7df0#|6Je z{XrJ7N>R@}gh6AX30{>+jMsn+;ygPNjeHq&XgyoA>oA8pQrCRm7;=ujQ)Ac^yoXQ; zM1l(BEa}SXx8WFVjvMhX)HwKO#Yy|X+JqX?i%HXK9(+<60v{wtlPtgNpy4WVij$;Z z>qAC5&MesSk27%s-B@JGG<$^*>sjN(*SN6%0ESs`NkniZ4}z;)xPvLO9>y&KT)pPd zrXJ}!Sp>DEPBsor1SY-MZu_bcokeeXlz;+LQ3A;ZgHxvGTnLq>KaVkLtEa}xWd6cm>OxDPwY^(wfM$TM0GN$Tg{0;>k z<=u~j0M12oU0YK30|RAQY;;gNB1ycQFzj@bSMKfcxgcgEZT;gtf4inqPaZ(LoCWd} z)|;B)OMj(*07;%{@=ycr@$jf5tsrG7)=W7P3IuY=zpthzlfa09->NZT*8^h}a=VA_ z!4Ve!(P{U64A2C{cqW#xX7cVILf>l#tdScqnkQWxj()CCj#BQ-vq;UJq-iD!ZwwEA zScUa1OnM4{ycd^<((4>m!HNfIimqft``C0yGZ|LNEoqzv`ueoSp1!k@F@>>+>Yjmh z&r-O__>B)1mF@V4Yrx2>>humDS5$gA>m=l@PWlJed(NSxWO*^`^sDOKmE*Uc`hcZ8|ByZk=@Q$Vc0jA$Y7yr%{hhY~C6 z&Mh;Lsb)k!j5V}T^b%ba`h9vvZ;%2&PMQM!x**0(6f1mz1-ifi@>R3ZZnR7xV)t06 zPk&%TTjq;q%Xts@;XAjIMyiXO@@&+^BCSUj?({YRB&ZWIV(+pQZ0Rxsy$jz;MA&f79BBFIr}io79b}PuE^OAGaL<(fM33$FQp{~91n&Yc7h3zGe{xt zE|BjD#XfrEVHF>0p`?brfRv;=KsmW$4QYvZl)|Ya1#XBHy4w)s;r`qGk3o;vHHBVx zbl#{xAWp4EkN!oAP+bu!nhliB0AU<2ev1!SdcXnbQ43n+0~u$HoS;d_H$apG$&LuP z|8}{CEsW&e`TO*`PaAt184~~I_be^S`GQ#%Of|KPz%>8HJtDY0XjJ%}k#mr9w)L+@ zr=!3$?57j!7vu6gpXMtgCj&MpwCCe|s1X4nc9|`*bcKwRMy^w2z{62cYIOyoAb&Mq zzK5fng*!-NC9DZqL{f@3@W)jHMc`Tgi5@0sVlsh^n@ho2V?Kn974aD5Ks zECHfxL1HP6*vQ~=o(|u`mTS$2hy>yJkm583=Ng+M-Mj`jPC)|3$4QfpTP>swRKiRq zQzVz^e3d2puZ~-K+aqDF$W|-tDD4MoUs1T?C{F5PN`(o}X#h_F1r$S@6rCMV$T4Wc z4=QBd3HI9{oPHQ+f(JVHk$u9LrXMUicPRWt`T=1M*UKf!7cgE4jOBziXrW2(!mf-f z5Rb<&3txBsV56?4Mk(2O)% zqwIYBPD|GaxqJdT20omp6F7t?bmlxqt%@b?VYgEw@9C2fZGn7xL4C>yH~NsgwaZ7C zX^vYm6f=?|kllKSYDzxSyn!7uBp3p54A;=c6j3yI;cB3nVuYSvtBt&kC)dJZ8=C7f zM_JE?7r=4>86?`G#qHzaQ&HHVun`{)O5T;jgsmIkmh0n%(&aI5ouYeW0o!)ZAiiM^5o=SJIwQ~Hkx|4XLGZlS{jTjl^!f4#iQ)% zmwB2DhgThomHllEq)|}8_t|wJ^6?8|9u~&LO)Pw$IqXI=P8=7TEkRu@)h2xvum?ar zpX9jl!uj$%g*|u({M>=+t#*cKm>EFKy6?K}1f^m#{GG2l#T3S(^gJDi*wCmt+%2}?^|kgg%b6F zwcGICS9muS+_u1I%eb{|rq{=5CT?Z{1wvo}%{})y8ohDj^n5-Uol$FakE8a1-svvA zNpAE2-zG}y*L?Q<^XE!*YiZqI!#cD%s-pWhJj$nMO{>SyC_ZZVgD6UHBu^f|F8ysf zs1)-xj0xduTOaKi*IaOL&!xx=3MVS2J&Y92a~b8|0;?%p0$Y z9&4y!SOD;fV!niPk?1J8sMDkehADJ$7iqS99QcwAp=C1{Z+>Hyzjam)E+`}DRTJ_% zSgy0Pmw!IpCiA7a6aK}AuXc-I78X_7s&Pp|5bUm>E z2O9|41z8?n{LriqrUgnlV>p^noie&Wwr(4@-Q)$F5t4H_#K4|CA47?qcCyp`>pY(| z-IiOojbPjMT}l``{Fr;#X0aZknnY=!2=~73K`Y$B(jI7kN^ArUOlPFaghhkjlN$}+ zU!dx#P0-qHemA5NHCqhF>e({l_F+M{gOL zNJRLSx2o>woH%572n-h!=_F}MCs^awz)W?^=_U18RdnJWVv zH=ZhjKOJ7DvooLUPj~6@764zc!o~$hQuYR5)s*`<5oLxp(~+w=(jVx%$D~PuJ22>L z5UWQSoKoVb%b>;v6WGvrvMw%2;~SaD$J#cCj;C};>c~xMVQ@tp&*yMlphFYiqs~ZI zLs%CP*wO>K=FLGESq+7_%WdUc&_fg94Brwmb;YISVz2t;<{&w;r8D6Ys z?~5?AqDrq=Pm3cX)hY@<%V&53(Qz$VgFQr=9{(I`BT>cjK~y;V1^ zE`i0WWfOD1UETMxY;+@xJiOwt3+C`rDFrm+Q=_?EJKhDK$M;=66R$~}V<38#zRgbJ zb?Eq`c1@VX!})ASj;eTMR`T6wy<}popaNPVg8~lL{V(_S_Tp`7npSL}?@>8nvOl>> zuM4t#QRU)2H#+IQ^*xM2e50`^`LrP-MdK_^NL&GL4}m7!+WZ$}vdwH-6o{z1v>CNp zf~z^Nzz10Hir9~gxURI9Af%bzobK?chPmDVPW5>0wR3i>_+Slo&t1f;R%~LKU4Uo} zivxba`oun>Y@8P$wZ}ODi_iA53rN>ZTZX6@BL`Tm>BWymx9{y;2(AAu=3CubkAh(RG8@OUYFXDj z9pERT8T!Y6{fAZ!eWymzR%swN7S&IgM|zzBCh4e+Dvkpol#=8vLKQySi#J;tM#HnwBHef}e&xe%4@ znnZN4))TWCI#wL#IJ1Ymeasc0@Ggc=Ru}2{ao& zlXKvwwtJ>Ve^R?=!r&q7o=4gFd_6-GHrZpRf^0aN6H!$Ury5x_yxZ*euq*N z;bqH9vKEBmlL-(AP}rz=P>W3E2UTs8NiL$cj}49snJgA0b+O7)ZpthRn29pad@+Y* zfUd-J9=yhj+a$Y$GF;y^I%H^txqKrKR~Ok#tPs%ZOAdW)v3taCW&ypQ|^-T)i9cXD;tXkS_2ybN$R?Ep32ShQe`9hPBlsWp4 zq88D*NH!p;Xwr?o*~O)|yPEHc)k$I~Ci!M>FVT}2hn<)OkSwlO7xP&g>7f`d^TkTd zBmIqA_=%_#b|3D_eA!;l>^;u4wz6@1oE%(a!}shq237JxO$WUWo1zitW2c;RG}5Ez zT``ne%?Ik8Ws>wrxFuit^Xq`;(dK*ZLrq2+6zzh>{>x(M-Yx9wD1DJFF%hyM_O1n} z`tJNOOE`vYIox!ES)!T-Ze6*EGimU8<3|H+<>~l|8$PTEbucaZ>bv2&Q5!Xbfw~7J zCq-G4w9bXUjuY6i{V~sNh10%F^}qLrPxgf;H@yE zrg|%W>w|PO!ng9XWlgD%8hT7z);yZll3YslEL(F#^fl0&^6-zZ|dovJZ_{%b0X zFa9KXt_#^#JGyEs+Q~i{i-<}tOVUuOAx-jm0_1rsFl&xrRV&K_cx}2qx1FKO7C5M-lZhjOSb%(DEQ3QbIlF1_^!{y~p(!}X!eybn z+wE$GD38kP3k>!jBT(vDqi zCuu6n)3)+>0;<{>uL^jTeMmu?eV<)l&6k#QA_#?p@%rEXZR2P_!44Jl&n@&2$VpCd zuwy-Y`$jO-&EOiZHRxDSjk&nO;V_G@F$I*7jUL#_WIcj!Q<(47hZQ@;w3F*}I%y{# z@V{&|?3%7hg+caVg_r-p9qKwdO3jX`sj)k7&!n&zfyC!yrVCRZ*RDWOu0cD1H=>PL z?-(o}Cx6O8WC2b)x+KS=WG)U)WQqpj*%k0?vSNTWcAii2)iq0rS^n-~wW&|>L%Y;h ze!%9ePU!UAOI+^!>Av82mb`xbOksd~1khO@4$Ny@8?fqClola|v>2u^5r7|z*}LmI zqbz_r3eeVAA25JK0iZEfOHbkHBCqGiM5A| z!T#H$!!vR66O46GoS(n*sc<^`7@|0<3U~B9I&)H3F10VF!{r>rKBH-nTVUIblXnbQ zkF;5+nqn@8KHc`-fq_MM|KKJCs~0rfT5|0qHXqjhkF6e*cl|xrWRiJ^r7hd_;hav7 zz~q*BhI*ziG|3)dhdLBjBSRltYybx>d72eu8JECTyIw*at#d%hBNufLG0Zgv@&dVV zHqsG?$eRN|Af!38Gr=wnmCGy-Gz{Y;BQVK#Yb%gNhqTO2 zqs>r*FcHvMUFjq;V!g;T>WXgx#g3q;28=-4ongdZv(G9Dhnh@C31+t-t5*BB0z=VG zEVjvqnllQ5Wz-j@fUdP;=`n1R5tO_l+OfyV*}mkQ+_aHZqhOksQjK)Iy1)q#hi6#U z`LOXwLqF0~Voj7(t^+e0pyH}wm>f=}hVD)YQ&T&NQ>J_$5=r;1RFYijW z=ck3>i*!NJ=?D%94J0mM;5h-n56zJlg~`?242I_ti8^e&9KX0eU*;q4CcHsh3tx{> z*m2*nfUDjia&#rGg=E@>-73;Zw7E;#iSWIA^Zg!ysyY>gTt`D5>zOwM?u?a4#VLD= zKV2(Pbmq21d1S^oA0pV>?A^wq-`C?+(md$_gXWzfD__-Bn+tEg5LKRF)&gRI=LY}W zc4GpP(^S~nC~}J!Zck}DsMUZ;iocFf>)@br2mpp@X?H=v9S-<8Y$|m7!?$JQbcpAl#T;ccIw?ClAIc_b zOMaZZT%ksWVZj!@q5Cbmbz-?VIR`CgpLYFdm#tVxh>$>ZPpIOVy z!T-!&B5@R))nyv}IJ?TwQxL$Ci(*#C)7ao!GT6Pt9#q0kEyFmD8+Rr#jU^&Vtl_Qh z%bm6)zBAKUll*3Ar*R5~x+Y{@;}y58oKtn|BuY`ujv3q2b3@ZI&nxn|(OAEJXAlY(nux8wZ8b+>-%Z|=9`BK5{!$N)%w>`H|J_QKMffDsz zV-PAqVgqfVT=7+So@1296=MXnJ=ICY2Xu`21k@LrovSkaS#!hX!mgDi zLaT7>P^t$T>gisEznNA@I5nPB)zGR0r^=U+Dj~<47_EI73sUg*YA!08UXm6tewjMD zF*3iA=+B2l<+!^IjfN>MMx^&~^{O3hr8H$qVpe0oY#qfMe-BnL?adU#pT4;5X%u6){~QA2ZYGa?rg3j`j;suJpe>ldL-s@!44i7NVBQs8@Cj-sNw zQ-a|3HrBv{cUE!gkJ{P$&ut8cEqkl3S?deppOdh2zV?%kL-VS487b z4rCV7r~f@uWHRRL^B*sQtv0Ico3CyWZ2{QV-`cU^kuLX3Bi4=)dEffpj*@!c{t+xD zx;N)nJ4X0@>w6HIsQsK@!Gf{~`tHwQZ$&=ryFY^=gY&U%?OK|!0h_J%MM{%AhXYfk zNjA)F+oqwdNX$LAEYfSbO@yOGgDSykKq@yV3y8$jHO->AvkK>9p1OyFt(EwW&$-}K zN6AiUDKMRgg-Wfs1&gYCv&RXh@gtuwbIGI4bMNGLy)LWAj}A#CQ|UGqoX7XF z#MOlM6Bp&v%TXwt9Wr`u3ai|b|i-miD zgWqzoqH^**Qc~;^sib@-;}JBj^DW@c=cDTgR^&bhK;C7mbhTPGmw5fm110a;wCO|w zsWjNf$p^p>qcy{K2)5^u%!E_U3nXW){}MQlA(Y>Xp?lAz_&`CMli1`D=#_*+L+)ai z3>3r|7g#JpP80*3CxIezDDN_?)pcG*CWYB4na7s_I4>grFoDQmTKHAvabQCI$RGBv zPsUP%xn`R#MYWYNabJ7A`e1^0lP|*JUh?lE^?>+x6Q8GBJ0Loe`AukCd)+7F+iBtE z4N#FwOd92f4P3%o!~Nn}2oM$6ffn7pgE@TWv0A)7KU}ny&n<+7)6vf|hy`yqeBjUt zj;sCUsD{ohJekkmC+o#0@tk{~Z%oUkhoIzqf|Z>B7w<~}E}rERbP74Yrk0Ft`oEJX z@bApG{4f@AcfN17bLSJ6=a1)R*ZL*+nLKBDQ=^5GMWl#V{7P#~)7Sm@*5JQDExv*g z(@faPh_V90LIUQed{&i7YvPRg=Np)^KK@ra;aEWJb1T9CA&WsZXm$4z2H;4Zh)7Ed z)+m?ZliRy(3)SLPD2yD7Rnl+5v3r0{#6gC6;`K zQKux~FdoQafwxFzaYAnC2wO5_deYlKhH|@gE-^C=oHl2Nl{D?H!DNBcaz8hFRX$blvx_eM{$tjE~U~js)ytGlCz5I zR>+dIvF6yo#)_Qoop6VjK4V;74oS3ZT1{91YQ*ohFIBtrr-|sdO`lp;17;`1*NvV6 z(boDg*x6^9O0Tw}8w|YDpr_4N>l1CE`B<;SnAU>&r}%<&gY}83Q+f)_6BS`-3*`tW z4Dbg`TGHw5tT*b>?awE=CJo{P2iQaodoo`=1F3BE@ckzK%_%b1v-`x?bh|!w!2W82 zjC%9`tB5md_xVo!3#(R`UHsfPdXix{J|*`^0u@*hjJMt5Kj*_UC!Il;1BobUE>hG3 zsH>a6TZ(~t;lepcC&TrGA{0_m2GUm>al@p45QExHGE7uebM2f@@UECsWR>|eN$^5+8_+7}|1#iT= zc|k+s!wR7D1;|TuEdY`pObd0+o)GdZDux%?G{xmZYz>K-uklC8IKxO2mghQ`9jLON z4ar4X3`TgNF(#UDO=j3His=R0*f=%{KaRS^bA+ieFeV)joR6L9y&@x|#%6hJb*9CJ z=f0xni~y9!H<&#YKV+17m@g^VBdymeW~AAwtXi5z3yAv;&l0n0V3vm(PCiA?S|R-ueAuk+#Clnf1s1jggpdu_tBQeG00^!(`HEo(ZTy?*&3DgK6eTL)9jP6@WnK39@6jbI9m-bfVVaSE;IJW zTUlg^qFZ3+hEjb$^kJBqC{kjHCuDX;GfvuMqzIESET4_%Att~kMaopf=LDriv|YmB zP)1-IT6jTMFdE${ftK7QGn&qK$7MGrL@SS?TfH(JnhtoO4ZS(m)Y#-#`?jBav-eNI z+MyZp&7yKbrIh|Xp7CADJOe9*J+V;dMLA2m+6D)Do0Zivo{O9v(4?BpMFW;JVv)#eVsmk42}yD1nPy$RwYm#~2)mT1$E)HPS>91sMGe(IT}muwecnypFslpZG>arw zOJ2h|1oGfM?wGZ?b5%%xS zc0fKPT>EtZ@uJ$BqqBnd9%M*lDy$_#t%{hnsY#G2)3f~kIOxt zBo8lob;*aLxPffiOaAd+|6zp&Y?b?FFZ?<^dq!>u2KH*1zgwnLA9A?zpNQ+oy{7;$ zj#fU5dOVnArkEmH5@U~#cy~mxKETk>I!v-oXMSKQ#~C7vRcaP_*TE{mhyKmx zHaDi^Eh1U{5e2KtezM^-Uio-p2vadO_4SxXgE#p@z4BU~>Gkznyj`N&D5=fe)>!L8eh9>M@%Uoze~BU{UV+G0Qs zBpqpNL^vBMBU=J$sEd-3lca#IwkRe?DGNWG(A++4n3g#=rzx=$JmHx4b-WA8w3)m> z02Oo@9yDS0z2tOwozBi~3>u8K7(T)PoQE!KpS3_jRU(u`C>r32Q0 zq9Jx0#^x}e>KNZ8xaM#?%BLL306obTi8wZv3UPIE-!K679)7T2=q-J4yf+HpoZyOrJ4@KX_Qg+d1}x_QL= zhssD^HA|_s=vvPz@@kjWaf@zLA?H+d^=8gh;>KE{MYC@at9nNqmCEzix^ZLh<6lnF zoR&(KwZL@~7Ta~g#~}&*Vz*yQxKjaOPUePDnAKhxAt)X&hb}bGl^*{#SNUp^VY(X4 zLzMm{BIZ!_ZaNylI-jhkvu5M?uz?Pt*9ZrMrP}r05P6cisdgPYnm}`ZUBfYNeBEy> z(Y3jD^+Tk(JJ;rYJ#=2cPms*wQlA08oL^Pn#>tu-Rjfr*tlpwG<*h}#MR{DW8;Vyi zRs7Tu)M=P(KZA+Ox^av1Shu&@NDwJ=V@vdUnvPznK>AHr43V@86e=>=!Nzu^b)O9E z=FlS2`dMWT>Q|Sequy`G29w`4ZJ1u`UZ2Cq$LjrwvDj$#C8J~8zS($%PDgQG6cnG! zO(+q1Q$y9kWa|xs-likYzPuS*;~d?WLwmAaZ}UnGY|T!?n|5h`hqfzk+^oHgx?DHD zXIuNV)8Pt!(2xwlaIp2OnKzND6F4rbw1<9W`&}K!_J`Z_!)Y>wxJ{PK7)jJqq8%Uh zJP>&)qOpZYS|a3D57ZlV8y%p&h`LXlCX_GeIgs4@(fn#w zDk>ImA^uDldMk7k)bU}#dJOR|vm{}{krcPiIB6on+ilvCW6CMqzR(_wnm+A4*^8J= zga;H4g*PKbowTK)#_e`DX~JXH0@K}`B(aU@;3R4&VVZoKJlWe0iEg(`W^Jas7~FI3 zHkqmSHk+lq-uco)B0vna z%!cq&T&!35q`RE1mRaW52tACDR4sz;he{+9PC5+B**!i;KCiwFR~@x4FQKyNSNDqJyyoRK@Zj#9TD)c z9;|YG870d~nubZN3=+zXD>wXwClvZ%dHJYK-}CK9E+_?NH-qC7_%=8^ ze%&Bfdw@&!APrrSpX26UO)|nk{3B$Ajmse1TB})CcT+cA>o-XVFQV;8`M5jI11qsb=ZCDit)2O4G z&X?J@7orQc)>3qb>gdC2i3YZMY;-%#@g${uhkUVraPXs>*D?%I!|9s|ATm`iKo+4+*e$2jF!A%Pn&ld9qw=g?ld_+}B902a zl?bT%E(-c-JtSy*-m2eR!);Wa)oaq;-?wVnspul$zS=oUPghKJ-d8)K_uvJzoyXi$yDs0&X}iL)ipPiXl+vU>N{h;dgT* z&~9a@Z(QiNQ#$$9rX8gv z-IjFxJ?37N6WOpnE5dO_0z~a(cYiyw2}%{8fNFejD#B6(J9!O=Ju~|6K>ZD!bW7c> zn@37Pe;s_R}7Wgv?^t5a$pydjlF8w1}ut3K#GI@r{ zwWvd)3)Xl&gdGcB!`(m|w|!&Dq8f-9qD4I%Rv;Q3p{^BW19h>s%%&-b&FLxuuB~tD z*sMe3r^I%UCr2eb@s_jSs?X!(PcX0g7EDykWwC^Tgpg6p0`Q<76%Zh_71~lm=4<9iqLhx+wTxgYP%>ctzyKOKrml(Kz@LpZBF^3Iq(`HG_QDeBvOnM8d z-kOMf?!Q%AJ{g5hUm&z>X24Y!_Nq*sTYLNSv4kP|`>})|242gKw}w^gP)p)HZmCNA zR~B#&aRfNYu0rPRTHp%H<-R-(jn0t149N@%Oi*Zk)_*vodXh zcxTeG-G0k}E4_Z%cE=ZZ56FRv4BoVhOK{4zmeT+>x_uyV-BDHympPe4 zp53g{;=TRk);C|l&(pMKBMc4=F($B{Mz_2CfV`Q4kl*Ih>}ZlMifj~bL1?8ff87EU z0a^7eV6g)2v&#v09=(t8S+l|Rk=D_xA(^)N=xakW*a~oGYRfzSgRUW1ATP*|q(ayg$B^NCam}tFP_0evQRW>CJsh+!vp?~5>9z2s1wIT) zQ|}l3{XTz+C<6l};9Q#K&|3H0u8q_?aorJqlwla6$#*O9Fd=2pWUsNQjwUDixF29=l3YCy$9j=Ihssk0& z*GikXpXRbl%QG(&3EwDeZ#8@G)zkC%+Zn1XUrt7PGaII}kpTOM|cPese$Cg+D*txPwXTOq~v0D)Rqa?PD%&@cQ{zh>!rJ^s==neTVzVa>@Yv~e8rEGXP9o=4t&QPXLA@H-Tju$!m7jX zF#m8FAuuC#!e_^5JWkPlzQ70h0r~nLeO{sH98T&i+Cc-)(+S>KT@S*Fm zj)v(H*ycg++poHN6izx0s9CALX_cyO(%nS$HWfOXsJ>=wDQqYjibe6QTSYhBZr4@g zScSYpGR22EXQr-GrEQy_i=AToUXCsS8;P2yvLPuCVA+6!W$Wkex?|oh<^EPbX;%~! zdTV;nzV$|?`{v+JM=$mVKODV&bNuoo-W&XsmSt4KWMQDBwaRk%(G*samhs%Z&uwdN z0=!)&Ro(AXG@z&$H70*th=h9$b2Ww%NuVzZh%|o0ChBcAYvCUbxbWv-Ej?R0X`Hs&so_G0B zJ~jJ^%=Im$^Wrmc=w?!c(lX<{P; zi%qy=zPYN}S=NirYTiLILglPC5k44naj}+0a03ZAFo?uVhOL&5Rk3VjTb+~Xh@bajPe&2gaaqKqr6&8m%PHYD4&SA&AaPzsd zQNe5o?f4MJU9?JQ1Lw6A*U?bP#yxDlB8E8x_bz5RI#)ZT`aI9Zczh;uNhhuzfP|n@ z_H+DvjO^xCbCuc6xf?P@Xli6}ncBl^Sqdn>Q>WLQ@-m^HC3D7d zC|*5-!HhMgqi$r~twZl4&)*`VZHVtVz&@(oVdVDU;RCn^nyHB3>O-x=`?h9w?kN8x z{Eh;KI0=wUk00D7YEyvcV6C7ILgT-g4?2Tc*OO#7Vkb|k?q0wlg>$1 zGe6qh?KCUrzg*l+7%o~v^)OVy4ByY1r8KNm5tXUm%T+1usQA~G!it2mzlVWX>Dv{M zP5IlyfLJjpY10)bXQG06(*vXOml<7SvC}XqRXDZb4u`Up?1sK*iN>H(oKEs~X><1V zOjCazdcyZl!a57$C2o*m5TH6xX4yLbO-gD*(2~=fEt)LFhu;Uzo*)h%Ms~w ziud_KgP-KnJPiFC22TYD`b?ldfj{Yo{*yi3%!~9xK3z}kMp$Oh&8+5g%9ez7<&oI( z(yVB?IvuP+?V9Yo&j>(rew{pj{(@j8&375#l<)rSD*I&wn}3d^ps3Bz1PH2_5{W&M zBvle!V2Q@}4XJn8ri?VV7wFOo!_;|~%`!HrDAqvW1O<(P-)GlXI9sfleJ#Py(oG7LO!xHl#`Y$6&MEPp(8q5xp-d zP7zME%9{ji48ga=1LqQyw7-myYwG z$RLQ~^0Z+Z({6$0Q2;rMw1A3N8EpOXvy6mIXK(2Rps`sU^uOM#1LS!+d$-2R*$oN9%QauEmkX8~O`BiU zYV*$`$2nEM{H9LTwvyFaICR0}qe<`*{x(vB*ygDG5+uWnn7uuWslT;+lvIj^CGC~(6~ zWhaUt&QRUb=u`wgQ{7((C8!!L7`MHOmYh-8)QCb$HuMc>1xTYhd%60+gxWjiw;mI% zv$;jPAV%AG;GsQEZ!Ax4JLZSm;4L139oEoI%AwuVr-7O@b+D$hk{p9+VF*;qZM$mh zejW<5H@3#(S72-x>hDi)aQ8Ur2RSlm-)A)o4V2rOHj#UW;m zF6)66CK0tpdI=*LSZgIw)Y7PS_Tb@8%pG$JcI(vx4)awJEs;Y=Iyw z<&zw_?xdZZrxTP#MlCW%Q$DW8jG0AVRRZfoio&Mu);F3_%zrP1h7&jx3K2 zrT%b^gWI0t)1JylmeW&vNqa{4ldw8nJu+*KK>YpfQzVmm#yAMPw)k zX;C^bIMq<%B9O zfg9)`s}5JCQh2!fdXERX^m3ziFL(}!7jW;W)pp43c1Ex)gFtNrBD-*byN}qNrF+R>OoMV z4Vn4jP*l6j@h(JxVWzOntkU;rxH8VJ@ZNd70$G2QEkNi-_^{1g&)0ZdfPHqx3A4om z9teqfg)7_WK%vUB6J(>w1S1jC4m+FAIyC2qxzTG&*?}(0cZ?2?VJ0eC^J(#}R~{b} zc+PI%9f)c4-NsDI`R-&iWzgDXEc&GcG6uk6zbk*0jmfHPlvG>{d z9vj?W6Col($KPWU+hav-wFJ-XE*1i6Z)s^v0$4=13wJTq+a^BJ7O{DI*t9j&-xef} zYiv)r<9=}0y&&Qlkraz`XqptHgIhrjX}&XD7pwU+DAW)59rG1~b)0#}f zo&mD5PO);*%)0NO;M4v6A-)214P1u85kBgNLU95l8#ec>V@Wch<-Y|HXBw4<7B`-j*2{s2U@VV zJ&p(+MCN7t>T0MvXxowS1nKdAz*7yk`_P9Q>5X^BF-u^R$VRf2O@=pyp2KmbKIK8_ zCpz=kQ`IzM*|yet-+cY;Q1r3~tT6 z46nOkqour=67S&RzWnXzQhiJm!9txqw0N@p5hP)8e(Q6{p}zMr#}NH!Q)RwOg^D|} z6&)+)qBbPD*JiMq@$AS&<_@vu6PqV7Pu{Ux4b~@%4|-aci)T6KILxq#V{Dx?^|MWLtn6rXU3K%w6{Dt5D$V)UHc_E1K_5X% z)=^%D@1M?0O6Tz1+Dgi;X^f?|wS$reAKCE99q_0HlvnIGQ>dT2DU`dOf3t*&)uQ34 zaw-U0Gu_#cX`_LaVf8^{rOk#)u8mSs+T+<)C~Bz`*x+aq|9g4%4RG*Gr zELnS2g{x7jX;RzjNbJc!p8bv|1KrV(=XFpFu`POj5?#9|9c^u@6B)W>T6CuAayai` zOajMv2Zl67N#-M{xZRn0I3>3`NS~D{&rhVGc1QX*^PFm)W0!g$O|-{OezVj0jC9ez zZNB%}d~Ks0f9t)A^-jgcOW=N53mtF&-NryCIB0-AOYoRx_bp_P+Kz!5y}~ zAy0Ta`b{lx%Y7K<|+bwBnQU44bMqgyi{x(q9^9&CN-lh^%j z#sbq5xFQQp;%`hLiWnVdyc>m|@Y3wJd#}&@-YciYe1C0ImX)332~BR=8IB48?}>lLHZ5W_;92? zih?a+RzB4>czURZ_bl-|FC_sGjqeWej4U0=2FMvpjb*H8>`}9iIu;D!VI@?uZynt_ zn6rCWNQ!X`PbQa%CMxOhVm*6b=rBT@$) zujZX`jtf~b9|*b7-+@_@16j<2D~>ML46f&fO-aIx0-}ws>Weo+#782D`xXiOyp z5A1*fm|pWd;~8&-;f;YLuQ1{%rYb^UO48x@jfEF`B`}+KTc!20m?%n>b@EA3Rv+`k z2_0$)VG*3eS$BR@1%_3D6sbvHFuOT2-IA!U!7N zX!c2#-jX*w+x70TkM+)=gdRgAx0c$w3WlHFlGAtBX;_RwEUp4r-0YAbLvoBB+HVsH z6R^XoiJjZC8}$ur5hMxINaRKjeL$87OxAe#?D{tw; zHED0!bnhOndccHtx4N=f-mP{;_>xj>3g9^UK-`TO=cXybPrcwI3hZg)A}yS?TQf)r zeze#?7ylaMj(R>2aYBwJ|5UGKX!L%u15FFE5myBdqzoXs0xuLY--?D2p_#F}Yp*Wo zYWuccKldtP2dWXQ!h1uglC1&?y1QIhNh0ds)hywn~>2hP!5var|KTLQ~7+ zreXU#>kWCV!FkCFB<-gfI;69M!Vkc>JWgkaLh;`~wzm2*GMK5Y<+t+|PA4D%d4sXS z3)oODxtd9Gf+4OqXU@S-b*j$b0K_uWoj0u!2#1?ALCH5HRK(9_8K@j#9X2e3LrD55 z1GYlr`cc?PSmkIGsYDo$fUFEXs6M3-gr%wSgHqQFl;{i!os_4rMI`50)hT&v)twfg z!-s*Unb_ZC=Z&(naCR-#`kekQa_tuKtgF8>GpF@;K|xQDtTE)}s~nAN5xwjUK~fo* zv_Q%jmp9J(pg2c>3yGf92lPx^X*+_sfFxjiw6QSU4K-M=IVCq5dxV&n6lS|jVg{}G zdZiCg;mq|RJd6>c3S@63;PiWLnm~>1IHC)Ra8r%4)Zoz?6r?hWNv~y;QC&|}bjv*T zkyi0TS@DolYMFUk6v8u@$aG2o7E0nl80d8vF*fW7s?>C>`WwTvY_qUO|H99mR8bDWq4K)4cUih?CskBgV~v-o z+bFEdJFwF6>7CfqIHMt2hRId}5fgMH#-ztO3p6?%sq8#FZ}`ZWokKTf40D>3`7oWd z5Y zBj>IOD6C_0>e{+X7{MZ0Wgk`@O$VK?IADU`N2oW9=>Ij&d=X&xk!m@89GCj|49lSW z$7GGS-`Qn0Lk?)IA0jgkCt|JxzAekkdBJo$Upo0>04=K6Dai4_A92YLMn%{#ANb(# zB3-7#RkkersmDYH^W}SZQ;owDcka^<9jWGw&Xx6xGnkdpLuR2e!U8c;Y`CK9Uip39 z`GYglLgL-zkhRCXT zXiKuojME8?RH`))5EzC^!iItL@x`;ZZ@4XW6(?hbTu$GDbqtNZa)#&^zEmZXN%k%sUfUJ+XVlF6adh($Y_@1A*qSk$-S z^7e7EuWo9kZc|em2boB;aaXO@>fT1b*>n$GqbFKi)YB@GS!DLGU1a^wq4e|TFYel? zm=j~1K9Q0T@as&=_H?kO_cC^OZOPuFP&Xajveb4%e`{vOWYKVoOLVgrtDf3->E)Lr zfM^#PSluzji(xt&z$l76A>uyCVH8XW*4JS!ML5!lv;2Jc-U5-W=t(xRfaLFFTUHnO zKS0YPoE{+>7H#n1K%U?w?Wuh-!Q5>`Sfc#`u~tzWtAf4yW<`<`ZZIw0(GAh@jLMnQ z%8h)x?Itfo)k#J#W3>2cCp(t*InD*edE_Ak1kZqrJO|*>si_U$i=11@O=m`=yW1?GL{Ua%{uB3 zk8^G0=w7YX{Yp>Oel0${YS(uC@8ZxFiAA;9AQJ_&9O{SpQtfL2m|kSwIr=I zW$Yx-rsFMO0pOcNB?S5x6)faopdP!1h<+^edH0lYK?y&WuvB`7A6J4377-8C?@B|e z`w*^2Y^U+!<>Aru!Sj~~e?B^l6m)vm=U?HHvK+0FdWf7?F{7Xh7>vH8rQ~A`Y$$kB zw?thHDuJkD$kPfq;tc72AoWQlXK=BUF~>c_qmpzf?K|?u8+nbY-2zkL>S!*S z@$71LQa5w5g?0)YSTJML0U@n)h0^$%%?HAU13o+1H0H=t_buCJ+N0M+rge;d`223o z>$QeX@MO`bP@_bl+C+cxrZ&q4S%=u0l`Ku4SCi&^Ho3N0B(zq=Htll<4vTT&5XCS9 zrUNowSGy_M&G0q{feH&RZ&Tvxpd z?*>jMIBRg9@|Kn0=$@2f7YWuCy9!UP&hpa(T3oG{82oOz9>O=&E>n(LoaXP3lMCRe z6MKT~&!#Pk2x9X#MdQX>)eDDi(Q6c>G3VB#JY;6CI(T{V?D_G*+p6@>?i}>h?|2P& z=PFG`t=)?ewdc0){@h%xC^v1;{!#FheC~28(bM8xnRmT_7d9hZ`8QD8?$+8wq`m?HuWnptJ7Lg*eq~={Abu1 z@m`@-8zXxo=aWicA9s#0n_=$L*(nA<;jrbNEYxSk97i@<1cL~WsdwXC7A$cx6jl8pg7!5) zMiBF?fUEUt)jAeSm#SR>Yh1N7BN(_lis5&{qxwp2a}};6=gGAeC(3XO&`kB|fA@tk z`01OkZjp)zXRLGca4}ktzV+P{3h9N)W9OUExg&)G|7#d2wI)7x zp#ZCo7}FvmZuSdfb?zXAdhW&>&)}z`eBDl{P-WVdh$AUxvtgbUb&X-QE$oJ*8)5Yb zkAiJF^ML&l8vuQu`*7Ve_y)o~;^*#BW~@!>O|}Z?anmsf?N)rL#298;b>H&-DH`8( z5mNZFsp=Y!tEG4Zk^AaU zIJTYa?w3`X0bhcg!YK(!kvEF%-BA{2A1k{=mIv$6?Ws$%o4llaiamN*D&0D;=ubqN zA=*53=`!L5^l^yNMT1H!$LTtrNDXnOc}5x}tPec4V$Z>k^6O!7RMdys$EOYs;o-3T zdiXxHCnA&=*fkC!vT`1UYuGZ9GGSwxCG2Dq*>H{hgVf%k*v@}jXUppyG0{sFFs>bL z#P+a>M5Frd)%W*{L-lU4w$r=7&z~@-3-)G|UFO4#cXFeoNr7EVL?&>;Dy5`VVfC9+ zUnwK|jM`6iM^shvUQJ{dThd;;=a!YnGm zwu?j{S}!}&B0iSUZOfSBGmb~ZLkjDeOAPW6j{sJpSde|IiYV^XRP}xE@?OKtM^dKT zO0(`E&|wJb?!k4>zz<8U{hJayY6xzPp2EAV+-{WdgW^^<0#&fv=5)~01J-q31S&50 zT(~UkO7q$5!$7}rYu#3=7~~)4O(!Y3!~qHI^Ci(6Ek&VV;SjG zz}S)}4zH`C9NbT@gNPq8;$n;BI$z%_dQLfv%<*9`giO4`31%p%D)0lGSxEP=4b-=s z>v@|hbpqfZivcR&U||9J!Chm(&WcJtRLs_ias!o58)Ur6FWu-i$C|~4g4|rvcp%zy zY$vdQXFxxB3ah0-O$IKf8tD`lEF4`!7mE$#@e_Q1K3QjO>$fD$6aI_{W|wJPUF0j@ z-(K+K(ExGz#zy@1>;_@%@Uxw>?wXB5f#kh%UdSB@FSAIOlzr;7WKX1qR&QG&k;{hC zMXOWv<7L?e9WKHO4-2^BboF7IG)Hfwu64N->Y-3k(qJB8Lv4`}fISf5&`2(6R>vs9 za6{BP!_Y%96}PDx`V|iOPWl9**CJrDL8!;;eTRi&3ay z8cP1C2mD*d(L5}{awSWKXR#E8?~E=uTQTN?L~Ni{d9-&WV3R0G)a1?$I& zMc;dTys*UVy9(AtP^xYF5^*J1oa=md_J1NC%3fubMPlDOHeC1;-^pYH{>`7vFD-QEs~z-QEAkI3t;MZuHmNF1tzL@`6LXZuz!S7q2h@(4iJWY;x`Q&CIpT9bLm!@o%Qc^41(ZJ<{OEPLTMq5bPQ3!e&wp zYaC2z{1qub1*9<0We?;^(^JSrC}zV!9O z=9HCnhiU`vZLt7VHEr>1_cJWXhL__BP_JX;GMR``Fl<}UA!qfK&eV5J9`%efbkLGt zx#PA4R|_z;dVsgf*n}(!$Ud_rF#7$8)QGRMQeVi*UkNIrnlMrX@h;Q_UMJ9vbvp4X zvmq8L6bcuRE<#|8bc5BS8o^dg*@cG8Uw?^si}DJl2<5sr4=r>U4c!CD8<7o$ZMSIv zDz<{K>X5bIWU@)TuS0`v-|gIt~_XY=9k&|I-f*>6dArN#V7{SQZrx` zCX3#wemdYkQi!FBfJA~tu>kh1Zn5bQt8A2zU18uC#d2r(fUk+PQIN#8C)rd0vNZ~_ zed;Zm0|R2(WSQ9^wljtkbdFbTf&y^=WuAeAFAcu653{&E;9kJ(dvSv>CG~n)vnd=B%d@(zq@KdVp z5A=-U0=OQlGD(6IUO;kWf%$t9^R(Nkk9^Z{z?#3`;9M@vIA&&iA%0u(5)4_X$Fq>G zmg%g(6mdwx(7)lu=A))@JhQ?qknON0WO{$^co#0r1ihhD{vmIr>EV2UU=|M+kK#bP z~O#3c~4`UEJy!_pWSJWJg%X$YG^G z8bm6eRb2JadHv8DB?z{?{OJ6b98x{te-&^CfHCl>1V7t_UVq>)8WLN#UN{ZIv<_v< z-n3&=aVVoHhV*IVCuHR_>MqgRwP~%%mc!c}0NZr_#E)?+A&60suv6vK>2x`4$7G$I zseynYyg_GE2i4ZC5>6!GB3`0nt%e>*TV6y-Um`Y|Fp4Pe5D~fT&X4Uy0)*bC3FzSP z>ELAl#StGddGn;@Y*FT}Ush`}Re8N?X#-#<0G5`cyQSDQI9+2XU2*wQ(z0Vg-wjzr z*!BvKF;SjWZ7U}+Ez*jNrG|P#M@pSA<<_%FK6`J))tFECZfK%Hz4PdkO_dcu)SGIl zyO`_Pn#AE^AGS1pi4fMc?RVxn;mNKhk- z*gYefyYc4rI}H<`@Wqh|d#jA!q2HF2Zbnw0GxQ#X@lwM~B81)-ed}JUw2?Z(Yu}bz zn+m>nWL6!sYoevYz>K9Ixk_a&EOY(Pm^lcmkEX36FmOTYDuK|)A&#XSM#!D4ASy21 zD7%Fc*|hb9wSv+@Fq)Dc<+8o674JuHtt6MC!k%3J2_GPQ{t~o^awe^m+WJtT>vF3Q>kS6` zJCG%G6IfSjx1)z`X>Knz_MtOYC_q1pX2b_Au`LvKANLiJJa+1zz``u`86mHks7FYO zE!_kb>#bVBE?W&Zb0nGxTg&2bWVP{Wd0q;!EGXyxe&EQjj6>~HeBVC6H}#HJF4<)< zctFN7h1m*OPlP0No~~flWq(rsiSTsEoh8@CX|_s{zy?&<*jrH@6^qU2o8K(*8O-DX zThv+RLvOPWD|y;DRc&>xB*tfO3@F~)Pi`Al{ikVdpe9qSz%&nf-+tBIqdX=SkbIgQ zv2lF41)1@_{1tl^3u%YO$Lc9$6{N}DSfl$KZ5q94S6F(<*y-USSNx9(#KIiAU0 zr09liva`v!BT5VOh#{8%zjC&e#i6T?FyI6*Rk{~Lp~nlsqG^*Y<#dQo_2G;{>1wrX z$|&xrDmyGe@=&OUfy9!b1A3roymaY~*3(7NbU~7#9&FsGWtt5$?`@m9C^^C1SEW|{ zT`Y{(A}w4AA+(D1-d+M>rn$Tg5_;MXh1^;Lw>}PWm0#QA7BU1}HeEt@Nz9Wttd(|2 zql>Fe^SP(Quwdjf$}emarP_rnkzmD2N9yj8;Y< zPIve~WF=(cZ*5(GB@N*wR6>1^Y^|`B5Fwa2D#3j^TKiChB4zujH%iNI%Dvy0OR8;h z)C*9tUsZqzqdQz_ee$cOyJm)`_(~2rNFs7_IMONKYh1-RLtg7H#;ws+U7^2~Shz~!0etvp)IsBJZnst-_4%4NxdF?~L zcV!~%kRjdB)ys}`cLLi)`Sfk6aqAsP10Ci;#0Zk~twy({&wPxP%P2*%!S365>a#>Z z{LzwEtDcyoRLML-vIDI&Uv}N-R{09O~_ADA- z(b;IDD2z{=4U|uvT$db6Vg5e7R<^>xWBIr8NwuYCx&{O0MUCpzb(0ixSYcLMmYvhd zDY}}U!bF~(`EoT1#w1p&S|nFbv|RmM9v8YDG{b-%cZv))Dz<%9EswHu;LXEu=ENtM zFcDi)rKlyeNTZ~L_;Q+lz%TffTR)VdWNEv+r0^vGuz|B;d%| zfypBEl1tFeIJ|(dlJEoh3^o*;X}&ijD|i!3xvTQ30YjfJ*|AD|x9n}zfG(~v;+Afq z$-5N=@5#s4>ON8Kz%T%++oM&TH ze-UlqxP?zYk7YrrB$KYCkbTl&BI`c#Dw5)L24r43DKJ6Y0x^wyk&Ap-HJ(7;_*B%wrne;PDj_LaXg|!VzFM z2WNW>BZ~H_iZYEtR6l{?9IzT&KWeIP4kQuwsF#_){h?mSgd%1#p$9-x-+IJ7X4miDuaiK z(tbqlO8o40TD~X5ld|g52I9k-KMH%WD%lyy^wZ7$+5|4dg1Dj^2t!Y|=&_b7JyMcs z&sw~kc~4I{M z2gG2$CvBHDtd7H%S<2LV4xQk!O^X# zdtejjdOIe*LpSsvh8vFRymd*4Fdbg>!?4m4%#O4kGz{3-?DsR+b=Wy0b0eE>(R4{~ zk&n%e5_J|SDK#FEtX(<(CCN=O4=Rk4BN2CV^m9ok9gKX}Ra-KlML}TMNES!!G$S&eT}%rrn0k=Py_)Wh}G73f4`9@8ZE3>9L|DRa3@8Aiy~y`HD-H z?S=w&Es!9R)JHnY{>9M<4cEgF)djG&SuC(w=E6X$t!MrVD>sjz}$;O{96FQXMyL)H6I$-C6(| zkHS7&T*C3;;z#x=e}4v?i{ci&qCkk=abHfeTAVaaToO!%Q}9?|@EF^6Pq7V&-Asqq zamj~EnGyj^;5$~z6}p)vWs}_i&}A7u73~zPvTAhBbt8PKj4#}PBHY+lUzQ@BZ0gBI zKj2a~^a+0x=vo(qa?o;Gsc=dg%1X1duM1Uvkx-EP77fhmYK~WJ@5)|dYOU($=wV(Y zsZ|BYZaGiYRs54~@|0VUP?D4YyA6Y#oL|FnewU5^2HXGpW3x)Qx<+&lzrS7oetfmh27l5waq=#cM>&vg<8PPFTeSH2UC?0 z6Qp5-XM11&fgjq2b4X-pO-#y8fk)>|+k8$pEIQt#X~n>&Cfin2d~T0*#^Ny45y=snAt{+UpcFt2X)l*NI*LHFXT-+}m6GZ3XUG zn;*d?i#;$-ty(NxZE;H)cf{i3xH&R^GIuQLl~vq`jG-&iXTOrW(d|62HfKJ=&*mUR zcl6WRCauf^x`&m4e;gOFSe`DS#?YPs-2f_a^9M$#q=uiFDa^}-0vOkD!cw36+?U@Y z#LGP{((e%W^p+$(Q(4Mvk)T z+_Owl3Wlz!DppD&1L|=3Pc4W6K-yL+oy}Y$D_2(89+3q!tj>Y@rL7y{K`=@w(^)%D z&bV<<8YMelN!eljw($U@qfxkD>^Cu>p^5YJt(OlbZr=3} z5IWq8eW<<3Y)p85?5;7BX*io{_cBsW`=PgdK;}4Ur7%~g)Q$B+o+Vo>S1GEpWF`IB zx@=acOJfVdxJ2bOko$QoiOuPUDJ_O&0?UI=)*@m&u2W}dA4-0$jGH&Tw^9eW1#E3r zrn7zMLaa-pga^Gun?bN*)1*FLp;EcaO+W}1dNe^gLZ(H~C6vEFa zFOci^HAPoC5sl6VaX%uNA=<#S=da^YWVnwXv)nii1;m#sIxIPha(xL|>1r0~^y)>O z4xO+bCof@lw4`G4+$x{?q42CBXZKhsnXq=ik3LNqm)1^7Q%3r-6e{eMcj4tcR{?QyD&-ezKmhD(e$= zgz!auU>?jmtJEtPvmi9Z0+y6M7qI?=o?h|6S}zNV`0tL|o1^DP2XA3;zCU@}{9P+~ z_WI=uzxh~L74ltDiPjl@cbtI)GM~ZnIo&I#XRkM^k0bpb&9h>*vyzp7hcVP+E>_ny z(Az7IxAx(ZGW}4JDJiHl-YNt}4K+#R_Ug_1oUgTS3@g0t0Z_+DyE0!?$#qTmezQl0bkhm04TZu9XQq<_``d*`| zDjM}q`(J+Z`vB4u7>KM}(9*lP*G{Znx2>!E$TbKTFqF;VOY&Zxd|_1%^Lu_TnIVR- zD{zP%J--6BsAtF0fT(^fC^)#52w9ZX9kAvXqdsIvac^v1HZkO!A@0+g? z1{mq!D?mZr+}ctJhGdviI9jsmfFMr;6(k^ z5FS#UriWI9*ed!&SN-r5NTKtsHd(0HhpjGM)1`AjLES9P_vv5H>cNCzQbuh~Q<0hC zo*g(-V)|A&IrtGR`)oa3T%)>Uwuly$DVogBDF(VrywJUFTA<>sIa_qclX<%O>dRJl zHK*n)#1DVsRo_i(Q4;;0eN`L{nExSSkq|8u<$IQ7&{%_2QigC3v!=+y3yNHOhO?^Ur@ zy_?ad%{D`Ync}v;$RcgKvR|k~L|ObrKXF-6J<-d^9c{0EP*8Z2US^wzr8Fc(dKnVl zD)rK&RV-D}9bju(mX1s(Vk*Cw_!w24ms_t9B#w$XmmHqNXkTGC6

MxT8>?hmdH^ zZHCI%$S=TwqrCzwcYugDfE{dMljC~8 z&Rs_YvT~KsBiqhhdz7OMfpI1;C)y;xa{Wu(*2Sb>Y+)_8W0)V;Ua5;(G7g_1ZqaSF zmzHFemQmTq&9;dkKUrWZa$Z6Hvdk_(_RBBLH6cxE(>gPLAwf}ER&XjZ**@o3gIohC zA9ZRB1`#imIXByevqyS5=JxhQ3`yGzIEV= zfGL?-TVQlQ7k-NwAy4`emTje*ucIaoUJuy{4vY7UewOeG4>0sJ3U zvX}$^OapeA1RyLwvCC2FBpFfLzj9D_-31+sQo6G?Pb^8o)T9?3hxNNzND=h3u-X({XCB_}oX7Xf}gFVk@u^}8F78!($p$IC{@p_L`kz5`r|69#bI+#Xh0*`S@1x88XgJ=&8Ja@&_hKkhXyt#Ek!08s- z>PejlSauIiEv(N>05-FUs-3PXW?qRK*AJR*k^bf&bsl)t12eYR13YVQs=*^}^_Ls< z*Ql&~pJf*s8;oGC%+o3uE;LGff(j;Nok^=Io{^eS%j%1auD zui4=5jh1UT;}j7V*pL9;-rBJ$5p#?;#jjiHa{W%NNrugyOC+CERm3*=_UU`G3I4Gb zXM0X65X$Hjj?r>tF=i__a2dKF#OJ#s|6tcEBUfXlU80xBlBG(gihA7ZNKyyk^%hiP zxQ^lqj>>!PVx>se0Ntsh^hoNZqU2I-TbUJTb*LgM?s+=kvUY}JAxgtaueCYptkTty z;kdyRX8LA_bcQ=PQ-nx+hq=Jc?b(fb3AAvwTen?CS8P;VrJk$G+w2-(v~8V^ z=1#z>f@HEo3mM}So!I_9JFQ!ng4OO{O}&4aG5t#EH>`S~F%8phd`t3nMmNx@zagQx zd+Ssv2T=`dLFHa!_9Tr3|H1Wov3WtOnnOwVn z#Pim-o8l7RbrnaF$r>rvTeg5H#zrb9xh*%T7q6?8uMi!qcREC)8bCAQG(TWJPf*idxbZ zs-UZ=b9Wjp#b>o)05x!mRv7>bRMRTw$pWrrii(VFWeR7u;Ky&({r7BF?xzQgn7G_k z%@_AjFV~L*nl+l&Id29FJ9ao}WA7SoR;{dlU?nuwEVhL(+CrQ*p^F|v;B-Z`PuC|N z)!1Ys2M-zldunl=BmYgQ27YX-9%TyaJ*He&k3ig+-|Hcayn9x+CY>Fb>%jZpRG#C)Lx}_+(TUSw$r>8TohJ6 z_k~)$jOs;!y?a;yq*(yf+UC%}!CrYD?h#}*6zNrMo$f@+^7oapR%F2R&e(y423|U_G5Qd|RF8`7~b{qc@BmtqdqkG;!dE z3pk$QA};5IaIA8!=F9gP8f|IZf!N~7D5N-@6s}Z;2y@_B!enF<0jdqzOfm?GrdfR& ziO13$uO}D|(H(vrv4b;Tx7H`j0`9Ascvz~`ixL} zxX`$%spO_5G4D`f=GInqa=?7~*oSAr$en9r2aysn4I!-X- zgqeuqeZG*puuao}MLVL+SMbpT)K##EXD3wsC7fqbTGQrDIA56ch@;NNmGtK~_>{m^zi+qty{{QZ-ZMTghiN5D6X6Xl! z0ZCsJ0}BBR;8->wZ0s14XRlghf(IQf3T8|Gr(9?ykPi&~nzh^J00}Q+==Q zuC6*~uFLg!SiO9ECbo`v2*FZSUHg_4wZRyT_>oas`-Z+a9C}V=l&mWaVBq%1q1n#B zXnLB3Hf;{M38~9?n>v+74dU5a@f56pXlP(6J2)I{7V49tI9WR$6|GOf5U0}wt&hTen2ysw+#|w8WG%FJF@bJjuIl-tjgOE7I z*>V`3sSRQqM$tjLl6^=mUstzF0y`la%C$9HW_{D1yKEN;Upm?rY}Iz!+7{HCx9SC6 z-fGJNSKK?8>t7mDbLiw>n|r~Hu_ zh%LxM;Nhie#Q(ZKV#&s^zK?G}og?==tDg)p4-- zRT9|@gtCN3U9W^4HB<{|`Ij*{eG)KaE|4s_HWv3v7o%*`#+}gOa=I)6N7} zJ&0ge+PsYTUpE=umhxd?g#U|ZPhW9j?J!aiBjbT(t?!L`ZckX7A>)zP!d@xtp=2cf zj%sS{xMJK-^epvm{-wW4-c=>k0m9QHTmj(8t^$~VseDtE1Gh4G&3|X1q1G~~S87g+Z?ih?%lgOj{xm5ZKed>P z+PycZ=OO63*u=eSeqDZ8u67*5CcJA}uWNprwOifV?G-XLH5K5M=1{CO&Z%GHbirZi zrk)vQ0lYyHCWbvr=`2o%-r^(+5-v>e1P01>5j5!e>ip8rMb)G-X%XR5Dkq)g$Am)3 zGT!04vjDELb&!vK8gd4EJ-Yf3^YZt{H=B`4WBaO;aD_o1n7bgO@=TA7E;|R#X!suG zU2&=|=HKfyxtc&u#OX@yJ7OUGL2{MG1kj)>oa!aDi?ADqB9Ojl2S-@EE%xr3G&+3P zEzP^8ZTB>QsLyu~b!xcH4+$k zrEeNIGBM=i)U*T_*bH1$KLFht$5~O+z}6uxq)pIGBAYNK1rI##F_M5hKCnPbdORyHm$pEBZ$cQ zfl2U(7>jH$6Cl@3x}PeEwnjBd;b259s*%yaluhU}Lqq{Pax%ze;x|T4b+)_AfeHNWk*Mk>XxjFn6?& zWib2{?$t+wz|_5)!ME%*t8aS?PuP= z5+>TkWloM0r&*T9`aL7icnFn+g*^u@1t%PJeGq8319DNMhM|B=yg(!GRJuSr$Rnb? z5W$|Q9Js|e1D-2yCTqB>f5^ki%%MVlG=kH4+4gLaUBR95)%*|gQ7RFRKj05O*6WRC zoLQKUx9iFD10n^(CU5}Om6=JZ=FEqB-TUt3_3?{;j$i(A`s&%+@w*qNXD7eBRzsun zWcaAVKl0o0%afm9R0dsV%#w!NKerv`J{M>}{Hrj03GQ>_q6nUq)>jK6_l;ljVzRaM z0u!iP@-!j3%<&Ws-2w#d%0@ONNYUSME~;cmhIB>I4Ri$WQ}D@330d#v9{(?w4{ziB zQ-9cyT3^Ypnt)F*?Yi?Bge%dJ68D0t+@sFz_G<9=PHzbHxXA!9M5UXPq5MC0@ut|t zeO)@b)D3*{ep}C_1G?J5Vh3aW9Noanx9y(>i(%4hCOoRoKK7{uv2?uJF*$ z5CMi4fJ@CmB9h|A>3{3Pr_tP_OlJvuerEb=!modWbK~KV*}g1q0fh9fZnvYS!$-j; zX*pc4cH^rFwSg6Ky4tztI>{9k>nzO`)rBrH{Yw4g<=TvWM^l6#02^_GVA;kC@V;I% ziZ^({j1I5_2i3FJ$C!L{;1%(}4MGX-r~-1G*pye@KEXBGMTEY^uie^ZqQIe$c>dYDq0|C!3z?g5MQajn$LVVvu|8=LycHDf^BPTc_H%3+I$yR z*WY7PWpy=Vr-T|oL$I=RVM`TS*^0BSW5lR&Z|fAMvy}u3g8Gl=8FOEQm3#t(r`wnv z9O8r39j9iP(n5nbhgMR>lAJ->W`tnb5<>(E!Xsm{;cZeM|7PxlzJ!w5^zAj_Wb2Nb z7LFDD>YdW>znq?pVG0Ugi#GRsTt3tYe)5F)7!sMSw#DQUmxLZHA7`qP%IssQ_3~pQ z73@u3;yyA47pAija%9duI`l+}k^vtZCqM*c*T#SZ14zV;q1-d>wXLB4?!hI90chIc z2Idh1jc=xGEDPP2G~@Hn`!0gix*V32Xx(T}XOFj36{lxYUWZasIvKf&I^|lpm;U@N z9?MGWp^Ht!@5a-`Ovbf7TP{~yaNj(4%piR~($$SvyrdnL?NUmI z$hy84)i@Zagaeh(1^@ye`VJQM-Rf2?`*bSH(`k-(JheZolHV&2pRjrJr^kx)#J;0 z=Pa(1_4K_uTTRuVtYvVbjzzq^Q37YRMibeP(u!^C%rcGJJCxMjy|35WH}oM-s%lr; zZ6q)}0FArZMD{c^l2=eYHel!2Wn3FxxM1rEMb)blZCfXjb3n&kT3aaGDwuu1;~^rM z18mBb7)1a=qV}h7Yk+%*z}!h+Lri6RGYNlunD|)iy*cF%L$!S&40n#-6|lTj^>kQ0 zC+soL=G8TknLJzhvg21|VdZy3^H6`B*DdGoP{&D$UTxJ>c?kd$ z5YLUxW+imJK}5Fo`3!&U^&wnV_huHPO%-!pV?0tKwTOr{Ur?keeozi(gu5DV_X-yT;!rxbNKuqVL08*w38O! z4oaZ>aEad^TH}g<*MQiY&}%{VQ4%q|A?`+q^4&CpQY_cYdB~#B67-5sL;(lPUki3+ zewHXz%mpzq@J4tB)8O*v6OBwYUtX>7iZKE;73CGMNLcu71i{1!P?Tw-0gB;ph|x6w zup2c(BtJaW?Lm)Vs1c%vifElodO?X0!*^+l*TTsSBA0{zC9EO$J6}V7gZrJXZRCd% z6Zl;@&V&%SE3X9F2IdF%y)fO%Af9J982LOw;MTGgf*GbXb@_(-74FTC zPybb&{`$J=E?0Wvp)`itCBO!S{5V1;F;}8dtfYkW9*TD)_#3~|SCK9uO?W>^IiUq} zY&R;SKn!!FLbc4}s5K%Qn$suUBHHO-G(#X-^+fW@y+UUxJzLrPeQ8V+kRUmjBmGd-$VyIi{CM zJeVAhX_;_>H8Wh|(O;6BwD}xvJAWf{*Lx$&$esKN0B$_BVQC7W^_kSv=@DNug(J<} zC&IVk&kY2)j7LdJT-I4ZXC9JkRB=hJY6EC((IOqzirJ z<}~1yPmo2T2ef;1HcQsLKxpvBN>NL5kiCqz%X^^Dk`>dJFkho)*7`BffEJMog4YC) zUGnk}RF$pxsj4pVhA!EYeoK6LHQ^}XFGIBJAM#qdn0%5*-M7d^9l3-Y#_AK<2|TFY z)N3$T3UxjqY^(03-ujkAjh^P)hje3+GNu-C6{EX#$okXu6B|q^AZ%e{I*$@Yh3S{| z&ggykB$j7p^HV4QZt3hcthCwkrNyjLxK&Xj@Ka@07IV9;)5HmTO!8Ao-iR=PcsIPA z1Ct}WD^=Q+*fFXc2PWrYv3(QuF@)Pixmv<$>UNjA)WIcT`=u@-(8M3JIO*uqLRiG3 zTnBG_POR>Ce%G`oQgPd)ITuR2GcV_Fqb z{pNJCj1P(elylh=?Oy#@J#AS$hlD#Oa!@_>xVW{<(xFR7jo`VR%tKwhJjhjnO<;Pd zob?LElH|k%PmSi&7CpCKqT(pdT6YD7hlUsXBemQ7HJV*Pf8eKg@=B}WjDS*^`G5s} zSalyidh$c{eFgvYj)sq~?y6Tmed!c|lI=M`jQ5qAB4TdX20ge?rH0Z{VgK<{o7q85 zF84S$NZ>afOcx+#)JpN zat*E{EJrNgM2M)_-_TQH8uPu#lt8rfX~fpS4c*|tn^o3^-p&0gq7>nQrBTU;#j$n9 ziBUQIG+AE!c>WOotv)Lu*@(sC`bH%|cN5J3@$7nzKu*z^#RtH@wK2%)n+nLVfJg`- z`*)KyQu38}>BVBD zdh6Mn6QAWG&DbC;n@HUti^&uY;?*K3By(G8CI<{oKeRl-BO}Qh6<$)GcCp`^cBdS?w!`@FQjq$ zMpvOG6ia1APCj<6R5(|U#)4io!RlD;IWt(|Exa{jmPs_+#we5k-|CiTX{ZQ&X<$*r^Q=lDHVh+C*~Cono;0;8l7-^&8YtXP)h>@6aWAK2mlurEnK|&&ic1M005&t z0RSQZ003ieWMz14Wo~mXV{BXJ2h$ZeeF-axQRr?R|T58#nUc|N0a- zx>6z&ik7_gR#vr4m24|nUD>iva`tg96?a4qrHM%ncZRgA)p0-j^#gBU9+c$W{Ax3m zip3eA0W=zo?na}#)9D<~FVo2+TU?2^$s)N*3$Z8v5@kA17OQM5QdOoD)4UMle3FW6 zF-gCq<)G8)?CfOoWnQer)mZ)YOPMd!-?RMc3hJt#eZnrS4Hnoh$o1G z^iX71i@ZobF7mx11vnQ35T7E2$yjhJebI@GbCIuvZlP0+7?|N{rg8n}<5wTvot=-~ z9)CC*y?!}5J$idwIq+Cv7B3XR0X8bB$ji%Ww z9gUM!dX*Qqm0dvO2Bo`5SnY8Ms|D~l!#Ow<^JEF#o6qxwNWZM0i$GqbSmi%RaO{w$Q!u0J>f3zr!NXr`V z%v4mZ#_Pe?j!&+q`2vh0`25rzw1CadwaD6@a2 zzzAnafz>}Jvo&+0BLAF<)pe1tudbKtm4K<6L#Ls=>!g_6Bt;6qUZ#uj^*kwHil4ps zy|~IgL*;)x8~pCY7f~+Lbh2IwfK!OyKNmOI>KYhM0aX0d1y&QJa?l0AUk+1WvY94E8!dIt0|n%u&8vhj!pFv0(rrG8t#9+rbXO`EZ@VzOY`5 zvByZzfEXBXvJg3N%y}}ifnVMtpD#1$4Pcu)f*a14Y`%uYdc9tJg8wH)G6rtWRV4o| zN}QfzJxfbj>$pVl!2h{lrC3502GK-1F1IIi6i)*|=79ic4r z^D3KV|D+ia(**gA=mPSS^=u`+-~R@e>?}=yGOl5pD)M~ZW6T^CR~E(cY#fSL%mwqy zUtrRi0Fq^9&D-c(*m_qh+;7cdkW+YQkt)xL4F^UsQn)B`7&Q!mK%CK{usWk>eqzl| zUyNtTd`Sou9}oI|zX1G&A|3koi8z@mdeNkaSOSWeWF>9~lfGJ!+`-W>t3p%juGgxBzuV_6sCVcu^#%Y?@q0NTPheMH>A9|^wxc(1T#6H2?CCG-awX)% z_as1~en2?MD>tcFm&hPeaRZyuYHtkdN47)IlW~!kur_8h;4O=*HHcX}$Fj8%w>AvN zJS#q^~^$7aKzgQ!SqF@9!&8@eBNMDE190gbiX{oS+{w zklA3_@~&&sfu4Eh9K7@ugP?LA_lV6)6@vlvKJTf%`*7U8hpjxvE}kdTR8gDiwqkh! z$D{A6?TKUp1e2+67+@&ijjE0JMfw?paG2B28Hyyxv;UbExf)DOEAF`NyF8m&G8GwF z+|EplVwBL1`HpQI{Nb6?%(|Yb_pyEPhZh!iFJVGfxA49IQG(~z0y2_7A~fPLw6GGc z#liQQTp~~rvjYB&eEI?E{^7-;y*^u%!iS}!0f;RNSnplu8?pJw#`n$O8A@ei5MxTh zh?r&bY(>xqq7%aSU@t4VHzZ5gp1u$j^~H5|b*)6KA}c?MqxUBwMY+3-0(^yg6mn*m z7;3fC4+@0{aEkPH>I=gpB4kSxEz`p`b-q^=W+W@Z5z{bXiVEOM$uJmw!;q%Gjg2Az zuXbOvi?gy4NQ2M>h<@Fm<)<|ZI=XFI(wrHPDv-oLZmFN)@J79~& z)ybO)l$mFXZa7fCZcjEKseO`Ot}R;cybcg(glf=|znO@x$2y1KfKcNxPkLvFwd2nw zeYfUw_|4#8`VDXyLLq$ZSyas9?E5S`XtW;?je!v%&~L@T{=Sc$9bOL-zr?V)d8uHG zF4x&?qV(M?Ek`IJF}l0lIr{sMtRx@FH1d(mD4-KxT#(8^U+||^JYFJDVSoY}K(u0o zoputQ4U|gA(6l#$J!z&DV*xt&iZqBpRMPglCGx4@&h`bV@1#MC6?V3)kdRe9-6jlx zOkmwz)Khnn8}EYVx!dcjU0O0F`5ikCEiUq#g(EauqXt2ZDI3dmPUEm`s6 z^2`usRCZC>&H(srx|npaaz-O!@ro7h9aBonUn!i%H=GXq_r41 z1I-^bhV%Dtj$R*wT7jJ}go}pl#wuG?pqvrQZ?W;(pSBw96`M^{W?Yh6jMqh(7tD0| z@BWTnsHyyc)H@pIv-NyI1~q<5gl$;XA#P&wO9G!Jv<^y#`(Rz>KK&iSu{Py$fj%K! znsGlOJOp}SxRWjqnjEZ^+bo@du)_mlL{E}ca!I-!jRu!%f=#Bdl>id}3cdym*#?s2 zbDjZ^S)P#YjvAl@2yN$Lp3afSBxq6^L*L5mYLQK|F-+p@7Uq7k9-|GN@RTN>RQYv@ zhI(8OWU3+pz*+2;uKXcI~TYycb2!S>wMAK zaVk0tQ@6{jaHt4=!251F%jg8<2IP*TNiRjbNHvtS(a`j zCCiMc_4|Dgs{MV&>_!)Ax558gTTGFIv?FQn@|I^znL23AA-heJ3A4g4fFOD;`TEbm z;z>?&tyYNhF6{qZNtr$I^r?8>qp1c#D;sAZ2iy)UW+)^cfX@IJ(#v7XCqeewQCn*S z?HJyeU=U6#v2^zE+>l8Z^&?Yuub`swWvmmg(|X-+##*^~PbgQs(F z6hICx_zZUs9ClVN+;q3B`}&>#aQ{m!|>haYNkekSh8@_?by?N4O_E9 zlmxqoPIpIbqOePhKaFT6je|+qDKsuo?yesoC^iAqYC~SxAz=Zdl$$ z$R6y|LSTyHxGLi}i(5hzmg;b-=>`t!U{?0u-i{S{xOYhEDPagC$s?2aMa?LYtWjW z9!g`hPMyY|2t6yxW*}8}Nhv%;pU*}XOdzFo&X3<5zxu!{`p2_(Z)NXBb2RW2;xB(X zK079lnc*(AwJVNJUsLhnuGg7e_aML(33-Xih3;xZAar$mc;tXMWuFkcrkqh5Kq#*Q zibx`38I2v^lxm4bWrX>By|O4d)H32cMq=pIyPr=#bieH}mI!i0>UDL-V3Z&CFLr`D zQF7E1WXKW~%(!~eVRJB)i36cBu#PRJ5GQk|0*{5~7K-mT7GAb?6AFKX(Th1?Zl%xnL7*MCMaZPHu%IF zj8k2lzWX3fPDR|HBO44SAvy;w>@;sq-ky9AFP!%d2+vAp7bWKlFc`= z_$KxqvUX=F|4_wNeGYH`tgX?S(MOql@UF3L6~h;Mt${^0fC9xgwn@25@u|}W9m$dp zl>HWv$_I4&eQ{91-K#bc=XFKZ?e-FHACY@VgRSpjVz6xzMB&uXACM^<_dIFXD8$D3 zUD}D{LEFkX8|sV8JfA7oetI#T<;luE1k%2Kwvr$nGu3$;IyVd|R1LEKkr7IsQ_A!D zJ}TLxtL3^c;N>#U7ORr_faj^VeSn(JCi#uzn{7_}T*rELFtv4a{5X{F2Y3W!x%0`H zav7o=Sa6Y`UtfxcS6@bry}pz|Ono_xwfbTTrKVWZnWpkB*N4#^dG`VSB~FUsn&_aH z$YOlUMrHW)dyxDjHR_X4OIv0&($ihJQvt3I^^Ftqoq!1>LoyX+zmH{+<1()+2z$sF z3LvqonloRR~wRTT0PS9S#-oSY?=3xRCh7d~zvgR+q&oHbv{~ zBhq@F`WLRWxnxZq$$a*^rwsbih$-ZOq;ZOI9rM+BWhv!4kSfYK9z&}W0N}Qt(sbz_ zEyFGbn)TNj69={*-mti@@kbSntSbAihi)0&w!3<*qumiL6|>pAH>lLfgz}oW-b$8(o^og&-R)G#hvkZ>}XL!Us!?X8EC}Q#haQHfaK5Uo$7u|GlHRy|f z+@y=&?P+vW4y(M7d?_N}3_P z<3A#0*(-q$Z3Q3NGF&=?(nm(m_ARIP8h$3!D}t*EiuH1mtSn7K^S8f9Q@SK2|00h= zTZ`Fk1pTdOX}gm$>^i;NqwR^`Isl^&%qt&RCaV>mP*9gZv|_14c|r|hUWd_Vg-451 zTKj}ifQBb`18fL}*r(|&vI$-qTCegsT@#tm5d}HX*rV5sUk!Sp9Y$izCO%R~bwzg;A&V|4Fry;B%ocs& zNI4emnw?Sk52`mXu`uWfj}_b6qjSd8Akvzfs;!fZ3$z|!Qj25;i+-Yf%IDdILOOmnDikM;B=Zt#7GTG(kteR#(ea zj%s0*esIR_%oFcKLh@3cCUqp?r~+)N(e!GmBFdm$F4J)~&C&^#SL{zq@v5#A*!52% zm#a6Hf}&m-MMb^Q3X8G;#dI)6g1t4U?%>oIH%MAkL6M>G#-N}%+iM8P{XBFBu3$(m z9@bVLhBk)uI239k{nwv{AtqIQy#|Bo`pYmtx~i<#Sx{Mj83ywc-X3td8WAvaSWyE1 zR33+_l=e+S#{)f{o{J1JTpXyT&XG_hgSGg#)~o|wi^o9`hAYACk_B?tvUlPghq$CyL24GyX&ITzFjzs<>?OP?sP2`1w{mLsvmj zCaX0GMJQzhr!Vyd9~%?F1V)$0a0t34OLQIggvvs`Z2t#w5FTRLO-6eRV?D*I=gU6L zeJX9+4%KS<4(`K*O&G70u(VrJ5=#)Pps!z%We6{@)W` zPN@3Q7Jc#2wH{xXy~C@DmeNFGp0$!k#c49bz$3RLcHMbxZzSGWTG-p8=25gT+K_)U zt>b}!mb^E?WWksjFtNmN^bXqy^%r)|hp0pJ8*0xXfV&)RmYjQ?b4nq99puQ1rbRK$rbnULd<| zgvr5JMkx2DI?M~bJI7az^~dU}fiz^x>mmGyG;7;9G04+hXA_wkT(?xFD6N0@m1U&Q z?ne14VQr&lLH^OU%N&M6W}+=P?5>7opW=X?+Gd-lq1A>#%N|4lTV|XJ(~SqrmrIN& zMtW$vz}=|y)XZjS!x+BZg`+V&jM9iY>liwgb2-AHGhTfTGL-*X3wXAr6x7tKU=izX zDtO^Kr_Op^4hm^(4=sHlkcW+#*nX5yKa8JZ;G;zMVR(XpkBu>3$a)+Oce3~lD{NvN zfoVhPT`)nLfq08CGA>iGtEZp6I3=2Q{XKDGFX-}9wv=A_B$y8A-V1H8`6NfY~pvf2>xm5)#1M~=K?dBjV*@CnOU_wAV6#OP#C1`IzlO9O) z<68^v8(db>fVnENCEZMy$~dvPAe75iXLo%2cUpaXrtaZ7pQdT8-Xm-@}EVziPLx*!v4Jt#=D4K|^?h(PB8r_sO*;j5`FO1t12)OW@)B+%I8!tP z*z4^$6L@8~2fF#XijSokN9<%m65;UKoUf(HBveC@N6Pl{w z6Qk3s8V$U}`EV!Nh^~2U8F#YzM!^5%-QZrwL2O-&0za^n6WwFmMWSx1)gE)F(<$0M zKigO1P|vB0pJfBKqG;{Z&N@jIcFjRNo>8-G9!4g>*@=t;riamJ#T&S56%b{k=j`9) z&ymIC-72RJrlY;RJ#qfw?D**I$>~qx{n@)$$LHtxCw!1a-t*a}IoY(%H1TRokr=lSBs@%sdVcpl=Y}r2ULN1pwOe<{BmmmbM zl#jjAiYj~v)x}g-f!AL-(kH@1nNdco0R-AT8nD5FS(ejsx_0r*Z0l4Vx zOfvb{9gknFtaZ=Er&(6hCDn412&!-j_MJY@)74Wx)ztH6omKG>B8pB#4NhGL{UGIN zzVyj0CiG;^V zIz@XW7M91r0=D67hI{Y`+Zk~?eBS{1cwivPNvuhv!9#ySI;B^>~iht-ZNDpXL~7&%JW!L@D#17PjEd zX)akc8C-G0p;_r{;>c+h4qa4&Ru5VZ$eSfbT|x>y1=1b-Qc`HX^u2`jCR>0+(0Q-@ ze#EIn3RBLmRKGr@w>Nn)scbMNdssm4EZlAMxC`6G!`g8{54-dBx{YT#w4W+(@R;j& zUthR4Apjh|{tC);1&E)HPmj-zKD;{v`5Af!MkzHE4d*mOZD9J zb(9vHJ@kEnrtgSMQJ9qzgx66EP`gu#x#zpFR8)FjWO)JmJKE5fGvMbdTph5D;MKAp zVs0w%)&%&IfhGM%9?k_9mxEI}|B2RW5th(gVGoD+iX_a?g$I{eLBgDNsG$%6ds4Mx zFQ0A3S3NEkIUQnZil$KYcy!s1_M{R*-19Ad5@}GVqHR~;h!`1uhfz#i(X|pN`t}ki zR_D~vBm1}g#NxPAJ&JhHe%s|J_T2z^F|D9irQVdE=*G?zNTJ3g&{4) z@EBhWTvvK~DW%GPS_O>mGjpJ7>vVI3o_SK~M|0ny9U;n!=y6dq)p*O9 z0S8XJDtyHv7q4HEFH+j$jT%G_VDYp3DjN^t9mdD?fvv`o)pdFKz+kR#N0qBU4nBde zQtXK|;lS@(D8P@)TNNrvZj~fYRg-M&N|<`2C8&ViFrQbCBi=C}4uh(Ox=J8rYN{&R zI(}b6LxW^8v19qwZZ}9a97XU&%^=Ej1PWGy;{UV3eliM%@dR>%*ntb@o@ zuG6wjE2sGi6q)mUp6cUkb@oZ_^5^4x3A|Dw_&Lbh#5NaciMFp7@SFS8y!n+QGmar% zzUY2I8Z$V;q0+05t@g&w^Jjm+i$Mk#d`(DU1An0hf2Y3C*FNOziTY*g&R zt{U8({NuZW&C@$;obbCBi;&&onRd<)IW!*&ac72pI5?PY%7KvKbmHOHVhx|Meoc|Q zP@o4P9r|~j76@o9b5-E>nUvM5ZToITvHljccD436s#rSdR4KLUyhCZS<|NQ^3YCZE zbt_4=1-rkzx)hIMbr)@!h}bjt%gGini^LjcIutrg{65|=U;2Y;sb?id|CvbuC;<& z$^x-t^6#v8mD`}B>R26tnZCAzW!5Yj;K98q3 zLstr1MB(n@N$zZx&U$^PF*-CMufEqWMYklyta_%mbGxXT56csRU2HAob7Kb`Fdp*q zYmM6jJ_IdHZQZrt53Rq++b5NU8ioNo1jPW8X|#vpZi5)--xz?V;>4zY)#!ZN>a4Ev z98Nv6q+E^I?MKyeqjJX{Je-r{W>S2su{0gC;?wDtRKTkW8;&2ky^9)ams;XC|RdB*LY!YFd=uSW_?UUu^VW& zueVl3@p;JkAe#CHd@;18M2wFUf<`KuRuo{qPD$V!Jv(C06Y&>AKZrn^OQGpVDy_8^ zquBEm^-A@8J^gZ@` zPLO=Scqs6d%dS37BQdM}2Q7i5Kxj`=Z8J8!U5g(nXkNGCg?iD-BPK4@g+u*e7~Rso z6EE6LORc;YH1~CcKL3gD(LraLjI}2$2kLq7d7_(hEGy~jp8QHtJeuv$4>7Ofv!#ih z2X?-`!~`iyN?j!-Ig`mSDNB&tnzvbAdP6O~4N3yqt?)tW*E#Jp&uXy7=-&(N^}Fx*+U*L%#V=}0wtjPo;TRNwA6uT?VLi10wjq3nsey1vSb3jgBV>*5u zL$mbQK$4RrSkiSrxdWHYtD?c+>;z2mQjF8#UU0?Ne!RIIMy9V@8P6x|3SQ2w3F|6x z`Z=4R8I`iV-((a9#@y#IH(@K=F>WHVr!pO}8*!EndE7S?wWmb`4iye<0_;ZHu11Ls zX`rrHQRt>ReTcoJGLYAUJBb>VNH-o+t3Q;u_myMy+~ex8u3DSVDVyj^pwS3%;@#Qn z<1_K{Z&0#sDKP3--eBVOa^%+5lq}bY=J2~`4X$M%1r&Qa3+UBDLlg``A97{BNkwInHrG!!1=KG_m0dGd(~u`DeD&Q&8F; z%N9DhdR+Q9sde4vdM$lawptx-rFN^t(1KB?Dw2NLk4nql);IP)GSy#enjdG16<3zp z2ySW$WE1M$4z$6(wd0~?LsgYHcj2|-AQNVFp{3<66xG`wNp_^z#p0oq z*2&Qc+W)xAvb-X&-HVMT_5K$AW7Cd;w_arbSf?tu0D$8cGNM8|i1+ank|v8udbz&p zsxdpkun)yIWrtL+Mgdq`St|hDK>_c6PfKfy*8qE)1*JS^z>aSYU^hTDpui3xO>#bV5Zn`iA)^=T zB_6*rD#@`A6aQ#8F>#$NuIzCJ=8e&qp^S8x&=K@HUl-%xm_!$q-K{EjH3o%ch8a$3 zB0+mm_ma}=DlG#4*uIQ(aGC2_WmBW&#HjKl&XQ$`ml}dNb94tug`iWx?#MtU#KxOb3e?*1s5{U4}ert=uBVBscSQB!IV``7fscUJx}^mWscLT+@>AZeBxy%)lOY*9nZGx zCJVKEzGAhQXFwG?BCz7*^!$*C7PVk>c%9VIQAA=*mmI+2>Vo{yFbCQ*mNmZsDm|T`4P{sm>iT| zPp8m3Q7-t`>1@gQxTYDVZ~{5fO%Yd;pVlM=+8?t`T(bP0jKT49Nfuv*H@g*=w6xh6_TJTS6>a{?|*MK@qhojibmZ>w*tJ53;VSl z4U0RikaTNN*RPal*sLxW27gmONJ28Qa3?NJ#l*k42b)l> zv}O|Ix}j^u8AMf9To4W9vbXc&8gB7he%$}*`El=+^cI$g>Pk3`DC`^<0ddc81j%LEZ4_#w zQI*jOF3p&+)nBo`e#?}b{gx@0p5HR%MxWm@<^C!%<=!X5?V}~JjSF2zS67%T9m^K? zP4l{q^j#}f#kRcrirPVvTP49iKn9nVtKX8^DrmnYwf*l&YWrI{*myeFC!jE}IvzLP zaiYI0wD%>S^K2p}`5MEN#O!;PlBwcgwO9Qmj*Tsi?f$-ce9uHVr+J|B=oABYrXP_! zIrXQcQD0mFe1~Ljjik2;dSC)Kbb8@xt8?Phu5-1XtPu}Z&l_)8n~hy|NqKBgAUt;r zR02&kycbtw^l6#N$Nnl~g6KQ#$ZN96m%g1}9wX#{>_uCwjp@D<@~iojJL)1_;6`Ah zh%yYr+k`w1SGhyIc1?NI^5PLJbfiBaHADV3|jO+!K3(qEtc?T`qdg9Ox&#=-&G z5QTxD-vO34PebB86nCoEj(fZm!iF*XlY5|VM(VE8fC-1$Xxs|UE?t*Kl9a?D{onoY z&@6ocB8M!@BX7+YyDJP1IfiwnRXCTed)~a>=pqW%F!nO6771?Qp*qZ|8KeWOOPtw| zG}C1+L-%LD#yMHKo@-zheOb&!c|LhtK+|a$6*Bt-1G7?UdnNVJCjWtMEnW38I8~JU zi(Qo8*{5-CUb~=umpxEA%%Lu&5_h`{%jT`$4XMsY|LiqZ#pk@bu73twU0E__uckbQ zm8NSswCNg6zvX(~JzzzT%#7A%f!MGR(Tkqd2Rfy~Lk$?C$gYe!Pll%W1fx zv~xmlLq{)-s>(610neErTH}7M1R;5Q75U`@`~)WH3P9jQS>#`NUf;@W@Y{*bfANWq zf8$QZSq+ay)SWKoAntKZaJ2HL)APSb`94|ES&BXp;-e3hvWsUhdWa&9yvs5%6%!4? z<9vPz@~O1UEP(MM|CK`!BY8DzaJ?vRsFoT+QlJ?)DaO|sT4&dG&dfiW0G<$Y}y#*Em{9J!+HzDx_^|ar`*`t-xulL*eq7qP3Cl{%<_X-vm_Lc zlVvhisVn7806Gmpi3Fp4;8`&A7A1*%nW{tf$-2-1IrM^8Esfl~%==2m@+{oZDGMyt z_rG0Ue=R(k7&?FvAJcb33A`eabjuqgPsE>6z$0$=hvHucFO+tHG7w9Y3z&;k+Ye1= z^x*?AlJ3DiKK+Wmek%@M^zKWR$C`NVqR=-ggYm!a{LcAdSL~bvBa_Rh5As8BA$}G=1yo^fvOn zAPeZHUNn}<4PPs3-j=TiHjIv|t4?Dq0DTcZb@5L$tZQPkEd`OLUIolYcT>R&UBRfc zbq7V%aCkNnk-m&6$9@`)P4HBkGG?*|bw|w@=gQqi zbj@o^SuRD#o@pV{nDjnH1?={ z4(S+0Oqbwe8MhFQ>eEQ}$@<*2+w5ze*)YjgMhzK}ELmn2RMpU7<1D-mu!NHl83)V; zW6hYghIfjPRkwkT&1az9?+^a)thed4yf^(Y*~DBs&WzlEVF`iHlXXmAJC2ANugHS# z$Y&K&M2(0(iER^VEWQCy?6I9$QS;lZjBQXxmpJu4AVutb6)VDwo z;znw4Lw`V~DEd~9N|4rv)~xh=KcmP66)Isab-VYyS#z>3)5g7!bcEA(-bi>-=S8+r zuPuYLiFl*(M)JmZ@|thRs^pIIgttj@nzrMZs3tHuU$&U$hkW=0e6nvH105}HFD@wE z2vu67Usk$vLWPOfJ&-nEOOd3T33Lpt%>~f8bjG$mSz%-<#Luw)J6*9Fyr#BwnA&cZ zjL;Lzk&LuqY5As+$21UZ7jh1v!S*s+&lk#MZM{NCZ4nKwie`6Syd*4A($#p}#JI&` zs!SGlR&yI2)DF|b;60|RwBD3KD;7qJE8DhSz|_a01=8gySp^q?u3iz|Q}ctNF8a~x zZ#>?XiTI}LSYh8{HS}2RD(YqDf((;O3Q19sO}IAVx3;TDL3|*kP`>#z;+s$P!F<%L z)E3g%8tT95E2t3;)gP6mYho%%Qz;L!F&bk-^0rF1tC~pE(G;`tdSk}nP3R)zVmAkHmAtT*PQywhmeH04CHg?Jt3&=7@;EM^-sT6DIugPdlO6pcHB>1ZdT z9rHboz3y?)b5v$5HjX!IW9p)^&AZEb8__u?rs@drH7Y7eu5PovjS7wJ1-=f%0%^kI zT>((PSVoMw#Ew8AEHYF{7t6tCx~k|m>eLLBS&Abmd)0OLW9feQl%Ldu&7FKmGKHvSNcbetEH#??>}P+ul`p@FmbqI{C* z!&})3A=jlzTA-0q-2j=Sz?l|8!lk6MpD7Q8H(c_xLefxo#_=}`oR?O_*iaAS4FxSa zG^l92U>y^eT2iv6)%pN5$$4e=Q9T1g0>C#c^#0u!cboWT;`J?lbHsi+s$aPKW|Qje z){$38-m%_X)YLQb3fIyaTumLec+Ii}Ihe~~GZL&%HOYWG=!_DruRqk)wxk_{e*Lnp zrX^!FOIHyI$Ta7Ft7vr-k#o&@u<|Q)J=%EWZ9r2ja{pkNZp;1sn5l-3^O}>#>8eo^ zJkFqsP})f9V+OZ%0)=XRc=N4Ds`rHplZ zZ*jd%Odgm^f)P#RB0nkjH))xzIjfH+Yk7HcdUW=;k={ez9=&(dwlt+{$?)EVJYbVB zcV)sB)kPkwI$R@dv`Snfh@ousNR(zJG~vFvnM|SrU1cJZCp^P=z+ij8mxlti@9r16 zkZ^##{X%z(#J7Sq@A(xuV#6D}&BS=4b)>>< zp(W6jMeuli;tT~}A*`;`;l4<|WaaQ6Iwsp!)PWdu9(fW=2DC2cv@V5?`(!2umxCWa zS#=0%MFqCTHHQz?PKqMA<>5k4;7w>9M9I@{Fv995I9WqAS#ej7;qJ|MyAdai>jbJg z_!`hVr)009_a6_@p5fH4C^?wU*5$PqLz&J%4Js(wb6%!&%Z5=BojSdnbg8GDI8G@( zJ=zDdNjgpn@xRaCoyOSDFQvDlZ28+q_y9KrVADVrU%IH zmN&a3gib>qh9CwL;DZ$c*e(rj_mG1fMrNpD&)?Ok*gbS@X%*$26U^Pikh^em9}glz z*`_b1!TLIym1-*n+4l8IDK(Ej!H&%-{(z-jW;5Gg0H4Ru6l{2MA|>jWM}a&J{AaMr z>1VfRXQ**-d=XCWwnz8;C{m2*j1~YItcb5Fo8L^u&htj5ThWi z!)bTHMoVv-hAVn*+%gl_V`ks7sd?LcTa})d-RRu+#5dw#e;+;K_p3WplYBp?IG!*>l&C zA_TF&uVsx!hijl`t=3>3hPZK>E^tOZ_RMX9XiFip)MQvmBeiDhEiBco&C@nI>cLf5 za-(%(>q-)qi; zUK+8Sjwnt59=*t{iB1-fduG094$m$d)v7U*4xe`Jhh%GEyge2YE`3YRB~C z-yioNyx&*F2k0BGVu?~HvLEimcBHwA61>jI3~wf|v24CxH5a#eQ$-OqO&EG;%+3n? zgIGZo-LG9o?b5Bf$}9yb0}6y!Za2YQtY|ES7%IKXKVE#|iec&RreEqt48BG6qg;(R z^wVUNtitOu%^@A+YEd>hE@MTqw6A%nIuz*!t5rr5)zNuXxte&QI%PL9`lrPF08AFHggMr6H`TlI@uywk+4>b# zl|#ui`o6MmAg9_w(q1cIp1@rby*eUy7Z~e|FTaFh%v0e7uiD=yOSAp<2eHpa=6$?` zT@jr-#8SnVn)(5zB9Xq^L3PA_^P`>GOjxx`*AJ$ysr>3~?cGiBUHr=s^rE-I!yxmju=+wgS3@=yJC2v?! zgnJWyT_3K>4>I-@y{_BT&Q%tBW7nXzBgzL~;q$jOibP2<@BprfMrmgqNI}K8(w>tf ziz1xqG{qH0P*lV}-4V*bjA{{&$Bj3Bza-2mO}nb#qfjOd2qND#1+sTBE}_iraQ06P z6A3o)*KPoF^I?zHx0(=M4+_mZH27}N=oqFfAZkXWj&&$K9b1^LxUHTAOO8}Q`f!aw zr~a`9!C%KKTYG`(_Y6rLiZ}A=pn9c69OBFJ5CA4UQ%YEA!KqlII3PT3&!?JBfp%7a zoX{pnVjtNVZVFV zsXBjQt*X^s_v*ebl-YnDy%(bwDY={1)&u_V=NG5y)~re{R;Ij}Qp2^rjUVLRyN_^~ znbd5;#4X|wk0U@js~oidW40dr8MZj7SykcR>b!}?AhZiz7K@qZX3cP^x!la}u6rxG zIG$A1(?V%`#SO(yfhTq)E7F3lH%py+qod-W5rpsX zb{hN88pVn`cf$uuHP=6n%KKqD4X>lrAL%@eO*of*HMg%z`Y8Y*rk7gtcd|2D>IY_{ zW|<3MYxMY%7-XOQ7nsGtW!ec%;Js`e>1sfvgks2L_Evjt^sM0RINqn#+a4`JK%q|v z-jrel!@YD$Q;0EyTd@?wmf5`3%@yq788KzmD!X1srf9NiZ3m70+5(XoHO|>o2#DLJ z*Ir~Me5G3X&>QR8LCK>O#0~x4El0eg?E;em{JGrN zp|e(*IX#z!PmXWaiD>%opRSL`0kY^5}n7dww3GuF|bIq1iFOJ~$e z;WTaH`9otf8rT_VtySnJbYX!vBBP{_TDs0Ji^g zm;v9GEZaGtOL?{i3T_iCK`SLqDQ$H%Ihcgy6i5T7IF9JN!n8;M|xmUV*k>10AK z1%C989;WC{`_Qj^mdJNrY}wC13ekh-F^#&}onCG}{&NKVfKM~W&!3Ncj1}sN5>ChxQVR$Y^_@&IBXz`4 zJ^3iCkrjZDkX9HyauTRC=YJgAGO_K!cV%i+OYmN~t-n8$& zsP4XU@h9F#rv7CObWO!9HgSBtGG^~uHpgi#`xo$L$o?}M@x0W@don@!A~l$_RS|(q z@V1!s6n|(i0RR%WnWoqwmfU1l6SQP}K*pZ@ruE%J5afvqWj+@?CDk4#*}((OD{eTA z0AfLK_`zjb@XDfOsZUG%fvt^y#~ddjiDV$)#-hx_vjshB3Ykfqer8eR_t6hJX5f_r zZexI9xLY)f0M6;x8AKE6fF0G~1sxBv!?@>_Zr24`ni=jB5n9HHLvP^Ft_9h1oa_>D z`mOxr&&AKbITmicEta6Xq~6A1iT9}*wo=u*&sI!iS_@mU+MlflRRe}(O|jR1Z!GG% z3QBQzaGrSg>=~#z5uD61Cq#MBo z`u-A_BhZ$g6QuVHA_zC!sOnn74t@gPKmnv+8Yab-aTS2O(J zd~q45ckmy(4>7cI-LhK1a8WQKciRxKt;ngD=-k0%G4SgBUijY4exqhZ5EEEqp!mq3DYY(ue zrx>cY{DPvTq4e7U+lNL`ZRyYXC>KfLs%$k=hHS)>gmka*sI1=Yvg{4?oRspOmo1@P zGWDSo#ALPvX>h6?a)+crA3#`?Li9TC-7c6isE(E)dkgDn)dp^^8UZMtFLKRZ?~=XJ zpQH%t()3@0sEj_YjeQ2msYs1dw}D#F`G&;8BsM@WqgHBWW(3TQ4W;`W(w}9nf6YIJ zH6~H+^3xdPa{H|=D3nY+h%8qc0>mU72;noPm$YFHNo|>_#9g;to}<*mR}k#T}Ki85L;eifr`wBgI@m6dqj z1z#Akku-aXAb?ksj0onZohw2u&}5xs+l~CBK4k=!^=9nGIA6*BX;NE_)2K$M8@g^( zyEE%SvK9-vZq-fee)ePqL0XF3D$~c7WpkPV(l(7zJe~G6Hx&R)m;J4zN+&>5Q9O3p z2cj}5R+asPKQk9Pi$g^Dbq8p&uopg!Ar~)Y?GsHA8t%+ci^Rmfst@&U^#Php zUz`12tz+hZ=-u>(SjtRKPiI?8JePvQ_bH=?AFsQY^LDd> z(EW0R|C&wqlT)gqLjc%ObP@bf1f7g;eF&Dm^v?pO&dCSO_HAbmg@fJ6SM#gM?MGkC zXPjCGDTOD5>>z7odk0|peLU}tIXaDp#W)6(9DC6nCODYMMbgZMgEwYH-@Lvj%W~8x z>_tl?aIL=$JrS;>7(`cP;K5iC(ncoXHnH3dC|Al=k}Z0qDmRl3lZlyBBtuQ%u2T1F znX*S8HVcYeeZD%j@D5#lPD5tF?ADn7{uV(?YpDE5y>l?si z7=sURPj65^*?3@nrkY@%ElVqHmIO~D$u&Ua>9ii8O?#PLUfTqz9)$TvNBJ)YEX)tJ zLcSUgaMKy)z&FT!6d1vzB&~eG%2A0fq&2K_Yz{YAD6zk`iKJl`L+Y|f#ei* zfs)b7CxWpGsj*kPxOQ_~3%%|8xQ{q_puUy+N9{R6HF6mg#$wAa9NMY;I~nade!Grk z4qHT2GTHg%+6J-1y0ea5KtsXqOt8mbA2Ts9(2ryvTx_-=hgcFC&r9q1t1lw4nLXhbwKp^+{j<4#VI~!fURioRpCY`>D!kln{~IQn941DP@|Q#! z>cxwFkKsUanv%J&P5x7W2}?PL<6o2e1toL8T^f70FwixaTOpF2MGtV7{5oWmfz))* zX+sh$xXqVO(N5!eEd$b$w7_r~l;%*nIMty-VOqQ(J2;Hd*>euYtGR!=4<#i!pkzLj zf8#lCwi=iFc6k`oFASD!6_4A@kd{gN_guRQ@hlbJniUA98>gKPEa{fUYyG*qZ^&F6 z19jj)fgL<1?qz{k=J#j^n(E8Y@9M>dH#`A*{W&rFG)tEsaXWQMb(xpp^eU&FN|#k- ztDY=O){Q~ldt#hDM^uEwLBwQ%otR(xYH>8~n)4<=I}PwKKy_!Gy(IJVQ!5s|-7yCZ zFMv=kP?~|HlEk#-gUO5tC7vAFBvk!*H?7V&w@v&ADO_6t#3Y6Dd3xl3hYWWN-sY^x z4OKe@jtJjM&Y-*Fyc&NX>Dm>(2vgbAiz7|`fu#sS6EsVwZ7G+gO`rp8u|G8;#d0&v zP7`(glvh;cyA7NBT?<&5FkLU|a)u~eiKKfB1MLjs03+4M+(UPn`SDUZc5fVxmgnI0 z_9(hc#%G?`qC@J3UE(~eh$lQkYT@=WQ(qfjq^vQqX-2wEph!wn*cbdZMwTkAK}B7h<-}S{(52*XE8n-Q zkt3{_Q1FUFWG7_g6Oq!?)5&>>Oa@5OYwneL@YkVoL;RQrNSR#&BzA1)04rn`Swp-g*eS(JV(RjGH^ z=cGBVXA>3@9$(P;m-#2m{ybi(=L5A$__&9bfcWcGiiH-QZYQf@Tl$s$RrN`HZoM!F|rV z_~+S(9G}+K64jK2qx92)BqP0q=*;ZUD3jZn$x0RXi>z+-#+vZBu&SA*z=!xVWu9#3 z9|7&CLp5Y?e+W}FD0l;L6zQECTL-&SsfbX&YG?$1U3!Los+&sPvUr@PNvcti_~g%v zqGWV-G(8oI*ZZKs&g@M;MNV^_j2tE>n~=PYu56^&IZGW2S~W5d(g+)SR?KhVqq<1~ zEzBB_2=c`a?PB6Ooq}8JM(3k2#T_;JSGcvrF8%34`O?&wYL)wE3X)?}ekuzz-LFT4 z*+}aiA}5s}KfxuOGf0yRNPfpy-i3RPZ1H=E;7EVRxQxTW9g#%h7u7f2CiTOR59fdzbre9PJe#Icba_~X0v5b_)bZ@mHWzyM`p;p#l|er zG?U}9CCV=5N(>M3Y|UJ64q zVD|4oE8DWRgzxl)6rd~!^?Sf|-@^KQMpv0%!dRoD?(EWn??u_%1i@uOBd6?30#zYk z=Nn??<}|jbS_~$dGD<4w5W|59Q{F)4oXu!1Ao;^IANUXakr9ko7y|e#@?_wYhMAIKqS>L zP3Akx_jTT6j5;N}Djf^yeOWf}C4+%T-%**|8XEHtp4q4$*@Qt>q@x<+Z!bz1`xR8X z^n)sD#TLnKSNBatR!ciWs&3RFhEpxB;ITKpH1>~`6Tr31{tiCv*)LBHAKy@FFjLOo z+Y@Muki<&Nb{~RXz|QE5uWmMlWcU%bW^MRCsJ5YuuZiRJh^FL24#JZd($p3N85o{K z2~6)^$H79PM%*A?y?N_JfA=eevZoaI=!(=#7Jf`3>D+t}$0^vCacIZsz<6DZfsE;h zgDhon{Lu)*dB1D=ml+=rN32g*MniodYBb0@Qrpt-L@&?96B(CqoE81OZ;O!)!@{Z( z!wV?OI&9B{9;a>l&Bf(CdoJdJvuh%&h4e*aY176)&XHpxl2FoP`xWA;lQT-w2ac>G zdT3-udHHsKev{u(&u-2LIHc>V^6b^j5MWW8$OO5)I!U?t>@CWuOA8a&oLZp^qw0mc zFm7F2hsd7e*WvtMA_^ddzP&V4UeKdRula?t_u&1JA;((DXMg<*gXKE|!CB71)YEEw z(I}MEpR7VGr|Q>{()3M?=wify?Um;BFy{A5nUyBWqwtqaqDDRV)6|Ae|(t%xn45Pec@ZwK(fOiC~2@U zp&x?_5y0LxlF;Ct#gMq*awBT4jSSaN^UZUYCcP7RUJV418-)!_yQ6HvuLCkV(wYV;eV=(YMtEBzl zR@-(Ji6TdMyD~ntJYz(r6Y3zOaXuGP5)iZ&@vK$u&6^@QvaWd%<$;uFMAxP>xRuBV zjk%GnOrNa-wzf+L{{kbRYgr>~bm2PZ54vy@b8z`a;Y+NaCbXW_2(;?juAa8v1Aue? zCO;E`(IX*@6=qhf4~5IodQrb*tFEh{Y+hq<*xtl2BsGwSrPN;uqb5Xsr1BWmCdV_t z82L)gs=G`(IG{B)c&jxv*_-)-?5lHJn>i?35?!E-KP?t2XLM!4pL`vw5cDl7u#im&*+#mr%;LzfgibHf{46w}f}^;q7`$;r$JY@X zwb}CEs0d+JUS=f2IXixR zencF>T;+Fh+x3GmGJ)ZUr~ct{3tscjjI?%#)$zAOZ0Pxl;61l%?8JV%Pd_5Ki)udJ}ors$&aob$FxPA~W zAifUeT7Fq_4?&6mw{d#u&GhE-YmAtABPLMycs^?Xu&vonms@%b<}f4AGpu)%Lkuwj zaKgf~psiEcu$l^YQ?Pfs?L8pK90S58cN{81JzB^#wIXd(jroiuczoCU`bko%-neMI zA4V#+Y9-zMR4#pEH}u!{@?$?#KzR6>RD^w-idf!v1vk8~e&aLjD43|WAQgT5^A#0v zdmhWpqvCn|VuN2u7oD3$n6kClsn%JN6*RL@@*FUifYUYSzN+TkmamdycRl9an_i>=sd6&) z$YJ`F8E{QLdp0|x%|8vl&fg$S(SC&|JVfz~o0~GzL&7PpVs(g8w z2Fh#w5)FCn^!n}en)kK*`pB^hx^70+{|)?~uY?fL44unILP&aWAfO#2ARvtY=PSX% z&DGJ(^#>aqr>Wzx$&Tv3T1U8sQK9(EY={Z#DfpXAWMwM4SdJiA(2#V)m?RlnYQ(AO z^EGEEhIAKz8Dw_9#js0+C4C-Njy-59_pkT!ltroM+FJ&D$6XhvQ86>SJX65;Xdy8< z;S{WdRSI3ZMpBF3d(%cgO|+W3gw8T)l9?ujRSJ2-DQq}6J~&h+zFU9KTVPU2MiKYe>=XZsHGHoCb9iWMR@ir=5Rxl_>)b!?$J zs(S_2UZ{NcbtvF5(!}pHy|PiC1MsYGc&r-fW#|v8j#go4Cbqw`&nr9jN*#y>r)DnH zTcjSsv#Io)996)S-;N66frYWA2{DLa)MLDc$vxd; zP0Z(h3?_IqIfCvC|`9X4USHa(~2< z-n1gBL6O}JzUi%OXJT=#Qq2MpXzi$CYeraMpwP7`u}H^!$kV~5blBag$wxDV5AV^HJprBy??8gw|?9e)%=stNS z72pR)(Bi0-+`!RKb{jHVIu%TW6DAs0@l8aVUk4BXpedt_lVPvGLom@Atv@nNl;$tv z?X*X%0T0GnNA@R2#V2V|E2c~~H&~n&pgix`%6TTy4up>htnI=Ue<})+j4Q!kgEMU| zT!fVd)HmPU9I`}AAw>1evx`5(_7eP)J(O!o3<^S~ah&mg<)*C2gi-#H!3=*j)@F=A zgvSv~CkMP0i|5D6%i)1U^*ECE!->?VH}m+TamveLrwF>pgme+Iq#_oyJAqr%+tkV& zo&=n;m@SyvGL?I8<<*vT4Z{(F^wZc*@!6h)dV8Nqi4dvRI-rqm0V8l`Uz-w$V$8xE z^hAbx1e&TQ_#~Oh9Nyikp=9Sl5V1K0$k4djuH}QTAck#Bvx!HVLtCyjD%*JyEI2|(cX7smo<(!~~-xtVH48XKO{KhH%*+$<#EF2VCr@xj$3X|sucamrO=3RCb9Y8F1cvqtY3Ey1w<1LFTJ|7*MjR4cHc2Mv)qVf&r zZT~**{k1D^j}IuB!qc-GaLS7!x~HJhV2~H@csKF$6D}%WqDJjsM&H~u+TXmw<(DUA)qM( z+bpffhAWmqbE+Uh=6zm^Z0swu$F$6|TBxirj5CVk+itF=kIkt8OYhh(l?x>i--cl>+S9oEWl|aUT3}mI0AEa!D$KI`5`RyivC6aj zVPCQHG6&%f@R`fsHh!G(wVU)I=0a!39Kj-?8L2@u35c6S-r1vho}e)4zzo8=l(@3z zS4;j%nTGA3Z+ip#&*f;qvg+bB`h_(h0s(Ey0s&F~e=f(=$koWe#nsu(#MRB&%;mpK z0=ilbBpk^9)e>+c0D#0^z%^+JtAk;Alc%r1}vMEd(}p= z{3LUo$R7~}*8CDjjPVF!Myw^2T$3=e$(+!d8mTYb)G)eDv$_qJ)0(c0)FMMudFfYl z)bS^yZ%ac4K58h}=@l+eJ0If4^I_O`m$Z;?J=I67LQM4@K(W!h{CZ|Cp*InHAAA~4 zspc~olWG>Zk=EICPN5>4JgRujz;;21iuI#-X((vvr+n$o>>c%nKj`?Ukim0;{siMp z&ezCSIdexAS~9MozQ@!3&cephq0d;mek=^m)vA@OV&@EVgDwL4s7&%=HNqKK6lJf5 z_c2v(@k+uYzbetxKzV7R+<#^8DaI~CdFkCLW9)nVet%wmIC!p#3Fz(kv|>cW`JS-t z-;+?9@^t%+!STwdleUcj9zp<%0P=TcXLOzIx1XVZ-$_aTzGhIudQN#Ou}OY(H~mUJ zagy^qA-lhlTA1cC8iSd|CskE@d0KfGmQTbgCquWHR*%koaNKO4-bnuBJ?mN1&Zt;7 zcmogK3`?2=J6Rnohb4{V(!!orp}1cv8AWW=bqr@g_^N~D}-A1n9 zqZIE4p8P3i;#b+7gU>4*TOP+#tmX>$Ci{qABapD&$%272kaKA|ySd)+6lmZKHtXQl zFGc_0a*yMT7qQ%JXP|m&i!}(Odd~UDhXjv4-9cPv;L&dZVcod{82J9|1_%e?g|k8| zpUB7xEy^0Gm}%V4wv+qAVG1Z>E5xP0mL#W9^F%L{U{4?e_wgE=yiCYL&q9;7IfI2k zYkV-p;=!(hx>I#~kg!j|6VKz(e+*zYharaIy3e(hW|CsU0B6g>v)|SW3menySH@;2 zPm2#Kc#6Y#B%TXTS#8P$1j)|YH@`uPl$TCkl8?V0y9}1?djB#QXjpF#6MR6!dShtx zzVflwps7dn7w<_KCc?$o-aefXDzXtFQCHokC62cvi6P8~VjN?Ag{igkNKXRc5KSi;QSH9ULdM>WJ40^WYa_ccU>)ae#^O!KGo8|tJd5YaI?)cDm#3XjwYJA)!4 z+#!NUTnRJ*gXoqJs?i{6K^|)61b}p-+HLSG8T4XvMl_Yu3VMXQ`h3n|A^VbSknCl% zJOuiBi=LrM`Fj9^VoxH>39KJn0$oGnx!g`D*EIaDLH0@Nc zfcDtxBBO?ExRXZ^#-8GUYbI-&Sl!cQJ(}{>yKQD+d{2q%|31`VBuxQy`5BysiEiQ8 zinrN{o@W-9=7M?R_uxXT(c{v7>t;J7t^Hc0h>^=2TTTt~HDDKn*Qa#wAl>8BU?CO0 zd-C=E$A#neuK_(>r|?t3q%QY3Ra8E4CJ8S?lJKa%7{z}0&E5d-^Q(YJtdLJQmiLm? zxj2jDXnb9s#3I!vwA5-%cpk`k%Ou3UdP~~)(}@|*2}woLLO2Kb zce!|aJ+5y>dIe-2xbCsnMcmtk3EV~<{Fu0!GE&Jnq5SNMujnuZFmu9C$#2guCV3fr zdBOYm31DKdRRgPO5?M)-!N8L$=!kME;9x1_JvoDJ)A>?1;{^x1>Eb+A5QY$i>xSR{ z*P#Y~lg{tpeB2TyoNj7KFk!u@O^c}Kw6HOpL#R_>53e4pkB&K6n=E2ZH-r)Gf~Zp- zwRt6QDO}UZv4u7?a1OAxxs-Cf(BniN7IL#&N6*L!HyC}F*bl`MN1L^|6G1rRN<3s8 zb8rQiz5pjS%Gt|^8!;V0j=^c>AH$+>FG)~%S4Fd`@pcQaP*+`P?Sj3ZuvsJ7!MSwr zA<4MjO1i&$V-|-4hVvhXHaQT72bZs|VuNZNhWWZstfFIxm^*fTPI1-6$^D~TSndgy3GZ(Zcnp^_XA|ckX^{Eb56vYlr(*hR%D|HmpzBC$%5`SJ zq^W2?It>iKfGVC@7BbTu#L@!@YL5Cp(X;`M7f7-O6l2Q5`5-ozbn4`YWNL6i?XU+_ zpb*VKj3MAa{)Uw4QRX^1Z_F8Sv#N09UeX=nm7dlZw-u}dYf@1aG8o=K#8-6^pkeEEM8$)3F(aNMroWM$X0;z}cE()l{TG!9?;UC%`@ z|3yxU;u`i_b-OR>)%H!`;Y9ld^%+@2{2G)o_(9x}L77oO&y4FXv0;WZ0=Hiy$%|`q z>v5Y8doU{!X%XUg#-WnjI*F*Za5Q}OxJ{$W_;BLzjo6OSV;Uoq8m^U!nwRs92n|^s4_oGO^3h{vZOl25 z^RmoD2FIumnY9Gcyz@iSJkjK??Cxr19FyEDhQ+Ez5j64BrpequF9>@(;mFf5AP6|L zxB2f1^IQ2Mqc7e0VVL2gPp{b3T7eDUf9O~3AAW>6fv<>XEjLo@Ny?dZS*Gl5O~aCqb_*~RoJDFsvF1+2n~6R^Xf_!%b3LRm_!KJv)?Q1IEozk;+zqB$wSa@D-M+h@&Wron)X* zAs23##L%?8bWlIT3(auAvzR^R6Lp)XpfBsWr^~6~gUFiIr!Lz*{bycNIV@#{FS_yB zP%%1iH$?W>sT#}IKjN|xqO8O7xX-Y#)4t^8acR#(f^g^(vfFkUV;M^Bq!1dUU_Eb2 zwnPeZ33?R7Mm&xwCA}wWey^Ximli{@8z_##bHvUxw= zCh~esBo{E{_?${z9W z+`%0*rcIL|7sIQkF5?z;;xdGxSe{ z04~_md5`O5&Z~N_4(BHAi78VN!p(J@s>W#$AAij6&Z5acCgbZtgy^(=HFoi6wkXlC&PvS!f=0Qv=}+fj^WLG|I7}} zkpHQl^7z3oteH!`F+{w24L@Ja4P&C5s`cv&Syc$KLg~@F=_*Lt5&2@KP^1VdAG9IH z=uDplL=<2{bdgMK;>ZxA-S6PxY>|jWkLn^IvW~F-;D@mB`fZOCSrIO^rL(K*rfj$E45|WHQqYu6?Jt@gv zHjyYJ3J4&>G0QC|< z)4zZEDRFQt@gI-Sb~S33zn4uKVdY6Ca)fl(Br*T^7bt6yCAVh8f86KBJBz73#_miy|*+M+3N#{UshxIRF9vB=2qk zBozSi@@A-*i!1V}+s69Dt)<1OmExhB6@RauE8}Nwr}-B8&}tnA)KQP+?MrGe)qlm; zE}xHkU4I|Pqz*4X%(KQ4Ll*g$uQ-C5x4vu90~Kooz7)lN4+t=q-xJ_gd^a4byG<@; z7bFcNra7*)A1aF^@IbQ(8SUW~{MuZOwRS@`w=3C}yZcA{ycJ+*_wv|5qPb8$M#kgU z*yh{WqW@X-*!2zapWtINm`IoWXiMtfPv9}}lS=*X9SSS^ABp0RHSvE2QhoqA`yoO^ z@mI+$BTPl#%^y2bQ(R4|71q0juBxIG_bdE6^&OmkCUBynms*opC1>cJVA<^=Ar4o!;;HNI<`19^ec#Bth9!iSEqKArBIU;y z=66}BVu!LE$i;$%oJ4`MC7bYDs$V#^4B%9%F}Y0689bi)aRvGQJLLg3d~xWGQc%7H ziy&Q{-)C+0>^bcVBB=7l)P0J=P^W{Kv-@!TE+k!#BhvjUe(+no&+kGa8C&RTAZw+S zsqCgTzA~%V_G9I7@hI7~_U~#JsrYiWL0PTp8}%IzK6xx}g+~bgIk|FdVqJ%g@iDWX z$Y?{%H`cF>OU&(N!4ltpx>dBl`*=$fzxGv7(^ubDq z07*)mh7)V-=2Y+8-t9dFB6K3Mq8Xk8W|PU;`%ILuDSj~Lq5%K1@3YoEr}FK3G> zWz$b&hZAUFrQk6+Q~`$=*} zA9oNr*rTpJ!F?a<343z;&of^^ub&tSmS$l5*pL$!9z2FvCy7vVNlmYwkno;*E;@Ri zb%t6yD0*^0P+z9;82NdyEHEPf<+uig5j=;Y{>Umewr|FXT2B0^dlqE{AbF|QOWsUs zMgJSVDRP^%|6j2wO{TgNXNF3mm^^2G9GVsrCLu&MVN-bO1VF9qGZO=pN-qfLpCRL0 z^fdkh$P>S+6S9Vlvu;^|;-p9&6Q-ywQ8v^8iKWSyO`n{QRWfU#n9%B0=EbS2;2y{e z@AQHVxHU*yVJdo{nt3>6DDCXp+#_kYB;BDIw|I-h(J6M^T}GC_bibTUd!IyZ5*JJp z8p#i@|G(jEDfJ9_(Cc4BHSiY59sr%RAka2K754eB2H7mSpYm~;A zRSS)=e&c;?|Fa#zdFmc8wonDizkq>rDW=zhQib(Jb9`Kdmfp{F$*SWF5t4eC*@ykvh=nM zkwfVv``&|4xI=u>?)v^L;CSgV36km>UV2E9i*oW#nbQC#mFrhb1hGNXiLhtF$={4F z;=&Hb0Vf>B=Qq)PqcQg|@WJiE z-J6LQafgnU>i}D5E>vt_`!C=RguJpgflS~It7&_Fhw-7XF}AH|Gv0WIr_QiUZqEbMqm5NK8C&Zqjy6fofJm#A$GVNcJQE3u~_YyH5fY3 zSgSLfeh&FB)TLq_u%Qn~vA(2Vc<7rdjB z$RjO$bGCG>Ci!GLYBAQMs{5KjD=EV_PWpUHmzwe|f9ahe`ryls@Q&*hr1_8qV|XLr zb)smcukIVrqV&=t;J5PnEOV-h_^l3?yDh#q@?;UCKMIO|V2%TaQ*od~LkEm=EPfE- zAIXjSUjGhL7#9{dmD9$xUy!IX7z@nSRmOb0#ZKGeKQc=cg@R8fuVscALt9+K*@>cv z{R&!sc@r#jGLk`|{@r_gt5bIqOc+GI*?M}}h)53h1>2TYl`%NQX+vTgwsBXi5RWpEp4*~P$^^FT_X z24X5iVMv0nZD)GVP!}KnDv+HEVc}qamm^FGi}3QQ(0U@nPOl-EU2=GNJuyXOZw`2h z!~x9lb9yy>-{cKcC9^bCKxU>xgaEZy2KU>9^qEc=Fa0|pKVJTBzUHA9glYfNJE2@E zzJi!K)g6A6VFtCu$6)RCdnz(SEgZN*-f%meoX~v?E-Ouaqa*9Xc6R=n$3L>I-s|u~ zHLB9>ayoxQ`u>$hT`y>LfVEv*|NALup3xKvT={Uo`({W%VQ#ru=Qd&XWj8wy7yKh& zQ-Et*fRj)8!6+xSBRpL=d-xImO;lK3=baf)xH~qWnmzdSi@uql`?m-!hK@@KnPK7Q zNg-X2Vhv1*ZqirGJwHJ3-C&r5x2$5a#4yqM&5ofU@6c54UXu@YJ!h+>JZp?91@}d>aw=VCv9uZw&sOyE&Lbyto2N5I@9V^XrB6s zk=yE2lr)WB?3vm|j_;ATh>EeXA`Z|-{X}dQ<8T)|-HcEKF4T|mh@0v>#a|xAR>vCeRzT+l5Qnr)H=W}?7# za>VR)zjut+K!yunfe~nwWIv4;Kitl2XL8EHN))^>#hlmGs8K+HHy zHAp9WrcO=Z7sMb;N?E4Z zSb_bk+q<-Cnic2KvbH_!e%*_ss?}fMi4}!L8si*|!~gCPbNqO8Y#)zz`hOy-|nxI(RO7S61hWoqw zNK{iC8N%)zexjd>{_VF+>fdBQhXnbaZg6g$1Wu_yt^lp30T_B8lI4Pn#?j;}rxKlv z!W&~pY+ZJ399p{VVwR(tzO4cGq~?_XwYFgCs&g*XB)hbY#@pg_yo3soc61tvk2@f( z*kRh^tjOiuhp)oY7;oHy(cO|OcV>;%k7ri2r(G*$Ke-7;Us}I&XgYL^^^n)!lgNQ{ zr943Gd@4_2{I7>CrU(MnL_2GFYGm=M4tJkp$igb^)4TT_ShuutuP^GBEU&g7_uBOP z-e3BqE9gAy%lZsYOuGW}hmLyh;Tn>kc(Q(5DTN!f_3>(?wjpwhV{`0=_rWj+&DNpH zyM*uJUq`7n-LdcpDCibHyF>0kOxB9X2S4IfopGuYWSj9?9@r=bdqRU#Vraj1dYH_1 zKke|Qe=7+SL$91^V^X*^U)hPs$S%AWk@}EBOw+IG>Yw8Gy4%Cg{Tp+wZTIS+*+X-h z_==p_7Q~;K?NKU-`P0!%wD97~nRMo1n&HPosr7g(x&Vu^iy+lhBA^91aV*BS6PRK? z+bOD!;;0@!jlE-9HX%URNfCOM!e$it*9j-lVn=#Fq&CAbg1tOHr^j{AUm|XM+t20_ zJ*(gUdhgrh|FHE9!J-A*lE=1f+qP}nwr$(?xyQC`+qP}qzJJpHb@Yp@=%WnTJ|wQ!0&Rlc#yL1 z$9ahv9b_qN$bW@xX_uzFO;uC4Ptd)-epn_@Frqi8#&1RsDvT@U*Ycwb87@A`v!yHU z)4PXF_6!E4x?XY%7LmOe4j=AkhpJxd;n~p>ti~DA8G^dD$T)mDk{m=aDgyZmE<(fS zFEAVr{njZz?Z>!jZ7=f%;=K{(Z3e#z?dO_G>j_(D2nFqjBcIb7ytl4pfMb->4#5X` zyG03?Q(w1fejDIx%WTu`xxT~(?#mA3+2)>yJ-%AVz+@8YQJp!-);R!zN1-H<1U5Ga z1_^2vife%6Z=x}=Oo(UQTkNt3|Ky5bEPJrok&ni>YC7@A`ku`bx7r!FX()_AKs}?n zJyNP$>dNfHXe$vb@%5qMSCujIFwRjt`>gbi?4Bx&#PB>$O6Tiz}tPF7tLrd zFgy>Z-rv-DzyGQwLIA9Un&WBr4z$)iw7CMfMt>b)j2~%#WR$_L%z1Ppccib0czd}} z>Vd2Ojt0-Au@CfA-60y+>vdloFd-Y=KMzjm*3-HIT;$d}xdM3MrS~T^Bx_kI$+J{% z{PB7y>&9D~?+vTrpKV|X*bp!B@26K-gtCw%<)R#%Q(X1h1Wui<&&*QHk~G$bFp{u$EqAPk5( z*W-?-XU&seIzacwfWnP#BA5fZjC`bHKno}MUY>DMFsHC`W7iM**||Br+XO5l(qcxA zIcjJc;GW=7aWPHx14Cthy>$psiKYp@h0#?u!RDewBWDNh658sP#X0Y6BpZ=0X0Poe zVLjHe(wpL5IvyJ9AGMI1pT7@|1A)pz~v4YjKqmr~dr8QH850^Uoz)S+%D z;J4=XzAzS@I}X0z<-X2ld}k%v*_K}70PZO*y^D@N8N9r`{5s>xSWB^YI;Plik5DGXO1F8xEc+Ik zdDGPnR0GWOv(*9e?<}&3{r9xJL0A`McARTP=`mCl-}}$BpE)}rz~6OoPS8{>&<-m` zPCLBnSUYkAcDuRAayt<{oNk7MdBQp}^UhM@>PA8rfTnWz?cjxOMQzt7r&P{n3S~ z(?~Lb2q#^*UlbJq7>R)M-dg1g41R~2$uLvMhN|K3Tm%>A!YwMRS-ZZv?aJkCL0g54FEv)-x&&LPg^5<8+}s`7binw7kj6FM8yo&meWz|%Z`4a z6~2g>WvS#I%Y&Cn_LbLVYFko{otPR&GswoY5(#>Njb-caTMjS+Nq_dS>tfnbg9yM` zRIqcfSI@3VTdy<`(y;WP1&WCw(o;ACsY`Z)L13N9ND}M8L>MiFCW-Xrt5;94LoOK& zFvd%)f8=1y?~O#YT<|p^)SU$18=~HcNUQYFyIw5(QLgapkmzYbRl?BUD-=qwMB&nG zmLhR+%q`eB2p%A;3VUKKXLeYOvY0N+;*@Yru}u1KluJq(nc`{E4VqAgHa=EF#M z0#<@!SV>KXJ~%KExqw09eGW3oSqRO=lu>CcdbeZmq@W-*NbTgYfKb8if}j>NrQ?v8 zqzocIge*7-4}9{NS)kd2t*p_F@AGpD!~yO^hddR}!9T+VCq5w#jC0mq!`9xyY&b7d z=e#?IbO6c)wA&2-)B$#&ae+Ej3~-=CKtzeA?OKXpI*Ak!ckBbaCqRcs4qPvz#gGwC zsxbRY^6neLNN?bq85sJuMuyA`L!W(GWx)Y=ha3Ia-Wip8_E;gMJy1W_PJSI=dA)7x8R9XsPk z&L1uDUlZswGEKZ#0KMmIX9Ik!%Y~~KM!(AhW)*12nT5q}JI_$K?Rx`#I?_$Qw)S>2 zek1X1tsYmnwdqv(omPI(c)|RFth%R_gCVe;K|l@<`ct0NGssu;_MTLX~T0b9UTh{Ky7abs5UpVVgUv9sEJs6TjR4LMvOC zB%I>L*ZmamaPn5zAJk;e2dQ|pZ@Caf3NBNf^-T+hdV>k8SikhVrIiLlbCR2Hn|R=6 z)I|b|PZLh2A)F$Bz6FvC4Gy`lH%z%70oDaa7$c~cfL|X*ombsPL18w#aN1~vypRUs zONgOpv(dHO=O;w^AYJQ;vc6Ll9VeJiazzLpGVSLYp_p2gy^ zZHWzeKbvwOF{1C~1512O+lfq9OU?cUvNB+o5_lQGqlwbBHNZ;8qGC?nXxNGY!@P+I zK>~(UC-E-eZVYKknb=`p5BT7ikeOnMYg;yAY+h%`GhkRF)ZwfI890NjaG-c6lqbZA zq0c6OQYX~?bgVB$U8dVv93d-K>)T_;;KcSGst_+7>eKf;e&of}m5+S8aAL|uVW9yQ zh-f28FRpMuh@BXFI0-*^M}L&RLucyF)YFdxI0Cn4akWw)3pTm*&4O|&&aSHZycAyb zfinz#t=0@hOwBW%6Uqanc`8O|q$Jh(G8$in)AeHR{9cPSNA=+lVi7Mk$7VRezE)k@ zW!L!A3?Sek&#Zr&-S?@~QfJQ&wD;#jTr)(55_JeG&dqk!WV@|~>uiwn#}KUe&foX- z=ViUC8v~M6Lsih0>2=rJ*xuz7^=x|Z;0lrRt(Ip!rs?3=#^bhUnnh`{-l@MRuzw@q z=d^|`!T$5)eEUmfZ!UJ<%OIDMhM4>)4G3m~m)+PRlwHPzfTD^E)jakD^z4om zh-9AvQl*Ksq#O7!O zR=E$4E{Et-B?1#Bl_WMUT1+E1R5S@l&ax`bf*2zNR> zFJ>1@4xv+2ZKS1!u14hvf7qNyJ; z$2E?ok->QBgjGKhT(UZcFrYJO%038$2)G-AiH{f{cTDw`N1DC-S%`_wmr8q)sXFav4x0PP%XNzf;=sA}37e9jys$xa=GZJ6Na1PhJZ_>s*~>PV)?-N-T5~wgaCB%rNAl z%alF4Pv#&^pfLm)U+vbBYvP0i6-ibP(d)@^?b)@)ijA1HeL$}1oR9ha`T2ec8{rhI zL$hrK5`@VJQh|7Z3&bUTshU*w))>L|6-P9%NWtgMG+YMYNOHjx&9Ddgy2BYw9#L3) zr@f-4z}dLe&3Zvo`UFwAN^ta$%K%kmLAU;$#*=AEOd`=JD4lZbkqy+=2^oE?e^s7# z(<Cu7s+X{yY94)bGUsCGkd&`3GLy4`*Re@bI+Z{xn05(jqqR00ZeA7kR-F_K zYJxf$I8t=QP!*6da#`H389IgT+JsCF+S6UgkA&s$c^(Hiw^9&&r=sxkR+n-5vz`Vx zzxS4ra2pHdjXSs31PuW8RF}qI=?b8Z)DEz|B!5fnFF=C5tSHERG*aT3oU>YZ#(oQw z2a%o$d90zcXryjJ0qB`4J5|jis|=Yv#pU^RCH%(g?-<(hTN)h6BX^ zf;Njxl|-5tJ2szr3G${FN3!Q7T`_iaW9Y^=ta0|JnP3rCt6!@%Wq*5mxE{$yxLzV0 z)%9ZiaS&;kUJhKbvV8uEhhx{=C=Bn^5dx zorD~L{*mIYJbAZqKQ>u}sCYHpZA9FbX3oR0O|Za-fec1doS&?Jc_ zycfI^N|S4Ctj(>v*daBp`wH3G^0YwCSS<9!xCxoltSd?&#}8&p@PG8m#hj6#$)F-is( z#I1RSJGygbbEy*g2+igEt+ng<~9v#f1kTjG^;0b;%$o#(D z(g7e52yROH#9s;%0Y-zq2JY<9T|9XBc=%01{W_0`POuqv%6Jg2Q&DTA(0=^L4JtF@ z#?rAh7;M>SlH!mpGQ}txCS&^y+cXbJ`RVsg6SN9>97&~E_B(nbL_ z!!k2aX`&dyU}HAn!5)QIcO7%er}M-b8T=~@&w|nm=wZqIX2mMxG;pcCVbXniOc?DZ zkQk!XSi+Chqv*XRlS{Y3J~c@;L*`gG>GIii3J z#h6p*P@cFGH(=ixI*26ERJ9Cgy-~WU!W3;kIU&<_n~1Z}LEH%w#nB`0mVQmnQVelu z6Zo;b;K=#+xzE5!KN(JUP?5C|G|c0OV~-z>Dw&hktufGYm4tgvNlP0F5Z=sE8!@lt z65Hi6<|P%4D8_!UDd<5(j)Ey)Wt(mWSPUjufAY_C*?tjcA*UGKSWXlkH)pAXcz}AH zq|<+HsS$u-Is-=;%n&IYF5hy_UkrV>!gwcX*X{1);?>^H!NKV_oQmBGPP)f;x(Ihk zxaO7s-_aiIv%aDmj_=|*76iEFF3IfxigO&SK2U=DB!vNYO@QlijQ8mm?nZO&Yy*Cg zjPw2Ax_?eQ{z=pCgO#GGfZ7ngXU?RmH<(DCqFd+F`UD)D&y+2Q1>Pkr6;M5+#<&lw z6Y5TAfT$A2r5tS+&}H?AaegFdb4Uj!4nAYN;G@$>o%6(kY2y%IWe6H z4g=IS_!-Y9zc9fB4rSR8ei=!#h%IIbZ@98Xt?d!^%LN5q3S6!Qp(W)AGT|fFk&}lB zMfg-HLpp0_EeW_20>F+gxE|(F27&D*4N4IVf?g66Ic=CSACL;D8Qr$OkJ!QfQ`Tz{ z0Ac5&Qf>XPS42q7j>Bk^6#C@N)n`h%v<;YyrPAHCveEU=zeJi$JYp0b<;mdNTb=MA z`U~`620+H5Qij>OiOa%I6<)W`^5J8wW|U(xe^et_?z3UTF~slcsRtJYzt2O=c>!#?#ab>bCxR(>MT zmxZS=dO7F7h~}v<>3H&e`Pof^@SV^Udd#bt{vU1Jy zlgKM~Y2>9;7mSL!lXvmhdcQb;LfmKr!gXa^`8EaHzO zF>zxqaWwPyYIU8wTyhx~#^J_ktcciq_8pc@Dn(q2R)25mr8TTfioTJd#4M!~d>{M# zG#!JrIuDfd64jtZ@;Fx#rfS6$;t3ru4H#q2%(fG|X`>oBm^U}c3d_VT0XPuDjb-$6 zMTsT$VuG;f4$=v<%1OC>p1$dhtWKq~%3t0~XH-+B!Fav;+_Uc(69W!q+oQKi`0wkj z7%~6Un}Lg{@b!fkh1uv4e)=GHa&P=Nw6S~B27d>ifX{~s{>+zOekeqwc$ z^hr(f4c82i{;WNAIx_6aCDOjy)!lav-kwgKR({7?e2<9(wZ zXCaN_hu?Suc}|=`(jvHYCJsIiK&;!6oLQi1CqxkqVkW-^DiIq752 zcj1u%D3Tl~@MMLAN)75rDF{XsHQ;pLW6 z)@RO2>#3TNg^09tZX=*${bQtShAzPy!l@ieoNVpyMz(Mk&H7cgGkPefZInJ&YwA?z z&@9Ujo>PhoPg}QEwYTqmDxG4xREE|t^fB3`VhW|B1teQTXeHP@P9bhiX%Mdu^+{7j znrj?hUzQn1N9^!mKGN1IVU|?*tY4d&Q)?^%+1PDz$D>YW9=Q$UHh5gARd}p)=lm=Q z9|a2xWMyYD#vcq!jw1x73L4wUQ+Wwnb7)`DO(PGxhusr5C`cYyg0!}!FU-6&O`W+* zxF6mOzUB#B^~!<-Hk$&MHUhxH&->lWavLl*>n z1yQ6>{Q<<65ybm?xAP||_%F*xB@ zAzY|r;7qA3II3sPXBD4GTiH81ZnswAd#5JAg6p$@pTSaLCmGOT?HV~Q9Pa<@GELkE zpDtWz4iHxA^96e=ksp}X#_fD@yQ9U{5_mR7<*P7w30bV`$(VfkzbvxwIyk11EwcFf zA0Vf>c2oAGuwTq2PMvRiG#mZRcR3evtxNpOi9m({a2bOkK^G>a&f!9mAiwv9q@V-AK5E@E=C|?E@h!1@^i5Vwf*WRcKW+mml$Vlf&P_hd2f7?->QTKT(GOm2(u^j`Mz_z85nTd zd5!~!C*(XRq~1%%AU~TnKSI`Z`)oPOcp!ds?y7vXar^yA`WqREFuFCFeK5^t*I+?= z72hj{c$73HpD~p?4o>9`t+ELv`fdg&Ib|XACF3RLIfD=xv&rM)Mod`5vNM&~&<`W0 z95wB^^_QSa4js=db!-SD!f!@HcJ8~eY6T2O6FT+>VLcpMGKq$+_= zEAPo<-!KH{$USty#Lv?*6*86ayKjXIe;cLj7q97jKRmC^;7sdaFj@#+jHBRsa?8Hu zKF`54v>h8DvfN=#J?vBA-`_2IZdOeGaQO!Z?~>qp?dv z;8K+=>DhU_^&%uAz9@R>_;b6wz`_g>t}KrA7TN`FMUSsE^v9 zFGQyuyEGB-*d_O`*&BXOD^b&r;W+3jBKTq=WT8IY%EQ$*anbcQ4*~z=!Vtf#?c?sj zvFBpYczj5bmfLHw7N#PTcOQfF_YQpFUn+yP{UWK!-Eet1vAy4Q$y=`VKQ3Hm{%?u^zN_CmN zEt|OdsMCM;j5xZib)(G#6Yk)4LC@^{wJ`36ho=NW+O~m`?vC+Q^f31415(?!?yc#Z zICo<_P_^kf^-vh0Vr?7B_d!k=T5slSO8RCL=zIDj9vJqt7M&Z(2r*H={KhP0}`sFoK z2?jHM*8@!ySFJzaKTn`L-mWme#7WQVF}}dwhE7_)c;i*C z$bIX{@wP12$f=C(?vJ}&5sOKXbF5z?9nk zAM3#71^|Hl|B(;>2nhZEG5njG{+nZOcrE{>(l_pXP@7%A6J9Aco1CrJM^$h%;#eP5 zk$mWCp+XUbp&20<04SiTm;C+h^u7fqIdRR)RY^#Zfiv^+`pmk`3;yx@E86s3G27&x z!`j_DYo0Sxm9@e{`?~2fuo1n)ZlgwvR>hQGVyl;WExfDOpvl!WIpuz&cXv%?zg{${ z25LUl**VL+Tc4r}RbXq{a$01Yn0BXDah3`@NTqG8z>C>uL}^s zFz*z!0QN>seacj|ZCI#4RPPkR0VMUT8?4%c*+?CgfS1U`%u=zwDre1-w@d5o^T)4_ zy{-yWI_U$7GLeKvA7QwerYddT+KX3pwR%cZucA)1nC~C~q(D;@ML^7^l%S`Uxu4WK zYfeiMTr77ss@bx=wm4c$fI-is=#a^_F6vuJ)eBgEB-*V<Tz2vC{l~%q{aBq}nB(+Ll4Seu+Cs#aa4g(vc|iR7a-iZYwY*-O;Bhipv! z*6R!^{6Ym8BGvp7(IIbMdjNIo<&i%J<0p(sY?hl|ti+1yKkyLSgb>5dr0`?3`Y|Gm z{am{AbXA9_K1inI1)%qc&82U}X6o_3jp(Tfwt>VEE68f&LlzLqm!#uAkigRw5X*t`02&dBK`v4{XnFc5JLqA zBR=D=egq2~;2gv$DbRt%!%y{8f}C2-TI*mRYIUS?SfoByaLd#iHFN|3SaW;?P&=`% z1LHLj>s4Iu8{ReH#h2$m$`7+KkK#q2L2c@)rQxwc2?5t+QO&`cfR`cFSso+=mCvY4 zpLC)IWB^??dQc}34mQ=;7w~fnR)Hb9Y6i`JZqQOE-X)?xUXM8d43$3vLy%X^fl2O; z4~D`e4#I6;5c=R z$>FN->iu?|G_W9L+O@L1&S@Vox~tIGkqr@@U94t{OE+i1FS zES6x!LIgujO^Ii5jh&6?Te&FSBv^FyEP01SlODsDzCxd4#iCYjSP2bS;AHT7vol{2 zj|I7M(NZq?$z}8<{EB}K5FUgDU^mzMNNhIhfD^$4UbL!@UcKN9>J1!7ss`p8xjazc zM6JAbJ7KlI#ms;RJTqR=ptt;yLX&!7P{}V!4(Q|;C&jz4z!G3bJf{Opl#`Whst1h> z97aPr9UJPs*y`%nVYR@jOaU$J1_qY~QVDp7ts4v0AP{ddm^YGJ2e00(A^^r=y0x1@ zm{y+n=O#oYV-tuOT^capNMR$qIT3ZGpk^b20W5m!2{9X8mCBrHDU#y4f>ZV5A}edL zX=g(B%~vhDmA@u+i^quidBBJ+HUm*nJet)y?IK)>BVn|KT*L7%#<$mN{L)(8tK2RQ z(=(qScw;wKlR9D_=IG-Vl-DE;O8x)>tvs#?11+NZ+!#5%gv+Esr+tHz(@p_-N`!++ z)a>$nST(QTcWIBGJp`U8IM*~ntX#_eM1e*2*d50g&@kqGtyatCQu>IlV}OGC_b~{t znT|L+L3*gEAYOeLkflqZi$~~k^H6%yQ;fOT@x&1N(86Q*lPvKN_Z-fXC$Jj%Pr_+U zb-*e)D2AM)6E1P}r=<2>+f{P4KVE=(Fo^*JKPZ1*5=HKV@jpR1^*`$2A zxcpzAlx|*6z=Y59=b>%y`}3{`zHfI1&+ixZy_Z}t^D+8(y*}RH&GV~mARj3tz_N@ky0Jr4qf+fNq4KM|cr(N?=Pj#(wpaBd>)PH= z_3v)f^4&Y+^+}I^Z7R5)zaNd2LmTk^j>MwGdv}f|f9f7ZRm-}ibU+amRmG+)H8xjM zMYmMdWHFK(lo|digPs>kcDVc?4&f!j4Sagrwx}Pkbyp?Mk&}9zSUEyc3}WCAhG0A% z(;lr6nK(bhGTeE{3BHtDLm{du&I-pu&FI$iRmtzy6^AaOhUix0>$eMi;8IYK$}G|^FiiDBsDTTYA6EJssmr(B+s!!!a7R@hN5#$0sey_*R+Rhj^k4t5g?WeYI{173BF4@L4c5UCv?WJm>)bW}xX z$6-lTN2ch|dpSs{CRE{G=e}8oP*s$x8?(z(eqoZdDOMz!c^O3G9v?;qSU?s*GR)t4R)n=~HrIiIc@WgB3)PzpYm
?GBF9C zDu*8C5M|=@(Gmh!n4YrH+{!9OEGLzl-U^I;)T;xy{3!ICr&Tk{Dw0f0Uj@&ELO=}_ zWvWeh$WuR>E5HY!4)LjBPO|~8_r7nG@n8gvw{tXqRiU`TjQCPn3rJbGYPH_mD!vQ_ zdBgIvr2;oAwdglE&|O_?X%50M)9#l)YrzZ*+K~QD)iYbQ`E*$k`~b8`Y74q+$u_SG zwv_6o5Cd5-sr1u@;&1Y}^q>g?<3pQtMG?W=)HVRu%YI5!ekEMu^S00tsMy&mXMQKR z*F#dIu_kh?P}UG%F2@T%E6UVZInLl+$OJ(EL@>~V(!JizTJGCExI3_ghrtH30l%by zavir~wAH9}N~;g^4~>MQw}C`*?% z+=3-95NePpAzpBcKY*1mDLxYH_oMuc5gJ80E7e|Juq~UJTxOCfot#f~crLM$p6>c) zC?8=!NvYyZ`Z^+rB^nn}0DpjJK)ax4?@gsjOQMpf(JFQAv?#K^y&o93GyT*nEwD<} zsp0{jpPhvzj=n8iq%@he*qu#N)oXk0tObbvd#g@!BTuVzPrJyl=3)!~Ye5F3Av z-?LR-6PWuPu?gyoI1FN;48x(ar6N?kB5&^Nc7#%)K){ar zq9+6xQIMIr11S@!?ISOOm3-p$Cg=J=Em8nWr?Ey9PB5TMB`Y*Lnzx8#(y^$4Fs?)= zG5D9sBF=u7@s^s3t~O8BPBi|A2iO%hSQxqbyfup6jD3Y+^0J&?>}yAc-wGQ`gDJf@ zVE;ojLcZN=|9QLRzC0a~her{fX9_uA{Ik~QkStYTZu`azFLJOsYuAO$YS9cM@m*?= z)?>BMhxI%0_DFZ1xckBsNbC;|4NZk;Z(SCnBXNkR(P1wu4O?L+;AeqLAb-tGzd0 zKzAVugMyM9D-kDNSb@bG_`B6Pdt{|!Foq2EYcV2bnY2s{d`e2}kgVtW21E1WyTljU zduZJRQQZ25*TjV}i1Q18>sEKOQp$lGku`llTlu~UbEcuTUeENqLvl!!ORTw#aOD-P z_|`~<8_^BTkXh5>EztGE9UajEC0xD$$}K&Z7Kl#1TIU${_2u|Jwn(a2S3Lt$_9ixK z>rAPaWu>=RDsplQr-)&F@>-WIUd#R=j}`k4Q`u#Q3eBIbt!qK0@x)ZhSI-uzqfA_P zJrIQ3W(1W4Y{|8F)Mp8RcSOL@r#v{8QIE{Q8pT4dA-UQ4|rhWZy7+e1>Rqa-} zMSOCQH*|pT4}5-ccew>!W){3Rsb|MJT;tt+2?Y+-vy*DA3qDJjUtPS;jp#?P3_G%X6k&-tov^m5AQPsl^|6Tu=hc_(R^G zP)xsY77!A2f_WUDmO=lTOYj!^*f!DjYwxvhycI~cNa4tDeGKuyx%#Lm;6>#`&T=LE z)Z&L$D*1EQ7I5A8`(q3PC)o>gQ^X#K5IbA!vnV=;dL4X*55hVkdQN4x^OCO10jLr3 z9GH~h&HBoOa6NvGXk7%tAO8K5p({^#M&XwF#Jz>3nT&|HV0PA6ghQn+)s5JuJ~1RR zsFOPivcSv>MpbZ$_xJ_pzFvcuZ@1>_KU7DyS?Ei@j*uW_vEnXPHqakE1f5gn-@M8K zvlA>KWXuOHT(mrnX8dowHK#Bw(?qm|F4A4I==fDy?+IR+4M*xcmK{`k=_}!leStUc z38k5zlxaMf+prwR%V&*?|DK=$hAA9J%ROW^Lt4F;X6K6{#%F z3Y{a{+)c@0kKnmQc=ha-JM$v|xvs;i0J{<`CwF;2Zha8_{-;y&9#(t)x6V<0$PF{N zE;k_e13qBsgT_Z=@2Ilc@H`BG!3|B}jQYPPoHpS1e@{6*e0tix5Ym78f_DBLi5GgR z^&Wq!%n-_PV(c@fB$3(54XZ%z642(-uV;{x$8FvChgfB#q;@Q+0rTLgW>EAREhoo& zSBsnMBJQ*E&5nN&_4#MaJaa>vRVY8DG<~y8s#+0Mv+MTMa@sx@X>gv-Ih{s;`Jf(; zA4qKc{pCH=JDsPG(|L(E;z-r(H-8Lln$yI)n(xxO;D#JS!-E8fKd0EjP z`&3fH_vIlzW4VtsuiyvdZPk>YvY9MB5EN9j+lgG*s9M^xXqsQbbFztX6VS~gTybZY z{*oyekU}ub&}fcjO-pyHCTbpYRtd8!}c)0C{IJL1&%V;yW=U$~R-jEdz2Km>(KBXZB4 zA^vPxOzZZYNrWHNNY2J|$Z*vQQn!H2I*|>+ z37xSZAZ^_}3uOPL3K^IPNDvHJO;}UNGeiQIP+`6wiGA+8s~2QE!0FZ&4b3!-exEYf-R7fGXvsnEZP81u%VrO)BC}Ls2X0E@}TF-D@cmf zJHPj7&j;AvV)tRPZ^X{R>ru}qyud1kezWE6@-Bpt{+n#i7$1RP{-s0priXVud`$wo z4+k49QXq&9>J)n~ws_#4Tk51!0;x-;H+L31H4*}{sE4;)# zgd~B{Sb%(d0>q71?!3pIIs$5Rzba;} zimh_1o3gEIESEbAo9M71-$D$3 zMC?peUYIPOYDnmymlWeXI+9DaOalM~O!j7r8dmybmojmZ@<58|vR4FD8qG~kvgYAr zPM3Y6N6*kLQ5$?U?$G>�bnoi7CD^<6RJB|2DxZh?(#=>9okGV$v-?m}#r z(4QmrCXk-x$ahiXZzz}g-j;N1JL?MpC@urrQ3#pac1=oKueymVc3p z1Nw536lQWrn;i-LAsOkCM3BUF1U9P+rX(Q(=G1?Xk9HlbU4_lpUha@92q1-CS4ppt zrg0K7S7+ zw$4L~9^2TI7~L*fmV9wJsI=4uNh*CCC=-?Rd@AV>r0KE0-4i0L)shOewFh&(6eF+F zjyGI(`&Q=f$WA{$?)qK^DYZ0PU50$A(GraVI~=`T`0YYtqlZ@q@89;q^5Xkoa5l6G z(!KF-?#A}q*x>1k2FOvUhr|#NqTh`t4GU#^Rb__uV)_>Yb!S4@+`A;_w?;cWnve3< z&t=bN-@m2#29Ww3MXYEVofrIV4qbHr(6PQ6Os6c4t_JAXj*xd=Y|^}hw2ggr@Y7ly zuoSJ2-h+dynU30B?2>n^I+t}u{^1x3`TjW0j)rred2SrDQw4gVKK@akpXf&^JQY}& zznbrR1(C#YtgrSP_NBT`+k0UL(d*UFwDCG_ki+&EsPLhmv*nsr&*<&dO(f39?b&j@Dm^`ql+>#)d8-inRp{(c`2`yF|KE)3e% zcF(oh!EojC>`rz9)p2JdC+wP`E1#Rqt<$|#VCW(?7TL!>h#6Opw@c(;AHyj_)mL0_5IZiJm*z2gdf19?Kh;9_@8G^ohuoXX#0t8!$#0@oH$YMp$ z$-1LzYeD@11|PaRrkgxB+-?thG(6%9SXaFZccUMd0kJ5>7kP#2OOufBNF^H6n>%Uj zu>cb`^zL)Fh+4TjLMQkHZYIk7J|v6))ADU^)o=jFtrXRc6*UyR&m zL-@n@T3V~5Q^PfAnuM;}Vgl@`)9&aejam%PrDiwS(v7u7%}KijxK;o5A4S3_2rN3J z1yEcxFaQ9Ti~s;c|8-wu?quj-p>O1BX=7sgKPIUYt)2LP@={3+ z55qCGo@%LuM%$&vB1+IBDb~>ATpKI5aFQtBq#Rvce~nqQrs!zft!vFZ`>$Gi(LCl3 zeiqv?4_$WL86|XdHyPY3A-2~RtJ_->-!e{@G2rjkCjGTB&@Vx>ogiIF(7(S&0o5KY z25Ufi{IB6s87+#FbE)MkPVU>yjnk?7whh{8-A5glo4o=PDWdjwtxtche!- zAZbG?GKq34SF3Z}xrv#e#pXHzQ?lF6j ze)&D!Y84kE5QK+mke>LHJV-(G3JECmGzl2ndM#K;kL&ry-O!QhxF(X;X6=_#~tnVE$bmPXfzR z9j)J}{Q%iBpnK(tx?>due2BTR5s)tb?KG*ITGXKS;76|K9_bW=zBvDF4T)XNt@cJ7 zj>HqAvUhpo$pa4Ob-9 zs_#Nr(gy|k^&*(cz}pB8(Y0h0iBO`Z${lojWH`vynz&Nz&20OXSDJuhiN1h_#irGY zq=Wu3hF8u~>qk$fNMCcK!FJsJ5Z$j4v;naY@NOmr%6pg{?mn{vTEIvz(BX230R=O1 z>dKa(U4P;XlnadW*bTETHcgacb%<14Q2DkqAH!NuOim}oR0%^_SS7l1Jc8$j5%8)j zr;_N-JQY!z-k1`Ui9zo%!T;c$Ho@KvT+iU8#K#{DPRPNQS{oh6yaEBP_g%2Ina9tPOpvA*>xQ6w0$+- zyZ{=X-!s$?EwEDjS5j0EP3VEY2?kW*q=P6KH=xMl0WBvB0J^5EV8-m3dM$5<*{@U; zC)_^EvH^r*W|CFLe7}&o_ZZm53esZpYP@A6Y-6Y<7dTSJ(BLgY#cS{5 zc{4-_zyUw$5kp{bMMK9Kw%`A(V~ys|d6TEL7Qtrc_iudGs|7EapYSvn4V8pLj%tJ- zXgUrLE}x>o-uUFH=eErtqk@adaAc2x#;EyefCItT_xNJevT`lT9XJ!22$oN39E_Ue z_@LIV-%8eI}V&pZ|6zMtsS7Y?)jfFh=&gR!xfCB42zU6hrv zs9#P6VmK-eT~4M#r-(8BM6i|-JI!4+yz|FU0Key6mmRW8Qgr;IG0(PQ{DN=^p^npH zxLWv_veNFaO!s0ZVSF+k*uX z%uGghu)p$YlDeM>(*tysIt_7W+%Te$tF8rV<0`-)n!R#M;CLZNP%crX?C&izgwBFU zHKq~?4l~R55JUL(1A0R#OSi5kBRgu}Va*E@ys(0vHK&4y58EzxDM~P`Qj1@~pHwLQ z%KT0Xg3)G~J&iwe(jLsr>`BS?v*V(d-E*YSneQ%Xt+kZNef3Nc_-w$!2M2}{8GxmSH>vGhYX~&D@Tb-l_-Ir6;@hlV$TMPH>mp$6X{Hc zdz#Tvv6!gF*<`zCy1P_HG#HhoJ@CyKf0apWH6Z4dTds&BfZd-rTkJo@!H7~(i@}Yf ziFi1g7sy_itf$5NAI`RN^m}GAH%)q@m*u2aq&b#@H$o5Up!8?2A$+qeTL^vxB-3Gw zb7szh&yYY4+{hH7fj+LvD2pJe-j-Z@w+PP6GC9-$UR3&;eD47~>I#rM7l~$^YxyU~ zsU>I%f%jM|pu7^h2tfnSbbmh_s94r1Lp3G6Z zYX&(h-s`@9PAE$%6crXjVon3vDh`Dse%&_$$I^QP{1zE5ye=!T^~d(XZHfxIL;bX@ zO>ID*Wz`CI@Nuz~03vcCqsP7ma`qgMoI|yMg-O&z>~hqq^W;ZdK+|?%RM&6si-G$Z z%z?MgJweGAtO)agdcAa0Q~C#l|o2=!s^T~YPC1~33%K4uFT?(j!b#)C*& zjAz6#ESBeLM-}#_A-$?qzpRwVn}*Or)Rs~q;N<95;IPq@@wsYXZ*iXOsY!7^k6qSTCYEAwPns(Q;yTTuC}Ww0~RhZ(v{?7>c)i{?0V1v{4jBJPqY>qeC%})c4>YGe^li zgYR`qgHzyQ&bit6J9y6zVgshCwt#Z5*~(dE4@I@{-x{z~>XN8}c6+G@_4s5fTf{KF z_06e8@qH3;Z8XPbN9BU~e(+JkS1Z$#$z}B3=T(gjA-xv-!@$En)0Neixu=!G?Tgsl z&J=V~)bw5_`Z!z6ACQthHWjSg1uaBs?qvEb)T~_QTX)HWzR_S3DuS=&Rkz85cSP?E-tJQ9H1_1FI(yPxK7gG?)&z4jinz)XN0LY8J$( z0z4p3(;?_DXTxy?#`&27Rq~lV(B2*s6r@qU6VKVvKMErV!=(>NIR~#{inHoOs=FLi zFrR?-v&QEW8wIFc$nBUj0k0)o;PCx^@#SC#APEEHlg<0t5g}An=IIJ7HRYD+aQ&8a zS8l=0G0xeF0yBPsSdFjqS|zUMq-iwS4sJnRf?>pT;${ko7avS~0o z%9rq$HKTAx2N#iL#$?Zx<-PeN4SSFcEB7RkZn5DmtWW?SGC0n z2_Z`=LU>rjReYxhKC~Am+^MQ1abkatrRq6Gq2uXU3`lci_Huu8x@qUy=E9}*i7aA~ zWyZsVDcD>`Cwp04Mo$9!Hnhm`{cpm}{}4{pInd-!zojt%Q~&_1|8K2f=V0q%{=3BZ z58_mKztob6?WJlh%u{ca{3dP@WX(;04HR$`^8y>|1 zVJQjEH29XGm;M)@n@A^&kks88@%}O!Vxndg_~QtPmACP|G4&RuRqxG8VUG!(}~T&1shD>7pg=wM$XalA_ECtt4I615@!$m z$Dwr~EP42CRK+2s94rAyZ-vPfMr5&3j6{kZfk@L@J#VXgm5wtq&7P&G;@SR=SWlh6 zEnDW%RqYxNGe$|!DJ5qaC)~4~&C6&r2=2s*n^QW6G$9YXPH^Yj0*BUY;5f)OLq}MV zb~pBv@3f;*5u8ivTu?_&Mc7BG42OGShd9X&C@>G^G(+$7-ONGjwfW6?Bkm={hmT)% zyJvHu>TQ6gDJ@idrZ(F+6(@kiT)VcbcF(OzTN+=|*Pn+$C<|_}HQy7Ds&6N-_Gy>Uam)XT z`UMr+l6+ycZD;9jWrPJDL!2Qj_47z%wk_W#@|a)!|Cr+WS2H|T?-#Wmp1zA7|p3WxfF1k+X&yD4%4GF(GYBgVW}^Emdr#aPmB% zTaHIKq*~|a7oRpM0;jWMyxQrgVg>m1{gqPq@ox`_OJ@lY)usB|eP(|ORgi+Q&WtN+ zI&TX`9adIq?GyKgL5Li(2SB~Hg52fPiqUarUrM5R!T8cCh2% z&(GN6qH5_OePU6A=dkh;gQDn-x`Jb96v4{o5Ow=iv0@Y<(^V=NWgV2xC_A_G&#u`P zw5sfslI2bA{dd3=VPxo8hNGg^!U)S;2S%G6iIhuYiNe#U`@K?oj)WXZ~;|=Hy-wJDgY9v+57GPrq#PjsPdg*pKzbB zME%(t^D;2IttquLyx;4Dwc@&vO#ZDXGh&B90X3L%^2(c(u+1=x5n=di`M1AB1+7Wc zVGG9zcg33!4uVPNYSapOY*m}1NdW!lrJGq}kSW50Fr` zSgj}^<|qN|D&t`1QUE~Ia-RH}t%-9Q{)tsg6lGunUzdAG9e+u3-@DlC$ECdl`Z)My zW54xlU}=9gEIZNr-rU-2U;%yLBr?szrzerx*Ll^jDMy9dn_6gIh!ndS zDDO(ics%2WFqgnke$vAUl=8|$cly6!kgYW!*@e;rGpJno%(va3Y;A3G| zHlGhLG^I`y@4w(s@@0vR@xec~^fZrTxV*09q~ulUj|71uwaU$Ooo*s7!$2E2KfQpk zbMuVySaNsJJ}nMlV1xhnEa!g)%lt8(VK+zsfYx8Z^8d_o{)ggrGBeH!e^~L#`1K4xpaX-R ziRY{+fH2Y!0pWwHNX?ZUg4WUy@A-QuAFKeBS_4XzJD~}gRd%cL8t_4_xENKlstMZp zC+Vp)qBJE;?aC}M`WQTlj%%(YRZ&nP09pxFh`Kb@Xrw1Je@oz1+Vm1zFK@^&wmss3 zeBx1MK+~|*v6`sKLnNVft*g#3nqHX05MiPY^U^%;9cb~fgJ^ZCNL7M)4Uo3sqU$9% zsXf3}KR_|R>&NqzH*b~>hOAm{xxrz6W#F4_!nORu2Q{iWLP%C>5-^;B_J%pO`ne*A zBM5PmR`kVOE7}#!#Ix8QesNmD3PnV(Fy*>HPjkZ(uWfTN!Do26fi$l>Tvo4%K~esUK`E@8;`h&Qs^Bk7gC4X(nvohBB!iL zG;{fSZr@_NlA#s2UCK2q@KEaaIw)6xaJXS=amp2sdWc_ZngxXM`*%d@O!TRNtAtrh zsC81fgC_B3BGYPz7qFq9W;6-VGhBWDK-XHaS0HPsV6B{; z)>pmZ&e#o|APP2eHI0|&jN)9P1 zU=+oh2%pJ5#(8toIA^DNE6}Sp+=pq+VM!u`7aSl)B-JiS=>TwrKqG`Ftin@kurp`7 zEJ88LFshp+9nI0lEF^TLB0MmCcD1<%Z~5#X&VD}ggCO)P0L}*z9Y}Ed%Y@#Fb0d;! zZ4$=Hgr|R|xCdKFUcrF`PW226+dPmLRcJU*`NX7vHslY96Yg>!b0(jVYXD)7XDNs& z^5?x42~mMZN$EjfYaayej;dh#>Pp8cFVqPHl42OPdk^L0fxE1@oAEAh#*V8Ti;kbU z`KCP+oWEThJYF7a`+QsaIKO{b2wEh3_#FjF9$?_PgJo>!3tk29Wsu_~Rk<&ap8AJz zu9D1#As873;hF9m1%%>b>uDg^h&9%69x&0ppYAserPF<**o`m?nA|0d>&+>?;BDZN zRtM>u&V)vHmUpDdssO=0V-!uE_tOLMD9-cyWU}dTGplzDgVzhpOedzoDJ&!nJ-o7c z3;8&F`I7?N^6Br)!EZ{A(MF#}^CSX%!}poelt4}FF*-77meW=7e0Q4}@s`wx-ho!{ zofI}_P*jd&@)IZNPvBp6ZoO!a<7e7C=bn${9c3>^4Y0Ij*B+GXq7Z7x{w?IslgM`% zfoA-#Ce8P2CRx8((FzI?yRM^Lt!1$1)m4 zSHUIgb=THb`;uhQzWbVF#(DolPkP|QsaAW8$%1UtHTaragvP<`gJocxU2j~~?n3xc z5-L84$k@X$@^0?=o2ygv)2u1%)}u%Ba-l8-%F~Cal4tX#SzSSdZnH(dzM9=B?O^GIPW2>lmn$s8}o_&YiBJKREG3 z{e^>&{k60jkTkqCZZ8F&*N^DXjX`rIa-*{fk73y6AB`5`jyeE&wA0hO=kdu~<(w7% zcT84eLCFB9s=(ilati{z5_nSV*>+FzSYFkTJkHmduH(8Cmn*T<)UGpolk~~b5_vP% zIxiblTYUvC1=&k_TTEpj?jlUpXLAjzXFHfR>iJy)z5v?bIp9T)h~LVKXm>EaYDfje zMUN3jL{9lZ69j`;Y;orGk;xgQi4m&J$^(+}z6l>N$^_d~P)@Tl86~$d2Zr!mfuW$u zi+x0YdsMHS3R`QhfAqqmh{y|jsE5Qm;k{Gu4^EYsnpok*ZI|1ChFaPFRTYUJ#Yi{pOw(pw@i;>u-f7@0Vrty+bQ2VUG=gkyb$m7F~UhHr2M-WtA80Umc5~ppB z!(4R`#y8DOQ%PS7mtTu%Fijdym9Ih;o;MMhM`zGIFbSHCWbnq-gh2LL(vh(KaFHac zTz_0(x@);h=u&^~(etcpyh8c_){e1aMzj` zt4Z_(22@$-jUAs8ku3?bYGDddk}xi)e1Txx&y`|Fc{L0Va%aQ)h=zBQlwvFkt8`=C ze^-As5#jl`XGeL$4joDjOg6X)0YO>OOQH!Ah6#=j=dFb5!JmXNG@L1X z8S&g40Q+YvTU!p)4SAC%bvtV2ZZy|dLT>NoFo^D6n7Z^dfqk#FJj(w#el%4aGb_EE znJ@SAE0u#m*$B>nU~QAwFAIS1rO!ex%3zj?-7cNu7tGae+ z;o{_EGCFrE`&nq&{4gY^m}KiwAK3W`l$j(;YEZQir0Jl>uv<ZD|wk>D=S$js<7#)+nG%AS{y&n;B;I+ zlc9&~n9|Mhy*DOy@ttsHu3JS2bV7B2I1xDB_`@FZ?sQmQV#%Ujiqx>RaE9nB$b@yU zZSdoVXo4~Ej3afAJDz3CnJ-u|(>#eg;ure=quxqUS+t_j9((nF%eLPI|2uH|Uz0Vb zUwMnZm934juC>06zUhBW+4o!WUEvSzS*RS)vWK4~ z(YES#GZnK{P&>mMpA`Wj3ZM^71`^0wIleFn2JKc4%;A|$V}se*k(*6$DZ^dU>hh|? zr%p2#8aw3JTcy+1apO7`7sYVArS`Y@r}#TJ+1}hxP*-g>C*};>`WhY5j%69UAk#F^ z4kITZQ@b;<5ggFpip3AuXke%E)RC2W6amEJ7mQ(*N37O)KP?6TdwD&hQ zD>f|wSoEE7myEq~4bg`O{9!4{$8*c9g~tNbQES&3CG*#l$P6VM!;0x|u@2+Gl!a4WJ(Q`Z z%$>0$Nk4?BM6&C55qOGQ3htU~J6K&ui56yOi>Ocsl4~}FrIJG}kO*L&y3ZrHJiGlQfEE{0Q^{|=u8UY z@00n?S4jrxa{Ff2me2=FFu}^|8!he$h0l$wGq;&dMqOVKNdKB&Ahp~EGn717V+y!+ z2F%E~xVV05`quJ)K*#atQE>l(+yV;YrZa&hp>Gl+{o$==|02^BJ^w@?FDwXsXrgU& z6X|x9TNfI<$5QYXH*60=S9=5thjCl*nYza?!NAi+7XbtAD$^9DTi~`{V3+#I?2CdD zglvj%8@Nys-b7s@@QOXPLIjQ%SRttAF11iWkpbbBI}E%pj592j zNQJ>TM1Vxb`*8!vC0K(^VW}N4XaCdig=5;VMb)^*=o2P9ugS8^7fG;yRYDr?L+Cgv zuzWIf-ot@XR{F+P=C1>xZ%@8Xcm*O({;C2)p3_)C<+9V4c#O$Y4D^BuL0Z~J_o&=- z`+^oCi83VSp*PbY3S@g&b;2AsWbBn~A|J$K`YxFgh|(D@eA)nc=37DFEj3_3WPs3^ zfnvRrR=5WHz0r_@NC)_RVm(m7)90~G>u)Il1%P$02TukhUQe@;As>;b+5%gK$}=uR*ub}kr~D)>Rcd2c2Z8DlkFHZ;Idk+!kyWnWG4 z2Lt7?rnz5wS)7K0k_=`CT@fEhkI$PE@&u+d@pwEU43a9ko147~#!A_P`&|T7cDj z7*QtKWT+8q8fs4JpK?jEldkhY;Y0u$ZtQuy#tAcF!adkjz@96J$FVvNA1_ z`uuQj34A;=UamZlrKn`xi)nY%avq~Xfw9)bG?e7wbh+YM+DgkZ27WF$XN~cyUupS4 zjl>d+0`jV>Xr*F{ZeQe%YQ2S@iZu>vURk9n>jc$HOE&5_t|a&T62)iJFi|)Zg71Sk zQ7D6It{!;Aa{NUN%(FLia#$zp9y~l09P;ycEYidK ziD8iryVpe&M-Oa$t%4TZM4Y-x8E(yyGU+ZRd|l1y&H$5Kz9;hBb1-KyTo*BHHW8u- zGn_r9VEWTNUymOcmT)V@2niuGWpp*XtvHo1ha6%1P%`8%dcId?;?ov9xvAFIa*VVJ zj${WeL^=f-O-?N4BBQRD)4HsF^HTW#S|3cjP63!Ys)CJ^C(i7J9SW z>8Fd6y7pb{ol#T|_QPR_se#cR4R;{v);BM#CKk;VD-Bkv`NuKkejE*2cA4e|Wxfz> zD81l%umffcl|l-)8h)R-d8;MX$Zo!9QnOduEps7TxVV5R>;tVRbX@u?LVaW41Bf4j zn0&|+5x}s$0w9pCXuxDgMohaAc6BccJQj>xxInSzru{lsf2s=2@Z1^sICuar98k)i zJ8@nR6mDLbs|JPAHDduC8SYTcBic?>>)Lg50Tgz`oBU$6oe-zq4Ra8koN!l&h)_}z zR--y;UNP2_**_VIdna%d1@z4B?kx5g*ydW56P00O;L81Jp`p9Mt!7w0q}FB`9w*NP zy19B}M3ERE>K<07mnV4YHYv$sY#wFgl9a9A&$tE*?Lz^$RNTNk7%=UjT{wUb| z9&hmK+gKx3J5)xXdp`{od2}$FZdRYiy0*inAGci-yUklGkMwMr9A)-oFg_ly46)fN zCi$UxR-6{vG2by$5c_Q!kYQ019t2J%AmP2@FwB$bb5CD33|eoX;PJW}vT&TA;tcw> zEt$PZw#$4ay9V|)nX|5Tq1+lzwE>A!xkV~Qu%TJxYrto-=ggCQ0(m>nR@{LI+`sW( zM9M3g_mK08<ojkGA;D0Qk7y!8=~z5^$8tUw9_~o zm6)hBaF#DL0Ad3Zh;G6TyJqel4|~_Z%QI0YVb}6!1vz)B7s2a;g-J{@G?ZuQZ^k?# zfYbwU&zA&e@n+!Qp4!kGncJ13@kkA<)|K>fK~>+;M2O<%DW$f;j7UP zd#;UA0#N0o8sQG3z3pV>1Hk{Y#e9!e*`9;S3R{D);7aZO$>u~IPOU_p*|?K zGhV{n$sqM{X@uajo%Q+wo4O}s$p8|WO@C~s&J45E>G2~>439L&$=B!6Y9!Dq_NUH< z^xNa(`peHZG|M~Y+gue4J`3Fnz5b1tG*yZ*FaXw12Wo;FEHpyXPualA`5XfQY!@d4 zc9j_RQ!lVN1}ox|(o*`?mD%HFC964{@}i0$!;kGAo+sZb;hx$%IZpJf8^NA=O9|TE zQXkxlItva#_hUWqQa-1fyFs?wA73I?KN?nBs);CuuB|;ZVi|AjU542x*GrOF z!GZ-lgD^z=0^M16g%v_c7%_uxgfJkyQ9wU!iODxd7OO%+{3aZBia28yU{-LP$tAQ3 zJ@`3q{H8X%AJ^nc0ttMRLXh4$Fma~U4sKXu2|_WA?9BI>%9A9}C=a#C@7v4u4fWo^ z*pq$vvqu{7`4^D~wDu52(QS(&$8-K9EQ* zRClYer?wyos!9~&N-gp@%iBW0@Dt>9T_oa@oP>PoccQ_S3x9nf*Ohla$nC@EN0Zcj zu?zggQ1bq91`tfv>$ObhR3zh=A|k0cGy9ynsYV}(E)K_{F)(jKj>f-UNTf@N#bLOG zaq?1C#{7lTO-Ud)4rvnb`iG@MC=9Meg7N8=&Dk~QcGc&(uwd1wt!Qwa=)W5)l^-k& z_A_%8z1ICR0E_BJf{&Z$A%;*Y#v>T$4txINC89WVN~n`ws20!&tc4lb2vPT;uvDG^ z0DeXN(Df*%9(qp}PzF|ZMzaSG09|pUC5LhdDt(di8JJlgb{Z?T;F-lAR;z#>@gV+u zf8_NI8%4aha~`E*LjaRs`zD{{lh>bd*F1Yzq&LS4)rD`zs6LeSxo9S zg_l&xyDRk#5>Br=P$%cSEswdw03=nP8dPBP1Ie{I>H_UdN|JJ8tWhm7CS_6ONn5Gg z7qc*p&N?JI#|o58EzR`^hghE3i5*V+xQ+8tDO;#{g#x3M9AY|b8wovVUbD6%#~)=< zF}s!oBVr2mm|8|%NWV{3B4xjLL{XTejmu}o0!Z`PC}w5cBx{zv5gVQdWjS=*a5=3e zpLFJ^BAKWg5*p$3(EE9yeJojmBP_x=p8`g7&Gy#a*T2QNs`&%h$RSswwu9`W=|+)- zu1hBw5B<3%Pb&IPY!dz^N&jhW7dXq${L6Y7aV1wq5z6QcokxHh@3%`J$g+TZkiSwm2wWJ#!)zN-G;GbXgQRRhE16ZB zvMRFjH)dj|gkaJBQUS@-Hp=7F3mZXsl*O7YD=?SrrT%nY9JUkRgvjdGzrWn})OdMn z9Y4*We@Nfvv~dQQhnps)0>Q#PQyOY;mkh3VIIg0$H_l+zngBTTQj&`5(n%k{Hx+=M z{X9r*bA3ETVcUj@k_Yk1N)$UK9HIEf)Q*Af&P)7a3ga)v_Nfedv zKq>2}#Q1YCclL^B9d0?5Yc+4sr2{IPS9v-7%d#!8luT|&VNP)D9nG}^w@LNgg{*u4 z`MeXQu9E^y?hn8d0Ec+4oIrp!^RA>09xLztt;_tol=dikSDdux(0H7YvF>zF%Wo zm9kHzR%a=TZcJw3ZYED*P>q2fP*a79rr~YFjaVTVq_I8$<6;!8RfDaBwR}v&e%v@Q z;ed0{w7VInSx;(4Jn5wE3qjGSdvMWMF`U09KH`ry#;UFu$P_4#`KwxgwL>v!JNHHW z-bYF%*Q?rl@+!xK)80Q*S-En^DX}Z|6 zhFzb3+rJ_`Qwv_z{ARyY~NiMu}I2F)0_y#B!2CWB?<)N zhjJ}+DF8!T+W86C3SCd4t0AqfFyaCjumN$ebnX|WPJuvExmU~$8uOg8L^rPqWihW? zSh@UzjVa$oVhMK_5f&7a#&M&L;RY;^#SXv@A@IOhBr&Qirb*c>b8k+gTyvXd%$(u* z4J@qYw`$mMDK;|=uNwGgH$~z$3g_#zppfYp;2ts?qi8zMOWK(??B>&3O5VFb5@YoF8cSw1yjy>bHCLmbkx zf8VYYER1o?&p;QiJ z*687)tqD(0$e{!LEw%q3_=0#B;s9$;(IOV^v@M3<llvnWpQYSqevM6^vnS368QG>WgBYww6bh|^lON> zx@F#BlDEt)1HFpVH|jUV68EB7@Zi_(l<@gfh(S-(VX`B)HKKt-)^MjMeBoHvkja9) zvE|BITPaAyXp62VPgRjYR!H~TLKGE_o(|cQV(H=5ZI! z)wiByAvc6Ae_iXr49F zAr5(qPz8p$mf8v*8l^7{%pB?|#HZBM8@{_QYhhOXV;X+sJ2>ZcDvaCPli!Z5hZTXi zcEHTiGvXAYx%NOgx9{Ltn)?0(axg-q?b1^<9OVCRIhyehRydc=*NaVA>^9+^&QZ)*=n{1<`j()6lHYlFwlt&8h z>v>9$_8)9j^6d(^%qXJ7FrpyA(?uTOXeX?tmP8WJS}s|wZ&Fo?$w=|pizE^Nj|kN3 z(2Kf*IZp=6<_d6(sH!b5QqMOdt62gcHc(%<-#7_cPglq#iUF>??k6&sRBR6}*yeP2 zgC{-O=hQ!LFJma*8SPcjoJ>aaebaH>HvL8ywJff>k9M=Lz$$58U>y1Wff(qK4RO-I z6sXi=Eis6Ao6?kL?2gWm`-3wCASnnOe5Sj4c$fw7M6CyUB)<>B2S~%Iz33BA&RyL> zEXU@h)`pQJ>kHaZ*o~5%VWN7_QQHt1k(P%(iv+`>uj0)J;?@LtEx9ANL<{GKOPWjR zYyE>tDP>1Ju0<4QPY3L+L(Vu0itx=0*lR}NHSz9;VY#VK^bt%v_0IKzxi|%Vkbqb6 zrm)%POMDbBJ3}L{(=No1J=)tODt`~Kfwn1n6>}t2;7H6h#oQ8Jnk645gjq9MJy9&a zWAd?Ial@aUS@Z0LjaELGy{s4~bNNe^+c0{!HACbO=7xSYQ>yVk6KSo4%RECNt0Q!2 zZk%xpm8VXqG-Z*N^S8_l-$AI@~1 z`(7}TvL;H&l^a7X->89tcPkKr! z2m*j!X5`&^qN+eJ_~j>SS=k|q5q16?G?7{ErK!;8c-=SzucAcC=OK5fa8BO{@E|o6 z568jK+Kfa8+zLkjSQ6nCmWotn5Mi@LlshgK@3-&GckMw=*~dJ0zE zcvC*A$$c)IGPRX$n)~T1wviV=@K5Vi|G1-V?+a`ej&PTT_np=kzEpC%Y;!!g!CZj` z;D~ftkv9)6+uJ-ybHqazOwb9GHSRcT7K=&~(1X2w8V)>i`V_Se_Ya0(4<1Rw-F>A; z1RIu{t0Js+1}kX?Gw=IQGE66Q#0-1rfa6m_DS%@G+1wY3(t^Tgm=AQpoqvyJ_cE^3 zu1G=a)o{nHW|N6{b;*nZE4<{V?D`~=J?Hpi*Z$34vd^%|BLuC%E>*p_<9Xl%6qAFI zcNn%$Xsy8jWXe)+9x2fRPdLjx_9uQ^3<)TY9dnppcVk}v0dXb=JDpT@{cLABzeJLc ztYRDE9G&Gqc>{=;y))D0;k7XE=#zgP5W#OJ(PjVbD8UC~MqQ6r2tBNM$8wlq>TY>A zOi^FYd2>ZKXX?BE>!Oq-yUiqfOpJy^-PX3UPyMIkb}e<|lM>BZiWT^LMUn(O3SvwM zY$p`MX(YAF-n_fLqLn zAN2IZA@4EzJN1ss@Hu&_gRyn+@p_cVa52fYtIOQgZ1z{<^lG7BZdMkTN3d3cfTo(C z0=T=mmVljxrsUeoS+N!;P~@t`UdZAPs?g6LaqCzq+4VGOe2TzYcL0fxnD zV7hgz`J1AAF=M1N%fuz^U(Ov?4C0xqLj?d?spmTKTjoL z^3!4tA53=GpRw2@;4@qFbYTn+)o-$E(9=%8rP2yx)Db$Dx$}5i%huRa6}Ahf%uQp^ z{-Ia{WX8{*qnK)H3!lLxh}6Xq@Q!QgZB&uoYW;rXo4Ps0ck&FF5pd5vntE^oSl&AH zd$y;>T93K{e;lhs`#K;-l}~!!J-vWE4~G2iCovH??rpQ_LGH?)X&2>OZNm#h+xZ53 ztNBniE^-5muwsw00lGSABF1cAKR6hm<~pGVp41nU$Av*ZVnm4?`AsvLqf-q)cEi)0 zh}xs!AxGJ*4FNLXR9Q3GOY9Yl)v0}fEpdg5CUX^+%0?~u@0bbdk;QHN3n#n#w86aRF9@SA@G8JnKFf zUQkL=yw8#_!HuG~YtYtRGt3SNTa4UX^6C~tU%b`cS%~>da`NzViDemB%O@r4y}9`wIj`A zJEJjz4sR4c`di$^ypBR4tJs0H)>GG{pC($SYSBYYldbv|Rm>)(lxsly?BS4VXhWjS zazSh%{W)K`a&E}|=PmD-82acdc;R{XI7w4x<^vS!CP!~!+%k{jr!$qn$}C>346zI2 z-}Kt89C8CSx|=(27Iy7gMqfsYSp&}_+MJikhC4pjE(=%xlta;?N|rC!ubX)u=ik07 zZnk|Ovavf1^`uiRyiIlg`D5(ytraH3KaL%HP7TXRdzV{MqUL~5{RtO5vf9K5%Ca<3 zlc)x9KT2e(d7N2olCrB>{*FavA2V7Z@$QMP=4$0r^JO7rpS7Tz-pTXgo4m%HN*RB} zA0lkQ)pkK+l|`?DU$EUE8h5+Db#WCWW;w6%%nn8rV4`{2z%F9(RiJ`)C)?0U!3;v$ z;%=UYSYKx7UZyJxn0r_yQn}()aTdVn`ss8AI$tOuY5#W4|DKz>w&hdT1z#wt#wY>_ z{Zf?1A>%>6xmgn48sCj5sptM^`EAL2;7Xp%eTpD95$`XC>3O`mcaI_BR2bL7}D<5LyUJH5RRfW0ngtr6m$MQ>=$FNkrk_6%E0@ru_m$_y0i5tj!Qc{TzV8n9Ck0t@e|i=UpqQC0UrB*~YKcY`JuWZZ z=o7Z_G{z@A3_h?7|5GjD>{Qc;9!N`Hgv_&qWASukb6X^7t`W5HI_7x$28GuT&&iVQ zS*|;pA|%_Nd;MBsTRmKUviWzON-O%rl4(EaY%;Ec#MCH|ygDf6>9m+{cRz2Xhd?o3 z1a*qWK$mvDHSvfd2RZOfLL9l95r<-E=WkQ>Fp=kS+WejEZd;`s%K+z^it z8KY+cQ9pR_Abj12kx82-76l-f{MY;ughI+GMU)Ar@57uKa7ph4^acfRF1-OZ}syH)gA;Jz?B4twu-s)?bq5iRW$J@3imZ2kGPy0+5ZA5gnJa-7o&JFko&nuwljk z&Uf5R%GFbsvs^(7{^BJdJNqZ5oW;3C^U!6v48yQZN$~FY&nqH zqCqeY!vQZqPY671jdYvb2Y|iI4GY?m^Px_(vM2AtJ#gt_VH`UGU7=pT$^;CzfGB*qC{ zCrj8IOe3bCcf=?ARNAMMHvW4oWO5Bem7eu|!d#Ya{*!}hYBuWetRGW#FlvBK3h)p& zz7JMrJwgKu69+dv4OmN_{e@$=Lr{%kpSv7{W2D%huP6J(Ebx>pquKu2l;x&Ago-{0 zHEZyI;sAEOzz+NY=yX#Pnlv{sF6-7OTQd2@=C@?0Wnnb|T;NzWJ5Iv^qct-oWSkHv z{jD#-999IGrUxHl$h%Dd`vn=bQm+WZt2%6hh{?FZ@HAvVCS^y*`;)+3-fR9Qz*lyD z_P#evclTn*blVZV&~RN3k$^PmBcgMa!2ApSqyYeb!71{JaoPd^bncx>LB;*?c@+DO zRLNhC9K)=(%p|3PwZ26(y*u4n;fLRsQ0<(Y(8*)q-EX0PgJh*{PpkFO+BFGymO7{h#a9+w6n;yT8oQEa3mHD_I-< z&nQ-`faM@POz_P&s$eOs;CH`lds8UCS*#((tilHUCR7Alhc6#Zxdd1?Jeaq~S$Aj{ z%?nE2e$shLKJv^JM;}Hhf4riL`>zxgnz`Tmxk&L#G+EbZeL2i*iU6no@&b)~+WAq5 z=YvY*#WS2_FcHw(dU3nD!kE{0)D>=&`Ia-+j3EEWJT_o*sdi$5o39}wsCOi%D6L$^ zRV?;M?IgC{44$)zwjEjmR7ZTeN|#Bq+WN^QtASyq1EDGy-A&tJe+6{4sI8geNG=!l z9~Np`q}Cj*hQQB>Qh(;#|7uP}2LVbig8aS}Bme-k|3PH^=dGC8+FJgH#@bewu{~r( z=)P1#xX?H5LyW+KZR6iF=@jWKM?7kvhw)FL(4Y~4Lg{Xix}i%QGueSnpt3vDJ(7=jmItQqNY;CN(P$*u6B|2f#sVH;$|Z_8o60_kMSvY;1Kg=oxmw^ zY#K;9z$I~f{51ZGtLEjY^7*;GQ-dZ2&mxml&s=d&Y38W0KGILXpQz6zZHNkdH~It&j;_)UUgT+fwK?4+=gWzV9h>Jaz>?}GwT z&!o5}>C-VWP&}UT#=bO1^XeW*;caEMb8A=e90+A5R)9s0imtLXUE zU5R4VY=G~|4qYgN%JLtE%Gr+Nd`5w%B}zgNC94IVb==9KF#L8CwM^9-_Ku$P&>LW& zpyvtxM`Xn;bR|VjLm1<1REG%(qWQq@1(FI8Bcwlw0nmPJ&#!hOgj!G~f#Ce(Dp3NY zogEHJl+X);H0o!CMU%SgGWsyClxPsnQKfo?RXuv$dxbc`+lq+ zJxsXDul61L3V0iWwW@OS=xJ$g(w0)j_QZiN6L2w8yNZ}RZ<9&`B#evdx+|VpV8Y6D zfgj-%wCer%bC*AG!&E6sXBSKJLLmBcmo&M<@mfd)>Iq-`1{h6yD4s869G5?rb?#8t zw;$j#-WFaLnXl)glMJvPk*=}jjo&M2obx~vk|j1Mzyf@{VL#1lyC^4Wp&mFkr@cFQ z=mtK7|L_^C$*y$aeQFbw@&=S1(f%-o8I({*VJe_^3}OGSd<3PyoQ8zf2D>qyA$neX zl{zco#e9X(?fY^H=j=Ke zJb*gy?h28Rms6qTKd(x-8nD{H0iP(Qc>tV0^PBO3o}LFf@fg_pu^1ku9l~}9&UBdS zR18*k3e-1bbo_{eE%JJEp|mty$M&HoGzHhmfdqu>Al+`m-n z|12i{=QsF&!PMVBVYRyMFPDno`%+4<6uRbDN#cnGQ`U!8q+Q=awE;622MR=!ZN-{! zrl@>Sb>_N@Dsq+Gks7?_rFhNd(f02Du=Nc=qC{PqW!tuG z+qU`2wr%s3ZQHhO+qU^?s(TjyOh+$rk&BGD8Id>dIp6uPD0emrjO23Ii!T92zj~Ey z{*begV%NHcI(wE>o;e7k7{#tJtvB*86WqSG^U$fSe>;401f&9lp@7hF2XHOKfT^0 zW2H+-{U8Nhb38M2>UD*4+ZK!WH#cHpOXfL%uoa(RF{X4)c_I2(#|fKeC{sA#p2#wP zamh0*2W-M{+wXm4v(DVG&HS2ip+$+|Q)pvck9T9HL;@{s@o&}IqS;l92xEX&K7FD< zdpyG&C}q%Xe3P5qf)#6;nRJV6wx;}g8RH2<&j!O0zA27TWV&&w{ok>_gZBy#8zd!W z7>Z^lnZgwvJmzfH)Fu(ps}&)fLW#KKWmLW}A`x1|)5Gm%Cv>%Tp-Wx3Q#-@xw#8f& zYepy$UT477@sSQ@{_gWm-&L;D1V)C1>~E9C{ZDvXAd3x&>kmp_z<^HXBUWFeH*#UL z2vU@9YeGRO5FDpd@27b72BOj(7Ax{bNTzVqr^6gZvl9$Xw!(xzYp}KNiYhk47cCnF zC|gqum*Y4vA8-gumw#o!!`g$*&R{37LIKXNBG=;NEHAfS&42y{cPBi=HiL`vL7k5n z&rsaz*wXUG)zl*i62@ADC?xWnngj_Kmj%nwsST1TP%Dj&!@U|RGRzok-lT{=`4)9U zvcT`NT_v{uJ2gjzP#)15L5Z4AF&B6J&V1YqF@wN97Usky7vTcOT^^DNNG|*C$12?Lh!9q zml#v9VhUvDUeY=fPmwmiV4i-#VeTn~Gd^Onq9$SIe3W&J2X=_hh?5(ca2s4!q>lX? zy(b+4S==DjLItcm;wfae5khfLzAqC-iCz%!g`U@~^LU|{>kHZ0{w(YYCM4Fc_T+(- zlzwCN2S#}0txvjOyl6TPfm^7;VeK4L)behcy zF{Dq&$g!fv{D+Q#d!xE!1rK_-5-5T}W9x~IjA(>t3g_~BEe0>{m;?_Vx5IOQzoiGm zE0KSrMyA-@>rbV{ofgkr>e%{x&qR8q$o`E|T*s^p1vT6Ij^!G=>*hi<9jO?{vL#0M z2p>SPl&-M6vp8rNH}VgNIh+}Z*S&eX>3C#c;}9t9p3oq$@rO$qtS$on4dWy8)WO6I z9`O|{PI1NCoL_cs(DFp+#kyz;@$MV2s7#?BNS+o%vP6U#vL9+`)~sPeew3fLOHnBI6(q3y{%R>6Z<& zC_{h#6}KH<&P@4YxH*Gu%7L?^d3tr=A^~r%@~&1Pb@IBLbQfJ0TKqk05Ddaa z)?4cm#;M-ZAeM_hGB`oR{%Fl@km)rJ(tx01xJ*27OZA9NMJHKOSi%VH z=TaFFOJQ zb*`9yk0=C*FX-nr@AwodB=1Xx@kwZreqGSRSd%gXAEZoWdHwktgxhELtO~gt_pa~6 zIWgBpFk(*zRzau0h;7-5guKZBZP&*@|C#>(fBOHy)@`HwZGxO(|EJRSzfONABXbiQ zgWt1%T20z+iw&XYQXS@O5^Al?H9#krgCagpAj$=4sl6J#fkshp1Bo&`NzpC;CkIpM zcA0Ba8)gtK?D2qo7iJuz^Zf~SVQomEEa{Z%Pqc@gS~*YwMWw9nJq3_rQUw!&bh(Um zaKgr;)_S7a!g*pUNnGyWNi)hr{o*!+&B|Rf$*aSS#}0mX zv)RwUwbgyzL$7g(5d0mu@^4PjS`vY;0_zC`Bpyi9a&sx6DkeG)hz(VzCmIv3TKbrW z%7`g{xFexQk*rQSLNH5EbfN+I{DI;~u)eS@xV$BoA;Q zk&s!jN6!1*4e{JWcCs&3Y(EK3j<7o*4?9ooz3>>CiWwTjAawVGpm9?JYnRCa9=wRE(uTWOpJDU`K!$|jM8BxgvCKl=8AY`Dq$SZk{G z<0BwAWn!I2w3cpJ8HGb-2!mhgEgZAKD0WY+)DoNQTuq6#+~+(>&A^T!GY4GkifH6w zs=8OE9wh){R0D$Z&1RKOC73$L6y>`96 zfHB5#PWQRju|3IK?JQs;B5>Vf9oCbhbwU^{=BbzE3vEf+CmL+Zw{W}Vu9t$<_aE|C zP?bgE$VM4YDeb&seR0k~aCARo>)|v}aADv5=LFo2>;=mOod&xdn`*+AEo`XM!Re*j zLP?a*a={rC?e3mv@^J^UOKv;fuAIHdPz&0FO7=0fN?oeg#eojKtyx1knFkA-1}mSHVNtx zaqzMT4e*EaPk*mxjLMqf4;qHZC#j6k(XSsVg9p>fozq#T{3f3|aTAfLJDz+En-wKL zPtrAB&~=dIjs~NmzH+wN67#O)4jB32ai2^Be*sFj2ZqO;Ey*N$6Z?uqQ#~7~Z|l`M zuA{Aa!=rw?cnEi=_eK~SaiYf9-zj&`Zpc&0TDZ^};is=*d;B;zN8UGxy%zEuY>DWu zIDD;X&U~OgKokzgD=jTHQkKX~%z3>+-;Jc}CR~D1^xR@OX9H|4otcELZVn)7MyH;D z`I#3?jOeiT%-f*d;?2`(5a{esAUWyL1lGq6{D1x~x&+bMw*7#EhQH8C(C>x&|3NJF z29Ccney9I8kzF;K5z1cx2IRAg=m_v%pHy8#9Xh1o?VbRALR4O_x&rj(`Z{5OUKI#6 z#&X>}Cywq|F%gl!c?XfxmQQP6w8i!k4yl>TeNlUt9IA<9^CNSP>8AjIsb%Dnp}#{g7TTH5g{VG7J|VoPQ+G@##cQHZ#AIPk zw!7^!Lk?N;qUlGN^66tK9!bX3Xc0;@4u5SP%A`P}oW^G!C3tkBWZnL@XH=8uN4806PAxk3r|N5@H}2Zfn~nCJQz?_uEY5IJCj^q3nnXM( z#k;99_u$9jj0&FDPqEX$99C2nK?tW%wt`yP@olMzQk7JJfxiP1yDBw4suy&HiW6=#Oud0E-Ptc|VL>XBF|+KLrNXFtv5)7$={{&IKla`5$fJ}2Y({Jy&QxEg#0mFPMuM>r6kdvvyJY|o7ohw%k? z6fZE7oPvP{OW7+#I;NRC4lNz`3pS3Zh>;ADzYzp0-ojFQV~)oXyY4^%P#Li`0hO@& zcF^RZ6;*7YF6bbz(#{B^cD$)UaV(f;xUwuOK0LLrrw#e=g~IT3oJ-4V&b2>@&xNc#%LM zfafF8peJC~vbH4PM~<%mW`;EJnA$RZP5LXjFGew}K42BBJ>w3_K??v@pylC~ZdN=d zg-h=w=2x;;LnAkbfS6)Mxx^)3$a&;3Bi+Y&=SR{8keDcrqmoHaiI2VgA|4Z}Eke7ek&#aQmCSFFSP^YD zitH}&Yaqbq`|9f&Y*#s)8nMd6XnlEYF1FZ_&<3ms^G5NYGzy3{sb>v@i54PUzg`cd z6^$Pu+U%Gewe8$cF`1+a#cwscV#&257vnUBQ5rf!VeECME#lhk1MK3oIb@k7u}pdW zqL$EB3Tf_8A;lOrrLM1SRhnXK95!%q<#Y zrkJ{TK5j3v31gi)G9wp1v%dOqpWaR@u5sJ-X7`)6~g6H-YVP$QJ^4XIlAyAyKE49h|t)ZZLO1Y~0gYf@#Yc1;hXWoY!uoXrDz|*%~$qXMEvdXF5MslvuS8K=dAs6Q~P-x%3+)Iv0q_1=jAEmb2CH>AZrc$~A;5EPCj_q0M|6xmj0sx?71pvVLU#lHc z10!cUM~~ln;#zy#Zj%GaZ&klv6>3Qe=h_wjhOjIXdckqhUFB=ah69>*LlzDJhA^Ng|+@RlF(9~e4@gQ`nArQw@+4DQ6 zWXdfo5u0yT38_T+fvyW?wt#5nefE@{Gg)5}?(py_kOq_V6tuY0jPbPZt^fiZ`ixlJ zDT9lFMQ%V+$j4EN{)}}hl_ZhjV?aVfK}i8`gfbf`=WkaY!5k^6w;W`5ET;HNJHldw z@*Zr165)LC%-|#jNvv{1_@X-^S#U|J!{1`)49ECg7v0fx)b&7+e(Uj6P*@-&`iH8$ zKEi4Du|pb@*uo7cs`64o_(Brn=!S$Y%LEjUu~SJNN-z1n(yKjoPN=F4{ZYi*#MeC0 zT<-~o28Q#*M505@BvPps2-uvw{z97U;pIKy`$|dRjA5P{(7|J_;)o?Rc?cA(idJThHP<0c-BPR_JvK7Wyi6fqlZhKooz!T1>L$GT2*E+T8;|mVZ zY|_lp#xsVjsn5VU>n?FLx{pCWuK`EgRnJ1m4sj2B&*@iVOMBKKjEH2}9RX){7765H zl!?1wkUNz-_bx*-M&v9Sg}3k=R_6kaF&cVw6okPfjw}$Iw%l}u!+rFgcMloL$Ei9y zdj8Q%#3$xJK*U|Y{vLzMcSmLEXjr2oBpB|>Eo*5)hX)vTeA&U#C{0;l{b~9mq@e|L zT0I_db@)qTy}EJ|u#p%+&DbbSvCYk!KplNlQ;{1>5PoHmzeOH`g^)?AR}42s{6fn~ zunb0I0F}~T0VU81UIb-`W|qVylxEGTQ-Rh^R1lz?&;!IU1;%TE(6X#?}3#lj$ zPYl1(8|3_&#hjIvtwo5g5_acC zAZ|NJ?>25o%qvn-DV4i8&i?V-{{7FO#>TxT2r@c~G>5;1`lvVh#4WTb7DKL{gF)MV zVVysaBZZYVb}SCOj8D(=4x@cL|MbRe7a&GFKmV+(;?$po zbA=(%Mju;tK;0sPUQQ|iKEc;Ty0%XanaZX*W6tHRnlW2V)@= z5knh1s0efQi=TTX%*fswhLG!uiW2(zFKzOi>c56*W_LyJoYYet* zKrEK9wFi(d1t&nRSND8EYLL%((K+xUgHUX~8RRTFZzPVK7w-qcHAQ?!5ow=vqvtK4 zrjHCHod-P_TwR1_u7f=o+YJ zzO#U%fumNuCud_BU^tjfvIyxrtg>1cYb{p2-7{p?PX_BEIw&i?`90LY*{ zTPsI`z+(2Fx=(rtpMy21CcU}VYg?bQotTG9L{&2HC-4wJX(H}plKaCW8~{cB5gi&L z3+bakJY)<{gDK#>SE{U55nom3^0fiQ9i+`&2E$L681wv8a=z_K<(-5-DS(4PcI^h2 z0F)90Q$ATW&-)vS*JqS0_;|ONO6|BZEgaPo@ZwWEWauAkN3Br^4^uJLtkep>*0nLq za7PT_G_go<*32}JD<~GtX~^!}96EbIvBk~6Lk;)R)<;n`cE40Xnt(mK)$&FcZf4b! z5fosJ%cQ^Q`2Zx4SPjgErOo^nR^1*nPBSjV?Iv;1@B#GkY8#aV&El!-rw1{_x2hW zwtWiqq_zBSr^jJJ0f_IrY=7OIqr;P}CJsCP+Xozv>}Ta(eA=!c4L2*t_d`7@eQA!4 zv5nxc74o<>)nMO6lA8fev8DT4(a!HJm3;@VU;WJYm`lkYUi{RKue+tP(Fmh#kSdbe z_opK1f+wzgM)%9+O@W+w@RZmM#`F!OWcG~Tk@;}kB&e)` zHRK+6@&^})he-KP@Lkt)(*FF~za7g@i+(R}nq(JPs9t>&8=8e5D}hXN`5{RztG*z9 zIJIPpcgKXGzuhv6J4(=MxU319#gd_|9*Ft_Eg*i(a>R0_tuS`1Vy(m{*8)1EaU9K$7B04iz3#39_O{VtM$zkuYtgn>ODqgj zwc5}(F|lqhI4a(h7tNGB$a8vO-2%wsyYEHyjdpSQm8IJLg}*ow??C!R_+>yl20}ZK$rL|M+P`);qm*pUtQ-?Y8b%X|64)iw>@8 zNF+eT7hmmyAf6F`%)a9+9AKNRjN(#f%vFyhe@Y;eAh(sQJGMsC|2+QM-91glXnw>H zyLrGhN;>CP!uJ^NC#cLCaW;WFApF-l|L-E%jy{G^`^Zfrd81~)aiGl6v5KQ0-YkSs zYFjA1{joOi%J!O?SZGMKRpf^I#A2IOZ}R$qB=}jxF>o#GuWo|r^LO*C^vH29aJVx- zv$%`6O;pxuVaXpNxs!cTQYK+~Xs+sYiuQA&Q+5#jgJhf&6$Xm*pnyaWj=j@hwVzi1 z9k=M!3z7F4ZJ@13yZ_X(HOO(=e%}<)fxrL&H^22P-v3(5I@?*9*jjl0x1UqC+NA9! z140+}9Y0M{6by+R)RA!>=+*#iAehZuM>k4OSmW2%R?px3TT1Ltw{a7l) zGu4N$4a#6NHAwq+5TgCvl_uw(Q>T$npPX=Ml z`D^{Ahw3=1WG72K#M^CnSrK=^+xCipETaa|>5m3en^(E{ynb8-T=>5Kkxs5!A<8?K zYxxMR>?Q8gPB#L?L$=O9(4Uz{5GB6?JqjHny}3LNUOU1RsP_F5 zE7ya~6ri;LlE90=(Sqm>HF2ZB@5t_ZG2JKOW)GKIYf&5AULv2H7ZX{u4EHbc2)6;h zmnj7-G&Cxw&a=zu@X#x4#*LApE?zY+x-DEsoL%w4TcT#=ta+7=q3dgIAaLhm#oA5ih?2*g!&Sgm&9or#4rtOj!yWETfZm1c z(E}jsCQ%vAKOHt~SR8XVP8u^j>UPwvt1p#|JAv&S-eEgA-k=<64syxqjfqpTdx-?8 ziX6+@_ZX!D@(F@`5Lx|)Xz&H(Cs zHW`7t?UC8?(0ZAYCPv68$?i4BwU3PsE6ow-19K-hHxw+d4KTV+>Y@wo;A<;?OR%{0 zPcfi8Y)(@`OAi_QM~&D` z`9N!@j!0;!ot&&2(x*r=4k9`FF;yHSwZp?8KI7<{#6?-1_(>WnWT}GaLJ8)Pc>+G9 zge<)|s|L74W6YYKcn;dVFY>%A5X9^)9lFLN;)Q1mJ}#Zh#&}3_781!cy`+6m(n*dO z4o2mHQ<}v)s^~=f^f}D;VCYO4{%;CCP|yO}w{%50&Ggcv#W0%U+65PuH_#SExz+D?bdnO-yhp5?z#U1*8{`G)@ z7%*Ygf0_D;yb+4okANmPIIjIY(9wv`IK8mGoP*aih7Dq_76yTy5m$XTzeL?;atu#{d*GQIp2$#nL?nnsup#Gi@r}I6Jln&cNK$EhLbLQ zWh`s`w@6uPC>FfFW}fWXa0M-U__LJIo<|dY#zljSrota9R;W6#D!8>m@iyD6$y6Z1 zpQlaU_IE=WB9Zth<9LgUquHqmL|~xwWCWp<5js#(F!8yn;hZ4$MY8_^Q)|Gtc+2b$ zFb77lC<#Gp?hzL)u+2hKdDWjVtb>BI|>Vui}#X9)o%LQZzReU9k&{t-04A znJsQ}|vpc=CIoowe*bhV4azS`h; zQ;KYtD|P}UeE>(z5WRXKLVgRYUx;ESg;JVWGuC+mfT4|DKQM9!G@WtSX+m#CCE|Vg ziQw_l@_i(ZiGeI|``F;*Bm?^&4qDk^jre+nt@;QAaEQ@7k9&bWHUe2x0Y4jh>je>I zMW*zOP(ft;>Sl-M!q(7U=4sXn+_M^7LH-V_H?Sw_2WU?qN6!cNCrnVy9e?TL9bVTl zvA@Bgcu_~t&+thtA0~Q9moDDC@(s0rNF`V}Fu>b88EW<9r?FzUP4oU33gX)@T<9pf z*Lc`WA|tt{F2*Zbe8crNs(J+2d?l9KBxKe>U?SIu!)E<;=-&Cxpi4XoeI$)>=jn=u zo}#?^>M=j;Ks3zC2trUL36IL7Lb<@Gn`#&e2fSrNxkzlL2C^TlP%w`eVyuWumKI=w zHowSsA&H!$Y4;RmpP8ehMm6e}E38qk{H_A!R|M{tGHw}egYIX6M%M>*P)Pr%Td#^%yhep z6D+h~WIIjK1%`9F5g7*jrzlvvv#nqPUnwtKg+X?WhQQ0?*o-5c+&FtTTsG$NwhcTI zA9~lzxnw~0U+G}8Q@Coz>eaLQsVqHlO66=_uB3{8&9O)aI@PwWb_+5vx8jCAKFXUz zucV#%O-+*1?OozTJe>HhWavZtz0IhWN4H)GPB>;-tQu7fw6m_7Z68G9cbFvcf%~G0 z8W;@r>4z|lX#Qe%cCA=ye{AK{HSs_T>X{4-;B-X|?3oV#QcJr>&My8-;_UEMipZ;C zWSHU%bf<>GuJULydHVoAp^Q;|wC^moSc*<#d(1${@CPw~j!9_jW{)ic535fJxu510 zC`JDyhyK(W{JOKl_v7c|b%cEs>)QsPu`G72jhb zMrk9Pu|aAacc0l_$vj_|KV?JZfY8Nphqbq@EUN3yFdHx{OS;{`!{wEYiC%=AHd5nZ ztM(3+a(vsb1s@HQpK;M0?()wT@Vu@9%($qdje?Bf*rpQ%M8X6S%0EO36lAwP@gwSOc^g2M3=Vnu^kCk%f@u*c_1x1C!U+s?-T}siK*HOl+Kks`Og)JXH2Lk#b>R zI|V0skd4TvLF=WYEW+cHVSd`bg`;jzwQkwm_nt*l%L2Uc8EW_^NTPI`IL1-h2`9So z-J}+n%UJ-)B=1rMlw)s$TMh!M1|Hq`h~+shgm?mDkz0(YvvgHEhh^}&-QOg@zdi|i z{u5NuA$^>3W8B{;xtK;ydkGjWeR36L5?SadQsv%G%zF4-z($Ojby>(>n`#F9K(A2wckogl8|L8|+nuws~$QPyqVWqVe$Jj_dkmlM15Er*&Y?qJ}#& zBdh@#{g)sw}zP<{HSk34s4HmE`AY%-m`XV+!(vU{!>eWpJdW*ci&RdCoxxNI}R4u|8m z&8C0^b(C7h3&d*m?_3AT(HLoE)SLFeh-fZ3N15usJ}&X|v?r8JQc|f~!x$jFOt}i& zuB(Si<|lW{O=bs|q&s(@RBOj(w>DnRJYlsb-NQ#{O%0WQj!jbjxel0pxM7~w2@A`Z zB0fdt%_OigX!Xagy0+XSA-hFI^ik2iz(V=VSuQd)?$pEQS^@AZ#=~_B1UVn0i^9|3 zRDo35N-AtS&Y52yR2eFFqN9&dp~uQ3QC**O2}My~7xWL>a11HaPbmy;FLTX6+k1Ir z>48MbqD?cnSP50{+s;6jWpqQ~0^4A-Uwgw>cG5IcZI#)!bC#Y6+XrxZ1#m2VQz#YQ}y`^aKO zWZ%CF#_GQ*85JOpZz4zHy6kCRHM$FPw#n&j)*55edScvMrDex^ce{UY8BNG8&SaCc zV2JnRP_2#81-GlY&Em~q9T}5KTA1=Wosz5FUE^*Ue(VIOd0FP!QqGHAV-VKK_cljK z0}Zj#Dl&}(`hGkMa%xZQq>b8*cmi%c+}?Hv#d+3s=m%H_~z1~ z=$CPYkF?EU`=!W?QKWe+lvAN!8#MGr7@{zofk%NblN*8EEbo=t_S_?+KH~Ly0)?Sm z>)o2%Vxm^^muOnw(;35i_|~<8^f<%)V~OdErYX5)w>l4we)j_PQscbT#N|wJQg#zT7D@BiZLXldpd(S?!Y;^U;T)to(@b_jMpL6-=6g-m&q5+Y zmIOCq6&TiG4C(eyvsDd*qPsyVCv2~C)rh_VoT4MFMfC+e;AJderR5}_z!;N*SvxPL z*#|tW<(gD0gR30FEDS5{!FWF7`1RTaY6OB&AJ6%p_3i}l@Yuk2pRBsK4SF(5m+aA8 z*oEbPg7w1k!@p*&d`qj=dyDWjldKnSH64|~rUEx)?46bgWoA%%mHA!SeV?}gO*`oo zt!aNGZsh_W3%&+jQ0UH&6Hfm1+{k5vY6xFaG>_Gc2=(^d#Fqd~bxq)Vi!`jfQ zG`?8Vs@!c?hd?zoH_!QcykIt=c8%BTh?p7D?2Uw$pBZ&y9>rfN3Tv#FeDVKVeQWMS zysA#|OvdtAR1_ycfohZ*Xs{=}2*>n;seo)!X=o;x3x$Q~w(l!czaJlB0=cdfZ^ct~ zuS4Qfu*e!T`%xVx(GHPxJ6j3AO6G)Y`g1!-2mL@0A^e&2rFYUFZiwyppe+sFKL8K4 zW_kdBCsE$)O^3Q=z9(w9M8G8~EREPsb?WkCFm*+~lCvBnhIMegbK%w7phEg*o=Bpu z&rn+3+yfTV*)|ajepEcl<0r4#@O_+)Sw+0|d%B}$FS_##0!M89VsuHZ*)j@Wp&L%C zX2Km`0QFgDKc`Gp?U0^+*}>1Gq@^I>xP&X3s0g(q*8~2M#hztqTU&hlqq~f&N?|9^ z(wz0EeR^0QuAi4)us36DjJCp|8yQA0X>k0B%6_VCD@U|l$#8cN0{_4v4&NbG{9{Qx zsx88MWhA=H%O&qR;uYQMRS2`T1A+JzmQUmT9n0-pRO4adRjBW9e_w^r_RIe2&B`Zj z9C)|;Dh^z)wgRu6I!1$Gz%TW%z|P+Ow)+#QbY93-`r!%#W)a#5jfe4m&$(N?|3ZbQ zqZEo4mJpL&QpGwj*So>rsP4(yt~6(V!`WkHP#%Ba@Ec7pg#1ln7T>_C;_XcWch8QT zKPQLB^yjtPjg$|T`O~)Nj?o6qm#Cn^7TDL_g17V=VP_%S3*CuY1iPpi8pvv*^9qiI zthN)9+ZOK2Z)I_`EyfN!j_Ho7Cl2KAtDEgcCwoXgEfVbnL6MVw#awpHuYNc7b1UW} zi=6T!2}xltdlB!1wnx!* z6Be@KHM&b}v6l>UbMDKkYFo{=8!PyetoZmq`}uG+#@z}s(4BrayfxH5-mAt*WwQVJ zr%KZctqrPFMda``5*4X-IGT*JIfoNRIxOW~e=^|f08R976LoHL3|SSt)hNkyuG+TG4Hfq>oLFPk-8 z&6rYh9}EkLSjzp?0Q5&boOQ*W)G&*tK)Ypq5RBA~|L+SLe_=-;tgwP?|JG?r9nP=M zjygEcW|FNx)qdX7q8K%EX%-_W>aoZ2jB_`j!tOxzZAC>ZMZ}ofYNveD1Qz{QRO<|5s~?;$qB)Y%tf^YGZ1u;yt3eObl2ZohPXvlCA9nGOb1q| z<2szoWt6%M;eY`*@;sCK85kSEKLXu79=v435GjA#k(UQD&{xob) zKg&D2{Vr7vXRmyPnYu)qt~Iqxfe9yl zM3*48u=cWcxx+R)CLlgQo)j>mv8JNd=|@Ai90zU^Ck3j;L%p1?{jErM31A|AT#kEw z9e#Yc$IkxFqE&DFTn4(?3xEs?M&l*0L8i2CQ-DrV6+PT|rABp9@(%O8-N>mthTl~J6$~|P)D+xi8uS}O1p{RiXWz%B`8-4fG zvMDCzy6T~_YJAufM8*4+5Nttc9QXBeROID{u*B!t%hcXqS9_X9ZM5qZzNbBbw6=st zcLkWbYC+L4Bqf7+%$g-bt{=Ax<~qwX8D?ypwANx5w2_121nnv_A_VJJNyPoIM5r8_ zrQ2nVUpGhMN(O^1SEC5JKh>rAE^c@2kMc5i-&`!t$h}Sx}s?s zw9@1`tyrj@RUKAacFv%BVuJgHnjY#a)3jpWga{5k-qVSGjVrMZHD?D>t(rHKMuLhC zwPpgFFhEFxNvG9CcU-LARJp$EfN}vWJ(!E1ha+l@Tv5f5o8DbF3WXgSZAqyrB#8)i zcHN}e0JIvAwz33lVmjKVClr?yBB3E;7jS$qNFC8@7Q5$wKzNi~=I{wM(AUWM_^ zwn+_!LiOr$Q$7s*_k}1fwbVeP4qY~zl!h25p-{5#8>+B&j8)6+lRZAdDklC(>up<5 zhKo#Re)4F$eD?OKKbEYoP^5Y|K$3y!T9QmZVs=_w>S)kRw3M|*Q>(4Kp``maqT{xn z$+4t9(S#o+pc6QTU&gMZQ=UkwF4+WuRa_KVN~1J*np=Uf0TCA}HxoAa^%vBy#;t?H z+3MP0_v3l&1nSG3*6Q`bVZK3aY1YGZbxdH1Ey2Sd(C7B*c>i;CLEi21aeD_mqPYy0 z9&hbOI-ngJ2v-9mHzOy!pX~#cd3cZmsnz>I*_qLR0z-mcTMcyjbt#8~PspMkc0SY3 zj1Sv0xC0#Z`I>PrDI4le!p&e?<3Jy0skof~U zjDH4KGl>W+fOY38UJ0JS3s3}1?mYjzOV=p&l)o;S8_C)S#b9;7p+KZR`^SnZ#}G0c zO`VBOWY9)u%UU)Eo@WVX{! zw|pc2edtK|`xufS+>MYH#n^Q(KvhT_ItEfKnQs`Ug%xjXR8;I8B^>p*X$_Xa$14bKx9I@t@&kg~dg`zQhr08hynP>%0l4 zQr6K7$~{}+dEKN2{0+pr-Qq3y=5|?It;fPy#TQ-lzS3h;OvRP~eIfm~w@eOT8q|%Y zw;SpC3`x(>FMoq#zCbaY0Ib=$q`Y!i^)iwd~TqJoSA~Gl0|6K82 z83uq?tbtbGA7V5jW0*#)8$n&&P>Ny$%xVM)+6zG<8pyjJ0K_rY?g*Ny3Q9EBkZ|CIRl*VIiT|)_>$`a zEX=2y`oxicZM^5Pq7Ze^_Jgsj#>1fB+hW`%@l_CKChL+rC=|SLf-dIPz+J(l1p^;$ zV~|r9JZlP>sl26OfxE1U`KYg?K}p>I0URC`5Dg^l`v-X-D7r>2;D&1fc0b4M48Fg6 zK1}W&yPcOP_JkET$GA304bTsf+s8w?1hv&E-lZEO zCvU|uQ{=u?9G{WoVMfLwJB|>Bt_o>Z0cNxe1BKv)>H^GiG%rriB`51_zc|*NG`Xlv zM0jow_t68hGkT@1X*6WvZx8C2ip7nsh;Dsu0OS~JX5ONK%q%2>fopC)&bXmQnBzavQra?CMM5--~8wqY@``4cR0A}*HR!J;KY_iQ$;GP zItuTeGO#$meqQb?To&`#MGAYL!crXUXKSa6u`2Eg<&=zy>3qFYZ*Uq8!H;a_rGD_r zLR?_B%|Nqe>=B!Y!Gr_%IWR_Qz|wJ1!u19~Q?}8SZ@enpRIKMmHk?-Dk2d548|De2mNwW1Wus6K2Vdhq4^Xa^2++L8dfAh}Ph~%(NbDJf*#@vxxOQw57)STF|z_MX;c1QpLFdcgNFO|Gj0>g^W?dvK|KGpl%7S5`w`aAXf>62hK8Q)UzjB{;39?K)l zDSG@(;|mPSt8Q!EU!E@CZaCSBl{{2>#GQJ3e(QF(_aj1qmzNbqg6MF))+>vmcf@Oi z_;ef{YxiiIGZ+&mcPg#ABPA;#)~$ZG59ulZ)kJL7LMzstAMKSIKn4E?i$HY0l7G+6 z%^ksTA3H2EXg?$;C8Ft#EN>c+SPY7V4)|dOc4_TXgnMe{?D22+$l((Fj|OmuC2DT5p_?odCyc9S(*a&64ut9ip4`=GYWfFqg z-l!0Z$cRFSR}H75iZF~JjFnBebzY_FRaUZ~8>7^^*|q5u{E2!qoNV%^Ktfl)%M~F? z=!FhlC!S3^JwU(C*jaSS=8Opc*s}|FG3n~j?R1x<^MUpGfa7xg)Gx>nMGm9V?a5s2<0d~c^@2-tik=0AHO zM|4oZvC=@!^^ZqxeW9Di4R~7y25X5z>K6bQ2MKdQT_ZS^HMZ^d&H4qrCY=-esrl=C&&71f8f(e=X zQOI+{&Wn4IJRqx`S82f`qWRohzLP(~NhQ1ijNJq#IaItsjPZs01up0e=1f#H-^eSy zAMrN@#Ae3+Xz|P-asjA?2OPST68Ia7;PW-3GxMDs)6-15K44Up8TpEgjErZe z(|K~1my4_}@@jmR6^qkyQDoC)l~u)fQ`P1BBAah!>vC^W!khK7x>9A!#p&SAo!8a! zeK{%WY?W8}Y*x%PqpHWw&dQ>qdQf;%L+z|wOp425l8xv;gULxYEoVhNxO22w zt(MgqY8R{Z+1{+2muo1uuJZRqRp+xlbgqU@W%=o8Rh;JQa=GYd`C^jQSM!sl?!(|t zr_;G}XId@i*=RJ~tT$CL8fE1ifkW}da!plhRc5?gz>uqAu^w>YT9R5o zyo70i@j!F+XuO=SW(9PO>re7^KAz=uZ7Z5reHdFYQw3M~`fOI7=t8gICx2L9t!QHS z{fn|*_p?_k>?02PX*q`9j~7>c__T)Egm-T@&;$7M->(lxPyTrLN}_r?9f8`scFN|67;y!i*o5VOA{a0Zc);F6}UmXg=T2r1!HUn&dlo%BkD#!JP~q zRb{c6udZN{vc>Aoopp7U{D`k#{`BMP{~kU5^W*0)9)JJhFdJrXt4(pIxEuolWY4M6 zVO1@wgGBLX`K&JdQNVgmOEa5K=$BunhPBL3ivge!%r>B@D*ZI;qOZNKqG6|Bv*NvGb5f*PT0e{@rZ4JM^!@UfxGFV-OGq z#)nNsBp_SzSE77lK@ow(Vfn}Fbyc2h)`hB%4-uz>?8p4FoNwlu?$O(UNO5+{{m46_-;>Q~6uACNfl;Y6N`*3p@gl zwkJ#+fh0o}Lw`-BHBi0Th^iiBZ}Eqwru0q|ZfMw421;P6FQ?PGfSRv!m~J(9^eWrS z>hp31bgrt`*-O)l?xdLJ2!7v=w_J>8n+aeZAvTQpFK5O23`e-s-&|6zp&1BtQjBmG z4`8nXv0l)W(tDz>b#_s%&z75YhE$|liMposVx9w`8^@bhAG7n~3Z{RO>IfpL>c=bW zK*gQG*WAgh3mrg^bv6e?SYHAKMataIuJSoh@JsxsSdRyGdQ^o`ak+*aKv+wM0pbJ= z(0;%!tK!hm;B?T>jyv;7r=NA8CH#YJb>3OvY9q~|g9q8+B0m9m{7w%MCoX&Q=2kM?iOPy@Qe`+;|Ni$Y1B!lS&)y!vOlr8;mW6_(JpjTq#X#Yob`ez1QaxGW$*w^> zpHdTjMu<%(wk5^L6z4#p9=kSq&Q$z%L4*BC~CMRFKAb zJ(i%<@2qww3$laylD=eJTYPngB6oQ)$(p%(80QqX55VO1r_qNv~U1x+8t3F%KCI{Kk7*7iG zg1Tg2Cjsvd_8;^#$-X1VZV7~RnC(5d6VOlo@`pPS3dx@y?B59pq5i_3G?C-C{4Stb zC%_IG*2AYs!N-v31VscX3BJWhg60g5kv1FyxAN{C)OopFa202xhX9?;t3DIg71@(a0C|x%Q@gL znDKI4!;-=IJjTn(WK$6$t9uk(6QIvc`~w`6ik~Sa^4J&0Zl=F`2j4tj`?dJ6A==Gf z?HPqU{%Bt7<6iX<-y)HQoi{7%0ys0lS`^4_i(`j=XY-=2fhN_kg5%|80e2A9GqX9} zG0FYobzFWj{WGg^$L|B7H+-R6l%@`&X->S&5cmQxAOKP*uVLzD$Z~-y)WFu zT}`55V;ffR=S@-JxnGsY3z>*W^UnK8(Xb5y}La8N)^ z3arC0^S`@-V{^7<;A%0utcpA&3UEE=yfic%^OjV zkR+d#D`3cwwlyFP?t>9>s0VaILk13bPv+}j53T`Wk&xg!)1u(`7X^-ZQQ#dKcm4(N zt%X}UTaHn5lXV~Lo#b^fu`3GXYBOH|hd_VZfY=W~z_!1Hv+)RSGe83-*jeIs>qQBd z%{5=4xRi_5YKlhds}&65E&PjUDL-Z1F)(C(b-jbX3+P(8&Y?$=N7kq&rc$FDPY@87 zJ951Cx9Ysb&Y)lT=}t&K_xUEx^$7GZ3T!B zR6u0Y*FiY^;MiGS(0M~*1#!iL2{@-)fsx+<6Rad>g~haWRTP8F6|^z8RBP!PfeR#T zCrc#b&< zP#kw2Oylx=v#Ooy!E(SnV}*LTsm8^9mT8p)8>&va9~FWOwd{0oTC88tyY6VT$bn@U_1qdG(RTuBetG2zB}EpTZ75=P zcw1XnNzjH069m3O{|sbCT6Z!{TYy~}crV-gE+XD`5a|aX>PXX}(3(j|eU?`!kf(at z3K?zPu)wPq`QMR#DJ*1J5fG;Q{SvOB1P!;Tu9yKovM2y0@X9`EL{E2APz5dvMW!)A zE&{-;SH-vlc3@&5vcSj#8cu>s8+;6lu5(6W1`idi;A!fue^%DC3T{ulGQ&vMx&r)A z{p|_DNirxGjzyI5Y*`nCAhh~!2k?G#wpmY>7YkbI=o-s;hz@G8TyGuNpjPd4FY_I@gc#(Rx6HL8T7c7$<91%=p1&0}wP_k}85F81)LmCnnC^ZyD zNAG+gW?A%t&wr~~|>I-S>E0qVxe8cLScA_TiBJ4H? zxB*c|4~GVhNDsN;V$l_lM1R$yi;uf3dLgB$bcTEoo&1fi{UbcW9m5)5uGm-aM|#Ke znyZ)k7;^_ER!FfD1_$ogzPMA(!dQZDn^SMYZV$di$}$7?EIYZf>PkeBs(=>jwGe5; zOO1g>1%kv+zl`WUywzmMs3lR@@aW{q)(98YOkdD+zoFqR%k>T1Sc~ekFekr|`T?Ic z@_RbR!Xqd=!ouC4PeMgyU$Sy4Z2Vix&^0OTV$<$wPMkr;tue5>!cvLrtA<8?L$UEk zYlN%hR0CAUL)5BoM)^rSbjILlcT$VRsO#t>6kT(gb7ol2?12`ed%IC>cYt&JE;hry z_Ru9}rn0kn)$7D2rUoJ#8F{xT2BmS=4vzH-1?n7VzcTl`0>*MVUu}>w8=Zq7L~R7} z?zA|f3~^tijdNPS06ODk)1rQ z^-=8-l-MpD>WfRrZ}5@r19k>H^nTmI;wrDy+e-hfw!0mOD{qcn^&9T^=hl|Nz8o*n zP-P^(iH|MC0bF4Rp@21E(Kas9zDQZ4aL*xK^wp^3I_^rCGhx#4)?@Dyj-qPmxq0t7wfa- z1gSa74VPLet`=JNc#u5<){c#xv|*N7MvIbYw8cgIP0}NwY>_t$h}E8XnXkz*3#RCF zgG&KzX>-1t6Rw=QRu~IoP^r{#M7NB-;=#U4H0{>3K-9tsq@Xoo%Szr-0-gu(7nSuO z3JcqW&noIMhiB~Aj7fgBkXd}R$fk3)ydd3#6%rMQg%uoh(F$?lQM~N=P|{-gMKAvF zfpwHbDl z)$DmBQrgkumST^*x;$lOL&5jieM`$-0`7F3P-o~Mk~?mxX@1bB0mAyvjAj`7^TX?( zGBouO);9P!*4IM-)HOu){`~+S``N>N?*tXni=nRSmJmkHq(OGSN+V}8M)@*YoVij4 zhvRj95~k8Gfy75X2PurBha{Vbc3BxWn{K(UgL@-B;SJpZ+TQ%Q2NJwK4*cLXB(N3RP^UfIX-7U+%-GYMr5Q}_u;p{-qd;D)iiu#m>u0@B-8s|XKkaDdolS#^1L{0=;$En+u{crw2eIe(gM-O``QR@ti zQ_1^@Okh_9nv`iIHT?Yl0?xqg7iO`jdfCNUDTEkvKnLyLVFa72Q1lqiJMU`tM+c~L34lj)AuO7BrjJqz8;@# z7U#FZ+*@I;3(FVgZaE9^xfSMSt}s{K6})gM+hsce2i5^}etkv2jA*t`&hxmP$blZ_ z`z)rE>o~yNX^?j6abdk;c*QmbcGq6Wj08;J{H+B0R)TFtax1~Um0)jQ>RSo6yW+xg zumCn@GSO7NkRbvB6DvsnkXFb#fzvs5X?h|9$T&&mBCoQ-gaU6u7WsJf8EulwHzr>E3AQ#T+qB})z|JZRe9Gt-hb!sjApyS zcKhNPnH93-(}M1%`c};sJ2O-NB>UnmA}jhH5{;dxMO486Ds?sNt|%G<`q`4o5AIF+ zw36z;^yDUlDHl_vCmx)!L563|{0Zjuh*6SYR>_4C#q?eNAFkiiOr`UF{RRPJXK zSn90f?PCK&9;)9_Ag1upl@b?aO26qg)Ga@1e5Knd7CBSQ+HCSNJ-<#`w6KWPl&z93vJ{`FPu0WG-LB7ryIq|5qxqia8t?kfnu+8gY{&S)>~~ zGUQWEU63l=LS`(GRp@Dwf5Yu_`A~MjgqXfa<_T8w8}uMEkFXC61H}JnR^^^4JTKD?vGRi>-@ZM zR6+^Pa8Lu3?s;)FoaOVANuFIEWS7SeL}Xe8K#YCp!8kt9jl65GU60=y+nD4yaD;J( zE!~S=9fgI8qpXzb+E44gf&2+3D$!Bk6Tc4!hXQd;;wl=I&9u3RY8KJ~*u+Qb>Yw)t z%H3*v1N|wg9cN>ennDrpe*I)V!EE`s0v-oVtt2VI=wwsQj7QoCuL)xD!^sHn!b|swhu+Xsu<@wLHsGv_UJr-wP z-zW;FFx=oat9+HGZ)jOzDHdXz@9)buezKwHJ0VWr`QE>L3FX`akM1~AFOP5C^*S+$ z1E@Z)yKVe_?;70&;7B>is^vv3tZ0}z*8-uGsf=$;fDb_!Ap|vW8sVaewehW zhE&DQVl`My@~X*O(Suhuw`rp_CJo6CRV4SY`L)xg=sh6mKSB4T?)x zkpP+Gvjd-sPbZQKgPP_0biSrbDJD!H3(bjpm;)y(yO(S!&dTDm?XH6NIGS)1=a?^> z{<@e|i!ZCl0WyrG|LDqLB(BWZi<;IQW9b=y1Cr43N8Iz1+EBAPa_LaR;v<7DUhUjtu#jchU=u`Unjp-Hcz-98Uh& z8zIDI>)60dkiM3cof9j$zUiZjy_nHXug#i^C~}h%a~)E66PcIzVI!ZU%tSx34La6f zw#LUD{Tq$|!X9kh>CToJT`m?VDdE!;)9Wl?7k16q(ukvL58y1*0kQ?8H_MCoBIsu| z%f|&;SaT4}PQNqw+p=7AJO9?nenvp27P3`X^rKOVk0gyPxr zms$6NoXU^A24SRp{Mp@FsW+Gwz}&#hB?S8VQP(Ps39nWK+Z*OHk%O!0`HU0!CCnd< z+lk+qdhTZ5@`JD=ESi%a7zt(a~yeMB4ztc`0EvD)?fL zy(#Fk?L(`2_~2q5*qdnkdL|^ku6`_DMeKo+fK-Tg3r4?a3FPDU3C72{80mR}ixD2@ zCz~lSpIxyC{cM7PcZXO&S?i~p*+~qopB>qJd>q-)I>DJ33ANxlQUYrIM09AgI)Qw5 z(K|rg$znG^GzD<)YH@xz!BVWNt|RwP)!mh$*v?37;~;iAZ=5s6?2W$8Y4CXu@aEHI z>j|^{Wbx>H8rfYk89z0eE*;k*jUh>#z}>0~FShW8H$WyLQlTTvS4iT3$gahEdQ2XJ zBXz|&sijI>z)3qW#&Z!U#yp1$&7}}MvG|Sntx$@z9FJ6mVFrVHUn8Kd)0^l(zZalG z$zB<`+$*`Ywd19#Bd$8KLctOB+O?yw-HdkT3G0p%`4sg-;=sPfUC9i&v&;sj=KVUUx%R_lB5#DYde@__xrJc@Q#wNP(C5sDbEia({goP5Chofo1*UadZ>K5 z^>6>8{oCXI$hU6pnaaoV1^T!1xbQ?iy>c>ISOiXq01v4)8 zH+jRt9e-z6`kxk*;m5}?O7D`bmN;0}Zm1sP)B?L?^K;_!x>~+3F->!x)g0ld#QZXF zli{FCF$aAotC^V(19DFf8m`elp*@@z_tN$2|2pE;pLNNVycSl-duC1*&}VOtzUBZ@ zU;jA)I1y1b>*_F_y|B-EVcK;)*}_SI0R=crsnd$H`l=B}5Z1O02~?rbLH3P`J*y)p zALs!9GQBu@%|VSP_-j1dptC+U0l#!2O$A;C1F$QMH=YTQu4)dDCq_CZ^75^!a*6!0 z6Kw%O{Evq5Ka^u;j!<;wZaRjZym+4N?I~p2Gc9tzoWKYw1>FA_`e7i~#hN@RITa88 zs0Ww{+|++G$eyyl>VZMhTQB^Z^ul*$z>rE4>qRfDr4c(3OS~~CWEV8-3wHQtS86X9 zie8g`7LY+*&kq#fou-ucCz9pt`i9t=BCi_TzX zd@AVBSK@>zFbS^Z&zl?*V1B0Vx(J%;wNHk1BoG*fpu%5EBK);agFn*%)b5FgPCy}z z+MtG%3xfTf_WA!#Z+ug;m`;(kcyH-3N>S8*O(_;*sMjS)TDqUaG&yeXkv>E-4c^80 z-7u^g)1$pFb2~z`zbeM(I?1H0)Y=0QA1))OBH<0`A7Y(;gp8`@eV}k@@aHow8;Y)t z2hDMWhM}&@*^J`|6hMYB49-=s=J6?4cjxA=MY|Lp$nh42wy2*H$C`h}Bd4@C1UEgM zZR><#=C<{P%YEK1x$I@KN3z)G?T}_olU?BgH_-`SOLXIY0hEw3+TAbW_PRrKwr}D& zEX;W(;2TH+h0x*vd z%;Nt(?8)&$Y37NGNj$WkLJg7YEh%|<{Q5cVf!^)C{vRG)4=v`oJHMWgdRq=~d`w6^ zH+yb&2J;QusfD_?v-YJ}Q2fRaG7In4v7a<*C zr_l_;>qdXuY>4^uH<}CTGsLIR%*!tmry@XjAa^K%;54>I4w1z7hmH8I?`1)%4pnzn zuQ)C7BuF3=3nmt>+Cx>n35OrH$6*Z)Eh-cS`$M=c4y$>!H{7pClit#fCqUq6*J60> z%uo!kBRBxpY%Aj-nZp}bB6Fl|L6Ug!sF@Bj^b}$|8mR)KRkeX@n?F#%Ygo*OO^9JZ zG+@S~_LyPd3DgKiab4VS=UX6#ob+Y{iQq!Ki!mH=C#xlrNO=n90McoC-xzsH1Ur-n zPRZVMY(o9D^;N+K)0_a^5%`((=dP+q0t~W4ofh9TA~Zhnq(rK+mSg*AiTw4U)I>q! z&T4(5RWA*g{Y?V16WFam3gq@o9=Dux;dR|XaKIId6s-xL#_bpqM~oY{O;th)5UPHZ zmxB=t_Ori12Tatay>nyA+`RtF3b^V>teD7}Ta)JMAX8!$*Q(j=Jz4nP+@fOGioggv z>z)^I|3OhQ+DlNM?6h6mwA$KKJH6T6q&hRec)qglnry0-s47^ETQls~xPB{U*mtwX zlZhv3Cc(POq&O{$*(B@cb@m@euU-=VE|wSl>;f$uP72gDt|qs8E zZ5@Z_aw~li7Wi8P728c2q{NDY#!$rTgzZA##pnYm4HpIq98!y*Imb}n+Bk4rZO3rI zmom7<)Q$E4jv*;&zfjTZCZSURU>B8RCfHH?qCpFtqmn)))r-NT;t9hdOAIiC>SdN) zB8}r_o%-$cg{*j_s4^kWI7>q@_*Phpb$Wg(+NeH|77<(S&eRAT<>2xJ6=bgTjJO0? zGS~jiwN4Z%I@J0EnL14A>nG}nXBQ9>ksD8r;kOm$#pr~de^KEQ&;SlpFJ|rL0`K2M zoP|I3VWZ@E!#7H%*SJz2oCiAEnFULw%5M6HIP^yM^y z%E`ue31N@Vu7UTCbbQ}bSa>hsZ<}cL_}q9z`)P~F^{K$p%|Rh;eEA^Ky%U|>^ZQR^ zyiv`zxB0$_%s2Q%Nx+R{z`;ibSieU4o1G9KuD9GF?Ty9*AFw#<>^600@up(`e<88? zqfPA^aQ_#GwBMEnd%^a7Fu(d(`#$B`a#CzT=xfKK52yQD;piiwu_F=bby&9;r?2>z z(6rOj#b)8X&x;Y??e>2Wf4Kug8t;Y!A@MfjLdQ)$Oq`ors)NZ>M;iA`T~2 z-XRKSN3XZ8Yks$CSD}b$SpVKqIUX2YKO#IKZ-6#2b<=SOop^|`0zP}3CMD9PQ9lV4Mq-@Q-e{4}0~Rb&?7Sq&M7kI$acAtee!f3rM3R zK3jvI^T7{{zt-x`mN^{X8gh>q)w*2p;&$yh0tao6`PRm?9p3b)Sv0LGMWX%KknTW$ zQL`KegCDnnk*nZCG2gZT5sT!3&|ew=w5Lw^#3>X$*9k)@aNizCwD-1!p%j5N8_Mcv zlxkaZ5KR@lfi^#(P1C`z3Tg-q-LjZ76#w5^0dZs~{@+l+pu^Dqw_)sR>4*A1yTsyj z; zqHVyq%Khz97+>j6sxwa`FO>KpfQYu9?WvW+G`kdA+vxSSF=rR-Lds-`X*-)4^em6nUTQj6z!qj zB#O)p$rt0Max@Rx?B8fY9n^785KaW;d8%vRLF6C)I?NvWBE9yyAq^s-84?4){*0(N zP@IQB@L@H?p*uQ;=lMg+HhTW}RYVFD8tdii&dPZyX9Wmi{(cE8G`xb#h7*SmpLu?% zCLP&;a?yRT-;a4ZgtatI4YP0e?|2A7UxXQ>9`nGbVEXYWVSMg~^l$jlj$vs38!x81 zHWdGN3wo1G_OH%pu75-+dTDp%SCfqPyoe+4)x@WK2`(e)jN)}SJkbI)QpDsSBqCoY z2?y8w6WS#QwUj9e*(;+t?F^1aCx9kCV@<)7?XK^j=1y?27@Zeah)Ey1?DpPA>ukX= z;ZMWf^sGm{3-@EFU(_Qcs(IvL;Z4lqhH!S#l(mRP^l8kAXEA;^?8s_ zCaC*KabP^HmiMB5^!yE~C^i?iAFp8*rTpvM@HYv|r>L=$cW!uHX`dihbwuwn*U3v8 zCG64@j|KCcp%Pq}?*P=yhXHz!>=_ls!oIRS`(<60e34x zR4%4f4tM3v7^bL5oZYs(9hetsDAp;m&_tI8FV0X9U#O8!3nx6nsXer~;8b?!|1JH> z7RKZ<9sp?zbEeHb6Va2pXeofk>S}O}@j8OR#qieEtUjM+)8axm$B8<#$O=IVQHQnc zUx)6r#G3SL?ab~;6k}E|JTj3yi(Mfd@&r~r? z(LaAc}(L?{=#Z)1V~}PLwmw}N!jjT^ie0dup0`; zxRR5oNnMUj;Pw_AWXA>XPHbP@Y@@)_mR2c0IGWzjvv)^VIDC4?2ajP zg7}yqd?eqPDl5k*)#3eVc6QgH4f<}_H!%EcD{8V8Tae}gJi_ieIorU$gK4=2o~NLE zR~9Mz0n%L8J2iV`eKB1}C4D1uyT;i6Rmx2@;ezc!fw!1w%gk+WFrw)ljhdGdgBV); z<*h}6$0Z)SaNKTIfHU(#=WReoRZd!yZ5C#jJ}ruMF#Ferb672z33j8Rfyq!}4zy6W zSrzlVT#$Eoc3jztL97V813T`Yrrl7xX@HKH+F18n^Q9GOi4ifEgC%xVfK?5;6e!z^ z&g{GReaI`8BxcvYC5h+)yIhe{l>5`Ec0jScF<$ff^~i~X+2-!`3l7n{*M%_w5;wb; z^IYIV-hACVzt$bEMZ`>bLjwAPQQ1?mHBF3#3Cl+)K-vwsyhWxWauIu!Vi4QJ*W7aP zdgEt>P&DY|%9{)~W|u68q{F^4xl^f4h$LY^#TlDvB3tJBWxM zLokx9kf|WTr6GqPZ^{kHT)E5wdoviB7gz{1B%RIXozy&%@=D@OSJfVxs(ni1JR5pr z_N)M+Iia_ zE9Y7vE?BP5it3_-?;AQm$n+$DK)uvqPzD;34ZNb0!BR5TpthnV1O%^xBDhMza@lDv zh=pxtM5`I_$MM0#{r&xSJsj=}(n1mxpgpHhJs>Or(NBweQl6{Sg(x>Epxyx2uH9^1 za_=2TTlibY4DPzjb;Ro~dE~a~kKrI1bDQkI4u278F}yroPEN$F-1U>DJOa^p6(+-; zP6M!<+VM9eHZa8w zn~dk;c!mnp0~OvbK3$vvqA*tfI`^XNB?i&(^L7v#?@gBD4ascau*F#M=|mFn|Mk_X zL?fds&g_Oul`pbrFwpGctQ4Ff04NK}6QR{&=rRgy6#&ubu|BJoo6|P9*fM*>_pS#y z<&ZF0Blb+z2tcg=di4g-XSH0h@Fd6~;mrSmx@i(Ul8%eca{upxZ}%SVGjV$S`niPm zcDcqFT+et8)u#E{O_?!e`&79PZZkss!!ts}Q1e%ljL}#Wt5gx~&Nh5Z#)z}!1sP7+ z#1X`uRldaeHfh9d(uhw@8gZMwK|HtF8~zDn`QK)5_^qw@Z#Tl*)?@upB6dqpEI)EU4K!E%N2!;VZ{*N?fV3o15>GXqrh&%!|R{2m2uBYY6@@T^?j-F`fipJVZ^ys$j95@facVF{PN{<=4n`nm-(^$*p4AV znr|Zv((?sG7?fSa_#vN?)=cTHl~%#q@4O|wni16o>4u=iXK#F#(@{PFAp4hOP7kxX|a&R$lRtZbyn8acjLlW98|u?SIhBGTmUVM0l; zi+6I_F%t%fBLZ@cB&B-E+)M6Vd1t@K>&%3LfM#bUs*`Zo z53wqU=8*4`vd^mGfInLp)L6COaJ0uSH)7%zNmf#d6;)FGBa9!Uv4b>jXk*N=|qF8HAq~3pSF57YWk?~%mP)Gj}7{HZNV8N&2Dr`^nzEiM)Q1!xl0Tz zo~$$?> zg4h7$6TO0@xr=XK@W+=&f9W+sbcNM`gn3nB!fLAH*sk=ZipU*trIvdoT{fq7x;z)8)ZSUYJ`(6JE3_7 zq^_cg`we5FZVNBz24!Dt_@-V^+L;U48$6X()Jr&&FVKvD7Y>#6MAQOMC$N(4DWGic z9l-u1tC8uVPT2!Cn#gJAIy0Yc(_J*xn2(m<9N0(~%w>9kl5`|bl;kjOqK$$;V)uBV zUQR@Hg{v@ibfG4JI>R9w?a8JxXOv`a`Chl(apM<{Dddk_4^SxV@x-1Ajkr9$U5RNd z8j+4q!$KeCn*qq<*UxE}(ZnR6m^ej0UlfZ+d*;JlJ$1lQIR{2l7$XxHH=s|6DY;jR zpdXq)3)RMYd-BGcocJay9Yu>KC!JvTdsQ2VKvF54LKMDVD&qi6V(Hyc(u#pD7@|La)C9T+QeiZn0-n2wp)F#}I#%$d>g~2EqL9{djZfgYW=t**y!;r()@pc-G+ub{r zj_{T%KF+1}`0V5hu{E3*u|WX_#R*zUZA*SY8+(mpQLl0ag0mkr#CAmKlT1Dg>Uxpc zSY*)vb?2CDX^5;76kSb|f*2Wa>uza*dea8J9-piiRQqi48fK!AK9cD5eVJz;f(7_^|ARLcd4JqFOj)D9(InC5h`Z6} zbb9_)OS)a@JoDl4>!YuiX!2tw5x3*Xj4d~KU3$j0g8i}=t0dW161_$$8)eZ_>{-58 zXtyQi&9rM#0Kt|VnnDBRFQqyZ=N>EV9{)2#1JW!t+sH{j3iPZiz~66Tw`I88~%Qziy+#};LN(!eQ_Xl;V} z{$Q^}>wIQQn$*Cej$uKMw_4R$7V-c$vu90g@lhF|iOwL=aAhd%{LF63weoKE0yf^Z zE%WKL2%i`|67tu6l9(iUJyIZfB&3C-W(r(Lcv^aXHq)lx1U*vL>+NoKH{sy=i&4?GBQ&!pYm=o*~590$7Jo?Dlt zL)XST`H*`T!%SkleAv6oIHCMDe7uY+%@^b8^|yMt7)&j3rv}m1x zXq3|+%{04T}Eh z)O<%u-blr7&M|il`!R=`eeCyUsP-V%5Rb&XWcQ};aB+kc)y=L zct<^ceKlcA{`bI}Pe^N+ZomZ$14B>O@3IF33Bl|^Lsc=OTo#HaM>@i~okelViP)(- z@m;H(VKHOy{s6m4Ua>}gaqA3w>kNDA4Erl|hBYN`on3F8U2mOTW1{R^U)N?|*QS`S zKt#Uz>_S5kBac<91PH`A1;{4(HywQOaZ1i zp)kkbL|C@8*V-U104}__BXRJyVDdevTd#|JB6IMr*TsCrZjPua1$N9PLzY2&`8)-N z>U@wrpIQxd-@_0UG`H*2tSB9*mJxjr7l$blMcYmvsX;5KOF1u#`?Y>WzIlixL@Fo}>6+?oG$&6QPkr?lz#Q{6^Tym|9K z3Hs0X8}09z9q&Crfmw9Zw~3mdmT-TtFK>Z|i+Y0roYD1=Y~jqedAS$?#Xvb&x#%L# zZ)T*&S9UdKp)U9tN}E#02MUDkGhTjFE%8-!C%KveL)If8?32wHg=T;y@S^)O9Ot$d zz<|t0Kl9faUJ*_Q_^SJ}S7Z_vnfRg`hBVCfkqYZtfyG{IZq+-MxXE7M&$tAc$-YA*mh zja)_TrUIX~b18{7B0EMpocDXdPS*1jZsywb)qq%xpbD8h&bNh4d5lxb0bQcIy$-=% zKPRg~Kv>hi7uB-v+JOnH*N_zON&>U>;HhlROe$bVIY)teV$vlE7))oBO52S;@<$4Y zjPnHy2_9qfvS!ESlPlm~HVeEgEXbc3-p^)moSU)^dBTpw$x|>^X)PF`QUK|l+$psp zR1D+zpr6@d5pu0i04TQdjIyM+^N0sol>&e7LpP zD9nb%BET})4cc?42!9S~C+&h9?)$?BPUjxKUzTwAmhVeiZCGR+j+mqc{)V}(&(*YZ z3*Ti{bg8z-Ffkdh;_ItbF@&!syp%J=E4WHtb!Af?_bKO4PlSrj4Yl^L8qbSQENXX( z@y)Sp=N-(s8I$v+kKI}n7dT~JI=Q>q_p~7&r0c z^1nMs5}aR3PiD_26uKAz?g(!eLc5qcomwV+v=yYQ0oR}sBAOhE@D5~B|1}5M#k0=0GA0sEEc#A-8IkO7Rzdg> zwWhDC@{}T~rNgy}g;w4X^orC#EN?C6ex(2fo=?g1)irEp0faF7G?)M2hKHO`GJgE%I&zeb z6Sa$XfsObMV0*fol)Dww`Pg8C7UT(*yS>pOZ82SH^CdkPvNE#XF|o>M7i8k1 zXw)WT0c&J!$Y#q6x)U0^II>u%e1ioS?vMFpJj=wxZ-2jcQc@Q1R5pg|rAJU^RAx$X zej;XJY+eQXAaX+i?2rmWRqE1UPUBA$Zm?G6Z@S}V6w*q?z*42g2Iw0ss~Zu( z0g{QrTQXb^Ie187;A{r&3Q3;uICH*UBZ698Hr7*hK`XfsQYrN>l+vthdejKw*h3ar4#Lqedf9FW*3-KlDn;%+jzu{SAy;ZNB(;N z7v7`NF~Do*xc4Sg4ANVl6BbnP!e8xW<*)q+lzI}GrnO?#m%Fc!D-qIEb*F_l(egzE24fs4e8#sc>}ZGAHD%F1;@6a0~=^| z%{qveHuNr`+{#KjVg_M$wIXl^FFUeD7`+1cz1J_Hc@dxo{2r6a0uMfC%hQZf0t(L@ zEzchD@aKWAOOJsAbMR-H?+5P1mXYdVEgK7KqsB$#F?;5k+orjTG0JtH)LaIHG#qbz z_sbYA&|gKJB6+(ybR&fhafS21(y> z>cA*%197W?I=-c}uVscnR@bMuIxFASI=ddKTC{D+CVhKQ{1xDdkGV26dOz0j$a!@M zj;Z1Ee7FNhULWO!fosTO#S)xEkJnS9fN4C$5sk0oDqIMM;#fA5%g7tle~xV#Y|$~# z<6A2!Z!`h=6dWxITpSNC8vvq9sgJhmWmDXIREZ&d>|T9AlACF}z%=HDt4S+It(6L= zZ;g>Q8lW_@)o8AukN;hLq`T{qr+_psPF-u`Oju>Q54~dcubBX1nB}hDi=H~2LJ~6m zRL&l8--SAWT1fJRn1$uZ6S21TL6Ip@IMOk%qanZlyfL};kC(TjL0Q+!!5uQw=>v|5 zL$dEHsJ;*P!>DAtu$dk*NG-3^pX9ve#ZyzP_730>+fYrhLL3nJtaX)k3IkUf43EUX z(J2Xt==EOM=jv|Onz-l+-v7pdbHvY}xG<@)7Jq{fWMZz9afthPN~3RWX2%D3m<6-mcFYbGes820O5M5ly#^ z9eGYIUdnR-Sh7Z^QFm}y>plg7DQP?J)*`NO^gbrz)bE#MK#~N>Kyl;vrU%6z1Bwq^ zm#0}5VnQbNCA6`$b9H!xeCq)x?g30#3kLGM0zz-V@gF9?%YIBq3ICF?mpr;ZM#riM z`olmc-FFzcbeC3kg3r&yz65H=_Qg1>m{znLMTtAxm*=Z_I zki`U?Y6bTkxTA$vqCpLg4(gc{Gal5D1UK2KtIgCqVlukL&ObSivMybjWyT%-B>uwE z7ECN-lC$C-BW2O7kV3VI)vpw1hbqogFzTOxE_i9GnZO5QT5(_CcD+>ji`7+$Hou_4}US%C6OYd zrI0`)fV7NOU4b??*5m2Sj*>=A5_f6g8jd#TNr~R99mb2yqIT42()O0;(RbodaKYu`(8`~Jy-JXJpGIc|2RZ6pQVjMbzy2k1_MvRE=v+w})=gi_C&NMSQR2!9$ z2(M=KGWtROFL!K~6tg@n_|9Nm-6ze)V|0hn2*Mg7E0|>|LDwbOKJvQ?maOT71OMz8GkJ-g*9+$RR5H%5Z+{Ec{*KPuS=&;>sc#)sCKJ!kBL@n z;ZOF(YJ-k344BD)HBN}16+88Vk((KSoBeDZ34HFWq*_e4hWaeluPp!2`j7`vll1j3 z@xjeXcOuW}^;8AxoK_#>Cd#Jc%MpW8uc_VIG0h)QE(P_V4Np+; z01|eu7vhqIad!9P*=VOA2o`E=iFFZQvDqFB<@`^T%{(&l(b(d4T`-HV_E(42gfrL8 zDWY;JguQ$viirf@YWoB$`8|h-+_bM6L!@SnQnlWZEPj!d=4T!LV23?T8myA#WO^vm z1}qXb*QX!M1e~YF>8&Vnyrv4b+J;6}J_yD}og+!k6K5HL2OqJx*{j~(P~RMJMmQC= zx4Z-T5yVp|$hW@xaz(m(iZCWeMjgZbKEGkKmz7}yrc_Oc5FwgOQgpA(tn|uyelJOv z8Z^mEt3gA?er;kaByH~OnYFeBGX(_CGzYLFO|^onwOgam@`#0nwi!LfdmI2)r8^q{ zc=K5zOdZhyTHw83KYJ%Fj#|O?r0!%^x*D5dh4kht3zB(Bfi21H{?a{?S>jB zxrf>3hhOSQiaKXqgR}yIkm%FT+36aiZnqgg(%58YEva?cyp_OquqEiwj)| zF(*$_lKQz)SIO*lb>w3_?<6s6P6fE*WS!78dD@+phf0?=5KN~>QUOn%c+Hmwj7iIK zuBy`CZhsHHJVGk<5eqsKG~`MmVE7?YsL#m9zG&77JeyigNM`o##7ga*_z6`oTcGNr zFJxDA1pc-dEcmZcerSIV$NP%VWz-SiUc#^H7Bx!-Aj!93?YC1^%GEkD53m&Nvz;MH zUiCGc`y@417|Zqb^L6;Dhg#_5!5@~4%*4Mz4fNJ32}thkKydAX92xAX6^Q}P%T~%| zd9ko4kUExP{HTIo?2N=M$70Ugn6w%1-C~SLPjXBN499BB@D1LOYFT{1x98L zL}7N2?C2+>JqaYoQsRK$R)bS%3hG{gt3bcIHa8;|;-h^aF1AP4C9 zU_9ont<0K0Jz#1%Mq+^TX&ou(ekWynG8GmPq8?dk3BNIPhpjLrJoK)0EK;;m)%E1c zKb`;?tK6v-=@;1zCt|#bLTe|#RLbj*4j2I1ps-!X@6G2~$5E(6KnwAI-T;#qu8c%iH5~E@`X~E;9HOZF9G+6r4ETQT-u^)B#Z4Nw zm+^m`hj1wifLzETN*rfX^~_R4?+xLM1`=~GW71|*rIn`yA{w7)T_PX7-+$@#vRGQL zKm{@IpG8H>8hXoD>7=@fqdsV8KNieOAugs)MM2@KxX2!Oid++0Z;6JRvKoA5Lv7L9 z^_1B~d55=eC$}*s)q<0YebbO}IPsX9Vwu_mL&MS=Ns-~@HUUYdA3ugpXb422_85DSZdy3zPU!=00w`R8i5ezDhju!s|=Ew7aY55=u~ z)K9XHV~ecXWQTaaQWbgJ8*6xGWQXf|di#ZQ{LyP>ayMF+D}NKixqewqBk(sx^9w73 zLHmhG4tN9W049kGsQfO^LBQ~+ez`7t{i9T(sJdHg&p3n24Z;O~-c{y9B2T-_BYaE`7fsl(-7Rq3??FJ#} zo=JNoK|l-lrzUgjsqxrJ1}13DYWfpKRy1`+r=zavbJ7-W3$HJT|I*m3s09~Ul`-Z3 zL3ecn5_N_c9dQ~tOFn9=0dh-Q8jH}eDyqT)54%$omb9fAZabGUkJm@*Au;aD41Ql< z*;G&uNc2TM=Z%*4tK_Bhs&|bMfd&*CWJ}>k)-vZ;T{F@k=2_Q+WkB4fcUo7pU5D`V zS@0=xIyiDrz`f*eU5vNPf;~$x1Egd-x87X!Y!ja5d!_rRfK!y%J`X|v&Kw)7C!TPFxo-Jm4~4AiVdpw*Zj!|ZJf4Vk216Q zfi!PE;aA@-M2@7@-Oy5dHXONT@@t4q1yv}raSWnoG0uZ02-$Br+?G!h^Mozo_h`<; zKyuIxhCBXwQ}g0wry`{{|44G*4d>o~N%_zl8S5n1vGVVc*uv%eGw6nKk65U z3QifaGjAvt-faVZ(U*>B;J4%Q&De`rKn+ME|K% z#>dL%Ky9|}a4Z?1W)f%54QrvT13#kL%t7s`3;nHbK4ILjB=R28P*Ys!cJe-aUoipv zHe2pu-2Znq1h0ECKMX6<9EYf4cztah9>4L;nx?f+7PbewJd3yTW*!&#^T_n_F`bzGWQ&^&WAt#>XI3e|GA$T4b`uHr{I zC|PWDtX6?(SN@G&X(gG?I(bvgeDR?U@_xZ&E^=5Q7FtJ2Y_OfMKK607^S|n-pcQpASMZ(4GlAa+fJ&_vL1@2 zFo9*0If!Ou)ev=$Kw<~(hT7&q4t`f{$9!e_{ z1|f_|gWj)}{}y9R!C(^z_Rfqs&@{SZQFP2Qgr-Vt$HP zBlsH6ydVxb1sKj7)juu5xBmzyNC2w$xPIl?+4vVe;k$@+P{_xGooDo=q$L1QBTC>>Sl*Zh2a(vZnD77P}M~^xZl@$~)+o)^j zEA*(QqE4(jD~GbIop?0w-~cb`R=3FV)Uydj6GpwZ;sAh|-Fjk1(PqX@5O&=BZZ;pX zbUB{WzEXs_bk{yQ0J)-Sc!#-2q8905J+Dl>F}wD;ywEM&ZbVih({8bG|~x={U&aGbJ?YY0r?O$P&uwkUxa{*HjxFtEPR8_-pDE zbB>+B0tTZUrKYMQs@mGqggte_maAvlHHReDcvtTl=c9kCwu*Jro&Ps{-w|9dwO(|* z{($G_BU8;^scL3n5MudK#qVpM=Bl+o94;%K6;_5O)&{!+#VKF^O}&Qc4=APxgsj&% zCazm_pj&J{f(%txxg7|^UIeAZ4g|)WMPM$zF-DavQRFdM; z7qI16L`gOqrl}N5ep_>6aVLqszZmWqgh3-C67A|j7HIhPMvi|bB>w&$@%Xic{*E)n zL$NRB^s?U^VS|oL3FDDNzb}Tc7H>CJndGlp${@~@Qjw2p^~*_(u8&xz@TIS~&FX{W zU{L~Pj%l$CgSA8dry`={=iG*#=Ou-1lEBBIC$7eg*n?m?YbSXt(PQoRoStw&4{cp%UvQyH1|1^dQ7x_lYFqccY*b`hBdeDAyJ4EgE9r zcO@0%wd(U~tP>Sb=5QT+CiNyWd691iAQw7aLGAuaUsjq z^R3SK&rd^BF#ouaRP#QCmnvTIb~YD2U0<7A=GHCtFT*JB4kFt6HKv*(+l zLEKu61e!GKLG8$Gc$=W8FKWOPH_X@YTPA`fkuyD`K|emf*V~(m=d+Y`e&bS(cs%uRWsT)N5AN%9y| z%_`x0mv-H1f$YGyEw@00y=^7uU~n`_{OG68)R4@2U`&WQNAf9Ayc|;A`CxV~+$D(X zgZ;mNU5WYtwV8|?=1Ou}3QF13NfoGVr)x==TV7x?8@u7FIj*hiz9IhOlf~p#U9tBk zW%obV{jRbhvoIkV9;8sGn@7!BF_qZtfH+T2s_KIqVtiQ!^N8KdL=5Tf_+{H)qwgdT zm-P(Tad$}2vX=ORGodUg=r%yVhslQH+J}Dkzkd|&CzdPK74V<_6H7fSG&n1zu_!UQ zIR1VsonFVW2%+G-Z#W;tQZ5r+xM-!Jo{&htOS~Dg6ocs%IFrG+0V!QjU$Wbc4D24m z`S^NkqgMnr`5}}}xZC(~=%H}y1BjoywTtI=gT0s{%~a+BK^fF@*Kie37@J;3g6`T3 z^*vNpOx)kt%n0C9OTZFjyNnAFGOzdEth#Pv*>UF9IN!o(;W+-f@X_|!4fW;@dymzb z1W_rH9M)e|(DYqpqPcK0Ul6e@F6Bwb>erSNp(ddZ;rGcbqqxe@nb8 zW=pis@*2Q{(7z-m9%W zKR0>U%shmc3FRi#yYVA8Idc1@#=qqgKCFKgO!<5B``H#~7Hg&+TK&kC>8tJ(L~aT?$izxevE-FAwa6bUGmg3*0N{R~2{t+L zPc@xZ&LK};q>W5+&MP}!YGcvrnm<9@5`kw!oUEof5t!Pb-p9-^1t580IltD5H$mDK zJJB9RzRgD>8laDoo+@HxMm|z{VTr5@sny%EUM-(r*R@NBdrWDZb62dwH|3!NVB0lH zgd^exbkSS(`Qwv^LYva5OPeq}XQoi?O24$9GY+UPV!}c^SYlbsA+`ymxRV6dahz$c zk~RslDX>PdhD~n+vY;Mw=RtE~^gqa{==LiI1(y)Nq8wYrU9C319%turX9dp7@yr-; z88zz`ExO97;@+aRoz8?fJizK4Sc{+_B>Ml4lrgh+ zwlcA_GqZR77c||cspW9Mf#mDo&(GY)x6{W2d`%i#19HiIGrk|j?P-z z+k|r>;N~`2TKd@>w$=Mqcn*-|5MIJ9&P{*&TszDzctR(r5%46K+vMvOZzFNPinB&y zYXphG?AWKr@6|NRZ?vnkDpUBM#=zNrSCdLV;QnO_#Xw z0T%ESzRq5%zxiY{EFQa)f+xUup%;6NQ^i#yk=@XaZlbsdG$3$FRVbkBc603F*d*`)5(dQYt=qw@Fe zm*o2c04I>y=#lKY$rdZi*9doO7dZ7Opo~xFK(+SpF9l)q6m+6*amX=6tU$S0I_SMe z{2A+er$<9QDnvNMgh|>SBFg|_ryDW6-< z1c_ur+R{!jaE6ZagXeapi9WnSh>duRASlfx?v-nK%r``6GfeSOu+jWF#Q`w5(h%?JJr%*`j zLV?r51WN-1vXu5;G_|QCUx5#CgI~#v1$+M9|8l{CrGf-zuI*#7@xJaZTsg!kniv+2 zu6!7E7=!&6GZarfmxH`lNS`&BbP;yje$2`tnjFLqU-bpd6VPb<9hdof^DnZ+3C)IB zWMt{?(Yg3zk4x&O@mB&f`?o;{ljf(OV;z z?uihR1%+j>biq#1gVmY0a?svt@deX`ksfV1`j6SWwDhpH7iR!*q~ep|2B#nat2 z&kHD2emXvU-T*H+qj`f9kuvFmj~l)pP|2rWPWz+?mP36(GJRbk z>{(7DN_upJB!<-x5d(~3UsPn&HhVU1cb>*~D^80O-V^}v{xbQwX(xp`KmJ_ksUBt? zWkfqATxkyDyG*_D)ttN`_>;?|Z9VG6_T6CeeQ^utmj*aLB+UGajFXDfj#PqfnLj6= zqkr%?uVzTISR^)Jv(cBcc@$i5R`PeQoJ$mb!lZsQ#$Tz6Mln` zHc*XAmSbo*{{UQ~hcqMy#MR#s6hSXX0a?O-aqJOULscXoL|qyTx7=b%Uk+{av@i}c z?-D-t&2 zj}{k!a|xj0^OOx^^#M`|jCmPS{6nqFHKLRR_pEy?LmIL1}oJe*r*ncB+*p|LiCk zQ&~rSBjZp4v~R@xBuWB+{!=K|{uRm@d8k29XdoaoG9Vz5|BcCD;cVn+X<+8*Xk>5t zf6;44IC>7bW6yp80H`VyO)T=d_hy*5rO`f4HDe9;(&~FXVhr=X6*?tzsY{8s8-5G( zOhl~T?%5sFA@Hm&bGbK+yimbEk6s;=kM+~I>TU2FZTv=TH!=KY)VS7j6NA;q;jaAj zo1&bK+R5SSoF8_7T1?w`L>H2gcievsDx!}k@JM302duZVQ=ipa(;#RtH}_HY&I>LP zFlugK<`!H7=k5m^qf3q)?7c##wxw5N?q0=wuEQgbe|GA`zJ7{~&&M$(8v{L5C z??xf%M?=Za7pFerWD_oK5#0&OwH@A#Nn3?^3(IpgXew-K{;i+K<|d!qW9`%+sL4Z7V8M_jAsZi}#6<*v^ej_mV5{v8{bUopwbY>vwvDx@ zXE-Qbi083Nh*72djeJI74Gm@%*f57hG5mfVjeN9^WI5G1gm9uDDJBvy(#M8qq=Ut- z^I%T$Z*S+Lqo8C?>bJvAa=n3vg7rc>KCH%BBro|>4s&CwAs(w*RB$OU*CZ(*Hp3_i zDQSBqI(plNV7%52#Cg0IC34PTTq^8H|OUB5KZ`Bk!sK{A4L zp<)bHnMUutm_!z8Wdr8Dy2}(Y{P)vBin4S*M+ZnXCdx(S?n7Xp24O^4KRC?E_5 z3Lghv5Q>q17G<(bGgIm)?G}6Knhj>WX!EKdG2YIKR@hVp33AFsZZWEk{tE#ZWT`p* zQw*O3N!I}`Xz7)>Ap^!oW^l=6?qk6{B@=Aek5zAb;y@Tq1ygwPnOZwUbHD@6Nesaq zYsv#ig;7q44x}?McL{A{2HTWT<=!?*aCa3;^NDlGu9OHKbb>H6r;;nK6$dw(WV$G9 zlZcLxX5Om}pWC%Z1L(}M2~|uIt62orZf|N{fg3qWyJfssf%xSqy(rjWehN-j4)F2#McFi$Tv#<4WSOR37yM`)xBqyT| zRHEdf;5&?@rYxIOe*J71F5ptPS0++zlI^b{qTN@LxwR}cszPWC=Gac2p4u-bziPF8 zLCXzU9`?c#q>jO2&>&bBs0N-z8u%AQcG*S$_=VG8#3B~{eJTs4F1dzqkyr>>+hQAQ zfZE@Ts)I6r6Ong`LlFGIN<;Y3bDNB~%U$rOyQCpws}2W2?jm+Kd9J)TX3L2=-Br}G+gA<9l z3`Ozj@UiH2$&KrCjuRHAsIti1#9OUG%aZ$LnxsQfC*+5aPu56?m z^Xtx~1tDIdD7BOjw?{Q_@Gx4g3^lc@$Ifh(BzP2@l`(|asu>7G(PWI%NJfQ?PtUycvc-$H8p}zOZ-Hwn)C=O04D|A~lFNT!ydM@d z{2M-nOhC>V>{-J_$@(Rg%LfjTZb~-rp-m%gGp09syKb7l4oS6D=Vi|+xkuPD5PeqC zQeAnsLW5Y}NO>!rsStvsVAU1+L!R$=G1hSsknLP`BM)9bin6!ybBG0P6#ty%Wt@rV zE0n>@pkjXUVtxKjG5Ro-Q0Jzvo;SABb-}e2tb%p|ZI_D)`V}zTGS2nv;`V%f`2;zf zT>W_c%B#&k_|n@93d5*m$!;%>LnH9GmUl zRSVM^mbxYSx=^q*=k^M9$iCl`VQdlb0PLP zuVUl-G3IS4#rO${-?dq%WIv{nxTEhng&Khg_h1E>REP_H_2y*HaFrGd!-9ven$yni zQaB%~N}K!qF1MHh4+EXPF7DF)-y@K>=jU#he}UI5Ja_v1ex5@h2iDp`sTHIXW`s1; z0rxrxViW`~SG}{b@}Ix&9KPm1dxGyp4#Hg~gLTHrsb5?X_S$!+9OBHnc6hPZCxYD; zO`SRD4owpy6X<>F(BQjUJdlOwv7(Q)sDYe!j~1zKl(Asca< z2yp9E?-3{PPg=!wyz8M@uQxbJYkho!{-=dPMPk$C6}6Ihqyqv1-vR<6{@+?COD|(* zE7SkRwQ~6*(R}Lh72V}pCP#rrzmbzmug2_yuA`tT>8~p-eNCMa1D;eEiVk%PRA>rV z`}uk{*$8|F`tv2KLK8H>H=-HVpv%#vCR;;^weuf$QbaZJ@ zLyX#v=`^3(l`h%xMQi3N2`!vwB~)FWftnSIk52mAVuWk=utc&kMfuS_h*FJ5bt{Bn zTjwN-Q?^2jnVLe1ZY~9&%~RSgMqrN_N!njaJ#f{y_1`Pq-H+9}kWwanS8JPI=Jjzf0J})ot=V~ag%;nr>l`&b|n;4I-2=6Wh~h#WbV zUl{)vq9M&CaXSYH+%c0j?Wx-8iL<<$MPmQ-Qm%3~=)p7B)3{Kz^>B1;O z2RqVk^jBXWe}e_-YlGq`g80q$+g;7Un&1HD?h537&f@Z0@ilyusc22nQ2c&{);qCc zj5pW;|2&%wUxmT8nLn?$|M&5u?fvvc%PwUDq5=LJz=+0F-u!q7TmJS}cB#Ks{$~kH zc{QNO@BA#LBqL%VKUdLTkmvg?`B13 zDU1kQ=p=r?fLx9=xTr^td4`7kEF&#_G{yR8D4_6NqJd1vU&Dgygd7fwp~p8xS0Ph9 zrCV2R*|n1u$`Tp{;&{a}T`iEWSMAU@Np1Y~0}iH?mp9=t6qC}z!K_I8(TOH_bjl+M z9blG5C;T3uNdaEWyuN6A#h$Y!ySgCV_X({!PJWB``Nk^pYL#{*WM2I`{dWUQ2QTKq zI_7nUd0aI^Y&S^b(kyvIOgvUOy}YMmGEbF4L`C~ChK`kt+S4<-GsT{=JjU3dwD1%|1@Vf+>nXX3r@TZ8#Z#sY^R|URe zWl$!~S;I7kilAXBtVF-*KG*Hi8^7Ard0BM>0K>!wMRxV4=NRTay$3`CGlfNMHW~Cz z1V)Q26i4ZBm<SMVKU*^$@D(jCY z-QA5JVtuNV3zJeai48?cKZR-D@k7yf8M4OmJ3h^eM``tmR~FmNF6hGmL(IyGQIfry z(Ig>;946+OZFmT*dB}sC;TYlA36|1KGT#TO8kx&+kST#Nkz|!5FHt!yk~wSyW2yuK zM8MwQJud}~QaROpDau8KxO39T#HsX1G1kc-pZo-l3o#CiW#)NW6LqA`89h*PZ3%Er ze*uM7o_C+%fKEBx*XW(D2WJg9RuKs@E0*k_w<|YJv@HVTLhZ=BERy7zts!FY2oWha zd>l1|cC=m5w`p+tkSPmOB|pl*(Cw_yqb1&;Etw6pD?S`qW+`muRt=78tQ7KJZH+mQ z(IiEw{=I}+_0V%#ck&eY08o^S%c6H7YC@XBd?n)Yg(abQh*|qe%YDdn*1%Q@qdFFzck`$_6HUn<=0e z&sx~@)%$uuM_L84ig@zrZUgpR3nC=rYAy_~FT9n{}Ri;&|mF;(giXbXVb9A$$-6Hxu;NiPq^Z^@ASK*gT)e%UM z;BTWG@s&Z~1=S>!SdI{_A(km zKPktzM9m>iVU=6uCx#S#*-#O z#mCTV`0(P4KME{s3y`?dt*{s@jIAHY9FUz3}t8XzqzU2Ff_!mX8p2bPx#EW$@UryZlA4> zW^qHLTl=wW2%#?sz_WlaA0)2cq6bOG`-=&Z6n^1OZyz+gunAV;3=%1cU2_y_+8c4q zZ<%l~AO%Mw@SthkWoVBS{k7+1tlSuJA%}JP12x^fG6I3eO0@AGL<%)19FX)&fcvbT z)R-eV^N`YwUis`VsDJG1*ZZT1VcQptRyBo}JlUV6@YvirNcB9uy8_9dr;nUvOfH-j z)!&q~2~J{433tmT$q@>xEhQQe4f6>FAuzNR?yJSBZ68d5L@Rr|)Pvq?vq7*B2$>q= zKQE`^tvH`H(awae2FuRyfm$y+22wyeoosl2zSoj2+jxuhVS$<>_;21ob?GOZ+|xFC zMDmyQ2MA*l3T%Pq8&sG|f%vH%jkvS7$h6{@cj+{x_T?WTudLY|g+3UQR##vqG17<< zlV!6(I(4%-pTu+=UjY=!EyW`^jS!02<>!A0!}psIPw$kpRT-cOr4jjMw)k?);T`?c zhNbpC-Q_0rG_lrJI4MFw&Fy}c`NDY_j&yE4PC+M9JoOTKMxERh$u8I|hjIOxID+Jt zjH5r0p*2Z~S@%#Q`F(@yU^sLoP?)=d3ONrOkD?M>fm(YBqM_@1O91>nN-WA=_$1@o zX??cqWno{SRs&Kj24QeUeMu)S`|Xqh~WF!xGLCwY$^0|>8l2JK>xoSZ`ZS&LR^ zw3Z-)$E^lS3h( zwDj#J?7G@zPRL@b626s6o77n2cc?Y!kSyk3nuIjg?>M{X%O+= zi~*7fa$@&?|fR6E5JJLD?I+%8ED`G<9zUND_|< zeA9bdIZs^+DiL>^e8SSTlXGi|_2Pb{v!+=b5iMFyVFmwN zSSC(~w~e>MqMFHIJV=#SS@~mSXvX<;Y1_TWJ7fO+`fs<8-+TorrKZ#l10nWC_fu`K6o<-NSgnQTl2uT>gfvdu_CZ%kw}e-bz>+gccf zZ^ZOJZCDQnMj8k3y5hH9J$mY-abjB@)vy`NX>@6pvfI(fxHzsgzm?(wK}x!{0KCcU zohTstfZ3NiPD6YO{n+Wl_qRWWi1zE&V>?K8)1_RWuOOY*L|nSB~G4#o41YqYBKID-#p03ar! zbViD5ln3rmi0>mv;Yku8zb<)1 zBcKj`BeWw8u*X7)kvGnRw62sl<#jLzcrlD@~6JgpR2ub)GZ=p^Xh& zEU~KB&aIqB4$$P1y_DK~y)?{1{yGfw@n{_AcoEQF5a!hfgRpSsk1q+F??)fL65JuG zc8UhjbPN|}?FliXk#7~VLmF9VbEPD1sQD|7WJuGaSYmGG8V$aewWwHT0M)U2(=Xer zcBi<8xc4Wl{_%vVJgtE*4`i@b4AMMUv_i6BvPY~OFW_n_-}LG*fdL=<6T?l_fq4*x z_SaM>wI5I;5xX#vP9y1Z=Y=R$5FQ%d2?TQ4h5BjF4aXnnO4ai^_>%Yuw<^?O@D-TG3*O=p(mo>6N zw{pgZwy{V2jQGs53Vaubj+r~}axxbCGra%v< z_8Upz5dS;ssR=#_EoN0~WhSMYr~%0Z%trk70hgpqpb4zDZvT)mP}yuJIP+u1;axUM zZAV@CCmt(UdxbQ@#W|k=ACx#+t!G2!$V(wapnf{j(M5ZAw@?hV6s0S}&1QAY&E4j5 z^Nhuj-J-gV-wjUO$6RIx6Q__+Ak55+R~${IgpPb(BX)(t&Dwr<;I(G6lv5084}yrK z8fQyZGpNBe4p>{J9dqq|@iXm&xJk!`Bmura5frf9OJ-hPVW~1{zw*-?o=f$uYVc=2 zih2yxqJs-*RsB@)X=*Y-6~S1#+D5Y=(4+mCEnPoD%zg;EKxh-XmD<^9bfUW-^k!4Q z!o-~@-ESs#fpE5J8w{&ZYpYLr{>a95@uXYO$09o{ok?6Kg2@5s52v-1Tp3B|c){&* zvJKUSAIz!GN0zLeC$7;2Yb?>;OYwsg@xC6ZYOAgf7_f!On(dTrLU)IJ3D>X?|MhRW zzR7Z50=-W`cehwk-nVy-p&zfLO0>;oPVOkzA~;o_y7e*0`_;q0LHM zs`dQV5kHTjwcb*5*R-e^?*Wd38W9RIz`pQ?eQ&ZEhVXfkXN+Wp<1Qs_dJTlShgO9+w%<&PBb{{ zf~hX}5};$r{_%w$OxrRGNgNK_8=tu*mewa{ zLYXZsT(fi*h1^x<_$61d>3$QUMi6zko|>P&nvH*I5tA|TT&mtb*%89H!*q>FHwtyF zOyQ;d@Gn_iLaghGxY87W-l@|iPYYH!A`0~@1#{=$Dd6fi>@=4lo6EmNAZTG?Uu=AGXe^I}m6~!m(}J>Daby+qTiM zZQHhO+qP|I`p(PD!(HqAh&r{a>Kii*xL;8miQfDi7@CiEp_fOQ7|A!^R9ZslBc;kQ zH|VZvG7FeTI?t9r-<-mO%z4&TCAD>W17AH1m6N|@z-$~Chgf}8z^woqnSBxUqPgU* zPTt_edQGxTD3~%u3wh@qQ$KlHRR^iyZiuj@(xnt7c~cV<;d2;@YPg2@~)8q9Po}ex{X(7<iWI9G8hefQDg0Rh2q_~< z_x$c;RUgygYVHWW9h~Urqg-T%SoS~7i_MV&l@=lAB7yBNe`)c@Bt}E`?^nss61HZT z`lax>cD<1{Fg`TqaZVtqbJZFg>ZFvRH##<1(>&zOQbZ)vVs!+}>H*&s^uw6RU;f$N zmQkXdlBqI7O7S@1!PEenpQry4i8;9_E1yz0sx+v zu3h68RZ%m4Yd4^`zyo;MjWb8_Y)DKh35WETSA%oW6;V?TTnm=V_QpueQVSd|8lG?x zQ`a(PHWe4V)RpkkNKVkBK6cp8?gX3i|LpC}Q~|;SI$WDzc5u>GFgpw;=~#M3yWAj@ zFIt71x9n06%=e1X4qm&4u_|zywM5jP#`6U7DplL7@}>R&#lq#h!{~>BTp`}6?*b|I-FaO0p+{_u~ z{qBCr63#`|TUX(ugksZp+rL~bs`gP^ovq6#+_rS7i?%~+8-O^gP_;;ro~b}FfdCtw z;pR{C4wEuUxU6(cR>H#tQaQ9uf8=XW!eK zT&Y>_n#L_kDIfK1wO^L-QP;VmT6Q^QXW%HFCu%Q5&H++=r7%#rKxoa?GAW8rh^ZpY zF3H_-m~g|7xmp>LbmMBF{2bza#at4VK>|3Fr+kYeG|yjxCye88UurTH@KPGl6wR)8_MA14C z-ZcKSgoe}hP&hBN|Aet_>xqj*?JMk7bXr~EhqGWee{m%={2V8tYD@5d=6-+3JKomo zusAO|3!eA1P}-#kL0W`E>?*){bvlS_{=-Jps5R<&3QvJ{T3O@EhJ#LiIC?razOU1o z!Ig6fwTR4!k%gD|ieDO5H2XW}&X(y)<0O7*g;kE7(;z^mce@3F?^;mR60R&j}YA3Eez=c}KLp;gow+B@YJV}>iIFo7tzHhLr zyja$UcPeVlHnGVG0aN+2st;G0VASuSVak$sbDLvB5y7PXyI!18$D4xmR7R5pZJr^z zN_WOF!O4Sdsj2F|gV;CpzB0==>B6Hiz$$vU^LrjIlr*NZI|WM6L2*E#Hvpp*I(4|ABji^)pq_bJ^`wdu4=^S16~Zf#3~)@?60^y7L5?@?xy z^yFf2{+`DZ1gamWj*gZZi~1uIT*3EXZ#mTMp;^v9EfKbHlVoU-;=C^+5szyU+TGUU z_?(w&x4d8KL9KyCGXuGe?fcsw#4KKRH^Ar`cc?_|SEd(3pO(i@HIG-_AFQmd;OP_Q z4-NyRUUefsQGoXkC~$|H$%C!nkRvz3O@?5a)Fe*#MjzzF;NOTExWn@PZ$eD+eWf)Q z_73lW?gK!pzU z8{;o!qTRalHx?`Dp#Yj)x(cV+8w*WMPz#ziN(j;y0$W_DO*|;P+3cT-A@j|z_UtZZ zK|XlcNE}Z8{8906v+Jx{#}X-r01nfhaI&dGu!({h_Qxn`TeF!zN;}-nj5U)Xt({i& zvXP&*G8^E@B2WDL*&#sk;q;-uL^yjFbT!L%@#@&{Zv^s*ml)H_@ydp-h}*9l{8E}? z!uHhC-k>%bp_+^Z>zph?YZhME?p`tg$>A^gQ2HNT38(hTq<%szmrHJIzk@1%9crw5 z_->W`gw42SC07<+&j#1v?9U%B^m8$O7BgJFdOJk&kLj+!S?|WwNFojOAA?L?l;{E; zQk(Yx94JAaaU<$`I&$AJ?koym6yJg-&ynH=m?;JQ53@U7ydk(BTFJ)64_5~&w{KT1 zZdVZa=OO!AWzH;*tS9HxrUIbPHEa+%^_B`{99b0Cx{>5C7i(qs(pKO2?Vsc9m5vC zNqX8ynhUeukqGj>-7{-+_?6+ihE2-K97bR66`bqcTAGpB{-fYRM%6L$Bh1j|hqP(E5QqvwGIrd7eRQb^cYWkw&Z!%zq9Y$?C3q>cM|obP|7 zl!TomK2nr<9etyg@om!GmdvB7{lg3Cz^$r&&IW86Fv&Hh7N^C5NAy&U(o3Zq}jOzYo??gWuJ1|HIyAT`|8)8g|mz20Uq+il$9^&0kPl7*1dDCRx23=pY zv-Y1dq=sW}W8BNX(I!Ok5FfS7sZqp(jl$SI0JYQ}dfFrU@XrqNb+4}@p_+&ZuUFD9 zXz$N?eobB0Gh|!cW)fnkq8 z9F@2&T9;@WjB$JBD4`$u({ZO2v(x*kE7<~Y%x!*-$G12<_G^=BEY6d9IF`S*=Dexq zTsLlD2urJn$D$Eco6{4H8tz2?alI0AM64l=GAX=@%}*K2rce9SPPwS+>w+Y<|I&Qo z+i-qoCZKJmW%|;W!`%iEMaX#vgKdps?^07hb%NnWtq8>97b z4(f+d*Y>kKZ>4|H)bscB&J|eF%FCHr7<)0c1DBJba9CF5AH*bWOxKNTeTt&`hmNV6 z5A$Hqq^6k3$kSC@ZX!1F;VBDXq1-6B;hHm_-3pe&1LQl5_}-fJ$ zfNl-BdFkH9#tVYo!l_2EK1~A8n-nSzx8zo_y{2_b&fi$6L2Es=p+$e@Zn_lV$)?44 z_RgUuJ3Awz#eB9t%}|(irNw&Qnp7#wxk1Y8v|U1W+c#HkbveeUyVT*l(74=Cm=&eP zd$vBhn5U?6doME+T8$0qYKIo*!R`pq-ePlx^hjnk)8Do94Fd*v)EAQ`Z>yWl7d91X zyLkV{UCt`>dgU4C(LS`@SlcX;^{kX9rB)u~jVYb=Plq-8{JOFXUuZ!?CtXvHhtO;9 zCHufJy}&Jy!a^+(HH(FTJ%1An2|b}zb&n7a>~F(7)M6C7&nn9d<#hd_`aJsaA2=E;D3ckGACq+AhQz>v!G-K}NaL}FgX^240~ zTf=3T&|x@UnpaMPH1=*(nAP4l^70F@ZGxVH# zhx#Mo?WP7NJ@0&Vj*T$FQ@!IqE*=S^d3V4+^Gp~ z#QUUDD$V&TV-@sWKb~G4YnXr0QWP`UsdrKI+95Q6C=mSK!xpCmcU12*)hgq(ZN`Y; zJ#gOR1bAudYA+h;2Jo}}TN3hX*4Q$Y*ijX@F?aE6KP_R1Z^#Tm?W@m~Vu+Xf z_qcJ4jEFPM<3!oy6Vr<}cjhSH&`h&=6)1oUb#=GRVp2=3DdX{rx~JNbBK3u8xX17} z1)vt3$0+-g2iVv1Wwn=k4`FdqCi5p2|K}r4#%}TQySo&A;~EskBm^8mRicLhZkO+q zFb25fqC@tP>)tdSEp)U%f{(fL(BMh ztc@{Pgnu!?d_y?0q8P)rsZUEYz-4n&JLT`~_voTmjNm`>>Ha(YGY${YLw*6-Q!o8# z>x4DKI1`2QVt|iWSs}OcL0L|Yg#+~SnG@+^?azJO(T1|5O|!AWwD*dm2;jQ)_`~fG z#3s3D8g^|@7)XtEI$?hhEGJo%WXAY)kn^1<-%BDH2x`?7c>LsWgW&+SpI2cwA-Q&u z2WL% z?AfnrC;M%7SUU2M@4fVtnVlS*9!|Jt`W~TlU+Kg`CJ8#529P~{eXyhl>%#IA{;xBa z{zohgYr_;+NjbA< zkyC5n;0AM6rpbDg=g$@88Oq&l+A|k^hmYavb9OP?G$?XAdc9-ilqHXNF-lbw!v&+I(K9 z+65+e*WgF&G|8#~xyc(lQP1Z?fW8NO_ZO-bfcTpcsO`|NoV^ZaTzeJo2B00l zb9w}!Ej5bSG(52#NPT?EKEc?VL%WzJTXOW?u}(h%GhShwcS-tu4)39XwvWqMGJ|M* ze%xIBYZm^dTmk(|hRuAswE#k%&nDXR{j^7CT3%YkBxUYDehGbuE??UjuRk52Ui!X7 zH9hrw%IG%F?B?9q(N=R8$$(V#D(xR~P~M$7(Qk=kjLEHbjAeh+B3+yYh}@{!1yMaoZ0gDU7Tc!obzeUZsLRR8^2fSJ0mBTI;HmNS7g>_$t(4G zg`y~Bcv>U42uif1D!+}eP|YYgGCWEhY=qQrc10sPxz;_?!EM;eaNK(+Ym4M~k= zDCVL>zB8s`LYnPG5Ly_j!x@Qj^yXPWhkJvn&koW`ej8h+!iHESz3U#o5*^-NB2v&B z7&jxGqWiiPhAVxe4gESL34*qHSjc|M?0T|tDfe`9FLrP~7S65~`4?AoOh{nRKC18VITUdDhY=`I`=F?lPpYt}Es^Q%v5a&d z``9?cfuu3TD_ggkC4i=EJ(8I~2(l-AJm#_!F<_%@polT>E>pzlZ{Y%Q!$kazxcA=t zDFL55?mgCWH4(VEXv^gexgcD0KbVhQ9y%t7bYLXMHYwVxu_d5fuqF=%gS!a-gu-O+$WYD5d;5Nw9q*fcb@M!307(y`ds&-Y}pk( zErZU|Ka%g@rxG^ikZ~-AxZRr+R!+#e{Cf$3_x?yIWh%w=x&-Ove=pKD3D%5^C?Y^T zqz`}Xvu(8TR;{{(zA(4?#NlG8bq5J3w}9(86o?7@(<+;Fu}%^=* zTcdVVcG|?UHk~M=;`4m8pIl$eKv!6`z#t< zCHG#6*GFa^IExv56idKaX;cU?E~bTVd}Qnx5vxvZEYlHobIylkwz|F`{-C*$KS~Gk zpr~ll2D>OXC0%4yAi|smgO79>6kb4zs6)i=sMlKZVv#fJU2C>rC8e_b)|Sz0Oh@A5Qq*;SkIWHcR-~BXrK=pt1*06 zu?o%Ut)9$g;T|GtqjDYnLzNTu)AJT%u&^y_#s~{4OZ!wBzp{T96UNyU3!gdmBOhZ8 zR8o!B{z)3T)pd$X=UF;MR9ogbO}ZI{dyMH;;ruZwv!n7mzjGhU^Ry_5ca59oA#`hz z8mSj^cPnR=eTLKyblA)KT&m^5>Q=2(JdJaS^?gIq^X87JVEAx?S1#Z7BHGGpb6v$2 zd3ycz{EPp8k?AL|SS_J5i!e_J000AA004sj7MXT%F>&CXPD(1is%#y>I>+I|y2WP?V<}5r@cPkcu!>?&9P86f<3Vbq@g4#sIsvyz*sK z7Gr$#D$DAaSg=4q&1TvL-0%XI@`(Tu#QjcgYLt9I;|MBxIih48nZ{{Rrp?m;4M5A> z!rQBdfO5q4BSk{$yST;(rv`%yB;N+2Fd{(N@B)PJnAn3PNeN~GCrv-hJvN}fc_khz zqHpq;_D#_^QMwES21n11_qghl2T04SbNP9g2g%~#1@+3GNy}FPunS}NzT$22n4aqk zm=<*)BZ9*8_JY!jN!+&Sbyz9}C_)&B?Yq?t%AB5aI&hm6Km*`qt|OBx+F#g-2B=s< z(N5oCmTmrwPiYr>b=op@0PWb+yx9B!jDPbYG)2uoHTLGW)sa{BlufhaIEGRDvIG{h zh06w45p<<@XK4!~&kfP0ODB)7PM`(~^%L1QkGV|*7+`GA-(oxI+p=T+_CSZ`?`vtx z>`vz#MfTK~?@ssR`u>t!t1qfn^qvOXNW>8{-T?ZHxeE%9 z5VE;E8PNk3+@k?4Ti*R#t2A}_vUNt#nyK;8?)LrEW_*`eaK>2BLPkeQU%4e5jd+mg zCr%p+@BB%9MAZ0l<{(;p*P?dv4EN2XgIj;9bX^-&E0p^+kCIx$TEMVW`K-(ooT~+x!I5@fvq=G!iJ1X_#5* zl?zuE5g$2yyZ&_2ZPbwCc3hi~$lXXd)wXu)sFCbd!hNN^gbh@r+t@1MZWvkyO5mB) z5J;{u5v;>lY_p*4({I=$Al%lmPPe*t^!2p`2J1RU`6S>LW;=gtri`H}Nlh9z3l^%6 zbl#V)n`(Bu+u>}kuc(nw!sa3zU6p4_!uFs!MZrp=)d!1bdgSqkIB375qh9U%Weg_= zfPczu1DZ)##1cDdRisiJW+LHq z9)|JSUYWEUj0~nW!I$o>$*7Czq&nl*w~b{^V>gL1Lz%U|o(h{Bqc9X$$~EYvg9(+b zD2lB#Ff6wwb_OCBDr`z5)%+`VOoA+AvcP)3zK_5xu~xAKa@l{7bNVrLluOJd>mT(~ z*2@PwaN57T+;bhXBN#NiTPuvNg|=I$6m6redIm;hg^Afy_s@l?1Ti4K*w$NMS;Q6k zWX6eKMX4jibWIRD8vsXwn_D2u*Yo)%Bq$GHjlWByQsvsd(73Nn{-U*XpBcNp`!HoE z8a!Wye{*r^UeaqU*Zqd#^Bs=uzpIHiG#EfBHpDmPXem8Wx6S#k{yN!4ucW)ADJem| zD6wemUpj3}4X_OQmfMWv2FRzB!w|nfo9YBcAW;%HLvOVB5Pxm3{rx#ErLh)FEShmq zn-Fwk92*>^QIkGl4mrf7yF*h#JXc&(HZ;C-5fF}tU|1f?23k;a&*zquxj{aZG(MlX z7${!9cJw2uZqTp(<8&tS$KJSU%4b@stKO8fsfcE0<4P8TK?1VGz>>YKiiO9Cxx>b$ zs(Q4kA{O|@O5B>A8I+3T)a)C)D&%KI?K$N7u3*_b7(oDH<76*e!rJt`EjeJ&Q&|LA zSEkaw!$nbrkxUoEilgoNoaa(7Bz-m*ttLBCm;T;u4!OGQekpg3+l=TRnPkE*@SaBo zNr{o`&V1`h14}AC_R#`dMCe?7nTVsT-WRHKsCfm|zw()nNxJyOf_Y51L(BvZbA18) zv`KN)llq1`y6^F5^98oJLf*_c%nO#hw||AEXYhNA&c4*xytAcS@o&}8Uo5<{L)896 z@VG#pUKModzkNq`8$zh7*Ld^QVoe?M7mBJE8k4{@?y3_dT6x-I!?0N!IaWBLu_DBe z*rnh6daI1Nw}kot<10fMP&Kxr_q!IjX(%dqpQLLR=q*znQFc?5>8`d9{v5v+nP2ov zLueL4=Y;xARqi+q{?94jGy8GId+nir5`w`}tJ8Wsrl4ZV|oo<6~ zPdhbXD?6{&B5H?1UYD)kUYTmMx7QJOFE_)2X1b&GKJ995IDGF@@r8hk63{#@O%vS| z-JZp34hd|B(7ChB%IHed9}AN0A`I?5A_{NS!`s2&NOwV9sQh97qScgK*);j+C|Lc< zu~Ve3_h|;l+SmC7ycw;s!h~*$`|H_wvc~OD5&Lwz33|G)(YxJNmOfRB0aNcIPwWG( z?Hg|NJuYyJrjHjzrJcJv@E@|~e_20dd3_+iz727{zRO3@Mfoj=2|UPvEKgJCHlQ$t zK*maOV+h>Dy3qh(Zv;@XNPewRvL+r#DV`!7ss>|SFTeO611{Xv+FabYb4|@phZF1iY%>HIO818b+ z*EH{)#qei#0_&sq1CmE%Oz!mc+tN2!*&x0F4II+_pKh^-W7d|`ExPLdk7fw~0Kool zy2a7J*2==x?BAx$HP5x<=2-miJBpLD4rZN%=0EyclI=NlHI1FNgY~FlgM=}zD+HWG z6~7;}U&2{(F4?Wqtn)Sxei|Sc$?>*zgNtag2~gVk`T5Ox?K^+(?%&bINs>&aoS8?W za4yLs)|b;jF3%?h{HO`76+5 zgZ@r8I>8Yp0L0T^eGT0|9vR+ys69Gr5*fw1M0D(k$4Z&r?@v|ai z4=&{l>3466#)}D{rFMF_e0-Sn4ugjg-RxYr0KWLV)g$)~X2Em4DP5Xl8AMuiF4@R$G`TfdM zDpxPJjb?|Vh$H>3t~5y=MM&Q%%9*q76s2`aUUG7~*nb`?8_zZqQ+L09u9BkHV|l>T zdbNwasXoN6^q+V#iHV zVIZ)8iY58-&DCuT+EFTfPYSHlh79fO-;`+{YZNofOlFUcG*m-Umlh3o(hB_ zD@w*6C&L~rGIbrs8K1usnJQDbC=Uc4!x?+7tiG=U3K{ZMcI*om<)#ou6jx%C7lRZN z0n2$p+4!AtoINT93v;}@cH~Mw_hdN}pDtE?dQ}WcJTX$bs5tJ6WJeE9`$cFrweLGt z(yvrs>VWD4P)wCXOqHKa4CW{?Xc;R>ls{LA1q`c169Oaj3Y>#kN^y0Nxg7-_Y*TqzF&N#H}b6F$YkDaeGjP5gC_1?91mqL`OnZz+4KbN|VUn%2r~mN2F!u z-VomymuAFm>aa520(QMXMJ%Q|<^Fe;_#dou-7v(o=pv|ypU($_zj=gnZ=F5ze{}B{ z1t&x@5R6#;f56aOiPtUska2Lg;c|C+@PpCVm5{JnpmXuze|;ThLh)(w7G`-Wx^laE zeKGobUlX&7rR-T8pOb%r5_ka(QD10xN8n6m)bS3|BYpw3=_(k|6RrvITd)^mRLhrjF&?*ag4L~Yd;#%0GFM)2Q_gp_kzjs` zD4^}#H=KKHhiR_0myYJdi*s)5!}`(ZkRFj~8_l5sVGrkrfsp7!RHH3$B${_CawOo0 ze{vtkPUxY71Hjg!Dz~WQZoFR`^~;!8stfTslEkJv0a`wbZE5^I?sH)O`#8TOV6+OE z?`qtS?&7VE&mQmk!avz3cQ$}-o7vm#gpOx-Ibi34ZY>W6(~iQrIFMoWx)wa=o#HNQceQy@(qI?a&~xq6 zG=w3nC7e+Xcp_nGr)9<$=h1BJ3k-~DwOoL(C1!kExY7epvjv|n(XpgX&G*@srX?!d zB4#LBJkK$6(s0CuWKvmYZ9=d~0G<;7v-Ee}Bt_3G<`)(=u*{$&)Dtx3)f22I{E(({ zegQ>85k+1P-i_mnIQea0-WZ~!Z!VHp5-WSNJTJ1})oD_RpS*zmD9oFW|Gi2bx)^uk z4hB7`Vrjf>yyV}$)90@*9Q8g77UOk&e$NW-0+t{7rGDCPc0EWV< zhgKM-KFF-(9OZ@lCeWaPQ*e%Y!s4-dkYnk^s+6d@p~|<(wQHcq59nw z(#?X-_rgLok=V2^2DGo6Ktk)eGA9YytqL$!<0G^ul(jXV^5K*Z;WRB6B;!cZA4VCh zdK|AO8HR#-i%&+E_j<*E8ZpB{E+A}>8v*$xgfSOK{k$0`0d=28L32)lXj?6pn=>1# z#8OZt2jbCwQ;B(@Z35^N^Qd*!8{@qTH96oJKAv5IjJOVDMlvhVTM%xNL~Ilteb6^NAd@PF<~P4N8H^)` zT$79yLfYxGyg3aILRsX}k$pK<$6>|%*$w%ql9Z>)?i2}gMPm#PVsTY%Gtic+oK9Z*WC{r*Svuun6N31#!(v7nsziSl z&X?iqvICg>b?wVqqlXvm^=iN^`G;eB!SK`^+e~s+csqMZHD|t2{G%$hk0$1S)OHSQ zj;$*v&q-7uP!;h-m0n>?OcwQgnb0H;%c@1UoJXaC)DfIbIw+2z@ey{Ch{y3fUn>u! z!-WoUT9{6_!OLf2F$a-5L0NtRl9U*uro`lskxEon35T%dcQ1jd;sLeA$DWjUiuP3c z@+YD+aCMHS(tW$re`>DPOw^d%g*C67_*nWo0-b(M06jHjq$uOTOj&iS8GjioC`|A^ zcEm@?M8OUwD%?K2=H77&RH`c@2MgoGTIUmBA!cvxUn!%BZF|J?jbDx!17~g>!m7`P zpfdDHes`&^IwwazFblD|_FnE$aG{dqEk0SnMZcHcux|3AJ_Rpn9{vm*HRvDL^ ziUdcr8C#X4RDb}2*swhweVol1xu$pT$P?e2`uHD2rEd9Rl!?DpJn#pU%K|RYgVN`M zTF$uer78M40vChg^$Uj#EE84*3X876rYb70rLgEGQKP1LMkW3JLE{pc79Al>e3wx* z8$#1AynWbV=epS7DTZ{X&Bvb6T7^O?1)2Wa-*v%i@%&%084_+w_nn@y9%sY`#bAD^ zu{7kvY8N>8M5)#iHmY&i^diSLW)@=X(xfz?UIdT!S960}(~IIt{wx;YVE!k|1Q%m% z8o_DYuedcps`NFP*tk0-`Vx-(&m=R}|R4N+n}XW@UgQ)`7e_U?2vqO#X~#K;C#b*UJc8cQ-Jg#8 z$<2r5kkcivQD zF1Mbby2e9%5JY5Qh`F*#hkT}l1ol#g2cBR_*uIV%F@%e9*-S2L4EktC z_3*yJxdx2rzj{;lFgO|N z1dX?-1*PDC3x~$WaW)(B!s|(`M~eA>>Q*1v1=-ApM2;nr#W4r3fgQR;;lTnqsRKTn zgC|JHb1^8{#@lE#TQ)4fCf_d9u*k53m>jMhGq?3=#WhV{eB_^t#GhX z+t+A2`lbHTO~6u4Gta;PjhEe-Z^Bb?Q0xl5L5WlO=LemvgMf%B-0W z&tfV#pfC=g-j@nTgz%&hXo;~-MPe1~hJ@*$6-IqJQ`U3|ju3~dSEOAwxEom=OS9C=} zXDa4XJS)#uR8NF&nCJmg1oAZL;OjSIjROq9@?D2uoOJD74LO7pmnyl{8i7 zP(jxnXnb*#xDa&UHPP|>3*^%a@`P4lE(S2#p0Me3a0cf#qpGuyB==$rTL2Y;@~K_| zbtC&=58cgZ7LKV8NL9iICupEdB74J9=c%-|L@brdQ*7{Ia2|^axeMgsk~D$BBRQ^q?NgoubOoF7lK5MKd{qTU=eGOQQBPKm#Kt%_SU@w zg~3etdzMJ0fzmVE8H{<+djdhhfBR_c<$DrgM_!DOw52jFDrL70S_?)WMUh)@$b-o^#BK`uWY4K#BjpMDoT<0*Kt zgVE~;XQM+2J;$`}%G_-am8VWQH`kUSQ?OT5dDkT>5&$;2b|yqdR}7bZ0iwhPVF?;V zo3ZlUaIngPk_EpRWa|qj9k0)2RS*;w{TecuH>!B$tx|Vu)^OCw6mnoCAxtm`v1lbx zI{<@m(h)I<0?RA1nvyi-ooA`Ij)10ei9TiX%JxaID@}kFRh0u3-M9iM7GwF%C0WwH zbwY0Vwp0RkRt-0xU*KRaePa;8hAFs@4Tp%Y8~PnqEbh3Z*CmtXt+6YEw!#Zm5zS%Q z^nE-r4QQMyKVh#;zzWgJ^Mko0$Lk1O7c(bsaG`Ywly#hfs|q5pQR>rC;-4pG)u94s z;jdG@91Qq_UZ-4#$WS9WpkT!{B7eYp`}?4L@Ct;4GbWr*uP}FY_Mk{M*5Y)|BM=%< zw~Ar9OFYlFkcChK3zayTzZSGN$!ypzYNytNjdh2lbqYUt?z!8%4%}~ChbxJVW4j@~ zq;9Q}rh=@^(vZWvI8H-e;N5WOYB_d%Sb}=^in~9%Vf1dAftG#t4$N=P40+=LScezveLtx87HUA<7m$7~$2tiD+O_=l^e`lq zY<|QLrID+Y;qedX`Ir&zEBEVX#0_|x4!(e-LTzV_mturPO+TBIDHSB$pC%)PcyL&K?eXcy9}qN{a3g|b5?Gh>jlXv~V7G!nlL zr%I`FDPQZUjJ4EY45V95 z`OhffkU1k97IQfE4WfxHY;Ci9Us|20FH;BY+dsl9#06F1H5MFcaSyCb@7!*WBFkrU zHPmkPb_w4`oClS2DTEgMAeM+e7%cQ7iSaLk=?}s>c>PWoCGd$NqdM(86Tis?zb&A% ztNz5Pr&&oe=3>+gV>;cVG1eA^(BC*xs0Gp zkKB0dmQgSIxg=fyS!{WIQ%k5_TtM99@{&dV zx7hccx4V13-EME{tIR{^eM|^x_6yRq?;Ls2Cm}>#@1LA!GAYGh`}7aqU@_%3s$2QD zPaf!0YQ$UEbf`~)SA2uQ*J}2bo(y zro#wD>uDnP2KgU*zj2`GvmYx0Z=U%TUDD0?;zoHEC(DLrQ{d4y;Ymk?XG2|&vYmyN znh0q;vj)^|jXPuW1u6kwWn9tAMu}w5#sjOB)p)1HC=cw$%a|<|D3^9$FE;q}InlD7 zzfM|M56@db0-Y`wT5$_LXOXEI0y{PKx_ljXE6d($^!(qvTn=nZhw*~A;mKT*IA00V zZz8i4-X%nSC0)&eAl&W&XD1k7)RRZ?iD)a);P6GHxGh@HeB`aa+liY5Uh$dT^W`_1 zRISGy-73vMyPZT69aW*dFEuY5c0aXanSW;-{=H3?ADW?MY{Bm;j+>9P#PNrF9u8!x6?~LUSGqNr9-fnMZWF<_~0&fE}sD2q5eC0+Nt@|TFu($j6aPRLttos=D{-6 zj93xd(6nhs)Y5zSQr(AgVE8koM$IfOqB%$>GJjRRy+>=SN3vtop82X%@z!UJ~wBYaTCl;{Fu)(DTO`dCnYiJed?*b%VR z3065z)9TrWuHQ+OqT-?woMA&^;l(Eezpx3CNij|da$}mrqo0ATNt|D6*2HG!*vsFz z6UzVkxDK>%N(oZnq(resdAirqj4yZ^Up9 z$9hg9VHgX%`a|V9ivzfv_6T)OLq3^I80-fo1cgWpsabt&1rp7hFIc1=><8b!a> z4|G=F(pGj3Zx|M=m$1?<=izU6?zpDyeox)#OkMYwvOYDgy_wA@xgm6-K9%2!^yr?O z{JEjJ%x-LB#EcayN{Zyp7R?jNDp!7%U>&xHz!Kpk#RaJSlvMuUD%I*0fXSo+lw)r_ z5iL5YKtH7L;a9mdnPK+o*i$Phf)?UYB4ZoB13@HHd!Y23vu&(Ou+$U89ox@ZTrG*B z1J0g;Zl|0+y)btv=G&wA^kh$f_~gVKTQ}h`OI|HQRw-8I#;dmj#36fNpOZ`_K!Z>F zdV`~0O>8HL5` zKBZ@>i6ErFPj8`mKCBiyTFa62p^zemJx|^>vbk9nm1o(n=04d=^BU&bG4A zy|w0YrPeyOF5Af6ItU8v50K>}YuH;IYw=$m_1F_H+>X2yzQJBvnO|Sl9kh%R+ZvMK zs}H3zeg6xuUx1lN}j&wJ#7@KSvzocmHxvrP`{#_tTjqVPSEc;C&bQF@du$$no-m0;!V znw>dAE5bmI3*z0(G!?3YFauBf6^|+Cn+RzYho!RW=Q%nI^vcPSW5nT#FQ z8Dg{#cNzDM|HrRQ(YKPHYzO@2472Q3>0)8drpoUGxG4&+HI7)HUu3n&Wq=towXowWEdA<5{EY=~q1glFRjDg9SjL+W@F`P^?&Hc?2bu)PX=bt|Tr@6-V?PT-@r>5IkQfK2FhlHx z(>d6Xx&F?G6#ivU%RGFSY6nbFfGj{pOCQD8OZd%GaRGOzLZ?%`P0!85I48@?)HJ8d z%jA4V*mZoR%jVlSG96qS51BsmsQCiUfE;rNtVRE<6Ta`qQHWIhcjE}{;7uIQOP&7h zpN85S+{W?K>>=B~w%dFpPa;BrY4s+;X})t~am8*^z6$plA{8@mGiQ_b;;Nf77>gOh z1j}2rJ$t09RO4qbNxnj~-OIEaa~mD&4tPyFE-tBB)F39g4v?;RFQ8br4auTZuVfbtGh2@pPFT6_rAAjMU(8W(^b?&R79r_$JO)$Q)8Vz}Ge! zj1-EJh1tO0&_Wkf(TmkDIt#c1cpRhsF+wn8?CK^R&m=dH8ja_4?KsX-F+};s^u@~g zT#oGhrc<+UN#*PV(}VCFcNQ5lld59d;z!1Y2VW_b<|=Cc!`M4_cN%wHp0RD472CEd zwtum0+qP}nc2%5IY};n%UNf_vXS!!r_qyJ}^~d*MpS?fvGfuzh3cD(3mzO_8mhUzf z)H89e?QOGh59iDubpHI>SGlbulV%jhU}JEkkfihIAgSBxBgdwLX*`lu zNT_jZy8rp|>=Rg70?~1FYG2ig;*A@E=BhCgw;KTxD`j+aPNHpS|3W249hFov0=NaN zTni5?OKHTnSR7#A`_NM=8!L&7@(odtfaA0~cfi%NrSi>Lafmf>d@|{+RAEy~gY8;V z6oOMtN-k`V&0JNk{zR)^Iw{tu2}%Z;$o%WvfsNXu<7H>DY!ma$iooPmR=$grd3j&Q zc+MX|b623_(@bfS2d6o-?$L(IElVUuY|8$XDNTL`SCN9TOV0RKb$DL_$fEXuEHvb^ z4%wJ41{s(y{H57hKrSh-!Fs)(q3-y;Hpxcybol}Nz(M}<3~wFGo38<612|;SW_py4 zi&v0&1xS`X#a=}wRaN&$jeI5r#aoev0>7kRsIkn9fRDpp z1dFtqJ*)FT+z3A~^p1ZpKRM#0LDm$W$pn zf&6bJbDQ{_JKslU6FDcpo?}YsK0=rAh@&S+*#eZ|-$D>Y%7fF3!Hm$yk>8FPTZ&h- zp7ZpXoG1{&`XCnB7$5TNJH%^7MUP}^wV|;KC=7G>rWf&@zcF|iq)6QfrHDX9Ln=qh zQX#rR{Rx=~EGJ+wEF1roR<&mgF~4nJQEMzSf?8$T4RlLTR^R0Ywbat-3q`kwJ|gHUTYsf6};sfKU* zdYvuncXz@sat_W1+b2wq!u*AhldK?~-j%Og=@JBWOhP1XU$?_ReLe*ejo_^bOXijZ zZfX=qNMCOJN*36Ud{go%-HMKdCTJ^%h}w76X3_Wc%^4GRO~a zAcFqcYbw|&*ozq4Wynh#X+|~Mq2)0~)bvYaL}Mz*=odP;8N8&zE?1f+IF}vnhkcf$ z<9vGLei5?f{N5%0{1M{}(SZWf6P3519;3nxL`%&kNGAeRjVd&8&!d6Q^@w1kOGUK9 zOK7k{g?LhY)T$TKkVv?1?-{m5xrI?3+pvl@M*utL`Bq}epWBAwwM?Y4A&jRtY zzwn8Z^6Qj9!?r`FCNxpa4LMxOxH>{F0&~%qOvId`_I#?JuQ|{*@NGs|g+!Lr`4prQ z$^C<59&a@oh&BI$DQ;~P(eX=!Kk~*w0bC>z(K}&QPc-k2pi1D}?*yxYjCPt#)w9=5M`Vj|#ABm)~SN%Gv^4C$uC>=3yTg;NZ z=N2Y*sIcBrE>Ux|AVdXyeqt*=vLYow?nYh|m)l5^o`j{m7fmc{k9{J=kbYr)IjfX+ z`9rq$*$PFKQv?smtW{g>Xg~F8MLJx}b6Oky?1T-S!@WFf2Orvjz1qm!ifIa`lN5{e zANMHR_78?Un1(cl0{PdzDQ7qCIP_CS8bJ88=?3d( z2<#En{4JuL`i86ujl7IgJWw!#-pY$?Wh1Z;xt!xH$3)si*TfOTV(KWN?o_y7oXPq|M=*k z3-p>j=0Xs~-Z#H+{aX)Z4CAHu4Fu&_Eo#-{^N3ul7W%GgwX1D~RG9TIQ=V>#x$0VT zI;Yk+XPm9lx2U6fE4}r5_+zn8s$^k)S@Nx$aB-C|2CXO<*VwQ#A&@un<^IXOjJXWx86C(42t-Rq)fx+tof_@qjPY zSyiivjIrDH0-8n_?|MdM4GK%03FV{|;5(pf8j<21Wm!sy2kml1rP7}#(i7_mF3RXy ze?JBLsWd8^$VxjgT_7KDz&KCY;(~j*F{xQCQQ+gPjk*V$+*9f8@z_(Uyy>d%*|QX` z?w;fZi_GaI%3J@|DDNgGIl<>NCw+W5%-n6kIwNa`x1~D2S*PgeNj+a!agAZ6zX9w) zcuQ?tSav#0AnT&bLF9Ud#RZTNB}T^+_GGakR>-o!OWFT;yaSJ-`NhAmtnL3rQeDjM zn9UPf&Jv~Aye&tNg4B=c#B|HJE>ry?xeVOW2-q zT){u{YVba+VL@PSJRGyB&C0)hXZR3x;gSqsVfnk%Jq|oA?303_Lo>6lZ3jV?IzX`L z?Ieb+52!>K1K+9wc8mV>k$~^;{Vkv|^|^iU5L9RWnYBP@9-WgphL{3EdGOHt6PjO! zSpDrabqb35$>aD&1^xW@)Mqy@bHNMja^m#)3NUF$RHpu~H#*nnJ~}rieXdCaoA13a zIv4-X$?7Mw=TndI!$sHB$!Y|)Px`D+Nisi10nXcRsZ+?q4>_f`^oy&Ec?q%8vImN^ zXLUbA>22jBBIs#Z4JhxPTS9vmYSS4y$g zzQv9+u`(+GC;qdL3=yd&k+GW|=6O=ru^!Pg5s}4#Wc@H~x!PWOJtcm3==B-)+b^-* zbDx9#uCyc59@yQtm`6JJg(EF{`(QdJiWGmL@g#9W{<)sF45AI$dK)1Yc7omEkGUcV zh;j~FhmgfD7%~vWzI*1nt6~ZS7@VjJ$1ijCtjGJHz?A(fHif%r-nr^l5p~REGO6*C zIVvna!02(2WV@^8IT683-Mfl5Htc;6y_*u^>H}?jMJ}k+$0L{bxz=|&m4iRLKS3O@ ze^4~-&k*HK#CWEkFEp^S66{NJIoLfN1A*d4spC}e9LUEOs?iWwTzcfA@?l9eXhw7j zVBSfAOTG`Zgj_?o3WAKC%Dp*k$44$)uLI|4*Oo7xS{LXs!h?lz9!l)Tu!lnw)7?t` zr^^+YV#{IwOOOnJ*T2^ei3@tQMYi=^TBK$*Z^4g*A4y0pT2(bz1+q8=NTj(#w+MN@LQbxqL{=hV2C73h(VNyiTk<| zEdHvf5;1+A4F_<&!M0_!nBI8ULd=3)y48GWx`hjX=6(bJ*8nhL&Z+~D`+SRn1OkG? z1_EOJKN>sG;P!F#vWaijw2I!Aja{`q*{X%5 zMm5#W^ebsY`X6Ieh(VT`T8DM(#OvzoY6_JxD2)xZMrpGFSLFDwmBVMN22jPqhWZeb z?n({nkRxuIXR;Z_t%GB;j`4L}au!=L1TR@_O;8-uChSV;=1R>>47#hEDS*8CPStWK`$Z^T@xZ+WC^m*0l1S?qUo6vfhS1+TVR#y=^3n# zfr;CwiP31 zL7pFfN2XJ0?86$mCw6EL7#O8#Of-YqCu5La13&xq+8KkOQNfJK!74DOBC{t{cE!!B z4DS(eMcRdjZHTaBEX;k8hcDX26@t(>7}1SnY;)oeA`&R3o;1nTE@n`&iJdh@nCXa) z=Ik;SHMJeOnL~isGQ!~-Od_d@-DlC?@q1Uh@6;0V+tf{W{Ym1hVWi)}&pwWHSRLZB z9(H6nk28DhjYN3sx->pPXilW3oD}Wbn=A%csvdk}Nv-^Kd=Z2XiJ9W&|TjSI!-N zL+#o2VDv-6)0pZj8U&_OhE$72rV7*+&V?=m81oaTP@sHm7GR*B8lLgiGD^ugOfn%v z2Gm7Fw);U^cOisv+Xu(Sgzgp4prt=XWUr^i2Vbw{L<%0*vMP7#{ z&fNt7=rADzp<8^=8%T2pPWtO`$if2d!EXu(W~ZcYtOq!bRzv|_hx7|fK%`T7DnEqN z9oYe-+P1-1H8@5J(xtgg*-=bZ>5K#;bNve>%wov;k_ZI#53C)SqZm>|=NiU#=$=|3 zXb#ix!#aCj!VrZmVF-TWFUf#&o;MuIt)~Q~C3iEzCi^$x7I>oIS{IKOz05MC*abJj zbm(j^-*$|Eqx03uF-^#=2E3QH8jkoRACC=|+W`_Nu$eG5+FrBOo8qlkDt|LwwOmx< zy1u8-x#zvP5FuV<+R%wr4T9VCwSTanwO%@+R+#eW(6w9mt|jUYWm}zrSlauw1?OHZ zMfjH89ZDJ3Rt-s&T+TW6botno0UhK>JWY)lrn3b~fDm&~hU3Qk+zF*HYJzI1!av4c z{{S*izdPl66>CboYnZ7N%P+a-x1T~*B6Y2!D-AL;6JpOD_(R^rfhF_@@{$5h^j zMGPPYlHB+Uyk$x7^Qb}qIL%vjSy-Eo1SO*UwzhSV?~u9PT@^W&0Trv$PH~$f+NYYm$`U|1zh6EEv zeSaZ=D7yZUfueoa&lRRP%kzeXUVYZ@4c+m<`RO~ypJ}Z>z`gD+t(3NdNnhJyUowpD z2AMoIIF)uEykTH>G2Z7+_=h+hGV&a{Q$KW~s|x-*V-nhuRi~V)Y?T)V2uO+o2#D$b zthk#PyVzS>7#TX-IsV5>gRN_~&Ux1(_yxMV1JvQNaj>@8#kKR~SDXv(?Gf)^uRr0>3Y5NX0|RjE@PfnqYS zRAMZpn^rMvMb2d=mT~Lpd3SIyG1J(MT5oV(Dv3-q5wgaj%KRlYL8e+w{o@=98eD4J zXb3Km%C(Y~N+9-Vp#GgJnS0+=EXh^1pak;ptHNV+XTD1`vJu#sNJF~YgB54@QIsKe zImVJK&WOTQl^R`vPFn0^3ChIv-mwXJWYnO-2OKu)R;$=@kUt#f>$74$uo+XfNtE27 z#yDw@uSG2Kcka(=1cV0wh!-V9Cr60JbuhpACCYXTzu>IO6Gru}2UQz0jPB0iL` zJ*E8b)koj9Urw$xg7VKg)~%HkN%^7ipXRV$qSlbVjKo7=Zv5Q+9=X3Br{3MZ+dFa- zykX-F2A4cC%E^g7YF~Lw>|#zW8%JZSY%&Jyh}5MTWXRH468ep5kj0l4i?F-Wb8(p@ zaO(QpZq=Jct@w4mt%qNMi!;`=;ZNG=4v0_UMAsUn5^;$r8vz`+Z01q+0WY(d|@|~>8IcO(8gLibB%v~T4VA>Cu;S(6^mo6-wvsuAaoxs!>rHg#*{ozCOs zB2aJ-aen0lDE(^5X*j3T(ccn@wu&ucWw6o!W??(l^UJ%uGIi z^!vE}392E9{vN6!3~-F%1M}?Fr&_T#sv!-P0+!t|B*@dls#|bH6>)uF*8`D{3s7_4 zp&@4@XnoV&4g%S5?9jGTPv<$wYrT=tbk6H%l4?1bdZn3-Kr0Z#@aSPBE$u~mPETR2 z&0LG--(II)>~10;)dWkN*G*CJ}k z2mvqpLF(#O3$;nFgQUP!R;IWW>^e3$d+?ZC&$8y2aA!FSdNOR0r<{hMm*ZRSwZpd# z=jje&6-lC5Ehx5zKSs-o;g`L$9&a_6-o2@*M8^)$-eGLO!q9Tg<*JgGsxt~F%H$fq zH#J+ss1QgwOTxaEFrsaKfbjTydNX zHY^B;_0W){8q%!ZK41Al*EE()a0{o+$2efxjO4Y_EJioLKTgQ+g^9{kN*Bpp-UaFk z-T@Y%d-+Uj|H<_)WE`?l%R9$X_vU~%@q9)kyhY%oha_s)w(6ohhRmgOC&1I>sQ*>K@pr63ilh}9o zPHbCA5%Jr1eO0MDM9?RZ;?Notuny)HwA3vsKS{wGbp1no3tE%u0Y1$zD$)}WwSyhN zT8m4honax?uVP8zv&BnY=P>Es9OlT72UtK4@Hp~l)HZ>pF|D3mmd|HYl;P=1fPc1q zCD1GLYSRQGy$D8?-Lbo>D4+N&SC?(ZUTadn*jNZK(|7Ag=PK_h{XHW|>{OE(la?;A zd5+>)+-E#3iIYXOV*hDO0v5@eh&$IBt#u~r^5&Ozo6Tk&rnpE(3r zhAsC|lDauxenuE9xcCKIFpHA1IyBog{KW0&5-uKhGT!I;&IzSWhyxN!OvOQdea@9; z+8RQpt&0x3nj0_Jk1F(FuYKwmb5g%~a}PIx(VgSmag@x5Sd7bJuT$!ICjrX5bX?vn z0)$vk;7v$hc1Aw#@Drjndr8QgLE|aCMdi*5K22)5%=$LMMXKw$uAaQ-Er-KtL+3`T zC}D|l6&v}CbPGL*b)XZ&(-t5$qjPBy z&$j8@j)aiS($FKEneg-~I-^xS(WwG@Cugez)zewB+EC%qhx-!Vb`fD;!H8L$H0dQ_h1Jn`!e>?>^42;T@T&kmHAtA8_s9o4DsQ zqE4;tEvMlhXBb&3ff@vy^Lq-2pZINa@RLPE$Dyc(US&s>Lt3FdkoeDiw*==eg0(M| zdcnb`;FGkEbyu>$TgVCYHCgE4z%99%>$-*Wm*0Z{y}3MOBmB&b%`>a)G#SV4UTaC- zygOu-(JTqSESGWyk#54K|8VX^3P`9Bi zfvk)V%dkcL2puwk>fysHG?dXG}QYZb;~ zO)olzC)&Yn#b5sS1*>j_fy)S#;-4tCnGYD$d%MXa%mF2`{RaB4JyR<(vrbAx$!7!@ z5D*yx5D?}6S1W2{?c(HY;`rm?{_i4wjhf6){tQy@xjI~PSc{^B`CKjtnP8&a8juji zN^uWZfS?TNPxy?b)USy5muvh)5^_z~Ou)Iw1>EuNl-E3KPr0^@fJ zQBg6utgxX~hrZ`z<>+SGeS$)NGnJ+8WAtLDIzFh^&#eN#EY# z{(STEFztLj@4jh!y4G%b+KwL6D7Ji8I5M(sw2p?=1M}YIn>nr=h7yz#m8}UL(<6+2 z`2&&nwIS^d$gCzog5S1Y)W)BW1*UFFg7Fvtjy_^O5m`osn

6V@ge$a#|>|xQ<(JnyqE2s1boZ}HgE2dwNL*phAQ_;7 zUYzj9*6`Qe>BrUH#gnZ$*$EbyNtg|Lw1mNTYI?CnCK-ZLu6oU75-%cB2GaalvM?ny zd55o|_PW)T!$h+UdnwynYlVhk@!FuQqz0+imT&}oB#M~OC*QL;Gv(5$K-soW0XQaR z(%!jfP=KPD>|q^m7Q;0lW<+^QLI6X9fM$^;mWMPSU5Pyb4{rOKt!lo9~kBO>(82WtasVHNM^rl^_qjv!TO2^QKy`Trbh-I9~*#S4P6z$ld zSE$AUa;nLH$_EnsjvC@mVws8ryXAV&nKW^6>``pI{wkBs=ffvrf<^JSUW*=IFUAU1 zyDJX8u5h8KNKe~XX4QBt=iml3lJ_(s(F+6+`^shH%`JUn3dE4gZJK~rPpqJx#rcKmS zo6o4!YuEKlbfqt0E&b=a?zB)ldHZsb*eO6-WYCp8Z%NgmDx%e9c!=O86NB4#bLlwk zq)pplgJovjtF8{6z+BWb{n5+QZ|c?loOe` zf41g4m_R_J|C`qgVCZD>GfMYp*w`JkB7fEDGbsodW5#2)GFU{}0EXK-ju1L@0-aFu z8Kc3~3tcrO3k8*)Y$89Nr{mGYEc7pE)?hgE)NrQZjt?`!q!%987Vz#OEvDF#m3Do~ z;MEIP1KA_7d(mISSq)tAT;F3*z|E&sdV|!OyHViv$tDOzzza5zP5^j=A%7FeY&?jJiZWXoPW;R|g`n9J)EiiOzM{?^NF^>zF7 z{1L{pv~_K?X9j1fj_Xc?k7K3+uj|fVZ9}7Cxc@8Q>)WK) z-SkUJlq*$(!aq%a-PxrJvb4bB(x{3*4g6c>R-AzmVK96khX;2qj-5&>(HnO|Mu$S3 zIErR98E|fGSiCXwdo^9~FWOLylMBVOhDh_%5*9go&hFZ2VHL*ITAilVEsy?STp_ZH z`hqKay1QoRFt!v09dw_Iu0k6TFZwl~QQ)~<@R4s#EfL<`vy&8Zd!zxDA{&y>??zIYUdNy`4n7RwO_@GtvUROINo0jr)6jOn`+B(f9qfrg7 z>pFy~;K}t+v9#sxe}##1&B{$f*_Xzu!&xEqfFDqbk7r)+s%G^79%PwS>)4{f30eT6 zsUSDw;2U2xRTjdAc2kYJeu0d87<@T*?yL3gcFqZDqjmQ~&a3q+Ug}LVtzHB;Dl(u!{h4o`XHI0EF%6bepCh`&29epb_(v6`usr{xt z<5u%bDVJh~tgf1j6hAHM?}0NThmsnVXqR-X?JDd?j%sF?u7Mq?Xq!?N+hDL}otb%0 z_Rt8tcYi-3c-;F}jA4y(_8qE!Oa9ISERvSgKL8Hp3r_8rPjPy`!!xeSC2mP^W(lN~ zWp|lU*8pSbeG$7S<10?BUSTh09IHP5!i51aB}Ty_Ej)alDY1t;<{NX4fKj(>&z~*E zls4KlBvvy5NM$JF>Hqd!(xxnhP+*% zys!z^cdp=3#Fl@GjUqmB!H{~w?nvC#l{JaF>!btD20jpV5nU`p%VKFsUd}<`+Xr65 zM2g4+fU){{1*eCAi!Mpu%Cfo87kdywV_FZa3EtY6{!LAZ@(~I(%GVr_j_$eF(vBq9( zNY1$1P;QLaN&FuL}L(Os1yfPE@Ti z{S=^B419_3^3Ssh#@^Mi?~)$1`BiSpG)6u@O(2%By&vNTK2;&=PQDpVXYyEHWFY)} z+&K&Tmm?6l+|gYdcN9(+oL)bq8##4dU0kuh!Sp+aC8Po}qIwgArW$L8{%bN*g=maQ0E0aMC`v%G)+Z8&1dAa4ir;yOP5*k~Z6SaxV z`^0aY(TlC)scrm=<@@&LASebS6^Rm@%%pr%COXv~CM5ISiQ9jJ zhrMxFh9seF`#Lsmqu&sH_5lRGfjwRsQm-5`ac6c<#@&4(cm{Vcx|>0Y4Dw6;{-@GA znMWC<_>*9Xf&&CZ|Gz1{#sDWHL)-s|)rQU{W*+~KRLzOv`>Ze2j+j#4inL$*Y|sxp zzi1N#JD-0_U(X2lpEyfQ&G|$noa?=>=Nw|{ulPf(XM0a5fCVSZhiPvH@!0-tS)62Bo_r&d>5xfq%`r z%|va#e>zbP7u_4UUX>Cu0gon1MD#RT0tcx~K2mKhG~7%+M6K@*GxAr<$$`K|ODLa$ z?)#i^c zRL2<;7UFG79YNS85+R*V@OG)T7Rvp-;q@yURohov*1dIwcW==hCd)rh3WU%hJ&R{8 z%*m=;UUWcH!71=|oJlz^NrPWqfxfIb|2a1`7v#n)G@Jq0xO#JqBPk|-s0M1j>iCb` z`IrUI0@R_Q)`5LW>2)xLgJ7m42ENR>$Pj~r>(wVAwvg0hl$3=K;j%OQ*%7cIEX~(d z7OJ@yZ9u2b+AlMd(nP||GngLZcCs*x#DJ_>b)(Kgj%hi(G<)YobQ$4+`IncMeac<5 zC<%xl?vb_vpx&if9wTXg7Z-vuC>Qaje8{)Hj0N|*g3n-FH%Z?m_QZjzvR`6;LRpa( zG%L~xtwGFpN39qR-QnpZB*nJw0tPJI5hozvJPZR6CSX$8EaXYfCLhs$!R!fKk5jgm z+z5rGM!+d|7*qzjE2<_ho;G}V-=BZRIlsOn^V9cj(-Zn`Y%&}hYpXN^$C(l)($w&n zX&Z1dcqEl3Y(jE&=B()B6n2!{R1(Iqm3x;h_^=}uc8$MR4QE8w~?dm`Z=Q<9lm%8@h9TY`oJTVcw zX~#%PP6?*e7-`HxT6hAu`yb{^V4b5g9dA*!g8zZV6q3}aR5$~c?_q9%{zXiQmQyw% z&N^h%A#PoxSw8^vDuWwPi;^F;J1E0b4%I6)e*^`uapMRgfBzYm zVtzI^WTQGF@iU%c?mhGu^NGN~3eoD+D(Rw{=E4?qAj7V4ph_$*-vPf+j1D> zkM6q9j^qND35344J1u-(60+e1&dR4dKlT0c{-Z4()Jp{R$N1zEGYT=FFqJc;3SRK zBFNDRLRVZBvNSumrhWgKuF%^+6$XsV^sZYZmgv)cFfW7Y?OI02AR=0Lb!|E=#=)=X zVV=91V7!3&-)E6&@a(Q|Sa2St0Z1N#;;$tzgru%1lOI09Ur#8ld1bybAyd>4V)aaI zf4F$De?>X~T{I@guS;X2#p6Ky!c!6FV?cah6ON6?&ibNp^}ePkVObdFx{~Mp{Plf& zc2JM=?)2t<83TOQ^ahGmw=6Wl=+o`iAE*{qF_+i(tsl)tT^s)wJ_G@R-+-@i{s8=F z5C-CR^<1=8SdyNrdwaeQA-0mu7pOGZ3KEfqdWjO;<)+P!68PyuX8>9(NjlU?f{m5C z8g+Crv0OfQpkl?POMa1k;Y?{JV!jY#lDI-0Q5X{~xYf?=+$uq>ZW`{Tw#l?_6EbKc zQINbgH`%z#COd*NUF%~R#*yrB19UUVF05C)zm!8{!F=S1t3`aURu%!Hn zUQc?`DV#S0(-h^w8RpA2g*kLkl5YNT*NiNgcDlu`ER>GrwAg-B`C8{b?kb*$+Z$sk zq}B}dnRdmO^2#r0*YCHwebhzV+`iw<7z*-ONTIv#SQY~Nj64mW)QWt~kWat==DfEn zRn2n8AK7FC3s?QRhx&j)dRe(6;V{k&`gE+UcP)FQ!*pL*+L()BiJ`1Ntg4`#&zeA&sn&f^Dh zasQ8zefhQ$f-R9=xd-zr4Ses^#Nd;rJKZ=l@m5jF;Mk&3>jo~2rH*R5TC9ef( z_e0q0Gz#MrvC+C__8-ZV|3>fYk_d?yCd<36d#sSX>(PY?yR#Dv`c{W#4KKi?*@;pC z$eQItA~WcmmJR!b`C)Ruq4V6vYhJh&EB6V4f%SANPBC0c2XS75Q48R$rpfU97=dTR zhzFDiJZz!uWxRsSIx&GV=zPOdb1>R+OXs%#-&@Z2xwuEtpS^(1Pe$dB=KjChY5v=m z)6~%DM`Zs`Q!=Hw`Jekazn`Xr6EOi+Y-Kqq08FyU^m3@vQ0`d?4T4S)!7xKbsxo%@ z(Qg}HN->$1(lrwPUW8VroAla^u#d-tJ7;TtL$ZamM1@Q}8i!tutPaN}eNe^*jm>%_ zW11pqJLO{%wiKE?W{2Did=~^GmDNlQ6mw0%uw7aCrqMVO8kI(8-g`>4fKuD`3fd-m zqyH>Bxw-NNd*YfJ)5%ev*5x;NM?1-8N_^uq#lopGGj+;F{79j-Y|#op=%qCE57Mt- z)_et+%jI<4nD@8iWk6cuf4UEvJ`!!EsWoth-8Txn=NuM7<4!LQrDX3zv0G7C#QEBP zw`yN%ZHY+}#RlXWBX^I5L&W2>CuV4Iy*hfc>GmsJDGQ|u&s-{DYtN`%u`Z`D_mGXyQ;+`DK&i6_ zLve>czTLWvd9qqq7Otk<83bjkxUC#a0FG~W7*fr1BYsf5LkPFW5PE>1D{d@>Qp>Yl z8ca?84=~g$zR*

y~G4!zb%5MWPvTwdn{ouLA>mTA*91n)0b8XZxAhV#pM+DMc?8 zP7A`toiok+GIeg8s_OVxWlt`Ek0;~rc-3yT_~l#g^X(RQYPL|QZ{d!AUhjKv(+#m% z3$fkBn!35IencZuqPCVrJ=-Z?69oJo#1kVvH+IIAO!w`HK}<$c<}z@d4#q_RK-ljj zaL-6z1QYqo*sU@ks6c--z!Qg3o5uUV)OR0)m+;~n#g+Tm%9j3O`db^QXal~`Oc%o~ zikL6dLlT7;n1T}u%~`2lE3dof?@L)Rf{-qVMxMn;R6`*TrT*IAI;PnLeM4%seJ8?= zV`{GJS`eZeP^Bk4dG}nd+YGa0bd~_oM7>e%FCs;VvL^>y4=aL|>!J90{y@03^VB|j zeL992=y{~-`u28oh9&JtG=QcH0NW~?AJj0!wFtS@RMT;wH7G_Y^%D6lC!nmh%ZuF| zXuu)g3d+2dZg~H~jl~g(lj7#c2^(ocEbC7(l1Rsfb^fQ}h}6Zh)|>T`HudUaCM|KA zYZm#7;*m#DKF8lU3PH^8bbs`ff&l_}-F#JEz)u0rKS1!lY>75$7D-EZ#=!wXEFEPX6;|siZ65cOXb)+oeuj~RAy+s@pvHV|b)m!6-UwD~JX3L* z`z~!xAk5~H={iEkxV%tW-vrb{l}j2f^P9l1ZiZc82$iF6E0{!SlNL6@@w@?ws2u%7 zLHJ_SB3hsfXgo)lmoSYBo9SS#EQR7bq3FnHEr?WNwjh_p%q49>#5hI-qB1M+MBPW8 zj6sfa(qnQ)P`Vp;0m$9pm9n|`=5+{*n2!6TAu*|j0_g|Us!{&pTHnR0LwG$T7fiue zfb>?Hf$zv5+EU}>TX@R=v|wm@$Jn4mN9*%dR03ti;PTGquSYY_M?sERZr8BR&Q8Y| zk^Bu=(3s0T(Ra~HE`FOs{xGPQNW|$` zC_`~kDgJqeP!0BpK_12cVNi+`12w&$q)4yMZ(Za(0t+Vz6pQY|i}leEmOC5(Ht%&d zv>4&q)`<_W8g~Uuskj&Y7~)W_RRH8z{~1NuG=ke1>tbEe7)D3yKBk82BpLyl^T{BF zBj5a-neXWqtpD;S4A)X4Vk%jU{GJ@>`vY3GrrP7BTvu_Ma4ja?6n$)q=gf&g`|>{l z(XatvC;}lOVMDawl@XsIJ1$4nllOS@eFPFEyuQHIG1oW+)bKda+lQLnACf^%>_Gz+ z0jf&dH+7{EX`To%?I7~$%HKw7PH(q|-aG1jb^6_4y%f|?` zWFDo*Ry$@VG$3|$_A$;u(4P3}gdRF5*U-)oQ}5-af$pxC{8EEwnEjM#1ls9-7t7&x z?osO&?o+QiWFs=k^u$dr#$k?$wSM{|JYp?o;U}v*A!TyoIjvT082=VcKYn+k*YHRWL+WEvMhp8m z%>Py40*b@KMxXea@_!Z+>xNj`vfzM#_Nf1V?%Mu)nRPVzVOrQ4dHnY^K#!KT)A}g7 z?{VEq0U-!05*^niEUu^JU)KvM8Z+y%oY>GHO%zxM(v3)6NdWKHHJ{lvY+&+&=^&&O znClFe7yWie_0j!nR_>>n!IZ0CYe68ou{OMLV=f1W=y7^^r10U3gq{WDKeA|bq+fgsBRv_6?LiJOBrLm>H5G!KBwE4_B%o zYg%(cq|Xf5Av_JUW8XeHsj3&@4b$FCVr@!)$N~%&8yzMsx%OH-Jk$hDMR;2fn8^pN zKWVlYt(nftiga_vZwDN^z;z zfd&)tj@Zc64pRMPN)x2hc~@#syMe^&pq@BbrM4CP;@VR2PKi=lFshYIH45h^M$M8fHcenY;TIEoDlh@nu*4xxnm}*Y8!>5}zk>MJH28=2HZhIXuG=h1O*9tl176oR`G?PG_Dj+2MD&YU znvL>Z)@pPbQ`2E||5&_5Xy-cWFUf_9v7>HquW}7ij1%a@cTyjf-Y=mFU-qoeDMH0h zGWto@dt+L;%8a15$L9j|NVNiWeuBasm-f$)WWH@U6y3P=IG9oICjCIWhDt#@g}A}? z==OeU$U{z82~YvE1KdmAZQ2RMNPpN99Wh6MY#z1aT1SqZ)v)9k8nL5>Bw}Au?4M26 z^N&=H5n{gwq8&RA6VX#%QdwKfT$8D+4SYwbuK`>=L^kMtaeO><=f4^f<#P(vp@`yE z&FB3G=Ah_NNZIfoKxlZoF~Tr$jv-}g3t`=6_>912AXeK<32xOhDK3KiZ}|sKyLQ8Y zC6p%!Y1=IQ(j8=W>t0HT{sZ+M3$4N^4PqZnlDPd=&f^?-g_G5~$o+M=-tp&< zek2p%uTTECymf0?!+gJcjufxDLs#Zd0vPja_ORv&+H*qH8Q&q*+S~|s*sBNcLrn4Q z=nTR{5#NzjArk^(EI@5tjP~Uv660_{*_NGZ9z4X4PD);Q?Ek#tpqZpi zQf$E2W2xyN-GTWoMuPw>8LEJ$bA7BFp~&QrN&qNKPc+%0%AdCEHeZdF)V%`wRJ{x- zkjc(8OdCV-c`?-o)72$|O_p+JMwxvrr9@}M>du1PVd04$xln2;)LdS_Yl%WUh(I>K zlGM8U@mgX8Ym4}Rpa}c|AnI{*v+T&QiTbyCJzsC{ zMp7uq@u2tM4-AEZ_Gf^Vkxr1##q*O0a=D?=ncbkzr^*D`WJrT!Lx+E* zYyjQQB=G7}7`ymhgz(w#j~(8tjDlDKu;+?}D>ewwtk|bNTG6CRY0&h6KlsVHP~PQC zI%>yIhrty-l~p_j#J~3>J7UkjyZt-iU{xk_HEu+aOJ%spd$((yi&LJRly9}g zHF^xifd6nTF`f;tF))>bs#d<`n0A8I<1A+eoppzds9y^p8xOtvQ_Qy8y=x0yb!_Zv zQe(t|bpj&{ zo)%L_-7V}G#?5W#mZhF<-_AyeUny`M;WsBpHuszBeEmNHP*Tr4e-iX6Uzhf(S2ek8XcDUoRv=rh4K{m5xtpR@eEGFAZ)YT=BDrnL2jEqg)U5h z)Z0vN>XsE^7sTQ+m^nJHCN0pH1r>> zcUR&`H5^C$AB4SQlW0-5rJ1&E+qP}nwryJ{ZQHg^+O};w^CUZ~`$p7zyKY47FZ&Pd z*lW!($9P7xmRY%WAz%VLjMllC2sqEyIKZi!5Gf#@c`xaN0w+{T8LG`=-b%6jI<%Iu?JjdjXm$uxrGi@pnD{CqQ}r_wD8fa{J7CU|7j9rE9utn5G@kS(Q#u~VydS9R zxDvX)+0wW@R=Jx<`kNz(0Tokq(BRVT;m0B!6`D?w2uJIm9F;!6!X~^8E>=U3>j&7n zd^V0q!vd&X%6+Ir9x+B;nZ0~PCMsf}A-BiqitcJo{6S&q5t{G~iA(a~ewx8XNNd(d zG-R+i+lI9e5^NIp&t^ZHN+=NaAD9GxL-Qani^Uf&qo&9*h@Gu*xM&TVg6*QjS0TJn z6}A9|st&>+fTXcRyse19A}r^`bBf8|Wp*tvCv?^s3DkMcr%R0|T21 z1(n!)AO}xisT`|I9&!t_-4qfWh}crsl8&$j-BR;wFCTH3PVM8}HHoS~A|f~qrM5&j z)Tdhd&RVD(eR-39(+7ZFPingF$+fl;f7$qBx{ak;uM}DMi-zgTp%3{0+w+24eAkr^ zL5DF;a|-t({(`=WujIgS55=@>7Wy`Fw6FyE6Kmo1#W*KBQsUtZ*Ya0SGQK{T88lgL z2SG$s5i?nKV?E$HYY8PRoef=$!@zasXFfb^*@Yoz@z?#+BJbXdN&R_8i3NR-4!WA7 z_jQbMWrPak?K|KZR)MmhU%OD|uMalXy}@9S(!+ED=W(-DjZwutkk=R()o{uGRP4V6 zPbm%WlRVw0qs_u9mxr5zn345TyC!$%3jU*Su>MzM0*sv4B*wFg8bhmGTjFr|HTQCIdHj8B|WV^Rv03tSR|V}Nz4TKZ7W2lmW=k-eAK zya9*0GXR1v$V9QxL5r7p{F#%j&n5R7u0R_Vy_IX#yc?#o2hlwiqA_+_xh+Gv0^Y z3$g0~X;e#4ONWCq+^wg^U*q7=aq-F2r=(yR*Cg-wsvJ!@?ITCy^v0n&odA__#aJ_O z<-?=;v$caF)_D1m2B$bOy*`~WO16Tv7_pLzkN!l%8nfs!38m=wA8}NNiM+L75q~C~ zh2UDd#sTqD2z$iubi=iZ8yVLQSoI%jV=!&MPGqun#{huty=%(v-A#4*lG?C$^0V7J zn%@n}%gA)`#go727rk-iZo_N39d6K#ei`**ByJD(FT2*3W8qZ0)6N~uf-&a{{l5!} zV2!#It6%^CXs7@H^#2z@(c=GHcc%PTGWY*mcQR;O)bswPmsn&s4MeqAwgdVZK(x5l zbxfs5OGtL=e|$&r3ne7wxcNcgT;01rZf?4gP9Oh$u-Dzz)!n3WQxTGDu1|o09C{OB z3u3S3Nhm@R1x8&7fQ+@;m@s8&vLf}E6D%>O&L?&Q(rG$t^{$NESZ^@!^xVX%fp%10 zB86hBIZEPbIup2~CLzqGlS~6j(5@v!Eo!R26k1vPPQ+Cg8}I&(RW@o77ZMB27FlWn znMiKc)bj`vp3CW7H8Rc%BUg{~{(ha3b-TMdKW^Tyc7l8i>ojAdT9k7ORwPiBa)P28 zqwe;&x*m-B>fHt(>oKSW&&x4;qnZ2H4Sa}Hkaf=uy%beW9#n`6ARdEQV2#LsR|)-B z5NFMrs*9D7vJz*)EjC)#wzz)+r;IaG&Tf&i9fc-+Nf|65be7Lym;_?_qmVgn2xfb9 z3~fm^%*^4O^`I#PAWH57TCaj}O`261*hh9rJFK!UA}R*CsO3q(yWhq*(hCJY-O6K* zO??y9E_shVBZc5KlqM3kRLYTw|0txYy|AfsphKD0N+m9#9%9I=w8%!9(isDZ0%{K8 z9$qcNchol-se z@R!e~tWE9&y}RIfn3%_D75pP(^bP4%-X@U%mk`zXS_YmZe6{f})$j(sZ0j_z3n?Fp z+Cn>uzcmr#tgw7ZxePR^}$qje;ny=s>4TdlI1$;@TQ zsS1Qw<||U`mb%xC%?aP+Y@3+OYMo#84*|bzn;@mWyt5Ovs%V_X=X&A`Zm;#9JO=^F zaMZ~O;F>@Yo|YH%9*4{(V;&)F>Y4%#*Rzm=OFt$a0$>#hS}JRSoKZ;)BtC`u(`qq}x75qRS}jL5{q4fX^2o9mqF=r5QVU-ap_720Rxd*;b%RT9RRHCIo|Ydma@~ zb;7!kpBIIgoVZx$e)fwr?d2l zsr16!D#iK29GMbfH!p;Y)2WUyd&7G}4hg2LC%20N_cYzeBkHE5jY@5A#?ROVgfG&z&8{foGCX8ZGF{ZA3!pVD!&_ph95*0+N`mIh2}sWy?cZ-{(@xH&YkK{`P>yfgq_3?f2S?%UG9UeAVbQ~<%NTd3K4kABk?#^ zauJ-%^zY71ccR;yQ=~zE=?B+pS33xdm0!03I7o?ioz<8f^Zh%Sxb^PBemJ3e+ zV{chewroik)#<25%UV~=}K2Abkrg)$BhJ z7OlT#T)J5%;K5G`wunH&febPKZU>)HCg*?351?UYN;#GV#bLZM-uG` zt(B1pE1cn8ks?H^{Pgo;O9l5skC)Z5&VqkTc=j9wU>A=)>dVMs`?bB5*khtquvBz^ z@?(v_{+Lr(gJNY&$mc7x4V;lcxIGI$p3$OYs!77iK-dG-@Kko%)(7N~ColGavQa=M zbBg0Kgc4W-U;QR^m;wdX$wqX9`&+&gNCYv$1QjYfgIH*_Cn7DOx~;540ZBp)!&a4E z*-p0FA+}$COW|%7Hs#&h@^0OKZ1(hPr@`>BF8f4o6HH`^+~2Zt?Zbt9>aa0(w^tgE zMhW(gdEVIoI^P)@YciP`U>hClbPK!b?nLvKAf*Oj0r#izap&}LBLoqSf-GT`>x-T{ z59lmCA|;%SPMFk1s$(Bqh}OW_z3f=;C3qPqIQv0KJUCxrA(!vVMJ%PsfsQ!|EDVZSmgNI2e7qL~N%gvW)jw!9M6 zuZp`a6?OIayfa$=RI1vAk~sJ$FNyD`?pC6GrnEGBKj2_5YCB6JN~aQsR%1b>IrhUY zGgtU~tgyIJMI`~|*2Z{%%v71VGyuT;6=Sp|OqL~rE=A!EFA|lXU#aQ#<j=EJ;i4L(k>S~Z9gW+|MNaY}Wv|d&8 ziQTcgOszi_(?AjmiB5gz(qQYE5xKa^jiQTobqkoq*EaoxE#aZ?cM7ZMENv~h!@i75 zXQunX9K1HaCkW_GzCwkc4gDc629mCAj$BQV_V=+d+ubPKm0J#PAo6$V_I8r_?3$gg zNATPGr&UiyjZscpKE%0RbdiBDdP%wL$>w@VhGs`Iut`N3$ ze|wrQ=WAj~&1mc_U?NLvDMyROWVrheEN*Sjwzo2;RDNd>;Btqlf`Z9Guqd z6#Rq!`SfvYXut5@vB$?ZzzIvwHvwB4;|_PM6`g=#A&ylrwh-y_0O>HAh3KMPm_;0 zSU>Jc#|S|n7YqB42(3FLY`?nJpg1Ur?Y`nMr<{C4s986r9L zj-3Wnywl`6H=*ZYM*6gXN2L; ze;|eIf%n$Sv^>Hd>hfAg_y&!`chZ{8okrn%+RaF|>JG1Ua{#zgBkU=4&954E7ZpiczE zxES1hsIE%{c2c&$@)`AAK(Uq19ZXh&Hu>VAa}pS+bu+5*g-lO7?jun@CX>lO4bo+q zV}JQ0w5z=3*dDYmGO1u)74&43$zEA8ZMm$vS{+n+!M$woT{xOhVks@etZF0F4Ib_b zZvQ6fevVK$Pdi{>TVv7usCu|J87@!B})$$2+LgdkP4kLiMoY06@IZ&ESOSKlFRvipO- z)G{8V<{?fQ$Fcz92F)g#V)k4^=3dD%4LZ2XJw-?+VzJZyHClSDoM>o0H`}vAnsmBP ziQPL%i_YurY_9GevDkGhqQG;5wN&QXsqo3W8-u%1rjjnDUk`x7*xqx>UZF7eR;mWQ zC1BcEep)F8RS7ffF6gbU4p@dHJggVnxMBEZM4({NI%l(w=UTm6%VDPgEkGSuWRWJS z%zgCBT*2oUWo3kIn4PpRlvf@twhdGoQ35@rX+Y&F8N+CY9Q$;FpffcgzWe!02u3yO z`KL$VN^L%CdSe7_CQlE1_NT?f?QLIc<>90n_&JH#m08?O zSpB2**r@em_8Gbode$yv3E8~c3u~A_Z3}f+4YiXId7Po@%f=#t1!9u0@-FQ`GkM4n z{=Fw)gbwe%qs70quLzK{x3sc3HRn=>9 zbho(*p!AMTHS(!J>;!18le&@U2W@dlWqef~|0O_G`oXXo>Zd8VQh{0*i#sAX65;-mZxB>c2h z)tp)If4@#P?RQj;MN=O#d*U7*k_E%Lvdle!XlpfK3CheHA2k*APu7q8roR*Kq4-S} zNzBE^Mcp!GoMz%qC3BndCaC1sEmLS_tT6V)J=B{dnRv}*$H|r(_kWo-nNFae4>9Oa zY?D@M$*|INNCJQcNKyXB>NX=uW{`{6z8}+Z>0#;U0Y*(AW;q(k6eOfjqL7RzZoY$6 zTvgb5EJy;|$*N8hnkwYzJfH{3!- zsl}EnCZ_#~M5dIpoZrBkz1&qEi8o8*F-=Z@AcOF%ew-A6xm%@=+3c!p!}g<-!-dsY znX4#Zm5Dqbpm$0ZGm|Keq0az8(U>H#!4*HqbB3y#YylQ^tHPbrl}fr|^_(M3mm)j2<2zvv?4g z9aZUkq92yy=rCL*cA}N?F6gW7Op2dt_C=lwYp{bo1WvIRK)QZA?y>Fji!bAs%%k_z z7!0Z-!(hQKPo+M%yj)UMCR-0so&;HI&Ko%I)+qrImi6pE0#`VUlpX>|Dg+}E5HzRA z`97RUz$=xP4G373byt>zby&Rzk4R@z->5xCyA8f?yTot$8|Fz>V8ysL60B)w%O1D) z0VRhh0cfl=8v3$fW1!Xg{aS-8Q!p2zOrRj8M&#m)ak$&7YcXp(#%R+?@^fVr1_oPx zA)S40D0#yGg?5#sfS3(m+q&@(O6g{n&Laz^ zq1;YFCOd}a^-)730N7^zetJO)mbEky#4oIOMpUlV12%MS;td2O5 z>TgRtd9YLbk$FZal*EZ7E94p>h(7k&D+Go~Jh@~7t^(P;z*M`p1I8`Vsj`?w82Am6 znkQD!8nka^qC(U-MIKeN6Fj&LntEgqcJ6P%HXG*L@@tBkcM0cE&O9g{0X!o$BxD8u z@bb!*Ep1&Pr%_5m$}}}dT31cMHk^TUNKh1~jmAHOli_{Q=@QO6j5M|=aNJ~~fHt?8 zwndT`4f~SUeK^hYyA)cMWmpXntIZmxvJwbd!h=L_@@71OuK<$<+v?%xS;p!t=%i$) z2|j+Zhy_1yTvC#oKE-0>&hBU70d|#*CaTsa;e!;ISPfeB=P& zJ0pzL<4x>1l^$YgSEHZ#Lo|Y2cTU21dL+;Sc;1UypCxI<-OmH>Dd^5rmQ>m?QBLYD zU4~qYUro}U5&_os&Cvz7c@MT=G)G!HFlql%@mB*2_~6&8NY^lti(cF$kFR0XX+fxx3J|La zIu#qmPGonjqbJ!$O6m)?jA#290PjsakwM~YBx$qJP0ljqd_1^t>cQXM1(o?CK-)@? zRWNt7#r-xJc5(oBfwc352UA7Cq|^);C!IWkW2TU|Dv=jm5W}9d4p)&e zo)m7LGXELb{}|2^LlKpnodN?#gY*6}&B|Au#%5=1yy!?FJ&nmci4z(?ik?C;mIm>* z`QjI!QC)xsA-TFQm~x2T;Es4`Csi;Nzu&#dko!ckAOjK!JA+3^oMZAVTE+$#q-nka!4kA%{g_+v;2M{d0|K;?_M7BgS*-gq~yw4$r z(E#<-XBXP_@%Fqz)r9_zz!&t!Ji2OQcYUPNjWm{`yQ&z?5FSLOZ4L;Ar&Ch$L^Cwj zjLW&`lz6c_?X;EPy&IX&Pe_02p7ZAD>XQ%iV_E<%Qr`8QAOd@4CYIK&l{HqwMwzY* zWGymRX%BIQVgKqL_LNcVILp2PXA((guSKz8F(d=IRl}7x(&^%7}VD+O$EA_nMBy1^hX@~LI2FcI7L9sNKG*#RhpMi4}kk8 zdW@J$`o0|X-_lLtb~1Eps)hbl)0+8hITN|Eskk~6keCm9KTu}UKiUd@KN?4t3~0bJ zJcER8?DJaLAWx#i!u*I+rJRM!Y$!aI+GW} zVtro{Ps0dR-a?FmbOT$XQci(NvdJskHe^8T_uRhXHVz~kIEV6qcvtLqGuhi78lGoZ z>EJBg-|plByTNVe23t#|`*Yqtat-Nf>B-FQsL2ETPhov@)fVrIW-BxGv~gh;vXhli zA2ctj8YsGN#4=J8M-+iWD0Ty}$6xR|`?(U8(=t^t_tVLACq8l&|2!T3C8qhzOQBGef?| zgz|uM{_InNF(T!ICimJ!MeCY!WK`tiEDdoUO^k`x$f7Hpy7v}>2&wb>csF(+prfjzX74VgNGfaY>BeUnxcx0E)98^-!17Kw)p9|Da!iqf3Ikle#L6T10oTIy>X{3Id5pacc|#4#mq|8!%R*2K8CPvlB- z7~{w_>Y7Vn`I5U0f4wDJk-WDwXq%vb-68X34*^=S2uwJ%c>i=jw~wv-qx0BI&NGn~ zVt`@IltufiK0%*tR<(CQy{&%=q8SAV2$&0qC5>oKtf7LN$Hu&!{jFQ{>@nJ`(j6nC z`Z0*x&}g7h2qHDwAdze(Rma7~ou!q%Fz$~Awzwj3rtcJ7-YQL|^r9*b$Zssd12=LB zMFa+L$$UJh=_~>zLNW(~`L2iv!|Kbk%w0tiz&bT`_zR4q7Z6^)%saw(b|r(9&+P!N zA91V9n8XH5&TyMh!eErzWu0TMHh$$*Bm@cwp_jZ1e;t1++y20&+0@$Q`p>aA96Z|1Ca`7Ppw)blRLGkj3w6k*e!f4Siu8rWSuSbnTG#X8fN0pV4W=8gE2 zNH&MnEw}d_qd2fuRWcRyT?XN-?OL&M{l%yjkI(&Z8oaV@oiu4J5#;<^<_S?SWAy-1 z>3-PxE3V!Kqcj`T zr(Z7*@E13Vtd>(|k*r3I_g_B-#|u6E!_QOwq$(U>W}sv^K;2Wkrb@?f#?an}>3eaOevmQTcXkY_UqQ{ibfA4vF^ZT` z@NQ!e6zA_6cLLd+eJDbqzS0yn?zeoNB(0!TO=liEQ#B0JdBKya`yu?&zcCt$E2){S zyUY|of0Eod-Z;;xCa~_4Q5h_VIgetv|6Uq}BS9UeRps@!cA-1+368rnMyUdrPa7K+ z9e0bZzSX?>n~XK8D)~5K}X4>a@}GjfPX^mY3`lc@EC&@v$W;f@@VsB?7%4TeU<@3ohJU#e}F& zt=!lviFi_93Clb3q+tQS(=EfTqunPTI#2ettP+&cwGA@9Ds zN1=#;JHmyP#BWnz2!N0MLwG{!mGm=&9FfPYkHBu8E-vqt{Fg4 zXga_NrB(qe+?V6H`-@MS7)ygMBmSCLZpmGwr___^6RCetgn@L?ylb2i{Hn&sZ4*h5v)8cI53nUt zy!27&5}5Np@N56lK_tWBI4Po`3wMAW61cM|9l6Aeq=1$EmG435@F+6}aX)$+Kt z4ip?y%B2Iy$j?J&e2sD-pXTLf)>Z(>X#Zw-e5kfX7~?$0%{GB9_eP=+!Db)!p1ZE; z2}WyV+txy8jK%0Ac@OhL);@T*bCNP8xpVM`=O(;~^Ntub1_mIcoj%RRZJT)*yVe8T ztBX6xqIO7x7&BrI1H?t1vN-E?1q2IP&oe@Ge#?Y2sfCZSM}ciE!-*l)ZTvMjEHV2r zaw_if*B@|Cv685`{D!-!uQON{J%Gr`1AO2>I0>aKBiOrGN6J18l(1pGrpREV^^7$% zI>^6f=ZWVA^e zK>laK+&rj~Zy+RDY_U&Fs~9Fi-e0^f1$2d_y4$e$h`ikH*c@()J)kWd;cG=Z5jmu%L;9sc=Slxtk`l(#7f zE5~qCKji^j{Ek@B^JnygFA_-gald|Wt+(LI+lg=f$5P#bd?fA7(L_VpIXM1m++5c@MB#C*F61$;6jDk4G`JPW30_Fu*#bihmvPUY3aHnyzZFE$qg zT6;TDscUds_Meq$p=3afB6WVk|TBLg|eD(**<27xLK)Mvw7&8{e+Sukdv z(dy+ze%|-%W-V~aQYCgV?{#W#%X<#cH3z=G-)TRHwUeRyb>{oC zUb&$b#efm5h1>(4a;gF&CPu^Sa&JlYqL0of4G8^8qqR<=O9+NHvbf48#*Q_qY056G z4GdYdf}R+keTdTnB|s3J9qX5t>Y$h2|i%q_A=0j(dB(i2vI2iaL?zy8Pj-hT_Kb;BV% z#y_A3Kk(Q~>Mg7FU4@fN473wiY=vAeGjxqyzve$O^{a4tH~O&09SVW_3|;|Wa3no! z=+q79<>AXN)?%KGJGa>7Hf;|Hsg;cru;^BS=bHwLWeQJYJGWP%DinH^R%3W?@HRRe zH0}k0>J(FO9x$(|7UJQhFG80@$4ptuzXk-Un3svQ{B5OaBOW)bg^%nLVgY6ceRnN9 z)}!OAj!6U5@)L>W!jJJGLw!tgqpLe+u&MI48Q*fZ zLkD$Z{_u>;>eFHByn_)UF5Yk_HTex@lFXTFe{HRJ+~(-|(IukW_IE|4x>L#v>ry^L zwNVNN0gU8ggq<)#c?4rcxNAuiuP$xiC(|yYW@?K=^24T@|IY9=HHo|jM&bB6VnR3f zv2>tnGf4zz;W%V8i^*o`H-<_iT@b{I5@jGno|vh5KoM*~;)_4Q&tp)pkO*+a%Zn(Z z0FwYSvIfjJS7u!qRRGyk~p4uXk({n3go~$ipSY@5xL*g-f zx;YQ|g0nlY7*eLBFTLidsL@8lCG>t6{L$T$16^OKu%O^9@3X*AM}L|V#fDG@c|_&G z9F{-p%+POhfhG;eq?JV=i-t10g+p1ct4{EvUs)*kZ!xOzMrpDE|K3U`%(%)e74=mh z9j;KT&n*2m8t0X2k$hus5A2jVBl+L}S^cu=Cw}V$A-og*%Wosq*z-wji%wahKD60t zZ@t-2eBI4)rv6*)bd3_8t`}Hoe~S`?A%#94k(8Oq{-UZz^`tbzBY}}cUP6EO=RMiP zXj{qit-DI$vnsiAt7r^)28_`Q_ZqO%)U#KooVwavGwu}MBdKQ>eVcc#o2dYw-2*u^ zddmb+@l6gkIO+&D14ww4g2ct2Z9i0;=7b#RbEn1%HF!hI+74P84gq4*Y+HA0Jen)@ z?rJ#YFnm@NM8Nx}g86#6F&drfD&QrrTa4P`Z=q7D+mamC zrwMxG!S(XKw4sTjn5T*G6A-L#6_U=HPELWWrjnfL(F~U+0l>ha_zW9+14f0t49AGB zjlF0?Fb6Y+eW9BW2|%>cUd*ORWA?NMs%3qkXC1_BD@R{aYx`YnPs#Dj=-wZ)UVPNG z(hd28ZrcL!@O-vbc({`sJRJYLL+$L|H z`LIaZ13vO%`4M(n1)oC@NWa!eRN|PKSRqAy>>2xBwT2WmWYXv4MOc1ce4Tu+`CGry z<7fzasN`+)nL|j(TgKsanw_XjEFA z2&6tWy6QB`t?_D`@wus1DJqz*$i1ZY(Vo5xLX|Dl*(&vJt33sD7%17$SsRIB*K|aH zP<*W{an*7<1-4W5ze7YdRgz0oQ*6MOFBhhcE79(boP-HOF2X8{<=uCDG*PaH5Lu@13t+{MP}Ria*pe zl+OT_1G++0BX~DM@m;|g_^|8uNHJfQsw-rZNiZaP)5<%|o?9knt$-UDK44XbJ=x;p zWz~4Mt~3X)!eO2V`C#rd#|wZh=#tA3`phvB_g!(F?>TG}Gnzq=I%%rq?PKm>Fm9DvNalCxIHkd~Yer&kAH?QlHLI zb?P%A!Th_C@L}aR?^c^pdZaxqRLY@oWatS@96|&w%@`{YcdGjQzWp(b6qihetHd|PNl)$U^y}dXO|t>KB`jv2+OQll-M9}K%z1O zQ7g#o;O^{L61Ify@*1G|yDh#=OD@j=d*Gs6GODR1uLjm85Ia!!9R%$GnHw zrePKSRSt(c%x-R2y<;a~RE7$Jb$*asEa?~BmSTgB-QJ`vd2;d5R^^TP*J<_@6aPj#s)*hQ zLv`DeZ!N5RY2B(%Taej%k4<1u|DJKMCBywV!Z2P?uTOk97$`+=O$8_eGQXPP-{ zcwlvJ?Xz`UltX*+#j3`}Kf0l!2^C0hjm3B^PJ;)T_d)>Kf=nw2nAo)+O0&TcDcDP9 zdUc5WGD6k>s`Y%i!TJ6iC2|7nQFz5`!(u< zD85hop-zEmREvv1)O2~{8d9(?TdI9?u(^7pX5W=AEw|;^&$zxnuTP8=@uH*uAXDhb zQ!Ocv52+AXnyg$;(s1KDW0Q@gYi2xaoNU`wG>kHNqtb7}UoD*&oYTW!2wPz-S0_qA zS^N^NY^`SU-EoqZ&7#xLDWfm_$ZXC~9bQYZffK#qtyMa3kNCQ_YmE{LM`;nlf&aXu ztIpb^-=I=MG&O%{d-4~Da7>4mys77XOa34D0B zd*W67;e_SRZ@|I ze|*i+G}a?F%6{kq3|{EmJbYOd^~IL0>(5f%n*ZzeKT^EZ0e;JU7v8rYSG}ocW1<14 zCR0McTvQFUFAP`2_No!%j}-T%e9#kiRA#SmehU>U#RlTjm0nIL8#ciiy*(ou;LP&* z+7zbOHGoK%J3w7tlReGsVhE0bJif%g&EevA(vF)r-|wMwtShMg7o+ry^Fjhv_Vzsp zHJArruTihU%qan18i8r%<`P#&>ya#sQ$!Fw`?TU&&PGNW@tBU zf(A6}sya%bPq$#JwR1@Lu}V!`?`vIYE|gC_CK&cMmh`ra=;I*;)NMWi*h^XCHK+D2 zUscm@e+WtSGMyKvG`Aa&z*nrK0P))?ZLajO!oa69S4o9rZbDoKFDWdQM%ZE z>gf(T{y;n9zdrj4rhf>(M#cVs|8IBHI&Zo*M?dc^+izR)11JE%FCguIG6eh=k2FQ4 zKW>o$p-26KKgP+b1PM~E9$X~txDOD{L4?97X?$e>PrS8hL&ArLIWDPL7p=2b#BGOX z)|>gTkit$?pga}|Q0o-%8ZZmYmtwW*`2tQ`7J~o}kwx^WrZT~uDu~GpF!)*+#Fjf4 zjnc_R5k+Xu%ez+@f?!tl*Pd7=>@r-3# zzI`@3feQmA&PSx@8cYGOJZbUSDw&Lm|Ja6I=5LQ?U4Z)iPyn;;I7t7syc~K?Q4mdT z+?i_#<8%>RhO%N4NQ}wBNim$5r%eNJaUWe^P^PA91WtXs-VCHXr|((!<=iNtJw0sF zAlPO$`=%P}h8<9*8+6?cv@;vw<8Prg60L&)M;Qr{Ilf73y8{x9^x2+W6u>H32J|}c z$~>vKe60?fsb;TAnk#H()Ni>N3h@sW)i2XiT>=s5Z9-$uZw@w1ur2d4m-Tr_)&LAK z#X_Hs(zFh*>ddb}_lF+RxaGQ|1_WMA`@G3hgEOHD`iYeJJ{_mAFnN$;v4x03MsyJD zZESwat1KEHY#G~b7QkNruQzZfXs(3={8`V5#tA0f$q{GPXSt$_4BUwm&#cY?4Y;yexd_pMkWkQ>BshF-u&7BEh#1t~i;U+zgUi|;e;Wwes>I7Y~ z;$flz0BAb_0O0=5Uz@RotDUv!|Jxn2+6Xi?RG5 z04o54Zzm#8*@u&~{-@~#^zheFB8$;)00KO}v5d1{x6l~~S*+S=(tccxh)-&v2Ve`pB7= zK25r}#8k0o_d!;ZH=BMENL?F+WBWi&#k666PO|0Q8Bn;C^Fd0P4PLt^9`o<9XUA}< zQ>y*V;xwE3V*f1j3wW=Tdq*}IHXu+}l!Eo5rOi9dapSBm83t$FJ!RdKw_dDC_BgfMA(T!6`STp@gz@8!ng|YqeQE~ zTZhJ3+it)7KcSimukYA$z$$ym`URhXom&IhWde%3S5Afx6b10wdgC5tr~nB@!&?ib zYq`zlN>5e!$kGGz{G1El!7P%b>csTlBf|V|lXAZQ_Hlv@o4|88e?mhuRRl!?r4ll? z&BBKWoiJ?hl)xO8Mt;@gr1VjC+j-A20P6(`WK3sjTld6b&CR2b+dnnFlnFNU;?o-e zpGj^uq+z5dvP#54kQkyAN_(u37#fFN2|vYhzNtHG0sP{wQk*@2I_;)FvNt*B8IeDk zV~XT9U-ji8WaiXSV>bTOr&Lf4>$XQAn7wHc&Fy|8Xr>v$b#;q4OBgOb;Iuj=r__t{ za49+DR$DTN0`!xxmgvCfB~bl-@Ed1nu~9zsl4J?W0|(m3;AU!c_tK7wkjt%S;O^Rv z2Fu#H$tmrK1Q39G?y`_vs(;z?Mf%ygB@-MK$lk+^Ib)Ael$L^Iu%oJ%^meA$@3|HBC{NGDtz`qMWuj4uHtsgc(R!}r(fd5e5rPlBOoMFTcyW=Pn+zIe7f?vRRm_FSDA-{6 z`e6Gkb44ae&|=$R?iAPbDO&{FgCglIEkC!{`q!$HkZ*bxgMirWY-9_IfHCljrZyia z>?jDBRFn|qBN*lR_E{(_dX*#F{7VjwKC6;DE+@%v>Y1gRs6Ur%_Q}@!mYr(LsNerH zs0TMAsJt}@B7JgMg5i~LA1`^!s<*_2C@Kb&r`Oon7~(!99Ui_iTl>V&AkHXZ;2l%~ z|L=!rmcD-TmR+%jTHI$RCn=sp5{8?4n5hgFkAg69?$mUVk)$R6yuiQp5%$!mI_aR0QYS+{6^JE_mMjpgaRJn*@5C)W{MyZNP+jx7E& z5^?pH(i{t?1#EFrOPXxrMHV<9Voc{e<*6J?`>c}Kjqi?pPTgv^;Y6o0PP3=cJRa{u zgE!6x_M=T#4}%5O7esYWOs`PCun#%1*MQcCo}=3NcH2msH?tEs-NlJCBUEeRZpt3j zK=2cR5esKy`z|Ho<`YOL*gY`9czj#ZFM&Y_q@%s`|8;^>j|9-!?V)OJO= zj{Y1;EF+Tj6nJ}Eq3Y~`zd9-SJwU-H$ zWU=3QsU7kGc+=Jj13ig#+S=2Rke#G9QzOBHJIX#v;-DT%-6nV1jnLN!&n`w%kHdJQ zQuC4K8Ahm$pf$NJiJG$5FIl}~+{ z*|3x|-qklLFm~-T-}%`v!JvVmI(U9uetoPjr|4QE?DcCnI0l1NbRl~O9%`@*GYAwR zIRZ5^4ey)$GKSn5|5~$U=UPauEn?%i>irt$C^v0=9QLY`t#dEryGwo_Et-!P@=G1g?(my>n#n4^5r2#(6ph5UrELl01m*mv$c0nC9zAMx zCl?=TdUo{hxr~2467(izs6fzIZ8M*IUD%0dlVCE|hQYnD*BVpE8 zC2+v@b}p^Si5obXo5hCd6CR_3ji>rWfS4`*HIFel7Nl`po=wtoGs`>^8CEOkwH*{y z`^qhbiiL>~WemjU890+=QijWKmZFAkQ?pdiGxcdC$&1h-Z$>L>@usj&jD@JE()ZDO z_@htH=nJbFZIyCFgu{`zDl?L*x6p%>(HayS5JguB$6FR*rX_f>Xd|`t;vqWuN?}ZM z#DyZR2=?-n+p@R{>zv(=Lq)hMtl;8fr}H#^Yo)V|M{k$|{rD0EQl!f=7tW&Xxg9kp z^f$tanO}_{X?A8bs&RRmbVXQcpy(gc{A)^%F9|=5TqVb*=y&5tJ9gJ_X;G52od7Dr z>1mO6Bzmaaknjzp)jK}1Cf(-sqHm(hr#rBbMWYG;kKlAyTq*X09k1$gbP?p#FU#f$jOp0Cz$*X%CR=}S=bZ~oRQ^EedAA^eQ zGhjlZWFgp3A;j1~pia=>^njn8go}xLsjy3EK+-emBsH}lxN)#1Q6~odGG_II_XuFF z>%GF8_)Un);y~&PZi&PzZ9jI)hJ(B z*&k&&aEcJxSF-~*E$j2{(=2l=a>4Ux@xa)WEd7#{HJ{Eaf26hfnJv3>!!yzlZtJ^I z=AoogCn3R}BU7X#x}%cw?U?)0px?ZarD^%9Q)#b2>pW$3=K@7p;9ClrKcTSZX}#B8 zB|FK+;8sqcMz)7+)4c)YQoS}Yt+>i(y(NppjfU3L8z|;T1J1`vZ=Qegpn&^E0(1&n zRc>&gY&@+7Qo!pRQ`9|1;BZ8??)x54vNiFc1m%+6EeA%p(#PXD^x9K!TAA${u_RyVB1 zqp~4$55`)*9ebPra;J$)`gGf>MHfM9k4iLgfzGetsh`1ejH!-N?sXO8E=Q{6{bXZN zu;jQ;SV($0iqPiEc*|52w;hph)5_G*>OvJIK$PRNWM4qhJePxK<3o9YQ&dLAyGASHeG~? z;r1+(>k-&{P2pb4kSol6&>GC%C=;*?b)`w(pJZv_W3Ta*OWx8MsmGg<8+4;?m8!A=wU9wI0xkl2|(M=hn*3X0a7LbDnDbmw(p#Xd<}YA@}dO zXMq3?m+Jb;etOjl*Ltz2aQ(iZdFZ`cw&B%TWHALBf`TRKVxDp}Vp(<}v2&rG5j8B;))Z=`&jjo%At)vN~F|xDCwXF?4b(~pa zx5*kpm_yvUj)*fZm;=QOVgm;$?2j6fTHX}+J*ON|n^$}gvf`?^)a*f+Dcc`tUD1^H zrO`HKLGF<+FyJ=S9n&>dyRqb(D1mJDkhlE};SbRDeP8v`Zn*gbX!pgda!1b^&~_xh8V?s%GX>W5dc(^r}XEcvXmeflF;Z#IGdo^6VT>c^)yMdtaR7oxU%f z+Pzfn^w~o47wg^$&g({87jiz7w*xn6mTHKS$aG~ro5$I>v%kTx3eS%z(obDFHPT!A z1IJqB9m)pKNp@$jG_Mf6q4_$7#h3Bu1YfRw4k!7*#~6U$S5qC?8JEbeIl={gNDZs@ zUnd*$uaE8L$LtXTQ^MQ#d{c& zOKgDC`SD}o!KT44WV^>mfFA{ssxUOnjeeASKTK@jSp(MvOuIgv5rnc37rPU{%$nVQ znD&5{By0vR{5jLQ_XSST9nY9#0kvoU8N>Z;cyq0c3#?pKckw$ioyp^p%Zb62KL{9^ zAIdh24Z+YcJE6^2ba}y5LUMOI4ajKiMu^vZWc6UWHBY6y0ubfOg zyNq_JKz&rd5Olni;i8$~_EK_69yqCMDsU&^4)#Z=Dy7>JY2Lj#DPGInSOW}^Lm zYIOb;OHf$6Q)b`79xnaZAh~f89+94Qy8PvMVjkC2TA{)C& zH`c#BsFDjLLALB_YWJXxVhY z&gB0f?7aJUJ?;-yHxUwbD5D8(QfP?43eB~KxU|hwT-WPMVlwwvRu?LT$ z8smp1j-6W3_Z;a7wtN@^BQT$F&==BwH4qKV_h!}Kzy+Rr$a7A}ItYshm&)Slx=}?M zT^Z8)A$d%!Y5uT1>S#!!)N)_7@;c12IPe*MpJZ(+`DJ?x5~Ba1 zGx*&ZYsc6)gU=ZS;evwdY!rgo2|xNCyTlEpeDtW%5@+X@;vU-M{JTkf_c zuZa~fq5Gf2qiyXdZA1B`#nyj*7h2+fvdI7ArfhFws&DG?&+78u1oi(0sAF&1ZH+u{ z`~4US@dp9aldd>~gk7TB#PN!xlu4a(m%v$N5t>yTr2|--ifI16=CtceSuLZ{claG| z@3-&HGD#;?k`i!N{LLV8nw8XX+;A^Dm*nC_px+nc-)i-7oqQmZ*6a29s=)prrI|E4 zSd^MTnLnHZ#olt|9}NI$R0KTm{{6|*)dr4I1*eRS#v1eBf zg?*#FP@mH^(d_r<*wLogbh8h0Z>xlBppz6P7n-6GQWg15lGvgfd`+Qw2+!{1_b|DBv~YVMsE>@GL} z6xO#gNUMPiE@Fjvd=G;O~eZR6tn!xLbW5f(c%M6cR|sFc};pEjSa#vYCpkP2iGVv&rkh zb;$6LP9eS|MHpI#9UMy>QfZ3Zl2|MLxbOneh#;f>o_2~%E9OW#^|KNrCyL0BCVui|3@gp2HDeqvPJ&yn<*2D;1Lj#HA%M`=Wl61KXBR`ft^Uv`ed^|ZF zM!Ll8^z{-kG%DgnOZzvQ_BsamHb?M1T0FcmjQ`B5$Q^IUs9Ol|M$`XBVE9r0hD_J* zBH|iIV%|qtwWhfA@RWSdUver{fUl&S&5<-TqZ!f@ZP%N{@BE$Q6$j zrur3@uUv;^)$MCMjafXsG(O^~)SOli2i=~(!ertpYF`y^{GcQK%J#{{IO>a!*rfh% zgZXHH5p~ziFeJ}Gg#!8}el!bGVvZeYHOtVIQ!CSnaar7~o2i)MVaaiQ)7JCzZ0u@7&@9EUJ$&uREk!?DBupsY^(T$uu+|)I99Y;iF zz6AqN!!GR98T#1sUSdrGQU=cr=K>_!Csd`0GT??WfGWH6L@%QeE?9*awMe0?vl_ja zS6=mBwYmr$aos^y_b_2bB}#_rw!|v|I>YlY=)ke>aD!oo+S{ZW#JL%JbUg2lSz$iG z|BU=X=L*yhtKB;eP0@h=wpKScQ7b_F0^_*)d$aYhK9;LRRg{1qGH#eYL_ni}7T|jz zx$tAZQM;=}%o>ZuQ#rM!+-%s6kgXSFfX6J_v+ZZlAI?GKc6Ljqu`V~tpYE; zhp>a^0X?7!2@6t4p?%XrShTe-QM3_U#f~P!?)%pIi>wTPf=F-lap@Czck$3=X`P&h zi7He%zuE*g(DB6mp#7pmZGvvKR540P@ZRvDeTI&tM09P!9z0-=Jk~H1YaBl9TTEYV z<)BENLTe{k=QT^g`0}tzkNgBS5z|J#X%T`%mG`Pe5`3a}KLEWycP<0`fN1 z4U#z68xj?zm^DE%J86Vv8`GC$Gs{8Bp`qVJ0>^k^KfSNMV7`9*=hxA!3{8N46Rj#T zyGrgqr>@_rsJ5||{*+^H!kA}H;gX1{@Y=bC>Rrv-HqwpmSCS2nNCw5%Cd93Qi>DwQREdHLXwo`Vm)nXF?k-jJJR5tK%D}yqrD1 zU#yL>GG7AEruB4&e-V3~2ZZ-?b`P=KGlxcXV~V)ENv&A0k2A(PpOxyP;0nZKF-Z1b z2WbxC@UXIVm4{7S{dIEJ{q57PjrtyWG_T|p zAOz^qVD+1HRw^pc4!G;EjA_W;rWL~`&{Y15&p!jjZ|Zx(04F5tvWAlXI4VUX$)m1& zQ!_6>t7F~5jz~8-9JmbRFY{_KpDW^9a!I5sOJ+sku8DmrqL~EPwq4zI)hoD&m$JL4 z3t2f*I8Y@oZas8|+)|rE9B3D{FKhH3=Y#IdE#o$o=AD>iF)gy?n0!obAJXpCdy(xV zO0t#)_=zd-d)-7hJiE;mBp-~Zs*jmc|K9Gw9`Pk2{o_!J5EcMtyPI;OP5XbG4C+*%gA*br6z%!z+YTpZFs*oDzU8p2b$9VPe5W8 z)KPYbF`52s7`Xwdo-19QOrhG?DPD|FrmNJC@-l96gNhwTf7glG_R_O8$cIz^R0T)+ zuw<0GVFb~Hi82~&fVbkN`L2Mcm7Q66CcXCzvY;W*_PTr-q6%$PNC_;24CBz`1^AD$ z&BgnL=XcIA`#=E_ZDS^-HWH%U>a&Pq%HFWtZF;aeNsmdnuDX&ve4`rs+&{*BG%_O+ z@y4AwK)dTzAIWlQVa2yMTr8d9z#MnAhHF&pd3)x<5v+JoxPN^vQhMEFhvC!%%wE~6 zGwC!_GsfG|;R3{XT?5(s^1b>CgBH#EH!rVdxmd+{tO=IuqP{(o+{38FlTd60RN0yD z0>eV+fp`2*ji=XXFRc=(j0FH17Lp?#nsbxNY3%7{91 zeH$>A@UQb?#_H7<5>ZNy;-vXwRigB1f$Q)!kn^;1&K;xa6_i1qJI4R1Zd8$GYd^-U zYpcTp0qM~EpI{qPTO(5wlmC~dPitx0AGV?UzSa_Dg^rb7x7(QWvhNk7lQrTJrGupd zl!&0fv`TC^I)n=nJW3aLcbpNV<2$&ZTRs#8$y&s6F5T^tMh@>SJ5Vm0sWh`vt5B$C zlcF?|YQaAbOq)zy$s6G#J&VC=vSTImFAK(G)#~+YdYB2sNs!5ZvCOUo9$h_5rv72M zCo9NcKsWs+@)sTsIJBpfRBM`6f0L9*l){~=$=hM#3ixB(m=;G%4bpE+Sr*SW{CZ4<3>6+gD z@7`0lKu}-FArj18C+~^E@sb;WE~;~tj(~abWjQgK-J|HgO-jD9uf;be739Q^lTQsq zCTpF@O~+SiEsSNx2c9V}Q3(1Xn^5a95G1ZPR9HQ?7N8rh7zS0-2Uw?Ssua;`Cc@oO zugGeENIB%)d;c1hVuViQ>j;I7raW0absz^?DQpgt_L5}-8a7S`(#(VM9tk_YN86)Q zJO4JG6)YXXIU*-}s?|5i zC()tcHiK+U6FP-UfWUy$)R|GI5fO_>%fA@+m2L$b(Y?)XzK_Bj=Xt9?6*{R{I`5S- zpM#?`2x>BoTmiK-a}Xr5K{J(k2J`}>QeAuTSz56Baz&0q9;N$ASVHo`_D$o3F*fZs z#jS!A@H`&z?VM7A6@UoY9R-e6eXeXi{V4?A>MuEQ&Vz3Bt_Mv}QZB?)Iw(M^_gezR zQ=PG4DtUko3VWbS$?Ra$ggCdLBTXic9Nmq^62#a=>?aBZjI3}0$P5w(p=!Rdw6S9n zhJw&>U~~}oKu{u#Fn|w7VZub249F$x3@X@63c&mplvva1Ty;qpgf7Y$s57k?oHr!O zR|;V)Ny$)iYh&E< z5Ha3sve4r9-J;;)cfors-DIN>=KMiOIT&=f38*`+Dy)&Ono(>?e^;a2NxGRpk1O#g)db21 zPCqnm{2-0G+iWjnfUgHKJ<6ze+_+ZNg&dQF&)lXS{sKS5vTc}RHJ(oS2&#D$0UFiu ztv~aTGbBaWwXu#w{w}qgMIp)p{EEhfj*QZ=D|FmzqC3qJhdyqvN}FMU*p8-d9p8#J zy8sSI7}^+IoFyBEO8m)jOn=FT%qcr9tcjzaN=jfp3teM6lJ!-SUVK_k>;&jov}i|t zGLR^&Eq}yo2$pXaDiln2nD3r6eFZ~OmtT(c(QT~t=U%`}$W8`BKl%Iyrh@arXk@D# zTkHeaCiVq#L6B&VO@iMpK(4DG*YF;h0B)RU2~~2F2lUp}%LpUS(|oOrJ$aFB+(89% zk~Z5Ckeo4p0{-FR;__nFjNAz0)oeo1us+!N4vF#kbU$lDuL8vKF;$-kI5Mjw0RPTz zP_bY#Y0g%P4P~~nCq*xl<*Uo?EV%K^)ZC+OGr{kFiy8w8iMf?4we3&a7P z;v`p4Swg?yM0<=bgQM*oU<4wi2?;9Qc}q1v7^K@jhG^^d{^*%M9nc>5Hj#k0r4!wB zgN4-<`L(3wFLAyYAzPh50XJf`_>F!Q2o5`8E4EG7fr69VLH96$?xg~vtsCX`pf#d8 zj_yi|ye3Qg+tJg>>l@vB9zwh0T(j=J4l)1QF&ESfkLO51GJb$kYt6ZU zbp2w;@98mTLA6?(cg9I9(+vTt=c<5UJ|#8m0`W2d724K5I9h)KHlvmbQ`o_KnwFcC zkGu1X%OsW*!{W1C`vvEcVXkQ>2Xm^0=(+2;wl1P~^t^UcTSW#WC=I-9#)`5uH?}8x zH^>AO&-#UGTkE&0&-?q;Ct5111v@8Qu3=cR09>GiOWLPw}?cljdSgTZg!s(x%cWOOnIQ-|!cUMYWfwaw!wC zYRWqj*u2zi1%1do>Bxti{_+ixg-EOC4+^RuP|+nCNS|nI;z(Z8w1d~BCaASo(TOhc zgIMe#o((O14zr@~!EA8hcaUnwzjT%uE%dZcNb14iw+6l4!!ZSaG#1iv+rY?Y&bW@6 z=pI?l(rrJNbKKGcYuKa4v2mm#Dz@`x*4ui&;6i26hLOx+)yG6))48(!&vQ}6j9XuX zu4}sg9JIcz&Lxo}4nX4IUunbvdCU_JXrcUiJGoQbGz##y*`R$OLu+1-OK83m!VKcljoV04sjc;r76+uVe-=4}g>Z9NyieD8!% z_zaj)#vaZUL@~%5G_Y;FMmOv`)P%dVpNWcS)ba~s)n{&8;#Uu~8v<%?${<#l4-UfU zx=)IrnK%4Mu1NB3pxEpr;-!gJY1E1cja)HK+-`f5yMV+S*ud3*;)E5Gba-IZ$*e_E z|6#}yqsF5iMw9bYsQazvT!vDHo@(wl#=z{ET_=I2K+gy5RV+^+RI?11hp7x-dH(h_ zbx=@WVh=mxYPGjkG%<|bO5S#MxM3SazI8H{Ap?qBM^Vys+}3qeM_?s(t&vJ8@46d< zVGUkJt=t;Dx6*uW4ZiW`N@sNUxlbn?@{kXJo#+lxCYlW1)4M^y!!FbppMgEE&f4pR zlQ&ScUuBj|64S%N1l4K7X<5L^8)U(Bfr23OqzNh49j0kcp@CT+{?XI znh-~6$s%?2_uC*mwAJC;+xA2fLz>>s-Zy55QA>ndw$fAol8W1EMJ|Lkf1dZ81jSpB^o;7{Z3 z{b7yQ?9=%|N&HJ9rD@jMEMy{L#bF&f_GQ7X+&24X)R-TP^r1>(XXve4wDmb?pBe?c z&*T>r&P7M~Pow9V0&^DSla}ZqZ5lf*2$a*5^)WZBPo|MHVZz_|)b+ge5&jLE>_{43 z)oIec!I05>V-_wsUs?|GwZX*I8G2s?``PqH&F$^wY*ziq@Gqpawk8ogs(c}{!0TlG z+xl95#eaV2gzvffb=_7S1j715L&Q(n`*|>CkC(QQu)Ab})shpt%}v3Lkff`zwCR8_a|mG5wE&&_`!zmhs!6I7`c9O^=k#n{&;m!dKm5PqHX= z`-tT+=eAm{)mPQn;~a^}Yt{@5q z7&!lVissiP(fMbOKx+PH^CZLmKkLo^Bk}CkwEb_1=Qjb-kH4a#7=umgpq~%Z9*}JY z!f-;9MyjhHGE|e;y4Vv(gvtQ8Q7w zf$D>ms7AY)=7<=tARias8_|V!PJN2vf*Ri`9cfnr;E2ui0XF^EHLLW}G>`Wl0b zVNbrXz(svm`TaeOw@LFmR}b{30hf!JE7jLNh2qH5SEykfF zuU#%xP)@-_!xMnU*6b}~-3rP%R}<&9_)k;1;;P&CO;})T2FqY$1vTcj-_R#$3e8g!Ru}eT&vb0dxnoLUxrpQdC_Sl>;xqR?b6$o)ytMSo4X5i z4Z$$d#?7ag14G|C=b{v6rkh+4h4n(St&v7fFD7_}&029Z_*i>~@U_{=(kL)E+V0Vi$_Ky45TN-=hoEs%1+M&+F(J1{$0}q9f{w*$znckn)jB~HP1so z1)mchE<{gGL?DEdm#r9}b=_s3<5u6S;!z!KJ3AQsT1ua&-KOlF@cDN?Vvfxe92I7r z&Dl%VQ+U-tc;kS#$b&x2MvyOUokoaQuB9(IZ5kP2}c4_t(EUQYyV z9Wx@1)sJL0!Ps`3NOv4RRoiLM?cX6lc;gNX;|X-+G-->uXMIAcC$=59LabyMJniYP z5x^)K_Ui)pfOJjYyjmA`5$9MpY78yBqq0G4$y(3w4Z$HqObZ`^j7UdU!Z_l6@0{?p zh@c#&u6TxCcBO-9RVB5fyc$s43{lnhjBvqA<(`r5k#eV; z=Qh1F< zCkyQO_c~)C>G}EJfZ~wKaP$jk<-LwT?Wj+cKGdn09{^e4GDL5Rz2$rZGj!0|rf4|Zh{Vg4v9 zDC!A)J|cXwfA4mYt-V|my4iLhL=hIU<&bEVn~Kq?KPA#ZjlM| zS^rbvEW-wjs)%mWap-qTq>OxYMct%>LwNgu{zCo202xG2p1lzRn?M@(NtEi_g~&!P z&u*4@kL^^{cZ&}`IxW1RsF#U7n(nLLONlmRh11uhP93I9V_0hKKt++y09I>DyY_p& zJ%cWPn*dxiIIza~J~|%+bnF=2kKv6erN^l1PC2}D$1FGlxxE3>nVHU~Z^O_m{!SY&5ho>iL+NdrqDQwo$ev)kUw$54LcVSatl z9UDTH#pH>i`QL)g0P+pctQ-mu$pixg>>_OfT{TYh7@Zs3>0gg7f|T=dZ^o;6IKFuP z)R$}iSNBPHxWlo5g^577YS)R{D~!~C2g@3+IaS4ylXm)cRcKuO#16Hre4$I#3T#s);zrFN z6}mXYK|#iJ2Qh-|X)GAO%r}}0RN>x+FJ#Hd3Vwt< zBTT+Dl#3oiDgRC)PzC?xF!|{f+J(@n>{5e+K<)1hQq*!s_ca`zU_#=$>o+j3v5OK7 z@K1~un{5yRFcNs-+`?n6kwi|0LjLERDO!a@hl-Cj_!xgd#IISaF@VwwDAy5hxex(lE}shPg8@;uxYVN<&=JjHC?(+<2ICWBio2q zmW)AZPiy92E;py~{dxIZA2lT&tC-nLh-a_0kH~@!NOM}LgPV_%RzjB(o;FG%+00excWOmyH%q2~P5~r)OgYf1+L$*}wR6bWZc<6y27mIhml9DU);FQD9$h>VOc z8RY0Ro~R1Qnm0z&_!S@z@c(&XL~Q;H9N;$N6dysz5hT3>4w0|u0vc~_S}GVcMA|IK zk#a)@@ModF1H-XhVAr?^i488#icCF-nBbB95jt_zMQM-}cu0|k&me9W^LjZydOLCe zAnfADjvVbap~1j7G+HBZP!aL_-;%d~K6(`0?bgxMw(B4Q}4=5qH$@LQoA17Pg>8#^tocR-U{+TYi>;~w+NcYns$ zY~{h%C#?)!sxL51q?EaK+_HsUi-FyH!J)GMet7t>88kh^T7Yu=)<8I z7XYIrbK(Y_#ZrAk9EA{yqtRKJ!3TQnBlI1fgby~Lj1MA_i{O^-;a9AMrQd;)9VX?9 z*G3eRxU~Bd<^kkzLbgab*1R1D_Q^`JJnQ!~5D6dPVl4+pYPu+r%<z)HhH5yucI}Xg`4V~}@bKg!dX>5bm42pqy|^7*%NTgtA$>GtJK9}RaZ9at72TN- z#3ED_{@v{kCBhT+Dt-n&WDF;==lx=^qV!XFI0Qb+@c^p86Dc--uIRxaI!H9&=AZAJ zH=WSFwgrm-?2P3B>6i~Qhn;0Wv$mL~FBdj3&J(_oB|PV$LNS2U0@Do@~{g4`cUa6=_`p&q z1qL92G9Ra&i<5>G%LkvD#9Rgejd$N9D?7A#xgGlsYV%E$8_Zu@Z}sQscMW!(!ht-M z12O^^TMH_i0!X`T3!W^i9-0o8b2A2?MYq@9(_5Fbjrj&F<4K&ZCF?6ZlDzX0jQu?^Fmlxd`5TzlwHN=+B zI-#wkWZz@?VMB=7smL<;awS$FJ-w2I2v1pXUTMG_+XR^&Npg_oF~=gu~ ztfx`F&K&|;4Oh6_yT8G3wNN~dAezTr)B-v8kV+?~&EUP7is2TCYVt-%ashQx;S38m z+jrmF|^H2Q^oup~?La_++a@-4Y&r#wX<@RHf_(eDQslNL3{(+XQee=*CAmC{i zR9Qg8*oYQ)<#jW9qbZciSN7bs?}`SNm_mQ{gF;%cbqvS~Yb^KZ!QX)*F=@Ro`-7eA zIh*L5&UIQfK-Fd2a}etp`J9MS67sTOh?ITfl}=00qI+A#)3qB+=*8pBW`3i)cTfwj zztD)iAP*`)2rLi>T~E`@8wEf)O=e&O#KX5&zjM}Rg-L3GI@70@zq2&9VmvSnE>;^$ zIG~J~_8*mLbLc?B7VX^%&3P2Xcj;Dhh%pi0sWwW6zX!=UY~d6C8_)ZD^l0?wld^YK zI8#KXkNJ@s)OkR}_;w4kt6#3+U;;sy1-BuLtzs~lErdk`AJP}1r?VEwXv^&z{rU;q znzl+$!isEDgsTLI4^^;!7eS}}0Q$=5L3>ep40P@@$xxJGxTyO>sB7<>?3tV{yp!xW zFyotYDjOYkdyY<8oaR=Q+kX1Evhv81)|oW2nL3H?%Af49tygJ^-Fbwz3Of(|t<=N# zv-9>;aGX?p>Q11qjzYJZg@>P;QzE@GBTMuBzInBLDXo_h$S#vR?tv|vIrI@(1G*Dk zL7&vUA+a8_bPXW(T~Q4qY;k8VclYDjFmjGE%*a^u;~u>@=*0hegH{kazbTmXfFuc7 zl48U1b-(xb+MbErTMtUrwMDgaMDK4^%Zch3RU)_g-Y-05nQO0(=mm~;BOroT)5i|p z5uOgt+Z!hssF_*Uw*~%AP#h~)O@{JFcbxN7Lji+nRPDRgN@#1ve9$ zPn-{Zm84Y=(C8Fi7vk~KQoYudBmw%mISnU^xh@?ii`e<+MDTVwc2sr7(3slC8BB(i za%IU}z;dM8%cYDX{w*j+1_}!%BJAsJ#7$>^F3~rkxqcFbXZR5*i=BS0;%-yI1l_k9cgO347+<^eNVoGN>38CIyo8wMgefCAxA}<0>1(V?J z#@R6rvA$AI@wIVmPMyMRAcjOCB#w18BTcBItBcj1DUt;CH_M(A^?Z5(-Nv5)dh=AK z?gQ*x2!z!CX}{pco_0CCXxwGQ33I42wXmyvf=}l)XWGN%6(!A1@`)$n!3L8b%U26r zb#x~*+%pXLK>UynzhVexgZ_u6*yk`UU~<|fEiKzNji(o7Ax6$(6@xT8J3cDA5VE4# z(JoZe_C9l)?N=xMNlVbhIRyE{?+$(?O>IED7D&50K3)c8zHTb2q;$u!1dVym-?78p zo478iyJ9;kvhkcA^n3hzV8v;50tnx0S~HkcUHrK|XgYw9M)<(H2D5RFgzURvfjxh; z!}Wp=fbX;Ix|w$F*;02bD*R0*g%egZdL&RkmI&H9C;FPv>)lNlnI%_Af3=`!f1(GE z{81ldqYQ1rsrY+c*P~)i59X4EAbzLNDN^Ohq<>gtEtja>pUl{Od_1d!u}KmfP5*5i zB;u4%0ZFm8tgaNq0ZD3`4*T`DklwG}KITD<9@gCx;VM_MJ8POO`XM^5wMDyi91sj# z={c*#$5!F~h2@fK(|TCB0VtBFGDoSVO7vq~aeoUBEL6r#^y(Mx!uD=Z40u~D;U~V< zmy|lGTxe}j962u%`=i+aCTcaG`MjF=>m_{^HrdnauSzmc?7dz7-CG^HOYN#p49@Rid6;^%BG3%xh4Y>@3|KyHgf$dU7cAaZqNDFr z=6BPFW`+v3G(Il}KAi3NTQa3QlwGL=a`fO2h0B!@-Eh{CDw8Bx=cM;M7KvHs_rN1mQhE8vuFQN z#$#T&_}A>9JZFox$Pf8{!Q_X=px9p`)Me2kU>8Ic$RTT*1&2Z11SlI`h>jxLOX{#a zW(+h@RVl2KA8_wa2EP;tjfl>e`S~`LZpHy~CH|Y99ww6>t~F`-T+88Y7a^Z=WJzsv8E~7@ zuDquI_6I$7OVpO0v?ENXtEIRj^8cZGoJ+l={#DpU*ydOp?jIxHO^((+cZ!3MvCRHWpA;gf zI&)jw&UU$e6|GJvJ_8SB`7X~&n-mmS=RMXis2R>=s!0zA)`v+?u|S@6;Jy89 zBal057Oak5WI4Yk@yG_Qv@D}-piFal1c<92%>WvB#f@azOqey>54NfJ3pM3e6*%BE z31@$tyYFnhV8byfV1~`(jC(7p=?u%z?rL!PmAe<2!;FtXvN0|%)BVFv5;zpoZtm6|MD{L z8hk2O?CtVPeS&0qD$@O80=J0(PG+CHSwkp{zW;?$&Xkf&KD)W+Db|FfA}3|I z$j017r(PX9V%2t~tKnM0xPQgzQA~DwEz|}t0!4O%8X9^*$>cS0@iTDs+Q>V6=U%_X zvMr$PgGbU?AlB9t3|q|2!MKu7RJ|CpIa%*;kk;kWTbSpH^(k=J9GzW&(H|OOSwNg3 z4WZ9FYJ>yPQ)_rgs5`AIM1S!znb!FZX${4q+EIvU3UMVoWN_isq_Xi&_KvnnuDk1*Fo2y&mL8#oX?4%hEt#pec zzF!!n%BbJ(DAA2lOJ-BWo&9l&Roq9nG{zBA!~C=%GfI+lTEc&#w|kN2JEQ1SsO}JD z>A_M!Lstt#jUCEIORlS7g$qQs`HTqbf%D)ZY3 z%{m_J3IL3YTsfR2x{Itt!wZO9{;-7$_UUgSY&w`hiFzN z8fvK5VHwk>6mH}qoJv_^Xns?wzhL0*gC}S!&QdV`ox+n>NJX(MN2u{^SNZ#D{VJe2 za<;nG+qTLcmnB%Jnw|RO6m!~+3K7FN(+d0d;D~KWo2z@gmQvc|?@?;*tg*4)s*av* z--EaJ3i;0ZVxwe#pFgp2d9C2<=Y4JLp>ghqBc!;2myS2p4;ju`$UNKkkV*qll&hDE zicdp!7c7o^6vdJ7IWJx|jJ6-)#4XDdSKGfF?6+n4R?9R4O^cM&7{D?*yPl87a4Qa< zE(YkDZJXhwYu18T_pQG546=B@7E>>kyb`O%)w8X$}pH1TslILa^M#ur}l$$BX4 zB#kJ$kHPdQ}KQnxOZvUEf;XcyHm+hR(Z@o&G+r(@f;(Q(qTZQHi(j&0kvZND*Z@46p$ovL%HR;`~f*P65DGloIg_kdb@ zc#bMgOpLl5HM4BjvBfMRyJ?dcc5>r7KbtC7vHWqu--;2MQ8h)FH&ur8`u@7vvaq7R zho~7uJ#&1GW*X`d&fqp`gEr<;BBRVY2Zv<7`V{@NC?hsTob!o561oO~4p*}rH~THS z1TuqV1$mb}H%NtLuFE?}yeO_ZZ?` ztaSeM-U39Q->JhbeNprDGOfTTqLla9OQ3&QVd!gQ!5ucZ7N9e0&O-tOVky2O=_zxB z(iXRNo!wg}9qPD`v+`_L)!K`P7{6fuTm7+|(zVCnM`7~*Remjjf79asz4|-aIU6`z z*x4Fb|BnZN=bwR%v!VC2mPm&;yQ_KGc#~%@I&Iiw)0snX<6oCbN8lLHC)q?N`iDZ= z#@pjM6BPl!PZMTZds9DQ^nb4 z9q#BV4OY3iiKw-yP)?6M>+$R?t>;S%BN{cmzoKG`G5vg&g!NW?hP#nGPoUNPT@yqaIKM+)MUH)OVE6g^5Jzp0jF|f8p8i9kIyEuWu;eNT^#oGUU5ZMnBhzi5a}5E zI1y%QF0?{k3B#4ZW>jsKJ?O^8eZ?)R8S5?-Et_@_x`T4yh<$x6o zEZRz%oK&h3T-ut<0)HX#>RUtR<{Z2f0H(l}aUKevW=K2U5W3Qr{Wd?J(1%l(l>Gz8 zi{lYEHlr>9#^0yPCAq<7uN3}gl1eQ0u(;7LS63(Z%iRNEbNWnOOHDG&`}ujXY%(fN zU_YtLhNjP0d<@pnglset92+Z9@S|ZPp`Hl|%>M=G;}gH;&mbd>Dd%h8=fHa=d$}1W z+`unr{4hELpWSEJfl%J&M;}vkOW93t0!0({PUr|n4NBg$B;63!WIdnvr^C*xi^Vuh zLS)jpsnW6c=N9WDz+@xCDdFz7g6s3t-$rvHsZ#=JpEf7CxP9nrml$77w7*$%x{Er6| z*ArK<*J|RMHT&c6dyn-t=@|i*uKJOpeqt;751_o9QgKS60)v$0`Q9*tE}KP9-(JRe z);dHYw~bvk5B0Y~i8RiHjJNRg--5RhKE|oY_V9x(N1=5hRl?IIp(^S|LE>=5MK{p$ zY%1SJiQT}R!=%?3>gLUnzM{W9m~FgzpPDZHZyEw}GIbixBrUIJqt)_@*>K;P(|Gp= zy3_*TLEQa_*u+cctSbw@R*WT8O*H*zHGiyBc3{)M=PnWOD6FPXiZePT1E_#GEQgi{ zt%1@sO)>#m+@murCZ7E)?=DFe-L>suS?&nTI0*8J?%m4=vu)p?>P6 zc(fmxlcO*dXtFt4yNEe;x3WQL9HCDcfrc_@7z*43dv;?SA>u@ExPWmCt*GGeIYvI@ zFOdGAlvpE}0ZeygF3H@3B7t=?ELz9*cjZH~m9ZeM5I{a~x6BGlH>8K+-rRoifPw&u ziz!L~UByCBDF>gXz=LLf1Kag2vxmG^OitY)=v2d}zklIk7*%$!;}gJxq?wgW0dG(N zIc%)~1X)qSOHdB6_*raZl-Deq#zO&5B720UKDE60vVK9Fvt;VU>4+Rp|2w$Pz(CQ& zH$8)4?9FGD1XTvAzzQmy>Ff)JXwqym?6p``uZRmCSMHo++-w!DkOUyM)Am zto9krphwv-;YL=KTz>%(RW2clC4mMwNU%^U+N0KR?q+|Fa zbQ|ALp43~~Z!Qf4RZ3}!rr!be^C@q^+}CW~<{r{yE^bcb_(L+8JCti8#$mWa5x$!% z@rVI2lJMVZfxzQMxYn!@oIAz6@37zRxvudo)=DBTL z4RSw^P@*wQmD>KA!6vXAe-HjXvEO3ji&E11(v63xKr>0ut$LI{kwj{YiNPG(p!qjC zm|OJ;VA?i>1c8@i>8a%@qD8%#(qG1TelpUrHOEn5PsTf9vCIx~$v)9?}N!rQng zi#>MvY$mxq3P;@;*N__7qd$ea>c!XgH;3Zmc9Avb*Z;DpQd04XpYbP;>@Nqcrk1G# zy?N5PO!i|o)m%>&NhRNTYhD}evJ&{{(!?{&pwi2hA62d;s^{$E$qD*`pD7m0CTE`X zq{|aWaCLdr)cPciOSP}~N>U@`!R+6^g0qfMQbivtim6#5joxBj7|$$3fZ?Mk0#O!e z{k~9uiU^L_kMHd(!02xHLU(e)B$`vkQ;qd8;_mm!U*}(syZ~EKs<8`1Pm$I9I%3U$ zY|K7DHCOMyk%Ph0f5+(azsahop_i5H)Mwq@u>8lqo{ZYF?2lN^(504%xsM!mwhLyk zA#AMSiv+%((NF_#2L&MpOG z3*WEe`BO?8K?)s2BmIi?i0UHTb0{T>fs=o!qd?YN{+ne;H|Dl0rFw$u-T-$}=0uxN zi5?%a{;28~(RZK1E2e`p#*e(^+#-!QX}T@!YHm}`xKU4nsF%tK2&ntWb^$%odWA%U z8zUeEfm)b*VP5nH)JCUQ_YxEJ5g6%>!rwHSd9|OdG_7kog%fhQ0l%<)7X(lZqon9VhsmicEoj}-wNesCFr_PynN0cXqMUKNngSvj3M`X?l zEAGjMJ;$agguMmq&qnaU^4vliM$5!If@D3s3rt0%HbR}NVcxMzhZ5IAMATVsR4s08;_4kumK-XXP{)eTTwk$Stc;5^ z{38@U>su1tetXRG`Y{rL$uKdJSZV#gKgL2mc@n9eY-DcrJU#xrL@kYZp)%e{2fY=U}I zi`DZ>)DfyG5-e8F+RX31|spw0-J(4TK zWkC6JLm%V&cQo?XHGR1YM)WBWKPQ+;L;(#k1@=bl(Oo6&mUJRJ2|c}tC( z(B9(HRVAf>bpYAQAyb!$&%PJ_R*N7Xh?Q1+U!i{riX}HYxt`juQCM2AnbX)AY=j+A zqB6WOS)x>dYI7fLMaN!}msJW5*ub%;?3@FeMcz_76rdr>yW6-)qPAzmlDjNR;JK}+ zoPya_igwaZa&cq(HYrAa#Jf~Q`HU?gPFm%G@m8CRMxTkB&8#XYk{ONo zWUugN*nNJmUmQ#p?%EEWtHMFNzx^YQOOZgKE0 z*0v0ZaB;3N!54-6bwSbyPKQy059t(3G#@{IaE9WGaijr^q7?a0;F-^-SFg9z=1%~` zx2*Tv&MX|m{-%;3$Og{2;yotDRKHK#v}qrpyvGwC)}3#e_QPLt-LplJYUa~z&f7b1 ztt;Uu1)ejsp|2JS(Js-cg7F()6jZLRw7v3t0Ic&2A(NDuOgUI#x(rG$p!QsJt4mZA6UrcGp{asKlDg)l99rUn-Lo_{_Kcc7qlmO=&#D z&*7lksJhA~c2q&0^N#Uz03rZWJ=@@b;D^kL8bWvy?kJ-lNksFcW;LXa1(s3~fdL`j z<-A`!sHda^jIyuLjM^R2vBe~&a8d>w8W<5uE(r1|)b*O`YCf1DXurQIc2b@QN{8D? z>U~(xS8t1M{TJtCrsSVq53-dY-3Zx%9p(2g1zo9jHAP2VCwa6?cRf*2|9$^gL&WQI zw&Gk(Arjhsu^Ghqe{A@o>7b^x#|=zy*}h(TnWt>{LU}ea#u1fVE^-M#0f>isN!M>F2c8|S{Nl4_H6j>L6rbH(g@%c{w;Fhy(Y4EPa^lDy0p ze&3F>&8vdE3W=K)uBtZ7fOinp#+^3wY)VDU0f)My%@wOprQKYQxhYqf?;@INuj8hc zo?A}Jm+7Jdqp*Po<+nv9W( zBlzy$JUH{#JBq^0sn;CzBB@F(yX)AcVL%X8O{j;sY`l6ljEZGsx3pe=ElEGdWY5{$ z`QZk~j-eRV_dQ8f%SPvbKe5mc`5{n8XWqa|BKc1;E-)CFJE$Qrx1;MEO8si@{_a#; zKSdR4?So>q?ePMq<1$Z@DPJE>w>Ro%2WrxffJYKZ&K<-aw!_*AAvJ-?kh|Z)*U{~( z4{5lSg>T-?hV+H7oZC_RcdAe^XjZHDlw{KdznI{Sz>hb@>Uf(?JhAr+#d9T!2{Pk? z9huPhsA=2{5%+gd*`g_)UITxB>>7S2*-MnCi+P#yW=XUyf|ZGm-*+a#kEUl4$Sh~f z4iH|DeTC@Ojnh__gv{ART1}-J3U4&AbNIwftMgI*)=9heg1T%q&NgEya(6lF0;jVeJ}nZt+-J_rnvs_We9Q&{ zTusT)JsA{aP4dcw3LsUW8C$!!3Cw23T6JDw#e? zA-PZ}N~omN=Nz`H;a~W>ibdr0_b$4gd4YI_(PJH~{*rIAW}tWk@7jgF@KRg8 zYu?eJiL~_=MsMH*{qrLS_Y64aEndx|5P(=!E%Ewl z`Z@;)o*s463HWMg%;*+qa8J=thD0_&hu_+wVZGPqa$e4r<8N4!KH?~1tfZ^Dy83VR zuZ|9^S-?)O*Gq2^ZS8@}XIS*mXjbLwDQ4l+U}3UwDS$RHM8i0w?@wW}0n?wDJOQuo zt3_)1VeY64(a5ebf&q|9^kWS%qe0TzzOvPFrl#*TyS6U(}S8h!Naa-zwVhs#x-&# zPiHHBYxMK3G;*dCYDpoNrO-YItL@XuA6U{fguFhzFV7w@)u@rAxByRKGAAG!aHnx< zX8?_|{3C#=%K7G&oR{uNK_}XE_ps$|M^pfL^jWW>c(6cy zyTxrg0>yNfL*PJ=YXMc|f-Dwl38E2@PpspqGU;Ycf2v`y z#UF#ujtrWm4>Eo^g0^1BHBT#*Tnt}2^PrvHci}`qsg371&TFw#$;%O<-M7(iWRTC6(4+LP9&r$cRBP;K0L=*l8(% zA<{>Y&;&NqPTE5w(&<6K(|i_Dw?HV7w>IgWY7_gSSUk7dlhFTMKSpnkP3xSbxXVn_ z+hZZ8zAoh}AypnQ%Sv;h!Nx@dWQO#gVV(h8Xq%=Bx)o@xiu7`GeHqe9L;MJ9eLuh$ zOt*bo?xyy0xp9kX&5#HlXX9u!>1yk_sxn_c5c6v0(RECRT{Lzk&o18okofa6JQzzo zB5|pal^R*3y6c+cHF#wRzfyjqE7zsFUtCSkbT_JHdsAC#@Z2`2He9|*eWn_-KtTYI zc#(!hj(JqWX+z@bX%l{@7J3D@C4wkj|1IAP)*i^xD3`$|91_>#hibZ>)3$)H!NLgLv!o;9mo;(=*F5&=Ex&@qu>MUEnatzSNW zeUxQ8gcf($e-$$iIS5rdn#6x=Lipr>j)k0qk}OS>rE_#9_Qu#MhrQHPa67-Q}{kQp01ad zPtDP1`S*x@Ee3dp5^%`|l1mZY8cisuLG&TkRRwD_0S~3o;8fogLhwXEAhtDGoy(Pu z`N|wc^qLK#`Um;QO3Xi}bdR;S2?F5qa12iQ8WN7FwGKi?JRHTv8`4@0vtL@7Pzz5< z6mRWLE5hhWE`QBM+ycOzfvW`7b52X|=H?EHxc*47Q-f#2lor22RDN4S8jype;jMY+ zY-~CS&Q;S{d@e(cOuj3<_;YGQGi>p(Xt9WX)ATWRzL@^p^ky01g}6usRsCp zFdAo0VC+zgYEHzK#}ONHjWnO2M`&94(56V(d6Vz`%u>4G1#Bx{EHa_9zb#zx) z?Q|HDUGjqTtC{vIps{=-g~PP^R@2|z0Jc-4HAcj~!hfm8pb~l$2*1}yL)uYIr}FDNKZ;BR3dqQd9h?~GT}wVh=%LfeB1Jh&m5 z0DX2bn0_2~o}3*8L9SXzWzdg9P zxS*rtJ{e~skEJ+i6!da(zD>N}0e|#zTtC9Y)V(^sPG7Gc9|gX*H3~lgp9c?5r4F3qR3;u`c z741GbwzqJKHJgN?w`&tF7t%X+LV`>hAA~h#gjxNA0Ub3*zO-A-#a}k@tIK1H2DkJ% zKCq82$)5G&N$v#dcv6{|B)~#Ygss@%*FMQ`c;lbucs6T+-O?k~q5X;8hX}#q@Kq(u zSV7an?RKP+aQFyJm_ngU;a9a-&{U}VUL%rD6CuLU=#6Bux7`62$)f6m$5hoay)-NY z-Syz+P?L*xgmCe4hhGc_$@Lsnf5N0!36RDPNuE1vbuhH}6tr_T{ohCho3ek8ZA3Q( z(q*(@?4{Enpt9N-;K_n~OoIlIX74F=k5*d8_}_`TTMH9>TTg}GtR;8FT5f0v^j9^$ z^0ep4Z=JRjKay@JMI%V(Px!jp9r|$LT8lA(EV9or$}Sx-CEOVKJ)~2DZ5kQ21Z~ie z8+bL>9xY;K03}n<(H{&6_PM*fRJ!jIwS^-8VL9g~wTD=*cSbG`6z&_%VXVN8DJQ*e zprdI&!Go)+{beMK5)|fH&qL#?!zi}Pb00c@8^b%8ywo$mmY0;8Pi|vGvpCj}zg})fAB%k=@9Ma8os{Y5L+E4g9|Da$mz({cS#Y@Q+`YwjL&w0 z!iW@-VkgijZ#QL5B{|ET2g@{LO5rP@WnK#Hd+PqiJF8^K&$H~?s#hjZ;uz#i;C zBUHclCO6%4y=`{wqadLfThp3RH^51|v|W`izd}#ky$(;C`A+ZCZ}ZaACFx!e!sf69 zf?%pt9}nLCCoxsoVJcFKvy|2a{|FkXs6xxV zG zfG`}jvZeY&!PonF^$e`uawA^si9i~lA zb8+N#dU<3>dde2-QshZm>H&EBbh(W}yxrAt`_qQqX=Lh}l~nH))7BkP>CcBcO2r9= zcsFNP9rJe!`y+mPN9I>)4!#}-$7~TcMRbap7{bBBFo%TXYFgBES;jzdo>^tN=IYIx6Y(|vBNJ93qjNiU4 z$VL(}1pyYR1;?7FIIqjk#go02V})Ag<##&{${?ID+erLl zrg9D)`I_nikFc{I;;Umv+%bf(GDpG;ONTJ60_D^i&Q-EgHa#;EAcgzs_g}hJ9?3QR zHuaS+12_g$A%zd33fy7WJ^VuO77Kf3tQ*_+hmybF`0#G&ZLn8m?EK#Zq0lXK0}Yvm zg}1nEmMS3T;>=I@rf$ycPu-*Q?h_#P5|khG|5q70(8qo=e8BM73TDZ!7 zd+{BX1YRE9G=j}iZu1R;#ni8%Y|LJ`wo>Wyv!(7{Z9RPcDL>Y){M@MPH1_XaQy)!{i?9bfTTmWz-*J9TksVekpClK^ADGru<`UW4|bb9}` z{ZQB6`OKJ*h_7grac3ntxnS|{JGy>wW)mnIVt+RSCTan^>(4oZmjfdDi6x2iT)jD6 zy{ctT5`>ejx?Tv1PjEFc_e&!CvjdhrI0qi&xiawZgKkX=867>C?zbp7c>Ga{1O3v# znjlo)Nc)npSE_BAoQNZdC(jO;(zA+z9>wuX0f|Fs9sBqA9fCj3NF%Ecv))WP#G7))`@x--CShRsHqC?b=j!y^FAk&Sci!4E4uCVT+ z*i5V5n0@{wc8lt4_(d#c1enk%W3dEGm$fi|4n4{S7OA)lgu0tF&;F9La1&_t@mMN7 z(=K1yld2+8cVB03b2tj9{?(6pkvs7*pWXt>%dyMC$=vN#cAYFl4!j!&k%d*(I(S-C z0bas^{aZc(k}K%%!JEQTtOZ|1Vaax^#=maqRrCzLq~u2mK_fm6A!QUIUC&p{W_09~ zl)4+~ZPmIb=l?-NE}0J zKVKd>I$Tlo$umZ5vtq2;E1I|PP{gI{Z&vf~o%)(I!##a(wEVwa1#w!@maA~k<;?Ta zW%G3_97=gatdQ2?Z6MbrPJOE$-nYp}k5%ZY3tV#Weo4~FO&ioi623feo1&3qa_^L^ zZk2;0MzfJ_d6DTo*w(Fo$?P=1c(=uqZOR1A;4)18vp-(V>Vj~~9yR35a;pJg&wp}{ zXJj*DsqpuQ+s2Yj*0~vu13D0nIKR5&koPSzr}pL%WMlACOKCQBojOF?-*t9rDLA~f zPVMs$o?Y70d}?;p8w7{JFDeawPYs`8D5SU}n%ONNcqnzdfCGTno9#5*m69B7IRTx4 zZiyfsSOpf($FS7)sBp3GJ2-z3IoV9)MO^LlJuRRa^)E_1Zq00XMb=dO@gJ#IQJvUGK@EQVx2s6KY3t?#enW+KD4g+J5M*n z99`4RA&A@Q7o7QOvtA}}gCb}4VBJkaHwj)&4EY8=H;7|TM-%Ta$sKIr!9M<@GJ#1n z2lCZG!3IrQi+z;tE*tX=d+-iCyf|7_*?i=fd>}7->e`Xab7FuI8kYQU-XEo*e0JM4 z`-73%CWd62M@pXk!*cx^jR6M3gI|`(rfZB|>9j16$?jJCd_sJ!j6kJMoEK;FIdn1D zzJ{o~?QHgEcOE1@JxW_z_D?_RsvuMFj!WW;Oi+yJ+K|z^XxRP+L$OuVD)wNx_j1Uk zGE&$G87MH+kflr=1ev>anzEWpt@mUYe$4M0){HcAf+cG5fwyD%7jU~t&bz2a_Zx&} z_uSko4Z_dE+FhO~gQG*bUAZV;84oqDQL8G3jCzoVjCulO7%wp^m~k%%?#{9|_|tSd zXA#rxK6c)wbokNp?mjZvBbQ;rmM2fI?ly1;^qtIBj|=64OFUr6Tq4nZ zk?f&n_z(PWL&uvS(C8buepXFJ@t~Rjx5erm>-eh^lDxOkY+0AkLG3QR7Hf!gJ_NDT zw&kIC_D~^fK;aNU8`hNqoYBTyX%>(8?9zEh%_y88c}DFXO8EMLT=4b^5)NUuU#|H~ z-S@ytJCoO}(5sDJH!O2qq0Y&u! z0g?Rgt7@($|KVu=ar$?e{2O6j^L;sOj@0+MmHrH-}8Wx{)_r#XUQp-43cqCp? zj9YNyz(bJ5q8P^O11UCGe)#!ydJ(oU_$Sj+$m5b3B@FpoUEf@fZtv*Tu72byToxLm ze&49aVEZ_%2W@2 zb@r_5J)rQ5%u^wMvKRjDkh!7(jX8tu>7Lojg2ty~d5j6&~XkS;qYjg&Jy03l0ypUgh28 zZ-t?Gs32t27;k)8_gaQ^^r@S6SkdMws~Xj-m&7P$CQN1yyxPl^iK9%DCzFL$0Vq+0 z82Uz3Z*xLenXJwweCutiPoL&JRGs{}ynT`zho)WMXZ~yVU|s0x1^(wa>)khE zZW8k&>I0Z(s`SYp-wgdTij@IFieDeI(q!fdsuX;V`83KBigWBDTD>B|sc-{3#tUc{ zj1Zp)_swOpfif#IkY@Ci)KxX@rH>bmKvTayli^lVxqevr)A#-TXj$O!>-Hrs_9c%n zZarK#nFc7aH|N1*I<_3AnF3dfwqVKvSk-LS{>(ehWHqYrC{5~je;HTwNk#QFrj#Bs zM6M9Wi=ci?^Pl2%F(W=C<4inGcE|>P$AX2E7`;$=r-Pd zr4V2dU=7SddS?jpSnE-h#iF;b!^`iv11HyW>9#jrxtKo~EK8#PX+CJ$?O3qLa4&G= zl%E!7u}1SSFdg+LGfYsWBkMOO);RzWK0ZnJp0qkH*Tqh;ief`h+2WK8*QXX z$G`pqoEhV~{^w`n`_m=QrcsyaA$eBAJL^a@a*cQ#*UUoaSCkdOZx`j2J9JK0U{ouu zDhxKCP>3GlM=7m<{5QRpfIwK|sN`sWbOby$a7{IG7KDY1Oa?`bK)54;YI_pUL*>@1 zvpSr_p~7e(o&LW^cmb}tIzkOjRg&LGb7AiO1Mcs$c+6mH8NK^x*`qgR8+8A3qV z_nb|y8_+wkjDf4HQ!Z^CF4%df%snWp!mqxyaQRFl4HDqz9w!*Mn9vX66VSN>lU&(1 zxdaiv-^RY{pfG8mBfIoM84s*G$(UR$TK(+iKq;)0D09l>vUZAX#ks(wjEnlk<1oN# zQ{|rE{Fm^agM!SCqmJX+g2r0u9F^nOM%>2_WfFMBLViYHn&%tN~0kPu)CLa_ZfW)Ty_g# zjk)qbREwyD=4-;fAi03X*A3(ymzL6)uQ5n}dY3jOb5JfU|3e#gM;uTu)gZTpiK+`J z=A0ew#&wqd=@~8{wxqU8$#Op_YmBXQf$asq=k!z1J~4zVcMC)ChP4x&+6bhD zm@0zbBEnfLWP^2TPLQzwUA=S(dQWJe)DHkgy@`E8iPY~1Bw|pE^IlX{&4YByVI}h| ztSI51-c%0000FY?^PFO?&_KxO@|$U&Ip4-q*0KPqRVleRvS;FN+vEivEVi~Ig6y9VOQj8}- z$i7!TG{`Sb8q=#~A0HLTAL2RbxvVwQQl9z=C*1)6ohlEeF~F|K60-;ttC)9oE(WP2 zxAuW!0wq1@OX~W5oi@vKt>aR}B9g=DBz_j0AHv{z}5n%MCp8ohR^9vhRZArjKOmf+vAKYX+JsZ|JNmxnK8k)Y?XEN-Bhoq4w;B zQ~scy4EQ_k_Gn)hcf<~Sl77@vmlg?P4V{l6AIR4-l{y3kJ+FZCkv6f`>xeVw2u+Ch zqliE_7fc#=^pD!SgtSBvZ_nwMWNJdzqe|)=uJ}7BFg%LmarxpjgNY4nl}IFi7uWtw z_hgx9NRPqSKKeE@z0t-L7$+E#tSTAP1rFms6FV*axi9A$Fe5(L;vfQRXlU_1}ZZi>0K zCmJ%CQ2d`1G5EqtW_`$j_y98V72-@_Psbwq07UbU{u?h4a3dwgkw7nNWwiEdhEN$) zO;)!2k>lU$4%3hosdBzmS85dN%D`-=5wppU%1OU2v<=6rB8Ekjp^A@?(LgC+_;4g> zB0FMiLJ%#a+nrg!-g^A06Szt~+SURX@RT5uTmzUT&uh5Mg|quCF9Ka+9&4_&*5=}S zk5w4Ki*_m-zi#E9*YPe!jsj!WR(amfk)yo30ozXdW6sS>uuua*jrZTf>AG`T{qwM{ z?+<}7ew^IBoU1ZXhRkZTM4|V<0GcDS+uj~`4`-O^yl4dmi2aI2+EZbBy=cF-{2qlr zgoLfhYLPBU4KXubzMl_o9Si>~4QCEYi#FXs_l_~F#Wy9k#@gj{EgKbc7g9JuR(=l7 z%>2D;0v;t+1YP_*pi@KdF}vcs-_U2Peqw!OHRR56~U>VXTwy0rzdwEiWl zQPppkWth{&RHdFT&Z@;-H(2#sdsz#}J@u7|^<);A(ZC5NYY#;u_(6Yhl(D>!+h*0>rmZgL3ZMz1^f^o}zu&8hnD!fGYD041!Eu~T0LSOEjB)6u{0@L67dn;) z3c6;dA+cliQE`u0p<&IMIJj|qiIjzY4M%7=Gx}!~Tl-PkZKwhop@TlcAcn9CKfzNC z5BVYvSWLoOiaq5^g_5%}+S`XQ_<02p4MB?0`x9^w7Iw9Fe1Dm=IDIVff!3V9a{MLX zRL!zW+u|}^`6tjq_myC}c}71s(F{F0&S--um2Hyy@P{+krQ_mgjo!+!``%ITedf-{ z0$Z>Zx&wdp1alP;e|pv?vE5huaLMMR1N)<$i+<*>7}rMAGAcaXN6+=#xaP@qYN!YG zaniEOk_yl8%Ef{pm_nTtw8l1smcBnY-LE#|Mvi?j;^SxMiTQp+`lUA!i;JQVwnR&9 zR%Gpyv~%u56jroZYRn>#_{G2t)0Zv+|IVyaTC2HeePeQatf@?TFd@>IEr=c+x`hBC zQcKr?lx9=5f6&sE3m7=uZ?-=iJkP%1%`VlcwzR80{&Za3pbgm@BF5F_y{CY#kia^V zb)L`(MtxNDJ%De4+R;D9)@2-fUSZE}J^*!=CM`3t(2yl>>a%HY*{~ABnpIGhcu`JG zGZ*GNF>~;whA}4QiNzY{G?2`B{I3|_k7yCS0q(@u!*u%oA0+K4GLA4hQv8ueND5J9 zCbvG6fzINgoR#tykdsm<)-jLZhv$bOmCR8I3JFZ>Rb2>7E!?`J2dXK;D}tgG#lu@2aIPlJ zS%|;ElXikESgWNYkI&Iv&;MGls2wdNvm^yCq|Mrs8V}VF**bYTy`fJ<>;xf>hAlM2o%yBI~eEvq#%c5RGjoOB_kiR&99)QY3>5k^LZe zI&R5+XrdYj0DdP3=^sOaa3z!jRiA;t^K|Z$&I}QrVrb&fV69)eMur;^bUC&$V{NWD zO5Gf5&dy{p-0xV77VL+_fBvE>-AQkp%+kSkx{#G8a*87~L$b+$VFQLj1B@pcVRb0& z_8t98_M5wF*C;w@Ly}~wrzmdRtZcDMj+i8-^qrfuYptN+=_`TC$O6EI6q{xH2oy}) z=0T=>5Q}EoTP-kBo62bNH6s?Z3e}wl7ZH@+fy#unY3OW+lSyKH+R5v!Aykv{ z2Q2QzlA3?{O;N6Bd~U$6IKbwO;nq1XC23dd1RJrjSB6zW;#BFHr_{Kjc0}@=Y|lJb zV`GfvAO)A(8fv~14=zV=L7@!8GegCaVJXikDMKPTr!XCYuceV9?^NEaK4^|t;$lj* z$=H%YZ}V&xgEB|Havv^_ICzC(>D3RGl4>EnHG$N&7j73D$@tQW&%SjT6N!p-YKKt^D5y@WQ|y~MlyJ& z)X8^qC+c*B*h$?7qXlIj&Z!lyTwdrmc#vze9@hTxGN@u)A+O+1W#-gKoTr#pww12D z3>vTqJDKv`IJ)4b`%0qYlVfVha>+rZAKU%5h-(PskcG zd#FwmG!GoWHae#Vm_L7ey#gT}(tA%R&BTVJzF4fgfy9BFzJihc{MR)9iJR4Yy!L1N zft&z#4hU!b2hb=)gqpY3S1Cp>Rx}qv;!|C|cZ<{9j}5b)iTni-@z zn<_2yJJvVA!#daO#;&-5Y_09e^0LP8ffPT`{{7WDB^@WEY^;Lqaufw#XmgdHMrejA zmoTxc|LV)@o$gn?UEUm{;iV(kf^U>~BM_T*u)(^ zO@-yX2epe;%etazI_>?M)+5+PsZCShSOuxoRP2?f?Jq+A4AN)E+&+2tk)Fe5Np}A! zN9Xb@n)$TBbKk8%twWymAL5AxUw4;UJ{%!4F06XVA^3Ql>qdqw@IO!KUk9P{y%B7S z_FG0ViDp-V^!C^-T=aC2MZ=67aPQm?ff(osu%tX9BYm=h1vZHlySwhYY*`))(aPk_ zS$y&i`k@NOZ&>&)^js)dxS5>zn-dL5vmY6Yf{-ssS zMU<5bC5M+m!_F=ysu&WINVD&ZX`jy~k#Dvq7 z#JF&P=2=U!2qU0OG|o)up$Wq6Hs~#OHJ*=zrkm$+uo?jv)giOsc_D z&wm+Qo+d>LO^jB}2_g5SCw2-9&~Z` zVPwT!jsvMfCaXLBT2XAVOtC);QcEem+oPZC@RXOS72bhUp5lK{zUFz#k`R58B!L^j zwHzdNSy%jD3U*U~_mkR**NIrKg*5JmE#*gkM*{aGAw{tNH%mo=#D7M zZBRBSo``0srN!&h9d{=!5N5*)=XLGI|MJUHbD*p^;128`{e;S=KR-!~RCQ0)WLwBK zPqQIEeO6?1X0!JA{>(aFul%~g#RB2-UU=ddy~DJJgD>Rl`gWVEY_|x6KAeOjL-+`b zNqJ@Ij5@nXKf%wbA*?gz)}022F-z7*rjsg=1uyrN@pRagIb1jf=GN5lFR((RB%yb2 zw1gK|y?*Mkj#5L$;u_92Qz76q30LUcmHYa~BixSuhW@nkR=m{C z6-`s9YoC-q^zq+}ovmGWi5oY57e(ljnB^PXc~9YbCre9}d(@&1dWinQW*SlBAY25Y zpUw}0!KSGf(RM))&z9NTMJhGcAQETF=)|w3;nzMUxy)Ywb<=cNN4(B$f>SkY8KN)Q z14LuJI2Hy;Pr@idskpNp!HB(i%`{9vN80mAYgm6oo9^haqV@b+ct@4nX91YvexXkiZBnk7&)ie=H)_Q zLc(ukCXEOzSxCNY7Dx!2@(dlz%Ge`Pg~k(Zhd9L*$=mOTne02HrT~>|2>qStVcD#X z;4X{sXY%77)i;MucIJZH^&Jn^D=FTlk+qUi4hI4HZyg>Frbr&68(YK~vf5{#hy-|+ZxE$Wl2X=q5g2Crt~c5If}m?JP*eL zmO|j*r_?;i5CfrFbJ%198 zUoZ|-ox?E}vM&Y@=(u5E|IbV@1x4y`L$S6;=15UrlGIzz?LuA@Wk2Jrf z_Elw^6MaUeJSvfFDO*lt_6+J$Wz(LL?~ z|AR$-d+vKmWKEWju(5m3|8eXZJft9O{TCt=`yCTt(m9KX51Vo z+t!gj|2w9d)Ch%E_jB?uR!xBpSD6UwrFboN39iSp)kbJa)E@Gh=4G*Q7x9Zzqs)xp z2S#E>)54@gI)@HjLHyq+U`?|`AqxEj$|fw`$!%T;m$r2=bH;WkipA65?fnH2Y)sv& z|BtYDU=k&0vvu3HZSS^i+qP}nw!Pc7ZQI;!+t%qX;!e!mh;w58K}BR$X61U^qe*dGY@{;)83&>;vHoh4T$UmKEJ}8GP@l9_;IffQqGN|x^rh3+z znb)qilr|{*e0f-(LD%S`7J}A=4}fLg)WJg17~w2&KziP&19%gNJptr_5udr5Z0E_*9;Gz9vQ z^gOE6$s?-*!s{4y2$&%As6hIgTGs(WUE@fp{sWrKCM|JXEJrE4kk$0z;wvN}k7g_p zIt-8Z+mhx;TY@?qVZ zN?Z9W5NXk>{T+5mQ2B3NB9ZlL`;!R^GI1?j$2{A5?QyOrdXA;Xm<=XKe0_TJo07(t z*+Z-Zu!35s>Jac?jC353=@P@caY%kS@tby$ zYuwUfebP7kt>;_V<*JqQ!p6X0Riu+-e@{=@RA=mc8|7u@hCzzSQA9FQ`twr$6B?8B z!!*D2o`6u5fVn42RwSU*lQ;3ErYnl>*admcLWRL@ zO^Ky~IK3~<0yST8kh*{mq)zJ)C~IVckeF=&E?`)y%)n^Ui$svZ4MzJfe@TW$n0PuwGX&+?!pbxMAf# z$r41Y>5J0YCl%B%J@k*%rWM^mDh?buh^qg9W}aq6NAgB|=5pklv>SjYs-|3M9vLQ z>F;84#Yb3TACFYo#!4~-#t*q^M+03227r^)pW=CL1uqUm=fm-PTalVVuzE^VvdAuI zb;mLwYLjXUYx$X0 z{Yu%W$e}8E+31v&)0w078Qt~4{L07P-9f&ib;qFWza=&x|J1FG+CD*x3R20RqLu!(iMnHUTOnP8Y27U+&CPuqz?5#f;jN<)>6uH^C>GgxRo&I>4-RX5> zbB@HSJ(g8l)I8P+(<^9UzJ)!feIL}w1yG~yGRG@+uPyrr*7Sm2e@AnfvFZ70HAVSAh9-pE~ccxW_*}qn|?e)VvoBur_*c>68*vgdqugX z;e~f_&`FdDI{3;*Qw5JH756NFgiCgj!D40oiHqqsHZmtN4NvC%t- zOITO^akR7ZdO3HLgPXPphM|@bsqB<$4B}^jtz6I18W&0IPKZn9FA~?mYxj+?@oJ7 z=H*WCZl9)Ty!rR|4?R~g;JdMxTU;}j2->cU;wjQQt0uO)Z2i(?k@PW+29^y*y-f5H z7w7O=cM9fs=Ts79FQsEVAG?%S(Dv;pk(Ti9c#B}>$zyA+J))LlO7$yw)RZ{k&1AP# zUl6*6Uj9hTVYpLsAtcVrG~ebK$wXm5IjtO)nV!Q@EI5I57r|$|rDy9r%1QwMcv2AL zdO?9G`i{&R<_b<%t?QraiGTfHRCK_D$)W)vV^lo#Vq&gKRw-p+*`VVIi<`bZOaM{g zG{qUfWlg1>f{x>U|GjV&sGNHy+gPZaGo|0eP-K1|es6L($b_PC-f2$67ral4pQm}Q z^w-KAmuGiAyQnq8$k^w;x@RyNF7`G(eEhSbi!qtrgPgukO`9j@l6jSPs#`PCH#pKp zOV)UD9Ve%7POzC%{$O!&aKCItlse9)PgsJpS5;Hc8E-lb>xd{lbX)IvS$)|mo(`py zjun0AF3PE7PZAlO2K3tTn9lk@FDm)o}(foqQDaQ0iblC1IwtuS}4?pc>z6a z2Zh+rB>wPFjwFWpE{|yHc~}Lsr6!@)J;I?#LQrbUeNlqA6F8Fr|eU{Gn0M3q|c;LB%s;gmEM=WYY(U z-`L~@WtS7~!5THjI&ANigZT-%q#nVyD!x&l64_OB4E z<}vR8X&|4q0EloP1P_Nm>jMHp`|_w56zy+%Ihj1~d?|Sd8u+_T&04+j;Ma$({!#(X zeC#@;StM;G(L!Ywr(>$8upQ4G6E15<2kX`1dZd#Unh^MPz2TgOKEah(_S^!KjDSzH z{@h|>;{x5(=NiqTCL{@ESzim9Dn#PQi`r_`CT7Md%8Clz$C~%V{^=b~(!4C7)h@ts zG(>kamoLy2@^Qluu@jNEIntGuBJ;Qobdzr|yqm-{OSvFM6k4?U?eOys@^}B5cxSpx zYog|%@oSQ9{7_k)uX~2Zi+BRVp^ae{$&wJ>wBr2*eMxg~n62e(@Ey1gR$CfpQqBoY z?rI51&r}^Yqq)k!u=V5yM!2=_{ckD)$Arc*Sh5ZEhy$N=Pl2IWb@X7NyQ&R!5~C9c zuy8@73LsSipqcrAPszyjG5!?O2f&caKyJ+43G89Fv#;J#^i4pWb=nAghF}YcjHQDj zRSTp$CyIKlxS=)&d(HSF4O5KztvY4+Q!B#0jhEQ%kPxF;ECV|^s!ue94J!nKsOuKA z=38Q^pYrA=;lI=`E>`Veoeigc&1g39bWz`KWvAeFGa2Y|kxG&?O^w61eWpkrdOATA zZ$Tk&h?@oOqFWZNBC9S&?KJZuQ4Q=j>7&?k29aF1NY-=pgf7TXWq4Dk7RIW5{&B&@ zn7W>}!!8i=*ZL##3xf3(_g;yV!1F&kU?sT;F@r#!TdYArJILo!zNCK>M8ewv-%3$T z{~pm38x+87P5FSS=}da_Q|oZ%CiHYoIvR+SHy5XVcb6HJS3K z)O0j6l*P8OBqrY8B!aXtQ&DUWDpVJ%K4{g<>|inKb^p}Z+qd5Do=W_5?rb-%*yx4O za4=jnN_ldZ!`_;8+&`=Pb0ZFQIJ6BFvd!XWdE2_%vOZBETgR-GfGvc6oT@~B%7mh< z!~Y*jF+TAFeK>HZ9l9-fj5aj-HorLw>MEvhFw=x~j*0`*D_~N;(_OY2#^kXsy-AAb zkO6&u-4|B2M8AL;!mSc1;}NF}2T#Ii;6T&A+!RcEyfWf1s36ZQ-%S{4SpR6d+bX+{ zsyy0l$7(mH#m~OZjdx7se@nZ%cv3}5{YufNKwmi5Hwz3+HB}_D!dnC+$xjx$9agVX zo?%$A?K3wxy9N`h&$7S`CQltMPlT{$OtH~aXjwuj-~Nd@DW9dNwf1&uQ{||+?!IQt zJKG)0$6?I$W2mK2~XYI=ZIn^?dk%8pG(==i1)v&D7+?aq`W_ju*lod0e znZVTi4Z6c`D^!_vhaBLG2J zT{_VXQ0RmU-qNS>pDyF zN^W7R8~7RyKQX&w4<8Nk^6J+$`TY!BcfcNIrz?$S(4R(q;*qX>o2?QeKBEqSzG5-H zzO&u!HQ2!2jUtu%Q8@3AVq&2?5czJ%fGx9DA1;ziwljLR9G#P9qB`~bGAR8T(p4Xb z?VEh5$NMjht3UJPjAOQ425+8)4+ZTgjtJ7G1KxhP#1Y4gS@Y;D6STjAsxlk=*QAD$ z8X}RHYC!q>wZ<$pv-5BP%Ut}mQ1An`opjdup&HJVYSB>79W_*V@CN2Pig8TJA76SJ zG-fib9e}BgS!eiZ-j^6kG3UkVEb#{0*`aET6QzLvr9(m=|Gm2>)o}_UfLzUsWy1p9 z!S(Tbot$**pTBqk%+|eKoafVzd3Zj(+PnB0yS=j7hgL|cR}atz!rcy2BkX}!HNdVL zJRjoK&&H&h{Fbg${3enRf(U=lOk-Ar_CfBILx%IZj~^$KOH|##iOulp-|V+b3Oqw3 zm&76`>56$wq&<|qr#cm;Wlg$k)?FS)9&B|}e{#az0g&`z$%Ak#BEdEs8DK9>GGfzC z>gxyxZnez`xWAZS_(8kl|1?0F8_MiPBP@Vo(lcKbP9T}~$$1QU2R$bz;uJsOc-6gZ z!fdeV>GV-5CiUhr)R@>!P$K4$dyMNcS9RV+)Fkbn>x&zlmLEP}K#MD&VZy+tQDni* z8hhZpq9Q}duRTXA@%|@x_9dYu9HUXj*lzj}swQc-5QBG`TA!Q- zS>#}W{_ciLTbt>ndyLeI8pb;||B_*0fb;-J2bcgBJ#ko3aer1TU$5UCZg3n_?STgF z067Zj3jFKIBTAsrCPqJl{7eQW+bP-Z??n{eE!)7{dE}Xj-Z?e0=r?zKyrI0yWPqC1 zkA=e_x?mO-M)!p9c~%=6yxAG`ukXPuQ8iJ<-4QKn-`nwR>@S>n zq`#jVhl=N`v?3APWNzD$F)Xuv;J60rh)%xSLL)>Fa6&jE#J|vz-8;!z_q^T}j+8!3 zhb?Dn8PQ)pcsizh-mPX*zKdm1Zv@zKdjdp~zNUaH0LK85Ennull2G<7r+?ivD4i+7 zf^goAA$3V5>J<`7=IGK-gH&)8OlIx04MG7;%5fDP?l~xM`xTgogI9vF3TE;dXc1_+ zEIb2YgKXHV1l>sV3B@u%xRtjqB7X_GkpE<@2OcE#B&`SpBr}zgxJ@LgyKuV!Dc!`t z{U&!XDp^G*K@8jZG!*e9>Q`S2N|}F&qt+SBku9PlzYpjF+Tcc9K+>jn8215tF8nci z#>Lz20>ELa%4M@TaT#z?uukx`!nY5Gn)bQv*6{4a8)>`vLZxt82j@o1liHY~?Ce5M z#V;au#CrAzz<(djc@Y<3`N5S{=|CA}4Mo2^V3|BsMfPdI>yi|OO9o^z66_CNA%Ie| z8hNOBzjqmT)8x2W*r?b{+o}@x#~SR6?E`M?)&{EYs^njD0KVnegh{%Yo<&2wLAJV8 z94zS@%~1XO8PkL%x4QAlzr9?Z9&AD8d7?=ey~zrPxv3(Tmxa>{qH$-s5QylX*oz=1UfR@nF&LYw|?Hb8yGAga_%&f8kJS zg#~2njlkkdfw8RZtU!-#Ywa1?{sd=OX%qI4f!<%wap$mI+HAWT%t6@b zmGEdLYlg7e6hfk)e`P5I%&$k)XlhAxT+2o_tV2+t_`sZS4=w3GfxSAwRo-(?4~u0A z@JqBi=H4M_N7__BY&A>GpI9gg+VOa_j+c#uu!5Wm>^J=6i%*vruw{#%IO&{eeWrBr zz7_`Jt|lmDjC-}kHpFH)(etpUb91Ll_cG=SLg5VuA`>WS?D3WUb(Nl9SN7m;RUB%Q zi*J9o+EMo9Y|YVLRDX4WDMq?kazN^X04CoO+4)fhM$Y+9K zkD5NfdmQ}3jtiub0fCT9gB7u%Ot}mi2zD{eq&sVay1)1{pc$}6Q3zXK;QD_;z@5Cpb0 z!Rt;~J;sYnd7x3C5J8SoQ%m*CNLnh6%!%$-cq0c_CZ2Reox?p?H}@G;xs(pB>GgnYxNd)@8^Ea))35$r;VMr7~2s}S|! zu3S4$$x~mleym{{HldtFa+8(!$ZL5(x%JT4H{n!s@P8`etD_zF?KA*~$@*%vK^>ZI zl_TZEdRT5Oq^Z~IBWf72qNvf12=-*)5P6MSv8Z2VhOnZx-OEe{FTb{@2jb&z57@SV`tvfiUt-A(|3hrpQ-q9j4T1scEyqQmVfzv5sip-?4o$k`MT;^0aAC&B#UgTZ|EQb= zM%L1Z?9o%P0Z8NTbGwE9ktwQ#NAOs0tTUnVY=D1HqhP)pdkQ6&z7D-*gTWKF zp4&jH=kg&pA1~0eX35?DJR4TrZNsoOLSa<`9R`eykGEf%qTyp&&z8e>d@j8%c764# zh-_@WTR;xoaO8#Pj#JNfk9!z~uk*Hj5!VpmuV?;-DZv%}-MCmfj$zfw)+bM}GUsu` zf06ALsA|m^eEFJJm%o3aZ{3uE+b7+DKU?~Mxf<+lhuGv^-sQJe%Fsp93@|4W;-k%} zjILAX_`=$i*9rAvF$@3ghq@EA%W^2s`Bkr6Ic~q_=1Mz6d5#8oxjY3W0Uc{(+ zYM?S3LW@|W(DB`szg^_tm=H9v0FGFw?JJ$iI5E=Buv^cbK0de2=<+KDPqZm{S<9uk zg+{VEiNvowFj52%i}gnVWFQDMcS%N6{LJoAvCNzMw7tute$-KpAe#wGRbMtZZH7&y zA9Z&gR~aWJJQBorrmh}x>UoAIxFwA_P2a0fB0RK@#d zicS#3gMg{^rE44z5-82GOP8n@;2cC(DZ($FEh^T${)Y#k>bo(nXK^JdyZ=D_9 zC1)5}Ik&j5orcR0t3}>Lyv@O7L z!U%I>|6Uq1-$zOEi&T4RE|2qJFafl5`Oa7G@mlFla9NnrvNb(T^8*lzBRiXYK3x!1 zQ`%S+$Podee%bW8D@L7)57}oy>qaF`g)7f>!XQZ)fs(1cWvU9bSYfMDTB)Nk2KgWC zAFI1g)rk;yKFgZNEn%@_#VFddJ?Wl7HI8!BI?Brl_wcUkLEVBYgKKzM`~Bq^(FC0# zm8(s_Mx(p7i_>t1tA?HIGa+#k9wL7Gjz3|$)CC3unugM5-masMtiS;+=$FEo1az;* z4|{!{!MyA$lT=l%ObI{r0Gnf}LOZRhLcmw}CXEB(MWEgbplNWBut;CEiMP|ki-Z}uSxD@{E;`*r^=d3hr;q=9dyMU z5M=_;;)$ywHw%lZyRLSbyI!}&Q!u{S@rDw5}xbV&W zjTG8>H=nk8tGx5{nHRI>;Pn-3({;{?tB-j%#T}HM-pu}SS)Z!BrAQYA#%NR29hAmi z^lBoj6>4mxaY~CAl=7csbqbrSYPTyOLo2+)8pHt$2Io$@L)#Cg*O`kcf?MEaUHX3^ ze+9Q&L7N+mb-b9%yRiq<^$sX=^%;vxdd4fAe;)nZ*t=)(4;>tZELd}H`PLs`+POHu zIcb+i=KrX@?&d+NB=FY`U$th7lYi3X!^gvbA=M<4(I#Q%#tVQAp=pD<^P zl1$tpJwo@1TC}62Ld1|`fFyw?`+&)dx=O-n(fu{Y6H^!*T<-ciE-;=W#b% ztwm-o3gM|LqQDim`{P7UUMaUkkAV#}QZQKmntzWf)>{!!S&NQTJ~oB_**h9n6QSG! zvrOVfCtPF!-oS1>#o@MxuSGR96^5Bbg+g&IU0i9c7q8N-WEh;#L4`VUO?UmQ(>n4u z&YY!mA_*fpVjOZ#w18_Eh(9h|0jcHeh*rmA!{XqB|US9w5049KIbWgjANX*n#u7ii_GLxG^PQZ++j>!pY z42#i>i0^orwPsQ9kSt(u+SyM;Ig4%HCE!X||Fs z1)hzNir)DgZ*x#3P7t#RoKneWG~#PISHqG|VBXRH`=vqGCe~&KaqKw%B^A}8{Xa=X z|0QG5nHu~j3GDHo5U|>&?WQP#&#D^C8n{$IS&_vYI1t?K5UXWhEbu5L6i7e=k=Xsf zA?=`!geu+7_BwPL`gjXc8$Yx%9ZDKkBa@k}Ss?|dNqOor*l|N(lVC}^I)4Z%a;tjz zxc+TV;-B1nATX)^6C9>pK_eD3N=-`IWK%&W^31z8oIfa>6ako0b_&Kt)R6`K`nuyl zC5vy0bbqpC1JH}BnCS_N$Z!=Ydd2J{3+Mg#kg$7sGZ;di?y=wfj^walz%XYByZawq zHSzgO{){-WPCA^zO7Ndqg4|inO)`$kF;PC$YXtG_<)FNBU;=!Nt+k3GSJ}z$#N)## zM^FmH+rEKOBGU`0Q%ylI5r4~CD$6Rt%x6=^48t^zT`C}K?_u3MZc0+Q5GiIPOc;zy zfm`OXedyx%q<3X@VaV(*qLwsJoGMT8uD%x|xFKew+(9W-^H0uXkjaiLu~d$!Y9xa^ zMxSWgku`_npi^(TKcIjjh(y+t@u5nfC&SqHW|*GQiskbzj}6K0oG7sSr~I=rGd35v zb;pp2_iOL*g%`V-)mPtJCFRK96}`BxK_oX~n1!z0m&&X$>U|vUP$MUoQc_0qe|UhV z(0R_3er9N%<+KiyCkKs>q`;D)fOY4cQDr-fDm zUPr+-m7#(QG!Ib*+uz%INuM8TwUc;7s>riietr>?@e)78`>>W}E%^daaD&jf1Y zWBW&Ptpb+-%o0n0u8{|KQls%oeKR&I>GsTD>5@2En{p z0h#rTtZMI&%$2{3vYwyg!Cs4vqDLidLLG5bEA6e&d>QP@UIrm}fUsslEoHqzxdj~M z5mJ24Z%+H5)6T1k>ZnO68i9??bTM4j>XkOR%F%8fznH&2yDnpHx>QModN`A*QlG4+ zMja;qhylh0@;lcnZ%j1@$y0zEvD1gxiIh8BR!%vk4qNX>QGF>slw;pA98Mp190e9wRtTf0+qFP*JTXNuFT7^<6;Ep}(?r9`?ELfX#>?uTE`k$%HNX?SN(Ia}Sy|p1TadD5z||6p)?pugnTa5mKc_ftXGt++6M&B`E%9skI6dk-czhq<&>@mPvYH0F_wZiZm zAra}h6RuUO(-`U2tR;a%$#$it+-trkbvMbwR-nPP%%CTF#0$wrPv23^vY+C8W5$4-$9RKKGuPr6V6BdHg5Q zLUMg~&htXhK(}>CCRR>1jyG3Nj*d1?ZYJoO(&GM1ihF;JobmW{1TdsXKahq({?Cji zO_STWz3DsRKdyF@(>*w~5yH103&}6Pp&x9-oeryOWOlkoVTFrrtIfh^i$K&Xrnmy$ zxz0;>6f5)#p_jE6-&XW}-ScvSof}u?*HGb>wDblNM}2D6uYrdaH2jOA2RD#~q@Zf` zrzuy>o>Huiv@#zcBa&PSicVWAVfDYv7^ymZGR&7UP3YmN?K>N3IHIuRmU@5qBDDxt z4Nq+DgATlCcylhja;k0;pH%Mc{`<5LH6+$X75dy7g#-XNAp`(m_+Nf8ObwizO>7KJ zjQ{5xWLm=}?spFIZQJ)_kS`>^kaFsT)kL9y$+LN~i?9xd(+NueF%+qNh9;IUMe9-O zl=-uru}<3V&84#d2GJ`{WM+2qlwnrBZvC8kaerTjI&L`GES`=OX~rg4PZMV+oC+fM zm)qz5O!NF%qBz8qOP$<)>o4;MN$inTij_~sum=!Qr*=Jt9d&TG5!$3R{Cg%1ruo&RYr=S3AfD6dJC0C{=HU4pKA7S`EG{1ld6d$xxnN8#~;Q^szp+2h`+oB_NWa2 zq|T+yH%Ir}uVn0g@5ZcIdx<#{@YIp6DjJlCBGPz~+9}ay8oKw2#~GrOUTo7ZUi2Mr zI=X-QkvoJuCq96DmpEw~J~%!EK9vJj3)cVe-&iK7-F_9ql1T1?A>^8LE7hSNMP7R- zB;Nt6pBDaGB#&6Df7KjfQ#jC6E+yG-)h_YQf|@mpN~@}hRIY0E$eqccMg4&H`fFxr zGzb#l9uv8?)l5;#pwm3SIvPPi@U|N^1VYiDi3=1-d``LCy&RtAQnx%jqn`RRB=r28 z^?)uai({mad>JWBmlDYMdya)t=uq_V`Fp1}viE`#qfyh*6LZ^KwcEKV11Y96)OH80 zYUNro+-z@7Yc>*0S2GcQV7mU0>bJzNo10kQ}yvjxMh z=DB?5DPXU0;Eiag4PH9ct8pXtvQ(|C$Wo=;QqpQufdexU*k}n^AKh=o_4Ra2)Gf)Z3hxg{J1b^xTh$WWZN102Nl}F}1iLmq0axg`t{As_ z0Cnx|TPnwph3IjyR}rzG2bfGFkJ1!D_;3C_FNr7IYgD($8e=5aHj+1>`NG1N$xKio z4^KcFY<$K}^xxX)j~K0e-S(ly026h^4g z=mlN!GMLeOL1!a8j5?)5TWHX_WMa&vTQoDjRR@acmK{Q0!}vt`iXeU&Vtgc^RKZAL zXEI=1fA@5t;~>>^+laPM_}_%OaOnIg={$;G*^Xk7)H}d9q})vWNs)U)KpkXS6&Vp2 zwk)=8Qhd#dT8i}Dry+P$|9KV{MsG(NSa!*=0K^)O(*AKcAh!JX1KojyRe+CqUZDez z6^L0`O{c52ON=tSo1htTXVF$cf7>>gO-%0G8bspExFDlhF#+PHLT zHQ|S3W64<0!nZZ=(n1!!5|XOaP5lmAJ2cxQAW?MN{B<|C&19pQEP)*6V#JO$mE;r> zunyqeoTpm9M;C7D&yu@%EIhx`{ZIy<<~4k~beGYXYhGRvpzuxy@~Mvtg5-$v7`O_+ z=M=3;CskCv0~qk>*mpmgAJ@;vX_vu?d&`Y@uW`}<9p?bp5L{Om9!?y8JdYWk?`H-H zkOOts@cd1*4(^GhFlZWpZW2reUf8r(Kr=_S(hjku9hvxjYT6yk&{GsWjNAD4eY2D` z?%s71pfmMM6VL>*+w+OP(mDSlkHDXm6Q^fG;3a}hIYs$N^5~#K zm-=lO%uJzj&T8fnA_z8e*#mhBqDA{Pub~+;8uyrNF`D&T?{qiNIZtmEQA|T_sWJk3 z>v(GOhAL+Xp3~`+aoHRQ6rMf<5kVF#8fbNkK#)PTI_Y!>U_4?30uyd$W#7vCiay}V zdcY)VS1wE`?>+rkDy)oFmU;@1unCLjA)z2E;s_BFya3FcGw~GL4lt~KR~bLu{cA>! znNF5JOk|9~IJ!`A+7Dsbb8BI!K>Ip>Lc=#bdP?-np-S$O)%^*_kYpR4@tZSV6jY)f7!ngT3gZNL7fJxD0YgfQdwt!;Q(p!Y z2UHL-;;40Qc+LzG6WBDtY1=3v$5dV5a*Iivt}BzTkuA)9yP0k*J2OR`D*8 ziiAaMwwt838Jo|c@ZcF=35wgj~I5T~Xz}Jf1kuk|o@Su=@j5;n7 zD;}#jS`mSUGEpaa8oRE+d`Xd={Zw#wqGpg_nUpa~%UypKgXR~$1kd3Qbt3~Smr`WH z?v-A6mSM@R5F5x!tNF)(Cd;=` z{dOid#^(z0Y;XH38eOuqkbh@E1J@S#Rqh3&GuHBG2$!DM;&Ywv5yD!R&Dvo_u)`%4 z;)dIJlj+%h@tI~hb$AXjV=w9l&c|=58eq%DblEeB-GnkBy3^xLig9{Up8^8;yAfZf zQSwu)mBqGyB<93*Bh)Nf_f!cX;l-|82v{=C|0$J+GI!g zzSd)Sr65Ni#c|sKqO@+Mqz*=FDAo%^Y$zlliabg>>@N8i*PC$oO6t140DU=*tYb`H&!X8ZWL4@i`-x-j}=qE zwX1zD;@Ip2y&2TC^6tsmfiO~XKlWpzjY8>RZa%bWs?rbLf!R5DXc0jZp#?E*=@_1f z{b&1$mEUBDE}Sl$u)Jyl@_Kh|t2~dWQvD$|y4FD&jhQzXGREcc&)3(t>X$(QnX{Z-6R+mCe^MVw;>R|7!gbGf~KeeIWl&+TB!|9co3DCi1(mp$R zE7z_%Qe~*gyFh-lJj+pgdse5CzbVibcN zA7-%_mE2|KIaYPjoQ%O!j6NIWbtQl5w`loe8;4C zQrD#e>!|lzg7ud_+V(;PJw6x4J>XjkY9Nv#q}0Vj_8#Iv#cJJY+E8{%%wCAU{?0tQ z1h3OKSKWqfNUX&EEF*eZMrRVsMCY)&_}Ko=#%?7&MzXqqUdDC%S{Eev&s1M${{H;< zbgyh-BYr|)@Dd*^V29cq)(#=&x|Yn%#0u^MS+>}QD=;;2#@*$5Ty1wtv$lbzumIt? z`Lz-?tG|hyrT!B`ueHu|JpJd~qi6d52+mx)Gtgyn?%uCv^Rp(qyj%P^P8*W84SX~+ z=I#kxkXuciSnM)ng;zw<8FGRS*$_Y2eX}4|C$AfRd;UE#cM_NO3I_VF&g`N+1qZaP`*%d6^Dm&f7X)2(^jO-g+%XfPKtK4UDd8Y+U=^|WO5kdz|5&sm`itQUd)xHK< z^VPe9`@OF86+E2+hg1^IRIPZ^NlKC2OLId@aN74!O*L_3LOu2BqcuDE1%XQ-a0jQ+ zVyNXIu*Q7@|MwlT5rt42!FM!;>365V!UX`J``>np{|}P>UlQc+zLBC~z)on>IU+tKOeaj-Tjw2~&ApYIIoK0gv?14@4 zt(h~?Q>O>onTEuhJTJx!W4j-ZyMr?mR}ZKO(V|0HjqtVzG_l!QJwHvJIK5fE;#XPS z0}Ddg+Rl(e7ZAXt6!k{xTH&OqDPfV4N|>Id4C-ccTMxvnRi_qOs#t#Z_kP~>pfEYV zU#NBfQvSl&KT!0wKP~Ap>vt@MA?AG6GJV&UZCYCtZnmNrWw=+#G+NWj&B!PR1@dI} zSo!Br0?y=5slV`HGf{%qGmlVQ4?<Gk@K5ZvClJIe{z zhUmDn{&=#l@<^|(TkTDw7#81|jD>0Y;7h+t8Y6w`5>Wbr$I*PK$FPFc>%1T);D-0Gs3n3SFRml$rhX;o5oS4OSd}b_T%8}&1(fD)_<6vW8g@dvlQAn?uSm23FzP`W0 zGU*8mjghzq1DC8}brkrcrHhRU1<^lFGu#ocRPuXL&uI*IREK0|5-5c=X`#cocuWBmJVg)G zmRRA)JxCW+#Ilo=RSfh)hFcQxGEfuEt3k+st`(9zDT*pa(vX%f_xPtBUDZt*D&y4f zW$7AT(r#$C{qndkbWnV%G(!cUq6V^yp+Nj3pqO%esR{#*Y5@ilEqgc&OfMM%Mn5`c zl_@VYPbn)kby_`75jcZy4`ccbDIatC;FyRXF|xon^*F9rtF;kVxz%|X`>ZTQYo{bx ziB>@=%ddCwEcL54&cfYNzYT_>jD&w_*L0C#Ga{aUZMsO;Hjoarqn6kF-Bw7K5Cj;A z)Qqrk#yeY2@M9WoH;^1qqMVat>$mXr$c%7(You+*8R<4qXBxfrQPmNf`<9jx$yo7V zCJJQ>)y;$`C={wnzvz&sz+MpP5ZT?ePR-T*Sf3WAoL#AeW{fGQs;?`0{-DJ3>*d;O zJlhDCfY2Kj_w?^GkA0Q^2`i6*)qnC1LK?rNvc;3j$h<9DA182o9dGnr9>=m_%PaVq z&m(F4%A@3GEY=QbPoVQzlJ!HyNz*)6hV5dFKbiD1J zk-!S$J>GEzjR(s^FsY8^FvBW{S#8)#VMB3Ue(PeT9@T8s#}>(r$=XlMtU#diz!oil zLz(*$LqrPGst!znw+g-7T;QKGbME}%GFJv^#Snq8lU&R#H2SlaDaGu5iX5LA{O5Ui zY>Xa*_O+W~!UQQal~*kS=qd^@v_g)6UGXKxjKY&Id>fd`0%N^^*`|dgR5WHPp}0o< zGnk`j>2LR1`=Mfl!ee$u%XTWGV-r5BKWf&Y|4{)z6G*l$EQu&`=uiVSLSs4pvcN)V z=o}2VGB)*Q0NE5yUG*+;%bb5`^nG#!@gGLEK_plw(R_-GU`r4}02%9P`0ZGYKp`+_ zK~}hJ(h{8wzYpLF=unV;Q{Gj853IHKt)I!;J%^!7MXOADVQ1_Z^7n=z``8R7fYi}mw(wwc;q#ju?TxmtrlThiP)i7cE5xcUecebb zBcT4*g8|-;hr?cNxNii=L@->9ai*cUjhZW41#!{}1dR31LMIA6rnz72yf>@iv~0Y9 z61&7vGUUXM8P|aK7-S6v6Lthuz0MMJvv(tdEMg-mGupJqI(~ak(veDRw<5+%O2Z`v zhI`)8H|^-3e&_|PX%G-z4Ws{r*pp-x!c|XKeztBish)I}|Jg6aEeN^z2U_e*QI~~t z#afeyj6*nIRUz-~I1Mu822OSMqgY}xE;3?p!7=kWektena$*+7Y?b<%NaJ?lFQke; zAv+y~I0<*D6i*$@qv+gbTAoJ`@^X3$9`v^vc|HZ7e+<^m3!$=CzCuRDah{C>QvZv~ z<~ab{q0`c+VsvJeG2Mhl|1IZas5c%N6f3BJ9#AnQuSG5u#K?A?R=oqRLim9ou8v;o z5QnIwQ7IHmb|6TunB%=F^Neo3^`komf2}1yFoz?geh<6L=0lt}!~J4(Tv@e8Vd~=R zB=Q3+2H=K)A{r7*ot9U-!E*NK<*OQDE7<`MV^xYqG?53K!LR-~E;t~L(g za%p3TKi%S36poz?eG)%P*VM6qHP+;IdMcK7MHc(~U9`XO4+hte#jLl~6+mz#a@}gJ zduAodM3$lrG1F>mD2G%@+2}$fTm);w8pm=`L8TSBQ)@~eLm zR#A0FNLJfjqW;n}OF)Tf4TcXED~|I0{}6UfF`9&3x^3IGJ#E{zZQJv;ZEM=LZQHgr zZEI(*_RnUMy?AdcZ*nSC=R7*Zr^g;iO#XW$vor%ym$`x8T_n?dAdCq+>4s3pF$Lpj z7120x#hvf$V6fwR!mdOzZup7`bNxzWe%SgTVHd?qC@HiWu`f>ZVF^yQTF^;9|IvSL zp%UwcL<7)I{`H>=|3+21|EK?Su($X8U-X4Twj|0oe{VtDnQOe+*qV0>@*tj(OnRo~JwG&sRkS?is{LtcODBaS)$iw}#vDV$7+R!=(1#EWVG} zZoh{U%;%o*ef>IVRo-3<^vd&&oVpK;Xw=%QZg#3dM( zPg@;T*@)SgF;r$5mYU?PiQv_SvGpEFu@aK!3iCfC=+b|k6B;@U)sh++8+vFmaa05B zO+zkr@EIa$+Ksz%)v7Z=HG^;N%?$`N>7CV7Bc0~1Udipn7UCBgXzWdCXuxZ_C)B1J zqu(rU_#q~-gDR8BlTMrxPFt%Bgnimua_uzXg_QR835KS5XS1eT)aosqy6i4WP@JZY zdgv-}uEwJ>v(+0jyi(E8hMrq`D!>%(fXB-a$ve#S`6m&HgP{{0Ga};sEI})*3T0|U zW!_Og{T&+Ev+_~KHY%1eu0{RiXFoY--Uw{r%p0|6TIkwQ#@(X7hwWNS{0GfKxxWIZ z^BOv+S?hb~BI!&kYSseQvPdv-@wsk);Kb?}H41t_itQs}Xz0+%(V*Ih^9wx5HB0FR z6br^WxGmQ`v430()=5(_^WGA>KR*tFC5P-tCNjH8DlpZ+-A2**^+=qw?zRcW9suOS z@RoTh;!)Wa-Na$?8stO)TuUA$L|Zxvv>|xU;tcB!M4z;(wdEL9tHmk;L*QagQ|O&N z-vS74+n`-dJ~M^Y7PwD}Vf6a7X}GeYffp-PcfUUR5cT@BdCE9rB&Lfvz%uXW+`h7W0@FD=LHPqRl#gfe^D7k*M<8IX#~)Wj-d$5Peu0`rnstW(|D$lnmguFL2f!@22Syj($FB7*w7cG_x&f*sV?nK| zJJ9z1YZgbSa`uIe4_XM^9eR!;Xvw=|;GZEWxsMwz$-y2ptqc|;-mMoB@J(0O!BitQGu+RL(1n9nNYtZtm^6;Mfq!JMABp*M(18qbkF-O+>h#c zyCxnOpm>C@0OjU;5&eeN0P+I8O*`WlZdOqGh6XR%TRDVrQ9txK*ahm|Vh2RwR|1uU^wvy@VvHT8t4fBmPoh$SBal_isr_IXnyYy;TvTJ1jahWSJ< z zeBJeEi2GHrZP{y6EZY#fveg99)zYB+H1_;t?2>F^i75pHu|$I|BrO@P5GT@I4*zHv zv~)Sz5Bg6r;pWOc1!3R&CjcqEzO+SC&*cR(XS+w^<=41Tx10w6*cO^Hf*mv`VAq+0eOP%Ks30@Ch7^B}z(VLM+D*B*j{%_m{?LQ>%qlo&_UWbQPS z)%iF!{i7&k$-Px~c}Yz|?lBsa-_V?w6wDax_pPHXEvqGJl?FU8FbJ>EW~8oA?IeOd z8v!8yc7XR5N0EmOXt%MgJKl>_^91~`$lAmTE}v+3LQJyYiQlekYKvUjSoq|!rbfeB z5J`-Obz8@K(UE;Wa|ZODS|aRhcvWgfMlgC+#MUs12L_H}l}cZlif+#&e{TLUR%|eR z4hH+6iMA$M(i`!>@hYi5Ns9}G7+b0EilaqbcM0|($f@t|jg5&~p)?QR(Mo1v`@H#% zF@&-VdTOkaQss!yeEVG}7cg)8%!ZUvBIxL?ysIGqXqSytkF^K*V zKE3W!U+LOSpP>*&Ld*j#ZYnUIE%M{hOZ13W+V8ktAa%6q?cb2$IOJxIFQzmGk!~zK zalQn^L+gyC%FdIcLXu9?!hV(Kq@(v(r{3F{fMYH{=%EXv8zwoF&AjJMv&y(JD6*JPtNla$5< zVIfc3IBQU@L(<4eHc#4pvBm(?(1E<%r%FJD=L4?+iwDPRDU5+0g=8VG^dP<%LrG$p z%W?F%&uP4J*zTHMhjrj?vhAn=1cuGA*BwNsV0=g}mV3Lu{^ZFhk>{#I()~7n6}HgrjFy^F&07bZU4I+W#3@NN957db|!4OdrrE(8O z8P{H4)#ZH$N>WPD7q=H{8pz&Wqoz5ZOf0X$;N{=nOYf0Fc_J-}T+Qque6t^cCrpH4 z^Tb0DsFqJBrkaB%Ua|J(M4hk{CwAR}d-Hx_zohzMmI-8seY&yzQR?K^t4mnAr8^Mm zP}}x*(6&sb^%UQgGjgr{+C)%4bZDB0!5{7Xj351W|5E>4zfk8Ofnlc-|C=pfM%KPK zBV_r(F$G8}_9S6lMNAcOzuZbo^;Pf(|5U?g1;y%MAh!M8>S)_CQ`6OsHdk1g%r3Ei z(Q`%Q1oF_QoCktBs~a$fos+w`?0YZXnM-pOjo9MoiMap_VB2mr4Bj+pYxjLO6zJgJ z9W%M}D2bJ~*}5OPu*C6KyA$dQd2Nu@d4Ff($c!_!k<^MWV~zzZ|6hPgaclkKy?T+5V>~{hu1t$inPDt>|_Mlk&k#XrV8> z!%`N7&@;GgcbQV@rrFH)(N-{cb@Ensid|jZ#Z7%>RcE_50&}&|^+D`p*1@`H5=&-~ zDa36Y?S_#Wzq_gB!C_MiM9Ru$}WMQv2cyb15AN$I2*a8#%-+ z`1RN;`F>|EVW!H2Ds4$H4$WD39X+G;`>sH{zOMahjA{|dfPi(ARoctm9?dXn!oR0* zcM#Q_&38w?S{N8Z&egKg6$!GbIO9m^?$r_zCnov-rBJS7Pr@Q@2CipVk>y|3b>NP4x}*u%P4 zx*z*gC+vjXkLvx%ssT7y0&Ww&PEmCC$))^({--c4$Sk_JJyDVZ{6Ikdioiew|7T%L zt(^a*1elt6{D3brM@=_HDIN=7SN}<<=ZU{J_Ra=RHKbj>Bo_j0Jc7}ic@Bh7WFryrZ$GHdsaPg z%QC*6A?v0>%i5*u!0-(=Z7nA*!+o|zv6V0yCsH+LRd&6x(|-RDtfe7yhHc8H$T$oF zQGFJhQLTc4e#EZ)-xAD5XxgMGXp2Lwnb1aL3oYj)eF`(~$=rYUqf{7qvz>qlx+~B< z@?|nScjcdyNi|)s8ZsVEqI|Sx>Fh!iQ8TDbJg`pGFRSI!$cxOQuo@32?Mc<(QV>@k zX3240zoDW|+9#ui=>vQ!^(K;^NI~b68SF;X-iO`WGi0jXcroKeb_{B9$h$r1eV(KtCncno1KVkQI!{re&}y7_4ay^(k9KLw zisqs03(QfG>&)?e26U@)}JiTEVYR?=j1m1EJSbAAlG~ z1^a{cZ4**Ta)CbxpXnJO?d9 z_%LjSAbk=|(_nwws93|OmIn@#SdCo&<|8h9S-_(NGEcM_(>TGM0dUBIWjWNwIZ=o^XQ9jE9HFhFKGnUuf5{ zX$sxMS0yl^Ndkz+-#3XzGtHd6jmYqB;oRgCENs!o(;L@y|G=+iiyrQ}BdnplA7Z=m zUsQ0#tK~H$?W5;WT!o&B!BoR)XO^Tjz^k8(BRR^My)+ullm@B{&0V92(Cte_kpFcYS6)v2moIOq=r6eMj7f17&L3+YC`j#k$YaBk;5D5RG~0 zIyfSokgP)?~f;`J{mcEUjKpPJzU8J85;)uID7hpCQAL23XrN-7xNi5 zMi>ZM6Lgb74i|8v#r6EP4-~sKp(H7H@({7s8=n%1-VIcM%F2)am5d)hHuUj>l70L9OKzOSrzNP0#o<=p2BH(R@y+HmL7-*`p)JO1-!#)^U(EbnQEKM)bkwNRK53>$jE8_p z?$Ek}f79jF30g(GDr45jb8>tuePv*WsltWnZ2@te%_ji)9*XfMDm&+*7aM?e??rur z^c}v^VnWZ`%7iB5%@@jTeMNOK`m%vaKqNi3ti;CkiZzNv6925k{E*7m4%|!+ibr8n zhxd+8<__3pV?5QK{|zE9=^#9LET% zlMg)n`$<|7t8(X`NJ7D^oDe1&jrSl=oX!gD+N}gWeq00TAtwc9Fh4lme!ks6&N0^+ zm{Dp@4cC(XZIZgNmU!^No36BTpDJy{tv|K^=t|5w^J|+HicYO@9w#X&vBqm4Vvkk! zD~98&8}rBGBbs_3;xE(UEw^q~<)Opo4;-gHg_c}kpEoNPRAcY9BK|v|xzJK}V~`wB zhy=T>oIL`PIA{*18p3q4jI|7X8)@O}q1Kqx8=r%`htM4IZj#HB%8zxYL+!5c$X*_~n#q>&qrMXVT#!Qla%!o0pF8OG;DX{ z3pFYF_F9@l$x+v?hDccTlhm>RvvZO05~4f=^tYf?CNmB7hN~k;4<#-m`$xLu;wq9|j7JFtFC6#Gv zO)_1|Y0O&@^eLYM-?h$bmIu^LrP!?j&a> znly;w%u8S@bAd))9K1Y_2sFgV5vTPyx=mAkcr2w;g|#5t2z-Q5ejTdZojyuRpV(=# z;RiLxmaN5s)R7`D*8f3l#qs>^aJlUXSorQ4%>FqDi9=ZbJHLM!^tG*GW4*S6?DJ#q zaN0C0|6ZEHK?u}A>G@{LeDF`Ok>61D5A?2;(9{YOx38d5Rb3H8%eY4|S!S}5)mA+B zP-De$n9UO+8VYU1;yBad-iy#ek$;E$_t^f$wadf8)}q6&>(Otg-M;yco@JxI?`O8^ znYeqzlWk{A4DO;=&4(E2ac@v;a>U3-)V=dlFj`L%kU-T7n{hNdLDd!T5OM0<);SW= z{OG#Q{8FLsqSgLpEze+?VC1B+OW1fuQqNMHi3Z$RO37vTB#|hbl(d|9*D4RzAG8*$ zyrk+uVTi9h(jra?^4|>Nj6>+|?gXtoK{FseZuGMHF59LErG-xX#RM^+>SM=r-}+H@ z7~dyjIIe&rctk9xuICRhcH=3)`gZmR{U40G?-uT$>#2{PPi4g1 zs<2Nnk7((2B%=125cBI(0SH;bDSmCOTQ-KUAkS9lh z8R3FZ!Bb`7BpTi)0H1ZMr|%a6RhkxQYX-G$(x8dz=gZ`I@Q0gD=jw;y zh0UkQOQAuQC1b7h)UuetA~AE($Sc%Wd=EkOFz%!aITa>cojSK()Kyyv&kXr>%lXAb z5GNUED&k!|4OoP^H>ZE}0f#!Qq@beNkf3eLJlyKfuU+hWv~ew~HT=oi!l~JLVZ#Va z@KeEnE?!yarRJ+=jm=8nh4q9i%O-I9j6?88*X}sbFo+UO2t@!4-5uE5!5Js$`7wG^vz|(?3RNZX>)OVTJ9Ko(3({Ixu5aPYo42)-536s2D=771eX1W> zvW~mRm-cDDg*XnXf8d(!=hixJ6?htMG;jiqaiZm{W`HA#y@;>y8>V6*q(oahsEIze z>o3Mav({WqS4S{}|Aysd?M^^vWBJidnNWXIjB1WZ;lwiF`y8W zcn+jBrEz{x*t=UUw{rBhg?xW*jVUv4zPmD0FTMpQsfjbK2BV*W!85dIC5$yzm>sm4 zB_h}bZ-q0?*{H&dH8tNaBTXb9MG(qg^nf`hU^Q#1osr;xAMyS8p$zd=g1mimLvYUsDL^=x9n z&VwLj=vGRI8FjVIuma>kE+jFCq_+ypq@S(#sw2@nsG5iA)qyNs`=`1iAb^y{!IbfC z28l*BNKhVbqg+Qq&JO*r@v?gL%RT^)*z z{@TPW)zBP_VB%=L=|y2haop0^VKjs@O5QNXD7ng%ULo5b0W&&3jS~b#kVXXTSIX>8 zC4FE~5HY+me8tbIO&R;(BWQ9Sl-#mw0U(EpGYvSNdOWBTG>*KjsbEnMxBgm>gz-NR z=Ua-l{^}8pZJG2Gq`%$RX+fghM;v&D-~^F|J(oe?dX+gwoU`V^EiK*L?`r#=F{A!! z*lg64UHi{d2~uNFh}Zy7DSY1U?VP=7LFYZ{CRf$u?ukYcAXR0;o7# ze1rAztp(3#mcoLX!YKNQ=VIF+)z+ECvNUX!;@@D|-k*u34QYGG2 zEg95CudEV-iT)MlTsr?c)Y3IQ)$io#ZU0NrE6^TxI^ktpY~H=pebUM^>1%xSEeR?n zYr8*aJu{7Pm}VCQ-wSPsfon7fP)m^{^_vxpR~Qh|F3o0Apl0dVQ1Zp#eKluF8dN$V zDdMx$Myt6!1HCvBQO~wMz?0}zs^KaPd2>{18MPwL6^p@n*2{19lR^(uqiR~~BYfaeAHr=h|111*OCMxwFDD`_Z1VXnT zail;ucqduup>rss0!`U=4Y008smL>Nh+15A;@?!)bdaxce1r!V^i$Er&NoASzy75v zz#|4&6`BNL3Dqt%6K~KU=iJa6J$WglMU-TcYQ!buV))T+T44OF9>c! z9%aQ#gM`+b?DPa&Jlgz_Ei+dn`61Nvb05b2)AtwBJ?+Nk5BfJnNAk}Jzb8X%FiI0G z>mQ>`j2uRNBv(o(uft%GsY-!mu;sfJ$iI=ZS6Nn9LymtFkEqkcp;ia`Z6o(;>bB6o z1QFBUmuoF;UR4%^+`Xqz=HxQz-Iw2&gEEjPoW`tKZ(Xya*BrSpQo-c3;or5*{Knl= zvUodO@yrr-=Z~C-WenkkLI*2xhf_+?4KgZ-=s zG^oEAG_u*46lT4keTx{J&o*RLQ^M?0+;cz#Q2Q?X7!QhkGcab3{0r(D36%gf%D#cq zO=(tlou)uj(=?^y4W3!Qttv>Xa;Q}nJ`;JSp;uaj5*fYwox*=EK&}*B1KjUCsWS4X z9B&PMVna4VbHn|B+O5QpaCYx5mAj~84}j>Rhk*J%XcX+x^`HZi$w&qqvy<0$F9<9q zHMNFUqCcD23|b|i6f+S6_g>47mJ$)bCCXgp>?#?$HN=<5Zef!GgIywnNIlQRD_rr1 zP>3-YR1mnAD4rTcN0LH)p{iy^q*?g$H)iG)nOu?Vg07YaR=vcUWIC^zRW-R#8D?g$c{5Llw|IeeZH z5L$X+m4@PrR+?r_&oYrJuxbBC{KQYoYlZqPCIPy>9u-GZf8AsD6(vo6Y6$u=8XP$6 z<10*1#dzEUB)377-M#!Zd%x|CXdCws_ zyH;@9!x)iLH5hju;Rf_9X6}Bc` z32G04)HEw=1)|JK*)ZANzvyHY^krj$?W}6pqz77n$YIw^D5KeCI^BoPO)dV0>Zg;@ zHUO=;sy_KFUMQ=%R~zz z_bB`jeIBvxd{DoNU-HZ`!5;U6to5@_RQPH`@XqqXy?YOlJlTMu=pMiEQma({kMmTE zJCjjJq-K?+9>uvgp+bxJWHoUe&)kOIW2=b2sKD?G(-C%NR!USwI*=a*`@rGLMY)~_nR)Fnf^`M9w5H>+t5*>Ih?H%!;a+BT|IZ3^a z*b&J%2&1x5k{a7o1?J}0v3O)Xbl@&y3e!32ZyJ{;pXo072M`ySd_cY%zcut{ulXRG zbA(*CFw@&&jPM5pr51jzbo|?L~8&mK&T9InRDy zAr2!Hp{g0u9lYi;rlgp$H=2?8>4#6B9=1P7Tr$SNq9sHLBA!IGC)B4(SFLiU;}A<> z=4A7sw2J6(H23B3Ei~wP$+Uf?IW{v!B_93u6*tw?1BNZ35rX{7)nF0a*7)BUQXzvl z%7rDFK_ILM$Vf=qCdT8DJW8x7lTasHc2pb!EcOl8w8+Yy{I0{}E={LJ->q<^u;cmj z*yi$%4=pO5%I0Uq%0ai*$YIfz2g+`fTj0sVbVB0vonU z@W*~7OvY<-q-XVwHvM_$JX&?N4%KpmO8x}RQNft(E#XBvn`lIW2BhYY%NE2F)(FU{ z1;zO|ss<+T6~l!S7dQ0-&+pcDo6IOPjH$x|eg;NRZ)l-+7@!B_nbW^m;VMiJF2ilu zK#fjeP-r0OE_cL*{ZR1P-paCH@@cVsX?Tu)t=F4)??u&CO+U(%(crF~WocJ}9qlzxsuq?rR+(zNYQ5d3!lJiYC=Io;9FjT`axou8J9>g!aZm@JRDcN(!WKNDq+S*vR1p5_fIjqONWoaOBOiiPV z&#sM>Z|NmplhCa)c08doKNGqnFky?8b<WISBy$)?-6l=bn}jaCWMcFcbi@OAUGn zH5t``jex-2FqOj=GT+@lkxnT%Ca=%$dS_yBBq}$3?u_Xg%m+!Ez*VoA(Q`<4F6o~S zYt++oKpHt^FL4W%%De>17L@h$uPQSo;80$BzoN9f9_x_zgZHhso7ra@70+%ie(xXq zIv>}zPM3`p5X~?qgg$?d#Q%$`u0^@)0GmK*KdjEIc$CXcu zkE%1htSB}4Ead@dlhSNbI3Gr(9;Z(o*Ulay=8H;cdAE)P} z^zmICT>u6VLW&rA?jI*+lyExk0`jhkt?A2eP6^Zip%g}b+Jr{vQVz~4#G;us9dq$P zTVIM0#L&_*jeUNlVc4zf!hUr<+2%!H-x@dS_~Gfh@YjaV;38|xfI#}$QL@-N=-lQc zb!ADE_bMVD#uKUBvww31%>D`WYni?>Fdc@KAL)p#ZB_+L?k~t?Th(f11wf@|6U`+( z1|fxuUX~1%ZEZjU?NLeVN2#zQxAzy$Lbhj)a@eN`G=8q!VpWW7d5~|Z2;EHYs~UpB z3V1M$;H;ggxCbd2Bszi}>@vm(xBN#+tY0G6c(zQ(dLxE^(e&D*xzBq!CB-K0mV5eQ zz|y@gaV$OhFb+ku@+{~q>b};wEOIXSyX;drKJWAl=Dt^3Xx&jZaxR2L4BJtKtmPf6 zgy#kMGh{B`z~Q*vJ{K9FX3Rpz!JhqCQLr>f>JIL@kW-b<*XXinQT|cV4f#V*1lf>Fl$-~d z`qq12A{CA*q>O8`M=0UO%x2@v$B%oMTf=oi+=eYaLadMtsT1Dp?6m~9Ud?(wLp`D zh8SK8`}*$rw^-3;h?{j-ZSrjR3rGcLNjefwa>`B?3EK%AD~{&g$FeR!AS^uX%+G0{u3Cb|IEl9qRk4wvfAgt6 zD;6|g?XP3nX0$R7iQ@(OvQ6MtRw!w?g%Ir>%KfuvA3Eo2PuglCgcwK##p;``pY|W7|@pBybIEet~bDDa_;lEoteqsBD)2Z8D9k@SNM6TGmUzH_^QStE9+Jsny z>FI!!eeEPn})lZpfes2EH~LCE2PyS#S4tyFXtp)Jj&DC&KkD*wyCU=&7R{3BBwoq+}~5zEwnq z!3atih}jvxITY>Z_uVhs7HMn*T4=;81TZncTGiMe_M(?7fq(jG{qE-H^Ah_`u}n`V z)rv1HdM@wJ(pHfi3pG=ryk4|SZ4bmlhjMIZFhrZchT z+v*re^SHDx0T&g0EX4F*>#XbfK&+CCyLSM*y>@nr>P$TYTKe4)Ue?6J>;yH*zQMO9 z5Wn!>E^J;)o1F+|*~9F3w2lGu?o35%l&VAvqICMF!9wdl z9>r17#7N*naUN_{`*fSYZ(U<}Ygn;5VDv^frj*d&ErRUEf*b4&R(j~$yf|?700X!o zDX=|BxJh1c5X)PAy&#P`Lb%EHqFh<1qx)~kyf4(DpulnQ^5zDC%PK07b(Pq&zBTZY5&j*33pCGk$Zr0w zciQ3q2)I4XlKXrPxDMFzZ)_J*F}zR#3$Z?_;`NDrYn^PwybqM3fEdc#XIpxI+oEtd z=M&c50IG=UqcFj+i1XV*vVlvTn`i!{UBUcDuCzH+VRwWeIi&=WWi%!7kd(0Zu*``# ze@rOxcLg7O{rxuQW@M}vOH2zq(_jTJrAH$vXfV-wLAV|&RNqWRvc9O2(F84c0`R;A zD~So=xe6K8yopt2DdKIYR{VGlCgPEEu;3N3K?05y#*5f|EBV}n zxk|~w>vphM5tk+Fh3+sC#5d=AIsE;1!2wva2tR)XDhgRF%K#W|XY8bub5y+s)SIypGc`ZzEyHO^>;QQpo@e`GQDM=gZfSWssH%lmV$~~copsACh$beS939i494J9wU-j(q7MWCuc*cw7zs{kQ*-Z4L60@+*p}LH$ z{|;~~w+N4w)_9LFhllo+Si<+@Yc~JUyezXdE7Y%t@TJ5KhG(R+$Zv$MpWeVjPQ%WSw7MFJl^N zYb1IesYekb^caLCN;^7IpPFpgPC$+f?fkGrTHcca9O-nYe>*}kS_rN~io)?4hbk!{ zh2r90cjD*)7;XfFKo$pc7(Lmn)ht$OQ#gk}1)KhgUzQBK%RX7?48(-Z%K!2tUXEQJc3cp8C1l}tDf18Q3Kz)N9TMojK%Cmfp6=s;Ht?`n`{s71s+RtO5G zV=1FTOk06AUxygAlI-FgQfBn*g}#fYCw-x-3rw(yB;emLu5nV6tB^?Cn<*y_AdHs+ zbLgY^B# zSqT-Y_>U#c9oRyQuoTglCuFn2x{`l}>G`a#%F1v~h22WQrsT&J;rdF)i#Jeq`%L$#G$ssNK0CBYxIo^j7Zd$*7DOE%V(9p7@LP1x#jy(T5QDR-s8gyiajBJvN zhAQrv^7le?E{F6{z6do{bCR{hXl2p={EQaAq(^30%%mUAM$AgFYB!mX56kGp-nZ6K;8evN*A@ zeNf}ZZTJdJ8k6&q=O-r$& zMP2II{nYhkG}2JWv&kEFu!2r3p*f{jV+3_zv4luPHw8VnyBX8fYt@4$O74q2_jZU8 z7zIxJxNh(xu%hP6Njq_=8-JaSAifaH5uw~HI)J3B{tM;Uqf1|r4(qX0ttVn0BZ{~P z{G$G-z*0nS9mVxQ;<>A*0@F{cTf3@XBMktVBbYU;#C`j6yZa7kGBtg8EUvk$(B1jY z>Ts1;7=%}MUV$wKXm)mK+!P(T5Z@R%Ha0YjO+ulDv!FS@9IJUv?i=hr0OwL6%4$G4 z)tClwT!P9M#ptF~F>c6`LNw#JXwRfyq$|4xKA@N{J@$N<%Ubfmg}2CL-bzOeW?(8; zL(4Hyt#Vknl&Xf9bUMN|fRWE~TF1W|bTn8Rj!WE-x;7C)x;=7G1Mo)YU1EeV&Df|d3c&JK+!Ce!sDVFPea{1NB31)H6isjQ0L8jrnE2g z{JF7rovxmTa|$QhBr_(0dBL%?T<#};fnMC;p-3%-DeEy6CR9}2;ZX9kf?6r` zxlzNN!9ZPNk}xmf?Y7C!(+uWW*s-k+u8EI{2?DH%4{jIkKjUWIs^Z%pHc!V=3-Q&2 zrINk(}mbazof=2nYqK=!t$BQ8Wqr%Q!Jr2kPu?;EsozfTLY_5>4Fi^=V6 z)634wkx*o?>yICs5RH*x<@A8sIK6Ys5k9g4H-orD(t}KQ6&1AuZ~YWXRUGUiBI9564!z{lf10 z3CujBpLWGq(s=x6=&v@L0_ z0-;OJeQLM*9<^L*SMO>v%D9?1BpRq$Dbls#R^Jy@rM74L&VDwVqi&6Np&fKSYFXG5 zet!XHOt-)>yB13Y0eyQ8G2B?i?(22?b zv9h;=ZKt4>&W)xE_YXw9^f-&)Q1y2J;9!6k8Ok-7I%^bJf^X-4V-Iow7X{0{MG;v;T~|S8 z3}ZVMpNyROcsNW;?r+&Ey_ksMr-fbEAMg822xdj7;orPzDoQZEgdvgXsX*~!PruIL z;<_v~sR{uTPQO{}#_W@-hO$ruqhv|dBYZM9H5%f4V8wMtf*e~Y>Q%O-%?DiJf~Nld z?%O+pblHtziVfSixde*(9oB)jHc+VU*xGDx&l%XP2gLXnYrA2dS!)CsHr+sMN?D1! z-(%FU|JLM--Up&!qtS9`mS_a2qSVDYaS1i`@MG8i0|Kk>9pf7TOCSVRrDEFA-1ik?mrBfu+F6NM zHZ@QOhKYeEPeni%m>_~F;^e?m`&j|` z#OMoJfj9j5%CU#APOOr)cXX#OP3A#rCM+ zke1mm?t;192cV(UUqSh>N)^F~Scid&P8)c=(GLnA%#3b?*To`CK3{$oBg!`*6y8_` zU%F}J-eyVo6CI*XZI~}a>8@~C;11ccC%8JwW%dS1zBr^Y&^SPW4o=$Pa{l*=?c+Vn zGxdwzj$&^gq&-=bn9ukJr(~o@!+*kYb9O=(70ac?n#WR;OU|}AORamCNN?SQw}eFF znU~GNUF8{f$zZ?^eX%HCl!GryPdt1Mq}!Tbc(m1k2iJAnv76Vig^JN)uLArza3biN zN~a0s9IG^>ScCfCen%R-tb)dz$W!7ZwLb;t0Ovg}SE^Kv##aDw z(kF4LQ>T4M0$7tB_2M;~#Y$jPws#I2SYwFy1lysJzkj;24*$_ws`I4{Os+@Dml%y3 z>liAjvSj{FO*C7WhN;J06GFY4SE`@1wc~68R(YU9A~L@5<&jaTh%w;JY+n_U0oeCd z`Tb?5B>ZR@Xa{J$Sd$oAOxkX@7WyBec-~faNODC_&5)_1@l8`etJ=)n#?;M#Z z#3DhBqp47dNz}i$>Z;AMCAh5R2C)sFgz{i{Mmwv8x}%gD`q(&Gz0&2#gT9|$)jdlg z##r=hsxZkMOiZuMw98f;*>Y(x$ul(uexct7|9B{Tbo{|=ej+9s#iG{-x>J+-FbgCl zvZ1nILn388HG5ZF7gG_3_?Ay~T4rueuBIcxR(v#IV!*s05l{s`asch3OydzsY`F|Z zu_!TdJ}VM|;VLJA$-4o~FcmsUuK%Pe5d&K8xIkkKQ|~TwJ_f%Y|AH(;F(WfJo9#>v z&>uox>iBc)?FoZaIO~{({DV92oij1<)f`mzY#&?qZ}I=4>m8dj4VQ53*q+##*qGS1 zC$??dHYWBH+qP}nww>&(x8AC?Kdk)=?hkj>)z#f+ABSmLd}T;}#ug0DRv;_aHcjOf zUaSrmLhJ@sd4Ydfdw6V2dAZVh0Zs_pSkeZFolzpy+8ah2(d$h04v%ztOZ}qZSZmYYo zI%e&HA?+Fh$3>u&wN#1UNK?${`~t^oUx0Q2&TW=6MSX4qc{I!_7S-<}y>Lk$R3&Jw zlmzAJ=;h>-Yvmua0?uUoCA8Jz#?C2dXr#!oF$3F_=JO~bQFzqg%R?%!$g^MU?hJ{w z?YlSvmW@<=8I9$T{*Hh`rZO=AWYzZ8zUNt=@BHCWTp|-vLa{;2DKmm8~ zXPw8HdGmU}Y!%^1FME7O!RZe#LQ`4bzR7i%%XaIk#ur*0D8NJSfyX~Y#aPv6a*qy2 z=dj+YiNYgPY}i({REh#0D8f@Lly=+f6a81Pl|X#7o z67!`C6*eya+~(GG_WAJH#%)pvI*ma#nJY#n-6f8etO-V3e)rH}kDRYGK4knm1q+r! z`3XNdIr((|6d8XlR3?ck|LBbfO<#dhkSKWY)L9h23kRVF1{Lj)-jWL*`&5X~2}&LA zb(DOQl3IBrPVtY}%l0>g-ewwh6_u;*%=QRvW2~XAvO0S$(rW zBt3t3`8s$2P>ctYELxU!8ootGQm$j3x36%&>E9i{efK$8G!$juefb855Vr|%l`u&g zR$*Yf`8K}C-@g(#HI{;@ZhuoHl(Y??UY!_&AS-9_tGJW`&*tK+fO#2(ruE`RC(&G8 zW~i=9>o~AGR6)jUxV2Zgt29UO4N1?_sHmGP6fJ~tN2M1Nt(|vox%=8-x98yCAM=`| zZZ}S3jM+#13fW^6L5Gcj&pbdjl`QP^xRIuW1XA8@`EK*w;M=&Iud^-Am#JB}f`0r9 zRNwAc*Iuv67%j8R1|iK%VM^-u`)2RZDI}kTZmcu7?IOjOa8u?MG4r}_%sHS9N%%Fm zl-tNq=i!<)l0>d_B1>@<-G!2Qk-Kvuv2;-pN0Z8!328G)Y0epGq#ir$bnAzWOU?~i zEwWM9=I{rPW(<|P^kDH=5s?@Z!s)&-a-zII9=vA)x!iB`S0@)2aECvZlAPH|9g5Db zC0j&MQgY~DX{u>f2+hstWWH8!;d+VhXiGLGavc_T)%HbB@aqVhHXOSh5|+!+EF1sN zgBDjb1IX5+i!99BTdThIYQ&hYCVaBn<HoM@_&giS&Tk99XZ}!Vb;a4X+${Znc(*kG9r+vU){E!|w!H0=Zk|EbJwU z+spaIh6H|;fdsw#!o( zJMA|EC`i$2k#YW|fWeaT-Q8ldj2aYH!4f$+4+KJQepvOu!s=D*=O!PhCW!BmE8P$GPCuj9LU>sR9BFmL0nTydPC;fC4S-QPU?22oV#O;t7*C2 zY+#luI^m)5I3nrOxakN3OS(8v6?q8GcC{%C%8l|pX2G8E77tp1u@F50bEJcKjq-H* z=B@5^P*xfR$2mS_Yu{__lfgA75`j6<0b9uVq9e){2sFgotx`Sy1ttRv;o&wT?xvRn ztNfX*bTdnrGo6XC@*M`+QPEy3nVF}4_bnfln+=>uNJ+0pj?TBU__?p2TH%L)!R6jp z1Sv7*qSH^c#m9Y0kYm9T^f}{O?~7?wp(J?{BF6jAy6tU=t9Y*XZ=US)a#rB;=dW7E zZI{B=y=11>9`bVFmojYozrUXlpQ&bd>{5A7y+qZ%F2wPX6Gq2wfJi*Ixh}fYw&T9s zaPU|&qL99CR9QiDDvEZ_L2tixIPJa^Uv!ba!W%y=Zo1*t=-nmSqNa(VjhbVHQ0_K7 zXuagf7QYkXwZ@uJ-t4v1FyZqUvwcZF`7xf`Tl!)u1y%Fi2}#Y z8vnxtNcmv`5d6>6a}#qbV_idiL$m*A#)1?6BdCW9x%P&kE|y&zLsP~U6tn{)X#tK9 zJg+k|S0v3|337AIG_r(*ix{~9 zdL%S%&WjeK^moi8mtW#kEXaa?PgRMxh=Kv`Co*$73=^0gJ6Y~u_GG<>T!UZh!uxO) z`lnn(5iu@7#IqZsUj5rh8;F2RT18xjU#>0I^;lqi$S^kf->zq*CS4p(gnOwRmit*$ zd<(Hy11IL3Qq8yre6OY&d$_s-XxE6 z8reXo1GnvBa>nVzD#u3VlvM80uam;F<;s{%PaSbJ-Z%KEGzrKAo2&`w$M4!tmQvSh z==&0oS06Df<8Z&HsP|7G$mzWMP4TL^yzee#9>SgYaN#Wow{s^19JZ6o^3biEuQHJ1Ivv8~JzV9}?1d6yx#Li^3D4 zh@bis>bO5K90}j=hB4&GMxujKd`itHUXn668d8vP$c-R@^!ngWvlm3O!Az`=yIrOF z!%&d+k0=VDF-El!Hv0z*Stq&I?T7e4Mkf(?!dYPS_D(TEal1K@)lNxjo*5gj$aZVP z=>zuxU?+zDY!RhX-`ko6YySn~Yb8#Z)qlYB$sx|v4y{N;{!y#b3$+l6p(B_=7>g4! z#$15)Ng1Y?Ze%3#OLXSkh|&IfuZR3WUnHNoCEw`EWD8!LRTnZ3 z@eL|qYgYgqHfYE1)pgtq0 zjn~JNM%BjU7sP$$HV5||;;Kn-K;&~i;pE$aRWNa@e$K>Fc^ph>2j)LG0k>b;3E+Af zvzB8~I{A_b?3Cs`BUdbqx5mZuw}Nak(1@s8-eIiV7f5D$;f_~Vs?Yv%i#@G~bvUbN zeLSV37onTQV%=-SpvQ=wOnBMR;UYTq%fnBjI|j^-G25K`y`}Ki@W<)UJ7IC4$FM&w zt{+AK#RzV3#+=0lQGZT83t1f@z`TeNS4ejM#zAT z_U+0aqK*BV7#B|}_fa$tpmm#9fZ%CRtt+f>aP#CS)Rqz!rw*=2Xs|B5NJXmH4fadl zIJI6tK=~)b0AWxi_tJ!Adh|+r)xPi*%I^kMY`96Z7X6!WrbP27U^-3^fkTdSQC^A8 zpG}cOjh-W#iDJ6F>(OWGv{bNEh%#&jHJg74LrOD4ZTkmNqseIc6NfH&mvJmlFnBs- z!vV2h%!JMcgLa;tK$~n_Y5$yf#nYb0K~CFR;@dcG11p*sb-N}BZY zFLr9h#JW|sppFvcm}auo89gb+AH387XV_WD*eWK2dS8Bio_sW=v$aqWBqEqK7AGC$ z0ssteet1dQgr6upIA+ovC0ep#co2wi1g3xAmq6KSZZ0TLD1L%l4EVoJN`gA~FW0}j zQ?#mZJ8xb5um(_Hb`%WqIlVma@_H+UPdWtrLJ0VxE9`|R7{d{GHVz!b5YP#f2(KLAcv81w6v71E{f=IA32{vPL1XHq#8Z5tu`r95f9 zeD!|-dptY@R-`*)JQF*zw|1~aX0rFe-u}Ffi4)L)P{85`k=>F)7VZnCR84#k`LLdA z-t**(-wzK!-mz0dGg}jvR zRWdZVtqR`e_))l~$dW=L`?1S|q?TEhPVN>9i5CypJkHfeCu}ukSAYi1Aw6L1z%m9m zhY3k)<6of>Pdf=he``67>D>}OVh&I?=uHHy-S)KOY5qZ5vCth96L%;33XNrO2up#T z7F~ajvHh;ZhJLTSltRVEf@8a)iKM~SdmQZvy}NY+fL}LyhoC|PVB%=u9h^R+eB$A^ z)-S(VwYyGagH8{;y-yduf<(lJ+(kO~o(Z!k4>LkBD7PD6r&1<*fdS6-| znjVWN$yNVWV-u|jD|rU*s0VesAk|L(=<0^xR$1p!Llg{UFSy?hzCuI|PY zrj)E}q_&a^H;{?;|&6d50)H6(G4DqcbyzmASc3E9CZw^&bGZSwq zT2VvQmAKMp`cFc7pIrh#)vrbl-3+}9B z2lmx^ z7(6K}hc5t3%jkfQKn5il|Do_enTw9yUQ{!fKYLq|%L*ZyuaMiP%)iRxY#An*6AKTS z_f1!1VEHRRt&)?fYp|7;1jjTGG%@|;R@F(p-lhd!^^;~qZgyx4309^I)rd8NvNDp! z`Q2$trH?2R8*1qAPs1_QEA#{I=?sdMHvwQYbchuz;1?OQV8^?CZbq<=1miJ}jVaP! zh63Mn_jq6?CRJjjVhz*VMVW$h?CC5iuck>3GA2I@eIpzROeX9vlV67{5yqXfcU&2` z>%Z=v0w)Bd*s-$_OogWl+|R3$_k!-wsTkEw>ah zl~p!2j|-I}nXx4xfR{Ro-TUN0zkfP3Pv2b?sidFUTqPYBMP%!E;Pkx>BgaDy9xMWwXD?F4g1~+G8m4&w&NHJS>;Z zV6VOgs_XT?Joh<-NG)&v{Bi;-f+Ni#@bQM&1z zOgePB#3lvBKHcuD*ugPTO`}Op2PRVOPkBYl0h^*bLl2oc@GOH@M)a-<=i&#~yR(`T z7(dId)M(lk-jP8Pv*&CPJxx~p{tz4_6L>z-T9~oGS&m<0bK0WSMokXSt8jiC2*6^Y zQFn;bs@OWRiicEkT|(*ol6W^W*l+Ezx^Xc|+=a%1l_03Te&8YOCZWbcaEnG*UVG2u_R)r2(prea!%y#$ zT998thhiijuE=IZoVoa_vAh{H8$%#vz;udL+*fOXYXr;8y}Bm{e0c6j9=9+QYA=yF zT8~{-+IJ&s!@Sl2dtFU$TAo!HH$JrS;B(iqd35eB?LVhfi}+=i-aZRQR^q%ERJO@K zNG}4UI;u47 zKdsdwkEP?fXu|d*wcSG@oDGHOQM=HlU$Jbf*^?yU2V_G6m_{6V*qJ~65-gU+U<-ndqg!LHSh&ZfmWrqy>V630PSFG!Ur`9OCIO&vjuFS%qcgunCW{piTVY`UB6r55SSmP7kO`jKV zX6f8sW^?+L{fKgZB|(LmDUJXIb1oCkUwai2Iu^T-0T^NMeT?4MDW88q%_M`VCE7=dEQmc3*sjodeugbd_qTNm@2x3zRm4Ey1j(v+x6& zWE{|39C9PN&@)P$i7>U#%=~3YGgimL_~r5Sbhj*# zkHpA5ZECHF!M2Vr9^8@%6DKLz^UX6X&v3C4*FqF$S`FVQ(`_ilk@-7(%n^6MFuc26qnCr`Pdoo$gll zah;Bi7H*NIbfxhDVnJbRM_oV6jirG!dvpqQ=5R&`f?e5slff9Wuegw0-#Y&2qGl1U z^swz~rB_+>mu{&Z{iZ5(O+r-lY~zej4tcy0$zr~7`m8yFJO|em2A1+leCdR;-jk+`nGnl@{W$pfTmq>CYh0@x%aXzRSg?RM1P&!>qD1C2)D*spvy$uqIv6Y!T2Jh@5BRVn0uI~A10&P1^2KISQDh{eU--~#5D#1EXB zof|hxL>%scQa8-Bf%v*JnGTTSp&LqA%ghB#OEQ`?t8`@~QYE0v(kSC#Y1Es&eRyPi zHvG!+X;$FRzGXBg+9kVzOH`@oZ^81#IhMtuP@578VDvI;NFmgMr9edC`sj(MCtO^y zVL%w)U`xQ@!?EsZ++PEZTBw#p&LblHf^=+`*=kf~*3M`?CQ9hWj2RO;rfxk0w)Oqf zH0EG1|H51OLg8V+$nqD4A`ASh5ORL`xvXj{Jt3jTK770l^=ec$t=R0;k|ZIIlw>*F zM^Z6r+nSGoA_Ap4^&1kKae${g7lxNe_d@2qE?|E1rj29`KN5n3a zmmltaHSr-sAhA2a=`k>>UK>MzU zSkLwxbd_kC&O)+ra3?3t{1pg&&^LA~`T{E2QJ>EwExs9sxmnomB_~dB7|p7t$<3%n zKE|;4bHyEqI$Xr_)kz`I!VqS^Q~CJ`_p`a+BwqvR$=6Jk&x})VoqYv%yw0V&Bqc5A zOB6Q(3#^*@x2JOarA9$UYD6rn8s2;5MREIpob(_7YM}Xy1RtYxKpV(K+JK&X?K*G$ zm8F^tX1ocxcdR|Mft)Qdzh7p}B!HT{Ck=c%2 zvMgiR3Qf(qKyDsS2J@ZY(rXk$r(fJf)N;%&5M*OKD< zuIGkF=dWHSu$Sz%a=U8$OqSe7z?xeRP*bkrdM^owPr$e58s>$XjsN?C?vdRzV6%ck zAm1+A435jX`egfDCwm|IrWzgL@d$!ImsW3owv?#y{+3;12V)U`lXeQ|7` zp~EO-9Yxt>f$R^wV0K_`5YAfN?oGVPZXaZ*U3Dr$rU+@lFv9`8z;bSObD37bo2lPl z?>H9ryIAhF%-FgP1wr06<{_IVdPp=l@eZ8g5-DKe@maaHG=^y3pnSgzxM`5uD{kJ0 z$De^tB}TE)hq%q%J}usw^s+Iv_PSs1WGbmdcC!$Jt{H89GB=o-$60 zH}9a9=dGfzcC3j$iNSRz+28X8)Zxv9UmS^IDVmEQQb4o-x8ep5EUc~B*FEh>JSJE+ znH3W?#AU#I^WVbmND7#0fHS1>ovDAs_BJxCl*1Vw z7fT4o^2pwuT&dMdpL?pW1{oPFL9Kr*w=xiIfCymb(L4LblI!J--afvpX*h;8!@2Tu3wP70BM`q<;Rx$0*7o0OoahChaN@(ExbI)nTZXtpN?(XcjPh~jho z@J1R*W`=m;n`h*V*;a2}iZqvZEiD=0#N1iN8lQdaupmJ${)X1yb|r0 zFn6FTACyiXqlw2!%AU@8&W-$-XvXZOzgp<`DGQTfUa0Xu->@4~Xja}c6 zG@>oV*QWt!jV7zu?b7P+RFR%(HBSt>67!4rZW)hx@|v}rG!Wv#%hhBG^2IZP-Tjao z!Q9@I1UI>%&&I{>75K~r64_>I#d9LA{r-}og+e;PM952>BnT)cA_9-o^WxT~cEXxA zstdtN(QYqf?!J&7n#v&jxuNmIzaW1<4}Bn9az^%+z(OWsoPhh`KSasn9q4a#-niNU zU>2@!d^D8qad`(1C9;t_J>9`E65tG!R$jSJ)v(^UCsm@^gKm^a35sKI(W0KY79(o> zjM1F%A5p`v{&Gm~#3prKJoI;+1k`cTIMb;y^+$zZzJ~>+@J1jv9#-x%K^4U|!I*r` z$T5aqlic~8NWev!@X{Cda+D0rB5`tM9&?JwD&3QreB%cH1=k8~5upM6{Tp!vx@I@2 zPZ*7HO6hWpvoYAXi^ua}FxT3-Zo$>P9%^d^U)I0-_?jq9IYFQP`8o)TWt|7SU8}5Z zwYrIo&(7zZKiI(L>2h6!r?_T%z2FPjT-k?pc?|Ez?XRURjy{lf%zG_gHZguaoIqPg zaB(f+%h3ZR$e5X3wMy=w(bvs2d)H}nMY|BAKt)cmH!Ux(5*AzxSxA%x?xhJV2P~vP zsIJ_-t?kkE#aYWs0CAWsth}n(23(Z z0u!M?5F}A*5JvVCbJhl%)xs;^u8hW{1zW+c*pp2bW*p-<9I&fl=K3e@Yu#VQ0LFR%P;Wm?)%Kl{x7= zByt`h)eF$t20hDz-hU*05v{m1Y&=tt|G4pa~;U+G$yRWL>#L(54WIn%ZD7hKUu*dcaZh?(s++?Y;bRy4e{tHBgAvn$ULmjgXNR$b+dJrlUpO+Cvh# zOg|o7`zbCU{2d|o@xIoeLFz)lJ8s;)OufqueO1#|i6<8^UTAb^}in zgO3N^n64Z3HTL=Vg#y8Dk@UF-Ux|k{b1FTi!V7EaTBoHU;xJ5G6?}Y{jA^s}wCG0R zY|F^0ezgwqD1rhu_? zs~vuE;^J?Fc+gKmh~LvA8k;+P!&tjlBZp>1{Qor5ZsdAvrsBIj$c`lvy2~R(0wj=K z?f#KY$HowD0ujAbB!EurvcC@yB$=?Sk1v@eU!EOs!qMY|4M(Tp_*R8!Mh{VEuQU$q zBR2LiP!s+PA2@+l3s*8Me{zC;!S3fqb^E44s)~dGqQ!O;WyUtw3*2{oPogTqCg{p5 z8VykkH*Y;aifWk^MFnonjrS9lY;nW~k4*CQ=YLasF}<;N8ld5X70ynflW`}m+G@}E zNzfhqZU#_>>SQ^u1!}{bqF`}|yKj*`X$LtbQYoy!Lvhno%gt)I@oI^^vhHA+V>7T^ zam0Y_!R6d%F+N`oA_?X}O0G<_8_Z33oIU)YZBUUOA0V5&7Usmx5&j?n5tZA>On=?TU|? z;y@UWu*8X4JncpeXdI3LIN$8U6nuGr0m34#wkz(44FG)Jh~u$QmN6s_95Z9(ZZ0ZE zupTC=$h)L`4NQMFKEhtV+HOUlFN0!)G0Lo@#~>^&ibIdR7%QE7*uF1`vR{uI0eTr% zDukZC2o_nn!4diS^TDUP!S-wSy*JFpHkR;?@@UB%c0y0Us$kR)=8aY6XBxA$Xa{J&Ax*ms-M zoe8Aq+zX(`yGf`lA~>0i*Tg1Fi)82pvunkcn0;Q}vePhRui)L)DBM@RdtPwa3vGeV z7;ojlvewT{S3I;j^Ae#2a=VGa`ih^ytpbAGu&GF9S{Ii|^dL)q_d&_%^JnG(uC;QJ z4sTc!)c8W=4ONA2uob4$t~eTNo>PqkQ6EfBc8mF>V#(e8#U?k5>SmSlSu`L0ADBQl zQ8z2Gl8eeRRWZewaje%5Z?E|mX9X&WpG2Myu&~GcWQ-^zY)#(5mJ%R-iJ$f}0{%X% z#J1}Pop^PrUwo0rC;|XVaEqWQT%lnX3?X6Z_Mwyp)(K%8j}?>k;HU9IRx@6y33uR# zE7iyVfBPlc-(MHYp*P7WOQ6<~Ut^AAVOFNhty>xpwtxJ4zSh!_CrFrz+iJSexT?zQ zTB%ueG>;WJLbDg+KFVDCTcge`pGt=PV&U#a(+D~F3b|ZswCayzY%bpr@jIv7IWEmK zMv9HVL=NJ@DX{H$&8_+H84&-qxJ3{~10M!kg){6nq$bL) zmAYd}Alm=rOJl3KT@N2`|3*2{h#6Q-BOGAHB4yz;Xpqj%vJG#7GA=raPjJ#a6aQpP zIk%SEPD2&F#pCuG!<#56bRQ{wQi^}))(P%Vhch`CI2D-&!(f>u%!!)rX6)i6z>`vUd?6^4sd!$!sxb5{DGrwNouDaMfZnakjDo% zQz`pYCBf#KgcGp00N~k1Z+{aayCe{c4?|>&CN;<%P&32>K}CK292k{S7ZnRKUcO z01|THemd#6S52^|`h!cNA}A%XN5Mlx(yZh@XcXAqDGTlt;IPrLgRaYy2lC6XIq=76 z7i%p(=Z1~Bqx~&FBa^xpzUSOU**T?oE6TvtgX0ZsHLVpK|HMV(FiCeUV{8Vu;&;pSlURIk z;gk>^FOwW5&ZP7S_MZ&nUPFy5gGMWZmTa`UPbr_@FJ6X-`BtzH>aZ$wAoPst6p9(L zHA+=IXVJT$*X+Ts<><1L{lgS78ob00b$arEy7C2e&5y;CV3=6%fA&HV&d+MiS`LuO z>wP(U2@k$Sxy>N7M89EKKdgJXf!~4p>gfk4{B=#@Cjk+YIiJm``g*u{@x9;g&c^-b zZU4g7AF$Lb*=i|{UO&(!wth%(h$mX%T_xH%I$Ct>N;kC%}a*_3K@F zZ4;AN48rKM4mv8X0#cVt0KVZF8!Zj4X~%n+wkBb2i!mv098?OGG%&)E2VM63tkaDn z#7ANE0pM{hEK>X2lGd+4w=BC4HU&Yd4*+1`>IZGY8=G0dU|RL6R-MKz(!S|Ev`cX2 z;SV_v>((~X*}Te-$~?1halBfGOrgZuO<1-x;keb@w3O0G+aK*EJbI(@s38~@eItl9 zEa=ttvF~wZHmDryECfYJDc^HRjZuC}hQ>r{8IK9*Q{i>8dpbWo(QtIT-FxrJc%n)V z(-@O9yer%#*FjOdv=^ilpGfdSn;U!&*`fsq9JwtAL`grLvcj~w?19?n)oy2ql@RIn z4PgRhz0@)6XVFVgOwt5!rOgYz}} zx@zMrIa5H^V+dW}hSq5ox2^~SWI4DZ^{vE+c%sD2t71bchSrKWDoZLLQ;X14$3f8EekwM(tY;#o4YB*DHNzN zyF5VU5h0awEeTJ#;);aLdspMkFWnK=fJWMfkp;$7q$U2oOK{g_nPkKpuw{=-iG|gC z@%X!8Q*QkS9{C{eH<+xcJs2W;VW3ukfCiT~?7$Aj!P1sLil+*ZMXGbgUe7-z_5{Bd)Neq9yaH}x7Yl=Yr+_St&Ygl#M3loJN@gmBK zFfBNr*R+IqI?ic#(h^Yl=Zm3&o_@Cvl14HW%YdcH0Y-5>av-JxG2S-_&6@f#LSa_u zNh8dE0r<Z3ybh|2dTQ7pxqR^N+Ehb{VmPw6wpy#2)H;kptQzfTS3NmI6NH!N{ z#8R%o8DUP`a6&_~jz;#PPC|F@s!U7JpT#Usrn61rL)#&UU)f=vbV=alWX;t6NdqmI zyuwB33Zze7Kd7n73da-Hy@iuc+u9{f!2eL`5OUx`5I@J1JUtmNhH|7Fm}4ltXxrY; z8T)m@`T9ln)tNKsilJQRgaScyT7O4xmEKQo!hSJjbF&eBR4FtyVanW}SQD{xxf?F4y;56DAuKHYs$6d4(Sp_T5;Q zPKYE{e;XHq+?Ob!n((&O)S|DEd^Y4S%HAoI?$aZpkbO_Pq2F`l_a-%x*EQkI7LXlJ zYt~nPCo*OdCYNEo>mQ(Fqc2yD&_tq(cSKKv>i4~>VX<$-|HfDsys|YFB%f-u!GVBS zk%52+|7VQV+StMJCzkyGug*JFtu6OOk-n;QYnAzgsbZ>7H6jEKVGeh4JGp+g^y~r^ zh4G!y6)bCwj%1U**Ao>wDs`Ds*Jkkc1)HmBrrGYR_Lw2v0*<==HSm7yYRZ)5Jz0Ud->PB->#Gsjgs>SiDTQiTXx|EUDJ#$NcOKMK$lDP0 z0lT`9D`M7^T_=Qy>S=1PLYf&(3-)kmW&1=BsEKx>PjGvxMUAA8-*Pq0($h?rRZYTNQ6E*}+mHZ9vc##v8&k(IHhv$CXzZ=>C zwyCJ&l?I13t3089l&IzAN{o-Q$kIDug*Rv!YsgIH#ESTd&+n2)Y6H#9YQ4MV4A~OY z(@xB5R}r+vj}Hy(>(^uSgQ54@RD(_n8X5(4X50HOAOEPM9-=EI1`Nhs$KjO~hY|5# z2u{*!7@xM>aR6l7z=(HR$5{XVZo7N6$?v;G^6!4+&KcWc?BUCrle(S@Mtmw>U@ZiJ zsl;oKxNlkg%BJMti?CA-L}=pY4<)7l?UsYHp&8xqZ+BnWesI@fp9IJpS@m#n{x1Gu zwmYcB6K0q^9$3ur(tgI3la)<>_p4=~d$HLyklgk2SNhZl9BHsCeMqx&>PMk6UhU8d ztQv+6H!_*NYRqq$G^XQW)1zf_$=F|)fJxZL6H{WosAdixE|ein>%J}K(D1RC%LY%( zruHqdUnyhclhsPj@vXBpX4hESXyZOd>aX4(2*hxRDXgtra|6QQ``U$WiyZ>gmp~dF z2_pX6@D~rr#llVDfAYP7N`k#-yzF=NR&u_xi1mgJ1zm#ZL@=b;(vVU_g$cI4Z=a?u z4dq&l^yqQIRg=Ph5hQHqFr!r?Upv9GJE9*LJ@Z~FWx|!W!xozsu6y*XozDNG4zU2I zC3TXBzIrWI3#&F~8nhgSw=Wlp4$_s19Eia_C->tu#nzurduk^fh9zgVBP#R&%U0(? z9?OMP-~2memw4;YZ<(6OI(^}Il*zT*;>=@@l0CckvasWGQ{6Z#!ctvJO3ZO#57Pf` z%$r>G{1!AsKgjAFQTyuG4W(Ip@~39Dn)<_>8S?PAFGiZ2maZIR-HTXHq{Q5ki{wHBOy?;oJ*F<_zfsO$PRf zDJ)1JpeS@8AhQ4cG@F|`=sTI)+UQ#A+x$o+9e%P+S!&z18RAGj$JHzOh=U1{8k?F< zRR=|_RBZ*9g-fN1sJkpQPda*e?NWHU3*Vn6E*NSIQ!K+m0sgO#lM`&9`j`;QN;9;7 zN%GTeN(_Vxxl|Mt?3aO#5 zhC270<>w#ptB|Rf_CX=l2fJ9L&156@kP^`-u!fH_Nsv;LbAf}f!M7lX4DwT@H;65v z(rl!#A&GX-ZtAKE$ewh2*)D?tYc>j7pB2~5(MKYBfRfU!-hA0GuaKOlkT(?|-0mi} zDQY6M2>aFbH+kaRt1C2B4_RK2qt&ufSadMGL>i6p+vHPfc4H<<6ks{hQ5vX#hFr-< z2C>eM3mQT@ekHbwWNBc%RAu438=&)=7?}@>y*LhHEi*^p$t>i;!q>$G!uZ}`)SF_Q z2j&sAEmF4Ad5p=C@@BHS^{K)3Df;{G&O}AB{)AIen0<0R5Or$)WTn@jZnf4DJtc#S zJcuX(6iJ7E!k`=qro9L&8`P`Tt@zCTexxP~7JkHkN{PZZ{xze;VmI-Wq$;zRkhs}kdYF(0dGpU#2;7j`fo>x(gHY^%)K#O~z1_5gC-5%L=N>@5^)QdNlSlNE^)_7zP$a zYK_1?ZZC=F9M&eKTs_MWfZhE}o!5zITZ znd7b|Qf2WH5As?3U6i)|hI8~HCy;2_Ws$@ii13oHk^%Du2`toSsJ2y_bJGdHEdhVU zI_h6k5c6>-JCuh2xtgeK!pu)+H2lGn`#*G@Q+ptQvSnl2wr$&X$F^)Y z?cB`Vhcgdzenow?Yu8$uV)901fPdF>VC~@7?HpyE5&F~R7sZJBTBsuhhh&uWQ_f!b z8Ek;v3UQGEzu(>Y?{_&e!QHEQWQ$@ZXX+*CweMczhc5$4VoViZ-%Q{3JS9K3sk28o zm(Ji?C(K0hiSP^MS}q?`DMq!ds<-f%JR-6L*3nvk3AAY^VT zqy4SUzUtSxy0yYSOtvvD1}xcZ-CnK++iQ*48ZZJDT#v>araxK>+gG7QWsL^&5gB%B zS#zco{|OGtxRz-;GWnet@q~JSsNz+eF-3sWG3s6#PU8-F9^o~# zqR8#U3a96i%x;@}NS(yQ5$AA7zekyN4&S+l?=yBF?A--s8%^Yo1vO_pq9ra|>a($F z*zB>2%AW5M-I1aXwPNqU%DLZ3qJSHwb`_v^WEAI;3y3ett&@kXmKu#3WrF?F@oxV- zE!*bz%-`NYdXp+eCw~Z@OB1K$F_6Fdf8i~nibOiPztx(g7sU`_8&@Z~k&!{L3(i?)qG2idqW9 zUvWvJ<$nD9I#Xm*C|d;UgYxH35^w4|FwOFYL=|c94!$>n2XG(d-kM-~h#2IEJo2Eo zFJWb18l}F)(arv>6SkXp~n!3vcp4#f}SkgL#Pce$s~X^EW2tr1$+Q|$ij)a?jtV#I;F zay`?tWGnHiJ!g-gE513eD=XGF=zoT*15%n++CinS2owMSDCK|Kd+ZII&41^rz5V|k zt38@Cwufv;z1X+h_D4Zo=&f8y+Gs%5`b<|7ToTqsmn6{9;IJ*juFOSf*d#}Jxb6wc zcm(MOeA_$|stL&=N=j`>Ptm)=(uQC4uPbVzlZq%h#g>w)XPz^Y*h-YD3?tq_wW%_RsB=|7VvxR5j`7(|*^rr(qf^c1EOQl|Ma|VN zraWcC(FZ`6SJ(NzEyXhZ+=S)Y4@}~MM#_WrkD-ZyW|Wp}&-7PI(xjC#e*($6xIt=v zWDHsC_2!l2ri~6(DdJL2z!Z`Aha;@UL)owcuI;2iOYf`+MrJB{6Rj>Q9 zxa7as>mr*c&uH`}>;f5kqnK-Uob2Pe_q16Fy%_KwDJxtI#p2e0Mi(2ZS% zS7;870=N4!k4Gmm{NLh64{z108mVD(eZ*0L#(+S@5p6sfFCQs-Sd$*lz>@&x0e%Yb$0%JdBpBy$B?z zAd##zW`78WIriuaE2%C?C3b+p6p!BK%u;J)Qp)&Cszs;3qkn?}i-(N(D=FfBA;W3; z>44%-DIv&-#qBF7M#iVzGy>QVrlidq-^l2UuTVwyg)JnuES``1|Z74%bke^(XiZt^XurR|#)* zq6Rzx$+rabqwUyG2TR`uKmy5*b_JUj*jrw+^Pk9V`Cf@q;0(2B33AWb#6=qam%T~A z&++FcRYlUIg20pk$KG#IK267cqxj9$_4$4*2_zEYH90an`7F|s@<1b32vmY}WqB{W ztvWeg4fa<+vHZGg-2ArTf*;=-@f@9g!YCmE>&KqS>NjXqYTA@gJhI>K)J(S77M|&r zzaO2p974+B0W}Dtmd3SCI-Qhu4lp zG_+FAYhQxOodRdR!&2^gX{9s2Aot+F2p2D&+tHsN3m!J=gQ0LroJE$*whmn9PZ^Dl z{lxXt=@;bv7l@U#JUc^1Xyxce}EsgReBniMO=zaY^Mc@Q3_uLUrR8v~<~9TUME9Fgphw|Ez@k zjnjv}{^UVjHjooMGdl-DLMN_6_3zwYMc|dtRP`tl_Lx}%ty`x$ikfhbo@gXBx%2T$ zc*{Cx*+E8pTrSERRAOaZi~gH>!XsAj2yKDeT~6V^S~}b7DU%Clg=DK;Dm~q%u8Jk{v5_Y|aI1NBqfVPkI^b)-<9N?ZT&eJZ z{P2kNtm;Krv$%3%#4F&Eo?8T&PsbYcDHtn_0zXhsI{+sao?M&-3ME_Z zT*2FNTT__1XZhCr-W+Z2{%|!@_@tW!%mO;m-ZA8f#1*=N#*Vukb|Zq#|K|UZyP@lgRIvwbl6Cxg%WXFtiLmnZ2{i3s$doH`a_=4RW z)4jUBBj-7-eED|+UmsDDAo%TnBKKnlAF{PGk{z;q+i<${&jQ9DsuBhDn!HN%A%TzW zYSKKqYhb)G{Lzf_FwnCOoJSybLQOJFW4hNgQ~O}vYNYPYSn6OoriR{Zt5JFN??2zC z+U+@#_MC{(^1e}T2~{Nx?db1o3Xg5t7{;@iEGM8tHOQ9a+SpX(dYSfSQE@(9LS zu^?e=4TPYhe1fM%0uKcfKejNJxr!B#$U(E9qJK&#J^ls~&iXcbXPn@cvgAv8rYpt4 znL84{bQEmJnE{9S$?3C1-E*ck%?$?SpvIQ+ffKFIPI38WxmnS1+j8k#V8K=_XpV?N zw2t^8-aerySThJ*$N?o$?8+U>xcImc49ejR(0_bnj?2+hrn z8w$lzN)Fx~pEvAH@Yk1)l#6xE9ZR@MRvmF08~=FR%602rd=hm;A+=66?7Vdwv-QSA zGyXN0q&5v%5Af&SHtW}|Z^)B7OlT;YdN|L3_vqe?%tqyy8a4hC>sUs#hVbh$D(}P zRVL|0TPIFe5 zvF~*SZNOVoV6e1LsCM^_eUEwuL7nh^R<<#zYyWYgc|`)@mOEgA`2dwq1=P|;bZz>n zmrQj`$Np&=)BO_uEMOp*g$09M#yDMF^9wXYjKiUN0pp}^JOqwEokk?v_~e~ds0bf; z1kvpN+VwXVUr@RU&#sy~&qMjRhOTh{e1&Y@!NO4mcYtj2e!tW?$SmVQFNE{yZE^RS znVQEb)o<7}^zj{34kn~e2vIuKC_eQgeTiN4Kt;{&7lF#k6?g^%Y;uil9)`|Mb^f?Z z6!cO_^~kGd*XM1j(9M_g<9NKujHhEkRRD$lC>sg$6!OAQAQBq|1@7A4m+^Ci6Qe-o zWi7fRE*|A{<<}@hW-8WesLaR&lX~<%Ct1;WtPlXP_8mI886wiWQ>bO(iA@~b?##7~ zH*4EtN|aiW;hnL?eh(VWu#?)k>B++L{(1KQ0@FmH(P;;o!2kBPU$oL3$5r1ddC20C zZjxop&X)VPV??peiKkb#=jE#gBTwGz5PE`++NYSirOe_3stp!7(x|0OF8$v9+C1io z5M0P5-x!Du=mLj3e1X7SPazpB1tx1(TvtC5=f4m%1g9_p-)V{zKsj<5MzQg0DmlSa z%!%KcTy^s2!T&3);54QyN?_0zK?rA`25p5*GT<{EE4smqvGAM2>Ut^b6FlhgaD1Y`CVdeG`0VDPN#y}el&iHqEE zlH_awE-Yz-@1^zOmMbl1ROb^{z`C_DiZg^^^xLqAldO0&z)e6?dpfb_;!2NMzucXf z2#<-|ICLHr>x-T<{Y}Ror$g}TPYx!mAHU2ecVx%x;7j`&@x}YVzGq3=^_v#xnO*rX zf1LS{E@Wrr<*-6Cc7x z)VE)O|JB6Zo{s+=xbEn@i%0<6g#I&qOR~#AsLpE6#f2v;o-~v&V1VCEmz}&jtR{ou>~ee$?MZ74NsQ~N(F^>FtKoo z4vu!WPfWz0tg@$wn6KWv8sTn;*i2Lr#T&EP7fnAUwjORbxmk0fY>D zTV*|wz=TAkI9w={ZofGeZCFpplDDk)99x93EtWvD@cfqqqj+G$EKk32u7{V5>Cf#; zY=a}EX|jDX#}WdM2*3x_w5@jeI+H$`Ynp>;hL)*qVHpB0^pVLqr}3hY8Ha0S0Dn-u zXjvLpKLT9T0@lr1Rk?$BGk4l<2If#+aSx)2bOPjQfPzyY;>NV z{t;stdDJvF;9t;I+d7{$tWP)+F8n%ejQP`W(0(n&TP3~FTJB%bIZmpx)-y`*!1dmY zSr595dn7WPb8P}{9qO{iep4s?4k>6$=St$rlCMC`N%?GCSym;sQ)}WV^+~%b71ZqF zWuwI>A!R_hS%0BVf<8KJw^$@6vJ&tIF`0INKi`uM3y>W~2`ftJ6){W)rGatR_r`HR zQAe!yho^xYKpU(45YrpVwdrNjAX7QcN$DXY2wX^^VY1`GAfZNtB0x_5xzz{`9) zEg15%VoR7i3Smz@3HvOyR8pxX8}^ zjrDSEyFizjY9qkA#K%9ks+dPb)`Ae0j7?NAeY&mAgtJhQZ5M0L)hITpWdJrcT~mDe zm#dsm37VjM7*rn20Ue;MOBdb#==$AEEZb%Qjht$x^N1eNsvl~{_d=wPvz|L?2I_Uc zda|4m9|9F-o}fivt*5D@Zv8vIdQmZcVzBhVbKZ6%1tC-|58pL#O@Q?Y@D!|mDj}tD z*V*fMGgfo6lnS_@Cqr4T@=rn!Lorjc=m_Tom&BIye2w& zZSINGw%Eo4N@gOtz9K^9C{fNaov40Ne)p)OrhUdU7F{NHnTl+Jxy?F}hp8p0c!4&5 zn2YU69iUhBn=-avN^~ueAnv)v!py2kN_#r;qLOKO3?QSRwzSqwm4?xboF3aLSA;?> zeZL}6z#qY@exy^<#QczyZT-qCxF&R}SG!%DoF`9Y2|M)~iPlo6)pRH)3w!BR++^s? zq~sqm2_nLgoPaBe#4rd7On^(vdG{tqd>iLA^{e2W)0!#p#DK=>;{a@#2$& z`Is{=Y76a#XLD#O`LVVIyV?1TO<<>bN-d;2^R;|@pdZ%L7YOC+iRdS@@gBa=Z4vUF zw1QNDqKwv_ol^e1q1j%X0n|GY92a7OTtBw=^P4yXhJcuVjShV!9x0KL@FIRj%+Eo{ zU~;$1P$s>h!@$Ykisxz?AaK-M(K&?WuC3fyR0vrQkZ6THKlXU6v{m3gta>TQ5sRE- zLP)?2&^c!FKE6XylbmQN^9;IZc*yFh?1~YhhYUs8-ukAT^skOd;hd2(SpPnRpV70Z znY$IBHL4C$RsB&75ZTpBKn{?U7(YKkht@ejiP309YAa9@@TYZm4mXZ7MR0jC|1D3* zy@KcR$PpN*olZET$dK>OpA^gqLRu8vc@`Qw4_yn9I=><%siEn(_Zi!7S$Vm-?@sv&)G#Q}B3IWZUVUl~p_eQj-(6Ax03zh$Ge zoI-c5-mHH{U#H1u(oh@GA}*7MWiPrbBqx!5i-*3&#dhT46=IA={IXawS>ga@=Z$V= ziA~)2v?P4HLfZSA#7@E&_wKaaJ$;+Ts4pneiw3c$WVsh&*H9hWmj-+$dK75c(g0F@Zs69Ox-nooG;fV}LsJC05p6&(FQ8_GyTB{`P4Rx8HEo&w@ZRI*Lm z(T4Paa|ALZSD$)_a$9HKs&&sEAg!?yJH#+X+qM)7iA|>()Qb0sO^DDqPR2iqRx%q) z4o3qLLgN#uarzBsH}!uV8F98}k)|LKU)1-<`Y7lOKWmvv%R8ji^oGC|KyJ_x%;FRy zO}Dxl5}<1LP`CJr(bY(Rp5QaFrz0iY>M@BgkHX8rKFkZE^36fp=8O8dgvSLKCwLL7 zg;KF3(U13NtpDn7hP9H3ND%OhN%pCFqxtzHD0wBh1{Sr!lZagksDDa3LX+;|h>s~Z zDw}c7E=gOjgjP*d0j^ib!6ov37C#!Plvb1?-6}+JO@eY^EK``Yph+dA-g-+CbEtK$ z75#)DJ|iFZh|h7_puo_g7c@iGTt~#L9Ez5ISLS{yhFeq?%N^e4d_{xxtuu2JVQYgw z%%z{JK11OC)?P7C{pI)5A%+8mq^W?_`&Jr@Z1pz3KN69ayd{q$!;gg!k_8fXVJ@dO zCRO_BHrZsJyRC+jO}|A*o6~>C4j3MZA9a5UdFE)tcc?0o7iC4{J~v5?oRtqSzPiv4 z)h-@SNSmU1qR5dauSUiWU(79gMF(mRV*oQo6WELlIwnzl65#8kXw#VTF%q5%TGq5pG9B=sQD7G)7KP5;et zfRz6m2-L~m+Q8W4KOj);AG=L9v>#qS(7nQRUJkL&Yg_PxZbI3uX)Qyy*e=)q1%XD< zbQER}Up#$3d&Lz~C>0uW+AI$31=Gfu_ZSE&_lNokV=(6u5}NI}@UqbOyDXF}cr*Tkeo9fHbG4hnLOeXX~yQ6fv~ zs-QQ!OTvsXVsLZWT&b@(l~*JGI10G=R2;5V5nes+>yp;atKcFU{=&DaXCv=o$+Q{D@m1cq}2oxCF&Fm#eqZ_bsrPRf&+`%l@vMre4qP7FO zzo1y{-U2!~-j;M^bchz&J;YBrgn7uBND6ZBWB{|Bj+H))O@_{V_IC15c%3^f*7EwU z_0+HYy;06^{`38}01Azvx&bXO`o{mk7lnMEk=xnp#+CW%aEV6&GZaRxY z^jbT$8J$`|p%)+-)|b_Vvkfh6SU-X-t$JNo5@Ws-v!m}C{Wi^&Rr+ktv_NLubKrFG z9^pDAmbuv_COkuEd_4L&iu17=l;m$RJ=x)e$B~kySVI{@5uG2P1{uIklT#O^_qomX zh;VG0O=J(c9F;xgdt9V8YuJ~DlA44Cl3ak+ul$ROg51K&Xnz|3eAdVwYmzu4R%{sZ zrDijp1DybGL?@xP>-tP|&K_X1#*mjhrP2~b1{Z|lDaZ~ z9w7Ogk|_lEYM-4io@_WqkXumdB$(OvA(r?~{d!XUwY_83a4EWuD8O;e7WM4)k$6+^Ly5wA-~!m0G?FxA|v~|^q%qoj8~<&M!a^EQ@H)x zd}Y2uXMYeJf=crEBd~NuLxqtwQf+=S4vx-OxOPoullTz?qU#{O(*4=6M%sbT`% zu0F-sP9TKX(xIV^_-Q!39Jq;7R!n(Q<5yp2K5lZfB{J9Mz33UF!3B3V9!$A6iG#MX zvZHk+M2e+R+|KaNM1X}dSk1nS*jczTKl_1ZW}q(1okp7jWnbuS>b_>GgnEJHT%IG? zgICj@cb>cG(XpWy&^wUt9%50Je;k9>HAXU{Y>KT_9r`*qX{2e;M&ypncf8s>X7QlG z#*hiPfwM*#(rmsl73NrVR;PEjPkWn7m>!BX^XcQk-8dqqLoZEWWcJXAN7>-V-0GP2 z`?||?_NiFF$Fb9H+6D<-=}Dq|;@TmoKOA7d`8PrON&qVUa-&gJyHxd^ra{IU!$9D~ zU+Mt1VIs-zgAdbdf~VN5T-Ry@3#b(csRbz2e5s)lD*yiQSC07K=0#OO-Wqbi%`ew? zAW8PHg+mMg`UPM0QnW@cQ|byK*-YdZ>z^#Wy;p8C4N+AT?ndxJ(5@TehGq>$a|g%o zt`m~XQ@6QN(mmcTB3(UAh`(2un3d2>z^JGCnuZ8_JzzAt>Eh!zN53v1*+jh-AT)RO z<^k4BpD~TigI@$Riw_IJ4Ga3{V)pIYp&MM5raiwt&%Z=S*Ox*oM&f%}(FhVzPd7Rj z5n0PkC&0)^YQ?cKjst~?S?o#izz{D6l(4U8TBZPd&{JPo{WUXf19wU81y{yJ0~5B*fi6JT+nWx6dka`M&cz)9 zptDB|#*BMNMNz}rcir3}{0wHGO2Laj@wFh*JtzQi-S16-1OVMiAWz{ac$>EEQHFX)BZ>>V=E! zgWE3j|ion^qC+QH6BCmk%|c5#7;p&yrnK3-h7d{gCbePHR(s ztLZU_DcU!I)JQPb{Wm|$wR|K>ZouOv{HKjw89xSjgA%PFdTAKUdlXM@3P zo&F@rp_i!I;le^WzycIVCFh9Rx|rAfry9=*)Oh`uS*uDKV>`NDyRXY`mV!$RRjw7* zlhR%#$^u_nQpjQrdPE)`Nvrx)JkS8C!aabkyj^Y37~zL)Ly0E5Ban|Y{rC%&f?sw>us#` zGR~j`5=>@3n4;;NYu68;Q4pE62SyM|v34(?Zl;$iXNV@k_-~ifU(6sJ4D<~UXt#vt zK)OY+n%uT=mT!I}Sn@_6c#|q4QtNVCSc{7p>SqlEkGF@%>57b;x9$LkFqd-1jiC(R+UAwduP913nMytHO@(VJ~T$f3{RTssEkjo;zH5VnrY0o42 z$rc=MzCWv$`P`{y3gFT<(+$1-2GvzCq3OYSHQ)`#Hd26wLTPt3%W|8sXyAgcYXW_g zH^!^u_E)v1AM-U-vHl#587M18d+t5iOTk5iu#}z?(ZxU4#9?Cqj7FR{()SN z%mn;4GfI5)AN}M;dj#Cd5SEuCR##FY&*=`43gI>%ppK87#G*>lCR1h6M#0CtI&8(* zx=TGkig)Ew-me)j;>#gZS#us;2=Z%A2Vk5(R|3>mAPGTh)A{T-zP0d#Jbsq|Qk&LzcEZ6@X%IUtNn|oINKtyZ78meKKMP z-k!bP&*1YTaDU%CH@@u?bcJO0^z{431Ys0n);tJFs-TZ$Tz^Jy!0N9)j>uXiHL^Qi z1{`}x5`dWhtSo9-0dYYS?F9K9lD}h)!h}4F2ADjAXTf}uo_}MR+Xe-8Fqq&UYH%R> z&t)0oMr2vv{9-j@6e#r*Tupd(XZ$JYhc~3D_qERnjNvoEi}|!E z!-Sd0+HcM^dsSGC87#;ohpWp=Q2R7boA&CN7hNs9w~914Bc)3iNb~WbF17>~S?VSO z$HsHa7EBX)0ESI%uYR0FXsO4$=ogKMJMb2kH7Y$?N8>~5Fq>nBq*VI&I1F7xT~};p zgh6mtqnTzB82Fsv2b?2xb0F@U2iHN$!MgJa>ajEDzv|W?E006di!*@am_lDdAVdcI zCDNBA{2K*8S#DL->!R0(9xl$ZKBYwWG-Z@MkHB&7Wb0@}>{{ot}h>(+~j`N-B?xQF~$}7xcC4KE!omB0n})+zqs&oan)CH;$}q|CNM5e zdVaYH!nF$?vLG#lCRnhOisL0q-)GmOC$c&x+hU4k>h@_j^XU=h1sH5Nflt=y17jcYB)#4Qsor2px< ze16|KX$8?64=`_!O_+?J$Tq1*tF0)4y4=WL4Q@nTIWkHgs7ftiS8|ZR!ATJ&BD*ja zSLwW+9svXvmdsPg~v2XYm7>JtV^jQvui^ zPH^G^pUaRVrGM2Ln^!gY;=?zZ^2GDt_s0}?=1^4rdq{%f%V0>J+F9`oj?B_KqzoWs%)kz?J-$^n$`)LPJ|(mLKf~)_X*tobR~eu}VCt?Lgy0E4L19 zwYy~Q?$nzNOPHgiR{H>hEH4wT;CZB(&~g@5rDU!Z#i>odl1<+-17>nResD9Apwz^k z?82&dYw-^rD|^>u+7ZO*lCWSA-QXTsJ8QSaWN%Pt_1{IgyAti)@8%c$|HL|mL4?|_ z-O4DTzr7r+l>Zv*I9OXao9H=)yY1C^DNP+?tvpqBr#3@q~<^ALiDEc4|vatO#{+0Cw`w!$jd|a~a<<_PB7N^r_ z-*t~^h3I8HGTAfU2Z)kx)^uI5!JMA1VQi1GE`4Ko6X-`5zWi|odH}ObdG3x0@#~8S zLOzyQ3vWVz**2?%bKP76j3EPPgz-!S(NHjC5@fdcVGzba6V9^BvJm`rw(5=@)I;J) z91_?15Qg)1g{|QzhmqNoim`TXFT-=6wAt@f1#b9KKi;W{K#`5Y6<&AN^cdT5SnD6- zYm#1l{7Fp0nxbwFB_ah$Kl2M-ZJY3jB&zvz1jp3oXH;TGrxblJb!Z?YbPN7{dePC3xP@~wYI3MYCEddHj(zhLY!AgmD-TUNT zz4`-dUa91+HO70oQ~<)Q9Y&J~z-kSl&6)lZ$n}%l1ORdM-$>m)$B`W* zO)8Lskqy|KF@iCYrf z4Gs!xpy?YnT`w zT=)Zqnbx+c-?F$ksK3()H%QsQUL{EA0=AUg>PQ|jY^@Q{4Xsbr)&UmC!>c;mxq}eN zT5}n89T7RoKxP=rUo6Am_j3@u6%c(_R6*nFbOo}zG|Jpcri5A?lV)t#*4f>HLbPik z#HW+EwX)CA}hiRb_(vd)Jcb17Wv5|0J8RUpW%?p*f8mKv+nxviLuVFjCS$Y?YUDuuW0Nom&6bqF+S#sH9;N^BJ%-d?i zv4Q2#E$F6rAaTbCLw*)QzT!$u%O&$OTa-~hb4LRKcuD{E87jx~#y=7bSYTaZaFZP9 zb#qN7R+l(A103#(Un_I2tw;b(o43PB>8TWCWeglIL6|63{Wn7df4j6@!bl)3$?3zZ zu&l=|82T>8V|LTSUDpBQ2Pas>t&kfi-0o@K0dZMvf#b*{pXL^s1B%_^!o=B|Ls;c* z_}wm<pf$kYEjNru|iq}1|6Ar(if)J(PNl&F0E|# z@JS-Si341KGv@I5*0R`#K{nKn*eStp3im!y$PW;J(z7hFBw*t#viEkT3V*2+qo_mR zXhLWqSkaP}+8jMMbj&8&YjOJUc3gULDeb2vPhfGWn@)TBeu{06&@1_Y5Nt8bXr!g} zsAWbO&MX2ei z{tvM!JLeT;!?nK!^kuR8V18@Ttfno2>M4qun!EIT38#Cji$%c1@%*t2%bnhWUx6t$ zkDCR_pn@rj^Mn5To@N^@uWCg8%Zh>Y4p!psEtgHb6UM*z*XlSY`kwC;4op9OsFweiA0|-F?H^O249rmx@ z|0dT*^uzIV41f4Ln*^6RPluaB%9fQ(OJmAC?|o_`-8z>uZ(X7_gC7^!a%$Nf8`SF` zG*LoEp7P77(XtT)P+J0!-zuy#bZ7r7+7xk$g~EwL+riRA<+CDgKwXBF7fB3s_XkEJ?Nhn~n_1S^B%~h? zh7&?LLAc9Wk*Up~o3Mo)Hk$ulkd|yO>$pQM%quka33SHIV!)7~}>+ zpqKE#4CSMic84;Q)D@CnhZdStQE)#-orAev1&+lR5u_6=$VLuP27r8QXBSx*F(~N9 zDZvSIhL&jyFbma973>yNYoexKr*tePXdt1NTH`Nky3vR`{xnN!u1n}Hv~LK>jZLv9 zQ!>Uv{m1Q7EM`M!JyT(KS7b|qDu)TJb;xkf!A+L;1V?b46sd&8lu}L6mbcd%m)klDPRUxU`bd> zD;>0`?TV=7u(0?RCK}z*H^z4dzG?4_c}_Q^4Z}zarYIck=h@A#0?jKm1nxYTLJ|sH z)q(BeYYn}o;%Ws!97gwbJ`10i+m1NMgkWL8-dnW!ZN?nqJZ&-iR~Eg^`$$n~RWn+g zl8!;sl=4+lNC43}^kJ#-SC=~XvP~LY_E-wkz!Zjem2u%rg5c=hnN%(8cGL)ah_BgM zaKJ($hZUF+IrOsIoKhL8>)bS8qCs3lF8s7b)02n{up9{x4b>Cor+tTn67rlnicADr zKws(WiXoD2vOZ4*E?Gyo{esped9z~MH}|dn^bTvIVB$SQR%zww$jF&DkheiOL|4ko zs95Hi8Mh|j?37Pv2B}Ek+MBKYxC5hpH>Y&@7<`qNi-RZoVirhpxtpw$iyLi*%)rFl zWvfaNBS0U5jnCo`Yiu(dl8%1#N9}T?!cWv-VH-=u5zBo1aIU(zP2gPUTmwPRRFSCc+e+RHjVQtuwYlRb1{!xAhW|Alt(?zG5G&~ce z(bDxu^u>iPQMUVbQB@Bqp^=7lxUxI;_J#7!e#MK?0of&n;2hx^suH71hK$(q3&7 za>3|Db>vTL$}gC^(OfpV(Op`Yl1cxG`Tfxi6F5tcgNFWDu0u+srXI4t&d7qy#8N!c z@v?HCSb^y#j0sP3q7$3|L3l_#WkmJG#*mw2*?SM=eU?3+SDEe?C{WGYz=P%mY6(5X z>ey|h<#@uy?-=R1n52<+k_M4;;~XJE0)q=e3M{=kMT&}?F3kqB!Zt{qFkgH1KT|vH!Hvyser$gg04-Whx3;gRFTymdx zrRcD!=;VeF_ir_MV+usDIv$Vy^c@p%KH3-!k)S$E(gZ>jeHKHpP}1}&5LbJDjo?j9 zZ%Z{2M+P+X%<9mS_bJvn&>1OrCf&)+?+z;V?#9@JJ|Ssxs2(#J$oBC%My#O zjB4(;!YpD-p22kAeqrLfYa$6)rK@m@uyVYXrvi9Y+wx;&%;x5n$UveAsUX6?uOhS@2 zOGR%Ynilk;j-!%t7UV^!%#c)t`6Y<7GCJ`Wd=*%M$1~kHAkOE*+U+=%rJv{CCE+d*k zp;XKhVyE)(!K5A)Ek`d%FrTQqyZh~4EG>yM5f<7|y*U zr#=K2?T3Q;C{1<~utj^7Gxa2mH$ZVnxi_5W@-Zoo#LwCWwlcH-WZ|P7BL%YS(600z z=ywzOKi*XV%<%ltM|>P;-9pl;r@K_#%AaeiN3+D7O+curQ$x($0<|DH3Dfks{Y9Mm zLSFAJ&x02ncdWHfb30>|0$4W#mn_!q?thvDQ>=18#!9+I@&>-GLqMqY{e&xe3CmsX z_hP7r-Fs}f>JgZ;!W5IgqJq5Swym*Iu#gpq*`&d)$RGL9T$!UKe-iUO*W!HFyTB`< z*uV>R9J1{i?$NZjYDGn|OK0=dD{0$41(^nY1jyIIy6I&l8YC@v3XTzu5@-<=_)L|u zifX%pw(6`+)a=94+j_G4ayc6y4YxXa8X@hS@#k+c1(RsxBCKbINqPq`j=eJvH*WU# zz!%SJ>Cl|~k~Pb#-4Kmz(RWLR93^S8sHl_0OhQ~9hbClsBC6F`7&e|`Yty0ruHVJh zQVdEpq*?mhbu=CZv*bInLH-e(5{xi^&Ky3Q80kCGC`4z^^}ahOyIT!!*9snGn+{T*e<8w79ZEOsHJWa!EjqJ7JDRvL%$@i&+7Iky*y&nN(}b}Q(X`U2 ztvZoI+e#!;P=H8(c~nQuMDhihYvfqqrbNLTo>(dRtsTlGA1A$$ZRXUA1LUEN<(cz8h`= zbHxu#}B3 zs?NPFWQnpd!ac9AN-gBU278t>s&GL1VJ;tBZY;iJweDrUMCmwvWUXGMdXYo{JA&-( z!zk<0ThBg$ULpT#~&VhjP%r#!Y~6rXxne-IeaZpSJU;K3zaNC-sCG4;JYduOf1g+Zm@C8=!1Uaf_sXs-Cp5l8lGBdw9F zZR0Nf8OjZpw;f03!kkF#e#&;7Tu$`^aPK5wqS5wic(_ ze6H$|yBFQ-i+^$#)qSUoVVR6iZQUY3imhS_zOe9s?#{JLpdix{&W|oK zrEd{fe`>iS@@)Ug?XD+ICM45)DgZqv4e!TnvRb-)dx^loqjvUCa&~`77xIP@b`Q|d zUAJluo&@pUpov~$YKXK-9o9=2?_YoWHG37Jb|`G0Ds5>V&0y1fT(Y=YJVb)wAvz)R zLATZ2!R4%B@BMlXh4*Eg0BV=Wqdl-|OdNF;2exKPH9V|TDJ){Yh&yYCIuc?y)rAqgX?G-rd}pL}nL*qb{$y$3qqZ?6+N zyL?9g>`w7q$wbPAZI!5Sd=b>c52tw61U`Dn&+(HR*kZhZ!h2-a5I=gqb?wH|lh!Qe zgDyYHMI2Nmv=Sx*VglY~-nCC6zF$xmC6Kj5djp+kw1MQsWEr|g(E9fvu~tI}U@0j( zAu+Zr7uPan7^fPNCwV2@2mxZwg{#bG5;B{1aB9h36CLRmX;$ z`~JX9KJJcgp8C^Vs{ZTl45xjweiBL_>LCS$cUnvo3|CG>3c+S0$c-3p7O6p8F4{bO z3TJ;VPa@~c5;RtA8n@qw;xz0e7f*R#5z0J2D+~Yd7#XT$Z<}ygRgoPBYb;QrzJSWB zXdv2PV-SG(5&-r~9aI)kZC2cRI4X1EFE5Tf?}bbriL=kb2=&J%yap(y%RN6odPRn9B4Q*Ab1wr#Vqf78t;h65~ zn|2j|%E~7dRV9vc895n>3tovzYhTlQ5l=ysgQ`E)fg+Ct8;EalF@^z+mm~LyD9!qI z2(cGpirG0q@@WpzMvQeg#QKd^lP-$@17Ij%$G9|bU6_(+RUG{Fmox{}Bp7u=DkO-j zZPZ%P7GUP1nyV)Jt!3=Fa7q=HX!5xF&H-%jHQxz0(NW3~w2mFE;l0ciL3}ZMJO4ml zwp^RR;Ptdf!$g=D205(xO@-<}`B!%LWj$}iT~j5-x8J~$1Maw5%@%3BFPM3rgfm&m zn%Bv4ap7YXndbHiq58=wN0CP3vn=y-!Y1`K1!Y0E*oME^>C2^sSJ|T#=iSy8O%)SH!W?ag%1? z_LDI2tJMzGDizhbbEC0P>@KDvmE^Etboox(cg?RnZwoGp=Y*Wt8nPTCd_N7%!_K1= z>R&Qi8iCT$lsV-LDx5+f21nWJwIgwNtPT08$e*BxU2qaFuu?%3p)vfUCR<$OUAiKa zlCfSzV3t$2ST1uMnuvCfnqX1GymJIXwPt^XoW?}}Yx5GLu5w`5+LzfbyEHu*G{*Pi zG;o$yIz*tV)9_KDnM-P{=27+HVZwKtRn53i<5EfKyryPjK>VA>I+N;xSxDBN`#^Bd zrlG2*UcNHh3MhrL1M?$Qxd(y#{V={#(p%2>nPr<`7G*HJ0)dOZ3JMb?v9i8a?g)N0 z12)qZ^3f-^71H4+Qdj*__8g&wGi=;xdLeMcg&h}babFQy&<9jrUZ)l%ENVU$5B2V1MH`)W#Y;F@Ban_H1s~oU| zA8|NuVUD%fUu~KaacvZl!e|@BJIue}u*G()48m=ME1B$YAV@%~Gcw5e5YXy1eo?Nb+;i5<~Q1j`$ zWKIINFrH*_UN7_Egr9i1z~06SH0|CCfX~`LeLb1rHi#TfJCa5}WAzi~M4o@kr+jwr zt#YvhJ(gA@JghW}Rwgt_dGP*hrVjC2F4)&u>v+D|Qj1^%dv_X%T0>G~9YLQw3}7{0C8T z?pwX?A14A=E8Zs>dUVpJ^*UzY&5@oXMGe~l2nCRxZa!Socz}eXe%OD5+hukR&D`7EXn4YI2GK zxpLmtE}FPvo=9A$^Y?VStb**{iQJ)(-0A>n2@vXsLK4UFxQ8AZGs3*CqVv^MJr_@$ zJ_F49AE}!{qE^z^;%gxV8$UM>9)@9}+iFSzB_S57`Ut11|Z( zA7VhRvI`t?Q=?GMaalwi9%BB1C6hTdG(L3g^U=d3u0gz<21_Dc>WcmOeZel{bQ)qA zyzq#8nNlr*U`ZzFcrOrJe_L&alFbI%@7H61wS22Qk($Ur7X8|G`aaMe@z8Xv z3YR=pco+k#mNiu%n+3W*K6mb9pMO78ig;?)y7QO(5~bt3M$O{Ap3*=m-vjP0VoBz`Snp5L8oEbq?+$jEo7n?zd

aQ##xt@cU-BTcJ!O`JWCh*x((zjf?HZPKKuguGls z5Z&fbPC|aHmekNl^2lgK$iP~uh)qv_+rLn2_J?_B1$U86)12ojkxz^%`xR1J$9P}R ziGS`MRXXx0$UIHTVVt&{|E#ZWq|Jh~j$+OfNE0+97;yw+AU?>^V_vzHhgou8IDLvd z1*3B`VumO;VmGF)c1E;y$pb)c6ev;(4vUiRiNuT5g+;Uls{^U-lxLOH4KF+hA-(md9ZvS4M^1|witPSn_!?t3=!$T=eP|+M&Zmc5$6_Q4BT%K zi1TB|#)BZu*3oNZgClqS2%3FRW0X+*2fhNC9?_Uo9(9%^O<3dBaqn z`L9&j*|&KmSiK(zBZl&%5@COJtK|bQEh>nxOYs_E;ML@fKv}=gBoYs=teoc>R{Rzu z+}lYD4Yj=Qr^}~IF`PMn%tMY4@QxevDP#`Dsg1|=1#=|a=0l(-Pa#8mieg5lroN&h zHfab++7VibbtGa|Gec-nBa#P-C`4Kk0bKqy&x#H1(KkB~=fnZmE8h!=pkdD^vO^E! zhD5{J$M4HJ+LJrm+A}D)hyWLh$`z3lj6iYl_xH2ri&PjFSbj~!lp<4QLa2SNSGeTz z6l>m9Q@J!n95bx0r|Q#0qdiSNv8H^Xoo7KuZfvvp@UfbY^d8a=>VnmcEjxOCx>`Q_ zpn6w*TYd9sls0Xq!QtwUH4HP0Bs*gHF&>ej)6v*s!PAOE? z`!^3we)S#~BxXh78q7H_E$vjade8%Xu3bjSqN#_G)tCaA(Ft8v@+MP*g@>QI?nE`vmWJ~)$}#1W%ijm5Ry8w`}?4QFZ( zX!u5yHU2q%ubMj>Wc!>~{3BhkE^n9&A%AA-9$f!L=?vs9_uYQ=TD_WvEx{G2d#qPX zB}86dGC#n_lz9-r;Bj3)U8uS|454V{&Cdfhy);qQ-!d4%VDk_SFZ}7y7wX0F)ZTK8 zU;)!Lmy=JeI48xKn;6b7%=uF~|@WbW<3bo3^%)pW42Ta1Ux{e|DH?-Z8 zXN+Kk`~-jbOu*%t_0L%l?|P|nX(@zumyfncalIej=CUasiJ%65)vh}?Xt;;~YEW$F z#d5G1U<98uiwV`~!!rcy@ZxY;`2?PzZw6pHB&8MJA%)YJhO&^|t=sb3L0DSo%iOQC zkSAcziqLI)aN~4(G85S7MfM@W<=*M_XNw|D&w9lN+tiz?AEt6$uZ~5!|6l+Rj7ANl6V~AwTH*u8s=<+lj}Feetfk$ zzTWjreSPqTVHBhn4He+Bg>;^N*_6EHiX*E`5(x-ZLigYJ{t{}3)Q)d9~^uoHm zlE_oBj1dhGAPoZwX~C-irz?&RdMHvxUwD_&9J~+a5Y}?$=gDFK%69uas=yroE9dNh z$1rj`=UFie8JTTYi|)IDF1s@C$=J8TU`In}gKuvu!5X~| z=Y`?p;@N}Y+aD92bEM(ubTH&y)P4G6LTS&Qf~&YUTy|}K;~BeSfBA{w;ZY4E*fzz! zJ@;ne&Ww@IWn)X1;p1aumz?+r#t`c#@BOcn3ei58X@)BzsY{|btfoV&OV#zi1GFnP z=>@uWGbnpF8_KO(Cfm)xC8AeR@33e7e%<~yHlViR&SIFhH#lifTJ!CS0=7rM;KS-P z5tM2MWL!GzGX_Ob>}|GWCR{`|RT|sG0WqjxoGk&&vZXD#1pWD7+l-Z2oB=$#dv&CY zIp@qTaou3i9k&yHcU8{1X2Kd(S)C2$SME-(ICkay+LbB57!wNYF&h=3+6T^Y>|*8~ z_>NAv3oMS~f6Nny!i?8>%Ol^Fu@dLMa|rZc+4XT2$%^ ze5pP7nY;X?O4xLIu@!OKX8TeCxchz49B|SqEwjI5Y9@L?B`JE~mdGP-w+O4U{LH%_jZ|ISe4`CFN zpo4Z5UYf`XGk#UmzsP%n(uX@>Hc}?eg;7Sg+2*v1U%?HQ?@qDX_Fh{W*5TrvPflRH zd|QJ4p2w`}7~2JY&31`ed#}`>oEKdlys{>5OD<}bJugK9s;bsfM?#)a{6rTL4&WT# z`8qDtW#t^BL1;#Q=%hUBj5$p^;PBRM`7Fi(n&GCrDO#NKm}=m<7WvYTR>JorHjIO2 zsqJ#SEDrQZAU29zYG?QrSjY+gB(@31V&-=Cwfq9y*_IMU8+I zs443u72+ocitp1T))%CUJ1d8e&TbR~ek+>c8O+*FPjweN44w`~A2WWB_n=f-nHB&O z4$y9!;X76rt8yfjMrIasdJU>ea#v1Z7e=7A)3oilKuC>AuT9(64|)KWWkhCTc981J z)(KygL+m*XE?q9mdh$@CXrpXL1CAV`2kYLFO|q|_qTFn{kzZF$*#%QNzbsU(;v1Uk zG%{+#Eg-KAxokki9}2KP(*+C+LV9H@O>H}s_tvBMy~5sPwdttC|J<8!8m_!pu9B7K zP6I=Ql8CM?&@_7i1#7h#AD20m5-ihPR!Ybb9~P5FO2nIZ@cZ8ku7*oi<5lgS{l-f? z%4kQl#-JEj(!Decx1G!`PFtSsF`o?{ieFXeyp=DvFL>0_)>S>$lHIR5*Y&P#TZDw~ zm?N!q^;idP$dB|=M0ciarHLX2gCl>B>IBsd;Ek5GYFfHX30&3eN{`1LF?XuDvx@>TKUFwgW!$@bvoDQBUt%y{#p&JKm!) z*fMG)AgGA|)T$R>lqq}aA9an}Z?~4O+~vttsuF-5b5{Rgj{%mw2UD1Jygv`w z{}nK>`hPMv9B6fZe(5)J)Y19D4HQ+tM@Ohs#KtGYl}Dr}WT;ihM#YrnrYEInC8$Ox zrYTia#YU+lq-jQJ%ap{Fz)Pq`N2V3Y#H6NVq+}QOfdKw}3?hX%&3lWk+q0ikrdC7% z0KWgPWBi{J>Cn;9>sZ^^Tj^Pvxf|(N7`eIF*c<)>YgDTJ+^kssNo}%o;9UX2&K>Lm zp@W>3XQ+;vUz=LWBSRL>GhksjjaxvZ9r4+YMSQGs#3_Sg!N<8ibi0Zog#rB$9&Uxo z$xrP(p0gI$9O&bbJm3L6AH4JkQ~5Fs^2Z5=*#A;;O@}{apE4FDz6jqWBX-RY5%P22 z=@}8E;RwPiKLZs&?Vn5{AtJkqh^pBD zv~m>v%cW3C*6tAYsoDE{G#j~yGmr~uvlIQeh-i)ET`r9dc!}a}nlYt6v>wN1M1-4? zQtBH}gZ?=?f(=eH=kN4(z>?r97$+xG0zlMh;3!GhkcG`{@;8Ok(Pu}XS9LU1oAl#K zrrnw=X&bR@q&kK--EK&%K+hRF!9O(d=_*8HCoV)DzQ$J8ug3|Sltj}J+&6TTtP6%< z!&^gsFN@=E@{`cFEL?EF5qOfPzF|colvw+g9S|cw^ck(9CXuAqk3Sslt{)q_o`2mr9Mr(F>5?VClJ5Ru zPN)L@>}crbr4g%$wK1Uj4cYqLY)?SYRR0VpFVkOWi@(s7{#mTHkwL2LWGRR0d#3>m zjWG;iA;=@D8@KQsWXgt01IC++%a(_-tUQ)Q-PE2b+5LCsz+c0!?)5a~WgDfmnEjru zx3nK99XgY=#pSX$Q{PNgqX(LL`#u}!iR0&kKvl{2aLeBKCr#L6c4b1Zi9bP_xg-gj z{`iYI${ZoT@MMvBXTPteLpp~9fdTF-{QM%Tk^HWqRQLkqt30$;ua*XHhgKcA)3aV_OUlqI%qi(llwJr@-UEsge@1^$O#JtpriVKr2z> z@fN@vxsP(ZCt$+6T1Y7tI;pQ&F*;+zh)hc31fcBMPlVArjr!>ebzV1k|5fi8yZuJm~M)CiF|wR@tGEd>dFvzFA##IU$+QCKRbz&zIv;I3O}jre&H}pEMk6K0nYhr&vka%qM{zx)GgkoQ6<|`lr6s@XWq%u3CI2Gq=obKCXy|D8hy)Ab_@gVV{x~t zI7Gl-9NJojUQHjCa3aFje^PBPu95;$(r}eZsv2KK!WPxx z4qHRr0h_s0;l_-+OO2|!GkH6ibJ^GPl%E~G9g%9;7zp(J?bJOiTUZ`lOZ<8oyBU*~ zwpUu86u9jXgwvFCJ`UXv!Z8*p-^!^ZHXX%F&DJEJ62Omc$Z3|#g`zL8QU%^1T(Bh& z8U=B*Qp2ZbuA9M{5Y*!m=?w|2Nq)Kpz1`ZE<|k3a?pK;Ybtt(`;0q$Ixa8Ow1mraf z!x8L+j;t`diWZMaRhM=>6Sp@4hYMO)A$7@@dYPn8*1@N{-h7N#J3GG8)ZfZ_s%!#e* zcOTHT#M4}`>nGxmBq}63*$eQN zNIZ{_SrreIpnVVGDwY+t^ngJU?|1qp)x2Wh>LvBWb@KMLO1?_Ca+0?+-dmf2*?Evd zLz8I&$IBP7x1eS}BaU%(d9m1q^ z{x8%0uZrswdC#I1LrC6R)d!w)sHR7qTHL0wx#0`_85`y=(HRqfA3E(R^NJ@axqAWl zsG58lZ%#dFu4$_%y5B7kI|??#*55J7-4N5+MqQ2_(3sc(zXyENYY= zAYDwaV+8vf0>l~5t(*uohi3zHZ#RsBdV#J5A|&HGjbzM(=#X9j6t@5?+jeY5oqG5F zvs1S-6bV+((~J68ljIh2<(O{g0p}#RlP#=-l#7Ds06h0YTa0i?k4t8R`I0PEc+5Y> z$ZspmJsWim1uDsUV3S2lv1_K^Mou^~oT5j(NDOQkT3^q%Q%%TF8VyPmdTsmu6tR+z zFMI84u}^R_3I+xzf@()JjkL>=HJuK=ik}o^VuwD4qn9W+U&Q7XjWf{=Y`^xgXCVI?g-ZSIq>=A!IQc z!gqrj)%7ZVb6dKDENLQ0L0Z_Y)M^+9!0L`csL*7^gdk|&*9C`_;7lOU9xhmCwxN9@ zOpt8OHms*V4(Ho9FdpeIOrAwR14t$?LB$hEEhDV3@&XF(RYAPa8^sRV&Apt;}#+!;W zY6w0N)a(kyX)eM+K)5%jseZkul+)IO5uvNch7xC*bU&A&$s8>T(XJY#HI6I{mK}FU z6&!c@{sCY^#09+e&Ghv{i06XtqI&}s_x&9ngAClsFf{cbA`id{uyl>Lo9le4ZbfEO zon@SNcY1pxv>X~?xsqjNPhE4zvQ#3)}iozt&JH;eG z3q8d?S_-|?PYQ!rCH1qmHEtPmaqtslHeS54>v71KR4q5ei2eupsB{EDK@o5Xrq#<(T@+l>Yh@;fji#Io7OgTJemu z06!1z}m=DU%v&#{|S7j8Lrzd@%yn&l?QUemME^{Ln0bEGbdA*O#j|W`8^wW!)~evL&sE8_&X3mfM`f4XGa z&hsHJSQ62>p>4aubRg;j9ZhA$d!aUGLpjMWM`$;?{Ay z_DcpL6$PNOCLXBX4FoG{3rE@j`r&==%QQ)l7}mik-yyifYTf1Uvc9x1Jd4auPr>Q} zo(Ix;Hy61*XZml?H9T#AKixKJddhR!6|N#CSEA1qBU?d;`B@oi_7oQHA%R*58EU@U zOl0@HqgVBSiUnN@?4oeb{7gEdGe$}xV>R!q-h^Uktj?==dbJDI{bLr-8Im*xaovOzaYi|#_n%mYV zOL%Smt(=Xu?@k3oU#fF^>At^d7!!2H=&nvIU0VKZ8+AFaVpK8OPDpC0v|9Jqdzrp6 z6=Y~w&7u{u4*CQVc44UP-CepeE^aM|a(v5fd5Uc}r_xPV*wmHq6nBaDE?3)>jdaU+ z#jDvcX;oj>sg~M|86}=yXT@&!rdKO$ZxylP%{^O@M5u2+a9)G`aWmPU^mivGEnB0R z-Bhl84ES?&5G%Q`kaX{OAcAVePvhk4+6jyMO3=WH3;SNEY7w|B!F$e+TEXfUz*Dnu zPi8>S^}N{13r{OOh-GXygBPa@V=2hB!97TB;wvj%86iH-i(T@IHgW*Vs-c}V8_r?| zrdp{?ghJc_x%2)c$e;2X0t&)k+h#r)6FBKt|E@pI;B0R4FRmN06YPFf^N&9}__lyj z@kBE4O&11%S^ZYeGK+nqu5@gdPu|-`1|a%^DRxx}F@N>WKK>&Dw(kcAYWh=v<^H2x z(ZU4)5d3fU8fF~>OCvpNCtDpSYbOUILmgu?Bg=n5L6hp5Hdw5v|13GSIb1@JhKpuv zXuxiAhjuDz>N3Qw(zR;PCm0fY#z7fz85HKV-(GN|t#Zw=hQ+;*{1xN7?Axv0f_01L z+J83c1BucgV3(J9w|WAK_TV7dr%@t(Z_tS%9dMDV@JZ6R5QO!nOcBes4hIhhEk}_H z5@F4fDO19=1V{!I1r$GsDUtxf$(Nyd2W}dI;Fydua#7+cP!w53cGqz`-12Qv#)S1G znCXo@vQOQK4J)=fe4V8j#R>D|M^Q8;5E0qE9Fk^g5`O(kpP4!Y0SMd7i^H;=%Bw-` z36v7=`x=n-SOVP7(p+A)f#U9GB8=_w^%c8I$d6OT$+6z;nvDYg=2PXbbo(V$nnAI@ z`MlKb^hSiZIKRhus7GX96{#saQT>;QvhD{f#g1gzjg}`3QZ{tc?>Kpj(|j4~D}qAI zRYLoUr_??yszFkRgD!F5A$w+BIFG=Tb9$=&Jyjo&+Cup<;o1>ur2tWt@<*U_3cT%r z6{unm9&I_m4kWCA1jZqrG#&)_5e$miCoENo`y*O}gK1R}3-EwNhF(z((*pAvDlc&W z`#VcmISw+jp670TK81gTXIU2_9O^H}5Z^qmWp=O1v zuJ8=;36M|yvqmG(fkO~UM*POmalhNa!$9U7Z&~;kPBL$7@F{u>;WRnqtz;;7s4~%) zq%mi7%zC{QdVyp#`b@zrz4*Zf99}(ATkm}{2D=_AfId@9$w5=Q63dHALk7xWfqASH z@jjGV3czDX^_hhR5?C53`HYrT`N>`nN_-EQ{v}Vv6u^l67d=o3q=$5~icAzR?e&t1 z=R*M$niSQ;T}$!Y5f5gC{IT;thYcP`hYEU5U9`*38)%m@W=AMe%#%A-%pGvDvZnQJ z@=Sc)bcKoMF!5zsW{u7kH3bOH`+4Hw^FQhb4HR&&vS25q!?Y%gYR!3FWN2gE^GoYH zTOB2t6vRMkfbK8ClwZ0-lkdw-$U1^Po)&}K-CMmkZQv4s>#qCI)1^^Ke)G=5;V+am zTj;ga7lF@>C#YXb!{XolUfax~riAtWh4W{w=h~$DXrkJZiJB9IAxZ6;%G>EnpZ zT)s4eDsA(W`z+M$ipL!c8HHIUj|)ar>n}~!jlV4DAl~W$j6btQdaGh*q#~cLD9$CJ z`){dU;H7r<4PCdhKR~~sz5T!TFZ9?%8lQVzmo~c>;%b_=9#ZgGT(RMNyrE$$6x`Af zTZ7bZPGk0{l2dz7E#SrDm=*+xq#2;bO1)gcLqVZuPX2Ce_7kCdqic4r2P)!d*#-}KGJeJJ+-vZ^V!qx49riqhdca*_}U-Z%U4J6z$xmLTgt4h6Dq zhPG&i{?iO8??MQlsjVp7%`3TC#llM8gpiGwus+&9EC}LNA^et)v1Fjgs%!lIyh7*U z*A8{Lh&s=Lxg9B1eloTD!t(N~pL3n8w|Li)WiZDFg7O#1ThV~?Z)N3HWwOj6Uw{sr3 zrz%<5V%@_F*m>H)=SGI-jv2zWf7rP6D^Zx|$(1LIqGnC>I`w6rTaP9PvocDyoWqN^ zJhx^$gMFv*n2H_ls;;h|q;J)@qLVY<99CbnCx<+uyr$M>t~9?do9d?C>Q9~E&WIURkFuij|c6~H!2lC#o|Bq&?pl8tMW{ys8#Hui zMXbp~<)MR-q%bQT&Oj7pvyWAi3s;sWQ)VJO8X!{F)X#z3$6h8qt5v(5pIS&njYCDl zuyc#rYVtTjkwkROJL(hE{As<%+CKTqkIinIFi~ep!#hvlINu00tWJ`#11Iw+L-@ta zYJ_?M%*8_V@%niiPN3d4d`ic+3!G8I+Y#?jS5*)2vdo1^Cx;LQq;~Qp)%1?&aP3(~#>CqT*bV8Ad z>`a3;*1V?JV&n%7bY~RVMm6OD3o76LG1{5DvNZcgj!51Bf1tTv|K(_B(J`_%)w4D* z(y=$P{8#$BgQ=PAKlnwVVsF&Xy}9$9QY3@Vd{!dSJ)|LVZa_2Fcr~m>;pu?bRg4OO zGh90H;hZyw2qwKCc!`v;!|i&K3*7H7cbT#xn-SB-rWY>Ya+4K2W<`6Rb~$y6zIuM3 zfTWzpUer~K;?iKmp@os2LtYM8A&&XC+`=Pn!ocLxiv)D2e_d~%e7h!LN0LY$8(Lj; zMZ8~4BUvs34E(J#3~!=UxW?yMjJ(ajYCMK=EQm$opFygeShd9z7;yQj!B28`x zC2@yWp*)Xi@j%fn){2Dmp3@6U2i1eoB(hByM_z!ZT{(6gPUMO54S&g!OT9+Kw~o5< zV9374tf0VD>U6x)<|&Z11N=S?{}AG~WXl$6`FyJ0@xRnC>#+4)6fHc-xv*_dg?T=))oJ*kq^7P>H6 zvH3R|7Qm9N>3h6k3;%=qvivZJ|B#smHiky7mPXbNwEuWj>6sYO{vXQF;oqM>>>=yF zP+t>!JzG;9Ya=rgQ+*rze+sMdr?A%kcVQu#v1*hCONeQP`ZWs})uYInJsYy$#;8eO zEI4btyvu;+Qq6EHRm%2Hq_Z+!2hN%f;5zLZ&Mft8Zobn?CHM{LS)Uidl>7~oh5&Vv zH(uh8Q4g%i9m#iegJ`iTxLwGi6Y2Dxuzts-e|VukV4w}9?4-7GIJk?^dkHn|xjvgv z!OgR2AyjMPG~}biV{D>n^QY^`%?w}^iL&G+K_uESY*e9!csT}HQ``6}ulM{Q`mQF6p#60u&K(a^B!?`}7 z0}-A0q4_}%|ORtx@gvXz7ZqVgY+{huEI_8;E-|982u{j1!p zjO;BejdV=)988V=RcuPv7VBd0pH8eci#ql>DXOPww_z zf2_NBb5{Yb0i%C4vHNXG3!OdJuG#tviUsynk}ulD?7K|xCl;Dr9hr@Zr<8d@^A+S< zY}@hNTmthSzTJV!t=8*4fjcT%T@16PD+E(l+6>TELqbG*B=mGU#A=@9?#GGA6paDJ z5tU07(uwmS2P$r3_Z&OCIt(!F?qp(H9G%KF)3 zhxy8*VO-5fouVWr6UCt=WVv#X8_VI8IFq}dlHjA-yMpfZ@6e_sA_l|G)K?}HAx7nJ zW3oWu;;&jsp#1n}W?_*ZG;$>0En2|`<(_Bv2EGKWkL9RVuMV3pc8Wr=L}3i$hy2se4 zAOL@|DzKRpaRi&4lggkVk3<>J14{my?BY&=W)WY%Y% zN!I8jP<)6L&i*@)>#p;jYIKoG|M~kaZZ;;-bhqV^NKOnMESci8g&U~VR?pelGyb&X zCnQ$!p$v}~MQ^zBd2rjg8RxRi-2GG&BMx8GoJH}nSO1F0=HeK4Zb#Xk|)nCt+)c((7EDH)r)b5=SLPWZ11*vt0 zE@P0-R`1IUExcuOL`T@$|8Pt^6~xy9;g*AF`RN1HKf-^u|E3SH|EmxDPfEtmv>8;< z4&M{|*FQLD&%}Pz^%wJy^*iR+hV!+mj5GJhV+Uoz>19%7`s?@S&nk!gDe~>b!Fs>~ zQdi6D&kLQWkM5gO+}p-d4&*!i!Mvh$Q3h(6h^56BXTC#aF%dsmCq$Xn=;Kv z7cG7T&W8df3{!+kiS_jq9o$@ZA)dqfkXQ6Z0m3t2L{GW85U@MH37bIw7iI4lBudzH3ASC#pP#r?X}TT_G{X}0) zKwHVtc~33APZ-kv`IUJ}=#gN!j0;JD&3$MXKNo9W#2>y*f1>MIW0H24t42`%-Ur4~Cj8 z5k)$8R4| zS~_i(`_GluQQ8VrF7iRU@T759#;mu8U#0s2wVr;{T-{ED(+=konk)zV(r>P@88s#C zB{y^Oz35~8lr=I^SLOJnNbh)0q)Lm=Uhf&1KC#PeD)F}(ccmBPFUokOI3-7S{fOC) zkZY!*8bri6(R-IdrgyL38H=knb?#8CT{{Wsy??cc+D(%M>B0@``dd6pXCMU$(e!z< zGSzV>&1-Kpe9)k;bu#}1XRTb*wsX5X`_p$vpkMzEWr17ex?9vfp#zU&ZR|!XvF}Am zcY4zwM~c1naVwae*$=<}Nq^Fc6Hr}b005EyAJKq;{{P7eT&$ff^!~?` zkde8It<`^WgQfq$W4qRKkmnl&vo5?^HlU9KtSEFI{!Cc=vu=nEqETc+h zOJrN@JY+r#HDrntX7w29Ji*=Oi80aW2Hk+-Ghhf?vD207BLyj7mR>#00h6HS->37F z6DS!mqDEZZGYg;t&XtE}Mu^(}*lSyHf|C|hW;fzSV2}W&B;b&TfTidi5c9)Qbk`rt zcL;ajJ&SJ0Mq1rHF~=S(s-wM%ISmojl7u+$5|{mTcmXK9Z-lG@*-P=j<)0tO{1u@Jb$SX7jO)Rr(X&w1X z|BENf`1py)xI^W>k=*R&LW16|Sz5Y2UEQ;*R5a31D5Q7&u0jcW{_Vp#!=xp0Npsx* zi`b^YjY9@SdNzibw4{jlc9sOM$yeHIEf(lW0wEAEgmF40Q>|GC?iR3+!hJwBo}>Ec zM3=@EKI%(qIG+j*XZ>G26oXYdo#QumkI&TG(r`RquZK=paJ>2uw3DwTSkQ;&44efZ zT(a?+vu@$0m`Izp^~c0~010g)Ih>?~5Ss?%qWY7XMz9rMJ~v0T=Oq+rwTF$N2A}JX zxASh5hk1oy&*P5DpNGLI``2TSOrx1%86k4AVSsSDDK#j#0-u%wC%Z(x3-_X76x&L}x)EY@T)Y+xnRvRcaS=*+` z&{C*A+p}jjuNM=eE>aGB#81c`CrUKI$aaZEvf>2s_)(dS+EI(m^~~us*QmBy%)q)ie!zj%*gV@TVSQe9TB${=-xRLF359PeAQvfu2*Py&Cr# z(5t*&Wr4hNkXE~u)fHqlq!Khf*cydY5z2M$gjiT_sUkc$(n2qG%qTFBXvtr8O^os) z`X_Czci-}i-^vN@1mCnx1*(}2phvz#G34V)Sxf^GCW&y4DV|b1d5#qh;%*;J;_m3g zvb$p}lU$%I5Cq|03tkuq$tVwsB<#fGh**V6DiD%ehF9J!6_I}NVu;!mi$fKPqolg#`GZX!p&cTK)w{nuXMN=l&3yt#b2@ViG${^DW3 zlH32Sf^;)*HZnJH{O?q2o!E(AUnqEymyqv)IF~0lZIlq&&9YrVVrW-QV2>o`r@I$Y zRv%Ngmu-&p33M(9iCfVZ_XJRr@#)y)dVR>FVMq~tX9>L13`Cq}tzq?yA_px zPSjw_D3>@sg%#&F)+B+x>!3^lglD-$*Lk>=@6ydv$;AVA*VA=C%{LWZZR9yXv_>K6 z(=d`FKmM^FGTAf3hKOy96lBY)Qhi}uFP6uB?BYXgS9>X>+gO=ub=2be&9X_(nDv5l z%`Y}xfIzf78g}*d_0|Tmx#KGrZHno`JK?W9xB+JARvHY|rg#OrZb4x;5vkz5; z0WMzlC3h*$=n!-U$GC^iM(YpA)Xunf8hPO_2Z@K5S@?uL^fKk-%^ zb4RPRJS15Vm|LMNAFe>3yCTn0hw1{UqK(*fbemN_@Y;K(ysBEm27z;@1_H6%eD3RI zW1txf%aJGVgvTRLQ5i}N&&+nzdJPWfCh0|+klYPIK#+QE5ylKl%R_>pa_TB_d1&)} zvFJuzDX;}91L?T+`>5;a?4}BwwhD5HN?3aL<nhH;JEZl5^cY$VcCH6<2dg7)djBV>vQ?Zy?Uq9Ng9}&<*EE{1YEDL|{5g|N7b3Cyyw; z0Z&~;@_7pmvXVG`D9ZLUY>nZtg$LUVecKBrRqtTJ2Nna-i%Bcoxb033-uS>lADh6~ ztmY;Sma>h=t=H73S)Kr6e08#VSi;Dyh@X!4fup=GgpeVNxc6;*PbLC})mL6C#xRvL z@}!XJM%8L^T@#CXmfA)%$S~?-RbTvo3dy|c?7#unR{rD-SG!2G-&FpAGa<+Uyubuht#3SQ_-%aZx9DDy)q*A2xwAKm} zU;*{_OW@z3wW)BjY0|`H?3X8R?sQ!!lj`vHQWuE#&2V1-g*HaNnYcY7+Dq(G4jQrb z5FZ6fC(RTIWWU5Q=4#@8-to55VT10BR|wbs*Z0f(VNkMRXRv<}heL8|Lp{_Cjx}G2 zi3g*2dckwVW0m6bl9@+gE9(SoCyw?fb%3}8gjp1?j)%KJcKE{!ud#%&^Ko#qW`U`I z?wxp%`?EG((TWyd8K+e`4j4{CX+0dZObe<6b30~`1h2Q*t=Yb6x-0}kQt$1rxpELs z`%-4ShGcmG;g4`cqzg$;e(#_ZfN0kem=kPLG0i;5zE*VcHU9)nPU?743=g_R)~Xv2 zDt`VOZ1$hu{07r~PUg2%rlI*?wJH8*&@;98#l`-|$$#qN-|_eZ*8hyh3%Dy(ML8`t z@%N(JJJ{@B(b#^ZT(*Qkp#ss&#n3jK5)~8o6#RUdh9ZkdDkspus`;%(lQ^Xh&8;KjC4*H+nPua|FH=3j@*Z$08&1^r`ppgSLi5%khdLTfJtu8`D)V6E2tjw*8JrO|P=f0xpVNyGE; zo#|)H=eO#;YP@3i(+()o%NaIf%{oC*wxZRA2ssPC zlf_QHbo(o6D*9zFSn#qBaH7MyfjXb;b*^BYW{d4&j7s#|2F`f9H`PVdP;I)@)^-!p zSGxhvn1(lF56bnK#YgYSs#$DxiO@xp8i-}S3aZGZbadCJl^go>`|<@$&M`bm6dBTH z938(vw4JGTUeQdXTzI=r;Hmbm5G2@wM_mVdoRop7d7KHlr}$*zY-s5?^CfdaByU~- z^{D1^!TJ4rWTMq>8#b9znB<$AOUHKGhS0U+$B$eYVr#JKapKr<%vfc%)1|Ddvafq2!!wbkc zI+`YX-!uNafMXOYJ|!M8evWclQufKZ&-vo&Wl*RQW&(Id7FGGvTKztZH`i={tJR~U z5@G|x-xR6-d|TV#j}#MsSA{NaJUDe z>A!f&8|?fV8`8+I_-Fv#nZ|5CQ-hgqi@mjJKH*`HADa(XykhRpa;L#3XWk5X2)l?u z(bS;{inoMI#0MvE*oHmhSBNZW@*OAtB~G4AO{vL)(r~0>co?08#>U0dJH@c*U!;< z9t%va$~mwCgGfej{oTCVh3@`AFDp@pbJk-`LxK;9Z)3RM8} zZys<4ty^EcL6DxQ1Bi9F&)^TLMX|E+)!az;H;k9JMpjOP@ zQ-0o~v5Ez?m&RcU6QO6@&}@iZcaT{Ctzl^>oeimWwCp2eMzK^3_)IAbY$fc)$~BB3 z5e6j@KKAY$5P&Nirr5fz!Oeo;Z&6eQ^w-|~_1cvIV>B)xL)_KG{7zOUVaWr|LRf4C zrFv-&_6C`+*PfHxk5&k1ARl7nhp}nSbrNAVqQBzdTq-h@Lb8w+L{)|>Mswng@E@}9 z^PZVI7Ha0XF2)=LS)?nbyGFr{$BFfr0Zv~OCz==77yJ@%vBQPLDU}kcqqdA%e73e_ z12{rO9knOU-gwO56w{P*mtBt@)mH*3OfiOSatM@^xs2bhbz=dL9m1pbyI=jPC=IzY zHi^&yOZ>%#GvI3Wi;)rdCC_syP|m=(vA3T+a8F?8sF}UYv}@JmljVV4ae^H3`Pen; zEE$2;KtDXE`eXWD2W6yy!^8Wd;-j_`tG)f%#z}Hfpg?FQGfNZx59@LyKN;V^$>`8fFc=H!g%%x-1`U5uu!C4` zd5k_BbT;(WR(urp&192)xrH^j4 zO{#bY(F6-?g)KQ%M}Sf|pyH`hM$(UAzo#r^iyWlkX)-wYN%Xj5ydUM#W z^JJ#z3Yh0vHJ(*5);FgcHy1>js`n^8TTQU8*D;hI*9L3`3bUW@WZ?Y>qR<&siBz)pT$C&Xb9xBx&BU75BJZjgEU=bmq?UBlQ#jLD5xkmL~kg*FzR_Z zZ0v9tHKp%h2M>PxB{eNeU;=moA70|aQ6n{+@dnF??cp=TejUfW<34sDXEA#Z<&)XD8mFKfm+jD~Ha?7= z7<+alv>7c|EIn-5n8Bc39*%0dx;Jpg*)|B~^`?Upbf!02{^GVHFEkcSW+wzMNF219 zwC<+%$}OF>yC^B_WTs@I;x19x?~C(GY;uc#B2w!v^x}LF!o*!4sM1U^b3>3z@03f+j?G<7&A@5k z->tGV^)33Kpl$`uc&7i9=m;!h_6k$hKl>Frxyn!~@Nk`8cH09xc!4gFr^DVHSzBw_ zqh~e8BFjA^*X%t#u-|3xx34y%z#Al$PN}$_oCw?Bj^b;2QX2ZbKZfBK*20n*5TSm` z?Ob`J-1wkJ6rR}@6xThAnTVJD5P#$9E$ev6LfjX?d+S?ru$}8QR3T}rwoJWOK7O`C zDjez7>k$OWcSMJ+lOlIt1Tp}MOWggT}08ZreH zeREtVcVLcK8);H@_rV+*CwnBhWAX`RHV~AyEVUa#v~| zF=eq?ql$;J&jgYpU~;#(-1Rks7Fuw|4RFVDd)%)TBZ(DcA#(%SjdPXy5|Gw=@hsJs zx*T^X^vB$wq^euH?ENnQrW%__8$YQICmRX?;1A3HI`;lQYY_`uBS#aPU)bSyTFS!q zmrr8i_*;@(VQ$!Fi9c`YDV&dDat5W3O-_tzs%AR0ip6IcAF_WSia-d>nF;|zH5thz z`RzEB2U3!+XPY=z8v-M2gpU0}{cK!wOF0=|b(Z{U}YQ{SNF z&TY8M;ILI&uj5B1)>vMLAvwHefsc4;qDQeYia(GNnx5%Yc6kKc*y#skZ8TF7~3ilvGtV&v`IZpo8Q;)ZwEeld1S?B)m~ej zMLNNMVw(8~wCRkysyT~^kt^{NE%w8(Hgx7O&_#^9Ls8aqb8^0q;A_2;xP#>_5COuG zBlkv78o>y2i~y;?J;El(Cc&-{BFSn{+%}W6G%F}xjb9_Px^Z_&$-Br6xu~Ep6K0lJ z%6j>DfOEUUo%U8p?gk1@(+8tjgY)?6nGywJV9wgUza_2&%r7tw*~9q{ z8)QSXWbs3CDaOott;-;1Q$nfUWm!&37htp zU!jG9PG%|zkq%%#Ow>_4H>_8=?V>INfI^W1# zU7?fBVTs!hTjV(3$OH9U@!MaN&J;$ABzyZi%<4x-<_$z%gKd($cMmbPb@_XUT<2ZG zhfoLq70i92w5+Hwg1JrJT#}bVp0?l=cX)}jI(xutKsubC&4wE0=aS>(W?wHHC-qFd zr2-TC(|=}IXK=fA`h^oWbnXU~CbEX*$*{XePvNVef(L(IvmA_t5B+7WlQYD}=6M0tz}P#Qsc?|R65t3Wk>5pKwDPA~h(9L@O8(&zj0 zYDlH&oyYU(^Jx9pgB1u(-rl;q*oTx?4rJUdM#fNVZ)f)d#J`>enUAOsrSng`KweKw zB3vTPD5_9Gq6d_9x(dLmBXwGwh-H8oza}TaKMPX&7Y8KCNg9D1SyqliOxdBsHOJd% zrvAO1qdW%5=#p8S={z^SairakIKg#R+mE8ZK5|Yu?)21n8fAu#`s))T*vu}s=A}j? zixB^gf61u!i-bPvud+fOc31Z7>5rOJe@2q|%SF{hJOfGtXhzx!_AXSsCPS=(@RL_M zqg&loDEN{EB61LSJZrKe4qO=^Oe!)Ov5xpd<6o0uTb@ zOHwgrzTmYUOt=4tp%x%}VULG4R^6gZ2R4t9R>RoPJeSyt{awkOtZRgW8R640+Q-VE zdsf7n@-PwAl~1>%T_K&7dj#W?omty&U*s(I{nyDW<|kLeS1x=9c%ia{iT>)cNy4U{ zGOglRMPt1B$zo=>Ex zt_5yXaTTH1#Hv3njI+p22t5^Xa_RW>Y1!=)&Q)bJg&T~NK&|?1JZIuIwo=55hV1{wZ%NEn$cQORhAQ^lH zP|G)*e8lO#ug>@DTD4Pl&cGajl&8>saM^btWWzDZe>MEkGdu)<84?WhXWXu-FSL4E z*)(`8Zu)YwJc{zYyi?K?XvkIzrFEVuVt{Pd$oO7B3#zpX+`S@VP3e|uJ0>pEbR2@P zD9}aoCF2*`yHxLM!O9uYr&_Q;^nrMf1xZq#NSEs64R>;tKxUr z6d!;*N3<&23B4X@8XN+Hv$1dKz(rtF<6(P$*DGMOOn7d1P#i9=$gXG@xDTw-ZtV`H zPW;2w5fmbO4&AN5fTPu?Kz73^cLRggNz3&%1pzVs(o5s?iQkd%DiyO8YRyKut(*Nj zoWP5W1{{d&Sx3of>$9|fQBR7wsuRX{n29*}IuOWHeYKS>vhWY&?L7)Km*I~GxLBJ| zGGs2#pOBd#Ns-8BD%8Ci9c{dQtZE9*ELk>LkgnWEF=HX{L<ito`v zoY=3iuuM(nLkl(b^NV@`rD1-`zg8F#rOYl>Yv2w$vXx+9@Ww~sz4*DD#HpUlKsR?A zM4PRT2EXlLM{<8&b`5-HxzRgk8SNGh&p%%QdT&9vm z&=qNq%7+d%GpMxpMVQ08G(wGV^P~ggVe}V`0;GgQC8gbFSTQG=v}7K8ek`Wcf?d{+ zQ#O^Ci@1lDI3`H1A*FX^a{2jv&fXV!#yb(`n?EJC-~NT;)KFSWT*K?j@kR0x*`qzv z(Y^k(eSyU}%mNG9yXeq9o0dsDfS5T&>alqDpXu^oYJzDx;@z&^CD8{~>07}9@S zYkNtaCXl*47Cfi0cUD7L#l}7MZVRUo@ZwT?Pe-;=r?_K~0WHm2>{S%ke$TYWZb+Pa zwypm(m{-ru8%2+P!9g!$ZKZwQ$}|INr>Ik3ESRaIsk)3n_Ei*Wkn;V-yJkf)ti!+! z09Q}dsyt&_vNXm}E|gW*(&6 zJf-9BJg)CqWDQQEUC#F5AKL>%vh0TgFejR*h$`+wV2v@x*#<%a(!t*^$g z`t3B_clv?ui6~$^RjdJh8TLh5z0C9MnBYYxJ_8WJBd52xB#Yok3_a02_wGRBAxiLM z=`7+EglFwRpYRl(Ilp^RHUb|Vrlp+W=5P&545RNHn@~PD%DH+7j6vq#($KUL(=o}$ zHYHgA{@9)6)*;B1E{qwEti=9MyJY=`ZFrc>{kP2}?zpdYUJh=`%%fAT_Q?$1W|EOL3xHn+1LveKF_N63cfX$u;>KYTQQJ{re@q7qqv1|U?rz#MUCV9 z0bAtf_#>MXsI{xo**wpc2w4G#pQ0Ljmotvlj7lPrj{6VMG;xfWN|AtC>87cjh7`nC zpX+DzB`g(GyZe^5yueWAF&J5BKFURJi9+Z6*m(Tt zC9luYba#22dkip!Iz$;{GaCuP1n4R4!lFz zQA)NXRq!*WRtGTO_=Gm8M3L$(nR$ICn_U(|dL9OC(or+kdK6l8wq26|w}0q%V?vAn zFnj`RpqaQ1w<>1iYKL%Csts#QHt!_LxzG*9kI04$Bm7~`Gwx#Z-S>m(;gQu-$E`35 z8zPW7vce+uWFHDBRgO9HgMu`KIwk>Pod8V;{A?-?J_wZmsRYBIoe4zSl@$0WN0A#fCtEZwkrJE%El42QT4IaVKitBX9g9abMh{@}kePvam=-6~os=N6JT5*lH5 z#tyTBeNq5sNFr4SCS-5Lx=T2?%V#iEfT{qmJ$Ab=L=dGuS0k%1paYIU0f-!++y*^N z+PI?6)%I5?oZB&ikw*vByIE1hev8o7qwxtr(&Rj(pjLT}87E2nW{=Y}{vbf?FF2d4 zB^CHM<3|k7o!Ezt@25uD$166c)zz8{-@nccuzE1 z%yk&*Z}yP=LhGTjMx?_goX8S9j)^jO@_%+Mv{;6mWN{TlKKoTmWhwOH14j5Yj{?!u z&^eXeAC^!S;pCBLjf9D(NKVKgfj23%aNX>2Ld7kQAQ$*eM(%^JWOcibe-X#3o}x6w zqhfSziF}sQsXUBJ!Hq<9;nJdOBY6!`DhsH>RPqWSEBA$&74BHt1zEhj;Hg&=w;v$e zC7)T7wyG!NMUUhu)`HeiUSl1n+0g}0{CU5RyR$5a7`fiN(%rI33*nlpEG53%zi)Lt zUbfl`E}~A7iDP$9@{q_K+Ut^qCM1DUk^`^#Jj~>Hz)B;1dp_i$UkD<)lVS;{N5oXa ziMMQZknKDGiiOBlPfcVOG2X>SvoyA;ecJ*n0E#>k@th>m5-fH&I+1a+xz1I5B4SM@ z^8;3HGKVT*i9PY!PU}1`@A$$0fLo&+u;*x22<(c)ksEW;3ABqK(AO#%@^w;08~oLRK6?46Be1&yS6HB4V`b;7H#f-oLxB!kIjai%^!dwLdx3T3GPr!lEzgp)?JL;XXyg8@?~t znJ*|A&&ku#tMmgx)pdTSr-qm(vp2KG6&F76;Z}TTF)<>XfUA|1yyQKXFm!$>R;HI#2H%CCtfxjoV!*>#1a2DvCWd zkN()h{)eHhA&(8tuG8;dAx+1K4l747@@0NL!zr$r2buHtU+y#~c-s1s&YvjUqQuGu z#Lm^Y?h;ws_f7O`@;ae2#)$=O`FTk!#Ce*$@}RL41sjMzegeF2BfhZiaB14f?UMlt zr-Y2IRs2}RNw2Y?mCvtSb#AP$B(+~Oo>{Y2W{rTTD`UE&(ME6K#ON)H;K62 zP9Ox%+yWrEQnfk~D*?}=DUW>)ASM!a>R?a`G%fg0N*}4idr<82hoM^}8ZESosQ@`D zT9Q%!cBouXv%g>SkNvgkF54H2o2q?Td082Izb@_kcnYEon4?L@fI&7BQE~>u4Kj?R zIl=Wd#RfnK-{Z@jBtWZ&h8M#(qae=7TLf?fg7#cBM`z&N`nYRG*d6jf*#4}gX~IyMR=Cv6*>JREXz+_xk!Vol#;e_mh+Ew(n6Cz;c@ zX>qf>LF|bpSzILgQk!@ZGxU^i6yH2DCSm5%ePK`;RyYwo2_O(5OVWATp+SKglM3~+-TW1fXx>5Ieg2b4tQDWgiysgveTOO(EZPEO&H8gXexUx`S z{H!LhP~+{zn0WN=3OEfDY#4emG>;P4H`jr&jxS;s_0uCABT%0zC04=4&gS<+_ovb>W?uMBc@Z-wPDwKf% z+8+<2^tYZjQ|s6fA$#VM7*V}{>NUohS*+6+%~%t-OrYM7EPFBPwV7T zN|whqj%V3mr$Y16?6}30C4~Ht$;y*7t-?^}IY|;1&9=vgCK2Q%MNHmV947RW{!E`p z1(oTn`VZj)-#Xo{R*P`@v>iW;=7}?upfY6k(ugNbHf)(l*9q2}`jj@5FbVgj3ZH1N zbO)noThk2Oin`9zm^70G;3*h2IKzKw6(GdXwx(E)ul0{J()OIHN*s$XGN71Y-&57t z3M+veVzt8XgEJzhB@Sj1#_UtHoi)u!f^tTbIj3nkOM7ZEqrmVRHrumaoJ+-uggpP< z9mtO0J%${o-|SsBQnITR@#`3}(A(KIOCD3WQ2$*TMEfGHI)sXy^JH-fZTTC*jGPyo zs8MmE-Z*^rrU%l+bmlzTS^U$9xcgTV{0x0R&xuc(@Jji%UhH#@eJyZ}lq2KxQhp8h z0rhODO>d%hn1aHLRLJYk_P+@mpG}FnW-x|%#?*))sCtl|a?Fi?UP;DWZR)ATE7SYP zWvo=jee!Qn;G>BtLMRIb=fZu!yru_5W7Kl9x#M2k~wsCoUy36_OW}$ z?X-7WYiyPThBr7uf!`eELteRe>_UipQWO0|)U3*_D?d)x<}b!Gx(-xzv@mh6kx-mC zC~oG0Gth4xH%}3y_(9Q-FkIP{hEfKNCdwu^R@Qy>0>(KshjZHpU?ig-J+iB%G=gj+ zJjR9FJ-EIv7nwkCusl3|{$Luo4B0awvF;Jwc* z)rB__Zy8M!lSj%C4-QfkIAxz^>`SNP!}tYYr;VYzB^J)pATN9ak z%GCsPKqo%rB5-hGf)NCcON0)PW0+`C9mK?}7hPER(iYfD+_yiZeFizLkVh$w49=v# z-N19GfPrez9on*Z?^&BrzmBbfW7F8;F!sBc{*85>La^fRgVMZrS9PrZw+6a#U$H*w z8iDk1UI6p!=NE+w_9f>*t>^3*IXA~qglq&7n&Sa&#&{fw0Mc`hGCZ7hq}dl03mQjz zKE#ScpJQ9LFD>7wP7zos>c8H;L*yHh1Ra5HtCf78yC`=Wsd6ecDG=VTcQZz2$?#8R z;^5#QZM{f$LHmFb#2bKWtsscL?%`|q;-`l-UC$A($m>6#E64q;qciU#aj(x(Sgisxg;3qT{5w~++ z;uWRc4o!r&;S_;lyF0Ql2@lRYIsQ`vbq@%@W6>$Q_Z_nha?og%c-5T3^)I^LE*IS} zF4xGj!B*g}g8_ch^X7MM=b&3i7Dz0xIyVgMf-Hpr@D}^Q{?o11c+MF}Zwk2Y-T8yw zD+G;i0fXPTEpypK^UCG_68UO@$fC7=BcAr}f&YJwd}`t%BGNGZ^?!o+p}}@}>!4*&1U%hjdem~2{GpROZ)!~$%t;tI<&tJ^xFgjp!z>Q*l#1u(L>MP z&f-5jTxWOZNZIKbs_7YtXgXQiacXjkm?Oz)If?2~i7Be(DGFNoIu-wiCx7qC&A~}- z+W$rb{LfBMXkW(DfK_^aQxrpqyMv_u9R;K z6bb;q^zU*6|K}G`6cLhB6i&_zN&f){3ol$x#L*{zCPxdhQ{{VsU|xZti5i?iTvUecmmjmheiBhFv4h^kmHiBHf_73|%GMGxrtXUS)` ztuG@GLe8u7(tNktNbk?m`Fb`mW5w<)${~W<#V`Y@&vpt}kcXze$|{l5ljm64Y`r3b zyH5yeXq!2**pW=V5F^*2a`kX5NHdVt5~l?V0kL-oMQvN+J$`TpD&`nX`S-zIA-JTU zh79|ODb5^MP+uaY$_be)IflL;*a;RrP|&U^>}BN6pE?qCUvrmY)0Cm%n3?X>p8+M+ z>C#Sho&L0MtziZKFo3Xb1%Gg07Pi)`5-fIkQgBHiF1$pHWay0o5AZ3&o;z9le9OBV z9%T9H2(VvaKjg^mtu(K~RgoXw5|CS6VSq@xjs`;(`K7BKalzjBy3rhHv;!yx96cc4 zZg2}jZ?wxsmq+~B;w&uo1S!afxs6EXmL2%^Fb@osF@2{1u#Otnzgt zTIn-Ow+zmZn@LUOxK5%?hp3$igSK7^V`Vf>Oo*>?nF+yPtxXh;Skk-H^oj^q*ZxXE ze+r-u6FvajcC&=chAifYG(JK1VZSP1iJywbBXk(}7e%dnkz2AdAKncYx4>>#P3No# zN}tB^!lnHNwZLYZ?U)Ek>tE;+DD=RGg!mlB+9$@{C-s&ZVk}F#MOMnY%6R0@u!w1{ zi6VfpI6yb$ltl~0uz!wxcK7E;6z&xmX?uPFfzyjk*JY4}>0S_3bBJ?e16QCB(uFrb zNpi<5r#9@2*OzkP=0}H(--)-vX|jXUEmiaDdWu6N{^lS<*TQ03)>-EAsj}G%&$8x- zSVpEu<5{N<@CKe%`VK908b$mE6k$H>ihj6e8IEvxq(DrzATbc^Ppf&2T8Bf+1v{df zod2G-51;1XCPN;l=+*^cA~xp*-A@^q#{(Nd?gVE6Tk?or=4H(PibZdIvGsTE0s(8b zy}Tk=o36@VR0HbvWvS&*l0itLR9S@6O9OAY8;S!we9!>oeK_g@Kc=8d!cE8e^O1&b zO?M~*pe!9K{$r-yQW7WctHGU>iWr21t#LS7esaUx0hTxzVP}mG@c|gkHA$Dsrg+sq zqLd3f5k?7=pa%h%Rm~Np9cFuB_$q5h1yRW16RS15)Yi9JY-G%M9un(ExpoK!P3+p# zpVdu{QsqSEbTU>>`C*VqJOKQ$8_(E^{xF_x_#56dR}hu5E=oWyILKuVc@3d?TE}-^ z*jKa=HY=Q|m(>646tWr*0{dIb`B1KhO?CiV_Tg4lvro{*DrjteFxBL;SWOa6n~WSJ z=&ITEc;AVh-+i$ruKfis%RPC%Ywf31>}Fw%Li~a_q80&@9|KQCrbU>T5`G|l* z?+Q_fFTiiPrgh?cH%#dx1IK{cLnsR&Qhet*cp&}M{bhcc#$M9Z|IhVOkM8PeXfzjI zpul;EUi223TpslB>jj|{)|DaxBBnETyb?=lTvUjbcdyz>W&(>s;h7iV!H)l$u9M2= z@+Y@w%P>Y~{_c~st+`Fyu1`mS?%3@TWa3PMg}As)5!U3@yh#2|{0H9+703K5OC-!9 zL*~YD2rPLY;ih&g<_M#`ySQa9J$3L@s4$2ElMf6W{WoL=tZ%Bvzbv&mtudS{3C6v+ zzfDD9)lhM8W6p_6tw^g*p4Q4Ju6WTL*6S_U_-Xi~c9*v1cpi_}!43mY=#|NeT6OK` zc1me9QR;0>401DJB3lYOj(JGm*dsoi1U-0eQDHBv;kpQ8ZX|lMOi!%*?SbaqK>lo_ zigoi~*Ky2LsGo=6pHjAYHx78EJhHw^taspJ0p4DRQjsakCh*lE9w9?&0eL zA^W3j`pYdx8|*AfzCP7)F$ys_cX1dn{>jBA{sQ9+1q1L`)G-4IX~B;CZuMWGA2_*c zgn~d2B)SK~gZH+qU6-UlE3)jd{;awBm+M)pRBH&HNx1ev@85$)X;(BKAtN^AgX8MS z1=(}TO7ah>nE$ldjgB>Z`gkd<-2;l_e2!otB6?DMPT;(n_+;;Ad5eXP2J)N-X>Xht z!6LRf9rm{p`7T{fAdJ2B)E^m$xrtz^HeU3OM*arXD?)z{1BJ0E^Kq5zj>_ zX`&c?CDN9Q?6*+7vsg}ZK)SQNPh9V*xEx2f4-5>FnFIC-R&)TYKObq61zI)Cb+M|W zA%e`yfDE@iQ>%N+DVP^9WMwqYP1NX}N4Z|nps@fPG7t7aY%g({XAY`< zp;Bd(ip0Fbwg@N8&jxXk5II9U#qukzYc!T*B3iB|U zud27>VB>GI_bzmudnk?Xa~4!7oc%vham>4j1RaWdSMV8GF@Ew*T=GAFKgMBeD={w* z15m3xCC%DYdqxh$2u;RnwUEyctEuuRJ~(8ZB27wSM^rrlSK@+o{cH zbweJP%QGoQIq4JKP%0LuYAKr|);1tMJDJobmkt3z=GXo~gW#8PTQRv?H3oLmZOIpF zvZ(sqk5c!{MWTGTX6SrmoM)~Bj$~-kj_`}N-3I}I2%mC~8|e3*<_6-Q^~Z**4gQ6u zQgs;95OV0AT^ZOGTR_=;w|Q;w`+x%o=pRU1Gk3WCe2TE}Jk;$dpgzX4(4byZonn7n z+XKAcqiO?8@8`iZFb{YafCIW|bb9-Oc9L#LXtuhVPtt}=rwH!X*S1&Sz`E@b+2A$% z)KZ`yxIJjSF?jGHfd2UjemPhVZ2qjnch^R|C9Qn+J=Iv1;$(|^f>i9?DIRYzQ1C=? zr+&O%Teg}6r%G8|^T52(MSZSC>KN0#X*oCQ;7YY=aus;;HDe%g13`y1?nNxz$|I}Y zw#|56y{p zcJc54iFwQ?vin>S zU`_d#eU{PU)Knvgn{wl$E~0UEiMwiRceC@TEmw(<+6w^SL)oVC4S#vJ?SEPbdW{;(X2yR>?Rc^T zir=at#(nTvPl&#CR)DnY-tT92*xFzf@57rvA5wmP!5wgH5-#FCs$7;HOP|BblMgav zFa0O%mRd0D{LdY8?REAg@Uv)@&h?jn$%`RjQhj)1=j zK&+6J3)wb&MT71a`#s|oEe9zzZ{@8cD{&&;)jZ#?l0CMR+hNqqFp|(+5&M5tb|&CZ zc3%J=Yek{WlCp(J)+X6$q0bUYl4cm&U<$(wk|IMwNVKSke63n2GLb@5s3ax+eYT=f zL@Cvm^?zQ%%SOmK3*|8`IJMtD6&nYj=Jb`d0lVm z9K0*;W|UK1YHNc1c`1479~@hJIuey;ZK^i=(Nn8hdfm!Jc1>$~NeiFU?0ehN(ksK$ zo*iBDZPDia@J(PzC5O+3&KOJP@jB^^+xzPxLt@)i-A+_({3Rbyt$F+%O# zm;CNFe-##Lj1FyfdAa1Z-ggzdzDtr$eDc>5{x^6fA*EH!q*u6UQ~Qubt`fKIxe8MQ zn_XlF>A3b6kK20O6niN28S~B?^!;o<&>!fT@&4Fs4!sUaTCTZ9=lYZGF|9Y_T zaem|w3>7SVRBtPCFtl~ZKDE>@Mx%twNZi0ansliwP<`Rfz&HAGM)U0o=Nsi`ohs}< z93~?faXc`0^;3s>f$X;Oo1U+0rT5NHRx8$5{2}4-BgR8WkG!SgS}j3&QD#t*X?v&4 zocB>Gw1#*0J8^!@PB+6`g;E^L5<1d9|}aZRzISM(P7? zl~I9xHERYsj_??r%G2F@N2I(bkz#s;l)_n`@_=qf(sKQxcBM!!SEs5=rQA7sz|G~; z@?j;Lq92Ua!NppOC7+wF@jooEYiq#P*J>YgC<^anPu9PB!hcFRS7WuHTC|HKS6_57 z=j|&Sbnnzv&6m4vDNg5solK~g6_vYo)8p!Lb%{-U>kZuh7TPyQ zujh;DdVeiL%V?z*zeC=C^L{vF)G3;m>HUdT?3fhQN>z>(O?)O%QJpc!|9Mxz&;GL5 z7@J-02g)~HRGL>_ByW5}$JQ@B{##NQ=hgLB9enH}FLMs!GHN|C`W{!Tc1~1#+@YGe zHxOTApJ)Tcxo)^`T6HUiH-)&~HdF7hUexClPHySpilN-g6}L_LiGtgAy_4=TtMn+A zmA22q{gHA~We0A{5B~>AR^?nK;lrQOLnP8d-P)I_^2ljka!9?l`Q@bX~M)x@}Vsl+t3j7wWP z$;x)TD^7mx%i{^PtQmf`@8#F1iyv-@9V@shd04#hlDFwQjdhvcTO##7nVm{$*N#>; z*?syE7sKVX;a`-L_rEHm-;=jGmu0TpubK08PyM$;j=nK_lja7?3#2{!xFzqK&8cTx zkw>UiZVB1xp{Z5k$uEL)T&0!J zzHC_e{p@QORSorzytMjlzAM%@2_B_8r0!_BEw-Hahrd_gETeA7h4NIeF+DqGAS5A# zqrAG8yLxfD-kbCJaqiZ)T`$I{cx1#?WZ;aBo_%ju86hHA+u&VC_1d0yFDtplYVXdo zA%~6_9X-6@(ZRUb7HuY<_Kmp>Df9IHZpcqOyZ2-XP1SC3l);&OYWpm{sX7%}#nddD z+xh&9ojiZgvAe{AHKGU8;@#SXyN^7Mv5ODQmg7DfZIyMYEu-;tPkfxnEuqxLSgt4$ zDbaa6KXmoDK0Mz(5O((B({=BN>Z*Sf?-E*Y-;nVwWW&&nh=JXOcAI0#3nbhkUtVhx z+?#CM5t?^XAox@+jdmn>UM=}i<{E9M`{ndqLr1&bX6@lxw#l{N%lzhs4bjZznRRut z61%P9t5>He;jf3K-w2XOI+CBjDf)Ni%XO*m`=1mP&GHF%amZbhuiLC*Bj(eilY3pr zCZ^r#m_(tbtk+v68oTF{4O!=f@AGbe7=R&c7fiaLpy^F zlyeGd%73(WWrazL_D6RYbH7d4dW)j@`d;qbqdIwO#dG*dPR{i85R!N{FKeyYv4=Tz z2P(Ujv}Vm64jUS}Q1zFGa8KojtCWKOUArv~mGxFf$hcj&;~sebjkV=jie+3^b4E+U zuBw&!Qts|7r~~S8T7G=XsDPI5)m=<#k>;o>w=`D-`dlU*b=a3l1ve9|{|`x)34t zc2(D=e%qo4j$+y7x_9mlamg=P65oE>VqIaT^Yzo)fAXGkivM$YTTp3Ev*HR@yNz6e z3Tp-pXP2c(8aF5OKaw;UyzLaUL_48iS50->#vPU`y1oy}zb#;x*6`i$=$ePu{Pl%D zLOiX%!pJGTwu?IZtRw#OpUD>4)iEu4%K6Rd1(~0|EwebEMLC#uEALKoqw$Jat@`>V z5{Ip6RE7J)WsBcO+J0J1Z?CG->ppE*+14?*P|3G@kZ>00Z z`r`9lYkxT&vf)UW(VCk-5bwP=-F9X1thT#RTMdWP-ZeE4cDXxT<=+~l@>bToQIL;w zXVsiG)iCo1+A(1sLwagiZJ+zNib>`Jx!$cC_U?1k)$~hqzt>-_+kNF@L#x{seM;j; z&fZ_YuGTIRueY{zp;wvRHRtrP{q?loL9wreAirid2}d^p~rBdRGq=U3p)cWW)INqwy!za;OU z^^pIZS@lMSCrNknPw!R9pX&DC$&wVB+4tFDHp5WTKkGtc|9MM8&A0XC`nT(6FX7hT z&nI1?sNmOsy@AX0n5&@aGX03T`I|o0eJJH(8h(!b{vf?b<*8w&byFCRS)sr6`5G4S+%(9@?+X_`^nMkAN9VY>YLB+dwp@H)TUqBOqs{$| zGMlXS&$;sC`3?2RH?xkCl=+8C2Gr#S`@XDotaQ4XsII$F>2hM^?EP)O6f=^oq1#7?^;PcL<4n`#w)2D;i^|9_xpi`QD;F9*w~~>b zp?mpzPZ2JmbU4_aakr^@nE9ps8RI+i#fPu6x=KoZ?vXmRz>G8HTeznEj+qyYm2m1x zu7%A}#1B6obhMMI6jS}&5+3$sho1N>9;t4&Lht9(9yg-Woey$_2eh>wbtYccXj0&{)EH1wlm+=m8lDMZ^&1`zw%Fd_~^;wEI&Pb?-}}; zp`MDu+~V$`h5;q33+=u2+4(1Y&TaI&dnqG;d2}dEZ@ceiVu_#CC++3oSFbW6z1jd!(wnl2TJ2zVkbfvdzj2kfE2`OBu zKJF9`_%@6ca8yo)NH@p}{u}Z5xE}6#!7sU!@HWAd3i`;3G*<#u}6HSq}{w$rJ#p@h1oQqfo+tG6~0vErmHU7Xl@Z zao{JD5oLl#c~S#Mz5!$%7owh;1#=H>gHi;%GU!QaHb*nOA>phSlCO&uvH=9ikE&mx z1@AiW%hAf&X@iE)$nIns8S)&XL5MUBAA5Gc1`TuSqbIA8ga*;c@MWVv#T6xw6v)FM z`@ee*in#%b5oJ@%H|iwJZf_Urq){Qld~}enEd)yDB=z=#RZXm4WaR|-QzkJsXdEW2 z;8-~45(f?^1BT1K`a1ToGF-i;Tz%jFP$eE5JT21=E@4+aCkj&;g2uQ~y+JV}MIl({ zNkmwgc&Gnq-z~`Hde(pg2lGANn2c^-4wexi(vZDf$ZinWJV+BCkh1cQ=_nDOy!5^% z2ns%U5Ql?O7UPY{cxQsWtn`8tW$5@$Obl<7QiP)oj;7rW~0UXZE0BuY8$I%$b zr0(WL9?5dex@eCt!V_mok(G9VE$RfKomJ?0GzKIhAuGj$j2hMgAv{s;44=*l@OmBU zVBh14LiDauj0GA^!0i%1@p8j-#VgYI6~2Q7An|q)8}HO(G|!ht4Ws}D%ZX-G=syU8 z2y+V-1AE*pe~#wR$+XFF;o`zo#CYM}VrMX+p+C`<@cmZ*^8cHa$$GjCBXpAS7I+9+Jbp;@z_>|a7 zl|#(a11+i8&b*OQo2;@Rdp)0f9Az$` zmI(OGz8y%I#|rUvromjX6{luJrOrM~C{1mY5T1#{-G^bZqgKqvLO~FwA`TuUmFb=| z!q*K>nn~aV6jzPjDi~XhOh$+}7Rr6Z^mn63S@RPJ`vw)_!olvlfyVCWOy#jiBehAP z%r)ANxEZAr95z0_rvmeyv=8l@W-P~K(a2;v#owPyBaNyLB_A@01Kxp0t6)irLZC

uvdo2`{pj`B5Zdbf!hSb8E;HR7(5G1Y!`l$Tqr)y@U6+{-zE(g z5zUTRBcQw;hr7VXiIW<~A^yeF38x!cu+$cELUypRA;M24LuwY+nLL?QMle2+-qrV=}_|{tX&CC_V7R6J0hA_Qis)oDGf= zo|ncOlMx~OZ^+Ycg=X!*6E7)Te5D88!4&KUBI$TzGMps-RS#sh0AF}2cb>fYK&@Y1rdb67SGN3`ClfPr`Q3ZR3-m}bj`K)Y>{G;}~Kx?oJ~wqBq-J=DdSj^0Zk zz$fX;BCKJ_&I8|#e5*Frm<;@?>49!8bXRAvd)R*YlReNb7-9FO8@TX`!5_CBqO=t*Wc03njSD8r*~e!zg$K%qz0yBH(7LDpSrgpOSBP@3 z!p0kuu^EChYi#;07{)Tqg(IEt&Q%g(u{g|AQ6yKA4eiK~AbGEE~aSK+w zk~G-nxIoAhFG#^g!Rf=4j~Q2&o+f-d^op<*_F}~4XzkB|>Bbo3=yT8{>GU0Bh}L*R zp**k|S#W#od#Usrm@I5liMNp6r&ZTpCDAsDd$TE-gZL}lWi^vsxg@~s{cq} zf~+x9VAlo75Gg7Ol@V}$_}&UV#p!P_v7@yjpb!=^S?C?$77ZEVCkxzcOYq{c#psQP z8yt;dO!4nb^GJatwA=kgSV}xlQ7f<=LNeGrDmZ*y!i~kFk&*b$p1GKHACAmW)9NF~ zx5D$dsSf&RIZ<+YD4Zlg^{>$$P*#k9b6lf^dJ$>C#jr;mHMQx%80IWOTuQ`q({6Xb zTFM5KW?xHWjp>2d%iY9fDRJStfu?fU8@7X8vxoB|TK~j^$cvmhO{Ig7#nQx3hxyw( zVYW)adCRda)y%cijwZDdwzkzBDm1 zE_~s8fIkOe*@KOp_4IJ6k5AB4nrJkYA+EajGQts7!(7k?yNik`{U5E{m8_^jhw!X zY`#&j+P!H6YqKBM9l&@r!Es8B4-*MsYL!^9-vzOdtvrgBVv$5p(|Yn$3<6HvBvX9+ z6q81Qrz+$X!xTp@e)EzE$!(WI*TdT50iVt;p+X%N)7RPG6L7x1lYH+d?^HsBgq=n_ zjMb8eUT>FQW6@}254h?^LtC8OZW*F6el4U-f@6MoJ0Q)bI@(t()bF@3G94)0xj;>l zz&kbz4uAZO$40&r=s12*P6{dq-Isut!a9FPZ zu1GH<{?*1iI{>!I*1BjXdq55g_d8^HlSj^wCySVDeuyB@6tQp(C=TNF$la1L9l`^% z1{Ra0FyIt!Ko-hUf7@BqgM@gX$z7D$U#33r=xJJIT9u zQE8K2MpTF<5W<%Et~v;N(iYg~v-c*T{SRYc?M-8(uFS}>5ed;5^1z8&Bf@iz6YvZt zll`#WtOh@<$u_nhy8od!JjpEp9E|%&nts;6k5$%ttQ#id>H7cB$J9l>Sv$lkHagK^ zrp15gaO%>dtQ}&Npq%KC+v-1bI8{MP*6y%M5KeSQxBd^^VHYH1?F_5V*+gf1Z2v=N zsH$hIy6Q{@?d9Z8as99E*s37n=M77jb@B`XyX5}jLTf+`k*Ma6zmighb|f|PmS zHwpV|ld(JEtfC=6^kvHl;Mv$|fg@Kh(KPf8$qAZX+_W_GeMvMJeFt&^oI5?3?M5V; mioORqM!m`4iJw;7(OZ$5%y}T>;BYkf#ScdhR)0dw!~GvF{>0+| diff --git a/codex-lens/dist/codex_lens-0.1.0.tar.gz b/codex-lens/dist/codex_lens-0.1.0.tar.gz deleted file mode 100644 index 3bfc9729e25af0d14fa7a4ea16de54c628e29c97..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 466437 zcmaf(Q+Fjy*REsRPCD+GE4H0<$F^;+m>t{d*tX4%ZQIuRp8fuTJx0|*>7d3O^RBC^ zZqjHtI0Z{rD+rK@gQ=OPfvuUn3nL2?D-#QotC2IO?p$y*Lx@_-b1REXi4B> z`;^IAQQ<3YE-5J~K2p>^xB2>lIycJYv>4ae>ze>%{H{H9sBG@81|ED&FE1Y-8?27@ z@(=cE3C6IUelr7SfI2?5914<77A_7ioz+)cIXT_O+s_`=H$By#W=}V9pxU(OgSagt z{+Sh>>1lJD1H>y(cX#*x7iuq;2cXqMyOWuV%fktDb9wz0dR6rd!Ue5-3e1A4_42+Q zYEMCpvmnOsZ-;A69r}6DNioPF4h1Dn2{q3fbaQ=;a|PUe(x-dh@otCw0J>P9J_Bu< zLA|fN9#R&UcJskcalIA3YCO_3FIvuS0%pznId;u0`VEjn@vX{W{xPLm&OBKqTCt%* zLu<6-50u1>lol&aI0oB|l?((*2mZmx4cz8u0Z%nWmvr`ZRuhGH0$D>^DA@CpR>^;o z?y#dZQF*^AAS6mxW)*W$N<36>#Lfhx@A4 zRf@Dr_1~R1J}VOc;QJ@4%7aTc# zRpAQqcK<34a~!=MU-J^LnaF?@_A?NQm9naxm$KL`p6ncKW9=BeiSQDYcyM><^Ip?% zmvzM;dccqtO+Erj?rxt*N`Hf;i3Luw%tg}+9<9tdjxm|A+RU;~IhmsT6oWGqiYb-h zFiqkpV%jPIz=v>!Up{_aG#WfBp{&jQ{BXSgJ}J@&uhnz|VdYjLKTpoBWxWXHu+lm; z0DYegF28JAfS(4JwNj(C1edo$LfJ&QxeYHg25Pa}ex5$AaLjSBsN3I<=clhy&ycG8 z{-|YsP;e;Kx!o44MV1$*w^%wp9-hv(FeEQPZf_sgXM>npy4tvz*!rcF8>Lmjk2;{U zyX)=3#uw)EUTG5QjS?z&fX3|g-Q3*;=IQb6*4~{KW=E*U<*kLMVX4FOPS0>pPsJq& z3X)p{ZLR+U73Ajpc#y^ZeEhYc5q^6gE*4-13U3YXV#ZtP-}ox>Kz_CcZGFdut6m6j zKn@N%6A3?0W{LTR3tSI^P-bGcNe`q^wn_7H`b;2Uyq!zVznxvZR}DXCq)NGIYL&Ko zgIV{pMN?iD5mfmF52ts1*=@A);RHdh-euaRwJ0w;q9&!98InKc1}kb57qCGf?yrAx zd7vHbJJ7!Y5Rh;lba{Q*qK=R+L{&6@{~}n#vBm!tQ2XjHHVD4&X3&&X!wYIgkhD z7_}0D9Uu9|#gsaH&Nbeg!Z6Cb&%(mII+scfPRsP`_5h08ZJVObk<8Zg=Y7kCP)?U! zBi9*iuK1Xbfl_HwS9iS=^ykNA!sc+=R68@{ZK?eK9wQvBoXmmJ1yjaG^H!sQ23-lHq~lvK^U+1s*RMXUZ^saAB- zH;7OtRmr_Rw8Aqt!APH1FPDW6Ut9&Bm9Iwxnr;btudK(ti@i}RoMsUQf;lM9r;*0F zLz3gF_NRMb9Ufw%Vz`>wTZM)Z+R=iiEB*`o%<#Ex+(W04b0lYvT?@iFR$!MuX9go8 zVA%`lbkF;tA9Y1;^dN4{_K%bhj<6$_hb$p&cCaAFMpkT?oKi^DYuOrYO;=wsmImNN zB9$(OsOYChSOa(T2v=QTok}lFK5x-l1i=nxR!c>S=oq+8ArltiiF8C@I&jBpZIkJ$ z&5?hO^3H-7?oLWkKFCfqr-3?roPz{KO24stJLgk!k!rPtR(aju7yIeD`}hIR%AZ-k z)xvyIlD9M^(LdGTNQ&v!WChTau;+R`D%UbfuNFPxV~?~yK-yJ797ZLrjvH7Su>8hi z)GykG07tSb6V)L~DpgxlMLM{5It!=uV^-Y(&etM4RJ-%q;JDjzYCJ9UrXx{kq$_Qc z^0)9TE#X01i=A>+1rtl~+APiX!l8NQ_7GG!I}-z`rFps%#MJZ>3u+&vSwX1A%YkL` zV}8YMjdEr5dOvQa(4n@~I0FPTK4YH+8yI~%fy>cp$RNCgRNrrXF?M(>!M!EhO<3DZ zNt1c-qya6!QZ)6P8ALKwq|3Z!k7vLB=$tD|@ZhN1^V7-=T)YuSPbN6~grWuJqL=@d zTh^5cdvBC}BI|*#MerBwtzPMnEGOQ5#wXVkzbrc#t(5Q##{+biTI%Q+zDaoL{-RN7 z-AuR5<(V6VTcao5&H(PVfuJIFl4oZ0@}bJn9~{o@_k1d}ZKAyI`{z^GZCpj(53p`A zBZ6O~-lrlm+oK#=PmXMG!b`dRfO~a))PuP3BNp=yFy?rxis0#PxsOJpuD`{5QV<|iR z^!oza-Db@OV2Jab+12dZ!z16F8iZ1gEy!sXQqJswGtUJRK8eYO8yS2?`UlcXQ_t!7 z=$;uAEf_l_WGSpnsnvrJLzfOoAMk1KnE_CPz37Qx1u+4#k9)9VUVi!YFT#TN<(406 zzJ#a?@Ga90yzz9mlD9B4;ae;@3ME#&Q|0ZGT5PIWK#V8*LVC|c!tFME7p93_3S2W{ z2(z7p>c?LCk2oOXM_k&Dea)rN_tg7N(%_fI4N6wg_rn^{*1`ItSKS8l8HAGwRr}cc z@77`1s+Ri-yR>`1NhAAt;@e~xRQn3bS)Tk)HZ6`pS4q1(|23<-1*Sm`2j4r}BB0*? zMvg!^3-w*OI}{!WU1HRaFI9sxmG4lXo<_tml~9PTPpa&lK1`Ml_>|Dh$7()^o`NWg zr}jR|GR{~@R-EN2gygm(YO2>bU|xENfNjGsGgJdLCN$@MN~(I5&I(4+3gkOk?VvK;ROkrr z3oZYU9!(t5eu$c4sk(TZy#kR&2EIj`$wJ5-{q&11dFY1@d&LQ8{R`R_A(|G0F$HZv zMYoXPw^CmXkpNtnWx79ZlGi(E-MV@E3yh&9vHp>W&>aRrbR$T(yTuL?>yTMGC_*0I zSQzF5&=jj$07aQNwmx+*A(j~W_Nr=9`Qr_-6^{c1+XM%;E`ji;? zzr-ZN2!ZWRW&m-`57<0RPGJvylm?D}D`(GunV}`YLMz6&xddZknD7z{j7i*FC@)xG zzx9_V8n}cF`;Z59nH8~Bicyf!RJfTW8UY1qW_|nCq%-7-(!L6BOibVwHM)FN23uSYNE7^xGvNaAk9a;f=-C5zf5x-JsNph(OnQWw6kzn|Ps>29{DSs^r9e=6@RT>B z51j1&j~1i{5isMB2$72VAfD%YYo(xGj9t8q7v(eFF7sN&@N;! z^{N^o5||vI-o{FfX$y#Jev%WQDEJvK(Xr-e#o^VafW1h~f~xj=Ki%K+)IQTvzDE~9 zXm@tcFdF~dLD2+cqace5GaHLykQdIElCRs6!bmVythQln$KQcK%v7PL1gf(-oZRWk z?gn0v#tA5I?{mlMstLfQLHPa8>)WC`hPe)u77c>w_0YK6`R0EFVuSX$ia`CZs9Dup zS80`yyZ>ZCPm|OUuotu>w!X*T2C?5aw#J{W*k7QeSy0N?ih~rW)%U%(0i-<#jQa*k z>gFHqgA70aZCx!d%Yt5rK{Ls(YT+W_reDKw->!Lpaw|&eJ5t&KL#Zt z$_12?mMT?(w*K-7>3}*{*7e>%f|Ei8<5jfbYf*lu>c?BKsyM>w18na|Hz-MW3i{f3 zv5%KMd@`7ZgtCaT;XCu93$73j0a|r(Mg=kUT=+B#_N_eaFFpB#a;I|4#Hfk%AOYcN zvxvU54SS<%21u9zAgMwk#_6q%5Xr8E;ljX<%FuwlN-r^!Q_l8?)+^?i?p+;N6$8l- z{`oJ29yyZ6-HWeNHb!f2fP!eA_h7?#UoF|aNahNyav1`_-^1DY`L|s+I%5hA&l0qFeU%Ap$8Jj8h@4e6*=C) z+7*_6WT%IFwYMN9lsh`6Z~qjDBpdqx65IeX@wY;KXJp(+dsFDYCj8I z>b0=i=Che~o6zEK*66#spLq~_ zx4N%-=FCvWo5PVgLKgZl16RtbMof3KRd_EJNk;S0K~TaqDf1LFq|quk0zn8mAC}5k=};k(%jzna>+PlJ{*u)_cf~zrlk=LGv22sJsyV!gdPSjvb}%7= zuQJI@QK;?G)-vT((=xQ_xHn3;cS+Su$rAdIR4Mq-LqPL~Zc&!0514F<)!;D0Hneqg zO`C3_ol|NUu%n1_ROAk7EN`1yV(Ese%3Q$kqo&i=HOI2xn%&739mm$d#(kh|pk3D$ zE+79}rHo@>B)k5xk73L{g>U$csyj81W0*#_yKMHezLG_Du?uF&^$@YDO+{15q_TO9 zxO9z{0AgdIJM!jczABG-YfPQ%{H1e95PjEkjA?6Z(jExY#*xmn15#||`crggJD(F{ z!E?DPf`LlP8mpJ@z2X@S>GQ`R>2PJ-^v(U*QjmL)a|BEwO~lGgk$&eFy|Ia|)SFwE zFX4^~AyIT%b=TTH*e;{KMm|+e3OLJHSb6fuB*q=%whb1Lrh$SaN{;G)r1wX{Vrf;K z2t$dJAT$%n_eL`}>_fN9V|pG@$?V&u^Z9Y9(fkv)%}Hl!Vl`@_O(If~a%#F6|MSp0 zts3o!oJ%Qwt-^JmPYhW^gy7aGKsN02`(>NacFbtm=C^RYpe1zto3@qbpS0%$Q(wn_ zKR`+Gd%;vqhRKNC@po!fnf;XSq;HAt2i0wlokMgyF^Wd0rC(Y|cSs`^BJ`@RWGC#n zEBSVPZn+>|Dbf$xPP*bfwyTMZ!L$Vyt5i za{>60Sqw!TY$DwRwzik0Z%^DR9$l1c>VIPY6Y1UmC?Q;8eFP7vKA=e7Y2#UvDCR~B zRLeAo8z{@JgERjDb(26wZ?PPO-dSWUbx^i?$N5Kl-N!ZZ+zgy+s$~xHIboXMPN7bs z0lOVeLD=aQLM?8vUQ>@^tncPoT<;D@|Lrggs($Tl|I9mLe);=J>KLHI1Zr(^`=R;zu!q$et;U_Q;A z%yK+w+}J^NT>mm5InDt*PXmW)#0cbLa)8u*Mf|j~M+YoxjhOEinm`mRk}ZDpP8Z}` zUk<}*`L8o~2d_<^^Z=B~d7Se73rp_58Ylc&Eh;tZX!zshd~cLeWx4RGZ?1+ZoRZ82 z87}!d`0NGx^S-C<(?9Ec8uic4l#%ft48?vJ=&7oo3`K!-?tnnbZwDKfn@()dfA0+t zch^05pq2IY4b0CSYQsIi2}u2$^xrT@h4MN*A&`0P<(J`MuhS$-i{Ly?K2Rr-eX*pd z+o&)~fjULDm=qPSP`!F)xz}R!jN2YF4&1=T?KKxEKir99;F;C1OMHlqd8D!iFJ5Tm zt#=*-az6n|gFw!;%^zFO)-KgEz&}0jlpjEzFQHGZBcR?mu<{A0{&us`y}UeGo%X+= zAnwNi(cf;4CK<0my<6UE>l-6qy`9bNt%;yqnQy-M%4xwVrf`|K|LD`$q3DC#M%@!4 zCm|k9s1PiTed>mNP6D=(75ZS}XE;ij?jqbj!Xu(Y_a#V7!6^sPlj?V{TjJ4wl|z3lrv>U!J#l&C>womq!f76k`$%(*{hj^qa? zllSKmb+2)FbM)7hqU|a_EHpmpJG(4+e*~0`*TTJu-|rk>&J}JIuPF#d=G&0G%Pjf^ z-?e*6yq_&ancCjo@L{NTaxXlRCC%nQfq8A<*y!J z|NJ7r(XM2ua`f&;!XUqfXd#&lSxRvwGru*i#ab01* zu~~~Cg*SG51K-%RiTyGVTBFw#-xC})^lHV6dD<8}TH*qlD6>u4aJPMj(-MW5X5ME^ zG^!_}%(J20Mp4bPpwe@_73cO7k`^M*G$;@$+q4gzjx!jQ(S0B}A+s)knj&8Bdg%?kYdg_AgY1DLQJ-CTfUs z1(2|~EI(MNp(mX6K6n7*Z9p9UYi0oKZC%T`V?lo$ME?f*M+X9SemPq>_)vjr4?zQe zj17dpcQ&x^fGq|+oAwEymFw&0^Y54TL*U6!u^5lYTZpLJLdS+TSi%{d)Y|sZi?y3u zev)`ZA$`e8jso51F5qx38*vtWL%%d)(iZFzE3Dm|&TVG@v)7YLzLq7Bxi%km% zjrKK5$kxW3b^P010mwL8_6?|g6V#+$S}1e=?kN}gSK}bV3Ng=!P%P!zFvtT-dRQF| zM$HRXB#_xzKuhQ9&_Z(R7NbLLK7;L_pe1R4Yg+O-!@H)HMdI+4RplbGMFgdcvA&Xu z=wcwQ-;g1aEkB{2Xen5y>Nwc07Luu{F8V#+H=qP%4r;P9eZYB@N4KEFM+pX}E0N_{ zVnYSOvoPz~!!e5(Ik^J*ls=xlhuq0vzqmQW62dp-Py6Om6k4_apVAJic&PSR84QvF zRy5Tz6U=<=t@9LX4^^wbQ2Hxy__ZlWmr*PXc3XpLh%AhO<7LPd7{6n@EE|=~Nm11} z`(Bbfr?{U|A|&0ov62(Yf$)x5vm9?e>+pnC)+aM#SznVCM8Oc?1Xd%YMu%S2J+ox~ zk0OQ3Fq&{-6gp~R^@{PaK+i?PWt#@|?meWkTI$2(h&gi~!Qq6gf12SsK4)$*4Vz)5=Y@9S=Ghq>uz1)5K?S16ZszGvN8wTQFuxG z?l8!GT6e*`#YM8+FNC{{Pgxyl*LH!+^ebhDk*i8 z)r=|4j777{Zd%3J4TFzYt6*Bi7Zy(Rl zeP2AB0gI$4Wuh|&7GRE{nvyW*p3>ErDHtC z+qklySj*4R-kv5oh($|j>ECw_7*vh1{4Spl;9KZ8CJMP4e2Z#WIQpo_KJq8De0O}6 zonOjQ*gb+b-F3(RxN2NNm*)&IV!0&A=`-)lsyp||Pw2r>{pz#BsM*yUz!o^O^-dw5 zS-6$44*)Nvctdb0@W+R4sK;poSNS8NyaabA0rheHy`o7kblLoVL;!=t4M8=GYr)?i z)f5p}aowh7X5u$qm7GY#?mcKNoQ_Nrm5o?B|MpL~QLg;ng33K72be?H)`%AV-_>1G%P5PtU`jZIRqMoV3JkBrf zTp*n}u@>1j-uMjB+|TwQvbZBhfu|p$X~=3-t;?FsR>WPoc~9h4ZL&?zf7`a9;`b5d z_b9_m>0cZ5B~pS8<19OMGOQ`YO-o{t@h!dfU970| z*MFE4=~7D$*AdJ0KrZwvS)&5YF6Gz=;I7Oc*JMgm+{i?SlG>=PsWkUjbcuXQY{6k) zNGQwMlttKapQBB@VITyYq)Y1ZVw@TiFCTGT-78G%HcJLz zmH7kHsRny|{^I@qiJgJ75bkk)f{R0Fl*m+Up9X7lOEIP%fYkm{ZG1Y!t;knd@e;ZV z11CHlL{K+Q?s^@LkO61tgtw_DF;n8V10g={Z{+lwSTDv=ZyL2(=YbBxJgiliLN{H9 zkc{-wuhz$5Y&wiuTbEHr?j6R;fbgC|!!LG;d%eHM3TfIh=uF?nnQ73(nF*Db8Z8vN zX?+!}vW@#^kQH92#MJb{PZ$0=0i>=-2gp%fH2~Vf!^()dgjcI|MMs;dkluPzbl0 zzR8{p8%!f6$qpo;6~(IzJ;>_M(4& zp6@8^ad@0nNT=?-NS>qYbV*g%0fcLOrpz!5UZSJ~`5t#9sRH7NHNF>hXDdCUs?O2K zvJ&(%I%?i{>YW4?X_i9wWZf0G`r#p>~U1ck+zoGM(tx=rV&zr4Z z*J|Z0HX6m9AnYCN0@7?eIFYD_Ei*R73%c zat2g$Ldj@BLQBCHROjY5wkw;vPAOHvH=1>M4_7<}U79>C`C&%SdP$MJgQ|9&vBT8g7`tNr!qnWD|ie$JS6 zq$o5ScsceyJJ>4lv1BUBychTOzRv7*mFmlaE-Fv%yhs3^`DV)^EeHrd)5~5uM^?{? zaTqz`(D*zXRttwC{0n|;+jx%! z=C3Y$FH!xSdacM_Ln%$G)ehNP6t)GE*CRTX?;;03);}V;BGD<3#lLL_9-WfehF#x; z!H{%zR$q^0uR^BsMStXBB^&oR0u`j~j-Z3e zGsXOhRq{#n-pFf_!gF_UwLZSi1Rg{E#5}YSJ)iJ@^ZpZfTe18U?#(d|XXeBUbQaxW!_h^IY@R{n>6rNQE0{TNq*L#vAC* z#PAemS#hwQ?u`8~SLBOkgGuKYaWv8GggiH?m?+zIHv;M>*#Q#+3LIp0 zn7}91j_~~w9W^wM?c<-H1S1@ZHw5z5nXF6`X2~XhG}MrD!7cXiXfF8XZ~C-3=|9PW z5Q5hnPg!!2g^9^lJX}u%`!8KX_4$!FsF5&mt?A;zgJH53$XacXnxsfC zapW@FV;Z2A-u>ljhPCk!4EIXsJP9C%Aiymlk)$CKN2!kL?_fQ^P$t6ND?^`g7S8R! zD!mOc83G4qv7|~jImoQmeInx$9_ZqP9Y{9|Kin$G!Vy*YI-muK(%K&@zMwxsVZve; z`KuHcxxU&hi};FCk9%0=^SB6?BefjAw8S@4H3g%jFV|@J-J#1l#VGyOn}64(R&Qj` zJ+|B6kR)1cG_6KeCtJ~ffn-6T+A5I25m4(4{%0>58<_Rr zs1rH<8O9k{_{Qn6PV_`g=yk(OA`uWGw~4KckBO%u-7Tsh&wt6Knf#Kp?V3OfUW?&s zhg%Op$1GgsH2}m`&A#(VPMfZ7A=aJ88%ui8dGlEE4lN3tXfE3Sl~TN!kbRByIybq- z@%*|Kpr3=p_fGfe`j5$E<`m+lvO4lS)fqZwC_e-Rg08bV z6%O>wyBdF3fNwZS&{f=qb5{LSZ9k-x3^3#5GwVVmnH_ zr_m4n%YH5O2QL6vj#bGUn^G9WgHGtrP!h;(8MSQ3w>1x?igTGTQEohlt-3SU%>?m@ zzW}})&LhK%b=f$&-wDye-P}6))L3lif|~XVnvz9z^}S*q;3sNoO;L!JEAH^o^5Dlw zk^5wHh^5B#1vtc5ySPqSbnSE^O^ zevjh84B0#Cxs}-P-}O@DJIQN)8VrVJ$fR_pQSmSwwBCy2H8GNDUuAaQ5Fwwnrvo!A zv8Zo4XRrJc*J|(UH+v61p{JjK**!uqt}_^q6NkP^r)5oC`Pxu2$cu<+lo7=VN4_^) zbkxg;T#sXGvlPt*59y}H7!8en9d+%KsbIC;z&!^&#A5EGL0)}rT z5t9ix3NsN1lM(1hVOHYxgPGMp^p-C<`_nW#E}9*72TQJ%ilM(j$(7M}=uwR5Wig*2 z1V;6S*0%9`wj3i{fxxd?S87C#;SGxQv_3Os`R4X7oX8I6zT9j%E_>~~U(|0iTL#XS z?`uSVsv6_LbP<* zcqBu@zuvvNp}5=xVp&GsWL7evFr~o%u-pI9Il)S;s;XCuR8w^3s;|rDY1CjQR4IFN zvEcHzDt7=cnI04R*-qMR8Fl82M?=v|$zqW)m~9rtvv93VMJI6 z0l&-oVGloC$%bxigNV;#1s;L3X8PYp|5dnvzX7y%9qXiO1toQua;{UB!laEdpn;LE`gO^A`W} zyt_Rx#fy1E36|;Y>^Zp)%6`j~|8DAgI!ok_6Bvw2#Q;(?t!-b1%C7wuBc)sBpuKA6 zyx)oU1d@m~Hw0_10sk}9Xxx#u7+m}RhbX#hK<(!|ZG$Ii!daf@Jih9!G`jCWb;Jp~ z>udX(eIfp|FPczD&c{y6JS#1KUYxuM2}u})W_#x;L$t#!?=*>|F6RI(wKk30y;p>E zz}}tB3MD;CIUO!W`Qhq5!j@CfFW;?MQz09;$aqf6V#@Z!N{~HVIEmcOw1Iak9JE!c z^?dZYY|9WQ{xEn^eSLAE2$|C4jd&*0 zPU%mfotnm$c14sDi65VfYWRA*1>4Zgs+dD8Sn%xe(T<@vHGu-u#V)_qu`ngS_DUPl#L7E)vQ##Qr)v)-RNc?Vm)M|Q73 zwXnpMb1K_em;G8g(qF>yPO(u#Ok)mFnr`eAJ#IK#duf2mTyc+?;~IeHZBilO3Hus< z%K$%}mG107Sv(sbGBj5r1l%sGHX2k)$>2q~;R{p;R!jQ`c1^?@O_|b8$C%qEY9tF@ z?q(*7rp}giH)e^XadKFRGD>z+ARW$dJ=7H7vV`CDx|#h^jlkWY>Bg^q5S|wkCo1R~ z(jlQOmM1aHggjsHepGT+GT%gyBgh<#_@Tf^0?`rm=|~I$yA%~S3Y`y>>t%CUyh{-a z#!iuzIuAd7M_0LIjrZ2ly|5G+Hd9U8d?&>`Pr@=|Hp!O;dDeec^yzPROm)%}F=IU< z25wYhzm0aM8l^!Fh-4h0-g7UCeLaHZdfolfdWNkpbaFhY>E*=|T^|jDTt4Z!;vg z`HWBMFyMu(hTjy%javfPHXUpUF+}LqDC$h2HMy-q_V;T0Y*2cK%=8?#*aMyLozV;6 z0RJWUXjy1023v#^s>C|3ssB;dD3q8ZPbAi~-@&22`1Y?d8;b5o&&qI83arsT?!BOI zA1H1bdV;Jd4KBw6JNX6VIYJonSn~iq&sK=89Q0|ptp4#m7dh5o7)j=7^WU}}c zUED%>|8OoIfAXMcA7H{b_>HyRAx4*N`4+1%&*qqm>0e~+XaD!i^tvgy-C@gd$x<>8 zIR$y*XI%zf$jhA5Mr_-d46c59?Gr%Zy(-PLdyQoxM0g?&<-cqi0BhMNQ~C7(E)Y7w zh{II>mprtR!05JXDtT*XE~_CbTEY#9vyU;6R<5dxJmuig^Ze+%lM*_F*dQOSSqp9K zR}J|!`wSS(fe4OOXw)vgQ#r43N(eWAhA1!nE|ztTb2~o58dg=-Xbzm^CLo8uwg%6s z>LeT_4z_t#DML$8I2abtG6;$w$wcv_Wnf|tiH%{^Bgxl z96l=FUkyslQ!i}&NJuDN(7@|5ykQlpJ8avJ!y|zWr@Bsf^TtugMVXh7q)$&AHDw=! zKlsJpXW>Kz6}?LjPTsEoablJMzuGNi+j^I73jMmygi8Z2WM&#p>IyinsY0MG5 zur~gLvU&?;vJMTo?lpd0N49pD-O6<6jK_Kf7*s1}4^#1Ti98RSvKuV{hoZ?Xl?c>C z9vmM92%AIZOq)-_&dRe;N-wGF6t8jO9pX0}L(@bR>?aKCk?%!a_Y;Ln{557}>u2|< zb?AtEVZoyLYE^-sMPxm-d)#H`G-$C!o8?t={k!?MGXlOnfi!hvWj+L*#%4OF8(HF_ z%!O{{Gg9)rVEq>_dZ%jFlf|A{Ry0Q_Mvm7Z6bf!`$pUw;XvHE^ZCPjCMp?>SM(kZD zx^x9q*5xXsLkNZ#?}^zcaEVr$*;ooJ*;|~%eHPPDrqnM7<)hwNg6VVp1RGE z{7R-DtF(i1gvrgNG6aRa9|~JoYKHV+(O2Tme6n9N_8qqox&SiIvU7@t^G29f07~1X z%-7}zTMt|@(~psAGS-@q=jd8`h7Ix@@<%=6;6*cxvJmb+EqmcI9Nk>^CjaE77a1#s~9w~C^UdUo+*iLEB667-}3b{wa z$A7MI)zF3d#p)KA8aElUO%OAd#V2}lecDGZ=9Bzw!CA}#s^UX)guNLnY6RXRTGPb{ zx^14AE!k!_5V+VXhEE1n;u((Eq0Td` zeb%PYMOI3b%LbC$$g^4_e1~7fgn35=aNBvh9eoIp75L%yvqFV^hcN#wycN2|4o8pW z@UOAWUi1d|fqi(d6>ft1^a&GBcwMl4Vq|D+)A(q4d#LI?I3+c0 zeEOkjl=^T@zo;-PIc7&>xinMuqGxU&{hyCv2FB*!qoE(apSEw=96C{OTqj^|Cpi4h zvf^A{sSe8V8Iaxn{(e~9$8*J3x-m9Go`v!zo$g4syr;BH{XjlZT2F@ zd2elXcWvlmQNe`tKz}M+w{oIDV#Rmw|mP zWL^(9Kc0uKNFpAc_AT=k;(suk2D|AFysp@oKE%R^L#)N$ z#-TzibXbZh3mAD|f0m{rksL&{iKc&e6cuvCSWG*R7u0mcI? zM4YV%dxksV(8|DDULCBkIkmhPg~v)&COgFE;NyA?Kd!NoJizn+KJsmV1J0ivncHfDR0n}q6KFnhhMI6%OOhPL2L)Z ztAl~9*|$Id2InQ$xQJU8`=hl3fT`H87Ft2T5>^kfTgGg)X@wNd)+Cx7w}fyyo~=xh zib6tsQ~Ht48qZBRZr^OW{qH}%;|{>^>+IZCcnYS!2_me4D(6|oISTD9CE`j46{;brf30&I)QZXW z4j0}ne=G4yK`CJ@QhFK-RfZ-9`aUpHISWa4=qtv%Lv!wBNhm?@F9s^t4Sj)4=G$A~ z9*bWB=m1LT1PT{PlNE(83@04O^Fux6I)}Trytx>DfyhdtVF)~ysJ)0#q;{~n1JqoR zx5sDk3bZYqLS50<0=99?d$?@j5heGf0R;r#ZNBJdK34LSInO0k zT#zy*f5n-Hc9W7;O5Sul(ZUn~vU6_@# z%~^DPH<-Q+iJcO`A~6|oXNN|4+iEJE(Rj;FbE~bO*N^rs+N}i?u&z)A?kTtCQk41uUzez zH=ZtAj=-T8Y#v~*+D6LWaR&clr8>x_(RaEAAJTHL0bQAK-e59BOFCpI9W`kiT`pzj zX$NhY%-G-(x(btuoX1;5iz-^mAiLMUH6P4S64qaA`Ay44c1T+x@ z_G!A<<4)iHE{Go46!_S&7{~TzySGp;s;fSp{%aX#`sJ*|;j>i5scZ%5+nloK=LDjz z!0%ua+oHWB*%fil38TYGLCWlkDxGtUvbKv_MJR3l*0qZ-kQKm6sQOE1rg&FjoJolP zG+SHVQxV0~+9x$Y6O|$3ATM*oJ0V7U0HYth_s``7q{>l~TNt0%CenflED*Ke8Eh{0!J42!?C3o6jF{x#~& zJW-Rh*WU(_uf-#TH_b004EL&7!=@VA?r2U^qnLw41@@M%tr3beKMdQk1PSqz)r*@0~jGOo-4# zZ0N%~!ZT!5^Cu0&T!&(8j&&-R5KMO(7rrGw?K`;J8Abl#nU|=_(_#MmMRtD;ovxow z{hLCuS*G@mH9=bNfoL%KN}HC-o}Z1_SEr4Xw=aOYYg@#Lb~ghd@Wvi;(5XN=(4x=lCw6ow76 zcseIO)q*p)()7Z+g!Xzn*7-BwO24IBpt&}){pZZRFcExS#SottW4z&|YirC; z5Z72NL!8h`nCUJQDZ8G*pw}^4Jz3XVlug;=Vvrxl3m8K&BYU6=2?KsQp`+flvf*8y zphOV&gDMPH0?&hLcym^b1hHXp(VAl;KVm`Y(ma6+B|uy{(P4oO;f=i%v5}RKXY#Pr z2tt=(Xb)bngX_3=!^2FHzk?BeQlo2^Bs8c;Pf}hpzN3DITOLnqMvW$L2FbKm*Z1DR z0F~qrB)c$5SRDyDj{}*{E=8si4XNnC1Q>W5qiu|>pr7EIQPhnpM-wIK&7f*x(Ap={ z=y*+q>ZI1HCa}Mt){3&&M-zLn=6rQ&lDa*3KSTv{Tr=9Q%Q?_fsnn*tVYw=z&HvNr zz`D2}Rhicu%D9Iy{OfM98Vx~;8rI(#QTT4#m zVUg8zo!{})>3Z?_7Z@MCvm3>sA}e=?Ox)ImGQF@{m8Hbk>+JtxTc*C$nGxFaaw_D= z>^ZSJIB#!XnA5Z0@AEA!h#&7kZdFWMX^K8Z21|m}`uIIwa~Hx|eFGwvcE{2rsA^oP zEZ5miacA%F_JppEgB_~zF@h#pkYps<`N(9t=c%LVE)#f{@{{ut~LN?JR zVVUGUJc6RqmUxHyDHdf^^Z-+Q;52-|OD_I6O&K9^@E{Hxu4AJqVvc1?1?Sf(=Av59 z<4J_L(`TA&Iq`Dj35Xh-9HTH(7Atlc4-iLYXrxOiN`~=tD)Xf));^1yaA4H3YVAB) z|Mj9?JKtabpBHry{IcqQZ?ga1f?q^z7wTe=4w1f)3lR|-d|TKd_VkVDk@s$OD>+yh z@RQ=8u-OtsH^kjY^oKQk`10i-Ihx2ID=g;(x5-f2`tvsQj;6o2&RAtN0&Q z{11^~^ReMS=EVPi&Az*-^1rtBb};_O=5A{h|KpkBe*j}~Lx-p`iN$yYD$8p$of81W ztM7&Y;aUh}<7EdC85U7(bfe9TMnn{E9wbnT3(E)m7>v2}`{eOIiju8}ugm3^aVnKN zK+66peWxr!?Hs4m*BCCGJP^ZiN<^oF^r>T%_^ZE6l#-=_fJg-}6dsr56tsec?6gcH zA~g+W{0}E1ODTGqaj6|7r6%IRAn7G2o+zw;e40Emo#7}X13=fyENxlM1gSp95@;xl zYz&K_+z1^VjHa$)8t^y<4W@$|824!5g<`lTn0U>z`blF7Kdfn}*Xq}B24SCgMt%MD z#@EehUR@~foRNzWS^s%fM+!@fhzO>t(aGfI??E%8n-t?|9mSoNl;9rYNsqp~s%eCg zHnb!ii)3$-%~I`-Dvg`m>sp5(O$K&F7#40hGwUo&OJFi;gQ~=DV;dc_V?$$|ZyN(FciIqf8Aj z9NDTGiggBbeDYs|S+B`j^@cN`zE;y1HmDBGky5#Ba&#T#JW)E{DA2xcu)COzWA0R) zYl;^~LPH!4hh576QqW2>_f^2K3DrWzySnY$Vx_ynuDGg6>NJd~KBNkzTLsI)$yLEh z3qNyan?)YPbR}7EcT3lf;Y-*;&t1NCaQexIsbhIC*%?cm83jC>@_^s;7`8w*Rymzb zg9e&LlZ#A3(FD-o{frrf11}u3z)&j*_n=CfhJ8ZRFaWMUsN+=~m}{R1x`#DG1w5Gy z^?5)ig+MpY)=ng1R|ieOG@ZNG)n@uJqiDb`k7v0;JQwU_Q~a&Tj|~e1jQ|(n4CRCB z{s6~9?TT@&eKMPPSZkL|opF;OGOnR%vp%j!ODt}BmFBc!XOq+Op(_UPCY&#mZ#C`^x@HF!dM>+RqWt>GotWb`i;4jf^u*^@_iLOEBcZy-^nt)OmtB zm75N84b{z=_b$_c&gRYex9GgK>o^`5lbwUx;IxVTrK{^ee0r^4=i@LFUh~li_VpA# z!=9DiYlnKcZ56<09T-`CJhkz5f_;OL$Jp0Q-hIN^z9DZH;Gr%mq9S}f8Rzjh2kGiI z)=lh&*k^0JXV)?*ptQ8LJ;lUSoGE`UaUouycemp7x0)^5RdR?*_dc+Vp~X$3A5>k& z)UhilyWWm-aqLI6Z+;hSfUI0{JT3UlT09Ie+$g*O8R6ex%E@rcxIFdy(Q8Ewl!5K_ zE{}<5z?`%5pzjt;wcUSuVXVU-w z1nY~|cT$t5^P?y{z#D<0D}d{Gh|%Ay+U?4^)@hB*5* z&M*iH@1eGKwkGxIWja93&VNv8ghCvJR&Mbyx!h6IVys$1hhS#L?4 zi!Ny~H1mv!nONr*DovbdTSkyxV_!^grp1iY)VjeHfBq7-|Kf63B#H+f+p(<-FSFKZFcl zd=0oQzVP2DQ_9a602n{PU-1efL!oKzy#gL^rZ$Q4e0rUwvn!!<=Di1@ zCKXl zHmlI`ua={YX}kjT$As`g7Po?L4Ol#Dn9Dj zR3&g(`B(V07IvvOXXS*_ZMkC?#@Jzca;&k#t63KCxooM#Y-rijn3IL-tC=yH@aAV> zNWy?f? zFi5*p(3-b@Mz%!U5xgU@6@K2tFuR6HLR(zILOo$FZ0B0kH?Kl<#iHRA{Fq*kI3$}* zFM%PYL4*x&>6@#TlL2C!t!zBeoVf*_tqYwKS5qG~No%gEldd@pjFh=Jy_g!7X{`uWiB!kWrpnXWy0dI& zQk|Pkm20bowXcL%Z@I$Mnp8fJ6<7?fpq@DcmIJO$M$nr!w@HcVZDNAfF?(!>veB!g zoZ@a=QlW;-Cwo69xvCe}d4b%W_VhHYrFYz=3vzkR$>;eQIX(S^ukxmZsFU6Xc-7*d zop;e}+kl++FRsmHvOD zH%vC3WdHB&?G^8T_>KDio!0Kl&COld|L_O)|ITXvukL?W>;G)`zdPT;|GwK<-T$uW zze)eQT@=x~-7l2?8L0hB%KyC8+T44&`*M%+KX0%6zn<&<_u%aj2;jXN3@45X6%6C> z@h!$SU-rH=zt4@U*UAg_I>oS#Kb^f^|CUh{Y3U{-#ClYJ;m2 z<2a8oX%tGTAOW&|f}zw<+mWH3Wq41q=i$^s>G#JU{>rYR?2@vEVH5*0Tg#hJV@k1s zF#z9PGTz=2VfT1Whj@IOWa$`H%JyCDhx_R@$jV0>?-kfQ&j4n3pgPSwp2|#U)erx5 z_U8TZ(fh;WgZHnFj({E0J)tBaXG_#~l|MVjY{ zjHzR3Zj30XI5^%6hCtPASL<%TR#~oTl&Z-=bp)6V#=XiqRpByk2YqvTg|Z9v3gsTv ztD79lN;MJmzVgcdWz8HNh0)QQmJ&i zXz$xa?Q|7hR4*!j@Uw8m|E>5xW&ihb_uJO)_KN>o@qZ@&Ckf@3;s17aws&^-NdDj2 z+}hjRCjM_5<^L7`_bmJ$8lh!O9fzOC0{%Tu$F9@8W(g&qc#Srt>ggZDWEyWTWvaK& zyT-)w`{VltdzJKn6FLG;5qY?V=esYeq1A3CPl+%hWX%}B*{)0H{8Gxcb|Pt&Sz$Z ziiQ=5z-YjV7ob*_ZsZ!GQ}OTtQ>uvHr#vQseBx;}bbJBb{sb#!d29qqp5YO z^rEW5()HsB$SX+Dx}x*$U6f(y!&*A*n|^)c_^xZmtkYijK&)R0FiY*-JT?0Q1d6>__ECoIsiNf@cUQ3uuY`xryY zUHrnw&_3S~FWA~A3FgBZTG48FnyhNHfN4?IK1tB9EbqTrtyqbal(g6t=T_}D&Ms4V zm*1eHt;^H?h|7ISuIaCV%Ks`)KsJnT*n3PJ`pf>w@zL>*v`wPR6fMC~%W$3E(RF`xny%v#^jW2H8J2wK^bGP zfCz_MZMbR~D%7DrO|-h5)V$OO_{~p8KtRdRoLI!NOsB7C5ysP^$SB{_u8zr3^PLtj zj0yUf@YB=@d?OA&;*K!SdV(i~6p+vVSCFOvZ zwQ2xF?L%Nz^YDZG8_W{*X8dsin-d5s`r~S62c>U}`sOt1vG8X) zhgQW$qpqS2M*V;%g>Py$xPor5h2}7g`gzT)k1haRRM>jD!MdN7LoFC3=74s6L^Wp% z3Y;5tE(&MNu~(}RtVr2j#W)*+1Ac!>KfEmk6i=unnQ&b|x+gH!GEgg@YJo{ra|20E zm(1`=_K@O;F#`?SLUU)7Wf1b!{@MPQz(*XK3NR$Y@GLYTR;^0q-xUibHMg}Ya%TWA z&KLiRFKWs!NH{b}mJfKG8B&580bd1Hvcdg%WWI9Zd()ffzC#Cl0|^p;7FKY?En40? z6fO(nuT+KEt~?`(qM30}__vzj=3oY6sjjcfW~%1fq4C6Fc}ux36`zcrAUs4$PXUsn z1J@*u$Yz&jPRA6kH${sAc#LueqMQLcaAO|S+=?846vgqjWBsm*Ebtg@nZ-0g7sDYO z+`JT;?)KndMR@v>lt!~@y50v|gy~e_oew_z1aQvA)4H0}0bV01uFC>OtQ0^)g$R~G zRRY29p9Zrb29o9{O)bAkCK%%^bWUQe+H92@3U>S^i0 zNPGyA#XyGx2i7&`sgz;H`WhCz=!m`C7KQ5}iqjliDrgQ>_o}%;go#)#(dCN7Ey@9e zN|@nrgy=F#r!l&G*444cy^P}N6bDM@fjn0vtmG59D5O-F;2Z_u6iorf&>=;#12Q=p zY>`84sJ{v5mjO8X*i{J6V+C5kZgNPYj|QGw82&E$h;8;}8QCmj&`k0y;=Fob(VKJ& zI2xgN46E>R{aX`sxjYI$U*0IdLOPx$RPVXaPn`0MhPCt zP)u`~ zgPCSj*4Et6WAK=76pKoY15k4^1Y(>w1=tpKqy2;fo#I73Y{ZG2B#yRX=L30Hgh=s9 z>w4>gXhRB~_s7ToDcrVb=8d6D(Da;X_Q(0Hi=lTceiekvVY7HG$_D= zH}Sp9oA!=~hXFBB69eAY7P(P`6X(SwOHda}HAr98;4$}x=z@vvCebkTETdl3SC4m&{G7K$7-H--=6bK1f2@vw4y@-w? zi^wfUvS`G8m_g9P#(69QbqY7~VgXDtuWeg9&Hl-=c^E0|rgqS% z(!iZ&k!GT3tnpRo(mRG4y=J&7U2^RnN1g-i^9}mKKB)d;Qbfw~}?ME8U-Ao#H18{k)F* z$>^eHj2Jq_S53#E8RNui@UapK`gZ0o3Bb^*bBbi`jL<{r*_7x~)` z{S2mxQ;>eL>Q)h$Dyflc3b?x6ew7ZXt_ax*h0U=yrzuPj3GxnDRXC65)WQ-@C9N)! zP?^>AR~JSVHr4A&e{rg)xrQ2=1YpoV?vx}*hA<_H`sCZEQgsety1r0WbzDkYdb9@OX~t*+yBCIn$rE>WN#6C&x~|`OL2JEXVxD^Zu243u z(lBaxVZK0^$&*`P6HxtReBrlTl|ahkpJl+A*p)N=?4?*#4ud?vtBS`$SwpZ!PI_fA zyAE3*GNjv0wu7GUwCdeaj$F1oiL$y)&?sjDWy&!ToX1H}E(6q}q2ks2#+HON)me zlW7f+tg~775yC^#hI+-XuR3#(x@IpzpbMlLZmsa}Ul4O$c*{^jB*L#EhI0RsC}emF zi~!>z+)&Z+R4KxQ5?WupVAcN~b80Pej3UPBjoEBA1+gwzxex~YAQAq&cOQ)}T(m#m zpw~+@_>L=V?B57gZw#tq7H9+=`7oOLugyrjK3LK=h{RJ8lGepdYOY~L7%x*07f5L0_o!pi)ezKqLEB;kbj=&fj@~9? zRFZ?l0jQgco68(z$(usVUuUq+m?QN++w7R1{0;Vw^{!{*o4i_F=>yl(;>?I@C52yO zmf7T2#aD-miU1HalJ0DRr$r5j4Yyz@UjD^sr-djoc)AFKkf&-od(oIrDNCkv+B(In zQDEdEIvsd=IUp0!l=+8g)bHn|&2fB#(aIZF8^MaIi3F6+o%1YZMQ`rSTn`&J@=q%Fz{imX1kIitCX0BTr3ChD*KxT=52?t>o|evy3Ba z2^P>6>E<9e39jfN3M0!9piTxgyMt7T;%hr zU@58>?eewi;_V^egk78etzJ~-lWCzqq#$$#p_W?^0}J8~e1Z)xk^NA!>r#6OOqw3e z`5KpMIMwTdRFBtQYZnia4lY3Mb=DQ^B8#svMENZS$AtZfV?^GVQg#74FS=U3s2CU3;s3ifm7 z%f-TN@q^57iN9Z@9j*z%1IAc6tHp4MMcS5abTN-=Jf2$3dLI`bwLo%z*y6*SmL5uu zr+LTHZ-6gqw8s9ghW7sw>n(4s`@v@2GIQs%Y+ElV9h7K>{`dd)|0vbak8&1Gl?GB{ zq5AP@q%#;Gl9t-2L>vgSA!pdCptJ4bZavaGU!EyZKv7mUF@j*Hf^>8R)f~)*(B($3 zNMI@+CB}T_+7=qX9)|1+1|SeqaxL=|rx1~AB!8m<7;QuOi~2*!R6lAPTU(g3 zWaZ@v5VgY$n8lZRaebdg6)%Rn5F(NL946aa5WElYj4IfgD@L#JxV}RsF+A;e#5 z&KI3;HP?8n5eKmOq2xYmVTcxiMnOVGN)xmIa_;%-&hkE3;RiXRKR~fxnV29;OXe8H zNh9>d^Evx~>XRGW@rFAx3B2hTZn`EBUVZGfAJ>(oWgS#S!~7XB5!$b0zW;TaNzt3?X$JWe>1@B{;Qn|%UQH(eB;a1Mz2e(cphXTXmh9MR079nC{~GI9eN zJD;j$?a>>;iHN2gm47!)ptDn?ieh_lc1zZRP<%K91OW`|=TB;pDgB_zZ8FJ4)bz2z zc_EWUo>73SEX7TkVF5Fg=9%wO*aqlIO!DA8Ufi+0C!q#GLctE@aNKXFZmC8Bmmc}M=%G`G-3O+z5>K~=(fX$P$$z{80x zKB(aD2KqVV$_ka$DfTJU;DaDYHThNet#_k-AHRynx*ZbB zY6(3iZWjbiNT(;fifOkLYVyHTzUjV0)>7I{I2qgJ&fD7;ZMxpPPVWTfZVQX3MGUSA ztVYyDW7zES7eQY`#+KE3CEV95bsT?^$oMeIwP%{b!s5-(*jrV|wAxWs>*4OJ;d7$- z-(xvWar4`DLNLs;?3EYB9jBgs$BMia&k|8!E^2S+j{RsC)f=v(e>MWijs<8ur)$lB ze-|b4wcybI8sA^0QPw{~?k1Z}43}YwuvVx5GWqyL?2>FAduje!nYht!OjDRNY=dgYogdg zecq7`KQ)egfo9r~jSe!sTb$*k0@UhQdp{w!yQ6tsbc2$4?q$R2V35b*mU+XxfM~O& zeFlM(;hG3ufUR?%6HcyuR7CApBNN`s$z8P18o6m|!* zUw@rD8&Gha3c7L&z2up@xlsmteRe7!RNdenUue>?pqz8z3Ww7ye2pofj5z3ltPE$E zx^e{TJ^eW4PB9I3AB~0$_7VRRPkT*0R8gTDf1KjwKTwB79UVo@bzM_IcVNq;lqUmy zzz8pNd7iL?T9K|nYXCQbjZm*?C||HQ39u|cY5SDXz0cB#&gY5j*&U{cLn{W@W0#cl zlXHo2a&P5tUDTI;z*<%(a60xSE+72)zJT$9ot(Uu5a1bsX+Ul zi$I1b??tc>03XcR)%~ME77eY-hL(oq$6m;MMx*NHyGhDn>WFIcvoI zKo|F}MKm)8A_ws5@WaXB!T#Cds|(@eCm>cqasKt4PX(nj#SlqZWw;~C97kcfPO@p`vHr+V+i2?hO4hvaRn9HG0w|RD8U=eI4r??t1`rwK@LrwMrC2GSAUeT1YN-CWL zavq6L2NA)XVIVI`E~z38kvAt2i`TQ^aGjEYSr7sV-(j`cIKc*XH(|PNVvNR>)s3F{ zk%A>OyVl1@cY_3DJ)>Q#%7?Vk8)A?Ra*mXqsp?4KsfzC>DfBbMA@-!pET3qY#$n78 zAN@Ep0$Hq+mf7mG7U~cd0tTxp9T&`4yI>hr#WMhLU4SSjjG8tFBl9Sc&nks9mf(^S z%s@9sJ$1qJskn<|gj=hl1_MUTc%>-7?g6>|F zsz$TvH7k*LGQ%kk^`%Pi?vT_-=p zNBlfMX2)@qBnDT0L*VF2TrtUX4BJ&iCxzYJ(n*Bh#V^$}s$me?OOo6>( z9;#Sn&++lX$>C?Q@<@zvJp^NKl6Mmd48||!%hNom36r_=im&R*&4uS*h)Pc|V*_!9 z=M(;UXcjR@mHF021-FRd{-m~pdJUMQxcdmT4i;#LX>^!07)JsfrIKS-Lo%d6bEA0* zY8}o{Qg`&$*Z%u-)u0`xoQMJx0YDQiY@BINps2oIlQ+| zKIBd0ku36IinibgpELYA-8yl;m|X&+nopAphsvHRX zCHFVxF(*KTv9)$<6tiJ@HI8gn@$+u~CGtu6|C80RK=vY~0L2F?(grMlnQlQMvEPQL z)ZJKbf%#gDLddx!)?COEIc_<)qsLNGnQ zF0EFmrO~CNd%IO{b9yzBfr{<{j51nfNF4)mC%ID17xx+DF*u@&GD*xg7LTlJ?;HNk z*k%^|7v>fz2Ep20y3;Rns`MNM0c^QK$QJQ5*6?};yN@`7Jm};$EQWFJ$s|l;35l37 zz2#$h)RDyROhuMlsXQ`pl!|fe>B`T>%?y;3I{E#G%?Xd ztCi12=&m&z)r`4la+xMHW;{PG^O{k6oozN-b-b8vwl>>O>X`1EUbA1sR>Pj*bo`_N zX*ah94-Je-jW*fhJAl@NP01$%6->=$D8x?*8sD_pE!!}s3(@!nkWHg8pESykU8_IL zUo4Mo7|kp|c3-f!oEpd-j9U~pC+=I$gYX?Ic5A(8@iERa9f%y+?-Z)j81KUSG#EMq z1EQFX){(E+ZkJ84hk3kW5dtlbbx-dY^9vh!+E!7Jg0s6+sA$?O%3=OuDY|iBRw(+fgQ7y* zT?R+P7`M3u#x3gVwQ#f*Z^|J5Nz7ePs89MiRuSDNp?HrIkE_klETuSJ(3?UQlttS(eoog%v-vmZ5V%-Ik4sf)a53E%z?7nIX~x zE7h%1lUFy${(<#L?vVP6!^}N3qeuC@mjD)Thzgm z4^~p@FFM%IU)v%WmK?3TX8DZKE{4r>mUpoM)4dy~4GCTA4&n?WT#G#qr#1)u2_{A= z41ZIkf08Xe=c33H^yjL>?Q2HiEa(}5Nhb?i>w{vNaYt%Y%e!RbWgMw8UXO2I<~mb* zZJfh{6bXe*1!*_^wuE@N)>y0a&Qv`ksE;%i>)3~+ZJta9Fdh1@&7$#84rCV7q5nFj z$Yjjf;eVV6w#uloqklFW<-f21?5JOjqUjMs?szlSC_?g%@w-uo>K*f;QH<~%>#0$M z_#NYSqZs=;)>ETo6Ljpy;AjPZ*s&jjA%l~FY3=GQSeKivb_ACu@*H+`l^WSFH%yy` zhD5QtlR)R5ZWBSYs8S`621Mm5WdRW}b#=FB?kvOk=(i7_r^L5>&IL@1DA_431*Q`* zKv^hm!KCco>;=O#e&iFTFPUTlN%GsX?w#DBFG}ilbOF7Tc4xtLyb)VmE@(&mZK&IX zPT?)WB6I43R02v6rG$~VB9vYS1;HFdJnXFLyIp7@w+Au~6{257;T}Nn%O+DQCw>={ z6n2SJQv4?45j3uIE#NNG{(S*d!S z&F~$9>3Jkp!Ybzkk~8+dAFRSA2>y3r=-#$5K2Xr4Bqn+UMkT_bA$2iJ1`1+~3oKTE zP80*3F9Jp6kltljt7&~KSQJi9$voa~!1`Fw045OWj&irEcpX?!H}c1CLQ;ddW@|P^ zHKZ|dM|r(2bT|1f%PKwn%8!7bMP?A?Ysdpa*0l({Ir2f z_|for@hn6mGOz<}y3Y>g@H>yy;*9y}qP6_qLReS>{apsJ;KvOYI8=h;a(_9hp>hik z)AWYTCePwIcazLb%U0}sf|acR7dKvji`U5zokA|}sV8ll{!d~B{>kZ<8^)s8oo{D3 zcRu0r{N>#2>VE`3leZk->{O*b-BwPJQBJB7ph+SAC+1O&dPZc!O}DiBcOB&{5JDbsCc|Pm)t4Xw*_WNBqC= zIlRwji$x5tx`*7&n@(XE?_?O=>%l%neneXAh;xS>em9W4=Fc_j23lzZ(O{8hOE1hn zZk@{HRN2w>OVXr^&6ann$<-KDQ*|$DPdC9+N~U9!F9&JHvx=);$da`>Fta!^3}t? zPIOJ0#3v50i4^uYoxTRHtpD`mCi=}&WX{I@CmZ^}YW7bPWYk&zKSi8TbIgw#Us$!m z$;H2XUO2KXhq z7648UmW2jaaXpLj-gP{RaQhHhLuBUH_KYKrVVWbIrWRltzilBR`C9c$kq|dD;?h z8I4BlZzxNQ>vK}oDGxPVe2ShmMf}ORuvtEcS(cLp7WM;(5cg)&l8kRkS$%r=_VD10 zYdT+_ynn~?e_-C$?g%rvX6%B)UM&Tqd{VF(F$o=Zo~3ty0{mi{4i7Qz<{+N-u7S4J>g9Q_oSj9cD5?kU+~Av~ zJE0H5*!2Ii_pZxrBw3nZ|K?M~Agzo*B>+-VW_C8LkUf-0RdGtUm6TcC%cfyKASj}Q z3kv~=;&4&6e|8^WXUt}8w}0%j^^>fhi@P7cMFdDnsjSNUk&*&_zZ^f;?;uiQKofGA z(TtKdSuVn644cf)7cnA0E=Af^#HR$oCE6}wa4aKmGL)(g5jKM`q{Vpzu8G0CVz*#;l%QNQIp+$Kd|5!H-;cHeOL~I z5T$p~N#U{+9K_Vwjf5;{CJk9O#kxKv4t=$N%?p!&=u9j2DFf~!#9j%~zo#?4Yo2Ey zQrII2b$$r5wCiVZAhq&_Iq?d+iR03_?k&S(iY1fV10OIryIx_F_EWm->L5@v7|H zgRTlYsxMtK7PFCevo}QQqB+eHiPf^#>K>}&L37--cXRi2Q`y36BVg(J&PA=$1ZSTp zY!mKfuG0o(q5+jet#dZ*s2& zcJ~{SGk)S4synBN3a}NOf1K@joIJef)jd9h;-+}hUiKgV{@(>OU@M%Py!dry_7uYq zY}>2lrCb+;&PlN5~R|Q$lESf(7u}Ad?BeZCU0TR>UY3cmClnKr@`dou_2V7H%`{;koJ;$ zA#xBu;gR;z=^|4Ug|k=c#EpU>+;wu^hNPtqtxL3-pn8dejgmb|%12HbJ-_W}9^AUU zm?exf>~qGur|4_>n=KaP38y1Xtq5f!rNKs^9rWqKWaP*xK&UOS$;p$2iatzeE~O3I zGv(&AC3d3{j{Ut(bU}DFv**x2uRM)uxM)o5d)dk8dN@D5vBRKYbACt=;@NPx3y3~g zxOOKrv{V;PGWcjmSu;@ylnzis6>ZfX&EaHbFusF!t#CY^%-q2=lE?#oBa|9xJQ%Ka zTlK+KAfrXG%6_EE&e%?5llJKTLcCi@^8Y(T_&5z)CnR$>?HoE%AoLuxzemmgP56;gCK%o$ zbj~sA`pFz~N`dZNJzi!P>3-9yGoReS#qSYjFW8JC&2d3kzwCFgoAfwXQ)er(duE|1_=3QvB!B>2&kCvXLQ7U;y}lkL(7u3xB7uI zCfMyH`Vy4jtsr#pWp^?z0a)R@YC@@Y*%jYP@@j|bxXW%*Ay=X|b4-bqwOETLt%zl_ zLyC&!>92KhW3&NK8Ygj2gOaCPM>^?*#ns{vL%+!WYcY2k0OrNQ5(<0UOA&(7fH`Q; zfKYn)v$>kAra7dmF*HQ$UlME%R__kSV|CA`>)E{7I6iDZK=#CLJu08Ay^5vel`0`qUuzM%R23$Us?WEC$Rng9{qTlOg`K;OJ?dw z)Kj7zANIT^CgYklrL8A%thP`W`i+R=Rj(D-(2_WA^_qI4tS z{djRT4~nt}T#P>B2)!FRj_UZJU_FBPZ}TjpgTpCqopmyY2&~)qB&U>9@O*(k7Dl#+D?1${Z{&s`tcDv}-W~U3_ zp8K0@M)43yCV@2+pNmbP?miKtpfeDQGq|2>p`a$Kz-@%nO=t2B{r_B_%^2@}C-#Q2Q`?NtI$L!aW0Mpq zzjR)Xh0_>2XEI^?-Bn_{MXus`<44lX8%5>~{e^R#NL?j(2jc2?J>7ZOZqLmZ@(WV5 zYY9GMrSH9~+_}WjWvo`0P z30j$vG2OI5o(jO1`z1Nlq^CWaO@>%-qgv;C=WrB)_E(F`!TVnJVm&)ktuM|sNE%F^ z3i&nEum_x%N3GYvt7Dy|hjJe4a_SwLOee5^Q@FUJ#YMgZ+mNC#dJxw7Zq~{=IGDvc zhd>1QY!)l5&#NT7q;Z+p%OJztxV&PwF!_t?a%bqHB-6jfbLs(QZaKWwRn{Q2yJR>Mlgv1$|oKjen&c!4x9cq`$k(u)FA0Qvkwv+?BR;nBZ5 zJ9_bEaQs63ZE$$}x`Cngs$KL4X_$)i9Uk}cBf}KL7a>b@lnt?VEWd~^F%(2Qs9re3 zNO?8a0k~%^Czu1GY5{ztLC=ftnM^Q=cf+L>2(g5zAD8Pn;O?2iu0u9|talTg{u+VX zt!%4AFii%knS7ajc@f=aYh6Vz(&OW52@bYqZA?GS^XU-t9g<=H$&)|1c`f5Dns)kj z1MryY9~3Wevq@@lbSbLCM7dGo=R~LWq~X7V=sT}Jo|u+S>vfFue5-D$hTAEBtJfvJ zr|J?X8Q;ISD7q2APQO7KBd!y^_<1n(XxQDY&3hyA|#-nmCa^4ps1gJ8-hQsuNw z;14U7o+5A{Ly1F>i;>If#>|k>-e{skV@6_&H#O1fo0DC+MZ2fHn!yqXs@{Cb^|B9> zVHOehS}qoP#KvBOg$r%qUWiM2>9uO8uW|6bSPl`zVw6X_$!7SfRkg;&pixJ#+^w^t zChq2jTiftb-?*)BTG8~`CgvzPNc6lS6+fmc`6bw!5i5Jb~m} z`|M~f>9$A5e`D@+auN^L7jwKV((zPV-w{nKEtUAIdVc+N{o(wRs(x#_1}^D z6_a#J-ENvkT0wuqt(mF1t;eoN;1HXy;*Kt04BNuLMcBeTC)j)?Tw5X?k%esguE;=E z)>);t)*lXx9u`IDID>I%D-mlAn>dVC9akJ4#ffPC6oHl zZLD?V*1`N!CIjR#Pzfa7F#D~2O8%hs)mXt;#T>FF&yEo-|(AXBXpU9h!TA^H@iR7EFfAGyxq2^~x z1%IiM|C1^D@1l{5tBqKRIxiq^@jNd$cjbD7P(TR;IkisK5P)=DT!eDFhZPVT=}i#2 z*lk<4HM#l7Y#|-AwThs*-<@}CO!~P9u5tbO3JtFsn{`Ww(hR3By0_c(WEDW$^rMN+ zHa$s&2liE=)M(Wl3lY0*Mq-T^@P5!+U9&TX3~tkBpyjBs+#!+P;#6;ahqcCx|JCh#9JRN8TK1CTpIOD zzB@j{`ve21(BaL5xJ0*XYd%f24PhUdy74nPNOv>vM+OQuTcW-$p)-#Q;s(YD^X&YB z2EP8%0-YSa3zcx-Txt;DfFGkA=x03smkxkWryRR0um zLX(H+WBAr=kbk5Qy&9Zps}Hd@xPz?-bq0C>KO(#~6pEkkagHDghBEA7;jZz#i-m?j zfk}aJBqhu)2ZoFmm)8^;qN=sAKg_%X(8Ey=vwHW^>*1kjKHii$y>sB5+xe(%qmL-pMVMh2KW|KW9d+<7LE>=yz@zB+P6+}yVh=Rz(9n!_ z7N17wWnR}=PVd(M%>S$+(Y?idru%)f>`*d>HFXRb`yFINs<-`e%UNac%;F4!!gnO~g#kzRnN?UB{msv0?-8)+k#@R;(|<*N+Pv&qvD~j=*qAbHn#8 zUkxGq3Qgec#iRfSS744ez_8+Kk)pFk{qgcvAUL&pb2>4)NNib z-BfVF7jV`zT&U4r;u)20^ikz;W5*1?bL?fA^PDGLPduWu59Lcz@e*&Gp`+DyaohGo?}ta-eT0)v zH`FXu-?W0NRl1v`-ljxnlhoIo3xy4$p@%{&4jC zVDQt?>o>q5&lxu^d}P zuqxSpuOjyu1w9zPrKmtD#2p(#oq?@``_=Y)Z`En4h;M$obT?a-dF4=MP?AJ>Nsoem zrd@YcL5cm&njdbw_1t9t@qhkb*@v65l>sLdP|RptFpUJc5=uF8pRk?E%H zA=#6!yRD(jkQWtBlV(?Xi943cmN79&Ndj)7vIRjV+%>PRs$Z7%WoNbMz%c^lthW)H z(9Kj{jpzds31DE38IL>W1+i^x0&l@zh8olE``><21%IwzCX&%KAI_!P@7DPbl(2`5 zWlh2>D_m8W#SKz(8JOE!u9Z_oQ6o1r8aWN^KQ$EKZ$Wv;v$BC(HnA})2mhobiG>>H zkoTb=e7M4hA)I?$@daH|AtM{4-Vav8;il{sX8mk&TyR@nEOORd7aS2i8medi6 zjyP8CL{pJ$+d)bZ$I7z`^YZ3uMAxeeHMW@RO)Y50`*7~UTY?Wb-KDgS21_?)RPDTy$5O-P}12hJ>PWGDO@oy^}iTZu7>E4UN!@u-3qErs1N$FMY^EtbW!kLRkZvCR4^R=qOo z>zCy+z<8!|d4;EdjyrW`O(`!k{Ab|K7((%y9gJqI@s;;AOcSwcMDw>mv@J*A2le!) z^gB#g5AGkpHPTH9g6nN)-`2#=UFDyJ-XUO!lK|-x`b1R$JlFSzGUKnNgU%w~niM(% zO~Q`}Re;;=-x2w=)}CceXy~}4`xKU@S4w!xJ*ha_TJypLod2^jd8HZSA#XW!u6{xS zdCwdkQoXI&yDyk!yEpn|a#GdIk9K=M&C>l3i@OQKS!<|2#wyt5`$dzKhDa4rn)*FX zIG%BQZ2&70&;A~7M5J%mja21tFH=l%+H~ENvr*B!>5*0Ww>g@zC}vL}Xf%;?kvHxJ-bn|@paWY%a zWGBQkm~K{!1!hYEzw!jOJa@}lu1*K4V81py?^A3bJG;)FJ$sIAX3clG8q;L&FIV~B z2H5;FI12i-*)yRSRY-|M5lNgX2@P1R@qL5pUGgbI&Fuw*w5nz5yvye~xl|Nub-=L| za0-5(Uths)k!JQagP#wV>aBREnDcj71*Jfv_b}#Y-^28CyIPtPyLl^zN8FhqYv8V>|L_4{--0BYM8WIQYx89D119lu>#&eei3 zpPH`gO)DCM2{cbHLtH>yo-}M{CM?i?iVRL1Ep>`lxqABfpWz_nIa2~C{$*#U+&AVH zZZTEfE-4k}pN7l1BD1}Wm@@D$h`G8%AcN#0z%7Ar_WpD@I~xzPkG<^UNyEG!kh<~| zWH@k!%DsTo%b}w4>`yD&JfVY^_4ov4U(;bby+`jERY+`)SAw}C-M{xMF*iLLllPk= zTkF~R1K?*FvU2JopWSIjzan}7+jwAyj7GIO$ZO4iYx4O8cu1i*sn3pV-sfLx!AQiC zdMjyBkN&xCzIh{#!iLU8EL&qd^R~_NJ-QwuyF2_ixSEVt7yWPc>ooFgIDfYW<7`F3 z@UZ5q_41PVM%(A3T7CYJ`8cQQ!*A4e^_{N!s<-w+7|KWDH0TI|C4c)Af?4bdgG%Z-8ti)n4 zUJPzkgl(6t-7iC7_C{(v{tle&V)Om^9qzu$`jP(#BsQZYbl}XT))N&~MYOo5%mO;q zCkQO5&LW6etIN7)g-JrKG5nxb8ej6<%~wVmiPc3u0q&qs*K|t0kYt;4bR6HfDb5PY zlh(%M7U~I9%+h2!0jfJ~XJ^AH@FL?DI-~VWaB9#4|3XWR77-Yby4~v3nvjeR!YPat zGm4t7nfh@oA~v-8!x0O;yc?tGq)Tf*!~I;E^cx_|(ON`UWASUVwIx^i+uzn^AvOLz zU$(|+4I8gFnhYQQ)#q1FlXQ9>cit^(wtX#lRtqTt?r~p5Xv`Z)Eu9b z-Rw4LSgl=*(vqWxozHfgM(xtt-TYesaOU#b;5RzBPy%JXG{O1sj{!5dFm!L^5$=}m zs>fef)uP+@0yr&G@eJVYswy5%X21^rd0ng~=hywla`NtCRapy6aar^E^L3fnBED7Fo3+1z=pH$#ee+!e>ag5a2l{S04N~^ zM8r!x6U0;f$N&C6QY;OcB*&c2w4cSBtffGdwN5t<*imB&PM7+zme_%?uuzf$2ER!fL)-~0h+1S#Nt&4mB zZxaYw-|U|%9=HJkvRb$bN`Y|o%|0!3@MEQRFDeJrwNdNmkoxV6)x!XYU)k??56E(D zJ`Z_(F-vqb!_@6L%7#gPz-n$jrCrT55F?RYPA>J2_H{qe+LY5RIPk~|nixar`(Tth z&N!_fWBmbdV=9&0rQR-w!_*(yVoKYa$bPL(t1&1#No?g=sKe}iDl=fn)gGE7I`>&x zuu|#?z%nrpd-QP#5o|J(Y~nJ&58$hIC_i)-YUndXGsaZ>?u@7IkJ!rqY9euu0yYpi z7(G<82%5D)H$Pa4O~oOG8;-y*Gxf}@hVQ{~QsHshDnPUp;fzi#3P~ z>Ybfa!fcL!R|kn`h0EI*pimvzsd%I56p)DVg`F?v9o%ym`Hmk2IxuC)#^}%(BBHD{ zpB3+V;ral>b9$qGQ*0Xky%7r_bvL8Qi`EWD(GR}-Di&0V1quID!6UH%sJ8)a<~Zl` zwUY4vQ)Rs8D^1{TP)gY44Gdkv(y3smqGdDc)FsiNAXhSKfY^SQ{|cSS%6#NVypXMd z1x5|$*LV2Keuoil(tU;T)1BX7Y~@RPfoV`fd1w8Xn4Us7!nGe^Y87URPq6rB`0Unx zX|Wag&Zghk==+-SA;RJKZ)_$bR`gT5XaJ%i+j&$%BqQ1vR+&&S+h%7PBZ*KcL@;uGmplla{Ay z<+4Al(KYO;8Z6&asGK$P?mJcR@xj6I-r2jnQ_K{p?Bw&#hX>tnEn%VVpbNJ&-Ms+Q%K)(r22mUh%5E(6IWLcCTAp{8Ij)!p73q$i@_`~_NeXu%!lcC;m zP~-_3DB9ju9N}}2*q80A>!t31Z%0NOWY+&4r5ft@zJ(i}jVI&SD^SImvve8zu z5x6xwtPFg(t6ZcmZ-$QFUzTL_`y51$75Q?&_?9!Wcn{AI8pbuF*X0LrqFOcidwfmC zn#Z8tidzXs;3;pn#1mZHAAc2H%Ed%xEVS%l*pux=kc`;*t>utIWB0ORi1}1inQW=5 z;x0c$$C|k42MO)k)T|*qyKKnZMbi92(j=nEyRz44ef0RiuYvV)`!^voGPHTw z@n`W+H%{;F*Atl+#@UMLd?s<&JbG}g%_~|a7n{02^!K~pbocK|D2|V$Tsn3~N!qir zv0A0t?L=aa|MC2HJRazljy$iUY>4#e`B`-B9(T00t4?C+vRToY4VR-u2QUd7=N+}A zL*QgSfr{JRsryrMyNmQix$^u>8ftf?e|69K?3!qgoc!vi^9AXme|>!K^7u-p9slXQ zjrUH;!;8)RydFB<`@4;UPITcS4=;UpJGzV8PVAMvtN%{)g{|+%6Mv3=Rg2%|CHcE> z?eXr8u!eag=N4wMG{MpKu`kH<71xe#O?=Q87LXq3ed!;syIIB}+Y>R7#WwL*qEIgI zcov19(P;Lo-Rldtdu39mCHCoG^CH+GI^05~x7%{}DY7|VkeyA0DBlHkz&KYHokB;* zUQEX0e9j8=aek@R2zG(tq~I;GwFXpcn}kDVF%U=6o)nRN+oL+XYl0EEbnRNMP70OAY9*F zq#5~e%nu-CEVZ7otg#2pKIm8g!b2ofl2%8z4&>|}6`W!m;K|sSXi-T=7wh@^!oWn0 zxd31>sippqBeaRlYb$;(GQ_o2J=x>pJl{p^{SFRfR^vaN!Zns%rL5_{?sd)2dt7mk z0T%b{Av5ip1BgG>>_THt@6E0|>H?-muTh_y!}V&>IiJ8q=FA5UcBj3P1HR0ID?*oR zM(=Y&o+Ktl5!S|3_1T*djE+-~*!J7V(AY}Qydnn(V0uma3^Lvd;EmOhyaL2iNL2)l zX-ougNBd3^_m;h(-L7ZHJ~l6dW_s))*$B0F zSsQ+OOG@8er(<>o5mZHu;${~a8RTQk(*BS!Ou#OwCU!TcH~L34&eOemz0y!=d3y$p zi~iY;Sl>0X+Aup`g$sBK3w_c`55%(6N}q-iti&FPSA9YU?{;D{mcZ4dkq zmw^Xj1`tz$E(*GDC1FGueZ@{!-YzxO_NLxE_e-z?^$M1ay8^0YD?mZ_m5Xypz>ESf zWALz-LFTm8aEUP7H9^KTe4(vnbJLLjo%jVK)|9*?fTaCYLx(6k=<%s$TpDMxLjn75 zTXqx1d4qvWZ7si_w@Nxu9FVsdD=@&ua>><9k`slvuFjm3hD;CcgAjtM5Y)CQ$~r1WI%k z1tsN~dLpv3yi7{oT6Jd_=(sS@v>W>??_9|{Gi5hitxxH1!`E)X&${|MyK_c=7iIK# zkhOElNmGs)Y>_nfCZft*hLlDeKAiQNq8tHkOZcsRLr;yDw!@fnlh_!EHe!T(u@38X zr};+fkC0-M;$)YOm>t$)y)pt+Jac_q)9C}*iv*nh%xxE-u{}@df*MJrX?BFB8njx2 z0#!yS?lnT$v+NC4F+J1PCwkR^_uF3!jUD%U4JI#!#1Vv%S;q%PjvY~zn!&2Su|!L{h3))j`tGEPat<6Q zuYWs~@CS;sbi+=FxxaEzOoFdA{3Onfzb4IT6;?pb`BpczS z&nMkaJ5u=T9<;tE13C9Bps>N@G_}o?0Kp<#2r$8)qpCMb=>N@_$q*pt zh_#$Ko(FS$3T1%*qhk%$@BBkP2MQ?e4<0jbOh{Y@WG##21vC9Tbn?ZhVPVBifq@79 zigSiAEJBX?iVlu0hRfk-l`ji_>miZBV)7uk^g0_8c}jl>(d&1pvn8lO2$8C3+%`= z4X^(ag}ZUZ0dR2$o#UpY(lhZk@DWDFt0=|Xir8T;a0VOn`2PsJNGRCR!C6WP#YGef<0)*R4U72S(@ zam75OeIp*z1!AkN7_6bhJ#-A1W-nj7_!q>A8OvqY$DyqxID2bbrWwAdJxmHtuZ*nz zVKU4{qpOa1jQ+A#D^p;>_xW|fBPPcVZaEEm3iVBnE#?ezS`L?CAV@LZwV;++s9Ivf!$3@dt=(E zg+lg|oPJR{$w1C7S6afwL(@uTEE-$dVx)@)-Ab$pNPUY1Wn+@}&;U0(Mg@}**74@R zuRHf=>?t$!R2q4lk;yJ8C+1E~Ly#T=#>79h@5Yo}D~Y6NlfX8~aRW0@wF#)&{Bvj1 z&o*YCPh#@9XLC<2Q;&02#MJ4i=y!DDCEA7AoGrk7146-0sof}QGjb`>C$SY(QIl6b zY&%Iq`MHh1TwlU7(!^j-HHB6$84QcJJDO`XR2tqeE!{~QOjww7bPhF8DZZd_h$yi-#LITEi1;4xU`Q}#plAU*IWT4OZ2wr$$Vn~eWb zHYRhr$-9AADV8;WaRPY~^mZ#Sa(smIHJ+$V!hI)%jg|~WBYmP7F|;u7tg8XH*5tKg zpiL)QKr}#>M9l>Hh6-Zx0H{aSVAhYAKF>~>W|TZ;Deh6^a8U6SLKLC;`(R1U9NG!% zlp;Hg=PwVBo(-P8eDcSm!$d)6c76I+JK-dR*HM%-OXU2NatgXoi_zz_lqA-u2L&v3 zOVHJT5(p;-G_6n~&Is)TL;t8`&8vwLwU$;K0?EtLt5&03BcoESSE`Q9iC&HW_<#RL z##=;MmS-@N1EfOqmeexrJfrhBb1pb-$;mDs@ZHnRp819)(J1W)@|MMa{AHaNkP6qJ zxfsH;XUIw2%*kTdLq&mkWaK))M{BqOZhTGd198UzpPW>!IndMt;ronl^t#B6j?stD z@76T0HB5phaYp$xG744I`Ac_I%^Tz$ByS>Fnvqwt=3+j*mLyX3Rz^0Z+yTpCLpWG5 zObw%e4CZQAV!p>`9HbQ2<;C<6FfBMFOYBp^d(*Vg&V0h}(`T>M@)y8ZE#c(!!@Byn z_-o)yg0cqpF>hJWj+se-T~z(5(B_&fKRtC8SL-E!-!0c8^*7KiV~$$b<{yu<3q?_< za)Y;dqG~=<;;DM>oLh(*Ros|kH7R?mp1geV^x5%~w`J*{-96~*f2V7>FITV`wO+4q zZ~N{)tE(0LvU70K`}VYrs>_ST6-!*-+PcZslh5kaasqw^`ZRkE2S^j<0A=IZn#&)~ z+K*@Y0j2YM48GYzd?EQ5gX4R~VLHeK42$=MR82VgxsJ!8@dEjO{y%<>pXk$g$fvRF zflGv7@;df1eDQDey1Eau{c4|P6eXMHCw8kdT2S>^!29^m>SYAGLaX+Ox9O5`1P zM40L}4^Vc>$2J(l@Cfc{-)y&nwLDEJp@1(S1X-4aDUlyF&QER38tErE)$2>XFVGFX z?+nR-Z+qkh!SP&r8hPhJZ8L<{PU4On)V%?KYH_bp06R8!)bS(x%wnV@%~+QN{7s?e*I>sVr;fHh1l&BO@Y z7sb*$rcr&(w}nWr12$_>qKx)VaQ6E6fA<#=57B|O@y~?&!VVClvR}>}t0ICdd0y|c#AjSEi*0lYPyhmBuhL)5mKLUriFpH*)Ocuxicuxb2r|24nHO3>rO&L zl^I_mLQ>4ORlPG5LW{{f`q-jnUwTXZ*Ba&WgkKBEDSFT zRQ(#@lLmhqtR^#XI^1v70ebvk6Ap=P1?y#CBJPlRh(rj1%A=R026`h!i<2GC`m<9k z!hk==T-sAn9+$wr6@_SvLIKS8Fp5LM%W&AmIl^4wD#s%(B3|&!!qG6O0dEZS5rwe0 zuo77x+BG_p&T{h!8;EEiCAPiTJJ_wU>~ALsr^H7Jqfv@d44|*HUn22?_0H{yduFSm zr2G?mbib!`Yhclz>1ghhgah%pJpG)9(j5kt)`8P?B9R&#O!E{qNKjwV*pfF##`O?8 z%IZTC<5Oo0)p0#!5A87vs@xy z%P!TrcBm5>VUs|k`u(dP?}kJ5?s6^DyQqKuj5%FkHsk!mWR%lOZk04cMJ}ey6F9I+ zL)5B}`pv1Yl@XJr<4I12K-JdBNe=@J9!?5H4pumfB5S_*>G<&Ypq(8&KWw4TIiLf9 zwgjXXKt<0L>j7$UP%R)p_dB9(P`z92^r}}ruB4%rQImt#%4*Z%$JF>l*l{5l6^*s^ z37AFA*mmI%u+~c=Ey8DB-8Qc|d?P#}T2ghNsl;G1rWJrnhy~dfRakMSx2o?q&%c{C zCu1%X_R_2?1n$Ia4{msdm9+ae0Xb^$*cv@O?ohei2*;<2i)lovz_hJ&&@&6xb$*Cc zyd=4BS;$I@`TXO+{NmQyl&Tn55azk{>f|bXv2bqOxIzo=Q(=f((9$ggGZ9g@K9-3| zwc??iWUfk5a6i2c4E&G=&b~-)@b#V0b3z!IR(M7j8HY7F%a`OS#1BShT zej*omuFlV7sBTLGyNnp88p9b}SS7l~42un*@l*KyY`V_f-Z#k24CX$O(A-jXc_riRW? z_B*rSKRAKraSoO$2_BX{GzFWXT5{uME27Mam$mO(c6ZThsQLIj2svu#N=*Ghc?0`CWBpYo&i6iv z7lO^cEn{6ox!ShR5tlYgJ1B5Nr%JpW=&u6=ir%TkaZ^~!gkp+(x)*uoPYv^J%DFK{ z6B%A~`}iLljAZ+{F@N23DNG8kmlEnt&zr{G!i7rvtYB^{2UM*1@;WYtw0Cj8Zg^Qd|>8`;2Q&5qr{c+W7 ziWfF=4h<6`k88hgcdlh^bd7C+vUXCQYoYX5DnSY*DWT@bSCrodNuM~yJ@XG}cN5`yH#>3Htr{09#G`UYMMmForQY>WKft{$E}*Vv zJf7t|f0uddP*5O@XD{;b-PPd}Yt0wP)OBU4z(gb-V9W$HzW;(2f?wdm2%#9Ul@$MQB8?$j@~e;U~h;9fl7@xdJfmT9>BYsYyFQQe_j<4wfzm zeZqqj`p_qd-!b(Xlr_Kg0}-sA196tJz`S|&?BMXI(()1kNGmROvhDpmy00=($WxM4U>hx>3KTMes< zoaBnb=MLXWVHTbTcnhe14>t%>Qm>cA>Q?NQJH97 z!_jh4P?w8yXm-}E3&BF?FGTgIs?g2MA*u=L5l8YcPYks?!1=lPzT98h9J$F~jc62Y zoKQz}+w9~`Ex*BU$9At1<08ZC0O6-}-JkFq;sUr4tGr1(C@?@0vcTd!vUw)#)E|7) z3BX#suLv&ZZX7o=GKg=2D8ZPQdO8c~YB`)2kRlF_u$tfKqFU6nfoB3{0ey$fLB1cn z3%AXzDe@0_H?4J#U>5g=NAbjjwp{<0BL-Axa#Ax`p_Dspz7nZzhy}E2dVXQ!U5D8c&B80==FPs z(O_)ddT<(+XdUvCUG-vPaaG2g7}Td>oRG+8)LnwNYg63GmP6Z}2DX{}i$yRvIy;Jy zkg0O&Oge<^n6C3vy%1^%Zy?yz8ER{znG*(F1S2}sYM6zT{33ArjM!*l6oKEtL*$Y> zAKMEV2=h#1ql3f8gBJ(Sk4VI%#}lu!IGKBXS)<9+<;|_d2Y`YAh+E#N@OyBkMy+%? zWYa3ld$F1^XX*%Uf^mhreqtMYN&Tw zee|gk0HWSj@8~eHV)Aaz?%axnUC$?fS)+*yg~AUDCE<-!50e6rA+u(#Wc%#!F&H+% zHjCW%@nQC$RdNWQErw{BlB0{B#$dR4p=2a`DD`bMX{}C9VGd13`~c+#{?t;Oh@zOp zp;$ET0Wxan5xZ|$a~E&k{ASvOFJw52_s}oMGN5z)Ou2iP-SI?hCtC z@Q^ypYhUuMP2GMgjOl@i$(rElFtBUc47>q2Ye4+n@Yc*3unbYevsFYEE_%A6sh9{* z8L^b(19CSjIu!TZXupNv*tE@twW8cY6iunb-&f%Mn5Px_Qc&1q=s%7}9enyr)FaB- zv>?gJ$5DQ{QX(Pzfhx};3zL5&$DQ_3)@cfaq8TESGBX`?)^*&J{TI>SueAG_rVIM% z1BI?jtzx7%3ikJeuh328UTp+)UD4+5_ET#gld)m|{luCf9o#L4-G_IDIgj1?5?Gw1 zKE=TsBI+F^O_pvVmvy6WuuE3s-5jxILTXtYimV?#!_RXe;ssgm??*&_?Hnql_`Ve2 zo94v}OLmD39-y;KVV^?M6TuNW8?Mx@^ZTUZ6V=zH0Dzz6t05e)0T#CYR#-=4u~~id zo6E^u?c@{ks1xo(Z}X2U{@OUveRV|=<5MLD6z}DiO+(awJgnWQ%@oUR+7G=S9(DII zkBMkxGRu$1IX>)xcf8MkvOfz@5Z#@4r2uZBZ+<{eEBMxI=o6w&MWaoxS>HRAEyu$! zB)TlOjn7CH=C?9N?nzq*h}&u~e2}{I=Kx@LiJ$_vxl)VL^6HB71LMD3AY&$90o#7d z54geCmfj+LZpbG)pPqMEX#o*27!u%D&X;^SOw};}PAH;^b}?XjJO~zZHpx@YKzs_S zelaYDtJSi}tGKJH>=Fk_OQAnT5=+Je^hnoWbm@-Qv&*9CHi?dU>fr`0(|nYBzqWCT zoD?et=j2VbQWS3pzV*@`l6Hgl%!e0K-n8X}W z5CktkE5jyEc1a)-37P0`ZBu|a4W=eoLjOJSw5q2B4}wT)8+)gdwGWvo(!QVOp%igb zu6>`4RNLgJ=SJCoRX227lh$W9HQhEdJmM=k$AJ@(W5AI~`Ch{m<1BeCTZ~(yTR&^L zzq_%F9TG`StaX)4Ssl^Jd3ZXXZUd#;CX=rC)g;Ry0 zAu=Mz*1)aDb>nirviPr7hWHwJx`6cZ|1$ONEc}Nm%{oc|hv=egUR&t*OeRbYsihk< zz3d9xiFzhN>079A>xrb13DY71f+YT}#4oS!M$Jc?ⅆkd(p(kAmdjgL% zOiBnp&W0c1FYs6D{g8{Ir|t3s!mFs_U=zsIc8-gCO$De1wtkl}2b_={=vWNBNlAweS$D^)h>F*d8lI8@#{RKLHImuWh6y|UMiCkN)%EIv9m4;MB3W-x z59=iUi?yroOLmFX+kaM7Nrg+Dyh5e&lhtzi^%Hc(Uo9@hBGq-FL!C3!sx7T(4zTAk z$Jzge2U+S19R^wCp;}!G+1YebI47Vc^Igtk$9Zev&5g8e0LlB7V+0%H)<7s2oLvu? z$%X+o;c>Fd(`xxx2X1TIMu#2Rl$wQg2SQ*T+i69_SBwcQ_uGjhz*d8^eWg|u{8jbI zv+e*AC%u843 z(%p05OK?H^c5U4~cTZ5?{ij=WF(2@?Q|DuLHXd^LMlS;p2{;cdY}4l^1A#`55D&kL zlXPejeZLw`45~649{!kLpDl*VF~W{7*O#kaNsPP}-PA%~iUWsMQCDE&*3EN_bO$Z; zA4e6UIz3$s5oX{;KaMtag1eHA0hQhUI9IO@1!v?gvgsCG2YQRixqVTR!6JcDV+VQd z()|xOH%&aSFl-JZ?oJ@iWjO5s@?BRy$v7+ofh8{it@8LG+s6ca@;Aug{(*iIK}jJ@ zM@gGrnl&rlbC8Iu394Dx8NzGCbpG0As`D$>c?%?uT zz)?2Y=t#mU-W>Rg2+dhh=nAlem`e{^$PQk-5MDFPu`3Z*?3&Xuyg|^8t5KwCuPGGM z)|3^vKigN<5;keUeiC!CX-; z#4UP-K!~2W&(f@6Cv6ZHgUL_|9?=Ylv29n1Z7_B-3a{ru43~B#Qf(@_BT}xQWfql9 zvIBK4!}hUgry!M8qjP3DA)_+<-~x(JXOA8RNGDY@sf+^-reP%fs-|lh2wBi_dMOD? z8}Ut(+1L40ew`u6eG3<6b+rIf+q*DoOy5=W9U{z&Y$&RzW4BxkwJH8VH+xJyFq9-F zz;3I>&d#otIKM~Ee>mvT0y%$mH58Mg-9;XDvqKn}>T)@pQ!c&H1Qa3P|M9V`jgK{?X{b};<`2Am zoSA=J5Wu)b5Y~Oz%kKd3atA~DEpT7%(`@f(7rm=^FL+<}rkU6nI{aI$fprmxwiPv# zS0vLsW=|xaALe;tFnWPmAU0apCl$ZxJaQTl#i)Fr2!};8GvT}Kq?FVwHI@WTa z`BCbIXc4SZN+j$Sd2-GhPl8pF`AWio^-bd*j*c0^-LT)>lsr``{B#I?G;hpSaBY5i z_7Y8;pKm=BOx(Qdu|`n1Xa7+7$W$ggKX&h!%`}|Pn<7*%gx(7CmuqoSq%hY~YGwUc zWXTrSm5b^uSy?}IFPkOm($s~pAyJbWgZpJ%iR$#jm=;60z>45Xb{;pWGfWI6{j8mv zt9}=$gWLjAo0XevAEprL(lFscFEefsq?q-FkMd^3Tl8-zTgm2)$f}VT^d9*J{zRth?D&sswtm?V<0v^;7rF{Ki>;I6TarynOcK^B0+~63QMwd-*sb_|y+H;=p<}HRCEn z!s%z538}I^<3w=0pbwm^;yX|36~HWrZ80~CN}mgL|D%y!>BU+v3q<^PSMANwv!f?( z)#Cj4;%)P{t?cRRm(Ts~Q?M!+yQCAXQ~TZXTyY?axw<|l`=_^D)vprsA20G^zPsX^ z0KypPF)vrwHHNnz&GYV*>Bo{xxdGk=0;7hSBw@XJ^L|2R?Hfx9H$By;6Qo_5FDeLK zW4a$N9gl1GMylB0bzXWTW%^}#%(LctK82SbHLXbv6xn9BIGM?Ht#xFxKmFn8^-)N+ zj^AW2UcSv<{P^ryV~d>YBI*4Wr=6$oRsJD$%x-_wwm)uSH}x*2@|w0v&*JvyvEG@i zI+w@ibfnq@jVgc*16eM1B+HIty;Jz(tHXo0;Y;)8=xuoF`gq_%GfIE!gIM{50E1$2 zYIe4J1u$fsqHpUy1P1y<-Uu~-*yxxPl);RAKGL@RO$%?BV5!~f6~i8BME6U}O%_oh zzQk$9KAuugjZO~9;q!T5JK>Rw4iH?_$%>cInCbrlp9sdKNcu{&QcYN0UEk|eR|Tj3 zS^wd;-$e$Ez(DBT0-oN@{dOj1-4>?&gc*bi*pp50C6nGH`&v{!Evd_yAx7#|Ac!5a zzuatDWxj#j!h>WB3Erv1+#c`L(_muKIyd1wEP|`=(U=hYIZY?sDapm}+rK6VU}A!= z)DX1gR&pf($*@2;TDFR-XAF(}3DzA`9Ai6^JdzB``rg7~U_+<@5o(Xv!TYm4cyQ{> zd1$rL?{{KOXzEvA)lumDU|beT-eEDNYcx7X6x7XNzfb;rTCYvDOxmf<=_;{P8az#I ziJe>N<{*J4E&F^uySxT<$NVx`l(%TQI719{_wcIeb%zBg-kS5v?)h{vTs?Z&>aG^p zeF^*FZ@iwn?Japkf23FyM*|i;EDX_tDBtreSChrND~?U6fKh`{k^T#*|Khw%X_W+@ z^UpCR=buYkB2FDq;QqO47VXjkP4BR(oo@%bh#&Q$r#C;jwFhHVO#9w@Wu(=-X8356 z&ro5vxb0uEMBgs&7b}ra7JuE(TvAkz^|E|N+p8aC6y6L!=Qgt5oUIQd9G;(+xqcmYS65IrcC`?{S%ivVmwOJ&c%@t}XWJHv%=YDxOe4M{3!f4650mL_PmH znOgo2qE7tWnws&SMSa?MRUYo0I0Y@gjH0ot;baO$MX{6Kx2LFiu%9R#_=G)WH#Jj% zIN3jR#t)7Sr~!)bZ*z9L*A{LQHlQsg5ik4jP?E#Gn^8Wo-)L*uhxnu{NmjT@C9?Q=Wz;3tnLGJqsidBQfSw8R`YfP9KPYL8Cq zc`;4##sRo>U(5X=~Fz`aVbCYj4v2>1K`yXDs z`IB~!c9XGq;&?|>fQYzWHya5JGa-MW0^bZ@2waI~0yk^XjLD74MXK8Ja)a`aQt3dq zaj*7)tfmv|pf%6|mJS{k-A1YA0N%n@LU;~E|J6-qci>+rV3$n*Li`hj91Wc$Bl`K* z0qSnJpyN@BHf#H(DWO`Vi@S-4cX@({AG~#a7sUS_9NdGUC-RQebaxNIv}_(FOls&~ zf-lc?oLBw!&f~V3D%bJQ2|2dx)vn&Z`k<$L(2?AHGEemFoV(*oRlaw3b@Yz8T&K6} z&H!zx?y4O5zyc>Y0eT6EFq*FeBlwoQLBciPX5jnw7*vT-2bOpk5-r49m?oWY40^{EA5vyW&x4M;DY0ir%=x;XmP zAa!2zdIolFVFn;;SJj{uKdr*$Eos4Q&S?#FyV$aP>P!h$c@iX=VLVfU);Ae;zC!V2 zZw{-U#Ql?c7jugnz3@uI@--Xu_eRTgoNzLdVJUGR?$JKJ+ofnY|bI7Z8nAj~2+a4cQ{dnfts$QSH-;oxfQw0r0o zSu&_}qEF9h=wS=yEme)$bre@hRNi+lDjZ+1~hAz5h0+^UI~*km>=~ zG=6r|Pm=dCxv%Zg2p!Gv|78HYW+F;KWwi*CzMS`e@>s8jC{ozL`t(GB*0)f z7lX!vW!cl!szXo-3V`bG3^+Owp(aL~{e>#X>wvHR=;5f6dOHI{@02I+0I$k~-4nq0 zm`>hZtgdqSzs=yCLn6AI6dr3#Ii6iC7Vkmb-hHR`~D@MaPCNG9Eko0aTFjLG}X(GEt#-K%&;H(G+5m&d=&I-;4 zrdW>lX2Z$6dwCsm3K9!l)lzrwe6i@RJUzOjK*t5gmA)@kx`x{mhk%((qpv=;KM|X@ z+!i&J8|+1z@=AxHr=JKsRghD{<76OCLB?5eJ=S_vLymkj7cX`~+v*}nu&k41#VIRl zQCp}%S3&3QbR6Kb$~M$#pcJjV0HUd;DChA4u4jry#uk~vi7fc|t!Dn7@5YLS%+=Qmy#Wo*|HXlxT&{>bc?Q~hSkLDBa z)I{(BdHwIG#dSpfo2Uv}di8MMo>z}ky11SxE~lV~8C6XWSAI@aB@28nV_sq$aT?s* z>SP%NNh5#|MVEFVV%%=T6e%4kb8U*1icMvc@jqLooT&7P_SUnwh)VJp#KOF8<3>GT%tyc)d2 z%R^w$XHM(j^6gutbx8Q(IC&n#*J;D|)p<6VO;*t1BFEsSM=~wiKlRh!<-O~ zSI*U9`924yEi-m?*wV=;xHw7*S2{z46?m3vGx8}nY8=^YG6+V~ESE;oSeobSDZnAR z%Wn`nDD$NYItdtos2jufI$YMn&^!v!KTJJXLM0T1ThX&Rkb zq$B(l7D!tLfuh?Eh93uJV`3RQsE6T4u^-M(JRg1p@}YA&9K1TtAmfDHh~oX^l5JsC z+W|#8tj*W$LjwgaXsi}Pv5=~tRo5`II65^zqVYA(TE=z zm9<}}#-pz1R7S;`QUL~Tj~trq9E`fsEUamL$W16+jkl>wY1AOTtr=gz3=$1>%w#){ zI>k&?sccTp?zU1e#Oc&Q>$F^Vk@QCMJ5tU>1E{4#sc0~H}2id&&s zQ6qbgdKS+KPNg}7lvA8Fhq1&ZjM#=zbg-_ZA5zcP`Fc)ZC!|A}UZ;@KT;H@OZnler zFCDQ3E5%M*-Gch^R((L@t=25W1#|sN&D0#9Sb<=EKBJLm{a`XliLNbmZhZro&~n?f zZX?`97yopFzl4|{%hw3l(AZyJ4tJf&p}~5 z0gsW2kr2jVOIulgB7*8Rz-~yHANFiqw&ws|pVAwnF184?USJq{@wG(-!QaMzS|foB z{LtC~z@#lHnWjBzAnxf`Kf_pZ|1=k+qbCg+Bf(LOp%LX<;5PG9I$5wWFbR19+RET|@s)RH^c$$PSK;aePA{z5UA}V({)-nI=;Pf zSDdVg{kLj`3Sdse(L(VZF%bTsxGFP{F#3hsCB;Q-0i-Y1!D%eM7PsD-G&=ky6?$`f zY{TUC)PSh34!%K%%_$nC)7T`imd0Z>f#w4{0}c%q+&GFJr4w&V=5=-`8PO&Ga|l9T z(gFj#+bT!+(^Hczp@H8{yjk7^B1u6(m{=JOTYSj^G~e=F(3llX(*h%}w9NvyTD8}# z5-zYFxTpm-DvF)dHL!C?H?mF8O`@AHCItnc5^%zx^R9Z;C-e8s5)3B&RQ4mUWXW=F z9|MyP6+vcpfGDm-*-%La(!QqyM4UslSz#tXuIp?+ zO%ipD>Xd?DL@TP1(Lj|==rdhLKWo+5QHvAAp1w^Ss_m(~6|;0?@G|~ZC5P~0SPWN+ zLFLuh)lKZuVWcVOkJZ$ok`5+@Dt4|rUe7LzrrR?{xJ(Mn7h%we%&%Y!9<4aK#r>&r zFx$}$hkjI&Yws)@idao_C*yWyA)2Uh?f~0l6-Jh=#;3BnxZJ4f(je=&H>&)^wo1r> zL1=bYvA5ktVu{*qqZw&`Rr_gM+_FgM zMs>(wJqENKv#m<66VdUYz^q1e;zebgR@By^5w^Y;GEvgNrtA#f4TZTpYz4IND;?+& z%vfuF+NOX%rHzRexm7^>on#9MSl;-AD_AC4$T67w6t3x`Mqui#m%-QUdMyX163A; z&CMsP!Q#E~#{XD}2b8h0ZuLGnH4%;pjag1p^b<0qx4z6jgWTeZFk% z9KSd``j^4em#?26ydC^>^!m;5%NOcsG#+;M8~h`G9XvgLc9a=(odLJhU42|NIQO|g z1L9wW;!ET{H!h0cS!sQ>AadXMCC`Q{J1cTuOOMn=ntsBWu{+qYBsNCYV9vvBV1E0Jq$4e5HU%iLU7X#4>%*tf+@qP!68ij- z>#Gxf{RNyGA9Xp~m(5!MA-#+Bs{d_wKVXvP-R0tHa6Y6pu#LQ4Tmb$BxgYb38WBT zsJ)tu^N{?1$)p=*#KaMFTLYUHqMTTpdU107TU@GAS`FQ)MomLq(6Tgvr3$rdmA9^8 zno;N8&MD$cVYyEBOQnPqzU(IK&UDdrr+UrH2MBhqk4RTXGKRHYWtf zmN+6X5Iz~hg4v`t{+)9tv?Y{G(Y9v7;mSQXTR2vAbO7(gNxCgomnE9k#_a0y}nn(c4{%Mk;OZ>C%<3*DDgr$ zy3w636>n)O&d#R14ke{@I&ukh%C+!T^m8X3%S!K|i%rAd2BXI=Zh7%Z=N`6 zkhUM`>PD%iu&Ei@3g24UY4| zWQ{rmz#&~cYm7YbGd!v{R*B9b}4rksmY1Rx}eKLx7+@(_W#lWIdu<@Rb4 zuKXxbS@7Om@|&(=UkJmU<97utZ#L&PSp$w+M<%O}8kf|o~^97n1BT!S(yaE;p z3+t97ZlHrK%0>ee!{ZQ3*8sq7v0I(J>N>`UH+|Q9WKYabK+3O!)WX<_PGaia!C@ujQ6!PN;Gl{t^8p29MNb^w4 zk?gR20={Weq??c?v`Ob|fnDyKq1;0Y9kssoX zm?Vv!yLUt((YXG?M$c9Z* z0ISbQO`RQS1@ImINi+6Ds2l!VqXE}(78ZOxYmQGmv*#&!0f1^?yI#=VlNuyy#j7fF zzE5oyqR|NjXkuty+Z%Z}?A3vTVgDt{EOSsQ<#UqRp=U2g>{m9N*ek2wzzEsUPn+oT zL|Qcjw19t!PJ$twYLlC*r}qPkV*hlFj_Eb^7FNz~|RzJ5qA77=4wA(t?^tV5|kT|2>G zIzc4C3LDjVG+|U&ex6?$x(`)ieU>mk6$3z)Zl|_0w%=MzD}}6zI)T@jd0C8gJH<3n z!X8unH5IRwFoE)Jc$@<#M|NMTO_eNTM(?SdD~Ii?s1G6BE}E+)yr#WK?jGYB)xXVC zSC$Njo0E<{TQnB&DA&OoH;LDM<8RIOiB#M*sh^9bMj{~V<{O7CDp?sw)Ls_=L1)k& zpX{qCDwF znzU-(2pE;Q4>;hxtodO7;WycDGx(#`>pnQ&WX~Vp^@_mAZh1kB_m#RL;%?Xl-91&K zhSAbs|M+;D+d)n)_jos`!7uz~Jc)RJImXV!9g+3kP`&sCsUa-LFji4qMm4cRN$Zw5 z#f15q6zAb7#t!NP_1!+zSgO5Ox62H6+ayI>cg=AFitQR+T1;bT zrz}Y$OxGtt+i$z$rl6hYZ@6zw(OUPhrcO_CS!>+E=bqg1^T{OD`p}+`JKGSr@D(P^4dwsV+{VW&hj15AwiK!dpFgb;T_%sU!$+<0cCI<{^P9;h z;f%2)H-_oc&ysH3BgLolV2tE6xy)^j2IHqCoS^WScw@5ovC%vfrXLqvRt*a)cA#Ro zV#AM<*?Pv9VaV(waM*CE-XBF({jICWg<)EM;O?DFjv;=y691x<8R75}sWY5meWA%M zTpv=?wBV2`PMAw2U?*+Wy-r6+7|ybe_B!KWr@^o&MFr#YO^kk1k-Q<8Na2czhQ&&U zey=C?W+l zcSg$KLpn#{oCTD#{i|rG3Z2;;*uME&TShsgPe9>A0xk{SwFtS4VcUwJV^-~WuoCso=aH@KRp*h5;dxwogo}T3V5L4W({-Ek zPD{DviT5_FA)9i}K6_V{oqk@YKL5G(;Ta8!(RQbYYm^=z8&gFnG0}LpLm3Cm8+>}J zIJ5BGGlEkX_=O0MEFakpCV&y*w~s|xB61$2#|TJ^a?rU<)JB~>diaZrUt_vo5x`b` zAI>K8;qrRR`{19!hHT5Z5cH|^LPSZs-SfWv>-|z;9AHPsY}VtaW$0+tMf$=cNIyxa z=-+%qX!V8S_1&7Mat~v}Y0SA&LFjVgUh$#n2fnA#3wNs<^$%alf8D+A-hX~I{P+j8 z;Q8{?^s!I>rT*OCfB2pKJA8kz|M0=1f5<+5js;j3tKm|0{||qk|2=$|%>bUO|KK0L z`~KlSKK%CKcisJ+fB5ykzmxxv2oHb?clNsvy8C;@aqIw$p*>Z?|lD^OOo zm@MTh7xAE2pPx@YcBhLgC#?-8V^M`MTX#cIYha7Je=hhBlsmhtg8!g@{#orF{^i{Q z{}|E#FE8=WWg)=s$BU6@eTGUc^#?cmam6)O4F_=@1`mc*J+>%BFKMz}w-~OddyUsq zzczMac)vS9dk_pj>wDD(KErR#0j8uF3|im^A{FmOgAXBWfeScxUMYV2`7A6y>v+Vb;e$0lWQ9fnU!p_n2$47^U$1i?Aczp2o$sY!9j{jFU_NrON zyc-RtqxBT!>T5Nh$!K8Z{-#c~&3LGUTRfSsEYzK@EvL;5L7aX@0NXsvsXCjAbf`Wr zhoi~rS|atBr0GDE*Q*?yRI-Jln6u$jHhy-E4muMx8i4)B2H?SwW_M$5XT=lI?K=rQIV!2SWR#S*W4w{`Gi`{8wqv>#Vi3^&YJZSslQq7+_ zATM8K$LIRIn4`$i=Hp2L&%wB@ZzOm#s$zJVWDu6Rl9Z2$PwTGNE4RO^q56CNRj+73 zRs6ms;oWAHr|sa`Gxx+n%)jDl6uX#d#{g~+Prq7ltvaKjPR~TG z4@b*IL4ef8;L2Js>}^23^}{(i1$Hr$fr<2mZRg+=Jof$U#I6zKmqM>uuN1n??l@j| zswfa!r>iazzoz54EYeazAx=hxB~FcX(H)Wc(u|C?rL)iZH4r)R8zmy>qv99%T|e9J zIR+@VL#WFTHi`tD^Q$Y*fpzOa%0X9O69{%Oa;QgaUs41{E$=rb<*eV1JNB@bZ!inU z@I2S2O%Gd>yny0SNSP*xOa!n^I(_|Mg#y~B`*^hk2Q2We`!GQ_Qbn`>TfSWA$&~cs z&g-_ZpoNv-E_S~+$0gD#tSq=9MSl80)%~Y$9kiPorO={`oKRP5Q$sk)$iw%W z?!$A0nFPpWSdnasP9@NKHwyv28*k;c;BN^br1t>vak8UEgT;ltlsPnynXE2-uX^6k5D}SH&7kMKk_#PuHU&3lC**PoH7PThac8RDq^S{% z!Lx3K5=0b%V)z+^x6D9r~ z2CsUYFRolJi!?edqo0w3&TM9 z<%^1--MgaB#Fl^r__*fC1PJVulXrffpbx+@W`QpXD zWTpvV!aEBpgZYZIlTy(#ni2;`E|scY^9Wi81f4Gj`m0*h(S^XgjQps9C6j2>Y7~ z3VS1!t;lO7`Gds3&OkbmvAYc0&DBkR?9Nz2%|zxaf7)ucCN_k#kRr991SicMn}0>P z@aOywIK6|>V!EDDmL)c?!@-uWEEAo=^&B&-4H&vWP#=W!N9x0-Xrs!d0+jNK)wtvk z1D>sxI+^m69^!hEPsb*&2`;ow?XI)QKn`W-GFMNDA_D3JU)V$vC?XC&EGDXv>0*eo zJ4k@spz3>(&GH%0m?5Z?Mmgk#csB>XT(yZ)K!O;rN1#K;b;^hDb@_F{V8^7Y!UKRQ z=i_t{2iwcu{4dWST@{ZVH3t|V;$#ozz3EX*CmbCX`LH{6J|?K5aL|FhO?-lXintT# z#9+HdclX6wU9=_8UC#Ki$S^q!tjv&%1*UGW|4^Dp9sg(=q1E_T+(^3~+dc_R3OvO| z2ik>stY1_FAZTU7bI>J#F=A*GVqm3k6E%pIfN>N&6gGq3$P4=d77d(fdXZHlpg)s5 zNZomU3hbJ;{Fq7myUWs^snh#@$8#m3(R|&{9_%=}0&E0Q+o+SDYS6xBeN+BK9FS8# zXb{pf%d!pr)^JN|&52Xgc)XutR_M#?)x~1oaP#9t6C)Xv4?gf+j$oKh(1LQM_>aX^ zGZpxr&M0kW-#34Y7YzJ*cfuUUsYKC6 zX9XjUB3|>nL76fsM+%JQ#XQ^$ObaWIK%VN(E)bw;-bB>Im zEq|xUeoq+xj5%O)rp7a;zy7d@@_v-d^@ii+nfF)pIgYe86>A5W|euLD3o249ex| z$wx-^!Vz0b812})(SgBCok=1qb$8+FK9}kr(sU8Y(^{GyE`2Lb+AFz!z%^7Xm0Bcx zj;U>zPiJ63O=y`_*Po5%Y2PMj@;kF^yj>#^bNw{7frO^#Y@oN^re`cDbK7%q(Z%t3 zHY97o3KJ^e4I`&UQsS`s211Ij^6UciT*FE8@@T$x2lqpCR~@+6 z$cy20IdHoy*%JR}e`{zd4#u1@4@pgQT2A|humne_wUg-a>%OxJTCstLld!M(4D(Z* z&n{QjKD4g4F0nNcscFS~3Wo+*XU?Jcgc31AW&$8iiYD4jY?Nn0-FXzFm`g&p&NWb$ zU4)WB2^RvZ2w5hhUb+d}*4*-yrehtPPF|UsX-=7>1h?3Xj)3p_Yrw0P`E5aWNQQbi z@AG_uv!dZ@YzyX7c$)r2k7R^wyn21``{xH#9x>VRg();91~rd%a9}_l5MmLtM3N~} ztbUU-rmJ0+3yKzTY$Z^di()&*7`|Iw&DJ1PZ@V_lhTP_hVPO`Eo1I6u+vId5blM=Y zls?#C8ZWcjFwCq51|P;E$6*NXA|MaaTBwNPs4UrXOe{5%)5Cqy`XmL7z7FJuGa&y8x zO(s@loq-0vlZjP7e+pH1Fr_I}>5B42?}zDBF^7&?Q=4+a6NzL4>D;TtqmkpA7A+C0 zKqsZKCpnZd;xxujvWlw|11Ae*#$c8w`=>jRJZh2S7&(@1XuVU2)WHGHv6~5UD7?-R z$2qQkz_>%*2;`)I4w>kRQXmD~n=4#c5DU){3aW|IO9 z?9^+4xqJi=akP%T$PRXP1QI`jj2yv^4-a-EoA5TL*1NEcQbf(_!&`FccJA|HZF{No zxTuEvgxrZrdMTBKz!F{F5^@+E{+bg5Bi}Hi<7hQ-6~s;OpE^~A&yv>3zkM!U9rb^_ zGB={ZO;E`0Z1593Fi!RC#ml$Z@rx|3(BTflO^EJ64L!}X|XFU-c+E$KjsGU*BBf0kDhx6%TxROR7C=q5xB z)c>IpN`okny}k`X_Tb&+x}B+ymy5|9ayqLqfIaoPty*17$BQebn{}*xu46qpn0kJv zimuT2E|{R2e!>)wAo9e5K!SF8DGwDQT1L;kyp&y-@^WU?%8ThGZST0WI&IU`zU2;X z3q!3?tg~X*vIcmG%tzOxR8~KIr#L@8jm8q{<1qQMD-9}GYXQ6i#!M%&nfyK&*sZLf z3#%h;U5K7$angXCO^27_2e!~I7w^<_Q4B^VhQagEI$V5MryFl*uyfHB)fv5g@btKy+Qa9D7wU;J`pKx@e3zfHu-~^;CdE(R2bP z!6}%Sr(k;@w-TdC0y$>Qy!)=(&i-xvJ)ycW zufynnD=;mdKh%@HIg zhY$*8jphI;aLh~x0yt@!1v`^d{bsp(4(BH&R%wVDAtce25V7WM( z%-fm6<%qLuUqDcsGuCVJXSjG>ml<~RPs^9JjpsYX)pb1tA@Qfn6M!EyoN2gKlj~*BUuNu7R*5rg3HZy&}RZUxL)?;)sLMsAX7?s&i}FoR}N4W7bfns z`G9BBGSxNH32gGz&cBy^lX*i6s<>pbXUg~e_4#e~5L^YbVz^o(Qv_VLqVz>e#P`o_5VZmAT(mhE~6t>bA17!=a+5V`ASC~&M5?J*NxZrhSx2UqgYEreq>t1YeMzpeF-*8&Dmz_j?CU68=*YoC585WZk{)u zfVV#!f^d4?`RI1LGGj>KOM5%_1MNIyEq8}U&yL<6)v+m>x&WdvSqYtH4M)^E7L$Y# zJFcjPh4j}fs;zde;_-X}uNFp~EoPVMA<(qeS>Uf2j)bEyudP}K-WgK2WM>Pt13Dz8 zGB8B9BydnnT#Q;S`EL#&yRK~0ju=}-e!WOH~3{e{)qa!u^w znW2-4Oi!*^XjPPvzuQ_f(k6E!KtM!kqa{5vrCz2NGMPg+sRl~-2%NPi)i!&f)`ptq z5kwmnYMg}X#%s*yODrpfa%hfljfE#>R&yIx@$Ft5v(o+O8PT?mzN0zk8TOs|8aqgz z|MXHKZNmjsjVidrW|+FY@SY2#Ycu4akXh}%kOu;O*s6)$9AU)N_v2O!$<6me3x;cP z4wW6Z59)@Eg$c|kQg4C{+H|w$fRS;QXS-(m$%~VhdAA#wj9uyH$ zhX=uL^3@QtH4jH4#nF$hMZ3@7TCoCiwVYgHWI7AuNag}4m#u7f6#aXiHs5KI+1J#P zBd+^`W;O%nyCPwS(7E>cPxr^@MtMbGH3pQfOWh6=53Xl~*csThQ*f8eYpqygH9Y#hA1o3*yk6>KnpIFmwh-eWNMK z#t;{eX-c=xDJE{Lf)^NrSjs{7*!Gg>NVQTk zcYb~jy3Y@}3yDyQpCudq|Ji%fW;A##SImR_xEPFUTop4R8yj?^<_ zhpgaNB8yT@lPnI4lv>u&_wzi!eZ>1D7r9_ZE!5H;+fsBi$toZLB!EO_A{QfyR-<&* zk*hFkcEjst42#rR(fvZ{pQLfdTeI+6{B*Fl z`)>dJkI~124=?u)58&_Q_a4Ch<3gV*+|M8MB zJo?)03(h<`hF5$7*QPQU z1{}D-$Z;|XQoG1}++fyS+0Eqb!zSdsmHkwQN-8R_A(W1(wg9gc_k9Tyct(y^BM@lz zs7ne9B3V`!!j^sxBO0K2#zTwZ)$`6om5{*XZ*wS!~d) zrkX$Fu8OV@fpx;wVAZwI59w7U@_m-$b{JQBz@Eo&mRP}lgKXo;at!{hc{1mVEz=cJ zT2Ab~e=pNIKwJ`2JTjQaVKM@JB^2gs;7A<9=@`!7KD5)PcF5lV=y)>pH6#|+tXlpG zxw%S3sYd46g_MCn&&IG0vWsAqTE@J=nsg;a#vmxmYPLyTo>LN67rO<9*tM;>9h>Ih z)t_x%*|VaQ6>Z59U2yY##-y5ZkEd*()4XmJwO_#;+Ncy#y^wMQznMeSC0L>F#nSEm znqz3bWD(e(zxS(}x4WKMQ7J_vIVKNcdIv zwR8!x-vr}Z+TlBpQ&op|N5+7RBjxg206P#{B%7`eK^HdcpedF%_G0;ToL!I$ZHBQp z7Gd)S$K#lGaz#{@;<@RlC@m&==(r3`$6}d+FbgLL+)?L3>`pM|o@>TZQ0e15&F12K z2OausEa*9;gPDtC1YFC02$`vfr#iqN2bK(M)u7|DcS{$#{PJqm2urA4*aL27l+N2T z;el;7M`lhL)L@8!br!A2%4e1F6_1NpPHQn$MN=qTPxiX(1bb2lA#C{;zKJwl>{%)c z;|K{^I7@dSP_XSqP$bUDp|^KR2Oo;#Qi^l9W*2O8i=x=KJmiI>f_A$Swz>>0opnDh zOjRjzQ!k%Fe=UXZ7~Tz3m#w{|P!)=~lK1ljgQyCYvS55K&x$I%1lixy-CWP;P(yrBt6LWjSDORr( z*|?Um3tMt$8FE*L~gyhT15R zZVt~dk{2-af=L(sJ4@yOXvw)M;P{O5q;YrM3Su2n!OOizQN@yBvr#Cw(|w1OWc5v; zVHC=j=4B$REc&Nj=o-~ja4XYb?I9;96h&)|hM0S2d`Ws7Et>$FKAR`&5Ey-qH&&<+ zFJ}Qw9#iVXLZRO(1sprzW_Vzq=IEXzue?x8UoL;G%GKVQv%M*IO9MR6ViDa#2(p{A z+Mdpo8pnQJMmfkVmmU|S9#;9+DzSJ==DIe6E!FX!FKIn#|32CC`)1K=;s)&!BPxYv!5U+*}yZivK=kvdyNAOvPABn#b>p8i@BJz9s-rE9L1v4d;!0-RzR( zx)RkrNL9YFu5PWbBahXR$plOGRm8IU5zTHXo~HXYqP?}pzP*7ie7}V6FOC;6H>>zrPt@=bzv~cI+tfJwMV@rI!$7f zH^4WF6CEU4o@;RwU`RC8?vy~60$f1gZoy6Nc$|#e9jh_CG(oGrS1+Pgj)Gb7Os{5k zQNt2~Ol%F|^V$qLpe^M2ZHd`~vhMLyTel4OJ>xrC`y^bbVi@9tz!+ehM!OeXT?5AH zHw2(5xUnhUH7eh>ysIm`hf{CCAWK{R7JiLBoQk5e1msDG_|7Wk;BL z{i4?oAkd^zpgLlqwNzvDmF3F&moHc8#huECcf=0dwYV83nuQ5j?og%-abZOKF)aL$ zVPm?9)fmZJj0Xc>+2rbIblj+ML3n`VQd^QLow26viv380HF#Duvzf2cq7 zf=Al-(UW@9Qp)dn&D{^vSMQAWI#Z>r-6uX!&4Wu5?VMw2j&IlbEk^OE)RI*n7=9irs~3V= zXYA!{6qwY_lSJjNv$?7|!cP&8SEp|;#P)#Zis02iIc|hGA zw6$Tkn-@*rMccJ^dEvln@-BDUA4mS*>~3MoPt>e{XUBy0KKLC?vf&cZah{B;cKk1Z za?(1&UPfz5>T2ai<_!T^6Ad)A(B9D*b{E2(wc72wydAkMJBmG9jVHZFe)OJM#(zG$ zc&Du5on3F-4U1U7PSw=>yL^$o8|BGebA5g-Cg}Id2(7{l-6QM18r$9oTGOQ?2xmBg zTTboUhWW?Xj3H{BVRkdl`)Bx}k>vVi0ZyBXJm}k4Q>hl3M?BqkCArC+99vtpPd+%R<00-EpMJZW_Vz{Q3vF-W&P>wh=`7(BN#4%Kc zlzGeYJ;#d*3Sa3O1O$r66T~<644z#zoZvfEp(zX8;)5AQIcho%mNP(yr*_3;)99MZ z?~Lef;9h9gsMQz;)#RGAV7R42e1@^R!Xz~NjK{bNaOQTZ;4~M=$x2kmccW-#iw#(E zyaaP}_u~^duih0E3TG=|l9OT_FZVnb-_R1~3cbMewF=|;_+0_#+>)@ajMMXU2x?T! z_I{CK7#Qu(qdj2@+tFqs+*3|RY)72MOCI|PM(wH5fVB#H<^pU5+iur_)Rit)7`mxd zI;RJC>v*1cK8XsIux=cpR(>gQW0zxt=&b5`tg4pgb4rKG2{afXF8XlrYVRO=@i$Sj zS!rNK{01#vCr55+O(wZkG>1pqHO%g2Sx(tQyws&08Z2*>T7@CKJQl4KzpSJ})m(`L zFQ8+pZGqbAs|B6T+sWizyE&EFSw3jjtZHR%m6>r@QFjp9ku!pPm~}LWt}FsvvbRiX z+My%W?+dD*Yf8lwwo9MBJZBllc@5VTjX=c)J52WgmdQWTnVN#!e3`dby#bQzU#Hf! zn`_s!QCVqs++>&NwV=hRf}~$A6SazM>_26y?`xXxW{M?MW@!WtH3_l`^>zp9VBa`# zL9-#NGR|#ywI~RLyOz=7R%;&vsBq0zEqv?1DLrT97$H^aS)68r_=MJz14+NB1XKe* zo}g6A3UhVtTD7-`7skdSL?hb@FKTM;d{({wm1L9IE@BUvTSr4D(f-jI@$zzktsP&h zrQTV=er(i~@Rrl`w`C%O3kY!ZfFml@gLo55Ay!!olau9XOODwJhP@Mgn>SJPs$_tb zmbC!Tl_=o&_tez3I1P}sS);-ju<5D;$P7>p$kRixN=`~}6i=HJ5ZZ>SQA_nki5#pg zgNdn)VbqHsXs*JY==JXY+r3xOx>$w=r0bf2;J%3p9B^vfY;vZ!u7M;b@ zQ*)f2dZRxeD18+sv@d#{E$0L8nnV@l?iQ6h9SDJC3>i*JB0)P*_u@QXB?!D}%ZL}3 znVMBnH5x{YGEd?-p5@@w;Ki9kA0!!qP6oR}1zE%+-Vu$6Z$gCo@c#W@BQpEUL114$~YnsdCzZ;Yr??7WFNijfYacVH!P*$${zhR0_R)X@Y;2jAxXOYm`C? zC&5SBDdKXp(;6mX?GH#N&LqFhF*tt8QR7SS#+T8_rB3OHx@7a(-cV(gq^zU0&|RphLQ^m1(8`t+}5A&(0ZSrJ;a+6sMt(!jVkffUOxb(4C)- zvy)a+HP-~D9_LU^p}?4)#@e%Ykbq7iJx{tM)T+9=hUa8wvlPQj8BMYmQ z1rg);$x>ZU3yDyrsjmX@-~U}};{W@OG^hjOTHWzwL0wmWu zEKogIajwCH2pIG7HU7F}T^sYO7q-17bR)0a#ZWNuZ*M>*WGl6a1e$KBT44fFloeO7 z2E6Uf{J5H0JmkmySI>`oqolWDi!i<9iwq+DD%b2>9~QSg0%{X@gEU)&o>)dm-@%E+ zOd>`1cgEE=(tZ<^+H74Ex~tqL7^H)XZG zB)5`-zXcEekks}esqKGkQrm}gu;FyD-w1($#PP85juQQ)zP>L$&(dKu%$5+QB&6Rn zgiIL+tG?*(>C#B=e@BUO-e;c7qf!i9>3#(A@D(h3VBtRGl&mIGo096g0#RVCCYG(2=-*QY4<!48!7SjFyLs%v!yHOVgj@@iN&9-z5ft zJNKQGtHMr0fydHcH~zMR3!n)Ls3MI;WiBii`jx=)^@hKAccLrVYs);InXq0+|72h2 z>ydT>872)P>}R8~D_Ey=SsFk>YI^Aa%Mxc+B+aymOV9q3n*?H}}_HFWj=`ee$luX>+DDv9^ zwc53oHEUT`0Xrm>le)Hv|wMBv+su_*L0LskQhF+s4#%}fdW{)PATwbTX=H#+~pH#)Az9miSquSTR#7f}%Vy2iU&dGr487v}Hd1>U8o z8zH*-kSV)p27{L<(XQhx6B02&5j@ByCxSm^&CFB~UZB4`gs9K1S>3Z~eu1^*5aKzg zf#dn$ECucC(#)CpC%qkh*icXZ&-%Ip&oH?&P)0~)#3`3a8w}BO7>UKJn_w*8vH*;H zKG?9R0|HhUGTlw1aW*{_XCGfO<&j3mKlQRId`ngDx=B=k`=Q_UMb1CrcIJlnE9Dmx z#&rb03*8yKvvv6PMM2;yU{99Jy*q(`>iy_xqW{@?BBcx9frz7=in&OX{!n*DT|Njx z(%RaDzfM74zl*k>v~RjB_tfIK3PQh@8I1qu%I%yRcE!v&fV?jH$GS!VBudiLK8OUt z%N&p6Q3sq`QyHTw?0l>+9Gh@(+>{Lf0oM zRwxc>C*jlTvlD9DifKC*?b%_e&lO*F68Ih`9HGJ=A?xuw2n}T6y%Pxuv z?nVOKmsaJhaBWiK>ZD!52L*Ru^$v7);r{eo>(^rKKHvJ|cKf|>6k>LX)}(AYyy(=rPEQiLzh2-PIioIpxx=^@iC?w!AjHQ%c5^YkJ_?( zz&c+;kVMY}yoOfl0@1l-Ou9a<(AO2hcUb?KrdSnLQ>z+Gtu{*yLr!<*h$C$nQhwuJ z6a=en5x9s#E|4F*>QwL5KIWEL^$YB+9USYt7gDvK+lxoZ{F4${Mb zAJb`4uFAk2D@F@D+tgl&sSia9MCU240-HcpF9`3c=s`~vJu>Xt))@*NoHEI&DiU+2?X~z;TPmSO7>sx4$ha zqs@oNy6#LJrN@fjN>7kYB0JDW>)|@Zp z3VII0j6uf1vo<6yO5MD@tyf{4Lu{&w0AC`aBIjy1+rE~ev7Nxz9$UaFJUSHw>Zh|t z!NDN}!g&fJ>2%gTM^{C+QQOko6=pGxB=@Ri(anzEOcg8;L0xvPUNppD6gUBG=-<-7 zTnFwmnyaneyHWn?MUQDr!s{WZ?64FbbX3rhstyc{9bp1gZ!9k%o;!@iS$bdfZtv5s z<+G%MzZz2VEjI=X1cf&pURVQfU%Nw`CxdK0tlSL+0rff23l5yv7ip7(FWzx0h}?!R zwFOGGJY=E(r(4(wNLVkk={e@1aE6PPRI%|H(UkcWjjzdO;@b@(uG`WE&a@gvuXe!=OR4vUXli(f&0$9QsF zQcv?MR7**4C3Q&SHBP5!V9vuPBp9zsk^wit7$sO=y(p_~a620P>S0+;gU71ot^yp8 zZq9vI(CT%7&(-_E$gk9L*2Wzl18T(r_4l^vs?^_&nX2hH?K!?qmyPP+srJ@?pH0_C z;{A9>$*d}NWr%W1dJnl@UZiM)NmNLRrqEm1+ArD9?Jxn0?5On)GuExYgZ(ztd0nn6N;AGV1U#QVX}I1vcjewy|s()*8)ZwXR)Yi;j4?!3Jfvq-XHCN zKEtYAP_jE3FY_}ehBDrPYE)pf=PXaq%Z5-AoH})yw6Ld?I8F*aZ9E6kVKRv4(SIF& zcpoA^zvj-4GWl;8;axZs1eq4g;!A4{^&(vWu-5k3(oAqvEd@+a%ZoK+LPtIg0}$Pz zpo0Yr*cuLQZHIy#24<+>%wJQeSlh96X$9r28_cyGpSniZ1Ctz%%d5Z|#sWC34pM?0)so(R zXuwj}@WJH`Uzj~L-DTXs`t|rid*CKVD6;{QqZ`^6n0#T&u=ms~U`Z}NekB!@x8ej> z;rBvm`78?SYa7aJH19M=-wku~6&(P!ncHf)`U-ON-7a5cy_|ey+L@xHd^bzRcPB~s zRtm0DP&t(F&W77Aty-PoZngAmHB+(yF)vTTrpdjb)Cr}FQg&4`FD~r`XBjjK?x8@N z1gtS|5=FYEf{XkVPR4mWJxy9d%rVv;)M|HpR1_$!=z3H3#DH;>?xfN8KAhu5*~J>- z6Y3vr9#cQXcj&6oP8kIA5*Wd39UI!GVu+23%V~SV21~D+hAVh)Tr(5bU1r}hSM#d* zwhApTTfw<+N8d(Uo10(}zggU&lH@Zoe~#cMgh(B3#9z&AHd!*C6{J0`pEiu|ugVjJ zRV4>AAq4yL@?ihd{>$CB(d+%c?nEE68CmXHTttNA@4mFrU~nzgvl43{4?|cvjSifF zmu>A$&?q+yP+D8L42#Wj%GA~?XsT0 zmw|J5Q>kHdAp}-yY2vyS6V=^3G2M+4(FMk1a~hz*Xce*}j+}PK3}it9I5{(lf|#k_ zt^%`Jl=wXCynH8pwj6*v=vacb!F-^~yI4%eb~911Dz(}zFHZ}gli9>gl5=t9vm3|U zdPsNmdrWsFZ6Ca(Ri05qd&NXqj<1JBZ=en9LAW&U`kfGkcWT&rC-CyO9)wFHAYImh z>qY!pm|?ysxnVfDQ(}aAFRH_{b7K^m%IpD! zZt1W;74QcgWRPKXiuZz_vZJ;Clj0uu{(VJs0N>FrW|WE|o1LOEa#@E;&^|{syq>_y zve|M`UEHKi1x4gELFl0Y87pjdLj`4Yzj_@tOV{cukrY@NqJZymd+qsR1!F0MQ0X}T zIPr-KhNa%?eo2oQGCN<0b=#-VPs4t^@ZDvqLptzokvcglV??qv-8_uro2nH?6IIc9 zWx0}gqAF^Gj6G_uR=YwNwMJE=057=m2&Q(E!74`R)U!A@*U)i*Pb-+nu+=MhpRjwU z;m16}T#SaTqyee5I7=?0T&(9Y8F^wL=5~lkMUmE-uZ#k>MgA>O!8H$E z?>Npt=heOsq+dVJG4nGGlAcy1fL5ya>SWW_hP$ejXW8RuSj|rYxb?2xSp_2VJw7;m zQ5TkV1?hmZ-_?c4F8dBF`Lr$+W>y9DasU}G>cY~L(1B&Vr~&J`vP6jT7PVK)*1bdn zXJ#}m7ZOb`Ke*qgUPeYQlp&7D<GQS$Lp=ax#u|72-+H6gNw_^O*bI<6-%t|szm zghF8(pa^p3cI1dz49=BkY}E*4sKMzi_84h0V6n=~CIZW`PGS-)=uF3sL}~Rd2%RQ@ z!!jt**VF%)sZG4sooLe;k95PhPxhcVHN{Pjv-&5s?vAlo3J)mE+dTq{PQ?t5!MUm| zdBXxD-0Se`>T;D|aO^95Tz04($}IL;uYqj`gb#AzbB{HQM2Ru*1Y8}B(#$%L2od8{ zSxzD^3UEfF1a_DiuUBUW#th6L7SVX@c;h!q!YtFYD_Y{9kdp=^s|K>SG0spXW;pu| zO+>sy{FNQRT)$Yy{VgYi_Jc$-4Nbh)XtWel1`s(TEMx7krDF}%6}8niVDU&Lq)%rM zbn3UIVEk3QvZWKKeqArAo#-vQ4$4Q0#X-8vmjGhYQz?Yy2AqOD3Il@c_H3lqsaQJ; zft+(M0K`A%uE3neqDW)*Bn8#kO$+-#2o0T%y_P%7BsDTWO-`ET0yzjxnctg|BARPc7!cosBlhT zv}l#x@Q@hacxA+t~!yM@Ke*laW?pjeuGUzQ3nsfCCWNM zp@S;Yq!aGa>oj$C3~_V;l+kN*UD=XE6<4mPD;(HtkTT*1I(%^;y`8%jt7)NHhB^#S zgUMc6uTKJksTMj=C43Zzs2IlJS5^D*k+03x)WhAAHW*!X>)Csb)qj)E!*t%F zA2|hcrvss%ea4^6*zkri4jtYVo+3-HdhgU%tJ^u$-BpZ(?5Z!*?4s4~rg=8PWZ={TK!He^614$3E)f0LUl;}ezt+zY}djG%Q}z2B~> z_ak8fE(5iW%~b(Pb&Nd!@xTskZ?{tiN-5+i(+ad^y2f27&eEKhqoFl32f?XiDphTa z7m}yg!;qJ&r)T5!Z|cUZqD$%bd~3CuT)b9`?28)Qg-UT{*YITcu=+|Nvp{!ML%n6S zd;iKUCVXbcAhL_$2Vr#QNHugElst=SQ6G+7Nz{y0?+YR(E}dh+L8JN8KL`|>tl#$_ znOy(j;qU(bx*Oe%KYxtByh-9=GQXcTHtDbEb8~b1ss0Ynw>G!89{nNu@-+~!1ScQS z{D0{DZ*NBv;NW^&&z?Tt+IqVAc)PpVc-R;JDSxOL>4R!yeY3mO-Q0kz-%4GwF&L*C zbdpcvX$%^(+2xJ)|I?>W!~1`0^U3zs_8+#MY(3h1`uN!s@qKga+4j@TKSY}k`~Tkm z1Sdy=?K_%Z!3)y2NDIZd0LZXQUTBFg5JwU9ZVfYj3xCgY`7J+NF4A#BnZAtE6Dcc$ zmX2O5F43=)p95Rn!JFES(z!JnO1*?N&X%XAqRJ>9B>l6c0+~u+`jEH|v_>5a zu+g6SZ6H8VOm-DzX4& z_fun$d-y5&vQR1rEcbf%@YCMA7kjVjcOzWI?Z7ui>p%3O&FYKLH+Srs>E+#U^;_+Hiq`jg*0TP zeJ{qg*Prq}8Jah*UxhWL@C=F606WRtmLge_RsQu5(Q1+G7#;RzWC{6!KfHhcSGE6O zgl}lwzZUDhp?dP}!>hfw{f`G9Uhlu%J5)NL<|JRAh;5p!Pts{>-3-}xG0ndw)9gIn zQ03o=@^9a*zg>U2e!kr_ZwTVSS+aUHOxiYqaJhdpFFJ-vKA5Mt!vWmu7ubxJwI53NG`65|>j`g=?3?cnGk$^LfFU1pr zT!co7nIo3%az2Bok2C>5o9PaMpvHs2avl#ZS3%&(X|jGotrTMK#qRF@1{9R@PBjX# zR$X3WYx{W_wtn2xYYkw~o-4xA8?iXo8MOwAfI5j4I9#GuXjSBlLG)b>J5*%_^mr9C zex5w7iys8SX6{E^fY z(+w=EMt{DA_12SVG0s+~sB!nK1a7W8U;oL!((%XDxV z#k1Ks9nk&8Dl3bywVa=1H$oRwycxa%H4W1Bq!MFL-$2<(JcjVg!y=5q+2K;F(<~d_ zgjQibl$7{Rz3llEZb8*-I=+Mhno}P&txa~my*_!g+D4`-D{N%1!U`Mtd^>vg$UyBx zoU7?Lg+ye96z`o;Mx(hS9l8)Kab&BKJ#$E>5MOv@%u%h?kmuDg4hkWZRwD36sBbP$ zzyi~?y5tYnC;s&z?RX!hOXg{t_7=b*g3(PG*o% zb`0Ux@g`J|tPMHeOu_`jtyEV1p%a_;37FI8vm_ylrwZCCAuWUy&AO>h zG|UD|z;?VUJC6DAYO+h(*bUAV#Hf=_uY_YNtV~I_1?n?N9+kmKecYFFSuaI9dTsLw z#uCiwoU>lY^rU$a-apKeDQ5zfPG)%r4d7i>;Yz}`I8w%wvKhz|2>K{CnAlv?97wHL zof|+y%=dv0+wNPo7lCrHuSwty|3ioU7rTP|DL;%9QcmFQF9v^*`F z6;QG$>8-=&fLb%P(!^K_8;3Ay+0x|N?qsSgLDI1he+T10*0D337rgWi9e03)eX@jIvCs?vM-JdlDgVi8!Eu6c9xp4@KB} zdk4xQF}Nyb(?lBbEA?>3MaxlkBOX%VOe<`w^+c|M`xab^I_PSmi>YTVe95xq2)o(- z6d3ciXVu<(W0bko#zeN4<&Imh*mYHsl~r%gNTa$Emmo`w<#z|N5KAgIUVS(`DK_5> zldAR&P#LOg;quAWcEOonfl8u)7A|XO%uVv=pm$(f>!PNZoHFDd%CZ}7 zEoPY}#mHT)?(He1YA@QwR$Pvx23PB`PDQk%3dhrIB8ORqUMeP5pizDurnQ3Msb8-i zTFVaY_t1ok%MQ9buw-0Ec*eD=fUJVb(?wFv>nzTa=~%G08@hBM^2*EBVsgqZ#D4s= z=W{uw*E+YuO%+FD4Y&NhxwPmVsKW$iZo|vm4o{W5j5YcS5x>0b9WXI~tJ`rgb+P5& z7)VD^8ZtO%h5c8x0s~8Wacew(l8@}{>J*P!|FhA>XB&pu8^(!T<_or~Yg>*P(pCS> z>;9V$@AgbpP;?&8Q@m?H=R4_`(d42{2zHU>U9RD|WXTKUtZ+5U$;vkugq5j&&8eL|siFYuqWV4|(HG5y;$!whLL^V{> zwu`D5w``Q^(r01ifWwx7SGcq)+$&MbL?llJW%JW(@j=cjG7xH>7#DQt2Bz-%S{U?-G94R+e(es0!_?H*^sZA;;&hgK zMR+@$63L+3$Lv@dL#n}lBtuz$)KuLz0C(rKbXsERba2_UuPw)Vy<0aNm}Vd`8O=z# zf#ogg5u`*$$3W2m?S98C;SVnlaoLf4woHi6?* zgtvf^4mBOtPP$7+xA%Wx&pes(d_=3rLuUVI0C&@4NF7bP{{O!y~Z<8 zbv%TpSIGiGt8%nPAOb*(f$x>pxOBq;90SDCGbStVE4#iP_(HcnCzlu5e5kl2Iu6At zk<2+Exl!WC4KK`Or{fq&!2@NNrRp5F&3KzpK z-~*K-@P+kUp&puE80mPyHdaW8wS%}K$rWT7&qCar8vhmB!iQ#@bC@N$HYj5#FXQom zl5x@zV`UZeg8EUERVV`ijF%HX=ivxw%%as6AK?szFmEFaBM1w64uJq#&F`AQzNrQ%G4@T80?N7>3>G>PFN8(HeuUZ$EBBY;`w4R*PfEbfS;Re3IrMlDk1= zLTzpOIMq}?9ObO3)0~W&y711GvB;eh2@a&Pyl%zm+)PzOMJY4bnwT0lUM~r9eXW$R zP~jA|RTQJl9E##DA6;p z>yc8Er7*G1dp)z#%dVIznWu({i=9SM7)wW5Qw%3}ZII33jUmAYI2cVxWcZ$9B@*G+ z3uNY22nb9ed?r zHth=IksIJxa8Kz3lhx=0hAPS5$19yjl*3!D50$vP^qAsK8qbc4Pbbd1BOe@>98Bta z;pwDhDf@KN&s{d#RLg4xnEAYed%`%yua#DMcR2J8;#|m5jxOm0&9UZfUzV*@ZS_o@ zmZGdup<)0teVac~Sa`V3VS;)z5fbeC0SV5#;|h!ABEj!mwm zfMHV5Cf)r(*Ny6+TS9kY`?(Xl(d^Qr=Z_635CGGKP;CIPm3@wrM`3O+QC9zS3;LVw zSmJ`#k+ZiTWrPdETx!SiKw>qo%eB$9h3m^9meI80O-&S<;H5TCh;K4Zx8FbIH8gqj##3a2ziT-eU60cXPejIf8h~GHpiS& zpiCB$OITk10RfQLR4DX@?#5JC>}xG#wlP>Kxmeh)o7!HWZ2~G9MKn-BHI`Ept|$fw zTD{A91=837Wpf%jLKzb$7Qy~KL@rQAO7zq9k}VkBP4`F5pQcj5%u|2G->$pSt`w!k zQ_zf%e$^4sJ|~w#Ua(XqatM@^)-Hd<;Ex-%g*hiE3np?8T$ok~s-^gQrZcj|;QmkFqeFQB9Se$cja%mXjY+xsjp}BW9kej;8 ze%%_W9S>bLv|HERZntTT{^h~`r~Q|^Z^brWLJ(xMeYiAmC-$T0x^FmPz#uKhn7s>H zZM@H(rx4p-%G8p~X&wwQikzT@;Fj)e&k%5Qhk7=PU=8L|xKY%|i~gy6OlB3c@orzR z4C$iZkK!@JG9PA1K3!XAKstES&8KD`F0A8m`j3ja0Yz6!qNz)$J!*odd-J&6HixT_ zd&`~?p*OpvK!Z+8#m63B+G|K}LfqxH*1}!pR_3{z>n;Kc7)vKyU!QT8iuF}=4>wS< zudHmi<202O>WTU)3WJ>wX%Pww8;Wul)$OVjmKFIpZg+C()E&0*hCm~9HgS(coA)CN zI%WYi&dhlDMP@7a!X&aqpE?lDJ*;np&9M5eEsMeeWNGuOD+4;XRHm4AHks(6Jr)(d zNX5}|l1SZ1O8M~Mh#toCp>Y~%Y3kw(a_!*(J%y@da zRzhWlL69996jU7QA_8B;->v9S7%3|Pw8?F@CL+IubI=nhN1=8C(;ZX6WooOeo!hl@Y6v9J{Ddj#9YskGd-XBF5Ac2EeiiJ9Y6%cC`LN*~2=8M|58`{fkIVOFU zKrN^SXqd=%4w6JGMP$1kNrOps4>prUR0hkg5=EBCrGdQS`FuAHU;P7`l(Cd5y^zc; zCo{ZD4m$x6%-25XQtjV|@U7GDl-SxATRYkIw*l$A%=G?2BLDgld3|YHGv}%#{P@~( zxy;_eTa!|^@Q*-AL3b!D2Z@bpLv&e6`v8$k{G$(LE^%rIs42*2NTAfA=^h1^u0UPy zerM*+`E*s9&S2nnc3Y^b!tg> zr4;#gh~{HWVmE4;CtMpeAC`<;A+MQcWk_qKECj32yfrC5LQ0R1m5+{sV?%$tsi_!P z(BQ6h3C^KoqczlUJqetI>1qt;d~sAnD=DYo*@tZO+SWeedruaIJX@4+PWrTN5-) z?khddB$%DwCmFuE-R>Qj!2LT|_WrI+c&kr>N<(DB6ziL)cZYvw1|Bo96-K^z&DRzA zT{mj@$H?oI8#v0$3sMK&iN{O<@SC~L_&Jd7+>A^07ur)eD!YhLb${q#s;hsl|4fn3 z_kB3h+@rJ1@6yHP!FT08PPqR>ZzV2akydkBP75 zG10kumx-cua1SmMcjYo+bGi5LJK=DzD>+ewGjiHq#=kdfr+-lf+gCUhu)%k&c~XO^ zuYxLK&?|LO1r=}WQE`t3QKmJ7!c?g(R2|O@D_d=qbA}Oa$E|zceh1X_*K{3_gXL7o zEp#1-ep@E%W*aAG9;B*Sb^i)P9rm70(eKy*Abi(yb1C&+ zO#&V$dx?bhaQpvo`~Po$pMUd9kh`=5_`}2BKjE*t(cSp-$N0;eL`Wy|`)Om7{)#>~ zH@Bba@9h`xLc1T4W#PBi}?I{(|-(L|88q__3#>C>m%+kbreth@QJ7yo1a zFbGl~q9CnrcDK5l8~J>&p&}z~494k(;Av;eMR#_2oBb~qSHb?@+I;rt=^wV9Y;8Z< z+I;lr=`*qapKdbGEH})zW(d*!~2M4J331seOHd$Af$@_@>VQ| z=;hn}E=Up$R9p4?qov@_l72r*CwLtePlZrHd1o69e)elFL=gU-U*=R5;tdSOaSoB0 zWZfJjnGOVWS8u5>FcRbRL>7d+J@jI6IRkY(Ki{2RcB1`-kTXue5bIq$gSVaN5aQHN z2MPCjE{5A3h#qE;DTu1Rq@PBC1zP0s>re2*dPG909!k0)n|u<8cpIHp(BIoW_I8KHqEi$OuEgjL)-l82$L-FS_+K&zDKwkPY`F zZdw7@ogm=0%%`=;`Y`!`ycT8**?~Di$y$8wNbv<S<}~oVWF-rqI1)jD~w?IwP`i-TV0WE!3K%!0<1;9axVK=Qj?cwKzsLj$3Y#Z zedt8zAePa%*o^SHxPcZqYA=>E*bRBIXa~~ogc{PV95-Bi^3=}e>2!e=TiHCup_hRmH6NiRi!xEj6?r)y>jdkg zG#L-`pq54T@Rzd$gSCK5Gi)ceq=nbX&@j}JV~{!)2#?Z&H0_kMj^W`ZFtRGpATfhN zG!gIP(cxV9RxM;8-_EHiXnPu+(_>8fQ_l8 zT0BbDVWHtz!_-=QPQ(Gg)MOeFQhj?{vty!$HymT*G7;<=%N_pf0y)WB45Y&m2m5h8 zNK>9%NdxB~J&DlT08S zH=Z*2^*7N0_Cf}1;D()zB=gY5r-VL zT*-roJ7cWHpA9tZaNl&N^a#;(Ml9I=AR8|y(^m74GyjUYyA*AMuZZ|@kq#GUy{Db% zSHX6sqsv}%kU(~2c_r_|fdfWE8C=Fw@oSp(FXr*gAU}Z)08(_#iJ(Q(1>^xzP0!QQ zvqiJ0KB(>IvxSCmaVEBs>?mxvIi>BA3&9GiA!>r6FA=N||AH;AcR5a3pVsxLc?AXW z(N6c#=(=f{LwK`7Gqd+)F^>lef2mkG7OhHL1~s|JN6f+hp~;(5=4vLhv_Qo^0PmTj zkR+4g_RvgmuxR=w?KT9llx-jw7To|dTK?l7m(^fiSAaQ3d(CP%KUa1qN6lmvoPz_m zM|xe&vx}B4j$A?)BM<1c(2+~0ENhqzu^fs|#9LV1rOwEY=Pmv;DKb6@(qbyxbck&s z-6`5;?$7Kb+F`T(cup4Z-jBZekFihH|x&}8YZ;hLre-Y%$kv@f%@jY z0Q4m~L_me8dY3Hbm$QubjVmXV?NB8*zOqhYOfc#-2pRt2w2ww zs=)Th;}`;@pGxy&wauljKyGtqL7kyWoDCMcpsSd`%4#)VNZx z{{JP;@lxu-F2Tve*8AT|=ReNSBT4?uif;haqYkORs33aXWZ761BcJC!>BAS7tKSj4 z!h43d$oohyA0A#8HBGs3FCb8Z3O$Te@C$@@o`K^ThE6kF;?Pd(mk$SjIs6C_ieK#? z^xyBk+XLRItF}`UU>qh3aG}T}ZQpgc_vzFA`yUVcV5X$%Q~R|LTPT+6UFlFzF6Y5aKJ*t0W>hQC8!G>tTpL z@4fu=;oz|U=KbL>8YltCyJyq zfPkWl3_B_4NQ5SD05S~Du!5vJ1h$OEh__u!|2J69=MeFuXEbQyhIB}*x?Ikv6XS%5 zAB+%-2pS_2k#?EQSv~pBlqtD=4dML_SS0r9i8mN@2g#2!uptklLzF4bkqD z9q$MyE6$^fWK2O`SMF(zUOGK&xYu`$#-H_-F8WOIi8m46*Z?pot3`1B0J-7% z`EmxwgE)j_2_a2Nc`w?3Wrl1KgUtphm<|%b7qTP=4U*8rv_Sx>{(LzGI3JVwx@wr; zQAn1vF1L0S0`anjr6U6HVEM<4N88H9n8Yj5FBKuQ_Q} z@-L+g8c^yq3PB%A8eLGMi;6}KD0N2~9wWS{8eOd>PP1SG;&B;x@wyK1f25~D=MCZ&xDAcI=1h1C0w?hT?%jldAqzZ_%*&h06TuGCk(bPEwt$qgSfH;lV1f}?*rZ52Um`%E zmL4*4Md=h4&6N#4QEy#92Eju1)A2qd7++)QJ!RX6C>`XCcM8iIsAUx)wU z`e6tX(zt0tSe!wCoop-+fN6NZ+rGQGg&QE93eF#H`Y`^^BBOj*s6a45KrR-CAqFZZ zgS4PHK{3)X-G)j;ekM^0qZ`RMDXk&l#f;t5G2$Gar*X8tE|9W5TCmL$OtARTAF1#w z&P@NrD8!penmE4s=Y+UC2eN4;II;vKzeb z#a4mSeEkAc0aTH-GSC^FXpz^?B$DQNpF~u=kpF5rl1ti8`I1Z~bwbPtA0)oJ91N zO?`Ou;&6%Z*p{iU6jVBeL1^d`#mNm?qS+>#oMV%){6eL%9Xg#+@&GOmPpTa;k(=U- z7-jCOWdw)tx|#RK`-#8-qBXHkTyv^KpkFuy-Q3ARPB>j-6HqMz#+VjN}HT)7~|2x zqz)Dwflm-3F4guVH8nA3@;jbM5&ilePjKSWh`SW;#ZmS>omk)hjM}U3&4@@^K?%bc zy6AJ}=rPaGES)95;WK-IRXsFr*F+<>9BuI!xMdUYIG;Hu0l5c z2@3=FL7=y-UXBHKk-$Rr@f$A}8DST{2}*D_Jb|E>@Kt{s&Bvo!ULQMMo(W8aC3FLEX99p^zJ)9&PC%%J2vLx72e7vZ zA?=IUTmASVo|Ev1PW=6OCI|#rSQ)Q7kz9LyvBHL&Wmp3rh~^<6l0KoI@)7-tmzOLN z5m&}Mh-hPdEO>WV)M~QSo>Ad5N>+i*GTat>5|9n`9#E%wtoy(muympPB^#aY>+jxb*pw z2=2wjx)-dnIPVtyz7TuHBggD9n9LT+b>$rs+^U+t%kHc#0P2!SLQoHCgHUZwKEst0 zU3up0jfnTh{p_>rmqmeyK7#9F4yPS?XhArlF9YYUHBQA08)hGYn1f9XNMLlDqLgk# zhor#BE!orFmDTa@f{~#Lty^?) zh6$z}s5;iol?{yP-`630XOi|)dmpQD}FRWhG zui**BX)5UzAF+r3`>i-gxl>5HiaMdjir7)k;b?j#+hyOuIoUrz;k<|_gQ?y;!TftN z8825;HW27?E{-OkPSaDXV=%4SR3b%fq3EL#m&)$58;jLF5hxFVw9Q$88m~x^Yi`sc z$~*v!!A{jRv?0*`D{3<)V;*n&t9xm|;h=FJ2Epp)%h4$P(j8|P z$=u*-OdF$qIuu(_U<=)bU@Hd9XTsjh`ZAnFSH8LdXcWoIxGwKRN5@|LHr?%XYJ&5p zp2(Zg>0rDDgfVGMMJRCPt_oafU{hL^ zAy6`#!{*bGke75PM;t&MFZD3eCZ2#nZs6*>?{MgrcznL2D<7qSqBi zwiHyW4sNt(lWFWt%3J+-NQZu5d>n``csg79h53b3+|;D_b}%V@d?ILljUV$xZ88aDcWO(E34}_&U>c)KmpA;6)J>Bet+y1>OW}gNPlk z9S1RB`w=uIa;C+w=?NGLfjy5|5`Kq@Da#(KuzDJ(q7@=$75%g@m@r4EDdWon6fvdr`8Nb zigxy-r>-oYTfW>X%6Mh>$GwFMc86}f+$!>UpbFg$x)tbYdFSId5f(WTbf zB(^TGw2D-ShO5fAWk^pRR z5pdzs?5JR9z2AngC)RGVc!_+~wz;w~uHdr@Im{o&Kcne{jG=oBy z+2e%xbBG0Pu?Ot?;*X&qnF{eYjbasV1zzSg@#36lh$g^5Yh*;p=i4$Eu;z=biwEy` z#iNuR;z*`Zhoj9-GjbGRN0COB_4~@jAT*UMbfoGYH+;LsLt^fuHL;M_00Hqhjfsn1 z;~U1cP)ZrqP>;z*P-xcOQ6`;bz{J?D43p4~u5}%TALbNAbLOR}9Sg>d=%B=^?6i9k z&!+-TtI#A=2-D{RLFPOgeg9TStl!R~mQoX}sj-lBClOkdVNeapUao~@LvtK1?Jp=g z_bZlDh`F}c7J)hTz)PNcwFbDB-Gl^n9Ncl0H>75-G&8$ezf?WDX9jQVHD&QFtF>lU zwHvg6`fV!yVzu^UHM=bZ%x_2WNUOD@u5+z+6tM98f>nEvmV)hrOGSZHgz2X_6JWy~ z?yDhBfLrc(a?~uwHi{S3@D_BTi@>l-0FstKYQR&46;xgQIB;+%F02~1E-Yai)x-0n zj46YYWF*S7XG+OH)zc-57Ok&h0SSY)QRT9UG|$%J#`NK$VHTgPrKcJi$&!VPdC0+6 zj6hp@+_^H{S2*p|EY>{iI?);o5k7&f=;)KLL+2ol^3IOILgBXRe*6>Ng# z?=sruDm*BN8h*Vu;-ye;jLRopG$tTbG*0+tjgv{2In+BNV8!~MFa=Xeq4KE@>5I&D zpuqSWgLO#t;Zp?@I)e!B2QY^V?gepPc>92k$<)j=D;;A)Xvr04jN> za_AQCgupG{sTj6Wb*<8dtmL<)z?HTc(2cFsyk9@ilRzV-odUhuO1%&j0?h)L0Ie@l z!j-pJR86*92=;h2^Q}^%W+Vt4`+Bu+^QiT0-WDV+4-lj}gvd8k@XLtT9VdIRaV51o zn~g7{E(9Er(SJm*z}^^~lV!vG?lL{`(&(+22m|L56738YkpEH`u?qp!SR1mw(xN z|0-BtGVv|Nux~JX-Y$Ul@xzC={k`|QFW&A6kZh5x0fw#03t%07I@tZR_v7Do=pbM> zY<)Ug=GFx;UW)#_`tZy9o#+5|B3T?OwyYR}ok`I^j-BE%T*pVLD;9<8DZKghBwGw| zb(5&Im@ku!k)2M#0u`JG1+B(3C`ff?!SV?3AXLwRSKl(=liN_*1E~0V@lZzL5pVVO zf}&z$q0+-H(`vid2(oPXoqh|eG*(e7_x)`M$KxLz9J4 z;-5E-FiwV#w}5UBg6m^OO5DKWFlhi_K%c*I(F%dufjHvU;(#*2l~H~zm7hm3iJBxb+OJb?qCZ@lo##+-B!?YPbt0v(v};dAJ1@=`ei)8jY_!6%t*AQ=uC+j%wu?>Yvn}wpz`R&Gy>!<3#Cb z)K`MnKBOH8L44a~j3EbZJN$744`?q$Nu&HWr}TKf^Xr^duW~9-dtaxgi(+TBy6uN1 zPM=l�Aeyw`Ta{bW4?-ouwCqcXNPxqYYD);+}*s=Q15ZIj{hC#Vm(aq!?>O7dUr} zuWEiHSTuyL5Vzq8yanj0JLOd}J zB^I31PkV=-$SLTfI9J|&a_FR4f92Wqf|;pJW`bRLCVjBwQ|XnzQ}JHw&g-g+WkWIOZdu=R0cpz88G2g0~dDABJ@|pix0eNyZKrE><2QD-L42A^64xIrjP*~*m-OUU4Wf#2eYFT^s+jXh!;YH zg07sUx$a(J&nzUU_$E54$e(W%|K%l=adN5Lv_n=P7_T=n08R~$Z9P}o{GfI(pi0y6 z!UFP0Zu8%DltR#sAcj3k&D=hcJg7dP2R9m8Kqs@j%g2>r8e!*3hdT6UVhmssj)$iU_Qj zzvTe(16unu98S*Kv8zT9vV#}ZsqNYo&`7$cV$`Qo2#hcGvfzXcWOn;JE2G*S(mF;{$Uehpl;=-x!InG!X?h;P2CBxC#z= zqww{2f~h$zdGq%!FtVA0{4)r8K-Wx-qPGN~uh0c%V?^krR6e>zA74=N+- z`04zb4o}%U4sB?skc_8;i~Hmf6ZD}_X){kAIW*Gp8D)+9d@i|To-mZ9(2L1~WoyUd zk-WCx&mA&o(7hmHF-TR?7eamru^v7{=!~Oja$zqc{26m6IsH9(^(q}EMFL; zx=6UFoB#D8|Mx@wZ<+u5*`w`0Za&|9$p8J2|68gD1Un^2O_JX#19*A<@2%}ekDfsO z@9if~A3uHk4D)|KdGe6|`)lR@{&+d$lqE3Pmn5R(x6A-OQ3<${>2e}V?uqX!rRDBK zpO&+6!cEU)Pq`Z8e|<*+)oX~&&w0c9sGg1&n6#S|YloO9sHG~jm1=F5lA>@Dr(v28 zKxW05&Kyv%t8`-E=po~}V}q@3S4fMHQ9|sN)5TeDi_YWEsZQ`yT5ujuPX$OE= z=Jj$+3BF|DJPN>=O&4*B7iXv&BBV*N_|FdVTTL7tcO=MaaoMDgC5hica|sV6$a^`@ zcC3hv$+U%4+mNSuJ0w5o3l8pDG+9EL7U0*yc#Rg&qM^YKm69eNwt8Kbi~baCkv;iP z{jS$J%+{#7_(Lz+atsAEMBQ8!XjRF8t7|tsc9KS>wh?DH!b_X*ZjuR*;_+CJk|c)J zGFpx~wG$?s!6auLl^F#*kkE#Y<)#L6x(m#_SYs3Ecp7@4V;oeSY^rj>Qx+b>MaN3! zMCorBJCMB|MClzT%%kLqbn!%`=_ok9P+p z6w`ZG2luVmxH95vt-{;~4slEedhwpqS$U{;vYJW2>eM!qSxF#GV2cin7tJb21-lFP z9vZoEs^XGOM)#d!0{UvZO~syo^=j=l!zD|vsKHneVr`7rMPR-c7#FKriL@#8Dm)oO z6a!xqLD1CtGcooF+-H$;0!yeDAw19Nlt8yX1Nlm2@^bg>Tg;0H$!6%${`)t32m7B6 z;c+@WOXlf9zIpfY!vViQrcpeX9df5Je4ttB%ubTUMFI(5G|!mJOM}sJit2yLf&(lH zb_Co6+j?`*-r{Vd6${@X+wUQLuD}g%PsM$qianmgCBSXK5(pctIe4^Q+3}{agjvPc z*~ms=3(%vmlha*Do7-dT5ZAiOAYRL}%w$qa5Oq_&IMfUyt|gV9Ii@)lC7n?rf5xeM zj8UB?-e}$ehc=~)J<5S$p)eZ^mUFT0nCXXRag2ubaXHfAyipdyR7P$eci~wgF<{OwIrI*1+QyzhS&bGrpDxL{D{l@-NG1?i z0@w&89xCxc&tiC4E~n`z#dPY%N}@ao$rlhRBA+eq-q)4EH z2(4s={qx3PhRwhM9#`Bk-Wp^0V4X#|?p-phd$4qg^=m&|V?5mdJ?Q`I-~T<|ezx_; z$B!TG{~qrD^!=YSM7zZeVEO&u_9NK;TTiwgZ9RIlxwQrR|Ec)*aR2v}?*B}CNZY&; zY-Yt_?An)uHEgcUVcACBFxCxO2I83w8Kq=4DKvrp)Q zj6EjgfguDS-RcJII)#ud&9m}EFtXcSqjwt=nC!wfu5ULUbvb02*~NBa)hp0(c6ti& zwD@P1^ChMbQpV|ttoc!0lonozNMh z=I5zc`>n%toC=)19%mODFEdbXZ2VU`joU$pFLn?203^u5(N70&ZJ5xIfh)d{m`iTj z2l?^f!_WJ#_6{t3$!bC{rmhL8_o5@_75dhReifbb0h#XdKBfu+r&NFF;s0_F&ER?+4PdK!$dJ3ZdabC|I~= z+7(>6;hMn(s_Tj(e!YUlQx*L%w0Z)ek1R~w-TDbc<*{0-xq`s22l6lYNi~{p_do54 zS49)THD!Oy(!oh6D6dwa?1`qks*^t^Q?W6|p-$>hc&~Op?SiWlzGsO2xA)?wAB|~8Uq9@H6|PZr26a^4mSdf+VI^mNQl1Y~VPmx|D>7P{21t&iw108p2MY>)#V)-5Na{_wMyO}gYK^BPhc(L!n% zSD~a8oJ&CQe31j+)M|dyH2gDM!*g!sjerUi?+bEr11|_L|s#NL*r>0|mQ^nCy2{q#Nwq3^J|b&5;1smgU4-@xRp-SYXKu4K_NdJ zIHTZ56h5|$1}lA_tndp?WOMD7ia8o`%+q(yY5O2Txwmj-MHj&Apn8lL2guP zL9OmtHWAO!DNiW?nwHPG-os|3;YRwp|;<# znY&J+)+8CG%Zbw?cn^(>?s!2kY;qw>hUmnYwz^9tWR-T|AgE$(@}cMk*Xu;O2@eG| zb_DSq(n+@v4f+W2uwvC-0X0^=d&G~^P6=ZW3inV=Lu<#@@ZxIZ#ep>nxZb+Ns&NNV zILf)`swf;LXJl^nGVIbMGZax_Q=F_xJ}+k#I|41nVZm`XtZg9kdKef(MTR7zut*CQC@&aijhbo) zgWxODa>LE1yW(d^afqhV4Sp*Juh+E^r=?+rrGaIR55DUZ2Hcu_5NS4rB1tJKLNe9@d#DZM(CTs!oG_TJ$JF94V;==C$Fzc zsT==CZV1_;m@f5I^w89e(BuRxH_bxdM2CwQ45Np}gpylq&VFOygffXK+M>Mh#jbF7 zn>*2{c}3V(Q){G2Ys%611@WfBC`n0QtzvXRVU&^glXfw6m1&gZtj%`8Py6roKK%5F z%U#PsN zs`p%@41z4hPds|7Vk;84g^R8QDfN;sF_&feN!ra(h6!26|ve=m8#Eyc+|~ zFSF&GNt+3Vzgvguk6?ofv4D*047AI&4jwS8kCkM{?23S&bJJpu5C5@ z&GymevAMap6!Wj#z8#qhpB+&_=*p1>V881MB9Kgnt(NLxyKR#X8AlB}s~hw?k8>yC zjl?ua^5jgFt)#XvgD{;0mmX9oeC-BQ^-4h!vz}4PD$#8E%y;we(`NVAES;j#JAW{o zez5<3u>bxKwEuqo=<%axj~?v5AMC%C{Wq|Hw~PK%W&geTXzR%{wEuqe`0@6W%_k85 zY5UQG{rA_h|JFQaqAsA%Qo$0>2WOY{13wu~^$3lE1*ki1?d6GPY*ZMyAmFWyF3^I* zRux1Y_S1&0VIDw`f*}R&u$34oFm9Bu09+MS&;rtjTMeP{QT=wnUI0_T&M9yPe2=BF zFdf^2fOzcj=?c(6dnh1oK&)3Z3&&vLIf3B@`2KDoM2(Z>A~D#M9w z7#U$ggQoGfR(YH__9NgXJd~kla8#|7?gY{Sz|#CB1o%c|??e!JUHudw;sth942sn@ zYrid%`Q@7UqJvn%uSt3eGT2(%(7`NVkUK?Qr>J8-RrYhurWu`{1Jhff=sP}kl+Mk< zfUjZ>4Yj0{>N;F<+J(q7F%T<%IYSEu)phQplmDHcLJ%7IX20ekfSgR%b>e7$xX^ws;NK1FOL1`{gy6bV8F_3L1fpJTP1j@}DpN zJ$mfRf7=i8-&c|Uo@@EtxTClCAoLI3wK|6fu6_w3R0?WYg=zX$psrvFkn zT~Qxcq5s=@^lY2-e}egV@_73Xo7=Yh ziq&N8V7O3G#|$_3ax5kqoZ$J38)FX2A5bv%k6DiTCJN4u61|Rx3*}zMS^n`g_@vuueO@ftSxSvpH7+5@Js;Yd6>JCEWJiOuUov1SVzr@$A$Gl=L= zz~pu72l2u(DnGa8lGGzkHguD8M&jddANTq%-|W5o%l`Wx8;x{iKcOoYhBi$WU~c>c zyuznnLTST^aaA!iZ2lDtSTUE8!Nr!`9GfwFF$6Eh*)iggw>CLAZr7*Paf>Zwxk_E*inX$QQbY<`bvj zY8lc+M}FpP4m?|AjBBNblvk5hPNB`JBn?JsBh}Al;L6h(aDCvqN<|AsrlQT8Bq%C^ zxl*6)3iW&J;Qr1MEO|}!dfq&%ys++QLlbvn1h})A(SE@+2Ha`2tQT1yj2iTU4tW&G zWT@!lumEEc!`mB9mj62Z@E!t>q5TI^+)&2cD{DZs=$bpxRTDe4<6^55HPsmGx9ahA z#|9}-;sKQN5dvqS&;WK_m8TbRhG|+t(bo}?1`)TD$>Ixg#xl@opM@dxK{i=CXSPCB z4|;LL7(G6s@gH{rrPXNNDjGLVJ&8ZA04OeK!NR6pJP5rq8}c}xnVIC&&l}?#)H?Jt zI|1VcBjd4tB^LA~c4ER@`KuMv)>%tRJaHDJw<5!puEV*MtEXLIWW~7;)vI0rMy=Yb zz^_xZ6nr+w*UKqCb1_-oTMo8qC>=qAm(24G{7lO1>nIffX`VQ!qYSp+iD+qd*v1jw z8hi_d_UD@Lqp&7t#?e)clyAEyc{UbZ(mC9WL@LZJ3l5!Fb0hcmyhz1~^-KoQwtdpc z2#jn1{4R=5n4RaA9VSa<&9qZvx>@YxBkm&ZtwyK^Ru+!0eK6(ZQ0=*3+UE*}TjVnX zzB|Vb;6T2H@6Kz56j??+a0*g>O)i8#x)Kd-^Kc3qxrO3j9X074CUG#zx;6Xp;mm>GVJ6Xv{Dlfq^6tT_d7VtTlqyBaN2iDvM)-3izm zB3ufX#go#mB7pYgigIwQbH`Fb>>z99Pt)`_aBCexT_O0+(U}%czGzMFV%+OM<=49P zXeCoRD}X^=YRPpa(UdT5L^rExCd#xM;!uial6scW%|KF8&eK?)WYlc;qCdb(vV-_S zHi9$8Nt{e-i_1nv?i4#M8!RVsGejKE6YaMt?C#h^GG{|Npb($*Gisr_X1K(fD*k8N zq3x+b%Wq5PYou{i7StLyY?RAh^cn-!b0^;uq`1F@pq~7;OhLCqzS^#=4o0s?In&BS zis{fzJ=$ysK_O}@fZ@)n2M*_^0vv9(df@crR=`vR1AvYBIE(W$b1GECAfAag;(bIn zqGE9`i8AfK3ekK3)#BN(+DLxm7CI4TzuRFB9~*?2MOKS@zb#`7R9(?3V zTL`@4Q08_plP!iwatFYD2ju{Q65oHPJWr}vn$%@zepl=e=@Nh-C?x^>DC3tDwFz=e z@~!5X{aN3a|DmsD^U<9VGT`Uv z8%@I4FvA@i`vT4+mtl1qsRbPiwW5~5gk}Hst@EgNbxXq7GfBwZ9i#55>Z((x&a2MD ziMmo!L(38RtVflmoK`?wlB~ZKl8o$^pMbMqQ&1&S5q8=*WM?%}3&-lQ?+^Bb5DVpJ zA3c)!EUq9Q$7x1Io+s8lM^J)pdsu7<$@XglcD_9;gl2}sI<+Owb{Nl$4ugQ;X#mDc zlY+VPvMU$Dl7F^?maeZNO3Uv+k@AB8?eIl^vFBn78dgpner2Ty=OLR8i#Eqme?x9| zC$|r0e*al)LUC21f>Em7e)O{QsNHURNu?n@qZ%}czKgbGk{15X#tG*BdDL{CZ_)Ek zARAD4!kc2kwqDZ{h#;C6q)Qu7)n7&sBGsn^fdrB;sgZ9xH-MMoA2q6BnZIo|mLcM5 z@Bj_>#RjC^zPO-ic7>Qgv|3HGg3`)ADk-b{YdQMZM>-;wM(op8hgt>^jMNRIA6#kX zDoUjU%8IKHMC-^;tqQynu`zCj%`4hPXZ*dCm!&egwD}+)0faG6Vp`ZP;yI7n){&qO zsVeu*mEL)g==^~023lGn_XootAzUhckPF^=37!d*?w-6MkS;@DbPff}1Thm01o0Dy zV*8XT%SN?bT6~NXGO;`*GrcK1!J29gxg~B;8rF-!S;jS90W>jE25~S!<$!;@y6_2A z_9z{W;+KzKgC&&^@~ziN;owvc9{)zihjPs|Bb;vQYO-IWMnh~}K195oK7{@{Oh)K1lH#9m{ZXXOV4L6U6smj#N ztvG6!(Y9=P%pspCi9;apDD9i(F7v#R#s*+NhW`r94gdsy+K>RSG9(yNn3^HF@ZSO% z7zboAm@sh0xJ%1JM}6^-1Qmn@&_gRb9!=y36rx$UiJsqqfwthg;CHY+HtRC)a(si{7sVrkG*V1lHOOk8``GQ^Tq$*j68FiYt82 zg#6kKj#BkCiwX&R3sD~;I_AW%u{kd~>$ak!p3CTa_>uU?+?$X4kf1gj+&5aoA%*L* zOEM<{X-1gfS9EU8~xE+G*UyI|dMiT3S znwmp!@4)r90Np5!Nj61B3?)F<`{WK`*;_z=w}i=-@9iyG45YEcFfG$3AylUXZV=`)SS0G^Et5vUh- zT1?9TSj;04y0b;FAqQRrGWivGNFdN?JQk9Qz-0kPag5FiOxTY5w<=7BVBuB^0co@f zIhA%Rq)s?Nr`MKb!}waiHByu^*R)6qGj~T%)JkcIS#F)ffCNlTunzAH-qGzU@c5F# zP=P_#+W^9$zR>MzFa;CEBF=YRq{4|N!47CbA5@HH0YV;FXzm#_RVy2B^KrkpfGSF# z6>WV~mEZjoR8?c}$bfxc3TwldN)jh7DVm6rh}=v}mzPi>wa8`M7ao&D@+)95&-ada zEa6a##MzYe|tNAJwEtgA3K+I+ICP%d75*$5Fn(74L$B?v^rGLp)i-)4hg^UxWPBWvwXb zdd0yI;N7ZN9B!Ptqg@tDJuhyUju`jYLywy#>;e=Fkekt*72+pvNz>dD@mTK4t&fpQ zQnb(Pz0(M4;WNTMgS97-&A-Q&%${?bPhLAW`v4mOs1!MRkEJFI$$ zRR+{0Ct+hNy17m2$e*@;^~m~Vi$cbAulkNZI~y~GQugpCMX1~ol2LFnmZIttWHsn9 z;r&Ugu8BAa^$o1k;WSMaQm!6EmDaiO5OScAZ>*|k5gTL2I$Csqu)vL(rVBw|$WZbJ zX-=-l0fFg7(|k-GZ&0dnU|H`Ou@9|~!YG|)R2Luv5na$;4n8ckvyimaTwKh@`RrPH zHIobqepSBNXcm4jrRcUVluZMiZ zm5j-Kli!qvV2hH{J>4V7i!}sINThG3ezJlGCszv-R9ZH`AMOYQN>Ch1M+x!Z?MOdO zNwekn^7sA9`;{_`E$#=J-M_Ri(cSTMtCa9Vld2c+t2~1=3a}fdfh8wZj*Q2QAWj3s z18zVF;*LQ3-94@p^GgFo43|kpCKTPBF(q;s9+nD!Q2+3Ji0&>wOOnYFv^;}#GPEWs zzx|L(C$eX$V9HgZftHMvTscINs~?p4**JxyBgsRI!L*e)vPF(%QZOe9=r{jaF#t%l4EwC z4GeEHZ0iEo?qA8D`&abMzQ5xrFe$D%lF6Oir-T0Dzqxbwklyc~Bmd_^{f}QS|7WoN z$MeTq-#mFZJ`ednWnsHVpL+gRtN-zI^U3pv`X3MZpC9)BOXh!mzV+hC)8|hf@;^V& ze~)dOhv2U&{Zcc{gEpM%+IN51_&?SBxOVvAnB z5<_Ga?_Q>5U+}Yvw{Y9qrxrc>bT;Kzr|`x6AM!cG_tJ}r7@>!6uf2EkaoyYV^a4D1 zdxM-VKg8qYca7hf1u%Zcl`3p_HFk^?k@pi*xufTiAtz@_K* zewJfq9BFr#Q}Q9H1`I+=y{&+zdhfy$UYz{yfn`0&e-HHk6Xn0hTTg_d_8|W~$bX*v zr|9q<1i%ILKb}6`+9LVy+2beAo<0TnPv{{J^4}Md|K7|;vwR&r*?j>6Z+w~i!>zg6`rJGK0!bf6YT-sg$c?T_Dn# zv9duXHbU}`XDO3vie~-7K)9YDQKB5`o_1~>C()T`yG zM=>og6a8QO^u5`2m}43zu5q_?^%H(JD$dRTE`b^+dtx&v+iI}Hi5g-&znENOFxJE? zOD9vGw&CJY4*Y#8cQKnb8Zd(lGIsG);Iij6K8t%jxc=((z;WfPsC%FPYiPN4SIV{b z{*6WhZh(C`IW`iZjwkK$Ae~OrYuru!Q~R>Bp9bYN)dz4^*hfTq-b^MV$T9<-D16`h z_c?3=d2)2P07>*+v=NA5m0?DqUfe<&a ziXjc)d60c?PCJU{`FQq%&OA?J16<}e6Lfb1KOsG!c@521s2ZBErlc)SnV@vBfFur_ z@HPZ>j+h_NRJ`55bE?bkh!Jd%b2vb8AqJmOYy0{gx|Goi)r*#gy)y9qhc4u7CPo9| zk`0G>KhMT}_K)8c@6t#x&UFC;`{OMLkDtsT8EGVGN^r}ZH7*PNip_&1p{Fk_HWsJ+ z46nKSSw4zkj*p@jeA_Sg>8ogGTnb7gKgoT7y8vSnv;;Q#1vn2+QnnI6v}iWqBuX%B z7cc6qr6jDkL*|PfwTW>GUq{cA=n+gL`a|@h!595T!|z~7y(D@A;g^&4KeB02#=31Q z8UQ+VH9Fb7jys|e_z7843&UQtqD~El3t|*t$E<`b9SBrqjF2;ezoA8F@q7fRd3#pi zuEY}MD-`XRn)e91X(w9<5%xwG=vGCH-s6zG`yc{Tk|7HP?Jr6cqHb`*uRZNx7H_Bd zS#G4$R>wb*krEx8<0q0A3rPu966W*^p)P&_Q#+jxhk_-U3-M6H+7me-Aqek?YuboY z1^-k2%k~6C`hPJ27sel{ZzTJw{9{5ikqBeUOuQBE!T0AcZx7J&Hz_9lVm_Wxj2cs$ znI5!{Uo(A(?VU^^$>mfCZJ{q}wC_ZRf{VcSLe0Ya<}EPRUax~3z>jjL*c5x~(w@Lk z+BajT((uVW4K_XvH~v^~Q}IzqtI`t>V*Ff-2C{Zs^owoW5zUi!IlqXL&m+PxrYli4 zVp;YxNHXLt9G2r_Q=&_B-aVPhkl7cVVR5V(85BVxA$@450;V3KP&kc7LeLjD5C1i! z4xyuDBSh}BUiP8bt^%}~!JKg&8Mk$6qF8cMq{-?#GU?ahJfC4$+fb$$Sf?Yw`hk+- z4NZYsxd_fv)DeM^Q5JeVdXrrg)9ZE00F|O*l8rC(X)%WUtDN`b4J;m9?wR;VR0(YCi4!abQf&5o$iSMFcbLOF;RB3BtC(M}v z+Ix722{ncChI|Uctimv}lLyOH{Cu844kzd)o<%>rdmp9!el~)GBr(`^#jMb|)FsW% zo9+Fhy_dc1KX327-v0jePQ#4`^qZ`nJLAxbN9jNE5o8RKm}n%Jp^r?|U_BblI0afLv)%vyBsvLtrGk%##>#a(fSYcXLre_R ztx6bXK(*?F>L`HVyfCcAJAQ_k2hv^TOf*@knusZ-Eu6>1RA@>d44MJJ9!ko%%!E+u zFvy(HkOu^~nsCF{=|8R`Xo-<04m}CR7cv%d9vyo?J-LQWA%HC7;F2&LWglmXl`ouUKMC)HV>am4TrsG1PUp^9e8f+T#5$jqF6#->0> zD_WtJT~^I142nqRwe}!8ou9?SihE?lJ~4+25+-ik@oug1EwGm!h2n_jd@Qtwp426S zTBH%n4EACQY?v%dKJDb0C3ve31i~8qQPUgdA2Im@Tcsf`24uec6m5RB9Nt*M9{d@k zPO(9B6SK~uvY(DJBh&WuqNJsbz#4JZh9_~w6kAdHF)zCt5VW36 z#Oz#>RkgH7U7_dVr@oFhv-KB#+LhY9Q+&4vdE>wo&nIyvh($@&F#fNy<)aK;e=1pL z^K~?^@RO+@det5w{`R5hC-fxv1xG#SNnrRmv{-OzzW1h}PtCZ8Becj?x} zWL^(T{?(!{-cPXnyKZfUd8ByayrB*uVyt&yejGMRUuJ!ZyBB{z6gD+D`7Cxbx?25m zX0zbM^&+d;IoxWSW;42scC2Q6FRZV)O^X>-2Xi!v(lmi>Z9I8leT4a@x^|c8%q|3b zyO>`XSKVAOuf{4yCSY}KF7M#k7-nDER~f0gbG5p!bNorpz-reK{wiV{Z0Ed$ zsg4A)qGi^?rUw(nVZBwM#WH3; z)%ai7>K9O^{bWPtF@rJ>dP9YtOQ%znI*4eucrnJHwHmZ>;w#lNCiUPv+ycwg5Dwxr^&I~Y@WW3O0!zI3<)D5HLMmM|@Cb|T=Ht%|m= zFr%s9XH_k2X4wH&=`)BJ9kT;QA7eDplFh782$;19hvmh-=)07WcXR8pW!d6XMS?2i>&8~On{G`49E!9h$uQ4eaF$}B z0gmzySU-2(2e;-0qLb;&=UI#mfE#{4%3^G;rI=g`d}DVsg$A0#_C*7hG%xaT-Io?j z__!m7YDMbL6Iw=goyGcX?Syxs>fcllWJ+>Pg%DhGd+DK;A#Z(W90WHnkQk;nfx4h| z_IP8gbv;(CZQilZJJoW6W_V*Z3r+9t?0u8FPlWybG3gy_j4@(`$v%AWK9|MbGY@@n zgZ(Ra1_67$aUx)Xj0*qO=K9c)u2M*swd3!Q@nUH+zNa~3l(_rxZ`~8#@y#fuYv2zG z59Y9UG4pG4i&r|>Y5Z9X$TINf%bAcXqLwO*$W@U{o{6q9`U?hP6S2;g6HXQgyW+{oEzl8H@N}~sWn)lHWI%hI zFk+NZ?A|$m2RBUCy6u5cww&0l4i@V4{F95Z>pB_=<$?hp+u>X^UcV?I5u;)G_JY^E z_%1SdqhqKMDN5w1^Dg(rz zVe3KBpMx_h*|6te3gm5JJO=9Jw-lk2vw^Y+l@DuGWKpyZ`KTE!=U?Gr%s#S|3WSyO;< zw^`HIT|-PU`8UufkKuC)Vo*B^J~YLulB z-gVll7vsG{LVfOsjuqq4^{?WwwX{RzVnVaj?zuojDBmLtKMPU#bb^hC@bl1W2K&E} z3*lXHkwGNk_!(?CyHspX<^T9h=|4ir_2R}Vn(mTXZdk*mva(_0kun!aP{rQ{}!l{MUOfcnc~Jj~DLP*$`miI8N>G{Vlu z^g@UfdW7%3`%b};ue?3nNUYWbK<04Z&m?!jk^YXG=h?uVie5BVP-^1t0N|KsMy^KZ7kdC33x zkpI!k|LEp;UL_4=eg4O-r_VY6_mk((o;`iM`M<>J-+cU#|M5%Zf8_FsV23d)z8mSk z$*-v_=2J|IG9ZUw_Tigm@~`smBcUQZrYwI@Ick)jN>9Ic;%6w0;PzNZFHrHUGLNA0 z)N}j_CvdwsE!(qcmX-Mo3WCxj<((*A9O4UF;X8cWU?003{0CFs32A0BP0udUPBeym zZI@zQfF zd&aku@3xPQb`JIrEhpi%$@T2K7_W64$Kf@pB#1ZDwf__Uv6Gh9K^-C#U zc%K#O1r8Gb@!S76ZvS$1k_6{?tN&bs{Ve;_d@@@A0Wq)aBhMlEqXiDx&RpiVfn{p2 z*n?Rxs)kKd!Jz#I@gIiL@!BuVllb`mz&~Fn_^Y`*T-gkA1Y@zUI?Y*uv6*M1zBrr4=DHHk zJrh6B!;r=@wu&}Grmd-2w%IG|;4T5pYZvJ_g+#ZaJGf}{{gwF(kkd^^WS?326kl z0-2p>*U>|@rS&$4)JZOxj<%Y&Wmqn+po;`ySz-Dv;qQMB`yy~CqJ^$kBZAs>Nbz4))$`AN)1?Y3Hxe_WPr^d;4OmI0gXOpv zw9%DN7c|5&qCKokD;HPYSV(KjHhsUp_h0XKbcNSeM8QG&dr3v8hS;mn-Ul#}Fn}B_ zd#pzd|31DqBvS8&SH4{c0YQx^Lj^Y#>?%)m)oy)RzrZC(6LWQ$)>;U`@$vlOvDWSOzjP#GdFZKKhLC|3U96EYUaDf2Ot7KtYmr53~`oe z`Yn6zNpy8CG+WnkgBy3$MJ3lggCg=}h;K{o0IbabzsW9Iiu*VpvnHfJX<1y156iRA zFD7D4!qpa<5v&heA2Nj;kkPtkP4xM2n15`KiYx052{e5v*_iqX#>|Q_5--yL_P}^m z@h?2c`Jz^u34)r`r z`SIE>WAo&Ra}E{{g0mODk8!zT7`U}3Si@>F@eA+PWBf!b+DzP3e)2R)J@ye9c?^q- zeOSbIP+StDA8RGKC3V~rzlOZn4cSA&(r<*`tLW|26;NxFw|XqwAHx7Aw5}xBHy={> z@W@IK>{6@t*|eBX;?1P8wIgH-{A}JBC_2k7Zre8$V_G=|qeMHMfK@KOsceXx*#4qW zd7T4Y-<$g^(Gox@S|VYnZ>iS1hGg2{tqVG88Y6lttgP=NX_46j@bfUgm=G8QWx6Wn zqX7o}MC583fgKfWh=7*@jpq=bIUZDg>b82v-See0MKFFoP>cq=EaB1Oqw9 zCkh8Niqym01xA=QPCwa7)R1kllQTRm(RbZw3%!z;l`(**THgh7YJs|bS1hz%PRbzm zY*0+WZmlmaCOT_0S<%s&d>hoCNt`S*z%_oCx2{}UqRU-H5$wE@AaYxa&dRS4u2lpj zDwT8+WYcu}Vj}pmdpwjV2b~4p3u$}+POS#!t?JVaQ%Fvn@i0VibLq;oPwCOaQ#=-{k{w1|}1oT{LVG`po8*1)lvm zo+|J*=DEtx<2f?dmk+8UYusxiU}5--R~E8-+}t*ny-6~aYKRrcLAfydV=2Jv3pBZe zJ{eSw7F;jL|LyG`?i?INZx5oqANJoKfUWcBt^7=|NBjx96(*B^-hTal=P-)@*ov(G z=eHKCXrZ<5*j4dQ5ZRB{tRJYJpHv+*Y@$!J>|`k1@{!V~Pm&-AUn{y+b?BeqH3h;*YrM%(+Zq)mo?7#C;aQ5_t809YV2p%GgAw73Fe z8?ADf36>~?{y50Mr!pNw$eKime{4(uv*{F9#~VS7NZ#EC@})_`az37Ci;V5gsns)a z$f|Q=fyA}aAlWj;={U4De8-JM*SSLgctD50G+Y4c$m>WMpy(Irtj?S<_`m5KAHgVDG*cMSvY#fJdIkJ1eQWc`mE070+QXBNSx12@6 zyI^biH?}SRPS(SRY8f|3z{Aa%@<>qOcaNp?b|Qa*Tqb{51PY%hYUFtUEJYhup%2XYJbvQGe{4O(e|(Ynj~6=rb-5od4&~pI4uM@%QG_&BsqSo;}&zd?wESlg%d^|0{a> zaQ+|S{~q@Li^c!Fc=7!4i>I3p@qZ8W-=zOC5MY&%zZK&D1hs!g^#8FCKAt{*EYAP4 zEz$fT{_o4g|GhqZ2U+6gRGmp^R_SORNP-Fjiy%`vTnLY6*%_rkXg3;1=XojnabkO+ zMz2S&%gOiCd~lX^q92cr-bJVQ18f0*K70xBY0j`E_4#3%PR_rd=TP^w6X72e&X$&C z(a-U#(HS%l%fN{w6i6;(2G!(S?wnL- z^#_J4kugPei!p66Et$U8ZnxQDF@#(B^eW9~+yD#;LUqTWyFP?IpcbB3-EhUKpEIX8 zX`jMhg4t@n8gz_Mb=od;ENJ zbBpA^jVBw=AH(^7_IPvSLH_%K@*hM%XXPxCd(E}41sbd)bGOAkFq)_NxU`!aNmn0@sx(BsqToo>s4~(%8c2d9W5x^h}(ZG=D;Nz6PmC1 zv?o?#As+aGEL%xC7!VVsKEY{2@K>Wj#i=0GDD4Y~$FK2O+{3&5UJvC(&R2r&obuXH ze*`BC+6E{Hhkt$Z{oB{QpZ4}&SyA^*^c8J(0Jv=$?G18_KlMe#3YCMlK}JMuF+=!eed% zn%#6Lj-B6ZPrj0?MG_ELEk5T)c>OyGj<+vg$z=$cKRpXHd`B<18TbbKZFZFV6gKEx zK>oXFxZN9k$E~SQl;7|WpK~L2M0c7%akh8qR4ikrhJ%(u2@!hC!uGd-!Y>ugTMCU| zzJ2rV_0G}W+x-QotjFD(WfzN3I?%cfwFAxTP(0AQ9@RsWuSNMlhZ@umt+wU}1e(_# zg%EViBVizLplK~4SG2E2=s<@m#11rF3c(lmyX6e0cx4YB6!U2wQ$#4TN^*q(?s{U^ zFQ6g;x&7H*mr3rL*3Ijfb(UScA!)5g6q1f{E{2QU5#zk9Dw6%955=#$7LM7dQjL zN1~MeKFcq&aWpY(mGD%4H3&*)O1n@-M-|qAXI8{(C)!x2`oie*C&RWBBih980h2J2f6QuNxEUBMN4W3ps^_2zJFe8W2%9N`PW!Ky86g z{Au$zUH`}S`hWLM_>YbCZ+a(xU_7)!4$M(>@Ih#9DiQ|CMVv(6c8xWCD}xTer9KI| z9h*T3O!lnx>N7Bz3f1i6C!PY6c$>JNk6FFrV?ZAZpN8Kw_P?WvGh2x-((C9H(o9cg z*U9I@cbT;B9hkTBr`;4dbO zH)31>6&m3fw5sFuzXff#W}v|61fea&|EA=9?DQzO--vGl_y(rKDyB`}LF$oL1}6c| zXo&5sp#q;Aqgh1u3!=FEs6PO6#V|&i<65+sU2xB#z-OUTZAb)_4!78tJ+l*-l=Zo0`$%`G&yA~t1? zt|e?5OEkiAaM&&kaR7hW>5_2f_)^{{;z8+}F$@aZyBkilk622euUueN&qQV}XQIq_ z35p>iXE}-CZ!MbH3Z_EEH|fOOmC`VikRBNsT4@Lo=_?gO)XH%7;GvB0jwJ?tsbO51 zk)%~+7O4V@HXGuj0Em1QWd%?L^qcz!ng5YsGJRkBuK-_2DI*Hs_$pez&;K=uFdVSi z_(!$_-}`48jX#ljuszIDu;|9HPRRY$d-e9^d&peYd$qH>x4#GQNf1-u5?Y&^V^$`F zURsjs`bxb+ZMEbo7>dx7c52cXPMc(LfD_FFSnsQ_cT1Z%;}3AglB9m!g!v;Q4@*bP zX|;G1qT&Rcvr%!Hj(Qj}tx=2&s_j8`IzNk>`$a^}C=WrTBKx^)hMLT%C_c<5&7_Vi zK%kS9a2lqZh9MX@CuPb=w7!+`Z3ix&s2=}cb?242d>wvGL*K%r(@3-yY(%*e;Ms=M!@i#h!Me-(Sj! zM`hL`ITR*FqTvMHW2QzmYPX&}NVIx682hFdL$M%`I}M+qV17)uRizO{iBeOW;^w*t zuoE{v$r{B?_p}SwC74%3a1T~-2soht*cG5z1JP=zx{$SNj@qdTKLJ21K*+;+3sL7( zXWBy7fJSSl<^mf!N#4w`r3_yhCyk%jdBKbz+S)1iVV{C3f*0^5Ui#5R>hm)8T|BVU zsi7JsTO|P5&7dx?+L$4b*@!+&Z|eKil=t(1>n579E*g!^%h&-%f9tDg6td4yU|1@H>_jdl$M`J>^WuX?}d`Z zQqC>W8B&HqJ_1O`BW5S~%Lk0G$5~*a3`ucHh{DIi$ul5QLP2KmS1%i%VU8UuqmM06 z{5lN$x5}j#@La$GDzcz(AjohnX8KihG`;5P4A#Cd={FHOvGG8?QFvfS65@W@dkt9{ z+>Pu_i(=-Uf3<>caG=2C8sVILy&6u%ah{-^cZ0L4Vs2X-8!qp1sjr?{4H33^fE^Az zq;TNsQG(>WG6*(ooe2HUvsj2pw3kgKQftQuraAC!8w>kL<71aiGcZ9k)__f>wN!U) z^nlL&Vl=-PFYdX-;GVVR*IG^VHL`L%_n*A7n%Ch#>Lic}&Sw9SDJf7gEmxS@<-~9u z$vap|LgA0{1z@@%_zzJqQ+5pyqp(K$S*73wVlXKfb+ja)Pq1uwuk!K&y0n~px|)ny zC#NnUtKJEz?nkm5dYtw?#7b>a(_*DIC|Y8rwyA~WO1lL}ne}p|{)i)2zT*fQig_`b zm2&}#ktWs( z0x5r#rR6Nz(mX-B%_@i1CB;dJK?2X|tBm73g~q)ZW@x~WY36B-X~s4n_$k5b(qGkR z?FNzy4d+4=S^Q-a#xl#sJxVGAOP;woUFx!8(UllcV@y^qhpNdIWM|uu9op$&5X-S# zK@=ukDI5CM`LMJi6tis9m7fTU0$}&=F1ikZA>fY+abl3ogcjHxHg9CZTOkD9pt}q_ z!8HO zTgHI|uGU?w#!c$te?5f06MGV53y-Y^|Gn_1AK(6Y=RmV2bZ@t~(vR?@iXq|4dOn0B zRhP&`$^1lFiV9SQlJuH7OIphHV;^)x#Z zbYyyMq_h^Z5<|$*5C!lQ^x#cwEZBe{yjqIq?c$1oo?N8U4+B9uS*4QLZee%BWJZLd zYF-38o|+sihKYnJC&DBSRaz-gb$%m;m``5Pqc~W?%V?>Hk|V4Nqy?rKmRVtET1%SGd{W9giMD3^%Q1J{iI@@I0y(>Kj>Wx*kgxV$ z^$xfHykq#pkES^+8q8w-X@;U4 z(6)71QeHW|i2eg~QzQzuU(7Og^fh~`Zd|OljDAMD*z^roA~mv;&`w{SgWQpqP_uhH z&hU~sdUp<48Mka=Y39(iA|mPqKi*Xo5t`@ONIa4@$6ist9+z$Ha060rLoM1f+P>a2!$D(nIZLA7 z+jl$rHu@ElZ0w@nTk-4@&=0!X=yyN6s&4wAvYhxva9@`M#LL%j51oTZX+0fKVD8XQ zfCc3q{mj6?nFoOJkW1kq|I0)Emw)p7f8T69dC33rkpIQX|Dscc-7y19eg2o{k2juk z{+FlEx3)H)K>n9!k00{?eX;y6GVd9Rf_dLf0|?%gkOzG}W}$*a(SC~BOGPHYC^ZuQ z>E0~Hx+wsoOgt6x>}8&I8k_Cta3%ztbOZrS@O3Hf#seYvgVXZ-RD7qOm8CPWb#OmS zg@%?iw%Rh^ATLe`I0B=gRSbCvDO{-)jk2?}e+|=z8=YN}<1l~CEBaVq>E86?vC3|! za;;n^VhqegN1cw2UZb54l~VCJC8I?M<$QdZj`BguiEw_H3Vm=mA4RA$QlcHy6=5h0 zJ7it7g!|xOnvcY^MZd%O1ns4^Pk+m}KW~xwj7;j_U>Kd}*%Wlq>yFMrzG|3R!{Xg@Jh@;E7L6h=x7Nu{|_Z^S`jOvM}@=NT7TEcokI9v+EOO46kPwaH|(FaDxu=yrxV92|f&Jv}$*y;<0cr|pqZ_aA79h0m(g3cvW9VmRFi#F&=ECN&Ps#nJS^gvfPp*dL< z9AtT?Ms?v)sM4TIXEZ`ZMzM`wV6>^0BilH)IlAg7{^w5!b;{PFmW@e4$zhs68~34T z7;II?0RciqC&aWNA~=Dd+wvtHj=+w!RVVcasi|djS_q=Ww0p>xA%gfY#e@tI?hq!V zRbmaVeH6=t5CGvvv*Hn&0?mj}h{P5P#(=q|_}y$7zx8fm;s~q6Dde3nb9JcH6JD`z z&iB!YWRsU2oDaa1|8#o1AA^gBId^F5w=|V@F`wfXofDp#`16~Sc3!5V$$1J%X9YOn z@i(2#jT7)WYj&DsrG%%?J5Jm{#p2wxx!B=w3c#+M?^v|R=84-=mEsO zG&iD&O`1S95Oo`#6KoHj^F|+Y^AkU;7vjuSb3y=oE;j$hDPhg=$oQIC;(2XF8%gwa zv?=4Fa}^a0@z9EJn4!?2@Oxueea_|BH2~&W>FM*NoT8KFr*vEMOGXEmo(U46O?_Rb z@K4tbV^)6!nSf5iYp?+j8?%8;KuDkCNw|>OO3A5k{en! z=7yfJptucpVpxH*;e3Q1HY86~iM8dxEZHt4uDdd;0o(7drP$n}SpJYmLmUy#=;%5s zC+XFg1V}B~lhHjWA49_K|2%xVzkcxUWrUZvkYIUSf`{B!2j%q<%N|xed`+sTtJiKq4M1MT;bU zOW3C&j>O&-^O=(L`EivVK*tsSvXlY6E-UFdQQucIX%L~5dIiul)t7OW4a~Z`%OyEC zqDfXsa<0A6R5)Ouvk}Yfb~YCG8nL6}AOtM0ek~Oka!)I?PXDW&-R<|Uk9yx9?7jM7 zr}zHgwHT=hLcid>n_JJ@8{+>qJCC0{ecrS>9PPc?dHeoIw0rPv{WnB zZ7<|2aoNEy;ZFxj*51B)t{rap} zALW->RVR0!B1pghGcrcpqh#q_Wxb4Q$@t5S8jy6$>G%sBW5BHMfU#xB8#} zCqU#L1*weYal|4UY#eZW8Z-*L<2;8#Xu)6t+otGy2`)pCiVtV@*d-X-Pqd#P!Zf`b{8AGQP+XDGhYown6dula|*q_ zj%kP@U06%3Z~~3jaggWuyhfg##Qt7SNL3|`Km>kNhTY0l=_-?1E1YN8 zfdVFlMLcl+hS({sxnDDo4WXeW1mtDexCHYj14G-6!l1M$^bU5dIQ=kZhHS7RXwPV8 z7Z<%B%cNIFRQ2ixJNVGI4`MfPT3eyO3V9^E?uqqVGfnE6q^c@|MX->ti^Db$NqDmw z$~=qJ;4zK^+GDE}Xsc5Yv#{VU9d`q*!sNihE3lqGd=VMsfQQR-z{5pxfFdCSZwE>) zuW$m@fm?9en&Ex1XrM&w2z}s;7j5<|2?-k@a$5jmbt#?|;$T~d652!gX?GL`b1V_g z8^YRD9woG?%T)!|#ehpvo<~_ao&!|L9A366 zS|`IDygJRYF~c4LU~@={!B&?#CCFH^G8jhUCtgp((!!=}Et8uUR+!WmjcwFb_4Ts+ zkt6Ip(}0s8-JQ)?$ZJtBG^Wi7CK_cAswmb&`Y`y7cQ_$7)1vBOe!6tO_6% zjjbeGwDV$XYzA^Ab_GXoD~o0HoufW<95c^o!91$hq3%RwOqD)+5^W5 z0Bv>sk1j9HFw)-RV%`*&=ouk|X1Kpk`{>MoO6+NNo?hlM!L6;bv@b+IaNPvoe)UO@ zT`2)YlK^bY$G_^}x3qma%*6zOpOSGM5V5a)8%*W9))s-Eekl*%DrYX%hfe0eag2y` zuL+ZbGY#LUxK2FC0E|&F8i4geS`Y*Z2n_D0LW+f=gG$;xhp%H`soDmNEe&YF9iJc3 zwoQr&1f9mCw46~Qio~Btyt}viR`8GkUZ;d{MNAS%0n$x@FY^g%UyMH7)xc?JTSGD` z5DQT%fPKe+-r`VcEjjjfpbP(L`5Q}4h6|2k%O}}DR=NgZGDCAL3JG4Qsm>=BYH>~UE-{L)^0^j+uGi=&zD?T}A@8O(Z_tpl_t<1%4 z`#a%L#h6Yf0lc9T_$dKX8RZDKpVs|sEsK;>IN~gUa+Fzt$#9&Ua|>iom^2v6mG=&% zlrQ1JdYYLr{b)+#kq2$((BjB^(>TH*$9y3Pi2-QO!p=Kde1(Gxw`ZLj&dZ(;Zkr8X zIn>IE>f182)f1}-Zgo%|Lw~o1pB1#@>?j3ldh_vb4;*Kn@WrFLMrioKnGWR%^O2^F zpWvk8+d(aNGY3SzSxq^e8Ts|wMrM1zgXMq=>BuYHZ=3NvAeU)=FaIdlDJG0M4a?a|tIc!zR8umhq$x!=<^# zA+|o$mCMm$G^TzOI(pQjSH7^}HdHkJ{STmy>Ry|)7L)dJw z+KT>p*rWG73F=N@rz%arzX_gcC@}wAJhM`yRtzwxR8+AxN-C3$`Dqe;7j1k3D-#<~ zL%HZ1#~Ua2$;c+)O6m)H> z_^&zbCDd=}_aH&Gj2@1=^Ba>mFe_uH3gzrHO1~|M!#-92a#BaN+HK|y2Tw|U@B$jT z;Wv#CUBWM1DqlK9e?@YBEBUI2JPT4=(Q(M8f4Y(R;Hwgwsx((kHSwC3nqyjN{i{1r zM*k(U`D;w*$|<*&=-}k852J*~+_-w-l3#-6D}SdZGZ-hTZSIY!4ira?EdalxL|4g- zpVI_nMW(@MYzfEcG(Y0(Q=-)z_y>B5rwjU3zPRhrQpZcL9ezTjCFc)rE~tcnCr$T; zpDXm%r%w-I#(V0g6cq~bpUV{7Sf$uTPuC6&L=m=@9K?{1>jDRtDwxre6-nNfme$A0R31}X-H&XP_zJ_OuD%9|wTi0_I6B1HP z4aHDQvJ}z-icUcWaNd(FVBr^VCUX1`)a6SAe;L!zVU)!J0x2f`Q)5=6D(a{>&o1_2 zGAEucAomt`QltEnvZ$}3cj>gm!~m6TC>4OSQbqZzx!>}M0KC1K++);!&6o2K! z>OWBB3E&R%D~J@%Hr_?9C}-2y8O9SZmX8Kx z@2PbUE?O!JdI*1hi*8!(iCAm`t{{-N$tQR&nPn1S!oyKIn`LAFyt?;Tzm?lZcb@R))tS>S>ToS!Bu!6r|@yPK?U0+DS6%ksf`>p)=3{nLLA!?{buOm!=Vo1quts zqG+t9yy&qCvqk(e=-n%~Ek`Z!Q0zQT$>(68GKUI=l+Mv`P)CZ*(7@6q7p#SsLVUpr z;`0ey_hFVRO7pOUpDY8cig?Bs>9wA#=g_V}x(|e23_=g(STMoA3qGQvjTX@HT?sxI z%4BRv#KoHPDUt6oNQD79OHsW=WkkM;wg&^tyLSdy7Jy$*28aa+7V1@aowT%`MM_jh zhK$<9K*Vwc*NxS9tZ;3F6fL$w7+;rW3H3lSYcd;nwu{i9Vp`;P+Vh52uMYGOfe~1l zFgo%Rv?ByM5JHI#2D&*!-!dM2iQeB!y=%C|mE<-?E-dQ{xjM~-GN;J`d=w68UwHz^ zVU}NtbFHEHa)B-9yNO@>1;#MQc?C*ST%CA`8HJD}i&w$SqL2%M+$^A;78UrQ^lCI% zef%7L!WkkH)RL-iEg(-n&IP|+pJwSGJ%wr|_&x23V3!}WW9CAij5pW05Ut*Gm|*FV zlg$o^KaK{oB_31WXy?V_4pARWDdD_((t$(O|B#-!Z=Q-b$akFQ&)_r2o}3rYVW=tS z7qe^U)eD?)Hgdas1K$t|0;bO`rq0{V4e=Ua<30xfz)nwx{(?3E48!@jPsGfA2avcF zybLzXefLDpB^3n5c?Ynd{dL~}G>E;rZvY$+_noEQGyojwbeg(g+5~t|Oo_X<016>C zxsNyT^P`dbe2XF1yT}4_+j>mvnfu=XC}8;<_XU7}bVqNWwg3pT>j{W+9L3gj6YCl@ z0FfHW9BE*h12zKsF;(ka-UAx)eR*0jDXvx#Ovjt*?jB9LeBKffL_Y>5l9o(6JcsA> zQt(2+)g`n{oW^g=35hBzLNT0hLbl(i2NF*E-@x5jZ;#UICUEt`6)~yXud3f#GxT-b zbH5#3e#^kj&)*_XYQzKwg0h48r zdQgML9bAbfa5HE7EiUU_WPbx%hh}JfNa_sXt7UiEG-Qp>Jc^T+=Mp}#9A{)HSvPyS zvl8rB=Lm1i9{ao)c5D;#h}-k>!& z7sQ6X5Avi6lB=P|g81-WFU>$=@AZr%G!JaDraG-B6 zp(xkdNNMdXwPU^xSQTC?ff3bAY>J@d#HTWWSbADwZO0y^S?MKqYaX}jXeG8-k`KDW z=8YQtwxiR}fAh>yr|@RkI2)~oe~BDY%(d8~zSC+feMxLw41e>i)mU^59B?gA*S%YA zMQHzaceAFVxwn%Ft@Jhu1Tlb*Wr}ZsCf~?SgPIZWG&Y-Q)#i3N40>N?etLH=Pi!y~ zdQEm?beDN&p_Ht*9QwxuPE{gxKCzqaNFLhI9guxmE1jQ+PfRHB>gwppWixoBl<)0VSMDo}9A4O%+Gzk&SW%r&a%Lg2pAO#`RQSb}lLIyc61 z@{S1!(^bB446O*NSla>S0-JvQU3_fsq7f&z+%7Ir{GfLK!siP{6?kvtI$7u#b=-Np z)Zy}W-EvNX?bcpUzxwVxv3CcR*(EoI6yLzkkiB8ip(0D%B6y-DP%IEzA9pr4HcoE+ z`K@q3EIZ_H;($E0x4`z6Uve+V!91`RPVns=P6`e!j!GyLpyJR-Uvlia?6E;t@)`*R z--kt$Uuol>>5ZerecoOfUwdg*j_43*d7mh~qdN-#B&SjY?b~YH7B9*;Ny7ZzhO20L z+pH89laMw8g@=SPG|T=&UN(Fc9n8r&4fQa3v|r4ip21iaqoc|eEcZq4rbP<3RXcD^ z^13J{QOx1OrBzE6v=0m`*Xmkcw5M}s%cjEiQ1AVr{`*7y_kX$i?@yjQ*?6e`{!stj zt^a;DO(*BQ(|JA;RASXK@b&fIA8$N=D(k;L+jzbyo^Of&d-_oS{R`E9{{i+k;=LuI z5v)dOSr+}AF8p!(sf;6}Fa5+a;iKa04BYQmtK5B?*{x8BwZMvyE~`8rUJ zM+-^zhh<(N8bMjzuzofG1obzD45$mQ@mVZw?1@Dgb-{0>Z5;RIxhZvyYp;0S{q3$ZLGum*wIcwN=OjJ>`hdy!)5{_s;DdFQ zJIji-ach#fX4oE=VAFoiA-896ySce_ove^l^)Sj0x2+00@ukctWF6xZ-AJF;j(IAT zXMKn=SlEf!7(Q{;c%}x}m4WE+LXW%I#;A28*E7>z)nbcUIHmx%)#EmLf)jE;xN#;{ zhuNudlB(U4wpb{oJuv9RTs#?RqyuL!jcU`3iX7MZq0t8L`L!ty6g0pOAzbXkW9^AJ zC8xU1F`(Bq%+0T7T<^yg+DBD*T1+D|p)}1##KO+=NyXWuP3{KAHB|Y_yv(7X5wI`m zNLFU1vR`Hwq6>tvb(P9}D4)fF!)q0XFleIy%{-HN>ILAToCWRMMnk({lKj-bRs8gXo)q~v%waa6l@Dzs7+F7t6{KUz2k?CZED&^Nj? z?r}#CX8UN+Vq`Pjs&nG{{O{0-QOpOtS4X34WLqWE@&kH;_}66QzlMVl(1XUnF(Lw@ zPK$P_WgN+t#OansU2mAMG13?qW{)ei8afN+qSnfoDv!rmD!A@6{sXSG#ayZz-x!9iCmE^#Z{o{gdLr(gPtRL>0XPqoHGH3ox|%Aq|8CbSZeZY_8O)QD+v2 zJVBP#@_ALqXH_jH8nK4G*ws>rrlnBJ?M#q%f+vH{LQS?)*UJ3_GWeF3FxpPxE(F_4 zmV_0)3U<@>`BR5f4y@-96r@&?Tyi(JrwMCdM*JJU6YafnS{vY3U}uG$nNj~5X~EPA z5U33~58`k%gUu`DswG)yR|E$get;Ug5b+4HMb~N-5U&uTAUSc}c8;Qfgh9yR)9_}* z%YWFb?;K*VL`RPtnDx5?Qi2}DJry9Y#5{RTizN)pejvotP%_i3P0V8dzgL_zRaxX?A&M40Ku=foJ&!Mk5?2N;#s$hNmTLrfcz22-}0k4 zw<9BQi)7{tc{!|LNlreLh+R;T1q9F*FQ2Ejv~w{aG$|&+4?uQYZv{{m`h| zdhG24t4Qk%1goN`A=;VJe5KdE8jtj0`T-RCKe`h7wX&Vrg)5gDtF9Na9L-P;l`Iq& zacm`>$$nDh_&VB5lA0`X;5!K)&9KfgR+c?2Ao&V1{qnq+j|NnF5z3!77SA3Aip{g! zQ$L!PyHgFuIj1AG6B#by^zo$;5!pfj6P?tP5+7?%u!*TFoaKCI%!2Cd@~;&T*y{6% z7_7!x6@^vDWpQ}VM30UE2BL3cH^i+5aZLaiEQylGyGXmF&yS<;2B*SG@L)cb8n9_Z z4YJ8v0YIw~toq%lW++zr@iU=-2as9(S{yv|(Lc>*S5T>%?xaYR9!|3?y2vhy={2bk z7)S_@%X2E0SbYmnVL(>}N?q*|%UM#cf^??1K$W8D)B$dov4)O@xyW9wV)SaW(LfFX z4nu&9De?+|uA#Uzs|)Q=_OE&We6swW6g`l>`x%qI0Y5UPE zVq673tKfOn1yZ9f48T!XmQ65K_rYV;=Y+DK=yXr^43-?kG-&!+@Z0a`w7jc{xc ze5@i3YaL*cmL$lx-NzuQLSZPYxD_Nqn`P+$ zY;0BB$U0U+Vwj0ng58K;7oc_5vKa#!&UE{Nb83NMi>6k=2Q~WE(3Xm(1qbdwrw79g1oKZXX)QON$-SR(C$mAWo7f0DC$x z$cKTXFx9-m_;6hw`W2bRs*J-tU}}Ojxr%FZb{iPGF0{AmEZ0jW?N&&y9xfQSGJE&Z zGg~5k_e!%`E#EerV!@*GJ}HABp%(L*d8VrlMFsL#;l9H0`$7hbJ}tNDJ{%Wp>Js0u zw{Mv*IOhT2-pdC{ti?!?tjCXIgT9gGph1u{LKiD_f#)22&SSTNTif-sD5^+v)LLpX z?9`;zp>T71ul-hnXHZ(1Zd2gBy#6A_4*b;M;8k>e32TXEN;fR#%-t=HZ+Ho0!MrIb z>$`H-B7>YQePL^eH0xCP(B8Wo<`+$@6UKZI$(gbfF}JFy6wk+V)Ez#+*1<+^zuSWm z%3KI~tBOr(u@!O?Q$*bi9#l(Kx}BS2%8|0XO==@C6?&Dt4Nf@=QzJle^=IJ0 zzcJBq;9W;`bW=(_3y9Qgw(g78G-|HvB=PBokCRD#>>4*c~$1)TxdAbx#BKXJl}jr6Ja|V{8(t++fdfMdUEmJnytzX8=?#{S8dbC z*x$Lq2;R;O%RCS9e-H6L3*!HtZ)`o;dh!te_YnVQ#{W&G#brJK7yA|C{+5gX+uV90 z{>J#fjVBw=AH(^7_H^^vL;T+tivI%{cPRKH+(qiWUNy$g3hIN)Os)VS`>03M0$9{2 zKb3ER$A;nob@<=WU*GNYUjDfA@~6H19~zK;&U!+u(}~9`XT>xHQ_rY4O~JGzUdWMu zz-Pp__uxg{0MqPD2>H`%+4+EfAI`w}ecAZEKWP|HC?952*;7_M0rDSJN)(|$`bnQv zaGnx>8d3z+zsme0=OY17gB*co7x6^}GO#KpR?yJSw$fq&-cV zr8ZmvIsyc!_(|yq$?mMxqX^KFZ1*$SF zf^jWMVCB6bmxk;kSjR7{fEVYk*xZaZxsAkrmPKR9El1I~$Y*Yr!pSrn<{x{I1+crD zj>^o;T4>!ok7jJWqM4$#z@9F?pesBKG(9ej*f4qLC7{+OOGDS(9lKx`K-3zoQ?DLs zV8O+w+3|S8xZbMZ#&oW>941cukz^^fJ>{iwKKpG*+^XXb#j3^BoVHIk3jtNP|KJ?E?~VWP;g8 z#LlYmXsPlG3=k@hL5?-bk%;aQ463()Lh%eCfL2Y4Aqze>@}>mMYF|f|ZEtB4A}07W z&@<6gkkHl7p=_LJJrYLx84ud%Gh4>hDX@pl8v`A0qZ>0zYO@ha)eW~h>H}bC#LV(> zHWtzw+UDo8;ra`(M1eP9*=-820k2Oy6a!+$gSN}bD4)g6U%YG15OJZ-@i3p3GxFak z%mkHUA2=?Lvuag4m>4HqkTkeBL(?i-+t@(`*x5$FSH*TE`L2tOy*Rf7w!(BXi8k(K z3ls=?5`J!udjN^Lq=z=gdKxEnV@c0fJ%YCpDo|Z494mL?cl5Tz;{wxL!`Z-mw73x? zu_4!)bG6Uh8*go(R1}oY9OWM}sPi>MY7oQKH&KJ#qJUps-OXIO2+@LF#~|>gmSDmN zVzcSfma$v*+$hjF7n`O==md*Fz~^ls9psGnd0 z9~vg_%$F$yAgt37N?f?YsX|mkHo#K#MLPX3D6YoLx6tiFGx{I{+r|E7MoqEAH#|-) zvvNk4+-g?voEGpU`HgjI4GfAV!4x&fg^XGShHUjJo57b=Vh5FpXo$gDL<2!RKvo2G z-mJs`RB{D*82XTC=gxLH9||$JJu0rCUJHgcSuKdM854v#h;5j7h0L1m$#oN|hF#Cj zy<%Mg4@-FYxAZbC`{InuyaqFD06G0D8+=^Q;&0TV<|FV*Ht}Ab74S;@*L^X?sZHml zKVTo;^!>+^3BC~jxUbH`JWSwPLKb&QPaEqAMY(Pi-^Y3dV&z1Zj)xCItZlc)Cc*uzv+0m|9iOq zd$|95xc{rU|Dyz3cfI^ueE;|S*|X>F{olsp#}D^^U+Df1cY7VSo2vZcc1Ku)NsBCG z#-3cCVN4mu-%;gWc@Kgm-6N<06}^@cJP$?cGetbYodOm#YByHSB04?8N`dZ;qfu(= zE?0|v-9LHqpJ zr%z7PT*y+!TP--@#q1yhX-X(S5WGkMaROmSLOE?md-Nv?tet2AIav6K-qK?+!ILK6 zYvH`br@aKKLRlN;3NkP8m)nN3tM<>K%oqJo9pKp^KH3?dfsf!PUBp&&hJ6JYexhLw zd;12TG(PLiEnzU|2p?aI(d{jc?VpLd_jJ#JRA~9Ip>B(y&{emulndC7Vcn!U3K$Lb-1^)3EDl}r6}R< z?lxGn4tMDhtuw!c4^(TlO}V=9??Dj`@oqdT*(T?H7`}%)3H8&f{06MEF9+m6=u8h; zYp!!Yd?3+GUVW{cB3o`Lt??Ut1vRXH+vvEGKagLOpWq`GZeqetvr zQ5avHXgcN(#`LXgFfxBIGmcTxQA{!vSPs9J!HV{bVT9i4b&p6XnsTtjY6z%)5TZGB zsL&0+1%G|0USl>tZsvn#$gB^9K_U$rpqyUnJ@jsbvYjY6pZUHuUI(UCgHxQN`t~DV z22mwEUwz-(-+r?bPNnL&(|DuA>Vqu~B?NG!+x=n$vGBljY5#ECw;r03DF*LEFJ(#YQ_m9}@2> zE=;|=bKx+?r83@V0gFU@F>Nu=(`$nq!c5yyGu$64LaizJfIW|XU#)avy4 zR%%hl+NHY0FT7SIRWG?r#e6^83bmYHvp7S-p>;J>v1w_&;8c$d{@txqchQxZje@MA z#$X27+uu9d+kUicQtyclZ>Koc-d!NFNQ3(UiOrOd`1{ z(qZ`wb^0voA8srD?PvLAHqiC-zzfw@+!`TKyIQ%RBJ*M$ZD`q>PxDTNrl{l`ySdn( z$w;(`n{emVdGv@X-8+xzA?+-w7KcA6)F_*T;X z^k*DL`HT#o@dlaKz}`@D9#grPuh<|So#V=L%lM7vKm*!v7`Ah6Uj-$t~z zr)NkA-r<|rNY791IfyFe9gA%hHt*Z{an|?iMpFWP^ zBE6Y78BMc#0M>?e4$OHrnsl3x2p0$A4ZgUTk8^e)iMeC4($r@C$+8pEQ#gA0&Y!?a zpFJ-uZzj3s=H}M(_J;VsP4Bz#P3Wc8*@;x+)s_s1Weg9ONL?$Frku^z#*62nX;ASv zH4SLC+6)9u5GZ$3fI6<*r?R@|K2cX(}`@3)9-=DV+_V@Pj(ayoa+XIuB zE67n}XST14vj{(AOcxriK3$M=g2SWJAKKF5g5@whI%PJQo`Fvxe+B=6@076;Ovw0w zp4)tN$dt3EPQ3DD1{W>Sxeaeo_%dyms=e}D<=dlO(UPe3G`6}csjXacq{Eqz&M)Lx z0Z0QBzdxJO9eNVUf`#3H8;0={(7EUZ)m@%8f-p= zn@7+sErGns9gN%C+5@U>Zs|^}TKb=dZ}-;^-o1>_2L%~vtC2xe^Eg$S+c@Lvn?E0- z2MgtwYe$Eo*$9&K7g4_tLIjv{a|kC9(w>;$E_>2+8tc(3npZhL<$7XKIZ3ZD({?fR z<|aC*nQgTRb%6!V#A{suzQ*7Z2^2@3MC#y$(69OjOdbj57fWeL;2l@KPl zu4;db@?qA$?vKE)4mJWa)glPf`K(wM3j&Fvv5uZRV$f3v7#j;6c?gC~E^aE607^i$ zzh$<6t9-`#2jzz=(JZP9|c3>ITyG z;pmZqRGP(=A?Z6t%nEo*9jzy@L@40bUC90~jY2a(mb-LCqp~>jxIW0G+9s%ga3Kzc ztnmF9Xg(-o7t$EaQZblG9!sIph8K3e9Xs|;JqLpsg zyKJBUU1_=o^E+%GD4zl_Jz0>PhX8|xn&~bQi=~i(ceP*4b#eC%R>S`bD~$3b{9xYC zrt6SJbAFMjC|sdpa_Dq1$4b*!40SS}2{uEDXBnMm9Gt~ZdR*IB&i22!s&06$Rx9n| zsd=HGm(2YX9tF6egGVlBRAk{0SWM}2zO*zL5m%PfV`MgK#F0Ff(~kBF$q&WEet5s9 zorbRwnZ#5F#$0wjreNdm7%rEzu5NJo&NWZB`3Ok$qgnBYy(SDrmy)APDG_nNxbkfH z7a1y{y6l5>I@Ig9B4I=mq*Ty_Aso|B8qwX{aBe$P^5drs-%X)%`o$!+4b%7^=^FI% z?f&lG553*J*E>M(Tt;s6SSF!SCsMvOKnS8#U3n^otj7`P|_m-|m3KB&Xr?hcyU(^^;-ON$s>{qbksD^LAxc{J_vqI zEjV3EoR7d+CO1EUxhiuT{BN@jHRnfpAFu5+w&I0lHZy!$x7yK-eF|NG^0N*ZXs#f)(Qz~JkFi5$ z14F{#9rz@Ol;9j{bPsw;N=MUSQ;3B(Rl9OaDeYP^J>A7*cFjWv6Pfta@m$`}+yGuQ zu`xv`bP7&sPFAJ_1mB^*ZJ^Bwpupj6!(PP0lZ$bQapQ=C^ z110`Q+7jiDqu+D#4axDGf*udozr8ZS}{wn%@o@3!|sig>A#V&fmxjInLb6|+Fgy2y(kOj_KW^!90Vw3?6#`Ma{&}&R04%N`G^$AeuywtpE z0(-C6_Km{7%EYCBu#|0}rldxA_kwYYuZ=myS^KQr(%HDIyjh8v%a?N%1r-7#aabT{ z7$v)dV=8ypl_LWPA=6A5+oB6Eky)%-yfo}Elv07Lb6^@YRBUPFmMq5WgA53GbakGMDS`-Z;Miir*Z?4}D_m{L)JerD z2-%zDeB3zDn}CL69RL&3#iRz%m=6)sB~;uBusIe2f94ZQhNn+ZqA;b|cRZqE${d(Q zv7E)0rKbHMyB7LOK%g^AX$N;7;8YypFEO>^HGb)2N&fsbHVxgoUT$u zL>ehTaI-8uu2z59FI=tCz$tMMjNvQNPg>#c;Tg2$W#ht`V6&*4Wc_@Y^SQk996a^W zBM9VC$84@`3cH}Ntwn2;$IV_va@$PmL;<{qAuE}Km=og;&v3}bdw6+CAq{t2s!HG7 z2vWr(Vm+}cgrJj;{=`8C@fHEl{s0P%{&91!`QSn}mSqxx9&GJGakdN)$cl4q4dE(cKGEKrH#@KPwvTpR|5b*hfJkOt7rSX6 zK#UV2C1wB`7y9BmMF(~viw)+O|J}*p0gl(@dHNx1RIL5(KvW!#m)N5vhWT6&WExPs z90~P;0VU2jd5%YToj*{^wgw8#mL?|&vg~Hftu9Z84h#}9vCtK$#>3y0uYyE0M^>A6ti*2qniNWEe&0N_(YtiE?b*W)@_K03QYJ zFY#=4Ju0%qx1pUAEH9q4`}ie?5eLKQ>1bKAF?L!~0-Cwl$pB(g@R5-1{FO2zv+rs( zuhFnJ_|FoHHkvyZhx*0JATavgCnOFtkm{Xi(P}x}QcR%|3-=%*Cp0iQRFmM+Qnt*e z?TzZ&95>OmA&I+lg+LfUX`zKUjC1j1&2DPOFksW&#k)=LNQ#)AK0#GFW?5T`8Fcz6 z31CPLwp&-pt~K}~5dwbn1PhDp5#e#1RBTTGua2l)1y zNh4^`bm88KLBbcua;z<)jb(4coKhP5Mde*N84(1N)m4jEu^}vBh0BMo5zh9M3gfZM zVl81gkd5o*dDj16C^K-N%UP~1i=o;ot5Ji6!X978@&pDsWbm727saI*aOU3~0`FUH zi9rc|oL!k=^v>*T_%?*YMYzRLZoEp6_rsw))SJeOJJTek- zodk?&2`^*2k{u*?CPN-&pP+JoK$+OLc?Pqaou`}}$1L*z&Arkn42!)%vlFSJaPPTd z7?o8rMz;I1_owJ7m(ubZ03mH6fnUNx)ifRAMXwvbuKmnN;}UFvtp+O53uGFpY=>>n zb3xM~^r)YXURnJPDGW$-4`-LbswMv8z@w&GcJ-VsvvK7&oxz=-`a8SD2ucEragMz) zDkd>`1qa3LSVZ1@NB zG0cL};WzjEt$?>eZl2G~q}QCwSS~auMcI(JvB?G&Rra!#=|!2ZFoCQZi`|_}hGRSY z(Ew%+(ypC9)ImdQ&5Q&xe^bi|^&;01Z-&Wj1eeQp87PM|4#H~ma$2)#kfhZbANoPe z6t~H61q2;P3CPAA@KzBf1{QINF{Vx(v=-x$U{kST9*^w%Qizg;MD~IkkFFbG?YJCe z*(Ba*KXqJaT=GG3C|9*}1MDdE_qCM+A5&$aKyL&$%XOgA0LMQzKp3EVtn_jlgeNVGX|y$T=o|tc1qPpE30(1w_M%+p zSiZg(Mlx+pi#t-nzd_QD_J;N&pdTnTF+)V~0`Ik{-FYz|kwprJpp@yRg1}^`g%4&@txBBHR6P9Trpby&ybqM{Xyf?$CqccGza zLfUjqNGqF>_{6hcbWSBt(lHhBjn)Z#`Z7$H(F13uI)#}w!O1}ES~T)1t^2EJ2R-D` zFn*DxP=W)oB_eED{AHx}!Sby=ikHUHs)NirfNid{8k(@|+JV`<9gnVohlpj2*U6b5 zYoLa=*XA__Y7qpYn`t(g#_|KlofjJ?N$6hP09(&0NNWBME5K?9GHwC67w1u8bln1_ z+Z~0Ia=}?f`*lM5%JQ?`>441iYG5w*EP5Yro=8~xiSOM(x;0glE76RPu%f39t*5hQ z94#z!Ke~?Xj|YuoDq5mMGOv(`c|_8xNW3ji_t2|cx4l%oN9tVj1v{02a%`4B)F<;9WuMH+hsd`Vy)@F5pz4RfhC1?d%tLOh1%&63y3lD`egtYz*PkG} zMD_*=H9*Z=yw0SY#Vmo2ED=_F~ z=CW{3%Q0?gr>JD#Pebhc&TQNbi~-R7BxG&0!2oh$8ijP0HvT_*@4DT_aV(7bH`d}i z%!YQB02>4WUS*uXi7ZjJ65X~)CtnW54;}(Taw6O^Gmyy0{NC)W^GN3vPFGdmFM|O^ zDY6YE%f!s|wYs{xs=6-0>Pye%$_U~hMlU*i`J~<~ZPiA@yE?sBIzQcWbji_q$4BVh z9i^)tsg7q=*?x2mR__7pY=J7rtn_!03b17-ZfK;h*!dv^gQ;d*#7VpFrpVSC8PQJK z>_n2GIQ_j&@(WT^@?zXG3f08}HroL!Y`>&mNJ8}fe)Tdp4J(r92et%w?= z7xdd8+qdOfK87}U8V^j=@)KQwF)C${$Q5b%KwZA6T+7jI2HC(eyOE_Z9E!@N2D!Jf z&MM+Dm;P3RiOnrKhJmXvi{e_ah_k?Z==LY(QSQyds zG;rk+=4Z~u3KS`?vn{mSXJrxLm zZvg{-A|Fel*7asNZ(Sm2c+;o_WUKe7dUL@?v1k=c6b++?I6pN&lnCnf;k@- zHME&DOF0#y<#a}Hl5U%ph8f*^#^FWa-~@{k9N%W5xtC1RHzslCVHg01N9H5OV8J_p z>?Dml;l|uk>QixJk~ej)<4So`@8uD9!SS;hP% zQIsQ|bp(*_U7DwO_kCSPuT*rT69ZdMqYp3!&-SRYAq{Kr>f1_eU{fb9(6v;s!ENLx zOrZ@Wg>TQ*wX>Ia%BiU#5=fz^%txgHpXeA88b!IKk7ZIIDK)0YaAcFZ1z<#rzDSTq zkyEbcz^3dr0jauuM9@~W-3OFtMLT^s%GhT}UQ+X5x!#n<^th3Nc;Ee~yVS3BDGth0 zN;bN^G{bD=W=pcw3d{_)RP&sn`pV>&v-kd`SqYpbmr*_%?R*Jv5dll$+ieXg-iCGs zna*OwdL?8STg9*QD)U;pj!b@cl1e||bVI<~nt&D^cYJL!L*zXm2q zvnnTXs*WCh9Yx)>cfi{})0`(Qk_n}SPhxiF@_{k<=EK?CWNoRaiDvkYf>BvY06y@O zQzwE4)B%PE>-wCNhdhcg6cEv(hLoqw!DeZq4+#>YR~V^clgCpS%J@^H5WddRi_606 zHz4}#%V-xNR{^w54eM2(|CNHr+KL@G?iQwGYTvUt>>lI&&qZ< zPoAY`S)5%%OzS7J>==UjRI58maydcbz@XadjdNN>{ok4JM+zmW>lYo3)1;{sc7G$al z+dQyW-3@hburMU$K1Ny>U@dic?pP* zAhfTpA2lIe!YI`5CT=Siic7jKQ!^Ou$$_x@9(K@TzDRts*eosRzHP?p2HuxNF#^9C zp&xlnR?+ohiX=^D{8t#^xUXUlAeW$0Km?+p z+=X{exnqu_V|Z=5;uy#7G8gU&XTrx0RU9T_pyzm~Xu|F0ZDPAP-Y}o{SSfLOjtym9 z=*(zLXX-#tu9)mufct}~z`GSe`$&@YS!jh0AzjUZ47%94YYc28@!?rf(O^1?X>mK8_)} z`&Cg78bHCCU_=PMPUst?gO1|V5djrWiZ_T^1_7<+QR>48&|KG6i`pQ94;TX#RKgQa zK}-G6i%K&t)WOb-A}VxS{&*7X6NiHpT%o4v#2iOipL7?UWhvWulNn}x)qH22py{TI zELo3ngF*DE%`Hp8+q%Wxi~90k^lEW7PKQYNp5r-^YhQg7B$V0DWSmU$q24P3t!~*3 zLai}KAs)&J{-ot09M6U-#&V7!JK&yK91|ZC^C_P#vY{u=Jx%Euko4(dat7R)q~uM1 z`odoHND}S2JZ=KDmc-E)6Napvq^E>P9XKlOY$y*PB@8k3{jJ633I~r0+F}MmJ>84& zco1}mg3)`)i*-&}hf>p*4ax|NP!icLr1n@wqNvF-hDwdk=9~E<>#a!E#ab%;2oL9v zqUo*#dVb>x?g`^W?Gy}pl*QLZ1O`?J^2HAmuJCSQ)O$D0kERk|?osL27t@cE&S}Y* z1(vg2_BMK#%V&Me^&-v~(n9zl&^h@4bkA&v(FjIAeIP75jbN}^U#FfEBx8p_B1?0- z1+4KJzqCAGj*>B59*80zZtRRQHzitiHMNpyY05MYRnL<1Bx9^ol7AwqPN6%7;{}uZ zM#*p-XIzBS5@BofAG2mJqy3ooK@J=xq~8S9_rmG#m+Xk92ps%7j!`SK%E`gN3s%y` z36jergq$G&7eCjLadP@=I4~5leSrrC$%wNlXJI{LjN{E?g)$h|$w9VYOtGbB5*#S- zRP78YS^G*v>k<-ae-E1H&b*SxqnHKwSBabD10) za>>_;6oT}+?#rKpUshUK%2p}+Y7FBkgjEyx_dL$tCZi+mX))*Flvc!O)}?7d zfu_w~8ejw{>QFB0moVU6JYE1-Yrd6uftEYX>O=ILBy*npw8Z?+k8Ex~P~Hboqbp&e z1cd`8W=@kH$OhXq;a0aDp*WQ5Z>Ddib;_YMC!0f4rSmRoMJ>04XKwv_FcAdh$ zO4?jeiKxL%lbW?^7-!8Trz4L;rA;rQ8^nvT5KuS3g0 zr|3Idzv;VcajN-zD}uz;wN+?^3F+3HHnM?~mO;nzc*uF)pay+6Z#6Cf6J(yUD0ipr9BnhvULnqWNcp|e0J_C4JLj35? z8<0KW80aQ;W`E~2Io#ynlg%5rsWLDvg_m!7<_9yp;QRE1s{tRAak(FawhT0{p9BKGUW%$znun_I$r$AmXt zK)9f$dkG()=*zr<5AOei?g7&Ooz8CO zUelc+L1IpuVNN?AUM7=R>QqoNd;aJZ?}`*{%1Vl!UkjO4e>IMaWJD!fwWhDDIk!9z zow06&F@mr7&RabN2w*hrE z8}Y`Nk9w)iNW9dBEM93N6R(2fV+BrEC*14ve|`S1&;RxLUvmBp5ndr1yCD3w9bU>H5$@vj<3j(~6G&@#=8!=*PoHe|`GmhYDLM2&)EK;(Ox9q8?!;k$;oGKTx);W*B7ow=aifaAjFJc6BNMCL@_$>b^- zE)=w-(+Fq7`OP^SVtLexP+kAWX3BCQ@An*rJJ*&-bmo9UE#HZI;RAe_{b zD-4!WKb29a;3I#Fg~D|+HZZuruqun!OKOW-VXL;hEXLXr9E3WFY#?MQ6%Xk)(Hnv+ zXh0<$k<^wl4X6&Odq|?OS%7OcB6u;2E@E^)=1gm;?UWcAgfyYw(OCl2Vb18&EQ4rs zA`GQil~$yH><>ZtN`{_9>Q>0Mml$2CiIVFyMkGcDE8@A!T^4cneS1yunw)xfjf0>> zuRKr~LKmAx<1`Mg5A>rfpj|jH5-)`qLm>N@z(=p-_$UGrX0qVJTaxM7gUN!ftc1$k z{Sc3mlE8TqUs)#_!t#uq(DFNQb{Jk_R+iO}7Nh^aXTXwp>xp6kK~KpV>|966<;Wzd z?TXlBN0@R@0YS|Jb9@XA07RD|f#wfsQB!`#`6*I0TJ%caCHHyb_c?@3YL8O*fE&H& z83X7`-36>xrMG;)>3*X6q1%QdXe31WhR0F}d+2MwTW&zJs{$?P`h2tm$0>Q}x4fYt zlvzOE4HK^lp^CyPVK16-irx9DiPnG9%O*iMG*n4;5Dj=Sf+~(Be1Eu>Q*Z?iPSRjU%++ zLd1&6xC|0l(f~)@9(JviN07Y#t=L^W9HoSE;so6FB6evUUMX6{(QuI$Ao1#qaH)tv zLj)$@L{Z70Hq~bE&~Zf_iJ3Pl-5719hes5?63W>{&|N zJVdK76Q|G!g2`h8;C#JuTGi7ugV8rxz%vk})56h?Tng=pP?1m@CXZ;&4!A@%!AL0? zxC?-@mJhuGkR!42Vp@RxnpdQGf}ZG*F{uJDYB~ohZ0q~FKBJ*ei)f5!w=ZoP9g&pg zCXN)o$H5nss`hHWa1cyr1ouE3;y?~jXaYn?gZf%=y}tih>wnkxKkI*Yc>g2$z)xfU z(cSAD?ArDpJB%c(?|(j%{Rd-3#JMfk`g2_LsM{TIZe9Im=k5$js$C2*MmWR7n&a77 zJRV>y5iyn@@SW(JeJE>yU-?{%%CGtJ5&NEaD`HN-_V5xEWb`2FK7x0YGw^WAmLOU2 zeoEuCCCB_d2z>ZSA_isS%e8@#E8QqA;=nysKu*!2$2fflaGDTEGaV#2FILu>Yh73Q z5)6A$-2i7jDCba&e0?qZ#^B6?##m4ht38`V}-rA$Qu8<{}=a|4xD`I3|#>|3Yms29mdAe%U4ZIB$4e`B}!x&dW4*fj01)@YO8Q z=k=oJ@l`rmOhkaB*|`!YLb#o=l*Sdh*#*W#l`qpd>7%o@^Ukg6`n01ft#G6@zMi^R;-}~Q{Xmh( z5Wdh^QFA6@R91mhtZy#%aZ6DJ)}j{Z2dZ;JuU~xO zjf+O&o9dhu(>ZSdmwBV@d_GQ#M*YpSZspy?vr8Vq^7Mj~W>yx#kzJ~LTC<@-t$o+W z^dvZjRUn`u>+-m|*un`m+G7Jay^TeM;9pcavBWvhDqGd~se+Bd7%0n-k=;01g4_t2 z#zS~sFjig!7qRlUQMtYm#K=KzNYjndt=bqlnbJ%!7UP-XiqEboPRSf@G%}05Rv{gk zECHS@5Rh%%pA2X{(XUyn z?!iepK)hct=+#skb}xKBCnFKp+Q3FXry+Uf^ZEy*i_pK8Auxx)T=iD4E^^d$CT}dp zNl7t%Mt$!T!*w-gNFSTOLbK_8tpL}KN27*!1pb;b>k@*p31qI@=>-RS#)$XZ+MQ6W z3zzvZY`~0oW>_qsFYy>31j8l2G9c^Kn_y`O`tmMK-s_4Y#SnS*7Mi94FNf2Nq87JJ zN%%Os7PA+m?ChG;1W+&_sgX%fwTcuG#nFse;uDdJvugwhBjB^>eS)D=;%JnfBM~Vg zwtv8q@zNuQo6&eRx;varCbMaP{H9>q$mpt*fzU&4Mdysr#?kp=>^K#^NAmlxMZzG( zK;y!~b3pu*Q}QCxAVj3O!vwd^!9hA_XS5Pg2n`_2j?S|1C}(!uC_C;vkT_NiP06*P zLyF}hO3u&IVG0XywkRlYnUyAZJRVd~i7kans|Im!;MXJWW77Bd&!7&nBGo%L99C4$lV_H=Fkv9|wM+kdS8 z{if|d=+gZo9DtX^|JdK#x9van4t6_h`;X6P{{dV?1RUl#U)l8IpZRP`<)Ia{DoA8( z4fzUEb|m9bi?NIlsN81{LRKI`fm*B8{w!;AV+D0rCT$h;{7UiJjB(pBsa7o|l)Ou? z3BFK0nMrnP=EWFZao6-qF!e@WVf9SCFXLgQ($%=o3Y3%5ZZR7Gbd>)pJUJ$@AKvz& zcgT#sZLuG~ ziaQz%`m`cgbRhVrstFQus;>>jMART;Y*EmEO0nFEl496yR>CEHDx_#&vSXdA3Bgr$ zNYUdVqv~anrSy`C?YRTYFAY^}tw=a_kZCpKF{@`YS`1<1D0K>|85pp0Zh{W#+G#b0OY2wIa4IW;9uYYU<1 zkGaJ)O?;3J=++us!>;YTQ@l4}1hOF<*~u&Hx{v7qY95)e=Tp>IXhuklJ;az`1xz_u z8Qs9oVm4&}M^9<^22jYZ5%vCZ^zsFS;ZMO3Cw8v5WTFNu_Zwe6ksyda`jZypQi_sc zvAQlMFN%t1Em@rDHimk7jbfOCJ`s*-(mkIaI|5uyr?F7bZ_%Lot{PSEoaVwt1k{`s z0~(q;$<)x)-L=)ysizkc^#WZPg{Z&W>Mz&&Gm4kzw%{lTyavCp0zKKZ5C&-5rrQrk z$s>+)nCyVC&`pfWdxNIiqs3%SH)3koqN_grG53Vyk|OtqJr;7lp-UrCnqmnk>Sm^Y z5P3=Kf7D6SclFb17-}R2J+I#rMl@jQq+zPh+{vnXjAYWNh4K2!Yl-~uR!w=p-Smw2 zisQPmT{p|NjnQfS&GgUQ2qiw@6}3E!JgMCYP$h&cm_bcgi6gn;y-SgyVJ>q zqwE2&;+Lg2{W9Xe@3{KE?ppu%S@eGgzWDDC_8;u8m4Ivg-xemS&+@I0-v6t`f8Xn@ z^WOjL?RB@i2j8ymf7bNhrvD!m1AbZj_wCLO(EslKL6>16;|041-JNy( z*UxqT^NQA(BAdXj6%H+^qo{@drjaNqd^=YMx^r-Sw%+Z}f5GadlX|NV9Tm(O+niyO;GEOInT3VuI9 z?hqDhuu=A}v)#~??>R26+Y5Or(Fh0n2&s4YJUF{9lKk!l!0Yq)1a$!^u%b{viB`h# zI<4V7Rx2&|<)zw)<5@f^$Cp-Q;0Z&FASx(q8D1vCw*xVTfgF2vba@0V0JeCLeU!jW z3Hy)&y+Is@lMv*~vd9X!M11xf8Glp63p+B{1i>bRz7=>k{X1V+d zD7}u~n^TMP$l7pz2tH4z7`#LqRio4HY%+eN3B!(uxF6-%!dOG#PDTpg=)}k~21wuI z`)NMgeDGlJ+s!UKU7WqW-`uc6qln@#4S95>=NgD0wt0@Lq%8o!(!x#dab1R++AP=+ z!yRbqhwgSVs0$>^hR?LJVwm-&L*wc=nYsWSACS3%%PN`lmK@v!uAZk^u90@LXxSlP zSOD>_ZJMpQ4VxR5M>mH|pI4{&&*F96Dy}V&SwVfMx7Z%;G{tPSTWr}Ecui?7eb@J6 z^oARGLoVrg-9b`th2lSNqK0abz1JSv+vji(z2R};eIclWiT6D=(fOEhU))G;IAa4O z`f+(ZhsaACA@6p{@H1S7T}8wh7QBtxS71b>N|bIYbm+!9p2a*% z@Md5C+?%k$Pf@Fm?yIfX3$u-@nSWI{F0z({0qk$GXNC z@p2G(NDfl44I+YEXVB?%HfkEniBY{Vj>?9z){aNSWZ+VjN}uTB%Er!K@pV#+Lp&m< zV8Pl^tz0(K!rE9-pat>Me2KxLpC5{tkta{GcM11z6}HV$0>Ln-t{7dK=L?P9vluE3 zysTMljs1m{xCIO47dx|M&m?KRB?(bP~_!$OqdLZPV?h7hn(!a4R$^q|Y4H z7SzDKNNo!33@{H7gXp_4dgx?mR;NB9(U0=)&g39IXcmX$;^g zL5xjLq3;0{8{GO<;B6j5NC6cph^BJV^I-R>2!UaqfYEoptw3-#N^t4}g^@IJW8m0`8nn#v=I4@Vw956TapY>`N)fYC1FC7~5Yz-g>4(%(fhh`;(C$;BjoDdTsx) zzW==g`;UWe=fTd}{&Q{r;o5(2^S;{!0V%it*xBCQ>!SU~EG)2c*l58HO5Rnz^J|+`jw%=bS z)0%c4{yMoxuB3!V5(VQp;&e)Kn-qE>QUFFG#`JeM%o?QH%g#Y>OZw%@_+5%Fwb2Ar zU}(njF3o0BkP+GVSB++pCCg)(td!*j@)NeK*d{F*D#1x}Ll?U;y9++qWiSeUss;wj z^;&aRY(*F^G8sL=$dLLcShp5;thAfuo4sAIMa*#x2lPtX#IO&vxYPh)j!Y#(j}0+3 z9y%GMTx^E>ZOh0XElnCkaaJ32FKV~j-Uu)lK@u^Dx}#3U@$_O5UnGNcWQ7h{y=yyy zC!jQegUik6fV!9iNEpJ3u5-ujRT~uG=ZE?nw-Utr_$S*AIc^_#0>@kabYnWl7P(l0 zegOInyDOSo)4tY`EwvdBtr>XqJo#i17OY2AhoDF@l-s5}T&;aulc3SphIQ)+NotC$VFll{($8?9^c`a78++fI5@L9{ zn7+;XC#S9O;QPjtkp#jx{!-H?#syM1*it49@q6 zKioWvb5LA~1A&*W4PO0Y@aWmYqocv=Lo`NcLmn}}tE~RN-{fE8GQG*aX`DRV{J-PP ze-BRIy#KoS=B(bts*ew!1S&TE)Qj`$>5$cJ{)xfWZ6p8sO#|XUKs!&Lzk2!l*w++m z2ly(i{^ouA^qVHONGz0i+j^6;4}46I9zJ{KX{qt2KmONA`_22)X5-EKZ&dd@p|$K!s%1 zlRV6P2;9>mH}hA^c>@FD#t}@^Ofi;TZZztf9G_-NA?;~_6%}UOJc)V9>iy+t>-gw% zEM9=I{Bl<@%pjF)8?9W~<}m}WsfcU*KpieJh`B#N5;PF;spJYX$<+u5hmoLK7%DNs zB=d_&lEnqF*_81H6UaaWDoWAT)qqQ#OyXh)_fk4lY~xEzK4ZXEt6{>_H||K~z+u3U zzuRL7pfzvQqk7XDdtQXa;nCVLIJ?$y`y!hy=8Z0!66M>#Z;S`Vlsx?ggGbu5T87Kh zbh&r>2ku1f!-g%JC)95u6*awVr+N z$HKea+V(iyZL(w(+Ge0O*e-B)fTte$mgp0Ze4@zc;Y>oW)bm)(4=4sLYfmpJrxM8| zRK|yGXWJRA^-7arNMhsF-q_e3PRGRYnCAyHKpIYxL<1C+j8O}!Q7t!f!|v;Azn6{}D-ip(q-o4_DR-_v`;#a2WKEBN2VsOcrm;=0i4pB{MiJ8jcsD>b z3umDgy2SLg7X14Hc7ZGoS}QkiNP6zclvQWL^Syq5nR+v8)rnsFzMvh7?*lt%e6JaP z_+AGZLHlHH>x_u|-a|Z4Z?}o}bpd9to)0A(;l^{?*++Ht-CAv@uDnal9V&68YNAzV zM*`xuY9GB7t8K$-Td-O4Y&|_inSedxE(U)&8a(;w#iOuZA-UqRfAdd?6dq>#j`~Bjcg7og`1#9F3y^v(e@1` z%Gb?5y=naEq(H^Gy?<1f*Bg#f0<%cOy}zQ(|(36L?K=Y-iwAKNux*v`eG_r}n!$ z5Hs~Azv&b2PPTq^26|xmwKqoKUW>8Ruy|$0^3%zd8i2u0)I2LaaMqrd^N_M}ys6gO z`J>x*FElJ#cXjkLl3@UqQ=Sk^Rr$&wRaG2_N(r@^a4Fx$SvGrb3|kPP(73f4l?SfX zxwDaL%h^g@2-Oc?z_`TxS?1ygbnV7!0r*U!zRrT&Le&3) z_<66E;nuu;skHrNvG)PVS}}#6PO>%@X)Oc(8fCzTe|xx0Oq(yx#_7;6rfIj-*i=A9 zGNm+X@Rxa(zGKG{`#U4o)?ae4Y8?LZCU1VjFGCt8||7c$JmxTA0fcp z=^d8=<1a{vaCWD|8yYJb8hMyAEE&3&Q(VT&d0Dx^4=>Bq7;w;&cuw;7H*cDzh;G=c{7rz34f&{ERM>7y|>K)Z2^hML*JuR&-DM4XGq+gXfP2KOR0r z|4E+>*}V_l*8UA*UHhBnf7__$H8!ne_GJ$?2TxJ~CoGEpIlL;s?KfwW&<}@iIzvRuxyZdYZ-_PX#dkkwE z;T0~r@fHx3JWcY_sPDt!0vNNXuqu-wdis{$Q>bDQLWYVdGxQb1xqEzT_2}#>$`1c1 zMnNx+pZ@juA}sS@ zOua}O24c_;uZ}l&o5nCUELSHLoKG6dQ|5RT*WO=2DBO)_D=w0|h}Hz%Xsd2V;KRTt z4pDBJ0`9bw5R6_-D}5$)zASET?;;p;ifB$RAd4W*n^D?aPYb935|%7P74!H zC@>V&h&bbJ;0Xo`lPGOEo5(m?h|VhbxQ*;v(e5(bqhX=|01fD)I;ZgMR?Kdz{vqS> zCzwiDqf-G?-V)&;O#Ezdap_wYL4!6tIg>*Hje8L_mvDIVbHHj^ulevD6YMWADCqy$ z)uN0+s7Jn7Ks*ko7fg=3#8q*^Q%&NlMz@6!L|8>z45gYn=BN1pM=}tos?xJ-vGeTs zc?z!?@+LgA#i+d;-9)H4y#cu~xT}L8z@q>f8ZMGPrmp}y)|#6cB~WuDGt;tZX%YIZJ_0B+IV0Nv$#Nq`lip%NE1@|@uo4P8b* zWjS}ap7o0w3n2SqBn;R%i}0$JkY7DBLfJ~`3lu<0W3d!SoTX{5BgL)tKWq7~EdJa6 zgYJU|YyHn!|KsR??2u5a=z+@hKL-c<+uIobZLhnxyM3_x72EyY{dNA|&!+#;!Uj;# z4IUK0i3BIKKl3aZf_(Sz=y;Q^sYDDo$kw5SAz+nW%nHyNT(YB`=P(=165?h#9}~q^ zd;(xN;Qi13m;^t!YfsOOUIEjpy*xg1oIk~PQ~O>9TZTUJ z??&^YH}ixd0WzW}1#P(~Ap-+8TFNvY&~)07R9aCM4AiY4=NU%(-i_o7Sfw;B zB1l$K!#4M;R1mF*NTA)3Xr!+M4A!O=T+Rhy-b1;(E(ml!)r=frmLQ^a>it`8>0WI`cQD1jr4YwINl7?kUQyzSf2U4muURD01$E=#V#kjQ~kCn!) z%6pW!Ja5kN(f{I>F=y96QWO|B$5#sk!j?YHaFPf$;@!jKwv`tWGezs%}0TT zwQd8@AC>SwG6BTGQhJy(p2k8~O%d?o0QwKkV=4odxoUa^Hy1>{WY zo;a;~@wQ&LVM1xi6+#6F7(Hpgw^pN!`Kbl(C@i@Cz_W#quNhpXBX%B6(j0jTo4?P? zpl%#7Ud+J~`b~j~VP;+u`0ohZ6TrxXU30Dqyf+1^s&+Gg+Y%CBTzijpPY^}_rdG4n zK5K^`H|S$C@!kr`L%g8;&y*<7uWO*@;-AUWqKsfg^(gk`*HDVUb7i*9du zYM?0@UD6Pd@kgZKJhx%HJ6jad5J(y9Fin~$<%DTGM_TjnW(mr}Xfc_T2=R9=wFWul z8%stxD7p;zd<|@MiBclwhgi+MI!Xu!o6?Ix{egth) zVieCpRuRFL21J)grs2dbaO^f{g&*pwB_ho2Xep=pCk35 zbpoJb=;6_E={7a{*0*b%(@@N5;iz5%F0+;+JP)g#41DU!qTjn8SCV19q}U3XTuPBY zK7_w_2xbi7@)6{IN+P1H8(3rRUPH&ys`(d`daaquXChV0lfRMPEsf%Frz=*KHZ4uW zaR+O+>R8HF%{#p>Wbu}VO!3mw@_;R>b%-!9$P2U~!y1t8+sskGgrYb%%BYC=P#TMW zxH&bVh_cZPXW3{#ij{^Du7GnqS`+}xtm@JF>2`s73o?8yrfn>YX@v9#pC@#KHj2GSG+idxcmZ8kWFq-`1w838YOK2B@r5O|3_6>%Khf0554so1A7j9A8-7j@KBqE*_BzB z!^?C$VvG+1Z^C}S_eLH5TMvfDqxwtpkrC=~UlL;Wpd$J@TdbXI)$h)&bDpx%UN|1K z!g;ZmRJCTXnzfQnKxwJQh7TBK`AI_S+UMFklI3_8S&NLqCoMwU#&b#ZsjibB^I5qFDvq=IUor{ za@K4vBRM=o9YncG;CcyY3x5Y14ef)6t}N!G7@=lD_Dd>Ai7BLW<=GY5u5`DL_eyq%AbN5oaoX+b9 zA62iJ!!ng%{-}G79>u4bS*uajYLvAaWvxcJr5dHUOd)<5@tSKz%36`Krd?}A%BNAJ zXnGMQZ)UO3nqXW>2mv?nmRPG!e$}dzutLp10t%$LUhCh2nrsynSVaX`74=s&<(DN< zGm)x-G_5)b!>58!{cBT3fer+HbI!(z+_2sREFZN?-^Jrah1yumzsdbakjirr$PS!}aMFW8>)ZZ>6$l$JAO~-?V*{zGu5_GdVv8 z*D`iQql+0i^-m_V>0gdmPybq^8F*+=XSr?AiZ~mc=NO@!%GS?qZex8W-tJ5UP-$o1 zN=F}N+)fOHD>6#l%dwzF6XK(>UFZ}i3gVh zT4$BmY$9J;t&hHI)Diwmx^}E4B(1L;e?wP}He|?6BdA1#D1Ua-2rFu0K4S$=I;YV$ zQCI84yTH^FM;20J3y?+TjUM*QeyE-$-f}bn<#oO8^0g&aCVcQBW{0?90gRbL28JA5 zn5ZH+S{5h{z*UP5TEemcznkI1hrqfGY774Bj0W2Fj>TwCuVr>bUbTTX&g&nH(l>2* zE^g|z(#|wz+A`CFun$iVV~ln4Fw=%8P5mH^D|KNA5`3?8(9v|zxx%QQIIMD@BeKC_+>SSEr z1eObP;)d)~t|czDEA(-^AFo`2%~XlsEgjq0Y%v{)p@l_{B^zGCyL{Aqb4AW#=UZlP?DdhvQ-#EiAV{nYAxG$s*O`5(Zdm6z(H z7DYX+ye3PDpk+L3CA>G*!aB^v$7S-D)`+V_(i;gQu7c!RqYBm62i8X(SW+$f363nY?OA4b#(@5H{g&gV&5nrxKWJFQm_wtJo4Tbd zq9evA^Q%5Am(S!AoQ?$iDnytbtDT9OT-Un;T&;d6F2Pf>P-5qUy0d{_U;D0Wz6jhE zi)NI(-MZOQ6K}dD%4;%IP!o9f8h}#ER9(dcRVr%|yqEHyPNOhMCTB?&*6ZF%vjB4_ zttBS8OtDZ+ue*v;0eZeoA5afF!MUxfT9Ca~R4vHz%T)`qXsK$U{Cf5`pjHSn$9xh? z8DlC}H)EG*b%Pw_D%_*ipxm4MZ~aaEzF!d>l$I)q8dcd*&q+uKGaSc@+{dV6%lL61 zJJ49kQh`9Lw!+a%SK^OF`WqZyPlRrLoj_nE45(Dk;CJ{|aXcHw0{3R-5|$FHIH8UM|~)l~_z4 zu3(S1!4XGn4YyP#wcbv8U1_?4s4kjXi)VaPrQm`UCu@MSmIDH z!cP(I@fG&*g+_g}2G`)RHd>Le)d969+85`6R2NjUXTZVY&(G0IZ-gQ*&kF(J;o((I zh9A8p=0|oglTU2D96k?%R8@rB<(rrQ5w>#dS3Mm3zISia>8Q`$k^&xs@$|yz1z$7* zZTj*kgdOC*jHp7tjzuzzBqW&NQ=_En%B$zZr{^(TLtz>|DaHFs{4PGike*xMno2*v zPy+auqpjnkFPZ-Rr@a!_wD4EsEc`23DrpA%=~wHwtU5Z}MlY46Su23;gdkbzdG{@K_yFUPpV&fSpD z8IuBe%CbpHSvR<(CmO_rNu0I83_%4$ws;JBS5c&tL8C!D5v?h#t-^fWr+)9lxOlqR zI70MpMfXMb_oG>HnPl(NJeV4U(I$cA^dvvNzoe`sSR{CTLs>&ukvo=igv!BB|Eyjs z3b>+8j)+#G77hyDt_2KtI10}^gk=eqh0}5&)nE;Z#f(Ig6);J#?NKp{hR|Te9A1J_ znP~Jm5oR^soUeuBB3x|=wNN&cK~u>Qv}#_V%}k_O0E?PLvv-g;b38_90Q{E9WSWpA zRtgWd}D&PK_+xZFeqB_ls0EsLKaU4g?8NK?@FskK<1a8KfP zAmFBS4Wo*TBw>y!GfNm?@`S{5sm}Ce0MuQ$b+0PrqC_X($6B9gk~LoZQ`TG@-W`h2 z<~Cfm`(PY!_5#k%&P|Z|(&cGmIni9N;6)Fg84&U5w9R&so=?sKDV?lKNE8_$q|>4W zzF(%2;)kS&z-f#Q?W(|B2|5ksn%0V7V(_a;qiRK6p2Iwy4#$f;eV2qVpEn^0md9C5 za+ACdQ52rN1^IFq1DwQH{sC*SAI=t2o(|Zb7SC|8gEIjtL{tHG6=n)to{kLh3QhpM zI$NS;=m-U=((O`ftA4}# zZfCo@{Z(}JIRLN##ADrm_51v{{UDkYDU&|B2m9Y{@9gjG9kllzJm?p_-BzB&+3<4fzYqGqz3X?{s!M2RpF;_Yb<=ucFR+|9|p-cop6rUdHKkK#MJGaY4nRjJ=Wey587T%|~t2mGJUSOo9ji5qNuVE&g4ot`Cf+rw;ij5w^ zoM0qeMHrekPdJh2(sXb*4NZLkde)W(GgFPhU_Cwq<=Fd65Pj8@$#xL+m*ZJHf>zre zSs2%<6&!WV!VKPOa)oBeQDZopj#5fe-dw(j>fYu1I8RiwM+K-NFRs-ciTd_vHa$-- zd|yPZCoxRb(eB1eU|6q451+4jh&i}`GJ{zWZ!8RzeB-vVd|F5L~+3s|Ax7Yfgwf<+l z{-0I<^KEzc+ugmj{%1}9m(l-~6hce%KWw+}k^biZ9~u4M-rm_;>wiASKPHzyRErzI z;tEL{>=K?Gz2de4Xttq8!;MC1J_Hk<>-I)%qjr3m=A!02o4rfH%4RrYa^)1_e`9NB zi*!6fEe{2y7(&zw_6V_uqu3fozENA?*JttcVi8{?(NU6t#n7v4R?LR8anxX=Yf|OQ zG|2!{ui4|A)JJKOvw^Ys!RSaWfc+;Tv`wT<94t7#r>~;$C@$gw*?ulEuo&Wsgbh;I zuwJ23iyq-8hM=IwpQh=*7UD%ywwR0-^Km)^`)?IC0OZ}r+7}qECSp^Ab^loFyuZ2_ zj(i~oBwwf@$yaXQ@ac48P5#&X-`Ehf!>?Rz!Cl@wmn2P{{Gf{nWm^KFiV(ztIS>n83{a zDCYUJNG7f5HCPHet&Q_}TY1e4Ch?R(PqLOr^2@V){+zxoVQwW?D;Zjt1ArxgTSN{Y zNYwBQ-!%s4%rzJ^q1;A|d@lUFYL9cB^*To#hvE0(MmZ89uMHW*yki>AE9as?i=lWkurqIqjzZxK+~(fcJ?C}fX^mE0)snMNRpK07-^7-4EU=) zxQi@lZMG;bW_D|d+gjqbcAUXQGUj1PM%+b%t=Sx0A*O7K*TZp=n?Rh+qqqRm@c}MA zc#z>m^mN9=&`eueCVDQWBe5h8MOKMiTNJjD#O~>N_l*#SL86^3rYWu1hM7SI^0BaO z56Gp@BzpR|bTYtxG0^p*WBP%8rHooK8F~?qv0KPgmE>^kohNYdEs%m!J?Wsp)iR9@ zGp6DSGi`YWc0QiP;25;m?r5J``SEq0=7aZfsukLng_;_K!R-ee%i?kR?<7DW|N#UP-x{Mz||& z%0PeBlU0sS2EyCYFYCK8PDr_?jQEQ*8ad1`1_R;49tUthQwAjKEBKD|z#+u`FuaVxi3#v0MOU}3 zRzi{z@o`W4Xi~!-x*+nrmw{4=w-j2}h-22=AGiyMn&TOTCqx{<`3J6M`cbiLr`Rw8 z-UaZoxa1ANH%}D;+%xdwnJyqQFVHesiRZCwaQC66rvVMXkHpVtKFi53SJZ65Vq*}^ zuGNUP2G8TGbh4O;<9x;)_M(QUyJyw4m+0@833%_qyq6qz;dT;lJE4P4UlrjcWVS6< z9zEA$Myx>@$`)McQsv=C$*LUY#Gw+1DSg?lvq|}8ZnGmN&KkxDA(W}`&PK8Tq=Ox; zvne~$B?WV-S29JfB|;7pk)YN9o9RcLc4xzgW^3T22>alrBqq%CMqOfNBca<|J05}G zy5745haW;*6ee*M=r7P%2mZA+R@uOS8eo(bA;~3;HqM2$+SsUh!y9T89=L2o!hzMO zr!HE)iOKcNxwibpVXOD$rVkX7+qrMmE2WM>h6GfLTO^gPbq!lvej zx95lx$2uHsvZXd3x2Hvqv<~N*Nmq~;$K#5i!5N{cAMWtFFZ*u!3m`W1C0_U_J{v;6 zP&cUtW-KxYK`{s~qvRL+@|Ta+n5qo(aY^~aET7~qI+)y88%HsK81R=EgjtSkqgJy0 zl@JtwyCnvx$OA7-@Jp~$N`sa;x}JS=?ZDXhQ8Q7b$R?^ zW>&M7;SoLB0?<-{iFG@B&pPpxUR6Z;IKe0&vfLx(9i*0sueeahcZ+e*8{mk((>7Ai zcbJ;z=&LM&_%%@x=WmIXVUIBIvC;}_I|ECBe}lzrSOBw@8EOftA7JRkW)^V8ra~+{ zlGFe-H7Tn66)0IoRb9$zJOyf0qqMi7wyRQH&mr6_S+?Y3mQ5h^df7htvfX0>4Wo30 zc5Q=tSFvx61;#b(`B)br0A9pEn;KtZ$rh&(gG9(obOq52pT5Bj``bT<50_jHX#tB? zJ9N(yXpRPyl%KqK&IqM)=F!dfcZSM>ll8k7doFXC(UR*>|SR${p_g7;8a zC!vj~bjT|zCqylkWyjJTXw7=$aSB+8Yhv973+7k0FO=lsTFxD#)EZ%1jEZG%lgvXj zaokX(f>uRpRg#kDhG3Q5Pu}-JgEAR9jtJ5=%|WJeU>)eklrg9T^Qwf zmQc6LpXMV}g8J{jf@>KZf%Zj)(-gJtLmR>sjMpPZvlR^;fDWgLBSD~>){#?dDq$I&NW3P+!Ou^fG} zB1dmCba%wkCn1(TS&5}jJ}XO~+&xR5ENAJ7mp4n_lBIVRGdKk8i!BJ;9ZR3A&eA6z z&(bFqS^DI6$I>G+Uv@TFEWI*~A%hPhrwY5c+ocE$Fd$d2YA+kaJJ)Mqb?{gd+6KF(Zk9u3} zcH89-#>upy%s`{>qHUb2dk(?G{$#bKy*V)D z)<4cR_8kh47|g;2Oq|9x#bWw)I(t8@`$RWN{90rUt1i4Z>iQ}xOX4z1N6I*+R@lqq z6pRGfR{=)P0Q3d<`F_$Xn`R( zgijc4pw_oA{=_4j{6Q z|FMq$@#~ELwsX*3$NyNz|5z6P!-^kJ9tWg6{zrFrx4TR6-}Vpo4>~(vb+&g7cGvOW zK3DvY$80SlEfxe`1$kqFK&LL6%f5;wcvMHK4+ zO|Ml%v}7d09jqG5153dGME|^)eAS9D?XWj?$HVD$%NxMs?qher|NbloC1Pa=C8COs zfwyXqua0Y!oVAb~;%foqT)7F01gl#-uP$#0l|hv6ZzPQ9O@BsSP%V?=f!Zz&p+J18 zU%8<YTpKwPb=0vz|B$4n8TSra7KIuANf+AvlBvq&qi za#D%t`6V`ob2}h9q0-lppVV6}M5z}(NlK6HaT({AgMk_-WV|#6`Xeh4kxGVV!Djg3 z9Ue4=4LLqz)K&5?-cgHEVE3Zg**}wEq0%p#)&>7s#F(+AymMjSn2R_a4QyOR8)|wl zM)mc#b7}J7gKiGn5--f@z;UEQ&ROCkl!n!@MbwrDKX4bB?wI(sTtBKuCy)N|=-J_E zogCd^K{*ab2^)IkMG$#^WaGr2m!gh?Da{Z1kO*;n$Bh+v0@d4-`8X*OE%1aq)$r8U zLKUea;1N;uTua}qxi=M;*=%uf>5arBU9OO@>*q^!mK5)k1ka&K`C)@}U(sjHTehJ^ zs?goC;I05m&_w{L+M`K4M{Yo15kn1W*aCYT^N&R1+3f9NZdm~VS?NVj*!Uwskd0K< z6lN+L={8<)Tc!=WfpkpT5)?D>n}8s~!xX=^zms+#Xkc88 zLR-)T0<1Q1xCQOXUW4K4-D_|>p-e?M05}FJ)we(=&pbskt@|(_?6;CGlKr%8w^{{ZpQ-*;7F-kM~YhKnQPt2|^N;HM=>1 z(bXU~#bs4Q;Qfp?(=q^RJ@X9X9@8>8kT`>oX=Vun#yE(+wl~}M2K5*rfy)`x(-(x0 z)B{`mAm9oQ23)~X%fdX?mr8Rj?=9tyF<}X@OqACwJAo_UA9g@;hvadVE0iV9ve{$k zgce;w;yL_2&69HFvWb{@HQ?t(nDAA~^}OcJ)J!NGYhjBFO6+BV;4aV{Me{D%hG4(e zJ|2o3ZPYQ*eg-ENO=>xo}5Xx&XyE87e)2`W^$ViO+gk~+W0 zzzB+5pp&xNQVCVBEaF*_BsGbd)XBC56NV;?a;J>2qYYwiNT_(QGHhe3JW9_|W-3s^ zOs@-=y_ z?)x?H8`8D`O&*?33B7{fkpgcePRD{%WocQ1mx$|#@x02V=(owWB9#3y3y56rCKK+>*|DkBeeDa z13w53$%xl>dqoJtvMQPoR_v3gLY(e{x)62x-clO^`^i*?us3(f*l2bP)%Y7VC9dnx z)f|q7!%=Q5;cz59Wd+$T=5MwA%-$m{<6nrbwYSf0ZfJe~zrO$fz2ElT1Eb9bnGad(*l zZk03|_YCp>eZs_2oF1j84iYG39$01$%Y_SdH`9uNM0`1FS#kB@ZiJc@B3_kGyl zdDNfCdu&(@(k!sw8RYLvCvZJdpth8&U+QA1>%FKeWBk{tp82l3&7YwgFbSaP{FTk7 zEqdie_hm`9=yPm(L#BNv^9~pwEd-lzbhapXWPWs=z&24i!rJY$ABYj{@kd`bk4X1L zy*!1$)(qcx2yTWgLj6=v#V2g*Ku)&HpFK^>)_7TH4omO1lQ6D zbWe1Psu{}59Mp0d?C419NcjvDnImZ`KF}C#`Z?FX!3a#XYOsaBq6+JYWlHoNX&u?$cM=gw6sDwjCgRnL{*HgS zo(FV|MAK0hAZVHh?9b3oQ)-@z>G;}`A4PjwBRiH-)mJ~)=xlil()VpJPuL+*T-#T! zXURE0^L)XOiWdRMk5l1lxN@!*6M{+)x7#X%jw-j)3IM=g#4|&#go?luq^kB4k5M*`lD6S1_x+& ztyfkJZSJhF_GFcFW~j$A7VmVd)+{>1h#d@;0!?!>(4M;K1kEw)7c`rCkQ3B_?*6`_my$b3UXS5hom8+w| zdj%EPG;W9#o<-TkO9@6nrPR$R{{+>A#ttuT`=AJ~ki(f5hJw~Y8HIQipJsI4d(jWDG-_qkny5nbZZL`GoEOGh30=vGEb)!rFznH( zFv2YxmN3VV_KLnh4o7d(xe>-8X9p}pqU&fr1M&(6ACwnqlhUg2TdeA_3sB$7UCf2` z0A9s-{ovW4p#qzm1{>eNxyOET#U>8Nv$m1k;=C;$89}vt!umQbEv_qSqi^JuSE#g9 z_5vYFrw#8n9;F3;H_O|TH!7a63J&Z9$37l2WEXd9{2nA6*g)VEw<1obM)a~?AgBgy z3o2BGVZKQjw|?p#1)K6m(2>z(&|X>$QQH5OjuA`U$q=a}{|Ld3vMf>x1nyV?MYSu5 zW$AQij~m9Ap+L{zt*eT|W1uAZ=c+DqBqONsRqJ*_v~-E6qvt`VeM5%(!i4y&zA zJO)pOLW;MjLu7oG&Vo5dGacW3G@@;&-rDct@gm6^O)EzD6X41rZ-B<-<9VG;KoYS$ zN`Mhk1(9`aG^WOpgSEBekgTd?hPr;Y8R8A}3Lc|6x@VLpGd_`JAH~Zt#%9g(2-J+F zJe1yf8P-WE^8)T$AgBrs3Enl^sG@Dynnp2p{W1nEoJB(| zyKCvb zFii+!btv4W0pV&s92G3*VKRQZ$CXSEmrEv=+z>WGvB_%A9TX- zR8zWGnrYoMwYrM$3684;q@Ht25Rrh*VqnI%*{Ci*_qIQ8%S6 zc|UquL9a&hfi%y~cmimF{SN4ws2`oY732u<>Mby-SmM+{q!SkomWEwpbUpegaC7t$ zLfI#cO4T~`V-Fzi$6${@eAe9bPO(Tvyo*HZcaBV+u?rLUC|{gWgeGKeCvh>n{1gau zcjypLuN!WdWuIS z?Da&T3T`nSv6H?Jx}#z$*Ak%2s0~t4mPJ&pkL#Yr~Zu(Mv{*fZ7D6ar?kfB zit^ekUky~up?Je8fD9|ub-6FLUmY1RRx|^Ty@pa$w4zcbw?{v4p{(F1xV1Ijmcl)c(c4ud2 zdu{*yne4y6$Hk4_C1~m-)|e+3$%ipGc~i_YU~Z4W(Qh&vEkHbyR^Tz?Y0o_EL<`hc z{uoa%CEj_SySrf@Rn&XT>~22i2EHo0KkR#6%Uc z7~gJ35FHQfG-W@Hb0+nl#lyGQP&^{{QJOXQI5VN*^~2}4Ggbd*4jLe@-8yh|_sJ#gn3=6n^na}2=Qm5EBQGucpEGhva z{WmM*uKK;`5YpM4T^C7&7kp8pv$?&u*MvwsDCt6Ay0f$(h5bft@apBOM=yVRaXdJF z`2Dj(PzTT3ahAo`jT44%jkMXul8yV_`=s?qfjvcjy|KNw-#l$aBe1&evw91*I1ie- z4Kbnq5l4AtKm24xe3s%O?ha}@g}6!+jD4^a(Rt6Xjapzdn$51#3FXTun0&}U#XB~} zMCQ#UV@aIyc$=7|5E+Q#CNd(QM9JhV8I4fcfEI39l3&g++#??tHFJ`}{cjhfeXIx1 zO4lyBwZ~&bbg{;RYe(>r=4o63fT0~|%3xPU=9I&D%9@KL8ZyEDI!eyZ(_zX0J-+5p z`d=?!EBkAPstdBsq3$GnN~s7LLN6mEs8?o~#fS+jgZMXrhze}I7!6>fiG|burXnO2 z2#6p~utZ1y^DHfrauZ@6lY9ziIb1IdX4znp6fqA~@CtJsc%8#F8(woWER91R>g+Hr z)Lzp6ZBqO~*yK=`?!iWBh7-TW)B++(zF zxVqf>^YQo1fVPDvaDXNof!SHNGmjH?DzOE}5IE=opTeHh=ZBXP`8*k>3`tq%RuT>F z7T?Yn42iG6ZTmhO{e34j$1^pXUP*KE3P_Eg%?NuSYZOmWoLpYOoi&Gm*`~+v>R$9B zdCv#qTgxf~syvctE2`VeTi4ftC+oUgOy3UDk#0E5b|&}*vC~4>58Jgko-taRrB^G} zOkps%>-)AxJ{16*OGfIe+P3gT`QE!B1b`^ml#&SB#HB}Yfg~u4AS{+qb4nHx&@Jr& zPcw8pDdL8-Yz+Qiw8Ex6#5v~0gRY?AkW; zn-?v$-=}a@i^QODA2&oBc*6TlE2W^Sfnd!WP2t2Bo39-M@@o%ehF+uDJef9R1Fh(O zb{}s<&jSW#P?U`whSx@nKKB}s<(KgsE~@bY7u!F%kMHiEHUo(()n3;5J*p}{-Pbug zc4+SJ222Lk8IF7g-&eHbqXt-BZ_cLU>)>Dv5%T~JZJA=Hb;eJZ6@iSr(DCwjvz6_M-Osjkbc|L@Do>gFC@sN_2Z8nevS*3V`;Kl`JZGNc6HyRhCfhlMJ$|VZ3$} z&e?pI6YqP`Pt!SI91#d}KxR=6XFk+E`U%+=SA!OSZ9t9jL3n`9+MtmGww*0N25rcC z9#w)}-x!LpvSv@t^nz#0J0>*`-!_bfn!dUv3}{6vUQWT0I(W%8m!qnX?*Qp`C*Ssy z^JXRPZG~)M2_$-Uh_^Yx^88>9J5MT{uo26@7Kx++y+}zGxRqH^Px7=GOt5plK=xPUm{1_tmcRKJqLmTBYXWh ziuI$04qecnHQS7^GP=_6&=o2RbP#tjD*?YjCDzcJ^;nHUd{TSebEO{>sxhu-RdwOp zMf#4F?UHq(p@>r@uh^kW>q+BcUm@o2fW2>c_IJyeiZ$N9DV?!YEYnC0^>PElspo(V zllWqq77GZ|&VVN)b#e>p#e}o^h0rjyt49%nn-)&#*+m2H5{x5RSf6UaZ37pgM@GjZ=k-}RAt$|8;^$4W7YGnGSS z!a6#Fs2WR@IIrRiov5G|L5OQohu6!ynnPtEwyM7tjl6l%!@s9xjc89NzOkQZ1x}l% zrX?{^k-lvh)nK5vPn(nUw24GhYnr`kx*P&t9B93ItkN_Zzz5-;0|OM{vm}eBZz7#zl~|9#s|Xs#AR9h>Scv0WUd+J)@O$LDayNNY0StU_8`i}SM` z^kg~0_3iff9f7)!RfRaUSDTY!Z4z&!ttKZDeKoPBb1a)}RXK!ZYh3pDV2T`OgEyA5 z9Zqgrpp8To$L&y1FLPN~^9Bz11~rNEw?sK1-fP3cuNFB!8gtMol=eX3arL34Xu=3B z)WPe-u7A-Ru&yocsDuq~$?lq=PQ-b1>gSSZKSXjrq}(f1t+8(@1a;|~tAFBv`ZR#0 zB-_dQ>csN3ppW~7fPds*9PKk$Al9z7=@^uNyBq@{x@g09eM@4}>SXhQg>u*qOwgV5 zmg9+HHty6=eA3$_nUB&*P7bNO=b$6mBX@1IGgV(P+!CW1Z6g|bOM^$q!B!GMi2Eem zY&AfTNa1I+HuV8$Tu)V2#e)vSuHZS_&|5F#S#I|?>xpRFQSyhB1Yy(HDPBbn- znPPN#&J9f=pss9``bhURHth>MW0tCrGG*tZ%VhW#x(Z&UHQoU=-{hC?xE{@RFm;Ji z!01fiaLi6TH4OK{{Sg!p05dcy)Nk85dY^zz0d~`h{<#2|A;ENAls=2642&8tk0O-G zFBwj;slYI%i+R&?EvEEF7#z=|htgp01nnC8Y6S0MtK7QY)&1()mUC_Yy|(}SsQBLp zo$cN2-L?Jq+Wynf>=}XK#CVpW=VBhl3s1{|9>q>-^uJ%l`Wj zEUvI`O{N#XaFfnNWbeeVv75?NkII6za7{%5pT`+!HOHbQu4GlUL~7|&>Y>T9XcP_b z2bk>vAC|Y3fj2tJZDLp&xUQl2j9}KxRnH>H7beF-eukXCIjVvYcCCU5c1`5^o|3O! z(Wb&Oc}NOy%Jf0}GoE>nK#KyhRO?OCw(JWnMw zAK_CiU`N2vh(Vj7DKGt+KchhafIfRp$znqdFhnOjR$S1am(B87+x(8LIB&WxZ> z?`IeuRM-AWno8f$G(R5YDqI${K41$xU`?u#2(`l;b0l*5tKA0_49l=;FFMAonVg#9 zJN@DVa>ZkfTrmwbgvn>2SOlG0dYx&AsUF zmkF4SlXWj*GJJPtByumM1mc{G76vR90P0%dMR*YSi=@GJNaW^&XG8gPKg zRgqxoW-ZnLPEdWI==3yX8GvLmg3u$ZC-?(>)lXI9d?S(h_Tdz-R_UFXBC2^`zkU*( zFED4fTtTF=+Ay&)J6Qky?`zoiXZ zw`8&@acjm+e!k%<*+O2cDIcIv%X~$7qL5znh%vQ{gu)8y$wSOVZp=7eq1wS%*D7)} zA0WO3u7n|C;Hq^YtArFXpH=e(|yeg zA-Cbx@#M9*O0lqnuM&^(F0NaI5JGI;zzF_M#%d) zDm`Y=48Xb$!k9s9g}a-WVyLMxt6=U1NwDE>l_olF6BtOEtqYT8zycixOR3GM_3 zeDDf8opHgZnoB-Nyp)LXN{D8|fci|Ty8vN8p1;_PG%XnSrsMd*y=`+LbjbuwP6a?V zAr7*`Zw8SE@w`E`f+pfI0KDZUwp@S5m|(6sOBAMIFyzQov=O(I3+)=YP}>@|vl%Oh zaGVyd%1zFUG-jK5e4Y@|7z%Ow3kr8PptnLrq9g z6EPA6SqZJ+(yY)mEP zCE~eT;7_v`@a^^>DTlkXzpMGD7J$5n=y%CYbn{whzWjmA(`Sopm~4@TYlyxk$ja7& za1bgDOB13A2oIY2oXtyOIoiI9GCZg;j;+rJX@0B{8^gMk*2c~=CGjV#T*9@;8fWXO zm<`@myIrS+?{iG3nW1z=%RDB40Zd;;B{WlT`|jDs9zfG6(*}@`{4xe7VbVuMb6~VZ z#~NNei~oHsQZ?|1+nDd>-HeH$*cxn8Q4y{GAX+_$?4wfjG-t)CFkvAPA^Y{OX!bzv z@{Cb+Y$Z*XH3tYLZLl%SyUu7-YNLW{grVk~lbwafcSn}#dxqA~n(yu~T+N*a)qOst*rej4xNw6J(jFC1_;X!JP&2}Dn1 zUw01ddd~_5iaC2I&`-ulVL>K0KF}FGU1@&Xgje01~#BV=%ok6g++Yarb zMQYY;KO-7~VzQkhf{3ifVLBzau3IArhKAfA4FLg4W?6%^vCbD{mbFpP`LC`MEGz|ACuM4$v1RIwXw%am03_5o3#45x*$&|txpCfDTv^gK<v zIyCM+8Vk6wSZADSO9*!SURJ71RI7(qF-=$KGd6TgUWLiTW(9U;=NOI z=XSD7 zr(wzN?3*S%H?$Q6v-e~UCjDsIZtgLw7ut6sk4XwB=8lXGnU|LnbMcN<5NCcK~VD{A56!zvquNb2GqY{NaWMA@3QC5RT$kgrd?|OKiMy)7h7sY0C*SfkQ5~}4H!>+hYUy=i|w24 zcfETqZ8N$#-dbu0bL`F?Oo6B zx>wV2!*<;hFILF!s&9U#^%HWI8y+>nWUwQMSipEeZ%oWYWj-Y1l;8P%Bhb|u?Ly0L zTTW_mH^?hOh0{qt45}|X8o3>X<{zx#?~+xmN9kjX`DT=#R6}bHmUSn#Swy;)Jwoy| zt2=9j)yfW(8r?aK^0*`XL}-Ow>tQFrE8N2?KaL#Y*g?iT$sr1g)2^8u*%b<~IpBFE z>36X#H0GkVImAsCL4~}%ES4(6O)Pg-hz(k)IC3h~189D<8sZQ~dWb@hBjfYHL%Iqi zFALFfFwVyoY>hS%5cLD+)+> zvavqSdoprLxP;pXa3tQVVc;h#17G)hha653H$wqrT#efdU2J^k=C^idvnxA#U8}3w zW+C3_HTLIQ#)APH8NUH{hBvE5A|k78*o&y zFh+Wv4b=fX6Byo3DX@w}hSNJydZS2$XXE4o3;>NGyI^CU6fAg~=?i|4we5x%0WE@z zT~5+R*i$Ct5&$xCk#(87lejX$HRx;Q<@2@lYwTE|-N2>vy0TzVu8@$S#Yy@E@{0*E z9_DrEPRj`>mQ5p}CIQs}VhkflFalcSQ-Z4yEE--16yJWb1XvrnwzN1IEjzIC5~iu! zqZ;CjOH=sC8hil9pd2Wioq+2XQMk=$p3PTa%R^LfN~Bj3pzmB6#pOn;5%)2q%?4MN z+h|AS5J+s6Tovjghd8ys`Zcn-L#`eOPX@^RNbYtul z`+GLiG)I#~{nN&|!T%gXnziRN_(^p=K*G#7;iIa0^yL}4Hba`91~$u8Hr@02CE6y8 zlBfVGT#Ab>SRt%D3YUE|tbUKx<9Z=31z`zZEUi&s8CJ)uPG#tq?Ro z+G`^-4qQ~d^SlblkBSb|qIWZ22GR_DXnxfu9croXwzFaPr+~f@>>PTHcSrTMD!kv9 z3U8~p`yKAMM9X>P%C=naRc_cpQ8RN`B=@7oECU#kSCwZZY{>Rqao_NAn}Itq{epCc z79sUw#&!VkfC-BIpK8l6jOY6oKPTYo!@O-g@9QCe!Dyy>=Z;5@z2soexj~usVyLRx zC76>lT9EB;xs$V=BmXd1oTO4}4(4meB%JcN1XLfn5+rku9-?j{IArPEtcPWP?7bVq zFLbNn?j!jeic=sA{>urIEW5qLyT>w@kZxJkY?J2)Yn+Y7l^I{n!_p1c!M;)PI*I9F z-D#^8%5J(7iHZ~**sHI0rhtGQL~7#0%R)3Dd9_t=@Q38E2l7GfD++G7P4NbuT?3ch z`C|9WiSsz{chLU+P4HBQO#}Gy9?6j?KtcNeQ6IhJO;nk?H3EFN5#Sbdmu< z%`h?EOn=N2V&4T@+(0|JLGCk0EBed29KV@r)KBqAKu4NDZnoV?sqid&$=w5j62J4- zk$(ePR1WXyq4!D8WYlsNfg1sX`|P3Y-TM=c1%r?79%M09>lB<*G5QHzU>60rl%3j; zoJsmWf@ZE1EHUIQV@iw*K@KXuLn~rIj-Id2&gGWMd4-hTFN)CkXcZwZ>d#O>iho8P_b9VOkPqI$(L%X}(qB zw1M7M72OJee}DSxcS$OvT*3Jo(ZaUM+^sUVUYXl)7i^We0cEayD>!f|&AJ^w8H6C` z7uVRBR?Yh9ZO%hRxvnFC1>PDsgs`$+3cTzX3vt&x$g~EG(fn3}z13iEHP~AX_Ev*^ z;~i&_gE}`ba`}Qo1b8l1j+Ouy4^q*>Xi#qQzLGxnt#cT{f57ylZ9#;LN>$mWv&6V5l`U@vU z{HZTuKW~73v)Nkd)_M}zjHl-Q&VDA>X}g2sdQdGQ1jf#UgJ+f==(>0zf=>UVN3Ald8~Vh?2=1v~Y2I2kWuVXIucysh(sCt;S}SCsA<6L0C{ z^XATWu56i>nO&Y@pJm4A1W`iF#g1U?R#Y;iI^@Yq0vXFF*6NttfML)q9f{H-^;kC< zKZIh**-|MyAATOqENtRq^^_E`)RGAVOY;Jf{8c%Y0z7lY<6yf{J`nXrru}pF9{lje z2*_ZRK|O3)9#k4M%$G~uWFO{E!eEFGK99PE;Eg1>~qsD*u-m|@Ktz^{X?QMLpsNx$0#`qcON(p z*6md9&mA3Jmv4cs(4<$8YcmE(Jm_s6=A#?=q2J=zwY$2ntXLFF18E$%7-APhe1b8S z)2E~m)(v1X?MTI*H5~maLB$;9;Ef5W^5x?a#=uV&qS-4@Z1knnz^-g82)W!QGJ6FY zbL>OuCPB}Q%4U1~;pLM|*naE-r-qay*yV^9NkdnL!>p|)CEP-yJ&+~zG^IO{S&^co z0H`D-ynJghKuq>)xp#@0_i4Pxccv#$#)`f`P2iqNM>gs-gxi=L&)ix73^=B2?S7hf zoJ^PG+Wyc)u*%;QmPsfg8TK)0(wpLXIL$9kCV6r-NUo0dk3DZAHus^^#?e4^a@_gA zS7RHOoS1V5OI{azI(kXvl9HIAOOH^>J#e?v3CpOx zge8LuE@EF)zo^nSalc<^+l8Q!a*{0PmlgA(;hAEIprose>rDXvu!Rw{ASZ4hi673c zj{{p}c%{xzixs;jU2pf;36y0NRWI3xU(tm5%xM=OCRc00O|H4p$IlL6xB1v3kR?I} z+piTTe@mn-jV+29+NA(OV-YoBk8#5l>~63XOgS+*p*eAlRdOesOCs_pfTPLV>K9jH zM?7M>p`42MK_C|fc1!Q+e1(rvNSJ^gniKmn7m_R=UbLs|#%~pP$Kir&oMV}r+}Rhs zYgK$hCQ^-(}+BoVH6eaJ7DSQ2K%U?qc7piPpZ`9jEqi;t_-XdAhWswh(4&a zyeRNUhfcV2WdvVfnx1K5HDpBt6e~TX3NY=o>x{b_-)OK$s5S<*B*{za%gLR4Z405* zI?`=MSYOM^){Pb2-}u!d*|gG`I6KXgV;+r?_Vh83O9W}qpd+Z594%)}{7_1vi-3D2gXfpC#!#zLf8? z8f9c1JvCO!`lp2uZp6x^^&26dRf-axFADNE%%>~|*RQmYpTqj$ye<6BILUtpPcz%~ zmk^a}>ybD~1u@UajUu)YOo;FL_(F2ENcg^HNZE+q+ivP<#|q z9hjGB`+6D@UsXS1A02Pdkc42_)~7=FOxY&%k_iCs z4xxbb*3UW(Y^QB?L0w}*n^r~a{rAFplZ9S@;0j>B)jFGq z6su)w>HTj@plB8(nk0yg?i=fl(PyKlavFTx3%vfeX}n>YZx)Bo$BEr$li^#VbK9)Z zXNeQ>wo1hZ4YJ`au!(?H=oREEM0G&m)WSQx6VV;m>YUDI4{)>&jNx7s9OEJv51K2c zdLs23%3Hn`X+9pw3d2N9?rjAPrPgSI6a9`g?Q8bRz~f%(t&JZqRvl5*kx>ecaMZRR zg&AgWFptP~9Lc9>sRMh8mW$c?0=4p~A)&})3`Y(bfMDlSn#WOj9HuXs8xp*DMU(XQ zRu^3cX5wx)I~v^I+dHmtGST4yF;8C2=>(%0>VC|(=zuVA(a+P$INRQH=}Ag5*LFoH+4i z&MF7(p?k-$-k7Xs=s1I7ir>{-*5_I(7i-foXjt2b;<&BAcWoFlAc-3>oW46D$~NC0 zu@;Uo`3mL}iblLcTCUV?qA$+*x~S4D1LM;+zWuX~Zx6>KZ^PUZnUCc&jBlrT!Hs-& zZDk_Gy!1z1@?(Mg!_rYj{3e~5i0i@!B)FKrF&b99@iz{o|9OTPeq0K}^e)L_4wEJ8 zh8!_0tq_+?eopxJVmW_XLYn40sR-eyg#0q%B}1S~p#Xg=tC?O87748uVgP}Hm5Y06 z$Mt_5i0V&L3?;9a7xJ!NQ`zW~*RQ@N0I9G47HK$wsG6h-rZe-$tTv|I(4#$^6aXkd zFr`*6*6zzrCU$IpyFPfMl7t4yJ&8T5kdp^$0)m)+eD#7rjVJKec)EsQedt2`r4nh% z<~bOE*N4;ooKyzI_EK zR;jUG)W#Yfu@SR`7lS1B{_i5-pF*h}Fch^Xy~JaKs-6oJ;M}H|_a~6;CzR>O0^8F= z&`8E7#d-d=oG)*23_VQ;!*0-{(_0yr3tIdYC1C_6fxY}?okIf5k2GAHKx3nJ*|0(a z5%XYN_-oFDzjk@>N1A{*x#y{|8Ld7H%^JCryyFqHT)Q~DDu~i ztqGGKm#Aq;KS*e@!rlX82xc1m0`R-Vv?@rC_O{H;48i}Z7{5_TCS5;*RuPa=4JOL) zrHDjBx|dkD7ht0*0Ly7qJR02f4C{u1d*ifb1wxCduFC0@a0Cj$h5!cV+S~K+lFPSq z{m_C#@-O6Y4?|OQ5Ox0yBd4@?1UJTqra@?7u4yb(?&A*07B8bC;>AAhfRtz&9SRk= zjX}6pqFav(z=V{+;r`yx+3tmNSm^tV;k5P4l~!TCY;&nm7DVS>v(dfKv469a7AiF0 z(_^=tluk!g7_to(F+A(0eZ#)Mmp7Zo2`?16qt zfhm~eTjd6cAmEMX7h==v0r2oVV2-9;%DOtHbhj*MLuC^}eR+vVz*pD1xJt{nFeJ9> zFuL>d?0i=Wvugb|hsD!q5eSGE7&O4+B^n+NUv~fVr;94{+BkqV-XvcolT0Se(kX3| z%ku(^0|BtEuP*MPw$rT^j_$LB5KmT%dx8PE9{`+7Em2qU>3Rsj=FXy7ZnCm`n##8P92xl0*rqyW9A`%ppZUvu02``@pr@|w6!R|lInb)KJFV_LQO9W2E8D;-<7=uQwwMzUcJ?5}<|f5POY_)sjfKK811ssWiPW zwLT?49a04+b#DrraO0BHo~v9qO#OS4+?#S4KwA_eM&O?WQGi z_XkrGd7WFkbx)aIYAE|>8;lt(s)S?YKO4Hgr$630%<1OOCqJj`3;c7a-z*b7Rv| zMYI6E=|@I6XtiK3`KK6wMs3=Cqg|Qn_kX?=Qgr|;Ca~wmrFl6>q*}%HYPLs@9=^A? zsQ_EyIbkR1n?k&QK$Q&s5;&&p!(FS5Pqj6g&0VS!-59M`@@<_@wNzDY-LSWA*n50a z9!@5XrWqCMGLzzISxhHMnpeqxyn6l&>3252>?N1r;c!xbt#N_y!%z&2Y)e-t!4^m0 zM<_-HKl+pP#iDWoTf2;RbqAc2^+M(7oa=)0rz!b9oEOr>a=d!|@cYA&`2Y2*!8>=H zR_sPgIc6h2bq5Qqo6-Yp_+@`zd@K3*f&J%Q`Db=JPVUalDiK87cVI8s%i7f8GIrZK zmds^W`Yb%~8x$4It_)mbMW8W=cpdRw=-CuK;M#EJpg@pX*qU_>^^A`L;c8os3-zG; zwf+E>BPm+Hz|dvP0bmHs%cQ#m9!K5Y zz+Om(M?{tJX~x;fM%IfmPppBJOsYX{mdcmM&_e-z;T+d$6u(FA`(;Q0<7{%zpq zyG`BMO(5)VfXLC+_`We% z_`&9A_7OCW55u`WZGl`L1D37_g*5o`9@4#qPVU70M>5`Q?DKsanQx*j;8rr=n1d~s zWApSkHx7X}cFPUY-Y7co*^0jF%rWhJ6zu=c1e-tD)uw>^p8#pUEf03U_HUU7eSEC_ zh{-W_Q#26tjj`zc<-QRdeE=FefJm=k-9nPSlwW+;PRACT`0qXkBe)enc5!-Ij!O_n zKPE`_WFv^|$!!5-Pd0^7{Oj`bYgrWgwcs@@jLWD;H@f@ zU8{u2M#Z{D4}1612*h!)ix7~n*I?#AcTYY7aJPhEMR?*MxU4&WoWYkSdCZvm^>U&mWqzwpRwkz4Da>L#^7buGZ^q9{%uj8})9 zub1Km+6X1}l&gS3{~Cbp6w)_D#;l6780@?H7s9=(tx$)FSN~?~Hej|kgS57#*Q45g zngKcqm{a4eHe+F0h@4n&BRo!q!-v$UB%(W3e>YTq--rq;A>HTz zw87Lpu0O#F4>5*`#>L}74K`aOBND{=}%NU5@Jrl*@j20W!hmf zE3Rq)!%GqdHPi#l;(K^-|G}ofaS)2x-%EDIaMZx>Ks)`t2uP(Q zVtbhsW%mGcFNXhqc(sD~E@|iy(>k*ntMcC zv>C~*wP`!TtEhXGhD7saH*3?3T;*^x@MTjod>7nS^mh%7uuXEBP~UXhBNW%~QkX*- zIWdMR8wi*S>GLXh)3u&03gVzy>&QEb`2V?>7*z){{TA449^nV&hvK2#xRa z&{6oP(qL)Q&Lk0@T87Fql@Nx^%h>s%%4F{l8cqm&P(j6khD_(1aVhYW27!cvriIbnAbp0Y7rOHb!Eb_)oQvf zXMqS&d)kTCz^fGPzTG59%$?RIeCc$7>!95JwI$RdosMHEf<#bGq`GR{L;j&}hsnXs zZ9}Swh-HXP0LC-I;)3HGc;LfwibH#L)SeR$&HKpQ`Ab9!P#UZ4N~h&T$yY@%#Qg1C zh|uCA@z}6%_)vbzujHbG7*NjA{k>i&(!sB#Xlj_;-|IL8f!{DUMm6WY%fTW6S>d%G z;{X1aW)4I1Yq*#k+K~O;i}^ol|5s}^H@~73vWPyFUrrJ{^9)De%ZbbRB2q>)7{wWG z)~T^;5E)5kGVqzm*HOknHUA2J$zCn#ih};i;7&UgSEB`>NgFOXnDpIs1JvvVE@q=Q z#WhgUyOhG-d*GeT1Sa@Xi#M5xsJGjF@7ovE4AJ?ZVW-6y5N@5ysd31^?9r$X&6q-J zL}*Z&67(cv_AmK$Kc7s%_7ju9XjzTyh2zLNVUA6OJ0Ic|CFARC_?w95Q`X|%*hoSQ z3`CtWJIG5pC2TShM+9?&p(0Y48vs-=~=YU}2qF4Pgt-wiIR zm3U?uqBP^cTAOD|lG2W6xHevoSeQHTn2$%N;LM{tuZtH}k%p#8=D>qzLVpwU-2wDb zE4i=@g`-2sQPm_L#~Qf3g#_8Po3|IXCttR~#^at=u{gL86Wu%%PKLvA&T;HXnv_1r zJBE&Ah=*n99sjhEu7~h|aX$o%sjKuL5~`itxs%+_0?I(7dxEwa9t6GyozHmKrL02T z7R*P3@S%R=RC+lExelFAeX>)FH>kH^Pq#4)I3RwZ#e%pL;4pSO$k_n@_D{=|ka-Hs zcV&pOmoyx)*3qfST@#CG2P&x-kv%k`{x4%;s*whn!+jW_I&e5hw-E9cSc~2gzY=f`a@bG%+W`&SE z=;Yd23>z~bqy+co(){iM7(Vo5gGC-;ha=gcg2xa-!})yTT7jep9;i@{PGRV{UJ{Ow zb0fhbfokJWced!yCi!xiUw3UH{XQtsTM}yk`u6p*qzFbGKoEWpXrrs$VlBm5Q;Wqd zHV_lRT;hD81vWSt-l%BBciGq!cT>`V9q}hC*3q9`q50u0qs^4dpaLb(#1pZw=*UWy zjVJnQHo-9_#-)qdPvDDgq%7q7okB?%Yqu_}=jN~L(8wAid}L90Ser>?HEpYzG`5Td zpxjlHPB*EnJ=9fun8tZJbmr`M0R(eG zH}+vRqX)nb-~;0J(@VS{s!rG)3jn?ik70TUTTduTRlXm$jiVB^8)%Hyl?<+B>Vo;| zyjWhA;`=pTAn1DHhc>||6%0yWwPb)-R5Dmh#u{^2#zx>d$iP)1p37Eu9u~G<5oKn8 zKaK_mdwYAw8BF&_v_fJlg7=(>>KkJqTP5tl7s#m5s#h~HP&ixQlSt|_w{Y*ju> zf?%M@<$1{@L(xE4V4etN7K4ycAy$b-6nU)9m-F>m8(M6cJ>ok%f*f;5=&TXDI%|X= ztp9rcQjpJLz98j^mqWr@|ABmIB0u7Ri`I7k&;I@0gFRwS4_`dxt-YSF0E6oZt)VY?DUZ za?*%x_6E*Ux7i!E*&DXm8$S0P|KlROH6|12g|BK9TLda5ts6S|DT$ z8HVU>-@}#(m`e7$g~|IYVDi35p3kP&;sv&R1AGB!7-;9RNX~!*j#q2*`27rU_QG6` zb(jrWCbbFq9~rkJ=Vtf$W$=ct4@mqCBlm%|@cS1(Ckq{(7qt{_@%lbjyuRC&M2K*1 z1o^lH2h_Lj!)MQ)l1xKNyu`)wBR_^}i!k_kBMkB(0)EJcq&1VaYiU(*&O04Rr)I#k zfwm#hp5%o6i7cu_>EFNI)5Eo@dXLzL7BEzZmZiYT+Sa`@>eN z-x~5`2z|UI1f(hd!61;jskShXros(?Ang;+Q?E=f#pQthCtT=C%-->^l36f@J!7q7VVQ%A)Ky54h=z$GFP7NDMF}ItH!4It}mI!gYog8jbiW ztZJZq1#BEiAwz)o7rjxY*D+cIr;&59%TsZ~*>4(UGC)&$u<_eP6RnewdHpS(X!PF} zPc$^tEuQG_7f)1Y-xSesdX>)HU^X+GmTo#=b z{vcsN!V>jR|W)P3D?;D5zJmy2U z1AoN8la!EdRNzXvvJnU+vQs$ZXYbyhFy;pZvbzVjALFwP3HiK2LbkyM+hBuju)#Li zU>j_(4K}!y9ltA&B{%<}*g|_xpa=PcKq*{@M91G)u{<(jD>E6iX4}Y=S@3*TT`CKk zZNBC1;%twkuXu;qKj80?pdibzY>9)Yaum1fS&<64VPH@eq^O288TKFm?#jBjQ!^)J zb5&JqjD4Z$>w3Nsj;{qD4ctl#9HqFFFTss~HV%yS$kcqdOtF*pEg*gGt%lu2mNQdD zt+E4hn#l3wS}Py#)7~`MSs=nfBVCY`=>R1uBu~)f5N(2w0wZGkdcjdnSanIOV(IWf zjS6*Y2Ys|B>!rShGEE;+E z;whdoT$m=DqF*kH+1*|J#cp+KHHYO|!x(8`T#r9RmSm@5pdU&;^UcQUc+$a}oVYG4 zUCE0kC*4lO<3d%fVFFR3bc|5weJPy-6pO{rUPZka@PWY!rS1STI5lr;0t#?hM;cl7 zZG|ob{L>nn8~8u4lJ%LcLPW=FlOe+(OL^t9mdz__^c$y7Fd11dsl{LVz&Hx20B=XO zU`@F5WmEh_a0$Ix)Hc!54Y;inuw^I7Z3%-MF`U9BIw!+Pph=lP-#jlD)SF<-gz7F{?0pRF8uwtF-%!)ywM~w=0Ld7#|(PDuQc7ZcAn($@bht#IA&CFTanDr zb6W3tZtDj8hgFpAOO0MFmko+&G4U*)%~aSD$!03F$ZNrr9O^=S880OV6qX*06go7& z_9E^%7sC)q_gW-w1hu9bA|{1RGXhT2AF2SXZ)5RSUueB~Jz3O7Q}Q~4!q7T)=ZBq|weX&g{RqhcSG!Cv|dsIRln zDF!njCEjufrl*B%5rO&OgCj7v0hG5KKzUoyjvq13PKXe&d+s}df*(huldUkmO~`u8 zm0yKP1RA91Z&;bUoK?Xf8oR=k#&qX}cTt;Biua?tbP&oPBgRXI(tI|NUjMA-v;Jg# zv8dAKnPO!_IjU>+NsHdG5k+#U3H|qRWBvEpX5na*KLmZnoki}8UMn!1rRh$8alO+^ zcKWLdfBaMa$lvDp%h?=%Ea~I%VnIK5vP_1)Oy=Y6XBFud)bvIUh~H1Vx&w=1Kg8Qd z@_tCL?hQElhja4{Ie9G?zrDcRje>G-6p&lXsOy<@8(e`nbFODs8wK8e04u&B8@_R{ z?TrF$-XEBDE-lOQMQSiIpE@b}SP zFWEoF5x=;ekSG6JA)8OoY8dapZdgnVB3-{t_OT^sW><8yET)*tLdxX8N65A_E6ykp zI}WF+4Tgn`!FzoeCPu|-{lzvIb{h=44Tk*<2E%THUAMum+hEsPRrWU4wLaFh4)Ya5 zTp9DHC{{Z`?!(!&pBERIA9_sXE>^3`@pdL;k{OFpVG*7EKDhOZ2PQ}~1Dlu;V2Tn7 z69y-=WlB4}^}`C_!dW|12RDJqXQFPkD)I@>!Mj=&7Yhn=1WTz9$4oM04&qDqsSv2% z^pmHj#zNh5C`5whrdHkRe7>GeK`(qqB#w z5R{v5lbpbwie6d}r2ACmxcIzUFCZ^{rFRzu9$8ibGF}!t%R>4vmB2e?5<5y#Ck8ha z!teMxsYFwvr;g`ct-9`yfmMyvpCRqEE=r01bkS~kHWMtjYhLcd$7aBPr()U7J(29C z@5Io$l@EYj-17uDxc?yNGh~4u*V+6fBje>iLEcY1TlRUXu}Zhd{MmTgIqSoO6m3~w zUB(Vb&{13n64}M1ZWBB0q?INd*qqAJp)-6)zIPhoY2c{&wj?VwW*1~PC0=^q4QUSm zFX|TjgO=bWSbmJ^Pg=oT4??*?&~hubqSxn^*ot?wp!vR+@Pi*OHH1z>+7Y|-2Khyz zXR7T>?Pt$X(^)*^XJ2XsK$1c#nBkXI7J0O~1C$Xe=%mxGMhTWb=%s~QqRlo`6Q7E6 zjAOYUAf4w1`GQ9hMt4Noo{<5CsD%XqL+K#t&KCXI1QF@kmQ3d3*w@HxP=8@DnMX8+ z!~>>)jH;J~5UR-87!1fdzpKw-B9IX@nE$lhm1T9Sv>y0l)rVKS0a3vlKfP~Myk~N> zyAKMp8>1%3E!^wx@mJvCtXczrGlU+ZFP#4FqMVHc#{fN8IZHw2*DI3IlU>f4nG1dq zrFE&Jfo#Hz86H2f7W=lt)q9cMWE8R*3C2EIk3nfBs6>2_{vxh(GYTPqT#SC9Z>R7? zIP1el=`T)^iC<*ms&1&IVX_BYSk>}8_CjkbKO>DBAN8FimF<2dTgkTpqhz#o`1QNK$F3O4m zmrt&R{IZ_GV_}Bz%;NLuR9xq}ti_&SAYu08&6Rr#2Dp@H>6o9CN-`>H<7m)JOtApF zqBVoVoXmMn^cI}R<}!^tScUiQ8Ds#*REsziGu6>deFNO?7r}g}x6mx~frTPMWHJXd zv$zO-?ek7N1-W?d5BIIXJ$ySa#pPSREpfNSCc|{tBF*4$vDVcax$M-#w~0|*%HuJN zEQZ+e)%Btnif=W%l(obQ@szwyc~_YmL}S)aErdkp7PWSv8m)`VEGm16;gciY&#_o@ zJtym%9t>+%T*8tayCHVp;enVysywlrw(^HImwH=#D0m0lp_2WMf7#B&MqLr7FK9O{&uw1YH^T%>k5dOi9 zz}J-89!Y2tN`4d+B{oUKLRovPR6Uz{l-tMt_=eu)pN(62AmTG?%&9`>>z2~0py#+4 z{#aa+w7l9yMRpWOWJ=D`!Ne(!u8q!jj}tFpG;*i7kcQPga1nP8`}9$Y(dl*Y2u8;C zQ8S_djcw@8y=j)x&@O0uNs_NH-E{S)Tr9|z&WCdo$Kkq6gSU{;!W&z$)B6}cSRh#K9y$|K3}w~ zbYs7+p#RM6y2^aU9x@QHSxnMSyhZIWW0MXG@>~v*C5_H?{t)b;DuyVYhn!13rO(Z8 z*pVgZ$hHAod(FGR6haR;1ZZdPWWR#-P?jD@idrP*iscxGAT<3JfZplKVc>p$H&lAbJPQe@G0+Q(LrWAlD zXFf=?;xge@MP1whE^Q6_s2h+*SexxJDgcd2r{m6cX|FLwhYxLigp^76bL z4Ui8UM{v*7MC_~UG8bIRzS5QV0|Pq4-3!2wW9_4^T2Q!UQR;`_@g{DSJ8Qb7i;Zoi0 z`FoUtfAXN~zRVg~e(yp0rQrP`S6_z(fs8vhvSgh15_Id%1j^e#1 z^GU3VYBi!aP9cP-)pd~fo5~|q*}d(&c80b+zcr5!_SJNX{7S)d6dGky(_S6X@}Bi* zm0MpgF}4z9gCeXN$uNAeQxpaM-*YZ&@ItKw@ec1E^cvQ9=zr9!U%H|9Nl=g`iXG8= zVm_N*OJ~TC%p7=B$CDoU|5_$BGD5cK+?K7SJWNRUxT!E=gU<=GAS(=$uAiWHWU@NX zXUXYWTW!eFBwzT(!Za(2Nik_7oIiQ}>b`30_Ok1IjC3~2WM!5ISS^i~-8P3;`Gv1U z`}W&!lVWxTg6NRH1P}4EbXlB<)**ZdolG6s11hWh6VyoGC%EqG=Z~Lv$w-=Z9}n)x zH1{qZA2ue&yaMb^)DORdXD?-*cuzj4WGjLOt8Uo0WTR{}Dj9|uttTLBB^ENM8lWkS zE#*FRmZa`L1&Ps(^d+-7_Rgw&Gs}Ytw|xCMZGT9`r8Qy46V9KNY3w!*^(!2`eXfzL zq%J@msT}z=J1Xa;;IHDF>)OLf%R!ysA3%TN{EM@*fYX;xWflP$CQvxi`R3`ctTj`U zfKL+&9!S!nf7b6MJE!Z}7)k;3#CR$M*q!W?9eueV3Pa1h*kOr2ItzfA`z%1waTmn6 z;Q8<2Iu5&P<+_2UA-B;r;EGc(M}z&nV|)1hexFoyl6Sll<@+%2_eqMsLnrUWLu!iY z?eHUXN`3z!EaY=ZmlRaDz@3@U8IYT?ds$A3qa0Se# zZ$l6;#Z^9bh>8whU)C2>RGA}E)lY%ufAL~9T~E4x*vzmIDrztkaxH65$Y zsYabinJM6AyeGq@>{G-!K@awoi z9mOqgMyWm2o?5?AIT=D9W^E>7O)#^GtP$~G2|K62>yTAb!WXJQNcn-+2mIGda3({x zBoVWwWE_jd^qTTb!95I_NFhxt0&l?4mT{HjdPT>u=7bwV5(os~fW<|jFhUczo5>3< z<{1<@(r}M~;5ul8+p2YosAaL9qEV2!V30B3ayNHxXk<4IEU$Yi>!nvDy& zyZ&8l&-}6yTzz)7izgIf>`HNQ{o@~h`A0^_yQsh$ps`&9A)^`gaqkI!tox6DKwJOl zW`kDnX>66$uSfR=$8Eaji}WEwa2A{Yi9>0()8mT;n?oNx7t*Z1gtz4a#&oCKyzkI0 zHDpaL+*pgP|9+h>CrN$=n4K%V{bZV&EaCK;w8K@B{tZmEs0Gbh5x~6alO1B1bOAT2 z{f=YjfLt_$Mv#eDi)p#S&r`LaS*r$NmFnA*%i69vMcA}H7)_=+Aa#rR@_!G_ckL#B z1IIAW#o?9o0aLQ)WQ|Oi?T% zNAgJYNH8=6?_M`n02f_M=#0q8~L7@fVIlcHK+b9}-b@ME5s@ zx1nnZC4%aA3E+tUXQp&y(m5#a7g-a73jFP-YZt4-5fqEveH z8%?&&?FS8k8d}VC!&|MHITW*gKAAw4hvuQ!YiNxgr2*`XM+R)D$t3kHkQsufdi-BBhL2yz_D)A*taULkChblZy}Y zih2PZpl*zEaT|m(MS+zW+9XNGu#sYq$eja!n~91g{#vZY{d!O?Yb&0tA;RaEbv`Xm zOVK!Ryb^3KWUK<0OwcRQIOK5cb!uU*()L8Dflj5_Xpmgamv5>?J}yWrO|C6cJ?a1j z(AK|Vnb(tacOM+LX1Mz>`8|`59G(Ygj<=hUgz{5c*=b3_HmTonAk>txN2cZ|Y*_wVSWS4#NeVuZn=x>rS;Chb-UZKYvOF;?q`kE>W(p z<0IY=@NZZOuzSJp{NSK#$@?r~@rtB>QNBo$1p23Iv1t(8N5%7Mq>vXUc5H#NmsFf$ zEqjrn>u}7C2~3Sq1>~a-_aZJ%>&8P2e_T1C^S8RrqM{xF59Zk*8xQ z#w%pe1A`tllSu6);C>b44?LTjIU!}xe7aE=P{+;(+(mDKZQ}YI?Us+|?q|H;j>*1} zys)915Xxn%<|fXD?5>d6qu;TvT1>GC1(lc;Z0465Do(KucGd2DS?vU1PuZNZ9>2FD zc*S%#V2ln4JNirUmy;FBl?d$D0Oec2{0p=zn5bBO$gSzz4@OK=Ft^gdK4jp14WfwS zRhCeL40F&dTHj6gu*u}2){}NgGJ4X%9Bk90)j4DyA0l`|7HqI4b4I{?O%Or>Im#!R z6~Gh{QFNlO&~gQ#ih`zL1GSh#p8E_!kO?^!eVO(U?w-uj(ql)wDWKXE9tLo)h&y4n z3e<#)0UZ5^DW8^0Q9gyRkV>rMMV|Ex+JsW2rSs`Tece9w=MwA+Z4-vH$QSbX>8saI zpFMhA|3>bcxq%lSamZ%f%vieyVA80jvYu?vYZ#Rb`mE1n5}>p3GFs(7o}rm&tekmlI0Q+5KqcAD{m8^mR?+waH=te?Wl0n$n(q`HtNv?}g|q&+FlABKEmFIpnE7+1 z`p>6mhp0>Cgp3Zjx#4lNAUi-BJ0Os(FJ`|b2DjyHTi({h+qP`&b7X5lSPz-M3Yblp zk7*qS`kFw(Lc~Y)?@IUMe2LFrEqZ71POr3wqj)M|b0hhAs&mazhEVx2u0b`t8?ie%*cHyB zChgSv5+OQ=$#&O?6Va>~*0X84k%oIc`J@+rB#6F3S_Q=a!sK5r!>v!)bR`ouUDeRA zx9LMg77-AT5L`|yccXKOKbC6gdQXDQF+h~tv3a~fs+0C8Ah%@Ii9nkh!#3HpE3L7( z@qlXrJuP*``UE^-m99de_Zg~D>Pr{KNa-Q^6}FHD^bcZ~6Zu>p=wCZ7r&C|R{Ftsq zmwRu6r_p%OmccZssT^h-4|s!FLX~7tQ9!_C3L7EntqgWebwh&Cb5(s;NA&af#`g>13 z4aFr|PS|=~Z-BNZVW5jnZEc_Q7Vu3gk=5`_(940Ck~JZkjt!@ap4YYk)HX`%7;b5W zHlQ%>Ef|AqRY+!*T@;4GuwK};LD#0TcHT9nPEp@7Y^qD$y2*2CTVzHkQvKpWDj>32 zJX1AZFdkafI_}LnNM55l z92GzSXXBC;zjiBieAy7cIl^uSqOD{1Kz08ylX;S59ofnk$#6JKp2{gCKVc@C3O;0q z&9?<%JgOIMYNydbKLg(zz&6w~12!4HSZsx#j*IJvQQO-CJ@HK6@7R!vS^{7@5Fx3@ zdb_=4TXzPM*pWxUgP<~S+1a;xZpIt5cLh&|ZCngThB+Wf-eNEl%O)XX5xZVn||ofe;>>QPNnMXMQMFEUrZVA$q& zBXemiy-mRGNk^C&ZW~b@Y&RiecJmMV%K}nVi@R`ot*oGgqP;#b$BJ2Pd4?>CDlK?C z_-a`cxbIY-dHxi+Abx%5EX+4}dSE~|`m>+_QO^VTQfU7gbD%vjUoM66cWL1_Lq0R@ z9AE5?EA!e5vJYcwwggZ}R6rk+ENj_THCZgxZh5BFo*Mdy?Rz}kz(R7tobn_niQ9gx zTS*2?hmiokAX8HsM7$oXk7ZdDhfOOU{@TOIL~oHF`)iG$CKOX|fs(3ukDK{} zC{9CJ87K3il7YLTZ(udL;rCJR4-fDQ?8^KsxN~$|q^*62k0buk;oe>X6udgDU-Z$Z&wZUdg%o2@drm@v}7i+B8<2hxRx&)r}4vfxbw3OUj7Bh6#pQJ!o0XZBvZ zpQJeD;XC|cxtJ+rCRK^uPEyCBy=dL+w2{0S{=arWMh^CdgmJ*z43VXe0x+RGtwRu} zNau=DE5STvBrs@oo`W{?HF|Ztn9rw&O4rmb%_~bt(R??sxeVT@F=uciKi=7sQ= z_MU_~+i!x|qa>8>f{=u##>3nH_sF9DwbBdog6YPYALDu;vuTF16SP}712 z#3DS!%)t<9PHQ1!_Nq=xA2@FhGhelBa?%K4xO(Giw&8^*-;(tT?(n5E3hBM0dSCyW z0I(ioHhmO8Mq=Y30z6*KKyk&(NX6AikwUyAJ2LS?^1z+0|Mjoy#!_@H;>8xQ5lw|; zTh(Hk*9}aLUIGHzc*3`1hA=L*iYw$YHAW-Ui!$r)DFuDhOw)dqY_dEo7sM>I`P4qzY#A(7kE7u67dGT&f!l%ow5K zv4A!Mu;Q-V#ul?jb$tQOa?*8$xOUowb`qQ&mDE(yaFI|xw3;wMhqG)}n_tiyRpcB< z*R)dv)K(;Zvss^vQQc^;#Jp~_K$4-c@HCTR0yB6V2XFHS8Z9y-c zn)4;AYS~y$C9oRS>C(eSgi#o~EnUOd=j#;?M)|IHYL1+4?-UK){8YGJO6N#W4qh_h7om6I6H>RmxnP+f zctBT!6@3%23F~;jAMkH0;7{Q%+iT-rou{jcJzTAZ6)ssA*UhMiI?>Co*NNVC!y2bN zPS(SEA=abiVS_m9(D1NUto2BA*dXAx(T$2%54}y{RH$bt*_cogh0_b$p0!aY@(iH$ z>O_KThdx8xh*2J6oi#vn3~5tJRiwQ^PN2@c+kh+uB>i1quhupXHyk+my9R3Gl!e8-cy^3fvqJeY%8F`icbI}b`CK| zmnEc(#qAdr>u@zJZeQ<^Yb}MnxR4zfT4=_3@fTmo5+jVzkd6#Ex7xAmCP()O1HdZ&80Z+Ssm6gN; z6>zV~+eEc=G`N49z3;Xd4{(J%LEjKc`zB@V*=N24?g5(DXA}4-*@T97bRiN5_`?6aX9n8Zqjj;7ua+ zBdt=?#r)g_@7r9fA%=`xQ;&o{y>%*%P2Sv32>M>TqSM8ryhn9EwQpv<4zKK15ETWQ z??k4qpFeV5$!tE`9i!m_W2MrINiSJliM#8c)qK`VuJem&FS&yM5Y462v^MHhV?KjH zsoo^s|55h>bDN2dy2nip9#16Pm3P1Krx18mu!->QQv@y-6#Ubp7h)`sM&Tk~uA~j6 z&tELhoIPl ze}_3(ks4rgz{{rNP}2m+sq(zZ(ZS9mx2R_g-~VNCF@Fo7mSmWm%t0rM5W3^MDuN`f z2O(J2O2h3=32t|%zeDmoc#@X0z_{M?VVlzz@2S6D0C;s8AVAwMm)u%d%|Xiu56N}h zGo-NOLVozQ328B@tHfzGDPlv}{8Y&x#zO#>&;)?#1tlRX)PdLwod`GB2YLId@B2%F zG!@l)$$oaUyML^9H-v3(Z*XYA-KpT)8+3rEG<37&@)eJ!L00#&EXVFe)bx$=*<^$X zx<2GeWaa?Dtxv^SQ$GBAZSJs#XJ^aeOi*l(fE3AZtw*j)EXOOm#9l1tVDmT4?Ds>N zloKHV7YoYkEp8GgvG+k=$6AoAJ1MF$g?#EVcl$C>g61VdF~A0lZ}9xAoH@=KHTUbw z{L)qiYxo?;ORovc%XT7>D{cY{Sb0<-r)He8bkjA-)^eivfn}n_HxrwE_!bT92)vjI z#9=+rJMz)a0U{-T)@?VubCI;cod6iA1iZ4fG?(qmgGu>+w( zO(&o}h)7bYY~<_2iz$l1I^NhJO}o4{Chp<^VSZ+=Hq{IVtWUrtC2WO9AORTb{MSB~ ze_Awu*Jeq#LTu|b#L%d+$5Z80%^+Q^H z*k>!KH5SimQLyC}P3X*Q(!s#h4ZxUhEqpbEmmIjrES!w?n8}fAL<1x@-5YeSZs-BC z(Pa_1DmG&Nu)UV6#BDk@+!lMkQP#to*_&@{EmY{xmeD}wJap1iWw)S}v{=N=l#?9_ z{6rJQti(wBCo=V-jQ#}D{$*VP8X?Pc8l%iMnMWOn>Dq^x*r)%5&ebf(Y@jMfB<)u& zZoZ2g6vwuQv7}`(ZwMKr0N3Drd98y7$=d|uA`{3Tgb{`WpEWCXg_x+rN(nLl3CiJ% z0%ZjC6#x^V{UrAxgmtSjlx?bFTJ5vdHp%ct`AVJOw5kT%S#mt@c-1;|82sgBF)iN0 zEU6hEYZ)PpmBM3Mo+u~n7ks>gr>q_p7Ts|eWylQL45S=MA8Z^srS(W255*HSVGPA$ z(67Vv<12}o__pp5Xe4iryQjy8wadb+p}y}l-m5n z_Pi93O~+HcsSL%P|F!-1k^c4X^zZ!pi~Q;b2)92>!pfJ^vs5pLm2m(whrU|9mCh zWl|v_6Of;Y7q=Y^u_zY>K!5c+o!4NYznD+f(?TVVsg%nBM5-9l0fg0MF1lEcQHKsa zoGs^TFp74n@}ivPoc0T1y;uO>ct(jJhJLnT6b^yAO3ULU@E`t1AB!@oUz`s2gzemv~iC3Fjq z5aPi87Lh_|&9BWjnI09vB0!!frq@MpgN*0w8x)tuK;?>(zXh){V@~`P*D9 zFYJfvjx+>TtPj-HUk|Opp*CdR?a2|DzU(a_7iOx`KZRiHWictotCRxYA&HQ7wa{&_ zaX$xdAh0f!D;cJgCxEC_o`Qp>Y*?2sPlFpUu$H*(#{Tg5yH}4MK6}MGQEjkAaf+pU zJyh`zSrbqSb#Tn1ku)lebhfM6Y`!A!U{It!oL=VFaMtik^jscK^9nGm;Mk}HE$M8Y zoEQ1qYf!@#vq>=l5{r92IjP3^EcrUA-b@8;&9Y92CY7ITN77H=o9GuK8u|z|?x+Y){vW~n z4JG0lD$PfeVg(>OY;pCPrVM8W+;)2j|KGG|wBV1iF?75$Q|APfIm+E%-L0L7ObcRnspQ}8LI1*FW1n0^0UzOfg_zB`BVwpHb z7u?kFj#!J0dn&GmNvY$;j|)>ngzf9 zRR`WJ?|q@?bnI_0h6SpeamHH6mCqf98oHh7=<|RvhjX<-5iBDos89&55IfV)9Y}7xh|*- zK!mB4mu2-wz2vkkrn2Dbdcky+umA6h!_lK34j=thydHY!G1t@1tSA~(EZRuLSdEe{msU4#ex*2fkbl*ovM=iy+rgUfgUXL!@=%dC@?== z6|>}|SXr&XpepHg$YRjsN95%^1DJ@D`SN@|CqTtKSm>E62ZH)W67Q z=lN=d{r$M_ccXWn3_}3#bBy6b%GirNS{AffCfl_&)N#M1iUtI&80;& zHKZ*8)&x!_>sq4b6|4yI7MZK&lBkPmMLOYw##2&R6i;6qbU5KrBy*RxoD?lJn}RgE5fz-Yg%V;Y`bsvhE{5jcg)4VFqTHX(}Ol(0$ z#H3w)-~mlSZl3r}pUZC6(D`XsHmN!W@}8d#65Vo7@&Rx>maYDuYZ9-_R%j2@De^iCtHe@qfoJ+=d$@@}qrw4#!Va8k5O|U{0*_!FWElgKdD3Vv zZ!AhWIZMu`(!OR{%m|Lco1NKT;wHcRmtlg0%Zjcm(zN+K2hQeE~r+7nvL02 zeWckS`5wovhJw6R-C~Ir){L0O8lg#z9MdYa`AFQYRY7KM8ADfPzW8SIIpPqEIVj6E zi)(U48w;Jr<0DM#4_oOI!*x>zKJ1+lip7xna4}R$YN<3T1bJLc`r+(ecz~1-f&4{K zs^?80o$vqMkx|u3sU3`DJ2V?MNV>B{9|VSFe%tqYGPc`O4GQ z{n~29RdSCuo^48!((13~x|yuGb2&q||AQb`>-YQh>?(3s#B#ODEf~1mXCwCIG8KJ` zaE7RE5SQaQEjqEY8)_+xtrZnAxHQQaFG*6!G~JXeF{_crNI*F)XBya9yWfe93iHxR zz@j}n+Z)7#3F`ymJ%Ly+<*wLor>HMr5*WR(#PII0n zwIE7Rk*gMA)X$gY8Lq>$hGX}m*NqN3=ISWCd{Bs#6Qz8*`~lmJOFMKD8d{H}^Cf8! zAb?Owfi}9%l9UlV)-p*gL?O!mnYCf8H9m2X<7)1k8vZHQg41+F^ow#4;27L3@6!l5 zbw{2@N6YiejzUz#N0k%m6~sgOV@W?Z5MFMPbi-mUzlYhAvHlKLvGY)&Ed{d0sD3AA z@vZ%LYyaKaf4BDE*7je=v8atHxWWE=fA8SHvHu?2-`mFjd@B1dy>q3&8PHn=3}fO- zmu9r|F$DUou6Mt~>aP|#Xu-G|OWf=LqbFfBB!c2Icrj+3l3pY#3MJS(T0)f7uB;rN z-VKei8`)7mAVT6}+F(iJ>}V&#BQr|trcIVY8Y&s3v-*zeB*~{|^QG9f3m9{5wyR-l z5aEu#1h>m_iIFKf!Y3&fm9evcTB(FYI~79f>;ksh8n9ku@+j%vUutNZip<3D^7E9T zo#0z!lQ=JXc>_CaDS|c#)9OiM*-)8HZTuLffU1c!`d|&ztv<=LRvpmc$f}N8TP)!< z3pNB)Mw(qUu$hk!pFI5e$Je977q3Py4qrYxeD?b3kB7!IxPMbFp#k(rGdU3m2DsaIzukSF6;GZg6A9|q4O?ir1-4_0Md5&LL{^W*|U4<1vm zYimtk7;2dVENw|H{i-FjoI^mM7?zKaXhwy}^A=TKQ=;G}`}YB}SGUjKqhK*~7MO?D zz{DjtRQyeb7DP=hYXo8HYf^QOEj~D5rH3$1?_;(mRDylL!3MS2nhj60-Z<3Ujf0W= zxT7F$9GPHQDIXNn3grIwYa1fX`~m)26v_1 z@t4vB-s#011{$+2R(Y2*L#rH(g$xS$+|~%yCcerM_cA3kd@`!%XK?#4j8rH)5@p2z zbiYS}{KQA=ku;i&U=33JbuW1{+|Q1!GzWk_r`yVs;n4M%_#$}@S>Qlvh}N5)4tCT2 zan%K!D9);w_Dnn(`8*+ql1;EKo$@Oc!uH?AnSYimH9M7ruB3}#= zXzvK`pQ+}OezJ>la@C_&FrADzFzDf$(l{L-MY#U3sPuB={rT+9e0hYmj_qLga2XXb zDaMug>6uh64@eD04id)N3}(|Fu%4Opo#33MYIaR&)V@A-1Z9KSa!C&XWZ7*6pXH6sHX2 zC~6-0k*Ij=L$Rx3ovn+B?P5YTD|U&JdKUwQY)^rjRz4?BjTG34^4Z0yxt2?Ris|A+YJ;lJ*0L)izI>OLG$-Ixz3N^=u%n`lXY<|p!Yp2(-ftY_jyaQP%7W)!x104T z33sORHY)%~Q)kF_HEvOE1*TbtTFIUw7{c940yblZZd_o;Tel9ok0gqzC$!bjLN%|` zEn40;T)UvX{kOJ&?Cf%_w)&J|EdmCHeGU=g7$C=&Gqi9os*Ss+6W>%Tw$!I$vEG71 zMu_PSl6UIY_k5wdKgkgE9ZB2(U}}ET3nKmyM|KBBxYMWV3fI#@=DJRK5UR|#( z&r$e75JyC6(5tCLi2B>_QD7kCe{|CuP@2<088e<=TmWq9WTV?p&Gg2%9^wX|K}Ke~ zoR7!r)-Gr}wS+CWj1Zh=^4f+N|O^-eG)ku3VqE|%p5mSrZ}4I|br@-IYHfGMO)y}$QT#H}`{9&KtwS+9=;`w^ z>=G02ww$xMBznBd?8BZ5+LeShQ!xG2^4h@+L>pC5=A!$-mANo!JsL&yzM6T^sC$}! zwSKlpgm&;{Xz>aUqt&|*^_$^32SoN*E)m|TzI_}c7u>2+ys>h)m%gnQYkuQO{a2iREqC2#$igjB$bzEYxw z-Ug?Q%-#CBJ3wzC^hm#wf1F#8WyKI**L5+rXQH09?%Jl-O|ikdIZh)UL>qfU|2%s$ zn_td0dPBcjFBX&%+}t{Y7B6agPK%##3GLXg>e?IVM$hQYAIab`uCZE-gcT1KdcB1E zHgNUUsL?}j;Kmo&2fQI7;(X&2ndr{N6}kkKXowy!9mSkSH@oQ_zI?MAzRm4UW0ym( z#M@#v*?jEHL*MK=+2|^q-%Dlu6}oN6t$Nw)SLl9+A%KeqlKTmO&k zzt7tL!;{S$T|w&oKMwAFbI-;9xO?zmi~sS-{6FGGgE$0;cpgpG8r4Ikf6(QnvBAY^ zRknva64nfz3qT#ZSgs*ogaQrh7~6zV#q|+CaF?m{v8^ge~N7y@(gz_0h%?PO_FK5N1Cozww^Gnu`N+*#n<||Nbqplw#7&7HiaavAS*!Xo#?@IFT6q9#}l@s6^ERigM4Yzi6z=kMM5DNAi|p05-> z{j{hm1sWVQw8LJV%C6qlwDUAn$wu&i&37%O% z+$Yy|FJhK!B%HpRs)2}SMYS@&|Ep3f><4orw3qdARG* zn{qbs`^Tq&U<9(SULsSdfEg*XGIr=mnj9jG+2F)sf!-9wBFTf%tqQXk9QkwH13Zi3 zaLR}Rhu_EuFU94U0qG*Yazlu~em2705#!6RaC|Ix1P%-0LFBZtB(qiO%lQ%#kE{mC z50EWn4Tu19u-@zF2MwET*sE=?BcxX>J51&>kY+?5d<(pHJs!ss1=KD-Yuy zU5K+oBNBr-C(A*Q)Aj>IBFJ5A`^(~D&SH~J3e1MWx7DpW5j9poVSk0C1En9DFeF?! z&1ad<-l*UT4Q69bjbt@P%I4BbxP@+T zZ{d!5OLd{VSR-*-FNs8P2`t$M3%prLo~Aht-2 z;ni_6Vz_;%<6yFe)Srgtlp(rBH)#kp)5EKvm#{{8#t>CIlru!ru^FsK#?9k1t+)fE z#(D`g#Yd&T8yz$+n>m7i9W{pmCpWSBlrXBp~Dr^S`driFzj5_myK%HKIans_}Q~wg8WcxsoV$#e<@l+N4zDoboX|7yRQA_S4{5ycGF?Q^7e%=I{>cG7;@Q z(W=t~x|5thjx6;=6Ecxl>!f>LOo9D^K*^h#^SopYAbMe}gcx>Sn~XHhB5#zC#`lD4 z1vB@Ay$NnH9nHXVz~lL8e9NX;BGA+mExjqO~Cmqkq0T9v~q%H4!bz>bPm zhz;a6TdZ9j)u9!$y+S8sY+W_l6bddVT#rG_TMsH_{J3YHMCb{OTsa!q%>hI71v@l? zscZ5FO$C3##i9-@*G3r70CG}Ujma^i>rpPAp)$nT$OX_rm;9+c9}_Avh&hRfZIo-0 zMSKMZeyUsT0~WiNp|R_4#nAUM*nz!}U&Lwmog^IS z{{7&csrbIzVW#=IO+rB`=-yxHF=6{z+ZMSMlpywZ-78ecg1@dQzbG1_XpYJmFCBXOEY zC#nH~nA$Z3ypznwPFkw0OAI_oNh1z(D6SPwmviG%d7CBB$d1IrSN^M!hmk*ZPgG8xyZv8?Z&DwxXw` zDka{o0EEwppER<9WU!hc6#Z<)SL-tmqvi92PFkzHgsZaHJy{m{8#MZy>Sx0(Lnk$G z^WA*%&o#x%Yyh4_#-0l>Q2YvrN*baTX>#&g17G6G9pI}@Gnot31a6Kd8?1%L)RTNr zAF3c2pFzhwGAK-R;c);s+5d4}8zBfKEgXYf41<6-Kz}cbVk*hI0AupaV(8@L?oxC9 zuaL#l&M$vdc(S%ykxQ19sL(G0qMi4eUj%Q>h<^*N2@g8>lO*UlfM`!hD`(ZVMNp0#(6f;pk3g4}VNg8I_cLY$G>XD~bel-3i}`$Y z9-u^HwV)*(ii>;?-ui4k0%l2I*L{BoUh{GhMp3(`(|;$lxD-AxD=sYnI%|C?1kf0l zeph4&vlvHn=5eYJj4ny#p^SrY6 zTNUo2g@6Y=$Va{^&_i_BEPURavPUc^1g^2LRkykF$l6G3%KD@e&@>S|{6|4^58V8p4<6jz`u~3- z|NkFumH<>^a{N#hOZX0SEL1661^v7u-SPE}r%)7&51A2lcF4;v;0{b7@=pg(xY zK$+y{+DMhI+$0W?dxcg>Xp^N4U7)kqQu*G-8X-w#1~e^>fd`awqME=upTas@87k8GdNmbfUS&yfdJ6FlZ$VG? z;h;S~{08!j{_y!F8g1h&6^qGl^p^VJ*UHSfQCdnD5%ZSPQ?ksGs)u%&C2xoWdgCmT zf%#hrB!BodGD!aRGs^w2O)2-erj)Z!*l#I)+-U)p8y2NF^U|n=d-SbdypHRuNc+ZV z1m;_cb^tcUTgGs*I&L0Z=CiZHkZ^V!&MNmqG_4%Z1t<5V|3fG#oOeIh&HLl0j^pM} zowx{6?3L5`%&HkJWj7{x=KOL*TcGA*wG%2}HzAdr&;{t9t=i6a+Z=e?9C+Itc-tI! z+Z=e?9C+Itc-tI!+Z=dX{I9M4e{28S{`>g$KP%f+yDV_^_P@RRclW<>?SBV%?{DpY zpUD14Sw33Z{W!E|QkxNk(o(R7d0vE$%8Ov2UyXo-7LrOli7WVG^2fF4Svaaq=Dheu zB5+vkUIpPIAbm(hO3GY_(-!dhB|Y4u+lyKu@Oos?#7mDNO3?o6R1bQ;lS8B9qa_vZ zEA*?K)T;b$Baeoaxi4Zev$o|HEoDl_StUnDd&j-xXdnJN5dZc2{o`cU(tSd{Be@W+ z{C=xeH-B)PfhBm zcbJ0)zw+D(yFD=l!-i_PI{V!xQ^hrE+#ZTW4%$rJb=X9I);_Ymd8<8_XHv{-hR{l} zvwhHQ^*`J9-wpLY-|RoQ_ouu6w$=Y^^*@gOr21*$PYZcPBi90j|q+s zP~deVwC1tLH~O}WU}yszO%{7wV;6{{wwhBdglq>bS*>*Jb^z5!_+fu?lDzs)fECs) z#X$m1GvZZY{;&&8P9QhgjJ-!BjrGO;&&Uw1UL|%-tS1Usz=G>`lAnoBa)efC3;d}b2;;!2Qpl{)?Ip9&l4+8ECLhHB+>?T-8{sz%sLsNXPH9B?S!EwG{gkX$#E9~ic_aj z>NOxroG|B2Ag-pmGq%5(~cdGe%IW82ow}m0vK+VcWhY!EmPlvCq z<<2->FRS@dk&@zM^l8dGM7%%;>0BexPMnS$L2?df$95jGc@7 zp%|}MMHxQW(8pFhioYmuNA~#|2qEwpTMKh~toasy=!Vr5`EWSP5xGVil6d6u*B{GygTX^1Xx@=v)~ z(Ih_u1l5(506;AMFKh7*5F&?6(IrS`RV-&!9hq)UrG0Gs;VXH5Boq1wWcnY^1vx7A zQzg%z1xZ$11^sdor*ZE{A*X%*GW^|-Pyc$D{KcnvhvV*i*?-4AdB2l==uyDc1?dV& zJOof>kV-Y}Kph|UNq(zZB%y0HA6Z|ds?Y2;4yM&~oAaqE#Lg#e^Cv!^GR{%GgeYpW zSy+*xDO)rqpnB_0;;G_K_!*?9T?TRQ)usn8Gs>Rj(nGEI>2wPy)iq70MgOedOLib~ zyMnaaq!FohvIc4MkrmKq4bN({n2b)YO~dqO78EU^E+}Row#Gi5G`fM+qFq7_dti6u zlp?GrEJeS%rQW-HuEvCKA!})YMZB{!A^7brs&c{WCA+!rwE=lLwBC8vzvRVZkDFg>h71&F@3OU6zxx#2Y z7SEmGeCud0hqmG}y^#G=-J$1yLKW$DEhfq{BdEgRs#Ny)wLW&za6sFL6Je`0WpDbn zMu)LthHhba2U0*_h;-FzlohU(dM_&UHeRNebg50gMwM9lZZAR2SJud_CxXF~c5;v=Evj6Tq*xG-$_Fu;a=zAlwmJqs3sYqm4D$IxyQ184ctMh)|@YJrq=XixW{QCn^vNy_mI&h zXpmJ+x-llHsy;Vu;?K#xRsGL=V!<{#WU-uKNPrx8zgh^2#}DPpj#@F{zk7Vel3fL! z4*kkPP~>w#nZC?tQf z++V>5H}dA7(|ukrAB#`!9x6D+0GB(JA9w!vo{7)yziif<9tn~c5W2K z7b0oZrfWe-sfK?rZV=cwVjimh@6u zV`LcVqCTCpMh|lBx2~E9`jdOU3-s>Q*#&OJpZ1~;CyX5nNQm=dcKY7__! zeebvfjnr@9=V6C}I6{c+tEX){bmJ(~7+M^l%;Y46#Y}ZqnfQMx0u>L@Ch_)dGC$DQ4O5%YMXD}MQxHJo4}4thsScV{_@I2B*z6BxS)sm^ldS6 zO^;O??J#VM(gxC@-jZ-ZMlVB`fNE<%Mpvr_e9&uf3*r72diI-=B|bk@Z9?d(Z_2z8 zdB#fpX^P8eTIm_?I%Iqm&*jiMrSTM0s&Q-%?4R;Vn3=)EiR{Cegv~5s--5-MuZG;j ziMS)nsag^?i>z+6Xnt-wI`x~Xy<#M{fov-C%cJh;dOAfy?jFa7V0#Vfx|mM7xth## ztgB&SmHSc|2BKF;K#*q{0Wf#rDYrA<>WHr9#yjCWx^+pq)c@hf@>zKLtM zQ!kmU!-u6m@<-L}_8OYi@AukrYUc4$S)9;R1}{vTL>Cv~QYBv{>0bYS^3_+g?&|Pn zaf%p=E1w{dhnC1&zk0}?5Nq687!2AE7y?5Pi<46()`!~+fJJdj*kE(0{ zA}NV-V;i2KB+BNtL}^6Y?)lgpbP*_!)x@o<08+H1`MD3Uudwg453x_O8JE0PR-r&r zvb#;snHEu18JQUw85t27munFkRZ2F+T176EOcmO};U?tbnoPA6H3CE@9u6V(S$UVv zaij87>cMJ{@D$xwi$XC8QBza8yNJijB=@-@6~t;&tnqpg7@jk`_>WjGKZtpp=YGc- zJ;P#XhZY!1qG4ifr zrO9T3%7d?j`uJ7eG*=v*MWSjXO7r(Bncc~`fH6r$>hapk3@?7LN;kYd`o1^7{M>*9 z$3~V~DyolSO;`<&2+0=WX|+qbX~yw6fBKv%9T(0#$i1JS<0jxJ4@Tg&IqI;VCgQCG zE;($DI!d-w9`WpPs(-R<`0r)&Dsh_k2br`?e0q-X$ne~(;Z6T9jsIIgcwHP{%NrBa zO}&q7Q8Y&a1uGdxswQ-mQhC=ng&724ND5={FffoE>+2 zyG@%-{M2#N{iod(co*tzSj4Q@mMyK6do%~JZEtJeYRa;AuVb)LxzqeRHn6v-Uyv2@ zSLH6#;@EX|nctWw&WjQ=W2eFd*(q1;^3E!)H1AZap~k^f_5rKQjWtiqwNFcC9`xB- zn^x~u|CSUo>~*#z$-Z}9ad-Kj?)-m$fBwJUe*5(C<451#`TySe|2qD^XICd#I_z^+ zoOM$`Rr>#KKH1*bctZKV#h*{f|5xyYJOAG=6zyh9UFz~BH8CHz#PdG5J&%(` zGE|)`lJ^V4?Uxh3s9BwOU9vuozfB*Yq zvTb*^Mia1alNnNX6kW^V-2Ub#q1?TBchLV~|KR8T{?6;&Tga;hE|ms_lAifu_vOxy zuMYb^@9us7!{GtxH7^ej`Y(50y?VCu{Ll6q%zm(kiW)r-n*I1nD$XafS4E@(?7ZD47# zc3S>n=b#Tg_5!#-Y7~qAcAd{C=@^U{cxlqUjIa1G2^iBsCCiscYo^OJKx36|y z>_&~lAKvZm_J7{_FG(aSyt|2KRkxJDgV>NI$ojih|bv6XQ<@Ki$#{6EEi_=G0AD)UMTA7BONk;*$F64fAHBWRI zcR5}5qKYwH4Z?j~M?&qFed%9Cc+kNqGLR)yZ>7-SZZ0cHjgBI!hI?qv?u z)e{(SZaRiL14s8S;4kO}kxN8{@`S{BMZPlc3Lm~4m$H|MAP^z+F@9RXsp#bcW6Rskm%KH8BCIR3X=qMnl8{PQ>2i?VDv(i2kxOZWAyJ%`S*s6eL2#e z!i*phhjfKxH8<`{nsW0o5VpGdCB-=NT-KbnJTSN#dYku1LQ zBO&cjMuxKQ-~;>i6XKbQ@2BF7$lZz83uwZ-7>CW;az+OAy_1W0?3XkAY`8jCZy|Rf zuckQRp3vX0u)s!9kQ9&6(`gDU6-r#h)6*qz4*C?op;d{hQl}Pzl7e^yl|E=XHlMZ# zW*2{HNlD^KDRxvJ!UI6k3PZ508oIg1)ybkX!OF=?3bO)>ZRG%h^%5GtEjLs_cZgKM z7l;(+56LV+E+{~BHBAubHO5?|Y6X~H85#k|(7N)O3&Q&t7h{voHky5px8z9YfsBsBA3sV%*+ixJnBAiqWWaV^9=6Y zN6ilzMq7RA1|PTtQMdDG^r`8W%TeP@1gb6s3y&{mVw(@3j3E?K1;ZyCpYhbmU%^EZ z;bhN9>^I)MhiTlfS$dhC87{)if8e$=__ zl5`^OO9?W(ox4VxcJSxj|N8mOyBDw;!-U(Tlt*I(P!HhM1LTKR(}UgDJNt)w&&3fo z8<;V1@yq5ckaGS6JQic z%^({C>~BkyY|G-Qj0oFde%1Vwv2X0<+4CM)_z{Cg_PToPvcTkE*=7nk2GL2H+v zUF{=SxT+w*rNaZ`3j}o`N6`?@pbJ1}=dGX*nz#4!&>KHHJNHMuD5+vWCM}qs;P#U^ zb&rsi&^N|o>Xg0?kd;Fg4``z97H~X%H@f0-pjBW!w?{irN6gJsnw+&XrAM%+c1v`r z7H~F`_}v4{R_uy(TbY*}Nu&RuzHUjdW+11qB0gw~j`S>={Wr? z^=VvPkagw0rk+|#xFB{x_lN>5zwur2(!8KkbHX`%L*^A!wHTetY56h!PfA>dfD84p zd>T9i^sC(r3tL2j9);4U@k_qAw+E#Mqdu-?>o!c_C0@9-SXEQc(95YT@o**EYVN@; z)_B~7l;3psCxogvot-Tf^U~WOIFiU>ugvrEYF}UBVu>X5vuet(q)fJ$iEf%LbHb4~ zJsBslC&`=}OJHF|wx1TMaxLvQKvwvctFaY>4=fXJ%Qn!KMJT2YU-Jl0JbEu=$L z!}SuXsx=kW2F5S)YlmfCIoQJ9{YRa7sYo zYXM?=1KW#1dl?(`SjoxaVLW$M+;Oc7nOE7AQCLI=2?D?5UDx!e`O91ExV3r`HLyv|bj;)ig{@dZ@C9rKo1G_(qAFlgpV?g{4Z6!vbR!L` zm#$?Yp;tP=66BY<5eZglrZKI<8Sn_Su+Ar>0_u2T83i;3?XiaycZumq>)##3GyK7wCb*;S=0;f+IBVGcPsHN%&wJ%rpf(K`?lK%Y>*VuQ zE>9r_JHdjYF^XgJw)AC9@KYuZrY-HBnl9rUN|Yv|&mlOhpIVwiIR_}+(JP6z*4LG& zi^}QV*RJ*!klt4{wt)K7+ycJNz1zUop_i%4xS^3l$k)MQSQz$mO5WM)FXy z7%0B;H}wdio-0A{0HB*!G4Y3RwKkuP$LN)9G)D!UhpXYxO;h(}TQ_@f?v_?{-HeweM03y|roV z#w+*B`WLWeWLzGa5w6|_Eo&gH0PjWnlk|Nu?0F3^9umX29@3cIWDd^ZLM61F#15WA zPnAX6nO@nY+rU>Kp*w#*KPhNKa$=Gggq$F++N3NYmSX{426>%IS=Y%WR(4q?(|++S z8O*rgw^~2Rm!0G5otn2ohhekh6kn^;dX;9wvJMsHBFjoDDixzRU*MZ&sk?(Y~dPxTE4GZjqC21mnsils^c}X!ZuoK>lC}O z+BAQho=?Se3|=Iu#o}GvqM@pq*jk>`%%8HaLa_3)+AqKtwcKWgJX&;u9s)}4(KkDV zFZD6v3RTf9@mNfuNR0ft99FT2psYNinks(^Mk%&PHNm$mOXp!EhIfgvvG8~{oxV5R zhrqWY$ui|#QN&%ypd+`=gzI9lf~d*%Dp4b+W}qgJJ@vlIqpsfuTcBgLR$1vX)+apV ze=yDLVUob|r}&reR{!Bvx742_P##miJ0A5QZ2B+dr2_G`g_UcGL= zYHznMwmfHrHT+`#!{j0zP&|6@kxU)EqMUqw8AFr<#t2b7oY>}Wxd&%DFlMBy;+P2B zQIn5LyvBkckTUER4%jBk;jX#R>7KFKEv@`;tv>x)At4fd@x-^)54P7v9n7Y-}Naw2(Ofb1iA3Rabb`+RiaNqtB zf=!F*egLP6p7$QUX;XzWzaK@hJ!UVxoMnPv;vRB(aF| z^IRFdT$<(_LW>#a=dh80>chpLhXeX-Aj1uhVv!R5hu>vLR%CZtC zh9}r(Mhf**j+K^Tzc<7WPiX~$*za&9f(-w}&x|1I>9dgyPhMHOLowEw9(lp(eu=)R5nq+rL<0pGLu*c=5ZlonKfT65yR8&{~MrGAw4B14@ zu0sz(iI+{lOKI!-!C`!A7l&nR!6Kkmbm~)G)zD(0KABfig=Af!x^AN1(W;q%lx5(? z?xFg2wOX086gwkl9FJF~jtj!CR6*EiRrxL^LcM@3)9W&-D5B&gIZ|6grMI?xNfINk zHd}@a%vKXNNlcZ+U|?}oAs96Y*Os|@agL6CF;Jc{cWZ@P$Wb!w3O#i;(VmgPFdfRm zxbqnX;o_Ig$YYmz4jFeK83;s#VS*zX)Cy+fa)>f_0^Df>>?8Dbe~>IVHd$^B?Uc!0 zWnwnq=qh`|ih4?&1`97)8q4}?YA7YIW~VYoWxkwDVncFCN0x%q@bEb> zQHn#P6>!z^QS-z3r{0H)ZfA4!X_7Z_lIN}9g4a?4lm4mEDigih#>{iHdi~+6{8O`K z3?%Xt$?EV1bbHF)6)i#kU=)tcX>17PRQ#!(~rdP$w{btdAf| z2bQ=+-1MGl*N8i$>|!f!myV8$^oknO7o*ogZXu#qK|MlPoIFh4FCeyLct^muNWjht z|Aq+oB~rt633w#X!kTm5Kbz*4wMr7g6Pq?)LIE>KjXV%j&Cpao3?$$V0H)+1Cs$&z z;)^6B7CWbVY9jzdlg5ESGmUL?P{U-Z z1Gy>A6Guk*Nx>~_m}dAkP1ZM2BMK-jx4U<$X$(36`3 zR>$tFWMme9YJJ2EDnX()_mGOxYhIeigD z2^fRI+4xOg>?t2C64(xK@husf!W1~G4V@W#K>ZBq8VSMR$clShV+b-wXHbV^d__>< zby~(im7ltD)VJ`7=4QOpuR>^PE*fxLGQOjW9>bYZ`kzO}kBy!@3%zlEo@ zgNZQ?i5oX=WjvIaFIu@?5?aT4;Zi>ERErCVt;l=CcB{uip78PNYep@%^wiGoU@px> zO8jLxFW`vl=|4S=cj=^OO{mIAjeRoJcseiv-!bSl_B+hGOt2@Ohg@69JW-7Vn{{3M zlnGfL%P<`nk#!-&!oBZ8sH%Js0+Kg^#ikl_*wR_&sK`b+uLUU$YMnf!gzsw562*XN zvD2mNKGhKMMWn5TT&Evhr)fQOA^`6VYXMoTHCF96HZ-ip63dm-tOB;88y@dqh?lJ`}Bb{R+Q(wT5Yhtwvn7~;GY-Iw`*I> z>#mE>)Ca5MTJ#VUfZ>w!P-kAhrA6dd0uJ46PTq|JF$@nlc7-9xrKLl!Y8XLj`QL%5LtAR#eu(`fTnyU= zN1IMyR5zFOu;pLUd8jZfd1qw#h=bM6O9y2CB6DPj8^V^8 z%~kbUy}Z*i&esBf604WjcqgwkjZ+11Ew3pMCeFZbohtO*3S2x<_NSWOwbw8>RcC~K z?pqSxx#V)y!o;&?&R5>)%uXQ3%vX77!dM}zSjU;PM>b!7|m zV_!8`sBSch^0cXa8215IyU<()yb;jdIpO!SO8}~0<86SopUi_qqO2$Xa0GA6MKHU> z>v#dV>j}r_Me4x^M@{+LW7Py6AkP>4Rz2{1%x) zov`3k~G6W>hrAN~KUyI5v?;Y6a+l?J;AL!~lm;HedJxMBzDA zDg?Y7b+!kG+;GQW(1?*j3fbY*_#>vI?E0|S-9FrA9UpTrBF`JdDN zNxL{Bn(?RKc@W5N&r`C?`&z=gW(8IbXneF)t?b?C`rE9D)5zLro3@@gTvQBP8Bx@93YXdTq7~*5SG>GM%gC3K`CgG+JyIh0 z$Y(OKy&F9i3fDxEl4S(V7OI32p5TzdjSH}S8PkJ3&|9=T{N}~Z60a!JBg)Do4kuS} zX&rhdeXNIZ?V(QFMYv!3f8;XWuv5Yl&X~FwXRcnD1WHgV43BZ+!;Hj z5YME-rRHGw)$a2{$~5ru-J91uIzf*)c<256!|uCXtTo9h~_J`11}SmxgTjIxJni!$+|fpi>c{o zkPf9O42VbVbDRPPd_iEAq`>2{`BG$<0lj>lg0gYbiimroH= zsZq@E-^+JE#nd3H#f0)$n?&)AAPNRu7MYtT|B6_>MI`#ElI;c-@8ZxA0uvFt5JyW` zLf9*91!Xn^ueHG3F7kVGvv(_Tsn}JiIws+C;^9y!9$XH(%5>V|-3?#c_!096($ zkEP0PU1hGifvKgQ$>npIUTT?OZpRdJEtAakOf%Or(cH*XQ!P8LZ@Q^A;auC4vyMrp zwrOWA6VFk z*P7?A2}g&iM3Z)ut=G0ookccLX&W!~@>1~~YdUt724miKsZ&4EmU`Il>73ar6bp`S zgX<(;B+`3Tkg$hQLB8)Ae9D5{v%-AKPOj^02854ey6#?pw)FyM6x%nK3HC-%Fo&v- z*bNE0Wu6!Mstd2NgMKo5hk|67PA%NvLn!*)>>>!BMJ(6>&dUt#TjDs!YHJz9MFFQ@ zE@o{rTG$`oYlKr#faEGdcO%fvWnU4C{;wOuTQj=8`6KB{#YKdJRMiN8zr(L`@$Yb0 zmU&WrYYnYs%)hlFl6ZyOsA;YkuE3l@%&4wR@GwFUZ&WWjsnCbx!q7|5-n9*6gZQBQrYbCRr+LPr+-(@Yi4bDN*4?I|~BKz6-~ggPck z+$Or}a;ddc7TL{>)D;P&p2~8U7-9Xy2qSIP4?K=5Yi3B$ZiAKxBa>!QKVeBNWHtMq zn>3-&z{q^yDK3^y`*Z=g(b;HG!bMc7;4D&k=*L2~efRFA&}gT_aZ-{eL2G2i464PB z3lTO|SyQg1|0~PCp->^c`&`T|Sh!gA!Uytyhf6w>0!TDhjZ(SE~MUh=8m zW2PjQklR?9ClB!1Ug4&!G9sO(T?p2$@Fy?zDK{;D<`%R*aBIxElMlVwq1^JaWgLLk z(7WZcYz=c617h^%(&~~<10FCCbi6Q=pcwfr3t2Kb%2}p~Hj^u-GNs-~W}e1adfiP$ zGT-vl$cP`pEXRWS{H(5*K$*^}vLwnC^rU>6S7%Z7j^CeC+4jfwc(&HcxXfd_X4d8M z{=M0keH3obb{%Rb3R1ZqvD^_P2!dnCNC6JK&+%lby&)`a(=!=OXpcLpm>L}fa6N{9 ztxEA$Hc4GW)n~%>UmZFdau@k8pED4=L+2lfNw&d`d3evmzC$h$%ysDbmrns;_&o`( zL;vI<4a0|{!LB^POO8GVk({TgE`cX=n0EsbCK5^?>v<>+3=!M*nS6&&3; zYRyF9U3P(MxUm1TWEWUTDR_e)e>iQK~>K0xx z>a(YKN^jclxYRnm+uCTY?G*m%;BA|{0;n9juJTTkG@Pw}gw%^dzZ$o}5{V_;p}ajb z7|Il<$xv63g{Qzevt-N#B7`b!XDkp}`$Y=X;p4#oykGIc|bj3+-! z=lO5l_1jo3`5FF1$xhe2fA;dgKy)atlQyFQFkW8?41ZFxbSl{^Bt;EY0ovNQmP0wN zp+g2$IS%6dYC1?~jfSI){IGM-7o%Fa3t{BtR^n#mJ}mKKhpdWtKRy>43zDO()Cm>2 zvUG-V4`^_7Mwbjd0y#YHtD?irsSq3r)sr_n*sGlgmeV($B>6)7T%(d0dHeNvn}=nU zqkE5xL}0!C?+05Q%i~-s|H_S!mI9aS%-tl?+ECVife36X;c+jpcQRNgf`lu zVp^}QQfzu^cI6+FT8GON){bH=B4V>n2<+f22HQuH^)_mTzhY0yn!v=dUf0mNR>x5l zk}mtO5okQpjE3nhE4+VZ0}?EG7HnRsXcd*SR@^UA!G#}w`&Er_+_IQD6_QIOX=ocl zPvucVQo$`IA2N70i?#I))$f*5k~WbyX&wqJ0iGiI-`mDE`X2N(Qc+k{^IUk)m+08` zDxW_S9PJdsvIMS9@NYlDPQVg{w-Tb!_W=7DJqeW-&{Kc4v^3vSfBCUT=R_}vFXMC} zd+Ff6lmQu;V5srf;$cQ5X_^qH2-SyHxa^D>iNj=d2>Xo<&?KN$|bGv-y+S7;Mc3ny2-kOaF!q6d2M2&la$K7)0+>!JjPxP-wi#U2A^ zrisvW7VfaTNz&gzlSzW!JU;(-2y}y$q$O_4O+QU81E0ZE(W%B*VH71Y2sKs8tSOSX zEhOVXZI#4;A(%@?DWvssYDN@g3Nz8&nnDG)$NW>bAQP0qr;JAYqu`xYaZv>5!>bPz za#G&n9Xb`#MKbZ;>E90y&eOT(quAkG57e13>k@Km1Okb7>JJK?4HL0|Z z>%6O_*6|Nbu+uV)HoJP?;B&m?qK6&$`_oDzIuC9N{$F+N=|-FSrgdyo&#;G`~-|zLm&cn{b|9Bg}|3MsLN%ndA*r5N) z{@vKv+SY%=^UaN|%}0L--hT-MEP*wN?tiE2zqJ)iK-TSTKHYxu?dI0gjm^%+9Ygrt z{D&6GKA0%m8y&Iq5A$sBP?;?s(mC>w-0#r)F6+#%uCf2Ox3^37|K|3S$D4oHe6lIT zpsmMGA3YKK|LN0>?LPz?cl-ad|JQ6b{}Zoq=UJjH*{C*wE%^N_$Q;2|>wArRjkoe1 znxoE?feBiiBn!wAr1eg^$b%5CdyTfxP$tRvIV9T2=i_v-z|gNB1xf8Jg=qi({a^n# z{s`W@3*Z5?$fxmn(jPcYmg1{^EZZ!(P0VfH3b885F2I90OHWfsDyFk{(^Zcg)xCz8 zKFkV@_5D8CANqZDZ9ARL7Idc$U-46rxv(++&JqT&xWZKX(lserwD9&Anu<1PlE9W= z3VMZtTi8bdw%?E7qkE0PIL`9`7vwE&Hi0+3Ql{TH345tn)Pp&~&G1Pw0=?&ru@LxN4AW7o^uCw|H!!QZufz?P62e;D1B0e{1#cn2+ z1YklcC~jyxfhM318y(|^YH{SW?t?pp*aA~BxELEjkuQDteCPGfh#J@H^->PPe2{iT?+egi06^gm1H{}+UNv)c8r5NwD}lpZ1u{;s3l(~>-Wt47a@V2ejRlfZcUy|`9OST7qOf4H!~y=> z0c+aa#23S*<*x@(yOYzx5gVg^LJpKx9#WUHY?wb3+-Z_WCWx05Q9jp4Xe;>pGG9oy z0K7H2Je$E;cV_KsYD?D=@9DgjhcMEeB)S~Z6p%<0D3<|i;^do>=6xCnFCqo%_;Eok z_zuF;N;krLmV1TU2^zGqOK|+gkcTs1wqCb(3uX%htyb9-F~#lpY(N`UU2l_&ASJ!;K9!R3;1XM z%_07?yMs@^hX46*_@Cy3<9lubXmnegUWKzc98X#-r{8v0_TlZL)p}cMI~Ujdc5Q%OGxYq2x%eHTH`&P&CGVjm+@7uj(=PEae?7gn%wIqlmk4;#6G+{ zOBYE#j|W^~TYrSCP;+8vV2=e>6h4AA#g7XN#K(9S;N}dU+aJS|K7ZF!j0>Cd;}AY^ z*aFXTIS2RX(177g_a$YSo-M_ZgaJ<66DHeUimAcYFxG_>;7Ou{?MUbp2m70>&Un$Yc?$?53W6#J5C zO~`hnxy|6Z!2>?ES*89U!GC&Ca*oRnDg}_zGvDD0bUwi5*T;jT zYxj&O@}pR4w8+Zn?*Agr(%CZCrv<b}pO zc+aRnDqgw=2Y%P5S76t2j}H6TFhMvI_ZcP)7K(4Zed~tReVZVhzR#1B_@rIdH@@}v zO|Mq>ZGy1+_O1oRdAW%7?LBqk*qe_;4!xMhwm_>R53bhs@MqVJBangxQ0?ZU%qddO&!OykZ>!k|Ad(`(Tdg!WthLf(&&oB|SX-jyVA>j1;TG?wFYu9Y?GZzHB z5ae;^|8wX6^H1Xc^X<1!AAS4BtvmmpJO3ZY|0j#5=Ws-X?5#ShNMA=4y4yC>ve-&Cnz@ad+6X5>ckXH{u@f=Rtf zPUFFqH6KV4Fp{YQAme=yqSq0J`7fs`Vm)ezZ~HHIUcGv@^Zd`nS>jZ1SX*+mI3XHo z>$6PBbn)857>*Fdd7vf^LQ*byDMiYTsG@`*mW#w%-SF+z;%qj3cz88W4hC5|N4+Cm zTtyuuc5(vxubH`rLcS|8|k2k8l#B@hn~hA2Ds%F=`_v@6<&1C0I2nu!N#lOCU}o$_vs?Mgj*5 zniZL=7BMJ2;=>T(h8Gc{=>o5u2vX-F9xs!e3>Ft==|GP2xF77t`vj{S3}Sz?B?+)+?fghP-T^s5a1J?G@r)P5TP1DWvq4#1v(_+)F}3* zBk)X0)*H%Inv4Y#+<^EoF$_HlXv-rZxN8mOe+!2Da4L2kQd^x7rBi`u!Bd2uHk$|y zQS25y7+R@6SvAqhyXQ9_&bxtL*M|(mT}29z!u7Zeg2#sDyUXwUmwAu4^k^imJFmnqnvUD;*gCI?U0^=j;7h}=_ ze_Fu5G20IAc^WDjAEEdGHS6GShKTp5m{K2{S(9mabV--Jn1?fA>qyr8YyM5MB_ZJc zyFBJ4hJ3o1OjaVIhZc;`7@f@dnl;02_hb9JkALk%1~~O4=GW05qu(DoO@4!a)@btA zlfPbm(-;5$*OQfB+m{leDddp)h}`_6G_8Cb2mxa8ahxnZUc}i)I=w%_;r@}3{t-if zKVrc8MvJye=?ejY~J#Xx;B!@59^~iL&tGC z{WwbBe@vz5`H|wTKVroH$7P;;ge+jzV~XB$O0UcU36K!9{?xEs;9Nmk0*!cc!8KKujnw;wFAe6+ zwh}mSLQYGsT4A`sIva|oP`dj|Wvr+b*vkUC-OCgUsxPQ$7$s}uWPL(_CBINNZAE|g zFNgiZH~r^3hr8dudG}xD)Y-q`gWUTVM6bZp)H`z^D@p|@D68}L-1+(5`n|}VpT=tj z_;;ahS|6Rxte?N6r`GR@{cBPHIIuoAvwjWimxKA-{@D3FcwZcEmOGE`2QOxWBBCY` z?B5*D1~ks$d-Lbw{lfg2<@#silR?IqTtO~sNC`)!v@NY4bQ>ut6Oz$meKitDDeuWe(wfvz-^7OgKuELb8e#y6QNiL z-R3P8e8#k3f|o28G&yQ6*2tJD-l8%vlv%o-ifBG^u@GG>z2uZ9u&}O7HCTMw!v(dx zYc0y&hy#z*tQin{@#Y48NJ}L3Oy*}Tq2-tIn2Ir}a2w3`;@5c9yd-tYQW?Dbv#dm1 zAoMn>a!;f5TFA~Lma+Jd_nG~op{hNQj_DNcP-NNj@SCh-*LcBVKJxn>GeKJ-F^4NX zHjP3|C^}g%falpDjJbIsxI&OKZKKVa(k35yk}!ES&_~puqHkn}K{$cK7!vdhtdw|$ zkYlahH||icB~6<2Unm^I>3I5(zNs?_|H>3c1SiNq z;1-bzL#j6dKi*=`OqFsZr8>6hlH#FGaiMIOZ}t;V=^D z`G5Z(ZXt8`erzd7&1wsdPwxC%BS89l7B40huBoV0y_kTA|itiJS5f`d0edW$-$vc z{Hf%Wt2(9wNX$gulwxy8DuZ$61HoOzOn5}GlVChZ}7vgeC9>Xk7Tqo=4s zP(V6vl%y!t7nBx>NDy4#U?{<=*Ew<_4R*Sm^kt^{o>{$BkItZBKn#7|#rvd(R(s5C zNCd5g3xM-U-4IpZPZ=Dv4^Qbz%DsS!JXbTAZ7WHwBUQESklb0*0aVkVBQy*?E&cId>8};_VnsJnAHm8X8keWyMQ2x-c zjhP-9Q-Mi_Ca_4?KGB9AIx_Ho*=E!VOftYy`y}2@2UfrmWL^zRESr z2jbi5X~!x)F;eWB)~bT-Oxh!EVFSXLa5~@dmvoJ1cFbQB z!h{lspTRv~NPWmBl6qB3ti=0QdIQA-{=i6yXGvCWtlnxtz-*S+(C3mxWc^DA`?BcpT%_r-;c}D*+aD zuZEpsUWs14!tUTkoI24MbW|8umY!fz8#Pf&?kc3x#!>MG@UCOSmmJjn>IfblW3JMT z(grjM^=Vej(_2a$s*zjLuVOIY^elWdYIfo>Fc)_!m+%Pvy(RVGZlY0`dkIB{qB4(p zim)^W8h$n9*tjYZu{lpHL0dNchS@|70cyRKW`9L7%NUO*yFUqEW^b*AFWsvN#aqKn zW@t4(^wu%s{H*48zFKBjz|n>VPwbC$D%S$lef_oBw!9EpljknJIGdpj*bxew?3HW?C~NZE-a{h2PA) zw4MfbmfFZm?#f(bKID_p{%*ZYwbJfjf?(rArrbUIU|0Oi#s|EblWCZ?H?D1spz@%q z-W4kVBt!WfSXuE0yThoC@-O6*^Wd8RaiX+-UWx;9I)IcLS(3Cb<15KYW%86(K;@lU zDD=y!do;|>o$cWIwu1tcHyHrnapSdkX&XU!&u19Ig-&mj@=ST)om>UKB-soX4A0@& z7*`+cP(ty8m@8FhZd=xw`;t}+sv~u)b_~SyI%bS-S3xYZVpxMW*4P?>--=Pfp)5Or zVKqPW?n&B$RYO(p#H{EF(LeU$cO^C%&zFi!xze_4w1y9_i-cBi-rL^zof&j~0$)`S zZY8`gWy~?}Y3f*&ZrR3Dtt7)C_bckhK%gb+u{ikuS(IZjiI$-vL#zn89EOUCp^(|z zjMh?{!4=4__8JmLp>%@TBc3j+O>RBPB8=i=M*aiR z68nu*^HsXJ7}xl!&r>;>&GJP}A2R&5KZ-#d;xfis}$~M(A3mVa>G`vl1x%k z!Wc7rjne_7e8EvtE_QYMpU_A^N@Qgna@~Ooz>Es;af!OXB1}20#$!(Tlq3%pDLS>0YcbTHW6e=?JDQ=3TsoO77nq5|PPQ(yZFdU2JNd?{ zYH1;>&q}JK5|hu$_>Sc9nV++p+&%O2bc3&FUT&^AdgkNfHZTAB>r63hPjM@Qhh>%Z ztQ7akDoR+Hd@C#GRxZcN{Cr9|7pl z%Ttn8Kok|UW1sk+Oie;V?0V&hj$jmQDGEz?O-p|aic8aZX^+vVYU$%ndvUyCEa8zJ4D`cYrx2X_~YOow?y%yhzysYJVmlqBYb$x;6Fn;I{u30HAC6kk6 zI84MaAgKe#q|f{6qT)%7i;8#Tmu)MK1Q?9vgISK(80jP(V^DvKy%J@-u#|)l4J<%C zmtQCc7`(So=-uc%5eyIHefr=VenOq~b!P-_Z=_}_?RH>I(1RSs={RpO!gDp1IB%!< zbtN8OkBD4<7`fvvi;F8ml)R-D_B}jt9%2<6Jh8FH{nRmC#p>+Ep&q5j_Q`W%y+0|uhoODb6JW$CT<{eU zBjk5+@!bi$JF(iOBiZavI0>q(i{H|Nm6#F~fWF%cayD9=b@G1*#zRme(V6qiT#x(K z8OMrAmi%Lxf<`o*P1`D~0%=MAM=%iFY+l^x+S98bnuBi|-q8Sspp98HGf5e0fcM3s z_0=8?v#vEVl_b!_n-ak(VnqIu=>)upQ(xJD_%vle{A&du{>lLaN2vq@f4iW~XRA6e zzctgpa~>Js{h%VXaG@|GDx$Km0GV6PnslEACuO+mI%1ztv9eHdv{ul>Kl_wey6KFT z0*1s!fm%G=q+>SVSjvisg>n$w_GrT}2ttqo%o6R9KFWy-I>1raWUNAWaKtI|?vIuiXJyRL0? zcK3BvZ2w^`AiKrSZ6tXm@YPayJxN?kx<@f#q$+5U2WT4Y1XxD zs?r9l{;v4GKsBPB_)*mZue$h;e;UlVy36u3D z#kQNocG5bK%b}DAhg^zR6svlkBy(wpgS2;7o!`R#t-clbXWhL;OU&ICxXu>v9>3R9 z#9b(?i4U?_p7)a}#AkvJ419D=eVlu!)W<1nhET%);Zys&Y8>Fp1Bx2H8HB^QI zpSQktPh2E9M;?n^8;=)P`T_-H^_7NzgG6kWVOx;jX~HuBrb51nF`=o=Xu_m5V>4J% zrS%qn4-?=U;?t{kd^(*$!m(}u_aYFyGl;?DpM&ms#n%LMIg`Af&vL$|j*QPnk>`X=PK*<@#XyG3K@zAp+7nUFx=5^V=?}a-OtGL0e#P

B9U3t`@`SmqyxefIY#bV&LP*0-Z-+Cn~vJavT#3Y3|rv)Y#Jq46Z2NVKmMPSl% z8JTGWW2)Vmr>B!yIt<_QLF>1}K1b`CU}h+Ni$gAq*vg|h*?aDy{X|*S=&$z%(v9fF zhXGjKgeH!LK!DkPAL&FLV08SGjBA8FM$>Sw?dRF-1Vbvddzihj@`EEeQxk;}eb=#% z+z)<8#&b%7MrmpcuC4H!=6!WcI7yqZV5%FRi=X(Y5-XG%Cup7^gCKV`&KBBy*o=Zt1VyQ}EP zH**67M(gOGf=cV)*YY4QHaOCcj=gQ=x-$&HexL8!!OW0SsKl_cCdG-rs2e}&DTW`g z22dz%>Z%BkcmnZe9Snw*$wMcFQ9BR5rjxW?>@B)8ZvWPe+k*D`w)IAux;0f+*}A<= zB?GVB5NMs+!C$qT-Pz554ZHcBjr-2V{X4XA>og}PldUJ7OhB&gY9;=8_Im4y@~MXa z_JjX?1%*Oh9v(aa8DgWoNsc)(|Elp?`eZ4u6BAfC!HP`sRWCTfGPLB3&z8O)b4amA z3aINkK4RAzl_kn+|K)kUi!Y7Nt%(?p`*JX4ank!v#z5ti#J)0BfzaMnXm1LsDFV6Z z``K(rQ#TytzI{>$L}x2P(dy?UJ}-dS-m(my_k+XuJb|dt*%;DoVMJ*VB5G1&p~S=p zJw#v-MJk0T@ps6&kBm0)E>JhvL|cUr4U_*S8wGzZkS&&ge4S@tvDk$0{5;@=poJp5 zhV88lq_PP9Hv}vb7%P!aiCILC`fKPUvSU}wlry+b7V&AHO#d=>Le-s5A$wNE?aE<% z>Ru|~CBfn>NmZ~g&@(2O%;EtSy1RLPU^&DC4o5Z$;g%;h zoiMk05pFhRNcqfqYHwn@fz0}z&6acN#$tO}ue`-2V?3lt$nfE?vhjio zr7;;(Woszg!KBh$dENRwjL>BXkm2088zIi6opG2SVAI?VZ99uE)R#64e#S>GR?&_F z^5kQDV6@qEL2N(c@_x9TXIO1Y$ueS+vy6h$e4>tISF(U3&2_Uw1NKcaq}yeAXEoD9 zN30T6Y;}U41voI|#R6CI3tZw>K*e{0e0g$8Xi=HrUF;#SM;*a5#$a(iOHa?BsvCT! zy=Q_rcQFDy344}(AfU$r5MYCGWW@i=&j6GUO_fZ2r!qy3DXHkBERGq{OTRnvxaUAn zzVIDj3LK~xl@5(rFw#1u$}AlZ#yR$pdX0lN_kZ#cE*#V-F#9p01*(x%ftBezV5ed9 z;xeVUL3uh_QSBO*vmYUd6aRz>E^}&b(GDy+9D+!DRM>&vmmYvS7~EL7LRfi9?K%vY z_s<;$h>+6UT}vZQw^Johu8~s*Yx1ZBI9TvuHVdO>RLu6`(}2C1flDCTiFJ71kQv|0 zlch=CY6KU5+z>_M!^@eP>?Fxfkq3!&H7(4WWe|=wj{TY$Kz=h#j7_Roodb1EXMy{r z9M3vAR;MS^sY;epDZ0U4Si=w5U$8*vl~k*M4DT&__q4b^rD2VQ25_8+fBjeV#-z+R zLh*e|j3=s9gW$f!Q^18$ZrDLQeSkFrBsn=KmH+wClD@pUuM!+tz_+W!_Jxj437eYmn^Bik=x$aM3_#F})r})I>TR+RBtI>lC7h zff<{N^d?HFaY6;KRDoghpqQ`bEYmy62SRxrSilNrUXGYI0sRjZ$)mM z%*eLlVSTxK5#x-!I4+si3C>18aLsfBK^2$*XjlNle*=Rfz`0p+I`&k;>7}2;{rym7+#tS^%D+VhgzUK_vZC4H*Ad5w;J0EwubfC zvjnxR$iB8~T9c1TkzFje9#)FcxHWJwDt3guGwACTw_nZ@ahnw@Q&=HtR=qI31>){j ztqYnIkmr+jCYZiMm)>T_t8DcJ;X(3Un|Ilb{LI`7#-<^Mk=sH>uk0w0Vv8UcAr9fk zs6Ls#d3NVmzHS)$WhXnKkey`ABfxmTUo)?e38V3cyZ`>B|LZ*LJp7Nh@%tZ=c$j3L zr;iQ#uk7EAjYp64-|&2MV{3Es55fB{fq*5jP|^MGbp5xU29rf982skb?LTfk+J5rn zX=i)$$@cai@3#K$;Xm++dEXcEW8U8AY<4yt=GovOK7?lvb1s1Q@TU9!>C>m>`(OOM z`DF9ala1}io15EW|3BG${NxY8le_)@`{4gu+eQBW=#Kw?5&plW`TzDG|M+zK>9?JY zJJx^4{~i9{7Ybau==VGGt84B5?d|Qd{r?n%J;Cy~9zTBcbn6N5|EF8x^I+p{|9|%X zn$4yPOqGi*FUKdd;gYZaptA7$R|#G=OS!TN@j%p9xj;`?o&^JA*+ohR6{NUuy?D%f$>ln+5^A zrN||?iiC@iWe#!Emyp~wzKGK?=+L5xD!KCf#}_+6xc}4Mi@lww6&#A;&9ZlgkYqj+ zU|zop!k^RWaCQmB|K7cMJ_*8|*Du81&xN84Z^3O|=*aCrP8`ON6L@?6v+4AGwwwY# zZJ*AUs#mz;G~W%e=gC<2+<=R&2D--e`=g~$#FKtMNGH&TnB{b~kXP8UsB(Yjr=7i5 zJI`M220ae6Qm>xvJpc3V{tNPr_((Cc92O$R?rHgI?{N3ktJj`4eHueQ$uh_cTm(pN zAAK%*4#||jq=8NtSO@Y5LdIG~!&cxv(Y_fkC?+i(859F+3>b&IT(!We&Y>L?O8A;C zC-W<~LYdCZfXF?3y9*P6mr3Ck5og@uLGq7Llc2%vqw2jId`LbuJyEH+fO_aWp2Ot* zVpnEWDj89&!8A2!YdoD`tv%UP3GC03!8sw9Kw)wcd@0Dcuiz_c@K3k{AM=Kgb58TV z1wyrO0lOcD(Mk+yV;kcN{rC;+G+TBzElH|&U4*IRaN}7#^lYqF;O?kaVC|n)U~LaE zo5DVjxh??HeYpbWY|A%_QPT?72})-=99kuONV&{)af2|5e6zC#Zw=I05ziPsgF`wp z7&q4OR^usg>^KCl;L{NbLNFE#>WnV){v^qPy?cw)6sJ_SiJAuv-mKkIx{>S9bqntD z`}E;t5@%OdfMbESyXfn! z6Vuh4K$YV}CYRZtEuXZFG#b3N#;Sg;!NbA2LYH@#sF)s{9*>9Qi~54 zT+nL{aqY?UBF$z~h~c%9-^w}l0*Nw^2%F7Fdh~S0v&$q4Rc$HNpl43h3ZAaweg)oQ zMFiU{z&^oXShpo)c;+H;Yt5uUgx8x$(_E(dBiD_r^HPa_d(#Uj8CIlFAq4VYpE|*7 zp+*8>JW$9ZzeWcApaYDw-xoa!vA)M87MjI1EFe(6D3dzm`|9m& zL$MAr`O0M3@|lm*s*!p*l6ZT3XL7o;|K8bue_i|U(?{FiKKgd^&i;F6|1H>miR69=r+1v(rE{FtXXR%1w>W zrupSKJ$aA^KcqMLEE^AlQz0Il<)Q~j)1IT;h4Oc{TqJFw z3=UM4Z?yh0Y>cI~#6K9~pA(@-3e|Bbrq{jKXwx)kUZ7Wk+y)sC6(**PiK)Z{bp=h| z3avBE(HorrKy(%up~F0e(qZv{EFpqOEh7 zj1%|)-$H5Pc-YS45hylnSIObc^Fq~tNg{NK!98vICVg3Sl*zbh9wwy96l29mG@e0> z1$!|kXf~TI#6SgM0Nz%cq~R-mDi;4#KvlmNIDCA9aKIlDSBPG}6+ih6L{~w+N~r|r ziSawuXh#jj)GQ@ z&2ge$#p_?6eWBrC=?4ZQsX0^%tXyF;Q1BFH_G<50!B}w5XDujif@%=Jyw^axO(5989#9Oo5mW-r=t8G@|LDEORA1l` z9XWWh!7!8^qT~i=G1!ihEQNHoIrrOU&vy`Ax`OTO2h4aV3Tz*lfXeL3MOI`}PDm@; za!X_I^2683WCj^*|0jGMd>w3w|9>q0e;5TPR|~=Iz?i~w319a%w_CxmHxxjp5GEJN z$soD}>1SfC#WeH`@_7sa;#~hD9F*+nNR6f&Y;HqclK0?|I;ATx8pRZLbX#!9G2-kb zSzIQ9Bj)pjFHo`sViy&t1Z&~ui;VKJc7v^r4Z#tZnWn?VSuk8;Aj%1%xlI8ona&ge zCBMZO4T#;~NeK+f{9A-!4T|}V%t0vco5OoLyfPV&TKXr_Q=ifk0w*zEiABKt&eqpx z_wNs4f%O#9-pb6-;y>fNcnQ@_W2oK(W-Pe{j4zV+^B7lH2cMWx%HQ&*Xb;0m=entw zqjCQrwh~Q3(Y*>!y=RAlI5Pz<;hZc%NZacY%0ZrhJqiz^Jm|{;L%<&*8!n>hg(O(m zDB8$jZJBZ;@^m24Wja|-0&#Z1(FWP3>60MjfV3{5+8Kg@?w{Ez2w~Flx|iZ@;|q06 zjeu_SNoYXFWpbcXoLt9Ax_v9gi5PVeG0Pu!Ic>q&X5eM8o3A8OO1qJVDjLHzxx z>4J3k<(lxhd7!uT2NzRSy%JBt$U_UXoshYT^>6{*sS7g;K_t+XW)e`qUs6Riod`sE$@>m0)TB-j)g^-GBXt|xT!t~kfU`%K*&I`gj zm>Pb&!h2IPp5F(27CA&+sEtL#8QE_7Z2(=<^* z8UY6~z%-^NL}C=5&!EmrlChH^xM{~PGR!BAjUG|!NZo{qLa|B1)hKa2CT?;hT%Ft@0}urrq3 zqvA7?^9E!)TT1!ZV^TDs%E~Y{R7ed&2fh;+cY5TV9_d*K-bJ=9srR7g%MwlUxxrT9hNg?j(+V4yk4|x@#Ea^F)?o^@ z6fp(aRmMD@kkaLO;nkzO-=Tna9Ti8xSf%FZ4{ul1(Qt1;-O<3-RC#p66>5*KNkN;r zj>_T)$a_*g{!paRPo&lqG?z*}$E()(bQQy~Pb1j4Y#=)dAvlB+wZwYAi{Dx6l}mKu zEW>g9%awkDB3{&S4YP&@D{I<0JkI>07g!4&;_mBds|ZO)(nuAR0_xz63eB;|GEExk z6}9D7v4yjfdpgWcc0>J>+VB!~{D`PL`+Qz#{pM@@5%p zX(`shhawV;(+kN4i4K{>u%C{E0F?}rA&#?UW*MT=Mxe8nkkyWo*oQn6ZRZ~uLOx0u z@!%{Ou2Q&)z3;m=2zF%_UnUk;12=028Y-)4PNhUK17wm-QL#C>0svB~<{~l7?$B+a zQzB$z)LtKTAaINJt>QO@Sw!OSDz1tnP?(Uv>y?|boH>dEI35o$LmJYB*8aA{a?rrQ z5}HhB%Tv698>I{Uk|*!bB}mf>2346W?gfuk>g!;uRrcX3{In_rm1@3C%yt(^kP3&I zRM_=y2B75+<}^R~;QtXK1mL!C3Fr9a%!#K<0tFjG09Gcru**{j=QHDnPd3HV6 zdja(vSahX}0B4$NT>Ipj0t3TZyJzt{3Br9mK(y;b57Wtsu2$c7I#-nlFb{?^$Qg>& z5CY}yLD!yzJ4;M>YdG#89HKsDBMC-ADa_07IpuI6#Gs%o;HmGh2x$bs@g8j)Lw!pL zLQ^q+``^d+-9@=a&tv^M7V zvis_>Q=U+D2jGlVyE7H{rzM;f_C4oZGfz+DGH~G(URqy#M^o{^8#DKfd|#;K8x6k2*sv zTqo9Duo>>kgF9p62l#26>mecw2>G?x{s6J3!IdG|=32NQWr!tq20RlpaqGT_uhcr< z{EWye)A{E=0!AwY7QMX+-)ykvy%O0B7;og2lH$Ycv zkIq&chNHQQZ6nt94ACc{i57Fe@qMJB~h`9+!9DDzhfP<^y-;=1_<>BoVODVpRL z66mPBr9DK!6F2Xb=1dm*{2iqjG5vad|J@hVhNsR*fCc}~^mGdOL%2LR+$(t-j|rVwByZE&h7c56 zm=Gu2B)!97B{X_1Q5U)=?70lef0t=wml%SalalQ(XJyCvx?ReaXW1~xxK4=jHgX#) z=o$q3bgMT(h=JZ<4=_sPtgUB}Lp34P=LqUOANYW(cR<{SF4G|xLyXR>0|cK7vV+?U zL1;cA5c8z3KpGeAVaeN@u1Z6nn7Y{Jbu<~GH>n6KUkX~qR&Xkq9bxQ)(cz~R>XtdS zn=4y1L{oRN;s1J?)@`5FN5@cn1eK-9d~qdp1s_A_V2|Kja6cJiRX9)LlVogeVCiqj zN9y@>+DBA|_`j{-yoZdW}ci^W_k`>gh??pL>oFzOj(3EB{Pq5p#yh;|I>dii?p0?cf4#4QYOE zof3PJswgf&d~!5A!F!*+@&r0|oio-Ifbo>C$KHO`En*2r&4crFj)QYhkmeL|ey{<5s;~ffRsTSlGOEqvwX6ZRwSxi0Rr?n*oay}w&XM;1 zeI?#YS3`7NcXx1!d?i+$97pi}Obi}60>7~307^HFmO^>v^mY0>bY>jRk{ry$NI~mw zcIfBxZg$X)oQ**HC5-B9Y`6(mlU15lw?KX?wm6{<{gVBt3qtgpng$pQTXx`)KiLT6 zFli{jvUw=s11_;99~th!;t}Y(V$o-oFQUr5yZ#-uZ z@4C%FL-HN#eHhicbu26MLS;6Yv8~sw&UUFAS4EmLUEyCY# zin%-4t3DI&M^0G9`-u(|^B1bW&HKMk7R3k!>DXGlmh_(l#VasIEF{k}KwXPRGZH-u zrh*aX85{;uDaUE^0D|n3;v;dW3h7v53Lq$kT=b=JVR0B0X8UU1UWk-5fZd_~fN*cG>m|yBF z>e5xEqlpKvY2GK+T_t3xOsPu9XLe5!`tgvWA(nZvw&GXc^4nEQg9(i!4np z;<3V0rvtv^5Ei#chTX0cXG4QYbK@HdhUwy)?%#Bt5Y9w#f&=W+FL4affDicR-)xUb zv2rb_^eJ@cxy*!vc>uFuf)1YzUGGb=_>sM&yr=LFxt%uh^YcA)?n`RBOoQkgz3!{cbsPALTx4aC9BJfN?E4Wy{Z-v|? za0eLTMO6nk^31LiFeg5@ca#yYOcJ&pf@;(;9hZ(aTftTz9|MH#5Yo5jKB%6F9E1_ZSQ@H84(Z1jT zinackrsjMg&^I_ACl|@Mw^b5+V8m+$#Lt4S1pEKD8u3+C(M@hdmyThY&+-(DgX6V# zdXeTSq}MyS3cmepc`>Z#`T(TY!9zIj1cz6`f^qEm;^I|q4dzERvujjmw{Ql@q_s{4 ziy}j9u82{(R_=vDD4s8D=a#cB>`mJ&5av>{6Er5yFb$C*xr{K>vakEi=R= zefU(E9@HPs#d$E6-UwJ}9!Bwo@7m!Fr7@8*AC!R4EQw7z?t_b`OuUEQ%{t8-ZEf1} zA@gaJ1i|1k96lsRjHqNfLjM!0h8c9FT`kIzObF?yj~}enY5O;yQs>4wpq% z2jU?ziIyRp_C7e9)f+ywru#cSqs6cAfAu8aoOB93ny7OsCEPu+j;V zSih{iUg-d7K|Z2Sfq{y@{5KMNioplSP`Stx)EMD|qvTFSb6+3dy_wH>k;QSc6X{_N zpITFm>R&MYc74QM1=n*mVH_%bJic!wvT8XkVd)zWH-ihyoH16E?Db#7p!$3ZI(sDpKUWlk=;3I z;XKIV>8Uzk6;9p7;ysG7N$|H&167#NQF`1Fs{7SE3B}u`U;^JpQD-qj*~`7swg)B8 z6?$6KZB02APdmpB`UN6cemlV-l~Lc$Ed&yj3ZkK#nmk#UsjQ^wWj1mkfo&cSeHwol zp_7S`rE>2@=hqBXxsR47m<^2RO}HtBJDZ&^=Q4%$C>_K72V}DMIDq!W$;%G?u)!Ch zKgf-Pi2OM<^iY^~*aZgDD8!gQ8^;A#EV@L%%4iimw3X7_8j_qCfL}H%Z(>iL;#mB% zrQj%H13^l*-8F-0Ho*efL2{jUi9kU}bOIPoX3Hu0}Qk$evq zu;Muj!WOW828kWE3`($J1w%WQ}n0B zBf)3DD;V88=DLb+>}^NRT-B4|yuf9PCAbzWRoMmW{onwkrfv|bnVMER7Wh#vqTuan z^Cbyc=usY;5FJB9WJH3K%vEaIaVbK%fy%|@fL%nusj77p^RHM@XD}CMR&@W-ITFs?H-42-dl-&+7Y66{}cqs|n+LkYI+Ns7Can+x(Wp7r_Afd7>9tXTW}H zm~q#L-6QXv4;s3yZyN*?7Rr-2{}NCmxU9DqZ2Dm8{O?z zCwP|4;^6=sAOG^^T^A#!O(6gr=R3w3oumtJE$xfv zp_**VyQn3Td(<&kMEUs9k;AI@0t*6~tbTA+GA}^oR2q>m&QLHaifxIr(}a(W7G_jc zoxS?;P->Ayu=1<^c)2izog;BvglW_vr}_t*4<^?#-!fpfwhA3=%{;MYQQ*vxzDEKD>P~1@|Wa9fd%QC|79`od;yAG7kvnSO_DJ*NG#o-&%@|GFkn)cVjFm$%&b_S2W357guJ4 z4Qu-*U?zFrra>LsyJ^zIcD}0GH~3k_=5cn@&|Oj4kgHFWMYDfPNCw?KY< z0!J|iVXY>0Igo<7y#TC;N1%!r@VxZTr-A1tlx8HPLOJuo9bZdj1Tm?=S^nbDpl&$z zz%f&$m-pwln8bs*6@~?w;ezepF$td#lYN>F+tc5XJ+a`&2!5Gw4!71?~{w-!J zSvfo@k`;Ezi*UNwN%p@^?8XS|}>LkKho402xdHXxCO`TORc#>&)Vmm_cO!zn| z^`oILg#yE8K?SdRmb-3ar$}&sP0hev zrp>U#8M<7X!*o(_YyJfzE#QrGz-+aIQ#iWyvlj65i*_i&aB7 z{Fh0ORb0y;)?+0ReXH;3(%kh&;YjlZ{^7$!K9(M5tKbQGpa9vu~iLpr@I-%~r_VU2;+IN)j=Y@gi+luyTU3mW3Y;nKu-?llmpRQo!fPOXxR zB`Is@F&uo|AYZ^mD3%?TsVdBDHXDy+75lHhKEH&^Qt=ZPPQ`!8feyYgRBv7P* zw=ngPhG$TcL|_+6A91BUxPo6&wmGPNL1>!hU81V;;kRNUTbAqq2u4ekW*`)103 zU0Z?UXQxqY`PtF;#WL+sO&yYM=;6pXRj82}*&pJlRr1#YF_ZeO2hjWf*?afpwvr=# z(0?O>?{EfM2LUwz-l=X(qsMAVZObFIw36EXP0$SVA#g|@3j|;sfD$cpVxMLoY@cNF zR`)6Zq}XzMhCMMB2-G#JDl0Q9^Gn&T3{nO3Pd67vXz`C>nnkGSkKP68Qp5M0VZ6RY zUb?|tWD~|9xBLdzh&OXMx3y+H>yHjOUYd8)(plY}Vh7#e87P7j*7HHb@S$1nbs11G z3cHYLcU_DY#_NJZ;2tAwMB=N{PSPS^I6Y=oUAq}R4EQUW(-xJV+l7^A=qZ?J5*TgG z={k-#E1sI|l;?(veKLPoKs=kcFq?R;H<>V-9T#HbH~-ZCI$NEs-@VR1@)O4|)5kXd zm-=^md-u?M#_xBwcX#%FlYRUW1b}7AXVCp`{&)VjyPI7gcyn*((c$62;qLy9?xcU`s==Z7C$UEtKFo)2myb|HH$>)cN0ibh!JQor9g- z-AC}h2YYb-5BK-Cf0J$BpZ{O{Kl1HC)IEa8C1>l4lVS*W0qgj1F_+^)fmqLr*=ar~ z*a;5xJzbbRt4b6pQ8iJo)i?F)Qav_tm(=l#^Cf;Kq=eYIfpSNM1~w>Xu_XE09(R^h{B`6CkL1cAek( z1XBt;j~mE_2G1p?E^%?y#f|o~j==|;bCE=Gnx-*hI*QsWI-Sl<=XC*mYSbU+7nWNS zl#EV_%4S%0jj3QG*^f{*qrh)i1{7guYEBCf^k$?(18a#_q~ zCDa|Gk<2|Ooum^T%@Lc?9R5WcMS3Qps#a;hYb*}+uEz#)d-*?*{|D!b@%v(SD-~e5{QqcYXKxSX|Gk~P-A9iOQT~4f z|93C{eq`#5bHrmfD+Qpra^c21wHjsA0I_EQ7oARbahcCb#JET`b22M( z`hGb1FwUyUVm4r;?-~dt6VnUTud-@UTG5|cg5SJ9DmLxQyoY*wGv)T82O5Y7Ham-=fzlMn{J5| zMs47*D93y*8A?>Hj5cb_!>p*FDfZH9;BQ;|O|kRps7Q1(0g?F<1c~xJH(iW@HW)`# zQ^`C)GOL7^BWvOpdgATu1%|q|t3f`J#|Ru7VF#cQ7V~7m`hbFhfch$V#sKC7D_2Rm zdoQI=Yrp^&Ga8ytr>3qm;AQXGo1kI+%xp~k4yqtmXx6M=Uw8!08hUBB z_N^6N(W}4R*gbuXp*SVb$xAU``nQkoXjm4t!&X*ZU7Sot^sayg?omEITjXcZ51j1U z8rrv%!B;v`S7DP$A^jDX!1hPwiTV|v_hK6P3j%>rZ|W_MD3eYMv_EQ8^V0OCvKkO6 zTG?BglwWCz$$vnBB$}3gXFVKi;}gHU?)CMLwOZ|WW(ur|Tg%f`$~6DEm>Im12ou8> z>>7K6e(duyFhqz+4#JNA9}NfKbU0L)s-kK%%BSX`ma4qsZ<*PSO5$@L;P_5HAY3^< zMj;y^NHc$kAQ#+@ndhK(st?c~j1T;9F zZ8?t2_JHRKS_dUe!`}%Qg9qa6dKb)`!iS-R&GcMm+Q1ln_ox+miqQ06azWfhw@ZE| z1~yOq&Lrc($zgGC6RXYPWPl&WXUSGgiS^p0OiQ!NqCc6HXXO}Ce!Yg#Zwbu{{IhPV z9HTubCzrJkca^7V`?!PHCfRCb*YoY&Ub zIGcYNZ>nwE!D|BV9Qw9Nr!(8!jBhAXl0z!^?bE5LMR(y&EfO%!7m^;`X6LVya@^t7MbZ>OyTa^f60g10Ds_WO{ce#+jyXf>GqW0RfwN-<#A^hwov5vdZ1IE1( zi`*r~y!mh|!6KHx%65Ktt)%$-dw*Mcf zXF})XwfRb+H~*o!a>Cfw==o+?RK0VVeB;#7i%*&^`eke*zT|X^-K3v5aTDZwxEVbr zG1d`1F$t~_AH%A>>|dzS%TZm6q?yNWMSUBhk%e)#u z?KyDeI=bbWzwkHwcR>F$o#H=d6a3%VqMFyQR}3d5>NNe`qNuHGH&2z3BW1nu4NaDL z<3>gr)6NaeG}ZtbtaG9)8+}6f`Dcsfw54N3C}+0(=-Sv`hkg~s18pEp0@aQDnlg>h znyWYjYGOtEq}fnwwG~&OorkUU~D& zbZt%ZuN9F0#(L%--valwwakYP(Ie_MI_4kY`glv1$ri@0@yA;+GjC9zt!WZzZl@@+ z=uZvYg~I2Kf2McaDVTqc|DDcLk3oT_Q21=79S38Y%S|3$=7 zTRynGU}{~(zi!cVO_|}dgws&ux}A8sO8AU(zm=k=co~>?Z+XHTa%8UO?xUYGjnql$ zk0>#Dzb}rr?6J^wJZ}EM5$ddin}BgQpNe*vkPpyx++l-bug!~pFaFpc)DZIYMV=rx z3-{eLsG`@;t`rFDN;)HIOlK1W`sxd<4ONz&C5Bz4A2s#>3Yy2x346_Ldr!?LSLQ`n zUmIK2_Gb1)9t|Kxt#t-p=|^b0tGhdf22@z+MTx@1e?`53t8e<{XFJKuS~mB&M`8DxQRxoMQB%Gkwr`2$ z{%=lKY4?j>{!-C)G%>f8P}vLyp;-9d z!$1Q>WS~(=+y=1i73WV;U}%#db^keg&-dl=q^lp6dpvft|K6vc*%OKyid}6N`A1A` zf|4mYG-X>`*?uz$m2EgaD~Kw9h8>{{VhQ;yom}k}Hw@)} z#AQ2#`Q|HQl0^w7x7{WW(Z4D~E?* zSkACq@70Kj>j{JX5W&^XB%j#H3?aMu2Zae`7~M2q!NN9c zsie%VPb?lwVjLrj>x56Ii<7?go@N5@tQ}X^M%?sZ6R@=m)^mQ|)1Tr66;J4?4~b$H zy?XW^AqZcTOjAlGY)zUtwqUKxPEuZ_(Uj4;DxG%SPTaPmfA6R^kLtqbka^XQ4Hv+_ zZ`uazsG;2k{=I%1;^RyWs1|9%)Yoo>(^$gi{R0xT~EzKw|-~TZX*K^#3Fu4HhF`jgX*c34;o^sjWn$oDxoBLIQ_6nL}i5VON$&q}{T6 zjZ{0bT0X9=??LJ{Olu$2%YtQzEWLJf?`5B&z15Mq)=7?TXg=GFbI`ilV^DrMuXR{e z*QW9E^2C<9Hd9KK*XNwZt`B{@wBiI%R((viE3B=n##B#~v=~@d*jJ*YwPsM%`YSAd z9X-5>Acj*=7YM4#v(Loj+vJ-%H}b<%0?3uW|bm*H(KT|qc+CLOoFtQ%GIfn zun1o+7-L8qg1hG2aR9qE^9f1KsU3JQGjCYr<O*fLn6pN_vrQ(VMp~(pR_%VV-KJ?-;t>0aepq}rY*vb`Zcit7QZ>*E1BKA} zc|Nwn195XsXT>Flxt-!eQh~XCr|FS_Zti^73dll6ftq7LaGu1OFl;3V+;+Hl+W7)q zGkFwT@+-glmQ}TRQ&R()8IGH^8vtxTlfT#)zU$pcroC4OK1&^VkiA5YJh_H|k$uQ# z2~BifOqGk5vp1d1n+}kI+TUVkwk6S6)WJ@ociNjhDs@A}=1wJgn^{KeMbY3u=cZed z_}DQ>Ns{aJZ#v70*mLhS`XwH|XBjn}D##Up?c79hGDrp|#d&^NB0%#TEp2*tgRD$L zA#?;G+s^8_D896_Y6ckT@vBk6geemwde^Y%f2Km^NYeL;)&E$vi7PuGy6!dtn3V#OB8dx86K*1Fy-vW1m26xcDLGfJr;ZFNj6|XLEI2-;pkUCmioJ=I z-;2rWZTz6jg2Y~6B;9v(`E6X7^c+6lD&c6Xzf45QuQE_%sc2i0SbJwNBEfzT3at{H z%cOK|0oJID?CWm2JB&&(U8W*3S!BFv zHEVae&g=N@n-13Dx2u9*ZpduC0F+yBTIKF~dp>J0z5d0y>{@IVLrI$5b(mks1(?$7 zo%GK*>P|;%A9M@{`llUnZx>nz+?h->RnZ*o_4f06N8805iN@oob8QJw=3s#MxI#B_ zv>Q)7S)U868OZa?h*>v{x>+fC-gVr~a@HG&?ZBs$2>u7cQuIS__*;ztO33@uSvf&) z*1yBawpXJ#MN3drGn9pSGP#&eDoRf$1(beCEpx9solMr+X&$pG7oa1NcetqQSbVpW z{Q!dC4D?$Dmq(32T-Yg@pBj5ZbgBTovZ*E;2_8x)|JZO}8t$riB{!a^OEs^_+$Kfrg}wTZo0j zjlt4QD#FTv-7v>s0hF8^5<4r%b1R1K7+s0+7w~8o6ONL`aI>uRa`r?f%8+O^(IOx8LrX9$fxM^tC=(|6^od!zi_#68vQ zchq8H=l<*5>8p^`}_k-;` zr?hXjy=w0|6!XZ;YJJyTV%$>x9}&T?6VXv4?mTy(bV0@X*29PYj4}JZ8+X}DExP{W ziQP#XvtW;kGmbHEwX9)@&NQQ|j?e#7GH2@H%GG1^%xv|blMJ4Xu%lbNaz$dARy)zG zH|Co?lKC>a4uG;c>&6`hk`YIIBh**cS{-8~x3x7EH#&1o!^<+oM4aY=mTv8E=*X1X zpDZG7#zv~QQZo?=XrGA9O700O_};Rst#EO3wocsNEg(wsSLQvXypZ%j>;FXeDcSaS z;S3eAE4$n32F^@QMLKBCH|g-iR5+iGO?2ByhA6HM3v?R_wTaJit_pd?7m8XD!E3jh zp>f7~NXjOI!D3p1#C39Izk9{437Ul@?MkMJu=l-gmbNhHifuxa=jH-~f-;v_ylj;f zMc;>>0v`--Uu=ku62OuhYZo@Zv+sR>a49S0EpK=G)^z&04=$PytpWE{tbzPAV%JcSqll-nX$)x<6HvZ*%_9L;poc?{v2xQ z*20H4uW~cx#UHeBYk}m}qGR&qG8kL*R79)C-%Zy)5TAE2KJS1mJtzsc&->)6{{sKr z=YPJ>|E%&q9~|s`vwiUOeg5bB{Lf+jXZXr8d)+7v^z!`AyW9H*l>d3>5VJoY?*0b8 zI@sU8&;R^|@;|@5z?Uj|Pq6IH4En?mumUVMnhf64!|O(EV~xzRx>O`{?8e%uV|6Rb zlE-37=r@FXta8U#trX>8P3Y{8mcw;le8pF;Z9BZ-69s@{;euq0$v z;youz-G(1Eg7Sv%e;=VRJL2b(=vtvdb4~tVdK}vDaKYW-i#NZQyxY`C&x=;edj+D+ zFe}$grhD@?XlzIhQ7FhG?rE8a8Y#+nE`qEwX;Mgk86d z4NJ@au^`0FEFTV4Yb^}5l5D<6SW{fh;N9#B>zHEJGH@BaW~j}0%b3wR(SW9O%B-gX zrx1RD=g_iCc8ZU1{;Ec^yWu=n02x(u5u(t|rt^(joCfkUJv`Ozb)C{MLbf~{M9$;@S{C!<3o6CHih5+68uB>e`jAd-Qg?;L(INPRt{FQ{+ID8Y(%bu zYYt2PjQ&B)*EKeWdt}sh@%gpbmC*C2FXL`_CxD;-g;P`UuCRakQW%c0&pBzB{X)fH z0-7}JGQU)sf9LA?S_7=+bRdaFH z@nSNOWI%cegzV%zCE|~54h)u&;pOz0z6&llU9_(tRdneM&!K_E4!R={Afxhzwh^SG zHvKx&w<8q3>q!b)`e;|!-R^1{EGU28?zSu)j$k8-MQVj{C{yA_3XH;udHY?x+1-c- zLj?pXHxdC2?VdkP<0L_YLknppPNGit50c#rP)czzonOiHIVm8!rnhv!7zT!;U-2ym z=L~0StKN)`bL(N^hlfihVCB%kkC~gK^%tXhRU}9B144$e7q{xCMSrTagYry0pG-H> z34Aw_kjCEVzs&V(U#)O5FLJn9>BROW=@PQt)rt^;Oo@=MH6U@4wltvXe8O3U`{INh zyKm3SGf)T5&gXp?%-~!Xp+rH^+x7<c%P(@D3)3b>msne3^eN zFBTVay6^^u$z=_SFu-m>FmBp6aX58#RXSejW)yhAc-gTdg=j%>%S4elC4rH zPSwhp2)|?J*CkJaKBLtU2L>d_QU#k{z=vC?hmiCe`NIcwJj_04#wvq&8WQA z6n}F0ZKR%ES`$;RMOgPMs*HKt8oE@|*>v~&#{ zvA&rifb041SqWToQ&^?fTT&xJNm}>pQ_#?L(>}Qo0Vvi^6>F&d$s3Sq+MhbIB$B*n@b&lj zEgwot#~+{P;{m$OJ939gb?n!7aIM~f+O$oMGix&3ZBn@o$@{K`zV0@=2pc@p4iR?M z@KhnYXIBJjrAyjIRZCQAu3BcbnN!Owm%2O?=*I;mtLnq7#5?zhpcg86phgC6mFg{h zeX^$63XX0wzH+AV6$Fr*ZN>CKYMsqhS$EQr{gO2B$M&YkSIhgiC@&SFgGk73*4Uap z>t%Vg;WD{aRMT6c(5|i2#-}J+L!hcUb7b#!vk_spX`DAxN~-oX@3$|JhJT~a&wD=a z^3ND@-(J9vy;91Pn9Av`*V;(L&GzDF+$*n+bT4+!TfrH(gIQM?Fyh4Wy4Ne;)1Uka z`77QFzDaN*zVv6sbS9U6`hq#Ff9r`J*(YWKyt}VuiR4TC-0ke0UVmr5m9U875~Omh zIVMofY?otYWuafZQ5**nkShRHNeKuMGK;EOtp-`aiR(UEr}QG#j!#M&qd8N~$n@z` zcpYBl#;nON-W09RBL;Qg-v963|L=da|KH&^U+*3q?%(_W-TVIq{(l@JRNT<>Z!Q17 zgPpy7^#9x4-{0Oj+~0@u|7iEo?!Eut7xMpmCQIukfakXZzW6yF=`^qAv<*77+S|nx zUGJ*w_pg7V%XL%;n9*gKXRm(z@&ABQQ4ALJm86uJRaxW7Pfs7WvUjkxCbKv1TG>+y zY<~HomA!fOhh?SjIUFXj`B^&Dg$5JSO$Aq#3D=%G55u%iY|kYN_|wxqC>PuDdr z(kDT)XI7%~<^}4~(6zSmt41+t1)6&pLqx^4kQX3M*-_(295#J+ZNoo$!g5;FoM$u4 z^qDEgz&}t%Zt#fOgwU>Ms*)_fJ1;ApdP*IL3d)_0%hPfIM-CGUk$QuT!6HJDf*zeu zu0RH*6G$xuJ!4Ms*`%tn3y@RG=}4Vgj1nV+3EZNfkrec{bpa&c^eLkW(T||%&(Ynm z@!#8>uQ|aH{-%cRLXl{azQl&qpIM`h+ySv2gvNCm4QNbfi+qHC!=4~(YTblE>2-L1 zYVxax9K}8CS!2klG`g7u@EPPif09}!m5}PJye!6Q{52b6N9@5J(LpXpVBL zVIDZ06{iKToCEDWvCj&6pc2r?EFTCEyfj1k(!6Oln%Qun@aqPP8F5q5 zo~RnJTC06Sa(&`^KJAcIH1w1m5F=eF8P=_U%@oVCb93>j92XxS^0xCb*VJn+?GrEMVI~_O51P9f|iz!+ls4DLd-GG&8NTjv7~J!Ep+(`Jt5HwW_0oKBrkbN+%OQ!ZTk;EAH1;Ox*d=- zi%SWD%3?O1RD}yl`@ofA#C5eirJHglr=Zym4~7J9l_J-&t@_So)) zvY@f`G%b?&1TC(yg4q0nOja>iMAM`ri9yrLSBhIz6!jL%G^)ipXmIrs1&unZRI}1x zJpdKp0`brg&_@vBh!?@(wuDz&jQ-Q-FTDjxEJPQHZ-7nKqHY&SYt>mnwk}Q2D!&F* z>^f%tk|~b4UE`GS!gzx+S9|*GhsQs?c-JQypjwldv{U%?n^!+PfAI{9)23Y?m6^uD zUOa-u7{z0Dbge|0nBwKMz-p?Ej1g&RC8o2!x9C=6Ix;V#=!s=0i}qxneVmTUL8(@P z+*cYP%XV7pZ6Fu#A)z?Y1MFZiznunU{|8Fc$pq$W68M_QWb^zz2`vTGMR{&{*Q>4LQ2%JtuR>GmQEOK|Shk z-M1z<59oCS1j6W2M?mdlAt(myIS|T07sGwl&8C>SRKC#`)FWk3JFa`EIF5u?O-7f6 zz0o*uRyGN2UR;nuQPRP!LT6aBYa{Lm);8!_MO~p?b%nyP`drHEKi_;So3_dxNfXCQ zKy*?qIPA^Q_EdV>w0)5dytY|olPL!!;RQk}6q&VbRSDnT8MA(Vu8dt$ix7OYysr$E zTWGSAQ-~I7U+=`ja8yXbpt=_pzjW$W*>h$WU*D$Cb9-|S*8 zAy*M0QE8?E0%NsZErbSvmcC&=8#=%7G-53nU4v?S8bkaJ!lj)TvkP=gQ|yvhw%N(P zWaKNZqF$ZKJ?(MZP6>KtOuv?6*9%TXHt|{a&|tDP2%h+y$NhXso`zhyt1rE z?RL1X@v88Fpk}E}!an?>$rVc`ZNT zO7UYJ$XMZzNf@#E(~D8Gu{W=tyo~<94};-P*Vg|10~~wUS0n{XoP?PJ3O1C3$bODw zZY|fKo9JVz52PVPH!9;Q>1BGfeN5rDm3nMwOk3QiWIdevhuLicI&|nBL)W7XgZH(+ zi6}hE(&{YZv)(Cz()1$#`xQO*Sawk)8#5Udi>>x_d=qQu*cJ9Z9;XFjOduHAekSjh z?|{ueQ{%9ileLl~GPr9rkx<^lH|V%Hl$L%h-tj8?@zpyL>rC4fOmdo|QE+r6SDuWU z0?8}N%Y9yqU5O9wyn&mh8{?(nw$t^u@Lt=RjK?4Qn*5z9c42j0qS+t%T~x{zKUfU& zp0f=wI6fs`qpEGwHPnR6HNsArmM{~$R(OUz?|8J9;C?#x|I<1v!MO9(j4X}ulh>~E z!oUHq@2;i=DRm*dK0o?%itZ7)HR!bhU9+MMqOu{I#lp3{z$-mA>JRyB93@*lXgeED zifX($*H3_(x{poMNmQHsX?ixx(NS2psq_|`hr4=ayS?yh)t9WovOSUB0#>>Y>OrDe zaOYOa-lNfMW&5r%+GHzm@u&4h8K21;uOU*ShgtzqEPJdzLQpQ5UES8`L zJMmmuDiQ0H8JZJb76SrIXpjLColw|t0flH~ zz6ZZ{<`b&aRL@uxFRR-;_V%+O(5j8yf~A=c&KYes8aF^&e?0A=0s`xMyFH&Zcrxx* zv3HuiukaIA4R*u)JeO$$C33o=$P=JaThY!pw`^0k5SRnD{{Y&vTviY_yBID8s%zMC zHukOf5fH_!oq5Lx+J*WJ7f1TXFNxTOyV^ra$f++_{p80^<&{G$d4bx>-&v|YS@2tt zwF;{k4N+Td5}a0bo=*!9Nc(cM99u?HWxS%?FnaU++E4L=>@~_zW>pAs!MWW;Li0&l z3pF{b6*$b6{8eSU2Zt?MgpspcxLehd&Ch3(#o76EF}G=9x0w=K*JedFZ|Iqi&1FF1 zpTXyV$I#|!m^S=-y3cNC7FYAKMDxYGf=4mfg}Ag)D@h&J3>c%R@4d+H<3H}>KmLW{ zKOXHJ?Cy74aWC`|uyde;hp8eYC%QNbw)r_xR6WD*odK zURr)XB41K1WflZexlwzq9MOIf(AAc;S*$WZaE|45Q>dt3 zdAN{DI@p3kYGo=q}T9P6Eq% zeC(suN#YdU;|Ts753c(8hkT|7HGuAhlMmxA8+@SL;dWJK1JJJkS^m6L_Ju|kd&8C zGiYf9YWZk%A?*wXAKLPbuDr1-Y{XT_)f{f(3Kaq|{=_82q~U0fmqvTm#5+o<>`D(C zuCtpOr2&zW9PT_jvo`B>tmtHStM>Lr=o|Ov^J0DuUGhra)C;R~D}p#6wEj`j22b^y z7tq28g}6d(U4q_}c$Hkhc=O*gG^pwdWLc%W!1NmMzPN%B2p7z!B?d_F2i%{nj9m<( z?XFq|dI{9AQhL7H>CDD-)C}US@*L9N#b%A;jiHOyXNWA?pG{86F>YUv&t!F;4ruH% z7~JHd9-EliJZ^Ox5m|96Wi(OWfO-yVCfK|<{x)Y69$ND^8wvTFb+9ZRXW;#XtZjMW z8e~vnA;_&MtOkcK8h5T?>}r!X(1*`%;!kMW;fEh`codEy)|(m3@@$NrBc_Js07zA4 z#ord?%;GkV&mRY2%+>n}HpERm`_JDirSi!0NlY6_Vmo{nvKSy=8 zA7t$4BvYlJNIMFSz}f}w zB7P)M3MgPI!_PTP9rW+B|do_zbGh4&0TVY`( z;P%vqNEs0o`%1p-sSjRR2Y7ZBfnhOy3O`UU!G<#YBn|o}=W+s_fxYfMPB!(Jm;ZC0 zHx=4N)CRi_PQAWB$$wxPIq15fBVCr_xTn_N@n-4DUbkH;Y&Xnb#I~axhhQ(hfbU+M79fX0inlTZ&wA$I|uPd(V>IKvNsu~<96T1N3OSKd+otZB(j6fhiT zCLC5|^bqg3oz{O6cVlg%`6QpNJ6YW?Y(j*UfwY@#Y9_nc$~I-)PScEEZIVoJ|3;la5H!Vw$>mK@^2OJ z7FkC0+o3f~Gdx^r>^FoPrLQ;sN56*NzyIIA|C{&!{cj#^AKt(J-@pF{@BbFW+>Lzz zmcRe+9qu3O$ov1!;iDbO|Gcw%|Nj4_-v5IaCKbBi;6Yk$rzj7w12R2z8#p^`X;{qp zrKDvLq;mcn#Y+Xsn4M#=7C*v6H|FWlay;Zn%sFSg`mC3Ksx{2#Il=#!%+dO*HS$qo zyX!xX|ITWfF^7OSPbs9wNLYA`xAJ zUV^VD_(~2f*T*pCGA9G`1sYrt8l@n!wuD;Nm(buj%+AWIgnN(QGAYJEvKU$xVT&AG z%fT{plgG(}{bHCeFM&;vY^RfSo9~&n=>#YgsuhJQu}kr)sh{=IA6sTx6>Gu=2Iy|K zK&4zI5L0kw!*Irf4g{i(3sXA|hMJ-rqrFTg)!a@?)9i+cF5F2jOLpOv>}5ywC$E1B z<1~-MPGqjjT31ClVHN%#g}~yLP%bZ;9Zk|Jq%n7!$&ll=oFi*ut{Vl?H?zCm_~m+nOwkCr2UATioY-B?LS|XuTK67M+sMD zZ@2#^*msjBlkt3p@NXE(!;koFDaA|i{Anxuk)nVf5A*4~m_4yWuAtHr4wZQ~Y5%dB zj6DO+v2UA|+#H7Zo!MgCfElATVM=!v=cVg0&JMm1s*fOr!*9KvZO_~gSW6^h1}mwY zXGZ1A)u}gz(3Hw`8?jZ$PWWr(8)Fn3MAi)x!2%t0=BOG@(~(z-m_bA$vL$u^cL(CB zneZlZ_>h=1JRxWkjHl+c!dBGqT2wuiVAh(%k@|eR&Zy}&9DZzN1V&`%SyauWx|BF@ zC`Pa#b=+6Db(KXB<{xudV#5dSI=gagJ7Qz?Caeb>lSZPYRB~H`SHahLdJ27NAI3s> zEK55>W41^GNGhu~rU!joF*qL#Shr0R?&F(wfuGB%8vh2tFgMOC=L zkg>{LjhAJq$hgc8Hy9aLN|$_)X<={c`K)9I7$Xa|m3xiV^#h^ePuV&J?i_qhioHVjzX&`q<*c@_pHyYRkZ0XJJij@ z>GfjrpW1@xCz#81H~U0c7qf=+@`?YwZt1Pi`(zE%O*I)3+s{^w?hy96djrC8DDF!D zO(2gUOwXr89!M=bYsnA&S0Yk$BPE`@3G}S2b|8Zp28t%~sUvP12nt8=cI3j>1|gh; zJ!yHTld(?bDQX14>Y|Q`UM^GL?Vu9p)Z!oZI@YOesNbd5zWUKt_>@xKTrS62KpUqn z0Z@KrO+>s_<$>6jUZ74ScEC;Zno#RJvrGrztpi)5tr6GIJtY}hokM|cn8iVS3HW|2s9k%tZ4H%sRzU}*RQt2CC`I0V5RPg zVNj)1^O=K!H+MCB%Mr=@C3w2VKinAvOO$FHN7+;3ZCDTN#vEW90cc;Cs+OH1Fg94F zwAo2sIPI$DY2N^M0#>J#U zC-YHBhfQzj<`Of^9NThiEc8tWjg-hD?W_zQ6>u7^Eig*QubRf*q4-IdN4h~f>j~T@^92YaSJ{Wj?0sd+hzQYA z&Gj%F4#u4Updi#jcOSq_K;2>jT+^-F6B-S%okzaH~0Kb;Csn6365j z$n%6hVkPf@Y2pO*0FuTtE0I^MkGCaycT-#4?5_)GaiE-1_1qAmcF6NBFu}7Hj_9uCEhZTV-py&{{(b7z2d&G54GPG z74Ze!a5x?Xg)&%t|Kjmm&!mlPn)hea<`Cb9%1ao%wyM|`dN;em?MT6N+%5ijX^)rV zl=9H4F5N8+nELEIdHUi-qdr)ijPmMmzrVBF8FKVYw2nQ=xEV{f#H+VH172@bOGdV{ zY-DSWZtW3r4(+PC@qXt@p?jkqTda;o)fAgkVmmjN+_}rI{_qAQb{Fl&x)4#M#zAA8 z(Rv<`Yj=Wo6Fzn==I#+^34Spi)|>uy#2F(kmSNDPg?Ruq*uK(qxkDPDc3lpkdzaur zU=k)0a#Wto@>z-L_9~3LK^`%ld{`plFlCa%vV(hdhk6KapZ;glr;aicS=5o2Ns?&V zXsOsDYr+=O%A-&ejdwlbgu6G|So2mgYlLdC)@_0*QUo-@{Gt|p(022C3WdyGl;g!m zk0Nm#;@grW-rAkfB-*@96m<6R>A`@?Hbtf=uDk_RXP{LYeZeNsmv2juRC{+ul0*j% zQEBvEY1YfQH>-0~GF?HenFxF?QS-ieU1Vt&M1F;ayh@BA!Zbl9pktGy|Bd z(DE_UnNaia|K<$jm*%DZuqc3uB_NIT8Xp*<-y7HavJ_0@ocv{${*c z4m^TQ(s6^PB9beoS7mftXP6}>$i(IlbA2P37vX4Xk_G7f;Cs)(QOW+Tm#yO%=i>iw zEXmW#d5b!^^c5hZyAwA*qo{lJTjo8?SdW z>)mU)Sv4v-TiG4lbyM$Z*l^ts<8^44Futr$JT|nCD$O!hP;VvOQmIOANw~NQdk2(D zC>iThj&z!kXe$Yn%Cd4x0>$-@TM#G`GE4$E5QGCO4G$PjFv$XDNnU&2>Wi4`h{M-d zoDI`vduD4p(@s9Vie}p}LZcRM^kcCROie>(vI3ev1U?YtJTc)TJDOehL$)b z;`T###D#M#9GAouJ1usgkOrvFTPKO~Ww^c_B%ScBQ5Ex)H%~YrdpzjX6P8E_J<5k? zd6S$2N~?j(=dj|)Saz=1MlnlQ#g{I{Zz7_^TUklhomAW@9ZxPd+{iO&Sv|&D3LG_E zX4&!Mg2MemafR9oP8N%tA5=IYiE1Cps<`!*aN71ZL2SF&k$1kPTz3-xqVHKT_x6S8 z#>oe7wXgofE*^Hqx#A<{$62w9h&q(W4Op+sJ`%k5I8BhTX>KJ|B9gJ?hl3Fw#m9N0 zq4;rXG*O2v{YTS8xY$&5trhMf$P&nnW~l?(&(jt8qO0Pm3!csDwElcdgtBO7b=2X- z1jzx#Cr!~ATj2$4g-*?60a1SC^!P(q*9QE5 zv+_aqyf_34)dN92YHL7x)MDryDlucN7W!k>R%Gg^)dY|tfd^=pmE3qR)IVEvvC|!R zjY#f4LP$}h%rTm`A_0F%P4Yd3BAVzmA2!{jU1@P9ZzM=nK%S*fvY^ov$1>4<@WjAqV$uCQ&2EOmaCCc6A=%6>j|V-)@@D*KrzgOt>axi|RG zSIxyiJR>QK(-58s?9L_6;T(190yhw-r&T7B>e(iPGk1)reJSmYHC`tVgrP_!3PGka z%m|%yP=r}=-++?n?kjoaLqvO+T{)gE<}$}j%|A}W2N{|!ou(8imo=6(#*+_lrl%9k zQe?m9pXXIRpU)cZvb3^IzpYI#*==Guy;^owTa{zJ%Pz+}3$rub+q3ULYIvE|6}pDq zI^`VZx>eTr`g;O@pv<-!O2L`<=GP~i_(RYe)Z3(b(VwG4*xP+{__aNlSJPtr_<0s2 zvUn&1bOz#=TEE|~cfQ%FMUwui7q?nie%HFts~m6RzZaA|IhyX)XD4r^2B(;Y8xkGe z;p#-UBfga14q(S+k6%B};7f`&I$7ActF1!icrckU3ON-937MKj&4Ox@EJm{@_?u0> z_BPE$IRR-mdo>#67r92rM#La~R7Kna5RI!2%ar+WNNMOfcUxA$om|*W*;AQQm~6FR z$SD|GmUH3-JCStuw}mYNrZaPQvv)!+Me$YKN6~pG6vVJa&LK+9_1z&VRyh2I zB3%3gZ^Hk5MRok`Lf_p!=er{bCUU#e4P=s&TiMhFF)V#k-E|i96;fe?;h~3)s43{< z%~U4z#{-n2oLt^m9goMr0nE}#hh$(kNP`aJz58tFUpnKt!&oW_$2_(3^d>U@zLkx7hj(b zt+mVSu}J0-Bg>*Tkn1OWDpjRK7WdXTDNd25dLkxmoMw7*;rr`}3DRpp4tQPKqy3R? z?>NW;uPYKW(AKL$S@9qCz#Qy)avHyRwyLWhS@ThaRd$9k_&f)zl zz`gxn+y7_4wa4#^+14%3|D#8b(&ryO?;Pyx9c&-&@9bdy@54t2hrh`V?$7`I`|thw zf4TSH{k?C#{^slZ_uqT|pXUGS_2dQ*z{}r%w|5TqIRC@p&Z9@$d%N`h`)KF>{r3yK z|Gtr>)hWm%@dKvoY6Er73iV)!)(9*I4HS}7yis({IxPxhYoVEelS8zWw%d{ff@k>v;eXJ%dQi0I&{~D) za8U$hgD1; z(yB~fT6!{@RMj)W1E2Yg4e+;q!9ND=P@IB4Fr>WSj8o8Rrq=K0qY)6dmmTqYt4+Up zi@!@zO#N=tO??-el-pinYN}8`U$^!$DyxztJVR$MgT&g|F}Bzb_>JY3VA{G5|kWL3gcl~ES{;3bE*Z1Bx%)5{>LTHut%4rz+pVM)o5u9inA7l2~$*GNL2dOi2gW(v|N27NE< z#lj`-^pAe{IRQr;VdPlLS`K+)bEYN}pjMDBK0C%s*HvdJqn2i5THj=rhQ0^dHMIZw&8t5>e+tJ(j@R3+I>JYEN}NB?GXTSe zKbfaOi_6u!)(`ZfPTJnh>S@*}>V>YQd_l05VRL>daSrWsN_Hsv{o}{aw@%KAwqoe* z%iZ?Ho>lMKe5^COU4O`$J>Su2xmddxpN-1uynP7*Xkw(u@Yf%wTn~3NN^H%cCwAGs zfHEaIVK4GgIOyw$({asqY&A6=wgPB5?UQV8tX9OktZT6X;n$`ZQb%$6Wo2+WOAASU zvM#@o!_Rx$oo#D4IjX{Q1j1BV75B`qcorw8r_@WoGHq;k4#Gm%lvU^d|R5_j8@iTbX8Je^44h-diZeJd+wII08_w;fZEK~CPM7;f>gm<>u4%P@D_N`E>xjxn|rCF*q4ib+gfcZ|8>I_kZUP?T4#mVmXLysBhK1<`w&<@MhJ5=IJ42*e`covT*4& zX!7#a(`PT%BZy(tND}KWAqw~DP{h4`3|V~c;kCcwIMl7a2c!<6+xN)`ufJwMIP~p9 zDSRE$Va}P+Widpy43iWFjdMy6(qiKY8ZxIy^&-apc#Wc~%(vI+bTWEinP>mkCyNl*9iKtC{8>@eoxR6&<(BRvFQE&xA>R<^ zme2emicnko2Y z^-Msxh<8Q;M$i|1>vYBC!k(n0?g*dEbx-GHNv|B;fJDm2i~0FvCfU$Iq&%&EU*xc* zvQN(XUi+fx6T%Lfx8Ge&NyY^=1mr<#WefAO+Hz<5B1OPCCFwiu%XmAmABF88J~I|< z{|Uawv9_}M?)L6(duO-3eQ2`^9{6BX7Q~QAtmr(UKixA$YqOY*dd?NmiVdVE|GGb_ z?6~eGH)UKW+&MaC20H6#QdyB` z?!pH#jrI}faMd9A(4>q=$okMN#>ib3k+=ps&Sg{-)4p&aal6jv^WtL4ne1DjMq;hm zRX;zS7c-+pg~mL0T9j4Vsu$!|bAQDYP{Hx+(C``;APO_Pbm}x)5gy!ZCMV3~o2nh} zasP)|WB1`hnY(G^uO)ngr-bdd9rQ(*%q|++EzI=OKy5SDoHYz@BH@r*G^($ptETXs z-yz~+Ik+f*#ShJbP4ih*^c^~oTSe6nCf5zEO0+umsu@aOqQrtiq~h6>Df$gI-+qkE z*PEV#<;z6WLNt1Y6u9IrmqNU*)JU+&jX0HVe;KW+^T}c~6k;)eWrP3eqlK0>xWUZ| z4z($XSKa>ZH%N(t?d|N_p7SI8dGO6Q)}a#Q>ONkja;B#F6`GK}#d$ow;>8KRHKQ5j z&=OfuO_E#Z?A4rHc(Q?!9m9@2W3J=KTVG{6eyEDSHXNUKzn69nusb&2d}CgT1N%F#dulrc3r27$mn2(n2ysfh{F9| zY_5?O?LUvYQ0o|$vi|@5KmTvN7VAp(Z<5At<`sj%)C%>&Z6k0!HRB+X^{!4;%h zGR=g4Jm!<^x7BYebxcU?ruP>3P90VFy!G3v*~#ekg65H4tz;KV)P|s#=5*;|MCkPI zYK(9qSM=qb-+1ZfP5GE7#PzJdRC_Ws{?Z(N&1qokeBOY6JIn($vTt$4ufE%i zarN5jn}3+kb2bU_i6${-y$k&=$R%P2J4d7`CF%qtGXC-Hs~@}Ri}5b*J%wg&LVuA? zEy7c7e#wbnVB*gcOf`zL&L$^+Ee7*{Pfq>}2F(P(@mdsExj7W5FjO-?teV&D_I)RN zkz;mPMhN<(L_BMZ%e5l7+GqJ--meA|pkp6N*q5B(4f(grE;OKON%mhQ{72)8!_}5% zT2#Inv;I0nF=(MPcYV9rrZiw)w>#TJ=f08t)pVOC z0R6N?h#`C_bv1_)E()qB^jfj|v3PR#M+M|^_z}fMsK;7nIUW`tJ^U$}nNw<3jEYO_ zkrx+aO7k0k(+s3b>`8Eb$u3TfD&XJs1{>v!&~Bd}ict84zrwqd`s$x*Q!xDP$qm3;qiy+kkTaJ!Jx z3Frbe{E0ib-uk54zveLxv*BdGFcUErSC{nB^U1W2w!8V@ykJZ`gG;0v=pDIg2Dep+zT$ZTy+8}@@$P@@|{K{R#v(kDNeHs#! zqJMqI6E(y-bBu=mqUn_+GGZyvU6O^ESbe)ycGHTy8K_(+JXwpfa5I#!T!n3x^#LDz z3+*$hBeq%2)S>WffRUaTiy2I>99ZZLKUJ_Fa5{vLkZo;c`vlK0g}dh?wnho-{(Chy} z({e!X8Bor0O@YmtRps9aW33pJ46*hBpGn}9!EMGfguwu3K$ySI3d~G1n3XWMd^v=e zJ5`I|l;K>+-KV#T{s|wKl>d;m%Z*z>Vz*n~D|9E(|FCBv8HmWY&4ulFI~|pC(;%+) zBslnp?cd@t4;}_!nGqaleg!|x&dRZ&+cV5_!St8xjLb7= z#tO#_?dYh>#>s7v*W@xtyiPpKwmaW6U7DJT0?y%`1uA55Cm~9A8EGj6N6loeo?$Q? zudvkftHf*Qp*bU^2e5p`%OPY^^znp<;YZ?$C`c5*g9(|%;wA320pG781{Cqc(g)d} zbIhP{ssLV8>vXXe!^(NPX?!R#bptSs*K%(o|P`lZGe==jZR{Jm|*Eginr{qwt#a`&HS5l`mLnM-o%J&8EIG=KOPRPl1 zmGqty3GY!!I-R_|=kXdkrt3T&@1@QKWA7@c%fv7b=ENMzFh36G+g|kA9G@9PSGbxa)KnQ1_+^(;E z0pgRrkk@(;b+5rj=#l#=euG~(>oJcI3{FmK;m2_Ay90G^NhN#et35ozC|`4S7r0DG zsn_=^{eJxU8_x9eg?5}^#Sz)eg@5CnXN^0!b3o$~HBP5%(nqx%4>9*l&Cg}sz!H7p z$$j)GU~9oR`+GU%zFN-6BO1Ed4M(aitz`#1*AzBvm-X>!Ue1!@IQ?j`TbAcc=q2H>0ZqEBVEq7vbf<6?(rY) z@gMI0`=5>ffDRZpN&d7N{|nB#kN>c}yZ`7O|KSVaKcEzuadfpE`4HA5bbBLE8qQbFRvZBe)ZjhJ|FZ+gzS zRnjBtyUa%-kmu_&_Q({){KUK<)hYmE${F1#&5x?leg| zZkD1i;u(Kuc|*ivuq<(i%I5%dL;rsr{K`4IhcLpEE06F?A5N?w0&lPUQ*^Wy+7`Z_ zkR0x4Le%|IsB>?CNPedU^oxWeLB%L319sAhDe*Mi@6{U?KY?NCHTHqZL@ugI17GR! z;SGCj#uus6>D2B;h>-GfIO|95Ba zknn$Z?(@HXDf#~g3!0ang#Y;2(0tYO1q>2lxowmbWs?Pe!CdQ%K07Z(V_H9S`BBN%NRhBYiP6&VSHcAxx` zHNezRL-(zqqgr4*VHsW&R5?ozQ}c`?4U)v|9XE9c`LvYQmmqRs5YXQiB?_AqZDy*g zM4H^h&sJ{DpJ{q(bLInj!Nk0$FJ2n37NcY-upwNwUZyrh3Y$hj(&-vKO)3vb&O>Bg zj=k*bG)qe-hGEv%A4wWCdek0ox7snic^xphc&PwgWB~jsO zR93Fr5QFc|armssg`Q2G+oyS6_Y>RR1hdc1Z`(>a?8fr|cT`Gwe<=mxT|aCtC0sO0 z^@b7l2Dnh=QVW=Sj4t(HN7;CVk$r>g&p)R&oUXfib*YV+dWo{7$OXrMdPYBs}F|&cLRe+Rg0V89q1}sbqc97+5CDDv{;?Bkx z?`lSFL;PYDk!AH(GS@a?Wz7mVmwaZ3g}p4snzP@f^coWzedNlBG-TC_;aJ`Xi1NmH zQEDL++%jN2ixl{0&|F7>h7ypUZo5ygg0y`p4K;7%VpQ;TP-1HGaek&jITZ`_lnmzz z_IkDk+KWuZN;muqi`4fk{;#p(fv&mE!bzn&TD8@ghCXkx>g(J`!7E^F94l$fYu)32 z*46LUqPWz$E0V9L9o25Ikx7YR@IZv<2Yw-PnElNyVvk>X;WHCs(zMIu>Fui0UYykZm@})Fvs=-x`X_E#xSxu|Et~#l7Nu^5O zKiJ&A|K7j<{9^CF`@4tV-1~ptzyB_M|Mdm%10duno?UVeNYzg(2=ix@p7&f6*%&#e3)3fq8 zA7Lo-Vs2-Z9gK>6jG0K)BOit}SbrcPcCBG za$asW?kw-rhA-1LHfrHZ*M=8GYKC>wM=pOO)vdN^X+l!avg>*sCThod+*RZXj?S-? zYQZ?;WURX_)PI_u%|NugEm?z?H9iXLeH~g_+>~Huvqw0QPII`Fj}eB(DF3@oD%TMv zLT=5rMG0T9mKywZwFlYb5%IZsd07YyjWUWeZi9J4P&L}ItX`BfWoZe5d$)_MWoS_Y zbrM%?xNg)NYbK)Fu)L_vXShK6XVXQOz?`8dt*`lckOQCV56ju|7MT}E6Etmj8Jae1 zPvz$@qcR_%C$zbVv^b+?uV3{xB5?fi2PEsA2o(CIyP@N@stNLE;mG_>nEn3sPvp-s z!iZTk9RSB3Tdde^#iE;ejUnr>cJP7{Wn{ z9f!yp>u15C0?uL5W{izhDQ`ADBj=61($uc30)hHYMh*G1>6Ow{MzfRp?3Gb8>M3dk zeW`nZ9;Myg>z5o&;HvH!(WCR`1$@(JI_DHuldU&i?gD33UH~g=6Z!xKss!wg>UW&g z(~!mb-m(&6Y((5C?3w!Zx#fb(M#M_Dv4)w>?vWU_A)8nK29t?T=rb>OzbUj0!c}ll zj;+^o<6?lPe>yA3opMmPRjnjs6m^0zE2(W{PhD}EjVMa@MjXJ1q}UEr>+8o`lOPp^ zjf;HL*CX<084MW)7_F#IT#xU2${EFG)ILizRgc7!=EiC;@c0lOpqxWFs1P{M7FH3} zZCI=xxDrC#R=-CKkE~P|3-fBWo_foGx2byF4&el^) zrwh=>&1LC;`dar-Ri0f;%3Hyws#1U^o3C$9zoTC(zB8YFUaz0(7=Ld#)Mf6b-HX2O1Tbo)jes>oB)EKwY+Nvf6($&nj2BLmzei9UR9JZoxX6Xzrw*=I zRjE+&mLX23MNYxu(kChn_81j1AHhYfi8r)y3&5e^p5pOIu32{!A;)!oKcc@FpL3(O zs#pY5&|P~zxrDvOVC}rs9Bw4@R@l%@=~51+Q?;a^VXHm!F;PAu9XDel`EmfZ>7*(d0U;*`cwxDs zVDXI|wn;19GDLOHpP_X#A)g~2JY4p;lVuO3N|JghgY64iJ=ZuYkJD8i0H;<{;SriE zgzbT@P?B!R0?JZj&;qfRh9nc7ns+l;h-e%`WXc|JZ6LhasT&0R^S4!<@zILrQFmv1 z`?wj~122mFvQWDKI#(!Z(jK-|7LFwD0DsxSP;e+bImiMf-B^uM9T$oa_56L>;lg>fXy;lXCsbB*VUqwnFF~Rk;%g2yi8dE5J8V@{3`oTHPjB1TQOV^ z;D1TckKyJF>Rrn^?0A1P`H2ezl}ek<`8)%42XM){REbH0hDyI`i7>nof1%rOoty9m zx8Uj<(A<80rh|C-buYo@3N`ov#sa`Xkk2$Gmck$s&{MO9(JcC__McBXI>VtDs70S! zU5Sz&I{LaCXhe`n*N8JaVFAMDHL2 zXkcdbQGRhU%(IW(?Bmh)F@{9JB3A`j6N64L9XfKnAk{5@iz#kn!lEoQe>mz=;XsI$ zbIgV8AJ@6<*SzUh-|rSTx%dCO_y1Y#|F^sM^})e6_x^wP{(ouzKX%Bz5d!22{Ktd6 z?OpQ!JJ>rsINW=L_>YI%_xO)r%Kz^L2lvFoY6nrznNF>?1QQ^M1hPK*i|c$B_};}_ z?_N++KBYpovUjK+tj3F3$NjEW(+~#KoFY} z`s_{r@t+<)fARSH7th2GhKlsLwZ0@-47^}!cLp5cj89s}CNQ%h=+9>$y%k4!1tQ=W z1-IiBGobg`a0)0&qNMV1&~z|*l6WU8!FfP_ON^qs1eYNSk>NpOI0HAX4702rKO>GK z+)}iR_024oCvAdI*+4pH@UlNp2CMS}*of${(k;hBr=VGT@Hv=l6&dUrV~wq!EKY zGvv+pq=`GytQdW42I-8Qr}lz%mzd$z;yBE!;lGxvU{9;jX29Am%4!g1jVgd6eD(m= z>1^%f%>LUwS!Y4NCuc7An&!d%wo)phPZr|B(HzL1mE`2z9RTRQxFLY9*fr3Z&7r|( z!P=sQ23sVWZNk=OPu*&ej|y}jS&;JrEJ%q-f1?<Z^MCWCo{414@0*>dc6Rd_fSW7z-gd=7iZ zzrl|r1xb^w$z;x?ZjbME)%*C*d;RaO>^}$F_x7KA`_EGQkB3XKh9zhP{_o-bp0fWO z96mbS`_1<5?xXGfd;8B9vj05$2!c;Zi5Es$3IP%1DSF(7+kG)A%rkbE)Yb0K9=^xV zt?a{jIXD-z*D*niD33vut!lGkR9xobWvHLj{luRv<&k72xeZ|;4z4bZ%7V-)?AcKM z%AE4zR-?&>E&P$hBZfqpDG3?`yPA?WH109C{~U3?Mi>JLvU!L(Yao$vh6rAllN@T} z1Krf>h1nYu7O+_n+QQTqFzJ;csdr@$>;L%E%kQ7P>Ayjf&#dSmIYA+5%<4z&ua5uY zFT<}I|Iz(R2mWpT2Yfmzo*k`}h}j6QNkw7E)B+Hh(&_Xv>MUkw5_L>U26Hu#tq-!-6mOIL@$IW0 zIfS98D5a4!Q~y-QO{4lm4pYCjzgmI{%xScHs$_>82^!F%tBo_TpDHwyQrGon_Eol% z$RvwAql2HThEzPF-n!7(agaT|tQ7E}$`acmZ`&r%hiFc7l3!Cc9mxzSrH-Vc)n5>m z;|;%(w5k`9zuWMYPF3Wy!MU1^vy2yRCT)8W1G+l{rHUQ$scmdaK5rSK1@#!;9b%aE zfvn;-S?5mGcPV**3I4gX(^lAgBjG!mXW4iQqGFR@a9{%=m)dGF;+Ufb-=uB&#Q>=S zPXu-kcamm>c&p_M2^hJvoP1RMxBvC_-Lsed*Kc0EeEkm3nX!5N*JQ!y0N$C!8wg(w zC^JCkFJlW*`1imJ$9U({3S0i`xxV+aYFtjI1$x~q=F`R8NC=8USnc`D;-r{=D4>UJ zYIFxWce;NoqdQxaL#)+xer*%ur8`3phC^saILPV)TKwnA{jTHH-R>NCbzvDn(7>p$ z(Ul_6Jgl}IR$Gjrcs?(N{sdkzNd#JyJ=nYkrO2xvU;GyhR)TjMMdz&3g6 zornYcV6+&b_2(*^a3nJDRDoFCV77ATWp-Ud>1M|>0Y+%7W4qB4O_Ic+KdO`^ zv75`fjm62YTZ5k|Yn&>g52R|igUGlQobK=i0OZ*GeSxf~7!*T}Lb^24zde3;Oy2sw zRqk&XM9(qvgngF{7MU?|jx4&}plkZ~ZFHB8x@b`Hqa@k#DsjeA_YA$4*B2aZC*6u{u2? zW{c<&Db=(Dtx|FQQAx0;^QP|%$6Q%8p^mSD5(0>E;pHGfz>>o2d|fM^0m92yDiZ15oJGvsD%-4}naSYBb03pxvZC0|%FQxRBWY2L~SpEMd%Gnwb!=HWfh;{9%VpR93$brkN z!&qXKCEAKr16Pf0Y@-~w(?srVtG^(Ac1uEYwhLy10(66&qPDbqy@JzQxTel zW5}n`b=N>sn`@?pw%h~D#`G#8wx&FhP?;*i7bMGqtA+@3#>IB}Ts~*7#EO>#@fJ~L z>3^}L;C%JLmCi;yBqnODRp-_YMt9)tvkHRX*u`OxxK=pcpureif))(L6Fg4D^h*Vf z$_z*d639t)9E8ysLSgGCt`XQBo{Bw_3o77?HXDh@HUBU1fw=eox%dCE{D1Zj4<6n7 z|J?ikr2T&|RmV-7e^&bc?Cw6=HU2+4`#b3Wv%9l%@Bj0K{D07HkCyGP95C0U2apQ_ z|Az&6iDW-uE^ujSlQG>@L&`aaJa?5J3O%M`7C(N#5DZ%Mwka{kB1RjrP&r$SNgkT$ z7ccDY_0_uxz8!)FKU3XYi7z;)NYkqf&rw(UpJbC?-2q?Ct z3U|HgXu`8Rvc5ac?0}$OG|Y|Md7guui$NZD@7QC7W;rMQMxj{&yu7=`f*rOTYCe3^ zm-+ERhv|Xu)Bx(I2*6l&vbi!6$>(RFvp#be$ zIa3|GOCimxH}B{n^oPkzZT2TF^Z4};EXEmbOu0xS>yR)Oi~4vmpS;5d2T%n| zY^`&}aU#(6u^`(ui}eYzifB`zkv|4`{$&GoNx1apeEUG&q`dV{3go@n>{5ZgbT&A&Q; zeVWhm3kro*JG`-S({^wt!NL@fJ} z;k8c#j;>0ALS={D&6Q#+&2>y;jpp_0+&gP5MzGTol0K;<`mb7;f+U)Oqz;#Mon(3X zlrSCUK*XszZU`Yho^}Q!(BnR~vi1SF-yS&Low4K~P)DT(Sw23J;riOor%wnX4aO0< z<(P9)AYcTr{|*QXO2qMVlDrM9?HY=RI+<@R*ng}Z6`-)tt5Jm z=O4?e*WL*>uD#Ouv)P^M5@92mc#g4?McN&PwyP^DM-*pAeT*o^q`!)ZO6(xUYKd%) z6B5;%D^YJ8sQHpT+5X1PBa(cAW^`N&vRmlf7FHn(bY4cYL_Ki*`p^}!-c`eBg+G!X z3Y4I{5AM&?e2B&h_|Y|daa~Y~dCSLt=XLJO^B0toLn)o;95&eEdlq1#&0Yr%!Cc?shtN3CXvmjzCH#;}`NyBQAbjYqLhV|~_9W!* zRxD4+{tggV1M|b-*Uqi3q(!L$XOU-_2nvv59WGRh3+TEnE0A1T#f6Zcm$iSS${(-l zBBBMX)4p|O#)$vZn3HO$oVY5=h_eH%yA=O&j;10uw=LX%wUsnFM&}D++vx|Eh}`hG zuy@Nu={twSi}=VoPO|Lz3N&^fCtb)q-(6O-K$k=!wbB1JeQ)j3s^GCTpE)qyMt!f= zQq`sD=cPTVg3J-7&3I|0chTPzQP)Bx%L|oZ-(HZO(6?7G0jI3cPsv1ywQc#fGoKJS zo5$|9qqq~!x{>``&vH;+;^`g+z0uREet3n>hl>H6p5G1?U;_>e{-UPqExT9SDY}-n znK>@WM_sE@7}9bpzvX-X-+TXG<^Owl@b%&D*I(cJ|K9umhW@|ew3v_jg~Xk%odaUI z|L@+T1Hu2>ee`JW@DRS=-8tCbyZ8V7LjJ#R^wMT(Y3bskG_7s8X{QsEH4!|6z@|>} z5r_cjO;M}8RycqddNmrAXBedHmQvtCI_s+n5aZ6UgzBTJfzEh=KpPB~LSPsufqytJ zM+HTX;UhTIFn}euz}NFJ^+2kdT&#hsH_YcbSse@QOQ64Mf!14$hFlP{d#Mu7;B(TC z_bI4Kwl^>5uuwoMdccmj8u#;6!QgBA`LdhYGok!jpwa)&-kUbJksNEI{>%vehkh~P zAbJLXNUb)n;W0%~GrWhQWQnqUg64&rK$C0}o6&$ITEq9Z-^!)-sz!s0X2zD|oDl)k znpKsRnU(qEqHMxowSyUbiTI3!og&vsX#-~BhlbMY>9^h2uMhSQe}G^56Cs6-&l@1U zzf~&->A(O-VKU2R#qcUwj8zNRz)R8g-V3Ntfsze=_|Ju!GKPHyUoSwh$reJ5!E6n! zB%k*?dVcSu+r+y$Y?<>bZ;JFaJEsqMd#MenX8QK^`@#6)ZWa?`kf)yssm%@q0T-u@3i9_@){b`D>>eg&mI=EeDiphS;0 zq1eZ3e^PtOS)*Nq{4wAUO#~n-F=cSAG$0dx%%(-}GQZNWJ@gW2Jj_4|*k%<)X!sgU z3Eo4*68$K?1waD)6>V^{T!E9UNh?`@GVU*CGl-mKW%e=Ur2UlWpEMYeU=4GKKadlv zA{Fdh4KFC2AK_Jo-nO!_Pai^5+IGlIZnAv1$WcwFNQnS3NP7j=WmG{SUk}A59p&>` z(I*;|c)YfqGboz3zv!q##>1Qp2a}KE9!zRz0a-ym@5jX_%BwxkMpJSYl2u^Nr8y{Z zl+d=pNlMko=7MDoB^rSO!rC-`{cv)Iz6^d~H z)ChS;Lh4sp5jUDbymIU(s2rEP;NFT8=L?!vbb6gyEin`JT>cPT0{+uX;h_#2aduYN ziK48vNdp7-9L9nZatmvVT0ZZ|0kSRD#(fK;ue}k%gI*ouC?GHBct}>U=`kJc!ZyJq zWCv6AJ2}3!NM6UYhU_Ex%*_GAA;*yGgzwA*M+;l&vz%{d>+HI0I9qm<%`SU%PsaRM z#bjTJQkotaBD~RMD(+vZU7sejOWo<=HN0$cFxq?o1K_LOnaxa0bvEdgi+(>ZrS08g z9N{^>#kvXCJzESFac3gynPwzLXFNrKLL)O4f_k2B-c<#EbXl{ z0a<7ODByfmhHTq%Yw1mD?VV7`yW?dox$s zk(cCrgUwb=+wCAc>D|fb6dW9R2jk-s=>Vk{BxdcQZ^hC+7F@@nE`0q+d~NlPUn-`= zGofCT)~78RQZ#RxIetIrTg4}k* zS<5rnDiB5{_sVEdVze7ObPo&wIaeT6^Ok>xfne2k=z8<^Bj9+pguE(0h1i^d=&a8- zs1v30=p9>Iv`#pnNs)N9Aj+g+eClCx0Hp%E3|TZI_P(;}O2Y)53q+Gw))YIgNy1_3 zsxrxL$l~Gm4eis}2dyizF@(GW_->5SiglK>5C4-McrTU&=!i|pw}Uiwd8(=OtKtIa zRAtwxXc0|gl}$nkw@vuYsp{2@fYlL3sz4M<=WG&}rKp^YP|MN4?)! zWrGp7GHR-!pZ}Vl(Tl-uHoex9N*iud3iWu&cyz%kUCtV{R_2QF+|e{q(9~ocKspYL zAhHxU6LPTUQzSA7O&&c03uIM~V5Cs|(QOpOkAOh z@qFnBG)}&h0ga;`T!PTPsv3~en|e1Mnl{!}j;vganp<>dPPJ3176ZWqJayE5$D#!R zG-{Z@B8@df;lRGmZ2!Fy`$8Foh@?OPN7&?2;Aj7a1OMdDn)E- zlj#a#qlvAx+SxdXUa=YoCS*0UV#WfGAW|&|=|jc?uHAMeIzqM3-HiS89l$pK7CBnL zy5olA$P&kq<^trx@a!71wzR-)FFJpQDHLp5=O8cpV)?`+NG679A#{@YWD0mBgJNJ> zBH7k87$oHg7!o~VOM4d}0?+afXnwd5Q_2y!?INEIe5XzbS2z}@sg*b#_Eu{|fOOVLK<;G5_-YX#$@!lW|67k~n zFD6Alk8O7Qypcr^#ta5Y>ch+Gy{xNJ_tlb3cKP2{a@pNVgKOp;IWDAmDuP&}`hV5S z&gQwqP|qnXj#58tJ{xelmq`uZLR0M>Xez-=kaJrg+Ljd#V=`F$xkuTvAZ!^w2i1k6 z*%gI?i?eF>1$sC3F>(@Q=v1QG*h3J^vs`A~n{6Dwot;2y$L-|gcqco7HjlUeUY>l{ zge0CDEj`544s-HgnWHKAw$e1ZM2gqzr721S#=S=xUd7^6yw?-$0aoA0e1+fEScJ}S z@=K$j@4jW<5$Ox#}9uGA@jOF_%xq@zuWjq`3cC}iT0#n9Z1lXzP&(p^ecpbar*hJlT9m|2(399*cjXLF|byAPQw~vcQ(a z+{VOE;S(6l1^Y0a%>>PZ`_=MTfaj@kd*)1%T+dpp;-o9gd2J zq^H#`df_G!WX7X@u zI6HWchY9yZUDk^<_>FLuR6o zH3tp_o`f{+qxXaW;{Udr(wBCp6Huj8w+eyDaWD&`jtDW_S|fwM3Hc~1<|JYX?l1?1 z&wB<@qJU63jAs*gEe~>_CK>@O|EIR){}k5>oYE7Z@f>xbn-Rqx>mlg7#IY@F?YE?9!#(AB-$Zwy)% z;7cI>IAyt}HBE-nHpT<~AK(_`RSztr?GG3ynB#L8@&}6cA~jT`Lv``G&sgdf1F|k1 zwe-BJF;*3H9a5+A2vn3Ew}IL8DlQQwiPWIgrRVF`d8(qlm$GeZ-j+YG(PCPUooeq? z)uQd8I(<^NB=9jHdB2)Uur;r43v~65fTkq}~Uf zVPaRm=?Auy9{{)aDI0+kZQqDt3~ebd^XO;w>fD%2^Kn)@01>jSm`eLm8{eMT-i2;l zqHKBD1}*5i6>BW}w;23`(UopVwt!w+qQu2~J}tK&Jb-rW#+{Z=EYDA^g{UWZbcQ># zJ>hy$O2`Lz+Y_|uLsfT0o5wt(6MLdOSSSJEzfCpz)Gr6~wF@EzoRYM?bUFImAdnWj zsC^uC75jTp^|4XW^p}vkzK3ztfhVi9-~#TGx~u;3{cO9|DB=#Z zUH1$2lKqunXne(g`HKId@;^R$`gH5z*ZhxP@n3xWmvTB345G)`hH68<)WCmv{OsA| zr@_=EF^Zg!zj9@`dt09v=R!jR``~9sAvzgPj+9NlDqfWqK)c=pW<> zIS6#t*VZW=q>w)rLlv}9%IeK`oDDqxRP^CdY&?K4*$h2!Q)(7;Le5rY5C@ zv!V})*yTVz+ea(xZsy!sTUhB-qp zWjJ8-%lI;(%{+hg@Cj)x<6NjRpfoe=9SL3#m;v**b4IS;E@`CI;15e1Y(wU!K%?-z z+pt(;Xni?C4wSMna@IXq9ByZt%`b+qc@) z=0{Fe$|EByr3G4Y!qoIKQ}Q0F$Uq7GnmW+B5(Tk0*f32ae;svBkN)g=;{d3nYAEX} zmEU@e+?MN0QueLiOU_C;{op90X)9MAYjZu2HgNsE5>o1h)A0OONJaH3D=+iG!K*Jq zfcr}@S&t+uCYih+D%@fE$iS?<43flpzDa%5A%N5L{&yFUSI zPY(I8+VJ0zAlv{#3x<*&!TgKsYX-czN~nl1=lmO9w&D)$!Klj%p$=OJKU-XkCO9Z& zVPGjyijrX1^wA6)q9H&UA&VxqHgMJ=Y0iz9X0wfml{WCrV;+sT>B#8GuWbw<%L$@AGxhZ3*W!mqA(@368 zRMH}BOxF(WAxJz5t*}uR1L`Pq9(b3S5}t0y*kqJQZ$fUebb3+VWGs74s{i|-gFl=4 z57I4-%K*E){eqi-<|+C+FFqg+mth=tgn)BmE*7_ty~>b4uz2&#DG=SZw(GQU<~f5( z5W)9ZZy5tnR{W}$P<-%L;AMF^)@k87}4<;0c@!t~|Qf zVD0*6YZ;H#5}u&jhqoU7O0o1l6l~CHs;L15w2Tg=@Z>mromLap6?8Iq)?p0paJhZHJ(h z511>A7-eeUH6~(Uu~Eodt%S1)jIt@J*_3;wKqq6mA64f(H1rNoAfZAW$+pew7KtDP z5!Sz(Tgi7~?;X6C*#6*Z1;XqYTIh|0vg6ejvtrbYDLKG2jb??o1F5K_1R(QD>l>BR zUzwL`o5#ML$Va$r_Nn;Lnas`~WT)kWhg+M^I-5_PJ>2x`Bp)7DRQr8rYjg8=PmLqG z9!`E?~D1DRLd5Xu9R zRYZKZQDW9j-*5vv#pdiC^YU;+-TM`H??B|ez1Vw~qOY^4d!MGmqF>DECb82aCrwZ@ zW~lqf{P7WCTrsr1G3Iw#)hwm=2?-KH5S7 zgXnaajV~Lixhc(@*kXimE}K5hoK$LdmM&m?Mk+A)U1g6E=htb)-wzrf zusJ4JJFq-25|n4Xl3ri(p%CS970-&HApwW=Rg_Ag353(mx#`d*a5{+`T@X~of(Rb> zfTWss8!O^iuF82nA{&f2U&>;F6cb;{!|t5e&Wu`YHXeZEO}pJl*TcXidiy-k4O-MT zR2dtT)SAvg_U|h~|1%THEVO0lS_QY0QjoNi;Ks4CznBP zYZ!hZe$?WsF8YF}BizmO!c$ac?P5&Wh-}dYGa!OG1`DtDED&g5sS(}nR+2$8br}cd z&yw)l#HG|L7gKDSdjQ6RLl>+^HPj+AGCEl0gWe#A;8>wfxFevW4!lT@0pO#-mH-zq zgkz#Sf!noxHY@V+V0e{=S~+}uBqV!Sc-bHMMV4JO_v=t0Oo`Vf6a?cA^IQCQQS>h; z3P-Mv;E6C47}zN9Uu5H=9Ic0;JOgP+8PxfsHPPLq||0x{CG$gM<+73_RYEzXER!sS5E#z1Qnr=#x)J zG^Qe8ndAk{z~Sz|Y3qUefZ%jOfloWG5!N}z4K*!Hib=g}hxeB`k7J|aiM%o^&IHpI z>==44K)z{r2jR8?MMs5Aug6l)H{80$6r3FN5b(XBH`_*PuxmY@YIg0SM0z8KmDNC& zFb7#A-k8^jzaK1SDiaIZ^sX2!(+Vp!8!tw~y-#_60ex|*q#4LGyYFA@1b=w@diRz8 zL%O8H(9DjfxY-+u+>LE_0V*aAQ*atwGKTPORa@o6&_m+f3&WpO<>g02waAiWWB1K_ zpM23BU3*mta?w3@$D1cLcACUcgeF7{Cn_GlmYHmc1p~|mex0tu7iPRAZ1u z)StRmOAi>@4kfxqK=^X1ck)l-O7acqjC&8VJW64PYOuZp5A}dwec80-R)-KJJ>Vd% z!`GOj@rsGpxW$_BT@8EF7)jcS7yb5IbNM3$)2bnx#(PUXzRiG9Y=c|`2>W$|QE8Jv zVn&mz!A2^Z>s`TU8oUE2{Wk|^@fL9OEa5=*ZUoW0hGlurOz7{`IFLWoC$v8ks_dtaY(_I}Tr}xOw$UlMF z)+KW^Cz+X{2^e@}xX#iumo8=amT{eSVGV-uRyn$wGHOhSGrnYT9ohY42yc|GPO;6f z`ylO47FyfXpgO;#Wm$#3@??ZtZ8(HT29vxTZ_E>^#4AE*1*4Nhj(Ow#^n4~3PU^x+ z#y1}`Y_GU>dtujU1m_mA;nwHUE`e=U9AE+D_M+cO6JTzkF|5dw!&_cYL~eWCY>3AO z2K+AjkQ2+Zje_KJ4fCzTXN>!*GrAlUv!+;d*!#>3Q2gjkE+yG24n(IEBeK@Oxeue& zwdwN)P1uasVkzveH=it#X;ze1Ll+Ox-(mgc!u<(BmHXr-Vpxr(sZ+h2^JDlnxwG5P z;rJyQmc_reU3q6Z9bSdr;f=s-qRIr{`EBF`?(9KXDovGq%d6c zm=axppJVWJK$>N0+btdNdVET?Dc5OJ)ayaJ)S&pZHEkOGI1SQ1$sAhd5H5ooneaK0 z1lmQ!a`bZ7#a#&4o*qrNQq!TJyzEhr2*xW$O`iZwu58-vvrk8R%bCaC5M5;3AHnL+ zj<1YlSmAO)g!(KPhoqC?X_H+yPtu%N(7bfCtc){SG>x^k@P-u;a1K5PdbkpGdEi>-l6a+b-(` zSW(7^i4()Lfu_MjWp=e})r_8Bm~AyRIa_tuncHvjS$b$~vlGX)l^89_PA4BQ*Ezl8 z-*CxZ0{ZWGGH#0t8%#BW0$tE!ZHtmnuqO-RdH;Bk&+?`sMDoX&8$ybPuYm8ZM|H;` ziPjn{L5_)KJZ~0*PbvE-=`kFOZ%>>n2;?d8HP{B7Y3Op-LH40WU~WVqIE)rk1Ef?1 zP0%li*!r`pM uT%zJEu`Be#0M9BT_?~0yNS-F84Y}Wha7^S9@vcQigVGG)8f)`AqbomqS6bT8X~5` zk}$Qj7m!20$i_A$MWiMMg-%`XFhDe1w?MB8j2YBwD$-jV0-CNOrQz@?Er!%4!loh~ zT^bpU<9f`~=}!R2WS%!Hc4oHeY^54pjGRn~mYR7n2l5$6z4h52gM3 zW?VRD+~jcvT`D&Q{WvaZ#zditQt4 z=X%Bul*w6*tmzZpF$zpG1eXcf3+94NkZ=_G)rcN&e9BrO&keEl#-$Jgx{dKP7}m!3 z#AGJ+@c^G&;bNF9&M#d59@a(FY-#Df{g$cV%6trSPlBzry7E1c8*C*FD&)OqDdAhH zL2hICE@Zr#s1{*&(&M6R8}0@Ec}XwFvdT$~j+*oeP46_+W6fSRPbwYZLc93=Sct*+ zU~j312`L(lp!VQtnYxHAysLPP!EwX>#Uz9+r88!6L@jnT;PMBtIFJ383Di1cl@@CYQ+LZHuOARAMdD`jQ3T3D~0zf{yOh1bvRb8V-itAf7eoA2U&u6ak_^q#)cyL#38 z+P<^Pl`d`j;zk#)3iFL`0stYQ7@OUPUTl}&?xJS89zitMIc;@?kO07?l~q!vs?@T8 zwj$ZtDc1w(DV_JOr1^gzZPJ_R^S!r6><>#pH)#!T|G&7+>GYSJl@$>5121J^M|0z; zZE2jgTVvTRyreo@!keaq1+G6haw%RZO&O8~$egS}gQ2OzvIBDyNQ`fS;9Za|6`WLO zc*`aGFYwKY+F!mbREfI2o%(zeiA3YI#H12y4fJE+AP1JzJeIRRN&N+xnptXF@(_}3 z)sbId&8EFeDf@11YT1{toX~SODExw>w5Z&^l-n51su#uig$D5DjLDW*X|3z~GCx~T z`Z=}oc#&Yj9=LE&oc#!}VPH&CLgCq@Ptav`9t8CKJ@blh-EnsrlIPGkvr{=?O4Q`U zG*k=NP3n%eodgX|=?aD9Jdhq{047WhIMm4?Z@Nb@Bu`VIhJ3CQQ&tRuM%n|N4(}cO z*wh_$@Rz0#YL{kybZkJ`WzPG`bUc#FRBDD{$VIS9_E6~K@iwmCN#I}|+ms-W1{(1MgxpT0}LV=;%&=xj_$%&;YlA%?7JfDtDjB%6~lMDFGDz65^& zxq{Wj^k&Gc|18075P@keuS)&*zlK5*c=SaeTsesdvyBcI80G+_%1yk(ReOu`Zi!E{ zk9J`j`E@C?s;o)764e&yNPA3%%%<@D;rmx_{KfW;*611GXk%^5$zSv=T(uBRqpTq@}1)zRcI zM`nQ~*kiMPa!03rTm>Tdn`CcV7Q@LHd~H#=KVN{|H9OD1en^@m>}^U{Go9uA0{xlt z!8zc6lw!{|^Uisvg{+6%X~bR_p{Zr>&Aa`B*N32}=IyOb>hRs44OH|uG+Us$(-m9Iz0qn_E@`pbp zPai?HyDbfx*2DC`Kpo_~Lgepp=26Uu#|95bL%e|eGt&5BW2*gy+ei=tJ7dmRKsYU+ z42%&$L2xTZ*v{GP2ks}t@RR))Wk)H0fD8iNC4YeQ@UD6z6&_5igki{XsL6L+4`;|N z$zp#51_tJLY3|WK`1CS6bx~bxWvQ{tuzpk0C-Ou>X zb@Fkt7-B}qNgv1r?mrVwA%&&AHsJ*21B#I!p}(y$OVN@3AZioW1`}%cvp?tS)bsiW zlU9kL0SIiz{xgJa4#!BWd4>s)BxWjkCIL>OU7*u&+y>muCx+HTWR43A-YA>q2lRCT z-*hsJ_v-G&JTY34rZ_yjm}^(ZI}qI3pAW=I6En>gFkrbEl{vnfG+K?$KgHp0Hok8p z-#X(-)s8cVp8TJ#D!0O>+NZ4DI3}w(7`Ps!zaRGGXg}MdyG}N~->{mBEeS~}JuRg) zGkzeHX0=o6GcE!CX<#f)M$-iV=z0>*X+Hm$=i_AlaiU(4&2x`NU;8HX$Yx@GF#fpA z>LtzhAarWMOj{qbUWTwE<7o$TUN0*j8$kIrYn& zav2G>`W}G%_!gN@AYKL}S%pe5C_d1YaVlg(yd+Y*B?b#D6uV(M2KjKTe@NtYGMUM- zsqcJb?9Gm0`W{apFA_KOE!S2Z(C_uy;%~_BQV?b^nG?}kL~g)h!uBj|L{b?*HZu-2 zU4;BT@xfLq&^8MKLtDilWcL~t;~zPXxTOjv&iv=Y=N+KH)7EC>J?OL)44Z0E>;!Db}f zG4pfv{Mb&OFNT*=5&$c1VrSrRbtn%#@f(#=PU~QXt2lCV#LiRqkGg$Q8-MY|{wuMu zpFRCu5Lc0%#7{8_oBVWtLcJyAox7A#g_)KA7TA#bic~(WWDyqrd@v-h5eK7J!v3)o z_D>oWz66vf_f*EPBiTeA>E+T zdyG*?Jt;N_s3XpqQ3HltQTXEwWJ_;FXhGO_Xr4pTPCa(P5D*ey*&$3|7Kd+uE6Sz) zPJ?})ih?~Vh|fd{*3>~p@=N2gsMTO^r+9I-4`eNS`9qFi$XiOWnxo9$l=!1s8VAbH ztXY9BXJDQ>w<6W;SPyChh(@Lx$Spk1IXXrj=@g6|@WZy}3N*&VxO{g>Z*6&Wg>((; z?RfJ*L!BHUQ}Slx&D)(HUhO0!QCCLOKGF{}4csD+Hh*|dicD2YjQ}IueY>}Fw3i&A ztziEp0D&cYf7^d|^iG);=`fZ_BurX}Um-nv=l#*^{X^0GtG&acNZd965)$J)+WXrP z_I>#N;2=_3UaWP>>O1fG!RzPCN()^k1C0xtj`a?SCt(e-d%hc!4Y(~fVpY(3GUy4_ z{%HTz-n*lnS8r6!-S=jlbXznTE{%rv&qNf ztrN-*J^{^*`{aBor5F6 zDu^|DeFciYc#-VBK6wA?us;8YA5m-5x_ke|yVf~x*0y!UtDj@jy5qa?u5~7JmwQ%@ z@n)MgNM*HUo%!Ex&pKUI?$|16a=Gxrq1{@KMKgeEQ{3%fn;)P5(nT^?Z_`R$J=M*4 z&mBs*a8$40(N-NaknGb=C_Kq~E#0obBx=Z)V#~VJ;%vF6GeOn1Sc|^QD(5bn?-r__ zrw2JG_(4ORKNVdcB&@Ci5QQdsmiXnMfkW}IH}W^)OKdy2$`RIgIMh+@c>=@02ySs| zst6<+(R^F%%f)!mGSJn zI9Ihhu(g`$GHlKW_SXRc2b!c1I`40#MZRpby#DaOf98 za8njOs?RI7livctT@)pMyO^-s$fJU1OmQm9-es?FXHjOh^!6o8BTl-u@!mnUZ2Jt(Ce>t456Q>J1dX*Ja+4K`=+~ClzX8H9Fd|Cg* zLxe*0sSlral+gp>B+z>e%IMhG{TBpKAF`!ENC#*O0L_iM0!BxR9!%HIjLZ$9H>EOvWH} znZ|1YsH06>OR0H_TX&kujEduGC)MP zL^`pIk*J%n8~YKVN`3Zjz!B_A_L5{)ARZ>>cia7DoJ=&D68_r!-tq=|5 zOljjCffX-p?bhxy=sm1u2+oCp;t76`!0SMqv}7iWHJiEXi!w3pJ%T2HsFJ~CAw;(} zre+ka0#gM~u4O2^z1LWu1iL5b31L-5grAD5gG=gl11F*>3 z93%SvrI6He0!*sbva9V6=AK|^g5l&N4Z-#qmNb%ex`fl+548lLcmhkviLwN!>vTft%TM>x#*-Jq|1f zRC&A{-3;=i^={jdlP&Gj#=$r80Zc&{!y8}2NXYJK+F)i-Ng6523wJ9-DJd?x^kq0O zIBG^U;e^2Gahth_%gF+sw0sXSJ+GJW5TG6(oAihY0Tdg9Nq-{+1aWdim(QC^`5L;! zgYPH=|8kgEh1QKlI=N|1jzaF3S;mPdqlI8$7my@Gx4=-%{7Q`IBI2{{(4#LjCj!P9 zPC$GKHNu8%tJThYa(Xq-OH;L!zKT7FITjV*pUyytTP1fa3#6Jf%D*v~PW0y1%Aj{Z zz~&fwYu-P6xA*o4Jj7o6o~CLuwxrjmA>FqW4fN>>9Q?oR9K7Fqmo&d`CHnu;;OGJ# zVb&0ki2guu&06+~M2D1CdQN#qmjX1|I|AYuF-X-y+gfInR5y_)G=VM9*`;Y3)c=-W zknwwJnPK!;PV}VZY|Or8MjX6klDC#%+)aLC8Vyi z?a}bp_ubhYbBu4hm4pi3OJ(?ClFs(l4RCUU`$;+a`?nOQ6Un6k*uGz6`U~deX{{^E zfW5WvuF2^?F$lux;}?60Y4Bd>A}g2JkC_a$K)Hl+3h_f64Q9XJ6Fw`leLF(K@U6rA z;P$T|G}BOvad<4XZ`|;$yQ3cr0#|S8<(#^UYxZVW?Ok5CK5{M^=oriL4sM&+Y-y2Y zN7kGj-u2uB9}p*p-g53F3I=ciF?Js|%l-=~Ur}F6{T8H+@72!Vn#CXuTleDL2nyw( zn>5L90I$(B`A!>rLxxLuF!~RXX7rA`sZaum|H!~)$mU>Lq|oBY*j!(SU!YMDc45;- z$Y_}W6iOy3(==T{#BsTqPHG+$dz|8c9)CZQkI_-3$q|Q8!HMi z8z^!w`V~856Gm&fjAn6k=0^BvuLL(>P55xH3=Wt*FGK(FDg)@m!We^aPcx`~ut4h= zSzI`Fsz#c&2=NglweOf$BtWX&g1G|wXGB>3()F8wXJeJ8O{GQ%r0;arOf?H%8I97u z<-VK-JMrw&iI8*gx+ROPq9&~x1Ii8-LVVqS@H&)iDz;;g;{xgQkI6J=;jRPIz+EH- zTkMI1+xTcjB~E%Pm^Vsqlm?OW@lDSBCG6ZrRIVc_o7<&^(5!PKa9Ucg6^B9nMYAVG z$3oM=wnQ04w9yyuBEt>!7pW0!#htxwOb_om ztwKP!H#MYag^s;oR{3}_!f?#M;{B}3$~TF`UfgD68bKNIc2{}}Dhl$M1m8)$5gUmW zJS|5D&xgFxU2Vy!-pNYW6SkH+`G24irgZ(TacYl?6XzIXZPO<5t5o7JD}#5x|97c& zO9A_=TsvPC!(WPGXe^4e*MAiE>d+LyNcbw3>W5lN{KD!Ys5_>3htc(|B7$ z0xW_gNtY=N)o)1|_dOVwDh<##~tsMTVZuM&PQC5Hu`4 z7o$-=D8ygGt8lfr-hEfP_nqqBykfQ{QKb{qSJ_up+kfqW*`KEMJ_O{W9Q{+HRVtqOw?&Qkl2NGWy;1TQA@wSyD8AnKgi zCcLyOw;^tw(sq%x;#8+ptn2x(q7fl6m;&PNo|4EIm&fGxcw-XSCRFpVmAptL7&g%5 zS7EETXUoE`*%$z*$iYI0R_kt#4nAW0(T}yX`&UJYTKjhLIo##7Jm;s2d04go93RoO zsl!&P_Ft_ZORXR5T`J>NiE~xlDM@IrgcS2zn^^A3h~ldM-tdLq{8i6iP0#9xtm;z$V6m5VIhMByyMy-QT>^<-w_yygJNURcfi_j1^L|%$n)4q6P!> zYS`FFdk_f@salCG(0SujMjp#GOjl{`M(7EMe%cQk+uTz&ZIz*S#2Hlxa1w)G# zjuCRD%;-jRfYecR!%|v=3hvA_P{#_JjZ_VSjNHrqH*b&kL5o#jD;jI5$dK_4H$mbf z@pQrW8K-XOeP&hp@3zki&X8{SydWvGnryO)u;Jp%REseUGp~ST35cU97Fa{jSmH*b zRo$jqK-lvYW$#v6@&GqHlPfVpgCPXvCWrTbD(01}pOW+d75DMyUAm5KhQDjy(XKa` z;bPY@QoX3?XgenhUcTE&!$=I&2wgO4;}Bw4mn!;==WxAo>)4LfavK2>6z{(pJ?A5A_mt->J` zvY|gFn})e9xLfh}z^Z+b;L#b*M2Sd&Vf-S(A7&wf2So^*_Cfu&hM|RMM zOF=gAz$yJ8*H@4%%lsI;wfQrSoOZwCqR$KTD*IH779*K(kTxUG-zjda+AAS4LKQ#b zC>8+zdkia@Y;`tIIw85uSzG0DAhTF5IHw+fL-jA^n@J}Yh^w3o95{PFCfD#ZpuS3w z7FKXEFn5q>c0k6PD=X3bMR9%s=_(PaJdH^Yv{qtiOy6i{0fWBLo*RRpHDqsAD+C_| z>2LBxE_2};tNmI1ecef3!Q=QTps_&$wg*!#r8zKbz@}i8p4*b|i^SfQg9&F)6G6%*tblQgZ}t2b<3DX&d{%0!KI63oI24plPs4XGX5v0fu-Eii1t8+pYwDj%7SCZxA8^ zx*m(OCXYUlr2AWQ7UEMHQj=k>{-ZkesVsF)az!PRaN-^)>4rs^zc3>QW(7fdkBR}cNH=w{b$u0xc>dg(P zZqAirI3dwzFh+^n1so5MV__b8)r8Z zB;Fq~-LILagMeBq@|?-zCb=Tr0{m_7^a_UE^yL$zq#o%iVx$36GMx^u$W?=KG4n%G z^>~rIJiy~^fro}WV z?6Qr3vgw${quBdkv`2)0_QwMjjnK(4*9aHgJPvVSInvk}4$_FFTU`N0qCg)fonA>1 z5ym?Nk%K8%g!QQ7=Pf+Qi&pLQ1{>f$)Z0~6Tz5?GCXj1EUMhpU?B`>| zGy&qzvrDXKj4DJ>wI}G0V~bF3@`MRQ2K12IOVjo_eAzVHN{_d<+gm5c+dQ?CfPke| z(dzEl>>yB0jn(YVtrQAw(GU5s5%8F!)(5YYgtp%wE`-?00_dH+ z-Q85~BoyE}5V}adTDF1b=u17#X4xo*Ee$dZJrl#~?H_{g-oo_coer~pelZye{sX79 z(Q0)5IVr}?#`g`u&Q-i_&&kB6gW1Y|4SQxIP4h#`Z^6oT@#h1vD*vPu@6x0z{@gYT zwUgJti?T#^Ij{#UHaYB$CU}+zh3LCvZquwY>&ar(`pzUp%kb}X)MWFIEmc682edq zdewVS29JwI!_nn3up9)kvT0SrCnCEc6vsbbhw`7nLclZoY&sVj)dk4rz>bUjW8f_w z3U%dZkGCIgZk~v<+30jS4TUQ~KZ5gt_?dcEhMrh@xAJOqIvFP5!VjlHtHa>T^~>FAT#0%Zs8ax^m(qbY1C zno`8AQ4XF8RYY*W47$c_DuOLwa$RGh6-6=9x7*ZIXekmz95HM7Tt>(QLWQ3s4q2DP zWiSQB04;!BbT#7$gi-=G>yjTyc%0%4clqB|m|uzI>q>2~I15SF8mA&O1f)BYKk&Oe z5M6~b5Q+$u%h5GfCNLphs;>MaI-XvQ?PKhiirr(&vbMTts)_hQZ^jISV+3~bA%8xZ zXJ!s+ifjmk8Q$QMuRy=pef|FMsQK;6@E3B{gnR*;MD$@hA^-^l%j;#ZZ5$#5lDgPL z0rQ6$H39qulb|43R?)kn=~oJ3f4(|+H$+Er3##t5=sXT7hI9qJ^G ztx6ipG`D3bDNn&Vvjnys3576tnL+#rV=o3>*hGzD&^U>3X~~kX9Dj%wR)nGA>SMHm z;zouzg;nJbjgyo&xcOZr2Ta4KMJf&ki-1_vHb3F%T6?{?j5tXr(?2NBY0xTDH7PgU4 z8w(2W1Ly_v9kCLGi{eFDnh$1{Qxi z`W%U{vu4=X<_o`7SsHaOD!LUdYSD*FKa7@U@^QIG(Q=x)T=a3Ys3txaeG)2q?VUL@ z@gzklImgaTWM`3(^VMlW@fQ|3$hSrA8JW{=1JiXwxeddj?_U=0D$a53BaD6r9>4~M z4arO>;_3lJPXk#296qvSP(adi%ddlm@==^PG%ZXPLl@Cx0kIs}vh8P2f9HZ`$Q|&U{s{8nrrHq$gP26lE{d=Pw- zm`PD4a*RWE1(gZNO;&mvo(E1G(0!b_z0!OwT(}0)(iN9x!Umcla&=Mm^yFI!JJ0n- zc?rI3Zh|SRG!PD)2}$dojwa;dS*m>eQQa^!HukJS{ zV5$MIe}gA|+p&VGcqFa2&NSchQd@tM&~DPS=deVK3hskPm$;w9{T(}k)`|utOl}S^}2C#JqV*2H;pRRHWyXW7J^;P@_#Mz65e*{ zOf4Yd`Lor2ymhP1>$C$;z)73EG|@+v1sw94GtOjn=qRlb4XCMPAo_wWP9+>Ty^7%0 ziH3b$S{iLAlBSHUsV$Y`n%W#;T_zjnt<>y5yv^QDcEq}&# zTFIN)1l)x7$7hpP@}ii%BIjhTJSb-J8J}v468rF(Yrk8ZiXsOE&<7mngG^35?`S2XFh9@6DaxrVeiIgZiP$?J@7@bsX@x?EA~+#s;u?U0jme@JZlWH zlb>h(tEBmHGP^`OQ(LId*)UzBr)^ErMvH3z1*MNRmC`g-g?<=LPBVxbB4ScGi?m#{ zVGvk;rOB+?6JO=zIfZ0XC_f`J;D*s9O?|08OA3lEg_^bT)RsM73KXsknL;$Rm{~kUO;@(r# z{~v8W-hB25<-bRd9)6YozLfljS#;V1`yx+x>B*9RQRFj-cz`F&WGJq1fHPiO+shz9 zsH!;1it$|hf0-2XGMRiFC%kC}r`XK)S{sEec4^81OdKE7R1=~A2nd9jh4O+qro4m( z7Q)qS#|*4MIVZDlv3Af3$y$k;KzV8#UC%S@?uz= z%8x>DMJS%vM~U_7;-WFFu$7O+cbT@Grnq} zw`L37=(VFmi=Og%o;uuz-{86WE`C~D5)uSzz}uX%_>~vMv}J`zx0v8wD}eQ=!-{Al zA$YJ`k~dlB9HmlQVkb$JdO#^~ZEfvOnpTwLL<^PMYy1)e9}t$(+Cdv7?`voQyue7Y zA?J4VA6b*pOXy)jzn~^S*&}crr0;r34#>Km7|guK9yyF{6&kLRc-3g72udj@#=Ox7 z*b5vJ?}*eHHwFPY!yabi^95x5P^IwVmxIM3#W*)7w#K8j#CbLHmHCEspfwYL<{U`n^7o0n22gcOYZSv~@ql2<2qibu<^_g?Mv{<8P>-Tv!Cp{o9FZOzLEWczaI)H>s^Had{D) z#oB~s*Y+zgw1-GAUWb%5VBd+5;7>jip(1?%z(I=@={y=$$X-b00249AJXdS^%9amB?c zo`iNU#~Yv@3)!s#-U#S};JY9&!8?%nd_3=rF2$NP=|kBicN1*4H@U=*sps^lAauaT zOgo2&C2`Td&;?2I_0l@IZju-%(vzp!?0~AtpCFuj>~#cbLK1O+`#IbPSC|4slC5B# zm4SD-Sn5#!fOX*v;g2)~n-gsN;R={Dz`XdI;V`hq3ppU04#{Vy=W7hd!J*CtlS&$q z_R7U{ijYmZrND@YYFUZD8L(gCrROZK+$mj0_oz^>i^FurrjqnQ+{B+z>7T9FM%T=zjCBH5C-- zhR%iAZlY8(r5TuL3QNPZ8Ic8CmDN!(qrB<{Wv@k%3PLSxu9?WaD^jQ>=Z>O&_^yQ* zC5KReNgX1SMUS%(K>#8~GV_MHxZZACHgH(1-ty5CsvM7BH_s>-ybt8yg!BeDROP%< z;-fLCJ4@g4Yc9-&9^Y>DMQ;-Eo8*-e+COYQ_^<`@`tWc&*%=Iy4fMwqeGV5R9PgRd zdQAUgvvfoJe|G3`;uJ?ieZvwDKFdb*wLp}R6T zHY@85FVzFqYe}N5nBUXe%^a?=l6{Ep!jxrA;iMs{Nfa>eTZOu zrU9JuPt$B{9XEfH&u%lxXLx>a7(F?RTTk&BPVsj%#aH?4a+ss#F{h{bLhA2`L;?ys z$bk+Y^5^-x+syNKIL{G|rN?7gZI&?yecwNS;Ir5I%AAVhN0rPB;Sb^@RitnvS%jRk zmm>*9frR|X2!J^qF6~N}-fml`KoD{+exp zhPj3CbMWhv6VHfcu|>C*Yj}6+2jyE}7oeb$>Y6DNa`2^vK`^SM9S*S;tzZlyoV&?l zj_(sTE73&UrseUlbm3CEpX^^%}tDRKbJeg4T z#6LYMB{+Df7+Lk2sYdj%_lSDB(8z}j$g=VG@djX%Zmq2?W&f5M6Go(Hge^yA#5_vo zO^Tsb(`cz3Yl1<+lv~E=(_rHiC*SgJM?5sy|x;P%6EL@`VsM;;L#Y z0I@(x$U=g!x{_^IpG-R~I={gg>ClS-(}+H_(jZCX4G|;>ypxrX#Y;9XufO@^GM9P% z44_VshANOVdkh|Td=j=hgU*70f>mR3nv(!-(XzOSB@L32j9S9zfgU-Hn@1f&0F5>qVw`E!09$nkA1)N24;w_Y54ov3 zu{l)kR5eWp)HAJW&XQ4L4O)e|)~7Ulz?AJZ-1R8CI?dV5LB8p!)w0>7JT_uu9|ty- z%r)goKZbH8O@C5ULcWBlg@vjS9{y}X3A+LlSi<6j?m)uoXN*_h21tayU}_}QiNCd) z5uiUQMLZeCg>t0Guv8zhy8_&RZ8c%koxrWrXn;~VE2hmflm~#y*X;|I1SCAzI-)a^7WjP!bCQIJ!tnCA zH8d@JspT@q{B4RFEgoS@_1!X?#INO~;Rtz?ga$#Ngl!O?rOiz9aYtN)O;I)V=WmLf zw-~2?F4TUnJQdY)rYl>T@+4TfV?5hgEf(_hw0i$qWJ546(JNUiAjmjd!I{1>bsN@Z zZ8bU7lT9Vn47TasQ>M(h^ikCkUQEMhSVnn2MeQg{-u<}Ke)#06GUGvS?D{4#)ubNS zJk85_J3kY@&eQ5w_CawD$vwN$<+@W|WTF?Dx1y7eVO5)r#r&-OJE7{4%W=0+oR1-t zOQ^#a`6p_`IsaqSxojB)|f#$WbM0RqbrzQ|r;-J5vX zp;h0w{K6Lclap%O`e-qn7XXrXCS8oPq(moZaIqbi8NF346UPmcv~MF+HYIIlV)-gf z@E$Lh7?uwe=rxTJ^IENFT}IMs(=u9?%lkAfOT+EvswUdT^SvMT4}-QR!GTtwE~g03 zBbJw2*uKA<4gBV|@cQQORqQx-ZU$f36n<-S_^KxHo0`RMW*T4JJbptH`BLN9otnxk zjE!H;WM0Q?evhW}O7nSm_rQ7ou3PJ9-+}$SjtxCzN3UaFjoZmrvz6b(UVgVW^NOv! zn(cfU-@Qj0`s#M{rR<`XJ-vcevI5{iQ5?)(d`A&)773R2?ToUm!Wxgd5*lp!oQX0eHUbB+w)Y|Jx@l%1gCtipE zZatL_@}Bp*8V7EA*wutymc7JzLx`vbo}3uY_8U1zb!`>)QENrB z^sKFV^0e!wdnDz0`@|CG$v%^<;G09&c)RdRXd>V`Qg$`@U9e51u3%aDDbzNp8W=9l zx!2$eEAbkA!O<+8%|LHj`B0P9-Tra=Yuk9|3=jpW1lwh#6=&j)31 zmZK=Nx?rW!jj(8i%_ZEN217vqP|6FGoaPtVhl0XROQsOUp3Vx$7~RAr-WtIJ{V<huM{|Cj=cL+N`3d|~G;2#NM+as<35`iNQRG!L0>uf52V2j2RwEcU-8_E5f zVV0vc?H*5I^Vx@qkVv2|`1LNv6x2Y@4Q2X8B~G17i&sSqqdCq$wwM{@l-_PslwkNJ z*%-^E!2_uG>g4pFp6xFb-7n9ySy9>jwxjtXFx(p^Fqq`!c!PuBg{F5|Otn+B5pH6} zLL!~7ApI0ARs2;(Mu19Nraer@is@SUU%!jm_w^)pGZ;NMx$uIBHcOg^9{wxHa@0F| zeX#d-=Wurq49A_#%`J0&FU3YnXk*}D+5rDS7#dB^xs^OX$jgt}Y|z%>-04~k5r^-=dTxJ%H{nvA|f#?IeDKrPZ&rZvx zy|itsVaO8i(0pyJ>UMC(Y^=+V;*?_&wt*MBWIYcGzM2h(C9V+g2pSe*l_pMDf>1%aefXk=v#^cF#Xc=-KAXUy1*!6j zU?tzc4P7ScUf$N>vq#l6x8Jr40>YxBT1rYc zMN2W56=w*j#xZSDF~c8~I3j5NJ+pj zcEs)bhk{@G2dj=TSM_@r>w_zPpCyX@XsxXt(lWLZeNHq0`#OXCbaCD^+zA(6rEjbFcYFB}3+CU}%~AGk>)>ZAkoqVjQ z$oHLYWVx%>EpNWc+s}O;mt*uUGd+5^Y$E0GVsj4ea+nS7TOy%48`3z|tA&B+1I>l> zH%?KSjnw&~yDg=R#j4rRY@LlgP#atKVeN0dsGi{8`1`og`RAk)UAF4R+1GQ%wiZo0-|2BhYpv*-U2Tyu=-@kN`>_etddc10 zRZNG#YoJU*^0sY}I!g=2L3PAG<0iaR$Q!YJFKv5DaHie~EtCrp;P>sY!2K`q(iW2Z| z4p9uwBN=iN`$<;6twj|wgiH5#qV;%$yMliNy$~J%#rVYr8R2I0w}}Co)loQ9E5si- zp~)6#8lePoT5yVf;kRdwG!rt;wtjYYLs!_H&kPwa0YrH&h?oo~V+PU6p#(0e7rm+&Y=peEGRIr}fV52N;PMMeQ&cvs*PD_9|R)GxJh> zNzf%t3Mj8W7%#`m#VLC2&E&R~w16#HhHPqO%zIJoWJA?&M73BYNQ+I2eLyYF| zYhQ=3VKA#3geJp!<`UOfMPa#HF@V-bRn#yj$~oj%j@%HZs2V1EgNI_X@ogjBj=93C zs-2>w^Pk3D)9oQ))Ga=osy4JkO%N{UrJ$@}O;f?7#CpnY-INOqCUoa?`}36?MN-9H zthO`8FlZ057Iz%co^+g7i1y}q5NOyO-}Z`84HX_lX1lS_27AO9RuOkZuX1HII}5C2 zaprXO#}Kx;FM`L!VXLvVN@e=Kpj0H6wD^6MvpR@1#|qOuOH*RCb@dyq0zG%ec(0Ma`-~qpCZoxG|Nt;j!O$-0`)h^J_f^*qb@QuI31P zSI)3^><}AP-JJ{8`u#*7LcDn>NK_rd5P<&Ji7&p%>0_v?(6r5N6l|j zc+ltvgqbL4zgnWc;W^6K{RT|!JBa3*7yU$K_ws;hOD@ur?o2UbL>MKgT#QDU5cX`# z!exH-aWWg2)_VQpbTOMwO4k@M9FBT(a8y-k!6In-mZdc8eQ@zjpCci~vGFy}zl${*>QX#S{_V&>s)PJ(ol2pdzk`b0q!? zKCgJg$&u*UH_!7?F)kkb>tjAn?|M_)Mq(qOp!I_yPcNItjJC8x|2iw?=sw}?LC&1m zl$I7rLe4sHmrxo0{?|d5w7f03SS_^6ErI{nq+bM**idL=@kDP{?i;G3If&g5V6?m;ig%(}QLHs5!FMNn!zfJakID9RLh{svZx5MW+QX9zCYO6y?dk`05a^a8#CtNH@TNK9u(=+*iD2IG!~pJ=Dl@G|O&sRzeF<0Jx1 ziwREAgd>g_*Srm!M|ef8^P>5L9%*W_+q4QCB2>8!>O%yC5pmwU>%+(3wBJ57WZ=&D z>Ld{Kb$=qw5=*=o7>fsz2TQUq<;Gto7;Mcyi9WUJ2@SW58m)Uvg;if)-f=K5R=hOt zH)7OcdHlL#T&nhc=F86Vtlie?aM_8mn83AqcIlNue=k+w5~Qy))h02#%aF4dFU_Ul z6!v}GY!M&jO_nfV*ibX*CdZe8Kl5j{U|)VTE`22VNzg9hCu(f)mTqiwfB=6Cm>p!F zr6{X7gkOq}hHOxN3^gbZk1VS`hYFkLtFdjyNLarZ77P3Ph#}^IpmV|w6&wj**x5`I}L{N8u= z)Q+A$%PmL90y=o2%aJ_Zk@GJKh=Tj?lEe202WzW3gMPJJ{>R%bNqf}tppsZg9+b)x z;XW?Pyr-|wZH&+syATsPE9OGDk!syr+Lq=>jqfPQ936twE6ok$7%Ig^HM0L-ej z)S{Srv!9iD8vuXCWic;4H~>xVOx~l5g;c2xF+pZ$ee|4YUSyozC*J;*(q) zssH^p#dSK(=kuKXhLXj+5Pf7`Mo?286K7defGM9fKJ%B?ziq75dWpYj$mvblekFw0 z4{u+;f79E2eYm@G)KmgkE7@pmpf_~TPEtEq0Iv^Mx-_m1Eg8nE-$3ltSC1;49PIzk zUh;ji`}*Jj41|ZTcX!_Htwl*f7t$TxwJXFuRNE}%=Bo(9jRZ){@=|b*2r^$K;=ZLO zV;;h!yX;n7;byIWyWaeUUHz85%bWHdZrkvU>!>-{oKiVXI^}d&%$s<)WoPMDZT z$eTxj$Lp2|?Ehs(ehbxnAIzqixLoPlf`ZL*IQb|TIR>AC=jz!o z|6~I~RB`g(er4rfS@~C1{*{$~W#zS5x$^z0?F%tEJ1g@!8u!7A(C}gqBES0n$!5sC z!HR_TVBt7I-c9+u%}0pUDybIH#xU+vo7m<@VWDms>?S z6wvwMP=9d}tD;16Sw+k0kt#|um#>pcBz03@-cIgHxL)Cbw*DjSiVw9bKi1yogDpPV zhM&)Pm$Arac|@BqKH$Jq+Ah(v{9N3v`K(8f4J$-uJBS`$Xf{14<4Ok<2O3n5V^pl4 z!+y|jd7xIjiHGA1869sqVXBd;yJ;KY4BqULBF)ND? z`S8lgv}aP_MLrcD(aRbyt{kS?)S+ac5Ty6%wXY}cqD>oM8d11u(>vN$`GuOvfC<&k z(T9_{SR9im1|a|hOB3i66^bbZj!P)nL_*0HLdCwiONVI!6>7SWLoIX3Th>f#I7nA} zM?+tAT%+}9v@^0X!l)6cbn{S|Q;9rw>*<*Zw)D3(9e`XiQpTz{DO(lUJ0pIIi?Peq z=uKKNu_ftO?uM(>L%;r+(yY`-l_J5kNKQlhAUbQ~0^W6cl!`1_)x)Tn`Xq|iC{MTT z=R~Us*Jaqz$=e}iU7sf1@~Kpcz+U!zkKMUru7%_Au?F8wu_tSRq1keY^jr_ zFFRXbR|?M-yEX(+^LtAwx4y3sKyU0Xqz_w=8!Ibu<b}jC^%0>T0a{E-B^E*lIbCa`u_viXv&UEU**)nz?CV+6$JZm-3Q}0@iLStcH zNuc<+diS2=Tg@X1LxA|RetB!P?pLp!{b%V{tk%RN1F&?P=jVHyRRvhk4fs=(6Vf(c zRq>Dk=;*7cOe}xGJ=_;mzL*TuN$cg~3&7Lwoy|)!S2yv$5olB~{ynOYygYjMg#9bg zFfqk#!H$y3rv@{%768bGWeW@T>81XWlBl=QK?AbuK%RB_#MRXM9rjD`OOu&lSIZXj z2|Ts-g1w1zo&6S2YRWplFP*~v|GO+3p`*BvJ4D62xEZhr>;=mM4 zh)hUrOG+tcE_-*tR#JVL*m00(cLHgVsGW}3z4@$}a#%8af4q_G9KJ|4TFC~4MdH6e zTl!;fhraq2{?phv31iJ++ZL*vH`^A56uTQ0Hfbgful2$p?f*1l^4~C$G%(S7>Y3oI z@fp3*#ExQ-21AYW@t}mYXo|se8hJctFm)NajBT8_$aWja1_8?2eHr(*;O65&4-XV( z6qYXd7B*($Fc?&GJ^u%;v;w~cBjkEZJn9AF+5N5ZS!trwrl1Nd+URsb~9*~W3> z?;9u0$Am#^4COehdE<=L`Q4r)%RnJ~%xh>n9kPI>@ly0gl}4 z@MO)0kJR;pCm7}r%!kv3iF0`ds06smYip)I7aKQVe@Wk}zmGRGiH$=MKu<3SvCFikAnO12#4g3ngusoKR49sJ;CYy=vKdAVc?In4f;jR%lxS+bvUaVFT2R}kv= z5ZVjmJMp12BisVuB*8&2sf~&N5ovrBgVWOqdm0*qlTVjL*xT(?+y$2&5U@#jF&6V9 z`4w!PxC~4mTgGs6l@Ek>SAfU6N>sLk&l!kItS5d9fbo*Y85YI=AbZ3xBcB%{;wHhz zVpyZP^5BPap*D<7-m{D=is38xS1syS2ektpAJ_3WB+fFx1Z%sFg)-7hY*cor;t!C) zK9y1LL58}_Ug0VNGPFE61nL_)UGt3+1o5{P@Fvi%1Sm-8wjrAoeJA@!{^&X!D1&5E zm1!GI0BY5)eGQ>=&emF`vY}BJ1|T*`)Z zn=lvEVp1ej;?K#Xk1%!va<-L}VM=*Ux6bNo*Il|>2>kp(b_Nm=wU4PQ#}giIZN{iK zzEBqW^93M_8jOm+l0*vIe(Fdl4}nwlos5 zqJ)Tsdswt4ozt7k?s*yC97`UC@8WXcL5z=UbNdo@^>mo^^NY!FAou~BM1aQohe>~! zEew&$CcA2Ge=XLCl^X;{!vqo^zK zc+!m|i6_W754GPUuV?4kxcGOjcGl@t;yQuzPrcJCG*NG_aK1pXJp;k{_=Ew)2j-Gp zuhnwzbp~*3JNKSQUC6g4-;FS4TbgBV1A9zM^`;(N&%J)uzsOxKXQjrdpyoX&Q6sS- za={8%Y%q*8K~;)HGML0b4!b&cft&TCHp78*4l{iBxDt%^-@kM<3yrr+riS(hCWxPI z*wl&S{NQgT^2J_Ymy>jfS|R;hdwp?j=a8Bfz|m*ZdEh!WY?r1WS=jeIe_^W?AP4mX z3z{hf;o;_{-o)LLFXaOD$$uShtCA;T){Rc5)9{oEYYQX4VjFG&yL24STc+PuT9(xvgLz!sZ!=={ z;bzMjL=VF=6c=HjB=neEVguPS-RH@R5l8h3fJWr(rTsJXe{IP;(Ueik zu!2EK0hyAm$cKX}%gX_>0C8z3)@IpRY(qJ*^qLN`#-_b57g-ncTJPB4ZbVNXwFPIB zeqMq?9`<_>McwwT8blU zlop%nG@iIcf=Pe`gOM0^m=7qH_9qaUxE;7GZh=t( zGzVxI?VkwWn0|@$8Pf}j1B3pw(l;@T|KA5#pl*_#mQAQjEkQSbNH(jhI?yj5dYH>JrLaNn96Aa3>&6(V(km1-aV?rYuz7K!4Yj0bu z$#3dbmSKwyjqDMo%mqP(eA+m_ zaC-o_90Txb8Z5-G$tWL9+>^P|y`XY8+UC|ELL3swG>PaDg)tUZ%`M?)eD!k@7L_?B zmO}-^lT4@wifFcpc+wD^{<5puu&$U;#t?oo)8+ZQ2KtEwxFg@q#!2W1*;qR$h-=ip zJ!)RRv0RzTDcLq9ggvxq(|i7m>YXoej}Y}{<-F(EYE*Ubn+y3MZAHh-I!wR>jj8}X zPABjFd=MXxAgHX-bq}{q^W&2M$;lQBKvp(UDsBB{uPu)z;XP~g6yCC{S+3P&u|;3# z4Pr;i_yk3E3bb03dC*ZJii*9=G-yd5F*9K`4YU!^+j4DNLk_o8^$72^_+6bAjg>Hi zY$C;Yjr>R|+x!L@TXaZs|0AoL0%vH1%2HX^VTkCEEeIUFc zgXxYJGUCdJ>uCzvqSG>iq=-^Ze3;SXj*Q=udkBpGNz>X;T}sm+CJLcAfO9z#rNH-5 zsEEZJ24w8|55)wZym(qDqk_tDiYu@ff6T^n%#;9HWD&4R+|b;FSg|pR( zo$6E=bz}?!p?lz9aPhsh@7#m^zO&4t>kJQdIk1HXt>B$s%yU4}vd!Sfz4q1>d~|3x*r(Z`d=C)Q&|X{04Cbp?V5alck<;Y$vKs*1jVxgu>Nammh2? z`ueC2Mb{WWEByM@^&XpA{<-NAYUHv#*bRKwif@aq^~vxM3tfaXxI4KHf7Y9h;P1nC z(--A&UwO!{gNLko3{dbuYA~bAGloal0d4-aO9SONd=a=NWu{>QweMKJ@9L^5>mR- z0QMS8KxqWBp#7aQ1Jwbmq6^NfW#_20CL-@cwBA#)kef>h*0hy$u<+9@!ljQp)ZeUrSgMq8B0Xn}hW2dxZKI366m=|pVOz{#gz zyKi&em{YycN=`>5ITZzj}l;(*;!DvkGpR zw;;trO9)tK(2Cttw5Ys+jd7ssi8xsc(epOsD%_xkqx8dFLz{z5MJ^@!Pum?mR`h#q-wH9z-Sei=F z2Q!86FfpduI8NqQEYSDJQT(GErL-0<@f? zq10&C$Rk4m;>6b3UG~Ntyqqj%eP-klM-X4JLOBFs<~R{!2Q(Ve8)=`HaaR{jG3Tn1 z?6q!c0E$N6PMTsf!~P*QVp6v^znbEe2XmCsr&Jg^vUh7cG1}P{OO_3ZzBpSM?h9k4 za!zHh-|gf`{Fl;X>YtR)0n(u%{vfF0vf+-#7&V!GWu!laJgd=Q)N>i03V}i#;RkKm zf=ruN$!SipBn2b@xg1YEj?=Ff@UQU#{{Pu~x9v8PBT>}PSWEvBy}6T zW-J28Yh+|ZWL%9~Dq`-Vy_5QrwnVFcsJkKWocN`9?aJ&TqJwoQ&CmrxWN~{B{p?S| z?<_4G&`S%!X{J`5x8-jxEk4KxbRcK8B~RGfyJPh|eyCs1WBFek2|mNQKg{F05sH_6 zQ}>=$w7@#r4BB^o{lPN}^*irw<5> zYAGcCtWA;&!NW%oQ6$8uSgt{$>Y{R^J(wjMI-BJ3TdBlo!Q_a^KlV5%0zw3%S%Fwk z*dqRt(o0A2C77wFkrxw%goMsOXuvLUJjV3DvQ;EgA)@u0Qh~I~RQ@AY!u)P#LM?pS zbflULDnIyS(Bg3R0RaA>rGU~ie$6r{+hxUv$5&X?s!)ZypyhahFFQ8RY!P8wFUKX8~!Bm`Q_2m1IM1~PQy{AuJ9`Lca zHNx$V3*;iftt(WewjAHRt>uBs)(nAtt=WNz9_*8+tp<%~ssFayed6R^Cw6;@6~`Q+ z@6roR{)>69izWi`BAFx**)=F;msWMApy~^JeUVA&mcLOAG6 zub}Q#7d@StfkE2>A$u1wF-bGQNp=(JplREyliiKTQ*(~oK9iO~hq^|^V4?x^KKB~J)>z9})WEB#8Cxhs4?N~d0C-7pAA^LD@{ih$L zLq*kTuf>2pNV?nITG#*8n|@zu7=a=4b$olC$fA9{%VBm7Ir>Xn4&3y|(&NBWk5Z$N z92FhN{OII(*l`3Dm%vJL1(*yU#3hWU3ejXk@n*GlKvVW4u8Suv#$c9CU2vQ&ZlNuK zWqD_WbCHzAjmzF$m2aY~W@_74@6wC)`q`PbD5%d8+;!h9V4^ zuu3f_$u;Uz;+Y_glN1El_er}5vSD_4nM~S)9Cj=KTSnk9CMxqUHJ<~hE?VgsieJU?bp4N1E~Faw)^9YgXr)%29+QD_3-rU zR3*v^6l;$TkIoK$67uD%lf#$0C;t)s>EJ)Y72Bh;gTJ2P;76}tya*^IwAuR-W=1ikO6Xypd3r_r2hVq3zc};N=O*l6^_by&ALQIrj$=G9 z6pv+Ov7p#MNgnZ}RNAY1pOmRhLLE*Ho*$eX9PJ&*Yu`u*P0-Zti|40U%--(l-tPXv z{q8pgOjKyc5aF>pQ&QM$p`s%Q}hGRJ7>90@GS&Ue#KNCG?)|p+lf5M zQ-Zluf@9&7;-xxCFDLOe8>xXKO(_|Oo{ApnItXN~WW}=Xnz)q1WCUJ|$gbOZf6ZQMFQcV2VlQ zqm|xjd!_Ck?g4_?r2odd>p2r|t#&rL4Bi#fv{%63L@({(tsWuzhvnXt3l8!wiC&2v zpJcd!Ax>DMxhiPEDE9oH6PC>9+dae&}(VycraBx9kmiHnB@T2_fF7ADJbYFjAhxgIB z#-ASK%uXRWDP%`doYp5ay-RMX{C7Va&aOv~DL2XG^z?Ebo-a!w_&)48pIu*rxg#w9 zj%Sl`mY2!U!{N0!Oz~O^H&JQ&2EBOtV6xT`)E^&WWVk+8#)k#f#R}!4{v$qv^u+f+ zT{)CjM7}?`9{lmwo3nKnS?7pSI1wziKJej#hcqZ zRY-_bNIjgK^cXM%kD_EDq8{!2@nG*yK$gItIp|-NP4Rq6Na2IS=bH#~zyeC}mqI%D zpv)Uc{!0+EE&+y*DhFUN3_aJ7@f=G}#|B1L)bBkgKIinH`h|C|k)_60FIaxQ|B@9r z^}v$;J=nm=McKegzPs1h>K&&CcOP~SGK~@t)gLgE`c4Uz_r@19i=uN9b@UGpvKMb@@|uk*Rx{DMv0c2!|4O zk<$XGC#Om8{bny75YqKX&2=a1_e&fO#IpCBooIJ3h*n_86{$LtG1$sBY59)*W0p>& zp}xdgaz{hDkFAT>FIW)32Ef)N=1ub{6*?1Ag z%G!wIDNo1^D=!)i&fF-O=OYWu)uZ_QGGwh4zNOm)b6^gK5JOG4Apku|@+)4M_w-#l zhFR%i+_6yB8k75*Hi=kk=!XZduL&k+E*JQ<0)haQ#wqtJYO9P z;D8o1f;YRk&gUuARVk#o6T?$ny_O(pAh=L^31m{Mm$0$dul9iq6KGBk&eYgHI0e0; zhBQe?X~W~UgK1MIDYypZ_wn8d5Fj}+LTVt*x>9+}=n{>QYi~`|HCuhpv*}gAm==!) z1shFa?EBNa!%?10oV_?psmU9@ z67k?@KL`ek!t9gtnS}CZ2l7AyS!x;X0c7WME4nMxSI8f4r4?nv0m2{trD5PNHWYlk zC;VTN1Lv25|4hPPlD1zB{C~+;(k6TkN+7v?9F!ot^@GqoN~-^hrF)MLzf-}zllD44gk!+=~hhtFM9r)n-m1l-FUXMy}hxy@$CEd`tn}>%lwB<&pq&ZUR`f* zwAa`2NqA5@W`Dv~HH+~(6n)*6qllHoX+tGgd8!QO_E+udBew}Q8p!8 zhin5@!~QVNp@yn#q`#sTN>wR;1>GF|FuldvHu8Iw9JN-f{lScY|BF#zd$34SGJ!@c z#tc|!$V7rxbjiS=8z_f`8$#I|^!T!z-vLFLjOjwAJRrV(o7bRyde(UMo^Duk@P zOH4^d@pn(Gs$oYHlK#05hS6?BQIBR}(He{ZnLx#Ypv+Mt8HwvDw$ZK6rWdQ<)xD*D zaaj!-H5|9-)ytcr+?1as6X)%RM2jHZq1il|N@p+a>vw|`b`LpR@-9{UK!trTdq>q> zYt_LPg4E3fVm1&|5b%pjY(m*<7i>l?t0wRswHSo$D85K!u2z+ybv%swNu%C@d|$mf z`{r+dD}1#kzj5QrFX|s*%~*1Y}cXnbmlf0#;o!dp8?{tq;69 z(0oWv+0Y^$2{tN_IaJpMSwiU`Cgf!ygsJ$w zkU+8E;d>}gXjZ2ZHk^x&(h&Q?7y;9EWd+H5D<1MKT1ol`ba5#;P*vDWJn)liYl}s zOq?rMWujT(NT#U~$0>}v&H8HO_v%eLNNmKBW!sDt zPRElQA^ddjj|VSzdw)JSIXyf+0uNgJS~^Aa_J8ah?Y=yKa9Af>i8%Z^6XFC$q|;tc z3UM;Ui&e}*VP!8JV1}%a#47tLzX6pk#$@BSo10N(-Zb-t(-PC@iAMUg+b1u?H|~Hv zGM1ZS83nHZRdl1~Nj4&WlDO8sOr|gJyGD;(rg}{31#qPpVrKGh%}o;m)_>(M+>|V4F^^+$eTkO#7|9k3h<7}l8m4R%e`*s4B zNKN$7nfVi@%1Lg-`A)9{ovjDPlkw$({^^lCEB@3D4)AK2z;a}2A;>5kkH^E5N)|(W zFjO*(wad-WAV)ipOI05By!1=zKZq#D$+~reqM@OaDI|~Sehj9wt~+q7cib}#r@}VC zWW7Dm^pEh#mp^qLes#OidJ&x@Rj`u&6w%eR%>CHi@-5yia^P9if3Jr0G&f+&t@PIf z1}JBC#(bES7rnIvT3)Bq1|xI*VV0LtIQZR{ORzdgD`fjqDXoey@>Ugga?{a@ix4W; z-sq6#IL|$))J7l$&6`?r6~~%FT1~8vr67} zn^}6e%wi?kWyaaz@I1DjGmFc^COAaUZbeH!vZMQ?;zS-=*fhMtGqf)#r8J4@9f~VD z`CwZf63@Gfe=u!kq5NWjL!FTM!Kw?0>Kamq#MA6L?W2}NDnu~bH*mFP`yndU*;wn8 zkK~~mEG=G8idvZ88PPVMJCx>j`1*trN2ZNksqRbg(K`C4)th(Ww$jxQqdRQ2D#I5H z1(hY)O#0m)LNwVWy*7q4=RSax@GQ~aA977<1_y_N6tXI%(SWgWZldD5ZmyD%yac8! zkh(VF9gbVAm)2ohlooktl8401n^V1r>-sFjqt!Z>PvH?f=fk3h-UR!V!?-es#>R6P zG_@(`&kOKq8g4#|=Iu7-w5!6U!?KkMs=}%xzA7T>zqg^3WttXT$D`Zu(S#)MyGr5r z7f>HU8ZQUzqEdNf$^5fy{tg?iCaK?1S}&E@72ya-skL9COe(daoEv4ce^#Z`ZIAlJdy~YQq*I>o@YR zVOjGEEwd544MUY2%T;#;5+@J86%nYYRL@saiu)@+)356UE1g}n(wIbe!dafXRHD*s zFjV&vF`YMtFM=gC;Yrt(bu#s#iq=#SQ=M(FZYJOaO=ce44_R=V_{uy5o_9AW-@B3zo;a}K2xez7OWlCVDzVGr6F9I8P! z6IF=KqUW7QxFq+vBPHshM?!iMXEq)Y(Tjz0l`D&&Ux*%N*H9QXMdb(OlX9GCJWhud zl0R9xn1TYsb7CsV+bmBe+Hq~(Zd7&Kb!Q9fiv^FZv+d@KZnvd;7DL{-);2GE`-FUT zuodNI_Q{I#D)^?&TMs<*dvg|rY3$6Hki{-)yx(Y=ijF;q>mxTdKAjs+cpc1_YvDGgVtVb z`yaCeOJS`=5yl_#NwFrEwVB?~&`ds_O7w}?R^i&4^y7p8ekC*R^ zxl%2#wTDO#T`RK-8P9ULcpX4!WsoisyafzG%ot?zXlGb%16t;PTE_qX8sh&qpKU$e zSjPV^zm8?|IZZv|Ho8tyh;BG ze86SYHXL+>k?t}(CluiO@B-q*Yc=sM6?}9C%KsR0gio{U)!CTE=Cd(SFFB4U@o<<7 z;kD9Fa*XGfp_T30LEOJ$eXrwmG!_4!N2wS;yBV2Ce;SN3bz&T{DIUf@aYr>VZ>b?WYhQ=Vk*q zg>nl1lmKGs7pd;Pho9Uw;?#0T|D_ZI19;x&rt=W@>>_@mDCrLgV zPE8&%dBk*kj5H|_3fh^p2Tm>m{7zPqfl^^IA(_z_IA2Z$Di5hpmg!GlLP0C@9p!b9 zp(Pk_;n|5e9^Yn8DWcKbGTM~86}fDdN6n2|(O;?(9JA4sZCgHD+^?xN7h5`(|KPVC zYmA$1`pw-se+A;tnIKC)5@pl-oJRpeR7{#%ak$(ifZ|dc;wT<3$q0*_P|5Tn@xtL? zG>0HwCQ~0RglxNlOp0E#i&e~thnC{$RXjBV$X{OoeZ-nOS8#-dwaR9v%s!9RC1h$A z{CRgeouudD&B`pp>IQY>P1TuS)DOBP-A%#;`~U>(Jel4k$w;CQPKyiTDt|$nI)(ws zZ)Ov_&?`|ZrG4&5G8HM}g;sldD<&VNlh`gjL5VX2qEP}3!F{GyW#z-4j*d?bdi#eb zryU@xU=@* z!u>pg>RQ*?U?vcSf8XnkZz+#n&l*qDOY7Toki_=1IW&}Ceez^vv3*t;%2*V(Wnl=> z?G`(K2fbr2%+46t8!w(?kKkHOs)vh*XSQqCDW$J9U6Tw3iZ< z#CX(dz>361rClg@bwygK zLJn93*3F!(;?db3L(cvdOzd^6kQp0$;?~;GL_H9Sw2J8>_&+<+KZCXF|i!_$4X+O%>H z%M`enjFTKniA{2roBnAVnHlGxl9bhGU8Gyfy`bd;PASPF{G3pDjNRC>Pa?bgMdER8RO4T zXzhTcyTc0(M9lT7OlGW3 z^pTDD$u97=D_(}HR3e(pV6i?aiqTMOBG12Mz!xiz+i&StKMDezjvASG@r`FBUpJaS zZ=bfJjh!YI7mUY|kQX<0RG*^po2Y4fketsh8+Ep5aaX<_=(&&;Duh!QT5qY}t(_l- zPztNEX?%puX|@jk31-)`Np*pG17cwXLOlwH8pHMAMzJm;QBEzF8JhX6APvu>$J)qOCWx-EyEX3}T`3E8~6GPb3QOpT#f zf&4?TY}u+|t=LR%hU0Jko=LpN*11;H0koaufHm)Monl8jXF|bshGdDhWf+r7qt}}g zNn$B1y2a@lzTNROgk28Jwp*CqO1f%gRkDs6H5X!_%L%Y;!EA&)F0PkKcPTJw`&)i~ z;2%}rcU!gFZGmAI!&!bMQ||E!q>`_5hFjp}SXle~R@Tn#YPT2c+OvMO@j~;zW}%Fz z)hm3NZsG|%g&XxF$zM^#<*U6Av=5{VakE1ARZeSbp~{kZS?A-++9mye7#l3wH^azK zv`w;QcR2)WekT)bXZ|cUV@jm%dgy030^hI1yt{Aaz})qjwecz*_Nl}v&gJx;Xsj+i zt<2hhW=XjQkZyL}5Kqa8xnWdg(Pgw<&jHW4(CE zZ0cTe!`MOK#Km1&Ukeu7E9mtgLBm_C3F+xn8(YUm%~7F-@7NdxK5S3VQ*+ODSGxO@@mN>?ZxP*CWo{#x}@OVnrAAkFpz6jYG^b zAM8`T8FH26bm(_ok9&ISDeLrTiU(BNuzRfV2E05mH3@K9S<3XL?KK3ws628`3Ran$m1m z9sxnJPFGLW_|79JyzsUy70Gum;N-Ow;%MAjIDG{j**6hp&mSep0Dr$i5*EccIt;25 zbGDMjrk`Bcs4YZ4H4(Eh8x?$LsS>ND7{xSqY+AN|K5VzAdf*Mije0SGE!UPty?Q`Y z0~YiY{GsB!#5=W8s@RG8Fb^CWNKfCpBv7Kn9&2PRsE8Hi5=!YnPI?rgQL+)2I2Snc z%6!v%_Q5PII?By*C1@0>hO{bl*^9nq$sU%>(FDg-H06YI%1M9_8NkR@(Y)O!Y*FAM ze(S-vaBimb!WsJazU1HhKlC7(O_Sc`BpzS&Mu`x;&xPR8$jO362j68#5T2cOA@d4d zCzE$WB94u47aKo>6ksj?0r%-qJ5mZFR5;rA6J0y-Rc}qwimDui8`Ks|a}(4!D57ye zUdp7r>(GLH#7B%?OapHN_V>XY@YcmHhX`V~Sc5fmImf~*;psoo5cs`EXY^eEUdox~ z+gxl(M53UQF`#{JD~Omt+hX#WZ#UA?g_hBc^&+dis3Y+Zu=>bG{X~!p?Wq8JyQDFx z7SAn9TL+FsuGoGW;%9mD|9ZhsHYO7r?)QjM)htESQNYO`upRasSq#Y!ry z3hFDax~c%TZU}@@i)El#03u8EE5s?#(9f{}vjT|^WCJd>@E%%qj%0?qIFf5^a}yoa zF47_$&?<0=(B>ufkN_(RW}vVoQazY>qFu~}p~Uu-J{2>22ssQ5nJ^KWH~Lqg*BP`c zm{JLnCtw8-Bgh?@JbYi=h+5tHsX#WZhz;N1(-iEne3ZF%*nQ)aWuRnCu&9Qqml5_lGB5cQLu`wu zmdT*7eb7}9=@;Xqeew5{f+>j&vu@t7U~($4Bm|>}b4;f|iDUsXvd>XT3zzC3weWd# zN=%V-fK_(WwJY@fXSeehZn3I+7NQp|WA>hAGf4C^pKi<)sEgCf_a~}Sh+gE?>BT-b5(6<>DyX-aNhNz`J^}fb~oEDV%oiL>M!Uv25iB$Jh7Nz*c*vYH;8bqsQwD- ztn|I?+H89f@#hXY@TPJd?RLA-A(Z0**7h=)US;AwiZ6Lm0k5&u@xLs(s+EZ+MgV~* z1Iq7(ZJ?)<^W};5 z(Hyf5EEFa|6iTOv5tY5VP72;!hSblOMwApH$eK9Cxg-gFlw*vEA!lhaD53n4J3h#O zGPZ@|;6n^A5pmttJJB0h{@YfxA^yxKeesA&@@X!<*rYGk;&CbvL;NwDro;U0r+a%- z*=9ID?tnYpFXNBhfSG( zlU@~fL0Mo)+IfLcGC+CeC!kjLad^(_HMO-#W13mG2VT*jxb~>A*u$aXE*7+&`wZmd zcTk(xOxZ^D28+rCs~b-wW#*OIJlSep>bU$_JMlsmG%mEi$0vH7$5WnfU2B==8uxdRQTi!C22*+C)`I` z^uGetwqpT;*dRGB8%g?!O(*dvPXXyl9i0u}Em0e@MLjSV1V!AO6Y{buMKJ6r9WKqP zO`x&WOE}4Q$TV{4r#Ar_`sLRO>mZjJ8{8PG#qEPhOoyChl>a>hz@bCn97 z4a1Ex2?9AByrALx6)O~d#p2Wg7}t_s(E$N;fHV}ngYYhj=Iu1^d0GT5QDFCjZe{qP z8CYxmU|c-1@d7zF)?a8;fTyQVfeyDfQ6n{ARpl(rd+bFFM(SHW*W;05 ztS3Jo``TYEr6O&oqT$Z6OR3f`l3E1twjl8=a($~`;ZlS017GENb3M!*bph_kdashn zb(%xGphw`tBEa)5cDb*hZQ5Xg*!XrUw`}-L8s^*Iwu)-0fz!sP6ya7ClR(>XZ1se{ zU5oG2v7?G$E|rXiyaqPq$=?8E&GJ@)fC;40}J&MJ9!9PHz56fNDCSuc`qDFH^(5dpdCc5$WgM&1ma#oINx*G z8!6zWR?7X85v=DxUmcZmD~jJ|=^%ojh%51wMaz%$ARP-ta7iUOn2YD^M#tlk>p0|w zbdBoRyrt(#{BeEi#>euwRcXCxV{bVeUriElvl8fhIRgVAnG}PuhUAPRpwORMk?eT- zkdgubRL5M`Jhr#3esbLQ>Y7FNG^n6A>*;CXi~$a$kbirELGgBP>o=!g0E0k$ze$Xc zGfGKIGbU4m5%_unrp20zUhD}tOO{4=oojKRxe(&9BlJa`Oj#B|&%ki7KJ;sr;ok*& zhba|B&7-#ZCL&s$c%9|(iS4257+)Z1TvwUf$P0)XIKqoe^y8v0F=px^uwBMaR(8Z28Rv1{#&W#nIU!K$#Ma8X5zA4-=pvpuBc$_lrNUE9N*@ z&>%LMmruY+suV~Hz-Wwqe|pjZI9BMYSMk2F$OYOg_}VC+O;qjf6iS`GQNnk%;9ciSnwV77dwx;IL^=I_U~g)7t%6%x=4rJ5w&^ zN?shlWPWm-l39QPDjxKp^I4@A>naGIBS2(4U)PuaiM*8Xs|K5@84fIyQ5oPvgxzSs20+cq-X1uc-2qjj zEg^&UZI1^uVOS!>a#!XM%S+uumJ*T!O`(QTxC-p<;^u;_6x_NDfZ^7X5Og^(+w#kj zqTgt^&PMrFI(GV6KUFRa?$WE^g|i9Q@5rL=TfAqIJ4b@Z_!OR{9{8Gmcd#NG!G}lC zIM2834GXzTM39FMDUYHO>orD9~13Ii+V{kVS?k%Fvcah$mULWGn4BCjAsPYFGZZ3(XkMyAM_qQ-0_J@uRP zy}b7LH=#nP`gZ9nw=|-*z+7AN@43GTTC-@Gb8D#k%L(5do}U?B_Xmbg-#bsoQBAMG zbQvWIXIDv4x@TKOF=zKmMIrwUrlP4t{VQmjxGVR3z0*Q;A}A@(O+)0{U?(7c<@wi! z4D0hyU_E^7KFw5_Hi{Pu3`LYX-qiH1<^3=>_l{8o*+*Tk*G=TRC!e&A-(Sf!JD$QL zo9Sqti^e%i2VlbJDU?Yq)`{T;cIhN_hMa!WtK>Q=x{*nZl3onqUYVlE0TX<1+?PXX zUW`c-HpMfL`)!AECWT@1RX1#DJSCHR-8{kP&Qv~qL0NqH0s-|TsmV|rE9-|jU%4Eh zBA{0yOY>|Te6o2GkH``Jt!tgv9p7@5k^oeDBg-8ZlBSou#dLlX>NMv`7D&Ppe&j5k zxWuJvRWgh)5kb``%PV(mR(!eyXGLNJ0p^VIAWtiK`Yi}XhGvzYgC*kmeffE@*qLGQ zLpLXh=J0$ISP(44Qi_T?hnXzkN9Q=fpjVC^7d(a16G=hE$TBdEveBw`NouwW3V7Ro z;0qH*f8y(S+P`wO`T65p8S;KSdSD}iVl@nCPHc@^(%y253c9Aq*RR)oP0Cp07Wfce zuBd`)E)W867dKXKe1-qc^BaFVRon*v29=~%un&ej${m53^u zF-+C#ra#_vRyW_Ob^Iww#z@PCNi5JFeD8pwQAGw_i&a1m$s8q|lY|U?az$@}2#tED zBglRoa-h_|;d7(00o%AB@vN+!kB8~BAv!eQtZwKvo&tO$AkA!j`Rw8%{jerrB>oXU zrg_q~Pf4&uK~Wo%`u}|MKX+IE?|Ai}>#N`Q-u@2O2x(xjT{FDx62|7Q1gPeF?;Ynhd7@z037(ylA^vp)5859+H#NvJ*A^m>@x>C$8W- zyT-lFG36G5Z(7Yt>aEub)?+GSDvidGcpHom5*Zda8pu+uc{EPNWCKdMCS6u%V1tph zN!lfT-To3=*XboyD#^{Cz4i5HJusFyh@;AI&Bo$~fkRE@mkro?b1^|r9EVcV{{%B? zY|v047*P%o4_+Lc9YoJhj$cO3w(G$Xx*pf= zZ=y4dB0%Tw5f)F8n-JfiB=+B^s*sqZ54je&XR?d8-jz!uzCbQe_B;D1O30^TV|!M4K);tzt9y?6xVu@Qq%^TV<|K2VUbnac7@jfQoTnl16FXevQcPtXhv z>yAb>*!@G8gyDm~3wkl?-*PN~!eevJ!;jJOR@!iOuc`}7d2uK44F|jTI@jMyanv_} zJh)2oG|c9GXt1%W%KPkPFW)#Wc7Q<8(6A}Y>kQzn>=K?{yoLuUDn-&ku4nfk8Z4q= z&(-4lZF&J>4IeaP>Q>GQ^u%l)#K~Gv&50g)QGb%<`6@od&nb$Ro(nVa=XrWL%Vv4I z2p|U(#l^`ej=~QV(t#$yLRdSd1)}ArXx5$s68%;|Z0MRxv0zA%j!Q6<}>P z!TY44=WA9QMYp#`VHoQ2G(#r70uGjrglIY?@LkL#C$4fFQM- zO`Fa!Kx1u#9y}-D(V*c8Py0NelhIu&Mg+r*N-&gIRx$(wfTHajG1x$@XrozlD`0P% zXtOezOT_mBT##lEsL*jwd9l5iVWn=jVIT~I!`ciXYcsU4HWa~$CxZ~JwOU?WBoi@9 z(v#EbH8yMWw!=Viud~@~7H=9y?yumk&3WKl$D>=1kY2ABzB@cRJvccN|C}91Ud2Jc zxXdJc4LkOo#p3OhP+^0}pLbupJ~(as(2C&yn!fe7$_}YZ9W@|`s1r?rHUk7;90<}Gh9hrFi+<3Xhig6cXv_^Jf?#Xzx|GMH+h%5iEYcu%yI)mBYJ zgbc9kVyY)L^BWVE`Ifm#c)}{XRxe*1wCUzCPu zwy46b+><=iIptil->XQm~^TRGF9G;}>NKw2ge9#lH6_wmYwoC4(>!NcW1r z&|waDjBuDA-QbuT7l~6qhZv0r0k2dYnt+K+YIloBs2+{5cqGEzqYyk`n;U&_*T{qU zZka{&oEH{Q9uiQ<-5qfGC)`!F2c2XM?aOrPNXGT{`(*Uq{nGc{2$b+7D!RTz^zg~~Z}@&=eRE^$H_?aB z0e~4$Uori^==pDMM%U9+2z(pQcAjmlf4{N0-d(D?Qa_kU++Cvg8a)}M;sHnul5pFR^9i1+{5&d&O8qV?tdfAs(A_4-RZ zTS`|7>HpKID!)zvpwS?ilK)w|R>P#eQqMb{WbcLOE#%qgG8rY4v`;QTtNCpX`U>zU z8$^8g6mnBx5(XHqM%kz_?;&`Zq1#z~|04K>5P9%YB> z^_1&aJQC6wz3yr?`Q25VUk%f9>H-SQ{xF8bbgGa33YMP?2hNx)Lrm9+@Ne6ZR|xozr%&71A3 zQ@RJ-NKLtek>9`i24G`8(Q~VV z>4ZtcEyFXoQ&HWlyG}ki;)om40IoDIT~HgdPz{XKpb!mkR7bF?Gk#-@*eQ5@VYU%k znI!<Kq&)&n|n-u@2g51@n+V3H~f5Fg9!;<$Uv~07rmz?97xR3fA`5mRSQ|#7_ z;&FbJP0b-uEffO;`t}bv`Zd<2(QqVF6fCZ%M{M*VEE?+o;OXg_RdutnI`gxY4S6LB z%m(R6+JtD>jl={h^CoCw%rk?lL>E&501OxK|8;tY1qcxnRf6FE+;T&#TRe8ejD;-5Ca?o++!OMze>F&Jn{Cp~H`D7<^$5&!>Y z{Vi9J@dz=P29SdB#roey_4eO0@bZBXo0djvs7$Bzc3oXZaMJ>4(LU~oUvsV=B3f|H zzfk@1o3>jD6t{B^=>-3oDzNqo) z`1J6vtNHX+e3e61<9G@sb=%&!KXGu`Q2S?+IybrT`)jK7$!`O_y{+Ya5O+tTDc%^k zD`KWL7Tw}}{!0fD$NDl<5~H^8XyJq%s?e=;D+`<@(OwSF zX4SR1o5Hp61>b9JYM?u;4PIAJ_1CkMApC-mZxdbb8erigpfZrMDccG$DP?~#;P;5@ z98BO8s~am&H5bJpHT}PJtIvG@1A1IE#DaA4mg@y+J}%fdj1>JZ{9RAu{_YhGaA7cd z5jYn8Fr%TDD8)+@r_w(%TBOElQ{g{FSqq$8izk#1tM)6>k1qB9OZ~sn|L?4CukUO; zUF!dr`u~FdUx)5q$DQ|1;_TYcKIg za~%b40*ZC$c2MzuF%di!+{{UD2#Gd8`zyo6#beD3qJqj#|NVuKGiqwKcJ$ITfod`k zy8%^r5~>Yx4r%YytuhW)FL<9C-}Pnly*kf8wThdmwb@3S_L zb>T?Be1J~wwVFvi5KK6r^u7vU4b#7=Rlp}od7rbQ0}5ao)3|9 zAD}gtRtt5wJ#%~wqXp1QeA}@c=J#bUa-EvU>_o8?%uGG(!g`fbz(KL(!&tyrCT>hr zf}?UIizJe@Oacdxlt}=VBT&YW=x0MN|N7Uz_QXx;Pf~ou3bOfg3j!F$BE(Mu^~g6| z^i|k6!a7^+=pgQ2nT@EccMZ`Ns(@=e5@$-Xh4b6U$`YY-Xb|d_0heZFq(VpRNuXP`s7sSf^eCjY z*OcD!IEv2T(rZbzBbN4-7nGho(j&3)hO~ZW+U8q3!%M+u&z!dFC++nKQwL3v(KH%n zf~kxCZ#Fz3vGVM@hT^p8FS+Q(MZFXzq>8P3lP-(LH0X#LG*T<7ON0s)K4inrE0bdg zj@)ur{!53@rIV2Jb!h=@`LS6srJX-_uS)uH6Hi8hlnMA4v?X8w(N`%nR^*&3`VP4X zqNSH!!xY$Vk@G(OP4%LRtFTwFhjoj|t})>>ie9@?T^@Zb9F&~=5KkN@TyQ>l0k0hp zsJWcG*M{rdV2bzaPITRtCub!C;`i6(F(gw=Y%;C{4Ip>Oru#Zcl`SU3nv@$-VvKJ+ z2*K#}RPEVw%#hPzoU_^Ss%;N-H3e|ouVr5viuq%1<3Y&4;2nubE2*t>68LP-D#P8ARW(qJhP=<&}A3>y;? zCe!)nuQz+)+btLh<)54mz|~Gi7n$g^ACO|jo^&A&X=+~pBEIKlYvJt5|b*CAJ}nfu2mdi zVe%L+reHLqAr$k3H8HVDk|Yp@W+Fn4v*@A zQYZ${DkT~pq$KMvrvgZ2ftPcU{m_$tznFa4by{aJV3q%P^wt;-bX6+_%F%tiU zuWv-D2~b81=WiCP?g11%?giy(sK1HMD2eYR8xGH7y7}(C4{denW(f+%+wEun< z`|o$g{`>vr*3S0!v-ZyR=GNA;o#lO8+J6i7-+b+E_y5_mXW{!V{@&Q$*xFv-dAhN& z1My!QJL3LtFYo`-|7UsrpUwYg``P-_?agOP|DPrOFQNa+{8HTmsNDYtnFG-Ojc42I z;-6>W|FgBejQ{;C|H-F2hC*DUzW_WDSIl{@uu#A_dUZR!5-&w*9&axR#ghLo`Twtl z|L?4CZ>?{Bx8(mz{$IlXc@>`xR%78^a(k0a26<5~nB)JuvH5I6`u}ckZa#e~`2P+( z1DE{&GyI4CzgiP4PNNeYsUwZlk;;ON6c(9K$zkIY%&gJVNj8}EL5R4?Fw&5kwxXZa zP(Pzm|7Y~k)8DnCX;!18tvO^8pQe32w~#wDnhm0hIKKidd~}HFY^9i&k_+a+1s`19 z-}n%#v3p>8|2g^}r^iRp`7k@z{+}56^fN7*_KXGUYGx-=4W1%MlF@)HZ233=-{G-Z z^eSaO$&+}}7Xw6jdO1oj(tZqCO>7_HzvtP=bswJN7F@~Y7rZ`{c{uDLrTiL{&xh?B zA$B$@;DU7cSe1E&^EpSWk;U$$Dq4fZMrVA9{xM4?w>mZWs&vqjp6PUubVhCGD5brI zos%y)_(CpR3N*U#mof_4wZDwD1Gk3mglh9E2#^&EAS-Ht+C4&NXpSY~0Yc$jn2k~a z5e@m8r>vOQLuK#eVE61GI@|s6#X)rVJUTi)iw^#JczSkfZk261P6v>W|KO*CljzmS z;mh5V|A_u{@E^84S9Xog4*q(ELms_;@gjPCbolSD4{AkYuUb&E=I#>W@y%ATtS&`# z1GLNnp`yU*G+Tv#n-8&f>l_>asFFJh3Tl5|5X^7LfEbxtPNut)7~}cz$-&`IM^^J@ zbaL?g;N;+F@8DFzrvcvznk7rg?d_iK?d~6x48}Le156nDk_X| zG2ShWC(rEgX#e1^&Y7iy5BkW|AFyJx1^HL=Zev?Gp$jjG2^?fmhw>1!Ff1xg#Ez_9 zATg3rLv|&}<|3QTt!TP$OVC}xu~(wqqkVG!_AIt;m6erq8Sl@I$H;t?g?BA5aW;Z#G4Ikb zM$zCNSujh}Tg{U}h|%(XHk@6L6l~D*&I>&`M8k271 z@NooC&Lyk4#J|F;W^6lL@BP5O1O=6B5dG`cG09fB`5T<{8b!7YKu%C(~&%c~TQXhxnJ&liDI9 zR*V~e;+S5Z%(1;dkGa+tX!K;h0p^J^Gdw9b!Nlq`eDFiild)u1LtGx4)Wo=_Wwqx} z3$zzUpxTt;acqptVO$!=hUT7}!z);;4LmqKkk&sfC`AyD!cT zPNeNZjj+4FAMG8#c>VIono8Oq)VTK2?6}1NSt;FR>NWWv08e-cQQDV4%PSKDiw9@V z?t7BN1K;FP!!uY@K23tKu7Ir%1#3PPSiC9&y12&A2xt$076Y_x%c*#&8O#?njHp6l zIQp@Idfs4@Hhs&q7#0L4kB7;$7*h6RDabD0e@pqVvj5-K&a<7T%lx0q_g~fb-_zb; z77u$D)4VJip#1%}x%q4Z-+!B%8=LDp5DU7w_3YU)|L2!^|2?JmADnCST(D_roJQ9F zA@0*_@M8AQf8Ivd@w9&h{NMBbb9@2CNZWEKA@Hn9L!!W&9uC@xUPC>u*(e3n+u0zA zG5u#UVH3+oket*W1s5bonNDC8*KCXijv@W^^dwEn7ce^~quH9hDcIAGnRtpw06^Nv z{C3p8nq(ui+T%L~lRlz}mEKD4(-`N!gj|*Qww+D0Ybc@-QU3LNJw2(&Qz6rzbh&_3 zhPtL1CSdAGQ1tTI7`|;`;xO4>;~9$;LUB$k692p$n*|pzFRu9gL#m z@gP*VADurPt3)9$-h{`p@WVobbAw5$(@BEWdU6DUcAbI~W(f6}U$^#?N%_GjXhiqGxumigHQDpVWV%$V zW1F-e7H0+85x ztHXz%T4B_fB>$MD6M;MuQorv;TkWl<%~n+RjYk_Q!*I4+MHrot=5O5@wtp3iOR>^h zZLie%34Iftx@otblMD0Ui81hNtzrD7IeHa9W9iVi*Z4)opUZS9G#S&R;3%#Ng(i8GjZv9LZFHLKs3A~t0z_OTSf<}F&(F%*N?{zq z_Elt}*zO>ckpMUbrbbLbd93IpKj%pT1S4rQ%Qqw-lBeDb}S=CX@vn|kP8YCYHbD6=)sJ-+}M-3{w=Rx z?SmSJ&i3@+OwICx^l-tADE}5&J`cWXI-BM<^n|vu0xRIyx&qpp9({{NjuJte>{}s! z)Hg!<*_(b9FAOdLuMBl_X0LizG_4^nKIp~MqPM6X>xo;qt;f1`qZRXsZ?V`5pY6~A z{|L8rSL<8@$)}@}gQMM-2kO#F*Wa!7<~HQ_Mvqk@Ad>{$TP*7ALz%jV<5q(lNM< z9Ceh*``;i!mhT#SKe_r+{ETAdHLv+6W9*0EGm;_|&@|wGocZk*Fl} z!S=SYBD|NA!=Hf79X{u=YI!eC>&?4%9o)U^%zaGh{pc6(yrEV2oi*^qG%q~U$W@k9 zHhT1^Vcv{Vk24TZ>;Ux8|RkU`xN?PLP4O6w4aRk0_hcmxb*P_<~KjiTOI z>?)ZIZ}n?8`o}C5L`c6D7gmiS0iBo?5;`lE<4AC}IR|Q3T2`Qo+ta0F2 z;OzDnQj+@f;mO(S-4_;aoB3a#9v=M^iO<^&|2u(QcaV6%NPDnvZts&?K=SEgOl!Up zZqY-Dpu3U|e2o+gy5+ioa#6>8;m0XQM-w$lm3n@Kljmq@Sa!4@9hJwYyB+bxqGwDC z`30PD0YLXY+OVG0xdNrvwXEYqTuwz_X=of)Q1oc`IdCHt{%|B8)u_-aLb1rJ^%5IW zdl2H=9&SyM6Hte>jXlM`vIx8Qi;Vyu)hPeX9_HX^zx=o!9%KUvG8v_OACNhpThX0i zxH3q<H_Wub;k2kUa=}Bak_t|E1`ggyw_Lx33eLzsw6`>-^E?J;&tjQ`BQWplW4Z%c)v@ z0bbVBCpV#~SDK5D?8TzM$hH?S7Tc+02vtRHd{DF8dIDau?%aQl`?J|KMYQ&Zae5u) z9B>YAHUf*b*!SM+*Dni0drT_sCaEWHHP4m0=qN2`zo3ZS z#wZ)D_9y8y?Z-pc9eUp2JRH`BF^#T=3q!}VFXg2Q6MzcH_^e)n5SV#dnC;#WOjjg9T|L3!fZOH$* z{d9Bb|NBM!f44hAO1sY9L;68DQdx0*FdGeGUbRx5w8!;#g5BZSIh&{oQRo3nT_Pbx zjHaY!KfjHBO0LsUDrSj?w|NS_8c%8)?dZ3^EhKVktMm`Q{cY43K)L!p1_G{%8OK8* zN#J;P1`!ksiKnhcP$sLRl1dJ&^p*o(KX9qI=DAVjIEU9`^jVhCTBGd70H`ImPX>nM zJ!SwC&-p0L^BEYwp42vRcQSsHg~q|QB+OT%cz6@v=4dpxmwyBvOVL<*P6?r%wRF~Z zlu?GQVV38u=%&rscrE^vG6NclsWy?XfUI)&0>sqLJYUpcA^_n!YMf% zG^E2XU7%^E3f$*W4vrikMoz9{}k}UTy(!gm4<9xj>EEWZ(lL zwiIU!3GTp8In(Vy#7q140Ou*WNA$1a5mcA;xzB*9bec>?QA2v! zOMQ^eYpQd^G4$7-8lrhNzGb;m(E>?Ns#Ru_RNqJkEIl}VCzIF6C)U>#CtXICBx#{l zQT1vi&66GQjFpz8n!o7zA_$i_qQQ`8N1-4%jo{F?{0d_(!B>b1#d>N7w?DJvHaO2F z<7h(@?=ba@8c^wxyQeh^QJ~<^h5@(XQDkf@J>I;*pJvw&oh6b+mdmEPl}@5auPd;# zcDegS{Cs_R7VqKnYk4)YoQsuQ7sW9zazyv?)hkT$NPc^vG`!{hc=_!5I-cB?`Pz+V zlW~@py5kLp*S%?$4a?eGCZhy}2XI#^@!YGH(AS@Sx1yWh;wQ?LRDlBpC7{llCc)ZL zz-ws`wJbn*^olwk-W`T3j^@wnxj0amYf53XMSO_-z%%|t-oaf@N}JP`PVVuTnip5E zR)8t{Ah6zDlF4XXYK(UX4kopqLvJhH&SYorBWwUu!k=hT8d9) z16qu9(=gJapw1HFSh6zXtTL}j31?kkSUKe9;I%aSojEMu4a8O!H{oWpsK9e+O%KUw z*)ln^g(cV7nipimcRtbkv1XY<9b(rE4})6$qK z01q-e-WE1Id@hXb;lPU%n(92cIQE*4iVw28@ntKqSY>X!Ww^0N?X%E>*4O{@J!NV3 zJF{2Ms_*iMrBys6L*)xwkZaC8ko$2*8ve6|nUz0Okv4h4%}SVWZs&Uwtm*mg>lbIy zdNA-TpY|x#!~kEYpND&KFdJ@T_S@o{X<4f*=U)p96yjjCw)=4~8e<_2#z4Y2m>K9E z55l<&M$N4l41KQ5V8VB@IwJ{>agS#m4O*Ff_+4SP=eoiy<_6Qkw4~W69yV2X(m6|) zlYTscRZeD*6$d-VcK1AsZW3I6luo0oWRj?HKug>gBv&vNF$xYL2UW%RhPTKVl3c)f zU^ME3>}FI=r%t8DCi3W|9=(BY=9_dlgru9pET8fe_IZmx^jiFx*Rpt2Q>(#M)x0=D zOWjpA9Ed%w3OLLXvshJc<06Uk6cfJ4(r@B>ScKNc^L#SmH`xB=h)jB7y@G?T-o&>u z?BCg1JW(cuHElk4U~RoWjfaDSX49}^8HFvane!LS^Lf<4rlqj#95#$+I%wNs26_kq zW*!3agY_dBV10qRtZ}N(Qvq8ah?#ZiU9rsnxb*+}<@kR++gSg8Ykgzs|F!i0s_6f< z(@REIkeR(VNrv?MRXSeG|7(3y=KtS(`s~@p<_`IPZ7=gbelGtn%l&Jo6CI558H$AF zM0FCx(Lg)jqtxvw0xe4;lSB;S=^(itXYdVX?C;B5d02265Y0xNpKB@t9GpBr=oABw zucG&94#C-!`oBpFnVLGnLI%E0^BijjV|iut1nKoCZGEq&TvSj_C`gOPanJn^2X3P+6fbC(Q$DgQ6|e`Wc9XY2dT@0RlaQvR|y|4+qlApdV~Jbk*p1@*snHl8l)e|-`8{~4u!2k-LidOQPd z9|_>hOgFAZT-miixj{-H{!;frk}QJicl`#i(4qr%d{1ru40FY_WpRV_b0HWi1XfPI?ow#dl2vx zfN?)Se4|S_<9Vn{P3VnoNV;4?0|MMvlM6verp>mlH&yf3O>t|l!1FJn4V53Ros2+! zXw+xZi`DM{O5kLcckAipNZ>=g*=%1WA84dT(?(_jP&*!vlhL5j(7`Lzx&?4;Hp2AT_A@#xln%emLIMFY)d38pP} z7tNMs?zkUX5&U11KV|ohkDk9c+&eSjP5Z|x3*za)nTH~F1ppx<`am9Ih!mK1<~zPQ z(@(;r_3L{lFYSNJ_y4c{{g3ai@0aiY<@>+l`~Q1<|3k{8K@zS2J?H(uv%bAe_P?iF z>s#XCzrOi&b7yP${{Kwxe>?x%_oM(gef2`Tqaf;Kl7lbcbdo0T#XFx>>kU_YmYo}? z)cb{aZ}&yt-O;`f)}*npsp1JR(G{VPPS?{rEh} z(|?M;kB^T2ij{|&Y^-sHZ~Ptz+nwle^ghAZ1yoc-*D)3Kp*R*HN{gp*qklCUy+aY~ z2Fw7{Cham&`(c%!b;Jplr@pvI`zdC6qYb}DZH0jvZt)DJlA4oYmc5&eu`?teyEQgc zOydRXkS#HBG;AK)sC1mgzE+r9V#An!-f{FWf{gli8xJ1|f!gN%Q3(`0fj_om79qwE9`W$sQCI#oPlOCBnX z7OoO)8{%>&GI^-Q=rm7*e4Pc3W?xz06>*+t{WR8WNn=cb3w|*cqAMuMjmmX~DH=uI z>$4AI@ICq?M%2qzdk0eNFMjA5sLI4uIc@2r#SC;IEH|;fVSHacmzVm#rT*_%tpEG& z>382P{ePDFzl!?5jdk+>nGL6DkCYx#*tV2`bLu~AD*vDD?alQ~paq-H)_0cv|6juY zXJehze|V&;QjaB(Kq>6V6DZF6AS@VVOszyA2u?SI7`$?>J&Du?)>dUpt+fGcRJ~2V zF$BF{pC<#X3I+jxc@3mq{aq(Io}`!QC?2wQuyntg%Lx~t<{poi&3V!B;TtJYi9s;5+(mcU{hr|XCL|O>Z zybsPtQ8FT>Sm6*lP&(szIuW9(KeW*XYn#@gHCZYNa3AnhJf-Er-=~h2Wp6Y zzUWD!Rtx=U!QiljE>d4S5<*$pQGqxI;S5r!Y^*g)nJ8aT3K`E#*o)|`VzH~@C5!|Q z5NeJQ!>CN^cwf)(v#lARrR@&$OkOY*Ba91S8_mIyE~d#L^_WWR2mTL90yg}bc?B-~ z2(*Aa$DxenEwoM|9tr>tZe_$exdCBvLi(cU0rW7r7ikSE(s_xcTGaj_d=(v*eo+>T zG4Q5+tOA%_7)uW!Pwp}c1~dsMw`5*QIDgsKN)D_C;+YyTjibj?s+j7LCT$kIq{w}S z_R}j*{wU}BR9p&Vesn2jUAoy6M(+d?#qAQVy5lHU%xPv}j}4SE*E)?fJSm?XYIlD> z+B<&n`sGn^-XC8a|5&$EqR-noQuX_9oS4IB-#1Mg+BW9mD^@7BE7FKNJyd90R*|@x z2w?BEY@gh*s-W|hlNBLTDdA_$n+e;Ky0g$wx!BEVW_SdEzT|g~m}Sk(ITl-KT~g8G z(=lJ19lks`J==ZxN?TU;UZ0!@i1hR?fy?0@h+zXPt);F-?Xb5p9Xm-X-plX+QDQDf z=Bi|AIMBaUa0j+z>7Xi23N{t>#unFAT4U?xC<=}emj$XCuYm;exz3t7fPzp3!5o5N zCy-T=>X+roM{vBTDxGpl28J!b4!xvs8TgYQ!eJ zQt4Xj7oP$Gkej?c5{*w+5*B zvVZ>~gq!&m!=)WzY5!Z=|9&y{zn!PsJ6q45F71Cy`=4w70|wCpZ`NcI_VfSEo&5hd zpFUkD|NqUcXFKcA4s6j&|NqZr|9b`Esd9*)O~D$LD!)cbA_}L^lK;|Y76XtCW$neOOLEmebJa`JyC)EDR`s9_~Jc~f+#-`FN|?E8W<&zGC~fsJ{h8M z{!{tw>2xwf+nDrh``>GAgT(aJfBonG_h0|{|A_w+FOy60M4H^T2j{*LI^RS;4YPB$ za#%k`h*VLXLW?J|S_&}5jNE7RXAbg;)? zrnd-K`NOd+pJE&;dyq9&`RNO}o$CKU;CchqVlb3(r~E0Qha@2iCWpXTlF|Egl8vy? zHzltSULW+R@&z5U5`q9G&)1wC?R6o>;@Zk0+_Yip{pH zP$b|+Q(O2)6SoO!(0;2O?I#zpc)fP)mR9jP?Q1C}?uobcWCFEgx&q)}D-~Zh>YioR zO;s`8k9mC3!yIAQ;7yusj6xaA@?_Fzw#5z1hVK*O(Mj8&Y{y;f-oDBpxIJ2n>Shyl z_{g?gH&xEe0BzC}S%uCZ+T8IjzW@U<4ALHE@gQ&T5x|Prn&W^llW7H?Rmw_)Ho(dZ3a?*BMFKH5(JCRk#n6&;`AA5CZ4ah&Ve z4mN!luo6S+bXWRA(I0!fMBZ%f#E)JYjPREQ)&XPv`!jxL9ohqnku2*UJ@%#hV&ZPjh61+ zh2;xOFC%-@&04YOQP=cakBszFe4S2vy+)o4FIv_@6|sE-SOeK6z{J4OQY`XNbWY)P z6NX!4uv4Y3qP`HE1fesVZS#_5j+1!-T5e|psdjXDfop0-;;!W$K(zxg1F+Y&nFVyK z>=Qb^hz7;+Jfsd_D648t@j6G_bbs4z*G! zA6RKkY!)G_|6M@q?rSOxwbBe-OLNcytn8DIiE4a<!8+;YTqkya@k;IJSPQ=6mC6%6MVqrFRqEN{~+Y0wDuO_5$&rCCe zV0gt5=ih|m&{m55p+ckE{m=1mMvsUNNJgxLuq>AOSs0y*P?WtHS-i-l{P5wI{xAzJArg{Tnh!+@Rap(uyTK@N#8X!q=dA+WY$SZ! zbh{n`61)obQpWKFhv)0~b23RUB!>`aSwzY38=q2wzfau#(zoK=8V*-4KDpTyamL1y zr{-r*1@Mddp&@f0%?dtwt{1&jK0539#sgs_o*5PU{(z}a9X%QdJ^L&T*;8e!Jf->Q(ocgq? zcda15Z9W8na&931=ojcpr6YSXBOdW#on3(RF%x$hmkZjtY$R@U?oNSM6;j%&Vd3ge z^M@9+PK~4^gebF{PhPZ>!WQ3MP|^aD;_8WzoH$~Oka=vHailtFRfVI{R1a175ZeSp z3U+yPe7c&ClYV-U_T?*7j1y}W42pczUxyDuT%{RlRq@J>_U<1*g)4Ar&4hT+*zZLD z^?&}~Xurw(ydI?=Oow3-qw*&o#`%@iEGJt&M!e#$sPdg;vrF)yokIc*0AW)a#uW&d zBkl^&V{W-eAS2Gn>l+WB)~4AS*za87WM!ZKyi)Rf^p4KepW=(+@b>R30cs-2NRWPj z(~Xh;rBaB?E~E=l=p(TY?KFfy!(=*5Cb{(%RJh$Hai!6O*I-=^QAe{J&;s?=6LcW@ zeYD;wQWkpG?05bKP5(B{{sx*U#8VDiyB{q}ZW%{^v;Nk+XFBklsW)MKh4U`4!t~>K z3cqE8Y;@i>VROZmZ=lRVJ({je&6Ld-^^dr?Pir4xl27&0R~93jW|Z~>uzcL0rk-$n?Rr{kPJ<> z-B?g}a2KZd0W?>^P$Ob_I0F$*Ke5CpZ--J6(sVqO5?ecDvXu65(99=i0p%Y93{uhT z9N5{T(80n9*Y|B22*`FkQt&H(t#RsDrd=ye0Bb;$zv_}kIV&7O6>U4x%Vv;ZPB7Go z-MM@`OsC;1XOYIc(ZMhYRB=cbm~jDGf+!PP#JBK-(}P0M2AFdL=hDP5&Ew(tD)zqV zzOiYmMD{Sb zfkiYuG;?hZ(Uh#a?yUD;JFnT&|9|QKul@hO-~4{(>C*py>HlBw|JN?l3p@YM_5a`A z+1QZv|2DTax3-^x|NrLG^`-y+XY&7--tJoOB99uC6?=dQF9aqCFr&rn*(Cr;Ic}^%gnb{y{H6JK_=u4@Vc7@x(nrcqZT(sz40Ul<=&9UxwI9)8;1Qb6=yR z#<*0U$V-4J=yU>(&`l}a4YjqG@ko5kwb4`dBRW8I%nrrQmEBk;uuKLsU z#g(h{y47RD-Y^p{t*#*v?UNVc8*e*tK)fM-Q)~#3D~{?P6xw!>YRGbZM7P=|R}1)2 zfBfW~<1!f~I@!az1zYLuEyq9*lbX6PzO9Xcco+1(#enfxu78#G?n|Izy`iZ!l@ByK zhqa{QF+b(oPtj5rPp9PhEWz7~>Sj~*mOOpsQsVB~4e1c85)=D}g&);`g?BUdl$HVq z&zy%IwB(OVyfx0T`VGR-(p}Cn?XO!7qf&{?^MO*CEZ+eI5_x4UynVTS{TMQO@UQ}^YUg{zz!AsU_h5kYdjsebqOFvkgJ_e)lkzs4 zuKlhXZMY{TuES^CXxl>@!P^dApk|i&NTd9 zSK86Jy(ARnXNuu?hC9JREEIBOf>fZ0VNj%>N;KoeK~!T z{v++{ci`AeAM)txLWtnV+Q0KVL7MW86&bCQ zbS(fJ1DLYlMNSWk7->PZLeU;AIyU^5+n<}L!$CMm12)(g@Y}aPaYUE#P)j1 z&Hf>f!#u%sCstKS;bEZgdMUnn5@D4gTIJ^C<|P*!Xv3a?m2rVT63bANZciY1<55v% zPW~rA4e4Nfe8dsOT5E9;54;5RV0PAE5bbf7pyn}<&`8V(VYAX%^t0>hbn1!wR=i|i zG6_@^6^hRrXDO~vYFyNx$LWv~MH+r>6b!_0I?+cl*r$3^rZ@P|uNf|9n_+twP>KQK zF(+v*-b^u+w&cH445~LFGq28cfy5!JxE_QH*&N)y)$pNp#F)F&9Ek zMF&`#`jkZ>`-0njao&ekw3pkfqVGKH5pfaxMJu*(xN>TB5ipD7yi*O?sWQ;eW(x?=M>LWQ{(;$T5(oQfyhGn}@yqdH2QZgVV+j ztw{c#r;Q-Sd&fu5UmWh8S;RQnKaO6%+6S_JdT=&R9PJ8PIGjOs1Gd!p;-=}wzbzFE zyJ1;S0N2yC;h<20DXo817?}%l7%->R`OZ{x1EWd0q-5=_$;_93u z;u(ME@WaAG9Tat9$v!<@IR{jCTrIw%-G#{uxCj^)^V9=1*)-EkA2U~q@2ofBY*dx} zBC$OO1+YzJG43vQO#!k_wXDm6er?8oU+MLm4@P$95m0ztQ%E4K@+QtzLhe?Cx<++Q z7)$iap``b?7vfd(*cYQ5_X$@K`lZr-UB0aDw%TST*_AB_a3v5ASb8^1J{iNU-*#Mc zg~##<3FDqa+i()jwL zQqOP2OHsXKY%_5QCO)Z%iii92Fp$rwqszjZnpMu5$fva!1Z5sSNN>BrSMgvaqg)uj4ggduVB3`6J5MvrjW^6?(;+G-* zA^EWFTpa#MH?ywvEF|-!R60eo%e#gnR7!}r3qO2TT8&j$G*{g2{7di%T(l5 zHMo93E!QWA^1^$KB(F)opvWss37Up}{wP`oYo%xKAsXqJS!GRd^7XksPN8mAR@z^t zAaEBwfP8K%Y%Vqs4{;9n?hhP2;FI;X{-HR4N5ZK5jAj$_Xeb9v9N6TKZ&M$FYr#w+Lg6gR|M9s;LG zC2V{mrMsk~9Of9P^%Gz`w+VMKz+6hfEpME<#8Y36p~6qbcanYSI#7ZioBSCOzTo~@ zR7Dq*AorHF7fV|=H_4Y8y4YBPWWQogi6^yy(XqpFI*<`t@a+Yjtp>_IUATuPJl>7w z7MAKa+KzH=22P})4KM5V*xGPztej^nw{n;8Tzf6Tm=;HdmStM5WGgNx(-%>xqYK@nngn(1>>kzB_57-pXzS-CVGe8R7iK|KxJp$8yy^E9sNf%ld_RB+{&6w> z5w!4kS@69T@sRSaX9!a%`rFCfJB0*&EXo9v;umw0Hj)qG!65F40BiGeToIOj@k?ss zzx<^zE^pOvEo^Ew$k$p}D~o~gP|CD!7F2&*u8u_v9VIcO@E}aBhNMQM^`NR8Pb+2O zQ6DpeuvEZ&3#X!a|4NoE!n&~F2~AlUAgvus;OHiXWC`+L=gGxPR>+`wznCm!cFywS z%ZaSnVN2JT9Yk=%8F(p)enaLl{Eu0J#dHJ{iwBg@8()5yt{ZbPF^96ae^p#IW`Dci zg_NbYD_%_J!J*DWorAojLFV;9%IMHlm6vW1&cfzq>-ce&4hLvD0Qh652EkZ5yJ4JZ zl8j-KkWmkGv)PUKR7i-GgTR8pJ7V@XZ)^TEioVs_u^yOS63Bh0n7q-NrVY7PMuSEb z$|M)yyp&89%0hxiUm_6I)XsHz0+^}PswUT^S7OJEkR?eb<=kGDZErrzy*V8+=X^?6 zk8T{|9+A((qZUw*-dU%jh z>=;i41i6%v7*X_yOBO>ijzTwA5GcmuFR*Wt%L--FSTG45teHfY>3dx^uUE*fb+oI-NRO zvFL;C`B!g4{S3OPZtR<2FVg1giRjZ?K-W51NPH6Yry_efgUlEyRi;!I0!2!7dBvxc zyx|uTK<+4ISg82R6)zrChL|fq+?SotXX!<@9yY6bEu+==dI@irUey0Td++|-HgY8j z=Vw;wf51oAT$2-uCiSoB+{oB7jkA9*X z1Z7F~B%0cdMF9OeefsoyAMf2Byx)Ub@tfW9-XwE=va+1CtVmwIhQ!>Q%-o#Rc*%{2 z(RIDzzzAOA6o+0ef@mpKXna7e$Amnv-vc;uoHp^&XBF*4ayXIS65nvZJgiZBM@`gwW- zaVZXG$>jH{Qk2}~IhQ=9EAVknc;>A-v62>&(bWh6{wB&ur3jEK2s5I`CvIbIMRt0q zDl~8Cj*jd2yzZtbHC9|~qF-V=cS|G{<#WUdxOsSMHh<(~uP72A2N|PWm7KZBaVrv~ zTTv%ilxz2Jj}5x?x2bD*b?~?S=r1Me1#cmi6%4X2uv(C{)DWc0n5R_ z=g=r&cT34+sO~tKl1HPzRcqG0uw|K(m`rMZ8kKcLyk7t`tpKfn= zAL@TR)cnVI^%IvZI3WBb|mv!@%+{?OUzbROz|+*|#R_d>4Q z191&_HKlejDxXIAG@gm;{t`7Kz@^R7{>JWHL|ZsV;9q?qlzgd@-6=FHBOa!J4pS zATJK`*uTPOjedUuspk95T8*n*yvI|G9{8%p#f`R(GBM)+U{dVdc@O?((~R;Cf&)x0 zO@l%j_?S$<>LSF5#^7Qx`G|&qPf2=yF>g}8@5vQAH_*b|e-}5z3Lk=wJ3)@hgEB0d{s#d?Xl9+ZOMy@bUb0su z?UbEedaeXl9FfZxduinAAVwbRI3H0MgA4+2#R_@}i$tp*jYkvFKwTQa25p z&9HTVMXg@2d`3y|SFm!%@;1Jr8OvK0>SZ`v`YIg@@Qnra1ORq_evY-qFEc?v-@JJZ zPNw-d9*x8eKo0u(@*$zKel93muiJUH`E0BE)5ca|+=B@mEjU6D0uaIHxOJykB>^hkxQ60z7nJS81;vt@;+@bJ1g3JCzn%SQ)$99gG2Bkg+L7FP;+ z%0pIn-6LOsk+=W>Jt3!V3KAtSRk$v?`Vrt5peLY}<@vfcF(hZn3?QH1|G|QLvLGgxnsTndiz6dCgs zPMLs&QN^d^0NaJ4Ryj_d{ds{Xtl)RX)kfDjKueHLis6UYDM15$B$<_1;EP5F0z)1r zNiw9H=M+P^OdTEGD?-pEQ3%vfimcuhhA>rM6+gn;0Oa3SbSb=Vz>ZJmRXS&t&&oh~duvfSEPY<`v<%evhH69Knp_MiE_+sEpvcw1V1s`PA z(nZgM6F+~uI~+oa8cwB=4Hlq+q!LLcmsLp#t0(h<8Plj?@|wV834l)K>3|PbTig+C zTlrwD;JsxM5xe|FUMxGzTwJR}Q(iul`E;Ytngw*7B!GNj_^MO!CRc%B&1;tsz-&|}PxA}9_fyKRvi_=ACd6 zSH3(1zFvjNf9Pg`|2WIxn|IQ!lM){KSoL5ZD}x^Xqcnz*%wyOrn|=6-kRYmfIV>mr z1@a%Bm!L|5^87H=xapijar!WXm|r(Fcq&ZyjxP~GrIjJhn)Vm@*Y(}pDaQ41gBvC9 zpF+fxo55V26bes7%PL(f?gevlQj)p+IVNaGiAq_!s$^uo5~|!Z_M{*@z_4p-D$|VS zFx|ZLcJPyeZ$rgjj^erC_PEG314)25lQ%SOQK2C|?IYV)NMj)Bu@62#lglY#bxOVy zW|2*DzOyPl_pei?Qz)a4%}O_c?(^X;s4WNlx3~>V+Tpnt#BU-UB@2MdLNB6O{-L^6 zzWrTt>LZ(XetYm$P;!Y_7YJdalsugaq*jy;;LfNO1Y*oN(W*Ieuk%z6;;%u|sukp0 z|A!?P6{;mzBT-$yVfZ?f$wfbpveS5^^!DwVS2du$SU4k=?Yv?^ha@dD*zq&_Lk+P1 zeE;T9uZS{JwQF9bKw$z(&+-inX*af?mD5}z8Gd;O2&>wSmIQ1|L9eAj<);x2)h(mF zLAq;jG&i10a|P|pJ#fXIQ+2P*u}!E}B5%2B=4F#@D(7LAWeU8OO^=+P)mdm8l*`GF zpH;4kSXKCF9>&K--#eiQGiv{$#3C@k=2vNFi+q=JfM>}dn+&mYeit!@B_y0;rT$;;RL-40${KvLsQR3RDftbae$q`MrKn){G%88_Az# zO%!jzZPf3wMrs?K{CVQRi9d&3P*-E**IA+&zXfNkb4iABCO=CZu~;`{RN#Ut9fX}e zic}XQ9=5%ysR}6|^lS@7<+M=eCmANA1a3(=rSIG2(5$%OQ$i6@P)6R9m zT%n8Y5>BN^)Z3s&M@(fD&m;{iiF#8V9O(GFLj0WB@-mGvM(RJ`931N5ae0m31THq= zoG-?W-;U=ef~OcB1H*?zOi(q{^9q)%jG42+{dbHF$d}v8ZQ90r@jCvTju&I2N~_Xp zS*j$xL9f#Zw!ok0(Vqj);C%0Hm>h>98StU3<zr31XP>yeVF|1joB*8!rTxA#QrJqZYU-^+xgdbQnjUccRb7-IJC8 z)uj-K5(?`)-f>1c`5Nn%1~im?fqVA{WC`GZar4TJRVETPt9%*&wVz@0xqgz5+zY`z zI%k$|P%)KQ$UVsni#5?H4-x@O)`oq>);X1Y6+jo)E8L^$ORjnQ>f;WY!NjV3Di@;Q8a?2dZk5sWY6(IUzF1${)s3JLRtr@3HOIeETc z@IN^EtGUo3HH>!Om$<$%{r=*a<@E|?V$ybm-H_Vq1`U4m{=@6Ww9UKsWAs~F&L2v! zz}>o3KVk_#(}0HfaQ_`LbQTTy4V>VoZ52|{6T{0N-hO_5_vXV}NaU+m=Kb!kyT2S9 z{>;3=pM?ZgUYUTtgIWY7r3n7w^Xc_b|6Rmr&GHe+MVbvK6H&Rg1jAM17PUO7rYjRz z+v2V)wb4@2#7rw3bWU!^;H{sqD8RYLU{3XuW-%jiZFLf^T$qss-2tj?P{bDe=6Q)@ zTdP8uOFrYCn$?5zLhko>FWXvhiOx{cy&GAUhNvpV0>7$U6;#}xRj;b3Z0E&{t*Df$ z5vtH7bS_J!>A;Yd5%ST}ksK`}Si@euQ}`)Q=J&5=s!~R&%+ki%?UkUglzs;zg)9ws zFX~p0cQxxaRdFPPtc|55S4yh&%Wy`hT09k9cW52U|Hda+EcmwNm{Bu{m!J5C6$C2xsHD zV+A>uK0rND&&&aTZ(c{GbP8PwR-%jL4h7w>@jj|D4+Eaf+wwiYw9Nrl%Qz0|kmEdi zKjh{tQQ`0H!&sH~z3!v-P5nM|i;&$7*EFx<302^oba@U=OFWnt*d>|WPLNbERAL2X>FfnxtGP14xa7j~kHMg7c!y*TC zeHR=K9>JyTO;zx9DuSw05iT7M5FYYBKjeRY$p4J_pErKm`e|e9A^-D3{^vseXG-sd zadEes|GC@U*xKa$&)XZF?q&z`KR{zRiX&Juar@!XeWX(yuyyj$HUMr+tUNdtq^N1i6VESU6EIH1>-*+vToO#Wcoq2b1 z)#4P*2Xi5MoQ{$}vSzi|aJptn*))8aLMLWk81d_Be435cswZ)V;i{x@hVRy7CTHA3 zj1S`yENWvZXly#mE>kR8{_^Pkwp4xNrsDiKy_Bl6W;heg18HQZSvHb!7(!%-p=&tgDuA_f`eQKQ=dtC0|fa<1)ku=i@ee{}GA|ILRZ@Ym{eWFK#nS(*u~ ziUwlH$pYpikOY!Un;mXD{k?d#zkB%MZ6DI^y!&PM6-=wsu`25X5>P{7w*S#J_fXxo zRw)nkTE(QyYgMx{yC1?<6nk|$6(EBxo`gBi}%#dmeeq6*E68 z=Q%N-!gw~ z#O0}U*OuneE!LVdlrRd<1Lf)QJ?6iS%!vcE8VZihwUC#U(Y$<0=6J~vk)jp#p_H)d zA>2sM>sZYR&Pl`S&C-r8>&!|fY;+TD$5Oua?V87uA4*Uz;3E!lc4yWEVoUnr@Z;+om zqdd+47jc`^9WJ$GXnDbBA%o3X*S`^<~d&M7qMt`nj{lCfq2w1w{T+F-K@77ScMV$ep3NR zSwZ1o&tS&rRbNg_@L2OUasxD9UDN_tg+y@j06WaBeHO_t4H9^5Zrh{irI09Hrk^4{ zb_8(6T;#YQRhW!CD5xR-rombv_%Ub{mUTS0LfVl%JU&^gS^kt@#$?Z5_2x#d+f)2Ap{yyh_^2QBHW7kK zIy5j-^o`_gE#%Ja%pWK|KZdCVsIwmwe!{@6HC@Zz;&1GjBHqrj^I1F=lGZuO$0b9C z)()=GmB#wI0ld)lHVIvK5~FnvDL~kIv4eWk1H5EJ;~6`-;y^~r(h*9KBDe<8q|b-B z&^K?xKK|o}&!gxN9NnW+F}aUQET`Z~m4R%?1uDGP)Lh8wiA{~D><3^wN4XKhW6&oL z{eVg+z@<(M3RHzgXwJ{P^~8O*R$Aj@Wk^lqJa>!Gj>^ZhgBD*fzildz40PAm76vd1 z2H}UiDTFWsIJRu-lYT=tWqL+4EvQl;Mq|f!Kbt!mlF{4nXlApEFtvFKS`5!L%&GYL z6K5b70zuOQ{^G2{D6xY0S%DR{JT}q^_(gr)m^Xw#iMFMnRjCHO@*W)pPQ!~pE!^y8 z<$l}4SdfJB+r!rkk=@3$+nn_ZGp#T?KAvJztDVjuHXtgjBxe_Ji{Mf&Jo}b30A=&2 z(X8~fqZ0^~hFUAuhN75$&}f*~gUyG#EJ<+thLQ7?l1@xj1x39Ck7oFC zu_ZE1G{fJRn$|z|qON;zQ>?rcQrtCnR?y;9>ef&*F6+FCG`Ln;Gpk^9QA4DvUZe>= zqZ*dewXYV|PL!QOWEnouKjx?M@8AJdd#lG_KAa*08}|1`4R^r@lk2d4MBy$wq_YPZ z#jJ3TC3yOuW&#f46DX){?wPOpG%LYt#v zwI+jjDn=z7fw1|4C!RI@^Q(wM#ak4P$}*1}3ThZ3z>;A*`j96U^m;b!MjmS>rixka z1>#LmMLJAaQ6NNN6rFirshk*kUy;?$ni%?DJG2NkS_7I=E)wPWiyfN?Gf`oa4qqv8 z11&Y6R!|>ud?4n*5)!}N%*L97vxq7?QavXK5@x{Z<)|S1NNVZU3htU~-XM3v8;a&~ zaL4Kd9AVu)Osg!;lYqsz1e|c3j!;v9;#sVpS=em~9KX=J{y$R3=egvb{n_i~5=!opUyy|b3oveDGq4&i351R5t7WaE=ZAI_qwPo(j z7A+3;pUeS~dSB`sfG04ezT<#)Is$j~(l)Ojtk`M2O*GZi>no(xV;QPsdE-ygJBMST4J50REushB>D;Vp3aNQ`zWJFl|oL zQ9L8S3Op%`Ahkl9n+!kBCBnMo!|= z;%9FTqZe-uU%oncaYP`A_TEGv-tIx|t@ry!e%dUj)Dv`Rv=|CXtR@+hxM1e`d$|<< z=j4&AKBnacrlMeAOh!HPg5J~9Dxh|n7p!u=&=Zw4k|Hy3dpYzKDeGVAEuKR&Y#v}1 z1Y%h$@eg(mvv`QZ!W`h9`2wR|*9%HBROjMh#~kEcd@g!JdN;LC~$7AEdGyz{%906tBAtsK8mS;`3w}cu<3&2PlotcKyy~{oP}`@ z{yPrq1N8*+Hcc^a^Ca6dfv<_+e)QknlM;6pDl zm5%#eaU(Z8_sem7372vQ+{#yQEoIzGfQ#{8ln!rG=7)gln9l4<362G*!A zShhieYllaD{kz|1El>GK*qzC8a_VG5{o#!+0XvP?IWZ&46{|h9DPJwjY#}{C_+$Be?v+ z4q#Z;kR=H6nZs-3_q!*wlsnrOBPSz{B#tO2QX`|Z@xqj@%>APnOVC^M-k~9Y%8W%(feXP&CdfN1F^XZ_?H5e0FRCx4~{#Y`;rYdjDy~JO&~Zvm)z`#x2cw?|T^nAhNbiJ3#WALv?uu90Jdac(`X4TYm3RUqy;(dj(ln1C`N zP+-_CU*nz_Np^-jL6ea{WpatmFVbm5aD~ucEJ;O*9~brY z_i7(HmDr_Z)Gx6BdEf^w(tUHld`ZBmcXFtkqp=Y_#QXDPi1n||aRejuyaeKl2XP3` z@n&!6gW{t=b&0n0v5EWM&uvKUc%b2HrI1RrP=EVAWRJo+@-L;nlh-!=>6=L#D&@g> zm7K?eD@aUyk&zJ&^yZ)98HDSM#gOS~idELE3(Sbf@KGppLYA{4*8>$F?|5eBbKvkh z*{h-Y z1#R*!<}hO8!HeDZFLw9#1HB?!Rb@wj z5vlZ{=1vMz4YZKVOX2(5-_km7*1_T4{y&|~N{64ND2D#m07EI7!S81MHhM-l8a>1B z@}9pQ4U1RftzaY?d)TITjg1bCL zR4Ty`_m8r`oX{))uEnixc%k@p$P8l=umWScs8Ja15tX>knR$d;8$BG?xlXLiN!l31 z`5;Etb3RKGNW%y^6LIYhAo*ya*YzkFpC-d0mWO;ZOV1$~UOy{g4@!FKpQe!he<#}5 ze!4zP#}WOOHEk&z7b}^9me`6UEcP`_CQuxAqqFrB_JzOAuFe4cydB*(S)d(Qi4;V; zACBG}9Ex6F?;jqOT~(TwxJAA|Z{+h=Z=Nsvi1)49x3mu>pUS!vIML8rR1uU267{@4 z+I{_2(7KnqA6^Ne<-gNA-0<7T6G%9V9?Oqe0r0r&W7RW#>;1I33>tvh z;(`V5x!Z+?O$cz9Ou^nA>DCWCo!ba)IaSWN4>wjn-zDd)K+Tz8|8nr|=)>+SzNa-= z`0yUet%|?48$M-{px{DM2V%d!=ed%Ud|viZ*yr3q+3**Abo1yvPKUmauviOZgoLUr zz$`xP(zC&ZV49-G{m11uoCULDLYy!H7dO`W>RvYX=-t83z(aXdHvKq_c3&RBOD@lL z$hsViXsHk=iTG)`*c4px!vT_$>dx3Q8G^+x1ww`L@2_))D8TlyMkZe8x$7|MUHRzeM9m0bubK)6uJ{{ z#81Aa&Y}@1!}SHyKtrA67b%4m2k>Qz(^R2fOIXPYad(ab^Sd{1OmXfdW#{S(ZcUCfqtAw+Aq^cPdnIjYo(Ivq+mHjb|-89Brgm_ys% zAdpK9z9^{QK!$hqY&^ksFkP04E3ww0q_?)%E+@U>#c;7#tee1l^{OPW(7YA&el)!#OlVh zM(Zw-Si}w_UCUd9IyT%-oOJJyw4i_r)FVJ?oH{x8#Iw5leO)F}=b7wya~I?E`H;A$ zV+YO#BDa4kt6JVnI>+Pe?(Ib{-n{zo`p~a=u9u`ualvFBjtWr1&+i0C=0xraMq1+@ z0+CFn!7A!t@=B$7M*znk)jxx5LJUZ_Wz zOd3XO=NePlO^wCUI;|<_w`M37;3{;l-ZrEEucyZLYWy{by?GwpSwxL5_5jy7L=o)b zLpknsGpv?2mm=@_(iQPXx3@4ks(*cQxZ7MNov!j#;y1UrNE$P+MjGsfOSI}&&Jsv= zDJ~i~i_nLDi_fUvVo{;A#biNnU%DQWc}j}%=9~&g;zw9oPTKG4EpcKUR!jLDSPkUU z(ARyaxhtB$KkeaSJk1;O6&hDnuR&9)kSnsur67DJHHD1MNKKVgSs>8;(@}OB%G>Te z{Jx4xTpM^kTMRs5yT&l|NqY(^mb1Ez9>?iP^ZHBx^+)TGyHe~84~y*7Jw&3_((@ij zvjv?amKOWQ$AzwoXDqly8ZlM59nV>k{DzU5Ny4w?uD#{Vw`C)gbA+0K>WX&}JV_PI z(6bcK=U^{7O`ygd-_G8srcmg>>)n5%6Cwo-;89%?C_o;qn7RW4a5^VocJBl;qupk* z#&RF6k}W2LY8BA7EJWBwL%sbgWx65lbYy2>x=CtCjo_7Zuik8o4o!9Glbsah3e2B16*3xly$1OA4|y{5IB=pmrBui@ z`0VF}ksB<2HU(Ee{;gQcbvm78pVKiQV4jZCQ4D$iISP1L*#-wvSU_E_h%(XPEE5L+ z6(pTKSs39ef?A+TIRr=!HbVE_lM>a8E+`f(AXK}(dR+x2#!yv+$|8)X^qLH#y1vJY z$*Pypi|Va<``0X;H0pn;M?Y$j31;gH%&Z=Kd#2M`6=}M8m#GX<;{FgUusK1&28Y~M zNPOe-j_c{)brD>})5{XV%- z3Vh!M1+q2K|2hp>Q~FtzBvlg zaF%FL>ij{hAS8&caA1zG z-9#Omf`7^#Wt3hm3Dh!s4r7=~!9Fn3JInM=fB3iful=O`lKmRp}1?P7+V|3`h z(7&C|##8+_Jnwckx|@H9KHmob7GTN|!~fym^S_M^t|Qca_H_HF?#9+n+wD&64-fzS zoBRhd$LGF~Z1eR_yW8$O$!CKn_z<2w;RGyC*h_vf2Zm;Pwfgyg`t)h&{BJ(n+W13v zySpI>!PZu1`wt!Q*R!X8h&m7F|IYt|yxD($CH6Lgy#)>W*g3-G$v6~abX53c zmi)Fzh0G{>`*SbaY;SJ0JF1N%yU9&WjdYB7lPxXz#DSuc1OR( zlm~X8GC)J^H150Ig^?BkKUG>Xm22QJw|-nPnZTpy#VCm<(PD~&y8%uW!tRQ5uDdbZ zMajo1$Kw%h-bf)K9(ik@Kk^_Z=vEa@*r6kf$t)RU=kVQ7x1gLMLTpKz?NRngoW5o; zh@?GCP8a8m`jYXe@=vNx#bq)BOI_X1hjXPQy_z?Gx>_`X++DD2L4+={#b_9vf*o-k zWjVw9kR$z;L<&9vtvwwM{tV9ESnRr{0PK3ll))OqsqI8G34|BEB6+3d1c@rYq8(QL z&xs~bU_!9r2K(I@$G>K?Ry0m0@Xr+dqPYI*%A_VtVt(NPvENpdw9nhEsIl3KHn&<) zx9O0Q60ytyN7EyX@w+}$GwMa`Q)fzH^^PE;9Uf~M&rnNjv})&5F-YTaTOMzlv?0Ja zn=iH&{iB!%ihO*~jQ*@@c^K^g!dAPkr(L(u4!6W=x8Z5GQE10F<=tQlog3O?x<#ez z@o)2(%Dy_S2>!3ROpbk!{~qK&DgQltw*9A#&gO&s_aOhd@*l;!^}!<3CspAJLSVW4 zx4FH&(M9?1+2;1s?X68A|8=_?n-B8eJ;{GB1j{@VES41TK;d3vnWHz8E0@+K2WN^TAb(EAjM2HC{vi-@YqzxDQ}C5E}uhzj<}wY%7047%Zk&pzgsSFiKBl-(bI@7xOFh7~|)HQEj0^SqptRw9_8XQk~N_ zv&Y=dG;h>u6#88*Ue{{1!ANY1;Qs*luNiXCQL-a>KKvysX_0%J4QO(qE^a0JHrHw@ zoR|hvJCdB8rGpegq9hU;qX^VgPAm$6q;CMmAqSAh_ArcgrNL3pH_dAl+JV(Eh0GF1 zfLsE_`_xg6Bu`)%*(GDjZ6W6Ut9UIK)eOQcz_}Z?7Uu*m-h^AZ&WEAVRz+hg+@)+N z44n6rp3sh5@5}k_Si;k~^19gZ=IC9Ieja6~(#j(c+-YqXP_3l9;*~V08pc$2XcD!# z={n_c7Q*l6xqRg`&c)YtjS2Fw)pQD( zJ|wK|_u0hG_C&UN|Bte>V;G%jhC9${Zat@(5l%^sHIETfTh-z2uYioW8xv{TdCTj;ejskacHnx@(a8_Be~ zUPCSeK-O;YgwVM*@nV)xA&Ur-+1nI?fAKBpesj39XqAK~c1U8)lWgo$|15?haAi#M zUG}LY3lyiu`Bj`z$k`@1pQQJX-B~F8IG*-%P+E37 z&o-ZJb${B}sy6}GyFWk2x1Q8;xMyvKKm6k=0`OTOgSAn`#!ow55SF6)6C-`awRgq2 z3h+?2O>}b2{MD`e1xJ?h1Y?gMKlVCNEK6^%ZqTR^bR|<>`o7W^0A)a$zsut;oy7Iw zVT?TY;jfD2Q^Mc6g;Pq#mRDZ+;C71p@Eq;y4Zrq>h!&oTeQcKRw6~+jRr1;!Y3=>U z@>piOS6E?Wy)QPoyNRmps60>EqS7=U$&f^5WsM>mG8l&#^IJc29ONX&z3nr{JU*n1 zQU~s<5l1oYa>3s4-um^P6ASTPr%K zE%Q&aZ;yJ=Ev;6}+SRbbENU(SV7a?V580s@~i6}5FGf?nI3Fl7hQ@VPRbj+?o1mf~(?saWUd!|#%D zx_KIyn_PH-OG2i!e;2*mfBS0p#lDfxqVE-(l~b|AR(~9fue5OB#*q7O2G2N~*IX%w z1!uAd0it#9>%lFqu&7n8zB|6IRKkkriv-N|-P~I04qEBLQgtEN=i3+f=cmul(F#nd zMI3$Oa_XGh`TXp7JxJPDguYe-Mh$jG8a6{D-qTvM%VzUs8bC)2xJ3O|Q~_2~{JF}% zDk?u$?N?Rb=j!~*?*iZQR=N*Q@fX|!^w8$4a!p)&wP`unz*dA(JfqD zfecyY^2l*r9;o_BYuTq}BwYEoc9yqhrLT<|@6{Tp3h>oFmuI5vbNiC1G;8}HggS2; z3f*ARtOsu6M%;!*`mbUFit?{{yNR3phoNgy1JA-4gcBO6LYw!>TAUUx;QWp`i@XYJ zQOrHCL?$*<5UeJehEo-ox6V7uhzmJ#LCc%YFw?*~8af5Xf$4+C>3_$LO#;ohY0mc0 zbc_j4C`mMy3z#JsQJx2F_R{VtJAAvuJP$i6{Q%xHCe7&6MKVjGiJSz*q^a&OX)hRo zRT+ayvhmWi3jtwMWg%83-?W+EC>G?2}f;3GS4d zehcQ`e_dv;l*4-qx!#61OygNR#tht$k5YqN;K8x%%;waAEq(yI0{d4% zxmH`2=LO-~;ttH5Cr(o%+exh~(MfGdj+0u2^d_}XW|QTnztW^8wN>+(R2lEfb*;86 zn~6VrffCnb29PDQ~O{V>lzk`#K;+ve6{2glGNv;Bn z$(QTUG8{=xuElxm8Sc7(_hxdHT5UyHz^d`)O_c(yqTtnJ;kIVk9c!`UeaxF+Croht zcIMu!2Rd)U5|-{rCtIK_4Ss9DcXjQ1LVJRx!c__e54Elb>wtUlqZkC#Da92pFV)f8 zJQyYMtjy5`-lJfml(gT(v!$4VrC*{Zt0b{gttVU{xGNUCU+>vIjeyq7_gx6&I7Ss& zd@#!Lq!cT}d#_X8s>rc|i=E?Ur%EFPb|06}4AgVC{KAl3Fx*MS#g)w1pA^^c^J=Ku z0D=4N^PK%FXAyW@U$qNF1O zNP?+G1h!}XC;ek+_0en8UU1kuJIK682N}DWZ7s5WT^beANwysS7=A^0cuNfbmSO4C zcI_!T_1sw7VQV;iQ-#l=*oHXR&NLdb)KaruNfHL;!s*+ze6E+t&f_4u_=rwRhiC`LZ_Po(;y4*-|G1vHn)oTA2uHHKio_Hho5x* zhd*s>KHc7a)_%JEZ1Ycl+Iq+W@sR)F6Udy&?8#T3|7Xvhh0nkEySv@p-0nQx>UN)s z^S=e3N81nQ|KEfDKji=UKJx!;Y;ATs5BXmo=zoF!%NztN=YCzD|3};n-7U=jv%UH3 z>GoDvod0LtjqXGKpL@#x^Aa+f@J7oFKWup%tpjafN4munO1tM)>eqI13F#VS;+Yp0 zP$%^WmH5!@{v~>gV5+)zX!>nM|DcgxWRtV>T>X4MkD=xUb&BSAIV0{Gy5n!jVAjfZoVP(p zXHO~r-`3M-oy~1={>8tYhx`AY?ti?;AoE<3bLCde542ZJi3hZVz6Zl2rg=*gd!QbH zQ)x09(uc`pF_vHKi@#S&k!R<{Yenyqxyogy@9hTk+f<3{;1SqOaHaWGK2OHc06$jR zddhCG2M*7i*MXkDezSM*@_=3lUWpPt;2YAo;|s1OC0qO=Kfi=$wc7ttaNvn5ZbHp) z*E6>W522c&cHOU$NguvJ2Z|kI<|5YFfbS5K7z+2)a zctzm!>HPcVZt*y3{`7Mp>tOk?itH?>*Ma{ErN`JDKzvygFyuNSk%q z=c(Z4#J~CF1pmD_H|z_KqPJJ`3(+o_T&A;ZGDcSTe?@~>EYj#i!0r-S%YXTwet&v} zpZ5FM_wvorbA@);Z^?O$^}-b6g;6CV#Y$oeVda_C?p z5Qq<^9r*iVJ|2nR){qB#`vPkF!&GdL0k(9Q#5l*x94;jO6aPsDqjZ|vaL|=1Qt)qr zESb75b)(ta7Y{x{vPmfuAaHZDnF9Vao(+=Gx_%%?;b}&oOg_0NAztnfmC*X zUnGkJq>e>yr1XTJ=+HwS(K#hzl0&e-LeoSP%g|md4{Jv~mtIWeXHR^A8O>g$<8)4j zyp0aECn5%&53^5`J|B5|AhDWxA4yRkNSXm3Bx@(i*R2QxLp)Q9r=Pg^SvE-W++Dx5 z@||1XjMO^xg=*MHl1y0yJ+%l{jl&gO&u|6cX~ zENM!m`vQ3({58zZBO!=GHL#Vn0ZvkUodv(3EvP8&zYxqW)i^6ji!Z0$&Q9}W2FV9x z^Edp!`OY?C06g9!I8Y}4Ydjf_M3Yqv9*&Kp$bfTa5TPNyP`%-=su}wqMX%$}>3A_F zHa&ve!N5^ZXE?`bF#$<=h!O5%2sJ*RWuF9CA4UJTNG9l@2T^7%;Hw6xq!}kesL3Fh zxkZi%0piOnRn3#n7qK`k0@AhDyZ`L}{lor;{r>xd|GO_Hx(z2;DPHPXc|AZs);{{Pq6e~QFedLfrWKtZVnW>Cj>4(qW3_`P6q|Dr%)=<;EvS#y zRd!~_|u~JBIsKa!I&WTsvSP3Ug2RuEg zrJ(m5+^uoB&DyV~H6^L;`bKk-nm2n=tZMs4b7GyJ3#ZHbn9vZEMII>qvcK0-%DtXF zc{7bYb`>~0zSOIJ{i1Q8iu%MV8BL){KRf+Z)(SADNlKKLlt%^XXmFJmNH4>M03n*_ zlnvC$(2X;)j$*q{Z8&Th`fA9*m&6yzld9;0?VpT*mR+@+Kr02cp7FQ+krJb|oZN8y zlGT19jN~s&gC>9IG-$eMGeC6+g>=;ygG4Hl>|+anDvz^C%0+z?<|>5=!R!O*{9WdO zf>vf(p07_w@m%~GTS{)-FGVBl(?uK4&>P`XJR6e22y*=Zfn*icKv3_~c<_;j-v2D% zI5O}HaGfp{*@PA^RUMkpQ}Le*<^8-mW(tHbS6;vyi6SQyY^EG>XqZ#o*bG>RpU42i*!5e40-A)U0&3Lfso9 zb&!>x8LR*f|6|^{JazNdb+%#Y-D{UNvFr+EF^|Ti_Y~d=e8RSnwrnxW)5~OZcX^)E5BRFM(V4cBr1?ok< z{hi^ExkjC%328m<5xfZiyr8s{YS)fAPn0vH(NNOI-0|u9a3CGJyx?F8rdl&3^{Fk& zH1gmwkS)^;NjVcip8!-a6KUn3nl8m%R62;h!fi`#cwOz!JQZA=?`xX>Kf~H#7yjMW>u!4VStsR|wbm_D`%(+dGg* zgw_|I#{hG8+6M1MC3R&PCW9!!7V-C(=0p>jcERZCW-u_?VW*ymFQbZbtuSo^=DYx9 z-qOY~)k+~!FMv61`9)tW3dv~0u13YPQ5_k{7YtMrgmV^uV*bhC?sqg>=)bH;*o{PE z<|8cSgOYf;wz@%5tk6Le(usH|dh3fT)YQxdy9BPGD;R+l&eRwb1$}2{A-=mnWSda) z-qr~0gn|eK8wWQ=AQ0Xp@HDx`55p!9Ex0#HQ9;TeOH&aD`Lm}~AqHM7ut=i4T^BxU z4}h?Z8qI2-hjMt?n+&9&<|+DQz+Ps?a&DU7jR7~DmN9BBhTtL)9nx%(^F0TI4-$Hy zWAG&T6oJ{hAjVHiUp=rt+xV1>pUGMlzK{O{e}Vv z@Wd3VgF94Qm_6KKHc!v4_-Hd{(kLI-<(nqhg~gs&tbp2uQI<_h0=1tDcIJErno(IP zkJ6bF7s4RP#U&SysB(wJpW}1nG~|7wx%DT}W9?}27=B`BZEB>G4Su@7yWM1=cM1xD zc0NjyX`|cjG>x0_5|THPZ-ke+qoAy+_ay=~f%vx zlaob_YaOS9Ry zQk%d;aZY?S|gTKm`}$P|cEaLFbd1 zUprOEx~FJX6@68Sp7|XQt9nG7x42lO?0kdgo{Wfob()mh6~qJ8anv=2d-?Z>xuYBe^IH zTdG^|iPg+#c~T}XL-!}zt)Bv+1(^|yUlH3|pcqNf-g1LdfQmAD#RVTLEA18W2t=H# zh#PvCY9DiGvX)A|DUlT@gVQg-eNq0Vv-&&Hj1-^4EFpy&w%>`q*xgztv7$9XV5hV~ zICr&Fob^2ZCPW96(Nihznk6H(rIMq;J-2kha&3WWX;YC~mELCN07usXN4R3P|Q@oJ%{4<_)I=-u+9&;)gbr&K%q z2dqTT^wC*=k5fWTkrq z`-fF#&p=1UDHJ%+T=o^#MuB0o#k{xO?ksEIo!q17AL0hV>(+O?L6s`dqo_r_h;x-~ zx)nY49gfFMQs@Rxf9YHBSBgg|7@$M&^0l&nvJnP~pBnk#Ln@Vca#c7hE9fOXWC3sJ z%47lW`4Uy5arvus7P;mF)VIJhyqM2Me|!-!heZ^JvkJx&wD%d?znCCn;in4)UTdks z(R-+IadOkT=B6=g_P6BfG>d0Lbl#jTrt_UJWZxx&M4-)(g7e_x7QsRJ8H-Qb?Yf7= z^SQ$p|7SXb)Qnu-0JcEK=ID(Gn|qC2U=e$ycq5B7HWHRAnEfk;MLfj+JjDN~_@AwA zcWdj}L;TM}{Er*|<4LttJq)Nk{%3PztGj{mKTo@zXPZx-{Xv}mt%v*{_ZI(Sq?w9j z8ZC7HDG7?cNf6M|tRP>J_=6{;5Z51A<9suP`j4}`%~8sFM0os?9=dIE%2M4H@*N!n z*=jGHVT?HzPYJa;$EuumL#ovI9s|O$hfrhmFdu{BaApGMH9iS7BAuXK+&lhF)4X94 zx2h3J{!Az3q4)?JEEXF9>WvcH2=8P)k#Y1oD*-*0vGc$Q>hN}dXoB*BFXme_;*(Z6 zBbL;T|MQlE&~y5X0p6e}pH_sSHCL^OqFLbgWkPbA>7xnif{)@ikP!)pT^jLY`|qd4lclBFGIxG8buM)* zcIQ`te(HtwwYd)d;YBV7uiT~T95YrNK-9qF#aS%y51c&x)ci8T@hF&=845sQF0 zej`>ufWTn9l>`pgl~%gmRN(fG$UPhsI2^DdOV83HZ~iMp+>+%MO!i39 zfSgA(14%roK6IIoF>Jt_{jvUu0wB)6lzSqv_zSMwAqg`sHF{FK zW!iiDFLyt@I_mE|?;q~I-glLyc6TM#tw#~;ZwO2rD{sKv6CI|SvOkebZg$2dO0T=C zv6Ei+HP!9pEK}ZyJS(#1(gzKrC%;o&dS-PNuGed=#B9<|hXIYEwXeP6HZ|;hXc=Sl z^q+>-)>6%xy?cm(B0o$C;I|VSqRzNYlM3g%$n(VdV6^~*d?&Y8vC+s0X=}PcF+*_^ z?uW;+7rUgihsX?1C8E&1(q`0{XR4ZSLw45m*^}f~CC9}s7I&C2ojOC8TTJ1n0-i~| z?jO*l7{Qg%@uWc%ePc?g@xntN9Z(o2248DHL^SDs!5z{dKuf@y&cdhAIU%|Lly#Nq zv8?N>7wpzvHezkh#^Rz`S8lJ*4+w~T&mjE8Z_N4j%pc3mxISbCSfH5dI|Ay3I`A%g zZp7#Wf_)2;C?9v0*2DqF5JQzJ)Fa(3;Zn`fRW~gCVc-o8%Z5chZrPLq1y?hc> z^v7P*UDm?Nj$vr> zVF=4;1}ZH{a4SQwQCT*sp=K$J9?s2upPR}d5IA6eoP$2)j6)y{wXQj)m;qpN1A-vh zQqPSEK0Xhw>*cPgY8W4v>w$^qdshOYc8=7uEEL|Wfmdod;6QD~`|=R?w;bIL3JTN;aWVh}+oVrltJYafatZhFQIysED62%0)sj*|Z&0Pu@kQYkp! zVSFvhPT6D*;su`aGv%2-8DyZ*&BM-XvLOQ#6x{D=I!5yUYvh^g=`6KojhkNpmQZm&u zkz0x`t$?^YBDbZBbn3LTUiqk!$6&;}J5FXFA@z^XC`14z|BgO=NNQ(KhxSb+Jpzb~ z{E$PI9J%Bs-$N}7?e(p{>FNK@ZiwI{%5M&TsE{*pCPSu$IR9;w&Xdi0v;Ah8kZnF5 zkw_Q193MqTp>q~BlJrvM)3Y+<_eec2dUL_!CccUMp z#zyqxk8&7H39|p03i_n+C<3-5J3A}Oa>2;6Cl`}R2uWtAA=mK3@W~HD40IEef!u|n zAKDvdIoWH1vc3FI_VdT6+cLq?(#zsR0>0Kp@)PxWGe|Mgi6;Y2SOpQfm52<}t;A^_ zz8EvVmTSs`!u~1(|Efo>!m+E&oK%$_|%6+F~8vbM9lv z21LoxqKm|kQ>!d6u&~kvl`Mo7ASCbP^At1t&=a5>%Apg2L+fsCVy(C`6|mKkII@;F zCIORtgfEf`4mV-F6-}aIibkpNWw>Bu=gOL^CCY_6(Jt6G!V7fT=G7o1T80Y568cNb z^`VrG@4iE>)NO%RFhXX(O{J2Wf@mqz)*|H@Qh zvI;f^=VjS!m`>ukifmNbjj#@a8?+_UXRHzz{A->~0)+x856oSZ4ZMZ0;rouq^~d1) zstOhg51N&gxo@+Qr#S+D%3pcn{pH8SSjx)bz*f2E*Mx2=#P+Z0`NjH3EPZ5QVnyWB zEc^?Jx`|cS3Y!+^j=Z`IfPIyoC4(!$>GBNV%cz0b#GXJFnkIio8OkUD9n7ln{@~|> z!=oqSpQHVEuhnO|bT!1!sv;-rDic_gUo3PU)YZc`m(^h^4b3D09D&m>G_Vk<*e;vs zYUp|pm2A_%Q?n{ep${{2q_?B;Dl?^Frngi|DI8pB!%kOv?CXY30k1+?Ch3;hGXPrn z$OIC7X+CkQIO(`p1x|~`1mZMhfUFe@Nu(^$LKWClO^nkPXv3t$Xj43qCxwh(-=x^o z`(j*$94W@c<0fq_jF8PzK?fl!68@e0p<#YBPiEu7!XFXAk1o<&h2%?00%<5=wd4ev z`vBeNb{za-9!g&wEKvGzi$2ze=|$a+Ly(+Gddov>Le+4J@iazR9KmRYVoudpuG_v8VvCQ!%*AoH;Md_3a07mN6!?{in=ZEeiK z+oobg7lFa?CE7H2Ps=bwIvvweEi;175N@?-Mt=^Vjju3>J=ipxHad3pd#|%PWQN9? zWgaj4LjG@alM7lO#svjLGI(0L@oeGb%%v`uZUYfZGi{rC#h-j1D`thyL+SxA3JZ z;i^AYkJOjfh1?#;2Zww6|LnipfB)gt(Mj|LM56X^F`niPW1E}Tf;3?cOE=@N)VUh$ zSoH!D+@f&kze0m=7Lbsq*jGTpzEn8t=Y^fSSv*{f#kyx$YoO#Rkvud@jkYS@pM;W= zs{Sz4Nhl%tq#V^Dyhh?zGtMH1EIZER%c{T}PzyP@Y(e45u0YT(P3Re7GN^0L`P|@N zB%HMD6stUXGnhdZKWjygS*Cd$%=hZyVnWk^j+V`XF|VPMi3@T4J^s;jE84ko7%&9! zr8v$YJV>)9JcJCf%_9*#MYgh- zjo_ZM_Lof3Nboes#$>?v85oT`NS2i9&4;gYo206D?{_M9l9pl1441ZC;Y}7+3&o7e z9Af6cR@s=6#0Q>9_$z|@M^{_X@~I=_lx%^mR^m?kDB2x;im!52M#QdjVpTxN#T~Vf zoL1B*a;x!?Da*H88ruG4q1LBM7$kk}5H( z9EA0nr&WMP;JWsrj-M4MJs}1`auVWKE+GNZpiY1NmBl7PCFBcJw>TH z9Abrmx^v!EC|4kB6nyV>1=Kf*Bg=&g2`G?!Z7~69rV<=;$LD<5Gf9^9rsYXtq;-_NG6kmH)L#zHtH!h&>V4>0;4{6) zt;+1a8(k_AsKTPhTu3f3L=?W9KsH>cRg%XB^B&2eQmW_gG#|RY4m{wdI z%WYh1!IV~{(qx!RP19ZYVz7oiUZ)XH?#%=5yYTgQ{Xd}seE#It&;PS$&%)=wgZ`hJ+nuLd-R@Iy{=3gMyMKtb zAI|@S|Nq1O-=F{g=7ymA5B0wu=s*5%Kc27^|7LFg75)F8Zf-uK`d`~mH=cGnU2*=m zH@jO8{{Q#n|9=EKj+%OUjE6}Kno03*dVX=5&6+ILP(HN39=`tv z2sUfAHRYL~jHjS;SPQ!6^P9yaodc4!$DniM_frt#jNYbK@CiJ46utkLPBG{b5tM{O zA@Cykvm2jPy)|c`WfOi)|5aD`v|oFP{cRp0Z^bKCoz7L&EnaWjvSYz zy4GytjKqrek!+%p3JN&Kc{CqShpD+<`0pxBMnm%2ruWV0NmOTwG(26th#c!d^)GRx z+_#pWlLQ*5Q~ho-ZO`J#FdMh?BpEigHkxaOR{(?LL3CADxZjb4+0hdLyWOHkgmD?3vi74Yk}(Z6$K1 z7bY-g!8v0UVX7C9VI0psV$<}jVW6*QFd2T+eSyq=CS_FH(sJl#%b*p$EfXEVhDqAS33a~Z zuF=@m0@VuK)iupQLc1v_ZQ!HtV=r^TrVTE>ZX2l&4EF>FpL8tdHj}&hd)r7sx^X#l zkz;EX2D=EQ;;_J{sP7ov@3Rj`aT2H!a|$_e>yg&)PfOpeq^4EeibWn?iVypy|1i{SR{ zz|axRg=8M)kYp^R%OAnkonoFcjxxRS#q{k7Z>}u|n^-yqk?J~Vp_`uj0y)7`hrABEk z7vi=%#hH_rTVU!NcQW;DsVGyE-)L5pK)<`SHe^B39ya*+BrIpi_urJ1T|(GdU#YEcRcC}of={zdJW>OEr}XYf;K@A0GK6Kb zHw+-~Lc+y^VqHN|o<04koTA_myzdybms>%SXg%oF)#&x6qb^=XOuI;rtf_umNj+Z7 zGYE=+5Wl5_2IWrLNN6XZ0jh@4RYV=+#L5riX*@`Q2%$G`k_;)l;X5NmkD@o1$!wMm zf#tz4M?G`UtrCk1>vAs~%)}v>qA_MuvF;3gJR!4XL#bf{|)&G)SH<~*b$zE?2TaA-> z3`o)Y!l_d*jb42xI;)#q)LSyOO8=wSJn>$=u3tOdHa+uit39aB{j}#9o?~Bss(dN_ zD%!B=qk@f?NC2gp?gMUs?@@rope}s(fO&e!Lq#1~GHM`CR!sq5&PXDc(SOTlJYFjl zYcN_!n(vXHBv1^N5+rL?fJ+`SbNyo9@nDi4ONo7<+wrMt0WcIAv1Z0w6c)APwPo|v z#MP&>ZOE+V_!ucR5aEtb=iu<&eG><*MILufG=i=<8IUxLSU6#uRm16YsU?QHYexTJ zSz1(9s(i=w3-Fm>EM8OsgUMv! z{*+N{Z&kcL-q}8BQLt#&xk8pS+}@E5HwveMC5gU$0PvUW%%|$I8WqpL9%4=e>4;$1 z$9drlvnE)L$hbx^vNW!DsZ!4(nhmu-*2tA>OeCoUT~Umo)kz+PrqQ zPI2fh=EDV}o(O>fy%uehHCmn6(|s-hx{URrO=}e9z8dF6*i;#hx=SdktVUh#{j-Fe z^38IFiFPo!eV#?9v&B4F7vLQvq&flOx|n!eo$m$1zV@nqi3=H~Ir6DuEpC_kZswdG zUPBr4dD@;HHB3Os?Pz|l-0hz|PKjY&)Kr@g0Coq1#Vj6Nxo+kJotWQZx-Y@}GPik| zo7nVks5rS_t*OA{OF^vw;(dg;kQ_i2T2Y6JQX%ccZD^=$z?C9+)Zf=pd z_%uHr*D7(gAeRf^Q=ZIMRSw;Pb_iA(oni_eRP2=Mgs}U53JRa}yU_D|H8EdC#UZnT zNpA}t2?1KJZvdlpp%vS=z*Bt?4=y152eV3v!w}&EzZfP@Dui8< z^KQp734F+zsS;bxJRjo%S8G9zn>v|E728$7p2PT7! zi|iBReY*1DViZ$RKV#+AE+Tm~Jdb0zp&{Vsr_K-HbqhX9f`)SmeLISNJbiBC{ox&m`DD7#|HCozY z!k_k{hJD{r+??nQS>pD#dG#&Q$^0jyGu6kjcnLpF(|O*wgsTw;Zx9u^MB-=)*}CAYQogaa zpj&OgX_gCN<1$%ks7~>G0K3ZQQSPd2)7OgLP)uwb8h8uwbm6^~B&n};6ZX`<=Gc|H zJk!v(9e!tR6<$Jju~X`Dp=-|Gv0K`tvI|we?YAQ1rn~zsB$!$oDaWtAm44q+>g86p zNqe5M3feqaC_Ma&pQP2}9|=+_aIaGoZ)up{YaC>GfKpk6BV6z>o-0BCciM>>#n46< zvH8f{K3+Ry=x!1FUdhbY;WzpI5KzF2Z*~@&Dj$i^az)iqnz!<65ql-n3t<(Rq;3PN z$gU`T3;?l|K@nD(Ci%Y;wIod1EJj%|EHowD**x)CZ7hVLUE-$IZ8acxnDg5gGOk1F zOxlfecC^ziAWe+Rpyx5|qjDU-cb7)bH2R~7q7IsdQS^dHWnUt5KbsAcnarxbT*NW; zY1kBQ@2dg@zE@7GIv#qA3s$JW#|acGa~H$5{Fum-H#FtHlUbHG#2{ic&{*`8#)7K1 zj3WHPaNzD%X}IUP^e;D-wfx`W8?1yEapnNkP1o#avqC?c$2;4eeoR;I%Ps0Bnk=vyRGpQxCD_nti=V}u2}Yk^mo=d}*nX$E6Dl*0H_Mr@GGF+8WjeS`{p~$u zJyNNx+M&q3;BfZ?I@ongY+yNNNHLy-UoaC@<kG2N4u{ebs^fB>wY{C1{=Lu&6D0Sm*CHa07cn%n(=MzYzsn zL6fa}9udeu$|m5n^2Jc z02uG;Fy|1fsz~rcaKG%f!67f6;?26=brT(Zz>SawjN<^8@#i1)+XBoLfAKq@N%%c?}p2+vgGp=JqeuC zce&wJZJ1FE7(*kQyZ&1wZ|_bfh&eA~^mPilaj`3k#@;h(paQ7>Wqr23MRsP)l(?$D z4}`xlUARnRxj{FiO74EQN)u_gUu!x<<<#)xA|UEzX`OXSZfDFYDWGrFcgj+c-32t4 z%j4BxN)xS@xmgvUYL8vTF>5(&`{HH17|p?bs-jiFf-yHo{~GWtgwatCG8B+a>!+w;O{57kfm{)S#v(1^#lWVt%uZ5uw?HbK^?3LzxNOlP zzjDv-WviuEoF0Egw>VlFG>ph0EW)^+TiU_Zea{ZedEtTzUTD>Jffrc!xNV9%3yRQ# zlODLa-2`~D0pJ<*w{GGW(hmaOL&ix*MHm}=JvC#O{o9*w6pP0|Lq&pe|vlIj`v%Z zs>j0BRI;t#KU;u!-S5jAUR=}J95dwQHCeMQzsglHIB{9$)K>h7B@#GC;^eAk*LdS0 zJZyzTK-m_Q7V_EBMr{*?)#!SawHhlZ_7>yu)w?90iluPvb*u~52klkA->8}O(p&1+ zk1a#Dg2n4$GO}R!MyI3SL!EmpZNCG{=ipTRdY*iq?}$!j;!VA-pI_j;obL!$XD~k& z?_2QEN%TJy+-i2?sU43c#*LYw>Sx|Co;2-m`)ZovQ}ws~4NMjN4L;B97d%(-PW~lu z>dUn*vko2Vs__C0JN}yJdSGjRUX0RYGWYBbF}XjsBGBIS7qbyT(uPq*P)2YYW(%&`CE z7}I4F3;nr;0`qT+WFZ9!*>J}`F(J6b6e9+YWPqSP)D~K zNuE9NoL?I@a{MoKIYxa)4cCh5FphWv|6DiSL{BczEa?qJzi1cb4nYY~`fJn;gvDG6 zlZ@avR3OajO+WT)m`$^3qhmk{F+d+655N_UfQO7YJ&tA+i(OvK=hM&q0nRONj59$9 zudwpwj+AC!W)A@t>c@zu}JEo zabKsoKw0$4#An9ahMZ1M{-Gw(1Kq(Go;nLRO_t>MJoUDI(>#)T>V_-&ESAYGeyDh6w~1N$(hWh0me zJ8BO0nk#{1T$9U2{oVe%-NV1_zw3+Jy8pL*5cc7q2_~Y4wc~|u z&4)}+LAXG+3r7Cq`tD+Wk}PG@;PPen}{?2>HEXkz&uOa)YXbz>=fVJPC{f~sb`Zqu!; z@Z7o}8`cG;xUJroLduysyW)$h@wo63{`*o#{hD~+X~EZ{@Z_@lgyutsU>aY6n+GU8-ImCVoS2>S$H#T74J^;r z?(V^p)AM9qE_3~IV|~0?_claV2X6SA|C3=hSYY~e`s^4WM4<7{1>7e1oNstPplhaY z_A8w;OGqi$we!Bf$?eadzcC5@C~0R^>C}=p1>0vWjc(?eZwB#vP87iT7-a&;@jXhC zshH6y9-j{5=*NCMI~P~{k3aT5eu6)Dh@mMZFpwahq1)@OPD*SFVu+BvZAhpA;$W|y zWz)RgGD%7u%`cFc9N%3xn_o?(Oro@${ye)rG3dxm=)?+?;n_*yIyFyslg>&A_ZO3x zuh3-JXLja0IRK^1Ll})#z@n{AS7MO~hk7&2>b+Y`;C$H3p555gFf83;V_(?*j{C!^ zFfq-E5HS{md9JqXJKP>qLDt}@_v&IDbqh5O3z_`7Wo~UHw(!AF@L>OYu>bw5*#Ca| z)6>nLo<4i9|2^3Mh&RdeY>=Wf-=EKt`MYHn{MR`4D`LcXi7n(eOF612IT6 zdxsqRo@&fVO9jIN$~8>bu~r*r7*zf0{adXh7elb4?{XU?d?laeSCc`Stp%))t!OVD z%v;f`6!IqEHwys;^1p=)yz*1%=CydxieAU(DgQa8a-nOs2~*8ci-{Lkr?Yg(VH<4_ zUi!p9E@nFO@e3gs{52I^m-y!DV4jRy(K`qeTPs>Ou^abP%6`~c*uM$vBM4)`Q~p`p z6@CHV9EySYr@#=wz4L>n>DUbbd6iFJ$CLOxnQ3osc#a!6rZ>fek!yZ4p5vl+K*0lo zmyd8?xNaCkv1k;r+|!iFr+*=V+6UdZV7vE}rQj7Z;NmBm$G%)U&C`iMPKeoHvs&eb z*Aqy68ciqYN%L0`eNSu`UUguso@Ln_nJD-H$1gEFvhk>ma!3ts-^Dr%@=^2- zdXNW&H(5NwBdVRQ^{K6o<8)Fta`96ZneMjH=L?*O(<{v`-0wv)nu-mudfvd?G)B+w z1^!Lxl&U-4Uaa@JIF^0xbh|s-Oq=E1`o79byPQIb|-p7vTIFasZ|ph+DlV2G&3}Zc{pbcx7=L$*`}EZclHS z^9h`;~u+gzOyQ362^BcRT=WK$E{s z1lDHdLriOeZ%&`Vyhb0fv-vH8&cP8ED*6v5vwr2em>8pIn;$w?N+6>7vjb;DeDUqi8P7CqBsV>y9>O|D%93w zmY;_k`^l}@_(iz!z;3)L8gJT-_ri^bcH=G4cuO@_dY2m!b?|YkpWKkJi#N~fCpRVT z;?0Yytp~o=dsSNx|9|%0uDOvUNf6UB&G;X3v+az)42S|ivRF+uX1BVE)l)O|(O0aV z*)2O*GTVKUne?IunaOmfhdYx@wx);uC$hcOz3F~} z!rdbxBO)U*6ClW9^`LF0NCNTc?&0C?;U5O4N8SJ@fboV?g)(w9=QtAsDBQqxt-z4( zD~0DF&V|MMd&k_KM9VLdBxZsba(cQ%9=E)2TkK#raMlbQUmzi9Q*?W3BspV%pDZ9T zUgJt+2*IA-UaiZACnClh0g#3q#JC9D_s1t~3PC=YKT-~8X)>&CGOBJe_C*pkwOMWE zO?j4(P@FnD5i3OQ#|bKpp5!H*-<~Incv+$ORl+-i=Vx@WNJZMuFXI3>gdwuq=Mtk^ zL+jKb%ua51M09CSQ-fiUMWZ6#;8Xn(3oEYym~iAAQBrl8<3%l*lIY4iXmA$qD=IEe z@PKeLi3k4wJfGTJ$wtJFGGzY9KU7L5f91WsWHM0}O|cvuy|=ODO}uOr#%#a@h%g0V z(3mRAv$?}NzZ?(h-Vwp#+8bf}q7CsqQ0#Clvh<3Ta5ko@T0xiZSFn^`W(}|ArUbY$4k_U_Y8IEwv3Tu6J)1CrKcsM?LL)00nh1T8G}-8nF64 zhi{9hqTj(6$4d$;fHTrR^|7OW*35_LIfm(pVS1Kf;0N5>(+H&|9LGOXh?nuE1q0(X zRK5XaLh^O^Lr^0XEzHOWZBULv!}`S_B4KbR&8ikXWlc}z;JWcqS^#PyezDf7#2XlUEe(7GYFUaf7Y zTY4XZ*RAdKgUZm;m)?wBWN9>yeqs~TxM(SXI5f5zC@!8t!nKK#7pvsv<@Yy%z)1fm zjY5!Mag^nP7XxO%?_((M0|!vKS?XPS`0N01y2E?VeU;=FiON2(KpM0Y4A>5W15h)< znF4}(S1WY76%bS~S^G68FTn#R2zqwTN%a9nNqR>-6rbwwNQ)GaFgKQBhcG%Q=M11# zOlnAM%QJzTMrBSwhOynb;VC`H~6RM#Z_!dZ3>kxPl?)5)mKY4L2Y{^t zDbm+e6jG6QhSx$KhF%t>QfqD8P0%IFSsDQ1G)G46jMrF}UUK2LyBsN2-9boPtu?N3 zG|3!GM8Ctb_3;Y=jK;UOz7Ij_SReZ)g4C>KOWda=?qhWx+ZvT@(lri_AgyOei&Vvo zBQnE7Aib{UA?5$o&XqyoNOCQ1>ZM5TqIfRGvc0?Tui*Ja6FIwn#Zuv_Mf6Gzyy}X< z-3RHUYUw@Ww9k_PctoLIRXFRTB>oVl8Mhf}Czxr&&<`WK^3(nWonAEL@3bEs+?bHDp#Z;$bPYAcuML5N3Wo9~7Rr?0VURWYw4G_x>1>ORZh2b-a!GRu zNdK&8&D5JJkmZroC5`{*UrXgENVUUb?;fp9j6}LyZ61>+E+9 z_u%{=93E`q|L){J9y~TlE-si7D0rc1F8D%gvhhjo!a1pjICdn|E z=&8-W!_R05m`~i*86R|}@IaHQKzoUwxH_fb1(W!oQ~~vP!~Z^k8Y{)5evRgqtaXCV zZ4I9Fg368X(x9%wCDBC;=`f&jRlJxDFk1^bq%@^@kqx2LFF~!2zNFeF(1hFBl#HId zF&~&`3JE`sm|W~Nz7NwWBxU3I5?kKu6Z}QV*8$ZI)QlWtPZ>yq%ixmfixW)w1&?My zG=ZqfMrAIJQ7~iF-csOogu63RaLm%=M+Tw+-+$g3s<-ejDUDJ?Ys$XO(vjb6Pv5ic zH>esb_wb>5PJ`qX=($sBN<%EsZ&5Aiq8|w3d9@IFJU_1 z4K$FooS+48sj%e;=L{uBjXG^xA%_P=KGS}B{Os`$FWwHGemi*e_~o%Wps1lx=+w|> zY~>S<`}*ADW??`bO87NZ=)h93ea}Yx@Dq~ofDuFK!A>yiKnrV-H3L!xa{nhr9-4)Z zn=KmX+eT5ZeTI!jK08k(HH5mii^;?s``cjrihW#XT%V2cu0zy*?dQX$;Us_x7?z>m z@ULnOFUEP0Pr>7*;&C)ahn~K7DxuVpdunOR;Ay#-#-C)lZ4FY{=1*nt=rT@OV=AA> z8sePUY!c1!^9BG>#TwKrOD0f*x|t^=P3D)3-mwEjSw1=PVcLB3_9VFipEd6<*taA0 zZ8*t7&wqO4=P6I+=7fiWW;fCR`Xo)V>}gsPp6CA0%KARNdxw9$dxv+NM4*!B3oeoK z;C;wN0v=kMho#b#lra|HlJXSZa9Tok(?UWv)_RyE6On?sSx8@Lb(;PWR^;RoF4cx+3NWE_2oAdr+CJdlpO3Dx6e-qiH&_t zh-`?E5^tW&86uFF`e5W&^UiVO-f5P5&t~w@$XCL%FRHvzOTZ{O8}#82lG)gmHj+iz z+LbX8F@|y~%^?)QY!qapU>r(nn1e^p#We%h0s>JQ6@r$uu4}_E&I0|(w9cP&Ju$S0 zVX}}?G7zNz^DK>4RRoWqJ(FV{g@Me~LSZ;z(`L6Wj8w>bc1f|hG|%89c~h3ag&!`_ z=<|R`uqE7BvCJdKV}FN9cNgJ!$K$l3B>F?1nG8|f3c0@KQ*(pje5}2Qj?eV{ksf?N zAGUci(hraxTHhyOSHGPwU230aO~t(T_{w~G))J?beg{(MD`e4V8mT4~erZs+99)52 zD~Ao=ta6#DyP+dW0R-bg0%`o34j5DUw%Rb}RrZ`Prtb|H(}!Okj74cX>u#2SF=hIx z4P%}d$O&Wm-heTE_~pTv)@{O=?`CN_=D97GC1>hYLe;5B(dm~;&3vx~qE43qHddcp z7S?JfH>k%ZsK;L*sMW2SkRI27v?wsjA?r2bBo}Y8SvZQ$Bk)n!)hd+|f^W*f`G_VK z=+iF)6;1ov?O=C`s-oDa0Q}0Ls){3d-=hB@ul^}!*odlMj;Q+Fz@6~O6-Re%=kXUM zQJH6IDcNfC9=q}X*!X{ZF8_~%N8P=zd!3E{$HxDIq@_tT&{Np1<^)pV|It09{Lekc z2o8JuaQ+`2vgRBAkGskLoY#MS^5VHi2=|118M8Y)r8xP<+lNh&yVuBkEV3+zX`3hc zm6GWsdb5b*FfF7rX4KUlCDUmTkF&gd&P>fN3@c-6`YkSr#1+FO8&<5Y5q6>PV46qu zQ&+=#BF{sd;yP_vh{s3&ls;Ut1(ypiUys)a zKZG&meWG`LdaKWr&>#}_roz$P4?hN@Ii-U6=_e2qh53j5_5oc^{4*u&kD+VI^M)uR z8OfwbjD-hzf;-z)@;+=ZVgaXr`F4hW^YD|v#D0SxXH9)D865;3Bi@)v)fm=J`)lD_ z6#q~FZ(7J+CEiD#D=lQI5n3(e)|oBlf(CSrVyeXV!7@}TT5+pF?)KIFia^Df{UW1HzUxkvPFDG73fT1;%Po-TkN@LGM{fN== zVmigFEQ$BG>@sUs0@lE5HJEd1mb-vII_-m2r@hDi-&5(ksV@2?f9+(rzv9dHu7Whi z^!F}&h-sD}FbSEfG-2R~7taKOZ|Q(z0zVD2xw**J-J*;d0e8r|Iv zgVwM$TJ!!2XDo&e6JA-f@_dPDAd{!)8ICDS(fl&J_QpxPGl#sYevL3?!t48SWIcV9-h6b*Cvvi8|PeOr(+|7n0G}LIwiK0eJE_Bpr`R2_t?|cD1sWndKGy;(` z9B>JCK!Q;vs(3e#cLR?{;atd*?1D&_Wf4Q!GRWfNt_5397r|ur+1nFuF=K<;WdXBK zHo6R_fhWQv_iI2<*8U>^62bF&bv0-ndrLSofyo%Ns7>3I$(2py%lP8k4Q% zz+^p1?-EJ*ZYH=Bb37{*6>)otVuoJ&RT6xexR|Fm03Z?2$xjd}?@q3l13W>R_t)T# z;peN}Iz>p)?Qqsy%f`83OWXM{nP1l3Gq*B`O+!~sfRx;sEp=V{=9k@?Hcl5JZCucC z-Az+p^#nj>FW?j2IpYEBXLDVf7RG{=H?2hVgCUbFOxYk?;01fWm`tt<5&po!sPM8h zNx{1w8@+qJ>n9SAMqwpA_I5l5WV^Q^|mvl zxU3?ltM(@LNF(r%VoOaXp%P6De_E<@!UGiFqh3N2Lik{JAldOeHZ!*7Ieyp@Mbhv- z#%wB43=Vs9{TuzJHoilp(fEw~bEv>(RqV6w>4iN_4e@RAJH7>7#vx3rTlA>>kVLdu zpA&gOoWvn{A=xxQB|Q>8{(;M!E(mhDMxD0-t(qWAQkf59Mh5a}1mP<*ZB}Pq*@qjt ztJlCZeIQ34(*=n?%De3MuzS_wx95&rbb-x_1l6fdo@a zA_p^cA#xz$U9!kLOG#LcoKtbGTI|rY3Me_d3HT{gP8;6}p0mQH=L;y!xZJL>k9Cg? zAB&QGrgRpF;z{)*0jzd zuNAV_vJ6-B(;@g*iYzB%yQQ4*lzn+rOjL+*)%wTC9Wk(4eaGc=CWg}SgI)Ujz2?n0 zbZtZs13wejyDm|E4>BMKRW^Yt7ZA(kC~C)zP}rW^-bTSJyUuJF!Y+75sh9V=lh?0w z6y?t64c{i<3ZYnWI_IAlsAH=DE8dvveM)&i&54_LZY#U)dy`-~90%UVBXblH{{OM5 zB3>7(8+>y-u?QigZ(Qr23IoVx!WivI5{$EkG5cx9bOGW>=U@ALW`=etft9qTmBDpd zy!WlRM#JHfn8rT8Y&=s%EZS+`KS`$H{1Uc)1sRahZNI zP)RY7*WPs}Q)8q9ESbgI-xBJryJ?)7KrQ0m8pPw;IUkqm5yN#|WA_5GbPiY_+vAdv zU>Q@(r~y)xQ<)UqfNDaDl*L~kv@j!CI-8ToA^!5s)X2^ehx&+*)DeRi^U_!cJyQw& zmQl%n-VQ!Ti)@}u2lHq?3717F_4Tl?6SXeT>wrIKeg=rPXz6|b32o#p{N=9>XA3JL zt6&)u;#qC#b03lKrY`ppGw8X^Zw1PHW$WfWB0Ilq_Q5rF!TYMqEN>lW@h>4MQp!Joe^S(I`CffIKKT6aCCu z`3C~dbFkxD)p!f-!QkwpMLgF^gE}dM0po^50xp17I{91}ICU63>Oo6Aq+rs1jca|L zN7Cj8RrwHsONSP0lia{!J)@e!Buw>n9wAh951z1#Mo{?zplOC?e2uOr+DjIFePDc{ z8w1e$9;#}a{ioqP0F|Ksvk&Fak-vY^>+I!!NH+`P$$~$NX9%IvZ2rm5#&i@l||FrS{ z*8RU18Lzd%v$;iKwt_2ox&QZJ@9>~!)PFiWeAwIgf8UY+_YZM2S0wu!>t2p^h_Hev zNIkdCESiNN0JpccNSz-gOawdH>bAY#KK=IO$>UciB6s4h_k9{ogY;T_qs+6_YkP&} z&zRBzLAGA8`Ci+5mAsxQjbA0Z$;>+!T`<#uvX4b9d3didkhI;jWrk zIXr~BI|^jq23c5y2-PddKd1DZ=BEby=xY@>^vUQl1f{gVPOb+MFVyfNoXhW8Tadc- zD<%%E@xLvee)kzgH)m@fzfheEqY$$I3Krz`>>;fzP=xllc_x$0@_=gUB@UZ(GhknM2oqEI%~sh!re?D8KqGyiLt~&}kk%Y%tPk{X z<$*%uQMoYCPv$^p4veE2CY%n#G1R+~CE0T1;_1&DAX*Qkrw8SrrKXwMYQJ4n;uI?r)?s;R)I zOkQbu9sbW*3ufWV?64JcxOk8CPDuJp)+u)CmURMX+Ggr0oxU|Y;Ur%dJMEQq5_{?z zo%UHL1NOO5I4A2dIEK{9bBlk1zLD?HMa8-=Pooh#s*BMi8izsb=Nr7G%fej$EnFH( zzC#!Al`uV{iIN!tL2r12a|~-yR0Zpq#Hor%CE_Oc{FVv3O{_Le`wlg2n@Gl(sFJoE z>1YRXHA5mm)6yu(zy#}PDCZ3)WXvCgvn-l`+alO1JC1Hl?n|x)bj-xsg?F%{%PbAy zSr(>1!=msmHL$@Q8_JQvJs56)3*B68p0a~h;d^n?_FAT;+j`paH~$*YO%pdOL!RTEjheFL2$+qpTt3$scN7m=g0u<~!bo`z+5VItO@ z28{n_jXwY_@XD!E&=>lCVVEiV9o`Ur!;X=!EX5|*Hwq-uZIPq3Xp%pZno9- zCq_`V)~&TQLERnat`OHtY)B>t#g41&hp0DmuJg-dKG4lk(G zCGWbzmfXx%L0G49k8Yj4N@T_ZiRXrnDthaoY`a$C_9P_d7uC+&eh(=r`Eu0}M~XiJvY=xZBS#0jwn zub*nIy> zfQe_W;QyZ|taK<^HwURyyH3xV=x0!XVcr|UM7GSA{|Oe$4B17VQHjP6VJ1lvd=WCX z@^HL7NxCS|KBYpCkP1`SPiS%ownjC7by_@Ss3w}h{8Qks(*Wu+D6n~^crIuQRDti< zf|#xzT^qd(@@6VH*x^h7FwAA6FdmEgUeZAV_n5Y+5jtv7o-J*TvqHo#@7m|{H0T2e z>B9oAYU%STnnCJ!ofIe1m4g$-+&^Ur3@?u)D5(R-q!3hU3}iVh=Me;qg?V;_ID$&( zAdg%o<6C6f%ESz0%G4fBy*{9+Gp)p}tEl?1a=7WhhvSw>S`5-XX8CMx?CUopb`_ zW+rnL(YYZ5i7s=scveZ_w88_333bCC4~w!>9mzTVEC_TaoNrmg!@=;MD4w64>1K)ijx<-by4)6J2qFjb{-U98`dB zt0X@O8S*YkAQzz^!ErQ#Vhyty8%hQBZIZzR>)GSPeatJeEaLM4+Kbs(5$cP?rcFk( zx#hcNZF#?lqTA8{>cC!=^V@8K%U~8ZIxTOnX%%Nmh5p=jgyub3?#g?nQAA4Dx_h?! z1y??PqHm(Y$Mg58e*OY=YP>a_HJ1XlO5vxp&a)b-Y1f4VCaxeFIxak%hga<1`6XM7 z)ik*ZR&Y!yX~Eb9+huKBedWn({W8dG??6m|^ST13;?^lXd~J~K)ShP1=zTW0j4m#* z5kTQiZw#k`h70rf!nhI&2?%Z0p|xBhZY`xSreo2zM~}ip$|HqB=*MIXQ^ykz_`{H# zst6Lh2#GP=U=jFI(Ja~}$A+=!;73Lf&NFs(;lScfWU}f!h+@oOc8Mh?)c%IlI~#yx z!j3`s(Wowqhgw+44EvY?auC+jHl-Rwg$jSyMk`8Ml~*_~)FRecCY5QAQ_`BDuktiH zQ>+`2GTxbWyj*qU-PqQq!N-O^Pt)3W$aI?b_0BD6u7lzAXE?Lw34szqZhO%$>ZIPa z%}X`?FmOVw2IR`QOKj^#AU6B%JlTb+Cb_Dtp|}QCRImr*6YG#UZy& z+tj@@O>HbwcW0Tpw}z>WUCN4}J1|RqZmZOvjZx~}*rYI3NBEH(y}_ra#$9VAJw60@ zD&=L_ZspEh{{^~)>qhD2-h$m@-n)7^^UnRBqfhSW2BJ69n_*A!U2u`0;(mXsc-0eW zc8Mx$xt*x=Xpz$Yh^VJMx0vRVGQyZ&B|=h{SLOUNO2>-)o`pan^RS$-!Aa5}+hD;o zvQaW+dPb;Ht9g|fSA~UsflgtxT^fy7s>i}95nasqOBxLqn)`gA79hIbF)0@YjIj76 zEN+a0PuZ}L=;Qhp>ES@3hgB}o0$T03@H%TVaDGYl&08><{PL`u8~fyaw@==G^W+zJ z(C)xEdHc~*Mzru9g;5!^mc~;VZEc>`7|I+c$!tJ=*?9=F%TW;;=W-p?PPI97OWWXk zOp4D?T_b9`;ltp5;X+N`yP}m~XkKy4x-$_#pxMJLn-cL4a$k_mE_VP25&0(D%HtcX zJEO=dS6$d*1_P=o&*nGwtj3!oQtPTDYRAS7^xVk}P^yol%a2kUfUsDYw#3ksb60h; zX4&>oo|ORtqLjgE8-}rz4F|b2Uu(P2eZ}qCvlWzX9Q96-%7NE<9@pKso>OARb6Cio zDVZejdc{Omew^Wwt{I6n&~LH`I(>!;?CJ=Y?v5X;d++%}(& zF*P75J?Whb8)6<48ZEhbenphq7H@T3S! z^7PZzBy|>b9ivTU1IPmqDtGbC!|P1^mpxJ|+4RuO36gZP8K9J0>1`g7;YW+SGbsK{ zHOW36k8|6CGRQ5*ODTBZW#1}7rMYXrFk%|=Zx#SuzHS)a<~Nf&QZ&@oE$w~2saaRE zdb`$~DNDDyl#gWnZCYR5;euvUEk+oEKVHOR*6+Gtv#-U(loQ2=^u&;JPb809)+IzH zhSMs;eF(wI!iO~81&p`NiaLFkht99PT`%%}1uxazvgCzDhpYT4w5wd^XL<^M+rrt5 z`a`uhPnbY)sl0&MAK3NeaPX?`{aRVPe0{g9kKI;gE;#Gc!irY9njzoT_uo>-U$?)n z*C>$S*vqL}br*o!QceHR@`ej}fj!_kWjRV{q-C6m^NrkEf>Q8g4cVsd#L>1eo8^_H z(1y|Ux%g~%7=S2k_>Gr3-palr_h9j*fpZ2qGf#JC%YeF!9AJ#MWjNIuUt0j8ZjAn~c7u@|V?f z@swtvI~A~KL3fEcoA|cIRAiXfeWwm=6G8sMI`b5=!W39U^((W5F%(~#A$c#GhKaN{ zd7PEYThGI@{tzU_cI{I)!pY`}9j*Y0HFHL%z*GVktNpDoh09|bFP02FZm4ved0{Bd zHX6Gvb*80JC2t*GtDLn;es@W&O!^!rQ~W_r>H2zFUbpwi?hsnsjny^O7|Q5E_yNlODe(a?K<=fUR~*zg ziGE-?4=H$qf$Q~|B~+k+HbtFjlK}!cTPH7t0GmbyLV-+^bPRDIL-Q2hj8e(V3&$om zul%}i`01+ExofxSEP{6*{PkMeSpRSaTEf|k8<#G~b z@NR-+Q`sXuPy$3u@x@@*u=!Xaa(bzx4Q&9Pg9JzMU=+5PX#ylONv?MDW;XlN%6ERt zsVkO7If=B~<(g!#<-HHX8G@wm)6NvhK@l^GN?0Mn^Ja8kW}&Rf%an`MLq^l@Hq|g{ z+#X}#i4wLAj?&7lmlke0X@V^1M++3Li6ioQF51a2kau z9LOU>!xfX+(@n0OJ(++-Sw7}tZq&v`h+?SRi5?$K+&oAx4(*j7BA zbD~OndT8>`GDB)@9$GetH;JaWY^+p`QCRnwc*+gS-7l77s)~64CCW1>wgku^B=x1b zj=#c;j@OvXs(|Kq@GwbaC}2(c$va<#OjT#R8^)mcy)Q(rG`E0p!92-c0(=y9^raHd zZ>@L`Yzb)!_Xh>PDb$Hwj>{=S6tgEa8}N`-t$A!y5S7|>X4oj{Jm701nq)(DU?>@) zXArV2HA7K_v=H$n!^Z?Dj@^zoPR&rF`ns|+0M={gXIM!;rX=evG$7kjtvm9ob1!Vu zXV4nyLH^a~6_^PhK_)#Jr>&lUph`WuFud3DoI-->^QROn%1?l;WRclJ%zy?!oMdZL z|9eyax4i!M!NK0wor6c4`rn)S-(=Tk-wnc#jI{t~T%quHt@_`+z5UJs)c@{2JUo23 z_pry#Kl^dGssDXP^}ll$A3(S#5iTF*gGn?;Gt>6gwx~-zXHo{-E8A3U8Wg}uG!zoX z_v}xlF}a4o2TPQ(Zh0pt6mA#mSx*BnoN)F zA<_5{+8HglM2dm~dhd*V z*#z)xD?VArw2&u!rE)(<+gn<48TXDP0tN0g(MW~@&mk`d?J=Hn>1f;XP(!clyd6bZ z=8gp@SBuP8+ZE}rwhaeqaH>F-gH9Ftx4AQp{yAR`pL{_D1)ij5%}vjq6uM^Nc4t4oeCdZT*b7L*0}0GCYB zo;Cm+4B|w?UCWzKDLJSei);kaThX|KSQIFgR`ensdB$3ZJCWmi4?JHW&C;@cl|~FA zaBv!IET+hzhzVJXz+ZZMOWi;9-`UxDnv5V5QhdRF-`*+$1z-XlFOpnQO zOQ=SGPoo#hZAMo*iW5%k5i!oAHhrbs2{YxBJTOGim_zm0L35T#2Bkm zsG!6n@2})wg>QT8+kkQ2%P<|dTkRcrMH*JV-#_wfR9${n@>RWyi)(lnS6xu=V*B~v zQ0#L7t|8qyClF8YK8nYEUvc`xfQ5czujTDGn@$q8P(;HxUwmTgngpTddoH2oA}~rC zJNT+j{?AZmRQUzSa|ZD{1F?e3LtWj#28l1Q6$o(r%x(R8BRk`$c;asZOC#1F;EZVNym-A=dh#0iu zz-^-vF`ydg62<4qU6F?wfB`qQwS{?$ufU{xohrf+(C9nh?pAi>!Z5~~T5sE2YS;!# zn{+$7y-s-(0b;F@g9dmea@c<*4WOqsB^`(McSZs^?haPw%d2BM(9Q&tCr-cFI{*bU=2WId z!PCIC=*46*u-XoZ2wW_uHf{6vpXH``5==&m2`XA;q>5*GrMu>!vo=c2SsTZ>c`p=s z8m+tZ5>X&fc8`$gQX8k#{Ql^4`@LHcFl*yZl|=cm;sC8u@N7z)*R59d%4Af-LPjq` zDpoSnI%a)5sxXXEpG=Rywwb}OAzHz3c-4bw+m?7^yDCohL9xJzMj}C;YNJ&>*su_~ ztPM7_4t8e4AO{H$fgPeUze5TB!TC)LgieXQ?XAirGX{Ywd1mBLg;B&er=2tPI{ez* zu&XtEWD=U1Wox@4&6e^Y>@+{Y#dMXV@6RX6RSk2iutl4LdtwZo;ADQ7IaMs2gw>|l zkz}Sdo(y(&b|Bw??BY=lUa{A|%D@9{1nH8KF})-N<8dQ6f}~w7Z-_rJqXGMw!xNHl zL3t9P-qRQK@8k3$Qvkz;qDWF|tFW~rb`ThmkF{^;-#7Fes|>>wR`C7{FwyN$E<}6o z2)tjzSArVyO)CqB>@|>cK>?y1CP)2X5XI4aFetDS_`5Cdn2yix&pZ+?IB{o^OcgQw5m3|>8cdE62{;Fy!0w@T9Y*(?}^+NT4N-t=OU41>vF z9Hqj|n@PcF*T7DLG)d;klL&9HuLgx-HJpD4(g=P0#Xvf#psRv7+3mL>oC79SGXf7W zpt6CLj^@+ZSd#n_d!v4Ow<1oL#j6#!fZ7JlXT_x(pUV)Y<`u|TjATo17=y8~1PdRk z@CRfaGNnfp4@QeLWvE0@I31KCOr++?juae_Q#cSRxHr;R-dG@bZR7wsggqS(dV>bG zcwk}p&Gz&?$Ft+KTfm6JRh@Wfqf$dPuxG&vux5aze)krxn$c@9gqw+=1AH(`M?PBK z6Sl6HZ6ot-J@{y}Q)!b}0%J7H4RIzSpJKLB5kmt{tBZJC8U6%Znhr*sDCon^SlgIQ z3ND1VfJXTe60og=GcOJ^Z6b(O4?ZN(SdtgyDc;FExUQ6QY}`d+!8(BeG6O{EkPz$J zyV)^X8=qpBrn%LZ!8LXedr;ZXxfg-gegqs+J2#m*$_c|!=>53Jc-bQ8I8MSW-k~$- z0X9M0Sfq%Q9GKh11QEE6^2?f2V(}aa*F&(&D|v3;H40}| zetN|PT=G~h7>$R_VArfzkMSPJhcbH(PnzQv3Ax<^vbmrFR#P@Cj_?7*Z{eP19|DLQ zn)&ONGxZF;c=ED=*pdsY$VlYA85vSX;FBJ__pXjuU5S&BE!;VAcm?11TV<#d{rln< zYndGJWt*;whMDAc2C1U(6?+z@(+CsZ4^W$DpN=MrF=XFj%Nqpj)Qg*m^1kR z0MYV%9K>(wd#^6_t56h@Mfh<90R+YUy3SH7r-Hux&0KbvWdXeUqCmr^XrmW|^(ay+ z0|iY=)P*Bia}(#s(pQ?cC&^Wz0-ESzpw6$DLh;VhHqjo?fgcQ7CN;RZNRr)*d%Qlm z|6D^S!hkotj}Q<`tFeDFVv9G?@z54fkuB12kS=2I zn}LNm8MLJ-*QhgTZ;IB&tn0za(xq+NQnSvv%2n$cmS>XNS^-l~w%8+vAhB?eG8Q#<%-30LAaJJ7(?af}KTN zg;-Ka)pqn2nguDOH1n`0p6fu;z-JfWsxe&DW^f-;SF->_AhqeV^+Q(5(Iw+=>|X{R zD((ma)#!oQENk=t*ZoN_D{U``V4FfI#P!QjFk|g>(tIfzdcQk){R;gv z#X{85hx0VLxCm2^PXX)t>?T{ec{u-?JLAtx7Y^f50v^D9e=$GbedOEJVFE}yV9*Rd zgp-DV_8%X=dG-9&cTJ}ztO39HI6q3#F;zFsv)k~w#Fyad*p>+i%;+xv%%*Gx@ik*} z0fR7>G=)Vd*h&-9Zqv3OIBC;4DB%8S2&>4o-WA3HZQh2QX4QDhkhW7X8SB&S?d`X!pvySUAd7wqS^LAK_x!2w zX|veSye0G&zW_caLVZ~Ok~eut^8-%Ol};wDs(tpN`T`Iw1WZ$O4zn|QImZi-00YYECL#+a2(;&ec`M%@MFGKKk_O&yh`_Wa6f)C|*U}DPt}M5oqQD zBQuA^R$#PHV%MyJPD+fP7R9#h4r2?HVBV3e^SlsbMMj;r)AMiI;ARM<9S+vY@RMg^F7%n`$eKM z-%{VJ2WV@0V<<`^TC|}cF3FR2jM6!MAfhqmSh==PdlHa2I0r>0&nJ@a#GO@yC$SL8 zMNkzI*+o>ej_>Dn3yaJV;iPE}AI3&Fc$AItdY!|%AOiJ|Afyb!vkwfv5URFj#ef76 z;~`n$=LsZXr{v(6Gg)}>$HYe7w%ruHUMvfzQRwK~^Q(k0*IxWr7 zy-pQXg7miBT1n0*C7Dy6zhh}3zAE38#L6mJjB*U!r+h}mIdYjJPij<11a!xFjOvlc zO%oawQo>lNYW=B)xZ)hB_g0-VFZ)T!BS6BDW8A{wFT;dfPh(|m@g_2%Ed|f<_#%A5 zPPJ0a2Rp=LG_dCdD_90B8^J_Jo*(eP%dR7*! zJ@EK7La`Wi(;c;=Y!D1tM<~O>y;?&7|>1g5|r==FV$9WVXkKdq> z>`cF};NP+D>{lnHV!f+N8T2j%oJ|h)P5p;W{fGNd|KZ@#L2s|Ox2gZIssBLF9*bZy zIA_A*4J$xY)PLCT?(bp!hfa6D*JHnT*dP17P5p;EumA9rA$<4Q+Y>Kin$0xurjaNK z338=KI?nXE`L);hzT0eXZBb{S6c0Mi!A~Re8W^Pmw8Mn;U^H)e`3L8VpMJV-@|V{n zZb|Qj>^dG@GU1V1xmsH=X}$!K>u6z2{6+W29+(HjAN%bswL(SqC$`RTZ{^w(W}SzO zNrw=n01sFA940z5(eXL_eZm@qTU%dwPbk&!IUi|v7_yB`gY-SfO~HpCngEoxWXORw z??AT3@4h{L_2joNAHVti;PD?GKY#J~+ZV@7h(jls@FVDT-gE4YzJ*66UC7NweRcM0%J%F+M^ITE&LS^us)AI!WW9DKb_?kUR9o#l|8u-j(Olv3WoV% zFya1hm)>7Nu45^}LkvtKTb#4``#!u?wm>Th=V2fBj&l9Ek?C%z<*hqMH;UZ1;W`Q@ zW6{W{jS(+qY9R|@dljYt>pbm6Jc;7>!pp-2Fv^$$^d?dsa1dIdr;#54NHvWMkZNGW zvDvP*z6~*p-b^!I8=%1kM)T1m$-+XC7lR=TkWuiL0lrCP+m;9-V*nt`17bHyp4uX< zD1){K5NXde&6pz1NYjJA4Hu_Hnk75|L(C8wjzAMGXuST(@r&apZ!zJv_w3Ezm*U2e9LdpXU^q{FL$X_aS5`PrpDk;wJA7jReygXv`0s z0PaesZIQ8CeGx}+9X{-?8B^lMr0vLS=Hy?#xCufT8r}H3`%|bVu4ilO^G2tgGkflM z26G$8t*_Lw9TgJ&(?sO=z*?SchX>{t7DC}CT}%WI+7s~CTj{(b#fXy)Wo+1B89Eq( z%qG|SIga{LlyUn9do{{XK${Z@NH`p6Rx;t8tVH4@A+tUJ*9r$*ZXXzOdy!fk@(X30 zr$I9At8C~1r9fK05&6*Ku0X`l*Ba@3UiF=ec^F^+fSqQXY<9Suy&UA6tUuljn4@oCv`i4YXp5G4iuz}!-74WF(AbxzQQqm^7@BYZyUdA zetzcD*}9E=+&#bP*xq|yiWfD@lTYE@k7=foZ8Sdgy#ME4|M%X9{bu!}&xc;=lTYCt zPrm##W@)%P<`0q0&+z!ORq#({`xbx+;srRsZK;O^^8M0VW1F+W1|Pb)ZEqp7 z1-hHy?ZmFwfW|52g!r$wTwD9Y^EYpQc>Kb>ML5OY4=2xGedmEi)xBZcT~oGYZg1R? z{RU}r6^;8l_+!Vm3FH1NW8tOw-KeShWLJFnT>s0blzAkl8FV`PdGsQegt+K~yu>2( z>0Cb}?0RIY`oN31);=M!TOO5#p=OvW)@?|K zugO52UxE#~rgq?5?=U)?ET(a@>P9rqQvSvaxYwPiN&kQ^yP$sL;&fM$r;Vf?N~azq z&CWaZ;CCeNF|ZHgApHxox;PCR7#Z^_!>1Z@h2+ChuU)n8&TMr}M>(36qQqHCJOt zlQ3@3Xqf-6S6;zeHM-(*&1b)2kFME6QI~JXUdTj|4Rpb?Hk;wy3vz4X?SH0=cs05c zj}JC^x~5=WLcW$!Ed-g%?>7C;MIxftZ~ERo<9sK&hLIoH#f-szOhm#|LJ%wC1zB%hz~SnexP_ZH zza?fVlja-ezP`C5?`QtoCo^#}LF6qXSawlx-9~^xYS~4-&X!KUXs)v8h^0mumA)!1 zF@}+)Jo~7iHKy^>@H@`tO_#Q9|3VxG>G&`F_X|k}$)8_nPOoPnT@ArRaqh1(I(PNA z$TVcVA=S=~ICl68L)O9M(>Jfb&n;)%YNn#gcfBR3uB_-Z7|bBqR~jLuS-l=_-*mDr zPeD7q)N}x;?=@)i9J~Q)E8GP^tSmYrViFNKa1jh4BTD;pQalj%tmYwFy22IwUYC({ zx5PaU@i^@~GN>B?(AoAVrLCW!TTW7*hiUIWEIsTrE<}k{Mpx7_x!rMsMwp=E>%a6Uj0T zN-jVk{efT=T#Q!vLCcdv>0BY_J)Eq>AUsu?d?VowBQ=C-b>0;vYYJ||O#y{BEGKWZ ztG(VKyUl~P!ePtp*4)f3arr>`a5-0!av9ZbU$)AKbws`zt|mK$J?;2))@HYKKp{i8 zk|+^194V9<0Wn_V8^+3~nIkkqE~BB)Us-&{H(saRg{g7W)y-pB7^I`i+&W@8#5RQ( zdwHU?t>OkFlKapy@95mFYC}DtZNQZacZttr#EAV3QP4RqQN#y~GQb+Q`;8twmD4<& z_72RDbX<5gQsY;R|7^ys@+VUhSuY>Ieezpxhi%=dP0FW<(xtq3{_^=-MvCuq2~zeG9_5O84!{-T`e0cx!kOKoLhgg>$Ap z6&C>nqM!?V(iz7Ul`1LJ_#0fZI&W@H6$|j!a*9#QTm2N1w!1mSH$TNZkHnIv7<<({ z#e}S1;whFf**7>86bX_oh8dG0p&kqjpBJF2I2=4;h0_-1i=722&@rbav061m{B914 zlEFh5H?+hZza&!>v?j|@E%WhZkV*S1lkSAA6iJFk(xXNV`xB@;$`E$O&J`=rsT%&< z!dn20A#uSj3)XCg?kq%5f*g@X?Z#|{q3I;&gR<+3s{;v;J_CGqKm-?mfGY|ERQs34 zKkJ+7^8_$qijJ@2p^~!3rvdLI5m?~s}^S=HmBRK$t7L&<2OrWxr@S#f%Y`DHZdPqhg+d668Qc#QF6tk8hEX+&rL>c;C!0fL z;b%0m-oOLB;NDJ`Su!Pa@+w~94*zqv(=|OAeI5_;TF&7`YmqSX>?qe z*{SVlzHu#IuOqqcAPY73IL{Cj_H+hd%Smw=2+!9{@poiP%+Ebkh+8m#rc9j9vPIF) z9Bi#JpjbOjjuUIHYpb4%fGdPu!J7=U#XXhj6E0WN$8%t4s^?Z?3i<3AVBw)HTZ~3w zIBr&m=FS}!cvDH@G5$(^j6Y-)Q@+&W51@yh8&^iw(vhK<2O@S&8-^Wt-&k5ND!R&M z^N3x%M9wQ?->ID#u|p84#V(R`8pR=Z%EpVnMkHGef5aucOw0lXszaj9yr4MH6B&i$ z7CtP;;gxeEtM~?&SgQLw4(qi2(OO;fIo6D(6(?w;UUkJ4i!EJe-GoGDIH__Jh+2nO zi6#we=OlZopQfB$2IrukDk(&Xhxp508l|451Ds{Rh4iI4z3z!R1hEnVrp4lg?9YcS zS|7Ugd7Q&rw9vln;FG5+2jo_zzm>(`b<5&##RI|o_FZA?rw#OP9R1|> ztHm@BN(w2j5k_!~kt4A&ftAvI(JYU0QevBGT!2OAXq!jwCdu|D|Nkcc{}+(||KZn< zdJhlwH~IfJ`TwbIBNp-w)7#De-#OUp?PLD`{ryh&@bDq#|L<+`|KEN7e}QnSy9@#k zlS0xk9#PN*WyBv0&KF=&9SlU``XFW^8j8`GOrk%(gvzQ>d?7zzOs0@xpZ}>O*@y7k zc{FOvr7*cn{63f__0ireT?66t_#V6b2kHAm@w4yY zv!=KE8!DwItj#Zj_n`-3p%{?~h2*#^^lFHk9`S%6CWklT#A7Q8C*C-@iYG}h&XmL` z%KU6s3_SC+VQd7RXOCY#e)abGliXR*Z((#mbBRG<(Fv2_r%VHyrO5}T9;M))ISHo2 zap3(bNH4OM_p4vMzk=V6ywkH~Mvt<=#cYwgjLU(KDA1txSnXr}Hyd;S;2I$G@`Zxd zc7#Z2avW=HXduePWrKi`AJXV}z|b*BL#BO?mqdAe`k@_~yMd%fY2F=|BXrhtRMh%~ zJ{oy3RDsOh_ksL*Va`wCPu(yDGY(3T?Q)54gdL?f$UmZx1*XotFiS$2=0%J+#1 zlMhFb{3CJC{xFyTZH(b}3@4pLU>=|NWSzpdH0qBpV|=Jwsi_5F{?iU(if26XCX7#< ze#;b6saQi5z|%9O5hMT6dq{pJxMceU`#`mL!24D@9$@nZ`}fGxMl_%<)mvqFU0#!6Y6b|>E-;h9EhSaxGE2`fu#Uj+ph)~m~i`+{ooej^rD)a+~0UO{Vz zDY8iz&KjM9U0TBuUS6pYxn*KjK{}l{0ToVcM^J0!o}nAp*UT-WzJlY#R`N= z)R5n@2AcHG;jYh@{4-nJCyRGET=RW_pX?O2@GsB+xd}#SNgrWHIk|JNoqmrat0S5PtGQEPW^FLvOr5*Pi^U;z#Dl`<12xp*7= zWL^zXaDD*cqyKO8|Ie!bcMg~`w$cAL`ajqIWA-gru5Z=Wn*j{r>LpNzBC_7-?jK&oHLM9-f|ly^+_(QFco!f6=K z+2m}guma&v!Xxaq$twk9mq|1fVj9HC@o(VunKCuOn=AkRCHo!oT#YW3G0Bbq9~c7Q zEJ!oRk|Y7dJ2bXYx;cLJ1z0g04Mx=0I*{pHetcrN<`%`ALXgQqLu-7yBJ<+!TWg_fRj%l^D4}w zdF2@nQ6~IA8l`Z@L7Ij2x)3qoIXMQD z{oS>ES9h7xqoKs6BNY#(Odf{P0LE?#&9Vz{GIxyMXqEv&8I14R2)fS`K5pA0JCvd1 zVp{m_Y;ErI^N$*t?4YGypH(^&n++7ZS&re?OK6-TU^=`L55dZOiAt?Uie?&0;_wtd1ZmmfluX??P)cS3(F9WyxPW^xe7_uIzx($0wLz*Zn-gT;63W>_dr7J`?0KZ{>K`K7#?fC<#gC* zlQ*}wDwBZI0Ua+OzZ_7eGT6=e$z2@6%jn`#J&EA12GPFG;|Co|&Y$S7pR~=nDLFSB z`_%{L>jO-6F@(a|l2Y%}5xKWF4YeAC!n=1M&TxquO#{m=nMg%ldMW<`2%Skt4Aq11 z{G9#Omk!YlA52oD_4>``(QOb~CCAOf^VcY?iO~nV?vo%FCvAvT8huY|Z|(V<#R zATJSQAv+1qLz~S+w=P;uCPrqpZN2yvCU&==wov~HjlSqJ9%4OG9hsF`)F5`O?_ZfI z)Y$P7>DA^R`L%Lxpb58UNvgJOB&m|^G-e||%@b8QhL^dj3TFbLjHV_Du2v2{+o2HF zaoyU4z_F?H17Hvp_NHy68Y`c}Ofd`r55D^rEF}-VjV=yQ<1}}%UC4}8u+v~)U^9SFx$HcJH44;lQ`jxtgjh9A zCz`kU5i&joQ=VYiWXW63c4EAsA&QhYuqxBNffUbF;!}DH4Ne#=;j41;0w_LbjU4Xr8CCD#a3oKJ|C_P&-PoZ@gsXV6=@z zJPl?u6pj$|rWExgFJLYbpOOzUk!KeEX`w6N!kZdcFp8*uz~+F-cORcUWB)%EZp=90 zyN{jDOGprCNU?UpP)ENFmS~-3q|`2YUz zf1n!FkqW;h=-D8-Y49;G%SrHs+-XesVe{1?HOm0rB$`G{dbB(uX948PWeU$vp@YaK z5c}xSJAUrz$UYn7vUEceraj;L6%=LZbeg760^|Pe!_M!&ts*IH292h;&2kD=49tD3 zMtLn8%=)l_Rrpd}-eNWWE`@2^2%a3l3s8T}av+u81t&xa_@~|DV144~SKe=CCCW3!y>j$31+opZEF%>zP{gJ+4^t@p(H_y)uA+3Y`Bynd|}RUYOXNR6xhJ z#u3PnNqVjBNA(1E?@V?TTY8>7ekwkzx4A~4OG^NzS+QfeLi@hxb!=G)j~>XlBNn}m z(MMEGLEn%4fa=d;Yf}m>EUBK@2H|X(2T>oy*+6+3WlZ4?2^`2g|PpfJJu4yG@Scp|57%fiG7zFN1In zBGv(uP`qEW4@itfnX3}i+HkhJXLUF-(FsWj#N#DlNiay_T|~c>uL&6FZdebNFa*|5 zmxg7M)63wAaG~TOotD=-c-U+ra90MU)E$PQl_JDzslI6g?=m>31S3ka>);wxUNEY_ zX6Fq|<_QRtwlUEZ1b<+UWcQ0}iA^NSmE0krHQwJx^_Qn1&?`E;u! zT7t|5mL8*P{rqyZ>;%V`-(M@6~d8YOW)^oTWIye$f(xHvyThHOfP)DnaW1;J9A z-y6FekSFjWt6a52z9$$V>Y6o`6GOy5K|NAv!8|2)7_!qcx%R9^?lQ2qRA_+)X(|*5 zisEb>1_dibB@@sm>~*Si*7BpxxMntp!z;XM@nBE|jqxDJBzvzV8L)xS8Y~8*GDL7t zXh1k6MRuRLd>@DY3#ggUeHY`;-n;qRU^ooN@6sobOAD&SkVyQlwvxKSdD_-%&GW;M zLNave&d?INJ+BCSO4BrQ(U%T6Kk@(NfBbj0{kM%?>Qdjrb9RZOid({`J;(w&f) zUe6Xg_x`j<;G|N;HP-g5X$wdAUYu{Xr*STUYajEZ5+EFZ2+}B7StNS{gI>&&n2~9QF6CB@>yYwgU*y7Blb|2o?1nvbI^Ce?v&} zsIF9SXz3ZeZRS&J{18Xb1i>3}74k)rykE?$db8^JaPAAFD6XFRl6{T4NYi)mjhe5& zlrxC#Hao~un}uD1v4NipYkRW{kc5BCmPt!$bBf4kH!bDOswWjbrfG&VP(WL#M!{F$ zHc;jk>)gT)p75QU@1VeFxKB_$e;^X9%?K~2e}=E|0sh>@-{@>AxP3q_1sPr+1|MK+ z8VHr0lIYd;H-S645F?UdW=zE+#=FvA>)f}AFrOlmHUt8?m0?D@Qj90R6}{_Mc_FvO zAT1Yvq#>oa7#vWk72~G8`u=Ed!apsdG?W2|MFLxey2>d_c(<)1zKv%CF;*ZP>#X5! zb;he5caA{p6F#ZMbMi(2wovmvdphkE+9N#)Ls9i3voD!@JB8D|4&i zF=>dEjB9yhEFWdG$^Wp4|JL(A^tunbk2d)qHu)b!{)aRgU1Ie8jWR%#=YQxisyo2^ z58eHP&LQQ0V83kgKiql#2VM)C8-{Z93m=&Y75TVisxA~=qYF~_T;HCO<^XY6HrHW8u)OM!tn|cba?W)S5?7ElLp4InPt!y3r)i6|VzH6!s zXLgmP?sj%|bR|VzwRE%h_=mTzpTA;*ygYvO))JTVr$sP9 zYxe+)>tX5VSFhRse|Yi2(!5rq_fX2cs!{$}TGf(L?St`A?y#gaq;#{k)tf5FV<2zd z2pfeuW1OQut(=3{249kwjs|A_633-&tJ4N{#n(1eIWiKY$<^H!Na?$VVJ_RDEDr7nl ziUFC96(dF;PI8`hm`{4zv8Sl;WxFY6R#`k-V|4DNr#|@zgJV0TITqc_RTAT&BH*V!{pFwK@!S?6U63sbZ|k_l4sE6mojzB8)f@@((By)r2AGLA|}!aO+y%mb zH^$qghtGji#)}whn&J~-B|_(yfoArNGG6KUn{KBwB^zH$g-2NPWvPonX z%Mm;==OrFBTZWYeP@NVfP658$kx@%*st`*lg=p<3!7O9EdkS!9Scdm@Y4Rp_OLY*K zGR`;94pRLc=UAO5$$RhTywOp+d;V$4mL(GbVy|iDyg_jNHOUFvD!#oDNER7Qbgu|# zU9x+nd~V!h+?b*dbf^u4I(7AB^_BM;9E%A;ywr1VRsPjmBl{}rVv3Akd2cVHbSxgB zxF0jvb1#Drp&_Kb53g%Y$kRf}Nqc9uh}oEj=$;4B1Pa^>!KXS?k-Q1fgG4?bjgv5o zcOc6ZlQ(o?SA!$@^N%HnF@FYX);T9i|C(ndr$c-uD?b(U-PVF>ax&vU5Ih`=!^MxSQ8 zr(A7))`F(FNL19yWE67_hwik3(!^Seua1f!`id)XnFyI0LGm=IA{9HVu3Vv0*T=ai zvQ(4AcrTClg1Yn!NsP4EkC>jE(aM*3Ie*H`M74w@2Fp@1v$%_L?2d=X{Y+=gf*2$1 zjWq@`2Z6*2ymYg=;ib=sBU!wC4`r>aE`^%O7ua|q5zA6M14dwXLnc^8o{8mlc*II_ z&dvFGEe2qU+^m5(^45d<7=5JG%bp)%E(V|@Fa$1#lX2!e$aHx5MDA8Ox2bFjPXLvz z*mpV$Lr-jJ0&MC0%(ng4!pf-bnrwHioPDZa(wm0mq5r(N9L*qU0lx-8GD$kOY)Nfp z4na%HhuFB?ZR)>m{D1GA|L!9WB znd|yCA)go(tak~k-L3{KlSp8UfzZdwDFg`_9{{-ahFKhvco$zrD|4nTv!{yF=H3qU zs<|CJaCfsE4Mlag5`upy9b)H3IK!f%o6w#gM%W4*0C01w!{U-SM8DFsoVX9%TrOuy z2jnwpuzB?^II=K%^?QZqJ6Qe*n#mAxS>?PA_s6c%MlHJ<;iA~6uP;kPe)fe|*Cc5) zkGvV%qb27M4$)RK;T3y1I>|-lmM&|%X*dtiNX16~`O|hKtX2h-3rhJq``#&&KU-d> z-GhJj;Gg}o+VI4p82m|J4%(N$H7nu703iRcgi}iZ2#77Yf6-0fehz-`XEH454>1#V zBo@I1e$05sB_1Xpjv4!;+xb)*JRCA&zP|XSS%WUb?+&QsW!UPlOhL`J$2IK$@SBe1 zOQhEpb8?y)1u@TuMxH&BDC05Yvo!e-jja#d>)1_!&`-~*_lM+rK1qVPNZo2lq9(S+ z{ctJi4R+!Z2(nbi@nu3)7ye`UA`5nr;l53i0y#36PRhAV&Nz-AK|Z8DV;jIiSjPKZ@2(q^1tX1` zjWF~k1u8MV=iun`pUSuPye$US(b=U}MH{D`v)phbKwF;*h;~*a0^2bY#8@B!2Y*8L z)m4SopQYscsXzr~gY@i!bUuAvDj+?#_)4;)ISr5M~+oKr2&ORlBUBh18_X6fwWIJV(H<#q~mB_58 zVhMZ}^s6D8_WeyYk|Hb|XkPCmo|Bwvkw-nqsT6Wo&~sCgZ!v-8k0aLi4(0nAuO8DV zj+hWSptLGH!m5s4gLl-^#ps}sp<(Kp99u^BBq*n1saC?rt}9IO<~Sm>he2J9c))x@jsJ)w8xiw#nsjqNnab$^`v_+@tCka4Q}Gx`yqxLcN$%4$QGuP1jBo zOJA&lULHL%zTg<``|Wmn3I$%t;cy(XzwI&@7r|^dGhNKaH;Nd66QE-;dg#wH&9k;y zMRh>Twq=%+VJ^`jx=N{0vmEF6C_Wabq&7g{*0zPwK~o;3Iybj?)#Ml8uF$O5465bW zynGv|ebZjp5w<*8uENeb%^K{fQ?Y|}-1}>?s}?q{?6vq@bBx!|Nti}#+@Ek<+m*%r z3^KKy_0Hl?TIi547%!%7VUNH1TZX(T1JvK>x#6B4Ol4t3yi2C9!_(}`Dc#c7M)SlIT)&VSc17qW~cMHgI0d= zQU!nlM1}mbR)CUM`x)`k14knf6?&##nj5dbjsNe)|F7u(dvLJ#@SwBt|K0fiKFAAH z-`M-N%Kx|9JKTQ={=eOahlkz0hllL^?;m#dH~zn$)BhJU@1r;$y8M5Op1nvf!mpRh zdU8xEk0L&d&!Y?VlkbBx3)K%u_|QI?Cn*!4#79My1HSle6sFMe+Ny2jyYenYL%WGg zY>HQDkJv9*^QBTH*7u9$C1USb^tG0Fh_Y-k#8ov6XyqN7_7cw*Vtw%?6X@pI08lc3 z;B)6H71(>Cn}?XxC$C;=jzgnsaj`Q%@A7NSsp3XURfwx{r5RD#@@rMe^G2)H=)8Kp zqy{X%-28mqY_-ZcwJo-ALSKei#dBcAy zGK}O|0lafub@lE_`j#`7CHJTBmm1s4TQkJHJ@Nju2-9mXyN>6GXqDB5!nz(Yc%jaE6Yg&OhDLO;;L z7-L;7P(aN#*AkWM2O zPz@Frmvg9}iZ=<{Z{@`SZ{clW!#hQ!`+|p%%VubcWsMJ=>v1S<( zVjp;_D47=m6Da~^s#96d{*%ifiXDfas|dV>J>hZA4H*DSL@a$Qg5`kv3k6F2PF2`4VgpwR0(4 zjaVw-EUaKId3WJ7&6I)j&G#oz}J556aS zTeSm7gARq}5ts{95jc5dB?mQ%t+|=p2UgM^ znHvX#!j$=A3#LGWugyd-e1MP9ocG6v7M_V7eq#{k9}P=}eilp^c|oDw1b99a_)VE8 z=1F@-W}6u0DEF!@yoMyNTna5ESGB7-0XCOVrP4{61;CVxlYQQ%#ky`vMm9{2kFpZW z4KFmwY1qqqSftgisW_RBUf8Nv)r#RGIzJCnjE3S9Lk`XlVaz?L!PSEc&GyQeU2!I9 zMlEI-vhczH;OU4NdI9NdE>zFlf^-)pY58Z*0#o$NC&*SQ17R-xQ-st~Dt5KNUwSuV z+FK?Zr59P91yXakfvk(8Q=dfmWNQU zKmO%qJ#SRWlf=JvPGMpM@Dh6hADAkF{KBRg$S+AIFY|iG+3I*y=VJ=-%wif@zUC(? z4mfN-pQ3@EptomVEd|=N>feNyybH)eYA>suIvfO0`Yj2@17!H_j3BJBFn>aG_#JzE z_U-329%aUCO*%4~87$nz$6F}V3Z7kYW< z;jcdzjTkEa0z}>s$4INZ@sKy})oDE9jrVMgKNXhCxQW68L7^l_MhtLS!)6pJ(u>YS zT@#41BZJyAx#^6Pm?IH!PGtI=PFdsk;q`Z69H!3r1)_)d;k9=m+gYU#)&wjPgN(^J z{3tkSSRalmvtu^|*k@-wusi{Jv9AVEnsC{62rjH~JB-H}EV|*h5%!h`E&(eHexaNM z3^kV}r``fFm+{V;wyIRm3JGKvzxB>7kwPX$>Qa%pP=ej#^%SODOVF6tezCjj5N)G z>{O8$&6t4+tu)SP-tqW?{gSD^ zpzskJBMT8jhIPk!+;EwL83SvSReEQWh$$PxF~f(k^n*27-`z4vS{4Uo?Lz8&B2c^4My`Y{ru9j zx`W*Q_s{>~-~Q)+|921m`S1VDKmA|-v8D}B9m!z^bfZP>T#XXf>45Q|KH6fI%VMPo zABEXlcH(gSB5dB0hKsi;Dj*@`jkav;rp8>Wa;#cq)y54&=w$7i?W;7JhXe42Yxp3= z2VT6G4#U*1tnUf9;0}CZW0X*OdE0RV*Z5Kw;cJ3d;$L#aKq{)>1c$-Kw+= zCiz;ljjy~H4DZ~)7mgCB#Nb|AZ|Sw^|huR(qT53q{*Zx8pohgkn@Z@;(K-G};b51IbI@&CGu|G3*#_NNJx z?)FSCFZnm)5W{(tt~uDOjPTM)d@i12@? zh1OSq41xeiN;bz(w=7X@b?fVja`#Ti+=?PlAj z@1L32nb@DP=OZ&KGb^i506!$R(f5W0R%K>oo;-Q-e4nCDNq7RRCE66Y$2;*iF?Y3E z?av}KV-$U)Lf6e2v$vBO+oKwygaLDjz!H_Qozck@xw*kO45o{jNFmdIpDaSIE0DzT z++;f`rzO9<#B8P1q$Z=TvVb>kECz0H2N7e{0Oo!YrKwC_*z(>SiqMjp9x@1vRh)Y; zR2BcSrS8+jM6cnoWRkN6sBh=|a4G5DC5@D6?}x`+tAW~7=+3lN_R{4=vs0R|uAi)A zHR}_A7IYvdX8@rF|G_YCvD=a#pJ`j5b<|}#tOhX!?7?CX^~HR&^=-Xr&T7T)Wc~Ed zuxR#Vfmlm1Rr^0r<^KFUbxxwD1fJYMX^2fiC3lQ9M*L{l?`k1KH8WC3+x6ly0@~zC ziV^gh&6Zo*?`Ohz&5Re6fbVBNvnFDpH6LVf6>&Kq}M5v?(80&Wc7m~D|op}}KiP1+&d zf?=cn_T7`eyn5m-08+kOE@@KhTI<0$PQ!+d>tz9R3jX;6!Sk@rLpy@ImhwH)H)?i* z2U+vucSH#RyMOnz5Vs+KV}*RmAOKux5;43%Quyeaq)?w-d0N%mB!}a8K<*YcK~#84 z^K{K`6{09VV!sHcmyMa`$0IPW$!=yX&qo&^)w}8Ho^aVDh7to76mrf^Os z(B0zctbWecJIl9qXqKOpJ0lA>cqCn$zS@~mIgxWNux7Q^@o;ISB*&4?jq?VmLotuk z5w(_#F^*Ow0*=#^W=x4%2z$wL+^fzpVS~Ek^lQ}R#$j)O2z{xRfO@Cp?P5~uYXOE# zGB*aCa;U2{6%C9HmO%ZU_d_r^y9kou7KPi-qhsMny06ojK`=$OFP=mcn;ncWGe$JO zpTg5{>H(l1YDmlWt+t6NlrQ?TXJ4#8}Dw{q$3_zoD3H8d7#&8zYq)TfdpLaIW}mgPFXpt4&Y&ezKbd5!4s)gWJ#ZP zK6!~wvNJg3h7}(zP&9}gkm}S6v{CE&2Hn>{{XRT+dGPeTwmkLb`MWo-JoA*RcmMeD z;N5|~(|_`wynZGKs84%pJfR)sCmD4Aq)#ZLjE&u1%d;>-Aw!Y!Nbr))_9z4(1Ep;g zsZeCa9e|IPxT3ZaD%(7`1ZAP9a2ZT+)tZHKq>bgTXB;Q5oEUcUFA z{oucT^6H?B#F^hQD4qGmA)QU#!yr4Q`h{ZPHeMwQ+;B}&H#~ptOe@3BGW!n6*oj33ytwHOu=9l1fGQyIyKaeRYLkw(MN4@VUTlhBg=_E+beoxXQE+7*qzYGQH zmuS6pik3NnvE#I5lu3)Gq^QAa4Jombgjhj3G;E|nLRT1uR3okc9a>;&^G4IE8@rCv z+Fq%A7mkBjim94#=!Rnk-WJcV={=y}w^}Y+b~eKI*rMHWSLROtc!3X__@7PukBa|! z`0%^ko$ogBKb!a;J^n}MY}X0{T9W_yVYl1i_@Ca+!|o%@|NLm@VQ&-v^CiareDTpd zHxZ))T&5YI^DIh;CFGEqO=Qm(9ogd&$dUEN{MSOvTfoD_-|I|1lT&T*Wx3RT@d6qN z6ab*AGFsNjvi-9C{>4Ay@llLf&l%cxwuGF2Qb3%5|DSP)wN&f?Y#rG#)pSHqMoF zMm$|E|4Dl(`q^Ee5BWVpyL!E;1qB3INPsy&ty%!XbztZWV9+M5RcD)#Z%vc@P_)O% z^rYp@7qfAwq8e6>PVkd(HVy`1>Id_95~1Ggauc!ek?2G`9; zE!6F|2A_20uJ8eLNT`6T>x5hYadC>#${?;^y6bqVUCgjaQe zN3)5IYkAXW2;9jG| z-S`^axvvqD1yM{4m}S6Hp{=1<`%#3dKRSFg>c;=n&39bq3b#E`t6b-meNwCYqh43c zT-OKnX8cdfL@2}W^b7VmRUQ=r=jrerU-|;8IqUY^#^}O}WtCy{NU@>;O%zr&D)c7T z!h1t2nq!InX&rJmmh_GP&&L1f_wE1lsQYlQ*ZX$k|FiM`VMBTn%ufCA<1Cm`kn*}I zfS36H>^yqdCI6qj?(W0gyFbL zx(^Dvivl>^k}-}ngIk$CWD*Y-T0SkOr-1-kvCUt}gc$@)#l1annVVB(}CKT+u$29&3bZ77pl#JJNEH*Y*uaB+Q`d9rq zTw{IvUTZ&n^73WN^-0HyoZqH)`P5@#6aaZZhQG8k^n03-r?uX~kINT6_%av1ms|MJ z6788hbHZG#yUSEJJ{Ee+l$jdY6;PdR)5w?^7#+GzXQ??4QQ7ywL?OW5kSo?cyu^ z8fbmlttBp5pAJf4Nkx}Sd7n?w4RT*Uq?T!QoS9OzKDQ@eg%lI`}M_&aq{FUiG7W8+5zVrQB^|3-M`3n*k-- zDr|Es+mwtmSo*Du$|)Uu?!-%x%NJ!GeOfgfdf{Hnr<7LjPr`YC1pAH&fp76@H6dmQ zS+Q@5i4DteQq68->S1`jeVHcl7>i~14avBn$uQ`SQjkqgy9fqAr2E}=Cwqi~QZk$O zbzW88G3I@nc{TLAT7Ti+pz|HNkAlHP>G=aS_D;KM#KxmJLx8f)KN=qp0B|k@RM0l{ zQ6LV?hyD_s$qFOtHzLNMHHkiqrw?d%HoJWAGg0a*W~|>c^E({-5?Z7M(W(l@K>S992Wkjs16H|0&A<@Tm82=i80_cVqu$`|s)H zaS{#T5?f5qfZtxb0&Kbccdy%fDC@s=_jVqV{dZ^Y(Z>FJ7ylCl=~H5=exwcep2Y)^ z%l@g@dem|(vH0e)GlY`>T_9pEmLM0yMO;aPUJKkk7Jq20S7;gQuKZd90Lf|D~dQZ z=$>(4vL$jl`SrKuI_Kp71_lqHN zA7u!0z|K*@22X&`imtQ2G{8FB3ydM9rq4E*{>|2uja)OflB1=DAWuc(v~6@2Xt3M+ z=RtzKG9;*HFsiS*>9cX@SaJ0|hyFTbi2`DA9Fn|>w;o=+B9pI$CnQ{=ima<)#B-rC z^STbKVm}5mq)&FFzw9qaSb!*g8V?;yeMK({5STu9=V-N?*}Gtq$+$a2WI}pAG`IZ7 z9dB{%PJ=;~t!!Ic`l;9r36Rg3t%4P5U z^$?cpiJrxzVEe&?n)g7X_(nf`^=Awqn0TLf)>mXkhUeQgsY@430y$23pY-j(NB^OA zWt>`tO!fe*<2$!}sa2j# zwY++=IKHe`Ay=K{;9&K!7`p;a663+E7MvCy;s1SGevi;@m`hkn4^Vmy6yu{}rw@fR5*aT;H zb#N|`JhsN+c{p~-AGXRtU0Q`2hx$dFjEAE8NgNN4FGFpBCxE=CFyZ3VtJmkR7)8IVxb~IUEeB~Puo$mlvY&CQfd4=z27Y8L9dF+EOhNE3r643eW@t<6kSpB zJ?$%w)?z<0-8X(sbkEJQpueD1(4)MywJu$*=12!>-CT*vjp|~uWrPkB#yvpSd;ZaE zmeoDfFO*xNXSYL)H0=FH$fUB{)z1}CM6c2HIH&@E0T?a8rNLcg8ltnQPPEqukj4aX za=V@Oqi;Q|o(?Cs+s(RW$Di)>egXB`hE6tNvbsJ7-nsqVw=Hj^A!jxhYV*nLv)}H0 zedT@c!K|0C%)bQZ0SkgGxfM|;i-{pB@K@-2NPU;cxOd082*LPO5KYy7<^*K*Vgk(? zmO5m&hsqEsVgDG8FY3m-D%570u^}uJ7DLR zrh#3~&hJ_h!L70LPsU?8qDbj-9O?{wIn!E(+j+*-g}{|@cb0_E>m0;hi?^MK}dYn+M6wk1jeOTfXw-%Dw9^b z2(WqTVF#>0kdC7voA{^0_`*XcV!Rb(vhXd=T=8A0&Hf%iFOS3wno%uHgtha=qKZejQ2~=;SBlN;JGK28m`ceCe{Jfz&xCi7=r{)>GqcM=gWIu z4-YG`z+?Z6<_?fA^^YkKK%v1RvDy1i`lAlZ!8JW&DD02yv9D}c)99Zo3J9c~E40261vM%Rk3A(t= zgoh#vq)PfI+vgmq1YXL6Ndq|B^y=mBcO36GgmObDH-vISC^v+%fKXO6OvcmkB@0a} zC~%bV5%~M*5EE8VwFW{n9Pm2&$QxAVJdcueZp)eQpTiJZ4jnJvpGJxO{5EAyf)$BK zw&{K?O`qnrx}v&o$8g8ecIQxNH5^V^QsKe1Ht<_oKZko3PnS2%7fw~a9HbQcMC0s> zQR>|f7W4T2a_q``3Gw{YSQM2GW0g)z&vDO>iX-l)>F4INX3 zv6=W*8(XxkJ~nGfeAbdw{D#@5*d_1<9IcatESd@twP&7x z9PaKkGlVlu!StEf54nwVSTl50!$i~BV%{cieh%#e+C=7Qnd@`_byUztFw;p@PaXdC-oc=D_!?!(Z>;=PN0&ByWO#=8bHG7hrXc}!X>ig5s>NAhk z1i0TjPAZgj#jrTkrctw&N74HpIi_7iX=qGMtS||ML0}FSJ?8XE?*Gppe*XF5=bzhK zN8kK8`w9Pr-{1U?MkoZXxRHcyDuUDygS2?s%`9;7Cjj-^B*sg}gb%*JeWSa0Ldp42 ziz@tOd67Sg`&whurU(9t zoDrRq7tf3%fn&ABPyjCR%Y03n9q- zoTE4hvWbaGGB*W(2aZILT%w~#21~#xo|^N6YBi!}3FUce5`(dt7-JrsC1(EKPN_Je zVN+3slitIYx4U~Y22g}mddChxx2ic;PC*5Zdev0yk44N1n?_oS=(hOqwuDfeR#C%K zw9FIfk_70l>ECMw&P>C+E0ON>ht-#bn^e}4XCzCtOi56(RVoQ{F)=d_ z4l!P5%cIb|Z!{x=v{MpSdA%4g+Xo9I&I%LLJwcTn5vOXph$@q$d-?+-dd#;tdkyb% z?qP*<-7WwwpsxGHW3S_dcyU#9EL&pt4->j;r-RT((fBkBhN}pUSOfL@y{|pl*E>%+ zW1Vaelb8(i#@hnr{Q^l+W~PrOVQp3MGVjhh#_X|)>DBuBjeevX7ct6`1DtD~&uo@? zz2^KVJ2&pBie}QKZl>JylA3xCyx!FcKCB-Bzb~tJu`YFDy>TzrE;+C6!4<_nmU*#u z+xO#E8S`drM+a^wy3$@WVcQ`AFS__~%kJfqC|-2E$lsT5e`4a5KSe zvVeQdIIWExFIng=vIGS@l#VmtXS=Xt9RQ-FSQr3$wvonw+znTb5l>YC$jdyfazoKf zB?3D>4g{a3=2)P(HL_vQdl#=`?THJ17D3EGP;B4r!Eh;ltO7-SwkyF=>nI?p4<%t~ zJTvHaFdk68OLdcEDca=K()Swr05Au&H8{TZ#{+aO!7DV|U3G1C94yoKUHu9Jku6AL z%JT+y9>|ub@pwqPrQMc7MZv-Km+3s5fLNJC_$sF=Q6qovb@$pGboG8CAAOQ9_uIYE z)z_4lH5{fM&M5mhAz}ZMTF!oZcXS2&$>8XJ(!j#6nkCAYzjraq-9*GhrVdTudAHr6 z9CfmhcO0Gu=Mg4%7R4Qua~}}tVK~Qf8`CJAG#%QQNeGv*&iRzM7(TNjo}ww6NBFBU zv!wbsc}4-Aj-vtY*Xrt-+xCK5FPyGh$^Ece;M@AL-K2<8;HP{F1XseB$!*(`nDKTs zagX#Ec6*Iex6*?gISo7JeBB;URwGPyYJSu}K`~F>g6j=Md6C6G0ka3NCETz^(oKw@$bG*2$*ozHqYl%kP>f)p9@FwC*!!*;7Nq794z>g#)}L z0=EXT?y_tbAgA((7_=oeXVccSo|^=vwV{Lh>G&$=l(_SMn)`00YA_UG#JdcsXW5A=mvaFn6_%{k3ba? zk`NdJ(xNF}8(AO$-9;7yEI%v!PQIWD#(E8=f>Of9nUejOzt<~OtXWRDb`NBWaBzmq z9j|NaIGhenCqZ&XsUy#WXpAC$O%=H?px~?4^zMg)*H3?Z_2k{({3m~Z^5W%_A6^~+ zFGsa^%Yfs@Z5am9A&-F%@6N#!5 zNM(#iS_R{*im_-^(*zP(JXcmobB@|xOyRVhN#FP)kZ?s9D#s{XXata)@G+cEKTzJ0RFqN? z@>7$53H4mpCj)81Lun#iVX-CfrDoGNU~<`3O) z7LKl>(+Y4721o%?>H6w|t~oYp(ZqW9VLp2;Ppn7&CFYBNcCIRIkr=seeyGC6nH?<1 zsCrG5rXXd4TXtrID=?9C*-OL2A)mc95bvIMKnXh~jY6}U3|>aJKCDhQ2(d2grB!4o z%~cr^GZ!{KAX>6!k^VUh<~0(eV$gFC!I!BnJA6NkC{Z6?CWHWVo8%l#B*AEi*|aFr zYP0YP_y>%77$7(Cmalos=)`6;YT#f5mR<+k)0Z`t$}R>4YWfQ50KsQwfC;!S>Ney z3e7HtW^x*0aO$~2@5XZDMBGgI)#58{sXn?AW6@2l>}sx*TJkdSFtLxaZ0cO-47L7g zN+rnhgX`-wT8dVcmx&ztINioCWb4bEk@w4dgcF`%3Md2s4YjyKJ~xQ+hhcIa0V6q21zf|A1WZOQt%Vp$-%Vrw6gNN; zrQA>Xq`6Oy^0Ow8)f91eI z1vGIA2`y}RMd)WUg3F(S0Qe*5oj+Ynj>Ck_DPUu~?4DH52CnBu62z5yr?YW1Z`|L$-~6!EJ<2sLt`4t9cDz13c=z6W@%sH6 z;icPP;3LcF8(?Uu6#FgKC^wyhVpPwQmp>gGdW}D|yzKv*xpSbb({j6WA8VnYkpGPN zgiEf+6X11qzQ-+cJLJ-9Na9-deyS11Vg;URaf#*FQfCH#=<7Xye<%fvi41A0CQ9h^ zoCiP*uIopvzEpNF{SNI}TlC8qx@Gwfi!!Ok(|pNP=X(|pxq-{4UbC!+Xi6MU&tVsTZ;IIGZ{xE}06}_Pr0lEFrsY)hEsu915nF zje4GG#x64^F!YlP^;+o|5J+pH67{`NRAPDOau)BaZKqxL8pL}7S-T(IZ{7-et%qG^ zZSNJ3>z?Qv^P070rHq#G?zX7kbk+(fD2 zI3{c(8C`K^aTFDzslS-h($C}~;o-&|W8{lZ(nwZpdj*7gieo%QMGB4KsWDJ|O5$^> z#4`Yn?D}_^#ykS$Q6Iixx}$$(y<5#qSMGiL8!2=8}cn zWo@pFK_YvP&u+(~2zqm*e0BVh34pR8m=3G*s1{aT8;gFyl%^D8UFY;e7%#3w>K04u zBE&bG585}3S@hwX!>2V^YMg<=N*gU@QM@eVlOZCK7_E<@IK>Y(N}}LKc*_Xe9>*6L zoODEsD6~bqlgGn82Il%*?&FCdR@~}ZPza$TS8!>pI zO@a#y;ZMRe#TrhN^rR39n$GctG>e4n#j3+Uj>1oBM}KEZKe`(+E9qJ4pGGIA!W`8y z?gE#tCZmolgJZmpx@mDdkB3`_W2wYy=o2iKx+${b2KiUc;3uxxEF)x&t^*~B)44pQ z?sRuL%aY5U=vKdpE*NjY_-C=S&sAO{ZK$}YaR%&Xyq(9yhm z_gsqanGD3^P;{NFF3m>a^*GuC!a2-VsHG1@e7*zJ*;@~Dcu97=`DdOl+1Q=a{UM$^ z^+PU1HbC!0XDxQ#XqMN)O~}KC5LL7+QCXTpe!#R5x{qtIKhN7N5MFW7Z*#%%IY;B= zEHou+1Ira5&ujAl5}&AeIR@XVjpuw=^sNHsFE?kg6(i}=g28lH%@JsQ5hFFKFKM^o zqV9x?lE)ui(c9@+G>b+Jw2e62%kkR8bR7M9&f}ef34xZ4$Dz!yYI}KNPLb*0Qry5c zd1-lSG?U2upgtm0{bHPTu3Pd@X{;g`YR)CsO`s3y|4?O422*3W`1*Pv`g$Al6pS*tUnNbu9qnkQ?)^-V4 zMw9etxs~)4Y-^odMo2xOQHK*6i8aNJyhh#8)kkZw^+L|_V0aRu(J6};!Y7=kZ*8vf z_Yp2+__L{$yUg%`=rGBu))nxVORjJj_GF3-Drd&UNg~pa=Rf6t`3I#>0dD5--(E&@ z(QX_M&N6&bab=Ym04Ecm)vUlUv$x8bSqn8Y_&-bcg0Ty}h+V)L*VH>-@f0cs(1R_ES&ZLfBtlk+Cmf?yJWpRm18jY#bhYfRlmT#jYzF zh8s#e3}Vx4KvQ7qFYSbg#ZiUnEF7RMd2kwFBH=K>u&x!(LnO7aVjwRYh%#q!Jl62e z&co&`WM@H?>%IG8a6AVizDiE$o3@;h*G>f0aM}Q~fBX zxnW`rQkEDezsETos(gBBY}b|KIK0ap9j!b4fc{YhKJ5R2B#DJ#C3H}2d5qE|SyIu7 zb_4|l$`^X4X9$zx3wT|ZBRP^T*SF&G3bS(YYto#Kr$AiT9Kh8)0;)?tCq!1=UnUH zm2@U^j-zED!+zQ@>Y3&VmLFu}gtc+PlE@;-r=bhIBi^wEUW)RBJ;eDYP{ICDZ z{~=X|_>)Im&(}uia=qgu4k|}U=_X3DSg4Dw*IDiiWzbEWx#PS1c#`kW?RAeBh6 zz676Ck|5qyWO~(zbD4KtdC6m3OS)!ALNJjbGio@hs$pYRxAGQ;&J7mB^wu0EZ#fp$ z?i3dw;nbkN;HuRtFXfTDOIkwhLC-LBnJsg+E>jEHl|Fztq#RB@0}jvdP>3=nu7LG> z4y5}jn!JEiYZP0pu=qQx|2tgNf>f_Z$bxfrpvnX{JCOy#NeF2z+`Yn#= zlqEu%c2(0aD7_ISwk{OrDsqp@I7q{{3dw#F0>-A%Ihz$FJWOW6$xkXHasdO>aPvK? z+izAs$`Vx8N%ObY9OzxmuWC0S*{;JPt;i--W0h``U0Q=>TAOXUU71vjGW2_xXS3z=8u`lMPJ_xUz)G)?-V5eDju9U0_Cjy!R#?VI0FukW9w~Ks#4Vk z_7pMsE4cg`9}-{xj7eT^K;4lWN-8o%nnKePxGYX?RsDu}e>p?cs^~UgB&sLAnEB#K zelTpmEy^?JZZ%m3+mp%Gs7#o=8#NU`4pbXez!#$m$n8xPl|Zi3>Z$?N-F6fuURux> zzDlIes7ZO2dW<^gju;Q)m5>0L6@e(J-~x^~ixTlbB^R8bVA=x7^&}b~iFKWQeD1-y zw7y6=BuR!E1!rL{%0pjLrd_Z~vT8vdSeO2^Z;_1_XAbFi+TZOdr0~;eG@FI}{fL9L#Sr50uv3ny9c8^M@< zBR;mC1!MGNesiqAj-rw51X6H;dZaiHT@_8H{;8q!F9N zDKI_f#RUVN41*aq_y?!&USdlNL~Ht*%mMz52!cbU-$^hx(l=z<7SU7Y3HL4gws{|L z%p^CJ5Ff@9;Na(>FzOA%t#NdwjNYr<1+_IjPW?$R%QJy84f_WdrPnDjyKke*PBH@u z9MqdfcDhoL!`(I1u|Cvy#z@EckyQS7*H8oN+R=s@7(;0~jsa{R$Pp-{eY!FMZIe+6 zYl#zqimCNQiU=cIM~v(z$*2JU_M>S_q89x=>p!%k=;UNR)I~!~%)aheb?r!8FdAwUV1Fk3m3yU=Ew(@GYOl_cDC;8krj%~}THUJrG z?=B-6({UlmU=g~EX26dHY{Qo1?Kh-jLpnC3*?o#ZU!!_YLSbnqMTP(qWjLG*UK-mlLHs&%>mI9C5NlH|;SI zLVcypq{xR&_6HOWZ{>TLn#kZp1WMAC!-nZHWSF{ef#my42J`3f%s(?63nVC}m9S&_ zdq$4p>uVfLj)#HwD@*_u2&9C-wAa~(zgI-5=?!3%66G_*LTm!Lc!9Y=u!gneE<+wN z;S}MKtaz1&AW~fhYHF&w^Qx$51Qm~0xttf=u;2q^? zu0g9x^P;A|yZX=V?e_Mc-v%Fl3sLr6gDR{%Ap3R$mHL!E_qW$q<{?YDS6qnqAsnu*sqk^U5`+qTOUrWRLu$Yn;rS zaJZPWLdUe!9|g!pTuP*dlW-CYE}2uB#L(`NSf_n4`}Qk(_LjVJsFCE~TeYUQ^_YCs znX#eZjeXM6e*l@M^zNURGZ{&?`< z>>@}`()@&RRD45owPb6#jD7hnS8{p!CN#ic3w)?1zzyyYJ)hr^%@KM@eB z*_Z&hI48e{nv;>91ZZAtOVG88c2?u0OEG|(HLp1feK+kd>+j^(YNhh4Q+DNpqYb`9 zcC9YA+80kh|1H&o@S@Lx!xsHf*CAM0vK1gu)KB?Cl@Ju+U1rE1Tq#{4lVKEcAK|_N zB~L*f8JI%u5Y^mnN3ubSh>tNV`RYU6{Yht2?c{N59lY)?;us#`>Y41UPm<&5^rL?= zTlk}KaI&Gy8_K+)%qvl5JU@3sm+dc+IuO&0DvPXw9Pb(iFrKe(J2OX^yU z20L4cgLm|yE;flfa7y25R9j?*^fA|;k9HYFx);-oVBP@XUYjZl(!uBl1WOdB9fmW# z_N9Zo^1eC9W)ehc=sh85%Cw}7y53p0s~>YLcQ|CTm6W9b&KiX3%?(MbNYZqu-8E%f zk8p`|SA}S)^SaUhZuCEQssG*kcK5rz?|K{k??(SiSAH}dBq5f#!Wyg?XUbLd);j;i z`rqBX-X80JJKde#N00FQKib>vZ1lf(uK(3G+>;wFVz(*n|2%>F5T7;P_L}WljeF9& zEsVP!dRVH3d}9YPUV6w`7Dd=^iz*Z7-&xGCE~d(_l(wmqkJK5*sr;4xI*#Vyj{H-G zZP)a|8l*MG;b^wvjr_YsZD3OKfEyw+F^-O9ySJ3rrCe8yA*sPh*am)tPlW7j#B$ea z_q?abVxa>}t|$%M?uvP-sca_3cYW2G-Z_fi0OLW0CPXgX7t~wJjqmOvnfffM>J`G| zz1UVq7{9)^ypN|?BntL+u3G_q|Fsp$Ly@mp#mEhkD5(UKC?QXWO9=>B@v8(#3O>(d zXO3|P78-^#XHIj`A;OJ{GUGTZj!}$wHVm-m_`ZvdXU+CS0;kqT@2o~$FB;ZWLHo@m z1NF)W>Qx?Sr~ON64WufRagJc`(^cJCIOBsFt9)NYk~zxfP?G;vfYP8Do>UjyQ1V-I z{?wpWC5bO>tv@G8=`=`vA^`p%9xow`z(vets+LA0U6o7*P3{Ai!l;CHcGn&Ydi6{*?+$s01!wQ^tLyfKG` zk+FVVN+7E^s?XwsTJ@|ew~=vWg={P0OEx+>a;~^Av~mVk%%iQNg|4Y2#1;7TSr3-f z)mENr`6-*k!)O%U)=_(f*m0>~?==L9TO(3M zz;qmjvqq=gZE{Q&6+{%g6CHvyk6Ag!IKyUg*hFNOOVGutsg1OLI^dwK!p_&xql^HyX#moUC2PaXdCi(2wDG zhGidP&g+h{C@qk_r!DOkS!oBKdhqh#>3e~j&)>ayB@XdFemr=0key)|=ufT#s6f#w zJ5wSTP2b?Y<9v=wfQU?6lWNvNN5GEE7$n)XT0TAOP#p{&^F!yz6K(AA-0DOHATv@3ro?TQfL9SW9Qk#XJ;k)c1qg6i1?@rTxZ z==Kg&WcvwbyIhS|bgIyU!D$qp19|+f|J#50uJbi($#HPZH(w^A-SeJD35_a7!fo=l zL=6JW?CnVT=}UdVi2BM4m{*_>zTsYxeLp%!?0a3zRCXw1#pPtW9dQka?HEfZMv-YP zgj;3EYrrEFFXn<<;Q|7S$m{FkRoqz=MNp;$9cfY=>m*69RJT>K*^d^v#^++ihX88U|%S4*2 zUPX@YTWJ^aEOO7QfMND)}C!Qk`T7ZC>!|pqj#w?^!hh`yr z?IN0(aBqv&>qgjBG1XNZfyd_CPk9SC{A4)HK4!>S8TP_oU4gvtvzPtcSp$vL&?{pw^~}qLcZkJ|S$S}k zA+B0)G0tiTo3*~mzOxKXa|?1HrFywf{2YmF(R5xQvGI&DCKFLp2oM}J9FNqENz*aV zR9-kDzF4oCL#UvXOMnjQ8|{;EeB7u%AhvHmSFU0O{nAQ0Cc(x^36+J@-08ZGXu5)U zdIKUVoh3^pQKiyhRhc!@lQ53-y@n~nHqr9aFvLtHkUGYaFz4ZlO6tCt(g8!m0NwOu zbvjD4d+V0yc=OJCckuS*lcxt?RJD$4=-Z1Cp*Zciv2Df?|3)Ry(KHQ46GOZ3j2&!H zOn=JmH4gQHaJ)fEqdVk?B*!%PM$52PXs#XjaC2fb6BU77AE;D_7TfE;^Hk4LLgC6>Plr`9C>&e-UYS-4F z%Bm4`DeFu50~U*@i?10agNeJv|6rD&Rmw)>%W0$3$O{eVRCy-XTH+fbn1iQpetP}B z@t~>k2kuHex!W9WVab^qyKM5&i>W#)jS2jNDiJRQ=|$9P@rLpAzNj$J%$)$q_Xz`v zIIS3E^N|Y%iW6%xs0FhT3$ey_t)N6E#M90Uw%O&%yju*dkJu*(2h?qc!`t%tDc;u{ z-}mCRLh5Ec2e%b)099enjH@-%_AtnX+GSfWAi^`??!p9!sZYC$QFE))V>O5*!bFbb zfKEv`cY&KC*)6EP%>rclT65Ii9A_I7@ZrtvAo~R^dxnwSBJoO)lJwe~jid5i&fqEu zT#h>Nb*CVYf}e3jOaud*Q~)4uw8<08PwL3~#`|DiVXQp3F&Rt0z|_Px75&z{&{nSP zXWSa5d2*F&z=b14SExxJ6cHJIPO*Iz3f?T(zGX+Lf;eB{`Zot{y4*TxVFxCZvpYh; z+wdtU+-8ST3Zaqu^O!vm_PN|lxn@e<7~1%b>%rzkIycK7Tl01*HDZp;pme6qlG=qi z0#knmnDqW&sp`jTup@%|?3xhx#!d%VaFB)QZgkGaT^tNg!lxiArw*|_gm>NmUy{v& zG(ePp-l$Zwk=Or7@;dwC&#(ppm2IfP8A{Ll$t*~sG@gng1}S?S0WxjPVxSV98nw`W zu}(pO>Nd&Dbod3VBCg$o=1Z&%E?Rx#b;GTPWb}(L{+TVT!)P=LlMu6u-@I>P31jkm z@KVfNrQF;H19lD9L`!2Yb}CA_ApJ1J;wRJaB1~40yQ(SM4garZnG2Td-mSUow~68$ z3Xfre3Cbwr%OyqH^iRUnBHT)L$8(>*DK18m;!0;wHbx!n>z`h}6kwtxzJO7jdx;2f zbJ^+RwsiikeNvP;fV6hXA%o^7CIw6cmOG(Y|ISYtF5wynZS4d0ZA~WTX4H!|IrDmhYwX*HyB#WF*oA|8NtJ=A`Q`#!^#aqu zy!I&*mjLr*1`l^R^8A9^67#-fu;>4Xk$8W;~WQhr0b4tX} zo_VT!arpmoSz{a|C!unVt6rWRrf?~Zn@fFeZ%YKZfuig=8&|i}>A1A0tLnECl=<_- zr`#>L<8loC{aB9-uVr+h%!Cf@q4ZzYeV28Tq62SD8&j2%ex_E}=yoaPtGSv4m zHt*B9v&?0){LHqW(j&*EUwsDWIpBeM!tstM9~ z+OOx5y>L-IkTfy*pAaJQroZe;E;#nB?5ZxJ0EK=&pN}9=wNtT1s;K1B+pc$LnclF~ z6$ysJs3!kMN@g^yIgXLz{%pE!SFQ_+j;HZr!cSt9FwPoWgq zH|N0_P>rr9G<^tf+%7^`aipSyPZw`$4&l6;!iN)-grDw zscF~A{aB&?&)&|%uBiXBxA$mo7ta5~N8Q~`{>MAd|M(I%{N>A6vZSEbfImd?Kb2Iu zxpLi~^im~poL|m>OOt6D15#R{gFy?Yj+o&psv{ob%n)`w!_o#IzhAF#Sps&f1ov-}QYDEoL!EfPIls!(z@jTT^7qas1>1i;36{cx$ zq9rg$;&Dj%BJ8v;{7~yImilxYg}T=Vt=C76-S>rbK`G>^KXS4{i(l8$oSUmK{Ew(F z+H>*W{A(Tn-t!NK|B`|mG(x~N)PrWewW!oV0H*(FFgn6Y6(2NnAc-6BUapdxxzo3s zUuXv)f=-7Gnvd+)Y!*a9S?g%e;#s59M5)UCHC2s#Pv!w5NYRBom-;Po5g zBYp9&)Dt0=qT+-4yo!g5arhp7H|iSZO5u`9W@=!q{wDh#*9IN#PHT`mZ?ru!{vu zWXc?G)0qIUWAE7F`2s2kHG! zb^2a?A11ilySjg?AYB1&yS-v)QNbZW0mTb|8^O>Y1<@ETx4Xm1*V|84#>19p%)Spy zTyN*LAPGa8pd_93IGRM{07)Ussec^KFT!vNbnw>&vKF_GdT^S%xRy@4w*~+w(Gewmgk5XN^l`k#&dhy5MHT^?Cg;KXZO*gorj%0)c-tu)Z6HP?&5#B@)^#m80(a5u2?FA6*3bn4kqx8M{me_ zUs|FS{*tH#C+dQsT&#ut;p2Ku6SNEKiOCXw_lc|cpfhXry{YbNWCv(74pC^6*Q`TS{W!cJ8ry42B8CfX)>rgTD2! zPh6F+wLD8regHGWjz2uMfWQcgj%B6?muV%Jom-w+@C~_tGF-ZBnZZz`pmt^h=$lDb z@;)KTLY%d1x!k=-i)6zm-%`49^_A~bu1R`1IgZC5WF56E=gdN9N00zV>{h^QMzemF zKt)TnvdsNa4wQ2s&P?0y@yM7S?37LsU4zB4O(!Egw^pau%qK{)5i$owuLPm&g|yAA zz}eEZ8k>b+3`TR3U#|;A9!CdawI2UN-?bYxr*GD3X{E$BgX6T}`0kcDY7@PVV6!@1 z%WaOSk)Ooz+!y(=x%}whG#U@(qZ_86HgUwSTjT@eO|0CU5f7dZ6I;C-w53;$mMall z?nQ^7uYF7oZxG$AtwD7$168+x-rBRX$t6F|(@e2Hx%T3D)}f5b%5S=;EWdNmTJ6O$ zimSn68OhC>7Smj9xRM*_hADA=>2xN#3L6*_%veaw#wHDSq6=+^(!Eg|?Ajdh(y=@G z8-UUYh94(>x;RFRr%e&ZLI#0o(8q6a0zV3>?OTy=I!WkKY69(3Ro8C}QmZY!I!caI zZ`qOBVEj#DeDC@g-&^(`uf;n$A!<$6M&sVn2DmH}@i)qmjsAC||NR5$fA=0ee7Liz z|Gd%v3jHsc!4tBLo+QEg8sKvMZ?D(wk^diipmP1^-tNPV{`Zdj|6XMKOLDcS{JR*> zqpfjx9%2X=A_6+{&BICitJ+sJ${ZDi>Hb%>ZrgjGgyB{i%~5!tCD9~EF3~=Jf(bu^ z@%T6xoPAa6LA&T|9-oDnr!o5^aZMnrC&2*wbE2KB+fLhi{M~n73tkXJU@RR<9fQGO z0lzQ5s_nMDzlb<`Z-@gA#n7iACgDL_Gccr>4sS74wU@ubLesdGBDFWZGyD)$<4nuV zkV)I4fD?R+_Uza74k7TCS`>C9ctpQt9*ghggO>OH#oyk)`P;$kEYhfW7}x>c0C8&7 zaPZ=gUK|ntc{(umyMqJ&@WuQ02k-RZ@&7d4HR8ZaM1kjGKVZLesfrj=<`H~UZX1FV zK1s=Ba7aUKqs>)q20P*e>0Cq)f29XGi8N(%4a=ew^HL0Xht(+jIgN=sK|`f> z_^Os?jN#vQlx7obG|{D?5NgH(3G;mk3qKfN1ed9gi7(TVx!TqeFo(@XZ*x?yld zbh?z;K#pf_^gZvV6a(hYgK;!eiX6?M#I%{*CG`vHWRx2Vd#%oR^ifuy@(+jJa?3A(L0Z%}KYrHdq*?-;{Gd%jf; z6ld~8FeWbSU&70H=rkE|?7V&&ilUL77g83FtY4W{D6+MCe0BI3BASG-18H;~`f^sT z?c=<|dUNkBjgep7wXbDKL2;OSp5x28Pb$tWCL;Cp&FjPWPhP)=L3VrifYu$)eBy8Q zYB>B+yd1GvkX}v)1SI<9t1h{gj15$+?J52guj#RW9we04KtD4^Ook3yIihSh2>~2K zALI2aHl1I@rA_~grnzXqt+U?z)aS>I!p^iLyDzU(bREFeUeMbHsOV8NTWkol8EL=0 zMV!-d>Ux!pV3g6&+h$wFMd|cCquIe}IYqD5^7&=Uq3~s&7gc~x#woZ3eoLo<2OnqQ z0Qnbrk2k`PgYkmyE_oPGB|SkxlcE#(AO?9XPK=$q=QtxR`zW+DtZyl_Zt0_BDBRL; zv}Iyk%Rr)*fgr|cCPK8Voo(qm+Ojs%7{|3`Iut_zL%OojA^X!;W+90@aL^^$0uMgQ z090>TV@P;mvkXkyUjQil%K%kw6diKe9J0Snkmb6Y5X24|3~4+Ain;zSNNTu#r9*WD zjEDx(l$m?H8dJ_opfmqhCRRTU#sf|;lga)n(+a?cP}1|9XC+p?fc9ltV4$PpjI{7!ziqyvZm zfIxr0+=R#8ci&-r!>BHqCmVJkB#h$=4~Ltd0+wK2jJsoxegTFw9VF3gemxfHuQ1Lb zjdN`_DVtIet0a#isutkbB+u2o1QICSfH5rC3z8(hz;{@L3|QXB;w@ipY7qJpo7vs* z`4b!i`g0$B6uxC&f#6HjL=sWzn=DXkrpBXVEVvKt5syj#eU*R9gi-usyr(9BrpN}W zRq)^8&{&9Q#OskQ1Wk}M}@S>G%zk@Ak>08?}HnL@GSIgX*mbH*%UHAq`(ylb|29|YU5 z(tU3&oGwPWbam?*f6zp8*QJ6+cFn6EuhgG9AaX zIaj}iR?Nyu8;!>_@ybd(Lt!r}^cLv!9($ct0IiHKD}y>?mRu@Q!+gQjXvdfT)Mkrf zvz9QNv*sbC8ZJH@A>B?<0{x8PV*m*a2A73h*+5amKC+bpckeYUU5)2Nd8a*02d8@* zOpAxhno+nNl2y^^)VnYk{{yw*AnMWV=z42=DFi!|npLgB{%u`Rt@RVwd(JoV#? zsn7o2h2nwOCu9R*5zs+6h|~PnR-BrbN-zZKAIkn5sEwb4DcPe-Cvw^S=!vMtfVW$P+`?lAJebgSqvSdK^xG3 zRz=caP|Xj+F6DbQe#dv)M()`#Gth5?eM$9EwKaQS5N`-AF;oQjGNv1RE??eub zMKFg$8j8rBhUZ}tjJ;FfSFz%YGLwQ7m1Wl5>wLqXN=v`82cOxv)NTBKH~zoBPygS& zormA;?rr>kH~zmGBb%<9_MxKx@6PUSm;HYqK6=#aJ?z5y-|Osb@;}^_|8J(^d5XyJ z5^%wjTTi(@WsV3@jHM(`M0wiIDrqAM9BMg~jXB%r{E9=ua_R(rEK<0g#J=F2jmKy@ zalc6YdFp&4{9|0NgjZbl4iiA=(&S$cGl7!Cw?$Y}$?_#|fr}^UH2yW%_nz-|y0!4*AcSrQ^p8q6Q8F13gRY6$?8U>tbQC5v zoqP}iMwK8_3eH?1tfD|m6g)L60uZCJUHibyvP)%S=xXc+#jd3#EzeNS!aarl^UOTp6KZfqpkzCQ`&1iv_sqoFs94`Un#OVu+$ zxY?yvlGJf>H!P-W>$3H$McD#^d zVNsvligV<*jNCU}i~l&e^-xgA8}a1RCBz`r9HF$}qRifAGE-sHh261C7+9 z^GbZwd#{T*FQat+J!8vbbeVWD4=1S(rs-ll{(>MSr|1Bb?x-6VUS)rYZ&H(!i8ti{ z%FHWYpmZbO(k+J1f=ODs=o8>{=ge=UggQ;}uS711JJ8}Yz=fu_&ZS0+Yoy+T!!S7y zlcsk#50W|TZU{0^VlOTle}=WujBa4hnL#W91X2|ARZcu^Czt*@+w#w_cAU_4*)=L( zGztb`>C)ekkB_Gs%k|^)#?1S0bPPgb(`!(ErWT?Hqe$~JK!xXeZKDjPCYp^yHNgV# z)EPi*l2+KkHfK}-06c0}H45eOUWXT=ra;qsAbNb#BhCMnAAY)0T^a+~vg!RDLksxQ z7U!>K9zW=1RA{dZ?q!!z{@UR=WuBtB25PM(ZRCQqCdQ@3w|e2tC3DJDyuRR1|17+` zh?8NNO*zdt^lIis%ZPu2n4B(Us@J&FmViL%DTnck`7$--?JXU@Uc8Vhi=aNAZvk_X zvWf-dB`a7#YkGTYxFL-I6UAjK7NqlSi~`Ch@8iA~?etpSCH?Dt9stl}ZtR9#27&B$ zeU=0l3g8JIFX`zo-~69O!Sm%1S` z0-KL0uLLXYv(@2Mwt6aayz=U^UKLhvShY|riUwg01oO%u(7t^$AV8R|y7d*7ZR)85 z;McS@W zKeRtcLMjf&tF=K#L9o(KyZt^yZ{Z-hd`98e(7i!*C+3scFiOmd6DnPv@ib1`;q*L8 z;^~L_(>Ko!{->7*uMhoaPu@TApS^fjKjOkZFqm0mORmr=7sBP0r0|?CJ0e`Z`zA0N zqGCp_y}}dGkt$@(iV($_;4E&)c&fp~yeXrsm{ zPs}NEo1}@ij7PMYDi%&m$c*C9cXUiNa#0mulwO6vxq&_w`6ingt|<(lubg$ws%+ zX`0l7`m}=?rb%jy2>PS|{S;jp@c&sSeR8khVf&XD7<(F}dBYJh9s68Of-$UT0!Pb5 za_$L2FInZ&U^)bVsZKm=nJQ|0>BPL_gpwt>@J{79Kr}M9dsURIoYkH(7yVH~o;UIT zoA`ej|Np4_@Y@|sDYuFL-^Bl;4Eik{+={B)Y*cUc5(UfYX-!#u@ARE_U@ zov)jiPLn85;#%fJ>clha6#zBI2~M4%gzM2Fcb__k%?bYt>Q%b>o}T+J73vp z6S4WV+7?Qy&u-hP-g`#=~fRz{T506!fF4&f*q#*@XelB$>n2wSa#U2K? zAA{tbr7T-X|B_|D8Xj{wbCHjX-#00{n9_D*ZS$-L74L=iW`STaPSa}F#;O1j0i!`l z8egcty>xIIPJ*n%WfW49QQACnl)*SoMXJU!FiM124oQVNcLhlKK{qT7%>C@JoW(P8 zcOGVVX<(@Xv{MT&8Hy=ot)z!-nF)@pFd(jR^>egK%#!##0@^nTgDKD`OwFqkaHMiU z5&>!Qj>Gvy7*2VhSkYF@NvLXYk*x*wHsXp`Qx)0D%_9x%0EVGD$eejF%gN5$W2|1j zyyra`4#g^XwL&Pv>0}3A+Mba58p{UB^TJptOaUN4a~J){#uRZd)lv4{XK-IkL-`HZ zF8nIqdxOgX2-hdvPQ3Q{HH?rU!iRdWn4eN|?r^Y3qWLBM{^ep`KWgc%SU#SGh zuZu9bw3^Am#_>r+*~C#6#&6s0W)Z$k&>0DHeNTroa%nMJk$HjJ4I>H!(|}a)QIL3H@Te2yzBHk zt{-u0wZ|?^jFF<8kDbIgY(B!;EZ&}mV5h-394f?MgR7w9_?1H)Vde&nMlgf#VL&Ts zWY+$}S2@)(A0P5P?~gDV0E?(4s2_sGqK{6$u2pBD#r3g@Tes6}ww4XYCc{c2!jQcx zL!!?zd6gLh4nzc^7_A4R?dC^2GV;yyNd>4dkqw4yA#UUoq{3&ZHGC1Rx7MhrZ6fO5 zr)y+XClO{FpKR;h?AZ+BDR6f4ex{Wpc7!!^QYl9X=BpwpK|i zOf|@VTe(t^T&8eKO)(g163#LAdcdSpvC<7BVId}Yj~6GWCW+HNsM_~UGWQ|!z9!wE z@w9nl-+{P2aw@)@inUi_5qdOuqvKyP3R za4ht&wfAT|@#nCTJ{+kDZ~=8=7mV^?;>tHOPXCQRD`gi)YJ6|%kZZEE>p;7liSRSN zSK9OE=&8p>jF#u*8jOs*r-8;idt#7m26VF{P}LJ2-^|ntxIeXT>9&v52j7IER*Z>^ zvT7p@=TU-OKtB5Z&^x~L#DaWgtn{ju+@nulZHFI$w9mtaF(0Bc2QLqvzNZ84J%9J+ zl@O;u=1AvyVN?9yKYl!TcR=~k`uA!0``(k+&$#c4*IwhkT>br)ci#ZV{iXraW}7l+ zV7Rd>OFY%er0H{fW`*r!w~*`ZC+tC(rpR}T@vgsrPftT&_UNAEA-}R*X>Mll*bNRnXAbRbSANimfHBgAQihnClM?VH!`3g?Y(Z+H>dM9 zEaB|+!n`&VZssoJ-4uU)jyv#r#3Tm#%Jmn@05qGlOY*69SnT|5A=<3&n% zFg4(Jl>1zYp~d+j>8tooJ;>McA?Y(CHR}G#h7%V?UjO8}H8TB=yb*+aOSO!2OmW>5 zT#*7nk^fq|XsQiKT|oxbrrpO(xbBikWUb%PwM0CGm&3PQnI3@YW9=F?*j71v4pUm_ zNd}ZfXX~2;vzbx~EB4PhSLkHTQ_LP24e_&`Xa9IDHd9=%7Ngl@wyi8ocJpC4gO;|s zijIa)gz%h- zI%&bRj~NPNxf7OpaG@wqd5pzO!769Y4{qYr-R0`GkGBV_t?+nIyvE*oltsIy z#WC?3=#mRRnp__CdQ^s1xjoztw@25aG9)e$k3$C%H4OpW>ZDD9OC25c=czqaX~Hg~ z7B(FU>KQY^ol#|5KXyMJ*pI7_A6T7FH6qrfL0H;#8p~74x?U%Dx7tQwHB1}|&N`WA zsuhAJm~6XfcL zYaJOiC2fFk9L!RRcy)Zo+u~U`i7)a-<8U5&zb>M|*%-aqyuHpO^#Br6^n_25!{_3^ zY5q{k4}b4<+MT_MwdQr9hi%BrV^$J-4EuXQMQLmyCEOW#1o{&hxKLH3HSPR_Fgz6c zIEHHov@!teBt)+XZCcAz9@0yIHW8043$r-R5*b!6@ksnTuW6ac%FXhc3;bep3All$ zXHNV^Ag)HcF3{1@!GiiS#eoF7xb1NgJk0F?oyX3C!5I8Pqq79)l+6(REzkrb%XnV{ z>}Nu8k}K=I3d}7Py>bu988dW05&McU4@o6X62lcmI*Xj2#Vwo?D_hQzof-obn_pvl zU`8{zX;Y5|y2gQUkf0V1+TU{nS2#tWQ94HNDEYl|=zib#_H3GOI|o~|KC^jCr{Yzd zCqy5Ao&_I(N9B8$20O)tyq5y0qM@yt73a0!D^5RKc}aPL9L zi<9UCL%=oExCk(0FpW;8oDqcIgzOZXGc>kMCsr$0I zlF6mT2rY}$X)k8b0HgqMp_zUz1N37PrB5|U)+$zts{L@ks4wyGHqQtrvN5WwTb1Z} zINbL>g;#Z#@=pxsG!7FHWJ4zFy7ImeT5u|j?eaTg z)UFTeM~>xK^Xhv&Tn`^XRvpTtk~2VNim#_CP()=Yk+v2iAaLZxfJx^m-C1_H2*<-2 zOop7C6wY%Rx}{99oQCD7pp9g37B61Cee>@9lh^O}=_1z%HAP<-QHjX1s4E>MfpL(W zaL7#>>RXRTu=5SyV$HVq24prEb*x&V5(S%x7A?9!Rb(Lm4Lx1RBR3oKFYn&`^w#^~ zKf;Vc84(9e7;t)Lq3yQEQ%^}aMAmCc4LG@6HsZ?{e>;$?oO*vfM0XktWlGyBS4;R2 zr3v)X4NrsfC{7I6Y4`vQrXVfA($FnENJtZ`ZOoSUjMomc_$>sPDqFtZ=sDc;TkHvz z!s1djF`DZ!Mq%<|kK_3q1GP)<=*OykK|^ zY#1;T1x(UCJp`2zX+_PQ=DxF@SA!)bvyX%M(BUBCG#lwYqtPT>MH%?qblq4H2-5N6 z&Ol(1eb-c0g@B; zNs<$0lbC*@v=6YKOCN4IayyEcf(dUGE{K(z&6NXi%g3>&ewAB$ep-F&11+!qgAUyZ zCijtH7nB+aTUl<4&*H%XuNO0<@iYM89Cj9N=V$<=cN&u~z+f(B1C+F>j{?+M(P^Db zw{!}N8DfAx5+ruG2vJ-k|3pc~bsGSa=F)`<$Gf}a zHsg5AbMoz9baBrNV2bNI&F*iU-A$x{=vHX%}s463sZB& z+fShE&DemfL07%S4l^7V%aX@;)m3H3@0tsfW|sWGH*rM^WKur7O0vbz05=M}c6RP_ zSGAa@Ow0GlPRnO*?Uid0g?)1X%wLoG=i5=rQ*Kye0}ZGPRHaMOy|%g&R7hJa2*Fjk zz*Lq33)srWxB{S`6j?${NUUKtYR0Ij-9(m5*Wt-H zI*E+Hi0sZkEV@kK%HPWv9VMh;=J{cGmd^*sD{lW1J;aRgbpP`Qm8e{sNvPey@W%jRB0}vYfOq)d~AG zBMOE?K-FI7>r8!sMw~_J$r*TftD-0Ol9~haHoXhStY$ckW-U+u>;2^{Y>6wq zXhfO6hnI#-jvU(;a~Cj^s}i|M3sZsA%n(DVReRX6vnwEgNr-^A-`1+!YI!IAX!tS9 z2$6lyO3|s((sO=f{<#f=j8`T@)IU-ZfqZgLGa;x(Q_auY%WCGc3>W4KRWtBEiE%MS zfg|0qHG&r8t~xI3Rks>Qq12I%v?$3%Q3i?Jb3MO9(L(DjXE!^Kg|m^!Mj`j&fw$v* z<8@n}`JmTy_r}Te(XCSISHNu-%{)Zh8FeHDEkPz36;- ze*|jL82VbMyi4CELG7ncUcPLVEuhjyXcMOP=!I4~Dk5>;!h{>!Utn#!rF(4UJgK>l zg}R{NV-eRXmj?c?=uT_UZkcV>OeEnfnSa)k2No-y04Y_&Xyg=WCC9mU!AA$RbP|Q9td5RNGXN_iX+ed&gF6SGNDH+8WZcCRl5ZhtqvLIJ?##56` zE2m8~zyDaKeb<_pDDxPdQB@Pgq0#bT`WmRW&7StKR#$X#?X^5}$)ghPP4R5=IQCBA zJuu_un7|=_edX1y5gT$={ZGdHh-(K~g;vo2lr4l`G&P`F{3}I7`67p%K@3uRDcMWkp zIxZ|UfhRLP{S*(X(!xv5t>A_h`ci{`DsNiKGc|K8tZ~8Zb+QK9RmNi4v&Cd``HuD= zv?`TQMOFj|+)hy<^PAA`7M#jCYL%kj!v0Rr!4UhfL0OyJYmCVt5Di0AYD7~~5O}jR zTnytaOszMktZtH8dTDer!2(%RJ*!jOahcJmA4Y)B^G4LPbIXyOjw@(3HBcmC{g~%d zhexBin)D*ZAvYNZk~avqTM~s^>boFy#xwt{zt^z=YsU}x6U6+|3yad8b&$Tg-9CAW z%FHFT2sKCKdeTaInF!#YDbVSikH_F;dobJQ$|MZiUTkA$e*L8X(Eb|F1R zg*ZBes9yW+F4;)_@R`mx`TsZh|7HIFhmU%XI^AzK`TsZh|JgvhfE)Xqg0+Q3cE!a1 zmGb}h9`5aN{{NlbF8uDn`G3^gdAP~{fA{(SVZ%M%aB1XlhGQ7{>8-r?cG`PA>wRI; z`&!M-@1I^CCz13ZlpTJghs>oPOiw^yS}#9-ttP|0qfwS7j!Odp4}lU2<(oJUqA{n+ z%G_de(-pm?NOSP&$?Nwop88Mz{^Z5WCqKMA;BM;qvxB#X*-NZ->gXZ-V_(pi2T2kO zwz_x}GQieql#Sd^gHZ^=9!QjD4TSkVff{+mIr`!3qdPnF*=u2CyWrA?C&cW+GYs$q zKJYPxacX9kbiNgiMqJtp$0e-!2Y?uJhFzS70ItJ-dkJVj?u=&`Z_7?W<9LAFENomj z8eb~u5W5F+M3s5QS&R8;qCl=XA*sW}(wenA+itrUKZi-Yj_1!Y99{!td8fB1PNZl8 za&#fSIUQq)4*@;kmK$;{DD3gPjH&C>rlHZObjJPwHSNm{>(edo%_04;>(v=1GB~_{ zr?bmP!%<|`alU1&rLe%}^4cr$jZ)HMN_$orm<~81t#+2SKFeiK`)`I%nUw-wgX}s~ z!fiW`syQ)}h4Ml0n1h%66azFsy9ZcMePrs- zXoGQ@`Ly_7}tQr_?#=A#_5Tyoqjku4k@>+)Ym8y;LIG7jX*R-(A3nU@}(-7 zNH!?PSeS$%q}v=$QT~cIVU(%vxQpNy?6ewk!Y#+46sIuCCzs{O*j&Q*aEeTTSR)6~ z<}$Gsy)KQiWe8!KIP_=>I0i5Wsjo`vUMe+^l|IlFq>H6gTi(wWMBe6ahSV|1qHW9v z>1xPirx`58BOQere1S->Hic5p-ybRr$j|7rqf~es2t~JFi|!!G!-p`Ckg14gI@ril zROaW+V0TkU_Sl7Ef7iCw*jWqJw0YJ`vv`vCCvoE8$6zqWj$awP7H4d26LX*`7OD4< zBPo-(tH}uty8}_zvki#%-gy+@xAI7@lhJ5;4lD$QW>M|z)kr8|1NjY`NJ3bE7e63= zkYJvQHL)FITWNm#l|Or9M}-4K*Ek}42ChkT71EJS0MW*dgISLH*0u{N6%pHyPNr~A zr#dQB*?S&BoDIyPD$Em@q(=;?FSYr5BXXm~Z@*o-XxwT$oKeBUEUuNnGsHv7P^?G} zB`h=o@{LFHyg@2xRtrP3eLFpiX3?mDPW%OeJc{T+8c+N6L#I{(yDF^&`fR#{z<&UR zJy3+*OuJDi4H8*qx8dB4x5$}wA03@ZV zI_!sv03tjhJUl$yJ^ZmQf0NZ_)cQPayOr3 z#~a;~U%9d`k$or1{=?vVcGil$Qn-A45p2m$R+>-JP+*nrFlb$|^+0^C*V~$efFKNw zcAd;JAcKnq`9(;lbpF`pKc10{Y~qDP13Bxyns9SEOUbdbaTZ~wY%UQAOg6$?QsjPw z#5?Dz88$cvm&OV_44yVkOt~*-+hke3Z_K$ljy{FdkR2`1U&79)qR$||k95QVRD%{A zLPa*S^b#~^&Y0X+P!7|}e0y){4&kPkKQuZeMOIkkNU{S_;{pfp%@YwoBQv5p0wbVb zpo#mE9ta8!Iu+CqGqTD*dcE`e)LgBlutUzBwepC{mD1FyL=7!!&kvt|?5HD)0<|Df z{xU?{K1e9sPAtzeVschKj!CrSD9o)HF8cjA=&&}=n_uPAz!^#ibSRcK&l)c%feI=| z=Xu72E;OG8*A1(L;RLl52PQ1b<{COFsAr+pB8@V;UW@40F^J}^p?P_?NT+vXY-z=^ zELdOEE}EaZD??$4T_Ayveu=R`8rYi%)&Y8gJZkK3`E^aHmQH7*9aBZ#s(Gv0X6iR$ z9oq4$RBcwhAJ0IC`50Ld-^6}JZrzGBl`{1>x?0+~F7u5@6LOOE+HS-gRWFW0CafHF zsb9t98%X+?NjrzmaEXgq*@l(h0mJ@>)4r|qAP$0c2EH_<|G`9^{y`X@ekm)hY3QkC z?Jh(Gr4#XP#z3D8{X$=}!d{>}cc?#-T_8;oOD_-=0<&4OzhvqzX`-t#uN)GeKh z!LgnWJXIRvNDOVHx}k~mgJY*oXM>FOuxH{m!2FL}J)+s53ke!?*6Z@N%zQOuqfd)u z-k;28OSiXifw>^a!;7Wn)YJdOFT;PnwxvMYad~!NRc&!wwE22~6H6#_PO2(6Q6?BL zbsKczIYu0F^1u^0?iC1fLIKaYu#vd>#gg*PO!)x+ZWk79Hn4cCaCQ=yQ7W#zUL}ku zC~J`&3#vO2m;M1i9oCT?6a*&46HLkNLIsE~@K+0Bgi;4uwt#2D^?p?XT9BQbop@^Z zzOtK>4Vt7Wf#}gH0d5lZiEtqC2~d`+NYir4XgO#8ReEmFo11l{@#o2FTQtzcgiVdZ zGDL_2(iVxSw60WegX#PvpRwQdUhtWEs&ps~QMD1+bx0%>hLrq2498J23B!f}s2=y= zYCaCR$an}C+W3eyq&G6M%G}5Q+{gd?(&K-2zT4^bwzlu%f9~UdNW!EGAB4{t`widd z{1?anZ1;AzA5;9#W_P>S?PC1TfY$SBUyhSCpuMN1qe8NwVEE>6slU4LkN;nyf%MpOkB+L{{6 zGokoO?MW8qYcSF~DDbIPxh0dRD}WU(m&IW|TDDU>XV1II_T^X9AP?_G zqka#Zs_JB)0(Fp&CerciJT^?&Wmv;Rr8nA_+z1ZhLF)#TPqQ!@%+kr#xDe~ma043#q9I3} z;hlRdQY}T`kReb=NB1YYcK{905HH z!l<14?ggq99{+`41Cf{aYJg=FyQn(k(ao6e7`{vp+mtYip5?P*bSnXxBqP~3F#6sB zBPOTPa9D#C>y`#Hmb)-4V#Wsio3P``of=lK<53)uFWP(-B_sT!&XyQ$&=^Zuksig9 z;cf3jIGrUQ(ZhEf&o9zpSjKM7XVCzkeLdbR4FRSUQ9MKm>-k4i(<)hyR_2ywJ=C=p z>oEqoQ`WOli}h^OXFVGZ;~ShVW06Y7{rM1^iRiN$C2B2{=Jxu3g$K$$M;i2m-7{jFse{3n|v8h>h7)F5Vq z=?;V61zR4h{K+HrGvN9cK8;LYVF6$mtG|`Q?j$wn<_!IX`8uIUql$}!>n##H#JUlR zLCPqbY$LgKzNk`cw(KHw#_*Fk9>zm15QJV~?Ab-WVL)n_m598^nLhG3R3ntoB*0(5 zc!P3#qihqKq^P^bOG-Qs`3F@g`U2~pq{9W4+mIk(jI+5ZGAykx&_&Q%npdf7$ylGb zRdgd7K;}SZz#aO?V|%!R*`ptH-CGBZ+iwNidS{rnefLj^A>4Nl{k%xAPS5mW7E$Ru z!C!|ZCRpwga2IWGko|7?D6H$NVBNP7Vui6iE$xCNPko7v)*J#EkoCFvbQsr9J;af} z79L&W7E<%i?&j_`#mz#?%`jb@j^YhoK$$Vyf|j>hO@>QiWdF_cFN+BrFsiiK+f6-} z$$ZImzIprp%dQg#q*Mvj>r_qi>x`piN&L;h{uicg8nNb<>bAMHUM8dAAes&9EmO8u zWhw8N36d@8oAMQMlcGY2%Wb2m1*SH#`EOcxy_x@}dJq4N+B)-ce2psVPavcHk@v!P zDUuqyLE+PNl(YF|8gj|mYPBMTK9{Kh`>wPvdb2>!wo#d$Os=pvnH8Uu^dNWp@KpK6 zlAnZoVlNW-?=-^tG%;Y0>kTS%Qt{sDjvK0@VTapNPNe(2`TB!B%uh`EZSfFebg_+&*o~`^st)wM(ZP6$ z!`9YEevHFpU;K?djiD)t@N+sn$Ou`Fh0c@I}jzGGnlXK z22Q`@8FiNH51Li2d3sn!;yrz}{+WKdUN@MyYPJ`A=0C1;(#*l|oStqCF$TY)Ct_oB zg|i)?qWHZ8tv{hEeOhJeB3qGaEsS2KQE?T6VsAMi*AZ;=oP1Wr&83qOT^lJHf|)6;)uvoJNh@m2FBul|#g0k_;$yUk!5jf!mrG zx$Twz=l|jV_<#T1|NUS7&;RYe{9pgJZ1Hj`%1nj{!k55;OE%WHC3N!g*F;bq?l^_2u)tW7s~PHrGkxN!tXDlIp#cxhuIrSqWFqgZ!yAnS zEpEs@QtV%`+^(pX3$ZIXsMS7`?70UEPsv%QO>UCyVkb$$GLKtg8z^d^RDao^1~O}S zoQZWC9%r|RO=J8Tj!!v!E(o8)mGHT!!sl=WeAaNI5i8d%Zxf%!_%(b$e$n_Gq~lYp zW(}MeO-biT2{(57vQGkq&ns<7Ls|CO^fg*+&vJ}4w*&SZEbb6~>=hUnsf*=R4Q<;l zw`d0+d!=sGI6xWh?aj0KzTS?4V4Ih~xzA_L%c$|CSAU9UUXDJCLAnJcXfza&Z1E>b z$y&U(*kglwC>t~PsJD-H+JnTGynt>vo#GV@4#^;wOIh=$(<_j&azC@ZZ(|Z)Zm_+> z`5o;Uhn%T2_ZvVH-w45(152*tMT6tNha1yj`tRz4@{~03{(;wozXWvAS}s zH(6J!Fb$Sko%TTSxI+o|tYJ~G6IRYd855^f5$|L)6^?n)IVzz1~@#AQ`U{R>iXs_2KVi zr|rs@BI?B78J((}>HZSW(h?oXLHwrnti;QC^&Q;2p+nui8r{oQmVFEQQ=`h$t_qJ> ze7!aryBdv#eenk9>@H_ceCm~D#GJ?$h}v-DX~Pq{;bIDam0|X2MSJlar`JL--?I%(=DWdZ%Q8`d3bwPZ_4Snt;qj zV){hSTRKK~B-my{JRTIh*S0EthecJi5O0Wf1FwNdO;aRpluiqk`=;$h+RSj{Yz(cGoO1HjUbttCHQ&a4y z*oDgdbX+ziMusdD)ZEK;cA|3XpV7wBzOLWeX8zG~0(>C5tbuhV(!Esoz3FGd~{WCkof3ee?B|6b#&yy0dfVM(hREG;M^)P zC@wX5t;{cGaSXTo$9R-g;&2$p)6rGYRbt;IEeiH7x>1^DJU%UtyJazzPo>W6`*PcS zN}9rDc-4f94g`#X9A1?-l5it$Hd>enu17MWO!U@6%9_xNJ6>DWP+EQ{m~zsDa-aX} zKL1a3{;$V7kGHm;bno+j-RJ)j2Gqd(|vz3nYH z|GV3dH}CU*eewKXYQv4kjPxuRMYD7G>S}zNjxtpmMcdw-q+Cu)E|lfwXbcz4jtGt? zbJuc}f=osHP|mv`WSv~E>MzmnjD^)0LvDewh#Xy3WJsaNH|nDBR+80)>k7$2L&cSa z+Clm;o{gd@elnz!qDn=U$yg@#?qg*$D(OxulJ2G|KkCEa{RN1%@PCn5fo#IRe+nqP zjnZBRln2bPLVOCo4Xz%nsq$!)1zRAX9~|@EQ~vM%x;_4SJNS@HhW&Np3o>5!o85M> z(`s4o69pBAPOrocHC2jlF?u^#C#%Zz3gE7D+V6D>KgQbuLvM*dKBocyc)WSi8OQSo zUmB0s#f#t7Iz>LO^O)=Px(bTII&q@6jhAJhAO;1L;{NMw5|Ok_NM4s#;ymztDh)g< zvNut4R(=j_{t;2p191vL30VC+=;P44`Dx%)c}~MC%yZtgKJ~0mTh^y->(h?)>9O@` z*ZTCG_34Q{`lkKSo=w-DPS>7K*Pc*!N7o{(c}{K2l%Lb?cXxkK&El`xtw1YcJY?y? z{yhii+E#ioRjV!!%_-1Fp?;+>^RQNN3O+^a^#J7fg|k1)Kq==JCH;LYgy0Aw$T5vm z&{v1i>`K44zwVIF;`#KzGkfUSR18 z0U2*#ij>WSKB+KG;O-#fKVbCks zneA>vymJ*P;2A0&Nj)P~=u*gMWE2ppQtHvkc!+|jXN%Jfp7<|bwGtsk={RO5RRsWr z`shO(Bpgv8>s8BpV#R)2EB4!7vER;${T{E_Z+FFh->um1$%_5Do6B~*E1h-gO9;Y0 za$(J)^UF~^=(9^zH>^DF0agvx9%-n zcgF~}zXT(o8iHawsT5^~1vK$R!ZtvOCJ)TAkq#H|6!A2izRkuPLDyHo^I`A?IIqnF z==^~EbcV%H2a6dgu-pQUP8Kx9#60CPMe7jSNu*`R$w|TCb9+xjLkeF`9Lc*RIKTdrkPWiG zb8`j(OzTRqg(c1!_N!E0zod0o={&Vf$5r7w-6Wo)F?j~mLLmN)e_-#2n53ImHPK1~dk=v<+Hn_dI2 z&>VdkbwKOz9P#NcCo=+XFL1qeLMhNc48b6Tc zhh!=OH)wTzs$hI?2)H){+#3RJhn4d=jz4nlDfbB9%ImdpeuE}@Cyy~tVrvC1zixn9 zSB*RQd!vLkn;=d&Ecf31^1lA_z5n+w$N&3D@5y)Fd;jly|8MF4eV%|oeRY%C&(;3l zo7+2^+vNYf-P`It?xFwp4xRsd|L-sC|9vPn{AiB0zi9+Q8AUk>-DZt_wnGr7Ffus| zrH^(rnWS@yp36iV>^n+MWrKI{an1Gul#`&yG)TO(hRFuRPVoO=jI z+M@wl!N%_~zr>#Dh610dNvnfhs7Ffx5qUi37o3G{Lb>8ZW#;e_#KiP6YtF~hP!G`t z;S2=9EBIyrUx`TV*Yw>x{Ep`ECY{84Qki@^34UT&ZN@0Z%|){$ORbCmIE9U5k^v?X zGzJsC$?D5kD8W;d1@iAT>SD~XlY#s-2OQ|LJc{O+hol`eva4(!j~i{!%f_d}XfJ4p znKoJsXn0C%#}X(44Z)+Jp+F5!d5T%Om<{4^mZo#3>FcX^v-F=qQ59>MAhYqbqaWg| z!NZa^U~PL-%%m^=+rmX7A_Sgk7Wh$M1v9hoz)fRvZDRsrg;p8@hHwUrNW|xkI9$`9 zHv?X)Rgk7M$;crB>A>IOxM6^4kgWm<)%X$?lBJ`MK&`C~)#WWBTS+m={0d0;b_w@~ zN%EW`&DD2p_Xc60K1!{7qU{)4jw z1wQ?-n3k}-AXTjLuax`gR>MHv+tfKO|`YHjIW|G(G&rT)L$+uD87-MZKR@AZG7|IZ>cdxxjd z-~;-8tY`=**Z=vjqyE3M^>}-GtBdj9o15GB`u`Ww|6ixDyTc$GrytDv6zD!KDIic3 zP}>Y3z5^*s{yD#z#u@j;7Xf;R)nVc4tDK!+&B*9#6C}{>c{BjImrAt5wVuqf=5ms1 zQ53pbxtI)(6i~WC8grZu7o(Vk2!2z3z8H_Mp3TxMJD4EEnU=c9W&t8dG7m$M90y-{ z_?>(Yb-#A-4T?sX4VFF!|3Z@lSqA^(Oj;=q0I8z^L1X5RuJ2}CpwUae#Nz+-3?NquSK)>idUra|a_lJK@PH1_jh<|!;-oubh z3>oUt%BaiPD2?VPdj-?#fV>4e-u&F4VH!M6lRmUSn%A%!1iY!gXkE9QjHYB6I^OK; z5TS?ophbXAiZuvp*I~TS+-OiDa+}|(4eHt9+oPlK;LWqQ&kqj6{U7&VzS@8K>HvV! zqkH`+F{y@B2m*Q&;j9T`n4spqO}-s-#)qO&Qxj3YF~oIk&^HZ9p#8>Rx@hpk_~=p5 zUGCkXZ^T(VZZ>|VYE0b*Y?A!YYqU;GYCvVc)M*9CzC69K^|SO-6F00Kz&|IgQl^kj zCZEFSW0Z`d(`1Bpkpb2ax@E@Dp|=`YbHo&38n_$p;qPXH zei7K^wg_&9E+%xu^F55;mF~(Nr|{ik?C-?C*#q%+osjKIAsu-N_K9X6koQdi#h`uD zvB?&vOhjVsStk!EOv#zEaP*Enb#MR9+r#%%N$^EF0B%5$zdIra`2o8VC1)Z>9E`wB zs%fo$4*1Vd*+*A<-z3Zi*68LBo&Ci;rTn`^8%?OcdxdsZ@#fVo}y`aF&lYg5xe09*$v>@*8&4x_+?#?fYQZ$FQ6O0 zsAZYRBWDMlvkRjM9wq1yUVF>vyp~Qa&M1K3htn z&iq5kn$!?PyaqEO()02!`o&^g^i7B90J<1dQS)#z$hHYCFG3hJ+C&-#nEl2F=mppG z#{^K_Z)9-sqDSB;y=+(qR1v>yOJ&;3=4-Ck6C;>oM$I3^4;b?#cKDdmy{0qqh{q2r zji+b>`@Kj&(etx|~N$qm%&|Y7cW2*cwsA z6-eisZ^93k(d-;J3G=v0l}Bnj&S8Vqh@rwUuS)79#zIdBIuEqxQx5pB9VM9J6l;o6 ztzqfz*KdFjYD-#Tf4s+ZFu0VzTZdJA*D>Zr{%u(dBUwPYaVoSQf+&X{DF6< zj1>f-8x$OGuNDAuf6Fh|qey>mjJvo0-`oGcuKj(R-kSb?UlEqmQW$y#DgtxSSZ%zzA1P_Vg_JZVm0@CWSgnnYq zp@>#Mt)WaAQVn#cpOFh*-i zJb|yoOwGrQrmN)mRBSl9_JVHq7DIco`eRH(tPB)b)FB7)hGTs$!Z{x@8|f9W3bsIO zQNO`+ZJ2M6_CrtQHnEnRg#&y(3S)jh(j%?3J-?AhR9FQvF8ig^n#b38Lv?0F&V|++ z>HQ0-vUd0nO%S{~_USxf<4B1fU zx>Wj`zit|Q=X{i&!UfT% zH;iDE4)h0@+do{>Eq8f+TSQ|}P z%~eiQ&rnRSReZ)JYVa&6C--3(uZ>$olPYYx5RS)Zr`rZ|B6ZlR}-dW_n2cI!+nvRJ{|%ZXq{6TSyu= zmWAs1sNBT;Rw_CynPR)GMk9g;nv(s3Qjt^mF8{C!uwO(p)ZnEg^qjk!mVGW z<%V(!e2za2stnRpXU58T`q0WIRwU%rL(Mb8b&YAER-^1!XwdqGHrOd z0>;50O#9G03|>wKqs35W`o---c7bV=0q9~DGm{i`5YS_U>X62>0Z7cljt@1TbG0%I zHbk5pb~??P@*HosjKdDh>oI(h2jQ?|f&<5BSupl8SDMcCT3sm~f_A?lc@eL3HXfWchM$w+r|U+GtLO|r={h<)t&pByE;^ z^Hk$cQ&4Rv4DlFSJqnumpKr-ww*$X6H|RodCaqS>S8s?1Z=KZf9+86~XC3vs@y6qJ z5S?c9SL;3m;XeN7KK|#+#{WF-ZuOpQ-RJ+lkN=VJKbWgFJfEct%5}JE8u0S?pPk*! z-Y&)ebbFhRw>B~V_v4-2`}|*DEdLjluHPWrypjGZi=!Fn9wMW$qybe7KEiOI4wfO| z@Sy=#588XM*6m={;JE>ax`ISBPDas8&{Sp=mG5ER|k*3Yo z9#tf22rC#>r|fq(WmO=d560ZRv2qwkG1`z?qFLh3^ud}+j7P_jb{F_ zw%xIyr5fvmDc(ywV&BChZgO@Ob7@Z=wFWgE3c}&aRQPl%)R@!7JWv3oSH<1Y)XgZy zMC@|(kcae_9HiPCfcTTRH+F+Dy2@V2`e3*jaPM3hJHkM3wjodMY(tiS0p zlCksyqi=oX8V~RXVQy9R5s31rOOVoghFzIa6`t@;594|s6cm`CaG%_i(Y;gMm}<(f z$P2X3c*_+ZQ5OkT=jP|m@esz{NSvDGJPb_RL$sp$UBL-Jp{CKQj{qL,a?7{0h& zOO&?v7uwi7>2PBCexN2-(l%1ETsRWKTvmW>nsXK=LzE5&&_OadxeUU9cu%l}8D|sn z{7rzO_FVNzlvfeWc4k=kd+MVo_q=c+(^>B6^!qZm~fi4?#?%Ua1Ktym27SZy)ZP48Wd z0$seAe8p>7C^!o*lJg5`;%Q@ZTv6`TRp zdzC$|bKA-yBu88Wg#7OV=XaCho}qI*myOnH6jGZ!$V0pOt2-U1{3~Ed%^s0NQ*+vw zYzEpI!yV4G7vx}BkXxH~Eamq0v_a~bGDy8=Q_>fgZpBOt0k^`TC!;G6P9my29{;=mHeib-9$qb_?Y^ml-9Rz6 z^rjnVVRhiRN->tJD1b$`Kb3JF8S1DW!740W#rCTBogYD!5E1>rxoLbPggk>}Xg&WFR7IKveO3W-JNj91XY%IPO%@ z{5uw+eo^PwZ=4i|4zWQ8E@;|IYb|@cx4$bNrpLpOeLgBU*$)GoYtr<(K~rr{bee*o z>udrxf?<5;`1Y7ax0_=dtjIBaq54J?DfmLrq$Tre+l*%VaB9i8`)6~^7xwKb0~0{F zTqI1EKy#%?eEGoj;Ui5JpdTqFj&c@RPgDFco+W31OB~A9LxQHIKeU$TTr*3Y&NC{5 zNmU5wOR%)!F?_u&tHvph?Jz~+0tO8TYX!OjdD!BQXhWqg2IX3}304Ebt7Jo(UIn$? z+Fk$TmUu`Ih?N-8{JYtjF# z$dGh9Y@aeL-9%n<0j#ol4%;&^0h}trdsbs1IK7h33w5}XsZ5!i#L;L>tK~BG7q&vL zvfpfW&gSIy`^ERYaYHG672(|Ij`-kf`=^U!&XK>Qyxb}wkS>s46LGE4TCc zN6Xyyx2v8x%jiSDn5kKNsUrZLc;Y^TE@#B4m}NcQ`Y?FzzPjQ)o+{UrIyk=L(^~q@ z8FF=rEX)?Cd>i{Vu@r>+cvI^sRGH_5e}ok8)HIgc%m+@?K;5}J=h-i#8aJi;0Ksb^ zIYDHd>pFT-VD(3d{RzpNsFv|;khg=FPr4SXT6rb-A+_gz-6i^!9qc$?I3eG@1!vLG`@*Ex43coor)Uc>ii#5Oujm0Y|8{7tiXIX5 zgQi?y$wRYja_j)2LTg)OQ)EAo5=Vk`mYgTZ)U2IycIK^Uq$xu`T&Sl^6CI4;Rrpc< z>Cxm+`Cm21$^YiDJb7BlHT6F?6f!=C!i|4T0 ze5~sF2+=TpX_~T-!L|DweAI(uq3oo{FG8v=8P}vZ6=lT3VVn)($*|zcd_h#{FbsT6 zBPb|{u=Q9&S^bV2rI*FFO{%O|z4d4fFE+H|uXUh|lVLFl!HG`y{b7RBNQ5+%8w->gu^65!O5%OJ?F;G;JN!6CnCYv=T3~OyjBH> zrjI#Eos=n7o>-ZMv%EPjL|Y{K{0>}&ig-e)ILmx7ybA}ZKfMg7H?CX->eX7gwK{n` z>f-`CRIO{U$mZ!-<73H%ad*A53eKtvYzO&0Lg{O#a^=2l-KknWCKN$45!_6`f@&`w zt3oj^z(+R?03TIPF)d2H<=aes7*BzbZ#k?UQ?%f4nAc|8`E3N88ss+2WS7h^#K``6 zp3YG*#YXU>`)k69NkR_Z5!knzTwI;bk|AX|@cr!BJx_z&R`%KjCPZgf2_KGCxE{$b zOD1=%=RMx@L?8wS7_xZvMRdLV=sdVj(eunS=9zM~3*WuLhU;*gOp-B>oX0HUJ6a+f z&@&(%EddS0gP#`(Ix3K!@7Ti8JF@QqQ5@{xeGWF=9SdU+$kEaA<1P1u6xmF?&A2Lc zMI$4wPPg-;9*v5(gm+|VOQG66^bX=ZZwjyryRK?xhh;h*sRCDd4rc`(j;}%)F4QRJ zFe`2MZsmTX-EXsew;g}mg1yFR*{i8`80Wl$@~rr_{T;5@j31K6%6sVTz4s(@Jpko` z7;9^f{C%)?9kUk9?`rRphiLNWv2=B;_?0b>o}Yd>>$8u+GK-wRUz7M;$81zA6xwgd z_Fr~YlB;f*CWP`8#I_;cV zYq%Isz3L4cNpPmZ$Wq9pxTR}H@i$Uy;Fca$H-LuRZOuYHs%)W@g zWX=S=MLqNI$xhY+gw(`$sxN4fF4;0YT!e{AHJm+-RR3aa1=Ia_bNrL7+z2~`Ha9@u5z#tk@1SlbwK2pKE>(gCy5clCFu*Kd}$`F%gw*`#Bd#hZd(P)~C?-7F3l8IGv~iivV5vugcmhl;{aSJYPUR<@iK`dGelA87NlBHc#B>)_ zhbz1IOVj`{_^xn;(FXqW8dvpp<10veLJog3(whBV%NkmZj7K_DX)ztq28MVKhr@Ut4~k5%M|@z>L5f?((qTvW41=#~hu3-2_00d}U#ojMV3(EO2HYdTwHoNlkiILYK=3hT%oorB0Y zTXm@2@9xT_&?|ANnPjV6&@0PaPCUZ1Xd(n(j87kVlYFsXh{z6PjDg=>2mj*F7*sKr zd>$cV6d1-!qJDxZ*#7I>xgRH=;vwwWDE^4bdYNFU?@#I!s7~sb$$J&Fa5~*0?Qy*$ z`7rncXrKmE=7<|2y~h}fa#Wu#3~VfoNeAW+Xvu2cQ$)HS9sApNOTc^>ypD&-Vr;By z1O0*K2|7mCS`*!s<=($y0wK9#duQ<~y|kt8)~d=$P3U+jc5OOWA*m_UBAz{zMb0kx3(O7i!fH2_|PLAa6Wr^-@>-9$)Id zd*UBMf+{t`-xo%>um5si|K+c&{>$#}lgE!YpWN4fxv&4?m`GMG{Zgm?OK%qrJJx^M z-G03Lc(V)Ve`jZF=f3{S7pniFJxmVcL4u+fnzy3Khu{UA1JU z_C)s=MnTpVCJBk?5T_(gS=%U}s3Jz`yLogj9T~E8F&hAMEQzgcrh4aa<9Ofp@-9UF zajiu7Yu=plJZdQLCp5gAf`_!g&t9A&o_h%cOh!Acqitz0k7l=2E z;Ad4hi4yDA=L*;w{8Xw#9JApr5qMh9Zl%GmA zcNqXmqHNZY#>xsFV7%t2w}GMd$`dKjL{RC6{^L!HCBQZ&Tt!SgA@12Xn(x|Lg;Ct? z?a-!$S=6ae0|10f_9$q62)-rk19=6a>hEJL`rB;<|N7tjyMW&u4SvaGdqW9`yFBjZ z4A7@pQ&9ys-f()ig5L%7?&a*a2pknMQ_)6?_)yU*h|bQi?10{WpM()cx3^iRtAnb$ z@nK}JDmAWk#{WR4S8gXIfkzlRPD7x_gudC?HQ;V`wysyAF9EtQW!pT#IpHKU@*|+G zZ-KVH#ewAqBBMu-yR|5;2~fCu>$WM6ml8_3y_C@?!kfif(5HOc*SKG*E%9&)0s5n!Q*ZEzv30;MfHt0_-|2=b|;zJ3lOKZseTE?Ig+C(=_Aq zF)TP`hIH5*rI#R6P_w?!55%kI-I;B03uwgdjrHe1!Rp;umDaGfzK~hRBS%psu_j}u zwWnnGt`wg?3Wx|+-{ z;(0PS6m}cOfXS9c*+^S=KpgqV^&r^#Lpt6h&N7cVa~jW(*2Cz07RTdwGB3T{FX9Oo zJ`lGiVI^TDRywzHgox|?ZFY!Y<)`rf1KZ^ETQ@Z=y}lwat`lKsGpuz6YcxR~rSQu%Ul5v*0IE2P2lE6p zj~W)>$Zr7_H~APO+?lw^l^Ga7bD1)Heac5OZq-mHZG{(D4TUFI0A^4jo2+_3;4_a4U+^E-6|W9kmW8cr z0@&<4zP@b(TcuOPK?K4yA5VIhr07m|T0xr}mwMr+L|q9+jN`e%9~%!Cn@_$An!LKU zs%g$$TOM&?%j~-SL=d3wvhSKLyN`n=A+&~{hI}Uek!huQVLlwSEUqLkUqeBEBkk@_ zU-x$CS;ymFHl zgkwVYq@&*G(NW&u6TPv+AMjuJ!Z<~U3RS+|6%j4Bup!m3$|H?4*tL9t!7vnN1TaAS zuFuBnv&I|eaU}Sxn_VNzq$C%J6n7^;GMcm9aBvYaWyqQM_hF3SZM-!Z2+k|f#o~{!zJgxW6oc_p=5LMPzguwBPJeXI_N8pg3$;z_G`QpaMwCo<1% zz3oXRXUQa)UwQWErTFR2HVVf6svBhv&{vIWp$+3su}ZB6ZTq9q24&EYD;J^Jw}3YU z`|HFPF@FwQ&tvSrM!tXRM7EiG>i%`0N{Nd_=z1gNky}UEhV34(fpvI(@kpb4kddW- z^{K*?sW%{=7VBG^kvQdaY?&={*clG1Jc$gQBo`-ACY3+;Gg8SnW<0yL9<8_qG9|@5ql!>|iaX@bnD<|+T=rrI;v zSXMUr{BTCSjC@}+TfOixoD3R;4L>B4A*Z#cQWUp!E_1v(NAcyp5xSi!(LwHZh0V-5 zoYpoJcTq)S>4X_OQZgB*8c4}}tdbxl5S_Xpddn*N;MS@1{k(gota3!Ho?t(P*JokY{J}wZvq;nxUaq^#FuPR!Ma$rdMQ8Fe1*>3Y>HP~>9b@sEM9Flw@|ex z(kJM>QNdAns7d1obOIafy^d$^6Pg_@b}s6YAFP zIpA46UyR3BJgs=P=M;cDmlbjb8-e-noNM#On|3SE6nOP^E4%Xj1BPg z(H|#yVGJ;iS12fi-u?}WrU%*VSiPgP{SAldpj)$D58ILY(Dk;v&V3^cQO9Ds$MX}` zr%00GmEAJSq2(;iV4m0zdWC)tBG$g|&|g#FyB#PtW6?~r)kgoH@I&8o043CFeq*-O z$6tP1%ry%7$>`5QR#6MgAO*UPjWblB%a?*HWGFk79KFXbSj0*3*)VQ`b}AAc{}jicFzj;Mw?Oa2l!8y{4a?Bgvs>yPogZ*HEn zY`5}vvVbPqTNHz~zBQ`bDt{22vh71a!}#3E#SB6bK_M~;3l1xO^yn2IbX8AJ@WnVf zpCt3eF#h#~(O|@aTy9DDr;INCK*7bi_)Z;uEMVEq>I8QhVVyP=91PFnXK>@{x4Z-S z7HRLkXP0%3Q*wX8P$_i9lzK;j@+coi@yT7MxMiO%bPF&8%D!k*>6HzOG1l zS&iP<<9OLF>OwN$+%b0CAb`_V1=wN1d5YH_zOlFqW`{IH^KEm2cJMb1`nqkvECc}8S z#^o;0w0ZHD%_UZfEDy?L-7q zb;B}y-2P&I@j9AB=kaWLwR!ui+2sngo$X(gxox|(iVt>-{(HUp~wAzr2X%NMgoRfpGq=e)pI& zL(DJWALNNkbiE3ODO_nU()4LG3kI_&yKsBIe)*?^!bM|!7O^`>can6<`U}%w%VwZv zeov>ENt?m8wZ3Wb)92J!@@lsJ(%a!=&g>I=W5&|vyQ}#{I(hW|Y8oF6X32Dp7J+1b zWiwM*MKYgvv`HW6a*|V7Eot+8{_p$z-zxuix4XH$yK|ra`#%3SOOk_&cpQa;Q5;Pc zQ&3*cXB=;^WH#{f{NKIZ$GuI;|GmAtyR`!cf3vr_wYz(t|N9H&|6UWCrr6zJb8}}e zIHJ7|#NN|gwwU0J{s`-$PKMD$`M$_Z;ByonEHFW~7+_r&^l!ifY2)-G8eqGfAOqki zUnq|>bovL~YzTKF+?k_j0Ae_3?NlaG{?uCQb%auob>``ZcoGiM1x%#LQ){hl>EDIy zK8G6?LCQIks)N1@t+nkAmS7zwv#bOE;0;SP$22rEA$j)mD4EAw^5Z-nPq8)U8AqLC zl$^@6;_qrCFUQOE_I+zvK=A7HlBZ0=JoRnrh>7Jl3g=V|zHr_h1QCJ7%o0BjJENUQcaQ*msrz;@?- zjO&hOSI=p~(%DtB1yWKlA5Q_1&V%=9$Ob*yk0v+4e31=CcxQNOI~-9uGiYw@KvRG* zlswB|3kVa~1cF{r(ZHriF6%7Iwzv+`$ppp{WQE>96IhTQ&28J6rI$kIyfXS7rk8fp z#Vn&ZVCuu4nz^|j#fk<##xqLl1_Jc92T~{g6b}~jxY>Ajxc~j@eU`9gr+%>As78z* z9i@Xc8o3wxXNL#-?+=3a`%hmT1md829RNW5fBEM9!S@G;!MnqkulEoCDFAiJ?<_#Z zj`s(DdLO)b3;+Mat5^PZNU=;!)cA)tFaP+%K}DZYG&x^j9a8M#Zz6rbid&Fu7QoVr z2Eq4OW`bhTxn?#k0cxEcTNd|%XWj|+1!n!t>C_s#P_bo5A~tG*=LawLe|Ysi*z|X? zZh@M8fbY)3#dJuNjr(->_~#4m7VuA(tI^@`;Kjk=!JB6XM~uB@GHeBJ-vA0;9l-F< z_K%+JKR>wHNg}tw8tn{UM=?ErJLTzE+R79bN*kI|1J3g9chePw?qFX9ySmBtV&Y)9 zc1t*wlHZD(Oc%3hnw2rc(P#_;MLH^JB5G2`!w^J^FLL0tWP0PB5um(G@6Oj@$?F@N6*1B1Z{ydCSXF3KwmbA=-wh&+6!C13Vc?PR#gq)0RDD9|2+h_WC`4pX@AkH~cOLJ4_k?{hZTvVM z2!inX5)F8z-3@-Gk$LxK$@w`ZF_GGOUN`P&5cwf~ zQn1F}pF(wivJs96x?9~Z?A+Q9g-Il4QjaKI@o)0cC4}mv} zjsfp@lb%E^wZC)p><9^=C`g0kmYdBl!Jos!+;JQ(^ z=`uArnNti+dXSD5;|Uj_U~|gQ>DTRuu6d4tuWgg{Ir}B4tBygI*c4o>&!ba3?vt~$ znH!_ka7mQs+YdhDvb*eLKO^Fl=a{xF8jYH*Yi}^EoT++X7C_q=1X*ZQ)Gnw3PYr*h zF7w7l6xU>TtC*nnRm^KJH=q{VyDc>`7r7;pw~pH zap5-nP)!?7DT22<^8O*#hHvEO_?!-R9DU&_duU&8;ykL$2=AiYAyuk}Z#fEt$VVX( z(j3XJ72oyDrBMd(SaBr|$F_y4@}83W=$l$x zBFgyXQaodpK2bo=K=u?>#4aJyR?rwN=PpbtSw4>F(=_Y$XyD z?zUKk&4_c7)vr(vTKhF!_RHSkYrm@du|DU@QH6q0FVFRQIABvqhV>D2GhRz2S*d2ljMY_RI zOBhv|hmes=Eq?&QCndH;SVAVn#UzO{K0|63UR?Tw+$EkWDVooHdndd>vav+i| z*L?g3W6+Eh5L@qg?*iQjSlMv{mmpMrPeP}Xj%4IIak4s9^y}IK>hn>jqcI>Kqa`>j z$@F15joJE;X(nrkou2_&mwba3hxAwAQH;Q-A;>3Ih8^ zm1QolAw(M#+xPMF%sl* z{gNGbtCH)eRYzrH6A(+9nnqGPEu59d7Hu3^G@s9eHr#e}*On0^%dPQA0nvw@l2c%s zE~0**7g6Mr^K3;j;zw0^mhx%uj^#dnj>wz1xrN02S_n^}wVc(|PccU>?k7>1?kG%!_E05e^@i zQFnyCXX@+-C}pQBJ+p~fkg}N?<1G0E=SAQHHQ$U(y5V$|{xb@~!&9N(J}?zr5rD;Q z1JPDJZwq6Z#OB~)G5H_{udUg#K(7LWW5iq5$Gjw_&ii0#rB|-Mp5FVQLh*H>fqKIz zVHJf|`4KjQo_lr5hB?0|IM5mK+_N8ievr2xWwXH}%$xDZPM(3rxV?bz>n11(@%g)n z|Dd+8hgo*pWNt&VGkt7fH0EiVjs!B-*Vp$A0G#22T@*mU!moii;ag-`r-Rwml(fDJ z_}G!yAVw{%J6tIZpY9oy2_-s@&^?1*_a*T#2;|q2ljEji&R~1d9O)SkkwuGn^B*`H zO0a`}gmSG-`G?q3k&Tyi@6ec(LRMQqqD@A;`r&CL!hbam{{W2@qHT0Vqw{nI2pxCS zoCU+-2G#`ms}7Dmy;Oq&%SX0_sdPnyb2$CcXx?nKI>VTMlk+ZEqa=pQAv&^L6cK~e zc#HN zjt#Gs+mw+#!Q#bo;~b-3r;lZU!a+qk;SW_n>kNfGKV(hNW!PaXgE$LjT7UD z)}L>l0$5;yZ8NbIjYjFEXQy(rs-$MTgh$yUoV!8BYN>S9Ybj2BLG=12lgM(6t@^u-+m3T9+$vBkXfL>{HmYtDDo$&^y!zEjLr zEl0_WI9kU+fY_Kkw|%hYq=5G@WSntF-#o@;rI(1rji*B_5C1G%nj&V!gR*HnKz2-B z?N!xg$kT-AH-tZ28a@Nk`ykQq0zcrP`*{(gDua3xELt?>lRyiW9gs+IwLp|80b}>3 zv~dy|{vLkm%u;+=TUYaV^gAH0J;7przurbql#3MIJ=VWJc)xzqvRRVNjPwclKAtn0 zPOpNO&mHa~_ux>L`YSvY896$+hk)AmX@MQesfEU2hdm4Y3oOx(op~A#K^cTAy?FVQ zdd*t)ySGPr-Sl&_<*)+!7GE2uDWE~zYLfWqY%9{2Yd6n{zD0oxK++lP4KuAdx( z`jQkntU;R$0e++a1w_lb{BpfzvjnS={Bm6#kfcoRH4c<w669+P{~d*GYMSR_iY(TsGJ^`Rmsp!)*48%nEf|`B=xNg#K#KYis`SiP9ymFT&9S*;ByEtZS{4i-^e6{)HV2cb+jB%f z7?kpYTx_?u)u7jI_a4blH<_$B;bgM%ltovZ@#QR?obz0jS-_gCb!X~I+4(uDlO!4< z4;wB9c%CN-oK4nuoH^om-JCL|@YrbCO?&m4ij_0)zv+vvq&}myPQ0eFNzQjNuJekw zht&k7JUt; zlYsyR?CjBc3$ueiC`|GKoum)UgQ7Gq+VRBSM3`>sM0l&XZXN1PFF37Y$nZY}L91T2 zrW{wq*g4Gd*ln_)cJLp%27MqrUt1BZE~#9hn}xI$Jt%%=H4j=iKw(PzS+YsXaFlLK zfO}+nB9RO&xyPh;?oX~x90QsZ&K!^;`h`M}*lLY}n=E+h*w2bGI~9mfcMyZG0-? znoQDU!wrR@oD8I^k+Mj}#jh}`kQiqhC$KU5SUM(P*dpB80xS4lZDfgzZ?JN@AAJZS z0Sv_^6)HE3ry&2vlL0=P;55Xm1UfSpVK@^g!!oa%Wfziu)3B6A&npGN)jH5K{@CU} zo~4ts`o(fq%fIyRh5Bf)KEL=# ztD@K~T5o}jsGtBoUoqH|+jN@mD^Wy{fr;oClWF7vmoB`#KdSD@Zki__a*_`0y~9u^J( z(A0-Yd*HXN{57HWnV*FZ=s&`G1mE4sy$3gU6JWP)cD0ezFk4Kd(+(Q(j8jRngZ^j< z5ytmq0ps@={Uw5OzZ)eWzLTL2i?+#GRf;2ri-~;nM&*o6omH|tLq8uu>U1_##YH>pBoYBeWH;d^PDbUc^+DB+3CH6x*Q#4EV zZdqmV_9SDNxO*a_1m^c6)(#|Rj^anr-`*;sasUj;>U(taJ zN-ahWetiW%=kc6^4Q7<>h^{2@Tbm39tpX_51+pRr=2L+}#@+GW&I!eLq|;zWewpuj za?>k*S^B*_@!+Q9TSvTDnr78$D3n92s!rL5WSX2cODqZa2pMJRq+d(>Z#mX=Qj~ID z$`lADxRb2Kn{%9Vedbd9eXgzm*9(>(Ib17>EuLr(x7=va@c3bvpIY0&`VW&2lk{@3-tzDC)}9RIGIu(-jMU1@c3t&0ww(buJAOvB z@$TSk7=Twe8CLHa=VE&=K()%@SXmj0xXXmhKiUP^1mU1c)kRClKIVumjVR}nYCNkp z+FWu%uy?w!iu-d!evr3;h%9B^{<+6f_PaCyfsBuZEH^+49J?SatC?|)7^N%?I~+

Uo8#+H}3S#uE$=JF$&1;Jcaj-Zw^=*w!pS%%mjp5 z?v`5%K*vb^b=<^)71^)=p(yqtl`J?liZUkwGs))^vE8}>K~8s&l7(<1HW~q^GHhAapsZxU5y4L@>O891&B|`(c#Yn3N{o!<>jx&Mg!FM-JO5?X8YBbkJ-LRh_{<>JZIEzAcJbFnAq3IQb6{lO z&jy0_y4_CqtQ?|T|K4}lbetKJvxptwcek#q9vmMA`{Xo-F$pyh^sa+`Gq z;JL(X$a}uW(0$B|6lL=OFz8HHtWy}?jL}OJNW(!g%&jY$Ts3DbZCE_B9c;D`(R$T( zSHGYrlCXr8i?sOOzy2@({{QmR^otavqfxK!E|u)C?0cx>vYm-v~n)6e)}9IYFf@jcT{YdOZDmv z15XnTW0SbhLVX-t31J7SYtd*o% zcoU7|p~|;_Hu`3^ID;|!4K5ZnjPa4GPmhM=M|oDBW#OzNf~3rgr(RA4Up|FWSNxp& zzA4>`uR44UHCuZ*)*HW?PfLjwn6=XQMctHa9z)JD@i<<-ETQx~-P;cAUj~9s%|#5Yk3`c7{&HvIKYY zC-S4rf}=maN`PiYDaNQ{;$K(~mgNF9Gd>$#wK^-~gjP2>57Q5vsiO!HO$QV0;O!Cp z(ee+2*S0J3$s9nTI39xbg`_9XjB{MX*~F|r7tzNg6_LJW$EUVOm?g}KfspOnz0J!+X23hjTC7rwwPSX`V1KQH47wY)o2t3rH3vCZ@u(!yHv z?LW4B5?Q_~j_4r@;S}~ThoSq`@nNau>Ec_!!QtWC!(b!$49&e3eJf)qJKu3Kptko8 z$7Og%6<2GNj7X``50dLVk#z<=6e|&_9_*vLrziOp98nB@$4=p)x;RPR^GOH9Sakj@ zt33(g+Cf|dL6d@QvcF=kCPPYGewIJ7ONXCGBKg<<^}km=jN+2Rph*b?a6_brJbgjw z0^gI?$==RudjJM;j^J^Tpw zcE;EkgI#@W2WO)c>&22oFkXoov^jn~j-;{lKWjpZ#Y3QvBN3NiHJ~^LuS|o41`JjK zIG>0J)UU|Ui~nMYjR(J#6sh<5fA91E%KX2(-N!rK$M^Yv@ALoW^8cy~maC)yuFU`2 z<@~=p-R;L)k17A}&i3Yg{@<^X|CjGXyuhC!X}pRlgAmE&b}AXl`b&)m=|{?Fwh_E| zf3y>jXeGMf8%#0)G3sI#q1uX>u;OUahD&!e97L2vYa@7efzOgM1?^y#W|_$KpQ33W zn{W{x4fe_^)B;yD#&;1ga&0x^kc3yxMsP4ZkAnf1_(b^{MTy`7{v1){rJYMSmoC^! z@xbR4 z?sX_Wm-oWugG7Z76ebE@(>DcKjb+28Ov4C!ZW?Taj?G_j2EEMs?g*df+qvHC*df0E zd6CdV8vX!ZwJOswzvLVUNHlFhWNjx$Gv`Z273&3Cf9aKv(tf#CVO9voQ>;Ow63u8! za6~_naaYmzl~(DNswm?cUCpezJZJCh%!D}R42{&doDaeBQY*FrGHzwytn`G(|eI9j$%Ajg0sA(V;Wt7W=eVLC;_hp zH#beuTBN~bW%WHaJ@_<*!#K<@OwOF2pNy^6bLKBHe`-#hj}POx$$AuqIja#pGKxfa z(x+5+<9vc_Vkg{s(&^<$ro%MDX;1Ae%()SdsZ}yWk5K}cLxcDhDAc6k<#>d$^xqwy z5E)6TYXMfk;UK(XD7(ODYJia$ncsnMRKyy(yO@cvHfxYMS1wY_?};~ETbW9Yb99iQ zcSw?5=qC`JaxIrb_nFSd=7lSdE}^BV0ktuSOPk~Hvpz+C8dKn;Ehys%#}1vlQku;5 zOss@2Vw(J&e4iA#TVyU6W8?rj9-r78$D8Se53#u`MiycP}@kMopM=9(cUeCRNRP@lS)huq1E=W+;2S|*_SvsI{{;h$+U&-*0hv~GBe7xM&e zmAgB`oY@th7Avm$t?B~^^pV9O<^(MPwNp=Ouo5g*hz;HEY9DB0xlQ~Bbe{3Nzy7!X zwDGs&jlUhPOCk`vh)qCRpPf;(GBj0(Q?Ip2rRgs+x$Qq>M#oEd{I&&`h>f1|m}4vJIr=f+2{po8&u2#Y-W;6HFAb=p|UyYJ>r$9|pc zBI@ltZmJ0d-%|IM%HQAWT*RM-$vKtnwvNL|iG&TuYB8b`zof%8r9RNMWN)8xpn6I# z09FFMlDKyCknr778m6;{p+GvW9>rxdj0|x!<-)w0d#+=MDrf8Mt_EPe#mI=3b1SLD zFc%G670Z@}d>d#k@~Qa+Ayg=`Xr~b`t!%>)8m)Gp!X{7Ac*D&!BHt?m5!rTfHXqY% zN;?75lURu1mqI(2dEo8S;(H@CeDYxX7*c?c?8i{;ci_Mj_J{BcYfD-xVm|4u^_2d5 zE6PQ_c}=buN`qXL$P@xh?-E@#b7?B_-q2`%qN?DNEWw=lOJjp8*9wbDM&_)*fak?Y zNb{?}jFe&;t$crSMXl)neEBY;<#E0V@d`izm#^o$P1-@TRf+kV<)~PBY>d$k zY3YM)M>TDmdTcuY5xJAY%^0Jqr{ZL1H^PntMMpaeniyG-43atKuWhY@9ownA7M7gV zO;Q=8vFgg`w8}*MIuc`*t72y$e?SIhNg)@irB?;;(SbN) z4=kNvZIxxgw6qU&#d3b<+av)@AUPDtidI>ZaHSuN7Q;9YaI!CZ$vm`d&yw#^3P+iR zi7JshvBbueR7b3N7Q_%by>A zmOerpUc#L%PVw_1oyV(e%(`vUU0;PKh!?C7GiENiptpze{mT-SR;Mk5=)1NRlSd=2 z$Z9Ks|HwTIkQmR1&sHL!SBQW~I$s(CZ{EHyML{$gh4tBnv0|J|?T$2!qBHuCezgUT zvlzvvGI7HgpV$6_|e6a;lIAn9-vizFfZDUb434|&QGdPDoV$2P8W#h!-tqOEV&%nzfP8#=Y z%6T1Kq48l1|KV6qVB5-v7eh3etS0O8U;te0B8Q;GB%#*{i!7w(StUS;&QdM3*Y2|f z_*L497!WhNm{vvsa;0?;SNKRw@+O8s%iQ*#{=@(DpZ~}Iw&Jm`)w0ZLHPvfk6%4=G z+LOFs{XhN3fB*md@Bfj%U$>PNGeF;^7%|amfvua1*0D8s`@Vh`ohK8s43KqKn^*Ky zwe()n>DgHpqp~AgElGXcpGI{@x8@UssmkhwrrH>h$7qiz7z>7GCmV@j6FUl z*B&~qL(3LDpOmLiEv3;jkzA7^f2BETvKXJjeFf9A$rJV^?s|z+15vX0R$FXo-wI3w z5WVF=Y&jsdmIv|J+4qBpX*#xqWL+pccU$B3Ww`v$Y_!pmGvVdhY1u1k*>hWN6}8;D z_C!bWG&)H(OtOE*5^qf&uH|pp%eCBUl`;t5p+1eT(#cQ?BlWLZai+Pu4u80b7VEfZ zs1P2x&6q~Drcx`7nZh-!+?>gcinW@|@-MSJ-Mj`(UeoCnd6_REy;$Q2H?}1aDL98P zL*CqonUNEKWyH)%jhK4zc?Kg2o%#^%@So{(Kblk0DH`j zdewHQUe%(cJ&Hw-)z@Vir7)Wb3o|!i8LOG51edIBYJ#@yV4Wtr-YSeef>s@BLwvpG zZKv8CZ=N9T_2=%1c@eN1arr%vb36522DwJHQURdDZz%m~OY#9k)H0AQ-JKNvxP9$3 z+(hE;Tb{YCe2)%!>`h?SNR2E?GZBUq6b2COq*KbrhWpoBA3Ob4ua2Z4ZtO0QMs*ga zy@w0A#U$lLg=gtvG88Au!g<@AVwt`oQb-7QY20&wXiw9C)ZwI-;apB&c#lrfS>EE2I?BKc>2{n-+Dy7j>zUmuh>LdkO?kK6+pSO2r1g)Xv!3-{F z5}km0tIB2#7BkWv0fBk5PT`r;t|ww3M^pU?%_Cwg6&G*Drx?SsqyI98tn5vUlepth zw{pS^ch z-O2bi`)fNn@~W7p0zJRE`=Ur(l2vNU3EL8NL^f(9@Xcj}B5(U#VkOa4W-Jm{kz+SD zB}SjEz-kb1qO8TN_?->@i12&~S+OuPkZ32*kZ5inmm_0s8Jh^4x%>vy|= z12e9~IyYMh=}sa>lJ~cqvu&WZzwMrT6Px`V=T`%N{T(;snS-&urzE1rk^5^6V%e%o zE8F#HWv4Ew>?}noc8SnZLgA*`d`Rg5o~iTzl=Cq7gGdkHc~N4<2%V^p<_#*%fRRg~ z_PY}2!&}WI#z>b)09@P0f&PhnRBRWRfQpjjGp^RFb6WQ1*JxGA@kNR5+(c1(ZpyEt zbR0W5$TMDBjFD&tHj+4Vy;8C)hyKfp1pYgX;L^Oo9%v|>hr+uXKdDAvVM}o0UvF0~ z?WNb-nvZ~^aFkBYZ?ZE8BtRg(J<-4e<3%uNaCJ{g8WvCEXwK2*yFozP?6-FEzF zEgF+w;9i`@VmsVw2wv<<*S+j#(|vDcuf3x`zS6fw#*lLZp=O_j3ZaKj~4GIKGQ z9)9!0ULbzgJ*lG=;n~H-(bewOC#PP$dK2#z%-r$)>)ld7fB;tH-~;-GupX<5IQ7q@ za+}joR1F841Y}O6i&-2EDOr+l;pXcQA3gDe0#O>3RfI24-Yk_ zC#x0`s#~oZ8FD>S;6+JLc3ySHji)-MLR#lN{neaDv-5b~H_*@y;H#|fdPn7g!bSsFY|GW?bE%^$JOD%} z4&u}B&2i45GMb5Z#6kr#5BLIz4fBi7;-43oT!sUDye>etO@{k7EzHp==!?tEcSoVS z=~{ssYQCUb-^jexZ0QEA0$Yp_p86thcb!+~!Zv))>$J5VsF!@7_L_T+jB;%rV-NjY zK@if+qs^!_j65ETsH_lYZA9MITlo6!ZOD2sMQ@Zhl=iohCX&&coL-A8*o6JNxLqM8 zObiG`mYHoOHn?5?m@7{QU6kYa&tBuif7hBbmwrcorTHK4^Z(xG|F!b}?(W{_|Gm%u zYk862V;ALkrPQCdod37m>uzuE>ioaEUCRI4-M-KN`-SrVzJUD=&f|EF1@dKT}&o(%qlsefbqA zaB^GGopnQW?&lPJ>Fhph@qO0fuamXdcCGhKvB0H%Id}S%T-(wH6}n}&3R z!8U&PG38RScb^pbJ}L4qKPj?HzQS+?4ol61)0}0uAS1HY z0DQ`a$g5kB9r?C1B3GxLrmBmijp*#idIfvSc=0Px$IM7h>U#6%Iu?2L8x19!IVp1M zH`*$$by5%)W3-j0!U=e}VJdCY(k5aA^c(h)6h!VM8>9GRJkoXoH^7#4OVc1IRZF>+ zBiU38A&Uy=Wh-I}nzJ-DUE#3sfQ?Dj)^WZ9ne|cr7#Fl2p4My2w)4^nsB@$-VyY|; zgF{%68wrZ5WWQ_(mO2eRXs-=0EG1;le)={NtG6#!apLysGj@uTNBir6mL zdFKMRkDobB+1NaGDv$=#?O}!437U7Nm>=oS6Ed3t5y;yNw;81C(?3th{(=QvJ&|f< z5jO=9Y5=K(v2W3hBTs6}+3@2AuCaj%N5ZH4hCUd?f=hL1 z1-=rbVqy}$ZCJnCR6<`Ok;vXYmJUDpNrZd1oMs*Z(Z3bahZOlsX~dkR(Y}C8jwlEU zc75;xHdzo!2T8*u6RBOx--pW1uEg+aipg&^BHr3*F?YGqj5}kW6^`s}rqXHdUgLLS zvkjC`UMIW3|IgmLEjMx`38M2%GyOv@YQ_qXKokHLtJG{kGb|P*aht5FCRyt4p=dct zAW5bOAfZfvMK+D+{g|ztx82vXFZ&PsGJi8``?{a7;qGzI$O|s4DoK=e7s8{n^KR!b_L5$+qZ+hw8axq3D)6}u?`rF*SM_+Jp*@VS#~m5^@6b#1?AEr)`vmfBVt+ z%1dWwMy*{-lB0XAF!8A!UlJT^O_J^T7V0uUX?I$MF@S}4N>p%5-e1@#@rRVI&xUdT3f@L(I&%P1DrDvq zjmO%FX*`!q)_AH@pcNz_ltP)>C?``IU1WQE+#{oM<}}eEt&~DddVF}Jkde*6?Kayd z_YI%BZ}=f}^1~aM8gRaZUwz9PglSCExE~KquTs4Bn=9BI0|fN+ah8**aB@j8V#!Rq z=8pZL9?k4Dj0EExlsp>hEc9^82Rx1kz?6k3v_Kc!csBJz?#i(kGzq^bf0`J{`@mo^ zW=em;FEXv@p=GlP@FW7_+m0me1xv2J-r__S;_H;T0x~yU&fw&ZKPW)tWEw*r^O#ob zH<(uJiFcet7Z*r)oEDG*qIjG^-<*_TR)`uMI-I&8LNAXDGG6WcwZwN|J>vS^pbfbE z5{>*}@b_C^Vve5meLlvZf0d0HL43W$#})H~iVG$u1^!N^A~BZl0;4usmU9<;I@Da` z^jbdP3F!4HyoVB|rI3q9u>IVz&3Wd1!A6Rrz2*rpc(R7&wwe57C{+dx$mo4$mz-E= zr<-PujK;(5W)^t5w1X@c9ISTY;gD{MU1AOsSn##4M9Gv3I63kSHOJH%{@=qSu5dCH z8R^|pkC-y;6wf_q=Iq0mJUX517#n4nF8Z=|?JaH085<`l$&0gXKJ){ovvciWU7BA- zipZQuL~JdpcDT~_L&oBsTn0qG#lvfZftS#*i7;`cEOx8GmyX02n7#KE)2c_|AOe|& z4Luwr;rJ18N)V@G9q?Rq73sX8brbZ!CN~8^9Vxn-Vo>?Opm*s744#yTCnqbp!oiIN zv1&HPWIN&Dqv6)NLM}KPsisN#h)yNwv|fTw0L#s-3|ckiJy;0>ZO)BAXaY%n5rh$@ z4~s%7+yl}GX0y&dFwT4qTMp%LX&hw*n2n=f^t4wP=m3>l*s8*59W=N}!^*9i&rG1F ztc`28t%w&q&x=2bQgAHcwl`YnvbwlJ2a!#Ne?>QC-U_BtPz{c2=y<##b4o%i9@G!P zx;Ku;>mrFdAK8UAi>dMEt@>xH#xJO|Q&p~fXj(RrB0DYLTH98v)*%{{6cBS55I!RY zPATa$slcn7I1MUHdKIx53>wuX(P9;cB83jZjh5{D3RI8kXhyNHXXxts}C1LsaJixuN~uU)N*sX;GPaNEH}|X&sjy4yz*2f8py8l zA}Qc13@7Z!zPQ8~QkW_Eg^Qi{$a<%zv-D+BlyP`7ttYDZ8lQA@?(wp0kded$Zq}1~nCovwFOzK_COSg@s~$c|AIr9()~n9lDtgd?)5aUQt8E zD95z`##Ss5|4~uYRl1X-#rZG}ruC>{poS$$*|kLVGKI^a&SnLUx#{4TOzvmAoyk@c zZ&NaZ0u+=y?GJyLo2OA$TC;xSB3K~=b|ahVYVQZipnq~|(5w?&cBvzm97PxDKj@hX zs?;CTJ7_C(mZZctnCYOO#Hr>eq;%UbEvillUNJt1lMkSktAtGnxNm#tfrFwe1j(ja z={oYFFuY7Yg4S*{znl>&Cb@M*%GAeSnP57d3E3bN?6 ziYtj0r&rq)7K@CBQ3ckcBQe(rKN1@2c7$b7Rk>DMPQf;!Lc=Vc@}|q8Db3;&V5wMD z0g`g%7oFJ|sos8{;P4d*D{)>(pY z=GAL^jXTrX%D?yG=!wR4CZ9R#Bdi!dgNq9WC$y+k6~Y$AQ1vZ&h9gsy*@7R>@v8no zTolA?w_!43@_I1!pjnWhU?f3`UgzbQ%+R9Z!SCKV>odh?{YDk}m#+{K5v5c^F|YnQ zHpF(&EMMN$iwqXn8bx_!NR{r#=RO5jrw>-=5WeHZ=aim@f9$d1;bX6SN}Y~TNTE8#+b@+NfOa$Lu5!xy#Tlf`wsX;pNpT6R*;%}3PQ1Oqw<)b zW!LGp?QHagl%`_LYr0t}+bBQAT<_>gEg)meUF0{Ojn2ZvW8LWna|8fGm-i{JZ|#~) zSy{%M!n96GS!Lv`IedSQNCZYk}Ho?_3Adq^G3lHdM1pxTXLgV ze(Cip3g^VzMWO9NGb3-bd&97JATtv~X7%dmNK&2^J|7` zRiy3CSgpBHUIAHrORH;aeM*$1s$MNz^l*N~U1pm}45G$1(-B8!Y%>SBfC7b1zyZ9~ zr}0II?urZ7+ukgwx7~SBSa7@BE~vQOZLhTCwlRRd(est%-mF@dyYr%YUG8q>8t&cp z!&c^A5alH<3OqzL2SP20r{>on6PSkL9dz)h!{YeZF2ct`bnFyhV`n}zX;(&*>R8gL ziXXuuZ;YMK6^Ba>{i$jxfeRhz?E`uJ@8xN9Ijxk$Pc#RNM#|MI6DX zZz~Q*tl?zK$O8d}^1!fM5CHW+SEhTw2(8pY(Ld}-#VEWV3)jMk?(CkPLkOKvU0B?=#hV3_)p8h`LR+O z<7juF8f5d66mQx3kU+UFxM-}`aEGYaj7N3AWI2c0=XI~2hx=Jin{1vQljm|}m?ytC z+}WFmvi&4TFP((qDuS$E9FCbe%t;q492sG7mr0sI!oc8q=*-N z^YKN$#RC`f-OtGVe}#A)3z!i$3X!THEZ|dc{m;=gCD+Mu%uWLelr3^cDMmh*^I}o0 z=xWZ3aHum7`{&ctXJQd!Rnss@SIZE;J3iX<_=i08kw?klx8Rm#i1C-9N|EMZv;x_n z=1mkj;=&L_UuKGqAmH8}dKfQjzkOOmgQqvSOyE#^YIlC~_Ly-pJOOKY0fi*FgbBI2 zo3pHTC}Hop6^fQEja4zW?#Qa3W!@07VSzJ*8ceYNNl0Z^-3C|Ts^zPm!hL#KwjsU1 zGLs4+o#XiFl=75sw7Iol*5g4P-Mgh{x7FTO$uP04V;;0JF(e>M*f7;ftQPFx zx3Z#fiwX+Yr$KTNPJ0?anjU+U_Ubw=)tZgs0Nl++0iy*f7E`j}+(Lgfo7f6_{2W8! zkeo1G0!_nhX;r@U^JZ8QP7*V4HK&xeOY%NZ3Y+rxN&yPCl<_|N4kfyMl$w5PQqvK7 zjt0`g3J4&>nufQu?d3{FzE4EXk!_<#be-ft*p|8W$C^qP~8l_l&K7q-BElYgjkwAY-(I(ZY ztyoh>L>#5y5juyCjyP;YDEdL4iN;U`WA*-^W+)_jkz@P_|Kg)VXbB^y*wnU-JndZU z(L6+5@w4qlUTk$cFL<@%jFz=$)LC>!o!l99a>Hq19z0kVg2=xjPHxWgy-0Fu&lG%C z7M<*R{%m2`+_VUDB=IH!HLP&+5NLTQmoV*>5aXwb+QKRF%(2qZj2{HKXOgJV$JW)M zEI;{2k80)8a_w^nT&UT(d==jE(yp1qr%`2A-Ac3Se!N+YWBg*-w6tbb=7qL2b!=Cd z*01S!CaBMIKxa(pl^h>mF}fTC$t@V=1`#UhIa01@DH}Q~^PGwC`m$NkJtkh~YmPz{ zbC6dMamkCG$_~@upOIW^NwNzFujO*=uZbYcx(uE9#aH_PHHRw+ozBq&7fA9#o@}ed zVeXapYqA$RtiwwfgR6OZ{pL;pr=ORN=Z52EBhQFv=im6IB#*BPzpumO)*r^>3%pjk zT_J+Q$R3PLH&~Bs`qyIj`_bGut%#*W6ZV0cS0fYmNeuRm*bd>1fP-ypLnm)b%0T!7mRQ1 z6f=+BWD9ze2^DycS4KGE(L^L$<*C8EHE;ZOl2^OUj!}}%DY;=Jx*}oqUJZK7B7uBL zcvI^NHT&J&0Kp!NZhaBuv)b@+ZQznnWL28`S9SmU7k=exp;E>aK92pI0O7h0kUU+- z##D?E&9{)NueIb?ZQmK1d+EfHBv(0g47)XxIr7ml6W}DC%!Uvxo;lvBqK0vtpg^EvP-T;UOF{oJGTEdYc=C znH~Ea3Ylke5d~8fgNZl$b>p6;x7jo=IC`CS)8lufSWsfK;q>RqD{I{8tFvC3SxnkM#^a)jw zGvRu%pq-vHppz?%AYJ-~P%>xM@*MP~isC5Nsv>0R9Egkps}baYZXJTq4FsQe>zN!( zh@R$c{mE(GVR-7?aGkUo^KQI0ohnk}DmGI;Lgw4>_S!%EUjJ)7Yd!mycfrjMVK4}j zhxtaE{>%H>Zg*d(-{EHc@S-UtrGUgzbD%@<#HHlDMy z?e|L#zq|jya&_agGd5jsx7gOBsFOa^)m=#V87Cu!S)rA*ZquIriDqA%9CDxyWc@?X>2RQ?VcA%#PW2aFwlv4)xR zLHr*^XCij?9jfN|W_mk;kMbA$OfzeG$Fs>$^pyHgn2JX75pROFjhm@9K7TS2c|YZp zC^Q}(@Sbd2fM#1@c3^E=6nE{Dd53p8`tv?}h8CXOezGX9=wuj%a=Xbi6TFv*)-Udb z#f8Z}p+oclpJaP+i!U|I7(XlpKGPktsZJvWeOy&IfC>anuFI3c{4d(RkY4;s5CO^C zN)whZgmexY#^$Aau7IYu1sE3L1>!zbQf@Fo3aWd)~x z`qslFl;QG;$Jsi^R|W$;*3BACeO@2YOe`>84s$*kY3CXSlN5{k)eBOPH|W}0yHUV- zi=Z_=h+HEg9XNl_MmVa-W6O!aVgedXknaqeV$1j8ci)H1JC{KkOs5Io&8Onk(*~P; z8hTIV&gs#^+NP^twf6>L^zbk3twsX$&U@xLhtV*<$A0I_)f*^8@3G&r4K$nv+gY1^ zsh<600{}sfzxCR*aK0TJ^Z&&@;?TyzzD8*^k#TA$zeBA(-kc)!q7g=_!IkUTFz#Pz zNMDLww_r-7Ul&2D36nIYb)fk>;)dQGABOV>GZhV*fxe4wAhbZ*UjK#XxjCoj^|a4K zmR@ZX4`xG5m_lYJGE>)eU{ETT0AI2b72FWyT_`{X9NAHPR{)ugCYZ5JN;$1#(fPH! znT;;SP^p+F5b-IXQBORHP{XsjP6Ogu&=9<0_Xe^pM^l-CgzQ{KsvMjk&r5=KS;&8O z#a{4v8qu%(BYZfqcRW_L&g4}rjvkLOYfsaAdxXE}%m?mD1EXoLqhSl`DX9pYXO-DAAy9+IK_J(LuPYL%h8ve_E#Nt$iUA(H1Zt>agpA$XPzvM zD(%tyD$b`S=3!uupl=0Z36b|tY(TWMZ8`7vXi&ka&nfS4c8SlPb=voJ%nJq!ZlXvH zzcsdM#&MB`)xJacyAStkY?Lj?nAQ)|6syj&p$U=2_t+vf@9na{1p{-dF$BieBGZlEp+JV>Z#B8TCF(lm0W}90E*O-dOzMxPz^FLz=w`(9eS!6NJcT z%m6^cWcF1!9oF?A#iefFkTZ%n!m=Nv*Whk2JffhG`v1d-kf_oNgZ^bh5s>>noW5jT zii-D{#awg>O$v(W+w`5b>lJ0z4LZ5oL9obxdN4gKY zBP6?_DN8qvCvvN*CkR_%R8;((wtu;Q?IB&!$w8MM-Jeu5>h`vq9kf(!;^FbRf&JU(>Yyi>Mjj zZM*!(H8pOtotw`_O$x-@kuqSr>%9@zeN@dfq#-uj(AzL>c`}Rh${= z{Jk!D`|?UkOw2yND>8P;aK`b=m(n+weR7(azulCAeq!KwXzWhhYzg$mbzC?h;}Q zSBn=k=pFvJUj!*Awa*R3-01hk6){5>kK%KlDd>h+45zoHkz(iV!Mpw4zj}Xuzk9g1 zdo(WtF`-hg@wjCu>kV8(Zjrq+^%@iogJ!ATj`HJW8}kg4P4B5;1F1dDHhp_2JpHs` z*+b6CvK>A|!$Cj51OeFLOGg{XWSJcmo@OSTAqlFo_IaMJb5R8Rs73`wjw!7ss8b<0 zGK$pAqY@Zp^Gv3z39Kfa6`kUantA$k0=DZSzX?=_)(?{i88vdVZySU2KY7nn6G0A5&*YbHUv{lvW2?rQ(^NH5y1i# zMoZ3v;;NyBt$bm0{L|i>*WPz~`^US7l~7UzQP3=0u@m&BU}tGp-3)D>jm_EAM*gJ4 zw$>FD0DiT6jx}$0!E^iwJ`~vkV#xVisWp0)h8S+ z%|vXL4U*q()XYY_+-7(aL2<7uaY3o?F8506nGQ)tq?u?0#?3~yZ%)h`_gd;{4ok+U zyfUm!EPCxHs7In7te8Q}ED-BB@nGT2>?6F-Ncy#cSMU>pRm$4TrqM9XYlPRyg*nY5 zEEY+YWQHfQlvyEj&pntLpk{@u*2|YVKU!rI}%0X zO`%GGC}Am6zW8@PQ_h!c-_rc!Rt|D8R8IB{6&WfgmD5s0gz}#`t;%jW?b}Wrx-EAU zltW_54f-VNm*r-2ww1Fv$QCKMkE}Jz-_?=+bMc>HG&E>rBsqIH#C;mF6%7>0>IH0@ zK+D~Bqhj!>7I&HrIi-7+JF89tjaJ*@!`fvO#E7*mxq?g+NYA-tbH#htDUh3=fQ~^~ zgj^LW%bt%e;nX+}9UOfD*>vZJ?Y%cjG=07M?)V4q$L+(t?c=?-Zz_qWa-*_~I1z9^ z%^bR}#*_io%wG@dg0l%MVhl%`-g@U0Fnk@-e9ly6P4j4=GH#Hv0SB7_Wu6v{@&5SCr4>o8oB!+W)oU76#F_O;fM;U3{FP3x$t?t zVOdvAaH)&8y}2vbGVY%@oZ52FC14KvDU zzo=L-C1%)O^iTl1r_OsVK#mZ|M{2-zwExG8a{jfjDO zufYt8J+L`}iZDyrO5h(dQ-YH$Bz$9v$aR=R=MYc@{5}Z>&?U&Kg*Ysc-fLdsaVH*+ zDH48*yJ^enk07h_cl?@f630VKE94k9NN>k|q_OjG%1*Rk68SJveU=OxF!ojG{cwEz z4vY9=`S`Ic!1$k!n3&`mR9;~ufV|00%(j2b76-!cB*h$M@SKV2P~YqQ9^O4+3+He! zJMJS&bBo54*^~`4WlCKNJOyWkmB2{xXRn>LkRZq2W*Zei_Csv2qvDOy^$hJ=-~NV! zmT^R{XCiui8I@`jP6Jp;?~be;Oblm}s|ihu{V8;<`%=Y8&afpJA!Kdg#6}N&x?-y< z7cP4Qov2V1#UbnXZ<8HH7{x05VTB@V4&&5QNLS&L0d0l<>cR+p4z(>~N>=mxEP?NU zgS*Gh2?ya+=k^D1)W(QU zaW7?@leu0bYWvkV6gj$xiJu>GO61%HBz3+G5>>kdcPN^fnT*-+-3@cC3 z5#0#O6**FQlv%{x1b|#8yg^zw5@2!W_q<+o#h|BD|du`%(_q7(vVN>w!j2aGy>eHp6SjR~$dQ~6+bY`v*Zy;&pilCCDBWPI` zN1c~u1hZf&!!Ssq>8%lQy%M_lad+qV?IHhrl?0O`57}J|u6k;_WZe4Yr$!A$8BO{D z#xjhVGGt@#YpL%}|XV7Q1i%;*I=}_b!)Xvnn>?z zwZ4Qj=pFu8$P$6pTWt@?MU2Ar=fiCnDWfsf5dcu!1^;fW1jWC5zrXM8Zy)cz+4n=K5a&w6_#G`=s#@7ww@VP64MX8HQ0Hzx zE~&Ph?tgI>#KhyKh|n@5)@f&CMF(|j({7q)N(sr$>w~EZ^NzsM2SwmigtD_PuWYB9 z>2bC2IwTLf>(nbSkW{-_#_^&QgutqHk+Ya7gg61rfhK1_u0K>91i;1RfBR~g{Etn= zA1aKyLREAf9jlPj@~tai**mu;QL)ZluK{gRn!p8gZ(Z%qIwcLx_`s(2{f;TrMS8c? zxU(ZN{hAM{G;iVtK4%Vf_Ol^_B0Y_5IEI^v(WJ`#YBrrRLG{jy$2-HQf92gPeftdG zY5c`8Alyr9(40r_&N?eHbsGe|%-wYz)I-`5p=efDwDnsC?u&pI@`N%GIVb%`HU@mqZHlB$UlU0YXtq#jq=`AsIC8)gwkC*8$_d zXG8t={~>IkmF%C(nJrglx?I+LS%7V>lddg7wy&GP+1}HLL3PNLRlE3HJ=W6DxSP%0-Ubkw7E^K-kq*Bah&C!V_ z+tgwH15;Pb4kq)I*UqUj)U=IW2Ciqr{WZwuz%a8I%vF6qhcRuz@m=ypo~Ht|xK^WB zDwQpCSyEP+jK>FCG_V3j6iU^)P^2)*V*7CKsG^xo7uf9OoM@1(s37SASFUDM%etMA zOk+PM871?W%OSZMj|3sbpCGV}^b`*jPNwk56(Z4DaR6eO2IF~mT9^AR$X3)7kFv?T zOzs$ERf^U{$e?NvRi?(7eVS!a%Holyxfg%gu5P=pvE%mbwQacAsyTcDOHjda@m!`Q z^QFwCBWrrH#d))ua)FMVgS^({i*@CBux&|y>rrq*r733nq(ZZ-9Do|y7ICZ^1B zSfDo8L{F@``}iW59Lg>1) zYBZ+hGTEOc3Cg9DFaYUQpDN>FKK?!6`&Dn`ttpDJFSpJV*xb`y+75@@pK% z+>8Vd>nA?yjK0rAruA!3_SZ2|a(#Vz)=MuL4%ANBu#cIVAM?LI=6`Pbc3=GA z<%`Gs?~nQ4xn#l3R|{o*ubTh8`}}2h6Y{@zH=cK1Y$E+{b~d|@`QJZM{&!UoPfz#G zWnRid214MPnT&jGZ5|t?NV&b1XCXz)nKH3rr=56w9$hr)k1$zVGeRe2gRN|7KkY3| z-H@2sF98H{_OSuaf}Qb<=dC*OrTXaw-F@wHAmcRTMO!n8Ka?96gt_M=HwVNKFH~iJ~ z%=uwDy~?oZt{gV(^I1F!lRO*(cUV!?%mymGuZT?ozbVARKrIuK@Gfy#ghi^Ep@a&eS1E|ZZ9$5-eEBP*qvah)+ z$I;mCigCvBYhVqa)+g=L7XPFZC9#3Hbvmi>lozo0+9~S01c5pF0;qUj39l;ML38H) z1VE~ElRn$jl>@NSe5Y`GcX7*m()8WrBdjk*h1??gQRW&|5AR{gXLC8~sgYZ24Cu2a zvW~A&b%!7^9}<%~xZQ!D+6#S+8bKMRsQGG0b{mames?ypg6_}p_P?MzUw|B<&;R26 zUpBq3_5L59$Ty5K4G7DAF0I0u&{uSysTzpDhW9vD<}JW7|E&O(YT-O&uA)iGlfH5Y zJk7uK^FiERyrRd3^m_V8U%-G=Z0p{MoC0W`NxcGun#XpT5 zvO&e&EE52_pNjzCKH-9dvf0xa*R9ei91pZE8%_)%-@23+kga+sY9(V+tpIgpox#}K zpJxy@C$`wRk)MFhvLgJ^Faqx!s@ma_H~49;nDe zDTvt_%TBWx|*Ms87fQ>5EyC?E&wp-@rh6`a(! z8;$JY4DRiYGnu}Qp73l+m+>Iiu7=nBm_)IP5JW@HppL53eCK_*498M}k$yRg2uQis zun%!E99Y({=iQHK3qv3K1?4Tq9P0g28;*@IjpDtFXVba2&*J5^MTDdXS zQkgTS#O~lC++k8#Dvt?x;`JdGb!r|MMHj9p6VApFxQCP)#tZ`p?*z&JlAu3b#BjQb ztJMGa_y7LC{>Q)D(&K;p&;Rzn|LcFT#K))q=l}oz{@N?^ddrPp9`C=j_@8Jd?_@=i{sUiQNP&_p%^nxid7a}q)qk1{ z&fN!DKR)-*;@NmmVP}*+Mr+4Vx^R%rZ=EVs%`EZi3BRc1-@J&K#&H(j#^Zr!-{mi{ zg>XNnZ=!@b1cmgIAiZpoUcvX^0(|-*OT_KU_kEH56vcXp355e+3#Z~CtVME$=qAK|38xdKO_oBLMFAPkX1_QYjzqoaiRvpXnNa%I6pE5x^j3n z^^~Se>1|p`p6uL(U#tcFBu-OV`w{RG4E;P4)HQ1y&r&Q|;!STSaI>T&JIJlWl)iDw zX2nBfc^tvdDdd*mf5fT$HNAxNLh?^Ybpk`H2N6_1!(ata?}yQuh)jS2K>Q7x2cE?A zVn0fmr0_cGvwye8x7s8qU;-X25;F0{+T7`Hn(EoB?VUgEzIhGh0K)#2B58S1Lb1z7 zSJ4El$9(&v^UUrWQO{qj(V8%4Ur)tzCRdnTHah3&3f!s3c^-l!4A}1U=1D~R#agE9 zqDXJ!O#F_bt7u`?IAqxK%ZyODN`^BLNzdhDGt&mSWT-SjT(~nsrx(vI#KmAek&hh% zPOj>b5_#~q7}{B5PZTh{nJNmllF`FEE|>R%VSffGL;Q3$ss~N)OyzM6ystbx;kjPN zF-Wku7bB+@{G-P^muKOD2^dlO!33`UjPbm6`ikrGnrb9JMqZ*{KW75`uAIPt4aL|W z5FuUnXs@ok2xY!mX>(OqLWP&>fC7ak7Q02nHh8tqX(fG3<=+ zS~hsm??VtV)o2)jSpgD+z}xIx;xQB%ro1hXIghQa4udgV_!e`;G1fJy-@sotkSH!Y zgiLCuQUh*JCJg)T;0#LRVb~hR7xkw{fQmp!t=7}VAHDmUm4mK>UZtBncQ6(ISe$Uu z4CU}_$1A28viHFwdw7M}uj~ZLcdGq8PnDdH==OMwo3A7L39dtsA5_n0YSH1CS$jJ-?mWyaoaRks_~?Y8Egs!F0PDaDxUF#ZrG`IFsH_p9$2 z`fVFIZi#hZ?qUmvMfsiusb%dYaw)gg0$Nt3x?+DGV7l4imuI#a4HK^yRa z7eGR>JY`Wbek}xJCXO1CF}}q&mMmJh7&)fB6wPO$U^%C55do@MxscIyX=0VSs+zPF z+qqM>7S{^WN(EWTpIP)s}{AO zjDN1lQI^J3Y&1#~0X{mq&cInqC+m5H1-S54_I$Re8a=D2EjR_hm2fhddJL4c z{MRiM8|x*)&7Ud#rAiUl$Yl9W%;dvjJzKe{E2FPE_bu-T>Sr?vCuybl8;CeecVd6@ z37%|iwomVwvgc4gz`q2$@mlf9EK{BTvoWI4Y~{T_ z9~F8{!T$lCHod>VBLm6tK2t92o|Y6O;{bf1yNywol+ve$04bEfhGD3$tLM(n|qqn;Pf z95X>Zbcv~aT|(c7oVQV^o4y7{ni_iokTjgyOYR6?TO+_y8pT?S{Xy^Bo|hTI)-TK& zrF#s?FX)~`X&c(Zf;$+jWEc$BhtXB2?no-v=op4KzUWm+DGB6-3<;LJug!qop^QU? z?A7zSp{peQTB9|(f=c9#f;R9|)X`|PJ}`AH^ugRyfBN((-tL&j0WCq50=e1bC`_&+ z{Z`-y^38EohyoEC9>q!YcbNf+Ix0x|K3D;LUzRpv(&N;>3fUo)P7OW|4O;3w)&UZK zSgoeTwep1;&Xa?T;x85M?Pt7gWpduEpRnYv)v4>MI zw-%fAXokKhuidARDU>vd$4>2==A4ArY*0R9{5LZND(b?UI46?b zpdYCajNA%+f2cC+Fo;HWC2j_Fzl=j%!fa~b$ll2VMG0`qmM;o-jn#)LCB5qTwLaNb=~<`&!+d}U4F$eILplVwICK9PS<+V}&XP>`BZ(m6kFD!&QR|4)693*~y{r3I1&qo(2_O)Xhsv=|4l$ z2@D9Dy3m+ZN4UWx*u&B@(gb4c1)DoVjPG}kvPgY@9-_eb&KArREF%P=3f>$|O!JujSp_qg5>ihFZyLzF5nxibnB%Qt;x7Yck1V zz$8GxD#Z5J>U?KQddG{XMeKMS0V?VkrPi#U=2G1H8LLqAFPScY*VbDcb?K4zNlmr^ zaY9HnqShnUYz!^}u!dvFbUuRpd7BuYc>g)L4vx_9S198%QUm>cP%7xlhVX|h0+&Jn zRhGJ+efcFD=1WzhT~=|2d3^aykuO+b8KBWgjCE%pNL6D|WC^_ik{Kwgyb8Qtekq+z zLh<~MnoGoGltQ6_DV(ZzB-DE2*>Koo!e%hQL!Av8diT7OLED5%a-FZE!9hG@nyK++ z@>1Xew`RVDMWKNg=YTV7L94w%c7oyb2W zy#ND>6=16UC`fK=Y)xX`{PUWL>l;yK5U}G7#0cE|+u3yNvH!3C;~gLE{@?D!w|aN{ zm-}x`rg10cpc%Qoc}Ao2rT|)83GL%|aWdM2B8A{a=+$F>$G|`pO z!@M##Q(Q~$+k)i`qHAp}HSpe9zhJy?QnTj!4Sw(2JN$9~%`^JbYJ)WtaRzLI*uVEQ zz1|)ExR#s2&dULUM=0dq#dhoIEX3U6{J7X3ZJGLQt4Z1{?~U`S4wB@;B)Uwl@bl+s8XUJS2Gd{O5uW z9fyAj&yHe7HB(MTjsuV`4XtB@J!EiU|Jvuv2M}A~)U#g*X&9wi*BK+!%IwEzOhy@U zwskRyM)v!ycsyca6sYEmuR?x~De_Ew%~R|hPy_zfH5&#hdx8$RtRBcw0AMe)zoUn_oq4pl_^$OOO*c8CldV_alao^oUUzum!!(?g z=3T{GRWv&Q-Fpj25GT_ zbU<=QBfsjhr}C1(Ij?%BvvWq)m@fh5jV0v_kBRJZzA5*RYW==1%dB0RurR$ zbumT9?5#r8)Pbl(1-Z(#G^$NiWTHfu4F`w|(j?^+=jAhTztz{!5&^k6^?9dBU9Bdk zkpNfrZN>GQ4lU%Jl~c{R)Wv0$t}fNC&P^CqTCzd8!myb;PUbYUiX7MU`7zm!68sck~Z$ zCTtlv#qkuZRBWu18g_(p00xAg>gum{S~$!&o|;3c62P*$uT=r|zB$J-^BhGOiO#V; z-Wf9ru+H#P&U4Q7Nl>j|ab#_iHY?;|xUUmZu;y5yrk3IUi80|1GCnP{k%$({tQ0M= z`qhCD@#k0~>rSiTrKF+!H;ggw!Yoj&U?0Iq?$*(Mok^Wj%6UadvxW=l$XK&fmP_-J|1zqO;l00kWgROEfn4pmk~smoaE^!a;uO zXtIENSoQ>7%Efcjbh+9n{8Ba7N)3?fHSbIBdAo2>CNDw+vRH=s5;8$^mr7V*&Esiy zPZHY|6t^bP+(l0dzj_Wii+q8XmbRfZ4*r_&_KAZOP8|YUbS}Hf>9Ha7#pH;2=Cp(; zCH}6kqS?4)Omxp-9^E^fnmM=01|C-ve^Khy!|;vd4on|q(4HE%>Yqybb-MCjYL0_& zqOH^oxZ)9qC)>N-!|&c6veRy7cmBieiAVJAHL%J9Hv?L^)dvi;mT&)- z9loH3fqiJV*bcA*9RAsae_npv&Dd<~l7#$i)!U;!aVLLWWSdMkY;NlWPrwoP@3q1` zg{gEQ{Oj9X~_(?+UP@&tE!q6){S{UEsrxfirm z4F(s^Gv&y**p`_fN6(3Kn^y6bGCe7=L>2^Bf=G9;{a62R_xNyc_Xu2=zgT;{``z~Y z{bT=l?_l@s`(t?6R$D^WW)X*HyIyK{W!a*%FDz~VS~dCV>8oRn0s->N!GCoh-{h!;}jKck!iaubO3Kia>54a z_N2!fS#QY?3_U6V(@vevys-6@HO2J7d~Nw)3Y0_#F~mgrrXA+72b>J-2Lwm(6)koF zl>iYg@ZxX0Aji4%nMwgErA`Z{x6PQrt?C|-BQ_JsOhBvSO$05#drPKKb3oNdpOWjK z1juTQ>d`bDks1oWDNXb=gWP)PmVybL^ujkX2?rREvE>Os6vE*})n>`Y6_U{7#yk%Sk)G|bSPcmrvDjA_5l2AzmOqnhmmZ)G`i>jUj#mNwC$vpcuqo!0<>Hpe>?PPd3)(J8bIF7#7cp&h?R~Hb80y zTku)oaXy%hCMiyXl8B;(2r{-2c2J1_Ct1N3 zML+n$fRi#qNT5v&Yov0-1}}h^$Y^X#&A||wo(zlBTB4Yi;SRdK`3>)vPx)NL)87oj zVZm2;Kaeu#6HXbRVFxAb*jAqEQgn{os>4!Xj|oKy8hzzod3C@93?MeKqv1pdA@bG@ zcxUAANJdJk&<@DEs&IoXD>t|jzvMBx#T(@j?FLb%YE@pB0C9lT??Oh-$5>!Gt4O@ z4qhu?kc>jWd%iy1wY78Ji{wH|5Gr;Z{4^erVik_mnVIVi>$^E4LNq%L`mF}1oRwsN zi9bu)3c%)Uj`ONsb`0x^>*>sK3U@GBKn7LGS)EGf|Jz4_idIzHS!-u?b>-p<>D zciV@1M{nQEDbO6b>P>%Pm7(%Wt4LW_G`Xi;g_AD#l*^uQd24dhiGJgx>b;ki;;g!~ zW#_GBGS2Kt%1=pSJIuq9`xMCoNcj~RoldqkGiE}q%&Y*0EDuU{QN0jt)2p!ttkf*93>C>k7 z^q-$L?$@2|L_t!~s#XHAef;ble!Uc8=EeVE7z{Ck;ZZ*vgSSy?-TPxwHB5b&anesk zm*wh&JY=%rall#xLtlin2zS*A+3tn!6(2&yIguVB3+p^^vU?cJC;ov!q;@5U8HQ_6 zK6@>V871s>q#)bk3Apq;VN@}Qf|j^JIKz$xXPA)KNDuW6ZdhkXK*}mADMQg5Z83wnT3mM)d9zbNZH?5x9+)rcG&j)~uP}PUV01PJyqhiW z=A>i0*+;3b0IY^PX=BN71GaX`Ve0i&PaC5UY*ThLVN?M4M`P5j_HhccJ^5~XZ~wHG zXTrIoG48!;@eCky>?*%#V=?zl@B4Tv6kf6Bd#`S&)AYP^iW+r}zo-oU>q(eA!@CBK zhYc(j)OLw{KaIC}XZArc&Q_~)e*aHi{f>`z|Ljg5?OysrMsUdp=cVuC4YALr%M>tQ zTocP`ZV-UsqdMaIjL6rofz;R28sNfX<;>s4e~*xc;Afcn4P%@^IxP6zA%wqLw_tpEFw>i-_gZ1YEd-e((5 z#jfn)2WR1Ba2>@7SOV54On}J-Gq8RzIf5(u!FU{#b8xyQ!U~wubHM~t8D)S-wIK`b zUP9Ux?L9D~+`_wSW=Y+(vVyvMWKxB|zDyNeDootckz7{PRd85aTa#v%36rGaLDcvA zOuZe?CiPJ~W`gnrguN{xHXY!zckoQ5YuduuD@PycJkCeNRVSkE1JnDN9u{4POmoIh zd9w+K!BDOh!KG)T#{IyIWNI>y2r`&Xle)GLCc5qH?{2?&|IXigbG&=_<2KW~y>_eH zSX(18n=YxeE=KY`d-0IpsG)kcRAJ0IACWio`&hddJe0&a@6 zq6|kA4}4s@miIYbQ~8P$@WdU+9kD>1od$r0f7tbLF?Sa*dA*ie{=67KtqBQGpw)>f z6GQOp=DuGir}%Cd_pc1#?6U`aiYS;*BNKRVZxam&fC>T`_0)u#}vH0{w`$^PP_BQGSEtasXtQ`1u) z{qt!&b8^!b8kx0Kl(oljaz0-&c|klP2w{qXpF-&t=s&?eRA}J2|GCDyZ!iLCz!xt`yJgl*k{XH_HfzZm>~;c zV(_wt7HQLq27JfZZnHmOlOj=JV`XOy0gYA!-I%z^&bB%i3|g;uU%mgniE=`i^lHUN zbgklXl?8nHfo1S9NMvUNV#}j*S&-Ek;|YKcmpN@MC1N+c-vb2)~ZQ1Gm+QOfA97Ft_;Rq_c|-sdAPgci++ebf32Tl z^T0Gu1*7Aj%tQa%EoRugb@|m2pPi?r%Mi!XoN58pH zgFcH(EW->rLkBX?*e5~FUOqRp~{IeW^= z4+^wQ!gZ=z1?hM(a5Nxh6hi86sIbBWUOqQ3oiRQZ4pwu*mYs?rH&C%}RvByCEmeScGLc<{0$RgZ(v9h<5kZwtXA*Re&j>*5pHEyVIm<6nvoO_x&Uz3vX%HPc zMK+~lO;5PIHAR-+-pQ#;0HrEp#DNg0(s~|NHOZy7$C1m$x+U$`PwM&-*vg-X#Jjb| zX-4efi_egk*r262^Eulvuph#G?j=*P!U1VVb28;}ZONe>9SRoACy{2w3iKDEc|m+A zrdJPURQ>Yd)S=zO!?%a4R)=zcUw8^Eeh3KIeyqdbk$7pmWqAtb9ZAQ+G56!dBVMwQLHGp3U5C%p4%rwvu$8 zxCYI3HFz{IVN~Z82pS|)muVL@`DMFN>FK3hhz}sE7S(N_Fs&`!ftc>kF!Ra0(gm8~zblior~X-xMt!R^v-9?dfBY4+mtNekTu?QH!P+eMVXm^sc&5!o z9T7#!LI&Mc7$D||0Np#oLGNk;VYzEB)^W$$^8t9ydNtuN&KlfN9uLjIwRV5sEFP1W z!2!4Kk}wbKv8S0~0+OFzdUEwJw;3EF!h$Mo=90V__)& zwNooaJNmU-%b=4PliA&Tv3?d!1$cViM%O!o6Tl)HhKYG<{dZrW`~xV+JAdhplz zIsH`)TeHEjPV{Z6-Qr;GX%jKyXe(g4Eu}nt?J^ z%C{e+frTU`Hp~Oj$oY)T5CCD*f*z4@a?);f(3uBw1D~Fj9U#G(I5LXqraxF2o#fPq zBD%@El&kBhc!>N)juWb=wN0AMIcc-A=78_KXBlzpa%y%>hK9F^RBWoN2aaubP`kSl z@?;>oU<5jfQnW_;Ld&g8p$EKYsMXc-XjFN*f;NO7L2*QNa}i<{6MP_w4Kl~s8PKlL z-(d_;Ym(Mn8^1X$Yy+=p9LS@@xkNof$~?^$NLy|-g!&J^7@~7a09f^E9)2A?Wbdc0 zi7!mHXgT9b)UD6>!W?9J$d7w&br5-7_g*h_bh4RzOcTR-oNr(NKQ;DCr_`Tc z0=kP64~AD|Kzvf=4sWg-vdW-6}uhPEH#|hVZ$J;7bq}T-?%%xa_s4Jd-moDKcHbOlr7uHXFI+gI85LRi6Z9A6<yMx#=IYv`Jv*G=?z5jmq$g4kPXvU}( zGByFi@%K*~xvP-g0#4U<4UOMULT-}NgAmt+-cRTs7ILr*?#{)io_Ie}#vq+o@xTV< zW|pFTkkQv-W-vCW+X`k1Mf7d@^ZvAg#miLG%Z*;PGQ6bei|yNxmyO{Y#aQ4Ffybafeo(EoR+yIs&J${=6`t1|M2V0|M2z8cK3Pr`D6Zv z$NUeJ|6v+jKqzyN_8D_nJOe~={)ZPY80ta(hmDQ)^Y+UPp#K+L*8DO5!^g@0u*c*L zu3S*PNh&&loA2?>m$iGNNeI@v*4i4my@6Bo^L1=2Uv8~+THX;p>TG$h=xFBNJ~Yl+ zS2XBudHYB2JofzwCY1}uke6|7L%iSE@^*%7kFYi>dSoXd2J~PNx@#D4NUoi0&qbf- zs?G`%)#Ef>566J#3G2&UA=c*2VgdDjWQq!=7@y0O60S5AIaM_2`HTsqlwk`Z|M}~K z;3DcDum??#{zaG9vV0e={Ly7OF^+Ht4zV^}CMgD$HASa)Uh7#U#b(c_@9y}5?Q)Zk zM4(Rd@@oKi{1F5ZBsz!=@JU{$B)nkYQrWZ*`QIWX#}g)`t?<7!p45>?3#2O_e{Ic5 z7|X*VxSkJX*JRW->GSZ8!6E@WTM|-=Zw7%`A9_o(!E`iXufbx#QL=R``n?w2>e0oR zK~2W)q04OAgA!-l$3Kpzm_B@ZS*HQ9>N&l<#cFNP#F6TO8mrV9RF4Qk>%98AFd0Rd zWR+i>y*(la;Vq-$SPp1_G<<;tbpg|NkA`cB;=9}$dzSX!^+#KsovpoiXOuZ!@QNBE)m_j{4V_jF@BWjxie4Zbj zE2g|mbzRl_)H(C1y*bobD7u_Ci&Ea0G*KWK@H?Mu%FAnGxKL*}*;NDfjPYw(L3P6Yq7})@*sA@m@z;Mx$97FlYII z`8p3U?kx3LQ9!Y;MMu0A}F= zIbT=num@6KT633$Kt!_AQ>i=glg(w6g4coF|PcWWX9$FG0xY{){_)3u6-xGVxLhHIM}SkoCeEHz!WUPG_Au=uR8WoOT;c?>V;HWbHOx?M@%? zd~{B#_|R-t>(ednv;5KPe(e}iqAL}Oj6rKLj@xwGw2K*W*B_=6J_F^bY?;UmuKO^G!Ct=9&(;+~ zX7F(cX~ARShh79JOc@U;VZEGvX!iwBEbM#ubC>_6u}jgw?+>k{<;Lp*erdF8(JwZu z$T9%KZa}7jBphNcQ*_IgMuMiY3-AV3?80gOY+zANt|c0*Sq2D>1Qx~X7*o%yA8(?d zE;02oOv_q<+wajxfkYE?3D=S@MC1N&#?VM)8TE8(aQIY;K`}o(=8ShOq~RuHA%~CS zQ?ZcC@x~W?9^CeUhoIw1I8%-ysPyRE;Yeh2MVZ3hQ*SnALf`lz9AxusiTU($ zz*@MUq*pD+l^57Jp-@d`pUIA$oaRw*t>H>;rhqPTJlhavuV<4X-c+QY{Mgec1ODZ8 za=6zlt4-}HtML&|Lo}1g(*~+ZaA{i1(~Hl&^C(G$SpAuuf@+?<0-LzBi#s>)qhoQ9hY7khalAZFlj^Vd!0xu4c|-z7GSi_QE2WSeF|&`BkEK`!Q3b-EXy>yoKc zmT@|*0a?MZHw<4+;Q<8lw--2;Hx`0=1KJ%+BJdRb7}5oD+$H?Z9_QG_*8#J z-5VIo`B?IOnH@t5kUa;RhTH?4D?uA8o{^M>@oY4?g?8hKjbp?}<>(Ibfp<3Dj3=#x z0XrVyk1QEzh-Yt0qv>_JUz>>E3R1M?vSBe>)<)N&1-{-aHCo%4OOW;~njV!g@X=n= z+n7(lmeX-b*cvg)b`9^FULjCiBS4uTUH@(xCj}LxN-$5T{EVC6gtT@Zet1w>&8DgJ zDw?F8!c|J~7Jm3>RHZFAqg6UI%D~hm|DfrTfo&~vbU!n@f30;lj7?fzjmmhft;9mz z%>q3DayFEz0NNaSI>d@`iD`0LtlB4K-RGF&9W(DTK@h>e4&~?v>M$xxG5afG>G@1q z-DG?gY|}=@X*^lS%^8%L8cepi;U4vj@fUWsvC)`9g5xq=P5a>(%X4O=VPpVFiyCG* zr#o(+ckJ;m@w)R(Hvj7|Nki_H?xd4h8m1g4T;9F&P|Qk*1m+_=pf!#x=3`Jp6-Vg> ze3+~HF?ScafzsT~8ui! zgZKrnP3y;uSs*1k#_1L$KBn2B?9;V)-*=itQ{#YgMIWN^I85AG=BSA<8e(!e+zB@) z`FIxoEGPLCX8r|`0*wA9KG-5JEdb7ywVdq`R{#&-SgZ$1<013#e#L_wii*=dEzh~4 zxNQ-%FMK2#5dE?~joHyR$UE#*Ct*({zox{pqtB_4ssP12taVOV+7C69B!=~|E<-@q z$Ej80`{&Vk;FAU3c8P*l4vr6xZGP_v=d8hR%E@M}vb$D49)h;6dueKu;%{@DwD{Z1 zaa2CP)2!=E(Ij0iJ=!ek^_J7eM{AJrL3Vb)giCXaFsIGj!CcUAw{|!bRf3FuIbwsQ zyOldJtx>r|v6O(9JKE`5?yJ&OTe_3I@Qtm)HE0dzTl{JPPyz%cW~&p@&AdzJEDul< zAJe2?g98%J?J+7G@s8MAg>fJ)fplC)gs?wE&M(i#u z10Xtf+Qp|Pzfnx-xsQ&8AN|x6lknJ(JT1#K8j)ct_{7(4k0<2g3Hf+JKAw=D*9p1I zY50V(&JgNT3dM_iw@19%J5$LZe@@{T+G!!KoC*(M_9-?te1e}*#MtS3TcwbQ&Y?TG z3Z-OAIXd3{e%ELJ9v^MF>R6P|8nR=xhH)@R>o9l{PKE(H!Of8w?$#QON~518VQ^JR zUeM>^DrZ{IwMKm*`yns$tHzq6Kbq;67(jCNQD%@6V~VHqjHyBs@&@JeJ}nk%G`qJ? zDR>6+$TW+JG06Xt1J^Tu8vB>GXGt{p=tdON|BWW$eYgzA+#zK)mcA4p(V*gpuF&mn ztvBxv-u=zr{&9P6A8K$g)WGD3)#JNqp%ySS;-vREhbz6biBXT%$VN^S5kUcd7X|0S z*4aD9M=qTA{CH}_6Jc$DrK5@##aAPmNVsmC-D0H6qs8x0eteW4fA{jE%8sEp1yzlg zc`?Wr_cM}0n=2Sn_a_U~;ujqJEZ^JgtZc~7S$K@M+$=omRcB!|_yvUO44X8YSz&$#b`_%K)P7(4C#3V)2)Kxn1M% zW?E13769hfM7f7KYEkAaN_>I)*F$YPHm8U1YWhA2`r-L(=)DGSqQNI|ZxVCRrr>br zy$~}UXfs8zd=O4TcF>IbQ0Ksw&P^!}WqThL4CtyG=d_L=EB?j^8~{a2Q$56Thqtsy zt?j2#Kz`>@H->8A42r56ep8H*X2|?1v9ema0wXK6j*y6jMg;#yMZ_l%lESndaNHId zJMF|hD@aPfprsOj^(aY@UAf}(3TMyBQR2eJd9#xUr)9b_I%iuA^-+AqnjT!&l~skL z6SH8HG#8CcKB~IED^>T2$f_(|g~zcI_p9i};CgQ50D54n*|OHN2e6`5vZmoFkaEZa z6{eMz<}|Q9*57-q|M^(|^K+^Hxw*OV;t$U^AM1ZU*8k)e9=wF#?eg@WU;p#@Mz^yG z^*=i=UcT&hUvwD#Z!!k(SpV}Q)&EommK{KbeTEC3*m|^|aAhm~Wf)Dv4Os>AGGS*1 zq=qnSR8}sMnOFRgzT;Dgcacq7)p;+gZ@Gv0cL(3GZ#TUoOsSUXLcW<|Z2B(hctw>O z2WQ!md4XQ2F_;Em5O7vwLjkg=N+Sx4R8djMio8UjS)H)~5*$tu)B)v?iFQIAQli3P z!)%nTV+j9Ip+{_BH5bLr3qgY)DuItIWT_JWvK5-Ql#9WMCZ7uK}o+5~{4H7rlx&S^pk1Z5-3%rs3Y zhe~)eg(6==&~K-VdWzQa-SLqY6-^593=DArq->^f*7-P~BLvqmQxU}FfW2VWaICG@ zCkxzoOx81Pa~FY4KoZ^ZxR~?%^&L zv*on|a z1LmLo%h~vfeMoz(HcjA`grg1srmX{@aSn`E{WPYPr>SOWTb?>QRj9LL?RyfA;_I;5 z)?-_3^QqeOXR`gP+^i=E8`tJ=$ch1=J>zQ;W8-ye6^*<;I>gKR0P$D)mgO;oMi}5V zrF)8uR#gN0ZS0>UHvdDM3}aI}cJC^OSn1AEyU$~DEAVF8syR7yW~hre9-Q5V1;cDKy^W8; z=xy9%xbV{$-tsw?9t63#C@^4QeAMs^z=kdY`X*S2r$i-oTrg3t6!kEJk?NItK?135 zW+^7NnO-tM1dfdqzD?>}MqKMulGos2w^B=yA`^#>QtD}~a}+^Zlh7ZA7eW6P1=ndT z#a6LuZqGKCy$gI9jz%GWr<(Z6$Hj^s8R3;NU}N!0^%7qKm4c>ZC|*=4opj?CU$fM_ zKu(5drujc#u;t6hJli?k-9Fy+j=`}P5B)U9osUW1dvA_+zu!Id-W~28Y#;v3`_t~< z+@49esN>zg9((WK?EU%uuJ`8cvG?Zv{=U1pU^3LDxUGO{g{EkJ^w|3HJ`15(cuy}@ z=BU12*%=@X z!>rhkyq<3SAaz(#x_p%PqJgcC3Kf*SK@F4?09}z}Y<(VpXP}F+4pS7PLDS0tbR!GJ zh6`SN4NY$~fGdO-F0&txM$uHiwp`XeHBD}}*0K(ha4b`=A_1}RP|nO=4w-+9RWVXL zgrOqWSW&@Xu}Y(bA`W(j4y~QB&4y0n+x7q@-PB=(6CxeB9f8A7JPDH^rvid(>toxh zW)oTDk-ozg7tqJdYL7TDh!PnG=X$QRZcFTry7d)Ac0+78L~9VG{h(Apw`A!Cr)BER zh}lWW)mL@)5UV;{YE@^;ud4qLtLiVcs=jMgnv;#K2c>m|wFdiL(3-VT#hSH|OO=%E z5)w96B~6v$ktz-itrD!+1)rQYR$)`mVN|&_-Ffhp}7H$_WcNXGT>3Y)3%%wtC|)h0i0_B!CiE}JRO`C$p|IN`*}?OkWEG5|5`=!5E>1e z1hw8qv0Co%$F=t?K0k+mf_XQMy8W(pjh4>@n~_ui*{mEJ$Hcyiu#>ZWY817JO%Bdp zyWt$GyU17@mUHoiVJ(n>QENuf3e5V>9ks%jb#Hd)mUZaf+Zak8YDMWFN`H>QR!Sq~ z+n1qwh=@L&7g3ehfSYr_r#_-)_5c z`st{R|6J@|bQm*G#peo(DserDL!1WbK-?_Fp3R(d2PJ$!-*QZ_dq|&s`~#1--_hx# z0JrenQMQgeqh&Pk<(gaodT*t+;?QUi*oFFxcKPj3lUex{l-5CRIiyOXJci=726ygP zxcywptz|0`FW@bp!}c69w=4u)%=v=QW#Y|xK7>m>8#L)Ccgoy{LTfsw2aB8|=nA;m z#yX4{%aCig2ZwvU&SMYEqd2~`95DAH5b*kg4mZZQq@X{Yf$`Vl7#NS4&XiuQnHPI%yDaGD`rJJ64uV_Iz`%M9U&OBs;xHXQ zozllL-{RXVCyZU;v{zh`UrAvVuSVg<>-bz53Fy;;?l_FIWLFB5N@#4X(^|`H0%<>e zLHX4P5<5`=;?IgJ8$y3P9F%Df8GGt{N4fZdTY7(wkmX8q_(9q@z@3@TShl8J_++4B z(_1B|^wn@si7&W7wYcjfMRh{mIv`+-AE@F?3Ekcf z6|wf&>$&2ax$H%pgPy7yKma>B%X-E^w{F28UGPV+?G_FFoNiWI?pf>S)L8D%!F7=K zlV~!XZ^;|;x@+gm_O$HIY84!$TkX246nResb`-jF8yz(Gx;xdlzk|rg4cQjljARF( zcK}>KqrWpf2*zyi#KBSe)W9223vZUnXf4}i5Zd{?ok?yE|2Yx{QaJs!>2NPL{fh=6 zB6;eL9mWTscdFuT7QXB#JSA6p(LoLj93eLmntpf?j zWz$RXy<_u*lY>tUQy>1VpqpV-9~t7Lp9CMmvfOPC*ym#=ilUFiIn+}8T!ck_3HU&Q zB8*;a@;OoLj)xmkT|((!%0>Z@(lKaV1ZJF_s_;Z>4lT%N0yh%StYM|D>4%f4w~Id@ z$qz(xWj{#s7USrtF*r70Mi3DO+9GVNQ)Z=}TpO8PePwpt)U2+E#vb}1lG zAc7da%OGKjc$lPqG^QZTDpn)%NF~>08p%^5twiS&gEKKa{k2%->^a+Jg4sBNAmS>4 zlM2!}ChsX!#?MKs{*QnE@Biz6{QJYPN*W)TC58R`$N&6q|NFoG7mWLvLp|futfHJ5 zdv+ZrxBf65Uo5a|(AR^$&VADdg?b)0P%h-Ban1V@DvM~fMSdNIKz%tY(dAv{ZB+cm zapmACwHM=9EbrH7WPRfOJOdXu4;-T{-t@46rtREqV3Pdp0viF{oLjN{MXtZ-O)jIi z-{w{Be7Ip6#oG4Pqer$Mj3EtXJi$D<6|_O7JXkw33hiTniIW5=4jFBkpErSsVu2oCb?QY?K4@s zR~yBH*$^^;gOoUlC$k~m#N$A&d~5o(jciVu8(1a2;kTU)2U#0>*S{VTSWi+_qI-w2eYEYJZt{lt;2NYM$=J8MhLoHRk z4nbq!QV6aIB^Oc+dgtK>k6|P*Ckmm0YfqfZT3J|~b(L$7j379T*iAbCR29H^>bp_Y zLB`B8=D>hXZh$(QAQL$Iweiz<1vZIuP^_yl3*9cU1S)+wNs(xJsBRf`QE95-AipDY2}!mu2Jg(hpdjy(j|uwW zU1~U-S|`0}m$RlJY^`m1FB|mxU>6!+sQgZ3(ZR1dys8Ustkrc#AL^W|``&%4Fae@R zoLq&;T-VvwN^`HZDSU{1(_Kno!CUW}{?@!-vaZE%`tx0k6Noam;+^)#zZM76JgufQ z{3LT_9t?-|#I*e}87mXv+Q{i=o2p$sbIC`Klt3cVe4Ljfoc?9lzcT1X-j_B3qQmByO;UKupvr-W(U48Yv{oc^n)g$kP)#j}AQv1On%Ix3(;X11sd8Y&_!nDsa6s2cpX?4213J z;yB)Q;kc@POT*`2_}%@=Fg)5~V4bcE&(CX8yI2_0Pa<{-dpe$fzG&X`*k5U{9yLqi zxj#D*s_Pb^v@L)@mh6%m`B`8maXjSCy*`6`F!ZA|9-{TN3Z3i*SWS_fh$-(02sHte z_r==211)*1M|ICK31awCnNMLC+sKcX2#=jI9K>8#B*@mpr zET5F5Irb@4F-P=@2Y^g@tP=WEPkfx~ussikb{4|Ves`g6BCRszRPHW@t-nchG;s?# zW`7t4FRl?O^j(U_7^J0dIkXha)S zm!77WtX4Gx>>os97ZBvF1mLB{U!!24wu z^{Ff-e-D&i3-*EATH2Ql8S=1(M6{$Z8Zv5aJ?rzfX zml@nLGx=&5S37~;!TdQ3U{Jpmr_RClU;TG)-|qWId;hxYK^0UFH$_=)8Is_3F}Thq zX6gVoAvsS6g&Jd1!?)i+siAotRU}C<(c${=tLRoqA!><)aVELb31~w!e zD5!RAc-t~4SK|UEOuzEJwqRfYZb=F&r{iyW<|b#dtiC#Amk(nlmfpfwq#@5oSEM^zuBb&ww;w2Eon7b$R%*50ZZD zt$$0YI;=~3Mvi=Tpi#Bgvhs^1wpln%sUKZBc&;QT(B@~g3^k2LeqrZw<0}F3ging0 z-_H#OLqHJyed9DM?^TxR%H+D0Dc|}QTCNF5jpLk^}nu`=uzo~HSHqDnu`Nepb5 zQ5pshsjaTf%3G(oHoZ=`{=$M%zNUj;iW%km@g$WTkk_pEQo~s*e?7W}sE*I%%+!Z} znWR1_52hkvtej)7veb6)+uwcnjS0di8Pa{XICzjUed@j@y1fySO$!eVA@-ZCO-^e1qKIRDHbJkhzI8`Tcd^3Z$|dc8GE2Hx zG9ZSr+?ENERhZDlV2`Gh2ehGEjy@S2C+{snaI39};iel<35E}OzNqUc@FI+qrrJul zU7rW03lBkS(Jc)st3Yz{i+jZIm0);Vpt#d9=P&3JG_m<Kzl(HH`F6?@ z|Lhs&$i>R(7I*0x|MZ>`S|9izf0tzKNL$GV1zucsHh>I zUQJQ;Vd7qv;q%IY1%a$ zb#+~3Qd*f=lBWtpH2;BNTU899=DTCZv)(c?Kd?6^jnfkuCg~Kz5r(s>g)z2{N1dKD^ByyfY)!1{`~UD`*kvhiy}6wY3l!e^b;il1U(Zrzl-KAx_sYM zjj;OnXN6)dX|c8*r)%dEb$}!{ooI>eh+t6`@o^O%`1RrB3MRO@rmlKTMYy_DnzXQt z|6qHCbRYPHb&XZ*9W`Tsg0Yrk^?r_tDJK?bwuZe;yo?qf~+tqoU@q3(K%!#%ql9|Sz);)064&Ip!&!Q;SA&+ULxojE4E1(dCgd`Y<4TUdd80M##30nx4O=Y5f zdZT>grjq6R7A>d4IjmReOiS_w3!O7dz-KKI>C7H(AgU&?-$Ybx2Dxy{Ni4IC90oEK z$KtMNcb-Aa7bxz&fnm)TEg-j=s1}s>H47hydCDjPT*ekir=t?_LBw1FPl=P_CA7mC z6ujH<>t0$BIUR;Poe|^TIO3;7E1cz1Xn9~?dDJWITp@%G%W| zm{++Z*|v$MshA-B1m%xcIrJW|{z~m7698!ow8$!|Puka#pSW&bw3Bb9c};g(?hq7^ z(1vicL$F+&p_5bp23klsMsVQPX>qSNr**8Ny&>`!(KH{``}e-_BDWD~8K~Z}))(*Bfn6FRruSa<;n-oxQ`H!Guwn+ciX`*i1=I|>$ct0?3O5>cj zvlvVuohCV10)BLrfi+2+>?miAad^qb0(cn?$YX;Yu#ognbJ^%$%;Rwxm4xQ9!Tnu& z>?9w6!1N*H1+$|0ox&9V;^C{zT zVd=!|CQ3P7(DMt1RUZ=W4Qj1Va;e{VnpL^e>u?oTY)zhsnIg!ggo#BS-%GnEY5>lafPUT@wHsa$ zx5`4Il!Lf?XFf5nU|TC6k*m-Sx1*wKpk@6OU3HgqQ`B7J5jCUPqDyC00+WHpK_$<% z^M=EGE^!jBomMo64`Ddv*#M<^R(?bX7)p0pvHxAD>X#xLpSdpl;B%7L7XcQXi3CM#j9z#~J(a;xHSAP9WLebvEk|z4 zR6-HsBH(HTE(Ga5ztQZA+)e0NfQ{KAFI94~BD{sF$FK@;QiGeT0&6sw&EhKW1Me6$ zucGErsiA%kEu}J`r)1A#a3yZ%Ky#D-W0U{m3(o&>*lHd;J~-Uu|JdaJp!^>wvh9Y~ zGe`_NiC0SgQJDXudAPT?kNH2E`v;5^!2Umcw7<8>|8Yn8KU6|#T5yjqI5NJN^o{KN zl8b2S!J7|UEVpgv@Yv2w;<3H8ZKm;n2Ue;KlwU*BHr~m68TkBIJccH=?J(ur7^PfR zWp{(^RAh3MH^7`Q>xp7Bp*t(hP;&WY^I&Y3NQ8j{X1t;*3z^kB_5@{6bE90Hb4gY0 zF-10KFRhZqmk^+|RK#XC3PPCgFPVe;BuZ(`as-fV%j1}5Ug#$M3w9Ch;ssc->k_iQ z1{l%P>>`+{)D3uxAZt|8`EYS67fSUfF@SrxQ0u})P00k)X1m~U(xinKr#^F zS!UnBPCe3i*o{itvu%Ur6P83{42=tBL_D|4D1b~4+?;5(n1-DlY<|Z(di9)=4eaoK zceEL3%P81f>R#Ed32qly)$G}H^^JO8Q!@v#=6MRdz)3ARr)NL^^6FjnYrkk-*oT^z z_tT4iKK6E^!FBhmjM>0DA%jg#>7;R*nzvS=;~k{76sb(R!ZN3;$Z?rXB}$LaRYp;A zMt+U$BEhxx5V<4ZGZ@81I2=wS<%EosFvcKRjQc|DG8|a%jRr+NRIh7nO#ZD4qao!k z5=J72kZ!w%LoqgNMwaJRssw+F)_JgS+;R}-Qpqh1hETk;w1&@`JzP~?|(H}76^ zq|vJ{I93mrS#CNMUM`-&Uu(hX-4WZ67h#MAs!tiA#&2K7;`4cxUjBZ5X>09u$dpC_ z(S^1CKzovY98G7duf90baP{A@2H#?X1sD)sBX1>8)9VMrK8CG&J0Depwl|)SdY}aF ziLbu+;(eqRc_5>}`^M{ix4rcVCdXSP6ne&+oX9FX?bxDvtvXo94n!%f@oVpEZ}A;H z{)Bzp80Timt>ByvzuTib3Zys1ykCRMV6B+i~RA60QjmFdY%!NkyV!dee>+PVC29wfAVL&Cl(mH3h`dXVzW-pvw zh2dDw2cOGVOv5%!MZhTJU2aY9(Zp_g>c3;4$#6O z1}C5q;oSs>qsb8Q4MXjII z$|k`jxf4(_DZUtG7M9i2H!z#3ypIJKZl)qkV>#l?ZZmQJm&W+9Z~GSHIcDzD{{iPKB|1Wq?~%Hq>gz10@ zz1}kS7kPJSxiQf|_a(5yEX#Uyi>H$OAm z*1N`iH@luK!+SNh-6>kMzC}3;fH+M($y_ejF4$x-k}^Sue3ov3;8ZHNJ8R(~(giz| zv*F~b%;c3@39r-^874EiQ*l^#5@nzHx+iGlJC@^Pl_-NoOkid3| z*+*(aMMWVeyO~fItZ%JDK??cHy6m-S^Pp%muM7Gsq7E$64HsQD?&ej4%TPZjKa11n zZ@9K%y6ZiCpi}Z(G&mXW=Qq!f-*`{|mAuTlZ1;sa=vhB1-L`%ffpwqn+#sq-2LGoD z0AX$rCDstAl2@ZCYXBxzoB%Z7T~^qwBPRNS65NtHK`yRypAdd6-QU>%Z1n$2 z*#GP`_8zwmHugUo`yWzDP=48q`4}xeR{4X)7cf-k$&3 zto3X%K8wz4^aB(|e_8b6XuyA_+L{J2V+)cx&MEp`H3sYty8{I5%G#ALZ4;Sy@Ztyn z@r=X4w35-_MRE&(6vVwhm!{+rf=;^`KErM>07o*O$a#)wZs{Ef&N8p%0zy#V)9QPp za2CLQ0PY3UkbTVpFfxux18Q{?@qLaSaGe946XzXu1mLUGLWlks%)bivv9q(2wj|*9 zGO0ADyV&_;GUzSBRH-ut{3}+U6=bcY?ol9Yw|HGW<}T^|bEBNJe7}~{0v}gS8~Ziy zz-Z&t00sgdSXY7@AvIm=Wb83{DU=s`2xLb-^VYrUXgCyYayN$yy8flte%s~6RMAS3 zN}(=s1(3NfaW=7fey7k}&!;(^BoR6dA>OmSwrzNsU{k8i_O_KTM-7{UNDk-380zV^ z%d$U8qA{b~V|G*53fIv4-dREK&?RG(8?p~hKS_5Qhak(CgV0Q?TXI|WgRw^~d0Im{ z!T5Z)W!7$6VSk;Pi1a$1Tt_CVoy0EpF1WHAsO_vQ~=&X}VFP-G=0-2>uwj&?6sIr6S5QhR=us|G7Xh*p= z>Si!`WoaBlK|-B~H<$vZ7)qLhh8t7H8Yu-Zl4-qfCWIpmHXWljLmPLvdK#-+HNlGAl%jcE$Zj~AoGB1Q~ z9mTzv*!waV&%yVVtLPEZbOT03Uew1sudYXe6w09rO(NH(E-tyy0u81v0ynyOsXRW) z>?ar7&)E2TLH`5Zulbq?1A;P053UJvEqO#Ba8=M)*?VN@VY-%uODXJxddo8L&nM#@ zm^ZQVE>3+{`r*qR70hQ7Ml~WZ##!7On&)y{2GhO*U(e}nN{aLh-oI9He@h6Gv?#&e zWIBsR(eD&#-z7;(T~w29a5;$}lw@f{r`5!+Gj6LMwh4wGTB9b$3unM+6k5pod7K3D z+A#zjXe@S-DgbI@3PWNZ#*vKF)AE!cO`aeX3sj?f2syKexPPhU|I$rlpzH8 zWG)VO;f$mWvb8*95fBaFmPjS)I5<$i@9M+>53cdRRkUfzpa^r*kmpb8$#fXaa2cu) zLWi}Qbl%1D&_9u*Y4fg5EeJX$eZt2At8`j$n0D1+YDn&nrams zdMUrL{UT!tK+sWY6+Z7Ju$#F7s9kV&134GMuQi}5DRlr0LBI~WX5%aG4E;Sd5;G0M z!FRYu8(6pT6?G$3e-SJ0Feq z+x7jkPYn3lKsq1oQBfhkvZ}Sq#{Oet{~_%^8V7rak2mpOHufL7{Rf1&3eLmT%|8n4 zKlb(;`wg`JXgoUD+lO+^!&dWfWB+kS_8%`avxS*;C6!WMjs-j*%q)(=%V3yc4YE{J zm?FawWI<|rp!u}f2**$!)PdsliTWyNUuL~QB!Z{&O3$Mh`^&F3ia6A|?4}-^D+`mC z;>AmP?i35Wp0P4cnSc%n^vPO6*`vqi#;S(uTwS>=70-%6 za+VF0lNFFrg~~C@;;^uS)u|y7;7JB3_cT@ES7zXagJ`VaZr@T1@7UH|dpk3;4vFO8 z(%r1ovWwYw^X!sJig)1W6w{Wsv){@oDDGSkCq)hbq%Iia((qn4QUld)43O4Z0rIHk zHCtsN@@VgFAaZJBZb7z|r1gFMDh>l4JNdS-LXGsBwL(Or?4l5>( zxI%WccHWSaENQ#u`#2dqDX98ap|pzMCvN(_nB*PyVRYNFabKt_#tHkK}=R4s6xgX zuzAM|xeTD*QC_eP2>SWMqSlP?a)rfF;Le@&sFM_#Qda=OplUcDjSCD#Wz7VXZ__Yl zL=hc_EeZ~4W)#YMyzROUU?Sx5(&054EpDQ95hk)q&0`0F>Xwx{;K@d?;7Au^!h?l( z6~Z+N(+r^JJbOML&y#TQkgeK$JP5|Ky7wx)l2lwzM5>yjQ%N66-GIrLeuX%iPTAV2 z_1w-%gPalQJY8N~Cp)Q|B#7q6*)>4yK3piQOsanFPUP#s&U)|F&+jO#qDS%7!BQ!2 zP7l1QLVg36va#1OaOo6mCVPJU=IGBak0^vCJ3Y@P(plNH(Zw`6-bcva+~kpWCYZ!8 z82KO<6%eWM3C%FO}?WH26BY z(P>AiZd{;2=ER$fJx6Q7Fqu$x2s0*U{sOyCpJl?2T46dn7@?CR2@Y_l;yk^aH!N3# zl1zwmNl}8w2m`U*v7U`t-%)NzXQYV+go_o+D}+7yXG%}Y5$(w)obsYCndDMlRlHh| zPIS8{x#EG}&gFNq+2&HLvt$G{6N>!O>|^okzg{Df~GS~9u$=|@-nAGCTmzMtiXVDc440txXoxsny3lx3>afX zT6reZu#ZgeA46L3fYUTB_w#fZ^&^FtH>8xy2-{$GJ<6!12VwZ z^0I-Vs!}>&#wIgp&ArG2B=ukjrSAD&)xkdUEeAVp2dA;)-%uW)BC0Jd5wM8L7?wu2 zA!o#JaupU3lvls}^ph<}<`t1G0VDLQAf^QDj34Yp%CG**z6e`1MTbEZlWg4{MHz-HA<`T9p;C5{FIIYTTuh|bPJ&?1p1=pTuXFm6kftl_rR-=jsh z48dBSw6)k|5B$$g-OuJb3K{Jh6wkJ}9G~k!&3jyWrqt>hRZ2^+ix@C}6=t!ZVlJzQ z0Wk&CIbowWwsu~_MVsQ^6QlVS$t^NgD%;IutoxWL;BgEt(%O#}E)N(@iJzmww>z;t z&}>fBDh~c-O6#QU{KDDO>SfKH=gfP~_N6=O_vbO=Je($T z#Dhv}MP5|u%ScyNLc>P#4k0Ly#xAMz9(^X4#{lGNJcWoqX*q#)A5FrF-Ggaa=t@7X;@!i zwmP^O3izUhC1sGSJ8Vfbm%sGLqTw(DUJgk8A#{#wGxoq(&DXbbZEBU5#~d6=3nIb{ zxG`hwBv4V!DfxJoefIzZ%g{*GtmacX}Ru zRwvQjC=r{;aHYQ9`KMWd)j6s56CyfXVv^NU=4%5 za24_{f<(ZxfaB4u)1apeO1Jz0y}}_~a8Pe@C~WdSZ2bTKQT+c89zEV~9yT}r{~P~* z<^PX?hF16eFOmPDxz9Fx^Pt&$^l0yZ{b2k5VDI43#{d6*@;_ieHEu>zGTTFDt_9Bj zy%Wzl2d&4g;*Xs1+t@qbsRw|UF7dy7Ntwr;zVVdIZx&vseF*h{a_{sYau%zU5rL~x z$;hOaL?DV;u}`Q4lD@w{Okq!iA+~Q38AF1~eqN@{l$iy%2TbPY7sCbh%og~ZUHBe) z`=n*#aWu2q>>vY;-FgonI!rY|Q*c8_=rNp)JDeSmX??(v25md!uf>oXQ=)+38bN4f zfMDR@)d1qigi9`h>?+w~!|V5>>R7ZGoq1#Ldv*&*XG$C!;ccC*>^6Pxn~Ya2Uv1Hs z>>~abOk1s`Y&^qse-YO>)x5DjOV_xx(KOSeWy=ih)K#-%2<2xdW-JOmYv$`MOhP*! zM>kk1fxG|b-~Z?T{a^q7fByIX_<#TXzj?H!|GwreZ4)1`T@Hp_cJ+m{i;PaMKKlRg zrw9M>tN-$!zOMS+Z+1H;ryoE4*Y9@P?<=+X!|$K`aC>uzlI}c#DBL3EG@DJ>|FHZ- zB$?y5)SF=J$N`2hhHMFEDy%iAp)P}H2u^4^5<;?DV`{0g=4k{*i?%^F3NRD(>R`t z(G?2B`{GsQmd-f2#4u4bx204_2FQ#l*uu0?a1D}JgKqEpDZt8*(xYe`jpn0_zyxan z5S~B_u$soeO4rh9ko4ain^mmC(k#nG#F&;R-0o3F#6J%wy%W6P;1CwTHWyofaZj1CBb85Mw{1q+JZ9AUw8K2-J*N zpF$XjXuk&2QRW0<#DapNd(D?msjXcSC8N-rA=oSCEZDo^wDSE_C#IeB%AQC^~N34Zy zmOgLsU27#?uL-9u)z94X$Ak0m83dP5J02df=?HT==ROBJ$nMoe#C}YJez?lrDi^&g ztjPQE`xzUMpN%tVe7R)DINgC^GcP|ZSryhk;9Gf#JFl$2%%L)ht})35?v~}pjMc?G z^?~`b-6?8!b`k%((L4))^hmTsV!vnVeA3Zu~ZuCEQr2o+_ zP+l(__b*04{J~NPp#|mNRA@&?j>UytyAb+oU^28IIurc*fy}!A^~T#<(|H_vuhFE+ zL&XSi15^(+3>a?W3FGVQ{DfB#r7^WoUNq^IHC0MQ<-L3H&+mTz=i^u1qrV-!`042B zPshZZ0=oiZ?~*x~5sea<+_uLU6W`l?0!fIq`>8fvE6WH-ym zD&P~22c?=dMlWnzp|VPL#TO__dw+hZ;Ojr$``rwP8 z$;&EO>Xl`3CjFU}6HaCQgxoxyMpg7;l6E*U3uz>zItl{}7C+?#Kll)8#HLw4xaPic zzAFonHkpS8o)|M`RC886tGplUq?M#NbZmdL2u8d1$?GxNk3_3o@zZU2-a9ct)?3^A zsCWJ~>}E_^G)ml+^)rs@tC9i>bRY8 zwk&7PppiyVF^gLBnr7Vxo{fCxATd|5J*vjC`XV(jX*JmXwTAS)X_sV|Hb(Zn?scH)^~MR;4|7UiW)H%eVGa{sSDX^=jptG& z6r%OYh%F8*#+>A#bedgh_^0;59ia}K+SAq#UGn?E&C6* zWEY}_Ymvw7E5zv%*7s{&Zs=<=$6K^o>}Whz59B&)l2I&hCkqcSMIll`;KeAAB)Zdq zd1^OV!AVu1BsP(Hk5kazJL#8*v85Ex>)d3T{M#YR4r*Rc>reQAc;9$EHP~d{n}IB& zpw2GT!Sb_UJP3vi!g{DzX^6SSNA{c@1Fz#rKTHxaICa=f=>_i+lZ<>`6yZt7JJ>QY z2vdb19#sjEYF>i7&o7Np)rwlLN=;A&Ut$n-k*>v76pHg5UR+)*>2j6Q>k9Sd_u>|e zC0}NK93Aq8>)Ko7r@fw?<;Jl}5FKZbm8XE4TezwFOtb{ zzFroTZ8Uz*n!sI*-BN;teVx)Z4H8?5dsy-t`wy~p{3bT}(;x}cTmRWbG#pgnok16n z2M8ibEm$A*UnlH$C4iOh@v$YpRrmc{-Tzv{6!(n`dtz+|8!G^Mi@2Ul=5asdF^PH7 z&uCmxn|uZ$*%>7ntkVxsT5EpDo6~!sX#ZR>3EToWVpKk*41fRn$;LOa9&6 z{r?(V25Wsk5elkdRp8R72d$ntPK8*ES<`)Lt^9 zjlIYJUuv&Dt#Xzxy*BG((Phq<^8Bkzg;1W`*1)th+j|Y%*bOf8MT<=xNW)~0eusQL z`CB4!NJM{R1vN5vC+nSmcWd4J@-HTxbN`o<&y)e=arDGg6Jx*jCFC6YM__O;d_C~( z@tOCD0#{eglFD~mTXKboEysNwp8-Gae|Jaw_Df^I&yuz1EN(()1BYSfhj5Gkl#5)-ZZQDqd7DFFT6&cVBb$IqH z)FHB-^Cun1$~jgZ*J{fSo|ep~bmVr0j(F)|#q(rlfcFhp-2(zLcI7NOpT{8t5}ycT zJ$M6t8e(weV9jv)$6%O*x!5P8bEPp)8fRMOpxQ}bnS(kB2$9I73z4C^&_Y+MwIb-y zDpak6%uv%<8jPg_f@LFN*;5nd+1OXK>JZ@>LYBqZS16X3>V9=tn5cL=w{c6kzAO0Sc( zt4SX~pr`pT+#QCO;ZSaEQbyn#krXSX#`rOclbO?oq3JYYunM)Z3n@h?*%jqjM&LjL z2FQyrPU&0W9jW&7H0@MkOY(#4C>&_hqQ@^lm#sZD^!OPAOwSxO=DE|cr|wiC--Yt` zfp>g84f}A&aelL{BScMG{+d{J@ugKW7Oti>YA;odgTJN&zNUe@W`J04N(W}m0+%C} zB?txRbMhuE1Bl7+khv#!f(SO9<50{^<9FdlcT^x-+Yz*t?P2sO{6%Az4%4t0`zXHjvhk z4wafQ{hB#hQM(BFzZJM&64Aficr$GLe>V32CH#L5zimB!{Ou)!QP`s z+4cU0eNiIOXK#La?oForZG!dsLH`0{w(ok+CJBT;O`=i6*bb^PeJc#LD-}0@wH%1asrX z1k|2g%*P*UJS8gJYXF9Ckgc+tkeDHwIDUEb>fMWH`h5Xq?p;w*30k+<-TmkBaN)s~ z0~B_z=A+jO%U9m3CXmCLP%^v@<9?Kc952as@ru0iDb@?%mS5kR#auGNhc%v$rVG%J zj;Fv>jwinbZSTkZ2C02s{qpklzp4@A-D4gyrW|sqdM~hvV_5!nMsY4IX51>w1ac10 zcr)g-MeBi)n>wE3@`c?XM{agiC+`rP?P>6tHUdNp-ItiyFX zcZ_=5Vw_q_1|De7d~P>q?u%y}^8TLD55IVRg>#mt=4R0<5b^Jn-8xTM{BxKMn_!FeP!7wo?a#MNojQ zbZzfrMN3Nz-RoV0^$>rJIbhqGDIUC18D85SyML?vt@KkUc%UAaqkhtcqJPhUhjRIV z-Th3a0p^l$%sfR8xy#4JB$=_B`5bVy2i&|mTwh6^xOM2Z&!U)eqcS3$%G&NQl@hqLO=Lnx3u-0{Bgcu_EnSOz3U ze`~}WP2{!$P|UPs5TB17$2~SxwS)ABOU>A5_cPilun4c?2?pC!m~X@_tMS~|jDSW% zp;D4T%y@Qt_+!1n@({WP{w@V9ZBVOjmHqN?I^EV74S7)0MS-&$O0y;2yJ9NIx_Jp5 zsR+WCQD8l#OHJ|*u529YXcaX>Ek>UiPf5i1;Z^E*x9Lh7p{GYjFCO;J!`)=W&iUPN zynETKALP`RH=*tz8bK3#kN2}ncj+p{72&|f{uKPa5LYWVNV;GfHdK}bd}7caNaIUE zh?>x@ZF-h-*^rgG#F?1{qdo3m>YyE8E3KODSis@q7YRi2QmV;DYTl6DeSBvGU0KM_ z0ypG_*IK=EHMh47#VCs_sxylTd4{ofjfrPgsAx>lxVpPg9NgDWQ zePxjbRAEn4WVf2*eJ_X$-v91;f9hDwB(<8jjEQR>Qz1!32d>q~t|Yd1CKkdKqar%gz%ggvwstkcxtvWKSLX*H-r9&WCWZ z)An}I5_+fR?fhvFGTH(j%j_`-K0e*qst?%#M#oy0A7do|q9lrm#~O%+3lCUrQ07xT zOmmfga+_zVJ>Asqg~2*@u%kJlMos5W!db>4(T}SK1UNV|68-(FhRbID1@E=AhXs6WWa|R2vEYa*+TBQ5Fku2)Bg> zhY*6CFNJh%GZ4XAmoy^C(9?tp!^su9@-2l6DtNWT25dziEJkS6k><-p5WU)-@}~tM zz|nkm(MGTiC+Cs41kN$$-t=nV$|AXbB-0A-{~pE@sb@EFSu=|h2;5JpK9M7YM!0ytu3LxQ3|W=Em>48nD;xF3 zDPx}*L#G^mK!rLK1)8crX+k18sA+BaITQt=t6+kJ|Ey+iK;Ae~wrqn$tNZRaWakTx z0lMeQkvJCN6)U)l(1wD%n|6rezdj)izl6+BtNtK!lHY*zTC=jE-*tt^BZyjqAyOM}^H zrDis|21uiHvP&V{v+?{b$}oHL*)Bzo7l^K zvSICjrS9!GK~;^)*K}&(*Q|0nttl+|NGqs*xk?@yB@Z)87LM|C%gA~KEEWg~aoVP* zLlFWYcjihXfx(!f~=3}K)51Of9NW8sR1!=!CDD0esbwSatx%LN)g6UlDeGp zcRFU|bIAeSv;!gg$5yO_A@GR7$*>u!ni6BK73&ysD3@qiD6KjligHG+DvfQriT zu^6l!zUfAg2SSHUT>-97mQWQmhI|N$GxS4o@Q1Q3BlQe_X|ztxU&4+-W*l)ZU6)%o zb(O9t4kCuSjy26$Nm?yoDWxeFur0EXDYL#gE2=HxCj&sppK?md$No2vJ)%WbjU1yp zl@a*yBI71USFzK@thPvLpeCfE$=wz;ht)~8x<&_c;sB&8{~o{!C{el}Yufos0nMC9 z1@x)54)jJUgin1(vn%YMR2wosAO14)0b#Hn_qruiK9MVj(#@>>1p;N(np6$(6rLLs!*TTrf3)2NTQ2HJds}lNg%YVl)PaV% z5ay$kt>LkF({q#$0s<&p^VDh5=dgHd%Wc5w?}`R!Cx|U>X#|#mg*GVt(8`8~D(gO%y2Wn~mP z-56MF7otakk3u}1)lrGT_9PglLwUO#p4_}p*4ARh!QnUQB5>BSxA^WBEfd(mQ#oMx z9LBCah9>fxeP->mvfHBqa51ZLEa^WFnS(4O z9!2AB(r06W)0Ux_vMDLg{>nBy*~SfpAGQr71r8RLkeSkuHA_C0LPH$l>Uby}f%?)s z-aiZxuC&?@!-;l&%2D6$5LvmT|L z_ahXJAI6b=I9bV_S;@)f?46Fx%)%Yv%#XcS&ghKDCVL)|yR-KmAtQSxGdt@NzW#^r zPw)TXdB2|56H=~pmV-z+P_z+ukSczy6`0pJ%;d^I`TF6q@@A)?d)~VT-{9xM4LM2_2{s=*_ohRo~Wygxm9oFckCrZj?QqQ_i4L_4K0$THT-Dqf`L&tO+J^J8U7U zwusUbSy2>-+)5NVuc#wQ(;v)Uh9dj$G)sTvjmQI-6!+8CWlU2Ov}h`LMUr=Xl8)r& zL#|eHld%b-0-k2RB*x)HyS3~b!7)dZuFXdvDCps^24QBqZAUmGt=AJKVWUz6h4;`Ys9~2r??04O$ zl!Od`X*Lyua zw_6B_gg>OPWIdaSt}uWUY?PO_%&j>-`P?iH24iUH|EioAe?uWFbM=DFFvO7_4@#oO zMJ$PC5Q~hg`UCLlbV{e~0-xvXC(=S>F@j#5!3A2B6JLkd(8uy3;S&*E6WrzY$KsYO zQMA)>LZupRJqZf%IjGEr0=}Y@vG7xRBh|kAbIg;v9>-57W3&E49V91%*?J#Tgo^(i zvHOYiaAXc-VW{beomr>>^;D6(*NWyq|GS)_{H9U^e7ptY02Sl08R|5^6A(oy8I!E6 z#>T;# zd}h8(Ke?Zod;%kQF~^zp-$N3}-8k%)gXe_3Nbgh@F60hmy4^M4SJO|{T_7ogE9WDV zAVMPloS#F}xhw7%8V6rL zZg@=ab3wh2J>KDw28^~~Rz>Dx`PDwNOqeT}fWG35TCi3H~&{)FOwT)*wRS z@HFY-3T@*$IQg-khJSp^UwQ5%fq3)9K}Wqj@nFbH(0ifl$77y=TyG3-WUJ4%cG+Kx zQ9j`3RlGY~1`^vJPbK0{av_*vlc$>ufd7*XR~s?&@ObF{%aw%gDC3noC~-n{cIC?D zi2KKt(Gb>jfMckeH*0)qyy$gpRK!WxRQ2Wo9cD?|!Q^?83BrW&15F@he+08IlV$p% zy`5Q+cp%{oXvV?)MqmUn*GEs8#AlnzVt0dFUrt&}?ck~qFw-qVx$U6ILD(w_&~b{k zzRTanHed3IQ%i(QIU+*@F&a6#p01Yy()tn;5C$vSMOA9-z1=&sBn_i3 zR&oALhlp>gmm}PqiXf)`4B5?Eatf^Vy&CSDsQ^KAKOfQ@M~mj1yfjUE24PV*7X>by z)(R^}rPY0EYJdBik>zSn3iaE2E;3#82euEtqH$^20U29q@^;WG##QMC$P;@bw?mH;3c87ksG=#4LE6)xgfWg9B?|>graCnRL0s$=2!-_0{T; z-_`T^ub!%%tz8?tX4f}?F`(?$P@rMx-5bl=eRA{~*oIzrO^5a#CU5Agi7{u(#O?!6 z?5bOeSwxGmO__PqH_%4Bwb<~dWv@gwjMEzFxuY2d82C?X3$o!Ge+u(`cRZRIFw_KL z2!P@X+$+x6$g$B${l6L`aXCN@yoTi?S|nLPU#F$hJbDtiEJWOx`4ffP zqZOB-N%Yl*5G=k?kBB^P-1>r_N8yWeJKzxIN!U!dqK-_WEM>36(NC5m#)jDS6UIm* zi2TPnu5HtLO+;de7G-=e;VVy{c>hY<^O>HlQY$nQZ4n#RQRpY#Zw}frPz66*NTAVt z{$}~h(#IVmz1?st&p!^3;XFFJN5qR3k`uK9EAq_5+h)`{iA#|`zvIKoIq%Yxr6ESi z#9K+QU_s>FjgjI{qQmXN!}jVY@#y zuJ4YILD0LS16IkbyJ@gP{lz7a{otTs?pDzm>~xzlJGXylad*sdGUSFlBD<{b9araS zSBZ0NL=Ud2m_B6(?R|~H-&LjpAp^LM|G_Ujbv`kTYfcdjjt>}~#B{5;xtMkBE1yd} zu;LK0ue~gE)!z@C`FP80kTM#*J`;xHriJaJ&I0 z+B(Dac8clXh|DZN;XHdLIP?qwZh#9wYMP zytxNsegFvZk9u$Z)`vr+i4nj__#+5(fg%LW6rT~S+6dJlZqKV1M%UJ(l9*~$KfZ6n z9q>Mt8WTKYJLo`W{}-Oy?`QrlnCc1lKe3v+A>27frf{iEYFiJiIZ(LcdzDU0LZK*A zs(vSI_6lf2@%Jr}TohWR%)u*CJFRV?U(5$+3-NpFFWd^NUyd_tr1*{U4g0WFM!tx6 z2Tr=u#sUR}Tr&hQHH})q-_*mS^x$jfQoHvhG%m>~PEk4S)sC0&uAhBphW77o9nCW< zwvVx|Kk2nQ=%d#s`Xcl9j0)0;QqgrxlD>!_)cu#XUw>*@O%lj()un1Qkdnf zj3^DLr_bIm@&?|Pa;L~-$SWO1d45L>GC7?-z+da`;Ui`A!1(y>jZFTAiMjS_@^B!5 zA_5Wxf*urlmF8mtc-EFB)nk&zyD3L|1ZQKlcE)<={JV8HlN%pxi;8JSuKO`4u1`=} zw$(Lo_#fjrsfn%v4a9)v#87|*jCFV7*S{bw$?s7kuuKPb>9i-=*;t*z$3D*~dK|I( zZY(f-S}s7%+;^Ao>i6sRO7#%pBShd4Kh1HwERokvJ|V)V_}d3GBClhN?Kv!@m&rNsjLWuECi9@^QX zJ(U~at8?zv7r=tv)Hq5-zuCi>q^ZM(mg~7|`nkURv_^V3o+qlt`i~ZlFWhAzEe_pKwoZuXshyS6ejY z>*)?<;YG9!`JG`%Y#1w+T6~+v|9f^bNLI1*xs9-P6BtE1NU|3t6S7Du!aJK-WHM+xr(`pSsC<4| z@CMiObB+H+QYe5V51*^DD`x1$v+wbIh9DJn=oO3cb_ue*ib z{rCA;WOb`C=F;lH_J_wK_aS=hpMOHUwIt%j{w(zlDT@#&K}y3cwtAZvU5 z*<-F|&dq2JrnfUD7@G6r0iUN zUp%LBvgVN#&>riFP; zwZi%Hv|=EDzw#P|1B`2mOgs%5;?h>H+c>dd>cCd9V?%wi(xc7V8D$bJfpsgZEsF6z z4uKPa`HQKXdI;wy5p~UJNkw7(c>^XJr=Z=8Duvb+VaWNhTFbcHb`54JqV9sj!X@c|loJ(B>VRhp}N@79qt zQBcx?6pB*1R44|<9>HmzugK|Y?+AIO;KoV|~ z*Qr0I%rgD{vs>3Uq`u4IQ^{SQuqklD@{%^099EbeihH{RN+zag=Rl8)H^dIRJAXpwX#VP6^}Z0xGP(JiBU&nY{Q<#7=A7=dd7fRQ z*a5C=tSZO*LMUcY{c>Y1h|Z{r6W7jt%cOh%=G_~rumibBLgpDxEqpL>jA-mPeADeR z2*geyiP&SgUL~f|&4E1j^+Y3}_4X%P&(?;*`yIU{`}Yl6EvBDZV&C+2@^cn_8IxD#^}hlSJ!1d3_0uYS8MM zC$3m_M*N(MAel9)#eypE37>7U1{aWds?HeAS>Mr2Cy&We;CqWQ`|St_(-q+6^KjLL z*74;gPEco_v~wf@Hg?~)di=tV%^;vqu-2h>Ws?}xIIwZ>pf9wFl(vo{^E?&tKqygv zQk}DjlGbdKfl9y&t8H&JnU|AUO+w0V) FileContextResult: - """ - 获取代码文件的方法调用上下文。 - - Args: - project_root: 项目根目录 (用于定位索引) - file_path: 代码文件路径 - include_calls: 是否包含出向调用 - include_callers: 是否包含入向调用 - max_depth: 调用链深度 (1=直接调用) - ⚠️ V1 限制: 当前版本仅支持 max_depth=1 - 深度调用链分析将在 V2 实现 - format: 输出格式 - - Returns: - FileContextResult - - Raises: - IndexNotFoundError: 项目未索引 - FileNotFoundError: 文件不存在 - - Note: - V1 实现限制: - - max_depth 仅支持 1 (直接调用) - - 出向调用目标文件可能为 None (未解析) - - 深度调用链分析作为 V2 特性规划 - """ -``` - -### 4.3 `codexlens.api.find_definition()` - -```python -@dataclass -class DefinitionResult: - """定义查找结果""" - name: str - kind: str - file_path: str - line: int - end_line: int - signature: Optional[str] - container: Optional[str] # 所属类/模块 - score: float - -def find_definition( - project_root: str, - symbol_name: str, - symbol_kind: Optional[str] = None, - file_context: Optional[str] = None, - limit: int = 10 -) -> List[DefinitionResult]: - """ - 根据符号名称查找定义位置。 - - Fallback 策略: - 1. 精确匹配 + kind 过滤 - 2. 精确匹配 (移除 kind) - 3. 前缀匹配 - """ -``` - -### 4.4 `codexlens.api.find_references()` - -```python -@dataclass -class ReferenceResult: - """引用结果""" - file_path: str - line: int - column: int - context_line: str - relationship: str # call | import | type_annotation | inheritance - -@dataclass -class GroupedReferences: - """按定义分组的引用""" - definition: DefinitionResult - references: List[ReferenceResult] - -def find_references( - project_root: str, - symbol_name: str, - symbol_kind: Optional[str] = None, - include_definition: bool = True, - group_by_definition: bool = True, - limit: int = 100 -) -> List[GroupedReferences]: - """ - 查找符号的所有引用位置。 - - 多定义时分组返回,解决引用混淆问题。 - """ -``` - -### 4.5 `codexlens.api.workspace_symbols()` - -```python -@dataclass -class SymbolInfo: - """符号信息""" - name: str - kind: str - file_path: str - line: int - container: Optional[str] - score: float - -def workspace_symbols( - project_root: str, - query: str, - kind_filter: Optional[List[str]] = None, - file_pattern: Optional[str] = None, - limit: int = 50 -) -> List[SymbolInfo]: - """在整个工作区搜索符号 (前缀匹配)。""" -``` - -### 4.6 `codexlens.api.get_hover()` - -```python -@dataclass -class HoverInfo: - """悬停信息""" - name: str - kind: str - signature: str - documentation: Optional[str] - file_path: str - line_range: Tuple[int, int] - type_info: Optional[str] - -def get_hover( - project_root: str, - symbol_name: str, - file_path: Optional[str] = None -) -> Optional[HoverInfo]: - """获取符号的详细悬停信息。""" -``` - -### 4.7 `codexlens.api.semantic_search()` - -```python -@dataclass -class SemanticResult: - """语义搜索结果""" - symbol_name: str - kind: str - file_path: str - line: int - vector_score: Optional[float] - structural_score: Optional[float] - fusion_score: float - snippet: str - match_reason: Optional[str] - -def semantic_search( - project_root: str, - query: str, - mode: str = "fusion", # vector | structural | fusion - vector_weight: float = 0.5, - structural_weight: float = 0.3, - keyword_weight: float = 0.2, - fusion_strategy: str = "rrf", # rrf | staged | binary | hybrid - kind_filter: Optional[List[str]] = None, - limit: int = 20, - include_match_reason: bool = False -) -> List[SemanticResult]: - """ - 语义搜索 - 结合向量和结构化搜索。 - - Args: - project_root: 项目根目录 - query: 自然语言查询 - mode: 搜索模式 - - vector: 仅向量搜索 - - structural: 仅结构搜索 (符号 + 关系) - - fusion: 融合搜索 (默认) - vector_weight: 向量搜索权重 [0, 1] - structural_weight: 结构搜索权重 [0, 1] - keyword_weight: 关键词搜索权重 [0, 1] - fusion_strategy: 融合策略 (映射到 chain_search.py) - - rrf: Reciprocal Rank Fusion (推荐,默认) - - staged: 分阶段级联 → staged_cascade_search - - binary: 二分重排级联 → binary_rerank_cascade_search - - hybrid: 混合级联 → hybrid_search - kind_filter: 符号类型过滤 - limit: 最大返回数量 - include_match_reason: 是否生成匹配原因 (启发式,非 LLM) - - Returns: - 按 fusion_score 排序的结果列表 - - 降级行为: - - 无向量索引: vector_score=None, 使用 FTS + 结构搜索 - - 无关系数据: structural_score=None, 仅向量搜索 - """ -``` - ---- - -## 五、已知问题与解决方案 - -### 5.1 P0 阻塞项 - -| 问题 | 位置 | 解决方案 | -|------|------|----------| -| **索引 Schema 不匹配** | `chain_search.py:313-324` vs `dir_index.py:304-312` | 兼容 `full_path` 和 `path` | -| **文件符号查询缺失** | `global_index.py:214-260` | 新增 `get_file_symbols()` | -| **出向调用查询缺失** | `dir_index.py:333-342` | 新增 `RelationshipQuery` | -| **关系类型不一致** | `entities.py:74-79` | 规范化 `calls` → `call` | - -### 5.2 设计缺陷 (Gemini 发现) - -| 缺陷 | 影响 | 解决方案 | -|------|------|----------| -| **调用图不完整** | `file_context` 缺少出向调用 | 新增有向调用 API | -| **消歧义未定义** | 多定义时无法区分 | 实现 `rank_by_proximity()` | -| **AI 特性成本过高** | `explanation` 需要 LLM | 设为可选,默认关闭 | -| **融合参数不一致** | 3 分支但只有 2 权重 | 补充 `keyword_weight` | - -### 5.3 消歧义算法 - -**V1 实现** (基于文件路径接近度): - -```python -def rank_by_proximity( - results: List[DefinitionResult], - file_context: str -) -> List[DefinitionResult]: - """按文件接近度排序 (V1: 路径接近度)""" - def proximity_score(result): - # 1. 同目录最高分 - if os.path.dirname(result.file_path) == os.path.dirname(file_context): - return 100 - # 2. 共同路径前缀长度 - common = os.path.commonpath([result.file_path, file_context]) - return len(common) - - return sorted(results, key=proximity_score, reverse=True) -``` - -**V2 增强计划** (基于 import graph 距离): - -```python -def rank_by_import_distance( - results: List[DefinitionResult], - file_context: str, - import_graph: Dict[str, Set[str]] -) -> List[DefinitionResult]: - """按 import graph 距离排序 (V2)""" - def import_distance(result): - # BFS 计算最短 import 路径 - return bfs_shortest_path( - import_graph, - file_context, - result.file_path - ) - - # 组合: 0.6 * import_distance + 0.4 * path_proximity - return sorted(results, key=lambda r: ( - 0.6 * import_distance(r) + - 0.4 * (100 - proximity_score(r)) - )) -``` - -### 5.4 参考实现: `get_file_symbols()` - -**位置**: `src/codexlens/storage/global_index.py` - -```python -def get_file_symbols(self, file_path: str | Path) -> List[Symbol]: - """ - 获取指定文件中定义的所有符号。 - - Args: - file_path: 文件路径 (相对或绝对) - - Returns: - 按行号排序的符号列表 - """ - file_path_str = str(Path(file_path).resolve()) - with self._lock: - conn = self._get_connection() - rows = conn.execute( - """ - SELECT symbol_name, symbol_kind, file_path, start_line, end_line - FROM global_symbols - WHERE project_id = ? AND file_path = ? - ORDER BY start_line - """, - (self.project_id, file_path_str), - ).fetchall() - - return [ - Symbol( - name=row["symbol_name"], - kind=row["symbol_kind"], - range=(row["start_line"], row["end_line"]), - file=row["file_path"], - ) - for row in rows - ] -``` - ---- - -## 六、实现计划 - -### Phase 0: 基础设施 (16h) - -| 任务 | 工时 | 说明 | -|------|------|------| -| 修复 `search_references` schema | 4h | 兼容两种 schema | -| 新增 `GlobalSymbolIndex.get_file_symbols()` | 4h | 文件符号查询 (见 5.4) | -| 新增 `RelationshipQuery` 类 | 6h | 有向调用查询 | -| 关系类型规范化层 | 2h | `calls` → `call` | - -### Phase 1: API 层 (48h) - -| 任务 | 工时 | 复杂度 | -|------|------|--------| -| `find_definition()` | 4h | S | -| `find_references()` | 8h | M | -| `workspace_symbols()` | 4h | S | -| `get_hover()` | 4h | S | -| `file_context()` | 16h | L | -| `semantic_search()` | 12h | M | - -### Phase 2: 测试与文档 (16h) - -| 任务 | 工时 | -|------|------| -| 单元测试 (≥80%) | 8h | -| API 文档 | 4h | -| 示例代码 | 4h | - -### 关键路径 - -``` -Phase 0.1 (schema fix) - ↓ -Phase 0.2 (file symbols) → Phase 1.5 (file_context) - ↓ -Phase 1 (其他 API) - ↓ -Phase 2 (测试) -``` - ---- - -## 七、测试策略 - -### 7.1 单元测试 - -```python -# test_global_index.py -def test_get_file_symbols(): - index = GlobalSymbolIndex(":memory:") - index.update_file_symbols(project_id=1, file_path="test.py", symbols=[...]) - results = index.get_file_symbols("test.py") - assert len(results) == 3 - -# test_relationship_query.py -def test_outgoing_calls(): - store = DirIndexStore(":memory:") - calls = store.get_outgoing_calls("src/auth.py", "login") - assert calls[0].relationship == "call" # 已规范化 -``` - -### 7.2 Schema 兼容性测试 - -```python -def test_search_references_both_schemas(): - """测试两种 schema 的引用搜索""" - # 旧 schema: files(path, ...) - # 新 schema: files(full_path, ...) -``` - -### 7.3 降级测试 - -```python -def test_semantic_search_without_vectors(): - result = semantic_search(query="auth", mode="fusion") - assert result.vector_score is None - assert result.fusion_score > 0 -``` - ---- - -## 八、使用示例 - -```python -from codexlens.api import ( - file_context, - find_definition, - find_references, - semantic_search -) - -# 1. 获取文件上下文 -result = file_context( - project_root="/path/to/project", - file_path="src/auth/login.py", - format="brief" -) -print(result.summary) - -# 2. 查找定义 -definitions = find_definition( - project_root="/path/to/project", - symbol_name="UserService", - symbol_kind="class" -) - -# 3. 语义搜索 -results = semantic_search( - project_root="/path/to/project", - query="处理用户登录验证的函数", - mode="fusion" -) -``` - ---- - -## 九、CCW 集成 - -| codexlens API | CCW MCP Tool | -|---------------|--------------| -| `file_context()` | `codexlens_file_context` | -| `find_definition()` | `codexlens_find_definition` | -| `find_references()` | `codexlens_find_references` | -| `workspace_symbols()` | `codexlens_workspace_symbol` | -| `get_hover()` | `codexlens_get_hover` | -| `semantic_search()` | `codexlens_semantic_search` | - ---- - -## 十、分析来源 - -| 工具 | Session ID | 贡献 | -|------|------------|------| -| Gemini | `1768618654438-gemini` | 架构评审、设计缺陷、融合策略 | -| Codex | `1768618658183-codex` | 组件复用、复杂度估算、任务分解 | -| Gemini | `1768620615744-gemini` | 最终评审、改进建议、APPROVED | - ---- - -## 十一、版本历史 - -| 版本 | 日期 | 变更 | -|------|------|------| -| 1.0 | 2025-01-17 | 初始版本,合并多文档 | -| 1.1 | 2025-01-17 | 应用 Gemini 评审改进: V1 限制说明、策略映射、消歧义增强、参考实现 | diff --git a/codex-lens/docs/CODEX_LENS_AUTO_HYBRID.md b/codex-lens/docs/CODEX_LENS_AUTO_HYBRID.md deleted file mode 100644 index 5f95e22e..00000000 --- a/codex-lens/docs/CODEX_LENS_AUTO_HYBRID.md +++ /dev/null @@ -1,326 +0,0 @@ -# CodexLens Auto Hybrid Mode - Implementation Summary - -## 概述 - -实现了两个主要功能: -1. **自动向量嵌入生成**:`init` 命令在检测到语义搜索依赖后自动生成向量嵌入 -2. **默认混合搜索模式**:`search` 命令在检测到嵌入存在时自动使用 hybrid 模式 - -## 修改文件 - -### 1. codex-lens CLI (`codex-lens/src/codexlens/cli/commands.py`) - -#### 1.1 `init` 命令增强 - -**新增参数**: -- `--no-embeddings`: 跳过自动嵌入生成 -- `--embedding-model`: 指定嵌入模型 (默认: "code") - -**自动嵌入生成逻辑**: -```python -# 在 init 成功后 -if not no_embeddings: - from codexlens.semantic import SEMANTIC_AVAILABLE - if SEMANTIC_AVAILABLE: - # 自动调用 generate_embeddings() - # 使用指定的 embedding_model -``` - -**行为**: -- 检测 `fastembed` 和 `numpy` 是否安装 -- 如果可用,自动生成嵌入(可用 `--no-embeddings` 跳过) -- 默认使用 "code" 模型 (jinaai/jina-embeddings-v2-base-code) -- 在输出中显示嵌入生成进度和统计 - -#### 1.2 `search` 命令增强 - -**模式变更**: -- 默认模式从 `"exact"` 改为 `"auto"` -- 新增 `"auto"` 模式到有效模式列表 - -**自动模式检测逻辑**: -```python -if mode == "auto": - # 检查项目是否有嵌入 - project_record = registry.find_by_source_path(str(search_path)) - if project_record: - embed_status = check_embeddings_status(index_path) - if has_embeddings: - actual_mode = "hybrid" # 使用混合模式 - else: - actual_mode = "exact" # 降级到精确模式 -``` - -**行为**: -- 默认使用 `auto` 模式 -- 自动检测索引是否有嵌入 -- 有嵌入 → 使用 `hybrid` 模式(精确 + 模糊 + 向量融合) -- 无嵌入 → 使用 `exact` 模式(仅全文搜索) -- 用户仍可手动指定模式覆盖自动检测 - -### 2. MCP 工具简化 (`ccw/src/tools/codex-lens.ts`) - -#### 2.1 简化 action 枚举 - -**仅暴露核心操作**: -- `init`: 初始化索引(自动生成嵌入) -- `search`: 搜索代码(自动混合模式) -- `search_files`: 搜索文件路径 - -**移除的高级操作**(仍可通过 CLI 使用): -- ~~`symbol`~~: 符号提取 → 使用 `codexlens symbol` -- ~~`status`~~: 状态检查 → 使用 `codexlens status` -- ~~`config_show/set/migrate`~~: 配置管理 → 使用 `codexlens config` -- ~~`clean`~~: 清理索引 → 使用 `codexlens clean` -- ~~`bootstrap/check`~~: 安装管理 → 自动处理 - -**简化的 ParamsSchema**: -```typescript -const ParamsSchema = z.object({ - action: z.enum(['init', 'search', 'search_files']), - path: z.string().optional(), - query: z.string().optional(), - mode: z.enum(['auto', 'text', 'semantic', 'exact', 'fuzzy', 'hybrid', 'vector', 'pure-vector']).default('auto'), - languages: z.array(z.string()).optional(), - limit: z.number().default(20), -}); -``` - -#### 2.2 扩展 mode 枚举并设置默认值 - -**模式支持**: -```typescript -mode: z.enum(['auto', 'text', 'semantic', 'exact', 'fuzzy', 'hybrid', 'vector', 'pure-vector']).default('auto') -``` - -**模式映射**(MCP → CLI): -```typescript -const modeMap: Record = { - 'text': 'exact', - 'semantic': 'pure-vector', - 'auto': 'auto', // 默认:自动检测 - 'exact': 'exact', - 'fuzzy': 'fuzzy', - 'hybrid': 'hybrid', - 'vector': 'vector', - 'pure-vector': 'pure-vector', -}; -``` - -#### 2.3 传递 mode 参数到 CLI - -```typescript -const args = ['search', query, '--limit', limit.toString(), '--mode', cliMode, '--json']; -``` - -### 3. 文档更新 (`.claude/rules/context-requirements.md`) - -#### 3.1 更新 init 说明 - -强调自动嵌入生成功能: -```markdown -**NEW**: `init` automatically generates vector embeddings if semantic dependencies are installed (fastembed). -- Auto-detects if `numpy` and `fastembed` are available -- Uses "code" model by default (jinaai/jina-embeddings-v2-base-code) -- Skip with `--no-embeddings` flag if needed -``` - -#### 3.2 更新 search 说明 - -强调自动混合模式: -```markdown -**Search Code** (Auto Hybrid Mode - DEFAULT): -# Simple call - auto-detects mode (hybrid if embeddings exist, exact otherwise): -codex_lens(action="search", query="authentication", path=".", limit=20) -``` - -#### 3.3 详细模式说明 - -添加完整的模式列表和默认行为说明: -- `auto`: **DEFAULT** - Uses hybrid if embeddings exist, exact otherwise -- `hybrid`: Exact + Fuzzy + Vector fusion (best results, auto-selected if embeddings exist) -- 其他模式... - -## 使用示例 - -### 场景 1:首次使用(已安装 fastembed) - -```bash -# 初始化索引(自动生成嵌入) -codexlens init . - -# 输出: -# OK Indexed 150 files in 12 directories -# -# Generating embeddings... -# Model: code -# ✓ Generated 1234 embeddings in 45.2s - -# 搜索(自动使用 hybrid 模式) -codexlens search "authentication" -# Mode: hybrid | Searched 12 directories in 15.2ms -``` - -### 场景 2:首次使用(未安装 fastembed) - -```bash -# 初始化索引(跳过嵌入) -codexlens init . - -# 输出: -# OK Indexed 150 files in 12 directories -# (无嵌入生成提示) - -# 搜索(降级到 exact 模式) -codexlens search "authentication" -# Mode: exact | Searched 12 directories in 8.5ms -``` - -### 场景 3:手动控制 - -```bash -# 跳过嵌入生成 -codexlens init . --no-embeddings - -# 强制使用特定模式 -codexlens search "auth" --mode exact -codexlens search "how to authenticate" --mode hybrid -``` - -### 场景 4:MCP 工具使用(简化版) - -```python -# 初始化(自动生成嵌入) -codex_lens(action="init", path=".") - -# 搜索(默认 auto 模式:有嵌入用 hybrid,无嵌入用 exact) -codex_lens(action="search", query="authentication") - -# 强制混合模式 -codex_lens(action="search", query="authentication", mode="hybrid") - -# 强制精确模式 -codex_lens(action="search", query="authenticate_user", mode="exact") - -# 仅返回文件路径 -codex_lens(action="search_files", query="payment processing") -``` - -**高级操作使用 CLI**: -```bash -# 检查状态 -codexlens status - -# 提取符号 -codexlens symbol src/auth/login.js - -# 配置管理 -codexlens config show -codexlens config set index_dir /custom/path - -# 清理索引 -codexlens clean . -``` - -## 技术细节 - -### 嵌入检测逻辑 - -1. 查找项目在 registry 中的记录 -2. 获取索引路径 `index_root/_index.db` -3. 调用 `check_embeddings_status()` 检查: - - 是否存在 `chunks` 表 - - `chunks_count > 0` -4. 根据检测结果选择模式 - -### 混合搜索权重 - -默认 RRF 权重: -- Exact FTS: 0.4 -- Fuzzy FTS: 0.3 -- Vector: 0.3 - -可通过 `--weights` 参数自定义: -```bash -codexlens search "query" --mode hybrid --weights 0.5,0.3,0.2 -``` - -### 模型选项 - -| 模型 | 模型名称 | 维度 | 大小 | 推荐场景 | -|------|---------|------|------|---------| -| fast | BAAI/bge-small-en-v1.5 | 384 | ~80MB | 快速原型 | -| code | jinaai/jina-embeddings-v2-base-code | 768 | ~150MB | **推荐** 代码搜索 | -| multilingual | intfloat/multilingual-e5-large | 1024 | ~1GB | 多语言项目 | -| balanced | mixedbread-ai/mxbai-embed-large-v1 | 1024 | ~600MB | 平衡性能 | - -## 兼容性 - -### 向后兼容 - -- 所有现有命令仍然工作 -- 手动指定 `--mode` 会覆盖自动检测 -- 使用 `--no-embeddings` 可恢复旧行为 - -### 依赖要求 - -**核心功能**(无需额外依赖): -- FTS 索引(exact, fuzzy) -- 符号提取 - -**语义搜索功能**(需要安装): -```bash -pip install codexlens[semantic] -# 或 -pip install numpy fastembed -``` - -## 性能影响 - -### 初始化时间 - -- FTS 索引:~2-5 秒(100 文件) -- 嵌入生成:+30-60 秒(首次下载模型) -- 后续嵌入:+10-20 秒 - -### 搜索性能 - -| 模式 | 延迟 | 召回率 | 推荐场景 | -|------|------|--------|---------| -| exact | 5ms | 中 | 精确代码标识符 | -| fuzzy | 7ms | 中 | 容错搜索 | -| hybrid | 15ms | **最高** | **通用搜索(推荐)** | -| vector | 12ms | 高 | 语义查询 | -| pure-vector | 10ms | 中 | 自然语言 | - -## 最小化修改原则 - -所有修改都遵循最小化原则: -1. **保持向后兼容**:不破坏现有功能 -2. **默认智能**:自动检测最佳模式 -3. **用户可控**:可通过参数覆盖自动行为 -4. **渐进增强**:未安装 fastembed 时优雅降级 - -## 总结 - -✅ **init 命令自动生成嵌入**(可用 `--no-embeddings` 跳过) -✅ **search 命令默认使用混合模式**(有嵌入时自动启用) -✅ **MCP 工具简化为核心操作**(init, search, search_files) -✅ **所有搜索模式支持**(auto, exact, fuzzy, hybrid, vector, pure-vector) -✅ **文档已更新**反映新的默认行为 -✅ **保持向后兼容性** -✅ **优雅降级**(无 fastembed 时使用 exact 模式) - -### MCP vs CLI 功能对比 - -| 功能 | MCP 工具 | CLI | -|------|---------|-----| -| 初始化索引 | ✅ `codex_lens(action="init")` | ✅ `codexlens init` | -| 搜索代码 | ✅ `codex_lens(action="search")` | ✅ `codexlens search` | -| 搜索文件 | ✅ `codex_lens(action="search_files")` | ✅ `codexlens search --files-only` | -| 检查状态 | ❌ 使用 CLI | ✅ `codexlens status` | -| 提取符号 | ❌ 使用 CLI | ✅ `codexlens symbol` | -| 配置管理 | ❌ 使用 CLI | ✅ `codexlens config` | -| 清理索引 | ❌ 使用 CLI | ✅ `codexlens clean` | - -**设计理念**:MCP 工具专注于高频核心操作(索引、搜索),高级管理操作通过 CLI 执行。 diff --git a/codex-lens/docs/CONFIGURATION.md b/codex-lens/docs/CONFIGURATION.md deleted file mode 100644 index f155c088..00000000 --- a/codex-lens/docs/CONFIGURATION.md +++ /dev/null @@ -1,298 +0,0 @@ -# CodexLens 配置说明 - -## 目录结构 - -``` -~/.codexlens/ # 全局数据目录 -├── .env # 全局 API 配置 (新增) -├── settings.json # 运行时设置 -├── embedding_lock.json # 模型锁定文件 -├── registry.db # 项目注册表 -├── indexes/ # 集中式索引存储 -└── venv/ # Python 虚拟环境 - -project/ -├── .codexlens/ # 工作区本地目录 -│ ├── .env # 工作区 API 配置 (覆盖全局) -│ ├── index.db # 项目索引数据库 -│ ├── cache/ # 缓存目录 -│ └── .gitignore # 排除敏感文件 -└── .env # 项目根目录配置 -``` - -## 配置优先级 - -配置加载顺序 (后者覆盖前者): - -| 优先级 | 位置 | 说明 | -|--------|------|------| -| 1 (最低) | `~/.codexlens/.env` | 全局默认配置 | -| 2 | `project/.env` | 项目根目录配置 | -| 3 | `project/.codexlens/.env` | 工作区本地配置 | -| 4 (最高) | 环境变量 | Shell 环境变量 | - -## 环境变量 - -### Embedding 配置 - -用于 `litellm` 后端的嵌入向量服务: - -```bash -# API 密钥 -EMBEDDING_API_KEY=your-api-key - -# API 基础 URL -EMBEDDING_API_BASE=https://api.example.com/v1 - -# 嵌入模型名称 -EMBEDDING_MODEL=text-embedding-3-small -``` - -**支持的提供商示例**: - -| 提供商 | API Base | 模型示例 | -|--------|----------|----------| -| OpenAI | `https://api.openai.com/v1` | `text-embedding-3-small` | -| ModelScope | `https://api-inference.modelscope.cn/v1` | `Qwen/Qwen3-Embedding-8B` | -| Azure | `https://your-resource.openai.azure.com` | `text-embedding-ada-002` | - -### LiteLLM 配置 - -用于 LLM 功能 (重排序、语义分析等): - -```bash -# API 密钥 -LITELLM_API_KEY=your-api-key - -# API 基础 URL -LITELLM_API_BASE=https://api.example.com/v1 - -# 模型名称 -LITELLM_MODEL=gpt-4o-mini -``` - -### Reranker 配置 - -用于搜索结果重排序 (可选): - -```bash -# API 密钥 -RERANKER_API_KEY=your-api-key - -# API 基础 URL -RERANKER_API_BASE=https://api.siliconflow.cn - -# 提供商: siliconflow, cohere, jina -RERANKER_PROVIDER=siliconflow - -# 重排序模型 -RERANKER_MODEL=BAAI/bge-reranker-v2-m3 -``` - -### 通用配置 - -```bash -# 自定义数据目录 (默认: ~/.codexlens) -CODEXLENS_DATA_DIR=~/.codexlens - -# 启用调试模式 -CODEXLENS_DEBUG=false -``` - -## settings.json - -运行时设置保存在 `~/.codexlens/settings.json`: - -```json -{ - "embedding": { - "backend": "litellm", - "model": "Qwen/Qwen3-Embedding-8B", - "use_gpu": false, - "endpoints": [ - { - "model": "Qwen/Qwen3-Embedding-8B", - "api_key": "${EMBEDDING_API_KEY}", - "api_base": "${EMBEDDING_API_BASE}", - "weight": 1.0 - } - ], - "strategy": "latency_aware", - "cooldown": 60.0 - }, - "llm": { - "enabled": true, - "tool": "gemini", - "timeout_ms": 300000, - "batch_size": 5 - }, - "parsing": { - "use_astgrep": false - }, - "indexing": { - "static_graph_enabled": false, - "static_graph_relationship_types": ["imports", "inherits"] - } -} -``` - -### Embedding 设置 - -| 字段 | 类型 | 说明 | -|------|------|------| -| `backend` | string | `fastembed` (本地) 或 `litellm` (API) | -| `model` | string | 模型名称或配置文件 | -| `use_gpu` | bool | GPU 加速 (仅 fastembed) | -| `endpoints` | array | 多端点配置 (仅 litellm) | -| `strategy` | string | 负载均衡策略 | -| `cooldown` | float | 限流冷却时间 (秒) | - -**Embedding Backend 对比**: - -| 特性 | fastembed | litellm | -|------|-----------|---------| -| 运行方式 | 本地 ONNX | API 调用 | -| 依赖 | 本地模型文件 | API 密钥 | -| 速度 | 快 (本地) | 取决于网络 | -| 模型选择 | 预定义配置文件 | 任意 API 模型 | -| GPU 支持 | 是 | N/A | - -**负载均衡策略**: - -| 策略 | 说明 | -|------|------| -| `round_robin` | 轮询分配 | -| `latency_aware` | 延迟感知 (推荐) | -| `weighted_random` | 加权随机 | - -### LLM 设置 - -| 字段 | 类型 | 说明 | -|------|------|------| -| `enabled` | bool | 启用 LLM 功能 | -| `tool` | string | LLM 工具 (`gemini`, `codex`) | -| `timeout_ms` | int | 超时时间 (毫秒) | -| `batch_size` | int | 批处理大小 | - -### Parsing 设置 - -| 字段 | 类型 | 说明 | -|------|------|------| -| `use_astgrep` | bool | 优先使用 ast-grep 解析关系(实验性;当前主要用于 Python relationships) | - -### Indexing 设置(静态图) - -| 字段 | 类型 | 说明 | -|------|------|------| -| `static_graph_enabled` | bool | 索引时将 relationships 写入全局 `global_relationships`,用于搜索阶段静态图扩展 | -| `static_graph_relationship_types` | array | 允许持久化的关系类型:`imports` / `inherits` / `calls` | - -**CLI 覆盖(单次运行,不写入 settings.json)**: - -```bash -# 索引时启用静态图 relationships + 使用 ast-grep(如果可用) -codexlens index init --use-astgrep --static-graph --static-graph-types imports,inherits,calls -``` - -**Search staged 静态图扩展(高级)**: - -```bash -codexlens search --cascade-strategy staged --staged-stage2-mode static_global_graph -``` - -## FastEmbed 模型配置文件 - -使用 `fastembed` 后端时的预定义模型: - -| 配置文件 | 模型 | 维度 | 大小 | -|----------|------|------|------| -| `fast` | BAAI/bge-small-en-v1.5 | 384 | 80MB | -| `base` | BAAI/bge-base-en-v1.5 | 768 | 220MB | -| `code` | jinaai/jina-embeddings-v2-base-code | 768 | 150MB | -| `minilm` | sentence-transformers/all-MiniLM-L6-v2 | 384 | 90MB | -| `multilingual` | intfloat/multilingual-e5-large | 1024 | 1000MB | -| `balanced` | mixedbread-ai/mxbai-embed-large-v1 | 1024 | 600MB | - -## 快速开始 - -### 1. 使用全局配置 - -创建 `~/.codexlens/.env`: - -```bash -# 复制示例配置 -cp codex-lens/.env.example ~/.codexlens/.env - -# 编辑配置 -nano ~/.codexlens/.env -``` - -### 2. 使用本地嵌入 (fastembed) - -```bash -# 初始化索引 (使用 code 配置文件) -codexlens init --backend fastembed --model code - -# 或使用多语言模型 -codexlens init --backend fastembed --model multilingual -``` - -### 3. 使用 API 嵌入 (litellm) - -```bash -# 设置环境变量 -export EMBEDDING_API_KEY=your-key -export EMBEDDING_API_BASE=https://api.example.com/v1 -export EMBEDDING_MODEL=text-embedding-3-small - -# 初始化索引 -codexlens init --backend litellm --model text-embedding-3-small -``` - -### 4. 验证配置 - -```bash -# 检查配置加载 -codexlens config show - -# 测试嵌入 -codexlens test-embedding "Hello World" -``` - -## 故障排除 - -### 配置未加载 - -检查文件权限和路径: - -```bash -ls -la ~/.codexlens/.env -cat ~/.codexlens/.env -``` - -### API 错误 - -1. 验证 API 密钥有效性 -2. 检查 API Base URL 是否正确 -3. 确认模型名称匹配提供商支持的模型 - -### 模型不兼容 - -如果更换嵌入模型,需要重建索引: - -```bash -# 删除旧索引 -rm -rf project/.codexlens/ - -# 重新初始化 -codexlens init --backend litellm --model new-model -``` - -## 相关文件 - -| 文件 | 说明 | -|------|------| -| `src/codexlens/config.py` | 配置类定义 | -| `src/codexlens/env_config.py` | 环境变量加载 | -| `src/codexlens/cli/model_manager.py` | FastEmbed 模型管理 | -| `src/codexlens/semantic/factory.py` | Embedder 工厂 | diff --git a/codex-lens/docs/DESIGN_EVALUATION_REPORT.md b/codex-lens/docs/DESIGN_EVALUATION_REPORT.md deleted file mode 100644 index ae2e1504..00000000 --- a/codex-lens/docs/DESIGN_EVALUATION_REPORT.md +++ /dev/null @@ -1,1010 +0,0 @@ -# 深度技术评估报告:Codex-Lens 改进方案 - -**评估工具**: Gemini 2.5 Pro -**评估日期**: 2025-12-15 -**评估范围**: 多层次分词器、静态分析语义图谱、Docstring与LLM混合策略 - ---- - -## 执行摘要 - -三个方案目标清晰,层层递进,从优化现有功能(混合策略)到改进核心机制(分词器),再到引入全新能力(语义图谱),共同构成了一个宏伟但可行的代码理解增强蓝图。 - -### 核心评分 - -| 方案 | 完善性评分 | 可行性 | ROI | 技术风险 | 建议优先级 | -|------|-----------|--------|-----|----------|-----------| -| Docstring与LLM混合 | 8.0/10 | ⭐⭐⭐⭐⭐ 高 | ⭐⭐⭐⭐⭐ 极高 | ⭐⭐ 低 | **P0 (立即启动)** | -| 多层次分词器 | 8.0/10 | ⭐⭐⭐⭐ 中高 | ⭐⭐⭐⭐ 高 | ⭐⭐⭐ 中 | **P1 (Q2启动)** | -| 静态分析语义图谱 | 6.0/10 | ⭐⭐ 低 | ⭐⭐⭐⭐⭐ 极高* | ⭐⭐⭐⭐⭐ 极高 | **P2 (需原型验证)** | - -*注:图谱的ROI极高,但前提是技术挑战得以克服 - ---- - -## 1. Docstring与LLM混合策略评估 - -### 1.1 完善性评分 - -| 维度 | 评分 | 说明 | -|------|------|------| -| 架构设计 | 9/10 | 流程清晰,分层策略合理 | -| 实现细节 | 8/10 | 代码示例完整,但提取逻辑可优化 | -| 测试覆盖 | 8/10 | 单元测试和集成测试设计充分 | -| 风险控制 | 7/10 | 识别了主要风险,但降级策略可加强 | -| **平均分** | **8.0/10** | 设计文档非常完整 | - -### 1.2 技术可行性:⭐⭐⭐⭐⭐ 高 - -**可以直接实施的部分**: -- ✅ `DocstringQuality` 枚举和评分逻辑(基于长度和结构) -- ✅ `HybridEnhancer` 的三种策略分支 -- ✅ 成本统计和监控模块 -- ✅ Python docstring解析(Google/NumPy风格) - -**需要优化的部分**: -- ⚠️ **Docstring提取** (`_extract_from_code`):当前基于行号搜索较脆弱 - - **改进建议**:使用tree-sitter AST精确定位函数体内的第一个字符串表达式 - ```python - # 改进后的提取逻辑 - body_node = func_node.child_by_field_name('body') - if body_node and len(body_node.children) > 0: - first_stmt = body_node.children[0] - if first_stmt.type == 'expression_statement': - expr = first_stmt.children[0] - if expr.type in ['string', 'string_literal']: - return extract_string_content(expr) - ``` - -**需要原型验证的模块**: -- 🔬 **质量评估器准确性**:在3-5个真实项目上验证评估准确率 - - 目标:与人工标注对比,准确率达到85%+ - - 方法:收集100个docstring样本,人工标注质量等级,调整阈值 - -### 1.3 性能与效果预测 - -| 指标 | 预测值 | 依据 | -|------|--------|------| -| 搜索质量提升 | +15-25% | docstring保留作者意图,准确性接近100% | -| 成本降低 | 40-60% | 高质量docstring占比越高,节省越多 | -| 索引速度提升 | +30-50% | 跳过完整LLM生成步骤 | -| 元数据准确率 | 95%+ | 使用docstring的符号达到近完美准确性 | - -**成本计算示例**(1000个函数): -``` -假设docstring分布:High 30% | Medium 40% | Low 30% - -纯LLM模式:1000 × 100% = 1000 units -混合模式:300×20% + 400×60% + 300×100% = 600 units -节省:40% - -如果High质量达到50%: -混合模式:500×20% + 300×60% + 200×100% = 480 units -节省:52% -``` - -### 1.4 关键设计盲点 - -#### 盲点1:Docstring与代码不同步 -**问题描述**:代码已修改,docstring未更新,导致元数据不准确。 - -**影响程度**:🔴 高(可能误导用户) - -**改进建议**: -```python -class DocstringFreshnessChecker: - def check_parameter_consistency(self, signature, docstring_params): - """检查参数列表是否匹配""" - actual_params = extract_params_from_signature(signature) - documented_params = set(docstring_params.keys()) - - missing = actual_params - documented_params - extra = documented_params - actual_params - - if missing or extra: - return QualityDowngrade( - from_level='HIGH', - to_level='MEDIUM', - reason=f'Parameter mismatch: missing={missing}, extra={extra}' - ) - - def check_return_type_consistency(self, signature, docstring_returns): - """检查返回值类型注解是否与docstring匹配""" - if has_return_annotation(signature) and docstring_returns: - annotation = get_return_annotation(signature) - # 简单的字符串匹配检查 - if annotation.lower() not in docstring_returns.lower(): - return QualityWarning('Return type mismatch') -``` - -#### 盲点2:结构化信息丢失 -**问题描述**:`_use_docstring_with_llm_keywords` 只使用了summary,丢失了参数、返回值、示例等信息。 - -**影响程度**:🟡 中(影响搜索结果展示的丰富性) - -**改进建议**:扩展 `SemanticMetadata` 数据结构: -```python -@dataclass -class EnhancedSemanticMetadata(SemanticMetadata): - """扩展的语义元数据""" - parameters: Optional[Dict[str, str]] = None # {param_name: description} - returns: Optional[str] = None - raises: Optional[List[str]] = None - examples: Optional[str] = None - - # 搜索结果展示时可以显示更丰富的信息 -``` - -#### 盲点3:多语言docstring提取差异 -**问题描述**:不同语言的docstring格式和位置不同,单一提取器无法通用。 - -**影响程度**:🟡 中(影响多语言支持) - -**改进建议**:语言特定提取器: -```python -class LanguageSpecificExtractor: - EXTRACTORS = { - 'python': PythonDocstringExtractor, - 'javascript': JSDocExtractor, - 'typescript': TSDocExtractor, - 'java': JavadocExtractor, - } - - def extract(self, language, code, symbol): - extractor_class = self.EXTRACTORS.get(language, GenericExtractor) - return extractor_class().extract(code, symbol) - -class JSDocExtractor: - """JavaScript/TypeScript JSDoc在函数定义之前""" - def extract(self, code, symbol): - lines = code.splitlines() - start_line = symbol.range[0] - 1 - - # 向上查找 /** ... */ - for i in range(start_line - 1, max(0, start_line - 20), -1): - if '*/' in lines[i]: - return self._extract_jsdoc_block(lines, i) -``` - -### 1.5 时间估算校准 - -**原估算**:6-8周 -**校准后**:✅ 6-8周(合理) - -**分阶段时间表**: -- Week 1-2: 核心`DocstringExtractor` + `QualityEvaluator` -- Week 3-4: `HybridEnhancer` + 三种策略 -- Week 5-6: 真实项目测试 + 评估器调优 -- Week 7-8: 多语言支持 + CLI集成 - ---- - -## 2. 多层次分词器评估 - -### 2.1 完善性评分 - -| 维度 | 评分 | 说明 | -|------|------|------| -| 架构设计 | 9/10 | 分层思想清晰,数据结构设计合理 | -| 实现细节 | 8/10 | AST遍历逻辑详细,但边界情况处理可加强 | -| 测试覆盖 | 7/10 | 单元测试设计充分,缺少大规模集成测试 | -| 风险控制 | 8/10 | 提出了降级策略和性能优化方案 | -| **平均分** | **8.0/10** | 技术方案完整且可行 | - -### 2.2 技术可行性:⭐⭐⭐⭐ 中高 - -**可以直接实施的部分**: -- ✅ `MacroChunker`(符号级分词)- 复用现有`code_extractor` -- ✅ 数据库schema设计(层级关系存储) -- ✅ 基础的`MicroChunker`(for/while/if/try块提取) - -**需要原型验证的部分**: -- 🔬 **层级化检索权重**:`search_hierarchical`中的`level_weights={1:1.0, 2:0.8}`较主观 - - **验证方法**:构建测试集,对比不同权重策略的搜索结果相关性 - - **实验参数**: - ```python - weight_strategies = [ - {'macro': 1.0, 'micro': 0.5}, # 强调宏观 - {'macro': 1.0, 'micro': 0.8}, # 原设计 - {'macro': 1.0, 'micro': 1.0}, # 平等对待 - {'macro': 0.8, 'micro': 1.0}, # 强调细节 - ] - ``` - -- 🔬 **逻辑块粒度控制**:何时需要二次划分?当前阈值`max_lines=50`需验证 - - **数据收集**:统计真实项目中函数长度分布 - - **A/B测试**:对比阈值30/50/100的搜索效果 - -**技术挑战**: -1. **上下文冗余问题**:父chunk和子chunk的摘要如何避免重复? - - **解决方案**:子chunk的LLM prompt应强调**角色定位** - ``` - # Bad Prompt - "Summarize this for loop" - - # Good Prompt - "This for loop is part of function authenticate_user(). - Describe its specific role in the authentication process." - ``` - -2. **结果聚合与展示**:搜索同时匹配父子chunk时如何展示? - - **UI设计建议**: - ``` - [Match 1] ▼ function authenticate_user() - Score: 0.92 - ├─ Line 45-52: Password validation loop - Score: 0.88 - └─ Line 67-75: Token generation block - Score: 0.85 - - [Match 2] function login_handler() - Score: 0.81 - ``` - -### 2.3 性能与效果预测 - -| 指标 | 预测值 | 说明 | -|------|--------|------| -| 搜索质量提升 | +30-40% | 大函数中精确定位逻辑块 | -| 索引时间增加 | +50-100% | AST深度遍历 + 更多LLM调用 | -| 存储空间增加 | +40-80% | 取决于micro-chunk数量 | -| 检索速度 | ±5% | 精确目标可能更快 | - -**存储空间计算**: -``` -假设平均每个文件10个函数 -每个函数生成1个macro chunk + 平均3个micro chunks -总chunk数:10 × (1 + 3) = 40 chunks/文件 - -相比现有(10 chunks/文件)增长:4倍 - -但使用选择性向量化(只对50%的micro chunks生成向量): -向量索引增长:10 × (1 + 1.5) = 2.5倍 -``` - -### 2.4 关键设计盲点 - -#### 盲点1:选择性向量化的风险 -**问题描述**:基于行数(<5行)跳过向量化,可能遗漏重要的简短逻辑。 - -**影响程度**:🟡 中(影响搜索覆盖率) - -**改进建议**:智能选择策略 -```python -class IntelligentVectorizationSelector: - def should_vectorize(self, chunk: HierarchicalChunk) -> bool: - # 规则1: Level 1总是向量化 - if chunk.metadata.level == 1: - return True - - # 规则2: 复杂度判断(圈复杂度) - complexity = calculate_cyclomatic_complexity(chunk.content) - if complexity >= 3: # 有多个分支 - return True - - # 规则3: 关键词判断 - critical_keywords = ['critical', 'security', 'auth', 'payment'] - if any(kw in chunk.content.lower() for kw in critical_keywords): - return True - - # 规则4: LLM快速判断重要性 - if chunk.metadata.level == 2 and len(chunk.content) < 5: - importance = quick_llm_importance_check(chunk) - return importance > 0.7 - - return False -``` - -#### 盲点2:LLM增强的上下文设计不足 -**问题描述**:文档中micro chunk的prompt未充分利用父chunk信息。 - -**影响程度**:🟡 中(影响元数据质量) - -**改进建议**:上下文感知的prompt模板 -```python -MICRO_CHUNK_PROMPT = """ -PARENT CONTEXT: -- Function: {parent_symbol_name} -- Purpose: {parent_purpose} -- Summary: {parent_summary} - -THIS CODE BLOCK ({chunk_type} at lines {start_line}-{end_line}): -```{language} -{chunk_content} -``` - -TASK: Describe this block's SPECIFIC ROLE in the parent function. -Focus on: -- What does it do within the larger logic flow? -- What intermediate result does it produce? -- How does it contribute to the parent function's goal? - -OUTPUT: 1 sentence describing its role + 3-5 keywords -""" -``` - -#### 盲点3:增量更新的复杂性 -**问题描述**:文件修改后,如何高效地重新索引? - -**影响程度**:🟡 中(影响实用性) - -**改进建议**:智能增量更新 -```python -class IncrementalHierarchicalIndexer: - def update_file(self, file_path: Path): - new_content = file_path.read_text() - new_hash = hashlib.sha256(new_content.encode()).hexdigest() - - # 检查文件级别的变化 - old_hash = self.get_file_hash(file_path) - if new_hash == old_hash: - return # 文件未变化 - - # 提取新的chunks - new_chunks = self.chunker.chunk_file(new_content, file_path) - - # 与旧chunks对比(基于内容hash) - old_chunks = self.get_chunks_by_file(file_path) - - for new_chunk in new_chunks: - new_chunk_hash = hash_chunk_content(new_chunk) - matching_old = find_by_hash(old_chunks, new_chunk_hash) - - if matching_old: - # chunk内容未变,保留旧的embedding和metadata - new_chunk.embedding = matching_old.embedding - new_chunk.metadata = matching_old.metadata - else: - # 新chunk或内容已变,需要重新处理 - self.process_new_chunk(new_chunk) - - # 删除不再存在的旧chunks - self.delete_obsolete_chunks(old_chunks, new_chunks) -``` - -### 2.5 时间估算校准 - -**原估算**:7-10周 -**校准后**:✅ 7-10周(合理) - -**关键里程碑**: -- Week 3: 完成数据库迁移和基础chunker -- Week 6: 完成层级化检索逻辑 -- Week 8: 完成LLM增强集成 -- Week 10: 性能优化和发布 - ---- - -## 3. 静态分析语义图谱评估 - -### 3.1 完善性评分 - -| 维度 | 评分 | 说明 | -|------|------|------| -| 架构设计 | 8/10 | 图模型设计合理,但实现路径模糊 | -| 实现细节 | 6/10 | 核心难点(名称解析)实现过于简化 | -| 测试覆盖 | 5/10 | 测试策略不足,缺少复杂场景覆盖 | -| 风险控制 | 5/10 | 对动态语言的限制和性能瓶颈认识不足 | -| **平均分** | **6.0/10** | 愿景宏大但技术风险极高 | - -### 3.2 技术可行性:⭐⭐ 低(短期完全实现) - -**阿喀琉斯之踵:名称解析 (`NameResolver`)** - -文档中的实现**严重低估了难度**: -```python -# 文档中的简化实现 -def resolve_call_target(self, call_edge, caller_context): - # 策略1: 本地调用 - # 策略2: 方法调用 - # 策略3: 导入的函数(TODO) -``` - -**真实世界的复杂性**: -```python -# Case 1: 复杂导入 -from package.submodule import func as f -from package import * # 星号导入 -import package.module # 模块导入 - -result = f(x) # 需要解析f -> package.submodule.func - -# Case 2: 动态调用 -handler = getattr(module, 'process_' + request_type) -handler() # 静态分析无法确定目标 - -# Case 3: 装饰器包装 -@cache -@retry(max_attempts=3) -def expensive_operation(): - pass - -# 调用时需要解析到原始函数,而非装饰器 - -# Case 4: 类型变量 -processor: Callable = get_processor(config) -processor() # 需要类型推断 - -# Case 5: 上下文管理器 -with get_connection() as conn: - conn.execute(...) # 需要理解__enter__返回值类型 -``` - -**技术债务评估**: -- 完整实现需要一个接近 `pyright` 或 `mypy` 级别的类型推断引擎 -- 这些工具历经多年开发,代码量数十万行 -- 不现实在12-15周内从零实现 - -**建议的务实路径**: -1. **集成现有工具**:调研 `jedi` 或 `pyright` 的API是否可用 -2. **限定范围**:V1只处理简单的本地调用和直接导入 -3. **明确边界**:对无法解析的调用,标记为"动态"并降低置信度 - -### 3.3 性能与效果预测 - -**前提假设**:名称解析能达到70%+的准确率 - -| 指标 | 预测值 | 说明 | -|------|--------|------| -| 搜索维度 | 全新维度 | 支持"影响分析"、"调用链追踪" | -| 开发时间 | **24-30周** | 原估算12-15周过于乐观 | -| 索引时间增加 | +300% | 全量静态分析 + 图构建 | -| 存储空间 | +200-500% | 图数据庞大 | -| 查询速度 | <100ms | 简单调用关系查询 | -| 影响分析 | 数秒 | 全代码库范围的图遍历 | - -**名称解析准确率影响**: -``` -如果准确率只有50%: -- 调用图充满噪音和缺失边 -- 影响分析结果不可信 -- 整个图谱价值大打折扣 - -如果准确率达到85%+: -- 可以支撑实用的影响分析 -- 结合LLM语义,能回答复杂问题 -- 成为代码理解的核心基础设施 -``` - -### 3.4 关键设计盲点 - -#### 盲点1:动态语言的静态分析极限 -**问题描述**:Python高度动态,大量调用关系在运行时才确定。 - -**影响程度**:🔴 极高(根本性限制) - -**改进建议**:混合静态+运行时分析 -```python -class HybridCallGraphBuilder: - def build_graph(self, codebase): - # 阶段1: 静态分析(确定性的调用) - static_graph = self.static_analyzer.build_call_graph(codebase) - - # 阶段2: 运行时数据补充(可选) - if self.config.enable_runtime_profiling: - runtime_data = self.collect_runtime_traces() - static_graph.merge(runtime_data, confidence=0.7) - - # 阶段3: LLM推断(低置信度) - for dynamic_call in static_graph.get_unresolved_calls(): - possible_targets = self.llm_infer_call_target(dynamic_call) - static_graph.add_edges(dynamic_call, possible_targets, confidence=0.5) - - return static_graph -``` - -**运行时数据来源**: -- 集成现有APM工具(如Sentry, DataDog) -- 代码覆盖率报告(如coverage.py) -- 自定义的轻量级tracer - -#### 盲点2:跨语言支持的工程量 -**问题描述**:文档轻描淡写"支持JS/Java",实际上需要为每种语言重写整个分析引擎。 - -**影响程度**:🔴 极高(时间成本巨大) - -**改进建议**:分阶段语言支持 -``` -V1 (6个月): 只支持Python - - 专注于将Python分析做到80%+准确率 - - 建立完整的图存储、查询、LLM增强基础设施 - -V2 (再6个月): 添加JavaScript/TypeScript - - 复用图基础设施 - - 开发JS特定的AST分析器 - -V3 (再6个月): 添加Java - - Java的静态类型使分析更容易 - - 但生态复杂(Maven, Gradle, Spring框架) -``` - -#### 盲点3:增量更新的复杂性 -**问题描述**:当一个核心函数签名改变时,图中所有调用它的边都需要更新。 - -**影响程度**:🟡 中(影响可用性) - -**改进建议**:变更传播队列 -```python -class GraphIncrementalUpdater: - def update_function(self, function_id: str, new_code: str): - old_signature = self.graph.get_node(function_id).signature - new_signature = extract_signature(new_code) - - if old_signature != new_signature: - # 签名变化,需要级联更新 - affected_edges = self.graph.get_edges_targeting(function_id) - - for edge in affected_edges: - # 标记为待更新 - self.update_queue.add(UpdateTask( - edge_id=edge.edge_id, - reason='target_signature_changed', - priority='high' - )) - - # 重新分析函数内部的调用 - new_callees = self.analyzer.extract_calls(new_code) - self.graph.update_edges_from(function_id, new_callees) - - # 后台任务:LLM重新生成语义 - self.llm_queue.add(LLMTask(node_id=function_id)) -``` - -### 3.5 时间估算校准 - -**原估算**:12-15周 -**校准后**:🔴 **24-30周到达可用的V1** - -**现实的里程碑**: -``` -Phase 0: 前置验证 (4-6周) - - NameResolver原型开发和测试 - - 决策点:如果准确率<70%,暂停项目或调整范围 - -Phase 1: 基础图构建 (8周) - - 简单的调用图提取(本地调用+直接导入) - - SQLite图存储和基础查询 - -Phase 2: LLM语义增强 (4周) - - 为节点和边生成语义描述 - - 批量处理优化 - -Phase 3: 高级查询 (6周) - - 影响分析 - - 调用链追踪 - - 数据流基础支持 - -Phase 4: 优化与稳定 (6周) - - 性能优化 - - 增量更新 - - 大规模测试 -``` - -### 3.6 必须的前置验证 - -**NameResolver原型验证 (P0优先级)**: -```python -# 原型验证目标 -class NameResolverPrototype: - """ - 目标:在一个真实的中等复杂度Python项目(~10k行代码,20-30个文件)上测试 - - 成功标准: - 1. 本地函数调用解析准确率 > 95% - 2. 跨文件导入解析准确率 > 80% - 3. 类方法调用解析准确率 > 75% - 4. 整体准确率 > 70% - - 如果失败: - - 调研集成jedi/pyright的可行性 - - 或调整图谱范围(只做本地调用图) - - 或推迟项目,投入更多资源 - """ - - def validate(self, test_project_path: Path): - # 手动标注ground truth - ground_truth = self.load_manual_annotations(test_project_path) - - # 运行原型 - resolved_calls = self.resolve_all_calls(test_project_path) - - # 计算准确率 - metrics = self.calculate_metrics(resolved_calls, ground_truth) - - return ValidationReport( - accuracy=metrics.accuracy, - precision=metrics.precision, - recall=metrics.recall, - false_positives=metrics.fp_examples, - false_negatives=metrics.fn_examples, - ) -``` - ---- - -## 4. 方案间协同分析 - -### 4.1 依赖关系图 - -``` -Docstring混合策略 ──(提供高质量元数据)──> 语义图谱 - │ │ - │ │ - (共享docstring (共享AST分析) - 解析能力) │ - │ │ - v v - 多层次分词器 ────(提供细粒度节点)────> 语义图谱 -``` - -**关键依赖**: -1. **图谱依赖混合策略**:高质量的节点摘要和purpose标签来自混合策略 -2. **图谱和分词器共享AST能力**:可以开发一个统一的`ASTAnalyzer`模块 -3. **分词器增强图谱**:micro chunks可以作为图谱的更细粒度节点 - -### 4.2 协同效应(1+1+1 > 3) - -**场景1:精确代码导航** -``` -用户查询: "Find the password hashing logic in authentication" - -Step 1: 向量搜索(分词器) - -> 定位到 authenticate_user() 函数的 micro chunk (lines 45-52) - -Step 2: 图谱上下文 - -> 显示该函数的所有调用者:login_api(), register_api() - -> 追踪数据流:password变量的传递路径 - -Step 3: 语义元数据(混合策略) - -> 展示函数的docstring:"使用bcrypt进行密码哈希,salt轮数为12" - -> 关联的security标签和注意事项 -``` - -**场景2:影响分析** -``` -用户问题: "If I change User.email validation, what breaks?" - -Step 1: 图谱查询 - -> 找到所有调用 User.email setter的函数 - -> 构建影响树:validate_email() -> update_profile() -> profile_api() - -Step 2: 分词器展示 - -> 对每个受影响的函数,展示具体的调用位置(micro chunk) - -> 用户可以快速review每个调用点的上下文 - -Step 3: 混合策略提供摘要 - -> 每个函数的docstring说明其业务意图 - -> LLM生成的"此函数在email验证中的角色"描述 -``` - -### 4.3 组合实施的量化效果预测 - -**假设场景**:一个10万行的Python代码库 - -| 指标 | 当前 | +混合策略 | +分词器 | +图谱(全部) | -|------|------|----------|---------|------------| -| 搜索准确率 | 70% | 80% (+10%) | 92% (+12%) | 95% (+3%) | -| 索引时间 | 10min | 7min (-30%) | 12min (+20%) | 50min (+300%) | -| 存储空间 | 1GB | 0.8GB (-20%) | 2GB (+100%) | 6GB (+200%) | -| 查询延迟 | 50ms | 50ms | 60ms (+20%) | 100ms (+100%) | -| 能力维度 | 搜索 | 搜索 | 搜索 | 搜索+理解+分析 | - -**关键洞察**: -- 混合策略是"降本增效",提升质量同时降低成本 -- 分词器是"增效",显著提升搜索精度,但有成本 -- 图谱是"开新维度",不只是优化,而是全新能力 - ---- - -## 5. 优先级重排与实施路线图 - -### 5.1 重排后的优先级 - -**P0 - 立即启动(Q1)**:Docstring与LLM混合策略 -- ✅ ROI最高(成本-40%,质量+15%) -- ✅ 风险最低 -- ✅ 6-8周可见效 -- ✅ 为后续方案铺路(提供高质量元数据) - -**P1 - Q2启动**:多层次分词器 -- ✅ 投入产出比高 -- ✅ 技术可行性已验证 -- ✅ 7-10周实现核心功能 -- ⚠️ 依赖P0完成后的稳定基础 - -**P2 - 需原型验证后决定**:静态分析语义图谱 -- 🔬 **前置条件**:NameResolver原型验证通过(4-6周) -- ⚠️ 如果验证失败,调整范围或推迟 -- ✅ 如果验证成功,Q3-Q4启动正式开发(24-30周) - -### 5.2 详细实施路线图 - -``` -Q1 2024 (Week 1-13) -├─ Week 1-8: 实施Docstring混合策略 -│ ├─ Week 1-2: DocstringExtractor + QualityEvaluator -│ ├─ Week 3-4: HybridEnhancer核心逻辑 -│ ├─ Week 5-6: 真实项目测试 + 调优 -│ └─ Week 7-8: 多语言支持 + 发布 -│ -├─ Week 4-10: (并行) NameResolver原型验证 -│ ├─ Week 4-6: 原型开发 -│ ├─ Week 7-8: 在3个真实项目上测试 -│ ├─ Week 9-10: 评估报告 + 决策 -│ └─ 决策点:图谱项目是否继续? -│ -└─ Week 9-13: 分词器Phase 0 (准备工作) - ├─ 数据库设计和迁移脚本 - ├─ 基础AST分析模块 - └─ 测试环境搭建 - -Q2 2024 (Week 14-26) -├─ Week 14-23: 实施多层次分词器 -│ ├─ Week 14-16: MacroChunker + MicroChunker -│ ├─ Week 17-19: HierarchicalVectorStore -│ ├─ Week 20-21: LLM分层增强集成 -│ └─ Week 22-23: 性能优化 + 发布 -│ -└─ Week 24-26: 评估和规划 - ├─ 收集用户反馈 - ├─ 调整图谱计划(如果原型通过) - └─ 制定Q3-Q4详细计划 - -Q3-Q4 2024 (Week 27-52) - 条件性启动图谱 -├─ 如果NameResolver原型通过: -│ ├─ Week 27-34: 基础调用图构建 -│ ├─ Week 35-38: LLM语义增强 -│ ├─ Week 39-44: 高级查询功能 -│ └─ Week 45-52: 优化与稳定 -│ -└─ 如果原型失败: - ├─ 调研集成现有工具(jedi/pyright) - ├─ 或调整范围(只做本地调用图) - └─ 或推迟到2025,投入更多资源 -``` - ---- - -## 6. 具体行动建议 - -### 6.1 立即可执行(本周) - -**行动1**:启动Docstring混合策略开发 -```bash -# 创建开发分支 -git checkout -b feature/docstring-hybrid-strategy - -# 目录结构 -src/codexlens/semantic/ - ├── docstring_extractor.py # NEW - ├── quality_evaluator.py # NEW - ├── hybrid_enhancer.py # NEW (替代llm_enhancer.py) - └── llm_enhancer.py # 保留作为后端 - -# 第一周任务 -- [ ] 实现PythonDocstringExtractor (基于tree-sitter) -- [ ] 实现DocstringQuality评估器 -- [ ] 编写单元测试(覆盖率>80%) -``` - -**行动2**:建立评估基准 -```python -# scripts/evaluate_docstring_quality.py -""" -在3个真实项目上评估docstring质量分布 - -目标项目: -1. 内部项目A (高质量docstring, Google style) -2. 开源项目B (中等质量docstring, NumPy style) -3. 遗留代码C (低质量或无docstring) - -输出: -- 质量分布统计(HIGH/MEDIUM/LOW/MISSING百分比) -- 评估器准确率(vs 人工标注) -- 潜在节省成本估算 -""" -``` - -### 6.2 需要调研(2周内) - -**调研1**:NameResolver技术选型 -``` -目标:评估集成现有工具的可行性 - -方案A:集成jedi - - API文档:https://jedi.readthedocs.io/ - - 评估点:能否获取函数调用的目标定义? - - 实验:写一个100行的测试脚本,调用jedi API - -方案B:集成pyright (通过CLI) - - pyright --verifytypes可以输出类型信息 - - 评估点:能否解析其输出构建调用图? - - 实验:在测试项目上运行pyright,分析输出 - -方案C:自研(退路) - - 只处理简单场景(本地调用+直接导入) - - 明确标注"不支持复杂导入" -``` - -**调研2**:图数据库选型 -``` -目标:对比SQLite vs Neo4j vs NetworkX - -测试场景: -- 1000个节点,5000条边的调用图 -- 查询1: 找到函数A的所有调用者(广度优先,深度3) -- 查询2: 找到函数A和函数B之间的最短路径 -- 查询3: 找到所有孤立的节点(未被调用的函数) - -评估指标: -- 查询性能(<100ms?) -- 存储空间 -- 维护复杂度 -- 是否支持事务 -``` - -### 6.3 必须做的原型验证(4-6周) - -**原型1**:NameResolver验证原型 -```python -# prototypes/name_resolver_validation/ - -测试项目:选择一个中等复杂度的开源项目 - - requests库 (约10k行,30+文件) 或 - - flask库 (约15k行,50+文件) - -验证步骤: -1. 手动标注100个函数调用关系(ground truth) -2. 运行原型,提取调用图 -3. 对比结果,计算准确率/召回率 - -成功标准: -- 准确率 > 70% -- 召回率 > 60% -- 假阳性率 < 20% - -失败后续: -- 如果< 50%准确率:暂停图谱项目,调研集成方案 -- 如果50-70%:调整范围,只做高置信度的简单调用 -- 如果> 70%:继续,但投入更多资源优化 -``` - -**原型2**:层级化检索权重实验 -```python -# prototypes/hierarchical_search_weights/ - -实验设计: -1. 手动构建一个包含10个函数的测试代码库 -2. 为每个函数创建macro chunk + micro chunks -3. 准备20个搜索查询,人工标注期望结果 -4. 测试不同的权重策略: - - Strategy 1: {macro: 1.0, micro: 0.5} - - Strategy 2: {macro: 1.0, micro: 0.8} - - Strategy 3: {macro: 1.0, micro: 1.0} - - Strategy 4: {macro: 0.8, micro: 1.0} - -评估指标: -- NDCG@10 (Normalized Discounted Cumulative Gain) -- MRR (Mean Reciprocal Rank) -- User preference survey (if possible) - -输出: -- 最佳权重策略 -- 权重参数的敏感性分析 -``` - ---- - -## 7. 风险评估与缓解 - -### 7.1 高风险项 - -| 风险 | 方案 | 影响 | 概率 | 缓解措施 | -|------|------|------|------|----------| -| NameResolver准确率<50% | 图谱 | 🔴 极高 | 40% | 前置原型验证;准备集成jedi的备选方案 | -| 分词器micro chunks过多 | 分词器 | 🟡 中 | 30% | 自适应阈值;选择性向量化 | -| LLM成本超预算 | 全部 | 🟡 中 | 25% | 混合策略优先;批量处理优化 | -| 图谱增量更新复杂度 | 图谱 | 🟡 中 | 50% | V1不支持增量,全量重建;V2再优化 | - -### 7.2 缓解策略矩阵 - -**对于NameResolver风险**: -``` -Plan A (理想): 自研达到70%+准确率 - - 投入: 1名高级工程师 × 6周 - - 成功率: 40% - -Plan B (务实): 集成jedi或pyright - - 投入: 2周调研 + 4周集成 - - 成功率: 70% - - 限制: 依赖外部工具,可能有版本兼容问题 - -Plan C (保底): 限定范围(只做本地调用图) - - 投入: 4周 - - 成功率: 95% - - 限制: 功能大幅缩水,但仍有价值 -``` - -**对于成本控制风险**: -``` -成本监控dashboard: - - 实时显示LLM调用次数和费用 - - 按策略分类(full-gen / refine / keywords-only) - - 告警阈值:日费用>$50 或 月费用>$1000 - -成本优化开关: - - 在配置中设置每日预算上限 - - 超过后自动降级(跳过micro chunks的LLM增强) - - 批量处理大小动态调整 -``` - ---- - -## 8. 总结与最终建议 - -### 8.1 核心结论 - -1. **Docstring混合策略**:✅ **立即启动** - - 完善性最高(8.0/10) - - 技术风险最低 - - ROI最高(成本-40%,质量+15%) - - 6-8周可见效 - -2. **多层次分词器**:✅ **Q2启动** - - 完善性高(8.0/10) - - 技术可行性已验证 - - 搜索质量提升30%+ - - 需在P0完成后启动 - -3. **静态分析语义图谱**:⚠️ **需原型验证** - - 完善性中等(6.0/10) - - 技术风险极高(名称解析难度) - - 潜力巨大(全新能力维度) - - **必须先验证NameResolver可行性** - -### 8.2 最终建议的实施顺序 - -``` -Stage 1 (立即): Docstring混合策略 (6-8周) - ├─ 快速降低成本 - ├─ 提升元数据质量 - └─ 为后续打基础 - -Stage 2 (并行): NameResolver原型 (4-6周) - ├─ 决定图谱项目的命运 - ├─ 如果失败,调整或推迟 - └─ 如果成功,Q3正式启动 - -Stage 3 (Q2): 多层次分词器 (7-10周) - ├─ 显著提升搜索精度 - ├─ 为图谱提供细粒度节点 - └─ 用户体验质的飞跃 - -Stage 4 (Q3-Q4, 条件性): 静态分析图谱 (24-30周) - ├─ 如果Stage 2成功,则启动 - ├─ 从简单做起(本地调用图) - └─ 逐步增强(跨文件、LLM语义) -``` - -### 8.3 成功的关键 - -1. **风险前置**:不要盲目启动图谱,必须先验证核心技术假设 -2. **迭代交付**:每个方案都要尽早发布可用版本,收集反馈 -3. **成本控制**:实时监控LLM费用,设置预算上限和降级机制 -4. **数据驱动**:用真实项目数据验证假设,不要依赖理论推导 -5. **务实落地**:完美是优秀的敌人,先做到70分可用,再优化到90分 - -### 8.4 量化预期(全部实施后) - -**假设**:所有三个方案都成功实施 - -| 指标 | 当前基线 | 预期目标 | 提升幅度 | -|------|---------|---------|---------| -| 搜索准确率 | 70% | **95%** | +25% | -| 搜索覆盖率 | 80% | **98%** | +18% | -| 元数据质量 | 75% | **92%** | +17% | -| LLM成本 | $1000/月 | **$600/月** | -40% | -| 索引速度 | 10min | **15min** | +50% (可接受) | -| 新能力 | 搜索 | **搜索+理解+分析** | 质的飞跃 | - ---- - -**报告完成时间**: 81.2秒 -**评估工具**: Gemini 2.5 Pro -**建议复审周期**: 每个阶段结束后进行复盘和调整 diff --git a/codex-lens/docs/HYBRID_SEARCH_ARCHITECTURE.md b/codex-lens/docs/HYBRID_SEARCH_ARCHITECTURE.md deleted file mode 100644 index 8073b6d7..00000000 --- a/codex-lens/docs/HYBRID_SEARCH_ARCHITECTURE.md +++ /dev/null @@ -1,540 +0,0 @@ -# Hybrid Search Architecture for CodexLens - -> Embedding + Real-time LSP + Clustering + Reranking Pipeline - -## Overview - -This document describes the architecture for a hybrid intelligent code search system that combines: -1. **Low-dimensional embedding model** for semantic search -2. **Real-time LSP integration** for code structure analysis -3. **Graph-based clustering** for result organization -4. **Multi-factor reranking** for intelligent sorting - -**Key Constraint**: Must use real-time LSP servers, NOT pre-indexed data. - -## Architecture Diagram - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ HybridSearchEngine │ -│ ┌─────────────────────────────────────────────────────────────────────┐ │ -│ │ 5-Stage Search Pipeline │ │ -│ │ │ │ -│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌────┐│ │ -│ │ │ Stage 1 │──▶│ Stage 2 │──▶│ Stage 3 │──▶│ Stage 4 │──▶│ S5 ││ │ -│ │ │ Vector │ │ LSP │ │ Graph │ │Clustering│ │Rank││ │ -│ │ │ Search │ │Expansion │ │ Building │ │ +Filter │ │ ││ │ -│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ └────┘│ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ -│ │ -│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────────────────┐ │ -│ │VectorSearchSvc │ │ LspBridge │ │ GraphBuilder │ │ -│ │ │ │ │ │ │ │ -│ │ • Embedding │ │ • get_refs() │ │ • build_from_seeds() │ │ -│ │ • FAISS/HNSW │ │ • get_def() │ │ • add_relationships() │ │ -│ │ • search() │ │ • get_calls() │ │ • CodeAssociationGraph │ │ -│ └────────┬────────┘ └────────┬────────┘ └─────────────────────────────┘ │ -│ │ │ │ -└───────────┼────────────────────┼────────────────────────────────────────────┘ - │ │ - ▼ ▼ - ┌───────────────┐ ┌───────────────────────────────────────┐ - │ Embedding │ │ LanguageServerMultiplexer │ - │ Model (local) │ │ (from REAL_LSP_SERVER_PLAN.md) │ - │ │ │ │ - │ sentence- │ │ ┌─────┐ ┌─────┐ ┌─────┐ ┌──────────┐│ - │ transformers │ │ │pylsp│ │gopls│ │tssvr│ │rust-anlzr││ - │ │ │ └─────┘ └─────┘ └─────┘ └──────────┘│ - └───────────────┘ └───────────────────────────────────────┘ -``` - -## Core Components - -### 1. HybridSearchEngine (`hybrid_search/engine.py`) - -**Role**: Main orchestrator coordinating all services - -```python -class HybridSearchEngine: - def __init__(self): - self.vector_service: VectorSearchService - self.lsp_bridge: LspBridge - self.graph_builder: GraphBuilder - self.clustering_service: ClusteringService - self.ranking_service: RankingService - - async def search(self, query: str, top_k: int = 10) -> List[SearchResultCluster]: - # Stage 1: Vector search for seeds - seeds = await self.vector_service.search(query, top_k=top_k * 2) - - # Stage 2-3: LSP expansion + Graph building - graph = await self.graph_builder.build_from_seeds(seeds, self.lsp_bridge) - - # Stage 4: Clustering + Filtering - clusters = self.clustering_service.cluster(graph) - clusters = self.clustering_service.filter_noise(clusters) - - # Stage 5: Reranking - ranked = self.ranking_service.rerank(clusters, seeds, query) - - return ranked[:top_k] -``` - -### 2. Data Structures (`hybrid_search/data_structures.py`) - -```python -@dataclass -class CodeSymbolNode: - """Graph node representing a code symbol""" - id: str # Unique: file_path:name:line - name: str # Symbol name - kind: str # function, class, method, variable - file_path: str # Absolute file path - range: Range # Start/end line and character - embedding: Optional[List[float]] = None - raw_code: str = "" - docstring: str = "" - -@dataclass -class CodeAssociationGraph: - """Graph of code relationships""" - nodes: Dict[str, CodeSymbolNode] - edges: List[Tuple[str, str, str]] # (from_id, to_id, relationship_type) - # relationship_type: 'calls', 'references', 'inherits', 'imports' - - def to_networkx(self) -> nx.DiGraph: - """Convert to NetworkX for algorithms""" - ... - -@dataclass -class SearchResultCluster: - """Clustered search result""" - cluster_id: str - score: float - title: str # AI-generated summary (optional) - symbols: List[CodeSymbolNode] - metadata: Dict[str, Any] -``` - -### 3. VectorSearchService (`services/vector_search.py`) - -**Role**: Semantic search using embeddings - -```python -class VectorSearchService: - def __init__(self, model_name: str = "all-MiniLM-L6-v2"): - self.model = SentenceTransformer(model_name) # 384-dim, fast - self.index: faiss.IndexFlatIP # or hnswlib for larger scale - self.id_to_symbol: Dict[str, CodeSymbolNode] - - async def index_codebase(self, symbols: List[CodeSymbolNode]): - """Build/update vector index from symbols""" - texts = [f"{s.name} {s.docstring} {s.raw_code[:500]}" for s in symbols] - embeddings = self.model.encode(texts, normalize_embeddings=True) - self.index.add(embeddings) - - async def search(self, query: str, top_k: int) -> List[CodeSymbolNode]: - """Find semantically similar symbols""" - query_vec = self.model.encode([query], normalize_embeddings=True) - scores, indices = self.index.search(query_vec, top_k) - return [self.id_to_symbol[i] for i in indices[0]] -``` - -**Embedding Model Selection**: -| Model | Dimensions | Speed | Quality | -|-------|-----------|-------|---------| -| all-MiniLM-L6-v2 | 384 | Fast | Good | -| all-mpnet-base-v2 | 768 | Medium | Better | -| CodeBERT | 768 | Medium | Code-optimized | - -### 4. LspBridge (`services/lsp_bridge.py`) - -**Role**: Interface to real-time language servers via LanguageServerMultiplexer - -```python -class LspBridge: - def __init__(self, multiplexer_url: str = "http://localhost:3458"): - self.multiplexer_url = multiplexer_url - self.cache: Dict[str, CacheEntry] = {} # file_path -> (mtime, data) - self.session = aiohttp.ClientSession() - - async def get_references(self, symbol: CodeSymbolNode) -> List[Location]: - """Get all references to a symbol (real-time LSP)""" - cache_key = f"refs:{symbol.id}" - if self._is_cached(cache_key, symbol.file_path): - return self.cache[cache_key].data - - response = await self._lsp_request("textDocument/references", { - "textDocument": {"uri": f"file://{symbol.file_path}"}, - "position": {"line": symbol.range.start.line, - "character": symbol.range.start.character}, - "context": {"includeDeclaration": True} - }) - - locations = self._parse_locations(response) - self._cache(cache_key, symbol.file_path, locations) - return locations - - async def get_call_hierarchy(self, symbol: CodeSymbolNode) -> List[CallHierarchyItem]: - """Get incoming/outgoing calls (if supported by language server)""" - try: - # Prepare call hierarchy - items = await self._lsp_request("textDocument/prepareCallHierarchy", {...}) - if not items: - # Fallback to references if callHierarchy not supported - return await self._fallback_to_references(symbol) - - # Get incoming calls - incoming = await self._lsp_request("callHierarchy/incomingCalls", - {"item": items[0]}) - return incoming - except LspCapabilityNotSupported: - return await self._fallback_to_references(symbol) - - async def get_definition(self, symbol: CodeSymbolNode) -> Optional[Location]: - """Get symbol definition location""" - ... - - async def get_hover(self, symbol: CodeSymbolNode) -> Optional[str]: - """Get hover documentation""" - ... -``` - -**Caching Strategy**: -- Cache key: `{operation}:{symbol_id}` -- Invalidation: Check file modification time -- TTL: 5 minutes for frequently accessed files - -**Concurrency Control**: -- Max concurrent LSP requests: 10 -- Request timeout: 2 seconds -- Batch requests where possible - -### 5. GraphBuilder (`graph/builder.py`) - -**Role**: Build code association graph from seeds using LSP - -```python -class GraphBuilder: - def __init__(self, max_depth: int = 2, max_nodes: int = 100): - self.max_depth = max_depth - self.max_nodes = max_nodes - - async def build_from_seeds( - self, - seeds: List[CodeSymbolNode], - lsp_bridge: LspBridge - ) -> CodeAssociationGraph: - """Build association graph by expanding from seed nodes""" - graph = CodeAssociationGraph() - visited: Set[str] = set() - queue: List[Tuple[CodeSymbolNode, int]] = [(s, 0) for s in seeds] - - # Parallel expansion with semaphore - sem = asyncio.Semaphore(10) - - async def expand_node(node: CodeSymbolNode, depth: int): - if node.id in visited or depth > self.max_depth: - return - if len(graph.nodes) >= self.max_nodes: - return - - visited.add(node.id) - graph.add_node(node) - - async with sem: - # Get relationships in parallel - refs, calls = await asyncio.gather( - lsp_bridge.get_references(node), - lsp_bridge.get_call_hierarchy(node), - return_exceptions=True - ) - - # Add edges - for ref in refs: - ref_node = await self._location_to_node(ref, lsp_bridge) - graph.add_edge(node.id, ref_node.id, "references") - queue.append((ref_node, depth + 1)) - - for call in calls: - call_node = await self._call_to_node(call, lsp_bridge) - graph.add_edge(call_node.id, node.id, "calls") - queue.append((call_node, depth + 1)) - - # BFS expansion - while queue and len(graph.nodes) < self.max_nodes: - batch = queue[:10] - queue = queue[10:] - await asyncio.gather(*[expand_node(n, d) for n, d in batch]) - - return graph -``` - -### 6. ClusteringService (`clustering/algorithms.py`) - -**Role**: Group related code symbols and filter noise - -```python -class ClusteringService: - def __init__(self, resolution: float = 1.0): - self.resolution = resolution # Higher = smaller clusters - - def cluster(self, graph: CodeAssociationGraph) -> List[SearchResultCluster]: - """Apply Louvain community detection""" - nx_graph = graph.to_networkx() - - # Louvain algorithm - communities = community_louvain.best_partition( - nx_graph, - resolution=self.resolution - ) - - # Group nodes by community - clusters: Dict[int, List[CodeSymbolNode]] = defaultdict(list) - for node_id, community_id in communities.items(): - clusters[community_id].append(graph.nodes[node_id]) - - return [ - SearchResultCluster( - cluster_id=f"cluster_{cid}", - symbols=nodes, - score=0.0, # Will be set by RankingService - title="", - metadata={"size": len(nodes)} - ) - for cid, nodes in clusters.items() - ] - - def filter_noise(self, clusters: List[SearchResultCluster]) -> List[SearchResultCluster]: - """Remove noisy clusters and symbols""" - filtered = [] - for cluster in clusters: - # Filter high-degree generic nodes - cluster.symbols = [ - s for s in cluster.symbols - if not self._is_generic_symbol(s) - ] - - # Keep clusters with minimum size - if len(cluster.symbols) >= 2: - filtered.append(cluster) - - return filtered - - def _is_generic_symbol(self, symbol: CodeSymbolNode) -> bool: - """Check if symbol is too generic (log, print, etc.)""" - generic_names = {'log', 'print', 'debug', 'error', 'warn', - 'get', 'set', 'init', '__init__', 'toString'} - return symbol.name.lower() in generic_names -``` - -### 7. RankingService (`ranking/service.py`) - -**Role**: Multi-factor intelligent reranking - -```python -@dataclass -class RankingWeights: - text_relevance: float = 0.4 # w1 - graph_centrality: float = 0.35 # w2 - structural_proximity: float = 0.25 # w3 - -class RankingService: - def __init__(self, weights: RankingWeights = None): - self.weights = weights or RankingWeights() - - def rerank( - self, - clusters: List[SearchResultCluster], - seeds: List[CodeSymbolNode], - query: str - ) -> List[SearchResultCluster]: - """Rerank clusters using multi-factor scoring""" - seed_ids = {s.id for s in seeds} - - for cluster in clusters: - # Build cluster subgraph for centrality - subgraph = self._build_subgraph(cluster) - pagerank = nx.pagerank(subgraph) - - for symbol in cluster.symbols: - # Factor 1: Text relevance (from vector search) - text_score = self._compute_text_relevance(symbol, query) - - # Factor 2: Graph centrality (PageRank in cluster) - centrality_score = pagerank.get(symbol.id, 0.0) - - # Factor 3: Structural proximity to seeds - proximity_score = self._compute_proximity(symbol, seed_ids, subgraph) - - # Combined score - symbol.score = ( - self.weights.text_relevance * text_score + - self.weights.graph_centrality * centrality_score + - self.weights.structural_proximity * proximity_score - ) - - # Cluster score = max symbol score - cluster.score = max(s.score for s in cluster.symbols) - cluster.symbols.sort(key=lambda s: s.score, reverse=True) - - # Sort clusters by score - clusters.sort(key=lambda c: c.score, reverse=True) - return clusters - - def _compute_proximity( - self, - symbol: CodeSymbolNode, - seed_ids: Set[str], - graph: nx.DiGraph - ) -> float: - """Compute proximity score based on shortest path to seeds""" - if symbol.id in seed_ids: - return 1.0 - - min_distance = float('inf') - for seed_id in seed_ids: - try: - distance = nx.shortest_path_length(graph, seed_id, symbol.id) - min_distance = min(min_distance, distance) - except nx.NetworkXNoPath: - continue - - if min_distance == float('inf'): - return 0.0 - - # Inverse distance scoring (closer = higher) - return 1.0 / (1.0 + min_distance) -``` - -## API Design - -### Endpoint: `POST /api/v1/hybrid-search` - -**Request**: -```json -{ - "query": "user authentication flow", - "top_k": 10, - "config_overrides": { - "ranking_weights": {"w1": 0.5, "w2": 0.3, "w3": 0.2}, - "max_graph_depth": 2, - "clustering_resolution": 1.0 - } -} -``` - -**Response**: -```json -{ - "query_id": "hs-20250120-001", - "execution_time_ms": 1250, - "results": [ - { - "cluster_id": "cluster_0", - "score": 0.92, - "title": "User Authentication Handler", - "symbols": [ - { - "id": "src/auth/handler.py:authenticate:45", - "name": "authenticate", - "kind": "function", - "file_path": "src/auth/handler.py", - "range": {"start": {"line": 45, "char": 0}, "end": {"line": 78, "char": 0}}, - "score": 0.95, - "raw_code": "async def authenticate(request: Request):\n ..." - }, - { - "id": "src/auth/handler.py:validate_token:80", - "name": "validate_token", - "kind": "function", - "file_path": "src/auth/handler.py", - "score": 0.88, - "raw_code": "def validate_token(token: str) -> bool:\n ..." - } - ] - } - ] -} -``` - -## Implementation Priorities - -### P0 - Core Infrastructure (Week 1-2) -1. **HybridSearchEngine skeleton** - Basic orchestration without all features -2. **LspBridge with caching** - Connect to LanguageServerMultiplexer -3. **GraphBuilder basic** - Seed expansion with references only -4. **Integration test** - Verify LSP communication works - -### P1 - Search Pipeline (Week 2-3) -1. **VectorSearchService** - Embedding model + FAISS index -2. **ClusteringService** - Louvain algorithm + noise filtering -3. **End-to-end pipeline** - Query to clustered results - -### P2 - Ranking & API (Week 3-4) -1. **RankingService** - Multi-factor scoring -2. **API endpoint** - FastAPI integration -3. **Performance optimization** - Caching, parallelization, timeouts -4. **Configuration system** - Dynamic weight adjustment - -## Performance Targets - -| Metric | Target | Strategy | -|--------|--------|----------| -| End-to-end latency | < 2s | Parallel LSP calls, aggressive caching | -| Vector search | < 100ms | FAISS with GPU (optional) | -| LSP expansion | < 1s | Max 10 concurrent requests, 2s timeout | -| Clustering | < 200ms | Limit graph size to 100 nodes | -| Reranking | < 100ms | Pre-computed embeddings | - -## Dependencies - -### External -- LanguageServerMultiplexer (from REAL_LSP_SERVER_PLAN.md) -- Language servers: pylsp, tsserver, gopls, rust-analyzer - -### Python Libraries -- `sentence-transformers` - Embedding models -- `faiss-cpu` or `hnswlib` - Vector indexing -- `networkx` - Graph algorithms -- `python-louvain` - Community detection -- `aiohttp` - Async HTTP client - -## File Structure - -``` -src/codexlens/ -├── hybrid_search/ -│ ├── __init__.py -│ ├── engine.py # HybridSearchEngine -│ ├── pipeline.py # Pipeline stage definitions -│ └── data_structures.py # CodeSymbolNode, Graph, Cluster -├── services/ -│ ├── vector_search.py # VectorSearchService -│ └── lsp_bridge.py # LspBridge -├── graph/ -│ └── builder.py # GraphBuilder -├── clustering/ -│ └── algorithms.py # ClusteringService -├── ranking/ -│ └── service.py # RankingService -├── api/ -│ └── endpoints.py # API routes -└── configs/ - └── hybrid_search_config.py -``` - -## Risk Mitigation - -| Risk | Impact | Mitigation | -|------|--------|------------| -| LSP timeout | High | Fallback to vector-only results | -| LSP not available | High | Graceful degradation to CodexLens index | -| Large codebases | Medium | Limit graph expansion, pagination | -| Language server crash | Medium | Auto-restart, circuit breaker | -| Clustering quality | Low | Tunable resolution parameter | - ---- - -*Generated from Gemini analysis (Session: 1768836775699-gemini)* -*Date: 2025-01-20* diff --git a/codex-lens/docs/IMPLEMENTATION_SUMMARY.md b/codex-lens/docs/IMPLEMENTATION_SUMMARY.md deleted file mode 100644 index 08dd775d..00000000 --- a/codex-lens/docs/IMPLEMENTATION_SUMMARY.md +++ /dev/null @@ -1,363 +0,0 @@ -# CodexLens Real LSP Implementation - Summary - -> **Date**: 2026-01-19 -> **Status**: Planning Complete, Implementation Ready -> **Focus**: Real LSP Server + VSCode Bridge Integration - ---- - -## ✅ Completed Work - -### 1. Planning Documents - -#### a. Main Implementation Plan -**File**: `docs/REAL_LSP_SERVER_PLAN.md` - -**Content**: -- Complete architecture design for real LSP server -- 5-phase implementation plan -- Multi-language support strategy (TypeScript, Python, Go, Rust, Java, C/C++) -- Language server multiplexer design -- Position tolerance feature (cclsp-like) -- MCP integration layer - -**Key Decisions**: -- Use `pygls` library for LSP implementation -- Support 6+ language servers via multiplexer -- Implement position tolerance for fuzzy AI-generated positions -- Three integration paths: Standalone LSP, VSCode Bridge, Index-based fallback - -#### b. VSCode Bridge Implementation (Appendix A) -**Included in**: `docs/REAL_LSP_SERVER_PLAN.md` - -**Content**: -- HTTP-based VSCode extension bridge -- MCP tool integration (vscode_lsp) -- Complete architecture diagram -- API endpoint specifications -- Comparison with standalone LSP approach - -### 2. VSCode Bridge Extension - -#### Created Files: -1. **`ccw-vscode-bridge/package.json`** - - VSCode extension manifest - - Dependencies: @types/node, @types/vscode, typescript - -2. **`ccw-vscode-bridge/tsconfig.json`** - - TypeScript compilation configuration - - Target: ES2020, CommonJS modules - -3. **`ccw-vscode-bridge/src/extension.ts`** - - HTTP server on port 3457 - - 4 API endpoints: - - `POST /get_definition` - - `POST /get_references` - - `POST /get_hover` - - `POST /get_document_symbols` - - VSCode API integration via `vscode.commands.executeCommand` - -4. **`ccw-vscode-bridge/.vscodeignore`** - - Build artifact exclusion rules - -5. **`ccw-vscode-bridge/README.md`** - - Installation & usage instructions - - API endpoint documentation - -#### Features: -- ✅ Real-time VSCode LSP integration -- ✅ HTTP REST API for external tools -- ✅ CORS support -- ✅ Error handling -- ✅ Automatic VSCode feature detection - -### 3. CCW MCP Tool - -#### Created File: -**`ccw/src/tools/vscode-lsp.ts`** - -**Features**: -- ✅ 4 LSP actions: get_definition, get_references, get_hover, get_document_symbols -- ✅ Zod schema validation -- ✅ HTTP client with timeout (10s) -- ✅ Connection retry logic -- ✅ Comprehensive error messages - -**Parameters**: -- `action` (required): LSP action type -- `file_path` (required): Absolute file path -- `line` (optional): Line number (1-based) -- `character` (optional): Character position (1-based) - -#### Integration: -**Modified File**: `ccw/src/tools/index.ts` - -- ✅ Imported `vscodeLspMod` -- ✅ Registered tool via `registerTool(toLegacyTool(vscodeLspMod))` -- ✅ Available in MCP server tool list - ---- - -## 📋 Implementation Architecture - -### Three Integration Paths - -``` -Path 1: VSCode Bridge (✅ Implemented) -───────────────────────────────────── -Claude Code → vscode_lsp MCP tool → HTTP → ccw-vscode-bridge → VSCode API → Language Servers - -Path 2: Standalone LSP Server (📝 Planned) -────────────────────────────────────────── -Any LSP Client → codexlens-lsp → Language Server Multiplexer → Language Servers - -Path 3: Index-Based (✅ Existing) -───────────────────────────────── -Claude Code → codex_lens_lsp → Python API → SQLite Index → Cached Results -``` - -### Smart Routing Strategy - -```javascript -// Priority: VSCode Bridge → Standalone LSP → Index-based -if (vscodeBridgeAvailable) { - return useVSCodeBridge(); -} else if (standaloneLSPAvailable) { - return useStandaloneLSP(); -} else { - return useIndexBased(); -} -``` - ---- - -## 🎯 Next Steps - -### Immediate Actions (Phase 1) - -1. **Test VSCode Bridge** - ```bash - cd ccw-vscode-bridge - npm install - npm run compile - # Press F5 in VSCode to launch extension - ``` - -2. **Test vscode_lsp Tool** - ```bash - # Start CCW MCP server - cd ccw - npm run mcp - - # Test via MCP client - { - "tool": "vscode_lsp", - "arguments": { - "action": "get_definition", - "file_path": "/path/to/file.ts", - "line": 10, - "character": 5 - } - } - ``` - -3. **Document Testing Results** - - Create test reports - - Benchmark latency - - Validate accuracy - -### Medium-Term Goals (Phase 2-3) - -1. **Implement Standalone LSP Server** - - Setup `codexlens-lsp` project structure - - Implement language server multiplexer - - Add core LSP handlers - -2. **Add Position Tolerance** - - Implement fuzzy position matching - - Test with AI-generated positions - -3. **Create Integration Tests** - - Unit tests for each component - - E2E tests with real language servers - - Performance benchmarks - -### Long-Term Goals (Phase 4-5) - -1. **MCP Context Enhancement** - - Integrate LSP results into MCP context - - Hook system for Claude Code - -2. **Advanced Features** - - Code actions - - Formatting - - Rename support - -3. **Production Deployment** - - Package VSCode extension to .vsix - - Publish to VS Code marketplace - - Create installation scripts - ---- - -## 📊 Project Status Matrix - -| Component | Status | Files | Tests | Docs | -|-----------|--------|-------|-------|------| -| VSCode Bridge Extension | ✅ Complete | 5/5 | ⏳ Pending | ✅ Complete | -| vscode_lsp MCP Tool | ✅ Complete | 1/1 | ⏳ Pending | ✅ Complete | -| Tool Registration | ✅ Complete | 1/1 | N/A | N/A | -| Planning Documents | ✅ Complete | 2/2 | N/A | ✅ Complete | -| Standalone LSP Server | 📝 Planned | 0/8 | 0/12 | ✅ Complete | -| Integration Tests | 📝 Planned | 0/3 | 0/15 | ⏳ Pending | - ---- - -## 🔧 Development Environment - -### Prerequisites - -**For VSCode Bridge**: -- Node.js ≥ 18 -- VSCode ≥ 1.80 -- TypeScript ≥ 5.0 - -**For Standalone LSP**: -- Python ≥ 3.8 -- pygls ≥ 1.3.0 -- Language servers: - - TypeScript: `npm i -g typescript-language-server` - - Python: `pip install python-lsp-server` - - Go: `go install golang.org/x/tools/gopls@latest` - - Rust: `rustup component add rust-analyzer` - -### Installation Commands - -```bash -# VSCode Bridge -cd ccw-vscode-bridge -npm install -npm run compile - -# CCW MCP (already setup) -cd ccw -npm install - -# Future: Standalone LSP -cd codex-lens -pip install -e ".[lsp]" -``` - ---- - -## 📖 Documentation Index - -| Document | Purpose | Status | -|----------|---------|--------| -| `REAL_LSP_SERVER_PLAN.md` | Complete implementation plan | ✅ | -| `LSP_INTEGRATION_PLAN.md` | Original integration strategy | ✅ | -| `MCP_ENDPOINT_DESIGN.md` | MCP endpoint specifications | ✅ | -| `IMPLEMENTATION_SUMMARY.md` | This document | ✅ | -| `ccw-vscode-bridge/README.md` | Bridge usage guide | ✅ | -| `TESTING_GUIDE.md` | Testing procedures | ⏳ TODO | -| `DEPLOYMENT_GUIDE.md` | Production deployment | ⏳ TODO | - ---- - -## 💡 Key Design Decisions - -### 1. Why Three Integration Paths? - -- **VSCode Bridge**: Easiest setup, leverages VSCode's built-in language servers -- **Standalone LSP**: IDE-agnostic, works with any LSP client -- **Index-based**: Fallback for offline or cached queries - -### 2. Why HTTP for VSCode Bridge? - -- ✅ Simplest cross-process communication -- ✅ No complex IPC/socket management -- ✅ Easy to debug with curl/Postman -- ✅ CORS support for web-based tools - -### 3. Why Port 3457? - -- Unique port unlikely to conflict -- Easy to remember (345-7) -- Same approach as cclsp (uses stdio) - -### 4. Why Not Modify smart_search? - -User feedback: -> "第一种跟当前的符号搜索没区别哎" -> (Method 1 has no difference from current symbol search) - -**Solution**: Implement real LSP server that connects to live language servers, not pre-indexed data. - ---- - -## 🚀 Quick Start Guide - -### Test VSCode Bridge Now - -1. **Install Extension**: - ```bash - cd ccw-vscode-bridge - npm install && npm run compile - code --install-extension . - ``` - -2. **Reload VSCode**: - - Press `Cmd+Shift+P` (Mac) or `Ctrl+Shift+P` (Windows) - - Type "Reload Window" - -3. **Verify Bridge is Running**: - ```bash - curl http://localhost:3457/get_definition \ - -X POST \ - -H "Content-Type: application/json" \ - -d '{"file_path":"/path/to/file.ts","line":10,"character":5}' - ``` - -4. **Test via CCW**: - ```javascript - // In Claude Code or MCP client - await executeTool('vscode_lsp', { - action: 'get_definition', - file_path: '/absolute/path/to/file.ts', - line: 10, - character: 5 - }); - ``` - ---- - -## 📞 Support & Troubleshooting - -### Common Issues - -**Issue**: "Could not connect to VSCode Bridge" -**Solution**: -1. Ensure VSCode is running -2. Check if extension is activated: `Cmd+Shift+P` → "CCW VSCode Bridge" -3. Verify port 3457 is not in use: `lsof -i :3457` - -**Issue**: "No LSP server available" -**Solution**: Open the file in VSCode workspace first - -**Issue**: "File not found" -**Solution**: Use absolute paths, not relative - ---- - -## 📝 Change Log - -### 2026-01-19 - Initial Implementation -- Created VSCode Bridge extension (5 files) -- Implemented vscode_lsp MCP tool -- Registered tool in CCW registry -- Completed planning documentation -- Added comprehensive architecture diagrams - ---- - -**Document End** diff --git a/codex-lens/docs/LLM_REMOVAL_SUMMARY.md b/codex-lens/docs/LLM_REMOVAL_SUMMARY.md deleted file mode 100644 index 30b090d0..00000000 --- a/codex-lens/docs/LLM_REMOVAL_SUMMARY.md +++ /dev/null @@ -1,342 +0,0 @@ -# LLM增强功能移除总结 - -**移除日期**: 2025-12-16 -**执行者**: 用户请求 -**状态**: ✅ 完成 - ---- - -## 📋 移除清单 - -### ✅ 已删除的源代码文件 - -| 文件 | 说明 | -|------|------| -| `src/codexlens/semantic/llm_enhancer.py` | LLM增强核心模块 (900+ lines) | - -### ✅ 已修改的源代码文件 - -| 文件 | 修改内容 | -|------|---------| -| `src/codexlens/cli/commands.py` | 删除 `enhance` 命令 (lines 1050-1227) | -| `src/codexlens/semantic/__init__.py` | 删除LLM相关导出 (lines 35-69) | - -### ✅ 已修改的前端文件(CCW Dashboard) - -| 文件 | 修改内容 | -|------|---------| -| `ccw/src/templates/dashboard-js/components/cli-status.js` | 删除LLM增强设置 (8行)、Semantic Settings Modal (615行)、Metadata Viewer (326行) | -| `ccw/src/templates/dashboard-js/i18n.js` | 删除英文LLM翻译 (26行)、中文LLM翻译 (26行) | -| `ccw/src/templates/dashboard-js/views/cli-manager.js` | 移除LLM badge和设置modal调用 (3行) | - -### ✅ 已删除的测试文件 - -| 文件 | 说明 | -|------|------| -| `tests/test_llm_enhancer.py` | LLM增强单元测试 | -| `tests/test_llm_enhanced_search.py` | LLM vs 纯向量对比测试 (550+ lines) | - -### ✅ 已删除的脚本文件 - -| 文件 | 说明 | -|------|------| -| `scripts/compare_search_methods.py` | 纯向量 vs LLM增强对比脚本 (460+ lines) | -| `scripts/test_misleading_comments.py` | 误导性注释测试脚本 (490+ lines) | -| `scripts/show_llm_analysis.py` | LLM分析展示工具 | -| `scripts/inspect_llm_summaries.py` | LLM摘要检查工具 | - -### ✅ 已删除的文档文件 - -| 文件 | 说明 | -|------|------| -| `docs/LLM_ENHANCED_SEARCH_GUIDE.md` | LLM增强使用指南 (460+ lines) | -| `docs/LLM_ENHANCEMENT_TEST_RESULTS.md` | LLM测试结果文档 | -| `docs/MISLEADING_COMMENTS_TEST_RESULTS.md` | 误导性注释测试结果 | -| `docs/CLI_INTEGRATION_SUMMARY.md` | CLI集成文档(包含enhance命令) | -| `docs/DOCSTRING_LLM_HYBRID_DESIGN.md` | Docstring与LLM混合策略设计 | - -### ✅ 已更新的文档 - -| 文件 | 修改内容 | -|------|---------| -| `docs/IMPLEMENTATION_SUMMARY.md` | 添加LLM移除说明,列出已删除内容 | - -### 📚 保留的设计文档(作为历史参考) - -| 文件 | 说明 | -|------|------| -| `docs/DESIGN_EVALUATION_REPORT.md` | 包含LLM混合策略的技术评估报告 | -| `docs/SEMANTIC_GRAPH_DESIGN.md` | 语义图谱设计(可能提及LLM) | -| `docs/MULTILEVEL_CHUNKER_DESIGN.md` | 多层次分词器设计(可能提及LLM) | - -*这些文档保留作为技术历史参考,不影响当前功能。* - ---- - -## 🔒 移除的功能 - -### CLI命令 - -```bash -# 已移除 - 不再可用 -codexlens enhance [PATH] --tool gemini --batch-size 5 - -# 说明:此命令用于通过CCW CLI调用Gemini/Qwen生成代码摘要 -# 移除原因:减少外部依赖,简化维护 -``` - -### Python API - -```python -# 已移除 - 不再可用 -from codexlens.semantic import ( - LLMEnhancer, - LLMConfig, - SemanticMetadata, - FileData, - EnhancedSemanticIndexer, - create_enhancer, - create_enhanced_indexer, -) - -# 移除的类和函数: -# - LLMEnhancer: LLM增强器主类 -# - LLMConfig: LLM配置类 -# - SemanticMetadata: 语义元数据结构 -# - FileData: 文件数据结构 -# - EnhancedSemanticIndexer: LLM增强索引器 -# - create_enhancer(): 创建增强器的工厂函数 -# - create_enhanced_indexer(): 创建增强索引器的工厂函数 -``` - ---- - -## ✅ 保留的功能 - -### 完全保留的核心功能 - -| 功能 | 状态 | -|------|------| -| **纯向量搜索** | ✅ 完整保留 | -| **语义嵌入生成** | ✅ 完整保留 (`codexlens embeddings-generate`) | -| **语义嵌入状态检查** | ✅ 完整保留 (`codexlens embeddings-status`) | -| **混合搜索引擎** | ✅ 完整保留(exact + fuzzy + vector) | -| **向量存储** | ✅ 完整保留 | -| **语义分块** | ✅ 完整保留 | -| **fastembed集成** | ✅ 完整保留 | - -### 可用的CLI命令 - -```bash -# 生成纯向量嵌入(无需LLM) -codexlens embeddings-generate [PATH] - -# 检查嵌入状态 -codexlens embeddings-status [PATH] - -# 所有搜索命令 -codexlens search [QUERY] --index [PATH] - -# 所有索引管理命令 -codexlens init [PATH] -codexlens update [PATH] -codexlens clean [PATH] -``` - -### 可用的Python API - -```python -# 完全可用 - 纯向量搜索 -from codexlens.semantic import SEMANTIC_AVAILABLE, SEMANTIC_BACKEND -from codexlens.semantic.embedder import Embedder -from codexlens.semantic.vector_store import VectorStore -from codexlens.semantic.chunker import Chunker, ChunkConfig -from codexlens.search.hybrid_search import HybridSearchEngine - -# 示例:纯向量搜索 -engine = HybridSearchEngine() -results = engine.search( - index_path, - query="your search query", - enable_vector=True, - pure_vector=True, # 纯向量模式 -) -``` - ---- - -## 🎯 移除原因 - -### 1. 简化依赖 - -**移除的外部依赖**: -- CCW CLI (npm package) -- Gemini API (需要API密钥) -- Qwen API (可选) - -**保留的依赖**: -- fastembed (ONNX-based,轻量级) -- numpy -- Python标准库 - -### 2. 减少复杂性 - -- **前**: 两种搜索方式(纯向量 + LLM增强) -- **后**: 一种搜索方式(纯向量) -- 移除了900+ lines的LLM增强代码 -- 移除了CLI命令和相关配置 -- 移除了测试和文档 - -### 3. 性能考虑 - -| 方面 | LLM增强 | 纯向量 | -|------|---------|--------| -| **索引速度** | 慢75倍 | 基准 | -| **查询速度** | 相同 | 相同 | -| **准确率** | 相同* | 基准 | -| **成本** | API费用 | 免费 | - -*在测试数据集上准确率相同(5/5),但LLM增强理论上在更复杂场景下可能更好 - -### 4. 维护负担 - -**移除前**: -- 需要维护CCW CLI集成 -- 需要处理API限流和错误 -- 需要测试多个LLM后端 -- 需要维护批处理逻辑 - -**移除后**: -- 单一嵌入引擎(fastembed) -- 无外部API依赖 -- 更简单的错误处理 -- 更容易测试 - ---- - -## 🔍 验证结果 - -### 导入测试 - -```bash -# ✅ 通过 - 语义模块正常 -python -c "from codexlens.semantic import SEMANTIC_AVAILABLE; print(SEMANTIC_AVAILABLE)" -# Output: True - -# ✅ 通过 - 搜索引擎正常 -python -c "from codexlens.search.hybrid_search import HybridSearchEngine; print('OK')" -# Output: OK -``` - -### 代码清洁度验证 - -```bash -# ✅ 通过 - 无遗留LLM引用 -grep -r "llm_enhancer\|LLMEnhancer\|LLMConfig" src/ --include="*.py" -# Output: (空) -``` - -### 测试结果 - -```bash -# ✅ 5/7通过 - 纯向量搜索基本功能正常 -pytest tests/test_pure_vector_search.py -v -# 通过: 5个基本测试 -# 失败: 2个嵌入测试(已知的模型维度不匹配问题,与LLM移除无关) -``` - ---- - -## 📊 统计 - -### 代码删除统计 - -| 类型 | 删除文件数 | 删除行数(估计) | -|------|-----------|-----------------| -| **源代码** | 1 | ~900 lines | -| **CLI命令** | 1 command | ~180 lines | -| **导出清理** | 1 section | ~35 lines | -| **前端代码** | 3 files | ~1000 lines | -| **测试文件** | 2 | ~600 lines | -| **脚本工具** | 4 | ~1500 lines | -| **文档** | 5 | ~2000 lines | -| **总计** | 16 files/sections | ~6200 lines | - -### 依赖简化 - -| 方面 | 移除前 | 移除后 | -|------|--------|--------| -| **外部工具依赖** | CCW CLI, Gemini/Qwen | 无 | -| **Python包依赖** | fastembed, numpy | fastembed, numpy | -| **API依赖** | Gemini/Qwen API | 无 | -| **配置复杂度** | 高(tool, batch_size, API keys) | 低(model profile) | - ---- - -## 🚀 后续建议 - -### 如果需要LLM增强功能 - -1. **从git历史恢复** - ```bash - # 查看删除前的提交 - git log --all --full-history -- "*llm_enhancer*" - - # 恢复特定文件 - git checkout -- src/codexlens/semantic/llm_enhancer.py - ``` - -2. **或使用外部工具** - - 在索引前使用独立脚本生成摘要 - - 将摘要作为注释添加到代码中 - - 然后使用纯向量索引(会包含摘要) - -3. **或考虑轻量级替代方案** - - 使用本地小模型(llama.cpp, ggml) - - 使用docstring提取(无需LLM) - - 使用静态分析生成摘要 - -### 代码库维护建议 - -1. ✅ **保持简单** - 继续使用纯向量搜索 -2. ✅ **优化现有功能** - 改进向量搜索准确性 -3. ✅ **增量改进** - 优化分块策略和嵌入质量 -4. ⚠️ **避免重复** - 如需LLM,先评估是否真正必要 - ---- - -## 📝 文件清单 - -### 删除的文件完整列表 - -``` -src/codexlens/semantic/llm_enhancer.py -tests/test_llm_enhancer.py -tests/test_llm_enhanced_search.py -scripts/compare_search_methods.py -scripts/test_misleading_comments.py -scripts/show_llm_analysis.py -scripts/inspect_llm_summaries.py -docs/LLM_ENHANCED_SEARCH_GUIDE.md -docs/LLM_ENHANCEMENT_TEST_RESULTS.md -docs/MISLEADING_COMMENTS_TEST_RESULTS.md -docs/CLI_INTEGRATION_SUMMARY.md -docs/DOCSTRING_LLM_HYBRID_DESIGN.md -``` - -### 修改的文件 - -``` -src/codexlens/cli/commands.py (删除enhance命令) -src/codexlens/semantic/__init__.py (删除LLM导出) -ccw/src/templates/dashboard-js/components/cli-status.js (删除LLM配置、Settings Modal、Metadata Viewer) -ccw/src/templates/dashboard-js/i18n.js (删除LLM翻译字符串) -ccw/src/templates/dashboard-js/views/cli-manager.js (移除LLM badge和modal调用) -docs/IMPLEMENTATION_SUMMARY.md (添加移除说明) -``` - ---- - -**移除完成时间**: 2025-12-16 -**文档版本**: 1.0 -**验证状态**: ✅ 通过 diff --git a/codex-lens/docs/LSP_INTEGRATION_CHECKLIST.md b/codex-lens/docs/LSP_INTEGRATION_CHECKLIST.md deleted file mode 100644 index 838d5b3a..00000000 --- a/codex-lens/docs/LSP_INTEGRATION_CHECKLIST.md +++ /dev/null @@ -1,316 +0,0 @@ -# codex-lens LSP Integration Execution Checklist - -> Generated: 2026-01-15 -> Based on: Gemini multi-round deep analysis -> Status: Ready for implementation - ---- - -## Phase 1: LSP Server Foundation (Priority: HIGH) - -### 1.1 Create LSP Server Entry Point -- [ ] **Install pygls dependency** - ```bash - pip install pygls - ``` -- [ ] **Create `src/codexlens/lsp/__init__.py`** - - Export: `CodexLensServer`, `start_server` -- [ ] **Create `src/codexlens/lsp/server.py`** - - Class: `CodexLensServer(LanguageServer)` - - Initialize: `ChainSearchEngine`, `GlobalSymbolIndex`, `WatcherManager` - - Lifecycle: Start `WatcherManager` on `initialize` request - -### 1.2 Implement Core LSP Handlers -- [ ] **`textDocument/definition`** handler - - Source: `GlobalSymbolIndex.search()` exact match - - Reference: `storage/global_index.py:173` - - Return: `Location(uri, Range)` - -- [ ] **`textDocument/completion`** handler - - Source: `GlobalSymbolIndex.search(prefix_mode=True)` - - Reference: `storage/global_index.py:173` - - Return: `CompletionItem[]` - -- [ ] **`workspace/symbol`** handler - - Source: `ChainSearchEngine.search_symbols()` - - Reference: `search/chain_search.py:618` - - Return: `SymbolInformation[]` - -### 1.3 Wire File Watcher to LSP Events -- [ ] **`workspace/didChangeWatchedFiles`** handler - - Delegate to: `WatcherManager.process_changes()` - - Reference: `watcher/manager.py:53` - -- [ ] **`textDocument/didSave`** handler - - Trigger: `IncrementalIndexer` for single file - - Reference: `watcher/incremental_indexer.py` - -### 1.4 Deliverables -- [ ] Unit tests for LSP handlers -- [ ] Integration test: definition lookup -- [ ] Integration test: completion prefix search -- [ ] Benchmark: query latency < 50ms - ---- - -## Phase 2: Find References Implementation (Priority: MEDIUM) - -### 2.1 Create `search_references` Method -- [ ] **Add to `src/codexlens/search/chain_search.py`** - ```python - def search_references( - self, - symbol_name: str, - source_path: Path, - depth: int = -1 - ) -> List[ReferenceResult]: - """Find all references to a symbol across the project.""" - ``` - -### 2.2 Implement Parallel Query Orchestration -- [ ] **Collect index paths** - - Use: `_collect_index_paths()` existing method - -- [ ] **Parallel query execution** - - ThreadPoolExecutor across all `_index.db` - - SQL: `SELECT * FROM code_relationships WHERE target_qualified_name = ?` - - Reference: `storage/sqlite_store.py:348` - -- [ ] **Result aggregation** - - Deduplicate by file:line - - Sort by file path, then line number - -### 2.3 LSP Handler -- [ ] **`textDocument/references`** handler - - Call: `ChainSearchEngine.search_references()` - - Return: `Location[]` - -### 2.4 Deliverables -- [ ] Unit test: single-index reference lookup -- [ ] Integration test: cross-directory references -- [ ] Benchmark: < 200ms for 10+ index files - ---- - -## Phase 3: Enhanced Hover Information (Priority: MEDIUM) - -### 3.1 Implement Hover Data Extraction -- [ ] **Create `src/codexlens/lsp/hover_provider.py`** - ```python - class HoverProvider: - def get_hover_info(self, symbol: Symbol) -> HoverInfo: - """Extract hover information for a symbol.""" - ``` - -### 3.2 Data Sources -- [ ] **Symbol metadata** - - Source: `GlobalSymbolIndex.search()` - - Fields: `kind`, `name`, `file_path`, `range` - -- [ ] **Source code extraction** - - Source: `SQLiteStore.files` table - - Reference: `storage/sqlite_store.py:284` - - Extract: Lines from `range[0]` to `range[1]` - -### 3.3 LSP Handler -- [ ] **`textDocument/hover`** handler - - Return: `Hover(contents=MarkupContent)` - - Format: Markdown with code fence - -### 3.4 Deliverables -- [ ] Unit test: hover for function/class/variable -- [ ] Integration test: multi-line function signature - ---- - -## Phase 4: MCP Bridge for Claude Code (Priority: HIGH VALUE) - -### 4.1 Define MCP Schema -- [ ] **Create `src/codexlens/mcp/__init__.py`** -- [ ] **Create `src/codexlens/mcp/schema.py`** - ```python - @dataclass - class MCPContext: - version: str = "1.0" - context_type: str - symbol: Optional[SymbolInfo] - definition: Optional[str] - references: List[ReferenceInfo] - related_symbols: List[SymbolInfo] - ``` - -### 4.2 Create MCP Provider -- [ ] **Create `src/codexlens/mcp/provider.py`** - ```python - class MCPProvider: - def build_context( - self, - symbol_name: str, - context_type: str = "symbol_explanation" - ) -> MCPContext: - """Build structured context for LLM consumption.""" - ``` - -### 4.3 Context Building Logic -- [ ] **Symbol lookup** - - Use: `GlobalSymbolIndex.search()` - -- [ ] **Definition extraction** - - Use: `SQLiteStore` file content - -- [ ] **References collection** - - Use: `ChainSearchEngine.search_references()` - -- [ ] **Related symbols** - - Use: `code_relationships` for imports/calls - -### 4.4 Hook Integration Points -- [ ] **Document `pre-tool` hook interface** - ```python - def pre_tool_hook(action: str, params: dict) -> MCPContext: - """Called before LLM action to gather context.""" - ``` - -- [ ] **Document `post-tool` hook interface** - ```python - def post_tool_hook(action: str, result: Any) -> None: - """Called after LSP action for proactive caching.""" - ``` - -### 4.5 Deliverables -- [ ] MCP schema JSON documentation -- [ ] Unit test: context building -- [ ] Integration test: hook → MCP → JSON output - ---- - -## Phase 5: Advanced Features (Priority: LOW) - -### 5.1 Custom LSP Commands -- [ ] **`codexlens/hybridSearch`** - - Expose: `HybridSearchEngine.search()` - - Reference: `search/hybrid_search.py` - -- [ ] **`codexlens/symbolGraph`** - - Return: Symbol relationship graph - - Source: `code_relationships` table - -### 5.2 Proactive Context Caching -- [ ] **Implement `post-tool` hook caching** - - After `go-to-definition`: pre-fetch references - - Cache TTL: 5 minutes - - Storage: In-memory LRU - -### 5.3 Performance Optimizations -- [ ] **Connection pooling** - - Reference: `storage/sqlite_store.py` thread-local - -- [ ] **Result caching** - - LRU cache for frequent queries - - Invalidate on file change - ---- - -## File Structure After Implementation - -``` -src/codexlens/ -├── lsp/ # NEW -│ ├── __init__.py -│ ├── server.py # Main LSP server -│ ├── handlers.py # LSP request handlers -│ ├── hover_provider.py # Hover information -│ └── utils.py # LSP utilities -│ -├── mcp/ # NEW -│ ├── __init__.py -│ ├── schema.py # MCP data models -│ ├── provider.py # Context builder -│ └── hooks.py # Hook interfaces -│ -├── search/ -│ ├── chain_search.py # MODIFY: add search_references() -│ └── ... -│ -└── ... -``` - ---- - -## Dependencies to Add - -```toml -# pyproject.toml -[project.optional-dependencies] -lsp = [ - "pygls>=1.3.0", -] -``` - ---- - -## Testing Strategy - -### Unit Tests -``` -tests/ -├── lsp/ -│ ├── test_definition.py -│ ├── test_completion.py -│ ├── test_references.py -│ └── test_hover.py -│ -└── mcp/ - ├── test_schema.py - └── test_provider.py -``` - -### Integration Tests -- [ ] Full LSP handshake test -- [ ] Multi-file project navigation -- [ ] Incremental index update via didSave - -### Performance Benchmarks -| Operation | Target | Acceptable | -|-----------|--------|------------| -| Definition lookup | < 30ms | < 50ms | -| Completion (100 items) | < 50ms | < 100ms | -| Find references (10 files) | < 150ms | < 200ms | -| Initial indexing (1000 files) | < 60s | < 120s | - ---- - -## Execution Order - -``` -Week 1: Phase 1.1 → 1.2 → 1.3 → 1.4 -Week 2: Phase 2.1 → 2.2 → 2.3 → 2.4 -Week 3: Phase 3 + Phase 4.1 → 4.2 -Week 4: Phase 4.3 → 4.4 → 4.5 -Week 5: Phase 5 (optional) + Polish -``` - ---- - -## Quick Start Commands - -```bash -# Install LSP dependencies -pip install pygls - -# Run LSP server (after implementation) -python -m codexlens.lsp --stdio - -# Test LSP connection -echo '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{}}' | python -m codexlens.lsp --stdio -``` - ---- - -## Reference Links - -- pygls Documentation: https://pygls.readthedocs.io/ -- LSP Specification: https://microsoft.github.io/language-server-protocol/ -- codex-lens GlobalSymbolIndex: `storage/global_index.py:173` -- codex-lens ChainSearchEngine: `search/chain_search.py:618` -- codex-lens WatcherManager: `watcher/manager.py:53` diff --git a/codex-lens/docs/LSP_INTEGRATION_PLAN.md b/codex-lens/docs/LSP_INTEGRATION_PLAN.md deleted file mode 100644 index 764ec3cd..00000000 --- a/codex-lens/docs/LSP_INTEGRATION_PLAN.md +++ /dev/null @@ -1,2588 +0,0 @@ -# codex-lens LSP Integration - Complete Execution Plan - -> Version: 1.0 -> Created: 2026-01-15 -> Based on: Gemini Multi-Round Deep Analysis -> Status: Ready for Execution - ---- - -## Table of Contents - -1. [Executive Summary](#1-executive-summary) -2. [Claude Code LSP Implementation Reference](#2-claude-code-lsp-implementation-reference) -3. [Architecture Overview](#3-architecture-overview) -4. [Phase 1: LSP Server Foundation](#4-phase-1-lsp-server-foundation) -5. [Phase 2: Find References](#5-phase-2-find-references) -6. [Phase 3: Hover Information](#6-phase-3-hover-information) -7. [Phase 4: MCP Bridge](#7-phase-4-mcp-bridge) -8. [Phase 5: Advanced Features](#8-phase-5-advanced-features) -9. [Testing Strategy](#9-testing-strategy) -10. [Deployment Guide](#10-deployment-guide) -11. [Risk Mitigation](#11-risk-mitigation) - ---- - -## 1. Executive Summary - -### 1.1 Project Goal - -将 codex-lens 的代码索引和搜索能力通过 LSP (Language Server Protocol) 暴露,使其能够: -- 为 IDE/编辑器提供代码导航功能 -- 与 Claude Code 的 hook 系统集成 -- 通过 MCP (Model Context Protocol) 为 LLM 提供结构化代码上下文 - -### 1.2 Value Proposition - -| Capability | Before | After | -|------------|--------|-------| -| Code Navigation | CLI only | IDE integration via LSP | -| Context for LLM | Manual copy-paste | Automated MCP injection | -| Real-time Updates | Batch re-index | Incremental on save | -| Cross-project Search | Per-directory | Unified global index | - -### 1.3 Success Criteria - -- [ ] All 5 core LSP methods implemented and tested -- [ ] Query latency < 100ms for 95th percentile -- [ ] MCP context generation working with Claude Code hooks -- [ ] Documentation and examples complete - ---- - -## 2. Claude Code LSP Implementation Reference - -> 本章节记录 Claude Code 当前 LSP 实现方式,作为 codex-lens 集成的技术参考。 - -### 2.1 Claude Code LSP 实现方式概览 - -Claude Code 实现 LSP 功能有 **三种方式**: - -| 方式 | 描述 | 适用场景 | -|------|------|----------| -| **内置 LSP 工具** | v2.0.74+ 原生支持 | 快速启用,基础功能 | -| **MCP Server (cclsp)** | 第三方 MCP 桥接 | 高级功能,位置容错 | -| **Plugin Marketplace** | 插件市场安装 | 多语言扩展支持 | - -### 2.2 方式一:内置 LSP 工具 (v2.0.74+) - -Claude Code 从 v2.0.74 版本开始内置 LSP 支持。 - -#### 启用方式 - -```bash -# 设置环境变量启用 LSP -export ENABLE_LSP_TOOL=1 -claude - -# 永久启用 (添加到 shell 配置) -echo 'export ENABLE_LSP_TOOL=1' >> ~/.bashrc -``` - -#### 内置 LSP 工具清单 - -| 工具名 | 功能 | 对应 LSP 方法 | 性能 | -|--------|------|---------------|------| -| `goToDefinition` | 跳转到符号定义 | `textDocument/definition` | ~50ms | -| `findReferences` | 查找所有引用 | `textDocument/references` | ~100ms | -| `documentSymbol` | 获取文件符号结构 | `textDocument/documentSymbol` | ~30ms | -| `hover` | 显示类型签名和文档 | `textDocument/hover` | ~50ms | -| `getDiagnostics` | 获取诊断信息 | `textDocument/diagnostic` | ~100ms | - -#### 性能对比 - -``` -传统文本搜索: ~45,000ms (45秒) -LSP 语义搜索: ~50ms -性能提升: 约 900 倍 -``` - -#### 当前限制 - -- 部分语言返回 "No LSP server available" -- 需要额外安装语言服务器插件 -- 不支持重命名等高级操作 - -### 2.3 方式二:MCP Server 方式 (cclsp) - -[cclsp](https://github.com/ktnyt/cclsp) 是一个 MCP Server,将 LSP 能力暴露给 Claude Code。 - -#### 架构图 - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ Claude Code │ -│ (MCP Client) │ -└───────────────────────────┬─────────────────────────────────────┘ - │ - │ MCP Protocol (JSON-RPC over stdio) - │ -┌───────────────────────────▼─────────────────────────────────────┐ -│ cclsp │ -│ (MCP Server) │ -│ ┌─────────────────────────────────────────────────────────┐ │ -│ │ Position Tolerance Layer │ │ -│ │ (自动尝试多个位置组合,解决 AI 行号不精确问题) │ │ -│ └─────────────────────────────────────────────────────────┘ │ -│ │ │ -│ ┌──────────────────┼──────────────────┐ │ -│ │ │ │ │ -│ ▼ ▼ ▼ │ -│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ -│ │ pylsp │ │ gopls │ │rust-analyzer│ │ -│ │ (Python) │ │ (Go) │ │ (Rust) │ │ -│ └─────────────┘ └─────────────┘ └─────────────┘ │ -└─────────────────────────────────────────────────────────────────┘ -``` - -#### 安装与配置 - -```bash -# 一次性运行 (无需安装) -npx cclsp@latest setup - -# 用户级配置 -npx cclsp@latest setup --user -``` - -#### 配置文件格式 - -**位置**: `.claude/cclsp.json` 或 `~/.config/claude/cclsp.json` - -```json -{ - "servers": [ - { - "extensions": ["py", "pyi"], - "command": ["pylsp"], - "rootDir": ".", - "restartInterval": 5, - "initializationOptions": {} - }, - { - "extensions": ["ts", "tsx", "js", "jsx"], - "command": ["typescript-language-server", "--stdio"], - "rootDir": "." - }, - { - "extensions": ["go"], - "command": ["gopls"], - "rootDir": "." - }, - { - "extensions": ["rs"], - "command": ["rust-analyzer"], - "rootDir": "." - } - ] -} -``` - -#### cclsp 暴露的 MCP 工具 - -| MCP 工具 | 功能 | 特性 | -|----------|------|------| -| `find_definition` | 按名称和类型查找定义 | 支持模糊匹配 | -| `find_references` | 查找所有引用位置 | 跨文件搜索 | -| `rename_symbol` | 重命名符号 | 创建 .bak 备份 | -| `rename_symbol_strict` | 精确位置重命名 | 处理同名歧义 | -| `get_diagnostics` | 获取诊断信息 | 错误/警告/提示 | -| `restart_server` | 重启 LSP 服务器 | 解决内存泄漏 | - -#### 核心特性:位置容错 - -```python -# AI 生成的代码位置常有偏差 -# cclsp 自动尝试多个位置组合 - -positions_to_try = [ - (line, column), # 原始位置 - (line - 1, column), # 上一行 - (line + 1, column), # 下一行 - (line, 0), # 行首 - (line, len(line_content)) # 行尾 -] - -for pos in positions_to_try: - result = lsp_server.definition(pos) - if result: - return result -``` - -#### 支持的语言服务器 - -| 语言 | 服务器 | 安装命令 | -|------|--------|----------| -| Python | pylsp | `pip install python-lsp-server` | -| TypeScript | typescript-language-server | `npm i -g typescript-language-server` | -| Go | gopls | `go install golang.org/x/tools/gopls@latest` | -| Rust | rust-analyzer | `rustup component add rust-analyzer` | -| C/C++ | clangd | `apt install clangd` | -| Ruby | solargraph | `gem install solargraph` | -| PHP | intelephense | `npm i -g intelephense` | -| Java | jdtls | Eclipse JDT Language Server | - -### 2.4 方式三:Plugin Marketplace 插件 - -Claude Code 官方插件市场提供语言支持扩展。 - -#### 添加插件市场 - -```bash -/plugin marketplace add boostvolt/claude-code-lsps -``` - -#### 安装语言支持 - -```bash -# Python (Pyright) -/plugin install pyright@claude-code-lsps - -# TypeScript/JavaScript -/plugin install vtsls@claude-code-lsps - -# Go -/plugin install gopls@claude-code-lsps - -# Rust -/plugin install rust-analyzer@claude-code-lsps - -# Java -/plugin install jdtls@claude-code-lsps - -# C/C++ -/plugin install clangd@claude-code-lsps - -# C# -/plugin install omnisharp@claude-code-lsps - -# PHP -/plugin install intelephense@claude-code-lsps - -# Kotlin -/plugin install kotlin-language-server@claude-code-lsps - -# Ruby -/plugin install solargraph@claude-code-lsps -``` - -#### 支持的 11 种语言 - -Python, TypeScript, Go, Rust, Java, C/C++, C#, PHP, Kotlin, Ruby, HTML/CSS - -### 2.5 三种方式对比 - -| 特性 | 内置 LSP | cclsp (MCP) | Plugin Marketplace | -|------|----------|-------------|-------------------| -| 安装复杂度 | 低 (环境变量) | 中 (npx) | 低 (/plugin) | -| 功能完整性 | 基础 5 个操作 | 完整 + 重命名 | 完整 | -| 位置容错 | 无 | 有 | 无 | -| 重命名支持 | 无 | 有 | 有 | -| 自定义配置 | 无 | 完整 JSON | 有限 | -| 多语言支持 | 需插件 | 任意 LSP | 11 种 | -| 生产稳定性 | 高 | 中 | 高 | - -### 2.6 codex-lens 集成策略 - -基于 Claude Code LSP 实现方式分析,推荐以下集成策略: - -#### 策略 A:作为 MCP Server (推荐) - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ Claude Code │ -└───────────────────────────┬─────────────────────────────────────┘ - │ MCP Protocol - ▼ -┌─────────────────────────────────────────────────────────────────┐ -│ codex-lens MCP Server │ -│ ┌─────────────────────────────────────────────────────────┐ │ -│ │ MCP Tools │ │ -│ │ • find_definition → GlobalSymbolIndex.search() │ │ -│ │ • find_references → ChainSearchEngine.search_refs() │ │ -│ │ • get_context → MCPProvider.build_context() │ │ -│ │ • hybrid_search → HybridSearchEngine.search() │ │ -│ └─────────────────────────────────────────────────────────┘ │ -│ │ │ -│ ┌─────────────────────────▼───────────────────────────────┐ │ -│ │ codex-lens Core │ │ -│ │ GlobalSymbolIndex │ SQLiteStore │ WatcherManager │ │ -│ └─────────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────┘ -``` - -**优势**: -- 直接复用 codex-lens 索引 -- 无需启动额外 LSP 进程 -- 支持 MCP 上下文注入 - -**实现文件**: `src/codexlens/mcp/server.py` - -```python -"""codex-lens MCP Server for Claude Code integration.""" - -import json -import sys -from typing import Any, Dict - -from codexlens.mcp.provider import MCPProvider -from codexlens.search.chain_search import ChainSearchEngine -from codexlens.storage.global_index import GlobalSymbolIndex - - -class CodexLensMCPServer: - """MCP Server exposing codex-lens capabilities.""" - - def __init__(self, workspace_path: str): - self.global_index = GlobalSymbolIndex(workspace_path) - self.search_engine = ChainSearchEngine(...) - self.mcp_provider = MCPProvider(...) - - def handle_request(self, request: Dict[str, Any]) -> Dict[str, Any]: - """Handle MCP tool call.""" - method = request.get("method") - params = request.get("params", {}) - - handlers = { - "find_definition": self._find_definition, - "find_references": self._find_references, - "get_context": self._get_context, - "hybrid_search": self._hybrid_search, - } - - handler = handlers.get(method) - if handler: - return handler(params) - return {"error": f"Unknown method: {method}"} - - def _find_definition(self, params: Dict) -> Dict: - """Find symbol definition.""" - symbol_name = params.get("symbol") - symbols = self.global_index.search(symbol_name, exact=True, limit=1) - if symbols: - s = symbols[0] - return { - "file": s.file_path, - "line": s.range[0], - "column": 0, - "kind": s.kind, - } - return {"error": "Symbol not found"} - - def _find_references(self, params: Dict) -> Dict: - """Find all references.""" - symbol_name = params.get("symbol") - refs = self.search_engine.search_references(symbol_name) - return { - "references": [ - {"file": r.file_path, "line": r.line, "context": r.context} - for r in refs - ] - } - - def _get_context(self, params: Dict) -> Dict: - """Get MCP context for LLM.""" - symbol_name = params.get("symbol") - context = self.mcp_provider.build_context(symbol_name) - return context.to_dict() if context else {"error": "Context not found"} - - def _hybrid_search(self, params: Dict) -> Dict: - """Execute hybrid search.""" - query = params.get("query") - # ... implementation -``` - -#### 策略 B:作为独立 LSP Server - -通过 cclsp 配置接入 codex-lens LSP Server。 - -**cclsp 配置** (`.claude/cclsp.json`): - -```json -{ - "servers": [ - { - "extensions": ["py", "ts", "go", "rs", "java"], - "command": ["codexlens-lsp", "--stdio"], - "rootDir": ".", - "restartInterval": 0 - } - ] -} -``` - -**优势**: -- 兼容标准 LSP 协议 -- 可被任意 LSP 客户端使用 -- cclsp 提供位置容错 - -#### 策略 C:混合模式 (最佳实践) - -``` -┌───────────────────────────────────────────────────────────────────┐ -│ Claude Code │ -│ ┌──────────────────┐ ┌──────────────────────────┐ │ -│ │ 内置 LSP 工具 │ │ MCP Client │ │ -│ │ (基础导航) │ │ (上下文注入) │ │ -│ └────────┬─────────┘ └────────────┬─────────────┘ │ -└───────────┼───────────────────────────────────┼──────────────────┘ - │ │ - │ LSP Protocol │ MCP Protocol - │ │ -┌───────────▼───────────────────────────────────▼──────────────────┐ -│ codex-lens Unified Server │ -│ ┌─────────────────────────┐ ┌─────────────────────────────┐ │ -│ │ LSP Handlers │ │ MCP Handlers │ │ -│ │ • definition │ │ • get_context │ │ -│ │ • references │ │ • enrich_prompt │ │ -│ │ • hover │ │ • hybrid_search │ │ -│ │ • completion │ │ • semantic_query │ │ -│ └────────────┬────────────┘ └──────────────┬──────────────┘ │ -│ │ │ │ -│ └───────────────┬───────────────┘ │ -│ ▼ │ -│ ┌─────────────────────────────────────────────────────────────┐ │ -│ │ codex-lens Core │ │ -│ │ GlobalSymbolIndex │ HybridSearch │ VectorStore │ Watcher │ │ -│ └─────────────────────────────────────────────────────────────┘ │ -└──────────────────────────────────────────────────────────────────┘ -``` - -**优势**: -- LSP 提供标准代码导航 -- MCP 提供 LLM 上下文增强 -- 统一索引,避免重复计算 - -### 2.7 参考资源 - -| 资源 | 链接 | -|------|------| -| Claude Code LSP 设置指南 | https://www.aifreeapi.com/en/posts/claude-code-lsp | -| cclsp GitHub | https://github.com/ktnyt/cclsp | -| Claude Code Plugins | https://code.claude.com/docs/en/plugins-reference | -| claude-code-lsps 市场 | https://github.com/Piebald-AI/claude-code-lsps | -| LSP 规范 | https://microsoft.github.io/language-server-protocol/ | -| MCP 规范 | https://modelcontextprotocol.io/ | - ---- - -## 3. Architecture Overview - -### 3.1 Target Architecture - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ Client Layer │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐ │ -│ │ VS Code │ │ Neovim │ │ Sublime │ │ Claude Code │ │ -│ │ (LSP Client)│ │ (LSP Client)│ │ (LSP Client)│ │ (Hook + MCP Client) │ │ -│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ └──────────┬──────────┘ │ -│ │ │ │ │ │ -└─────────┼────────────────┼────────────────┼─────────────────────┼───────────┘ - │ │ │ │ - └────────────────┴────────────────┴──────────┬──────────┘ - │ - (JSON-RPC / stdio) - │ -┌──────────────────────────────────────────────────────┴──────────────────────┐ -│ codex-lens LSP Server │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ │ -│ ┌─────────────────────────────────────────────────────────────────────┐ │ -│ │ LSP Layer (NEW) │ │ -│ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ │ -│ │ │ Handlers │ │ Providers │ │ Protocol │ │ │ -│ │ │ definition │ │ hover │ │ messages │ │ │ -│ │ │ references │ │ completion │ │ lifecycle │ │ │ -│ │ │ symbols │ │ │ │ │ │ │ -│ │ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │ │ -│ └─────────┼─────────────────┼─────────────────┼───────────────────────┘ │ -│ │ │ │ │ -│ ┌─────────┴─────────────────┴─────────────────┴───────────────────────┐ │ -│ │ MCP Layer (NEW) │ │ -│ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ │ -│ │ │ Schema │ │ Provider │ │ Hooks │ │ │ -│ │ │ MCPContext │ │ buildContext │ │ pre-tool │ │ │ -│ │ │ SymbolInfo │ │ enrichPrompt │ │ post-tool │ │ │ -│ │ └──────────────┘ └──────────────┘ └──────────────┘ │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ -│ │ │ -│ ┌─────────────────────────────────┴───────────────────────────────────┐ │ -│ │ Existing codex-lens Core │ │ -│ │ │ │ -│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌────────────┐ │ │ -│ │ │ Search │ │ Storage │ │ Watcher │ │ Parser │ │ │ -│ │ │ ChainSearch │ │ GlobalIndex │ │ Manager │ │ TreeSitter │ │ │ -│ │ │ HybridSearch│ │ SQLiteStore │ │ Incremental │ │ Symbols │ │ │ -│ │ └─────────────┘ └─────────────┘ └─────────────┘ └────────────┘ │ │ -│ └─────────────────────────────────────────────────────────────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` - -### 3.2 Data Flow - -``` - LSP Request Flow - ================ - -[Client] ─── textDocument/definition ───> [LSP Server] - │ - v - ┌─────────────────┐ - │ Parse Request │ - │ Extract symbol │ - └────────┬────────┘ - │ - v - ┌─────────────────┐ - │ GlobalSymbolIdx │ - │ .search() │ - └────────┬────────┘ - │ - v - ┌─────────────────┐ - │ Format Result │ - │ as Location │ - └────────┬────────┘ - │ -[Client] <─── Location Response ────────────────┘ - - - MCP Context Flow - ================ - -[Claude Code] ─── pre-tool hook ───> [MCP Provider] - │ - ┌─────────────────────┴─────────────────────┐ - v v v - ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ - │ Definition │ │ References │ │ Related │ - │ Lookup │ │ Lookup │ │ Symbols │ - └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ - │ │ │ - └─────────────────────┴─────────────────────┘ - │ - v - ┌───────────────┐ - │ MCPContext │ - │ Object │ - └───────┬───────┘ - │ -[Claude Code] <─── JSON Context ──────────┘ - │ - v - ┌───────────────────────┐ - │ Inject into LLM Prompt│ - └───────────────────────┘ -``` - -### 3.3 Module Dependencies - -``` - ┌─────────────────────┐ - │ lsp/server.py │ - │ (Entry Point) │ - └──────────┬──────────┘ - │ - ┌───────────────────┼───────────────────┐ - │ │ │ - v v v - ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ - │lsp/handlers │ │lsp/providers│ │ mcp/provider│ - │ .py │ │ .py │ │ .py │ - └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ - │ │ │ - └───────────────────┼───────────────────┘ - │ - v - ┌─────────────────────┐ - │ search/chain_search │ - │ .py │ - └──────────┬──────────┘ - │ - ┌───────────────────┼───────────────────┐ - │ │ │ - v v v - ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ - │storage/ │ │storage/ │ │watcher/ │ - │global_index │ │sqlite_store │ │manager.py │ - └─────────────┘ └─────────────┘ └─────────────┘ -``` - ---- - -## 4. Phase 1: LSP Server Foundation - -### 4.1 Overview - -| Attribute | Value | -|-----------|-------| -| Priority | HIGH | -| Complexity | Medium | -| Dependencies | pygls library | -| Deliverables | Working LSP server with 3 core handlers | - -### 4.2 Task Breakdown - -#### Task 1.1: Project Setup - -**File**: `pyproject.toml` (MODIFY) - -```toml -[project.optional-dependencies] -lsp = [ - "pygls>=1.3.0", -] - -[project.scripts] -codexlens-lsp = "codexlens.lsp:main" -``` - -**Acceptance Criteria**: -- [ ] `pip install -e ".[lsp]"` succeeds -- [ ] `codexlens-lsp --help` shows usage - ---- - -#### Task 1.2: LSP Server Core - -**File**: `src/codexlens/lsp/__init__.py` (NEW) - -```python -"""codex-lens Language Server Protocol implementation.""" - -from codexlens.lsp.server import CodexLensLanguageServer, main - -__all__ = ["CodexLensLanguageServer", "main"] -``` - -**File**: `src/codexlens/lsp/server.py` (NEW) - -```python -"""Main LSP server implementation using pygls.""" - -import logging -from pathlib import Path -from typing import Optional - -from lsprotocol import types as lsp -from pygls.server import LanguageServer - -from codexlens.search.chain_search import ChainSearchEngine -from codexlens.storage.global_index import GlobalSymbolIndex -from codexlens.storage.registry import RegistryStore -from codexlens.storage.path_mapper import PathMapper -from codexlens.watcher.manager import WatcherManager - -logger = logging.getLogger(__name__) - - -class CodexLensLanguageServer(LanguageServer): - """Language Server powered by codex-lens indexing.""" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.workspace_path: Optional[Path] = None - self.registry: Optional[RegistryStore] = None - self.search_engine: Optional[ChainSearchEngine] = None - self.global_index: Optional[GlobalSymbolIndex] = None - self.watcher: Optional[WatcherManager] = None - - def initialize_codexlens(self, workspace_path: Path) -> None: - """Initialize codex-lens components for the workspace.""" - self.workspace_path = workspace_path - - # Initialize registry and search engine - self.registry = RegistryStore() - self.registry.initialize() - - mapper = PathMapper() - self.search_engine = ChainSearchEngine(self.registry, mapper) - - # Initialize global symbol index - self.global_index = GlobalSymbolIndex(workspace_path) - - # Start file watcher for incremental updates - self.watcher = WatcherManager( - root_path=workspace_path, - on_indexed=self._on_file_indexed - ) - self.watcher.start() - - logger.info(f"Initialized codex-lens for workspace: {workspace_path}") - - def _on_file_indexed(self, file_path: Path) -> None: - """Callback when a file is indexed.""" - logger.debug(f"File indexed: {file_path}") - - def shutdown_codexlens(self) -> None: - """Cleanup codex-lens components.""" - if self.watcher: - self.watcher.stop() - self.watcher = None - logger.info("codex-lens shutdown complete") - - -# Create server instance -server = CodexLensLanguageServer( - name="codex-lens", - version="0.1.0" -) - - -@server.feature(lsp.INITIALIZE) -def on_initialize(params: lsp.InitializeParams) -> lsp.InitializeResult: - """Handle LSP initialize request.""" - if params.root_uri: - workspace_path = Path(params.root_uri.replace("file://", "")) - server.initialize_codexlens(workspace_path) - - return lsp.InitializeResult( - capabilities=lsp.ServerCapabilities( - text_document_sync=lsp.TextDocumentSyncOptions( - open_close=True, - change=lsp.TextDocumentSyncKind.Incremental, - save=lsp.SaveOptions(include_text=False), - ), - definition_provider=True, - references_provider=True, - completion_provider=lsp.CompletionOptions( - trigger_characters=[".", "_"], - ), - hover_provider=True, - workspace_symbol_provider=True, - ), - server_info=lsp.ServerInfo( - name="codex-lens", - version="0.1.0", - ), - ) - - -@server.feature(lsp.SHUTDOWN) -def on_shutdown(params: None) -> None: - """Handle LSP shutdown request.""" - server.shutdown_codexlens() - - -def main(): - """Entry point for the LSP server.""" - import argparse - - parser = argparse.ArgumentParser(description="codex-lens Language Server") - parser.add_argument("--stdio", action="store_true", help="Use stdio transport") - parser.add_argument("--tcp", action="store_true", help="Use TCP transport") - parser.add_argument("--host", default="127.0.0.1", help="TCP host") - parser.add_argument("--port", type=int, default=2087, help="TCP port") - - args = parser.parse_args() - - if args.tcp: - server.start_tcp(args.host, args.port) - else: - server.start_io() - - -if __name__ == "__main__": - main() -``` - -**Acceptance Criteria**: -- [ ] Server starts without errors -- [ ] Handles initialize/shutdown lifecycle -- [ ] WatcherManager starts on workspace open - ---- - -#### Task 1.3: Definition Handler - -**File**: `src/codexlens/lsp/handlers.py` (NEW) - -```python -"""LSP request handlers.""" - -import logging -from pathlib import Path -from typing import List, Optional, Union - -from lsprotocol import types as lsp - -from codexlens.lsp.server import server -from codexlens.entities import Symbol - -logger = logging.getLogger(__name__) - - -def symbol_to_location(symbol: Symbol) -> lsp.Location: - """Convert codex-lens Symbol to LSP Location.""" - return lsp.Location( - uri=f"file://{symbol.file_path}", - range=lsp.Range( - start=lsp.Position( - line=symbol.range[0] - 1, # LSP is 0-indexed - character=0, - ), - end=lsp.Position( - line=symbol.range[1] - 1, - character=0, - ), - ), - ) - - -@server.feature(lsp.TEXT_DOCUMENT_DEFINITION) -def on_definition( - params: lsp.DefinitionParams, -) -> Optional[Union[lsp.Location, List[lsp.Location]]]: - """Handle textDocument/definition request.""" - if not server.global_index: - return None - - # Get the word at cursor position - document = server.workspace.get_text_document(params.text_document.uri) - word = _get_word_at_position(document, params.position) - - if not word: - return None - - logger.debug(f"Definition lookup for: {word}") - - # Search in global symbol index - symbols = server.global_index.search(word, exact=True, limit=10) - - if not symbols: - return None - - if len(symbols) == 1: - return symbol_to_location(symbols[0]) - - return [symbol_to_location(s) for s in symbols] - - -def _get_word_at_position(document, position: lsp.Position) -> Optional[str]: - """Extract the word at the given position.""" - try: - lines = document.source.split("\n") - if position.line >= len(lines): - return None - - line = lines[position.line] - - # Find word boundaries - start = position.character - end = position.character - - # Expand left - while start > 0 and _is_identifier_char(line[start - 1]): - start -= 1 - - # Expand right - while end < len(line) and _is_identifier_char(line[end]): - end += 1 - - word = line[start:end] - return word if word else None - except Exception as e: - logger.error(f"Error extracting word: {e}") - return None - - -def _is_identifier_char(char: str) -> bool: - """Check if character is valid in an identifier.""" - return char.isalnum() or char == "_" -``` - -**Acceptance Criteria**: -- [ ] Returns Location for known symbols -- [ ] Returns None for unknown symbols -- [ ] Handles multiple definitions (overloads) - ---- - -#### Task 1.4: Completion Handler - -**File**: `src/codexlens/lsp/handlers.py` (APPEND) - -```python -@server.feature(lsp.TEXT_DOCUMENT_COMPLETION) -def on_completion( - params: lsp.CompletionParams, -) -> Optional[lsp.CompletionList]: - """Handle textDocument/completion request.""" - if not server.global_index: - return None - - # Get partial word at cursor - document = server.workspace.get_text_document(params.text_document.uri) - prefix = _get_prefix_at_position(document, params.position) - - if not prefix or len(prefix) < 2: - return None - - logger.debug(f"Completion lookup for prefix: {prefix}") - - # Search with prefix mode - symbols = server.global_index.search(prefix, prefix_mode=True, limit=50) - - if not symbols: - return None - - items = [] - for symbol in symbols: - kind = _symbol_kind_to_completion_kind(symbol.kind) - items.append( - lsp.CompletionItem( - label=symbol.name, - kind=kind, - detail=f"{symbol.kind} in {Path(symbol.file_path).name}", - documentation=lsp.MarkupContent( - kind=lsp.MarkupKind.Markdown, - value=f"Defined at line {symbol.range[0]}", - ), - ) - ) - - return lsp.CompletionList(is_incomplete=len(items) >= 50, items=items) - - -def _get_prefix_at_position(document, position: lsp.Position) -> Optional[str]: - """Extract the incomplete word prefix at position.""" - try: - lines = document.source.split("\n") - if position.line >= len(lines): - return None - - line = lines[position.line] - - # Find prefix start - start = position.character - while start > 0 and _is_identifier_char(line[start - 1]): - start -= 1 - - return line[start:position.character] if start < position.character else None - except Exception: - return None - - -def _symbol_kind_to_completion_kind(kind: str) -> lsp.CompletionItemKind: - """Map symbol kind to LSP completion kind.""" - mapping = { - "function": lsp.CompletionItemKind.Function, - "method": lsp.CompletionItemKind.Method, - "class": lsp.CompletionItemKind.Class, - "variable": lsp.CompletionItemKind.Variable, - "constant": lsp.CompletionItemKind.Constant, - "module": lsp.CompletionItemKind.Module, - "property": lsp.CompletionItemKind.Property, - "interface": lsp.CompletionItemKind.Interface, - "enum": lsp.CompletionItemKind.Enum, - } - return mapping.get(kind.lower(), lsp.CompletionItemKind.Text) -``` - -**Acceptance Criteria**: -- [ ] Returns completion items for valid prefixes -- [ ] Respects minimum prefix length (2 chars) -- [ ] Maps symbol kinds correctly - ---- - -#### Task 1.5: Workspace Symbol Handler - -**File**: `src/codexlens/lsp/handlers.py` (APPEND) - -```python -@server.feature(lsp.WORKSPACE_SYMBOL) -def on_workspace_symbol( - params: lsp.WorkspaceSymbolParams, -) -> Optional[List[lsp.SymbolInformation]]: - """Handle workspace/symbol request.""" - if not server.search_engine or not server.workspace_path: - return None - - query = params.query - if not query or len(query) < 2: - return None - - logger.debug(f"Workspace symbol search: {query}") - - # Use chain search engine's symbol search - result = server.search_engine.search_symbols( - query=query, - source_path=server.workspace_path, - limit=100, - ) - - if not result: - return None - - items = [] - for symbol in result: - kind = _symbol_kind_to_symbol_kind(symbol.kind) - items.append( - lsp.SymbolInformation( - name=symbol.name, - kind=kind, - location=symbol_to_location(symbol), - container_name=Path(symbol.file_path).parent.name, - ) - ) - - return items - - -def _symbol_kind_to_symbol_kind(kind: str) -> lsp.SymbolKind: - """Map symbol kind string to LSP SymbolKind.""" - mapping = { - "function": lsp.SymbolKind.Function, - "method": lsp.SymbolKind.Method, - "class": lsp.SymbolKind.Class, - "variable": lsp.SymbolKind.Variable, - "constant": lsp.SymbolKind.Constant, - "module": lsp.SymbolKind.Module, - "property": lsp.SymbolKind.Property, - "interface": lsp.SymbolKind.Interface, - "enum": lsp.SymbolKind.Enum, - "struct": lsp.SymbolKind.Struct, - "namespace": lsp.SymbolKind.Namespace, - } - return mapping.get(kind.lower(), lsp.SymbolKind.Variable) -``` - -**Acceptance Criteria**: -- [ ] Returns symbols matching query -- [ ] Respects result limit -- [ ] Includes container information - ---- - -#### Task 1.6: File Watcher Integration - -**File**: `src/codexlens/lsp/handlers.py` (APPEND) - -```python -@server.feature(lsp.TEXT_DOCUMENT_DID_SAVE) -def on_did_save(params: lsp.DidSaveTextDocumentParams) -> None: - """Handle textDocument/didSave notification.""" - if not server.watcher: - return - - file_path = Path(params.text_document.uri.replace("file://", "")) - logger.debug(f"File saved: {file_path}") - - # Trigger incremental indexing - server.watcher.trigger_index(file_path) - - -@server.feature(lsp.TEXT_DOCUMENT_DID_OPEN) -def on_did_open(params: lsp.DidOpenTextDocumentParams) -> None: - """Handle textDocument/didOpen notification.""" - logger.debug(f"File opened: {params.text_document.uri}") - - -@server.feature(lsp.TEXT_DOCUMENT_DID_CLOSE) -def on_did_close(params: lsp.DidCloseTextDocumentParams) -> None: - """Handle textDocument/didClose notification.""" - logger.debug(f"File closed: {params.text_document.uri}") -``` - -**Acceptance Criteria**: -- [ ] didSave triggers incremental index -- [ ] No blocking on save -- [ ] Proper logging - ---- - -### 4.3 Phase 1 Test Plan - -**File**: `tests/lsp/test_server.py` (NEW) - -```python -"""Tests for LSP server.""" - -import pytest -from pathlib import Path -from unittest.mock import Mock, patch - -from lsprotocol import types as lsp - -from codexlens.lsp.server import CodexLensLanguageServer, on_initialize - - -class TestServerInitialization: - """Test server lifecycle.""" - - def test_initialize_creates_components(self, tmp_path): - """Server creates all components on initialize.""" - server = CodexLensLanguageServer("test", "0.1.0") - - params = lsp.InitializeParams( - root_uri=f"file://{tmp_path}", - capabilities=lsp.ClientCapabilities(), - ) - - result = on_initialize(params) - - assert result.capabilities.definition_provider - assert result.capabilities.completion_provider - assert result.capabilities.workspace_symbol_provider - - -class TestDefinitionHandler: - """Test textDocument/definition handler.""" - - def test_definition_returns_location(self): - """Definition returns valid Location.""" - # Setup mock global index - mock_symbol = Mock() - mock_symbol.file_path = "/test/file.py" - mock_symbol.range = (10, 15) - - with patch.object(server, 'global_index') as mock_index: - mock_index.search.return_value = [mock_symbol] - - # Call handler - result = on_definition(Mock( - text_document=Mock(uri="file:///test/file.py"), - position=lsp.Position(line=5, character=10), - )) - - assert isinstance(result, lsp.Location) - assert result.uri == "file:///test/file.py" - - -class TestCompletionHandler: - """Test textDocument/completion handler.""" - - def test_completion_returns_items(self): - """Completion returns CompletionList.""" - # Test implementation - pass -``` - -**Acceptance Criteria**: -- [ ] All unit tests pass -- [ ] Coverage > 80% for LSP module -- [ ] Integration test with real workspace - ---- - -## 5. Phase 2: Find References - -### 5.1 Overview - -| Attribute | Value | -|-----------|-------| -| Priority | MEDIUM | -| Complexity | High | -| Dependencies | Phase 1 complete | -| Deliverables | `search_references()` method + LSP handler | - -### 5.2 Task Breakdown - -#### Task 2.1: Add `search_references` to ChainSearchEngine - -**File**: `src/codexlens/search/chain_search.py` (MODIFY) - -```python -# Add to ChainSearchEngine class - -from dataclasses import dataclass -from typing import List -from concurrent.futures import ThreadPoolExecutor, as_completed - - -@dataclass -class ReferenceResult: - """Result from reference search.""" - file_path: str - line: int - column: int - context: str # Surrounding code snippet - relationship_type: str # "call", "import", "inheritance", etc. - - -def search_references( - self, - symbol_name: str, - source_path: Optional[Path] = None, - depth: int = -1, - limit: int = 100, -) -> List[ReferenceResult]: - """Find all references to a symbol across the project. - - Args: - symbol_name: Fully qualified or simple name of the symbol - source_path: Starting path for search (default: workspace root) - depth: Search depth (-1 = unlimited) - limit: Maximum results to return - - Returns: - List of ReferenceResult objects sorted by file path and line - """ - source = source_path or self._workspace_path - - # Collect all index paths - index_paths = self._collect_index_paths(source, depth) - - if not index_paths: - logger.warning(f"No indexes found for reference search: {source}") - return [] - - # Parallel query across all indexes - all_results: List[ReferenceResult] = [] - - with ThreadPoolExecutor(max_workers=self._options.max_workers) as executor: - futures = { - executor.submit( - self._search_references_single, - idx_path, - symbol_name, - ): idx_path - for idx_path in index_paths - } - - for future in as_completed(futures): - try: - results = future.result(timeout=10) - all_results.extend(results) - except Exception as e: - logger.error(f"Reference search failed: {e}") - - # Sort and limit - all_results.sort(key=lambda r: (r.file_path, r.line)) - return all_results[:limit] - - -def _search_references_single( - self, - index_path: Path, - symbol_name: str, -) -> List[ReferenceResult]: - """Search for references in a single index.""" - results = [] - - try: - store = DirIndexStore(index_path.parent) - - # Query code_relationships table - query = """ - SELECT - cr.source_file, - cr.source_line, - cr.source_column, - cr.relationship_type, - f.content - FROM code_relationships cr - JOIN files f ON f.full_path = cr.source_file - WHERE cr.target_qualified_name LIKE ? - OR cr.target_name = ? - ORDER BY cr.source_file, cr.source_line - """ - - rows = store.execute_query( - query, - (f"%{symbol_name}", symbol_name), - ) - - for row in rows: - # Extract context (3 lines around reference) - content_lines = row["content"].split("\n") - line_idx = row["source_line"] - 1 - start = max(0, line_idx - 1) - end = min(len(content_lines), line_idx + 2) - context = "\n".join(content_lines[start:end]) - - results.append(ReferenceResult( - file_path=row["source_file"], - line=row["source_line"], - column=row["source_column"] or 0, - context=context, - relationship_type=row["relationship_type"], - )) - except Exception as e: - logger.error(f"Failed to search references in {index_path}: {e}") - - return results -``` - -**Acceptance Criteria**: -- [ ] Searches all index files in parallel -- [ ] Returns properly formatted ReferenceResult -- [ ] Handles missing indexes gracefully - ---- - -#### Task 2.2: LSP References Handler - -**File**: `src/codexlens/lsp/handlers.py` (APPEND) - -```python -@server.feature(lsp.TEXT_DOCUMENT_REFERENCES) -def on_references( - params: lsp.ReferenceParams, -) -> Optional[List[lsp.Location]]: - """Handle textDocument/references request.""" - if not server.search_engine or not server.workspace_path: - return None - - # Get the word at cursor - document = server.workspace.get_text_document(params.text_document.uri) - word = _get_word_at_position(document, params.position) - - if not word: - return None - - logger.debug(f"References lookup for: {word}") - - # Search for references - references = server.search_engine.search_references( - symbol_name=word, - source_path=server.workspace_path, - limit=200, - ) - - if not references: - return None - - # Convert to LSP Locations - locations = [] - for ref in references: - locations.append( - lsp.Location( - uri=f"file://{ref.file_path}", - range=lsp.Range( - start=lsp.Position(line=ref.line - 1, character=ref.column), - end=lsp.Position(line=ref.line - 1, character=ref.column + len(word)), - ), - ) - ) - - return locations -``` - -**Acceptance Criteria**: -- [ ] Returns all references across project -- [ ] Includes definition if `params.context.include_declaration` -- [ ] Performance < 200ms for typical project - ---- - -### 5.3 Phase 2 Test Plan - -```python -class TestReferencesSearch: - """Test reference search functionality.""" - - def test_finds_function_calls(self, indexed_project): - """Finds all calls to a function.""" - results = search_engine.search_references("my_function") - assert len(results) > 0 - assert all(r.relationship_type == "call" for r in results) - - def test_finds_imports(self, indexed_project): - """Finds all imports of a module.""" - results = search_engine.search_references("my_module") - assert any(r.relationship_type == "import" for r in results) - - def test_parallel_search_performance(self, large_project): - """Parallel search completes within time limit.""" - import time - start = time.time() - results = search_engine.search_references("common_symbol") - elapsed = time.time() - start - assert elapsed < 0.2 # 200ms -``` - ---- - -## 6. Phase 3: Hover Information - -### 6.1 Overview - -| Attribute | Value | -|-----------|-------| -| Priority | MEDIUM | -| Complexity | Low | -| Dependencies | Phase 1 complete | -| Deliverables | Hover provider + LSP handler | - -### 6.2 Task Breakdown - -#### Task 3.1: Hover Provider - -**File**: `src/codexlens/lsp/providers.py` (NEW) - -```python -"""LSP feature providers.""" - -import logging -from dataclasses import dataclass -from pathlib import Path -from typing import Optional - -from codexlens.entities import Symbol -from codexlens.storage.sqlite_store import SQLiteStore - -logger = logging.getLogger(__name__) - - -@dataclass -class HoverInfo: - """Hover information for a symbol.""" - name: str - kind: str - signature: str - documentation: Optional[str] - file_path: str - line_range: tuple - - -class HoverProvider: - """Provides hover information for symbols.""" - - def __init__(self, global_index, registry): - self.global_index = global_index - self.registry = registry - - def get_hover_info(self, symbol_name: str) -> Optional[HoverInfo]: - """Get hover information for a symbol. - - Args: - symbol_name: Name of the symbol to look up - - Returns: - HoverInfo or None if symbol not found - """ - # Look up symbol in global index - symbols = self.global_index.search(symbol_name, exact=True, limit=1) - - if not symbols: - return None - - symbol = symbols[0] - - # Extract signature from source - signature = self._extract_signature(symbol) - - return HoverInfo( - name=symbol.name, - kind=symbol.kind, - signature=signature, - documentation=symbol.docstring, - file_path=symbol.file_path, - line_range=symbol.range, - ) - - def _extract_signature(self, symbol: Symbol) -> str: - """Extract function/class signature from source.""" - try: - # Find the index for this file - index_path = self.registry.find_index_path( - Path(symbol.file_path).parent - ) - - if not index_path: - return f"{symbol.kind} {symbol.name}" - - store = SQLiteStore(index_path.parent) - - # Get file content - rows = store.execute_query( - "SELECT content FROM files WHERE full_path = ?", - (symbol.file_path,), - ) - - if not rows: - return f"{symbol.kind} {symbol.name}" - - content = rows[0]["content"] - lines = content.split("\n") - - # Extract signature lines - start_line = symbol.range[0] - 1 - signature_lines = [] - - # Get first line (def/class declaration) - if start_line < len(lines): - first_line = lines[start_line] - signature_lines.append(first_line) - - # Continue if line ends with backslash or doesn't have closing paren - i = start_line + 1 - while i < len(lines) and i < start_line + 5: - if "):" in signature_lines[-1] or ":" in signature_lines[-1]: - break - signature_lines.append(lines[i]) - i += 1 - - return "\n".join(signature_lines) - except Exception as e: - logger.error(f"Failed to extract signature: {e}") - return f"{symbol.kind} {symbol.name}" - - def format_hover_markdown(self, info: HoverInfo) -> str: - """Format hover info as Markdown.""" - parts = [] - - # Code block with signature - parts.append(f"```python\n{info.signature}\n```") - - # Documentation if available - if info.documentation: - parts.append(f"\n---\n\n{info.documentation}") - - # Location info - parts.append( - f"\n---\n\n*{info.kind}* defined in " - f"`{Path(info.file_path).name}` " - f"(line {info.line_range[0]})" - ) - - return "\n".join(parts) -``` - ---- - -#### Task 3.2: LSP Hover Handler - -**File**: `src/codexlens/lsp/handlers.py` (APPEND) - -```python -from codexlens.lsp.providers import HoverProvider - - -@server.feature(lsp.TEXT_DOCUMENT_HOVER) -def on_hover(params: lsp.HoverParams) -> Optional[lsp.Hover]: - """Handle textDocument/hover request.""" - if not server.global_index or not server.registry: - return None - - # Get word at cursor - document = server.workspace.get_text_document(params.text_document.uri) - word = _get_word_at_position(document, params.position) - - if not word: - return None - - logger.debug(f"Hover lookup for: {word}") - - # Get hover info - provider = HoverProvider(server.global_index, server.registry) - info = provider.get_hover_info(word) - - if not info: - return None - - # Format as markdown - content = provider.format_hover_markdown(info) - - return lsp.Hover( - contents=lsp.MarkupContent( - kind=lsp.MarkupKind.Markdown, - value=content, - ), - ) -``` - -**Acceptance Criteria**: -- [ ] Shows function signature -- [ ] Shows documentation if available -- [ ] Shows file location - ---- - -## 7. Phase 4: MCP Bridge - -### 7.1 Overview - -| Attribute | Value | -|-----------|-------| -| Priority | HIGH VALUE | -| Complexity | Medium | -| Dependencies | Phase 1-2 complete | -| Deliverables | MCP schema + provider + hook interfaces | - -### 7.2 Task Breakdown - -#### Task 4.1: MCP Schema Definition - -**File**: `src/codexlens/mcp/__init__.py` (NEW) - -```python -"""Model Context Protocol implementation for Claude Code integration.""" - -from codexlens.mcp.schema import ( - MCPContext, - SymbolInfo, - ReferenceInfo, - RelatedSymbol, -) -from codexlens.mcp.provider import MCPProvider - -__all__ = [ - "MCPContext", - "SymbolInfo", - "ReferenceInfo", - "RelatedSymbol", - "MCPProvider", -] -``` - -**File**: `src/codexlens/mcp/schema.py` (NEW) - -```python -"""MCP data models.""" - -from dataclasses import dataclass, field, asdict -from typing import List, Optional -import json - - -@dataclass -class SymbolInfo: - """Information about a code symbol.""" - name: str - kind: str - file_path: str - line_start: int - line_end: int - signature: Optional[str] = None - documentation: Optional[str] = None - - def to_dict(self) -> dict: - return asdict(self) - - -@dataclass -class ReferenceInfo: - """Information about a symbol reference.""" - file_path: str - line: int - column: int - context: str - relationship_type: str - - def to_dict(self) -> dict: - return asdict(self) - - -@dataclass -class RelatedSymbol: - """Related symbol (import, call target, etc.).""" - name: str - kind: str - relationship: str # "imports", "calls", "inherits", "uses" - file_path: Optional[str] = None - - def to_dict(self) -> dict: - return asdict(self) - - -@dataclass -class MCPContext: - """Model Context Protocol context object. - - This is the structured context that gets injected into - LLM prompts to provide code understanding. - """ - version: str = "1.0" - context_type: str = "code_context" - symbol: Optional[SymbolInfo] = None - definition: Optional[str] = None - references: List[ReferenceInfo] = field(default_factory=list) - related_symbols: List[RelatedSymbol] = field(default_factory=list) - metadata: dict = field(default_factory=dict) - - def to_dict(self) -> dict: - """Convert to dictionary for JSON serialization.""" - result = { - "version": self.version, - "context_type": self.context_type, - "metadata": self.metadata, - } - - if self.symbol: - result["symbol"] = self.symbol.to_dict() - - if self.definition: - result["definition"] = self.definition - - if self.references: - result["references"] = [r.to_dict() for r in self.references] - - if self.related_symbols: - result["related_symbols"] = [s.to_dict() for s in self.related_symbols] - - return result - - def to_json(self, indent: int = 2) -> str: - """Serialize to JSON string.""" - return json.dumps(self.to_dict(), indent=indent) - - def to_prompt_injection(self) -> str: - """Format for injection into LLM prompt.""" - parts = [""] - - if self.symbol: - parts.append(f"## Symbol: {self.symbol.name}") - parts.append(f"Type: {self.symbol.kind}") - parts.append(f"Location: {self.symbol.file_path}:{self.symbol.line_start}") - - if self.definition: - parts.append("\n## Definition") - parts.append(f"```\n{self.definition}\n```") - - if self.references: - parts.append(f"\n## References ({len(self.references)} found)") - for i, ref in enumerate(self.references[:5]): # Limit to 5 - parts.append(f"- {ref.file_path}:{ref.line} ({ref.relationship_type})") - parts.append(f" ```\n {ref.context}\n ```") - - if self.related_symbols: - parts.append("\n## Related Symbols") - for sym in self.related_symbols[:10]: # Limit to 10 - parts.append(f"- {sym.name} ({sym.relationship})") - - parts.append("") - - return "\n".join(parts) -``` - ---- - -#### Task 4.2: MCP Provider - -**File**: `src/codexlens/mcp/provider.py` (NEW) - -```python -"""MCP context provider.""" - -import logging -from pathlib import Path -from typing import Optional, List - -from codexlens.mcp.schema import ( - MCPContext, - SymbolInfo, - ReferenceInfo, - RelatedSymbol, -) -from codexlens.search.chain_search import ChainSearchEngine -from codexlens.storage.global_index import GlobalSymbolIndex -from codexlens.storage.sqlite_store import SQLiteStore -from codexlens.storage.registry import RegistryStore - -logger = logging.getLogger(__name__) - - -class MCPProvider: - """Builds MCP context objects from codex-lens data.""" - - def __init__( - self, - global_index: GlobalSymbolIndex, - search_engine: ChainSearchEngine, - registry: RegistryStore, - ): - self.global_index = global_index - self.search_engine = search_engine - self.registry = registry - - def build_context( - self, - symbol_name: str, - context_type: str = "symbol_explanation", - include_references: bool = True, - include_related: bool = True, - max_references: int = 10, - ) -> Optional[MCPContext]: - """Build comprehensive context for a symbol. - - Args: - symbol_name: Name of the symbol to contextualize - context_type: Type of context being requested - include_references: Whether to include reference locations - include_related: Whether to include related symbols - max_references: Maximum number of references to include - - Returns: - MCPContext object or None if symbol not found - """ - # Look up symbol - symbols = self.global_index.search(symbol_name, exact=True, limit=1) - - if not symbols: - logger.warning(f"Symbol not found for MCP context: {symbol_name}") - return None - - symbol = symbols[0] - - # Build SymbolInfo - symbol_info = SymbolInfo( - name=symbol.name, - kind=symbol.kind, - file_path=symbol.file_path, - line_start=symbol.range[0], - line_end=symbol.range[1], - signature=getattr(symbol, 'signature', None), - documentation=getattr(symbol, 'docstring', None), - ) - - # Extract definition source code - definition = self._extract_definition(symbol) - - # Get references - references = [] - if include_references: - refs = self.search_engine.search_references( - symbol_name, - limit=max_references, - ) - references = [ - ReferenceInfo( - file_path=r.file_path, - line=r.line, - column=r.column, - context=r.context, - relationship_type=r.relationship_type, - ) - for r in refs - ] - - # Get related symbols - related_symbols = [] - if include_related: - related_symbols = self._get_related_symbols(symbol) - - return MCPContext( - context_type=context_type, - symbol=symbol_info, - definition=definition, - references=references, - related_symbols=related_symbols, - metadata={ - "source": "codex-lens", - "indexed_at": symbol.indexed_at if hasattr(symbol, 'indexed_at') else None, - }, - ) - - def _extract_definition(self, symbol) -> Optional[str]: - """Extract source code for symbol definition.""" - try: - index_path = self.registry.find_index_path( - Path(symbol.file_path).parent - ) - - if not index_path: - return None - - store = SQLiteStore(index_path.parent) - rows = store.execute_query( - "SELECT content FROM files WHERE full_path = ?", - (symbol.file_path,), - ) - - if not rows: - return None - - content = rows[0]["content"] - lines = content.split("\n") - - # Extract symbol lines - start = symbol.range[0] - 1 - end = symbol.range[1] - - return "\n".join(lines[start:end]) - except Exception as e: - logger.error(f"Failed to extract definition: {e}") - return None - - def _get_related_symbols(self, symbol) -> List[RelatedSymbol]: - """Get symbols related to the given symbol.""" - related = [] - - try: - index_path = self.registry.find_index_path( - Path(symbol.file_path).parent - ) - - if not index_path: - return related - - store = SQLiteStore(index_path.parent) - - # Query relationships where this symbol is the source - rows = store.execute_query( - """ - SELECT target_name, target_qualified_name, relationship_type - FROM code_relationships - WHERE source_qualified_name LIKE ? - LIMIT 20 - """, - (f"%{symbol.name}%",), - ) - - for row in rows: - related.append(RelatedSymbol( - name=row["target_name"], - kind="unknown", # Would need another lookup - relationship=row["relationship_type"], - )) - except Exception as e: - logger.error(f"Failed to get related symbols: {e}") - - return related - - def build_context_for_file( - self, - file_path: Path, - context_type: str = "file_overview", - ) -> MCPContext: - """Build context for an entire file.""" - # Get all symbols in file - symbols = self.global_index.search_by_file(str(file_path)) - - related = [ - RelatedSymbol( - name=s.name, - kind=s.kind, - relationship="defines", - ) - for s in symbols - ] - - return MCPContext( - context_type=context_type, - related_symbols=related, - metadata={ - "file_path": str(file_path), - "symbol_count": len(symbols), - }, - ) -``` - ---- - -#### Task 4.3: Hook Interfaces - -**File**: `src/codexlens/mcp/hooks.py` (NEW) - -```python -"""Hook interfaces for Claude Code integration.""" - -import logging -from pathlib import Path -from typing import Any, Dict, Optional, Callable - -from codexlens.mcp.provider import MCPProvider -from codexlens.mcp.schema import MCPContext - -logger = logging.getLogger(__name__) - - -class HookManager: - """Manages hook registration and execution.""" - - def __init__(self, mcp_provider: MCPProvider): - self.mcp_provider = mcp_provider - self._pre_hooks: Dict[str, Callable] = {} - self._post_hooks: Dict[str, Callable] = {} - - # Register default hooks - self._register_default_hooks() - - def _register_default_hooks(self): - """Register built-in hooks.""" - self._pre_hooks["explain"] = self._pre_explain_hook - self._pre_hooks["refactor"] = self._pre_refactor_hook - self._pre_hooks["document"] = self._pre_document_hook - - def execute_pre_hook( - self, - action: str, - params: Dict[str, Any], - ) -> Optional[MCPContext]: - """Execute pre-tool hook to gather context. - - Args: - action: The action being performed (e.g., "explain", "refactor") - params: Parameters for the action - - Returns: - MCPContext to inject into prompt, or None - """ - hook = self._pre_hooks.get(action) - - if not hook: - logger.debug(f"No pre-hook for action: {action}") - return None - - try: - return hook(params) - except Exception as e: - logger.error(f"Pre-hook failed for {action}: {e}") - return None - - def execute_post_hook( - self, - action: str, - result: Any, - ) -> None: - """Execute post-tool hook for proactive caching. - - Args: - action: The action that was performed - result: Result of the action - """ - hook = self._post_hooks.get(action) - - if not hook: - return - - try: - hook(result) - except Exception as e: - logger.error(f"Post-hook failed for {action}: {e}") - - def _pre_explain_hook(self, params: Dict[str, Any]) -> Optional[MCPContext]: - """Pre-hook for 'explain' action.""" - symbol_name = params.get("symbol") - - if not symbol_name: - return None - - return self.mcp_provider.build_context( - symbol_name=symbol_name, - context_type="symbol_explanation", - include_references=True, - include_related=True, - ) - - def _pre_refactor_hook(self, params: Dict[str, Any]) -> Optional[MCPContext]: - """Pre-hook for 'refactor' action.""" - symbol_name = params.get("symbol") - - if not symbol_name: - return None - - return self.mcp_provider.build_context( - symbol_name=symbol_name, - context_type="refactor_context", - include_references=True, # Important for refactoring - include_related=True, - max_references=20, # More references for refactoring - ) - - def _pre_document_hook(self, params: Dict[str, Any]) -> Optional[MCPContext]: - """Pre-hook for 'document' action.""" - symbol_name = params.get("symbol") - file_path = params.get("file_path") - - if symbol_name: - return self.mcp_provider.build_context( - symbol_name=symbol_name, - context_type="documentation_context", - include_references=False, - include_related=True, - ) - elif file_path: - return self.mcp_provider.build_context_for_file( - Path(file_path), - context_type="file_documentation", - ) - - return None - - def register_pre_hook( - self, - action: str, - hook: Callable[[Dict[str, Any]], Optional[MCPContext]], - ) -> None: - """Register a custom pre-tool hook.""" - self._pre_hooks[action] = hook - - def register_post_hook( - self, - action: str, - hook: Callable[[Any], None], - ) -> None: - """Register a custom post-tool hook.""" - self._post_hooks[action] = hook - - -# Convenience function for Claude Code integration -def create_context_for_prompt( - mcp_provider: MCPProvider, - action: str, - params: Dict[str, Any], -) -> str: - """Create context string for prompt injection. - - This is the main entry point for Claude Code hook integration. - - Args: - mcp_provider: The MCP provider instance - action: Action being performed - params: Action parameters - - Returns: - Formatted context string for prompt injection - """ - manager = HookManager(mcp_provider) - context = manager.execute_pre_hook(action, params) - - if context: - return context.to_prompt_injection() - - return "" -``` - ---- - -## 8. Phase 5: Advanced Features - -### 8.1 Custom LSP Commands - -**File**: `src/codexlens/lsp/handlers.py` (APPEND) - -```python -# Custom commands for advanced features - -@server.command("codexlens.hybridSearch") -def cmd_hybrid_search(params: List[Any]) -> dict: - """Execute hybrid search combining FTS and semantic.""" - if len(params) < 1: - return {"error": "Query required"} - - query = params[0] - limit = params[1] if len(params) > 1 else 20 - - from codexlens.search.hybrid_search import HybridSearchEngine - - engine = HybridSearchEngine(server.search_engine.store) - results = engine.search(query, limit=limit) - - return { - "results": [ - { - "path": r.path, - "score": r.score, - "excerpt": r.excerpt, - } - for r in results - ] - } - - -@server.command("codexlens.getMCPContext") -def cmd_get_mcp_context(params: List[Any]) -> dict: - """Get MCP context for a symbol.""" - if len(params) < 1: - return {"error": "Symbol name required"} - - symbol_name = params[0] - context_type = params[1] if len(params) > 1 else "symbol_explanation" - - from codexlens.mcp.provider import MCPProvider - - provider = MCPProvider( - server.global_index, - server.search_engine, - server.registry, - ) - - context = provider.build_context(symbol_name, context_type) - - if context: - return context.to_dict() - - return {"error": "Symbol not found"} -``` - -### 8.2 Performance Optimizations - -**File**: `src/codexlens/lsp/cache.py` (NEW) - -```python -"""Caching layer for LSP performance.""" - -import time -from functools import lru_cache -from typing import Any, Dict, Optional -from threading import Lock - - -class LRUCacheWithTTL: - """LRU cache with time-to-live expiration.""" - - def __init__(self, maxsize: int = 1000, ttl_seconds: int = 300): - self.maxsize = maxsize - self.ttl = ttl_seconds - self._cache: Dict[str, tuple] = {} # key -> (value, timestamp) - self._lock = Lock() - - def get(self, key: str) -> Optional[Any]: - """Get value from cache if not expired.""" - with self._lock: - if key not in self._cache: - return None - - value, timestamp = self._cache[key] - - if time.time() - timestamp > self.ttl: - del self._cache[key] - return None - - return value - - def set(self, key: str, value: Any) -> None: - """Set value in cache.""" - with self._lock: - # Evict oldest if at capacity - if len(self._cache) >= self.maxsize: - oldest_key = min( - self._cache.keys(), - key=lambda k: self._cache[k][1], - ) - del self._cache[oldest_key] - - self._cache[key] = (value, time.time()) - - def invalidate(self, key: str) -> None: - """Remove key from cache.""" - with self._lock: - self._cache.pop(key, None) - - def invalidate_prefix(self, prefix: str) -> None: - """Remove all keys with given prefix.""" - with self._lock: - keys_to_remove = [ - k for k in self._cache.keys() - if k.startswith(prefix) - ] - for key in keys_to_remove: - del self._cache[key] - - def clear(self) -> None: - """Clear all cache entries.""" - with self._lock: - self._cache.clear() - - -# Global cache instances -definition_cache = LRUCacheWithTTL(maxsize=500, ttl_seconds=300) -references_cache = LRUCacheWithTTL(maxsize=200, ttl_seconds=60) -completion_cache = LRUCacheWithTTL(maxsize=100, ttl_seconds=30) -``` - ---- - -## 9. Testing Strategy - -### 9.1 Test Structure - -``` -tests/ -├── lsp/ -│ ├── __init__.py -│ ├── conftest.py # Fixtures -│ ├── test_server.py # Server lifecycle -│ ├── test_definition.py # Definition handler -│ ├── test_references.py # References handler -│ ├── test_completion.py # Completion handler -│ ├── test_hover.py # Hover handler -│ └── test_workspace_symbol.py # Workspace symbol -│ -├── mcp/ -│ ├── __init__.py -│ ├── test_schema.py # MCP schema validation -│ ├── test_provider.py # Context building -│ └── test_hooks.py # Hook execution -│ -└── integration/ - ├── __init__.py - ├── test_lsp_client.py # Full LSP handshake - └── test_mcp_flow.py # End-to-end MCP flow -``` - -### 9.2 Fixtures - -**File**: `tests/lsp/conftest.py` - -```python -"""Test fixtures for LSP tests.""" - -import pytest -from pathlib import Path -import tempfile -import shutil - -from codexlens.lsp.server import CodexLensLanguageServer - - -@pytest.fixture -def temp_workspace(): - """Create temporary workspace with sample files.""" - tmpdir = Path(tempfile.mkdtemp()) - - # Create sample Python files - (tmpdir / "main.py").write_text(""" -def main(): - result = helper_function(42) - print(result) - -def helper_function(x): - return x * 2 -""") - - (tmpdir / "utils.py").write_text(""" -from main import helper_function - -class Calculator: - def add(self, a, b): - return a + b - - def multiply(self, a, b): - return helper_function(a) * b -""") - - yield tmpdir - - shutil.rmtree(tmpdir) - - -@pytest.fixture -def indexed_workspace(temp_workspace): - """Workspace with built indexes.""" - from codexlens.cli.commands import index_directory - - index_directory(temp_workspace) - - return temp_workspace - - -@pytest.fixture -def lsp_server(indexed_workspace): - """Initialized LSP server.""" - server = CodexLensLanguageServer("test", "0.1.0") - server.initialize_codexlens(indexed_workspace) - - yield server - - server.shutdown_codexlens() -``` - -### 9.3 Performance Benchmarks - -**File**: `tests/benchmarks/test_performance.py` - -```python -"""Performance benchmarks for LSP operations.""" - -import pytest -import time - - -class TestPerformance: - """Performance benchmark tests.""" - - @pytest.mark.benchmark - def test_definition_latency(self, lsp_server, benchmark): - """Definition lookup should be < 50ms.""" - def lookup(): - return lsp_server.global_index.search("helper_function", exact=True) - - result = benchmark(lookup) - assert benchmark.stats.stats.mean < 0.05 # 50ms - - @pytest.mark.benchmark - def test_completion_latency(self, lsp_server, benchmark): - """Completion should be < 100ms.""" - def complete(): - return lsp_server.global_index.search("help", prefix_mode=True, limit=50) - - result = benchmark(complete) - assert benchmark.stats.stats.mean < 0.1 # 100ms - - @pytest.mark.benchmark - def test_references_latency(self, lsp_server, benchmark): - """References should be < 200ms.""" - def find_refs(): - return lsp_server.search_engine.search_references("helper_function") - - result = benchmark(find_refs) - assert benchmark.stats.stats.mean < 0.2 # 200ms -``` - ---- - -## 10. Deployment Guide - -### 10.1 Installation - -```bash -# Install with LSP support -pip install codex-lens[lsp] - -# Or from source -git clone https://github.com/your-org/codex-lens.git -cd codex-lens -pip install -e ".[lsp]" -``` - -### 10.2 VS Code Configuration - -**File**: `.vscode/settings.json` - -```json -{ - "codexlens.enable": true, - "codexlens.serverPath": "codexlens-lsp", - "codexlens.serverArgs": ["--stdio"], - "codexlens.trace.server": "verbose" -} -``` - -### 10.3 Neovim Configuration - -**File**: `~/.config/nvim/lua/lsp/codexlens.lua` - -```lua -local lspconfig = require('lspconfig') -local configs = require('lspconfig.configs') - -configs.codexlens = { - default_config = { - cmd = { 'codexlens-lsp', '--stdio' }, - filetypes = { 'python', 'javascript', 'typescript' }, - root_dir = lspconfig.util.root_pattern('.git', 'pyproject.toml'), - settings = {}, - }, -} - -lspconfig.codexlens.setup{} -``` - -### 10.4 Claude Code Integration - -**File**: `~/.claude/hooks/pre-tool.sh` - -```bash -#!/bin/bash -# Pre-tool hook for Claude Code - -ACTION="$1" -PARAMS="$2" - -# Call codex-lens MCP provider -python -c " -from codexlens.mcp.hooks import create_context_for_prompt -from codexlens.mcp.provider import MCPProvider -from codexlens.storage.global_index import GlobalSymbolIndex -from codexlens.search.chain_search import ChainSearchEngine -from codexlens.storage.registry import RegistryStore -from codexlens.storage.path_mapper import PathMapper -import json - -# Initialize components -registry = RegistryStore() -registry.initialize() -mapper = PathMapper() -search = ChainSearchEngine(registry, mapper) -global_idx = GlobalSymbolIndex(Path.cwd()) - -provider = MCPProvider(global_idx, search, registry) - -params = json.loads('$PARAMS') -context = create_context_for_prompt(provider, '$ACTION', params) -print(context) -" -``` - ---- - -## 11. Risk Mitigation - -### 11.1 Risk Matrix - -| Risk | Probability | Impact | Mitigation | -|------|-------------|--------|------------| -| pygls compatibility issues | Low | High | Pin version, test on multiple platforms | -| Performance degradation | Medium | Medium | Implement caching, benchmark tests | -| Index corruption | Low | High | Use WAL mode, implement recovery | -| Memory leaks in long sessions | Medium | Medium | Implement connection pooling, periodic cleanup | -| Hook execution timeout | Medium | Low | Implement timeout limits, async execution | - -### 11.2 Fallback Strategies - -1. **Index not available**: Return empty results, don't block LSP -2. **Search timeout**: Return partial results with warning -3. **WatcherManager crash**: Auto-restart with exponential backoff -4. **MCP generation failure**: Return minimal context, log error - -### 11.3 Monitoring - -```python -# Add to server.py - -import prometheus_client - -# Metrics -DEFINITION_LATENCY = prometheus_client.Histogram( - 'codexlens_definition_latency_seconds', - 'Time to process definition request', -) -REFERENCES_LATENCY = prometheus_client.Histogram( - 'codexlens_references_latency_seconds', - 'Time to process references request', -) -INDEX_SIZE = prometheus_client.Gauge( - 'codexlens_index_symbols_total', - 'Total symbols in index', -) -``` - ---- - -## Appendix: Quick Reference - -### File Creation Summary - -| Phase | File | Type | -|-------|------|------| -| 1 | `src/codexlens/lsp/__init__.py` | NEW | -| 1 | `src/codexlens/lsp/server.py` | NEW | -| 1 | `src/codexlens/lsp/handlers.py` | NEW | -| 2 | `src/codexlens/search/chain_search.py` | MODIFY | -| 3 | `src/codexlens/lsp/providers.py` | NEW | -| 4 | `src/codexlens/mcp/__init__.py` | NEW | -| 4 | `src/codexlens/mcp/schema.py` | NEW | -| 4 | `src/codexlens/mcp/provider.py` | NEW | -| 4 | `src/codexlens/mcp/hooks.py` | NEW | -| 5 | `src/codexlens/lsp/cache.py` | NEW | - -### Command Reference - -```bash -# Start LSP server -codexlens-lsp --stdio - -# Start with TCP (for debugging) -codexlens-lsp --tcp --port 2087 - -# Run tests -pytest tests/lsp/ -v - -# Run benchmarks -pytest tests/benchmarks/ --benchmark-only - -# Check coverage -pytest tests/lsp/ --cov=codexlens.lsp --cov-report=html -``` - ---- - -**Document End** diff --git a/codex-lens/docs/MCP_ENDPOINT_DESIGN.md b/codex-lens/docs/MCP_ENDPOINT_DESIGN.md deleted file mode 100644 index 887bbc26..00000000 --- a/codex-lens/docs/MCP_ENDPOINT_DESIGN.md +++ /dev/null @@ -1,284 +0,0 @@ -# CodexLens MCP Endpoint Design - -> Generated by Gemini Analysis | 2026-01-19 -> Document Version: 1.0 - -## Overview - -This document provides the complete MCP endpoint design for exposing codex-lens LSP capabilities through the Model Context Protocol. - -## Related Files -- `src/codexlens/lsp/server.py` - Main LSP server initialization, component management, and capability declaration. -- `src/codexlens/lsp/handlers.py` - Implementation of handlers for core LSP requests (definition, references, completion, hover, workspace symbols). -- `src/codexlens/lsp/providers.py` - Helper classes, specifically `HoverProvider` for generating rich hover information. -- `src/codexlens/storage/global_index.py` - The backing data store (`GlobalSymbolIndex`) that powers most of the symbol lookups. -- `src/codexlens/search/__init__.py` - Exposes the `ChainSearchEngine`, used for advanced reference searching. - -## Summary - -The `codex-lens` LSP implementation exposes five core code navigation and search features: go to definition, find references, code completion, hover information, and workspace symbol search. These features are primarily powered by two components: `GlobalSymbolIndex` for fast, project-wide symbol lookups (used by definition, completion, hover, and workspace symbols) and `ChainSearchEngine` for advanced, relationship-aware reference finding. - -The following MCP tool design externalizes these backend capabilities, allowing a client to leverage the same code intelligence features outside of an LSP context. - -## MCP Tool Group: `code.symbol` - -This group provides tools for searching and retrieving information about code symbols (functions, classes, etc.) within an indexed project. - ---- - -### 1. `code.symbol.search` - -**Description**: Searches for symbols across the entire indexed project, supporting prefix or contains matching. Ideal for implementing workspace symbol searches or providing code completion suggestions. - -**Mapped LSP Features**: `workspace/symbol`, `textDocument/completion` - -**Backend Implementation**: This tool directly maps to the `GlobalSymbolIndex.search` method. -- Reference: `src/codexlens/lsp/handlers.py:302` (in `lsp_workspace_symbol`) -- Reference: `src/codexlens/lsp/handlers.py:256` (in `lsp_completion`) - -**Schema**: -```json -{ - "name": "code.symbol.search", - "description": "Searches for symbols across the entire indexed project, supporting prefix or contains matching. Ideal for implementing workspace symbol searches or providing code completion suggestions.", - "inputSchema": { - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "The symbol name or prefix to search for." - }, - "kind": { - "type": "string", - "description": "Optional: Filter results to only include symbols of a specific kind (e.g., 'function', 'class', 'method').", - "nullable": true - }, - "prefix_mode": { - "type": "boolean", - "description": "If true, treats the query as a prefix (name LIKE 'query%'). If false, performs a contains search (name LIKE '%query%'). Defaults to true.", - "default": true - }, - "limit": { - "type": "integer", - "description": "The maximum number of symbols to return.", - "default": 50 - } - }, - "required": ["query"] - } -} -``` - -**Returns**: -```typescript -Array<{ - name: string; // The name of the symbol - kind: string; // The kind of the symbol (e.g., 'function', 'class') - file_path: string; // The absolute path to the file containing the symbol - range: { - start_line: number; // The 1-based starting line number - end_line: number; // The 1-based ending line number - } -}> -``` - ---- - -### 2. `code.symbol.findDefinition` - -**Description**: Finds the definition location(s) for a symbol with an exact name match. This corresponds to a 'Go to Definition' feature. - -**Mapped LSP Feature**: `textDocument/definition` - -**Backend Implementation**: This tool uses `GlobalSymbolIndex.search` with `prefix_mode=False` and then filters for an exact name match. -- Reference: `src/codexlens/lsp/handlers.py:180` (in `lsp_definition`) - -**Schema**: -```json -{ - "name": "code.symbol.findDefinition", - "description": "Finds the definition location(s) for a symbol with an exact name match. This corresponds to a 'Go to Definition' feature.", - "inputSchema": { - "type": "object", - "properties": { - "symbol_name": { - "type": "string", - "description": "The exact name of the symbol to find." - }, - "kind": { - "type": "string", - "description": "Optional: Disambiguate by providing the symbol kind (e.g., 'function', 'class').", - "nullable": true - } - }, - "required": ["symbol_name"] - } -} -``` - -**Returns**: -```typescript -Array<{ - name: string; // The name of the symbol - kind: string; // The kind of the symbol - file_path: string; // The absolute path to the file - range: { - start_line: number; // The 1-based starting line number - end_line: number; // The 1-based ending line number - } -}> -``` - ---- - -### 3. `code.symbol.findReferences` - -**Description**: Finds all references to a symbol throughout the project. Uses advanced relationship analysis for accuracy where possible, falling back to name-based search. - -**Mapped LSP Feature**: `textDocument/references` - -**Backend Implementation**: This primarily uses `ChainSearchEngine.search_references` for accuracy, which is more powerful than a simple name search. -- Reference: `src/codexlens/lsp/handlers.py:218` (in `lsp_references`) - -**Schema**: -```json -{ - "name": "code.symbol.findReferences", - "description": "Finds all references to a symbol throughout the project. Uses advanced relationship analysis for accuracy where possible.", - "inputSchema": { - "type": "object", - "properties": { - "symbol_name": { - "type": "string", - "description": "The name of the symbol to find references for." - }, - "context_path": { - "type": "string", - "description": "The source path of the current project or workspace root to provide context for the search." - }, - "limit": { - "type": "integer", - "description": "The maximum number of references to return.", - "default": 200 - } - }, - "required": ["symbol_name", "context_path"] - } -} -``` - -**Returns**: -```typescript -Array<{ - file_path: string; // The absolute path to the file containing the reference - line: number; // The 1-based line number of the reference - column: number; // The 0-based starting column of the reference -}> -``` - ---- - -### 4. `code.symbol.getHoverInfo` - -**Description**: Retrieves rich information for a symbol, including its signature and location, suitable for displaying in a hover card. - -**Mapped LSP Feature**: `textDocument/hover` - -**Backend Implementation**: This tool encapsulates the logic from `HoverProvider`, which finds a symbol in `GlobalSymbolIndex` and then reads the source file to extract its signature. -- Reference: `src/codexlens/lsp/handlers.py:285` (instantiates `HoverProvider`) -- Reference: `src/codexlens/lsp/providers.py:53` (in `HoverProvider.get_hover_info`) - -**Schema**: -```json -{ - "name": "code.symbol.getHoverInfo", - "description": "Retrieves rich information for a symbol, including its signature and location, suitable for displaying in a hover card.", - "inputSchema": { - "type": "object", - "properties": { - "symbol_name": { - "type": "string", - "description": "The exact name of the symbol to get hover information for." - } - }, - "required": ["symbol_name"] - } -} -``` - -**Returns**: -```typescript -{ - name: string; // The name of the symbol - kind: string; // The kind of the symbol - signature: string; // The full code signature as extracted from source - file_path: string; // The absolute path to the file - start_line: number; // The 1-based starting line number -} | null // null if symbol not found -``` - ---- - -## Integration with CCW MCP Manager - -The `codex-lens-tools` MCP server should be added to the recommended MCP servers list in `ccw/src/templates/dashboard-js/components/mcp-manager.js`: - -```javascript -{ - id: 'codex-lens-tools', - nameKey: 'mcp.codexLens.name', - descKey: 'mcp.codexLens.desc', - icon: 'search-code', - category: 'code-intelligence', - fields: [ - { - key: 'toolSelection', - labelKey: 'mcp.codexLens.field.tools', - type: 'multi-select', - options: [ - { value: 'symbol.search', label: 'Symbol Search' }, - { value: 'symbol.findDefinition', label: 'Find Definition' }, - { value: 'symbol.findReferences', label: 'Find References' }, - { value: 'symbol.getHoverInfo', label: 'Hover Information' } - ], - default: ['symbol.search', 'symbol.findDefinition', 'symbol.findReferences'], - required: true, - descKey: 'mcp.codexLens.field.tools.desc' - } - ], - buildConfig: (values) => { - const tools = values.toolSelection || []; - const env = { CODEXLENS_ENABLED_TOOLS: tools.join(',') }; - return buildCrossPlatformMcpConfig('npx', ['-y', 'codex-lens-mcp'], { env }); - } -} -``` - -## Tool Naming Convention - -- **Namespace**: `code.*` for code intelligence tools -- **Category**: `symbol` for symbol-related operations -- **Operation**: Descriptive verb (search, findDefinition, findReferences, getHoverInfo) -- **Full Pattern**: `code.symbol.` - -This naming scheme aligns with MCP conventions and is easily extensible for future categories (e.g., `code.types.*`, `code.imports.*`). - -## Future Enhancements - -1. **Document Symbol Tool** (`code.symbol.getDocumentSymbols`) - - Maps LSP `textDocument/documentSymbol` - - Returns all symbols in a specific file - -2. **Type Information** (`code.type.*` group) - - Type definitions and relationships - - Generic resolution - -3. **Relationship Analysis** (`code.relation.*` group) - - Call hierarchy - - Inheritance chains - - Import dependencies - ---- - -Generated: 2026-01-19 -Status: Ready for Implementation diff --git a/codex-lens/docs/MIGRATION_005_SUMMARY.md b/codex-lens/docs/MIGRATION_005_SUMMARY.md deleted file mode 100644 index c73cd06d..00000000 --- a/codex-lens/docs/MIGRATION_005_SUMMARY.md +++ /dev/null @@ -1,220 +0,0 @@ -# Migration 005: Database Schema Cleanup - -## Overview - -Migration 005 removes four unused and redundant database fields identified through Gemini analysis. This cleanup improves database efficiency, reduces schema complexity, and eliminates potential data consistency issues. - -## Schema Version - -- **Previous Version**: 4 -- **New Version**: 5 - -## Changes Summary - -### 1. Removed `semantic_metadata.keywords` Column - -**Reason**: Deprecated - replaced by normalized `file_keywords` table in migration 001. - -**Impact**: -- Keywords are now exclusively read from the normalized `file_keywords` table -- Prevents data sync issues between JSON column and normalized tables -- No data loss - migration 001 already populated `file_keywords` table - -**Modified Code**: -- `get_semantic_metadata()`: Now reads keywords from `file_keywords` JOIN -- `list_semantic_metadata()`: Updated to query `file_keywords` for each result -- `add_semantic_metadata()`: Stopped writing to `keywords` column (only writes to `file_keywords`) - -### 2. Removed `symbols.token_count` Column - -**Reason**: Unused - always NULL, never populated. - -**Impact**: -- No data loss (column was never used) -- Reduces symbols table size -- Simplifies symbol insertion logic - -**Modified Code**: -- `add_file()`: Removed `token_count` from INSERT statements -- `update_file_symbols()`: Removed `token_count` from INSERT statements -- Schema creation: No longer creates `token_count` column - -### 3. Removed `symbols.symbol_type` Column - -**Reason**: Redundant - duplicates `symbols.kind` field. - -**Impact**: -- No data loss (information preserved in `kind` column) -- Reduces symbols table size -- Eliminates redundant data storage - -**Modified Code**: -- `add_file()`: Removed `symbol_type` from INSERT statements -- `update_file_symbols()`: Removed `symbol_type` from INSERT statements -- Schema creation: No longer creates `symbol_type` column -- Removed `idx_symbols_type` index - -### 4. Removed `subdirs.direct_files` Column - -**Reason**: Unused - never displayed or queried in application logic. - -**Impact**: -- No data loss (column was never used) -- Reduces subdirs table size -- Simplifies subdirectory registration - -**Modified Code**: -- `register_subdir()`: Parameter kept for backward compatibility but ignored -- `update_subdir_stats()`: Parameter kept for backward compatibility but ignored -- `get_subdirs()`: No longer retrieves `direct_files` -- `get_subdir()`: No longer retrieves `direct_files` -- `SubdirLink` dataclass: Removed `direct_files` field - -## Migration Process - -### Automatic Migration (v4 → v5) - -When an existing database (version 4) is opened: - -1. **Transaction begins** -2. **Step 1**: Recreate `semantic_metadata` table without `keywords` column - - Data copied from old table (excluding `keywords`) - - Old table dropped, new table renamed -3. **Step 2**: Recreate `symbols` table without `token_count` and `symbol_type` - - Data copied from old table (excluding removed columns) - - Old table dropped, new table renamed - - Indexes recreated (excluding `idx_symbols_type`) -4. **Step 3**: Recreate `subdirs` table without `direct_files` - - Data copied from old table (excluding `direct_files`) - - Old table dropped, new table renamed -5. **Transaction committed** -6. **VACUUM** runs to reclaim space (non-critical, continues if fails) - -### New Database Creation (v5) - -New databases are created directly with the clean schema (no migration needed). - -## Benefits - -1. **Reduced Database Size**: Removed 4 unused columns across 3 tables -2. **Improved Data Consistency**: Single source of truth for keywords (normalized tables) -3. **Simpler Code**: Less maintenance burden for unused fields -4. **Better Performance**: Smaller table sizes, fewer indexes to maintain -5. **Cleaner Schema**: Easier to understand and maintain - -## Backward Compatibility - -### API Compatibility - -All public APIs remain backward compatible: - -- `register_subdir()` and `update_subdir_stats()` still accept `direct_files` parameter (ignored) -- `SubdirLink` dataclass no longer has `direct_files` attribute (breaking change for direct dataclass access) - -### Database Compatibility - -- **v4 databases**: Automatically migrated to v5 on first access -- **v5 databases**: No migration needed -- **Older databases (v0-v3)**: Migrate through chain (v0→v2→v4→v5) - -## Testing - -Comprehensive test suite added: `tests/test_schema_cleanup_migration.py` - -**Test Coverage**: -- ✅ Migration from v4 to v5 -- ✅ New database creation with clean schema -- ✅ Semantic metadata keywords read from normalized table -- ✅ Symbols insert without deprecated fields -- ✅ Subdir operations without `direct_files` - -**Test Results**: All 5 tests passing - -## Verification - -To verify migration success: - -```python -from codexlens.storage.dir_index import DirIndexStore - -store = DirIndexStore("path/to/_index.db") -store.initialize() - -# Check schema version -conn = store._get_connection() -version = conn.execute("PRAGMA user_version").fetchone()[0] -assert version == 5 - -# Check columns removed -cursor = conn.execute("PRAGMA table_info(semantic_metadata)") -columns = {row[1] for row in cursor.fetchall()} -assert "keywords" not in columns - -cursor = conn.execute("PRAGMA table_info(symbols)") -columns = {row[1] for row in cursor.fetchall()} -assert "token_count" not in columns -assert "symbol_type" not in columns - -cursor = conn.execute("PRAGMA table_info(subdirs)") -columns = {row[1] for row in cursor.fetchall()} -assert "direct_files" not in columns - -store.close() -``` - -## Performance Impact - -**Expected Improvements**: -- Database size reduction: ~10-15% (varies by data) -- VACUUM reclaims space immediately after migration -- Slightly faster queries (smaller tables, fewer indexes) - -## Rollback - -Migration 005 is **one-way** (no downgrade function). Removed fields contain: -- `keywords`: Already migrated to normalized tables (migration 001) -- `token_count`: Always NULL (no data) -- `symbol_type`: Duplicate of `kind` (no data loss) -- `direct_files`: Never used (no data) - -If rollback is needed, restore from backup before running migration. - -## Files Modified - -1. **Migration File**: - - `src/codexlens/storage/migrations/migration_005_cleanup_unused_fields.py` (NEW) - -2. **Core Storage**: - - `src/codexlens/storage/dir_index.py`: - - Updated `SCHEMA_VERSION` to 5 - - Added migration 005 to `_apply_migrations()` - - Updated `get_semantic_metadata()` to read from `file_keywords` - - Updated `list_semantic_metadata()` to read from `file_keywords` - - Updated `add_semantic_metadata()` to not write `keywords` column - - Updated `add_file()` to not write `token_count`/`symbol_type` - - Updated `update_file_symbols()` to not write `token_count`/`symbol_type` - - Updated `register_subdir()` to not write `direct_files` - - Updated `update_subdir_stats()` to not write `direct_files` - - Updated `get_subdirs()` to not read `direct_files` - - Updated `get_subdir()` to not read `direct_files` - - Updated `SubdirLink` dataclass to remove `direct_files` - - Updated `_create_schema()` to create v5 schema directly - -3. **Tests**: - - `tests/test_schema_cleanup_migration.py` (NEW) - -## Deployment Checklist - -- [x] Migration script created and tested -- [x] Schema version updated to 5 -- [x] All code updated to use new schema -- [x] Comprehensive tests added -- [x] Existing tests pass -- [x] Documentation updated -- [x] Backward compatibility verified - -## References - -- Original Analysis: Gemini code review identified unused/redundant fields -- Migration Pattern: Follows SQLite best practices (table recreation) -- Previous Migrations: 001 (keywords normalization), 004 (dual FTS) diff --git a/codex-lens/docs/MULTILEVEL_CHUNKER_DESIGN.md b/codex-lens/docs/MULTILEVEL_CHUNKER_DESIGN.md deleted file mode 100644 index 46cee0a1..00000000 --- a/codex-lens/docs/MULTILEVEL_CHUNKER_DESIGN.md +++ /dev/null @@ -1,973 +0,0 @@ -# 多层次分词器设计方案 - -## 1. 背景与目标 - -### 1.1 当前问题 - -当前 `chunker.py` 的两种分词策略存在明显缺陷: - -**symbol-based 策略**: -- ✅ 优点:保持代码逻辑完整性,每个chunk是完整的函数/类 -- ❌ 缺点:粒度不均,超大函数可能达到数百行,影响LLM处理和搜索精度 - -**sliding-window 策略**: -- ✅ 优点:chunk大小均匀,覆盖全面 -- ❌ 缺点:破坏逻辑结构,可能将完整的循环/条件块切断 - -### 1.2 设计目标 - -实现多层次分词器,同时满足: -1. **语义完整性**:保持代码逻辑边界的完整性 -2. **粒度可控**:支持从粗粒度(函数级)到细粒度(逻辑块级)的灵活划分 -3. **层级关系**:保留chunk之间的父子关系,支持上下文检索 -4. **高效索引**:优化向量化和检索性能 - -## 2. 技术架构 - -### 2.1 两层分词架构 - -``` -Source Code - ↓ -[Layer 1: Symbol-Level Chunking] ← 使用 tree-sitter AST - ↓ -MacroChunks (Functions/Classes) - ↓ -[Layer 2: Logic-Block Chunking] ← AST深度遍历 - ↓ -MicroChunks (Loops/Conditionals/Blocks) - ↓ -Vector Embedding + Indexing -``` - -### 2.2 核心组件 - -```python -# 新增数据结构 -@dataclass -class ChunkMetadata: - """Chunk元数据""" - chunk_id: str - parent_id: Optional[str] # 父chunk ID - level: int # 层级:1=macro, 2=micro - chunk_type: str # function/class/loop/conditional/try_except - file_path: str - start_line: int - end_line: int - symbol_name: Optional[str] - context_summary: Optional[str] # 继承自父chunk的上下文 - -@dataclass -class HierarchicalChunk: - """层级化的代码块""" - metadata: ChunkMetadata - content: str - embedding: Optional[List[float]] = None - children: List['HierarchicalChunk'] = field(default_factory=list) -``` - -## 3. 详细实现步骤 - -### 3.1 第一层:符号级分词(Macro-Chunking) - -**实现思路**:复用现有 `code_extractor.py` 逻辑,增强元数据提取。 - -```python -class MacroChunker: - """第一层分词器:提取顶层符号""" - - def __init__(self): - self.parser = Parser() - # 加载语言grammar - - def chunk_by_symbols( - self, - content: str, - file_path: str, - language: str - ) -> List[HierarchicalChunk]: - """提取顶层函数和类定义""" - tree = self.parser.parse(bytes(content, 'utf-8')) - root_node = tree.root_node - - chunks = [] - for node in root_node.children: - if node.type in ['function_definition', 'class_definition', - 'method_definition']: - chunk = self._create_macro_chunk(node, content, file_path) - chunks.append(chunk) - - return chunks - - def _create_macro_chunk( - self, - node, - content: str, - file_path: str - ) -> HierarchicalChunk: - """从AST节点创建macro chunk""" - start_line = node.start_point[0] + 1 - end_line = node.end_point[0] + 1 - - # 提取符号名称 - name_node = node.child_by_field_name('name') - symbol_name = content[name_node.start_byte:name_node.end_byte] - - # 提取完整代码(包含docstring和装饰器) - chunk_content = self._extract_with_context(node, content) - - metadata = ChunkMetadata( - chunk_id=f"{file_path}:{start_line}", - parent_id=None, - level=1, - chunk_type=node.type, - file_path=file_path, - start_line=start_line, - end_line=end_line, - symbol_name=symbol_name, - ) - - return HierarchicalChunk( - metadata=metadata, - content=chunk_content, - ) - - def _extract_with_context(self, node, content: str) -> str: - """提取代码,包含装饰器和docstring""" - # 向上查找装饰器 - start_byte = node.start_byte - prev_sibling = node.prev_sibling - while prev_sibling and prev_sibling.type == 'decorator': - start_byte = prev_sibling.start_byte - prev_sibling = prev_sibling.prev_sibling - - return content[start_byte:node.end_byte] -``` - -### 3.2 第二层:逻辑块分词(Micro-Chunking) - -**实现思路**:在每个macro chunk内部,按逻辑结构进一步划分。 - -```python -class MicroChunker: - """第二层分词器:提取逻辑块""" - - # 需要划分的逻辑块类型 - LOGIC_BLOCK_TYPES = { - 'for_statement', - 'while_statement', - 'if_statement', - 'try_statement', - 'with_statement', - } - - def chunk_logic_blocks( - self, - macro_chunk: HierarchicalChunk, - content: str, - max_lines: int = 50 # 大于此行数的macro chunk才进行二次划分 - ) -> List[HierarchicalChunk]: - """在macro chunk内部提取逻辑块""" - - # 小函数不需要二次划分 - total_lines = macro_chunk.metadata.end_line - macro_chunk.metadata.start_line - if total_lines <= max_lines: - return [] - - tree = self.parser.parse(bytes(macro_chunk.content, 'utf-8')) - root_node = tree.root_node - - micro_chunks = [] - self._traverse_logic_blocks( - root_node, - macro_chunk, - content, - micro_chunks - ) - - return micro_chunks - - def _traverse_logic_blocks( - self, - node, - parent_chunk: HierarchicalChunk, - content: str, - result: List[HierarchicalChunk] - ): - """递归遍历AST,提取逻辑块""" - - if node.type in self.LOGIC_BLOCK_TYPES: - micro_chunk = self._create_micro_chunk( - node, - parent_chunk, - content - ) - result.append(micro_chunk) - parent_chunk.children.append(micro_chunk) - - # 继续遍历子节点 - for child in node.children: - self._traverse_logic_blocks(child, parent_chunk, content, result) - - def _create_micro_chunk( - self, - node, - parent_chunk: HierarchicalChunk, - content: str - ) -> HierarchicalChunk: - """创建micro chunk""" - - # 计算相对于文件的行号 - start_line = parent_chunk.metadata.start_line + node.start_point[0] - end_line = parent_chunk.metadata.start_line + node.end_point[0] - - chunk_content = content[node.start_byte:node.end_byte] - - metadata = ChunkMetadata( - chunk_id=f"{parent_chunk.metadata.chunk_id}:L{start_line}", - parent_id=parent_chunk.metadata.chunk_id, - level=2, - chunk_type=node.type, - file_path=parent_chunk.metadata.file_path, - start_line=start_line, - end_line=end_line, - symbol_name=parent_chunk.metadata.symbol_name, # 继承父符号名 - context_summary=None, # 后续由LLM填充 - ) - - return HierarchicalChunk( - metadata=metadata, - content=chunk_content, - ) -``` - -### 3.3 统一接口:多层次分词器 - -```python -class HierarchicalChunker: - """多层次分词器统一接口""" - - def __init__(self, config: ChunkConfig = None): - self.config = config or ChunkConfig() - self.macro_chunker = MacroChunker() - self.micro_chunker = MicroChunker() - - def chunk_file( - self, - content: str, - file_path: str, - language: str - ) -> List[HierarchicalChunk]: - """对文件进行多层次分词""" - - # 第一层:符号级分词 - macro_chunks = self.macro_chunker.chunk_by_symbols( - content, file_path, language - ) - - # 第二层:逻辑块分词 - all_chunks = [] - for macro_chunk in macro_chunks: - all_chunks.append(macro_chunk) - - # 对大函数进行二次划分 - micro_chunks = self.micro_chunker.chunk_logic_blocks( - macro_chunk, content - ) - all_chunks.extend(micro_chunks) - - return all_chunks - - def chunk_file_with_fallback( - self, - content: str, - file_path: str, - language: str - ) -> List[HierarchicalChunk]: - """带降级策略的分词""" - - try: - return self.chunk_file(content, file_path, language) - except Exception as e: - logger.warning(f"Hierarchical chunking failed: {e}, falling back to sliding window") - # 降级到滑动窗口策略 - return self._fallback_sliding_window(content, file_path, language) -``` - -## 4. 数据存储设计 - -### 4.1 数据库Schema - -```sql --- chunk表:存储所有层级的chunk -CREATE TABLE chunks ( - chunk_id TEXT PRIMARY KEY, - parent_id TEXT, -- 父chunk ID,NULL表示顶层 - level INTEGER NOT NULL, -- 1=macro, 2=micro - chunk_type TEXT NOT NULL, -- function/class/loop/if/try等 - file_path TEXT NOT NULL, - start_line INTEGER NOT NULL, - end_line INTEGER NOT NULL, - symbol_name TEXT, - content TEXT NOT NULL, - content_hash TEXT, -- 用于检测内容变化 - - -- 语义元数据(由LLM生成) - summary TEXT, - keywords TEXT, -- JSON数组 - purpose TEXT, - - -- 向量嵌入 - embedding BLOB, -- 存储向量 - - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - - FOREIGN KEY (parent_id) REFERENCES chunks(chunk_id) ON DELETE CASCADE -); - --- 索引优化 -CREATE INDEX idx_chunks_file_path ON chunks(file_path); -CREATE INDEX idx_chunks_parent_id ON chunks(parent_id); -CREATE INDEX idx_chunks_level ON chunks(level); -CREATE INDEX idx_chunks_symbol_name ON chunks(symbol_name); -``` - -### 4.2 向量索引 - -使用分层索引策略: - -```python -class HierarchicalVectorStore: - """层级化向量存储""" - - def __init__(self, db_path: Path): - self.db_path = db_path - self.conn = sqlite3.connect(db_path) - - def add_chunk(self, chunk: HierarchicalChunk): - """添加chunk及其向量""" - - cursor = self.conn.cursor() - cursor.execute(""" - INSERT INTO chunks ( - chunk_id, parent_id, level, chunk_type, - file_path, start_line, end_line, symbol_name, - content, embedding - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - """, ( - chunk.metadata.chunk_id, - chunk.metadata.parent_id, - chunk.metadata.level, - chunk.metadata.chunk_type, - chunk.metadata.file_path, - chunk.metadata.start_line, - chunk.metadata.end_line, - chunk.metadata.symbol_name, - chunk.content, - self._serialize_embedding(chunk.embedding), - )) - - self.conn.commit() - - def search_hierarchical( - self, - query_embedding: List[float], - top_k: int = 10, - level_weights: Dict[int, float] = None - ) -> List[Tuple[HierarchicalChunk, float]]: - """层级化检索""" - - # 默认权重:macro chunk权重更高 - if level_weights is None: - level_weights = {1: 1.0, 2: 0.8} - - # 检索所有chunk - cursor = self.conn.cursor() - cursor.execute("SELECT * FROM chunks WHERE embedding IS NOT NULL") - - results = [] - for row in cursor.fetchall(): - chunk = self._row_to_chunk(row) - similarity = self._cosine_similarity( - query_embedding, - chunk.embedding - ) - - # 根据层级应用权重 - weighted_score = similarity * level_weights.get(chunk.metadata.level, 1.0) - results.append((chunk, weighted_score)) - - # 按分数排序 - results.sort(key=lambda x: x[1], reverse=True) - return results[:top_k] - - def get_chunk_with_context( - self, - chunk_id: str - ) -> Tuple[HierarchicalChunk, Optional[HierarchicalChunk]]: - """获取chunk及其父chunk(提供上下文)""" - - cursor = self.conn.cursor() - - # 获取chunk本身 - cursor.execute("SELECT * FROM chunks WHERE chunk_id = ?", (chunk_id,)) - chunk_row = cursor.fetchone() - chunk = self._row_to_chunk(chunk_row) - - # 获取父chunk - parent = None - if chunk.metadata.parent_id: - cursor.execute( - "SELECT * FROM chunks WHERE chunk_id = ?", - (chunk.metadata.parent_id,) - ) - parent_row = cursor.fetchone() - if parent_row: - parent = self._row_to_chunk(parent_row) - - return chunk, parent -``` - -## 5. LLM集成策略 - -### 5.1 分层生成语义元数据 - -```python -class HierarchicalLLMEnhancer: - """为层级chunk生成语义元数据""" - - def enhance_hierarchical_chunks( - self, - chunks: List[HierarchicalChunk] - ) -> Dict[str, SemanticMetadata]: - """ - 分层处理策略: - 1. 先处理所有level=1的macro chunks,生成详细摘要 - 2. 再处理level=2的micro chunks,使用父chunk摘要作为上下文 - """ - - results = {} - - # 第一轮:处理macro chunks - macro_chunks = [c for c in chunks if c.metadata.level == 1] - macro_metadata = self.llm_enhancer.enhance_files([ - FileData( - path=c.metadata.chunk_id, - content=c.content, - language=self._detect_language(c.metadata.file_path) - ) - for c in macro_chunks - ]) - results.update(macro_metadata) - - # 第二轮:处理micro chunks(带父上下文) - micro_chunks = [c for c in chunks if c.metadata.level == 2] - for micro_chunk in micro_chunks: - parent_id = micro_chunk.metadata.parent_id - parent_summary = macro_metadata.get(parent_id, {}).get('summary', '') - - # 构建带上下文的prompt - enhanced_prompt = f""" -Parent Function: {micro_chunk.metadata.symbol_name} -Parent Summary: {parent_summary} - -Code Block ({micro_chunk.metadata.chunk_type}): -``` -{micro_chunk.content} -``` - -Generate a concise summary (1 sentence) and keywords for this specific code block. -""" - - metadata = self._call_llm_with_context(enhanced_prompt) - results[micro_chunk.metadata.chunk_id] = metadata - - return results -``` - -### 5.2 Prompt优化 - -针对不同层级使用不同的prompt模板: - -**Macro Chunk Prompt (Level 1)**: -``` -PURPOSE: Generate comprehensive semantic metadata for a complete function/class -TASK: -- Provide a detailed summary (2-3 sentences) covering what the code does and why -- Extract 8-12 relevant keywords including technical terms and domain concepts -- Identify the primary purpose/category -MODE: analysis - -CODE: -```{language} -{content} -``` - -OUTPUT: JSON with summary, keywords, purpose -``` - -**Micro Chunk Prompt (Level 2)**: -``` -PURPOSE: Summarize a specific logic block within a larger function -CONTEXT: -- Parent Function: {symbol_name} -- Parent Purpose: {parent_summary} - -TASK: -- Provide a brief summary (1 sentence) of this specific block's role in the parent function -- Extract 3-5 keywords specific to this block's logic -MODE: analysis - -CODE BLOCK ({chunk_type}): -```{language} -{content} -``` - -OUTPUT: JSON with summary, keywords -``` - -## 6. 检索增强 - -### 6.1 上下文扩展检索 - -```python -class ContextualSearchEngine: - """支持上下文扩展的检索引擎""" - - def search_with_context( - self, - query: str, - top_k: int = 10, - expand_context: bool = True - ) -> List[SearchResult]: - """ - 检索并自动扩展上下文 - - 如果匹配到micro chunk,自动返回其父macro chunk作为上下文 - """ - - # 生成查询向量 - query_embedding = self.embedder.embed_single(query) - - # 层级化检索 - raw_results = self.vector_store.search_hierarchical( - query_embedding, - top_k=top_k - ) - - # 扩展上下文 - enriched_results = [] - for chunk, score in raw_results: - result = SearchResult( - path=chunk.metadata.file_path, - score=score, - content=chunk.content, - start_line=chunk.metadata.start_line, - end_line=chunk.metadata.end_line, - symbol_name=chunk.metadata.symbol_name, - ) - - # 如果是micro chunk,获取父chunk作为上下文 - if expand_context and chunk.metadata.level == 2: - parent_chunk, _ = self.vector_store.get_chunk_with_context( - chunk.metadata.chunk_id - ) - if parent_chunk: - result.metadata['parent_context'] = { - 'summary': parent_chunk.metadata.context_summary, - 'symbol_name': parent_chunk.metadata.symbol_name, - 'content': parent_chunk.content, - } - - enriched_results.append(result) - - return enriched_results -``` - -## 7. 测试策略 - -### 7.1 单元测试 - -```python -import pytest -from codexlens.semantic.hierarchical_chunker import ( - HierarchicalChunker, MacroChunker, MicroChunker -) - -class TestMacroChunker: - """测试第一层分词""" - - def test_extract_functions(self): - """测试提取函数定义""" - code = ''' -def calculate_total(items): - """Calculate total price.""" - total = 0 - for item in items: - total += item.price - return total - -def apply_discount(total, discount): - """Apply discount to total.""" - return total * (1 - discount) -''' - chunker = MacroChunker() - chunks = chunker.chunk_by_symbols(code, 'test.py', 'python') - - assert len(chunks) == 2 - assert chunks[0].metadata.symbol_name == 'calculate_total' - assert chunks[1].metadata.symbol_name == 'apply_discount' - assert chunks[0].metadata.level == 1 - - def test_extract_with_decorators(self): - """测试提取带装饰器的函数""" - code = ''' -@app.route('/api/users') -@auth_required -def get_users(): - return User.query.all() -''' - chunker = MacroChunker() - chunks = chunker.chunk_by_symbols(code, 'test.py', 'python') - - assert len(chunks) == 1 - assert '@app.route' in chunks[0].content - assert '@auth_required' in chunks[0].content - -class TestMicroChunker: - """测试第二层分词""" - - def test_extract_loop_blocks(self): - """测试提取循环块""" - code = ''' -def process_items(items): - results = [] - for item in items: - if item.active: - results.append(process(item)) - return results -''' - macro_chunker = MacroChunker() - macro_chunks = macro_chunker.chunk_by_symbols(code, 'test.py', 'python') - - micro_chunker = MicroChunker() - micro_chunks = micro_chunker.chunk_logic_blocks( - macro_chunks[0], code - ) - - # 应该提取出for循环和if条件块 - assert len(micro_chunks) >= 1 - assert any(c.metadata.chunk_type == 'for_statement' for c in micro_chunks) - - def test_skip_small_functions(self): - """测试小函数跳过二次划分""" - code = ''' -def small_func(x): - return x * 2 -''' - macro_chunker = MacroChunker() - macro_chunks = macro_chunker.chunk_by_symbols(code, 'test.py', 'python') - - micro_chunker = MicroChunker() - micro_chunks = micro_chunker.chunk_logic_blocks( - macro_chunks[0], code, max_lines=10 - ) - - # 小函数不应该被二次划分 - assert len(micro_chunks) == 0 - -class TestHierarchicalChunker: - """测试完整的多层次分词""" - - def test_full_hierarchical_chunking(self): - """测试完整的层级分词流程""" - code = ''' -def complex_function(data): - """A complex function with multiple logic blocks.""" - - # Validation - if not data: - raise ValueError("Data is empty") - - # Processing - results = [] - for item in data: - try: - processed = process_item(item) - results.append(processed) - except Exception as e: - logger.error(f"Failed to process: {e}") - continue - - # Aggregation - total = sum(r.value for r in results) - return total -''' - chunker = HierarchicalChunker() - chunks = chunker.chunk_file(code, 'test.py', 'python') - - # 应该有1个macro chunk和多个micro chunks - macro_chunks = [c for c in chunks if c.metadata.level == 1] - micro_chunks = [c for c in chunks if c.metadata.level == 2] - - assert len(macro_chunks) == 1 - assert len(micro_chunks) > 0 - - # 验证父子关系 - for micro in micro_chunks: - assert micro.metadata.parent_id == macro_chunks[0].metadata.chunk_id -``` - -### 7.2 集成测试 - -```python -class TestHierarchicalIndexing: - """测试完整的索引流程""" - - def test_index_and_search(self): - """测试分层索引和检索""" - - # 1. 分词 - chunker = HierarchicalChunker() - chunks = chunker.chunk_file(sample_code, 'sample.py', 'python') - - # 2. LLM增强 - enhancer = HierarchicalLLMEnhancer() - metadata = enhancer.enhance_hierarchical_chunks(chunks) - - # 3. 向量化 - embedder = Embedder() - for chunk in chunks: - text = metadata[chunk.metadata.chunk_id].summary - chunk.embedding = embedder.embed_single(text) - - # 4. 存储 - vector_store = HierarchicalVectorStore(Path('/tmp/test.db')) - for chunk in chunks: - vector_store.add_chunk(chunk) - - # 5. 检索 - search_engine = ContextualSearchEngine(vector_store, embedder) - results = search_engine.search_with_context( - "find loop that processes items", - top_k=5 - ) - - # 验证结果 - assert len(results) > 0 - assert any(r.metadata.get('parent_context') for r in results) -``` - -## 8. 性能优化 - -### 8.1 批量处理 - -```python -class BatchHierarchicalProcessor: - """批量处理多个文件的层级分词""" - - def process_files_batch( - self, - file_paths: List[Path], - batch_size: int = 10 - ): - """批量处理,优化LLM调用""" - - all_chunks = [] - - # 1. 批量分词 - for file_path in file_paths: - content = file_path.read_text() - chunks = self.chunker.chunk_file( - content, str(file_path), self._detect_language(file_path) - ) - all_chunks.extend(chunks) - - # 2. 批量LLM增强(减少API调用) - macro_chunks = [c for c in all_chunks if c.metadata.level == 1] - for i in range(0, len(macro_chunks), batch_size): - batch = macro_chunks[i:i+batch_size] - self.enhancer.enhance_batch(batch) - - # 3. 批量向量化 - all_texts = [c.content for c in all_chunks] - embeddings = self.embedder.embed_batch(all_texts) - for chunk, embedding in zip(all_chunks, embeddings): - chunk.embedding = embedding - - # 4. 批量存储 - self.vector_store.add_chunks_batch(all_chunks) -``` - -### 8.2 增量更新 - -```python -class IncrementalIndexer: - """增量索引器:只处理变化的文件""" - - def update_file(self, file_path: Path): - """增量更新单个文件""" - - content = file_path.read_text() - content_hash = hashlib.sha256(content.encode()).hexdigest() - - # 检查文件是否变化 - cursor = self.conn.cursor() - cursor.execute(""" - SELECT content_hash FROM chunks - WHERE file_path = ? AND level = 1 - LIMIT 1 - """, (str(file_path),)) - - row = cursor.fetchone() - if row and row[0] == content_hash: - logger.info(f"File {file_path} unchanged, skipping") - return - - # 删除旧chunk - cursor.execute("DELETE FROM chunks WHERE file_path = ?", (str(file_path),)) - - # 重新索引 - chunks = self.chunker.chunk_file(content, str(file_path), 'python') - # ... 继续处理 -``` - -## 9. 潜在问题与解决方案 - -### 9.1 问题:超大函数的micro chunk过多 - -**现象**:某些遗留代码函数超过1000行,可能产生几十个micro chunks。 - -**解决方案**: -```python -class AdaptiveMicroChunker: - """自适应micro分词:根据函数大小调整策略""" - - def chunk_logic_blocks(self, macro_chunk, content): - total_lines = macro_chunk.metadata.end_line - macro_chunk.metadata.start_line - - if total_lines > 500: - # 超大函数:只提取顶层逻辑块,不递归 - return self._extract_top_level_blocks(macro_chunk, content) - elif total_lines > 100: - # 大函数:递归深度限制为2层 - return self._extract_blocks_with_depth_limit(macro_chunk, content, max_depth=2) - else: - # 正常函数:完全跳过micro chunking - return [] -``` - -### 9.2 问题:tree-sitter解析失败 - -**现象**:对于语法错误的代码,tree-sitter解析可能失败。 - -**解决方案**: -```python -def chunk_file_with_fallback(self, content, file_path, language): - """带降级策略的分词""" - - try: - # 尝试层级分词 - return self.chunk_file(content, file_path, language) - except TreeSitterError as e: - logger.warning(f"Tree-sitter parsing failed: {e}") - - # 降级到基于正则的简单symbol提取 - return self._fallback_regex_chunking(content, file_path) - except Exception as e: - logger.error(f"Chunking failed completely: {e}") - - # 最终降级到滑动窗口 - return self._fallback_sliding_window(content, file_path, language) -``` - -### 9.3 问题:向量存储空间占用 - -**现象**:每个chunk都存储向量,空间占用可能很大。 - -**解决方案**: -- **选择性向量化**:只对macro chunks和重要的micro chunks生成向量 -- **向量压缩**:使用PCA或量化技术减少向量维度 -- **分离存储**:向量存储在专门的向量数据库(如Faiss),SQLite只存元数据 - -```python -class SelectiveVectorization: - """选择性向量化:减少存储开销""" - - VECTORIZE_CHUNK_TYPES = { - 'function_definition', # 总是向量化 - 'class_definition', # 总是向量化 - 'for_statement', # 循环块 - 'try_statement', # 异常处理 - # 'if_statement' 通常不单独向量化,依赖父chunk - } - - def should_vectorize(self, chunk: HierarchicalChunk) -> bool: - """判断是否需要为chunk生成向量""" - - # Level 1总是向量化 - if chunk.metadata.level == 1: - return True - - # Level 2根据类型和大小决定 - if chunk.metadata.chunk_type not in self.VECTORIZE_CHUNK_TYPES: - return False - - # 太小的块(<5行)不向量化 - lines = chunk.metadata.end_line - chunk.metadata.start_line - if lines < 5: - return False - - return True -``` - -## 10. 实施路线图 - -### Phase 1: 基础架构(2-3周) -- [x] 设计数据结构(HierarchicalChunk, ChunkMetadata) -- [ ] 实现MacroChunker(复用现有code_extractor) -- [ ] 实现基础的MicroChunker -- [ ] 数据库schema设计和migration -- [ ] 单元测试 - -### Phase 2: LLM集成(1-2周) -- [ ] 实现HierarchicalLLMEnhancer -- [ ] 设计分层prompt模板 -- [ ] 批量处理优化 -- [ ] 集成测试 - -### Phase 3: 向量化与检索(1-2周) -- [ ] 实现HierarchicalVectorStore -- [ ] 实现ContextualSearchEngine -- [ ] 上下文扩展逻辑 -- [ ] 检索性能测试 - -### Phase 4: 优化与完善(2周) -- [ ] 性能优化(批量处理、增量更新) -- [ ] 降级策略完善 -- [ ] 选择性向量化 -- [ ] 全面测试和文档 - -### Phase 5: 生产部署(1周) -- [ ] CLI集成 -- [ ] 配置选项暴露 -- [ ] 生产环境测试 -- [ ] 发布 - -**总计预估时间**:7-10周 - -## 11. 成功指标 - -1. **覆盖率**:95%以上的代码能被正确分词 -2. **准确率**:层级关系准确率>98% -3. **检索质量**:相比单层分词,检索相关性提升30%+ -4. **性能**:单文件分词<100ms,批量处理>100文件/分钟 -5. **存储效率**:相比全向量化,空间占用减少40%+ - -## 12. 参考资料 - -- [Tree-sitter Documentation](https://tree-sitter.github.io/) -- [AST-based Code Analysis](https://en.wikipedia.org/wiki/Abstract_syntax_tree) -- [Hierarchical Text Segmentation](https://arxiv.org/abs/2104.08836) -- 现有代码:`src/codexlens/semantic/chunker.py` diff --git a/codex-lens/docs/PURE_VECTOR_SEARCH_GUIDE.md b/codex-lens/docs/PURE_VECTOR_SEARCH_GUIDE.md deleted file mode 100644 index e4c54f78..00000000 --- a/codex-lens/docs/PURE_VECTOR_SEARCH_GUIDE.md +++ /dev/null @@ -1,417 +0,0 @@ -# Pure Vector Search 使用指南 - -## 概述 - -CodexLens 现在支持纯向量语义搜索!这是一个重要的新功能,允许您使用自然语言查询代码。 - -### 新增搜索模式 - -| 模式 | 描述 | 最佳用途 | 需要嵌入 | -|------|------|----------|---------| -| `exact` | 精确FTS匹配 | 代码标识符搜索 | ✗ | -| `fuzzy` | 模糊FTS匹配 | 容错搜索 | ✗ | -| `vector` | 向量 + FTS后备 | 语义 + 关键词混合 | ✓ | -| **`pure-vector`** | **纯向量搜索** | **纯自然语言查询** | **✓** | -| `hybrid` | 全部融合(RRF) | 最佳召回率 | ✓ | - -### 关键变化 - -**之前**: -```bash -# "vector"模式实际上总是包含exact FTS搜索 -codexlens search "authentication" --mode vector -# 即使没有嵌入,也会返回FTS结果 -``` - -**现在**: -```bash -# "vector"模式仍保持向量+FTS混合(向后兼容) -codexlens search "authentication" --mode vector - -# 新的"pure-vector"模式:仅使用向量搜索 -codexlens search "how to authenticate users" --mode pure-vector -# 没有嵌入时返回空列表(明确行为) -``` - -## 快速开始 - -### 步骤1:安装语义搜索依赖 - -```bash -# 方式1:使用可选依赖 -pip install codexlens[semantic] - -# 方式2:手动安装 -pip install fastembed numpy -``` - -### 步骤2:创建索引(如果还没有) - -```bash -# 为项目创建索引 -codexlens init ~/projects/your-project -``` - -### 步骤3:生成向量嵌入 - -```bash -# 为项目生成嵌入(自动查找索引) -codexlens embeddings-generate ~/projects/your-project - -# 为特定索引生成嵌入 -codexlens embeddings-generate ~/.codexlens/indexes/your-project/_index.db - -# 使用特定模型 -codexlens embeddings-generate ~/projects/your-project --model fast - -# 强制重新生成 -codexlens embeddings-generate ~/projects/your-project --force - -# 检查嵌入状态 -codexlens embeddings-status # 检查所有索引 -codexlens embeddings-status ~/projects/your-project # 检查特定项目 -``` - -**可用模型**: -- `fast`: BAAI/bge-small-en-v1.5 (384维, ~80MB) - 快速,轻量级 -- `code`: jinaai/jina-embeddings-v2-base-code (768维, ~150MB) - **代码优化**(推荐,默认) -- `multilingual`: intfloat/multilingual-e5-large (1024维, ~1GB) - 多语言 -- `balanced`: mixedbread-ai/mxbai-embed-large-v1 (1024维, ~600MB) - 高精度 - -### 步骤4:使用纯向量搜索 - -```bash -# 纯向量搜索(自然语言) -codexlens search "how to verify user credentials" --mode pure-vector - -# 向量搜索(带FTS后备) -codexlens search "authentication logic" --mode vector - -# 混合搜索(最佳效果) -codexlens search "user login" --mode hybrid - -# 精确代码搜索 -codexlens search "authenticate_user" --mode exact -``` - -## 使用场景 - -### 场景1:查找实现特定功能的代码 - -**问题**:"我如何在这个项目中处理用户身份验证?" - -```bash -codexlens search "verify user credentials and authenticate" --mode pure-vector -``` - -**优势**:理解查询意图,找到语义相关的代码,而不仅仅是关键词匹配。 - -### 场景2:查找类似的代码模式 - -**问题**:"项目中哪些地方使用了密码哈希?" - -```bash -codexlens search "password hashing with salt" --mode pure-vector -``` - -**优势**:找到即使没有包含"hash"或"password"关键词的相关代码。 - -### 场景3:探索性搜索 - -**问题**:"如何在这个项目中连接数据库?" - -```bash -codexlens search "database connection and initialization" --mode pure-vector -``` - -**优势**:发现相关代码,即使使用了不同的术语(如"DB"、"connection pool"、"session")。 - -### 场景4:混合搜索获得最佳效果 - -**问题**:既要关键词匹配,又要语义理解 - -```bash -# 最佳实践:使用hybrid模式 -codexlens search "authentication" --mode hybrid -``` - -**优势**:结合FTS的精确性和向量搜索的语义理解。 - -## 故障排除 - -### 问题1:纯向量搜索返回空结果 - -**原因**:未生成向量嵌入 - -**解决方案**: -```bash -# 检查嵌入状态 -codexlens embeddings-status ~/projects/your-project - -# 生成嵌入 -codexlens embeddings-generate ~/projects/your-project - -# 或者对特定索引 -codexlens embeddings-generate ~/.codexlens/indexes/your-project/_index.db -``` - -### 问题2:ImportError: fastembed not found - -**原因**:未安装语义搜索依赖 - -**解决方案**: -```bash -pip install codexlens[semantic] -``` - -### 问题3:嵌入生成失败 - -**原因**:模型下载失败或磁盘空间不足 - -**解决方案**: -```bash -# 使用更小的模型 -codexlens embeddings-generate ~/projects/your-project --model fast - -# 检查磁盘空间(模型需要~100MB) -df -h ~/.cache/fastembed -``` - -### 问题4:搜索速度慢 - -**原因**:向量搜索比FTS慢(需要计算余弦相似度) - -**优化**: -- 使用`--limit`限制结果数量 -- 考虑使用`vector`模式(带FTS后备)而不是`pure-vector` -- 对于精确标识符搜索,使用`exact`模式 - -## 性能对比 - -基于测试数据(100个文件,~500个代码块): - -| 模式 | 平均延迟 | 召回率 | 精确率 | -|------|---------|--------|--------| -| exact | 5.6ms | 中 | 高 | -| fuzzy | 7.7ms | 高 | 中 | -| vector | 7.4ms | 高 | 中 | -| **pure-vector** | **7.0ms** | **最高** | **中** | -| hybrid | 9.0ms | 最高 | 高 | - -**结论**: -- `exact`: 最快,适合代码标识符 -- `pure-vector`: 与vector类似速度,更明确的语义搜索 -- `hybrid`: 轻微开销,但召回率和精确率最佳 - -## 最佳实践 - -### 1. 选择合适的搜索模式 - -```bash -# 查找函数名/类名/变量名 → exact -codexlens search "UserAuthentication" --mode exact - -# 自然语言问题 → pure-vector -codexlens search "how to hash passwords securely" --mode pure-vector - -# 不确定用哪个 → hybrid -codexlens search "password security" --mode hybrid -``` - -### 2. 优化查询 - -**不好的查询**(对向量搜索): -```bash -codexlens search "auth" --mode pure-vector # 太模糊 -``` - -**好的查询**: -```bash -codexlens search "authenticate user with username and password" --mode pure-vector -``` - -**原则**: -- 使用完整句子描述意图 -- 包含关键动词和名词 -- 避免过于简短或模糊的查询 - -### 3. 定期更新嵌入 - -```bash -# 当代码更新后,重新生成嵌入 -codexlens embeddings-generate ~/projects/your-project --force -``` - -### 4. 监控嵌入存储空间 - -```bash -# 检查嵌入数据大小 -du -sh ~/.codexlens/indexes/*/ - -# 嵌入通常占用索引大小的2-3倍 -# 100个文件 → ~500个chunks → ~1.5MB (768维向量) -``` - -## API 使用示例 - -### Python API - -```python -from pathlib import Path -from codexlens.search.hybrid_search import HybridSearchEngine - -# 初始化引擎 -engine = HybridSearchEngine() - -# 纯向量搜索 -results = engine.search( - index_path=Path("~/.codexlens/indexes/project/_index.db"), - query="how to authenticate users", - limit=10, - enable_vector=True, - pure_vector=True, # 纯向量模式 -) - -for result in results: - print(f"{result.path}: {result.score:.3f}") - print(f" {result.excerpt}") - -# 向量搜索(带FTS后备) -results = engine.search( - index_path=Path("~/.codexlens/indexes/project/_index.db"), - query="authentication", - limit=10, - enable_vector=True, - pure_vector=False, # 允许FTS后备 -) -``` - -### 链式搜索API - -```python -from codexlens.search.chain_search import ChainSearchEngine, SearchOptions -from codexlens.storage.registry import RegistryStore -from codexlens.storage.path_mapper import PathMapper - -# 初始化 -registry = RegistryStore() -registry.initialize() -mapper = PathMapper() -engine = ChainSearchEngine(registry, mapper) - -# 配置搜索选项 -options = SearchOptions( - depth=-1, # 无限深度 - total_limit=20, - hybrid_mode=True, - enable_vector=True, - pure_vector=True, # 纯向量搜索 -) - -# 执行搜索 -result = engine.search( - query="verify user credentials", - source_path=Path("~/projects/my-app"), - options=options -) - -print(f"Found {len(result.results)} results in {result.stats.time_ms:.1f}ms") -``` - -## 技术细节 - -### 向量存储架构 - -``` -_index.db (SQLite) -├── files # 文件索引表 -├── files_fts # FTS5全文索引 -├── files_fts_fuzzy # 模糊搜索索引 -└── semantic_chunks # 向量嵌入表 ✓ 新增 - ├── id - ├── file_path - ├── content # 代码块内容 - ├── embedding # 向量嵌入(BLOB, float32) - ├── metadata # JSON元数据 - └── created_at -``` - -### 向量搜索流程 - -``` -1. 查询嵌入化 - └─ query → Embedder → query_embedding (768维向量) - -2. 相似度计算 - └─ VectorStore.search_similar() - ├─ 加载embedding matrix到内存 - ├─ NumPy向量化余弦相似度计算 - └─ Top-K选择 - -3. 结果返回 - └─ SearchResult对象列表 - ├─ path: 文件路径 - ├─ score: 相似度分数 - ├─ excerpt: 代码片段 - └─ metadata: 元数据 -``` - -### RRF融合算法 - -混合模式使用Reciprocal Rank Fusion (RRF): - -```python -# 默认权重 -weights = { - "exact": 0.4, # 40% 精确FTS - "fuzzy": 0.3, # 30% 模糊FTS - "vector": 0.3, # 30% 向量搜索 -} - -# RRF公式 -score(doc) = Σ weight[source] / (k + rank[source]) -k = 60 # RRF常数 -``` - -## 未来改进 - -- [ ] 增量嵌入更新(当前需要完全重新生成) -- [ ] 混合分块策略(symbol-based + sliding window) -- [ ] FAISS加速(100x+速度提升) -- [ ] 向量压缩(减少50%存储空间) -- [ ] 查询扩展(同义词、相关术语) -- [ ] 多模态搜索(代码 + 文档 + 注释) - -## 相关资源 - -- **实现文件**: - - `codexlens/search/hybrid_search.py` - 混合搜索引擎 - - `codexlens/semantic/embedder.py` - 嵌入生成 - - `codexlens/semantic/vector_store.py` - 向量存储 - - `codexlens/semantic/chunker.py` - 代码分块 - -- **测试文件**: - - `tests/test_pure_vector_search.py` - 纯向量搜索测试 - - `tests/test_search_comparison.py` - 搜索模式对比 - -- **文档**: - - `SEARCH_COMPARISON_ANALYSIS.md` - 详细技术分析 - - `SEARCH_ANALYSIS_SUMMARY.md` - 快速总结 - -## 反馈和贡献 - -如果您发现问题或有改进建议,请提交issue或PR: -- GitHub: https://github.com/your-org/codexlens - -## 更新日志 - -### v0.5.0 (2025-12-16) -- ✨ 新增 `pure-vector` 搜索模式 -- ✨ 添加向量嵌入生成脚本 -- 🔧 修复"vector"模式总是包含exact FTS的问题 -- 📚 更新文档和使用指南 -- ✅ 添加纯向量搜索测试套件 - ---- - -**问题?** 查看 [故障排除](#故障排除) 章节或提交issue。 diff --git a/codex-lens/docs/REAL_LSP_SERVER_PLAN.md b/codex-lens/docs/REAL_LSP_SERVER_PLAN.md deleted file mode 100644 index 015eb926..00000000 --- a/codex-lens/docs/REAL_LSP_SERVER_PLAN.md +++ /dev/null @@ -1,825 +0,0 @@ -# CodexLens Real LSP Server Implementation Plan - -> **Version**: 2.0 -> **Status**: Ready for Implementation -> **Based on**: Existing LSP_INTEGRATION_PLAN.md + Real Language Server Integration -> **Goal**: Implement true LSP server functionality (like cclsp), not pre-indexed search - ---- - -## Executive Summary - -### Current State vs Target State - -| Aspect | Current (Pre-indexed) | Target (Real LSP) | -|--------|----------------------|-------------------| -| **Data Source** | Cached database index | Live language servers | -| **Freshness** | Stale (depends on re-index) | Real-time (LSP protocol) | -| **Accuracy** | Good for indexed content | Perfect (from language server) | -| **Latency** | <50ms (database) | ~50-200ms (LSP) | -| **Language Support** | Limited to parsed symbols | Full LSP support (all languages) | -| **Complexity** | Simple (DB queries) | High (LSP protocol + server mgmt) | - -### Why Real LSP vs Index-Based - -**Problem with current approach**: -- 符号搜索与smart_search没有本质区别 -- 依赖预索引数据,不能实时反映代码变化 -- 不支持advanced LSP功能(rename, code actions等) - -**Advantages of real LSP**: -- ✅ Real-time code intelligence -- ✅ Supported by all major IDEs (VSCode, Neovim, Sublime, etc.) -- ✅ Standard protocol (Language Server Protocol) -- ✅ Advanced features: rename, code actions, formatting -- ✅ Language-agnostic (TypeScript, Python, Go, Rust, Java, etc.) - ---- - -## Architecture Design - -### System Architecture - -``` -┌─────────────────────────────────────────────────────────┐ -│ Client Layer │ -│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ -│ │ VS Code │ │ Neovim │ │ Sublime │ │ -│ │ (LSP Client) │ │ (LSP Client) │ │ (LSP Client) │ │ -│ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │ -│ │ │ │ │ -└─────────┼─────────────────┼─────────────────┼───────────┘ - │ LSP Protocol │ │ - │ (JSON-RPC/stdio)│ │ -┌─────────▼─────────────────▼─────────────────▼───────────┐ -│ CodexLens LSP Server Bridge │ -│ ┌─────────────────────────────────────────────────────┐ │ -│ │ LSP Protocol Handler (pygls) │ │ -│ │ • initialize / shutdown │ │ -│ │ • textDocument/definition │ │ -│ │ • textDocument/references │ │ -│ │ • textDocument/hover │ │ -│ │ • textDocument/completion │ │ -│ │ • textDocument/formatting │ │ -│ │ • workspace/symbol │ │ -│ └────────────────────┬────────────────────────────────┘ │ -│ │ │ -│ ┌────────────────────▼────────────────────────────────┐ │ -│ │ Language Server Multiplexer │ │ -│ │ • File type routing (ts→tsserver, py→pylsp, etc.) │ │ -│ │ • Multi-server management │ │ -│ │ • Request forwarding & response formatting │ │ -│ └────────────────────┬────────────────────────────────┘ │ -│ │ │ -│ ┌────────────────────▼────────────────────────────────┐ │ -│ │ Language Servers (Spawned) │ │ -│ │ ┌────────┐ ┌────────┐ ┌────────┐ ┌────────┐ │ │ -│ │ │tsserver│ │ pylsp │ │ gopls │ │rust- │ │ │ -│ │ │ │ │ │ │ │ │analyzer│ │ │ -│ │ └────────┘ └────────┘ └────────┘ └────────┘ │ │ -│ └─────────────────────────────────────────────────────┘ │ -│ │ -│ ┌─────────────────────────────────────────────────────┐ │ -│ │ Codex-Lens Core (Optional - MCP Layer) │ │ -│ │ • Semantic search │ │ -│ │ • Custom MCP tools (enrich_prompt, etc.) │ │ -│ │ • Hook system (pre-tool, post-tool) │ │ -│ └─────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────┘ -``` - -### Key Differences from Index-Based Approach - -1. **Request Flow** - - Index: Query → Database → Results - - LSP: Request → Route to LS → LS processes live code → Results - -2. **Configuration** - - Index: Depends on indexing state - - LSP: Depends on installed language servers - -3. **Latency Profile** - - Index: Consistent (~50ms) - - LSP: Variable (50-500ms depending on LS performance) - ---- - -## Implementation Phases - -### Phase 1: LSP Server Bridge (Foundation) - -**Duration**: ~3-5 days -**Complexity**: Medium -**Dependencies**: pygls library - -#### 1.1 Setup & Dependencies - -**File**: `pyproject.toml` - -```toml -[project.optional-dependencies] -lsp = [ - "pygls>=1.3.0", - "lsprotocol>=2023.0.0", -] - -[project.scripts] -codexlens-lsp = "codexlens.lsp.server:main" -``` - -**Installation**: -```bash -pip install -e ".[lsp]" -``` - -#### 1.2 LSP Server Core - -**Files to create**: -1. `src/codexlens/lsp/__init__.py` - Package init -2. `src/codexlens/lsp/server.py` - Server entry point -3. `src/codexlens/lsp/multiplexer.py` - LS routing & management -4. `src/codexlens/lsp/handlers.py` - LSP request handlers - -**Key responsibilities**: -- Initialize LSP server via pygls -- Handle client capabilities negotiation -- Route requests to appropriate language servers -- Format language server responses to LSP format - -#### 1.3 Acceptance Criteria - -- [ ] Server starts with `codexlens-lsp --stdio` -- [ ] Responds to `initialize` request -- [ ] Spawns language servers on demand -- [ ] Handles `shutdown` cleanly -- [ ] No crashes on malformed requests - ---- - -### Phase 2: Language Server Multiplexer - -**Duration**: ~5-7 days -**Complexity**: High -**Dependencies**: Phase 1 complete - -#### 2.1 Multi-Server Management - -**File**: `src/codexlens/lsp/multiplexer.py` - -**Responsibilities**: -- Spawn language servers based on file extension -- Maintain server process lifecycle -- Route requests by document type -- Handle server crashes & restarts - -**Supported Language Servers**: - -| Language | Server | Installation | -|----------|--------|--------------| -| TypeScript | `typescript-language-server` | `npm i -g typescript-language-server` | -| Python | `pylsp` | `pip install python-lsp-server` | -| Go | `gopls` | `go install golang.org/x/tools/gopls@latest` | -| Rust | `rust-analyzer` | `rustup component add rust-analyzer` | -| Java | `jdtls` | Download JDTLS | -| C/C++ | `clangd` | `apt install clangd` | - -#### 2.2 Configuration - -**File**: `codexlens-lsp.json` (user config) - -```json -{ - "languageServers": { - "typescript": { - "command": ["typescript-language-server", "--stdio"], - "extensions": ["ts", "tsx", "js", "jsx"], - "rootDir": "." - }, - "python": { - "command": ["pylsp"], - "extensions": ["py", "pyi"], - "rootDir": ".", - "settings": { - "pylsp": { - "plugins": { - "pycodestyle": { "enabled": true }, - "pylint": { "enabled": false } - } - } - } - }, - "go": { - "command": ["gopls"], - "extensions": ["go"], - "rootDir": "." - }, - "rust": { - "command": ["rust-analyzer"], - "extensions": ["rs"], - "rootDir": "." - } - }, - "debug": false, - "logLevel": "info" -} -``` - -#### 2.3 Acceptance Criteria - -- [ ] Routes requests to correct LS based on file type -- [ ] Spawns servers on first request -- [ ] Reuses existing server instances -- [ ] Handles server restarts on crash -- [ ] Respects initialization options from config - ---- - -### Phase 3: Core LSP Handlers - -**Duration**: ~5-7 days -**Complexity**: Medium -**Dependencies**: Phase 1-2 complete - -#### 3.1 Essential Handlers - -Implement LSP request handlers for core functionality: - -**Handler Mapping**: - -```python -Handlers = { - # Navigation - "textDocument/definition": handle_definition, - "textDocument/references": handle_references, - "textDocument/declaration": handle_declaration, - - # Hover & Info - "textDocument/hover": handle_hover, - "textDocument/signatureHelp": handle_signature_help, - - # Completion - "textDocument/completion": handle_completion, - "completionItem/resolve": handle_completion_resolve, - - # Symbols - "textDocument/documentSymbol": handle_document_symbols, - "workspace/symbol": handle_workspace_symbols, - - # Editing - "textDocument/formatting": handle_formatting, - "textDocument/rangeFormatting": handle_range_formatting, - "textDocument/rename": handle_rename, - - # Diagnostics - "textDocument/publishDiagnostics": handle_publish_diagnostics, - - # Misc - "textDocument/codeAction": handle_code_action, - "textDocument/codeLens": handle_code_lens, -} -``` - -#### 3.2 Request Forwarding Logic - -```python -def forward_request_to_lsp(handler_name, params): - """Forward request to appropriate language server.""" - - # Extract document info - document_uri = params.get("textDocument", {}).get("uri") - file_ext = extract_extension(document_uri) - - # Get language server - ls = multiplexer.get_server(file_ext) - if not ls: - return {"error": f"No LS for {file_ext}"} - - # Convert position (1-based → 0-based) - normalized_params = normalize_positions(params) - - # Forward to LS - response = ls.send_request(handler_name, normalized_params) - - # Convert response format - return normalize_response(response) -``` - -#### 3.3 Acceptance Criteria - -- [ ] All handlers implemented and tested -- [ ] Proper position coordinate conversion (LSP is 0-based, user-facing is 1-based) -- [ ] Error handling for missing language servers -- [ ] Response formatting matches LSP spec -- [ ] Latency < 500ms for 95th percentile - ---- - -### Phase 4: Advanced Features - -**Duration**: ~3-5 days -**Complexity**: Medium -**Dependencies**: Phase 1-3 complete - -#### 4.1 Position Tolerance (cclsp-like feature) - -Some LSP clients (like Claude Code with fuzzy positions) may send imprecise positions. Implement retry logic: - -```python -def find_symbol_with_tolerance(ls, uri, position, max_attempts=5): - """Try multiple position offsets if exact position fails.""" - - positions_to_try = [ - position, # Original - (position.line - 1, position.char), # One line up - (position.line + 1, position.char), # One line down - (position.line, max(0, position.char - 1)), # One char left - (position.line, position.char + 1), # One char right - ] - - for pos in positions_to_try: - try: - result = ls.send_request("textDocument/definition", { - "textDocument": {"uri": uri}, - "position": pos - }) - if result: - return result - except: - continue - - return None -``` - -#### 4.2 MCP Integration (Optional) - -Extend with MCP provider for Claude Code hooks: - -```python -class MCPBridgeHandler: - """Bridge LSP results into MCP context.""" - - def build_mcp_context_from_lsp(self, symbol_name, lsp_results): - """Convert LSP responses to MCP context.""" - # Implementation - pass -``` - -#### 4.3 Acceptance Criteria - -- [ ] Position tolerance working (≥3 positions tried) -- [ ] MCP context generation functional -- [ ] Hook system integration complete -- [ ] All test coverage > 80% - ---- - -### Phase 5: Deployment & Documentation - -**Duration**: ~2-3 days -**Complexity**: Low -**Dependencies**: Phase 1-4 complete - -#### 5.1 Installation & Setup Guide - -Create comprehensive documentation: -- Installation instructions for each supported language -- Configuration guide -- Troubleshooting -- Performance tuning - -#### 5.2 CLI Tools - -```bash -# Start LSP server -codexlens-lsp --stdio - -# Check configured language servers -codexlens-lsp --list-servers - -# Validate configuration -codexlens-lsp --validate-config - -# Show logs -codexlens-lsp --log-level debug -``` - -#### 5.3 Acceptance Criteria - -- [ ] Documentation complete with examples -- [ ] All CLI commands working -- [ ] Integration tested with VS Code, Neovim -- [ ] Performance benchmarks documented - ---- - -## Module Structure - -``` -src/codexlens/lsp/ -├── __init__.py # Package exports -├── server.py # LSP server entry point -├── multiplexer.py # Language server manager -├── handlers.py # LSP request handlers -├── position_utils.py # Coordinate conversion utilities -├── process_manager.py # Language server process lifecycle -├── response_formatter.py # LSP response formatting -└── config.py # Configuration loading - -tests/lsp/ -├── test_multiplexer.py # LS routing tests -├── test_handlers.py # Handler tests -├── test_position_conversion.py # Coordinate tests -├── test_integration.py # Full LSP handshake -└── fixtures/ - ├── sample_python.py # Test files - └── sample_typescript.ts -``` - ---- - -## Dependency Graph - -``` -Phase 5 (Deployment) - ↑ -Phase 4 (Advanced Features) - ↑ -Phase 3 (Core Handlers) - ├─ Depends on: Phase 2 - ├─ Depends on: Phase 1 - └─ Deliverable: Full LSP functionality - -Phase 2 (Multiplexer) - ├─ Depends on: Phase 1 - └─ Deliverable: Multi-server routing - -Phase 1 (Server Bridge) - └─ Deliverable: Basic LSP server -``` - ---- - -## Technology Stack - -| Component | Technology | Rationale | -|-----------|-----------|-----------| -| LSP Implementation | `pygls` | Mature, well-maintained | -| Protocol | LSP 3.17+ | Latest stable version | -| Process Management | `subprocess` + `psutil` | Standard Python, no external deps | -| Configuration | JSON | Simple, widely understood | -| Logging | `logging` module | Built-in, standard | -| Testing | `pytest` + `pytest-asyncio` | Industry standard | - ---- - -## Risk Assessment - -| Risk | Probability | Impact | Mitigation | -|------|-------------|--------|------------| -| Language server crashes | Medium | High | Auto-restart with exponential backoff | -| Configuration errors | Medium | Medium | Validation on startup | -| Performance degradation | Low | High | Implement caching + benchmarks | -| Position mismatch issues | Medium | Low | Tolerance layer (try multiple positions) | -| Memory leaks (long sessions) | Low | Medium | Connection pooling + cleanup timers | - ---- - -## Success Metrics - -1. **Functionality**: All 7 core LSP handlers working -2. **Performance**: p95 latency < 500ms for typical requests -3. **Reliability**: 99.9% uptime in production -4. **Coverage**: >80% code coverage -5. **Documentation**: Complete with examples -6. **Multi-language**: Support for 5+ languages - ---- - -## Comparison: This Approach vs Alternatives - -### Option A: Real LSP Server (This Plan) ✅ RECOMMENDED -**Pros**: -- ✅ True real-time code intelligence -- ✅ Supports all LSP clients (VSCode, Neovim, Sublime, Emacs, etc.) -- ✅ Advanced features (rename, code actions, formatting) -- ✅ Language-agnostic -- ✅ Follows industry standard protocol - -**Cons**: -- ❌ More complex implementation -- ❌ Depends on external language servers -- ❌ Higher latency than index-based - -**Effort**: ~20-25 days - ---- - -### Option B: Enhanced Index-Based (Current Approach) -**Pros**: -- ✅ Simple implementation -- ✅ Fast (<50ms) -- ✅ No external dependencies - -**Cons**: -- ❌ Same as smart_search (user's concern) -- ❌ Stale data between re-indexes -- ❌ Limited to indexed symbols -- ❌ No advanced LSP features - -**Effort**: ~5-10 days - ---- - -### Option C: Hybrid (LSP + Index) -**Pros**: -- ✅ Real-time from LSP -- ✅ Fallback to index -- ✅ Best of both worlds - -**Cons**: -- ❌ Highest complexity -- ❌ Difficult to debug conflicts -- ❌ Higher maintenance burden - -**Effort**: ~30-35 days - ---- - -## Next Steps - -1. **Approve Plan**: Confirm this approach matches requirements -2. **Setup Dev Environment**: Install language servers -3. **Phase 1 Implementation**: Start with server bridge -4. **Iterative Testing**: Test each phase with real IDE integration -5. **Documentation**: Maintain docs as implementation progresses - ---- - ---- - -## Appendix A: VSCode Bridge Implementation - -### A.1 Overview - -VSCode Bridge 是另一种集成方式,通过VSCode扩展暴露其内置LSP功能给外部工具(如CCW MCP Server)。 - -**Architecture**: - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ Claude Code / CCW │ -│ (MCP Client / CLI) │ -└───────────────────────────┬─────────────────────────────────────┘ - │ - │ MCP Tool Call (vscode_lsp) - │ -┌───────────────────────────▼─────────────────────────────────────┐ -│ CCW MCP Server │ -│ ┌─────────────────────────────────────────────────────────────┐ │ -│ │ vscode_lsp Tool │ │ -│ │ • HTTP client to VSCode Bridge │ │ -│ │ • Parameter validation (Zod) │ │ -│ │ • Response formatting │ │ -│ └────────────────────────┬────────────────────────────────────┘ │ -└───────────────────────────┼─────────────────────────────────────┘ - │ - │ HTTP POST (localhost:3457) - │ -┌───────────────────────────▼─────────────────────────────────────┐ -│ ccw-vscode-bridge Extension │ -│ ┌─────────────────────────────────────────────────────────────┐ │ -│ │ HTTP Server (port 3457) │ │ -│ │ Endpoints: │ │ -│ │ • POST /get_definition │ │ -│ │ • POST /get_references │ │ -│ │ • POST /get_hover │ │ -│ │ • POST /get_document_symbols │ │ -│ └────────────────────────┬────────────────────────────────────┘ │ -│ │ │ -│ ┌────────────────────────▼────────────────────────────────────┐ │ -│ │ VSCode API Calls │ │ -│ │ vscode.commands.executeCommand(): │ │ -│ │ • vscode.executeDefinitionProvider │ │ -│ │ • vscode.executeReferenceProvider │ │ -│ │ • vscode.executeHoverProvider │ │ -│ │ • vscode.executeDocumentSymbolProvider │ │ -│ └─────────────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────┘ - │ - │ VSCode LSP Integration - │ -┌───────────────────────────▼─────────────────────────────────────┐ -│ VSCode Language Services │ -│ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ -│ │TypeScript│ │ Python │ │ Go │ │ Rust │ │ -│ │ Server │ │ Server │ │ (gopls) │ │Analyzer │ │ -│ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │ -└─────────────────────────────────────────────────────────────────┘ -``` - -### A.2 Component Files - -**已创建的文件**: - -1. `ccw-vscode-bridge/package.json` - VSCode扩展配置 -2. `ccw-vscode-bridge/tsconfig.json` - TypeScript配置 -3. `ccw-vscode-bridge/src/extension.ts` - 扩展主代码 -4. `ccw-vscode-bridge/.vscodeignore` - 打包排除文件 -5. `ccw-vscode-bridge/README.md` - 使用文档 - -**待创建的文件**: - -1. `ccw/src/tools/vscode-lsp.ts` - MCP工具实现 -2. `ccw/src/tools/index.ts` - 注册新工具 - -### A.3 VSCode Bridge Extension Implementation - -**File**: `ccw-vscode-bridge/src/extension.ts` - -```typescript -// 核心功能: -// 1. 启动HTTP服务器监听3457端口 -// 2. 接收POST请求,解析JSON body -// 3. 调用VSCode内置LSP命令 -// 4. 返回JSON结果 - -// HTTP Endpoints: -// POST /get_definition → vscode.executeDefinitionProvider -// POST /get_references → vscode.executeReferenceProvider -// POST /get_hover → vscode.executeHoverProvider -// POST /get_document_symbols → vscode.executeDocumentSymbolProvider -``` - -### A.4 MCP Tool Implementation - -**File**: `ccw/src/tools/vscode-lsp.ts` - -```typescript -/** - * MCP tool that communicates with VSCode Bridge extension. - * - * Actions: - * - get_definition: Find symbol definition - * - get_references: Find all references - * - get_hover: Get hover information - * - get_document_symbols: List symbols in file - * - * Required: - * - ccw-vscode-bridge extension running in VSCode - * - File must be open in VSCode for accurate results - */ - -const schema: ToolSchema = { - name: 'vscode_lsp', - description: `Access live VSCode LSP features...`, - inputSchema: { - type: 'object', - properties: { - action: { type: 'string', enum: [...] }, - file_path: { type: 'string' }, - line: { type: 'number' }, - character: { type: 'number' } - }, - required: ['action', 'file_path'] - } -}; -``` - -### A.5 Advantages vs Standalone LSP Server - -| Feature | VSCode Bridge | Standalone LSP Server | -|---------|--------------|----------------------| -| **Setup Complexity** | Low (VSCode ext) | Medium (multiple LS) | -| **Language Support** | Automatic (VSCode) | Manual config | -| **Maintenance** | Low | Medium | -| **IDE Independence** | VSCode only | Any LSP client | -| **Performance** | Good | Good | -| **Advanced Features** | Full VSCode support | LSP standard | - ---- - -## Appendix B: Complete Integration Architecture - -### B.1 Three Integration Paths - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ CodexLens Integration Paths │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ │ -│ Path 1: VSCode Bridge (HTTP) Path 2: Standalone LSP Server │ -│ ──────────────────────── ───────────────────────────── │ -│ │ -│ ┌─────────────┐ ┌─────────────┐ │ -│ │ CCW MCP │ │ Any LSP │ │ -│ │ vscode_lsp │ │ Client │ │ -│ └──────┬──────┘ └──────┬──────┘ │ -│ │ HTTP │ LSP/stdio │ -│ ▼ ▼ │ -│ ┌─────────────┐ ┌─────────────┐ │ -│ │ ccw-vscode │ │ codexlens- │ │ -│ │ -bridge │ │ lsp │ │ -│ └──────┬──────┘ └──────┬──────┘ │ -│ │ VSCode API │ Child Process │ -│ ▼ ▼ │ -│ ┌─────────────┐ ┌─────────────┐ │ -│ │ VSCode │ │ pylsp │ │ -│ │ LS │ │ tsserver │ │ -│ └─────────────┘ │ gopls │ │ -│ └─────────────┘ │ -│ │ -│ Path 3: Index-Based (Current) │ -│ ───────────────────────────── │ -│ │ -│ ┌─────────────┐ │ -│ │ CCW MCP │ │ -│ │codex_lens_lsp│ │ -│ └──────┬──────┘ │ -│ │ Python subprocess │ -│ ▼ │ -│ ┌─────────────┐ │ -│ │ CodexLens │ │ -│ │ Index DB │ │ -│ └─────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` - -### B.2 Recommendation Matrix - -| Use Case | Recommended Path | Reason | -|----------|-----------------|--------| -| Claude Code + VSCode | Path 1: VSCode Bridge | Simplest, full VSCode features | -| CLI-only workflows | Path 2: Standalone LSP | No VSCode dependency | -| Quick search across indexed code | Path 3: Index-based | Fastest response | -| Multi-IDE support | Path 2: Standalone LSP | Standard protocol | -| Advanced refactoring | Path 1: VSCode Bridge | Full VSCode capabilities | - -### B.3 Hybrid Mode (Recommended) - -For maximum flexibility, implement all three paths: - -```javascript -// Smart routing in CCW -function selectLSPPath(request) { - // 1. Try VSCode Bridge first (if available) - if (await checkVSCodeBridge()) { - return "vscode_bridge"; - } - - // 2. Fall back to Standalone LSP - if (await checkStandaloneLSP(request.fileType)) { - return "standalone_lsp"; - } - - // 3. Last resort: Index-based - return "index_based"; -} -``` - ---- - -## Appendix C: Implementation Tasks Summary - -### C.1 VSCode Bridge Tasks - -| Task ID | Description | Priority | Status | -|---------|-------------|----------|--------| -| VB-1 | Create ccw-vscode-bridge extension structure | High | ✅ Done | -| VB-2 | Implement HTTP server in extension.ts | High | ✅ Done | -| VB-3 | Create vscode_lsp MCP tool | High | 🔄 Pending | -| VB-4 | Register tool in CCW | High | 🔄 Pending | -| VB-5 | Test with VSCode | Medium | 🔄 Pending | -| VB-6 | Add connection retry logic | Low | 🔄 Pending | - -### C.2 Standalone LSP Server Tasks - -| Task ID | Description | Priority | Status | -|---------|-------------|----------|--------| -| LSP-1 | Setup pygls project structure | High | 🔄 Pending | -| LSP-2 | Implement multiplexer | High | 🔄 Pending | -| LSP-3 | Core handlers (definition, references) | High | 🔄 Pending | -| LSP-4 | Position tolerance | Medium | 🔄 Pending | -| LSP-5 | Tests and documentation | Medium | 🔄 Pending | - -### C.3 Integration Tasks - -| Task ID | Description | Priority | Status | -|---------|-------------|----------|--------| -| INT-1 | Smart path routing | Medium | 🔄 Pending | -| INT-2 | Unified error handling | Medium | 🔄 Pending | -| INT-3 | Performance benchmarks | Low | 🔄 Pending | - ---- - -## Questions for Clarification - -Before implementation, confirm: - -1. **Implementation Priority**: Start with VSCode Bridge (simpler) or Standalone LSP (more general)? -2. **Language Priority**: Which languages are most important? (TypeScript, Python, Go, Rust, etc.) -3. **IDE Focus**: Target VS Code first, then others? -4. **Fallback Strategy**: Should we keep index-based search as fallback if LSP fails? -5. **Caching**: How much should we cache LS responses? -6. **Configuration**: Simple JSON config or more sophisticated format? - diff --git a/codex-lens/docs/SEARCH_ANALYSIS_SUMMARY.md b/codex-lens/docs/SEARCH_ANALYSIS_SUMMARY.md deleted file mode 100644 index d0d4c676..00000000 --- a/codex-lens/docs/SEARCH_ANALYSIS_SUMMARY.md +++ /dev/null @@ -1,192 +0,0 @@ -# CodexLens 搜索分析 - 执行摘要 - -## 🎯 核心发现 - -### 问题1:向量搜索为什么返回空结果? - -**根本原因**:向量嵌入数据不存在 - -- ✗ `semantic_chunks` 表未创建 -- ✗ 从未执行向量嵌入生成流程 -- ✗ 向量索引数据库实际是 SQLite 中的一个表,不是独立文件 - -**位置**:向量数据存储在 `~/.codexlens/indexes/项目名/_index.db` 的 `semantic_chunks` 表中 - -### 问题2:向量索引数据库在哪里? - -**存储架构**: -``` -~/.codexlens/indexes/ -└── project-name/ - └── _index.db ← SQLite数据库 - ├── files ← 文件索引表 - ├── files_fts ← FTS5全文索引 - ├── files_fts_fuzzy ← 模糊搜索索引 - └── semantic_chunks ← 向量嵌入表(当前不存在!) -``` - -**不是独立数据库**:向量数据集成在 SQLite 索引文件中,而不是单独的向量数据库。 - -### 问题3:当前架构是否发挥了并行效果? - -**✓ 是的!架构非常优秀** - -- **双层并行**: - - 第1层:单索引内,exact/fuzzy/vector 三种搜索方法并行 - - 第2层:跨多个目录索引并行搜索 -- **性能表现**:混合模式仅增加 1.6x 开销(9ms vs 5.6ms) -- **资源利用**:ThreadPoolExecutor 充分利用 I/O 并发 - -## ⚡ 快速修复 - -### 立即解决向量搜索问题 - -**步骤1:安装依赖** -```bash -pip install codexlens[semantic] -# 或 -pip install fastembed numpy -``` - -**步骤2:生成向量嵌入** - -创建脚本 `generate_embeddings.py`: -```python -from pathlib import Path -from codexlens.semantic.embedder import Embedder -from codexlens.semantic.vector_store import VectorStore -from codexlens.semantic.chunker import Chunker, ChunkConfig -import sqlite3 - -def generate_embeddings(index_db_path: Path): - embedder = Embedder(profile="code") - vector_store = VectorStore(index_db_path) - chunker = Chunker(config=ChunkConfig(max_chunk_size=2000)) - - with sqlite3.connect(index_db_path) as conn: - conn.row_factory = sqlite3.Row - files = conn.execute("SELECT full_path, content FROM files").fetchall() - - for file_row in files: - chunks = chunker.chunk_sliding_window( - file_row["content"], - file_path=file_row["full_path"], - language="python" - ) - for chunk in chunks: - chunk.embedding = embedder.embed_single(chunk.content) - if chunks: - vector_store.add_chunks(chunks, file_row["full_path"]) -``` - -**步骤3:执行生成** -```bash -python generate_embeddings.py ~/.codexlens/indexes/codex-lens/_index.db -``` - -**步骤4:验证** -```bash -# 检查数据 -sqlite3 ~/.codexlens/indexes/codex-lens/_index.db \ - "SELECT COUNT(*) FROM semantic_chunks" - -# 测试搜索 -codexlens search "authentication credentials" --mode vector -``` - -## 🔍 关键洞察 - -### 发现:Vector模式不是纯向量搜索 - -**当前行为**: -```python -# hybrid_search.py:73 -backends = {"exact": True} # ⚠️ exact搜索总是启用! -if enable_vector: - backends["vector"] = True -``` - -**影响**: -- "vector模式"实际是 **vector + exact 混合模式** -- 即使向量搜索返回空,仍有exact FTS结果 -- 这就是为什么"向量搜索"在无嵌入时也有结果 - -**建议修复**:添加 `pure_vector` 参数以支持真正的纯向量搜索 - -## 📊 搜索模式对比 - -| 模式 | 延迟 | 召回率 | 适用场景 | 需要嵌入 | -|------|------|--------|----------|---------| -| **exact** | 5.6ms | 中 | 代码标识符 | ✗ | -| **fuzzy** | 7.7ms | 高 | 容错搜索 | ✗ | -| **vector** | 7.4ms | 最高 | 语义搜索 | ✓ | -| **hybrid** | 9.0ms | 最高 | 通用搜索 | ✓ | - -**推荐**: -- 代码搜索 → `--mode exact` -- 自然语言 → `--mode hybrid`(需先生成嵌入) -- 容错搜索 → `--mode fuzzy` - -## 📈 优化路线图 - -### P0 - 立即 (本周) -- [x] 生成向量嵌入 -- [ ] 验证向量搜索可用 -- [ ] 更新使用文档 - -### P1 - 短期 (2周) -- [ ] 添加 `pure_vector` 模式 -- [ ] 增量嵌入更新 -- [ ] 改进错误提示 - -### P2 - 中期 (1-2月) -- [ ] 混合分块策略 -- [ ] 查询扩展 -- [ ] 自适应权重 - -### P3 - 长期 (3-6月) -- [ ] FAISS加速 -- [ ] 向量压缩 -- [ ] 多模态搜索 - -## 📚 详细文档 - -完整分析报告:`SEARCH_COMPARISON_ANALYSIS.md` - -包含内容: -- 详细问题诊断 -- 架构深度分析 -- 完整解决方案 -- 代码示例 -- 实施检查清单 - -## 🎓 学习要点 - -1. **向量搜索需要主动生成嵌入**:不会自动创建 -2. **双层并行架构很优秀**:无需额外优化 -3. **RRF融合算法工作良好**:多源结果合理融合 -4. **Vector模式非纯向量**:包含FTS作为后备 - -## 💡 下一步行动 - -```bash -# 1. 安装依赖 -pip install codexlens[semantic] - -# 2. 创建索引(如果还没有) -codexlens init ~/projects/your-project - -# 3. 生成嵌入 -python generate_embeddings.py ~/.codexlens/indexes/your-project/_index.db - -# 4. 测试搜索 -codexlens search "your natural language query" --mode hybrid -``` - ---- - -**问题解决**: ✓ 已识别并提供解决方案 -**架构评估**: ✓ 并行架构优秀,充分发挥效能 -**优化建议**: ✓ 提供短期、中期、长期优化路线 - -**联系**: 详见 `SEARCH_COMPARISON_ANALYSIS.md` 获取完整技术细节 diff --git a/codex-lens/docs/SEARCH_COMPARISON_ANALYSIS.md b/codex-lens/docs/SEARCH_COMPARISON_ANALYSIS.md deleted file mode 100644 index 9f2e66c9..00000000 --- a/codex-lens/docs/SEARCH_COMPARISON_ANALYSIS.md +++ /dev/null @@ -1,711 +0,0 @@ -# CodexLens 搜索模式对比分析报告 - -**生成时间**: 2025-12-16 -**分析目标**: 对比向量搜索和混合搜索效果,诊断向量搜索返回空结果的原因,评估并行架构效能 - ---- - -## 执行摘要 - -通过深入的代码分析和实验测试,我们发现了向量搜索在当前实现中的几个关键问题,并提供了针对性的优化方案。 - -### 核心发现 - -1. **向量搜索返回空结果的根本原因**:缺少向量嵌入数据(semantic_chunks表为空) -2. **混合搜索架构设计优秀**:使用了双层并行架构,性能表现良好 -3. **向量搜索模式的语义问题**:"vector模式"实际上总是包含exact搜索,不是纯向量搜索 - ---- - -## 1. 问题诊断 - -### 1.1 向量索引数据库位置 - -**存储架构**: -- **位置**: 向量数据集成存储在SQLite索引文件中(`_index.db`) -- **表名**: `semantic_chunks` -- **字段结构**: - - `id`: 主键 - - `file_path`: 文件路径 - - `content`: 代码块内容 - - `embedding`: 向量嵌入(BLOB格式,numpy float32数组) - - `metadata`: JSON格式元数据 - - `created_at`: 创建时间 - -**默认存储路径**: -- 全局索引: `~/.codexlens/indexes/` -- 项目索引: `项目目录/.codexlens/` -- 每个目录一个 `_index.db` 文件 - -**为什么没有看到向量数据库**: -向量数据不是独立数据库,而是与FTS索引共存于同一个SQLite文件中的`semantic_chunks`表。如果该表不存在或为空,说明从未生成过向量嵌入。 - -### 1.2 向量搜索返回空结果的原因 - -**代码分析** (`hybrid_search.py:195-253`): - -```python -def _search_vector(self, index_path: Path, query: str, limit: int) -> List[SearchResult]: - try: - # 检查1: semantic_chunks表是否存在 - conn = sqlite3.connect(index_path) - cursor = conn.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_chunks'" - ) - has_semantic_table = cursor.fetchone() is not None - conn.close() - - if not has_semantic_table: - self.logger.debug("No semantic_chunks table found") - return [] # ❌ 返回空列表 - - # 检查2: 向量存储是否有数据 - vector_store = VectorStore(index_path) - if vector_store.count_chunks() == 0: - self.logger.debug("Vector store is empty") - return [] # ❌ 返回空列表 - - # 正常向量搜索流程... - except Exception as exc: - return [] # ❌ 异常也返回空列表 -``` - -**失败路径**: -1. `semantic_chunks`表不存在 → 返回空 -2. 表存在但无数据 → 返回空 -3. 语义搜索依赖未安装 → 返回空 -4. 任何异常 → 返回空 - -**当前状态诊断**: -通过测试验证,当前项目中: -- ✗ `semantic_chunks`表不存在 -- ✗ 未执行向量嵌入生成流程 -- ✗ 向量索引从未创建 - -**解决方案**:需要执行向量嵌入生成流程(见第3节) - -### 1.3 混合搜索 vs 向量搜索的实际行为 - -**重要发现**:当前实现中,"vector模式"并非纯向量搜索。 - -**代码证据** (`hybrid_search.py:72-77`): - -```python -def search(self, ...): - # Determine which backends to use - backends = {"exact": True} # ⚠️ exact搜索总是启用! - if enable_fuzzy: - backends["fuzzy"] = True - if enable_vector: - backends["vector"] = True -``` - -**影响**: -- 即使设置为"vector模式"(`enable_fuzzy=False, enable_vector=True`),exact搜索仍然运行 -- 当向量搜索返回空时,RRF融合仍会包含exact搜索的结果 -- 这导致"向量搜索"在没有嵌入数据时仍返回结果(来自exact FTS) - -**测试验证**: -``` -测试场景:有FTS索引但无向量嵌入 -查询:"authentication" - -预期行为(纯向量模式): - - 向量搜索: 0 结果(无嵌入数据) - - 最终结果: 0 - -实际行为: - - 向量搜索: 0 结果 - - Exact搜索: 3 结果 ✓ (总是运行) - - 最终结果: 3(来自exact,经过RRF) -``` - -**设计建议**: -1. **选项A(推荐)**: 添加纯向量模式标志 - ```python - backends = {} - if enable_vector and not pure_vector_mode: - backends["exact"] = True # 向量搜索的后备方案 - elif not enable_vector: - backends["exact"] = True # 非向量模式总是启用exact - ``` - -2. **选项B**: 文档明确说明当前行为 - - "vector模式"实际是"vector+exact混合模式" - - 提供警告信息当向量搜索返回空时 - ---- - -## 2. 并行架构分析 - -### 2.1 双层并行设计 - -CodexLens采用了优秀的双层并行架构: - -**第一层:搜索方法级并行** (`HybridSearchEngine`) - -```python -def _search_parallel(self, index_path, query, backends, limit): - with ThreadPoolExecutor(max_workers=len(backends)) as executor: - # 并行提交搜索任务 - if backends.get("exact"): - future = executor.submit(self._search_exact, ...) - if backends.get("fuzzy"): - future = executor.submit(self._search_fuzzy, ...) - if backends.get("vector"): - future = executor.submit(self._search_vector, ...) - - # 收集结果 - for future in as_completed(future_to_source): - results = future.result() -``` - -**特点**: -- 在**单个索引**内,exact/fuzzy/vector三种搜索方法并行执行 -- 使用`ThreadPoolExecutor`实现I/O密集型任务并行 -- 使用`as_completed`实现结果流式收集 -- 动态worker数量(与启用的backend数量相同) - -**性能测试结果**: -``` -搜索模式 | 平均延迟 | 相对overhead ------------|----------|------------- -Exact only | 5.6ms | 1.0x (基线) -Fuzzy only | 7.7ms | 1.4x -Vector only| 7.4ms | 1.3x -Hybrid (all)| 9.0ms | 1.6x -``` - -**分析**: -- ✓ Hybrid模式开销合理(<2x),证明并行有效 -- ✓ 单次搜索延迟仍保持在10ms以下(优秀) - -**第二层:索引级并行** (`ChainSearchEngine`) - -```python -def _search_parallel(self, index_paths, query, options): - executor = self._get_executor(options.max_workers) - - # 为每个索引提交搜索任务 - future_to_path = { - executor.submit( - self._search_single_index, - idx_path, query, ... - ): idx_path - for idx_path in index_paths - } - - # 收集所有索引的结果 - for future in as_completed(future_to_path): - results = future.result() - all_results.extend(results) -``` - -**特点**: -- 跨**多个目录索引**并行搜索 -- 共享线程池(避免线程创建开销) -- 可配置worker数量(默认8) -- 结果去重和RRF融合 - -### 2.2 并行效能评估 - -**优势**: -1. ✓ **架构清晰**:双层并行职责明确,互不干扰 -2. ✓ **资源利用**:I/O密集型任务充分利用线程池 -3. ✓ **扩展性**:易于添加新的搜索后端 -4. ✓ **容错性**:单个后端失败不影响其他后端 - -**当前利用率**: -- 单索引搜索:并行度 = min(3, 启用的backend数量) -- 多索引搜索:并行度 = min(8, 索引数量) -- **充分发挥**:只要有多个索引或多个backend - -**潜在优化点**: -1. **CPU密集型任务**:向量相似度计算已使用numpy向量化,无需额外并行 -2. **缓存优化**:`VectorStore`已实现embedding matrix缓存,性能良好 -3. **动态worker调度**:当前固定worker数,可根据任务负载动态调整 - ---- - -## 3. 解决方案与优化建议 - -### 3.1 立即修复:生成向量嵌入 - -**步骤1:安装语义搜索依赖** - -```bash -# 方式A:完整安装 -pip install codexlens[semantic] - -# 方式B:手动安装依赖 -pip install fastembed numpy -``` - -**步骤2:创建向量索引脚本** - -保存为 `scripts/generate_embeddings.py`: - -```python -"""Generate vector embeddings for existing indexes.""" - -import logging -import sqlite3 -from pathlib import Path - -from codexlens.semantic.embedder import Embedder -from codexlens.semantic.vector_store import VectorStore -from codexlens.semantic.chunker import Chunker, ChunkConfig - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -def generate_embeddings_for_index(index_db_path: Path): - """Generate embeddings for all files in an index.""" - logger.info(f"Processing index: {index_db_path}") - - # Initialize components - embedder = Embedder(profile="code") # Use code-optimized model - vector_store = VectorStore(index_db_path) - chunker = Chunker(config=ChunkConfig(max_chunk_size=2000)) - - # Read files from index - with sqlite3.connect(index_db_path) as conn: - conn.row_factory = sqlite3.Row - cursor = conn.execute("SELECT full_path, content, language FROM files") - files = cursor.fetchall() - - logger.info(f"Found {len(files)} files to process") - - # Process each file - total_chunks = 0 - for file_row in files: - file_path = file_row["full_path"] - content = file_row["content"] - language = file_row["language"] or "python" - - try: - # Create chunks - chunks = chunker.chunk_sliding_window( - content, - file_path=file_path, - language=language - ) - - if not chunks: - logger.debug(f"No chunks created for {file_path}") - continue - - # Generate embeddings - for chunk in chunks: - embedding = embedder.embed_single(chunk.content) - chunk.embedding = embedding - - # Store chunks - vector_store.add_chunks(chunks, file_path) - total_chunks += len(chunks) - logger.info(f"✓ {file_path}: {len(chunks)} chunks") - - except Exception as exc: - logger.error(f"✗ {file_path}: {exc}") - - logger.info(f"Completed: {total_chunks} total chunks indexed") - return total_chunks - - -def main(): - import sys - - if len(sys.argv) < 2: - print("Usage: python generate_embeddings.py ") - print("Example: python generate_embeddings.py ~/.codexlens/indexes/project/_index.db") - sys.exit(1) - - index_path = Path(sys.argv[1]) - - if not index_path.exists(): - print(f"Error: Index not found at {index_path}") - sys.exit(1) - - generate_embeddings_for_index(index_path) - - -if __name__ == "__main__": - main() -``` - -**步骤3:执行生成** - -```bash -# 为特定项目生成嵌入 -python scripts/generate_embeddings.py ~/.codexlens/indexes/codex-lens/_index.db - -# 或使用find批量处理 -find ~/.codexlens/indexes -name "_index.db" -type f | while read db; do - python scripts/generate_embeddings.py "$db" -done -``` - -**步骤4:验证生成结果** - -```bash -# 检查semantic_chunks表 -sqlite3 ~/.codexlens/indexes/codex-lens/_index.db \ - "SELECT COUNT(*) as chunk_count FROM semantic_chunks" - -# 测试向量搜索 -codexlens search "authentication user credentials" \ - --path ~/projects/codex-lens \ - --mode vector -``` - -### 3.2 短期优化:改进向量搜索语义 - -**问题**:当前"vector模式"实际包含exact搜索,语义不清晰 - -**解决方案**:添加`pure_vector`参数 - -**实现** (修改 `hybrid_search.py`): - -```python -class HybridSearchEngine: - def search( - self, - index_path: Path, - query: str, - limit: int = 20, - enable_fuzzy: bool = True, - enable_vector: bool = False, - pure_vector: bool = False, # 新增参数 - ) -> List[SearchResult]: - """Execute hybrid search with parallel retrieval and RRF fusion. - - Args: - ... - pure_vector: If True, only use vector search (no FTS fallback) - """ - # Determine which backends to use - backends = {} - - if pure_vector: - # 纯向量模式:只使用向量搜索 - if enable_vector: - backends["vector"] = True - else: - # 混合模式:总是包含exact搜索作为基线 - backends["exact"] = True - if enable_fuzzy: - backends["fuzzy"] = True - if enable_vector: - backends["vector"] = True - - # ... rest of the method -``` - -**CLI更新** (修改 `commands.py`): - -```python -@app.command() -def search( - ... - mode: str = typer.Option("exact", "--mode", "-m", - help="Search mode: exact, fuzzy, hybrid, vector, pure-vector."), - ... -): - """... - Search Modes: - - exact: Exact FTS - - fuzzy: Fuzzy FTS - - hybrid: RRF fusion of exact + fuzzy + vector (recommended) - - vector: Vector search with exact FTS fallback - - pure-vector: Pure semantic vector search (no FTS fallback) - """ - ... - - # Map mode to options - if mode == "exact": - hybrid_mode, enable_fuzzy, enable_vector, pure_vector = False, False, False, False - elif mode == "fuzzy": - hybrid_mode, enable_fuzzy, enable_vector, pure_vector = False, True, False, False - elif mode == "vector": - hybrid_mode, enable_fuzzy, enable_vector, pure_vector = True, False, True, False - elif mode == "pure-vector": - hybrid_mode, enable_fuzzy, enable_vector, pure_vector = True, False, True, True - elif mode == "hybrid": - hybrid_mode, enable_fuzzy, enable_vector, pure_vector = True, True, True, False -``` - -### 3.3 中期优化:增强向量搜索效果 - -**优化1:改进分块策略** - -当前使用简单的滑动窗口,可优化为: - -```python -class HybridChunker(Chunker): - """Hybrid chunking strategy combining symbol-based and sliding window.""" - - def chunk_hybrid( - self, - content: str, - symbols: List[Symbol], - file_path: str, - language: str, - ) -> List[SemanticChunk]: - """ - 1. 优先按symbol分块(函数、类级别) - 2. 对过大symbol,进一步使用滑动窗口 - 3. 对symbol间隙,使用滑动窗口补充 - """ - chunks = [] - - # Step 1: Symbol-based chunks - symbol_chunks = self.chunk_by_symbol(content, symbols, file_path, language) - - # Step 2: Split oversized symbols - for chunk in symbol_chunks: - if chunk.token_count > self.config.max_chunk_size: - # 使用滑动窗口进一步分割 - sub_chunks = self._split_large_chunk(chunk) - chunks.extend(sub_chunks) - else: - chunks.append(chunk) - - # Step 3: Fill gaps with sliding window - gap_chunks = self._chunk_gaps(content, symbols, file_path, language) - chunks.extend(gap_chunks) - - return chunks -``` - -**优化2:添加查询扩展** - -```python -class QueryExpander: - """Expand queries for better vector search recall.""" - - def expand(self, query: str) -> str: - """Expand query with synonyms and related terms.""" - # 示例:代码领域同义词 - expansions = { - "auth": ["authentication", "authorization", "login"], - "db": ["database", "storage", "repository"], - "api": ["endpoint", "route", "interface"], - } - - terms = query.lower().split() - expanded = set(terms) - - for term in terms: - if term in expansions: - expanded.update(expansions[term]) - - return " ".join(expanded) -``` - -**优化3:混合检索策略** - -```python -class AdaptiveHybridSearch: - """Adaptive search strategy based on query type.""" - - def search(self, query: str, ...): - # 分析查询类型 - query_type = self._classify_query(query) - - if query_type == "keyword": - # 代码标识符查询 → 偏重FTS - weights = {"exact": 0.5, "fuzzy": 0.3, "vector": 0.2} - elif query_type == "semantic": - # 自然语言查询 → 偏重向量 - weights = {"exact": 0.2, "fuzzy": 0.2, "vector": 0.6} - elif query_type == "hybrid": - # 混合查询 → 平衡权重 - weights = {"exact": 0.4, "fuzzy": 0.3, "vector": 0.3} - - return self.engine.search(query, weights=weights, ...) -``` - -### 3.4 长期优化:性能与质量提升 - -**优化1:增量嵌入更新** - -```python -class IncrementalEmbeddingUpdater: - """Update embeddings incrementally for changed files.""" - - def update_for_file(self, file_path: str, new_content: str): - """Only regenerate embeddings for changed file.""" - # 1. 删除旧嵌入 - self.vector_store.delete_file_chunks(file_path) - - # 2. 生成新嵌入 - chunks = self.chunker.chunk(new_content, ...) - for chunk in chunks: - chunk.embedding = self.embedder.embed_single(chunk.content) - - # 3. 存储新嵌入 - self.vector_store.add_chunks(chunks, file_path) -``` - -**优化2:向量索引压缩** - -```python -# 使用量化技术减少存储空间(768维 → 192维) -from qdrant_client import models - -# 产品量化(PQ)压缩 -compressed_vector = pq_quantize(embedding, target_dim=192) -``` - -**优化3:向量搜索加速** - -```python -# 使用FAISS或Hnswlib替代numpy暴力搜索 -import faiss - -class FAISSVectorStore(VectorStore): - def __init__(self, db_path, dim=768): - super().__init__(db_path) - # 使用HNSW索引 - self.index = faiss.IndexHNSWFlat(dim, 32) - self._load_vectors_to_index() - - def search_similar(self, query_embedding, top_k=10): - # FAISS加速搜索(100x+) - scores, indices = self.index.search( - np.array([query_embedding]), top_k - ) - return self._fetch_by_indices(indices[0], scores[0]) -``` - ---- - -## 4. 对比总结 - -### 4.1 搜索模式对比 - -| 维度 | Exact FTS | Fuzzy FTS | Vector Search | Hybrid (推荐) | -|------|-----------|-----------|---------------|--------------| -| **匹配类型** | 精确词匹配 | 容错匹配 | 语义相似 | 多模式融合 | -| **查询类型** | 标识符、关键词 | 拼写错误容忍 | 自然语言 | 所有类型 | -| **召回率** | 中 | 高 | 最高 | 最高 | -| **精确率** | 高 | 中 | 中 | 高 | -| **延迟** | 5-7ms | 7-9ms | 7-10ms | 9-11ms | -| **依赖** | 仅SQLite | 仅SQLite | fastembed+numpy | 全部 | -| **存储开销** | 小(FTS索引) | 小(FTS索引) | 大(向量) | 大(FTS+向量) | -| **适用场景** | 代码搜索 | 容错搜索 | 概念搜索 | 通用搜索 | - -### 4.2 推荐使用策略 - -**场景1:代码标识符搜索**(函数名、类名、变量名) -```bash -codexlens search "authenticate_user" --mode exact -``` -→ 使用exact模式,最快且最精确 - -**场景2:概念性搜索**("如何验证用户身份") -```bash -codexlens search "how to verify user credentials" --mode hybrid -``` -→ 使用hybrid模式,结合语义和关键词 - -**场景3:容错搜索**(允许拼写错误) -```bash -codexlens search "autheticate" --mode fuzzy -``` -→ 使用fuzzy模式,trigram容错 - -**场景4:纯语义搜索**(需先生成嵌入) -```bash -codexlens search "password encryption with salt" --mode pure-vector -``` -→ 使用pure-vector模式,理解语义意图 - ---- - -## 5. 实施检查清单 - -### 立即行动项 (P0) - -- [ ] 安装语义搜索依赖:`pip install codexlens[semantic]` -- [ ] 运行嵌入生成脚本(见3.1节) -- [ ] 验证semantic_chunks表已创建且有数据 -- [ ] 测试vector模式搜索是否返回结果 - -### 短期改进 (P1) - -- [ ] 添加pure_vector参数(见3.2节) -- [ ] 更新CLI支持pure-vector模式 -- [ ] 添加嵌入生成进度提示 -- [ ] 文档更新:搜索模式使用指南 - -### 中期优化 (P2) - -- [ ] 实现混合分块策略(见3.3节) -- [ ] 添加查询扩展功能 -- [ ] 实现自适应权重调整 -- [ ] 性能基准测试 - -### 长期规划 (P3) - -- [ ] 增量嵌入更新机制 -- [ ] 向量索引压缩 -- [ ] 集成FAISS加速 -- [ ] 多模态搜索(代码+文档) - ---- - -## 6. 参考资源 - -### 代码文件 - -- 混合搜索引擎: `codex-lens/src/codexlens/search/hybrid_search.py` -- 向量存储: `codex-lens/src/codexlens/semantic/vector_store.py` -- 向量嵌入: `codex-lens/src/codexlens/semantic/embedder.py` -- 代码分块: `codex-lens/src/codexlens/semantic/chunker.py` -- 链式搜索: `codex-lens/src/codexlens/search/chain_search.py` - -### 测试文件 - -- 对比测试: `codex-lens/tests/test_search_comparison.py` -- 混合搜索E2E: `codex-lens/tests/test_hybrid_search_e2e.py` -- CLI测试: `codex-lens/tests/test_cli_hybrid_search.py` - -### 相关文档 - -- RRF算法: `codex-lens/src/codexlens/search/ranking.py` -- 查询解析: `codex-lens/src/codexlens/search/query_parser.py` -- 配置管理: `codex-lens/src/codexlens/config.py` - ---- - -## 7. 结论 - -通过本次深入分析,我们明确了CodexLens搜索系统的优势和待优化点: - -**优势**: -1. ✓ 优秀的并行架构设计(双层并行) -2. ✓ RRF融合算法实现合理 -3. ✓ 向量存储实现高效(numpy向量化+缓存) -4. ✓ 模块化设计,易于扩展 - -**待优化**: -1. 向量嵌入生成流程需要手动触发 -2. "vector模式"语义不清晰(实际包含exact搜索) -3. 分块策略可以优化(混合策略) -4. 缺少增量更新机制 - -**核心建议**: -1. **立即**: 生成向量嵌入,解决返回空结果问题 -2. **短期**: 添加纯向量模式,澄清语义 -3. **中期**: 优化分块和查询策略,提升搜索质量 -4. **长期**: 性能优化和高级特性 - -通过实施这些改进,CodexLens的搜索功能将达到生产级别的质量和性能标准。 - ---- - -**报告完成时间**: 2025-12-16 -**分析工具**: 代码静态分析 + 实验测试 + 性能测评 -**下一步**: 实施P0优先级改进项 diff --git a/codex-lens/docs/SEMANTIC_GRAPH_DESIGN.md b/codex-lens/docs/SEMANTIC_GRAPH_DESIGN.md deleted file mode 100644 index 709bd1ba..00000000 --- a/codex-lens/docs/SEMANTIC_GRAPH_DESIGN.md +++ /dev/null @@ -1,1113 +0,0 @@ -# 静态分析语义图谱设计方案 - -## 1. 背景与目标 - -### 1.1 当前问题 - -现有的 `llm_enhancer.py` 对代码的分析是**孤立的、原子化的**: -- 每个函数/类被视为独立单元 -- 无法识别函数调用关系 -- 无法追踪数据流 -- 无法理解模块依赖 - -这导致无法回答以下类型的问题: -- "修改这个函数会影响哪些模块?" -- "这个API的完整数据流路径是什么?" -- "找出所有操作User实体的写入方法" -- "哪些函数依赖这个配置参数?" - -### 1.2 设计目标 - -构建**代码语义图谱**(Code Semantic Graph),实现: - -1. **调用关系分析**:函数/方法调用图(Call Graph) -2. **数据流追踪**:变量定义、使用、传递路径 -3. **依赖关系管理**:模块/类/包之间的依赖 -4. **实体关系映射**:识别数据模型及其操作方法 -5. **LLM增强的语义理解**:结合静态分析和LLM,理解调用的"意图" - -## 2. 技术架构 - -### 2.1 整体架构 - -``` -Source Code Files - ↓ -[Static Analysis Layer] - ├─ AST Parsing (tree-sitter) - ├─ Call Graph Extraction - ├─ Data Flow Analysis - └─ Dependency Resolution - ↓ -[Graph Construction Layer] - ├─ Node Creation (Functions/Classes/Modules) - ├─ Edge Creation (Calls/Imports/DataFlow) - └─ Graph Storage (SQLite/Neo4j) - ↓ -[LLM Enhancement Layer] - ├─ Relationship Semantics - ├─ Intent Analysis - └─ Pattern Recognition - ↓ -[Query & Reasoning Layer] - ├─ Graph Traversal - ├─ Impact Analysis - └─ Semantic Search Integration -``` - -### 2.2 核心组件 - -#### 2.2.1 图节点类型(Nodes) - -```python -from enum import Enum -from dataclasses import dataclass -from typing import List, Optional, Set - -class NodeType(Enum): - """节点类型""" - MODULE = "module" # 模块/文件 - CLASS = "class" # 类 - FUNCTION = "function" # 函数 - METHOD = "method" # 方法 - VARIABLE = "variable" # 变量 - PARAMETER = "parameter" # 参数 - DATA_MODEL = "data_model" # 数据模型(识别出的实体类) - -@dataclass -class CodeNode: - """代码图节点""" - node_id: str # 唯一标识:file:line:name - node_type: NodeType - name: str - qualified_name: str # 完全限定名:module.class.method - file_path: str - start_line: int - end_line: int - - # 静态分析元数据 - signature: Optional[str] = None # 函数/方法签名 - docstring: Optional[str] = None - modifiers: Set[str] = None # public/private/static等 - - # LLM生成的语义元数据 - summary: Optional[str] = None - purpose: Optional[str] = None - tags: List[str] = None # 如:crud, validation, auth -``` - -#### 2.2.2 图边类型(Edges) - -```python -class EdgeType(Enum): - """边类型""" - CALLS = "calls" # A调用B - IMPORTS = "imports" # A导入B - INHERITS = "inherits" # A继承B - IMPLEMENTS = "implements" # A实现B(接口) - USES_VARIABLE = "uses_variable" # A使用变量B - DEFINES_VARIABLE = "defines_variable" # A定义变量B - PASSES_DATA = "passes_data" # A向B传递数据 - MODIFIES = "modifies" # A修改B(如数据库写入) - READS = "reads" # A读取B(如数据库查询) - -@dataclass -class CodeEdge: - """代码图边""" - edge_id: str - source_id: str # 源节点ID - target_id: str # 目标节点ID - edge_type: EdgeType - - # 边的上下文信息 - context: Optional[str] = None # 调用发生的代码片段 - line_number: Optional[int] = None # 调用所在行号 - - # LLM生成的语义 - semantic_intent: Optional[str] = None # 如"验证用户权限" - confidence: float = 1.0 # 置信度 -``` - -## 3. 详细实现步骤 - -### 3.1 静态分析引擎 - -#### 3.1.1 AST解析与符号提取 - -```python -from tree_sitter import Language, Parser -from pathlib import Path -from typing import Dict, List - -class ASTAnalyzer: - """基于tree-sitter的AST分析器""" - - def __init__(self, language: str): - self.language = language - self.parser = Parser() - # 加载语言grammar - - def extract_symbols(self, content: str, file_path: str) -> List[CodeNode]: - """提取所有符号定义""" - - tree = self.parser.parse(bytes(content, 'utf-8')) - root = tree.root_node - - symbols = [] - self._traverse_definitions(root, content, file_path, symbols) - return symbols - - def _traverse_definitions( - self, - node, - content: str, - file_path: str, - result: List[CodeNode], - parent_class: str = None - ): - """递归遍历提取定义""" - - if node.type == 'function_definition': - func_node = self._create_function_node(node, content, file_path) - result.append(func_node) - - elif node.type == 'class_definition': - class_node = self._create_class_node(node, content, file_path) - result.append(class_node) - - # 遍历类内部的方法 - for child in node.children: - if child.type == 'block': - for method in child.children: - if method.type == 'function_definition': - method_node = self._create_method_node( - method, content, file_path, class_node.name - ) - result.append(method_node) - - # 递归遍历子节点 - for child in node.children: - self._traverse_definitions( - child, content, file_path, result, parent_class - ) - - def _create_function_node(self, node, content: str, file_path: str) -> CodeNode: - """创建函数节点""" - - name_node = node.child_by_field_name('name') - func_name = content[name_node.start_byte:name_node.end_byte] - - # 提取参数列表 - params_node = node.child_by_field_name('parameters') - signature = content[params_node.start_byte:params_node.end_byte] - - # 提取docstring - docstring = self._extract_docstring(node, content) - - return CodeNode( - node_id=f"{file_path}:{node.start_point[0]}:{func_name}", - node_type=NodeType.FUNCTION, - name=func_name, - qualified_name=f"{Path(file_path).stem}.{func_name}", - file_path=file_path, - start_line=node.start_point[0] + 1, - end_line=node.end_point[0] + 1, - signature=f"{func_name}{signature}", - docstring=docstring, - ) - - def _extract_docstring(self, node, content: str) -> Optional[str]: - """提取docstring""" - - # 查找函数体的第一个表达式语句 - body = node.child_by_field_name('body') - if not body: - return None - - for child in body.children: - if child.type == 'expression_statement': - expr = child.children[0] - if expr.type == 'string': - # 提取字符串内容(去掉引号) - doc = content[expr.start_byte:expr.end_byte] - return doc.strip('"""').strip("'''").strip() - - return None -``` - -#### 3.1.2 调用图提取 - -```python -class CallGraphExtractor: - """调用图提取器""" - - def __init__(self, ast_analyzer: ASTAnalyzer): - self.ast_analyzer = ast_analyzer - - def extract_calls( - self, - content: str, - file_path: str, - symbols: List[CodeNode] - ) -> List[CodeEdge]: - """提取函数调用关系""" - - tree = self.ast_analyzer.parser.parse(bytes(content, 'utf-8')) - calls = [] - - # 为每个函数/方法提取其内部的调用 - for symbol in symbols: - if symbol.node_type in [NodeType.FUNCTION, NodeType.METHOD]: - symbol_calls = self._extract_calls_in_function( - tree, symbol, content, file_path - ) - calls.extend(symbol_calls) - - return calls - - def _extract_calls_in_function( - self, - tree, - caller: CodeNode, - content: str, - file_path: str - ) -> List[CodeEdge]: - """提取单个函数内的所有调用""" - - # 定位到函数的AST节点 - func_node = self._find_node_by_line(tree.root_node, caller.start_line) - if not func_node: - return [] - - calls = [] - self._traverse_calls(func_node, caller, content, file_path, calls) - return calls - - def _traverse_calls( - self, - node, - caller: CodeNode, - content: str, - file_path: str, - result: List[CodeEdge] - ): - """递归遍历查找call表达式""" - - if node.type == 'call': - # 提取被调用的函数名 - function_node = node.child_by_field_name('function') - callee_name = content[function_node.start_byte:function_node.end_byte] - - # 提取调用的上下文(所在行) - call_line = node.start_point[0] + 1 - line_content = content.splitlines()[node.start_point[0]] - - edge = CodeEdge( - edge_id=f"{caller.node_id}→{callee_name}:{call_line}", - source_id=caller.node_id, - target_id=callee_name, # 暂时用名称,后续需要解析 - edge_type=EdgeType.CALLS, - context=line_content.strip(), - line_number=call_line, - ) - result.append(edge) - - # 递归遍历 - for child in node.children: - self._traverse_calls(child, caller, content, file_path, result) - - def _find_node_by_line(self, node, target_line: int): - """根据行号查找AST节点""" - - if node.start_point[0] + 1 == target_line: - return node - - for child in node.children: - result = self._find_node_by_line(child, target_line) - if result: - return result - - return None -``` - -#### 3.1.3 名称解析(Name Resolution) - -```python -class NameResolver: - """将函数调用的名称解析为具体的符号定义""" - - def __init__(self, symbol_table: Dict[str, CodeNode]): - """ - symbol_table: 符号表,映射 qualified_name -> CodeNode - """ - self.symbol_table = symbol_table - - def resolve_call_target( - self, - call_edge: CodeEdge, - caller_context: CodeNode - ) -> Optional[str]: - """ - 解析调用目标的完整node_id - - 策略: - 1. 检查是否是本地函数调用(同文件) - 2. 检查是否是导入的模块函数 - 3. 检查是否是方法调用(self.method) - """ - - callee_name = call_edge.target_id - - # 策略1: 本地调用(同文件) - local_qualified = f"{Path(caller_context.file_path).stem}.{callee_name}" - if local_qualified in self.symbol_table: - return self.symbol_table[local_qualified].node_id - - # 策略2: 方法调用(提取对象名) - if '.' in callee_name: - parts = callee_name.split('.') - if parts[0] == 'self': - # self.method_name -> 在当前类中查找 - method_name = parts[1] - # 需要找到caller所属的类 - class_name = self._find_containing_class(caller_context) - if class_name: - class_qualified = f"{Path(caller_context.file_path).stem}.{class_name}.{method_name}" - if class_qualified in self.symbol_table: - return self.symbol_table[class_qualified].node_id - - # 策略3: 导入的函数(需要扫描import语句) - # TODO: 实现跨文件的导入解析 - - return None - - def _find_containing_class(self, node: CodeNode) -> Optional[str]: - """找到函数/方法所属的类""" - # 通过qualified_name推断 - parts = node.qualified_name.split('.') - if len(parts) > 2: # module.class.method - return parts[-2] - return None -``` - -### 3.2 图存储与索引 - -#### 3.2.1 数据库Schema(SQLite版本) - -```sql --- 节点表 -CREATE TABLE code_nodes ( - node_id TEXT PRIMARY KEY, - node_type TEXT NOT NULL, -- module/class/function/method/variable - name TEXT NOT NULL, - qualified_name TEXT NOT NULL UNIQUE, - file_path TEXT NOT NULL, - start_line INTEGER NOT NULL, - end_line INTEGER NOT NULL, - - -- 静态分析元数据 - signature TEXT, - docstring TEXT, - modifiers TEXT, -- JSON数组 - - -- LLM语义元数据 - summary TEXT, - purpose TEXT, - tags TEXT, -- JSON数组 - - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); - --- 边表 -CREATE TABLE code_edges ( - edge_id TEXT PRIMARY KEY, - source_id TEXT NOT NULL, - target_id TEXT NOT NULL, - edge_type TEXT NOT NULL, -- calls/imports/inherits/uses_variable等 - - -- 上下文 - context TEXT, - line_number INTEGER, - - -- LLM语义 - semantic_intent TEXT, - confidence REAL DEFAULT 1.0, - - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - - FOREIGN KEY (source_id) REFERENCES code_nodes(node_id) ON DELETE CASCADE, - FOREIGN KEY (target_id) REFERENCES code_nodes(node_id) ON DELETE CASCADE -); - --- 索引 -CREATE INDEX idx_nodes_type ON code_nodes(node_type); -CREATE INDEX idx_nodes_file ON code_nodes(file_path); -CREATE INDEX idx_nodes_qualified ON code_nodes(qualified_name); - -CREATE INDEX idx_edges_source ON code_edges(source_id); -CREATE INDEX idx_edges_target ON code_edges(target_id); -CREATE INDEX idx_edges_type ON code_edges(edge_type); - --- 用于快速查找调用关系 -CREATE INDEX idx_edges_source_type ON code_edges(source_id, edge_type); -CREATE INDEX idx_edges_target_type ON code_edges(target_id, edge_type); -``` - -#### 3.2.2 图存储接口 - -```python -import sqlite3 -from typing import List, Optional, Set - -class CodeGraphStore: - """代码图谱存储""" - - def __init__(self, db_path: Path): - self.db_path = db_path - self.conn = sqlite3.connect(db_path) - self._create_tables() - - def add_node(self, node: CodeNode): - """添加节点""" - cursor = self.conn.cursor() - cursor.execute(""" - INSERT OR REPLACE INTO code_nodes ( - node_id, node_type, name, qualified_name, - file_path, start_line, end_line, - signature, docstring, summary, purpose - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) - """, ( - node.node_id, node.node_type.value, node.name, - node.qualified_name, node.file_path, - node.start_line, node.end_line, - node.signature, node.docstring, - node.summary, node.purpose - )) - self.conn.commit() - - def add_edge(self, edge: CodeEdge): - """添加边""" - cursor = self.conn.cursor() - cursor.execute(""" - INSERT OR REPLACE INTO code_edges ( - edge_id, source_id, target_id, edge_type, - context, line_number, semantic_intent, confidence - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) - """, ( - edge.edge_id, edge.source_id, edge.target_id, - edge.edge_type.value, edge.context, edge.line_number, - edge.semantic_intent, edge.confidence - )) - self.conn.commit() - - def get_node(self, node_id: str) -> Optional[CodeNode]: - """获取节点""" - cursor = self.conn.cursor() - cursor.execute("SELECT * FROM code_nodes WHERE node_id = ?", (node_id,)) - row = cursor.fetchone() - return self._row_to_node(row) if row else None - - def get_callers(self, node_id: str) -> List[CodeNode]: - """获取所有调用该节点的节点(反向查询)""" - cursor = self.conn.cursor() - cursor.execute(""" - SELECT n.* FROM code_nodes n - JOIN code_edges e ON n.node_id = e.source_id - WHERE e.target_id = ? AND e.edge_type = 'calls' - """, (node_id,)) - - return [self._row_to_node(row) for row in cursor.fetchall()] - - def get_callees(self, node_id: str) -> List[CodeNode]: - """获取该节点调用的所有节点(正向查询)""" - cursor = self.conn.cursor() - cursor.execute(""" - SELECT n.* FROM code_nodes n - JOIN code_edges e ON n.node_id = e.target_id - WHERE e.source_id = ? AND e.edge_type = 'calls' - """, (node_id,)) - - return [self._row_to_node(row) for row in cursor.fetchall()] - - def get_call_chain( - self, - start_node_id: str, - max_depth: int = 5 - ) -> List[List[CodeNode]]: - """获取调用链(DFS遍历)""" - - visited = set() - chains = [] - - def dfs(node_id: str, path: List[CodeNode], depth: int): - if depth > max_depth or node_id in visited: - return - - visited.add(node_id) - node = self.get_node(node_id) - if not node: - return - - current_path = path + [node] - callees = self.get_callees(node_id) - - if not callees: - # 叶子节点,记录完整路径 - chains.append(current_path) - else: - for callee in callees: - dfs(callee.node_id, current_path, depth + 1) - - visited.remove(node_id) - - dfs(start_node_id, [], 0) - return chains -``` - -### 3.3 LLM语义增强 - -#### 3.3.1 关系语义分析 - -```python -class RelationshipSemanticAnalyzer: - """为代码关系生成语义描述""" - - def __init__(self, llm_enhancer: LLMEnhancer): - self.llm_enhancer = llm_enhancer - - def analyze_call_intent( - self, - edge: CodeEdge, - caller: CodeNode, - callee: CodeNode - ) -> str: - """分析函数调用的意图""" - - # 构建提示词 - prompt = f""" -PURPOSE: Analyze the intent of a function call relationship -TASK: Describe in 1 sentence WHY function A calls function B and what purpose it serves - -CONTEXT: -Caller Function: {caller.name} -Caller Summary: {caller.summary or 'N/A'} -Caller Code Snippet: -``` -{edge.context} -``` - -Callee Function: {callee.name} -Callee Summary: {callee.summary or 'N/A'} -Callee Signature: {callee.signature or 'N/A'} - -OUTPUT: A concise semantic description of the call intent. -Example: "validates user credentials before granting access" -""" - - response = self.llm_enhancer._invoke_ccw_cli(prompt, tool='gemini') - if response['success']: - intent = response['stdout'].strip() - return intent - - return "unknown intent" - - def batch_analyze_calls( - self, - edges: List[CodeEdge], - nodes_map: Dict[str, CodeNode] - ) -> Dict[str, str]: - """批量分析调用意图(优化LLM调用)""" - - # 构建批量prompt - call_descriptions = [] - for edge in edges: - caller = nodes_map.get(edge.source_id) - callee = nodes_map.get(edge.target_id) - if not caller or not callee: - continue - - desc = f""" -[CALL {edge.edge_id}] -From: {caller.name} ({caller.summary or 'no summary'}) -To: {callee.name} ({callee.summary or 'no summary'}) -Context: {edge.context} -""" - call_descriptions.append(desc) - - prompt = f""" -PURPOSE: Analyze multiple function call relationships and describe their intents -TASK: For each call, provide a 1-sentence semantic description - -{chr(10).join(call_descriptions)} - -OUTPUT FORMAT (JSON): -{{ - "edge_id_1": "intent description", - "edge_id_2": "intent description", - ... -}} -""" - - response = self.llm_enhancer._invoke_ccw_cli(prompt, tool='gemini') - if response['success']: - import json - intents = json.loads(self.llm_enhancer._extract_json(response['stdout'])) - return intents - - return {} -``` - -#### 3.3.2 数据模型识别 - -```python -class DataModelRecognizer: - """识别代码中的数据模型(实体类)""" - - def identify_data_models( - self, - class_nodes: List[CodeNode] - ) -> List[CodeNode]: - """识别哪些类是数据模型""" - - data_models = [] - - for class_node in class_nodes: - # 启发式规则 - is_model = False - - # 规则1: 类名包含Model/Entity/Schema - if any(keyword in class_node.name for keyword in ['Model', 'Entity', 'Schema']): - is_model = True - - # 规则2: 继承自ORM基类(需要分析继承关系) - # TODO: 检查是否继承 db.Model, BaseModel等 - - # 规则3: 让LLM判断 - if not is_model: - is_model = self._ask_llm_if_data_model(class_node) - - if is_model: - class_node.tags = class_node.tags or [] - class_node.tags.append('data_model') - data_models.append(class_node) - - return data_models - - def _ask_llm_if_data_model(self, class_node: CodeNode) -> bool: - """让LLM判断是否为数据模型""" - - prompt = f""" -Is this Python class a data model (entity class representing database table or data structure)? - -Class Definition: -```python -{class_node.docstring or ''} -class {class_node.name}: - # ... (signature: {class_node.signature}) -``` - -Answer with: YES or NO -""" - - # 调用LLM... - # 简化实现 - return False -``` - -### 3.4 图查询与推理 - -#### 3.4.1 影响分析(Impact Analysis) - -```python -class ImpactAnalyzer: - """代码变更影响分析""" - - def __init__(self, graph_store: CodeGraphStore): - self.graph_store = graph_store - - def analyze_function_impact( - self, - function_id: str, - max_depth: int = 10 - ) -> Dict[str, any]: - """分析修改某个函数的影响范围""" - - # 找到所有直接和间接调用者 - affected_functions = set() - self._traverse_callers(function_id, affected_functions, 0, max_depth) - - # 找到所有受影响的文件 - affected_files = set() - for func_id in affected_functions: - node = self.graph_store.get_node(func_id) - if node: - affected_files.add(node.file_path) - - return { - 'modified_function': function_id, - 'affected_functions': list(affected_functions), - 'affected_files': list(affected_files), - 'impact_scope': len(affected_functions), - } - - def _traverse_callers( - self, - node_id: str, - result: Set[str], - current_depth: int, - max_depth: int - ): - """递归遍历调用者""" - - if current_depth >= max_depth or node_id in result: - return - - callers = self.graph_store.get_callers(node_id) - for caller in callers: - result.add(caller.node_id) - self._traverse_callers(caller.node_id, result, current_depth + 1, max_depth) -``` - -#### 3.4.2 数据流追踪 - -```python -class DataFlowTracer: - """数据流路径追踪""" - - def __init__(self, graph_store: CodeGraphStore): - self.graph_store = graph_store - - def trace_variable_flow( - self, - variable_name: str, - start_function_id: str - ) -> List[Dict]: - """追踪变量的数据流""" - - # 查找所有使用该变量的边 - cursor = self.graph_store.conn.cursor() - cursor.execute(""" - SELECT * FROM code_edges - WHERE edge_type IN ('uses_variable', 'defines_variable', 'passes_data') - AND (source_id = ? OR target_id LIKE ?) - """, (start_function_id, f"%{variable_name}%")) - - flow_path = [] - for row in cursor.fetchall(): - edge = self._row_to_edge(row) - source = self.graph_store.get_node(edge.source_id) - target = self.graph_store.get_node(edge.target_id) - - flow_path.append({ - 'from': source.name if source else 'unknown', - 'to': target.name if target else 'unknown', - 'action': edge.edge_type.value, - 'context': edge.context, - }) - - return flow_path - - def find_crud_operations( - self, - entity_name: str - ) -> Dict[str, List[CodeNode]]: - """找到对某个实体的所有CRUD操作""" - - cursor = self.graph_store.conn.cursor() - - # 查找所有修改该实体的函数 - cursor.execute(""" - SELECT DISTINCT n.* FROM code_nodes n - JOIN code_edges e ON n.node_id = e.source_id - WHERE e.edge_type = 'modifies' - AND e.target_id LIKE ? - """, (f"%{entity_name}%",)) - - writers = [self._row_to_node(row) for row in cursor.fetchall()] - - # 查找所有读取该实体的函数 - cursor.execute(""" - SELECT DISTINCT n.* FROM code_nodes n - JOIN code_edges e ON n.node_id = e.source_id - WHERE e.edge_type = 'reads' - AND e.target_id LIKE ? - """, (f"%{entity_name}%",)) - - readers = [self._row_to_node(row) for row in cursor.fetchall()] - - return { - 'create': [w for w in writers if 'create' in w.name.lower()], - 'read': readers, - 'update': [w for w in writers if 'update' in w.name.lower()], - 'delete': [w for w in writers if 'delete' in w.name.lower()], - } -``` - -### 3.5 与语义搜索集成 - -#### 3.5.1 增强的搜索结果 - -```python -class GraphEnhancedSearchEngine: - """结合图谱的增强搜索""" - - def __init__( - self, - vector_search: VectorStore, - graph_store: CodeGraphStore - ): - self.vector_search = vector_search - self.graph_store = graph_store - - def search_with_graph_context( - self, - query: str, - top_k: int = 10 - ) -> List[EnhancedSearchResult]: - """带图谱上下文的搜索""" - - # 1. 向量搜索 - vector_results = self.vector_search.search(query, top_k=top_k) - - # 2. 为每个结果添加图谱信息 - enhanced_results = [] - for result in vector_results: - # 找到对应的图节点 - node = self.graph_store.get_node(result.path) - if not node: - continue - - # 获取调用关系 - callers = self.graph_store.get_callers(node.node_id) - callees = self.graph_store.get_callees(node.node_id) - - enhanced = EnhancedSearchResult( - **result.dict(), - callers=[c.name for c in callers[:5]], - callees=[c.name for c in callees[:5]], - call_count_in=len(callers), - call_count_out=len(callees), - ) - enhanced_results.append(enhanced) - - return enhanced_results - - def search_by_relationship( - self, - query: str, - relationship_type: str # "calls", "called_by", "uses", etc. - ) -> List[CodeNode]: - """基于关系的搜索""" - - # 先找到查询匹配的节点 - vector_results = self.vector_search.search(query, top_k=5) - if not vector_results: - return [] - - target_node_id = vector_results[0].path - - # 根据关系类型查找相关节点 - if relationship_type == "calls": - return self.graph_store.get_callees(target_node_id) - elif relationship_type == "called_by": - return self.graph_store.get_callers(target_node_id) - # 其他关系类型... - - return [] -``` - -## 4. 实施路线图 - -### Phase 1: 基础静态分析(3-4周) -- [ ] 实现ASTAnalyzer(符号提取) -- [ ] 实现CallGraphExtractor(调用图提取) -- [ ] 实现NameResolver(名称解析) -- [ ] 设计图数据库schema -- [ ] 实现CodeGraphStore(基础CRUD) -- [ ] 单元测试 - -### Phase 2: 多语言支持(2周) -- [ ] Python完整支持 -- [ ] JavaScript/TypeScript支持 -- [ ] Java支持(可选) -- [ ] 跨语言测试 - -### Phase 3: LLM语义增强(2-3周) -- [ ] 实现RelationshipSemanticAnalyzer -- [ ] 实现DataModelRecognizer -- [ ] 批量处理优化 -- [ ] 集成测试 - -### Phase 4: 高级查询(2周) -- [ ] 实现ImpactAnalyzer -- [ ] 实现DataFlowTracer -- [ ] 实现GraphEnhancedSearchEngine -- [ ] 性能优化 - -### Phase 5: 可视化与工具(2周) -- [ ] 调用图可视化(Graphviz/D3.js) -- [ ] CLI命令集成 -- [ ] Web UI(可选) - -### Phase 6: 生产化(1-2周) -- [ ] 增量更新机制 -- [ ] 大规模项目优化 -- [ ] 文档和示例 -- [ ] 发布 - -**总计预估时间**:12-15周 - -## 5. 技术挑战与解决方案 - -### 5.1 挑战:跨文件名称解析 - -**问题**:函数调用的目标可能在不同文件/模块中,需要解析import语句。 - -**解决方案**: -```python -class ImportResolver: - """导入语句解析器""" - - def extract_imports(self, tree, content: str) -> Dict[str, str]: - """ - 提取所有import语句,构建别名映射 - - 返回: {别名 -> 实际模块路径} - """ - imports = {} - - for node in tree.root_node.children: - if node.type == 'import_statement': - # from module import func - # import module as alias - pass # 解析逻辑 - - return imports - - def resolve_imported_symbol( - self, - symbol_name: str, - imports: Dict[str, str], - project_root: Path - ) -> Optional[str]: - """解析导入符号的实际位置""" - - if symbol_name in imports: - module_path = imports[symbol_name] - # 查找该模块的文件路径 - # 在图谱中查找对应的节点 - pass - - return None -``` - -### 5.2 挑战:动态调用识别 - -**问题**:反射、getattr、动态导入等运行时行为无法通过静态分析完全捕获。 - -**解决方案**: -- 使用LLM推断可能的调用目标 -- 标记为"动态调用",降低置信度 -- 结合运行时日志补充 - -```python -def handle_dynamic_call(edge: CodeEdge) -> CodeEdge: - """处理动态调用""" - - if 'getattr' in edge.context or 'eval' in edge.context: - edge.confidence = 0.5 - edge.semantic_intent = "dynamic call (runtime resolution required)" - - return edge -``` - -### 5.3 挑战:大型代码库性能 - -**问题**:对百万行级别的代码库构建图谱可能耗时很长。 - -**解决方案**: -- **并行处理**:多进程分析不同文件 -- **增量更新**:只重新分析变更的文件 -- **延迟LLM**:初次构建只做静态分析,LLM增强按需触发 - -```python -from multiprocessing import Pool - -class ParallelGraphBuilder: - """并行图谱构建""" - - def build_graph_parallel( - self, - file_paths: List[Path], - workers: int = 8 - ): - """并行分析多个文件""" - - with Pool(workers) as pool: - results = pool.map(self._analyze_single_file, file_paths) - - # 合并结果到图谱 - for nodes, edges in results: - for node in nodes: - self.graph_store.add_node(node) - for edge in edges: - self.graph_store.add_edge(edge) -``` - -## 6. 成功指标 - -1. **覆盖率**:90%以上的函数调用关系被正确识别 -2. **准确率**:名称解析准确率>85% -3. **性能**:10万行代码的项目,图谱构建<5分钟 -4. **查询速度**:影响分析查询<100ms -5. **LLM增强价值**:关系语义描述的有用性评分>4/5 - -## 7. 应用场景示例 - -### 场景1:代码审查助手 -```python -# 审查一个PR,分析影响范围 -analyzer = ImpactAnalyzer(graph_store) -impact = analyzer.analyze_function_impact('auth.py:45:validate_token') - -print(f"修改此函数将影响 {impact['impact_scope']} 个其他函数") -print(f"涉及文件: {', '.join(impact['affected_files'])}") -``` - -### 场景2:重构规划 -```python -# 计划重构User类,查看所有相关操作 -tracer = DataFlowTracer(graph_store) -crud = tracer.find_crud_operations('User') - -print(f"创建User的方法: {[f.name for f in crud['create']]}") -print(f"读取User的方法: {[f.name for f in crud['read']]}") -``` - -### 场景3:知识图谱问答 -```python -# "修改登录逻辑会影响哪些API端点?" -search_engine = GraphEnhancedSearchEngine(vector_store, graph_store) - -# 先找到登录函数 -login_func = search_engine.search("user login authentication")[0] - -# 追踪调用链 -analyzer = ImpactAnalyzer(graph_store) -impact = analyzer.analyze_function_impact(login_func.node_id) - -# 筛选出API端点 -api_endpoints = [ - f for f in impact['affected_functions'] - if '@app.route' in graph_store.get_node(f).modifiers -] -``` - -## 8. 参考资料 - -- [LLVM Call Graph](https://llvm.org/docs/CallGraph.html) -- [Sourcegraph - Code Intelligence](https://about.sourcegraph.com/) -- [CodeQL - Semantic Code Analysis](https://codeql.github.com/) -- [Neo4j Graph Database](https://neo4j.com/) -- Tree-sitter AST Queries diff --git a/codex-lens/docs/T6-CLI-Integration-Summary.md b/codex-lens/docs/T6-CLI-Integration-Summary.md deleted file mode 100644 index 9b3959b1..00000000 --- a/codex-lens/docs/T6-CLI-Integration-Summary.md +++ /dev/null @@ -1,248 +0,0 @@ -# T6: CLI Integration for Hybrid Search - Implementation Summary - -## Overview - -Successfully integrated hybrid search capabilities into the CodexLens CLI with user-configurable options, migration support, and enhanced status reporting. - -## Changes Made - -### 1. Search Command Enhancement (`commands.py`) - -**New `--mode` Parameter:** -- Replaced `--hybrid` and `--exact-only` flags with unified `--mode` parameter -- Supported modes: `exact`, `fuzzy`, `hybrid`, `vector` -- Default: `exact` (backward compatible) - -**Mode Validation:** -```python -valid_modes = ["exact", "fuzzy", "hybrid", "vector"] -if mode not in valid_modes: - # Error with helpful message -``` - -**Weights Configuration:** -- Accepts custom RRF weights via `--weights exact,fuzzy,vector` -- Example: `--weights 0.5,0.3,0.2` -- Automatic normalization if weights don't sum to 1.0 -- Validation for 3-value format - -**Mode Mapping to SearchOptions:** -```python -hybrid_mode = mode == "hybrid" -enable_fuzzy = mode in ["fuzzy", "hybrid"] - -options = SearchOptions( - hybrid_mode=hybrid_mode, - enable_fuzzy=enable_fuzzy, - hybrid_weights=hybrid_weights, -) -``` - -**Enhanced Output:** -- Shows search mode in status line -- Includes search source tags in verbose mode -- JSON output includes mode and source information - -### 2. Migrate Command (`commands.py`) - -**New Command for Dual-FTS Upgrade:** -```bash -codex-lens migrate [path] -``` - -**Features:** -- Upgrades all `_index.db` files to schema version 4 -- Shows progress bar with percentage complete -- Tracks: migrated, already up-to-date, errors -- Safe operation preserving all data -- Verbose mode shows per-database migration details - -**Progress Tracking:** -- Uses Rich progress bar with spinner -- Shows percentage and count (N/Total) -- Time elapsed indicator - -### 3. Status Command Enhancement (`commands.py`) - -**New Backend Status Display:** -``` -Search Backends: - Exact FTS: ✓ (unicode61) - Fuzzy FTS: ✓ (trigram) - Hybrid Search: ✓ (RRF fusion) - Vector Search: ✗ (future) -``` - -**Schema Version Detection:** -- Checks first available `_index.db` -- Reports schema version -- Detects dual FTS table presence - -**Feature Flags in JSON:** -```json -{ - "features": { - "exact_fts": true, - "fuzzy_fts": true, - "hybrid_search": true, - "vector_search": false - } -} -``` - -### 4. Output Rendering (`output.py`) - -**Verbose Mode Support:** -```python -render_search_results(results, verbose=True) -``` - -**Search Source Tags:** -- `[E]` - Exact FTS result -- `[F]` - Fuzzy FTS result -- `[V]` - Vector search result -- `[RRF]` - Fusion result - -**Enhanced Table:** -- New "Source" column in verbose mode -- Shows result origin for debugging -- Fusion scores visible - -## Usage Examples - -### 1. Search with Different Modes - -```bash -# Exact search (default) -codex-lens search "authentication" - -# Fuzzy search only -codex-lens search "authentication" --mode fuzzy - -# Hybrid search with RRF fusion -codex-lens search "authentication" --mode hybrid - -# Hybrid with custom weights -codex-lens search "authentication" --mode hybrid --weights 0.5,0.3,0.2 - -# Verbose mode shows source tags -codex-lens search "authentication" --mode hybrid -v -``` - -### 2. Migration - -```bash -# Migrate current project -codex-lens migrate - -# Migrate specific project with verbose output -codex-lens migrate /path/to/project -v - -# JSON output for automation -codex-lens migrate --json -``` - -### 3. Status Checking - -```bash -# Check backend availability -codex-lens status - -# JSON output with feature flags -codex-lens status --json -``` - -## Testing - -**Test Coverage:** -- ✅ Mode parameter validation (exact, fuzzy, hybrid, vector) -- ✅ Weights parsing and normalization -- ✅ Help text shows all modes -- ✅ Migrate command exists and accessible -- ✅ Status command shows backends -- ✅ Mode mapping to SearchOptions - -**Test Results:** -``` -11 passed in 2.27s -``` - -## Integration Points - -### With Phase 1 (Dual-FTS): -- Uses `search_fts_exact()` for exact mode -- Uses `search_fts_fuzzy()` for fuzzy mode -- Schema migration via `_apply_migrations()` - -### With Phase 2 (Hybrid Search): -- Calls `HybridSearchEngine` for hybrid mode -- Passes custom weights to RRF algorithm -- Displays fusion scores and source tags - -### With Existing CLI: -- Backward compatible (default mode=exact) -- Follows existing error handling patterns -- Uses Rich for progress and formatting -- Supports JSON output mode - -## Done Criteria Verification - -✅ **CLI search --mode exact uses only exact FTS table** -- Mode validation ensures correct backend selection -- `hybrid_mode=False, enable_fuzzy=False` for exact mode - -✅ **--mode fuzzy uses only fuzzy table** -- `hybrid_mode=False, enable_fuzzy=True` for fuzzy mode -- Single backend execution - -✅ **--mode hybrid fuses both** -- `hybrid_mode=True, enable_fuzzy=True` activates RRF fusion -- HybridSearchEngine coordinates parallel search - -✅ **Custom weights via --weights 0.5,0.3,0.2** -- Parses 3-value comma-separated format -- Validates and normalizes to sum=1.0 -- Passes to RRF algorithm - -✅ **Migration command completes Dual-FTS upgrade** -- Shows progress bar with percentage -- Tracks migration status per database -- Safe operation with error handling - -✅ **Search output shows [E], [F], [V] tags and fusion scores** -- Verbose mode displays Source column -- Tags extracted from `search_source` attribute -- Fusion scores shown in Score column - -## Files Modified - -1. `codex-lens/src/codexlens/cli/commands.py` - - Updated `search()` command with `--mode` parameter - - Added `migrate()` command - - Enhanced `status()` command - - Added DirIndexStore import - -2. `codex-lens/src/codexlens/cli/output.py` - - Updated `render_search_results()` with verbose mode - - Added source tag display logic - -3. `codex-lens/tests/test_cli_hybrid_search.py` (new) - - Comprehensive CLI integration tests - - Mode validation tests - - Weights parsing tests - - Command availability tests - -## Performance Impact - -- **Exact mode**: Same as before (no overhead) -- **Fuzzy mode**: Single FTS query (minimal overhead) -- **Hybrid mode**: Parallel execution (2x I/O, no sequential penalty) -- **Migration**: One-time operation, safe for large projects - -## Next Steps - -Users can now: -1. Run `codex-lens migrate` to upgrade existing indexes -2. Use `codex-lens search "query" --mode hybrid` for best results -3. Check `codex-lens status` to verify enabled features -4. Tune fusion weights for their use case via `--weights` diff --git a/codex-lens/docs/codex_mcp.md b/codex-lens/docs/codex_mcp.md deleted file mode 100644 index edce9f5b..00000000 --- a/codex-lens/docs/codex_mcp.md +++ /dev/null @@ -1,459 +0,0 @@ -MCP integration -mcp_servers -You can configure Codex to use MCP servers to give Codex access to external applications, resources, or services. - -Server configuration -STDIO -STDIO servers are MCP servers that you can launch directly via commands on your computer. - -# The top-level table name must be `mcp_servers` -# The sub-table name (`server-name` in this example) can be anything you would like. -[mcp_servers.server_name] -command = "npx" -# Optional -args = ["-y", "mcp-server"] -# Optional: propagate additional env vars to the MCP server. -# A default whitelist of env vars will be propagated to the MCP server. -# https://github.com/openai/codex/blob/main/codex-rs/rmcp-client/src/utils.rs#L82 -env = { "API_KEY" = "value" } -# or -[mcp_servers.server_name.env] -API_KEY = "value" -# Optional: Additional list of environment variables that will be whitelisted in the MCP server's environment. -env_vars = ["API_KEY2"] - -# Optional: cwd that the command will be run from -cwd = "/Users//code/my-server" -Streamable HTTP -Streamable HTTP servers enable Codex to talk to resources that are accessed via a http url (either on localhost or another domain). - -[mcp_servers.figma] -url = "https://mcp.figma.com/mcp" -# Optional environment variable containing a bearer token to use for auth -bearer_token_env_var = "ENV_VAR" -# Optional map of headers with hard-coded values. -http_headers = { "HEADER_NAME" = "HEADER_VALUE" } -# Optional map of headers whose values will be replaced with the environment variable. -env_http_headers = { "HEADER_NAME" = "ENV_VAR" } -Streamable HTTP connections always use the experimental Rust MCP client under the hood, so expect occasional rough edges. OAuth login flows are gated on the rmcp_client = true flag: - -[features] -rmcp_client = true -After enabling it, run codex mcp login when the server supports OAuth. - -Other configuration options -# Optional: override the default 10s startup timeout -startup_timeout_sec = 20 -# Optional: override the default 60s per-tool timeout -tool_timeout_sec = 30 -# Optional: disable a server without removing it -enabled = false -# Optional: only expose a subset of tools from this server -enabled_tools = ["search", "summarize"] -# Optional: hide specific tools (applied after `enabled_tools`, if set) -disabled_tools = ["search"] -When both enabled_tools and disabled_tools are specified, Codex first restricts the server to the allow-list and then removes any tools that appear in the deny-list. - -MCP CLI commands -# List all available commands -codex mcp --help - -# Add a server (env can be repeated; `--` separates the launcher command) -codex mcp add docs -- docs-server --port 4000 - -# List configured servers (pretty table or JSON) -codex mcp list -codex mcp list --json - -# Show one server (table or JSON) -codex mcp get docs -codex mcp get docs --json - -# Remove a server -codex mcp remove docs - -# Log in to a streamable HTTP server that supports oauth -codex mcp login SERVER_NAME - -# Log out from a streamable HTTP server that supports oauth -codex mcp logout SERVER_NAME -Examples of useful MCPs -There is an ever growing list of useful MCP servers that can be helpful while you are working with Codex. - -Some of the most common MCPs we've seen are: - -Context7 — connect to a wide range of up-to-date developer documentation -Figma Local and Remote - access to your Figma designs -Playwright - control and inspect a browser using Playwright -Chrome Developer Tools — control and inspect a Chrome browser -Sentry — access to your Sentry logs -GitHub — Control over your GitHub account beyond what git allows (like controlling PRs, issues, etc.) - - -# Example config.toml - -Use this example configuration as a starting point. For an explanation of each field and additional context, see [Configuration](./config.md). Copy the snippet below to `~/.codex/config.toml` and adjust values as needed. - -```toml -# Codex example configuration (config.toml) -# -# This file lists all keys Codex reads from config.toml, their default values, -# and concise explanations. Values here mirror the effective defaults compiled -# into the CLI. Adjust as needed. -# -# Notes -# - Root keys must appear before tables in TOML. -# - Optional keys that default to "unset" are shown commented out with notes. -# - MCP servers, profiles, and model providers are examples; remove or edit. - -################################################################################ -# Core Model Selection -################################################################################ - -# Primary model used by Codex. Default: "gpt-5.1-codex-max" on all platforms. -model = "gpt-5.1-codex-max" - -# Model used by the /review feature (code reviews). Default: "gpt-5.1-codex-max". -review_model = "gpt-5.1-codex-max" - -# Provider id selected from [model_providers]. Default: "openai". -model_provider = "openai" - -# Optional manual model metadata. When unset, Codex auto-detects from model. -# Uncomment to force values. -# model_context_window = 128000 # tokens; default: auto for model -# model_auto_compact_token_limit = 0 # disable/override auto; default: model family specific -# tool_output_token_limit = 10000 # tokens stored per tool output; default: 10000 for gpt-5.1-codex-max - -################################################################################ -# Reasoning & Verbosity (Responses API capable models) -################################################################################ - -# Reasoning effort: minimal | low | medium | high | xhigh (default: medium; xhigh on gpt-5.1-codex-max and gpt-5.2) -model_reasoning_effort = "medium" - -# Reasoning summary: auto | concise | detailed | none (default: auto) -model_reasoning_summary = "auto" - -# Text verbosity for GPT-5 family (Responses API): low | medium | high (default: medium) -model_verbosity = "medium" - -# Force-enable reasoning summaries for current model (default: false) -model_supports_reasoning_summaries = false - -# Force reasoning summary format: none | experimental (default: none) -model_reasoning_summary_format = "none" - -################################################################################ -# Instruction Overrides -################################################################################ - -# Additional user instructions appended after AGENTS.md. Default: unset. -# developer_instructions = "" - -# Optional legacy base instructions override (prefer AGENTS.md). Default: unset. -# instructions = "" - -# Inline override for the history compaction prompt. Default: unset. -# compact_prompt = "" - -# Override built-in base instructions with a file path. Default: unset. -# experimental_instructions_file = "/absolute/or/relative/path/to/instructions.txt" - -# Load the compact prompt override from a file. Default: unset. -# experimental_compact_prompt_file = "/absolute/or/relative/path/to/compact_prompt.txt" - -################################################################################ -# Approval & Sandbox -################################################################################ - -# When to ask for command approval: -# - untrusted: only known-safe read-only commands auto-run; others prompt -# - on-failure: auto-run in sandbox; prompt only on failure for escalation -# - on-request: model decides when to ask (default) -# - never: never prompt (risky) -approval_policy = "on-request" - -# Filesystem/network sandbox policy for tool calls: -# - read-only (default) -# - workspace-write -# - danger-full-access (no sandbox; extremely risky) -sandbox_mode = "read-only" - -# Extra settings used only when sandbox_mode = "workspace-write". -[sandbox_workspace_write] -# Additional writable roots beyond the workspace (cwd). Default: [] -writable_roots = [] -# Allow outbound network access inside the sandbox. Default: false -network_access = false -# Exclude $TMPDIR from writable roots. Default: false -exclude_tmpdir_env_var = false -# Exclude /tmp from writable roots. Default: false -exclude_slash_tmp = false - -################################################################################ -# Shell Environment Policy for spawned processes -################################################################################ - -[shell_environment_policy] -# inherit: all (default) | core | none -inherit = "all" -# Skip default excludes for names containing KEY/TOKEN (case-insensitive). Default: false -ignore_default_excludes = false -# Case-insensitive glob patterns to remove (e.g., "AWS_*", "AZURE_*"). Default: [] -exclude = [] -# Explicit key/value overrides (always win). Default: {} -set = {} -# Whitelist; if non-empty, keep only matching vars. Default: [] -include_only = [] -# Experimental: run via user shell profile. Default: false -experimental_use_profile = false - -################################################################################ -# History & File Opener -################################################################################ - -[history] -# save-all (default) | none -persistence = "save-all" -# Maximum bytes for history file; oldest entries are trimmed when exceeded. Example: 5242880 -# max_bytes = 0 - -# URI scheme for clickable citations: vscode (default) | vscode-insiders | windsurf | cursor | none -file_opener = "vscode" - -################################################################################ -# UI, Notifications, and Misc -################################################################################ - -[tui] -# Desktop notifications from the TUI: boolean or filtered list. Default: true -# Examples: false | ["agent-turn-complete", "approval-requested"] -notifications = false - -# Enables welcome/status/spinner animations. Default: true -animations = true - -# Suppress internal reasoning events from output. Default: false -hide_agent_reasoning = false - -# Show raw reasoning content when available. Default: false -show_raw_agent_reasoning = false - -# Disable burst-paste detection in the TUI. Default: false -disable_paste_burst = false - -# Track Windows onboarding acknowledgement (Windows only). Default: false -windows_wsl_setup_acknowledged = false - -# External notifier program (argv array). When unset: disabled. -# Example: notify = ["notify-send", "Codex"] -# notify = [ ] - -# In-product notices (mostly set automatically by Codex). -[notice] -# hide_full_access_warning = true -# hide_rate_limit_model_nudge = true - -################################################################################ -# Authentication & Login -################################################################################ - -# Where to persist CLI login credentials: file (default) | keyring | auto -cli_auth_credentials_store = "file" - -# Base URL for ChatGPT auth flow (not OpenAI API). Default: -chatgpt_base_url = "https://chatgpt.com/backend-api/" - -# Restrict ChatGPT login to a specific workspace id. Default: unset. -# forced_chatgpt_workspace_id = "" - -# Force login mechanism when Codex would normally auto-select. Default: unset. -# Allowed values: chatgpt | api -# forced_login_method = "chatgpt" - -# Preferred store for MCP OAuth credentials: auto (default) | file | keyring -mcp_oauth_credentials_store = "auto" - -################################################################################ -# Project Documentation Controls -################################################################################ - -# Max bytes from AGENTS.md to embed into first-turn instructions. Default: 32768 -project_doc_max_bytes = 32768 - -# Ordered fallbacks when AGENTS.md is missing at a directory level. Default: [] -project_doc_fallback_filenames = [] - -################################################################################ -# Tools (legacy toggles kept for compatibility) -################################################################################ - -[tools] -# Enable web search tool (alias: web_search_request). Default: false -web_search = false - -# Enable the view_image tool so the agent can attach local images. Default: true -view_image = true - -# (Alias accepted) You can also write: -# web_search_request = false - -################################################################################ -# Centralized Feature Flags (preferred) -################################################################################ - -[features] -# Leave this table empty to accept defaults. Set explicit booleans to opt in/out. -unified_exec = false -rmcp_client = false -apply_patch_freeform = false -view_image_tool = true -web_search_request = false -ghost_commit = false -enable_experimental_windows_sandbox = false -skills = false - -################################################################################ -# Experimental toggles (legacy; prefer [features]) -################################################################################ - -# Include apply_patch via freeform editing path (affects default tool set). Default: false -experimental_use_freeform_apply_patch = false - -# Define MCP servers under this table. Leave empty to disable. -[mcp_servers] - -# --- Example: STDIO transport --- -# [mcp_servers.docs] -# command = "docs-server" # required -# args = ["--port", "4000"] # optional -# env = { "API_KEY" = "value" } # optional key/value pairs copied as-is -# env_vars = ["ANOTHER_SECRET"] # optional: forward these from the parent env -# cwd = "/path/to/server" # optional working directory override -# startup_timeout_sec = 10.0 # optional; default 10.0 seconds -# # startup_timeout_ms = 10000 # optional alias for startup timeout (milliseconds) -# tool_timeout_sec = 60.0 # optional; default 60.0 seconds -# enabled_tools = ["search", "summarize"] # optional allow-list -# disabled_tools = ["slow-tool"] # optional deny-list (applied after allow-list) - -# --- Example: Streamable HTTP transport --- -# [mcp_servers.github] -# url = "https://github-mcp.example.com/mcp" # required -# bearer_token_env_var = "GITHUB_TOKEN" # optional; Authorization: Bearer -# http_headers = { "X-Example" = "value" } # optional static headers -# env_http_headers = { "X-Auth" = "AUTH_ENV" } # optional headers populated from env vars -# startup_timeout_sec = 10.0 # optional -# tool_timeout_sec = 60.0 # optional -# enabled_tools = ["list_issues"] # optional allow-list - -################################################################################ -# Model Providers (extend/override built-ins) -################################################################################ - -# Built-ins include: -# - openai (Responses API; requires login or OPENAI_API_KEY via auth flow) -# - oss (Chat Completions API; defaults to http://localhost:11434/v1) - -[model_providers] - -# --- Example: override OpenAI with explicit base URL or headers --- -# [model_providers.openai] -# name = "OpenAI" -# base_url = "https://api.openai.com/v1" # default if unset -# wire_api = "responses" # "responses" | "chat" (default varies) -# # requires_openai_auth = true # built-in OpenAI defaults to true -# # request_max_retries = 4 # default 4; max 100 -# # stream_max_retries = 5 # default 5; max 100 -# # stream_idle_timeout_ms = 300000 # default 300_000 (5m) -# # experimental_bearer_token = "sk-example" # optional dev-only direct bearer token -# # http_headers = { "X-Example" = "value" } -# # env_http_headers = { "OpenAI-Organization" = "OPENAI_ORGANIZATION", "OpenAI-Project" = "OPENAI_PROJECT" } - -# --- Example: Azure (Chat/Responses depending on endpoint) --- -# [model_providers.azure] -# name = "Azure" -# base_url = "https://YOUR_PROJECT_NAME.openai.azure.com/openai" -# wire_api = "responses" # or "chat" per endpoint -# query_params = { api-version = "2025-04-01-preview" } -# env_key = "AZURE_OPENAI_API_KEY" -# # env_key_instructions = "Set AZURE_OPENAI_API_KEY in your environment" - -# --- Example: Local OSS (e.g., Ollama-compatible) --- -# [model_providers.ollama] -# name = "Ollama" -# base_url = "http://localhost:11434/v1" -# wire_api = "chat" - -################################################################################ -# Profiles (named presets) -################################################################################ - -# Active profile name. When unset, no profile is applied. -# profile = "default" - -[profiles] - -# [profiles.default] -# model = "gpt-5.1-codex-max" -# model_provider = "openai" -# approval_policy = "on-request" -# sandbox_mode = "read-only" -# model_reasoning_effort = "medium" -# model_reasoning_summary = "auto" -# model_verbosity = "medium" -# chatgpt_base_url = "https://chatgpt.com/backend-api/" -# experimental_compact_prompt_file = "compact_prompt.txt" -# include_apply_patch_tool = false -# experimental_use_freeform_apply_patch = false -# tools_web_search = false -# tools_view_image = true -# features = { unified_exec = false } - -################################################################################ -# Projects (trust levels) -################################################################################ - -# Mark specific worktrees as trusted. Only "trusted" is recognized. -[projects] -# [projects."/absolute/path/to/project"] -# trust_level = "trusted" - -################################################################################ -# OpenTelemetry (OTEL) – disabled by default -################################################################################ - -[otel] -# Include user prompt text in logs. Default: false -log_user_prompt = false -# Environment label applied to telemetry. Default: "dev" -environment = "dev" -# Exporter: none (default) | otlp-http | otlp-grpc -exporter = "none" - -# Example OTLP/HTTP exporter configuration -# [otel.exporter."otlp-http"] -# endpoint = "https://otel.example.com/v1/logs" -# protocol = "binary" # "binary" | "json" - -# [otel.exporter."otlp-http".headers] -# "x-otlp-api-key" = "${OTLP_TOKEN}" - -# Example OTLP/gRPC exporter configuration -# [otel.exporter."otlp-grpc"] -# endpoint = "https://otel.example.com:4317", -# headers = { "x-otlp-meta" = "abc123" } - -# Example OTLP exporter with mutual TLS -# [otel.exporter."otlp-http"] -# endpoint = "https://otel.example.com/v1/logs" -# protocol = "binary" - -# [otel.exporter."otlp-http".headers] -# "x-otlp-api-key" = "${OTLP_TOKEN}" - -# [otel.exporter."otlp-http".tls] -# ca-certificate = "certs/otel-ca.pem" -# client-certificate = "/etc/codex/certs/client.pem" -# client-private-key = "/etc/codex/certs/client-key.pem" -``` \ No newline at end of file diff --git a/codex-lens/docs/test-quality-enhancements.md b/codex-lens/docs/test-quality-enhancements.md deleted file mode 100644 index 03853038..00000000 --- a/codex-lens/docs/test-quality-enhancements.md +++ /dev/null @@ -1,187 +0,0 @@ -# Test Quality Enhancements - Implementation Summary - -**Date**: 2025-12-16 -**Status**: ✅ Complete - All 4 recommendations implemented and passing - -## Overview - -Implemented all 4 test quality recommendations from Gemini's comprehensive analysis to enhance test coverage and robustness across the codex-lens test suite. - -## Recommendation 1: Verify True Fuzzy Matching ✅ - -**File**: `tests/test_dual_fts.py` -**Test Class**: `TestDualFTSPerformance` -**New Test**: `test_fuzzy_substring_matching` - -### Implementation -- Verifies trigram tokenizer enables partial token matching -- Tests that searching for "func" matches "function0", "function1", etc. -- Gracefully skips if trigram tokenizer unavailable -- Validates BM25 scoring for fuzzy results - -### Key Features -- Runtime detection of trigram support -- Validates substring matching capability -- Ensures proper score ordering (negative BM25) - -### Test Result -```bash -PASSED tests/test_dual_fts.py::TestDualFTSPerformance::test_fuzzy_substring_matching -``` - ---- - -## Recommendation 2: Enable Mocked Vector Search ✅ - -**File**: `tests/test_hybrid_search_e2e.py` -**Test Class**: `TestHybridSearchWithVectorMock` -**New Test**: `test_hybrid_with_vector_enabled` - -### Implementation -- Mocks vector search to return predefined results -- Tests RRF fusion with exact + fuzzy + vector sources -- Validates hybrid search handles vector integration correctly -- Uses `unittest.mock.patch` for clean mocking - -### Key Features -- Mock SearchResult objects with scores -- Tests enable_vector=True parameter -- Validates RRF fusion score calculation (positive scores) -- Gracefully handles missing vector search module - -### Test Result -```bash -PASSED tests/test_hybrid_search_e2e.py::TestHybridSearchWithVectorMock::test_hybrid_with_vector_enabled -``` - ---- - -## Recommendation 3: Complex Query Parser Stress Tests ✅ - -**File**: `tests/test_query_parser.py` -**Test Class**: `TestComplexBooleanQueries` -**New Tests**: 5 comprehensive tests - -### Implementation - -#### 1. `test_nested_boolean_and_or` -- Tests: `(login OR logout) AND user` -- Validates nested parentheses preservation -- Ensures boolean operators remain intact - -#### 2. `test_mixed_operators_with_expansion` -- Tests: `UserAuth AND (login OR logout)` -- Verifies CamelCase expansion doesn't break operators -- Ensures expansion + boolean logic coexist - -#### 3. `test_quoted_phrases_with_boolean` -- Tests: `"user authentication" AND login` -- Validates quoted phrase preservation -- Ensures AND operator survives - -#### 4. `test_not_operator_preservation` -- Tests: `login NOT logout` -- Confirms NOT operator handling -- Validates negation logic - -#### 5. `test_complex_nested_three_levels` -- Tests: `((UserAuth OR login) AND session) OR token` -- Stress tests deep nesting (3 levels) -- Validates multiple parentheses pairs - -### Test Results -```bash -PASSED tests/test_query_parser.py::TestComplexBooleanQueries::test_nested_boolean_and_or -PASSED tests/test_query_parser.py::TestComplexBooleanQueries::test_mixed_operators_with_expansion -PASSED tests/test_query_parser.py::TestComplexBooleanQueries::test_quoted_phrases_with_boolean -PASSED tests/test_query_parser.py::TestComplexBooleanQueries::test_not_operator_preservation -PASSED tests/test_query_parser.py::TestComplexBooleanQueries::test_complex_nested_three_levels -``` - ---- - -## Recommendation 4: Migration Reversibility Tests ✅ - -**File**: `tests/test_dual_fts.py` -**Test Class**: `TestMigrationRecovery` -**New Tests**: 2 migration robustness tests - -### Implementation - -#### 1. `test_migration_preserves_data_on_failure` -- Creates v2 database with test data -- Attempts migration (may succeed or fail) -- Validates data preservation in both scenarios -- Smart column detection (path vs full_path) - -**Key Features**: -- Checks schema version to determine column names -- Handles both migration success and failure -- Ensures no data loss - -#### 2. `test_migration_idempotent_after_partial_failure` -- Tests retry capability after partial migration -- Validates graceful handling of repeated initialization -- Ensures database remains in usable state - -**Key Features**: -- Double initialization without errors -- Table existence verification -- Safe retry mechanism - -### Test Results -```bash -PASSED tests/test_dual_fts.py::TestMigrationRecovery::test_migration_preserves_data_on_failure -PASSED tests/test_dual_fts.py::TestMigrationRecovery::test_migration_idempotent_after_partial_failure -``` - ---- - -## Test Suite Statistics - -### Overall Results -``` -91 passed, 2 skipped, 2 warnings in 3.31s -``` - -### New Tests Added -- **Recommendation 1**: 1 test (fuzzy substring matching) -- **Recommendation 2**: 1 test (vector mock integration) -- **Recommendation 3**: 5 tests (complex boolean queries) -- **Recommendation 4**: 2 tests (migration recovery) - -**Total New Tests**: 9 - -### Coverage Improvements -- **Fuzzy Search**: Now validates actual trigram substring matching -- **Hybrid Search**: Tests vector integration with mocks -- **Query Parser**: Handles complex nested boolean logic -- **Migration**: Validates data preservation and retry capability - ---- - -## Code Quality - -### Best Practices Applied -1. **Graceful Degradation**: Tests skip when features unavailable (trigram) -2. **Clean Mocking**: Uses `unittest.mock` for vector search -3. **Smart Assertions**: Adapts to migration outcomes dynamically -4. **Edge Case Handling**: Tests multiple nesting levels and operators - -### Integration -- All tests integrate seamlessly with existing pytest fixtures -- Maintains 100% pass rate across test suite -- No breaking changes to existing tests - ---- - -## Validation - -All 4 recommendations successfully implemented and verified: - -✅ **Recommendation 1**: Fuzzy substring matching with trigram validation -✅ **Recommendation 2**: Vector search mocking for hybrid fusion testing -✅ **Recommendation 3**: Complex boolean query stress tests (5 tests) -✅ **Recommendation 4**: Migration recovery and idempotency tests (2 tests) - -**Final Status**: Production-ready, all tests passing diff --git a/codex-lens/examples/association_tree_demo.py b/codex-lens/examples/association_tree_demo.py deleted file mode 100644 index 719f9383..00000000 --- a/codex-lens/examples/association_tree_demo.py +++ /dev/null @@ -1,156 +0,0 @@ -"""Demo script for association tree building. - -This script demonstrates how to use the AssociationTreeBuilder and -ResultDeduplicator to explore code relationships via LSP call hierarchy. -""" - -import asyncio -import sys -from pathlib import Path - -# Add parent directory to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent / "src")) - -from codexlens.lsp.standalone_manager import StandaloneLspManager -from codexlens.search.association_tree import ( - AssociationTreeBuilder, - ResultDeduplicator, -) - - -async def demo_simple_tree(): - """Build a simple call tree from a Python file.""" - print("=" * 70) - print("Association Tree Demo") - print("=" * 70) - print() - - # Use this file as the test subject - test_file = Path(__file__).resolve() - workspace_root = test_file.parent.parent - - print(f"Workspace: {workspace_root}") - print(f"Test file: {test_file.name}") - print() - - # Initialize LSP manager - async with StandaloneLspManager( - workspace_root=str(workspace_root), - timeout=10.0, - ) as lsp: - print("LSP manager initialized") - print() - - # Create tree builder - builder = AssociationTreeBuilder(lsp, timeout=5.0) - - # Build tree from a function in this file - # Using line 50 as an example (adjust based on actual file) - print(f"Building call tree from {test_file.name}:50...") - tree = await builder.build_tree( - seed_file_path=str(test_file), - seed_line=50, - seed_character=1, - max_depth=3, - expand_callers=True, - expand_callees=True, - ) - - print(f"Tree built: {tree}") - print(f" Roots: {len(tree.roots)}") - print(f" Total unique nodes: {len(tree.all_nodes)}") - print(f" Total node instances: {len(tree.node_list)}") - print(f" Edges: {len(tree.edges)}") - print() - - if tree.roots: - print("Root nodes:") - for root in tree.roots: - print(f" - {root.item.name} ({root.item.kind})") - print(f" {root.item.file_path}:{root.item.range.start_line}") - print() - - # Deduplicate and score - print("Deduplicating and scoring nodes...") - deduplicator = ResultDeduplicator( - depth_weight=0.4, - frequency_weight=0.3, - kind_weight=0.3, - ) - - unique_nodes = deduplicator.deduplicate(tree, max_results=20) - print(f"Found {len(unique_nodes)} unique nodes") - print() - - if unique_nodes: - print("Top 10 nodes by score:") - print("-" * 70) - for i, node in enumerate(unique_nodes[:10], 1): - print(f"{i:2}. {node.name} ({node.kind})") - print(f" Location: {Path(node.file_path).name}:{node.range.start_line}") - print( - f" Depth: {node.min_depth}, " - f"Occurrences: {node.occurrences}, " - f"Score: {node.score:.3f}" - ) - if node.paths: - print(f" Paths: {len(node.paths)}") - print() - - # Show filtering capabilities - functions = deduplicator.filter_by_kind( - unique_nodes, ["function", "method"] - ) - print(f"Functions/methods only: {len(functions)} nodes") - - if functions: - print("Top 5 functions:") - for i, node in enumerate(functions[:5], 1): - print(f" {i}. {node.name} (score: {node.score:.3f})") - - else: - print("No nodes found. Try a different seed location.") - - print() - print("Demo complete!") - - -async def demo_cycle_detection(): - """Demonstrate cycle detection in call trees.""" - print("\n" + "=" * 70) - print("Cycle Detection Demo") - print("=" * 70) - print() - - # Create a simple Python file with circular calls for testing - test_code = ''' -def func_a(): - """Function A calls B.""" - func_b() - -def func_b(): - """Function B calls A (creates a cycle).""" - func_a() -''' - - print("This demo would detect cycles in:") - print(test_code) - print("The tree builder automatically marks cycle nodes to prevent infinite expansion.") - - -def main(): - """Run the demo.""" - try: - asyncio.run(demo_simple_tree()) - demo_cycle_detection() - except KeyboardInterrupt: - print("\nDemo interrupted by user") - except Exception as e: - print(f"\nError running demo: {e}") - import traceback - - traceback.print_exc() - - -if __name__ == "__main__": - main() diff --git a/codex-lens/examples/debug_uri_format.py b/codex-lens/examples/debug_uri_format.py deleted file mode 100644 index 4c1c965f..00000000 --- a/codex-lens/examples/debug_uri_format.py +++ /dev/null @@ -1,40 +0,0 @@ -"""Debug URI format issues.""" - -import asyncio -from pathlib import Path -from urllib.parse import quote - -def test_uri_formats(): - """Compare different URI formats.""" - file_path = Path("D:/Claude_dms3/codex-lens/test_simple_function.py") - - print("URI Format Comparison") - print("="*80) - - # Method 1: Path.as_uri() - uri1 = file_path.resolve().as_uri() - print(f"1. Path.as_uri(): {uri1}") - - # Method 2: Manual construction - uri2 = f"file:///{str(file_path.resolve()).replace(chr(92), '/')}" - print(f"2. Manual (forward /): {uri2}") - - # Method 3: With quote - path_str = str(file_path.resolve()).replace(chr(92), '/') - uri3 = f"file:///{quote(path_str, safe='/:')}" - print(f"3. With quote: {uri3}") - - # Method 4: Lowercase drive - path_lower = str(file_path.resolve()).replace(chr(92), '/') - if len(path_lower) > 1 and path_lower[1] == ':': - path_lower = path_lower[0].lower() + path_lower[1:] - uri4 = f"file:///{path_lower}" - print(f"4. Lowercase drive: {uri4}") - - # What Pyright shows in logs - print(f"\n5. Pyright log format: file:///d%3A/Claude_dms3/codex-lens/...") - - return uri1, uri4 - -if __name__ == "__main__": - test_uri_formats() diff --git a/codex-lens/examples/search_comparison_benchmark.py b/codex-lens/examples/search_comparison_benchmark.py deleted file mode 100644 index 88029b61..00000000 --- a/codex-lens/examples/search_comparison_benchmark.py +++ /dev/null @@ -1,326 +0,0 @@ -"""Search method comparison benchmark. - -Compares different search strategies: -1. Pure FTS (exact + fuzzy matching) -2. Pure Vector (semantic search only) -3. Hybrid Fusion (FTS + Vector with RRF) -4. Vector + LSP Association Tree (new strategy) - -Usage: - python examples/search_comparison_benchmark.py -""" - -from __future__ import annotations - -import asyncio -import time -from pathlib import Path -from typing import List, Dict, Any - -from codexlens.config import Config -from codexlens.entities import SearchResult -from codexlens.search.hybrid_search import HybridSearchEngine -from codexlens.lsp.standalone_manager import StandaloneLspManager -from codexlens.search.association_tree import AssociationTreeBuilder, ResultDeduplicator - - -class SearchBenchmark: - """Benchmark different search strategies.""" - - def __init__(self, index_path: Path, config: Config): - """Initialize benchmark. - - Args: - index_path: Path to _index.db file - config: CodexLens config - """ - self.index_path = index_path - self.config = config - self.engine = HybridSearchEngine(config=config) - self.lsp_manager: StandaloneLspManager | None = None - self.tree_builder: AssociationTreeBuilder | None = None - self.deduplicator = ResultDeduplicator( - depth_weight=0.4, - frequency_weight=0.3, - kind_weight=0.3, - max_depth_penalty=10, - ) - - async def setup_lsp(self): - """Setup LSP manager for association tree search.""" - self.lsp_manager = StandaloneLspManager( - workspace_root=str(self.index_path.parent), - timeout=5.0, - ) - await self.lsp_manager.start() - self.tree_builder = AssociationTreeBuilder( - lsp_manager=self.lsp_manager, - timeout=5.0, - ) - - async def cleanup_lsp(self): - """Cleanup LSP manager.""" - if self.lsp_manager: - await self.lsp_manager.stop() - - def method1_pure_fts(self, query: str, limit: int = 20) -> tuple[List[SearchResult], float]: - """Method 1: Pure FTS (exact + fuzzy).""" - start = time.perf_counter() - results = self.engine.search( - index_path=self.index_path, - query=query, - limit=limit, - enable_fuzzy=True, - enable_vector=False, - pure_vector=False, - ) - elapsed = time.perf_counter() - start - return results, elapsed - - def method2_pure_vector(self, query: str, limit: int = 20) -> tuple[List[SearchResult], float]: - """Method 2: Pure Vector (semantic search only).""" - start = time.perf_counter() - results = self.engine.search( - index_path=self.index_path, - query=query, - limit=limit, - enable_fuzzy=False, - enable_vector=True, - pure_vector=True, - ) - elapsed = time.perf_counter() - start - return results, elapsed - - def method3_hybrid_fusion(self, query: str, limit: int = 20) -> tuple[List[SearchResult], float]: - """Method 3: Hybrid Fusion (FTS + Vector with RRF).""" - start = time.perf_counter() - results = self.engine.search( - index_path=self.index_path, - query=query, - limit=limit, - enable_fuzzy=True, - enable_vector=True, - pure_vector=False, - ) - elapsed = time.perf_counter() - start - return results, elapsed - - async def method4_vector_lsp_tree( - self, - query: str, - limit: int = 20, - max_depth: int = 3, - expand_callers: bool = True, - expand_callees: bool = True, - ) -> tuple[List[SearchResult], float, Dict[str, Any]]: - """Method 4: Vector + LSP Association Tree (new strategy). - - Steps: - 1. Vector search to find seed results (top 5-10) - 2. For each seed, build LSP association tree - 3. Deduplicate and score all discovered nodes - 4. Return top N results - - Args: - query: Search query - limit: Final result limit - max_depth: Maximum depth for LSP tree expansion - expand_callers: Whether to expand incoming calls - expand_callees: Whether to expand outgoing calls - - Returns: - Tuple of (results, elapsed_time, stats) - """ - if not self.tree_builder: - raise RuntimeError("LSP not initialized. Call setup_lsp() first.") - - start = time.perf_counter() - stats = { - "seed_count": 0, - "trees_built": 0, - "total_tree_nodes": 0, - "unique_nodes": 0, - "dedup_time_ms": 0, - } - - # Step 1: Get seed results from vector search (top 10) - seed_results = self.engine.search( - index_path=self.index_path, - query=query, - limit=10, - enable_fuzzy=False, - enable_vector=True, - pure_vector=True, - ) - stats["seed_count"] = len(seed_results) - - if not seed_results: - return [], time.perf_counter() - start, stats - - # Step 2: Build association trees for each seed - all_trees = [] - for seed in seed_results: - try: - tree = await self.tree_builder.build_tree( - seed_file_path=seed.path, - seed_line=seed.start_line or 1, - seed_character=1, - max_depth=max_depth, - expand_callers=expand_callers, - expand_callees=expand_callees, - ) - if tree.node_list: - all_trees.append(tree) - stats["trees_built"] += 1 - stats["total_tree_nodes"] += len(tree.node_list) - except Exception as e: - print(f"Error building tree for {seed.path}:{seed.start_line}: {e}") - continue - - if not all_trees: - # Fallback to seed results if no trees built - return seed_results[:limit], time.perf_counter() - start, stats - - # Step 3: Merge and deduplicate all trees - dedup_start = time.perf_counter() - - # Merge all node_lists into a single CallTree - from codexlens.search.association_tree.data_structures import CallTree - merged_tree = CallTree() - for tree in all_trees: - merged_tree.node_list.extend(tree.node_list) - - # Deduplicate - unique_nodes = self.deduplicator.deduplicate( - tree=merged_tree, - max_results=limit, - ) - stats["unique_nodes"] = len(unique_nodes) - stats["dedup_time_ms"] = (time.perf_counter() - dedup_start) * 1000 - - # Step 4: Convert UniqueNode to SearchResult - results = [] - for node in unique_nodes: - # Use node.score as the search score - result = SearchResult( - path=node.file_path, - score=node.score, - start_line=node.range.start_line, - end_line=node.range.end_line, - symbol_name=node.name, - symbol_kind=node.kind, - content="", # LSP doesn't provide content - metadata={"search_source": "lsp_tree"}, - ) - results.append(result) - - elapsed = time.perf_counter() - start - return results, elapsed, stats - - def print_results(self, method_name: str, results: List[SearchResult], elapsed: float, stats: Dict[str, Any] | None = None): - """Print benchmark results.""" - print(f"\n{'='*80}") - print(f"Method: {method_name}") - print(f"{'='*80}") - print(f"Time: {elapsed*1000:.2f}ms") - print(f"Results: {len(results)}") - - if stats: - print(f"\nStats:") - for key, value in stats.items(): - print(f" {key}: {value}") - - print(f"\nTop 5 Results:") - for i, result in enumerate(results[:5], 1): - print(f"{i}. [{result.score:.4f}] {result.path}:{result.start_line}") - if result.symbol_name: - print(f" Name: {result.symbol_name}") - if result.metadata.get("search_source"): - print(f" Source: {result.metadata.get('search_source')}") - - async def run_comparison(self, query: str, limit: int = 20): - """Run comparison for a single query.""" - print(f"\n{'#'*80}") - print(f"Query: {query}") - print(f"{'#'*80}") - - # Method 1: Pure FTS - results1, time1 = self.method1_pure_fts(query, limit) - self.print_results("Method 1: Pure FTS", results1, time1) - - # Method 2: Pure Vector - results2, time2 = self.method2_pure_vector(query, limit) - self.print_results("Method 2: Pure Vector", results2, time2) - - # Method 3: Hybrid Fusion - results3, time3 = self.method3_hybrid_fusion(query, limit) - self.print_results("Method 3: Hybrid Fusion (FTS+Vector)", results3, time3) - - # Method 4: Vector + LSP Tree (requires LSP setup) - results4 = None - time4 = 0.0 - try: - results4, time4, stats4 = await self.method4_vector_lsp_tree(query, limit, max_depth=3) - self.print_results("Method 4: Vector + LSP Association Tree", results4, time4, stats4) - except Exception as e: - print(f"\nMethod 4: Vector + LSP Association Tree") - print(f"Error: {e}") - - # Comparison summary - print(f"\n{'='*80}") - print(f"Summary") - print(f"{'='*80}") - print(f"Method 1 (FTS): {time1*1000:8.2f}ms {len(results1):3d} results") - print(f"Method 2 (Vector): {time2*1000:8.2f}ms {len(results2):3d} results") - print(f"Method 3 (Hybrid): {time3*1000:8.2f}ms {len(results3):3d} results") - if results4 is not None: - print(f"Method 4 (Vector+LSP): {time4*1000:8.2f}ms {len(results4):3d} results") - - -async def main(): - """Main benchmark entry point.""" - # Setup - use the actual index path from ~/.codexlens/indexes/ - import os - codexlens_home = Path(os.path.expanduser("~/.codexlens")) - index_path = codexlens_home / "indexes/D/Claude_dms3/codex-lens/src/codexlens/_index.db" - - if not index_path.exists(): - print(f"Error: Index not found at {index_path}") - print("Please run: python -m codexlens index init src") - return - - project_root = Path("D:/Claude_dms3/codex-lens/src") - - config = Config() - benchmark = SearchBenchmark(index_path, config) - - # Test queries - queries = [ - "vector search implementation", - "LSP call hierarchy", - "search result ranking", - "index building", - ] - - # Setup LSP for Method 4 - print("Setting up LSP manager...") - try: - await benchmark.setup_lsp() - print("LSP manager ready") - except Exception as e: - print(f"Warning: Could not setup LSP: {e}") - print("Method 4 will be skipped") - - try: - # Run benchmarks - for query in queries: - await benchmark.run_comparison(query, limit=20) - - finally: - # Cleanup - await benchmark.cleanup_lsp() - print("\nBenchmark complete") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/codex-lens/examples/simple_search_comparison.py b/codex-lens/examples/simple_search_comparison.py deleted file mode 100644 index 3fdbeaee..00000000 --- a/codex-lens/examples/simple_search_comparison.py +++ /dev/null @@ -1,123 +0,0 @@ -"""Simple search method comparison using CLI commands. - -Compares: -1. FTS (Full-Text Search) -2. Semantic (Dense + Rerank) -3. Hybrid (Future: FTS + Semantic fusion) - -Usage: - python examples/simple_search_comparison.py -""" - -import subprocess -import time -import json -import re -import os -from pathlib import Path - -def strip_ansi(text: str) -> str: - """Remove ANSI color codes from text.""" - ansi_escape = re.compile(r'\x1b\[[0-9;]*m') - return ansi_escape.sub('', text) - -def run_search(query: str, method: str, limit: int = 20) -> tuple[list, float]: - """Run search via CLI and measure time.""" - cmd = [ - "python", "-m", "codexlens", "search", - query, - "--method", method, - "--limit", str(limit), - "--json", - "-p", "." - ] - - start = time.perf_counter() - result = subprocess.run( - cmd, - cwd=str(Path("D:/Claude_dms3/codex-lens/src")), - capture_output=True, - text=True, - env={**os.environ, "NO_COLOR": "1"}, # Try to disable colors - ) - elapsed = time.perf_counter() - start - - if result.returncode != 0: - print(f"Error running {method} search:") - print(result.stderr[:200]) - return [], elapsed - - try: - # Strip ANSI codes and parse JSON - clean_output = strip_ansi(result.stdout) - data = json.loads(clean_output) - # Results are nested in "result" object - if "result" in data and "results" in data["result"]: - return data["result"]["results"], elapsed - return data.get("results", []), elapsed - except json.JSONDecodeError as e: - print(f"Failed to parse JSON output for {method}: {e}") - return [], elapsed - - -def print_comparison(query: str): - """Print comparison for a single query.""" - print(f"\n{'='*80}") - print(f"Query: {query}") - print(f"{'='*80}\n") - - # Method 1: FTS - print("Method 1: FTS (Full-Text Search)") - results_fts, time_fts = run_search(query, "fts", 20) - print(f" Time: {time_fts*1000:.2f}ms") - print(f" Results: {len(results_fts)}") - if results_fts: - print(f" Top 3:") - for i, r in enumerate(results_fts[:3], 1): - path = r.get("path", "").replace("D:\\Claude_dms3\\codex-lens\\src\\", "") - score = r.get("score", 0) - print(f" {i}. [{score:.4f}] {path}") - print() - - # Method 2: Semantic (Dense + Rerank) - print("Method 2: Semantic (Dense + Rerank)") - results_semantic, time_semantic = run_search(query, "dense_rerank", 20) - print(f" Time: {time_semantic*1000:.2f}ms") - print(f" Results: {len(results_semantic)}") - if results_semantic: - print(f" Top 3:") - for i, r in enumerate(results_semantic[:3], 1): - path = r.get("path", "").replace("D:\\Claude_dms3\\codex-lens\\src\\", "") - score = r.get("score", 0) - print(f" {i}. [{score:.4f}] {path}") - print() - - # Summary - print(f"Summary:") - print(f" FTS: {time_fts*1000:8.2f}ms {len(results_fts):3d} results") - print(f" Semantic: {time_semantic*1000:8.2f}ms {len(results_semantic):3d} results") - print(f" Speedup: {time_semantic/time_fts:6.2f}x (FTS faster)") - - -def main(): - """Main comparison entry point.""" - queries = [ - "vector search", - "LSP call hierarchy", - "search ranking", - "index building", - ] - - print("Search Method Comparison") - print("=" * 80) - - for query in queries: - print_comparison(query) - - print(f"\n{'='*80}") - print("Comparison complete") - print(f"{'='*80}") - - -if __name__ == "__main__": - main() diff --git a/codex-lens/examples/test_lsp_capabilities.py b/codex-lens/examples/test_lsp_capabilities.py deleted file mode 100644 index a8ea4c51..00000000 --- a/codex-lens/examples/test_lsp_capabilities.py +++ /dev/null @@ -1,79 +0,0 @@ -"""Test LSP server capabilities.""" - -import asyncio -import json -from pathlib import Path -from codexlens.lsp.standalone_manager import StandaloneLspManager - -async def test_capabilities(): - """Test what capabilities Pyright provides.""" - - workspace_root = Path("D:/Claude_dms3/codex-lens/src") - - print("Testing LSP Capabilities") - print("="*80) - - # Create LSP manager - manager = StandaloneLspManager( - workspace_root=str(workspace_root), - timeout=10.0, - ) - - try: - # Start LSP manager - print("\n1. Starting LSP manager...") - await manager.start() - print(" [OK] LSP manager started") - - # Get server state for Python - print("\n2. Getting Python server state...") - test_file = str(workspace_root / "codexlens/search/hybrid_search.py") - state = await manager._get_server(test_file) - - if not state: - print(" [ERROR] Could not get server state!") - return - - print(f" [OK] Server state obtained") - print(f" Initialized: {state.initialized}") - - # Print capabilities - print("\n3. Server Capabilities:") - print("-"*80) - caps = state.capabilities - - # Key capabilities to check - important_caps = [ - "callHierarchyProvider", - "definitionProvider", - "referencesProvider", - "documentSymbolProvider", - "workspaceSymbolProvider", - "hoverProvider", - "completionProvider", - "signatureHelpProvider", - ] - - for cap in important_caps: - value = caps.get(cap) - status = "[YES]" if value else "[NO]" - print(f" {status} {cap}: {value}") - - # Print all capabilities as JSON for reference - print("\n4. Full capabilities (formatted):") - print("-"*80) - print(json.dumps(caps, indent=2)) - - except Exception as e: - print(f"\n[ERROR] Error: {e}") - import traceback - traceback.print_exc() - - finally: - # Cleanup - print("\n5. Cleaning up...") - await manager.stop() - print(" [OK] LSP manager stopped") - -if __name__ == "__main__": - asyncio.run(test_capabilities()) diff --git a/codex-lens/examples/test_lsp_references.py b/codex-lens/examples/test_lsp_references.py deleted file mode 100644 index 2ce470af..00000000 --- a/codex-lens/examples/test_lsp_references.py +++ /dev/null @@ -1,76 +0,0 @@ -"""Test LSP references as alternative to call hierarchy.""" - -import asyncio -from pathlib import Path -from codexlens.lsp.standalone_manager import StandaloneLspManager - -async def test_references(): - """Test using references as alternative to call hierarchy.""" - - workspace_root = Path("D:/Claude_dms3/codex-lens") - test_file = workspace_root / "test_simple_function.py" - - print("Testing LSP References (Alternative)") - print("="*80) - - manager = StandaloneLspManager( - workspace_root=str(workspace_root), - timeout=30.0, - ) - - try: - print("\n1. Starting LSP manager...") - await manager.start() - print(" [OK] Started") - - # Wait for analysis - await asyncio.sleep(2) - - # Test references for hello_world function - print("\n2. Testing references for 'hello_world' (line 4)...") - refs = await manager.get_references( - file_path=str(test_file), - line=4, - character=5, - include_declaration=True, - ) - print(f" Found: {len(refs)} references") - for ref in refs[:5]: - uri = ref.get('uri', '') - range_obj = ref.get('range', {}) - start = range_obj.get('start', {}) - print(f" - {uri.split('/')[-1]}:{start.get('line', 0)+1}") - - # Test definition - print("\n3. Testing definition for 'hello_world' call (line 13)...") - defs = await manager.get_definition( - file_path=str(test_file), - line=13, - character=11, - ) - print(f" Found: {len(defs)} definitions") - for d in defs: - uri = d.get('uri', '') - range_obj = d.get('range', {}) - start = range_obj.get('start', {}) - print(f" - {uri.split('/')[-1]}:{start.get('line', 0)+1}") - - # Test document symbols - print("\n4. Testing document symbols...") - symbols = await manager.get_document_symbols(str(test_file)) - print(f" Found: {len(symbols)} symbols") - for sym in symbols: - print(f" - {sym.get('name')} ({sym.get('kind')})") - - except Exception as e: - print(f"\n[ERROR] {e}") - import traceback - traceback.print_exc() - - finally: - print("\n5. Cleanup...") - await manager.stop() - print(" [OK] Done") - -if __name__ == "__main__": - asyncio.run(test_references()) diff --git a/codex-lens/examples/test_lsp_tree.py b/codex-lens/examples/test_lsp_tree.py deleted file mode 100644 index 30b593ef..00000000 --- a/codex-lens/examples/test_lsp_tree.py +++ /dev/null @@ -1,92 +0,0 @@ -"""Test LSP Association Tree building directly.""" - -import asyncio -from pathlib import Path -from codexlens.lsp.standalone_manager import StandaloneLspManager -from codexlens.search.association_tree import AssociationTreeBuilder - -async def test_lsp_tree(): - """Test building LSP association tree for a known Python file.""" - - # Setup - use simple test file - workspace_root = Path("D:/Claude_dms3/codex-lens") - test_file = "test_simple_function.py" - test_line = 11 # main() function definition (1-based) - test_char = 5 # Points to 'm' in 'main' (1-based, becomes 4 in 0-based) - - print(f"Testing LSP tree for: {test_file}:{test_line}") - print("="*80) - - # Create LSP manager - manager = StandaloneLspManager( - workspace_root=str(workspace_root), - timeout=10.0, - ) - - try: - # Start LSP manager - print("\n1. Starting LSP manager...") - await manager.start() - print(" [OK] LSP manager started") - - # Test get_call_hierarchy_items directly - print(f"\n2. Testing get_call_hierarchy_items for {test_file}:{test_line}:{test_char}...") - items = await manager.get_call_hierarchy_items( - file_path=str(workspace_root / test_file), - line=test_line, - character=test_char, - ) - print(f" Result: {len(items)} items") - if items: - for i, item in enumerate(items, 1): - print(f" {i}. {item.get('name')} ({item.get('kind')})") - print(f" URI: {item.get('uri')}") - print(f" Range: {item.get('range')}") - else: - print(" [WARN] No call hierarchy items returned!") - print(" This means either:") - print(" - The file/line doesn't contain a symbol") - print(" - LSP server doesn't support call hierarchy") - print(" - Pyright isn't running correctly") - - # If we got items, try building a tree - if items: - print(f"\n3. Building association tree...") - builder = AssociationTreeBuilder( - lsp_manager=manager, - timeout=10.0, - ) - - tree = await builder.build_tree( - seed_file_path=str(workspace_root / test_file), - seed_line=test_line, - seed_character=test_char, - max_depth=2, - expand_callers=True, - expand_callees=True, - ) - - print(f" Tree built successfully!") - print(f" - Roots: {len(tree.roots)}") - print(f" - Total nodes: {len(tree.node_list)}") - print(f" - Depth reached: {tree.depth_reached}") - - if tree.node_list: - print(f"\n First 5 nodes:") - for i, node in enumerate(tree.node_list[:5], 1): - print(f" {i}. {node.item.name} @ {node.item.file_path}:{node.item.range.start_line}") - print(f" Depth: {node.depth}, Is cycle: {node.is_cycle}") - - except Exception as e: - print(f"\n[ERROR] Error: {e}") - import traceback - traceback.print_exc() - - finally: - # Cleanup - print("\n4. Cleaning up...") - await manager.stop() - print(" [OK] LSP manager stopped") - -if __name__ == "__main__": - asyncio.run(test_lsp_tree()) diff --git a/codex-lens/examples/test_raw_lsp.py b/codex-lens/examples/test_raw_lsp.py deleted file mode 100644 index 18bfdc26..00000000 --- a/codex-lens/examples/test_raw_lsp.py +++ /dev/null @@ -1,104 +0,0 @@ -"""Raw LSP test with debug logging.""" - -import asyncio -import json -import logging -from pathlib import Path -from codexlens.lsp.standalone_manager import StandaloneLspManager - -# Enable debug logging -logging.basicConfig(level=logging.DEBUG) -logger = logging.getLogger("codexlens.lsp") -logger.setLevel(logging.DEBUG) - -async def test_raw_lsp(): - """Test LSP with debug logging enabled.""" - - workspace_root = Path("D:/Claude_dms3/codex-lens") - test_file = workspace_root / "test_simple_function.py" - - print("Testing Raw LSP Call Hierarchy") - print("="*80) - - # Create LSP manager - manager = StandaloneLspManager( - workspace_root=str(workspace_root), - timeout=30.0, - ) - - try: - # Start LSP manager - print("\n1. Starting LSP manager...") - await manager.start() - print(" [OK] Started") - - # Get server state - state = await manager._get_server(str(test_file)) - if not state: - print(" [ERROR] No server state!") - return - - print(f" Server initialized: {state.initialized}") - print(f" Call hierarchy supported: {state.capabilities.get('callHierarchyProvider')}") - - # Open document - print("\n2. Opening document...") - await manager._open_document(state, str(test_file)) - print(" [OK] Document opened") - - # Wait a bit for Pyright to analyze - print("\n3. Waiting for analysis...") - await asyncio.sleep(2) - print(" [OK] Waited 2 seconds") - - # Try call hierarchy on main function (line 12) - print("\n4. Sending prepareCallHierarchy request...") - - # Direct request using _send_request - params = { - "textDocument": {"uri": test_file.as_uri()}, - "position": {"line": 11, "character": 4} # 0-indexed, "main" function - } - print(f" Params: {json.dumps(params, indent=2)}") - - result = await manager._send_request( - state, - "textDocument/prepareCallHierarchy", - params, - ) - - print(f"\n5. Result: {result}") - print(f" Type: {type(result)}") - - if result: - print(f" Items: {len(result)}") - for item in result: - print(f" - {item.get('name')}") - else: - print(" [WARN] No items returned") - print(" This could mean:") - print(" - Position doesn't point to a symbol") - print(" - Pyright hasn't finished analyzing") - print(" - Some other issue") - - # Try with the higher-level API - print("\n6. Testing with get_call_hierarchy_items API...") - items = await manager.get_call_hierarchy_items( - file_path=str(test_file), - line=12, - character=5, - ) - print(f" Result: {len(items)} items") - - except Exception as e: - print(f"\n[ERROR] Error: {e}") - import traceback - traceback.print_exc() - - finally: - print("\n7. Cleanup...") - await manager.stop() - print(" [OK] Done") - -if __name__ == "__main__": - asyncio.run(test_raw_lsp()) diff --git a/codex-lens/examples/test_raw_response.py b/codex-lens/examples/test_raw_response.py deleted file mode 100644 index d5f2165f..00000000 --- a/codex-lens/examples/test_raw_response.py +++ /dev/null @@ -1,96 +0,0 @@ -"""Test to see raw LSP response.""" - -import asyncio -import json -import logging -from pathlib import Path - -# Patch the _process_messages to log the full response -async def patched_process_messages(self, language_id: str): - """Patched version that logs full response.""" - from codexlens.lsp.standalone_manager import logger - - state = self._servers.get(language_id) - if not state: - return - - try: - while True: - message = await state.message_queue.get() - msg_id = message.get("id") - method = message.get("method", "") - - # Log FULL message for debugging - if msg_id is not None and not method: - print(f"\n>>> FULL RESPONSE (id={msg_id}):") - print(json.dumps(message, indent=2)) - - # Response handling - if msg_id is not None and not method: - if msg_id in state.pending_requests: - future = state.pending_requests.pop(msg_id) - if "error" in message: - print(f">>> ERROR in response: {message['error']}") - future.set_exception( - Exception(message["error"].get("message", "Unknown error")) - ) - else: - print(f">>> Result: {message.get('result')}") - future.set_result(message.get("result")) - else: - print(f">>> No pending request for id={msg_id}") - - elif msg_id is not None and method: - await self._handle_server_request(state, message) - - elif method: - pass # Skip notifications - - state.message_queue.task_done() - - except asyncio.CancelledError: - pass - -async def test_raw(): - from codexlens.lsp.standalone_manager import StandaloneLspManager - - workspace_root = Path("D:/Claude_dms3/codex-lens") - test_file = workspace_root / "test_simple_function.py" - - manager = StandaloneLspManager(workspace_root=str(workspace_root), timeout=30.0) - - # Monkey-patch the method - import types - manager._process_messages = types.MethodType(patched_process_messages, manager) - - try: - print("Starting LSP...") - await manager.start() - - state = await manager._get_server(str(test_file)) - await manager._open_document(state, str(test_file)) - await asyncio.sleep(2) - - print("\nSending prepareCallHierarchy request...") - uri = test_file.resolve().as_uri() - params = { - "textDocument": {"uri": uri}, - "position": {"line": 11, "character": 4} - } - - # Need to restart the message processor with our patched version - # Actually, the original is already running. Let's just send and see logs. - - result = await manager._send_request( - state, - "textDocument/prepareCallHierarchy", - params - ) - - print(f"\nFinal result: {result}") - - finally: - await manager.stop() - -if __name__ == "__main__": - asyncio.run(test_raw()) diff --git a/codex-lens/examples/test_simple_call_hierarchy.py b/codex-lens/examples/test_simple_call_hierarchy.py deleted file mode 100644 index 8ecdfea8..00000000 --- a/codex-lens/examples/test_simple_call_hierarchy.py +++ /dev/null @@ -1,87 +0,0 @@ -"""Test call hierarchy on a simple Python file.""" - -import asyncio -from pathlib import Path -from codexlens.lsp.standalone_manager import StandaloneLspManager - -async def test_simple_call_hierarchy(): - """Test call hierarchy on test_simple_function.py.""" - - workspace_root = Path("D:/Claude_dms3/codex-lens") - test_file = workspace_root / "test_simple_function.py" - - print("Testing Call Hierarchy on Simple Function") - print("="*80) - print(f"File: {test_file}") - - # Create LSP manager - manager = StandaloneLspManager( - workspace_root=str(workspace_root), - timeout=10.0, - ) - - try: - # Start LSP manager - print("\n1. Starting LSP manager...") - await manager.start() - print(" [OK] LSP manager started") - - # Test different function positions - test_cases = [ - ("hello_world", 4, 5, "def hello_world():"), - ("greet", 8, 5, "def greet(name: str):"), - ("main", 12, 5, "def main():"), - ] - - for func_name, line, char, expected in test_cases: - print(f"\n2. Testing {func_name} at line {line}:") - print(f" Expected: {expected}") - - items = await manager.get_call_hierarchy_items( - file_path=str(test_file), - line=line, - character=char, - ) - - print(f" Result: {len(items)} items") - if items: - for i, item in enumerate(items, 1): - print(f" {i}. Name: {item.get('name')}") - print(f" Kind: {item.get('kind')}") - print(f" URI: {item.get('uri')}") - range_obj = item.get('range', {}) - start = range_obj.get('start', {}) - print(f" Line: {start.get('line', 0) + 1}") - - # If we got items, try getting incoming/outgoing calls - print(f"\n Testing incoming/outgoing calls for {func_name}:") - first_item = items[0] - - incoming = await manager.get_incoming_calls(first_item) - print(f" - Incoming calls: {len(incoming)}") - for call in incoming: - caller = call.get('from', {}) - print(f" Called by: {caller.get('name')}") - - outgoing = await manager.get_outgoing_calls(first_item) - print(f" - Outgoing calls: {len(outgoing)}") - for call in outgoing: - callee = call.get('to', {}) - print(f" Calls: {callee.get('name')}") - - else: - print(f" [WARN] No call hierarchy items for {func_name}!") - - except Exception as e: - print(f"\n[ERROR] Error: {e}") - import traceback - traceback.print_exc() - - finally: - # Cleanup - print("\n3. Cleaning up...") - await manager.stop() - print(" [OK] LSP manager stopped") - -if __name__ == "__main__": - asyncio.run(test_simple_call_hierarchy()) diff --git a/codex-lens/examples/test_uri_consistency.py b/codex-lens/examples/test_uri_consistency.py deleted file mode 100644 index 710f810c..00000000 --- a/codex-lens/examples/test_uri_consistency.py +++ /dev/null @@ -1,98 +0,0 @@ -"""Test if URI inconsistency causes the issue.""" - -import asyncio -import json -from pathlib import Path -from codexlens.lsp.standalone_manager import StandaloneLspManager - -async def test_with_consistent_uri(): - """Test prepareCallHierarchy with different URI formats.""" - - workspace_root = Path("D:/Claude_dms3/codex-lens") - test_file = workspace_root / "test_simple_function.py" - resolved = test_file.resolve() - - print("Testing URI Consistency") - print("="*80) - - # Different URI formats to try - uri_standard = resolved.as_uri() - uri_lowercase = uri_standard.replace("file:///D:", "file:///d:") - - print(f"Standard URI: {uri_standard}") - print(f"Lowercase URI: {uri_lowercase}") - - manager = StandaloneLspManager( - workspace_root=str(workspace_root), - timeout=30.0, - ) - - try: - print("\n1. Starting LSP manager...") - await manager.start() - - state = await manager._get_server(str(test_file)) - if not state: - print(" [ERROR] No server state") - return - - print(" [OK] Server ready") - - # Open document - print("\n2. Opening document...") - await manager._open_document(state, str(test_file)) - await asyncio.sleep(2) - print(" [OK] Document opened, waited 2s") - - # Test 1: Standard URI (as_uri) - print("\n3. Test with standard URI...") - params1 = { - "textDocument": {"uri": uri_standard}, - "position": {"line": 11, "character": 4} # main function - } - print(f" Params: {json.dumps(params1)}") - result1 = await manager._send_request(state, "textDocument/prepareCallHierarchy", params1) - print(f" Result: {result1}") - - # Test 2: Lowercase drive letter - print("\n4. Test with lowercase drive letter URI...") - params2 = { - "textDocument": {"uri": uri_lowercase}, - "position": {"line": 11, "character": 4} - } - print(f" Params: {json.dumps(params2)}") - result2 = await manager._send_request(state, "textDocument/prepareCallHierarchy", params2) - print(f" Result: {result2}") - - # Test 3: Position at function name start - print("\n5. Test with position at 'def' keyword (char 0)...") - params3 = { - "textDocument": {"uri": uri_lowercase}, - "position": {"line": 11, "character": 0} - } - result3 = await manager._send_request(state, "textDocument/prepareCallHierarchy", params3) - print(f" Result: {result3}") - - # Test 4: Different positions on line 12 (1-indexed = line 11 0-indexed) - print("\n6. Testing different character positions on 'def main():'...") - for char in [0, 4, 5, 6, 7, 8]: - params = { - "textDocument": {"uri": uri_lowercase}, - "position": {"line": 11, "character": char} - } - result = await manager._send_request(state, "textDocument/prepareCallHierarchy", params) - status = "OK" if result else "None" - print(f" char={char}: {status} - {result[:1] if result else '[]'}") - - except Exception as e: - print(f"\n[ERROR] {e}") - import traceback - traceback.print_exc() - - finally: - print("\n7. Cleanup...") - await manager.stop() - print(" [OK]") - -if __name__ == "__main__": - asyncio.run(test_with_consistent_uri()) diff --git a/codex-lens/examples/test_wait_for_analysis.py b/codex-lens/examples/test_wait_for_analysis.py deleted file mode 100644 index bba6af23..00000000 --- a/codex-lens/examples/test_wait_for_analysis.py +++ /dev/null @@ -1,99 +0,0 @@ -"""Test with longer wait time for Pyright analysis.""" - -import asyncio -import json -from pathlib import Path -from codexlens.lsp.standalone_manager import StandaloneLspManager - -async def test_with_wait(): - """Test prepareCallHierarchy with longer wait for analysis.""" - - workspace_root = Path("D:/Claude_dms3/codex-lens") - test_file = workspace_root / "test_simple_function.py" - - print("Testing with Wait for Analysis") - print("="*80) - - manager = StandaloneLspManager( - workspace_root=str(workspace_root), - timeout=30.0, - ) - - try: - print("\n1. Starting LSP manager...") - await manager.start() - - state = await manager._get_server(str(test_file)) - if not state: - print(" [ERROR] No server state") - return - - print(" [OK] Server ready") - print(f" Workspace: {manager.workspace_root}") - - # Open document - print("\n2. Opening document...") - await manager._open_document(state, str(test_file)) - print(" [OK] Document opened") - - # Wait longer for analysis - print("\n3. Waiting for Pyright to analyze (5 seconds)...") - await asyncio.sleep(5) - print(" [OK] Wait complete") - - # Check diagnostics first to verify file is analyzed - print("\n4. Checking if document symbols work (to verify analysis)...") - symbols = await manager._send_request( - state, - "textDocument/documentSymbol", - {"textDocument": {"uri": test_file.resolve().as_uri()}} - ) - if symbols: - print(f" [OK] Found {len(symbols)} symbols:") - for s in symbols: - name = s.get('name', 'unknown') - kind = s.get('kind', 0) - range_info = s.get('range', {}).get('start', {}) - line = range_info.get('line', 0) + 1 - print(f" - {name} (kind={kind}) at line {line}") - else: - print(" [WARN] No symbols found!") - - # Now try call hierarchy on different lines - print("\n5. Testing prepareCallHierarchy on each symbol...") - if symbols: - for s in symbols: - name = s.get('name', 'unknown') - range_info = s.get('range', {}).get('start', {}) - line = range_info.get('line', 0) - char = range_info.get('character', 0) - - params = { - "textDocument": {"uri": test_file.resolve().as_uri()}, - "position": {"line": line, "character": char + 4} # offset into name - } - - result = await manager._send_request( - state, - "textDocument/prepareCallHierarchy", - params - ) - - status = f"[OK] {len(result)} items" if result else "[NONE]" - print(f" {name} (line {line+1}, char {char+4}): {status}") - if result: - for item in result: - print(f" - {item.get('name')}") - - except Exception as e: - print(f"\n[ERROR] {e}") - import traceback - traceback.print_exc() - - finally: - print("\n6. Cleanup...") - await manager.stop() - print(" [OK]") - -if __name__ == "__main__": - asyncio.run(test_with_wait()) diff --git a/codex-lens/lsp-servers.json b/codex-lens/lsp-servers.json deleted file mode 100644 index 4120d60d..00000000 --- a/codex-lens/lsp-servers.json +++ /dev/null @@ -1,88 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "version": "1.0.0", - "description": "Language Server configuration for codex-lens standalone LSP client", - "servers": [ - { - "languageId": "python", - "displayName": "Pyright", - "extensions": ["py", "pyi"], - "command": ["pyright-langserver", "--stdio"], - "enabled": true, - "initializationOptions": { - "pythonPath": "", - "pythonPlatform": "", - "pythonVersion": "3.13" - }, - "settings": { - "python.analysis": { - "typeCheckingMode": "standard", - "diagnosticMode": "workspace", - "exclude": ["**/node_modules", "**/__pycache__", "build", "dist"], - "include": ["src/**", "tests/**"], - "stubPath": "typings" - } - } - }, - { - "languageId": "typescript", - "displayName": "TypeScript Language Server", - "extensions": ["ts", "tsx"], - "command": ["typescript-language-server.cmd", "--stdio"], - "enabled": true, - "initializationOptions": {}, - "settings": {} - }, - { - "languageId": "javascript", - "displayName": "TypeScript Language Server (for JS)", - "extensions": ["js", "jsx", "mjs", "cjs"], - "command": ["typescript-language-server.cmd", "--stdio"], - "enabled": true, - "initializationOptions": {}, - "settings": {} - }, - { - "languageId": "go", - "displayName": "Gopls", - "extensions": ["go"], - "command": ["gopls", "serve"], - "enabled": true, - "initializationOptions": {}, - "settings": {} - }, - { - "languageId": "rust", - "displayName": "Rust Analyzer", - "extensions": ["rs"], - "command": ["rust-analyzer"], - "enabled": false, - "initializationOptions": {}, - "settings": {} - }, - { - "languageId": "c", - "displayName": "Clangd", - "extensions": ["c", "h"], - "command": ["clangd"], - "enabled": false, - "initializationOptions": {}, - "settings": {} - }, - { - "languageId": "cpp", - "displayName": "Clangd", - "extensions": ["cpp", "hpp", "cc", "cxx"], - "command": ["clangd"], - "enabled": false, - "initializationOptions": {}, - "settings": {} - } - ], - "defaults": { - "rootDir": ".", - "timeout": 30000, - "restartInterval": 5000, - "maxRestarts": 3 - } -} diff --git a/codex-lens/misleading_test.db b/codex-lens/misleading_test.db deleted file mode 100644 index 42aa2fa6772f9bebf8460ca043bc3ad7961dc3cf..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 163840 zcmeIbS!^8HnkJSfBKJ%(Ns1CnrKO^%B{Lbi$jVxBnb|9}5-{{KWU^5*r+mA30F)SJu2wsR~r5VEY$ z8OI5QLKgnz@UQoC2!(L(0-p;1BI~ZVDq5k5um2u9smy<1`$*mXF&8p`Q+r{~+yHKgR zg@^9S<9f5)+HkS${EgYOw`QGNXU|=pbv86PxumnjkMyaOol945&0d_n;at6T%egvt z`SRG7y@N#yKX|~WH!fW{d*ch|li4qvT(H)d^J1F(*6VzD?Z)h-i&r-c%R4t_Kb*ZW zd-eS6O<}F&j+psYY)>`OuPw_{K@UnktH6I4W(JRJi;kS%^C|{=WEG}*r6GTLAU1O3hzIb(WWl%@qN0YnU6IW~1J6 zU(=&nT`sii_3CT7EV?zfS!}!I0(>1T`|@T1ogIvh%pG}^fHuKdnA%h|@LeW0a`L3| zM%J85tFX|rUC+Lub8KI$!6V&o15i=-S*Z=f<_q zE?w}f{@BR-?<;TDmIRdRrB>ysIKB0B1=W48Cn4ySg>ctg+y-J=Pn~m@ubs<#1AVcP zW5<+taZ4g9Fjm27hY&{z5oAWQ>@=ZE6a^l1* zVSzEdOcD(&_IjGw<<)4+SL>yR8W8`DRBYt%;dr;Q6(BFI9ISc!(v4elXD_eYe%49M znY(%E>O}{dbzNdg z7))YogVD2WGMEk3$=8$cOBhUiYlG3NZ7`U%s>JIT|5Xhpwza_o{BJOr7j^O1)AYj) z=1^1&X0`7ngOR0P7>ry7Yre{0q|e_rgXz>NrFz+YXUY*HDlHY8Ehk$T&u(Et{^ya{ z$gW-Lc}&1+tt`*itF7Q8xA=?LxIu zb9?*L*7Lcw@`hfUy_0{y7HC)B*3fq5%avxUr9Z+Oq1BbY252|K5rmXmbD`s7@$wtu z(5-uG+bPvMwRR62g!~_#ox6O?ncR9vwb*JGIt>`FTYg34G#-wQoZa=x$Vo$kcu5x` z{&f`_85vQ})eG|$!mJIDO4W}7=+=zB*e(UgAhzwr^;?gr7Hf;0;^OOq(_l}1O>DZ{ zuD}8ql)hpSs517#bu!zms(C9qGBffj5oobX+SIajF1=&84~0l%DAYfm9!{K!{f{V` zAA^8Fz##Bjioo-kNFucB?VZ1OF&=8*JYk}-Ql38&lo~7TrFw02baZyPSgATKgb$V4 zB0ig6S4!>#8j_XeM!nf~8m*O5y|H*SxzMaHuU$F4R!pp0WM_ENu2$xIZ9xqVda$Q7 zlgSb`- zsa4vA0o32 zxrA-zUrRGl5VM8sn03*$$~&B;B4fYum}7uyR8<)vVmb@ z2wvCto>+9-tjp!sR);`d;K(&sDsHvRJiNBnht;f@DAkvjEA4fo7b>-4wffQwFP3X` z^l}`WPx_1DMCk4DozD~TP-)36J#?E;;ZD0!Z5?@Wy{5=lcr*a~N}XzvGQojCO@z~0 zugy(2nIuf?SiosrPN150TJ2`u89(jJLz0?71fe*aAs)@_{X50+r)S5%oE$$nQ8;jT z{NCX+>uPX$cOtmH_w4A{xBIoUpuQbUz$tT0cLMh%N=vz5+A%GLJful-^lghm+kMi` z@s|*!b*JjC>GjbQCxl24t7&9c66j7K8qGDccTSxiy_-FF5BihEKQ?R81*EWI0;4O9 zT>j29Z3MI5OFo~o6QO;FcmAFm4=ojI<*M6k0qSC-a%A1*8qya6)f6FX18^u7Sd^>5 z7FV$VbAQ^Pp9)RTfr~N$55>kmg*Wz;vNd04QfrKhdmr+HNj; zgoNM(eYpjma9+GrFSlmyWIw%j^H%m=UW>EoBBC#_Q!Y?>82JUa>f)YIvF5l>Dy=ph z9DA(kJZ((i6QZc?1=1BLY-Pcz)!Sl-Y|3RtRkC@f-h8Q{0l&t3F4=X~79bw*nR}LX zo6UMNJMCnzR9Y>P!qv0i=Et1llapdQq>Xn_;J}U^JAOjyHcb(XhHT zY&i~`JX`Y$JX(7}^oweZDsZ7IJc9!cx>mZ5bq8@->IJd;xV}_#F4SFZVBc;95f>H& zCX=f>tVC!}cIWRtiihOrflA$JFCFP!Xq67OvImdbdMATmTuEYrhTb&{3aL%mt+tEF ze>LNb;x-3lHmZ$zx%kj6w3i^`T&GeN zQu6EYA|6l&u#H(>>>gRngt{J46Y!a}`r};QIsAe=xPC|lR)W6k?xbZa#r9T11;&kF zK5kFAHJ;kQVF_(x&I^vmoMLsc-ozHYJd^$C=Ft=HWCK6ez|l^RcR}EVXDwGs?Z9iT z-$Qx|js?pg8Vky}S1d=n*9*e%xzD&yN^Ya=Kz0+ePa2h`TfSLYtQFyS-B}vki_I!n z=xV*@CcB@iMCh&YoxgLB4zIN&J4>m#(rDM=@q+6$f2VhQZV@!wjh-%0k$D-c#ouVw zX^jQlKi3L;Ih+|J($j&dqZ>5<-s*h4_O*+ED_EypcRG!R+bkgnShK2Du}X)pmxEWT zu_C9Zp3xr~7vX{m6?Elw<8iLllWMlpUKoEb3q#=y8#W0&v?q;T$6(v!KKwFgIH7{O zcp>Q*dnmc$2nW8j{kR^m)^dtXgk6t1#p)Vkfph|DWqaS6T)o)KY(V`GLTqJ(}oEr`e z0tNwtz(2gR`gic7!l8dM9SSA>GCYZQwy31CCcOKW^oUcW#YYR@r645jIqo zRa9Ayvhg+@wOd)PowEHBZmT5~SC;p#t(vUz9)4%5mQ?jy)~Iw_ z@;wRWfQ%5;7>Si3)-99sJS;Z#D__r-q1wZ|>1B+K|2 z%W@w{-ItcNRFoCpn*p&4QhCuR6l zo!Y&_o5<|9thTiH`|CmHr?z@5Gh3_gdHjn`DGw6eUUcgI#0R=x$6ldRjj z%(_uEq%3RC)wjKq`)ySswGRkci+$}irp^Y7Jp^SkO<37@agwYXw^g2n?m=6P$!Y~} z3beE-HSdeVUi?jE=dvBS{G6BISWxp0V7dtza?P8v)ijqs=iOB{UWy}gCu}7G@Q%F$ zJs4;E9m+>9-OXCom}c-RZ%WyCNgK(&)Dv%!KwyZ!udZyoGDs-6rztos^WO75Q#Rh@ zLmGAikTr(C7Dc1lJcT@Kl(%=?n*!;JE-CZ&h;Ui@2$Eb;R$SS5Gm^skUnm=|NMh+r zW#dIfvh010177Mxib~WsN07WuYO(wU?`}rfc!3%RerBtA_Vczw@dB{Ml#Q4C2`t9~ z0o_+N-e|;*?-Mn=lt`NIlW&TBaN7mZE|BSC%0&U=U>&GyCc?Chej7cfhsKqSR~V7e z92MKL>LMfmgtGA-C*c|T;EH0r3+7#};13)}`Jp4r&f2OVg>ANKNCDW@%fkLGvLC2^ z6*R4Ip*$B-Hs19l)1S7iiL#Vduux9M-PYquQvJ+U3uIxE#@`a9(`p06uGvb@a!A>D zLz683MF3OMvT|DZpK&@YGb-~%A?}C5#?Wzi2jSQI29g(kyct^wp57D7S`e{Kap9U+ zzjp)L&{H%R)uWCq2@PsaC@V@^qH?If78|1Jpi{=+^F3Iy%--EW#ixYww(5w`Yu_mw z?|GC;NJotNC?{gCOK?>02{w1hjItLUvGx^?#>9hQw#8}AdUr*2NygruCFCf4Ev{S2 z#{05VzlHXbL(ns~Bv=d*3-P|*DL~LvRs`4=}h1mZjr}vhiLm;pRijDu{CV!@WiPXUfLgwWPUVS<~7R054Z; zDgjq24DY~nT2Mo_%CWn*FK|Vkt8BcBk95WY&o!#-T3|i3-j2|O{Oe-cH^sEeg4@-5 zSYlCT9(cf+f#n(RZXjzPT);VVHAe`*8n-O#YnO}Fl#Mrq3FTJ;aX_6)(gp{Ht&1*k zPAg$mOC9!8`^&!5$?WI!92z)O?ST zvV~wjY9~4C7?qgbYnK@H{6qqnXdaAc zzAX_2MURa_ypOm1oj67e^s~^gN{x*6i}1mocHxiEr(PmWCjv$Pgh*={9=K{*D-9BQ zNIM0*X-z1a>p6SxsMsxr@3NIRAMaD{Gt;sGoX>dIuv=?@4vSKu=QMWhsxUl2Pv~lM zhC0)Gg#pU@P;gOM?~`6Qf!0=mRGtKFP4$Md(nr(SeD|j_`<30s#)LLli*{ObXXvR> zgUwIu0y>q`R6uxooAMSwxwhsGNe6P>5qe`P(e9oyUKCb{>LH0Wh~)ubxiBtDb4M#p zMa2AutwgzDGj%bFXOO%2Lw|ysFveb_#!izm?1B|e(HDrLqobfY#PL{Mp8tum@j^R+ z`yRw4wtt1PX^87V$EIkZ*FdqoAoP4mdV-i@o3ioxKdyy+7H{ek#II5~ppG2DZQuok zlFik#DG)+T@wBAzErXwGjhKEWSWa$fmkA*h+_uKkEuDLKb?%? zBa!S!PZE$o*96)Un1HllJo;)P6ZlDErBn!HlQi(Ky?NFPWn#Qri;Gg*N_z#w1{ zFbMq82<%GQp;H{CB9Tx;DP<|d;VP^YE>>^;q4?i$=ok1lRcpUBITgXqwcApY{CrEr zd#~u{u#M(Cou-QS-#tC*sRATM5{cISwZG2q; zOFLA&_bmkMxu@TX>UZw5@f|A`d}#sO-W;&Pe9J#uj;Juda{%8VtE}Yi@b;7y3%-GY zltjaCSyBC(e0Dx>sg;Ojs|eno+gxGIb`|Ce^& z-`v3Bu!_ic1F(E59FJ$x>2!Z4lTK#(`my(C1`-3wfxb*?V4yFW&Lj|!q~AnTkV;$& zz4?ylT{s+qw!P3hzpMj+aOnAEsO1G~ay$%Kr{duurH+QN4w9!6Cs-I&s=&_y6~&wT z@jowyV>Vvk9|qf*hzj#9#Vn`$RJ`|X0PGcy#DZ@S;FWDf^;ZvsEelDuyTBo_kwDdW zkr-B~b+7N|pgmT+_uT`$4EI!t7l@2&Z5G~E5&1>{mUo8L5Kn@RCDQS}Of->9BNR&yhDIhu3bq}>qJb2i@6!=jIP`lT!|oqO zC*@W3HYT6Ifz9M3OE+va#ZMf1;MMjljnN~&~vEXYH*mG#i z0xSFq0`|o)3WmkY07Mqt8$%Kz5a0lz_vHrcanQ2i9$DH3ZM30{vEUmWIB{ZFYa;p2 zCP%F>zrKMDnL{cTd`AJt9)lXz32%#A5qV)f zTXBTFz?o8eEgSE-XB9RO{RVwrj9L+SQ9a8@+#41k$|PgqA-=J`)q7*Td_@GiysN_e z3Jn&+DzenV8}>Q-!2z5c6=n9GqOjV+5nQd!Qa?_V@hW^0XgdQ571iHMz_8tRK?`UjGI1IRl2x>tL5Lw?tRsQ*idnpt%J zUKQMz=)+Mj?ipb2NfpJREpuz&79EW3f|lbD2`z^axX}HxJQj@wFTUs1fhbS&P&jTy z^?Unyf$c_ri2*N0dC-a_V{i2|fkz5^c15f(UwzLmQ;a_39sa!9p`!ZT`y9118Re-V z_6xnvF1U&X_8Jdd1k19Su#i^$8^3mdm2t=oiqMlAXLW`Ve(`$t^0nWEKwj*N4f-j9>TR4oQe*k9?iN3x>Ce_~`#V(ZC9_`2O1Urx8gkhi` z<~#tU2|Yq#aDOO-`vsvLkz~l9N}?u``QJmCf1mlcxcmQCD4HLGfI+|@U=T0}7z7Lg z1_6VBLBJqj5HJWB1pZbb5DnwZAcC`2e1!D}PUd-q6OwQOAN>3O2p;**{7vSsGyiAi z|ImL5@VAOdL##o-AYc$M2p9wm0tNwtfI+|@U=T0}7zBQ65r{_Y(2x~69}UarSQO8r z;rcory5i@1Q9J>F&y&3WuQLBn2>+WOgMdN6AYc$M2p9wm0tNwtfI+|@U=T0}7zBPs z2&C*?3hzv4RqC}c^Z#h(f9gH||1UHDD=y5BLBJqj5HJWB1PlTO0fT@+z#w1{FbEg~ z3<5uS1U5ePp9teM%l2fe6_)xfpa1`hQ0D*3{9pL%|NkxX7e9IS41op#gMdN6AYc$M z2p9wm0tNwtfI+|@U=T0}gz!3lJ2V^#J>M0>v;V{Kkbf$MhyRBYq34HUc=~@h8tOJ; zc>I4j7U~`}&;J{hF$fq03<3rLgMdN6AYc$M2p9wm0tNwtz|Rwb_0RwF{ePi>pC_M& zD}#VRz#w1{FbEg~3<3rLgMdN6AYc$M2p9x30=)lk@_(ZZ1_6VBLBJqj5HJWB1PlTO z0fT@+z#w1{_<18>?*IS1x;+;i0?o zxZW(cHe75we`EISty$;R*>jg?9X9<6L)VsYa!F^4A6dClb}n7LHG6UPhI94WE$8ao z<;!DR_73PR{NMqf-new-?2Rv+PiDVxa=}_-&WmaCTd(uswHvdSE?(U*EbrWy{c!fi z?A7zLHwBfJldF{T&b6z~h1tuq0POtPo9EA7nEe4$t|MUmtUYnOV&YAZ0^&0ersoumx-oByO$eumQ-ziGatM7ddZV-L1_7$RACxmq{ zZ_Rx$m2+$MbI3Q4u`_q|()GF7&1{_=jE>A5c@T;o7uUB8wWzns<&0^av7m97rd-n2Xk$snmjhsBGypc8M(kd*p@vrmr=}Mtg zuPsy-g}qI+{TqPKZrv%Hd+4lJGUw~O^Vy|aA6=Wf<=nXT*`*7f)gK#q|9$1{+LC~B zz0|5a6{okJuAsW_^&|wnvXIQ0pV~l7>#1|@^0jk$Z=f$Wa_pG$E^bLgrTpY&?ogF} z{o?46&9&UcN_os_yUpb>r?FV8kF7^zeRKXLT)s)iMoye~B`mO>mr0^U#a>SnySy5W z`D(rNPy^z>k&2BRJ{<2>wgTj(m4h{JU%GK??(F4t+s`_QIdeBJUA^c)vrcTipVW%W z?wGUCsa6Y(Vta`$=IVvn&u1@;IZ(p3TWfD^Cqd(k*1zm)I~AJs$CdI-wo-mYzjpnh zTZ6jIjJBJV#b$ANbPIz?Zf!8`lVYj8$zaykrd}nS^__lz!6dde7(L4-gV|7>d_4)j zgu%qOHW z=Bo@w`uuG(m`<%ys+Zk&rW`S%(o(V6a=q{Ee;$dA?AoQC#{{g_%JO`@+6q3R z8zd4`{s_DH%R%8UvwhQ9Z48OF_B#&|&;5{ot#+~5E>tTux3^DiJ)c`EZ|JqzJNXA} zfp+z64Q*$>Txqsi`Xjs%T3z{TfOaz+L4>$97dprlFTWwI+`6~6ol?D1YxlrG-2UO& zxy!el$*qS}i>-E{(}3~1oAlj|`@;-xX? zrPypOBFk@@{r1_*bF(*d*|J-8+irHusaMMrjDY#6r(tPLZ*@bUZ~6B(%r|@Wg7@t{ z4bALU&{Y1T&{P6w0;YcjX#P`?sJV}Ye|~EISz5@x!3Zd*sj!Tt))t%wJ*>S37ehueR~jn+^`+K4zC-$h7wKaQ(L_HFF*Lydu)NASWxiMb`(Pr)-HNyKYKPUv(|-9Hf*2)VGf&tXhV1-?;oRyu{&nl{Yedo(l29bqKsmpI}o^o9$nNnZp zWXh{=L`U)?ilL?ZguDySk~B+c=cegibl_xim0R;b`AZ`*U-6~+xR!K%;}!F$%iqt^s#8xF)@Mq#@ZyWPz=KH!|yT#UhIbt=$#&R59cH>`JJ~n1Sa~>wAc~D>1p=fBzrJ{L@h8&oci!^KUc%H1n^1^4Yy1&>&zCFbEg~3<3rLgMdN6 zAYc$M2p9wm0>4cNr0iX)fu~g`8Y_{0`)yTPa!U{0CN9Gnd!Je=*2-14*}}D*w)ZG` zhzJ)}+I}msRBSCtSDpX=aVYbrnSYV_H<>@q{K;<Vp@>q-Qp#3VSSeim!#k^g z2R|wt`X|$&P~tDclgVURC6zVd-M1{qQ3J{<=MTSgYs|9BE*p-pp|Y%^%5s$Lpv!J$ zxpvMzWS_8a+J|jhjk4dO_qnZ>R9so!yS8ew%6s^oty)smZ&{<#ZOQxGvc^xd`L6ew zWlbIB%&41jSa2Vc zd~XKCE=c7?Th%!C4a;gYSvY1{O<~Eag04ENKDMlu9+;W4Ra*@!D|Kr34sRl}-?G}$ z;_t5qouAt3vCNE7od$=0%Q5&IQub)lQE6p)N$-xWDy)19EGJpFcbRpgYDihuo~v(r zC->W`L~0)pvKIT=YfPOD7JCTFWSX$Dv-zZ~8@E-Sh3-LHjmc^SZwj=uDK+nl!(RMN zW#_UTx%`}$;8;-e4q&%ASA@0Fd1PWR2mkMbW4> zPa)45RLMfmgtALT zZ7$FUR}|x2Fz<2&f8aRE4;@){)>Z{6Y_nBE3c#*j7WQwE{Xq4rplO8*<++fu%gzjY zd#5dHqAaBqER>URxAnM^R6n!T0$G@(@wY_jwAui%Yqrv}98$LH2z&k)0Zd8D%4y+$ z#_6!ksLU6IxE~4|L&xDAgkSF)NM88yW^5&RdQU8CLBux2g==E{-VJC&Ptjmhk2;*v zhXyq#loh2dktY?{VnZ|?bjlcfz6VQ|*}FTa_#B(JRY!zg`%c*lj!fV~I%3pEIT3qZ zf}?s*u(?ZSl)dPPwXbkACLRQ{ElzXRyDO?oGWPZ?AxGhBaotk(5(L|Cq5b3#^vo>@ z7K6k>ysvi(5Hyt)frg8Z@h&U7qE%q0U_yeR|5efC?UuDHa*x6|YKx>Xb9CC<|9*hM zO|UE-N0j{#%Fn?cT2?`n%OCD7;y+V%RZGY(Sk|=m1i;G`n@Yfy3d1`vofg!Pt#X3) zzQ7fEuCkXKRl*SqJlCkQYk~FDdOJcB@~?|&-xSj>3vO5MVTnbVdEfzO29{^MyMe5I zZ~^B`P#qxvYuvJ|uU#%yQ+D0a>|6=N0d*=#8ypz6F1o}yt%OxAb=XhsYYtw9u~Y>V zbZrWYaQB@W*S~x(q3nh$JCGLw;4)<#BgEtF19npLJx0nFg8itSn%kpL!|2P2wqOGLr$*eJyNc+203W5hr| z3k|E($Y{R^AM9xt{s?{QCDL>vQ1nlTw3gw4tCqFWAfbn}Q)oGwxVfIQ_l}C)V)!mw ziSzM32I#OT6?#r%*RBe~1N4NhHfN|ay;m5ZyblEzmGwU9g%fCP z6-ebt(AHFMC@Xz5jm>v|Iva+(SVPj6G+0w~wk z+#%^et~)|+OeNagQ^t$J3Q;{Iu?Dd`04x{AMQQG6rKyOR->{V^H*BUZM)3@C7k}ta za1+MZi`3X@Qiffy!YTR!addPPREIboi_7yrQFaG7aqW8$m)QOl%BCT%2OXQDg2 z_JYv!CFu!bifzjN8oG(Ku+Kh*_u<}imBImaoE2_krO~Ez4^z6~bbl(bJ()-*QmI5L z*_TKrQ^|ND6;EYS@kAot7f<&mkZe3&EEkJWIE9%QgT4w?}jgmy&sh5m2^ zG4SAiiG-C3ArMx5jEb$l3E_Y9V-WbQLEw)ceb{_Aikl=`0~>$BjyE_+B}S{D4t z31)v5eDL=}Z_ezCFQjI3+(y~yiuWy?W0SqoaM>;k??@_W-G=)K*@HHJw#)DL70P+=N^$kO#jrwO6>}ZhrPE60J#QD9nIRPWj#v@M}3y{t(5$jLZX@E zl9>FVNaBLzhcb4S8=EZd?3l5vdwC%VF%I~_+)Oo=TEhAVEpS$pN>J8mN$CwFk9oZg zf@uvVqDJsT!Vr0b15Se=ti2@De2#4&)PfwHNlM2LIhL}tJcN?YVHtyDe4InRrcA~( zTVE;rup_eeE-8B~DFX5if^w%Q(;ov=!lO$i#b`e(;9))F9^RKJEXc}S(AAd+Ds{rvGfsA%44G}U9zkb@5rj> zTFO2)DzGEYh*ndzzGvXhbSXmHEzVq=2?l<|yw( zPvO2wlRH9;Y*H)p+e}Y|h;z0QvS19)1V|`gc1e1lwyZl@S!-fV!Vtr0%+JXd0>fTW z_8mQbA(v4Ei`(|6eU2|rcF)w>_U@Ps-mzFZ!fZ6SG+;@jF=NaItOK_ z3ts-NR$zlXmC$x|QWHEagglV+mPHR)XjUpcC~cr@2Lg@u4~mgMg2;G^ZIU!Zk^o^& zsNaKiN}Q&fL=w!Zt8|j$GA6yZVK>t1FToyWWVtT`&(Xccgjmc0Ia^M6@tF_GY#9dp z)SNtG)Q+gYX{=fhrJVB+@0IET#cer1WAQ!VPdniwbP)n-7ofW0EV`dFHRM#+=%TAK z)f8#0K_wyqA3lg_Xd2D}xq{G)$X-tY7uPOuvG=hU837>MdorRGXXv}0XhcraU!%j~ zFFMF2jw5sl1Y$mFQ>Jp;3=K)h53oPw0} ziWTUuR!4|M^w+~#+%$=}kF-iWM;Fd~*~O8bH8K?U28YH27OY($X_1S6!Nmiy&chy| zj3oYY&vyK25c=S;=vIQ;1d?#Kz(sCRg)N)~t#F>x32zxatBZ9Z`5l-|UGE(+54%)b zoZwLp&R`ifjo|~>#e{qxic^#=5$L!g1^Bx$_Q@d&<#gT*{EJx51Qnto+mj6*w9va-G{zMN-UY#nVl-cR-vxMfhlwW# zAgirwKyiNhSV%e#vfDx}yn#?#LK5fD0N?)En;=N(Bna}9roG$F+}>-L zUQE?L2?T3;vmBzC)x~IV5T#eYBSynDzwB+kJ_c(#qM&F+Lk(ST9X%nkJPYJz?g*Vw zTv?rGtx!KqP zy@_>CulZSMwRUc0Xpm+eht)dx8R2;XU?KtFvG9;6pAy$rls)wrj(*tcL77W6(Uh~$ z6juyrSUcw>aZI0S%Uc#!-v@Yz&H(EUZK%Kx^W&$f_hO{uV%pzOvRu817@$f1{ejkP z9F2AK4tNhCLSVzUX`u9agF(bbvfw}4B_Zn!l<9FUFXdS(v(S%yVnPP(ps7D8X_jQqWYWmN<%&I+Wv5d(13-^N4F4oU zPh%KzZ>H%Sg#&{WdT}^Y_;i99I`VOpF?}$xG@=(Sb^6Xkp63EM4nT^}X^`b}MH~=h zNENQ5X?8*fwxcr+9MBKzqEjCa0a(HqbY=s)_NQC~v!c(C(MZVfj5Ozu!PYg?mUcQQn8g%d)t7a%UD+X;T?4L#b@&w)ddr;fTJ; zvY)%;Bk=i_9iC&S8r2gVqI1r=tX;j%VJu)rdmsh4@?{o$#GW;&`5OBx3v})ssewP5 zIx3wOEbHhbY3XMDuw@;elv)IKGmed;u(|KOVGlcFQD*WGN|x$GsBabip~=1;GPiP3 zD8G{3g*EG}eU7=@C(@2%+l4B}9L``PTNJXph`uTd(u{m@nV3!m)OMf3+1fiW@fmOL zlo#0p^L@}FEgyLg00(z>fl>LOvL@7cH8|6{kV7V#t_`p9fj9D*^lwW;7qMig&6VC@ zV#c*sCz+TDxd>};2TKt2BvK9a1T^$ zy8r^?5FiMb9u7Y{4Ot0^Ul#Kd8E9EvNV?*waR-7T&xQg9*B%qzXz70JU%k> zT2Tmn47VpGe|sQG4?9Rp)Q0h0M>J^_d(yXgq5V^2o!KL#xkdZZj6XxrC9T6e+GWW@ z4k*le`>=-;2oq^*ll1N*4_o`QEGO~bLoS(5CfNZgkO1YOP!=AC$gd;R`v@+^9I=uc zbCmkDh|1gltyh$6l(VgVltuq63FvOK)lpgPpbr5Z7xHe>Xs{B}TMc5!M~o*nei~UR z>C(=ITD3jq-w3J{#4xWW#T zS`mEYL6b?&b8x{8Xnnuvz$Qy5x!qLfGAjkP52tSH+Yfzsj(hK}`5qX7u+$wAe}h@NPb}-SwjO{~Y|{rtgj^2+kGmiM4f_y!RTf3oT6JwJ z0?LGn=P0#iq*shu;5y=3ykm%CNgGa1#8y^uLY5U>eI5(k#OgDo;NlUQgmI$81y*t5 zrhP{#7|GKQ&_#|B2baXZgZu^U%D2;9fFd?SA(zFqt`36xoDlUkBWgk9Ikwpe>31IY z=0%+ffjSkX>$mi-Eu#Dl>3P&)@hmsL<{2Rk`-67AaI`I(=T)r<@`!L-%L}?AKHvfe zwKb{*Qi6%R6(sZpS?Iox{6Uk9EpbWeY z$h)^^mv;@q>j>+sJCWccQeX_Pt>ZaLb?v$%RDjA6F_I^Quq?{v&j+zkH^y_5G4k6T zWKnjD?qge>CO2|wBF2Dy9O9)&v01Lrg02LC!J-asp!!vDJ${Hn6DxpWFH1ak6r?Q* zs54$)Ad&7Y@T-dnZC~YXr!90g@PZz;)3(cS@o9Fyw>O(U-ghwe)}Ed4Kr;6poEFrV zNYV2U2G@zqCGX7}0v>MP6I($Ini)$G!S36!8BUPJqW`Z6(}#j zuqvYVUy0g-5ZMg8@96$q(biQ&X2JtRrhTl%D5av&!VC@Z0;|A9^a*=L~KG6;>( zegscCENas~?LjROVnRJG2kKE1S3CewiyeA(C`DVa=_19{>W1yPUT%qgM9G}y{CN~7 z@3Q$lNAv_Y0vhoQibmyuC_8FkZ#e-I&Pj_7L81h?tB9p4B;^w!CD^$#Fg$Gokzg+W zG&d6<<-;9_kZ}y%((VoHw8ejU4bY897Esg~PBfqdSRKSpG+9}taY2nZ4e&rniqdKV zXh@Xyn9bTJuDZZNf}P`mlk$*cG=x3)^a;-PkV`!3$o_ZEL(<)J1PC9HWOeikcW_2z zxmv)&LhUL@EDJn7k5jbQ-)XByobrq}LhdN@L*mWgD|j&>7ueIlO|IunaSA4`wnnny zr^wYQfHGlQ*zAA^I^zxr2t-TwZJeY)@lp3(Poh{KI*uCbybap|4o6N&-*Y@^5lP2I z(lS*lSa+4dsb=&d&a$KpH?4A<=nmZ!K0yYY28{hm@3jx~BVW!iE$)l=;S(Kn(msGOOoQ>mU(AtlD;;gP@85@1JrS4N^f6@a$mN+F zV$}qo1>^; z8qd4-_>g~?D}$qewC2u80o}y`@CJU z4`bVbf!q=UVnE&CQ&$%ar9~kQS}-~0<~B^Lo^^$AX@o$zx2n+`1nZ~`T#th z79Kz1viAYBh0 z0F#w4BLj#QbNUFUJ5sQ4STlZ-#mX>FV8*^>k%8q2t$H|kIguB1JVb#f^3seWn3EGC zNW{GbE~O7$NZd8=AW*DGzpoH;vF@w~wbr`lfN$RhdW+|W+3tARjEZsY#tlY%VvB{Y0K z*JO7ev)0A7KbgZ}NukA|hzpCdQWEnoD;swPl2zR7zOVbX`4l?*qjLS8EbJj zUx0~rM;zT7)TbwH!sC4$8>c716YZo0>HmVze1>7Vl+J<98ryat)7HIU?zx6EBI$EW zVarCEUDc}i9)e)@MufgB?iKE`X$(Q_PyiRj_p+?q(}9wfA!bJ+`?q9}?#^Nf;&3ypaD;3J<% z$1k8ajXL|`z@g^Mu!J~!UKra=AW#8Y*9Hnl0KZ3dK{`LJOUOO$iBICq^^y>VgMiiq z2jY;Zt$US_-&M6Hz%2_nSD3%C*EY%)pKx?_QJc%yM4DRe=eac6vdFh`0*H^L!9R$Y zgD@crryl+_*=lD^L>kOdEc!t5H&x1mjqFw++RN+#eZU4a(25+($WeK;5rjJy%a zV@1F|QvYC&x*#+I!!OTspOoFen-VUuR1R6tgg8KMQOz)ttMk~dAh3=A>SMQ+-QYcC zw}CYcw~XT{;UN!w(kgPf=jDB5a}(lid{{z&_jNc@-R~msUJ^!fU_=!4bNoTc6H)0) zRB2eBU=V}pi;)gO>6&@Ma-m249K}I2;B!Pz_=vsUjy?mx9UT_z^kAW#d9AN7zN+{; z?1;-b&ITo|iS6J7p;;6AdXpob6sp4I9jIBL3D82V2cO^wkPG`jK(-toV$h`EwEF4f zI2)I+4c&Vt^d%v8@uM#N=(D35{50<6?w1|T%Y<=DG%<+hqmk33nanUDLN3LO5E%~= zFeEuhchEu7E2cb>V&NCcQi}(%(Sf-d?(CvHXppgTdzI;5h6CexDw{JTqcNm_sK8BN z{7T?KZ)A1Ip_t=g7A(j|#?gop%N&dq`M9^VvL6=Zl1x7FBsSAF`9bo^j1ActehVMq z2Q;lN%Dq-A9Qwr#q-Nvc$rg@$fLC`HPdtb!9QtD^z#8?br-uVhNuY7SCg&vHmqBL_ z;YmkMvbu-`9_wr$hl*_+8c|6fVOBZc!se2_C%JouN4)x=*n0{+&FgcVZ+pjVI5oiG zJ_zAs6w^Xd7Yd8RE>iiBhfi#TnHKe-B2I+$!3F%BoPc;8tUAwxAL{Gs)1{-lg)1kw zkH}y_=75`8<|*N`ji-cA{<@GF<}pJ>4x#q)#2hn1<>ZjTJH<0IusF=~$ELp9=1}Y9 zWMoW^5<4Xg4Rjl~0MPk74^reFMt2nMQ{U%KBJjxGq>oUv6oGURnmJD7AnL5Cv-#=KGkrUD z?>~^)xf>3#RAmo@$_vd1z?s0NQ(0yeo)1B46!ETNm~BtmXXN>T{qp;S{N}HnYo&Lk z6vv|nyQK7%{C4EGet?_B0lDU{ZBo)tC`eI_MxZ2Kr0qR{CnPal90#6}!u#X|5?jq& z!O5hscihMAlLfY}HlXk<_^{s=8~we01sj(%;;EUAG(N>Cz0{mXq^j47Vl8eitnMO0 zof>0_e@#0ng@;V(B8-S`Z$HNf_e@E;B8n;$qz4>*sm>l(hP%g30nc(#dcX!m$fkD#31L-Q8K^H8WvV+K?o+ukgYCB52p-2zCcW^}?cTeyShp>rUOfi8ZgD#Q8JwI@ z<-v2UX!>w>6_1FsnNP`!O8Gmy78k~1U}`i;zIS`W71oYM@Ti2ftCqXE$Gi6i=|nH= z?|NEbFSL1a9|;QJKqGd|N=@d+@*ol`b+P9P5+3d};yDGW!&X1$5S4Bmn_qceDzTQv zr;8k6S>)P9)RWU={Hr2WDoh^H1|)>ZpEC^8LOJKZC5fd>g@b>Ye)SN#PA3J{n)gKR z-Lr#F^C?SLIN&8Iz_LW7-o@=6c3h-+#NtiCqPO1aSG+^d?yHDiAtUtg_YZjwz@%v8 zbEFb05k4i^5|n)Sx5@l8M|jJ)`wp&IM9?vrmr@nEUvh!ijWc^=;eEz%Z1j)G9rXzB zhN*`4e=ovc#SWL%?1iu!aIS795JMM1@EMI>7x#$@ zIT_)dh5F}&qBA`KBdXSR;9cJCSq!{Ij$IHR2>E?vU-Q~H903M)`oRAR3{wXp7jko^vQT$3z+ zw;MJMBtZnuB=G0bB6H-8pc>6PyH&WVtO##^PXYZ80w3r@7Z&uU@Gjj^Rm8n|k^YnV za-razn}%P2pTT9MCx}{^m+9v2r{2eqOEbxps{U8B7b+(1qv#cp8czs4g9unj;`jK| zZmIarR$sFMhTGDZ;{>Fw(bz}UI1gfmCvEb3W7tQYw%};jilk8wxyfjXJ-a1w1sub0 zszImf@AIC5bgg8okbP5({Rr+#E;^zQgE*l}l87-9j?d{Hxkp97m~Iuxs=)k^-06J- zR8@pfFV7rTlL{E>EyB=EA)2pfMe#!roy?4ksw7BwurBd{jFCi}RzY!9_L8`l#y)_s zO<1}YXzdE%7lz()_IhP#x>nqe@f@ELZC-_wT_pK1PHoqx&XHCSNZ_f~ASixSyA%v`w9s(ROB?Sq(gwLCKrsR?*$jMaz*;Q# z8PRfqRfl??jKIZ$Hfi+V6C(q)kWE?CW%VOKA%Oa)p^oCwy7P#@g+_+pBF<3}XRtGZ zt;#%kMpE{a{e0L)Rqx_ogJqen3#}76crFKDfbO7nMKoshG4g&`AL=fXNPI%*#R?TW zs`WhBau+H2Ct`JXdH2!POnxh>1q{4Pl!R6dh-X=V3U6XO{>_NG|sCqik!f(3dDt#=TH|NjXXyjHl_hz4(!dxViVGh_cy0y&+N!cAyVkoO8PD_3dtK%d(5$l^06PDn{!0!*5R zTha=QKNb70mo#QWAVpC2kj_`df{`f5`hGSrA0JYkhz#Hl!~J)$u|h7=K85g0`{{Ym zUW@Dn4Oi)>z5JS=CJlVPjM!bn!ceDb1h>wuS+6uS;>}^Q#v&UJ>3B-O7^QSuQtYPT z0UFv7^K2w55OuMAEFR^s!!oR@*`C4d^~eqj=M2H@fx~&56E?IP#kpB)g+mX@(2gkE zbDaJ)5r;t}Jw@n#0u_2JX7eW26$9$VWGf(3zztChN*?@@-V^-qAz+z~2Wff>n}mjG z0qi3UUyC9@gH{AV`Fj>lvWmBfw7@0Shblq6Sci#S@fBS-aoN4S^RkD~m|$8dy=Ra| zu(Qptu7$Y%G-RYHL0wB!4USDq98|V76bK;o`~Y@E)H@4I4k6!?eUeJNAF_7E_OFW~ENKXUN<|3su%BtAr4tvRZh;Mn zMHVC$(F5Q%nj%1NUl+%x9XKNz#5dqn?|IL0Q1dl*6@ZC@)5kC{iV0iQlfZ`W)Ah6^ zCA&!##&DN=;))~I%-8QgwxszT?CC`v0>L~GdjaU2=uJgbZWVFIQ&;Hn!{NU8PNcPU z9;YDT<>3kMefQ&U>XZBfef;17cbuf)Z{m5x6Yn43pYo%NQ4VHBra(8a6PCwe6QJT;820}Ire&00 diff --git a/codex-lens/pyproject.toml b/codex-lens/pyproject.toml deleted file mode 100644 index 71dd763a..00000000 --- a/codex-lens/pyproject.toml +++ /dev/null @@ -1,127 +0,0 @@ -[build-system] -requires = ["setuptools>=61.0"] -build-backend = "setuptools.build_meta" - -[project] -name = "codex-lens" -version = "0.1.0" -description = "CodexLens multi-modal code analysis platform" -readme = "README.md" -requires-python = ">=3.10" -license = "MIT" -authors = [ - { name = "CodexLens contributors" } -] -dependencies = [ - "typer~=0.9.0", - "click>=8.0.0,<9", - "rich~=13.0.0", - "pydantic~=2.0.0", - "tree-sitter~=0.20.0", - "tree-sitter-python~=0.25.0", - "tree-sitter-javascript~=0.25.0", - "tree-sitter-typescript~=0.23.0", - "pathspec~=0.11.0", - "watchdog~=3.0.0", - # ast-grep for pattern-based AST matching (PyO3 bindings) - # ast-grep-py 0.40+ supports Python 3.13 - "ast-grep-py~=0.40.0", -] - -[project.optional-dependencies] -# Semantic search using fastembed (ONNX-based, lightweight ~200MB) -semantic = [ - "numpy~=1.26.0", - "fastembed~=0.2.1", - "hnswlib~=0.8.0", -] - -# GPU acceleration for semantic search (NVIDIA CUDA) -# Install with: pip install codexlens[semantic-gpu] -semantic-gpu = [ - "numpy~=1.26.0", - "fastembed~=0.2.1", - "hnswlib~=0.8.0", - "onnxruntime-gpu~=1.15.0", # CUDA support -] - -# GPU acceleration for Windows (DirectML - supports NVIDIA/AMD/Intel) -# Install with: pip install codexlens[semantic-directml] -semantic-directml = [ - "numpy~=1.26.0", - "fastembed~=0.2.1", - "hnswlib~=0.8.0", - "onnxruntime-directml~=1.15.0", # DirectML support -] - -# Cross-encoder reranking (second-stage, optional) -# Install with: pip install codexlens[reranker] (default: ONNX backend) -reranker-onnx = [ - "optimum[onnxruntime]~=2.1.0", - "onnxruntime~=1.23.0", - "transformers~=4.53.0", -] - -# Remote reranking via HTTP API -reranker-api = [ - "httpx~=0.25.0", -] - -# LLM-based reranking via ccw-litellm -reranker-litellm = [ - "ccw-litellm~=0.1.0", -] - -# Legacy sentence-transformers CrossEncoder reranker -reranker-legacy = [ - "sentence-transformers~=2.2.0", -] - -# Backward-compatible alias for default reranker backend -reranker = [ - "optimum[onnxruntime]~=2.1.0", - "onnxruntime~=1.23.0", - "transformers~=4.53.0", -] - -# Encoding detection for non-UTF8 files -encoding = [ - "chardet~=5.0.0", -] - -# Clustering for staged hybrid search (HDBSCAN + sklearn) -clustering = [ - "hdbscan~=0.8.1", - "scikit-learn~=1.3.0", -] - -# Full features including tiktoken for accurate token counting -full = [ - "tiktoken~=0.5.0", -] - -# Language Server Protocol support -lsp = [ - "pygls~=1.3.0", -] - -[project.scripts] -codexlens-lsp = "codexlens.lsp.server:main" - -[project.urls] -Homepage = "https://github.com/openai/codex-lens" - -[tool.setuptools] -package-dir = { "" = "src" } - -[tool.setuptools.package-data] -"codexlens.lsp" = ["lsp-servers.json"] - -[tool.pytest.ini_options] -markers = [ - "integration: marks tests that exercise broader end-to-end or dependency-heavy flows", -] -filterwarnings = [ - "ignore:'BaseCommand' is deprecated and will be removed in Click 9.0.*:DeprecationWarning", - "ignore:The '__version__' attribute is deprecated and will be removed in Click 9.1.*:DeprecationWarning", -] diff --git a/codex-lens/requirements.in b/codex-lens/requirements.in deleted file mode 100644 index 23638436..00000000 --- a/codex-lens/requirements.in +++ /dev/null @@ -1,22 +0,0 @@ -# Core dependencies for codex-lens -# This file tracks direct dependencies only -# Run: pip-compile requirements.in --output-file=requirements.txt - -typer~=0.9.0 -rich~=13.0.0 -pydantic~=2.0.0 -tree-sitter~=0.20.0 -tree-sitter-python~=0.25.0 -tree-sitter-javascript~=0.25.0 -tree-sitter-typescript~=0.23.0 -pathspec~=0.11.0 -watchdog~=3.0.0 -ast-grep-py~=0.40.0 - -# Semantic search dependencies -numpy~=1.24.0 -fastembed~=0.2.0 -hnswlib~=0.8.0 - -# LSP support -pygls~=1.3.0 diff --git a/codex-lens/scripts/bootstrap_reranker_local.py b/codex-lens/scripts/bootstrap_reranker_local.py deleted file mode 100644 index 7cc1d15e..00000000 --- a/codex-lens/scripts/bootstrap_reranker_local.py +++ /dev/null @@ -1,340 +0,0 @@ -#!/usr/bin/env python3 -"""Bootstrap a local-only ONNX reranker environment for CodexLens. - -This script defaults to dry-run output so it can be used as a reproducible -bootstrap manifest. When `--apply` is passed, it installs pinned reranker -packages into the selected virtual environment and can optionally pre-download -the ONNX reranker model into a repo-local Hugging Face cache. - -Examples: - python scripts/bootstrap_reranker_local.py --dry-run - python scripts/bootstrap_reranker_local.py --apply --download-model - python scripts/bootstrap_reranker_local.py --venv .venv --model Xenova/ms-marco-MiniLM-L-12-v2 -""" - -from __future__ import annotations - -import argparse -import os -import shlex -import subprocess -import sys -from dataclasses import dataclass -from pathlib import Path -from typing import Iterable - - -PROJECT_ROOT = Path(__file__).resolve().parents[1] -MANIFEST_PATH = Path(__file__).with_name("requirements-reranker-local.txt") -DEFAULT_MODEL = "Xenova/ms-marco-MiniLM-L-6-v2" -DEFAULT_HF_HOME = PROJECT_ROOT / ".cache" / "huggingface" - -STEP_NOTES = { - "runtime": "Install the local ONNX runtime first so optimum/transformers do not backtrack over runtime wheels.", - "hf-stack": "Pin the Hugging Face stack used by the ONNX reranker backend.", -} - - -@dataclass(frozen=True) -class RequirementStep: - name: str - packages: tuple[str, ...] - - -def _normalize_venv_path(raw_path: str | Path) -> Path: - return (Path(raw_path) if raw_path else PROJECT_ROOT / ".venv").expanduser().resolve() - - -def _venv_python(venv_path: Path) -> Path: - if os.name == "nt": - return venv_path / "Scripts" / "python.exe" - return venv_path / "bin" / "python" - - -def _venv_huggingface_cli(venv_path: Path) -> Path: - if os.name == "nt": - preferred = venv_path / "Scripts" / "hf.exe" - return preferred if preferred.exists() else venv_path / "Scripts" / "huggingface-cli.exe" - preferred = venv_path / "bin" / "hf" - return preferred if preferred.exists() else venv_path / "bin" / "huggingface-cli" - - -def _default_shell() -> str: - return "powershell" if os.name == "nt" else "bash" - - -def _shell_quote(value: str, shell: str) -> str: - if shell == "bash": - return shlex.quote(value) - return "'" + value.replace("'", "''") + "'" - - -def _format_command(parts: Iterable[str], shell: str) -> str: - return " ".join(_shell_quote(str(part), shell) for part in parts) - - -def _format_set_env(name: str, value: str, shell: str) -> str: - quoted_value = _shell_quote(value, shell) - if shell == "bash": - return f"export {name}={quoted_value}" - return f"$env:{name} = {quoted_value}" - - -def _model_local_dir(hf_home: Path, model_name: str) -> Path: - slug = model_name.replace("/", "--") - return hf_home / "models" / slug - - -def _parse_manifest(manifest_path: Path) -> list[RequirementStep]: - current_name: str | None = None - current_packages: list[str] = [] - steps: list[RequirementStep] = [] - - for raw_line in manifest_path.read_text(encoding="utf-8").splitlines(): - line = raw_line.strip() - if not line: - continue - - if line.startswith("# [") and line.endswith("]"): - if current_name and current_packages: - steps.append(RequirementStep(current_name, tuple(current_packages))) - current_name = line[3:-1] - current_packages = [] - continue - - if line.startswith("#"): - continue - - if current_name is None: - raise ValueError(f"Package entry found before a section header in {manifest_path}") - current_packages.append(line) - - if current_name and current_packages: - steps.append(RequirementStep(current_name, tuple(current_packages))) - - if not steps: - raise ValueError(f"No requirement steps found in {manifest_path}") - return steps - - -def _pip_install_command(python_path: Path, packages: Iterable[str]) -> list[str]: - return [ - str(python_path), - "-m", - "pip", - "install", - "--upgrade", - "--disable-pip-version-check", - "--upgrade-strategy", - "only-if-needed", - "--only-binary=:all:", - *packages, - ] - - -def _probe_command(python_path: Path) -> list[str]: - return [ - str(python_path), - "-c", - ( - "from codexlens.semantic.reranker.factory import check_reranker_available; " - "print(check_reranker_available('onnx'))" - ), - ] - - -def _download_command(huggingface_cli: Path, model_name: str, model_dir: Path) -> list[str]: - return [ - str(huggingface_cli), - "download", - model_name, - "--local-dir", - str(model_dir), - ] - - -def _print_plan( - shell: str, - venv_path: Path, - python_path: Path, - huggingface_cli: Path, - manifest_path: Path, - steps: list[RequirementStep], - model_name: str, - hf_home: Path, -) -> None: - model_dir = _model_local_dir(hf_home, model_name) - - print("CodexLens local reranker bootstrap") - print(f"manifest: {manifest_path}") - print(f"target_venv: {venv_path}") - print(f"target_python: {python_path}") - print(f"backend: onnx") - print(f"model: {model_name}") - print(f"hf_home: {hf_home}") - print("mode: dry-run") - print("notes:") - print("- Uses only the selected venv Python; no global pip commands are emitted.") - print("- Targets the local ONNX reranker backend only; no API or LiteLLM providers are involved.") - print("") - print("pinned_steps:") - for step in steps: - print(f"- {step.name}: {', '.join(step.packages)}") - note = STEP_NOTES.get(step.name) - if note: - print(f" note: {note}") - print("") - print("commands:") - print( - "1. " - + _format_command( - [ - str(python_path), - "-m", - "pip", - "install", - "--upgrade", - "pip", - "setuptools", - "wheel", - ], - shell, - ) - ) - command_index = 2 - for step in steps: - print(f"{command_index}. " + _format_command(_pip_install_command(python_path, step.packages), shell)) - command_index += 1 - print(f"{command_index}. " + _format_set_env("HF_HOME", str(hf_home), shell)) - command_index += 1 - print(f"{command_index}. " + _format_command(_download_command(huggingface_cli, model_name, model_dir), shell)) - command_index += 1 - print(f"{command_index}. " + _format_command(_probe_command(python_path), shell)) - print("") - print("optional_runtime_env:") - print(_format_set_env("RERANKER_BACKEND", "onnx", shell)) - print(_format_set_env("RERANKER_MODEL", str(model_dir), shell)) - print(_format_set_env("HF_HOME", str(hf_home), shell)) - - -def _run_command(command: list[str], *, env: dict[str, str] | None = None) -> None: - command_env = os.environ.copy() - if env: - command_env.update(env) - command_env.setdefault("PYTHONUTF8", "1") - command_env.setdefault("PYTHONIOENCODING", "utf-8") - subprocess.run(command, check=True, env=command_env) - - -def main() -> int: - parser = argparse.ArgumentParser( - description="Bootstrap pinned local-only ONNX reranker dependencies for a CodexLens virtual environment.", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=__doc__, - ) - parser.add_argument( - "--venv", - type=Path, - default=PROJECT_ROOT / ".venv", - help="Path to the CodexLens virtual environment (default: ./.venv under codex-lens).", - ) - parser.add_argument( - "--model", - default=DEFAULT_MODEL, - help=f"Model repo to pre-download for local reranking (default: {DEFAULT_MODEL}).", - ) - parser.add_argument( - "--hf-home", - type=Path, - default=DEFAULT_HF_HOME, - help="Repo-local Hugging Face cache directory used for optional model downloads.", - ) - parser.add_argument( - "--shell", - choices=("powershell", "bash"), - default=_default_shell(), - help="Shell syntax to use when rendering dry-run commands.", - ) - parser.add_argument( - "--apply", - action="store_true", - help="Execute the pinned install steps against the selected virtual environment.", - ) - parser.add_argument( - "--download-model", - action="store_true", - help="When used with --apply, pre-download the model into the configured HF_HOME directory.", - ) - parser.add_argument( - "--probe", - action="store_true", - help="When used with --apply, run a small reranker availability probe at the end.", - ) - parser.add_argument( - "--dry-run", - action="store_true", - help="Print the deterministic bootstrap plan. This is also the default when --apply is omitted.", - ) - - args = parser.parse_args() - - steps = _parse_manifest(MANIFEST_PATH) - venv_path = _normalize_venv_path(args.venv) - python_path = _venv_python(venv_path) - huggingface_cli = _venv_huggingface_cli(venv_path) - hf_home = args.hf_home.expanduser().resolve() - - if not args.apply: - _print_plan( - shell=args.shell, - venv_path=venv_path, - python_path=python_path, - huggingface_cli=huggingface_cli, - manifest_path=MANIFEST_PATH, - steps=steps, - model_name=args.model, - hf_home=hf_home, - ) - return 0 - - if not python_path.exists(): - print(f"Target venv Python not found: {python_path}", file=sys.stderr) - return 1 - - _run_command( - [ - str(python_path), - "-m", - "pip", - "install", - "--upgrade", - "pip", - "setuptools", - "wheel", - ] - ) - for step in steps: - _run_command(_pip_install_command(python_path, step.packages)) - - if args.download_model: - if not huggingface_cli.exists(): - print(f"Expected venv-local Hugging Face CLI not found: {huggingface_cli}", file=sys.stderr) - return 1 - download_env = os.environ.copy() - download_env["HF_HOME"] = str(hf_home) - hf_home.mkdir(parents=True, exist_ok=True) - _run_command(_download_command(huggingface_cli, args.model, _model_local_dir(hf_home, args.model)), env=download_env) - - if args.probe: - local_model_dir = _model_local_dir(hf_home, args.model) - probe_env = os.environ.copy() - probe_env["HF_HOME"] = str(hf_home) - probe_env.setdefault("RERANKER_BACKEND", "onnx") - probe_env.setdefault("RERANKER_MODEL", str(local_model_dir if local_model_dir.exists() else args.model)) - _run_command(_probe_command(python_path), env=probe_env) - - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/codex-lens/scripts/generate_embeddings.py b/codex-lens/scripts/generate_embeddings.py deleted file mode 100644 index c2b6a0a3..00000000 --- a/codex-lens/scripts/generate_embeddings.py +++ /dev/null @@ -1,278 +0,0 @@ -#!/usr/bin/env python3 -"""Generate vector embeddings for existing CodexLens indexes. - -This script is a CLI wrapper around the memory-efficient streaming implementation -in codexlens.cli.embedding_manager. It uses batch processing to keep memory usage -under 2GB regardless of project size. - -Requirements: - pip install codexlens[semantic] - # or - pip install fastembed numpy hnswlib - -Usage: - # Generate embeddings for a single index - python generate_embeddings.py /path/to/_index.db - - # Use specific embedding model - python generate_embeddings.py /path/to/_index.db --model code - - # Generate centralized embeddings for all indexes in a directory - python generate_embeddings.py --centralized ~/.codexlens/indexes - - # Force regeneration - python generate_embeddings.py /path/to/_index.db --force -""" - -import argparse -import logging -import sys -import warnings -from pathlib import Path - -# Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(levelname)s - %(message)s', - datefmt='%H:%M:%S' -) -logger = logging.getLogger(__name__) - -# Import the memory-efficient implementation -try: - from codexlens.cli.embedding_manager import ( - generate_embeddings, - generate_dense_embeddings_centralized, - ) - from codexlens.semantic import SEMANTIC_AVAILABLE -except ImportError as exc: - logger.error(f"Failed to import codexlens: {exc}") - logger.error("Make sure codexlens is installed: pip install codexlens") - SEMANTIC_AVAILABLE = False - - -def check_dependencies(): - """Check if semantic search dependencies are available.""" - if not SEMANTIC_AVAILABLE: - logger.error("Semantic search dependencies not available") - logger.error("Install with: pip install codexlens[semantic]") - logger.error("Or: pip install fastembed numpy hnswlib") - return False - return True - - -def progress_callback(message: str): - """Callback function for progress updates.""" - logger.info(message) - - -def generate_embeddings_for_index( - index_db_path: Path, - model_profile: str = "code", - force: bool = False, - chunk_size: int = 2000, - **kwargs # Ignore unused parameters (workers, batch_size) for backward compatibility -) -> dict: - """Generate embeddings for an index using memory-efficient streaming. - - This function wraps the streaming implementation from embedding_manager - to maintain CLI compatibility while using the memory-optimized approach. - - Args: - index_db_path: Path to _index.db file - model_profile: Model profile to use (fast, code, multilingual, balanced) - force: If True, regenerate even if embeddings exist - chunk_size: Maximum chunk size in characters - **kwargs: Additional parameters (ignored for compatibility) - - Returns: - Dictionary with generation statistics - """ - logger.info(f"Processing index: {index_db_path}") - - # Call the memory-efficient streaming implementation - result = generate_embeddings( - index_path=index_db_path, - model_profile=model_profile, - force=force, - chunk_size=chunk_size, - progress_callback=progress_callback, - ) - - if not result["success"]: - if "error" in result: - logger.error(result["error"]) - return result - - # Extract result data and log summary - data = result["result"] - logger.info("=" * 60) - logger.info(f"Completed in {data['elapsed_time']:.1f}s") - logger.info(f"Total chunks created: {data['chunks_created']}") - logger.info(f"Files processed: {data['files_processed']}") - if data['files_failed'] > 0: - logger.warning(f"Failed files: {data['files_failed']}") - if data.get('failed_files'): - for file_path, error in data['failed_files']: - logger.warning(f" {file_path}: {error}") - - return { - "success": True, - "chunks_created": data["chunks_created"], - "files_processed": data["files_processed"], - "files_failed": data["files_failed"], - "elapsed_time": data["elapsed_time"], - } - - -def main(): - parser = argparse.ArgumentParser( - description="Generate vector embeddings for CodexLens indexes (memory-efficient streaming)", - formatter_class=argparse.RawDescriptionHelpFormatter, - epilog=__doc__ - ) - - parser.add_argument( - "index_path", - type=Path, - help="Path to _index.db file or directory for centralized mode" - ) - - parser.add_argument( - "--centralized", - "-c", - action="store_true", - help="Use centralized vector storage (single HNSW index at project root)" - ) - - parser.add_argument( - "--scan", - action="store_true", - help="(Deprecated) Use --centralized instead" - ) - - parser.add_argument( - "--model", - type=str, - default="code", - choices=["fast", "code", "multilingual", "balanced"], - help="Embedding model profile (default: code)" - ) - - parser.add_argument( - "--chunk-size", - type=int, - default=2000, - help="Maximum chunk size in characters (default: 2000)" - ) - - parser.add_argument( - "--workers", - type=int, - default=0, - help="(Deprecated) Kept for backward compatibility, ignored" - ) - - parser.add_argument( - "--batch-size", - type=int, - default=256, - help="(Deprecated) Kept for backward compatibility, ignored" - ) - - parser.add_argument( - "--force", - action="store_true", - help="Regenerate embeddings even if they exist" - ) - - parser.add_argument( - "--verbose", - "-v", - action="store_true", - help="Enable verbose logging" - ) - - args = parser.parse_args() - - # Configure logging level - if args.verbose: - logging.getLogger().setLevel(logging.DEBUG) - - # Check dependencies - if not check_dependencies(): - sys.exit(1) - - # Resolve path - index_path = args.index_path.expanduser().resolve() - - if not index_path.exists(): - logger.error(f"Path not found: {index_path}") - sys.exit(1) - - # Handle deprecated --scan flag - use_centralized = args.centralized - if args.scan: - warnings.warn( - "--scan is deprecated, use --centralized instead", - DeprecationWarning - ) - logger.warning("--scan is deprecated. Use --centralized instead.") - use_centralized = True - - # Determine if using centralized mode or single file - if use_centralized or index_path.is_dir(): - # Centralized mode - single HNSW index at project root - if index_path.is_file(): - logger.error("--centralized requires a directory path") - sys.exit(1) - - logger.info(f"Generating centralized embeddings for: {index_path}") - result = generate_dense_embeddings_centralized( - index_root=index_path, - model_profile=args.model, - force=args.force, - chunk_size=args.chunk_size, - progress_callback=progress_callback, - ) - - if not result["success"]: - logger.error(f"Failed: {result.get('error', 'Unknown error')}") - sys.exit(1) - - # Log summary - data = result["result"] - logger.info(f"\n{'='*60}") - logger.info("CENTRALIZED EMBEDDING COMPLETE") - logger.info(f"{'='*60}") - logger.info(f"Total chunks created: {data['chunks_created']}") - logger.info(f"Total files processed: {data['files_processed']}") - if data.get('files_failed', 0) > 0: - logger.warning(f"Total files failed: {data['files_failed']}") - logger.info(f"Central index: {data.get('central_index_path', 'N/A')}") - logger.info(f"Time: {data.get('elapsed_time', 0):.1f}s") - - else: - # Single index mode - if not index_path.name.endswith("_index.db"): - logger.error("File must be named '_index.db'") - sys.exit(1) - - result = generate_embeddings_for_index( - index_path, - model_profile=args.model, - force=args.force, - chunk_size=args.chunk_size, - ) - - if not result["success"]: - logger.error(f"Failed: {result.get('error', 'Unknown error')}") - sys.exit(1) - - logger.info("\nv Embeddings generation complete!") - logger.info("\nYou can now use vector search:") - logger.info(" codexlens search 'your query' --mode pure-vector") - - -if __name__ == "__main__": - main() diff --git a/codex-lens/scripts/requirements-reranker-local.txt b/codex-lens/scripts/requirements-reranker-local.txt deleted file mode 100644 index 789e742b..00000000 --- a/codex-lens/scripts/requirements-reranker-local.txt +++ /dev/null @@ -1,13 +0,0 @@ -# Ordered local ONNX reranker pins for CodexLens. -# Validated against the repo-local Python 3.13 virtualenv on Windows. -# bootstrap_reranker_local.py installs each section in file order to keep -# pip resolver work bounded and repeatable. - -# [runtime] -numpy==2.4.0 -onnxruntime==1.23.2 - -# [hf-stack] -huggingface-hub==0.36.2 -transformers==4.53.3 -optimum[onnxruntime]==2.1.0 diff --git a/codex-lens/src/.gitignore b/codex-lens/src/.gitignore deleted file mode 100644 index b4a7d405..00000000 --- a/codex-lens/src/.gitignore +++ /dev/null @@ -1 +0,0 @@ -.ace-tool/ diff --git a/codex-lens/src/codex_lens.egg-info/PKG-INFO b/codex-lens/src/codex_lens.egg-info/PKG-INFO deleted file mode 100644 index cb0fdbd8..00000000 --- a/codex-lens/src/codex_lens.egg-info/PKG-INFO +++ /dev/null @@ -1,119 +0,0 @@ -Metadata-Version: 2.4 -Name: codex-lens -Version: 0.1.0 -Summary: CodexLens multi-modal code analysis platform -Author: CodexLens contributors -License-Expression: MIT -Project-URL: Homepage, https://github.com/openai/codex-lens -Requires-Python: >=3.10 -Description-Content-Type: text/markdown -License-File: LICENSE -Requires-Dist: typer~=0.9.0 -Requires-Dist: click<9,>=8.0.0 -Requires-Dist: rich~=13.0.0 -Requires-Dist: pydantic~=2.0.0 -Requires-Dist: tree-sitter~=0.20.0 -Requires-Dist: tree-sitter-python~=0.25.0 -Requires-Dist: tree-sitter-javascript~=0.25.0 -Requires-Dist: tree-sitter-typescript~=0.23.0 -Requires-Dist: pathspec~=0.11.0 -Requires-Dist: watchdog~=3.0.0 -Requires-Dist: ast-grep-py~=0.40.0 -Provides-Extra: semantic -Requires-Dist: numpy~=1.26.0; extra == "semantic" -Requires-Dist: fastembed~=0.2.1; extra == "semantic" -Requires-Dist: hnswlib~=0.8.0; extra == "semantic" -Provides-Extra: semantic-gpu -Requires-Dist: numpy~=1.26.0; extra == "semantic-gpu" -Requires-Dist: fastembed~=0.2.1; extra == "semantic-gpu" -Requires-Dist: hnswlib~=0.8.0; extra == "semantic-gpu" -Requires-Dist: onnxruntime-gpu~=1.15.0; extra == "semantic-gpu" -Provides-Extra: semantic-directml -Requires-Dist: numpy~=1.26.0; extra == "semantic-directml" -Requires-Dist: fastembed~=0.2.1; extra == "semantic-directml" -Requires-Dist: hnswlib~=0.8.0; extra == "semantic-directml" -Requires-Dist: onnxruntime-directml~=1.15.0; extra == "semantic-directml" -Provides-Extra: reranker-onnx -Requires-Dist: optimum~=1.16.0; extra == "reranker-onnx" -Requires-Dist: onnxruntime~=1.15.0; extra == "reranker-onnx" -Requires-Dist: transformers~=4.36.0; extra == "reranker-onnx" -Provides-Extra: reranker-api -Requires-Dist: httpx~=0.25.0; extra == "reranker-api" -Provides-Extra: reranker-litellm -Requires-Dist: ccw-litellm~=0.1.0; extra == "reranker-litellm" -Provides-Extra: reranker-legacy -Requires-Dist: sentence-transformers~=2.2.0; extra == "reranker-legacy" -Provides-Extra: reranker -Requires-Dist: optimum~=1.16.0; extra == "reranker" -Requires-Dist: onnxruntime~=1.15.0; extra == "reranker" -Requires-Dist: transformers~=4.36.0; extra == "reranker" -Provides-Extra: encoding -Requires-Dist: chardet~=5.0.0; extra == "encoding" -Provides-Extra: clustering -Requires-Dist: hdbscan~=0.8.1; extra == "clustering" -Requires-Dist: scikit-learn~=1.3.0; extra == "clustering" -Provides-Extra: full -Requires-Dist: tiktoken~=0.5.0; extra == "full" -Provides-Extra: lsp -Requires-Dist: pygls~=1.3.0; extra == "lsp" -Dynamic: license-file - -# CodexLens - -CodexLens is a multi-modal code analysis platform designed to provide comprehensive code understanding and analysis capabilities. - -## Features - -- **Multi-language Support**: Analyze code in Python, JavaScript, TypeScript and more using Tree-sitter parsers -- **Semantic Search**: Find relevant code snippets using semantic understanding with fastembed and HNSWLIB -- **Code Parsing**: Advanced code structure parsing with tree-sitter -- **Flexible Architecture**: Modular design for easy extension and customization - -## Installation - -### Basic Installation - -```bash -pip install codex-lens -``` - -### With Semantic Search - -```bash -pip install codex-lens[semantic] -``` - -### With GPU Acceleration (NVIDIA CUDA) - -```bash -pip install codex-lens[semantic-gpu] -``` - -### With DirectML (Windows - NVIDIA/AMD/Intel) - -```bash -pip install codex-lens[semantic-directml] -``` - -### With All Optional Features - -```bash -pip install codex-lens[full] -``` - -## Requirements - -- Python >= 3.10 -- See `pyproject.toml` for detailed dependency list - -## Development - -This project uses setuptools for building and packaging. - -## License - -MIT License - -## Authors - -CodexLens Contributors diff --git a/codex-lens/src/codex_lens.egg-info/SOURCES.txt b/codex-lens/src/codex_lens.egg-info/SOURCES.txt deleted file mode 100644 index b94c2ed5..00000000 --- a/codex-lens/src/codex_lens.egg-info/SOURCES.txt +++ /dev/null @@ -1,208 +0,0 @@ -LICENSE -README.md -pyproject.toml -src/codex_lens.egg-info/PKG-INFO -src/codex_lens.egg-info/SOURCES.txt -src/codex_lens.egg-info/dependency_links.txt -src/codex_lens.egg-info/entry_points.txt -src/codex_lens.egg-info/requires.txt -src/codex_lens.egg-info/top_level.txt -src/codexlens/__init__.py -src/codexlens/__main__.py -src/codexlens/config.py -src/codexlens/entities.py -src/codexlens/env_config.py -src/codexlens/errors.py -src/codexlens/api/__init__.py -src/codexlens/api/definition.py -src/codexlens/api/file_context.py -src/codexlens/api/hover.py -src/codexlens/api/lsp_lifecycle.py -src/codexlens/api/models.py -src/codexlens/api/references.py -src/codexlens/api/semantic.py -src/codexlens/api/symbols.py -src/codexlens/api/utils.py -src/codexlens/cli/__init__.py -src/codexlens/cli/commands.py -src/codexlens/cli/embedding_manager.py -src/codexlens/cli/model_manager.py -src/codexlens/cli/output.py -src/codexlens/hybrid_search/__init__.py -src/codexlens/hybrid_search/data_structures.py -src/codexlens/indexing/__init__.py -src/codexlens/indexing/embedding.py -src/codexlens/indexing/symbol_extractor.py -src/codexlens/lsp/__init__.py -src/codexlens/lsp/handlers.py -src/codexlens/lsp/keepalive_bridge.py -src/codexlens/lsp/lsp-servers.json -src/codexlens/lsp/lsp_bridge.py -src/codexlens/lsp/lsp_graph_builder.py -src/codexlens/lsp/providers.py -src/codexlens/lsp/server.py -src/codexlens/lsp/standalone_manager.py -src/codexlens/mcp/__init__.py -src/codexlens/mcp/hooks.py -src/codexlens/mcp/provider.py -src/codexlens/mcp/schema.py -src/codexlens/parsers/__init__.py -src/codexlens/parsers/astgrep_binding.py -src/codexlens/parsers/astgrep_js_ts_processor.py -src/codexlens/parsers/astgrep_processor.py -src/codexlens/parsers/encoding.py -src/codexlens/parsers/factory.py -src/codexlens/parsers/tokenizer.py -src/codexlens/parsers/treesitter_parser.py -src/codexlens/parsers/patterns/__init__.py -src/codexlens/parsers/patterns/javascript/__init__.py -src/codexlens/parsers/patterns/python/__init__.py -src/codexlens/parsers/patterns/typescript/__init__.py -src/codexlens/search/__init__.py -src/codexlens/search/binary_searcher.py -src/codexlens/search/chain_search.py -src/codexlens/search/enrichment.py -src/codexlens/search/global_graph_expander.py -src/codexlens/search/graph_expander.py -src/codexlens/search/hybrid_search.py -src/codexlens/search/query_parser.py -src/codexlens/search/ranking.py -src/codexlens/search/association_tree/__init__.py -src/codexlens/search/association_tree/builder.py -src/codexlens/search/association_tree/data_structures.py -src/codexlens/search/association_tree/deduplicator.py -src/codexlens/search/clustering/__init__.py -src/codexlens/search/clustering/base.py -src/codexlens/search/clustering/dbscan_strategy.py -src/codexlens/search/clustering/factory.py -src/codexlens/search/clustering/frequency_strategy.py -src/codexlens/search/clustering/hdbscan_strategy.py -src/codexlens/search/clustering/noop_strategy.py -src/codexlens/semantic/__init__.py -src/codexlens/semantic/ann_index.py -src/codexlens/semantic/base.py -src/codexlens/semantic/chunker.py -src/codexlens/semantic/code_extractor.py -src/codexlens/semantic/embedder.py -src/codexlens/semantic/factory.py -src/codexlens/semantic/gpu_support.py -src/codexlens/semantic/litellm_embedder.py -src/codexlens/semantic/rotational_embedder.py -src/codexlens/semantic/vector_store.py -src/codexlens/semantic/reranker/__init__.py -src/codexlens/semantic/reranker/api_reranker.py -src/codexlens/semantic/reranker/base.py -src/codexlens/semantic/reranker/factory.py -src/codexlens/semantic/reranker/fastembed_reranker.py -src/codexlens/semantic/reranker/legacy.py -src/codexlens/semantic/reranker/litellm_reranker.py -src/codexlens/semantic/reranker/onnx_reranker.py -src/codexlens/storage/__init__.py -src/codexlens/storage/deepwiki_models.py -src/codexlens/storage/deepwiki_store.py -src/codexlens/storage/dir_index.py -src/codexlens/storage/file_cache.py -src/codexlens/storage/global_index.py -src/codexlens/storage/index_tree.py -src/codexlens/storage/merkle_tree.py -src/codexlens/storage/migration_manager.py -src/codexlens/storage/path_mapper.py -src/codexlens/storage/registry.py -src/codexlens/storage/sqlite_store.py -src/codexlens/storage/sqlite_utils.py -src/codexlens/storage/vector_meta_store.py -src/codexlens/storage/migrations/__init__.py -src/codexlens/storage/migrations/migration_001_normalize_keywords.py -src/codexlens/storage/migrations/migration_002_add_token_metadata.py -src/codexlens/storage/migrations/migration_004_dual_fts.py -src/codexlens/storage/migrations/migration_005_cleanup_unused_fields.py -src/codexlens/storage/migrations/migration_006_enhance_relationships.py -src/codexlens/storage/migrations/migration_007_add_graph_neighbors.py -src/codexlens/storage/migrations/migration_008_add_merkle_hashes.py -src/codexlens/storage/migrations/migration_010_add_multi_vector_chunks.py -src/codexlens/tools/__init__.py -src/codexlens/tools/deepwiki_generator.py -src/codexlens/watcher/__init__.py -src/codexlens/watcher/events.py -src/codexlens/watcher/file_watcher.py -src/codexlens/watcher/incremental_indexer.py -src/codexlens/watcher/manager.py -tests/test_ann_index.py -tests/test_api_reranker.py -tests/test_association_tree.py -tests/test_astgrep_binding.py -tests/test_binary_searcher.py -tests/test_cascade_strategies.py -tests/test_chain_search.py -tests/test_cli_help.py -tests/test_cli_hybrid_search.py -tests/test_cli_output.py -tests/test_clustering_strategies.py -tests/test_code_extractor.py -tests/test_config.py -tests/test_config_cascade.py -tests/test_config_staged_env_overrides.py -tests/test_deepwiki_store.py -tests/test_deepwiki_types.py -tests/test_dual_fts.py -tests/test_embedder.py -tests/test_embedding_backend_availability.py -tests/test_encoding.py -tests/test_enrichment.py -tests/test_entities.py -tests/test_errors.py -tests/test_file_cache.py -tests/test_global_graph_expander.py -tests/test_global_index.py -tests/test_global_relationships.py -tests/test_global_symbol_index.py -tests/test_graph_expansion.py -tests/test_hybrid_chunker.py -tests/test_hybrid_search_e2e.py -tests/test_hybrid_search_reranker_backend.py -tests/test_hybrid_search_unit.py -tests/test_incremental_indexer.py -tests/test_incremental_indexing.py -tests/test_litellm_reranker.py -tests/test_lsp_graph_builder_depth.py -tests/test_merkle_detection.py -tests/test_migrations.py -tests/test_parser_integration.py -tests/test_parsers.py -tests/test_path_mapper_windows_drive.py -tests/test_performance_optimizations.py -tests/test_pure_vector_search.py -tests/test_query_parser.py -tests/test_ranking.py -tests/test_recursive_splitting.py -tests/test_registry.py -tests/test_reranker_backends.py -tests/test_reranker_factory.py -tests/test_result_grouping.py -tests/test_rrf_fusion.py -tests/test_schema_cleanup_migration.py -tests/test_search_comparison.py -tests/test_search_comprehensive.py -tests/test_search_full_coverage.py -tests/test_search_performance.py -tests/test_semantic.py -tests/test_semantic_search.py -tests/test_sqlite_store.py -tests/test_stage1_binary_search_uses_chunk_lines.py -tests/test_staged_cascade.py -tests/test_staged_cascade_lsp_depth.py -tests/test_staged_cascade_realtime_lsp.py -tests/test_staged_stage1_fallback_seed.py -tests/test_staged_stage3_fast_strategies.py -tests/test_standalone_lsp_manager_open_document_cache.py -tests/test_static_graph_integration.py -tests/test_storage.py -tests/test_storage_concurrency.py -tests/test_symbol_extractor.py -tests/test_token_chunking.py -tests/test_token_storage.py -tests/test_tokenizer.py -tests/test_tokenizer_performance.py -tests/test_treesitter_parser.py -tests/test_vector_search_full.py -tests/test_vector_store.py \ No newline at end of file diff --git a/codex-lens/src/codex_lens.egg-info/dependency_links.txt b/codex-lens/src/codex_lens.egg-info/dependency_links.txt deleted file mode 100644 index 8b137891..00000000 --- a/codex-lens/src/codex_lens.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/codex-lens/src/codex_lens.egg-info/entry_points.txt b/codex-lens/src/codex_lens.egg-info/entry_points.txt deleted file mode 100644 index efeefd53..00000000 --- a/codex-lens/src/codex_lens.egg-info/entry_points.txt +++ /dev/null @@ -1,2 +0,0 @@ -[console_scripts] -codexlens-lsp = codexlens.lsp.server:main diff --git a/codex-lens/src/codex_lens.egg-info/requires.txt b/codex-lens/src/codex_lens.egg-info/requires.txt deleted file mode 100644 index 3dfaf117..00000000 --- a/codex-lens/src/codex_lens.egg-info/requires.txt +++ /dev/null @@ -1,60 +0,0 @@ -typer~=0.9.0 -click<9,>=8.0.0 -rich~=13.0.0 -pydantic~=2.0.0 -tree-sitter~=0.20.0 -tree-sitter-python~=0.25.0 -tree-sitter-javascript~=0.25.0 -tree-sitter-typescript~=0.23.0 -pathspec~=0.11.0 -watchdog~=3.0.0 -ast-grep-py~=0.40.0 - -[clustering] -hdbscan~=0.8.1 -scikit-learn~=1.3.0 - -[encoding] -chardet~=5.0.0 - -[full] -tiktoken~=0.5.0 - -[lsp] -pygls~=1.3.0 - -[reranker] -optimum~=1.16.0 -onnxruntime~=1.15.0 -transformers~=4.36.0 - -[reranker-api] -httpx~=0.25.0 - -[reranker-legacy] -sentence-transformers~=2.2.0 - -[reranker-litellm] -ccw-litellm~=0.1.0 - -[reranker-onnx] -optimum~=1.16.0 -onnxruntime~=1.15.0 -transformers~=4.36.0 - -[semantic] -numpy~=1.26.0 -fastembed~=0.2.1 -hnswlib~=0.8.0 - -[semantic-directml] -numpy~=1.26.0 -fastembed~=0.2.1 -hnswlib~=0.8.0 -onnxruntime-directml~=1.15.0 - -[semantic-gpu] -numpy~=1.26.0 -fastembed~=0.2.1 -hnswlib~=0.8.0 -onnxruntime-gpu~=1.15.0 diff --git a/codex-lens/src/codex_lens.egg-info/top_level.txt b/codex-lens/src/codex_lens.egg-info/top_level.txt deleted file mode 100644 index e81f348f..00000000 --- a/codex-lens/src/codex_lens.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -codexlens diff --git a/codex-lens/src/codexlens/__init__.py b/codex-lens/src/codexlens/__init__.py deleted file mode 100644 index 56f2e508..00000000 --- a/codex-lens/src/codexlens/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -"""CodexLens package.""" - -from __future__ import annotations - -from . import config, entities, errors -from .config import Config -from .entities import IndexedFile, SearchResult, SemanticChunk, Symbol -from .errors import CodexLensError, ConfigError, ParseError, SearchError, StorageError - -__version__ = "0.1.0" - -__all__ = [ - "__version__", - "config", - "entities", - "errors", - "Config", - "IndexedFile", - "SearchResult", - "SemanticChunk", - "Symbol", - "CodexLensError", - "ConfigError", - "ParseError", - "StorageError", - "SearchError", -] - diff --git a/codex-lens/src/codexlens/__main__.py b/codex-lens/src/codexlens/__main__.py deleted file mode 100644 index 35190f97..00000000 --- a/codex-lens/src/codexlens/__main__.py +++ /dev/null @@ -1,14 +0,0 @@ -"""Module entrypoint for `python -m codexlens`.""" - -from __future__ import annotations - -from codexlens.cli import app - - -def main() -> None: - app() - - -if __name__ == "__main__": - main() - diff --git a/codex-lens/src/codexlens/api/__init__.py b/codex-lens/src/codexlens/api/__init__.py deleted file mode 100644 index fd961a56..00000000 --- a/codex-lens/src/codexlens/api/__init__.py +++ /dev/null @@ -1,93 +0,0 @@ -"""Codexlens Public API Layer. - -This module exports all public API functions and dataclasses for the -codexlens LSP-like functionality. - -Dataclasses (from models.py): - - CallInfo: Call relationship information - - MethodContext: Method context with call relationships - - FileContextResult: File context result with method summaries - - DefinitionResult: Definition lookup result - - ReferenceResult: Reference lookup result - - GroupedReferences: References grouped by definition - - SymbolInfo: Symbol information for workspace search - - HoverInfo: Hover information for a symbol - - SemanticResult: Semantic search result - -Utility functions (from utils.py): - - resolve_project: Resolve and validate project root path - - normalize_relationship_type: Normalize relationship type to canonical form - - rank_by_proximity: Rank results by file path proximity - -Example: - >>> from codexlens.api import ( - ... DefinitionResult, - ... resolve_project, - ... normalize_relationship_type - ... ) - >>> project = resolve_project("/path/to/project") - >>> rel_type = normalize_relationship_type("calls") - >>> print(rel_type) - 'call' -""" - -from __future__ import annotations - -# Dataclasses -from .models import ( - CallInfo, - MethodContext, - FileContextResult, - DefinitionResult, - ReferenceResult, - GroupedReferences, - SymbolInfo, - HoverInfo, - SemanticResult, -) - -# Utility functions -from .utils import ( - resolve_project, - normalize_relationship_type, - rank_by_proximity, - rank_by_score, -) - -# API functions -from .definition import find_definition -from .symbols import workspace_symbols -from .hover import get_hover -from .file_context import file_context -from .references import find_references -from .semantic import semantic_search -from .lsp_lifecycle import lsp_start, lsp_stop, lsp_restart - -__all__ = [ - # Dataclasses - "CallInfo", - "MethodContext", - "FileContextResult", - "DefinitionResult", - "ReferenceResult", - "GroupedReferences", - "SymbolInfo", - "HoverInfo", - "SemanticResult", - # Utility functions - "resolve_project", - "normalize_relationship_type", - "rank_by_proximity", - "rank_by_score", - # API functions - "find_definition", - "workspace_symbols", - "get_hover", - "file_context", - "find_references", - "semantic_search", - # LSP lifecycle - "lsp_start", - "lsp_stop", - "lsp_restart", -] diff --git a/codex-lens/src/codexlens/api/definition.py b/codex-lens/src/codexlens/api/definition.py deleted file mode 100644 index ecfe874b..00000000 --- a/codex-lens/src/codexlens/api/definition.py +++ /dev/null @@ -1,126 +0,0 @@ -"""find_definition API implementation. - -This module provides the find_definition() function for looking up -symbol definitions with a 3-stage fallback strategy. -""" - -from __future__ import annotations - -import logging -from pathlib import Path -from typing import List, Optional - -from ..entities import Symbol -from ..storage.global_index import GlobalSymbolIndex -from ..storage.registry import RegistryStore -from ..errors import IndexNotFoundError -from .models import DefinitionResult -from .utils import resolve_project, rank_by_proximity - -logger = logging.getLogger(__name__) - - -def find_definition( - project_root: str, - symbol_name: str, - symbol_kind: Optional[str] = None, - file_context: Optional[str] = None, - limit: int = 10 -) -> List[DefinitionResult]: - """Find definition locations for a symbol. - - Uses a 3-stage fallback strategy: - 1. Exact match with kind filter - 2. Exact match without kind filter - 3. Prefix match - - Args: - project_root: Project root directory (for index location) - symbol_name: Name of the symbol to find - symbol_kind: Optional symbol kind filter (class, function, etc.) - file_context: Optional file path for proximity ranking - limit: Maximum number of results to return - - Returns: - List of DefinitionResult sorted by proximity if file_context provided - - Raises: - IndexNotFoundError: If project is not indexed - """ - project_path = resolve_project(project_root) - - # Get project info from registry - registry = RegistryStore() - project_info = registry.get_project(project_path) - if project_info is None: - raise IndexNotFoundError(f"Project not indexed: {project_path}") - - # Open global symbol index - index_db = project_info.index_root / "_global_symbols.db" - if not index_db.exists(): - raise IndexNotFoundError(f"Global symbol index not found: {index_db}") - - global_index = GlobalSymbolIndex(str(index_db), project_info.id) - - # Stage 1: Exact match with kind filter - results = _search_with_kind(global_index, symbol_name, symbol_kind, limit) - if results: - logger.debug(f"Stage 1 (exact+kind): Found {len(results)} results for {symbol_name}") - return _rank_and_convert(results, file_context) - - # Stage 2: Exact match without kind (if kind was specified) - if symbol_kind: - results = _search_with_kind(global_index, symbol_name, None, limit) - if results: - logger.debug(f"Stage 2 (exact): Found {len(results)} results for {symbol_name}") - return _rank_and_convert(results, file_context) - - # Stage 3: Prefix match - results = global_index.search( - name=symbol_name, - kind=None, - limit=limit, - prefix_mode=True - ) - if results: - logger.debug(f"Stage 3 (prefix): Found {len(results)} results for {symbol_name}") - return _rank_and_convert(results, file_context) - - logger.debug(f"No definitions found for {symbol_name}") - return [] - - -def _search_with_kind( - global_index: GlobalSymbolIndex, - symbol_name: str, - symbol_kind: Optional[str], - limit: int -) -> List[Symbol]: - """Search for symbols with optional kind filter.""" - return global_index.search( - name=symbol_name, - kind=symbol_kind, - limit=limit, - prefix_mode=False - ) - - -def _rank_and_convert( - symbols: List[Symbol], - file_context: Optional[str] -) -> List[DefinitionResult]: - """Convert symbols to DefinitionResult and rank by proximity.""" - results = [ - DefinitionResult( - name=sym.name, - kind=sym.kind, - file_path=sym.file or "", - line=sym.range[0] if sym.range else 1, - end_line=sym.range[1] if sym.range else 1, - signature=None, # Could extract from file if needed - container=None, # Could extract from parent symbol - score=1.0 - ) - for sym in symbols - ] - return rank_by_proximity(results, file_context) diff --git a/codex-lens/src/codexlens/api/file_context.py b/codex-lens/src/codexlens/api/file_context.py deleted file mode 100644 index fafa209f..00000000 --- a/codex-lens/src/codexlens/api/file_context.py +++ /dev/null @@ -1,272 +0,0 @@ -"""file_context API implementation. - -This module provides the file_context() function for retrieving -method call graphs from a source file. -""" - -from __future__ import annotations - -import logging -import os -from pathlib import Path -from typing import List, Optional, Tuple - -from ..entities import Symbol -from ..storage.global_index import GlobalSymbolIndex -from ..storage.dir_index import DirIndexStore -from ..storage.registry import RegistryStore -from ..errors import IndexNotFoundError -from .models import ( - FileContextResult, - MethodContext, - CallInfo, -) -from .utils import resolve_project, normalize_relationship_type - -logger = logging.getLogger(__name__) - - -def file_context( - project_root: str, - file_path: str, - include_calls: bool = True, - include_callers: bool = True, - max_depth: int = 1, - format: str = "brief" -) -> FileContextResult: - """Get method call context for a code file. - - Retrieves all methods/functions in the file along with their - outgoing calls and incoming callers. - - Args: - project_root: Project root directory (for index location) - file_path: Path to the code file to analyze - include_calls: Whether to include outgoing calls - include_callers: Whether to include incoming callers - max_depth: Call chain depth (V1 only supports 1) - format: Output format (brief | detailed | tree) - - Returns: - FileContextResult with method contexts and summary - - Raises: - IndexNotFoundError: If project is not indexed - FileNotFoundError: If file does not exist - ValueError: If max_depth > 1 (V1 limitation) - """ - # V1 limitation: only depth=1 supported - if max_depth > 1: - raise ValueError( - f"max_depth > 1 not supported in V1. " - f"Requested: {max_depth}, supported: 1" - ) - - project_path = resolve_project(project_root) - file_path_resolved = Path(file_path).resolve() - - # Validate file exists - if not file_path_resolved.exists(): - raise FileNotFoundError(f"File not found: {file_path_resolved}") - - # Get project info from registry - registry = RegistryStore() - project_info = registry.get_project(project_path) - if project_info is None: - raise IndexNotFoundError(f"Project not indexed: {project_path}") - - # Open global symbol index - index_db = project_info.index_root / "_global_symbols.db" - if not index_db.exists(): - raise IndexNotFoundError(f"Global symbol index not found: {index_db}") - - global_index = GlobalSymbolIndex(str(index_db), project_info.id) - - # Get all symbols in the file - symbols = global_index.get_file_symbols(str(file_path_resolved)) - - # Filter to functions, methods, and classes - method_symbols = [ - s for s in symbols - if s.kind in ("function", "method", "class") - ] - - logger.debug(f"Found {len(method_symbols)} methods in {file_path}") - - # Try to find dir_index for relationship queries - dir_index = _find_dir_index(project_info, file_path_resolved) - - # Build method contexts - methods: List[MethodContext] = [] - outgoing_resolved = True - incoming_resolved = True - targets_resolved = True - - for symbol in method_symbols: - calls: List[CallInfo] = [] - callers: List[CallInfo] = [] - - if include_calls and dir_index: - try: - outgoing = dir_index.get_outgoing_calls( - str(file_path_resolved), - symbol.name - ) - for target_name, rel_type, line, target_file in outgoing: - calls.append(CallInfo( - symbol_name=target_name, - file_path=target_file, - line=line, - relationship=normalize_relationship_type(rel_type) - )) - if target_file is None: - targets_resolved = False - except Exception as e: - logger.debug(f"Failed to get outgoing calls: {e}") - outgoing_resolved = False - - if include_callers and dir_index: - try: - incoming = dir_index.get_incoming_calls(symbol.name) - for source_name, rel_type, line, source_file in incoming: - callers.append(CallInfo( - symbol_name=source_name, - file_path=source_file, - line=line, - relationship=normalize_relationship_type(rel_type) - )) - except Exception as e: - logger.debug(f"Failed to get incoming calls: {e}") - incoming_resolved = False - - methods.append(MethodContext( - name=symbol.name, - kind=symbol.kind, - line_range=symbol.range if symbol.range else (1, 1), - signature=None, # Could extract from source - calls=calls, - callers=callers - )) - - # Detect language from file extension - language = _detect_language(file_path_resolved) - - # Generate summary - summary = _generate_summary(file_path_resolved, methods, format) - - return FileContextResult( - file_path=str(file_path_resolved), - language=language, - methods=methods, - summary=summary, - discovery_status={ - "outgoing_resolved": outgoing_resolved, - "incoming_resolved": incoming_resolved, - "targets_resolved": targets_resolved - } - ) - - -def _find_dir_index(project_info, file_path: Path) -> Optional[DirIndexStore]: - """Find the dir_index that contains the file. - - Args: - project_info: Project information from registry - file_path: Path to the file - - Returns: - DirIndexStore if found, None otherwise - """ - try: - # Look for _index.db in file's directory or parent directories - current = file_path.parent - while current != current.parent: - index_db = current / "_index.db" - if index_db.exists(): - return DirIndexStore(str(index_db)) - - # Also check in project's index_root - relative = current.relative_to(project_info.source_root) - index_in_cache = project_info.index_root / relative / "_index.db" - if index_in_cache.exists(): - return DirIndexStore(str(index_in_cache)) - - current = current.parent - except Exception as e: - logger.debug(f"Failed to find dir_index: {e}") - - return None - - -def _detect_language(file_path: Path) -> str: - """Detect programming language from file extension. - - Args: - file_path: Path to the file - - Returns: - Language name - """ - ext_map = { - ".py": "python", - ".js": "javascript", - ".ts": "typescript", - ".jsx": "javascript", - ".tsx": "typescript", - ".go": "go", - ".rs": "rust", - ".java": "java", - ".swift": "swift", - ".c": "c", - ".cpp": "cpp", - ".h": "c", - ".hpp": "cpp", - } - return ext_map.get(file_path.suffix.lower(), "unknown") - - -def _generate_summary( - file_path: Path, - methods: List[MethodContext], - format: str -) -> str: - """Generate human-readable summary of file context. - - Args: - file_path: Path to the file - methods: List of method contexts - format: Output format (brief | detailed | tree) - - Returns: - Markdown-formatted summary - """ - lines = [f"## {file_path.name} ({len(methods)} methods)\n"] - - for method in methods: - start, end = method.line_range - lines.append(f"### {method.name} (line {start}-{end})") - - if method.calls: - calls_str = ", ".join( - f"{c.symbol_name} ({c.file_path or 'unresolved'}:{c.line})" - if format == "detailed" - else c.symbol_name - for c in method.calls - ) - lines.append(f"- Calls: {calls_str}") - - if method.callers: - callers_str = ", ".join( - f"{c.symbol_name} ({c.file_path}:{c.line})" - if format == "detailed" - else c.symbol_name - for c in method.callers - ) - lines.append(f"- Called by: {callers_str}") - - if not method.calls and not method.callers: - lines.append("- (no call relationships)") - - lines.append("") - - return "\n".join(lines) diff --git a/codex-lens/src/codexlens/api/hover.py b/codex-lens/src/codexlens/api/hover.py deleted file mode 100644 index 7860c98f..00000000 --- a/codex-lens/src/codexlens/api/hover.py +++ /dev/null @@ -1,148 +0,0 @@ -"""get_hover API implementation. - -This module provides the get_hover() function for retrieving -detailed hover information for symbols. -""" - -from __future__ import annotations - -import logging -from pathlib import Path -from typing import Optional - -from ..entities import Symbol -from ..storage.global_index import GlobalSymbolIndex -from ..storage.registry import RegistryStore -from ..errors import IndexNotFoundError -from .models import HoverInfo -from .utils import resolve_project - -logger = logging.getLogger(__name__) - - -def get_hover( - project_root: str, - symbol_name: str, - file_path: Optional[str] = None -) -> Optional[HoverInfo]: - """Get detailed hover information for a symbol. - - Args: - project_root: Project root directory (for index location) - symbol_name: Name of the symbol to look up - file_path: Optional file path to disambiguate when symbol - appears in multiple files - - Returns: - HoverInfo if symbol found, None otherwise - - Raises: - IndexNotFoundError: If project is not indexed - """ - project_path = resolve_project(project_root) - - # Get project info from registry - registry = RegistryStore() - project_info = registry.get_project(project_path) - if project_info is None: - raise IndexNotFoundError(f"Project not indexed: {project_path}") - - # Open global symbol index - index_db = project_info.index_root / "_global_symbols.db" - if not index_db.exists(): - raise IndexNotFoundError(f"Global symbol index not found: {index_db}") - - global_index = GlobalSymbolIndex(str(index_db), project_info.id) - - # Search for the symbol - results = global_index.search( - name=symbol_name, - kind=None, - limit=50, - prefix_mode=False - ) - - if not results: - logger.debug(f"No hover info found for {symbol_name}") - return None - - # If file_path provided, filter to that file - if file_path: - file_path_resolved = str(Path(file_path).resolve()) - matching = [s for s in results if s.file == file_path_resolved] - if matching: - results = matching - - # Take the first result - symbol = results[0] - - # Build hover info - return HoverInfo( - name=symbol.name, - kind=symbol.kind, - signature=_extract_signature(symbol), - documentation=_extract_documentation(symbol), - file_path=symbol.file or "", - line_range=symbol.range if symbol.range else (1, 1), - type_info=_extract_type_info(symbol) - ) - - -def _extract_signature(symbol: Symbol) -> str: - """Extract signature from symbol. - - For now, generates a basic signature based on kind and name. - In a full implementation, this would parse the actual source code. - - Args: - symbol: The symbol to extract signature from - - Returns: - Signature string - """ - if symbol.kind == "function": - return f"def {symbol.name}(...)" - elif symbol.kind == "method": - return f"def {symbol.name}(self, ...)" - elif symbol.kind == "class": - return f"class {symbol.name}" - elif symbol.kind == "variable": - return symbol.name - elif symbol.kind == "constant": - return f"{symbol.name} = ..." - else: - return f"{symbol.kind} {symbol.name}" - - -def _extract_documentation(symbol: Symbol) -> Optional[str]: - """Extract documentation from symbol. - - In a full implementation, this would parse docstrings from source. - For now, returns None. - - Args: - symbol: The symbol to extract documentation from - - Returns: - Documentation string if available, None otherwise - """ - # Would need to read source file and parse docstring - # For V1, return None - return None - - -def _extract_type_info(symbol: Symbol) -> Optional[str]: - """Extract type information from symbol. - - In a full implementation, this would parse type annotations. - For now, returns None. - - Args: - symbol: The symbol to extract type info from - - Returns: - Type info string if available, None otherwise - """ - # Would need to parse type annotations from source - # For V1, return None - return None diff --git a/codex-lens/src/codexlens/api/lsp_lifecycle.py b/codex-lens/src/codexlens/api/lsp_lifecycle.py deleted file mode 100644 index ebda4691..00000000 --- a/codex-lens/src/codexlens/api/lsp_lifecycle.py +++ /dev/null @@ -1,124 +0,0 @@ -"""LSP server lifecycle management API. - -Provides synchronous wrappers around StandaloneLspManager's async -start/stop methods for use via the executeCodexLensPythonAPI bridge. -""" - -from __future__ import annotations - -import asyncio -import shutil -from typing import Any, Dict - - -def lsp_start(workspace_root: str) -> Dict[str, Any]: - """Start the standalone LSP manager and report configured servers. - - Loads configuration and checks which language server commands are - available on the system. Does NOT start individual language servers - (they start on demand when a file of that type is opened). - - Args: - workspace_root: Absolute path to the workspace root directory. - - Returns: - Dict with keys: servers (list of server info dicts), - workspace_root (str). - """ - from codexlens.lsp.standalone_manager import StandaloneLspManager - - async def _run() -> Dict[str, Any]: - manager = StandaloneLspManager(workspace_root=workspace_root) - await manager.start() - - servers = [] - for language_id, cfg in sorted(manager._configs.items()): - cmd0 = cfg.command[0] if cfg.command else None - servers.append({ - "language_id": language_id, - "display_name": cfg.display_name, - "extensions": list(cfg.extensions), - "command": list(cfg.command), - "command_available": bool(shutil.which(cmd0)) if cmd0 else False, - }) - - # Stop the manager - individual servers are started on demand - await manager.stop() - - return { - "servers": servers, - "server_count": len(servers), - "workspace_root": workspace_root, - } - - return asyncio.run(_run()) - - -def lsp_stop(workspace_root: str) -> Dict[str, Any]: - """Stop all running language servers for the given workspace. - - Creates a temporary manager instance, starts it (loads config), - then immediately stops it -- which terminates any running server - processes that match this workspace root. - - Args: - workspace_root: Absolute path to the workspace root directory. - - Returns: - Dict confirming shutdown. - """ - from codexlens.lsp.standalone_manager import StandaloneLspManager - - async def _run() -> Dict[str, Any]: - manager = StandaloneLspManager(workspace_root=workspace_root) - await manager.start() - await manager.stop() - return {"stopped": True} - - return asyncio.run(_run()) - - -def lsp_restart(workspace_root: str) -> Dict[str, Any]: - """Restart the standalone LSP manager (stop then start). - - Equivalent to calling lsp_stop followed by lsp_start, but avoids - the overhead of two separate Python process invocations. - - Args: - workspace_root: Absolute path to the workspace root directory. - - Returns: - Dict with keys: servers, server_count, workspace_root. - """ - from codexlens.lsp.standalone_manager import StandaloneLspManager - - async def _run() -> Dict[str, Any]: - # Stop phase - stop_manager = StandaloneLspManager(workspace_root=workspace_root) - await stop_manager.start() - await stop_manager.stop() - - # Start phase - start_manager = StandaloneLspManager(workspace_root=workspace_root) - await start_manager.start() - - servers = [] - for language_id, cfg in sorted(start_manager._configs.items()): - cmd0 = cfg.command[0] if cfg.command else None - servers.append({ - "language_id": language_id, - "display_name": cfg.display_name, - "extensions": list(cfg.extensions), - "command": list(cfg.command), - "command_available": bool(shutil.which(cmd0)) if cmd0 else False, - }) - - await start_manager.stop() - - return { - "servers": servers, - "server_count": len(servers), - "workspace_root": workspace_root, - } - - return asyncio.run(_run()) diff --git a/codex-lens/src/codexlens/api/models.py b/codex-lens/src/codexlens/api/models.py deleted file mode 100644 index 6c53f690..00000000 --- a/codex-lens/src/codexlens/api/models.py +++ /dev/null @@ -1,281 +0,0 @@ -"""API dataclass definitions for codexlens LSP API. - -This module defines all result dataclasses used by the public API layer, -following the patterns established in mcp/schema.py. -""" - -from __future__ import annotations - -from dataclasses import dataclass, field, asdict -from typing import List, Optional, Dict, Tuple - - -# ============================================================================= -# Section 4.2: file_context dataclasses -# ============================================================================= - -@dataclass -class CallInfo: - """Call relationship information. - - Attributes: - symbol_name: Name of the called/calling symbol - file_path: Target file path (may be None if unresolved) - line: Line number of the call - relationship: Type of relationship (call | import | inheritance) - """ - symbol_name: str - file_path: Optional[str] - line: int - relationship: str # call | import | inheritance - - def to_dict(self) -> dict: - """Convert to dictionary, filtering None values.""" - return {k: v for k, v in asdict(self).items() if v is not None} - - -@dataclass -class MethodContext: - """Method context with call relationships. - - Attributes: - name: Method/function name - kind: Symbol kind (function | method | class) - line_range: Start and end line numbers - signature: Function signature (if available) - calls: List of outgoing calls - callers: List of incoming calls - """ - name: str - kind: str # function | method | class - line_range: Tuple[int, int] - signature: Optional[str] - calls: List[CallInfo] = field(default_factory=list) - callers: List[CallInfo] = field(default_factory=list) - - def to_dict(self) -> dict: - """Convert to dictionary, filtering None values.""" - result = { - "name": self.name, - "kind": self.kind, - "line_range": list(self.line_range), - "calls": [c.to_dict() for c in self.calls], - "callers": [c.to_dict() for c in self.callers], - } - if self.signature is not None: - result["signature"] = self.signature - return result - - -@dataclass -class FileContextResult: - """File context result with method summaries. - - Attributes: - file_path: Path to the analyzed file - language: Programming language - methods: List of method contexts - summary: Human-readable summary - discovery_status: Status flags for call resolution - """ - file_path: str - language: str - methods: List[MethodContext] - summary: str - discovery_status: Dict[str, bool] = field(default_factory=lambda: { - "outgoing_resolved": False, - "incoming_resolved": True, - "targets_resolved": False - }) - - def to_dict(self) -> dict: - """Convert to dictionary for JSON serialization.""" - return { - "file_path": self.file_path, - "language": self.language, - "methods": [m.to_dict() for m in self.methods], - "summary": self.summary, - "discovery_status": self.discovery_status, - } - - -# ============================================================================= -# Section 4.3: find_definition dataclasses -# ============================================================================= - -@dataclass -class DefinitionResult: - """Definition lookup result. - - Attributes: - name: Symbol name - kind: Symbol kind (class, function, method, etc.) - file_path: File where symbol is defined - line: Start line number - end_line: End line number - signature: Symbol signature (if available) - container: Containing class/module (if any) - score: Match score for ranking - """ - name: str - kind: str - file_path: str - line: int - end_line: int - signature: Optional[str] = None - container: Optional[str] = None - score: float = 1.0 - - def to_dict(self) -> dict: - """Convert to dictionary, filtering None values.""" - return {k: v for k, v in asdict(self).items() if v is not None} - - -# ============================================================================= -# Section 4.4: find_references dataclasses -# ============================================================================= - -@dataclass -class ReferenceResult: - """Reference lookup result. - - Attributes: - file_path: File containing the reference - line: Line number - column: Column number - context_line: The line of code containing the reference - relationship: Type of reference (call | import | type_annotation | inheritance) - """ - file_path: str - line: int - column: int - context_line: str - relationship: str # call | import | type_annotation | inheritance - - def to_dict(self) -> dict: - """Convert to dictionary.""" - return asdict(self) - - -@dataclass -class GroupedReferences: - """References grouped by definition. - - Used when a symbol has multiple definitions (e.g., overloads). - - Attributes: - definition: The definition this group refers to - references: List of references to this definition - """ - definition: DefinitionResult - references: List[ReferenceResult] = field(default_factory=list) - - def to_dict(self) -> dict: - """Convert to dictionary.""" - return { - "definition": self.definition.to_dict(), - "references": [r.to_dict() for r in self.references], - } - - -# ============================================================================= -# Section 4.5: workspace_symbols dataclasses -# ============================================================================= - -@dataclass -class SymbolInfo: - """Symbol information for workspace search. - - Attributes: - name: Symbol name - kind: Symbol kind - file_path: File where symbol is defined - line: Line number - container: Containing class/module (if any) - score: Match score for ranking - """ - name: str - kind: str - file_path: str - line: int - container: Optional[str] = None - score: float = 1.0 - - def to_dict(self) -> dict: - """Convert to dictionary, filtering None values.""" - return {k: v for k, v in asdict(self).items() if v is not None} - - -# ============================================================================= -# Section 4.6: get_hover dataclasses -# ============================================================================= - -@dataclass -class HoverInfo: - """Hover information for a symbol. - - Attributes: - name: Symbol name - kind: Symbol kind - signature: Symbol signature - documentation: Documentation string (if available) - file_path: File where symbol is defined - line_range: Start and end line numbers - type_info: Type information (if available) - """ - name: str - kind: str - signature: str - documentation: Optional[str] - file_path: str - line_range: Tuple[int, int] - type_info: Optional[str] = None - - def to_dict(self) -> dict: - """Convert to dictionary, filtering None values.""" - result = { - "name": self.name, - "kind": self.kind, - "signature": self.signature, - "file_path": self.file_path, - "line_range": list(self.line_range), - } - if self.documentation is not None: - result["documentation"] = self.documentation - if self.type_info is not None: - result["type_info"] = self.type_info - return result - - -# ============================================================================= -# Section 4.7: semantic_search dataclasses -# ============================================================================= - -@dataclass -class SemanticResult: - """Semantic search result. - - Attributes: - symbol_name: Name of the matched symbol - kind: Symbol kind - file_path: File where symbol is defined - line: Line number - vector_score: Vector similarity score (None if not available) - structural_score: Structural match score (None if not available) - fusion_score: Combined fusion score - snippet: Code snippet - match_reason: Explanation of why this matched (optional) - """ - symbol_name: str - kind: str - file_path: str - line: int - vector_score: Optional[float] - structural_score: Optional[float] - fusion_score: float - snippet: str - match_reason: Optional[str] = None - - def to_dict(self) -> dict: - """Convert to dictionary, filtering None values.""" - return {k: v for k, v in asdict(self).items() if v is not None} diff --git a/codex-lens/src/codexlens/api/references.py b/codex-lens/src/codexlens/api/references.py deleted file mode 100644 index 2e3f5f1e..00000000 --- a/codex-lens/src/codexlens/api/references.py +++ /dev/null @@ -1,345 +0,0 @@ -"""Find references API for codexlens. - -This module implements the find_references() function that wraps -ChainSearchEngine.search_references() with grouped result structure -for multi-definition symbols. -""" - -from __future__ import annotations - -import logging -from pathlib import Path -from typing import List, Optional, Dict - -from .models import ( - DefinitionResult, - ReferenceResult, - GroupedReferences, -) -from .utils import ( - resolve_project, - normalize_relationship_type, -) - - -logger = logging.getLogger(__name__) - - -def _read_line_from_file(file_path: str, line: int) -> str: - """Read a specific line from a file. - - Args: - file_path: Path to the file - line: Line number (1-based) - - Returns: - The line content, stripped of trailing whitespace. - Returns empty string if file cannot be read or line doesn't exist. - """ - try: - path = Path(file_path) - if not path.exists(): - return "" - - with path.open("r", encoding="utf-8", errors="replace") as f: - for i, content in enumerate(f, 1): - if i == line: - return content.rstrip() - return "" - except Exception as exc: - logger.debug("Failed to read line %d from %s: %s", line, file_path, exc) - return "" - - -def _transform_to_reference_result( - raw_ref: "RawReferenceResult", -) -> ReferenceResult: - """Transform raw ChainSearchEngine reference to API ReferenceResult. - - Args: - raw_ref: Raw reference result from ChainSearchEngine - - Returns: - API ReferenceResult with context_line and normalized relationship - """ - # Read the actual line from the file - context_line = _read_line_from_file(raw_ref.file_path, raw_ref.line) - - # Normalize relationship type - relationship = normalize_relationship_type(raw_ref.relationship_type) - - return ReferenceResult( - file_path=raw_ref.file_path, - line=raw_ref.line, - column=raw_ref.column, - context_line=context_line, - relationship=relationship, - ) - - -def find_references( - project_root: str, - symbol_name: str, - symbol_kind: Optional[str] = None, - include_definition: bool = True, - group_by_definition: bool = True, - limit: int = 100, -) -> List[GroupedReferences]: - """Find all reference locations for a symbol. - - Multi-definition case returns grouped results to resolve ambiguity. - - This function wraps ChainSearchEngine.search_references() and groups - the results by definition location. Each GroupedReferences contains - a definition and all references that point to it. - - Args: - project_root: Project root directory path - symbol_name: Name of the symbol to find references for - symbol_kind: Optional symbol kind filter (e.g., 'function', 'class') - include_definition: Whether to include the definition location - in the result (default True) - group_by_definition: Whether to group references by definition. - If False, returns a single group with all references. - (default True) - limit: Maximum number of references to return (default 100) - - Returns: - List of GroupedReferences. Each group contains: - - definition: The DefinitionResult for this symbol definition - - references: List of ReferenceResult pointing to this definition - - Raises: - ValueError: If project_root does not exist or is not a directory - - Examples: - >>> refs = find_references("/path/to/project", "authenticate") - >>> for group in refs: - ... print(f"Definition: {group.definition.file_path}:{group.definition.line}") - ... for ref in group.references: - ... print(f" Reference: {ref.file_path}:{ref.line} ({ref.relationship})") - - Note: - Reference relationship types are normalized: - - 'calls' -> 'call' - - 'imports' -> 'import' - - 'inherits' -> 'inheritance' - """ - # Validate and resolve project root - project_path = resolve_project(project_root) - - # Import here to avoid circular imports - from codexlens.config import Config - from codexlens.storage.registry import RegistryStore - from codexlens.storage.path_mapper import PathMapper - from codexlens.storage.global_index import GlobalSymbolIndex - from codexlens.search.chain_search import ChainSearchEngine - from codexlens.search.chain_search import ReferenceResult as RawReferenceResult - from codexlens.entities import Symbol - - # Initialize infrastructure - config = Config() - registry = RegistryStore() - mapper = PathMapper(config.index_dir) - - # Create chain search engine - engine = ChainSearchEngine(registry, mapper, config=config) - - try: - # Step 1: Find definitions for the symbol - definitions: List[DefinitionResult] = [] - - if include_definition or group_by_definition: - # Search for symbol definitions - symbols = engine.search_symbols( - name=symbol_name, - source_path=project_path, - kind=symbol_kind, - ) - - # Convert Symbol to DefinitionResult - for sym in symbols: - # Only include exact name matches for definitions - if sym.name != symbol_name: - continue - - # Optionally filter by kind - if symbol_kind and sym.kind != symbol_kind: - continue - - definitions.append(DefinitionResult( - name=sym.name, - kind=sym.kind, - file_path=sym.file or "", - line=sym.range[0] if sym.range else 1, - end_line=sym.range[1] if sym.range else 1, - signature=None, # Not available from Symbol - container=None, # Not available from Symbol - score=1.0, - )) - - # Step 2: Get all references using ChainSearchEngine - raw_references = engine.search_references( - symbol_name=symbol_name, - source_path=project_path, - depth=-1, - limit=limit, - ) - - # Step 3: Transform raw references to API ReferenceResult - api_references: List[ReferenceResult] = [] - for raw_ref in raw_references: - api_ref = _transform_to_reference_result(raw_ref) - api_references.append(api_ref) - - # Step 4: Group references by definition - if group_by_definition and definitions: - return _group_references_by_definition( - definitions=definitions, - references=api_references, - include_definition=include_definition, - ) - else: - # Return single group with placeholder definition or first definition - if definitions: - definition = definitions[0] - else: - # Create placeholder definition when no definition found - definition = DefinitionResult( - name=symbol_name, - kind=symbol_kind or "unknown", - file_path="", - line=0, - end_line=0, - signature=None, - container=None, - score=0.0, - ) - - return [GroupedReferences( - definition=definition, - references=api_references, - )] - - finally: - engine.close() - - -def _group_references_by_definition( - definitions: List[DefinitionResult], - references: List[ReferenceResult], - include_definition: bool = True, -) -> List[GroupedReferences]: - """Group references by their likely definition. - - Uses file proximity heuristic to assign references to definitions. - References in the same file or directory as a definition are - assigned to that definition. - - Args: - definitions: List of definition locations - references: List of reference locations - include_definition: Whether to include definition in results - - Returns: - List of GroupedReferences with references assigned to definitions - """ - import os - - if not definitions: - return [] - - if len(definitions) == 1: - # Single definition - all references belong to it - return [GroupedReferences( - definition=definitions[0], - references=references, - )] - - # Multiple definitions - group by proximity - groups: Dict[int, List[ReferenceResult]] = { - i: [] for i in range(len(definitions)) - } - - for ref in references: - # Find the closest definition by file proximity - best_def_idx = 0 - best_score = -1 - - for i, defn in enumerate(definitions): - score = _proximity_score(ref.file_path, defn.file_path) - if score > best_score: - best_score = score - best_def_idx = i - - groups[best_def_idx].append(ref) - - # Build result groups - result: List[GroupedReferences] = [] - for i, defn in enumerate(definitions): - # Skip definitions with no references if not including definition itself - if not include_definition and not groups[i]: - continue - - result.append(GroupedReferences( - definition=defn, - references=groups[i], - )) - - return result - - -def _proximity_score(ref_path: str, def_path: str) -> int: - """Calculate proximity score between two file paths. - - Args: - ref_path: Reference file path - def_path: Definition file path - - Returns: - Proximity score (higher = closer): - - Same file: 1000 - - Same directory: 100 - - Otherwise: common path prefix length - """ - import os - - if not ref_path or not def_path: - return 0 - - # Normalize paths - ref_path = os.path.normpath(ref_path) - def_path = os.path.normpath(def_path) - - # Same file - if ref_path == def_path: - return 1000 - - ref_dir = os.path.dirname(ref_path) - def_dir = os.path.dirname(def_path) - - # Same directory - if ref_dir == def_dir: - return 100 - - # Common path prefix - try: - common = os.path.commonpath([ref_path, def_path]) - return len(common) - except ValueError: - # No common path (different drives on Windows) - return 0 - - -# Type alias for the raw reference from ChainSearchEngine -class RawReferenceResult: - """Type stub for ChainSearchEngine.ReferenceResult. - - This is only used for type hints and is replaced at runtime - by the actual import. - """ - file_path: str - line: int - column: int - context: str - relationship_type: str diff --git a/codex-lens/src/codexlens/api/semantic.py b/codex-lens/src/codexlens/api/semantic.py deleted file mode 100644 index c442364f..00000000 --- a/codex-lens/src/codexlens/api/semantic.py +++ /dev/null @@ -1,482 +0,0 @@ -"""Semantic search API with RRF fusion. - -This module provides the semantic_search() function for combining -vector, structural, and keyword search with configurable fusion strategies. -""" - -from __future__ import annotations - -import logging -from pathlib import Path -from typing import List, Optional - -from .models import SemanticResult -from .utils import resolve_project - -logger = logging.getLogger(__name__) - - -def semantic_search( - project_root: str, - query: str, - mode: str = "fusion", - vector_weight: float = 0.5, - structural_weight: float = 0.3, - keyword_weight: float = 0.2, - fusion_strategy: str = "rrf", - staged_stage2_mode: Optional[str] = None, - kind_filter: Optional[List[str]] = None, - limit: int = 20, - include_match_reason: bool = False, -) -> List[SemanticResult]: - """Semantic search - combining vector and structural search. - - This function provides a high-level API for semantic code search, - combining vector similarity, structural (symbol + relationships), - and keyword-based search methods with configurable fusion. - - Args: - project_root: Project root directory - query: Natural language query - mode: Search mode - - vector: Vector search only - - structural: Structural search only (symbol + relationships) - - fusion: Fusion search (default) - vector_weight: Vector search weight [0, 1] (default 0.5) - structural_weight: Structural search weight [0, 1] (default 0.3) - keyword_weight: Keyword search weight [0, 1] (default 0.2) - fusion_strategy: Fusion strategy (maps to chain_search.py) - - rrf: Reciprocal Rank Fusion (recommended, default) - - staged: Staged cascade -> staged_cascade_search - - binary: Binary rerank cascade -> binary_cascade_search - - hybrid: Binary rerank cascade (backward compat) -> binary_rerank_cascade_search - - dense_rerank: Dense rerank cascade -> dense_rerank_cascade_search - staged_stage2_mode: Optional override for staged Stage-2 expansion mode - - precomputed: GraphExpander over per-dir graph_neighbors (default) - - realtime: Live LSP expansion (requires LSP availability) - - static_global_graph: GlobalGraphExpander over global_relationships - kind_filter: Symbol type filter (e.g., ["function", "class"]) - limit: Max return count (default 20) - include_match_reason: Generate match reason (heuristic, not LLM) - - Returns: - Results sorted by fusion_score - - Degradation: - - No vector index: vector_score=None, uses FTS + structural search - - No relationship data: structural_score=None, vector search only - - Examples: - >>> results = semantic_search( - ... "/path/to/project", - ... "authentication handler", - ... mode="fusion", - ... fusion_strategy="rrf" - ... ) - >>> for r in results: - ... print(f"{r.symbol_name}: {r.fusion_score:.3f}") - """ - # Validate and resolve project path - project_path = resolve_project(project_root) - - # Normalize weights to sum to 1.0 - total_weight = vector_weight + structural_weight + keyword_weight - if total_weight > 0: - vector_weight = vector_weight / total_weight - structural_weight = structural_weight / total_weight - keyword_weight = keyword_weight / total_weight - else: - # Default to equal weights if all zero - vector_weight = structural_weight = keyword_weight = 1.0 / 3.0 - - # Initialize search infrastructure - try: - from codexlens.config import Config - from codexlens.storage.registry import RegistryStore - from codexlens.storage.path_mapper import PathMapper - from codexlens.search.chain_search import ChainSearchEngine, SearchOptions - except ImportError as exc: - logger.error("Failed to import search dependencies: %s", exc) - return [] - - # Load config - config = Config.load() - - # Optional per-call override for staged cascade Stage-2 mode. - if staged_stage2_mode: - stage2 = str(staged_stage2_mode).strip().lower() - if stage2 in {"live"}: - stage2 = "realtime" - valid_stage2 = {"precomputed", "realtime", "static_global_graph"} - if stage2 in valid_stage2: - config.staged_stage2_mode = stage2 - else: - logger.debug("Ignoring invalid staged_stage2_mode: %r", staged_stage2_mode) - - # Get or create registry and mapper - # Build search options based on mode - search_options = _build_search_options( - mode=mode, - vector_weight=vector_weight, - structural_weight=structural_weight, - keyword_weight=keyword_weight, - limit=limit, - ) - - # Execute search based on fusion_strategy - try: - with RegistryStore() as registry: - mapper = PathMapper() - with ChainSearchEngine(registry, mapper, config=config) as engine: - chain_result = _execute_search( - engine=engine, - query=query, - source_path=project_path, - fusion_strategy=fusion_strategy, - options=search_options, - limit=limit, - ) - except Exception as exc: - logger.error("Search execution failed: %s", exc) - return [] - - # Transform results to SemanticResult - semantic_results = _transform_results( - results=chain_result.results, - mode=mode, - vector_weight=vector_weight, - structural_weight=structural_weight, - keyword_weight=keyword_weight, - kind_filter=kind_filter, - include_match_reason=include_match_reason, - query=query, - ) - - return semantic_results[:limit] - - -def _build_search_options( - mode: str, - vector_weight: float, - structural_weight: float, - keyword_weight: float, - limit: int, -) -> "SearchOptions": - """Build SearchOptions based on mode and weights. - - Args: - mode: Search mode (vector, structural, fusion) - vector_weight: Vector search weight - structural_weight: Structural search weight - keyword_weight: Keyword search weight - limit: Result limit - - Returns: - Configured SearchOptions - """ - from codexlens.search.chain_search import SearchOptions - - # Default options - options = SearchOptions( - total_limit=limit * 2, # Fetch extra for filtering - limit_per_dir=limit, - include_symbols=True, # Always include symbols for structural - ) - - if mode == "vector": - # Pure vector mode - options.hybrid_mode = True - options.enable_vector = True - options.pure_vector = True - options.enable_fuzzy = False - elif mode == "structural": - # Structural only - use FTS + symbols - options.hybrid_mode = True - options.enable_vector = False - options.enable_fuzzy = True - options.include_symbols = True - else: - # Fusion mode (default) - options.hybrid_mode = True - options.enable_vector = vector_weight > 0 - options.enable_fuzzy = keyword_weight > 0 - options.include_symbols = structural_weight > 0 - - # Set custom weights for RRF - if options.enable_vector and keyword_weight > 0: - options.hybrid_weights = { - "vector": vector_weight, - "exact": keyword_weight * 0.7, - "fuzzy": keyword_weight * 0.3, - } - - return options - - -def _execute_search( - engine: "ChainSearchEngine", - query: str, - source_path: Path, - fusion_strategy: str, - options: "SearchOptions", - limit: int, -) -> "ChainSearchResult": - """Execute search using appropriate strategy. - - Maps fusion_strategy to ChainSearchEngine methods: - - rrf: Standard hybrid search with RRF fusion - - staged: staged_cascade_search - - binary: binary_cascade_search - - hybrid: binary_rerank_cascade_search (backward compat) - - dense_rerank: dense_rerank_cascade_search - - Args: - engine: ChainSearchEngine instance - query: Search query - source_path: Project root path - fusion_strategy: Strategy name - options: Search options - limit: Result limit - - Returns: - ChainSearchResult from the search - """ - from codexlens.search.chain_search import ChainSearchResult - - if fusion_strategy == "staged": - # Use staged cascade search (4-stage pipeline) - return engine.staged_cascade_search( - query=query, - source_path=source_path, - k=limit, - coarse_k=limit * 5, - options=options, - ) - elif fusion_strategy == "binary": - # Use binary cascade search (binary coarse + dense fine) - return engine.binary_cascade_search( - query=query, - source_path=source_path, - k=limit, - coarse_k=limit * 5, - options=options, - ) - elif fusion_strategy == "hybrid": - # Backward compat: hybrid now maps to binary_rerank_cascade_search - return engine.binary_rerank_cascade_search( - query=query, - source_path=source_path, - k=limit, - coarse_k=limit * 5, - options=options, - ) - else: - # Default: rrf - Standard search with RRF fusion - return engine.search( - query=query, - source_path=source_path, - options=options, - ) - - -def _transform_results( - results: List, - mode: str, - vector_weight: float, - structural_weight: float, - keyword_weight: float, - kind_filter: Optional[List[str]], - include_match_reason: bool, - query: str, -) -> List[SemanticResult]: - """Transform ChainSearchEngine results to SemanticResult. - - Args: - results: List of SearchResult objects - mode: Search mode - vector_weight: Vector weight used - structural_weight: Structural weight used - keyword_weight: Keyword weight used - kind_filter: Optional symbol kind filter - include_match_reason: Whether to generate match reasons - query: Original query (for match reason generation) - - Returns: - List of SemanticResult objects - """ - semantic_results = [] - - for result in results: - # Extract symbol info - symbol_name = getattr(result, "symbol_name", None) - symbol_kind = getattr(result, "symbol_kind", None) - start_line = getattr(result, "start_line", None) - - # Use symbol object if available - if hasattr(result, "symbol") and result.symbol: - symbol_name = symbol_name or result.symbol.name - symbol_kind = symbol_kind or result.symbol.kind - if hasattr(result.symbol, "range") and result.symbol.range: - start_line = start_line or result.symbol.range[0] - - # Filter by kind if specified - if kind_filter and symbol_kind: - if symbol_kind.lower() not in [k.lower() for k in kind_filter]: - continue - - # Determine scores based on mode and metadata - metadata = getattr(result, "metadata", {}) or {} - fusion_score = result.score - - # Try to extract source scores from metadata - source_scores = metadata.get("source_scores", {}) - vector_score: Optional[float] = None - structural_score: Optional[float] = None - - if mode == "vector": - # In pure vector mode, the main score is the vector score - vector_score = result.score - structural_score = None - elif mode == "structural": - # In structural mode, no vector score - vector_score = None - structural_score = result.score - else: - # Fusion mode - try to extract individual scores - if "vector" in source_scores: - vector_score = source_scores["vector"] - elif metadata.get("fusion_method") == "simple_weighted": - # From weighted fusion - vector_score = source_scores.get("vector") - - # Structural score approximation (from exact/fuzzy FTS) - fts_scores = [] - if "exact" in source_scores: - fts_scores.append(source_scores["exact"]) - if "fuzzy" in source_scores: - fts_scores.append(source_scores["fuzzy"]) - - if fts_scores: - structural_score = max(fts_scores) - - # Build snippet - snippet = getattr(result, "excerpt", "") or getattr(result, "content", "") - if len(snippet) > 500: - snippet = snippet[:500] + "..." - - # Generate match reason if requested - match_reason = None - if include_match_reason: - match_reason = _generate_match_reason( - query=query, - symbol_name=symbol_name, - symbol_kind=symbol_kind, - snippet=snippet, - vector_score=vector_score, - structural_score=structural_score, - ) - - semantic_result = SemanticResult( - symbol_name=symbol_name or Path(result.path).stem, - kind=symbol_kind or "unknown", - file_path=result.path, - line=start_line or 1, - vector_score=vector_score, - structural_score=structural_score, - fusion_score=fusion_score, - snippet=snippet, - match_reason=match_reason, - ) - - semantic_results.append(semantic_result) - - # Sort by fusion_score descending - semantic_results.sort(key=lambda r: r.fusion_score, reverse=True) - - return semantic_results - - -def _generate_match_reason( - query: str, - symbol_name: Optional[str], - symbol_kind: Optional[str], - snippet: str, - vector_score: Optional[float], - structural_score: Optional[float], -) -> str: - """Generate human-readable match reason heuristically. - - This is a simple heuristic-based approach, not LLM-powered. - - Args: - query: Original search query - symbol_name: Symbol name if available - symbol_kind: Symbol kind if available - snippet: Code snippet - vector_score: Vector similarity score - structural_score: Structural match score - - Returns: - Human-readable explanation string - """ - reasons = [] - - # Check for direct name match - query_lower = query.lower() - query_words = set(query_lower.split()) - - if symbol_name: - name_lower = symbol_name.lower() - # Direct substring match - if query_lower in name_lower or name_lower in query_lower: - reasons.append(f"Symbol name '{symbol_name}' matches query") - # Word overlap - name_words = set(_split_camel_case(symbol_name).lower().split()) - overlap = query_words & name_words - if overlap and not reasons: - reasons.append(f"Symbol name contains: {', '.join(overlap)}") - - # Check snippet for keyword matches - snippet_lower = snippet.lower() - matching_words = [w for w in query_words if w in snippet_lower and len(w) > 2] - if matching_words and len(reasons) < 2: - reasons.append(f"Code contains keywords: {', '.join(matching_words[:3])}") - - # Add score-based reasoning - if vector_score is not None and vector_score > 0.7: - reasons.append("High semantic similarity") - elif vector_score is not None and vector_score > 0.5: - reasons.append("Moderate semantic similarity") - - if structural_score is not None and structural_score > 0.8: - reasons.append("Strong structural match") - - # Symbol kind context - if symbol_kind and len(reasons) < 3: - reasons.append(f"Matched {symbol_kind}") - - if not reasons: - reasons.append("Partial relevance based on content analysis") - - return "; ".join(reasons[:3]) - - -def _split_camel_case(name: str) -> str: - """Split camelCase and PascalCase to words. - - Args: - name: Symbol name in camelCase or PascalCase - - Returns: - Space-separated words - """ - import re - - # Insert space before uppercase letters - result = re.sub(r"([a-z])([A-Z])", r"\1 \2", name) - # Insert space before uppercase followed by lowercase - result = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1 \2", result) - # Replace underscores with spaces - result = result.replace("_", " ") - - return result diff --git a/codex-lens/src/codexlens/api/symbols.py b/codex-lens/src/codexlens/api/symbols.py deleted file mode 100644 index 8faf248f..00000000 --- a/codex-lens/src/codexlens/api/symbols.py +++ /dev/null @@ -1,146 +0,0 @@ -"""workspace_symbols API implementation. - -This module provides the workspace_symbols() function for searching -symbols across the entire workspace with prefix matching. -""" - -from __future__ import annotations - -import fnmatch -import logging -from pathlib import Path -from typing import List, Optional - -from ..entities import Symbol -from ..storage.global_index import GlobalSymbolIndex -from ..storage.registry import RegistryStore -from ..errors import IndexNotFoundError -from .models import SymbolInfo -from .utils import resolve_project - -logger = logging.getLogger(__name__) - - -def workspace_symbols( - project_root: str, - query: str, - kind_filter: Optional[List[str]] = None, - file_pattern: Optional[str] = None, - limit: int = 50 -) -> List[SymbolInfo]: - """Search for symbols across the entire workspace. - - Uses prefix matching for efficient searching. - - Args: - project_root: Project root directory (for index location) - query: Search query (prefix match) - kind_filter: Optional list of symbol kinds to include - (e.g., ["class", "function"]) - file_pattern: Optional glob pattern to filter by file path - (e.g., "*.py", "src/**/*.ts") - limit: Maximum number of results to return - - Returns: - List of SymbolInfo sorted by score - - Raises: - IndexNotFoundError: If project is not indexed - """ - project_path = resolve_project(project_root) - - # Get project info from registry - registry = RegistryStore() - project_info = registry.get_project(project_path) - if project_info is None: - raise IndexNotFoundError(f"Project not indexed: {project_path}") - - # Open global symbol index - index_db = project_info.index_root / "_global_symbols.db" - if not index_db.exists(): - raise IndexNotFoundError(f"Global symbol index not found: {index_db}") - - global_index = GlobalSymbolIndex(str(index_db), project_info.id) - - # Search with prefix matching - # If kind_filter has multiple kinds, we need to search for each - all_results: List[Symbol] = [] - - if kind_filter and len(kind_filter) > 0: - # Search for each kind separately - for kind in kind_filter: - results = global_index.search( - name=query, - kind=kind, - limit=limit, - prefix_mode=True - ) - all_results.extend(results) - else: - # Search without kind filter - all_results = global_index.search( - name=query, - kind=None, - limit=limit, - prefix_mode=True - ) - - logger.debug(f"Found {len(all_results)} symbols matching '{query}'") - - # Apply file pattern filter if specified - if file_pattern: - all_results = [ - sym for sym in all_results - if sym.file and fnmatch.fnmatch(sym.file, file_pattern) - ] - logger.debug(f"After file filter '{file_pattern}': {len(all_results)} symbols") - - # Convert to SymbolInfo and sort by relevance - symbols = [ - SymbolInfo( - name=sym.name, - kind=sym.kind, - file_path=sym.file or "", - line=sym.range[0] if sym.range else 1, - container=None, # Could extract from parent - score=_calculate_score(sym.name, query) - ) - for sym in all_results - ] - - # Sort by score (exact matches first) - symbols.sort(key=lambda s: s.score, reverse=True) - - return symbols[:limit] - - -def _calculate_score(symbol_name: str, query: str) -> float: - """Calculate relevance score for a symbol match. - - Scoring: - - Exact match: 1.0 - - Prefix match: 0.8 + 0.2 * (query_len / symbol_len) - - Case-insensitive match: 0.6 - - Args: - symbol_name: The matched symbol name - query: The search query - - Returns: - Score between 0.0 and 1.0 - """ - if symbol_name == query: - return 1.0 - - if symbol_name.lower() == query.lower(): - return 0.9 - - if symbol_name.startswith(query): - ratio = len(query) / len(symbol_name) - return 0.8 + 0.2 * ratio - - if symbol_name.lower().startswith(query.lower()): - ratio = len(query) / len(symbol_name) - return 0.6 + 0.2 * ratio - - return 0.5 diff --git a/codex-lens/src/codexlens/api/utils.py b/codex-lens/src/codexlens/api/utils.py deleted file mode 100644 index 3621533a..00000000 --- a/codex-lens/src/codexlens/api/utils.py +++ /dev/null @@ -1,153 +0,0 @@ -"""Utility functions for the codexlens API. - -This module provides helper functions for: -- Project resolution -- Relationship type normalization -- Result ranking by proximity -""" - -from __future__ import annotations - -import os -from pathlib import Path -from typing import List, Optional, TypeVar, Callable - -from .models import DefinitionResult - - -# Type variable for generic ranking -T = TypeVar('T') - - -def resolve_project(project_root: str) -> Path: - """Resolve and validate project root path. - - Args: - project_root: Path to project root (relative or absolute) - - Returns: - Resolved absolute Path - - Raises: - ValueError: If path does not exist or is not a directory - """ - path = Path(project_root).resolve() - if not path.exists(): - raise ValueError(f"Project root does not exist: {path}") - if not path.is_dir(): - raise ValueError(f"Project root is not a directory: {path}") - return path - - -# Relationship type normalization mapping -_RELATIONSHIP_NORMALIZATION = { - # Plural to singular - "calls": "call", - "imports": "import", - "inherits": "inheritance", - "uses": "use", - # Already normalized (passthrough) - "call": "call", - "import": "import", - "inheritance": "inheritance", - "use": "use", - "type_annotation": "type_annotation", -} - - -def normalize_relationship_type(relationship: str) -> str: - """Normalize relationship type to canonical form. - - Converts plural forms and variations to standard singular forms: - - 'calls' -> 'call' - - 'imports' -> 'import' - - 'inherits' -> 'inheritance' - - 'uses' -> 'use' - - Args: - relationship: Raw relationship type string - - Returns: - Normalized relationship type - - Examples: - >>> normalize_relationship_type('calls') - 'call' - >>> normalize_relationship_type('inherits') - 'inheritance' - >>> normalize_relationship_type('call') - 'call' - """ - return _RELATIONSHIP_NORMALIZATION.get(relationship.lower(), relationship) - - -def rank_by_proximity( - results: List[DefinitionResult], - file_context: Optional[str] = None -) -> List[DefinitionResult]: - """Rank results by file path proximity to context. - - V1 Implementation: Uses path-based proximity scoring. - - Scoring algorithm: - 1. Same directory: highest score (100) - 2. Otherwise: length of common path prefix - - Args: - results: List of definition results to rank - file_context: Reference file path for proximity calculation. - If None, returns results unchanged. - - Returns: - Results sorted by proximity score (highest first) - - Examples: - >>> results = [ - ... DefinitionResult(name="foo", kind="function", - ... file_path="/a/b/c.py", line=1, end_line=10), - ... DefinitionResult(name="foo", kind="function", - ... file_path="/a/x/y.py", line=1, end_line=10), - ... ] - >>> ranked = rank_by_proximity(results, "/a/b/test.py") - >>> ranked[0].file_path - '/a/b/c.py' - """ - if not file_context or not results: - return results - - def proximity_score(result: DefinitionResult) -> int: - """Calculate proximity score for a result.""" - result_dir = os.path.dirname(result.file_path) - context_dir = os.path.dirname(file_context) - - # Same directory gets highest score - if result_dir == context_dir: - return 100 - - # Otherwise, score by common path prefix length - try: - common = os.path.commonpath([result.file_path, file_context]) - return len(common) - except ValueError: - # No common path (different drives on Windows) - return 0 - - return sorted(results, key=proximity_score, reverse=True) - - -def rank_by_score( - results: List[T], - score_fn: Callable[[T], float], - reverse: bool = True -) -> List[T]: - """Generic ranking function by custom score. - - Args: - results: List of items to rank - score_fn: Function to extract score from item - reverse: If True, highest scores first (default) - - Returns: - Sorted list - """ - return sorted(results, key=score_fn, reverse=reverse) diff --git a/codex-lens/src/codexlens/cli/__init__.py b/codex-lens/src/codexlens/cli/__init__.py deleted file mode 100644 index 18523b4c..00000000 --- a/codex-lens/src/codexlens/cli/__init__.py +++ /dev/null @@ -1,27 +0,0 @@ -"""CLI package for CodexLens.""" - -from __future__ import annotations - -import sys -import os - -# Force UTF-8 encoding for Windows console -# This ensures Chinese characters display correctly instead of GBK garbled text -if sys.platform == "win32": - # Set environment variable for Python I/O encoding - os.environ.setdefault("PYTHONIOENCODING", "utf-8") - - # Reconfigure stdout/stderr to use UTF-8 if possible - try: - if hasattr(sys.stdout, "reconfigure"): - sys.stdout.reconfigure(encoding="utf-8", errors="replace") - if hasattr(sys.stderr, "reconfigure"): - sys.stderr.reconfigure(encoding="utf-8", errors="replace") - except Exception: - # Fallback: some environments don't support reconfigure - pass - -from .commands import app - -__all__ = ["app"] - diff --git a/codex-lens/src/codexlens/cli/commands.py b/codex-lens/src/codexlens/cli/commands.py deleted file mode 100644 index 2f49e706..00000000 --- a/codex-lens/src/codexlens/cli/commands.py +++ /dev/null @@ -1,4942 +0,0 @@ -"""Typer commands for CodexLens.""" - -from __future__ import annotations - -import inspect -import json -import logging -import os -import re -import shutil -import subprocess -from pathlib import Path -from typing import Annotated, Any, Dict, Iterable, List, Optional - -import typer -from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn -from rich.table import Table - -from codexlens.config import Config -from codexlens.entities import IndexedFile, SearchResult, Symbol -from codexlens.errors import CodexLensError, ConfigError, ParseError, StorageError, SearchError -from codexlens.parsers.factory import ParserFactory -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore, ProjectInfo -from codexlens.storage.index_tree import IndexTreeBuilder -from codexlens.storage.dir_index import DirIndexStore -from codexlens.search.chain_search import ChainSearchEngine, SearchOptions -from codexlens.search.ranking import ( - QueryIntent, - apply_path_penalties, - detect_query_intent, - query_prefers_lexical_search, - query_targets_generated_files, -) -from codexlens.watcher import WatcherManager, WatcherConfig - -from .output import ( - console, - print_json, - render_file_inspect, - render_search_results, - render_status, - render_symbols, -) - -app = typer.Typer(help="CodexLens CLI — local code indexing and search.") -# Index subcommand group for reorganized commands -def _patch_typer_click_help_compat() -> None: - """Patch Typer help rendering for Click versions that pass ctx to make_metavar().""" - import click.core - from typer.core import TyperArgument - - try: - params = inspect.signature(TyperArgument.make_metavar).parameters - except (TypeError, ValueError): - return - - if len(params) != 1: - return - - def _compat_make_metavar(self, ctx=None): # type: ignore[override] - if self.metavar is not None: - return self.metavar - - var = (self.name or "").upper() - if not self.required: - var = f"[{var}]" - - try: - type_var = self.type.get_metavar(param=self, ctx=ctx) - except TypeError: - try: - type_var = self.type.get_metavar(self, ctx) - except TypeError: - type_var = self.type.get_metavar(self) - - if type_var: - var += f":{type_var}" - if self.nargs != 1: - var += "..." - return var - - TyperArgument.make_metavar = _compat_make_metavar - - param_params = inspect.signature(click.core.Parameter.make_metavar).parameters - if len(param_params) == 2: - original_param_make_metavar = click.core.Parameter.make_metavar - - def _compat_param_make_metavar(self, ctx=None): # type: ignore[override] - return original_param_make_metavar(self, ctx) - - click.core.Parameter.make_metavar = _compat_param_make_metavar - - -_patch_typer_click_help_compat() - - -# Index subcommand group for reorganized commands -index_app = typer.Typer(help="Index management commands (init, embeddings, binary, status, migrate, all)") -app.add_typer(index_app, name="index") - - -def _deprecated_command_warning(old_name: str, new_name: str) -> None: - """Display deprecation warning for renamed commands. - - Args: - old_name: The old command name being deprecated - new_name: The new command name to use instead - """ - console.print( - f"[yellow]Warning:[/yellow] '{old_name}' is deprecated. " - f"Use '{new_name}' instead." - ) - - -def _configure_logging(verbose: bool, json_mode: bool = False) -> None: - """Configure logging level. - - In JSON mode, suppress INFO logs to keep stderr clean for error parsing. - Only WARNING and above are shown to avoid mixing logs with JSON output. - """ - if json_mode and not verbose: - # In JSON mode, suppress INFO logs to keep stderr clean - level = logging.WARNING - else: - level = logging.DEBUG if verbose else logging.INFO - logging.basicConfig(level=level, format="%(levelname)s %(message)s") - - -def _parse_languages(raw: Optional[List[str]]) -> Optional[List[str]]: - if not raw: - return None - langs: List[str] = [] - for item in raw: - for part in item.split(","): - part = part.strip() - if part: - langs.append(part) - return langs or None - - -def _fail_mutually_exclusive(option_a: str, option_b: str, json_mode: bool) -> None: - msg = f"Options {option_a} and {option_b} are mutually exclusive." - if json_mode: - print_json(success=False, error=msg) - else: - console.print(f"[red]Error:[/red] {msg}") - raise typer.Exit(code=1) - - -def _extract_embedding_error(embed_result: Dict[str, Any]) -> str: - """Best-effort error extraction for embedding generation results.""" - raw_error = embed_result.get("error") - if isinstance(raw_error, str) and raw_error.strip(): - return raw_error.strip() - - result = embed_result.get("result") - if isinstance(result, dict): - details = result.get("details") - if isinstance(details, list): - collected: List[str] = [] - for item in details: - if not isinstance(item, dict): - continue - item_error = item.get("error") - if isinstance(item_error, str) and item_error.strip(): - collected.append(item_error.strip()) - - if collected: - # De-dupe while preserving order, then keep output short. - seen: set[str] = set() - unique: List[str] = [] - for err in collected: - if err not in seen: - seen.add(err) - unique.append(err) - return "; ".join(unique[:3]) - - return "Embedding generation failed (no error details provided)" - - -def _auto_select_search_method(query: str) -> str: - """Choose a default search method from query intent.""" - if query_targets_generated_files(query) or query_prefers_lexical_search(query): - return "fts" - - intent = detect_query_intent(query) - if intent == QueryIntent.KEYWORD: - return "fts" - if intent == QueryIntent.SEMANTIC: - return "dense_rerank" - return "hybrid" - - -_CLI_NON_CODE_EXTENSIONS = { - "md", "txt", "json", "yaml", "yml", "xml", "csv", "log", - "ini", "cfg", "conf", "toml", "env", "properties", - "html", "htm", "svg", "png", "jpg", "jpeg", "gif", "ico", "webp", - "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", - "lock", "sum", "mod", -} -_FALLBACK_ARTIFACT_DIRS = { - "dist", - "build", - "out", - "coverage", - "htmlcov", - ".cache", - ".workflow", - ".next", - ".nuxt", - ".parcel-cache", - ".turbo", - "tmp", - "temp", - "generated", -} -_FALLBACK_SOURCE_DIRS = { - "src", - "lib", - "core", - "app", - "server", - "client", - "services", -} - - -def _normalize_extension_filters(exclude_extensions: Optional[Iterable[str]]) -> set[str]: - """Normalize extension filters to lowercase values without leading dots.""" - normalized: set[str] = set() - for ext in exclude_extensions or []: - cleaned = (ext or "").strip().lower().lstrip(".") - if cleaned: - normalized.add(cleaned) - return normalized - - -def _score_filesystem_fallback_match( - query: str, - path_text: str, - line_text: str, - *, - base_score: float, -) -> float: - """Score filesystem fallback hits with light source-aware heuristics.""" - score = max(0.0, float(base_score)) - if score <= 0: - return 0.0 - - query_intent = detect_query_intent(query) - if query_intent != QueryIntent.KEYWORD: - return score - - path_parts = { - part.casefold() - for part in str(path_text).replace("\\", "/").split("/") - if part and part != "." - } - if _FALLBACK_SOURCE_DIRS.intersection(path_parts): - score *= 1.15 - - symbol = (query or "").strip() - if " " in symbol or not symbol: - return score - - escaped_symbol = re.escape(symbol) - definition_patterns = ( - rf"^\s*(?:export\s+)?(?:async\s+)?def\s+{escaped_symbol}\b", - rf"^\s*(?:export\s+)?(?:async\s+)?function\s+{escaped_symbol}\b", - rf"^\s*(?:export\s+)?class\s+{escaped_symbol}\b", - rf"^\s*(?:export\s+)?interface\s+{escaped_symbol}\b", - rf"^\s*(?:export\s+)?type\s+{escaped_symbol}\b", - rf"^\s*(?:export\s+)?(?:const|let|var)\s+{escaped_symbol}\b", - ) - if any(re.search(pattern, line_text) for pattern in definition_patterns): - score *= 1.8 - - return score - - -def _filesystem_fallback_search( - query: str, - search_path: Path, - *, - limit: int, - config: Config, - code_only: bool = False, - exclude_extensions: Optional[Iterable[str]] = None, -) -> Optional[dict[str, Any]]: - """Fallback to ripgrep when indexed keyword search returns no results.""" - rg_path = shutil.which("rg") - if not rg_path or not query.strip(): - return None - - import time - - allow_generated = query_targets_generated_files(query) - ignored_dirs = {name for name in IndexTreeBuilder.IGNORE_DIRS if name} - ignored_dirs.add(".workflow") - if allow_generated: - ignored_dirs.difference_update(_FALLBACK_ARTIFACT_DIRS) - - excluded_exts = _normalize_extension_filters(exclude_extensions) - if code_only: - excluded_exts.update(_CLI_NON_CODE_EXTENSIONS) - - args = [ - rg_path, - "--json", - "--line-number", - "--fixed-strings", - "--smart-case", - "--max-count", - "1", - ] - if allow_generated: - args.append("--hidden") - - for dirname in sorted(ignored_dirs): - args.extend(["--glob", f"!**/{dirname}/**"]) - - args.extend([query, str(search_path)]) - - start_time = time.perf_counter() - proc = subprocess.run( - args, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - encoding="utf-8", - errors="replace", - check=False, - ) - - if proc.returncode not in (0, 1): - return None - - matches: List[SearchResult] = [] - seen_paths: set[str] = set() - for raw_line in proc.stdout.splitlines(): - if len(matches) >= limit: - break - try: - event = json.loads(raw_line) - except json.JSONDecodeError: - continue - if event.get("type") != "match": - continue - - data = event.get("data") or {} - path_text = ((data.get("path") or {}).get("text") or "").strip() - if not path_text or path_text in seen_paths: - continue - - path_obj = Path(path_text) - extension = path_obj.suffix.lower().lstrip(".") - if extension and extension in excluded_exts: - continue - if code_only and config.language_for_path(path_obj) is None: - continue - - line_text = ((data.get("lines") or {}).get("text") or "").rstrip("\r\n") - line_number = data.get("line_number") - seen_paths.add(path_text) - base_score = float(limit - len(matches)) - matches.append( - SearchResult( - path=path_text, - score=_score_filesystem_fallback_match( - query, - path_text, - line_text, - base_score=base_score, - ), - excerpt=line_text.strip() or line_text or path_text, - content=None, - metadata={ - "filesystem_fallback": True, - "backend": "ripgrep-fallback", - "stale_index_suspected": True, - }, - start_line=line_number, - end_line=line_number, - ) - ) - - if not matches: - return None - - matches = apply_path_penalties( - matches, - query, - test_file_penalty=config.test_file_penalty, - generated_file_penalty=config.generated_file_penalty, - ) - return { - "results": matches, - "time_ms": (time.perf_counter() - start_time) * 1000.0, - "fallback": { - "backend": "ripgrep-fallback", - "stale_index_suspected": True, - "reason": "Indexed FTS search returned no results; filesystem fallback used.", - }, - } - - -def _remove_tree_best_effort(target: Path) -> dict[str, Any]: - """Remove a directory tree without aborting on locked files.""" - target = target.resolve() - if not target.exists(): - return { - "removed": True, - "partial": False, - "locked_paths": [], - "errors": [], - "remaining_path": None, - } - - locked_paths: List[str] = [] - errors: List[str] = [] - entries = sorted(target.rglob("*"), key=lambda path: len(path.parts), reverse=True) - - for entry in entries: - try: - if entry.is_dir() and not entry.is_symlink(): - entry.rmdir() - else: - entry.unlink() - except FileNotFoundError: - continue - except PermissionError: - locked_paths.append(str(entry)) - except OSError as exc: - if entry.is_dir(): - continue - errors.append(f"{entry}: {exc}") - - try: - target.rmdir() - except FileNotFoundError: - pass - except PermissionError: - locked_paths.append(str(target)) - except OSError: - pass - - return { - "removed": not target.exists(), - "partial": target.exists(), - "locked_paths": sorted(set(locked_paths)), - "errors": errors, - "remaining_path": str(target) if target.exists() else None, - } - - -def _get_index_root() -> Path: - """Get the index root directory from config or default. - - Priority order: - 1. CODEXLENS_INDEX_DIR environment variable - 2. index_dir from ~/.codexlens/config.json - 3. Default: ~/.codexlens/indexes - """ - env_override = os.getenv("CODEXLENS_INDEX_DIR") - if env_override: - return Path(env_override).expanduser().resolve() - - # Read from config.json - config_file = Path.home() / ".codexlens" / "config.json" - if config_file.exists(): - try: - cfg = json.loads(config_file.read_text(encoding="utf-8")) - if "index_dir" in cfg: - return Path(cfg["index_dir"]).expanduser().resolve() - except (json.JSONDecodeError, OSError): - pass # Fall through to default - - return Path.home() / ".codexlens" / "indexes" - - -def _get_registry_path() -> Path: - """Get the registry database path.""" - env_override = os.getenv("CODEXLENS_DATA_DIR") - if env_override: - return Path(env_override).expanduser().resolve() / "registry.db" - return Path.home() / ".codexlens" / "registry.db" - - -@index_app.command("init") -def index_init( - path: Path = typer.Argument(Path("."), exists=True, file_okay=False, dir_okay=True, help="Project root to index."), - language: Optional[List[str]] = typer.Option( - None, - "--language", - "-l", - help="Limit indexing to specific languages (repeat or comma-separated).", - ), - workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, help="Parallel worker processes (default: auto-detect based on CPU count)."), - force: bool = typer.Option(False, "--force", "-f", help="Force full reindex (skip incremental mode)."), - no_embeddings: bool = typer.Option(False, "--no-embeddings", help="Skip automatic embedding generation (if semantic deps installed)."), - backend: Optional[str] = typer.Option(None, "--backend", "-b", help="Embedding backend: fastembed (local) or litellm (remote API). Defaults to settings.json config."), - model: Optional[str] = typer.Option(None, "--model", "-m", help="Embedding model: profile name for fastembed or model name for litellm. Defaults to settings.json config."), - use_astgrep: bool = typer.Option( - False, - "--use-astgrep", - help="Prefer ast-grep parsers when available (experimental). Overrides settings.json config.", - ), - no_use_astgrep: bool = typer.Option( - False, - "--no-use-astgrep", - help="Disable ast-grep parsers. Overrides settings.json config.", - ), - static_graph: bool = typer.Option( - False, - "--static-graph", - help="Persist global relationships during indexing for static graph expansion. Overrides settings.json config.", - ), - no_static_graph: bool = typer.Option( - False, - "--no-static-graph", - help="Disable persisting global relationships. Overrides settings.json config.", - ), - static_graph_types: Optional[str] = typer.Option( - None, - "--static-graph-types", - help="Comma-separated relationship types to persist: imports,inherits,calls. Overrides settings.json config.", - ), - max_workers: int = typer.Option(1, "--max-workers", min=1, help="Max concurrent API calls for embedding generation. Recommended: 4-8 for litellm backend."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), -) -> None: - """Initialize or rebuild the index for a directory. - - Indexes are stored in ~/.codexlens/indexes/ with mirrored directory structure. - Set CODEXLENS_INDEX_DIR to customize the index location. - - By default, uses incremental indexing (skip unchanged files). - Use --force to rebuild all files regardless of modification time. - - If semantic search dependencies are installed, automatically generates embeddings - after indexing completes. Use --no-embeddings to skip this step. - - Backend Options (--backend): - - fastembed: Local ONNX-based embeddings (default, no API calls) - - litellm: Remote API embeddings via ccw-litellm (requires API keys) - - Model Options (--model): - - For fastembed backend: Use profile names (fast, code, multilingual, balanced) - - For litellm backend: Use model names (e.g., text-embedding-3-small, text-embedding-ada-002) - """ - _configure_logging(verbose, json_mode) - config = Config() - - # Fallback to settings.json config if CLI params not provided - config.load_settings() # Ensure settings are loaded - - # Apply CLI overrides for parsing/indexing behavior - if use_astgrep and no_use_astgrep: - _fail_mutually_exclusive("--use-astgrep", "--no-use-astgrep", json_mode) - if use_astgrep: - config.use_astgrep = True - elif no_use_astgrep: - config.use_astgrep = False - - if static_graph and no_static_graph: - _fail_mutually_exclusive("--static-graph", "--no-static-graph", json_mode) - if static_graph: - config.static_graph_enabled = True - elif no_static_graph: - config.static_graph_enabled = False - if static_graph_types is not None: - allowed = {"imports", "inherits", "calls"} - parsed = [ - t.strip().lower() - for t in static_graph_types.split(",") - if t.strip() - ] - invalid = [t for t in parsed if t not in allowed] - if invalid: - msg = ( - "Invalid --static-graph-types. Must be a comma-separated list of: " - f"{', '.join(sorted(allowed))}. Got: {invalid}" - ) - if json_mode: - print_json(success=False, error=msg) - else: - console.print(f"[red]Error:[/red] {msg}") - raise typer.Exit(code=1) - if parsed: - config.static_graph_relationship_types = parsed - - actual_backend = backend or config.embedding_backend - actual_model = model or config.embedding_model - - languages = _parse_languages(language) - base_path = path.expanduser().resolve() - - registry: RegistryStore | None = None - try: - registry = RegistryStore() - registry.initialize() - mapper = PathMapper() - - builder = IndexTreeBuilder(registry, mapper, config, incremental=not force) - - if force: - console.print(f"[bold]Building index for:[/bold] {base_path} [yellow](FULL reindex)[/yellow]") - else: - console.print(f"[bold]Building index for:[/bold] {base_path} [dim](incremental)[/dim]") - - build_result = builder.build( - source_root=base_path, - languages=languages, - workers=workers, - force_full=force, - ) - - result = { - "path": str(base_path), - "files_indexed": build_result.total_files, - "dirs_indexed": build_result.total_dirs, - "index_root": str(build_result.index_root), - "project_id": build_result.project_id, - "languages": languages or sorted(config.supported_languages.keys()), - "errors": len(build_result.errors), - } - - if not json_mode: - console.print(f"[green]OK[/green] Indexed [bold]{build_result.total_files}[/bold] files in [bold]{build_result.total_dirs}[/bold] directories") - console.print(f" Index root: {build_result.index_root}") - if build_result.errors: - console.print(f" [yellow]Warnings:[/yellow] {len(build_result.errors)} errors") - - # Auto-generate embeddings if the requested backend is available - if not no_embeddings: - try: - from codexlens.semantic import is_embedding_backend_available - from codexlens.cli.embedding_manager import generate_embeddings_recursive, get_embeddings_status - - # Validate embedding backend - valid_backends = ["fastembed", "litellm"] - if actual_backend not in valid_backends: - error_msg = f"Invalid embedding backend: {actual_backend}. Must be one of: {', '.join(valid_backends)}" - if json_mode: - print_json(success=False, error=error_msg) - else: - console.print(f"[red]Error:[/red] {error_msg}") - raise typer.Exit(code=1) - - backend_available, backend_error = is_embedding_backend_available(actual_backend) - - if backend_available: - # Use the index root directory (not the _index.db file) - index_root = Path(build_result.index_root) - - if not json_mode: - console.print("\n[bold]Generating embeddings...[/bold]") - console.print(f"Backend: [cyan]{actual_backend}[/cyan]") - console.print(f"Model: [cyan]{actual_model}[/cyan]") - else: - # Output progress message for JSON mode (parsed by Node.js) - print("Generating embeddings...", flush=True) - - # Progress callback - outputs progress for both json and non-json modes - # Node.js parseProgressLine() expects formats like: - # - "Batch X: N files, M chunks" - # - "Processing N files" - # - "Finalizing index" - def progress_update(msg: str): - if json_mode: - # Output without prefix so Node.js can parse it - # Strip leading spaces that embedding_manager adds - print(msg.strip(), flush=True) - elif verbose: - console.print(f" {msg}") - - embed_result = generate_embeddings_recursive( - index_root, - embedding_backend=actual_backend, - model_profile=actual_model, - force=False, # Don't force regenerate during init - chunk_size=2000, - progress_callback=progress_update, # Always use callback - max_workers=max_workers, - ) - - if embed_result["success"]: - embed_data = embed_result["result"] - - # Output completion message for Node.js to parse - if json_mode: - print(f"Embeddings complete: {embed_data['total_chunks_created']} chunks", flush=True) - - # Get comprehensive coverage statistics - status_result = get_embeddings_status(index_root) - if status_result["success"]: - coverage = status_result["result"] - result["embeddings"] = { - "generated": True, - "total_indexes": coverage["total_indexes"], - "total_files": coverage["total_files"], - "files_with_embeddings": coverage["files_with_embeddings"], - "coverage_percent": coverage["coverage_percent"], - "total_chunks": coverage["total_chunks"], - } - else: - result["embeddings"] = { - "generated": True, - "total_chunks": embed_data["total_chunks_created"], - "files_processed": embed_data["total_files_processed"], - } - - if not json_mode: - console.print(f"[green]✓[/green] Generated embeddings for [bold]{embed_data['total_files_processed']}[/bold] files") - console.print(f" Total chunks: [bold]{embed_data['total_chunks_created']}[/bold]") - console.print(f" Indexes processed: [bold]{embed_data['indexes_successful']}/{embed_data['indexes_processed']}[/bold]") - else: - if not json_mode: - error_msg = _extract_embedding_error(embed_result) - console.print(f"[yellow]Warning:[/yellow] Embedding generation failed: {error_msg}") - result["embeddings"] = { - "generated": False, - "error": _extract_embedding_error(embed_result), - } - else: - if not json_mode and verbose: - console.print(f"[dim]Embedding backend '{actual_backend}' not available. Skipping embeddings.[/dim]") - result["embeddings"] = { - "generated": False, - "error": backend_error or "Embedding backend not available", - } - except Exception as e: - if not json_mode and verbose: - console.print(f"[yellow]Warning:[/yellow] Could not generate embeddings: {e}") - result["embeddings"] = { - "generated": False, - "error": str(e), - } - else: - result["embeddings"] = { - "generated": False, - "error": "Skipped (--no-embeddings)", - } - - # Output final JSON result with embeddings status - if json_mode: - print_json(success=True, result=result) - - except StorageError as exc: - if json_mode: - print_json(success=False, error=f"Storage error: {exc}") - else: - console.print(f"[red]Init failed (storage):[/red] {exc}") - raise typer.Exit(code=1) - except ConfigError as exc: - if json_mode: - print_json(success=False, error=f"Configuration error: {exc}") - else: - console.print(f"[red]Init failed (config):[/red] {exc}") - raise typer.Exit(code=1) - except ParseError as exc: - if json_mode: - print_json(success=False, error=f"Parse error: {exc}") - else: - console.print(f"[red]Init failed (parse):[/red] {exc}") - raise typer.Exit(code=1) - except PermissionError as exc: - if json_mode: - print_json(success=False, error=f"Permission denied: {exc}") - else: - console.print(f"[red]Init failed (permission denied):[/red] {exc}") - raise typer.Exit(code=1) - except CodexLensError as exc: - if json_mode: - print_json(success=False, error=str(exc)) - else: - console.print(f"[red]Init failed:[/red] {exc}") - raise typer.Exit(code=1) - finally: - if registry is not None: - registry.close() - - -@app.command() -def watch( - path: Path = typer.Argument(Path("."), exists=True, file_okay=False, dir_okay=True, help="Project root to watch."), - language: Optional[List[str]] = typer.Option( - None, - "--language", - "-l", - help="Limit watching to specific languages (repeat or comma-separated).", - ), - debounce: int = typer.Option(1000, "--debounce", "-d", min=100, max=10000, help="Debounce interval in milliseconds."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose logging."), -) -> None: - """Watch directory for changes and update index incrementally. - - Monitors filesystem events and automatically updates the index - when files are created, modified, or deleted. - - The directory must already be indexed (run 'codexlens init' first). - - Press Ctrl+C to stop watching. - - Examples: - codexlens watch . - codexlens watch /path/to/project --debounce 500 --verbose - codexlens watch . --language python,typescript - """ - _configure_logging(verbose) - - from codexlens.watcher.events import IndexResult - - base_path = path.expanduser().resolve() - - # Check if path is indexed - mapper = PathMapper() - index_db = mapper.source_to_index_db(base_path) - if not index_db.exists(): - console.print(f"[red]Error:[/red] Directory not indexed: {base_path}") - console.print("Run 'codexlens init' first to create the index.") - raise typer.Exit(code=1) - - # Parse languages - languages = _parse_languages(language) - - # Create watcher config - watcher_config = WatcherConfig( - debounce_ms=debounce, - languages=languages, - ) - - # Callback for indexed files - def on_indexed(result: IndexResult) -> None: - if result.files_indexed > 0: - console.print(f" [green]Indexed:[/green] {result.files_indexed} files ({result.symbols_added} symbols)") - if result.files_removed > 0: - console.print(f" [yellow]Removed:[/yellow] {result.files_removed} files") - if result.errors: - for error in result.errors[:3]: # Show first 3 errors - console.print(f" [red]Error:[/red] {error}") - - console.print(f"[bold]Watching:[/bold] {base_path}") - console.print(f" Debounce: {debounce}ms") - if languages: - console.print(f" Languages: {', '.join(languages)}") - console.print(" Press Ctrl+C to stop.\n") - - manager: WatcherManager | None = None - try: - watch_config = Config.load() - manager = WatcherManager( - root_path=base_path, - config=watch_config, - watcher_config=watcher_config, - on_indexed=on_indexed, - ) - manager.start() - manager.wait() - except KeyboardInterrupt: - pass - except Exception as exc: - console.print(f"[red]Error:[/red] {exc}") - raise typer.Exit(code=1) - finally: - if manager is not None: - manager.stop() - console.print("\n[dim]Watcher stopped.[/dim]") - - -@app.command() -def search( - query: str = typer.Argument(..., help="Search query."), - path: Path = typer.Option(Path("."), "--path", "-p", help="Directory to search from."), - limit: int = typer.Option(20, "--limit", "-n", min=1, max=500, help="Max results."), - offset: int = typer.Option(0, "--offset", min=0, help="Pagination offset - skip first N results."), - depth: int = typer.Option(-1, "--depth", "-d", help="Search depth (-1 = unlimited, 0 = current only)."), - files_only: bool = typer.Option(False, "--files-only", "-f", help="Return only file paths without content snippets."), - method: str = typer.Option("auto", "--method", "-m", help="Search method: 'auto' (intent-aware, default), 'dense_rerank' (semantic), 'fts' (exact keyword)."), - use_fuzzy: bool = typer.Option(False, "--use-fuzzy", help="Enable fuzzy matching in FTS method."), - code_only: bool = typer.Option(False, "--code-only", help="Only return code files (excludes md, txt, json, yaml, xml, etc.)."), - exclude_extensions: Optional[str] = typer.Option(None, "--exclude-extensions", help="Comma-separated list of file extensions to exclude (e.g., 'md,txt,json')."), - # Hidden advanced options for backward compatibility - weights: Optional[str] = typer.Option( - None, - "--weights", "-w", - hidden=True, - help="[Advanced] RRF weights as key=value pairs." - ), - cascade_strategy: Optional[str] = typer.Option( - None, - "--cascade-strategy", - hidden=True, - help="[Advanced] Cascade strategy for --method cascade." - ), - staged_stage2_mode: Optional[str] = typer.Option( - None, - "--staged-stage2-mode", - hidden=True, - help="[Advanced] Stage 2 expansion mode for cascade strategy 'staged': precomputed | realtime | static_global_graph.", - ), - # Hidden deprecated parameter for backward compatibility - mode: Optional[str] = typer.Option(None, "--mode", hidden=True, help="[DEPRECATED] Use --method instead."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), -) -> None: - """Search indexed file contents. - - Uses chain search across directory indexes. - Use --depth to limit search recursion (0 = current dir only). - - Search Methods: - - auto (default): Intent-aware routing. KEYWORD -> fts, MIXED -> hybrid, - SEMANTIC -> dense_rerank. - - dense_rerank: Semantic search using Dense embedding coarse retrieval + - Cross-encoder reranking. Best for natural language queries and code understanding. - - fts: Full-text search using FTS5 (unicode61 tokenizer). Best for exact code - identifiers like function/class names. Use --use-fuzzy for typo tolerance. - - Method Selection Guide: - - Code identifiers (function/class names): auto or fts - - Natural language queries: auto or dense_rerank - - Typo-tolerant search: fts --use-fuzzy - - Requirements: - The dense_rerank method requires pre-generated embeddings. - Use 'codexlens embeddings-generate' to create embeddings first. - - Examples: - # Default intent-aware search - codexlens search "authentication logic" - - # Exact code identifier search - codexlens search "authenticate_user" --method fts - - # Typo-tolerant fuzzy search - codexlens search "authentcate" --method fts --use-fuzzy - """ - _configure_logging(verbose, json_mode) - search_path = path.expanduser().resolve() - - # Handle deprecated --mode parameter - actual_method = method - if mode is not None: - # Show deprecation warning - if not json_mode: - console.print("[yellow]Warning: --mode is deprecated, use --method instead.[/yellow]") - - # Map old mode values to new method values - mode_to_method = { - "auto": "auto", - "exact": "fts", - "fuzzy": "fts", # with use_fuzzy=True - "hybrid": "hybrid", - "vector": "vector", - "pure-vector": "vector", - } - - if mode in mode_to_method: - actual_method = mode_to_method[mode] - # Enable fuzzy for old fuzzy mode - if mode == "fuzzy": - use_fuzzy = True - else: - if json_mode: - print_json(success=False, error=f"Invalid deprecated mode: {mode}. Use --method instead.") - else: - console.print(f"[red]Invalid deprecated mode:[/red] {mode}") - console.print("[dim]Use --method with: fts, vector, hybrid, cascade[/dim]") - raise typer.Exit(code=1) - - # Configure search (load settings from file) - config = Config.load() - - # Validate method - simplified interface exposes only dense_rerank and fts - # Other methods (vector, hybrid, cascade) are hidden but still work for backward compatibility - valid_methods = ["auto", "fts", "dense_rerank", "vector", "hybrid", "cascade"] - if actual_method not in valid_methods: - if json_mode: - print_json(success=False, error=f"Invalid method: {actual_method}. Use 'auto', 'dense_rerank', or 'fts'.") - else: - console.print(f"[red]Invalid method:[/red] {actual_method}") - console.print("[dim]Use 'auto' (default), 'dense_rerank' (semantic), or 'fts' (exact keyword)[/dim]") - raise typer.Exit(code=1) - - resolved_method = ( - _auto_select_search_method(query) - if actual_method == "auto" - else actual_method - ) - display_method = resolved_method - execution_method = resolved_method - - # Map dense_rerank to cascade method internally - internal_cascade_strategy = cascade_strategy - if execution_method == "dense_rerank": - execution_method = "cascade" - internal_cascade_strategy = "dense_rerank" - - # Validate cascade_strategy if provided (for advanced users) - if internal_cascade_strategy is not None: - valid_strategies = ["binary", "hybrid", "binary_rerank", "dense_rerank", "staged"] - if internal_cascade_strategy not in valid_strategies: - if json_mode: - print_json(success=False, error=f"Invalid cascade strategy: {internal_cascade_strategy}. Must be one of: {', '.join(valid_strategies)}") - else: - console.print(f"[red]Invalid cascade strategy:[/red] {internal_cascade_strategy}") - console.print(f"[dim]Valid strategies: {', '.join(valid_strategies)}[/dim]") - raise typer.Exit(code=1) - - # Parse custom weights if provided - hybrid_weights = None - if weights: - try: - # Check if using key=value format (new) or legacy comma-separated format - if "=" in weights: - # New format: exact=0.3,fuzzy=0.1,vector=0.6 - weight_dict = {} - for pair in weights.split(","): - if "=" in pair: - key, val = pair.split("=", 1) - weight_dict[key.strip()] = float(val.strip()) - else: - raise ValueError("Mixed format not supported - use all key=value pairs") - - # Validate and normalize weights - weight_sum = sum(weight_dict.values()) - if abs(weight_sum - 1.0) > 0.01: - if not json_mode: - console.print(f"[yellow]Warning: Weights sum to {weight_sum:.2f}, should sum to 1.0. Normalizing...[/yellow]") - weight_dict = {k: v / weight_sum for k, v in weight_dict.items()} - - hybrid_weights = weight_dict - else: - # Legacy format: 0.3,0.1,0.6 (exact,fuzzy,vector) - weight_parts = [float(w.strip()) for w in weights.split(",")] - if len(weight_parts) == 3: - weight_sum = sum(weight_parts) - if abs(weight_sum - 1.0) > 0.01: - if not json_mode: - console.print(f"[yellow]Warning: Weights sum to {weight_sum:.2f}, should sum to 1.0. Normalizing...[/yellow]") - weight_parts = [w / weight_sum for w in weight_parts] - hybrid_weights = { - "exact": weight_parts[0], - "fuzzy": weight_parts[1], - "vector": weight_parts[2], - } - else: - if not json_mode: - console.print("[yellow]Warning: Invalid weights format. Using defaults.[/yellow]") - except ValueError as e: - if not json_mode: - console.print(f"[yellow]Warning: Invalid weights format ({e}). Using defaults.[/yellow]") - - registry: RegistryStore | None = None - try: - registry = RegistryStore() - registry.initialize() - mapper = PathMapper() - - engine = ChainSearchEngine(registry, mapper, config=config) - - # Optional staged cascade overrides (only meaningful for cascade strategy 'staged') - if staged_stage2_mode is not None: - stage2 = staged_stage2_mode.strip().lower() - if stage2 not in {"precomputed", "realtime", "static_global_graph"}: - msg = "Invalid --staged-stage2-mode. Must be: precomputed | realtime | static_global_graph." - if json_mode: - print_json(success=False, error=msg) - else: - console.print(f"[red]{msg}[/red]") - raise typer.Exit(code=1) - config.staged_stage2_mode = stage2 - - # Map method to SearchOptions flags - # fts: FTS-only search (optionally with fuzzy) - # vector: Pure vector semantic search - # hybrid: RRF fusion of sparse + dense - # cascade: Two-stage binary + dense retrieval - if execution_method == "fts": - hybrid_mode = False - enable_fuzzy = use_fuzzy - enable_vector = False - pure_vector = False - enable_cascade = False - elif execution_method == "vector": - hybrid_mode = True - enable_fuzzy = False - enable_vector = True - pure_vector = True - enable_cascade = False - elif execution_method == "hybrid": - hybrid_mode = True - enable_fuzzy = use_fuzzy - enable_vector = True - pure_vector = False - enable_cascade = False - elif execution_method == "cascade": - hybrid_mode = True - enable_fuzzy = False - enable_vector = True - pure_vector = False - enable_cascade = True - else: - raise ValueError(f"Invalid method: {execution_method}") - - # Parse exclude_extensions from comma-separated string - exclude_exts_list = None - if exclude_extensions: - exclude_exts_list = [ext.strip() for ext in exclude_extensions.split(',') if ext.strip()] - - options = SearchOptions( - depth=depth, - total_limit=limit, - offset=offset, - files_only=files_only, - code_only=code_only, - exclude_extensions=exclude_exts_list, - hybrid_mode=hybrid_mode, - enable_fuzzy=enable_fuzzy, - enable_vector=enable_vector, - pure_vector=pure_vector, - enable_cascade=enable_cascade, - hybrid_weights=hybrid_weights, - ) - - if files_only: - file_paths = engine.search_files_only(query, search_path, options) - payload = {"query": query, "count": len(file_paths), "files": file_paths} - if json_mode: - print_json(success=True, result=payload) - else: - for fp in file_paths: - console.print(fp) - else: - # Dispatch to cascade_search for cascade method - if execution_method == "cascade": - result = engine.cascade_search(query, search_path, k=limit, options=options, strategy=internal_cascade_strategy) - else: - result = engine.search(query, search_path, options) - effective_results = result.results - effective_files_matched = result.stats.files_matched - effective_time_ms = result.stats.time_ms - fallback_payload = None - if display_method == "fts" and not use_fuzzy and not effective_results: - fallback_payload = _filesystem_fallback_search( - query, - search_path, - limit=limit, - config=config, - code_only=code_only, - exclude_extensions=exclude_exts_list, - ) - if fallback_payload is not None: - effective_results = fallback_payload["results"] - effective_files_matched = len(effective_results) - effective_time_ms = result.stats.time_ms + float(fallback_payload["time_ms"]) - - results_list = [ - { - "path": r.path, - "score": r.score, - "excerpt": r.excerpt, - "content": r.content, # Full function/class body - "source": getattr(r, "search_source", None), - "symbol": getattr(r, "symbol", None), - } - for r in effective_results - ] - - payload = { - "query": query, - "method": display_method, - "count": len(results_list), - "results": results_list, - "stats": { - "dirs_searched": result.stats.dirs_searched, - "files_matched": effective_files_matched, - "time_ms": effective_time_ms, - }, - } - if fallback_payload is not None: - payload["fallback"] = fallback_payload["fallback"] - if json_mode: - print_json(success=True, result=payload) - else: - render_search_results(effective_results, verbose=verbose) - if fallback_payload is not None: - console.print("[yellow]No indexed matches found; showing filesystem fallback results (stale index suspected).[/yellow]") - console.print(f"[dim]Method: {display_method} | Searched {result.stats.dirs_searched} directories in {effective_time_ms:.1f}ms[/dim]") - - except SearchError as exc: - if json_mode: - print_json(success=False, error=f"Search error: {exc}") - else: - console.print(f"[red]Search failed (query):[/red] {exc}") - raise typer.Exit(code=1) - except StorageError as exc: - if json_mode: - print_json(success=False, error=f"Storage error: {exc}") - else: - console.print(f"[red]Search failed (storage):[/red] {exc}") - raise typer.Exit(code=1) - except CodexLensError as exc: - if json_mode: - print_json(success=False, error=str(exc)) - else: - console.print(f"[red]Search failed:[/red] {exc}") - raise typer.Exit(code=1) - finally: - if registry is not None: - registry.close() - - -@app.command() -def symbol( - name: str = typer.Argument(..., help="Symbol name to look up."), - path: Path = typer.Option(Path("."), "--path", "-p", help="Directory to search from."), - kind: Optional[str] = typer.Option( - None, - "--kind", - "-k", - help="Filter by kind (function|class|method).", - ), - limit: int = typer.Option(50, "--limit", "-n", min=1, max=500, help="Max symbols."), - depth: int = typer.Option(-1, "--depth", "-d", help="Search depth (-1 = unlimited)."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), -) -> None: - """Look up symbols by name and optional kind.""" - _configure_logging(verbose, json_mode) - search_path = path.expanduser().resolve() - - registry: RegistryStore | None = None - try: - registry = RegistryStore() - registry.initialize() - mapper = PathMapper() - - engine = ChainSearchEngine(registry, mapper, config=config) - options = SearchOptions(depth=depth, total_limit=limit) - - syms = engine.search_symbols(name, search_path, kind=kind, options=options) - - payload = {"name": name, "kind": kind, "count": len(syms), "symbols": syms} - if json_mode: - print_json(success=True, result=payload) - else: - render_symbols(syms) - - except SearchError as exc: - if json_mode: - print_json(success=False, error=f"Search error: {exc}") - else: - console.print(f"[red]Symbol lookup failed (search):[/red] {exc}") - raise typer.Exit(code=1) - except StorageError as exc: - if json_mode: - print_json(success=False, error=f"Storage error: {exc}") - else: - console.print(f"[red]Symbol lookup failed (storage):[/red] {exc}") - raise typer.Exit(code=1) - except CodexLensError as exc: - if json_mode: - print_json(success=False, error=str(exc)) - else: - console.print(f"[red]Symbol lookup failed:[/red] {exc}") - raise typer.Exit(code=1) - finally: - if registry is not None: - registry.close() - - -@app.command() -def inspect( - file: Path = typer.Argument(..., exists=True, dir_okay=False, help="File to analyze."), - symbols: bool = typer.Option(False, "--symbols", help="Show discovered symbols (default)."), - no_symbols: bool = typer.Option(False, "--no-symbols", help="Hide discovered symbols."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), -) -> None: - """Analyze a single file and display symbols.""" - _configure_logging(verbose, json_mode) - if symbols and no_symbols: - _fail_mutually_exclusive("--symbols", "--no-symbols", json_mode) - show_symbols = True if (symbols or not no_symbols) else False - config = Config.load() - factory = ParserFactory(config) - - file_path = file.expanduser().resolve() - try: - text = file_path.read_text(encoding="utf-8", errors="ignore") - language_id = config.language_for_path(file_path) or "unknown" - parser = factory.get_parser(language_id) - indexed = parser.parse(text, file_path) - payload = {"file": indexed, "content_lines": len(text.splitlines())} - if json_mode: - print_json(success=True, result=payload) - else: - if show_symbols: - render_file_inspect(indexed.path, indexed.language, indexed.symbols) - else: - render_status({"file": indexed.path, "language": indexed.language}) - except ParseError as exc: - if json_mode: - print_json(success=False, error=f"Parse error: {exc}") - else: - console.print(f"[red]Inspect failed (parse):[/red] {exc}") - raise typer.Exit(code=1) - except FileNotFoundError as exc: - if json_mode: - print_json(success=False, error=f"File not found: {exc}") - else: - console.print(f"[red]Inspect failed (file not found):[/red] {exc}") - raise typer.Exit(code=1) - except PermissionError as exc: - if json_mode: - print_json(success=False, error=f"Permission denied: {exc}") - else: - console.print(f"[red]Inspect failed (permission denied):[/red] {exc}") - raise typer.Exit(code=1) - except CodexLensError as exc: - if json_mode: - print_json(success=False, error=str(exc)) - else: - console.print(f"[red]Inspect failed:[/red] {exc}") - raise typer.Exit(code=1) - - -@app.command() -def status( - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), -) -> None: - """Show index status and configuration.""" - _configure_logging(verbose, json_mode) - - registry: RegistryStore | None = None - try: - registry = RegistryStore() - registry.initialize() - mapper = PathMapper() - - # Get all projects - projects = registry.list_projects() - - # Calculate total stats - total_files = sum(p.total_files for p in projects) - total_dirs = sum(p.total_dirs for p in projects) - - # Get index root size - index_root = mapper.index_root - index_size = 0 - if index_root.exists(): - for f in index_root.rglob("*"): - if f.is_file(): - index_size += f.stat().st_size - - # Check schema version and enabled features - schema_version = None - has_dual_fts = False - if projects and index_root.exists(): - # Check first index database for features - index_files = list(index_root.rglob("_index.db")) - if index_files: - try: - with DirIndexStore(index_files[0]) as store: - with store._lock: - conn = store._get_connection() - schema_version = store._get_schema_version(conn) - # Check if dual FTS tables exist - cursor = conn.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name IN ('files_fts_exact', 'files_fts_fuzzy')" - ) - fts_tables = [row[0] for row in cursor.fetchall()] - has_dual_fts = len(fts_tables) == 2 - except Exception: - pass - - # Check embeddings coverage - embeddings_info = None - has_vector_search = False - try: - from codexlens.cli.embedding_manager import get_embeddings_status - - if index_root.exists(): - embed_status = get_embeddings_status(index_root) - if embed_status["success"]: - embeddings_info = embed_status["result"] - # Enable vector search if coverage >= 50% - has_vector_search = embeddings_info["coverage_percent"] >= 50.0 - except ImportError: - # Embedding manager not available - pass - except Exception as e: - logging.debug(f"Failed to get embeddings status: {e}") - - stats = { - "index_root": str(index_root), - "registry_path": str(_get_registry_path()), - "projects_count": len(projects), - "total_files": total_files, - "total_dirs": total_dirs, - "index_size_bytes": index_size, - "index_size_mb": round(index_size / (1024 * 1024), 2), - "schema_version": schema_version, - "features": { - "exact_fts": True, # Always available - "fuzzy_fts": has_dual_fts, - "hybrid_search": has_dual_fts, - "vector_search": has_vector_search, - }, - } - - # Add embeddings info if available - if embeddings_info: - stats["embeddings"] = embeddings_info - - if json_mode: - print_json(success=True, result=stats) - else: - console.print("[bold]CodexLens Status[/bold]") - console.print(f" Index Root: {stats['index_root']}") - console.print(f" Registry: {stats['registry_path']}") - console.print(f" Projects: {stats['projects_count']}") - console.print(f" Total Files: {stats['total_files']}") - console.print(f" Total Directories: {stats['total_dirs']}") - console.print(f" Index Size: {stats['index_size_mb']} MB") - if schema_version: - console.print(f" Schema Version: {schema_version}") - console.print("\n[bold]Search Backends:[/bold]") - console.print(f" Exact FTS: ✓ (unicode61)") - if has_dual_fts: - console.print(f" Fuzzy FTS: ✓ (trigram)") - console.print(f" Hybrid Search: ✓ (RRF fusion)") - else: - console.print(f" Fuzzy FTS: ✗ (run 'migrate' to enable)") - console.print(f" Hybrid Search: ✗ (run 'migrate' to enable)") - - if has_vector_search: - console.print(f" Vector Search: ✓ (embeddings available)") - else: - console.print(f" Vector Search: ✗ (no embeddings or coverage < 50%)") - - # Display embeddings statistics if available - if embeddings_info: - console.print("\n[bold]Embeddings Coverage:[/bold]") - console.print(f" Total Indexes: {embeddings_info['total_indexes']}") - console.print(f" Total Files: {embeddings_info['total_files']}") - console.print(f" Files with Embeddings: {embeddings_info['files_with_embeddings']}") - console.print(f" Coverage: {embeddings_info['coverage_percent']:.1f}%") - console.print(f" Total Chunks: {embeddings_info['total_chunks']}") - - # Display model information if available - model_info = embeddings_info.get('model_info') - if model_info: - console.print("\n[bold]Embedding Model:[/bold]") - console.print(f" Backend: [cyan]{model_info.get('backend', 'unknown')}[/cyan]") - console.print(f" Model: [cyan]{model_info.get('model_profile', 'unknown')}[/cyan] ({model_info.get('model_name', '')})") - console.print(f" Dimensions: {model_info.get('embedding_dim', 'unknown')}") - if model_info.get('updated_at'): - console.print(f" Last Updated: {model_info['updated_at']}") - - except StorageError as exc: - if json_mode: - print_json(success=False, error=f"Storage error: {exc}") - else: - console.print(f"[red]Status failed (storage):[/red] {exc}") - raise typer.Exit(code=1) - except CodexLensError as exc: - if json_mode: - print_json(success=False, error=str(exc)) - else: - console.print(f"[red]Status failed:[/red] {exc}") - raise typer.Exit(code=1) - finally: - if registry is not None: - registry.close() - - -@app.command(name="lsp-status") -def lsp_status( - path: Path = typer.Option(Path("."), "--path", "-p", help="Workspace root for LSP probing."), - probe_file: Optional[Path] = typer.Option( - None, - "--probe-file", - help="Optional file path to probe (starts the matching language server and prints capabilities).", - ), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), -) -> None: - """Show standalone LSP configuration and optionally probe a language server. - - This exercises the existing LSP server selection/startup path in StandaloneLspManager. - """ - _configure_logging(verbose, json_mode) - - import asyncio - import shutil - - from codexlens.lsp.standalone_manager import StandaloneLspManager - - workspace_root = path.expanduser().resolve() - probe_path = probe_file.expanduser().resolve() if probe_file is not None else None - - async def _run(): - manager = StandaloneLspManager(workspace_root=str(workspace_root)) - await manager.start() - - servers = [] - for language_id, cfg in sorted(manager._configs.items()): # type: ignore[attr-defined] - cmd0 = cfg.command[0] if cfg.command else None - servers.append( - { - "language_id": language_id, - "display_name": cfg.display_name, - "extensions": list(cfg.extensions), - "command": list(cfg.command), - "command_available": bool(shutil.which(cmd0)) if cmd0 else False, - } - ) - - probe = None - if probe_path is not None: - state = await manager._get_server(str(probe_path)) - if state is None: - probe = { - "file": str(probe_path), - "ok": False, - "error": "No language server configured/available for this file.", - } - else: - probe = { - "file": str(probe_path), - "ok": True, - "language_id": state.config.language_id, - "display_name": state.config.display_name, - "initialized": bool(state.initialized), - "capabilities": state.capabilities, - } - - await manager.stop() - return {"workspace_root": str(workspace_root), "servers": servers, "probe": probe} - - try: - payload = asyncio.run(_run()) - except Exception as exc: - if json_mode: - print_json(success=False, error=f"LSP status failed: {exc}") - else: - console.print(f"[red]LSP status failed:[/red] {exc}") - raise typer.Exit(code=1) - - if json_mode: - print_json(success=True, result=payload) - return - - console.print("[bold]CodexLens LSP Status[/bold]") - console.print(f" Workspace: {payload['workspace_root']}") - console.print("\n[bold]Configured Servers:[/bold]") - for s in payload["servers"]: - ok = "✓" if s["command_available"] else "✗" - console.print(f" {ok} {s['display_name']} ({s['language_id']}) -> {s['command'][0] if s['command'] else ''}") - console.print(f" Extensions: {', '.join(s['extensions'])}") - - if payload["probe"] is not None: - probe = payload["probe"] - console.print("\n[bold]Probe:[/bold]") - if not probe.get("ok"): - console.print(f" ✗ {probe.get('file')}") - console.print(f" {probe.get('error')}") - else: - console.print(f" ✓ {probe.get('file')}") - console.print(f" Server: {probe.get('display_name')} ({probe.get('language_id')})") - console.print(f" Initialized: {probe.get('initialized')}") - - -@app.command(name="reranker-status") -def reranker_status( - probe: bool = typer.Option( - False, - "--probe", - help="Send a small rerank request to validate connectivity and credentials.", - ), - provider: Optional[str] = typer.Option( - None, - "--provider", - help="Reranker provider: siliconflow | cohere | jina (default: from env, else siliconflow).", - ), - api_base: Optional[str] = typer.Option( - None, - "--api-base", - help="Override API base URL (e.g. https://api.siliconflow.cn or https://api.cohere.ai).", - ), - model: Optional[str] = typer.Option( - None, - "--model", - help="Override reranker model name (provider-specific).", - ), - query: str = typer.Option("ping", "--query", help="Probe query text (used with --probe)."), - document: str = typer.Option("pong", "--document", help="Probe document text (used with --probe)."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), -) -> None: - """Show reranker configuration and optionally probe the API backend. - - This is the fastest way to confirm that "重排" can actually execute end-to-end. - """ - _configure_logging(verbose, json_mode) - - import time - - from codexlens.env_config import load_global_env - from codexlens.semantic.reranker.api_reranker import ( - APIReranker, - _normalize_api_base_for_endpoint, - ) - - env = load_global_env() - - def _env_get(key: str) -> Optional[str]: - return ( - os.environ.get(key) - or os.environ.get(f"CODEXLENS_{key}") - or env.get(key) - or env.get(f"CODEXLENS_{key}") - ) - - effective_provider = (provider or _env_get("RERANKER_PROVIDER") or "siliconflow").strip() - effective_api_base = (api_base or _env_get("RERANKER_API_BASE") or "").strip() or None - effective_model = (model or _env_get("RERANKER_MODEL") or "").strip() or None - - # Do not leak secrets; only report whether a key is configured. - key_present = bool((_env_get("RERANKER_API_KEY") or "").strip()) - - provider_key = effective_provider.strip().lower() - defaults = getattr(APIReranker, "_PROVIDER_DEFAULTS", {}).get(provider_key, {}) - endpoint = defaults.get("endpoint", "/v1/rerank") - configured_base = effective_api_base or defaults.get("api_base") or "" - normalized_base = _normalize_api_base_for_endpoint(api_base=configured_base, endpoint=endpoint) - - payload: Dict[str, Any] = { - "provider": effective_provider, - "api_base": effective_api_base, - "endpoint": endpoint, - "normalized_api_base": normalized_base or None, - "request_url": f"{normalized_base}{endpoint}" if normalized_base else None, - "model": effective_model, - "api_key_configured": key_present, - "probe": None, - } - - if probe: - t0 = time.perf_counter() - try: - reranker = APIReranker( - provider=effective_provider, - api_base=effective_api_base, - model_name=effective_model, - ) - try: - scores = reranker.score_pairs([(query, document)]) - finally: - reranker.close() - resolved_base = getattr(reranker, "api_base", None) - resolved_endpoint = getattr(reranker, "endpoint", None) - request_url = ( - f"{resolved_base}{resolved_endpoint}" - if resolved_base and resolved_endpoint - else None - ) - payload["probe"] = { - "ok": True, - "latency_ms": (time.perf_counter() - t0) * 1000.0, - "score": float(scores[0]) if scores else None, - "normalized_api_base": resolved_base, - "request_url": request_url, - } - except Exception as exc: - payload["probe"] = { - "ok": False, - "latency_ms": (time.perf_counter() - t0) * 1000.0, - "error": f"{type(exc).__name__}: {exc}", - } - - if json_mode: - print_json(success=True, result=payload) - return - - console.print("[bold]CodexLens Reranker Status[/bold]") - console.print(f" Provider: {payload['provider']}") - console.print(f" API Base: {payload['api_base'] or '(default)'}") - if payload.get("normalized_api_base"): - console.print(f" API Base (normalized): {payload['normalized_api_base']}") - console.print(f" Endpoint: {payload.get('endpoint')}") - if payload.get("request_url"): - console.print(f" Request URL: {payload['request_url']}") - console.print(f" Model: {payload['model'] or '(default)'}") - console.print(f" API Key: {'set' if key_present else 'missing'}") - - if payload["probe"] is not None: - probe_payload = payload["probe"] - console.print("\n[bold]Probe:[/bold]") - if probe_payload.get("ok"): - console.print(f" ✓ OK ({probe_payload.get('latency_ms'):.1f}ms)") - console.print(f" Score: {probe_payload.get('score')}") - else: - console.print(f" ✗ Failed ({probe_payload.get('latency_ms'):.1f}ms)") - console.print(f" {probe_payload.get('error')}") - - -@app.command() -def projects( - action: str = typer.Argument("list", help="Action: list, show, remove"), - project_path: Optional[Path] = typer.Argument(None, help="Project path (for show/remove)."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), -) -> None: - """Manage registered projects in the global registry. - - Actions: - - list: Show all registered projects - - show : Show details for a specific project - - remove : Remove a project from the registry - """ - _configure_logging(verbose, json_mode) - - registry: RegistryStore | None = None - try: - registry = RegistryStore() - registry.initialize() - - if action == "list": - project_list = registry.list_projects() - if json_mode: - result = [ - { - "id": p.id, - "source_root": str(p.source_root), - "index_root": str(p.index_root), - "total_files": p.total_files, - "total_dirs": p.total_dirs, - "status": p.status, - } - for p in project_list - ] - print_json(success=True, result=result) - else: - if not project_list: - console.print("[yellow]No projects registered.[/yellow]") - else: - table = Table(title="Registered Projects") - table.add_column("ID", style="dim") - table.add_column("Source Root") - table.add_column("Files", justify="right") - table.add_column("Dirs", justify="right") - table.add_column("Status") - - for p in project_list: - table.add_row( - str(p.id), - str(p.source_root), - str(p.total_files), - str(p.total_dirs), - p.status, - ) - console.print(table) - - elif action == "show": - if not project_path: - raise typer.BadParameter("Project path required for 'show' action") - - project_path = project_path.expanduser().resolve() - project_info = registry.get_project(project_path) - - if not project_info: - if json_mode: - print_json(success=False, error=f"Project not found: {project_path}") - else: - console.print(f"[red]Project not found:[/red] {project_path}") - raise typer.Exit(code=1) - - if json_mode: - result = { - "id": project_info.id, - "source_root": str(project_info.source_root), - "index_root": str(project_info.index_root), - "total_files": project_info.total_files, - "total_dirs": project_info.total_dirs, - "status": project_info.status, - "created_at": project_info.created_at, - "last_indexed": project_info.last_indexed, - } - print_json(success=True, result=result) - else: - console.print(f"[bold]Project:[/bold] {project_info.source_root}") - console.print(f" ID: {project_info.id}") - console.print(f" Index Root: {project_info.index_root}") - console.print(f" Files: {project_info.total_files}") - console.print(f" Directories: {project_info.total_dirs}") - console.print(f" Status: {project_info.status}") - - # Show directory breakdown - dirs = registry.get_project_dirs(project_info.id) - if dirs: - console.print(f"\n [bold]Indexed Directories:[/bold] {len(dirs)}") - for d in dirs[:10]: - console.print(f" - {d.source_path.name}/ ({d.files_count} files)") - if len(dirs) > 10: - console.print(f" ... and {len(dirs) - 10} more") - - elif action == "remove": - if not project_path: - raise typer.BadParameter("Project path required for 'remove' action") - - project_path = project_path.expanduser().resolve() - removed = registry.unregister_project(project_path) - - if removed: - mapper = PathMapper() - index_root = mapper.source_to_index_dir(project_path) - if index_root.exists(): - _remove_tree_best_effort(index_root) - - if json_mode: - print_json(success=True, result={"removed": str(project_path)}) - else: - console.print(f"[green]Removed:[/green] {project_path}") - else: - if json_mode: - print_json(success=False, error=f"Project not found: {project_path}") - else: - console.print(f"[yellow]Project not found:[/yellow] {project_path}") - - else: - raise typer.BadParameter(f"Unknown action: {action}. Use list, show, or remove.") - - except typer.BadParameter: - raise - except StorageError as exc: - if json_mode: - print_json(success=False, error=f"Storage error: {exc}") - else: - console.print(f"[red]Projects command failed (storage):[/red] {exc}") - raise typer.Exit(code=1) - except PermissionError as exc: - if json_mode: - print_json(success=False, error=f"Permission denied: {exc}") - else: - console.print(f"[red]Projects command failed (permission denied):[/red] {exc}") - raise typer.Exit(code=1) - except CodexLensError as exc: - if json_mode: - print_json(success=False, error=str(exc)) - else: - console.print(f"[red]Projects command failed:[/red] {exc}") - raise typer.Exit(code=1) - finally: - if registry is not None: - registry.close() - - -@app.command() -def config( - action: str = typer.Argument("show", help="Action: show, set, migrate"), - key: Optional[str] = typer.Argument(None, help="Config key (for set action)."), - value: Optional[str] = typer.Argument(None, help="Config value (for set action)."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), -) -> None: - """Manage CodexLens configuration. - - Actions: - - show: Display current configuration - - set : Set configuration value - - migrate : Migrate indexes to new location - - Config keys: - - index_dir: Directory to store indexes (default: ~/.codexlens/indexes) - - reranker_backend: Reranker backend (onnx, api, litellm, legacy) - - reranker_model: Reranker model name - - reranker_enabled: Enable reranking (true/false) - - reranker_top_k: Number of results to rerank - - reranker_api_provider: API provider for reranker (siliconflow, cohere, jina) - - embedding_backend: Embedding backend (fastembed, litellm) - - embedding_model: Embedding model profile or name - """ - _configure_logging(verbose, json_mode) - - config_file = Path.home() / ".codexlens" / "config.json" - - def load_config() -> Dict[str, Any]: - if config_file.exists(): - return json.loads(config_file.read_text(encoding="utf-8")) - return {} - - def save_config(cfg: Dict[str, Any]) -> None: - config_file.parent.mkdir(parents=True, exist_ok=True) - config_file.write_text(json.dumps(cfg, indent=2), encoding="utf-8") - - try: - if action == "show": - cfg = load_config() - current_index_dir = os.getenv("CODEXLENS_INDEX_DIR") or cfg.get("index_dir") or str(Path.home() / ".codexlens" / "indexes") - - result = { - "config_file": str(config_file), - "index_dir": current_index_dir, - "env_override": os.getenv("CODEXLENS_INDEX_DIR"), - } - - # Load settings.json for reranker and other runtime settings - settings_file = Path.home() / ".codexlens" / "settings.json" - if settings_file.exists(): - try: - settings = json.loads(settings_file.read_text(encoding="utf-8")) - # Extract reranker settings (flat keys for CCW compatibility) - reranker = settings.get("reranker", {}) - if reranker.get("backend"): - result["reranker_backend"] = reranker["backend"] - if reranker.get("model"): - result["reranker_model"] = reranker["model"] - if reranker.get("enabled") is not None: - result["reranker_enabled"] = reranker["enabled"] - if reranker.get("top_k"): - result["reranker_top_k"] = reranker["top_k"] - if reranker.get("api_provider"): - result["reranker_api_provider"] = reranker["api_provider"] - # Extract embedding settings - embedding = settings.get("embedding", {}) - if embedding.get("backend"): - result["embedding_backend"] = embedding["backend"] - if embedding.get("model"): - result["embedding_model"] = embedding["model"] - if embedding.get("auto_embed_missing") is not None: - result["embedding_auto_embed_missing"] = embedding["auto_embed_missing"] - except (json.JSONDecodeError, OSError): - pass # Settings file not readable, continue with defaults - - # Load .env overrides from global ~/.codexlens/.env - env_overrides: Dict[str, str] = {} - try: - from codexlens.env_config import load_global_env - env_overrides = load_global_env() - except ImportError: - pass - - # Apply .env overrides (highest priority) and track them - if env_overrides.get("EMBEDDING_MODEL"): - result["embedding_model"] = env_overrides["EMBEDDING_MODEL"] - result["embedding_model_source"] = ".env" - if env_overrides.get("EMBEDDING_BACKEND"): - result["embedding_backend"] = env_overrides["EMBEDDING_BACKEND"] - result["embedding_backend_source"] = ".env" - auto_embed_missing_override = env_overrides.get("CODEXLENS_AUTO_EMBED_MISSING") or env_overrides.get("AUTO_EMBED_MISSING") - if auto_embed_missing_override: - result["embedding_auto_embed_missing"] = auto_embed_missing_override.lower() in ("true", "1", "yes", "on") - result["embedding_auto_embed_missing_source"] = ".env" - if env_overrides.get("RERANKER_MODEL"): - result["reranker_model"] = env_overrides["RERANKER_MODEL"] - result["reranker_model_source"] = ".env" - if env_overrides.get("RERANKER_BACKEND"): - result["reranker_backend"] = env_overrides["RERANKER_BACKEND"] - result["reranker_backend_source"] = ".env" - if env_overrides.get("RERANKER_ENABLED"): - result["reranker_enabled"] = env_overrides["RERANKER_ENABLED"].lower() in ("true", "1", "yes", "on") - result["reranker_enabled_source"] = ".env" - if env_overrides.get("RERANKER_PROVIDER") or os.getenv("RERANKER_PROVIDER"): - result["reranker_api_provider"] = env_overrides.get("RERANKER_PROVIDER") or os.getenv("RERANKER_PROVIDER") - - if json_mode: - print_json(success=True, result=result) - else: - console.print("[bold]CodexLens Configuration[/bold]") - console.print(f" Config File: {result['config_file']}") - console.print(f" Index Directory: {result['index_dir']}") - if result['env_override']: - console.print(f" [dim](Override via CODEXLENS_INDEX_DIR)[/dim]") - - # Show embedding settings - console.print(f"\n[bold]Embedding[/bold]") - backend = result.get('embedding_backend', 'fastembed') - backend_source = result.get('embedding_backend_source', 'settings.json') - console.print(f" Backend: {backend} [dim]({backend_source})[/dim]") - model = result.get('embedding_model', 'code') - model_source = result.get('embedding_model_source', 'settings.json') - console.print(f" Model: {model} [dim]({model_source})[/dim]") - auto_embed_missing = result.get("embedding_auto_embed_missing", True) - auto_embed_missing_source = result.get("embedding_auto_embed_missing_source", "settings.json") - console.print(f" Auto Embed Missing: {auto_embed_missing} [dim]({auto_embed_missing_source})[/dim]") - - # Show reranker settings - console.print(f"\n[bold]Reranker[/bold]") - backend = result.get('reranker_backend', 'fastembed') - backend_source = result.get('reranker_backend_source', 'settings.json') - console.print(f" Backend: {backend} [dim]({backend_source})[/dim]") - model = result.get('reranker_model', 'N/A') - model_source = result.get('reranker_model_source', 'settings.json') - console.print(f" Model: {model} [dim]({model_source})[/dim]") - enabled = result.get('reranker_enabled', False) - enabled_source = result.get('reranker_enabled_source', 'settings.json') - console.print(f" Enabled: {enabled} [dim]({enabled_source})[/dim]") - - elif action == "set": - if not key: - raise typer.BadParameter("Config key required for 'set' action") - if not value: - raise typer.BadParameter("Config value required for 'set' action") - - cfg = load_config() - - if key == "index_dir": - new_path = Path(value).expanduser().resolve() - cfg["index_dir"] = str(new_path) - save_config(cfg) - - if json_mode: - print_json(success=True, result={"key": key, "value": str(new_path)}) - else: - console.print(f"[green]Set {key}=[/green] {new_path}") - console.print("[yellow]Note: Existing indexes remain at old location. Use 'config migrate' to move them.[/yellow]") - - # Handle reranker and embedding settings (stored in settings.json) - elif key in ("reranker_backend", "reranker_model", "reranker_enabled", "reranker_top_k", - "embedding_backend", "embedding_model", "embedding_auto_embed_missing", "reranker_api_provider"): - settings_file = Path.home() / ".codexlens" / "settings.json" - settings_file.parent.mkdir(parents=True, exist_ok=True) - - # Load existing settings - settings: Dict[str, Any] = {} - if settings_file.exists(): - try: - settings = json.loads(settings_file.read_text(encoding="utf-8")) - except (json.JSONDecodeError, OSError): - pass - - # Ensure nested structures exist - if "reranker" not in settings: - settings["reranker"] = {} - if "embedding" not in settings: - settings["embedding"] = {} - - # Map flat keys to nested structure - if key == "reranker_backend": - settings["reranker"]["backend"] = value - elif key == "reranker_model": - settings["reranker"]["model"] = value - elif key == "reranker_enabled": - settings["reranker"]["enabled"] = value.lower() in ("true", "1", "yes") - elif key == "reranker_top_k": - settings["reranker"]["top_k"] = int(value) - elif key == "reranker_api_provider": - settings["reranker"]["api_provider"] = value - elif key == "embedding_backend": - settings["embedding"]["backend"] = value - elif key == "embedding_model": - settings["embedding"]["model"] = value - elif key == "embedding_auto_embed_missing": - settings["embedding"]["auto_embed_missing"] = value.lower() in ("true", "1", "yes", "on") - - # Save settings - settings_file.write_text(json.dumps(settings, indent=2), encoding="utf-8") - - if json_mode: - print_json(success=True, result={"key": key, "value": value}) - else: - console.print(f"[green]Set {key}=[/green] {value}") - else: - raise typer.BadParameter(f"Unknown config key: {key}") - - elif action == "migrate": - if not key: - raise typer.BadParameter("New path required for 'migrate' action") - - new_path = Path(key).expanduser().resolve() - mapper = PathMapper() - old_path = mapper.index_root - - if not old_path.exists(): - if json_mode: - print_json(success=False, error="No indexes to migrate") - else: - console.print("[yellow]No indexes to migrate.[/yellow]") - return - - # Create new directory - new_path.mkdir(parents=True, exist_ok=True) - - # Count items to migrate - items = list(old_path.iterdir()) - migrated = 0 - - with Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - BarColumn(), - TextColumn("{task.completed}/{task.total}"), - TimeElapsedColumn(), - console=console, - ) as progress: - task = progress.add_task("Migrating indexes", total=len(items)) - - for item in items: - dest = new_path / item.name - if item.is_dir(): - shutil.copytree(item, dest, dirs_exist_ok=True) - else: - shutil.copy2(item, dest) - migrated += 1 - progress.advance(task) - - # Update config - cfg = load_config() - cfg["index_dir"] = str(new_path) - save_config(cfg) - - # Update registry paths - registry = RegistryStore() - registry.initialize() - registry.update_index_paths(old_path, new_path) - registry.close() - - result = { - "migrated_from": str(old_path), - "migrated_to": str(new_path), - "items_migrated": migrated, - } - - if json_mode: - print_json(success=True, result=result) - else: - console.print(f"[green]Migrated {migrated} items to:[/green] {new_path}") - console.print("[dim]Old indexes can be manually deleted after verifying migration.[/dim]") - - else: - raise typer.BadParameter(f"Unknown action: {action}. Use show, set, or migrate.") - - except typer.BadParameter: - raise - except ConfigError as exc: - if json_mode: - print_json(success=False, error=f"Configuration error: {exc}") - else: - console.print(f"[red]Config command failed (config):[/red] {exc}") - raise typer.Exit(code=1) - except StorageError as exc: - if json_mode: - print_json(success=False, error=f"Storage error: {exc}") - else: - console.print(f"[red]Config command failed (storage):[/red] {exc}") - raise typer.Exit(code=1) - except PermissionError as exc: - if json_mode: - print_json(success=False, error=f"Permission denied: {exc}") - else: - console.print(f"[red]Config command failed (permission denied):[/red] {exc}") - raise typer.Exit(code=1) - except CodexLensError as exc: - if json_mode: - print_json(success=False, error=str(exc)) - else: - console.print(f"[red]Config command failed:[/red] {exc}") - raise typer.Exit(code=1) - - -@app.command() -def migrate( - path: Path = typer.Argument(Path("."), exists=True, file_okay=False, dir_okay=True, help="Project root to migrate."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), -) -> None: - """Migrate project indexes to latest schema (Dual-FTS upgrade). - - Upgrades all _index.db files in the project to schema version 4, which includes: - - Dual FTS tables (exact + fuzzy) - - Encoding detection support - - Incremental indexing metadata - - This is a safe operation that preserves all existing data. - Progress is shown during migration. - """ - _configure_logging(verbose, json_mode) - base_path = path.expanduser().resolve() - - registry: RegistryStore | None = None - try: - registry = RegistryStore() - registry.initialize() - mapper = PathMapper() - - # Find project - project_info = registry.get_project(base_path) - if not project_info: - raise CodexLensError(f"No index found for: {base_path}. Run 'codex-lens init' first.") - - index_dir = mapper.source_to_index_dir(base_path) - if not index_dir.exists(): - raise CodexLensError(f"Index directory not found: {index_dir}") - - # Find all _index.db files - index_files = list(index_dir.rglob("_index.db")) - - if not index_files: - if json_mode: - print_json(success=True, result={"message": "No indexes to migrate", "migrated": 0}) - else: - console.print("[yellow]No indexes found to migrate.[/yellow]") - return - - migrated_count = 0 - error_count = 0 - already_migrated = 0 - - with Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - BarColumn(), - TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), - TextColumn("({task.completed}/{task.total})"), - TimeElapsedColumn(), - console=console, - ) as progress: - task = progress.add_task(f"Migrating {len(index_files)} indexes...", total=len(index_files)) - - for db_path in index_files: - try: - store = DirIndexStore(db_path) - - # Check current version - with store._lock: - conn = store._get_connection() - current_version = store._get_schema_version(conn) - - if current_version >= DirIndexStore.SCHEMA_VERSION: - already_migrated += 1 - if verbose: - progress.console.print(f"[dim]Already migrated: {db_path.parent.name}[/dim]") - elif current_version > 0: - # Apply migrations - store._apply_migrations(conn, current_version) - store._set_schema_version(conn, DirIndexStore.SCHEMA_VERSION) - conn.commit() - migrated_count += 1 - if verbose: - progress.console.print(f"[green]Migrated: {db_path.parent.name} (v{current_version} → v{DirIndexStore.SCHEMA_VERSION})[/green]") - else: - # New database, initialize directly - store.initialize() - migrated_count += 1 - - store.close() - - except Exception as e: - error_count += 1 - if verbose: - progress.console.print(f"[red]Error migrating {db_path}: {e}[/red]") - - progress.update(task, advance=1) - - result = { - "path": str(base_path), - "total_indexes": len(index_files), - "migrated": migrated_count, - "already_migrated": already_migrated, - "errors": error_count, - } - - if json_mode: - print_json(success=True, result=result) - else: - console.print(f"[green]Migration complete:[/green]") - console.print(f" Total indexes: {len(index_files)}") - console.print(f" Migrated: {migrated_count}") - console.print(f" Already up-to-date: {already_migrated}") - if error_count > 0: - console.print(f" [yellow]Errors: {error_count}[/yellow]") - - except StorageError as exc: - if json_mode: - print_json(success=False, error=f"Storage error: {exc}") - else: - console.print(f"[red]Migration failed (storage):[/red] {exc}") - raise typer.Exit(code=1) - except CodexLensError as exc: - if json_mode: - print_json(success=False, error=str(exc)) - else: - console.print(f"[red]Migration failed:[/red] {exc}") - raise typer.Exit(code=1) - finally: - if registry is not None: - registry.close() - - -@app.command() -def clean( - path: Optional[Path] = typer.Argument(None, help="Project path to clean (removes project index)."), - all_indexes: bool = typer.Option(False, "--all", "-a", help="Remove all indexes."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), -) -> None: - """Remove CodexLens index data. - - Without arguments, shows current index size. - With path, removes that project's indexes. - With --all, removes all indexes (use with caution). - """ - _configure_logging(verbose, json_mode) - - try: - mapper = PathMapper() - index_root = mapper.index_root - - if all_indexes: - # Remove everything - if not index_root.exists(): - if json_mode: - print_json(success=True, result={"cleaned": None, "message": "No indexes to clean"}) - else: - console.print("[yellow]No indexes to clean.[/yellow]") - return - - # Calculate size before removal - total_size = 0 - for f in index_root.rglob("*"): - if f.is_file(): - total_size += f.stat().st_size - - # Remove registry first - registry_path = _get_registry_path() - if registry_path.exists(): - registry_path.unlink() - - # Remove all indexes - removal = _remove_tree_best_effort(index_root) - - result = { - "cleaned": str(index_root), - "size_freed_mb": round(total_size / (1024 * 1024), 2), - "partial": bool(removal["partial"]), - "locked_paths": removal["locked_paths"], - "remaining_path": removal["remaining_path"], - "errors": removal["errors"], - } - - if json_mode: - print_json(success=True, result=result) - else: - if result["partial"]: - console.print( - f"[yellow]Partially removed all indexes:[/yellow] {result['size_freed_mb']} MB freed" - ) - if result["locked_paths"]: - console.print( - f"[dim]Locked paths left behind: {len(result['locked_paths'])}[/dim]" - ) - else: - console.print(f"[green]Removed all indexes:[/green] {result['size_freed_mb']} MB freed") - - elif path: - # Remove specific project - project_path = path.expanduser().resolve() - project_index = mapper.source_to_index_dir(project_path) - - if not project_index.exists(): - if json_mode: - print_json(success=False, error=f"No index found for: {project_path}") - else: - console.print(f"[yellow]No index found for:[/yellow] {project_path}") - return - - # Calculate size - total_size = 0 - for f in project_index.rglob("*"): - if f.is_file(): - total_size += f.stat().st_size - - # Remove from registry - registry = RegistryStore() - registry.initialize() - registry.unregister_project(project_path) - registry.close() - - # Remove indexes - removal = _remove_tree_best_effort(project_index) - - result = { - "cleaned": str(project_path), - "index_path": str(project_index), - "size_freed_mb": round(total_size / (1024 * 1024), 2), - "partial": bool(removal["partial"]), - "locked_paths": removal["locked_paths"], - "remaining_path": removal["remaining_path"], - "errors": removal["errors"], - } - - if json_mode: - print_json(success=True, result=result) - else: - if result["partial"]: - console.print(f"[yellow]Partially removed indexes for:[/yellow] {project_path}") - if result["locked_paths"]: - console.print( - f"[dim]Locked paths left behind: {len(result['locked_paths'])}[/dim]" - ) - else: - console.print(f"[green]Removed indexes for:[/green] {project_path}") - console.print(f" Freed: {result['size_freed_mb']} MB") - - else: - # Show current status - if not index_root.exists(): - if json_mode: - print_json(success=True, result={"index_root": str(index_root), "exists": False}) - else: - console.print("[yellow]No indexes found.[/yellow]") - return - - total_size = 0 - for f in index_root.rglob("*"): - if f.is_file(): - total_size += f.stat().st_size - - registry = RegistryStore() - registry.initialize() - projects = registry.list_projects() - registry.close() - - result = { - "index_root": str(index_root), - "projects_count": len(projects), - "total_size_mb": round(total_size / (1024 * 1024), 2), - } - - if json_mode: - print_json(success=True, result=result) - else: - console.print("[bold]Index Status[/bold]") - console.print(f" Location: {result['index_root']}") - console.print(f" Projects: {result['projects_count']}") - console.print(f" Total Size: {result['total_size_mb']} MB") - console.print("\n[dim]Use 'clean ' to remove a specific project or 'clean --all' to remove everything.[/dim]") - - except StorageError as exc: - if json_mode: - print_json(success=False, error=f"Storage error: {exc}") - else: - console.print(f"[red]Clean failed (storage):[/red] {exc}") - raise typer.Exit(code=1) - except PermissionError as exc: - if json_mode: - print_json(success=False, error=f"Permission denied: {exc}") - else: - console.print(f"[red]Clean failed (permission denied):[/red] {exc}") - raise typer.Exit(code=1) - except CodexLensError as exc: - if json_mode: - print_json(success=False, error=str(exc)) - else: - console.print(f"[red]Clean failed:[/red] {exc}") - raise typer.Exit(code=1) - - -@app.command("semantic-list") -def semantic_list( - path: Path = typer.Option(Path("."), "--path", "-p", help="Project path to list metadata from."), - offset: int = typer.Option(0, "--offset", "-o", min=0, help="Number of records to skip."), - limit: int = typer.Option(50, "--limit", "-n", min=1, max=100, help="Maximum records to return."), - tool_filter: Optional[str] = typer.Option(None, "--tool", "-t", help="Filter by LLM tool (gemini/qwen)."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), -) -> None: - """List semantic metadata entries for indexed files. - - Shows files that have LLM-generated summaries and keywords. - Results are aggregated from all index databases in the project. - """ - _configure_logging(verbose, json_mode) - base_path = path.expanduser().resolve() - - registry: Optional[RegistryStore] = None - try: - registry = RegistryStore() - registry.initialize() - mapper = PathMapper() - - project_info = registry.get_project(base_path) - if not project_info: - raise CodexLensError(f"No index found for: {base_path}. Run 'codex-lens init' first.") - - index_dir = Path(project_info.index_root) - if not index_dir.exists(): - raise CodexLensError(f"Index directory not found: {index_dir}") - - all_results: list = [] - total_count = 0 - - index_files = sorted(index_dir.rglob("_index.db")) - - for db_path in index_files: - try: - store = DirIndexStore(db_path) - store.initialize() - - results, count = store.list_semantic_metadata( - offset=0, - limit=1000, - llm_tool=tool_filter, - ) - - source_dir = mapper.index_to_source(db_path.parent) - for r in results: - r["source_dir"] = str(source_dir) - - all_results.extend(results) - total_count += count - - store.close() - except Exception as e: - if verbose: - console.print(f"[yellow]Warning: Error reading {db_path}: {e}[/yellow]") - - all_results.sort(key=lambda x: x["generated_at"], reverse=True) - paginated = all_results[offset : offset + limit] - - result = { - "path": str(base_path), - "total": total_count, - "offset": offset, - "limit": limit, - "count": len(paginated), - "entries": paginated, - } - - if json_mode: - print_json(success=True, result=result) - else: - if not paginated: - console.print("[yellow]No semantic metadata found.[/yellow]") - console.print("Run 'codex-lens enhance' to generate metadata for indexed files.") - else: - table = Table(title=f"Semantic Metadata ({total_count} total)") - table.add_column("File", style="cyan", max_width=40) - table.add_column("Language", style="dim") - table.add_column("Purpose", max_width=30) - table.add_column("Keywords", max_width=25) - table.add_column("Tool") - - for entry in paginated: - keywords_str = ", ".join(entry["keywords"][:3]) - if len(entry["keywords"]) > 3: - keywords_str += f" (+{len(entry['keywords']) - 3})" - - table.add_row( - entry["file_name"], - entry["language"] or "-", - (entry["purpose"] or "-")[:30], - keywords_str or "-", - entry["llm_tool"] or "-", - ) - - console.print(table) - - if total_count > len(paginated): - console.print( - f"[dim]Showing {offset + 1}-{offset + len(paginated)} of {total_count}. " - "Use --offset and --limit for pagination.[/dim]" - ) - - except StorageError as exc: - if json_mode: - print_json(success=False, error=f"Storage error: {exc}") - else: - console.print(f"[red]Semantic-list failed (storage):[/red] {exc}") - raise typer.Exit(code=1) - except CodexLensError as exc: - if json_mode: - print_json(success=False, error=str(exc)) - else: - console.print(f"[red]Semantic-list failed:[/red] {exc}") - raise typer.Exit(code=1) - finally: - if registry is not None: - registry.close() - - -# ==================== Model Management Commands ==================== - -@app.command(name="model-list") -def model_list( - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), -) -> None: - """List available embedding models and their installation status. - - Shows 4 model profiles (fast, code, multilingual, balanced) with: - - Installation status - - Model size and dimensions - - Use case recommendations - """ - try: - from codexlens.cli.model_manager import list_models - - result = list_models() - - if json_mode: - print_json(**result) - else: - if not result["success"]: - console.print(f"[red]Error:[/red] {result.get('error', 'Unknown error')}") - raise typer.Exit(code=1) - - data = result["result"] - models = data["models"] - cache_dir = data["cache_dir"] - cache_exists = data["cache_exists"] - - console.print("[bold]Available Embedding Models:[/bold]") - console.print(f"Cache directory: [dim]{cache_dir}[/dim] {'(exists)' if cache_exists else '(not found)'}\n") - - table = Table(show_header=True, header_style="bold") - table.add_column("Profile", style="cyan") - table.add_column("Model Name", style="blue") - table.add_column("Dims", justify="right") - table.add_column("Size (MB)", justify="right") - table.add_column("Status", justify="center") - table.add_column("Use Case", style="dim") - - for model in models: - status_icon = "[green]✓[/green]" if model["installed"] else "[dim]—[/dim]" - size_display = ( - f"{model['actual_size_mb']:.1f}" if model["installed"] - else f"~{model['estimated_size_mb']}" - ) - table.add_row( - model["profile"], - model["model_name"], - str(model["dimensions"]), - size_display, - status_icon, - model["use_case"][:40] + "..." if len(model["use_case"]) > 40 else model["use_case"], - ) - - console.print(table) - console.print("\n[dim]Use 'codexlens model-download ' to download a model[/dim]") - - except ImportError: - if json_mode: - print_json(success=False, error="fastembed not installed. Install with: pip install codexlens[semantic]") - else: - console.print("[red]Error:[/red] fastembed not installed") - console.print("[yellow]Install with:[/yellow] pip install codexlens[semantic]") - raise typer.Exit(code=1) - - -@app.command(name="model-download") -def model_download( - profile: str = typer.Argument(..., help="Model profile to download (fast, code, multilingual, balanced)."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), -) -> None: - """Download an embedding model by profile name. - - Example: - codexlens model-download code # Download code-optimized model - """ - try: - from codexlens.cli.model_manager import download_model - - if not json_mode: - console.print(f"[bold]Downloading model:[/bold] {profile}") - console.print("[dim]This may take a few minutes depending on your internet connection...[/dim]\n") - - # Create progress callback for non-JSON mode - progress_callback = None if json_mode else lambda msg: console.print(f"[cyan]{msg}[/cyan]") - - result = download_model(profile, progress_callback=progress_callback) - - if json_mode: - print_json(**result) - else: - if not result["success"]: - console.print(f"[red]Error:[/red] {result.get('error', 'Unknown error')}") - raise typer.Exit(code=1) - - data = result["result"] - console.print(f"[green]✓[/green] Model downloaded successfully!") - console.print(f" Profile: {data['profile']}") - console.print(f" Model: {data['model_name']}") - console.print(f" Cache size: {data['cache_size_mb']:.1f} MB") - console.print(f" Location: [dim]{data['cache_path']}[/dim]") - - except ImportError: - if json_mode: - print_json(success=False, error="fastembed not installed. Install with: pip install codexlens[semantic]") - else: - console.print("[red]Error:[/red] fastembed not installed") - console.print("[yellow]Install with:[/yellow] pip install codexlens[semantic]") - raise typer.Exit(code=1) - - -@app.command(name="model-delete") -def model_delete( - profile: str = typer.Argument(..., help="Model profile to delete (fast, code, multilingual, balanced)."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), -) -> None: - """Delete a downloaded embedding model from cache. - - Example: - codexlens model-delete fast # Delete fast model - """ - from codexlens.cli.model_manager import delete_model - - if not json_mode: - console.print(f"[bold yellow]Deleting model:[/bold yellow] {profile}") - - result = delete_model(profile) - - if json_mode: - print_json(**result) - else: - if not result["success"]: - console.print(f"[red]Error:[/red] {result.get('error', 'Unknown error')}") - raise typer.Exit(code=1) - - data = result["result"] - console.print(f"[green]✓[/green] Model deleted successfully!") - console.print(f" Profile: {data['profile']}") - console.print(f" Model: {data['model_name']}") - console.print(f" Freed space: {data['deleted_size_mb']:.1f} MB") - - -@app.command(name="model-download-custom") -def model_download_custom( - model_name: str = typer.Argument(..., help="Full HuggingFace model name (e.g., BAAI/bge-small-en-v1.5)."), - model_type: str = typer.Option("embedding", "--type", help="Model type: embedding or reranker."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), -) -> None: - """Download a custom HuggingFace model by name. - - This allows downloading any fastembed-compatible model from HuggingFace. - - Example: - codexlens model-download-custom BAAI/bge-small-en-v1.5 - codexlens model-download-custom BAAI/bge-reranker-base --type reranker - """ - try: - from codexlens.cli.model_manager import download_custom_model - - if not json_mode: - console.print(f"[bold]Downloading custom model:[/bold] {model_name}") - console.print(f"[dim]Model type: {model_type}[/dim]") - console.print("[dim]This may take a few minutes depending on your internet connection...[/dim]\n") - - progress_callback = None if json_mode else lambda msg: console.print(f"[cyan]{msg}[/cyan]") - - result = download_custom_model(model_name, model_type=model_type, progress_callback=progress_callback) - - if json_mode: - print_json(**result) - else: - if not result["success"]: - console.print(f"[red]Error:[/red] {result.get('error', 'Unknown error')}") - raise typer.Exit(code=1) - - data = result["result"] - console.print(f"[green]✓[/green] Custom model downloaded successfully!") - console.print(f" Model: {data['model_name']}") - console.print(f" Type: {data['model_type']}") - console.print(f" Cache size: {data['cache_size_mb']:.1f} MB") - console.print(f" Location: [dim]{data['cache_path']}[/dim]") - - except ImportError: - if json_mode: - print_json(success=False, error="fastembed not installed. Install with: pip install codexlens[semantic]") - else: - console.print("[red]Error:[/red] fastembed not installed") - console.print("[yellow]Install with:[/yellow] pip install codexlens[semantic]") - raise typer.Exit(code=1) - - -@app.command(name="model-info") -def model_info( - profile: str = typer.Argument(..., help="Model profile to get info (fast, code, multilingual, balanced)."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), -) -> None: - """Get detailed information about a model profile. - - Example: - codexlens model-info code # Get code model details - """ - from codexlens.cli.model_manager import get_model_info - - result = get_model_info(profile) - - if json_mode: - print_json(**result) - else: - if not result["success"]: - console.print(f"[red]Error:[/red] {result.get('error', 'Unknown error')}") - raise typer.Exit(code=1) - - data = result["result"] - console.print(f"[bold]Model Profile:[/bold] {data['profile']}") - console.print(f" Model name: {data['model_name']}") - console.print(f" Dimensions: {data['dimensions']}") - console.print(f" Status: {'[green]Installed[/green]' if data['installed'] else '[dim]Not installed[/dim]'}") - if data['installed'] and data['actual_size_mb']: - console.print(f" Cache size: {data['actual_size_mb']:.1f} MB") - console.print(f" Location: [dim]{data['cache_path']}[/dim]") - else: - console.print(f" Estimated size: ~{data['estimated_size_mb']} MB") - console.print(f"\n Description: {data['description']}") - console.print(f" Use case: {data['use_case']}") - - -# ==================== Reranker Model Management Commands ==================== - - -@app.command(name="reranker-model-list") -def reranker_model_list( - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), -) -> None: - """List available reranker models and their installation status. - - Shows reranker model profiles with: - - Installation status - - Model size - - Use case recommendations - """ - try: - from codexlens.cli.model_manager import list_reranker_models - - result = list_reranker_models() - - if json_mode: - print_json(**result) - else: - if not result["success"]: - console.print(f"[red]Error:[/red] {result.get('error', 'Unknown error')}") - raise typer.Exit(code=1) - - data = result["result"] - models = data["models"] - cache_dir = data["cache_dir"] - cache_exists = data["cache_exists"] - - console.print("[bold]Available Reranker Models:[/bold]") - console.print(f"Cache directory: [dim]{cache_dir}[/dim] {'(exists)' if cache_exists else '(not found)'}\n") - - table = Table(show_header=True, header_style="bold") - table.add_column("Profile", style="cyan") - table.add_column("Model", style="dim") - table.add_column("Size", justify="right") - table.add_column("Status") - table.add_column("Description") - - for m in models: - status = "[green]✓ Installed[/green]" if m["installed"] else "[dim]Not installed[/dim]" - size = f"{m['actual_size_mb']:.1f} MB" if m["installed"] and m["actual_size_mb"] else f"~{m['estimated_size_mb']} MB" - rec = " [yellow]★[/yellow]" if m.get("recommended") else "" - table.add_row(m["profile"] + rec, m["model_name"], size, status, m["description"]) - - console.print(table) - console.print("\n[yellow]★[/yellow] = Recommended") - - except ImportError: - if json_mode: - print_json(success=False, error="fastembed reranker not available. Install with: pip install fastembed>=0.4.0") - else: - console.print("[red]Error:[/red] fastembed reranker not available") - console.print("Install with: [cyan]pip install fastembed>=0.4.0[/cyan]") - raise typer.Exit(code=1) - - -@app.command(name="reranker-model-download") -def reranker_model_download( - profile: str = typer.Argument(..., help="Reranker model profile to download."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), -) -> None: - """Download a reranker model by profile name. - - Example: - codexlens reranker-model-download ms-marco-mini # Download default reranker - """ - try: - from codexlens.cli.model_manager import download_reranker_model - - if not json_mode: - console.print(f"[bold]Downloading reranker model:[/bold] {profile}") - console.print("[dim]This may take a few minutes depending on your internet connection...[/dim]\n") - - progress_callback = None if json_mode else lambda msg: console.print(f"[cyan]{msg}[/cyan]") - - result = download_reranker_model(profile, progress_callback=progress_callback) - - if json_mode: - print_json(**result) - else: - if not result["success"]: - console.print(f"[red]Error:[/red] {result.get('error', 'Unknown error')}") - raise typer.Exit(code=1) - - data = result["result"] - console.print(f"[green]✓[/green] Reranker model downloaded successfully!") - console.print(f" Profile: {data['profile']}") - console.print(f" Model: {data['model_name']}") - console.print(f" Cache size: {data['cache_size_mb']:.1f} MB") - console.print(f" Location: [dim]{data['cache_path']}[/dim]") - - except ImportError: - if json_mode: - print_json(success=False, error="fastembed reranker not available. Install with: pip install fastembed>=0.4.0") - else: - console.print("[red]Error:[/red] fastembed reranker not available") - console.print("Install with: [cyan]pip install fastembed>=0.4.0[/cyan]") - raise typer.Exit(code=1) - - -@app.command(name="reranker-model-delete") -def reranker_model_delete( - profile: str = typer.Argument(..., help="Reranker model profile to delete."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), -) -> None: - """Delete a downloaded reranker model from cache. - - Example: - codexlens reranker-model-delete ms-marco-mini # Delete reranker model - """ - from codexlens.cli.model_manager import delete_reranker_model - - if not json_mode: - console.print(f"[bold yellow]Deleting reranker model:[/bold yellow] {profile}") - - result = delete_reranker_model(profile) - - if json_mode: - print_json(**result) - else: - if not result["success"]: - console.print(f"[red]Error:[/red] {result.get('error', 'Unknown error')}") - raise typer.Exit(code=1) - - data = result["result"] - console.print(f"[green]✓[/green] Reranker model deleted successfully!") - console.print(f" Profile: {data['profile']}") - console.print(f" Model: {data['model_name']}") - console.print(f" Freed space: {data['deleted_size_mb']:.1f} MB") - - -@app.command(name="reranker-model-info") -def reranker_model_info( - profile: str = typer.Argument(..., help="Reranker model profile to get info."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), -) -> None: - """Get detailed information about a reranker model profile. - - Example: - codexlens reranker-model-info ms-marco-mini # Get reranker model details - """ - from codexlens.cli.model_manager import get_reranker_model_info - - result = get_reranker_model_info(profile) - - if json_mode: - print_json(**result) - else: - if not result["success"]: - console.print(f"[red]Error:[/red] {result.get('error', 'Unknown error')}") - raise typer.Exit(code=1) - - data = result["result"] - console.print(f"[bold]Reranker Model Profile:[/bold] {data['profile']}") - console.print(f" Model name: {data['model_name']}") - console.print(f" Status: {'[green]Installed[/green]' if data['installed'] else '[dim]Not installed[/dim]'}") - if data['installed'] and data['actual_size_mb']: - console.print(f" Cache size: {data['actual_size_mb']:.1f} MB") - console.print(f" Location: [dim]{data['cache_path']}[/dim]") - else: - console.print(f" Estimated size: ~{data['estimated_size_mb']} MB") - console.print(f" Recommended: {'[green]Yes[/green]' if data.get('recommended') else '[dim]No[/dim]'}") - console.print(f"\n Description: {data['description']}") - console.print(f" Use case: {data['use_case']}") - - -# ==================== Embedding Management Commands ==================== - -@app.command(name="embeddings-status", hidden=True, deprecated=True) -def embeddings_status( - path: Optional[Path] = typer.Argument( - None, - exists=True, - help="Path to specific _index.db file or directory containing indexes. If not specified, uses default index root.", - ), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), -) -> None: - """[Deprecated] Use 'codexlens index status' instead. - - Check embedding status for one or all indexes. - - Shows embedding statistics including: - - Number of chunks generated - - File coverage percentage - - Files missing embeddings - - Examples: - codexlens embeddings-status # Check all indexes - codexlens embeddings-status ~/.codexlens/indexes/project/_index.db # Check specific index - codexlens embeddings-status ~/projects/my-app # Check project (auto-finds index) - """ - _deprecated_command_warning("embeddings-status", "index status") - from codexlens.cli.embedding_manager import get_embedding_stats_summary, get_embeddings_status - - # Determine what to check - if path is None: - # Check all indexes in default root - index_root = _get_index_root() - result = get_embedding_stats_summary(index_root) - - if json_mode: - print_json(**result) - else: - if not result["success"]: - console.print(f"[red]Error:[/red] {result.get('error', 'Unknown error')}") - raise typer.Exit(code=1) - - data = result["result"] - total = data["total_indexes"] - with_emb = data["indexes_with_embeddings"] - total_chunks = data["total_chunks"] - - console.print(f"[bold]Embedding Status Summary[/bold]") - console.print(f"Index root: [dim]{index_root}[/dim]\n") - console.print(f"Total indexes: {total}") - console.print(f"Indexes with embeddings: [{'green' if with_emb > 0 else 'yellow'}]{with_emb}[/]/{total}") - console.print(f"Total chunks: {total_chunks:,}\n") - - if data["indexes"]: - table = Table(show_header=True, header_style="bold") - table.add_column("Project", style="cyan") - table.add_column("Files", justify="right") - table.add_column("Chunks", justify="right") - table.add_column("Coverage", justify="right") - table.add_column("Status", justify="center") - - for idx_stat in data["indexes"]: - status_icon = "[green]✓[/green]" if idx_stat["has_embeddings"] else "[dim]—[/dim]" - coverage = f"{idx_stat['coverage_percent']:.1f}%" if idx_stat["has_embeddings"] else "—" - - table.add_row( - idx_stat["project"], - str(idx_stat["total_files"]), - f"{idx_stat['total_chunks']:,}" if idx_stat["has_embeddings"] else "0", - coverage, - status_icon, - ) - - console.print(table) - - else: - # Check specific index or find index for project - target_path = path.expanduser().resolve() - - if target_path.is_file() and target_path.name == "_index.db": - # Direct index file - index_path = target_path - elif target_path.is_dir(): - # Try to find index for this project - registry = RegistryStore() - try: - registry.initialize() - mapper = PathMapper() - index_path = mapper.source_to_index_db(target_path) - - if not index_path.exists(): - console.print(f"[red]Error:[/red] No index found for {target_path}") - console.print("Run 'codexlens init' first to create an index") - raise typer.Exit(code=1) - finally: - registry.close() - else: - console.print(f"[red]Error:[/red] Path must be _index.db file or directory") - raise typer.Exit(code=1) - - result = check_index_embeddings(index_path) - - if json_mode: - print_json(**result) - else: - if not result["success"]: - console.print(f"[red]Error:[/red] {result.get('error', 'Unknown error')}") - raise typer.Exit(code=1) - - data = result["result"] - has_emb = data["has_embeddings"] - - console.print(f"[bold]Embedding Status[/bold]") - console.print(f"Index: [dim]{data['index_path']}[/dim]\n") - - if has_emb: - console.print(f"[green]✓[/green] Embeddings available") - console.print(f" Total chunks: {data['total_chunks']:,}") - console.print(f" Total files: {data['total_files']:,}") - console.print(f" Files with embeddings: {data['files_with_chunks']:,}/{data['total_files']}") - console.print(f" Coverage: {data['coverage_percent']:.1f}%") - - if data["files_without_chunks"] > 0: - console.print(f"\n[yellow]Warning:[/yellow] {data['files_without_chunks']} files missing embeddings") - if data["missing_files_sample"]: - console.print(" Sample missing files:") - for file in data["missing_files_sample"]: - console.print(f" [dim]{file}[/dim]") - else: - console.print(f"[yellow]—[/yellow] No embeddings found") - console.print(f" Total files indexed: {data['total_files']:,}") - console.print("\n[dim]Generate embeddings with:[/dim]") - console.print(f" [cyan]codexlens embeddings-generate {index_path}[/cyan]") - - -@index_app.command("embeddings") -def index_embeddings( - path: Path = typer.Argument( - ..., - exists=True, - help="Path to _index.db file or project directory.", - ), - backend: str = typer.Option( - "fastembed", - "--backend", - "-b", - help="Embedding backend: fastembed (local) or litellm (remote API).", - ), - model: str = typer.Option( - "code", - "--model", - "-m", - help="Model: profile name for fastembed (fast/code/multilingual/balanced) or model name for litellm (e.g. text-embedding-3-small).", - ), - force: bool = typer.Option( - False, - "--force", - "-f", - help="Force regeneration even if embeddings exist.", - ), - chunk_size: int = typer.Option( - 2000, - "--chunk-size", - help="Maximum chunk size in characters.", - ), - max_workers: int = typer.Option( - 1, - "--max-workers", - "-w", - min=1, - help="Max concurrent API calls. Recommended: 4-8 for litellm backend. Default: 1 (sequential).", - ), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose output."), - centralized: bool = typer.Option( - False, - "--centralized", - "-c", - help="Use centralized vector storage (default).", - ), - distributed: bool = typer.Option( - False, - "--distributed", - "-d", - help="Use distributed per-directory indexes.", - ), -) -> None: - """Generate semantic embeddings for code search. - - Creates vector embeddings for all files in an index to enable - semantic search capabilities. Embeddings are stored in the same - database as the FTS index. - - Storage Modes: - - Default: Per-directory HNSW indexes alongside _index.db files - - Centralized: Single HNSW index at project root (_vectors.hnsw) - - Embedding Backend Options: - - fastembed: Local ONNX-based embeddings (default, no API calls) - - litellm: Remote API embeddings via ccw-litellm (requires API keys) - - Model Options: - For fastembed backend (profiles): - - fast: BAAI/bge-small-en-v1.5 (384 dims, ~80MB) - - code: jinaai/jina-embeddings-v2-base-code (768 dims, ~150MB) [recommended] - - multilingual: intfloat/multilingual-e5-large (1024 dims, ~1GB) - - balanced: mixedbread-ai/mxbai-embed-large-v1 (1024 dims, ~600MB) - - For litellm backend (model names): - - text-embedding-3-small, text-embedding-3-large (OpenAI) - - text-embedding-ada-002 (OpenAI legacy) - - Any model supported by ccw-litellm - - Examples: - codexlens index embeddings ~/projects/my-app # Auto-find index (fastembed, code profile) - codexlens index embeddings ~/.codexlens/indexes/project/_index.db # Specific index - codexlens index embeddings ~/projects/my-app --backend litellm --model text-embedding-3-small # Use LiteLLM - codexlens index embeddings ~/projects/my-app --model fast --force # Regenerate with fast profile - codexlens index embeddings ~/projects/my-app --centralized # Centralized vector storage - """ - _configure_logging(verbose, json_mode) - if centralized and distributed: - _fail_mutually_exclusive("--centralized", "--distributed", json_mode) - use_centralized = not distributed - - from codexlens.cli.embedding_manager import ( - generate_embeddings, - generate_dense_embeddings_centralized, - scan_for_model_conflicts, - check_global_model_lock, - set_locked_model_config, - ) - - # Validate backend - valid_backends = ["fastembed", "litellm"] - if backend not in valid_backends: - error_msg = f"Invalid backend: {backend}. Must be one of: {', '.join(valid_backends)}" - if json_mode: - print_json(success=False, error=error_msg) - else: - console.print(f"[red]Error:[/red] {error_msg}") - console.print(f"[dim]Valid backends: {', '.join(valid_backends)}[/dim]") - raise typer.Exit(code=1) - - # Resolve path - target_path = path.expanduser().resolve() - - # Determine index path or root for centralized mode - index_path = None - index_root = None - - if target_path.is_file() and target_path.name == "_index.db": - # Direct index file - index_path = target_path - index_root = target_path.parent - elif target_path.is_dir(): - # Directory: Find index location from registry - registry = RegistryStore() - try: - registry.initialize() - mapper = PathMapper() - index_path = mapper.source_to_index_db(target_path) - - if not index_path.exists(): - console.print(f"[red]Error:[/red] No index found for {target_path}") - console.print("Run 'codexlens init' first to create an index") - raise typer.Exit(code=1) - index_root = index_path.parent # Use index directory for both modes - finally: - registry.close() - else: - console.print(f"[red]Error:[/red] Path must be _index.db file or directory") - raise typer.Exit(code=1) - - # Progress callback - def progress_update(msg: str): - if not json_mode and verbose: - console.print(f" {msg}") - - console.print(f"[bold]Generating embeddings[/bold]") - if centralized: - effective_root = index_root if index_root else (index_path.parent if index_path else target_path) - console.print(f"Index root: [dim]{effective_root}[/dim]") - console.print(f"Mode: [green]Centralized[/green]") - else: - console.print(f"Index: [dim]{index_path}[/dim]") - console.print(f"Backend: [cyan]{backend}[/cyan]") - console.print(f"Model: [cyan]{model}[/cyan]") - if max_workers > 1: - console.print(f"Concurrency: [cyan]{max_workers} workers[/cyan]") - console.print() - - # Check global model lock (prevents mixing different models) - if not force: - lock_result = check_global_model_lock(backend, model) - if lock_result["has_conflict"]: - locked = lock_result["locked_config"] - if json_mode: - print_json( - success=False, - error="Global model lock conflict", - code="MODEL_LOCKED", - locked_config=locked, - target_config=lock_result["target_config"], - hint="Use --force to override the lock and switch to a different model (will regenerate all embeddings)", - ) - raise typer.Exit(code=1) - else: - console.print("[red]⛔ Global Model Lock Active[/red]") - console.print(f" Locked model: [cyan]{locked['backend']}/{locked['model']}[/cyan]") - console.print(f" Requested: [yellow]{backend}/{model}[/yellow]") - console.print(f" Locked at: {locked.get('locked_at', 'unknown')}") - console.print() - console.print("[dim]All indexes must use the same embedding model.[/dim]") - console.print("[dim]Use --force to switch models (will regenerate all embeddings).[/dim]") - raise typer.Exit(code=1) - - # Pre-check for model conflicts (only if not forcing) - if not force: - # Determine the index root for conflict scanning - scan_root = index_root if index_root else (index_path.parent if index_path else None) - - if scan_root: - conflict_result = scan_for_model_conflicts(scan_root, backend, model) - - if conflict_result["has_conflict"]: - existing = conflict_result["existing_config"] - conflict_count = len(conflict_result["conflicts"]) - - if json_mode: - # JSON mode: return structured error for UI handling - print_json( - success=False, - error="Model conflict detected", - code="MODEL_CONFLICT", - existing_config=existing, - target_config=conflict_result["target_config"], - conflict_count=conflict_count, - conflicts=conflict_result["conflicts"][:5], # Show first 5 conflicts - hint="Use --force to overwrite existing embeddings with the new model", - ) - raise typer.Exit(code=1) - else: - # Interactive mode: show warning and ask for confirmation - console.print("[yellow]⚠ Model Conflict Detected[/yellow]") - console.print(f" Existing: [red]{existing['backend']}/{existing['model']}[/red] ({existing.get('embedding_dim', '?')} dim)") - console.print(f" Requested: [green]{backend}/{model}[/green]") - console.print(f" Affected indexes: [yellow]{conflict_count}[/yellow]") - console.print() - console.print("[dim]Mixing different embedding models in the same index is not supported.[/dim]") - console.print("[dim]Overwriting will delete all existing embeddings and regenerate with the new model.[/dim]") - console.print() - - # Ask for confirmation - if typer.confirm("Overwrite existing embeddings with the new model?", default=False): - force = True - console.print("[green]Confirmed.[/green] Proceeding with overwrite...\n") - else: - console.print("[yellow]Cancelled.[/yellow] Use --force to skip this prompt.") - raise typer.Exit(code=0) - - if use_centralized: - # Centralized mode: single HNSW index at project root - if not index_root: - index_root = index_path.parent if index_path else target_path - result = generate_dense_embeddings_centralized( - index_root, - embedding_backend=backend, - model_profile=model, - force=force, - chunk_size=chunk_size, - progress_callback=progress_update, - max_workers=max_workers, - ) - else: - result = generate_embeddings( - index_path, - embedding_backend=backend, - model_profile=model, - force=force, - chunk_size=chunk_size, - progress_callback=progress_update, - max_workers=max_workers, - ) - - if json_mode: - print_json(**result) - else: - if not result["success"]: - error_msg = _extract_embedding_error(result) - console.print(f"[red]Error:[/red] {error_msg}") - - # Provide helpful hints - if "already has" in error_msg: - console.print("\n[dim]Use --force to regenerate existing embeddings[/dim]") - elif "fastembed not available" in error_msg or "Semantic search not available" in error_msg: - console.print("\n[dim]Install semantic dependencies:[/dim]") - console.print(" [cyan]pip install codexlens[semantic][/cyan]") - elif "ccw-litellm not available" in error_msg: - console.print("\n[dim]Install LiteLLM backend dependencies:[/dim]") - console.print(" [cyan]pip install ccw-litellm[/cyan]") - - raise typer.Exit(code=1) - - data = result["result"] - - # Set global model lock after successful generation - # This prevents using different models for future indexes - set_locked_model_config(backend, model) - - if centralized: - # Centralized mode output - elapsed = data.get("elapsed_time", 0) - console.print(f"[green]v[/green] Centralized embeddings generated successfully!") - console.print(f" Model: {data.get('model_name', model)}") - console.print(f" Chunks created: {data['chunks_created']:,}") - console.print(f" Files processed: {data['files_processed']}") - if data.get("files_failed", 0) > 0: - console.print(f" [yellow]Files failed: {data['files_failed']}[/yellow]") - console.print(f" Central index: {data.get('central_index_path', 'N/A')}") - console.print(f" Time: {elapsed:.1f}s") - else: - # Single index mode output - elapsed = data["elapsed_time"] - - console.print(f"[green]v[/green] Embeddings generated successfully!") - console.print(f" Model: {data['model_name']}") - console.print(f" Chunks created: {data['chunks_created']:,}") - console.print(f" Files processed: {data['files_processed']}") - - if data["files_failed"] > 0: - console.print(f" [yellow]Files failed: {data['files_failed']}[/yellow]") - if data["failed_files"]: - console.print(" [dim]First failures:[/dim]") - for file_path, error in data["failed_files"]: - console.print(f" [dim]{file_path}: {error}[/dim]") - - console.print(f" Time: {elapsed:.1f}s") - - console.print("\n[dim]Use vector search with:[/dim]") - console.print(" [cyan]codexlens search 'your query' --mode pure-vector[/cyan]") - - -# ==================== GPU Management Commands ==================== - -@app.command(name="gpu-list") -def gpu_list( - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), -) -> None: - """List available GPU devices for embedding acceleration. - - Shows all detected GPU devices with their capabilities and selection status. - Discrete GPUs (NVIDIA, AMD) are automatically preferred over integrated GPUs. - - Examples: - codexlens gpu-list # List all GPUs - codexlens gpu-list --json # JSON output for scripting - """ - from codexlens.semantic.gpu_support import get_gpu_devices, detect_gpu, get_selected_device_id - - gpu_info = detect_gpu() - devices = get_gpu_devices() - selected_id = get_selected_device_id() - - if json_mode: - print_json( - success=True, - result={ - "devices": devices, - "selected_device_id": selected_id, - "gpu_available": gpu_info.gpu_available, - "providers": gpu_info.onnx_providers, - } - ) - else: - if not devices: - console.print("[yellow]No GPU devices detected[/yellow]") - console.print(f"ONNX Providers: [dim]{', '.join(gpu_info.onnx_providers)}[/dim]") - return - - console.print("[bold]Available GPU Devices[/bold]\n") - - table = Table(show_header=True, header_style="bold") - table.add_column("ID", justify="center") - table.add_column("Name") - table.add_column("Vendor", justify="center") - table.add_column("Type", justify="center") - table.add_column("Status", justify="center") - - for dev in devices: - type_str = "[green]Discrete[/green]" if dev["is_discrete"] else "[dim]Integrated[/dim]" - vendor_color = { - "nvidia": "green", - "amd": "red", - "intel": "blue" - }.get(dev["vendor"], "white") - vendor_str = f"[{vendor_color}]{dev['vendor'].upper()}[/{vendor_color}]" - - status_parts = [] - if dev["is_preferred"]: - status_parts.append("[cyan]Auto[/cyan]") - if dev["is_selected"]: - status_parts.append("[green]✓ Selected[/green]") - - status_str = " ".join(status_parts) if status_parts else "[dim]—[/dim]" - - table.add_row( - str(dev["device_id"]), - dev["name"], - vendor_str, - type_str, - status_str, - ) - - console.print(table) - console.print(f"\nONNX Providers: [dim]{', '.join(gpu_info.onnx_providers)}[/dim]") - console.print("\n[dim]Select GPU with:[/dim]") - console.print(" [cyan]codexlens gpu-select [/cyan]") - - -@app.command(name="gpu-select") -def gpu_select( - device_id: int = typer.Argument( - ..., - help="GPU device ID to use for embeddings. Use 'codexlens gpu-list' to see available IDs.", - ), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), -) -> None: - """Select a specific GPU device for embedding generation. - - By default, CodexLens automatically selects the most powerful GPU (discrete over integrated). - Use this command to override the selection. - - Examples: - codexlens gpu-select 1 # Use GPU device 1 - codexlens gpu-select 0 --json # Select GPU 0 with JSON output - """ - from codexlens.semantic.gpu_support import set_selected_device_id, get_gpu_devices - from codexlens.semantic.embedder import clear_embedder_cache - - devices = get_gpu_devices() - valid_ids = [dev["device_id"] for dev in devices] - - if device_id not in valid_ids: - if json_mode: - print_json(success=False, error=f"Invalid device_id {device_id}. Valid IDs: {valid_ids}") - else: - console.print(f"[red]Error:[/red] Invalid device_id {device_id}") - console.print(f"Valid IDs: {valid_ids}") - console.print("\n[dim]Use 'codexlens gpu-list' to see available devices[/dim]") - raise typer.Exit(code=1) - - success = set_selected_device_id(device_id) - - if success: - # Clear embedder cache to force reload with new GPU - clear_embedder_cache() - - device_name = next((dev["name"] for dev in devices if dev["device_id"] == device_id), "Unknown") - - if json_mode: - print_json( - success=True, - result={ - "device_id": device_id, - "device_name": device_name, - "message": f"GPU selection set to device {device_id}: {device_name}", - } - ) - else: - console.print(f"[green]✓[/green] GPU selection updated") - console.print(f" Device ID: {device_id}") - console.print(f" Device: [cyan]{device_name}[/cyan]") - console.print("\n[dim]New embeddings will use this GPU[/dim]") - else: - if json_mode: - print_json(success=False, error="Failed to set GPU selection") - else: - console.print("[red]Error:[/red] Failed to set GPU selection") - raise typer.Exit(code=1) - - -@app.command(name="gpu-reset") -def gpu_reset( - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), -) -> None: - """Reset GPU selection to automatic detection. - - Clears any manual GPU selection and returns to automatic selection - (discrete GPU preferred over integrated). - - Examples: - codexlens gpu-reset # Reset to auto-detection - """ - from codexlens.semantic.gpu_support import set_selected_device_id, detect_gpu - from codexlens.semantic.embedder import clear_embedder_cache - - set_selected_device_id(None) - clear_embedder_cache() - - gpu_info = detect_gpu(force_refresh=True) - - if json_mode: - print_json( - success=True, - result={ - "message": "GPU selection reset to auto-detection", - "preferred_device_id": gpu_info.preferred_device_id, - "preferred_device_name": gpu_info.gpu_name, - } - ) - else: - console.print("[green]✓[/green] GPU selection reset to auto-detection") - if gpu_info.preferred_device_id is not None: - console.print(f" Auto-selected device: {gpu_info.preferred_device_id}") - console.print(f" Device: [cyan]{gpu_info.gpu_name}[/cyan]") - - - - - - - -# ==================== Watch Command ==================== - -@app.command() -def watch( - path: Path = typer.Argument(Path("."), exists=True, file_okay=False, dir_okay=True, help="Project root to watch."), - language: Optional[List[str]] = typer.Option(None, "--language", "-l", help="Languages to watch (comma-separated)."), - debounce: int = typer.Option(1000, "--debounce", "-d", min=100, max=10000, help="Debounce interval in milliseconds."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), -) -> None: - """Watch a directory for file changes and incrementally update the index. - - Monitors the specified directory for file system changes (create, modify, delete) - and automatically updates the CodexLens index. The directory must already be indexed - using 'codexlens init' before watching. - - Examples: - # Watch current directory - codexlens watch . - - # Watch with custom debounce interval - codexlens watch . --debounce 2000 - - # Watch only Python and JavaScript files - codexlens watch . --language python,javascript - - Press Ctrl+C to stop watching. - """ - _configure_logging(verbose) - watch_path = path.expanduser().resolve() - - registry: RegistryStore | None = None - try: - # Validate that path is indexed - registry = RegistryStore() - registry.initialize() - mapper = PathMapper() - - project_record = registry.find_by_source_path(str(watch_path)) - if not project_record: - console.print(f"[red]Error:[/red] Directory is not indexed: {watch_path}") - console.print("[dim]Run 'codexlens init' first to create an index.[/dim]") - raise typer.Exit(code=1) - - # Parse languages - languages = _parse_languages(language) - - # Create watcher config - watcher_config = WatcherConfig( - debounce_ms=debounce, - languages=languages, - ) - - # Display startup message - console.print(f"[green]Starting watcher for:[/green] {watch_path}") - console.print(f"[dim]Debounce interval: {debounce}ms[/dim]") - if languages: - console.print(f"[dim]Watching languages: {', '.join(languages)}[/dim]") - console.print("[dim]Press Ctrl+C to stop[/dim]\n") - - # Create and start watcher manager - watch_config = Config.load() - manager = WatcherManager( - root_path=watch_path, - config=watch_config, - watcher_config=watcher_config, - on_indexed=lambda result: _display_index_result(result), - ) - - manager.start() - manager.wait() - - except KeyboardInterrupt: - console.print("\n[yellow]Stopping watcher...[/yellow]") - except CodexLensError as exc: - console.print(f"[red]Watch failed:[/red] {exc}") - raise typer.Exit(code=1) - except Exception as exc: - console.print(f"[red]Unexpected error:[/red] {exc}") - raise typer.Exit(code=1) - finally: - if registry is not None: - registry.close() - - -def _display_index_result(result) -> None: - """Display indexing result in real-time.""" - if result.files_indexed > 0 or result.files_removed > 0: - parts = [] - if result.files_indexed > 0: - parts.append(f"[green]✓ Indexed {result.files_indexed} file(s)[/green]") - if result.files_removed > 0: - parts.append(f"[yellow]✗ Removed {result.files_removed} file(s)[/yellow]") - console.print(" | ".join(parts)) - - if result.errors: - for error in result.errors[:3]: # Show max 3 errors - console.print(f" [red]Error:[/red] {error}") - if len(result.errors) > 3: - console.print(f" [dim]... and {len(result.errors) - 3} more errors[/dim]") - - - -# ==================== Cascade Index Commands ==================== - - -def get_binary_index_path(db_path: Path) -> Path: - """Get the path for binary ANN index file. - - Args: - db_path: Path to the _index.db file - - Returns: - Path to the binary index file (_index_binary.bin) - """ - return db_path.parent / f"{db_path.stem}_binary.bin" - - -@index_app.command("binary") -def index_binary( - path: Annotated[Path, typer.Argument(help="Directory to index")], - force: Annotated[bool, typer.Option("--force", "-f", help="Force regenerate")] = False, - batch_size: Annotated[int, typer.Option("--batch-size", "-b", help="Batch size for embedding")] = 32, - json_mode: Annotated[bool, typer.Option("--json", help="Output JSON response")] = False, - verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Enable verbose logging")] = False, -) -> None: - """Generate cascade embeddings (binary + dense) for two-stage retrieval. - - Cascade retrieval uses a two-stage approach: - 1. Binary search (fast, 32 bytes/vector) -> coarse filtering - 2. Dense rerank (precise, 8KB/vector) -> final results - - This command: - - Finds all _index.db files in the directory - - Generates binary (256-dim) and dense (2048-dim) embeddings for each chunk - - Stores embeddings in the database (embedding_binary, embedding_dense columns) - - Creates a BinaryANNIndex file for fast coarse retrieval - - Examples: - codexlens index binary ~/projects/my-app - codexlens index binary . --force - codexlens index binary . --batch-size 64 --verbose - """ - _configure_logging(verbose, json_mode) - - target_path = path.expanduser().resolve() - - # Find index database(s) - if target_path.is_file() and target_path.name == "_index.db": - index_dbs = [target_path] - elif target_path.is_dir(): - # Check local .codexlens/_index.db first - local_index = target_path / ".codexlens" / "_index.db" - if local_index.exists(): - index_dbs = [local_index] - else: - # Find via registry - registry = RegistryStore() - try: - registry.initialize() - mapper = PathMapper() - index_db = mapper.source_to_index_db(target_path) - if not index_db.exists(): - if json_mode: - print_json(success=False, error=f"No index found for {target_path}") - else: - console.print(f"[red]Error:[/red] No index found for {target_path}") - console.print("Run 'codexlens init' first to create an index") - raise typer.Exit(code=1) - # Find all _index.db files under the index root - index_root = index_db.parent - index_dbs = list(index_root.rglob("_index.db")) - finally: - registry.close() - else: - if json_mode: - print_json(success=False, error="Path must be _index.db file or indexed directory") - else: - console.print("[red]Error:[/red] Path must be _index.db file or indexed directory") - raise typer.Exit(code=1) - - if not index_dbs: - if json_mode: - print_json(success=False, error="No index databases found") - else: - console.print("[yellow]No index databases found[/yellow]") - raise typer.Exit(code=1) - - # Import cascade embedding backend - try: - from codexlens.indexing.embedding import CascadeEmbeddingBackend - from codexlens.semantic.ann_index import BinaryANNIndex - from codexlens.indexing.embedding import pack_binary_embedding - except ImportError as e: - error_msg = f"Cascade embedding dependencies not available: {e}" - if json_mode: - print_json(success=False, error=error_msg) - else: - console.print(f"[red]Error:[/red] {error_msg}") - console.print("[dim]Install with: pip install codexlens[semantic][/dim]") - raise typer.Exit(code=1) - - if not json_mode: - console.print(f"[bold]Generating cascade embeddings[/bold]") - console.print(f"Path: [dim]{target_path}[/dim]") - console.print(f"Index databases: [cyan]{len(index_dbs)}[/cyan]") - console.print(f"Batch size: [cyan]{batch_size}[/cyan]") - console.print() - - # Initialize cascade embedding backend - try: - cascade_backend = CascadeEmbeddingBackend() - except Exception as e: - error_msg = f"Failed to initialize cascade embedding backend: {e}" - if json_mode: - print_json(success=False, error=error_msg) - else: - console.print(f"[red]Error:[/red] {error_msg}") - raise typer.Exit(code=1) - - # Process statistics - total_chunks_processed = 0 - total_indexes_processed = 0 - total_indexes_successful = 0 - total_binary_indexes_created = 0 - errors_list: List[str] = [] - - # Process each index database - with Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - BarColumn(), - TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), - TextColumn("({task.completed}/{task.total})"), - TimeElapsedColumn(), - console=console, - disable=json_mode, - ) as progress: - db_task = progress.add_task("Processing indexes...", total=len(index_dbs)) - - for db_path in index_dbs: - total_indexes_processed += 1 - index_name = db_path.parent.name - - try: - # Open the index store - store = DirIndexStore(db_path) - store.initialize() - - # Get connection for direct queries - conn = store._get_connection() - - # Ensure cascade columns exist in semantic_chunks table - try: - conn.execute("ALTER TABLE semantic_chunks ADD COLUMN embedding_binary BLOB") - except Exception: - pass # Column already exists - try: - conn.execute("ALTER TABLE semantic_chunks ADD COLUMN embedding_dense BLOB") - except Exception: - pass # Column already exists - conn.commit() - - # Check if semantic_chunks table exists and has data - try: - cursor = conn.execute("SELECT COUNT(*) FROM semantic_chunks") - chunk_count = cursor.fetchone()[0] - except Exception: - # semantic_chunks table doesn't exist or is empty - chunk_count = 0 - - if chunk_count == 0: - if verbose and not json_mode: - console.print(f" [dim]Skipping {index_name}: no chunks found[/dim]") - progress.advance(db_task) - store.close() - continue - - # Check if embeddings already exist (unless force) - if not force: - cursor = conn.execute( - "SELECT COUNT(*) FROM semantic_chunks WHERE embedding_binary IS NOT NULL" - ) - existing_count = cursor.fetchone()[0] - if existing_count > 0: - if verbose and not json_mode: - console.print(f" [dim]Skipping {index_name}: embeddings exist (use --force to regenerate)[/dim]") - progress.advance(db_task) - store.close() - continue - - # If force, clear existing cascade embeddings - if force: - conn.execute( - "UPDATE semantic_chunks SET embedding_binary = NULL, embedding_dense = NULL" - ) - conn.commit() - - # Get all chunks - cursor = conn.execute("SELECT id, content FROM semantic_chunks") - chunks = cursor.fetchall() - - if not chunks: - progress.advance(db_task) - store.close() - continue - - if verbose and not json_mode: - console.print(f" Processing {index_name}: {len(chunks)} chunks") - - # Process in batches - chunk_task = progress.add_task( - f" {index_name}", total=len(chunks) - ) - - # Prepare for BinaryANNIndex - binary_index_path = get_binary_index_path(db_path) - binary_ann_index = BinaryANNIndex(db_path, dim=256) - - for i in range(0, len(chunks), batch_size): - batch_chunks = chunks[i:i + batch_size] - batch_ids = [c[0] for c in batch_chunks] - batch_contents = [c[1] for c in batch_chunks] - - # Generate cascade embeddings - binary_embeddings, dense_embeddings = cascade_backend.encode_cascade( - batch_contents, batch_size=batch_size - ) - - # Pack binary embeddings and convert dense to bytes - packed_binaries = [] - dense_bytes_list = [] - - for j in range(len(batch_ids)): - # Pack binary embedding (256 bits -> 32 bytes) - packed_binary = pack_binary_embedding(binary_embeddings[j]) - packed_binaries.append(packed_binary) - - # Convert dense embedding to bytes - import numpy as np - dense_blob = dense_embeddings[j].astype(np.float32).tobytes() - dense_bytes_list.append(dense_blob) - - # Update database - for j, chunk_id in enumerate(batch_ids): - conn.execute( - """ - UPDATE semantic_chunks - SET embedding_binary = ?, embedding_dense = ? - WHERE id = ? - """, - (packed_binaries[j], dense_bytes_list[j], chunk_id) - ) - - # Add to binary ANN index - binary_ann_index.add_vectors(batch_ids, packed_binaries) - - conn.commit() - total_chunks_processed += len(batch_ids) - progress.advance(chunk_task, len(batch_ids)) - - # Save binary ANN index - binary_ann_index.save() - total_binary_indexes_created += 1 - - progress.remove_task(chunk_task) - store.close() - total_indexes_successful += 1 - - except Exception as e: - error_msg = f"{index_name}: {e}" - errors_list.append(error_msg) - if verbose and not json_mode: - console.print(f" [red]Error processing {index_name}:[/red] {e}") - - progress.advance(db_task) - - # Build result - result = { - "path": str(target_path), - "indexes_processed": total_indexes_processed, - "indexes_successful": total_indexes_successful, - "chunks_processed": total_chunks_processed, - "binary_indexes_created": total_binary_indexes_created, - "errors": len(errors_list), - "error_details": errors_list[:5] if errors_list else [], - } - - if json_mode: - print_json(success=True, result=result) - else: - console.print(f"\n[green]Cascade indexing complete[/green]") - console.print(f" Indexes processed: {total_indexes_processed}") - console.print(f" Indexes successful: {total_indexes_successful}") - console.print(f" Chunks processed: {total_chunks_processed:,}") - console.print(f" Binary indexes created: {total_binary_indexes_created}") - if errors_list: - console.print(f" [yellow]Errors: {len(errors_list)}[/yellow]") - for err in errors_list[:3]: - console.print(f" [dim]{err}[/dim]") - if len(errors_list) > 3: - console.print(f" [dim]... and {len(errors_list) - 3} more[/dim]") - - -@index_app.command("binary-mmap") -def index_binary_mmap( - path: Annotated[Path, typer.Argument(help="Project directory (indexed) or _index.db file")], - force: Annotated[bool, typer.Option("--force", "-f", help="Force rebuild binary mmap + metadata")] = False, - embedding_dim: Annotated[Optional[int], typer.Option("--embedding-dim", help="Only use embeddings with this dimension (e.g. 768)")] = None, - json_mode: Annotated[bool, typer.Option("--json", help="Output JSON response")] = False, - verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Enable verbose logging")] = False, -) -> None: - """Build centralized `_binary_vectors.mmap` from existing embeddings (no model calls). - - This command enables the staged binary coarse search without regenerating - embeddings and without triggering global model locks. It: - - scans distributed semantic_chunks.embedding blobs under the index root - - assigns global chunk_ids - - writes `/_binary_vectors.mmap` (+ `.meta.json`) - - writes `/_vectors_meta.db` (chunk_metadata + binary_vectors) - """ - _configure_logging(verbose, json_mode) - - from codexlens.cli.embedding_manager import build_centralized_binary_vectors_from_existing - - target_path = path.expanduser().resolve() - - # Resolve index_root similar to other index commands. - if target_path.is_file() and target_path.name == "_index.db": - index_root = target_path.parent - else: - registry = RegistryStore() - try: - registry.initialize() - mapper = PathMapper() - index_db = mapper.source_to_index_db(target_path) - if not index_db.exists(): - msg = f"No index found for {target_path}" - if json_mode: - print_json(success=False, error=msg) - else: - console.print(f"[red]Error:[/red] {msg}") - console.print("Run `codexlens index init` first to create an index.") - raise typer.Exit(code=1) - index_root = index_db.parent - finally: - registry.close() - - def progress_update(message: str) -> None: - if json_mode: - return - console.print(f"[dim]{message}[/dim]") - - result = build_centralized_binary_vectors_from_existing( - index_root, - force=force, - embedding_dim=embedding_dim, - progress_callback=progress_update, - ) - - if json_mode: - print_json(**result) - return - - if not result.get("success"): - console.print(f"[red]Error:[/red] {result.get('error', 'Unknown error')}") - hint = result.get("hint") - if hint: - console.print(f"[dim]{hint}[/dim]") - raise typer.Exit(code=1) - - data = result.get("result", {}) - console.print("\n[green]Binary mmap build complete[/green]") - console.print(f" Index root: {data.get('index_root')}") - console.print(f" Chunks written: {data.get('chunks_written'):,}") - console.print(f" Binary mmap: {data.get('binary_mmap')}") - console.print(f" Meta DB: {data.get('vectors_meta_db')}") - - -# ==================== Index Status Command ==================== - -@index_app.command("status") -def index_status( - path: Optional[Path] = typer.Argument( - None, - help="Path to project directory or _index.db file. If not specified, uses default index root.", - ), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose output."), -) -> None: - """Show comprehensive index status (embeddings). - - Shows combined status for all index types: - - Dense vector embeddings (HNSW) - - Binary cascade embeddings - - Examples: - codexlens index status # Check all indexes - codexlens index status ~/projects/my-app # Check specific project - codexlens index status --json # JSON output - """ - _configure_logging(verbose, json_mode) - - from codexlens.cli.embedding_manager import get_embedding_stats_summary, get_embeddings_status - - # Determine target path and index root - if path is None: - index_root = _get_index_root() - target_path = None - else: - target_path = path.resolve() - if target_path.is_file() and target_path.name == "_index.db": - index_root = target_path.parent - elif target_path.is_dir(): - # Try to find index for this project - registry = RegistryStore() - try: - registry.initialize() - mapper = PathMapper() - index_path = mapper.source_to_index_db(target_path) - if index_path.exists(): - index_root = index_path.parent - else: - if json_mode: - print_json(success=False, error=f"No index found for {target_path}") - else: - console.print(f"[red]Error:[/red] No index found for {target_path}") - console.print("Run 'codexlens index init' first to create an index") - raise typer.Exit(code=1) - finally: - registry.close() - else: - if json_mode: - print_json(success=False, error="Path must be _index.db file or directory") - else: - console.print(f"[red]Error:[/red] Path must be _index.db file or directory") - raise typer.Exit(code=1) - - # Get embeddings status - embeddings_result = get_embeddings_status(index_root) - embeddings_summary_result = get_embedding_stats_summary(index_root) - - # Build combined result - result = { - "index_root": str(index_root), - # Keep "embeddings" backward-compatible as the subtree summary payload. - "embeddings": embeddings_summary_result.get("result") if embeddings_summary_result.get("success") else None, - "embeddings_error": embeddings_summary_result.get("error") if not embeddings_summary_result.get("success") else None, - "embeddings_status": embeddings_result.get("result") if embeddings_result.get("success") else None, - "embeddings_status_error": embeddings_result.get("error") if not embeddings_result.get("success") else None, - "embeddings_summary": embeddings_summary_result.get("result") if embeddings_summary_result.get("success") else None, - "embeddings_summary_error": embeddings_summary_result.get("error") if not embeddings_summary_result.get("success") else None, - } - - if json_mode: - print_json(success=True, result=result) - else: - console.print(f"[bold]Index Status[/bold]") - console.print(f"Index root: [dim]{index_root}[/dim]\n") - - # Embeddings section - console.print("[bold]Dense Embeddings (HNSW):[/bold]") - if embeddings_result.get("success"): - data = embeddings_result["result"] - root = data.get("root") or data - subtree = data.get("subtree") or {} - centralized = data.get("centralized") or {} - - console.print(f" Root files: {root.get('total_files', 0)}") - console.print( - f" Root files with embeddings: " - f"[{'green' if root.get('has_embeddings') else 'yellow'}]{root.get('files_with_embeddings', 0)}[/]" - f"/{root.get('total_files', 0)}" - ) - console.print(f" Root coverage: {root.get('coverage_percent', 0):.1f}%") - console.print(f" Root chunks: {root.get('total_chunks', 0):,}") - console.print(f" Root storage mode: {root.get('storage_mode', 'none')}") - console.print( - f" Centralized dense: " - f"{'ready' if centralized.get('dense_ready') else ('present' if centralized.get('dense_index_exists') else 'missing')}" - ) - console.print( - f" Centralized binary: " - f"{'ready' if centralized.get('binary_ready') else ('present' if centralized.get('binary_index_exists') else 'missing')}" - ) - - subtree_total = subtree.get("total_indexes", 0) - subtree_with_embeddings = subtree.get("indexes_with_embeddings", 0) - subtree_chunks = subtree.get("total_chunks", 0) - if subtree_total: - console.print("\n[bold]Subtree Summary:[/bold]") - console.print(f" Total indexes: {subtree_total}") - console.print( - f" Indexes with embeddings: " - f"[{'green' if subtree_with_embeddings > 0 else 'yellow'}]{subtree_with_embeddings}[/]/{subtree_total}" - ) - console.print(f" Total chunks: {subtree_chunks:,}") - else: - console.print(f" [yellow]--[/yellow] {embeddings_result.get('error', 'Not available')}") - - -# ==================== Index Update Command ==================== - -@index_app.command("update") -def index_update( - file_path: Path = typer.Argument(..., exists=True, file_okay=True, dir_okay=False, help="Path to the file to update in the index."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), -) -> None: - """Update the index for a single file incrementally. - - This is a lightweight command designed for use in hooks (e.g., Claude Code PostToolUse). - It updates only the specified file without scanning the entire directory. - - The file's parent directory must already be indexed via 'codexlens index init'. - - Examples: - codexlens index update src/main.py # Update single file - codexlens index update ./foo.ts --json # JSON output for hooks - """ - _configure_logging(verbose, json_mode) - - from codexlens.watcher.incremental_indexer import IncrementalIndexer - - registry: RegistryStore | None = None - indexer: IncrementalIndexer | None = None - - try: - registry = RegistryStore() - registry.initialize() - mapper = PathMapper() - config = Config.load() - - resolved_path = file_path.resolve() - - # Check if project is indexed - source_root = mapper.get_project_root(resolved_path) - if not source_root or not registry.get_project(source_root): - error_msg = f"Project containing file is not indexed: {file_path}" - if json_mode: - print_json(success=False, error=error_msg) - else: - console.print(f"[red]Error:[/red] {error_msg}") - console.print("[dim]Run 'codexlens index init' on the project root first.[/dim]") - raise typer.Exit(code=1) - - indexer = IncrementalIndexer(registry, mapper, config) - result = indexer._index_file(resolved_path) - - if result.success: - if json_mode: - print_json(success=True, result={ - "path": str(result.path), - "symbols_count": result.symbols_count, - "status": "updated", - }) - else: - console.print(f"[green]✓[/green] Updated index for [bold]{result.path.name}[/bold] ({result.symbols_count} symbols)") - else: - error_msg = result.error or f"Failed to update index for {file_path}" - if json_mode: - print_json(success=False, error=error_msg) - else: - console.print(f"[red]Error:[/red] {error_msg}") - raise typer.Exit(code=1) - - except CodexLensError as exc: - if json_mode: - print_json(success=False, error=str(exc)) - else: - console.print(f"[red]Update failed:[/red] {exc}") - raise typer.Exit(code=1) - finally: - if indexer: - indexer.close() - if registry: - registry.close() - - -# ==================== Index All Command ==================== - -@index_app.command("all") -def index_all( - path: Path = typer.Argument(Path("."), exists=True, file_okay=False, dir_okay=True, help="Project root to index."), - language: Optional[List[str]] = typer.Option( - None, - "--language", - "-l", - help="Limit indexing to specific languages (repeat or comma-separated).", - ), - workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, help="Parallel worker processes."), - force: bool = typer.Option(False, "--force", "-f", help="Force full reindex."), - backend: str = typer.Option("fastembed", "--backend", "-b", help="Embedding backend: fastembed or litellm."), - model: str = typer.Option("code", "--model", "-m", help="Embedding model profile or name."), - max_workers: int = typer.Option(1, "--max-workers", min=1, help="Max concurrent API calls."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), -) -> None: - """Run all indexing operations in sequence (init, embeddings). - - This is a convenience command that runs the complete indexing pipeline: - 1. FTS index initialization (index init) - 2. Dense vector embeddings (index embeddings) - - Examples: - codexlens index all ~/projects/my-app - codexlens index all . --force - codexlens index all . --backend litellm --model text-embedding-3-small - """ - _configure_logging(verbose, json_mode) - - base_path = path.expanduser().resolve() - results = { - "path": str(base_path), - "steps": {}, - } - - # Step 1: Run init - if not json_mode: - console.print(f"[bold]Step 1/2: Initializing FTS index...[/bold]") - - try: - # Import and call the init function directly - from codexlens.config import Config - from codexlens.storage.index_tree import IndexTreeBuilder - - config = Config.load() - languages = _parse_languages(language) - registry = RegistryStore() - registry.initialize() - mapper = PathMapper() - - builder = IndexTreeBuilder(registry, mapper, config, incremental=not force) - build_result = builder.build( - source_root=base_path, - languages=languages, - workers=workers, - force_full=force, - ) - - results["steps"]["init"] = { - "success": True, - "files_indexed": build_result.total_files, - "dirs_indexed": build_result.total_dirs, - "index_root": str(build_result.index_root), - } - - if not json_mode: - console.print(f" [green]OK[/green] Indexed {build_result.total_files} files in {build_result.total_dirs} directories") - - index_root = Path(build_result.index_root) - registry.close() - - except Exception as e: - results["steps"]["init"] = {"success": False, "error": str(e)} - if json_mode: - print_json(success=False, result=results, error=f"Init failed: {e}") - else: - console.print(f" [red]Error:[/red] {e}") - raise typer.Exit(code=1) - - # Step 2: Generate embeddings - if not json_mode: - console.print(f"\n[bold]Step 2/2: Generating dense embeddings...[/bold]") - - try: - from codexlens.cli.embedding_manager import generate_dense_embeddings_centralized - - def progress_update(msg: str): - if not json_mode and verbose: - console.print(f" {msg}") - - embed_result = generate_dense_embeddings_centralized( - index_root, - embedding_backend=backend, - model_profile=model, - force=force, - chunk_size=2000, - progress_callback=progress_update, - max_workers=max_workers, - ) - - if embed_result["success"]: - data = embed_result["result"] - results["steps"]["embeddings"] = { - "success": True, - "chunks_created": data.get("chunks_created", 0), - "files_processed": data.get("files_processed", 0), - } - if not json_mode: - console.print(f" [green]OK[/green] Generated {data.get('chunks_created', 0)} chunks for {data.get('files_processed', 0)} files") - else: - results["steps"]["embeddings"] = { - "success": False, - "error": embed_result.get("error"), - } - if not json_mode: - console.print(f" [yellow]Warning:[/yellow] {embed_result.get('error', 'Unknown error')}") - - except Exception as e: - results["steps"]["embeddings"] = {"success": False, "error": str(e)} - if not json_mode: - console.print(f" [yellow]Warning:[/yellow] {e}") - - # Summary - if json_mode: - print_json(success=True, result=results) - else: - console.print(f"\n[bold]Indexing Complete[/bold]") - init_ok = results["steps"].get("init", {}).get("success", False) - emb_ok = results["steps"].get("embeddings", {}).get("success", False) - console.print(f" FTS Index: {'[green]OK[/green]' if init_ok else '[red]Failed[/red]'}") - console.print(f" Embeddings: {'[green]OK[/green]' if emb_ok else '[yellow]Partial/Skipped[/yellow]'}") - - -# ==================== Index Migration Commands ==================== - -# Index version for migration tracking (file-based version marker) -INDEX_FORMAT_VERSION = "2.0" -INDEX_VERSION_FILE = "_index_version.txt" - - -def _get_index_version(index_root: Path) -> Optional[str]: - """Read index format version from version marker file. - - Args: - index_root: Root directory of the index - - Returns: - Version string if file exists, None otherwise - """ - version_file = index_root / INDEX_VERSION_FILE - if version_file.exists(): - try: - return version_file.read_text(encoding="utf-8").strip() - except Exception: - return None - return None - - -def _set_index_version(index_root: Path, version: str) -> None: - """Write index format version to version marker file. - - Args: - index_root: Root directory of the index - version: Version string to write - """ - version_file = index_root / INDEX_VERSION_FILE - version_file.write_text(version, encoding="utf-8") - - -def _discover_distributed_hnsw(index_root: Path) -> List[Dict[str, Any]]: - """Discover distributed HNSW index files. - - Scans for .hnsw files that are stored alongside _index.db files. - This is the old distributed format that needs migration. - - Args: - index_root: Root directory to scan - - Returns: - List of dicts with hnsw_path, size_bytes - """ - results = [] - - for hnsw_path in index_root.rglob("*.hnsw"): - try: - size = hnsw_path.stat().st_size - results.append({ - "hnsw_path": hnsw_path, - "size_bytes": size, - }) - except Exception: - pass - - return results - - -def _check_centralized_storage(index_root: Path) -> Dict[str, Any]: - """Check for centralized storage files. - - Args: - index_root: Root directory to check - - Returns: - Dict with has_vectors, vector_stats - """ - from codexlens.config import VECTORS_HNSW_NAME - - vectors_hnsw = index_root / VECTORS_HNSW_NAME - - result = { - "has_vectors": vectors_hnsw.exists(), - "vectors_path": str(vectors_hnsw) if vectors_hnsw.exists() else None, - "vector_stats": None, - } - - # Get vector stats if exists - if vectors_hnsw.exists(): - try: - result["vector_stats"] = { - "size_bytes": vectors_hnsw.stat().st_size, - } - except Exception: - pass - - return result - - -@index_app.command("migrate") -def index_migrate_cmd( - path: Annotated[Optional[str], typer.Argument(help="Project path to migrate")] = None, - dry_run: Annotated[bool, typer.Option("--dry-run", help="Show what would be migrated without making changes")] = False, - force: Annotated[bool, typer.Option("--force", help="Force migration even if already migrated")] = False, - json_mode: Annotated[bool, typer.Option("--json", help="Output JSON response")] = False, - verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Enable verbose output")] = False, -) -> None: - """Migrate old distributed index to new centralized architecture. - - This command upgrades indexes from the old distributed storage format - (where vectors were stored in each _index.db) to the new centralized - format (single _vectors.hnsw at index root). - - Migration Steps: - 1. Detect if migration is needed (check version marker) - 2. Discover distributed .hnsw files - 3. Report current status - 4. Create version marker (unless --dry-run) - - Use --dry-run to preview what would be migrated without making changes. - Use --force to re-run migration even if version marker exists. - - Note: For full data migration (vectors consolidation), run: - codexlens index embeddings --force - - Examples: - codexlens index migrate ~/projects/my-app --dry-run - codexlens index migrate . --force - codexlens index migrate --json - """ - _configure_logging(verbose, json_mode) - - # Resolve target path - if path: - target_path = Path(path).expanduser().resolve() - else: - target_path = Path.cwd() - - if not target_path.exists(): - if json_mode: - print_json(success=False, error=f"Path does not exist: {target_path}") - else: - console.print(f"[red]Error:[/red] Path does not exist: {target_path}") - raise typer.Exit(code=1) - - # Find index root - registry: RegistryStore | None = None - index_root: Optional[Path] = None - - try: - registry = RegistryStore() - registry.initialize() - mapper = PathMapper() - - # Check if path is a project with an index - project_info = registry.get_project(target_path) - if project_info: - index_root = Path(project_info.index_root) - else: - # Try to find index via mapper - index_db = mapper.source_to_index_db(target_path) - if index_db.exists(): - index_root = index_db.parent - finally: - if registry: - registry.close() - - if not index_root or not index_root.exists(): - if json_mode: - print_json(success=False, error=f"No index found for: {target_path}") - else: - console.print(f"[red]Error:[/red] No index found for: {target_path}") - console.print("[dim]Run 'codexlens init' first to create an index.[/dim]") - raise typer.Exit(code=1) - - if not json_mode: - console.print(f"[bold]Index Migration Check[/bold]") - console.print(f"Source path: [dim]{target_path}[/dim]") - console.print(f"Index root: [dim]{index_root}[/dim]") - if dry_run: - console.print("[yellow]Mode: DRY RUN (no changes will be made)[/yellow]") - console.print() - - # Check current version - current_version = _get_index_version(index_root) - needs_migration = current_version is None or (force and current_version != INDEX_FORMAT_VERSION) - - if current_version and current_version >= INDEX_FORMAT_VERSION and not force: - result = { - "path": str(target_path), - "index_root": str(index_root), - "current_version": current_version, - "target_version": INDEX_FORMAT_VERSION, - "needs_migration": False, - "message": "Index is already at the latest version", - } - - if json_mode: - print_json(success=True, result=result) - else: - console.print(f"[green]OK[/green] Index is already at version {current_version}") - console.print("[dim]No migration needed. Use --force to re-run migration.[/dim]") - return - - # Discover distributed data - distributed_hnsw = _discover_distributed_hnsw(index_root) - centralized = _check_centralized_storage(index_root) - - # Count all _index.db files - all_index_dbs = list(index_root.rglob("_index.db")) - - # Build migration report - migration_report = { - "path": str(target_path), - "index_root": str(index_root), - "dry_run": dry_run, - "current_version": current_version, - "target_version": INDEX_FORMAT_VERSION, - "needs_migration": needs_migration, - "discovery": { - "total_index_dbs": len(all_index_dbs), - "distributed_hnsw_count": len(distributed_hnsw), - "distributed_hnsw_total_bytes": sum(d["size_bytes"] for d in distributed_hnsw), - }, - "centralized": centralized, - "recommendations": [], - } - - # Generate recommendations - if distributed_hnsw and not centralized["has_vectors"]: - migration_report["recommendations"].append( - f"Run 'codexlens embeddings-generate {target_path} --recursive --force' to consolidate vector data" - ) - - if not distributed_hnsw: - migration_report["recommendations"].append( - "No distributed data found. Index may already be using centralized storage." - ) - - if json_mode: - # Perform migration action (set version marker) unless dry-run - if not dry_run and needs_migration: - _set_index_version(index_root, INDEX_FORMAT_VERSION) - migration_report["migrated"] = True - migration_report["new_version"] = INDEX_FORMAT_VERSION - else: - migration_report["migrated"] = False - - print_json(success=True, result=migration_report) - else: - # Display discovery results - console.print("[bold]Discovery Results:[/bold]") - console.print(f" Total _index.db files: {len(all_index_dbs)}") - console.print() - - # Distributed HNSW - console.print("[bold]Distributed HNSW Files:[/bold]") - if distributed_hnsw: - total_size = sum(d["size_bytes"] for d in distributed_hnsw) - console.print(f" Found {len(distributed_hnsw)} .hnsw files") - console.print(f" Total size: {total_size / (1024 * 1024):.1f} MB") - if verbose: - for d in distributed_hnsw[:5]: - console.print(f" [dim]{d['hnsw_path'].name}: {d['size_bytes'] / 1024:.1f} KB[/dim]") - if len(distributed_hnsw) > 5: - console.print(f" [dim]... and {len(distributed_hnsw) - 5} more[/dim]") - else: - console.print(" [dim]None found (already centralized or not generated)[/dim]") - console.print() - - # Centralized storage status - console.print("[bold]Centralized Storage:[/bold]") - if centralized["has_vectors"]: - stats = centralized.get("vector_stats") or {} - size_mb = stats.get("size_bytes", 0) / (1024 * 1024) - console.print(f" [green]OK[/green] _vectors.hnsw exists ({size_mb:.1f} MB)") - else: - console.print(f" [yellow]--[/yellow] _vectors.hnsw not found") - console.print() - - # Migration action - if not dry_run and needs_migration: - _set_index_version(index_root, INDEX_FORMAT_VERSION) - console.print(f"[green]OK[/green] Version marker created: {INDEX_FORMAT_VERSION}") - elif dry_run: - console.print(f"[yellow]DRY RUN:[/yellow] Would create version marker: {INDEX_FORMAT_VERSION}") - - # Recommendations - if migration_report["recommendations"]: - console.print("\n[bold]Recommendations:[/bold]") - for rec in migration_report["recommendations"]: - console.print(f" [cyan]>[/cyan] {rec}") - - -# ==================== Deprecated Command Aliases ==================== -# These commands maintain backward compatibility with the old CLI structure. -# They display deprecation warnings and delegate to the new `index` subcommands. - - -@app.command("embeddings-generate", hidden=True, deprecated=True) -def embeddings_generate_deprecated( - path: Path = typer.Argument( - ..., - exists=True, - help="Path to _index.db file or project directory.", - ), - backend: str = typer.Option( - "fastembed", - "--backend", - "-b", - help="Embedding backend: fastembed (local) or litellm (remote API).", - ), - model: str = typer.Option( - "code", - "--model", - "-m", - help="Model: profile name for fastembed or model name for litellm.", - ), - force: bool = typer.Option( - False, - "--force", - "-f", - help="Force regeneration even if embeddings exist.", - ), - chunk_size: int = typer.Option( - 2000, - "--chunk-size", - help="Maximum chunk size in characters.", - ), - max_workers: int = typer.Option( - 1, - "--max-workers", - "-w", - min=1, - help="Max concurrent API calls.", - ), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose output."), - centralized: bool = typer.Option( - False, - "--centralized", - "-c", - help="Use centralized vector storage (default).", - ), - distributed: bool = typer.Option( - False, - "--distributed", - "-d", - help="Use distributed per-directory indexes.", - ), -) -> None: - """[Deprecated] Use 'codexlens index embeddings' instead.""" - _deprecated_command_warning("embeddings-generate", "index embeddings") - if centralized and distributed: - _fail_mutually_exclusive("--centralized", "--distributed", json_mode) - index_embeddings( - path=path, - backend=backend, - model=model, - force=force, - chunk_size=chunk_size, - max_workers=max_workers, - json_mode=json_mode, - verbose=verbose, - centralized=centralized, - distributed=distributed, - ) - - -@app.command("init", hidden=True, deprecated=True) -def init_deprecated( - path: Path = typer.Argument(Path("."), exists=True, file_okay=False, dir_okay=True, help="Project root to index."), - language: Optional[List[str]] = typer.Option(None, "--language", "-l", help="Limit indexing to specific languages."), - workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, help="Parallel worker processes."), - force: bool = typer.Option(False, "--force", "-f", help="Force full reindex."), - no_embeddings: bool = typer.Option(False, "--no-embeddings", help="Skip automatic embedding generation."), - backend: str = typer.Option("fastembed", "--backend", "-b", help="Embedding backend."), - model: str = typer.Option("code", "--model", "-m", help="Embedding model."), - max_workers: int = typer.Option(1, "--max-workers", min=1, help="Max concurrent API calls."), - json_mode: bool = typer.Option(False, "--json", help="Output JSON response."), - verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), -) -> None: - """[Deprecated] Use 'codexlens index init' instead.""" - _deprecated_command_warning("init", "index init") - index_init( - path=path, - language=language, - workers=workers, - force=force, - no_embeddings=no_embeddings, - backend=backend, - model=model, - max_workers=max_workers, - json_mode=json_mode, - verbose=verbose, - ) - - - -@app.command("cascade-index", hidden=True, deprecated=True) -def cascade_index_deprecated( - path: Annotated[Path, typer.Argument(help="Directory to index")], - force: Annotated[bool, typer.Option("--force", "-f", help="Force regenerate")] = False, - batch_size: Annotated[int, typer.Option("--batch-size", "-b", help="Batch size for embedding")] = 32, - json_mode: Annotated[bool, typer.Option("--json", help="Output JSON response")] = False, - verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Enable verbose logging")] = False, -) -> None: - """[Deprecated] Use 'codexlens index binary' instead.""" - _deprecated_command_warning("cascade-index", "index binary") - index_binary( - path=path, - force=force, - batch_size=batch_size, - json_mode=json_mode, - verbose=verbose, - ) - - -@app.command("index-migrate", hidden=True, deprecated=True) -def index_migrate_deprecated( - path: Annotated[Optional[str], typer.Argument(help="Project path to migrate")] = None, - dry_run: Annotated[bool, typer.Option("--dry-run", help="Show what would be migrated")] = False, - force: Annotated[bool, typer.Option("--force", help="Force migration")] = False, - json_mode: Annotated[bool, typer.Option("--json", help="Output JSON response")] = False, - verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Enable verbose output")] = False, -) -> None: - """[Deprecated] Use 'codexlens index migrate' instead.""" - _deprecated_command_warning("index-migrate", "index migrate") - index_migrate_cmd( - path=path, - dry_run=dry_run, - force=force, - json_mode=json_mode, - verbose=verbose, - ) - - -# ==================== DeepWiki Commands ==================== - -deepwiki_app = typer.Typer(help="DeepWiki documentation generation commands") -app.add_typer(deepwiki_app, name="deepwiki") - - -@deepwiki_app.command("generate") -def deepwiki_generate( - path: Annotated[Path, typer.Argument(help="File or directory to generate docs for")] = Path("."), - force: Annotated[bool, typer.Option("--force", "-f", help="Force regeneration")] = False, - json_mode: Annotated[bool, typer.Option("--json", help="Output JSON response")] = False, - verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Enable verbose logging")] = False, -) -> None: - """Generate DeepWiki documentation for source files. - - Scans source code, extracts symbols, and generates Markdown documentation - with incremental updates using SHA256 hashes for change detection. - - Examples: - codexlens deepwiki generate ./src - codexlens deepwiki generate ./src/auth.py - """ - from codexlens.tools.deepwiki_generator import DeepWikiGenerator - - _configure_logging(verbose, json_mode) - - path = Path(path).resolve() - if not path.exists(): - msg = f"Path not found: {path}" - if json_mode: - print_json(success=False, error=msg) - else: - console.print(f"[red]Error:[/red] {msg}") - raise typer.Exit(code=1) - - try: - generator = DeepWikiGenerator() - result = generator.run(path) - - if json_mode: - print_json(success=True, result=result) - else: - console.print(f"[green]DeepWiki generation complete:[/green]") - console.print(f" Files processed: {result['processed_files']}/{result['total_files']}") - console.print(f" Symbols found: {result['total_symbols']}") - console.print(f" Docs generated: {result['docs_generated']}") - if result['skipped_files'] > 0: - console.print(f" Files skipped (unchanged): {result['skipped_files']}") - - except Exception as e: - msg = f"DeepWiki generation failed: {e}" - if json_mode: - print_json(success=False, error=msg) - else: - console.print(f"[red]Error:[/red] {msg}") - raise typer.Exit(code=1) - - -@deepwiki_app.command("status") -def deepwiki_status( - json_mode: Annotated[bool, typer.Option("--json", help="Output JSON response")] = False, - verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Enable verbose logging")] = False, -) -> None: - """Show DeepWiki documentation status. - - Displays statistics about indexed files and generated documentation. - """ - from codexlens.storage.deepwiki_store import DeepWikiStore - - _configure_logging(verbose, json_mode) - - try: - store = DeepWikiStore() - stats = store.get_stats() - - if json_mode: - print_json(success=True, result=stats) - else: - console.print("[cyan]DeepWiki Status:[/cyan]") - console.print(f" Files tracked: {stats.get('files_count', 0)}") - console.print(f" Symbols indexed: {stats.get('symbols_count', 0)}") - console.print(f" Docs generated: {stats.get('docs_count', 0)}") - - except Exception as e: - msg = f"Failed to get DeepWiki status: {e}" - if json_mode: - print_json(success=False, error=msg) - else: - console.print(f"[red]Error:[/red] {msg}") - raise typer.Exit(code=1) diff --git a/codex-lens/src/codexlens/cli/embedding_manager.py b/codex-lens/src/codexlens/cli/embedding_manager.py deleted file mode 100644 index 8bbb3a74..00000000 --- a/codex-lens/src/codexlens/cli/embedding_manager.py +++ /dev/null @@ -1,2377 +0,0 @@ -"""Embedding Manager - Manage semantic embeddings for code indexes. - -This module provides functions for generating and managing semantic embeddings -for code indexes, supporting both fastembed and litellm backends. - -Example Usage: - Generate embeddings for a single index: - - >>> from pathlib import Path - >>> from codexlens.cli.embedding_manager import generate_embeddings - >>> result = generate_embeddings( - ... index_path=Path("path/to/_index.db"), - ... force=True - ... ) - >>> if result["success"]: - ... print(f"Generated {result['total_chunks_created']} embeddings") - - Generate embeddings for an entire project with centralized index: - - >>> from codexlens.cli.embedding_manager import generate_dense_embeddings_centralized - >>> result = generate_dense_embeddings_centralized( - ... index_root=Path("path/to/project"), - ... force=True, - ... progress_callback=lambda msg: print(msg) - ... ) - - Check if embeddings exist: - - >>> from codexlens.cli.embedding_manager import check_index_embeddings - >>> status = check_index_embeddings(Path("path/to/_index.db")) - >>> print(status["result"]["has_embeddings"]) - -Backward Compatibility: - The deprecated `discover_all_index_dbs()` function is maintained for compatibility. - `generate_embeddings_recursive()` is deprecated but functional; use - `generate_dense_embeddings_centralized()` instead. - The `EMBEDDING_BATCH_SIZE` constant is kept as a reference but actual batch size - is calculated dynamically via `calculate_dynamic_batch_size()`. -""" - -import gc -import json -import logging -import sqlite3 -import time -from concurrent.futures import ThreadPoolExecutor, as_completed -from itertools import islice -from pathlib import Path -from typing import Any, Dict, Generator, List, Optional, Tuple - -from codexlens.storage.index_filters import filter_index_paths - -try: - from codexlens.semantic import SEMANTIC_AVAILABLE, is_embedding_backend_available -except ImportError: - SEMANTIC_AVAILABLE = False - def is_embedding_backend_available(_backend: str): # type: ignore[no-redef] - return False, "codexlens.semantic not available" - -try: - from codexlens.semantic.vector_store import VectorStore -except ImportError: # pragma: no cover - VectorStore = None # type: ignore[assignment] - -try: - from codexlens.config import ( - BINARY_VECTORS_MMAP_NAME, - VECTORS_HNSW_NAME, - VECTORS_META_DB_NAME, - ) -except ImportError: - VECTORS_HNSW_NAME = "_vectors.hnsw" - VECTORS_META_DB_NAME = "_vectors_meta.db" - BINARY_VECTORS_MMAP_NAME = "_binary_vectors.mmap" - -try: - from codexlens.search.ranking import get_file_category -except ImportError: - def get_file_category(path: str): # type: ignore[no-redef] - """Fallback: map common extensions to category.""" - ext = Path(path).suffix.lower() - code_exts = {".py", ".js", ".jsx", ".ts", ".tsx", ".java", ".go", ".c", ".cpp", ".rs"} - doc_exts = {".md", ".mdx", ".txt", ".rst"} - if ext in code_exts: - return "code" - elif ext in doc_exts: - return "doc" - return None - -logger = logging.getLogger(__name__) - -# Embedding batch size - larger values improve throughput on modern hardware -# Benchmark: 256 gives ~2.35x speedup over 64 with DirectML GPU acceleration -EMBEDDING_BATCH_SIZE = 256 - - -def calculate_dynamic_batch_size(config, embedder) -> int: - """Calculate batch size dynamically based on model token capacity. - - This function computes an optimal batch size by considering: - - Maximum chunk character size from parsing rules - - Estimated tokens per chunk (chars / chars_per_token_estimate) - - Model's maximum token capacity - - Utilization factor (default 80% to leave headroom) - - Args: - config: Config object with api_batch_size_* settings. - embedder: Embedding model object with max_tokens property. - - Returns: - int: Calculated batch size, clamped to [1, api_batch_size_max]. - """ - # If dynamic calculation is disabled, return static value - if not getattr(config, 'api_batch_size_dynamic', False): - return getattr(config, 'api_batch_size', 8) - - # Get maximum chunk character size from ALL parsing rules (not just default) - # This ensures we use the worst-case chunk size across all languages - parsing_rules = getattr(config, 'parsing_rules', {}) - all_max_chunk_chars = [ - rule.get('max_chunk_chars', 0) - for rule in parsing_rules.values() - if isinstance(rule, dict) - ] - max_chunk_chars = max(all_max_chunk_chars) if all_max_chunk_chars else 4000 - if max_chunk_chars <= 0: - max_chunk_chars = 4000 # Final fallback - - # Get characters per token estimate - chars_per_token = getattr(config, 'chars_per_token_estimate', 4) - if chars_per_token <= 0: - chars_per_token = 4 # Safe default - - # Estimate tokens per chunk - estimated_tokens_per_chunk = max_chunk_chars / chars_per_token - - # Prevent division by zero - if estimated_tokens_per_chunk <= 0: - return getattr(config, 'api_batch_size', 8) - - # Get model's maximum token capacity - model_max_tokens = getattr(embedder, 'max_tokens', 8192) - - # Get utilization factor (default 80%, max 95% to leave safety margin) - utilization_factor = getattr(config, 'api_batch_size_utilization_factor', 0.8) - if utilization_factor <= 0 or utilization_factor > 0.95: - if utilization_factor > 0.95: - logger.warning( - "Utilization factor %.2f exceeds safe limit 0.95. " - "Token estimation is approximate, high values risk API errors. " - "Clamping to 0.95.", - utilization_factor - ) - utilization_factor = 0.95 - else: - utilization_factor = 0.8 - - # Calculate safe token limit - safe_token_limit = model_max_tokens * utilization_factor - - # Calculate dynamic batch size - dynamic_batch_size = int(safe_token_limit / estimated_tokens_per_chunk) - - # Get maximum batch size limit - batch_size_max = getattr(config, 'api_batch_size_max', 2048) - - # Clamp to [1, batch_size_max] - result = max(1, min(dynamic_batch_size, batch_size_max)) - - logger.debug( - "Dynamic batch size calculated: %d (max_chunk_chars=%d, chars_per_token=%d, " - "model_max_tokens=%d, utilization=%.1f%%, limit=%d)", - result, max_chunk_chars, chars_per_token, model_max_tokens, - utilization_factor * 100, batch_size_max - ) - - return result - - -def _build_categories_from_batch(chunk_batch: List[Tuple[Any, str]]) -> List[str]: - """Build categories list from chunk batch for index-level category filtering. - - Args: - chunk_batch: List of (chunk, file_path) tuples - - Returns: - List of category strings ('code' or 'doc'), defaulting to 'code' for unknown - """ - categories = [] - for _, file_path in chunk_batch: - cat = get_file_category(file_path) - categories.append(cat if cat else "code") # Default to 'code' for unknown extensions - return categories - - -def _cleanup_fastembed_resources() -> None: - """Best-effort cleanup for fastembed/ONNX resources (no-op for other backends).""" - try: - from codexlens.semantic.embedder import clear_embedder_cache - clear_embedder_cache() - except (ImportError, AttributeError): - # Expected when semantic module unavailable or cache function doesn't exist - pass - except Exception as exc: - # Log unexpected errors but don't fail cleanup - logger.debug(f"Unexpected error during fastembed cleanup: {exc}") - - -def _generate_chunks_from_cursor( - cursor, - chunker, - path_column: str, - file_batch_size: int, - failed_files: List[Tuple[str, str]], -) -> Generator[Tuple, None, Tuple[int, int]]: - """Generator that yields chunks from database cursor in a streaming fashion. - - This avoids loading all chunks into memory at once, significantly reducing - peak memory usage for large codebases. - - Args: - cursor: SQLite cursor with file data - chunker: Chunker instance for splitting files - path_column: Column name for file path - file_batch_size: Number of files to fetch at a time - failed_files: List to append failed files to - - Yields: - (chunk, file_path) tuples - - Returns: - (total_files_processed, batch_count) after iteration completes - """ - total_files = 0 - batch_count = 0 - - while True: - file_batch = cursor.fetchmany(file_batch_size) - if not file_batch: - break - - batch_count += 1 - - for file_row in file_batch: - file_path = file_row[path_column] - content = file_row["content"] - language = file_row["language"] or "python" - - try: - chunks = chunker.chunk_sliding_window( - content, - file_path=file_path, - language=language - ) - if chunks: - total_files += 1 - for chunk in chunks: - yield (chunk, file_path) - except (OSError, UnicodeDecodeError) as e: - # File access or encoding errors - logger.error(f"Failed to read file {file_path}: {e}") - failed_files.append((file_path, f"File read error: {e}")) - except ValueError as e: - # Chunking configuration errors - logger.error(f"Chunking config error for {file_path}: {e}") - failed_files.append((file_path, f"Chunking error: {e}")) - except Exception as e: - # Other unexpected errors - logger.error(f"Unexpected error processing {file_path}: {e}") - failed_files.append((file_path, f"Unexpected error: {e}")) - - -def _create_token_aware_batches( - chunk_generator: Generator, - max_tokens_per_batch: int = 8000, -) -> Generator[List[Tuple], None, None]: - """Group chunks by total token count instead of fixed count. - - Uses fast token estimation (len(content) // 4) for efficiency. - Yields batches when approaching the token limit. - - Args: - chunk_generator: Generator yielding (chunk, file_path) tuples - max_tokens_per_batch: Maximum tokens per batch (default: 8000) - - Yields: - List of (chunk, file_path) tuples representing a batch - """ - current_batch = [] - current_tokens = 0 - - for chunk, file_path in chunk_generator: - # Fast token estimation: len(content) // 4 - chunk_tokens = len(chunk.content) // 4 - - # If adding this chunk would exceed limit and we have items, yield current batch - if current_tokens + chunk_tokens > max_tokens_per_batch and current_batch: - yield current_batch - current_batch = [] - current_tokens = 0 - - # Add chunk to current batch - current_batch.append((chunk, file_path)) - current_tokens += chunk_tokens - - # Yield final batch if not empty - if current_batch: - yield current_batch - - -def _get_path_column(conn: sqlite3.Connection) -> str: - """Detect whether files table uses 'path' or 'full_path' column. - - Args: - conn: SQLite connection to the index database - - Returns: - Column name ('path' or 'full_path') - - Raises: - ValueError: If neither column exists in files table - """ - cursor = conn.execute("PRAGMA table_info(files)") - columns = {row[1] for row in cursor.fetchall()} - if 'full_path' in columns: - return 'full_path' - elif 'path' in columns: - return 'path' - raise ValueError("files table has neither 'path' nor 'full_path' column") - - -def check_index_embeddings(index_path: Path) -> Dict[str, any]: - """Check if an index has embeddings and return statistics. - - Args: - index_path: Path to _index.db file - - Returns: - Dictionary with embedding statistics and status - """ - if not index_path.exists(): - return { - "success": False, - "error": f"Index not found: {index_path}", - } - - try: - with sqlite3.connect(index_path) as conn: - # Check if semantic_chunks table exists - cursor = conn.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_chunks'" - ) - table_exists = cursor.fetchone() is not None - - if not table_exists: - # Count total indexed files even without embeddings - cursor = conn.execute("SELECT COUNT(*) FROM files") - total_files = cursor.fetchone()[0] - - return { - "success": True, - "result": { - "has_embeddings": False, - "total_chunks": 0, - "total_files": total_files, - "files_with_chunks": 0, - "files_without_chunks": total_files, - "coverage_percent": 0.0, - "missing_files_sample": [], - "index_path": str(index_path), - }, - } - - # Count total chunks - cursor = conn.execute("SELECT COUNT(*) FROM semantic_chunks") - total_chunks = cursor.fetchone()[0] - - # Count total indexed files - cursor = conn.execute("SELECT COUNT(*) FROM files") - total_files = cursor.fetchone()[0] - - # Count files with embeddings - cursor = conn.execute( - "SELECT COUNT(DISTINCT file_path) FROM semantic_chunks" - ) - files_with_chunks = cursor.fetchone()[0] - - # Get a sample of files without embeddings - path_column = _get_path_column(conn) - cursor = conn.execute(f""" - SELECT {path_column} - FROM files - WHERE {path_column} NOT IN ( - SELECT DISTINCT file_path FROM semantic_chunks - ) - LIMIT 5 - """) - missing_files = [row[0] for row in cursor.fetchall()] - - return { - "success": True, - "result": { - "has_embeddings": total_chunks > 0, - "total_chunks": total_chunks, - "total_files": total_files, - "files_with_chunks": files_with_chunks, - "files_without_chunks": total_files - files_with_chunks, - "coverage_percent": round((files_with_chunks / total_files * 100) if total_files > 0 else 0, 1), - "missing_files_sample": missing_files, - "index_path": str(index_path), - }, - } - - except Exception as e: - return { - "success": False, - "error": f"Failed to check embeddings: {str(e)}", - } - - -def _sqlite_table_exists(conn: sqlite3.Connection, table_name: str) -> bool: - """Return whether a SQLite table exists.""" - cursor = conn.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name=?", - (table_name,), - ) - return cursor.fetchone() is not None - - -def _sqlite_count_rows(conn: sqlite3.Connection, table_name: str) -> int: - """Return row count for a table, or 0 when the table is absent.""" - if not _sqlite_table_exists(conn, table_name): - return 0 - cursor = conn.execute(f"SELECT COUNT(*) FROM {table_name}") - return int(cursor.fetchone()[0] or 0) - - -def _sqlite_count_distinct_rows(conn: sqlite3.Connection, table_name: str, column_name: str) -> int: - """Return distinct row count for a table column, or 0 when the table is absent.""" - if not _sqlite_table_exists(conn, table_name): - return 0 - cursor = conn.execute(f"SELECT COUNT(DISTINCT {column_name}) FROM {table_name}") - return int(cursor.fetchone()[0] or 0) - - -def _get_model_info_from_index(index_path: Path) -> Optional[Dict[str, Any]]: - """Read embedding model metadata from an index if available.""" - try: - with sqlite3.connect(index_path) as conn: - if not _sqlite_table_exists(conn, "embeddings_config"): - return None - from codexlens.semantic.vector_store import VectorStore - with VectorStore(index_path) as vs: - config = vs.get_model_config() - if not config: - return None - return { - "model_profile": config.get("model_profile"), - "model_name": config.get("model_name"), - "embedding_dim": config.get("embedding_dim"), - "backend": config.get("backend"), - "created_at": config.get("created_at"), - "updated_at": config.get("updated_at"), - } - except Exception: - return None - - -def _inspect_centralized_embeddings(index_root: Path) -> Dict[str, Any]: - """Inspect centralized vector artifacts stored directly at the current root.""" - dense_index_path = index_root / VECTORS_HNSW_NAME - meta_db_path = index_root / VECTORS_META_DB_NAME - binary_index_path = index_root / BINARY_VECTORS_MMAP_NAME - - result: Dict[str, Any] = { - "index_root": str(index_root), - "dense_index_path": str(dense_index_path) if dense_index_path.exists() else None, - "binary_index_path": str(binary_index_path) if binary_index_path.exists() else None, - "meta_db_path": str(meta_db_path) if meta_db_path.exists() else None, - "dense_index_exists": dense_index_path.exists(), - "binary_index_exists": binary_index_path.exists(), - "meta_db_exists": meta_db_path.exists(), - "chunk_metadata_rows": 0, - "binary_vector_rows": 0, - "files_with_embeddings": 0, - "dense_ready": False, - "binary_ready": False, - "usable": False, - } - - if not meta_db_path.exists(): - return result - - try: - with sqlite3.connect(meta_db_path) as conn: - result["chunk_metadata_rows"] = _sqlite_count_rows(conn, "chunk_metadata") - result["binary_vector_rows"] = _sqlite_count_rows(conn, "binary_vectors") - result["files_with_embeddings"] = _sqlite_count_distinct_rows(conn, "chunk_metadata", "file_path") - except Exception as exc: - result["error"] = f"Failed to inspect centralized metadata: {exc}" - return result - - result["dense_ready"] = result["dense_index_exists"] and result["chunk_metadata_rows"] > 0 - result["binary_ready"] = ( - result["binary_index_exists"] - and result["chunk_metadata_rows"] > 0 - and result["binary_vector_rows"] > 0 - ) - result["usable"] = result["dense_ready"] or result["binary_ready"] - return result - - -def _get_embedding_defaults() -> tuple[str, str, bool, List, str, float]: - """Get default embedding settings from config. - - Returns: - Tuple of (backend, model, use_gpu, endpoints, strategy, cooldown) - """ - try: - from codexlens.config import Config - config = Config.load() - return ( - config.embedding_backend, - config.embedding_model, - config.embedding_use_gpu, - config.embedding_endpoints, - config.embedding_strategy, - config.embedding_cooldown, - ) - except (ImportError, AttributeError, OSError, ValueError) as exc: - # Config not available or malformed - use defaults - logger.debug(f"Using default embedding config (config load failed): {exc}") - return "fastembed", "code", True, [], "latency_aware", 60.0 - except Exception as exc: - # Unexpected error - still use defaults but log - logger.warning(f"Unexpected error loading embedding config: {exc}") - return "fastembed", "code", True, [], "latency_aware", 60.0 - - -def _apply_embedding_config_defaults( - embedding_backend: Optional[str], - model_profile: Optional[str], - use_gpu: Optional[bool], - endpoints: Optional[List], - strategy: Optional[str], - cooldown: Optional[float], -) -> tuple[str, str, bool, List, str, float]: - """Apply config defaults to embedding parameters. - - This helper function reduces code duplication across embedding generation - functions by centralizing the default value application logic. - - Args: - embedding_backend: Embedding backend (fastembed/litellm) or None for default - model_profile: Model profile/name or None for default - use_gpu: GPU flag or None for default - endpoints: API endpoints list or None for default - strategy: Selection strategy or None for default - cooldown: Cooldown seconds or None for default - - Returns: - Tuple of (backend, model, use_gpu, endpoints, strategy, cooldown) with - defaults applied where None was passed. - """ - (default_backend, default_model, default_gpu, - default_endpoints, default_strategy, default_cooldown) = _get_embedding_defaults() - - backend = embedding_backend if embedding_backend is not None else default_backend - model = model_profile if model_profile is not None else default_model - gpu = use_gpu if use_gpu is not None else default_gpu - eps = endpoints if endpoints is not None else default_endpoints - strat = strategy if strategy is not None else default_strategy - cool = cooldown if cooldown is not None else default_cooldown - - return backend, model, gpu, eps, strat, cool - - -def _calculate_max_workers( - embedding_backend: str, - endpoints: Optional[List], - max_workers: Optional[int], -) -> int: - """Calculate optimal max_workers based on backend and endpoint count. - - Args: - embedding_backend: The embedding backend being used - endpoints: List of API endpoints (for litellm multi-endpoint mode) - max_workers: Explicitly specified max_workers or None for auto-calculation - - Returns: - Calculated or specified max_workers value - """ - if max_workers is not None: - return max_workers - - endpoint_count = len(endpoints) if endpoints else 1 - - # Set dynamic max_workers default based on backend type and endpoint count - # - FastEmbed: CPU-bound, sequential is optimal (1 worker) - # - LiteLLM single endpoint: 4 workers default - # - LiteLLM multi-endpoint: workers = endpoint_count * 2 (to saturate all APIs) - if embedding_backend == "litellm": - if endpoint_count > 1: - return endpoint_count * 2 # No cap, scale with endpoints - else: - return 4 - else: - return 1 - - -def _initialize_embedder_and_chunker( - embedding_backend: str, - model_profile: str, - use_gpu: bool, - endpoints: Optional[List], - strategy: str, - cooldown: float, - chunk_size: int, - overlap: int, -) -> tuple: - """Initialize embedder and chunker for embedding generation. - - This helper function reduces code duplication by centralizing embedder - and chunker initialization logic. - - Args: - embedding_backend: The embedding backend (fastembed/litellm) - model_profile: Model profile or name - use_gpu: Whether to use GPU acceleration - endpoints: Optional API endpoints for load balancing - strategy: Selection strategy for multi-endpoint mode - cooldown: Cooldown seconds for rate-limited endpoints - chunk_size: Maximum chunk size in characters - overlap: Overlap size in characters - - Returns: - Tuple of (embedder, chunker, endpoint_count) - - Raises: - ValueError: If embedding_backend is invalid - """ - from codexlens.semantic.factory import get_embedder as get_embedder_factory - from codexlens.semantic.chunker import Chunker, ChunkConfig - from codexlens.config import Config - - # Initialize embedder using factory (supports fastembed, litellm, and rotational) - # For fastembed: model_profile is a profile name (fast/code/multilingual/balanced) - # For litellm: model_profile is a model name (e.g., qwen3-embedding) - # For multi-endpoint: endpoints list enables load balancing - if embedding_backend == "fastembed": - embedder = get_embedder_factory(backend="fastembed", profile=model_profile, use_gpu=use_gpu) - elif embedding_backend == "litellm": - embedder = get_embedder_factory( - backend="litellm", - model=model_profile, - endpoints=endpoints if endpoints else None, - strategy=strategy, - cooldown=cooldown, - ) - else: - raise ValueError(f"Invalid embedding backend: {embedding_backend}. Must be 'fastembed' or 'litellm'.") - - # skip_token_count=True: Use fast estimation (len/4) instead of expensive tiktoken - # This significantly reduces CPU usage with minimal impact on metadata accuracy - # Load chunk stripping config from settings - chunk_cfg = Config.load() - chunker = Chunker(config=ChunkConfig( - max_chunk_size=chunk_size, - overlap=overlap, - skip_token_count=True, - strip_comments=getattr(chunk_cfg, 'chunk_strip_comments', True), - strip_docstrings=getattr(chunk_cfg, 'chunk_strip_docstrings', True), - )) - - endpoint_count = len(endpoints) if endpoints else 1 - return embedder, chunker, endpoint_count - - -def generate_embeddings( - index_path: Path, - embedding_backend: Optional[str] = None, - model_profile: Optional[str] = None, - force: bool = False, - chunk_size: int = 2000, - overlap: int = 200, - progress_callback: Optional[callable] = None, - use_gpu: Optional[bool] = None, - max_tokens_per_batch: Optional[int] = None, - max_workers: Optional[int] = None, - endpoints: Optional[List] = None, - strategy: Optional[str] = None, - cooldown: Optional[float] = None, -) -> Dict[str, any]: - """Generate embeddings for an index using memory-efficient batch processing. - - This function processes files in small batches to keep memory usage under 2GB, - regardless of the total project size. Supports concurrent API calls for - LiteLLM backend to improve throughput. - - Args: - index_path: Path to _index.db file. - embedding_backend: Embedding backend to use (fastembed or litellm). - Defaults to config setting. - model_profile: Model profile for fastembed (fast, code, multilingual, balanced) - or model name for litellm (e.g., qwen3-embedding). - Defaults to config setting. - force: If True, regenerate even if embeddings exist. - chunk_size: Maximum chunk size in characters. - overlap: Overlap size in characters for sliding window chunking (default: 200). - progress_callback: Optional callback for progress updates. - use_gpu: Whether to use GPU acceleration (fastembed only). - Defaults to config setting. - max_tokens_per_batch: Maximum tokens per batch for token-aware batching. - If None, attempts to get from embedder.max_tokens, - then falls back to 8000. If set, overrides automatic detection. - max_workers: Maximum number of concurrent API calls. - If None, uses dynamic defaults based on backend and endpoint count. - endpoints: Optional list of endpoint configurations for multi-API load balancing. - Each dict has keys: model, api_key, api_base, weight. - strategy: Selection strategy for multi-endpoint mode (round_robin, latency_aware). - cooldown: Default cooldown seconds for rate-limited endpoints. - - Returns: - Dict[str, any]: Result dictionary with generation statistics. - Contains keys: success, error (if failed), files_processed, - total_chunks_created, execution_time, etc. - - Raises: - ValueError: If embedding_backend is invalid. - ImportError: If semantic module is not available. - """ - # Apply config defaults - embedding_backend, model_profile, use_gpu, endpoints, strategy, cooldown = \ - _apply_embedding_config_defaults( - embedding_backend, model_profile, use_gpu, endpoints, strategy, cooldown - ) - - # Calculate max_workers - max_workers = _calculate_max_workers(embedding_backend, endpoints, max_workers) - - backend_available, backend_error = is_embedding_backend_available(embedding_backend) - if not backend_available: - return {"success": False, "error": backend_error or "Embedding backend not available"} - - if not index_path.exists(): - return { - "success": False, - "error": f"Index not found: {index_path}", - } - - # Check existing chunks - status = check_index_embeddings(index_path) - if not status["success"]: - return status - - existing_chunks = status["result"]["total_chunks"] - - if existing_chunks > 0 and not force: - return { - "success": False, - "error": f"Index already has {existing_chunks} chunks. Use --force to regenerate.", - "existing_chunks": existing_chunks, - } - - if force and existing_chunks > 0: - if progress_callback: - progress_callback(f"Clearing {existing_chunks} existing chunks...") - - try: - with sqlite3.connect(index_path) as conn: - conn.execute("DELETE FROM semantic_chunks") - conn.commit() - except sqlite3.DatabaseError as e: - return { - "success": False, - "error": f"Database error clearing chunks: {str(e)}", - } - except Exception as e: - return { - "success": False, - "error": f"Failed to clear existing chunks: {str(e)}", - } - - # Initialize embedder and chunker using helper - try: - embedder, chunker, endpoint_count = _initialize_embedder_and_chunker( - embedding_backend, model_profile, use_gpu, endpoints, strategy, cooldown, - chunk_size, overlap - ) - - # Log embedder info with endpoint count for multi-endpoint mode - if progress_callback: - if endpoint_count > 1: - progress_callback(f"Using {endpoint_count} API endpoints with {strategy} strategy") - progress_callback(f"Using model: {embedder.model_name} ({embedder.embedding_dim} dimensions)") - - # Calculate dynamic batch size based on model capacity - from codexlens.config import Config - batch_config = Config.load() - effective_batch_size = calculate_dynamic_batch_size(batch_config, embedder) - - if progress_callback and batch_config.api_batch_size_dynamic: - progress_callback(f"Dynamic batch size: {effective_batch_size} (model max_tokens={getattr(embedder, 'max_tokens', 8192)})") - - except (ImportError, ValueError) as e: - # Missing dependency or invalid configuration - return { - "success": False, - "error": f"Failed to initialize embedding components: {str(e)}", - } - except Exception as e: - # Other unexpected errors - return { - "success": False, - "error": f"Unexpected error initializing components: {str(e)}", - } - - # --- STREAMING PROCESSING --- - # Process files in batches to control memory usage - start_time = time.time() - failed_files = [] - total_chunks_created = 0 - total_files_processed = 0 - FILE_BATCH_SIZE = 100 # Process 100 files at a time - # effective_batch_size is calculated above (dynamic or EMBEDDING_BATCH_SIZE fallback) - - try: - if VectorStore is None: - return { - "success": False, - "error": "Semantic search not available (VectorStore import failed). Install with: pip install codexlens[semantic]", - } - with VectorStore(index_path) as vector_store: - # Check model compatibility with existing embeddings - if not force: - is_compatible, warning = vector_store.check_model_compatibility( - model_profile, embedder.model_name, embedder.embedding_dim - ) - if not is_compatible: - return { - "success": False, - "error": warning, - } - - # Set/update model configuration for this index - vector_store.set_model_config( - model_profile, embedder.model_name, embedder.embedding_dim, backend=embedding_backend - ) - # Use bulk insert mode for efficient batch ANN index building - # This defers ANN updates until end_bulk_insert() is called - with vector_store.bulk_insert(): - with sqlite3.connect(index_path) as conn: - conn.row_factory = sqlite3.Row - path_column = _get_path_column(conn) - - # Get total file count for progress reporting - total_files = conn.execute("SELECT COUNT(*) FROM files").fetchone()[0] - if total_files == 0: - return {"success": False, "error": "No files found in index"} - - if progress_callback: - # Format must match Node.js parseProgressLine: "Processing N files" with 'embed' keyword - progress_callback(f"Processing {total_files} files for embeddings in batches of {FILE_BATCH_SIZE}...") - - cursor = conn.execute(f"SELECT {path_column}, content, language FROM files") - - # --- STREAMING GENERATOR APPROACH --- - # Instead of accumulating all chunks from 100 files, we use a generator - # that yields chunks on-demand, keeping memory usage low and constant. - chunk_generator = _generate_chunks_from_cursor( - cursor, chunker, path_column, FILE_BATCH_SIZE, failed_files - ) - - # Determine max tokens per batch - # Priority: explicit parameter > embedder.max_tokens > default 8000 - if max_tokens_per_batch is None: - max_tokens_per_batch = getattr(embedder, 'max_tokens', 8000) - - # Create token-aware batches or fall back to fixed-size batching - if max_tokens_per_batch: - batch_generator = _create_token_aware_batches( - chunk_generator, max_tokens_per_batch - ) - else: - # Fallback to fixed-size batching for backward compatibility - def fixed_size_batches(): - while True: - batch = list(islice(chunk_generator, effective_batch_size)) - if not batch: - break - yield batch - batch_generator = fixed_size_batches() - - batch_number = 0 - files_seen = set() - - def compute_embeddings_only(batch_data: Tuple[int, List[Tuple]]): - """Compute embeddings for a batch (no DB write) with retry logic. - - Args: - batch_data: Tuple of (batch_number, chunk_batch) - - Returns: - Tuple of (batch_num, chunk_batch, embeddings_numpy, batch_files, error) - """ - import random - - batch_num, chunk_batch = batch_data - batch_files = set() - for _, file_path in chunk_batch: - batch_files.add(file_path) - - max_retries = 5 - base_delay = 2.0 - - for attempt in range(max_retries + 1): - try: - batch_contents = [chunk.content for chunk, _ in chunk_batch] - embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=effective_batch_size) - return batch_num, chunk_batch, embeddings_numpy, batch_files, None - - except Exception as e: - error_str = str(e).lower() - # Check for retryable errors (rate limit, connection, backend issues) - # Note: Some backends (e.g., ModelScope) return 400 with nested 500 errors - is_retryable = any(x in error_str for x in [ - "429", "rate limit", "connection", "timeout", - "502", "503", "504", "service unavailable", - "500", "400", "badrequesterror", "internal server error", - "11434" # Ollama port - indicates backend routing issue - ]) - - if attempt < max_retries and is_retryable: - sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5) - logger.warning(f"Batch {batch_num} failed (attempt {attempt+1}/{max_retries+1}). " - f"Retrying in {sleep_time:.1f}s. Error: {e}") - time.sleep(sleep_time) - continue - - error_msg = f"Batch {batch_num}: {str(e)}" - logger.error(f"Failed to compute embeddings for batch {batch_num}: {str(e)}") - return batch_num, chunk_batch, None, batch_files, error_msg - - # Should not reach here, but just in case - return batch_num, chunk_batch, None, batch_files, f"Batch {batch_num}: Max retries exceeded" - - # Process batches based on max_workers setting - if max_workers <= 1: - # Sequential processing - stream directly from generator (no pre-materialization) - for chunk_batch in batch_generator: - batch_number += 1 - - # Track files in this batch - batch_files = set() - for _, file_path in chunk_batch: - batch_files.add(file_path) - - # Retry logic for transient backend errors - max_retries = 5 - base_delay = 2.0 - success = False - - for attempt in range(max_retries + 1): - try: - # Generate embeddings - batch_contents = [chunk.content for chunk, _ in chunk_batch] - embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=effective_batch_size) - - # Store embeddings with category - categories = _build_categories_from_batch(chunk_batch) - vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy, categories=categories) - - files_seen.update(batch_files) - total_chunks_created += len(chunk_batch) - total_files_processed = len(files_seen) - success = True - break - - except Exception as e: - error_str = str(e).lower() - # Check for retryable errors (rate limit, connection, backend issues) - is_retryable = any(x in error_str for x in [ - "429", "rate limit", "connection", "timeout", - "502", "503", "504", "service unavailable", - "500", "400", "badrequesterror", "internal server error", - "11434" # Ollama port - indicates backend routing issue - ]) - - if attempt < max_retries and is_retryable: - import random - sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5) - logger.warning(f"Batch {batch_number} failed (attempt {attempt+1}/{max_retries+1}). " - f"Retrying in {sleep_time:.1f}s. Error: {e}") - time.sleep(sleep_time) - continue - - logger.error(f"Failed to process batch {batch_number}: {str(e)}") - files_seen.update(batch_files) - break - - if success and progress_callback and batch_number % 10 == 0: - progress_callback(f" Batch {batch_number}: {total_chunks_created} chunks, {total_files_processed} files") - else: - # Concurrent processing - main thread iterates batches (SQLite safe), - # workers compute embeddings (parallel), main thread writes to DB (serial) - if progress_callback: - progress_callback(f"Processing with {max_workers} concurrent embedding workers...") - - with ThreadPoolExecutor(max_workers=max_workers) as executor: - pending_futures = {} # future -> (batch_num, chunk_batch) - completed_batches = 0 - last_reported_batch = 0 - - def process_completed_futures(): - """Process any completed futures and write to DB.""" - nonlocal total_chunks_created, total_files_processed, completed_batches, last_reported_batch - done_futures = [f for f in pending_futures if f.done()] - for f in done_futures: - try: - batch_num, chunk_batch, embeddings_numpy, batch_files, error = f.result() - if embeddings_numpy is not None and error is None: - # Write to DB in main thread (no contention) - categories = _build_categories_from_batch(chunk_batch) - vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy, categories=categories) - total_chunks_created += len(chunk_batch) - files_seen.update(batch_files) - total_files_processed = len(files_seen) - completed_batches += 1 - except Exception as e: - logger.error(f"Future raised exception: {e}") - completed_batches += 1 - del pending_futures[f] - - # Report progress based on completed batches (every 5 batches) - if progress_callback and completed_batches >= last_reported_batch + 5: - progress_callback(f" Batch {completed_batches}: {total_chunks_created} chunks, {total_files_processed} files") - last_reported_batch = completed_batches - - # Iterate batches in main thread (SQLite cursor is main-thread bound) - for chunk_batch in batch_generator: - batch_number += 1 - - # Submit compute task to worker pool - future = executor.submit(compute_embeddings_only, (batch_number, chunk_batch)) - pending_futures[future] = batch_number - - # Process any completed futures to free memory and write to DB - process_completed_futures() - - # Backpressure: wait if too many pending - while len(pending_futures) >= max_workers * 2: - process_completed_futures() - if len(pending_futures) >= max_workers * 2: - time.sleep(0.1) # time is imported at module level - - # Wait for remaining futures - for future in as_completed(list(pending_futures.keys())): - try: - batch_num, chunk_batch, embeddings_numpy, batch_files, error = future.result() - if embeddings_numpy is not None and error is None: - categories = _build_categories_from_batch(chunk_batch) - vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy, categories=categories) - total_chunks_created += len(chunk_batch) - files_seen.update(batch_files) - total_files_processed = len(files_seen) - completed_batches += 1 - - # Report progress for remaining batches - if progress_callback and completed_batches >= last_reported_batch + 5: - progress_callback(f" Batch {completed_batches}: {total_chunks_created} chunks, {total_files_processed} files") - last_reported_batch = completed_batches - except Exception as e: - logger.error(f"Future raised exception: {e}") - - # Notify before ANN index finalization (happens when bulk_insert context exits) - if progress_callback: - progress_callback(f"Finalizing index... Building ANN index for {total_chunks_created} chunks") - - except Exception as e: - # Cleanup on error to prevent process hanging - try: - _cleanup_fastembed_resources() - gc.collect() - except Exception as cleanup_exc: - logger.debug(f"Cleanup error during exception handling: {cleanup_exc}") - return {"success": False, "error": f"Failed to read or process files: {str(e)}"} - - elapsed_time = time.time() - start_time - - # Final cleanup: release ONNX resources to allow process exit - # This is critical - without it, ONNX Runtime threads prevent Python from exiting - try: - _cleanup_fastembed_resources() - gc.collect() - except Exception as cleanup_exc: - logger.debug(f"Cleanup error during finalization: {cleanup_exc}") - - return { - "success": True, - "result": { - "chunks_created": total_chunks_created, - "files_processed": total_files_processed, - "files_failed": len(failed_files), - "elapsed_time": elapsed_time, - "model_profile": model_profile, - "model_name": embedder.model_name, - "failed_files": failed_files[:5], # First 5 failures - "index_path": str(index_path), - }, - } - - -def _discover_index_dbs_internal(index_root: Path) -> List[Path]: - """Internal helper to find all _index.db files (no deprecation warning). - - Used internally by generate_dense_embeddings_centralized. - - Args: - index_root: Root directory to scan for _index.db files - - Returns: - Sorted list of paths to _index.db files - """ - if not index_root.exists(): - return [] - - return sorted(filter_index_paths(index_root.rglob("_index.db"), index_root)) - - -def build_centralized_binary_vectors_from_existing( - index_root: Path, - *, - force: bool = False, - embedding_dim: Optional[int] = None, - progress_callback: Optional[callable] = None, -) -> Dict[str, Any]: - """Build centralized binary vectors + metadata from existing semantic_chunks embeddings. - - This is a fast-path for enabling the staged binary coarse search without - regenerating embeddings (and without triggering global model locks). - - It scans all distributed `_index.db` files under `index_root`, reads - existing `semantic_chunks.embedding` blobs, assigns new global chunk_ids, - and writes: - - `/_binary_vectors.mmap` (+ `.meta.json`) - - `/_vectors_meta.db` (chunk_metadata + binary_vectors) - """ - from codexlens.config import BINARY_VECTORS_MMAP_NAME, VECTORS_META_DB_NAME - from codexlens.storage.vector_meta_store import VectorMetadataStore - - index_root = Path(index_root).resolve() - vectors_meta_path = index_root / VECTORS_META_DB_NAME - mmap_path = index_root / BINARY_VECTORS_MMAP_NAME - meta_path = mmap_path.with_suffix(".meta.json") - - index_files = _discover_index_dbs_internal(index_root) - if not index_files: - return {"success": False, "error": f"No _index.db files found under {index_root}"} - - if progress_callback: - progress_callback(f"Scanning {len(index_files)} index databases for existing embeddings...") - - # First pass: detect embedding dims present. - dims_seen: Dict[int, int] = {} - selected_config: Optional[Dict[str, Any]] = None - - for index_path in index_files: - try: - with sqlite3.connect(index_path) as conn: - conn.row_factory = sqlite3.Row - has_table = conn.execute( - "SELECT 1 FROM sqlite_master WHERE type='table' AND name='semantic_chunks'" - ).fetchone() - if not has_table: - continue - - dim_row = conn.execute( - "SELECT backend, model_profile, model_name, embedding_dim FROM embeddings_config WHERE id=1" - ).fetchone() - if dim_row and dim_row[3]: - dim_val = int(dim_row[3]) - dims_seen[dim_val] = dims_seen.get(dim_val, 0) + 1 - if selected_config is None: - selected_config = { - "backend": dim_row[0], - "model_profile": dim_row[1], - "model_name": dim_row[2], - "embedding_dim": dim_val, - } - - # We count per-dim later after selecting a target dim. - except (sqlite3.DatabaseError, ValueError, TypeError): - # Skip corrupted or malformed indexes - continue - - if not dims_seen: - return {"success": False, "error": "No embeddings_config found under index_root"} - - if embedding_dim is None: - # Default: pick the most common embedding dim across indexes. - embedding_dim = max(dims_seen.items(), key=lambda kv: kv[1])[0] - - embedding_dim = int(embedding_dim) - - if progress_callback and len(dims_seen) > 1: - progress_callback(f"Mixed embedding dims detected, selecting dim={embedding_dim} (seen={dims_seen})") - - # Re-detect the selected model config for this dim (do not reuse an arbitrary first-seen config). - selected_config = None - - # Second pass: count only chunks matching selected dim. - total_chunks = 0 - for index_path in index_files: - try: - with sqlite3.connect(index_path) as conn: - conn.row_factory = sqlite3.Row - has_table = conn.execute( - "SELECT 1 FROM sqlite_master WHERE type='table' AND name='semantic_chunks'" - ).fetchone() - if not has_table: - continue - - dim_row = conn.execute( - "SELECT backend, model_profile, model_name, embedding_dim FROM embeddings_config WHERE id=1" - ).fetchone() - dim_val = int(dim_row[3]) if dim_row and dim_row[3] else None - if dim_val != embedding_dim: - continue - - if selected_config is None: - selected_config = { - "backend": dim_row[0], - "model_profile": dim_row[1], - "model_name": dim_row[2], - "embedding_dim": dim_val, - } - - row = conn.execute( - "SELECT COUNT(*) FROM semantic_chunks WHERE embedding IS NOT NULL AND length(embedding) > 0" - ).fetchone() - total_chunks += int(row[0] if row else 0) - except (sqlite3.DatabaseError, ValueError, TypeError): - # Skip corrupted or malformed indexes - continue - - if not total_chunks: - return { - "success": False, - "error": f"No existing embeddings found for embedding_dim={embedding_dim}", - "dims_seen": dims_seen, - } - - if progress_callback: - progress_callback(f"Found {total_chunks} embedded chunks (dim={embedding_dim}). Building binary vectors...") - - # Prepare output files / DB. - try: - import numpy as np - except ImportError as exc: - return {"success": False, "error": f"numpy required to build binary vectors: {exc}"} - - store = VectorMetadataStore(vectors_meta_path) - store._ensure_schema() - - if force: - try: - store.clear() - except Exception: - pass - try: - store.clear_binary_vectors() - except Exception: - pass - try: - if mmap_path.exists(): - mmap_path.unlink() - except Exception: - pass - try: - if meta_path.exists(): - meta_path.unlink() - except Exception: - pass - - bytes_per_vec = (int(embedding_dim) + 7) // 8 - mmap = np.memmap( - str(mmap_path), - dtype=np.uint8, - mode="w+", - shape=(int(total_chunks), int(bytes_per_vec)), - ) - - chunk_ids: List[int] = [] - chunks_batch: List[Dict[str, Any]] = [] - bin_ids_batch: List[int] = [] - bin_vecs_batch: List[bytes] = [] - batch_limit = 500 - - global_id = 1 - write_idx = 0 - - skipped_indexes: Dict[str, int] = {} - for index_path in index_files: - try: - with sqlite3.connect(index_path) as conn: - conn.row_factory = sqlite3.Row - has_table = conn.execute( - "SELECT 1 FROM sqlite_master WHERE type='table' AND name='semantic_chunks'" - ).fetchone() - if not has_table: - continue - - dim_row = conn.execute( - "SELECT embedding_dim FROM embeddings_config WHERE id=1" - ).fetchone() - dim_val = int(dim_row[0]) if dim_row and dim_row[0] else None - if dim_val != embedding_dim: - skipped_indexes[str(index_path)] = dim_val or -1 - continue - - rows = conn.execute( - "SELECT file_path, content, embedding, metadata, category FROM semantic_chunks " - "WHERE embedding IS NOT NULL AND length(embedding) > 0" - ).fetchall() - - for row in rows: - emb = np.frombuffer(row["embedding"], dtype=np.float32) - if emb.size != int(embedding_dim): - continue - - packed = np.packbits((emb > 0).astype(np.uint8)) - if packed.size != bytes_per_vec: - continue - - mmap[write_idx] = packed - write_idx += 1 - - cid = global_id - global_id += 1 - chunk_ids.append(cid) - - meta_raw = row["metadata"] - meta_dict: Dict[str, Any] = {} - if meta_raw: - try: - meta_dict = json.loads(meta_raw) if isinstance(meta_raw, str) else dict(meta_raw) - except Exception: - meta_dict = {} - - chunks_batch.append( - { - "chunk_id": cid, - "file_path": row["file_path"], - "content": row["content"], - "start_line": meta_dict.get("start_line"), - "end_line": meta_dict.get("end_line"), - "category": row["category"], - "metadata": meta_dict, - "source_index_db": str(index_path), - } - ) - - bin_ids_batch.append(cid) - bin_vecs_batch.append(packed.tobytes()) - - if len(chunks_batch) >= batch_limit: - store.add_chunks(chunks_batch) - store.add_binary_vectors(bin_ids_batch, bin_vecs_batch) - chunks_batch = [] - bin_ids_batch = [] - bin_vecs_batch = [] - - except Exception: - continue - - if chunks_batch: - store.add_chunks(chunks_batch) - store.add_binary_vectors(bin_ids_batch, bin_vecs_batch) - - mmap.flush() - del mmap - - # If we skipped inconsistent vectors, truncate metadata to actual write count. - chunk_ids = chunk_ids[:write_idx] - - # Write sidecar metadata. - with open(meta_path, "w", encoding="utf-8") as f: - json.dump( - { - "shape": [int(write_idx), int(bytes_per_vec)], - "chunk_ids": chunk_ids, - "embedding_dim": int(embedding_dim), - "backend": (selected_config or {}).get("backend"), - "model_profile": (selected_config or {}).get("model_profile"), - "model_name": (selected_config or {}).get("model_name"), - }, - f, - ) - - if progress_callback: - progress_callback(f"Binary vectors ready: {mmap_path} (rows={write_idx})") - - return { - "success": True, - "result": { - "index_root": str(index_root), - "index_files_scanned": len(index_files), - "chunks_total": int(total_chunks), - "chunks_written": int(write_idx), - "embedding_dim": int(embedding_dim), - "bytes_per_vector": int(bytes_per_vec), - "skipped_indexes": len(skipped_indexes), - "vectors_meta_db": str(vectors_meta_path), - "binary_mmap": str(mmap_path), - "binary_meta_json": str(meta_path), - }, - } - - -def discover_all_index_dbs(index_root: Path) -> List[Path]: - """Recursively find all _index.db files in an index tree. - - .. deprecated:: - This function is deprecated. Use centralized indexing with - ``generate_dense_embeddings_centralized`` instead, which handles - index discovery internally. - - Args: - index_root: Root directory to scan for _index.db files - - Returns: - Sorted list of paths to _index.db files - """ - import warnings - warnings.warn( - "discover_all_index_dbs is deprecated. Use centralized indexing with " - "generate_dense_embeddings_centralized instead.", - DeprecationWarning, - stacklevel=2 - ) - return _discover_index_dbs_internal(index_root) - - -def find_all_indexes(scan_dir: Path) -> List[Path]: - """Find all _index.db files in directory tree. - - Args: - scan_dir: Directory to scan - - Returns: - List of paths to _index.db files - """ - if not scan_dir.exists(): - return [] - - return _discover_index_dbs_internal(scan_dir) - - - -def generate_embeddings_recursive( - index_root: Path, - embedding_backend: Optional[str] = None, - model_profile: Optional[str] = None, - force: bool = False, - chunk_size: int = 2000, - overlap: int = 200, - progress_callback: Optional[callable] = None, - use_gpu: Optional[bool] = None, - max_tokens_per_batch: Optional[int] = None, - max_workers: Optional[int] = None, - endpoints: Optional[List] = None, - strategy: Optional[str] = None, - cooldown: Optional[float] = None, -) -> Dict[str, any]: - """Generate embeddings for all index databases in a project recursively. - - .. deprecated:: - This function is deprecated. Use ``generate_dense_embeddings_centralized`` - instead, which creates a single centralized vector index for the entire project - rather than per-directory indexes. - - Args: - index_root: Root index directory containing _index.db files - embedding_backend: Embedding backend to use (fastembed or litellm). - Defaults to config setting. - model_profile: Model profile for fastembed (fast, code, multilingual, balanced) - or model name for litellm (e.g., qwen3-embedding). - Defaults to config setting. - force: If True, regenerate even if embeddings exist - chunk_size: Maximum chunk size in characters - overlap: Overlap size in characters for sliding window chunking (default: 200) - progress_callback: Optional callback for progress updates - use_gpu: Whether to use GPU acceleration (fastembed only). - Defaults to config setting. - max_tokens_per_batch: Maximum tokens per batch for token-aware batching. - If None, attempts to get from embedder.max_tokens, - then falls back to 8000. If set, overrides automatic detection. - max_workers: Maximum number of concurrent API calls. - If None, uses dynamic defaults based on backend and endpoint count. - endpoints: Optional list of endpoint configurations for multi-API load balancing. - strategy: Selection strategy for multi-endpoint mode. - cooldown: Default cooldown seconds for rate-limited endpoints. - - Returns: - Aggregated result dictionary with generation statistics - """ - import warnings - warnings.warn( - "generate_embeddings_recursive is deprecated. Use " - "generate_dense_embeddings_centralized instead for centralized indexing.", - DeprecationWarning, - stacklevel=2 - ) - - # Apply config defaults - embedding_backend, model_profile, use_gpu, endpoints, strategy, cooldown = \ - _apply_embedding_config_defaults( - embedding_backend, model_profile, use_gpu, endpoints, strategy, cooldown - ) - - # Calculate max_workers - max_workers = _calculate_max_workers(embedding_backend, endpoints, max_workers) - - # Discover all _index.db files (using internal helper to avoid double deprecation warning) - index_files = _discover_index_dbs_internal(index_root) - - if not index_files: - return { - "success": False, - "error": f"No index databases found in {index_root}", - } - - if progress_callback: - progress_callback(f"Found {len(index_files)} index databases to process") - - # Process each index database - all_results = [] - total_chunks = 0 - total_files_processed = 0 - total_files_failed = 0 - - for idx, index_path in enumerate(index_files, 1): - if progress_callback: - try: - rel_path = index_path.relative_to(index_root) - except ValueError: - rel_path = index_path - # Format: "Processing file X/Y: path" to match Node.js parseProgressLine - progress_callback(f"Processing file {idx}/{len(index_files)}: {rel_path}") - - result = generate_embeddings( - index_path, - embedding_backend=embedding_backend, - model_profile=model_profile, - force=force, - chunk_size=chunk_size, - overlap=overlap, - progress_callback=None, # Don't cascade callbacks - use_gpu=use_gpu, - max_tokens_per_batch=max_tokens_per_batch, - max_workers=max_workers, - endpoints=endpoints, - strategy=strategy, - cooldown=cooldown, - ) - - all_results.append({ - "path": str(index_path), - "success": result["success"], - "result": result.get("result"), - "error": result.get("error"), - }) - - if result["success"]: - data = result["result"] - total_chunks += data["chunks_created"] - total_files_processed += data["files_processed"] - total_files_failed += data["files_failed"] - - successful = sum(1 for r in all_results if r["success"]) - - # Final cleanup after processing all indexes - # Each generate_embeddings() call does its own cleanup, but do a final one to be safe - try: - _cleanup_fastembed_resources() - gc.collect() - except Exception: - pass - - return { - "success": successful > 0, - "result": { - "indexes_processed": len(index_files), - "indexes_successful": successful, - "indexes_failed": len(index_files) - successful, - "total_chunks_created": total_chunks, - "total_files_processed": total_files_processed, - "total_files_failed": total_files_failed, - "model_profile": model_profile, - "details": all_results, - }, - } - - -def generate_dense_embeddings_centralized( - index_root: Path, - embedding_backend: Optional[str] = None, - model_profile: Optional[str] = None, - force: bool = False, - chunk_size: int = 2000, - overlap: int = 200, - progress_callback: Optional[callable] = None, - use_gpu: Optional[bool] = None, - max_tokens_per_batch: Optional[int] = None, - max_workers: Optional[int] = None, - endpoints: Optional[List] = None, - strategy: Optional[str] = None, - cooldown: Optional[float] = None, -) -> Dict[str, any]: - """Generate dense embeddings with centralized vector storage. - - This function creates a single HNSW index at the project root instead of - per-directory indexes. All chunks from all _index.db files are combined - into one central _vectors.hnsw file. - - Target architecture: - / - |-- _vectors.hnsw # Centralized dense vector ANN index - |-- src/ - |-- _index.db # No longer contains .hnsw file - - Args: - index_root: Root index directory containing _index.db files - embedding_backend: Embedding backend (fastembed or litellm) - model_profile: Model profile or name - force: If True, regenerate even if embeddings exist - chunk_size: Maximum chunk size in characters - overlap: Overlap size in characters - progress_callback: Optional callback for progress updates - use_gpu: Whether to use GPU acceleration - max_tokens_per_batch: Maximum tokens per batch - max_workers: Maximum concurrent workers - endpoints: Multi-endpoint configurations - strategy: Endpoint selection strategy - cooldown: Rate-limit cooldown seconds - - Returns: - Result dictionary with generation statistics - """ - from codexlens.config import VECTORS_HNSW_NAME - - # Apply config defaults - embedding_backend, model_profile, use_gpu, endpoints, strategy, cooldown = \ - _apply_embedding_config_defaults( - embedding_backend, model_profile, use_gpu, endpoints, strategy, cooldown - ) - - # Calculate max_workers - max_workers = _calculate_max_workers(embedding_backend, endpoints, max_workers) - - backend_available, backend_error = is_embedding_backend_available(embedding_backend) - if not backend_available: - return {"success": False, "error": backend_error or "Embedding backend not available"} - - # Discover all _index.db files - index_files = _discover_index_dbs_internal(index_root) - - if not index_files: - return { - "success": False, - "error": f"No index databases found in {index_root}", - } - - if progress_callback: - progress_callback(f"Found {len(index_files)} index databases for centralized embedding") - - # Pre-calculate estimated chunk count for HNSW capacity - # This avoids expensive resize operations during indexing - estimated_total_files = 0 - for index_path in index_files: - try: - with sqlite3.connect(index_path) as conn: - cursor = conn.execute("SELECT COUNT(*) FROM files") - estimated_total_files += cursor.fetchone()[0] - except Exception: - pass - # Heuristic: ~15 chunks per file on average - estimated_chunks = max(100000, estimated_total_files * 15) - - if progress_callback: - progress_callback(f"Estimated {estimated_total_files} files, ~{estimated_chunks} chunks") - - # Check for existing centralized index - central_hnsw_path = index_root / VECTORS_HNSW_NAME - if central_hnsw_path.exists() and not force: - return { - "success": False, - "error": f"Centralized vector index already exists at {central_hnsw_path}. Use --force to regenerate.", - } - - # Initialize embedder and chunker using helper - try: - from codexlens.semantic.ann_index import ANNIndex - - embedder, chunker, endpoint_count = _initialize_embedder_and_chunker( - embedding_backend, model_profile, use_gpu, endpoints, strategy, cooldown, - chunk_size, overlap - ) - - # Load chunk stripping config for batch size calculation - from codexlens.config import Config - batch_config = Config.load() - - if progress_callback: - if endpoint_count > 1: - progress_callback(f"Using {endpoint_count} API endpoints with {strategy} strategy") - progress_callback(f"Using model: {embedder.model_name} ({embedder.embedding_dim} dimensions)") - - # Calculate dynamic batch size based on model capacity - effective_batch_size = calculate_dynamic_batch_size(batch_config, embedder) - - if progress_callback and batch_config.api_batch_size_dynamic: - progress_callback(f"Dynamic batch size: {effective_batch_size} (model max_tokens={getattr(embedder, 'max_tokens', 8192)})") - - except Exception as e: - return { - "success": False, - "error": f"Failed to initialize components: {str(e)}", - } - - # Create centralized ANN index with pre-calculated capacity - # Using estimated_chunks avoids expensive resize operations during indexing - central_ann_index = ANNIndex.create_central( - index_root=index_root, - dim=embedder.embedding_dim, - initial_capacity=estimated_chunks, - auto_save=False, - ) - - # Process all index databases - start_time = time.time() - failed_files = [] - total_chunks_created = 0 - total_files_processed = 0 - all_chunk_ids = [] - all_embeddings = [] - - # Track chunk ID to file_path mapping for metadata - chunk_id_to_info: Dict[int, Dict[str, Any]] = {} - next_chunk_id = 1 - # Track current index_path for source_index_db field - current_index_path: Optional[str] = None - - for idx, index_path in enumerate(index_files, 1): - if progress_callback: - try: - rel_path = index_path.relative_to(index_root) - except ValueError: - rel_path = index_path - progress_callback(f"Processing {idx}/{len(index_files)}: {rel_path}") - - # Track current index_path for source_index_db - current_index_path = str(index_path) - - try: - with sqlite3.connect(index_path) as conn: - conn.row_factory = sqlite3.Row - path_column = _get_path_column(conn) - - # Get files from this index - cursor = conn.execute(f"SELECT {path_column}, content, language FROM files") - file_rows = cursor.fetchall() - - for file_row in file_rows: - file_path = file_row[path_column] - content = file_row["content"] - language = file_row["language"] or "python" - - try: - chunks = chunker.chunk_sliding_window( - content, - file_path=file_path, - language=language - ) - - if not chunks: - continue - - total_files_processed += 1 - - # Generate embeddings for this file's chunks - batch_contents = [chunk.content for chunk in chunks] - embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=effective_batch_size) - - # Assign chunk IDs and store embeddings - for i, chunk in enumerate(chunks): - chunk_id = next_chunk_id - next_chunk_id += 1 - - all_chunk_ids.append(chunk_id) - all_embeddings.append(embeddings_numpy[i]) - - # Store metadata for later retrieval - chunk_id_to_info[chunk_id] = { - "file_path": file_path, - "content": chunk.content, - "metadata": chunk.metadata, - "category": get_file_category(file_path) or "code", - "source_index_db": current_index_path, - } - total_chunks_created += 1 - - except Exception as e: - logger.error(f"Failed to process {file_path}: {e}") - failed_files.append((file_path, str(e))) - - except Exception as e: - logger.error(f"Failed to read index {index_path}: {e}") - failed_files.append((str(index_path), str(e))) - - # Add all embeddings to centralized ANN index - if all_embeddings: - if progress_callback: - progress_callback(f"Building centralized ANN index with {len(all_embeddings)} vectors...") - - try: - import numpy as np - embeddings_matrix = np.vstack(all_embeddings) - central_ann_index.add_vectors(all_chunk_ids, embeddings_matrix) - central_ann_index.save() - - if progress_callback: - progress_callback(f"Saved centralized index to {central_hnsw_path}") - - except Exception as e: - return { - "success": False, - "error": f"Failed to build centralized ANN index: {str(e)}", - } - - # Store chunk metadata in a centralized metadata database - vectors_meta_path = index_root / VECTORS_META_DB_NAME - if chunk_id_to_info: - if progress_callback: - progress_callback(f"Storing {len(chunk_id_to_info)} chunk metadata records...") - - try: - from codexlens.storage.vector_meta_store import VectorMetadataStore - - with VectorMetadataStore(vectors_meta_path) as meta_store: - # Convert chunk_id_to_info dict to list of dicts for batch insert - chunks_to_store = [] - for cid, info in chunk_id_to_info.items(): - metadata = info.get("metadata", {}) - chunks_to_store.append({ - "chunk_id": cid, - "file_path": info["file_path"], - "content": info["content"], - "start_line": metadata.get("start_line"), - "end_line": metadata.get("end_line"), - "category": info.get("category"), - "metadata": metadata, - "source_index_db": info.get("source_index_db"), - }) - - meta_store.add_chunks(chunks_to_store) - - if progress_callback: - progress_callback(f"Saved metadata to {vectors_meta_path}") - - except Exception as e: - logger.warning("Failed to store vector metadata: %s", e) - # Non-fatal: continue without centralized metadata - - # --- Binary Vector Generation for Cascade Search (Memory-Mapped) --- - binary_success = False - binary_count = 0 - try: - from codexlens.config import Config, BINARY_VECTORS_MMAP_NAME - config = Config.load() - - if getattr(config, 'enable_binary_cascade', True) and all_embeddings: - import numpy as np - - if progress_callback: - progress_callback(f"Generating binary vectors for {len(all_embeddings)} chunks...") - - # Binarize dense vectors: sign(x) -> 1 if x > 0, 0 otherwise - # Pack into bytes for efficient storage and Hamming distance computation - embeddings_matrix = np.vstack(all_embeddings) - binary_matrix = (embeddings_matrix > 0).astype(np.uint8) - - # Pack bits into bytes (8 bits per byte) - vectorized for all rows - packed_matrix = np.packbits(binary_matrix, axis=1) - binary_count = len(packed_matrix) - - # Save as memory-mapped file for efficient loading - binary_mmap_path = index_root / BINARY_VECTORS_MMAP_NAME - mmap_array = np.memmap( - str(binary_mmap_path), - dtype=np.uint8, - mode='w+', - shape=packed_matrix.shape - ) - mmap_array[:] = packed_matrix - mmap_array.flush() - del mmap_array # Close the memmap - - # Save metadata (shape and chunk_ids) to sidecar JSON - import json - meta_path = binary_mmap_path.with_suffix('.meta.json') - with open(meta_path, 'w') as f: - json.dump({ - 'shape': list(packed_matrix.shape), - 'chunk_ids': all_chunk_ids, - 'embedding_dim': embeddings_matrix.shape[1], - }, f) - - # Also store in DB for backward compatibility - from codexlens.storage.vector_meta_store import VectorMetadataStore - binary_packed_bytes = [row.tobytes() for row in packed_matrix] - with VectorMetadataStore(vectors_meta_path) as meta_store: - meta_store.add_binary_vectors(all_chunk_ids, binary_packed_bytes) - - binary_success = True - if progress_callback: - progress_callback(f"Generated {binary_count} binary vectors ({embeddings_matrix.shape[1]} dims -> {packed_matrix.shape[1]} bytes, mmap: {binary_mmap_path.name})") - - except Exception as e: - logger.warning("Binary vector generation failed: %s", e) - # Non-fatal: continue without binary vectors - - elapsed_time = time.time() - start_time - - # Cleanup - try: - _cleanup_fastembed_resources() - gc.collect() - except Exception: - pass - - return { - "success": True, - "result": { - "chunks_created": total_chunks_created, - "files_processed": total_files_processed, - "files_failed": len(failed_files), - "elapsed_time": elapsed_time, - "model_profile": model_profile, - "model_name": embedder.model_name, - "central_index_path": str(central_hnsw_path), - "failed_files": failed_files[:5], - "binary_success": binary_success, - "binary_count": binary_count, - }, - } - - -def get_embeddings_status(index_root: Path) -> Dict[str, any]: - """Get comprehensive embeddings coverage status for all indexes. - - Args: - index_root: Root index directory - - Returns: - Aggregated status with coverage statistics, model info, and timestamps - """ - index_files = _discover_index_dbs_internal(index_root) - centralized = _inspect_centralized_embeddings(index_root) - root_index_path = index_root / "_index.db" - root_index_exists = root_index_path.exists() - - if not index_files: - root_result = { - "index_path": str(root_index_path), - "exists": root_index_exists, - "total_files": 0, - "files_with_embeddings": 0, - "files_without_embeddings": 0, - "total_chunks": 0, - "coverage_percent": 0.0, - "has_embeddings": False, - "storage_mode": "none", - } - subtree_result = { - "total_indexes": 0, - "total_files": 0, - "files_with_embeddings": 0, - "files_without_embeddings": 0, - "total_chunks": 0, - "coverage_percent": 0.0, - "indexes_with_embeddings": 0, - "indexes_without_embeddings": 0, - } - return { - "success": True, - "result": { - "total_indexes": 0, - "total_files": 0, - "files_with_embeddings": 0, - "files_without_embeddings": 0, - "total_chunks": 0, - "coverage_percent": 0.0, - "indexes_with_embeddings": 0, - "indexes_without_embeddings": 0, - "model_info": None, - "root": root_result, - "subtree": subtree_result, - "centralized": centralized, - }, - } - - subtree_total_files = 0 - subtree_files_with_embeddings = 0 - subtree_total_chunks = 0 - subtree_indexes_with_embeddings = 0 - subtree_model_info = None - latest_updated_at = None - - for index_path in index_files: - status = check_index_embeddings(index_path) - if not status["success"]: - continue - - result = status["result"] - subtree_total_files += result["total_files"] - subtree_files_with_embeddings += result["files_with_chunks"] - subtree_total_chunks += result["total_chunks"] - - if not result["has_embeddings"]: - continue - - subtree_indexes_with_embeddings += 1 - candidate_model_info = _get_model_info_from_index(index_path) - if not candidate_model_info: - continue - if subtree_model_info is None: - subtree_model_info = candidate_model_info - latest_updated_at = candidate_model_info.get("updated_at") - continue - candidate_updated_at = candidate_model_info.get("updated_at") - if candidate_updated_at and (latest_updated_at is None or candidate_updated_at > latest_updated_at): - latest_updated_at = candidate_updated_at - - if subtree_model_info and latest_updated_at: - subtree_model_info["updated_at"] = latest_updated_at - - root_total_files = 0 - root_files_with_embeddings = 0 - root_total_chunks = 0 - root_has_embeddings = False - root_storage_mode = "none" - - if root_index_exists: - root_status = check_index_embeddings(root_index_path) - if root_status["success"]: - root_data = root_status["result"] - root_total_files = int(root_data["total_files"]) - if root_data["has_embeddings"]: - root_files_with_embeddings = int(root_data["files_with_chunks"]) - root_total_chunks = int(root_data["total_chunks"]) - root_has_embeddings = True - root_storage_mode = "distributed" - - if centralized["usable"]: - root_files_with_embeddings = int(centralized["files_with_embeddings"]) - root_total_chunks = int(centralized["chunk_metadata_rows"]) - root_has_embeddings = True - root_storage_mode = "centralized" if root_storage_mode == "none" else "mixed" - - model_info = None - if root_has_embeddings: - if root_storage_mode in {"distributed", "mixed"} and root_index_exists: - model_info = _get_model_info_from_index(root_index_path) - if model_info is None and root_storage_mode in {"centralized", "mixed"}: - model_info = subtree_model_info - - root_coverage_percent = round( - (root_files_with_embeddings / root_total_files * 100) if root_total_files > 0 else 0, - 1, - ) - root_files_without_embeddings = max(root_total_files - root_files_with_embeddings, 0) - - root_result = { - "index_path": str(root_index_path), - "exists": root_index_exists, - "total_files": root_total_files, - "files_with_embeddings": root_files_with_embeddings, - "files_without_embeddings": root_files_without_embeddings, - "total_chunks": root_total_chunks, - "coverage_percent": root_coverage_percent, - "has_embeddings": root_has_embeddings, - "storage_mode": root_storage_mode, - } - subtree_result = { - "total_indexes": len(index_files), - "total_files": subtree_total_files, - "files_with_embeddings": subtree_files_with_embeddings, - "files_without_embeddings": subtree_total_files - subtree_files_with_embeddings, - "total_chunks": subtree_total_chunks, - "coverage_percent": round( - (subtree_files_with_embeddings / subtree_total_files * 100) if subtree_total_files > 0 else 0, - 1, - ), - "indexes_with_embeddings": subtree_indexes_with_embeddings, - "indexes_without_embeddings": len(index_files) - subtree_indexes_with_embeddings, - } - - return { - "success": True, - "result": { - "total_indexes": 1 if root_index_exists else 0, - "total_files": root_total_files, - "files_with_embeddings": root_files_with_embeddings, - "files_without_embeddings": root_files_without_embeddings, - "total_chunks": root_total_chunks, - "coverage_percent": root_coverage_percent, - "indexes_with_embeddings": 1 if root_has_embeddings else 0, - "indexes_without_embeddings": 1 if root_index_exists and not root_has_embeddings else 0, - "model_info": model_info, - "root": root_result, - "subtree": subtree_result, - "centralized": centralized, - }, - } - - -def get_embedding_stats_summary(index_root: Path) -> Dict[str, any]: - """Get summary statistics for all indexes in root directory. - - Args: - index_root: Root directory containing indexes - - Returns: - Summary statistics for all indexes - """ - indexes = find_all_indexes(index_root) - - if not indexes: - return { - "success": True, - "result": { - "total_indexes": 0, - "indexes_with_embeddings": 0, - "total_chunks": 0, - "indexes": [], - }, - } - - total_chunks = 0 - indexes_with_embeddings = 0 - index_stats = [] - - for index_path in indexes: - status = check_index_embeddings(index_path) - - if status["success"]: - result = status["result"] - has_emb = result["has_embeddings"] - chunks = result["total_chunks"] - - if has_emb: - indexes_with_embeddings += 1 - total_chunks += chunks - - # Extract project name from path - project_name = index_path.parent.name - - index_stats.append({ - "project": project_name, - "path": str(index_path), - "has_embeddings": has_emb, - "total_chunks": chunks, - "total_files": result["total_files"], - "coverage_percent": result.get("coverage_percent", 0), - }) - - return { - "success": True, - "result": { - "total_indexes": len(indexes), - "indexes_with_embeddings": indexes_with_embeddings, - "total_chunks": total_chunks, - "indexes": index_stats, - }, - } - - -def scan_for_model_conflicts( - index_root: Path, - target_backend: str, - target_model: str, -) -> Dict[str, any]: - """Scan for model conflicts across all indexes in a directory. - - Checks if any existing embeddings were generated with a different - backend or model than the target configuration. - - Args: - index_root: Root index directory to scan - target_backend: Target embedding backend (fastembed or litellm) - target_model: Target model profile/name - - Returns: - Dictionary with: - - has_conflict: True if any index has different model config - - existing_config: Config from first index with embeddings (if any) - - target_config: The requested configuration - - conflicts: List of conflicting index paths with their configs - - indexes_with_embeddings: Count of indexes that have embeddings - """ - index_files = _discover_index_dbs_internal(index_root) - - if not index_files: - return { - "has_conflict": False, - "existing_config": None, - "target_config": {"backend": target_backend, "model": target_model}, - "conflicts": [], - "indexes_with_embeddings": 0, - } - - conflicts = [] - existing_config = None - indexes_with_embeddings = 0 - - for index_path in index_files: - try: - from codexlens.semantic.vector_store import VectorStore - - with VectorStore(index_path) as vs: - config = vs.get_model_config() - if config and config.get("model_profile"): - indexes_with_embeddings += 1 - - # Store first existing config as reference - if existing_config is None: - existing_config = { - "backend": config.get("backend"), - "model": config.get("model_profile"), - "model_name": config.get("model_name"), - "embedding_dim": config.get("embedding_dim"), - } - - # Check for conflict: different backend OR different model - existing_backend = config.get("backend", "") - existing_model = config.get("model_profile", "") - - if existing_backend != target_backend or existing_model != target_model: - conflicts.append({ - "path": str(index_path), - "existing": { - "backend": existing_backend, - "model": existing_model, - "model_name": config.get("model_name"), - }, - }) - except Exception as e: - logger.debug(f"Failed to check model config for {index_path}: {e}") - continue - - return { - "has_conflict": len(conflicts) > 0, - "existing_config": existing_config, - "target_config": {"backend": target_backend, "model": target_model}, - "conflicts": conflicts, - "indexes_with_embeddings": indexes_with_embeddings, - } - - -def _get_global_settings_path() -> Path: - """Get the path to global embedding settings file.""" - return Path.home() / ".codexlens" / "embedding_lock.json" - - -def get_locked_model_config() -> Optional[Dict[str, Any]]: - """Get the globally locked embedding model configuration. - - Returns: - Dictionary with backend and model if locked, None otherwise. - """ - settings_path = _get_global_settings_path() - if not settings_path.exists(): - return None - - try: - with open(settings_path, "r", encoding="utf-8") as f: - data = json.load(f) - if data.get("locked"): - return { - "backend": data.get("backend"), - "model": data.get("model"), - "locked_at": data.get("locked_at"), - } - except (json.JSONDecodeError, OSError): - pass - - return None - - -def set_locked_model_config(backend: str, model: str) -> None: - """Set the globally locked embedding model configuration. - - This is called after the first successful embedding generation - to lock the model for all future operations. - - Args: - backend: Embedding backend (fastembed or litellm) - model: Model profile/name - """ - import datetime - - settings_path = _get_global_settings_path() - settings_path.parent.mkdir(parents=True, exist_ok=True) - - data = { - "locked": True, - "backend": backend, - "model": model, - "locked_at": datetime.datetime.now().isoformat(), - } - - with open(settings_path, "w", encoding="utf-8") as f: - json.dump(data, f, indent=2) - - -def clear_locked_model_config() -> bool: - """Clear the globally locked embedding model configuration. - - Returns: - True if lock was cleared, False if no lock existed. - """ - settings_path = _get_global_settings_path() - if settings_path.exists(): - settings_path.unlink() - return True - return False - - -def check_global_model_lock( - target_backend: str, - target_model: str, -) -> Dict[str, Any]: - """Check if the target model conflicts with the global lock. - - Args: - target_backend: Requested embedding backend - target_model: Requested model profile/name - - Returns: - Dictionary with: - - is_locked: True if a global lock exists - - has_conflict: True if target differs from locked config - - locked_config: The locked configuration (if any) - - target_config: The requested configuration - """ - locked_config = get_locked_model_config() - - if locked_config is None: - return { - "is_locked": False, - "has_conflict": False, - "locked_config": None, - "target_config": {"backend": target_backend, "model": target_model}, - } - - has_conflict = ( - locked_config["backend"] != target_backend or - locked_config["model"] != target_model - ) - - return { - "is_locked": True, - "has_conflict": has_conflict, - "locked_config": locked_config, - "target_config": {"backend": target_backend, "model": target_model}, - } diff --git a/codex-lens/src/codexlens/cli/model_manager.py b/codex-lens/src/codexlens/cli/model_manager.py deleted file mode 100644 index 15776cf1..00000000 --- a/codex-lens/src/codexlens/cli/model_manager.py +++ /dev/null @@ -1,1026 +0,0 @@ -"""Model Manager - Manage fastembed models for semantic search.""" - -import json -import os -import shutil -from pathlib import Path -from typing import Dict, List, Optional - -try: - from huggingface_hub import snapshot_download, list_repo_files - HUGGINGFACE_HUB_AVAILABLE = True -except ImportError: - HUGGINGFACE_HUB_AVAILABLE = False - -try: - from fastembed import TextEmbedding - FASTEMBED_AVAILABLE = True -except ImportError: - FASTEMBED_AVAILABLE = False - -try: - # fastembed >= 0.4.0 moved TextCrossEncoder to rerank.cross_encoder - from fastembed.rerank.cross_encoder import TextCrossEncoder - RERANKER_AVAILABLE = True -except ImportError: - try: - # Fallback for older versions - from fastembed import TextCrossEncoder - RERANKER_AVAILABLE = True - except ImportError: - RERANKER_AVAILABLE = False - - -# Reranker model profiles with metadata -# Note: fastembed TextCrossEncoder uses ONNX models from HuggingFace -RERANKER_MODEL_PROFILES = { - "ms-marco-mini": { - "model_name": "Xenova/ms-marco-MiniLM-L-6-v2", - "cache_name": "Xenova/ms-marco-MiniLM-L-6-v2", - "size_mb": 90, - "description": "Fast, lightweight reranker (default)", - "use_case": "Quick prototyping, resource-constrained environments", - "recommended": True, - }, - "ms-marco-12": { - "model_name": "Xenova/ms-marco-MiniLM-L-12-v2", - "cache_name": "Xenova/ms-marco-MiniLM-L-12-v2", - "size_mb": 130, - "description": "Better quality, 12-layer MiniLM", - "use_case": "General purpose reranking with better accuracy", - "recommended": True, - }, - "bge-base": { - "model_name": "BAAI/bge-reranker-base", - "cache_name": "BAAI/bge-reranker-base", - "size_mb": 280, - "description": "BGE reranker base model", - "use_case": "High-quality reranking for production", - "recommended": True, - }, - "bge-large": { - "model_name": "BAAI/bge-reranker-large", - "cache_name": "BAAI/bge-reranker-large", - "size_mb": 560, - "description": "BGE reranker large model (high resource usage)", - "use_case": "Maximum quality reranking", - "recommended": False, - }, - "jina-tiny": { - "model_name": "jinaai/jina-reranker-v1-tiny-en", - "cache_name": "jinaai/jina-reranker-v1-tiny-en", - "size_mb": 70, - "description": "Jina tiny reranker, very fast", - "use_case": "Ultra-low latency applications", - "recommended": True, - }, - "jina-turbo": { - "model_name": "jinaai/jina-reranker-v1-turbo-en", - "cache_name": "jinaai/jina-reranker-v1-turbo-en", - "size_mb": 150, - "description": "Jina turbo reranker, balanced", - "use_case": "Fast reranking with good accuracy", - "recommended": True, - }, - # Additional reranker models (commonly used) - "bge-reranker-v2-m3": { - "model_name": "BAAI/bge-reranker-v2-m3", - "cache_name": "BAAI/bge-reranker-v2-m3", - "size_mb": 560, - "description": "BGE v2 M3 reranker, multilingual", - "use_case": "Multilingual reranking, latest BGE version", - "recommended": True, - }, - "bge-reranker-v2-gemma": { - "model_name": "BAAI/bge-reranker-v2-gemma", - "cache_name": "BAAI/bge-reranker-v2-gemma", - "size_mb": 2000, - "description": "BGE v2 Gemma reranker, best quality", - "use_case": "Maximum quality with Gemma backbone", - "recommended": False, - }, - "cross-encoder-ms-marco": { - "model_name": "cross-encoder/ms-marco-MiniLM-L-6-v2", - "cache_name": "cross-encoder/ms-marco-MiniLM-L-6-v2", - "size_mb": 90, - "description": "Original cross-encoder MS MARCO", - "use_case": "Classic cross-encoder baseline", - "recommended": False, - }, -} - - -# Model profiles with metadata -# Note: 768d is max recommended dimension for optimal performance/quality balance -# 1024d models are available but not recommended due to higher resource usage -# cache_name: The actual Hugging Face repo name used by fastembed for ONNX caching -MODEL_PROFILES = { - "fast": { - "model_name": "BAAI/bge-small-en-v1.5", - "cache_name": "qdrant/bge-small-en-v1.5-onnx-q", # fastembed uses ONNX version - "dimensions": 384, - "size_mb": 80, - "description": "Fast, lightweight, English-optimized", - "use_case": "Quick prototyping, resource-constrained environments", - "recommended": True, - }, - "base": { - "model_name": "BAAI/bge-base-en-v1.5", - "cache_name": "qdrant/bge-base-en-v1.5-onnx-q", # fastembed uses ONNX version - "dimensions": 768, - "size_mb": 220, - "description": "General purpose, good balance of speed and quality", - "use_case": "General text search, documentation", - "recommended": True, - }, - "code": { - "model_name": "jinaai/jina-embeddings-v2-base-code", - "cache_name": "jinaai/jina-embeddings-v2-base-code", # Uses original name - "dimensions": 768, - "size_mb": 150, - "description": "Code-optimized, best for programming languages", - "use_case": "Open source projects, code semantic search", - "recommended": True, - }, - "minilm": { - "model_name": "sentence-transformers/all-MiniLM-L6-v2", - "cache_name": "qdrant/all-MiniLM-L6-v2-onnx", # fastembed uses ONNX version - "dimensions": 384, - "size_mb": 90, - "description": "Popular lightweight model, good quality", - "use_case": "General purpose, low resource environments", - "recommended": True, - }, - "multilingual": { - "model_name": "intfloat/multilingual-e5-large", - "cache_name": "qdrant/multilingual-e5-large-onnx", # fastembed uses ONNX version - "dimensions": 1024, - "size_mb": 1000, - "description": "Multilingual + code support (high resource usage)", - "use_case": "Enterprise multilingual projects", - "recommended": False, # 1024d not recommended - }, - "balanced": { - "model_name": "mixedbread-ai/mxbai-embed-large-v1", - "cache_name": "mixedbread-ai/mxbai-embed-large-v1", # Uses original name - "dimensions": 1024, - "size_mb": 600, - "description": "High accuracy, general purpose (high resource usage)", - "use_case": "High-quality semantic search, balanced performance", - "recommended": False, # 1024d not recommended - }, - # Additional embedding models (commonly used) - "bge-large": { - "model_name": "BAAI/bge-large-en-v1.5", - "cache_name": "qdrant/bge-large-en-v1.5-onnx-q", - "dimensions": 1024, - "size_mb": 650, - "description": "BGE large model, highest quality", - "use_case": "Maximum quality semantic search", - "recommended": False, - }, - "e5-small": { - "model_name": "intfloat/e5-small-v2", - "cache_name": "qdrant/e5-small-v2-onnx", - "dimensions": 384, - "size_mb": 80, - "description": "E5 small model, fast and lightweight", - "use_case": "Low latency applications", - "recommended": True, - }, - "e5-base": { - "model_name": "intfloat/e5-base-v2", - "cache_name": "qdrant/e5-base-v2-onnx", - "dimensions": 768, - "size_mb": 220, - "description": "E5 base model, balanced", - "use_case": "General purpose semantic search", - "recommended": True, - }, - "e5-large": { - "model_name": "intfloat/e5-large-v2", - "cache_name": "qdrant/e5-large-v2-onnx", - "dimensions": 1024, - "size_mb": 650, - "description": "E5 large model, high quality", - "use_case": "High quality semantic search", - "recommended": False, - }, - "jina-base-en": { - "model_name": "jinaai/jina-embeddings-v2-base-en", - "cache_name": "jinaai/jina-embeddings-v2-base-en", - "dimensions": 768, - "size_mb": 150, - "description": "Jina base English model", - "use_case": "English text semantic search", - "recommended": True, - }, - "jina-small-en": { - "model_name": "jinaai/jina-embeddings-v2-small-en", - "cache_name": "jinaai/jina-embeddings-v2-small-en", - "dimensions": 512, - "size_mb": 60, - "description": "Jina small English model, very fast", - "use_case": "Low latency English text search", - "recommended": True, - }, - "snowflake-arctic": { - "model_name": "Snowflake/snowflake-arctic-embed-m", - "cache_name": "Snowflake/snowflake-arctic-embed-m", - "dimensions": 768, - "size_mb": 220, - "description": "Snowflake Arctic embedding model", - "use_case": "Enterprise semantic search, high quality", - "recommended": True, - }, - "nomic-embed": { - "model_name": "nomic-ai/nomic-embed-text-v1.5", - "cache_name": "nomic-ai/nomic-embed-text-v1.5", - "dimensions": 768, - "size_mb": 280, - "description": "Nomic embedding model, open source", - "use_case": "Open source text embedding", - "recommended": True, - }, - "gte-small": { - "model_name": "thenlper/gte-small", - "cache_name": "thenlper/gte-small", - "dimensions": 384, - "size_mb": 70, - "description": "GTE small model, fast", - "use_case": "Fast text embedding", - "recommended": True, - }, - "gte-base": { - "model_name": "thenlper/gte-base", - "cache_name": "thenlper/gte-base", - "dimensions": 768, - "size_mb": 220, - "description": "GTE base model, balanced", - "use_case": "General purpose text embedding", - "recommended": True, - }, - "gte-large": { - "model_name": "thenlper/gte-large", - "cache_name": "thenlper/gte-large", - "dimensions": 1024, - "size_mb": 650, - "description": "GTE large model, high quality", - "use_case": "High quality text embedding", - "recommended": False, - }, -} - - -def get_cache_dir() -> Path: - """Get fastembed cache directory. - - Returns: - Path to cache directory (~/.cache/huggingface or custom path) - """ - # Check HF_HOME environment variable first - if "HF_HOME" in os.environ: - return Path(os.environ["HF_HOME"]) - - # fastembed 0.7.4+ uses HuggingFace cache when cache_dir is specified - # Models are stored directly under the cache directory - return Path.home() / ".cache" / "huggingface" - - -def _get_model_cache_path(cache_dir: Path, info: Dict) -> Path: - """Get the actual cache path for a model. - - fastembed 0.7.4+ uses HuggingFace Hub's naming convention: - - Prefix: 'models--' - - Replace '/' with '--' in model name - Example: jinaai/jina-embeddings-v2-base-code - -> models--jinaai--jina-embeddings-v2-base-code - - Args: - cache_dir: The fastembed cache directory (HuggingFace hub path) - info: Model profile info dictionary - - Returns: - Path to the model cache directory - """ - # HuggingFace Hub naming: models--{org}--{model} - # Use cache_name if available (for mapped ONNX models), else model_name - target_name = info.get("cache_name", info["model_name"]) - sanitized_name = f"models--{target_name.replace('/', '--')}" - return cache_dir / sanitized_name - - -def scan_discovered_models(model_type: str = "embedding") -> List[Dict]: - """Scan cache directory for manually placed models not in predefined profiles. - - This allows users to manually download models (e.g., via huggingface-cli or - by copying the model directory) and have them recognized automatically. - - Args: - model_type: Type of models to scan for ("embedding" or "reranker") - - Returns: - List of discovered model info dictionaries - """ - cache_dir = get_cache_dir() - if not cache_dir.exists(): - return [] - - # Get known model cache names based on type - if model_type == "reranker": - known_cache_names = { - f"models--{info.get('cache_name', info['model_name']).replace('/', '--')}" - for info in RERANKER_MODEL_PROFILES.values() - } - else: - known_cache_names = { - f"models--{info.get('cache_name', info['model_name']).replace('/', '--')}" - for info in MODEL_PROFILES.values() - } - - discovered = [] - - # Scan for model directories in cache - for item in cache_dir.iterdir(): - if not item.is_dir() or not item.name.startswith("models--"): - continue - - # Skip known predefined models - if item.name in known_cache_names: - continue - - # Parse model name from directory (models--org--model -> org/model) - parts = item.name[8:].split("--") # Remove "models--" prefix - if len(parts) >= 2: - model_name = "/".join(parts) - else: - model_name = parts[0] if parts else item.name - - # Detect model type by checking for common patterns - is_reranker = any(keyword in model_name.lower() for keyword in [ - "reranker", "cross-encoder", "ms-marco" - ]) - is_embedding = any(keyword in model_name.lower() for keyword in [ - "embed", "bge", "e5", "jina", "minilm", "gte", "nomic", "arctic" - ]) - - # Filter based on requested type - if model_type == "reranker" and not is_reranker: - continue - if model_type == "embedding" and is_reranker: - continue - - # Calculate cache size - try: - total_size = sum( - f.stat().st_size - for f in item.rglob("*") - if f.is_file() - ) - cache_size_mb = round(total_size / (1024 * 1024), 1) - except (OSError, PermissionError): - cache_size_mb = 0 - - discovered.append({ - "profile": f"discovered:{model_name.replace('/', '-')}", - "model_name": model_name, - "cache_name": model_name, - "cache_path": str(item), - "actual_size_mb": cache_size_mb, - "description": f"Manually discovered model", - "use_case": "User-provided model", - "installed": True, - "source": "discovered", # Mark as discovered - }) - - return discovered - - -def list_models() -> Dict[str, any]: - """List available model profiles and their installation status. - - Returns: - Dictionary with model profiles, installed status, and cache info - """ - if not FASTEMBED_AVAILABLE: - return { - "success": False, - "error": "fastembed not installed. Install with: pip install codexlens[semantic]", - } - - cache_dir = get_cache_dir() - cache_exists = cache_dir.exists() - - models = [] - for profile, info in MODEL_PROFILES.items(): - model_name = info["model_name"] - - # Check if model is cached using the actual cache name - installed = False - cache_size_mb = 0 - - if cache_exists: - # Check for model directory in cache using correct cache_name - model_cache_path = _get_model_cache_path(cache_dir, info) - if model_cache_path.exists(): - installed = True - # Calculate cache size - total_size = sum( - f.stat().st_size - for f in model_cache_path.rglob("*") - if f.is_file() - ) - cache_size_mb = round(total_size / (1024 * 1024), 1) - - models.append({ - "profile": profile, - "model_name": model_name, - "dimensions": info["dimensions"], - "estimated_size_mb": info["size_mb"], - "actual_size_mb": cache_size_mb if installed else None, - "description": info["description"], - "use_case": info["use_case"], - "installed": installed, - "source": "predefined", # Mark as predefined - "recommended": info.get("recommended", True), - }) - - # Add discovered models (manually placed by user) - discovered = scan_discovered_models(model_type="embedding") - for model in discovered: - # Try to estimate dimensions based on common model patterns - dimensions = 768 # Default - name_lower = model["model_name"].lower() - if "small" in name_lower or "mini" in name_lower: - dimensions = 384 - elif "large" in name_lower: - dimensions = 1024 - - model["dimensions"] = dimensions - model["estimated_size_mb"] = model.get("actual_size_mb", 0) - model["recommended"] = False # User-provided models are not recommended by default - models.append(model) - - return { - "success": True, - "result": { - "models": models, - "cache_dir": str(cache_dir), - "cache_exists": cache_exists, - "manual_install_guide": { - "steps": [ - "1. Download: huggingface-cli download /", - "2. Or copy to cache directory (see paths below)", - "3. Refresh to see discovered models" - ], - "example": "huggingface-cli download BAAI/bge-small-en-v1.5", - "paths": { - "windows": "%USERPROFILE%\\.cache\\huggingface\\models----", - "linux": "~/.cache/huggingface/models----", - "macos": "~/.cache/huggingface/models----", - }, - }, - }, - } - - -def download_model(profile: str, progress_callback: Optional[callable] = None) -> Dict[str, any]: - """Download a model by profile name. - - Args: - profile: Model profile name (fast, code, multilingual, balanced) - progress_callback: Optional callback function to report progress - - Returns: - Result dictionary with success status - """ - if not FASTEMBED_AVAILABLE: - return { - "success": False, - "error": "fastembed not installed. Install with: pip install codexlens[semantic]", - } - - if profile not in MODEL_PROFILES: - return { - "success": False, - "error": f"Unknown profile: {profile}. Available: {', '.join(MODEL_PROFILES.keys())}", - } - - info = MODEL_PROFILES[profile] - model_name = info["model_name"] - - try: - # Get cache directory - cache_dir = get_cache_dir() - - # Download model by instantiating TextEmbedding with explicit cache_dir - # This ensures fastembed uses the correct HuggingFace Hub cache location - if progress_callback: - progress_callback(f"Downloading {model_name}...") - - # CRITICAL: Must specify cache_dir to use HuggingFace cache - # and call embed() to trigger actual download - embedder = TextEmbedding(model_name=model_name, cache_dir=str(cache_dir)) - - # Trigger actual download by calling embed - # TextEmbedding.__init__ alone doesn't download files - if progress_callback: - progress_callback(f"Initializing {model_name}...") - - list(embedder.embed(["test"])) # Trigger download - - if progress_callback: - progress_callback(f"Model {model_name} downloaded successfully") - - # Get cache info using correct HuggingFace Hub path - model_cache_path = _get_model_cache_path(cache_dir, info) - - cache_size = 0 - if model_cache_path.exists(): - total_size = sum( - f.stat().st_size - for f in model_cache_path.rglob("*") - if f.is_file() - ) - cache_size = round(total_size / (1024 * 1024), 1) - - return { - "success": True, - "result": { - "profile": profile, - "model_name": model_name, - "cache_size_mb": cache_size, - "cache_path": str(model_cache_path), - }, - } - - except Exception as e: - return { - "success": False, - "error": f"Failed to download model: {str(e)}", - } - - -def download_custom_model(model_name: str, model_type: str = "embedding", progress_callback: Optional[callable] = None) -> Dict[str, any]: - """Download a custom model by HuggingFace model name. - - This allows users to download any HuggingFace model directly from - HuggingFace Hub. The model will be placed in the standard cache - directory where it can be discovered by scan_discovered_models(). - - Note: Downloaded models may not be directly usable by FastEmbed unless - they are in ONNX format. This function is primarily for downloading - models that users want to use with other frameworks or custom code. - - Args: - model_name: Full HuggingFace model name (e.g., "intfloat/e5-small-v2") - model_type: Type of model ("embedding" or "reranker") - for metadata only - progress_callback: Optional callback function to report progress - - Returns: - Result dictionary with success status - """ - if not HUGGINGFACE_HUB_AVAILABLE: - return { - "success": False, - "error": "huggingface_hub not installed. Install with: pip install huggingface_hub", - } - - # Validate model name format (org/model-name) - if not model_name or "/" not in model_name: - return { - "success": False, - "error": "Invalid model name format. Expected: 'org/model-name' (e.g., 'intfloat/e5-small-v2')", - } - - try: - cache_dir = get_cache_dir() - - if progress_callback: - progress_callback(f"Checking model format for {model_name}...") - - # Check if model contains ONNX files before downloading - try: - files = list_repo_files(repo_id=model_name) - has_onnx = any( - f.endswith('.onnx') or - f.startswith('onnx/') or - '/onnx/' in f or - f == 'model.onnx' - for f in files - ) - - if not has_onnx: - return { - "success": False, - "error": f"Model '{model_name}' does not contain ONNX files. " - f"FastEmbed requires ONNX-format models. " - f"Try Xenova/* versions or check the recommended models list.", - "files_found": len(files), - "suggestion": "Use models from the 'Recommended Models' list, or search for ONNX versions (e.g., Xenova/*).", - } - - if progress_callback: - progress_callback(f"ONNX format detected. Downloading {model_name}...") - - except Exception as check_err: - # If we can't check, warn but allow download - if progress_callback: - progress_callback(f"Could not verify format, proceeding with download...") - - # Use huggingface_hub to download the model - # This downloads to the standard HuggingFace cache directory - local_path = snapshot_download( - repo_id=model_name, - cache_dir=str(cache_dir), - ) - - if progress_callback: - progress_callback(f"Model {model_name} downloaded successfully") - - # Get cache info - sanitized_name = f"models--{model_name.replace('/', '--')}" - model_cache_path = cache_dir / sanitized_name - - cache_size = 0 - if model_cache_path.exists(): - total_size = sum( - f.stat().st_size - for f in model_cache_path.rglob("*") - if f.is_file() - ) - cache_size = round(total_size / (1024 * 1024), 1) - - return { - "success": True, - "result": { - "model_name": model_name, - "model_type": model_type, - "cache_size_mb": cache_size, - "cache_path": str(model_cache_path), - "local_path": local_path, - "note": "Model downloaded. Note: Only ONNX-format models are compatible with FastEmbed.", - }, - } - - except Exception as e: - return { - "success": False, - "error": f"Failed to download custom model: {str(e)}", - } - - -def delete_model(profile: str) -> Dict[str, any]: - """Delete a downloaded model from cache. - - Args: - profile: Model profile name to delete - - Returns: - Result dictionary with success status - """ - if profile not in MODEL_PROFILES: - return { - "success": False, - "error": f"Unknown profile: {profile}. Available: {', '.join(MODEL_PROFILES.keys())}", - } - - info = MODEL_PROFILES[profile] - model_name = info["model_name"] - cache_dir = get_cache_dir() - model_cache_path = _get_model_cache_path(cache_dir, info) - - if not model_cache_path.exists(): - return { - "success": False, - "error": f"Model {profile} ({model_name}) is not installed", - } - - try: - # Calculate size before deletion - total_size = sum( - f.stat().st_size - for f in model_cache_path.rglob("*") - if f.is_file() - ) - size_mb = round(total_size / (1024 * 1024), 1) - - # Delete model directory - shutil.rmtree(model_cache_path) - - return { - "success": True, - "result": { - "profile": profile, - "model_name": model_name, - "deleted_size_mb": size_mb, - "cache_path": str(model_cache_path), - }, - } - - except Exception as e: - return { - "success": False, - "error": f"Failed to delete model: {str(e)}", - } - - -def get_model_info(profile: str) -> Dict[str, any]: - """Get detailed information about a model profile. - - Args: - profile: Model profile name - - Returns: - Result dictionary with model information - """ - if profile not in MODEL_PROFILES: - return { - "success": False, - "error": f"Unknown profile: {profile}. Available: {', '.join(MODEL_PROFILES.keys())}", - } - - info = MODEL_PROFILES[profile] - model_name = info["model_name"] - - # Check installation status using correct cache_name - cache_dir = get_cache_dir() - model_cache_path = _get_model_cache_path(cache_dir, info) - installed = model_cache_path.exists() - - cache_size_mb = None - if installed: - total_size = sum( - f.stat().st_size - for f in model_cache_path.rglob("*") - if f.is_file() - ) - cache_size_mb = round(total_size / (1024 * 1024), 1) - - return { - "success": True, - "result": { - "profile": profile, - "model_name": model_name, - "dimensions": info["dimensions"], - "estimated_size_mb": info["size_mb"], - "actual_size_mb": cache_size_mb, - "description": info["description"], - "use_case": info["use_case"], - "installed": installed, - "cache_path": str(model_cache_path) if installed else None, - }, - } - - -# ============================================================================ -# Reranker Model Management Functions -# ============================================================================ - - -def list_reranker_models() -> Dict[str, any]: - """List available reranker model profiles and their installation status. - - Returns: - Dictionary with reranker model profiles, installed status, and cache info - """ - if not RERANKER_AVAILABLE: - return { - "success": False, - "error": "fastembed reranker not available. Install with: pip install fastembed>=0.4.0", - } - - cache_dir = get_cache_dir() - cache_exists = cache_dir.exists() - - models = [] - for profile, info in RERANKER_MODEL_PROFILES.items(): - model_name = info["model_name"] - - # Check if model is cached - installed = False - cache_size_mb = 0 - - if cache_exists: - model_cache_path = _get_model_cache_path(cache_dir, info) - if model_cache_path.exists(): - installed = True - total_size = sum( - f.stat().st_size - for f in model_cache_path.rglob("*") - if f.is_file() - ) - cache_size_mb = round(total_size / (1024 * 1024), 1) - - models.append({ - "profile": profile, - "model_name": model_name, - "estimated_size_mb": info["size_mb"], - "actual_size_mb": cache_size_mb if installed else None, - "description": info["description"], - "use_case": info["use_case"], - "installed": installed, - "recommended": info.get("recommended", True), - "source": "predefined", # Mark as predefined - }) - - # Add discovered reranker models (manually placed by user) - discovered = scan_discovered_models(model_type="reranker") - for model in discovered: - model["estimated_size_mb"] = model.get("actual_size_mb", 0) - model["recommended"] = False # User-provided models are not recommended by default - models.append(model) - - return { - "success": True, - "result": { - "models": models, - "cache_dir": str(cache_dir), - "cache_exists": cache_exists, - "manual_install_guide": { - "steps": [ - "1. Download: huggingface-cli download /", - "2. Or copy to cache directory (see paths below)", - "3. Refresh to see discovered models", - ], - "example": "huggingface-cli download BAAI/bge-reranker-base", - "paths": { - "windows": "%USERPROFILE%\\.cache\\huggingface\\models----", - "linux": "~/.cache/huggingface/models----", - "macos": "~/.cache/huggingface/models----", - }, - }, - }, - } - - -def download_reranker_model(profile: str, progress_callback: Optional[callable] = None) -> Dict[str, any]: - """Download a reranker model by profile name. - - Args: - profile: Reranker model profile name - progress_callback: Optional callback function to report progress - - Returns: - Result dictionary with success status - """ - if not RERANKER_AVAILABLE: - return { - "success": False, - "error": "fastembed reranker not available. Install with: pip install fastembed>=0.4.0", - } - - if profile not in RERANKER_MODEL_PROFILES: - return { - "success": False, - "error": f"Unknown reranker profile: {profile}. Available: {', '.join(RERANKER_MODEL_PROFILES.keys())}", - } - - info = RERANKER_MODEL_PROFILES[profile] - model_name = info["model_name"] - - try: - cache_dir = get_cache_dir() - - if progress_callback: - progress_callback(f"Downloading reranker {model_name}...") - - # Download model by instantiating TextCrossEncoder with explicit cache_dir - reranker = TextCrossEncoder(model_name=model_name, cache_dir=str(cache_dir)) - - # Trigger actual download by calling rerank - if progress_callback: - progress_callback(f"Initializing {model_name}...") - - list(reranker.rerank("test query", ["test document"])) - - if progress_callback: - progress_callback(f"Reranker {model_name} downloaded successfully") - - # Get cache info - model_cache_path = _get_model_cache_path(cache_dir, info) - - cache_size = 0 - if model_cache_path.exists(): - total_size = sum( - f.stat().st_size - for f in model_cache_path.rglob("*") - if f.is_file() - ) - cache_size = round(total_size / (1024 * 1024), 1) - - return { - "success": True, - "result": { - "profile": profile, - "model_name": model_name, - "cache_size_mb": cache_size, - "cache_path": str(model_cache_path), - }, - } - - except Exception as e: - return { - "success": False, - "error": f"Failed to download reranker model: {str(e)}", - } - - -def delete_reranker_model(profile: str) -> Dict[str, any]: - """Delete a downloaded reranker model from cache. - - Args: - profile: Reranker model profile name to delete - - Returns: - Result dictionary with success status - """ - if profile not in RERANKER_MODEL_PROFILES: - return { - "success": False, - "error": f"Unknown reranker profile: {profile}. Available: {', '.join(RERANKER_MODEL_PROFILES.keys())}", - } - - info = RERANKER_MODEL_PROFILES[profile] - model_name = info["model_name"] - cache_dir = get_cache_dir() - model_cache_path = _get_model_cache_path(cache_dir, info) - - if not model_cache_path.exists(): - return { - "success": False, - "error": f"Reranker model {profile} ({model_name}) is not installed", - } - - try: - total_size = sum( - f.stat().st_size - for f in model_cache_path.rglob("*") - if f.is_file() - ) - size_mb = round(total_size / (1024 * 1024), 1) - - shutil.rmtree(model_cache_path) - - return { - "success": True, - "result": { - "profile": profile, - "model_name": model_name, - "deleted_size_mb": size_mb, - "cache_path": str(model_cache_path), - }, - } - - except Exception as e: - return { - "success": False, - "error": f"Failed to delete reranker model: {str(e)}", - } - - -def get_reranker_model_info(profile: str) -> Dict[str, any]: - """Get detailed information about a reranker model profile. - - Args: - profile: Reranker model profile name - - Returns: - Result dictionary with model information - """ - if profile not in RERANKER_MODEL_PROFILES: - return { - "success": False, - "error": f"Unknown reranker profile: {profile}. Available: {', '.join(RERANKER_MODEL_PROFILES.keys())}", - } - - info = RERANKER_MODEL_PROFILES[profile] - model_name = info["model_name"] - - cache_dir = get_cache_dir() - model_cache_path = _get_model_cache_path(cache_dir, info) - installed = model_cache_path.exists() - - cache_size_mb = None - if installed: - total_size = sum( - f.stat().st_size - for f in model_cache_path.rglob("*") - if f.is_file() - ) - cache_size_mb = round(total_size / (1024 * 1024), 1) - - return { - "success": True, - "result": { - "profile": profile, - "model_name": model_name, - "estimated_size_mb": info["size_mb"], - "actual_size_mb": cache_size_mb, - "description": info["description"], - "use_case": info["use_case"], - "installed": installed, - "recommended": info.get("recommended", True), - "cache_path": str(model_cache_path) if installed else None, - }, - } diff --git a/codex-lens/src/codexlens/cli/output.py b/codex-lens/src/codexlens/cli/output.py deleted file mode 100644 index 1abfb4d2..00000000 --- a/codex-lens/src/codexlens/cli/output.py +++ /dev/null @@ -1,135 +0,0 @@ -"""Rich and JSON output helpers for CodexLens CLI.""" - -from __future__ import annotations - -import json -import sys -from dataclasses import asdict, is_dataclass -from pathlib import Path -from typing import Any, Iterable, Mapping, Sequence - -from rich.console import Console -from rich.table import Table -from rich.text import Text - -from codexlens.entities import SearchResult, Symbol - -# Force UTF-8 encoding for Windows console to properly display Chinese text -# Use force_terminal=True and legacy_windows=False to avoid GBK encoding issues -console = Console(force_terminal=True, legacy_windows=False) - - -def _to_jsonable(value: Any) -> Any: - if value is None: - return None - if hasattr(value, "model_dump"): - return value.model_dump() - if is_dataclass(value): - return asdict(value) - if isinstance(value, Path): - return str(value) - if isinstance(value, Mapping): - return {k: _to_jsonable(v) for k, v in value.items()} - if isinstance(value, (list, tuple, set)): - return [_to_jsonable(v) for v in value] - return value - - -def print_json(*, success: bool, result: Any = None, error: str | None = None, **kwargs: Any) -> None: - """Print JSON output with optional additional fields. - - Args: - success: Whether the operation succeeded - result: Result data (used when success=True) - error: Error message (used when success=False) - **kwargs: Additional fields to include in the payload (e.g., code, details) - """ - payload: dict[str, Any] = {"success": success} - if success: - payload["result"] = _to_jsonable(result) - else: - payload["error"] = error or "Unknown error" - # Include additional error details if provided - for key, value in kwargs.items(): - payload[key] = _to_jsonable(value) - console.print_json(json.dumps(payload, ensure_ascii=False)) - - -def render_search_results( - results: Sequence[SearchResult], *, title: str = "Search Results", verbose: bool = False -) -> None: - """Render search results with optional source tags in verbose mode. - - Args: - results: Search results to display - title: Table title - verbose: If True, show search source tags ([E], [F], [V]) and fusion scores - """ - table = Table(title=title, show_lines=False) - - if verbose: - # Verbose mode: show source tags - table.add_column("Source", style="dim", width=6, justify="center") - - table.add_column("Path", style="cyan", no_wrap=True) - table.add_column("Score", style="magenta", justify="right") - table.add_column("Excerpt", style="white") - - for res in results: - excerpt = res.excerpt or "" - score_str = f"{res.score:.3f}" - - if verbose: - # Extract search source tag if available - source = getattr(res, "search_source", None) - source_tag = "" - if source == "exact": - source_tag = "[E]" - elif source == "fuzzy": - source_tag = "[F]" - elif source == "vector": - source_tag = "[V]" - elif source == "fusion": - source_tag = "[RRF]" - table.add_row(source_tag, res.path, score_str, excerpt) - else: - table.add_row(res.path, score_str, excerpt) - - console.print(table) - - -def render_symbols(symbols: Sequence[Symbol], *, title: str = "Symbols") -> None: - table = Table(title=title) - table.add_column("Name", style="green") - table.add_column("Kind", style="yellow") - table.add_column("Range", style="white", justify="right") - - for sym in symbols: - start, end = sym.range - table.add_row(sym.name, sym.kind, f"{start}-{end}") - - console.print(table) - - -def render_status(stats: Mapping[str, Any]) -> None: - table = Table(title="Index Status") - table.add_column("Metric", style="cyan") - table.add_column("Value", style="white") - - for key, value in stats.items(): - if isinstance(value, Mapping): - value_text = ", ".join(f"{k}:{v}" for k, v in value.items()) - elif isinstance(value, (list, tuple)): - value_text = ", ".join(str(v) for v in value) - else: - value_text = str(value) - table.add_row(str(key), value_text) - - console.print(table) - - -def render_file_inspect(path: str, language: str, symbols: Iterable[Symbol]) -> None: - header = Text.assemble(("File: ", "bold"), (path, "cyan"), (" Language: ", "bold"), (language, "green")) - console.print(header) - render_symbols(list(symbols), title="Discovered Symbols") - diff --git a/codex-lens/src/codexlens/config.py b/codex-lens/src/codexlens/config.py deleted file mode 100644 index 527560f7..00000000 --- a/codex-lens/src/codexlens/config.py +++ /dev/null @@ -1,1164 +0,0 @@ -"""Configuration system for CodexLens.""" - -from __future__ import annotations - -import json -import logging -import os -from dataclasses import dataclass, field -from functools import cached_property -from pathlib import Path -from typing import Any, Dict, List, Optional - -from .errors import ConfigError - - -# Workspace-local directory name -WORKSPACE_DIR_NAME = ".codexlens" - -# Settings file name -SETTINGS_FILE_NAME = "settings.json" - -# Dense vector storage names (centralized storage) -VECTORS_HNSW_NAME = "_vectors.hnsw" -VECTORS_META_DB_NAME = "_vectors_meta.db" -BINARY_VECTORS_MMAP_NAME = "_binary_vectors.mmap" - -log = logging.getLogger(__name__) - - -def _default_global_dir() -> Path: - """Get global CodexLens data directory.""" - env_override = os.getenv("CODEXLENS_DATA_DIR") - if env_override: - return Path(env_override).expanduser().resolve() - return (Path.home() / ".codexlens").resolve() - - -def find_workspace_root(start_path: Path) -> Optional[Path]: - """Find the workspace root by looking for .codexlens directory. - - Searches from start_path upward to find an existing .codexlens directory. - Returns None if not found. - """ - current = start_path.resolve() - - # Search up to filesystem root - while current != current.parent: - workspace_dir = current / WORKSPACE_DIR_NAME - if workspace_dir.is_dir(): - return current - current = current.parent - - # Check root as well - workspace_dir = current / WORKSPACE_DIR_NAME - if workspace_dir.is_dir(): - return current - - return None - - -@dataclass -class Config: - """Runtime configuration for CodexLens. - - - data_dir: Base directory for all persistent CodexLens data. - - venv_path: Optional virtualenv used for language tooling. - - supported_languages: Language IDs and their associated file extensions. - - parsing_rules: Per-language parsing and chunking hints. - """ - - data_dir: Path = field(default_factory=_default_global_dir) - venv_path: Path = field(default_factory=lambda: _default_global_dir() / "venv") - supported_languages: Dict[str, Dict[str, Any]] = field( - default_factory=lambda: { - # Source code languages (category: "code") - "python": {"extensions": [".py"], "tree_sitter_language": "python", "category": "code"}, - "javascript": {"extensions": [".js", ".jsx"], "tree_sitter_language": "javascript", "category": "code"}, - "typescript": {"extensions": [".ts", ".tsx"], "tree_sitter_language": "typescript", "category": "code"}, - "java": {"extensions": [".java"], "tree_sitter_language": "java", "category": "code"}, - "go": {"extensions": [".go"], "tree_sitter_language": "go", "category": "code"}, - "zig": {"extensions": [".zig"], "tree_sitter_language": "zig", "category": "code"}, - "objective-c": {"extensions": [".m", ".mm"], "tree_sitter_language": "objc", "category": "code"}, - "swift": {"extensions": [".swift"], "tree_sitter_language": "swift", "category": "code"}, - "c": {"extensions": [".c", ".h"], "tree_sitter_language": "c", "category": "code"}, - "cpp": {"extensions": [".cc", ".cpp", ".hpp", ".cxx"], "tree_sitter_language": "cpp", "category": "code"}, - "rust": {"extensions": [".rs"], "tree_sitter_language": "rust", "category": "code"}, - } - ) - parsing_rules: Dict[str, Dict[str, Any]] = field( - default_factory=lambda: { - "default": { - "max_chunk_chars": 4000, - "max_chunk_lines": 200, - "overlap_lines": 20, - } - } - ) - - llm_enabled: bool = False - llm_tool: str = "gemini" - llm_timeout_ms: int = 300000 - llm_batch_size: int = 5 - - # Hybrid chunker configuration - hybrid_max_chunk_size: int = 2000 # Max characters per chunk before LLM refinement - hybrid_llm_refinement: bool = False # Enable LLM-based semantic boundary refinement - - # Embedding configuration - embedding_backend: str = "fastembed" # "fastembed" (local) or "litellm" (API) - embedding_model: str = "code" # For fastembed: profile (fast/code/multilingual/balanced) - # For litellm: model name from config (e.g., "qwen3-embedding") - embedding_use_gpu: bool = True # For fastembed: whether to use GPU acceleration - embedding_auto_embed_missing: bool = True # Auto-build embeddings in background when indexed projects are searched without vectors - - # Indexing/search optimizations - global_symbol_index_enabled: bool = True # Enable project-wide symbol index fast path - enable_merkle_detection: bool = True # Enable content-hash based incremental indexing - ignore_patterns: List[str] = field(default_factory=list) # Additional directory ignore patterns for indexing - extension_filters: List[str] = field(default_factory=list) # Reserved for file-level filtering config - - # Graph expansion (search-time, uses precomputed neighbors) - enable_graph_expansion: bool = False - graph_expansion_depth: int = 2 - - # Optional search reranking (disabled by default) - enable_reranking: bool = False - reranking_top_k: int = 50 - symbol_boost_factor: float = 1.5 - test_file_penalty: float = 0.15 # Penalty for test/fixture paths during final ranking - generated_file_penalty: float = 0.35 # Penalty for generated/build artifact paths during final ranking - - # Optional cross-encoder reranking (second stage; requires optional reranker deps) - enable_cross_encoder_rerank: bool = False - reranker_backend: str = "onnx" - reranker_model: str = "cross-encoder/ms-marco-MiniLM-L-6-v2" - reranker_use_gpu: bool = True # Whether reranker backends should use GPU acceleration - reranker_top_k: int = 50 - reranker_max_input_tokens: int = 8192 # Maximum tokens for reranker API batching - reranker_chunk_type_weights: Optional[Dict[str, float]] = None # Weights for chunk types: {"code": 1.0, "docstring": 0.7} - reranker_test_file_penalty: float = 0.0 # Penalty for test files (0.0-1.0, e.g., 0.2 = 20% reduction) - - # Chunk stripping configuration (for semantic embedding) - chunk_strip_comments: bool = True # Strip comments from code chunks - chunk_strip_docstrings: bool = True # Strip docstrings from code chunks - - # Cascade search configuration (two-stage retrieval) - enable_cascade_search: bool = False # Enable cascade search (coarse + fine ranking) - cascade_coarse_k: int = 100 # Number of coarse candidates from first stage - cascade_fine_k: int = 10 # Number of final results after reranking - cascade_strategy: str = "binary" # "binary", "binary_rerank" (alias: "hybrid"), "dense_rerank", or "staged" - - # Staged cascade search configuration (4-stage pipeline) - staged_coarse_k: int = 200 # Number of coarse candidates from Stage 1 binary search - staged_lsp_depth: int = 2 # LSP relationship expansion depth in Stage 2 - staged_stage2_mode: str = "precomputed" # "precomputed" (graph_neighbors) | "realtime" (LSP) | "static_global_graph" (global_relationships) - - # Static graph configuration (write relationships to global index during build) - static_graph_enabled: bool = False - static_graph_relationship_types: List[str] = field(default_factory=lambda: ["imports", "inherits"]) - - staged_realtime_lsp_timeout_s: float = 30.0 # Max time budget for realtime LSP expansion - staged_realtime_lsp_depth: int = 1 # BFS depth for realtime LSP expansion - staged_realtime_lsp_max_nodes: int = 50 # Node cap for realtime graph expansion - staged_realtime_lsp_max_seeds: int = 1 # Seed cap for realtime graph expansion - staged_realtime_lsp_max_concurrent: int = 2 # Max concurrent LSP requests during graph expansion - staged_realtime_lsp_warmup_s: float = 3.0 # Wait for server analysis after opening seed docs - staged_realtime_lsp_resolve_symbols: bool = False # If True, resolves symbol names via documentSymbol (slower) - staged_clustering_strategy: str = "auto" # "auto", "hdbscan", "dbscan", "frequency", "noop", "score", "dir_rr", "path" - staged_clustering_min_size: int = 3 # Minimum cluster size for Stage 3 grouping - enable_staged_rerank: bool = True # Enable optional cross-encoder reranking in Stage 4 - - # RRF fusion configuration - fusion_method: str = "rrf" # "simple" (weighted sum) or "rrf" (reciprocal rank fusion) - rrf_k: int = 60 # RRF constant (default 60) - - # Category-based filtering to separate code/doc results - enable_category_filter: bool = True # Enable code/doc result separation - - # Multi-endpoint configuration for litellm backend - embedding_endpoints: List[Dict[str, Any]] = field(default_factory=list) - # List of endpoint configs: [{"model": "...", "api_key": "...", "api_base": "...", "weight": 1.0}] - embedding_pool_enabled: bool = False # Enable high availability pool for embeddings - embedding_strategy: str = "latency_aware" # round_robin, latency_aware, weighted_random - embedding_cooldown: float = 60.0 # Default cooldown seconds for rate-limited endpoints - - # Reranker multi-endpoint configuration - reranker_pool_enabled: bool = False # Enable high availability pool for reranker - reranker_strategy: str = "latency_aware" # round_robin, latency_aware, weighted_random - reranker_cooldown: float = 60.0 # Default cooldown seconds for rate-limited endpoints - - # API concurrency settings - api_max_workers: int = 4 # Max concurrent API calls for embedding/reranking - api_batch_size: int = 8 # Batch size for API requests - api_batch_size_dynamic: bool = False # Enable dynamic batch size calculation - api_batch_size_utilization_factor: float = 0.8 # Use 80% of model token capacity - api_batch_size_max: int = 2048 # Absolute upper limit for batch size - chars_per_token_estimate: int = 4 # Characters per token estimation ratio - - # Parser configuration - use_astgrep: bool = False # Use ast-grep for relationship extraction (Python/JS/TS); tree-sitter is default - - def __post_init__(self) -> None: - try: - self.data_dir = self.data_dir.expanduser().resolve() - self.venv_path = self.venv_path.expanduser().resolve() - self.data_dir.mkdir(parents=True, exist_ok=True) - except PermissionError as exc: - raise ConfigError( - f"Permission denied initializing paths (data_dir={self.data_dir}, venv_path={self.venv_path}) " - f"[{type(exc).__name__}]: {exc}" - ) from exc - except OSError as exc: - raise ConfigError( - f"Filesystem error initializing paths (data_dir={self.data_dir}, venv_path={self.venv_path}) " - f"[{type(exc).__name__}]: {exc}" - ) from exc - except Exception as exc: - raise ConfigError( - f"Unexpected error initializing paths (data_dir={self.data_dir}, venv_path={self.venv_path}) " - f"[{type(exc).__name__}]: {exc}" - ) from exc - - @cached_property - def cache_dir(self) -> Path: - """Directory for transient caches.""" - return self.data_dir / "cache" - - @cached_property - def index_dir(self) -> Path: - """Directory where index artifacts are stored.""" - return self.data_dir / "index" - - @cached_property - def db_path(self) -> Path: - """Default SQLite index path.""" - return self.index_dir / "codexlens.db" - - def ensure_runtime_dirs(self) -> None: - """Create standard runtime directories if missing.""" - for directory in (self.cache_dir, self.index_dir): - try: - directory.mkdir(parents=True, exist_ok=True) - except PermissionError as exc: - raise ConfigError( - f"Permission denied creating directory {directory} [{type(exc).__name__}]: {exc}" - ) from exc - except OSError as exc: - raise ConfigError( - f"Filesystem error creating directory {directory} [{type(exc).__name__}]: {exc}" - ) from exc - except Exception as exc: - raise ConfigError( - f"Unexpected error creating directory {directory} [{type(exc).__name__}]: {exc}" - ) from exc - - def language_for_path(self, path: str | Path) -> str | None: - """Infer a supported language ID from a file path.""" - extension = Path(path).suffix.lower() - for language_id, spec in self.supported_languages.items(): - extensions: List[str] = spec.get("extensions", []) - if extension in extensions: - return language_id - return None - - def category_for_path(self, path: str | Path) -> str | None: - """Get file category ('code' or 'doc') from a file path.""" - language = self.language_for_path(path) - if language is None: - return None - spec = self.supported_languages.get(language, {}) - return spec.get("category") - - def rules_for_language(self, language_id: str) -> Dict[str, Any]: - """Get parsing rules for a specific language, falling back to defaults.""" - return {**self.parsing_rules.get("default", {}), **self.parsing_rules.get(language_id, {})} - - @cached_property - def settings_path(self) -> Path: - """Path to the settings file.""" - return self.data_dir / SETTINGS_FILE_NAME - - def save_settings(self) -> None: - """Save embedding and other settings to file.""" - embedding_config = { - "backend": self.embedding_backend, - "model": self.embedding_model, - "use_gpu": self.embedding_use_gpu, - "auto_embed_missing": self.embedding_auto_embed_missing, - "pool_enabled": self.embedding_pool_enabled, - "strategy": self.embedding_strategy, - "cooldown": self.embedding_cooldown, - } - # Include multi-endpoint config if present - if self.embedding_endpoints: - embedding_config["endpoints"] = self.embedding_endpoints - - settings = { - "embedding": embedding_config, - "llm": { - "enabled": self.llm_enabled, - "tool": self.llm_tool, - "timeout_ms": self.llm_timeout_ms, - "batch_size": self.llm_batch_size, - }, - "parsing": { - # Prefer ast-grep processors when available (experimental). - "use_astgrep": self.use_astgrep, - }, - "indexing": { - # Persist global relationship edges during index build for static graph expansion. - "static_graph_enabled": self.static_graph_enabled, - "static_graph_relationship_types": self.static_graph_relationship_types, - }, - "reranker": { - "enabled": self.enable_cross_encoder_rerank, - "backend": self.reranker_backend, - "model": self.reranker_model, - "use_gpu": self.reranker_use_gpu, - "top_k": self.reranker_top_k, - "max_input_tokens": self.reranker_max_input_tokens, - "pool_enabled": self.reranker_pool_enabled, - "strategy": self.reranker_strategy, - "cooldown": self.reranker_cooldown, - }, - "cascade": { - "strategy": self.cascade_strategy, - "coarse_k": self.cascade_coarse_k, - "fine_k": self.cascade_fine_k, - }, - "staged": { - "coarse_k": self.staged_coarse_k, - "lsp_depth": self.staged_lsp_depth, - "stage2_mode": self.staged_stage2_mode, - "realtime_lsp_timeout_s": self.staged_realtime_lsp_timeout_s, - "realtime_lsp_depth": self.staged_realtime_lsp_depth, - "realtime_lsp_max_nodes": self.staged_realtime_lsp_max_nodes, - "realtime_lsp_max_seeds": self.staged_realtime_lsp_max_seeds, - "realtime_lsp_max_concurrent": self.staged_realtime_lsp_max_concurrent, - "realtime_lsp_warmup_s": self.staged_realtime_lsp_warmup_s, - "realtime_lsp_resolve_symbols": self.staged_realtime_lsp_resolve_symbols, - "clustering_strategy": self.staged_clustering_strategy, - "clustering_min_size": self.staged_clustering_min_size, - "enable_rerank": self.enable_staged_rerank, - }, - "api": { - "max_workers": self.api_max_workers, - "batch_size": self.api_batch_size, - "batch_size_dynamic": self.api_batch_size_dynamic, - "batch_size_utilization_factor": self.api_batch_size_utilization_factor, - "batch_size_max": self.api_batch_size_max, - "chars_per_token_estimate": self.chars_per_token_estimate, - }, - "ignore_patterns": self.ignore_patterns, - "extension_filters": self.extension_filters, - } - with open(self.settings_path, "w", encoding="utf-8") as f: - json.dump(settings, f, indent=2) - - def load_settings(self) -> None: - """Load settings from file if exists.""" - if self.settings_path.exists(): - try: - with open(self.settings_path, "r", encoding="utf-8") as f: - settings = json.load(f) - - # Load embedding settings - embedding = settings.get("embedding", {}) - if "backend" in embedding: - backend = embedding["backend"] - # Support 'api' as alias for 'litellm' - if backend == "api": - backend = "litellm" - if backend in {"fastembed", "litellm"}: - self.embedding_backend = backend - else: - log.warning( - "Invalid embedding backend in %s: %r (expected 'fastembed' or 'litellm')", - self.settings_path, - embedding["backend"], - ) - if "model" in embedding: - self.embedding_model = embedding["model"] - if "use_gpu" in embedding: - self.embedding_use_gpu = embedding["use_gpu"] - if "auto_embed_missing" in embedding: - self.embedding_auto_embed_missing = embedding["auto_embed_missing"] - - # Load multi-endpoint configuration - if "endpoints" in embedding: - self.embedding_endpoints = embedding["endpoints"] - if "pool_enabled" in embedding: - self.embedding_pool_enabled = embedding["pool_enabled"] - if "strategy" in embedding: - self.embedding_strategy = embedding["strategy"] - if "cooldown" in embedding: - self.embedding_cooldown = embedding["cooldown"] - - # Load LLM settings - llm = settings.get("llm", {}) - if "enabled" in llm: - self.llm_enabled = llm["enabled"] - if "tool" in llm: - self.llm_tool = llm["tool"] - if "timeout_ms" in llm: - self.llm_timeout_ms = llm["timeout_ms"] - if "batch_size" in llm: - self.llm_batch_size = llm["batch_size"] - - # Load reranker settings - reranker = settings.get("reranker", {}) - if "enabled" in reranker: - self.enable_cross_encoder_rerank = reranker["enabled"] - if "backend" in reranker: - backend = reranker["backend"] - if backend in {"fastembed", "onnx", "api", "litellm", "legacy"}: - self.reranker_backend = backend - else: - log.warning( - "Invalid reranker backend in %s: %r (expected 'fastembed', 'onnx', 'api', 'litellm', or 'legacy')", - self.settings_path, - backend, - ) - if "model" in reranker: - self.reranker_model = reranker["model"] - if "use_gpu" in reranker: - self.reranker_use_gpu = reranker["use_gpu"] - if "top_k" in reranker: - self.reranker_top_k = reranker["top_k"] - if "max_input_tokens" in reranker: - self.reranker_max_input_tokens = reranker["max_input_tokens"] - if "pool_enabled" in reranker: - self.reranker_pool_enabled = reranker["pool_enabled"] - if "strategy" in reranker: - self.reranker_strategy = reranker["strategy"] - if "cooldown" in reranker: - self.reranker_cooldown = reranker["cooldown"] - - # Load cascade settings - cascade = settings.get("cascade", {}) - if "strategy" in cascade: - raw_strategy = cascade["strategy"] - strategy = str(raw_strategy).strip().lower() - if strategy in {"binary", "binary_rerank", "dense_rerank", "staged"}: - self.cascade_strategy = strategy - elif strategy == "hybrid": - self.cascade_strategy = "binary_rerank" - log.debug("Mapping cascade strategy 'hybrid' -> 'binary_rerank'") - else: - log.warning( - "Invalid cascade strategy in %s: %r (expected 'binary', 'binary_rerank', 'dense_rerank', or 'staged')", - self.settings_path, - raw_strategy, - ) - if "coarse_k" in cascade: - self.cascade_coarse_k = cascade["coarse_k"] - if "fine_k" in cascade: - self.cascade_fine_k = cascade["fine_k"] - - # Load staged cascade settings - staged = settings.get("staged", {}) - if isinstance(staged, dict): - if "coarse_k" in staged: - try: - self.staged_coarse_k = int(staged["coarse_k"]) - except (TypeError, ValueError): - log.warning( - "Invalid staged.coarse_k in %s: %r (expected int)", - self.settings_path, - staged["coarse_k"], - ) - if "lsp_depth" in staged: - try: - self.staged_lsp_depth = int(staged["lsp_depth"]) - except (TypeError, ValueError): - log.warning( - "Invalid staged.lsp_depth in %s: %r (expected int)", - self.settings_path, - staged["lsp_depth"], - ) - if "stage2_mode" in staged: - raw_mode = str(staged["stage2_mode"]).strip().lower() - if raw_mode in {"precomputed", "realtime", "static_global_graph"}: - self.staged_stage2_mode = raw_mode - elif raw_mode in {"live"}: - self.staged_stage2_mode = "realtime" - else: - log.warning( - "Invalid staged.stage2_mode in %s: %r " - "(expected 'precomputed', 'realtime', or 'static_global_graph')", - self.settings_path, - staged["stage2_mode"], - ) - - if "realtime_lsp_timeout_s" in staged: - try: - self.staged_realtime_lsp_timeout_s = float( - staged["realtime_lsp_timeout_s"] - ) - except (TypeError, ValueError): - log.warning( - "Invalid staged.realtime_lsp_timeout_s in %s: %r (expected float)", - self.settings_path, - staged["realtime_lsp_timeout_s"], - ) - if "realtime_lsp_depth" in staged: - try: - self.staged_realtime_lsp_depth = int( - staged["realtime_lsp_depth"] - ) - except (TypeError, ValueError): - log.warning( - "Invalid staged.realtime_lsp_depth in %s: %r (expected int)", - self.settings_path, - staged["realtime_lsp_depth"], - ) - if "realtime_lsp_max_nodes" in staged: - try: - self.staged_realtime_lsp_max_nodes = int( - staged["realtime_lsp_max_nodes"] - ) - except (TypeError, ValueError): - log.warning( - "Invalid staged.realtime_lsp_max_nodes in %s: %r (expected int)", - self.settings_path, - staged["realtime_lsp_max_nodes"], - ) - if "realtime_lsp_max_seeds" in staged: - try: - self.staged_realtime_lsp_max_seeds = int( - staged["realtime_lsp_max_seeds"] - ) - except (TypeError, ValueError): - log.warning( - "Invalid staged.realtime_lsp_max_seeds in %s: %r (expected int)", - self.settings_path, - staged["realtime_lsp_max_seeds"], - ) - if "realtime_lsp_max_concurrent" in staged: - try: - self.staged_realtime_lsp_max_concurrent = int( - staged["realtime_lsp_max_concurrent"] - ) - except (TypeError, ValueError): - log.warning( - "Invalid staged.realtime_lsp_max_concurrent in %s: %r (expected int)", - self.settings_path, - staged["realtime_lsp_max_concurrent"], - ) - if "realtime_lsp_warmup_s" in staged: - try: - self.staged_realtime_lsp_warmup_s = float( - staged["realtime_lsp_warmup_s"] - ) - except (TypeError, ValueError): - log.warning( - "Invalid staged.realtime_lsp_warmup_s in %s: %r (expected float)", - self.settings_path, - staged["realtime_lsp_warmup_s"], - ) - if "realtime_lsp_resolve_symbols" in staged: - raw = staged["realtime_lsp_resolve_symbols"] - if isinstance(raw, bool): - self.staged_realtime_lsp_resolve_symbols = raw - elif isinstance(raw, (int, float)): - self.staged_realtime_lsp_resolve_symbols = bool(raw) - elif isinstance(raw, str): - self.staged_realtime_lsp_resolve_symbols = ( - raw.strip().lower() in {"true", "1", "yes", "on"} - ) - else: - log.warning( - "Invalid staged.realtime_lsp_resolve_symbols in %s: %r (expected bool)", - self.settings_path, - raw, - ) - - if "clustering_strategy" in staged: - raw_strategy = str(staged["clustering_strategy"]).strip().lower() - allowed = { - "auto", - "hdbscan", - "dbscan", - "frequency", - "noop", - "score", - "dir_rr", - "path", - } - if raw_strategy in allowed: - self.staged_clustering_strategy = raw_strategy - elif raw_strategy in {"none", "off"}: - self.staged_clustering_strategy = "noop" - else: - log.warning( - "Invalid staged.clustering_strategy in %s: %r", - self.settings_path, - staged["clustering_strategy"], - ) - if "clustering_min_size" in staged: - try: - self.staged_clustering_min_size = int( - staged["clustering_min_size"] - ) - except (TypeError, ValueError): - log.warning( - "Invalid staged.clustering_min_size in %s: %r (expected int)", - self.settings_path, - staged["clustering_min_size"], - ) - if "enable_rerank" in staged: - raw = staged["enable_rerank"] - if isinstance(raw, bool): - self.enable_staged_rerank = raw - elif isinstance(raw, (int, float)): - self.enable_staged_rerank = bool(raw) - elif isinstance(raw, str): - self.enable_staged_rerank = ( - raw.strip().lower() in {"true", "1", "yes", "on"} - ) - else: - log.warning( - "Invalid staged.enable_rerank in %s: %r (expected bool)", - self.settings_path, - raw, - ) - - # Load parsing settings - parsing = settings.get("parsing", {}) - if isinstance(parsing, dict) and "use_astgrep" in parsing: - self.use_astgrep = bool(parsing["use_astgrep"]) - - # Load indexing settings - indexing = settings.get("indexing", {}) - if isinstance(indexing, dict): - if "static_graph_enabled" in indexing: - self.static_graph_enabled = bool(indexing["static_graph_enabled"]) - if "static_graph_relationship_types" in indexing: - raw_types = indexing["static_graph_relationship_types"] - if isinstance(raw_types, list): - allowed = {"imports", "inherits", "calls"} - cleaned = [] - for item in raw_types: - val = str(item).strip().lower() - if val and val in allowed: - cleaned.append(val) - if cleaned: - self.static_graph_relationship_types = cleaned - else: - log.warning( - "Invalid indexing.static_graph_relationship_types in %s: %r (expected list)", - self.settings_path, - raw_types, - ) - - raw_ignore_patterns = settings.get("ignore_patterns") - if raw_ignore_patterns is not None: - if isinstance(raw_ignore_patterns, list): - self.ignore_patterns = [ - str(item).strip() for item in raw_ignore_patterns - if str(item).strip() - ] - else: - log.warning( - "Invalid ignore_patterns in %s: %r (expected list)", - self.settings_path, - raw_ignore_patterns, - ) - - raw_extension_filters = settings.get("extension_filters") - if raw_extension_filters is not None: - if isinstance(raw_extension_filters, list): - self.extension_filters = [ - str(item).strip() for item in raw_extension_filters - if str(item).strip() - ] - else: - log.warning( - "Invalid extension_filters in %s: %r (expected list)", - self.settings_path, - raw_extension_filters, - ) - - # Load API settings - api = settings.get("api", {}) - if "max_workers" in api: - self.api_max_workers = api["max_workers"] - if "batch_size" in api: - self.api_batch_size = api["batch_size"] - if "batch_size_dynamic" in api: - self.api_batch_size_dynamic = api["batch_size_dynamic"] - if "batch_size_utilization_factor" in api: - self.api_batch_size_utilization_factor = api["batch_size_utilization_factor"] - if "batch_size_max" in api: - self.api_batch_size_max = api["batch_size_max"] - if "chars_per_token_estimate" in api: - self.chars_per_token_estimate = api["chars_per_token_estimate"] - except Exception as exc: - log.warning( - "Failed to load settings from %s (%s): %s", - self.settings_path, - type(exc).__name__, - exc, - ) - - # Apply .env overrides (highest priority) - self._apply_env_overrides() - - def _apply_env_overrides(self) -> None: - """Apply environment variable overrides from .env file. - - Priority: default → settings.json → .env (highest) - - Supported variables (with or without CODEXLENS_ prefix): - EMBEDDING_MODEL: Override embedding model/profile - EMBEDDING_BACKEND: Override embedding backend (fastembed/litellm) - EMBEDDING_POOL_ENABLED: Enable embedding high availability pool - EMBEDDING_STRATEGY: Load balance strategy for embedding - EMBEDDING_COOLDOWN: Rate limit cooldown for embedding - RERANKER_MODEL: Override reranker model - RERANKER_BACKEND: Override reranker backend - RERANKER_USE_GPU: Override reranker GPU usage (true/false) - RERANKER_ENABLED: Override reranker enabled state (true/false) - RERANKER_POOL_ENABLED: Enable reranker high availability pool - RERANKER_STRATEGY: Load balance strategy for reranker - RERANKER_COOLDOWN: Rate limit cooldown for reranker - """ - from .env_config import load_env_file - - env_vars = load_env_file(self.data_dir / ".env") - if not env_vars: - return - - def get_env(key: str) -> str | None: - """Get env var with or without CODEXLENS_ prefix.""" - # Check prefixed version first (Dashboard format), then unprefixed - return env_vars.get(f"CODEXLENS_{key}") or env_vars.get(key) - - def _parse_bool(value: str) -> bool: - return value.strip().lower() in {"true", "1", "yes", "on"} - - # Cascade overrides - cascade_enabled = get_env("ENABLE_CASCADE_SEARCH") - if cascade_enabled: - self.enable_cascade_search = _parse_bool(cascade_enabled) - log.debug( - "Overriding enable_cascade_search from .env: %s", - self.enable_cascade_search, - ) - - cascade_strategy = get_env("CASCADE_STRATEGY") - if cascade_strategy: - strategy = cascade_strategy.strip().lower() - if strategy in {"binary", "binary_rerank", "dense_rerank", "staged"}: - self.cascade_strategy = strategy - log.debug("Overriding cascade_strategy from .env: %s", self.cascade_strategy) - elif strategy == "hybrid": - self.cascade_strategy = "binary_rerank" - log.debug("Overriding cascade_strategy from .env: %s", self.cascade_strategy) - else: - log.warning("Invalid CASCADE_STRATEGY in .env: %r", cascade_strategy) - - cascade_coarse_k = get_env("CASCADE_COARSE_K") - if cascade_coarse_k: - try: - self.cascade_coarse_k = int(cascade_coarse_k) - log.debug("Overriding cascade_coarse_k from .env: %s", self.cascade_coarse_k) - except ValueError: - log.warning("Invalid CASCADE_COARSE_K in .env: %r", cascade_coarse_k) - - cascade_fine_k = get_env("CASCADE_FINE_K") - if cascade_fine_k: - try: - self.cascade_fine_k = int(cascade_fine_k) - log.debug("Overriding cascade_fine_k from .env: %s", self.cascade_fine_k) - except ValueError: - log.warning("Invalid CASCADE_FINE_K in .env: %r", cascade_fine_k) - - # Embedding overrides - embedding_model = get_env("EMBEDDING_MODEL") - if embedding_model: - self.embedding_model = embedding_model - log.debug("Overriding embedding_model from .env: %s", self.embedding_model) - - embedding_backend = get_env("EMBEDDING_BACKEND") - if embedding_backend: - backend = embedding_backend.lower() - # Support 'api' as alias for 'litellm' - if backend == "api": - backend = "litellm" - if backend in {"fastembed", "litellm"}: - self.embedding_backend = backend - log.debug("Overriding embedding_backend from .env: %s", backend) - else: - log.warning("Invalid EMBEDDING_BACKEND in .env: %r", embedding_backend) - - auto_embed_missing = get_env("AUTO_EMBED_MISSING") - if auto_embed_missing: - self.embedding_auto_embed_missing = _parse_bool(auto_embed_missing) - log.debug( - "Overriding embedding_auto_embed_missing from .env: %s", - self.embedding_auto_embed_missing, - ) - - embedding_pool = get_env("EMBEDDING_POOL_ENABLED") - if embedding_pool: - value = embedding_pool.lower() - self.embedding_pool_enabled = value in {"true", "1", "yes", "on"} - log.debug("Overriding embedding_pool_enabled from .env: %s", self.embedding_pool_enabled) - - embedding_strategy = get_env("EMBEDDING_STRATEGY") - if embedding_strategy: - strategy = embedding_strategy.lower() - if strategy in {"round_robin", "latency_aware", "weighted_random"}: - self.embedding_strategy = strategy - log.debug("Overriding embedding_strategy from .env: %s", strategy) - else: - log.warning("Invalid EMBEDDING_STRATEGY in .env: %r", embedding_strategy) - - embedding_cooldown = get_env("EMBEDDING_COOLDOWN") - if embedding_cooldown: - try: - self.embedding_cooldown = float(embedding_cooldown) - log.debug("Overriding embedding_cooldown from .env: %s", self.embedding_cooldown) - except ValueError: - log.warning("Invalid EMBEDDING_COOLDOWN in .env: %r", embedding_cooldown) - - # Reranker overrides - reranker_model = get_env("RERANKER_MODEL") - if reranker_model: - self.reranker_model = reranker_model - log.debug("Overriding reranker_model from .env: %s", self.reranker_model) - - reranker_backend = get_env("RERANKER_BACKEND") - if reranker_backend: - backend = reranker_backend.lower() - if backend in {"fastembed", "onnx", "api", "litellm", "legacy"}: - self.reranker_backend = backend - log.debug("Overriding reranker_backend from .env: %s", backend) - else: - log.warning("Invalid RERANKER_BACKEND in .env: %r", reranker_backend) - - reranker_use_gpu = get_env("RERANKER_USE_GPU") - if reranker_use_gpu: - self.reranker_use_gpu = _parse_bool(reranker_use_gpu) - log.debug("Overriding reranker_use_gpu from .env: %s", self.reranker_use_gpu) - - reranker_enabled = get_env("RERANKER_ENABLED") - if reranker_enabled: - value = reranker_enabled.lower() - self.enable_cross_encoder_rerank = value in {"true", "1", "yes", "on"} - log.debug("Overriding reranker_enabled from .env: %s", self.enable_cross_encoder_rerank) - - reranker_pool = get_env("RERANKER_POOL_ENABLED") - if reranker_pool: - value = reranker_pool.lower() - self.reranker_pool_enabled = value in {"true", "1", "yes", "on"} - log.debug("Overriding reranker_pool_enabled from .env: %s", self.reranker_pool_enabled) - - reranker_strategy = get_env("RERANKER_STRATEGY") - if reranker_strategy: - strategy = reranker_strategy.lower() - if strategy in {"round_robin", "latency_aware", "weighted_random"}: - self.reranker_strategy = strategy - log.debug("Overriding reranker_strategy from .env: %s", strategy) - else: - log.warning("Invalid RERANKER_STRATEGY in .env: %r", reranker_strategy) - - reranker_cooldown = get_env("RERANKER_COOLDOWN") - if reranker_cooldown: - try: - self.reranker_cooldown = float(reranker_cooldown) - log.debug("Overriding reranker_cooldown from .env: %s", self.reranker_cooldown) - except ValueError: - log.warning("Invalid RERANKER_COOLDOWN in .env: %r", reranker_cooldown) - - reranker_max_tokens = get_env("RERANKER_MAX_INPUT_TOKENS") - if reranker_max_tokens: - try: - self.reranker_max_input_tokens = int(reranker_max_tokens) - log.debug("Overriding reranker_max_input_tokens from .env: %s", self.reranker_max_input_tokens) - except ValueError: - log.warning("Invalid RERANKER_MAX_INPUT_TOKENS in .env: %r", reranker_max_tokens) - - # Reranker tuning from environment - test_penalty = get_env("RERANKER_TEST_FILE_PENALTY") - if test_penalty: - try: - self.reranker_test_file_penalty = float(test_penalty) - log.debug("Overriding reranker_test_file_penalty from .env: %s", self.reranker_test_file_penalty) - except ValueError: - log.warning("Invalid RERANKER_TEST_FILE_PENALTY in .env: %r", test_penalty) - - ranking_test_penalty = get_env("TEST_FILE_PENALTY") - if ranking_test_penalty: - try: - self.test_file_penalty = float(ranking_test_penalty) - log.debug("Overriding test_file_penalty from .env: %s", self.test_file_penalty) - except ValueError: - log.warning("Invalid TEST_FILE_PENALTY in .env: %r", ranking_test_penalty) - - generated_penalty = get_env("GENERATED_FILE_PENALTY") - if generated_penalty: - try: - self.generated_file_penalty = float(generated_penalty) - log.debug( - "Overriding generated_file_penalty from .env: %s", - self.generated_file_penalty, - ) - except ValueError: - log.warning("Invalid GENERATED_FILE_PENALTY in .env: %r", generated_penalty) - - docstring_weight = get_env("RERANKER_DOCSTRING_WEIGHT") - if docstring_weight: - try: - weight = float(docstring_weight) - self.reranker_chunk_type_weights = {"code": 1.0, "docstring": weight} - log.debug("Overriding reranker docstring weight from .env: %s", weight) - except ValueError: - log.warning("Invalid RERANKER_DOCSTRING_WEIGHT in .env: %r", docstring_weight) - - # Chunk stripping from environment - strip_comments = get_env("CHUNK_STRIP_COMMENTS") - if strip_comments: - self.chunk_strip_comments = strip_comments.lower() in ("true", "1", "yes") - log.debug("Overriding chunk_strip_comments from .env: %s", self.chunk_strip_comments) - - strip_docstrings = get_env("CHUNK_STRIP_DOCSTRINGS") - if strip_docstrings: - self.chunk_strip_docstrings = strip_docstrings.lower() in ("true", "1", "yes") - log.debug("Overriding chunk_strip_docstrings from .env: %s", self.chunk_strip_docstrings) - - # Staged cascade overrides - staged_stage2_mode = get_env("STAGED_STAGE2_MODE") - if staged_stage2_mode: - mode = staged_stage2_mode.strip().lower() - if mode in {"precomputed", "realtime", "static_global_graph"}: - self.staged_stage2_mode = mode - log.debug("Overriding staged_stage2_mode from .env: %s", self.staged_stage2_mode) - elif mode in {"live"}: - self.staged_stage2_mode = "realtime" - log.debug("Overriding staged_stage2_mode from .env: %s", self.staged_stage2_mode) - else: - log.warning("Invalid STAGED_STAGE2_MODE in .env: %r", staged_stage2_mode) - - staged_clustering_strategy = get_env("STAGED_CLUSTERING_STRATEGY") - if staged_clustering_strategy: - strategy = staged_clustering_strategy.strip().lower() - if strategy in {"auto", "hdbscan", "dbscan", "frequency", "noop", "score", "dir_rr", "path"}: - self.staged_clustering_strategy = strategy - log.debug( - "Overriding staged_clustering_strategy from .env: %s", - self.staged_clustering_strategy, - ) - elif strategy in {"none", "off"}: - self.staged_clustering_strategy = "noop" - log.debug( - "Overriding staged_clustering_strategy from .env: %s", - self.staged_clustering_strategy, - ) - else: - log.warning( - "Invalid STAGED_CLUSTERING_STRATEGY in .env: %r", - staged_clustering_strategy, - ) - - staged_clustering_min_size = get_env("STAGED_CLUSTERING_MIN_SIZE") - if staged_clustering_min_size: - try: - self.staged_clustering_min_size = int(staged_clustering_min_size) - log.debug( - "Overriding staged_clustering_min_size from .env: %s", - self.staged_clustering_min_size, - ) - except ValueError: - log.warning( - "Invalid STAGED_CLUSTERING_MIN_SIZE in .env: %r", - staged_clustering_min_size, - ) - - enable_staged_rerank = get_env("ENABLE_STAGED_RERANK") - if enable_staged_rerank: - self.enable_staged_rerank = _parse_bool(enable_staged_rerank) - log.debug("Overriding enable_staged_rerank from .env: %s", self.enable_staged_rerank) - - rt_timeout = get_env("STAGED_REALTIME_LSP_TIMEOUT_S") - if rt_timeout: - try: - self.staged_realtime_lsp_timeout_s = float(rt_timeout) - log.debug( - "Overriding staged_realtime_lsp_timeout_s from .env: %s", - self.staged_realtime_lsp_timeout_s, - ) - except ValueError: - log.warning("Invalid STAGED_REALTIME_LSP_TIMEOUT_S in .env: %r", rt_timeout) - - rt_depth = get_env("STAGED_REALTIME_LSP_DEPTH") - if rt_depth: - try: - self.staged_realtime_lsp_depth = int(rt_depth) - log.debug( - "Overriding staged_realtime_lsp_depth from .env: %s", - self.staged_realtime_lsp_depth, - ) - except ValueError: - log.warning("Invalid STAGED_REALTIME_LSP_DEPTH in .env: %r", rt_depth) - - rt_max_nodes = get_env("STAGED_REALTIME_LSP_MAX_NODES") - if rt_max_nodes: - try: - self.staged_realtime_lsp_max_nodes = int(rt_max_nodes) - log.debug( - "Overriding staged_realtime_lsp_max_nodes from .env: %s", - self.staged_realtime_lsp_max_nodes, - ) - except ValueError: - log.warning("Invalid STAGED_REALTIME_LSP_MAX_NODES in .env: %r", rt_max_nodes) - - rt_max_seeds = get_env("STAGED_REALTIME_LSP_MAX_SEEDS") - if rt_max_seeds: - try: - self.staged_realtime_lsp_max_seeds = int(rt_max_seeds) - log.debug( - "Overriding staged_realtime_lsp_max_seeds from .env: %s", - self.staged_realtime_lsp_max_seeds, - ) - except ValueError: - log.warning("Invalid STAGED_REALTIME_LSP_MAX_SEEDS in .env: %r", rt_max_seeds) - - rt_max_concurrent = get_env("STAGED_REALTIME_LSP_MAX_CONCURRENT") - if rt_max_concurrent: - try: - self.staged_realtime_lsp_max_concurrent = int(rt_max_concurrent) - log.debug( - "Overriding staged_realtime_lsp_max_concurrent from .env: %s", - self.staged_realtime_lsp_max_concurrent, - ) - except ValueError: - log.warning( - "Invalid STAGED_REALTIME_LSP_MAX_CONCURRENT in .env: %r", - rt_max_concurrent, - ) - - rt_warmup = get_env("STAGED_REALTIME_LSP_WARMUP_S") - if rt_warmup: - try: - self.staged_realtime_lsp_warmup_s = float(rt_warmup) - log.debug( - "Overriding staged_realtime_lsp_warmup_s from .env: %s", - self.staged_realtime_lsp_warmup_s, - ) - except ValueError: - log.warning("Invalid STAGED_REALTIME_LSP_WARMUP_S in .env: %r", rt_warmup) - - rt_resolve = get_env("STAGED_REALTIME_LSP_RESOLVE_SYMBOLS") - if rt_resolve: - self.staged_realtime_lsp_resolve_symbols = _parse_bool(rt_resolve) - log.debug( - "Overriding staged_realtime_lsp_resolve_symbols from .env: %s", - self.staged_realtime_lsp_resolve_symbols, - ) - - @classmethod - def load(cls) -> "Config": - """Load config with settings from file.""" - config = cls() - config.load_settings() - return config - - -@dataclass -class WorkspaceConfig: - """Workspace-local configuration for CodexLens. - - Stores index data in project/.codexlens/ directory. - """ - - workspace_root: Path - - def __post_init__(self) -> None: - self.workspace_root = Path(self.workspace_root).resolve() - - @property - def codexlens_dir(self) -> Path: - """The .codexlens directory in workspace root.""" - return self.workspace_root / WORKSPACE_DIR_NAME - - @property - def db_path(self) -> Path: - """SQLite index path for this workspace.""" - return self.codexlens_dir / "index.db" - - @property - def cache_dir(self) -> Path: - """Cache directory for this workspace.""" - return self.codexlens_dir / "cache" - - @property - def env_path(self) -> Path: - """Path to workspace .env file.""" - return self.codexlens_dir / ".env" - - def load_env(self, *, override: bool = False) -> int: - """Load .env file and apply to os.environ. - - Args: - override: If True, override existing environment variables - - Returns: - Number of variables applied - """ - from .env_config import apply_workspace_env - return apply_workspace_env(self.workspace_root, override=override) - - def get_api_config(self, prefix: str) -> dict: - """Get API configuration from environment. - - Args: - prefix: Environment variable prefix (e.g., "RERANKER", "EMBEDDING") - - Returns: - Dictionary with api_key, api_base, model, etc. - """ - from .env_config import get_api_config - return get_api_config(prefix, workspace_root=self.workspace_root) - - def initialize(self) -> None: - """Create the .codexlens directory structure.""" - try: - self.codexlens_dir.mkdir(parents=True, exist_ok=True) - self.cache_dir.mkdir(parents=True, exist_ok=True) - - # Create .gitignore to exclude cache but keep index - gitignore_path = self.codexlens_dir / ".gitignore" - if not gitignore_path.exists(): - gitignore_path.write_text( - "# CodexLens workspace data\n" - "cache/\n" - "*.log\n" - ".env\n" # Exclude .env from git - ) - except Exception as exc: - raise ConfigError(f"Failed to initialize workspace at {self.codexlens_dir}: {exc}") from exc - - def exists(self) -> bool: - """Check if workspace is already initialized.""" - return self.codexlens_dir.is_dir() and self.db_path.exists() - - @classmethod - def from_path(cls, path: Path) -> Optional["WorkspaceConfig"]: - """Create WorkspaceConfig from a path by finding workspace root. - - Returns None if no workspace found. - """ - root = find_workspace_root(path) - if root is None: - return None - return cls(workspace_root=root) - - @classmethod - def create_at(cls, path: Path) -> "WorkspaceConfig": - """Create a new workspace at the given path.""" - config = cls(workspace_root=path) - config.initialize() - return config diff --git a/codex-lens/src/codexlens/entities.py b/codex-lens/src/codexlens/entities.py deleted file mode 100644 index d569cc3e..00000000 --- a/codex-lens/src/codexlens/entities.py +++ /dev/null @@ -1,128 +0,0 @@ -"""Pydantic entity models for CodexLens.""" - -from __future__ import annotations - -import math -from enum import Enum -from typing import Any, Dict, List, Optional, Tuple - -from pydantic import BaseModel, Field, field_validator - - -class Symbol(BaseModel): - """A code symbol discovered in a file.""" - - name: str = Field(..., min_length=1) - kind: str = Field(..., min_length=1) - range: Tuple[int, int] = Field(..., description="(start_line, end_line), 1-based inclusive") - file: Optional[str] = Field(default=None, description="Full path to the file containing this symbol") - - @field_validator("range") - @classmethod - def validate_range(cls, value: Tuple[int, int]) -> Tuple[int, int]: - if len(value) != 2: - raise ValueError("range must be a (start_line, end_line) tuple") - start_line, end_line = value - if start_line < 1 or end_line < 1: - raise ValueError("range lines must be >= 1") - if end_line < start_line: - raise ValueError("end_line must be >= start_line") - return value - - -class SemanticChunk(BaseModel): - """A semantically meaningful chunk of content, optionally embedded.""" - - content: str = Field(..., min_length=1) - embedding: Optional[List[float]] = Field(default=None, description="Vector embedding for semantic search") - metadata: Dict[str, Any] = Field(default_factory=dict) - id: Optional[int] = Field(default=None, description="Database row ID") - file_path: Optional[str] = Field(default=None, description="Source file path") - - @field_validator("embedding") - @classmethod - def validate_embedding(cls, value: Optional[List[float]]) -> Optional[List[float]]: - if value is None: - return value - if not value: - raise ValueError("embedding cannot be empty when provided") - norm = math.sqrt(sum(x * x for x in value)) - epsilon = 1e-10 - if norm < epsilon: - raise ValueError("embedding cannot be a zero vector") - return value - - -class IndexedFile(BaseModel): - """An indexed source file with symbols and optional semantic chunks.""" - - path: str = Field(..., min_length=1) - language: str = Field(..., min_length=1) - symbols: List[Symbol] = Field(default_factory=list) - chunks: List[SemanticChunk] = Field(default_factory=list) - relationships: List["CodeRelationship"] = Field(default_factory=list) - - @field_validator("path", "language") - @classmethod - def strip_and_validate_nonempty(cls, value: str) -> str: - cleaned = value.strip() - if not cleaned: - raise ValueError("value cannot be blank") - return cleaned - - -class RelationshipType(str, Enum): - """Types of code relationships.""" - CALL = "calls" - INHERITS = "inherits" - IMPORTS = "imports" - - -class CodeRelationship(BaseModel): - """A relationship between code symbols (e.g., function calls, inheritance).""" - - source_symbol: str = Field(..., min_length=1, description="Name of source symbol") - target_symbol: str = Field(..., min_length=1, description="Name of target symbol") - relationship_type: RelationshipType = Field(..., description="Type of relationship (call, inherits, etc.)") - source_file: str = Field(..., min_length=1, description="File path containing source symbol") - target_file: Optional[str] = Field(default=None, description="File path containing target (None if same file)") - source_line: int = Field(..., ge=1, description="Line number where relationship occurs (1-based)") - - -class AdditionalLocation(BaseModel): - """A pointer to another location where a similar result was found. - - Used for grouping search results with similar scores and content, - where the primary result is stored in SearchResult and secondary - locations are stored in this model. - """ - - path: str = Field(..., min_length=1) - score: float = Field(..., ge=0.0) - start_line: Optional[int] = Field(default=None, description="Start line of the result (1-based)") - end_line: Optional[int] = Field(default=None, description="End line of the result (1-based)") - symbol_name: Optional[str] = Field(default=None, description="Name of matched symbol") - - -class SearchResult(BaseModel): - """A unified search result for lexical or semantic search.""" - - path: str = Field(..., min_length=1) - score: float = Field(..., ge=0.0) - excerpt: Optional[str] = None - content: Optional[str] = Field(default=None, description="Full content of matched code block") - symbol: Optional[Symbol] = None - chunk: Optional[SemanticChunk] = None - metadata: Dict[str, Any] = Field(default_factory=dict) - - # Additional context for complete code blocks - start_line: Optional[int] = Field(default=None, description="Start line of code block (1-based)") - end_line: Optional[int] = Field(default=None, description="End line of code block (1-based)") - symbol_name: Optional[str] = Field(default=None, description="Name of matched symbol/function/class") - symbol_kind: Optional[str] = Field(default=None, description="Kind of symbol (function/class/method)") - - # Field for grouping similar results - additional_locations: List["AdditionalLocation"] = Field( - default_factory=list, - description="Other locations for grouped results with similar scores and content." - ) diff --git a/codex-lens/src/codexlens/env_config.py b/codex-lens/src/codexlens/env_config.py deleted file mode 100644 index 8f1b1b0f..00000000 --- a/codex-lens/src/codexlens/env_config.py +++ /dev/null @@ -1,329 +0,0 @@ -"""Environment configuration loader for CodexLens. - -Loads .env files from workspace .codexlens directory with fallback to project root. -Provides unified access to API configurations. - -Priority order: -1. Environment variables (already set) -2. .codexlens/.env (workspace-local) -3. .env (project root) -""" - -from __future__ import annotations - -import logging -import os -from pathlib import Path -from typing import Any, Dict, Optional - -log = logging.getLogger(__name__) - -# Supported environment variables with descriptions -ENV_VARS = { - # Reranker configuration (overrides settings.json) - "RERANKER_MODEL": "Reranker model name (overrides settings.json)", - "RERANKER_BACKEND": "Reranker backend: fastembed, onnx, api, litellm, legacy", - "RERANKER_USE_GPU": "Use GPU for local reranker backends: true/false", - "RERANKER_ENABLED": "Enable reranker: true/false", - "RERANKER_API_KEY": "API key for reranker service (SiliconFlow/Cohere/Jina)", - "RERANKER_API_BASE": "Base URL for reranker API (overrides provider default)", - "RERANKER_PROVIDER": "Reranker provider: siliconflow, cohere, jina", - "RERANKER_POOL_ENABLED": "Enable reranker high availability pool: true/false", - "RERANKER_STRATEGY": "Reranker load balance strategy: round_robin, latency_aware, weighted_random", - "RERANKER_COOLDOWN": "Reranker rate limit cooldown in seconds", - # Embedding configuration (overrides settings.json) - "EMBEDDING_MODEL": "Embedding model/profile name (overrides settings.json)", - "EMBEDDING_BACKEND": "Embedding backend: fastembed, litellm", - "AUTO_EMBED_MISSING": "Auto-build embeddings in background when indexed projects are searched without vectors: true/false", - "EMBEDDING_API_KEY": "API key for embedding service", - "EMBEDDING_API_BASE": "Base URL for embedding API", - "EMBEDDING_POOL_ENABLED": "Enable embedding high availability pool: true/false", - "EMBEDDING_STRATEGY": "Embedding load balance strategy: round_robin, latency_aware, weighted_random", - "EMBEDDING_COOLDOWN": "Embedding rate limit cooldown in seconds", - # LiteLLM configuration - "LITELLM_API_KEY": "API key for LiteLLM", - "LITELLM_API_BASE": "Base URL for LiteLLM", - "LITELLM_MODEL": "LiteLLM model name", - # General configuration - "CODEXLENS_DATA_DIR": "Custom data directory path", - "CODEXLENS_DEBUG": "Enable debug mode (true/false)", - # Cascade / staged pipeline configuration - "ENABLE_CASCADE_SEARCH": "Enable cascade search (true/false)", - "CASCADE_STRATEGY": "Cascade strategy: binary, binary_rerank (alias: hybrid), dense_rerank, staged", - "CASCADE_COARSE_K": "Cascade coarse_k candidate count (int)", - "CASCADE_FINE_K": "Cascade fine_k result count (int)", - "STAGED_STAGE2_MODE": "Staged Stage 2 mode: precomputed, realtime, static_global_graph", - "STAGED_CLUSTERING_STRATEGY": "Staged clustering strategy: auto, score, path, dir_rr, noop, ...", - "STAGED_CLUSTERING_MIN_SIZE": "Staged clustering min cluster size (int)", - "ENABLE_STAGED_RERANK": "Enable staged reranking in Stage 4 (true/false)", - "STAGED_REALTIME_LSP_TIMEOUT_S": "Realtime LSP expansion timeout budget (float seconds)", - "STAGED_REALTIME_LSP_DEPTH": "Realtime LSP BFS depth (int)", - "STAGED_REALTIME_LSP_MAX_NODES": "Realtime LSP max nodes (int)", - "STAGED_REALTIME_LSP_MAX_SEEDS": "Realtime LSP max seeds (int)", - "STAGED_REALTIME_LSP_MAX_CONCURRENT": "Realtime LSP max concurrent requests (int)", - "STAGED_REALTIME_LSP_WARMUP_S": "Realtime LSP warmup wait after didOpen (float seconds)", - "STAGED_REALTIME_LSP_RESOLVE_SYMBOLS": "Resolve symbols via documentSymbol in realtime expansion (true/false)", - # Chunking configuration - "CHUNK_STRIP_COMMENTS": "Strip comments from code chunks for embedding: true/false (default: true)", - "CHUNK_STRIP_DOCSTRINGS": "Strip docstrings from code chunks for embedding: true/false (default: true)", - # Search ranking tuning - "TEST_FILE_PENALTY": "Penalty for test/fixture paths in final search ranking: 0.0-1.0 (default: 0.15)", - "GENERATED_FILE_PENALTY": "Penalty for generated/build artifact paths in final search ranking: 0.0-1.0 (default: 0.35)", - # Reranker tuning - "RERANKER_TEST_FILE_PENALTY": "Penalty for test files in reranking: 0.0-1.0 (default: 0.0)", - "RERANKER_DOCSTRING_WEIGHT": "Weight for docstring chunks in reranking: 0.0-1.0 (default: 1.0)", -} - - -def _parse_env_line(line: str) -> tuple[str, str] | None: - """Parse a single .env line, returning (key, value) or None.""" - line = line.strip() - - # Skip empty lines and comments - if not line or line.startswith("#"): - return None - - # Handle export prefix - if line.startswith("export "): - line = line[7:].strip() - - # Split on first = - if "=" not in line: - return None - - key, _, value = line.partition("=") - key = key.strip() - value = value.strip() - - # Remove surrounding quotes - if len(value) >= 2: - if (value.startswith('"') and value.endswith('"')) or \ - (value.startswith("'") and value.endswith("'")): - value = value[1:-1] - - return key, value - - -def load_env_file(env_path: Path) -> Dict[str, str]: - """Load environment variables from a .env file. - - Args: - env_path: Path to .env file - - Returns: - Dictionary of environment variables - """ - if not env_path.is_file(): - return {} - - env_vars: Dict[str, str] = {} - - try: - content = env_path.read_text(encoding="utf-8") - for line in content.splitlines(): - result = _parse_env_line(line) - if result: - key, value = result - env_vars[key] = value - except (OSError, UnicodeDecodeError) as exc: - # File access errors or encoding issues are expected and logged - log.warning("Failed to load .env file %s: %s", env_path, exc) - except Exception as exc: - # Other unexpected errors are also logged but indicate a code issue - log.warning("Unexpected error loading .env file %s: %s", env_path, exc) - - return env_vars - - -def _get_global_data_dir() -> Path: - """Get global CodexLens data directory.""" - env_override = os.environ.get("CODEXLENS_DATA_DIR") - if env_override: - return Path(env_override).expanduser().resolve() - return (Path.home() / ".codexlens").resolve() - - -def load_global_env() -> Dict[str, str]: - """Load environment variables from global ~/.codexlens/.env file. - - Returns: - Dictionary of environment variables from global config - """ - global_env_path = _get_global_data_dir() / ".env" - if global_env_path.is_file(): - env_vars = load_env_file(global_env_path) - log.debug("Loaded %d vars from global %s", len(env_vars), global_env_path) - return env_vars - return {} - - -def load_workspace_env(workspace_root: Path | None = None) -> Dict[str, str]: - """Load environment variables from workspace .env files. - - Priority (later overrides earlier): - 1. Global ~/.codexlens/.env (lowest priority) - 2. Project root .env - 3. .codexlens/.env (highest priority) - - Args: - workspace_root: Workspace root directory. If None, uses current directory. - - Returns: - Merged dictionary of environment variables - """ - if workspace_root is None: - workspace_root = Path.cwd() - - workspace_root = Path(workspace_root).resolve() - - env_vars: Dict[str, str] = {} - - # Load from global ~/.codexlens/.env (lowest priority) - global_vars = load_global_env() - if global_vars: - env_vars.update(global_vars) - - # Load from project root .env (medium priority) - root_env = workspace_root / ".env" - if root_env.is_file(): - loaded = load_env_file(root_env) - env_vars.update(loaded) - log.debug("Loaded %d vars from %s", len(loaded), root_env) - - # Load from .codexlens/.env (highest priority) - codexlens_env = workspace_root / ".codexlens" / ".env" - if codexlens_env.is_file(): - loaded = load_env_file(codexlens_env) - env_vars.update(loaded) - log.debug("Loaded %d vars from %s", len(loaded), codexlens_env) - - return env_vars - - -def apply_workspace_env(workspace_root: Path | None = None, *, override: bool = False) -> int: - """Load .env files and apply to os.environ. - - Args: - workspace_root: Workspace root directory - override: If True, override existing environment variables - - Returns: - Number of variables applied - """ - env_vars = load_workspace_env(workspace_root) - applied = 0 - - for key, value in env_vars.items(): - if override or key not in os.environ: - os.environ[key] = value - applied += 1 - log.debug("Applied env var: %s", key) - - return applied - - -def get_env(key: str, default: str | None = None, *, workspace_root: Path | None = None) -> str | None: - """Get environment variable with .env file fallback. - - Priority: - 1. os.environ (already set) - 2. .codexlens/.env - 3. .env - 4. default value - - Args: - key: Environment variable name - default: Default value if not found - workspace_root: Workspace root for .env file lookup - - Returns: - Value or default - """ - # Check os.environ first - if key in os.environ: - return os.environ[key] - - # Load from .env files - env_vars = load_workspace_env(workspace_root) - if key in env_vars: - return env_vars[key] - - return default - - -def get_api_config( - prefix: str, - *, - workspace_root: Path | None = None, - defaults: Dict[str, Any] | None = None, -) -> Dict[str, Any]: - """Get API configuration from environment. - - Loads {PREFIX}_API_KEY, {PREFIX}_API_BASE, {PREFIX}_MODEL, etc. - - Args: - prefix: Environment variable prefix (e.g., "RERANKER", "EMBEDDING") - workspace_root: Workspace root for .env file lookup - defaults: Default values - - Returns: - Dictionary with api_key, api_base, model, etc. - """ - defaults = defaults or {} - - config: Dict[str, Any] = {} - - # Standard API config fields - field_mapping = { - "api_key": f"{prefix}_API_KEY", - "api_base": f"{prefix}_API_BASE", - "model": f"{prefix}_MODEL", - "provider": f"{prefix}_PROVIDER", - "timeout": f"{prefix}_TIMEOUT", - } - - for field, env_key in field_mapping.items(): - value = get_env(env_key, workspace_root=workspace_root) - if value is not None: - # Type conversion for specific fields - if field == "timeout": - try: - config[field] = float(value) - except ValueError: - pass - else: - config[field] = value - elif field in defaults: - config[field] = defaults[field] - - return config - - -def generate_env_example() -> str: - """Generate .env.example content with all supported variables. - - Returns: - String content for .env.example file - """ - lines = [ - "# CodexLens Environment Configuration", - "# Copy this file to .codexlens/.env and fill in your values", - "", - ] - - # Group by prefix - groups: Dict[str, list] = {} - for key, desc in ENV_VARS.items(): - prefix = key.split("_")[0] - if prefix not in groups: - groups[prefix] = [] - groups[prefix].append((key, desc)) - - for prefix, items in groups.items(): - lines.append(f"# {prefix} Configuration") - for key, desc in items: - lines.append(f"# {desc}") - lines.append(f"# {key}=") - lines.append("") - - return "\n".join(lines) diff --git a/codex-lens/src/codexlens/errors.py b/codex-lens/src/codexlens/errors.py deleted file mode 100644 index cdaafa74..00000000 --- a/codex-lens/src/codexlens/errors.py +++ /dev/null @@ -1,59 +0,0 @@ -"""CodexLens exception hierarchy.""" - -from __future__ import annotations - - -class CodexLensError(Exception): - """Base class for all CodexLens errors.""" - - -class ConfigError(CodexLensError): - """Raised when configuration is invalid or cannot be loaded.""" - - -class ParseError(CodexLensError): - """Raised when parsing or indexing a file fails.""" - - -class StorageError(CodexLensError): - """Raised when reading/writing index storage fails. - - Attributes: - message: Human-readable error description - db_path: Path to the database file (if applicable) - operation: The operation that failed (e.g., 'query', 'initialize', 'migrate') - details: Additional context for debugging - """ - - def __init__( - self, - message: str, - db_path: str | None = None, - operation: str | None = None, - details: dict | None = None - ) -> None: - super().__init__(message) - self.message = message - self.db_path = db_path - self.operation = operation - self.details = details or {} - - def __str__(self) -> str: - parts = [self.message] - if self.db_path: - parts.append(f"[db: {self.db_path}]") - if self.operation: - parts.append(f"[op: {self.operation}]") - if self.details: - detail_str = ", ".join(f"{k}={v}" for k, v in self.details.items()) - parts.append(f"[{detail_str}]") - return " ".join(parts) - - -class SearchError(CodexLensError): - """Raised when a search operation fails.""" - - -class IndexNotFoundError(CodexLensError): - """Raised when a project's index cannot be found.""" - diff --git a/codex-lens/src/codexlens/hybrid_search/__init__.py b/codex-lens/src/codexlens/hybrid_search/__init__.py deleted file mode 100644 index 03dd31b3..00000000 --- a/codex-lens/src/codexlens/hybrid_search/__init__.py +++ /dev/null @@ -1,28 +0,0 @@ -"""Hybrid Search data structures for CodexLens. - -This module provides core data structures for hybrid search: -- CodeSymbolNode: Graph node representing a code symbol -- CodeAssociationGraph: Graph of code relationships -- SearchResultCluster: Clustered search results -- Range: Position range in source files -- CallHierarchyItem: LSP call hierarchy item - -Note: The search engine is in codexlens.search.hybrid_search - LSP-based expansion is in codexlens.lsp module -""" - -from codexlens.hybrid_search.data_structures import ( - CallHierarchyItem, - CodeAssociationGraph, - CodeSymbolNode, - Range, - SearchResultCluster, -) - -__all__ = [ - "CallHierarchyItem", - "CodeAssociationGraph", - "CodeSymbolNode", - "Range", - "SearchResultCluster", -] diff --git a/codex-lens/src/codexlens/hybrid_search/data_structures.py b/codex-lens/src/codexlens/hybrid_search/data_structures.py deleted file mode 100644 index 898971d0..00000000 --- a/codex-lens/src/codexlens/hybrid_search/data_structures.py +++ /dev/null @@ -1,602 +0,0 @@ -"""Core data structures for the hybrid search system. - -This module defines the fundamental data structures used throughout the -hybrid search pipeline, including code symbol representations, association -graphs, and clustered search results. -""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING - -if TYPE_CHECKING: - import networkx as nx - - -@dataclass -class Range: - """Position range within a source file. - - Attributes: - start_line: Starting line number (0-based). - start_character: Starting character offset within the line. - end_line: Ending line number (0-based). - end_character: Ending character offset within the line. - """ - - start_line: int - start_character: int - end_line: int - end_character: int - - def __post_init__(self) -> None: - """Validate range values.""" - if self.start_line < 0: - raise ValueError("start_line must be >= 0") - if self.start_character < 0: - raise ValueError("start_character must be >= 0") - if self.end_line < 0: - raise ValueError("end_line must be >= 0") - if self.end_character < 0: - raise ValueError("end_character must be >= 0") - if self.end_line < self.start_line: - raise ValueError("end_line must be >= start_line") - if self.end_line == self.start_line and self.end_character < self.start_character: - raise ValueError("end_character must be >= start_character on the same line") - - def to_dict(self) -> Dict[str, Any]: - """Convert to dictionary for JSON serialization.""" - return { - "start": {"line": self.start_line, "character": self.start_character}, - "end": {"line": self.end_line, "character": self.end_character}, - } - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> Range: - """Create Range from dictionary representation.""" - return cls( - start_line=data["start"]["line"], - start_character=data["start"]["character"], - end_line=data["end"]["line"], - end_character=data["end"]["character"], - ) - - @classmethod - def from_lsp_range(cls, lsp_range: Dict[str, Any]) -> Range: - """Create Range from LSP Range object. - - LSP Range format: - {"start": {"line": int, "character": int}, - "end": {"line": int, "character": int}} - """ - return cls( - start_line=lsp_range["start"]["line"], - start_character=lsp_range["start"]["character"], - end_line=lsp_range["end"]["line"], - end_character=lsp_range["end"]["character"], - ) - - -@dataclass -class CallHierarchyItem: - """LSP CallHierarchyItem for representing callers/callees. - - Attributes: - name: Symbol name (function, method, class name). - kind: Symbol kind (function, method, class, etc.). - file_path: Absolute file path where the symbol is defined. - range: Position range in the source file. - detail: Optional additional detail about the symbol. - """ - - name: str - kind: str - file_path: str - range: Range - detail: Optional[str] = None - - def to_dict(self) -> Dict[str, Any]: - """Convert to dictionary for JSON serialization.""" - result: Dict[str, Any] = { - "name": self.name, - "kind": self.kind, - "file_path": self.file_path, - "range": self.range.to_dict(), - } - if self.detail: - result["detail"] = self.detail - return result - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "CallHierarchyItem": - """Create CallHierarchyItem from dictionary representation.""" - return cls( - name=data["name"], - kind=data["kind"], - file_path=data["file_path"], - range=Range.from_dict(data["range"]), - detail=data.get("detail"), - ) - - -@dataclass -class CodeSymbolNode: - """Graph node representing a code symbol. - - Attributes: - id: Unique identifier in format 'file_path:name:line'. - name: Symbol name (function, class, variable name). - kind: Symbol kind (function, class, method, variable, etc.). - file_path: Absolute file path where symbol is defined. - range: Start/end position in the source file. - embedding: Optional vector embedding for semantic search. - raw_code: Raw source code of the symbol. - docstring: Documentation string (if available). - score: Ranking score (used during reranking). - """ - - id: str - name: str - kind: str - file_path: str - range: Range - embedding: Optional[List[float]] = None - raw_code: str = "" - docstring: str = "" - score: float = 0.0 - - def __post_init__(self) -> None: - """Validate required fields.""" - if not self.id: - raise ValueError("id cannot be empty") - if not self.name: - raise ValueError("name cannot be empty") - if not self.kind: - raise ValueError("kind cannot be empty") - if not self.file_path: - raise ValueError("file_path cannot be empty") - - def __hash__(self) -> int: - """Hash based on unique ID.""" - return hash(self.id) - - def __eq__(self, other: object) -> bool: - """Equality based on unique ID.""" - if not isinstance(other, CodeSymbolNode): - return False - return self.id == other.id - - def to_dict(self) -> Dict[str, Any]: - """Convert to dictionary for JSON serialization.""" - result: Dict[str, Any] = { - "id": self.id, - "name": self.name, - "kind": self.kind, - "file_path": self.file_path, - "range": self.range.to_dict(), - "score": self.score, - } - if self.raw_code: - result["raw_code"] = self.raw_code - if self.docstring: - result["docstring"] = self.docstring - # Exclude embedding from serialization (too large for JSON responses) - return result - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> CodeSymbolNode: - """Create CodeSymbolNode from dictionary representation.""" - return cls( - id=data["id"], - name=data["name"], - kind=data["kind"], - file_path=data["file_path"], - range=Range.from_dict(data["range"]), - embedding=data.get("embedding"), - raw_code=data.get("raw_code", ""), - docstring=data.get("docstring", ""), - score=data.get("score", 0.0), - ) - - @classmethod - def from_lsp_location( - cls, - uri: str, - name: str, - kind: str, - lsp_range: Dict[str, Any], - raw_code: str = "", - docstring: str = "", - ) -> CodeSymbolNode: - """Create CodeSymbolNode from LSP location data. - - Args: - uri: File URI (file:// prefix will be stripped). - name: Symbol name. - kind: Symbol kind. - lsp_range: LSP Range object. - raw_code: Optional raw source code. - docstring: Optional documentation string. - - Returns: - New CodeSymbolNode instance. - """ - # Strip file:// prefix if present - file_path = uri - if file_path.startswith("file://"): - file_path = file_path[7:] - # Handle Windows paths (file:///C:/...) - if len(file_path) > 2 and file_path[0] == "/" and file_path[2] == ":": - file_path = file_path[1:] - - range_obj = Range.from_lsp_range(lsp_range) - symbol_id = f"{file_path}:{name}:{range_obj.start_line}" - - return cls( - id=symbol_id, - name=name, - kind=kind, - file_path=file_path, - range=range_obj, - raw_code=raw_code, - docstring=docstring, - ) - - @classmethod - def create_id(cls, file_path: str, name: str, line: int) -> str: - """Generate a unique symbol ID. - - Args: - file_path: Absolute file path. - name: Symbol name. - line: Start line number. - - Returns: - Unique ID string in format 'file_path:name:line'. - """ - return f"{file_path}:{name}:{line}" - - -@dataclass -class CodeAssociationGraph: - """Graph of code relationships between symbols. - - This graph represents the association between code symbols discovered - through LSP queries (references, call hierarchy, etc.). - - Attributes: - nodes: Dictionary mapping symbol IDs to CodeSymbolNode objects. - edges: List of (from_id, to_id, relationship_type) tuples. - relationship_type: 'calls', 'references', 'inherits', 'imports'. - """ - - nodes: Dict[str, CodeSymbolNode] = field(default_factory=dict) - edges: List[Tuple[str, str, str]] = field(default_factory=list) - - def add_node(self, node: CodeSymbolNode) -> None: - """Add a node to the graph. - - Args: - node: CodeSymbolNode to add. If a node with the same ID exists, - it will be replaced. - """ - self.nodes[node.id] = node - - def add_edge(self, from_id: str, to_id: str, rel_type: str) -> None: - """Add an edge to the graph. - - Args: - from_id: Source node ID. - to_id: Target node ID. - rel_type: Relationship type ('calls', 'references', 'inherits', 'imports'). - - Raises: - ValueError: If from_id or to_id not in graph nodes. - """ - if from_id not in self.nodes: - raise ValueError(f"Source node '{from_id}' not found in graph") - if to_id not in self.nodes: - raise ValueError(f"Target node '{to_id}' not found in graph") - - edge = (from_id, to_id, rel_type) - if edge not in self.edges: - self.edges.append(edge) - - def add_edge_unchecked(self, from_id: str, to_id: str, rel_type: str) -> None: - """Add an edge without validating node existence. - - Use this method during bulk graph construction where nodes may be - added after edges, or when performance is critical. - - Args: - from_id: Source node ID. - to_id: Target node ID. - rel_type: Relationship type. - """ - edge = (from_id, to_id, rel_type) - if edge not in self.edges: - self.edges.append(edge) - - def get_node(self, node_id: str) -> Optional[CodeSymbolNode]: - """Get a node by ID. - - Args: - node_id: Node ID to look up. - - Returns: - CodeSymbolNode if found, None otherwise. - """ - return self.nodes.get(node_id) - - def get_neighbors(self, node_id: str, rel_type: Optional[str] = None) -> List[CodeSymbolNode]: - """Get neighboring nodes connected by outgoing edges. - - Args: - node_id: Node ID to find neighbors for. - rel_type: Optional filter by relationship type. - - Returns: - List of neighboring CodeSymbolNode objects. - """ - neighbors = [] - for from_id, to_id, edge_rel in self.edges: - if from_id == node_id: - if rel_type is None or edge_rel == rel_type: - node = self.nodes.get(to_id) - if node: - neighbors.append(node) - return neighbors - - def get_incoming(self, node_id: str, rel_type: Optional[str] = None) -> List[CodeSymbolNode]: - """Get nodes connected by incoming edges. - - Args: - node_id: Node ID to find incoming connections for. - rel_type: Optional filter by relationship type. - - Returns: - List of CodeSymbolNode objects with edges pointing to node_id. - """ - incoming = [] - for from_id, to_id, edge_rel in self.edges: - if to_id == node_id: - if rel_type is None or edge_rel == rel_type: - node = self.nodes.get(from_id) - if node: - incoming.append(node) - return incoming - - def to_networkx(self) -> "nx.DiGraph": - """Convert to NetworkX DiGraph for graph algorithms. - - Returns: - NetworkX directed graph with nodes and edges. - - Raises: - ImportError: If networkx is not installed. - """ - try: - import networkx as nx - except ImportError: - raise ImportError( - "networkx is required for graph algorithms. " - "Install with: pip install networkx" - ) - - graph = nx.DiGraph() - - # Add nodes with attributes - for node_id, node in self.nodes.items(): - graph.add_node( - node_id, - name=node.name, - kind=node.kind, - file_path=node.file_path, - score=node.score, - ) - - # Add edges with relationship type - for from_id, to_id, rel_type in self.edges: - graph.add_edge(from_id, to_id, relationship=rel_type) - - return graph - - def to_dict(self) -> Dict[str, Any]: - """Convert to dictionary for JSON serialization. - - Returns: - Dictionary with 'nodes' and 'edges' keys. - """ - return { - "nodes": {node_id: node.to_dict() for node_id, node in self.nodes.items()}, - "edges": [ - {"from": from_id, "to": to_id, "relationship": rel_type} - for from_id, to_id, rel_type in self.edges - ], - } - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> CodeAssociationGraph: - """Create CodeAssociationGraph from dictionary representation. - - Args: - data: Dictionary with 'nodes' and 'edges' keys. - - Returns: - New CodeAssociationGraph instance. - """ - graph = cls() - - # Load nodes - for node_id, node_data in data.get("nodes", {}).items(): - graph.nodes[node_id] = CodeSymbolNode.from_dict(node_data) - - # Load edges - for edge_data in data.get("edges", []): - graph.edges.append(( - edge_data["from"], - edge_data["to"], - edge_data["relationship"], - )) - - return graph - - def __len__(self) -> int: - """Return the number of nodes in the graph.""" - return len(self.nodes) - - -@dataclass -class SearchResultCluster: - """Clustered search result containing related code symbols. - - Search results are grouped into clusters based on graph community - detection or embedding similarity. Each cluster represents a - conceptually related group of code symbols. - - Attributes: - cluster_id: Unique cluster identifier. - score: Cluster relevance score (max of symbol scores). - title: Human-readable cluster title/summary. - symbols: List of CodeSymbolNode in this cluster. - metadata: Additional cluster metadata. - """ - - cluster_id: str - score: float - title: str - symbols: List[CodeSymbolNode] = field(default_factory=list) - metadata: Dict[str, Any] = field(default_factory=dict) - - def __post_init__(self) -> None: - """Validate cluster fields.""" - if not self.cluster_id: - raise ValueError("cluster_id cannot be empty") - if self.score < 0: - raise ValueError("score must be >= 0") - - def add_symbol(self, symbol: CodeSymbolNode) -> None: - """Add a symbol to the cluster. - - Args: - symbol: CodeSymbolNode to add. - """ - self.symbols.append(symbol) - - def get_top_symbols(self, n: int = 5) -> List[CodeSymbolNode]: - """Get top N symbols by score. - - Args: - n: Number of symbols to return. - - Returns: - List of top N CodeSymbolNode objects sorted by score descending. - """ - sorted_symbols = sorted(self.symbols, key=lambda s: s.score, reverse=True) - return sorted_symbols[:n] - - def update_score(self) -> None: - """Update cluster score to max of symbol scores.""" - if self.symbols: - self.score = max(s.score for s in self.symbols) - - def to_dict(self) -> Dict[str, Any]: - """Convert to dictionary for JSON serialization. - - Returns: - Dictionary representation of the cluster. - """ - return { - "cluster_id": self.cluster_id, - "score": self.score, - "title": self.title, - "symbols": [s.to_dict() for s in self.symbols], - "metadata": self.metadata, - } - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> SearchResultCluster: - """Create SearchResultCluster from dictionary representation. - - Args: - data: Dictionary with cluster data. - - Returns: - New SearchResultCluster instance. - """ - return cls( - cluster_id=data["cluster_id"], - score=data["score"], - title=data["title"], - symbols=[CodeSymbolNode.from_dict(s) for s in data.get("symbols", [])], - metadata=data.get("metadata", {}), - ) - - def __len__(self) -> int: - """Return the number of symbols in the cluster.""" - return len(self.symbols) - - -@dataclass -class CallHierarchyItem: - """LSP CallHierarchyItem for representing callers/callees. - - Attributes: - name: Symbol name (function, method, etc.). - kind: Symbol kind (function, method, etc.). - file_path: Absolute file path. - range: Position range in the file. - detail: Optional additional detail (e.g., signature). - """ - - name: str - kind: str - file_path: str - range: Range - detail: Optional[str] = None - - def to_dict(self) -> Dict[str, Any]: - """Convert to dictionary for JSON serialization.""" - result: Dict[str, Any] = { - "name": self.name, - "kind": self.kind, - "file_path": self.file_path, - "range": self.range.to_dict(), - } - if self.detail: - result["detail"] = self.detail - return result - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "CallHierarchyItem": - """Create CallHierarchyItem from dictionary representation.""" - return cls( - name=data.get("name", "unknown"), - kind=data.get("kind", "unknown"), - file_path=data.get("file_path", data.get("uri", "")), - range=Range.from_dict(data.get("range", {"start": {"line": 0, "character": 0}, "end": {"line": 0, "character": 0}})), - detail=data.get("detail"), - ) - - @classmethod - def from_lsp(cls, data: Dict[str, Any]) -> "CallHierarchyItem": - """Create CallHierarchyItem from LSP response format. - - LSP uses 0-based line numbers and 'character' instead of 'char'. - """ - uri = data.get("uri", data.get("file_path", "")) - # Strip file:// prefix - file_path = uri - if file_path.startswith("file://"): - file_path = file_path[7:] - if len(file_path) > 2 and file_path[0] == "/" and file_path[2] == ":": - file_path = file_path[1:] - - return cls( - name=data.get("name", "unknown"), - kind=str(data.get("kind", "unknown")), - file_path=file_path, - range=Range.from_lsp_range(data.get("range", {"start": {"line": 0, "character": 0}, "end": {"line": 0, "character": 0}})), - detail=data.get("detail"), - ) diff --git a/codex-lens/src/codexlens/indexing/README.md b/codex-lens/src/codexlens/indexing/README.md deleted file mode 100644 index 7377874d..00000000 --- a/codex-lens/src/codexlens/indexing/README.md +++ /dev/null @@ -1,77 +0,0 @@ -# Symbol Extraction and Indexing - -This module provides symbol extraction and relationship tracking for code graph enrichment. - -## Overview - -The `SymbolExtractor` class extracts code symbols (functions, classes) and their relationships (calls, imports) from source files using regex-based pattern matching. - -## Supported Languages - -- Python (.py) -- TypeScript (.ts, .tsx) -- JavaScript (.js, .jsx) - -## Database Schema - -### Symbols Table -Stores code symbols with their location information: -- `id`: Primary key -- `qualified_name`: Fully qualified name (e.g., "module.ClassName") -- `name`: Symbol name -- `kind`: Symbol type (function, class) -- `file_path`: Path to source file -- `start_line`: Starting line number -- `end_line`: Ending line number - -### Symbol Relationships Table -Stores relationships between symbols: -- `id`: Primary key -- `source_symbol_id`: Foreign key to symbols table -- `target_symbol_fqn`: Fully qualified name of target symbol -- `relationship_type`: Type of relationship (calls, imports) -- `file_path`: Path to source file -- `line`: Line number where relationship occurs - -## Usage Example - -```python -from pathlib import Path -from codexlens.indexing.symbol_extractor import SymbolExtractor - -# Initialize extractor -db_path = Path("./code_index.db") -extractor = SymbolExtractor(db_path) -extractor.connect() - -# Extract from file -file_path = Path("src/my_module.py") -with open(file_path) as f: - content = f.read() - -symbols, relationships = extractor.extract_from_file(file_path, content) - -# Save to database -name_to_id = extractor.save_symbols(symbols) -extractor.save_relationships(relationships, name_to_id) - -# Clean up -extractor.close() -``` - -## Pattern Matching - -The extractor uses regex patterns to identify: - -- **Functions**: Function definitions (including async, export keywords) -- **Classes**: Class definitions (including export keyword) -- **Imports**: Import/require statements -- **Calls**: Function/method invocations - -## Future Enhancements - -- Tree-sitter integration for more accurate parsing -- Support for additional languages -- Method and variable extraction -- Enhanced scope tracking -- Relationship type expansion (inherits, implements, etc.) diff --git a/codex-lens/src/codexlens/indexing/__init__.py b/codex-lens/src/codexlens/indexing/__init__.py deleted file mode 100644 index 1136099f..00000000 --- a/codex-lens/src/codexlens/indexing/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -"""Code indexing and symbol extraction.""" -from codexlens.indexing.symbol_extractor import SymbolExtractor -from codexlens.indexing.embedding import ( - BinaryEmbeddingBackend, - DenseEmbeddingBackend, - CascadeEmbeddingBackend, - get_cascade_embedder, - binarize_embedding, - pack_binary_embedding, - unpack_binary_embedding, - hamming_distance, -) - -__all__ = [ - "SymbolExtractor", - # Cascade embedding backends - "BinaryEmbeddingBackend", - "DenseEmbeddingBackend", - "CascadeEmbeddingBackend", - "get_cascade_embedder", - # Utility functions - "binarize_embedding", - "pack_binary_embedding", - "unpack_binary_embedding", - "hamming_distance", -] diff --git a/codex-lens/src/codexlens/indexing/embedding.py b/codex-lens/src/codexlens/indexing/embedding.py deleted file mode 100644 index 4175f3e5..00000000 --- a/codex-lens/src/codexlens/indexing/embedding.py +++ /dev/null @@ -1,582 +0,0 @@ -"""Multi-type embedding backends for cascade retrieval. - -This module provides embedding backends optimized for cascade retrieval: -1. BinaryEmbeddingBackend - Fast coarse filtering with binary vectors -2. DenseEmbeddingBackend - High-precision dense vectors for reranking -3. CascadeEmbeddingBackend - Combined binary + dense for two-stage retrieval - -Cascade retrieval workflow: -1. Binary search (fast, ~32 bytes/vector) -> top-K candidates -2. Dense rerank (precise, ~8KB/vector) -> final results -""" - -from __future__ import annotations - -import logging -from typing import Iterable, List, Optional, Tuple - -import numpy as np - -from codexlens.semantic.base import BaseEmbedder - -logger = logging.getLogger(__name__) - - -# ============================================================================= -# Utility Functions -# ============================================================================= - - -def binarize_embedding(embedding: np.ndarray) -> np.ndarray: - """Convert float embedding to binary vector. - - Applies sign-based quantization: values > 0 become 1, values <= 0 become 0. - - Args: - embedding: Float32 embedding of any dimension - - Returns: - Binary vector (uint8 with values 0 or 1) of same dimension - """ - return (embedding > 0).astype(np.uint8) - - -def pack_binary_embedding(binary_vector: np.ndarray) -> bytes: - """Pack binary vector into compact bytes format. - - Packs 8 binary values into each byte for storage efficiency. - For a 256-dim binary vector, output is 32 bytes. - - Args: - binary_vector: Binary vector (uint8 with values 0 or 1) - - Returns: - Packed bytes (length = ceil(dim / 8)) - """ - # Ensure vector length is multiple of 8 by padding if needed - dim = len(binary_vector) - padded_dim = ((dim + 7) // 8) * 8 - if padded_dim > dim: - padded = np.zeros(padded_dim, dtype=np.uint8) - padded[:dim] = binary_vector - binary_vector = padded - - # Pack 8 bits per byte - packed = np.packbits(binary_vector) - return packed.tobytes() - - -def unpack_binary_embedding(packed_bytes: bytes, dim: int = 256) -> np.ndarray: - """Unpack bytes back to binary vector. - - Args: - packed_bytes: Packed binary data - dim: Original vector dimension (default: 256) - - Returns: - Binary vector (uint8 with values 0 or 1) - """ - unpacked = np.unpackbits(np.frombuffer(packed_bytes, dtype=np.uint8)) - return unpacked[:dim] - - -def hamming_distance(a: bytes, b: bytes) -> int: - """Compute Hamming distance between two packed binary vectors. - - Uses XOR and popcount for efficient distance computation. - - Args: - a: First packed binary vector - b: Second packed binary vector - - Returns: - Hamming distance (number of differing bits) - """ - a_arr = np.frombuffer(a, dtype=np.uint8) - b_arr = np.frombuffer(b, dtype=np.uint8) - xor = np.bitwise_xor(a_arr, b_arr) - return int(np.unpackbits(xor).sum()) - - -# ============================================================================= -# Binary Embedding Backend -# ============================================================================= - - -class BinaryEmbeddingBackend(BaseEmbedder): - """Generate 256-dimensional binary embeddings for fast coarse retrieval. - - Uses a lightweight embedding model and applies sign-based quantization - to produce compact binary vectors (32 bytes per embedding). - - Suitable for: - - First-stage candidate retrieval - - Hamming distance-based similarity search - - Memory-constrained environments - - Model: sentence-transformers/all-MiniLM-L6-v2 (384 dim) -> quantized to 256 bits - """ - - DEFAULT_MODEL = "BAAI/bge-small-en-v1.5" # 384 dim, fast - BINARY_DIM = 256 - - def __init__( - self, - model_name: Optional[str] = None, - use_gpu: bool = True, - ) -> None: - """Initialize binary embedding backend. - - Args: - model_name: Base embedding model name. Defaults to BAAI/bge-small-en-v1.5 - use_gpu: Whether to use GPU acceleration - """ - from codexlens.semantic import SEMANTIC_AVAILABLE - - if not SEMANTIC_AVAILABLE: - raise ImportError( - "Semantic search dependencies not available. " - "Install with: pip install codexlens[semantic]" - ) - - self._model_name = model_name or self.DEFAULT_MODEL - self._use_gpu = use_gpu - self._model = None - - # Projection matrix for dimension reduction (lazily initialized) - self._projection_matrix: Optional[np.ndarray] = None - - @property - def model_name(self) -> str: - """Return model name.""" - return self._model_name - - @property - def embedding_dim(self) -> int: - """Return binary embedding dimension (256).""" - return self.BINARY_DIM - - @property - def packed_bytes(self) -> int: - """Return packed bytes size (32 bytes for 256 bits).""" - return self.BINARY_DIM // 8 - - def _load_model(self) -> None: - """Lazy load the embedding model.""" - if self._model is not None: - return - - from fastembed import TextEmbedding - from codexlens.semantic.gpu_support import get_optimal_providers - - providers = get_optimal_providers(use_gpu=self._use_gpu, with_device_options=True) - try: - self._model = TextEmbedding( - model_name=self._model_name, - providers=providers, - ) - except TypeError: - # Fallback for older fastembed versions - self._model = TextEmbedding(model_name=self._model_name) - - logger.debug(f"BinaryEmbeddingBackend loaded model: {self._model_name}") - - def _get_projection_matrix(self, input_dim: int) -> np.ndarray: - """Get or create projection matrix for dimension reduction. - - Uses random projection with fixed seed for reproducibility. - - Args: - input_dim: Input embedding dimension from base model - - Returns: - Projection matrix of shape (input_dim, BINARY_DIM) - """ - if self._projection_matrix is not None: - return self._projection_matrix - - # Fixed seed for reproducibility across sessions - rng = np.random.RandomState(42) - # Gaussian random projection - self._projection_matrix = rng.randn(input_dim, self.BINARY_DIM).astype(np.float32) - # Normalize columns for consistent scale - norms = np.linalg.norm(self._projection_matrix, axis=0, keepdims=True) - self._projection_matrix /= (norms + 1e-8) - - return self._projection_matrix - - def embed_to_numpy(self, texts: str | Iterable[str]) -> np.ndarray: - """Generate binary embeddings as numpy array. - - Args: - texts: Single text or iterable of texts - - Returns: - Binary embeddings of shape (n_texts, 256) with values 0 or 1 - """ - self._load_model() - - if isinstance(texts, str): - texts = [texts] - else: - texts = list(texts) - - # Get base float embeddings - float_embeddings = np.array(list(self._model.embed(texts))) - input_dim = float_embeddings.shape[1] - - # Project to target dimension if needed - if input_dim != self.BINARY_DIM: - projection = self._get_projection_matrix(input_dim) - float_embeddings = float_embeddings @ projection - - # Binarize - return binarize_embedding(float_embeddings) - - def embed_packed(self, texts: str | Iterable[str]) -> List[bytes]: - """Generate packed binary embeddings. - - Args: - texts: Single text or iterable of texts - - Returns: - List of packed bytes (32 bytes each for 256-dim) - """ - binary = self.embed_to_numpy(texts) - return [pack_binary_embedding(vec) for vec in binary] - - -# ============================================================================= -# Dense Embedding Backend -# ============================================================================= - - -class DenseEmbeddingBackend(BaseEmbedder): - """Generate high-dimensional dense embeddings for precise reranking. - - Uses large embedding models to produce 2048-dimensional float32 vectors - for maximum retrieval quality. - - Suitable for: - - Second-stage reranking - - High-precision similarity search - - Quality-critical applications - - Model: BAAI/bge-large-en-v1.5 (1024 dim) with optional expansion - """ - - DEFAULT_MODEL = "BAAI/bge-small-en-v1.5" # 384 dim, use small for testing - TARGET_DIM = 768 # Reduced target for faster testing - - def __init__( - self, - model_name: Optional[str] = None, - use_gpu: bool = True, - expand_dim: bool = True, - ) -> None: - """Initialize dense embedding backend. - - Args: - model_name: Dense embedding model name. Defaults to BAAI/bge-large-en-v1.5 - use_gpu: Whether to use GPU acceleration - expand_dim: If True, expand embeddings to TARGET_DIM using learned expansion - """ - from codexlens.semantic import SEMANTIC_AVAILABLE - - if not SEMANTIC_AVAILABLE: - raise ImportError( - "Semantic search dependencies not available. " - "Install with: pip install codexlens[semantic]" - ) - - self._model_name = model_name or self.DEFAULT_MODEL - self._use_gpu = use_gpu - self._expand_dim = expand_dim - self._model = None - self._native_dim: Optional[int] = None - - # Expansion matrix for dimension expansion (lazily initialized) - self._expansion_matrix: Optional[np.ndarray] = None - - @property - def model_name(self) -> str: - """Return model name.""" - return self._model_name - - @property - def embedding_dim(self) -> int: - """Return embedding dimension. - - Returns TARGET_DIM if expand_dim is True, otherwise native model dimension. - """ - if self._expand_dim: - return self.TARGET_DIM - # Return cached native dim or estimate based on model - if self._native_dim is not None: - return self._native_dim - # Model dimension estimates - model_dims = { - "BAAI/bge-large-en-v1.5": 1024, - "BAAI/bge-base-en-v1.5": 768, - "BAAI/bge-small-en-v1.5": 384, - "intfloat/multilingual-e5-large": 1024, - } - return model_dims.get(self._model_name, 1024) - - @property - def max_tokens(self) -> int: - """Return maximum token limit.""" - return 512 # Conservative default for large models - - def _load_model(self) -> None: - """Lazy load the embedding model.""" - if self._model is not None: - return - - from fastembed import TextEmbedding - from codexlens.semantic.gpu_support import get_optimal_providers - - providers = get_optimal_providers(use_gpu=self._use_gpu, with_device_options=True) - try: - self._model = TextEmbedding( - model_name=self._model_name, - providers=providers, - ) - except TypeError: - self._model = TextEmbedding(model_name=self._model_name) - - logger.debug(f"DenseEmbeddingBackend loaded model: {self._model_name}") - - def _get_expansion_matrix(self, input_dim: int) -> np.ndarray: - """Get or create expansion matrix for dimension expansion. - - Uses random orthogonal projection for information-preserving expansion. - - Args: - input_dim: Input embedding dimension from base model - - Returns: - Expansion matrix of shape (input_dim, TARGET_DIM) - """ - if self._expansion_matrix is not None: - return self._expansion_matrix - - # Fixed seed for reproducibility - rng = np.random.RandomState(123) - - # Create semi-orthogonal expansion matrix - # First input_dim columns form identity-like structure - self._expansion_matrix = np.zeros((input_dim, self.TARGET_DIM), dtype=np.float32) - - # Copy original dimensions - copy_dim = min(input_dim, self.TARGET_DIM) - self._expansion_matrix[:copy_dim, :copy_dim] = np.eye(copy_dim, dtype=np.float32) - - # Fill remaining with random projections - if self.TARGET_DIM > input_dim: - random_part = rng.randn(input_dim, self.TARGET_DIM - input_dim).astype(np.float32) - # Normalize - norms = np.linalg.norm(random_part, axis=0, keepdims=True) - random_part /= (norms + 1e-8) - self._expansion_matrix[:, input_dim:] = random_part - - return self._expansion_matrix - - def embed_to_numpy(self, texts: str | Iterable[str]) -> np.ndarray: - """Generate dense embeddings as numpy array. - - Args: - texts: Single text or iterable of texts - - Returns: - Dense embeddings of shape (n_texts, TARGET_DIM) as float32 - """ - self._load_model() - - if isinstance(texts, str): - texts = [texts] - else: - texts = list(texts) - - # Get base float embeddings - float_embeddings = np.array(list(self._model.embed(texts)), dtype=np.float32) - self._native_dim = float_embeddings.shape[1] - - # Expand to target dimension if needed - if self._expand_dim and self._native_dim < self.TARGET_DIM: - expansion = self._get_expansion_matrix(self._native_dim) - float_embeddings = float_embeddings @ expansion - - return float_embeddings - - -# ============================================================================= -# Cascade Embedding Backend -# ============================================================================= - - -class CascadeEmbeddingBackend(BaseEmbedder): - """Combined binary + dense embedding backend for cascade retrieval. - - Generates both binary (for fast coarse filtering) and dense (for precise - reranking) embeddings in a single pass, optimized for two-stage retrieval. - - Cascade workflow: - 1. encode_cascade() returns (binary_embeddings, dense_embeddings) - 2. Binary search: Use Hamming distance on binary vectors -> top-K candidates - 3. Dense rerank: Use cosine similarity on dense vectors -> final results - - Memory efficiency: - - Binary: 32 bytes per vector (256 bits) - - Dense: 8192 bytes per vector (2048 x float32) - - Total: ~8KB per document for full cascade support - """ - - def __init__( - self, - binary_model: Optional[str] = None, - dense_model: Optional[str] = None, - use_gpu: bool = True, - ) -> None: - """Initialize cascade embedding backend. - - Args: - binary_model: Model for binary embeddings. Defaults to BAAI/bge-small-en-v1.5 - dense_model: Model for dense embeddings. Defaults to BAAI/bge-large-en-v1.5 - use_gpu: Whether to use GPU acceleration - """ - self._binary_backend = BinaryEmbeddingBackend( - model_name=binary_model, - use_gpu=use_gpu, - ) - self._dense_backend = DenseEmbeddingBackend( - model_name=dense_model, - use_gpu=use_gpu, - expand_dim=True, - ) - self._use_gpu = use_gpu - - @property - def model_name(self) -> str: - """Return model names for both backends.""" - return f"cascade({self._binary_backend.model_name}, {self._dense_backend.model_name})" - - @property - def embedding_dim(self) -> int: - """Return dense embedding dimension (for compatibility).""" - return self._dense_backend.embedding_dim - - @property - def binary_dim(self) -> int: - """Return binary embedding dimension.""" - return self._binary_backend.embedding_dim - - @property - def dense_dim(self) -> int: - """Return dense embedding dimension.""" - return self._dense_backend.embedding_dim - - def embed_to_numpy(self, texts: str | Iterable[str]) -> np.ndarray: - """Generate dense embeddings (for BaseEmbedder compatibility). - - For cascade embeddings, use encode_cascade() instead. - - Args: - texts: Single text or iterable of texts - - Returns: - Dense embeddings of shape (n_texts, dense_dim) - """ - return self._dense_backend.embed_to_numpy(texts) - - def encode_cascade( - self, - texts: str | Iterable[str], - batch_size: int = 32, - ) -> Tuple[np.ndarray, np.ndarray]: - """Generate both binary and dense embeddings. - - Args: - texts: Single text or iterable of texts - batch_size: Batch size for processing - - Returns: - Tuple of: - - binary_embeddings: Shape (n_texts, 256), uint8 values 0/1 - - dense_embeddings: Shape (n_texts, 2048), float32 - """ - if isinstance(texts, str): - texts = [texts] - else: - texts = list(texts) - - binary_embeddings = self._binary_backend.embed_to_numpy(texts) - dense_embeddings = self._dense_backend.embed_to_numpy(texts) - - return binary_embeddings, dense_embeddings - - def encode_binary(self, texts: str | Iterable[str]) -> np.ndarray: - """Generate only binary embeddings. - - Args: - texts: Single text or iterable of texts - - Returns: - Binary embeddings of shape (n_texts, 256) - """ - return self._binary_backend.embed_to_numpy(texts) - - def encode_dense(self, texts: str | Iterable[str]) -> np.ndarray: - """Generate only dense embeddings. - - Args: - texts: Single text or iterable of texts - - Returns: - Dense embeddings of shape (n_texts, 2048) - """ - return self._dense_backend.embed_to_numpy(texts) - - def encode_binary_packed(self, texts: str | Iterable[str]) -> List[bytes]: - """Generate packed binary embeddings. - - Args: - texts: Single text or iterable of texts - - Returns: - List of packed bytes (32 bytes each) - """ - return self._binary_backend.embed_packed(texts) - - -# ============================================================================= -# Factory Function -# ============================================================================= - - -def get_cascade_embedder( - binary_model: Optional[str] = None, - dense_model: Optional[str] = None, - use_gpu: bool = True, -) -> CascadeEmbeddingBackend: - """Factory function to create a cascade embedder. - - Args: - binary_model: Model for binary embeddings (default: BAAI/bge-small-en-v1.5) - dense_model: Model for dense embeddings (default: BAAI/bge-large-en-v1.5) - use_gpu: Whether to use GPU acceleration - - Returns: - Configured CascadeEmbeddingBackend instance - - Example: - >>> embedder = get_cascade_embedder() - >>> binary, dense = embedder.encode_cascade(["hello world"]) - >>> binary.shape # (1, 256) - >>> dense.shape # (1, 2048) - """ - return CascadeEmbeddingBackend( - binary_model=binary_model, - dense_model=dense_model, - use_gpu=use_gpu, - ) diff --git a/codex-lens/src/codexlens/indexing/symbol_extractor.py b/codex-lens/src/codexlens/indexing/symbol_extractor.py deleted file mode 100644 index 45439e7b..00000000 --- a/codex-lens/src/codexlens/indexing/symbol_extractor.py +++ /dev/null @@ -1,277 +0,0 @@ -"""Symbol and relationship extraction from source code.""" -import re -import sqlite3 -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - -try: - from codexlens.parsers.treesitter_parser import TreeSitterSymbolParser -except Exception: # pragma: no cover - optional dependency / platform variance - TreeSitterSymbolParser = None # type: ignore[assignment] - - -class SymbolExtractor: - """Extract symbols and relationships from source code using regex patterns.""" - - # Pattern definitions for different languages - PATTERNS = { - 'python': { - 'function': r'^(?:async\s+)?def\s+(\w+)\s*\(', - 'class': r'^class\s+(\w+)\s*[:\(]', - 'import': r'^(?:from\s+([\w.]+)\s+)?import\s+([\w.,\s]+)', - 'call': r'(? None: - """Connect to database and ensure schema exists.""" - self.db_conn = sqlite3.connect(str(self.db_path)) - self._ensure_tables() - - def __enter__(self) -> "SymbolExtractor": - """Context manager entry: connect to database.""" - self.connect() - return self - - def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: - """Context manager exit: close database connection.""" - self.close() - - def _ensure_tables(self) -> None: - """Create symbols and relationships tables if they don't exist.""" - if not self.db_conn: - return - cursor = self.db_conn.cursor() - - # Create symbols table with qualified_name - cursor.execute(''' - CREATE TABLE IF NOT EXISTS symbols ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - qualified_name TEXT NOT NULL, - name TEXT NOT NULL, - kind TEXT NOT NULL, - file_path TEXT NOT NULL, - start_line INTEGER NOT NULL, - end_line INTEGER NOT NULL, - UNIQUE(file_path, name, start_line) - ) - ''') - - # Create relationships table with target_symbol_fqn - cursor.execute(''' - CREATE TABLE IF NOT EXISTS symbol_relationships ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - source_symbol_id INTEGER NOT NULL, - target_symbol_fqn TEXT NOT NULL, - relationship_type TEXT NOT NULL, - file_path TEXT NOT NULL, - line INTEGER, - FOREIGN KEY (source_symbol_id) REFERENCES symbols(id) ON DELETE CASCADE - ) - ''') - - # Create performance indexes - cursor.execute('CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name)') - cursor.execute('CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_path)') - cursor.execute('CREATE INDEX IF NOT EXISTS idx_rel_source ON symbol_relationships(source_symbol_id)') - cursor.execute('CREATE INDEX IF NOT EXISTS idx_rel_target ON symbol_relationships(target_symbol_fqn)') - cursor.execute('CREATE INDEX IF NOT EXISTS idx_rel_type ON symbol_relationships(relationship_type)') - - self.db_conn.commit() - - def extract_from_file(self, file_path: Path, content: str) -> Tuple[List[Dict], List[Dict]]: - """Extract symbols and relationships from file content. - - Args: - file_path: Path to the source file - content: File content as string - - Returns: - Tuple of (symbols, relationships) where: - - symbols: List of symbol dicts with qualified_name, name, kind, file_path, start_line, end_line - - relationships: List of relationship dicts with source_scope, target, type, file_path, line - """ - ext = file_path.suffix.lower() - lang = self.LANGUAGE_MAP.get(ext) - - if not lang or lang not in self.PATTERNS: - return [], [] - - patterns = self.PATTERNS[lang] - symbols = [] - relationships: List[Dict] = [] - lines = content.split('\n') - - current_scope = None - - for line_num, line in enumerate(lines, 1): - # Extract function/class definitions - for kind in ['function', 'class']: - if kind in patterns: - match = re.search(patterns[kind], line) - if match: - name = match.group(1) - qualified_name = f"{file_path.stem}.{name}" - symbols.append({ - 'qualified_name': qualified_name, - 'name': name, - 'kind': kind, - 'file_path': str(file_path), - 'start_line': line_num, - 'end_line': line_num, # Simplified - would need proper parsing for actual end - }) - current_scope = name - - if TreeSitterSymbolParser is not None: - try: - ts_parser = TreeSitterSymbolParser(lang, file_path) - if ts_parser.is_available(): - indexed = ts_parser.parse(content, file_path) - if indexed is not None and indexed.relationships: - relationships = [ - { - "source_scope": r.source_symbol, - "target": r.target_symbol, - "type": r.relationship_type.value, - "file_path": str(file_path), - "line": r.source_line, - } - for r in indexed.relationships - ] - except Exception: - relationships = [] - - # Regex fallback for relationships (when tree-sitter is unavailable) - if not relationships: - current_scope = None - for line_num, line in enumerate(lines, 1): - for kind in ['function', 'class']: - if kind in patterns: - match = re.search(patterns[kind], line) - if match: - current_scope = match.group(1) - - # Extract imports - if 'import' in patterns: - match = re.search(patterns['import'], line) - if match: - import_target = match.group(1) or match.group(2) if match.lastindex >= 2 else match.group(1) - if import_target and current_scope: - relationships.append({ - 'source_scope': current_scope, - 'target': import_target.strip(), - 'type': 'imports', - 'file_path': str(file_path), - 'line': line_num, - }) - - # Extract function calls (simplified) - if 'call' in patterns and current_scope: - for match in re.finditer(patterns['call'], line): - call_name = match.group(1) - # Skip common keywords and the current function - if call_name not in ['if', 'for', 'while', 'return', 'print', 'len', 'str', 'int', 'float', 'list', 'dict', 'set', 'tuple', current_scope]: - relationships.append({ - 'source_scope': current_scope, - 'target': call_name, - 'type': 'calls', - 'file_path': str(file_path), - 'line': line_num, - }) - - return symbols, relationships - - def save_symbols(self, symbols: List[Dict]) -> Dict[str, int]: - """Save symbols to database and return name->id mapping. - - Args: - symbols: List of symbol dicts with qualified_name, name, kind, file_path, start_line, end_line - - Returns: - Dictionary mapping symbol name to database id - """ - if not self.db_conn or not symbols: - return {} - - cursor = self.db_conn.cursor() - name_to_id = {} - - for sym in symbols: - try: - cursor.execute(''' - INSERT OR IGNORE INTO symbols - (qualified_name, name, kind, file_path, start_line, end_line) - VALUES (?, ?, ?, ?, ?, ?) - ''', (sym['qualified_name'], sym['name'], sym['kind'], - sym['file_path'], sym['start_line'], sym['end_line'])) - - # Get the id - cursor.execute(''' - SELECT id FROM symbols - WHERE file_path = ? AND name = ? AND start_line = ? - ''', (sym['file_path'], sym['name'], sym['start_line'])) - - row = cursor.fetchone() - if row: - name_to_id[sym['name']] = row[0] - except sqlite3.Error: - continue - - self.db_conn.commit() - return name_to_id - - def save_relationships(self, relationships: List[Dict], name_to_id: Dict[str, int]) -> None: - """Save relationships to database. - - Args: - relationships: List of relationship dicts with source_scope, target, type, file_path, line - name_to_id: Dictionary mapping symbol names to database ids - """ - if not self.db_conn or not relationships: - return - - cursor = self.db_conn.cursor() - - for rel in relationships: - source_id = name_to_id.get(rel['source_scope']) - if source_id: - try: - cursor.execute(''' - INSERT INTO symbol_relationships - (source_symbol_id, target_symbol_fqn, relationship_type, file_path, line) - VALUES (?, ?, ?, ?, ?) - ''', (source_id, rel['target'], rel['type'], rel['file_path'], rel['line'])) - except sqlite3.Error: - continue - - self.db_conn.commit() - - def close(self) -> None: - """Close database connection.""" - if self.db_conn: - self.db_conn.close() - self.db_conn = None diff --git a/codex-lens/src/codexlens/lsp/__init__.py b/codex-lens/src/codexlens/lsp/__init__.py deleted file mode 100644 index e2c851e2..00000000 --- a/codex-lens/src/codexlens/lsp/__init__.py +++ /dev/null @@ -1,34 +0,0 @@ -"""LSP module for real-time language server integration. - -This module provides: -- LspBridge: HTTP bridge to VSCode language servers -- LspGraphBuilder: Build code association graphs via LSP -- Location: Position in a source file - -Example: - >>> from codexlens.lsp import LspBridge, LspGraphBuilder - >>> - >>> async with LspBridge() as bridge: - ... refs = await bridge.get_references(symbol) - ... graph = await LspGraphBuilder().build_from_seeds(seeds, bridge) -""" - -from codexlens.lsp.lsp_bridge import ( - CacheEntry, - Location, - LspBridge, -) -from codexlens.lsp.lsp_graph_builder import ( - LspGraphBuilder, -) - -# Alias for backward compatibility -GraphBuilder = LspGraphBuilder - -__all__ = [ - "CacheEntry", - "GraphBuilder", - "Location", - "LspBridge", - "LspGraphBuilder", -] diff --git a/codex-lens/src/codexlens/lsp/handlers.py b/codex-lens/src/codexlens/lsp/handlers.py deleted file mode 100644 index 3fb17e40..00000000 --- a/codex-lens/src/codexlens/lsp/handlers.py +++ /dev/null @@ -1,551 +0,0 @@ -"""LSP request handlers for codex-lens. - -This module contains handlers for LSP requests: -- textDocument/definition -- textDocument/completion -- workspace/symbol -- textDocument/didSave -- textDocument/hover -""" - -from __future__ import annotations - -import logging -import re -from pathlib import Path -from typing import List, Optional, Union -from urllib.parse import quote, unquote - -try: - from lsprotocol import types as lsp -except ImportError as exc: - raise ImportError( - "LSP dependencies not installed. Install with: pip install codex-lens[lsp]" - ) from exc - -from codexlens.entities import Symbol -from codexlens.lsp.server import server - -logger = logging.getLogger(__name__) - -# Symbol kind mapping from codex-lens to LSP -SYMBOL_KIND_MAP = { - "class": lsp.SymbolKind.Class, - "function": lsp.SymbolKind.Function, - "method": lsp.SymbolKind.Method, - "variable": lsp.SymbolKind.Variable, - "constant": lsp.SymbolKind.Constant, - "property": lsp.SymbolKind.Property, - "field": lsp.SymbolKind.Field, - "interface": lsp.SymbolKind.Interface, - "module": lsp.SymbolKind.Module, - "namespace": lsp.SymbolKind.Namespace, - "package": lsp.SymbolKind.Package, - "enum": lsp.SymbolKind.Enum, - "enum_member": lsp.SymbolKind.EnumMember, - "struct": lsp.SymbolKind.Struct, - "type": lsp.SymbolKind.TypeParameter, - "type_alias": lsp.SymbolKind.TypeParameter, -} - -# Completion kind mapping from codex-lens to LSP -COMPLETION_KIND_MAP = { - "class": lsp.CompletionItemKind.Class, - "function": lsp.CompletionItemKind.Function, - "method": lsp.CompletionItemKind.Method, - "variable": lsp.CompletionItemKind.Variable, - "constant": lsp.CompletionItemKind.Constant, - "property": lsp.CompletionItemKind.Property, - "field": lsp.CompletionItemKind.Field, - "interface": lsp.CompletionItemKind.Interface, - "module": lsp.CompletionItemKind.Module, - "enum": lsp.CompletionItemKind.Enum, - "enum_member": lsp.CompletionItemKind.EnumMember, - "struct": lsp.CompletionItemKind.Struct, - "type": lsp.CompletionItemKind.TypeParameter, - "type_alias": lsp.CompletionItemKind.TypeParameter, -} - - -def _path_to_uri(path: Union[str, Path]) -> str: - """Convert a file path to a URI. - - Args: - path: File path (string or Path object) - - Returns: - File URI string - """ - path_str = str(Path(path).resolve()) - # Handle Windows paths - if path_str.startswith("/"): - return f"file://{quote(path_str)}" - else: - return f"file:///{quote(path_str.replace(chr(92), '/'))}" - - -def _uri_to_path(uri: str) -> Path: - """Convert a URI to a file path. - - Args: - uri: File URI string - - Returns: - Path object - """ - path = uri.replace("file:///", "").replace("file://", "") - return Path(unquote(path)) - - -def _get_word_at_position(document_text: str, line: int, character: int) -> Optional[str]: - """Extract the word at the given position in the document. - - Args: - document_text: Full document text - line: 0-based line number - character: 0-based character position - - Returns: - Word at position, or None if no word found - """ - lines = document_text.splitlines() - if line >= len(lines): - return None - - line_text = lines[line] - if character > len(line_text): - return None - - # Find word boundaries - word_pattern = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*") - for match in word_pattern.finditer(line_text): - if match.start() <= character <= match.end(): - return match.group() - - return None - - -def _get_prefix_at_position(document_text: str, line: int, character: int) -> str: - """Extract the incomplete word prefix at the given position. - - Args: - document_text: Full document text - line: 0-based line number - character: 0-based character position - - Returns: - Prefix string (may be empty) - """ - lines = document_text.splitlines() - if line >= len(lines): - return "" - - line_text = lines[line] - if character > len(line_text): - character = len(line_text) - - # Extract text before cursor - before_cursor = line_text[:character] - - # Find the start of the current word - match = re.search(r"[a-zA-Z_][a-zA-Z0-9_]*$", before_cursor) - if match: - return match.group() - - return "" - - -def symbol_to_location(symbol: Symbol) -> Optional[lsp.Location]: - """Convert a codex-lens Symbol to an LSP Location. - - Args: - symbol: codex-lens Symbol object - - Returns: - LSP Location, or None if symbol has no file - """ - if not symbol.file: - return None - - # LSP uses 0-based lines, codex-lens uses 1-based - start_line = max(0, symbol.range[0] - 1) - end_line = max(0, symbol.range[1] - 1) - - return lsp.Location( - uri=_path_to_uri(symbol.file), - range=lsp.Range( - start=lsp.Position(line=start_line, character=0), - end=lsp.Position(line=end_line, character=0), - ), - ) - - -def _symbol_kind_to_lsp(kind: str) -> lsp.SymbolKind: - """Map codex-lens symbol kind to LSP SymbolKind. - - Args: - kind: codex-lens symbol kind string - - Returns: - LSP SymbolKind - """ - return SYMBOL_KIND_MAP.get(kind.lower(), lsp.SymbolKind.Variable) - - -def _symbol_kind_to_completion_kind(kind: str) -> lsp.CompletionItemKind: - """Map codex-lens symbol kind to LSP CompletionItemKind. - - Args: - kind: codex-lens symbol kind string - - Returns: - LSP CompletionItemKind - """ - return COMPLETION_KIND_MAP.get(kind.lower(), lsp.CompletionItemKind.Text) - - -# ----------------------------------------------------------------------------- -# LSP Request Handlers -# ----------------------------------------------------------------------------- - - -@server.feature(lsp.TEXT_DOCUMENT_DEFINITION) -def lsp_definition( - params: lsp.DefinitionParams, -) -> Optional[Union[lsp.Location, List[lsp.Location]]]: - """Handle textDocument/definition request. - - Finds the definition of the symbol at the cursor position. - """ - if not server.global_index: - logger.debug("No global index available for definition lookup") - return None - - # Get document - document = server.workspace.get_text_document(params.text_document.uri) - if not document: - return None - - # Get word at position - word = _get_word_at_position( - document.source, - params.position.line, - params.position.character, - ) - - if not word: - logger.debug("No word found at position") - return None - - logger.debug("Looking up definition for: %s", word) - - # Search for exact symbol match - try: - symbols = server.global_index.search( - name=word, - limit=10, - prefix_mode=False, # Exact match preferred - ) - - # Filter for exact name match - exact_matches = [s for s in symbols if s.name == word] - if not exact_matches: - # Fall back to prefix search - symbols = server.global_index.search( - name=word, - limit=10, - prefix_mode=True, - ) - exact_matches = [s for s in symbols if s.name == word] - - if not exact_matches: - logger.debug("No definition found for: %s", word) - return None - - # Convert to LSP locations - locations = [] - for sym in exact_matches: - loc = symbol_to_location(sym) - if loc: - locations.append(loc) - - if len(locations) == 1: - return locations[0] - elif locations: - return locations - else: - return None - - except Exception as exc: - logger.error("Error looking up definition: %s", exc) - return None - - -@server.feature(lsp.TEXT_DOCUMENT_REFERENCES) -def lsp_references(params: lsp.ReferenceParams) -> Optional[List[lsp.Location]]: - """Handle textDocument/references request. - - Finds all references to the symbol at the cursor position using - the code_relationships table for accurate call-site tracking. - Falls back to same-name symbol search if search_engine is unavailable. - """ - document = server.workspace.get_text_document(params.text_document.uri) - if not document: - return None - - word = _get_word_at_position( - document.source, - params.position.line, - params.position.character, - ) - - if not word: - return None - - logger.debug("Finding references for: %s", word) - - try: - # Try using search_engine.search_references() for accurate reference tracking - if server.search_engine and server.workspace_root: - references = server.search_engine.search_references( - symbol_name=word, - source_path=server.workspace_root, - limit=200, - ) - - if references: - locations = [] - for ref in references: - locations.append( - lsp.Location( - uri=_path_to_uri(ref.file_path), - range=lsp.Range( - start=lsp.Position( - line=max(0, ref.line - 1), - character=ref.column, - ), - end=lsp.Position( - line=max(0, ref.line - 1), - character=ref.column + len(word), - ), - ), - ) - ) - return locations if locations else None - - # Fallback: search for symbols with same name using global_index - if server.global_index: - symbols = server.global_index.search( - name=word, - limit=100, - prefix_mode=False, - ) - - # Filter for exact matches - exact_matches = [s for s in symbols if s.name == word] - - locations = [] - for sym in exact_matches: - loc = symbol_to_location(sym) - if loc: - locations.append(loc) - - return locations if locations else None - - return None - - except Exception as exc: - logger.error("Error finding references: %s", exc) - return None - - -@server.feature(lsp.TEXT_DOCUMENT_COMPLETION) -def lsp_completion(params: lsp.CompletionParams) -> Optional[lsp.CompletionList]: - """Handle textDocument/completion request. - - Provides code completion suggestions based on indexed symbols. - """ - if not server.global_index: - return None - - document = server.workspace.get_text_document(params.text_document.uri) - if not document: - return None - - prefix = _get_prefix_at_position( - document.source, - params.position.line, - params.position.character, - ) - - if not prefix or len(prefix) < 2: - # Require at least 2 characters for completion - return None - - logger.debug("Completing prefix: %s", prefix) - - try: - symbols = server.global_index.search( - name=prefix, - limit=50, - prefix_mode=True, - ) - - if not symbols: - return None - - # Convert to completion items - items = [] - seen_names = set() - - for sym in symbols: - if sym.name in seen_names: - continue - seen_names.add(sym.name) - - items.append( - lsp.CompletionItem( - label=sym.name, - kind=_symbol_kind_to_completion_kind(sym.kind), - detail=f"{sym.kind} - {Path(sym.file).name if sym.file else 'unknown'}", - sort_text=sym.name.lower(), - ) - ) - - return lsp.CompletionList( - is_incomplete=len(symbols) >= 50, - items=items, - ) - - except Exception as exc: - logger.error("Error getting completions: %s", exc) - return None - - -@server.feature(lsp.TEXT_DOCUMENT_HOVER) -def lsp_hover(params: lsp.HoverParams) -> Optional[lsp.Hover]: - """Handle textDocument/hover request. - - Provides hover information for the symbol at the cursor position - using HoverProvider for rich symbol information including - signature, documentation, and location. - """ - if not server.global_index: - return None - - document = server.workspace.get_text_document(params.text_document.uri) - if not document: - return None - - word = _get_word_at_position( - document.source, - params.position.line, - params.position.character, - ) - - if not word: - return None - - logger.debug("Hover for: %s", word) - - try: - # Use HoverProvider for rich symbol information - from codexlens.lsp.providers import HoverProvider - - provider = HoverProvider(server.global_index, server.registry) - info = provider.get_hover_info(word) - - if not info: - return None - - # Format as markdown with signature and location - content = provider.format_hover_markdown(info) - - return lsp.Hover( - contents=lsp.MarkupContent( - kind=lsp.MarkupKind.Markdown, - value=content, - ), - ) - - except Exception as exc: - logger.error("Error getting hover info: %s", exc) - return None - - -@server.feature(lsp.WORKSPACE_SYMBOL) -def lsp_workspace_symbol( - params: lsp.WorkspaceSymbolParams, -) -> Optional[List[lsp.SymbolInformation]]: - """Handle workspace/symbol request. - - Searches for symbols across the workspace. - """ - if not server.global_index: - return None - - query = params.query - if not query or len(query) < 2: - return None - - logger.debug("Workspace symbol search: %s", query) - - try: - symbols = server.global_index.search( - name=query, - limit=100, - prefix_mode=True, - ) - - if not symbols: - return None - - result = [] - for sym in symbols: - loc = symbol_to_location(sym) - if loc: - result.append( - lsp.SymbolInformation( - name=sym.name, - kind=_symbol_kind_to_lsp(sym.kind), - location=loc, - container_name=Path(sym.file).parent.name if sym.file else None, - ) - ) - - return result if result else None - - except Exception as exc: - logger.error("Error searching workspace symbols: %s", exc) - return None - - -@server.feature(lsp.TEXT_DOCUMENT_DID_SAVE) -def lsp_did_save(params: lsp.DidSaveTextDocumentParams) -> None: - """Handle textDocument/didSave notification. - - Triggers incremental re-indexing of the saved file. - Note: Full incremental indexing requires WatcherManager integration, - which is planned for Phase 2. - """ - file_path = _uri_to_path(params.text_document.uri) - logger.info("File saved: %s", file_path) - - # Phase 1: Just log the save event - # Phase 2 will integrate with WatcherManager for incremental indexing - # if server.watcher_manager: - # server.watcher_manager.trigger_reindex(file_path) - - -@server.feature(lsp.TEXT_DOCUMENT_DID_OPEN) -def lsp_did_open(params: lsp.DidOpenTextDocumentParams) -> None: - """Handle textDocument/didOpen notification.""" - file_path = _uri_to_path(params.text_document.uri) - logger.debug("File opened: %s", file_path) - - -@server.feature(lsp.TEXT_DOCUMENT_DID_CLOSE) -def lsp_did_close(params: lsp.DidCloseTextDocumentParams) -> None: - """Handle textDocument/didClose notification.""" - file_path = _uri_to_path(params.text_document.uri) - logger.debug("File closed: %s", file_path) diff --git a/codex-lens/src/codexlens/lsp/keepalive_bridge.py b/codex-lens/src/codexlens/lsp/keepalive_bridge.py deleted file mode 100644 index a6d3f819..00000000 --- a/codex-lens/src/codexlens/lsp/keepalive_bridge.py +++ /dev/null @@ -1,135 +0,0 @@ -"""Keep-alive wrapper for Standalone LSP servers in synchronous workflows. - -The staged realtime pipeline calls into LSP from synchronous code paths. -Creating a fresh asyncio loop per query (via asyncio.run) forces language -servers to start/stop every time, which is slow and can trigger shutdown -timeouts on Windows. - -This module runs an asyncio event loop in a background thread and keeps a -single LspBridge (and its StandaloneLspManager + subprocesses) alive across -multiple queries. Callers submit coroutines that operate on the shared bridge. -""" - -from __future__ import annotations - -import atexit -import asyncio -import threading -from dataclasses import dataclass -from typing import Awaitable, Callable, Optional, TypeVar - -from codexlens.lsp.lsp_bridge import LspBridge - -T = TypeVar("T") - - -@dataclass(frozen=True) -class KeepAliveKey: - workspace_root: str - config_file: Optional[str] - timeout: float - - -class KeepAliveLspBridge: - """Runs a shared LspBridge on a dedicated event loop thread.""" - - def __init__(self, *, workspace_root: str, config_file: Optional[str], timeout: float) -> None: - self._key = KeepAliveKey(workspace_root=workspace_root, config_file=config_file, timeout=float(timeout)) - self._lock = threading.RLock() - self._call_lock = threading.RLock() - self._ready = threading.Event() - self._thread: Optional[threading.Thread] = None - self._loop: Optional[asyncio.AbstractEventLoop] = None - self._bridge: Optional[LspBridge] = None - self._stopped = False - - atexit.register(self.stop) - - @property - def key(self) -> KeepAliveKey: - return self._key - - def start(self) -> None: - with self._lock: - if self._stopped: - raise RuntimeError("KeepAliveLspBridge is stopped") - if self._thread is not None and self._thread.is_alive(): - return - - self._ready.clear() - thread = threading.Thread(target=self._run, name="codexlens-lsp-keepalive", daemon=True) - self._thread = thread - thread.start() - - if not self._ready.wait(timeout=10.0): - raise RuntimeError("Timed out starting LSP keep-alive loop") - - def stop(self) -> None: - with self._lock: - if self._stopped: - return - self._stopped = True - loop = self._loop - bridge = self._bridge - thread = self._thread - - if loop is not None and bridge is not None: - try: - fut = asyncio.run_coroutine_threadsafe(bridge.close(), loop) - fut.result(timeout=5.0) - except Exception: - pass - try: - loop.call_soon_threadsafe(loop.stop) - except Exception: - pass - - if thread is not None: - try: - thread.join(timeout=5.0) - except Exception: - pass - - def run(self, fn: Callable[[LspBridge], Awaitable[T]], *, timeout: Optional[float] = None) -> T: - """Run an async function against the shared LspBridge and return its result.""" - self.start() - loop = self._loop - bridge = self._bridge - if loop is None or bridge is None: - raise RuntimeError("Keep-alive loop not initialized") - - async def _call() -> T: - return await fn(bridge) - - # Serialize bridge usage to avoid overlapping LSP request storms. - with self._call_lock: - fut = asyncio.run_coroutine_threadsafe(_call(), loop) - return fut.result(timeout=float(timeout or self._key.timeout) + 1.0) - - def _run(self) -> None: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - bridge = LspBridge( - workspace_root=self._key.workspace_root, - config_file=self._key.config_file, - timeout=self._key.timeout, - ) - - with self._lock: - self._loop = loop - self._bridge = bridge - self._ready.set() - - try: - loop.run_forever() - finally: - try: - if self._bridge is not None: - loop.run_until_complete(self._bridge.close()) - except Exception: - pass - try: - loop.close() - except Exception: - pass - diff --git a/codex-lens/src/codexlens/lsp/lsp-servers.json b/codex-lens/src/codexlens/lsp/lsp-servers.json deleted file mode 100644 index bfc21fb9..00000000 --- a/codex-lens/src/codexlens/lsp/lsp-servers.json +++ /dev/null @@ -1,88 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "version": "1.0.0", - "description": "Default language server configuration for codex-lens standalone LSP client", - "servers": [ - { - "languageId": "python", - "displayName": "Pyright", - "extensions": ["py", "pyi"], - "command": ["pyright-langserver", "--stdio"], - "enabled": true, - "initializationOptions": { - "pythonPath": "", - "pythonPlatform": "", - "pythonVersion": "3.13" - }, - "settings": { - "python.analysis": { - "typeCheckingMode": "standard", - "diagnosticMode": "workspace", - "exclude": ["**/node_modules", "**/__pycache__", "build", "dist"], - "include": ["src/**", "tests/**"], - "stubPath": "typings" - } - } - }, - { - "languageId": "typescript", - "displayName": "TypeScript Language Server", - "extensions": ["ts", "tsx"], - "command": ["typescript-language-server", "--stdio"], - "enabled": true, - "initializationOptions": {}, - "settings": {} - }, - { - "languageId": "javascript", - "displayName": "TypeScript Language Server (for JS)", - "extensions": ["js", "jsx", "mjs", "cjs"], - "command": ["typescript-language-server", "--stdio"], - "enabled": true, - "initializationOptions": {}, - "settings": {} - }, - { - "languageId": "go", - "displayName": "Gopls", - "extensions": ["go"], - "command": ["gopls", "serve"], - "enabled": true, - "initializationOptions": {}, - "settings": {} - }, - { - "languageId": "rust", - "displayName": "Rust Analyzer", - "extensions": ["rs"], - "command": ["rust-analyzer"], - "enabled": false, - "initializationOptions": {}, - "settings": {} - }, - { - "languageId": "c", - "displayName": "Clangd", - "extensions": ["c", "h"], - "command": ["clangd"], - "enabled": false, - "initializationOptions": {}, - "settings": {} - }, - { - "languageId": "cpp", - "displayName": "Clangd", - "extensions": ["cpp", "hpp", "cc", "cxx"], - "command": ["clangd"], - "enabled": false, - "initializationOptions": {}, - "settings": {} - } - ], - "defaults": { - "rootDir": ".", - "timeout": 30000, - "restartInterval": 5000, - "maxRestarts": 3 - } -} diff --git a/codex-lens/src/codexlens/lsp/lsp_bridge.py b/codex-lens/src/codexlens/lsp/lsp_bridge.py deleted file mode 100644 index 63b30830..00000000 --- a/codex-lens/src/codexlens/lsp/lsp_bridge.py +++ /dev/null @@ -1,857 +0,0 @@ -"""LspBridge service for real-time LSP communication with caching. - -This module provides a bridge to communicate with language servers either via: -1. Standalone LSP Manager (direct subprocess communication - default) -2. VSCode Bridge extension (HTTP-based, legacy mode) - -Features: -- Direct communication with language servers (no VSCode dependency) -- Cache with TTL and file modification time invalidation -- Graceful error handling with empty results on failure -- Support for definition, references, hover, and call hierarchy -""" - -from __future__ import annotations - -import asyncio -import logging -import os -import time -from collections import OrderedDict -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Dict, List, Optional, TYPE_CHECKING -from urllib.parse import unquote - -logger = logging.getLogger(__name__) - -if TYPE_CHECKING: - from codexlens.lsp.standalone_manager import StandaloneLspManager - -# Check for optional dependencies -try: - import aiohttp - HAS_AIOHTTP = True -except ImportError: - HAS_AIOHTTP = False - -from codexlens.hybrid_search.data_structures import ( - CallHierarchyItem, - CodeSymbolNode, - Range, -) - - -@dataclass -class Location: - """A location in a source file (LSP response format).""" - - file_path: str - line: int - character: int - - def to_dict(self) -> Dict[str, Any]: - """Convert to dictionary format.""" - return { - "file_path": self.file_path, - "line": self.line, - "character": self.character, - } - - @classmethod - def from_lsp_response(cls, data: Dict[str, Any]) -> "Location": - """Create Location from LSP response format. - - Handles both direct format and VSCode URI format. - """ - # Handle VSCode URI format (file:///path/to/file) - uri = data.get("uri", data.get("file_path", "")) - if uri.startswith("file://"): - # Strip scheme and decode percent-encoding (e.g. file:///d%3A/...). - # Keep behavior compatible with both Windows and Unix paths. - raw = unquote(uri[7:]) # keep leading slash for Unix paths - # Windows: file:///C:/... or file:///c%3A/... -> C:/... - if raw.startswith("/") and len(raw) > 2 and raw[2] == ":": - raw = raw[1:] - file_path = raw - else: - file_path = uri - - # Get position from range or direct fields - if "range" in data: - range_data = data["range"] - start = range_data.get("start", {}) - line = start.get("line", 0) + 1 # LSP is 0-based, convert to 1-based - character = start.get("character", 0) + 1 - else: - line = data.get("line", 1) - character = data.get("character", 1) - - return cls(file_path=file_path, line=line, character=character) - - -@dataclass -class CacheEntry: - """A cached LSP response with expiration metadata. - - Attributes: - data: The cached response data - file_mtime: File modification time when cached (for invalidation) - cached_at: Unix timestamp when entry was cached - """ - - data: Any - file_mtime: float - cached_at: float - - -class LspBridge: - """Bridge for real-time LSP communication with language servers. - - By default, uses StandaloneLspManager to directly spawn and communicate - with language servers via JSON-RPC over stdio. No VSCode dependency required. - - For legacy mode, can use VSCode Bridge HTTP server (set use_vscode_bridge=True). - - Features: - - Direct language server communication (default) - - Response caching with TTL and file modification invalidation - - Timeout handling - - Graceful error handling returning empty results - - Example: - # Default: standalone mode (no VSCode needed) - async with LspBridge() as bridge: - refs = await bridge.get_references(symbol) - definition = await bridge.get_definition(symbol) - - # Legacy: VSCode Bridge mode - async with LspBridge(use_vscode_bridge=True) as bridge: - refs = await bridge.get_references(symbol) - """ - - DEFAULT_BRIDGE_URL = "http://127.0.0.1:3457" - DEFAULT_TIMEOUT = 30.0 # seconds (increased for standalone mode) - DEFAULT_CACHE_TTL = 300 # 5 minutes - DEFAULT_MAX_CACHE_SIZE = 1000 # Maximum cache entries - - def __init__( - self, - bridge_url: str = DEFAULT_BRIDGE_URL, - timeout: float = DEFAULT_TIMEOUT, - cache_ttl: int = DEFAULT_CACHE_TTL, - max_cache_size: int = DEFAULT_MAX_CACHE_SIZE, - use_vscode_bridge: bool = False, - workspace_root: Optional[str] = None, - config_file: Optional[str] = None, - ): - """Initialize LspBridge. - - Args: - bridge_url: URL of the VSCode Bridge HTTP server (legacy mode only) - timeout: Request timeout in seconds - cache_ttl: Cache time-to-live in seconds - max_cache_size: Maximum number of cache entries (LRU eviction) - use_vscode_bridge: If True, use VSCode Bridge HTTP mode (requires aiohttp) - workspace_root: Root directory for standalone LSP manager - config_file: Path to lsp-servers.json configuration file - """ - self.bridge_url = bridge_url - self.timeout = timeout - self.cache_ttl = cache_ttl - self.max_cache_size = max_cache_size - self.use_vscode_bridge = use_vscode_bridge - self.workspace_root = workspace_root - self.config_file = config_file - - self.cache: OrderedDict[str, CacheEntry] = OrderedDict() - - # VSCode Bridge mode (legacy) - self._session: Optional["aiohttp.ClientSession"] = None - - # Standalone mode (default) - self._manager: Optional["StandaloneLspManager"] = None - self._manager_started = False - - # Validate dependencies - if use_vscode_bridge and not HAS_AIOHTTP: - raise ImportError( - "aiohttp is required for VSCode Bridge mode: pip install aiohttp" - ) - - async def _ensure_manager(self) -> "StandaloneLspManager": - """Ensure standalone LSP manager is started.""" - if self._manager is None: - from codexlens.lsp.standalone_manager import StandaloneLspManager - self._manager = StandaloneLspManager( - workspace_root=self.workspace_root, - config_file=self.config_file, - timeout=self.timeout, - ) - - if not self._manager_started: - await self._manager.start() - self._manager_started = True - - return self._manager - - async def _get_session(self) -> "aiohttp.ClientSession": - """Get or create the aiohttp session (VSCode Bridge mode only).""" - if not HAS_AIOHTTP: - raise ImportError("aiohttp required for VSCode Bridge mode") - - if self._session is None or self._session.closed: - timeout = aiohttp.ClientTimeout(total=self.timeout) - self._session = aiohttp.ClientSession(timeout=timeout) - return self._session - - async def close(self) -> None: - """Close connections and cleanup resources.""" - # Close VSCode Bridge session - if self._session and not self._session.closed: - await self._session.close() - self._session = None - - # Stop standalone manager - if self._manager and self._manager_started: - await self._manager.stop() - self._manager_started = False - - def _get_file_mtime(self, file_path: str) -> float: - """Get file modification time, or 0 if file doesn't exist.""" - try: - return os.path.getmtime(file_path) - except OSError: - return 0.0 - - def _is_cached(self, cache_key: str, file_path: str) -> bool: - """Check if cache entry is valid. - - Cache is invalid if: - - Entry doesn't exist - - TTL has expired - - File has been modified since caching - - Args: - cache_key: The cache key to check - file_path: Path to source file for mtime check - - Returns: - True if cache is valid and can be used - """ - if cache_key not in self.cache: - return False - - entry = self.cache[cache_key] - now = time.time() - - # Check TTL - if now - entry.cached_at > self.cache_ttl: - del self.cache[cache_key] - return False - - # Check file modification time - current_mtime = self._get_file_mtime(file_path) - if current_mtime != entry.file_mtime: - del self.cache[cache_key] - return False - - # Move to end on access (LRU behavior) - self.cache.move_to_end(cache_key) - return True - - def _cache(self, key: str, file_path: str, data: Any) -> None: - """Store data in cache with LRU eviction. - - Args: - key: Cache key - file_path: Path to source file (for mtime tracking) - data: Data to cache - """ - # Remove oldest entries if at capacity - while len(self.cache) >= self.max_cache_size: - self.cache.popitem(last=False) # Remove oldest (FIFO order) - - # Move to end if key exists (update access order) - if key in self.cache: - self.cache.move_to_end(key) - - self.cache[key] = CacheEntry( - data=data, - file_mtime=self._get_file_mtime(file_path), - cached_at=time.time(), - ) - - def clear_cache(self) -> None: - """Clear all cached entries.""" - self.cache.clear() - - async def _request_vscode_bridge(self, action: str, params: Dict[str, Any]) -> Any: - """Make HTTP request to VSCode Bridge (legacy mode). - - Args: - action: The endpoint/action name (e.g., "get_definition") - params: Request parameters - - Returns: - Response data on success, None on failure - """ - url = f"{self.bridge_url}/{action}" - - try: - session = await self._get_session() - async with session.post(url, json=params) as response: - if response.status != 200: - return None - - data = await response.json() - if data.get("success") is False: - return None - - return data.get("result") - - except asyncio.TimeoutError: - return None - except Exception: - return None - - async def get_references(self, symbol: CodeSymbolNode) -> List[Location]: - """Get all references to a symbol via real-time LSP. - - Args: - symbol: The code symbol to find references for - - Returns: - List of Location objects where the symbol is referenced. - Returns empty list on error or timeout. - """ - cache_key = f"refs:{symbol.id}" - - if self._is_cached(cache_key, symbol.file_path): - return self.cache[cache_key].data - - locations: List[Location] = [] - - if self.use_vscode_bridge: - # Legacy: VSCode Bridge HTTP mode - result = await self._request_vscode_bridge("get_references", { - "file_path": symbol.file_path, - "line": symbol.range.start_line, - "character": symbol.range.start_character, - }) - - # Don't cache on connection error (result is None) - if result is None: - return locations - - if isinstance(result, list): - for item in result: - try: - locations.append(Location.from_lsp_response(item)) - except (KeyError, TypeError): - continue - else: - # Default: Standalone mode - manager = await self._ensure_manager() - result = await manager.get_references( - file_path=symbol.file_path, - line=symbol.range.start_line, - character=symbol.range.start_character, - ) - - for item in result: - try: - locations.append(Location.from_lsp_response(item)) - except (KeyError, TypeError): - continue - - logger.debug( - "LSP references for %s (%s:%s:%s): %d", - symbol.id, - symbol.file_path, - symbol.range.start_line, - symbol.range.start_character, - len(locations), - ) - self._cache(cache_key, symbol.file_path, locations) - return locations - - async def get_definition(self, symbol: CodeSymbolNode) -> Optional[Location]: - """Get symbol definition location. - - Args: - symbol: The code symbol to find definition for - - Returns: - Location of the definition, or None if not found - """ - cache_key = f"def:{symbol.id}" - - if self._is_cached(cache_key, symbol.file_path): - return self.cache[cache_key].data - - location: Optional[Location] = None - - if self.use_vscode_bridge: - # Legacy: VSCode Bridge HTTP mode - result = await self._request_vscode_bridge("get_definition", { - "file_path": symbol.file_path, - "line": symbol.range.start_line, - "character": symbol.range.start_character, - }) - - if result: - if isinstance(result, list) and len(result) > 0: - try: - location = Location.from_lsp_response(result[0]) - except (KeyError, TypeError): - pass - elif isinstance(result, dict): - try: - location = Location.from_lsp_response(result) - except (KeyError, TypeError): - pass - else: - # Default: Standalone mode - manager = await self._ensure_manager() - result = await manager.get_definition( - file_path=symbol.file_path, - line=symbol.range.start_line, - character=symbol.range.start_character, - ) - - if result: - try: - location = Location.from_lsp_response(result) - except (KeyError, TypeError): - pass - - self._cache(cache_key, symbol.file_path, location) - return location - - async def get_call_hierarchy(self, symbol: CodeSymbolNode) -> List[CallHierarchyItem]: - """Get incoming/outgoing calls for a symbol. - - If call hierarchy is not supported by the language server, - falls back to using references. - - Args: - symbol: The code symbol to get call hierarchy for - - Returns: - List of CallHierarchyItem representing callers/callees. - Returns empty list on error or if not supported. - """ - cache_key = f"calls:{symbol.id}" - - if self._is_cached(cache_key, symbol.file_path): - return self.cache[cache_key].data - - items: List[CallHierarchyItem] = [] - - if self.use_vscode_bridge: - # Legacy: VSCode Bridge HTTP mode - result = await self._request_vscode_bridge("get_call_hierarchy", { - "file_path": symbol.file_path, - "line": symbol.range.start_line, - "character": symbol.range.start_character, - }) - - if result is None: - # Fallback: use references - refs = await self.get_references(symbol) - for ref in refs: - items.append(CallHierarchyItem( - name=f"caller@{ref.line}", - kind="reference", - file_path=ref.file_path, - range=Range( - start_line=ref.line, - start_character=ref.character, - end_line=ref.line, - end_character=ref.character, - ), - detail="Inferred from reference", - )) - elif isinstance(result, list): - for item in result: - try: - range_data = item.get("range", {}) - start = range_data.get("start", {}) - end = range_data.get("end", {}) - - items.append(CallHierarchyItem( - name=item.get("name", "unknown"), - kind=item.get("kind", "unknown"), - file_path=item.get("file_path", item.get("uri", "")), - range=Range( - start_line=start.get("line", 0) + 1, - start_character=start.get("character", 0) + 1, - end_line=end.get("line", 0) + 1, - end_character=end.get("character", 0) + 1, - ), - detail=item.get("detail"), - )) - except (KeyError, TypeError): - continue - else: - # Default: Standalone mode - manager = await self._ensure_manager() - - # Try to get call hierarchy items - hierarchy_items = await manager.get_call_hierarchy_items( - file_path=symbol.file_path, - line=symbol.range.start_line, - character=symbol.range.start_character, - ) - - if hierarchy_items: - # Get incoming calls for each item - for h_item in hierarchy_items: - incoming = await manager.get_incoming_calls(h_item) - for call in incoming: - from_item = call.get("from", {}) - range_data = from_item.get("range", {}) - start = range_data.get("start", {}) - end = range_data.get("end", {}) - - # Parse URI - uri = from_item.get("uri", "") - if uri.startswith("file://"): - raw = unquote(uri[7:]) # keep leading slash for Unix paths - if raw.startswith("/") and len(raw) > 2 and raw[2] == ":": - raw = raw[1:] - fp = raw - else: - fp = uri - - items.append(CallHierarchyItem( - name=from_item.get("name", "unknown"), - kind=str(from_item.get("kind", "unknown")), - file_path=fp, - range=Range( - start_line=start.get("line", 0) + 1, - start_character=start.get("character", 0) + 1, - end_line=end.get("line", 0) + 1, - end_character=end.get("character", 0) + 1, - ), - detail=from_item.get("detail"), - )) - else: - # Fallback: use references - refs = await self.get_references(symbol) - for ref in refs: - items.append(CallHierarchyItem( - name=f"caller@{ref.line}", - kind="reference", - file_path=ref.file_path, - range=Range( - start_line=ref.line, - start_character=ref.character, - end_line=ref.line, - end_character=ref.character, - ), - detail="Inferred from reference", - )) - - logger.debug( - "LSP call hierarchy for %s (%s:%s:%s): %d", - symbol.id, - symbol.file_path, - symbol.range.start_line, - symbol.range.start_character, - len(items), - ) - self._cache(cache_key, symbol.file_path, items) - return items - - async def get_document_symbols(self, file_path: str) -> List[Dict[str, Any]]: - """Get all symbols in a document (batch operation). - - This is more efficient than individual hover queries when processing - multiple locations in the same file. - - Args: - file_path: Path to the source file - - Returns: - List of symbol dictionaries with name, kind, range, etc. - Returns empty list on error or timeout. - """ - cache_key = f"symbols:{file_path}" - - if self._is_cached(cache_key, file_path): - return self.cache[cache_key].data - - symbols: List[Dict[str, Any]] = [] - - if self.use_vscode_bridge: - # Legacy: VSCode Bridge HTTP mode - result = await self._request_vscode_bridge("get_document_symbols", { - "file_path": file_path, - }) - - if isinstance(result, list): - symbols = self._flatten_document_symbols(result) - else: - # Default: Standalone mode - manager = await self._ensure_manager() - result = await manager.get_document_symbols(file_path) - - if result: - symbols = self._flatten_document_symbols(result) - - self._cache(cache_key, file_path, symbols) - return symbols - - def _flatten_document_symbols( - self, symbols: List[Dict[str, Any]], parent_name: str = "" - ) -> List[Dict[str, Any]]: - """Flatten nested document symbols into a flat list. - - Document symbols can be nested (e.g., methods inside classes). - This flattens them for easier lookup by line number. - - Args: - symbols: List of symbol dictionaries (may be nested) - parent_name: Name of parent symbol for qualification - - Returns: - Flat list of all symbols with their ranges - """ - flat: List[Dict[str, Any]] = [] - - for sym in symbols: - # Add the symbol itself - symbol_entry = { - "name": sym.get("name", "unknown"), - "kind": self._symbol_kind_to_string(sym.get("kind", 0)), - "range": sym.get("range", sym.get("location", {}).get("range", {})), - "selection_range": sym.get("selectionRange", {}), - "detail": sym.get("detail", ""), - "parent": parent_name, - } - flat.append(symbol_entry) - - # Recursively process children - children = sym.get("children", []) - if children: - qualified_name = sym.get("name", "") - if parent_name: - qualified_name = f"{parent_name}.{qualified_name}" - flat.extend(self._flatten_document_symbols(children, qualified_name)) - - return flat - - def _symbol_kind_to_string(self, kind: int) -> str: - """Convert LSP SymbolKind integer to string. - - Args: - kind: LSP SymbolKind enum value - - Returns: - Human-readable string representation - """ - # LSP SymbolKind enum (1-indexed) - kinds = { - 1: "file", - 2: "module", - 3: "namespace", - 4: "package", - 5: "class", - 6: "method", - 7: "property", - 8: "field", - 9: "constructor", - 10: "enum", - 11: "interface", - 12: "function", - 13: "variable", - 14: "constant", - 15: "string", - 16: "number", - 17: "boolean", - 18: "array", - 19: "object", - 20: "key", - 21: "null", - 22: "enum_member", - 23: "struct", - 24: "event", - 25: "operator", - 26: "type_parameter", - } - return kinds.get(kind, "unknown") - - async def get_hover(self, symbol: CodeSymbolNode) -> Optional[str]: - """Get hover documentation for a symbol. - - Args: - symbol: The code symbol to get hover info for - - Returns: - Hover documentation as string, or None if not available - """ - cache_key = f"hover:{symbol.id}" - - if self._is_cached(cache_key, symbol.file_path): - return self.cache[cache_key].data - - hover_text: Optional[str] = None - - if self.use_vscode_bridge: - # Legacy: VSCode Bridge HTTP mode - result = await self._request_vscode_bridge("get_hover", { - "file_path": symbol.file_path, - "line": symbol.range.start_line, - "character": symbol.range.start_character, - }) - - if result: - hover_text = self._parse_hover_result(result) - else: - # Default: Standalone mode - manager = await self._ensure_manager() - hover_text = await manager.get_hover( - file_path=symbol.file_path, - line=symbol.range.start_line, - character=symbol.range.start_character, - ) - - self._cache(cache_key, symbol.file_path, hover_text) - return hover_text - - def _parse_hover_result(self, result: Any) -> Optional[str]: - """Parse hover result into string.""" - if isinstance(result, str): - return result - elif isinstance(result, list): - parts = [] - for item in result: - if isinstance(item, str): - parts.append(item) - elif isinstance(item, dict): - value = item.get("value", item.get("contents", "")) - if value: - parts.append(str(value)) - return "\n\n".join(parts) if parts else None - elif isinstance(result, dict): - contents = result.get("contents", result.get("value", "")) - if isinstance(contents, str): - return contents - elif isinstance(contents, list): - parts = [] - for c in contents: - if isinstance(c, str): - parts.append(c) - elif isinstance(c, dict): - parts.append(str(c.get("value", ""))) - return "\n\n".join(parts) if parts else None - return None - - async def __aenter__(self) -> "LspBridge": - """Async context manager entry.""" - return self - - async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: - """Async context manager exit - close connections.""" - await self.close() - - -# Simple test -if __name__ == "__main__": - import sys - - async def test_lsp_bridge(): - """Simple test of LspBridge functionality.""" - print("Testing LspBridge (Standalone Mode)...") - print(f"Timeout: {LspBridge.DEFAULT_TIMEOUT}s") - print(f"Cache TTL: {LspBridge.DEFAULT_CACHE_TTL}s") - print() - - # Create a test symbol pointing to this file - test_file = os.path.abspath(__file__) - test_symbol = CodeSymbolNode( - id=f"{test_file}:LspBridge:96", - name="LspBridge", - kind="class", - file_path=test_file, - range=Range( - start_line=96, - start_character=1, - end_line=200, - end_character=1, - ), - ) - - print(f"Test symbol: {test_symbol.name} in {os.path.basename(test_symbol.file_path)}") - print() - - # Use standalone mode (default) - async with LspBridge( - workspace_root=str(Path(__file__).parent.parent.parent.parent), - ) as bridge: - print("1. Testing get_document_symbols...") - try: - symbols = await bridge.get_document_symbols(test_file) - print(f" Found {len(symbols)} symbols") - for sym in symbols[:5]: - print(f" - {sym.get('name')} ({sym.get('kind')})") - except Exception as e: - print(f" Error: {e}") - - print() - print("2. Testing get_definition...") - try: - definition = await bridge.get_definition(test_symbol) - if definition: - print(f" Definition: {os.path.basename(definition.file_path)}:{definition.line}") - else: - print(" No definition found") - except Exception as e: - print(f" Error: {e}") - - print() - print("3. Testing get_references...") - try: - refs = await bridge.get_references(test_symbol) - print(f" Found {len(refs)} references") - for ref in refs[:3]: - print(f" - {os.path.basename(ref.file_path)}:{ref.line}") - except Exception as e: - print(f" Error: {e}") - - print() - print("4. Testing get_hover...") - try: - hover = await bridge.get_hover(test_symbol) - if hover: - print(f" Hover: {hover[:100]}...") - else: - print(" No hover info found") - except Exception as e: - print(f" Error: {e}") - - print() - print("5. Testing get_call_hierarchy...") - try: - calls = await bridge.get_call_hierarchy(test_symbol) - print(f" Found {len(calls)} call hierarchy items") - for call in calls[:3]: - print(f" - {call.name} in {os.path.basename(call.file_path)}") - except Exception as e: - print(f" Error: {e}") - - print() - print("6. Testing cache...") - print(f" Cache entries: {len(bridge.cache)}") - for key in list(bridge.cache.keys())[:5]: - print(f" - {key}") - - print() - print("Test complete!") - - # Run the test - # Note: On Windows, use default ProactorEventLoop (supports subprocess creation) - - asyncio.run(test_lsp_bridge()) diff --git a/codex-lens/src/codexlens/lsp/lsp_graph_builder.py b/codex-lens/src/codexlens/lsp/lsp_graph_builder.py deleted file mode 100644 index 446fa2c7..00000000 --- a/codex-lens/src/codexlens/lsp/lsp_graph_builder.py +++ /dev/null @@ -1,383 +0,0 @@ -"""Graph builder for code association graphs via LSP.""" - -from __future__ import annotations - -import asyncio -import logging -from typing import Any, Dict, List, Optional, Set, Tuple - -from codexlens.hybrid_search.data_structures import ( - CallHierarchyItem, - CodeAssociationGraph, - CodeSymbolNode, - Range, -) -from codexlens.lsp.lsp_bridge import ( - Location, - LspBridge, -) - -logger = logging.getLogger(__name__) - - -class LspGraphBuilder: - """Builds code association graph by expanding from seed symbols using LSP.""" - - def __init__( - self, - max_depth: int = 2, - max_nodes: int = 100, - max_concurrent: int = 10, - resolve_symbols: bool = True, - ): - """Initialize GraphBuilder. - - Args: - max_depth: Maximum depth for BFS expansion from seeds. - max_nodes: Maximum number of nodes in the graph. - max_concurrent: Maximum concurrent LSP requests. - resolve_symbols: If False, skip documentSymbol lookups and create lightweight nodes. - """ - self.max_depth = max_depth - self.max_nodes = max_nodes - self.max_concurrent = max_concurrent - self.resolve_symbols = resolve_symbols - # Cache for document symbols per file (avoids per-location hover queries) - self._document_symbols_cache: Dict[str, List[Dict[str, Any]]] = {} - - async def build_from_seeds( - self, - seeds: List[CodeSymbolNode], - lsp_bridge: LspBridge, - ) -> CodeAssociationGraph: - """Build association graph by BFS expansion from seeds. - - For each seed: - 1. Get references via LSP - 2. Get call hierarchy via LSP - 3. Add nodes and edges to graph - 4. Continue expanding until max_depth or max_nodes reached - - Args: - seeds: Initial seed symbols to expand from. - lsp_bridge: LSP bridge for querying language servers. - - Returns: - CodeAssociationGraph with expanded nodes and relationships. - """ - graph = CodeAssociationGraph() - visited: Set[str] = set() - semaphore = asyncio.Semaphore(self.max_concurrent) - - # Initialize queue with seeds at depth 0 - queue: List[Tuple[CodeSymbolNode, int]] = [(s, 0) for s in seeds] - - # Add seed nodes to graph - for seed in seeds: - graph.add_node(seed) - - # BFS expansion - while queue and len(graph.nodes) < self.max_nodes: - # Take a batch of nodes from queue - batch_size = min(self.max_concurrent, len(queue)) - batch = queue[:batch_size] - queue = queue[batch_size:] - - # Expand nodes in parallel - tasks = [ - self._expand_node( - node, depth, graph, lsp_bridge, visited, semaphore - ) - for node, depth in batch - ] - - results = await asyncio.gather(*tasks, return_exceptions=True) - - # Process results and add new nodes to queue - for result in results: - if isinstance(result, Exception): - logger.warning("Error expanding node: %s", result) - continue - if result: - # Add new nodes to queue if not at max depth - for new_node, new_depth in result: - if ( - new_depth <= self.max_depth - and len(graph.nodes) < self.max_nodes - ): - queue.append((new_node, new_depth)) - - return graph - - async def _expand_node( - self, - node: CodeSymbolNode, - depth: int, - graph: CodeAssociationGraph, - lsp_bridge: LspBridge, - visited: Set[str], - semaphore: asyncio.Semaphore, - ) -> List[Tuple[CodeSymbolNode, int]]: - """Expand a single node, return new nodes to process. - - Args: - node: Node to expand. - depth: Current depth in BFS. - graph: Graph to add nodes and edges to. - lsp_bridge: LSP bridge for queries. - visited: Set of visited node IDs. - semaphore: Semaphore for concurrency control. - - Returns: - List of (new_node, new_depth) tuples to add to queue. - """ - # Skip if already visited or at max depth - if node.id in visited: - return [] - # Depth is 0 for seeds. To limit expansion to N hops from seeds, - # we expand nodes with depth < max_depth. - if depth >= self.max_depth: - visited.add(node.id) - return [] - if len(graph.nodes) >= self.max_nodes: - return [] - - visited.add(node.id) - new_nodes: List[Tuple[CodeSymbolNode, int]] = [] - - async with semaphore: - # Get relationships in parallel - try: - refs_task = lsp_bridge.get_references(node) - calls_task = lsp_bridge.get_call_hierarchy(node) - - refs, calls = await asyncio.gather( - refs_task, calls_task, return_exceptions=True - ) - - # Handle reference results - if isinstance(refs, Exception): - logger.debug( - "Failed to get references for %s: %s", node.id, refs - ) - refs = [] - - # Handle call hierarchy results - if isinstance(calls, Exception): - logger.debug( - "Failed to get call hierarchy for %s: %s", - node.id, - calls, - ) - calls = [] - - # Process references - for ref in refs: - if len(graph.nodes) >= self.max_nodes: - break - - ref_node = await self._location_to_node(ref, lsp_bridge) - if ref_node and ref_node.id != node.id: - if ref_node.id not in graph.nodes: - graph.add_node(ref_node) - new_nodes.append((ref_node, depth + 1)) - # Use add_edge since both nodes should exist now - graph.add_edge(node.id, ref_node.id, "references") - - # Process call hierarchy (incoming calls) - for call in calls: - if len(graph.nodes) >= self.max_nodes: - break - - call_node = await self._call_hierarchy_to_node( - call, lsp_bridge - ) - if call_node and call_node.id != node.id: - if call_node.id not in graph.nodes: - graph.add_node(call_node) - new_nodes.append((call_node, depth + 1)) - # Incoming call: call_node calls node - graph.add_edge(call_node.id, node.id, "calls") - - except Exception as e: - logger.warning( - "Error during node expansion for %s: %s", node.id, e - ) - - return new_nodes - - def clear_cache(self) -> None: - """Clear the document symbols cache. - - Call this between searches to free memory and ensure fresh data. - """ - self._document_symbols_cache.clear() - - async def _get_symbol_at_location( - self, - file_path: str, - line: int, - lsp_bridge: LspBridge, - ) -> Optional[Dict[str, Any]]: - """Find symbol at location using cached document symbols. - - This is much more efficient than individual hover queries because - document symbols are fetched once per file and cached. - - Args: - file_path: Path to the source file. - line: Line number (1-based). - lsp_bridge: LSP bridge for fetching document symbols. - - Returns: - Symbol dictionary with name, kind, range, etc., or None if not found. - """ - # Get or fetch document symbols for this file - if file_path not in self._document_symbols_cache: - symbols = await lsp_bridge.get_document_symbols(file_path) - self._document_symbols_cache[file_path] = symbols - - symbols = self._document_symbols_cache[file_path] - - # Find symbol containing this line (best match = smallest range) - best_match: Optional[Dict[str, Any]] = None - best_range_size = float("inf") - - for symbol in symbols: - sym_range = symbol.get("range", {}) - start = sym_range.get("start", {}) - end = sym_range.get("end", {}) - - # LSP ranges are 0-based, our line is 1-based - start_line = start.get("line", 0) + 1 - end_line = end.get("line", 0) + 1 - - if start_line <= line <= end_line: - range_size = end_line - start_line - if range_size < best_range_size: - best_match = symbol - best_range_size = range_size - - return best_match - - async def _location_to_node( - self, - location: Location, - lsp_bridge: LspBridge, - ) -> Optional[CodeSymbolNode]: - """Convert LSP location to CodeSymbolNode. - - Uses cached document symbols instead of individual hover queries - for better performance. - - Args: - location: LSP location to convert. - lsp_bridge: LSP bridge for additional queries. - - Returns: - CodeSymbolNode or None if conversion fails. - """ - try: - file_path = location.file_path - start_line = location.line - - # Try to find symbol info from cached document symbols (fast) - symbol_info = None - if self.resolve_symbols: - symbol_info = await self._get_symbol_at_location( - file_path, start_line, lsp_bridge - ) - - if symbol_info: - name = symbol_info.get("name", f"symbol_L{start_line}") - kind = symbol_info.get("kind", "unknown") - - # Extract range from symbol if available - sym_range = symbol_info.get("range", {}) - start = sym_range.get("start", {}) - end = sym_range.get("end", {}) - - location_range = Range( - start_line=start.get("line", start_line - 1) + 1, - start_character=start.get("character", location.character - 1) + 1, - end_line=end.get("line", start_line - 1) + 1, - end_character=end.get("character", location.character - 1) + 1, - ) - else: - # Fallback to basic node without symbol info - name = f"symbol_L{start_line}" - kind = "unknown" - location_range = Range( - start_line=location.line, - start_character=location.character, - end_line=location.line, - end_character=location.character, - ) - - node_id = self._create_node_id(file_path, name, start_line) - - return CodeSymbolNode( - id=node_id, - name=name, - kind=kind, - file_path=file_path, - range=location_range, - docstring="", # Skip hover for performance - ) - - except Exception as e: - logger.debug("Failed to convert location to node: %s", e) - return None - - async def _call_hierarchy_to_node( - self, - call_item: CallHierarchyItem, - lsp_bridge: LspBridge, - ) -> Optional[CodeSymbolNode]: - """Convert CallHierarchyItem to CodeSymbolNode. - - Args: - call_item: Call hierarchy item to convert. - lsp_bridge: LSP bridge (unused, kept for API consistency). - - Returns: - CodeSymbolNode or None if conversion fails. - """ - try: - file_path = call_item.file_path - name = call_item.name - start_line = call_item.range.start_line - # CallHierarchyItem.kind is already a string - kind = call_item.kind - - node_id = self._create_node_id(file_path, name, start_line) - - return CodeSymbolNode( - id=node_id, - name=name, - kind=kind, - file_path=file_path, - range=call_item.range, - docstring=call_item.detail or "", - ) - - except Exception as e: - logger.debug( - "Failed to convert call hierarchy item to node: %s", e - ) - return None - - def _create_node_id( - self, file_path: str, name: str, line: int - ) -> str: - """Create unique node ID. - - Args: - file_path: Path to the file. - name: Symbol name. - line: Line number (0-based). - - Returns: - Unique node ID string. - """ - return f"{file_path}:{name}:{line}" diff --git a/codex-lens/src/codexlens/lsp/providers.py b/codex-lens/src/codexlens/lsp/providers.py deleted file mode 100644 index d0275437..00000000 --- a/codex-lens/src/codexlens/lsp/providers.py +++ /dev/null @@ -1,177 +0,0 @@ -"""LSP feature providers.""" - -from __future__ import annotations - -import logging -from dataclasses import dataclass -from pathlib import Path -from typing import Optional, TYPE_CHECKING - -if TYPE_CHECKING: - from codexlens.storage.global_index import GlobalSymbolIndex - from codexlens.storage.registry import RegistryStore - -logger = logging.getLogger(__name__) - - -@dataclass -class HoverInfo: - """Hover information for a symbol.""" - - name: str - kind: str - signature: str - documentation: Optional[str] - file_path: str - line_range: tuple # (start_line, end_line) - - -class HoverProvider: - """Provides hover information for symbols.""" - - def __init__( - self, - global_index: "GlobalSymbolIndex", - registry: Optional["RegistryStore"] = None, - ) -> None: - """Initialize hover provider. - - Args: - global_index: Global symbol index for lookups - registry: Optional registry store for index path resolution - """ - self.global_index = global_index - self.registry = registry - - def get_hover_info(self, symbol_name: str) -> Optional[HoverInfo]: - """Get hover information for a symbol. - - Args: - symbol_name: Name of the symbol to look up - - Returns: - HoverInfo or None if symbol not found - """ - # Look up symbol in global index using exact match - symbols = self.global_index.search( - name=symbol_name, - limit=1, - prefix_mode=False, - ) - - # Filter for exact name match - exact_matches = [s for s in symbols if s.name == symbol_name] - - if not exact_matches: - return None - - symbol = exact_matches[0] - - # Extract signature from source file - signature = self._extract_signature(symbol) - - # Symbol uses 'file' attribute and 'range' tuple - file_path = symbol.file or "" - start_line, end_line = symbol.range - - return HoverInfo( - name=symbol.name, - kind=symbol.kind, - signature=signature, - documentation=None, # Symbol doesn't have docstring field - file_path=file_path, - line_range=(start_line, end_line), - ) - - def _extract_signature(self, symbol) -> str: - """Extract function/class signature from source file. - - Args: - symbol: Symbol object with file and range information - - Returns: - Extracted signature string or fallback kind + name - """ - try: - file_path = Path(symbol.file) if symbol.file else None - if not file_path or not file_path.exists(): - return f"{symbol.kind} {symbol.name}" - - content = file_path.read_text(encoding="utf-8", errors="ignore") - lines = content.split("\n") - - # Extract signature lines (first line of definition + continuation) - start_line = symbol.range[0] - 1 # Convert 1-based to 0-based - if start_line >= len(lines) or start_line < 0: - return f"{symbol.kind} {symbol.name}" - - signature_lines = [] - first_line = lines[start_line] - signature_lines.append(first_line) - - # Continue if multiline signature (no closing paren + colon yet) - # Look for patterns like "def func(", "class Foo(", etc. - i = start_line + 1 - max_lines = min(start_line + 5, len(lines)) - while i < max_lines: - line = signature_lines[-1] - # Stop if we see closing pattern - if "):" in line or line.rstrip().endswith(":"): - break - signature_lines.append(lines[i]) - i += 1 - - return "\n".join(signature_lines) - - except Exception as e: - logger.debug(f"Failed to extract signature for {symbol.name}: {e}") - return f"{symbol.kind} {symbol.name}" - - def format_hover_markdown(self, info: HoverInfo) -> str: - """Format hover info as Markdown. - - Args: - info: HoverInfo object to format - - Returns: - Markdown-formatted hover content - """ - parts = [] - - # Detect language for code fence based on file extension - ext = Path(info.file_path).suffix.lower() if info.file_path else "" - lang_map = { - ".py": "python", - ".js": "javascript", - ".ts": "typescript", - ".tsx": "typescript", - ".jsx": "javascript", - ".java": "java", - ".go": "go", - ".rs": "rust", - ".c": "c", - ".cpp": "cpp", - ".h": "c", - ".hpp": "cpp", - ".cs": "csharp", - ".rb": "ruby", - ".php": "php", - } - lang = lang_map.get(ext, "") - - # Code block with signature - parts.append(f"```{lang}\n{info.signature}\n```") - - # Documentation if available - if info.documentation: - parts.append(f"\n---\n\n{info.documentation}") - - # Location info - file_name = Path(info.file_path).name if info.file_path else "unknown" - parts.append( - f"\n---\n\n*{info.kind}* defined in " - f"`{file_name}` " - f"(line {info.line_range[0]})" - ) - - return "\n".join(parts) diff --git a/codex-lens/src/codexlens/lsp/server.py b/codex-lens/src/codexlens/lsp/server.py deleted file mode 100644 index 809bba9e..00000000 --- a/codex-lens/src/codexlens/lsp/server.py +++ /dev/null @@ -1,263 +0,0 @@ -"""codex-lens LSP Server implementation using pygls. - -This module provides the main Language Server class and entry point. -""" - -from __future__ import annotations - -import argparse -import logging -import sys -from pathlib import Path -from typing import Optional - -try: - from lsprotocol import types as lsp - from pygls.lsp.server import LanguageServer -except ImportError as exc: - raise ImportError( - "LSP dependencies not installed. Install with: pip install codex-lens[lsp]" - ) from exc - -from codexlens.config import Config -from codexlens.search.chain_search import ChainSearchEngine -from codexlens.storage.global_index import GlobalSymbolIndex -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore - -logger = logging.getLogger(__name__) - - -class CodexLensLanguageServer(LanguageServer): - """Language Server for codex-lens code indexing. - - Provides IDE features using codex-lens symbol index: - - Go to Definition - - Find References - - Code Completion - - Hover Information - - Workspace Symbol Search - - Attributes: - registry: Global project registry for path lookups - mapper: Path mapper for source/index conversions - global_index: Project-wide symbol index - search_engine: Chain search engine for symbol search - workspace_root: Current workspace root path - """ - - def __init__(self) -> None: - super().__init__(name="codexlens-lsp", version="0.1.0") - - self.registry: Optional[RegistryStore] = None - self.mapper: Optional[PathMapper] = None - self.global_index: Optional[GlobalSymbolIndex] = None - self.search_engine: Optional[ChainSearchEngine] = None - self.workspace_root: Optional[Path] = None - self._config: Optional[Config] = None - - def initialize_components(self, workspace_root: Path) -> bool: - """Initialize codex-lens components for the workspace. - - Args: - workspace_root: Root path of the workspace - - Returns: - True if initialization succeeded, False otherwise - """ - self.workspace_root = workspace_root.resolve() - logger.info("Initializing codex-lens for workspace: %s", self.workspace_root) - - try: - # Initialize registry - self.registry = RegistryStore() - self.registry.initialize() - - # Initialize path mapper - self.mapper = PathMapper() - - # Try to find project in registry - project_info = self.registry.find_by_source_path(str(self.workspace_root)) - - if project_info: - project_id = int(project_info["id"]) - index_root = Path(project_info["index_root"]) - - # Initialize global symbol index - global_db = index_root / GlobalSymbolIndex.DEFAULT_DB_NAME - self.global_index = GlobalSymbolIndex(global_db, project_id) - self.global_index.initialize() - - # Initialize search engine - self._config = Config() - self.search_engine = ChainSearchEngine( - registry=self.registry, - mapper=self.mapper, - config=self._config, - ) - - logger.info("codex-lens initialized for project: %s", project_info["source_root"]) - return True - else: - logger.warning( - "Workspace not indexed by codex-lens: %s. " - "Run 'codexlens index %s' to index first.", - self.workspace_root, - self.workspace_root, - ) - return False - - except Exception as exc: - logger.error("Failed to initialize codex-lens: %s", exc) - return False - - def shutdown_components(self) -> None: - """Clean up codex-lens components.""" - if self.global_index: - try: - self.global_index.close() - except Exception as exc: - logger.debug("Error closing global index: %s", exc) - self.global_index = None - - if self.search_engine: - try: - self.search_engine.close() - except Exception as exc: - logger.debug("Error closing search engine: %s", exc) - self.search_engine = None - - if self.registry: - try: - self.registry.close() - except Exception as exc: - logger.debug("Error closing registry: %s", exc) - self.registry = None - - -# Create server instance -server = CodexLensLanguageServer() - - -@server.feature(lsp.INITIALIZE) -def lsp_initialize(params: lsp.InitializeParams) -> lsp.InitializeResult: - """Handle LSP initialize request.""" - logger.info("LSP initialize request received") - - # Get workspace root - workspace_root: Optional[Path] = None - if params.root_uri: - workspace_root = Path(params.root_uri.replace("file://", "").replace("file:", "")) - elif params.root_path: - workspace_root = Path(params.root_path) - - if workspace_root: - server.initialize_components(workspace_root) - - # Declare server capabilities - return lsp.InitializeResult( - capabilities=lsp.ServerCapabilities( - text_document_sync=lsp.TextDocumentSyncOptions( - open_close=True, - change=lsp.TextDocumentSyncKind.Incremental, - save=lsp.SaveOptions(include_text=False), - ), - definition_provider=True, - references_provider=True, - completion_provider=lsp.CompletionOptions( - trigger_characters=[".", ":"], - resolve_provider=False, - ), - hover_provider=True, - workspace_symbol_provider=True, - ), - server_info=lsp.ServerInfo( - name="codexlens-lsp", - version="0.1.0", - ), - ) - - -@server.feature(lsp.SHUTDOWN) -def lsp_shutdown(params: None) -> None: - """Handle LSP shutdown request.""" - logger.info("LSP shutdown request received") - server.shutdown_components() - - -def main() -> int: - """Entry point for codexlens-lsp command. - - Returns: - Exit code (0 for success) - """ - # Import handlers to register them with the server - # This must be done before starting the server - import codexlens.lsp.handlers # noqa: F401 - - parser = argparse.ArgumentParser( - description="codex-lens Language Server", - prog="codexlens-lsp", - ) - parser.add_argument( - "--stdio", - action="store_true", - default=True, - help="Use stdio for communication (default)", - ) - parser.add_argument( - "--tcp", - action="store_true", - help="Use TCP for communication", - ) - parser.add_argument( - "--host", - default="127.0.0.1", - help="TCP host (default: 127.0.0.1)", - ) - parser.add_argument( - "--port", - type=int, - default=2087, - help="TCP port (default: 2087)", - ) - parser.add_argument( - "--log-level", - choices=["DEBUG", "INFO", "WARNING", "ERROR"], - default="INFO", - help="Log level (default: INFO)", - ) - parser.add_argument( - "--log-file", - help="Log file path (optional)", - ) - - args = parser.parse_args() - - # Configure logging - log_handlers = [] - if args.log_file: - log_handlers.append(logging.FileHandler(args.log_file)) - else: - log_handlers.append(logging.StreamHandler(sys.stderr)) - - logging.basicConfig( - level=getattr(logging, args.log_level), - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", - handlers=log_handlers, - ) - - logger.info("Starting codexlens-lsp server") - - if args.tcp: - logger.info("Starting TCP server on %s:%d", args.host, args.port) - server.start_tcp(args.host, args.port) - else: - logger.info("Starting stdio server") - server.start_io() - - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/codex-lens/src/codexlens/lsp/standalone_manager.py b/codex-lens/src/codexlens/lsp/standalone_manager.py deleted file mode 100644 index d2a57de5..00000000 --- a/codex-lens/src/codexlens/lsp/standalone_manager.py +++ /dev/null @@ -1,1307 +0,0 @@ -"""Standalone Language Server Manager for direct LSP communication. - -This module provides direct communication with language servers via JSON-RPC over stdio, -eliminating the need for VSCode Bridge. Similar to cclsp architecture. - -Features: -- Direct subprocess spawning of language servers -- JSON-RPC 2.0 communication over stdin/stdout -- Multi-language support via configuration file (lsp-servers.json) -- Process lifecycle management with auto-restart -- Compatible interface with existing LspBridge -""" - -from __future__ import annotations - -import asyncio -import importlib.resources as resources -import json -import logging -import os -import sys -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple -from urllib.parse import unquote, urlparse - -logger = logging.getLogger(__name__) - - -@dataclass -class ServerConfig: - """Configuration for a language server.""" - - language_id: str - display_name: str - extensions: List[str] - command: List[str] - enabled: bool = True - initialization_options: Dict[str, Any] = field(default_factory=dict) - settings: Dict[str, Any] = field(default_factory=dict) - root_dir: str = "." - timeout: int = 30000 # ms - restart_interval: int = 5000 # ms - max_restarts: int = 3 - - -@dataclass -class ServerState: - """State of a running language server.""" - - config: ServerConfig - process: asyncio.subprocess.Process - reader: asyncio.StreamReader - writer: asyncio.StreamWriter - request_id: int = 0 - initialized: bool = False - capabilities: Dict[str, Any] = field(default_factory=dict) - pending_requests: Dict[int, asyncio.Future] = field(default_factory=dict) - restart_count: int = 0 - # Queue for producer-consumer pattern - continuous reading puts messages here - message_queue: asyncio.Queue = field(default_factory=asyncio.Queue) - # Track opened documents to avoid redundant didOpen spam (and unnecessary delays). - # Key: document URI -> (version, file_mtime) - opened_documents: Dict[str, Tuple[int, float]] = field(default_factory=dict) - opened_documents_lock: asyncio.Lock = field(default_factory=asyncio.Lock) - - -class StandaloneLspManager: - """Manager for direct language server communication. - - Spawns language servers as subprocesses and communicates via JSON-RPC - over stdin/stdout. No VSCode or GUI dependency required. - - Example: - manager = StandaloneLspManager(workspace_root="/path/to/project") - await manager.start() - - definition = await manager.get_definition( - file_path="src/main.py", - line=10, - character=5 - ) - - await manager.stop() - """ - - DEFAULT_CONFIG_FILE = "lsp-servers.json" - - def __init__( - self, - workspace_root: Optional[str] = None, - config_file: Optional[str] = None, - timeout: float = 30.0, - ): - """Initialize StandaloneLspManager. - - Args: - workspace_root: Root directory of the workspace (used for rootUri) - config_file: Path to lsp-servers.json configuration file - timeout: Default timeout for LSP requests in seconds - """ - self.workspace_root = Path(workspace_root or os.getcwd()).resolve() - self.config_file = config_file - self.timeout = timeout - - self._servers: Dict[str, ServerState] = {} # language_id -> ServerState - self._extension_map: Dict[str, str] = {} # extension -> language_id - self._configs: Dict[str, ServerConfig] = {} # language_id -> ServerConfig - self._read_tasks: Dict[str, asyncio.Task] = {} # language_id -> read task - self._stderr_tasks: Dict[str, asyncio.Task] = {} # language_id -> stderr read task - self._processor_tasks: Dict[str, asyncio.Task] = {} # language_id -> message processor task - self._lock = asyncio.Lock() - - def _find_config_file(self) -> Optional[Path]: - """Find the lsp-servers.json configuration file. - - Search order: - 1. Explicit config_file parameter - 2. {workspace_root}/lsp-servers.json - 3. {workspace_root}/.codexlens/lsp-servers.json - """ - search_paths = [] - - if self.config_file: - search_paths.append(Path(self.config_file)) - - search_paths.extend([ - self.workspace_root / self.DEFAULT_CONFIG_FILE, - self.workspace_root / ".codexlens" / self.DEFAULT_CONFIG_FILE, - ]) - - for path in search_paths: - if path.exists(): - return path - - return None - - def _load_builtin_config(self) -> Optional[dict[str, Any]]: - """Load the built-in default lsp-servers.json shipped with the package.""" - try: - text = ( - resources.files("codexlens.lsp") - .joinpath(self.DEFAULT_CONFIG_FILE) - .read_text(encoding="utf-8") - ) - except Exception as exc: - logger.error( - "Failed to load built-in %s template from package: %s", - self.DEFAULT_CONFIG_FILE, - exc, - ) - return None - - try: - return json.loads(text) - except Exception as exc: - logger.error( - "Built-in %s template shipped with the package is invalid JSON: %s", - self.DEFAULT_CONFIG_FILE, - exc, - ) - return None - - def _load_config(self) -> None: - """Load language server configuration from JSON file.""" - self._configs.clear() - self._extension_map.clear() - - config_path = self._find_config_file() - - if not config_path: - data = self._load_builtin_config() - if data is None: - logger.warning( - "No %s found and built-in defaults could not be loaded; using empty config", - self.DEFAULT_CONFIG_FILE, - ) - return - - root_config_path = self.workspace_root / self.DEFAULT_CONFIG_FILE - codexlens_config_path = ( - self.workspace_root / ".codexlens" / self.DEFAULT_CONFIG_FILE - ) - - logger.info( - "No %s found at %s or %s; using built-in defaults shipped with codex-lens. " - "To customize, copy the template to one of those locations and restart. " - "Language servers are spawned on-demand when first needed. " - "Ensure the language server commands in the config are installed and on PATH.", - self.DEFAULT_CONFIG_FILE, - root_config_path, - codexlens_config_path, - ) - config_source = "built-in defaults" - else: - try: - with open(config_path, "r", encoding="utf-8") as f: - data = json.load(f) - except Exception as e: - logger.error(f"Failed to load config from {config_path}: {e}") - return - - config_source = str(config_path) - - # Parse defaults - defaults = data.get("defaults", {}) - default_timeout = defaults.get("timeout", 30000) - default_restart_interval = defaults.get("restartInterval", 5000) - default_max_restarts = defaults.get("maxRestarts", 3) - - # Parse servers - for server_data in data.get("servers", []): - if not server_data.get("enabled", True): - continue - - language_id = server_data.get("languageId", "") - if not language_id: - continue - - config = ServerConfig( - language_id=language_id, - display_name=server_data.get("displayName", language_id), - extensions=server_data.get("extensions", []), - command=server_data.get("command", []), - enabled=server_data.get("enabled", True), - initialization_options=server_data.get("initializationOptions", {}), - settings=server_data.get("settings", {}), - root_dir=server_data.get("rootDir", defaults.get("rootDir", ".")), - timeout=server_data.get("timeout", default_timeout), - restart_interval=server_data.get("restartInterval", default_restart_interval), - max_restarts=server_data.get("maxRestarts", default_max_restarts), - ) - - self._configs[language_id] = config - - # Build extension map - for ext in config.extensions: - self._extension_map[ext.lower()] = language_id - - logger.info( - "Loaded %d language server configs from %s", - len(self._configs), - config_source, - ) - - def get_language_id(self, file_path: str) -> Optional[str]: - """Get language ID for a file based on extension. - - Args: - file_path: Path to the file - - Returns: - Language ID (e.g., "python", "typescript") or None if unknown - """ - ext = Path(file_path).suffix.lstrip(".").lower() - return self._extension_map.get(ext) - - async def start(self) -> None: - """Initialize the manager and load configuration. - - This does NOT start any language servers yet - they are started - on-demand when first needed for a file type. - """ - self._load_config() - logger.info(f"StandaloneLspManager started for workspace: {self.workspace_root}") - - async def stop(self) -> None: - """Stop all running language servers and cleanup.""" - async with self._lock: - for language_id in list(self._servers.keys()): - await self._stop_server(language_id) - - logger.info("StandaloneLspManager stopped") - - async def _start_server(self, language_id: str) -> Optional[ServerState]: - """Start a language server for the given language. - - Args: - language_id: The language ID (e.g., "python") - - Returns: - ServerState if successful, None on failure - """ - config = self._configs.get(language_id) - if not config: - logger.error(f"No configuration for language: {language_id}") - return None - - if not config.command: - logger.error(f"No command specified for {language_id}") - return None - - try: - logger.info(f"Starting {config.display_name}: {' '.join(config.command)}") - - # Spawn the language server process - process = await asyncio.create_subprocess_exec( - *config.command, - stdin=asyncio.subprocess.PIPE, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - cwd=str(self.workspace_root), - ) - - if process.stdin is None or process.stdout is None: - logger.error(f"Failed to get stdin/stdout for {language_id}") - process.terminate() - return None - - state = ServerState( - config=config, - process=process, - reader=process.stdout, - writer=process.stdin, - ) - - self._servers[language_id] = state - - # Start reading stderr in background (prevents pipe buffer from filling up) - if process.stderr: - self._stderr_tasks[language_id] = asyncio.create_task( - self._read_stderr(language_id, process.stderr) - ) - - # CRITICAL: Start the continuous reader task IMMEDIATELY before any communication - # This ensures no messages are lost during initialization handshake - self._read_tasks[language_id] = asyncio.create_task( - self._continuous_reader(language_id) - ) - - # Start the message processor task to handle queued messages - self._processor_tasks[language_id] = asyncio.create_task(self._process_messages(language_id)) - - # Initialize the server - now uses queue for reading responses - await self._initialize_server(state) - - logger.info(f"{config.display_name} started and initialized") - return state - - except FileNotFoundError: - logger.error( - f"Language server not found: {config.command[0]}. " - f"Install it with the appropriate package manager." - ) - return None - except Exception as e: - logger.error(f"Failed to start {language_id}: {e}") - return None - - async def _stop_server(self, language_id: str) -> None: - """Stop a language server.""" - state = self._servers.pop(language_id, None) - if not state: - return - - # Cancel read task - task = self._read_tasks.pop(language_id, None) - if task: - task.cancel() - try: - await task - except asyncio.CancelledError: - pass - - # Cancel stderr task - stderr_task = self._stderr_tasks.pop(language_id, None) - if stderr_task: - stderr_task.cancel() - try: - await stderr_task - except asyncio.CancelledError: - pass - - # Cancel message processor task - processor_task = self._processor_tasks.pop(language_id, None) - if processor_task: - processor_task.cancel() - try: - await processor_task - except asyncio.CancelledError: - pass - - # Send shutdown request - try: - await self._send_request(state, "shutdown", None, timeout=5.0) - except Exception: - pass - - # Send exit notification - try: - await self._send_notification(state, "exit", None) - except Exception: - pass - - # Terminate process - if state.process.returncode is None: - state.process.terminate() - try: - await asyncio.wait_for(state.process.wait(), timeout=5.0) - except asyncio.TimeoutError: - state.process.kill() - - logger.info(f"Stopped {state.config.display_name}") - - async def _get_server(self, file_path: str) -> Optional[ServerState]: - """Get or start the appropriate language server for a file. - - Args: - file_path: Path to the file being operated on - - Returns: - ServerState for the appropriate language server, or None - """ - file_path = self._normalize_file_path(file_path) - language_id = self.get_language_id(file_path) - if not language_id: - logger.debug(f"No language server configured for: {file_path}") - return None - - async with self._lock: - if language_id in self._servers: - state = self._servers[language_id] - # Check if process is still running - if state.process.returncode is None: - return state - # Process died, remove it - del self._servers[language_id] - - # Start new server - return await self._start_server(language_id) - - def _normalize_file_path(self, file_path_or_uri: str) -> str: - """Normalize a file path that may be an LSP file URI or URI-path. - - LSP responses often contain `file://` URIs with percent-encoding - (e.g. `file:///d%3A/...`). Some code paths may forward the parsed - URI path (`/d%3A/...`) without the scheme. On Windows, `Path(...)` - would interpret that as a root path on the current drive, producing - invalid paths like `D:\\d%3A\\...`. - """ - if not file_path_or_uri: - return file_path_or_uri - - raw = str(file_path_or_uri).strip() - - if raw.startswith("file:"): - try: - parsed = urlparse(raw) - if parsed.scheme == "file": - raw = unquote(parsed.path) - else: - raw = raw.replace("file:///", "").replace("file://", "") - except Exception: - raw = raw.replace("file:///", "").replace("file://", "") - - # Decode percent-encoded segments (e.g. d%3A -> d:) - if "%3a" in raw.lower(): - try: - raw = unquote(raw) - except Exception: - pass - - # Windows: file URI paths frequently look like "/C:/path"; strip the extra slash. - if raw.startswith("/") and len(raw) > 2 and raw[2] == ":": - raw = raw[1:] - - return raw - - async def _initialize_server(self, state: ServerState) -> None: - """Send initialize request and wait for response via the message queue. - - The continuous reader and message processor are already running, so we just - send the request and wait for the response via pending_requests. - """ - root_uri = self.workspace_root.as_uri() - - # Simplified params matching direct test that works - params = { - "processId": None, # Use None like direct test - "rootUri": root_uri, - "rootPath": str(self.workspace_root), - "capabilities": { - "textDocument": { - "documentSymbol": { - "hierarchicalDocumentSymbolSupport": True, - }, - }, - "workspace": { - "configuration": True, - }, - }, - "workspaceFolders": [ - { - "uri": root_uri, - "name": self.workspace_root.name, - } - ], - } - - # Send initialize request and wait for response via queue - state.request_id += 1 - init_request_id = state.request_id - - # Create future for the response - future: asyncio.Future = asyncio.get_event_loop().create_future() - state.pending_requests[init_request_id] = future - - # Send the request - init_message = { - "jsonrpc": "2.0", - "id": init_request_id, - "method": "initialize", - "params": params, - } - encoded = self._encode_message(init_message) - logger.debug(f"Sending initialize request id={init_request_id}") - state.writer.write(encoded) - await state.writer.drain() - - # Wait for response (will be routed by _process_messages) - try: - init_result = await asyncio.wait_for(future, timeout=30.0) - except asyncio.TimeoutError: - state.pending_requests.pop(init_request_id, None) - raise RuntimeError("Initialize request timed out") - - if init_result is None: - init_result = {} - - # Store capabilities - state.capabilities = init_result.get("capabilities", {}) - state.initialized = True - logger.debug(f"Initialize response received, capabilities: {len(state.capabilities)} keys") - - # Send initialized notification - await self._send_notification(state, "initialized", {}) - - # Give time for server to process initialized and send any requests - # The message processor will handle workspace/configuration automatically - await asyncio.sleep(0.5) - - def _encode_message(self, content: Dict[str, Any]) -> bytes: - """Encode a JSON-RPC message with LSP headers.""" - body = json.dumps(content).encode("utf-8") - header = f"Content-Length: {len(body)}\r\n\r\n" - return header.encode("ascii") + body - - async def _read_message(self, reader: asyncio.StreamReader) -> Tuple[Optional[Dict[str, Any]], bool]: - """Read a JSON-RPC message from the stream. - - Returns: - Tuple of (message, stream_closed). If stream_closed is True, the reader loop - should exit. If False and message is None, it was just a timeout. - """ - try: - # Read headers - content_length = 0 - while True: - try: - line = await asyncio.wait_for(reader.readline(), timeout=1.0) - except asyncio.TimeoutError: - # Timeout is not an error - just no message available yet - return None, False - - if not line: - # Empty read means stream closed - return None, True - - line_str = line.decode("ascii").strip() - if line_str: # Only log non-empty lines - logger.debug(f"Read header line: {repr(line_str[:80])}") - if not line_str: - break # Empty line = end of headers - - if line_str.lower().startswith("content-length:"): - content_length = int(line_str.split(":")[1].strip()) - - if content_length == 0: - return None, False - - # Read body - body = await reader.readexactly(content_length) - return json.loads(body.decode("utf-8")), False - - except asyncio.IncompleteReadError: - return None, True - except Exception as e: - logger.error(f"Error reading message: {e}") - return None, True - - async def _continuous_reader(self, language_id: str) -> None: - """Continuously read messages from language server and put them in the queue. - - This is the PRODUCER in the producer-consumer pattern. It starts IMMEDIATELY - after subprocess creation and runs continuously until shutdown. This ensures - no messages are ever lost, even during initialization handshake. - """ - state = self._servers.get(language_id) - if not state: - return - - logger.debug(f"Continuous reader started for {language_id}") - - try: - while True: - try: - # Read headers with timeout - content_length = 0 - while True: - try: - line = await asyncio.wait_for(state.reader.readline(), timeout=5.0) - except asyncio.TimeoutError: - continue # Keep waiting for data - - if not line: - logger.debug(f"Continuous reader for {language_id}: EOF") - return - - line_str = line.decode("ascii").strip() - if not line_str: - break # End of headers - - if line_str.lower().startswith("content-length:"): - content_length = int(line_str.split(":")[1].strip()) - - if content_length == 0: - continue - - # Read body - body = await state.reader.readexactly(content_length) - message = json.loads(body.decode("utf-8")) - - # Put message in queue for processing - await state.message_queue.put(message) - - msg_id = message.get("id", "none") - msg_method = message.get("method", "none") - logger.debug(f"Queued message: id={msg_id}, method={msg_method}") - - except asyncio.IncompleteReadError: - logger.debug(f"Continuous reader for {language_id}: IncompleteReadError") - return - except Exception as e: - logger.error(f"Error in continuous reader for {language_id}: {e}") - await asyncio.sleep(0.1) - - except asyncio.CancelledError: - logger.debug(f"Continuous reader cancelled for {language_id}") - except Exception as e: - logger.error(f"Fatal error in continuous reader for {language_id}: {e}") - - async def _process_messages(self, language_id: str) -> None: - """Process messages from the queue and route them appropriately. - - This is the CONSUMER in the producer-consumer pattern. It handles: - - Server requests (workspace/configuration, etc.) - responds immediately - - Notifications (window/logMessage, etc.) - logs them - - Responses to our requests are NOT handled here - they're consumed by _wait_for_response - """ - state = self._servers.get(language_id) - if not state: - return - - logger.debug(f"Message processor started for {language_id}") - - try: - while True: - # Get message from queue (blocks until available) - message = await state.message_queue.get() - - msg_id = message.get("id") - method = message.get("method", "") - - # Response (has id but no method) - put back for _wait_for_response to consume - if msg_id is not None and not method: - # This is a response to one of our requests - if msg_id in state.pending_requests: - future = state.pending_requests.pop(msg_id) - if "error" in message: - future.set_exception( - Exception(message["error"].get("message", "Unknown error")) - ) - else: - future.set_result(message.get("result")) - logger.debug(f"Response routed to pending request id={msg_id}") - else: - logger.debug(f"No pending request for response id={msg_id}") - - # Server request (has both id and method) - needs response - elif msg_id is not None and method: - logger.info(f"Server request: {method} (id={msg_id})") - await self._handle_server_request(state, message) - - # Notification (has method but no id) - elif method: - self._handle_server_message(language_id, message) - - state.message_queue.task_done() - - except asyncio.CancelledError: - logger.debug(f"Message processor cancelled for {language_id}") - except Exception as e: - logger.error(f"Error in message processor for {language_id}: {e}") - - async def _read_stderr(self, language_id: str, stderr: asyncio.StreamReader) -> None: - """Background task to read stderr from a language server. - - This prevents the stderr pipe buffer from filling up, which would - cause the language server process to block and stop responding. - """ - try: - while True: - line = await stderr.readline() - if not line: - break - text = line.decode("utf-8", errors="replace").rstrip() - if text: - # Log stderr output at warning level for visibility - logger.warning(f"[{language_id}] {text}") - except asyncio.CancelledError: - pass - except Exception as e: - logger.debug(f"Error reading stderr for {language_id}: {e}") - - def _handle_server_message(self, language_id: str, message: Dict[str, Any]) -> None: - """Handle notifications from the language server.""" - method = message.get("method", "") - params = message.get("params", {}) - - if method == "window/logMessage": - level = params.get("type", 4) # 1=error, 2=warn, 3=info, 4=log - text = params.get("message", "") - if level == 1: - logger.error(f"[{language_id}] {text}") - elif level == 2: - logger.warning(f"[{language_id}] {text}") - else: - logger.debug(f"[{language_id}] {text}") - - elif method == "window/showMessage": - text = params.get("message", "") - logger.info(f"[{language_id}] {text}") - - async def _handle_server_request(self, state: ServerState, message: Dict[str, Any]) -> None: - """Handle requests from the language server that need a response.""" - request_id = message["id"] - method = message.get("method", "") - params = message.get("params", {}) - - logger.info(f"SERVER REQUEST: {method} (id={request_id}) params={params}") - - result = None - - if method == "workspace/configuration": - # Return configuration items for each requested scope - items = params.get("items", []) - result = [] - for item in items: - section = item.get("section", "") - # Provide Python-specific settings for pyright - if section == "python": - result.append({ - "pythonPath": "python", - "analysis": { - "autoSearchPaths": True, - "useLibraryCodeForTypes": True, - "diagnosticMode": "workspace", - } - }) - elif section == "python.analysis": - result.append({ - "autoSearchPaths": True, - "useLibraryCodeForTypes": True, - "diagnosticMode": "workspace", - "typeCheckingMode": "basic", - }) - else: - # Return empty object for unknown sections - result.append({}) - sections = [item.get("section", "") for item in items] - logger.info(f"Responding to workspace/configuration with {len(result)} items for sections: {sections}") - - elif method == "client/registerCapability": - # Accept capability registration - result = None - - elif method == "window/workDoneProgress/create": - # Accept progress token creation - result = None - - else: - logger.debug(f"Unhandled server request: {method}") - - # Send response - response = { - "jsonrpc": "2.0", - "id": request_id, - "result": result, - } - try: - encoded = self._encode_message(response) - state.writer.write(encoded) - await state.writer.drain() - logger.debug(f"Sent response to server request {method} (id={request_id})") - except Exception as e: - logger.error(f"Failed to respond to server request {method}: {e}") - - async def _send_request( - self, - state: ServerState, - method: str, - params: Optional[Dict[str, Any]], - timeout: Optional[float] = None, - ) -> Any: - """Send a request to the language server and wait for response. - - Args: - state: Server state - method: LSP method name (e.g., "textDocument/definition") - params: Request parameters - timeout: Request timeout in seconds - - Returns: - Response result - """ - state.request_id += 1 - request_id = state.request_id - - message = { - "jsonrpc": "2.0", - "id": request_id, - "method": method, - "params": params or {}, - } - - future: asyncio.Future = asyncio.get_event_loop().create_future() - state.pending_requests[request_id] = future - - try: - encoded = self._encode_message(message) - logger.debug(f"Sending request id={request_id}, method={method}") - state.writer.write(encoded) - await state.writer.drain() - - return await asyncio.wait_for( - future, - timeout=timeout or self.timeout - ) - except asyncio.TimeoutError: - state.pending_requests.pop(request_id, None) - logger.warning(f"Request timed out: {method}") - return None - except Exception as e: - state.pending_requests.pop(request_id, None) - logger.error(f"Request failed: {method} - {e}") - return None - - async def _send_notification( - self, - state: ServerState, - method: str, - params: Optional[Dict[str, Any]], - ) -> None: - """Send a notification to the language server (no response expected).""" - message = { - "jsonrpc": "2.0", - "method": method, - "params": params or {}, - } - - try: - encoded = self._encode_message(message) - logger.debug(f"Sending notification: {method} ({len(encoded)} bytes)") - state.writer.write(encoded) - await state.writer.drain() - logger.debug(f"Notification sent: {method}") - except Exception as e: - logger.error(f"Failed to send notification {method}: {e}") - - def _to_text_document_identifier(self, file_path: str) -> Dict[str, str]: - """Create TextDocumentIdentifier from file path.""" - file_path = self._normalize_file_path(file_path) - uri = Path(file_path).resolve().as_uri() - return {"uri": uri} - - def _to_position(self, line: int, character: int) -> Dict[str, int]: - """Create LSP Position (0-indexed) from 1-indexed line/character.""" - return { - "line": max(0, line - 1), # Convert 1-indexed to 0-indexed - "character": max(0, character - 1), - } - - async def _open_document(self, state: ServerState, file_path: str) -> None: - """Send textDocument/didOpen notification.""" - file_path = self._normalize_file_path(file_path) - resolved_path = Path(file_path).resolve() - - # Fast path: already opened and unchanged (per-server cache). - try: - uri = resolved_path.as_uri() - except Exception: - uri = "" - - try: - file_mtime = float(resolved_path.stat().st_mtime) - except Exception: - file_mtime = 0.0 - - # Serialize open/change notifications per server to avoid races when - # multiple concurrent LSP requests target the same file. - async with state.opened_documents_lock: - existing = state.opened_documents.get(uri) if uri else None - if existing is not None and existing[1] == file_mtime: - return - - try: - content = resolved_path.read_text(encoding="utf-8") - except Exception as e: - logger.error(f"Failed to read file {file_path}: {e}") - return - - # Detect language ID from extension - language_id = self.get_language_id(file_path) or "plaintext" - - # Send didOpen only once per document; subsequent changes use didChange. - if existing is None: - version = 1 - logger.debug(f"Opening document: {resolved_path.name} ({len(content)} chars)") - await self._send_notification( - state, - "textDocument/didOpen", - { - "textDocument": { - "uri": uri or resolved_path.as_uri(), - "languageId": language_id, - "version": version, - "text": content, - } - }, - ) - else: - version = int(existing[0]) + 1 - logger.debug(f"Updating document: {resolved_path.name} ({len(content)} chars)") - await self._send_notification( - state, - "textDocument/didChange", - { - "textDocument": { - "uri": uri or resolved_path.as_uri(), - "version": version, - }, - "contentChanges": [{"text": content}], - }, - ) - - if uri: - state.opened_documents[uri] = (version, file_mtime) - - # ========== Public LSP Methods ========== - - async def get_definition( - self, - file_path: str, - line: int, - character: int, - ) -> Optional[Dict[str, Any]]: - """Get definition location for symbol at position. - - Args: - file_path: Path to the source file - line: Line number (1-indexed) - character: Character position (1-indexed) - - Returns: - Location dict with uri, line, character, or None - """ - state = await self._get_server(file_path) - if not state: - return None - - # Open document first - await self._open_document(state, file_path) - - result = await self._send_request(state, "textDocument/definition", { - "textDocument": self._to_text_document_identifier(file_path), - "position": self._to_position(line, character), - }) - - if not result: - return None - - # Handle single location or array - if isinstance(result, list): - if len(result) == 0: - return None - result = result[0] - - # Handle LocationLink vs Location - if "targetUri" in result: - # LocationLink format - return { - "uri": result["targetUri"], - "range": result.get("targetRange", result.get("targetSelectionRange", {})), - } - else: - # Location format - return result - - async def get_references( - self, - file_path: str, - line: int, - character: int, - include_declaration: bool = True, - ) -> List[Dict[str, Any]]: - """Get all references to symbol at position. - - Args: - file_path: Path to the source file - line: Line number (1-indexed) - character: Character position (1-indexed) - include_declaration: Whether to include the declaration - - Returns: - List of Location dicts with uri and range - """ - state = await self._get_server(file_path) - if not state: - return [] - - # Open document first - await self._open_document(state, file_path) - - result = await self._send_request(state, "textDocument/references", { - "textDocument": self._to_text_document_identifier(file_path), - "position": self._to_position(line, character), - "context": { - "includeDeclaration": include_declaration, - }, - }) - - if not result or not isinstance(result, list): - return [] - - return result - - async def get_hover( - self, - file_path: str, - line: int, - character: int, - ) -> Optional[str]: - """Get hover documentation for symbol at position. - - Args: - file_path: Path to the source file - line: Line number (1-indexed) - character: Character position (1-indexed) - - Returns: - Hover content as string, or None - """ - state = await self._get_server(file_path) - if not state: - return None - - # Open document first - await self._open_document(state, file_path) - - result = await self._send_request(state, "textDocument/hover", { - "textDocument": self._to_text_document_identifier(file_path), - "position": self._to_position(line, character), - }) - - if not result: - return None - - contents = result.get("contents") - if not contents: - return None - - # Parse contents (can be string, MarkedString, MarkupContent, or array) - return self._parse_hover_contents(contents) - - def _parse_hover_contents(self, contents: Any) -> Optional[str]: - """Parse hover contents into string.""" - if isinstance(contents, str): - return contents - - if isinstance(contents, dict): - # MarkupContent or MarkedString - return contents.get("value", contents.get("contents", "")) - - if isinstance(contents, list): - parts = [] - for item in contents: - if isinstance(item, str): - parts.append(item) - elif isinstance(item, dict): - parts.append(item.get("value", "")) - return "\n\n".join(p for p in parts if p) - - return None - - async def get_document_symbols( - self, - file_path: str, - ) -> List[Dict[str, Any]]: - """Get all symbols in a document. - - Args: - file_path: Path to the source file - - Returns: - List of DocumentSymbol or SymbolInformation dicts - """ - state = await self._get_server(file_path) - if not state: - return [] - - # Open document first - await self._open_document(state, file_path) - - result = await self._send_request(state, "textDocument/documentSymbol", { - "textDocument": self._to_text_document_identifier(file_path), - }) - - if not result or not isinstance(result, list): - return [] - - return result - - async def get_call_hierarchy_items( - self, - file_path: str, - line: int, - character: int, - wait_for_analysis: float = 2.0, - ) -> List[Dict[str, Any]]: - """Prepare call hierarchy items for a position. - - Args: - file_path: Path to the source file - line: Line number (1-indexed) - character: Character position (1-indexed) - wait_for_analysis: Time to wait for server analysis (seconds) - - Returns: - List of CallHierarchyItem dicts - """ - state = await self._get_server(file_path) - if not state: - return [] - - # Check if call hierarchy is supported - if not state.capabilities.get("callHierarchyProvider"): - return [] - - # Open document first - await self._open_document(state, file_path) - - # Wait for language server to complete analysis - # This is critical for Pyright to return valid call hierarchy items - if wait_for_analysis > 0: - await asyncio.sleep(wait_for_analysis) - - result = await self._send_request( - state, - "textDocument/prepareCallHierarchy", - { - "textDocument": self._to_text_document_identifier(file_path), - "position": self._to_position(line, character), - }, - ) - - if not result or not isinstance(result, list): - return [] - - return result - - async def get_incoming_calls( - self, - item: Dict[str, Any], - ) -> List[Dict[str, Any]]: - """Get incoming calls for a call hierarchy item. - - Args: - item: CallHierarchyItem from get_call_hierarchy_items - - Returns: - List of CallHierarchyIncomingCall dicts - """ - # Determine language from item's uri - uri = item.get("uri", "") - file_path = self._normalize_file_path(uri) - - state = await self._get_server(file_path) - if not state: - return [] - - result = await self._send_request( - state, - "callHierarchy/incomingCalls", - {"item": item}, - ) - - if not result or not isinstance(result, list): - return [] - - return result - - async def get_outgoing_calls( - self, - item: Dict[str, Any], - ) -> List[Dict[str, Any]]: - """Get outgoing calls for a call hierarchy item. - - Args: - item: CallHierarchyItem from get_call_hierarchy_items - - Returns: - List of CallHierarchyOutgoingCall dicts - """ - # Determine language from item's uri - uri = item.get("uri", "") - file_path = self._normalize_file_path(uri) - - state = await self._get_server(file_path) - if not state: - return [] - - result = await self._send_request( - state, - "callHierarchy/outgoingCalls", - {"item": item}, - ) - - if not result or not isinstance(result, list): - return [] - - return result - - async def __aenter__(self) -> "StandaloneLspManager": - """Async context manager entry.""" - await self.start() - return self - - async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: - """Async context manager exit - stop all servers.""" - await self.stop() - - -# Simple test -if __name__ == "__main__": - async def test_standalone_manager(): - """Test StandaloneLspManager functionality.""" - print("Testing StandaloneLspManager...") - print() - - # Find a Python file to test with - test_file = Path(__file__).resolve() - print(f"Test file: {test_file}") - print() - - async with StandaloneLspManager( - workspace_root=str(test_file.parent.parent.parent.parent), # codex-lens root - timeout=30.0, - ) as manager: - print("1. Testing get_document_symbols...") - symbols = await manager.get_document_symbols(str(test_file)) - print(f" Found {len(symbols)} symbols") - for sym in symbols[:5]: - name = sym.get("name", "?") - kind = sym.get("kind", "?") - print(f" - {name} (kind={kind})") - print() - - print("2. Testing get_definition...") - # Test definition for 'asyncio' import (line 11) - definition = await manager.get_definition(str(test_file), 11, 8) - if definition: - print(f" Definition: {definition}") - else: - print(" No definition found") - print() - - print("3. Testing get_hover...") - hover = await manager.get_hover(str(test_file), 11, 8) - if hover: - print(f" Hover: {hover[:200]}...") - else: - print(" No hover info") - print() - - print("4. Testing get_references...") - refs = await manager.get_references(str(test_file), 50, 10) - print(f" Found {len(refs)} references") - for ref in refs[:3]: - print(f" - {ref}") - - print() - print("Test complete!") - - # Run the test - # Note: On Windows, use default ProactorEventLoop (supports subprocess creation) - - asyncio.run(test_standalone_manager()) diff --git a/codex-lens/src/codexlens/mcp/__init__.py b/codex-lens/src/codexlens/mcp/__init__.py deleted file mode 100644 index 5bb171c3..00000000 --- a/codex-lens/src/codexlens/mcp/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -"""Model Context Protocol implementation for Claude Code integration.""" - -from codexlens.mcp.schema import ( - MCPContext, - SymbolInfo, - ReferenceInfo, - RelatedSymbol, -) -from codexlens.mcp.provider import MCPProvider -from codexlens.mcp.hooks import HookManager, create_context_for_prompt - -__all__ = [ - "MCPContext", - "SymbolInfo", - "ReferenceInfo", - "RelatedSymbol", - "MCPProvider", - "HookManager", - "create_context_for_prompt", -] diff --git a/codex-lens/src/codexlens/mcp/hooks.py b/codex-lens/src/codexlens/mcp/hooks.py deleted file mode 100644 index ad6a2021..00000000 --- a/codex-lens/src/codexlens/mcp/hooks.py +++ /dev/null @@ -1,170 +0,0 @@ -"""Hook interfaces for Claude Code integration.""" - -from __future__ import annotations - -import logging -from pathlib import Path -from typing import Any, Dict, Optional, Callable, TYPE_CHECKING - -from codexlens.mcp.schema import MCPContext - -if TYPE_CHECKING: - from codexlens.mcp.provider import MCPProvider - -logger = logging.getLogger(__name__) - - -class HookManager: - """Manages hook registration and execution.""" - - def __init__(self, mcp_provider: "MCPProvider") -> None: - self.mcp_provider = mcp_provider - self._pre_hooks: Dict[str, Callable] = {} - self._post_hooks: Dict[str, Callable] = {} - - # Register default hooks - self._register_default_hooks() - - def _register_default_hooks(self) -> None: - """Register built-in hooks.""" - self._pre_hooks["explain"] = self._pre_explain_hook - self._pre_hooks["refactor"] = self._pre_refactor_hook - self._pre_hooks["document"] = self._pre_document_hook - - def execute_pre_hook( - self, - action: str, - params: Dict[str, Any], - ) -> Optional[MCPContext]: - """Execute pre-tool hook to gather context. - - Args: - action: The action being performed (e.g., "explain", "refactor") - params: Parameters for the action - - Returns: - MCPContext to inject into prompt, or None - """ - hook = self._pre_hooks.get(action) - - if not hook: - logger.debug(f"No pre-hook for action: {action}") - return None - - try: - return hook(params) - except Exception as e: - logger.error(f"Pre-hook failed for {action}: {e}") - return None - - def execute_post_hook( - self, - action: str, - result: Any, - ) -> None: - """Execute post-tool hook for proactive caching. - - Args: - action: The action that was performed - result: Result of the action - """ - hook = self._post_hooks.get(action) - - if not hook: - return - - try: - hook(result) - except Exception as e: - logger.error(f"Post-hook failed for {action}: {e}") - - def _pre_explain_hook(self, params: Dict[str, Any]) -> Optional[MCPContext]: - """Pre-hook for 'explain' action.""" - symbol_name = params.get("symbol") - - if not symbol_name: - return None - - return self.mcp_provider.build_context( - symbol_name=symbol_name, - context_type="symbol_explanation", - include_references=True, - include_related=True, - ) - - def _pre_refactor_hook(self, params: Dict[str, Any]) -> Optional[MCPContext]: - """Pre-hook for 'refactor' action.""" - symbol_name = params.get("symbol") - - if not symbol_name: - return None - - return self.mcp_provider.build_context( - symbol_name=symbol_name, - context_type="refactor_context", - include_references=True, - include_related=True, - max_references=20, - ) - - def _pre_document_hook(self, params: Dict[str, Any]) -> Optional[MCPContext]: - """Pre-hook for 'document' action.""" - symbol_name = params.get("symbol") - file_path = params.get("file_path") - - if symbol_name: - return self.mcp_provider.build_context( - symbol_name=symbol_name, - context_type="documentation_context", - include_references=False, - include_related=True, - ) - elif file_path: - return self.mcp_provider.build_context_for_file( - Path(file_path), - context_type="file_documentation", - ) - - return None - - def register_pre_hook( - self, - action: str, - hook: Callable[[Dict[str, Any]], Optional[MCPContext]], - ) -> None: - """Register a custom pre-tool hook.""" - self._pre_hooks[action] = hook - - def register_post_hook( - self, - action: str, - hook: Callable[[Any], None], - ) -> None: - """Register a custom post-tool hook.""" - self._post_hooks[action] = hook - - -def create_context_for_prompt( - mcp_provider: "MCPProvider", - action: str, - params: Dict[str, Any], -) -> str: - """Create context string for prompt injection. - - This is the main entry point for Claude Code hook integration. - - Args: - mcp_provider: The MCP provider instance - action: Action being performed - params: Action parameters - - Returns: - Formatted context string for prompt injection - """ - manager = HookManager(mcp_provider) - context = manager.execute_pre_hook(action, params) - - if context: - return context.to_prompt_injection() - - return "" diff --git a/codex-lens/src/codexlens/mcp/provider.py b/codex-lens/src/codexlens/mcp/provider.py deleted file mode 100644 index 97ebc055..00000000 --- a/codex-lens/src/codexlens/mcp/provider.py +++ /dev/null @@ -1,202 +0,0 @@ -"""MCP context provider.""" - -from __future__ import annotations - -import logging -from pathlib import Path -from typing import Optional, List, TYPE_CHECKING - -from codexlens.mcp.schema import ( - MCPContext, - SymbolInfo, - ReferenceInfo, - RelatedSymbol, -) - -if TYPE_CHECKING: - from codexlens.storage.global_index import GlobalSymbolIndex - from codexlens.storage.registry import RegistryStore - from codexlens.search.chain_search import ChainSearchEngine - -logger = logging.getLogger(__name__) - - -class MCPProvider: - """Builds MCP context objects from codex-lens data.""" - - def __init__( - self, - global_index: "GlobalSymbolIndex", - search_engine: "ChainSearchEngine", - registry: "RegistryStore", - ) -> None: - self.global_index = global_index - self.search_engine = search_engine - self.registry = registry - - def build_context( - self, - symbol_name: str, - context_type: str = "symbol_explanation", - include_references: bool = True, - include_related: bool = True, - max_references: int = 10, - ) -> Optional[MCPContext]: - """Build comprehensive context for a symbol. - - Args: - symbol_name: Name of the symbol to contextualize - context_type: Type of context being requested - include_references: Whether to include reference locations - include_related: Whether to include related symbols - max_references: Maximum number of references to include - - Returns: - MCPContext object or None if symbol not found - """ - # Look up symbol - symbols = self.global_index.search(symbol_name, prefix_mode=False, limit=1) - - if not symbols: - logger.debug(f"Symbol not found for MCP context: {symbol_name}") - return None - - symbol = symbols[0] - - # Build SymbolInfo - symbol_info = SymbolInfo( - name=symbol.name, - kind=symbol.kind, - file_path=symbol.file or "", - line_start=symbol.range[0], - line_end=symbol.range[1], - signature=None, # Symbol entity doesn't have signature - documentation=None, # Symbol entity doesn't have docstring - ) - - # Extract definition source code - definition = self._extract_definition(symbol) - - # Get references - references = [] - if include_references: - refs = self.search_engine.search_references( - symbol_name, - limit=max_references, - ) - references = [ - ReferenceInfo( - file_path=r.file_path, - line=r.line, - column=r.column, - context=r.context, - relationship_type=r.relationship_type, - ) - for r in refs - ] - - # Get related symbols - related_symbols = [] - if include_related: - related_symbols = self._get_related_symbols(symbol) - - return MCPContext( - context_type=context_type, - symbol=symbol_info, - definition=definition, - references=references, - related_symbols=related_symbols, - metadata={ - "source": "codex-lens", - }, - ) - - def _extract_definition(self, symbol) -> Optional[str]: - """Extract source code for symbol definition.""" - try: - file_path = Path(symbol.file) if symbol.file else None - if not file_path or not file_path.exists(): - return None - - content = file_path.read_text(encoding='utf-8', errors='ignore') - lines = content.split("\n") - - start = symbol.range[0] - 1 - end = symbol.range[1] - - if start >= len(lines): - return None - - return "\n".join(lines[start:end]) - except Exception as e: - logger.debug(f"Failed to extract definition: {e}") - return None - - def _get_related_symbols(self, symbol) -> List[RelatedSymbol]: - """Get symbols related to the given symbol.""" - related = [] - - try: - # Search for symbols that might be related by name patterns - # This is a simplified implementation - could be enhanced with relationship data - - # Look for imports/callers via reference search - refs = self.search_engine.search_references(symbol.name, limit=20) - - seen_names = set() - for ref in refs: - # Extract potential symbol name from context - if ref.relationship_type and ref.relationship_type not in seen_names: - related.append(RelatedSymbol( - name=f"{Path(ref.file_path).stem}", - kind="module", - relationship=ref.relationship_type, - file_path=ref.file_path, - )) - seen_names.add(ref.relationship_type) - if len(related) >= 10: - break - - except Exception as e: - logger.debug(f"Failed to get related symbols: {e}") - - return related - - def build_context_for_file( - self, - file_path: Path, - context_type: str = "file_overview", - ) -> MCPContext: - """Build context for an entire file.""" - # Try to get symbols by searching with file path - # Note: GlobalSymbolIndex doesn't have search_by_file, so we use a different approach - symbols = [] - - # Search for common symbols that might be in this file - # This is a simplified approach - a full implementation would query by file path - try: - # Use the global index to search for symbols from this file - file_str = str(file_path.resolve()) - # Get all symbols and filter by file path (not efficient but works) - all_symbols = self.global_index.search("", prefix_mode=True, limit=1000) - symbols = [s for s in all_symbols if s.file and str(Path(s.file).resolve()) == file_str] - except Exception as e: - logger.debug(f"Failed to get file symbols: {e}") - - related = [ - RelatedSymbol( - name=s.name, - kind=s.kind, - relationship="defines", - ) - for s in symbols - ] - - return MCPContext( - context_type=context_type, - related_symbols=related, - metadata={ - "file_path": str(file_path), - "symbol_count": len(symbols), - }, - ) diff --git a/codex-lens/src/codexlens/mcp/schema.py b/codex-lens/src/codexlens/mcp/schema.py deleted file mode 100644 index 1062e626..00000000 --- a/codex-lens/src/codexlens/mcp/schema.py +++ /dev/null @@ -1,113 +0,0 @@ -"""MCP data models.""" - -from __future__ import annotations - -import json -from dataclasses import dataclass, field, asdict -from typing import List, Optional - - -@dataclass -class SymbolInfo: - """Information about a code symbol.""" - name: str - kind: str - file_path: str - line_start: int - line_end: int - signature: Optional[str] = None - documentation: Optional[str] = None - - def to_dict(self) -> dict: - return {k: v for k, v in asdict(self).items() if v is not None} - - -@dataclass -class ReferenceInfo: - """Information about a symbol reference.""" - file_path: str - line: int - column: int - context: str - relationship_type: str - - def to_dict(self) -> dict: - return asdict(self) - - -@dataclass -class RelatedSymbol: - """Related symbol (import, call target, etc.).""" - name: str - kind: str - relationship: str # "imports", "calls", "inherits", "uses" - file_path: Optional[str] = None - - def to_dict(self) -> dict: - return {k: v for k, v in asdict(self).items() if v is not None} - - -@dataclass -class MCPContext: - """Model Context Protocol context object. - - This is the structured context that gets injected into - LLM prompts to provide code understanding. - """ - version: str = "1.0" - context_type: str = "code_context" - symbol: Optional[SymbolInfo] = None - definition: Optional[str] = None - references: List[ReferenceInfo] = field(default_factory=list) - related_symbols: List[RelatedSymbol] = field(default_factory=list) - metadata: dict = field(default_factory=dict) - - def to_dict(self) -> dict: - """Convert to dictionary for JSON serialization.""" - result = { - "version": self.version, - "context_type": self.context_type, - "metadata": self.metadata, - } - - if self.symbol: - result["symbol"] = self.symbol.to_dict() - if self.definition: - result["definition"] = self.definition - if self.references: - result["references"] = [r.to_dict() for r in self.references] - if self.related_symbols: - result["related_symbols"] = [s.to_dict() for s in self.related_symbols] - - return result - - def to_json(self, indent: int = 2) -> str: - """Serialize to JSON string.""" - return json.dumps(self.to_dict(), indent=indent) - - def to_prompt_injection(self) -> str: - """Format for injection into LLM prompt.""" - parts = [""] - - if self.symbol: - parts.append(f"## Symbol: {self.symbol.name}") - parts.append(f"Type: {self.symbol.kind}") - parts.append(f"Location: {self.symbol.file_path}:{self.symbol.line_start}") - - if self.definition: - parts.append("\n## Definition") - parts.append(f"```\n{self.definition}\n```") - - if self.references: - parts.append(f"\n## References ({len(self.references)} found)") - for ref in self.references[:5]: # Limit to 5 - parts.append(f"- {ref.file_path}:{ref.line} ({ref.relationship_type})") - parts.append(f" ```\n {ref.context}\n ```") - - if self.related_symbols: - parts.append("\n## Related Symbols") - for sym in self.related_symbols[:10]: # Limit to 10 - parts.append(f"- {sym.name} ({sym.relationship})") - - parts.append("") - return "\n".join(parts) diff --git a/codex-lens/src/codexlens/parsers/__init__.py b/codex-lens/src/codexlens/parsers/__init__.py deleted file mode 100644 index a96ed9a7..00000000 --- a/codex-lens/src/codexlens/parsers/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -"""Parsers for CodexLens.""" - -from __future__ import annotations - -from .factory import ParserFactory -from .astgrep_binding import AstGrepBinding, is_astgrep_available, get_supported_languages - -__all__ = [ - "ParserFactory", - "AstGrepBinding", - "is_astgrep_available", - "get_supported_languages", -] - diff --git a/codex-lens/src/codexlens/parsers/astgrep_binding.py b/codex-lens/src/codexlens/parsers/astgrep_binding.py deleted file mode 100644 index 16985961..00000000 --- a/codex-lens/src/codexlens/parsers/astgrep_binding.py +++ /dev/null @@ -1,320 +0,0 @@ -"""ast-grep based parser binding for CodexLens. - -Provides AST-level pattern matching via ast-grep-py (PyO3 bindings). - -Note: This module wraps the official ast-grep Python bindings for pattern-based -code analysis. If ast-grep-py is unavailable, the parser returns None gracefully. -Callers should use tree-sitter or regex-based fallbacks. -""" - -from __future__ import annotations - -import logging -from pathlib import Path -from typing import Dict, List, Optional, Tuple - -# Import patterns from centralized definition (avoid duplication) -from codexlens.parsers.patterns.python import get_pattern, PYTHON_PATTERNS - -# Graceful import pattern following treesitter_parser.py convention -try: - from ast_grep_py import SgNode, SgRoot - ASTGREP_AVAILABLE = True -except ImportError: - SgNode = None # type: ignore[assignment,misc] - SgRoot = None # type: ignore[assignment,misc] - ASTGREP_AVAILABLE = False - -log = logging.getLogger(__name__) - - -class AstGrepBinding: - """Wrapper for ast-grep-py bindings with CodexLens integration. - - Provides pattern-based AST matching for code relationship extraction. - Uses declarative patterns with metavariables ($A, $$ARGS) for matching. - """ - - # Language ID mapping to ast-grep language names - LANGUAGE_MAP = { - "python": "python", - "javascript": "javascript", - "typescript": "typescript", - "tsx": "tsx", - } - - def __init__(self, language_id: str, path: Optional[Path] = None) -> None: - """Initialize ast-grep binding for a language. - - Args: - language_id: Language identifier (python, javascript, typescript, tsx) - path: Optional file path for language variant detection - """ - self.language_id = language_id - self.path = path - self._language: Optional[str] = None - self._root: Optional[SgRoot] = None # type: ignore[valid-type] - - if ASTGREP_AVAILABLE: - self._initialize_language() - - def _initialize_language(self) -> None: - """Initialize ast-grep language setting.""" - # Detect TSX from file extension - if self.language_id == "typescript" and self.path is not None: - if self.path.suffix.lower() == ".tsx": - self._language = "tsx" - return - - self._language = self.LANGUAGE_MAP.get(self.language_id) - - def is_available(self) -> bool: - """Check if ast-grep binding is available and ready. - - Returns: - True if ast-grep-py is installed and language is supported - """ - return ASTGREP_AVAILABLE and self._language is not None - - def parse(self, source_code: str) -> bool: - """Parse source code into ast-grep syntax tree. - - Args: - source_code: Source code text to parse - - Returns: - True if parsing succeeds, False otherwise - """ - if not self.is_available() or SgRoot is None: - return False - - try: - self._root = SgRoot(source_code, self._language) # type: ignore[misc] - return True - except (ValueError, TypeError, RuntimeError) as e: - log.debug(f"ast-grep parse error: {e}") - self._root = None - return False - - def find_all(self, pattern: str) -> List[SgNode]: # type: ignore[valid-type] - """Find all matches for a pattern in the parsed source. - - Args: - pattern: ast-grep pattern string (e.g., "class $NAME($$$BASES) $$$BODY") - - Returns: - List of matching SgNode objects, empty if no matches or not parsed - """ - if not self.is_available() or self._root is None: - return [] - - try: - root_node = self._root.root() - # ast-grep-py 0.40+ requires dict config format - config = {"rule": {"pattern": pattern}} - return list(root_node.find_all(config)) - except (ValueError, TypeError, AttributeError) as e: - log.debug(f"ast-grep find_all error: {e}") - return [] - - def find_inheritance(self) -> List[Dict[str, str]]: - """Find all class inheritance declarations. - - Returns: - List of dicts with 'class_name' and 'bases' keys - """ - if self.language_id != "python": - return [] - - matches = self.find_all(get_pattern("class_with_bases")) - results: List[Dict[str, str]] = [] - - for node in matches: - class_name = self._get_match(node, "NAME") - if class_name: - results.append({ - "class_name": class_name, - "bases": self._get_match(node, "BASES"), # Base classes text - }) - - return results - - def find_calls(self) -> List[Dict[str, str]]: - """Find all function/method calls. - - Returns: - List of dicts with 'function' and 'line' keys - """ - if self.language_id != "python": - return [] - - matches = self.find_all(get_pattern("call")) - results: List[Dict[str, str]] = [] - - for node in matches: - func_name = self._get_match(node, "FUNC") - if func_name: - # Skip self. and cls. prefixed calls - base = func_name.split(".", 1)[0] - if base not in {"self", "cls"}: - results.append({ - "function": func_name, - "line": str(self._get_line_number(node)), - }) - - return results - - def find_imports(self) -> List[Dict[str, str]]: - """Find all import statements. - - Returns: - List of dicts with 'module' and 'type' keys - """ - if self.language_id != "python": - return [] - - results: List[Dict[str, str]] = [] - - # Find 'import X' statements - import_matches = self.find_all(get_pattern("import_stmt")) - for node in import_matches: - module = self._get_match(node, "MODULE") - if module: - results.append({ - "module": module, - "type": "import", - "line": str(self._get_line_number(node)), - }) - - # Find 'from X import Y' statements - from_matches = self.find_all(get_pattern("import_from")) - for node in from_matches: - module = self._get_match(node, "MODULE") - names = self._get_match(node, "NAMES") - if module: - results.append({ - "module": module, - "names": names or "", - "type": "from_import", - "line": str(self._get_line_number(node)), - }) - - return results - - def _get_match(self, node: SgNode, metavar: str) -> str: # type: ignore[valid-type] - """Extract matched metavariable value from node. - - Args: - node: SgNode with match - metavar: Metavariable name (without $ prefix) - - Returns: - Matched text or empty string - """ - if node is None: - return "" - try: - match = node.get_match(metavar) - if match is not None: - return match.text() - except (ValueError, AttributeError, KeyError) as e: - log.debug(f"ast-grep get_match error for {metavar}: {e}") - return "" - - def _get_node_text(self, node: SgNode) -> str: # type: ignore[valid-type] - """Get full text of a node. - - Args: - node: SgNode to extract text from - - Returns: - Node's text content - """ - if node is None: - return "" - try: - return node.text() - except (ValueError, AttributeError) as e: - log.debug(f"ast-grep get_node_text error: {e}") - return "" - - def _get_line_number(self, node: SgNode) -> int: # type: ignore[valid-type] - """Get starting line number of a node. - - Args: - node: SgNode to get line number for - - Returns: - 1-based line number - """ - if node is None: - return 0 - try: - range_info = node.range() - # ast-grep-py 0.40+ returns Range object with .start.line attribute - if hasattr(range_info, 'start') and hasattr(range_info.start, 'line'): - return range_info.start.line + 1 # Convert to 1-based - # Fallback for string format "(0,0)-(1,8)" - if isinstance(range_info, str) and range_info: - start_part = range_info.split('-')[0].strip('()') - start_line = int(start_part.split(',')[0]) - return start_line + 1 - except (ValueError, AttributeError, TypeError, IndexError) as e: - log.debug(f"ast-grep get_line_number error: {e}") - return 0 - - def _get_line_range(self, node: SgNode) -> Tuple[int, int]: # type: ignore[valid-type] - """Get line range (start, end) of a node. - - Args: - node: SgNode to get line range for - - Returns: - Tuple of (start_line, end_line), both 1-based inclusive - """ - if node is None: - return (0, 0) - try: - range_info = node.range() - # ast-grep-py 0.40+ returns Range object with .start.line and .end.line - if hasattr(range_info, 'start') and hasattr(range_info, 'end'): - start_line = getattr(range_info.start, 'line', 0) - end_line = getattr(range_info.end, 'line', 0) - return (start_line + 1, end_line + 1) # Convert to 1-based - # Fallback for string format "(0,0)-(1,8)" - if isinstance(range_info, str) and range_info: - parts = range_info.split('-') - start_part = parts[0].strip('()') - end_part = parts[1].strip('()') - start_line = int(start_part.split(',')[0]) - end_line = int(end_part.split(',')[0]) - return (start_line + 1, end_line + 1) - except (ValueError, AttributeError, TypeError, IndexError) as e: - log.debug(f"ast-grep get_line_range error: {e}") - return (0, 0) - - def get_language(self) -> Optional[str]: - """Get the configured ast-grep language. - - Returns: - Language string or None if not configured - """ - return self._language - - -def is_astgrep_available() -> bool: - """Check if ast-grep-py is installed and available. - - Returns: - True if ast-grep bindings can be imported - """ - return ASTGREP_AVAILABLE - - -def get_supported_languages() -> List[str]: - """Get list of supported languages for ast-grep. - - Returns: - List of language identifiers - """ - return list(AstGrepBinding.LANGUAGE_MAP.keys()) diff --git a/codex-lens/src/codexlens/parsers/astgrep_js_ts_processor.py b/codex-lens/src/codexlens/parsers/astgrep_js_ts_processor.py deleted file mode 100644 index beff64d9..00000000 --- a/codex-lens/src/codexlens/parsers/astgrep_js_ts_processor.py +++ /dev/null @@ -1,306 +0,0 @@ -"""Ast-grep processors for JavaScript/TypeScript relationship extraction. - -These processors are intentionally narrower than the tree-sitter relationship -extractor: they focus on stable, high-signal edges for static graph usage: -- IMPORTS: ES module imports + CommonJS require() (string literal only) -- INHERITS: class/interface extends - -They are used when Config.use_astgrep is True. -""" - -from __future__ import annotations - -import re -from pathlib import Path -from typing import Callable, List, Optional, Sequence, Set, Tuple - -from codexlens.entities import CodeRelationship, IndexedFile, RelationshipType -from codexlens.parsers.astgrep_processor import BaseAstGrepProcessor - - -_IDENT_RE = re.compile(r"^[A-Za-z_$][A-Za-z0-9_$]*$") -_BRACE_IMPORT_RE = re.compile( - r"\bimport\s+(?:type\s+)?(?:[A-Za-z_$][A-Za-z0-9_$]*\s*,\s*)?\{\s*(?P[^}]*)\}\s*from\b", - re.MULTILINE, -) - - -def _strip_quotes(value: str) -> str: - value = (value or "").strip() - if len(value) >= 2 and value[0] == value[-1] and value[0] in {"'", '"', "`"}: - return value[1:-1] - return value - - -def _module_from_literal(raw: str) -> str: - raw = (raw or "").strip() - if not raw: - return "" - return _strip_quotes(raw).strip() - - -def _extract_named_imports(raw: str) -> List[str]: - raw = (raw or "").strip() - if not raw: - return [] - - # Normalize any surrounding braces the match might include. - if raw.startswith("{") and raw.endswith("}"): - raw = raw[1:-1].strip() - - # Split by commas at top-level; named imports do not nest in JS/TS syntax. - parts = [p.strip() for p in raw.split(",") if p.strip()] - names: List[str] = [] - for part in parts: - # TS: "type Foo" inside braces - if part.startswith("type "): - part = part[5:].strip() - # Handle `foo as bar` (TS) / `foo as bar` (proposed) / `foo as bar`-style text. - if " as " in part: - part = part.split(" as ", 1)[0].strip() - if _IDENT_RE.match(part): - names.append(part) - return names - - -def _extract_brace_import_names(statement: str) -> str: - statement = (statement or "").strip() - if not statement: - return "" - match = _BRACE_IMPORT_RE.search(statement) - if not match: - return "" - return (match.group("names") or "").strip() - - -def _dedupe_relationships(rels: Sequence[CodeRelationship]) -> List[CodeRelationship]: - seen: Set[Tuple[str, str, str]] = set() - out: List[CodeRelationship] = [] - for r in rels: - key = (r.source_symbol, r.target_symbol, r.relationship_type.value) - if key in seen: - continue - seen.add(key) - out.append(r) - return out - - -class _AstGrepJsTsProcessor(BaseAstGrepProcessor): - def __init__( - self, - language_id: str, - *, - path: Optional[Path] = None, - get_pattern: Callable[[str], str], - ) -> None: - super().__init__(language_id, path) - self._get_pattern = get_pattern - - def parse(self, text: str, path: Path) -> Optional[IndexedFile]: - if not self.is_available(): - return None - - try: - relationships = self._extract_relationships(text, path) - return IndexedFile( - path=str(path.resolve()), - language=self.language_id, - symbols=[], - chunks=[], - relationships=relationships, - ) - except Exception: - return None - - def process_matches( # type: ignore[override] - self, - matches, # SgNode list (runtime-only type) - source_code: str, - path: Path, - ) -> List[CodeRelationship]: - # Not used by the current JS/TS processors; keep the interface for parity. - _ = (matches, source_code, path) - return [] - - def _extract_relationships(self, source_code: str, path: Path) -> List[CodeRelationship]: - source_file = str(path.resolve()) - rels: List[CodeRelationship] = [] - - rels.extend(self._extract_imports(source_code, source_file=source_file)) - rels.extend(self._extract_inherits(source_code, source_file=source_file)) - - return _dedupe_relationships(rels) - - def _extract_imports(self, source_code: str, *, source_file: str) -> List[CodeRelationship]: - rels: List[CodeRelationship] = [] - - def record(module_name: str, line: int) -> None: - if not module_name: - return - rels.append( - CodeRelationship( - source_symbol="", - target_symbol=module_name, - relationship_type=RelationshipType.IMPORTS, - source_file=source_file, - target_file=None, - source_line=line, - ) - ) - - # Any `import ... from "mod"` form - for pat_name in ("import_from_dq", "import_from_sq"): - for node in self.run_ast_grep(source_code, self._get_pattern(pat_name)): - mod = _module_from_literal(self._get_match(node, "MODULE")) - if mod: - record(mod, self._get_line_number(node)) - - # Side-effect import: import "mod" - for pat_name in ("import_side_effect_dq", "import_side_effect_sq"): - for node in self.run_ast_grep(source_code, self._get_pattern(pat_name)): - mod = _module_from_literal(self._get_match(node, "MODULE")) - if mod: - record(mod, self._get_line_number(node)) - - # Named imports (named-only): import { a, b as c } from "mod" - for pat_name in ("import_named_only_dq", "import_named_only_sq"): - for node in self.run_ast_grep(source_code, self._get_pattern(pat_name)): - mod = _module_from_literal(self._get_match(node, "MODULE")) - if not mod: - continue - raw_names = _extract_brace_import_names(self._get_node_text(node)) - for name in _extract_named_imports(raw_names): - record(f"{mod}.{name}", self._get_line_number(node)) - - # Named imports (default + named): import X, { a, b as c } from "mod" - for pat_name in ("import_default_named_dq", "import_default_named_sq"): - for node in self.run_ast_grep(source_code, self._get_pattern(pat_name)): - mod = _module_from_literal(self._get_match(node, "MODULE")) - if not mod: - continue - raw_names = _extract_brace_import_names(self._get_node_text(node)) - for name in _extract_named_imports(raw_names): - record(f"{mod}.{name}", self._get_line_number(node)) - - # CommonJS require("mod") (string literal only) - for pat_name in ("require_call_dq", "require_call_sq"): - for node in self.run_ast_grep(source_code, self._get_pattern(pat_name)): - mod = _module_from_literal(self._get_match(node, "MODULE")) - if mod: - record(mod, self._get_line_number(node)) - - return rels - - def _extract_inherits(self, source_code: str, *, source_file: str) -> List[CodeRelationship]: - rels: List[CodeRelationship] = [] - - for node in self.run_ast_grep(source_code, self._get_pattern("class_extends")): - class_name = (self._get_match(node, "NAME") or "").strip() - base_raw = (self._get_match(node, "BASE") or "").strip() - if not class_name or not base_raw: - continue - base = base_raw.split("<", 1)[0].strip() - if not base: - continue - rels.append( - CodeRelationship( - source_symbol=class_name, - target_symbol=base, - relationship_type=RelationshipType.INHERITS, - source_file=source_file, - target_file=None, - source_line=self._get_line_number(node), - ) - ) - - return rels - - -class AstGrepJavaScriptProcessor(_AstGrepJsTsProcessor): - def __init__(self, path: Optional[Path] = None) -> None: - from codexlens.parsers.patterns.javascript import get_pattern as get_js_pattern - - super().__init__("javascript", path=path, get_pattern=get_js_pattern) - - -class AstGrepTypeScriptProcessor(_AstGrepJsTsProcessor): - def __init__(self, path: Optional[Path] = None) -> None: - from codexlens.parsers.patterns.typescript import get_pattern as get_ts_pattern - - super().__init__("typescript", path=path, get_pattern=get_ts_pattern) - - def _extract_inherits(self, source_code: str, *, source_file: str) -> List[CodeRelationship]: - rels = super()._extract_inherits(source_code, source_file=source_file) - - # Interface extends: interface Foo extends Bar {} - for node in self.run_ast_grep(source_code, self._get_pattern("interface_extends")): - name = (self._get_match(node, "NAME") or "").strip() - base_raw = (self._get_match(node, "BASE") or "").strip() - if not name or not base_raw: - continue - base = base_raw.split("<", 1)[0].strip() - if not base: - continue - rels.append( - CodeRelationship( - source_symbol=name, - target_symbol=base, - relationship_type=RelationshipType.INHERITS, - source_file=source_file, - target_file=None, - source_line=self._get_line_number(node), - ) - ) - - return _dedupe_relationships(rels) - - def _extract_imports(self, source_code: str, *, source_file: str) -> List[CodeRelationship]: - # Reuse JS logic for standard imports - rels = super()._extract_imports(source_code, source_file=source_file) - - def record(module_name: str, line: int) -> None: - if not module_name: - return - rels.append( - CodeRelationship( - source_symbol="", - target_symbol=module_name, - relationship_type=RelationshipType.IMPORTS, - source_file=source_file, - target_file=None, - source_line=line, - ) - ) - - # Type-only imports: import type ... from "mod" - for pat_name in ("import_type_from_dq", "import_type_from_sq"): - for node in self.run_ast_grep(source_code, self._get_pattern(pat_name)): - mod = _module_from_literal(self._get_match(node, "MODULE")) - if mod: - record(mod, self._get_line_number(node)) - - for pat_name in ("import_type_named_only_dq", "import_type_named_only_sq"): - for node in self.run_ast_grep(source_code, self._get_pattern(pat_name)): - mod = _module_from_literal(self._get_match(node, "MODULE")) - if not mod: - continue - raw_names = _extract_brace_import_names(self._get_node_text(node)) - for name in _extract_named_imports(raw_names): - record(f"{mod}.{name}", self._get_line_number(node)) - - for pat_name in ("import_type_default_named_dq", "import_type_default_named_sq"): - for node in self.run_ast_grep(source_code, self._get_pattern(pat_name)): - mod = _module_from_literal(self._get_match(node, "MODULE")) - if not mod: - continue - raw_names = _extract_brace_import_names(self._get_node_text(node)) - for name in _extract_named_imports(raw_names): - record(f"{mod}.{name}", self._get_line_number(node)) - - return _dedupe_relationships(rels) - - -__all__ = [ - "AstGrepJavaScriptProcessor", - "AstGrepTypeScriptProcessor", -] diff --git a/codex-lens/src/codexlens/parsers/astgrep_processor.py b/codex-lens/src/codexlens/parsers/astgrep_processor.py deleted file mode 100644 index 9e2546d0..00000000 --- a/codex-lens/src/codexlens/parsers/astgrep_processor.py +++ /dev/null @@ -1,1033 +0,0 @@ -"""Ast-grep based processor for Python relationship extraction. - -Provides pattern-based AST matching for extracting code relationships -(inheritance, calls, imports) from Python source code. - -This processor wraps the ast-grep-py bindings and provides a higher-level -interface for relationship extraction, similar to TreeSitterSymbolParser. - -Design Pattern: - - Follows TreeSitterSymbolParser class structure for consistency - - Uses declarative patterns defined in patterns/python/__init__.py - - Provides scope-aware relationship extraction with alias resolution -""" - -from __future__ import annotations - -from abc import ABC, abstractmethod -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - -from codexlens.entities import CodeRelationship, IndexedFile, RelationshipType, Symbol - -# Import patterns module -from codexlens.parsers.patterns.python import ( - PYTHON_PATTERNS, - get_pattern, - get_metavar, -) - -# Graceful import pattern following existing convention -try: - from ast_grep_py import SgNode, SgRoot - from codexlens.parsers.astgrep_binding import AstGrepBinding, ASTGREP_AVAILABLE -except ImportError: - SgNode = None # type: ignore[assignment,misc] - SgRoot = None # type: ignore[assignment,misc] - AstGrepBinding = None # type: ignore[assignment,misc] - ASTGREP_AVAILABLE = False - - -class BaseAstGrepProcessor(ABC): - """Abstract base class for ast-grep based processors. - - Provides common infrastructure for pattern-based AST processing. - Subclasses implement language-specific pattern processing logic. - """ - - def __init__(self, language_id: str, path: Optional[Path] = None) -> None: - """Initialize processor for a language. - - Args: - language_id: Language identifier (python, javascript, typescript) - path: Optional file path for language variant detection - """ - self.language_id = language_id - self.path = path - self._binding: Optional[AstGrepBinding] = None - - if ASTGREP_AVAILABLE and AstGrepBinding is not None: - self._binding = AstGrepBinding(language_id, path) - - def is_available(self) -> bool: - """Check if ast-grep processor is available. - - Returns: - True if ast-grep binding is ready - """ - return self._binding is not None and self._binding.is_available() - - def run_ast_grep(self, source_code: str, pattern: str) -> List[SgNode]: # type: ignore[valid-type] - """Execute ast-grep pattern matching on source code. - - Args: - source_code: Source code text to analyze - pattern: ast-grep pattern string - - Returns: - List of matching SgNode objects, empty if no matches or unavailable - """ - if not self.is_available() or self._binding is None: - return [] - - if not self._binding.parse(source_code): - return [] - - return self._binding.find_all(pattern) - - def _get_match(self, node: SgNode, metavar: str) -> str: # type: ignore[valid-type] - """Extract matched metavariable value from node (best-effort).""" - if self._binding is None or node is None: - return "" - return self._binding._get_match(node, metavar) - - def _get_line_number(self, node: SgNode) -> int: # type: ignore[valid-type] - """Get 1-based starting line number of a node (best-effort).""" - if self._binding is None or node is None: - return 0 - return self._binding._get_line_number(node) - - def _get_line_range(self, node: SgNode) -> Tuple[int, int]: # type: ignore[valid-type] - """Get (start_line, end_line) range of a node (best-effort).""" - if self._binding is None or node is None: - return (0, 0) - return self._binding._get_line_range(node) - - def _get_node_text(self, node: SgNode) -> str: # type: ignore[valid-type] - """Get the full text of a node (best-effort).""" - if self._binding is None or node is None: - return "" - return self._binding._get_node_text(node) - - @abstractmethod - def process_matches( - self, - matches: List[SgNode], # type: ignore[valid-type] - source_code: str, - path: Path, - ) -> List[CodeRelationship]: - """Process ast-grep matches into code relationships. - - Args: - matches: List of matched SgNode objects - source_code: Original source code - path: File path being processed - - Returns: - List of extracted code relationships - """ - pass - - @abstractmethod - def parse(self, text: str, path: Path) -> Optional[IndexedFile]: - """Parse source code and extract relationships. - - Args: - text: Source code text - path: File path - - Returns: - IndexedFile with symbols and relationships, None if unavailable - """ - pass - - -class AstGrepPythonProcessor(BaseAstGrepProcessor): - """Python-specific ast-grep processor for relationship extraction. - - Extracts INHERITS, CALLS, and IMPORTS relationships from Python code - using declarative ast-grep patterns with scope-aware processing. - """ - - def __init__(self, path: Optional[Path] = None) -> None: - """Initialize Python processor. - - Args: - path: Optional file path (for consistency with base class) - """ - super().__init__("python", path) - - def parse(self, text: str, path: Path) -> Optional[IndexedFile]: - """Parse Python source code and extract relationships. - - Args: - text: Python source code text - path: File path - - Returns: - IndexedFile with symbols and relationships, None if unavailable - """ - if not self.is_available(): - return None - - try: - symbols = self._extract_symbols(text) - relationships = self._extract_relationships(text, path) - - return IndexedFile( - path=str(path.resolve()), - language="python", - symbols=symbols, - chunks=[], - relationships=relationships, - ) - except (ValueError, TypeError, AttributeError) as e: - # Log specific parsing errors for debugging - import logging - logging.getLogger(__name__).debug(f"ast-grep parsing error: {e}") - return None - - def _extract_symbols(self, source_code: str) -> List[Symbol]: - """Extract Python symbols (classes, functions, methods). - - Args: - source_code: Python source code - - Returns: - List of Symbol objects - """ - symbols: List[Symbol] = [] - - # Collect all scope definitions with line ranges for proper method detection - # Format: (start_line, end_line, kind, name) - scope_defs: List[Tuple[int, int, str, str]] = [] - - # Track async function positions to avoid duplicates - async_positions: set = set() - - # Extract class definitions - class_matches = self.run_ast_grep(source_code, get_pattern("class_def")) - for node in class_matches: - name = self._get_match(node, "NAME") - if name: - start_line, end_line = self._get_line_range(node) - scope_defs.append((start_line, end_line, "class", name)) - - # Extract async function definitions FIRST (before regular functions) - async_matches = self.run_ast_grep(source_code, get_pattern("async_func_def")) - for node in async_matches: - name = self._get_match(node, "NAME") - if name: - start_line, end_line = self._get_line_range(node) - scope_defs.append((start_line, end_line, "function", name)) - async_positions.add(start_line) # Mark this position as async - - # Extract function definitions (skip those already captured as async) - func_matches = self.run_ast_grep(source_code, get_pattern("func_def")) - for node in func_matches: - name = self._get_match(node, "NAME") - if name: - start_line, end_line = self._get_line_range(node) - # Skip if already captured as async function (same position) - if start_line not in async_positions: - scope_defs.append((start_line, end_line, "function", name)) - - # Sort by start line for scope-aware processing - scope_defs.sort(key=lambda x: x[0]) - - # Process with scope tracking to determine method vs function - scope_stack: List[Tuple[str, int, str]] = [] # (name, end_line, kind) - - for start_line, end_line, kind, name in scope_defs: - # Pop scopes that have ended - while scope_stack and scope_stack[-1][1] < start_line: - scope_stack.pop() - - if kind == "class": - symbols.append(Symbol( - name=name, - kind="class", - range=(start_line, end_line), - )) - scope_stack.append((name, end_line, "class")) - else: # function - # Determine if it's a method (inside a class) or function - is_method = bool(scope_stack) and scope_stack[-1][2] == "class" - symbols.append(Symbol( - name=name, - kind="method" if is_method else "function", - range=(start_line, end_line), - )) - scope_stack.append((name, end_line, "function")) - - return symbols - - def _extract_relationships(self, source_code: str, path: Path) -> List[CodeRelationship]: - """Extract code relationships with scope and alias resolution. - - Args: - source_code: Python source code - path: File path - - Returns: - List of CodeRelationship objects - """ - if not self.is_available() or self._binding is None: - return [] - - source_file = str(path.resolve()) - - # Collect all matches with line numbers and end lines for scope processing - # Format: (start_line, end_line, match_type, symbol, node) - all_matches: List[Tuple[int, int, str, str, Any]] = [] - - # Get class definitions (with and without bases) for scope tracking - class_with_bases = self.run_ast_grep(source_code, get_pattern("class_with_bases")) - for node in class_with_bases: - class_name = self._get_match(node, "NAME") - start_line, end_line = self._get_line_range(node) - if class_name: - # Record class scope and inheritance - all_matches.append((start_line, end_line, "class_def", class_name, node)) - # Extract bases from node text (ast-grep-py 0.40+ doesn't capture $$$) - node_text = self._binding._get_node_text(node) if self._binding else "" - bases_text = self._extract_bases_from_class_text(node_text) - if bases_text: - # Also record inheritance relationship - all_matches.append((start_line, end_line, "inherits", bases_text, node)) - - # Get classes without bases for scope tracking - class_no_bases = self.run_ast_grep(source_code, get_pattern("class_def")) - for node in class_no_bases: - class_name = self._get_match(node, "NAME") - start_line, end_line = self._get_line_range(node) - if class_name: - # Check if not already recorded (avoid duplicates from class_with_bases) - existing = [m for m in all_matches if m[2] == "class_def" and m[3] == class_name and m[0] == start_line] - if not existing: - all_matches.append((start_line, end_line, "class_def", class_name, node)) - - # Get function definitions for scope tracking - func_matches = self.run_ast_grep(source_code, get_pattern("func_def")) - for node in func_matches: - func_name = self._get_match(node, "NAME") - start_line, end_line = self._get_line_range(node) - if func_name: - all_matches.append((start_line, end_line, "func_def", func_name, node)) - - # Get async function definitions for scope tracking - async_func_matches = self.run_ast_grep(source_code, get_pattern("async_func_def")) - for node in async_func_matches: - func_name = self._get_match(node, "NAME") - start_line, end_line = self._get_line_range(node) - if func_name: - all_matches.append((start_line, end_line, "func_def", func_name, node)) - - # Get import matches (process import_with_alias first to avoid duplicates) - import_alias_positions: set = set() - - # Process import with alias: import X as Y - import_alias_matches = self.run_ast_grep(source_code, get_pattern("import_with_alias")) - for node in import_alias_matches: - module = self._get_match(node, "MODULE") - alias = self._get_match(node, "ALIAS") - start_line, end_line = self._get_line_range(node) - if module and alias: - import_alias_positions.add(start_line) - all_matches.append((start_line, end_line, "import_alias", f"{module}:{alias}", node)) - - # Process simple imports: import X (skip lines with aliases) - import_matches = self.run_ast_grep(source_code, get_pattern("import_stmt")) - for node in import_matches: - module = self._get_match(node, "MODULE") - start_line, end_line = self._get_line_range(node) - if module and start_line not in import_alias_positions: - all_matches.append((start_line, end_line, "import", module, node)) - - from_matches = self.run_ast_grep(source_code, get_pattern("import_from")) - for node in from_matches: - module = self._get_match(node, "MODULE") - names = self._get_match(node, "NAMES") - # Prefer parsing from full node text to handle multiple imports - # (ast-grep-py capture may only include the first name). - try: - node_text = self._binding._get_node_text(node) if self._binding else "" - except Exception: - node_text = "" - parsed_names = self._extract_import_names_from_text(node_text) if node_text else "" - if parsed_names: - names = parsed_names - start_line, end_line = self._get_line_range(node) - if module: - all_matches.append((start_line, end_line, "from_import", f"{module}:{names}", node)) - - # Get call matches - call_matches = self.run_ast_grep(source_code, get_pattern("call")) - for node in call_matches: - func = self._get_match(node, "FUNC") - start_line, end_line = self._get_line_range(node) - if func: - # Skip self. and cls. prefixed calls - base = func.split(".", 1)[0] - if base not in {"self", "cls"}: - all_matches.append((start_line, end_line, "call", func, node)) - - # Sort by start line number for scope processing - all_matches.sort(key=lambda x: (x[0], x[2] == "call")) # Process scope defs before calls on same line - - # Process with scope tracking - relationships = self._process_scope_and_aliases(all_matches, source_file) - - return relationships - - def _process_scope_and_aliases( - self, - matches: List[Tuple[int, int, str, str, Any]], - source_file: str, - ) -> List[CodeRelationship]: - """Process matches with scope and alias resolution. - - Implements proper scope tracking similar to treesitter_parser.py: - - Maintains scope_stack for tracking current scope (class/function names) - - Maintains alias_stack with per-scope alias mappings (inherited from parent) - - Pops scopes when current line passes their end line - - Resolves call targets using current scope's alias map - - Args: - matches: Sorted list of (start_line, end_line, type, symbol, node) tuples - source_file: Source file path - - Returns: - List of resolved CodeRelationship objects - """ - relationships: List[CodeRelationship] = [] - - # Scope stack: list of (name, end_line) tuples - scope_stack: List[Tuple[str, int]] = [("", float("inf"))] - - # Alias stack: list of alias dicts, one per scope level - # Each new scope inherits parent's aliases (copy on write) - alias_stack: List[Dict[str, str]] = [{}] - - def get_current_scope() -> str: - """Get the name of the current (innermost) scope.""" - return scope_stack[-1][0] - - def pop_scopes_before(line: int) -> None: - """Pop all scopes that have ended before the given line.""" - while len(scope_stack) > 1 and scope_stack[-1][1] < line: - scope_stack.pop() - alias_stack.pop() - - def push_scope(name: str, end_line: int) -> None: - """Push a new scope onto the stack.""" - scope_stack.append((name, end_line)) - # Copy parent scope's aliases for inheritance - alias_stack.append(dict(alias_stack[-1])) - - def update_aliases(updates: Dict[str, str]) -> None: - """Update current scope's alias map.""" - alias_stack[-1].update(updates) - - def resolve_alias(symbol: str) -> str: - """Resolve a symbol using current scope's alias map.""" - if "." not in symbol: - # Simple name - check if it's an alias - return alias_stack[-1].get(symbol, symbol) - - # Dotted name - resolve the base - parts = symbol.split(".", 1) - base = parts[0] - rest = parts[1] - - if base in alias_stack[-1]: - return f"{alias_stack[-1][base]}.{rest}" - return symbol - - for start_line, end_line, match_type, symbol, node in matches: - # Pop any scopes that have ended - pop_scopes_before(start_line) - - if match_type == "class_def": - # Push class scope - push_scope(symbol, end_line) - - elif match_type == "func_def": - # Push function scope - push_scope(symbol, end_line) - - elif match_type == "inherits": - # Record inheritance relationship - # Parse base classes from the bases text - base_classes = self._parse_base_classes(symbol) - for base_class in base_classes: - base_class = base_class.strip() - if base_class: - # Resolve alias for base class - resolved_base = resolve_alias(base_class) - relationships.append(CodeRelationship( - source_symbol=get_current_scope(), - target_symbol=resolved_base, - relationship_type=RelationshipType.INHERITS, - source_file=source_file, - target_file=None, - source_line=start_line, - )) - - elif match_type == "import": - # Process simple import statement - module = symbol - # Simple import: add base name to alias map - base_name = module.split(".", 1)[0] - update_aliases({base_name: module}) - relationships.append(CodeRelationship( - source_symbol=get_current_scope(), - target_symbol=module, - relationship_type=RelationshipType.IMPORTS, - source_file=source_file, - target_file=None, - source_line=start_line, - )) - - elif match_type == "import_alias": - # Process import with alias: import X as Y - parts = symbol.split(":", 1) - module = parts[0] - alias = parts[1] if len(parts) > 1 else "" - if alias: - update_aliases({alias: module}) - relationships.append(CodeRelationship( - source_symbol=get_current_scope(), - target_symbol=module, - relationship_type=RelationshipType.IMPORTS, - source_file=source_file, - target_file=None, - source_line=start_line, - )) - - elif match_type == "from_import": - # Process from-import statement - parts = symbol.split(":", 1) - module = parts[0] - names = parts[1] if len(parts) > 1 else "" - - names = (names or "").strip() - if names.startswith("(") and names.endswith(")"): - names = names[1:-1].strip() - - # Record IMPORTS edges for the imported names (module.symbol), and - # update aliases for call/usage resolution. - if names and names != "*": - for name in names.split(","): - name = name.strip() - if not name or name == "*": - continue - - if " as " in name: - as_parts = name.split(" as ", 1) - original = as_parts[0].strip() - alias = as_parts[1].strip() - if not original: - continue - target = f"{module}.{original}" if module else original - if alias: - update_aliases({alias: target}) - relationships.append(CodeRelationship( - source_symbol=get_current_scope(), - target_symbol=target, - relationship_type=RelationshipType.IMPORTS, - source_file=source_file, - target_file=None, - source_line=start_line, - )) - else: - target = f"{module}.{name}" if module else name - update_aliases({name: target}) - relationships.append(CodeRelationship( - source_symbol=get_current_scope(), - target_symbol=target, - relationship_type=RelationshipType.IMPORTS, - source_file=source_file, - target_file=None, - source_line=start_line, - )) - - elif match_type == "call": - # Resolve alias for call target - resolved = resolve_alias(symbol) - relationships.append(CodeRelationship( - source_symbol=get_current_scope(), - target_symbol=resolved, - relationship_type=RelationshipType.CALL, - source_file=source_file, - target_file=None, - source_line=start_line, - )) - - return relationships - - def process_matches( - self, - matches: List[SgNode], # type: ignore[valid-type] - source_code: str, - path: Path, - ) -> List[CodeRelationship]: - """Process ast-grep matches into code relationships. - - This is a simplified interface for direct match processing. - For full relationship extraction with scope tracking, use parse(). - - Args: - matches: List of matched SgNode objects - source_code: Original source code - path: File path being processed - - Returns: - List of extracted code relationships - """ - if not self.is_available() or self._binding is None: - return [] - - source_file = str(path.resolve()) - relationships: List[CodeRelationship] = [] - - for node in matches: - # Default to call relationship for generic matches - func = self._get_match(node, "FUNC") - line = self._get_line_number(node) - if func: - base = func.split(".", 1)[0] - if base not in {"self", "cls"}: - relationships.append(CodeRelationship( - source_symbol="", - target_symbol=func, - relationship_type=RelationshipType.CALL, - source_file=source_file, - target_file=None, - source_line=line, - )) - - return relationships - - def _get_match(self, node: SgNode, metavar: str) -> str: # type: ignore[valid-type] - """Extract matched metavariable value from node. - - Args: - node: SgNode with match - metavar: Metavariable name (without $ prefix) - - Returns: - Matched text or empty string - """ - if self._binding is None or node is None: - return "" - return self._binding._get_match(node, metavar) - - def _get_line_number(self, node: SgNode) -> int: # type: ignore[valid-type] - """Get starting line number of a node. - - Args: - node: SgNode to get line number for - - Returns: - 1-based line number - """ - if self._binding is None or node is None: - return 0 - return self._binding._get_line_number(node) - - def _get_line_range(self, node: SgNode) -> Tuple[int, int]: # type: ignore[valid-type] - """Get line range for a node. - - Args: - node: SgNode to get range for - - Returns: - (start_line, end_line) tuple, 1-based inclusive - """ - if self._binding is None or node is None: - return (0, 0) - return self._binding._get_line_range(node) - - - # ========================================================================= - # Dedicated extraction methods for INHERITS, CALL, IMPORTS relationships - # ========================================================================= - - def extract_inherits( - self, - source_code: str, - source_file: str, - source_symbol: str = "", - ) -> List[CodeRelationship]: - """Extract INHERITS relationships from Python code. - - Identifies class inheritance patterns including: - - Single inheritance: class Child(Parent): - - Multiple inheritance: class Child(A, B, C): - - Args: - source_code: Python source code to analyze - source_file: Path to the source file - source_symbol: The containing scope (class or module) - - Returns: - List of CodeRelationship objects with INHERITS type - """ - if not self.is_available(): - return [] - - relationships: List[CodeRelationship] = [] - - # Use class_with_bases pattern to find classes with inheritance - matches = self.run_ast_grep(source_code, get_pattern("class_with_bases")) - - for node in matches: - class_name = self._get_match(node, "NAME") - line = self._get_line_number(node) - - if class_name: - # Extract bases from the node text (first line: "class ClassName(Base1, Base2):") - # ast-grep-py 0.40+ doesn't capture $$$ multi-matches, so parse from text - node_text = self._binding._get_node_text(node) if self._binding else "" - bases_text = self._extract_bases_from_class_text(node_text) - - if bases_text: - # Parse individual base classes from the bases text - base_classes = self._parse_base_classes(bases_text) - - for base_class in base_classes: - base_class = base_class.strip() - if base_class: - relationships.append(CodeRelationship( - source_symbol=class_name, - target_symbol=base_class, - relationship_type=RelationshipType.INHERITS, - source_file=source_file, - target_file=None, - source_line=line, - )) - - return relationships - - def _extract_bases_from_class_text(self, class_text: str) -> str: - """Extract base classes text from class definition. - - Args: - class_text: Full text of class definition (e.g., "class Dog(Animal):\\n pass") - - Returns: - Text inside parentheses (e.g., "Animal") or empty string - """ - import re - # Match "class Name(BASES):" - extract BASES - match = re.search(r'class\s+\w+\s*\(([^)]*)\)\s*:', class_text) - if match: - return match.group(1).strip() - return "" - - def _extract_import_names_from_text(self, import_text: str) -> str: - """Extract imported names from from-import statement. - - Args: - import_text: Full text of import statement (e.g., "from typing import List, Dict") - - Returns: - Names text (e.g., "List, Dict") or empty string - """ - import re - # Match "from MODULE import NAMES" - extract NAMES - match = re.search(r'from\s+[\w.]+\s+import\s+(.+)$', import_text, re.MULTILINE) - if match: - return match.group(1).strip() - return "" - - def extract_calls( - self, - source_code: str, - source_file: str, - source_symbol: str = "", - alias_map: Optional[Dict[str, str]] = None, - ) -> List[CodeRelationship]: - """Extract CALL relationships from Python code. - - Identifies function and method call patterns including: - - Simple calls: func() - - Calls with arguments: func(arg1, arg2) - - Method calls: obj.method() - - Chained calls: obj.method1().method2() - - Args: - source_code: Python source code to analyze - source_file: Path to the source file - source_symbol: The containing scope (class or module) - alias_map: Optional alias map for resolving imported names - - Returns: - List of CodeRelationship objects with CALL type - """ - if not self.is_available(): - return [] - - relationships: List[CodeRelationship] = [] - alias_map = alias_map or {} - - # Use the generic call pattern - matches = self.run_ast_grep(source_code, get_pattern("call")) - - for node in matches: - func = self._get_match(node, "FUNC") - line = self._get_line_number(node) - - if func: - # Skip self. and cls. prefixed calls (internal method calls) - base = func.split(".", 1)[0] - if base in {"self", "cls", "super"}: - continue - - # Resolve alias if available - resolved = self._resolve_call_alias(func, alias_map) - - relationships.append(CodeRelationship( - source_symbol=source_symbol, - target_symbol=resolved, - relationship_type=RelationshipType.CALL, - source_file=source_file, - target_file=None, - source_line=line, - )) - - return relationships - - def extract_imports( - self, - source_code: str, - source_file: str, - source_symbol: str = "", - ) -> Tuple[List[CodeRelationship], Dict[str, str]]: - """Extract IMPORTS relationships from Python code. - - Identifies import patterns including: - - Simple import: import os - - Import with alias: import numpy as np - - From import: from typing import List - - From import with alias: from collections import defaultdict as dd - - Relative import: from .module import func - - Star import: from module import * - - Args: - source_code: Python source code to analyze - source_file: Path to the source file - source_symbol: The containing scope (class or module) - - Returns: - Tuple of: - - List of CodeRelationship objects with IMPORTS type - - Dict mapping local names to fully qualified module names (alias map) - """ - if not self.is_available(): - return [], {} - - relationships: List[CodeRelationship] = [] - alias_map: Dict[str, str] = {} - - # Track processed lines to avoid duplicates - processed_lines: set = set() - - # Process import with alias FIRST: import X as Y - alias_matches = self.run_ast_grep(source_code, get_pattern("import_with_alias")) - for node in alias_matches: - module = self._get_match(node, "MODULE") - alias = self._get_match(node, "ALIAS") - line = self._get_line_number(node) - - if module and alias: - alias_map[alias] = module - processed_lines.add(line) - - relationships.append(CodeRelationship( - source_symbol=source_symbol, - target_symbol=module, - relationship_type=RelationshipType.IMPORTS, - source_file=source_file, - target_file=None, - source_line=line, - )) - - # Process simple imports: import X (skip lines already processed) - import_matches = self.run_ast_grep(source_code, get_pattern("import_stmt")) - for node in import_matches: - module = self._get_match(node, "MODULE") - line = self._get_line_number(node) - - if module and line not in processed_lines: - # Add to alias map: first part of module - base_name = module.split(".", 1)[0] - alias_map[base_name] = module - - relationships.append(CodeRelationship( - source_symbol=source_symbol, - target_symbol=module, - relationship_type=RelationshipType.IMPORTS, - source_file=source_file, - target_file=None, - source_line=line, - )) - - # Process from imports: from X import Y - from_matches = self.run_ast_grep(source_code, get_pattern("import_from")) - for node in from_matches: - module = self._get_match(node, "MODULE") - line = self._get_line_number(node) - - if module: - # Add relationship for the module - relationships.append(CodeRelationship( - source_symbol=source_symbol, - target_symbol=module, - relationship_type=RelationshipType.IMPORTS, - source_file=source_file, - target_file=None, - source_line=line, - )) - - # Parse names from node text (ast-grep-py 0.40+ doesn't capture $$$ multi-match) - node_text = self._binding._get_node_text(node) if self._binding else "" - names = self._extract_import_names_from_text(node_text) - - # Add aliases for imported names - if names and names != "*": - for name in names.split(","): - name = name.strip() - # Handle "name as alias" syntax - if " as " in name: - parts = name.split(" as ") - original = parts[0].strip() - alias = parts[1].strip() - alias_map[alias] = f"{module}.{original}" - elif name: - alias_map[name] = f"{module}.{name}" - - # Process star imports: from X import * - star_matches = self.run_ast_grep(source_code, get_pattern("from_import_star")) - for node in star_matches: - module = self._get_match(node, "MODULE") - line = self._get_line_number(node) - - if module: - relationships.append(CodeRelationship( - source_symbol=source_symbol, - target_symbol=f"{module}.*", - relationship_type=RelationshipType.IMPORTS, - source_file=source_file, - target_file=None, - source_line=line, - )) - - # Process relative imports: from .X import Y - relative_matches = self.run_ast_grep(source_code, get_pattern("relative_import")) - for node in relative_matches: - module = self._get_match(node, "MODULE") - names = self._get_match(node, "NAMES") - line = self._get_line_number(node) - - # Prepend dot for relative module path - rel_module = f".{module}" if module else "." - - relationships.append(CodeRelationship( - source_symbol=source_symbol, - target_symbol=rel_module, - relationship_type=RelationshipType.IMPORTS, - source_file=source_file, - target_file=None, - source_line=line, - )) - - return relationships, alias_map - - # ========================================================================= - # Helper methods for pattern processing - # ========================================================================= - - def _parse_base_classes(self, bases_text: str) -> List[str]: - """Parse base class names from inheritance text. - - Handles single and multiple inheritance with proper comma splitting. - Accounts for nested parentheses and complex type annotations. - - Args: - bases_text: Text inside the parentheses of class definition - - Returns: - List of base class names - """ - if not bases_text: - return [] - - # Simple comma split (may not handle all edge cases) - bases = [] - depth = 0 - current = [] - - for char in bases_text: - if char == "(": - depth += 1 - current.append(char) - elif char == ")": - depth -= 1 - current.append(char) - elif char == "," and depth == 0: - base = "".join(current).strip() - if base: - bases.append(base) - current = [] - else: - current.append(char) - - # Add the last base class - if current: - base = "".join(current).strip() - if base: - bases.append(base) - - return bases - - def _resolve_call_alias(self, func_name: str, alias_map: Dict[str, str]) -> str: - """Resolve a function call name using import aliases. - - Args: - func_name: The function/method name as it appears in code - alias_map: Mapping of local names to fully qualified names - - Returns: - Resolved function name (fully qualified if possible) - """ - if "." not in func_name: - # Simple function call - check if it's an alias - return alias_map.get(func_name, func_name) - - # Method call or qualified name - resolve the base - parts = func_name.split(".", 1) - base = parts[0] - rest = parts[1] - - if base in alias_map: - return f"{alias_map[base]}.{rest}" - - return func_name - - -def is_astgrep_processor_available() -> bool: - """Check if ast-grep processor is available. - - Returns: - True if ast-grep-py is installed and processor can be used - """ - return ASTGREP_AVAILABLE - - -__all__ = [ - "BaseAstGrepProcessor", - "AstGrepPythonProcessor", - "is_astgrep_processor_available", -] diff --git a/codex-lens/src/codexlens/parsers/encoding.py b/codex-lens/src/codexlens/parsers/encoding.py deleted file mode 100644 index b796d24b..00000000 --- a/codex-lens/src/codexlens/parsers/encoding.py +++ /dev/null @@ -1,202 +0,0 @@ -"""Optional encoding detection module for CodexLens. - -Provides automatic encoding detection with graceful fallback to UTF-8. -Install with: pip install codexlens[encoding] -""" - -from __future__ import annotations - -import logging -from pathlib import Path -from typing import Tuple, Optional - -log = logging.getLogger(__name__) - -# Feature flag for encoding detection availability -ENCODING_DETECTION_AVAILABLE = False -_import_error: Optional[str] = None - - -def _detect_chardet_backend() -> Tuple[bool, Optional[str]]: - """Detect if chardet or charset-normalizer is available.""" - try: - import chardet - return True, None - except ImportError: - pass - - try: - from charset_normalizer import from_bytes - return True, None - except ImportError: - pass - - return False, "chardet not available. Install with: pip install codexlens[encoding]" - - -# Initialize on module load -ENCODING_DETECTION_AVAILABLE, _import_error = _detect_chardet_backend() - - -def check_encoding_available() -> Tuple[bool, Optional[str]]: - """Check if encoding detection dependencies are available. - - Returns: - Tuple of (available, error_message) - """ - return ENCODING_DETECTION_AVAILABLE, _import_error - - -def detect_encoding(content_bytes: bytes, confidence_threshold: float = 0.7) -> str: - """Detect encoding from file content bytes. - - Uses chardet or charset-normalizer with configurable confidence threshold. - Falls back to UTF-8 if confidence is too low or detection unavailable. - - Args: - content_bytes: Raw file content as bytes - confidence_threshold: Minimum confidence (0.0-1.0) to accept detection - - Returns: - Detected encoding name (e.g., 'utf-8', 'iso-8859-1', 'gbk') - Returns 'utf-8' as fallback if detection fails or confidence too low - """ - if not ENCODING_DETECTION_AVAILABLE: - log.debug("Encoding detection not available, using UTF-8 fallback") - return "utf-8" - - if not content_bytes: - return "utf-8" - - try: - # Try chardet first - try: - import chardet - result = chardet.detect(content_bytes) - encoding = result.get("encoding") - confidence = result.get("confidence", 0.0) - - if encoding and confidence >= confidence_threshold: - log.debug(f"Detected encoding: {encoding} (confidence: {confidence:.2f})") - # Normalize encoding name: replace underscores with hyphens - return encoding.lower().replace('_', '-') - else: - log.debug( - f"Low confidence encoding detection: {encoding} " - f"(confidence: {confidence:.2f}), using UTF-8 fallback" - ) - return "utf-8" - except ImportError: - pass - - # Fallback to charset-normalizer - try: - from charset_normalizer import from_bytes - results = from_bytes(content_bytes) - if results: - best = results.best() - if best and best.encoding: - log.debug(f"Detected encoding via charset-normalizer: {best.encoding}") - # Normalize encoding name: replace underscores with hyphens - return best.encoding.lower().replace('_', '-') - except ImportError: - pass - - except Exception as e: - log.warning(f"Encoding detection failed: {e}, using UTF-8 fallback") - - return "utf-8" - - -def read_file_safe( - path: Path | str, - confidence_threshold: float = 0.7, - max_detection_bytes: int = 100_000 -) -> Tuple[str, str]: - """Read file with automatic encoding detection and safe decoding. - - Reads file bytes, detects encoding, and decodes with error replacement - to preserve file structure even with encoding issues. - - Args: - path: Path to file to read - confidence_threshold: Minimum confidence for encoding detection - max_detection_bytes: Maximum bytes to use for encoding detection (default 100KB) - - Returns: - Tuple of (content, detected_encoding) - - content: Decoded file content (with � for unmappable bytes) - - detected_encoding: Detected encoding name - - Raises: - OSError: If file cannot be read - IsADirectoryError: If path is a directory - """ - file_path = Path(path) if isinstance(path, str) else path - - # Read file bytes - try: - content_bytes = file_path.read_bytes() - except Exception as e: - log.error(f"Failed to read file {file_path}: {e}") - raise - - # Detect encoding from first N bytes for performance - detection_sample = content_bytes[:max_detection_bytes] if len(content_bytes) > max_detection_bytes else content_bytes - encoding = detect_encoding(detection_sample, confidence_threshold) - - # Decode with error replacement to preserve structure - try: - content = content_bytes.decode(encoding, errors='replace') - log.debug(f"Successfully decoded {file_path} using {encoding}") - return content, encoding - except Exception as e: - # Final fallback to UTF-8 with replacement - log.warning(f"Failed to decode {file_path} with {encoding}, using UTF-8: {e}") - content = content_bytes.decode('utf-8', errors='replace') - return content, 'utf-8' - - -def is_binary_file(path: Path | str, sample_size: int = 8192) -> bool: - """Check if file is likely binary by sampling first bytes. - - Uses heuristic: if >30% of sample bytes are null or non-text, consider binary. - - Args: - path: Path to file to check - sample_size: Number of bytes to sample (default 8KB) - - Returns: - True if file appears to be binary, False otherwise - """ - file_path = Path(path) if isinstance(path, str) else path - - try: - with file_path.open('rb') as f: - sample = f.read(sample_size) - - if not sample: - return False - - # Count null bytes and non-printable characters - null_count = sample.count(b'\x00') - non_text_count = sum(1 for byte in sample if byte < 0x20 and byte not in (0x09, 0x0a, 0x0d)) - - # If >30% null bytes or >50% non-text, consider binary - null_ratio = null_count / len(sample) - non_text_ratio = non_text_count / len(sample) - - return null_ratio > 0.3 or non_text_ratio > 0.5 - - except Exception as e: - log.debug(f"Binary check failed for {file_path}: {e}, assuming text") - return False - - -__all__ = [ - "ENCODING_DETECTION_AVAILABLE", - "check_encoding_available", - "detect_encoding", - "read_file_safe", - "is_binary_file", -] diff --git a/codex-lens/src/codexlens/parsers/factory.py b/codex-lens/src/codexlens/parsers/factory.py deleted file mode 100644 index 5b07a4bc..00000000 --- a/codex-lens/src/codexlens/parsers/factory.py +++ /dev/null @@ -1,393 +0,0 @@ -"""Parser factory for CodexLens. - -Python and JavaScript/TypeScript parsing use Tree-Sitter grammars when -available. Regex fallbacks are retained to preserve the existing parser -interface and behavior in minimal environments. -""" - -from __future__ import annotations - -import re -from dataclasses import dataclass -from pathlib import Path -from typing import Dict, List, Optional, Protocol - -from codexlens.config import Config -from codexlens.entities import CodeRelationship, IndexedFile, RelationshipType, Symbol -from codexlens.parsers.treesitter_parser import TreeSitterSymbolParser - - -class Parser(Protocol): - def parse(self, text: str, path: Path) -> IndexedFile: ... - - -@dataclass -class SimpleRegexParser: - language_id: str - config: Optional[Config] = None - - def parse(self, text: str, path: Path) -> IndexedFile: - # Try tree-sitter first for supported languages - if self.language_id in {"python", "javascript", "typescript"}: - ts_parser = TreeSitterSymbolParser( - self.language_id, - path, - config=self.config, - ) - if ts_parser.is_available(): - indexed = ts_parser.parse(text, path) - if indexed is not None: - return indexed - - # Fallback to regex parsing - if self.language_id == "python": - symbols = _parse_python_symbols_regex(text) - relationships = _parse_python_relationships_regex(text, path) - elif self.language_id in {"javascript", "typescript"}: - symbols = _parse_js_ts_symbols_regex(text) - relationships = _parse_js_ts_relationships_regex(text, path) - elif self.language_id == "java": - symbols = _parse_java_symbols(text) - relationships = [] - elif self.language_id == "go": - symbols = _parse_go_symbols(text) - relationships = [] - elif self.language_id == "markdown": - symbols = _parse_markdown_symbols(text) - relationships = [] - elif self.language_id == "text": - symbols = _parse_text_symbols(text) - relationships = [] - else: - symbols = _parse_generic_symbols(text) - relationships = [] - - return IndexedFile( - path=str(path.resolve()), - language=self.language_id, - symbols=symbols, - chunks=[], - relationships=relationships, - ) - - -class ParserFactory: - def __init__(self, config: Config) -> None: - self.config = config - self._parsers: Dict[str, Parser] = {} - - def get_parser(self, language_id: str) -> Parser: - if language_id not in self._parsers: - self._parsers[language_id] = SimpleRegexParser( - language_id, - config=self.config, - ) - return self._parsers[language_id] - - -# Regex-based fallback parsers -_PY_CLASS_RE = re.compile(r"^\s*class\s+([A-Za-z_]\w*)\b") -_PY_DEF_RE = re.compile(r"^\s*(?:async\s+)?def\s+([A-Za-z_]\w*)\s*\(") - -_PY_IMPORT_RE = re.compile(r"^(?:from\s+([\w.]+)\s+)?import\s+([\w.,\s]+)") -_PY_CALL_RE = re.compile(r"(? List[Symbol]: - """Parse Python symbols, using tree-sitter if available, regex fallback.""" - ts_parser = TreeSitterSymbolParser("python") - if ts_parser.is_available(): - symbols = ts_parser.parse_symbols(text) - if symbols is not None: - return symbols - return _parse_python_symbols_regex(text) - - -def _parse_js_ts_symbols( - text: str, - language_id: str = "javascript", - path: Optional[Path] = None, -) -> List[Symbol]: - """Parse JS/TS symbols, using tree-sitter if available, regex fallback.""" - ts_parser = TreeSitterSymbolParser(language_id, path) - if ts_parser.is_available(): - symbols = ts_parser.parse_symbols(text) - if symbols is not None: - return symbols - return _parse_js_ts_symbols_regex(text) - - -def _parse_python_symbols_regex(text: str) -> List[Symbol]: - symbols: List[Symbol] = [] - current_class_indent: Optional[int] = None - for i, line in enumerate(text.splitlines(), start=1): - class_match = _PY_CLASS_RE.match(line) - if class_match: - current_class_indent = len(line) - len(line.lstrip(" ")) - symbols.append(Symbol(name=class_match.group(1), kind="class", range=(i, i))) - continue - def_match = _PY_DEF_RE.match(line) - if def_match: - indent = len(line) - len(line.lstrip(" ")) - kind = "method" if current_class_indent is not None and indent > current_class_indent else "function" - symbols.append(Symbol(name=def_match.group(1), kind=kind, range=(i, i))) - continue - if current_class_indent is not None: - indent = len(line) - len(line.lstrip(" ")) - if line.strip() and indent <= current_class_indent: - current_class_indent = None - return symbols - - -def _parse_python_relationships_regex(text: str, path: Path) -> List[CodeRelationship]: - relationships: List[CodeRelationship] = [] - current_scope: str | None = None - source_file = str(path.resolve()) - - for line_num, line in enumerate(text.splitlines(), start=1): - class_match = _PY_CLASS_RE.match(line) - if class_match: - current_scope = class_match.group(1) - continue - - def_match = _PY_DEF_RE.match(line) - if def_match: - current_scope = def_match.group(1) - continue - - if current_scope is None: - continue - - import_match = _PY_IMPORT_RE.search(line) - if import_match: - import_target = import_match.group(1) or import_match.group(2) - if import_target: - relationships.append( - CodeRelationship( - source_symbol=current_scope, - target_symbol=import_target.strip(), - relationship_type=RelationshipType.IMPORTS, - source_file=source_file, - target_file=None, - source_line=line_num, - ) - ) - - for call_match in _PY_CALL_RE.finditer(line): - call_name = call_match.group(1) - if call_name in { - "if", - "for", - "while", - "return", - "print", - "len", - "str", - "int", - "float", - "list", - "dict", - "set", - "tuple", - current_scope, - }: - continue - relationships.append( - CodeRelationship( - source_symbol=current_scope, - target_symbol=call_name, - relationship_type=RelationshipType.CALL, - source_file=source_file, - target_file=None, - source_line=line_num, - ) - ) - - return relationships - - -_JS_FUNC_RE = re.compile(r"^\s*(?:export\s+)?(?:async\s+)?function\s+([A-Za-z_$][\w$]*)\s*\(") -_JS_CLASS_RE = re.compile(r"^\s*(?:export\s+)?class\s+([A-Za-z_$][\w$]*)\b") -_JS_ARROW_RE = re.compile( - r"^\s*(?:export\s+)?(?:const|let|var)\s+([A-Za-z_$][\w$]*)\s*=\s*(?:async\s*)?\(?[^)]*\)?\s*=>" -) -_JS_METHOD_RE = re.compile(r"^\s+(?:async\s+)?([A-Za-z_$][\w$]*)\s*\([^)]*\)\s*\{") -_JS_IMPORT_RE = re.compile(r"import\s+.*\s+from\s+['\"]([^'\"]+)['\"]") -_JS_CALL_RE = re.compile(r"(? List[Symbol]: - symbols: List[Symbol] = [] - in_class = False - class_brace_depth = 0 - brace_depth = 0 - - for i, line in enumerate(text.splitlines(), start=1): - brace_depth += line.count("{") - line.count("}") - - class_match = _JS_CLASS_RE.match(line) - if class_match: - symbols.append(Symbol(name=class_match.group(1), kind="class", range=(i, i))) - in_class = True - class_brace_depth = brace_depth - continue - - if in_class and brace_depth < class_brace_depth: - in_class = False - - func_match = _JS_FUNC_RE.match(line) - if func_match: - symbols.append(Symbol(name=func_match.group(1), kind="function", range=(i, i))) - continue - - arrow_match = _JS_ARROW_RE.match(line) - if arrow_match: - symbols.append(Symbol(name=arrow_match.group(1), kind="function", range=(i, i))) - continue - - if in_class: - method_match = _JS_METHOD_RE.match(line) - if method_match: - name = method_match.group(1) - if name != "constructor": - symbols.append(Symbol(name=name, kind="method", range=(i, i))) - - return symbols - - -def _parse_js_ts_relationships_regex(text: str, path: Path) -> List[CodeRelationship]: - relationships: List[CodeRelationship] = [] - current_scope: str | None = None - source_file = str(path.resolve()) - - for line_num, line in enumerate(text.splitlines(), start=1): - class_match = _JS_CLASS_RE.match(line) - if class_match: - current_scope = class_match.group(1) - continue - - func_match = _JS_FUNC_RE.match(line) - if func_match: - current_scope = func_match.group(1) - continue - - arrow_match = _JS_ARROW_RE.match(line) - if arrow_match: - current_scope = arrow_match.group(1) - continue - - if current_scope is None: - continue - - import_match = _JS_IMPORT_RE.search(line) - if import_match: - relationships.append( - CodeRelationship( - source_symbol=current_scope, - target_symbol=import_match.group(1), - relationship_type=RelationshipType.IMPORTS, - source_file=source_file, - target_file=None, - source_line=line_num, - ) - ) - - for call_match in _JS_CALL_RE.finditer(line): - call_name = call_match.group(1) - if call_name in {current_scope}: - continue - relationships.append( - CodeRelationship( - source_symbol=current_scope, - target_symbol=call_name, - relationship_type=RelationshipType.CALL, - source_file=source_file, - target_file=None, - source_line=line_num, - ) - ) - - return relationships - - -_JAVA_CLASS_RE = re.compile(r"^\s*(?:public\s+)?class\s+([A-Za-z_]\w*)\b") -_JAVA_METHOD_RE = re.compile( - r"^\s*(?:public|private|protected|static|\s)+[\w<>\[\]]+\s+([A-Za-z_]\w*)\s*\(" -) - - -def _parse_java_symbols(text: str) -> List[Symbol]: - symbols: List[Symbol] = [] - for i, line in enumerate(text.splitlines(), start=1): - class_match = _JAVA_CLASS_RE.match(line) - if class_match: - symbols.append(Symbol(name=class_match.group(1), kind="class", range=(i, i))) - continue - method_match = _JAVA_METHOD_RE.match(line) - if method_match: - symbols.append(Symbol(name=method_match.group(1), kind="method", range=(i, i))) - return symbols - - -_GO_FUNC_RE = re.compile(r"^\s*func\s+(?:\([^)]+\)\s+)?([A-Za-z_]\w*)\s*\(") -_GO_TYPE_RE = re.compile(r"^\s*type\s+([A-Za-z_]\w*)\s+(?:struct|interface)\b") - - -def _parse_go_symbols(text: str) -> List[Symbol]: - symbols: List[Symbol] = [] - for i, line in enumerate(text.splitlines(), start=1): - type_match = _GO_TYPE_RE.match(line) - if type_match: - symbols.append(Symbol(name=type_match.group(1), kind="class", range=(i, i))) - continue - func_match = _GO_FUNC_RE.match(line) - if func_match: - symbols.append(Symbol(name=func_match.group(1), kind="function", range=(i, i))) - return symbols - - -_GENERIC_DEF_RE = re.compile(r"^\s*(?:def|function|func)\s+([A-Za-z_]\w*)\b") -_GENERIC_CLASS_RE = re.compile(r"^\s*(?:class|struct|interface)\s+([A-Za-z_]\w*)\b") - - -def _parse_generic_symbols(text: str) -> List[Symbol]: - symbols: List[Symbol] = [] - for i, line in enumerate(text.splitlines(), start=1): - class_match = _GENERIC_CLASS_RE.match(line) - if class_match: - symbols.append(Symbol(name=class_match.group(1), kind="class", range=(i, i))) - continue - def_match = _GENERIC_DEF_RE.match(line) - if def_match: - symbols.append(Symbol(name=def_match.group(1), kind="function", range=(i, i))) - return symbols - - -# Markdown heading regex: # Heading, ## Heading, etc. -_MD_HEADING_RE = re.compile(r"^(#{1,6})\s+(.+)$") - - -def _parse_markdown_symbols(text: str) -> List[Symbol]: - """Parse Markdown headings as symbols. - - Extracts # headings as 'section' symbols with heading level as kind suffix. - """ - symbols: List[Symbol] = [] - for i, line in enumerate(text.splitlines(), start=1): - heading_match = _MD_HEADING_RE.match(line) - if heading_match: - level = len(heading_match.group(1)) - title = heading_match.group(2).strip() - # Use 'section' kind with level indicator - kind = f"h{level}" - symbols.append(Symbol(name=title, kind=kind, range=(i, i))) - return symbols - - -def _parse_text_symbols(text: str) -> List[Symbol]: - """Parse plain text files - no symbols, just index content.""" - # Text files don't have structured symbols, return empty list - # The file content will still be indexed for FTS search - return [] diff --git a/codex-lens/src/codexlens/parsers/patterns/__init__.py b/codex-lens/src/codexlens/parsers/patterns/__init__.py deleted file mode 100644 index 10717360..00000000 --- a/codex-lens/src/codexlens/parsers/patterns/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""ast-grep pattern definitions for various languages. - -This package contains language-specific pattern definitions for -extracting code relationships using ast-grep declarative patterns. -""" diff --git a/codex-lens/src/codexlens/parsers/patterns/javascript/__init__.py b/codex-lens/src/codexlens/parsers/patterns/javascript/__init__.py deleted file mode 100644 index a95608ea..00000000 --- a/codex-lens/src/codexlens/parsers/patterns/javascript/__init__.py +++ /dev/null @@ -1,92 +0,0 @@ -"""JavaScript ast-grep patterns for relationship extraction. - -These patterns are used by CodexLens' optional ast-grep processors to extract: -- IMPORTS: ES module imports + CommonJS require() -- INHERITS: class extends relationships - -Pattern Syntax (ast-grep-py 0.40+): - $VAR - Single metavariable (matches one AST node) - $$$VAR - Multiple metavariable (matches zero or more nodes) -""" - -from __future__ import annotations - -from typing import Dict, List - - -JAVASCRIPT_PATTERNS: Dict[str, str] = { - # ES module imports - # import React from "react" - # import React, { useEffect } from "react" - # import { useEffect } from "react" - # import * as fs from "fs" - "import_from_dq": "import $$$IMPORTS from \"$MODULE\"", - "import_from_sq": "import $$$IMPORTS from '$MODULE'", - "import_named_only_dq": "import {$$$NAMES} from \"$MODULE\"", - "import_named_only_sq": "import {$$$NAMES} from '$MODULE'", - "import_default_named_dq": "import $DEFAULT, {$$$NAMES} from \"$MODULE\"", - "import_default_named_sq": "import $DEFAULT, {$$$NAMES} from '$MODULE'", - # Side-effect import: import "./styles.css" - "import_side_effect_dq": "import \"$MODULE\"", - "import_side_effect_sq": "import '$MODULE'", - - # CommonJS require(): const fs = require("fs") - "require_call_dq": "require(\"$MODULE\")", - "require_call_sq": "require('$MODULE')", - - # Class inheritance: class Child extends Base {} - # Note: `{...}` form matches both JS and TS grammars more reliably. - "class_extends": "class $NAME extends $BASE {$$$BODY}", -} - - -METAVARS = { - "module": "MODULE", - "import_names": "NAMES", - "import_default": "DEFAULT", - "class_name": "NAME", - "class_base": "BASE", -} - - -RELATIONSHIP_PATTERNS: Dict[str, List[str]] = { - "imports": [ - "import_from_dq", - "import_from_sq", - "import_named_only_dq", - "import_named_only_sq", - "import_default_named_dq", - "import_default_named_sq", - "import_side_effect_dq", - "import_side_effect_sq", - "require_call_dq", - "require_call_sq", - ], - "inheritance": ["class_extends"], -} - - -def get_pattern(pattern_name: str) -> str: - if pattern_name not in JAVASCRIPT_PATTERNS: - raise KeyError( - f"Unknown JS pattern: {pattern_name}. Available: {list(JAVASCRIPT_PATTERNS.keys())}" - ) - return JAVASCRIPT_PATTERNS[pattern_name] - - -def get_patterns_for_relationship(rel_type: str) -> List[str]: - return RELATIONSHIP_PATTERNS.get(rel_type, []) - - -def get_metavar(name: str) -> str: - return METAVARS.get(name, name.upper()) - - -__all__ = [ - "JAVASCRIPT_PATTERNS", - "METAVARS", - "RELATIONSHIP_PATTERNS", - "get_pattern", - "get_patterns_for_relationship", - "get_metavar", -] diff --git a/codex-lens/src/codexlens/parsers/patterns/python/__init__.py b/codex-lens/src/codexlens/parsers/patterns/python/__init__.py deleted file mode 100644 index c8d6526e..00000000 --- a/codex-lens/src/codexlens/parsers/patterns/python/__init__.py +++ /dev/null @@ -1,204 +0,0 @@ -"""Python ast-grep patterns for relationship extraction. - -This module defines declarative patterns for extracting code relationships -(inheritance, calls, imports) from Python source code using ast-grep. - -Pattern Syntax (ast-grep-py 0.40+): - $VAR - Single metavariable (matches one AST node) - $$$VAR - Multiple metavariable (matches zero or more nodes) - -Example: - "class $CLASS_NAME($$$BASES) $$$BODY" matches: - class MyClass(BaseClass): - pass - with $CLASS_NAME = "MyClass", $$$BASES = "BaseClass", $$$BODY = "pass" - -YAML Pattern Files: - inherits.yaml - INHERITS relationship patterns (single/multiple inheritance) - imports.yaml - IMPORTS relationship patterns (import, from...import, as) - call.yaml - CALL relationship patterns (function/method calls) -""" - -from __future__ import annotations - -from pathlib import Path -from typing import Dict, List, Optional - -# Directory containing YAML pattern files -PATTERNS_DIR = Path(__file__).parent - -# Python ast-grep patterns organized by relationship type -# Note: ast-grep-py 0.40+ uses $$$ for zero-or-more multi-match -PYTHON_PATTERNS: Dict[str, str] = { - # Class definitions with inheritance - "class_def": "class $NAME $$$BODY", - "class_with_bases": "class $NAME($$$BASES) $$$BODY", - - # Single inheritance: class Child(Parent): - "single_inheritance": "class $CLASS_NAME($BASE) $$$BODY", - - # Multiple inheritance: class Child(A, B, C): - "multiple_inheritance": "class $CLASS_NAME($BASE, $$$MORE_BASES) $$$BODY", - - # Function definitions (use $$$ for zero-or-more params) - "func_def": "def $NAME($$$PARAMS): $$$BODY", - "async_func_def": "async def $NAME($$$PARAMS): $$$BODY", - - # Import statements - basic forms - "import_stmt": "import $MODULE", - "import_from": "from $MODULE import $NAMES", - - # Import statements - extended forms - "import_with_alias": "import $MODULE as $ALIAS", - "import_multiple": "import $FIRST, $$$REST", - "from_import_single": "from $MODULE import $NAME", - "from_import_with_alias": "from $MODULE import $NAME as $ALIAS", - "from_import_multiple": "from $MODULE import $FIRST, $$$REST", - "from_import_star": "from $MODULE import *", - "relative_import": "from .$$$MODULE import $NAMES", - - # Function/method calls - basic form (use $$$ for zero-or-more args) - "call": "$FUNC($$$ARGS)", - "method_call": "$OBJ.$METHOD($$$ARGS)", - - # Function/method calls - specific forms - "simple_call": "$FUNC()", - "call_with_args": "$FUNC($$$ARGS)", - "chained_call": "$OBJ.$METHOD($$$ARGS).$$$CHAIN", - "constructor_call": "$CLASS($$$ARGS)", -} - -# Metavariable names for extracting match data -METAVARS = { - # Class patterns - "class_name": "NAME", - "class_bases": "BASES", - "class_body": "BODY", - "inherit_class": "CLASS_NAME", - "inherit_base": "BASE", - "inherit_more_bases": "MORE_BASES", - - # Function patterns - "func_name": "NAME", - "func_params": "PARAMS", - "func_body": "BODY", - - # Import patterns - "import_module": "MODULE", - "import_names": "NAMES", - "import_alias": "ALIAS", - "import_first": "FIRST", - "import_rest": "REST", - - # Call patterns - "call_func": "FUNC", - "call_obj": "OBJ", - "call_method": "METHOD", - "call_args": "ARGS", - "call_class": "CLASS", - "call_chain": "CHAIN", -} - -# Relationship pattern mapping - expanded for new patterns -RELATIONSHIP_PATTERNS: Dict[str, List[str]] = { - "inheritance": ["class_with_bases", "single_inheritance", "multiple_inheritance"], - "imports": [ - "import_stmt", "import_from", - "import_with_alias", "import_multiple", - "from_import_single", "from_import_with_alias", - "from_import_multiple", "from_import_star", - "relative_import", - ], - "calls": ["call", "method_call", "simple_call", "call_with_args", "constructor_call"], -} - -# YAML pattern file mapping -YAML_PATTERN_FILES = { - "inheritance": "inherits.yaml", - "imports": "imports.yaml", - "calls": "call.yaml", -} - - -def get_pattern(pattern_name: str) -> str: - """Get an ast-grep pattern by name. - - Args: - pattern_name: Key from PYTHON_PATTERNS dict - - Returns: - Pattern string - - Raises: - KeyError: If pattern name not found - """ - if pattern_name not in PYTHON_PATTERNS: - raise KeyError(f"Unknown pattern: {pattern_name}. Available: {list(PYTHON_PATTERNS.keys())}") - return PYTHON_PATTERNS[pattern_name] - - -def get_patterns_for_relationship(rel_type: str) -> List[str]: - """Get all patterns that can extract a given relationship type. - - Args: - rel_type: Relationship type (inheritance, imports, calls) - - Returns: - List of pattern names - """ - return RELATIONSHIP_PATTERNS.get(rel_type, []) - - -def get_metavar(name: str) -> str: - """Get metavariable name without $ prefix. - - Args: - name: Key from METAVARS dict - - Returns: - Metavariable name (e.g., "NAME" not "$NAME") - """ - return METAVARS.get(name, name.upper()) - - -def get_yaml_pattern_path(rel_type: str) -> Optional[Path]: - """Get the path to a YAML pattern file for a relationship type. - - Args: - rel_type: Relationship type (inheritance, imports, calls) - - Returns: - Path to YAML file or None if not found - """ - filename = YAML_PATTERN_FILES.get(rel_type) - if filename: - return PATTERNS_DIR / filename - return None - - -def list_yaml_pattern_files() -> Dict[str, Path]: - """List all available YAML pattern files. - - Returns: - Dict mapping relationship type to YAML file path - """ - result = {} - for rel_type, filename in YAML_PATTERN_FILES.items(): - path = PATTERNS_DIR / filename - if path.exists(): - result[rel_type] = path - return result - - -__all__ = [ - "PYTHON_PATTERNS", - "METAVARS", - "RELATIONSHIP_PATTERNS", - "YAML_PATTERN_FILES", - "PATTERNS_DIR", - "get_pattern", - "get_patterns_for_relationship", - "get_metavar", - "get_yaml_pattern_path", - "list_yaml_pattern_files", -] diff --git a/codex-lens/src/codexlens/parsers/patterns/python/call.yaml b/codex-lens/src/codexlens/parsers/patterns/python/call.yaml deleted file mode 100644 index 1b1bd828..00000000 --- a/codex-lens/src/codexlens/parsers/patterns/python/call.yaml +++ /dev/null @@ -1,87 +0,0 @@ -# Python CALL patterns for ast-grep -# Extracts function and method call expressions - -# Pattern metadata -id: python-call -language: python -description: Extract function and method calls from Python code - -patterns: - # Simple function call - # Matches: func() - - id: simple_call - pattern: "$FUNC()" - message: "Found simple function call" - severity: hint - - # Function call with arguments - # Matches: func(arg1, arg2) - - id: call_with_args - pattern: "$FUNC($$$ARGS)" - message: "Found function call with arguments" - severity: hint - - # Method call - # Matches: obj.method() - - id: method_call - pattern: "$OBJ.$METHOD($$$ARGS)" - message: "Found method call" - severity: hint - - # Chained method call - # Matches: obj.method1().method2() - - id: chained_call - pattern: "$OBJ.$METHOD($$$ARGS).$$$CHAIN" - message: "Found chained method call" - severity: hint - - # Call with keyword arguments - # Matches: func(arg=value) - - id: call_with_kwargs - pattern: "$FUNC($$$ARGS, $KWARG=$VALUE$$$MORE)" - message: "Found call with keyword argument" - severity: hint - - # Constructor call - # Matches: ClassName() - - id: constructor_call - pattern: "$CLASS($$$ARGS)" - message: "Found constructor call" - severity: hint - - # Subscript call (not a real call, but often confused) - # This pattern helps exclude indexing from calls - - id: subscript_access - pattern: "$OBJ[$INDEX]" - message: "Found subscript access" - severity: hint - -# Metavariables used: -# $FUNC - Function name being called -# $OBJ - Object receiving the method call -# $METHOD - Method name being called -# $ARGS - Positional arguments -# $KWARG - Keyword argument name -# $VALUE - Keyword argument value -# $CLASS - Class name for constructor calls -# $INDEX - Index for subscript access -# $$$MORE - Additional arguments -# $$$CHAIN - Additional method chains - -# Note: The generic call pattern "$FUNC($$$ARGS)" will match all function calls -# including method calls and constructor calls. More specific patterns help -# categorize the type of call. - -# Examples matched: -# print("hello") -> call_with_args -# len(items) -> call_with_args -# obj.process() -> method_call -# obj.get().save() -> chained_call -# func(name=value) -> call_with_kwargs -# MyClass() -> constructor_call -# items[0] -> subscript_access (not a call) - -# Filtering notes: -# - self.method() calls are typically filtered during processing -# - cls.method() calls are typically filtered during processing -# - super().method() calls may be handled specially diff --git a/codex-lens/src/codexlens/parsers/patterns/python/imports.yaml b/codex-lens/src/codexlens/parsers/patterns/python/imports.yaml deleted file mode 100644 index a1248790..00000000 --- a/codex-lens/src/codexlens/parsers/patterns/python/imports.yaml +++ /dev/null @@ -1,82 +0,0 @@ -# Python IMPORTS patterns for ast-grep -# Extracts import statements (import, from...import, as aliases) - -# Pattern metadata -id: python-imports -language: python -description: Extract import statements from Python code - -patterns: - # Simple import - # Matches: import os - - id: simple_import - pattern: "import $MODULE" - message: "Found simple import" - severity: hint - - # Import with alias - # Matches: import numpy as np - - id: import_with_alias - pattern: "import $MODULE as $ALIAS" - message: "Found import with alias" - severity: hint - - # Multiple imports - # Matches: import os, sys - - id: multiple_imports - pattern: "import $FIRST, $$$REST" - message: "Found multiple imports" - severity: hint - - # From import (single name) - # Matches: from os import path - - id: from_import_single - pattern: "from $MODULE import $NAME" - message: "Found from-import single" - severity: hint - - # From import with alias - # Matches: from collections import defaultdict as dd - - id: from_import_with_alias - pattern: "from $MODULE import $NAME as $ALIAS" - message: "Found from-import with alias" - severity: hint - - # From import multiple names - # Matches: from typing import List, Dict, Optional - - id: from_import_multiple - pattern: "from $MODULE import $FIRST, $$$REST" - message: "Found from-import multiple" - severity: hint - - # From import star - # Matches: from module import * - - id: from_import_star - pattern: "from $MODULE import *" - message: "Found star import" - severity: warning - - # Relative import - # Matches: from .module import func - - id: relative_import - pattern: "from .$$$MODULE import $NAMES" - message: "Found relative import" - severity: hint - -# Metavariables used: -# $MODULE - The module being imported -# $ALIAS - The alias for the import -# $NAME - The specific name being imported -# $FIRST - First item in a multi-item import -# $$$REST - Remaining items in a multi-item import -# $NAMES - Names being imported in from-import - -# Examples matched: -# import os -> simple_import -# import numpy as np -> import_with_alias -# import os, sys, pathlib -> multiple_imports -# from os import path -> from_import_single -# from typing import List, Dict, Set -> from_import_multiple -# from collections import defaultdict -> from_import_single -# from .helpers import utils -> relative_import -# from module import * -> from_import_star diff --git a/codex-lens/src/codexlens/parsers/patterns/python/inherits.yaml b/codex-lens/src/codexlens/parsers/patterns/python/inherits.yaml deleted file mode 100644 index d818ab25..00000000 --- a/codex-lens/src/codexlens/parsers/patterns/python/inherits.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# Python INHERITS patterns for ast-grep -# Extracts class inheritance relationships (single and multiple inheritance) - -# Pattern metadata -id: python-inherits -language: python -description: Extract class inheritance relationships from Python code - -# Single inheritance pattern -# Matches: class Child(Parent): -patterns: - - id: single_inheritance - pattern: "class $CLASS_NAME($BASE) $$$BODY" - message: "Found single inheritance" - severity: hint - - # Multiple inheritance pattern - # Matches: class Child(Parent1, Parent2, Parent3): - - id: multiple_inheritance - pattern: "class $CLASS_NAME($BASE, $$$MORE_BASES) $$$BODY" - message: "Found multiple inheritance" - severity: hint - - # Generic inheritance with any number of bases - # Matches: class Child(...): with any number of parent classes - - id: class_with_bases - pattern: "class $NAME($$$BASES) $$$BODY" - message: "Found class with base classes" - severity: hint - -# Metavariables used: -# $CLASS_NAME - The name of the child class -# $BASE - First base class (for single inheritance) -# $BASES - All base classes combined -# $MORE_BASES - Additional base classes after the first (for multiple inheritance) -# $$$BODY - Class body (statements, can be multiple) - -# Examples matched: -# class Dog(Animal): -> single_inheritance -# class C(A, B): -> multiple_inheritance -# class D(BaseMixin, logging.Log) -> class_with_bases -# class E(A, B, C, D): -> multiple_inheritance diff --git a/codex-lens/src/codexlens/parsers/patterns/typescript/__init__.py b/codex-lens/src/codexlens/parsers/patterns/typescript/__init__.py deleted file mode 100644 index 0c0294c4..00000000 --- a/codex-lens/src/codexlens/parsers/patterns/typescript/__init__.py +++ /dev/null @@ -1,73 +0,0 @@ -"""TypeScript ast-grep patterns for relationship extraction. - -This module extends the JavaScript patterns with TypeScript-specific syntax -such as `import type` and `interface ... extends ...`. -""" - -from __future__ import annotations - -from typing import Dict, List - -from codexlens.parsers.patterns.javascript import ( - METAVARS, - RELATIONSHIP_PATTERNS as _JS_RELATIONSHIP_PATTERNS, - JAVASCRIPT_PATTERNS, -) - - -TYPESCRIPT_PATTERNS: Dict[str, str] = { - **JAVASCRIPT_PATTERNS, - # Type-only imports - "import_type_from_dq": "import type $$$IMPORTS from \"$MODULE\"", - "import_type_from_sq": "import type $$$IMPORTS from '$MODULE'", - "import_type_named_only_dq": "import type {$$$NAMES} from \"$MODULE\"", - "import_type_named_only_sq": "import type {$$$NAMES} from '$MODULE'", - "import_type_default_named_dq": "import type $DEFAULT, {$$$NAMES} from \"$MODULE\"", - "import_type_default_named_sq": "import type $DEFAULT, {$$$NAMES} from '$MODULE'", - # Interface inheritance: interface Foo extends Bar {} - "interface_extends": "interface $NAME extends $BASE $$$BODY", -} - - -RELATIONSHIP_PATTERNS: Dict[str, List[str]] = { - **_JS_RELATIONSHIP_PATTERNS, - "imports": [ - *_JS_RELATIONSHIP_PATTERNS.get("imports", []), - "import_type_from_dq", - "import_type_from_sq", - "import_type_named_only_dq", - "import_type_named_only_sq", - "import_type_default_named_dq", - "import_type_default_named_sq", - ], - "inheritance": [ - *_JS_RELATIONSHIP_PATTERNS.get("inheritance", []), - "interface_extends", - ], -} - - -def get_pattern(pattern_name: str) -> str: - if pattern_name not in TYPESCRIPT_PATTERNS: - raise KeyError( - f"Unknown TS pattern: {pattern_name}. Available: {list(TYPESCRIPT_PATTERNS.keys())}" - ) - return TYPESCRIPT_PATTERNS[pattern_name] - - -def get_patterns_for_relationship(rel_type: str) -> List[str]: - return RELATIONSHIP_PATTERNS.get(rel_type, []) - - -def get_metavar(name: str) -> str: - return METAVARS.get(name, name.upper()) - - -__all__ = [ - "TYPESCRIPT_PATTERNS", - "METAVARS", - "RELATIONSHIP_PATTERNS", - "get_pattern", - "get_patterns_for_relationship", - "get_metavar", -] diff --git a/codex-lens/src/codexlens/parsers/tokenizer.py b/codex-lens/src/codexlens/parsers/tokenizer.py deleted file mode 100644 index dcb12238..00000000 --- a/codex-lens/src/codexlens/parsers/tokenizer.py +++ /dev/null @@ -1,98 +0,0 @@ -"""Token counting utilities for CodexLens. - -Provides accurate token counting using tiktoken with character count fallback. -""" - -from __future__ import annotations - -from typing import Optional - -try: - import tiktoken - TIKTOKEN_AVAILABLE = True -except ImportError: - TIKTOKEN_AVAILABLE = False - - -class Tokenizer: - """Token counter with tiktoken primary and character count fallback.""" - - def __init__(self, encoding_name: str = "cl100k_base") -> None: - """Initialize tokenizer. - - Args: - encoding_name: Tiktoken encoding name (default: cl100k_base for GPT-4) - """ - self._encoding: Optional[object] = None - self._encoding_name = encoding_name - - if TIKTOKEN_AVAILABLE: - try: - self._encoding = tiktoken.get_encoding(encoding_name) - except Exception: - # Fallback to character counting if encoding fails - self._encoding = None - - def count_tokens(self, text: str) -> int: - """Count tokens in text. - - Uses tiktoken if available, otherwise falls back to character count / 4. - - Args: - text: Text to count tokens for - - Returns: - Estimated token count - """ - if not text: - return 0 - - if self._encoding is not None: - try: - return len(self._encoding.encode(text)) # type: ignore[attr-defined] - except Exception: - # Fall through to character count fallback - pass - - # Fallback: rough estimate using character count - # Average of ~4 characters per token for English text - return max(1, len(text) // 4) - - def is_using_tiktoken(self) -> bool: - """Check if tiktoken is being used. - - Returns: - True if tiktoken is available and initialized - """ - return self._encoding is not None - - -# Global default tokenizer instance -_default_tokenizer: Optional[Tokenizer] = None - - -def get_default_tokenizer() -> Tokenizer: - """Get the global default tokenizer instance. - - Returns: - Shared Tokenizer instance - """ - global _default_tokenizer - if _default_tokenizer is None: - _default_tokenizer = Tokenizer() - return _default_tokenizer - - -def count_tokens(text: str, tokenizer: Optional[Tokenizer] = None) -> int: - """Count tokens in text using default or provided tokenizer. - - Args: - text: Text to count tokens for - tokenizer: Optional tokenizer instance (uses default if None) - - Returns: - Estimated token count - """ - if tokenizer is None: - tokenizer = get_default_tokenizer() - return tokenizer.count_tokens(text) diff --git a/codex-lens/src/codexlens/parsers/treesitter_parser.py b/codex-lens/src/codexlens/parsers/treesitter_parser.py deleted file mode 100644 index 73638fe8..00000000 --- a/codex-lens/src/codexlens/parsers/treesitter_parser.py +++ /dev/null @@ -1,935 +0,0 @@ -"""Tree-sitter based parser for CodexLens. - -Provides precise AST-level parsing via tree-sitter. - -Note: This module does not provide a regex fallback inside `TreeSitterSymbolParser`. -If tree-sitter (or a language binding) is unavailable, `parse()`/`parse_symbols()` -return `None`; callers should use a regex-based fallback such as -`codexlens.parsers.factory.SimpleRegexParser`. -""" - -from __future__ import annotations - -from pathlib import Path -from typing import Dict, List, Optional, TYPE_CHECKING - -try: - from tree_sitter import Language as TreeSitterLanguage - from tree_sitter import Node as TreeSitterNode - from tree_sitter import Parser as TreeSitterParser - TREE_SITTER_AVAILABLE = True -except ImportError: - TreeSitterLanguage = None # type: ignore[assignment] - TreeSitterNode = None # type: ignore[assignment] - TreeSitterParser = None # type: ignore[assignment] - TREE_SITTER_AVAILABLE = False - -from codexlens.entities import CodeRelationship, IndexedFile, RelationshipType, Symbol -from codexlens.parsers.tokenizer import get_default_tokenizer - -if TYPE_CHECKING: - from codexlens.config import Config - - -class TreeSitterSymbolParser: - """Parser using tree-sitter for AST-level symbol extraction. - - Supports optional ast-grep integration for relationship extraction - (Python/JavaScript/TypeScript) when config.use_astgrep is True and - ast-grep-py is available. - """ - - def __init__( - self, - language_id: str, - path: Optional[Path] = None, - config: Optional["Config"] = None, - ) -> None: - """Initialize tree-sitter parser for a language. - - Args: - language_id: Language identifier (python, javascript, typescript, etc.) - path: Optional file path for language variant detection (e.g., .tsx) - config: Optional Config instance for parser feature toggles - """ - self.language_id = language_id - self.path = path - self._config = config - self._parser: Optional[object] = None - self._language: Optional[TreeSitterLanguage] = None - self._tokenizer = get_default_tokenizer() - self._astgrep_processor = None - - if TREE_SITTER_AVAILABLE: - self._initialize_parser() - - # Initialize ast-grep processor for Python if config enables it - if self._should_use_astgrep(): - self._initialize_astgrep_processor() - - def _initialize_parser(self) -> None: - """Initialize tree-sitter parser and language.""" - if TreeSitterParser is None or TreeSitterLanguage is None: - return - - try: - # Load language grammar - if self.language_id == "python": - import tree_sitter_python - self._language = TreeSitterLanguage(tree_sitter_python.language()) - elif self.language_id == "javascript": - import tree_sitter_javascript - self._language = TreeSitterLanguage(tree_sitter_javascript.language()) - elif self.language_id == "typescript": - import tree_sitter_typescript - # Detect TSX files by extension - if self.path is not None and self.path.suffix.lower() == ".tsx": - self._language = TreeSitterLanguage(tree_sitter_typescript.language_tsx()) - else: - self._language = TreeSitterLanguage(tree_sitter_typescript.language_typescript()) - else: - return - - # Create parser - self._parser = TreeSitterParser() - if hasattr(self._parser, "set_language"): - self._parser.set_language(self._language) # type: ignore[attr-defined] - else: - self._parser.language = self._language # type: ignore[assignment] - - except Exception: - # Gracefully handle missing language bindings - self._parser = None - self._language = None - - def _should_use_astgrep(self) -> bool: - """Check if ast-grep should be used for relationship extraction. - - Returns: - True if config.use_astgrep is True and language is supported - """ - if self._config is None: - return False - if not getattr(self._config, "use_astgrep", False): - return False - return self.language_id in {"python", "javascript", "typescript"} - - def _initialize_astgrep_processor(self) -> None: - """Initialize ast-grep processor for relationship extraction.""" - try: - from codexlens.parsers.astgrep_processor import ( - AstGrepPythonProcessor, - is_astgrep_processor_available, - ) - from codexlens.parsers.astgrep_js_ts_processor import ( - AstGrepJavaScriptProcessor, - AstGrepTypeScriptProcessor, - ) - - if is_astgrep_processor_available(): - if self.language_id == "python": - self._astgrep_processor = AstGrepPythonProcessor(self.path) - elif self.language_id == "javascript": - self._astgrep_processor = AstGrepJavaScriptProcessor(self.path) - elif self.language_id == "typescript": - self._astgrep_processor = AstGrepTypeScriptProcessor(self.path) - except ImportError: - self._astgrep_processor = None - - def is_available(self) -> bool: - """Check if tree-sitter parser is available. - - Returns: - True if parser is initialized and ready - """ - return self._parser is not None and self._language is not None - - def _parse_tree(self, text: str) -> Optional[tuple[bytes, TreeSitterNode]]: - if not self.is_available() or self._parser is None: - return None - - try: - source_bytes = text.encode("utf8") - tree = self._parser.parse(source_bytes) # type: ignore[attr-defined] - return source_bytes, tree.root_node - except Exception: - return None - - def parse_symbols(self, text: str) -> Optional[List[Symbol]]: - """Parse source code and extract symbols without creating IndexedFile. - - Args: - text: Source code text - - Returns: - List of symbols if parsing succeeds, None if tree-sitter unavailable - """ - parsed = self._parse_tree(text) - if parsed is None: - return None - - source_bytes, root = parsed - try: - return self._extract_symbols(source_bytes, root) - except Exception: - # Gracefully handle extraction errors - return None - - def parse(self, text: str, path: Path) -> Optional[IndexedFile]: - """Parse source code and extract symbols. - - Args: - text: Source code text - path: File path - - Returns: - IndexedFile if parsing succeeds, None if tree-sitter unavailable - """ - parsed = self._parse_tree(text) - if parsed is None: - return None - - source_bytes, root = parsed - try: - symbols = self._extract_symbols(source_bytes, root) - # Pass source_code for ast-grep integration - relationships = self._extract_relationships( - source_bytes, root, path, source_code=text - ) - - return IndexedFile( - path=str(path.resolve()), - language=self.language_id, - symbols=symbols, - chunks=[], - relationships=relationships, - ) - except Exception: - # Gracefully handle parsing errors - return None - - def _extract_symbols(self, source_bytes: bytes, root: TreeSitterNode) -> List[Symbol]: - """Extract symbols from AST. - - Args: - source_bytes: Source code as bytes - root: Root AST node - - Returns: - List of extracted symbols - """ - if self.language_id == "python": - return self._extract_python_symbols(source_bytes, root) - elif self.language_id in {"javascript", "typescript"}: - return self._extract_js_ts_symbols(source_bytes, root) - else: - return [] - - def _extract_relationships( - self, - source_bytes: bytes, - root: TreeSitterNode, - path: Path, - source_code: Optional[str] = None, - ) -> List[CodeRelationship]: - """Extract relationships, optionally using ast-grep. - - When config.use_astgrep is True and an ast-grep processor is available, - uses ast-grep for relationship extraction. Otherwise, uses tree-sitter. - - Args: - source_bytes: Source code as bytes - root: Root AST node from tree-sitter - path: File path - source_code: Optional source code string (required for ast-grep) - - Returns: - List of extracted relationships - """ - # Try ast-grep first if configured and available for this language. - if self._astgrep_processor is not None and source_code is not None: - try: - astgrep_rels = self._extract_relationships_astgrep(source_code, path) - if astgrep_rels is not None: - return astgrep_rels - except Exception: - # Fall back to tree-sitter on ast-grep failure - pass - - if self.language_id == "python": - return self._extract_python_relationships(source_bytes, root, path) - if self.language_id in {"javascript", "typescript"}: - return self._extract_js_ts_relationships(source_bytes, root, path) - return [] - - def _extract_relationships_astgrep( - self, - source_code: str, - path: Path, - ) -> Optional[List[CodeRelationship]]: - """Extract relationships using ast-grep processor. - - Args: - source_code: Source code text - path: File path - - Returns: - List of relationships, or None if ast-grep unavailable - """ - if self._astgrep_processor is None: - return None - - if not self._astgrep_processor.is_available(): - return None - - try: - indexed = self._astgrep_processor.parse(source_code, path) - if indexed is not None: - return indexed.relationships - except Exception: - pass - - return None - - def _extract_python_relationships( - self, - source_bytes: bytes, - root: TreeSitterNode, - path: Path, - ) -> List[CodeRelationship]: - source_file = str(path.resolve()) - relationships: List[CodeRelationship] = [] - - # Use a synthetic module scope so module-level imports/calls can be recorded - # (useful for static global graph persistence). - scope_stack: List[str] = [""] - alias_stack: List[Dict[str, str]] = [{}] - - def record_import(target_symbol: str, source_line: int) -> None: - if not target_symbol.strip() or not scope_stack: - return - relationships.append( - CodeRelationship( - source_symbol=scope_stack[-1], - target_symbol=target_symbol, - relationship_type=RelationshipType.IMPORTS, - source_file=source_file, - target_file=None, - source_line=source_line, - ) - ) - - def record_call(target_symbol: str, source_line: int) -> None: - if not target_symbol.strip() or not scope_stack: - return - base = target_symbol.split(".", 1)[0] - if base in {"self", "cls"}: - return - relationships.append( - CodeRelationship( - source_symbol=scope_stack[-1], - target_symbol=target_symbol, - relationship_type=RelationshipType.CALL, - source_file=source_file, - target_file=None, - source_line=source_line, - ) - ) - - def record_inherits(target_symbol: str, source_line: int) -> None: - if not target_symbol.strip() or not scope_stack: - return - relationships.append( - CodeRelationship( - source_symbol=scope_stack[-1], - target_symbol=target_symbol, - relationship_type=RelationshipType.INHERITS, - source_file=source_file, - target_file=None, - source_line=source_line, - ) - ) - - def visit(node: TreeSitterNode) -> None: - pushed_scope = False - pushed_aliases = False - - if node.type in {"class_definition", "function_definition", "async_function_definition"}: - name_node = node.child_by_field_name("name") - if name_node is not None: - scope_name = self._node_text(source_bytes, name_node).strip() - if scope_name: - scope_stack.append(scope_name) - pushed_scope = True - alias_stack.append(dict(alias_stack[-1])) - pushed_aliases = True - - if node.type == "class_definition" and pushed_scope: - superclasses = node.child_by_field_name("superclasses") - if superclasses is not None: - for child in superclasses.children: - dotted = self._python_expression_to_dotted(source_bytes, child) - if not dotted: - continue - resolved = self._resolve_alias_dotted(dotted, alias_stack[-1]) - record_inherits(resolved, self._node_start_line(node)) - - if node.type in {"import_statement", "import_from_statement"}: - updates, imported_targets = self._python_import_aliases_and_targets(source_bytes, node) - if updates: - alias_stack[-1].update(updates) - for target_symbol in imported_targets: - record_import(target_symbol, self._node_start_line(node)) - - if node.type == "call": - fn_node = node.child_by_field_name("function") - if fn_node is not None: - dotted = self._python_expression_to_dotted(source_bytes, fn_node) - if dotted: - resolved = self._resolve_alias_dotted(dotted, alias_stack[-1]) - record_call(resolved, self._node_start_line(node)) - - for child in node.children: - visit(child) - - if pushed_aliases: - alias_stack.pop() - if pushed_scope: - scope_stack.pop() - - visit(root) - return relationships - - def _extract_js_ts_relationships( - self, - source_bytes: bytes, - root: TreeSitterNode, - path: Path, - ) -> List[CodeRelationship]: - source_file = str(path.resolve()) - relationships: List[CodeRelationship] = [] - - # Use a synthetic module scope so module-level imports/calls can be recorded - # (useful for static global graph persistence). - scope_stack: List[str] = [""] - alias_stack: List[Dict[str, str]] = [{}] - - def record_import(target_symbol: str, source_line: int) -> None: - if not target_symbol.strip() or not scope_stack: - return - relationships.append( - CodeRelationship( - source_symbol=scope_stack[-1], - target_symbol=target_symbol, - relationship_type=RelationshipType.IMPORTS, - source_file=source_file, - target_file=None, - source_line=source_line, - ) - ) - - def record_call(target_symbol: str, source_line: int) -> None: - if not target_symbol.strip() or not scope_stack: - return - base = target_symbol.split(".", 1)[0] - if base in {"this", "super"}: - return - relationships.append( - CodeRelationship( - source_symbol=scope_stack[-1], - target_symbol=target_symbol, - relationship_type=RelationshipType.CALL, - source_file=source_file, - target_file=None, - source_line=source_line, - ) - ) - - def record_inherits(target_symbol: str, source_line: int) -> None: - if not target_symbol.strip() or not scope_stack: - return - relationships.append( - CodeRelationship( - source_symbol=scope_stack[-1], - target_symbol=target_symbol, - relationship_type=RelationshipType.INHERITS, - source_file=source_file, - target_file=None, - source_line=source_line, - ) - ) - - def visit(node: TreeSitterNode) -> None: - pushed_scope = False - pushed_aliases = False - - if node.type in {"function_declaration", "generator_function_declaration"}: - name_node = node.child_by_field_name("name") - if name_node is not None: - scope_name = self._node_text(source_bytes, name_node).strip() - if scope_name: - scope_stack.append(scope_name) - pushed_scope = True - alias_stack.append(dict(alias_stack[-1])) - pushed_aliases = True - - if node.type in {"class_declaration", "class"}: - name_node = node.child_by_field_name("name") - if name_node is not None: - scope_name = self._node_text(source_bytes, name_node).strip() - if scope_name: - scope_stack.append(scope_name) - pushed_scope = True - alias_stack.append(dict(alias_stack[-1])) - pushed_aliases = True - - if pushed_scope: - superclass = node.child_by_field_name("superclass") - if superclass is not None: - dotted = self._js_expression_to_dotted(source_bytes, superclass) - if dotted: - resolved = self._resolve_alias_dotted(dotted, alias_stack[-1]) - record_inherits(resolved, self._node_start_line(node)) - - if node.type == "variable_declarator": - name_node = node.child_by_field_name("name") - value_node = node.child_by_field_name("value") - if ( - name_node is not None - and value_node is not None - and name_node.type in {"identifier", "property_identifier"} - and value_node.type == "arrow_function" - ): - scope_name = self._node_text(source_bytes, name_node).strip() - if scope_name: - scope_stack.append(scope_name) - pushed_scope = True - alias_stack.append(dict(alias_stack[-1])) - pushed_aliases = True - - if node.type == "method_definition" and self._has_class_ancestor(node): - name_node = node.child_by_field_name("name") - if name_node is not None: - scope_name = self._node_text(source_bytes, name_node).strip() - if scope_name and scope_name != "constructor": - scope_stack.append(scope_name) - pushed_scope = True - alias_stack.append(dict(alias_stack[-1])) - pushed_aliases = True - - if node.type in {"import_declaration", "import_statement"}: - updates, imported_targets = self._js_import_aliases_and_targets(source_bytes, node) - if updates: - alias_stack[-1].update(updates) - for target_symbol in imported_targets: - record_import(target_symbol, self._node_start_line(node)) - - # Best-effort support for CommonJS require() imports: - # const fs = require("fs") - if node.type == "variable_declarator": - name_node = node.child_by_field_name("name") - value_node = node.child_by_field_name("value") - if ( - name_node is not None - and value_node is not None - and name_node.type == "identifier" - and value_node.type == "call_expression" - ): - callee = value_node.child_by_field_name("function") - args = value_node.child_by_field_name("arguments") - if ( - callee is not None - and self._node_text(source_bytes, callee).strip() == "require" - and args is not None - ): - module_name = self._js_first_string_argument(source_bytes, args) - if module_name: - alias_stack[-1][self._node_text(source_bytes, name_node).strip()] = module_name - record_import(module_name, self._node_start_line(node)) - - if node.type == "call_expression": - fn_node = node.child_by_field_name("function") - if fn_node is not None: - dotted = self._js_expression_to_dotted(source_bytes, fn_node) - if dotted: - resolved = self._resolve_alias_dotted(dotted, alias_stack[-1]) - record_call(resolved, self._node_start_line(node)) - - for child in node.children: - visit(child) - - if pushed_aliases: - alias_stack.pop() - if pushed_scope: - scope_stack.pop() - - visit(root) - return relationships - - def _node_start_line(self, node: TreeSitterNode) -> int: - return node.start_point[0] + 1 - - def _resolve_alias_dotted(self, dotted: str, aliases: Dict[str, str]) -> str: - dotted = (dotted or "").strip() - if not dotted: - return "" - - base, sep, rest = dotted.partition(".") - resolved_base = aliases.get(base, base) - if not rest: - return resolved_base - if resolved_base and rest: - return f"{resolved_base}.{rest}" - return resolved_base - - def _python_expression_to_dotted(self, source_bytes: bytes, node: TreeSitterNode) -> str: - if node.type in {"identifier", "dotted_name"}: - return self._node_text(source_bytes, node).strip() - if node.type == "attribute": - obj = node.child_by_field_name("object") - attr = node.child_by_field_name("attribute") - obj_text = self._python_expression_to_dotted(source_bytes, obj) if obj is not None else "" - attr_text = self._node_text(source_bytes, attr).strip() if attr is not None else "" - if obj_text and attr_text: - return f"{obj_text}.{attr_text}" - return obj_text or attr_text - return "" - - def _python_import_aliases_and_targets( - self, - source_bytes: bytes, - node: TreeSitterNode, - ) -> tuple[Dict[str, str], List[str]]: - aliases: Dict[str, str] = {} - targets: List[str] = [] - - if node.type == "import_statement": - for i, child in enumerate(node.children): - if child.type == "aliased_import": - name_node = child.child_by_field_name("name") - alias_node = child.child_by_field_name("alias") - if name_node is None: - continue - module_name = self._node_text(source_bytes, name_node).strip() - if not module_name: - continue - bound_name = ( - self._node_text(source_bytes, alias_node).strip() - if alias_node is not None - else module_name.split(".", 1)[0] - ) - if bound_name: - aliases[bound_name] = module_name - targets.append(module_name) - elif child.type == "dotted_name": - module_name = self._node_text(source_bytes, child).strip() - if not module_name: - continue - bound_name = module_name.split(".", 1)[0] - if bound_name: - aliases[bound_name] = bound_name - targets.append(module_name) - - if node.type == "import_from_statement": - module_name = "" - module_node = node.child_by_field_name("module_name") - if module_node is None: - for child in node.children: - if child.type == "dotted_name": - module_node = child - break - if module_node is not None: - module_name = self._node_text(source_bytes, module_node).strip() - - for i, child in enumerate(node.children): - if child.type == "aliased_import": - name_node = child.child_by_field_name("name") - alias_node = child.child_by_field_name("alias") - if name_node is None: - continue - imported_name = self._node_text(source_bytes, name_node).strip() - if not imported_name or imported_name == "*": - continue - target = f"{module_name}.{imported_name}" if module_name else imported_name - bound_name = ( - self._node_text(source_bytes, alias_node).strip() - if alias_node is not None - else imported_name - ) - if bound_name: - aliases[bound_name] = target - targets.append(target) - elif child.type == "dotted_name" and node.field_name_for_child(i) == "name": - # tree-sitter-python represents `from X import A, B, C` as - # multiple dotted_name nodes (field: "name"). - imported_name = self._node_text(source_bytes, child).strip() - if not imported_name: - continue - target = ( - f"{module_name}.{imported_name}" if module_name else imported_name - ) - aliases[imported_name] = target - targets.append(target) - elif child.type == "identifier" and node.field_name_for_child(i) == "name": - imported_name = self._node_text(source_bytes, child).strip() - if not imported_name or imported_name in {"from", "import", "*"}: - continue - target = f"{module_name}.{imported_name}" if module_name else imported_name - aliases[imported_name] = target - targets.append(target) - - return aliases, targets - - def _js_expression_to_dotted(self, source_bytes: bytes, node: TreeSitterNode) -> str: - if node.type in {"this", "super"}: - return node.type - if node.type in {"identifier", "property_identifier"}: - return self._node_text(source_bytes, node).strip() - if node.type == "member_expression": - obj = node.child_by_field_name("object") - prop = node.child_by_field_name("property") - obj_text = self._js_expression_to_dotted(source_bytes, obj) if obj is not None else "" - prop_text = self._js_expression_to_dotted(source_bytes, prop) if prop is not None else "" - if obj_text and prop_text: - return f"{obj_text}.{prop_text}" - return obj_text or prop_text - return "" - - def _js_import_aliases_and_targets( - self, - source_bytes: bytes, - node: TreeSitterNode, - ) -> tuple[Dict[str, str], List[str]]: - aliases: Dict[str, str] = {} - targets: List[str] = [] - - module_name = "" - source_node = node.child_by_field_name("source") - if source_node is not None: - module_name = self._node_text(source_bytes, source_node).strip().strip("\"'").strip() - if module_name: - targets.append(module_name) - - for child in node.children: - if child.type == "import_clause": - for clause_child in child.children: - if clause_child.type == "identifier": - # Default import: import React from "react" - local = self._node_text(source_bytes, clause_child).strip() - if local and module_name: - aliases[local] = module_name - if clause_child.type == "namespace_import": - # Namespace import: import * as fs from "fs" - name_node = clause_child.child_by_field_name("name") - if name_node is not None and module_name: - local = self._node_text(source_bytes, name_node).strip() - if local: - aliases[local] = module_name - if clause_child.type == "named_imports": - for spec in clause_child.children: - if spec.type != "import_specifier": - continue - name_node = spec.child_by_field_name("name") - alias_node = spec.child_by_field_name("alias") - if name_node is None: - continue - imported = self._node_text(source_bytes, name_node).strip() - if not imported: - continue - local = ( - self._node_text(source_bytes, alias_node).strip() - if alias_node is not None - else imported - ) - if local and module_name: - aliases[local] = f"{module_name}.{imported}" - targets.append(f"{module_name}.{imported}") - - return aliases, targets - - def _js_first_string_argument(self, source_bytes: bytes, args_node: TreeSitterNode) -> str: - for child in args_node.children: - if child.type == "string": - return self._node_text(source_bytes, child).strip().strip("\"'").strip() - return "" - - def _extract_python_symbols(self, source_bytes: bytes, root: TreeSitterNode) -> List[Symbol]: - """Extract Python symbols from AST. - - Args: - source_bytes: Source code as bytes - root: Root AST node - - Returns: - List of Python symbols (classes, functions, methods) - """ - symbols: List[Symbol] = [] - - for node in self._iter_nodes(root): - if node.type == "class_definition": - name_node = node.child_by_field_name("name") - if name_node is None: - continue - symbols.append(Symbol( - name=self._node_text(source_bytes, name_node), - kind="class", - range=self._node_range(node), - )) - elif node.type in {"function_definition", "async_function_definition"}: - name_node = node.child_by_field_name("name") - if name_node is None: - continue - symbols.append(Symbol( - name=self._node_text(source_bytes, name_node), - kind=self._python_function_kind(node), - range=self._node_range(node), - )) - - return symbols - - def _extract_js_ts_symbols(self, source_bytes: bytes, root: TreeSitterNode) -> List[Symbol]: - """Extract JavaScript/TypeScript symbols from AST. - - Args: - source_bytes: Source code as bytes - root: Root AST node - - Returns: - List of JS/TS symbols (classes, functions, methods) - """ - symbols: List[Symbol] = [] - - for node in self._iter_nodes(root): - if node.type in {"class_declaration", "class"}: - name_node = node.child_by_field_name("name") - if name_node is None: - continue - symbols.append(Symbol( - name=self._node_text(source_bytes, name_node), - kind="class", - range=self._node_range(node), - )) - elif node.type in {"function_declaration", "generator_function_declaration"}: - name_node = node.child_by_field_name("name") - if name_node is None: - continue - symbols.append(Symbol( - name=self._node_text(source_bytes, name_node), - kind="function", - range=self._node_range(node), - )) - elif node.type == "variable_declarator": - name_node = node.child_by_field_name("name") - value_node = node.child_by_field_name("value") - if ( - name_node is None - or value_node is None - or name_node.type not in {"identifier", "property_identifier"} - or value_node.type != "arrow_function" - ): - continue - symbols.append(Symbol( - name=self._node_text(source_bytes, name_node), - kind="function", - range=self._node_range(node), - )) - elif node.type == "method_definition" and self._has_class_ancestor(node): - name_node = node.child_by_field_name("name") - if name_node is None: - continue - name = self._node_text(source_bytes, name_node) - if name == "constructor": - continue - symbols.append(Symbol( - name=name, - kind="method", - range=self._node_range(node), - )) - - return symbols - - def _python_function_kind(self, node: TreeSitterNode) -> str: - """Determine if Python function is a method or standalone function. - - Args: - node: Function definition node - - Returns: - 'method' if inside a class, 'function' otherwise - """ - parent = node.parent - while parent is not None: - if parent.type in {"function_definition", "async_function_definition"}: - return "function" - if parent.type == "class_definition": - return "method" - parent = parent.parent - return "function" - - def _has_class_ancestor(self, node: TreeSitterNode) -> bool: - """Check if node has a class ancestor. - - Args: - node: AST node to check - - Returns: - True if node is inside a class - """ - parent = node.parent - while parent is not None: - if parent.type in {"class_declaration", "class"}: - return True - parent = parent.parent - return False - - def _iter_nodes(self, root: TreeSitterNode): - """Iterate over all nodes in AST. - - Args: - root: Root node to start iteration - - Yields: - AST nodes in depth-first order - """ - stack = [root] - while stack: - node = stack.pop() - yield node - for child in reversed(node.children): - stack.append(child) - - def _node_text(self, source_bytes: bytes, node: TreeSitterNode) -> str: - """Extract text for a node. - - Args: - source_bytes: Source code as bytes - node: AST node - - Returns: - Text content of node - """ - return source_bytes[node.start_byte:node.end_byte].decode("utf8") - - def _node_range(self, node: TreeSitterNode) -> tuple[int, int]: - """Get line range for a node. - - Args: - node: AST node - - Returns: - (start_line, end_line) tuple, 1-based inclusive - """ - start_line = node.start_point[0] + 1 - end_line = node.end_point[0] + 1 - return (start_line, max(start_line, end_line)) - - def count_tokens(self, text: str) -> int: - """Count tokens in text. - - Args: - text: Text to count tokens for - - Returns: - Token count - """ - return self._tokenizer.count_tokens(text) diff --git a/codex-lens/src/codexlens/search/__init__.py b/codex-lens/src/codexlens/search/__init__.py deleted file mode 100644 index e8749930..00000000 --- a/codex-lens/src/codexlens/search/__init__.py +++ /dev/null @@ -1,55 +0,0 @@ -from .chain_search import ( - ChainSearchEngine, - SearchOptions, - SearchStats, - ChainSearchResult, - quick_search, -) -from .global_graph_expander import GlobalGraphExpander - -# Clustering availability flag (lazy import pattern) -CLUSTERING_AVAILABLE = False -_clustering_import_error: str | None = None - -try: - from .clustering import CLUSTERING_AVAILABLE as _clustering_flag - from .clustering import check_clustering_available - CLUSTERING_AVAILABLE = _clustering_flag -except ImportError as e: - _clustering_import_error = str(e) - - def check_clustering_available() -> tuple[bool, str | None]: - """Fallback when clustering module not loadable.""" - return False, _clustering_import_error - - -# Clustering module exports (conditional) -try: - from .clustering import ( - BaseClusteringStrategy, - ClusteringConfig, - ClusteringStrategyFactory, - get_strategy, - ) - _clustering_exports = [ - "BaseClusteringStrategy", - "ClusteringConfig", - "ClusteringStrategyFactory", - "get_strategy", - ] -except ImportError: - _clustering_exports = [] - - -__all__ = [ - "ChainSearchEngine", - "SearchOptions", - "SearchStats", - "ChainSearchResult", - "quick_search", - "GlobalGraphExpander", - # Clustering - "CLUSTERING_AVAILABLE", - "check_clustering_available", - *_clustering_exports, -] diff --git a/codex-lens/src/codexlens/search/association_tree/QUICK_START.md b/codex-lens/src/codexlens/search/association_tree/QUICK_START.md deleted file mode 100644 index 1874c1b2..00000000 --- a/codex-lens/src/codexlens/search/association_tree/QUICK_START.md +++ /dev/null @@ -1,257 +0,0 @@ -# Association Tree Quick Start - -## Installation - -No additional dependencies needed - uses existing CodexLens LSP infrastructure. - -## Basic Usage - -### 1. Import Components - -```python -from codexlens.lsp.standalone_manager import StandaloneLspManager -from codexlens.search.association_tree import ( - AssociationTreeBuilder, - ResultDeduplicator, -) -``` - -### 2. Build a Tree - -```python -import asyncio - -async def build_tree_example(): - # Initialize LSP manager - async with StandaloneLspManager(workspace_root="/path/to/project") as lsp: - # Create builder - builder = AssociationTreeBuilder(lsp, timeout=5.0) - - # Build tree from seed location - tree = await builder.build_tree( - seed_file_path="src/main.py", - seed_line=42, # 1-based line number - seed_character=1, # 1-based character position - max_depth=5, # Maximum recursion depth - expand_callers=True, # Find who calls this - expand_callees=True, # Find what this calls - ) - - return tree - -tree = asyncio.run(build_tree_example()) -print(f"Found {len(tree.all_nodes)} unique nodes") -``` - -### 3. Deduplicate and Score - -```python -# Create deduplicator -deduplicator = ResultDeduplicator( - depth_weight=0.4, # Weight for depth score (0-1) - frequency_weight=0.3, # Weight for frequency score (0-1) - kind_weight=0.3, # Weight for symbol kind score (0-1) -) - -# Extract unique nodes -unique_nodes = deduplicator.deduplicate(tree, max_results=20) - -# Print results -for node in unique_nodes: - print(f"{node.name} @ {node.file_path}:{node.range.start_line}") - print(f" Score: {node.score:.2f}, Depth: {node.min_depth}, Occurs: {node.occurrences}") -``` - -### 4. Filter Results - -```python -# Filter by symbol kind -functions = deduplicator.filter_by_kind(unique_nodes, ["function", "method"]) - -# Filter by file pattern -core_modules = deduplicator.filter_by_file(unique_nodes, ["src/core/"]) - -# Convert to JSON -json_data = deduplicator.to_dict_list(unique_nodes) -``` - -## Common Patterns - -### Pattern 1: Find All Callers - -```python -tree = await builder.build_tree( - seed_file_path=target_file, - seed_line=target_line, - max_depth=3, - expand_callers=True, # Only expand callers - expand_callees=False, # Don't expand callees -) -``` - -### Pattern 2: Find Call Chain - -```python -tree = await builder.build_tree( - seed_file_path=entry_point, - seed_line=main_line, - max_depth=10, - expand_callers=False, # Don't expand callers - expand_callees=True, # Only expand callees (call chain) -) -``` - -### Pattern 3: Full Relationship Map - -```python -tree = await builder.build_tree( - seed_file_path=target_file, - seed_line=target_line, - max_depth=5, - expand_callers=True, # Expand both directions - expand_callees=True, -) -``` - -## Configuration Tips - -### Max Depth Guidelines - -- **Depth 1-2**: Direct callers/callees only (fast, focused) -- **Depth 3-5**: Good balance of coverage and performance (recommended) -- **Depth 6-10**: Deep exploration (slower, may hit cycles) - -### Timeout Settings - -```python -builder = AssociationTreeBuilder( - lsp, - timeout=5.0, # 5 seconds per LSP request -) - -# For slower language servers -builder = AssociationTreeBuilder(lsp, timeout=10.0) -``` - -### Score Weight Tuning - -```python -# Emphasize proximity to seed -deduplicator = ResultDeduplicator( - depth_weight=0.7, # High weight for depth - frequency_weight=0.2, - kind_weight=0.1, -) - -# Emphasize frequently-called functions -deduplicator = ResultDeduplicator( - depth_weight=0.2, - frequency_weight=0.7, # High weight for frequency - kind_weight=0.1, -) -``` - -## Error Handling - -```python -try: - tree = await builder.build_tree(...) - - if not tree.all_nodes: - print("No call hierarchy found - LSP may not support this file type") - -except asyncio.TimeoutError: - print("LSP request timed out - try increasing timeout") - -except Exception as e: - print(f"Error building tree: {e}") -``` - -## Performance Optimization - -### 1. Limit Depth - -```python -# Fast: max_depth=3 -tree = await builder.build_tree(..., max_depth=3) -``` - -### 2. Filter Early - -```python -# Get all nodes -unique_nodes = deduplicator.deduplicate(tree) - -# Filter to relevant kinds immediately -functions = deduplicator.filter_by_kind(unique_nodes, ["function", "method"]) -``` - -### 3. Use Timeouts - -```python -# Set aggressive timeouts for fast iteration -builder = AssociationTreeBuilder(lsp, timeout=3.0) -``` - -## Common Issues - -### Issue: Empty Tree Returned - -**Causes**: -- File not supported by LSP server -- No call hierarchy at that position -- Position is not on a function/method - -**Solutions**: -- Verify LSP server supports the language -- Check that position is on a function definition -- Try different seed locations - -### Issue: Timeout Errors - -**Causes**: -- LSP server slow or overloaded -- Network/connection issues -- Max depth too high - -**Solutions**: -- Increase timeout value -- Reduce max_depth -- Check LSP server health - -### Issue: Cycle Detected - -**Behavior**: Cycles are automatically detected and marked - -**Example**: -```python -for node in tree.node_list: - if node.is_cycle: - print(f"Cycle detected at {node.item.name}") -``` - -## Testing - -Run the test suite: - -```bash -# All tests -pytest tests/test_association_tree.py -v - -# Specific test -pytest tests/test_association_tree.py::test_simple_tree_building -v -``` - -## Demo Script - -Run the demo: - -```bash -python examples/association_tree_demo.py -``` - -## Further Reading - -- [Full Documentation](README.md) -- [Implementation Summary](../../ASSOCIATION_TREE_IMPLEMENTATION.md) -- [LSP Manager Documentation](../../lsp/standalone_manager.py) diff --git a/codex-lens/src/codexlens/search/association_tree/README.md b/codex-lens/src/codexlens/search/association_tree/README.md deleted file mode 100644 index b9e180f6..00000000 --- a/codex-lens/src/codexlens/search/association_tree/README.md +++ /dev/null @@ -1,188 +0,0 @@ -# Association Tree Module - -LSP-based code relationship discovery using call hierarchy. - -## Overview - -This module provides components for building and analyzing call relationship trees using Language Server Protocol (LSP) call hierarchy capabilities. It consists of three main components: - -1. **Data Structures** (`data_structures.py`) - Core data classes -2. **Association Tree Builder** (`builder.py`) - Tree construction via LSP -3. **Result Deduplicator** (`deduplicator.py`) - Node extraction and scoring - -## Components - -### 1. Data Structures - -**TreeNode**: Represents a single node in the call tree. -- Contains LSP CallHierarchyItem -- Tracks depth, parents, children -- Detects and marks cycles - -**CallTree**: Complete tree structure with roots and edges. -- Stores all discovered nodes -- Tracks edges (call relationships) -- Provides lookup by node_id - -**UniqueNode**: Deduplicated code symbol with metadata. -- Aggregates multiple occurrences -- Tracks minimum depth -- Contains relevance score - -### 2. AssociationTreeBuilder - -Builds call trees using LSP call hierarchy: - -**Strategy**: -- Depth-first recursive expansion -- Supports expanding callers (incoming calls) and callees (outgoing calls) -- Detects and marks circular references -- Respects max_depth limit - -**Key Features**: -- Async/await for concurrent LSP requests -- Timeout handling (5s per node) -- Graceful error handling -- Cycle detection via visited set - -### 3. ResultDeduplicator - -Extracts unique nodes from trees and assigns scores: - -**Scoring Factors**: -- **Depth** (40%): Shallower = more relevant -- **Frequency** (30%): More occurrences = more important -- **Kind** (30%): function/method > class > variable - -**Features**: -- Merges duplicate nodes by (file_path, start_line, end_line) -- Tracks all paths to each node -- Supports filtering by kind or file pattern -- Configurable score weights - -## Usage Example - -```python -import asyncio -from codexlens.lsp.standalone_manager import StandaloneLspManager -from codexlens.search.association_tree import ( - AssociationTreeBuilder, - ResultDeduplicator, -) - -async def main(): - # Initialize LSP manager - async with StandaloneLspManager(workspace_root="/path/to/project") as lsp: - # Create tree builder - builder = AssociationTreeBuilder(lsp, timeout=5.0) - - # Build tree from seed location - tree = await builder.build_tree( - seed_file_path="src/main.py", - seed_line=42, - seed_character=1, - max_depth=5, - expand_callers=True, # Find who calls this - expand_callees=True, # Find what this calls - ) - - print(f"Tree: {tree}") - print(f" Roots: {len(tree.roots)}") - print(f" Total nodes: {len(tree.all_nodes)}") - print(f" Edges: {len(tree.edges)}") - - # Deduplicate and score - deduplicator = ResultDeduplicator( - depth_weight=0.4, - frequency_weight=0.3, - kind_weight=0.3, - ) - - unique_nodes = deduplicator.deduplicate(tree, max_results=20) - - print(f"\nTop unique nodes:") - for node in unique_nodes[:10]: - print(f" {node.name} ({node.file_path}:{node.range.start_line})") - print(f" Depth: {node.min_depth}, Occurrences: {node.occurrences}, Score: {node.score:.2f}") - - # Filter by kind - functions_only = deduplicator.filter_by_kind(unique_nodes, ["function", "method"]) - print(f"\nFunctions/methods: {len(functions_only)}") - -asyncio.run(main()) -``` - -## Integration with Hybrid Search - -The association tree can be integrated with the hybrid search engine: - -```python -from codexlens.search.hybrid_search import HybridSearchEngine - -async def search_with_association_tree(query: str): - # 1. Get seed results from vector search - search_engine = HybridSearchEngine() - seed_results = await search_engine.search(query, limit=5) - - # 2. Build association trees from top results - builder = AssociationTreeBuilder(lsp_manager) - trees = [] - - for result in seed_results: - tree = await builder.build_tree( - seed_file_path=result.file_path, - seed_line=result.line, - max_depth=3, - ) - trees.append(tree) - - # 3. Merge and deduplicate - merged_tree = merge_trees(trees) # Custom merge logic - deduplicator = ResultDeduplicator() - unique_nodes = deduplicator.deduplicate(merged_tree, max_results=50) - - # 4. Convert to search results - final_results = convert_to_search_results(unique_nodes) - - return final_results -``` - -## Testing - -Run the test suite: - -```bash -pytest tests/test_association_tree.py -v -``` - -Test coverage includes: -- Simple tree building -- Cycle detection -- Max depth limits -- Empty trees -- Deduplication logic -- Scoring algorithms -- Filtering operations - -## Performance Considerations - -1. **LSP Timeouts**: Set appropriate timeout values (default 5s) -2. **Max Depth**: Limit depth to avoid exponential expansion (recommended: 3-5) -3. **Caching**: LSP manager caches open documents -4. **Parallel Expansion**: Incoming/outgoing calls fetched in parallel - -## Error Handling - -The builder gracefully handles: -- LSP timeout errors (logs warning, continues) -- Missing call hierarchy support (returns empty) -- Network/connection failures (skips node) -- Invalid LSP responses (logs error, skips) - -## Future Enhancements - -- [ ] Multi-root tree building from multiple seeds -- [ ] Custom scoring functions -- [ ] Graph visualization export -- [ ] Incremental tree updates -- [ ] Cross-file relationship analysis diff --git a/codex-lens/src/codexlens/search/association_tree/__init__.py b/codex-lens/src/codexlens/search/association_tree/__init__.py deleted file mode 100644 index 9557af33..00000000 --- a/codex-lens/src/codexlens/search/association_tree/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -"""Association tree module for LSP-based code relationship discovery. - -This module provides components for building and processing call association trees -using Language Server Protocol (LSP) call hierarchy capabilities. -""" - -from .builder import AssociationTreeBuilder -from .data_structures import ( - CallTree, - TreeNode, - UniqueNode, -) -from .deduplicator import ResultDeduplicator - -__all__ = [ - "AssociationTreeBuilder", - "CallTree", - "TreeNode", - "UniqueNode", - "ResultDeduplicator", -] diff --git a/codex-lens/src/codexlens/search/association_tree/builder.py b/codex-lens/src/codexlens/search/association_tree/builder.py deleted file mode 100644 index 894a8e20..00000000 --- a/codex-lens/src/codexlens/search/association_tree/builder.py +++ /dev/null @@ -1,450 +0,0 @@ -"""Association tree builder using LSP call hierarchy. - -Builds call relationship trees by recursively expanding from seed locations -using Language Server Protocol (LSP) call hierarchy capabilities. -""" - -from __future__ import annotations - -import asyncio -import logging -from pathlib import Path -from typing import Dict, List, Optional, Set - -from codexlens.hybrid_search.data_structures import CallHierarchyItem, Range -from codexlens.lsp.standalone_manager import StandaloneLspManager -from .data_structures import CallTree, TreeNode - -logger = logging.getLogger(__name__) - - -class AssociationTreeBuilder: - """Builds association trees from seed locations using LSP call hierarchy. - - Uses depth-first recursive expansion to build a tree of code relationships - starting from seed locations (typically from vector search results). - - Strategy: - - Start from seed locations (vector search results) - - For each seed, get call hierarchy items via LSP - - Recursively expand incoming calls (callers) if expand_callers=True - - Recursively expand outgoing calls (callees) if expand_callees=True - - Track visited nodes to prevent cycles - - Stop at max_depth or when no more relations found - - Attributes: - lsp_manager: StandaloneLspManager for LSP communication - visited: Set of visited node IDs to prevent cycles - timeout: Timeout for individual LSP requests (seconds) - """ - - def __init__( - self, - lsp_manager: StandaloneLspManager, - timeout: float = 5.0, - analysis_wait: float = 2.0, - ): - """Initialize AssociationTreeBuilder. - - Args: - lsp_manager: StandaloneLspManager instance for LSP communication - timeout: Timeout for individual LSP requests in seconds - analysis_wait: Time to wait for LSP analysis on first file (seconds) - """ - self.lsp_manager = lsp_manager - self.timeout = timeout - self.analysis_wait = analysis_wait - self.visited: Set[str] = set() - self._analyzed_files: Set[str] = set() # Track files already analyzed - - async def build_tree( - self, - seed_file_path: str, - seed_line: int, - seed_character: int = 1, - max_depth: int = 5, - expand_callers: bool = True, - expand_callees: bool = True, - ) -> CallTree: - """Build call tree from a single seed location. - - Args: - seed_file_path: Path to the seed file - seed_line: Line number of the seed symbol (1-based) - seed_character: Character position (1-based, default 1) - max_depth: Maximum recursion depth (default 5) - expand_callers: Whether to expand incoming calls (callers) - expand_callees: Whether to expand outgoing calls (callees) - - Returns: - CallTree containing all discovered nodes and relationships - """ - tree = CallTree() - self.visited.clear() - - # Determine wait time - only wait for analysis on first encounter of file - wait_time = 0.0 - if seed_file_path not in self._analyzed_files: - wait_time = self.analysis_wait - self._analyzed_files.add(seed_file_path) - - # Get call hierarchy items for the seed position - try: - hierarchy_items = await asyncio.wait_for( - self.lsp_manager.get_call_hierarchy_items( - file_path=seed_file_path, - line=seed_line, - character=seed_character, - wait_for_analysis=wait_time, - ), - timeout=self.timeout + wait_time, - ) - except asyncio.TimeoutError: - logger.warning( - "Timeout getting call hierarchy items for %s:%d", - seed_file_path, - seed_line, - ) - return tree - except Exception as e: - logger.error( - "Error getting call hierarchy items for %s:%d: %s", - seed_file_path, - seed_line, - e, - ) - return tree - - if not hierarchy_items: - logger.debug( - "No call hierarchy items found for %s:%d", - seed_file_path, - seed_line, - ) - return tree - - # Create root nodes from hierarchy items - for item_dict in hierarchy_items: - # Convert LSP dict to CallHierarchyItem - item = self._dict_to_call_hierarchy_item(item_dict) - if not item: - continue - - root_node = TreeNode( - item=item, - depth=0, - path_from_root=[self._create_node_id(item)], - ) - tree.roots.append(root_node) - tree.add_node(root_node) - - # Mark as visited - self.visited.add(root_node.node_id) - - # Recursively expand the tree - await self._expand_node( - node=root_node, - node_dict=item_dict, - tree=tree, - current_depth=0, - max_depth=max_depth, - expand_callers=expand_callers, - expand_callees=expand_callees, - ) - - tree.depth_reached = max_depth - return tree - - async def _expand_node( - self, - node: TreeNode, - node_dict: Dict, - tree: CallTree, - current_depth: int, - max_depth: int, - expand_callers: bool, - expand_callees: bool, - ) -> None: - """Recursively expand a node by fetching its callers and callees. - - Args: - node: TreeNode to expand - node_dict: LSP CallHierarchyItem dict (for LSP requests) - tree: CallTree to add discovered nodes to - current_depth: Current recursion depth - max_depth: Maximum allowed depth - expand_callers: Whether to expand incoming calls - expand_callees: Whether to expand outgoing calls - """ - # Stop if max depth reached - if current_depth >= max_depth: - return - - # Prepare tasks for parallel expansion - tasks = [] - - if expand_callers: - tasks.append( - self._expand_incoming_calls( - node=node, - node_dict=node_dict, - tree=tree, - current_depth=current_depth, - max_depth=max_depth, - expand_callers=expand_callers, - expand_callees=expand_callees, - ) - ) - - if expand_callees: - tasks.append( - self._expand_outgoing_calls( - node=node, - node_dict=node_dict, - tree=tree, - current_depth=current_depth, - max_depth=max_depth, - expand_callers=expand_callers, - expand_callees=expand_callees, - ) - ) - - # Execute expansions in parallel - if tasks: - await asyncio.gather(*tasks, return_exceptions=True) - - async def _expand_incoming_calls( - self, - node: TreeNode, - node_dict: Dict, - tree: CallTree, - current_depth: int, - max_depth: int, - expand_callers: bool, - expand_callees: bool, - ) -> None: - """Expand incoming calls (callers) for a node. - - Args: - node: TreeNode being expanded - node_dict: LSP dict for the node - tree: CallTree to add nodes to - current_depth: Current depth - max_depth: Maximum depth - expand_callers: Whether to continue expanding callers - expand_callees: Whether to expand callees - """ - try: - incoming_calls = await asyncio.wait_for( - self.lsp_manager.get_incoming_calls(item=node_dict), - timeout=self.timeout, - ) - except asyncio.TimeoutError: - logger.debug("Timeout getting incoming calls for %s", node.node_id) - return - except Exception as e: - logger.debug("Error getting incoming calls for %s: %s", node.node_id, e) - return - - if not incoming_calls: - return - - # Process each incoming call - for call_dict in incoming_calls: - caller_dict = call_dict.get("from") - if not caller_dict: - continue - - # Convert to CallHierarchyItem - caller_item = self._dict_to_call_hierarchy_item(caller_dict) - if not caller_item: - continue - - caller_id = self._create_node_id(caller_item) - - # Check for cycles - if caller_id in self.visited: - # Create cycle marker node - cycle_node = TreeNode( - item=caller_item, - depth=current_depth + 1, - is_cycle=True, - path_from_root=node.path_from_root + [caller_id], - ) - node.parents.append(cycle_node) - continue - - # Create new caller node - caller_node = TreeNode( - item=caller_item, - depth=current_depth + 1, - path_from_root=node.path_from_root + [caller_id], - ) - - # Add to tree - tree.add_node(caller_node) - tree.add_edge(caller_node, node) - - # Update relationships - node.parents.append(caller_node) - caller_node.children.append(node) - - # Mark as visited - self.visited.add(caller_id) - - # Recursively expand the caller - await self._expand_node( - node=caller_node, - node_dict=caller_dict, - tree=tree, - current_depth=current_depth + 1, - max_depth=max_depth, - expand_callers=expand_callers, - expand_callees=expand_callees, - ) - - async def _expand_outgoing_calls( - self, - node: TreeNode, - node_dict: Dict, - tree: CallTree, - current_depth: int, - max_depth: int, - expand_callers: bool, - expand_callees: bool, - ) -> None: - """Expand outgoing calls (callees) for a node. - - Args: - node: TreeNode being expanded - node_dict: LSP dict for the node - tree: CallTree to add nodes to - current_depth: Current depth - max_depth: Maximum depth - expand_callers: Whether to expand callers - expand_callees: Whether to continue expanding callees - """ - try: - outgoing_calls = await asyncio.wait_for( - self.lsp_manager.get_outgoing_calls(item=node_dict), - timeout=self.timeout, - ) - except asyncio.TimeoutError: - logger.debug("Timeout getting outgoing calls for %s", node.node_id) - return - except Exception as e: - logger.debug("Error getting outgoing calls for %s: %s", node.node_id, e) - return - - if not outgoing_calls: - return - - # Process each outgoing call - for call_dict in outgoing_calls: - callee_dict = call_dict.get("to") - if not callee_dict: - continue - - # Convert to CallHierarchyItem - callee_item = self._dict_to_call_hierarchy_item(callee_dict) - if not callee_item: - continue - - callee_id = self._create_node_id(callee_item) - - # Check for cycles - if callee_id in self.visited: - # Create cycle marker node - cycle_node = TreeNode( - item=callee_item, - depth=current_depth + 1, - is_cycle=True, - path_from_root=node.path_from_root + [callee_id], - ) - node.children.append(cycle_node) - continue - - # Create new callee node - callee_node = TreeNode( - item=callee_item, - depth=current_depth + 1, - path_from_root=node.path_from_root + [callee_id], - ) - - # Add to tree - tree.add_node(callee_node) - tree.add_edge(node, callee_node) - - # Update relationships - node.children.append(callee_node) - callee_node.parents.append(node) - - # Mark as visited - self.visited.add(callee_id) - - # Recursively expand the callee - await self._expand_node( - node=callee_node, - node_dict=callee_dict, - tree=tree, - current_depth=current_depth + 1, - max_depth=max_depth, - expand_callers=expand_callers, - expand_callees=expand_callees, - ) - - def _dict_to_call_hierarchy_item( - self, item_dict: Dict - ) -> Optional[CallHierarchyItem]: - """Convert LSP dict to CallHierarchyItem. - - Args: - item_dict: LSP CallHierarchyItem dictionary - - Returns: - CallHierarchyItem or None if conversion fails - """ - try: - # Extract URI and convert to file path - uri = item_dict.get("uri", "") - file_path = uri.replace("file:///", "").replace("file://", "") - - # Handle Windows paths (file:///C:/...) - if len(file_path) > 2 and file_path[0] == "/" and file_path[2] == ":": - file_path = file_path[1:] - - # Extract range - range_dict = item_dict.get("range", {}) - start = range_dict.get("start", {}) - end = range_dict.get("end", {}) - - # Create Range (convert from 0-based to 1-based) - item_range = Range( - start_line=start.get("line", 0) + 1, - start_character=start.get("character", 0) + 1, - end_line=end.get("line", 0) + 1, - end_character=end.get("character", 0) + 1, - ) - - return CallHierarchyItem( - name=item_dict.get("name", "unknown"), - kind=str(item_dict.get("kind", "unknown")), - file_path=file_path, - range=item_range, - detail=item_dict.get("detail"), - ) - - except Exception as e: - logger.debug("Failed to convert dict to CallHierarchyItem: %s", e) - return None - - def _create_node_id(self, item: CallHierarchyItem) -> str: - """Create unique node ID from CallHierarchyItem. - - Args: - item: CallHierarchyItem - - Returns: - Unique node ID string - """ - return f"{item.file_path}:{item.name}:{item.range.start_line}" diff --git a/codex-lens/src/codexlens/search/association_tree/data_structures.py b/codex-lens/src/codexlens/search/association_tree/data_structures.py deleted file mode 100644 index 2c8b47fa..00000000 --- a/codex-lens/src/codexlens/search/association_tree/data_structures.py +++ /dev/null @@ -1,191 +0,0 @@ -"""Data structures for association tree building. - -Defines the core data classes for representing call hierarchy trees and -deduplicated results. -""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional - -from codexlens.hybrid_search.data_structures import CallHierarchyItem, Range - - -@dataclass -class TreeNode: - """Node in the call association tree. - - Represents a single function/method in the tree, including its position - in the hierarchy and relationships. - - Attributes: - item: LSP CallHierarchyItem containing symbol information - depth: Distance from the root node (seed) - 0 for roots - children: List of child nodes (functions called by this node) - parents: List of parent nodes (functions that call this node) - is_cycle: Whether this node creates a circular reference - path_from_root: Path (list of node IDs) from root to this node - """ - - item: CallHierarchyItem - depth: int = 0 - children: List[TreeNode] = field(default_factory=list) - parents: List[TreeNode] = field(default_factory=list) - is_cycle: bool = False - path_from_root: List[str] = field(default_factory=list) - - @property - def node_id(self) -> str: - """Unique identifier for this node.""" - return f"{self.item.file_path}:{self.item.name}:{self.item.range.start_line}" - - def __hash__(self) -> int: - """Hash based on node ID.""" - return hash(self.node_id) - - def __eq__(self, other: object) -> bool: - """Equality based on node ID.""" - if not isinstance(other, TreeNode): - return False - return self.node_id == other.node_id - - def __repr__(self) -> str: - """String representation of the node.""" - cycle_marker = " [CYCLE]" if self.is_cycle else "" - return f"TreeNode({self.item.name}@{self.item.file_path}:{self.item.range.start_line}){cycle_marker}" - - -@dataclass -class CallTree: - """Complete call tree structure built from seeds. - - Contains all nodes discovered through recursive expansion and - the relationships between them. - - Attributes: - roots: List of root nodes (seed symbols) - all_nodes: Dictionary mapping node_id -> TreeNode for quick lookup - node_list: Flat list of all nodes in tree order - edges: List of (from_node_id, to_node_id) tuples representing calls - depth_reached: Maximum depth achieved in expansion - """ - - roots: List[TreeNode] = field(default_factory=list) - all_nodes: Dict[str, TreeNode] = field(default_factory=dict) - node_list: List[TreeNode] = field(default_factory=list) - edges: List[tuple[str, str]] = field(default_factory=list) - depth_reached: int = 0 - - def add_node(self, node: TreeNode) -> None: - """Add a node to the tree. - - Args: - node: TreeNode to add - """ - if node.node_id not in self.all_nodes: - self.all_nodes[node.node_id] = node - self.node_list.append(node) - - def add_edge(self, from_node: TreeNode, to_node: TreeNode) -> None: - """Add an edge between two nodes. - - Args: - from_node: Source node - to_node: Target node - """ - edge = (from_node.node_id, to_node.node_id) - if edge not in self.edges: - self.edges.append(edge) - - def get_node(self, node_id: str) -> Optional[TreeNode]: - """Get a node by ID. - - Args: - node_id: Node identifier - - Returns: - TreeNode if found, None otherwise - """ - return self.all_nodes.get(node_id) - - def __len__(self) -> int: - """Return total number of nodes in tree.""" - return len(self.all_nodes) - - def __repr__(self) -> str: - """String representation of the tree.""" - return ( - f"CallTree(roots={len(self.roots)}, nodes={len(self.all_nodes)}, " - f"depth={self.depth_reached})" - ) - - -@dataclass -class UniqueNode: - """Deduplicated unique code symbol from the tree. - - Represents a single unique code location that may appear multiple times - in the tree under different contexts. Contains aggregated information - about all occurrences. - - Attributes: - file_path: Absolute path to the file - name: Symbol name (function, method, class, etc.) - kind: Symbol kind (function, method, class, etc.) - range: Code range in the file - min_depth: Minimum depth at which this node appears in the tree - occurrences: Number of times this node appears in the tree - paths: List of paths from roots to this node - context_nodes: Related nodes from the tree - score: Composite relevance score (higher is better) - """ - - file_path: str - name: str - kind: str - range: Range - min_depth: int = 0 - occurrences: int = 1 - paths: List[List[str]] = field(default_factory=list) - context_nodes: List[str] = field(default_factory=list) - score: float = 0.0 - - @property - def node_key(self) -> tuple[str, int, int]: - """Unique key for deduplication. - - Uses (file_path, start_line, end_line) as the unique identifier - for this symbol across all occurrences. - """ - return ( - self.file_path, - self.range.start_line, - self.range.end_line, - ) - - def add_path(self, path: List[str]) -> None: - """Add a path from root to this node. - - Args: - path: List of node IDs from root to this node - """ - if path not in self.paths: - self.paths.append(path) - - def __hash__(self) -> int: - """Hash based on node key.""" - return hash(self.node_key) - - def __eq__(self, other: object) -> bool: - """Equality based on node key.""" - if not isinstance(other, UniqueNode): - return False - return self.node_key == other.node_key - - def __repr__(self) -> str: - """String representation of the unique node.""" - return ( - f"UniqueNode({self.name}@{self.file_path}:{self.range.start_line}, " - f"depth={self.min_depth}, occ={self.occurrences}, score={self.score:.2f})" - ) diff --git a/codex-lens/src/codexlens/search/association_tree/deduplicator.py b/codex-lens/src/codexlens/search/association_tree/deduplicator.py deleted file mode 100644 index 9e590518..00000000 --- a/codex-lens/src/codexlens/search/association_tree/deduplicator.py +++ /dev/null @@ -1,301 +0,0 @@ -"""Result deduplication for association tree nodes. - -Provides functionality to extract unique nodes from a call tree and assign -relevance scores based on various factors. -""" - -from __future__ import annotations - -import logging -from typing import Dict, List, Optional - -from .data_structures import ( - CallTree, - TreeNode, - UniqueNode, -) - -logger = logging.getLogger(__name__) - - -# Symbol kind weights for scoring (higher = more relevant) -KIND_WEIGHTS: Dict[str, float] = { - # Functions and methods are primary targets - "function": 1.0, - "method": 1.0, - "12": 1.0, # LSP SymbolKind.Function - "6": 1.0, # LSP SymbolKind.Method - # Classes are important but secondary - "class": 0.8, - "5": 0.8, # LSP SymbolKind.Class - # Interfaces and types - "interface": 0.7, - "11": 0.7, # LSP SymbolKind.Interface - "type": 0.6, - # Constructors - "constructor": 0.9, - "9": 0.9, # LSP SymbolKind.Constructor - # Variables and constants - "variable": 0.4, - "13": 0.4, # LSP SymbolKind.Variable - "constant": 0.5, - "14": 0.5, # LSP SymbolKind.Constant - # Default for unknown kinds - "unknown": 0.3, -} - - -class ResultDeduplicator: - """Extracts and scores unique nodes from call trees. - - Processes a CallTree to extract unique code locations, merging duplicates - and assigning relevance scores based on: - - Depth: Shallower nodes (closer to seeds) score higher - - Frequency: Nodes appearing multiple times score higher - - Kind: Function/method > class > variable - - Attributes: - depth_weight: Weight for depth factor in scoring (default 0.4) - frequency_weight: Weight for frequency factor (default 0.3) - kind_weight: Weight for symbol kind factor (default 0.3) - max_depth_penalty: Maximum depth before full penalty applied - """ - - def __init__( - self, - depth_weight: float = 0.4, - frequency_weight: float = 0.3, - kind_weight: float = 0.3, - max_depth_penalty: int = 10, - ): - """Initialize ResultDeduplicator. - - Args: - depth_weight: Weight for depth factor (0.0-1.0) - frequency_weight: Weight for frequency factor (0.0-1.0) - kind_weight: Weight for symbol kind factor (0.0-1.0) - max_depth_penalty: Depth at which score becomes 0 for depth factor - """ - self.depth_weight = depth_weight - self.frequency_weight = frequency_weight - self.kind_weight = kind_weight - self.max_depth_penalty = max_depth_penalty - - def deduplicate( - self, - tree: CallTree, - max_results: Optional[int] = None, - ) -> List[UniqueNode]: - """Extract unique nodes from the call tree. - - Traverses the tree, groups nodes by their unique key (file_path, - start_line, end_line), and merges duplicate occurrences. - - Args: - tree: CallTree to process - max_results: Maximum number of results to return (None = all) - - Returns: - List of UniqueNode objects, sorted by score descending - """ - if not tree.node_list: - return [] - - # Group nodes by unique key - unique_map: Dict[tuple, UniqueNode] = {} - - for node in tree.node_list: - if node.is_cycle: - # Skip cycle markers - they point to already-counted nodes - continue - - key = self._get_node_key(node) - - if key in unique_map: - # Update existing unique node - unique_node = unique_map[key] - unique_node.occurrences += 1 - unique_node.min_depth = min(unique_node.min_depth, node.depth) - unique_node.add_path(node.path_from_root) - - # Collect context from relationships - for parent in node.parents: - if not parent.is_cycle: - unique_node.context_nodes.append(parent.node_id) - for child in node.children: - if not child.is_cycle: - unique_node.context_nodes.append(child.node_id) - else: - # Create new unique node - unique_node = UniqueNode( - file_path=node.item.file_path, - name=node.item.name, - kind=node.item.kind, - range=node.item.range, - min_depth=node.depth, - occurrences=1, - paths=[node.path_from_root.copy()], - context_nodes=[], - score=0.0, - ) - - # Collect initial context - for parent in node.parents: - if not parent.is_cycle: - unique_node.context_nodes.append(parent.node_id) - for child in node.children: - if not child.is_cycle: - unique_node.context_nodes.append(child.node_id) - - unique_map[key] = unique_node - - # Calculate scores for all unique nodes - unique_nodes = list(unique_map.values()) - - # Find max frequency for normalization - max_frequency = max((n.occurrences for n in unique_nodes), default=1) - - for node in unique_nodes: - node.score = self._score_node(node, max_frequency) - - # Sort by score descending - unique_nodes.sort(key=lambda n: n.score, reverse=True) - - # Apply max_results limit - if max_results is not None and max_results > 0: - unique_nodes = unique_nodes[:max_results] - - logger.debug( - "Deduplicated %d tree nodes to %d unique nodes", - len(tree.node_list), - len(unique_nodes), - ) - - return unique_nodes - - def _score_node( - self, - node: UniqueNode, - max_frequency: int, - ) -> float: - """Calculate composite score for a unique node. - - Score = depth_weight * depth_score + - frequency_weight * frequency_score + - kind_weight * kind_score - - Args: - node: UniqueNode to score - max_frequency: Maximum occurrence count for normalization - - Returns: - Composite score between 0.0 and 1.0 - """ - # Depth score: closer to root = higher score - # Score of 1.0 at depth 0, decreasing to 0.0 at max_depth_penalty - depth_score = max( - 0.0, - 1.0 - (node.min_depth / self.max_depth_penalty), - ) - - # Frequency score: more occurrences = higher score - frequency_score = node.occurrences / max_frequency if max_frequency > 0 else 0.0 - - # Kind score: function/method > class > variable - kind_str = str(node.kind).lower() - kind_score = KIND_WEIGHTS.get(kind_str, KIND_WEIGHTS["unknown"]) - - # Composite score - score = ( - self.depth_weight * depth_score - + self.frequency_weight * frequency_score - + self.kind_weight * kind_score - ) - - return score - - def _get_node_key(self, node: TreeNode) -> tuple: - """Get unique key for a tree node. - - Uses (file_path, start_line, end_line) as the unique identifier. - - Args: - node: TreeNode - - Returns: - Tuple key for deduplication - """ - return ( - node.item.file_path, - node.item.range.start_line, - node.item.range.end_line, - ) - - def filter_by_kind( - self, - nodes: List[UniqueNode], - kinds: List[str], - ) -> List[UniqueNode]: - """Filter unique nodes by symbol kind. - - Args: - nodes: List of UniqueNode to filter - kinds: List of allowed kinds (e.g., ["function", "method"]) - - Returns: - Filtered list of UniqueNode - """ - kinds_lower = [k.lower() for k in kinds] - return [ - node - for node in nodes - if str(node.kind).lower() in kinds_lower - ] - - def filter_by_file( - self, - nodes: List[UniqueNode], - file_patterns: List[str], - ) -> List[UniqueNode]: - """Filter unique nodes by file path patterns. - - Args: - nodes: List of UniqueNode to filter - file_patterns: List of path substrings to match - - Returns: - Filtered list of UniqueNode - """ - return [ - node - for node in nodes - if any(pattern in node.file_path for pattern in file_patterns) - ] - - def to_dict_list(self, nodes: List[UniqueNode]) -> List[Dict]: - """Convert list of UniqueNode to JSON-serializable dicts. - - Args: - nodes: List of UniqueNode - - Returns: - List of dictionaries - """ - return [ - { - "file_path": node.file_path, - "name": node.name, - "kind": node.kind, - "range": { - "start_line": node.range.start_line, - "start_character": node.range.start_character, - "end_line": node.range.end_line, - "end_character": node.range.end_character, - }, - "min_depth": node.min_depth, - "occurrences": node.occurrences, - "path_count": len(node.paths), - "score": round(node.score, 4), - } - for node in nodes - ] diff --git a/codex-lens/src/codexlens/search/binary_searcher.py b/codex-lens/src/codexlens/search/binary_searcher.py deleted file mode 100644 index 30ab55b3..00000000 --- a/codex-lens/src/codexlens/search/binary_searcher.py +++ /dev/null @@ -1,309 +0,0 @@ -"""Binary vector searcher for cascade search. - -This module provides fast binary vector search using Hamming distance -for the first stage of cascade search (coarse filtering). - -Supports two loading modes: -1. Memory-mapped file (preferred): Low memory footprint, OS-managed paging -2. Database loading (fallback): Loads all vectors into RAM -""" - -from __future__ import annotations - -import json -import logging -from pathlib import Path -from typing import List, Optional, Tuple - -import numpy as np - -logger = logging.getLogger(__name__) - -# Pre-computed popcount lookup table for vectorized Hamming distance -# Each byte value (0-255) maps to its bit count -_POPCOUNT_TABLE = np.array([bin(i).count('1') for i in range(256)], dtype=np.uint8) - - -class BinarySearcher: - """Fast binary vector search using Hamming distance. - - This class implements the first stage of cascade search: - fast, approximate retrieval using binary vectors and Hamming distance. - - The binary vectors are derived from dense embeddings by thresholding: - binary[i] = 1 if dense[i] > 0 else 0 - - Hamming distance between two binary vectors counts the number of - differing bits, which can be computed very efficiently using XOR - and population count. - - Supports two loading modes: - - Memory-mapped file (preferred): Uses np.memmap for minimal RAM usage - - Database (fallback): Loads all vectors into memory from SQLite - """ - - def __init__(self, index_root_or_meta_path: Path) -> None: - """Initialize BinarySearcher. - - Args: - index_root_or_meta_path: Either: - - Path to index root directory (containing _binary_vectors.mmap) - - Path to _vectors_meta.db (legacy mode, loads from DB) - """ - path = Path(index_root_or_meta_path) - - # Determine if this is an index root or a specific DB path - if path.suffix == '.db': - # Legacy mode: specific DB path - self.index_root = path.parent - self.meta_store_path = path - else: - # New mode: index root directory - self.index_root = path - self.meta_store_path = path / "_vectors_meta.db" - - self._chunk_ids: Optional[np.ndarray] = None - self._binary_matrix: Optional[np.ndarray] = None - self._is_memmap = False - self._loaded = False - self._embedding_dim: Optional[int] = None - self._backend: Optional[str] = None - self._model: Optional[str] = None - self._model_profile: Optional[str] = None - - def load(self) -> bool: - """Load binary vectors using memory-mapped file or database fallback. - - Tries to load from memory-mapped file first (preferred for large indexes), - falls back to database loading if mmap file doesn't exist. - - Returns: - True if vectors were loaded successfully. - """ - if self._loaded: - return True - - # Try memory-mapped file first (preferred) - mmap_path = self.index_root / "_binary_vectors.mmap" - meta_path = mmap_path.with_suffix('.meta.json') - - if mmap_path.exists() and meta_path.exists(): - try: - with open(meta_path, 'r') as f: - meta = json.load(f) - - shape = tuple(meta['shape']) - self._chunk_ids = np.array(meta['chunk_ids'], dtype=np.int64) - self._embedding_dim = meta.get("embedding_dim") - self._backend = meta.get("backend") - self._model = meta.get("model") or meta.get("model_name") - self._model_profile = meta.get("model_profile") - - # Memory-map the binary matrix (read-only) - self._binary_matrix = np.memmap( - str(mmap_path), - dtype=np.uint8, - mode='r', - shape=shape - ) - self._is_memmap = True - self._loaded = True - - logger.info( - "Memory-mapped %d binary vectors (%d bytes each)", - len(self._chunk_ids), shape[1] - ) - return True - - except Exception as e: - logger.warning("Failed to load mmap binary vectors, falling back to DB: %s", e) - - # Fallback: load from database - return self._load_from_db() - - def _load_from_db(self) -> bool: - """Load binary vectors from database (legacy/fallback mode). - - Returns: - True if vectors were loaded successfully. - """ - try: - from codexlens.storage.vector_meta_store import VectorMetadataStore - - with VectorMetadataStore(self.meta_store_path) as store: - rows = store.get_all_binary_vectors() - - if not rows: - logger.warning("No binary vectors found in %s", self.meta_store_path) - return False - - # Convert to numpy arrays for fast computation - self._chunk_ids = np.array([r[0] for r in rows], dtype=np.int64) - - # Unpack bytes to numpy array - binary_arrays = [] - for _, vec_bytes in rows: - arr = np.frombuffer(vec_bytes, dtype=np.uint8) - binary_arrays.append(arr) - - self._binary_matrix = np.vstack(binary_arrays) - self._is_memmap = False - self._loaded = True - self._embedding_dim = None - self._backend = None - self._model = None - self._model_profile = None - - logger.info( - "Loaded %d binary vectors from DB (%d bytes each)", - len(self._chunk_ids), self._binary_matrix.shape[1] - ) - return True - - except Exception as e: - logger.error("Failed to load binary vectors: %s", e) - return False - - def search( - self, - query_vector: np.ndarray, - top_k: int = 100 - ) -> List[Tuple[int, int]]: - """Search for similar vectors using Hamming distance. - - Args: - query_vector: Dense query vector (will be binarized). - top_k: Number of top results to return. - - Returns: - List of (chunk_id, hamming_distance) tuples sorted by distance. - """ - if not self._loaded and not self.load(): - return [] - - # Binarize query vector - query_binary = (query_vector > 0).astype(np.uint8) - query_packed = np.packbits(query_binary) - - # Compute Hamming distances using XOR and popcount - # XOR gives 1 for differing bits - xor_result = np.bitwise_xor(self._binary_matrix, query_packed) - - # Vectorized popcount using lookup table (orders of magnitude faster) - # Sum the bit counts for each byte across all columns - distances = np.sum(_POPCOUNT_TABLE[xor_result], axis=1, dtype=np.int32) - - # Get top-k with smallest distances - if top_k >= len(distances): - top_indices = np.argsort(distances) - else: - # Partial sort for efficiency - top_indices = np.argpartition(distances, top_k)[:top_k] - top_indices = top_indices[np.argsort(distances[top_indices])] - - results = [ - (int(self._chunk_ids[i]), int(distances[i])) - for i in top_indices - ] - - return results - - def search_with_rerank( - self, - query_dense: np.ndarray, - dense_vectors: np.ndarray, - dense_chunk_ids: np.ndarray, - top_k: int = 10, - candidates: int = 100 - ) -> List[Tuple[int, float]]: - """Two-stage cascade search: binary filter + dense rerank. - - Args: - query_dense: Dense query vector. - dense_vectors: Dense vectors for reranking (from HNSW or stored). - dense_chunk_ids: Chunk IDs corresponding to dense_vectors. - top_k: Final number of results. - candidates: Number of candidates from binary search. - - Returns: - List of (chunk_id, cosine_similarity) tuples. - """ - # Stage 1: Binary filtering - binary_results = self.search(query_dense, top_k=candidates) - if not binary_results: - return [] - - candidate_ids = {r[0] for r in binary_results} - - # Stage 2: Dense reranking - # Find indices of candidates in dense_vectors - candidate_mask = np.isin(dense_chunk_ids, list(candidate_ids)) - candidate_indices = np.where(candidate_mask)[0] - - if len(candidate_indices) == 0: - # Fallback: return binary results with normalized distance - max_dist = max(r[1] for r in binary_results) if binary_results else 1 - return [(r[0], 1.0 - r[1] / max_dist) for r in binary_results[:top_k]] - - # Compute cosine similarities for candidates - candidate_vectors = dense_vectors[candidate_indices] - candidate_ids_array = dense_chunk_ids[candidate_indices] - - # Normalize vectors - query_norm = query_dense / (np.linalg.norm(query_dense) + 1e-8) - cand_norms = candidate_vectors / ( - np.linalg.norm(candidate_vectors, axis=1, keepdims=True) + 1e-8 - ) - - # Cosine similarities - similarities = np.dot(cand_norms, query_norm) - - # Sort by similarity (descending) - sorted_indices = np.argsort(-similarities)[:top_k] - - results = [ - (int(candidate_ids_array[i]), float(similarities[i])) - for i in sorted_indices - ] - - return results - - @property - def vector_count(self) -> int: - """Get number of loaded binary vectors.""" - return len(self._chunk_ids) if self._chunk_ids is not None else 0 - - @property - def embedding_dim(self) -> Optional[int]: - """Embedding dimension used to build these binary vectors (if known).""" - return int(self._embedding_dim) if self._embedding_dim is not None else None - - @property - def backend(self) -> Optional[str]: - """Embedding backend used to build these vectors (if known).""" - return self._backend - - @property - def model(self) -> Optional[str]: - """Embedding model name used to build these vectors (if known).""" - return self._model - - @property - def model_profile(self) -> Optional[str]: - """Embedding profile name (fastembed) used to build these vectors (if known).""" - return self._model_profile - - @property - def is_memmap(self) -> bool: - """Check if using memory-mapped file (vs in-memory array).""" - return self._is_memmap - - def clear(self) -> None: - """Clear loaded vectors from memory.""" - # For memmap, just delete the reference (OS will handle cleanup) - if self._is_memmap and self._binary_matrix is not None: - del self._binary_matrix - self._chunk_ids = None - self._binary_matrix = None - self._is_memmap = False - self._loaded = False diff --git a/codex-lens/src/codexlens/search/chain_search.py b/codex-lens/src/codexlens/search/chain_search.py deleted file mode 100644 index c269af66..00000000 --- a/codex-lens/src/codexlens/search/chain_search.py +++ /dev/null @@ -1,4779 +0,0 @@ -"""Chain search engine for recursive multi-directory searching. - -Provides parallel search across directory hierarchies using indexed _index.db files. -Supports depth-limited traversal, result aggregation, and symbol search. -""" - -from __future__ import annotations - -from concurrent.futures import ThreadPoolExecutor, as_completed -from dataclasses import dataclass, field, replace -from pathlib import Path -from typing import List, Optional, Dict, Any, Literal, Tuple, TYPE_CHECKING -import json -import logging -import os -import threading -import time - -from codexlens.entities import SearchResult, Symbol - -if TYPE_CHECKING: - import numpy as np - -try: - import numpy as np - NUMPY_AVAILABLE = True -except ImportError: - NUMPY_AVAILABLE = False -from codexlens.config import Config -from codexlens.storage.registry import RegistryStore, DirMapping -from codexlens.storage.dir_index import DirIndexStore, SubdirLink -from codexlens.storage.global_index import GlobalSymbolIndex -from codexlens.storage.index_filters import is_ignored_index_path -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.sqlite_store import SQLiteStore -from codexlens.storage.vector_meta_store import VectorMetadataStore -from codexlens.config import ( - BINARY_VECTORS_MMAP_NAME, - VECTORS_HNSW_NAME, - VECTORS_META_DB_NAME, -) -from codexlens.search.hybrid_search import HybridSearchEngine -from codexlens.search.ranking import query_prefers_lexical_search - -SEARCH_ARTIFACT_DIRS = frozenset({ - "dist", - "build", - "out", - "target", - "bin", - "obj", - "_build", - "coverage", - "htmlcov", - ".cache", - ".parcel-cache", - ".turbo", - ".next", - ".nuxt", - "node_modules", - "bower_components", -}) - - -@dataclass -class SearchOptions: - """Configuration options for chain search. - - Attributes: - depth: Maximum search depth (-1 = unlimited, 0 = current dir only) - max_workers: Number of parallel worker threads - limit_per_dir: Maximum results per directory - total_limit: Total result limit across all directories - offset: Pagination offset - skip first N results (default 0) - include_symbols: Whether to include symbol search results - files_only: Return only file paths without excerpts - include_semantic: Whether to include semantic keyword search results - code_only: Only return code files (excludes md, txt, json, yaml, xml, etc.) - exclude_extensions: List of file extensions to exclude (e.g., ["md", "txt", "json"]) - hybrid_mode: Enable hybrid search with RRF fusion (default False) - enable_fuzzy: Enable fuzzy FTS in hybrid mode (default True) - enable_vector: Enable vector semantic search (default False) - pure_vector: If True, only use vector search without FTS fallback (default False) - enable_cascade: Enable cascade (binary+dense) two-stage retrieval (default False) - hybrid_weights: Custom RRF weights for hybrid search (optional) - group_results: Enable grouping of similar results (default False) - grouping_threshold: Score threshold for grouping similar results (default 0.01) - inject_feature_anchors: Whether to inject lexical feature anchors (default True) - """ - depth: int = -1 - max_workers: int = 8 - limit_per_dir: int = 10 - total_limit: int = 100 - offset: int = 0 - include_symbols: bool = False - files_only: bool = False - include_semantic: bool = False - code_only: bool = False - exclude_extensions: Optional[List[str]] = None - hybrid_mode: bool = False - enable_fuzzy: bool = True - enable_vector: bool = False - pure_vector: bool = False - enable_cascade: bool = False - hybrid_weights: Optional[Dict[str, float]] = None - group_results: bool = False - grouping_threshold: float = 0.01 - inject_feature_anchors: bool = True - - -@dataclass -class SearchStats: - """Statistics collected during search execution. - - Attributes: - dirs_searched: Number of directories searched - files_matched: Number of files with matches - time_ms: Total search time in milliseconds - errors: List of error messages encountered - """ - dirs_searched: int = 0 - files_matched: int = 0 - time_ms: float = 0 - errors: List[str] = field(default_factory=list) - - -@dataclass -class ChainSearchResult: - """Comprehensive search result with metadata. - - Attributes: - query: Original search query - results: List of SearchResult objects - related_results: Expanded results from graph neighbors (optional) - symbols: List of Symbol objects (if include_symbols=True) - stats: SearchStats with execution metrics - """ - query: str - results: List[SearchResult] - symbols: List[Symbol] - stats: SearchStats - related_results: List[SearchResult] = field(default_factory=list) - - -@dataclass -class ReferenceResult: - """Result from reference search in code_relationships table. - - Attributes: - file_path: Path to the file containing the reference - line: Line number where the reference occurs (1-based) - column: Column number where the reference occurs (0-based) - context: Surrounding code snippet for context - relationship_type: Type of relationship (call, import, inheritance, etc.) - """ - file_path: str - line: int - column: int - context: str - relationship_type: str - - -class ChainSearchEngine: - """Parallel chain search engine for hierarchical directory indexes. - - Searches across multiple directory indexes in parallel, following subdirectory - links to recursively traverse the file tree. Supports depth limits, result - aggregation, and both content and symbol searches. - - Thread-safe with configurable parallelism. - - Attributes: - registry: Global project registry - mapper: Path mapping utility - logger: Python logger instance - """ - - def __init__(self, - registry: RegistryStore, - mapper: PathMapper, - max_workers: int = 8, - config: Config | None = None): - """Initialize chain search engine. - - Args: - registry: Global project registry for path lookups - mapper: Path mapper for source/index conversions - max_workers: Maximum parallel workers (default 8) - """ - self.registry = registry - self.mapper = mapper - self.logger = logging.getLogger(__name__) - self._max_workers = max_workers - self._executor: Optional[ThreadPoolExecutor] = None - self._config = config - self._realtime_lsp_keepalive_lock = threading.RLock() - self._realtime_lsp_keepalive = None - self._realtime_lsp_keepalive_key = None - self._runtime_cache_lock = threading.RLock() - self._dense_ann_cache: Dict[Tuple[str, int], Any] = {} - self._legacy_dense_ann_cache: Dict[Tuple[str, int], Any] = {} - self._reranker_cache_key: Optional[Tuple[str, Optional[str], bool, Optional[int]]] = None - self._reranker_instance: Any = None - # Track which (workspace_root, config_file) pairs have already been warmed up. - # This avoids paying the warmup sleep on every query when using keep-alive LSP servers. - self._realtime_lsp_warmed_ids: set[tuple[str, str | None]] = set() - - def _get_executor(self, max_workers: Optional[int] = None) -> ThreadPoolExecutor: - """Get or create the shared thread pool executor. - - Lazy initialization to avoid creating executor if never used. - - Args: - max_workers: Override default max_workers if specified - - Returns: - ThreadPoolExecutor instance - """ - workers = max_workers or self._max_workers - if self._executor is None: - self._executor = ThreadPoolExecutor(max_workers=workers) - return self._executor - - def close(self) -> None: - """Shutdown the thread pool executor.""" - if self._executor is not None: - self._executor.shutdown(wait=True) - self._executor = None - self._clear_runtime_caches() - with self._realtime_lsp_keepalive_lock: - keepalive = self._realtime_lsp_keepalive - self._realtime_lsp_keepalive = None - self._realtime_lsp_keepalive_key = None - if keepalive is not None: - try: - keepalive.stop() - except Exception: - pass - - def __enter__(self) -> "ChainSearchEngine": - """Context manager entry.""" - return self - - def __exit__(self, exc_type: object, exc: object, tb: object) -> None: - """Context manager exit.""" - self.close() - - @staticmethod - def _release_cached_resource(resource: Any) -> None: - """Best-effort cleanup for cached runtime helpers.""" - if resource is None: - return - for attr_name in ("clear", "close"): - cleanup = getattr(resource, attr_name, None) - if callable(cleanup): - try: - cleanup() - except Exception: - pass - break - - def _clear_runtime_caches(self) -> None: - """Drop per-engine runtime caches for dense indexes and rerankers.""" - with self._runtime_cache_lock: - dense_indexes = list(self._dense_ann_cache.values()) - legacy_dense_indexes = list(self._legacy_dense_ann_cache.values()) - reranker = self._reranker_instance - self._dense_ann_cache = {} - self._legacy_dense_ann_cache = {} - self._reranker_cache_key = None - self._reranker_instance = None - - for resource in [*dense_indexes, *legacy_dense_indexes, reranker]: - self._release_cached_resource(resource) - - def _get_cached_centralized_dense_index(self, index_root: Path, dim: int) -> Optional[Any]: - """Load and cache a centralized dense ANN index for repeated queries.""" - from codexlens.semantic.ann_index import ANNIndex - - resolved_root = Path(index_root).resolve() - cache_key = (str(resolved_root), int(dim)) - with self._runtime_cache_lock: - cached = self._dense_ann_cache.get(cache_key) - if cached is not None: - return cached - - ann_index = ANNIndex.create_central(index_root=resolved_root, dim=int(dim)) - if not ann_index.load() or ann_index.count() == 0: - return None - - with self._runtime_cache_lock: - cached = self._dense_ann_cache.get(cache_key) - if cached is None: - self._dense_ann_cache[cache_key] = ann_index - cached = ann_index - return cached - - def _get_cached_legacy_dense_index(self, index_path: Path, dim: int) -> Optional[Any]: - """Load and cache a legacy per-index dense ANN index for repeated queries.""" - from codexlens.semantic.ann_index import ANNIndex - - resolved_path = Path(index_path).resolve() - cache_key = (str(resolved_path), int(dim)) - with self._runtime_cache_lock: - cached = self._legacy_dense_ann_cache.get(cache_key) - if cached is not None: - return cached - - ann_index = ANNIndex(resolved_path, dim=int(dim)) - if not ann_index.load() or ann_index.count() == 0: - return None - - with self._runtime_cache_lock: - cached = self._legacy_dense_ann_cache.get(cache_key) - if cached is None: - self._legacy_dense_ann_cache[cache_key] = ann_index - cached = ann_index - return cached - - def _get_cached_reranker(self) -> Any: - """Return a cached reranker instance for repeated cascade queries.""" - try: - from codexlens.semantic.reranker import ( - check_reranker_available, - get_reranker, - ) - except ImportError as exc: - self.logger.debug("Reranker not available: %s", exc) - return None - except Exception as exc: - self.logger.debug("Failed to import reranker factory: %s", exc) - return None - - backend = "onnx" - model_name = None - use_gpu = True - max_tokens = None - - if self._config is not None: - backend = getattr(self._config, "reranker_backend", "onnx") or "onnx" - model_name = getattr(self._config, "reranker_model", None) - use_gpu = getattr( - self._config, - "reranker_use_gpu", - getattr(self._config, "embedding_use_gpu", True), - ) - max_tokens = getattr(self._config, "reranker_max_input_tokens", None) - - cache_key = ( - str(backend).strip().lower(), - str(model_name).strip() if isinstance(model_name, str) and model_name.strip() else None, - bool(use_gpu), - int(max_tokens) if isinstance(max_tokens, (int, float)) else None, - ) - with self._runtime_cache_lock: - cached = ( - self._reranker_instance - if self._reranker_instance is not None and self._reranker_cache_key == cache_key - else None - ) - if cached is not None: - return cached - - ok, err = check_reranker_available(cache_key[0]) - if not ok: - self.logger.debug("Reranker backend unavailable (%s): %s", cache_key[0], err) - return None - - kwargs: Dict[str, Any] = {} - device = None - if cache_key[0] == "onnx": - kwargs["use_gpu"] = cache_key[2] - elif cache_key[0] == "api": - if cache_key[3] is not None: - kwargs["max_input_tokens"] = cache_key[3] - elif not cache_key[2]: - device = "cpu" - - try: - reranker = get_reranker( - backend=cache_key[0], - model_name=cache_key[1], - device=device, - **kwargs, - ) - except Exception as exc: - self.logger.debug("Failed to initialize reranker: %s", exc) - return None - - previous = None - with self._runtime_cache_lock: - cached = ( - self._reranker_instance - if self._reranker_instance is not None and self._reranker_cache_key == cache_key - else None - ) - if cached is not None: - reranker = cached - else: - previous = self._reranker_instance - self._reranker_cache_key = cache_key - self._reranker_instance = reranker - - if previous is not None and previous is not reranker: - self._release_cached_resource(previous) - return reranker - - def search(self, query: str, - source_path: Path, - options: Optional[SearchOptions] = None) -> ChainSearchResult: - """Execute chain search from source_path with recursive traversal. - - Process: - 1. Locate starting index for source_path - 2. Collect all child indexes based on depth limit - 3. Search indexes in parallel using ThreadPoolExecutor - 4. Aggregate, deduplicate, and rank results - - Args: - query: FTS5 search query string - source_path: Starting directory path - options: Search configuration (uses defaults if None) - - Returns: - ChainSearchResult with results, symbols, and statistics - - Examples: - >>> engine = ChainSearchEngine(registry, mapper) - >>> result = engine.search("authentication", Path("D:/project/src")) - >>> for r in result.results[:5]: - ... print(f"{r.path}: {r.score:.2f}") - """ - options = options or SearchOptions() - effective_options = options - if options.hybrid_mode and query_prefers_lexical_search(query): - self.logger.debug( - "Hybrid shortcut: using lexical search path for lexical-priority query %r", - query, - ) - effective_options = replace( - options, - hybrid_mode=False, - enable_vector=False, - pure_vector=False, - enable_cascade=False, - hybrid_weights=None, - enable_fuzzy=True, - ) - start_time = time.time() - stats = SearchStats() - - # Step 1: Find starting index - start_index = self._find_start_index(source_path) - if not start_index: - self.logger.warning(f"No index found for {source_path}") - stats.time_ms = (time.time() - start_time) * 1000 - return ChainSearchResult( - query=query, - results=[], - symbols=[], - stats=stats - ) - - # Step 2: Collect all index paths to search - index_paths = self._collect_index_paths(start_index, effective_options.depth) - stats.dirs_searched = len(index_paths) - - if not index_paths: - self.logger.warning(f"No indexes collected from {start_index}") - stats.time_ms = (time.time() - start_time) * 1000 - return ChainSearchResult( - query=query, - results=[], - symbols=[], - stats=stats - ) - - # Step 3: Parallel search - results, search_stats = self._search_parallel( - index_paths, query, effective_options - ) - stats.errors = search_stats.errors - - # Step 3.5: Filter by extension if requested - if effective_options.code_only or effective_options.exclude_extensions: - results = self._filter_by_extension( - results, effective_options.code_only, effective_options.exclude_extensions - ) - - if effective_options.inject_feature_anchors: - results = self._inject_query_feature_anchors( - query, - source_path, - effective_options, - results, - limit=min(6, max(2, effective_options.total_limit)), - ) - - # Step 4: Merge and rank - final_results = self._merge_and_rank( - results, - effective_options.total_limit, - effective_options.offset, - query=query, - ) - - # Step 5: Optional grouping of similar results - if effective_options.group_results: - from codexlens.search.ranking import group_similar_results - final_results = group_similar_results( - final_results, score_threshold_abs=effective_options.grouping_threshold - ) - - stats.files_matched = len(final_results) - - # Optional: Symbol search - symbols = [] - if effective_options.include_symbols: - symbols = self._search_symbols_parallel( - index_paths, query, None, effective_options.total_limit - ) - - # Optional: graph expansion using precomputed neighbors - related_results: List[SearchResult] = [] - if self._config is not None and getattr(self._config, "enable_graph_expansion", False): - try: - from codexlens.search.enrichment import SearchEnrichmentPipeline - - pipeline = SearchEnrichmentPipeline(self.mapper, config=self._config) - related_results = pipeline.expand_related_results(final_results) - except Exception as exc: - self.logger.debug("Graph expansion failed: %s", exc) - related_results = [] - - stats.time_ms = (time.time() - start_time) * 1000 - - return ChainSearchResult( - query=query, - results=final_results, - symbols=symbols, - stats=stats, - related_results=related_results, - ) - - def binary_cascade_search( - self, - query: str, - source_path: Path, - k: int = 10, - coarse_k: int = 100, - options: Optional[SearchOptions] = None, - ) -> ChainSearchResult: - """Execute binary cascade search with binary coarse ranking and dense fine ranking. - - Binary cascade search process: - 1. Stage 1 (Coarse): Fast binary vector search using Hamming distance - to quickly filter to coarse_k candidates (256-dim binary, 32 bytes/vector) - 2. Stage 2 (Fine): Dense vector cosine similarity for precise reranking - of candidates (2048-dim float32) - - This approach leverages the speed of binary search (~100x faster) while - maintaining precision through dense vector reranking. - - Performance characteristics: - - Binary search: O(N) with SIMD-accelerated XOR + popcount - - Dense rerank: Only applied to top coarse_k candidates - - Memory: 32 bytes (binary) + 8KB (dense) per chunk - - Args: - query: Natural language or keyword query string - source_path: Starting directory path - k: Number of final results to return (default 10) - coarse_k: Number of coarse candidates from first stage (default 100) - options: Search configuration (uses defaults if None) - - Returns: - ChainSearchResult with reranked results and statistics - - Examples: - >>> engine = ChainSearchEngine(registry, mapper, config=config) - >>> result = engine.binary_cascade_search( - ... "how to authenticate users", - ... Path("D:/project/src"), - ... k=10, - ... coarse_k=100 - ... ) - >>> for r in result.results: - ... print(f"{r.path}: {r.score:.3f}") - """ - if not NUMPY_AVAILABLE: - self.logger.warning( - "NumPy not available, falling back to standard search" - ) - return self.search(query, source_path, options=options) - - options = options or SearchOptions() - start_time = time.time() - stats = SearchStats() - - # Use config defaults if available - if self._config is not None: - if hasattr(self._config, "cascade_coarse_k"): - coarse_k = coarse_k or self._config.cascade_coarse_k - if hasattr(self._config, "cascade_fine_k"): - k = k or self._config.cascade_fine_k - - # Step 1: Find starting index - start_index = self._find_start_index(source_path) - if not start_index: - self.logger.warning(f"No index found for {source_path}") - stats.time_ms = (time.time() - start_time) * 1000 - return ChainSearchResult( - query=query, - results=[], - symbols=[], - stats=stats - ) - - # Step 2: Collect all index paths - index_paths = self._collect_index_paths(start_index, options.depth) - stats.dirs_searched = len(index_paths) - - if not index_paths: - self.logger.warning(f"No indexes collected from {start_index}") - stats.time_ms = (time.time() - start_time) * 1000 - return ChainSearchResult( - query=query, - results=[], - symbols=[], - stats=stats - ) - - # Stage 1: Binary vector coarse retrieval - self.logger.debug( - "Binary Cascade Stage 1: Binary coarse retrieval for %d candidates", - coarse_k, - ) - - coarse_candidates, used_centralized, _, stage2_index_root = self._collect_binary_coarse_candidates( - query, - index_paths, - coarse_k, - stats, - index_root=index_paths[0].parent if index_paths else None, - ) - - if not coarse_candidates: - self.logger.debug("No binary candidates found, falling back to standard search") - return self.search(query, source_path, options=options) - - self.logger.debug( - "Binary Cascade Stage 1 complete: %d candidates retrieved", - len(coarse_candidates), - ) - - # Stage 2: Dense vector fine ranking - self.logger.debug( - "Binary Cascade Stage 2: Dense reranking %d candidates to top-%d", - len(coarse_candidates), - k, - ) - - # Group candidates by index path for batch retrieval - candidates_by_index: Dict[Path, List[int]] = {} - for chunk_id, _, index_path in coarse_candidates: - if index_path not in candidates_by_index: - candidates_by_index[index_path] = [] - candidates_by_index[index_path].append(chunk_id) - - # Retrieve dense embeddings and compute cosine similarity - scored_results: List[Tuple[float, SearchResult]] = [] - import sqlite3 - dense_query_cache: Dict[Tuple[str, str, bool], "np.ndarray"] = {} - dense_query_errors: list[str] = [] - - for index_path, chunk_ids in candidates_by_index.items(): - try: - query_index_root = index_path if used_centralized else index_path.parent - query_dense = self._embed_dense_query( - query, - index_root=query_index_root, - query_cache=dense_query_cache, - ) - - # Collect valid rows and dense vectors for batch processing - valid_rows: List[Dict[str, Any]] = [] - dense_vectors: List["np.ndarray"] = [] - - if used_centralized: - # Centralized mode: index_path is actually index_root directory - # Dense embeddings are in per-directory _index.db files - # referenced by source_index_db in chunk_metadata - meta_db_path = index_path / VECTORS_META_DB_NAME - if not meta_db_path.exists(): - self.logger.debug( - "VectorMetadataStore not found at %s, skipping dense reranking", meta_db_path - ) - continue - - # Get chunk metadata with source_index_db references - meta_store = VectorMetadataStore(meta_db_path) - chunks_meta = meta_store.get_chunks_by_ids(chunk_ids) - - # Group chunks by source_index_db - chunks_by_source: Dict[str, List[Dict[str, Any]]] = {} - for chunk in chunks_meta: - source_db = chunk.get("source_index_db") - if source_db: - if source_db not in chunks_by_source: - chunks_by_source[source_db] = [] - chunks_by_source[source_db].append(chunk) - - # Retrieve dense embeddings from each source_index_db - for source_db, source_chunks in chunks_by_source.items(): - try: - source_chunk_ids = [c["chunk_id"] for c in source_chunks] - conn = sqlite3.connect(source_db) - conn.row_factory = sqlite3.Row - - placeholders = ",".join("?" * len(source_chunk_ids)) - # Try semantic_chunks first (newer schema), fall back to chunks - try: - rows = conn.execute( - f"SELECT id, embedding_dense FROM semantic_chunks WHERE id IN ({placeholders})", - source_chunk_ids - ).fetchall() - except sqlite3.OperationalError: - rows = conn.execute( - f"SELECT id, embedding_dense FROM chunks WHERE id IN ({placeholders})", - source_chunk_ids - ).fetchall() - conn.close() - - # Build dense vector lookup - dense_lookup = {row["id"]: row["embedding_dense"] for row in rows} - - # Process chunks with their embeddings - for chunk in source_chunks: - chunk_id = chunk["chunk_id"] - dense_bytes = dense_lookup.get(chunk_id) - if dense_bytes is not None: - valid_rows.append({ - "id": chunk_id, - "file_path": chunk["file_path"], - "content": chunk["content"], - }) - dense_vectors.append(np.frombuffer(dense_bytes, dtype=np.float32)) - except Exception as exc: - self.logger.debug( - "Failed to get dense embeddings from %s: %s", source_db, exc - ) - else: - # Per-directory mode: index_path is the _index.db file - conn = sqlite3.connect(str(index_path)) - conn.row_factory = sqlite3.Row - - placeholders = ",".join("?" * len(chunk_ids)) - rows = conn.execute( - f"SELECT id, file_path, content, embedding_dense FROM semantic_chunks WHERE id IN ({placeholders})", - chunk_ids - ).fetchall() - conn.close() - - for row in rows: - dense_bytes = row["embedding_dense"] - if dense_bytes is not None: - valid_rows.append(dict(row)) - dense_vectors.append(np.frombuffer(dense_bytes, dtype=np.float32)) - - # Skip if no dense embeddings found - if not dense_vectors: - continue - - # Stack into matrix for batch computation - doc_matrix = np.vstack(dense_vectors) - - # Batch compute cosine similarities - scores = self._compute_cosine_similarity_batch(query_dense, doc_matrix) - - # Create search results - for i, row in enumerate(valid_rows): - score = float(scores[i]) - excerpt = (row.get("content") or "")[:500] - result = SearchResult( - path=row.get("file_path") or "", - score=score, - excerpt=excerpt, - ) - scored_results.append((score, result)) - - except Exception as exc: - self.logger.debug( - "Dense reranking failed for %s: %s", index_path, exc - ) - stats.errors.append(f"Dense reranking failed for {index_path}: {exc}") - dense_query_errors.append(str(exc)) - - if not scored_results: - if dense_query_errors: - self.logger.warning( - "Failed to generate dense query embeddings for binary cascade: %s. " - "Using Hamming distance scores only.", - dense_query_errors[0], - ) - final_results = self._materialize_binary_candidates( - coarse_candidates[:k], - stats, - stage2_index_root=stage2_index_root, - ) - stats.files_matched = len(final_results) - stats.time_ms = (time.time() - start_time) * 1000 - return ChainSearchResult( - query=query, - results=final_results, - symbols=[], - stats=stats, - ) - - # Sort by score descending and deduplicate by path - scored_results.sort(key=lambda x: x[0], reverse=True) - - path_to_result: Dict[str, SearchResult] = {} - for score, result in scored_results: - if result.path not in path_to_result: - path_to_result[result.path] = result - - final_results = self._apply_default_path_penalties( - query, - list(path_to_result.values()), - )[:k] - - # Optional: grouping of similar results - if options.group_results: - from codexlens.search.ranking import group_similar_results - final_results = group_similar_results( - final_results, score_threshold_abs=options.grouping_threshold - ) - - stats.files_matched = len(final_results) - stats.time_ms = (time.time() - start_time) * 1000 - - self.logger.debug( - "Binary cascade search complete: %d results in %.2fms", - len(final_results), - stats.time_ms, - ) - - return ChainSearchResult( - query=query, - results=final_results, - symbols=[], - stats=stats, - ) - - def cascade_search( - self, - query: str, - source_path: Path, - k: int = 10, - coarse_k: int = 100, - options: Optional[SearchOptions] = None, - strategy: Optional[Literal["binary", "binary_rerank", "dense_rerank", "staged", "hybrid"]] = None, - ) -> ChainSearchResult: - """Unified cascade search entry point with strategy selection. - - Provides a single interface for cascade search with configurable strategy: - - "binary": Uses binary vector coarse ranking + dense fine ranking (fastest) - - "binary_rerank": Uses binary vector coarse ranking + cross-encoder reranking (best balance) - - "hybrid": Alias for "binary_rerank" (backward compat) - - "dense_rerank": Uses dense vector coarse ranking + cross-encoder reranking - - "staged": 4-stage pipeline: binary -> LSP expand -> clustering -> optional rerank - - The strategy is determined with the following priority: - 1. The `strategy` parameter (e.g., from CLI --cascade-strategy option) - 2. Config `cascade_strategy` setting from settings.json - 3. Default: "binary" - - Args: - query: Natural language or keyword query string - source_path: Starting directory path - k: Number of final results to return (default 10) - coarse_k: Number of coarse candidates from first stage (default 100) - options: Search configuration (uses defaults if None) - strategy: Cascade strategy - "binary", "binary_rerank", "dense_rerank", or "staged". - - Returns: - ChainSearchResult with reranked results and statistics - - Examples: - >>> engine = ChainSearchEngine(registry, mapper, config=config) - >>> # Use binary cascade (default, fastest) - >>> result = engine.cascade_search("auth", Path("D:/project")) - >>> # Use binary + cross-encoder (best balance of speed and quality) - >>> result = engine.cascade_search("auth", Path("D:/project"), strategy="binary_rerank") - >>> # Use 4-stage pipeline (binary + LSP expand + clustering + optional rerank) - >>> result = engine.cascade_search("auth", Path("D:/project"), strategy="staged") - """ - # Strategy priority: parameter > config > default - effective_strategy = strategy - valid_strategies = ("binary", "binary_rerank", "dense_rerank", "staged", "hybrid") - if effective_strategy is None: - # Not passed via parameter, check config - if self._config is not None: - config_strategy = getattr(self._config, "cascade_strategy", None) - if config_strategy in valid_strategies: - effective_strategy = config_strategy - - # If still not set, apply default - if effective_strategy not in valid_strategies: - effective_strategy = "binary" - - # Normalize backward-compat alias - if effective_strategy == "hybrid": - effective_strategy = "binary_rerank" - - if effective_strategy == "binary": - return self.binary_cascade_search(query, source_path, k, coarse_k, options) - elif effective_strategy == "binary_rerank": - return self.binary_rerank_cascade_search(query, source_path, k, coarse_k, options) - elif effective_strategy == "dense_rerank": - return self.dense_rerank_cascade_search(query, source_path, k, coarse_k, options) - elif effective_strategy == "staged": - return self.staged_cascade_search(query, source_path, k, coarse_k, options) - else: - return self.binary_cascade_search(query, source_path, k, coarse_k, options) - - def staged_cascade_search( - self, - query: str, - source_path: Path, - k: int = 10, - coarse_k: int = 100, - options: Optional[SearchOptions] = None, - ) -> ChainSearchResult: - """Execute 4-stage cascade search pipeline with binary, LSP expansion, clustering, and optional reranking. - - Staged cascade search process: - 1. Stage 1 (Binary Coarse): Fast binary vector search using Hamming distance - to quickly filter to coarse_k candidates (256-bit binary vectors) - 2. Stage 2 (LSP Expansion): Expand coarse candidates using GraphExpander to - include related symbols (definitions, references, callers/callees) - 3. Stage 3 (Clustering): Use configurable clustering strategy to group similar - results and select representative results from each cluster - 4. Stage 4 (Optional Rerank): If config.enable_staged_rerank is True, apply - cross-encoder reranking for final precision - - This approach combines the speed of binary search with graph-based context - expansion and diversity-preserving clustering for high-quality results. - - Performance characteristics: - - Stage 1: O(N) binary search with SIMD acceleration (~8ms) - - Stage 2: O(k * d) graph traversal where d is expansion depth - - Stage 3: O(n^2) clustering on expanded candidates - - Stage 4: Optional cross-encoder reranking (API call) - - Args: - query: Natural language or keyword query string - source_path: Starting directory path - k: Number of final results to return (default 10) - coarse_k: Number of coarse candidates from first stage (default 100) - options: Search configuration (uses defaults if None) - - Returns: - ChainSearchResult with per-stage statistics - - Examples: - >>> engine = ChainSearchEngine(registry, mapper, config=config) - >>> result = engine.staged_cascade_search( - ... "authentication handler", - ... Path("D:/project/src"), - ... k=10, - ... coarse_k=100 - ... ) - >>> for r in result.results: - ... print(f"{r.path}: {r.score:.3f}") - """ - if not NUMPY_AVAILABLE: - self.logger.warning( - "NumPy not available, falling back to standard search" - ) - return self.search(query, source_path, options=options) - - options = options or SearchOptions() - start_time = time.time() - stats = SearchStats() - - # Per-stage timing stats - stage_times: Dict[str, float] = {} - stage_counts: Dict[str, int] = {} - - # Use config defaults if available - if self._config is not None: - if hasattr(self._config, "cascade_coarse_k"): - coarse_k = coarse_k or self._config.cascade_coarse_k - if hasattr(self._config, "cascade_fine_k"): - k = k or self._config.cascade_fine_k - - # Step 1: Find starting index - start_index = self._find_start_index(source_path) - if not start_index: - self.logger.warning(f"No index found for {source_path}") - stats.time_ms = (time.time() - start_time) * 1000 - return ChainSearchResult( - query=query, - results=[], - symbols=[], - stats=stats - ) - - # Step 2: Collect all index paths - index_paths = self._collect_index_paths(start_index, options.depth) - stats.dirs_searched = len(index_paths) - - if not index_paths: - self.logger.warning(f"No indexes collected from {start_index}") - stats.time_ms = (time.time() - start_time) * 1000 - return ChainSearchResult( - query=query, - results=[], - symbols=[], - stats=stats - ) - - # ========== Stage 1: Binary Coarse Search ========== - stage1_start = time.time() - coarse_results, index_root = self._stage1_binary_search( - query, - index_paths, - coarse_k, - stats, - index_root=start_index.parent, - ) - coarse_results = self._inject_query_feature_anchors( - query, - source_path, - options, - coarse_results, - limit=min(6, max(2, k)), - ) - stage_times["stage1_binary_ms"] = (time.time() - stage1_start) * 1000 - stage_counts["stage1_candidates"] = len(coarse_results) - stage_counts["stage1_feature_anchors"] = sum( - 1 - for result in coarse_results - if (result.metadata or {}).get("feature_query_anchor") - ) - - self.logger.debug( - "Staged Stage 1: Binary search found %d candidates in %.2fms", - len(coarse_results), stage_times["stage1_binary_ms"] - ) - - if not coarse_results: - # Keep the staged pipeline running even when Stage 1 yields no candidates. - # This makes "realtime LSP graph → clustering → rerank" comparable across queries. - self.logger.debug( - "No Stage 1 candidates found; seeding staged pipeline with FTS results" - ) - stage1_fallback_start = time.time() - try: - seed_opts = SearchOptions( - depth=options.depth, - max_workers=options.max_workers, - limit_per_dir=max(10, int(coarse_k)), - total_limit=int(coarse_k), - include_symbols=True, - enable_vector=False, - hybrid_mode=False, - enable_cascade=False, - ) - seed = self.search(query, source_path, options=seed_opts) - coarse_results = list(seed.results or [])[: int(coarse_k)] - stage_counts["stage1_fallback_used"] = 1 - except Exception as exc: - self.logger.debug("Stage 1 fallback seeding failed: %r", exc) - coarse_results = [] - - stage_times["stage1_fallback_search_ms"] = (time.time() - stage1_fallback_start) * 1000 - stage_counts["stage1_candidates"] = len(coarse_results) - - if not coarse_results: - return ChainSearchResult(query=query, results=[], symbols=[], stats=stats) - - # ========== Stage 2: LSP Graph Expansion ========== - stage2_start = time.time() - expanded_results = self._stage2_lsp_expand(coarse_results, index_root, query=query) - stage_times["stage2_expand_ms"] = (time.time() - stage2_start) * 1000 - stage_counts["stage2_expanded"] = len(expanded_results) - try: - stage2_unique_paths = len({(r.path or "").lower() for r in expanded_results if getattr(r, "path", None)}) - except Exception: - stage2_unique_paths = 0 - stage_counts["stage2_unique_paths"] = stage2_unique_paths - stage_counts["stage2_duplicate_paths"] = max(0, len(expanded_results) - stage2_unique_paths) - - self.logger.debug( - "Staged Stage 2: LSP expansion %d -> %d results in %.2fms", - len(coarse_results), len(expanded_results), stage_times["stage2_expand_ms"] - ) - - # ========== Stage 3: Clustering and Representative Selection ========== - stage3_start = time.time() - stage3_target_count = self._resolve_stage3_target_count( - k, - len(expanded_results), - ) - clustered_results = self._stage3_cluster_prune( - expanded_results, - stage3_target_count, - query=query, - ) - stage_times["stage3_cluster_ms"] = (time.time() - stage3_start) * 1000 - stage_counts["stage3_clustered"] = len(clustered_results) - stage_counts["stage3_target_count"] = stage3_target_count - if self._config is not None: - try: - stage_counts["stage3_strategy"] = str(getattr(self._config, "staged_clustering_strategy", "auto") or "auto") - except Exception: - pass - - self.logger.debug( - "Staged Stage 3: Clustering %d -> %d representatives in %.2fms", - len(expanded_results), len(clustered_results), stage_times["stage3_cluster_ms"] - ) - - # ========== Stage 4: Optional Cross-Encoder Reranking ========== - enable_rerank = False - if self._config is not None: - enable_rerank = getattr(self._config, "enable_staged_rerank", False) - - if enable_rerank: - stage4_start = time.time() - final_results = self._stage4_optional_rerank(query, clustered_results, k) - stage_times["stage4_rerank_ms"] = (time.time() - stage4_start) * 1000 - stage_counts["stage4_reranked"] = len(final_results) - - self.logger.debug( - "Staged Stage 4: Reranking %d -> %d results in %.2fms", - len(clustered_results), len(final_results), stage_times["stage4_rerank_ms"] - ) - else: - # Skip reranking, just take top-k by score - final_results = sorted( - clustered_results, key=lambda r: r.score, reverse=True - )[:k] - stage_counts["stage4_reranked"] = len(final_results) - - # Deduplicate by path (keep highest score) - path_to_result: Dict[str, SearchResult] = {} - for result in final_results: - if result.path not in path_to_result or result.score > path_to_result[result.path].score: - path_to_result[result.path] = result - - final_results = self._apply_default_path_penalties( - query, - list(path_to_result.values()), - )[:k] - - # Optional: grouping of similar results - if options.group_results: - from codexlens.search.ranking import group_similar_results - final_results = group_similar_results( - final_results, score_threshold_abs=options.grouping_threshold - ) - - stats.files_matched = len(final_results) - stats.time_ms = (time.time() - start_time) * 1000 - - # Add per-stage stats to errors field (as JSON for now, will be proper field later) - stage_stats_json = json.dumps({ - "stage_times": stage_times, - "stage_counts": stage_counts, - }) - stats.errors.append(f"STAGE_STATS:{stage_stats_json}") - - self.logger.debug( - "Staged cascade search complete: %d results in %.2fms " - "(stage1=%.1fms, stage2=%.1fms, stage3=%.1fms)", - len(final_results), - stats.time_ms, - stage_times.get("stage1_binary_ms", 0), - stage_times.get("stage2_expand_ms", 0), - stage_times.get("stage3_cluster_ms", 0), - ) - - return ChainSearchResult( - query=query, - results=final_results, - symbols=[], - stats=stats, - ) - - def _stage1_binary_search( - self, - query: str, - index_paths: List[Path], - coarse_k: int, - stats: SearchStats, - *, - index_root: Optional[Path] = None, - ) -> Tuple[List[SearchResult], Optional[Path]]: - """Stage 1: Binary vector coarse search using Hamming distance.""" - - coarse_candidates, _, using_dense_fallback, stage2_index_root = self._collect_binary_coarse_candidates( - query, - index_paths, - coarse_k, - stats, - index_root=index_root, - allow_dense_fallback=True, - ) - if not coarse_candidates: - return [], stage2_index_root - return self._materialize_binary_candidates( - coarse_candidates, - stats, - stage2_index_root=stage2_index_root, - using_dense_fallback=using_dense_fallback, - ), stage2_index_root - - def _stage2_lsp_expand( - self, - coarse_results: List[SearchResult], - index_root: Optional[Path], - query: Optional[str] = None, - ) -> List[SearchResult]: - """Stage 2: LSP/graph expansion for staged cascade. - - Supports two modes via Config.staged_stage2_mode: - - "precomputed" (default): GraphExpander over per-dir `graph_neighbors` table - - "realtime": on-demand graph expansion via live LSP servers (LspBridge + LspGraphBuilder) - - Args: - coarse_results: Results from Stage 1 binary search - index_root: Root path of the index (for graph database access) - - Returns: - Combined list of original results plus expanded related results - """ - if not coarse_results or index_root is None: - return coarse_results - - try: - mode = "precomputed" - if self._config is not None: - mode = (getattr(self._config, "staged_stage2_mode", "precomputed") or "precomputed").strip().lower() - - if mode in {"realtime", "live"}: - return self._stage2_realtime_lsp_expand( - coarse_results, - index_root=index_root, - query=query, - ) - - if mode == "static_global_graph": - return self._stage2_static_global_graph_expand(coarse_results, index_root=index_root) - - return self._stage2_precomputed_graph_expand(coarse_results, index_root=index_root) - - except ImportError as exc: - self.logger.debug("GraphExpander not available: %s", exc) - return coarse_results - except Exception as exc: - self.logger.debug("Stage 2 LSP expansion failed: %s", exc) - return coarse_results - - def _stage2_precomputed_graph_expand( - self, - coarse_results: List[SearchResult], - *, - index_root: Path, - ) -> List[SearchResult]: - """Stage 2 (precomputed): expand using GraphExpander over `graph_neighbors`.""" - from codexlens.search.graph_expander import GraphExpander - - depth = 2 - if self._config is not None: - depth = getattr( - self._config, - "staged_lsp_depth", - getattr(self._config, "graph_expansion_depth", 2), - ) - try: - depth = int(depth) - except Exception: - depth = 2 - - expander = GraphExpander(self.mapper, config=self._config) - - max_expand = min(10, len(coarse_results)) - max_related = 50 - - related_results = expander.expand( - coarse_results, - depth=depth, - max_expand=max_expand, - max_related=max_related, - ) - - if related_results: - self.logger.debug( - "Stage 2 (precomputed) expanded %d base results to %d related symbols", - len(coarse_results), len(related_results) - ) - - return self._combine_stage2_results(coarse_results, related_results) - - def _stage2_static_global_graph_expand( - self, - coarse_results: List[SearchResult], - *, - index_root: Path, - ) -> List[SearchResult]: - """Stage 2 (static_global_graph): expand using GlobalGraphExpander over global_relationships.""" - from codexlens.search.global_graph_expander import GlobalGraphExpander - - global_db_path = index_root / GlobalSymbolIndex.DEFAULT_DB_NAME - if not global_db_path.exists(): - self.logger.debug("Global symbol DB not found at %s, skipping static graph expansion", global_db_path) - return coarse_results - - project_id = 1 - try: - for p in self.registry.list_projects(): - if p.index_root.resolve() == index_root.resolve(): - project_id = p.id - break - except Exception: - pass - - global_index = GlobalSymbolIndex(global_db_path, project_id=project_id) - global_index.initialize() - - try: - expander = GlobalGraphExpander(global_index, config=self._config) - related_results = expander.expand( - coarse_results, - top_n=min(10, len(coarse_results)), - max_related=50, - ) - - if related_results: - self.logger.debug( - "Stage 2 (static_global_graph) expanded %d base results to %d related symbols", - len(coarse_results), len(related_results), - ) - - return self._combine_stage2_results(coarse_results, related_results) - finally: - global_index.close() - - def _stage2_realtime_lsp_expand( - self, - coarse_results: List[SearchResult], - *, - index_root: Path, - query: Optional[str] = None, - ) -> List[SearchResult]: - """Stage 2 (realtime): compute expansion graph via live LSP servers.""" - import asyncio - from concurrent.futures import ThreadPoolExecutor - - from codexlens.hybrid_search.data_structures import CodeSymbolNode, Range - from codexlens.lsp import LspBridge, LspGraphBuilder - - max_depth = 1 - timeout_s = 30.0 - max_nodes = 50 - max_seeds = 1 - max_concurrent = 2 - warmup_s = 3.0 - resolve_symbols = False - if self._config is not None: - max_depth = int( - getattr( - self._config, - "staged_realtime_lsp_depth", - getattr(self._config, "staged_lsp_depth", 1), - ) - or 1 - ) - timeout_s = float(getattr(self._config, "staged_realtime_lsp_timeout_s", 30.0) or 30.0) - max_nodes = int(getattr(self._config, "staged_realtime_lsp_max_nodes", 50) or 50) - warmup_s = float(getattr(self._config, "staged_realtime_lsp_warmup_s", 3.0) or 0.0) - max_seeds = int(getattr(self._config, "staged_realtime_lsp_max_seeds", 1) or 1) - max_concurrent = int(getattr(self._config, "staged_realtime_lsp_max_concurrent", 2) or 2) - resolve_symbols = bool(getattr(self._config, "staged_realtime_lsp_resolve_symbols", False)) - - try: - source_root = self.mapper.index_to_source(index_root) - except Exception: - source_root = Path(coarse_results[0].path).resolve().parent - - lsp_config_file = self._find_lsp_config_file(source_root) - workspace_root = Path(source_root).resolve() - - max_expand = min(max(1, max_seeds), len(coarse_results)) - seed_nodes: List[CodeSymbolNode] = [] - seed_ids: set[str] = set() - - selected_results = list(coarse_results) - if query: - import re - - terms = { - t.lower() - for t in re.findall(r"[A-Za-z_][A-Za-z0-9_]*", query) - if t - } - - def _priority(result: SearchResult) -> float: - sym = (result.symbol_name or "").strip().lower() - stem = Path(result.path).stem.lower() if result.path else "" - score = 0.0 - if sym and sym in terms: - score += 5.0 - if sym: - score += 2.0 - if stem and stem in terms: - score += 1.0 - if result.symbol_kind: - score += 0.5 - if result.start_line: - score += 0.2 - return score - - indexed = list(enumerate(selected_results)) - indexed.sort( - key=lambda pair: ( - _priority(pair[1]), - float(pair[1].score), - -pair[0], - ), - reverse=True, - ) - selected_results = [r for _, r in indexed] - else: - indexed = list(enumerate(selected_results)) - indexed.sort( - key=lambda pair: ( - 1.0 if pair[1].symbol_name else 0.0, - float(pair[1].score), - -pair[0], - ), - reverse=True, - ) - selected_results = [r for _, r in indexed] - - # Prefer symbol-definition seeds when possible (improves LSP reference/call-hierarchy results). - # - # NOTE: We avoid relying purely on the stored symbol index here because its ranges may be - # imprecise in some projects. Instead, we attempt a lightweight definition-line detection - # for query identifiers within the top coarse candidate files. - if query: - try: - import re - - terms_raw = [ - t for t in re.findall(r"[A-Za-z_][A-Za-z0-9_]*", query) if t - ] - stopwords = { - "class", "def", "function", "method", "import", "from", "return", - "async", "await", "public", "private", "protected", "static", - "const", "let", "var", "new", - } - candidate_terms = [ - t for t in terms_raw - if t.lower() not in stopwords and len(t) >= 3 - ] - - candidate_terms.sort(key=len, reverse=True) - - # Candidate files (best-first): de-dupe while preserving ordering. - candidate_files: List[str] = [] - seen_files: set[str] = set() - for r in selected_results: - if r.path and r.path not in seen_files: - seen_files.add(r.path) - candidate_files.append(r.path) - if len(candidate_files) >= 50: - break - - # Also consider files whose *names* match query identifiers (helps when coarse retrieval - # misses the defining file for a symbol like `Config`). - try: - if source_root and candidate_terms: - allow_suffix = {".py", ".ts", ".tsx", ".js", ".jsx"} - name_terms = [t.lower() for t in candidate_terms[:3]] - for dirpath, _, filenames in os.walk(source_root): - for filename in filenames: - suffix = Path(filename).suffix.lower() - if suffix not in allow_suffix: - continue - lowered = filename.lower() - if any(t in lowered for t in name_terms): - fp = str(Path(dirpath) / filename) - if fp not in seen_files: - seen_files.add(fp) - candidate_files.append(fp) - if len(candidate_files) >= 120: - break - except Exception: - pass - - for term in candidate_terms[:5]: - if len(seed_nodes) >= max_expand: - break - - escaped = re.escape(term) - py_class = re.compile(rf"^\s*class\s+{escaped}\b") - py_def = re.compile(rf"^\s*(?:async\s+)?def\s+{escaped}\b") - ts_class = re.compile(rf"^\s*(?:export\s+)?class\s+{escaped}\b") - ts_func = re.compile(rf"^\s*(?:export\s+)?(?:async\s+)?function\s+{escaped}\b") - - for file_path in candidate_files: - if len(seed_nodes) >= max_expand: - break - suffix = Path(file_path).suffix.lower() - if suffix not in {".py", ".ts", ".tsx", ".js", ".jsx"}: - continue - - try: - lines = Path(file_path).read_text(encoding="utf-8", errors="ignore").splitlines() - except Exception: - continue - - for i, line in enumerate(lines): - kind = None - if suffix == ".py": - if py_class.search(line): - kind = "class" - elif py_def.search(line): - kind = "function" - else: - if ts_class.search(line): - kind = "class" - elif ts_func.search(line): - kind = "function" - - if not kind: - continue - - start_line = i + 1 - idx = line.find(term) - if idx >= 0: - start_character = idx + 1 - else: - stripped = line.lstrip() - start_character = (len(line) - len(stripped)) + 1 if stripped else 1 - - node_id = f"{file_path}:{term}:{start_line}" - if node_id in seed_ids: - break - - seed_ids.add(node_id) - seed_nodes.append( - CodeSymbolNode( - id=node_id, - name=term, - kind=kind, - file_path=file_path, - range=Range( - start_line=start_line, - start_character=start_character, - end_line=start_line, - end_character=start_character, - ), - ) - ) - break - except Exception: - pass - - for seed in selected_results: - if len(seed_nodes) >= max_expand: - break - if not seed.path: - continue - name = seed.symbol_name or Path(seed.path).stem - kind = seed.symbol_kind or "unknown" - start_line = int(seed.start_line or 1) - end_line = int(seed.end_line or start_line) - start_character = 1 - try: - if start_line >= 1: - line_text = Path(seed.path).read_text(encoding="utf-8", errors="ignore").splitlines()[start_line - 1] - if seed.symbol_name: - idx = line_text.find(seed.symbol_name) - if idx >= 0: - start_character = idx + 1 # 1-based for StandaloneLspManager - else: - stripped = line_text.lstrip() - if stripped: - start_character = (len(line_text) - len(stripped)) + 1 - except Exception: - start_character = 1 - node_id = f"{seed.path}:{name}:{start_line}" - if node_id in seed_ids: - continue - seed_ids.add(node_id) - seed_nodes.append( - CodeSymbolNode( - id=node_id, - name=name, - kind=kind, - file_path=seed.path, - range=Range( - start_line=start_line, - start_character=start_character, - end_line=end_line, - end_character=start_character if end_line == start_line else 1, - ), - raw_code=seed.content or "", - docstring=seed.excerpt or "", - ) - ) - - if not seed_nodes: - return coarse_results - - effective_warmup_s = warmup_s - - async def expand_graph(bridge: LspBridge): - # Warm up analysis: open seed docs and wait a bit so references/call hierarchy are populated. - if effective_warmup_s > 0: - for seed in seed_nodes[:3]: - try: - await bridge.get_document_symbols(seed.file_path) - except Exception: - continue - try: - warmup_budget = min(effective_warmup_s, max(0.0, timeout_s * 0.1)) - await asyncio.sleep(min(warmup_budget, max(0.0, timeout_s - 0.5))) - except Exception: - pass - builder = LspGraphBuilder( - max_depth=max_depth, - max_nodes=max_nodes, - max_concurrent=max(1, max_concurrent), - resolve_symbols=resolve_symbols, - ) - return await builder.build_from_seeds(seed_nodes, bridge) - - try: - try: - asyncio.get_running_loop() - has_running_loop = True - except RuntimeError: - has_running_loop = False - - if has_running_loop: - with ThreadPoolExecutor(max_workers=1) as executor: - async def _expand_once(): - async with LspBridge( - workspace_root=str(workspace_root), - config_file=str(lsp_config_file) if lsp_config_file else None, - timeout=timeout_s, - ) as bridge: - return await expand_graph(bridge) - - def _run(): - return asyncio.run(asyncio.wait_for(_expand_once(), timeout=timeout_s)) - - graph = executor.submit(_run).result(timeout=timeout_s + 1.0) - else: - from codexlens.lsp.keepalive_bridge import KeepAliveKey, KeepAliveLspBridge - - key = KeepAliveKey( - workspace_root=str(workspace_root), - config_file=str(lsp_config_file) if lsp_config_file else None, - timeout=float(timeout_s), - ) - warm_id = (key.workspace_root, key.config_file) - with self._realtime_lsp_keepalive_lock: - if warm_id in self._realtime_lsp_warmed_ids: - effective_warmup_s = 0.0 - keepalive = self._realtime_lsp_keepalive - if keepalive is None or self._realtime_lsp_keepalive_key != key: - if keepalive is not None: - try: - keepalive.stop() - except Exception: - pass - keepalive = KeepAliveLspBridge( - workspace_root=key.workspace_root, - config_file=key.config_file, - timeout=key.timeout, - ) - self._realtime_lsp_keepalive = keepalive - self._realtime_lsp_keepalive_key = key - - graph = keepalive.run(expand_graph, timeout=timeout_s) - with self._realtime_lsp_keepalive_lock: - self._realtime_lsp_warmed_ids.add(warm_id) - except Exception as exc: - self.logger.debug("Stage 2 (realtime) expansion failed: %r", exc) - return coarse_results - - try: - node_count = len(getattr(graph, "nodes", {}) or {}) - edge_count = len(getattr(graph, "edges", []) or []) - except Exception: - node_count, edge_count = 0, 0 - self.logger.debug( - "Stage 2 (realtime) graph built: seeds=%d nodes=%d edges=%d", - len(seed_nodes), - node_count, - edge_count, - ) - - related_results: List[SearchResult] = [] - for node_id, node in getattr(graph, "nodes", {}).items(): - if node_id in seed_ids or getattr(node, "id", "") in seed_ids: - continue - - try: - start_line = int(getattr(node.range, "start_line", 1) or 1) - end_line = int(getattr(node.range, "end_line", start_line) or start_line) - except Exception: - start_line, end_line = 1, 1 - - related_results.append( - SearchResult( - path=node.file_path, - score=0.5, - excerpt=None, - content=getattr(node, "raw_code", "") or None, - symbol_name=node.name, - symbol_kind=node.kind, - start_line=start_line, - end_line=end_line, - metadata={"stage2_mode": "realtime", "lsp_node_id": node_id}, - ) - ) - - if related_results: - self.logger.debug( - "Stage 2 (realtime) expanded %d base results to %d related symbols", - len(coarse_results), len(related_results) - ) - - return self._combine_stage2_results(coarse_results, related_results) - - def _combine_stage2_results( - self, - coarse_results: List[SearchResult], - related_results: List[SearchResult], - ) -> List[SearchResult]: - combined = list(coarse_results) - seen_keys = {(r.path, r.symbol_name, r.start_line) for r in coarse_results} - - for related in related_results: - key = (related.path, related.symbol_name, related.start_line) - if key not in seen_keys: - seen_keys.add(key) - combined.append(related) - - return combined - - def _collect_query_feature_anchor_results( - self, - query: str, - source_path: Path, - options: SearchOptions, - *, - limit: int, - ) -> List[SearchResult]: - """Collect small lexical anchor sets for explicit file/feature hints.""" - if limit <= 0: - return [] - - from codexlens.search.ranking import ( - QueryIntent, - _path_topic_tokens, - detect_query_intent, - extract_explicit_path_hints, - is_auxiliary_reference_path, - is_generated_artifact_path, - is_test_file, - query_targets_auxiliary_files, - query_targets_generated_files, - query_targets_test_files, - ) - - explicit_hints = extract_explicit_path_hints(query) - if not explicit_hints: - return [] - skip_test_files = query_targets_test_files(query) - skip_generated_files = query_targets_generated_files(query) - skip_auxiliary_files = query_targets_auxiliary_files(query) - - anchor_limit = max(1, int(limit)) - per_hint_limit = max(2, min(6, anchor_limit)) - seed_opts = SearchOptions( - depth=options.depth, - max_workers=options.max_workers, - limit_per_dir=max(10, per_hint_limit), - total_limit=max(anchor_limit, per_hint_limit * 2), - include_symbols=False, - include_semantic=False, - files_only=False, - code_only=options.code_only, - exclude_extensions=list(options.exclude_extensions or []), - enable_vector=False, - hybrid_mode=False, - pure_vector=False, - enable_cascade=False, - inject_feature_anchors=False, - ) - - anchors_by_path: Dict[str, SearchResult] = {} - for hint_tokens in explicit_hints: - hint_query = " ".join(hint_tokens) - try: - seed_result = self.search(hint_query, source_path, options=seed_opts) - except Exception as exc: - self.logger.debug( - "Feature anchor search failed for %r: %s", - hint_query, - exc, - ) - continue - - for candidate in seed_result.results: - _, basename_tokens = _path_topic_tokens(candidate.path) - if not basename_tokens or not all(token in basename_tokens for token in hint_tokens): - continue - if not skip_test_files and is_test_file(candidate.path): - continue - if not skip_generated_files and is_generated_artifact_path(candidate.path): - continue - if not skip_auxiliary_files and is_auxiliary_reference_path(candidate.path): - continue - metadata = { - **(candidate.metadata or {}), - "feature_query_anchor": True, - "feature_query_hint": hint_query, - "feature_query_hint_tokens": list(hint_tokens), - } - anchor = candidate.model_copy( - deep=True, - update={"metadata": metadata}, - ) - existing = anchors_by_path.get(anchor.path) - if existing is None or float(anchor.score) > float(existing.score): - anchors_by_path[anchor.path] = anchor - if len(anchors_by_path) >= anchor_limit: - break - if len(anchors_by_path) >= anchor_limit: - break - - query_intent = detect_query_intent(query) - if not anchors_by_path and query_intent in {QueryIntent.KEYWORD, QueryIntent.MIXED}: - lexical_query = (query or "").strip() - if lexical_query: - try: - seed_result = self.search(lexical_query, source_path, options=seed_opts) - except Exception as exc: - self.logger.debug( - "Lexical feature anchor search failed for %r: %s", - lexical_query, - exc, - ) - else: - for candidate in seed_result.results: - if not skip_test_files and is_test_file(candidate.path): - continue - if not skip_generated_files and is_generated_artifact_path(candidate.path): - continue - if not skip_auxiliary_files and is_auxiliary_reference_path(candidate.path): - continue - metadata = { - **(candidate.metadata or {}), - "feature_query_anchor": True, - "feature_query_hint": lexical_query, - "feature_query_hint_tokens": [], - "feature_query_seed_kind": "lexical_query", - } - anchor = candidate.model_copy( - deep=True, - update={"metadata": metadata}, - ) - existing = anchors_by_path.get(anchor.path) - if existing is None or float(anchor.score) > float(existing.score): - anchors_by_path[anchor.path] = anchor - if len(anchors_by_path) >= anchor_limit: - break - - return sorted( - anchors_by_path.values(), - key=lambda result: result.score, - reverse=True, - )[:anchor_limit] - - def _merge_query_feature_anchor_results( - self, - base_results: List[SearchResult], - anchor_results: List[SearchResult], - ) -> List[SearchResult]: - """Merge explicit feature anchors into coarse candidates with comparable scores.""" - if not anchor_results: - return sorted(base_results, key=lambda result: result.score, reverse=True) - - merged: Dict[str, SearchResult] = {result.path: result for result in base_results} - base_sorted = sorted(base_results, key=lambda result: result.score, reverse=True) - base_max = float(base_sorted[0].score) if base_sorted else 1.0 - if base_sorted: - cutoff_index = min(len(base_sorted) - 1, max(0, min(4, len(base_sorted) - 1))) - anchor_floor = float(base_sorted[cutoff_index].score) - else: - anchor_floor = base_max - if anchor_floor <= 0: - anchor_floor = max(base_max * 0.85, 0.01) - - for index, anchor in enumerate(anchor_results): - target_score = max( - anchor_floor, - base_max * max(0.75, 0.92 - (0.03 * index)), - 0.01, - ) - existing = merged.get(anchor.path) - existing_metadata = existing.metadata or {} if existing is not None else {} - metadata = { - **existing_metadata, - **(anchor.metadata or {}), - "feature_query_anchor": True, - } - if existing is not None: - target_score = max(float(existing.score), target_score) - merged[anchor.path] = existing.model_copy( - deep=True, - update={ - "score": target_score, - "metadata": metadata, - }, - ) - else: - merged[anchor.path] = anchor.model_copy( - deep=True, - update={ - "score": target_score, - "metadata": metadata, - }, - ) - - return sorted(merged.values(), key=lambda result: result.score, reverse=True) - - def _inject_query_feature_anchors( - self, - query: str, - source_path: Path, - options: SearchOptions, - base_results: List[SearchResult], - *, - limit: int, - ) -> List[SearchResult]: - """Inject explicit file/feature anchors into coarse candidate sets.""" - anchor_results = self._collect_query_feature_anchor_results( - query, - source_path, - options, - limit=limit, - ) - return self._merge_query_feature_anchor_results(base_results, anchor_results) - - @staticmethod - def _combine_stage3_anchor_results( - anchor_results: List[SearchResult], - clustered_results: List[SearchResult], - *, - target_count: int, - ) -> List[SearchResult]: - """Combine preserved query anchors with Stage 3 representatives.""" - if target_count <= 0: - return [] - merged: List[SearchResult] = [] - seen: set[tuple[str, Optional[str], Optional[int]]] = set() - for result in [*anchor_results, *clustered_results]: - key = (result.path, result.symbol_name, result.start_line) - if key in seen: - continue - seen.add(key) - merged.append(result) - if len(merged) >= target_count: - break - return merged - - def _select_stage3_query_anchor_results( - self, - query: str, - expanded_results: List[SearchResult], - *, - limit: int, - ) -> List[SearchResult]: - """Select a small number of explicit feature anchors to preserve through clustering.""" - if limit <= 0 or not expanded_results: - return [] - - ranked_results = self._apply_default_path_penalties(query, expanded_results) - anchors: List[SearchResult] = [] - seen: set[tuple[str, Optional[str], Optional[int]]] = set() - for result in ranked_results: - metadata = result.metadata or {} - if not metadata.get("feature_query_anchor"): - continue - key = (result.path, result.symbol_name, result.start_line) - if key in seen: - continue - seen.add(key) - anchors.append(result) - if len(anchors) >= limit: - break - return anchors - - def _find_lsp_workspace_root(self, start_path: Path) -> Path: - """Best-effort workspace root selection for LSP initialization. - - Many language servers (e.g. Pyright) use workspace-relative include/exclude - patterns, so using a deep subdir (like "src") as root can break reference - and call-hierarchy queries. - """ - start = Path(start_path).resolve() - if start.is_file(): - start = start.parent - - # Prefer an explicit LSP config file in the workspace. - for current in [start, *list(start.parents)]: - try: - if (current / "lsp-servers.json").is_file(): - return current - except OSError: - continue - - # Fallback heuristics for project root markers. - for current in [start, *list(start.parents)]: - try: - if (current / ".git").exists() or (current / "pyproject.toml").is_file(): - return current - except OSError: - continue - - return start - - def _find_lsp_config_file(self, start_path: Path) -> Optional[Path]: - """Find a lsp-servers.json by walking up from start_path.""" - start = Path(start_path).resolve() - if start.is_file(): - start = start.parent - - for current in [start, *list(start.parents)]: - try: - candidate = current / "lsp-servers.json" - if candidate.is_file(): - return candidate - except OSError: - continue - return None - - def _stage3_cluster_prune( - self, - expanded_results: List[SearchResult], - target_count: int, - query: Optional[str] = None, - ) -> List[SearchResult]: - """Stage 3: Cluster expanded results and select representatives. - - Uses the extensible clustering infrastructure from codexlens.search.clustering - to group similar results and select the best representative from each cluster. - - Args: - expanded_results: Results from Stage 2 expansion - target_count: Target number of representative results - - Returns: - List of representative results (one per cluster) - """ - if not expanded_results: - return [] - - original_target_count = target_count - anchor_results: List[SearchResult] = [] - if query: - anchor_results = self._select_stage3_query_anchor_results( - query, - expanded_results, - limit=min(4, max(1, original_target_count // 4)), - ) - if anchor_results: - anchor_keys = { - (result.path, result.symbol_name, result.start_line) - for result in anchor_results - } - expanded_results = [ - result - for result in expanded_results - if (result.path, result.symbol_name, result.start_line) not in anchor_keys - ] - target_count = max(0, original_target_count - len(anchor_results)) - if target_count <= 0: - return anchor_results[:original_target_count] - - if not expanded_results: - return self._combine_stage3_anchor_results( - anchor_results, - [], - target_count=original_target_count, - ) - - # If few results, skip clustering - if len(expanded_results) <= target_count: - return self._combine_stage3_anchor_results( - anchor_results, - expanded_results, - target_count=original_target_count, - ) - - strategy_name = "auto" - if self._config is not None: - strategy_name = getattr(self._config, "staged_clustering_strategy", "auto") or "auto" - strategy_name = str(strategy_name).strip().lower() - - if strategy_name in {"noop", "none", "off"}: - return self._combine_stage3_anchor_results( - anchor_results, - sorted(expanded_results, key=lambda r: r.score, reverse=True)[:target_count], - target_count=original_target_count, - ) - - if strategy_name in {"score", "top", "rank"}: - return self._combine_stage3_anchor_results( - anchor_results, - sorted(expanded_results, key=lambda r: r.score, reverse=True)[:target_count], - target_count=original_target_count, - ) - - if strategy_name in {"path", "file"}: - best_by_path: Dict[str, SearchResult] = {} - for r in expanded_results: - if not r.path: - continue - key = str(r.path).lower() - if key not in best_by_path or r.score > best_by_path[key].score: - best_by_path[key] = r - candidates = list(best_by_path.values()) or expanded_results - candidates.sort(key=lambda r: r.score, reverse=True) - return self._combine_stage3_anchor_results( - anchor_results, - candidates[:target_count], - target_count=original_target_count, - ) - - if strategy_name in {"dir_rr", "rr_dir", "round_robin_dir"}: - results_sorted = sorted(expanded_results, key=lambda r: r.score, reverse=True) - buckets: Dict[str, List[SearchResult]] = {} - dir_order: List[str] = [] - for r in results_sorted: - try: - d = str(Path(r.path).parent).lower() - except Exception: - d = "" - if d not in buckets: - buckets[d] = [] - dir_order.append(d) - buckets[d].append(r) - - out: List[SearchResult] = [] - while len(out) < target_count: - progressed = False - for d in dir_order: - if not buckets.get(d): - continue - out.append(buckets[d].pop(0)) - progressed = True - if len(out) >= target_count: - break - if not progressed: - break - return self._combine_stage3_anchor_results( - anchor_results, - out, - target_count=original_target_count, - ) - - try: - from codexlens.search.clustering import ( - ClusteringConfig, - get_strategy, - ) - - # Get clustering config from config - strategy_name = "auto" - min_cluster_size = 3 - - if self._config is not None: - strategy_name = getattr(self._config, "staged_clustering_strategy", "auto") - min_cluster_size = getattr(self._config, "staged_clustering_min_size", 3) - - # Get embeddings for clustering - # Try to get dense embeddings from results' content - embeddings = self._get_embeddings_for_clustering(expanded_results) - - if embeddings is None or len(embeddings) == 0: - # No embeddings available, fall back to score-based selection - self.logger.debug("No embeddings for clustering, using score-based selection") - return self._combine_stage3_anchor_results( - anchor_results, - sorted(expanded_results, key=lambda r: r.score, reverse=True)[:target_count], - target_count=original_target_count, - ) - - # Create clustering config - config = ClusteringConfig( - min_cluster_size=min(min_cluster_size, max(2, len(expanded_results) // 5)), - min_samples=2, - metric="cosine", - ) - - # Get strategy with fallback - strategy = get_strategy(strategy_name, config, fallback=True) - - # Cluster and select representatives - representatives = strategy.fit_predict(embeddings, expanded_results) - - self.logger.debug( - "Stage 3 clustered %d results into %d representatives using %s", - len(expanded_results), len(representatives), type(strategy).__name__ - ) - - # If clustering returned too few, supplement with top-scored unclustered - if len(representatives) < target_count: - rep_paths = {r.path for r in representatives} - remaining = [r for r in expanded_results if r.path not in rep_paths] - remaining_sorted = sorted(remaining, key=lambda r: r.score, reverse=True) - representatives.extend(remaining_sorted[:target_count - len(representatives)]) - - return self._combine_stage3_anchor_results( - anchor_results, - representatives[:target_count], - target_count=original_target_count, - ) - - except ImportError as exc: - self.logger.debug("Clustering not available: %s", exc) - return self._combine_stage3_anchor_results( - anchor_results, - sorted(expanded_results, key=lambda r: r.score, reverse=True)[:target_count], - target_count=original_target_count, - ) - except Exception as exc: - self.logger.debug("Stage 3 clustering failed: %s", exc) - return self._combine_stage3_anchor_results( - anchor_results, - sorted(expanded_results, key=lambda r: r.score, reverse=True)[:target_count], - target_count=original_target_count, - ) - - def _stage4_optional_rerank( - self, - query: str, - clustered_results: List[SearchResult], - k: int, - ) -> List[SearchResult]: - """Stage 4: Optional cross-encoder reranking. - - Applies cross-encoder reranking if enabled in config. - - Args: - query: Search query string - clustered_results: Results from Stage 3 clustering - k: Requested final result count before downstream path penalties - - Returns: - Reranked results sorted by cross-encoder score. This can exceed the - requested final ``k`` so the caller can still demote noisy test or - generated hits before applying the final trim. - """ - if not clustered_results: - return [] - - rerank_limit = self._resolve_rerank_candidate_limit( - k, - len(clustered_results), - ) - return self._cross_encoder_rerank(query, clustered_results, rerank_limit) - - def _get_embeddings_for_clustering( - self, - results: List[SearchResult], - ) -> Optional["np.ndarray"]: - """Get dense embeddings for clustering results. - - Tries to generate embeddings from result content for clustering. - - Args: - results: List of SearchResult objects - - Returns: - NumPy array of embeddings or None if not available - """ - if not NUMPY_AVAILABLE: - return None - - if not results: - return None - - try: - from codexlens.semantic.factory import get_embedder - - # Get embedding settings from config - embedding_backend = "fastembed" - embedding_model = "code" - use_gpu = True - - if self._config is not None: - embedding_backend = getattr(self._config, "embedding_backend", "fastembed") - embedding_model = getattr(self._config, "embedding_model", "code") - use_gpu = getattr(self._config, "embedding_use_gpu", True) - - # Create embedder - if embedding_backend == "litellm": - embedder = get_embedder(backend="litellm", model=embedding_model) - else: - embedder = get_embedder(backend="fastembed", profile=embedding_model, use_gpu=use_gpu) - - # Extract text content from results - texts = [] - for result in results: - # Use content if available, otherwise use excerpt - text = result.content or result.excerpt or "" - if not text and result.path: - text = result.path - texts.append(text[:2000]) # Limit text length - - # Generate embeddings - embeddings = embedder.embed_to_numpy(texts) - return embeddings - - except ImportError as exc: - self.logger.debug("Embedder not available for clustering: %s", exc) - return None - except Exception as exc: - self.logger.debug("Failed to generate embeddings for clustering: %s", exc) - return None - - def binary_rerank_cascade_search( - self, - query: str, - source_path: Path, - k: int = 10, - coarse_k: int = 100, - options: Optional[SearchOptions] = None, - ) -> ChainSearchResult: - """Execute binary cascade search with cross-encoder reranking. - - Combines the speed of binary vector coarse search with the quality of - cross-encoder reranking for the best balance of speed and accuracy. - - Binary + Reranker cascade process: - 1. Stage 1 (Coarse): Fast binary vector search using Hamming distance - to quickly filter to coarse_k candidates (256-dim binary, 32 bytes/vector) - 2. Stage 2 (Fine): Cross-encoder reranking for precise semantic ranking - of candidates using query-document attention - - This approach is typically faster than binary_cascade_search while - achieving similar or better quality through cross-encoder reranking. - - Performance characteristics: - - Binary search: O(N) with SIMD-accelerated XOR + popcount (~8ms) - - Cross-encoder: Applied to top coarse_k candidates (~15-20s for API) - - Total: Faster coarse + high-quality fine = best balance - - Args: - query: Natural language or keyword query string - source_path: Starting directory path - k: Number of final results to return (default 10) - coarse_k: Number of coarse candidates from first stage (default 100) - options: Search configuration (uses defaults if None) - - Returns: - ChainSearchResult with cross-encoder reranked results and statistics - - Examples: - >>> engine = ChainSearchEngine(registry, mapper, config=config) - >>> result = engine.binary_rerank_cascade_search( - ... "how to authenticate users", - ... Path("D:/project/src"), - ... k=10, - ... coarse_k=100 - ... ) - >>> for r in result.results: - ... print(f"{r.path}: {r.score:.3f}") - """ - if not NUMPY_AVAILABLE: - self.logger.warning( - "NumPy not available, falling back to standard search" - ) - return self.search(query, source_path, options=options) - - options = options or SearchOptions() - start_time = time.time() - stats = SearchStats() - - # Use config defaults if available - if self._config is not None: - if hasattr(self._config, "cascade_coarse_k"): - coarse_k = coarse_k or self._config.cascade_coarse_k - if hasattr(self._config, "cascade_fine_k"): - k = k or self._config.cascade_fine_k - - # Step 1: Find starting index - start_index = self._find_start_index(source_path) - if not start_index: - self.logger.warning(f"No index found for {source_path}") - stats.time_ms = (time.time() - start_time) * 1000 - return ChainSearchResult( - query=query, - results=[], - symbols=[], - stats=stats - ) - - # Step 2: Collect all index paths - index_paths = self._collect_index_paths(start_index, options.depth) - stats.dirs_searched = len(index_paths) - - if not index_paths: - self.logger.warning(f"No indexes collected from {start_index}") - stats.time_ms = (time.time() - start_time) * 1000 - return ChainSearchResult( - query=query, - results=[], - symbols=[], - stats=stats - ) - - # Step 4: Binary coarse search (same as binary_cascade_search) - binary_coarse_time = time.time() - coarse_candidates, _, _, stage2_index_root = self._collect_binary_coarse_candidates( - query, - index_paths, - coarse_k, - stats, - index_root=index_paths[0].parent if index_paths else None, - ) - - if not coarse_candidates: - self.logger.info("No binary candidates found, falling back to standard search for reranking") - # Fall back to standard search which uses FTS+Vector - return self.search(query, source_path, options=options) - - # Sort by Hamming distance and take top coarse_k - coarse_candidates.sort(key=lambda x: x[1]) - coarse_candidates = coarse_candidates[:coarse_k] - - self.logger.debug( - "Binary coarse search: %d candidates in %.2fms", - len(coarse_candidates), (time.time() - binary_coarse_time) * 1000 - ) - - coarse_results = self._materialize_binary_candidates( - coarse_candidates, - stats, - stage2_index_root=stage2_index_root, - ) - - if not coarse_results: - stats.time_ms = (time.time() - start_time) * 1000 - return ChainSearchResult( - query=query, results=[], symbols=[], stats=stats - ) - - coarse_results = self._inject_query_feature_anchors( - query, - source_path, - options, - coarse_results, - limit=min(6, max(2, k)), - ) - - self.logger.debug( - "Retrieved %d chunks for cross-encoder reranking", len(coarse_results) - ) - - # Step 6: Cross-encoder reranking - rerank_time = time.time() - rerank_limit = self._resolve_rerank_candidate_limit(k, len(coarse_results)) - reranked_results = self._cross_encoder_rerank( - query, - coarse_results, - top_k=rerank_limit, - ) - - self.logger.debug( - "Cross-encoder reranking: %d results in %.2fms", - len(reranked_results), (time.time() - rerank_time) * 1000 - ) - - # Deduplicate by path (keep highest score) - path_to_result: Dict[str, SearchResult] = {} - for result in reranked_results: - if result.path not in path_to_result or result.score > path_to_result[result.path].score: - path_to_result[result.path] = result - - final_results = self._apply_default_path_penalties( - query, - list(path_to_result.values()), - )[:k] - - stats.files_matched = len(final_results) - stats.time_ms = (time.time() - start_time) * 1000 - - self.logger.debug( - "Binary+Rerank cascade search complete: %d results in %.2fms", - len(final_results), - stats.time_ms, - ) - - return ChainSearchResult( - query=query, - results=final_results, - symbols=[], - stats=stats, - ) - - def dense_rerank_cascade_search( - self, - query: str, - source_path: Path, - k: int = 10, - coarse_k: int = 100, - options: Optional[SearchOptions] = None, - ) -> ChainSearchResult: - """Execute dense cascade search with cross-encoder reranking. - - Combines dense vector coarse search (HNSW) with cross-encoder reranking - for comparison with binary_rerank strategy. - - Dense + Reranker cascade process: - 1. Stage 1 (Coarse): Dense vector search using HNSW (cosine similarity) - to get coarse_k candidates (2048-dim float32) - 2. Stage 2 (Fine): Cross-encoder reranking for precise semantic ranking - - Args: - query: Natural language or keyword query string - source_path: Starting directory path - k: Number of final results to return (default 10) - coarse_k: Number of coarse candidates from first stage (default 100) - options: Search configuration (uses defaults if None) - - Returns: - ChainSearchResult with cross-encoder reranked results and statistics - """ - options = options or SearchOptions() - - if query_prefers_lexical_search(query): - self.logger.debug( - "Dense rerank shortcut: using lexical search for lexical-priority query %r", - query, - ) - lexical_options = SearchOptions( - depth=options.depth, - max_workers=options.max_workers, - limit_per_dir=max(options.limit_per_dir, max(10, k)), - total_limit=max(options.total_limit, max(20, k * 4)), - offset=options.offset, - include_symbols=False, - files_only=options.files_only, - include_semantic=False, - code_only=options.code_only, - exclude_extensions=list(options.exclude_extensions or []), - hybrid_mode=False, - enable_fuzzy=True, - enable_vector=False, - pure_vector=False, - enable_cascade=False, - hybrid_weights=None, - group_results=options.group_results, - grouping_threshold=options.grouping_threshold, - inject_feature_anchors=options.inject_feature_anchors, - ) - lexical_result = self.search(query, source_path, options=lexical_options) - return ChainSearchResult( - query=query, - results=lexical_result.results, - related_results=lexical_result.related_results, - symbols=[], - stats=lexical_result.stats, - ) - - if not NUMPY_AVAILABLE: - self.logger.warning( - "NumPy not available, falling back to standard search" - ) - return self.search(query, source_path, options=options) - start_time = time.time() - stats = SearchStats() - - # Use config defaults if available - if self._config is not None: - if hasattr(self._config, "cascade_coarse_k"): - coarse_k = coarse_k or self._config.cascade_coarse_k - if hasattr(self._config, "cascade_fine_k"): - k = k or self._config.cascade_fine_k - - # Step 1: Find starting index - start_index = self._find_start_index(source_path) - if not start_index: - self.logger.warning(f"No index found for {source_path}") - stats.time_ms = (time.time() - start_time) * 1000 - return ChainSearchResult( - query=query, - results=[], - symbols=[], - stats=stats - ) - - # Step 2: Collect all index paths - index_paths = self._collect_index_paths(start_index, options.depth) - stats.dirs_searched = len(index_paths) - - if not index_paths: - self.logger.warning(f"No indexes collected from {start_index}") - stats.time_ms = (time.time() - start_time) * 1000 - return ChainSearchResult( - query=query, - results=[], - symbols=[], - stats=stats - ) - - # Step 3-5: Group child indexes by centralized dense vector root and search each root. - dense_coarse_time = time.time() - coarse_candidates: List[Tuple[int, float, Path]] = [] # (chunk_id, distance, index_path) - central_index_roots: Dict[Path, Path] = {} - dense_root_groups, dense_fallback_index_paths = self._group_index_paths_by_dense_root(index_paths) - dense_query_cache: Dict[Tuple[str, str, bool], "np.ndarray"] = {} - try: - from codexlens.semantic.ann_index import ANNIndex - - dense_candidate_groups: List[List[Tuple[int, float, Path]]] = [] - dense_roots_by_settings = self._group_dense_roots_by_embedding_settings( - dense_root_groups - ) - if len(dense_roots_by_settings) > 1: - self.logger.debug( - "Dense coarse search detected %d embedding setting groups; interleaving candidates across groups", - len(dense_roots_by_settings), - ) - - for dense_roots in dense_roots_by_settings.values(): - group_candidates: List[Tuple[int, float, Path]] = [] - for dense_root in dense_roots: - try: - query_dense = self._embed_dense_query( - query, - index_root=dense_root, - query_cache=dense_query_cache, - ) - ann_index = self._get_cached_centralized_dense_index( - dense_root, - int(query_dense.shape[0]), - ) - if ann_index is None: - continue - - ids, distances = ann_index.search(query_dense, top_k=coarse_k) - central_index_db = dense_root / "_index.db" - central_index_roots[central_index_db] = dense_root - for chunk_id, dist in zip(ids, distances): - group_candidates.append((chunk_id, dist, central_index_db)) - if ids: - self.logger.debug( - "Centralized dense search: %d candidates from %s", - len(ids), - dense_root / VECTORS_HNSW_NAME, - ) - except Exception as exc: - self.logger.debug( - "Centralized dense search failed for %s: %s", - dense_root, - exc, - ) - if group_candidates: - dense_candidate_groups.append(group_candidates) - - coarse_candidates = self._interleave_dense_candidate_groups( - dense_candidate_groups, - coarse_k, - ) - - if not coarse_candidates: - fallback_index_paths = dense_fallback_index_paths if dense_root_groups else index_paths - fallback_candidate_groups: List[List[Tuple[int, float, Path]]] = [] - fallback_index_groups = self._group_dense_index_paths_by_embedding_settings( - fallback_index_paths - ) - if len(fallback_index_groups) > 1: - self.logger.debug( - "Legacy dense fallback detected %d embedding setting groups; interleaving candidates across groups", - len(fallback_index_groups), - ) - for grouped_index_paths in fallback_index_groups.values(): - group_candidates: List[Tuple[int, float, Path]] = [] - for index_path in grouped_index_paths: - try: - query_dense = self._embed_dense_query( - query, - index_root=index_path.parent, - query_cache=dense_query_cache, - ) - ann_index = self._get_cached_legacy_dense_index( - index_path, - int(query_dense.shape[0]), - ) - if ann_index is None: - continue - - ids, distances = ann_index.search(query_dense, top_k=coarse_k) - for chunk_id, dist in zip(ids, distances): - group_candidates.append((chunk_id, dist, index_path)) - except Exception as exc: - self.logger.debug( - "Dense search failed for %s: %s", index_path, exc - ) - if group_candidates: - fallback_candidate_groups.append(group_candidates) - - coarse_candidates = self._interleave_dense_candidate_groups( - fallback_candidate_groups, - coarse_k, - ) - except Exception as exc: - self.logger.warning(f"Failed to prepare dense coarse search: {exc}") - return self.search(query, source_path, options=options) - - if not coarse_candidates: - self.logger.info("No dense candidates found, falling back to standard search") - return self.search(query, source_path, options=options) - - self.logger.debug( - "Dense coarse search: %d candidates in %.2fms", - len(coarse_candidates), (time.time() - dense_coarse_time) * 1000 - ) - - # Step 6: Build SearchResult objects for cross-encoder reranking - candidates_by_index: Dict[Path, List[int]] = {} - for chunk_id, distance, index_path in coarse_candidates: - if index_path not in candidates_by_index: - candidates_by_index[index_path] = [] - candidates_by_index[index_path].append(chunk_id) - - # Retrieve chunk content for reranking - import sqlite3 - coarse_results: List[SearchResult] = [] - - for index_path, chunk_ids in candidates_by_index.items(): - try: - central_root = central_index_roots.get(index_path) - if central_root is not None: - # Use centralized metadata from _vectors_meta.db - meta_db_path = central_root / "_vectors_meta.db" - if meta_db_path.exists(): - conn = sqlite3.connect(str(meta_db_path)) - conn.row_factory = sqlite3.Row - placeholders = ",".join("?" * len(chunk_ids)) - cursor = conn.execute( - f""" - SELECT chunk_id, file_path, content, start_line, end_line - FROM chunk_metadata - WHERE chunk_id IN ({placeholders}) - """, - chunk_ids - ) - chunks_data = [ - { - "id": row["chunk_id"], - "file_path": row["file_path"], - "content": row["content"], - "metadata": json.dumps({ - "start_line": row["start_line"], - "end_line": row["end_line"] - }), - "category": "code" if row["file_path"].endswith(('.py', '.ts', '.js', '.java', '.go', '.rs', '.cpp', '.c')) else "doc", - } - for row in cursor.fetchall() - ] - conn.close() - else: - chunks_data = [] - else: - # Fall back to per-directory semantic_chunks table - conn = sqlite3.connect(str(index_path)) - conn.row_factory = sqlite3.Row - placeholders = ",".join("?" * len(chunk_ids)) - cursor = conn.execute( - f""" - SELECT id, file_path, content, metadata, category - FROM semantic_chunks - WHERE id IN ({placeholders}) - """, - chunk_ids - ) - chunks_data = [ - { - "id": row["id"], - "file_path": row["file_path"], - "content": row["content"], - "metadata": row["metadata"], - "category": row["category"], - } - for row in cursor.fetchall() - ] - conn.close() - - for chunk in chunks_data: - chunk_id = chunk.get("id") - distance = next( - ( - d - for cid, d, candidate_index_path in coarse_candidates - if cid == chunk_id and candidate_index_path == index_path - ), - 1.0 - ) - # Convert cosine distance to score (clamp to [0, 1] for Pydantic validation) - # Cosine distance can be > 1 for anti-correlated vectors, causing negative scores - score = max(0.0, 1.0 - distance) - - content = chunk.get("content", "") - result = SearchResult( - path=chunk.get("file_path", ""), - score=float(score), - excerpt=content[:500] if content else "", - content=content, - ) - coarse_results.append(result) - except Exception as exc: - self.logger.debug( - "Failed to retrieve chunks from %s: %s", index_path, exc - ) - - if not coarse_results: - stats.time_ms = (time.time() - start_time) * 1000 - return ChainSearchResult( - query=query, results=[], symbols=[], stats=stats - ) - - coarse_results = self._inject_query_feature_anchors( - query, - source_path, - options, - coarse_results, - limit=min(6, max(2, k)), - ) - - self.logger.debug( - "Retrieved %d chunks for cross-encoder reranking", len(coarse_results) - ) - - # Step 6: Cross-encoder reranking - rerank_time = time.time() - rerank_limit = self._resolve_rerank_candidate_limit(k, len(coarse_results)) - reranked_results = self._cross_encoder_rerank( - query, - coarse_results, - top_k=rerank_limit, - ) - - self.logger.debug( - "Cross-encoder reranking: %d results in %.2fms", - len(reranked_results), (time.time() - rerank_time) * 1000 - ) - - # Deduplicate by path (keep highest score) - path_to_result: Dict[str, SearchResult] = {} - for result in reranked_results: - if result.path not in path_to_result or result.score > path_to_result[result.path].score: - path_to_result[result.path] = result - - final_results = self._apply_default_path_penalties( - query, - list(path_to_result.values()), - )[:k] - - stats.files_matched = len(final_results) - stats.time_ms = (time.time() - start_time) * 1000 - - self.logger.debug( - "Dense+Rerank cascade search complete: %d results in %.2fms", - len(final_results), - stats.time_ms, - ) - - return ChainSearchResult( - query=query, - results=final_results, - symbols=[], - stats=stats, - ) - - def _get_or_create_binary_index(self, index_path: Path) -> Optional[Any]: - """Get or create a BinaryANNIndex for the given index path. - - .. deprecated:: - This method uses the deprecated BinaryANNIndex. For centralized indexes, - use _get_centralized_binary_searcher() instead. - - Attempts to load an existing binary index from disk. If not found, - returns None (binary index should be built during indexing). - - Args: - index_path: Path to the _index.db file - - Returns: - BinaryANNIndex instance or None if not available - """ - try: - import warnings - # Suppress deprecation warning since we're using it intentionally for legacy support - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - from codexlens.semantic.ann_index import BinaryANNIndex - - binary_index = BinaryANNIndex(index_path, dim=256) - if binary_index.load(): - return binary_index - return None - except Exception as exc: - self.logger.debug("Failed to load binary index for %s: %s", index_path, exc) - return None - - def _get_centralized_binary_searcher(self, index_root: Path) -> Optional[Any]: - """Get centralized BinarySearcher for memory-mapped binary vectors. - - This is the preferred method for centralized indexes, providing faster - search via memory-mapped files. - - Args: - index_root: Root directory containing centralized index files - - Returns: - BinarySearcher instance or None if not available - """ - try: - from codexlens.search.binary_searcher import BinarySearcher - - binary_searcher = BinarySearcher(index_root) - if binary_searcher.load(): - self.logger.debug( - "Using centralized BinarySearcher with %d vectors (mmap=%s)", - binary_searcher.vector_count, - binary_searcher.is_memmap - ) - return binary_searcher - return None - except Exception as exc: - self.logger.debug("Failed to load centralized binary searcher: %s", exc) - return None - - def _find_nearest_binary_mmap_root(self, index_root: Path, *, max_levels: int = 10) -> Path: - """Walk up index_root parents to find the nearest centralized binary mmap. - - Centralized staged-binary artifacts are stored at a project index root - (e.g. `.../project/src/_binary_vectors.mmap`), but staged search often starts - from the nearest ancestor `_index.db` path, which can be nested deeper. - - This helper makes Stage 1 robust by locating the nearest ancestor directory - that contains the centralized `_binary_vectors.mmap`. - """ - current_dir = Path(index_root).resolve() - for _ in range(max(0, int(max_levels)) + 1): - try: - if (current_dir / BINARY_VECTORS_MMAP_NAME).exists(): - return current_dir - except Exception: - return Path(index_root).resolve() - - parent = current_dir.parent - if parent == current_dir: - break - current_dir = parent - - return Path(index_root).resolve() - - def _find_nearest_dense_hnsw_root( - self, - index_root: Path, - *, - max_levels: int = 10, - ) -> Optional[Path]: - """Walk up index_root parents to find the nearest centralized dense HNSW root.""" - - current_dir = Path(index_root).resolve() - for _ in range(max(0, int(max_levels)) + 1): - try: - if (current_dir / VECTORS_HNSW_NAME).exists(): - return current_dir - except Exception: - return None - - parent = current_dir.parent - if parent == current_dir: - break - current_dir = parent - - return None - - def _group_index_paths_by_binary_root( - self, - index_paths: List[Path], - *, - preferred_root: Optional[Path] = None, - ) -> Tuple[List[Path], List[Path]]: - """Group collected indexes by centralized binary mmap root.""" - - grouped: Dict[Path, List[Path]] = {} - ungrouped: List[Path] = [] - preferred_root = ( - Path(preferred_root).resolve() - if preferred_root is not None - else None - ) - - for index_path in index_paths: - candidate_roots: List[Path] = [index_path.parent] - if preferred_root is not None and preferred_root != index_path.parent: - candidate_roots.append(preferred_root) - - resolved_root: Optional[Path] = None - for candidate_root in candidate_roots: - found_root = self._find_nearest_binary_mmap_root(candidate_root) - if (found_root / BINARY_VECTORS_MMAP_NAME).exists(): - resolved_root = found_root - break - - if resolved_root is None: - ungrouped.append(index_path) - continue - - grouped.setdefault(resolved_root, []).append(index_path) - - return [root for root in grouped if grouped[root]], ungrouped - - def _group_index_paths_by_dense_root( - self, - index_paths: List[Path], - ) -> Tuple[List[Path], List[Path]]: - """Group collected indexes by centralized dense HNSW root.""" - - grouped: Dict[Path, List[Path]] = {} - ungrouped: List[Path] = [] - - for index_path in index_paths: - dense_root = self._find_nearest_dense_hnsw_root(index_path.parent) - if dense_root is None: - ungrouped.append(index_path) - continue - grouped.setdefault(dense_root, []).append(index_path) - - return [root for root in grouped if grouped[root]], ungrouped - - def _group_dense_roots_by_embedding_settings( - self, - dense_roots: List[Path], - ) -> Dict[Tuple[str, str, bool], List[Path]]: - """Group dense roots by the embedding settings used to build them.""" - grouped: Dict[Tuple[str, str, bool], List[Path]] = {} - for dense_root in dense_roots: - settings = self._resolve_dense_embedding_settings(index_root=dense_root) - grouped.setdefault(settings, []).append(dense_root) - return grouped - - def _group_dense_index_paths_by_embedding_settings( - self, - index_paths: List[Path], - ) -> Dict[Tuple[str, str, bool], List[Path]]: - """Group legacy dense ANN indexes by the embedding settings used to query them.""" - grouped: Dict[Tuple[str, str, bool], List[Path]] = {} - for index_path in index_paths: - settings = self._resolve_dense_embedding_settings( - index_root=index_path.parent, - ) - grouped.setdefault(settings, []).append(index_path) - return grouped - - @staticmethod - def _interleave_dense_candidate_groups( - candidate_groups: List[List[Tuple[int, float, Path]]], - limit: int, - ) -> List[Tuple[int, float, Path]]: - """Interleave locally ranked dense candidates from mixed embedding groups.""" - if limit <= 0: - return [] - - ordered_groups = [ - sorted(group, key=lambda item: item[1]) - for group in candidate_groups - if group - ] - if not ordered_groups: - return [] - if len(ordered_groups) == 1: - return ordered_groups[0][:limit] - - merged: List[Tuple[int, float, Path]] = [] - offsets = [0 for _ in ordered_groups] - while len(merged) < limit: - made_progress = False - for group_index, group in enumerate(ordered_groups): - offset = offsets[group_index] - if offset >= len(group): - continue - merged.append(group[offset]) - offsets[group_index] += 1 - made_progress = True - if len(merged) >= limit: - break - if not made_progress: - break - return merged - - def _resolve_dense_embedding_settings( - self, - *, - index_root: Optional[Path], - ) -> Tuple[str, str, bool]: - """Resolve embedding backend/profile for a dense vector root.""" - - embedding_backend = "litellm" - embedding_model = "qwen3-embedding-sf" - use_gpu = True - loaded_from_root = False - - if index_root is not None: - central_index_db = index_root / "_index.db" - if central_index_db.exists(): - try: - from codexlens.semantic.vector_store import VectorStore - - with VectorStore(central_index_db) as vs: - model_config = vs.get_model_config() - if model_config: - embedding_backend = str( - model_config.get("backend", embedding_backend) - ) - if embedding_backend == "litellm": - embedding_model = str( - model_config.get("model_name", embedding_model) - ) - else: - embedding_model = str( - model_config.get( - "model_profile", - model_config.get("model_name", embedding_model), - ) - ) - loaded_from_root = True - except Exception as exc: - self.logger.debug( - "Failed to read dense embedding config from %s: %s", - central_index_db, - exc, - ) - - if self._config is not None: - if not loaded_from_root: - config_backend = getattr(self._config, "embedding_backend", None) - config_model = getattr(self._config, "embedding_model", None) - if config_backend: - embedding_backend = str(config_backend) - if config_model: - embedding_model = str(config_model) - use_gpu = bool(getattr(self._config, "embedding_use_gpu", True)) - - return embedding_backend, embedding_model, use_gpu - - def _embed_dense_query( - self, - query: str, - *, - index_root: Optional[Path], - query_cache: Optional[Dict[Tuple[str, str, bool], "np.ndarray"]] = None, - ) -> "np.ndarray": - """Embed a query using the model configuration associated with a dense root.""" - - from codexlens.semantic.factory import get_embedder - - embedding_backend, embedding_model, use_gpu = self._resolve_dense_embedding_settings( - index_root=index_root, - ) - cache_key = (embedding_backend, embedding_model, use_gpu) - if query_cache is not None and cache_key in query_cache: - return query_cache[cache_key] - - if embedding_backend == "litellm": - embedder = get_embedder(backend="litellm", model=embedding_model) - else: - embedder = get_embedder( - backend="fastembed", - profile=embedding_model, - use_gpu=use_gpu, - ) - - query_dense = embedder.embed_to_numpy([query])[0] - if query_cache is not None: - query_cache[cache_key] = query_dense - - self.logger.debug( - "Dense query embedding: %d-dim via %s/%s", - int(query_dense.shape[0]), - embedding_backend, - embedding_model, - ) - return query_dense - - def _embed_query_for_binary_searcher( - self, - query: str, - *, - binary_searcher: Any, - query_cache: Optional[Dict[Tuple[str, str, bool], "np.ndarray"]] = None, - ) -> "np.ndarray": - """Embed a query using the model configuration exposed by BinarySearcher.""" - - use_gpu = True - if self._config is not None: - use_gpu = getattr(self._config, "embedding_use_gpu", True) - - query_dense = None - backend = getattr(binary_searcher, "backend", None) - model = getattr(binary_searcher, "model", None) - profile = getattr(binary_searcher, "model_profile", None) or "code" - cache_key = ( - str(backend or "fastembed"), - str(model or profile), - bool(use_gpu), - ) - - if query_cache is not None and cache_key in query_cache: - return query_cache[cache_key] - - if backend == "litellm": - try: - from codexlens.semantic.factory import get_embedder as get_factory_embedder - - embedder = get_factory_embedder( - backend="litellm", - model=model or "code", - ) - query_dense = embedder.embed_to_numpy([query])[0] - except Exception: - query_dense = None - - if query_dense is None: - from codexlens.semantic.embedder import get_embedder - - embedder = get_embedder(profile=str(profile), use_gpu=use_gpu) - query_dense = embedder.embed_to_numpy([query])[0] - - if query_cache is not None: - query_cache[cache_key] = query_dense - - return query_dense - - def _collect_binary_coarse_candidates( - self, - query: str, - index_paths: List[Path], - coarse_k: int, - stats: SearchStats, - *, - index_root: Optional[Path] = None, - allow_dense_fallback: bool = False, - ) -> Tuple[List[Tuple[int, float, Path]], bool, bool, Optional[Path]]: - """Collect coarse candidates from centralized/legacy binary indexes.""" - - try: - from codexlens.indexing.embedding import BinaryEmbeddingBackend - except ImportError as exc: - self.logger.warning( - "BinaryEmbeddingBackend not available: %s", exc - ) - return [], False, False, None - - requested_index_root = ( - Path(index_root).resolve() - if index_root is not None - else (index_paths[0].parent if index_paths else None) - ) - coarse_candidates: List[Tuple[int, float, Path]] = [] - used_centralized = False - using_dense_fallback = False - dense_query_cache: Dict[Tuple[str, str, bool], "np.ndarray"] = {} - binary_roots_with_hits: set[Path] = set() - stage2_index_root: Optional[Path] = None - - binary_root_groups, _ = self._group_index_paths_by_binary_root( - index_paths, - preferred_root=requested_index_root, - ) - for binary_root in binary_root_groups: - binary_searcher = self._get_centralized_binary_searcher(binary_root) - if binary_searcher is None: - continue - try: - query_dense = self._embed_query_for_binary_searcher( - query, - binary_searcher=binary_searcher, - query_cache=dense_query_cache, - ) - results = binary_searcher.search(query_dense, top_k=coarse_k) - for chunk_id, distance in results: - coarse_candidates.append((chunk_id, float(distance), binary_root)) - if results: - used_centralized = True - binary_roots_with_hits.add(binary_root) - self.logger.debug( - "Centralized binary search found %d candidates from %s", - len(results), - binary_root, - ) - except Exception as exc: - self.logger.debug( - "Centralized binary search failed for %s: %s", - binary_root, - exc, - ) - - if len(binary_roots_with_hits) == 1: - stage2_index_root = next(iter(binary_roots_with_hits)) - - if not used_centralized: - has_legacy_binary_vectors = any( - (p.parent / f"{p.stem}_binary_vectors.bin").exists() for p in index_paths - ) - if has_legacy_binary_vectors: - use_gpu = True - if self._config is not None: - use_gpu = getattr(self._config, "embedding_use_gpu", True) - - query_binary = None - try: - binary_backend = BinaryEmbeddingBackend(use_gpu=use_gpu) - query_binary = binary_backend.embed_packed([query])[0] - except Exception as exc: - self.logger.warning(f"Failed to generate binary query embedding: {exc}") - query_binary = None - - if query_binary is not None: - for index_path in index_paths: - try: - binary_index = self._get_or_create_binary_index(index_path) - if binary_index is None or binary_index.count() == 0: - continue - ids, distances = binary_index.search(query_binary, coarse_k) - for chunk_id, dist in zip(ids, distances): - coarse_candidates.append((chunk_id, float(dist), index_path)) - except Exception as exc: - self.logger.debug( - "Binary search failed for %s: %s", index_path, exc - ) - stats.errors.append( - f"Binary search failed for {index_path}: {exc}" - ) - else: - self.logger.debug( - "No legacy binary vector files found; skipping legacy binary search fallback" - ) - - if not coarse_candidates and allow_dense_fallback: - dense_candidates: List[Tuple[int, float, Path]] = [] - dense_roots_with_hits: set[Path] = set() - try: - from codexlens.semantic.ann_index import ANNIndex - - dense_root_groups, dense_fallback_index_paths = self._group_index_paths_by_dense_root(index_paths) - dense_candidate_groups: List[List[Tuple[int, float, Path]]] = [] - dense_roots_by_settings = self._group_dense_roots_by_embedding_settings( - dense_root_groups - ) - if len(dense_roots_by_settings) > 1: - self.logger.debug( - "Stage 1 dense fallback detected %d embedding setting groups; interleaving candidates across groups", - len(dense_roots_by_settings), - ) - for dense_roots in dense_roots_by_settings.values(): - group_candidates: List[Tuple[int, float, Path]] = [] - for dense_root in dense_roots: - try: - query_dense = self._embed_dense_query( - query, - index_root=dense_root, - query_cache=dense_query_cache, - ) - ann_index = self._get_cached_centralized_dense_index( - dense_root, - int(query_dense.shape[0]), - ) - if ann_index is None: - continue - ids, distances = ann_index.search(query_dense, top_k=coarse_k) - for chunk_id, dist in zip(ids, distances): - group_candidates.append((chunk_id, float(dist), dense_root)) - if ids: - dense_roots_with_hits.add(dense_root) - self.logger.debug( - "Stage 1 centralized dense fallback: %d candidates from %s", - len(ids), - dense_root, - ) - except Exception as exc: - self.logger.debug( - "Dense coarse search failed for %s: %s", - dense_root, - exc, - ) - if group_candidates: - dense_candidate_groups.append(group_candidates) - - dense_candidates = self._interleave_dense_candidate_groups( - dense_candidate_groups, - coarse_k, - ) - - fallback_index_paths = dense_fallback_index_paths if dense_root_groups else index_paths - if not dense_candidates: - fallback_candidate_groups: List[List[Tuple[int, float, Path]]] = [] - fallback_index_groups = self._group_dense_index_paths_by_embedding_settings( - fallback_index_paths - ) - if len(fallback_index_groups) > 1: - self.logger.debug( - "Stage 1 legacy dense fallback detected %d embedding setting groups; interleaving candidates across groups", - len(fallback_index_groups), - ) - for grouped_index_paths in fallback_index_groups.values(): - group_candidates = [] - for index_path in grouped_index_paths: - try: - query_dense = self._embed_dense_query( - query, - index_root=index_path.parent, - query_cache=dense_query_cache, - ) - ann_index = self._get_cached_legacy_dense_index( - index_path, - int(query_dense.shape[0]), - ) - if ann_index is None: - continue - ids, distances = ann_index.search(query_dense, top_k=coarse_k) - for chunk_id, dist in zip(ids, distances): - group_candidates.append((chunk_id, float(dist), index_path)) - except Exception as exc: - self.logger.debug( - "Dense coarse search failed for %s: %s", index_path, exc - ) - if group_candidates: - fallback_candidate_groups.append(group_candidates) - - dense_candidates = self._interleave_dense_candidate_groups( - fallback_candidate_groups, - coarse_k, - ) - except Exception as exc: - self.logger.debug("Dense coarse search fallback unavailable: %s", exc) - dense_candidates = [] - - if dense_candidates: - if stage2_index_root is None and len(dense_roots_with_hits) == 1: - stage2_index_root = next(iter(dense_roots_with_hits)) - coarse_candidates = dense_candidates - using_dense_fallback = True - - if coarse_candidates: - if using_dense_fallback: - coarse_candidates = coarse_candidates[:coarse_k] - else: - coarse_candidates.sort(key=lambda x: x[1]) - coarse_candidates = coarse_candidates[:coarse_k] - - return coarse_candidates, used_centralized, using_dense_fallback, stage2_index_root - - def _materialize_binary_candidates( - self, - coarse_candidates: List[Tuple[int, float, Path]], - stats: SearchStats, - *, - stage2_index_root: Optional[Path] = None, - using_dense_fallback: bool = False, - ) -> List[SearchResult]: - """Fetch chunk payloads for coarse binary/dense-fallback candidates.""" - - if not coarse_candidates: - return [] - - coarse_results: List[Tuple[int, SearchResult]] = [] - candidates_by_index: Dict[Path, List[int]] = {} - candidate_order: Dict[Tuple[Path, int], int] = {} - for chunk_id, _, idx_path in coarse_candidates: - if idx_path not in candidates_by_index: - candidates_by_index[idx_path] = [] - candidates_by_index[idx_path].append(chunk_id) - candidate_order.setdefault((idx_path, int(chunk_id)), len(candidate_order)) - - import sqlite3 - - central_meta_store = None - central_meta_path = stage2_index_root / VECTORS_META_DB_NAME if stage2_index_root else None - if central_meta_path and central_meta_path.exists(): - central_meta_store = VectorMetadataStore(central_meta_path) - - for idx_path, chunk_ids in candidates_by_index.items(): - try: - chunks_data = [] - if central_meta_store is not None and stage2_index_root is not None and idx_path == stage2_index_root: - chunks_data = central_meta_store.get_chunks_by_ids(chunk_ids) - - if not chunks_data and idx_path.name != "_index.db": - meta_db_path = idx_path / VECTORS_META_DB_NAME - if meta_db_path.exists(): - meta_store = VectorMetadataStore(meta_db_path) - chunks_data = meta_store.get_chunks_by_ids(chunk_ids) - - if not chunks_data: - try: - conn = sqlite3.connect(str(idx_path)) - conn.row_factory = sqlite3.Row - placeholders = ",".join("?" * len(chunk_ids)) - cursor = conn.execute( - f""" - SELECT id, file_path, content, metadata, category - FROM semantic_chunks - WHERE id IN ({placeholders}) - """, - chunk_ids, - ) - chunks_data = [ - { - "id": row["id"], - "file_path": row["file_path"], - "content": row["content"], - "metadata": row["metadata"], - "category": row["category"], - } - for row in cursor.fetchall() - ] - conn.close() - except Exception: - chunks_data = [] - - for chunk in chunks_data: - chunk_id = chunk.get("id") or chunk.get("chunk_id") - distance = next( - ( - d - for cid, d, candidate_idx_path in coarse_candidates - if cid == chunk_id and candidate_idx_path == idx_path - ), - 256, - ) - if using_dense_fallback: - score = max(0.0, 1.0 - float(distance)) - else: - score = 1.0 - (float(distance) / 256.0) - - content = chunk.get("content", "") - metadata = chunk.get("metadata") - symbol_name = None - symbol_kind = None - start_line = chunk.get("start_line") - end_line = chunk.get("end_line") - if metadata: - try: - meta_dict = json.loads(metadata) if isinstance(metadata, str) else metadata - symbol_name = meta_dict.get("symbol_name") - symbol_kind = meta_dict.get("symbol_kind") - start_line = meta_dict.get("start_line", start_line) - end_line = meta_dict.get("end_line", end_line) - except Exception: - pass - - coarse_results.append( - ( - candidate_order.get((idx_path, int(chunk_id)), len(candidate_order)), - SearchResult( - path=chunk.get("file_path", ""), - score=float(score), - excerpt=content[:500] if content else "", - content=content, - symbol_name=symbol_name, - symbol_kind=symbol_kind, - start_line=start_line, - end_line=end_line, - ), - ) - ) - except Exception as exc: - self.logger.debug( - "Failed to retrieve chunks from %s: %s", idx_path, exc - ) - stats.errors.append( - f"Stage 1 chunk retrieval failed for {idx_path}: {exc}" - ) - - coarse_results.sort(key=lambda item: item[0]) - return [result for _, result in coarse_results] - - def _compute_cosine_similarity( - self, - query_vec: "np.ndarray", - doc_vec: "np.ndarray", - ) -> float: - """Compute cosine similarity between query and document vectors. - - Args: - query_vec: Query embedding vector - doc_vec: Document embedding vector - - Returns: - Cosine similarity score in range [-1, 1] - """ - if not NUMPY_AVAILABLE: - return 0.0 - - # Ensure same shape - min_len = min(len(query_vec), len(doc_vec)) - q = query_vec[:min_len] - d = doc_vec[:min_len] - - # Compute cosine similarity - dot_product = np.dot(q, d) - norm_q = np.linalg.norm(q) - norm_d = np.linalg.norm(d) - - if norm_q == 0 or norm_d == 0: - return 0.0 - - return float(dot_product / (norm_q * norm_d)) - - def _compute_cosine_similarity_batch( - self, - query_vec: "np.ndarray", - doc_matrix: "np.ndarray", - ) -> "np.ndarray": - """Compute cosine similarity between query and multiple document vectors. - - Uses vectorized matrix operations for efficient batch computation. - - Args: - query_vec: Query embedding vector of shape (dim,) - doc_matrix: Document embeddings matrix of shape (n_docs, dim) - - Returns: - Array of cosine similarity scores of shape (n_docs,) - """ - if not NUMPY_AVAILABLE: - return np.zeros(doc_matrix.shape[0]) - - # Ensure query is 1D - if query_vec.ndim > 1: - query_vec = query_vec.flatten() - - # Handle dimension mismatch by truncating to smaller dimension - min_dim = min(len(query_vec), doc_matrix.shape[1]) - q = query_vec[:min_dim] - docs = doc_matrix[:, :min_dim] - - # Compute query norm once - norm_q = np.linalg.norm(q) - if norm_q == 0: - return np.zeros(docs.shape[0]) - - # Normalize query - q_normalized = q / norm_q - - # Compute document norms (vectorized) - doc_norms = np.linalg.norm(docs, axis=1) - - # Avoid division by zero - nonzero_mask = doc_norms > 0 - scores = np.zeros(docs.shape[0], dtype=np.float32) - - if np.any(nonzero_mask): - # Normalize documents with non-zero norms - docs_normalized = docs[nonzero_mask] / doc_norms[nonzero_mask, np.newaxis] - - # Batch dot product: (n_docs, dim) @ (dim,) = (n_docs,) - scores[nonzero_mask] = docs_normalized @ q_normalized - - return scores - - def _build_results_from_candidates( - self, - candidates: List[Tuple[int, int, Path]], - index_paths: List[Path], - stats: SearchStats, - query: str, - start_time: float, - use_centralized: bool = False, - ) -> ChainSearchResult: - """Build ChainSearchResult from binary candidates using Hamming distance scores. - - Used as fallback when dense embeddings are not available. - - Args: - candidates: List of (chunk_id, hamming_distance, index_path) tuples - index_paths: List of all searched index paths - stats: SearchStats to update - query: Original query string - start_time: Search start time for timing - use_centralized: If True, index_path is the index_root directory - and VectorMetadataStore should be used instead of SQLiteStore - - Returns: - ChainSearchResult with results scored by Hamming distance - """ - results: List[SearchResult] = [] - - # Group by index path - candidates_by_index: Dict[Path, List[Tuple[int, int]]] = {} - for chunk_id, distance, index_path in candidates: - if index_path not in candidates_by_index: - candidates_by_index[index_path] = [] - candidates_by_index[index_path].append((chunk_id, distance)) - - for index_path, chunk_tuples in candidates_by_index.items(): - try: - chunk_ids = [c[0] for c in chunk_tuples] - - # Use VectorMetadataStore for centralized search, SQLiteStore for per-directory - if use_centralized: - # index_path is actually index_root directory for centralized search - meta_db_path = index_path / VECTORS_META_DB_NAME - if not meta_db_path.exists(): - self.logger.debug( - "VectorMetadataStore not found at %s, skipping", meta_db_path - ) - continue - meta_store = VectorMetadataStore(meta_db_path) - chunks_data = meta_store.get_chunks_by_ids(chunk_ids) - else: - store = SQLiteStore(index_path) - chunks_data = store.get_chunks_by_ids(chunk_ids) - - chunk_content: Dict[int, Dict[str, Any]] = { - c["id"]: c for c in chunks_data - } - - for chunk_id, distance in chunk_tuples: - chunk_info = chunk_content.get(chunk_id) - if chunk_info is None: - continue - - # Convert Hamming distance to score (lower distance = higher score) - # Max Hamming distance for 256-bit is 256 - score = 1.0 - (distance / 256.0) - - excerpt = chunk_info.get("content", "")[:500] - result = SearchResult( - path=chunk_info.get("file_path", ""), - score=float(score), - excerpt=excerpt, - ) - results.append(result) - - except Exception as exc: - self.logger.debug( - "Failed to build results from %s: %s", index_path, exc - ) - - # Deduplicate by path - path_to_result: Dict[str, SearchResult] = {} - for result in results: - if result.path not in path_to_result or result.score > path_to_result[result.path].score: - path_to_result[result.path] = result - - final_results = sorted( - path_to_result.values(), - key=lambda r: r.score, - reverse=True, - ) - - stats.files_matched = len(final_results) - stats.time_ms = (time.time() - start_time) * 1000 - - return ChainSearchResult( - query=query, - results=final_results, - symbols=[], - stats=stats, - ) - - def _cross_encoder_rerank( - self, - query: str, - results: List[SearchResult], - top_k: int, - ) -> List[SearchResult]: - """Rerank results using cross-encoder model. - - Args: - query: Search query string - results: Candidate results to rerank - top_k: Number of top results to return - - Returns: - Reranked results sorted by cross-encoder score - """ - if not results: - return [] - - # Collapse duplicate chunks from the same file before reranking. - # Otherwise, untouched tail chunks can overwrite reranked chunks for the - # same path during the later path-level deduplication step. - path_to_result: Dict[str, SearchResult] = {} - for result in results: - path = result.path - if path not in path_to_result or result.score > path_to_result[path].score: - path_to_result[path] = result - if len(path_to_result) != len(results): - self.logger.debug( - "Deduplicated rerank candidates by path: %d -> %d", - len(results), - len(path_to_result), - ) - results = sorted( - path_to_result.values(), - key=lambda item: float(item.score), - reverse=True, - ) - - reranker = self._get_cached_reranker() - if reranker is None: - return results[:top_k] - - # Use cross_encoder_rerank from ranking module - from codexlens.search.ranking import cross_encoder_rerank - - # Get chunk_type weights and test_file_penalty from config - chunk_type_weights = None - test_file_penalty = 0.0 - - if self._config is not None: - chunk_type_weights = getattr(self._config, "reranker_chunk_type_weights", None) - test_file_penalty = getattr(self._config, "reranker_test_file_penalty", 0.0) - - return cross_encoder_rerank( - query=query, - results=results, - reranker=reranker, - top_k=top_k, - batch_size=32, - chunk_type_weights=chunk_type_weights, - test_file_penalty=test_file_penalty, - ) - - def search_files_only(self, query: str, - source_path: Path, - options: Optional[SearchOptions] = None) -> List[str]: - """Search and return only matching file paths. - - Faster than full search when excerpts are not needed. - - Args: - query: FTS5 search query string - source_path: Starting directory path - options: Search configuration (uses defaults if None) - - Returns: - List of file paths as strings - - Examples: - >>> engine = ChainSearchEngine(registry, mapper) - >>> paths = engine.search_files_only("TODO", Path("D:/project")) - >>> print(f"Found {len(paths)} files with TODOs") - """ - options = options or SearchOptions() - options.files_only = True - - result = self.search(query, source_path, options) - return [r.path for r in result.results] - - def search_symbols(self, name: str, - source_path: Path, - kind: Optional[str] = None, - options: Optional[SearchOptions] = None) -> List[Symbol]: - """Chain symbol search across directory hierarchy. - - Args: - name: Symbol name pattern (partial match supported) - source_path: Starting directory path - kind: Optional symbol kind filter (e.g., 'function', 'class') - options: Search configuration (uses defaults if None) - - Returns: - List of Symbol objects sorted by name - - Examples: - >>> engine = ChainSearchEngine(registry, mapper) - >>> funcs = engine.search_symbols("init", Path("D:/project"), kind="function") - >>> for sym in funcs[:10]: - ... print(f"{sym.name} ({sym.kind}): lines {sym.range}") - """ - options = options or SearchOptions() - - start_index = self._find_start_index(source_path) - if not start_index: - self.logger.warning(f"No index found for {source_path}") - return [] - - # Fast path: project-wide global symbol index (avoids chain traversal). - if self._config is None or getattr(self._config, "global_symbol_index_enabled", True): - try: - # Avoid relying on index_to_source() here; use the same logic as _find_start_index - # to determine the effective search root directory. - search_root = source_path.resolve() - exact_index = self.mapper.source_to_index_db(search_root) - if not exact_index.exists(): - nearest = self.registry.find_nearest_index(search_root) - if nearest: - search_root = nearest.source_path - - project = self.registry.find_by_source_path(str(search_root)) - if project: - global_db_path = Path(project["index_root"]) / GlobalSymbolIndex.DEFAULT_DB_NAME - if global_db_path.exists(): - query_limit = max(int(options.total_limit) * 10, int(options.total_limit)) - with GlobalSymbolIndex(global_db_path, project_id=int(project["id"])) as global_index: - candidates = global_index.search(name=name, kind=kind, limit=query_limit) - - # Apply depth constraint relative to the start index directory. - filtered: List[Symbol] = [] - for sym in candidates: - if not sym.file: - continue - try: - root_str = str(search_root) - file_dir_str = str(Path(sym.file).parent) - - # Normalize Windows long-path prefix (\\?\) if present. - if root_str.startswith("\\\\?\\"): - root_str = root_str[4:] - if file_dir_str.startswith("\\\\?\\"): - file_dir_str = file_dir_str[4:] - - root_cmp = root_str.lower().rstrip("\\/") - dir_cmp = file_dir_str.lower().rstrip("\\/") - - # Guard against Windows cross-drive comparisons (ValueError). - if os.name == "nt": - root_drive, _ = os.path.splitdrive(root_cmp) - dir_drive, _ = os.path.splitdrive(dir_cmp) - if root_drive and dir_drive and root_drive != dir_drive: - self.logger.debug( - "Skipping symbol due to cross-drive path (root=%s file=%s name=%s)", - root_cmp, - sym.file, - sym.name, - ) - continue - - if os.path.commonpath([root_cmp, dir_cmp]) != root_cmp: - continue - - rel = os.path.relpath(dir_cmp, root_cmp) - rel_depth = 0 if rel == "." else len(rel.split(os.sep)) - except ValueError as exc: - self.logger.debug( - "Skipping symbol due to path operation failure (root=%s file=%s name=%s): %s", - str(search_root), - sym.file, - sym.name, - exc, - ) - continue - except Exception as exc: - self.logger.debug( - "Skipping symbol due to unexpected path error (root=%s file=%s name=%s): %s", - str(search_root), - sym.file, - sym.name, - exc, - ) - continue - - if options.depth >= 0 and rel_depth > options.depth: - continue - filtered.append(sym) - - if filtered: - # Match existing semantics: dedupe by (name, kind, range), sort by name. - seen = set() - unique_symbols: List[Symbol] = [] - for sym in filtered: - key = (sym.name, sym.kind, sym.range) - if key in seen: - continue - seen.add(key) - unique_symbols.append(sym) - unique_symbols.sort(key=lambda s: s.name) - return unique_symbols[: options.total_limit] - except Exception as exc: - self.logger.debug("Global symbol index fast path failed: %s", exc) - - index_paths = self._collect_index_paths(start_index, options.depth) - if not index_paths: - return [] - - return self._search_symbols_parallel( - index_paths, name, kind, options.total_limit - ) - - def search_references( - self, - symbol_name: str, - source_path: Optional[Path] = None, - depth: int = -1, - limit: int = 100, - ) -> List[ReferenceResult]: - """Find all references to a symbol across the project. - - Searches the code_relationships table in all index databases to find - where the given symbol is referenced (called, imported, inherited, etc.). - - Args: - symbol_name: Fully qualified or simple name of the symbol to find references to - source_path: Starting path for search (default: workspace root from registry) - depth: Search depth (-1 = unlimited, 0 = current dir only) - limit: Maximum results to return (default 100) - - Returns: - List of ReferenceResult objects sorted by file path and line number - - Examples: - >>> engine = ChainSearchEngine(registry, mapper) - >>> refs = engine.search_references("authenticate", Path("D:/project/src")) - >>> for ref in refs[:10]: - ... print(f"{ref.file_path}:{ref.line} ({ref.relationship_type})") - """ - import sqlite3 - from concurrent.futures import as_completed - - # Determine starting path - if source_path is None: - # Try to get workspace root from registry - mappings = self.registry.list_mappings() - if mappings: - source_path = Path(mappings[0].source_path) - else: - self.logger.warning("No source path provided and no mappings in registry") - return [] - - # Find starting index - start_index = self._find_start_index(source_path) - if not start_index: - self.logger.warning(f"No index found for {source_path}") - return [] - - # Collect all index paths - index_paths = self._collect_index_paths(start_index, depth) - if not index_paths: - self.logger.debug(f"No indexes collected from {start_index}") - return [] - - self.logger.debug( - "Searching %d indexes for references to '%s'", - len(index_paths), symbol_name - ) - - # Search in parallel - all_results: List[ReferenceResult] = [] - executor = self._get_executor() - - def search_single_index(index_path: Path) -> List[ReferenceResult]: - """Search a single index for references.""" - results: List[ReferenceResult] = [] - try: - conn = sqlite3.connect(str(index_path), check_same_thread=False) - conn.row_factory = sqlite3.Row - - # Query code_relationships for references to this symbol - # Match either target_qualified_name containing the symbol name - # or an exact match on the last component - # Try full_path first (new schema), fallback to path (old schema) - try: - rows = conn.execute( - """ - SELECT DISTINCT - f.full_path as source_file, - cr.source_line, - cr.relationship_type, - f.content - FROM code_relationships cr - JOIN symbols s ON s.id = cr.source_symbol_id - JOIN files f ON f.id = s.file_id - WHERE cr.target_qualified_name LIKE ? - OR cr.target_qualified_name LIKE ? - OR cr.target_qualified_name = ? - ORDER BY f.full_path, cr.source_line - LIMIT ? - """, - ( - f"%{symbol_name}", # Ends with symbol name - f"%.{symbol_name}", # Qualified name ending with .symbol_name - symbol_name, # Exact match - limit, - ) - ).fetchall() - except sqlite3.OperationalError: - # Fallback for old schema with 'path' column - rows = conn.execute( - """ - SELECT DISTINCT - f.path as source_file, - cr.source_line, - cr.relationship_type, - f.content - FROM code_relationships cr - JOIN symbols s ON s.id = cr.source_symbol_id - JOIN files f ON f.id = s.file_id - WHERE cr.target_qualified_name LIKE ? - OR cr.target_qualified_name LIKE ? - OR cr.target_qualified_name = ? - ORDER BY f.path, cr.source_line - LIMIT ? - """, - ( - f"%{symbol_name}", # Ends with symbol name - f"%.{symbol_name}", # Qualified name ending with .symbol_name - symbol_name, # Exact match - limit, - ) - ).fetchall() - - for row in rows: - file_path = row["source_file"] - line = row["source_line"] or 1 - rel_type = row["relationship_type"] - content = row["content"] or "" - - # Extract context (3 lines around reference) - context = self._extract_context(content, line, context_lines=3) - - results.append(ReferenceResult( - file_path=file_path, - line=line, - column=0, # Column info not stored in code_relationships - context=context, - relationship_type=rel_type, - )) - - conn.close() - except sqlite3.DatabaseError as exc: - self.logger.debug( - "Failed to search references in %s: %s", index_path, exc - ) - except Exception as exc: - self.logger.debug( - "Unexpected error searching references in %s: %s", index_path, exc - ) - - return results - - # Submit parallel searches - futures = { - executor.submit(search_single_index, idx_path): idx_path - for idx_path in index_paths - } - - for future in as_completed(futures): - try: - results = future.result() - all_results.extend(results) - except Exception as exc: - idx_path = futures[future] - self.logger.debug( - "Reference search failed for %s: %s", idx_path, exc - ) - - # Deduplicate by (file_path, line) - seen: set = set() - unique_results: List[ReferenceResult] = [] - for ref in all_results: - key = (ref.file_path, ref.line) - if key not in seen: - seen.add(key) - unique_results.append(ref) - - # Sort by file path and line - unique_results.sort(key=lambda r: (r.file_path, r.line)) - - # Apply limit - return unique_results[:limit] - - def _extract_context( - self, - content: str, - line: int, - context_lines: int = 3 - ) -> str: - """Extract lines around a given line number from file content. - - Args: - content: Full file content - line: Target line number (1-based) - context_lines: Number of lines to include before and after - - Returns: - Context snippet as a string - """ - if not content: - return "" - - lines = content.splitlines() - total_lines = len(lines) - - if line < 1 or line > total_lines: - return "" - - # Calculate range (0-indexed internally) - start = max(0, line - 1 - context_lines) - end = min(total_lines, line + context_lines) - - context = lines[start:end] - return "\n".join(context) - - # === Internal Methods === - - def _find_start_index(self, source_path: Path) -> Optional[Path]: - """Find index database path for source directory. - - Attempts exact match first, then searches for nearest ancestor index. - - Args: - source_path: Source directory path - - Returns: - Path to _index.db file, or None if not found - """ - source_path = source_path.resolve() - - # Try exact match first - exact_index = self.mapper.source_to_index_db(source_path) - if exact_index.exists(): - self.logger.debug(f"Found exact index: {exact_index}") - return exact_index - - # Try nearest ancestor via registry - nearest = self.registry.find_nearest_index(source_path) - if nearest: - self.logger.debug(f"Found nearest index: {nearest.index_path}") - return nearest.index_path - - self.logger.warning(f"No index found for {source_path}") - return None - - def _collect_index_paths(self, start_index: Path, - depth: int) -> List[Path]: - """Recursively collect all subdirectory index paths. - - Traverses directory tree via subdirs table in each _index.db, - respecting depth limit. - - Args: - start_index: Starting _index.db path - depth: Maximum depth (-1 = unlimited, 0 = current only) - - Returns: - List of _index.db paths to search - """ - collected = [] - visited = set() - scan_root = start_index.parent.resolve() - try: - scan_source_root = self.mapper.index_to_source(start_index) - except Exception: - scan_source_root = None - - def _collect_recursive(index_path: Path, current_depth: int): - # Normalize path to avoid duplicates - normalized = index_path.resolve() - if normalized in visited: - return - visited.add(normalized) - - if is_ignored_index_path(normalized, scan_root): - self.logger.debug("Skipping ignored artifact index subtree: %s", normalized) - return - - # Add current index - if normalized.exists(): - collected.append(normalized) - else: - self.logger.debug(f"Index does not exist: {normalized}") - return - - # Check depth limit - if depth >= 0 and current_depth >= depth: - return - - # Read subdirs and recurse - try: - with DirIndexStore(normalized) as store: - subdirs = store.get_subdirs() - for subdir in subdirs: - _collect_recursive(subdir.index_path, current_depth + 1) - except Exception as exc: - self.logger.warning(f"Failed to read subdirs from {normalized}: {exc}") - - _collect_recursive(start_index, 0) - - if scan_source_root is not None: - try: - descendant_roots = self.registry.find_descendant_project_roots( - scan_source_root - ) - except Exception as exc: - descendant_roots = [] - self.logger.debug( - "Failed to query descendant project roots for %s: %s", - scan_source_root, - exc, - ) - - for mapping in descendant_roots: - try: - relative_depth = len( - mapping.source_path.resolve().relative_to( - scan_source_root.resolve() - ).parts - ) - except ValueError: - continue - if depth >= 0 and relative_depth > depth: - continue - _collect_recursive(mapping.index_path, relative_depth) - - self.logger.info(f"Collected {len(collected)} indexes (depth={depth})") - return collected - - def _search_parallel(self, index_paths: List[Path], - query: str, - options: SearchOptions) -> tuple[List[SearchResult], SearchStats]: - """Search multiple indexes in parallel using shared ThreadPoolExecutor. - - Args: - index_paths: List of _index.db paths to search - query: FTS5 query string - options: Search configuration - - Returns: - Tuple of (all results, search statistics) - """ - all_results = [] - stats = SearchStats() - - # Force single-threaded execution for vector/hybrid search to avoid GPU crashes - # DirectML/ONNX have threading issues when multiple threads access GPU resources - effective_workers = options.max_workers - if options.enable_vector or options.hybrid_mode: - effective_workers = 1 - self.logger.debug("Using single-threaded mode for vector search (GPU safety)") - # Pre-load embedder to avoid initialization overhead per-search - try: - from codexlens.semantic.factory import get_embedder as get_embedder_factory - - embedding_backend = "fastembed" - embedding_model = "code" - use_gpu = True - if self._config is not None: - embedding_backend = getattr(self._config, "embedding_backend", embedding_backend) or embedding_backend - embedding_model = getattr(self._config, "embedding_model", embedding_model) or embedding_model - use_gpu = bool(getattr(self._config, "embedding_use_gpu", use_gpu)) - - if embedding_backend == "litellm": - get_embedder_factory(backend="litellm", model=embedding_model) - else: - get_embedder_factory(backend="fastembed", profile=embedding_model, use_gpu=use_gpu) - except Exception: - pass # Ignore pre-load failures - - shared_hybrid_engine = None - if options.hybrid_mode: - shared_hybrid_engine = HybridSearchEngine( - weights=options.hybrid_weights, - config=self._config, - ) - - executor = self._get_executor(effective_workers) - # Submit all search tasks - future_to_path = { - executor.submit( - self._search_single_index, - idx_path, - query, - options.limit_per_dir, - options.files_only, - options.include_semantic, - options.hybrid_mode, - options.enable_fuzzy, - options.enable_vector, - options.pure_vector, - options.hybrid_weights, - shared_hybrid_engine, - ): idx_path - for idx_path in index_paths - } - - # Collect results as they complete - for future in as_completed(future_to_path): - idx_path = future_to_path[future] - try: - results = future.result() - all_results.extend(results) - self.logger.debug(f"Got {len(results)} results from {idx_path.parent.name}") - except Exception as exc: - error_msg = f"Search failed for {idx_path}: {exc}" - self.logger.error(error_msg) - stats.errors.append(error_msg) - - return all_results, stats - - def _search_single_index(self, index_path: Path, - query: str, - limit: int, - files_only: bool = False, - include_semantic: bool = False, - hybrid_mode: bool = False, - enable_fuzzy: bool = True, - enable_vector: bool = False, - pure_vector: bool = False, - hybrid_weights: Optional[Dict[str, float]] = None, - hybrid_engine: Optional[HybridSearchEngine] = None) -> List[SearchResult]: - """Search a single index database. - - Handles exceptions gracefully, returning empty list on failure. - - Args: - index_path: Path to _index.db file - query: FTS5 query string (for FTS) or natural language query (for vector) - limit: Maximum results from this index - files_only: If True, skip snippet generation for faster search - include_semantic: If True, also search semantic keywords and merge results - hybrid_mode: If True, use hybrid search with RRF fusion - enable_fuzzy: Enable fuzzy FTS in hybrid mode - enable_vector: Enable vector semantic search - pure_vector: If True, only use vector search without FTS fallback - hybrid_weights: Custom RRF weights for hybrid search - - Returns: - List of SearchResult objects (empty on error) - """ - try: - # Use hybrid search if enabled - if hybrid_mode: - engine = hybrid_engine or HybridSearchEngine( - weights=hybrid_weights, - config=self._config, - ) - fts_results = engine.search( - index_path, - query, - limit=limit, - enable_fuzzy=enable_fuzzy, - enable_vector=enable_vector, - pure_vector=pure_vector, - ) - else: - # Single-FTS search (exact or fuzzy mode) - with DirIndexStore(index_path) as store: - # Get FTS results - if files_only: - # Fast path: return paths only without snippets - paths = store.search_files_only(query, limit=limit) - fts_results = [SearchResult(path=p, score=0.0, excerpt="") for p in paths] - else: - # Use fuzzy FTS if enable_fuzzy=True (mode="fuzzy"), otherwise exact FTS - if enable_fuzzy: - fts_results = store.search_fts_fuzzy( - query, limit=limit, return_full_content=True - ) - else: - fts_results = store.search_fts_exact( - query, limit=limit, return_full_content=True - ) - - # Optionally add semantic keyword results - if include_semantic: - try: - semantic_matches = store.search_semantic_keywords(query) - # Convert semantic matches to SearchResult with 0.8x weight - for file_entry, keywords in semantic_matches: - # Create excerpt from keywords - excerpt = f"Keywords: {', '.join(keywords[:5])}" - # Use a base score of 10.0 for semantic matches, weighted by 0.8 - semantic_result = SearchResult( - path=str(file_entry.full_path), - score=10.0 * 0.8, - excerpt=excerpt - ) - fts_results.append(semantic_result) - except Exception as sem_exc: - self.logger.debug(f"Semantic search error in {index_path}: {sem_exc}") - - return fts_results - except Exception as exc: - self.logger.debug(f"Search error in {index_path}: {exc}") - return [] - - def _filter_by_extension(self, results: List[SearchResult], - code_only: bool = False, - exclude_extensions: Optional[List[str]] = None) -> List[SearchResult]: - """Filter search results by file extension. - - Args: - results: Search results to filter - code_only: If True, exclude non-code files (md, txt, json, yaml, xml, etc.) - exclude_extensions: List of extensions to exclude (e.g., ["md", "txt"]) - - Returns: - Filtered results - """ - # Non-code file extensions (same as MCP tool smart-search.ts) - NON_CODE_EXTENSIONS = { - 'md', 'txt', 'json', 'yaml', 'yml', 'xml', 'csv', 'log', - 'ini', 'cfg', 'conf', 'toml', 'env', 'properties', - 'html', 'htm', 'svg', 'png', 'jpg', 'jpeg', 'gif', 'ico', 'webp', - 'pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx', - 'lock', 'sum', 'mod', - } - - # Build exclusion set - excluded_exts = set() - if exclude_extensions: - # Normalize extensions (remove leading dots, lowercase) - excluded_exts = {ext.lower().lstrip('.') for ext in exclude_extensions} - if code_only: - excluded_exts.update(NON_CODE_EXTENSIONS) - - if not excluded_exts: - return results - - # Filter results - filtered = [] - for result in results: - path_str = result.path - if not path_str: - continue - - # Extract extension from path - if '.' in path_str: - ext = path_str.rsplit('.', 1)[-1].lower() - if ext in excluded_exts: - continue # Skip this result - - filtered.append(result) - - return filtered - - def _merge_and_rank(self, results: List[SearchResult], - limit: int, offset: int = 0, query: Optional[str] = None) -> List[SearchResult]: - """Aggregate, deduplicate, and rank results. - - Process: - 1. Deduplicate by path (keep highest score) - 2. Sort by score descending - 3. Apply offset and limit for pagination - - Args: - results: Raw results from all indexes - limit: Maximum results to return - offset: Number of results to skip (pagination offset) - - Returns: - Deduplicated and ranked results with pagination - """ - # Deduplicate by path, keeping best score - path_to_result: Dict[str, SearchResult] = {} - for result in results: - path = result.path - if path not in path_to_result or result.score > path_to_result[path].score: - path_to_result[path] = result - - unique_results = list(path_to_result.values()) - if query: - unique_results = self._apply_default_path_penalties(query, unique_results) - else: - unique_results.sort(key=lambda r: r.score, reverse=True) - - # Apply offset and limit for pagination - return unique_results[offset:offset + limit] - - def _apply_default_path_penalties( - self, - query: str, - results: List[SearchResult], - ) -> List[SearchResult]: - """Apply default path penalties for noisy test and generated artifact results.""" - if not results: - return results - - test_penalty = 0.15 - generated_penalty = 0.35 - if self._config is not None: - test_penalty = float( - getattr(self._config, "test_file_penalty", test_penalty) or 0.0 - ) - generated_penalty = float( - getattr( - self._config, - "generated_file_penalty", - generated_penalty, - ) - or 0.0 - ) - if test_penalty <= 0 and generated_penalty <= 0: - return sorted(results, key=lambda r: r.score, reverse=True) - - from codexlens.search.ranking import ( - apply_path_penalties, - rebalance_noisy_results, - ) - - penalized = apply_path_penalties( - results, - query, - test_file_penalty=test_penalty, - generated_file_penalty=generated_penalty, - ) - return rebalance_noisy_results(penalized, query) - - def _resolve_rerank_candidate_limit( - self, - requested_k: int, - candidate_count: int, - ) -> int: - """Return the cross-encoder rerank budget before final trimming.""" - if candidate_count <= 0: - return max(1, int(requested_k or 1)) - - rerank_limit = max(1, int(requested_k or 1)) - if self._config is not None: - for attr_name in ("reranker_top_k", "reranking_top_k"): - configured_value = getattr(self._config, attr_name, None) - if isinstance(configured_value, bool): - continue - if isinstance(configured_value, (int, float)): - rerank_limit = max(rerank_limit, int(configured_value)) - - return max(1, min(candidate_count, rerank_limit)) - - def _resolve_stage3_target_count( - self, - requested_k: int, - candidate_count: int, - ) -> int: - """Return the number of Stage 3 representatives to preserve.""" - base_target = max(1, int(requested_k or 1)) * 2 - target_count = base_target - if self._config is not None and getattr( - self._config, - "enable_staged_rerank", - False, - ): - target_count = max( - target_count, - self._resolve_rerank_candidate_limit(requested_k, candidate_count), - ) - - return max(1, min(candidate_count, target_count)) - - def _search_symbols_parallel(self, index_paths: List[Path], - name: str, - kind: Optional[str], - limit: int) -> List[Symbol]: - """Search symbols across multiple indexes in parallel. - - Args: - index_paths: List of _index.db paths to search - name: Symbol name pattern - kind: Optional symbol kind filter - limit: Total symbol limit - - Returns: - Deduplicated and sorted symbols - """ - all_symbols = [] - - executor = self._get_executor() - # Submit all symbol search tasks - future_to_path = { - executor.submit( - self._search_symbols_single, - idx_path, - name, - kind - ): idx_path - for idx_path in index_paths - } - - # Collect results - for future in as_completed(future_to_path): - try: - symbols = future.result() - all_symbols.extend(symbols) - except Exception as exc: - self.logger.error(f"Symbol search failed: {exc}") - - # Deduplicate by (name, kind, range) - seen = set() - unique_symbols = [] - for sym in all_symbols: - key = (sym.name, sym.kind, sym.range) - if key not in seen: - seen.add(key) - unique_symbols.append(sym) - - # Sort by name - unique_symbols.sort(key=lambda s: s.name) - - return unique_symbols[:limit] - - def _search_symbols_single(self, index_path: Path, - name: str, - kind: Optional[str]) -> List[Symbol]: - """Search symbols in a single index. - - Args: - index_path: Path to _index.db file - name: Symbol name pattern - kind: Optional symbol kind filter - - Returns: - List of Symbol objects (empty on error) - """ - try: - with DirIndexStore(index_path) as store: - return store.search_symbols(name, kind=kind) - except Exception as exc: - self.logger.debug(f"Symbol search error in {index_path}: {exc}") - return [] - - -# === Convenience Functions === - -def quick_search(query: str, - source_path: Path, - depth: int = -1) -> List[SearchResult]: - """Quick search convenience function with automatic initialization. - - Creates temporary registry and mapper instances for one-off searches. - For repeated searches, create a ChainSearchEngine instance directly. - - Args: - query: FTS5 search query string - source_path: Starting directory path - depth: Maximum search depth (-1 = unlimited) - - Returns: - List of SearchResult objects sorted by relevance - - Examples: - >>> from pathlib import Path - >>> results = quick_search("authentication", Path("D:/project/src")) - >>> print(f"Found {len(results)} matches") - """ - registry = RegistryStore() - registry.initialize() - - mapper = PathMapper() - - with ChainSearchEngine(registry, mapper) as engine: - options = SearchOptions(depth=depth) - result = engine.search(query, source_path, options) - - registry.close() - - return result.results diff --git a/codex-lens/src/codexlens/search/clustering/__init__.py b/codex-lens/src/codexlens/search/clustering/__init__.py deleted file mode 100644 index d8161c98..00000000 --- a/codex-lens/src/codexlens/search/clustering/__init__.py +++ /dev/null @@ -1,124 +0,0 @@ -"""Clustering strategies for the staged hybrid search pipeline. - -This module provides extensible clustering infrastructure for grouping -similar search results and selecting representative results. - -Install with: pip install codexlens[clustering] - -Example: - >>> from codexlens.search.clustering import ( - ... CLUSTERING_AVAILABLE, - ... ClusteringConfig, - ... get_strategy, - ... ) - >>> config = ClusteringConfig(min_cluster_size=3) - >>> # Auto-select best available strategy with fallback - >>> strategy = get_strategy("auto", config) - >>> representatives = strategy.fit_predict(embeddings, results) - >>> - >>> # Or explicitly use a specific strategy - >>> if CLUSTERING_AVAILABLE: - ... from codexlens.search.clustering import HDBSCANStrategy - ... strategy = HDBSCANStrategy(config) - ... representatives = strategy.fit_predict(embeddings, results) -""" - -from __future__ import annotations - -# Always export base classes and factory (no heavy dependencies) -from .base import BaseClusteringStrategy, ClusteringConfig -from .factory import ( - ClusteringStrategyFactory, - check_clustering_strategy_available, - get_strategy, -) -from .noop_strategy import NoOpStrategy -from .frequency_strategy import FrequencyStrategy, FrequencyConfig - -# Feature flag for clustering availability (hdbscan + sklearn) -CLUSTERING_AVAILABLE = False -HDBSCAN_AVAILABLE = False -DBSCAN_AVAILABLE = False -_import_error: str | None = None - - -def _detect_clustering_available() -> tuple[bool, bool, bool, str | None]: - """Detect if clustering dependencies are available. - - Returns: - Tuple of (all_available, hdbscan_available, dbscan_available, error_message). - """ - hdbscan_ok = False - dbscan_ok = False - - try: - import hdbscan # noqa: F401 - hdbscan_ok = True - except ImportError: - pass - - try: - from sklearn.cluster import DBSCAN # noqa: F401 - dbscan_ok = True - except ImportError: - pass - - all_ok = hdbscan_ok and dbscan_ok - error = None - if not all_ok: - missing = [] - if not hdbscan_ok: - missing.append("hdbscan") - if not dbscan_ok: - missing.append("scikit-learn") - error = f"{', '.join(missing)} not available. Install with: pip install codexlens[clustering]" - - return all_ok, hdbscan_ok, dbscan_ok, error - - -# Initialize on module load -CLUSTERING_AVAILABLE, HDBSCAN_AVAILABLE, DBSCAN_AVAILABLE, _import_error = ( - _detect_clustering_available() -) - - -def check_clustering_available() -> tuple[bool, str | None]: - """Check if all clustering dependencies are available. - - Returns: - Tuple of (is_available, error_message). - error_message is None if available, otherwise contains install instructions. - """ - return CLUSTERING_AVAILABLE, _import_error - - -# Conditionally export strategy implementations -__all__ = [ - # Feature flags - "CLUSTERING_AVAILABLE", - "HDBSCAN_AVAILABLE", - "DBSCAN_AVAILABLE", - "check_clustering_available", - # Base classes - "BaseClusteringStrategy", - "ClusteringConfig", - # Factory - "ClusteringStrategyFactory", - "get_strategy", - "check_clustering_strategy_available", - # Always-available strategies - "NoOpStrategy", - "FrequencyStrategy", - "FrequencyConfig", -] - -# Conditionally add strategy classes to __all__ and module namespace -if HDBSCAN_AVAILABLE: - from .hdbscan_strategy import HDBSCANStrategy - - __all__.append("HDBSCANStrategy") - -if DBSCAN_AVAILABLE: - from .dbscan_strategy import DBSCANStrategy - - __all__.append("DBSCANStrategy") diff --git a/codex-lens/src/codexlens/search/clustering/base.py b/codex-lens/src/codexlens/search/clustering/base.py deleted file mode 100644 index 912a4fc6..00000000 --- a/codex-lens/src/codexlens/search/clustering/base.py +++ /dev/null @@ -1,153 +0,0 @@ -"""Base classes for clustering strategies in the hybrid search pipeline. - -This module defines the abstract base class for clustering strategies used -in the staged hybrid search pipeline. Strategies cluster search results -based on their embeddings and select representative results from each cluster. -""" - -from __future__ import annotations - -from abc import ABC, abstractmethod -from dataclasses import dataclass, field -from typing import TYPE_CHECKING, List, Optional - -if TYPE_CHECKING: - import numpy as np - from codexlens.entities import SearchResult - - -@dataclass -class ClusteringConfig: - """Configuration parameters for clustering strategies. - - Attributes: - min_cluster_size: Minimum number of results to form a cluster. - HDBSCAN default is 5, but for search results 2-3 is often better. - min_samples: Number of samples in a neighborhood for a point to be - considered a core point. Lower values allow more clusters. - metric: Distance metric for clustering. Common options: - - 'euclidean': Standard L2 distance - - 'cosine': Cosine distance (1 - cosine_similarity) - - 'manhattan': L1 distance - cluster_selection_epsilon: Distance threshold for cluster selection. - Results within this distance may be merged into the same cluster. - allow_single_cluster: If True, allow all results to form one cluster. - Useful when results are very similar. - prediction_data: If True, generate prediction data for new points. - """ - - min_cluster_size: int = 3 - min_samples: int = 2 - metric: str = "cosine" - cluster_selection_epsilon: float = 0.0 - allow_single_cluster: bool = True - prediction_data: bool = False - - def __post_init__(self) -> None: - """Validate configuration parameters.""" - if self.min_cluster_size < 2: - raise ValueError("min_cluster_size must be >= 2") - if self.min_samples < 1: - raise ValueError("min_samples must be >= 1") - if self.metric not in ("euclidean", "cosine", "manhattan"): - raise ValueError(f"metric must be one of: euclidean, cosine, manhattan; got {self.metric}") - if self.cluster_selection_epsilon < 0: - raise ValueError("cluster_selection_epsilon must be >= 0") - - -class BaseClusteringStrategy(ABC): - """Abstract base class for clustering strategies. - - Clustering strategies are used in the staged hybrid search pipeline to - group similar search results and select representative results from each - cluster, reducing redundancy while maintaining diversity. - - Subclasses must implement: - - cluster(): Group results into clusters based on embeddings - - select_representatives(): Choose best result(s) from each cluster - """ - - def __init__(self, config: Optional[ClusteringConfig] = None) -> None: - """Initialize the clustering strategy. - - Args: - config: Clustering configuration. Uses defaults if not provided. - """ - self.config = config or ClusteringConfig() - - @abstractmethod - def cluster( - self, - embeddings: "np.ndarray", - results: List["SearchResult"], - ) -> List[List[int]]: - """Cluster search results based on their embeddings. - - Args: - embeddings: NumPy array of shape (n_results, embedding_dim) - containing the embedding vectors for each result. - results: List of SearchResult objects corresponding to embeddings. - Used for additional metadata during clustering. - - Returns: - List of clusters, where each cluster is a list of indices - into the results list. Results not assigned to any cluster - (noise points) should be returned as single-element clusters. - - Example: - >>> strategy = HDBSCANStrategy() - >>> clusters = strategy.cluster(embeddings, results) - >>> # clusters = [[0, 2, 5], [1, 3], [4], [6, 7, 8]] - >>> # Result indices 0, 2, 5 are in cluster 0 - >>> # Result indices 1, 3 are in cluster 1 - >>> # Result index 4 is a noise point (singleton cluster) - >>> # Result indices 6, 7, 8 are in cluster 2 - """ - ... - - @abstractmethod - def select_representatives( - self, - clusters: List[List[int]], - results: List["SearchResult"], - embeddings: Optional["np.ndarray"] = None, - ) -> List["SearchResult"]: - """Select representative results from each cluster. - - This method chooses the best result(s) from each cluster to include - in the final search results. The selection can be based on: - - Highest score within cluster - - Closest to cluster centroid - - Custom selection logic - - Args: - clusters: List of clusters from cluster() method. - results: Original list of SearchResult objects. - embeddings: Optional embeddings array for centroid-based selection. - - Returns: - List of representative SearchResult objects, one or more per cluster, - ordered by relevance (highest score first). - - Example: - >>> representatives = strategy.select_representatives(clusters, results) - >>> # Returns best result from each cluster - """ - ... - - def fit_predict( - self, - embeddings: "np.ndarray", - results: List["SearchResult"], - ) -> List["SearchResult"]: - """Convenience method to cluster and select representatives in one call. - - Args: - embeddings: NumPy array of shape (n_results, embedding_dim). - results: List of SearchResult objects. - - Returns: - List of representative SearchResult objects. - """ - clusters = self.cluster(embeddings, results) - return self.select_representatives(clusters, results, embeddings) diff --git a/codex-lens/src/codexlens/search/clustering/dbscan_strategy.py b/codex-lens/src/codexlens/search/clustering/dbscan_strategy.py deleted file mode 100644 index 90588a91..00000000 --- a/codex-lens/src/codexlens/search/clustering/dbscan_strategy.py +++ /dev/null @@ -1,197 +0,0 @@ -"""DBSCAN-based clustering strategy for search results. - -DBSCAN (Density-Based Spatial Clustering of Applications with Noise) -is the fallback clustering strategy when HDBSCAN is not available. -""" - -from __future__ import annotations - -from typing import TYPE_CHECKING, List, Optional - -from .base import BaseClusteringStrategy, ClusteringConfig - -if TYPE_CHECKING: - import numpy as np - from codexlens.entities import SearchResult - - -class DBSCANStrategy(BaseClusteringStrategy): - """DBSCAN-based clustering strategy. - - Uses sklearn's DBSCAN algorithm as a fallback when HDBSCAN is not available. - DBSCAN requires an explicit eps parameter, which is auto-computed from the - distance distribution if not provided. - - Example: - >>> from codexlens.search.clustering import DBSCANStrategy, ClusteringConfig - >>> config = ClusteringConfig(min_cluster_size=3, metric='cosine') - >>> strategy = DBSCANStrategy(config) - >>> clusters = strategy.cluster(embeddings, results) - >>> representatives = strategy.select_representatives(clusters, results) - """ - - # Default eps percentile for auto-computation - DEFAULT_EPS_PERCENTILE: float = 15.0 - - def __init__( - self, - config: Optional[ClusteringConfig] = None, - eps: Optional[float] = None, - eps_percentile: float = DEFAULT_EPS_PERCENTILE, - ) -> None: - """Initialize DBSCAN clustering strategy. - - Args: - config: Clustering configuration. Uses defaults if not provided. - eps: Explicit eps parameter for DBSCAN. If None, auto-computed - from the distance distribution. - eps_percentile: Percentile of pairwise distances to use for - auto-computing eps. Default is 15th percentile. - - Raises: - ImportError: If sklearn is not installed. - """ - super().__init__(config) - self.eps = eps - self.eps_percentile = eps_percentile - - # Validate sklearn is available - try: - from sklearn.cluster import DBSCAN # noqa: F401 - except ImportError as exc: - raise ImportError( - "scikit-learn package is required for DBSCANStrategy. " - "Install with: pip install codexlens[clustering]" - ) from exc - - def _compute_eps(self, embeddings: "np.ndarray") -> float: - """Auto-compute eps from pairwise distance distribution. - - Uses the specified percentile of pairwise distances as eps, - which typically captures local density well. - - Args: - embeddings: NumPy array of shape (n_results, embedding_dim). - - Returns: - Computed eps value. - """ - import numpy as np - from sklearn.metrics import pairwise_distances - - # Compute pairwise distances - distances = pairwise_distances(embeddings, metric=self.config.metric) - - # Get upper triangle (excluding diagonal) - upper_tri = distances[np.triu_indices_from(distances, k=1)] - - if len(upper_tri) == 0: - # Only one point, return a default small eps - return 0.1 - - # Use percentile of distances as eps - eps = float(np.percentile(upper_tri, self.eps_percentile)) - - # Ensure eps is positive - return max(eps, 1e-6) - - def cluster( - self, - embeddings: "np.ndarray", - results: List["SearchResult"], - ) -> List[List[int]]: - """Cluster search results using DBSCAN algorithm. - - Args: - embeddings: NumPy array of shape (n_results, embedding_dim) - containing the embedding vectors for each result. - results: List of SearchResult objects corresponding to embeddings. - - Returns: - List of clusters, where each cluster is a list of indices - into the results list. Noise points are returned as singleton clusters. - """ - from sklearn.cluster import DBSCAN - import numpy as np - - n_results = len(results) - if n_results == 0: - return [] - - # Handle edge case: single result - if n_results == 1: - return [[0]] - - # Determine eps value - eps = self.eps if self.eps is not None else self._compute_eps(embeddings) - - # Configure DBSCAN clusterer - # Note: DBSCAN min_samples corresponds to min_cluster_size concept - clusterer = DBSCAN( - eps=eps, - min_samples=self.config.min_samples, - metric=self.config.metric, - ) - - # Fit and get cluster labels - # Labels: -1 = noise, 0+ = cluster index - labels = clusterer.fit_predict(embeddings) - - # Group indices by cluster label - cluster_map: dict[int, list[int]] = {} - for idx, label in enumerate(labels): - if label not in cluster_map: - cluster_map[label] = [] - cluster_map[label].append(idx) - - # Build result: non-noise clusters first, then noise as singletons - clusters: List[List[int]] = [] - - # Add proper clusters (label >= 0) - for label in sorted(cluster_map.keys()): - if label >= 0: - clusters.append(cluster_map[label]) - - # Add noise points as singleton clusters (label == -1) - if -1 in cluster_map: - for idx in cluster_map[-1]: - clusters.append([idx]) - - return clusters - - def select_representatives( - self, - clusters: List[List[int]], - results: List["SearchResult"], - embeddings: Optional["np.ndarray"] = None, - ) -> List["SearchResult"]: - """Select representative results from each cluster. - - Selects the result with the highest score from each cluster. - - Args: - clusters: List of clusters from cluster() method. - results: Original list of SearchResult objects. - embeddings: Optional embeddings (not used in score-based selection). - - Returns: - List of representative SearchResult objects, one per cluster, - ordered by score (highest first). - """ - if not clusters or not results: - return [] - - representatives: List["SearchResult"] = [] - - for cluster_indices in clusters: - if not cluster_indices: - continue - - # Find the result with the highest score in this cluster - best_idx = max(cluster_indices, key=lambda i: results[i].score) - representatives.append(results[best_idx]) - - # Sort by score descending - representatives.sort(key=lambda r: r.score, reverse=True) - - return representatives diff --git a/codex-lens/src/codexlens/search/clustering/factory.py b/codex-lens/src/codexlens/search/clustering/factory.py deleted file mode 100644 index 6c7f5b6e..00000000 --- a/codex-lens/src/codexlens/search/clustering/factory.py +++ /dev/null @@ -1,202 +0,0 @@ -"""Factory for creating clustering strategies. - -Provides a unified interface for instantiating different clustering backends -with automatic fallback chain: hdbscan -> dbscan -> noop. -""" - -from __future__ import annotations - -from typing import Any, Optional - -from .base import BaseClusteringStrategy, ClusteringConfig -from .noop_strategy import NoOpStrategy - - -def check_clustering_strategy_available(strategy: str) -> tuple[bool, str | None]: - """Check whether a specific clustering strategy can be used. - - Args: - strategy: Strategy name to check. Options: - - "hdbscan": HDBSCAN clustering (requires hdbscan package) - - "dbscan": DBSCAN clustering (requires sklearn) - - "frequency": Frequency-based clustering (always available) - - "noop": No-op strategy (always available) - - Returns: - Tuple of (is_available, error_message). - error_message is None if available, otherwise contains install instructions. - """ - strategy = (strategy or "").strip().lower() - - if strategy == "hdbscan": - try: - import hdbscan # noqa: F401 - except ImportError: - return False, ( - "hdbscan package not available. " - "Install with: pip install codexlens[clustering]" - ) - return True, None - - if strategy == "dbscan": - try: - from sklearn.cluster import DBSCAN # noqa: F401 - except ImportError: - return False, ( - "scikit-learn package not available. " - "Install with: pip install codexlens[clustering]" - ) - return True, None - - if strategy == "frequency": - # Frequency strategy is always available (no external deps) - return True, None - - if strategy == "noop": - return True, None - - return False, ( - f"Invalid clustering strategy: {strategy}. " - "Must be 'hdbscan', 'dbscan', 'frequency', or 'noop'." - ) - - -def get_strategy( - strategy: str = "hdbscan", - config: Optional[ClusteringConfig] = None, - *, - fallback: bool = True, - **kwargs: Any, -) -> BaseClusteringStrategy: - """Factory function to create clustering strategy with fallback chain. - - The fallback chain is: hdbscan -> dbscan -> frequency -> noop - - Args: - strategy: Clustering strategy to use. Options: - - "hdbscan": HDBSCAN clustering (default, recommended) - - "dbscan": DBSCAN clustering (fallback) - - "frequency": Frequency-based clustering (groups by symbol occurrence) - - "noop": No-op strategy (returns all results ungrouped) - - "auto": Try hdbscan, then dbscan, then noop - config: Clustering configuration. Uses defaults if not provided. - For frequency strategy, pass FrequencyConfig for full control. - fallback: If True (default), automatically fall back to next strategy - in the chain when primary is unavailable. If False, raise ImportError - when requested strategy is unavailable. - **kwargs: Additional strategy-specific arguments. - For DBSCANStrategy: eps, eps_percentile - For FrequencyStrategy: group_by, min_frequency, etc. - - Returns: - BaseClusteringStrategy: Configured clustering strategy instance. - - Raises: - ValueError: If strategy is not recognized. - ImportError: If required dependencies are not installed and fallback=False. - - Example: - >>> from codexlens.search.clustering import get_strategy, ClusteringConfig - >>> config = ClusteringConfig(min_cluster_size=3) - >>> # Auto-select best available strategy - >>> strategy = get_strategy("auto", config) - >>> # Explicitly use HDBSCAN (will fall back if unavailable) - >>> strategy = get_strategy("hdbscan", config) - >>> # Use frequency-based strategy - >>> from codexlens.search.clustering import FrequencyConfig - >>> freq_config = FrequencyConfig(min_frequency=2, group_by="symbol") - >>> strategy = get_strategy("frequency", freq_config) - """ - strategy = (strategy or "").strip().lower() - - # Handle "auto" - try strategies in order - if strategy == "auto": - return _get_best_available_strategy(config, **kwargs) - - if strategy == "hdbscan": - ok, err = check_clustering_strategy_available("hdbscan") - if ok: - from .hdbscan_strategy import HDBSCANStrategy - return HDBSCANStrategy(config) - - if fallback: - # Try dbscan fallback - ok_dbscan, _ = check_clustering_strategy_available("dbscan") - if ok_dbscan: - from .dbscan_strategy import DBSCANStrategy - return DBSCANStrategy(config, **kwargs) - # Final fallback to noop - return NoOpStrategy(config) - - raise ImportError(err) - - if strategy == "dbscan": - ok, err = check_clustering_strategy_available("dbscan") - if ok: - from .dbscan_strategy import DBSCANStrategy - return DBSCANStrategy(config, **kwargs) - - if fallback: - # Fallback to noop - return NoOpStrategy(config) - - raise ImportError(err) - - if strategy == "frequency": - from .frequency_strategy import FrequencyStrategy, FrequencyConfig - # If config is ClusteringConfig but not FrequencyConfig, create default FrequencyConfig - if config is None or not isinstance(config, FrequencyConfig): - freq_config = FrequencyConfig(**kwargs) if kwargs else FrequencyConfig() - else: - freq_config = config - return FrequencyStrategy(freq_config) - - if strategy == "noop": - return NoOpStrategy(config) - - raise ValueError( - f"Unknown clustering strategy: {strategy}. " - "Supported strategies: 'hdbscan', 'dbscan', 'frequency', 'noop', 'auto'" - ) - - -def _get_best_available_strategy( - config: Optional[ClusteringConfig] = None, - **kwargs: Any, -) -> BaseClusteringStrategy: - """Get the best available clustering strategy. - - Tries strategies in order: hdbscan -> dbscan -> noop - - Args: - config: Clustering configuration. - **kwargs: Additional strategy-specific arguments. - - Returns: - Best available clustering strategy instance. - """ - # Try HDBSCAN first - ok, _ = check_clustering_strategy_available("hdbscan") - if ok: - from .hdbscan_strategy import HDBSCANStrategy - return HDBSCANStrategy(config) - - # Try DBSCAN second - ok, _ = check_clustering_strategy_available("dbscan") - if ok: - from .dbscan_strategy import DBSCANStrategy - return DBSCANStrategy(config, **kwargs) - - # Fallback to NoOp - return NoOpStrategy(config) - - -# Alias for backward compatibility -ClusteringStrategyFactory = type( - "ClusteringStrategyFactory", - (), - { - "get_strategy": staticmethod(get_strategy), - "check_available": staticmethod(check_clustering_strategy_available), - }, -) diff --git a/codex-lens/src/codexlens/search/clustering/frequency_strategy.py b/codex-lens/src/codexlens/search/clustering/frequency_strategy.py deleted file mode 100644 index 48ddb00b..00000000 --- a/codex-lens/src/codexlens/search/clustering/frequency_strategy.py +++ /dev/null @@ -1,263 +0,0 @@ -"""Frequency-based clustering strategy for search result deduplication. - -This strategy groups search results by symbol/method name and prunes based on -occurrence frequency. High-frequency symbols (frequently referenced methods) -are considered more important and retained, while low-frequency results -(potentially noise) can be filtered out. - -Use cases: -- Prioritize commonly called methods/functions -- Filter out one-off results that may be less relevant -- Deduplicate results pointing to the same symbol from different locations -""" - -from __future__ import annotations - -from collections import defaultdict -from dataclasses import dataclass -from typing import TYPE_CHECKING, Dict, List, Optional, Literal - -from .base import BaseClusteringStrategy, ClusteringConfig - -if TYPE_CHECKING: - import numpy as np - from codexlens.entities import SearchResult - - -@dataclass -class FrequencyConfig(ClusteringConfig): - """Configuration for frequency-based clustering strategy. - - Attributes: - group_by: Field to group results by for frequency counting. - - 'symbol': Group by symbol_name (default, for method/function dedup) - - 'file': Group by file path - - 'symbol_kind': Group by symbol type (function, class, etc.) - min_frequency: Minimum occurrence count to keep a result. - Results appearing less than this are considered noise and pruned. - max_representatives_per_group: Maximum results to keep per symbol group. - frequency_weight: How much to boost score based on frequency. - Final score = original_score * (1 + frequency_weight * log(frequency)) - keep_mode: How to handle low-frequency results. - - 'filter': Remove results below min_frequency - - 'demote': Keep but lower their score ranking - """ - - group_by: Literal["symbol", "file", "symbol_kind"] = "symbol" - min_frequency: int = 1 # 1 means keep all, 2+ filters singletons - max_representatives_per_group: int = 3 - frequency_weight: float = 0.1 # Boost factor for frequency - keep_mode: Literal["filter", "demote"] = "demote" - - def __post_init__(self) -> None: - """Validate configuration parameters.""" - # Skip parent validation since we don't use HDBSCAN params - if self.min_frequency < 1: - raise ValueError("min_frequency must be >= 1") - if self.max_representatives_per_group < 1: - raise ValueError("max_representatives_per_group must be >= 1") - if self.frequency_weight < 0: - raise ValueError("frequency_weight must be >= 0") - if self.group_by not in ("symbol", "file", "symbol_kind"): - raise ValueError(f"group_by must be one of: symbol, file, symbol_kind; got {self.group_by}") - if self.keep_mode not in ("filter", "demote"): - raise ValueError(f"keep_mode must be one of: filter, demote; got {self.keep_mode}") - - -class FrequencyStrategy(BaseClusteringStrategy): - """Frequency-based clustering strategy for search result deduplication. - - This strategy groups search results by symbol name (or file/kind) and: - 1. Counts how many times each symbol appears in results - 2. Higher frequency = more important (frequently referenced method) - 3. Filters or demotes low-frequency results - 4. Selects top representatives from each frequency group - - Unlike embedding-based strategies (HDBSCAN, DBSCAN), this strategy: - - Does NOT require embeddings (works with metadata only) - - Is very fast (O(n) complexity) - - Is deterministic (no random initialization) - - Works well for symbol-level deduplication - - Example: - >>> config = FrequencyConfig(min_frequency=2, group_by="symbol") - >>> strategy = FrequencyStrategy(config) - >>> # Results with symbol "authenticate" appearing 5 times - >>> # will be prioritized over "helper_func" appearing once - >>> representatives = strategy.fit_predict(embeddings, results) - """ - - def __init__(self, config: Optional[FrequencyConfig] = None) -> None: - """Initialize the frequency strategy. - - Args: - config: Frequency configuration. Uses defaults if not provided. - """ - self.config: FrequencyConfig = config or FrequencyConfig() - - def _get_group_key(self, result: "SearchResult") -> str: - """Extract grouping key from a search result. - - Args: - result: SearchResult to extract key from. - - Returns: - String key for grouping (symbol name, file path, or kind). - """ - if self.config.group_by == "symbol": - # Use symbol_name if available, otherwise fall back to file:line - symbol = getattr(result, "symbol_name", None) - if symbol: - return str(symbol) - # Fallback: use file path + start_line as pseudo-symbol - start_line = getattr(result, "start_line", 0) or 0 - return f"{result.path}:{start_line}" - - elif self.config.group_by == "file": - return str(result.path) - - elif self.config.group_by == "symbol_kind": - kind = getattr(result, "symbol_kind", None) - return str(kind) if kind else "unknown" - - return str(result.path) # Default fallback - - def cluster( - self, - embeddings: "np.ndarray", - results: List["SearchResult"], - ) -> List[List[int]]: - """Group search results by frequency of occurrence. - - Note: This method ignores embeddings and groups by metadata only. - The embeddings parameter is kept for interface compatibility. - - Args: - embeddings: Ignored (kept for interface compatibility). - results: List of SearchResult objects to cluster. - - Returns: - List of clusters (groups), where each cluster contains indices - of results with the same grouping key. Clusters are ordered by - frequency (highest frequency first). - """ - if not results: - return [] - - # Group results by key - groups: Dict[str, List[int]] = defaultdict(list) - for idx, result in enumerate(results): - key = self._get_group_key(result) - groups[key].append(idx) - - # Sort groups by frequency (descending) then by key (for stability) - sorted_groups = sorted( - groups.items(), - key=lambda x: (-len(x[1]), x[0]) # -frequency, then alphabetical - ) - - # Convert to list of clusters - clusters = [indices for _, indices in sorted_groups] - - return clusters - - def select_representatives( - self, - clusters: List[List[int]], - results: List["SearchResult"], - embeddings: Optional["np.ndarray"] = None, - ) -> List["SearchResult"]: - """Select representative results based on frequency and score. - - For each frequency group: - 1. If frequency < min_frequency: filter or demote based on keep_mode - 2. Sort by score within group - 3. Apply frequency boost to scores - 4. Select top N representatives - - Args: - clusters: List of clusters from cluster() method. - results: Original list of SearchResult objects. - embeddings: Optional embeddings (used for tie-breaking if provided). - - Returns: - List of representative SearchResult objects, ordered by - frequency-adjusted score (highest first). - """ - import math - - if not clusters or not results: - return [] - - representatives: List["SearchResult"] = [] - demoted: List["SearchResult"] = [] - - for cluster_indices in clusters: - if not cluster_indices: - continue - - frequency = len(cluster_indices) - - # Get results in this cluster, sorted by score - cluster_results = [results[i] for i in cluster_indices] - cluster_results.sort(key=lambda r: getattr(r, "score", 0.0), reverse=True) - - # Check frequency threshold - if frequency < self.config.min_frequency: - if self.config.keep_mode == "filter": - # Skip low-frequency results entirely - continue - else: # demote mode - # Keep but add to demoted list (lower priority) - for result in cluster_results[: self.config.max_representatives_per_group]: - demoted.append(result) - continue - - # Apply frequency boost and select top representatives - for result in cluster_results[: self.config.max_representatives_per_group]: - # Calculate frequency-boosted score - original_score = getattr(result, "score", 0.0) - # log(frequency + 1) to handle frequency=1 case smoothly - frequency_boost = 1.0 + self.config.frequency_weight * math.log(frequency + 1) - boosted_score = original_score * frequency_boost - - # Create new result with boosted score and frequency metadata - # Note: SearchResult might be immutable, so we preserve original - # and track boosted score in metadata - if hasattr(result, "metadata") and isinstance(result.metadata, dict): - result.metadata["frequency"] = frequency - result.metadata["frequency_boosted_score"] = boosted_score - - representatives.append(result) - - # Sort representatives by boosted score (or original score as fallback) - def get_sort_score(r: "SearchResult") -> float: - if hasattr(r, "metadata") and isinstance(r.metadata, dict): - return r.metadata.get("frequency_boosted_score", getattr(r, "score", 0.0)) - return getattr(r, "score", 0.0) - - representatives.sort(key=get_sort_score, reverse=True) - - # Add demoted results at the end - if demoted: - demoted.sort(key=lambda r: getattr(r, "score", 0.0), reverse=True) - representatives.extend(demoted) - - return representatives - - def fit_predict( - self, - embeddings: "np.ndarray", - results: List["SearchResult"], - ) -> List["SearchResult"]: - """Convenience method to cluster and select representatives in one call. - - Args: - embeddings: NumPy array (may be ignored for frequency-based clustering). - results: List of SearchResult objects. - - Returns: - List of representative SearchResult objects. - """ - clusters = self.cluster(embeddings, results) - return self.select_representatives(clusters, results, embeddings) diff --git a/codex-lens/src/codexlens/search/clustering/hdbscan_strategy.py b/codex-lens/src/codexlens/search/clustering/hdbscan_strategy.py deleted file mode 100644 index 3bd2e1c0..00000000 --- a/codex-lens/src/codexlens/search/clustering/hdbscan_strategy.py +++ /dev/null @@ -1,175 +0,0 @@ -"""HDBSCAN-based clustering strategy for search results. - -HDBSCAN (Hierarchical Density-Based Spatial Clustering of Applications with Noise) -is the primary clustering strategy for grouping similar search results. -""" - -from __future__ import annotations - -from typing import TYPE_CHECKING, List, Optional - -from .base import BaseClusteringStrategy, ClusteringConfig - -if TYPE_CHECKING: - import numpy as np - from codexlens.entities import SearchResult - - -class HDBSCANStrategy(BaseClusteringStrategy): - """HDBSCAN-based clustering strategy. - - Uses HDBSCAN algorithm to cluster search results based on embedding similarity. - HDBSCAN is preferred over DBSCAN because it: - - Automatically determines the number of clusters - - Handles varying density clusters well - - Identifies noise points (outliers) effectively - - Example: - >>> from codexlens.search.clustering import HDBSCANStrategy, ClusteringConfig - >>> config = ClusteringConfig(min_cluster_size=3, metric='cosine') - >>> strategy = HDBSCANStrategy(config) - >>> clusters = strategy.cluster(embeddings, results) - >>> representatives = strategy.select_representatives(clusters, results) - """ - - def __init__(self, config: Optional[ClusteringConfig] = None) -> None: - """Initialize HDBSCAN clustering strategy. - - Args: - config: Clustering configuration. Uses defaults if not provided. - - Raises: - ImportError: If hdbscan package is not installed. - """ - super().__init__(config) - # Validate hdbscan is available - try: - import hdbscan # noqa: F401 - except ImportError as exc: - raise ImportError( - "hdbscan package is required for HDBSCANStrategy. " - "Install with: pip install codexlens[clustering]" - ) from exc - - def cluster( - self, - embeddings: "np.ndarray", - results: List["SearchResult"], - ) -> List[List[int]]: - """Cluster search results using HDBSCAN algorithm. - - Args: - embeddings: NumPy array of shape (n_results, embedding_dim) - containing the embedding vectors for each result. - results: List of SearchResult objects corresponding to embeddings. - - Returns: - List of clusters, where each cluster is a list of indices - into the results list. Noise points are returned as singleton clusters. - """ - import hdbscan - import numpy as np - - n_results = len(results) - if n_results == 0: - return [] - - # Handle edge case: fewer results than min_cluster_size - if n_results < self.config.min_cluster_size: - # Return each result as its own singleton cluster - return [[i] for i in range(n_results)] - - metric = self.config.metric - data = embeddings - - # Some hdbscan builds do not recognize metric="cosine" even though it's a - # common need for embedding clustering. In that case, compute a precomputed - # cosine distance matrix and run HDBSCAN with metric="precomputed". - if metric == "cosine": - try: - from sklearn.metrics import pairwise_distances - - data = pairwise_distances(embeddings, metric="cosine") - # Some hdbscan builds are strict about dtype for precomputed distances. - # Ensure float64 to avoid Buffer dtype mismatch errors. - try: - data = data.astype("float64", copy=False) - except Exception: - pass - metric = "precomputed" - except Exception: - # If we cannot compute distances, fall back to euclidean over raw vectors. - metric = "euclidean" - - # Configure HDBSCAN clusterer - clusterer = hdbscan.HDBSCAN( - min_cluster_size=self.config.min_cluster_size, - min_samples=self.config.min_samples, - metric=metric, - cluster_selection_epsilon=self.config.cluster_selection_epsilon, - allow_single_cluster=self.config.allow_single_cluster, - prediction_data=self.config.prediction_data, - ) - - # Fit and get cluster labels - # Labels: -1 = noise, 0+ = cluster index - labels = clusterer.fit_predict(data) - - # Group indices by cluster label - cluster_map: dict[int, list[int]] = {} - for idx, label in enumerate(labels): - if label not in cluster_map: - cluster_map[label] = [] - cluster_map[label].append(idx) - - # Build result: non-noise clusters first, then noise as singletons - clusters: List[List[int]] = [] - - # Add proper clusters (label >= 0) - for label in sorted(cluster_map.keys()): - if label >= 0: - clusters.append(cluster_map[label]) - - # Add noise points as singleton clusters (label == -1) - if -1 in cluster_map: - for idx in cluster_map[-1]: - clusters.append([idx]) - - return clusters - - def select_representatives( - self, - clusters: List[List[int]], - results: List["SearchResult"], - embeddings: Optional["np.ndarray"] = None, - ) -> List["SearchResult"]: - """Select representative results from each cluster. - - Selects the result with the highest score from each cluster. - - Args: - clusters: List of clusters from cluster() method. - results: Original list of SearchResult objects. - embeddings: Optional embeddings (not used in score-based selection). - - Returns: - List of representative SearchResult objects, one per cluster, - ordered by score (highest first). - """ - if not clusters or not results: - return [] - - representatives: List["SearchResult"] = [] - - for cluster_indices in clusters: - if not cluster_indices: - continue - - # Find the result with the highest score in this cluster - best_idx = max(cluster_indices, key=lambda i: results[i].score) - representatives.append(results[best_idx]) - - # Sort by score descending - representatives.sort(key=lambda r: r.score, reverse=True) - - return representatives diff --git a/codex-lens/src/codexlens/search/clustering/noop_strategy.py b/codex-lens/src/codexlens/search/clustering/noop_strategy.py deleted file mode 100644 index eda36098..00000000 --- a/codex-lens/src/codexlens/search/clustering/noop_strategy.py +++ /dev/null @@ -1,83 +0,0 @@ -"""No-op clustering strategy for search results. - -NoOpStrategy returns all results ungrouped when clustering dependencies -are not available or clustering is disabled. -""" - -from __future__ import annotations - -from typing import TYPE_CHECKING, List, Optional - -from .base import BaseClusteringStrategy, ClusteringConfig - -if TYPE_CHECKING: - import numpy as np - from codexlens.entities import SearchResult - - -class NoOpStrategy(BaseClusteringStrategy): - """No-op clustering strategy that returns all results ungrouped. - - This strategy is used as a final fallback when no clustering dependencies - are available, or when clustering is explicitly disabled. Each result - is treated as its own singleton cluster. - - Example: - >>> from codexlens.search.clustering import NoOpStrategy - >>> strategy = NoOpStrategy() - >>> clusters = strategy.cluster(embeddings, results) - >>> # Returns [[0], [1], [2], ...] - each result in its own cluster - >>> representatives = strategy.select_representatives(clusters, results) - >>> # Returns all results sorted by score - """ - - def __init__(self, config: Optional[ClusteringConfig] = None) -> None: - """Initialize NoOp clustering strategy. - - Args: - config: Clustering configuration. Ignored for NoOpStrategy - but accepted for interface compatibility. - """ - super().__init__(config) - - def cluster( - self, - embeddings: "np.ndarray", - results: List["SearchResult"], - ) -> List[List[int]]: - """Return each result as its own singleton cluster. - - Args: - embeddings: NumPy array of shape (n_results, embedding_dim). - Not used but accepted for interface compatibility. - results: List of SearchResult objects. - - Returns: - List of singleton clusters, one per result. - """ - return [[i] for i in range(len(results))] - - def select_representatives( - self, - clusters: List[List[int]], - results: List["SearchResult"], - embeddings: Optional["np.ndarray"] = None, - ) -> List["SearchResult"]: - """Return all results sorted by score. - - Since each cluster is a singleton, this effectively returns all - results sorted by score descending. - - Args: - clusters: List of singleton clusters. - results: Original list of SearchResult objects. - embeddings: Optional embeddings (not used). - - Returns: - All SearchResult objects sorted by score (highest first). - """ - if not results: - return [] - - # Return all results sorted by score - return sorted(results, key=lambda r: r.score, reverse=True) diff --git a/codex-lens/src/codexlens/search/enrichment.py b/codex-lens/src/codexlens/search/enrichment.py deleted file mode 100644 index 110f56b7..00000000 --- a/codex-lens/src/codexlens/search/enrichment.py +++ /dev/null @@ -1,171 +0,0 @@ -# codex-lens/src/codexlens/search/enrichment.py -"""Relationship enrichment for search results.""" -import sqlite3 -from pathlib import Path -from typing import List, Dict, Any, Optional - -from codexlens.config import Config -from codexlens.entities import SearchResult -from codexlens.search.graph_expander import GraphExpander -from codexlens.storage.path_mapper import PathMapper - - -class RelationshipEnricher: - """Enriches search results with code graph relationships.""" - - def __init__(self, index_path: Path): - """Initialize with path to index database. - - Args: - index_path: Path to _index.db SQLite database - """ - self.index_path = index_path - self.db_conn: Optional[sqlite3.Connection] = None - self._connect() - - def _connect(self) -> None: - """Establish read-only database connection.""" - if self.index_path.exists(): - self.db_conn = sqlite3.connect( - f"file:{self.index_path}?mode=ro", - uri=True, - check_same_thread=False - ) - self.db_conn.row_factory = sqlite3.Row - - def enrich(self, results: List[Dict[str, Any]], limit: int = 10) -> List[Dict[str, Any]]: - """Add relationship data to search results. - - Args: - results: List of search result dictionaries - limit: Maximum number of results to enrich - - Returns: - Results with relationships field added - """ - if not self.db_conn: - return results - - for result in results[:limit]: - file_path = result.get('file') or result.get('path') - symbol_name = result.get('symbol') - result['relationships'] = self._find_relationships(file_path, symbol_name) - return results - - def _find_relationships(self, file_path: Optional[str], symbol_name: Optional[str]) -> List[Dict[str, Any]]: - """Query relationships for a symbol. - - Args: - file_path: Path to file containing the symbol - symbol_name: Name of the symbol - - Returns: - List of relationship dictionaries with type, direction, target/source, file, line - """ - if not self.db_conn or not symbol_name: - return [] - - relationships = [] - cursor = self.db_conn.cursor() - - try: - # Find symbol ID(s) by name and optionally file - if file_path: - cursor.execute( - 'SELECT id FROM symbols WHERE name = ? AND file_path = ?', - (symbol_name, file_path) - ) - else: - cursor.execute('SELECT id FROM symbols WHERE name = ?', (symbol_name,)) - - symbol_ids = [row[0] for row in cursor.fetchall()] - - if not symbol_ids: - return [] - - # Query outgoing relationships (symbol is source) - placeholders = ','.join('?' * len(symbol_ids)) - cursor.execute(f''' - SELECT sr.relationship_type, sr.target_symbol_fqn, sr.file_path, sr.line - FROM symbol_relationships sr - WHERE sr.source_symbol_id IN ({placeholders}) - ''', symbol_ids) - - for row in cursor.fetchall(): - relationships.append({ - 'type': row[0], - 'direction': 'outgoing', - 'target': row[1], - 'file': row[2], - 'line': row[3], - }) - - # Query incoming relationships (symbol is target) - # Match against symbol name or qualified name patterns - cursor.execute(''' - SELECT sr.relationship_type, s.name AS source_name, sr.file_path, sr.line - FROM symbol_relationships sr - JOIN symbols s ON sr.source_symbol_id = s.id - WHERE sr.target_symbol_fqn = ? OR sr.target_symbol_fqn LIKE ? - ''', (symbol_name, f'%.{symbol_name}')) - - for row in cursor.fetchall(): - rel_type = row[0] - # Convert to incoming type - incoming_type = self._to_incoming_type(rel_type) - relationships.append({ - 'type': incoming_type, - 'direction': 'incoming', - 'source': row[1], - 'file': row[2], - 'line': row[3], - }) - - except sqlite3.Error: - return [] - - return relationships - - def _to_incoming_type(self, outgoing_type: str) -> str: - """Convert outgoing relationship type to incoming type. - - Args: - outgoing_type: The outgoing relationship type (e.g., 'calls', 'imports') - - Returns: - Corresponding incoming type (e.g., 'called_by', 'imported_by') - """ - type_map = { - 'calls': 'called_by', - 'imports': 'imported_by', - 'extends': 'extended_by', - } - return type_map.get(outgoing_type, f'{outgoing_type}_by') - - def close(self) -> None: - """Close database connection.""" - if self.db_conn: - self.db_conn.close() - self.db_conn = None - - def __enter__(self) -> 'RelationshipEnricher': - return self - - def __exit__(self, exc_type, exc_val, exc_tb) -> None: - self.close() - - -class SearchEnrichmentPipeline: - """Search post-processing pipeline (optional enrichments).""" - - def __init__(self, mapper: PathMapper, *, config: Optional[Config] = None) -> None: - self._config = config - self._graph_expander = GraphExpander(mapper, config=config) - - def expand_related_results(self, results: List[SearchResult]) -> List[SearchResult]: - """Expand base results with related symbols when enabled in config.""" - if self._config is None or not getattr(self._config, "enable_graph_expansion", False): - return [] - - depth = int(getattr(self._config, "graph_expansion_depth", 2) or 2) - return self._graph_expander.expand(results, depth=depth) diff --git a/codex-lens/src/codexlens/search/global_graph_expander.py b/codex-lens/src/codexlens/search/global_graph_expander.py deleted file mode 100644 index b6aa682e..00000000 --- a/codex-lens/src/codexlens/search/global_graph_expander.py +++ /dev/null @@ -1,250 +0,0 @@ -"""Global graph expansion for search results using cross-directory relationships. - -Expands top search results with related symbols by querying the global_relationships -table in GlobalSymbolIndex, enabling project-wide code graph traversal. -""" - -from __future__ import annotations - -import logging -import sqlite3 -from typing import Dict, List, Optional, Sequence, Tuple - -from codexlens.config import Config -from codexlens.entities import SearchResult -from codexlens.storage.global_index import GlobalSymbolIndex - -logger = logging.getLogger(__name__) - -# Score decay factors by relationship type. -# INHERITS has highest factor (strongest semantic link), -# IMPORTS next (explicit dependency), CALLS lowest (may be indirect). -DECAY_FACTORS: Dict[str, float] = { - "imports": 0.4, - "inherits": 0.5, - "calls": 0.3, -} -DEFAULT_DECAY = 0.3 - - -class GlobalGraphExpander: - """Expands search results with cross-directory related symbols from the global graph.""" - - def __init__( - self, - global_index: GlobalSymbolIndex, - *, - config: Optional[Config] = None, - ) -> None: - self._global_index = global_index - self._config = config - self._logger = logging.getLogger(__name__) - - def expand( - self, - results: Sequence[SearchResult], - *, - top_n: int = 10, - max_related: int = 50, - ) -> List[SearchResult]: - """Expand top-N results with related symbols from global relationships. - - Args: - results: Base ranked results from Stage 1. - top_n: Only expand the top-N base results. - max_related: Maximum related results to return. - - Returns: - List of related SearchResult objects (does NOT include the input results). - """ - if not results: - return [] - - # 1. Extract symbol names from top results - symbols_with_scores = self._resolve_symbols(results, top_n) - if not symbols_with_scores: - return [] - - symbol_names = [s[0] for s in symbols_with_scores] - base_scores = {s[0]: s[1] for s in symbols_with_scores} - - # 2. Query global relationships - relationships = self._query_relationships(symbol_names, limit=max_related * 3) - if not relationships: - return [] - - # 3. Build expanded results with score decay - expanded = self._build_expanded_results( - relationships, base_scores, max_related - ) - - # 4. Deduplicate against input results - input_keys: set[Tuple[str, Optional[str], Optional[int]]] = set() - for r in results: - input_keys.add((r.path, r.symbol_name, r.start_line)) - - deduped: List[SearchResult] = [] - seen: set[Tuple[str, Optional[str], Optional[int]]] = set() - for r in expanded: - key = (r.path, r.symbol_name, r.start_line) - if key not in input_keys and key not in seen: - seen.add(key) - deduped.append(r) - - return deduped[:max_related] - - def _resolve_symbols( - self, - results: Sequence[SearchResult], - top_n: int, - ) -> List[Tuple[str, float]]: - """Extract (symbol_name, score) pairs from top results.""" - symbols: List[Tuple[str, float]] = [] - seen: set[str] = set() - for r in list(results)[:top_n]: - name = r.symbol_name - if not name or name in seen: - continue - seen.add(name) - symbols.append((name, float(r.score))) - return symbols - - def _query_relationships( - self, - symbol_names: List[str], - limit: int = 150, - ) -> List[sqlite3.Row]: - """Query global_relationships for symbols.""" - try: - return self._global_index.query_relationships_for_symbols( - symbol_names, limit=limit - ) - except Exception as exc: - self._logger.debug("Global graph query failed: %s", exc) - return [] - - def _resolve_target_to_file( - self, - target_qualified_name: str, - ) -> Optional[Tuple[str, int, int]]: - """Resolve target_qualified_name to (file_path, start_line, end_line). - - Tries ``file_path::symbol_name`` format first, then falls back to - symbol name search in the global index. - """ - # Format: "file_path::symbol_name" - if "::" in target_qualified_name: - parts = target_qualified_name.split("::", 1) - target_file = parts[0] - target_symbol = parts[1] - try: - symbols = self._global_index.search(target_symbol, limit=5) - for sym in symbols: - if sym.file and str(sym.file) == target_file: - return ( - target_file, - sym.range[0] if sym.range else 1, - sym.range[1] if sym.range else 1, - ) - # File path known but line info unavailable - return (target_file, 1, 1) - except Exception: - return (target_file, 1, 1) - - # Plain symbol name (possibly dot-qualified like "mod.ClassName") - try: - leaf_name = target_qualified_name.rsplit(".", 1)[-1] - symbols = self._global_index.search(leaf_name, limit=5) - if symbols: - sym = symbols[0] - file_path = str(sym.file) if sym.file else None - if file_path: - return ( - file_path, - sym.range[0] if sym.range else 1, - sym.range[1] if sym.range else 1, - ) - except Exception: - pass - - return None - - def _build_expanded_results( - self, - relationships: List[sqlite3.Row], - base_scores: Dict[str, float], - max_related: int, - ) -> List[SearchResult]: - """Build SearchResult list from relationships with score decay.""" - results: List[SearchResult] = [] - - for rel in relationships: - source_file = rel["source_file"] - source_symbol = rel["source_symbol"] - target_qname = rel["target_qualified_name"] - rel_type = rel["relationship_type"] - source_line = rel["source_line"] - - # Determine base score from the matched symbol - base_score = base_scores.get(source_symbol, 0.0) - if base_score == 0.0: - # Try matching against the target leaf name - leaf = target_qname.rsplit(".", 1)[-1] if "." in target_qname else target_qname - if "::" in leaf: - leaf = leaf.split("::")[-1] - base_score = base_scores.get(leaf, 0.0) - if base_score == 0.0: - base_score = 0.5 # Default when no match found - - # Apply decay factor - decay = DECAY_FACTORS.get(rel_type, DEFAULT_DECAY) - score = base_score * decay - - # Try to resolve target to file for a richer result - target_info = self._resolve_target_to_file(target_qname) - if target_info: - t_file, t_start, t_end = target_info - results.append(SearchResult( - path=t_file, - score=score, - excerpt=None, - content=None, - start_line=t_start, - end_line=t_end, - symbol_name=( - target_qname.split("::")[-1] - if "::" in target_qname - else target_qname.rsplit(".", 1)[-1] - ), - symbol_kind=None, - metadata={ - "source": "static_graph", - "relationship_type": rel_type, - "from_symbol": source_symbol, - "from_file": source_file, - }, - )) - else: - # Use source file as fallback (we know the source exists) - results.append(SearchResult( - path=source_file, - score=score * 0.8, # Slight penalty for unresolved target - excerpt=None, - content=None, - start_line=source_line, - end_line=source_line, - symbol_name=source_symbol, - symbol_kind=None, - metadata={ - "source": "static_graph", - "relationship_type": rel_type, - "target_qualified_name": target_qname, - }, - )) - - if len(results) >= max_related: - break - - # Sort by score descending - results.sort(key=lambda r: r.score, reverse=True) - return results diff --git a/codex-lens/src/codexlens/search/graph_expander.py b/codex-lens/src/codexlens/search/graph_expander.py deleted file mode 100644 index 73261d53..00000000 --- a/codex-lens/src/codexlens/search/graph_expander.py +++ /dev/null @@ -1,264 +0,0 @@ -"""Graph expansion for search results using precomputed neighbors. - -Expands top search results with related symbol definitions by traversing -precomputed N-hop neighbors stored in the per-directory index databases. -""" - -from __future__ import annotations - -import logging -import sqlite3 -from pathlib import Path -from typing import Dict, List, Optional, Sequence, Tuple - -from codexlens.config import Config -from codexlens.entities import SearchResult -from codexlens.storage.path_mapper import PathMapper - -logger = logging.getLogger(__name__) - - -def _result_key(result: SearchResult) -> Tuple[str, Optional[str], Optional[int], Optional[int]]: - return (result.path, result.symbol_name, result.start_line, result.end_line) - - -def _slice_content_block(content: str, start_line: Optional[int], end_line: Optional[int]) -> Optional[str]: - if content is None: - return None - if start_line is None or end_line is None: - return None - if start_line < 1 or end_line < start_line: - return None - - lines = content.splitlines() - start_idx = max(0, start_line - 1) - end_idx = min(len(lines), end_line) - if start_idx >= len(lines): - return None - return "\n".join(lines[start_idx:end_idx]) - - -class GraphExpander: - """Expands SearchResult lists with related symbols from the code graph.""" - - def __init__(self, mapper: PathMapper, *, config: Optional[Config] = None) -> None: - self._mapper = mapper - self._config = config - self._logger = logging.getLogger(__name__) - - def expand( - self, - results: Sequence[SearchResult], - *, - depth: Optional[int] = None, - max_expand: int = 10, - max_related: int = 50, - ) -> List[SearchResult]: - """Expand top results with related symbols. - - Args: - results: Base ranked results. - depth: Maximum relationship depth to include (defaults to Config or 2). - max_expand: Only expand the top-N base results to bound cost. - max_related: Maximum related results to return. - - Returns: - A list of related SearchResult objects with relationship_depth metadata. - """ - if not results: - return [] - - configured_depth = getattr(self._config, "graph_expansion_depth", 2) if self._config else 2 - max_depth = int(depth if depth is not None else configured_depth) - if max_depth <= 0: - return [] - max_depth = min(max_depth, 2) - - expand_count = max(0, int(max_expand)) - related_limit = max(0, int(max_related)) - if expand_count == 0 or related_limit == 0: - return [] - - seen = {_result_key(r) for r in results} - related_results: List[SearchResult] = [] - conn_cache: Dict[Path, sqlite3.Connection] = {} - - try: - for base in list(results)[:expand_count]: - if len(related_results) >= related_limit: - break - - if not base.symbol_name or not base.path: - continue - - index_path = self._mapper.source_to_index_db(Path(base.path).parent) - conn = conn_cache.get(index_path) - if conn is None: - conn = self._connect_readonly(index_path) - if conn is None: - continue - conn_cache[index_path] = conn - - source_ids = self._resolve_source_symbol_ids( - conn, - file_path=base.path, - symbol_name=base.symbol_name, - symbol_kind=base.symbol_kind, - ) - if not source_ids: - continue - - for source_id in source_ids: - neighbors = self._get_neighbors(conn, source_id, max_depth=max_depth, limit=related_limit) - for neighbor_id, rel_depth in neighbors: - if len(related_results) >= related_limit: - break - row = self._get_symbol_details(conn, neighbor_id) - if row is None: - continue - - path = str(row["full_path"]) - symbol_name = str(row["name"]) - symbol_kind = str(row["kind"]) - start_line = int(row["start_line"]) if row["start_line"] is not None else None - end_line = int(row["end_line"]) if row["end_line"] is not None else None - content_block = _slice_content_block( - str(row["content"]) if row["content"] is not None else "", - start_line, - end_line, - ) - - score = float(base.score) * (0.5 ** int(rel_depth)) - candidate = SearchResult( - path=path, - score=max(0.0, score), - excerpt=None, - content=content_block, - start_line=start_line, - end_line=end_line, - symbol_name=symbol_name, - symbol_kind=symbol_kind, - metadata={"relationship_depth": int(rel_depth)}, - ) - - key = _result_key(candidate) - if key in seen: - continue - seen.add(key) - related_results.append(candidate) - - finally: - for conn in conn_cache.values(): - try: - conn.close() - except Exception: - pass - - return related_results - - def _connect_readonly(self, index_path: Path) -> Optional[sqlite3.Connection]: - try: - if not index_path.exists() or index_path.stat().st_size == 0: - return None - except OSError: - return None - - try: - conn = sqlite3.connect(f"file:{index_path}?mode=ro", uri=True, check_same_thread=False) - conn.row_factory = sqlite3.Row - return conn - except Exception as exc: - self._logger.debug("GraphExpander failed to open %s: %s", index_path, exc) - return None - - def _resolve_source_symbol_ids( - self, - conn: sqlite3.Connection, - *, - file_path: str, - symbol_name: str, - symbol_kind: Optional[str], - ) -> List[int]: - try: - if symbol_kind: - rows = conn.execute( - """ - SELECT s.id - FROM symbols s - JOIN files f ON f.id = s.file_id - WHERE f.full_path = ? AND s.name = ? AND s.kind = ? - """, - (file_path, symbol_name, symbol_kind), - ).fetchall() - else: - rows = conn.execute( - """ - SELECT s.id - FROM symbols s - JOIN files f ON f.id = s.file_id - WHERE f.full_path = ? AND s.name = ? - """, - (file_path, symbol_name), - ).fetchall() - except sqlite3.Error: - return [] - - ids: List[int] = [] - for row in rows: - try: - ids.append(int(row["id"])) - except Exception: - continue - return ids - - def _get_neighbors( - self, - conn: sqlite3.Connection, - source_symbol_id: int, - *, - max_depth: int, - limit: int, - ) -> List[Tuple[int, int]]: - try: - rows = conn.execute( - """ - SELECT neighbor_symbol_id, relationship_depth - FROM graph_neighbors - WHERE source_symbol_id = ? AND relationship_depth <= ? - ORDER BY relationship_depth ASC, neighbor_symbol_id ASC - LIMIT ? - """, - (int(source_symbol_id), int(max_depth), int(limit)), - ).fetchall() - except sqlite3.Error: - return [] - - neighbors: List[Tuple[int, int]] = [] - for row in rows: - try: - neighbors.append((int(row["neighbor_symbol_id"]), int(row["relationship_depth"]))) - except Exception: - continue - return neighbors - - def _get_symbol_details(self, conn: sqlite3.Connection, symbol_id: int) -> Optional[sqlite3.Row]: - try: - return conn.execute( - """ - SELECT - s.id, - s.name, - s.kind, - s.start_line, - s.end_line, - f.full_path, - f.content - FROM symbols s - JOIN files f ON f.id = s.file_id - WHERE s.id = ? - """, - (int(symbol_id),), - ).fetchone() - except sqlite3.Error: - return None - diff --git a/codex-lens/src/codexlens/search/hybrid_search.py b/codex-lens/src/codexlens/search/hybrid_search.py deleted file mode 100644 index 9a300069..00000000 --- a/codex-lens/src/codexlens/search/hybrid_search.py +++ /dev/null @@ -1,1359 +0,0 @@ -"""Hybrid search engine orchestrating parallel exact/fuzzy/vector searches with RRF fusion. - -Coordinates multiple search backends in parallel using ThreadPoolExecutor and combines -results via Reciprocal Rank Fusion (RRF) algorithm. -""" - -from __future__ import annotations - -import logging -import threading -import time -from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError, as_completed -from contextlib import contextmanager -from pathlib import Path -from typing import Any, Dict, List, Optional - - -@contextmanager -def timer(name: str, logger: logging.Logger, level: int = logging.DEBUG): - """Context manager for timing code blocks. - - Args: - name: Name of the operation being timed - logger: Logger instance to use - level: Logging level (default DEBUG) - """ - start = time.perf_counter() - try: - yield - finally: - elapsed_ms = (time.perf_counter() - start) * 1000 - logger.log(level, "[TIMING] %s: %.2fms", name, elapsed_ms) - -from codexlens.config import Config -from codexlens.config import VECTORS_HNSW_NAME -from codexlens.entities import SearchResult -from codexlens.search.ranking import ( - DEFAULT_WEIGHTS as RANKING_DEFAULT_WEIGHTS, - QueryIntent, - apply_symbol_boost, - cross_encoder_rerank, - detect_query_intent, - filter_results_by_category, - get_rrf_weights, - query_prefers_lexical_search, - reciprocal_rank_fusion, - rerank_results, - simple_weighted_fusion, - tag_search_source, -) -from codexlens.storage.dir_index import DirIndexStore -from codexlens.storage.index_filters import filter_index_paths - -# Optional LSP imports (for real-time graph expansion) -try: - from codexlens.lsp import LspBridge, LspGraphBuilder - HAS_LSP = True -except ImportError: - HAS_LSP = False - - -class HybridSearchEngine: - """Hybrid search engine with parallel execution and RRF fusion. - - Orchestrates searches across exact FTS, fuzzy FTS, and optional vector backends, - executing them in parallel and fusing results via Reciprocal Rank Fusion. - - Attributes: - logger: Python logger instance - default_weights: Default RRF weights for each source - """ - - # Public compatibility contract for callers/tests that expect the legacy - # three-backend defaults on the engine instance. - DEFAULT_WEIGHTS = { - "exact": 0.3, - "fuzzy": 0.1, - "vector": 0.6, - } - - def __init__( - self, - weights: Optional[Dict[str, float]] = None, - config: Optional[Config] = None, - embedder: Any = None, - ): - """Initialize hybrid search engine. - - Args: - weights: Optional custom RRF weights (default: DEFAULT_WEIGHTS) - config: Optional runtime config (enables optional reranking features) - embedder: Optional embedder instance for embedding-based reranking - - Raises: - TypeError: If weights is not a dict (e.g., if a Path is passed) - """ - self.logger = logging.getLogger(__name__) - - # Validate weights type to catch common usage errors - if weights is not None and not isinstance(weights, dict): - raise TypeError( - f"weights must be a dict, got {type(weights).__name__}. " - f"Did you mean to pass index_path to search() instead of __init__()?" - ) - - self.weights = weights - self._config = config - self.embedder = embedder - self.reranker: Any = None - self._use_gpu = config.embedding_use_gpu if config else True - self._centralized_cache_lock = threading.RLock() - self._centralized_model_config_cache: Dict[str, Any] = {} - self._centralized_embedder_cache: Dict[tuple[Any, ...], Any] = {} - self._centralized_ann_cache: Dict[tuple[str, int], Any] = {} - self._centralized_query_embedding_cache: Dict[tuple[Any, ...], Any] = {} - - @property - def weights(self) -> Dict[str, float]: - """Public/default weights exposed for backwards compatibility.""" - return dict(self._weights) - - @weights.setter - def weights(self, value: Optional[Dict[str, float]]) -> None: - """Update public and internal fusion weights together.""" - if value is None: - public_weights = self.DEFAULT_WEIGHTS.copy() - fusion_weights = dict(RANKING_DEFAULT_WEIGHTS) - fusion_weights.update(public_weights) - else: - if not isinstance(value, dict): - raise TypeError(f"weights must be a dict, got {type(value).__name__}") - public_weights = dict(value) - fusion_weights = dict(value) - - self._weights = public_weights - self._fusion_weights = fusion_weights - - @staticmethod - def _clamp_search_score(score: float) -> float: - """Keep ANN-derived similarity scores within SearchResult's valid domain.""" - - return max(0.0, float(score)) - - def _get_centralized_model_config(self, index_root: Path) -> Optional[Dict[str, Any]]: - """Load and cache the centralized embedding model config for an index root.""" - root_key = str(Path(index_root).resolve()) - - with self._centralized_cache_lock: - if root_key in self._centralized_model_config_cache: - cached = self._centralized_model_config_cache[root_key] - return dict(cached) if isinstance(cached, dict) else None - - model_config: Optional[Dict[str, Any]] = None - try: - from codexlens.semantic.vector_store import VectorStore - - central_index_path = Path(root_key) / "_index.db" - if central_index_path.exists(): - with VectorStore(central_index_path) as vs: - loaded = vs.get_model_config() - if isinstance(loaded, dict): - model_config = dict(loaded) - self.logger.debug( - "Loaded model config from centralized index: %s", - model_config, - ) - except Exception as exc: - self.logger.debug( - "Failed to load model config from centralized index: %s", - exc, - ) - - with self._centralized_cache_lock: - self._centralized_model_config_cache[root_key] = ( - dict(model_config) if isinstance(model_config, dict) else None - ) - - return dict(model_config) if isinstance(model_config, dict) else None - - def _get_centralized_embedder( - self, - model_config: Optional[Dict[str, Any]], - ) -> tuple[Any, int, tuple[Any, ...]]: - """Resolve and cache the embedder used for centralized vector search.""" - from codexlens.semantic.factory import get_embedder - - backend = "fastembed" - model_name: Optional[str] = None - model_profile = "code" - use_gpu = bool(self._use_gpu) - embedding_dim: Optional[int] = None - - if model_config: - backend = str(model_config.get("backend", "fastembed") or "fastembed") - model_name = model_config.get("model_name") - model_profile = str(model_config.get("model_profile", "code") or "code") - raw_dim = model_config.get("embedding_dim") - embedding_dim = int(raw_dim) if raw_dim else None - - if backend == "litellm": - embedder_key: tuple[Any, ...] = ("litellm", model_name or "", None) - else: - embedder_key = ("fastembed", model_profile, use_gpu) - - with self._centralized_cache_lock: - cached = self._centralized_embedder_cache.get(embedder_key) - if cached is None: - if backend == "litellm": - cached = get_embedder(backend="litellm", model=model_name) - else: - cached = get_embedder( - backend="fastembed", - profile=model_profile, - use_gpu=use_gpu, - ) - with self._centralized_cache_lock: - existing = self._centralized_embedder_cache.get(embedder_key) - if existing is None: - self._centralized_embedder_cache[embedder_key] = cached - else: - cached = existing - - if embedding_dim is None: - embedding_dim = int(getattr(cached, "embedding_dim", 0) or 0) - - return cached, embedding_dim, embedder_key - - def _get_centralized_ann_index(self, index_root: Path, dim: int) -> Any: - """Load and cache a centralized ANN index for repeated searches.""" - from codexlens.semantic.ann_index import ANNIndex - - resolved_root = Path(index_root).resolve() - cache_key = (str(resolved_root), int(dim)) - - with self._centralized_cache_lock: - cached = self._centralized_ann_cache.get(cache_key) - if cached is not None: - return cached - - ann_index = ANNIndex.create_central(index_root=resolved_root, dim=int(dim)) - if not ann_index.load(): - return None - - with self._centralized_cache_lock: - existing = self._centralized_ann_cache.get(cache_key) - if existing is None: - self._centralized_ann_cache[cache_key] = ann_index - return ann_index - return existing - - def _get_cached_query_embedding( - self, - query: str, - embedder: Any, - embedder_key: tuple[Any, ...], - ) -> Any: - """Cache repeated query embeddings for the same embedder settings.""" - cache_key = embedder_key + (query,) - - with self._centralized_cache_lock: - cached = self._centralized_query_embedding_cache.get(cache_key) - if cached is not None: - return cached - - query_embedding = embedder.embed_single(query) - with self._centralized_cache_lock: - existing = self._centralized_query_embedding_cache.get(cache_key) - if existing is None: - self._centralized_query_embedding_cache[cache_key] = query_embedding - return query_embedding - return existing - - def search( - self, - index_path: Path, - query: str, - limit: int = 20, - enable_fuzzy: bool = True, - enable_vector: bool = False, - pure_vector: bool = False, - enable_lsp_graph: bool = False, - lsp_max_depth: int = 1, - lsp_max_nodes: int = 20, - ) -> List[SearchResult]: - """Execute hybrid search with parallel retrieval and RRF fusion. - - Args: - index_path: Path to _index.db file - query: FTS5 query string (for FTS) or natural language query (for vector) - limit: Maximum results to return after fusion - enable_fuzzy: Enable fuzzy FTS search (default True) - enable_vector: Enable vector search (default False) - pure_vector: If True, only use vector search without FTS fallback (default False) - enable_lsp_graph: If True, enable real-time LSP graph expansion (default False) - lsp_max_depth: Maximum depth for LSP graph BFS expansion (default 1) - lsp_max_nodes: Maximum nodes to collect in LSP graph (default 20) - - Returns: - List of SearchResult objects sorted by fusion score - - Examples: - >>> engine = HybridSearchEngine() - >>> # Hybrid search (exact + fuzzy + vector) - >>> results = engine.search(Path("project/_index.db"), "authentication", - ... enable_vector=True) - >>> # Pure vector search (semantic only) - >>> results = engine.search(Path("project/_index.db"), - ... "how to authenticate users", - ... enable_vector=True, pure_vector=True) - >>> # With LSP graph expansion (real-time) - >>> results = engine.search(Path("project/_index.db"), "auth flow", - ... enable_vector=True, enable_lsp_graph=True) - >>> for r in results[:5]: - ... print(f"{r.path}: {r.score:.3f}") - """ - # Defensive: avoid creating/locking an index database when callers pass - # an empty placeholder file (common in tests and misconfigured callers). - try: - if index_path.exists() and index_path.stat().st_size == 0: - return [] - except OSError: - return [] - - # Detect query intent early for category filtering at index level - query_intent = detect_query_intent(query) - lexical_priority_query = query_prefers_lexical_search(query) - # Map intent to category for vector search: - # - KEYWORD (code intent) -> filter to 'code' only - # - SEMANTIC (doc intent) -> no filter (allow docs to surface) - # - MIXED -> no filter (allow all) - vector_category: Optional[str] = None - if query_intent == QueryIntent.KEYWORD: - vector_category = "code" - - # Determine which backends to use - backends = {} - - if pure_vector: - # Pure vector mode: only use vector search, no FTS fallback - if enable_vector: - backends["vector"] = True - else: - # Invalid configuration: pure_vector=True but enable_vector=False - self.logger.warning( - "pure_vector=True requires enable_vector=True. " - "Falling back to exact search. " - "To use pure vector search, enable vector search mode." - ) - backends["exact"] = True - else: - # Standard hybrid mode: FTS + optional vector - backends["exact"] = True - if enable_fuzzy: - backends["fuzzy"] = True - if enable_vector and not lexical_priority_query: - backends["vector"] = True - - # Add LSP graph expansion if requested and available - if enable_lsp_graph and HAS_LSP and not lexical_priority_query: - backends["lsp_graph"] = True - elif enable_lsp_graph and not HAS_LSP: - self.logger.warning( - "LSP graph search requested but dependencies not available. " - "Install: pip install aiohttp" - ) - - # Execute parallel searches - with timer("parallel_search_total", self.logger): - results_map = self._search_parallel( - index_path, query, backends, limit, vector_category, - lsp_max_depth, lsp_max_nodes - ) - - # Provide helpful message if pure-vector mode returns no results - if pure_vector and enable_vector and len(results_map.get("vector", [])) == 0: - self.logger.warning( - "Pure vector search returned no results. " - "This usually means embeddings haven't been generated. " - "Run: codexlens embeddings-generate %s", - index_path.parent if index_path.name == "_index.db" else index_path - ) - - # Apply RRF fusion - # Filter weights to only active backends - active_weights = { - source: weight - for source, weight in self._fusion_weights.items() - if source in results_map - } - - # Determine fusion method from config (default: rrf) - fusion_method = "rrf" - rrf_k = 60 - if self._config is not None: - fusion_method = getattr(self._config, "fusion_method", "rrf") or "rrf" - rrf_k = getattr(self._config, "rrf_k", 60) or 60 - - with timer("fusion", self.logger): - adaptive_weights = get_rrf_weights(query, active_weights) - if fusion_method == "simple": - fused_results = simple_weighted_fusion(results_map, adaptive_weights) - else: - # Default to RRF - fused_results = reciprocal_rank_fusion( - results_map, adaptive_weights, k=rrf_k - ) - - # Optional: boost results that include explicit symbol matches - boost_factor = ( - self._config.symbol_boost_factor - if self._config is not None - else 1.5 - ) - with timer("symbol_boost", self.logger): - fused_results = apply_symbol_boost( - fused_results, boost_factor=boost_factor - ) - - # Optional: embedding-based reranking on top results - if ( - self._config is not None - and self._config.enable_reranking - and not lexical_priority_query - ): - with timer("reranking", self.logger): - if self.embedder is None: - with self._centralized_cache_lock: - if self.embedder is None: - self.embedder = self._get_reranking_embedder() - fused_results = rerank_results( - query, - fused_results[:100], - self.embedder, - top_k=( - 100 - if self._config.enable_cross_encoder_rerank - else self._config.reranking_top_k - ), - ) - - # Optional: cross-encoder reranking as a second stage - if ( - self._config is not None - and self._config.enable_reranking - and self._config.enable_cross_encoder_rerank - and not lexical_priority_query - ): - with timer("cross_encoder_rerank", self.logger): - if self.reranker is None: - with self._centralized_cache_lock: - if self.reranker is None: - self.reranker = self._get_cross_encoder_reranker() - if self.reranker is not None: - fused_results = cross_encoder_rerank( - query, - fused_results, - self.reranker, - top_k=self._config.reranker_top_k, - ) - - # Apply category filtering to avoid code/doc pollution - # This ensures KEYWORD queries return code files, SEMANTIC queries prefer docs - enable_category_filter = ( - self._config is None - or getattr(self._config, 'enable_category_filter', True) - ) - if enable_category_filter and not pure_vector: - with timer("category_filter", self.logger): - query_intent = detect_query_intent(query) - fused_results = filter_results_by_category( - fused_results, query_intent, allow_mixed=True - ) - - # Apply final limit - return fused_results[:limit] - - def _get_reranking_embedder(self) -> Any: - """Create an embedder for reranking based on Config embedding settings.""" - if self._config is None: - return None - - try: - from codexlens.semantic.factory import get_embedder - except Exception as exc: - self.logger.debug("Reranking embedder unavailable: %s", exc) - return None - - try: - if self._config.embedding_backend == "fastembed": - return get_embedder( - backend="fastembed", - profile=self._config.embedding_model, - use_gpu=self._config.embedding_use_gpu, - ) - if self._config.embedding_backend == "litellm": - return get_embedder( - backend="litellm", - model=self._config.embedding_model, - endpoints=self._config.embedding_endpoints, - strategy=self._config.embedding_strategy, - cooldown=self._config.embedding_cooldown, - ) - except Exception as exc: - self.logger.debug("Failed to initialize reranking embedder: %s", exc) - return None - - self.logger.debug( - "Unknown embedding backend for reranking: %s", - self._config.embedding_backend, - ) - return None - - def _get_cross_encoder_reranker(self) -> Any: - if self._config is None: - return None - - try: - from codexlens.semantic.reranker import ( - check_reranker_available, - get_reranker, - ) - except Exception as exc: - self.logger.debug("Reranker factory unavailable: %s", exc) - return None - - backend = (getattr(self._config, "reranker_backend", "") or "").strip().lower() or "onnx" - - ok, err = check_reranker_available(backend) - if not ok: - self.logger.debug( - "Reranker backend unavailable (backend=%s): %s", - backend, - err, - ) - return None - - try: - model_name = (getattr(self._config, "reranker_model", "") or "").strip() or None - - if backend != "legacy" and model_name == "cross-encoder/ms-marco-MiniLM-L-6-v2": - model_name = None - - device: str | None = None - kwargs: dict[str, Any] = {} - reranker_use_gpu = bool( - getattr( - self._config, - "reranker_use_gpu", - getattr(self._config, "embedding_use_gpu", True), - ) - ) - - if backend == "onnx": - kwargs["use_gpu"] = reranker_use_gpu - elif backend == "legacy": - if not reranker_use_gpu: - device = "cpu" - elif backend == "api": - # Pass max_input_tokens for adaptive batching - max_tokens = getattr(self._config, "reranker_max_input_tokens", None) - if max_tokens: - kwargs["max_input_tokens"] = max_tokens - - return get_reranker( - backend=backend, - model_name=model_name, - device=device, - **kwargs, - ) - except Exception as exc: - self.logger.debug( - "Failed to initialize reranker (backend=%s): %s", - backend, - exc, - ) - return None - - def _search_parallel( - self, - index_path: Path, - query: str, - backends: Dict[str, bool], - limit: int, - category: Optional[str] = None, - lsp_max_depth: int = 1, - lsp_max_nodes: int = 20, - ) -> Dict[str, List[SearchResult]]: - """Execute parallel searches across enabled backends. - - Args: - index_path: Path to _index.db file - query: FTS5 query string - backends: Dictionary of backend name to enabled flag - limit: Results limit per backend - category: Optional category filter for vector search ('code' or 'doc') - lsp_max_depth: Maximum depth for LSP graph BFS expansion (default 1) - lsp_max_nodes: Maximum nodes to collect in LSP graph (default 20) - - Returns: - Dictionary mapping source name to results list - """ - results_map: Dict[str, List[SearchResult]] = {} - timing_data: Dict[str, float] = {} - - # Use ThreadPoolExecutor for parallel I/O-bound searches - with ThreadPoolExecutor(max_workers=len(backends)) as executor: - # Submit search tasks with timing - future_to_source = {} - submit_times = {} - - if backends.get("exact"): - submit_times["exact"] = time.perf_counter() - future = executor.submit( - self._search_exact, index_path, query, limit - ) - future_to_source[future] = "exact" - - if backends.get("fuzzy"): - submit_times["fuzzy"] = time.perf_counter() - future = executor.submit( - self._search_fuzzy, index_path, query, limit - ) - future_to_source[future] = "fuzzy" - - if backends.get("vector"): - submit_times["vector"] = time.perf_counter() - future = executor.submit( - self._search_vector, index_path, query, limit, category - ) - future_to_source[future] = "vector" - - if backends.get("lsp_graph"): - submit_times["lsp_graph"] = time.perf_counter() - future = executor.submit( - self._search_lsp_graph, index_path, query, limit, - lsp_max_depth, lsp_max_nodes - ) - future_to_source[future] = "lsp_graph" - - # Collect results as they complete with timeout protection - try: - for future in as_completed(future_to_source, timeout=30.0): - source = future_to_source[future] - elapsed_ms = (time.perf_counter() - submit_times[source]) * 1000 - timing_data[source] = elapsed_ms - try: - results = future.result(timeout=10.0) - # Tag results with source for debugging - tagged_results = tag_search_source(results, source) - results_map[source] = tagged_results - self.logger.debug( - "[TIMING] %s_search: %.2fms (%d results)", - source, elapsed_ms, len(results) - ) - except (Exception, FuturesTimeoutError) as exc: - self.logger.error("Search failed for %s: %s", source, exc) - results_map[source] = [] - except FuturesTimeoutError: - self.logger.warning("Search timeout: some backends did not respond in time") - # Cancel remaining futures - for future in future_to_source: - future.cancel() - # Set empty results for sources that didn't complete - for source in backends: - if source not in results_map: - results_map[source] = [] - - # Log timing summary - if timing_data: - timing_str = ", ".join(f"{k}={v:.1f}ms" for k, v in timing_data.items()) - self.logger.debug("[TIMING] search_backends: {%s}", timing_str) - - return results_map - - def _search_exact( - self, index_path: Path, query: str, limit: int - ) -> List[SearchResult]: - """Execute exact FTS search using unicode61 tokenizer. - - Args: - index_path: Path to _index.db file - query: FTS5 query string - limit: Maximum results - - Returns: - List of SearchResult objects - """ - try: - with DirIndexStore(index_path) as store: - return store.search_fts_exact( - query, limit=limit, return_full_content=True - ) - except Exception as exc: - self.logger.debug("Exact search error: %s", exc) - return [] - - def _search_fuzzy( - self, index_path: Path, query: str, limit: int - ) -> List[SearchResult]: - """Execute fuzzy FTS search using trigram/extended unicode61 tokenizer. - - Args: - index_path: Path to _index.db file - query: FTS5 query string - limit: Maximum results - - Returns: - List of SearchResult objects - """ - try: - with DirIndexStore(index_path) as store: - return store.search_fts_fuzzy( - query, limit=limit, return_full_content=True - ) - except Exception as exc: - self.logger.debug("Fuzzy search error: %s", exc) - return [] - - def _find_vectors_hnsw(self, index_path: Path) -> Optional[Path]: - """Find the centralized _vectors.hnsw file by traversing up from index_path. - - Searches for the centralized dense vector index file in parent directories. - - Args: - index_path: Path to the current _index.db file - - Returns: - Path to _vectors.hnsw if found, None otherwise - """ - current_dir = index_path.parent - for _ in range(10): # Limit search depth - candidate = current_dir / VECTORS_HNSW_NAME - if candidate.exists(): - return candidate - parent = current_dir.parent - if parent == current_dir: # Reached root - break - current_dir = parent - return None - - def _search_vector_centralized( - self, - index_path: Path, - hnsw_path: Path, - query: str, - limit: int, - category: Optional[str] = None, - ) -> List[SearchResult]: - """Search using centralized vector index. - - Args: - index_path: Path to _index.db file (for metadata lookup) - hnsw_path: Path to centralized _vectors.hnsw file - query: Natural language query string - limit: Maximum results - category: Optional category filter ('code' or 'doc') - - Returns: - List of SearchResult objects ordered by semantic similarity - """ - try: - index_root = hnsw_path.parent - model_config = self._get_centralized_model_config(index_root) - if model_config is None: - self.logger.debug("Model config not found, will detect from cached embedder") - embedder, embedding_dim, embedder_key = self._get_centralized_embedder(model_config) - - # Load centralized ANN index - start_load = time.perf_counter() - ann_index = self._get_centralized_ann_index(index_root=index_root, dim=embedding_dim) - if ann_index is None: - self.logger.warning("Failed to load centralized vector index from %s", hnsw_path) - return [] - self.logger.debug( - "[TIMING] central_ann_load: %.2fms (%d vectors)", - (time.perf_counter() - start_load) * 1000, - ann_index.count() - ) - - # Generate query embedding - start_embed = time.perf_counter() - query_embedding = self._get_cached_query_embedding(query, embedder, embedder_key) - self.logger.debug( - "[TIMING] query_embedding: %.2fms", - (time.perf_counter() - start_embed) * 1000 - ) - - # Search ANN index - start_search = time.perf_counter() - import numpy as np - query_vec = np.array(query_embedding, dtype=np.float32) - ids, distances = ann_index.search(query_vec, top_k=limit * 2) # Fetch extra for filtering - self.logger.debug( - "[TIMING] central_ann_search: %.2fms (%d results)", - (time.perf_counter() - start_search) * 1000, - len(ids) if ids else 0 - ) - - if not ids: - return [] - - # Convert distances to similarity scores (for cosine: score = 1 - distance) - scores = [self._clamp_search_score(1.0 - d) for d in distances] - - # Fetch chunk metadata from semantic_chunks tables - # We need to search across all _index.db files in the project - results = self._fetch_chunks_by_ids_centralized( - index_root, ids, scores, category - ) - - return results[:limit] - - except ImportError as exc: - self.logger.debug("Semantic dependencies not available: %s", exc) - return [] - except Exception as exc: - self.logger.error("Centralized vector search error: %s", exc) - return [] - - def _fetch_chunks_by_ids_centralized( - self, - index_root: Path, - chunk_ids: List[int], - scores: List[float], - category: Optional[str] = None, - ) -> List[SearchResult]: - """Fetch chunk metadata from centralized _vectors_meta.db for fast lookup. - - This method uses the centralized VectorMetadataStore for O(1) lookup - instead of traversing all _index.db files (O(n) where n = number of indexes). - - Falls back to the legacy per-index lookup if centralized metadata is unavailable. - - Args: - index_root: Root directory containing _vectors_meta.db - chunk_ids: List of chunk IDs from ANN search - scores: Corresponding similarity scores - category: Optional category filter - - Returns: - List of SearchResult objects - """ - from codexlens.config import VECTORS_META_DB_NAME - - # Build score map - score_map = {cid: score for cid, score in zip(chunk_ids, scores)} - - # Try centralized metadata store first (fast path) - vectors_meta_path = index_root / VECTORS_META_DB_NAME - if vectors_meta_path.exists(): - try: - return self._fetch_from_vector_meta_store( - vectors_meta_path, chunk_ids, score_map, category - ) - except Exception as e: - self.logger.warning( - "Centralized metadata lookup failed, falling back to legacy traversal: %s. " - "Consider regenerating embeddings with: codexlens embeddings-generate --centralized", - e - ) - - # Fallback: traverse _index.db files (legacy path) - return self._fetch_chunks_by_ids_legacy( - index_root, chunk_ids, score_map, category - ) - - def _fetch_from_vector_meta_store( - self, - meta_db_path: Path, - chunk_ids: List[int], - score_map: Dict[int, float], - category: Optional[str] = None, - ) -> List[SearchResult]: - """Fetch chunks from centralized VectorMetadataStore. - - Args: - meta_db_path: Path to _vectors_meta.db - chunk_ids: List of chunk IDs to fetch - score_map: Mapping of chunk_id to score - category: Optional category filter - - Returns: - List of SearchResult objects - """ - from codexlens.storage.vector_meta_store import VectorMetadataStore - - results = [] - - with VectorMetadataStore(meta_db_path) as meta_store: - rows = meta_store.get_chunks_by_ids(chunk_ids, category=category) - - for row in rows: - chunk_id = row["chunk_id"] - file_path = row["file_path"] - content = row["content"] or "" - metadata = row.get("metadata") or {} - start_line = row.get("start_line") - end_line = row.get("end_line") - - score = self._clamp_search_score(score_map.get(chunk_id, 0.0)) - - # Build excerpt - excerpt = content[:200] + "..." if len(content) > 200 else content - - # Extract symbol information - symbol_name = metadata.get("symbol_name") - symbol_kind = metadata.get("symbol_kind") - - # Build Symbol object if available - symbol = None - if symbol_name and symbol_kind and start_line and end_line: - try: - from codexlens.entities import Symbol - symbol = Symbol( - name=symbol_name, - kind=symbol_kind, - range=(start_line, end_line) - ) - except Exception: - pass - - results.append(SearchResult( - path=file_path, - score=score, - excerpt=excerpt, - content=content, - symbol=symbol, - metadata=metadata, - start_line=start_line, - end_line=end_line, - symbol_name=symbol_name, - symbol_kind=symbol_kind, - )) - - # Sort by score descending - results.sort(key=lambda r: r.score, reverse=True) - return results - - def _fetch_chunks_by_ids_legacy( - self, - index_root: Path, - chunk_ids: List[int], - score_map: Dict[int, float], - category: Optional[str] = None, - ) -> List[SearchResult]: - """Legacy fallback: fetch chunk metadata by traversing all _index.db files. - - This is the O(n) fallback path used when centralized metadata is unavailable. - - Args: - index_root: Root directory containing _index.db files - chunk_ids: List of chunk IDs from ANN search - score_map: Mapping of chunk_id to score - category: Optional category filter - - Returns: - List of SearchResult objects - """ - import sqlite3 - import json - - # Find all _index.db files - index_files = filter_index_paths(index_root.rglob("_index.db"), index_root) - - results = [] - found_ids = set() - - for index_path in index_files: - try: - with sqlite3.connect(index_path) as conn: - conn.row_factory = sqlite3.Row - - # Check if semantic_chunks table exists - cursor = conn.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_chunks'" - ) - if cursor.fetchone() is None: - continue - - # Build query for chunk IDs we haven't found yet - remaining_ids = [cid for cid in chunk_ids if cid not in found_ids] - if not remaining_ids: - break - - placeholders = ",".join("?" * len(remaining_ids)) - - if category: - query = f""" - SELECT id, file_path, content, metadata - FROM semantic_chunks - WHERE id IN ({placeholders}) AND category = ? - """ - params = remaining_ids + [category] - else: - query = f""" - SELECT id, file_path, content, metadata - FROM semantic_chunks - WHERE id IN ({placeholders}) - """ - params = remaining_ids - - rows = conn.execute(query, params).fetchall() - - for row in rows: - chunk_id = row["id"] - if chunk_id in found_ids: - continue - found_ids.add(chunk_id) - - file_path = row["file_path"] - content = row["content"] - metadata_json = row["metadata"] - metadata = json.loads(metadata_json) if metadata_json else {} - - score = self._clamp_search_score(score_map.get(chunk_id, 0.0)) - - # Build excerpt - excerpt = content[:200] + "..." if len(content) > 200 else content - - # Extract symbol information - symbol_name = metadata.get("symbol_name") - symbol_kind = metadata.get("symbol_kind") - start_line = metadata.get("start_line") - end_line = metadata.get("end_line") - - # Build Symbol object if available - symbol = None - if symbol_name and symbol_kind and start_line and end_line: - try: - from codexlens.entities import Symbol - symbol = Symbol( - name=symbol_name, - kind=symbol_kind, - range=(start_line, end_line) - ) - except Exception: - pass - - results.append(SearchResult( - path=file_path, - score=score, - excerpt=excerpt, - content=content, - symbol=symbol, - metadata=metadata, - start_line=start_line, - end_line=end_line, - symbol_name=symbol_name, - symbol_kind=symbol_kind, - )) - - except Exception as e: - self.logger.debug("Failed to fetch chunks from %s: %s", index_path, e) - continue - - # Sort by score descending - results.sort(key=lambda r: r.score, reverse=True) - return results - - def _search_vector( - self, index_path: Path, query: str, limit: int, category: Optional[str] = None - ) -> List[SearchResult]: - """Execute vector similarity search using semantic embeddings. - - Supports both centralized vector storage (single _vectors.hnsw at project root) - and distributed storage (per-directory .hnsw files). - - Args: - index_path: Path to _index.db file - query: Natural language query string - limit: Maximum results - category: Optional category filter ('code' or 'doc') - - Returns: - List of SearchResult objects ordered by semantic similarity - """ - try: - # First, check for centralized vector index - central_hnsw_path = self._find_vectors_hnsw(index_path) - if central_hnsw_path is not None: - self.logger.debug("Found centralized vector index at %s", central_hnsw_path) - return self._search_vector_centralized( - index_path, central_hnsw_path, query, limit, category - ) - - # Fallback to distributed (per-index) vector storage - # Check if semantic chunks table exists - import sqlite3 - - start_check = time.perf_counter() - try: - with sqlite3.connect(index_path) as conn: - cursor = conn.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_chunks'" - ) - has_semantic_table = cursor.fetchone() is not None - except sqlite3.Error as e: - self.logger.error("Database check failed in vector search: %s", e) - return [] - self.logger.debug( - "[TIMING] vector_table_check: %.2fms", - (time.perf_counter() - start_check) * 1000 - ) - - if not has_semantic_table: - self.logger.info( - "No embeddings found in index. " - "Generate embeddings with: codexlens embeddings-generate %s", - index_path.parent if index_path.name == "_index.db" else index_path - ) - return [] - - # Initialize embedder and vector store - from codexlens.semantic.factory import get_embedder - from codexlens.semantic.vector_store import VectorStore - - start_init = time.perf_counter() - vector_store = VectorStore(index_path) - self.logger.debug( - "[TIMING] vector_store_init: %.2fms", - (time.perf_counter() - start_init) * 1000 - ) - - # Check if vector store has data - if vector_store.count_chunks() == 0: - self.logger.info( - "Vector store is empty (0 chunks). " - "Generate embeddings with: codexlens embeddings-generate %s", - index_path.parent if index_path.name == "_index.db" else index_path - ) - return [] - - # Get stored model configuration (preferred) or auto-detect from dimension - start_embedder = time.perf_counter() - model_config = vector_store.get_model_config() - if model_config: - backend = model_config.get("backend", "fastembed") - model_name = model_config["model_name"] - model_profile = model_config["model_profile"] - self.logger.debug( - "Using stored model config: %s backend, %s (%s, %dd)", - backend, model_profile, model_name, model_config["embedding_dim"] - ) - - # Get embedder based on backend - if backend == "litellm": - embedder = get_embedder(backend="litellm", model=model_name) - else: - embedder = get_embedder(backend="fastembed", profile=model_profile) - else: - # Fallback: auto-detect from embedding dimension - detected_dim = vector_store.dimension - if detected_dim is None: - self.logger.info("Vector store dimension unknown, using default profile") - embedder = get_embedder(backend="fastembed", profile="code") - elif detected_dim == 384: - embedder = get_embedder(backend="fastembed", profile="fast") - elif detected_dim == 768: - embedder = get_embedder(backend="fastembed", profile="code") - elif detected_dim == 1024: - embedder = get_embedder(backend="fastembed", profile="multilingual") - elif detected_dim == 1536: - # Likely OpenAI text-embedding-3-small or ada-002 - self.logger.info( - "Detected 1536-dim embeddings (likely OpenAI), using litellm backend with text-embedding-3-small" - ) - embedder = get_embedder(backend="litellm", model="text-embedding-3-small") - elif detected_dim == 3072: - # Likely OpenAI text-embedding-3-large - self.logger.info( - "Detected 3072-dim embeddings (likely OpenAI), using litellm backend with text-embedding-3-large" - ) - embedder = get_embedder(backend="litellm", model="text-embedding-3-large") - else: - self.logger.debug( - "Unknown dimension %s, using default fastembed profile 'code'", - detected_dim - ) - embedder = get_embedder(backend="fastembed", profile="code") - self.logger.debug( - "[TIMING] embedder_init: %.2fms", - (time.perf_counter() - start_embedder) * 1000 - ) - - # Generate query embedding - start_embed = time.perf_counter() - query_embedding = embedder.embed_single(query) - self.logger.debug( - "[TIMING] query_embedding: %.2fms", - (time.perf_counter() - start_embed) * 1000 - ) - - # Search for similar chunks - start_search = time.perf_counter() - results = vector_store.search_similar( - query_embedding=query_embedding, - top_k=limit, - min_score=0.0, # Return all results, let RRF handle filtering - return_full_content=True, - category=category, - ) - self.logger.debug( - "[TIMING] vector_similarity_search: %.2fms (%d results)", - (time.perf_counter() - start_search) * 1000, len(results) - ) - - return results - - except ImportError as exc: - self.logger.debug("Semantic dependencies not available: %s", exc) - return [] - except Exception as exc: - self.logger.error("Vector search error: %s", exc) - return [] - - def _search_lsp_graph( - self, - index_path: Path, - query: str, - limit: int, - max_depth: int = 1, - max_nodes: int = 20, - ) -> List[SearchResult]: - """Execute LSP-based graph expansion search. - - Uses real-time LSP to expand from seed results and find related code. - This provides accurate, up-to-date code relationships. - - Args: - index_path: Path to _index.db file - query: Natural language query string - limit: Maximum results - max_depth: Maximum depth for LSP graph BFS expansion (default 1) - max_nodes: Maximum nodes to collect in LSP graph (default 20) - - Returns: - List of SearchResult from graph expansion - """ - import asyncio - - if not HAS_LSP: - self.logger.debug("LSP dependencies not available") - return [] - - try: - # Try multiple seed sources in priority order - seeds = [] - seed_source = "none" - - # 1. Try vector search first (best semantic match) - seeds = self._search_vector(index_path, query, limit=3, category="code") - if seeds: - seed_source = "vector" - - # 2. Fallback to exact FTS if vector returns nothing - if not seeds: - self.logger.debug("Vector search returned no seeds, trying exact FTS") - seeds = self._search_exact(index_path, query, limit=3) - if seeds: - seed_source = "exact_fts" - - # 3. No seeds available from any source - if not seeds: - self.logger.debug("No seed results available for LSP graph expansion") - return [] - - self.logger.debug( - "LSP graph expansion using %d seeds from %s", - len(seeds), - seed_source, - ) - - # Convert SearchResult to CodeSymbolNode for LSP processing - from codexlens.hybrid_search.data_structures import CodeSymbolNode, Range - - seed_nodes = [] - for seed in seeds: - try: - node = CodeSymbolNode( - id=f"{seed.path}:{seed.symbol_name or 'unknown'}:{seed.start_line or 0}", - name=seed.symbol_name or "unknown", - kind=seed.symbol_kind or "unknown", - file_path=seed.path, - range=Range( - start_line=seed.start_line or 1, - start_character=0, - end_line=seed.end_line or seed.start_line or 1, - end_character=0, - ), - raw_code=seed.content or "", - docstring=seed.excerpt or "", - ) - seed_nodes.append(node) - except Exception as e: - self.logger.debug("Failed to create seed node: %s", e) - continue - - if not seed_nodes: - return [] - - # Run async LSP expansion in sync context - async def expand_graph(): - async with LspBridge() as bridge: - builder = LspGraphBuilder(max_depth=max_depth, max_nodes=max_nodes) - graph = await builder.build_from_seeds(seed_nodes, bridge) - return graph - - # Run the async code - try: - loop = asyncio.get_event_loop() - if loop.is_running(): - # Already in async context - use run_coroutine_threadsafe - import concurrent.futures - future = asyncio.run_coroutine_threadsafe(expand_graph(), loop) - graph = future.result(timeout=5.0) - else: - graph = loop.run_until_complete(expand_graph()) - except RuntimeError: - # No event loop - create new one - graph = asyncio.run(expand_graph()) - - # Convert graph nodes to SearchResult - # Create set of seed identifiers for fast lookup - seed_ids = set() - for seed in seeds: - seed_id = f"{seed.path}:{seed.symbol_name or 'unknown'}:{seed.start_line or 0}" - seed_ids.add(seed_id) - - results = [] - for node_id, node in graph.nodes.items(): - # Skip seed nodes using ID comparison (already in other results) - if node_id in seed_ids or node.id in seed_ids: - continue - - # Calculate score based on graph position - # Nodes closer to seeds get higher scores - depth = 1 # Simple heuristic, could be improved - score = 0.8 / (1 + depth) # Score decreases with depth - - results.append(SearchResult( - path=node.file_path, - score=score, - excerpt=node.docstring[:200] if node.docstring else node.raw_code[:200] if node.raw_code else "", - content=node.raw_code, - symbol=None, - metadata={"lsp_node_id": node_id, "lsp_kind": node.kind}, - start_line=node.range.start_line, - end_line=node.range.end_line, - symbol_name=node.name, - symbol_kind=node.kind, - )) - - # Sort by score - results.sort(key=lambda r: r.score, reverse=True) - return results[:limit] - - except Exception as exc: - self.logger.debug("LSP graph search error: %s", exc) - return [] diff --git a/codex-lens/src/codexlens/search/query_parser.py b/codex-lens/src/codexlens/search/query_parser.py deleted file mode 100644 index 05b337f5..00000000 --- a/codex-lens/src/codexlens/search/query_parser.py +++ /dev/null @@ -1,242 +0,0 @@ -"""Query preprocessing for CodexLens search. - -Provides query expansion for better identifier matching: -- CamelCase splitting: UserAuth → User OR Auth -- snake_case splitting: user_auth → user OR auth -- Preserves original query for exact matching -""" - -from __future__ import annotations - -import logging -import re -from typing import Set, List - -log = logging.getLogger(__name__) - - -class QueryParser: - """Parser for preprocessing search queries before FTS5 execution. - - Expands identifier-style queries (CamelCase, snake_case) into OR queries - to improve recall when searching for code symbols. - - Example transformations: - - 'UserAuth' → 'UserAuth OR User OR Auth' - - 'user_auth' → 'user_auth OR user OR auth' - - 'getUserData' → 'getUserData OR get OR User OR Data' - """ - - # Patterns for identifier splitting - CAMEL_CASE_PATTERN = re.compile(r'([a-z])([A-Z])') - SNAKE_CASE_PATTERN = re.compile(r'_+') - KEBAB_CASE_PATTERN = re.compile(r'-+') - - # Minimum token length to include in expansion (avoid noise from single chars) - MIN_TOKEN_LENGTH = 2 - - # All-caps acronyms pattern (e.g., HTTP, SQL, API) - ALL_CAPS_PATTERN = re.compile(r'^[A-Z]{2,}$') - - def __init__(self, enable: bool = True, min_token_length: int = 2): - """Initialize query parser. - - Args: - enable: Whether to enable query preprocessing - min_token_length: Minimum token length to include in expansion - """ - self.enable = enable - self.min_token_length = min_token_length - - def preprocess_query(self, query: str) -> str: - """Preprocess query with identifier expansion. - - Args: - query: Original search query - - Returns: - Expanded query with OR operator connecting original and split tokens - - Example: - >>> parser = QueryParser() - >>> parser.preprocess_query('UserAuth') - 'UserAuth OR User OR Auth' - >>> parser.preprocess_query('get_user_data') - 'get_user_data OR get OR user OR data' - """ - if not self.enable: - return query - - query = query.strip() - if not query: - return query - - # Extract tokens from query (handle multiple words/terms) - # For simple queries, just process the whole thing - # For complex FTS5 queries with operators, preserve structure - if self._is_simple_query(query): - return self._expand_simple_query(query) - else: - # Complex query with FTS5 operators, don't expand - log.debug(f"Skipping expansion for complex FTS5 query: {query}") - return query - - def _is_simple_query(self, query: str) -> bool: - """Check if query is simple (no FTS5 operators). - - Args: - query: Search query - - Returns: - True if query is simple (safe to expand), False otherwise - """ - # Check for FTS5 operators that indicate complex query - fts5_operators = ['OR', 'AND', 'NOT', 'NEAR', '*', '^', '"'] - return not any(op in query for op in fts5_operators) - - def _expand_simple_query(self, query: str) -> str: - """Expand a simple query with identifier splitting. - - Args: - query: Simple search query - - Returns: - Expanded query with OR operators - """ - tokens: Set[str] = set() - - # Always include original query - tokens.add(query) - - # Split on whitespace first - words = query.split() - - for word in words: - # Extract tokens from this word - word_tokens = self._extract_tokens(word) - tokens.update(word_tokens) - - # Filter out short tokens and duplicates - filtered_tokens = [ - t for t in tokens - if len(t) >= self.min_token_length - ] - - # Remove duplicates while preserving original query first - unique_tokens: List[str] = [] - seen: Set[str] = set() - - # Always put original query first - if query not in seen and len(query) >= self.min_token_length: - unique_tokens.append(query) - seen.add(query) - - # Add other tokens - for token in filtered_tokens: - if token not in seen: - unique_tokens.append(token) - seen.add(token) - - # Join with OR operator (only if we have multiple tokens) - if len(unique_tokens) > 1: - expanded = ' OR '.join(unique_tokens) - log.debug(f"Expanded query: '{query}' → '{expanded}'") - return expanded - else: - return query - - def _extract_tokens(self, word: str) -> Set[str]: - """Extract tokens from a single word using various splitting strategies. - - Args: - word: Single word/identifier to split - - Returns: - Set of extracted tokens - """ - tokens: Set[str] = set() - - # Add original word - tokens.add(word) - - # Handle all-caps acronyms (don't split) - if self.ALL_CAPS_PATTERN.match(word): - return tokens - - # CamelCase splitting - camel_tokens = self._split_camel_case(word) - tokens.update(camel_tokens) - - # snake_case splitting - snake_tokens = self._split_snake_case(word) - tokens.update(snake_tokens) - - # kebab-case splitting - kebab_tokens = self._split_kebab_case(word) - tokens.update(kebab_tokens) - - return tokens - - def _split_camel_case(self, word: str) -> List[str]: - """Split CamelCase identifier into tokens. - - Args: - word: CamelCase identifier (e.g., 'getUserData') - - Returns: - List of tokens (e.g., ['get', 'User', 'Data']) - """ - # Insert space before uppercase letters preceded by lowercase - spaced = self.CAMEL_CASE_PATTERN.sub(r'\1 \2', word) - # Split on spaces and filter empty - return [t for t in spaced.split() if t] - - def _split_snake_case(self, word: str) -> List[str]: - """Split snake_case identifier into tokens. - - Args: - word: snake_case identifier (e.g., 'get_user_data') - - Returns: - List of tokens (e.g., ['get', 'user', 'data']) - """ - # Split on underscores - return [t for t in self.SNAKE_CASE_PATTERN.split(word) if t] - - def _split_kebab_case(self, word: str) -> List[str]: - """Split kebab-case identifier into tokens. - - Args: - word: kebab-case identifier (e.g., 'get-user-data') - - Returns: - List of tokens (e.g., ['get', 'user', 'data']) - """ - # Split on hyphens - return [t for t in self.KEBAB_CASE_PATTERN.split(word) if t] - - -# Global default parser instance -_default_parser = QueryParser(enable=True) - - -def preprocess_query(query: str, enable: bool = True) -> str: - """Convenience function for query preprocessing. - - Args: - query: Original search query - enable: Whether to enable preprocessing - - Returns: - Preprocessed query with identifier expansion - """ - if not enable: - return query - - return _default_parser.preprocess_query(query) - - -__all__ = [ - "QueryParser", - "preprocess_query", -] diff --git a/codex-lens/src/codexlens/search/ranking.py b/codex-lens/src/codexlens/search/ranking.py deleted file mode 100644 index 5c6bf346..00000000 --- a/codex-lens/src/codexlens/search/ranking.py +++ /dev/null @@ -1,1701 +0,0 @@ -"""Ranking algorithms for hybrid search result fusion. - -Implements Reciprocal Rank Fusion (RRF) and score normalization utilities -for combining results from heterogeneous search backends (exact FTS, fuzzy FTS, vector search). -""" - -from __future__ import annotations - -import logging -import re -import math -from enum import Enum -from pathlib import Path -from typing import Any, Dict, List, Optional - -from codexlens.entities import SearchResult, AdditionalLocation - -logger = logging.getLogger(__name__) - - -# Default RRF weights for hybrid search -DEFAULT_WEIGHTS = { - "exact": 0.25, - "fuzzy": 0.1, - "vector": 0.5, - "lsp_graph": 0.15, -} - - -class QueryIntent(str, Enum): - """Query intent for adaptive RRF weights (Python/TypeScript parity).""" - - KEYWORD = "keyword" - SEMANTIC = "semantic" - MIXED = "mixed" - - -_TEST_QUERY_RE = re.compile( - r"\b(test|tests|spec|specs|fixture|fixtures|benchmark|benchmarks)\b", - flags=re.IGNORECASE, -) -_AUXILIARY_QUERY_RE = re.compile( - r"\b(example|examples|demo|demos|sample|samples|debug|benchmark|benchmarks|profile|profiling)\b", - flags=re.IGNORECASE, -) -_ARTIFACT_QUERY_RE = re.compile( - r"(? Dict[str, float | None]: - """Normalize weights to sum to 1.0 (best-effort).""" - total = sum(float(v) for v in weights.values() if v is not None) - - # NaN total: do not attempt to normalize (division would propagate NaNs). - if math.isnan(total): - return dict(weights) - - # Infinite total: do not attempt to normalize (division yields 0 or NaN). - if not math.isfinite(total): - return dict(weights) - - # Zero/negative total: do not attempt to normalize (invalid denominator). - if total <= 0: - return dict(weights) - - return {k: (float(v) / total if v is not None else None) for k, v in weights.items()} - - -def detect_query_intent(query: str) -> QueryIntent: - """Detect whether a query is code-like, natural-language, or mixed. - - Heuristic signals kept aligned with `ccw/src/tools/smart-search.ts`. - """ - trimmed = (query or "").strip() - if not trimmed: - return QueryIntent.MIXED - - lower = trimmed.lower() - word_count = len([w for w in re.split(r"\s+", trimmed) if w]) - - has_code_signals = bool( - re.search(r"(::|->|\.)", trimmed) - or re.search(r"[A-Z][a-z]+[A-Z]", trimmed) - or re.search(r"\b[a-z]+[A-Z][A-Za-z0-9_]*\b", trimmed) - or re.search(r"\b\w+_\w+\b", trimmed) - or re.search( - r"\b(def|class|function|const|let|var|import|from|return|async|await|interface|type)\b", - lower, - flags=re.IGNORECASE, - ) - ) - has_natural_signals = bool( - word_count > 5 - or "?" in trimmed - or re.search(r"\b(how|what|why|when|where)\b", trimmed, flags=re.IGNORECASE) - or re.search( - r"\b(handle|explain|fix|implement|create|build|use|find|search|convert|parse|generate|support)\b", - trimmed, - flags=re.IGNORECASE, - ) - ) - - if has_code_signals and has_natural_signals: - return QueryIntent.MIXED - if has_code_signals: - return QueryIntent.KEYWORD - if has_natural_signals: - return QueryIntent.SEMANTIC - return QueryIntent.MIXED - - -def adjust_weights_by_intent( - intent: QueryIntent, - base_weights: Dict[str, float], -) -> Dict[str, float]: - """Adjust RRF weights based on query intent.""" - if intent == QueryIntent.KEYWORD: - target = {"exact": 0.5, "fuzzy": 0.1, "vector": 0.4} - elif intent == QueryIntent.SEMANTIC: - target = {"exact": 0.2, "fuzzy": 0.1, "vector": 0.7} - else: - target = dict(base_weights) - - # Filter to active backends - keys = list(base_weights.keys()) - filtered = {k: float(target.get(k, 0.0)) for k in keys} - return normalize_weights(filtered) - - -def get_rrf_weights( - query: str, - base_weights: Dict[str, float], -) -> Dict[str, float]: - """Compute adaptive RRF weights from query intent.""" - return adjust_weights_by_intent(detect_query_intent(query), base_weights) - - -def query_targets_test_files(query: str) -> bool: - """Return True when the query explicitly targets tests/spec fixtures.""" - return bool(_TEST_QUERY_RE.search((query or "").strip())) - - -def query_targets_generated_files(query: str) -> bool: - """Return True when the query explicitly targets generated/build artifacts.""" - return bool(_ARTIFACT_QUERY_RE.search((query or "").strip())) - - -def query_targets_auxiliary_files(query: str) -> bool: - """Return True when the query explicitly targets examples, benchmarks, or debug files.""" - return bool(_AUXILIARY_QUERY_RE.search((query or "").strip())) - - -def query_prefers_lexical_search(query: str) -> bool: - """Return True when config/env/factory style queries are safer with lexical-first search.""" - trimmed = (query or "").strip() - if not trimmed: - return False - - if _ENV_STYLE_QUERY_RE.search(trimmed): - return True - - query_tokens = set(_semantic_query_topic_tokens(trimmed)) - if not query_tokens: - return False - - if query_tokens.intersection({"factory", "factories"}): - return True - - if query_tokens.intersection({"environment", "env"}) and query_tokens.intersection({"variable", "variables"}): - return True - - if "backend" in query_tokens and query_tokens.intersection( - {"embedding", "embeddings", "reranker", "rerankers", "onnx", "api", "litellm", "fastembed", "local", "legacy"} - ): - return True - - surface_hits = query_tokens.intersection(_LEXICAL_PRIORITY_SURFACE_TOKENS) - focus_hits = query_tokens.intersection(_LEXICAL_PRIORITY_FOCUS_TOKENS) - return bool(surface_hits and focus_hits) - - -def _normalized_path_parts(path: str) -> List[str]: - """Normalize a path string into casefolded components for heuristics.""" - normalized = (path or "").replace("\\", "/") - return [part.casefold() for part in normalized.split("/") if part and part != "."] - - -# File extensions to category mapping for fast lookup -_EXT_TO_CATEGORY: Dict[str, str] = { - # Code extensions - ".py": "code", ".js": "code", ".jsx": "code", ".ts": "code", ".tsx": "code", - ".java": "code", ".go": "code", ".zig": "code", ".m": "code", ".mm": "code", - ".c": "code", ".h": "code", ".cc": "code", ".cpp": "code", ".hpp": "code", ".cxx": "code", - ".rs": "code", - # Doc extensions - ".md": "doc", ".mdx": "doc", ".txt": "doc", ".rst": "doc", -} - - -def get_file_category(path: str) -> Optional[str]: - """Get file category ('code' or 'doc') from path extension. - - Args: - path: File path string - - Returns: - 'code', 'doc', or None if unknown - """ - ext = Path(path).suffix.lower() - return _EXT_TO_CATEGORY.get(ext) - - -def filter_results_by_category( - results: List[SearchResult], - intent: QueryIntent, - allow_mixed: bool = True, -) -> List[SearchResult]: - """Filter results by category based on query intent. - - Strategy: - - KEYWORD (code intent): Only return code files - - SEMANTIC (doc intent): Prefer docs, but allow code if allow_mixed=True - - MIXED: Return all results - - Args: - results: List of SearchResult objects - intent: Query intent from detect_query_intent() - allow_mixed: If True, SEMANTIC intent includes code files with lower priority - - Returns: - Filtered and re-ranked list of SearchResult objects - """ - if not results or intent == QueryIntent.MIXED: - return results - - code_results = [] - doc_results = [] - unknown_results = [] - - for r in results: - category = get_file_category(r.path) - if category == "code": - code_results.append(r) - elif category == "doc": - doc_results.append(r) - else: - unknown_results.append(r) - - if intent == QueryIntent.KEYWORD: - # Code intent: return only code files + unknown (might be code) - filtered = code_results + unknown_results - elif intent == QueryIntent.SEMANTIC: - if allow_mixed: - # Semantic intent with mixed: docs first, then code - filtered = doc_results + code_results + unknown_results - else: - # Semantic intent strict: only docs - filtered = doc_results + unknown_results - else: - filtered = results - - return filtered - - -def is_test_file(path: str) -> bool: - """Return True when a path clearly refers to a test/spec file.""" - parts = _normalized_path_parts(path) - if not parts: - return False - basename = parts[-1] - return ( - basename.startswith("test_") - or basename.endswith("_test.py") - or basename.endswith(".test.ts") - or basename.endswith(".test.tsx") - or basename.endswith(".test.js") - or basename.endswith(".test.jsx") - or basename.endswith(".spec.ts") - or basename.endswith(".spec.tsx") - or basename.endswith(".spec.js") - or basename.endswith(".spec.jsx") - or "tests" in parts[:-1] - or "test" in parts[:-1] - or "__fixtures__" in parts[:-1] - or "fixtures" in parts[:-1] - ) - - -def is_generated_artifact_path(path: str) -> bool: - """Return True when a path clearly points at generated/build artifacts.""" - parts = _normalized_path_parts(path) - if not parts: - return False - basename = parts[-1] - return any(part in _GENERATED_DIR_NAMES for part in parts[:-1]) or basename.endswith( - _GENERATED_FILE_SUFFIXES - ) - - -def is_auxiliary_reference_path(path: str) -> bool: - """Return True for examples, benchmarks, demos, and debug helper files.""" - parts = _normalized_path_parts(path) - if not parts: - return False - basename = parts[-1] - if any(part in _AUXILIARY_DIR_NAMES for part in parts[:-1]): - return True - return ( - basename.startswith("debug_") - or basename.startswith("benchmark") - or basename.startswith("profile_") - or "_benchmark" in basename - or "_profile" in basename - ) - - -def _extract_identifier_query(query: str) -> Optional[str]: - """Return a single-token identifier query when definition boosting is safe.""" - trimmed = (query or "").strip() - if not trimmed or " " in trimmed: - return None - if not _IDENTIFIER_QUERY_RE.fullmatch(trimmed): - return None - return trimmed - - -def extract_explicit_path_hints(query: str) -> List[List[str]]: - """Extract explicit path/file hints from separator-style query tokens. - - Natural-language queries often contain one or two high-signal feature/file - hints such as ``smart_search`` or ``smart-search.ts`` alongside broader - platform words like ``CodexLens``. These hints should be treated as more - specific than the surrounding prose. - """ - hints: List[List[str]] = [] - seen: set[tuple[str, ...]] = set() - for raw_part in re.split(r"\s+", query or ""): - candidate = raw_part.strip().strip("\"'`()[]{}<>:,;") - if not candidate or not _EXPLICIT_PATH_HINT_MARKER_RE.search(candidate): - continue - tokens = [ - token - for token in _split_identifier_like_tokens(candidate) - if token not in _PATH_TOPIC_STOPWORDS - ] - if len(tokens) < 2: - continue - key = tuple(tokens) - if key in seen: - continue - seen.add(key) - hints.append(list(key)) - return hints - - -def _is_source_implementation_path(path: str) -> bool: - """Return True when a path looks like an implementation file under a source dir.""" - parts = _normalized_path_parts(path) - if not parts: - return False - return any(part in _SOURCE_DIR_NAMES for part in parts[:-1]) - - -def _result_text_candidates(result: SearchResult) -> List[str]: - """Collect short text snippets that may contain a symbol definition.""" - candidates: List[str] = [] - for text in (result.excerpt, result.content): - if not isinstance(text, str) or not text.strip(): - continue - for line in text.splitlines(): - stripped = line.strip() - if stripped: - candidates.append(stripped) - if len(candidates) >= 6: - break - if len(candidates) >= 6: - break - - symbol_name = result.symbol_name - if not symbol_name and result.symbol is not None: - symbol_name = getattr(result.symbol, "name", None) - if isinstance(symbol_name, str) and symbol_name.strip(): - candidates.append(symbol_name.strip()) - return candidates - - -def _result_defines_identifier(result: SearchResult, symbol: str) -> bool: - """Best-effort check for whether a result snippet looks like a symbol definition.""" - escaped_symbol = re.escape(symbol) - definition_patterns = ( - rf"^\s*(?:export\s+)?(?:default\s+)?(?:async\s+)?def\s+{escaped_symbol}\b", - rf"^\s*(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+{escaped_symbol}\b", - rf"^\s*(?:export\s+)?(?:default\s+)?class\s+{escaped_symbol}\b", - rf"^\s*(?:export\s+)?(?:default\s+)?interface\s+{escaped_symbol}\b", - rf"^\s*(?:export\s+)?(?:default\s+)?type\s+{escaped_symbol}\b", - rf"^\s*(?:export\s+)?(?:default\s+)?(?:const|let|var)\s+{escaped_symbol}\b", - rf"^\s*{escaped_symbol}\s*=\s*(?:async\s+)?\(", - rf"^\s*{escaped_symbol}\s*=\s*(?:async\s+)?[^=]*=>", - ) - for candidate in _result_text_candidates(result): - if any(re.search(pattern, candidate) for pattern in definition_patterns): - return True - return False - - -def _split_identifier_like_tokens(text: str) -> List[str]: - """Split identifier-like text into normalized word tokens.""" - if not text: - return [] - - tokens: List[str] = [] - for raw_token in _TOPIC_TOKEN_RE.findall(text): - expanded = re.sub(r"([a-z0-9])([A-Z])", r"\1 \2", raw_token) - expanded = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1 \2", expanded) - for token in expanded.split(): - normalized = _normalize_topic_token(token) - if normalized: - tokens.append(normalized) - return tokens - - -def _normalize_topic_token(token: str) -> Optional[str]: - """Normalize lightweight topic tokens for query/path overlap heuristics.""" - normalized = (token or "").casefold() - if len(normalized) < 2 or normalized.isdigit(): - return None - if len(normalized) > 4 and normalized.endswith("ies"): - normalized = f"{normalized[:-3]}y" - elif len(normalized) > 3 and normalized.endswith("s") and not normalized.endswith("ss"): - normalized = normalized[:-1] - return normalized or None - - -def _dedupe_preserve_order(tokens: List[str]) -> List[str]: - """Deduplicate tokens while preserving the first-seen order.""" - deduped: List[str] = [] - seen: set[str] = set() - for token in tokens: - if token in seen: - continue - seen.add(token) - deduped.append(token) - return deduped - - -def _semantic_query_topic_tokens(query: str) -> List[str]: - """Extract salient natural-language tokens for lightweight topic matching.""" - tokens = [ - token - for token in _split_identifier_like_tokens(query) - if token not in _SEMANTIC_QUERY_STOPWORDS - ] - return _dedupe_preserve_order(tokens) - - -def _path_topic_tokens(path: str) -> tuple[List[str], List[str]]: - """Extract normalized topic tokens from a path and its basename.""" - parts = _normalized_path_parts(path) - if not parts: - return [], [] - - path_tokens: List[str] = [] - basename_tokens: List[str] = [] - last_index = len(parts) - 1 - for index, part in enumerate(parts): - target = basename_tokens if index == last_index else path_tokens - for token in _split_identifier_like_tokens(part): - if token in _PATH_TOPIC_STOPWORDS: - continue - target.append(token) - return _dedupe_preserve_order(path_tokens), _dedupe_preserve_order(basename_tokens) - - -def _source_path_topic_boost( - query: str, - path: str, - query_intent: QueryIntent, -) -> tuple[float, List[str]]: - """Return a path/topic boost when a query strongly overlaps a source path.""" - query_tokens = _semantic_query_topic_tokens(query) - if len(query_tokens) < 2: - return 1.0, [] - - path_tokens, basename_tokens = _path_topic_tokens(path) - if not path_tokens and not basename_tokens: - return 1.0, [] - - path_token_set = set(path_tokens) | set(basename_tokens) - basename_overlap = [token for token in query_tokens if token in basename_tokens] - all_overlap = [token for token in query_tokens if token in path_token_set] - explicit_hint_tokens = extract_explicit_path_hints(query) - - for hint_tokens in explicit_hint_tokens: - if basename_tokens == hint_tokens: - if query_intent == QueryIntent.KEYWORD: - return 4.5, hint_tokens[:3] - return 2.4, hint_tokens[:3] - if all(token in basename_tokens for token in hint_tokens): - if query_intent == QueryIntent.KEYWORD: - return 4.5, hint_tokens[:3] - return 1.6, hint_tokens[:3] - - if query_prefers_lexical_search(query): - lexical_surface_overlap = [ - token for token in basename_tokens if token in query_tokens and token in _LEXICAL_PRIORITY_SURFACE_TOKENS - ] - if lexical_surface_overlap: - lexical_overlap = lexical_surface_overlap[:3] - if query_intent == QueryIntent.KEYWORD: - return 5.5, lexical_overlap - return 5.0, lexical_overlap - - if query_intent == QueryIntent.KEYWORD: - if len(basename_overlap) >= 2: - # Multi-token identifier-style queries often name the feature/file directly. - # Give basename matches a stronger lift so they can survive workspace fan-out. - multiplier = min(4.5, 2.0 + 1.25 * float(len(basename_overlap))) - return multiplier, basename_overlap[:3] - if len(all_overlap) >= 3: - multiplier = min(2.0, 1.1 + 0.2 * len(all_overlap)) - return multiplier, all_overlap[:3] - return 1.0, [] - - if len(basename_overlap) >= 2: - multiplier = min(1.45, 1.15 + 0.1 * len(basename_overlap)) - return multiplier, basename_overlap[:3] - if len(all_overlap) >= 3: - multiplier = min(1.3, 1.05 + 0.05 * len(all_overlap)) - return multiplier, all_overlap[:3] - return 1.0, [] - - -def apply_path_penalties( - results: List[SearchResult], - query: str, - *, - test_file_penalty: float = 0.15, - generated_file_penalty: float = 0.35, -) -> List[SearchResult]: - """Apply lightweight path-based penalties to reduce noisy rankings.""" - if not results or (test_file_penalty <= 0 and generated_file_penalty <= 0): - return results - - query_intent = detect_query_intent(query) - skip_test_penalty = query_targets_test_files(query) - skip_auxiliary_penalty = query_targets_auxiliary_files(query) - skip_generated_penalty = query_targets_generated_files(query) - query_topic_tokens = _semantic_query_topic_tokens(query) - keyword_path_query = query_intent == QueryIntent.KEYWORD and len(query_topic_tokens) >= 2 - explicit_feature_query = bool(extract_explicit_path_hints(query)) - source_oriented_query = ( - explicit_feature_query - or keyword_path_query - or ( - query_intent in {QueryIntent.SEMANTIC, QueryIntent.MIXED} - and len(query_topic_tokens) >= 2 - ) - ) - identifier_query = None - if query_intent == QueryIntent.KEYWORD: - identifier_query = _extract_identifier_query(query) - effective_test_penalty = float(test_file_penalty) - if effective_test_penalty > 0 and not skip_test_penalty: - if query_intent == QueryIntent.KEYWORD: - # Identifier-style queries should prefer implementation files over test references. - effective_test_penalty = max(effective_test_penalty, 0.35) - elif query_intent in {QueryIntent.SEMANTIC, QueryIntent.MIXED}: - # Natural-language code queries should still prefer implementation files over references. - effective_test_penalty = max(effective_test_penalty, 0.25) - if explicit_feature_query: - # Explicit feature/file hints should be even more biased toward source implementations. - effective_test_penalty = max(effective_test_penalty, 0.45) - effective_auxiliary_penalty = effective_test_penalty - if effective_auxiliary_penalty > 0 and not skip_auxiliary_penalty and explicit_feature_query: - # Examples/benchmarks are usually descriptive noise for feature-targeted implementation queries. - effective_auxiliary_penalty = max(effective_auxiliary_penalty, 0.5) - effective_generated_penalty = float(generated_file_penalty) - if effective_generated_penalty > 0 and not skip_generated_penalty: - if source_oriented_query: - effective_generated_penalty = max(effective_generated_penalty, 0.45) - if explicit_feature_query: - effective_generated_penalty = max(effective_generated_penalty, 0.6) - - penalized: List[SearchResult] = [] - for result in results: - multiplier = 1.0 - penalty_multiplier = 1.0 - boost_multiplier = 1.0 - penalty_reasons: List[str] = [] - boost_reasons: List[str] = [] - - if effective_test_penalty > 0 and not skip_test_penalty and is_test_file(result.path): - penalty_multiplier *= max(0.0, 1.0 - effective_test_penalty) - penalty_reasons.append("test_file") - - if ( - effective_auxiliary_penalty > 0 - and not skip_auxiliary_penalty - and not is_test_file(result.path) - and is_auxiliary_reference_path(result.path) - ): - penalty_multiplier *= max(0.0, 1.0 - effective_auxiliary_penalty) - penalty_reasons.append("auxiliary_file") - - if ( - effective_generated_penalty > 0 - and not skip_generated_penalty - and is_generated_artifact_path(result.path) - ): - penalty_multiplier *= max(0.0, 1.0 - effective_generated_penalty) - penalty_reasons.append("generated_artifact") - - if ( - identifier_query - and not is_test_file(result.path) - and not is_generated_artifact_path(result.path) - and _result_defines_identifier(result, identifier_query) - ): - if _is_source_implementation_path(result.path): - boost_multiplier *= 2.0 - boost_reasons.append("source_definition") - else: - boost_multiplier *= 1.35 - boost_reasons.append("symbol_definition") - - if ( - (query_intent in {QueryIntent.SEMANTIC, QueryIntent.MIXED} or keyword_path_query) - and not skip_test_penalty - and not skip_auxiliary_penalty - and not skip_generated_penalty - and not is_test_file(result.path) - and not is_generated_artifact_path(result.path) - and not is_auxiliary_reference_path(result.path) - and _is_source_implementation_path(result.path) - ): - semantic_path_boost, overlap_tokens = _source_path_topic_boost( - query, - result.path, - query_intent, - ) - if semantic_path_boost > 1.0: - boost_multiplier *= semantic_path_boost - boost_reasons.append("source_path_topic_overlap") - - multiplier = penalty_multiplier * boost_multiplier - if penalty_reasons or boost_reasons: - metadata = { - **result.metadata, - "path_rank_multiplier": multiplier, - } - if penalty_reasons: - metadata["path_penalty_reasons"] = penalty_reasons - metadata["path_penalty_multiplier"] = penalty_multiplier - if boost_reasons: - metadata["path_boost_reasons"] = boost_reasons - metadata["path_boost_multiplier"] = boost_multiplier - if "source_path_topic_overlap" in boost_reasons and overlap_tokens: - metadata["path_boost_overlap_tokens"] = overlap_tokens - penalized.append( - result.model_copy( - deep=True, - update={ - "score": max(0.0, float(result.score) * multiplier), - "metadata": metadata, - }, - ) - ) - else: - penalized.append(result) - - penalized.sort(key=lambda r: r.score, reverse=True) - return penalized - - -def rebalance_noisy_results( - results: List[SearchResult], - query: str, -) -> List[SearchResult]: - """Move noisy test/generated/auxiliary results behind implementation hits when safe.""" - if not results: - return [] - - query_intent = detect_query_intent(query) - skip_test_penalty = query_targets_test_files(query) - skip_auxiliary_penalty = query_targets_auxiliary_files(query) - skip_generated_penalty = query_targets_generated_files(query) - query_topic_tokens = _semantic_query_topic_tokens(query) - keyword_path_query = query_intent == QueryIntent.KEYWORD and len(query_topic_tokens) >= 2 - explicit_feature_query = bool(extract_explicit_path_hints(query)) - source_oriented_query = ( - explicit_feature_query - or keyword_path_query - or ( - query_intent in {QueryIntent.SEMANTIC, QueryIntent.MIXED} - and len(query_topic_tokens) >= 2 - ) - ) - if not source_oriented_query: - return results - - max_generated_results = len(results) if skip_generated_penalty else 0 - max_test_results = len(results) if skip_test_penalty else (0 if explicit_feature_query else 1) - max_auxiliary_results = len(results) if skip_auxiliary_penalty else (0 if explicit_feature_query else 1) - - selected: List[SearchResult] = [] - deferred: List[SearchResult] = [] - generated_count = 0 - test_count = 0 - auxiliary_count = 0 - - for result in results: - if not skip_generated_penalty and is_generated_artifact_path(result.path): - if generated_count >= max_generated_results: - deferred.append(result) - continue - generated_count += 1 - selected.append(result) - continue - - if not skip_test_penalty and is_test_file(result.path): - if test_count >= max_test_results: - deferred.append(result) - continue - test_count += 1 - selected.append(result) - continue - - if not skip_auxiliary_penalty and is_auxiliary_reference_path(result.path): - if auxiliary_count >= max_auxiliary_results: - deferred.append(result) - continue - auxiliary_count += 1 - selected.append(result) - continue - - selected.append(result) - - return selected + deferred - - -def simple_weighted_fusion( - results_map: Dict[str, List[SearchResult]], - weights: Dict[str, float] = None, -) -> List[SearchResult]: - """Combine search results using simple weighted sum of normalized scores. - - This is an alternative to RRF that preserves score magnitude information. - Scores are min-max normalized per source before weighted combination. - - Formula: score(d) = Σ weight_source * normalized_score_source(d) - - Args: - results_map: Dictionary mapping source name to list of SearchResult objects - Sources: 'exact', 'fuzzy', 'vector' - weights: Dictionary mapping source name to weight (default: equal weights) - Example: {'exact': 0.3, 'fuzzy': 0.1, 'vector': 0.6} - - Returns: - List of SearchResult objects sorted by fused score (descending) - - Examples: - >>> fts_results = [SearchResult(path="a.py", score=10.0, excerpt="...")] - >>> vector_results = [SearchResult(path="b.py", score=0.85, excerpt="...")] - >>> results_map = {'exact': fts_results, 'vector': vector_results} - >>> fused = simple_weighted_fusion(results_map) - """ - if not results_map: - return [] - - # Default equal weights if not provided - if weights is None: - num_sources = len(results_map) - weights = {source: 1.0 / num_sources for source in results_map} - - # Normalize weights to sum to 1.0 - weight_sum = sum(weights.values()) - if not math.isclose(weight_sum, 1.0, abs_tol=0.01) and weight_sum > 0: - weights = {source: w / weight_sum for source, w in weights.items()} - - # Compute min-max normalization parameters per source - source_stats: Dict[str, tuple] = {} - for source_name, results in results_map.items(): - if not results: - continue - scores = [r.score for r in results] - min_s, max_s = min(scores), max(scores) - source_stats[source_name] = (min_s, max_s) - - def normalize_score(score: float, source: str) -> float: - """Normalize score to [0, 1] range using min-max scaling.""" - if source not in source_stats: - return 0.0 - min_s, max_s = source_stats[source] - if max_s == min_s: - return 1.0 if score >= min_s else 0.0 - return (score - min_s) / (max_s - min_s) - - # Build unified result set with weighted scores - path_to_result: Dict[str, SearchResult] = {} - path_to_fusion_score: Dict[str, float] = {} - path_to_source_scores: Dict[str, Dict[str, float]] = {} - - for source_name, results in results_map.items(): - weight = weights.get(source_name, 0.0) - if weight == 0: - continue - - for result in results: - path = result.path - normalized = normalize_score(result.score, source_name) - contribution = weight * normalized - - if path not in path_to_fusion_score: - path_to_fusion_score[path] = 0.0 - path_to_result[path] = result - path_to_source_scores[path] = {} - - path_to_fusion_score[path] += contribution - path_to_source_scores[path][source_name] = normalized - - # Create final results with fusion scores - fused_results = [] - for path, base_result in path_to_result.items(): - fusion_score = path_to_fusion_score[path] - - fused_result = SearchResult( - path=base_result.path, - score=fusion_score, - excerpt=base_result.excerpt, - content=base_result.content, - symbol=base_result.symbol, - chunk=base_result.chunk, - metadata={ - **base_result.metadata, - "fusion_method": "simple_weighted", - "fusion_score": fusion_score, - "original_score": base_result.score, - "source_scores": path_to_source_scores[path], - }, - start_line=base_result.start_line, - end_line=base_result.end_line, - symbol_name=base_result.symbol_name, - symbol_kind=base_result.symbol_kind, - ) - fused_results.append(fused_result) - - fused_results.sort(key=lambda r: r.score, reverse=True) - return fused_results - - -def reciprocal_rank_fusion( - results_map: Dict[str, List[SearchResult]], - weights: Dict[str, float] = None, - k: int = 60, -) -> List[SearchResult]: - """Combine search results from multiple sources using Reciprocal Rank Fusion. - - RRF formula: score(d) = Σ weight_source / (k + rank_source(d)) - - Args: - results_map: Dictionary mapping source name to list of SearchResult objects - Sources: 'exact', 'fuzzy', 'vector' - weights: Dictionary mapping source name to weight (default: equal weights) - Example: {'exact': 0.3, 'fuzzy': 0.1, 'vector': 0.6} - k: Constant to avoid division by zero and control rank influence (default 60) - - Returns: - List of SearchResult objects sorted by fused score (descending) - - Examples: - >>> exact_results = [SearchResult(path="a.py", score=10.0, excerpt="...")] - >>> fuzzy_results = [SearchResult(path="b.py", score=8.0, excerpt="...")] - >>> results_map = {'exact': exact_results, 'fuzzy': fuzzy_results} - >>> fused = reciprocal_rank_fusion(results_map) - """ - if not results_map: - return [] - - # Default equal weights if not provided - if weights is None: - num_sources = len(results_map) - weights = {source: 1.0 / num_sources for source in results_map} - - # Validate weights sum to 1.0 - weight_sum = sum(weights.values()) - if not math.isclose(weight_sum, 1.0, abs_tol=0.01): - # Normalize weights to sum to 1.0 - weights = {source: w / weight_sum for source, w in weights.items()} - - # Build unified result set with RRF scores - path_to_result: Dict[str, SearchResult] = {} - path_to_fusion_score: Dict[str, float] = {} - path_to_source_ranks: Dict[str, Dict[str, int]] = {} - - for source_name, results in results_map.items(): - weight = weights.get(source_name, 0.0) - if weight == 0: - continue - - for rank, result in enumerate(results, start=1): - path = result.path - rrf_contribution = weight / (k + rank) - - # Initialize or accumulate fusion score - if path not in path_to_fusion_score: - path_to_fusion_score[path] = 0.0 - path_to_result[path] = result - path_to_source_ranks[path] = {} - - path_to_fusion_score[path] += rrf_contribution - path_to_source_ranks[path][source_name] = rank - - # Create final results with fusion scores - fused_results = [] - for path, base_result in path_to_result.items(): - fusion_score = path_to_fusion_score[path] - - # Create new SearchResult with fusion_score in metadata - fused_result = SearchResult( - path=base_result.path, - score=fusion_score, - excerpt=base_result.excerpt, - content=base_result.content, - symbol=base_result.symbol, - chunk=base_result.chunk, - metadata={ - **base_result.metadata, - "fusion_method": "rrf", - "fusion_score": fusion_score, - "original_score": base_result.score, - "rrf_k": k, - "source_ranks": path_to_source_ranks[path], - }, - start_line=base_result.start_line, - end_line=base_result.end_line, - symbol_name=base_result.symbol_name, - symbol_kind=base_result.symbol_kind, - ) - fused_results.append(fused_result) - - # Sort by fusion score descending - fused_results.sort(key=lambda r: r.score, reverse=True) - - return fused_results - - -def apply_symbol_boost( - results: List[SearchResult], - boost_factor: float = 1.5, -) -> List[SearchResult]: - """Boost fused scores for results that include an explicit symbol match. - - The boost is multiplicative on the current result.score (typically the RRF fusion score). - When boosted, the original score is preserved in metadata["original_fusion_score"] and - metadata["boosted"] is set to True. - """ - if not results: - return [] - - if boost_factor <= 1.0: - # Still return new objects to follow immutable transformation pattern. - return [ - SearchResult( - path=r.path, - score=r.score, - excerpt=r.excerpt, - content=r.content, - symbol=r.symbol, - chunk=r.chunk, - metadata={**r.metadata}, - start_line=r.start_line, - end_line=r.end_line, - symbol_name=r.symbol_name, - symbol_kind=r.symbol_kind, - additional_locations=list(r.additional_locations), - ) - for r in results - ] - - boosted_results: List[SearchResult] = [] - for result in results: - has_symbol = bool(result.symbol_name) - original_score = float(result.score) - boosted_score = original_score * boost_factor if has_symbol else original_score - - metadata = {**result.metadata} - if has_symbol: - metadata.setdefault("original_fusion_score", metadata.get("fusion_score", original_score)) - metadata["boosted"] = True - metadata["symbol_boost_factor"] = boost_factor - - boosted_results.append( - SearchResult( - path=result.path, - score=boosted_score, - excerpt=result.excerpt, - content=result.content, - symbol=result.symbol, - chunk=result.chunk, - metadata=metadata, - start_line=result.start_line, - end_line=result.end_line, - symbol_name=result.symbol_name, - symbol_kind=result.symbol_kind, - additional_locations=list(result.additional_locations), - ) - ) - - boosted_results.sort(key=lambda r: r.score, reverse=True) - return boosted_results - - -def rerank_results( - query: str, - results: List[SearchResult], - embedder: Any, - top_k: int = 50, -) -> List[SearchResult]: - """Re-rank results with embedding cosine similarity, combined with current score. - - Combined score formula: - 0.5 * rrf_score + 0.5 * cosine_similarity - - If embedder is None or embedding fails, returns results as-is. - """ - if not results: - return [] - - if embedder is None or top_k <= 0: - return results - - rerank_count = min(int(top_k), len(results)) - - def cosine_similarity(vec_a: List[float], vec_b: List[float]) -> float: - # Defensive: handle mismatched lengths and zero vectors. - n = min(len(vec_a), len(vec_b)) - if n == 0: - return 0.0 - dot = 0.0 - norm_a = 0.0 - norm_b = 0.0 - for i in range(n): - a = float(vec_a[i]) - b = float(vec_b[i]) - dot += a * b - norm_a += a * a - norm_b += b * b - if norm_a <= 0.0 or norm_b <= 0.0: - return 0.0 - sim = dot / (math.sqrt(norm_a) * math.sqrt(norm_b)) - # SearchResult.score requires non-negative scores; clamp cosine similarity to [0, 1]. - return max(0.0, min(1.0, sim)) - - def text_for_embedding(r: SearchResult) -> str: - if r.excerpt and r.excerpt.strip(): - return r.excerpt - if r.content and r.content.strip(): - return r.content - if r.chunk and r.chunk.content and r.chunk.content.strip(): - return r.chunk.content - # Fallback: stable, non-empty text. - return r.symbol_name or r.path - - try: - if hasattr(embedder, "embed_single"): - query_vec = embedder.embed_single(query) - else: - query_vec = embedder.embed(query)[0] - - doc_texts = [text_for_embedding(r) for r in results[:rerank_count]] - doc_vecs = embedder.embed(doc_texts) - except Exception: - return results - - reranked_results: List[SearchResult] = [] - - for idx, result in enumerate(results): - if idx < rerank_count: - rrf_score = float(result.score) - sim = cosine_similarity(query_vec, doc_vecs[idx]) - combined_score = 0.5 * rrf_score + 0.5 * sim - - reranked_results.append( - SearchResult( - path=result.path, - score=combined_score, - excerpt=result.excerpt, - content=result.content, - symbol=result.symbol, - chunk=result.chunk, - metadata={ - **result.metadata, - "rrf_score": rrf_score, - "cosine_similarity": sim, - "reranked": True, - }, - start_line=result.start_line, - end_line=result.end_line, - symbol_name=result.symbol_name, - symbol_kind=result.symbol_kind, - additional_locations=list(result.additional_locations), - ) - ) - else: - # Preserve remaining results without re-ranking, but keep immutability. - reranked_results.append( - SearchResult( - path=result.path, - score=result.score, - excerpt=result.excerpt, - content=result.content, - symbol=result.symbol, - chunk=result.chunk, - metadata={**result.metadata}, - start_line=result.start_line, - end_line=result.end_line, - symbol_name=result.symbol_name, - symbol_kind=result.symbol_kind, - additional_locations=list(result.additional_locations), - ) - ) - - reranked_results.sort(key=lambda r: r.score, reverse=True) - return reranked_results - - -def cross_encoder_rerank( - query: str, - results: List[SearchResult], - reranker: Any, - top_k: int = 50, - batch_size: int = 32, - chunk_type_weights: Optional[Dict[str, float]] = None, - test_file_penalty: float = 0.0, -) -> List[SearchResult]: - """Second-stage reranking using a cross-encoder model. - - This function is dependency-agnostic: callers can pass any object that exposes - a compatible `score_pairs(pairs, batch_size=...)` method. - - Args: - query: Search query string - results: List of search results to rerank - reranker: Cross-encoder model with score_pairs or predict method - top_k: Number of top results to rerank - batch_size: Batch size for reranking - chunk_type_weights: Optional weights for different chunk types. - Example: {"code": 1.0, "docstring": 0.7} - reduce docstring influence - test_file_penalty: Penalty applied to test files (0.0-1.0). - Example: 0.2 means test files get 20% score reduction - """ - if not results: - return [] - - if reranker is None or top_k <= 0: - return results - - rerank_count = min(int(top_k), len(results)) - - def text_for_pair(r: SearchResult) -> str: - if r.excerpt and r.excerpt.strip(): - return r.excerpt - if r.content and r.content.strip(): - return r.content - if r.chunk and r.chunk.content and r.chunk.content.strip(): - return r.chunk.content - return r.symbol_name or r.path - - pairs = [(query, text_for_pair(r)) for r in results[:rerank_count]] - - try: - if hasattr(reranker, "score_pairs"): - raw_scores = reranker.score_pairs(pairs, batch_size=int(batch_size)) - elif hasattr(reranker, "predict"): - raw_scores = reranker.predict(pairs, batch_size=int(batch_size)) - else: - return results - except Exception as exc: - logger.debug("Cross-encoder rerank failed; returning original ranking: %s", exc) - return results - - if not raw_scores or len(raw_scores) != rerank_count: - logger.debug( - "Cross-encoder rerank returned %d scores for %d candidates; returning original ranking", - len(raw_scores) if raw_scores else 0, - rerank_count, - ) - return results - - scores = [float(s) for s in raw_scores] - min_s = min(scores) - max_s = max(scores) - - def sigmoid(x: float) -> float: - # Clamp to keep exp() stable. - x = max(-50.0, min(50.0, x)) - return 1.0 / (1.0 + math.exp(-x)) - - if 0.0 <= min_s and max_s <= 1.0: - probs = scores - else: - probs = [sigmoid(s) for s in scores] - - query_intent = detect_query_intent(query) - skip_test_penalty = query_targets_test_files(query) - skip_auxiliary_penalty = query_targets_auxiliary_files(query) - skip_generated_penalty = query_targets_generated_files(query) - keyword_path_query = query_intent == QueryIntent.KEYWORD and len(_semantic_query_topic_tokens(query)) >= 2 - reranked_results: List[SearchResult] = [] - - for idx, result in enumerate(results): - if idx < rerank_count: - prev_score = float(result.score) - ce_score = scores[idx] - ce_prob = probs[idx] - - # Base combined score - combined_score = 0.5 * prev_score + 0.5 * ce_prob - - # Apply chunk_type weight adjustment - if chunk_type_weights: - chunk_type = None - if result.chunk and hasattr(result.chunk, "metadata"): - chunk_type = result.chunk.metadata.get("chunk_type") - elif result.metadata: - chunk_type = result.metadata.get("chunk_type") - - if chunk_type and chunk_type in chunk_type_weights: - weight = chunk_type_weights[chunk_type] - # Apply weight to CE contribution only - combined_score = 0.5 * prev_score + 0.5 * ce_prob * weight - - # Apply test file penalty - if test_file_penalty > 0 and is_test_file(result.path): - combined_score = combined_score * (1.0 - test_file_penalty) - - cross_encoder_floor_reason = None - cross_encoder_floor_score = None - cross_encoder_floor_overlap_tokens: List[str] = [] - if ( - (query_intent in {QueryIntent.SEMANTIC, QueryIntent.MIXED} or keyword_path_query) - and not skip_test_penalty - and not skip_auxiliary_penalty - and not skip_generated_penalty - and not is_test_file(result.path) - and not is_generated_artifact_path(result.path) - and not is_auxiliary_reference_path(result.path) - and _is_source_implementation_path(result.path) - ): - semantic_path_boost, overlap_tokens = _source_path_topic_boost( - query, - result.path, - query_intent, - ) - if semantic_path_boost > 1.0: - floor_ratio = 0.8 if semantic_path_boost >= 1.35 else 0.75 - candidate_floor = prev_score * floor_ratio - if candidate_floor > combined_score: - combined_score = candidate_floor - cross_encoder_floor_reason = ( - "keyword_source_path_overlap" - if query_intent == QueryIntent.KEYWORD - else "semantic_source_path_overlap" - ) - cross_encoder_floor_score = candidate_floor - cross_encoder_floor_overlap_tokens = overlap_tokens - - metadata = { - **result.metadata, - "pre_cross_encoder_score": prev_score, - "cross_encoder_score": ce_score, - "cross_encoder_prob": ce_prob, - "cross_encoder_reranked": True, - } - if cross_encoder_floor_reason is not None: - metadata["cross_encoder_floor_reason"] = cross_encoder_floor_reason - metadata["cross_encoder_floor_score"] = cross_encoder_floor_score - if cross_encoder_floor_overlap_tokens: - metadata["cross_encoder_floor_overlap_tokens"] = ( - cross_encoder_floor_overlap_tokens - ) - - reranked_results.append( - SearchResult( - path=result.path, - score=combined_score, - excerpt=result.excerpt, - content=result.content, - symbol=result.symbol, - chunk=result.chunk, - metadata=metadata, - start_line=result.start_line, - end_line=result.end_line, - symbol_name=result.symbol_name, - symbol_kind=result.symbol_kind, - additional_locations=list(result.additional_locations), - ) - ) - else: - reranked_results.append( - SearchResult( - path=result.path, - score=result.score, - excerpt=result.excerpt, - content=result.content, - symbol=result.symbol, - chunk=result.chunk, - metadata={**result.metadata}, - start_line=result.start_line, - end_line=result.end_line, - symbol_name=result.symbol_name, - symbol_kind=result.symbol_kind, - additional_locations=list(result.additional_locations), - ) - ) - - reranked_results.sort(key=lambda r: r.score, reverse=True) - return reranked_results - - -def normalize_bm25_score(score: float) -> float: - """Normalize BM25 scores from SQLite FTS5 to 0-1 range. - - SQLite FTS5 returns negative BM25 scores (more negative = better match). - Uses sigmoid transformation for normalization. - - Args: - score: Raw BM25 score from SQLite (typically negative) - - Returns: - Normalized score in range [0, 1] - - Examples: - >>> normalize_bm25_score(-10.5) # Good match - 0.85 - >>> normalize_bm25_score(-1.2) # Weak match - 0.62 - """ - # Take absolute value (BM25 is negative in SQLite) - abs_score = abs(score) - - # Sigmoid transformation: 1 / (1 + e^(-x)) - # Scale factor of 0.1 maps typical BM25 range (-20 to 0) to (0, 1) - normalized = 1.0 / (1.0 + math.exp(-abs_score * 0.1)) - - return normalized - - -def tag_search_source(results: List[SearchResult], source: str) -> List[SearchResult]: - """Tag search results with their source for RRF tracking. - - Args: - results: List of SearchResult objects - source: Source identifier ('exact', 'fuzzy', 'vector') - - Returns: - List of SearchResult objects with 'search_source' in metadata - """ - tagged_results = [] - for result in results: - tagged_result = SearchResult( - path=result.path, - score=result.score, - excerpt=result.excerpt, - content=result.content, - symbol=result.symbol, - chunk=result.chunk, - metadata={**result.metadata, "search_source": source}, - start_line=result.start_line, - end_line=result.end_line, - symbol_name=result.symbol_name, - symbol_kind=result.symbol_kind, - ) - tagged_results.append(tagged_result) - - return tagged_results - - -def group_similar_results( - results: List[SearchResult], - score_threshold_abs: float = 0.01, - content_field: str = "excerpt" -) -> List[SearchResult]: - """Group search results by content and score similarity. - - Groups results that have similar content and similar scores into a single - representative result, with other locations stored in additional_locations. - - Algorithm: - 1. Group results by content (using excerpt or content field) - 2. Within each content group, create subgroups based on score similarity - 3. Select highest-scoring result as representative for each subgroup - 4. Store other results in subgroup as additional_locations - - Args: - results: A list of SearchResult objects (typically sorted by score) - score_threshold_abs: Absolute score difference to consider results similar. - Results with |score_a - score_b| <= threshold are grouped. - Default 0.01 is suitable for RRF fusion scores. - content_field: The field to use for content grouping ('excerpt' or 'content') - - Returns: - A new list of SearchResult objects where similar items are grouped. - The list is sorted by score descending. - - Examples: - >>> results = [SearchResult(path="a.py", score=0.5, excerpt="def foo()"), - ... SearchResult(path="b.py", score=0.5, excerpt="def foo()")] - >>> grouped = group_similar_results(results) - >>> len(grouped) # Two results merged into one - 1 - >>> len(grouped[0].additional_locations) # One additional location - 1 - """ - if not results: - return [] - - # Group results by content - content_map: Dict[str, List[SearchResult]] = {} - unidentifiable_results: List[SearchResult] = [] - - for r in results: - key = getattr(r, content_field, None) - if key and key.strip(): - content_map.setdefault(key, []).append(r) - else: - # Results without content can't be grouped by content - unidentifiable_results.append(r) - - final_results: List[SearchResult] = [] - - # Process each content group - for content_group in content_map.values(): - # Sort by score descending within group - content_group.sort(key=lambda r: r.score, reverse=True) - - while content_group: - # Take highest scoring as representative - representative = content_group.pop(0) - others_in_group = [] - remaining_for_next_pass = [] - - # Find results with similar scores - for item in content_group: - if abs(representative.score - item.score) <= score_threshold_abs: - others_in_group.append(item) - else: - remaining_for_next_pass.append(item) - - # Create grouped result with additional locations - if others_in_group: - # Build new result with additional_locations populated - grouped_result = SearchResult( - path=representative.path, - score=representative.score, - excerpt=representative.excerpt, - content=representative.content, - symbol=representative.symbol, - chunk=representative.chunk, - metadata={ - **representative.metadata, - "grouped_count": len(others_in_group) + 1, - }, - start_line=representative.start_line, - end_line=representative.end_line, - symbol_name=representative.symbol_name, - symbol_kind=representative.symbol_kind, - additional_locations=[ - AdditionalLocation( - path=other.path, - score=other.score, - start_line=other.start_line, - end_line=other.end_line, - symbol_name=other.symbol_name, - ) for other in others_in_group - ], - ) - final_results.append(grouped_result) - else: - final_results.append(representative) - - content_group = remaining_for_next_pass - - # Add ungroupable results - final_results.extend(unidentifiable_results) - - # Sort final results by score descending - final_results.sort(key=lambda r: r.score, reverse=True) - - return final_results diff --git a/codex-lens/src/codexlens/semantic/__init__.py b/codex-lens/src/codexlens/semantic/__init__.py deleted file mode 100644 index b9bd040f..00000000 --- a/codex-lens/src/codexlens/semantic/__init__.py +++ /dev/null @@ -1,118 +0,0 @@ -"""Optional semantic search module for CodexLens. - -Install with: pip install codexlens[semantic] -Uses fastembed (ONNX-based, lightweight ~200MB) - -GPU Acceleration: -- Automatic GPU detection and usage when available -- Supports CUDA (NVIDIA), TensorRT, DirectML (Windows), ROCm (AMD), CoreML (Apple) -- Install GPU support: pip install onnxruntime-gpu (NVIDIA) or onnxruntime-directml (Windows) -""" - -from __future__ import annotations - -SEMANTIC_AVAILABLE = False -SEMANTIC_BACKEND: str | None = None -GPU_AVAILABLE = False -LITELLM_AVAILABLE = False -_import_error: str | None = None - - -def _detect_backend() -> tuple[bool, str | None, bool, str | None]: - """Detect if fastembed and GPU are available.""" - try: - import numpy as np - except ImportError as e: - return False, None, False, f"numpy not available: {e}" - - try: - from fastembed import TextEmbedding - except ImportError: - return False, None, False, "fastembed not available. Install with: pip install codexlens[semantic]" - - # Check GPU availability - gpu_available = False - try: - from .gpu_support import is_gpu_available - gpu_available = is_gpu_available() - except ImportError: - pass - - return True, "fastembed", gpu_available, None - - -# Initialize on module load -SEMANTIC_AVAILABLE, SEMANTIC_BACKEND, GPU_AVAILABLE, _import_error = _detect_backend() - - -def check_semantic_available() -> tuple[bool, str | None]: - """Check if semantic search dependencies are available.""" - return SEMANTIC_AVAILABLE, _import_error - - -def check_gpu_available() -> tuple[bool, str]: - """Check if GPU acceleration is available. - - Returns: - Tuple of (is_available, status_message) - """ - if not SEMANTIC_AVAILABLE: - return False, "Semantic search not available" - - try: - from .gpu_support import is_gpu_available, get_gpu_summary - if is_gpu_available(): - return True, get_gpu_summary() - return False, "No GPU detected (using CPU)" - except ImportError: - return False, "GPU support module not available" - - -# Export embedder components -# BaseEmbedder is always available (abstract base class) -from .base import BaseEmbedder - -# Factory function for creating embedders -from .factory import get_embedder as get_embedder_factory - -# Optional: LiteLLMEmbedderWrapper (only if ccw-litellm is installed) -try: - import ccw_litellm # noqa: F401 - from .litellm_embedder import LiteLLMEmbedderWrapper - LITELLM_AVAILABLE = True -except ImportError: - LiteLLMEmbedderWrapper = None - LITELLM_AVAILABLE = False - - -def is_embedding_backend_available(backend: str) -> tuple[bool, str | None]: - """Check whether a specific embedding backend can be used. - - Notes: - - "fastembed" requires the optional semantic deps (pip install codexlens[semantic]). - - "litellm" requires ccw-litellm to be installed in the same environment. - """ - backend = (backend or "").strip().lower() - if backend == "fastembed": - if SEMANTIC_AVAILABLE: - return True, None - return False, _import_error or "fastembed not available. Install with: pip install codexlens[semantic]" - if backend == "litellm": - if LITELLM_AVAILABLE: - return True, None - return False, "ccw-litellm not available. Install with: pip install ccw-litellm" - return False, f"Invalid embedding backend: {backend}. Must be 'fastembed' or 'litellm'." - - -__all__ = [ - "SEMANTIC_AVAILABLE", - "SEMANTIC_BACKEND", - "GPU_AVAILABLE", - "LITELLM_AVAILABLE", - "check_semantic_available", - "is_embedding_backend_available", - "check_gpu_available", - "BaseEmbedder", - "get_embedder_factory", - "LiteLLMEmbedderWrapper", -] diff --git a/codex-lens/src/codexlens/semantic/ann_index.py b/codex-lens/src/codexlens/semantic/ann_index.py deleted file mode 100644 index f5280c0e..00000000 --- a/codex-lens/src/codexlens/semantic/ann_index.py +++ /dev/null @@ -1,1097 +0,0 @@ -"""Approximate Nearest Neighbor (ANN) index using HNSW algorithm. - -Provides O(log N) similarity search using hnswlib's Hierarchical Navigable Small World graphs. -Falls back to brute-force search when hnswlib is not available. - -Key features: -- HNSW index for fast approximate nearest neighbor search -- Persistent index storage (saved alongside SQLite database) -- Incremental vector addition and deletion -- Thread-safe operations -- Cosine similarity metric -- Support for centralized storage mode (single index at project root) -""" - -from __future__ import annotations - -import logging -import threading -from pathlib import Path -from typing import List, Optional, Tuple - -from codexlens.errors import StorageError -from codexlens.config import VECTORS_HNSW_NAME - -from . import SEMANTIC_AVAILABLE - -if SEMANTIC_AVAILABLE: - import numpy as np - -logger = logging.getLogger(__name__) - -# Try to import hnswlib (optional dependency) -try: - import hnswlib - - HNSWLIB_AVAILABLE = True -except ImportError: - HNSWLIB_AVAILABLE = False - - -class ANNIndex: - """HNSW-based approximate nearest neighbor index for vector similarity search. - - Performance characteristics: - - Build time: O(N log N) where N is number of vectors - - Search time: O(log N) approximate - - Memory: ~(M * 2 * 4 * d) bytes per vector (M=16, d=dimension) - - Index parameters: - - space: cosine (cosine similarity metric) - - M: 16 (max connections per node - balance between speed and recall) - - ef_construction: 200 (search width during build - higher = better quality) - - ef: 50 (search width during query - higher = better recall) - """ - - def __init__( - self, - index_path: Path, - dim: int, - initial_capacity: int = 50000, - auto_save: bool = False, - expansion_threshold: float = 0.8, - ) -> None: - """Initialize ANN index. - - Args: - index_path: Path to SQLite database (index will be saved as _vectors.hnsw) - dim: Dimension of embedding vectors - initial_capacity: Initial maximum elements capacity (default: 50000) - auto_save: Whether to automatically save index after operations (default: False) - expansion_threshold: Capacity threshold to trigger auto-expansion (default: 0.8) - - Raises: - ImportError: If required dependencies are not available - ValueError: If dimension or capacity is invalid - """ - if not SEMANTIC_AVAILABLE: - raise ImportError( - "Semantic search dependencies not available. " - "Install with: pip install codexlens[semantic]" - ) - - if not HNSWLIB_AVAILABLE: - raise ImportError( - "hnswlib is required for ANN index. " - "Install with: pip install hnswlib" - ) - - if dim <= 0: - raise ValueError(f"Invalid dimension: {dim}") - - if initial_capacity <= 0: - raise ValueError(f"Invalid initial capacity: {initial_capacity}") - - if not 0.0 < expansion_threshold < 1.0: - raise ValueError( - f"Invalid expansion threshold: {expansion_threshold}. Must be between 0 and 1." - ) - - self.index_path = Path(index_path) - self.dim = dim - - # Derive HNSW index path from database path - # e.g., /path/to/_index.db -> /path/to/_index_vectors.hnsw - # This ensures unique HNSW files for each database - db_stem = self.index_path.stem # e.g., "_index" or "tmp123" - self.hnsw_path = self.index_path.parent / f"{db_stem}_vectors.hnsw" - - # HNSW parameters - self.space = "cosine" # Cosine similarity metric - self.M = 16 # Max connections per node (16 is good balance) - self.ef_construction = 200 # Build-time search width (higher = better quality) - self.ef = 50 # Query-time search width (higher = better recall) - - # Memory management parameters - self._auto_save = auto_save - self._expansion_threshold = expansion_threshold - - # Thread safety - self._lock = threading.RLock() - - # HNSW index instance - self._index: Optional[hnswlib.Index] = None - self._max_elements = initial_capacity # Initial capacity (reduced from 1M to 50K) - self._current_count = 0 # Track number of vectors - - logger.info( - f"Initialized ANNIndex with capacity={initial_capacity}, " - f"auto_save={auto_save}, expansion_threshold={expansion_threshold}" - ) - - @classmethod - def create_central( - cls, - index_root: Path, - dim: int, - initial_capacity: int = 50000, - auto_save: bool = False, - expansion_threshold: float = 0.8, - ) -> "ANNIndex": - """Create a centralized ANN index at the project index root. - - This method creates a single shared HNSW index file at the project root, - rather than per-directory indexes. Use this for projects that want all - dense vectors stored in one central location. - - Args: - index_root: Root directory for the index (e.g., .codexlens//) - dim: Dimension of embedding vectors - initial_capacity: Initial maximum elements capacity (default: 50000) - auto_save: Whether to automatically save index after operations (default: False) - expansion_threshold: Capacity threshold to trigger auto-expansion (default: 0.8) - - Returns: - ANNIndex instance configured for centralized storage - - Example: - >>> index = ANNIndex.create_central(Path(".codexlens/abc123"), dim=768) - >>> index.hnsw_path # Returns: .codexlens/abc123/_vectors.hnsw - """ - # Create a dummy index_path that will result in the central hnsw_path - # The index_path is used to derive hnsw_path, so we create a virtual path - # such that self.hnsw_path = index_root / VECTORS_HNSW_NAME - instance = cls.__new__(cls) - - if not SEMANTIC_AVAILABLE: - raise ImportError( - "Semantic search dependencies not available. " - "Install with: pip install codexlens[semantic]" - ) - - if not HNSWLIB_AVAILABLE: - raise ImportError( - "hnswlib is required for ANN index. " - "Install with: pip install hnswlib" - ) - - if dim <= 0: - raise ValueError(f"Invalid dimension: {dim}") - - if initial_capacity <= 0: - raise ValueError(f"Invalid initial capacity: {initial_capacity}") - - if not 0.0 < expansion_threshold < 1.0: - raise ValueError( - f"Invalid expansion threshold: {expansion_threshold}. Must be between 0 and 1." - ) - - instance.index_path = index_root - instance.dim = dim - - # Centralized mode: use VECTORS_HNSW_NAME directly at index_root - instance.hnsw_path = index_root / VECTORS_HNSW_NAME - - # HNSW parameters - instance.space = "cosine" - instance.M = 16 - instance.ef_construction = 200 - instance.ef = 50 - - # Memory management parameters - instance._auto_save = auto_save - instance._expansion_threshold = expansion_threshold - - # Thread safety - instance._lock = threading.RLock() - - # HNSW index instance - instance._index: Optional[hnswlib.Index] = None - instance._max_elements = initial_capacity - instance._current_count = 0 - - logger.info( - f"Initialized centralized ANNIndex at {instance.hnsw_path} with " - f"capacity={initial_capacity}, auto_save={auto_save}" - ) - - return instance - - def _ensure_index(self) -> None: - """Ensure HNSW index is initialized (lazy initialization).""" - if self._index is None: - self._index = hnswlib.Index(space=self.space, dim=self.dim) - self._index.init_index( - max_elements=self._max_elements, - ef_construction=self.ef_construction, - M=self.M, - ) - self._index.set_ef(self.ef) - self._current_count = 0 - logger.debug(f"Created new HNSW index with capacity {self._max_elements}") - - def _auto_expand_if_needed(self, additional_count: int) -> None: - """Auto-expand index capacity if threshold is reached. - - Args: - additional_count: Number of vectors to be added - - Note: - This is called internally by add_vectors and is thread-safe. - """ - usage_ratio = (self._current_count + additional_count) / self._max_elements - - if usage_ratio >= self._expansion_threshold: - # Calculate new capacity (2x current or enough to fit new vectors) - new_capacity = max( - self._max_elements * 2, - self._current_count + additional_count, - ) - - logger.info( - f"Expanding index capacity: {self._max_elements} -> {new_capacity} " - f"(usage: {usage_ratio:.1%}, threshold: {self._expansion_threshold:.1%})" - ) - - self._index.resize_index(new_capacity) - self._max_elements = new_capacity - - def add_vectors(self, ids: List[int], vectors: np.ndarray) -> None: - """Add vectors to the index. - - Args: - ids: List of vector IDs (must be unique) - vectors: Numpy array of shape (N, dim) where N = len(ids) - - Raises: - ValueError: If shapes don't match or vectors are invalid - StorageError: If index operation fails - """ - if len(ids) == 0: - return - - if vectors.shape[0] != len(ids): - raise ValueError( - f"Number of vectors ({vectors.shape[0]}) must match number of IDs ({len(ids)})" - ) - - if vectors.shape[1] != self.dim: - raise ValueError( - f"Vector dimension ({vectors.shape[1]}) must match index dimension ({self.dim})" - ) - - with self._lock: - try: - self._ensure_index() - - # Auto-expand if threshold reached - self._auto_expand_if_needed(len(ids)) - - # Ensure vectors are C-contiguous float32 (hnswlib requirement) - if not vectors.flags['C_CONTIGUOUS'] or vectors.dtype != np.float32: - vectors = np.ascontiguousarray(vectors, dtype=np.float32) - - # Add vectors to index - self._index.add_items(vectors, ids) - self._current_count += len(ids) - - logger.debug( - f"Added {len(ids)} vectors to index " - f"(total: {self._current_count}/{self._max_elements})" - ) - - # Auto-save if enabled - if self._auto_save: - self.save() - - except Exception as e: - raise StorageError(f"Failed to add vectors to ANN index: {e}") - - def remove_vectors(self, ids: List[int]) -> None: - """Remove vectors from the index by marking them as deleted. - - Note: hnswlib uses soft deletion (mark_deleted). Vectors are not - physically removed but will be excluded from search results. - - Args: - ids: List of vector IDs to remove - - Raises: - StorageError: If index operation fails - """ - if len(ids) == 0: - return - - with self._lock: - try: - if self._index is None or self._current_count == 0: - return # Nothing to remove - - # Mark vectors as deleted - deleted_count = 0 - for vec_id in ids: - try: - self._index.mark_deleted(vec_id) - deleted_count += 1 - except RuntimeError: - # ID not found - ignore (idempotent deletion) - pass - - logger.debug(f"Marked {deleted_count}/{len(ids)} vectors as deleted") - - # Auto-save if enabled - if self._auto_save and deleted_count > 0: - self.save() - - except Exception as e: - raise StorageError(f"Failed to remove vectors from ANN index: {e}") - - def search( - self, query: np.ndarray, top_k: int = 10 - ) -> Tuple[List[int], List[float]]: - """Search for nearest neighbors. - - Args: - query: Query vector of shape (dim,) or (1, dim) - top_k: Number of nearest neighbors to return - - Returns: - Tuple of (ids, distances) where: - - ids: List of vector IDs ordered by similarity - - distances: List of cosine distances (lower = more similar) - - Raises: - ValueError: If query shape is invalid - StorageError: If search operation fails - """ - # Validate query shape - if query.ndim == 1: - query = query.reshape(1, -1) - - if query.shape[0] != 1: - raise ValueError( - f"Query must be a single vector, got shape {query.shape}" - ) - - if query.shape[1] != self.dim: - raise ValueError( - f"Query dimension ({query.shape[1]}) must match index dimension ({self.dim})" - ) - - with self._lock: - try: - if self._index is None or self._current_count == 0: - return [], [] # Empty index - - effective_k = min(max(int(top_k), 0), self._current_count) - if effective_k == 0: - return [], [] - - try: - self._index.set_ef(max(self.ef, effective_k)) - except Exception: - pass - - while True: - try: - labels, distances = self._index.knn_query(query, k=effective_k) - break - except Exception as exc: - if "contiguous 2D array" in str(exc) and effective_k > 1: - next_k = max(1, effective_k // 2) - logger.debug( - "ANN search knn_query failed for k=%d; retrying with k=%d: %s", - effective_k, - next_k, - exc, - ) - if next_k == effective_k: - raise - effective_k = next_k - try: - self._index.set_ef(max(self.ef, effective_k)) - except Exception: - pass - continue - raise - - # Convert to lists and flatten (knn_query returns 2D arrays) - ids = labels[0].tolist() - dists = distances[0].tolist() - - return ids, dists - - except Exception as e: - raise StorageError(f"Failed to search ANN index: {e}") - - def save(self) -> None: - """Save index to disk. - - Index is saved to [db_path_directory]/_vectors.hnsw - - Raises: - StorageError: If save operation fails - """ - with self._lock: - try: - if self._index is None or self._current_count == 0: - logger.debug("Skipping save: index is empty") - return # Nothing to save - - # Ensure parent directory exists - self.hnsw_path.parent.mkdir(parents=True, exist_ok=True) - - # Save index - self._index.save_index(str(self.hnsw_path)) - - logger.debug( - f"Saved index to {self.hnsw_path} " - f"({self._current_count} vectors, capacity: {self._max_elements})" - ) - - except Exception as e: - raise StorageError(f"Failed to save ANN index: {e}") - - def load(self) -> bool: - """Load index from disk. - - Returns: - True if index was loaded successfully, False if index file doesn't exist - - Raises: - StorageError: If load operation fails - """ - with self._lock: - try: - if not self.hnsw_path.exists(): - logger.debug(f"Index file not found: {self.hnsw_path}") - return False # Index file doesn't exist (not an error) - - # Create fresh index object for loading (don't call init_index first) - self._index = hnswlib.Index(space=self.space, dim=self.dim) - - # Load index from disk - # Note: max_elements here is just for initial allocation, can expand later - self._index.load_index(str(self.hnsw_path), max_elements=self._max_elements) - - # Update count and capacity from loaded index - self._current_count = self._index.get_current_count() - self._max_elements = self._index.get_max_elements() - - # Set query-time ef parameter - self._index.set_ef(self.ef) - - logger.info( - f"Loaded index from {self.hnsw_path} " - f"({self._current_count} vectors, capacity: {self._max_elements})" - ) - - return True - - except Exception as e: - raise StorageError(f"Failed to load ANN index: {e}") - - def count(self) -> int: - """Get number of vectors in the index. - - Returns: - Number of vectors currently in the index - """ - with self._lock: - return self._current_count - - @property - def capacity(self) -> int: - """Get current maximum capacity of the index. - - Returns: - Maximum number of vectors the index can hold before expansion - """ - with self._lock: - return self._max_elements - - @property - def usage_ratio(self) -> float: - """Get current usage ratio (count / capacity). - - Returns: - Usage ratio between 0.0 and 1.0 - """ - with self._lock: - if self._max_elements == 0: - return 0.0 - return self._current_count / self._max_elements - - @property - def is_loaded(self) -> bool: - """Check if index is loaded and ready for use. - - Returns: - True if index is loaded, False otherwise - """ - with self._lock: - return self._index is not None and self._current_count > 0 - - - -class BinaryANNIndex: - """Binary vector ANN index using Hamming distance for fast coarse retrieval. - - .. deprecated:: - This class is deprecated. Use :class:`codexlens.search.binary_searcher.BinarySearcher` - instead, which provides faster memory-mapped search with centralized storage. - - Optimized for binary vectors (256-bit / 32 bytes per vector). - Uses packed binary representation for memory efficiency. - - Performance characteristics: - - Storage: 32 bytes per vector (vs ~8KB for dense vectors) - - Distance: Hamming distance via XOR + popcount (CPU-efficient) - - Search: O(N) brute-force with SIMD-accelerated distance computation - - Index parameters: - - dim: Binary vector dimension (default: 256) - - packed_dim: Packed bytes size (dim / 8 = 32 for 256-bit) - - Usage: - index = BinaryANNIndex(index_path, dim=256) - index.add_vectors([1, 2, 3], packed_vectors) # List of 32-byte packed vectors - ids, distances = index.search(query_packed, top_k=10) - """ - - DEFAULT_DIM = 256 # Default binary vector dimension - - def __init__( - self, - index_path: Path, - dim: int = 256, - initial_capacity: int = 100000, - auto_save: bool = False, - ) -> None: - """Initialize Binary ANN index. - - Args: - index_path: Path to database (index will be saved as _binary_vectors.bin) - dim: Dimension of binary vectors (default: 256) - initial_capacity: Initial capacity hint (default: 100000) - auto_save: Whether to automatically save index after operations - - Raises: - ImportError: If required dependencies are not available - ValueError: If dimension is invalid - """ - if not SEMANTIC_AVAILABLE: - raise ImportError( - "Semantic search dependencies not available. " - "Install with: pip install codexlens[semantic]" - ) - - import warnings - warnings.warn( - "BinaryANNIndex is deprecated. Use codexlens.search.binary_searcher.BinarySearcher " - "instead for faster memory-mapped search with centralized storage.", - DeprecationWarning, - stacklevel=2 - ) - - if dim <= 0 or dim % 8 != 0: - raise ValueError( - f"Invalid dimension: {dim}. Must be positive and divisible by 8." - ) - - self.index_path = Path(index_path) - self.dim = dim - self.packed_dim = dim // 8 # 32 bytes for 256-bit vectors - - # Derive binary index path from database path - db_stem = self.index_path.stem - self.binary_path = self.index_path.parent / f"{db_stem}_binary_vectors.bin" - - # Memory management - self._auto_save = auto_save - self._initial_capacity = initial_capacity - - # Thread safety - self._lock = threading.RLock() - - # In-memory storage: id -> packed binary vector - self._vectors: dict[int, bytes] = {} - self._id_list: list[int] = [] # Ordered list for efficient iteration - - # Cached numpy array for vectorized search (invalidated on add/remove) - self._vectors_matrix: Optional[np.ndarray] = None - self._ids_array: Optional[np.ndarray] = None - self._cache_valid: bool = False - - logger.info( - f"Initialized BinaryANNIndex with dim={dim}, packed_dim={self.packed_dim}" - ) - - def add_vectors(self, ids: List[int], vectors: List[bytes]) -> None: - """Add packed binary vectors to the index. - - Args: - ids: List of vector IDs (must be unique) - vectors: List of packed binary vectors (each of size packed_dim bytes) - - Raises: - ValueError: If shapes don't match or vectors are invalid - StorageError: If index operation fails - """ - if len(ids) == 0: - return - - if len(vectors) != len(ids): - raise ValueError( - f"Number of vectors ({len(vectors)}) must match number of IDs ({len(ids)})" - ) - - # Validate vector sizes - for i, vec in enumerate(vectors): - if len(vec) != self.packed_dim: - raise ValueError( - f"Vector {i} has size {len(vec)}, expected {self.packed_dim}" - ) - - with self._lock: - try: - for vec_id, vec in zip(ids, vectors): - if vec_id not in self._vectors: - self._id_list.append(vec_id) - self._vectors[vec_id] = vec - - # Invalidate cache on modification - self._cache_valid = False - - logger.debug( - f"Added {len(ids)} binary vectors to index (total: {len(self._vectors)})" - ) - - if self._auto_save: - self.save() - - except Exception as e: - raise StorageError(f"Failed to add vectors to Binary ANN index: {e}") - - def add_vectors_numpy(self, ids: List[int], vectors: np.ndarray) -> None: - """Add unpacked binary vectors (0/1 values) to the index. - - Convenience method that packs the vectors before adding. - - Args: - ids: List of vector IDs (must be unique) - vectors: Numpy array of shape (N, dim) with binary values (0 or 1) - - Raises: - ValueError: If shapes don't match - StorageError: If index operation fails - """ - if len(ids) == 0: - return - - if vectors.shape[0] != len(ids): - raise ValueError( - f"Number of vectors ({vectors.shape[0]}) must match number of IDs ({len(ids)})" - ) - - if vectors.shape[1] != self.dim: - raise ValueError( - f"Vector dimension ({vectors.shape[1]}) must match index dimension ({self.dim})" - ) - - # Pack vectors - packed_vectors = [] - for i in range(vectors.shape[0]): - packed = np.packbits(vectors[i].astype(np.uint8)).tobytes() - packed_vectors.append(packed) - - self.add_vectors(ids, packed_vectors) - - def remove_vectors(self, ids: List[int]) -> None: - """Remove vectors from the index. - - Args: - ids: List of vector IDs to remove - - Raises: - StorageError: If index operation fails - - Note: - Optimized for batch deletion using set operations instead of - O(N) list.remove() calls for each ID. - """ - if len(ids) == 0: - return - - with self._lock: - try: - # Use set for O(1) lookup during filtering - ids_to_remove = set(ids) - removed_count = 0 - - # Remove from dictionary - O(1) per deletion - for vec_id in ids_to_remove: - if vec_id in self._vectors: - del self._vectors[vec_id] - removed_count += 1 - - # Rebuild ID list efficiently - O(N) once instead of O(N) per removal - if removed_count > 0: - self._id_list = [id_ for id_ in self._id_list if id_ not in ids_to_remove] - # Invalidate cache on modification - self._cache_valid = False - - logger.debug(f"Removed {removed_count}/{len(ids)} vectors from index") - - if self._auto_save and removed_count > 0: - self.save() - - except Exception as e: - raise StorageError( - f"Failed to remove vectors from Binary ANN index: {e}" - ) - - def _build_cache(self) -> None: - """Build numpy array cache from vectors dict for vectorized search. - - Pre-computes a contiguous numpy array from all vectors for efficient - batch distance computation. Called lazily on first search after modification. - """ - if self._cache_valid: - return - - n_vectors = len(self._id_list) - if n_vectors == 0: - self._vectors_matrix = None - self._ids_array = None - self._cache_valid = True - return - - # Build contiguous numpy array of all packed vectors - # Shape: (n_vectors, packed_dim) with uint8 dtype - self._vectors_matrix = np.empty((n_vectors, self.packed_dim), dtype=np.uint8) - self._ids_array = np.array(self._id_list, dtype=np.int64) - - for i, vec_id in enumerate(self._id_list): - vec_bytes = self._vectors[vec_id] - self._vectors_matrix[i] = np.frombuffer(vec_bytes, dtype=np.uint8) - - self._cache_valid = True - logger.debug(f"Built vectorized cache for {n_vectors} binary vectors") - - def search( - self, query: bytes, top_k: int = 10 - ) -> Tuple[List[int], List[int]]: - """Search for nearest neighbors using Hamming distance. - - Uses vectorized batch computation for O(N) search with SIMD acceleration. - Pre-computes and caches numpy arrays for efficient repeated queries. - - Args: - query: Packed binary query vector (size: packed_dim bytes) - top_k: Number of nearest neighbors to return - - Returns: - Tuple of (ids, distances) where: - - ids: List of vector IDs ordered by Hamming distance (ascending) - - distances: List of Hamming distances (lower = more similar) - - Raises: - ValueError: If query size is invalid - StorageError: If search operation fails - """ - if len(query) != self.packed_dim: - raise ValueError( - f"Query size ({len(query)}) must match packed_dim ({self.packed_dim})" - ) - - with self._lock: - try: - if len(self._vectors) == 0: - return [], [] - - # Build cache if needed (lazy initialization) - self._build_cache() - - if self._vectors_matrix is None or self._ids_array is None: - return [], [] - - # Vectorized Hamming distance computation - # 1. Convert query to numpy array - query_arr = np.frombuffer(query, dtype=np.uint8) - - # 2. Broadcast XOR: (1, packed_dim) XOR (n_vectors, packed_dim) - # Result shape: (n_vectors, packed_dim) - xor_result = np.bitwise_xor(query_arr, self._vectors_matrix) - - # 3. Vectorized popcount using lookup table for efficiency - # np.unpackbits is slow for large arrays, use popcount LUT instead - popcount_lut = np.array([bin(i).count('1') for i in range(256)], dtype=np.uint8) - bit_counts = popcount_lut[xor_result] - - # 4. Sum across packed bytes to get Hamming distance per vector - distances = bit_counts.sum(axis=1) - - # 5. Get top-k using argpartition (O(N) instead of O(N log N) for full sort) - n_vectors = len(distances) - k = min(top_k, n_vectors) - - if k == n_vectors: - # No partitioning needed, just sort all - sorted_indices = np.argsort(distances) - else: - # Use argpartition for O(N) partial sort - partition_indices = np.argpartition(distances, k)[:k] - # Sort only the top-k - top_k_distances = distances[partition_indices] - sorted_order = np.argsort(top_k_distances) - sorted_indices = partition_indices[sorted_order] - - # 6. Return results - result_ids = self._ids_array[sorted_indices].tolist() - result_dists = distances[sorted_indices].tolist() - - return result_ids, result_dists - - except Exception as e: - raise StorageError(f"Failed to search Binary ANN index: {e}") - - def search_numpy( - self, query: np.ndarray, top_k: int = 10 - ) -> Tuple[List[int], List[int]]: - """Search with unpacked binary query vector. - - Convenience method that packs the query before searching. - - Args: - query: Binary query vector of shape (dim,) with values 0 or 1 - top_k: Number of nearest neighbors to return - - Returns: - Tuple of (ids, distances) - """ - if query.ndim == 2: - query = query.flatten() - - if len(query) != self.dim: - raise ValueError( - f"Query dimension ({len(query)}) must match index dimension ({self.dim})" - ) - - packed_query = np.packbits(query.astype(np.uint8)).tobytes() - return self.search(packed_query, top_k) - - def search_batch( - self, queries: List[bytes], top_k: int = 10 - ) -> List[Tuple[List[int], List[int]]]: - """Batch search for multiple queries. - - Args: - queries: List of packed binary query vectors - top_k: Number of nearest neighbors to return per query - - Returns: - List of (ids, distances) tuples, one per query - """ - results = [] - for query in queries: - ids, dists = self.search(query, top_k) - results.append((ids, dists)) - return results - - def save(self) -> None: - """Save index to disk. - - Binary format: - - 4 bytes: magic number (0x42494E56 = "BINV") - - 4 bytes: version (1) - - 4 bytes: dim - - 4 bytes: packed_dim - - 4 bytes: num_vectors - - For each vector: - - 4 bytes: id - - packed_dim bytes: vector data - - Raises: - StorageError: If save operation fails - """ - with self._lock: - try: - if len(self._vectors) == 0: - logger.debug("Skipping save: index is empty") - return - - # Ensure parent directory exists - self.binary_path.parent.mkdir(parents=True, exist_ok=True) - - with open(self.binary_path, "wb") as f: - # Header - f.write(b"BINV") # Magic number - f.write(np.array([1], dtype=np.uint32).tobytes()) # Version - f.write(np.array([self.dim], dtype=np.uint32).tobytes()) - f.write(np.array([self.packed_dim], dtype=np.uint32).tobytes()) - f.write( - np.array([len(self._vectors)], dtype=np.uint32).tobytes() - ) - - # Vectors - for vec_id in self._id_list: - f.write(np.array([vec_id], dtype=np.uint32).tobytes()) - f.write(self._vectors[vec_id]) - - logger.debug( - f"Saved binary index to {self.binary_path} " - f"({len(self._vectors)} vectors)" - ) - - except Exception as e: - raise StorageError(f"Failed to save Binary ANN index: {e}") - - def load(self) -> bool: - """Load index from disk. - - Returns: - True if index was loaded successfully, False if index file doesn't exist - - Raises: - StorageError: If load operation fails - """ - with self._lock: - try: - if not self.binary_path.exists(): - logger.debug(f"Binary index file not found: {self.binary_path}") - return False - - with open(self.binary_path, "rb") as f: - # Read header - magic = f.read(4) - if magic != b"BINV": - raise StorageError( - f"Invalid binary index file: bad magic number" - ) - - version = np.frombuffer(f.read(4), dtype=np.uint32)[0] - if version != 1: - raise StorageError( - f"Unsupported binary index version: {version}" - ) - - file_dim = np.frombuffer(f.read(4), dtype=np.uint32)[0] - file_packed_dim = np.frombuffer(f.read(4), dtype=np.uint32)[0] - num_vectors = np.frombuffer(f.read(4), dtype=np.uint32)[0] - - if file_dim != self.dim or file_packed_dim != self.packed_dim: - raise StorageError( - f"Dimension mismatch: file has dim={file_dim}, " - f"packed_dim={file_packed_dim}, " - f"expected dim={self.dim}, packed_dim={self.packed_dim}" - ) - - # Clear existing data - self._vectors.clear() - self._id_list.clear() - self._cache_valid = False - - # Read vectors - for _ in range(num_vectors): - vec_id = np.frombuffer(f.read(4), dtype=np.uint32)[0] - vec_data = f.read(self.packed_dim) - self._vectors[int(vec_id)] = vec_data - self._id_list.append(int(vec_id)) - - logger.info( - f"Loaded binary index from {self.binary_path} " - f"({len(self._vectors)} vectors)" - ) - - return True - - except StorageError: - raise - except Exception as e: - raise StorageError(f"Failed to load Binary ANN index: {e}") - - def count(self) -> int: - """Get number of vectors in the index. - - Returns: - Number of vectors currently in the index - """ - with self._lock: - return len(self._vectors) - - @property - def is_loaded(self) -> bool: - """Check if index has vectors. - - Returns: - True if index has vectors, False otherwise - """ - with self._lock: - return len(self._vectors) > 0 - - def get_vector(self, vec_id: int) -> Optional[bytes]: - """Get a specific vector by ID. - - Args: - vec_id: Vector ID to retrieve - - Returns: - Packed binary vector or None if not found - """ - with self._lock: - return self._vectors.get(vec_id) - - def clear(self) -> None: - """Clear all vectors from the index.""" - with self._lock: - self._vectors.clear() - self._id_list.clear() - self._vectors_matrix = None - self._ids_array = None - self._cache_valid = False - logger.debug("Cleared binary index") - - -def create_ann_index( - index_path: Path, - index_type: str = "hnsw", - dim: int = 2048, - **kwargs, -) -> ANNIndex | BinaryANNIndex: - """Factory function to create an ANN index. - - Args: - index_path: Path to database file - index_type: Type of index - "hnsw" for dense vectors, "binary" for binary vectors - dim: Vector dimension (default: 2048 for dense, 256 for binary) - **kwargs: Additional arguments passed to the index constructor - - Returns: - ANNIndex for dense vectors or BinaryANNIndex for binary vectors - - Raises: - ValueError: If index_type is invalid - - Example: - >>> # Dense vector index (HNSW) - >>> dense_index = create_ann_index(path, index_type="hnsw", dim=2048) - >>> dense_index.add_vectors(ids, dense_vectors) - >>> - >>> # Binary vector index (Hamming distance) - >>> binary_index = create_ann_index(path, index_type="binary", dim=256) - >>> binary_index.add_vectors(ids, packed_vectors) - """ - index_type = index_type.lower() - - if index_type == "hnsw": - return ANNIndex(index_path=index_path, dim=dim, **kwargs) - elif index_type == "binary": - # Default to 256 for binary if not specified - if dim == 2048: # Default dense dim was used - dim = 256 - return BinaryANNIndex(index_path=index_path, dim=dim, **kwargs) - else: - raise ValueError( - f"Invalid index_type: {index_type}. Must be 'hnsw' or 'binary'." - ) diff --git a/codex-lens/src/codexlens/semantic/base.py b/codex-lens/src/codexlens/semantic/base.py deleted file mode 100644 index bf8109a0..00000000 --- a/codex-lens/src/codexlens/semantic/base.py +++ /dev/null @@ -1,61 +0,0 @@ -"""Base class for embedders. - -Defines the interface that all embedders must implement. -""" - -from __future__ import annotations - -from abc import ABC, abstractmethod -from typing import Iterable - -import numpy as np - - -class BaseEmbedder(ABC): - """Base class for all embedders. - - All embedder implementations must inherit from this class and implement - the abstract methods to ensure a consistent interface. - """ - - @property - @abstractmethod - def embedding_dim(self) -> int: - """Return embedding dimensions. - - Returns: - int: Dimension of the embedding vectors. - """ - ... - - @property - @abstractmethod - def model_name(self) -> str: - """Return model name. - - Returns: - str: Name or identifier of the underlying model. - """ - ... - - @property - def max_tokens(self) -> int: - """Return maximum token limit for embeddings. - - Returns: - int: Maximum number of tokens that can be embedded at once. - Default is 8192 if not overridden by implementation. - """ - return 8192 - - @abstractmethod - def embed_to_numpy(self, texts: str | Iterable[str]) -> np.ndarray: - """Embed texts to numpy array. - - Args: - texts: Single text or iterable of texts to embed. - - Returns: - numpy.ndarray: Array of shape (n_texts, embedding_dim) containing embeddings. - """ - ... diff --git a/codex-lens/src/codexlens/semantic/chunker.py b/codex-lens/src/codexlens/semantic/chunker.py deleted file mode 100644 index 05d3eb50..00000000 --- a/codex-lens/src/codexlens/semantic/chunker.py +++ /dev/null @@ -1,821 +0,0 @@ -"""Code chunking strategies for semantic search. - -This module provides various chunking strategies for breaking down source code -into semantic chunks suitable for embedding and search. - -Lightweight Mode: - The ChunkConfig supports a `skip_token_count` option for performance optimization. - When enabled, token counting uses a fast character-based estimation (char/4) - instead of expensive tiktoken encoding. - - Use cases for lightweight mode: - - Large-scale indexing where speed is critical - - Scenarios where approximate token counts are acceptable - - Memory-constrained environments - - Initial prototyping and development - - Example: - # Default mode (accurate tiktoken encoding) - config = ChunkConfig() - chunker = Chunker(config) - - # Lightweight mode (fast char/4 estimation) - config = ChunkConfig(skip_token_count=True) - chunker = Chunker(config) - chunks = chunker.chunk_file(content, symbols, path, language) -""" - -from __future__ import annotations - -from dataclasses import dataclass -from pathlib import Path -from typing import List, Optional, Tuple - -from codexlens.entities import SemanticChunk, Symbol -from codexlens.parsers.tokenizer import get_default_tokenizer - - -@dataclass -class ChunkConfig: - """Configuration for chunking strategies.""" - max_chunk_size: int = 1000 # Max characters per chunk - overlap: int = 200 # Overlap for sliding window (increased from 100 for better context) - strategy: str = "auto" # Chunking strategy: auto, symbol, sliding_window, hybrid - min_chunk_size: int = 50 # Minimum chunk size - skip_token_count: bool = False # Skip expensive token counting (use char/4 estimate) - strip_comments: bool = True # Remove comments from chunk content for embedding - strip_docstrings: bool = True # Remove docstrings from chunk content for embedding - preserve_original: bool = True # Store original content in metadata when stripping - - -class CommentStripper: - """Remove comments from source code while preserving structure.""" - - @staticmethod - def strip_python_comments(content: str) -> str: - """Strip Python comments (# style) but preserve docstrings. - - Args: - content: Python source code - - Returns: - Code with comments removed - """ - lines = content.splitlines(keepends=True) - result_lines: List[str] = [] - in_string = False - string_char = None - - for line in lines: - new_line = [] - i = 0 - while i < len(line): - char = line[i] - - # Handle string literals - if char in ('"', "'") and not in_string: - # Check for triple quotes - if line[i:i+3] in ('"""', "'''"): - in_string = True - string_char = line[i:i+3] - new_line.append(line[i:i+3]) - i += 3 - continue - else: - in_string = True - string_char = char - elif in_string: - if string_char and len(string_char) == 3: - if line[i:i+3] == string_char: - in_string = False - new_line.append(line[i:i+3]) - i += 3 - string_char = None - continue - elif char == string_char: - # Check for escape - if i > 0 and line[i-1] != '\\': - in_string = False - string_char = None - - # Handle comments (only outside strings) - if char == '#' and not in_string: - # Rest of line is comment, skip it - new_line.append('\n' if line.endswith('\n') else '') - break - - new_line.append(char) - i += 1 - - result_lines.append(''.join(new_line)) - - return ''.join(result_lines) - - @staticmethod - def strip_c_style_comments(content: str) -> str: - """Strip C-style comments (// and /* */) from code. - - Args: - content: Source code with C-style comments - - Returns: - Code with comments removed - """ - result = [] - i = 0 - in_string = False - string_char = None - in_multiline_comment = False - - while i < len(content): - # Handle multi-line comment end - if in_multiline_comment: - if content[i:i+2] == '*/': - in_multiline_comment = False - i += 2 - continue - i += 1 - continue - - char = content[i] - - # Handle string literals - if char in ('"', "'", '`') and not in_string: - in_string = True - string_char = char - result.append(char) - i += 1 - continue - elif in_string: - result.append(char) - if char == string_char and (i == 0 or content[i-1] != '\\'): - in_string = False - string_char = None - i += 1 - continue - - # Handle comments - if content[i:i+2] == '//': - # Single line comment - skip to end of line - while i < len(content) and content[i] != '\n': - i += 1 - if i < len(content): - result.append('\n') - i += 1 - continue - - if content[i:i+2] == '/*': - in_multiline_comment = True - i += 2 - continue - - result.append(char) - i += 1 - - return ''.join(result) - - @classmethod - def strip_comments(cls, content: str, language: str) -> str: - """Strip comments based on language. - - Args: - content: Source code content - language: Programming language - - Returns: - Code with comments removed - """ - if language == "python": - return cls.strip_python_comments(content) - elif language in {"javascript", "typescript", "java", "c", "cpp", "go", "rust"}: - return cls.strip_c_style_comments(content) - return content - - -class DocstringStripper: - """Remove docstrings from source code.""" - - @staticmethod - def strip_python_docstrings(content: str) -> str: - """Strip Python docstrings (triple-quoted strings at module/class/function level). - - Args: - content: Python source code - - Returns: - Code with docstrings removed - """ - lines = content.splitlines(keepends=True) - result_lines: List[str] = [] - i = 0 - - while i < len(lines): - line = lines[i] - stripped = line.strip() - - # Check for docstring start - if stripped.startswith('"""') or stripped.startswith("'''"): - quote_type = '"""' if stripped.startswith('"""') else "'''" - - # Single line docstring - if stripped.count(quote_type) >= 2: - # Skip this line (docstring) - i += 1 - continue - - # Multi-line docstring - skip until closing - i += 1 - while i < len(lines): - if quote_type in lines[i]: - i += 1 - break - i += 1 - continue - - result_lines.append(line) - i += 1 - - return ''.join(result_lines) - - @staticmethod - def strip_jsdoc_comments(content: str) -> str: - """Strip JSDoc comments (/** ... */) from code. - - Args: - content: JavaScript/TypeScript source code - - Returns: - Code with JSDoc comments removed - """ - result = [] - i = 0 - in_jsdoc = False - - while i < len(content): - if in_jsdoc: - if content[i:i+2] == '*/': - in_jsdoc = False - i += 2 - continue - i += 1 - continue - - # Check for JSDoc start (/** but not /*) - if content[i:i+3] == '/**': - in_jsdoc = True - i += 3 - continue - - result.append(content[i]) - i += 1 - - return ''.join(result) - - @classmethod - def strip_docstrings(cls, content: str, language: str) -> str: - """Strip docstrings based on language. - - Args: - content: Source code content - language: Programming language - - Returns: - Code with docstrings removed - """ - if language == "python": - return cls.strip_python_docstrings(content) - elif language in {"javascript", "typescript"}: - return cls.strip_jsdoc_comments(content) - return content - - -class Chunker: - """Chunk code files for semantic embedding.""" - - def __init__(self, config: ChunkConfig | None = None) -> None: - self.config = config or ChunkConfig() - self._tokenizer = get_default_tokenizer() - self._comment_stripper = CommentStripper() - self._docstring_stripper = DocstringStripper() - - def _process_content(self, content: str, language: str) -> Tuple[str, Optional[str]]: - """Process chunk content by stripping comments/docstrings if configured. - - Args: - content: Original chunk content - language: Programming language - - Returns: - Tuple of (processed_content, original_content_if_preserved) - """ - original = content if self.config.preserve_original else None - processed = content - - if self.config.strip_comments: - processed = self._comment_stripper.strip_comments(processed, language) - - if self.config.strip_docstrings: - processed = self._docstring_stripper.strip_docstrings(processed, language) - - # If nothing changed, don't store original - if processed == content: - original = None - - return processed, original - - def _estimate_token_count(self, text: str) -> int: - """Estimate token count based on config. - - If skip_token_count is True, uses character-based estimation (char/4). - Otherwise, uses accurate tiktoken encoding. - - Args: - text: Text to count tokens for - - Returns: - Estimated token count - """ - if self.config.skip_token_count: - # Fast character-based estimation: ~4 chars per token - return max(1, len(text) // 4) - return self._tokenizer.count_tokens(text) - - def chunk_by_symbol( - self, - content: str, - symbols: List[Symbol], - file_path: str | Path, - language: str, - symbol_token_counts: Optional[dict[str, int]] = None, - ) -> List[SemanticChunk]: - """Chunk code by extracted symbols (functions, classes). - - Each symbol becomes one chunk with its full content. - Large symbols exceeding max_chunk_size are recursively split using sliding window. - - Args: - content: Source code content - symbols: List of extracted symbols - file_path: Path to source file - language: Programming language - symbol_token_counts: Optional dict mapping symbol names to token counts - """ - chunks: List[SemanticChunk] = [] - lines = content.splitlines(keepends=True) - - for symbol in symbols: - start_line, end_line = symbol.range - # Convert to 0-indexed - start_idx = max(0, start_line - 1) - end_idx = min(len(lines), end_line) - - chunk_content = "".join(lines[start_idx:end_idx]) - if len(chunk_content.strip()) < self.config.min_chunk_size: - continue - - # Check if symbol content exceeds max_chunk_size - if len(chunk_content) > self.config.max_chunk_size: - # Create line mapping for correct line number tracking - line_mapping = list(range(start_line, end_line + 1)) - - # Use sliding window to split large symbol - sub_chunks = self.chunk_sliding_window( - chunk_content, - file_path=file_path, - language=language, - line_mapping=line_mapping - ) - - # Update sub_chunks with parent symbol metadata - for sub_chunk in sub_chunks: - sub_chunk.metadata["symbol_name"] = symbol.name - sub_chunk.metadata["symbol_kind"] = symbol.kind - sub_chunk.metadata["strategy"] = "symbol_split" - sub_chunk.metadata["chunk_type"] = "code" - sub_chunk.metadata["parent_symbol_range"] = (start_line, end_line) - - chunks.extend(sub_chunks) - else: - # Process content (strip comments/docstrings if configured) - processed_content, original_content = self._process_content(chunk_content, language) - - # Skip if processed content is too small - if len(processed_content.strip()) < self.config.min_chunk_size: - continue - - # Calculate token count if not provided - token_count = None - if symbol_token_counts and symbol.name in symbol_token_counts: - token_count = symbol_token_counts[symbol.name] - else: - token_count = self._estimate_token_count(processed_content) - - metadata = { - "file": str(file_path), - "language": language, - "symbol_name": symbol.name, - "symbol_kind": symbol.kind, - "start_line": start_line, - "end_line": end_line, - "strategy": "symbol", - "chunk_type": "code", - "token_count": token_count, - } - - # Store original content if it was modified - if original_content is not None: - metadata["original_content"] = original_content - - chunks.append(SemanticChunk( - content=processed_content, - embedding=None, - metadata=metadata - )) - - return chunks - - def chunk_sliding_window( - self, - content: str, - file_path: str | Path, - language: str, - line_mapping: Optional[List[int]] = None, - ) -> List[SemanticChunk]: - """Chunk code using sliding window approach. - - Used for files without clear symbol boundaries or very long functions. - - Args: - content: Source code content - file_path: Path to source file - language: Programming language - line_mapping: Optional list mapping content line indices to original line numbers - (1-indexed). If provided, line_mapping[i] is the original line number - for the i-th line in content. - """ - chunks: List[SemanticChunk] = [] - lines = content.splitlines(keepends=True) - - if not lines: - return chunks - - # Calculate lines per chunk based on average line length - avg_line_len = len(content) / max(len(lines), 1) - lines_per_chunk = max(10, int(self.config.max_chunk_size / max(avg_line_len, 1))) - overlap_lines = max(2, int(self.config.overlap / max(avg_line_len, 1))) - # Ensure overlap is less than chunk size to prevent infinite loop - overlap_lines = min(overlap_lines, lines_per_chunk - 1) - - start = 0 - chunk_idx = 0 - - while start < len(lines): - end = min(start + lines_per_chunk, len(lines)) - chunk_content = "".join(lines[start:end]) - - if len(chunk_content.strip()) >= self.config.min_chunk_size: - # Process content (strip comments/docstrings if configured) - processed_content, original_content = self._process_content(chunk_content, language) - - # Skip if processed content is too small - if len(processed_content.strip()) < self.config.min_chunk_size: - # Move window forward - step = lines_per_chunk - overlap_lines - if step <= 0: - step = 1 - start += step - continue - - token_count = self._estimate_token_count(processed_content) - - # Calculate correct line numbers - if line_mapping: - # Use line mapping to get original line numbers - start_line = line_mapping[start] - end_line = line_mapping[end - 1] - else: - # Default behavior: treat content as starting at line 1 - start_line = start + 1 - end_line = end - - metadata = { - "file": str(file_path), - "language": language, - "chunk_index": chunk_idx, - "start_line": start_line, - "end_line": end_line, - "strategy": "sliding_window", - "chunk_type": "code", - "token_count": token_count, - } - - # Store original content if it was modified - if original_content is not None: - metadata["original_content"] = original_content - - chunks.append(SemanticChunk( - content=processed_content, - embedding=None, - metadata=metadata - )) - chunk_idx += 1 - - # Move window, accounting for overlap - step = lines_per_chunk - overlap_lines - if step <= 0: - step = 1 # Failsafe to prevent infinite loop - start += step - - # Break if we've reached the end - if end >= len(lines): - break - - return chunks - - def chunk_file( - self, - content: str, - symbols: List[Symbol], - file_path: str | Path, - language: str, - symbol_token_counts: Optional[dict[str, int]] = None, - ) -> List[SemanticChunk]: - """Chunk a file using the best strategy. - - Uses symbol-based chunking if symbols available, - falls back to sliding window for files without symbols. - - Args: - content: Source code content - symbols: List of extracted symbols - file_path: Path to source file - language: Programming language - symbol_token_counts: Optional dict mapping symbol names to token counts - """ - if symbols: - return self.chunk_by_symbol(content, symbols, file_path, language, symbol_token_counts) - return self.chunk_sliding_window(content, file_path, language) - -class DocstringExtractor: - """Extract docstrings from source code.""" - - @staticmethod - def extract_python_docstrings(content: str) -> List[Tuple[str, int, int]]: - """Extract Python docstrings with their line ranges. - - Returns: List of (docstring_content, start_line, end_line) tuples - """ - docstrings: List[Tuple[str, int, int]] = [] - lines = content.splitlines(keepends=True) - - i = 0 - while i < len(lines): - line = lines[i] - stripped = line.strip() - if stripped.startswith('"""') or stripped.startswith("'''"): - quote_type = '"""' if stripped.startswith('"""') else "'''" - start_line = i + 1 - - if stripped.count(quote_type) >= 2: - docstring_content = line - end_line = i + 1 - docstrings.append((docstring_content, start_line, end_line)) - i += 1 - continue - - docstring_lines = [line] - i += 1 - while i < len(lines): - docstring_lines.append(lines[i]) - if quote_type in lines[i]: - break - i += 1 - - end_line = i + 1 - docstring_content = "".join(docstring_lines) - docstrings.append((docstring_content, start_line, end_line)) - - i += 1 - - return docstrings - - @staticmethod - def extract_jsdoc_comments(content: str) -> List[Tuple[str, int, int]]: - """Extract JSDoc comments with their line ranges. - - Returns: List of (comment_content, start_line, end_line) tuples - """ - comments: List[Tuple[str, int, int]] = [] - lines = content.splitlines(keepends=True) - - i = 0 - while i < len(lines): - line = lines[i] - stripped = line.strip() - - if stripped.startswith('/**'): - start_line = i + 1 - comment_lines = [line] - i += 1 - - while i < len(lines): - comment_lines.append(lines[i]) - if '*/' in lines[i]: - break - i += 1 - - end_line = i + 1 - comment_content = "".join(comment_lines) - comments.append((comment_content, start_line, end_line)) - - i += 1 - - return comments - - @classmethod - def extract_docstrings( - cls, - content: str, - language: str - ) -> List[Tuple[str, int, int]]: - """Extract docstrings based on language. - - Returns: List of (docstring_content, start_line, end_line) tuples - """ - if language == "python": - return cls.extract_python_docstrings(content) - elif language in {"javascript", "typescript"}: - return cls.extract_jsdoc_comments(content) - return [] - - -class HybridChunker: - """Hybrid chunker that prioritizes docstrings before symbol-based chunking. - - Composition-based strategy that: - 1. Extracts docstrings as dedicated chunks - 2. For remaining code, uses base chunker (symbol or sliding window) - """ - - def __init__( - self, - base_chunker: Chunker | None = None, - config: ChunkConfig | None = None - ) -> None: - """Initialize hybrid chunker. - - Args: - base_chunker: Chunker to use for non-docstring content - config: Configuration for chunking - """ - self.config = config or ChunkConfig() - self.base_chunker = base_chunker or Chunker(self.config) - self.docstring_extractor = DocstringExtractor() - - def _get_excluded_line_ranges( - self, - docstrings: List[Tuple[str, int, int]] - ) -> set[int]: - """Get set of line numbers that are part of docstrings.""" - excluded_lines: set[int] = set() - for _, start_line, end_line in docstrings: - for line_num in range(start_line, end_line + 1): - excluded_lines.add(line_num) - return excluded_lines - - def _filter_symbols_outside_docstrings( - self, - symbols: List[Symbol], - excluded_lines: set[int] - ) -> List[Symbol]: - """Filter symbols to exclude those completely within docstrings.""" - filtered: List[Symbol] = [] - for symbol in symbols: - start_line, end_line = symbol.range - symbol_lines = set(range(start_line, end_line + 1)) - if not symbol_lines.issubset(excluded_lines): - filtered.append(symbol) - return filtered - - def _find_parent_symbol( - self, - start_line: int, - end_line: int, - symbols: List[Symbol], - ) -> Optional[Symbol]: - """Find the smallest symbol range that fully contains a docstring span.""" - candidates: List[Symbol] = [] - for symbol in symbols: - sym_start, sym_end = symbol.range - if sym_start <= start_line and end_line <= sym_end: - candidates.append(symbol) - if not candidates: - return None - return min(candidates, key=lambda s: (s.range[1] - s.range[0], s.range[0])) - - def chunk_file( - self, - content: str, - symbols: List[Symbol], - file_path: str | Path, - language: str, - symbol_token_counts: Optional[dict[str, int]] = None, - ) -> List[SemanticChunk]: - """Chunk file using hybrid strategy. - - Extracts docstrings first, then chunks remaining code. - - Args: - content: Source code content - symbols: List of extracted symbols - file_path: Path to source file - language: Programming language - symbol_token_counts: Optional dict mapping symbol names to token counts - """ - chunks: List[SemanticChunk] = [] - - # Step 1: Extract docstrings as dedicated chunks - docstrings: List[Tuple[str, int, int]] = [] - if language == "python": - # Fast path: avoid expensive docstring extraction if delimiters are absent. - if '"""' in content or "'''" in content: - docstrings = self.docstring_extractor.extract_docstrings(content, language) - elif language in {"javascript", "typescript"}: - if "/**" in content: - docstrings = self.docstring_extractor.extract_docstrings(content, language) - else: - docstrings = self.docstring_extractor.extract_docstrings(content, language) - - # Fast path: no docstrings -> delegate to base chunker directly. - if not docstrings: - if symbols: - base_chunks = self.base_chunker.chunk_by_symbol( - content, symbols, file_path, language, symbol_token_counts - ) - else: - base_chunks = self.base_chunker.chunk_sliding_window(content, file_path, language) - - for chunk in base_chunks: - chunk.metadata["strategy"] = "hybrid" - chunk.metadata["chunk_type"] = "code" - return base_chunks - - for docstring_content, start_line, end_line in docstrings: - if len(docstring_content.strip()) >= self.config.min_chunk_size: - parent_symbol = self._find_parent_symbol(start_line, end_line, symbols) - # Use base chunker's token estimation method - token_count = self.base_chunker._estimate_token_count(docstring_content) - metadata = { - "file": str(file_path), - "language": language, - "chunk_type": "docstring", - "start_line": start_line, - "end_line": end_line, - "strategy": "hybrid", - "token_count": token_count, - } - if parent_symbol is not None: - metadata["parent_symbol"] = parent_symbol.name - metadata["parent_symbol_kind"] = parent_symbol.kind - metadata["parent_symbol_range"] = parent_symbol.range - chunks.append(SemanticChunk( - content=docstring_content, - embedding=None, - metadata=metadata - )) - - # Step 2: Get line ranges occupied by docstrings - excluded_lines = self._get_excluded_line_ranges(docstrings) - - # Step 3: Filter symbols to exclude docstring-only ranges - filtered_symbols = self._filter_symbols_outside_docstrings(symbols, excluded_lines) - - # Step 4: Chunk remaining content using base chunker - if filtered_symbols: - base_chunks = self.base_chunker.chunk_by_symbol( - content, filtered_symbols, file_path, language, symbol_token_counts - ) - for chunk in base_chunks: - chunk.metadata["strategy"] = "hybrid" - chunk.metadata["chunk_type"] = "code" - chunks.append(chunk) - else: - lines = content.splitlines(keepends=True) - remaining_lines: List[str] = [] - - for i, line in enumerate(lines, start=1): - if i not in excluded_lines: - remaining_lines.append(line) - - if remaining_lines: - remaining_content = "".join(remaining_lines) - if len(remaining_content.strip()) >= self.config.min_chunk_size: - base_chunks = self.base_chunker.chunk_sliding_window( - remaining_content, file_path, language - ) - for chunk in base_chunks: - chunk.metadata["strategy"] = "hybrid" - chunk.metadata["chunk_type"] = "code" - chunks.append(chunk) - - return chunks diff --git a/codex-lens/src/codexlens/semantic/code_extractor.py b/codex-lens/src/codexlens/semantic/code_extractor.py deleted file mode 100644 index ec5b7211..00000000 --- a/codex-lens/src/codexlens/semantic/code_extractor.py +++ /dev/null @@ -1,274 +0,0 @@ -"""Smart code extraction for complete code blocks.""" - -from __future__ import annotations - -from pathlib import Path -from typing import List, Optional, Tuple - -from codexlens.entities import SearchResult, Symbol - - -def extract_complete_code_block( - result: SearchResult, - source_file_path: Optional[str] = None, - context_lines: int = 0, -) -> str: - """Extract complete code block from a search result. - - Args: - result: SearchResult from semantic search. - source_file_path: Optional path to source file for re-reading. - context_lines: Additional lines of context to include above/below. - - Returns: - Complete code block as string. - """ - # If we have full content stored, use it - if result.content: - if context_lines == 0: - return result.content - # Need to add context, read from file - - # Try to read from source file - file_path = source_file_path or result.path - if not file_path or not Path(file_path).exists(): - # Fall back to excerpt - return result.excerpt or "" - - try: - content = Path(file_path).read_text(encoding="utf-8", errors="ignore") - lines = content.splitlines() - - # Get line range - start_line = result.start_line or 1 - end_line = result.end_line or len(lines) - - # Add context - start_idx = max(0, start_line - 1 - context_lines) - end_idx = min(len(lines), end_line + context_lines) - - return "\n".join(lines[start_idx:end_idx]) - except Exception: - return result.excerpt or result.content or "" - - -def extract_symbol_with_context( - file_path: str, - symbol: Symbol, - include_docstring: bool = True, - include_decorators: bool = True, -) -> str: - """Extract a symbol (function/class) with its docstring and decorators. - - Args: - file_path: Path to source file. - symbol: Symbol to extract. - include_docstring: Include docstring if present. - include_decorators: Include decorators/annotations above symbol. - - Returns: - Complete symbol code with context. - """ - try: - content = Path(file_path).read_text(encoding="utf-8", errors="ignore") - lines = content.splitlines() - - start_line, end_line = symbol.range - start_idx = start_line - 1 - end_idx = end_line - - # Look for decorators above the symbol - if include_decorators and start_idx > 0: - decorator_start = start_idx - # Search backwards for decorators - i = start_idx - 1 - while i >= 0 and i >= start_idx - 20: # Look up to 20 lines back - line = lines[i].strip() - if line.startswith("@"): - decorator_start = i - i -= 1 - elif line == "" or line.startswith("#"): - # Skip empty lines and comments, continue looking - i -= 1 - elif line.startswith("//") or line.startswith("/*") or line.startswith("*"): - # JavaScript/Java style comments - decorator_start = i - i -= 1 - else: - # Found non-decorator, non-comment line, stop - break - start_idx = decorator_start - - return "\n".join(lines[start_idx:end_idx]) - except Exception: - return "" - - -def format_search_result_code( - result: SearchResult, - max_lines: Optional[int] = None, - show_line_numbers: bool = True, - highlight_match: bool = False, -) -> str: - """Format search result code for display. - - Args: - result: SearchResult to format. - max_lines: Maximum lines to show (None for all). - show_line_numbers: Include line numbers in output. - highlight_match: Add markers for matched region. - - Returns: - Formatted code string. - """ - content = result.content or result.excerpt or "" - if not content: - return "" - - lines = content.splitlines() - - # Truncate if needed - truncated = False - if max_lines and len(lines) > max_lines: - lines = lines[:max_lines] - truncated = True - - # Format with line numbers - if show_line_numbers: - start = result.start_line or 1 - formatted_lines = [] - for i, line in enumerate(lines): - line_num = start + i - formatted_lines.append(f"{line_num:4d} | {line}") - output = "\n".join(formatted_lines) - else: - output = "\n".join(lines) - - if truncated: - output += "\n... (truncated)" - - return output - - -def get_code_block_summary(result: SearchResult) -> str: - """Get a concise summary of a code block. - - Args: - result: SearchResult to summarize. - - Returns: - Summary string like "function hello_world (lines 10-25)" - """ - parts = [] - - if result.symbol_kind: - parts.append(result.symbol_kind) - - if result.symbol_name: - parts.append(f"`{result.symbol_name}`") - elif result.excerpt: - # Extract first meaningful identifier - first_line = result.excerpt.split("\n")[0][:50] - parts.append(f'"{first_line}..."') - - if result.start_line and result.end_line: - if result.start_line == result.end_line: - parts.append(f"(line {result.start_line})") - else: - parts.append(f"(lines {result.start_line}-{result.end_line})") - - if result.path: - file_name = Path(result.path).name - parts.append(f"in {file_name}") - - return " ".join(parts) if parts else "unknown code block" - - -class CodeBlockResult: - """Enhanced search result with complete code block.""" - - def __init__(self, result: SearchResult, source_path: Optional[str] = None): - self.result = result - self.source_path = source_path or result.path - self._full_code: Optional[str] = None - - @property - def score(self) -> float: - return self.result.score - - @property - def path(self) -> str: - return self.result.path - - @property - def file_name(self) -> str: - return Path(self.result.path).name - - @property - def symbol_name(self) -> Optional[str]: - return self.result.symbol_name - - @property - def symbol_kind(self) -> Optional[str]: - return self.result.symbol_kind - - @property - def line_range(self) -> Tuple[int, int]: - return ( - self.result.start_line or 1, - self.result.end_line or 1 - ) - - @property - def full_code(self) -> str: - """Get full code block content.""" - if self._full_code is None: - self._full_code = extract_complete_code_block(self.result, self.source_path) - return self._full_code - - @property - def excerpt(self) -> str: - """Get short excerpt.""" - return self.result.excerpt or "" - - @property - def summary(self) -> str: - """Get code block summary.""" - return get_code_block_summary(self.result) - - def format( - self, - max_lines: Optional[int] = None, - show_line_numbers: bool = True, - ) -> str: - """Format code for display.""" - # Use full code if available - display_result = SearchResult( - path=self.result.path, - score=self.result.score, - content=self.full_code, - start_line=self.result.start_line, - end_line=self.result.end_line, - ) - return format_search_result_code( - display_result, - max_lines=max_lines, - show_line_numbers=show_line_numbers - ) - - def __repr__(self) -> str: - return f"" - - -def enhance_search_results( - results: List[SearchResult], -) -> List[CodeBlockResult]: - """Enhance search results with complete code block access. - - Args: - results: List of SearchResult from semantic search. - - Returns: - List of CodeBlockResult with full code access. - """ - return [CodeBlockResult(r) for r in results] diff --git a/codex-lens/src/codexlens/semantic/embedder.py b/codex-lens/src/codexlens/semantic/embedder.py deleted file mode 100644 index e2d21717..00000000 --- a/codex-lens/src/codexlens/semantic/embedder.py +++ /dev/null @@ -1,288 +0,0 @@ -"""Embedder for semantic code search using fastembed. - -Supports GPU acceleration via ONNX execution providers (CUDA, TensorRT, DirectML, ROCm, CoreML). -GPU acceleration is automatic when available, with transparent CPU fallback. -""" - -from __future__ import annotations - -import gc -import logging -import threading -from typing import Dict, Iterable, List, Optional - -import numpy as np - -from . import SEMANTIC_AVAILABLE -from .base import BaseEmbedder -from .gpu_support import get_optimal_providers, is_gpu_available, get_gpu_summary, get_selected_device_id - -logger = logging.getLogger(__name__) - -# Global embedder cache for singleton pattern -_embedder_cache: Dict[str, "Embedder"] = {} -_cache_lock = threading.RLock() - - -def get_embedder(profile: str = "code", use_gpu: bool = True) -> "Embedder": - """Get or create a cached Embedder instance (thread-safe singleton). - - This function provides significant performance improvement by reusing - Embedder instances across multiple searches, avoiding repeated model - loading overhead (~0.8s per load). - - Args: - profile: Model profile ("fast", "code", "multilingual", "balanced") - use_gpu: If True, use GPU acceleration when available (default: True) - - Returns: - Cached Embedder instance for the given profile - """ - global _embedder_cache - - # Cache key includes GPU preference to support mixed configurations - cache_key = f"{profile}:{'gpu' if use_gpu else 'cpu'}" - - # All cache access is protected by _cache_lock to avoid races with - # clear_embedder_cache() during concurrent access. - with _cache_lock: - embedder = _embedder_cache.get(cache_key) - if embedder is not None: - return embedder - - # Create new embedder and cache it - embedder = Embedder(profile=profile, use_gpu=use_gpu) - # Pre-load model to ensure it's ready - embedder._load_model() - _embedder_cache[cache_key] = embedder - - # Log GPU status on first embedder creation - if use_gpu and is_gpu_available(): - logger.info(f"Embedder initialized with GPU: {get_gpu_summary()}") - elif use_gpu: - logger.debug("GPU not available, using CPU for embeddings") - - return embedder - - -def clear_embedder_cache() -> None: - """Clear the embedder cache and release ONNX resources. - - This method ensures proper cleanup of ONNX model resources to prevent - memory leaks when embedders are no longer needed. - """ - global _embedder_cache - with _cache_lock: - # Release ONNX resources before clearing cache - for embedder in _embedder_cache.values(): - if embedder._model is not None: - del embedder._model - embedder._model = None - _embedder_cache.clear() - gc.collect() - - -class Embedder(BaseEmbedder): - """Generate embeddings for code chunks using fastembed (ONNX-based). - - Supported Model Profiles: - - fast: BAAI/bge-small-en-v1.5 (384 dim) - Fast, lightweight, English-optimized - - code: jinaai/jina-embeddings-v2-base-code (768 dim) - Code-optimized, best for programming languages - - multilingual: intfloat/multilingual-e5-large (1024 dim) - Multilingual + code support - - balanced: mixedbread-ai/mxbai-embed-large-v1 (1024 dim) - High accuracy, general purpose - """ - - # Model profiles for different use cases - MODELS = { - "fast": "BAAI/bge-small-en-v1.5", # 384 dim - Fast, lightweight - "code": "jinaai/jina-embeddings-v2-base-code", # 768 dim - Code-optimized - "multilingual": "intfloat/multilingual-e5-large", # 1024 dim - Multilingual - "balanced": "mixedbread-ai/mxbai-embed-large-v1", # 1024 dim - High accuracy - } - - # Dimension mapping for each model - MODEL_DIMS = { - "BAAI/bge-small-en-v1.5": 384, - "jinaai/jina-embeddings-v2-base-code": 768, - "intfloat/multilingual-e5-large": 1024, - "mixedbread-ai/mxbai-embed-large-v1": 1024, - } - - # Default model (fast profile) - DEFAULT_MODEL = "BAAI/bge-small-en-v1.5" - DEFAULT_PROFILE = "fast" - - def __init__( - self, - model_name: str | None = None, - profile: str | None = None, - use_gpu: bool = True, - providers: List[str] | None = None, - ) -> None: - """Initialize embedder with model or profile. - - Args: - model_name: Explicit model name (e.g., "jinaai/jina-embeddings-v2-base-code") - profile: Model profile shortcut ("fast", "code", "multilingual", "balanced") - If both provided, model_name takes precedence. - use_gpu: If True, use GPU acceleration when available (default: True) - providers: Explicit ONNX providers list (overrides use_gpu if provided) - """ - if not SEMANTIC_AVAILABLE: - raise ImportError( - "Semantic search dependencies not available. " - "Install with: pip install codexlens[semantic]" - ) - - # Resolve model name from profile or use explicit name - if model_name: - self._model_name = model_name - elif profile and profile in self.MODELS: - self._model_name = self.MODELS[profile] - else: - self._model_name = self.DEFAULT_MODEL - - # Configure ONNX execution providers with device_id options for GPU selection - # Using with_device_options=True ensures DirectML/CUDA device_id is passed correctly - if providers is not None: - self._providers = providers - else: - self._providers = get_optimal_providers(use_gpu=use_gpu, with_device_options=True) - - self._use_gpu = use_gpu - self._model = None - - @property - def model_name(self) -> str: - """Get model name.""" - return self._model_name - - @property - def embedding_dim(self) -> int: - """Get embedding dimension for current model.""" - return self.MODEL_DIMS.get(self._model_name, 768) # Default to 768 if unknown - - @property - def max_tokens(self) -> int: - """Get maximum token limit for current model. - - Returns: - int: Maximum number of tokens based on model profile. - - fast: 512 (lightweight, optimized for speed) - - code: 8192 (code-optimized, larger context) - - multilingual: 512 (standard multilingual model) - - balanced: 512 (general purpose) - """ - # Determine profile from model name - profile = None - for prof, model in self.MODELS.items(): - if model == self._model_name: - profile = prof - break - - # Return token limit based on profile - if profile == "code": - return 8192 - elif profile in ("fast", "multilingual", "balanced"): - return 512 - else: - # Default for unknown models - return 512 - - @property - def providers(self) -> List[str]: - """Get configured ONNX execution providers.""" - return self._providers - - @property - def is_gpu_enabled(self) -> bool: - """Check if GPU acceleration is enabled for this embedder.""" - gpu_providers = {"CUDAExecutionProvider", "TensorrtExecutionProvider", - "DmlExecutionProvider", "ROCMExecutionProvider", "CoreMLExecutionProvider"} - # Handle both string providers and tuple providers (name, options) - for p in self._providers: - provider_name = p[0] if isinstance(p, tuple) else p - if provider_name in gpu_providers: - return True - return False - - def _load_model(self) -> None: - """Lazy load the embedding model with configured providers.""" - if self._model is not None: - return - - from fastembed import TextEmbedding - - # providers already include device_id options via get_optimal_providers(with_device_options=True) - # DO NOT pass device_ids separately - fastembed ignores it when providers is specified - # See: fastembed/text/onnx_embedding.py - device_ids is only used with cuda=True - try: - self._model = TextEmbedding( - model_name=self.model_name, - providers=self._providers, - ) - logger.debug(f"Model loaded with providers: {self._providers}") - except TypeError: - # Fallback for older fastembed versions without providers parameter - logger.warning( - "fastembed version doesn't support 'providers' parameter. " - "Upgrade fastembed for GPU acceleration: pip install --upgrade fastembed" - ) - self._model = TextEmbedding(model_name=self.model_name) - - def embed(self, texts: str | Iterable[str]) -> List[List[float]]: - """Generate embeddings for one or more texts. - - Args: - texts: Single text or iterable of texts to embed. - - Returns: - List of embedding vectors (each is a list of floats). - - Note: - This method converts numpy arrays to Python lists for backward compatibility. - For memory-efficient processing, use embed_to_numpy() instead. - """ - self._load_model() - - if isinstance(texts, str): - texts = [texts] - else: - texts = list(texts) - - embeddings = list(self._model.embed(texts)) - return [emb.tolist() for emb in embeddings] - - def embed_to_numpy(self, texts: str | Iterable[str], batch_size: Optional[int] = None) -> np.ndarray: - """Generate embeddings for one or more texts (returns numpy arrays). - - This method is more memory-efficient than embed() as it avoids converting - numpy arrays to Python lists, which can significantly reduce memory usage - during batch processing. - - Args: - texts: Single text or iterable of texts to embed. - batch_size: Optional batch size for fastembed processing. - Larger values improve GPU utilization but use more memory. - - Returns: - numpy.ndarray of shape (n_texts, embedding_dim) containing embeddings. - """ - self._load_model() - - if isinstance(texts, str): - texts = [texts] - else: - texts = list(texts) - - # Pass batch_size to fastembed for optimal GPU utilization - # Default batch_size in fastembed is 256, but larger values can improve throughput - if batch_size is not None: - embeddings = list(self._model.embed(texts, batch_size=batch_size)) - else: - embeddings = list(self._model.embed(texts)) - return np.array(embeddings) - - def embed_single(self, text: str) -> List[float]: - """Generate embedding for a single text.""" - return self.embed(text)[0] diff --git a/codex-lens/src/codexlens/semantic/factory.py b/codex-lens/src/codexlens/semantic/factory.py deleted file mode 100644 index 3295eba8..00000000 --- a/codex-lens/src/codexlens/semantic/factory.py +++ /dev/null @@ -1,158 +0,0 @@ -"""Factory for creating embedders. - -Provides a unified interface for instantiating different embedder backends. -Includes caching to avoid repeated model loading overhead. -""" - -from __future__ import annotations - -import logging -import threading -from typing import Any, Dict, List, Optional - -from .base import BaseEmbedder - -# Module-level cache for embedder instances -# Key: (backend, profile, model, use_gpu) -> embedder instance -_embedder_cache: Dict[tuple, BaseEmbedder] = {} -_cache_lock = threading.Lock() -_logger = logging.getLogger(__name__) - - -def get_embedder( - backend: str = "fastembed", - profile: str = "code", - model: str = "default", - use_gpu: bool = True, - endpoints: Optional[List[Dict[str, Any]]] = None, - strategy: str = "latency_aware", - cooldown: float = 60.0, - **kwargs: Any, -) -> BaseEmbedder: - """Factory function to create embedder based on backend. - - Args: - backend: Embedder backend to use. Options: - - "fastembed": Use fastembed (ONNX-based) embedder (default) - - "litellm": Use ccw-litellm embedder - profile: Model profile for fastembed backend ("fast", "code", "multilingual", "balanced") - Used only when backend="fastembed". Default: "code" - model: Model identifier for litellm backend. - Used only when backend="litellm". Default: "default" - use_gpu: Whether to use GPU acceleration when available (default: True). - Used only when backend="fastembed". - endpoints: Optional list of endpoint configurations for multi-endpoint load balancing. - Each endpoint is a dict with keys: model, api_key, api_base, weight. - Used only when backend="litellm" and multiple endpoints provided. - strategy: Selection strategy for multi-endpoint mode: - "round_robin", "latency_aware", "weighted_random". - Default: "latency_aware" - cooldown: Default cooldown seconds for rate-limited endpoints (default: 60.0) - **kwargs: Additional backend-specific arguments - - Returns: - BaseEmbedder: Configured embedder instance - - Raises: - ValueError: If backend is not recognized - ImportError: If required backend dependencies are not installed - - Examples: - Create fastembed embedder with code profile: - >>> embedder = get_embedder(backend="fastembed", profile="code") - - Create fastembed embedder with fast profile and CPU only: - >>> embedder = get_embedder(backend="fastembed", profile="fast", use_gpu=False) - - Create litellm embedder: - >>> embedder = get_embedder(backend="litellm", model="text-embedding-3-small") - - Create rotational embedder with multiple endpoints: - >>> endpoints = [ - ... {"model": "openai/text-embedding-3-small", "api_key": "sk-..."}, - ... {"model": "azure/my-embedding", "api_base": "https://...", "api_key": "..."}, - ... ] - >>> embedder = get_embedder(backend="litellm", endpoints=endpoints) - """ - # Build cache key from immutable configuration - if backend == "fastembed": - cache_key = ("fastembed", profile, None, use_gpu) - elif backend == "litellm": - # For litellm, use model as part of cache key - # Multi-endpoint mode is not cached as it's more complex - if endpoints and len(endpoints) > 1: - cache_key = None # Skip cache for multi-endpoint - else: - effective_model = endpoints[0]["model"] if endpoints else model - cache_key = ("litellm", None, effective_model, None) - else: - cache_key = None - - # Check cache first (thread-safe) - if cache_key is not None: - with _cache_lock: - if cache_key in _embedder_cache: - _logger.debug("Returning cached embedder for %s", cache_key) - return _embedder_cache[cache_key] - - # Create new embedder instance - embedder: Optional[BaseEmbedder] = None - - if backend == "fastembed": - from .embedder import Embedder - embedder = Embedder(profile=profile, use_gpu=use_gpu, **kwargs) - elif backend == "litellm": - # Check if multi-endpoint mode is requested - if endpoints and len(endpoints) > 1: - from .rotational_embedder import create_rotational_embedder - # Multi-endpoint is not cached - return create_rotational_embedder( - endpoints_config=endpoints, - strategy=strategy, - default_cooldown=cooldown, - ) - elif endpoints and len(endpoints) == 1: - # Single endpoint in list - use it directly - ep = endpoints[0] - ep_kwargs = {**kwargs} - if "api_key" in ep: - ep_kwargs["api_key"] = ep["api_key"] - if "api_base" in ep: - ep_kwargs["api_base"] = ep["api_base"] - from .litellm_embedder import LiteLLMEmbedderWrapper - embedder = LiteLLMEmbedderWrapper(model=ep["model"], **ep_kwargs) - else: - # No endpoints list - use model parameter - from .litellm_embedder import LiteLLMEmbedderWrapper - embedder = LiteLLMEmbedderWrapper(model=model, **kwargs) - else: - raise ValueError( - f"Unknown backend: {backend}. " - f"Supported backends: 'fastembed', 'litellm'" - ) - - # Cache the embedder for future use (thread-safe) - if cache_key is not None and embedder is not None: - with _cache_lock: - # Double-check to avoid race condition - if cache_key not in _embedder_cache: - _embedder_cache[cache_key] = embedder - _logger.debug("Cached new embedder for %s", cache_key) - else: - # Another thread created it already, use that one - embedder = _embedder_cache[cache_key] - - return embedder # type: ignore - - -def clear_embedder_cache() -> int: - """Clear the embedder cache. - - Returns: - Number of embedders cleared from cache - """ - with _cache_lock: - count = len(_embedder_cache) - _embedder_cache.clear() - _logger.debug("Cleared %d embedders from cache", count) - return count diff --git a/codex-lens/src/codexlens/semantic/gpu_support.py b/codex-lens/src/codexlens/semantic/gpu_support.py deleted file mode 100644 index 62a5186d..00000000 --- a/codex-lens/src/codexlens/semantic/gpu_support.py +++ /dev/null @@ -1,431 +0,0 @@ -"""GPU acceleration support for semantic embeddings. - -This module provides GPU detection, initialization, and fallback handling -for ONNX-based embedding generation. -""" - -from __future__ import annotations - -import logging -from dataclasses import dataclass -from typing import List, Optional - -logger = logging.getLogger(__name__) - - -@dataclass -class GPUDevice: - """Individual GPU device info.""" - device_id: int - name: str - is_discrete: bool # True for discrete GPU (NVIDIA, AMD), False for integrated (Intel UHD) - vendor: str # "nvidia", "amd", "intel", "unknown" - - -@dataclass -class GPUInfo: - """GPU availability and configuration info.""" - - gpu_available: bool = False - cuda_available: bool = False - gpu_count: int = 0 - gpu_name: Optional[str] = None - onnx_providers: List[str] = None - devices: List[GPUDevice] = None # List of detected GPU devices - preferred_device_id: Optional[int] = None # Preferred GPU for embedding - - def __post_init__(self): - if self.onnx_providers is None: - self.onnx_providers = ["CPUExecutionProvider"] - if self.devices is None: - self.devices = [] - - -_gpu_info_cache: Optional[GPUInfo] = None - - -def _enumerate_gpus() -> List[GPUDevice]: - """Enumerate available GPU devices using WMI on Windows. - - Returns: - List of GPUDevice with device info, ordered by device_id. - """ - devices = [] - - try: - import subprocess - import sys - - if sys.platform == "win32": - # Use PowerShell to query GPU information via WMI - cmd = [ - "powershell", "-NoProfile", "-Command", - "Get-WmiObject Win32_VideoController | Select-Object DeviceID, Name, AdapterCompatibility | ConvertTo-Json" - ] - result = subprocess.run(cmd, capture_output=True, text=True, timeout=10) - - if result.returncode == 0 and result.stdout.strip(): - import json - gpu_data = json.loads(result.stdout) - - # Handle single GPU case (returns dict instead of list) - if isinstance(gpu_data, dict): - gpu_data = [gpu_data] - - for idx, gpu in enumerate(gpu_data): - name = gpu.get("Name", "Unknown GPU") - compat = gpu.get("AdapterCompatibility", "").lower() - - # Determine vendor - name_lower = name.lower() - if "nvidia" in name_lower or "nvidia" in compat: - vendor = "nvidia" - is_discrete = True - elif "amd" in name_lower or "radeon" in name_lower or "amd" in compat: - vendor = "amd" - is_discrete = True - elif "intel" in name_lower or "intel" in compat: - vendor = "intel" - # Intel UHD/Iris are integrated, Intel Arc is discrete - is_discrete = "arc" in name_lower - else: - vendor = "unknown" - is_discrete = False - - devices.append(GPUDevice( - device_id=idx, - name=name, - is_discrete=is_discrete, - vendor=vendor - )) - logger.debug(f"Detected GPU {idx}: {name} (vendor={vendor}, discrete={is_discrete})") - - except Exception as e: - logger.debug(f"GPU enumeration failed: {e}") - - return devices - - -def _get_preferred_device_id(devices: List[GPUDevice]) -> Optional[int]: - """Determine the preferred GPU device_id for embedding. - - Preference order: - 1. NVIDIA discrete GPU (best DirectML/CUDA support) - 2. AMD discrete GPU - 3. Intel Arc (discrete) - 4. Intel integrated (fallback) - - Returns: - device_id of preferred GPU, or None to use default. - """ - if not devices: - return None - - # Priority: NVIDIA > AMD > Intel Arc > Intel integrated - priority_order = [ - ("nvidia", True), # NVIDIA discrete - ("amd", True), # AMD discrete - ("intel", True), # Intel Arc (discrete) - ("intel", False), # Intel integrated (fallback) - ] - - for target_vendor, target_discrete in priority_order: - for device in devices: - if device.vendor == target_vendor and device.is_discrete == target_discrete: - logger.info(f"Preferred GPU: {device.name} (device_id={device.device_id})") - return device.device_id - - # If no match, use first device - if devices: - return devices[0].device_id - - return None - - -def detect_gpu(force_refresh: bool = False) -> GPUInfo: - """Detect available GPU resources for embedding acceleration. - - Args: - force_refresh: If True, re-detect GPU even if cached. - - Returns: - GPUInfo with detection results. - """ - global _gpu_info_cache - - if _gpu_info_cache is not None and not force_refresh: - return _gpu_info_cache - - info = GPUInfo() - - # Enumerate GPU devices first - info.devices = _enumerate_gpus() - info.gpu_count = len(info.devices) - if info.devices: - # Set preferred device (discrete GPU preferred over integrated) - info.preferred_device_id = _get_preferred_device_id(info.devices) - # Set gpu_name to preferred device name - for dev in info.devices: - if dev.device_id == info.preferred_device_id: - info.gpu_name = dev.name - break - - # Check PyTorch CUDA availability (most reliable detection) - try: - import torch - if torch.cuda.is_available(): - info.cuda_available = True - info.gpu_available = True - info.gpu_count = torch.cuda.device_count() - if info.gpu_count > 0: - info.gpu_name = torch.cuda.get_device_name(0) - logger.debug(f"PyTorch CUDA detected: {info.gpu_count} GPU(s)") - except ImportError: - logger.debug("PyTorch not available for GPU detection") - - # Check ONNX Runtime providers with validation - try: - import onnxruntime as ort - available_providers = ort.get_available_providers() - - # Build provider list with priority order - providers = [] - - # Test each provider to ensure it actually works - def test_provider(provider_name: str) -> bool: - """Test if a provider actually works by creating a dummy session.""" - try: - # Create a minimal ONNX model to test provider - import numpy as np - # Simple test: just check if provider can be instantiated - sess_options = ort.SessionOptions() - sess_options.log_severity_level = 4 # Suppress warnings - return True - except Exception: - return False - - # CUDA provider (NVIDIA GPU) - check if CUDA runtime is available - if "CUDAExecutionProvider" in available_providers: - # Verify CUDA is actually usable by checking for cuBLAS - cuda_works = False - try: - import ctypes - # Try to load cuBLAS to verify CUDA installation - try: - ctypes.CDLL("cublas64_12.dll") - cuda_works = True - except OSError: - try: - ctypes.CDLL("cublas64_11.dll") - cuda_works = True - except OSError: - pass - except Exception: - pass - - if cuda_works: - providers.append("CUDAExecutionProvider") - info.gpu_available = True - logger.debug("ONNX CUDAExecutionProvider available and working") - else: - logger.debug("ONNX CUDAExecutionProvider listed but CUDA runtime not found") - - # TensorRT provider (optimized NVIDIA inference) - if "TensorrtExecutionProvider" in available_providers: - # TensorRT requires additional libraries, skip for now - logger.debug("ONNX TensorrtExecutionProvider available (requires TensorRT SDK)") - - # DirectML provider (Windows GPU - AMD/Intel/NVIDIA) - if "DmlExecutionProvider" in available_providers: - providers.append("DmlExecutionProvider") - info.gpu_available = True - logger.debug("ONNX DmlExecutionProvider available (DirectML)") - - # ROCm provider (AMD GPU on Linux) - if "ROCMExecutionProvider" in available_providers: - providers.append("ROCMExecutionProvider") - info.gpu_available = True - logger.debug("ONNX ROCMExecutionProvider available (AMD)") - - # CoreML provider (Apple Silicon) - if "CoreMLExecutionProvider" in available_providers: - providers.append("CoreMLExecutionProvider") - info.gpu_available = True - logger.debug("ONNX CoreMLExecutionProvider available (Apple)") - - # Always include CPU as fallback - providers.append("CPUExecutionProvider") - - info.onnx_providers = providers - - except ImportError: - logger.debug("ONNX Runtime not available") - info.onnx_providers = ["CPUExecutionProvider"] - - _gpu_info_cache = info - return info - - -def get_optimal_providers(use_gpu: bool = True, with_device_options: bool = False) -> list: - """Get optimal ONNX execution providers based on availability. - - Args: - use_gpu: If True, include GPU providers when available. - If False, force CPU-only execution. - with_device_options: If True, return providers as tuples with device_id options - for proper GPU device selection (required for DirectML). - - Returns: - List of provider names or tuples (provider_name, options_dict) in priority order. - """ - if not use_gpu: - return ["CPUExecutionProvider"] - - gpu_info = detect_gpu() - - # Check if GPU was requested but not available - log warning - if not gpu_info.gpu_available: - try: - import onnxruntime as ort - available_providers = ort.get_available_providers() - except ImportError: - available_providers = [] - logger.warning( - "GPU acceleration was requested, but no supported GPU provider (CUDA, DirectML) " - f"was found. Available providers: {available_providers}. Falling back to CPU." - ) - else: - # Log which GPU provider is being used - gpu_providers = [p for p in gpu_info.onnx_providers if p != "CPUExecutionProvider"] - if gpu_providers: - logger.info(f"Using {gpu_providers[0]} for ONNX GPU acceleration") - - if not with_device_options: - return gpu_info.onnx_providers - - # Build providers with device_id options for GPU providers - device_id = get_selected_device_id() - providers = [] - - for provider in gpu_info.onnx_providers: - if provider == "DmlExecutionProvider" and device_id is not None: - # DirectML requires device_id in provider_options tuple - providers.append(("DmlExecutionProvider", {"device_id": device_id})) - logger.debug(f"DmlExecutionProvider configured with device_id={device_id}") - elif provider == "CUDAExecutionProvider" and device_id is not None: - # CUDA also supports device_id in provider_options - providers.append(("CUDAExecutionProvider", {"device_id": device_id})) - logger.debug(f"CUDAExecutionProvider configured with device_id={device_id}") - elif provider == "ROCMExecutionProvider" and device_id is not None: - # ROCm supports device_id - providers.append(("ROCMExecutionProvider", {"device_id": device_id})) - logger.debug(f"ROCMExecutionProvider configured with device_id={device_id}") - else: - # CPU and other providers don't need device_id - providers.append(provider) - - return providers - - -def is_gpu_available() -> bool: - """Check if any GPU acceleration is available.""" - return detect_gpu().gpu_available - - -def get_gpu_summary() -> str: - """Get human-readable GPU status summary.""" - info = detect_gpu() - - if not info.gpu_available: - return "GPU: Not available (using CPU)" - - parts = [] - if info.gpu_name: - parts.append(f"GPU: {info.gpu_name}") - if info.gpu_count > 1: - parts.append(f"({info.gpu_count} devices)") - - # Show active providers (excluding CPU fallback) - gpu_providers = [p for p in info.onnx_providers if p != "CPUExecutionProvider"] - if gpu_providers: - parts.append(f"Providers: {', '.join(gpu_providers)}") - - return " | ".join(parts) if parts else "GPU: Available" - - -def clear_gpu_cache() -> None: - """Clear cached GPU detection info.""" - global _gpu_info_cache - _gpu_info_cache = None - - -# User-selected device ID (overrides auto-detection) -_selected_device_id: Optional[int] = None - - -def get_gpu_devices() -> List[dict]: - """Get list of available GPU devices for frontend selection. - - Returns: - List of dicts with device info for each GPU. - """ - info = detect_gpu() - devices = [] - - for dev in info.devices: - devices.append({ - "device_id": dev.device_id, - "name": dev.name, - "vendor": dev.vendor, - "is_discrete": dev.is_discrete, - "is_preferred": dev.device_id == info.preferred_device_id, - "is_selected": dev.device_id == get_selected_device_id(), - }) - - return devices - - -def get_selected_device_id() -> Optional[int]: - """Get the user-selected GPU device_id. - - Returns: - User-selected device_id, or auto-detected preferred device_id if not set. - """ - global _selected_device_id - - if _selected_device_id is not None: - return _selected_device_id - - # Fall back to auto-detected preferred device - info = detect_gpu() - return info.preferred_device_id - - -def set_selected_device_id(device_id: Optional[int]) -> bool: - """Set the GPU device_id to use for embeddings. - - Args: - device_id: GPU device_id to use, or None to use auto-detection. - - Returns: - True if device_id is valid, False otherwise. - """ - global _selected_device_id - - if device_id is None: - _selected_device_id = None - logger.info("GPU selection reset to auto-detection") - return True - - # Validate device_id exists - info = detect_gpu() - valid_ids = [dev.device_id for dev in info.devices] - - if device_id in valid_ids: - _selected_device_id = device_id - device_name = next((dev.name for dev in info.devices if dev.device_id == device_id), "Unknown") - logger.info(f"GPU selection set to device {device_id}: {device_name}") - return True - else: - logger.warning(f"Invalid device_id {device_id}. Valid IDs: {valid_ids}") - return False diff --git a/codex-lens/src/codexlens/semantic/litellm_embedder.py b/codex-lens/src/codexlens/semantic/litellm_embedder.py deleted file mode 100644 index ee4284dd..00000000 --- a/codex-lens/src/codexlens/semantic/litellm_embedder.py +++ /dev/null @@ -1,144 +0,0 @@ -"""LiteLLM embedder wrapper for CodexLens. - -Provides integration with ccw-litellm's LiteLLMEmbedder for embedding generation. -""" - -from __future__ import annotations - -from typing import Iterable - -import numpy as np - -from .base import BaseEmbedder - - -class LiteLLMEmbedderWrapper(BaseEmbedder): - """Wrapper for ccw-litellm LiteLLMEmbedder. - - This wrapper adapts the ccw-litellm LiteLLMEmbedder to the CodexLens - BaseEmbedder interface, enabling seamless integration with CodexLens - semantic search functionality. - - Args: - model: Model identifier for LiteLLM (default: "default") - **kwargs: Additional arguments passed to LiteLLMEmbedder - - Raises: - ImportError: If ccw-litellm package is not installed - """ - - def __init__(self, model: str = "default", **kwargs) -> None: - """Initialize LiteLLM embedder wrapper. - - Args: - model: Model identifier for LiteLLM (default: "default") - **kwargs: Additional arguments passed to LiteLLMEmbedder - - Raises: - ImportError: If ccw-litellm package is not installed - """ - try: - from ccw_litellm import LiteLLMEmbedder - self._embedder = LiteLLMEmbedder(model=model, **kwargs) - except ImportError as e: - raise ImportError( - "ccw-litellm not installed. Install with: pip install ccw-litellm" - ) from e - - @property - def embedding_dim(self) -> int: - """Return embedding dimensions from LiteLLMEmbedder. - - Returns: - int: Dimension of the embedding vectors. - """ - return self._embedder.dimensions - - @property - def model_name(self) -> str: - """Return model name from LiteLLMEmbedder. - - Returns: - str: Name or identifier of the underlying model. - """ - return self._embedder.model_name - - @property - def max_tokens(self) -> int: - """Return maximum token limit for the embedding model. - - Returns: - int: Maximum number of tokens that can be embedded at once. - Reads from LiteLLM config's max_input_tokens property. - """ - # Get from LiteLLM embedder's max_input_tokens property (now exposed) - if hasattr(self._embedder, 'max_input_tokens'): - return self._embedder.max_input_tokens - - # Fallback: infer from model name - model_name_lower = self.model_name.lower() - - # Large models (8B or "large" in name) - if '8b' in model_name_lower or 'large' in model_name_lower: - return 32768 - - # OpenAI text-embedding-3-* models - if 'text-embedding-3' in model_name_lower: - return 8191 - - # Default fallback - return 8192 - - def _sanitize_text(self, text: str) -> str: - """Sanitize text to work around ModelScope API routing bug. - - ModelScope incorrectly routes text starting with lowercase 'import' - to an Ollama endpoint, causing failures. This adds a leading space - to work around the issue without affecting embedding quality. - - Args: - text: Text to sanitize. - - Returns: - Sanitized text safe for embedding API. - """ - if text.startswith('import'): - return ' ' + text - return text - - def embed_to_numpy(self, texts: str | Iterable[str], **kwargs) -> np.ndarray: - """Embed texts to numpy array using LiteLLMEmbedder. - - Args: - texts: Single text or iterable of texts to embed. - **kwargs: Additional arguments (ignored for LiteLLM backend). - Accepts batch_size for API compatibility with fastembed. - - Returns: - numpy.ndarray: Array of shape (n_texts, embedding_dim) containing embeddings. - """ - if isinstance(texts, str): - texts = [texts] - else: - texts = list(texts) - - # Sanitize texts to avoid ModelScope routing bug - texts = [self._sanitize_text(t) for t in texts] - - # LiteLLM handles batching internally, ignore batch_size parameter - return self._embedder.embed(texts) - - def embed_single(self, text: str) -> list[float]: - """Generate embedding for a single text. - - Args: - text: Text to embed. - - Returns: - list[float]: Embedding vector as a list of floats. - """ - # Sanitize text before embedding - sanitized = self._sanitize_text(text) - embedding = self._embedder.embed([sanitized]) - return embedding[0].tolist() - diff --git a/codex-lens/src/codexlens/semantic/reranker/__init__.py b/codex-lens/src/codexlens/semantic/reranker/__init__.py deleted file mode 100644 index e52b0223..00000000 --- a/codex-lens/src/codexlens/semantic/reranker/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -"""Reranker backends for second-stage search ranking. - -This subpackage provides a unified interface and factory for different reranking -implementations (e.g., ONNX, API-based, LiteLLM, and legacy sentence-transformers). -""" - -from __future__ import annotations - -from .base import BaseReranker -from .factory import check_reranker_available, get_reranker -from .fastembed_reranker import FastEmbedReranker, check_fastembed_reranker_available -from .legacy import CrossEncoderReranker, check_cross_encoder_available -from .onnx_reranker import ONNXReranker, check_onnx_reranker_available - -__all__ = [ - "BaseReranker", - "check_reranker_available", - "get_reranker", - "CrossEncoderReranker", - "check_cross_encoder_available", - "FastEmbedReranker", - "check_fastembed_reranker_available", - "ONNXReranker", - "check_onnx_reranker_available", -] diff --git a/codex-lens/src/codexlens/semantic/reranker/api_reranker.py b/codex-lens/src/codexlens/semantic/reranker/api_reranker.py deleted file mode 100644 index d4dcc968..00000000 --- a/codex-lens/src/codexlens/semantic/reranker/api_reranker.py +++ /dev/null @@ -1,442 +0,0 @@ -"""API-based reranker using a remote HTTP provider. - -Supported providers: -- SiliconFlow: https://api.siliconflow.cn/v1/rerank -- Cohere: https://api.cohere.ai/v1/rerank -- Jina: https://api.jina.ai/v1/rerank -""" - -from __future__ import annotations - -import logging -import os -import random -import time -from pathlib import Path -from typing import Any, Mapping, Sequence - -from .base import BaseReranker - -logger = logging.getLogger(__name__) - -_DEFAULT_ENV_API_KEY = "RERANKER_API_KEY" - - -def _normalize_api_base_for_endpoint(*, api_base: str, endpoint: str) -> str: - """Normalize api_base to avoid duplicated version paths (e.g. /v1/v1/...). - - httpx joins base_url paths with request paths even when the request path - starts with a leading slash. This means: - - base_url="https://host/v1" + endpoint="/v1/rerank" - -> "https://host/v1/v1/rerank" - - Many users configure OpenAI-style bases with a trailing "/v1", so we - defensively strip that suffix when the endpoint already includes "/v1/". - """ - cleaned = (api_base or "").strip().rstrip("/") - if not cleaned: - return cleaned - - endpoint_clean = endpoint or "" - - # If api_base already includes the endpoint suffix (e.g. api_base ends with "/v1/rerank"), - # strip it so we don't end up with ".../v1/rerank/v1/rerank". - if endpoint_clean.startswith("/") and cleaned.lower().endswith(endpoint_clean.lower()): - return cleaned[: -len(endpoint_clean)] - - # Strip a trailing "/v1" if endpoint already includes "/v1/...". - if endpoint_clean.startswith("/v1/") and cleaned.lower().endswith("/v1"): - return cleaned[:-3] - - return cleaned - - -def _get_env_with_fallback(key: str, workspace_root: Path | None = None) -> str | None: - """Get environment variable with .env file fallback.""" - # Check os.environ first - if key in os.environ: - return os.environ[key] - prefixed_key = f"CODEXLENS_{key}" - if prefixed_key in os.environ: - return os.environ[prefixed_key] - - # Try loading from .env files - try: - from codexlens.env_config import get_env - value = get_env(key, workspace_root=workspace_root) - if value is not None: - return value - return get_env(prefixed_key, workspace_root=workspace_root) - except ImportError: - return None - - -def check_httpx_available() -> tuple[bool, str | None]: - try: - import httpx # noqa: F401 - except ImportError as exc: # pragma: no cover - optional dependency - return False, f"httpx not available: {exc}. Install with: pip install httpx" - return True, None - - -class APIReranker(BaseReranker): - """Reranker backed by a remote reranking HTTP API.""" - - _PROVIDER_DEFAULTS: Mapping[str, Mapping[str, str]] = { - "siliconflow": { - "api_base": "https://api.siliconflow.cn", - "endpoint": "/v1/rerank", - "default_model": "BAAI/bge-reranker-v2-m3", - }, - "cohere": { - "api_base": "https://api.cohere.ai", - "endpoint": "/v1/rerank", - "default_model": "rerank-english-v3.0", - }, - "jina": { - "api_base": "https://api.jina.ai", - "endpoint": "/v1/rerank", - "default_model": "jina-reranker-v2-base-multilingual", - }, - } - - def __init__( - self, - *, - provider: str = "siliconflow", - model_name: str | None = None, - api_key: str | None = None, - api_base: str | None = None, - timeout: float = 30.0, - max_retries: int = 3, - backoff_base_s: float = 0.5, - backoff_max_s: float = 8.0, - env_api_key: str = _DEFAULT_ENV_API_KEY, - workspace_root: Path | str | None = None, - max_input_tokens: int | None = None, - ) -> None: - ok, err = check_httpx_available() - if not ok: # pragma: no cover - exercised via factory availability tests - raise ImportError(err) - - import httpx - - self._workspace_root = Path(workspace_root) if workspace_root else None - - self.provider = (provider or "").strip().lower() - if self.provider not in self._PROVIDER_DEFAULTS: - raise ValueError( - f"Unknown reranker provider: {provider}. " - f"Supported providers: {', '.join(sorted(self._PROVIDER_DEFAULTS))}" - ) - - defaults = self._PROVIDER_DEFAULTS[self.provider] - - # Load api_base from env with .env fallback - env_api_base = _get_env_with_fallback("RERANKER_API_BASE", self._workspace_root) - self.endpoint = defaults["endpoint"] - self.api_base = _normalize_api_base_for_endpoint( - api_base=(api_base or env_api_base or defaults["api_base"]), - endpoint=self.endpoint, - ) - - # Load model from env with .env fallback - env_model = _get_env_with_fallback("RERANKER_MODEL", self._workspace_root) - self.model_name = (model_name or env_model or defaults["default_model"]).strip() - if not self.model_name: - raise ValueError("model_name cannot be blank") - - # Load API key from env with .env fallback - resolved_key = api_key or _get_env_with_fallback(env_api_key, self._workspace_root) or "" - resolved_key = resolved_key.strip() - if not resolved_key: - raise ValueError( - f"Missing API key for reranker provider '{self.provider}'. " - f"Pass api_key=... or set ${env_api_key}." - ) - self._api_key = resolved_key - - self.timeout_s = float(timeout) if timeout and float(timeout) > 0 else 30.0 - self.max_retries = int(max_retries) if max_retries and int(max_retries) >= 0 else 3 - self.backoff_base_s = float(backoff_base_s) if backoff_base_s and float(backoff_base_s) > 0 else 0.5 - self.backoff_max_s = float(backoff_max_s) if backoff_max_s and float(backoff_max_s) > 0 else 8.0 - - headers = { - "Authorization": f"Bearer {self._api_key}", - "Content-Type": "application/json", - } - if self.provider == "cohere": - headers.setdefault("Cohere-Version", "2022-12-06") - - self._client = httpx.Client( - base_url=self.api_base, - headers=headers, - timeout=self.timeout_s, - ) - - # Store max_input_tokens with model-aware defaults - if max_input_tokens is not None: - self._max_input_tokens = max_input_tokens - else: - # Infer from model name - model_lower = self.model_name.lower() - if '8b' in model_lower or 'large' in model_lower: - self._max_input_tokens = 32768 - else: - self._max_input_tokens = 8192 - - @property - def max_input_tokens(self) -> int: - """Return maximum token limit for reranking.""" - return self._max_input_tokens - - def close(self) -> None: - try: - self._client.close() - except Exception: # pragma: no cover - defensive - return - - def _sleep_backoff(self, attempt: int, *, retry_after_s: float | None = None) -> None: - if retry_after_s is not None and retry_after_s > 0: - time.sleep(min(float(retry_after_s), self.backoff_max_s)) - return - - exp = self.backoff_base_s * (2**attempt) - jitter = random.uniform(0, min(0.5, self.backoff_base_s)) - time.sleep(min(self.backoff_max_s, exp + jitter)) - - @staticmethod - def _parse_retry_after_seconds(headers: Mapping[str, str]) -> float | None: - value = (headers.get("Retry-After") or "").strip() - if not value: - return None - try: - return float(value) - except ValueError: - return None - - @staticmethod - def _should_retry_status(status_code: int) -> bool: - return status_code == 429 or 500 <= status_code <= 599 - - def _request_json(self, payload: Mapping[str, Any]) -> Mapping[str, Any]: - last_exc: Exception | None = None - - for attempt in range(self.max_retries + 1): - try: - response = self._client.post(self.endpoint, json=dict(payload)) - except Exception as exc: # httpx is optional at import-time - last_exc = exc - if attempt < self.max_retries: - self._sleep_backoff(attempt) - continue - raise RuntimeError( - f"Rerank request failed for provider '{self.provider}' after " - f"{self.max_retries + 1} attempts: {type(exc).__name__}: {exc}" - ) from exc - - status = int(getattr(response, "status_code", 0) or 0) - if status >= 400: - body_preview = "" - try: - body_preview = (response.text or "").strip() - except Exception: - body_preview = "" - if len(body_preview) > 300: - body_preview = body_preview[:300] + "…" - - if self._should_retry_status(status) and attempt < self.max_retries: - retry_after = self._parse_retry_after_seconds(response.headers) - logger.warning( - "Rerank request to %s%s failed with HTTP %s (attempt %s/%s). Retrying…", - self.api_base, - self.endpoint, - status, - attempt + 1, - self.max_retries + 1, - ) - self._sleep_backoff(attempt, retry_after_s=retry_after) - continue - - if status in {401, 403}: - raise RuntimeError( - f"Rerank request unauthorized for provider '{self.provider}' (HTTP {status}). " - "Check your API key." - ) - - raise RuntimeError( - f"Rerank request failed for provider '{self.provider}' (HTTP {status}). " - f"Response: {body_preview or ''}" - ) - - try: - data = response.json() - except Exception as exc: - raise RuntimeError( - f"Rerank response from provider '{self.provider}' is not valid JSON: " - f"{type(exc).__name__}: {exc}" - ) from exc - - if not isinstance(data, dict): - raise RuntimeError( - f"Rerank response from provider '{self.provider}' must be a JSON object; " - f"got {type(data).__name__}" - ) - - return data - - raise RuntimeError( - f"Rerank request failed for provider '{self.provider}'. Last error: {last_exc}" - ) - - @staticmethod - def _extract_scores_from_results(results: Any, expected: int) -> list[float]: - if not isinstance(results, list): - raise RuntimeError(f"Invalid rerank response: 'results' must be a list, got {type(results).__name__}") - - scores: list[float] = [0.0 for _ in range(expected)] - filled = 0 - - for item in results: - if not isinstance(item, dict): - continue - idx = item.get("index") - score = item.get("relevance_score", item.get("score")) - if idx is None or score is None: - continue - try: - idx_int = int(idx) - score_f = float(score) - except (TypeError, ValueError): - continue - if 0 <= idx_int < expected: - scores[idx_int] = score_f - filled += 1 - - if filled != expected: - raise RuntimeError( - f"Rerank response contained {filled}/{expected} scored documents; " - "ensure top_n matches the number of documents." - ) - - return scores - - def _build_payload(self, *, query: str, documents: Sequence[str]) -> Mapping[str, Any]: - payload: dict[str, Any] = { - "model": self.model_name, - "query": query, - "documents": list(documents), - "top_n": len(documents), - "return_documents": False, - } - return payload - - def _estimate_tokens(self, text: str) -> int: - """Estimate token count using fast heuristic. - - Uses len(text) // 4 as approximation (~4 chars per token for English). - Not perfectly accurate for all models/languages but sufficient for - batch sizing decisions where exact counts aren't critical. - """ - return len(text) // 4 - - def _create_token_aware_batches( - self, - query: str, - documents: Sequence[str], - ) -> list[list[tuple[int, str]]]: - """Split documents into batches that fit within token limits. - - Uses 90% of max_input_tokens as safety margin. - Each batch includes the query tokens overhead. - """ - max_tokens = int(self._max_input_tokens * 0.9) - query_tokens = self._estimate_tokens(query) - - batches: list[list[tuple[int, str]]] = [] - current_batch: list[tuple[int, str]] = [] - current_tokens = query_tokens # Start with query overhead - - for idx, doc in enumerate(documents): - doc_tokens = self._estimate_tokens(doc) - - # Warn if single document exceeds token limit (will be truncated by API) - if doc_tokens > max_tokens - query_tokens: - logger.warning( - f"Document {idx} exceeds token limit: ~{doc_tokens} tokens " - f"(limit: {max_tokens - query_tokens} after query overhead). " - "Document will likely be truncated by the API." - ) - - # If batch would exceed limit, start new batch - if current_tokens + doc_tokens > max_tokens and current_batch: - batches.append(current_batch) - current_batch = [] - current_tokens = query_tokens - - current_batch.append((idx, doc)) - current_tokens += doc_tokens - - if current_batch: - batches.append(current_batch) - - return batches - - def _rerank_one_query(self, *, query: str, documents: Sequence[str]) -> list[float]: - if not documents: - return [] - - # Create token-aware batches - batches = self._create_token_aware_batches(query, documents) - - if len(batches) == 1: - # Single batch - original behavior - payload = self._build_payload(query=query, documents=documents) - data = self._request_json(payload) - results = data.get("results") - return self._extract_scores_from_results(results, expected=len(documents)) - - # Multiple batches - process each and merge results - logger.info( - f"Splitting {len(documents)} documents into {len(batches)} batches " - f"(max_input_tokens: {self._max_input_tokens})" - ) - - all_scores: list[float] = [0.0] * len(documents) - - for batch in batches: - batch_docs = [doc for _, doc in batch] - payload = self._build_payload(query=query, documents=batch_docs) - data = self._request_json(payload) - results = data.get("results") - batch_scores = self._extract_scores_from_results(results, expected=len(batch_docs)) - - # Map scores back to original indices - for (orig_idx, _), score in zip(batch, batch_scores): - all_scores[orig_idx] = score - - return all_scores - - def score_pairs( - self, - pairs: Sequence[tuple[str, str]], - *, - batch_size: int = 32, # noqa: ARG002 - kept for BaseReranker compatibility - ) -> list[float]: - if not pairs: - return [] - - grouped: dict[str, list[tuple[int, str]]] = {} - for idx, (query, doc) in enumerate(pairs): - grouped.setdefault(str(query), []).append((idx, str(doc))) - - scores: list[float] = [0.0 for _ in range(len(pairs))] - - for query, items in grouped.items(): - documents = [doc for _, doc in items] - query_scores = self._rerank_one_query(query=query, documents=documents) - for (orig_idx, _), score in zip(items, query_scores): - scores[orig_idx] = float(score) - - return scores diff --git a/codex-lens/src/codexlens/semantic/reranker/base.py b/codex-lens/src/codexlens/semantic/reranker/base.py deleted file mode 100644 index 65c2d837..00000000 --- a/codex-lens/src/codexlens/semantic/reranker/base.py +++ /dev/null @@ -1,46 +0,0 @@ -"""Base class for rerankers. - -Defines the interface that all rerankers must implement. -""" - -from __future__ import annotations - -from abc import ABC, abstractmethod -from typing import Sequence - - -class BaseReranker(ABC): - """Base class for all rerankers. - - All reranker implementations must inherit from this class and implement - the abstract methods to ensure a consistent interface. - """ - - @property - def max_input_tokens(self) -> int: - """Return maximum token limit for reranking. - - Returns: - int: Maximum number of tokens that can be processed at once. - Default is 8192 if not overridden by implementation. - """ - return 8192 - - @abstractmethod - def score_pairs( - self, - pairs: Sequence[tuple[str, str]], - *, - batch_size: int = 32, - ) -> list[float]: - """Score (query, doc) pairs. - - Args: - pairs: Sequence of (query, doc) string pairs to score. - batch_size: Batch size for scoring. - - Returns: - List of scores (one per pair). - """ - ... - diff --git a/codex-lens/src/codexlens/semantic/reranker/factory.py b/codex-lens/src/codexlens/semantic/reranker/factory.py deleted file mode 100644 index 459034b5..00000000 --- a/codex-lens/src/codexlens/semantic/reranker/factory.py +++ /dev/null @@ -1,159 +0,0 @@ -"""Factory for creating rerankers. - -Provides a unified interface for instantiating different reranker backends. -""" - -from __future__ import annotations - -from typing import Any - -from .base import BaseReranker - - -def check_reranker_available(backend: str) -> tuple[bool, str | None]: - """Check whether a specific reranker backend can be used. - - Notes: - - "fastembed" uses fastembed TextCrossEncoder (pip install fastembed>=0.4.0). [Recommended] - - "onnx" uses Optimum + ONNX Runtime (pip install onnxruntime optimum[onnxruntime] transformers). - - "legacy" uses sentence-transformers CrossEncoder (pip install codexlens[reranker-legacy]). - - "api" uses a remote reranking HTTP API (requires httpx). - - "litellm" uses `ccw-litellm` for unified access to LLM providers. - """ - backend = (backend or "").strip().lower() - - if backend == "legacy": - from .legacy import check_cross_encoder_available - - return check_cross_encoder_available() - - if backend == "fastembed": - from .fastembed_reranker import check_fastembed_reranker_available - - return check_fastembed_reranker_available() - - if backend == "onnx": - from .onnx_reranker import check_onnx_reranker_available - - return check_onnx_reranker_available() - - if backend == "litellm": - try: - import ccw_litellm # noqa: F401 - except ImportError as exc: # pragma: no cover - optional dependency - return ( - False, - f"ccw-litellm not available: {exc}. Install with: pip install ccw-litellm", - ) - - try: - from .litellm_reranker import LiteLLMReranker # noqa: F401 - except Exception as exc: # pragma: no cover - defensive - return False, f"LiteLLM reranker backend not available: {exc}" - - return True, None - - if backend == "api": - from .api_reranker import check_httpx_available - - return check_httpx_available() - - return False, ( - f"Invalid reranker backend: {backend}. " - "Must be 'fastembed', 'onnx', 'api', 'litellm', or 'legacy'." - ) - - -def get_reranker( - backend: str = "onnx", - model_name: str | None = None, - *, - device: str | None = None, - **kwargs: Any, -) -> BaseReranker: - """Factory function to create reranker based on backend. - - Args: - backend: Reranker backend to use. Options: - - "onnx": Optimum + ONNX Runtime backend (default) - - "fastembed": FastEmbed TextCrossEncoder backend - - "api": HTTP API backend (remote providers) - - "litellm": LiteLLM backend (LLM-based, for API mode) - - "legacy": sentence-transformers CrossEncoder backend (optional) - model_name: Model identifier for model-based backends. Defaults depend on backend: - - onnx: Xenova/ms-marco-MiniLM-L-6-v2 - - fastembed: Xenova/ms-marco-MiniLM-L-6-v2 - - api: BAAI/bge-reranker-v2-m3 (SiliconFlow) - - legacy: cross-encoder/ms-marco-MiniLM-L-6-v2 - - litellm: default - device: Optional device string for backends that support it (legacy and onnx). - **kwargs: Additional backend-specific arguments. - - Returns: - BaseReranker: Configured reranker instance. - - Raises: - ValueError: If backend is not recognized. - ImportError: If required backend dependencies are not installed or backend is unavailable. - """ - backend = (backend or "").strip().lower() - - if backend == "fastembed": - ok, err = check_reranker_available("fastembed") - if not ok: - raise ImportError(err) - - from .fastembed_reranker import FastEmbedReranker - - resolved_model_name = (model_name or "").strip() or FastEmbedReranker.DEFAULT_MODEL - _ = device # Device selection is managed via fastembed providers. - return FastEmbedReranker(model_name=resolved_model_name, **kwargs) - - if backend == "onnx": - ok, err = check_reranker_available("onnx") - if not ok: - raise ImportError(err) - - from .onnx_reranker import ONNXReranker - - resolved_model_name = (model_name or "").strip() or ONNXReranker.DEFAULT_MODEL - effective_kwargs = dict(kwargs) - if "use_gpu" not in effective_kwargs and device is not None: - effective_kwargs["use_gpu"] = str(device).strip().lower() not in {"cpu", "none"} - return ONNXReranker(model_name=resolved_model_name, **effective_kwargs) - - if backend == "legacy": - ok, err = check_reranker_available("legacy") - if not ok: - raise ImportError(err) - - from .legacy import CrossEncoderReranker - - resolved_model_name = (model_name or "").strip() or "cross-encoder/ms-marco-MiniLM-L-6-v2" - return CrossEncoderReranker(model_name=resolved_model_name, device=device) - - if backend == "litellm": - ok, err = check_reranker_available("litellm") - if not ok: - raise ImportError(err) - - from .litellm_reranker import LiteLLMReranker - - _ = device # Device selection is not applicable to remote LLM backends. - resolved_model_name = (model_name or "").strip() or "default" - return LiteLLMReranker(model=resolved_model_name, **kwargs) - - if backend == "api": - ok, err = check_reranker_available("api") - if not ok: - raise ImportError(err) - - from .api_reranker import APIReranker - - _ = device # Device selection is not applicable to remote HTTP backends. - resolved_model_name = (model_name or "").strip() or None - return APIReranker(model_name=resolved_model_name, **kwargs) - - raise ValueError( - f"Unknown backend: {backend}. Supported backends: 'fastembed', 'onnx', 'api', 'litellm', 'legacy'" - ) diff --git a/codex-lens/src/codexlens/semantic/reranker/fastembed_reranker.py b/codex-lens/src/codexlens/semantic/reranker/fastembed_reranker.py deleted file mode 100644 index c38d4aa0..00000000 --- a/codex-lens/src/codexlens/semantic/reranker/fastembed_reranker.py +++ /dev/null @@ -1,257 +0,0 @@ -"""FastEmbed-based reranker backend. - -This reranker uses fastembed's TextCrossEncoder for cross-encoder reranking. -FastEmbed is ONNX-based internally but provides a cleaner, unified API. - -Install: - pip install fastembed>=0.4.0 -""" - -from __future__ import annotations - -import logging -import threading -from typing import Any, Sequence - -from .base import BaseReranker - -logger = logging.getLogger(__name__) - - -def check_fastembed_reranker_available() -> tuple[bool, str | None]: - """Check whether fastembed reranker dependencies are available.""" - try: - import fastembed # noqa: F401 - except ImportError as exc: # pragma: no cover - optional dependency - return ( - False, - f"fastembed not available: {exc}. Install with: pip install fastembed>=0.4.0", - ) - - try: - from fastembed.rerank.cross_encoder import TextCrossEncoder # noqa: F401 - except ImportError as exc: # pragma: no cover - optional dependency - return ( - False, - f"fastembed TextCrossEncoder not available: {exc}. " - "Upgrade with: pip install fastembed>=0.4.0", - ) - - return True, None - - -class FastEmbedReranker(BaseReranker): - """Cross-encoder reranker using fastembed's TextCrossEncoder with lazy loading.""" - - DEFAULT_MODEL = "Xenova/ms-marco-MiniLM-L-6-v2" - - # Alternative models supported by fastembed: - # - "BAAI/bge-reranker-base" - # - "BAAI/bge-reranker-large" - # - "cross-encoder/ms-marco-MiniLM-L-6-v2" - - def __init__( - self, - model_name: str | None = None, - *, - use_gpu: bool = True, - cache_dir: str | None = None, - threads: int | None = None, - ) -> None: - """Initialize FastEmbed reranker. - - Args: - model_name: Model identifier. Defaults to Xenova/ms-marco-MiniLM-L-6-v2. - use_gpu: Whether to use GPU acceleration when available. - cache_dir: Optional directory for caching downloaded models. - threads: Optional number of threads for ONNX Runtime. - """ - self.model_name = (model_name or self.DEFAULT_MODEL).strip() - if not self.model_name: - raise ValueError("model_name cannot be blank") - - self.use_gpu = bool(use_gpu) - self.cache_dir = cache_dir - self.threads = threads - - self._encoder: Any | None = None - self._lock = threading.RLock() - - def _load_model(self) -> None: - """Lazy-load the TextCrossEncoder model.""" - if self._encoder is not None: - return - - ok, err = check_fastembed_reranker_available() - if not ok: - raise ImportError(err) - - with self._lock: - if self._encoder is not None: - return - - from fastembed.rerank.cross_encoder import TextCrossEncoder - - # Determine providers based on GPU preference - providers: list[str] | None = None - if self.use_gpu: - try: - from ..gpu_support import get_optimal_providers - - providers = get_optimal_providers(use_gpu=True, with_device_options=False) - except Exception: - # Fallback: let fastembed decide - providers = None - - # Build initialization kwargs - init_kwargs: dict[str, Any] = {} - if self.cache_dir: - init_kwargs["cache_dir"] = self.cache_dir - if self.threads is not None: - init_kwargs["threads"] = self.threads - if providers: - init_kwargs["providers"] = providers - - logger.debug( - "Loading FastEmbed reranker model: %s (use_gpu=%s)", - self.model_name, - self.use_gpu, - ) - - self._encoder = TextCrossEncoder( - model_name=self.model_name, - **init_kwargs, - ) - - logger.debug("FastEmbed reranker model loaded successfully") - - @staticmethod - def _sigmoid(x: float) -> float: - """Numerically stable sigmoid function.""" - if x < -709: - return 0.0 - if x > 709: - return 1.0 - import math - return 1.0 / (1.0 + math.exp(-x)) - - def score_pairs( - self, - pairs: Sequence[tuple[str, str]], - *, - batch_size: int = 32, - ) -> list[float]: - """Score (query, doc) pairs. - - Args: - pairs: Sequence of (query, doc) string pairs to score. - batch_size: Batch size for scoring. - - Returns: - List of scores (one per pair), normalized to [0, 1] range. - """ - if not pairs: - return [] - - self._load_model() - - if self._encoder is None: # pragma: no cover - defensive - return [] - - # FastEmbed's TextCrossEncoder.rerank() expects a query and list of documents. - # For batch scoring of multiple query-doc pairs, we need to process them. - # Group by query for efficiency when same query appears multiple times. - query_to_docs: dict[str, list[tuple[int, str]]] = {} - for idx, (query, doc) in enumerate(pairs): - if query not in query_to_docs: - query_to_docs[query] = [] - query_to_docs[query].append((idx, doc)) - - # Score each query group - scores: list[float] = [0.0] * len(pairs) - - for query, indexed_docs in query_to_docs.items(): - docs = [doc for _, doc in indexed_docs] - indices = [idx for idx, _ in indexed_docs] - - try: - # TextCrossEncoder.rerank returns raw float scores in same order as input - raw_scores = list( - self._encoder.rerank( - query=query, - documents=docs, - batch_size=batch_size, - ) - ) - - # Map scores back to original positions and normalize with sigmoid - for i, raw_score in enumerate(raw_scores): - if i < len(indices): - original_idx = indices[i] - # Normalize score to [0, 1] using stable sigmoid - scores[original_idx] = self._sigmoid(float(raw_score)) - - except Exception as e: - logger.warning("FastEmbed rerank failed for query: %s", str(e)[:100]) - # Leave scores as 0.0 for failed queries - - return scores - - def rerank( - self, - query: str, - documents: Sequence[str], - *, - top_k: int | None = None, - batch_size: int = 32, - ) -> list[tuple[float, str, int]]: - """Rerank documents for a single query. - - This is a convenience method that provides results in ranked order. - - Args: - query: The query string. - documents: List of documents to rerank. - top_k: Return only top K results. None returns all. - batch_size: Batch size for scoring. - - Returns: - List of (score, document, original_index) tuples, sorted by score descending. - """ - if not documents: - return [] - - self._load_model() - - if self._encoder is None: # pragma: no cover - defensive - return [] - - try: - # TextCrossEncoder.rerank returns raw float scores in same order as input - raw_scores = list( - self._encoder.rerank( - query=query, - documents=list(documents), - batch_size=batch_size, - ) - ) - - # Convert to our format: (normalized_score, document, original_index) - ranked = [] - for idx, raw_score in enumerate(raw_scores): - if idx < len(documents): - # Normalize score to [0, 1] using stable sigmoid - normalized = self._sigmoid(float(raw_score)) - ranked.append((normalized, documents[idx], idx)) - - # Sort by score descending - ranked.sort(key=lambda x: x[0], reverse=True) - - if top_k is not None and top_k > 0: - ranked = ranked[:top_k] - - return ranked - - except Exception as e: - logger.warning("FastEmbed rerank failed: %s", str(e)[:100]) - return [] diff --git a/codex-lens/src/codexlens/semantic/reranker/legacy.py b/codex-lens/src/codexlens/semantic/reranker/legacy.py deleted file mode 100644 index a5ee05de..00000000 --- a/codex-lens/src/codexlens/semantic/reranker/legacy.py +++ /dev/null @@ -1,91 +0,0 @@ -"""Legacy sentence-transformers cross-encoder reranker. - -Install with: pip install codexlens[reranker-legacy] -""" - -from __future__ import annotations - -import logging -import threading -from typing import List, Sequence, Tuple - -from .base import BaseReranker - -logger = logging.getLogger(__name__) - -try: - from sentence_transformers import CrossEncoder as _CrossEncoder - - CROSS_ENCODER_AVAILABLE = True - _import_error: str | None = None -except ImportError as exc: # pragma: no cover - optional dependency - _CrossEncoder = None # type: ignore[assignment] - CROSS_ENCODER_AVAILABLE = False - _import_error = str(exc) - - -def check_cross_encoder_available() -> tuple[bool, str | None]: - if CROSS_ENCODER_AVAILABLE: - return True, None - return ( - False, - _import_error - or "sentence-transformers not available. Install with: pip install codexlens[reranker-legacy]", - ) - - -class CrossEncoderReranker(BaseReranker): - """Cross-encoder reranker with lazy model loading.""" - - def __init__(self, model_name: str, *, device: str | None = None) -> None: - self.model_name = (model_name or "").strip() - if not self.model_name: - raise ValueError("model_name cannot be blank") - - self.device = (device or "").strip() or None - self._model = None - self._lock = threading.RLock() - - def _load_model(self) -> None: - if self._model is not None: - return - - ok, err = check_cross_encoder_available() - if not ok: - raise ImportError(err) - - with self._lock: - if self._model is not None: - return - - try: - if self.device: - self._model = _CrossEncoder(self.model_name, device=self.device) # type: ignore[misc] - else: - self._model = _CrossEncoder(self.model_name) # type: ignore[misc] - except Exception as exc: - logger.debug("Failed to load cross-encoder model %s: %s", self.model_name, exc) - raise - - def score_pairs( - self, - pairs: Sequence[Tuple[str, str]], - *, - batch_size: int = 32, - ) -> List[float]: - """Score (query, doc) pairs using the cross-encoder. - - Returns: - List of scores (one per pair) in the model's native scale (usually logits). - """ - if not pairs: - return [] - - self._load_model() - - if self._model is None: # pragma: no cover - defensive - return [] - - bs = int(batch_size) if batch_size and int(batch_size) > 0 else 32 - scores = self._model.predict(list(pairs), batch_size=bs) # type: ignore[union-attr] - return [float(s) for s in scores] diff --git a/codex-lens/src/codexlens/semantic/reranker/litellm_reranker.py b/codex-lens/src/codexlens/semantic/reranker/litellm_reranker.py deleted file mode 100644 index ec735994..00000000 --- a/codex-lens/src/codexlens/semantic/reranker/litellm_reranker.py +++ /dev/null @@ -1,214 +0,0 @@ -"""Experimental LiteLLM reranker backend. - -This module provides :class:`LiteLLMReranker`, which uses an LLM to score the -relevance of a single (query, document) pair per request. - -Notes: - - This backend is experimental and may be slow/expensive compared to local - rerankers. - - It relies on `ccw-litellm` for a unified LLM API across providers. -""" - -from __future__ import annotations - -import json -import logging -import re -import threading -import time -from typing import Any, Sequence - -from .base import BaseReranker - -logger = logging.getLogger(__name__) - -_NUMBER_RE = re.compile(r"[-+]?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?") - - -def _coerce_score_to_unit_interval(score: float) -> float: - """Coerce a numeric score into [0, 1]. - - The prompt asks for a float in [0, 1], but some models may respond with 0-10 - or 0-100 scales. This function attempts a conservative normalization. - """ - if 0.0 <= score <= 1.0: - return score - if 0.0 <= score <= 10.0: - return score / 10.0 - if 0.0 <= score <= 100.0: - return score / 100.0 - return max(0.0, min(1.0, score)) - - -def _extract_score(text: str) -> float | None: - """Extract a numeric relevance score from an LLM response.""" - content = (text or "").strip() - if not content: - return None - - # Prefer JSON if present. - if "{" in content and "}" in content: - try: - start = content.index("{") - end = content.rindex("}") + 1 - payload = json.loads(content[start:end]) - if isinstance(payload, dict) and "score" in payload: - return float(payload["score"]) - except Exception: - pass - - match = _NUMBER_RE.search(content) - if not match: - return None - try: - return float(match.group(0)) - except ValueError: - return None - - -class LiteLLMReranker(BaseReranker): - """Experimental reranker that uses a LiteLLM-compatible model. - - This reranker scores each (query, doc) pair in isolation (single-pair mode) - to improve prompt reliability across providers. - """ - - _SYSTEM_PROMPT = ( - "You are a relevance scoring assistant.\n" - "Given a search query and a document snippet, output a single numeric " - "relevance score between 0 and 1.\n\n" - "Scoring guidance:\n" - "- 1.0: The document directly answers the query.\n" - "- 0.5: The document is partially relevant.\n" - "- 0.0: The document is unrelated.\n\n" - "Output requirements:\n" - "- Output ONLY the number (e.g., 0.73).\n" - "- Do not include any other text." - ) - - def __init__( - self, - model: str = "default", - *, - requests_per_minute: float | None = None, - min_interval_seconds: float | None = None, - default_score: float = 0.0, - max_doc_chars: int = 8000, - **litellm_kwargs: Any, - ) -> None: - """Initialize the reranker. - - Args: - model: Model name from ccw-litellm configuration (default: "default"). - requests_per_minute: Optional rate limit in requests per minute. - min_interval_seconds: Optional minimum interval between requests. If set, - it takes precedence over requests_per_minute. - default_score: Score to use when an API call fails or parsing fails. - max_doc_chars: Maximum number of document characters to include in the prompt. - **litellm_kwargs: Passed through to `ccw_litellm.LiteLLMClient`. - - Raises: - ImportError: If ccw-litellm is not installed. - ValueError: If model is blank. - """ - self.model_name = (model or "").strip() - if not self.model_name: - raise ValueError("model cannot be blank") - - self.default_score = float(default_score) - - self.max_doc_chars = int(max_doc_chars) if int(max_doc_chars) > 0 else 0 - - if min_interval_seconds is not None: - self._min_interval_seconds = max(0.0, float(min_interval_seconds)) - elif requests_per_minute is not None and float(requests_per_minute) > 0: - self._min_interval_seconds = 60.0 / float(requests_per_minute) - else: - self._min_interval_seconds = 0.0 - - # Prefer deterministic output by default; allow overrides via kwargs. - litellm_kwargs = dict(litellm_kwargs) - litellm_kwargs.setdefault("temperature", 0.0) - litellm_kwargs.setdefault("max_tokens", 16) - - try: - from ccw_litellm import ChatMessage, LiteLLMClient - except ImportError as exc: # pragma: no cover - optional dependency - raise ImportError( - "ccw-litellm not installed. Install with: pip install ccw-litellm" - ) from exc - - self._ChatMessage = ChatMessage - self._client = LiteLLMClient(model=self.model_name, **litellm_kwargs) - - self._lock = threading.RLock() - self._last_request_at = 0.0 - - def _sanitize_text(self, text: str) -> str: - # Keep consistent with LiteLLMEmbedderWrapper workaround. - if text.startswith("import"): - return " " + text - return text - - def _rate_limit(self) -> None: - if self._min_interval_seconds <= 0: - return - with self._lock: - now = time.monotonic() - elapsed = now - self._last_request_at - if elapsed < self._min_interval_seconds: - time.sleep(self._min_interval_seconds - elapsed) - self._last_request_at = time.monotonic() - - def _build_user_prompt(self, query: str, doc: str) -> str: - sanitized_query = self._sanitize_text(query or "") - sanitized_doc = self._sanitize_text(doc or "") - if self.max_doc_chars and len(sanitized_doc) > self.max_doc_chars: - sanitized_doc = sanitized_doc[: self.max_doc_chars] - - return ( - "Query:\n" - f"{sanitized_query}\n\n" - "Document:\n" - f"{sanitized_doc}\n\n" - "Return the relevance score (0 to 1) as a single number:" - ) - - def _score_single_pair(self, query: str, doc: str) -> float: - messages = [ - self._ChatMessage(role="system", content=self._SYSTEM_PROMPT), - self._ChatMessage(role="user", content=self._build_user_prompt(query, doc)), - ] - - try: - self._rate_limit() - response = self._client.chat(messages) - except Exception as exc: - logger.debug("LiteLLM reranker request failed: %s", exc) - return self.default_score - - raw = getattr(response, "content", "") or "" - score = _extract_score(raw) - if score is None: - logger.debug("Failed to parse LiteLLM reranker score from response: %r", raw) - return self.default_score - return _coerce_score_to_unit_interval(float(score)) - - def score_pairs( - self, - pairs: Sequence[tuple[str, str]], - *, - batch_size: int = 32, - ) -> list[float]: - """Score (query, doc) pairs with per-pair LLM calls.""" - if not pairs: - return [] - - bs = int(batch_size) if batch_size and int(batch_size) > 0 else 32 - - scores: list[float] = [] - for i in range(0, len(pairs), bs): - batch = pairs[i : i + bs] - for query, doc in batch: - scores.append(self._score_single_pair(query, doc)) - return scores diff --git a/codex-lens/src/codexlens/semantic/reranker/onnx_reranker.py b/codex-lens/src/codexlens/semantic/reranker/onnx_reranker.py deleted file mode 100644 index a56fb953..00000000 --- a/codex-lens/src/codexlens/semantic/reranker/onnx_reranker.py +++ /dev/null @@ -1,302 +0,0 @@ -"""Optimum + ONNX Runtime reranker backend. - -This reranker uses Hugging Face Optimum's ONNXRuntime backend for sequence -classification models. It is designed to run without requiring PyTorch at -runtime by using numpy tensors and ONNX Runtime execution providers. - -Install (CPU): - pip install onnxruntime optimum[onnxruntime] transformers -""" - -from __future__ import annotations - -import logging -import threading -from typing import Any, Iterable, Sequence - -from .base import BaseReranker - -logger = logging.getLogger(__name__) - - -def check_onnx_reranker_available() -> tuple[bool, str | None]: - """Check whether Optimum + ONNXRuntime reranker dependencies are available.""" - try: - import numpy # noqa: F401 - except ImportError as exc: # pragma: no cover - optional dependency - return False, f"numpy not available: {exc}. Install with: pip install numpy" - - try: - import onnxruntime # noqa: F401 - except ImportError as exc: # pragma: no cover - optional dependency - return ( - False, - f"onnxruntime not available: {exc}. Install with: pip install onnxruntime", - ) - - try: - from optimum.onnxruntime import ORTModelForSequenceClassification # noqa: F401 - except ImportError as exc: # pragma: no cover - optional dependency - return ( - False, - f"optimum[onnxruntime] not available: {exc}. Install with: pip install optimum[onnxruntime]", - ) - - try: - from transformers import AutoTokenizer # noqa: F401 - except ImportError as exc: # pragma: no cover - optional dependency - return ( - False, - f"transformers not available: {exc}. Install with: pip install transformers", - ) - - return True, None - - -def _iter_batches(items: Sequence[Any], batch_size: int) -> Iterable[Sequence[Any]]: - for i in range(0, len(items), batch_size): - yield items[i : i + batch_size] - - -def _normalize_provider_specs( - providers: Sequence[Any] | None, -) -> tuple[list[str], list[dict[str, Any]]]: - """Split execution-provider specs into Optimum-compatible names and options.""" - normalized_providers: list[str] = [] - normalized_options: list[dict[str, Any]] = [] - - for provider in providers or (): - provider_name: str | None = None - provider_options: dict[str, Any] = {} - - if isinstance(provider, tuple): - if provider: - provider_name = str(provider[0]).strip() - if len(provider) > 1 and isinstance(provider[1], dict): - provider_options = dict(provider[1]) - elif provider is not None: - provider_name = str(provider).strip() - - if not provider_name: - continue - - normalized_providers.append(provider_name) - normalized_options.append(provider_options) - - if not normalized_providers: - normalized_providers.append("CPUExecutionProvider") - normalized_options.append({}) - - return normalized_providers, normalized_options - - -class ONNXReranker(BaseReranker): - """Cross-encoder reranker using Optimum + ONNX Runtime with lazy loading.""" - - DEFAULT_MODEL = "Xenova/ms-marco-MiniLM-L-6-v2" - - def __init__( - self, - model_name: str | None = None, - *, - use_gpu: bool = True, - providers: list[Any] | None = None, - max_length: int | None = None, - ) -> None: - self.model_name = (model_name or self.DEFAULT_MODEL).strip() - if not self.model_name: - raise ValueError("model_name cannot be blank") - - self.use_gpu = bool(use_gpu) - self.providers = providers - - self.max_length = int(max_length) if max_length is not None else None - - self._tokenizer: Any | None = None - self._model: Any | None = None - self._model_input_names: set[str] | None = None - self._lock = threading.RLock() - - def _load_model(self) -> None: - if self._model is not None and self._tokenizer is not None: - return - - ok, err = check_onnx_reranker_available() - if not ok: - raise ImportError(err) - - with self._lock: - if self._model is not None and self._tokenizer is not None: - return - - from inspect import signature - - from optimum.onnxruntime import ORTModelForSequenceClassification - from transformers import AutoTokenizer - - if self.providers is None: - from ..gpu_support import get_optimal_providers - - # Include device_id options for DirectML/CUDA selection when available. - self.providers = get_optimal_providers( - use_gpu=self.use_gpu, with_device_options=True - ) - - provider_names, provider_options = _normalize_provider_specs(self.providers) - - # Some Optimum versions accept `providers`, others accept a single `provider`. - # Prefer passing the full providers list, with a conservative fallback. - model_kwargs: dict[str, Any] = {} - try: - params = signature(ORTModelForSequenceClassification.from_pretrained).parameters - if "providers" in params: - model_kwargs["providers"] = provider_names - if "provider_options" in params: - model_kwargs["provider_options"] = provider_options - elif "provider" in params: - model_kwargs["provider"] = provider_names[0] - if "provider_options" in params and provider_options[0]: - model_kwargs["provider_options"] = provider_options[0] - except Exception: - model_kwargs = {} - - try: - self._model = ORTModelForSequenceClassification.from_pretrained( - self.model_name, - **model_kwargs, - ) - except TypeError: - # Fallback for older Optimum versions: retry without provider arguments. - self._model = ORTModelForSequenceClassification.from_pretrained(self.model_name) - - self._tokenizer = AutoTokenizer.from_pretrained(self.model_name, use_fast=True) - - # Cache model input names to filter tokenizer outputs defensively. - input_names: set[str] | None = None - for attr in ("input_names", "model_input_names"): - names = getattr(self._model, attr, None) - if isinstance(names, (list, tuple)) and names: - input_names = {str(n) for n in names} - break - if input_names is None: - try: - session = getattr(self._model, "model", None) - if session is not None and hasattr(session, "get_inputs"): - input_names = {i.name for i in session.get_inputs()} - except Exception: - input_names = None - self._model_input_names = input_names - - @staticmethod - def _sigmoid(x: "Any") -> "Any": - import numpy as np - - x = np.clip(x, -50.0, 50.0) - return 1.0 / (1.0 + np.exp(-x)) - - @staticmethod - def _select_relevance_logit(logits: "Any") -> "Any": - import numpy as np - - arr = np.asarray(logits) - if arr.ndim == 0: - return arr.reshape(1) - if arr.ndim == 1: - return arr - if arr.ndim >= 2: - # Common cases: - # - Regression: (batch, 1) - # - Binary classification: (batch, 2) - if arr.shape[-1] == 1: - return arr[..., 0] - if arr.shape[-1] == 2: - # Convert 2-logit softmax into a single logit via difference. - return arr[..., 1] - arr[..., 0] - return arr.max(axis=-1) - return arr.reshape(-1) - - def _tokenize_batch(self, batch: Sequence[tuple[str, str]]) -> dict[str, Any]: - if self._tokenizer is None: - raise RuntimeError("Tokenizer not loaded") # pragma: no cover - defensive - - queries = [q for q, _ in batch] - docs = [d for _, d in batch] - - tokenizer_kwargs: dict[str, Any] = { - "text": queries, - "text_pair": docs, - "padding": True, - "truncation": True, - "return_tensors": "np", - } - - max_len = self.max_length - if max_len is None: - try: - model_max = int(getattr(self._tokenizer, "model_max_length", 0) or 0) - if 0 < model_max < 10_000: - max_len = model_max - else: - max_len = 512 - except Exception: - max_len = 512 - if max_len is not None and max_len > 0: - tokenizer_kwargs["max_length"] = int(max_len) - - encoded = self._tokenizer(**tokenizer_kwargs) - inputs = dict(encoded) - - # Some models do not accept token_type_ids; filter to known input names if available. - if self._model_input_names: - inputs = {k: v for k, v in inputs.items() if k in self._model_input_names} - - return inputs - - def _forward_logits(self, inputs: dict[str, Any]) -> Any: - if self._model is None: - raise RuntimeError("Model not loaded") # pragma: no cover - defensive - - outputs = self._model(**inputs) - if hasattr(outputs, "logits"): - return outputs.logits - if isinstance(outputs, dict) and "logits" in outputs: - return outputs["logits"] - if isinstance(outputs, (list, tuple)) and outputs: - return outputs[0] - raise RuntimeError("Unexpected model output format") # pragma: no cover - defensive - - def score_pairs( - self, - pairs: Sequence[tuple[str, str]], - *, - batch_size: int = 32, - ) -> list[float]: - """Score (query, doc) pairs with sigmoid-normalized outputs in [0, 1].""" - if not pairs: - return [] - - self._load_model() - - if self._model is None or self._tokenizer is None: # pragma: no cover - defensive - return [] - - import numpy as np - - bs = int(batch_size) if batch_size and int(batch_size) > 0 else 32 - scores: list[float] = [] - - for batch in _iter_batches(list(pairs), bs): - inputs = self._tokenize_batch(batch) - logits = self._forward_logits(inputs) - rel_logits = self._select_relevance_logit(logits) - probs = self._sigmoid(rel_logits) - probs = np.clip(probs, 0.0, 1.0) - scores.extend([float(p) for p in probs.reshape(-1).tolist()]) - - if len(scores) != len(pairs): - logger.debug( - "ONNX reranker produced %d scores for %d pairs", len(scores), len(pairs) - ) - return scores[: len(pairs)] - - return scores diff --git a/codex-lens/src/codexlens/semantic/rotational_embedder.py b/codex-lens/src/codexlens/semantic/rotational_embedder.py deleted file mode 100644 index ff0f41ac..00000000 --- a/codex-lens/src/codexlens/semantic/rotational_embedder.py +++ /dev/null @@ -1,434 +0,0 @@ -"""Rotational embedder for multi-endpoint API load balancing. - -Provides intelligent load balancing across multiple LiteLLM embedding endpoints -to maximize throughput while respecting rate limits. -""" - -from __future__ import annotations - -import logging -import random -import threading -import time -from dataclasses import dataclass, field -from enum import Enum -from typing import Any, Dict, Iterable, List, Optional - -import numpy as np - -from .base import BaseEmbedder - -logger = logging.getLogger(__name__) - - -class EndpointStatus(Enum): - """Status of an API endpoint.""" - AVAILABLE = "available" - COOLING = "cooling" # Rate limited, temporarily unavailable - FAILED = "failed" # Permanent failure (auth error, etc.) - - -class SelectionStrategy(Enum): - """Strategy for selecting endpoints.""" - ROUND_ROBIN = "round_robin" - LATENCY_AWARE = "latency_aware" - WEIGHTED_RANDOM = "weighted_random" - - -@dataclass -class EndpointConfig: - """Configuration for a single API endpoint.""" - model: str - api_key: Optional[str] = None - api_base: Optional[str] = None - weight: float = 1.0 # Higher weight = more requests - max_concurrent: int = 4 # Max concurrent requests to this endpoint - - -@dataclass -class EndpointState: - """Runtime state for an endpoint.""" - config: EndpointConfig - embedder: Any = None # LiteLLMEmbedderWrapper instance - - # Health metrics - status: EndpointStatus = EndpointStatus.AVAILABLE - cooldown_until: float = 0.0 # Unix timestamp when cooldown ends - - # Performance metrics - total_requests: int = 0 - total_failures: int = 0 - avg_latency_ms: float = 0.0 - last_latency_ms: float = 0.0 - - # Concurrency tracking - active_requests: int = 0 - lock: threading.Lock = field(default_factory=threading.Lock) - - def is_available(self) -> bool: - """Check if endpoint is available for requests.""" - if self.status == EndpointStatus.FAILED: - return False - if self.status == EndpointStatus.COOLING: - if time.time() >= self.cooldown_until: - self.status = EndpointStatus.AVAILABLE - return True - return False - return True - - def set_cooldown(self, seconds: float) -> None: - """Put endpoint in cooldown state.""" - self.status = EndpointStatus.COOLING - self.cooldown_until = time.time() + seconds - logger.warning(f"Endpoint {self.config.model} cooling down for {seconds:.1f}s") - - def mark_failed(self) -> None: - """Mark endpoint as permanently failed.""" - self.status = EndpointStatus.FAILED - logger.error(f"Endpoint {self.config.model} marked as failed") - - def record_success(self, latency_ms: float) -> None: - """Record successful request.""" - self.total_requests += 1 - self.last_latency_ms = latency_ms - # Exponential moving average for latency - alpha = 0.3 - if self.avg_latency_ms == 0: - self.avg_latency_ms = latency_ms - else: - self.avg_latency_ms = alpha * latency_ms + (1 - alpha) * self.avg_latency_ms - - def record_failure(self) -> None: - """Record failed request.""" - self.total_requests += 1 - self.total_failures += 1 - - @property - def health_score(self) -> float: - """Calculate health score (0-1) based on metrics.""" - if not self.is_available(): - return 0.0 - - # Base score from success rate - if self.total_requests > 0: - success_rate = 1 - (self.total_failures / self.total_requests) - else: - success_rate = 1.0 - - # Latency factor (faster = higher score) - # Normalize: 100ms = 1.0, 1000ms = 0.1 - if self.avg_latency_ms > 0: - latency_factor = min(1.0, 100 / self.avg_latency_ms) - else: - latency_factor = 1.0 - - # Availability factor (less concurrent = more available) - if self.config.max_concurrent > 0: - availability = 1 - (self.active_requests / self.config.max_concurrent) - else: - availability = 1.0 - - # Combined score with weights - return (success_rate * 0.4 + latency_factor * 0.3 + availability * 0.3) * self.config.weight - - -class RotationalEmbedder(BaseEmbedder): - """Embedder that load balances across multiple API endpoints. - - Features: - - Intelligent endpoint selection based on latency and health - - Automatic failover on rate limits (429) and server errors - - Cooldown management to respect rate limits - - Thread-safe concurrent request handling - - Args: - endpoints: List of endpoint configurations - strategy: Selection strategy (default: latency_aware) - default_cooldown: Default cooldown seconds for rate limits (default: 60) - max_retries: Maximum retry attempts across all endpoints (default: 3) - """ - - def __init__( - self, - endpoints: List[EndpointConfig], - strategy: SelectionStrategy = SelectionStrategy.LATENCY_AWARE, - default_cooldown: float = 60.0, - max_retries: int = 3, - ) -> None: - if not endpoints: - raise ValueError("At least one endpoint must be provided") - - self.strategy = strategy - self.default_cooldown = default_cooldown - self.max_retries = max_retries - - # Initialize endpoint states - self._endpoints: List[EndpointState] = [] - self._lock = threading.Lock() - self._round_robin_index = 0 - - # Create embedder instances for each endpoint - from .litellm_embedder import LiteLLMEmbedderWrapper - - for config in endpoints: - # Build kwargs for LiteLLMEmbedderWrapper - kwargs: Dict[str, Any] = {} - if config.api_key: - kwargs["api_key"] = config.api_key - if config.api_base: - kwargs["api_base"] = config.api_base - - try: - embedder = LiteLLMEmbedderWrapper(model=config.model, **kwargs) - state = EndpointState(config=config, embedder=embedder) - self._endpoints.append(state) - logger.info(f"Initialized endpoint: {config.model}") - except Exception as e: - logger.error(f"Failed to initialize endpoint {config.model}: {e}") - - if not self._endpoints: - raise ValueError("Failed to initialize any endpoints") - - # Cache embedding properties from first endpoint - self._embedding_dim = self._endpoints[0].embedder.embedding_dim - self._model_name = f"rotational({len(self._endpoints)} endpoints)" - self._max_tokens = self._endpoints[0].embedder.max_tokens - - @property - def embedding_dim(self) -> int: - """Return embedding dimensions.""" - return self._embedding_dim - - @property - def model_name(self) -> str: - """Return model name.""" - return self._model_name - - @property - def max_tokens(self) -> int: - """Return maximum token limit.""" - return self._max_tokens - - @property - def endpoint_count(self) -> int: - """Return number of configured endpoints.""" - return len(self._endpoints) - - @property - def available_endpoint_count(self) -> int: - """Return number of available endpoints.""" - return sum(1 for ep in self._endpoints if ep.is_available()) - - def get_endpoint_stats(self) -> List[Dict[str, Any]]: - """Get statistics for all endpoints.""" - stats = [] - for ep in self._endpoints: - stats.append({ - "model": ep.config.model, - "status": ep.status.value, - "total_requests": ep.total_requests, - "total_failures": ep.total_failures, - "avg_latency_ms": round(ep.avg_latency_ms, 2), - "health_score": round(ep.health_score, 3), - "active_requests": ep.active_requests, - }) - return stats - - def _select_endpoint(self) -> Optional[EndpointState]: - """Select best available endpoint based on strategy.""" - available = [ep for ep in self._endpoints if ep.is_available()] - - if not available: - return None - - if self.strategy == SelectionStrategy.ROUND_ROBIN: - with self._lock: - self._round_robin_index = (self._round_robin_index + 1) % len(available) - return available[self._round_robin_index] - - elif self.strategy == SelectionStrategy.LATENCY_AWARE: - # Sort by health score (descending) and pick top candidate - # Add small random factor to prevent thundering herd - scored = [(ep, ep.health_score + random.uniform(0, 0.1)) for ep in available] - scored.sort(key=lambda x: x[1], reverse=True) - return scored[0][0] - - elif self.strategy == SelectionStrategy.WEIGHTED_RANDOM: - # Weighted random selection based on health scores - scores = [ep.health_score for ep in available] - total = sum(scores) - if total == 0: - return random.choice(available) - - weights = [s / total for s in scores] - return random.choices(available, weights=weights, k=1)[0] - - return available[0] - - def _parse_retry_after(self, error: Exception) -> Optional[float]: - """Extract Retry-After value from error if available.""" - error_str = str(error) - - # Try to find Retry-After in error message - import re - match = re.search(r'[Rr]etry[- ][Aa]fter[:\s]+(\d+)', error_str) - if match: - return float(match.group(1)) - - return None - - def _is_rate_limit_error(self, error: Exception) -> bool: - """Check if error is a rate limit error.""" - error_str = str(error).lower() - return any(x in error_str for x in ["429", "rate limit", "too many requests"]) - - def _is_retryable_error(self, error: Exception) -> bool: - """Check if error is retryable (not auth/config error).""" - error_str = str(error).lower() - # Retryable errors - if any(x in error_str for x in ["429", "rate limit", "502", "503", "504", - "timeout", "connection", "service unavailable"]): - return True - # Non-retryable errors (auth, config) - if any(x in error_str for x in ["401", "403", "invalid", "authentication", - "unauthorized", "api key"]): - return False - # Default to retryable for unknown errors - return True - - def embed_to_numpy(self, texts: str | Iterable[str], **kwargs) -> np.ndarray: - """Embed texts using load-balanced endpoint selection. - - Args: - texts: Single text or iterable of texts to embed. - **kwargs: Additional arguments passed to underlying embedder. - - Returns: - numpy.ndarray: Array of shape (n_texts, embedding_dim) containing embeddings. - - Raises: - RuntimeError: If all endpoints fail after retries. - """ - if isinstance(texts, str): - texts = [texts] - else: - texts = list(texts) - - last_error: Optional[Exception] = None - tried_endpoints: set = set() - - for attempt in range(self.max_retries + 1): - endpoint = self._select_endpoint() - - if endpoint is None: - # All endpoints unavailable, wait for shortest cooldown - min_cooldown = min( - (ep.cooldown_until - time.time() for ep in self._endpoints - if ep.status == EndpointStatus.COOLING), - default=self.default_cooldown - ) - if min_cooldown > 0 and attempt < self.max_retries: - wait_time = min(min_cooldown, 30) # Cap wait at 30s - logger.warning(f"All endpoints busy, waiting {wait_time:.1f}s...") - time.sleep(wait_time) - continue - break - - # Track tried endpoints to avoid infinite loops - endpoint_id = id(endpoint) - if endpoint_id in tried_endpoints and len(tried_endpoints) >= len(self._endpoints): - # Already tried all endpoints - break - tried_endpoints.add(endpoint_id) - - # Acquire slot - with endpoint.lock: - endpoint.active_requests += 1 - - try: - start_time = time.time() - result = endpoint.embedder.embed_to_numpy(texts, **kwargs) - latency_ms = (time.time() - start_time) * 1000 - - # Record success - endpoint.record_success(latency_ms) - - return result - - except Exception as e: - last_error = e - endpoint.record_failure() - - if self._is_rate_limit_error(e): - # Rate limited - set cooldown - retry_after = self._parse_retry_after(e) or self.default_cooldown - endpoint.set_cooldown(retry_after) - logger.warning(f"Endpoint {endpoint.config.model} rate limited, " - f"cooling for {retry_after}s") - - elif not self._is_retryable_error(e): - # Permanent failure (auth error, etc.) - endpoint.mark_failed() - logger.error(f"Endpoint {endpoint.config.model} failed permanently: {e}") - - else: - # Temporary error - short cooldown - endpoint.set_cooldown(5.0) - logger.warning(f"Endpoint {endpoint.config.model} error: {e}") - - finally: - with endpoint.lock: - endpoint.active_requests -= 1 - - # All retries exhausted - available = self.available_endpoint_count - raise RuntimeError( - f"All embedding attempts failed after {self.max_retries + 1} tries. " - f"Available endpoints: {available}/{len(self._endpoints)}. " - f"Last error: {last_error}" - ) - - -def create_rotational_embedder( - endpoints_config: List[Dict[str, Any]], - strategy: str = "latency_aware", - default_cooldown: float = 60.0, -) -> RotationalEmbedder: - """Factory function to create RotationalEmbedder from config dicts. - - Args: - endpoints_config: List of endpoint configuration dicts with keys: - - model: Model identifier (required) - - api_key: API key (optional) - - api_base: API base URL (optional) - - weight: Request weight (optional, default 1.0) - - max_concurrent: Max concurrent requests (optional, default 4) - strategy: Selection strategy name (round_robin, latency_aware, weighted_random) - default_cooldown: Default cooldown seconds for rate limits - - Returns: - Configured RotationalEmbedder instance - - Example config: - endpoints_config = [ - {"model": "openai/text-embedding-3-small", "api_key": "sk-..."}, - {"model": "azure/my-embedding", "api_base": "https://...", "api_key": "..."}, - ] - """ - endpoints = [] - for cfg in endpoints_config: - endpoints.append(EndpointConfig( - model=cfg["model"], - api_key=cfg.get("api_key"), - api_base=cfg.get("api_base"), - weight=cfg.get("weight", 1.0), - max_concurrent=cfg.get("max_concurrent", 4), - )) - - strategy_enum = SelectionStrategy[strategy.upper()] - - return RotationalEmbedder( - endpoints=endpoints, - strategy=strategy_enum, - default_cooldown=default_cooldown, - ) diff --git a/codex-lens/src/codexlens/semantic/vector_store.py b/codex-lens/src/codexlens/semantic/vector_store.py deleted file mode 100644 index 1dad8fbe..00000000 --- a/codex-lens/src/codexlens/semantic/vector_store.py +++ /dev/null @@ -1,1278 +0,0 @@ -"""Vector storage and similarity search for semantic chunks. - -Optimized for high-performance similarity search using: -- HNSW index for O(log N) approximate nearest neighbor search (primary) -- Cached embedding matrix for batch operations (fallback) -- NumPy vectorized cosine similarity (fallback, 100x+ faster than loops) -- Lazy content loading (only fetch for top-k results) -""" - -from __future__ import annotations - -import json -import logging -import sys -import sqlite3 -import threading -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - -from codexlens.entities import SearchResult, SemanticChunk -from codexlens.errors import StorageError - -try: - import numpy as np - NUMPY_AVAILABLE = True -except ImportError: - np = None # type: ignore[assignment] - NUMPY_AVAILABLE = False - -# Try to import ANN index (optional hnswlib dependency) -try: - from codexlens.semantic.ann_index import ( - ANNIndex, - BinaryANNIndex, - create_ann_index, - HNSWLIB_AVAILABLE, - ) -except ImportError: - HNSWLIB_AVAILABLE = False - ANNIndex = None - BinaryANNIndex = None - create_ann_index = None - - -logger = logging.getLogger(__name__) - -# Epsilon used to guard against floating point precision edge cases (e.g., near-zero norms). -EPSILON = 1e-10 - -# SQLite INTEGER PRIMARY KEY uses signed 64-bit rowids. -SQLITE_INTEGER_MAX = (1 << 63) - 1 - - -def _validate_chunk_id_range(start_id: int, count: int) -> None: - """Validate that a batch insert can safely generate sequential chunk IDs.""" - if count <= 0: - return - - last_id = start_id + count - 1 - if last_id > sys.maxsize or last_id > SQLITE_INTEGER_MAX: - raise ValueError( - "Chunk ID range overflow: " - f"start_id={start_id}, count={count} would allocate up to {last_id}, " - f"exceeding limits (sys.maxsize={sys.maxsize}, sqlite_max={SQLITE_INTEGER_MAX}). " - "Consider cleaning up the index database or creating a new index database." - ) - - -def _validate_sql_placeholders(placeholders: str, expected_count: int) -> None: - """Validate the placeholder string used for a parameterized SQL IN clause.""" - expected = ",".join("?" * expected_count) - if placeholders != expected: - raise ValueError( - "Invalid SQL placeholders for IN clause. " - f"Expected {expected_count} '?' placeholders." - ) - - -def _cosine_similarity(a: List[float], b: List[float]) -> float: - """Compute cosine similarity between two vectors.""" - if not NUMPY_AVAILABLE: - raise ImportError("numpy required for vector operations") - - a_arr = np.array(a) - b_arr = np.array(b) - - norm_a = np.linalg.norm(a_arr) - norm_b = np.linalg.norm(b_arr) - - # Use epsilon tolerance to avoid division by (near-)zero due to floating point precision. - if norm_a < EPSILON or norm_b < EPSILON: - return 0.0 - - denom = norm_a * norm_b - if denom < EPSILON: - return 0.0 - - return float(np.dot(a_arr, b_arr) / denom) - - -class VectorStore: - """SQLite-based vector storage with HNSW-accelerated similarity search. - - Performance optimizations: - - HNSW index for O(log N) approximate nearest neighbor search - - Embedding matrix cached in memory for batch similarity computation (fallback) - - NumPy vectorized operations instead of Python loops (fallback) - - Lazy content loading - only fetch full content for top-k results - - Thread-safe cache invalidation - - Bulk insert mode for efficient batch operations - """ - - # Default embedding dimension (used when creating new index) - DEFAULT_DIM = 768 - - def __init__(self, db_path: str | Path) -> None: - if not NUMPY_AVAILABLE: - raise ImportError( - "Semantic search dependencies not available. " - "Install with: pip install codexlens[semantic]" - ) - - self.db_path = Path(db_path) - self.db_path.parent.mkdir(parents=True, exist_ok=True) - - # Embedding cache for fast similarity search (fallback) - self._cache_lock = threading.RLock() - self._embedding_matrix: Optional[np.ndarray] = None - self._embedding_norms: Optional[np.ndarray] = None - self._chunk_ids: Optional[List[int]] = None - self._cache_version: int = 0 - - # ANN index for O(log N) search - self._ann_index: Optional[ANNIndex] = None - self._ann_dim: Optional[int] = None - self._ann_write_lock = threading.Lock() # Protects ANN index modifications - - # Bulk insert mode tracking - self._bulk_insert_mode: bool = False - self._bulk_insert_ids: List[int] = [] - self._bulk_insert_embeddings: List[np.ndarray] = [] - - self._init_schema() - self._init_ann_index() - - def _init_schema(self) -> None: - """Initialize vector storage schema.""" - with sqlite3.connect(self.db_path) as conn: - # Enable memory mapping for faster reads - conn.execute("PRAGMA mmap_size = 30000000000") # 30GB limit - conn.execute(""" - CREATE TABLE IF NOT EXISTS semantic_chunks ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - file_path TEXT NOT NULL, - content TEXT NOT NULL, - embedding BLOB NOT NULL, - metadata TEXT, - category TEXT DEFAULT 'code', - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - """) - conn.execute(""" - CREATE INDEX IF NOT EXISTS idx_chunks_file - ON semantic_chunks(file_path) - """) - conn.execute(""" - CREATE INDEX IF NOT EXISTS idx_chunks_category - ON semantic_chunks(category) - """) - # Model configuration table - tracks which model generated the embeddings - conn.execute(""" - CREATE TABLE IF NOT EXISTS embeddings_config ( - id INTEGER PRIMARY KEY CHECK (id = 1), - model_profile TEXT NOT NULL, - model_name TEXT NOT NULL, - embedding_dim INTEGER NOT NULL, - backend TEXT NOT NULL DEFAULT 'fastembed', - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - """) - - # Migration: Add backend column to existing tables - self._migrate_backend_column(conn) - # Migration: Add category column - self._migrate_category_column(conn) - - conn.commit() - - def _migrate_backend_column(self, conn: sqlite3.Connection) -> None: - """Add backend column to existing embeddings_config table if not present. - - Args: - conn: Active SQLite connection - """ - # Check if backend column exists - cursor = conn.execute("PRAGMA table_info(embeddings_config)") - columns = [row[1] for row in cursor.fetchall()] - - if 'backend' not in columns: - logger.info("Migrating embeddings_config table: adding backend column") - conn.execute(""" - ALTER TABLE embeddings_config - ADD COLUMN backend TEXT NOT NULL DEFAULT 'fastembed' - """) - - def _migrate_category_column(self, conn: sqlite3.Connection) -> None: - """Add category column to existing semantic_chunks table if not present. - - Args: - conn: Active SQLite connection - """ - # Check if category column exists - cursor = conn.execute("PRAGMA table_info(semantic_chunks)") - columns = [row[1] for row in cursor.fetchall()] - - if 'category' not in columns: - logger.info("Migrating semantic_chunks table: adding category column") - conn.execute(""" - ALTER TABLE semantic_chunks - ADD COLUMN category TEXT DEFAULT 'code' - """) - # Create index for fast category filtering - conn.execute(""" - CREATE INDEX IF NOT EXISTS idx_chunks_category - ON semantic_chunks(category) - """) - - def _init_ann_index(self) -> None: - """Initialize ANN index (lazy loading from existing data).""" - if not HNSWLIB_AVAILABLE: - logger.debug("hnswlib not available, using brute-force search") - return - - # Try to detect embedding dimension from existing data - dim = self._detect_embedding_dim() - if dim is None: - # No data yet, will initialize on first add - logger.debug("No embeddings found, ANN index will be created on first add") - return - - self._ann_dim = dim - - try: - self._ann_index = ANNIndex(self.db_path, dim) - if self._ann_index.load(): - logger.debug( - "Loaded ANN index with %d vectors", self._ann_index.count() - ) - else: - # Index file doesn't exist, try to build from SQLite data - logger.debug("ANN index file not found, rebuilding from SQLite") - self._rebuild_ann_index_internal() - except Exception as e: - logger.warning("Failed to initialize ANN index: %s", e) - self._ann_index = None - - def _detect_embedding_dim(self) -> Optional[int]: - """Detect embedding dimension from existing data.""" - with sqlite3.connect(self.db_path) as conn: - row = conn.execute( - "SELECT embedding FROM semantic_chunks LIMIT 1" - ).fetchone() - if row and row[0]: - # Embedding is stored as float32 blob - blob = row[0] - return len(blob) // np.dtype(np.float32).itemsize - return None - - @property - def dimension(self) -> Optional[int]: - """Return the dimension of embeddings in the store. - - Returns: - Embedding dimension if available, None if store is empty. - """ - if self._ann_dim is not None: - return self._ann_dim - self._ann_dim = self._detect_embedding_dim() - return self._ann_dim - - def _rebuild_ann_index_internal(self) -> int: - """Internal method to rebuild ANN index from SQLite data.""" - if self._ann_index is None: - return 0 - - with sqlite3.connect(self.db_path) as conn: - conn.execute("PRAGMA mmap_size = 30000000000") - rows = conn.execute( - "SELECT id, embedding FROM semantic_chunks" - ).fetchall() - - if not rows: - return 0 - - # Extract IDs and embeddings - ids = [r[0] for r in rows] - embeddings = np.vstack([ - np.frombuffer(r[1], dtype=np.float32) for r in rows - ]) - - # Add to ANN index - self._ann_index.add_vectors(ids, embeddings) - self._ann_index.save() - - logger.info("Rebuilt ANN index with %d vectors", len(ids)) - return len(ids) - - def rebuild_ann_index(self) -> int: - """Rebuild HNSW index from all chunks in SQLite. - - Use this method to: - - Migrate existing data to use ANN search - - Repair corrupted index - - Reclaim space after many deletions - - Returns: - Number of vectors indexed. - """ - if not HNSWLIB_AVAILABLE: - logger.warning("hnswlib not available, cannot rebuild ANN index") - return 0 - - # Detect dimension - dim = self._detect_embedding_dim() - if dim is None: - logger.warning("No embeddings found, cannot rebuild ANN index") - return 0 - - self._ann_dim = dim - - # Create new index - try: - self._ann_index = ANNIndex(self.db_path, dim) - return self._rebuild_ann_index_internal() - except Exception as e: - logger.error("Failed to rebuild ANN index: %s", e) - self._ann_index = None - return 0 - - def _invalidate_cache(self) -> None: - """Invalidate the embedding cache (thread-safe).""" - with self._cache_lock: - self._embedding_matrix = None - self._embedding_norms = None - self._chunk_ids = None - self._cache_version += 1 - - def _refresh_cache(self) -> bool: - """Load embeddings into numpy matrix for fast similarity search. - - Returns: - True if cache was refreshed successfully, False if no data. - """ - with self._cache_lock: - with sqlite3.connect(self.db_path) as conn: - conn.execute("PRAGMA mmap_size = 30000000000") - rows = conn.execute( - "SELECT id, embedding FROM semantic_chunks" - ).fetchall() - - if not rows: - self._embedding_matrix = None - self._embedding_norms = None - self._chunk_ids = None - return False - - # Extract IDs and embeddings - self._chunk_ids = [r[0] for r in rows] - - # Bulk convert binary blobs to numpy matrix - embeddings = [ - np.frombuffer(r[1], dtype=np.float32) for r in rows - ] - self._embedding_matrix = np.vstack(embeddings) - - # Pre-compute norms for faster similarity calculation - self._embedding_norms = np.linalg.norm( - self._embedding_matrix, axis=1, keepdims=True - ) - # Avoid division by zero - self._embedding_norms = np.where( - self._embedding_norms == 0, EPSILON, self._embedding_norms - ) - - return True - - def _ensure_ann_index(self, dim: int) -> bool: - """Ensure ANN index is initialized with correct dimension. - - This method is thread-safe and uses double-checked locking. - - Args: - dim: Embedding dimension - - Returns: - True if ANN index is ready, False otherwise - """ - if not HNSWLIB_AVAILABLE: - return False - - # Fast path: index already initialized (no lock needed) - if self._ann_index is not None: - return True - - # Slow path: acquire lock for initialization - with self._ann_write_lock: - # Double-check after acquiring lock - if self._ann_index is not None: - return True - - try: - self._ann_dim = dim - self._ann_index = ANNIndex(self.db_path, dim) - self._ann_index.load() # Try to load existing - return True - except Exception as e: - logger.warning("Failed to initialize ANN index: %s", e) - self._ann_index = None - return False - - def add_chunk( - self, chunk: SemanticChunk, file_path: str, category: str = "code" - ) -> int: - """Add a single chunk with its embedding. - - Args: - chunk: SemanticChunk with embedding - file_path: Path to the source file - category: File category ('code' or 'doc'), default 'code' - - Returns: - The inserted chunk ID. - """ - if chunk.embedding is None: - raise ValueError("Chunk must have embedding before adding to store") - - embedding_arr = np.array(chunk.embedding, dtype=np.float32) - embedding_blob = embedding_arr.tobytes() - metadata_json = json.dumps(chunk.metadata) if chunk.metadata else None - - with sqlite3.connect(self.db_path) as conn: - cursor = conn.execute( - """ - INSERT INTO semantic_chunks (file_path, content, embedding, metadata, category) - VALUES (?, ?, ?, ?, ?) - """, - (file_path, chunk.content, embedding_blob, metadata_json, category) - ) - conn.commit() - chunk_id = cursor.lastrowid or 0 - - # Add to ANN index - if self._ensure_ann_index(len(chunk.embedding)): - with self._ann_write_lock: - try: - self._ann_index.add_vectors([chunk_id], embedding_arr.reshape(1, -1)) - self._ann_index.save() - except Exception as e: - logger.warning("Failed to add to ANN index: %s", e) - - # Invalidate cache after modification - self._invalidate_cache() - return chunk_id - - def add_chunks( - self, chunks: List[SemanticChunk], file_path: str, category: str = "code" - ) -> List[int]: - """Add multiple chunks with embeddings (batch insert). - - Args: - chunks: List of SemanticChunk objects with embeddings - file_path: Path to the source file - category: File category ('code' or 'doc'), default 'code' - - Returns: - List of inserted chunk IDs. - """ - if not chunks: - return [] - - # Prepare batch data - batch_data = [] - embeddings_list = [] - for chunk in chunks: - if chunk.embedding is None: - raise ValueError("All chunks must have embeddings") - embedding_arr = np.array(chunk.embedding, dtype=np.float32) - embedding_blob = embedding_arr.tobytes() - metadata_json = json.dumps(chunk.metadata) if chunk.metadata else None - batch_data.append((file_path, chunk.content, embedding_blob, metadata_json, category)) - embeddings_list.append(embedding_arr) - - # Batch insert to SQLite - with sqlite3.connect(self.db_path) as conn: - # Get starting ID before insert - row = conn.execute("SELECT MAX(id) FROM semantic_chunks").fetchone() - start_id = (row[0] or 0) + 1 - - conn.executemany( - """ - INSERT INTO semantic_chunks (file_path, content, embedding, metadata, category) - VALUES (?, ?, ?, ?, ?) - """, - batch_data - ) - conn.commit() - # Calculate inserted IDs based on starting ID - ids = list(range(start_id, start_id + len(chunks))) - - # Add to ANN index - if embeddings_list and self._ensure_ann_index(len(embeddings_list[0])): - with self._ann_write_lock: - try: - embeddings_matrix = np.vstack(embeddings_list) - self._ann_index.add_vectors(ids, embeddings_matrix) - self._ann_index.save() - except Exception as e: - logger.warning("Failed to add batch to ANN index: %s", e) - - # Invalidate cache after modification - self._invalidate_cache() - return ids - - def add_chunks_batch( - self, - chunks_with_paths: List[Tuple[SemanticChunk, str]], - update_ann: bool = True, - auto_save_ann: bool = True, - categories: Optional[List[str]] = None, - ) -> List[int]: - """Batch insert chunks from multiple files in a single transaction. - - This method is optimized for bulk operations during index generation. - - Args: - chunks_with_paths: List of (chunk, file_path) tuples - update_ann: If True, update ANN index with new vectors (default: True) - auto_save_ann: If True, save ANN index after update (default: True). - Set to False for bulk inserts to reduce I/O overhead. - categories: Optional list of categories per chunk. If None, defaults to 'code'. - If provided, must match length of chunks_with_paths. - - Returns: - List of inserted chunk IDs - """ - if not chunks_with_paths: - return [] - - batch_size = len(chunks_with_paths) - - # Validate categories if provided - if categories is not None and len(categories) != batch_size: - raise ValueError( - f"categories length ({len(categories)}) must match " - f"chunks_with_paths length ({batch_size})" - ) - - # Prepare batch data - batch_data = [] - embeddings_list = [] - for i, (chunk, file_path) in enumerate(chunks_with_paths): - if chunk.embedding is None: - raise ValueError("All chunks must have embeddings") - # Optimize: avoid repeated np.array() if already numpy - if isinstance(chunk.embedding, np.ndarray): - embedding_arr = chunk.embedding.astype(np.float32) - else: - embedding_arr = np.array(chunk.embedding, dtype=np.float32) - embedding_blob = embedding_arr.tobytes() - metadata_json = json.dumps(chunk.metadata) if chunk.metadata else None - category = categories[i] if categories else "code" - batch_data.append((file_path, chunk.content, embedding_blob, metadata_json, category)) - embeddings_list.append(embedding_arr) - - # Batch insert to SQLite in single transaction - with sqlite3.connect(self.db_path) as conn: - # Get starting ID before insert - row = conn.execute("SELECT MAX(id) FROM semantic_chunks").fetchone() - start_id = (row[0] or 0) + 1 - - _validate_chunk_id_range(start_id, batch_size) - - conn.executemany( - """ - INSERT INTO semantic_chunks (file_path, content, embedding, metadata, category) - VALUES (?, ?, ?, ?, ?) - """, - batch_data - ) - conn.commit() - # Calculate inserted IDs based on starting ID - ids = list(range(start_id, start_id + batch_size)) - - # Handle ANN index updates - if embeddings_list and update_ann and self._ensure_ann_index(len(embeddings_list[0])): - with self._ann_write_lock: - # In bulk insert mode, accumulate for later batch update - if self._bulk_insert_mode: - self._bulk_insert_ids.extend(ids) - self._bulk_insert_embeddings.extend(embeddings_list) - else: - # Normal mode: update immediately - try: - embeddings_matrix = np.vstack(embeddings_list) - self._ann_index.add_vectors(ids, embeddings_matrix) - if auto_save_ann: - self._ann_index.save() - except Exception as e: - logger.warning("Failed to add batch to ANN index: %s", e) - - # Invalidate cache after modification - self._invalidate_cache() - return ids - - def add_chunks_batch_numpy( - self, - chunks_with_paths: List[Tuple[SemanticChunk, str]], - embeddings_matrix: np.ndarray, - update_ann: bool = True, - auto_save_ann: bool = True, - categories: Optional[List[str]] = None, - ) -> List[int]: - """Batch insert chunks with pre-computed numpy embeddings matrix. - - This method accepts embeddings as a numpy matrix to avoid list->array conversions. - Useful when embeddings are already in numpy format from batch encoding. - - Args: - chunks_with_paths: List of (chunk, file_path) tuples (embeddings can be None) - embeddings_matrix: Pre-computed embeddings as (N, D) numpy array - update_ann: If True, update ANN index with new vectors (default: True) - auto_save_ann: If True, save ANN index after update (default: True) - categories: Optional list of categories per chunk. If None, defaults to 'code'. - - Returns: - List of inserted chunk IDs - """ - if not chunks_with_paths: - return [] - - batch_size = len(chunks_with_paths) - - if len(chunks_with_paths) != embeddings_matrix.shape[0]: - raise ValueError( - f"Mismatch: {len(chunks_with_paths)} chunks but " - f"{embeddings_matrix.shape[0]} embeddings" - ) - - # Validate categories if provided - if categories is not None and len(categories) != batch_size: - raise ValueError( - f"categories length ({len(categories)}) must match " - f"chunks_with_paths length ({batch_size})" - ) - - # Ensure float32 format - embeddings_matrix = embeddings_matrix.astype(np.float32) - - # Prepare batch data - batch_data = [] - for i, (chunk, file_path) in enumerate(chunks_with_paths): - embedding_arr = embeddings_matrix[i] - embedding_blob = embedding_arr.tobytes() - metadata_json = json.dumps(chunk.metadata) if chunk.metadata else None - category = categories[i] if categories else "code" - batch_data.append((file_path, chunk.content, embedding_blob, metadata_json, category)) - - # Batch insert to SQLite in single transaction - with sqlite3.connect(self.db_path) as conn: - # Get starting ID before insert - row = conn.execute("SELECT MAX(id) FROM semantic_chunks").fetchone() - start_id = (row[0] or 0) + 1 - - _validate_chunk_id_range(start_id, batch_size) - - conn.executemany( - """ - INSERT INTO semantic_chunks (file_path, content, embedding, metadata, category) - VALUES (?, ?, ?, ?, ?) - """, - batch_data - ) - conn.commit() - # Calculate inserted IDs based on starting ID - ids = list(range(start_id, start_id + batch_size)) - - # Handle ANN index updates - if update_ann and self._ensure_ann_index(embeddings_matrix.shape[1]): - with self._ann_write_lock: - # In bulk insert mode, accumulate for later batch update - if self._bulk_insert_mode: - self._bulk_insert_ids.extend(ids) - # Split matrix into individual arrays for accumulation - self._bulk_insert_embeddings.extend([embeddings_matrix[i] for i in range(len(ids))]) - else: - # Normal mode: update immediately - try: - self._ann_index.add_vectors(ids, embeddings_matrix) - if auto_save_ann: - self._ann_index.save() - except Exception as e: - logger.warning("Failed to add batch to ANN index: %s", e) - - # Invalidate cache after modification - self._invalidate_cache() - return ids - - def begin_bulk_insert(self) -> None: - """Begin bulk insert mode - disable ANN auto-update for better performance. - - Usage: - store.begin_bulk_insert() - try: - for batch in batches: - store.add_chunks_batch(batch, auto_save_ann=False) - finally: - store.end_bulk_insert() - - Or use context manager: - with store.bulk_insert(): - for batch in batches: - store.add_chunks_batch(batch) - """ - with self._ann_write_lock: - self._bulk_insert_mode = True - self._bulk_insert_ids.clear() - self._bulk_insert_embeddings.clear() - logger.debug("Entered bulk insert mode") - - def end_bulk_insert(self) -> None: - """End bulk insert mode and rebuild ANN index from accumulated data. - - This method should be called after all bulk inserts are complete to - update the ANN index in a single batch operation. - """ - with self._ann_write_lock: - if not self._bulk_insert_mode: - logger.warning("end_bulk_insert called but not in bulk insert mode") - return - - self._bulk_insert_mode = False - bulk_ids = list(self._bulk_insert_ids) - bulk_embeddings = list(self._bulk_insert_embeddings) - self._bulk_insert_ids.clear() - self._bulk_insert_embeddings.clear() - - # Update ANN index with accumulated data. - if bulk_ids and bulk_embeddings: - if self._ensure_ann_index(len(bulk_embeddings[0])): - with self._ann_write_lock: - try: - embeddings_matrix = np.vstack(bulk_embeddings) - self._ann_index.add_vectors(bulk_ids, embeddings_matrix) - self._ann_index.save() - logger.info( - "Bulk insert complete: added %d vectors to ANN index", - len(bulk_ids), - ) - except Exception as e: - logger.error("Failed to update ANN index after bulk insert: %s", e) - - logger.debug("Exited bulk insert mode") - - class BulkInsertContext: - """Context manager for bulk insert operations.""" - - def __init__(self, store: "VectorStore") -> None: - self.store = store - - def __enter__(self) -> "VectorStore": - self.store.begin_bulk_insert() - return self.store - - def __exit__(self, exc_type, exc_val, exc_tb) -> None: - self.store.end_bulk_insert() - - def bulk_insert(self) -> "VectorStore.BulkInsertContext": - """Return a context manager for bulk insert operations. - - Usage: - with store.bulk_insert(): - for batch in batches: - store.add_chunks_batch(batch) - """ - return self.BulkInsertContext(self) - - def delete_file_chunks(self, file_path: str) -> int: - """Delete all chunks for a file. - - Returns: - Number of deleted chunks. - """ - # Get chunk IDs before deletion (for ANN index) - chunk_ids_to_delete = [] - if self._ann_index is not None: - with sqlite3.connect(self.db_path) as conn: - rows = conn.execute( - "SELECT id FROM semantic_chunks WHERE file_path = ?", - (file_path,) - ).fetchall() - chunk_ids_to_delete = [r[0] for r in rows] - - # Delete from SQLite - with sqlite3.connect(self.db_path) as conn: - cursor = conn.execute( - "DELETE FROM semantic_chunks WHERE file_path = ?", - (file_path,) - ) - conn.commit() - deleted = cursor.rowcount - - # Remove from ANN index - if deleted > 0 and self._ann_index is not None and chunk_ids_to_delete: - with self._ann_write_lock: - try: - self._ann_index.remove_vectors(chunk_ids_to_delete) - self._ann_index.save() - except Exception as e: - logger.warning("Failed to remove from ANN index: %s", e) - - if deleted > 0: - self._invalidate_cache() - return deleted - - def search_similar( - self, - query_embedding: List[float], - top_k: int = 10, - min_score: float = 0.0, - return_full_content: bool = True, - category: Optional[str] = None, - ) -> List[SearchResult]: - """Find chunks most similar to query embedding. - - Uses HNSW index for O(log N) search when available, falls back to - brute-force NumPy search otherwise. - - Args: - query_embedding: Query vector. - top_k: Maximum results to return. - min_score: Minimum cosine similarity score in [0.0, 1.0]. - return_full_content: If True, return full code block content. - category: Optional category filter ('code' or 'doc'). If None, returns all. - - Returns: - List of SearchResult ordered by similarity (highest first). - """ - query_vec = np.array(query_embedding, dtype=np.float32) - - if not 0.0 <= min_score <= 1.0: - raise ValueError( - f"Invalid min_score: {min_score}. Must be within [0.0, 1.0] for cosine similarity." - ) - - # Try HNSW search first (O(log N)) - if ( - HNSWLIB_AVAILABLE - and self._ann_index is not None - and self._ann_index.is_loaded - and self._ann_index.count() > 0 - ): - try: - return self._search_with_ann( - query_vec, top_k, min_score, return_full_content, category - ) - except Exception as e: - logger.warning("ANN search failed, falling back to brute-force: %s", e) - - # Fallback to brute-force search (O(N)) - return self._search_brute_force( - query_vec, top_k, min_score, return_full_content, category - ) - - def _search_with_ann( - self, - query_vec: np.ndarray, - top_k: int, - min_score: float, - return_full_content: bool, - category: Optional[str] = None, - ) -> List[SearchResult]: - """Search using HNSW index (O(log N)). - - Args: - query_vec: Query vector as numpy array - top_k: Maximum results to return - min_score: Minimum cosine similarity score in [0.0, 1.0] - return_full_content: If True, return full code block content - category: Optional category filter ('code' or 'doc') - - Returns: - List of SearchResult ordered by similarity (highest first) - """ - # Limit top_k to available vectors to prevent hnswlib error - ann_count = self._ann_index.count() - # When category filtering, fetch more candidates to compensate for filtering - fetch_k = top_k * 3 if category else top_k - effective_top_k = min(fetch_k, ann_count) if ann_count > 0 else 0 - - if effective_top_k == 0: - return [] - - # HNSW search returns (ids, distances) - # For cosine space: distance = 1 - similarity - ids, distances = self._ann_index.search(query_vec, effective_top_k) - - if ids is None or distances is None: - logger.debug( - "ANN search returned null results (ids=%s, distances=%s)", - ids, - distances, - ) - return [] - - if len(ids) == 0 or len(distances) == 0: - logger.debug( - "ANN search returned empty results (ids=%s, distances=%s)", - ids, - distances, - ) - return [] - - if len(ids) != len(distances): - logger.warning( - "ANN search returned mismatched result lengths (%d ids, %d distances)", - len(ids), - len(distances), - ) - return [] - - # Convert distances to similarity scores - scores = [1.0 - d for d in distances] - - # Filter by min_score - filtered = [ - (chunk_id, score) - for chunk_id, score in zip(ids, scores) - if score >= min_score - ] - - if not filtered: - return [] - - top_ids = [f[0] for f in filtered] - top_scores = [f[1] for f in filtered] - - # Fetch content from SQLite with category filtering - results = self._fetch_results_by_ids( - top_ids, top_scores, return_full_content, category - ) - # Apply final limit after category filtering - return results[:top_k] - - def _search_brute_force( - self, - query_vec: np.ndarray, - top_k: int, - min_score: float, - return_full_content: bool, - category: Optional[str] = None, - ) -> List[SearchResult]: - """Brute-force search using NumPy (O(N) fallback). - - Args: - query_vec: Query vector as numpy array - top_k: Maximum results to return - min_score: Minimum cosine similarity score in [0.0, 1.0] - return_full_content: If True, return full code block content - category: Optional category filter ('code' or 'doc') - - Returns: - List of SearchResult ordered by similarity (highest first) - """ - logger.warning( - "Using brute-force vector search (hnswlib not available). " - "This may cause high memory usage for large indexes. " - "Install hnswlib for better performance: pip install hnswlib" - ) - - with self._cache_lock: - # Refresh cache if needed - if self._embedding_matrix is None: - if not self._refresh_cache(): - return [] # No data - - # Vectorized cosine similarity - query_vec = query_vec.reshape(1, -1) - query_norm = np.linalg.norm(query_vec) - if query_norm == 0: - return [] - - # Compute all similarities at once: (N,) scores - # similarity = (A @ B.T) / (||A|| * ||B||) - dot_products = np.dot(self._embedding_matrix, query_vec.T).flatten() - scores = dot_products / (self._embedding_norms.flatten() * query_norm) - - # Filter by min_score and get top-k indices - valid_mask = scores >= min_score - valid_indices = np.where(valid_mask)[0] - - if len(valid_indices) == 0: - return [] - - # When category filtering, fetch more candidates to compensate for filtering - fetch_k = top_k * 3 if category else top_k - - # Sort by score descending and take top candidates - valid_scores = scores[valid_indices] - sorted_order = np.argsort(valid_scores)[::-1][:fetch_k] - top_indices = valid_indices[sorted_order] - top_scores = valid_scores[sorted_order] - - # Get chunk IDs for top results - top_ids = [self._chunk_ids[i] for i in top_indices] - - # Fetch content only for top-k results (lazy loading) with category filtering - results = self._fetch_results_by_ids( - top_ids, top_scores.tolist(), return_full_content, category - ) - # Apply final limit after category filtering - return results[:top_k] - - def _fetch_results_by_ids( - self, - chunk_ids: List[int], - scores: List[float], - return_full_content: bool, - category: Optional[str] = None, - ) -> List[SearchResult]: - """Fetch full result data for specific chunk IDs. - - Args: - chunk_ids: List of chunk IDs to fetch. - scores: Corresponding similarity scores. - return_full_content: Whether to include full content. - category: Optional category filter ('code' or 'doc'). - - Returns: - List of SearchResult objects. - """ - if not chunk_ids: - return [] - - # Build parameterized query for IN clause - placeholders = ",".join("?" * len(chunk_ids)) - _validate_sql_placeholders(placeholders, len(chunk_ids)) - - # SQL injection prevention: - # - Only a validated placeholders string (commas + '?') is interpolated into the query. - # - User-provided values are passed separately via sqlite3 parameters. - # - Category filter is added as a separate parameter - if category: - query = """ - SELECT id, file_path, content, metadata - FROM semantic_chunks - WHERE id IN ({placeholders}) AND category = ? - """.format(placeholders=placeholders) - params = list(chunk_ids) + [category] - else: - query = """ - SELECT id, file_path, content, metadata - FROM semantic_chunks - WHERE id IN ({placeholders}) - """.format(placeholders=placeholders) - params = chunk_ids - - with sqlite3.connect(self.db_path) as conn: - conn.execute("PRAGMA mmap_size = 30000000000") - rows = conn.execute(query, params).fetchall() - - # Build ID -> row mapping - id_to_row = {r[0]: r for r in rows} - - results = [] - for chunk_id, score in zip(chunk_ids, scores): - row = id_to_row.get(chunk_id) - if not row: - continue - - _, file_path, content, metadata_json = row - metadata = json.loads(metadata_json) if metadata_json else {} - - # Build excerpt (short preview) - excerpt = content[:200] + "..." if len(content) > 200 else content - - # Extract symbol information from metadata - symbol_name = metadata.get("symbol_name") - symbol_kind = metadata.get("symbol_kind") - start_line = metadata.get("start_line") - end_line = metadata.get("end_line") - - # Build Symbol object if we have symbol info - symbol = None - if symbol_name and symbol_kind and start_line and end_line: - try: - from codexlens.entities import Symbol - symbol = Symbol( - name=symbol_name, - kind=symbol_kind, - range=(start_line, end_line) - ) - except Exception: - pass - - results.append(SearchResult( - path=file_path, - score=score, - excerpt=excerpt, - content=content if return_full_content else None, - symbol=symbol, - metadata=metadata, - start_line=start_line, - end_line=end_line, - symbol_name=symbol_name, - symbol_kind=symbol_kind, - )) - - return results - - def count_chunks(self) -> int: - """Count total chunks in store.""" - with sqlite3.connect(self.db_path) as conn: - row = conn.execute("SELECT COUNT(*) FROM semantic_chunks").fetchone() - return row[0] if row else 0 - - def get_all_chunks(self) -> List[SemanticChunk]: - """Get all chunks from the store. - - Returns: - List of SemanticChunk objects with id and content. - """ - with sqlite3.connect(self.db_path) as conn: - conn.row_factory = sqlite3.Row - rows = conn.execute( - "SELECT id, file_path, content, metadata FROM semantic_chunks" - ).fetchall() - - chunks = [] - for row in rows: - chunks.append(SemanticChunk( - id=row["id"], - content=row["content"], - file_path=row["file_path"], - metadata=json.loads(row["metadata"]) if row["metadata"] else None, - )) - return chunks - - def clear_cache(self) -> None: - """Manually clear the embedding cache.""" - self._invalidate_cache() - - @property - def ann_available(self) -> bool: - """Check if ANN index is available and ready.""" - return ( - HNSWLIB_AVAILABLE - and self._ann_index is not None - and self._ann_index.is_loaded - ) - - @property - def ann_count(self) -> int: - """Get number of vectors in ANN index.""" - if self._ann_index is not None: - return self._ann_index.count() - return 0 - - def get_model_config(self) -> Optional[Dict[str, Any]]: - """Get the model configuration used for embeddings in this store. - - Returns: - Dictionary with model_profile, model_name, embedding_dim, backend, or None if not set. - """ - with sqlite3.connect(self.db_path) as conn: - row = conn.execute( - "SELECT model_profile, model_name, embedding_dim, backend, created_at, updated_at " - "FROM embeddings_config WHERE id = 1" - ).fetchone() - if row: - return { - "model_profile": row[0], - "model_name": row[1], - "embedding_dim": row[2], - "backend": row[3], - "created_at": row[4], - "updated_at": row[5], - } - return None - - def set_model_config( - self, model_profile: str, model_name: str, embedding_dim: int, backend: str = 'fastembed' - ) -> None: - """Set the model configuration for embeddings in this store. - - This should be called when generating new embeddings. If a different - model was previously used, this will update the configuration. - - Args: - model_profile: Model profile name (fast, code, minilm, etc.) - model_name: Full model name (e.g., jinaai/jina-embeddings-v2-base-code) - embedding_dim: Embedding dimension (e.g., 768) - backend: Backend used for embeddings (fastembed or litellm, default: fastembed) - """ - with sqlite3.connect(self.db_path) as conn: - conn.execute( - """ - INSERT INTO embeddings_config (id, model_profile, model_name, embedding_dim, backend) - VALUES (1, ?, ?, ?, ?) - ON CONFLICT(id) DO UPDATE SET - model_profile = excluded.model_profile, - model_name = excluded.model_name, - embedding_dim = excluded.embedding_dim, - backend = excluded.backend, - updated_at = CURRENT_TIMESTAMP - """, - (model_profile, model_name, embedding_dim, backend) - ) - conn.commit() - - def check_model_compatibility( - self, model_profile: str, model_name: str, embedding_dim: int - ) -> Tuple[bool, Optional[str]]: - """Check if the given model is compatible with existing embeddings. - - Args: - model_profile: Model profile to check - model_name: Model name to check - embedding_dim: Embedding dimension to check - - Returns: - Tuple of (is_compatible, warning_message). - is_compatible is True if no existing config or configs match. - warning_message is a user-friendly message if incompatible. - """ - existing = self.get_model_config() - if existing is None: - return True, None - - # Check dimension first (most critical) - if existing["embedding_dim"] != embedding_dim: - return False, ( - f"Dimension mismatch: existing embeddings use {existing['embedding_dim']}d " - f"({existing['model_profile']}), but requested model uses {embedding_dim}d " - f"({model_profile}). Use --force to regenerate all embeddings." - ) - - # Check model (different models with same dimension may have different semantic spaces) - if existing["model_profile"] != model_profile: - return False, ( - f"Model mismatch: existing embeddings use '{existing['model_profile']}' " - f"({existing['model_name']}), but requested '{model_profile}' " - f"({model_name}). Use --force to regenerate all embeddings." - ) - - return True, None - - def close(self) -> None: - """Close the vector store and release resources. - - This ensures SQLite connections are closed and ANN index is cleared, - allowing temporary files to be deleted on Windows. - """ - with self._cache_lock: - self._embedding_matrix = None - self._embedding_norms = None - self._chunk_ids = None - - with self._ann_write_lock: - self._ann_index = None - - def __enter__(self) -> "VectorStore": - """Context manager entry.""" - return self - - def __exit__(self, exc_type, exc_val, exc_tb) -> None: - """Context manager exit - close resources.""" - self.close() diff --git a/codex-lens/src/codexlens/storage/__init__.py b/codex-lens/src/codexlens/storage/__init__.py deleted file mode 100644 index 815bc961..00000000 --- a/codex-lens/src/codexlens/storage/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ -"""Storage backends for CodexLens.""" - -from __future__ import annotations - -from .sqlite_store import SQLiteStore -from .path_mapper import PathMapper -from .registry import RegistryStore, ProjectInfo, DirMapping -from .dir_index import DirIndexStore, SubdirLink, FileEntry -from .index_tree import IndexTreeBuilder, BuildResult, DirBuildResult -from .vector_meta_store import VectorMetadataStore - -__all__ = [ - # Legacy (workspace-local) - "SQLiteStore", - # Path mapping - "PathMapper", - # Global registry - "RegistryStore", - "ProjectInfo", - "DirMapping", - # Directory index - "DirIndexStore", - "SubdirLink", - "FileEntry", - # Tree builder - "IndexTreeBuilder", - "BuildResult", - "DirBuildResult", - # Vector metadata - "VectorMetadataStore", -] - diff --git a/codex-lens/src/codexlens/storage/deepwiki_models.py b/codex-lens/src/codexlens/storage/deepwiki_models.py deleted file mode 100644 index e86665c8..00000000 --- a/codex-lens/src/codexlens/storage/deepwiki_models.py +++ /dev/null @@ -1,120 +0,0 @@ -"""Pydantic models for DeepWiki index storage. - -DeepWiki stores mappings between source files, symbols, and generated documentation -for the DeepWiki documentation generation system. -""" - -from __future__ import annotations - -from datetime import datetime -from typing import Any, List, Optional, Tuple - -from pydantic import BaseModel, Field, field_validator - - -class DeepWikiSymbol(BaseModel): - """A symbol record in the DeepWiki index. - - Maps a code symbol to its generated documentation file and anchor. - """ - - id: Optional[int] = Field(default=None, description="Database row ID") - name: str = Field(..., min_length=1, description="Symbol name (function, class, etc.)") - type: str = Field(..., min_length=1, description="Symbol type (function, class, method, variable)") - source_file: str = Field(..., min_length=1, description="Path to source file containing the symbol") - doc_file: str = Field(..., min_length=1, description="Path to generated documentation file") - anchor: str = Field(..., min_length=1, description="HTML anchor ID for linking to specific section") - line_range: Tuple[int, int] = Field( - ..., - description="(start_line, end_line) in source file, 1-based inclusive" - ) - created_at: Optional[datetime] = Field(default=None, description="Record creation timestamp") - updated_at: Optional[datetime] = Field(default=None, description="Record update timestamp") - staleness_score: float = Field(default=0.0, ge=0.0, le=1.0, description="Staleness score (0.0=fresh, 1.0=stale)") - last_checked_commit: Optional[str] = Field(default=None, description="Git commit hash at last freshness check") - last_checked_at: Optional[float] = Field(default=None, description="Timestamp of last freshness check") - staleness_factors: Optional[dict[str, Any]] = Field(default=None, description="JSON factors contributing to staleness score") - - @field_validator("line_range") - @classmethod - def validate_line_range(cls, value: Tuple[int, int]) -> Tuple[int, int]: - """Validate line range is proper tuple with start <= end.""" - if len(value) != 2: - raise ValueError("line_range must be a (start_line, end_line) tuple") - start_line, end_line = value - if start_line < 1 or end_line < 1: - raise ValueError("line_range lines must be >= 1") - if end_line < start_line: - raise ValueError("end_line must be >= start_line") - return value - - @field_validator("name", "type", "source_file", "doc_file", "anchor") - @classmethod - def strip_and_validate_nonempty(cls, value: str) -> str: - """Strip whitespace and validate non-empty.""" - cleaned = value.strip() - if not cleaned: - raise ValueError("value cannot be blank") - return cleaned - - -class DeepWikiDoc(BaseModel): - """A documentation file record in the DeepWiki index. - - Tracks generated documentation files and their associated symbols. - """ - - id: Optional[int] = Field(default=None, description="Database row ID") - path: str = Field(..., min_length=1, description="Path to documentation file") - content_hash: str = Field(..., min_length=1, description="SHA256 hash of file content for change detection") - symbols: List[str] = Field( - default_factory=list, - description="List of symbol names documented in this file" - ) - generated_at: datetime = Field( - default_factory=datetime.utcnow, - description="Timestamp when documentation was generated" - ) - llm_tool: Optional[str] = Field( - default=None, - description="LLM tool used to generate documentation (gemini/qwen)" - ) - - @field_validator("path", "content_hash") - @classmethod - def strip_and_validate_nonempty(cls, value: str) -> str: - """Strip whitespace and validate non-empty.""" - cleaned = value.strip() - if not cleaned: - raise ValueError("value cannot be blank") - return cleaned - - -class DeepWikiFile(BaseModel): - """A source file record in the DeepWiki index. - - Tracks indexed source files and their content hashes for incremental updates. - """ - - id: Optional[int] = Field(default=None, description="Database row ID") - path: str = Field(..., min_length=1, description="Path to source file") - content_hash: str = Field(..., min_length=1, description="SHA256 hash of file content") - last_indexed: datetime = Field( - default_factory=datetime.utcnow, - description="Timestamp when file was last indexed" - ) - symbols_count: int = Field(default=0, ge=0, description="Number of symbols indexed from this file") - docs_generated: bool = Field(default=False, description="Whether documentation has been generated") - staleness_score: float = Field(default=0.0, ge=0.0, le=1.0, description="Staleness score (0.0=fresh, 1.0=stale)") - last_checked_commit: Optional[str] = Field(default=None, description="Git commit hash at last freshness check") - last_checked_at: Optional[float] = Field(default=None, description="Timestamp of last freshness check") - staleness_factors: Optional[dict[str, Any]] = Field(default=None, description="JSON factors contributing to staleness score") - - @field_validator("path", "content_hash") - @classmethod - def strip_and_validate_nonempty(cls, value: str) -> str: - """Strip whitespace and validate non-empty.""" - cleaned = value.strip() - if not cleaned: - raise ValueError("value cannot be blank") - return cleaned diff --git a/codex-lens/src/codexlens/storage/deepwiki_store.py b/codex-lens/src/codexlens/storage/deepwiki_store.py deleted file mode 100644 index dc7bd32b..00000000 --- a/codex-lens/src/codexlens/storage/deepwiki_store.py +++ /dev/null @@ -1,1404 +0,0 @@ -"""DeepWiki SQLite storage for documentation index. - -Stores mappings between source files, code symbols, and generated documentation -for the DeepWiki documentation generation system. - -Schema: -- deepwiki_files: Tracked source files with content hashes -- deepwiki_docs: Generated documentation files -- deepwiki_symbols: Symbol-to-documentation mappings -""" - -from __future__ import annotations - -import hashlib -import json -import logging -import math # noqa: F401 - used in calculate_staleness_score -import sqlite3 -import threading -import time -from datetime import datetime -from pathlib import Path -from typing import Any, Dict, List, Optional - -from codexlens.errors import StorageError -from codexlens.storage.deepwiki_models import DeepWikiDoc, DeepWikiFile, DeepWikiSymbol - -logger = logging.getLogger(__name__) - - -class DeepWikiStore: - """SQLite storage for DeepWiki documentation index. - - Provides: - - File tracking with content hashes for incremental updates - - Symbol-to-documentation mappings for navigation - - Documentation file metadata tracking - - Thread-safe with connection pooling and WAL mode. - """ - - DEFAULT_DB_PATH = Path.home() / ".codexlens" / "deepwiki_index.db" - SCHEMA_VERSION = 2 - - def __init__(self, db_path: Path | None = None) -> None: - """Initialize DeepWiki store. - - Args: - db_path: Path to SQLite database file. Uses default if None. - """ - self.db_path = (db_path or self.DEFAULT_DB_PATH).resolve() - self._lock = threading.RLock() - self._local = threading.local() - self._pool_lock = threading.Lock() - self._pool: Dict[int, sqlite3.Connection] = {} - self._pool_generation = 0 - - def _get_connection(self) -> sqlite3.Connection: - """Get or create a thread-local database connection. - - Each thread gets its own connection with WAL mode enabled. - """ - thread_id = threading.get_ident() - if getattr(self._local, "generation", None) == self._pool_generation: - conn = getattr(self._local, "conn", None) - if conn is not None: - return conn - - with self._pool_lock: - conn = self._pool.get(thread_id) - if conn is None: - conn = sqlite3.connect(self.db_path, check_same_thread=False) - conn.row_factory = sqlite3.Row - conn.execute("PRAGMA journal_mode=WAL") - conn.execute("PRAGMA synchronous=NORMAL") - conn.execute("PRAGMA foreign_keys=ON") - self._pool[thread_id] = conn - - self._local.conn = conn - self._local.generation = self._pool_generation - return conn - - def close(self) -> None: - """Close all pooled connections.""" - with self._lock: - with self._pool_lock: - for conn in self._pool.values(): - conn.close() - self._pool.clear() - self._pool_generation += 1 - - if hasattr(self._local, "conn"): - self._local.conn = None - if hasattr(self._local, "generation"): - self._local.generation = self._pool_generation - - def __enter__(self) -> DeepWikiStore: - self.initialize() - return self - - def __exit__(self, exc_type: object, exc: object, tb: object) -> None: - self.close() - - def initialize(self) -> None: - """Create database and schema if not exists.""" - with self._lock: - self.db_path.parent.mkdir(parents=True, exist_ok=True) - conn = self._get_connection() - self._create_schema(conn) - - def _create_schema(self, conn: sqlite3.Connection) -> None: - """Create DeepWiki database schema.""" - try: - # Schema version tracking - conn.execute( - """ - CREATE TABLE IF NOT EXISTS deepwiki_schema ( - version INTEGER PRIMARY KEY, - applied_at REAL - ) - """ - ) - - # Files table: track indexed source files - conn.execute( - """ - CREATE TABLE IF NOT EXISTS deepwiki_files ( - id INTEGER PRIMARY KEY, - path TEXT UNIQUE NOT NULL, - content_hash TEXT NOT NULL, - last_indexed REAL NOT NULL, - symbols_count INTEGER DEFAULT 0, - docs_generated INTEGER DEFAULT 0, - staleness_score REAL DEFAULT 0.0, - last_checked_commit TEXT, - last_checked_at REAL, - staleness_factors TEXT - ) - """ - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_deepwiki_files_path ON deepwiki_files(path)" - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_deepwiki_files_hash ON deepwiki_files(content_hash)" - ) - - # Docs table: track generated documentation files - conn.execute( - """ - CREATE TABLE IF NOT EXISTS deepwiki_docs ( - id INTEGER PRIMARY KEY, - path TEXT UNIQUE NOT NULL, - content_hash TEXT NOT NULL, - symbols TEXT DEFAULT '[]', - generated_at REAL NOT NULL, - llm_tool TEXT - ) - """ - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_deepwiki_docs_path ON deepwiki_docs(path)" - ) - - # Symbols table: map source symbols to documentation - conn.execute( - """ - CREATE TABLE IF NOT EXISTS deepwiki_symbols ( - id INTEGER PRIMARY KEY, - name TEXT NOT NULL, - type TEXT NOT NULL, - source_file TEXT NOT NULL, - doc_file TEXT NOT NULL, - anchor TEXT NOT NULL, - start_line INTEGER NOT NULL, - end_line INTEGER NOT NULL, - created_at REAL, - updated_at REAL, - staleness_score REAL DEFAULT 0.0, - last_checked_commit TEXT, - last_checked_at REAL, - staleness_factors TEXT, - UNIQUE(name, source_file) - ) - """ - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_deepwiki_symbols_name ON deepwiki_symbols(name)" - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_deepwiki_symbols_source ON deepwiki_symbols(source_file)" - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_deepwiki_symbols_doc ON deepwiki_symbols(doc_file)" - ) - - # Generation progress table for LLM document generation tracking - conn.execute( - """ - CREATE TABLE IF NOT EXISTS generation_progress ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - symbol_key TEXT NOT NULL UNIQUE, - file_path TEXT NOT NULL, - symbol_name TEXT NOT NULL, - symbol_type TEXT NOT NULL, - layer INTEGER NOT NULL, - source_hash TEXT NOT NULL, - status TEXT NOT NULL DEFAULT 'pending', - attempts INTEGER DEFAULT 0, - last_tool TEXT, - last_error TEXT, - generated_at REAL, - created_at REAL, - updated_at REAL - ) - """ - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_progress_status ON generation_progress(status)" - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_progress_file ON generation_progress(file_path)" - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_progress_hash ON generation_progress(source_hash)" - ) - - # Record schema version - conn.execute( - """ - INSERT OR IGNORE INTO deepwiki_schema(version, applied_at) - VALUES(?, ?) - """, - (self.SCHEMA_VERSION, time.time()), - ) - - # Schema v2 migration: add staleness columns - staleness_columns = [ - ("deepwiki_files", "staleness_score", "REAL DEFAULT 0.0"), - ("deepwiki_files", "last_checked_commit", "TEXT"), - ("deepwiki_files", "last_checked_at", "REAL"), - ("deepwiki_files", "staleness_factors", "TEXT"), - ("deepwiki_symbols", "staleness_score", "REAL DEFAULT 0.0"), - ("deepwiki_symbols", "last_checked_commit", "TEXT"), - ("deepwiki_symbols", "last_checked_at", "REAL"), - ("deepwiki_symbols", "staleness_factors", "TEXT"), - ] - for table, col, col_type in staleness_columns: - try: - conn.execute(f"ALTER TABLE {table} ADD COLUMN {col} {col_type}") - except sqlite3.OperationalError: - pass # Column already exists - - # Legacy migration: some earlier DeepWiki DBs stored timestamps as TEXT (ISO strings). - # better-sqlite3 + JS code expects numeric (REAL) seconds, so ensure timestamp columns - # have REAL affinity by rebuilding affected tables when needed. - self._migrate_text_timestamps_to_real(conn) - - conn.commit() - except sqlite3.DatabaseError as exc: - raise StorageError( - f"Failed to initialize DeepWiki schema: {exc}", - db_path=str(self.db_path), - operation="initialize", - ) from exc - - def _normalize_path(self, path: str | Path) -> str: - """Normalize path for storage (forward slashes). - - Args: - path: Path to normalize. - - Returns: - Normalized path string with forward slashes. - """ - return str(Path(path).resolve()).replace("\\", "/") - - def _migrate_text_timestamps_to_real(self, conn: sqlite3.Connection) -> None: - """Migrate legacy TEXT timestamp columns to REAL affinity. - - SQLite's type system is dynamic, but column affinity influences how values are stored and - returned. Older DeepWiki databases used TEXT timestamps (often ISO strings). The current - schema uses REAL epoch seconds. When we detect TEXT affinity on timestamp columns, we - rebuild the table with REAL columns and convert existing values during copy. - """ - - self._rebuild_table_with_timestamp_conversion( - conn, - table="deepwiki_files", - create_sql=""" - CREATE TABLE deepwiki_files ( - id INTEGER PRIMARY KEY, - path TEXT UNIQUE NOT NULL, - content_hash TEXT NOT NULL, - last_indexed REAL NOT NULL, - symbols_count INTEGER DEFAULT 0, - docs_generated INTEGER DEFAULT 0, - staleness_score REAL DEFAULT 0.0, - last_checked_commit TEXT, - last_checked_at REAL, - staleness_factors TEXT - ) - """, - timestamp_columns={"last_indexed", "last_checked_at"}, - required_timestamp_columns={"last_indexed"}, - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_deepwiki_files_path ON deepwiki_files(path)" - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_deepwiki_files_hash ON deepwiki_files(content_hash)" - ) - - self._rebuild_table_with_timestamp_conversion( - conn, - table="deepwiki_docs", - create_sql=""" - CREATE TABLE deepwiki_docs ( - id INTEGER PRIMARY KEY, - path TEXT UNIQUE NOT NULL, - content_hash TEXT NOT NULL, - symbols TEXT DEFAULT '[]', - generated_at REAL NOT NULL, - llm_tool TEXT - ) - """, - timestamp_columns={"generated_at"}, - required_timestamp_columns={"generated_at"}, - ) - conn.execute("CREATE INDEX IF NOT EXISTS idx_deepwiki_docs_path ON deepwiki_docs(path)") - - self._rebuild_table_with_timestamp_conversion( - conn, - table="deepwiki_symbols", - create_sql=""" - CREATE TABLE deepwiki_symbols ( - id INTEGER PRIMARY KEY, - name TEXT NOT NULL, - type TEXT NOT NULL, - source_file TEXT NOT NULL, - doc_file TEXT NOT NULL, - anchor TEXT NOT NULL, - start_line INTEGER NOT NULL, - end_line INTEGER NOT NULL, - created_at REAL, - updated_at REAL, - staleness_score REAL DEFAULT 0.0, - last_checked_commit TEXT, - last_checked_at REAL, - staleness_factors TEXT, - UNIQUE(name, source_file) - ) - """, - timestamp_columns={"created_at", "updated_at", "last_checked_at"}, - required_timestamp_columns=set(), - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_deepwiki_symbols_name ON deepwiki_symbols(name)" - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_deepwiki_symbols_source ON deepwiki_symbols(source_file)" - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_deepwiki_symbols_doc ON deepwiki_symbols(doc_file)" - ) - - self._rebuild_table_with_timestamp_conversion( - conn, - table="generation_progress", - create_sql=""" - CREATE TABLE generation_progress ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - symbol_key TEXT NOT NULL UNIQUE, - file_path TEXT NOT NULL, - symbol_name TEXT NOT NULL, - symbol_type TEXT NOT NULL, - layer INTEGER NOT NULL, - source_hash TEXT NOT NULL, - status TEXT NOT NULL DEFAULT 'pending', - attempts INTEGER DEFAULT 0, - last_tool TEXT, - last_error TEXT, - generated_at REAL, - created_at REAL, - updated_at REAL - ) - """, - timestamp_columns={"generated_at", "created_at", "updated_at"}, - required_timestamp_columns=set(), - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_progress_status ON generation_progress(status)" - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_progress_file ON generation_progress(file_path)" - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_progress_hash ON generation_progress(source_hash)" - ) - - def _rebuild_table_with_timestamp_conversion( - self, - conn: sqlite3.Connection, - *, - table: str, - create_sql: str, - timestamp_columns: set[str], - required_timestamp_columns: set[str], - ) -> None: - info = conn.execute(f"PRAGMA table_info({table})").fetchall() - if not info: - return - - declared_types = { - row["name"]: str(row["type"] or "").strip().upper() for row in info - } - needs_migration = any( - declared_types.get(col) == "TEXT" for col in timestamp_columns if col in declared_types - ) - if not needs_migration: - return - - old_table = f"{table}__old_ts" - conn.execute(f"ALTER TABLE {table} RENAME TO {old_table}") - conn.execute(create_sql) - - old_cols = [ - r["name"] - for r in conn.execute(f"PRAGMA table_info({old_table})").fetchall() - ] - new_cols = [r["name"] for r in conn.execute(f"PRAGMA table_info({table})").fetchall()] - common_cols = [c for c in new_cols if c in old_cols] - - select_exprs: list[str] = [] - for col in common_cols: - if col in timestamp_columns: - expr = self._sql_timestamp_to_real(col) - if col in required_timestamp_columns: - expr = f"COALESCE({expr}, CAST(strftime('%s','now') AS REAL))" - select_exprs.append(f"{expr} AS {col}") - else: - select_exprs.append(col) - - cols_sql = ", ".join(common_cols) - select_sql = ", ".join(select_exprs) - conn.execute( - f"INSERT INTO {table} ({cols_sql}) SELECT {select_sql} FROM {old_table}" - ) - conn.execute(f"DROP TABLE {old_table}") - - def _sql_timestamp_to_real(self, col: str) -> str: - # Convert various timestamp representations to epoch seconds (REAL). - # - numeric types: keep as REAL - # - numeric strings: CAST to REAL - # - ISO datetime strings: strftime('%s', ...) to epoch seconds - return f"""( - CASE - WHEN {col} IS NULL THEN NULL - WHEN typeof({col}) IN ('integer', 'real') THEN CAST({col} AS REAL) - WHEN trim({col}) GLOB '[0-9]*' THEN CAST({col} AS REAL) - ELSE CAST(strftime('%s', replace(substr({col}, 1, 19), 'T', ' ')) AS REAL) - END - )""" - - # === File Operations === - - def add_file( - self, - file_path: str | Path, - content_hash: str, - symbols_count: int = 0, - docs_generated: bool = False, - ) -> DeepWikiFile: - """Add or update a tracked source file. - - Args: - file_path: Path to the source file. - content_hash: SHA256 hash of file content. - symbols_count: Number of symbols indexed from this file. - docs_generated: Whether documentation has been generated. - - Returns: - DeepWikiFile record. - """ - with self._lock: - conn = self._get_connection() - path_str = self._normalize_path(file_path) - now = time.time() - - conn.execute( - """ - INSERT INTO deepwiki_files(path, content_hash, last_indexed, symbols_count, docs_generated) - VALUES(?, ?, ?, ?, ?) - ON CONFLICT(path) DO UPDATE SET - content_hash=excluded.content_hash, - last_indexed=excluded.last_indexed, - symbols_count=excluded.symbols_count, - docs_generated=excluded.docs_generated - """, - (path_str, content_hash, now, symbols_count, 1 if docs_generated else 0), - ) - conn.commit() - - row = conn.execute( - "SELECT * FROM deepwiki_files WHERE path=?", (path_str,) - ).fetchone() - - if not row: - raise StorageError( - f"Failed to add file: {file_path}", - db_path=str(self.db_path), - operation="add_file", - ) - - return self._row_to_deepwiki_file(row) - - def get_file(self, file_path: str | Path) -> Optional[DeepWikiFile]: - """Get a tracked file by path. - - Args: - file_path: Path to the source file. - - Returns: - DeepWikiFile if found, None otherwise. - """ - with self._lock: - conn = self._get_connection() - path_str = self._normalize_path(file_path) - row = conn.execute( - "SELECT * FROM deepwiki_files WHERE path=?", (path_str,) - ).fetchone() - return self._row_to_deepwiki_file(row) if row else None - - def get_file_hash(self, file_path: str | Path) -> Optional[str]: - """Get content hash for a file. - - Used for incremental update detection. - - Args: - file_path: Path to the source file. - - Returns: - SHA256 content hash if file is tracked, None otherwise. - """ - with self._lock: - conn = self._get_connection() - path_str = self._normalize_path(file_path) - row = conn.execute( - "SELECT content_hash FROM deepwiki_files WHERE path=?", (path_str,) - ).fetchone() - return row["content_hash"] if row else None - - def update_file_hash(self, file_path: str | Path, content_hash: str) -> None: - """Update content hash for a tracked file. - - Args: - file_path: Path to the source file. - content_hash: New SHA256 hash of file content. - """ - with self._lock: - conn = self._get_connection() - path_str = self._normalize_path(file_path) - now = time.time() - - conn.execute( - """ - UPDATE deepwiki_files - SET content_hash=?, last_indexed=? - WHERE path=? - """, - (content_hash, now, path_str), - ) - conn.commit() - - def update_file_staleness( - self, - file_path: str | Path, - staleness_score: float, - commit: str | None = None, - factors: Dict[str, Any] | None = None, - ) -> None: - """Update staleness data for a tracked file. - - Args: - file_path: Path to the source file. - staleness_score: Staleness score (0.0-1.0). - commit: Git commit hash at check time. - factors: Dict of factors contributing to the score. - """ - with self._lock: - conn = self._get_connection() - path_str = self._normalize_path(file_path) - now = time.time() - factors_json = json.dumps(factors) if factors else None - - conn.execute( - """ - UPDATE deepwiki_files - SET staleness_score=?, last_checked_commit=?, last_checked_at=?, staleness_factors=? - WHERE path=? - """, - (staleness_score, commit, now, factors_json, path_str), - ) - conn.commit() - - def update_symbol_staleness( - self, - name: str, - source_file: str | Path, - staleness_score: float, - commit: str | None = None, - factors: Dict[str, Any] | None = None, - ) -> None: - """Update staleness data for a symbol. - - Args: - name: Symbol name. - source_file: Path to the source file. - staleness_score: Staleness score (0.0-1.0). - commit: Git commit hash at check time. - factors: Dict of factors contributing to the score. - """ - with self._lock: - conn = self._get_connection() - path_str = self._normalize_path(source_file) - now = time.time() - factors_json = json.dumps(factors) if factors else None - - conn.execute( - """ - UPDATE deepwiki_symbols - SET staleness_score=?, last_checked_commit=?, last_checked_at=?, staleness_factors=? - WHERE name=? AND source_file=? - """, - (staleness_score, commit, now, factors_json, name, path_str), - ) - conn.commit() - - def remove_file(self, file_path: str | Path) -> bool: - """Remove a tracked file and its associated symbols. - - Args: - file_path: Path to the source file. - - Returns: - True if file was removed, False if not found. - """ - with self._lock: - conn = self._get_connection() - path_str = self._normalize_path(file_path) - - row = conn.execute( - "SELECT id FROM deepwiki_files WHERE path=?", (path_str,) - ).fetchone() - - if not row: - return False - - # Delete associated symbols first - conn.execute("DELETE FROM deepwiki_symbols WHERE source_file=?", (path_str,)) - conn.execute("DELETE FROM deepwiki_files WHERE path=?", (path_str,)) - conn.commit() - return True - - def list_files( - self, needs_docs: bool = False, limit: int = 1000 - ) -> List[DeepWikiFile]: - """List tracked files. - - Args: - needs_docs: If True, only return files that need documentation generated. - limit: Maximum number of files to return. - - Returns: - List of DeepWikiFile records. - """ - with self._lock: - conn = self._get_connection() - - if needs_docs: - rows = conn.execute( - """ - SELECT * FROM deepwiki_files - WHERE docs_generated = 0 - ORDER BY last_indexed DESC - LIMIT ? - """, - (limit,), - ).fetchall() - else: - rows = conn.execute( - """ - SELECT * FROM deepwiki_files - ORDER BY last_indexed DESC - LIMIT ? - """, - (limit,), - ).fetchall() - - return [self._row_to_deepwiki_file(row) for row in rows] - - def get_stats(self) -> Dict[str, int]: - """Get statistics about the DeepWiki index. - - Returns: - Dictionary with counts of files, symbols, and docs. - """ - with self._lock: - conn = self._get_connection() - - files_count = conn.execute( - "SELECT COUNT(*) as count FROM deepwiki_files" - ).fetchone()["count"] - - symbols_count = conn.execute( - "SELECT COUNT(*) as count FROM deepwiki_symbols" - ).fetchone()["count"] - - docs_count = conn.execute( - "SELECT COUNT(*) as count FROM deepwiki_docs" - ).fetchone()["count"] - - return { - "files_count": files_count, - "symbols_count": symbols_count, - "docs_count": docs_count, - } - - # === Symbol Operations === - - def add_symbol(self, symbol: DeepWikiSymbol) -> DeepWikiSymbol: - """Add or update a symbol in the index. - - Args: - symbol: DeepWikiSymbol to add. - - Returns: - DeepWikiSymbol with ID populated. - """ - with self._lock: - conn = self._get_connection() - source_file = self._normalize_path(symbol.source_file) - doc_file = self._normalize_path(symbol.doc_file) - now = time.time() - - conn.execute( - """ - INSERT INTO deepwiki_symbols( - name, type, source_file, doc_file, anchor, - start_line, end_line, created_at, updated_at - ) - VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?) - ON CONFLICT(name, source_file) DO UPDATE SET - type=excluded.type, - doc_file=excluded.doc_file, - anchor=excluded.anchor, - start_line=excluded.start_line, - end_line=excluded.end_line, - updated_at=excluded.updated_at - """, - ( - symbol.name, - symbol.type, - source_file, - doc_file, - symbol.anchor, - symbol.line_range[0], - symbol.line_range[1], - now, - now, - ), - ) - conn.commit() - - row = conn.execute( - """ - SELECT * FROM deepwiki_symbols - WHERE name=? AND source_file=? - """, - (symbol.name, source_file), - ).fetchone() - - if not row: - raise StorageError( - f"Failed to add symbol: {symbol.name}", - db_path=str(self.db_path), - operation="add_symbol", - ) - - return self._row_to_deepwiki_symbol(row) - - def get_symbols_for_file(self, file_path: str | Path) -> List[DeepWikiSymbol]: - """Get all symbols for a source file. - - Args: - file_path: Path to the source file. - - Returns: - List of DeepWikiSymbol records for the file. - """ - with self._lock: - conn = self._get_connection() - path_str = self._normalize_path(file_path) - rows = conn.execute( - """ - SELECT * FROM deepwiki_symbols - WHERE source_file=? - ORDER BY start_line - """, - (path_str,), - ).fetchall() - return [self._row_to_deepwiki_symbol(row) for row in rows] - - def get_symbol(self, name: str, source_file: str | Path) -> Optional[DeepWikiSymbol]: - """Get a specific symbol by name and source file. - - Args: - name: Symbol name. - source_file: Path to the source file. - - Returns: - DeepWikiSymbol if found, None otherwise. - """ - with self._lock: - conn = self._get_connection() - path_str = self._normalize_path(source_file) - row = conn.execute( - """ - SELECT * FROM deepwiki_symbols - WHERE name=? AND source_file=? - """, - (name, path_str), - ).fetchone() - return self._row_to_deepwiki_symbol(row) if row else None - - def search_symbols(self, query: str, limit: int = 50) -> List[DeepWikiSymbol]: - """Search symbols by name. - - Args: - query: Search query (supports LIKE pattern). - limit: Maximum number of results. - - Returns: - List of matching DeepWikiSymbol records. - """ - with self._lock: - conn = self._get_connection() - pattern = f"%{query}%" - rows = conn.execute( - """ - SELECT * FROM deepwiki_symbols - WHERE name LIKE ? - ORDER BY name - LIMIT ? - """, - (pattern, limit), - ).fetchall() - return [self._row_to_deepwiki_symbol(row) for row in rows] - - def delete_symbols_for_file(self, file_path: str | Path) -> int: - """Delete all symbols for a source file. - - Args: - file_path: Path to the source file. - - Returns: - Number of symbols deleted. - """ - with self._lock: - conn = self._get_connection() - path_str = self._normalize_path(file_path) - cursor = conn.execute( - "DELETE FROM deepwiki_symbols WHERE source_file=?", (path_str,) - ) - conn.commit() - return cursor.rowcount - - # === Doc Operations === - - def add_doc(self, doc: DeepWikiDoc) -> DeepWikiDoc: - """Add or update a documentation file record. - - Args: - doc: DeepWikiDoc to add. - - Returns: - DeepWikiDoc with ID populated. - """ - with self._lock: - conn = self._get_connection() - path_str = self._normalize_path(doc.path) - symbols_json = json.dumps(doc.symbols) - now = time.time() - - conn.execute( - """ - INSERT INTO deepwiki_docs(path, content_hash, symbols, generated_at, llm_tool) - VALUES(?, ?, ?, ?, ?) - ON CONFLICT(path) DO UPDATE SET - content_hash=excluded.content_hash, - symbols=excluded.symbols, - generated_at=excluded.generated_at, - llm_tool=excluded.llm_tool - """, - (path_str, doc.content_hash, symbols_json, now, doc.llm_tool), - ) - conn.commit() - - row = conn.execute( - "SELECT * FROM deepwiki_docs WHERE path=?", (path_str,) - ).fetchone() - - if not row: - raise StorageError( - f"Failed to add doc: {doc.path}", - db_path=str(self.db_path), - operation="add_doc", - ) - - return self._row_to_deepwiki_doc(row) - - def get_doc(self, doc_path: str | Path) -> Optional[DeepWikiDoc]: - """Get a documentation file by path. - - Args: - doc_path: Path to the documentation file. - - Returns: - DeepWikiDoc if found, None otherwise. - """ - with self._lock: - conn = self._get_connection() - path_str = self._normalize_path(doc_path) - row = conn.execute( - "SELECT * FROM deepwiki_docs WHERE path=?", (path_str,) - ).fetchone() - return self._row_to_deepwiki_doc(row) if row else None - - def list_docs(self, limit: int = 1000) -> List[DeepWikiDoc]: - """List all documentation files. - - Args: - limit: Maximum number of docs to return. - - Returns: - List of DeepWikiDoc records. - """ - with self._lock: - conn = self._get_connection() - rows = conn.execute( - """ - SELECT * FROM deepwiki_docs - ORDER BY generated_at DESC - LIMIT ? - """, - (limit,), - ).fetchall() - return [self._row_to_deepwiki_doc(row) for row in rows] - - def delete_doc(self, doc_path: str | Path) -> bool: - """Delete a documentation file record. - - Args: - doc_path: Path to the documentation file. - - Returns: - True if deleted, False if not found. - """ - with self._lock: - conn = self._get_connection() - path_str = self._normalize_path(doc_path) - - row = conn.execute( - "SELECT id FROM deepwiki_docs WHERE path=?", (path_str,) - ).fetchone() - - if not row: - return False - - conn.execute("DELETE FROM deepwiki_docs WHERE path=?", (path_str,)) - conn.commit() - return True - - # === Utility Methods === - - def compute_file_hash(self, file_path: str | Path) -> str: - """Compute SHA256 hash of a file's content. - - Args: - file_path: Path to the file. - - Returns: - SHA256 hash string. - """ - sha256 = hashlib.sha256() - path = Path(file_path) - if not path.exists(): - raise FileNotFoundError(f"File not found: {file_path}") - - with open(path, "rb") as f: - for chunk in iter(lambda: f.read(8192), b""): - sha256.update(chunk) - - return sha256.hexdigest() - - @staticmethod - def calculate_staleness_score( - days_since_update: float, - commits_since: int = 0, - files_changed: int = 0, - lines_changed: int = 0, - proportion_changed: float = 0.0, - is_deleted: bool = False, - weights: tuple[float, float, float] = (0.1, 0.4, 0.5), - decay_k: float = 0.05, - ) -> float: - """Calculate staleness score using three-factor formula. - - S = min(1.0, w_t * T + w_c * C + w_s * M) - - Args: - days_since_update: Days since last documentation update. - commits_since: Number of commits since last check. - files_changed: Number of files changed. - lines_changed: Total lines changed. - proportion_changed: Proportion of symbol body changed (0.0-1.0). - is_deleted: Whether the symbol was deleted. - weights: (w_t, w_c, w_s) weights for time, churn, symbol factors. - decay_k: Time decay constant (default 0.05, ~14 days to 50%). - - Returns: - Staleness score between 0.0 and 1.0. - """ - # Deleted symbols are maximally stale - if is_deleted: - return 1.0 - - w_t, w_c, w_s = weights - - # T: Time decay factor - T = 1 - math.exp(-decay_k * max(0, days_since_update)) - - # C: Code churn factor (sigmoid normalization) - churn_raw = ( - math.log1p(commits_since) - + math.log1p(files_changed) - + math.log1p(lines_changed) - ) - C = 1 / (1 + math.exp(-churn_raw + 3)) # sigmoid centered at 3 - - # M: Symbol modification factor - M = min(1.0, max(0.0, proportion_changed)) - - return min(1.0, w_t * T + w_c * C + w_s * M) - - def get_stale_files( - self, files: list[dict[str, str]] - ) -> list[dict[str, str]]: - """Check which files have stale documentation by comparing hashes. - - Args: - files: List of dicts with 'path' and 'hash' keys. - - Returns: - List of file dicts where stored hash differs from provided hash. - """ - with self._lock: - conn = self._get_connection() - if not files: - return [] - - # Build lookup: normalized_path -> original file dict - lookup: dict[str, dict[str, str]] = {} - normalized: list[str] = [] - for f in files: - path_str = self._normalize_path(f["path"]) - lookup[path_str] = f - normalized.append(path_str) - - placeholders = ",".join("?" * len(normalized)) - rows = conn.execute( - f"SELECT path, content_hash FROM deepwiki_files WHERE path IN ({placeholders})", - normalized, - ).fetchall() - - stored: dict[str, str] = {row["path"]: row["content_hash"] for row in rows} - - stale = [] - for path_str, f in lookup.items(): - stored_hash = stored.get(path_str) - if stored_hash is None: - stale.append({"path": f["path"], "stored_hash": None, "current_hash": f["hash"]}) - elif stored_hash != f["hash"]: - stale.append({"path": f["path"], "stored_hash": stored_hash, "current_hash": f["hash"]}) - - return stale - - def get_symbols_for_paths( - self, paths: list[str | Path] - ) -> dict[str, list[DeepWikiSymbol]]: - """Get all symbols for multiple source files. - - Args: - paths: List of source file paths. - - Returns: - Dict mapping normalized path to list of DeepWikiSymbol records. - """ - with self._lock: - conn = self._get_connection() - result: dict[str, list[DeepWikiSymbol]] = {} - - if not paths: - return result - - normalized = [self._normalize_path(p) for p in paths] - placeholders = ",".join("?" * len(normalized)) - rows = conn.execute( - f""" - SELECT * FROM deepwiki_symbols - WHERE source_file IN ({placeholders}) - ORDER BY source_file, start_line - """, - normalized, - ).fetchall() - - for row in rows: - sf = row["source_file"] - result.setdefault(sf, []).append( - self._row_to_deepwiki_symbol(row) - ) - - return result - - def stats(self) -> Dict[str, Any]: - """Get storage statistics. - - Returns: - Dict with counts and metadata. - """ - with self._lock: - conn = self._get_connection() - file_count = conn.execute( - "SELECT COUNT(*) AS c FROM deepwiki_files" - ).fetchone()["c"] - symbol_count = conn.execute( - "SELECT COUNT(*) AS c FROM deepwiki_symbols" - ).fetchone()["c"] - doc_count = conn.execute( - "SELECT COUNT(*) AS c FROM deepwiki_docs" - ).fetchone()["c"] - files_needing_docs = conn.execute( - "SELECT COUNT(*) AS c FROM deepwiki_files WHERE docs_generated = 0" - ).fetchone()["c"] - - return { - "files": int(file_count), - "symbols": int(symbol_count), - "docs": int(doc_count), - "files_needing_docs": int(files_needing_docs), - "db_path": str(self.db_path), - } - - # === Generation Progress Operations === - - def get_progress(self, symbol_key: str) -> Optional[Dict[str, Any]]: - """Get generation progress for a symbol. - - Args: - symbol_key: Unique symbol identifier (file_path:symbol_name:line_start). - - Returns: - Progress record dict if found, None otherwise. - """ - with self._lock: - conn = self._get_connection() - row = conn.execute( - "SELECT * FROM generation_progress WHERE symbol_key=?", - (symbol_key,), - ).fetchone() - return dict(row) if row else None - - def update_progress(self, symbol_key: str, data: Dict[str, Any]) -> None: - """Update or create generation progress for a symbol. - - Args: - symbol_key: Unique symbol identifier (file_path:symbol_name:line_start). - data: Dict with fields to update (file_path, symbol_name, symbol_type, - layer, source_hash, status, attempts, last_tool, last_error, generated_at). - """ - with self._lock: - conn = self._get_connection() - now = time.time() - - # Build update query dynamically - fields = list(data.keys()) - placeholders = ["?"] * len(fields) - values = [data[f] for f in fields] - - conn.execute( - f""" - INSERT INTO generation_progress(symbol_key, {', '.join(fields)}, created_at, updated_at) - VALUES(?, {', '.join(placeholders)}, ?, ?) - ON CONFLICT(symbol_key) DO UPDATE SET - {', '.join(f'{f}=excluded.{f}' for f in fields)}, - updated_at=excluded.updated_at - """, - [symbol_key] + values + [now, now], - ) - conn.commit() - - def mark_completed(self, symbol_key: str, tool: str) -> None: - """Mark a symbol's documentation as completed. - - Args: - symbol_key: Unique symbol identifier. - tool: The LLM tool that generated the documentation. - """ - with self._lock: - conn = self._get_connection() - now = time.time() - - conn.execute( - """ - UPDATE generation_progress - SET status='completed', last_tool=?, generated_at=?, updated_at=? - WHERE symbol_key=? - """, - (tool, now, now, symbol_key), - ) - conn.commit() - - def mark_failed(self, symbol_key: str, error: str, tool: str | None = None) -> None: - """Mark a symbol's documentation generation as failed. - - Args: - symbol_key: Unique symbol identifier. - error: Error message describing the failure. - tool: The LLM tool that was used (optional). - """ - with self._lock: - conn = self._get_connection() - now = time.time() - - if tool: - conn.execute( - """ - UPDATE generation_progress - SET status='failed', last_error=?, last_tool=?, - attempts=attempts+1, updated_at=? - WHERE symbol_key=? - """, - (error, tool, now, symbol_key), - ) - else: - conn.execute( - """ - UPDATE generation_progress - SET status='failed', last_error=?, attempts=attempts+1, updated_at=? - WHERE symbol_key=? - """, - (error, now, symbol_key), - ) - conn.commit() - - def get_pending_symbols(self, limit: int = 1000) -> List[Dict[str, Any]]: - """Get all symbols with pending or failed status for retry. - - Args: - limit: Maximum number of records to return. - - Returns: - List of progress records with pending or failed status. - """ - with self._lock: - conn = self._get_connection() - rows = conn.execute( - """ - SELECT * FROM generation_progress - WHERE status IN ('pending', 'failed') - ORDER BY updated_at ASC - LIMIT ? - """, - (limit,), - ).fetchall() - return [dict(row) for row in rows] - - def get_completed_symbol_keys(self) -> set: - """Get set of all completed symbol keys for orphan detection. - - Returns: - Set of symbol_key strings for completed symbols. - """ - with self._lock: - conn = self._get_connection() - rows = conn.execute( - "SELECT symbol_key FROM generation_progress WHERE status='completed'" - ).fetchall() - return {row["symbol_key"] for row in rows} - - def delete_progress(self, symbol_keys: List[str]) -> int: - """Delete progress records for orphaned symbols. - - Args: - symbol_keys: List of symbol keys to delete. - - Returns: - Number of records deleted. - """ - if not symbol_keys: - return 0 - - with self._lock: - conn = self._get_connection() - placeholders = ",".join("?" * len(symbol_keys)) - cursor = conn.execute( - f"DELETE FROM generation_progress WHERE symbol_key IN ({placeholders})", - symbol_keys, - ) - conn.commit() - return cursor.rowcount - - # === Row Conversion Methods === - - def _row_to_deepwiki_file(self, row: sqlite3.Row) -> DeepWikiFile: - """Convert database row to DeepWikiFile.""" - staleness_factors = None - try: - factors_str = row["staleness_factors"] - if factors_str: - staleness_factors = json.loads(factors_str) - except (KeyError, IndexError): - pass - - return DeepWikiFile( - id=int(row["id"]), - path=row["path"], - content_hash=row["content_hash"], - last_indexed=datetime.fromtimestamp(row["last_indexed"]) - if row["last_indexed"] - else datetime.utcnow(), - symbols_count=int(row["symbols_count"]) if row["symbols_count"] else 0, - docs_generated=bool(row["docs_generated"]), - staleness_score=float(row["staleness_score"]) if row["staleness_score"] else 0.0, - last_checked_commit=row["last_checked_commit"] if "last_checked_commit" in row.keys() else None, - last_checked_at=row["last_checked_at"] if "last_checked_at" in row.keys() else None, - staleness_factors=staleness_factors, - ) - - def _row_to_deepwiki_symbol(self, row: sqlite3.Row) -> DeepWikiSymbol: - """Convert database row to DeepWikiSymbol.""" - created_at = None - if row["created_at"]: - created_at = datetime.fromtimestamp(row["created_at"]) - - updated_at = None - if row["updated_at"]: - updated_at = datetime.fromtimestamp(row["updated_at"]) - - staleness_factors = None - try: - factors_str = row["staleness_factors"] - if factors_str: - staleness_factors = json.loads(factors_str) - except (KeyError, IndexError): - pass - - return DeepWikiSymbol( - id=int(row["id"]), - name=row["name"], - type=row["type"], - source_file=row["source_file"], - doc_file=row["doc_file"], - anchor=row["anchor"], - line_range=(int(row["start_line"]), int(row["end_line"])), - created_at=created_at, - updated_at=updated_at, - staleness_score=float(row["staleness_score"]) if row["staleness_score"] else 0.0, - last_checked_commit=row["last_checked_commit"] if "last_checked_commit" in row.keys() else None, - last_checked_at=row["last_checked_at"] if "last_checked_at" in row.keys() else None, - staleness_factors=staleness_factors, - ) - - def _row_to_deepwiki_doc(self, row: sqlite3.Row) -> DeepWikiDoc: - """Convert database row to DeepWikiDoc.""" - symbols = [] - if row["symbols"]: - try: - symbols = json.loads(row["symbols"]) - except json.JSONDecodeError: - pass - - generated_at = datetime.utcnow() - if row["generated_at"]: - generated_at = datetime.fromtimestamp(row["generated_at"]) - - return DeepWikiDoc( - id=int(row["id"]), - path=row["path"], - content_hash=row["content_hash"], - symbols=symbols, - generated_at=generated_at, - llm_tool=row["llm_tool"], - ) diff --git a/codex-lens/src/codexlens/storage/dir_index.py b/codex-lens/src/codexlens/storage/dir_index.py deleted file mode 100644 index ee9e11c5..00000000 --- a/codex-lens/src/codexlens/storage/dir_index.py +++ /dev/null @@ -1,2358 +0,0 @@ -"""Single-directory index storage with hierarchical linking. - -Each directory maintains its own _index.db with: -- Files in the current directory -- Links to subdirectory indexes -- Full-text search via FTS5 -- Symbol table for code navigation -""" - -from __future__ import annotations - -import logging -import hashlib -import re -import sqlite3 -import threading -import time -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - -from codexlens.config import Config -from codexlens.entities import CodeRelationship, SearchResult, Symbol -from codexlens.errors import StorageError -from codexlens.storage.global_index import GlobalSymbolIndex - - -@dataclass -class SubdirLink: - """Link to a subdirectory's index database.""" - - id: int - name: str - index_path: Path - files_count: int - last_updated: float - - -@dataclass -class FileEntry: - """Metadata for an indexed file in current directory.""" - - id: int - name: str - full_path: Path - language: str - mtime: float - line_count: int - - -class DirIndexStore: - """Single-directory index storage with hierarchical subdirectory linking. - - Each directory has an independent _index.db containing: - - Files table: Files in this directory only - - Subdirs table: Links to child directory indexes - - Symbols table: Code symbols from files - - FTS5 index: Full-text search on file content - - Thread-safe operations with WAL mode enabled. - """ - - # Schema version for migration tracking - # Increment this when schema changes require migration - SCHEMA_VERSION = 8 - - def __init__( - self, - db_path: str | Path, - *, - config: Config | None = None, - global_index: GlobalSymbolIndex | None = None, - ) -> None: - """Initialize directory index store. - - Args: - db_path: Path to _index.db file for this directory - """ - self.db_path = Path(db_path).resolve() - self._lock = threading.RLock() - self._conn: Optional[sqlite3.Connection] = None - self.logger = logging.getLogger(__name__) - self._config = config - self._global_index = global_index - - def initialize(self) -> None: - """Create database and schema if not exists.""" - with self._lock: - self.db_path.parent.mkdir(parents=True, exist_ok=True) - conn = self._get_connection() - - # Check current schema version - current_version = self._get_schema_version(conn) - - # Fail gracefully if database is from a newer version - if current_version > self.SCHEMA_VERSION: - raise StorageError( - f"Database schema version {current_version} is newer than " - f"supported version {self.SCHEMA_VERSION}. " - f"Please update the application or use a compatible database.", - db_path=str(self.db_path), - operation="initialize", - details={ - "current_version": current_version, - "supported_version": self.SCHEMA_VERSION - } - ) - - # Create or migrate schema - if current_version == 0: - # New database - create schema directly - self._create_schema(conn) - self._create_fts_triggers(conn) - self._set_schema_version(conn, self.SCHEMA_VERSION) - elif current_version < self.SCHEMA_VERSION: - # Existing database - apply migrations - self._apply_migrations(conn, current_version) - self._set_schema_version(conn, self.SCHEMA_VERSION) - - conn.commit() - - def _get_schema_version(self, conn: sqlite3.Connection) -> int: - """Get current schema version from database.""" - try: - row = conn.execute("PRAGMA user_version").fetchone() - return row[0] if row else 0 - except Exception: - return 0 - - def _set_schema_version(self, conn: sqlite3.Connection, version: int) -> None: - """Set schema version in database.""" - conn.execute(f"PRAGMA user_version = {version}") - - def _apply_migrations(self, conn: sqlite3.Connection, from_version: int) -> None: - """Apply schema migrations from current version to latest. - - Args: - conn: Database connection - from_version: Current schema version - """ - # Migration v0/v1 -> v2: Add 'name' column to files table - if from_version < 2: - self._migrate_v2_add_name_column(conn) - - # Migration v2 -> v4: Add dual FTS tables (exact + fuzzy) - if from_version < 4: - from codexlens.storage.migrations.migration_004_dual_fts import upgrade - upgrade(conn) - - # Migration v4 -> v5: Remove unused/redundant fields - if from_version < 5: - from codexlens.storage.migrations.migration_005_cleanup_unused_fields import upgrade - upgrade(conn) - - # Migration v5 -> v6: Ensure relationship tables/indexes exist - if from_version < 6: - from codexlens.storage.migrations.migration_006_enhance_relationships import upgrade - upgrade(conn) - - # Migration v6 -> v7: Add graph neighbor cache for search expansion - if from_version < 7: - from codexlens.storage.migrations.migration_007_add_graph_neighbors import upgrade - upgrade(conn) - - # Migration v7 -> v8: Add Merkle hashes for incremental change detection - if from_version < 8: - from codexlens.storage.migrations.migration_008_add_merkle_hashes import upgrade - upgrade(conn) - - def close(self) -> None: - """Close database connection.""" - with self._lock: - if self._conn is not None: - try: - self._conn.close() - except Exception: - pass - finally: - self._conn = None - - def __enter__(self) -> DirIndexStore: - """Context manager entry.""" - self.initialize() - return self - - def __exit__(self, exc_type: object, exc: object, tb: object) -> None: - """Context manager exit.""" - self.close() - - # === File Operations === - - def add_file( - self, - name: str, - full_path: str | Path, - content: str, - language: str, - symbols: Optional[List[Symbol]] = None, - relationships: Optional[List[CodeRelationship]] = None, - ) -> int: - """Add or update a file in the current directory index. - - Args: - name: Filename without path - full_path: Complete source file path - content: File content for indexing - language: Programming language identifier - symbols: List of Symbol objects from the file - relationships: Optional list of CodeRelationship edges from this file - - Returns: - Database file_id - - Raises: - StorageError: If database operations fail - """ - with self._lock: - conn = self._get_connection() - full_path_str = str(Path(full_path).resolve()) - mtime = Path(full_path_str).stat().st_mtime if Path(full_path_str).exists() else None - line_count = content.count('\n') + 1 - - try: - conn.execute( - """ - INSERT INTO files(name, full_path, language, content, mtime, line_count) - VALUES(?, ?, ?, ?, ?, ?) - ON CONFLICT(full_path) DO UPDATE SET - name=excluded.name, - language=excluded.language, - content=excluded.content, - mtime=excluded.mtime, - line_count=excluded.line_count - """, - (name, full_path_str, language, content, mtime, line_count), - ) - - row = conn.execute("SELECT id FROM files WHERE full_path=?", (full_path_str,)).fetchone() - if not row: - raise StorageError(f"Failed to retrieve file_id for {full_path_str}") - - file_id = int(row["id"]) - - # Replace symbols - conn.execute("DELETE FROM symbols WHERE file_id=?", (file_id,)) - if symbols: - # Insert symbols without token_count and symbol_type - symbol_rows = [] - for s in symbols: - symbol_rows.append( - (file_id, s.name, s.kind, s.range[0], s.range[1]) - ) - - conn.executemany( - """ - INSERT INTO symbols(file_id, name, kind, start_line, end_line) - VALUES(?, ?, ?, ?, ?) - """, - symbol_rows, - ) - - self._save_merkle_hash(conn, file_id=file_id, content=content) - self._save_relationships(conn, file_id=file_id, relationships=relationships) - conn.commit() - self._maybe_update_global_symbols(full_path_str, symbols or []) - return file_id - - except sqlite3.DatabaseError as exc: - conn.rollback() - raise StorageError(f"Failed to add file {name}: {exc}") from exc - - def save_relationships(self, file_id: int, relationships: List[CodeRelationship]) -> None: - """Save relationships for an already-indexed file. - - Args: - file_id: Database file id - relationships: Relationship edges to persist - """ - if not relationships: - return - with self._lock: - conn = self._get_connection() - self._save_relationships(conn, file_id=file_id, relationships=relationships) - conn.commit() - - def _save_relationships( - self, - conn: sqlite3.Connection, - file_id: int, - relationships: Optional[List[CodeRelationship]], - ) -> None: - if not relationships: - return - - rows = conn.execute( - "SELECT id, name FROM symbols WHERE file_id=? ORDER BY start_line, id", - (file_id,), - ).fetchall() - - name_to_id: Dict[str, int] = {} - for row in rows: - name = row["name"] - if name not in name_to_id: - name_to_id[name] = int(row["id"]) - - if not name_to_id: - return - - rel_rows: List[Tuple[int, str, str, int, Optional[str]]] = [] - seen: set[tuple[int, str, str, int, Optional[str]]] = set() - - for rel in relationships: - source_id = name_to_id.get(rel.source_symbol) - if source_id is None: - continue - - target = (rel.target_symbol or "").strip() - if not target: - continue - - rel_type = rel.relationship_type.value - source_line = int(rel.source_line) - key = (source_id, target, rel_type, source_line, rel.target_file) - if key in seen: - continue - seen.add(key) - - rel_rows.append((source_id, target, rel_type, source_line, rel.target_file)) - - if not rel_rows: - return - - conn.executemany( - """ - INSERT INTO code_relationships( - source_symbol_id, target_qualified_name, - relationship_type, source_line, target_file - ) - VALUES(?, ?, ?, ?, ?) - """, - rel_rows, - ) - - def _save_merkle_hash(self, conn: sqlite3.Connection, file_id: int, content: str) -> None: - """Upsert a SHA-256 content hash for the given file_id (best-effort).""" - try: - digest = hashlib.sha256(content.encode("utf-8", errors="ignore")).hexdigest() - now = time.time() - conn.execute( - """ - INSERT INTO merkle_hashes(file_id, sha256, updated_at) - VALUES(?, ?, ?) - ON CONFLICT(file_id) DO UPDATE SET - sha256=excluded.sha256, - updated_at=excluded.updated_at - """, - (file_id, digest, now), - ) - except sqlite3.Error: - return - - def add_files_batch( - self, files: List[Tuple[str, Path, str, str, Optional[List[Symbol]]]] - ) -> int: - """Add multiple files in a single transaction. - - Args: - files: List of (name, full_path, content, language, symbols) tuples - - Returns: - Number of files added - - Raises: - StorageError: If batch operation fails - """ - with self._lock: - conn = self._get_connection() - count = 0 - - try: - conn.execute("BEGIN") - - for name, full_path, content, language, symbols in files: - full_path_str = str(Path(full_path).resolve()) - mtime = Path(full_path_str).stat().st_mtime if Path(full_path_str).exists() else None - line_count = content.count('\n') + 1 - - conn.execute( - """ - INSERT INTO files(name, full_path, language, content, mtime, line_count) - VALUES(?, ?, ?, ?, ?, ?) - ON CONFLICT(full_path) DO UPDATE SET - name=excluded.name, - language=excluded.language, - content=excluded.content, - mtime=excluded.mtime, - line_count=excluded.line_count - """, - (name, full_path_str, language, content, mtime, line_count), - ) - - row = conn.execute("SELECT id FROM files WHERE full_path=?", (full_path_str,)).fetchone() - if not row: - raise StorageError(f"Failed to retrieve file_id for {full_path_str}") - - file_id = int(row["id"]) - count += 1 - - conn.execute("DELETE FROM symbols WHERE file_id=?", (file_id,)) - if symbols: - # Insert symbols - symbol_rows = [] - for s in symbols: - symbol_rows.append( - (file_id, s.name, s.kind, s.range[0], s.range[1]) - ) - - conn.executemany( - """ - INSERT INTO symbols(file_id, name, kind, start_line, end_line) - VALUES(?, ?, ?, ?, ?) - """, - symbol_rows, - ) - - self._save_merkle_hash(conn, file_id=file_id, content=content) - - conn.commit() - return count - - except sqlite3.DatabaseError as exc: - conn.rollback() - raise StorageError(f"Batch insert failed: {exc}") from exc - - def remove_file(self, full_path: str | Path) -> bool: - """Remove a file from the index. - - Args: - full_path: Complete source file path - - Returns: - True if file was removed, False if not found - """ - with self._lock: - conn = self._get_connection() - full_path_str = str(Path(full_path).resolve()) - - row = conn.execute("SELECT id FROM files WHERE full_path=?", (full_path_str,)).fetchone() - if not row: - return False - - file_id = int(row["id"]) - conn.execute("DELETE FROM files WHERE id=?", (file_id,)) - conn.commit() - self._maybe_delete_global_symbols(full_path_str) - return True - - def get_file(self, full_path: str | Path) -> Optional[FileEntry]: - """Get file metadata. - - Args: - full_path: Complete source file path - - Returns: - FileEntry if found, None otherwise - """ - with self._lock: - conn = self._get_connection() - full_path_str = str(Path(full_path).resolve()) - - row = conn.execute( - """ - SELECT id, name, full_path, language, mtime, line_count - FROM files WHERE full_path=? - """, - (full_path_str,), - ).fetchone() - - if not row: - return None - - return FileEntry( - id=int(row["id"]), - name=row["name"], - full_path=Path(row["full_path"]), - language=row["language"], - mtime=float(row["mtime"]) if row["mtime"] else 0.0, - line_count=int(row["line_count"]) if row["line_count"] else 0, - ) - - def get_file_mtime(self, full_path: str | Path) -> Optional[float]: - """Get stored modification time for a file. - - Args: - full_path: Complete source file path - - Returns: - Modification time as float, or None if not found - """ - with self._lock: - conn = self._get_connection() - full_path_str = str(Path(full_path).resolve()) - - row = conn.execute( - "SELECT mtime FROM files WHERE full_path=?", (full_path_str,) - ).fetchone() - - return float(row["mtime"]) if row and row["mtime"] else None - - def needs_reindex(self, full_path: str | Path) -> bool: - """Check if a file needs reindexing. - - Default behavior uses mtime comparison (with 1ms tolerance). - - When `Config.enable_merkle_detection` is enabled and Merkle metadata is - available, uses SHA-256 content hash comparison (with mtime as a fast - path to avoid hashing unchanged files). - - Args: - full_path: Complete source file path - - Returns: - True if file should be reindexed (new, modified, or missing from index) - """ - full_path_obj = Path(full_path).resolve() - if not full_path_obj.exists(): - return False # File doesn't exist, skip indexing - - # Get current filesystem mtime - try: - current_mtime = full_path_obj.stat().st_mtime - except OSError: - return False # Can't read file stats, skip - - MTIME_TOLERANCE = 0.001 - - # Fast path: mtime-only mode (default / backward-compatible) - if self._config is None or not getattr(self._config, "enable_merkle_detection", False): - stored_mtime = self.get_file_mtime(full_path_obj) - if stored_mtime is None: - return True - return abs(current_mtime - stored_mtime) > MTIME_TOLERANCE - - full_path_str = str(full_path_obj) - - # Hash-based change detection (best-effort, falls back to mtime when metadata missing) - with self._lock: - conn = self._get_connection() - try: - row = conn.execute( - """ - SELECT f.id AS file_id, f.mtime AS mtime, mh.sha256 AS sha256 - FROM files f - LEFT JOIN merkle_hashes mh ON mh.file_id = f.id - WHERE f.full_path=? - """, - (full_path_str,), - ).fetchone() - except sqlite3.Error: - row = None - - if row is None: - return True - - stored_mtime = float(row["mtime"]) if row["mtime"] else None - stored_hash = row["sha256"] if row["sha256"] else None - file_id = int(row["file_id"]) - - # Missing Merkle data: fall back to mtime - if stored_hash is None: - if stored_mtime is None: - return True - return abs(current_mtime - stored_mtime) > MTIME_TOLERANCE - - # If mtime is unchanged within tolerance, assume unchanged without hashing. - if stored_mtime is not None and abs(current_mtime - stored_mtime) <= MTIME_TOLERANCE: - return False - - try: - current_text = full_path_obj.read_text(encoding="utf-8", errors="ignore") - current_hash = hashlib.sha256(current_text.encode("utf-8", errors="ignore")).hexdigest() - except OSError: - return False - - if current_hash == stored_hash: - # Content unchanged, but mtime drifted: update stored mtime to avoid repeated hashing. - with self._lock: - conn = self._get_connection() - conn.execute("UPDATE files SET mtime=? WHERE id=?", (current_mtime, file_id)) - conn.commit() - return False - - return True - - def get_merkle_root_hash(self) -> Optional[str]: - """Return the stored Merkle root hash for this directory index (if present).""" - with self._lock: - conn = self._get_connection() - try: - row = conn.execute( - "SELECT root_hash FROM merkle_state WHERE id=1" - ).fetchone() - except sqlite3.Error: - return None - - return row["root_hash"] if row and row["root_hash"] else None - - def update_merkle_root(self) -> Optional[str]: - """Compute and persist the Merkle root hash for this directory index. - - The root hash includes: - - Direct file hashes from `merkle_hashes` - - Direct subdirectory root hashes (read from child `_index.db` files) - """ - if self._config is None or not getattr(self._config, "enable_merkle_detection", False): - return None - - with self._lock: - conn = self._get_connection() - try: - file_rows = conn.execute( - """ - SELECT f.name AS name, mh.sha256 AS sha256 - FROM files f - LEFT JOIN merkle_hashes mh ON mh.file_id = f.id - ORDER BY f.name - """ - ).fetchall() - - subdir_rows = conn.execute( - "SELECT name, index_path FROM subdirs ORDER BY name" - ).fetchall() - except sqlite3.Error as exc: - self.logger.debug("Failed to compute merkle root: %s", exc) - return None - - items: List[str] = [] - - for row in file_rows: - name = row["name"] - sha = (row["sha256"] or "").strip() - items.append(f"f:{name}:{sha}") - - def read_child_root(index_path: str) -> str: - try: - with sqlite3.connect(index_path) as child_conn: - child_conn.row_factory = sqlite3.Row - child_row = child_conn.execute( - "SELECT root_hash FROM merkle_state WHERE id=1" - ).fetchone() - return child_row["root_hash"] if child_row and child_row["root_hash"] else "" - except Exception: - return "" - - for row in subdir_rows: - name = row["name"] - index_path = row["index_path"] - child_hash = read_child_root(index_path) if index_path else "" - items.append(f"d:{name}:{child_hash}") - - root_hash = hashlib.sha256("\n".join(items).encode("utf-8", errors="ignore")).hexdigest() - now = time.time() - - with self._lock: - conn = self._get_connection() - try: - conn.execute( - """ - INSERT INTO merkle_state(id, root_hash, updated_at) - VALUES(1, ?, ?) - ON CONFLICT(id) DO UPDATE SET - root_hash=excluded.root_hash, - updated_at=excluded.updated_at - """, - (root_hash, now), - ) - conn.commit() - except sqlite3.Error as exc: - self.logger.debug("Failed to persist merkle root: %s", exc) - return None - - return root_hash - - def add_file_incremental( - self, - name: str, - full_path: str | Path, - content: str, - language: str, - symbols: Optional[List[Symbol]] = None, - relationships: Optional[List[CodeRelationship]] = None, - ) -> Optional[int]: - """Add or update a file only if it has changed (incremental indexing). - - Checks mtime before indexing to skip unchanged files. - - Args: - name: Filename without path - full_path: Complete source file path - content: File content for indexing - language: Programming language identifier - symbols: List of Symbol objects from the file - relationships: Optional list of CodeRelationship edges from this file - - Returns: - Database file_id if indexed, None if skipped (unchanged) - - Raises: - StorageError: If database operations fail - """ - # Check if reindexing is needed - if not self.needs_reindex(full_path): - return None # Skip unchanged file - - # File changed or new, perform full indexing - return self.add_file(name, full_path, content, language, symbols, relationships) - - def cleanup_deleted_files(self, source_dir: Path) -> int: - """Remove indexed files that no longer exist in the source directory. - - Scans the source directory and removes database entries for deleted files. - - Args: - source_dir: Source directory to scan - - Returns: - Number of deleted file entries removed - - Raises: - StorageError: If cleanup operations fail - """ - with self._lock: - conn = self._get_connection() - source_dir = source_dir.resolve() - - try: - # Get all indexed file paths - rows = conn.execute("SELECT full_path FROM files").fetchall() - indexed_paths = {row["full_path"] for row in rows} - - # Build set of existing files in source directory - existing_paths = set() - for file_path in source_dir.rglob("*"): - if file_path.is_file(): - existing_paths.add(str(file_path.resolve())) - - # Find orphaned entries (indexed but no longer exist) - deleted_paths = indexed_paths - existing_paths - - # Remove orphaned entries - deleted_count = 0 - for deleted_path in deleted_paths: - conn.execute("DELETE FROM files WHERE full_path=?", (deleted_path,)) - deleted_count += 1 - self._maybe_delete_global_symbols(deleted_path) - - if deleted_count > 0: - conn.commit() - - return deleted_count - - except Exception as exc: - conn.rollback() - raise StorageError(f"Failed to cleanup deleted files: {exc}") from exc - - def list_files(self) -> List[FileEntry]: - """List all files in current directory. - - Returns: - List of FileEntry objects - """ - with self._lock: - conn = self._get_connection() - rows = conn.execute( - """ - SELECT id, name, full_path, language, mtime, line_count - FROM files - ORDER BY name - """ - ).fetchall() - - return [ - FileEntry( - id=int(row["id"]), - name=row["name"], - full_path=Path(row["full_path"]), - language=row["language"], - mtime=float(row["mtime"]) if row["mtime"] else 0.0, - line_count=int(row["line_count"]) if row["line_count"] else 0, - ) - for row in rows - ] - - def file_count(self) -> int: - """Get number of files in current directory. - - Returns: - File count - """ - with self._lock: - conn = self._get_connection() - row = conn.execute("SELECT COUNT(*) AS c FROM files").fetchone() - return int(row["c"]) if row else 0 - - # === Semantic Metadata === - - def add_semantic_metadata( - self, - file_id: int, - summary: str, - keywords: List[str], - purpose: str, - llm_tool: str - ) -> None: - """Add or update semantic metadata for a file. - - Args: - file_id: File ID from files table - summary: LLM-generated summary - keywords: List of keywords - purpose: Purpose/role of the file - llm_tool: Tool used to generate metadata (gemini/qwen) - """ - with self._lock: - conn = self._get_connection() - - import time - - generated_at = time.time() - - # Write to semantic_metadata table (without keywords column) - conn.execute( - """ - INSERT INTO semantic_metadata(file_id, summary, purpose, llm_tool, generated_at) - VALUES(?, ?, ?, ?, ?) - ON CONFLICT(file_id) DO UPDATE SET - summary=excluded.summary, - purpose=excluded.purpose, - llm_tool=excluded.llm_tool, - generated_at=excluded.generated_at - """, - (file_id, summary, purpose, llm_tool, generated_at), - ) - - # Write to normalized keywords tables for optimized search - # First, remove existing keyword associations - conn.execute("DELETE FROM file_keywords WHERE file_id = ?", (file_id,)) - - # Then add new keywords - for keyword in keywords: - keyword = keyword.strip() - if not keyword: - continue - - # Insert keyword if it doesn't exist - conn.execute( - "INSERT OR IGNORE INTO keywords(keyword) VALUES(?)", - (keyword,) - ) - - # Get keyword_id - row = conn.execute( - "SELECT id FROM keywords WHERE keyword = ?", - (keyword,) - ).fetchone() - - if row: - keyword_id = row["id"] - # Link file to keyword - conn.execute( - "INSERT OR IGNORE INTO file_keywords(file_id, keyword_id) VALUES(?, ?)", - (file_id, keyword_id) - ) - - conn.commit() - - def get_semantic_metadata(self, file_id: int) -> Optional[Dict[str, Any]]: - """Get semantic metadata for a file. - - Args: - file_id: File ID from files table - - Returns: - Dict with summary, keywords, purpose, llm_tool, generated_at, or None if not found - """ - with self._lock: - conn = self._get_connection() - - # Get semantic metadata (without keywords column) - row = conn.execute( - """ - SELECT summary, purpose, llm_tool, generated_at - FROM semantic_metadata WHERE file_id=? - """, - (file_id,), - ).fetchone() - - if not row: - return None - - # Get keywords from normalized file_keywords table - keyword_rows = conn.execute( - """ - SELECT k.keyword - FROM file_keywords fk - JOIN keywords k ON fk.keyword_id = k.id - WHERE fk.file_id = ? - ORDER BY k.keyword - """, - (file_id,), - ).fetchall() - - keywords = [kw["keyword"] for kw in keyword_rows] - - return { - "summary": row["summary"], - "keywords": keywords, - "purpose": row["purpose"], - "llm_tool": row["llm_tool"], - "generated_at": float(row["generated_at"]) if row["generated_at"] else 0.0, - } - - def get_files_without_semantic(self) -> List[FileEntry]: - """Get all files that don't have semantic metadata. - - Returns: - List of FileEntry objects without semantic metadata - """ - with self._lock: - conn = self._get_connection() - - rows = conn.execute( - """ - SELECT f.id, f.name, f.full_path, f.language, f.mtime, f.line_count - FROM files f - LEFT JOIN semantic_metadata sm ON f.id = sm.file_id - WHERE sm.id IS NULL - ORDER BY f.name - """ - ).fetchall() - - return [ - FileEntry( - id=int(row["id"]), - name=row["name"], - full_path=Path(row["full_path"]), - language=row["language"], - mtime=float(row["mtime"]) if row["mtime"] else 0.0, - line_count=int(row["line_count"]) if row["line_count"] else 0, - ) - for row in rows - ] - - def search_semantic_keywords(self, keyword: str, use_normalized: bool = True) -> List[Tuple[FileEntry, List[str]]]: - """Search files by semantic keywords. - - Args: - keyword: Keyword to search for (case-insensitive) - use_normalized: Use optimized normalized tables (default: True) - - Returns: - List of (FileEntry, keywords) tuples where keyword matches - """ - with self._lock: - conn = self._get_connection() - - if use_normalized: - # Optimized query using normalized tables with indexed lookup - # Use prefix search (keyword%) for better index utilization - keyword_pattern = f"{keyword}%" - - rows = conn.execute( - """ - SELECT f.id, f.name, f.full_path, f.language, f.mtime, f.line_count, - GROUP_CONCAT(k.keyword, ',') as keywords - FROM files f - JOIN file_keywords fk ON f.id = fk.file_id - JOIN keywords k ON fk.keyword_id = k.id - WHERE k.keyword LIKE ? COLLATE NOCASE - GROUP BY f.id, f.name, f.full_path, f.language, f.mtime, f.line_count - ORDER BY f.name - """, - (keyword_pattern,), - ).fetchall() - - results = [] - for row in rows: - file_entry = FileEntry( - id=int(row["id"]), - name=row["name"], - full_path=Path(row["full_path"]), - language=row["language"], - mtime=float(row["mtime"]) if row["mtime"] else 0.0, - line_count=int(row["line_count"]) if row["line_count"] else 0, - ) - keywords = row["keywords"].split(',') if row["keywords"] else [] - results.append((file_entry, keywords)) - - return results - - else: - # Fallback using normalized tables with contains matching (slower but more flexible) - keyword_pattern = f"%{keyword}%" - - rows = conn.execute( - """ - SELECT f.id, f.name, f.full_path, f.language, f.mtime, f.line_count, - GROUP_CONCAT(k.keyword, ',') as keywords - FROM files f - JOIN file_keywords fk ON f.id = fk.file_id - JOIN keywords k ON fk.keyword_id = k.id - WHERE k.keyword LIKE ? COLLATE NOCASE - GROUP BY f.id, f.name, f.full_path, f.language, f.mtime, f.line_count - ORDER BY f.name - """, - (keyword_pattern,), - ).fetchall() - - results = [] - for row in rows: - file_entry = FileEntry( - id=int(row["id"]), - name=row["name"], - full_path=Path(row["full_path"]), - language=row["language"], - mtime=float(row["mtime"]) if row["mtime"] else 0.0, - line_count=int(row["line_count"]) if row["line_count"] else 0, - ) - keywords = row["keywords"].split(',') if row["keywords"] else [] - results.append((file_entry, keywords)) - - return results - - def list_semantic_metadata( - self, - offset: int = 0, - limit: int = 50, - llm_tool: Optional[str] = None, - ) -> Tuple[List[Dict[str, Any]], int]: - """List all semantic metadata with file information. - - Args: - offset: Number of records to skip (for pagination) - limit: Maximum records to return (max 100) - llm_tool: Optional filter by LLM tool used - - Returns: - Tuple of (list of metadata dicts, total count) - """ - with self._lock: - conn = self._get_connection() - - # Query semantic metadata without keywords column - base_query = """ - SELECT f.id as file_id, f.name as file_name, f.full_path, - f.language, f.line_count, - sm.summary, sm.purpose, - sm.llm_tool, sm.generated_at - FROM files f - JOIN semantic_metadata sm ON f.id = sm.file_id - """ - count_query = """ - SELECT COUNT(*) as total - FROM files f - JOIN semantic_metadata sm ON f.id = sm.file_id - """ - - params: List[Any] = [] - if llm_tool: - base_query += " WHERE sm.llm_tool = ?" - count_query += " WHERE sm.llm_tool = ?" - params.append(llm_tool) - - base_query += " ORDER BY sm.generated_at DESC LIMIT ? OFFSET ?" - params.extend([min(limit, 100), offset]) - - count_params = [llm_tool] if llm_tool else [] - total_row = conn.execute(count_query, count_params).fetchone() - total = int(total_row["total"]) if total_row else 0 - - rows = conn.execute(base_query, params).fetchall() - - results = [] - for row in rows: - file_id = int(row["file_id"]) - - # Get keywords from normalized file_keywords table - keyword_rows = conn.execute( - """ - SELECT k.keyword - FROM file_keywords fk - JOIN keywords k ON fk.keyword_id = k.id - WHERE fk.file_id = ? - ORDER BY k.keyword - """, - (file_id,), - ).fetchall() - - keywords = [kw["keyword"] for kw in keyword_rows] - - results.append({ - "file_id": file_id, - "file_name": row["file_name"], - "full_path": row["full_path"], - "language": row["language"], - "line_count": int(row["line_count"]) if row["line_count"] else 0, - "summary": row["summary"], - "keywords": keywords, - "purpose": row["purpose"], - "llm_tool": row["llm_tool"], - "generated_at": float(row["generated_at"]) if row["generated_at"] else 0.0, - }) - - return results, total - - # === Subdirectory Links === - - def register_subdir( - self, - name: str, - index_path: str | Path, - files_count: int = 0, - direct_files: int = 0, - ) -> None: - """Register or update a subdirectory link. - - Args: - name: Subdirectory name - index_path: Path to subdirectory's _index.db - files_count: Total files recursively - direct_files: Deprecated parameter (no longer used) - """ - with self._lock: - conn = self._get_connection() - index_path_str = str(Path(index_path).resolve()) - - import time - last_updated = time.time() - - # Note: direct_files parameter is deprecated but kept for backward compatibility - conn.execute( - """ - INSERT INTO subdirs(name, index_path, files_count, last_updated) - VALUES(?, ?, ?, ?) - ON CONFLICT(name) DO UPDATE SET - index_path=excluded.index_path, - files_count=excluded.files_count, - last_updated=excluded.last_updated - """, - (name, index_path_str, files_count, last_updated), - ) - conn.commit() - - def unregister_subdir(self, name: str) -> bool: - """Remove a subdirectory link. - - Args: - name: Subdirectory name - - Returns: - True if removed, False if not found - """ - with self._lock: - conn = self._get_connection() - row = conn.execute("SELECT id FROM subdirs WHERE name=?", (name,)).fetchone() - if not row: - return False - - conn.execute("DELETE FROM subdirs WHERE name=?", (name,)) - conn.commit() - return True - - def get_subdirs(self) -> List[SubdirLink]: - """Get all subdirectory links. - - Returns: - List of SubdirLink objects - """ - with self._lock: - conn = self._get_connection() - rows = conn.execute( - """ - SELECT id, name, index_path, files_count, last_updated - FROM subdirs - ORDER BY name - """ - ).fetchall() - - return [ - SubdirLink( - id=int(row["id"]), - name=row["name"], - index_path=Path(row["index_path"]), - files_count=int(row["files_count"]) if row["files_count"] else 0, - last_updated=float(row["last_updated"]) if row["last_updated"] else 0.0, - ) - for row in rows - ] - - def get_subdir(self, name: str) -> Optional[SubdirLink]: - """Get a specific subdirectory link. - - Args: - name: Subdirectory name - - Returns: - SubdirLink if found, None otherwise - """ - with self._lock: - conn = self._get_connection() - row = conn.execute( - """ - SELECT id, name, index_path, files_count, last_updated - FROM subdirs WHERE name=? - """, - (name,), - ).fetchone() - - if not row: - return None - - return SubdirLink( - id=int(row["id"]), - name=row["name"], - index_path=Path(row["index_path"]), - files_count=int(row["files_count"]) if row["files_count"] else 0, - last_updated=float(row["last_updated"]) if row["last_updated"] else 0.0, - ) - - def update_subdir_stats( - self, name: str, files_count: int, direct_files: Optional[int] = None - ) -> None: - """Update subdirectory statistics. - - Args: - name: Subdirectory name - files_count: Total files recursively - direct_files: Deprecated parameter (no longer used) - """ - with self._lock: - conn = self._get_connection() - import time - last_updated = time.time() - - # Note: direct_files parameter is deprecated but kept for backward compatibility - conn.execute( - """ - UPDATE subdirs - SET files_count=?, last_updated=? - WHERE name=? - """, - (files_count, last_updated, name), - ) - conn.commit() - - # === Search === - - @staticmethod - def _enhance_fts_query(query: str) -> str: - """Enhance FTS5 query to support prefix matching for simple queries. - - For simple single-word or multi-word queries without FTS5 operators, - automatically adds prefix wildcard (*) to enable partial matching. - - Examples: - "loadPack" -> "loadPack*" - "load package" -> "load* package*" - "load*" -> "load*" (already has wildcard, unchanged) - "NOT test" -> "NOT test" (has FTS operator, unchanged) - - Args: - query: Original FTS5 query string - - Returns: - Enhanced query string with prefix wildcards for simple queries - """ - # Don't modify if query already contains FTS5 operators or wildcards - if any(op in query.upper() for op in [' AND ', ' OR ', ' NOT ', ' NEAR ', '*', '"']): - return query - - # For simple queries, add prefix wildcard to each word - words = query.split() - enhanced_words = [f"{word}*" if not word.endswith('*') else word for word in words] - return ' '.join(enhanced_words) - - def _find_match_lines(self, content: str, query: str) -> List[int]: - """Find line numbers where query terms match. - - Args: - content: File content - query: Search query (FTS5 format) - - Returns: - List of 1-based line numbers containing matches - """ - # Extract search terms from FTS query (remove operators) - terms = re.findall(r'["\']([^"\']+)["\']|(\w+)', query) - search_terms = [t[0] or t[1] for t in terms if t[0] or t[1]] - # Filter out FTS operators - fts_operators = {'AND', 'OR', 'NOT', 'NEAR'} - search_terms = [t for t in search_terms if t.upper() not in fts_operators] - - if not search_terms: - return [1] # Default to first line - - lines = content.split('\n') - match_lines = [] - - for i, line in enumerate(lines, 1): - line_lower = line.lower() - for term in search_terms: - # Handle wildcard suffix - term_clean = term.rstrip('*').lower() - if term_clean and term_clean in line_lower: - match_lines.append(i) - break - - return match_lines if match_lines else [1] - - def _find_containing_symbol( - self, conn: sqlite3.Connection, file_id: int, line_num: int - ) -> Optional[Tuple[int, int, str, str]]: - """Find the symbol that contains the given line number. - - Args: - conn: Database connection - file_id: File ID in database - line_num: 1-based line number - - Returns: - Tuple of (start_line, end_line, symbol_name, symbol_kind) or None - """ - row = conn.execute( - """ - SELECT start_line, end_line, name, kind - FROM symbols - WHERE file_id = ? AND start_line <= ? AND end_line >= ? - ORDER BY (end_line - start_line) ASC - LIMIT 1 - """, - (file_id, line_num, line_num), - ).fetchone() - - if row: - return (row["start_line"], row["end_line"], row["name"], row["kind"]) - return None - - def _extract_code_block( - self, - content: str, - start_line: int, - end_line: int, - match_line: Optional[int] = None, - context_lines: int = 5, - ) -> Tuple[str, int, int]: - """Extract code block from content. - - If start_line/end_line are provided (from symbol), use them. - Otherwise, extract context around match_line. - - Args: - content: Full file content - start_line: 1-based start line (from symbol or calculated) - end_line: 1-based end line (from symbol or calculated) - match_line: 1-based line where match occurred (for context extraction) - context_lines: Number of lines before/after match when no symbol - - Returns: - Tuple of (code_block, actual_start_line, actual_end_line) - """ - lines = content.split('\n') - total_lines = len(lines) - - # Clamp to valid range - start_line = max(1, start_line) - end_line = min(total_lines, end_line) - - # Extract block (convert to 0-based index) - block_lines = lines[start_line - 1:end_line] - block_content = '\n'.join(block_lines) - - return block_content, start_line, end_line - - def _batch_fetch_symbols( - self, conn: sqlite3.Connection, file_ids: List[int] - ) -> Dict[int, List[Tuple[int, int, str, str]]]: - """Batch fetch all symbols for multiple files in a single query. - - Args: - conn: Database connection - file_ids: List of file IDs to fetch symbols for - - Returns: - Dictionary mapping file_id to list of (start_line, end_line, name, kind) tuples - """ - if not file_ids: - return {} - - # Build placeholder string for IN clause - placeholders = ','.join('?' for _ in file_ids) - rows = conn.execute( - f""" - SELECT file_id, start_line, end_line, name, kind - FROM symbols - WHERE file_id IN ({placeholders}) - ORDER BY file_id, (end_line - start_line) ASC - """, - file_ids, - ).fetchall() - - # Organize symbols by file_id - symbols_by_file: Dict[int, List[Tuple[int, int, str, str]]] = {fid: [] for fid in file_ids} - for row in rows: - symbols_by_file[row["file_id"]].append( - (row["start_line"], row["end_line"], row["name"], row["kind"]) - ) - return symbols_by_file - - def _find_containing_symbol_from_cache( - self, symbols: List[Tuple[int, int, str, str]], line_num: int - ) -> Optional[Tuple[int, int, str, str]]: - """Find the smallest symbol containing the given line number from cached symbols. - - Args: - symbols: List of (start_line, end_line, name, kind) tuples, sorted by size - line_num: 1-based line number - - Returns: - Tuple of (start_line, end_line, symbol_name, symbol_kind) or None - """ - for start_line, end_line, name, kind in symbols: - if start_line <= line_num <= end_line: - return (start_line, end_line, name, kind) - return None - - def _generate_centered_excerpt( - self, content: str, match_line: int, start_line: int, end_line: int, max_chars: int = 200 - ) -> str: - """Generate excerpt centered around the match line. - - Args: - content: Full file content - match_line: 1-based line where match occurred - start_line: 1-based start line of the code block - end_line: 1-based end line of the code block - max_chars: Maximum characters for excerpt - - Returns: - Excerpt string centered around the match - """ - lines = content.split('\n') - total_lines = len(lines) - - # Ensure match_line is within bounds - match_line = max(1, min(match_line, total_lines)) - - # Calculate context window (2 lines before, 2 lines after the match) - ctx_start = max(start_line, match_line - 2) - ctx_end = min(end_line, match_line + 2) - - # Extract and join lines - excerpt_lines = lines[ctx_start - 1:ctx_end] - excerpt = '\n'.join(excerpt_lines) - - # Truncate if too long - if len(excerpt) > max_chars: - excerpt = excerpt[:max_chars] + "..." - - return excerpt - - def _search_internal( - self, - query: str, - fts_table: str, - limit: int = 20, - return_full_content: bool = False, - context_lines: int = 10, - ) -> List[SearchResult]: - """Internal unified search implementation for all FTS modes. - - Optimizations: - - Fast path: Direct FTS query with snippet() for location-only results - - Full content path: Batch fetch symbols to eliminate N+1 queries - - Centered excerpt generation for better context - - Args: - query: FTS5 query string - fts_table: FTS table name ('files_fts_exact' or 'files_fts_fuzzy') - limit: Maximum results to return - return_full_content: If True, include full code block in content field - context_lines: Lines of context when no symbol contains the match - - Returns: - List of SearchResult objects - """ - with self._lock: - conn = self._get_connection() - - # Fast path: location-only results (no content processing) - if not return_full_content: - try: - rows = conn.execute( - f""" - SELECT rowid, full_path, bm25({fts_table}) AS rank, - snippet({fts_table}, 2, '', '', '...', 30) AS excerpt - FROM {fts_table} - WHERE {fts_table} MATCH ? - ORDER BY rank - LIMIT ? - """, - (query, limit), - ).fetchall() - except sqlite3.DatabaseError as exc: - raise StorageError(f"FTS search failed: {exc}") from exc - - results: List[SearchResult] = [] - for row in rows: - rank = float(row["rank"]) if row["rank"] is not None else 0.0 - score = abs(rank) if rank < 0 else 0.0 - results.append( - SearchResult( - path=row["full_path"], - score=score, - excerpt=row["excerpt"], - ) - ) - return results - - # Full content path with batch optimization - # Step 1: Get file_ids and ranks (lightweight query) - try: - id_rows = conn.execute( - f""" - SELECT rowid AS file_id, bm25({fts_table}) AS rank - FROM {fts_table} - WHERE {fts_table} MATCH ? - ORDER BY rank - LIMIT ? - """, - (query, limit), - ).fetchall() - except sqlite3.DatabaseError as exc: - raise StorageError(f"FTS search failed: {exc}") from exc - - if not id_rows: - return [] - - file_ids = [row["file_id"] for row in id_rows] - ranks_by_id = {row["file_id"]: row["rank"] for row in id_rows} - - # Step 2: Batch fetch all symbols for matched files (eliminates N+1) - symbols_by_file = self._batch_fetch_symbols(conn, file_ids) - - # Step 3: Process each file on-demand (reduces memory) - results: List[SearchResult] = [] - for file_id in file_ids: - # Fetch file content on-demand - file_row = conn.execute( - "SELECT full_path, content FROM files WHERE id = ?", - (file_id,), - ).fetchone() - - if not file_row: - continue - - file_path = file_row["full_path"] - content = file_row["content"] or "" - rank = ranks_by_id.get(file_id, 0.0) - score = abs(rank) if rank < 0 else 0.0 - - # Find matching lines - match_lines = self._find_match_lines(content, query) - first_match_line = match_lines[0] if match_lines else 1 - - # Find symbol from cached symbols (no extra SQL query) - file_symbols = symbols_by_file.get(file_id, []) - symbol_info = self._find_containing_symbol_from_cache(file_symbols, first_match_line) - - if symbol_info: - start_line, end_line, symbol_name, symbol_kind = symbol_info - else: - # No symbol found, use context around match - lines = content.split('\n') - total_lines = len(lines) - start_line = max(1, first_match_line - context_lines) - end_line = min(total_lines, first_match_line + context_lines) - symbol_name = None - symbol_kind = None - - # Extract code block - block_content, start_line, end_line = self._extract_code_block( - content, start_line, end_line - ) - - # Generate centered excerpt (improved quality) - excerpt = self._generate_centered_excerpt( - content, first_match_line, start_line, end_line - ) - - results.append( - SearchResult( - path=file_path, - score=score, - excerpt=excerpt, - content=block_content, - start_line=start_line, - end_line=end_line, - symbol_name=symbol_name, - symbol_kind=symbol_kind, - ) - ) - return results - - - def search_fts( - self, - query: str, - limit: int = 20, - enhance_query: bool = False, - return_full_content: bool = False, - context_lines: int = 10, - ) -> List[SearchResult]: - """Full-text search in current directory files. - - Uses files_fts_exact (unicode61 tokenizer) for exact token matching. - For fuzzy/substring search, use search_fts_fuzzy() instead. - - Best Practice (from industry analysis of Codanna/Code-Index-MCP): - - Default: Respects exact user input without modification - - Users can manually add wildcards (e.g., "loadPack*") for prefix matching - - Automatic enhancement (enhance_query=True) is NOT recommended as it can - violate user intent and bring unwanted noise in results - - Args: - query: FTS5 query string - limit: Maximum results to return - enhance_query: If True, automatically add prefix wildcards for simple queries. - Default False to respect exact user input. - return_full_content: If True, include full code block in content field. - Default False for fast location-only results. - context_lines: Lines of context when no symbol contains the match - - Returns: - List of SearchResult objects (location-only by default, with content if requested) - - Raises: - StorageError: If FTS search fails - """ - final_query = self._enhance_fts_query(query) if enhance_query else query - return self._search_internal( - query=final_query, - fts_table='files_fts_exact', - limit=limit, - return_full_content=return_full_content, - context_lines=context_lines, - ) - - def search_fts_exact( - self, - query: str, - limit: int = 20, - return_full_content: bool = False, - context_lines: int = 10, - ) -> List[SearchResult]: - """Full-text search using exact token matching. - - Args: - query: FTS5 query string - limit: Maximum results to return - return_full_content: If True, include full code block in content field. - Default False for fast location-only results. - context_lines: Lines of context when no symbol contains the match - - Returns: - List of SearchResult objects (location-only by default, with content if requested) - - Raises: - StorageError: If FTS search fails - """ - return self._search_internal( - query=query, - fts_table='files_fts_exact', - limit=limit, - return_full_content=return_full_content, - context_lines=context_lines, - ) - - def search_fts_fuzzy( - self, - query: str, - limit: int = 20, - return_full_content: bool = False, - context_lines: int = 10, - ) -> List[SearchResult]: - """Full-text search using fuzzy/substring matching. - - Args: - query: FTS5 query string - limit: Maximum results to return - return_full_content: If True, include full code block in content field. - Default False for fast location-only results. - context_lines: Lines of context when no symbol contains the match - - Returns: - List of SearchResult objects (location-only by default, with content if requested) - - Raises: - StorageError: If FTS search fails - """ - return self._search_internal( - query=query, - fts_table='files_fts_fuzzy', - limit=limit, - return_full_content=return_full_content, - context_lines=context_lines, - ) - - def search_files_only(self, query: str, limit: int = 20) -> List[str]: - """Fast FTS search returning only file paths (no snippet generation). - - Optimized for when only file paths are needed, skipping expensive - snippet() function call. - - Args: - query: FTS5 query string - limit: Maximum results to return - - Returns: - List of file paths as strings - - Raises: - StorageError: If FTS search fails - """ - with self._lock: - conn = self._get_connection() - try: - rows = conn.execute( - """ - SELECT full_path - FROM files_fts - WHERE files_fts MATCH ? - ORDER BY bm25(files_fts) - LIMIT ? - """, - (query, limit), - ).fetchall() - except sqlite3.DatabaseError as exc: - raise StorageError(f"FTS search failed: {exc}") from exc - - return [row["full_path"] for row in rows] - - def search_symbols( - self, name: str, kind: Optional[str] = None, limit: int = 50, prefix_mode: bool = True - ) -> List[Symbol]: - """Search symbols by name pattern. - - Args: - name: Symbol name pattern - kind: Optional symbol kind filter - limit: Maximum results to return - prefix_mode: If True, use prefix search (faster with index); - If False, use substring search (slower) - - Returns: - List of Symbol objects - """ - # Prefix search is much faster as it can use index - if prefix_mode: - pattern = f"{name}%" - else: - pattern = f"%{name}%" - - with self._lock: - conn = self._get_connection() - if kind: - rows = conn.execute( - """ - SELECT s.name, s.kind, s.start_line, s.end_line, f.full_path - FROM symbols s - JOIN files f ON s.file_id = f.id - WHERE s.name LIKE ? AND s.kind=? - ORDER BY s.name - LIMIT ? - """, - (pattern, kind, limit), - ).fetchall() - else: - rows = conn.execute( - """ - SELECT s.name, s.kind, s.start_line, s.end_line, f.full_path - FROM symbols s - JOIN files f ON s.file_id = f.id - WHERE s.name LIKE ? - ORDER BY s.name - LIMIT ? - """, - (pattern, limit), - ).fetchall() - - return [ - Symbol( - name=row["name"], - kind=row["kind"], - range=(row["start_line"], row["end_line"]), - file=row["full_path"], - ) - for row in rows - ] - - def get_file_symbols(self, file_path: str | Path) -> List[Symbol]: - """Get all symbols in a specific file, sorted by start_line. - - Args: - file_path: Full path to the file - - Returns: - List of Symbol objects sorted by start_line - """ - file_path_str = str(Path(file_path).resolve()) - - with self._lock: - conn = self._get_connection() - # First get the file_id - file_row = conn.execute( - "SELECT id FROM files WHERE full_path=?", - (file_path_str,), - ).fetchone() - - if not file_row: - return [] - - file_id = int(file_row["id"]) - - rows = conn.execute( - """ - SELECT s.name, s.kind, s.start_line, s.end_line - FROM symbols s - WHERE s.file_id=? - ORDER BY s.start_line - """, - (file_id,), - ).fetchall() - - return [ - Symbol( - name=row["name"], - kind=row["kind"], - range=(row["start_line"], row["end_line"]), - file=file_path_str, - ) - for row in rows - ] - - def get_outgoing_calls( - self, - file_path: str | Path, - symbol_name: Optional[str] = None, - ) -> List[Tuple[str, str, int, Optional[str]]]: - """Get outgoing calls from symbols in a file. - - Queries code_relationships table for calls originating from symbols - in the specified file. - - Args: - file_path: Full path to the source file - symbol_name: Optional symbol name to filter by. If None, returns - calls from all symbols in the file. - - Returns: - List of tuples: (target_name, relationship_type, source_line, target_file) - - target_name: Qualified name of the call target - - relationship_type: Type of relationship (e.g., "calls", "imports") - - source_line: Line number where the call occurs - - target_file: Target file path (may be None if unknown) - """ - file_path_str = str(Path(file_path).resolve()) - - with self._lock: - conn = self._get_connection() - # First get the file_id - file_row = conn.execute( - "SELECT id FROM files WHERE full_path=?", - (file_path_str,), - ).fetchone() - - if not file_row: - return [] - - file_id = int(file_row["id"]) - - if symbol_name: - rows = conn.execute( - """ - SELECT cr.target_qualified_name, cr.relationship_type, - cr.source_line, cr.target_file - FROM code_relationships cr - JOIN symbols s ON s.id = cr.source_symbol_id - WHERE s.file_id=? AND s.name=? - ORDER BY cr.source_line - """, - (file_id, symbol_name), - ).fetchall() - else: - rows = conn.execute( - """ - SELECT cr.target_qualified_name, cr.relationship_type, - cr.source_line, cr.target_file - FROM code_relationships cr - JOIN symbols s ON s.id = cr.source_symbol_id - WHERE s.file_id=? - ORDER BY cr.source_line - """, - (file_id,), - ).fetchall() - - return [ - ( - row["target_qualified_name"], - row["relationship_type"], - int(row["source_line"]), - row["target_file"], - ) - for row in rows - ] - - def get_incoming_calls( - self, - target_name: str, - limit: int = 100, - ) -> List[Tuple[str, str, int, str]]: - """Get incoming calls/references to a target symbol. - - Queries code_relationships table for references to the specified - target symbol name. - - Args: - target_name: Name of the target symbol to find references for. - Matches against target_qualified_name (exact match, - suffix match, or contains match). - limit: Maximum number of results to return - - Returns: - List of tuples: (source_symbol_name, relationship_type, source_line, source_file) - - source_symbol_name: Name of the calling symbol - - relationship_type: Type of relationship (e.g., "calls", "imports") - - source_line: Line number where the call occurs - - source_file: Full path to the source file - """ - with self._lock: - conn = self._get_connection() - rows = conn.execute( - """ - SELECT s.name AS source_name, cr.relationship_type, - cr.source_line, f.full_path AS source_file - FROM code_relationships cr - JOIN symbols s ON s.id = cr.source_symbol_id - JOIN files f ON f.id = s.file_id - WHERE cr.target_qualified_name = ? - OR cr.target_qualified_name LIKE ? - OR cr.target_qualified_name LIKE ? - ORDER BY f.full_path, cr.source_line - LIMIT ? - """, - ( - target_name, - f"%.{target_name}", - f"%{target_name}", - limit, - ), - ).fetchall() - - return [ - ( - row["source_name"], - row["relationship_type"], - int(row["source_line"]), - row["source_file"], - ) - for row in rows - ] - - # === Statistics === - - def stats(self) -> Dict[str, Any]: - """Get current directory statistics. - - Returns: - Dictionary containing: - - files: Number of files in this directory - - symbols: Number of symbols - - subdirs: Number of subdirectories - - total_files: Total files including subdirectories - - languages: Dictionary of language counts - """ - with self._lock: - conn = self._get_connection() - - file_count = conn.execute("SELECT COUNT(*) AS c FROM files").fetchone()["c"] - symbol_count = conn.execute("SELECT COUNT(*) AS c FROM symbols").fetchone()["c"] - subdir_count = conn.execute("SELECT COUNT(*) AS c FROM subdirs").fetchone()["c"] - - total_files_row = conn.execute( - "SELECT COALESCE(SUM(files_count), 0) AS total FROM subdirs" - ).fetchone() - total_files = int(file_count) + int(total_files_row["total"] if total_files_row else 0) - - lang_rows = conn.execute( - "SELECT language, COUNT(*) AS c FROM files GROUP BY language ORDER BY c DESC" - ).fetchall() - languages = {row["language"]: int(row["c"]) for row in lang_rows} - - return { - "files": int(file_count), - "symbols": int(symbol_count), - "subdirs": int(subdir_count), - "total_files": total_files, - "languages": languages, - } - - # === Internal Methods === - - def _get_connection(self) -> sqlite3.Connection: - """Get or create database connection with proper configuration. - - Returns: - sqlite3.Connection with WAL mode and foreign keys enabled - """ - if self._conn is None: - self._conn = sqlite3.connect(str(self.db_path), check_same_thread=False) - self._conn.row_factory = sqlite3.Row - self._conn.execute("PRAGMA journal_mode=WAL") - self._conn.execute("PRAGMA synchronous=NORMAL") - self._conn.execute("PRAGMA foreign_keys=ON") - # Memory-mapped I/O for faster reads (30GB limit) - self._conn.execute("PRAGMA mmap_size=30000000000") - return self._conn - - def _maybe_update_global_symbols(self, file_path: str, symbols: List[Symbol]) -> None: - if self._global_index is None: - return - if self._config is not None and not getattr(self._config, "global_symbol_index_enabled", True): - return - try: - self._global_index.update_file_symbols( - file_path=file_path, - symbols=symbols, - index_path=str(self.db_path), - ) - except Exception as exc: - # Global index is an optimization; local directory index remains authoritative. - self.logger.debug("Global symbol index update failed for %s: %s", file_path, exc) - - def _maybe_delete_global_symbols(self, file_path: str) -> None: - if self._global_index is None: - return - if self._config is not None and not getattr(self._config, "global_symbol_index_enabled", True): - return - try: - self._global_index.delete_file_symbols(file_path) - except Exception as exc: - self.logger.debug("Global symbol index delete failed for %s: %s", file_path, exc) - - def _create_schema(self, conn: sqlite3.Connection) -> None: - """Create database schema. - - Args: - conn: Database connection - - Raises: - StorageError: If schema creation fails - """ - try: - # Files table - conn.execute( - """ - CREATE TABLE IF NOT EXISTS files ( - id INTEGER PRIMARY KEY, - name TEXT NOT NULL, - full_path TEXT UNIQUE NOT NULL, - language TEXT, - content TEXT, - mtime REAL, - line_count INTEGER - ) - """ - ) - - # Subdirectories table (v5: removed direct_files) - conn.execute( - """ - CREATE TABLE IF NOT EXISTS subdirs ( - id INTEGER PRIMARY KEY, - name TEXT NOT NULL UNIQUE, - index_path TEXT NOT NULL, - files_count INTEGER DEFAULT 0, - last_updated REAL - ) - """ - ) - - # Symbols table with token metadata - conn.execute( - """ - CREATE TABLE IF NOT EXISTS symbols ( - id INTEGER PRIMARY KEY, - file_id INTEGER REFERENCES files(id) ON DELETE CASCADE, - name TEXT NOT NULL, - kind TEXT NOT NULL, - start_line INTEGER, - end_line INTEGER - ) - """ - ) - - # Dual FTS5 external content tables for exact and fuzzy matching - # files_fts_exact: unicode61 tokenizer for exact token matching - # files_fts_fuzzy: trigram tokenizer (or extended unicode61) for substring/fuzzy matching - from codexlens.storage.sqlite_utils import check_trigram_support - - has_trigram = check_trigram_support(conn) - fuzzy_tokenizer = "trigram" if has_trigram else "unicode61 tokenchars '_-.'" - - # Exact FTS table with unicode61 tokenizer - # Note: tokenchars includes '.' to properly tokenize qualified names like PortRole.FLOW - conn.execute( - """ - CREATE VIRTUAL TABLE IF NOT EXISTS files_fts_exact USING fts5( - name, full_path UNINDEXED, content, - content='files', - content_rowid='id', - tokenize="unicode61 tokenchars '_-.'" - ) - """ - ) - - # Fuzzy FTS table with trigram or extended unicode61 tokenizer - conn.execute( - f""" - CREATE VIRTUAL TABLE IF NOT EXISTS files_fts_fuzzy USING fts5( - name, full_path UNINDEXED, content, - content='files', - content_rowid='id', - tokenize="{fuzzy_tokenizer}" - ) - """ - ) - - # Semantic metadata table (v5: removed keywords column) - conn.execute( - """ - CREATE TABLE IF NOT EXISTS semantic_metadata ( - id INTEGER PRIMARY KEY, - file_id INTEGER UNIQUE REFERENCES files(id) ON DELETE CASCADE, - summary TEXT, - purpose TEXT, - llm_tool TEXT, - generated_at REAL - ) - """ - ) - - # Normalized keywords tables for performance - conn.execute( - """ - CREATE TABLE IF NOT EXISTS keywords ( - id INTEGER PRIMARY KEY, - keyword TEXT NOT NULL UNIQUE - ) - """ - ) - - conn.execute( - """ - CREATE TABLE IF NOT EXISTS file_keywords ( - file_id INTEGER NOT NULL, - keyword_id INTEGER NOT NULL, - PRIMARY KEY (file_id, keyword_id), - FOREIGN KEY (file_id) REFERENCES files (id) ON DELETE CASCADE, - FOREIGN KEY (keyword_id) REFERENCES keywords (id) ON DELETE CASCADE - ) - """ - ) - - # Code relationships table for graph visualization - conn.execute( - """ - CREATE TABLE IF NOT EXISTS code_relationships ( - id INTEGER PRIMARY KEY, - source_symbol_id INTEGER NOT NULL, - target_qualified_name TEXT NOT NULL, - relationship_type TEXT NOT NULL, - source_line INTEGER NOT NULL, - target_file TEXT, - FOREIGN KEY (source_symbol_id) REFERENCES symbols (id) ON DELETE CASCADE - ) - """ - ) - - # Precomputed graph neighbors cache for search expansion (v7) - conn.execute( - """ - CREATE TABLE IF NOT EXISTS graph_neighbors ( - source_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE, - neighbor_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE, - relationship_depth INTEGER NOT NULL, - PRIMARY KEY (source_symbol_id, neighbor_symbol_id) - ) - """ - ) - - # Merkle hashes for incremental change detection (v8) - conn.execute( - """ - CREATE TABLE IF NOT EXISTS merkle_hashes ( - file_id INTEGER PRIMARY KEY REFERENCES files(id) ON DELETE CASCADE, - sha256 TEXT NOT NULL, - updated_at REAL - ) - """ - ) - - conn.execute( - """ - CREATE TABLE IF NOT EXISTS merkle_state ( - id INTEGER PRIMARY KEY CHECK (id = 1), - root_hash TEXT, - updated_at REAL - ) - """ - ) - - # Indexes (v5: removed idx_symbols_type) - conn.execute("CREATE INDEX IF NOT EXISTS idx_files_name ON files(name)") - conn.execute("CREATE INDEX IF NOT EXISTS idx_files_path ON files(full_path)") - conn.execute("CREATE INDEX IF NOT EXISTS idx_subdirs_name ON subdirs(name)") - conn.execute("CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name)") - conn.execute("CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id)") - conn.execute("CREATE INDEX IF NOT EXISTS idx_semantic_file ON semantic_metadata(file_id)") - conn.execute("CREATE INDEX IF NOT EXISTS idx_keywords_keyword ON keywords(keyword)") - conn.execute("CREATE INDEX IF NOT EXISTS idx_file_keywords_file_id ON file_keywords(file_id)") - conn.execute("CREATE INDEX IF NOT EXISTS idx_file_keywords_keyword_id ON file_keywords(keyword_id)") - conn.execute("CREATE INDEX IF NOT EXISTS idx_rel_source ON code_relationships(source_symbol_id)") - conn.execute("CREATE INDEX IF NOT EXISTS idx_rel_target ON code_relationships(target_qualified_name)") - conn.execute("CREATE INDEX IF NOT EXISTS idx_rel_type ON code_relationships(relationship_type)") - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_graph_neighbors_source_depth " - "ON graph_neighbors(source_symbol_id, relationship_depth)" - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_graph_neighbors_neighbor " - "ON graph_neighbors(neighbor_symbol_id)" - ) - - except sqlite3.DatabaseError as exc: - raise StorageError(f"Failed to create schema: {exc}") from exc - - def _migrate_v2_add_name_column(self, conn: sqlite3.Connection) -> None: - """Migration v2: Add 'name' column to files table. - - Required for FTS5 external content table. - - Args: - conn: Database connection - """ - # Check if files table exists and has columns - cursor = conn.execute("PRAGMA table_info(files)") - files_columns = {row[1] for row in cursor.fetchall()} - - if not files_columns: - return # No files table yet, will be created fresh - - # Skip if 'name' column already exists - if "name" in files_columns: - return - - # Add 'name' column with default value - conn.execute("ALTER TABLE files ADD COLUMN name TEXT NOT NULL DEFAULT ''") - - # Populate 'name' column from full_path using pathlib for robustness - rows = conn.execute("SELECT id, full_path FROM files WHERE name = ''").fetchall() - for row in rows: - file_id = row[0] - full_path = row[1] - # Use pathlib.Path.name for cross-platform compatibility - name = Path(full_path).name if full_path else "" - conn.execute("UPDATE files SET name = ? WHERE id = ?", (name, file_id)) - - def _create_fts_triggers(self, conn: sqlite3.Connection) -> None: - """Create FTS5 external content triggers for dual FTS tables. - - Creates synchronized triggers for both files_fts_exact and files_fts_fuzzy tables. - - Args: - conn: Database connection - """ - # Insert triggers for files_fts_exact - conn.execute( - """ - CREATE TRIGGER IF NOT EXISTS files_exact_ai AFTER INSERT ON files BEGIN - INSERT INTO files_fts_exact(rowid, name, full_path, content) - VALUES(new.id, new.name, new.full_path, new.content); - END - """ - ) - - # Delete trigger for files_fts_exact - conn.execute( - """ - CREATE TRIGGER IF NOT EXISTS files_exact_ad AFTER DELETE ON files BEGIN - INSERT INTO files_fts_exact(files_fts_exact, rowid, name, full_path, content) - VALUES('delete', old.id, old.name, old.full_path, old.content); - END - """ - ) - - # Update trigger for files_fts_exact - conn.execute( - """ - CREATE TRIGGER IF NOT EXISTS files_exact_au AFTER UPDATE ON files BEGIN - INSERT INTO files_fts_exact(files_fts_exact, rowid, name, full_path, content) - VALUES('delete', old.id, old.name, old.full_path, old.content); - INSERT INTO files_fts_exact(rowid, name, full_path, content) - VALUES(new.id, new.name, new.full_path, new.content); - END - """ - ) - - # Insert trigger for files_fts_fuzzy - conn.execute( - """ - CREATE TRIGGER IF NOT EXISTS files_fuzzy_ai AFTER INSERT ON files BEGIN - INSERT INTO files_fts_fuzzy(rowid, name, full_path, content) - VALUES(new.id, new.name, new.full_path, new.content); - END - """ - ) - - # Delete trigger for files_fts_fuzzy - conn.execute( - """ - CREATE TRIGGER IF NOT EXISTS files_fuzzy_ad AFTER DELETE ON files BEGIN - INSERT INTO files_fts_fuzzy(files_fts_fuzzy, rowid, name, full_path, content) - VALUES('delete', old.id, old.name, old.full_path, old.content); - END - """ - ) - - # Update trigger for files_fts_fuzzy - conn.execute( - """ - CREATE TRIGGER IF NOT EXISTS files_fuzzy_au AFTER UPDATE ON files BEGIN - INSERT INTO files_fts_fuzzy(files_fts_fuzzy, rowid, name, full_path, content) - VALUES('delete', old.id, old.name, old.full_path, old.content); - INSERT INTO files_fts_fuzzy(rowid, name, full_path, content) - VALUES(new.id, new.name, new.full_path, new.content); - END - """ - ) diff --git a/codex-lens/src/codexlens/storage/file_cache.py b/codex-lens/src/codexlens/storage/file_cache.py deleted file mode 100644 index b43613d1..00000000 --- a/codex-lens/src/codexlens/storage/file_cache.py +++ /dev/null @@ -1,32 +0,0 @@ -"""Simple filesystem cache helpers.""" - -from __future__ import annotations - -from dataclasses import dataclass -from pathlib import Path -from typing import Optional - - -@dataclass -class FileCache: - """Caches file mtimes for incremental indexing.""" - - cache_path: Path - - def load_mtime(self, path: Path) -> Optional[float]: - try: - key = self._key_for(path) - record = (self.cache_path / key).read_text(encoding="utf-8") - return float(record) - except Exception: - return None - - def store_mtime(self, path: Path, mtime: float) -> None: - self.cache_path.mkdir(parents=True, exist_ok=True) - key = self._key_for(path) - (self.cache_path / key).write_text(str(mtime), encoding="utf-8") - - def _key_for(self, path: Path) -> str: - safe = str(path).replace(":", "_").replace("\\", "_").replace("/", "_") - return f"{safe}.mtime" - diff --git a/codex-lens/src/codexlens/storage/global_index.py b/codex-lens/src/codexlens/storage/global_index.py deleted file mode 100644 index b2d9a453..00000000 --- a/codex-lens/src/codexlens/storage/global_index.py +++ /dev/null @@ -1,618 +0,0 @@ -"""Global cross-directory symbol index for fast lookups. - -Stores symbols for an entire project in a single SQLite database so symbol search -does not require traversing every directory _index.db. - -This index is updated incrementally during file indexing (delete+insert per file) -to avoid expensive batch rebuilds. -""" - -from __future__ import annotations - -import logging -import sqlite3 -import threading -from pathlib import Path -from typing import List, Optional, Tuple - -from codexlens.entities import CodeRelationship, Symbol -from codexlens.errors import StorageError - - -class GlobalSymbolIndex: - """Project-wide symbol index with incremental updates.""" - - SCHEMA_VERSION = 2 - DEFAULT_DB_NAME = "_global_symbols.db" - - def __init__(self, db_path: str | Path, project_id: int) -> None: - self.db_path = Path(db_path).resolve() - self.project_id = int(project_id) - self._lock = threading.RLock() - self._conn: Optional[sqlite3.Connection] = None - self.logger = logging.getLogger(__name__) - - def initialize(self) -> None: - """Create database and schema if not exists.""" - with self._lock: - self.db_path.parent.mkdir(parents=True, exist_ok=True) - conn = self._get_connection() - - current_version = self._get_schema_version(conn) - if current_version > self.SCHEMA_VERSION: - raise StorageError( - f"Database schema version {current_version} is newer than " - f"supported version {self.SCHEMA_VERSION}. " - f"Please update the application or use a compatible database.", - db_path=str(self.db_path), - operation="initialize", - details={ - "current_version": current_version, - "supported_version": self.SCHEMA_VERSION, - }, - ) - - if current_version == 0: - self._create_schema(conn) - self._set_schema_version(conn, self.SCHEMA_VERSION) - elif current_version < self.SCHEMA_VERSION: - self._apply_migrations(conn, current_version) - self._set_schema_version(conn, self.SCHEMA_VERSION) - - conn.commit() - - def close(self) -> None: - """Close database connection.""" - with self._lock: - if self._conn is not None: - try: - self._conn.close() - except Exception: - pass - finally: - self._conn = None - - def __enter__(self) -> "GlobalSymbolIndex": - self.initialize() - return self - - def __exit__(self, exc_type: object, exc: object, tb: object) -> None: - self.close() - - def add_symbol(self, symbol: Symbol, file_path: str | Path, index_path: str | Path) -> None: - """Insert a single symbol (idempotent) for incremental updates.""" - file_path_str = str(Path(file_path).resolve()) - index_path_str = str(Path(index_path).resolve()) - - with self._lock: - conn = self._get_connection() - try: - conn.execute( - """ - INSERT INTO global_symbols( - project_id, symbol_name, symbol_kind, - file_path, start_line, end_line, index_path - ) - VALUES(?, ?, ?, ?, ?, ?, ?) - ON CONFLICT( - project_id, symbol_name, symbol_kind, - file_path, start_line, end_line - ) - DO UPDATE SET - index_path=excluded.index_path - """, - ( - self.project_id, - symbol.name, - symbol.kind, - file_path_str, - symbol.range[0], - symbol.range[1], - index_path_str, - ), - ) - conn.commit() - except sqlite3.DatabaseError as exc: - conn.rollback() - raise StorageError( - f"Failed to add symbol {symbol.name}: {exc}", - db_path=str(self.db_path), - operation="add_symbol", - ) from exc - - def update_file_symbols( - self, - file_path: str | Path, - symbols: List[Symbol], - index_path: str | Path | None = None, - ) -> None: - """Replace all symbols for a file atomically (delete + insert).""" - file_path_str = str(Path(file_path).resolve()) - - index_path_str: Optional[str] - if index_path is not None: - index_path_str = str(Path(index_path).resolve()) - else: - index_path_str = self._get_existing_index_path(file_path_str) - - with self._lock: - conn = self._get_connection() - try: - conn.execute("BEGIN") - conn.execute( - "DELETE FROM global_symbols WHERE project_id=? AND file_path=?", - (self.project_id, file_path_str), - ) - - if symbols: - if not index_path_str: - raise StorageError( - "index_path is required when inserting symbols for a new file", - db_path=str(self.db_path), - operation="update_file_symbols", - details={"file_path": file_path_str}, - ) - - rows = [ - ( - self.project_id, - s.name, - s.kind, - file_path_str, - s.range[0], - s.range[1], - index_path_str, - ) - for s in symbols - ] - conn.executemany( - """ - INSERT INTO global_symbols( - project_id, symbol_name, symbol_kind, - file_path, start_line, end_line, index_path - ) - VALUES(?, ?, ?, ?, ?, ?, ?) - ON CONFLICT( - project_id, symbol_name, symbol_kind, - file_path, start_line, end_line - ) - DO UPDATE SET - index_path=excluded.index_path - """, - rows, - ) - - conn.commit() - except sqlite3.DatabaseError as exc: - conn.rollback() - raise StorageError( - f"Failed to update symbols for {file_path_str}: {exc}", - db_path=str(self.db_path), - operation="update_file_symbols", - ) from exc - - def delete_file_symbols(self, file_path: str | Path) -> int: - """Remove all symbols for a file. Returns number of rows deleted.""" - file_path_str = str(Path(file_path).resolve()) - with self._lock: - conn = self._get_connection() - try: - cur = conn.execute( - "DELETE FROM global_symbols WHERE project_id=? AND file_path=?", - (self.project_id, file_path_str), - ) - conn.commit() - return int(cur.rowcount or 0) - except sqlite3.DatabaseError as exc: - conn.rollback() - raise StorageError( - f"Failed to delete symbols for {file_path_str}: {exc}", - db_path=str(self.db_path), - operation="delete_file_symbols", - ) from exc - - def search( - self, - name: str, - kind: Optional[str] = None, - limit: int = 50, - prefix_mode: bool = True, - ) -> List[Symbol]: - """Search symbols and return full Symbol objects.""" - if prefix_mode: - pattern = f"{name}%" - else: - pattern = f"%{name}%" - - with self._lock: - conn = self._get_connection() - if kind: - rows = conn.execute( - """ - SELECT symbol_name, symbol_kind, file_path, start_line, end_line - FROM global_symbols - WHERE project_id=? AND symbol_name LIKE ? AND symbol_kind=? - ORDER BY symbol_name - LIMIT ? - """, - (self.project_id, pattern, kind, limit), - ).fetchall() - else: - rows = conn.execute( - """ - SELECT symbol_name, symbol_kind, file_path, start_line, end_line - FROM global_symbols - WHERE project_id=? AND symbol_name LIKE ? - ORDER BY symbol_name - LIMIT ? - """, - (self.project_id, pattern, limit), - ).fetchall() - - return [ - Symbol( - name=row["symbol_name"], - kind=row["symbol_kind"], - range=(row["start_line"], row["end_line"]), - file=row["file_path"], - ) - for row in rows - ] - - def search_symbols( - self, - name: str, - kind: Optional[str] = None, - limit: int = 50, - prefix_mode: bool = True, - ) -> List[Tuple[str, Tuple[int, int]]]: - """Search symbols and return only (file_path, (start_line, end_line)).""" - symbols = self.search(name=name, kind=kind, limit=limit, prefix_mode=prefix_mode) - return [(s.file or "", s.range) for s in symbols] - - def get_file_symbols(self, file_path: str | Path) -> List[Symbol]: - """Get all symbols in a specific file, sorted by start_line. - - Args: - file_path: Full path to the file - - Returns: - List of Symbol objects sorted by start_line - """ - file_path_str = str(Path(file_path).resolve()) - - with self._lock: - conn = self._get_connection() - rows = conn.execute( - """ - SELECT symbol_name, symbol_kind, file_path, start_line, end_line - FROM global_symbols - WHERE project_id=? AND file_path=? - ORDER BY start_line - """, - (self.project_id, file_path_str), - ).fetchall() - - return [ - Symbol( - name=row["symbol_name"], - kind=row["symbol_kind"], - range=(row["start_line"], row["end_line"]), - file=row["file_path"], - ) - for row in rows - ] - - # ------------------------------------------------------------------ - # Relationship CRUD - # ------------------------------------------------------------------ - - def update_file_relationships( - self, - file_path: str | Path, - relationships: List[CodeRelationship], - ) -> None: - """Replace all relationships for a file atomically (delete + insert). - - Uses the same delete-then-insert pattern as ``update_file_symbols``. - The *target_qualified_name* stored in the DB is built from - ``target_file`` (when available) and ``target_symbol`` so that - cross-directory lookups work correctly. - """ - file_path_str = str(Path(file_path).resolve()) - - with self._lock: - conn = self._get_connection() - try: - conn.execute("BEGIN") - conn.execute( - "DELETE FROM global_relationships WHERE project_id=? AND source_file=?", - (self.project_id, file_path_str), - ) - - if relationships: - rows = [ - ( - self.project_id, - file_path_str, - rel.source_symbol, - self._build_qualified_name(rel), - rel.relationship_type.value, - rel.source_line, - ) - for rel in relationships - ] - conn.executemany( - """ - INSERT INTO global_relationships( - project_id, source_file, source_symbol, - target_qualified_name, relationship_type, source_line - ) - VALUES(?, ?, ?, ?, ?, ?) - """, - rows, - ) - - conn.commit() - except sqlite3.DatabaseError as exc: - conn.rollback() - raise StorageError( - f"Failed to update relationships for {file_path_str}: {exc}", - db_path=str(self.db_path), - operation="update_file_relationships", - ) from exc - - def query_by_target( - self, - target_name: str, - limit: int = 50, - prefix_mode: bool = True, - ) -> List[Tuple[str, str, str, int]]: - """Query relationships by target_qualified_name. - - Returns list of ``(source_file, source_symbol, relationship_type, source_line)``. - When *prefix_mode* is True the target_name is matched as a prefix; - otherwise an exact match is required. - """ - if prefix_mode: - pattern = f"{target_name}%" - else: - pattern = target_name - - with self._lock: - conn = self._get_connection() - if prefix_mode: - rows = conn.execute( - """ - SELECT source_file, source_symbol, relationship_type, source_line - FROM global_relationships - WHERE project_id=? AND target_qualified_name LIKE ? - ORDER BY source_file, source_line - LIMIT ? - """, - (self.project_id, pattern, limit), - ).fetchall() - else: - rows = conn.execute( - """ - SELECT source_file, source_symbol, relationship_type, source_line - FROM global_relationships - WHERE project_id=? AND target_qualified_name=? - ORDER BY source_file, source_line - LIMIT ? - """, - (self.project_id, pattern, limit), - ).fetchall() - - return [ - ( - row["source_file"], - row["source_symbol"], - row["relationship_type"], - row["source_line"], - ) - for row in rows - ] - - def query_relationships_for_symbols( - self, - symbol_names: List[str], - limit: int = 100, - ) -> List[sqlite3.Row]: - """Query all relationships involving any of *symbol_names*. - - Matches against both ``source_symbol`` and ``target_qualified_name`` - (the target column is checked with a LIKE ``%name%`` pattern so that - qualified names like ``mod.ClassName`` still match ``ClassName``). - """ - if not symbol_names: - return [] - - with self._lock: - conn = self._get_connection() - # Build WHERE clause: (source_symbol IN (...)) OR (target LIKE ...) - source_placeholders = ",".join("?" for _ in symbol_names) - target_clauses = " OR ".join( - "target_qualified_name LIKE ?" for _ in symbol_names - ) - target_patterns = [f"%{name}" for name in symbol_names] - - sql = f""" - SELECT id, project_id, source_file, source_symbol, - target_qualified_name, relationship_type, source_line - FROM global_relationships - WHERE project_id=? - AND ( - source_symbol IN ({source_placeholders}) - OR ({target_clauses}) - ) - ORDER BY source_file, source_line - LIMIT ? - """ - params: list = [self.project_id, *symbol_names, *target_patterns, limit] - return conn.execute(sql, params).fetchall() - - def delete_file_relationships(self, file_path: str | Path) -> int: - """Remove all relationships for a file. Returns number of rows deleted.""" - file_path_str = str(Path(file_path).resolve()) - with self._lock: - conn = self._get_connection() - try: - cur = conn.execute( - "DELETE FROM global_relationships WHERE project_id=? AND source_file=?", - (self.project_id, file_path_str), - ) - conn.commit() - return int(cur.rowcount or 0) - except sqlite3.DatabaseError as exc: - conn.rollback() - raise StorageError( - f"Failed to delete relationships for {file_path_str}: {exc}", - db_path=str(self.db_path), - operation="delete_file_relationships", - ) from exc - - @staticmethod - def _build_qualified_name(rel: CodeRelationship) -> str: - """Build a qualified name from a CodeRelationship. - - Format: ``::`` when target_file is known, - otherwise just ````. - """ - if rel.target_file: - return f"{rel.target_file}::{rel.target_symbol}" - return rel.target_symbol - - def _get_existing_index_path(self, file_path_str: str) -> Optional[str]: - with self._lock: - conn = self._get_connection() - row = conn.execute( - """ - SELECT index_path - FROM global_symbols - WHERE project_id=? AND file_path=? - LIMIT 1 - """, - (self.project_id, file_path_str), - ).fetchone() - return str(row["index_path"]) if row else None - - def _get_schema_version(self, conn: sqlite3.Connection) -> int: - try: - row = conn.execute("PRAGMA user_version").fetchone() - return int(row[0]) if row else 0 - except Exception: - return 0 - - def _set_schema_version(self, conn: sqlite3.Connection, version: int) -> None: - conn.execute(f"PRAGMA user_version = {int(version)}") - - def _apply_migrations(self, conn: sqlite3.Connection, from_version: int) -> None: - if from_version < 2: - self._migrate_v1_to_v2(conn) - - def _migrate_v1_to_v2(self, conn: sqlite3.Connection) -> None: - """Add global_relationships table for v1 -> v2 migration.""" - try: - self._create_relationships_schema(conn) - except sqlite3.DatabaseError as exc: - raise StorageError( - f"Failed to migrate schema from v1 to v2: {exc}", - db_path=str(self.db_path), - operation="_migrate_v1_to_v2", - ) from exc - - def _get_connection(self) -> sqlite3.Connection: - if self._conn is None: - self._conn = sqlite3.connect(str(self.db_path), check_same_thread=False) - self._conn.row_factory = sqlite3.Row - self._conn.execute("PRAGMA journal_mode=WAL") - self._conn.execute("PRAGMA synchronous=NORMAL") - self._conn.execute("PRAGMA foreign_keys=ON") - self._conn.execute("PRAGMA mmap_size=30000000000") - return self._conn - - def _create_schema(self, conn: sqlite3.Connection) -> None: - try: - conn.execute( - """ - CREATE TABLE IF NOT EXISTS global_symbols ( - id INTEGER PRIMARY KEY, - project_id INTEGER NOT NULL, - symbol_name TEXT NOT NULL, - symbol_kind TEXT NOT NULL, - file_path TEXT NOT NULL, - start_line INTEGER, - end_line INTEGER, - index_path TEXT NOT NULL, - UNIQUE( - project_id, symbol_name, symbol_kind, - file_path, start_line, end_line - ) - ) - """ - ) - - # Required by optimization spec. - conn.execute( - """ - CREATE INDEX IF NOT EXISTS idx_global_symbols_name_kind - ON global_symbols(symbol_name, symbol_kind) - """ - ) - # Used by common queries (project-scoped name lookups). - conn.execute( - """ - CREATE INDEX IF NOT EXISTS idx_global_symbols_project_name_kind - ON global_symbols(project_id, symbol_name, symbol_kind) - """ - ) - conn.execute( - """ - CREATE INDEX IF NOT EXISTS idx_global_symbols_project_file - ON global_symbols(project_id, file_path) - """ - ) - conn.execute( - """ - CREATE INDEX IF NOT EXISTS idx_global_symbols_project_index_path - ON global_symbols(project_id, index_path) - """ - ) - - self._create_relationships_schema(conn) - except sqlite3.DatabaseError as exc: - raise StorageError( - f"Failed to initialize global symbol schema: {exc}", - db_path=str(self.db_path), - operation="_create_schema", - ) from exc - - def _create_relationships_schema(self, conn: sqlite3.Connection) -> None: - """Create the global_relationships table and indexes (idempotent).""" - conn.execute( - """ - CREATE TABLE IF NOT EXISTS global_relationships ( - id INTEGER PRIMARY KEY, - project_id INTEGER NOT NULL, - source_file TEXT NOT NULL, - source_symbol TEXT NOT NULL, - target_qualified_name TEXT NOT NULL, - relationship_type TEXT NOT NULL, - source_line INTEGER NOT NULL - ) - """ - ) - conn.execute( - """ - CREATE INDEX IF NOT EXISTS idx_global_rel_project_target - ON global_relationships(project_id, target_qualified_name) - """ - ) - conn.execute( - """ - CREATE INDEX IF NOT EXISTS idx_global_rel_project_source - ON global_relationships(project_id, source_file) - """ - ) - diff --git a/codex-lens/src/codexlens/storage/index_filters.py b/codex-lens/src/codexlens/storage/index_filters.py deleted file mode 100644 index 4f4a163f..00000000 --- a/codex-lens/src/codexlens/storage/index_filters.py +++ /dev/null @@ -1,47 +0,0 @@ -from __future__ import annotations - -from pathlib import Path -from typing import Iterable, List, Optional, Set - -from codexlens.storage.index_tree import DEFAULT_IGNORE_DIRS - - -EXTRA_IGNORED_INDEX_DIRS = frozenset({".workflow"}) -IGNORED_INDEX_DIRS = frozenset({name.casefold() for name in DEFAULT_IGNORE_DIRS | set(EXTRA_IGNORED_INDEX_DIRS)}) - - -def is_ignored_index_path( - index_path: Path, - scan_root: Path, - *, - ignored_dir_names: Optional[Set[str]] = None, -) -> bool: - """Return True when an index lives under an ignored/generated subtree.""" - - ignored = ( - {name.casefold() for name in ignored_dir_names} - if ignored_dir_names is not None - else IGNORED_INDEX_DIRS - ) - - try: - relative_parts = index_path.resolve().relative_to(scan_root.resolve()).parts[:-1] - except ValueError: - return False - - return any(part.casefold() in ignored for part in relative_parts) - - -def filter_index_paths( - index_paths: Iterable[Path], - scan_root: Path, - *, - ignored_dir_names: Optional[Set[str]] = None, -) -> List[Path]: - """Filter out discovered indexes that belong to ignored/generated subtrees.""" - - return [ - path - for path in index_paths - if not is_ignored_index_path(path, scan_root, ignored_dir_names=ignored_dir_names) - ] diff --git a/codex-lens/src/codexlens/storage/index_tree.py b/codex-lens/src/codexlens/storage/index_tree.py deleted file mode 100644 index 0a7f7894..00000000 --- a/codex-lens/src/codexlens/storage/index_tree.py +++ /dev/null @@ -1,1320 +0,0 @@ -"""Hierarchical index tree builder for CodexLens. - -Constructs a bottom-up directory index tree with parallel processing support. -Each directory maintains its own _index.db with files and subdirectory links. -""" - -from __future__ import annotations - -import fnmatch -import logging -import os -import re -import sqlite3 -import time -from concurrent.futures import ProcessPoolExecutor, as_completed -from dataclasses import dataclass -from pathlib import Path -from typing import Dict, List, Optional, Set, Tuple - -from codexlens.config import Config -from codexlens.parsers.factory import ParserFactory -from codexlens.storage.dir_index import DirIndexStore -from codexlens.storage.global_index import GlobalSymbolIndex -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import ProjectInfo, RegistryStore - - -DEFAULT_IGNORE_DIRS: Set[str] = { - ".git", - ".svn", - ".hg", - ".venv", - "venv", - "env", - "node_modules", - "bower_components", - "__pycache__", - ".pytest_cache", - ".mypy_cache", - ".ruff_cache", - ".npm", - ".yarn", - ".codexlens", - ".idea", - ".vscode", - ".vs", - ".eclipse", - "dist", - "build", - "out", - "target", - "bin", - "obj", - "_build", - "coverage", - "htmlcov", - ".cache", - ".parcel-cache", - ".turbo", - ".next", - ".nuxt", - "logs", - "tmp", - "temp", -} - - -@dataclass -class BuildResult: - """Complete build operation result.""" - - project_id: int - source_root: Path - index_root: Path - total_files: int - total_dirs: int - errors: List[str] - - -@dataclass -class DirBuildResult: - """Single directory build result.""" - - source_path: Path - index_path: Path - files_count: int - symbols_count: int - subdirs: List[str] # Subdirectory names - error: Optional[str] = None - - -class IndexTreeBuilder: - """Hierarchical index tree builder with parallel processing. - - Builds directory indexes bottom-up to enable proper subdirectory linking. - Each directory gets its own _index.db containing: - - Files in that directory - - Links to child directory indexes - - Symbols and FTS5 search - - Attributes: - registry: Global project registry - mapper: Path mapping between source and index - config: CodexLens configuration - parser_factory: Parser factory for symbol extraction - logger: Logger instance - IGNORE_DIRS: Set of directory names to skip during indexing - """ - - # Directories to skip during indexing - IGNORE_DIRS: Set[str] = DEFAULT_IGNORE_DIRS - - def __init__( - self, registry: RegistryStore, mapper: PathMapper, config: Config = None, incremental: bool = True - ): - """Initialize the index tree builder. - - Args: - registry: Global registry store for project tracking - mapper: Path mapper for source to index conversions - config: CodexLens configuration (uses defaults if None) - incremental: Enable incremental indexing (default True) - """ - self.registry = registry - self.mapper = mapper - self.config = config or Config.load() - self.parser_factory = ParserFactory(self.config) - self.logger = logging.getLogger(__name__) - self.incremental = incremental - self.ignore_patterns = self._resolve_ignore_patterns() - self.extension_filters = self._resolve_extension_filters() - - def _resolve_ignore_patterns(self) -> Tuple[str, ...]: - configured_patterns = getattr(self.config, "ignore_patterns", None) - raw_patterns = configured_patterns if configured_patterns else list(DEFAULT_IGNORE_DIRS) - cleaned: List[str] = [] - for item in raw_patterns: - pattern = str(item).strip().replace('\\', '/').rstrip('/') - if pattern: - cleaned.append(pattern) - return tuple(dict.fromkeys(cleaned)) - - def _resolve_extension_filters(self) -> Tuple[str, ...]: - configured_filters = getattr(self.config, "extension_filters", None) - if not configured_filters: - return tuple() - - cleaned: List[str] = [] - for item in configured_filters: - pattern = str(item).strip().replace('\\', '/').rstrip('/') - if pattern: - cleaned.append(pattern) - return tuple(dict.fromkeys(cleaned)) - - def _is_ignored_dir(self, dir_path: Path, source_root: Optional[Path] = None) -> bool: - name = dir_path.name - if name.startswith('.'): - return True - - rel_path: Optional[str] = None - if source_root is not None: - try: - rel_path = dir_path.relative_to(source_root).as_posix() - except ValueError: - rel_path = None - - for pattern in self.ignore_patterns: - if pattern == name or fnmatch.fnmatch(name, pattern): - return True - if rel_path and (pattern == rel_path or fnmatch.fnmatch(rel_path, pattern)): - return True - - return False - - def _is_filtered_file(self, file_path: Path, source_root: Optional[Path] = None) -> bool: - if not self.extension_filters: - return False - - rel_path: Optional[str] = None - if source_root is not None: - try: - rel_path = file_path.relative_to(source_root).as_posix() - except ValueError: - rel_path = None - - for pattern in self.extension_filters: - if pattern == file_path.name or fnmatch.fnmatch(file_path.name, pattern): - return True - if rel_path and (pattern == rel_path or fnmatch.fnmatch(rel_path, pattern)): - return True - - return False - - def build( - self, - source_root: Path, - languages: List[str] = None, - workers: int = None, - force_full: bool = False, - ) -> BuildResult: - """Build complete index tree for a project. - - Process: - 1. Register project in registry - 2. Collect all directories grouped by depth - 3. Build indexes bottom-up (deepest first) - 4. Link subdirectories to parents - 5. Update project statistics - 6. Cleanup deleted files (if incremental mode) - - Args: - source_root: Project root directory to index - languages: Optional list of language IDs to limit indexing - workers: Number of parallel worker processes - force_full: Force full reindex (override incremental mode) - - Returns: - BuildResult with statistics and errors - - Raises: - ValueError: If source_root doesn't exist - """ - source_root = source_root.resolve() - if not source_root.exists(): - raise ValueError(f"Source root does not exist: {source_root}") - - # Auto-detect optimal worker count if not specified - if workers is None: - workers = min(os.cpu_count() or 4, 16) # Cap at 16 workers - self.logger.debug("Auto-detected %d workers for parallel indexing", workers) - - # Override incremental mode if force_full is True - use_incremental = self.incremental and not force_full - if force_full: - self.logger.info("Building index tree for %s (FULL reindex)", source_root) - else: - self.logger.info("Building index tree for %s (incremental=%s)", source_root, use_incremental) - - # Register project - index_root = self.mapper.source_to_index_dir(source_root) - project_info = self.registry.register_project(source_root, index_root) - global_index_db_path = index_root / GlobalSymbolIndex.DEFAULT_DB_NAME - - global_index: GlobalSymbolIndex | None = None - if self.config.global_symbol_index_enabled: - global_index = GlobalSymbolIndex(global_index_db_path, project_id=project_info.id) - global_index.initialize() - - # Report progress: discovering files (5%) - print("Discovering files...", flush=True) - - # Collect directories by depth - dirs_by_depth = self._collect_dirs_by_depth(source_root, languages) - - if force_full: - pruned_dirs = self._prune_stale_project_dirs( - project_id=project_info.id, - source_root=source_root, - dirs_by_depth=dirs_by_depth, - ) - if pruned_dirs: - self.logger.info( - "Pruned %d stale directory mappings before full rebuild", - len(pruned_dirs), - ) - - if not dirs_by_depth: - self.logger.warning("No indexable directories found in %s", source_root) - if global_index is not None: - global_index.close() - return BuildResult( - project_id=project_info.id, - source_root=source_root, - index_root=index_root, - total_files=0, - total_dirs=0, - errors=["No indexable directories found"], - ) - - # Calculate total directories for progress tracking - total_dirs_to_process = sum(len(dirs) for dirs in dirs_by_depth.values()) - processed_dirs = 0 - - # Report progress: building index (10%) - print("Building index...", flush=True) - - total_files = 0 - total_dirs = 0 - all_errors: List[str] = [] - all_results: List[DirBuildResult] = [] # Store all results for subdir linking - - # Build bottom-up (highest depth first) - max_depth = max(dirs_by_depth.keys()) - for depth in range(max_depth, -1, -1): - if depth not in dirs_by_depth: - continue - - dirs = dirs_by_depth[depth] - self.logger.info("Building %d directories at depth %d", len(dirs), depth) - - # Build directories at this level in parallel - results = self._build_level_parallel( - dirs, - languages, - workers, - source_root=source_root, - project_id=project_info.id, - global_index_db_path=global_index_db_path, - ) - all_results.extend(results) - - # Process results - for result in results: - if result.error: - all_errors.append(f"{result.source_path}: {result.error}") - processed_dirs += 1 - continue - - total_files += result.files_count - total_dirs += 1 - processed_dirs += 1 - - # Report progress for each processed directory (10-80%) - # Use "Processing file" format for frontend parser compatibility - progress_percent = 10 + int((processed_dirs / total_dirs_to_process) * 70) - print(f"Processing file {processed_dirs}/{total_dirs_to_process}: {result.source_path.name}", flush=True) - - # Register directory in registry - self.registry.register_dir( - project_id=project_info.id, - source_path=result.source_path, - index_path=result.index_path, - depth=self.mapper.get_relative_depth(result.source_path, source_root), - files_count=result.files_count, - ) - - # Report progress: linking subdirectories (80%) - print("Linking subdirectories...", flush=True) - - # After building all directories, link subdirectories to parents - # This needs to happen after all indexes exist - for result in all_results: - if result.error: - continue - # Link children to this directory - self._link_children_to_parent(result.source_path, all_results) - - # Cleanup deleted files if in incremental mode - if use_incremental: - # Report progress: cleaning up (90%) - print("Cleaning up deleted files...", flush=True) - self.logger.info("Cleaning up deleted files...") - total_deleted = 0 - for result in all_results: - if result.error: - continue - try: - with DirIndexStore(result.index_path, config=self.config, global_index=global_index) as store: - deleted_count = store.cleanup_deleted_files(result.source_path) - if deleted_count > 0: - _compute_graph_neighbors(store, logger=self.logger) - store.update_merkle_root() - total_deleted += deleted_count - if deleted_count > 0: - self.logger.debug("Removed %d deleted files from %s", deleted_count, result.source_path) - except Exception as exc: - self.logger.warning("Cleanup failed for %s: %s", result.source_path, exc) - - if total_deleted > 0: - self.logger.info("Removed %d deleted files from index", total_deleted) - - # Report progress: finalizing (95%) - print("Finalizing...", flush=True) - - # Update project statistics - self.registry.update_project_stats(source_root, total_files, total_dirs) - - # Report completion (100%) - print(f"Indexed {total_files} files", flush=True) - - self.logger.info( - "Index build complete: %d files, %d directories, %d errors", - total_files, - total_dirs, - len(all_errors), - ) - - if global_index is not None: - global_index.close() - - return BuildResult( - project_id=project_info.id, - source_root=source_root, - index_root=index_root, - total_files=total_files, - total_dirs=total_dirs, - errors=all_errors, - ) - - def update_subtree( - self, - source_path: Path, - languages: List[str] = None, - workers: int = None, - ) -> BuildResult: - """Incrementally update a subtree. - - Rebuilds indexes for the specified directory and all subdirectories. - Useful for incremental updates when only part of the tree changed. - - Args: - source_path: Root of subtree to update - languages: Optional list of language IDs to limit indexing - workers: Number of parallel worker processes - - Returns: - BuildResult for the subtree - - Raises: - ValueError: If source_path is not indexed - """ - source_path = source_path.resolve() - project_root = self.mapper.get_project_root(source_path) - - # Get project info - project_info = self.registry.get_project(project_root) - if not project_info: - raise ValueError(f"Directory not indexed: {source_path}") - - self.logger.info("Updating subtree at %s", source_path) - - # Use build logic but start from source_path - return self.build(source_path, languages, workers) - - def rebuild_dir(self, source_path: Path) -> DirBuildResult: - """Rebuild index for a single directory. - - Only rebuilds the specified directory, does not touch subdirectories. - Useful for updating a single directory after file changes. - - Args: - source_path: Directory to rebuild - - Returns: - DirBuildResult for the directory - """ - source_path = source_path.resolve() - self.logger.info("Rebuilding directory %s", source_path) - project_root = self.mapper.get_project_root(source_path) - project_info = self.registry.get_project(project_root) - if not project_info: - raise ValueError(f"Directory not indexed: {source_path}") - - global_index_db_path = project_info.index_root / GlobalSymbolIndex.DEFAULT_DB_NAME - return self._build_single_dir( - source_path, - languages=None, - source_root=project_root, - project_id=project_info.id, - global_index_db_path=global_index_db_path, - ) - - # === Internal Methods === - - def _prune_stale_project_dirs( - self, - *, - project_id: int, - source_root: Path, - dirs_by_depth: Dict[int, List[Path]], - ) -> List[Path]: - """Remove registry mappings for directories no longer included in the index tree.""" - source_root = source_root.resolve() - valid_dirs: Set[Path] = { - path.resolve() - for paths in dirs_by_depth.values() - for path in paths - } - valid_dirs.add(source_root) - - stale_mappings = [] - for mapping in self.registry.get_project_dirs(project_id): - mapping_path = mapping.source_path.resolve() - if mapping_path in valid_dirs: - continue - try: - mapping_path.relative_to(source_root) - except ValueError: - continue - stale_mappings.append(mapping) - - stale_mappings.sort( - key=lambda mapping: len(mapping.source_path.resolve().relative_to(source_root).parts), - reverse=True, - ) - - pruned_paths: List[Path] = [] - for mapping in stale_mappings: - try: - if self.registry.unregister_dir(mapping.source_path): - pruned_paths.append(mapping.source_path.resolve()) - except Exception as exc: - self.logger.warning( - "Failed to prune stale mapping for %s: %s", - mapping.source_path, - exc, - ) - - return pruned_paths - - def _collect_dirs_by_depth( - self, source_root: Path, languages: List[str] = None - ) -> Dict[int, List[Path]]: - """Collect all indexable directories grouped by depth. - - Walks the directory tree and groups directories by their depth - relative to source_root. Depth 0 is the root itself. - - Args: - source_root: Root directory to start from - languages: Optional language filter - - Returns: - Dictionary mapping depth to list of directory paths - Example: {0: [root], 1: [src, tests], 2: [src/api, src/utils]} - """ - source_root = source_root.resolve() - dirs_by_depth: Dict[int, List[Path]] = {} - - # Always include the root directory at depth 0 for chain search entry point - dirs_by_depth[0] = [source_root] - - for root, dirnames, _ in os.walk(source_root): - # Filter out ignored directories - root_path = Path(root) - dirnames[:] = [ - d - for d in dirnames - if not self._is_ignored_dir(root_path / d, source_root) - ] - - root_path = Path(root) - - # Skip root (already added) - if root_path == source_root: - continue - - # Check if this directory should be indexed - if not self._should_index_dir(root_path, languages, source_root=source_root): - continue - - # Calculate depth relative to source_root - try: - depth = len(root_path.relative_to(source_root).parts) - except ValueError: - continue - - if depth not in dirs_by_depth: - dirs_by_depth[depth] = [] - - dirs_by_depth[depth].append(root_path) - - return dirs_by_depth - - def _should_index_dir(self, dir_path: Path, languages: List[str] = None, source_root: Optional[Path] = None) -> bool: - """Check if directory should be indexed. - - A directory is indexed if: - 1. It's not in IGNORE_DIRS - 2. It doesn't start with '.' - 3. It contains at least one supported language file, OR - 4. It has subdirectories that contain supported files (transitive) - - Args: - dir_path: Directory to check - languages: Optional language filter - - Returns: - True if directory should be indexed - """ - # Check directory name - if self._is_ignored_dir(dir_path, source_root): - return False - - # Check for supported files in this directory - source_files = self._iter_source_files(dir_path, languages, source_root=source_root) - if len(source_files) > 0: - return True - - # Check if any subdirectory has indexable files (transitive) - # This handles cases like 'src' which has no direct files but has 'src/codexlens' - for item in dir_path.iterdir(): - if not item.is_dir(): - continue - if self._is_ignored_dir(item, source_root): - continue - # Recursively check subdirectories - if self._has_indexable_files_recursive(item, languages, source_root=source_root): - return True - - return False - - def _has_indexable_files_recursive(self, dir_path: Path, languages: List[str] = None, source_root: Optional[Path] = None) -> bool: - """Check if directory or any subdirectory has indexable files. - - Args: - dir_path: Directory to check - languages: Optional language filter - - Returns: - True if directory tree contains indexable files - """ - # Check for supported files in this directory - source_files = self._iter_source_files(dir_path, languages, source_root=source_root) - if len(source_files) > 0: - return True - - # Check subdirectories - try: - for item in dir_path.iterdir(): - if not item.is_dir(): - continue - if self._is_ignored_dir(item, source_root): - continue - if self._has_indexable_files_recursive(item, languages, source_root=source_root): - return True - except PermissionError: - pass - - return False - - def _build_level_parallel( - self, - dirs: List[Path], - languages: List[str], - workers: int, - *, - source_root: Path, - project_id: int, - global_index_db_path: Path, - ) -> List[DirBuildResult]: - """Build multiple directories in parallel. - - Uses ProcessPoolExecutor to build directories concurrently. - All directories at the same level are independent and can be - processed in parallel. - - Args: - dirs: List of directories to build - languages: Language filter - workers: Number of worker processes - - Returns: - List of DirBuildResult objects - """ - results: List[DirBuildResult] = [] - - if not dirs: - return results - - # For single directory, avoid overhead of process pool - if len(dirs) == 1: - result = self._build_single_dir( - dirs[0], - languages, - source_root=source_root, - project_id=project_id, - global_index_db_path=global_index_db_path, - ) - return [result] - - # Prepare arguments for worker processes - config_dict = { - "data_dir": str(self.config.data_dir), - "supported_languages": self.config.supported_languages, - "parsing_rules": self.config.parsing_rules, - "global_symbol_index_enabled": self.config.global_symbol_index_enabled, - "static_graph_enabled": self.config.static_graph_enabled, - "static_graph_relationship_types": self.config.static_graph_relationship_types, - "use_astgrep": getattr(self.config, "use_astgrep", False), - "ignore_patterns": list(self.ignore_patterns), - "extension_filters": list(self.extension_filters), - "incremental": bool(self.incremental), - } - - worker_args = [ - ( - dir_path, - self.mapper.source_to_index_db(dir_path), - languages, - config_dict, - int(project_id), - str(global_index_db_path), - str(source_root), - ) - for dir_path in dirs - ] - - # Execute in parallel - with ProcessPoolExecutor(max_workers=workers) as executor: - futures = { - executor.submit(_build_dir_worker, args): args[0] - for args in worker_args - } - - for future in as_completed(futures): - try: - result = future.result() - results.append(result) - except Exception as exc: - dir_path = futures[future] - self.logger.error("Failed to build %s: %s", dir_path, exc) - results.append( - DirBuildResult( - source_path=dir_path, - index_path=self.mapper.source_to_index_db(dir_path), - files_count=0, - symbols_count=0, - subdirs=[], - error=str(exc), - ) - ) - - return results - - def _build_single_dir( - self, - dir_path: Path, - languages: List[str] = None, - *, - source_root: Path, - project_id: int, - global_index_db_path: Path, - ) -> DirBuildResult: - """Build index for a single directory. - - Creates _index.db and indexes all files in the directory. - Does not recurse into subdirectories. - - Args: - dir_path: Directory to index - languages: Optional language filter - - Returns: - DirBuildResult with statistics and subdirectory list - """ - dir_path = dir_path.resolve() - index_db_path = self.mapper.source_to_index_db(dir_path) - - global_index: GlobalSymbolIndex | None = None - try: - # Ensure index directory exists - index_db_path.parent.mkdir(parents=True, exist_ok=True) - - if not self.incremental: - _reset_index_db_files(index_db_path) - - # Create directory index - if self.config.global_symbol_index_enabled: - global_index = GlobalSymbolIndex(global_index_db_path, project_id=project_id) - global_index.initialize() - - store = DirIndexStore(index_db_path, config=self.config, global_index=global_index) - store.initialize() - - # Get source files in this directory only - source_files = self._iter_source_files(dir_path, languages, source_root=source_root) - - files_count = 0 - symbols_count = 0 - skipped_count = 0 - - for file_path in source_files: - try: - # Check if file needs reindexing (incremental mode) - if self.incremental and not store.needs_reindex(file_path): - skipped_count += 1 - continue - - # Read and parse file - text = file_path.read_text(encoding="utf-8", errors="ignore") - language_id = self.config.language_for_path(file_path) - if not language_id: - continue - - parser = self.parser_factory.get_parser(language_id) - indexed_file = parser.parse(text, file_path) - - # Add to directory index - store.add_file( - name=file_path.name, - full_path=file_path, - content=text, - language=language_id, - symbols=indexed_file.symbols, - relationships=indexed_file.relationships, - ) - - # Write global relationships if enabled - if ( - self.config.static_graph_enabled - and global_index is not None - and indexed_file.relationships - ): - try: - filtered_rels = [ - r for r in indexed_file.relationships - if r.relationship_type.value in self.config.static_graph_relationship_types - ] - if filtered_rels: - global_index.update_file_relationships( - file_path, filtered_rels - ) - except Exception as rel_exc: - self.logger.warning( - "Failed to write global relationships for %s: %s", - file_path, rel_exc, - ) - - files_count += 1 - symbols_count += len(indexed_file.symbols) - - except Exception as exc: - self.logger.debug("Failed to index %s: %s", file_path, exc) - continue - - if files_count > 0: - _compute_graph_neighbors(store, logger=self.logger) - - # Get list of subdirectories - subdirs = [ - d.name - for d in dir_path.iterdir() - if d.is_dir() - and not self._is_ignored_dir(d, source_root=source_root) - ] - - store.update_merkle_root() - store.close() - if global_index is not None: - global_index.close() - - if skipped_count > 0: - self.logger.debug( - "Built %s: %d files indexed, %d skipped (unchanged), %d symbols, %d subdirs", - dir_path, - files_count, - skipped_count, - symbols_count, - len(subdirs), - ) - else: - self.logger.debug( - "Built %s: %d files, %d symbols, %d subdirs", - dir_path, - files_count, - symbols_count, - len(subdirs), - ) - - return DirBuildResult( - source_path=dir_path, - index_path=index_db_path, - files_count=files_count, - symbols_count=symbols_count, - subdirs=subdirs, - ) - - except Exception as exc: - self.logger.error("Failed to build directory %s: %s", dir_path, exc) - if global_index is not None: - try: - global_index.close() - except Exception: - pass - return DirBuildResult( - source_path=dir_path, - index_path=index_db_path, - files_count=0, - symbols_count=0, - subdirs=[], - error=str(exc), - ) - - def _link_children_to_parent( - self, parent_path: Path, all_results: List[DirBuildResult] - ) -> None: - """Link child directory indexes to parent's subdirs table. - - Finds all direct children of parent_path in all_results and - registers them as subdirectories in the parent's index. - - Args: - parent_path: Parent directory path - all_results: List of all build results - """ - parent_index_db = self.mapper.source_to_index_db(parent_path) - - try: - with DirIndexStore(parent_index_db, config=self.config) as store: - for result in all_results: - # Only register direct children (parent is one level up) - if result.source_path.parent != parent_path: - continue - - if result.error: - continue - - # Register subdirectory link - store.register_subdir( - name=result.source_path.name, - index_path=result.index_path, - files_count=result.files_count, - direct_files=result.files_count, - ) - self.logger.debug( - "Linked %s to parent %s", - result.source_path.name, - parent_path, - ) - - store.update_merkle_root() - - except Exception as exc: - self.logger.error( - "Failed to link children to %s: %s", parent_path, exc - ) - - def _iter_source_files( - self, dir_path: Path, languages: List[str] = None, source_root: Optional[Path] = None - ) -> List[Path]: - """Iterate source files in directory (non-recursive). - - Returns files in the specified directory that match language filters. - Does not recurse into subdirectories. - - Args: - dir_path: Directory to scan - languages: Optional language filter - - Returns: - List of source file paths - """ - files: List[Path] = [] - - if not dir_path.is_dir(): - return files - - for item in dir_path.iterdir(): - if not item.is_file(): - continue - - if item.name.startswith("."): - continue - - if self._is_filtered_file(item, source_root=source_root): - continue - - # Check language support - language_id = self.config.language_for_path(item) - if not language_id: - continue - - # Apply language filter - if languages and language_id not in languages: - continue - - files.append(item) - - return files - - -def _normalize_relationship_target(target: str) -> str: - """Best-effort normalization of a relationship target into a local symbol name.""" - target = (target or "").strip() - if not target: - return "" - - # Drop trailing call parentheses when present (e.g., "foo()" -> "foo"). - if target.endswith("()"): - target = target[:-2] - - # Keep the leaf identifier for common qualified formats. - for sep in ("::", ".", "#"): - if sep in target: - target = target.split(sep)[-1] - - # Strip non-identifier suffix/prefix noise. - target = re.sub(r"^[^A-Za-z0-9_]+", "", target) - target = re.sub(r"[^A-Za-z0-9_]+$", "", target) - return target - - -def _compute_graph_neighbors( - store: DirIndexStore, - *, - max_depth: int = 2, - logger: Optional[logging.Logger] = None, -) -> None: - """Compute and persist N-hop neighbors for all symbols in a directory index.""" - if max_depth <= 0: - return - - log = logger or logging.getLogger(__name__) - - with store._lock: - conn = store._get_connection() - conn.row_factory = sqlite3.Row - - # Ensure schema exists even for older databases pinned to the same user_version. - try: - from codexlens.storage.migrations.migration_007_add_graph_neighbors import upgrade - - upgrade(conn) - except Exception as exc: - log.debug("Graph neighbor schema ensure failed: %s", exc) - - cursor = conn.cursor() - - try: - cursor.execute("DELETE FROM graph_neighbors") - except sqlite3.Error: - # Table missing or schema mismatch; skip gracefully. - return - - try: - symbol_rows = cursor.execute( - "SELECT id, file_id, name FROM symbols" - ).fetchall() - rel_rows = cursor.execute( - "SELECT source_symbol_id, target_qualified_name FROM code_relationships" - ).fetchall() - except sqlite3.Error: - return - - if not symbol_rows or not rel_rows: - try: - conn.commit() - except sqlite3.Error: - pass - return - - symbol_file_by_id: Dict[int, int] = {} - symbols_by_file_and_name: Dict[Tuple[int, str], List[int]] = {} - symbols_by_name: Dict[str, List[int]] = {} - - for row in symbol_rows: - symbol_id = int(row["id"]) - file_id = int(row["file_id"]) - name = str(row["name"]) - symbol_file_by_id[symbol_id] = file_id - symbols_by_file_and_name.setdefault((file_id, name), []).append(symbol_id) - symbols_by_name.setdefault(name, []).append(symbol_id) - - adjacency: Dict[int, Set[int]] = {} - - for row in rel_rows: - source_id = int(row["source_symbol_id"]) - target_raw = str(row["target_qualified_name"] or "") - target_name = _normalize_relationship_target(target_raw) - if not target_name: - continue - - source_file_id = symbol_file_by_id.get(source_id) - if source_file_id is None: - continue - - candidate_ids = symbols_by_file_and_name.get((source_file_id, target_name)) - if not candidate_ids: - global_candidates = symbols_by_name.get(target_name, []) - # Only resolve cross-file by name when unambiguous. - candidate_ids = global_candidates if len(global_candidates) == 1 else [] - - for target_id in candidate_ids: - if target_id == source_id: - continue - adjacency.setdefault(source_id, set()).add(target_id) - adjacency.setdefault(target_id, set()).add(source_id) - - if not adjacency: - try: - conn.commit() - except sqlite3.Error: - pass - return - - insert_rows: List[Tuple[int, int, int]] = [] - max_depth = min(int(max_depth), 2) - - for source_id, first_hop in adjacency.items(): - if not first_hop: - continue - for neighbor_id in first_hop: - insert_rows.append((source_id, neighbor_id, 1)) - - if max_depth < 2: - continue - - second_hop: Set[int] = set() - for neighbor_id in first_hop: - second_hop.update(adjacency.get(neighbor_id, set())) - - second_hop.discard(source_id) - second_hop.difference_update(first_hop) - - for neighbor_id in second_hop: - insert_rows.append((source_id, neighbor_id, 2)) - - if not insert_rows: - try: - conn.commit() - except sqlite3.Error: - pass - return - - try: - cursor.executemany( - """ - INSERT INTO graph_neighbors( - source_symbol_id, neighbor_symbol_id, relationship_depth - ) - VALUES(?, ?, ?) - """, - insert_rows, - ) - conn.commit() - except sqlite3.Error: - return - - -# === Worker Function for ProcessPoolExecutor === - - -def _matches_path_patterns(path: Path, patterns: List[str], source_root: Optional[Path] = None) -> bool: - rel_path: Optional[str] = None - if source_root is not None: - try: - rel_path = path.relative_to(source_root).as_posix() - except ValueError: - rel_path = None - - for pattern in patterns: - normalized = str(pattern).strip().replace('\\', '/').rstrip('/') - if not normalized: - continue - if normalized == path.name or fnmatch.fnmatch(path.name, normalized): - return True - if rel_path and (normalized == rel_path or fnmatch.fnmatch(rel_path, normalized)): - return True - return False - - -def _matches_ignore_patterns(path: Path, patterns: List[str], source_root: Optional[Path] = None) -> bool: - if path.name.startswith('.'): - return True - return _matches_path_patterns(path, patterns, source_root) - - -def _matches_extension_filters(path: Path, patterns: List[str], source_root: Optional[Path] = None) -> bool: - if not patterns: - return False - return _matches_path_patterns(path, patterns, source_root) - - -def _reset_index_db_files(index_db_path: Path) -> None: - """Best-effort removal of a directory index DB and common SQLite sidecars.""" - for suffix in ("", "-wal", "-shm", "-journal"): - target = Path(f"{index_db_path}{suffix}") if suffix else index_db_path - try: - target.unlink() - except FileNotFoundError: - continue - except OSError: - continue - - -def _build_dir_worker(args: tuple) -> DirBuildResult: - """Worker function for parallel directory building. - - Must be at module level for ProcessPoolExecutor pickling. - Reconstructs necessary objects from serializable arguments. - - Args: - args: Tuple of (dir_path, index_db_path, languages, config_dict, project_id, global_index_db_path, source_root) - - Returns: - DirBuildResult for the directory - """ - dir_path, index_db_path, languages, config_dict, project_id, global_index_db_path, source_root = args - - # Reconstruct config - config = Config( - data_dir=Path(config_dict["data_dir"]), - supported_languages=config_dict["supported_languages"], - parsing_rules=config_dict["parsing_rules"], - global_symbol_index_enabled=bool(config_dict.get("global_symbol_index_enabled", True)), - static_graph_enabled=bool(config_dict.get("static_graph_enabled", False)), - static_graph_relationship_types=list(config_dict.get("static_graph_relationship_types", ["imports", "inherits"])), - use_astgrep=bool(config_dict.get("use_astgrep", False)), - ignore_patterns=list(config_dict.get("ignore_patterns", [])), - extension_filters=list(config_dict.get("extension_filters", [])), - ) - - parser_factory = ParserFactory(config) - source_root_path = Path(source_root) if source_root else None - - global_index: GlobalSymbolIndex | None = None - try: - # Ensure index directory exists - index_db_path.parent.mkdir(parents=True, exist_ok=True) - - # Create directory index - if config.global_symbol_index_enabled and global_index_db_path: - global_index = GlobalSymbolIndex(Path(global_index_db_path), project_id=int(project_id)) - global_index.initialize() - - if not bool(config_dict.get("incremental", True)): - _reset_index_db_files(index_db_path) - - store = DirIndexStore(index_db_path, config=config, global_index=global_index) - store.initialize() - - files_count = 0 - symbols_count = 0 - - # Index files in this directory - for item in dir_path.iterdir(): - if not item.is_file(): - continue - - if item.name.startswith("."): - continue - - if _matches_extension_filters(item, config.extension_filters, source_root_path): - continue - - language_id = config.language_for_path(item) - if not language_id: - continue - - if languages and language_id not in languages: - continue - - try: - text = item.read_text(encoding="utf-8", errors="ignore") - parser = parser_factory.get_parser(language_id) - indexed_file = parser.parse(text, item) - - store.add_file( - name=item.name, - full_path=item, - content=text, - language=language_id, - symbols=indexed_file.symbols, - relationships=indexed_file.relationships, - ) - - # Write global relationships if enabled - if ( - config.static_graph_enabled - and global_index is not None - and indexed_file.relationships - ): - try: - allowed_types = config.static_graph_relationship_types - filtered_rels = [ - r for r in indexed_file.relationships - if r.relationship_type.value in allowed_types - ] - if filtered_rels: - global_index.update_file_relationships( - item, filtered_rels - ) - except Exception: - pass # Don't block indexing - - files_count += 1 - symbols_count += len(indexed_file.symbols) - - except Exception: - continue - - if files_count > 0: - _compute_graph_neighbors(store) - - # Get subdirectories - ignore_patterns = list(config_dict.get("ignore_patterns", [])) or list(DEFAULT_IGNORE_DIRS) - subdirs = [ - d.name - for d in dir_path.iterdir() - if d.is_dir() and not _matches_ignore_patterns(d, ignore_patterns, source_root_path) - ] - - store.update_merkle_root() - store.close() - if global_index is not None: - global_index.close() - - return DirBuildResult( - source_path=dir_path, - index_path=index_db_path, - files_count=files_count, - symbols_count=symbols_count, - subdirs=subdirs, - ) - - except Exception as exc: - if global_index is not None: - try: - global_index.close() - except Exception: - pass - return DirBuildResult( - source_path=dir_path, - index_path=index_db_path, - files_count=0, - symbols_count=0, - subdirs=[], - error=str(exc), - ) diff --git a/codex-lens/src/codexlens/storage/merkle_tree.py b/codex-lens/src/codexlens/storage/merkle_tree.py deleted file mode 100644 index c8c76988..00000000 --- a/codex-lens/src/codexlens/storage/merkle_tree.py +++ /dev/null @@ -1,136 +0,0 @@ -"""Merkle tree utilities for change detection. - -This module provides a generic, file-system based Merkle tree implementation -that can be used to efficiently diff directory states. -""" - -from __future__ import annotations - -import hashlib -from dataclasses import dataclass, field -from pathlib import Path -from typing import Dict, Iterable, List, Optional - - -def sha256_bytes(data: bytes) -> str: - return hashlib.sha256(data).hexdigest() - - -def sha256_text(text: str) -> str: - return sha256_bytes(text.encode("utf-8", errors="ignore")) - - -@dataclass -class MerkleNode: - """A Merkle node representing either a file (leaf) or directory (internal).""" - - name: str - rel_path: str - hash: str - is_dir: bool - children: Dict[str, "MerkleNode"] = field(default_factory=dict) - - def iter_files(self) -> Iterable["MerkleNode"]: - if not self.is_dir: - yield self - return - for child in self.children.values(): - yield from child.iter_files() - - -@dataclass -class MerkleTree: - """Merkle tree for a directory snapshot.""" - - root: MerkleNode - - @classmethod - def build_from_directory(cls, root_dir: Path) -> "MerkleTree": - root_dir = Path(root_dir).resolve() - node = cls._build_node(root_dir, base=root_dir) - return cls(root=node) - - @classmethod - def _build_node(cls, path: Path, *, base: Path) -> MerkleNode: - if path.is_file(): - rel = str(path.relative_to(base)).replace("\\", "/") - return MerkleNode( - name=path.name, - rel_path=rel, - hash=sha256_bytes(path.read_bytes()), - is_dir=False, - ) - - if not path.is_dir(): - rel = str(path.relative_to(base)).replace("\\", "/") - return MerkleNode(name=path.name, rel_path=rel, hash="", is_dir=False) - - children: Dict[str, MerkleNode] = {} - for child in sorted(path.iterdir(), key=lambda p: p.name): - child_node = cls._build_node(child, base=base) - children[child_node.name] = child_node - - items = [ - f"{'d' if n.is_dir else 'f'}:{name}:{n.hash}" - for name, n in sorted(children.items(), key=lambda kv: kv[0]) - ] - dir_hash = sha256_text("\n".join(items)) - - rel_path = "." if path == base else str(path.relative_to(base)).replace("\\", "/") - return MerkleNode( - name="." if path == base else path.name, - rel_path=rel_path, - hash=dir_hash, - is_dir=True, - children=children, - ) - - @staticmethod - def find_changed_files(old: Optional["MerkleTree"], new: Optional["MerkleTree"]) -> List[str]: - """Find changed/added/removed files between two trees. - - Returns: - List of relative file paths (POSIX-style separators). - """ - if old is None and new is None: - return [] - if old is None: - return sorted({n.rel_path for n in new.root.iter_files()}) # type: ignore[union-attr] - if new is None: - return sorted({n.rel_path for n in old.root.iter_files()}) - - changed: set[str] = set() - - def walk(old_node: Optional[MerkleNode], new_node: Optional[MerkleNode]) -> None: - if old_node is None and new_node is None: - return - - if old_node is None and new_node is not None: - changed.update(n.rel_path for n in new_node.iter_files()) - return - - if new_node is None and old_node is not None: - changed.update(n.rel_path for n in old_node.iter_files()) - return - - assert old_node is not None and new_node is not None - - if old_node.hash == new_node.hash: - return - - if not old_node.is_dir and not new_node.is_dir: - changed.add(new_node.rel_path) - return - - if old_node.is_dir != new_node.is_dir: - changed.update(n.rel_path for n in old_node.iter_files()) - changed.update(n.rel_path for n in new_node.iter_files()) - return - - names = set(old_node.children.keys()) | set(new_node.children.keys()) - for name in names: - walk(old_node.children.get(name), new_node.children.get(name)) - - walk(old.root, new.root) - return sorted(changed) - diff --git a/codex-lens/src/codexlens/storage/migration_manager.py b/codex-lens/src/codexlens/storage/migration_manager.py deleted file mode 100644 index d8690806..00000000 --- a/codex-lens/src/codexlens/storage/migration_manager.py +++ /dev/null @@ -1,154 +0,0 @@ -""" -Manages database schema migrations. - -This module provides a framework for applying versioned migrations to the SQLite -database. Migrations are discovered from the `codexlens.storage.migrations` -package and applied sequentially. The database schema version is tracked using -the `user_version` pragma. -""" - -import importlib -import logging -import pkgutil -from pathlib import Path -from sqlite3 import Connection -from typing import List, NamedTuple - -log = logging.getLogger(__name__) - - -class Migration(NamedTuple): - """Represents a single database migration.""" - - version: int - name: str - upgrade: callable - - -def discover_migrations() -> List[Migration]: - """ - Discovers and returns a sorted list of database migrations. - - Migrations are expected to be in the `codexlens.storage.migrations` package, - with filenames in the format `migration_XXX_description.py`, where XXX is - the version number. Each migration module must contain an `upgrade` function - that takes a `sqlite3.Connection` object as its argument. - - Returns: - A list of Migration objects, sorted by version. - """ - import codexlens.storage.migrations - - migrations = [] - package_path = Path(codexlens.storage.migrations.__file__).parent - - for _, name, _ in pkgutil.iter_modules([str(package_path)]): - if name.startswith("migration_"): - try: - version = int(name.split("_")[1]) - module = importlib.import_module(f"codexlens.storage.migrations.{name}") - if hasattr(module, "upgrade"): - migrations.append( - Migration(version=version, name=name, upgrade=module.upgrade) - ) - else: - log.warning(f"Migration {name} is missing 'upgrade' function.") - except (ValueError, IndexError) as e: - log.warning(f"Could not parse migration name {name}: {e}") - except ImportError as e: - log.warning(f"Could not import migration {name}: {e}") - - migrations.sort(key=lambda m: m.version) - return migrations - - -class MigrationManager: - """ - Manages the application of migrations to a database. - """ - - def __init__(self, db_conn: Connection): - """ - Initializes the MigrationManager. - - Args: - db_conn: The SQLite database connection. - """ - self.db_conn = db_conn - self.migrations = discover_migrations() - - def get_current_version(self) -> int: - """ - Gets the current version of the database schema. - - Returns: - The current schema version number. - """ - return self.db_conn.execute("PRAGMA user_version").fetchone()[0] - - def set_version(self, version: int): - """ - Sets the database schema version. - - Args: - version: The version number to set. - """ - self.db_conn.execute(f"PRAGMA user_version = {version}") - log.info(f"Database schema version set to {version}") - - def apply_migrations(self): - """ - Applies all pending migrations to the database. - - This method checks the current database version and applies all - subsequent migrations in order. Each migration is applied within - a transaction, unless the migration manages its own transactions. - """ - current_version = self.get_current_version() - log.info(f"Current database schema version: {current_version}") - - for migration in self.migrations: - if migration.version > current_version: - log.info(f"Applying migration {migration.version}: {migration.name}...") - try: - # Check if a transaction is already in progress - in_transaction = self.db_conn.in_transaction - - # Only start transaction if not already in one - if not in_transaction: - self.db_conn.execute("BEGIN") - - migration.upgrade(self.db_conn) - self.set_version(migration.version) - - # Only commit if we started the transaction and it's still active - if not in_transaction and self.db_conn.in_transaction: - self.db_conn.execute("COMMIT") - - log.info( - f"Successfully applied migration {migration.version}: {migration.name}" - ) - except Exception as e: - log.error( - f"Failed to apply migration {migration.version}: {migration.name}. Error: {e}", - exc_info=True, - ) - # Try to rollback if transaction is active - try: - if self.db_conn.in_transaction: - self.db_conn.execute("ROLLBACK") - except Exception: - pass # Ignore rollback errors - raise - - latest_migration_version = self.migrations[-1].version if self.migrations else 0 - if current_version < latest_migration_version: - # This case can be hit if migrations were applied but the loop was exited - # and set_version was not called for the last one for some reason. - # To be safe, we explicitly set the version to the latest known migration. - final_version = self.get_current_version() - if final_version != latest_migration_version: - log.warning(f"Database version ({final_version}) is not the latest migration version ({latest_migration_version}). This may indicate a problem.") - - log.info("All pending migrations applied successfully.") - diff --git a/codex-lens/src/codexlens/storage/migrations/__init__.py b/codex-lens/src/codexlens/storage/migrations/__init__.py deleted file mode 100644 index 06e14729..00000000 --- a/codex-lens/src/codexlens/storage/migrations/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# This file makes the 'migrations' directory a Python package. diff --git a/codex-lens/src/codexlens/storage/migrations/migration_001_normalize_keywords.py b/codex-lens/src/codexlens/storage/migrations/migration_001_normalize_keywords.py deleted file mode 100644 index 97df06fd..00000000 --- a/codex-lens/src/codexlens/storage/migrations/migration_001_normalize_keywords.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -Migration 001: Normalize keywords into separate tables. - -This migration introduces two new tables, `keywords` and `file_keywords`, to -store semantic keywords in a normalized fashion. It then migrates the existing -keywords from the `semantic_data` JSON blob in the `files` table into these -new tables. This is intended to speed up keyword-based searches significantly. -""" - -import json -import logging -from sqlite3 import Connection - -log = logging.getLogger(__name__) - - -def upgrade(db_conn: Connection): - """ - Applies the migration to normalize keywords. - - - Creates `keywords` and `file_keywords` tables. - - Creates indexes for efficient querying. - - Migrates data from `files.semantic_data` to the new tables. - - Args: - db_conn: The SQLite database connection. - """ - cursor = db_conn.cursor() - - log.info("Creating 'keywords' and 'file_keywords' tables...") - # Create a table to store unique keywords - cursor.execute( - """ - CREATE TABLE IF NOT EXISTS keywords ( - id INTEGER PRIMARY KEY, - keyword TEXT NOT NULL UNIQUE - ) - """ - ) - - # Create a join table to link files and keywords (many-to-many) - cursor.execute( - """ - CREATE TABLE IF NOT EXISTS file_keywords ( - file_id INTEGER NOT NULL, - keyword_id INTEGER NOT NULL, - PRIMARY KEY (file_id, keyword_id), - FOREIGN KEY (file_id) REFERENCES files (id) ON DELETE CASCADE, - FOREIGN KEY (keyword_id) REFERENCES keywords (id) ON DELETE CASCADE - ) - """ - ) - - log.info("Creating indexes for new keyword tables...") - cursor.execute("CREATE INDEX IF NOT EXISTS idx_keywords_keyword ON keywords (keyword)") - cursor.execute("CREATE INDEX IF NOT EXISTS idx_file_keywords_file_id ON file_keywords (file_id)") - cursor.execute("CREATE INDEX IF NOT EXISTS idx_file_keywords_keyword_id ON file_keywords (keyword_id)") - - log.info("Migrating existing keywords from 'semantic_metadata' table...") - - # Check if semantic_metadata table exists before querying - cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_metadata'") - if not cursor.fetchone(): - log.info("No 'semantic_metadata' table found, skipping data migration.") - return - - # Check if 'keywords' column exists in semantic_metadata table - # (current schema may already use normalized tables without this column) - cursor.execute("PRAGMA table_info(semantic_metadata)") - columns = {row[1] for row in cursor.fetchall()} - if "keywords" not in columns: - log.info("No 'keywords' column in semantic_metadata table, skipping data migration.") - return - - cursor.execute("SELECT file_id, keywords FROM semantic_metadata WHERE keywords IS NOT NULL AND keywords != ''") - - files_to_migrate = cursor.fetchall() - if not files_to_migrate: - log.info("No existing files with semantic metadata to migrate.") - return - - log.info(f"Found {len(files_to_migrate)} files with semantic metadata to migrate.") - - for file_id, keywords_json in files_to_migrate: - if not keywords_json: - continue - try: - keywords = json.loads(keywords_json) - - if not isinstance(keywords, list): - log.warning(f"Keywords for file_id {file_id} is not a list, skipping.") - continue - - for keyword in keywords: - if not isinstance(keyword, str): - log.warning(f"Non-string keyword '{keyword}' found for file_id {file_id}, skipping.") - continue - - keyword = keyword.strip() - if not keyword: - continue - - # Get or create keyword_id - cursor.execute("INSERT OR IGNORE INTO keywords (keyword) VALUES (?)", (keyword,)) - cursor.execute("SELECT id FROM keywords WHERE keyword = ?", (keyword,)) - keyword_id_result = cursor.fetchone() - - if keyword_id_result: - keyword_id = keyword_id_result[0] - # Link file to keyword - cursor.execute( - "INSERT OR IGNORE INTO file_keywords (file_id, keyword_id) VALUES (?, ?)", - (file_id, keyword_id), - ) - else: - log.error(f"Failed to retrieve or create keyword_id for keyword: {keyword}") - - except json.JSONDecodeError as e: - log.warning(f"Could not parse keywords for file_id {file_id}: {e}") - except Exception as e: - log.error(f"An unexpected error occurred during migration for file_id {file_id}: {e}", exc_info=True) - - log.info("Finished migrating keywords.") diff --git a/codex-lens/src/codexlens/storage/migrations/migration_002_add_token_metadata.py b/codex-lens/src/codexlens/storage/migrations/migration_002_add_token_metadata.py deleted file mode 100644 index daa3085e..00000000 --- a/codex-lens/src/codexlens/storage/migrations/migration_002_add_token_metadata.py +++ /dev/null @@ -1,48 +0,0 @@ -""" -Migration 002: Add token_count and symbol_type to symbols table. - -This migration adds token counting metadata to symbols for accurate chunk -splitting and performance optimization. It also adds symbol_type for better -filtering in searches. -""" - -import logging -from sqlite3 import Connection - -log = logging.getLogger(__name__) - - -def upgrade(db_conn: Connection): - """ - Applies the migration to add token metadata to symbols. - - - Adds token_count column to symbols table - - Adds symbol_type column to symbols table (for future use) - - Creates index on symbol_type for efficient filtering - - Backfills existing symbols with NULL token_count (to be calculated lazily) - - Args: - db_conn: The SQLite database connection. - """ - cursor = db_conn.cursor() - - log.info("Adding token_count column to symbols table...") - try: - cursor.execute("ALTER TABLE symbols ADD COLUMN token_count INTEGER") - log.info("Successfully added token_count column.") - except Exception as e: - # Column might already exist - log.warning(f"Could not add token_count column (might already exist): {e}") - - log.info("Adding symbol_type column to symbols table...") - try: - cursor.execute("ALTER TABLE symbols ADD COLUMN symbol_type TEXT") - log.info("Successfully added symbol_type column.") - except Exception as e: - # Column might already exist - log.warning(f"Could not add symbol_type column (might already exist): {e}") - - log.info("Creating index on symbol_type for efficient filtering...") - cursor.execute("CREATE INDEX IF NOT EXISTS idx_symbols_type ON symbols(symbol_type)") - - log.info("Migration 002 completed successfully.") diff --git a/codex-lens/src/codexlens/storage/migrations/migration_004_dual_fts.py b/codex-lens/src/codexlens/storage/migrations/migration_004_dual_fts.py deleted file mode 100644 index 502e067d..00000000 --- a/codex-lens/src/codexlens/storage/migrations/migration_004_dual_fts.py +++ /dev/null @@ -1,232 +0,0 @@ -""" -Migration 004: Add dual FTS tables for exact and fuzzy matching. - -This migration introduces two FTS5 tables: -- files_fts_exact: Uses unicode61 tokenizer for exact token matching -- files_fts_fuzzy: Uses trigram tokenizer (or extended unicode61) for substring/fuzzy matching - -Both tables are synchronized with the files table via triggers for automatic updates. -""" - -import logging -from sqlite3 import Connection - -from codexlens.storage.sqlite_utils import check_trigram_support, get_sqlite_version - -log = logging.getLogger(__name__) - - -def upgrade(db_conn: Connection): - """ - Applies the migration to add dual FTS tables. - - - Drops old files_fts table and triggers - - Creates files_fts_exact with unicode61 tokenizer - - Creates files_fts_fuzzy with trigram or extended unicode61 tokenizer - - Creates synchronized triggers for both tables - - Rebuilds FTS indexes from files table - - Args: - db_conn: The SQLite database connection. - """ - cursor = db_conn.cursor() - - try: - # Check trigram support - has_trigram = check_trigram_support(db_conn) - version = get_sqlite_version(db_conn) - log.info(f"SQLite version: {'.'.join(map(str, version))}") - - if has_trigram: - log.info("Trigram tokenizer available, using for fuzzy FTS table") - fuzzy_tokenizer = "trigram" - else: - log.warning( - f"Trigram tokenizer not available (requires SQLite >= 3.34), " - f"using extended unicode61 tokenizer for fuzzy matching" - ) - fuzzy_tokenizer = "unicode61 tokenchars '_-.'" - - # Start transaction - cursor.execute("BEGIN TRANSACTION") - - # Check if files table has 'name' column (v2 schema doesn't have it) - cursor.execute("PRAGMA table_info(files)") - columns = {row[1] for row in cursor.fetchall()} - - if 'name' not in columns: - log.info("Adding 'name' column to files table (v2 schema upgrade)...") - # Add name column - cursor.execute("ALTER TABLE files ADD COLUMN name TEXT") - # Populate name from path (extract filename from last '/') - # Use Python to do the extraction since SQLite doesn't have reverse() - cursor.execute("SELECT rowid, path FROM files") - rows = cursor.fetchall() - for rowid, path in rows: - # Extract filename from path - name = path.split('/')[-1] if '/' in path else path - cursor.execute("UPDATE files SET name = ? WHERE rowid = ?", (name, rowid)) - - # Rename 'path' column to 'full_path' if needed - if 'path' in columns and 'full_path' not in columns: - log.info("Renaming 'path' to 'full_path' (v2 schema upgrade)...") - # Check if indexed_at column exists in v2 schema - has_indexed_at = 'indexed_at' in columns - has_mtime = 'mtime' in columns - - # SQLite doesn't support RENAME COLUMN before 3.25, so use table recreation - cursor.execute(""" - CREATE TABLE files_new ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - name TEXT NOT NULL, - full_path TEXT NOT NULL UNIQUE, - content TEXT, - language TEXT, - mtime REAL, - indexed_at TEXT - ) - """) - - # Build INSERT statement based on available columns - # Note: v2 schema has no rowid (path is PRIMARY KEY), so use NULL for AUTOINCREMENT - if has_indexed_at and has_mtime: - cursor.execute(""" - INSERT INTO files_new (name, full_path, content, language, mtime, indexed_at) - SELECT name, path, content, language, mtime, indexed_at FROM files - """) - elif has_indexed_at: - cursor.execute(""" - INSERT INTO files_new (name, full_path, content, language, indexed_at) - SELECT name, path, content, language, indexed_at FROM files - """) - elif has_mtime: - cursor.execute(""" - INSERT INTO files_new (name, full_path, content, language, mtime) - SELECT name, path, content, language, mtime FROM files - """) - else: - cursor.execute(""" - INSERT INTO files_new (name, full_path, content, language) - SELECT name, path, content, language FROM files - """) - - cursor.execute("DROP TABLE files") - cursor.execute("ALTER TABLE files_new RENAME TO files") - - log.info("Dropping old FTS triggers and table...") - # Drop old triggers - cursor.execute("DROP TRIGGER IF EXISTS files_ai") - cursor.execute("DROP TRIGGER IF EXISTS files_ad") - cursor.execute("DROP TRIGGER IF EXISTS files_au") - - # Drop old FTS table - cursor.execute("DROP TABLE IF EXISTS files_fts") - - # Create exact FTS table (unicode61 with underscores/hyphens/dots as token chars) - # Note: tokenchars includes '.' to properly tokenize qualified names like PortRole.FLOW - log.info("Creating files_fts_exact table with unicode61 tokenizer...") - cursor.execute( - """ - CREATE VIRTUAL TABLE files_fts_exact USING fts5( - name, full_path UNINDEXED, content, - content='files', - content_rowid='id', - tokenize="unicode61 tokenchars '_-.'" - ) - """ - ) - - # Create fuzzy FTS table (trigram or extended unicode61) - log.info(f"Creating files_fts_fuzzy table with {fuzzy_tokenizer} tokenizer...") - cursor.execute( - f""" - CREATE VIRTUAL TABLE files_fts_fuzzy USING fts5( - name, full_path UNINDEXED, content, - content='files', - content_rowid='id', - tokenize="{fuzzy_tokenizer}" - ) - """ - ) - - # Create synchronized triggers for files_fts_exact - log.info("Creating triggers for files_fts_exact...") - cursor.execute( - """ - CREATE TRIGGER files_exact_ai AFTER INSERT ON files BEGIN - INSERT INTO files_fts_exact(rowid, name, full_path, content) - VALUES(new.id, new.name, new.full_path, new.content); - END - """ - ) - cursor.execute( - """ - CREATE TRIGGER files_exact_ad AFTER DELETE ON files BEGIN - INSERT INTO files_fts_exact(files_fts_exact, rowid, name, full_path, content) - VALUES('delete', old.id, old.name, old.full_path, old.content); - END - """ - ) - cursor.execute( - """ - CREATE TRIGGER files_exact_au AFTER UPDATE ON files BEGIN - INSERT INTO files_fts_exact(files_fts_exact, rowid, name, full_path, content) - VALUES('delete', old.id, old.name, old.full_path, old.content); - INSERT INTO files_fts_exact(rowid, name, full_path, content) - VALUES(new.id, new.name, new.full_path, new.content); - END - """ - ) - - # Create synchronized triggers for files_fts_fuzzy - log.info("Creating triggers for files_fts_fuzzy...") - cursor.execute( - """ - CREATE TRIGGER files_fuzzy_ai AFTER INSERT ON files BEGIN - INSERT INTO files_fts_fuzzy(rowid, name, full_path, content) - VALUES(new.id, new.name, new.full_path, new.content); - END - """ - ) - cursor.execute( - """ - CREATE TRIGGER files_fuzzy_ad AFTER DELETE ON files BEGIN - INSERT INTO files_fts_fuzzy(files_fts_fuzzy, rowid, name, full_path, content) - VALUES('delete', old.id, old.name, old.full_path, old.content); - END - """ - ) - cursor.execute( - """ - CREATE TRIGGER files_fuzzy_au AFTER UPDATE ON files BEGIN - INSERT INTO files_fts_fuzzy(files_fts_fuzzy, rowid, name, full_path, content) - VALUES('delete', old.id, old.name, old.full_path, old.content); - INSERT INTO files_fts_fuzzy(rowid, name, full_path, content) - VALUES(new.id, new.name, new.full_path, new.content); - END - """ - ) - - # Rebuild FTS indexes from files table - log.info("Rebuilding FTS indexes from files table...") - cursor.execute("INSERT INTO files_fts_exact(files_fts_exact) VALUES('rebuild')") - cursor.execute("INSERT INTO files_fts_fuzzy(files_fts_fuzzy) VALUES('rebuild')") - - # Commit transaction - cursor.execute("COMMIT") - log.info("Migration 004 completed successfully") - - # Vacuum to reclaim space (outside transaction) - try: - log.info("Running VACUUM to reclaim space...") - cursor.execute("VACUUM") - except Exception as e: - log.warning(f"VACUUM failed (non-critical): {e}") - - except Exception as e: - log.error(f"Migration 004 failed: {e}") - try: - cursor.execute("ROLLBACK") - except Exception: - pass - raise diff --git a/codex-lens/src/codexlens/storage/migrations/migration_005_cleanup_unused_fields.py b/codex-lens/src/codexlens/storage/migrations/migration_005_cleanup_unused_fields.py deleted file mode 100644 index 918bf17a..00000000 --- a/codex-lens/src/codexlens/storage/migrations/migration_005_cleanup_unused_fields.py +++ /dev/null @@ -1,196 +0,0 @@ -""" -Migration 005: Remove unused and redundant database fields. - -This migration removes four problematic fields identified by Gemini analysis: - -1. **semantic_metadata.keywords** (deprecated - replaced by file_keywords table) - - Data: Migrated to normalized file_keywords table in migration 001 - - Impact: Column now redundant, remove to prevent sync issues - -2. **symbols.token_count** (unused - always NULL) - - Data: Never populated, always NULL - - Impact: No data loss, just removes unused column - -3. **symbols.symbol_type** (redundant - duplicates kind) - - Data: Redundant with symbols.kind field - - Impact: No data loss, kind field contains same information - -4. **subdirs.direct_files** (unused - never displayed) - - Data: Never used in queries or display logic - - Impact: No data loss, just removes unused column - -Schema changes use table recreation pattern (SQLite best practice): -- Create new table without deprecated columns -- Copy data from old table -- Drop old table -- Rename new table -- Recreate indexes -""" - -import logging -from sqlite3 import Connection - -log = logging.getLogger(__name__) - - -def upgrade(db_conn: Connection): - """Remove unused and redundant fields from schema. - - Note: Transaction management is handled by MigrationManager. - This migration should NOT start its own transaction. - - Args: - db_conn: The SQLite database connection. - """ - cursor = db_conn.cursor() - - # Step 1: Remove semantic_metadata.keywords (if column exists) - log.info("Checking semantic_metadata.keywords column...") - - cursor.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_metadata'" - ) - if cursor.fetchone(): - # Check if keywords column exists - cursor.execute("PRAGMA table_info(semantic_metadata)") - columns = {row[1] for row in cursor.fetchall()} - - if "keywords" in columns: - log.info("Removing semantic_metadata.keywords column...") - cursor.execute(""" - CREATE TABLE semantic_metadata_new ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - file_id INTEGER NOT NULL UNIQUE, - summary TEXT, - purpose TEXT, - llm_tool TEXT, - generated_at REAL, - FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE - ) - """) - - cursor.execute(""" - INSERT INTO semantic_metadata_new (id, file_id, summary, purpose, llm_tool, generated_at) - SELECT id, file_id, summary, purpose, llm_tool, generated_at - FROM semantic_metadata - """) - - cursor.execute("DROP TABLE semantic_metadata") - cursor.execute("ALTER TABLE semantic_metadata_new RENAME TO semantic_metadata") - - # Recreate index - cursor.execute( - "CREATE INDEX IF NOT EXISTS idx_semantic_file ON semantic_metadata(file_id)" - ) - log.info("Removed semantic_metadata.keywords column") - else: - log.info("semantic_metadata.keywords column does not exist, skipping") - else: - log.info("semantic_metadata table does not exist, skipping") - - # Step 2: Remove symbols.token_count and symbols.symbol_type (if columns exist) - log.info("Checking symbols.token_count and symbols.symbol_type columns...") - - cursor.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='symbols'" - ) - if cursor.fetchone(): - # Check if token_count or symbol_type columns exist - cursor.execute("PRAGMA table_info(symbols)") - columns = {row[1] for row in cursor.fetchall()} - - if "token_count" in columns or "symbol_type" in columns: - log.info("Removing symbols.token_count and symbols.symbol_type columns...") - cursor.execute(""" - CREATE TABLE symbols_new ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - file_id INTEGER NOT NULL, - name TEXT NOT NULL, - kind TEXT, - start_line INTEGER, - end_line INTEGER, - FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE - ) - """) - - cursor.execute(""" - INSERT INTO symbols_new (id, file_id, name, kind, start_line, end_line) - SELECT id, file_id, name, kind, start_line, end_line - FROM symbols - """) - - cursor.execute("DROP TABLE symbols") - cursor.execute("ALTER TABLE symbols_new RENAME TO symbols") - - # Recreate indexes - cursor.execute("CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id)") - cursor.execute("CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name)") - log.info("Removed symbols.token_count and symbols.symbol_type columns") - else: - log.info("symbols.token_count/symbol_type columns do not exist, skipping") - else: - log.info("symbols table does not exist, skipping") - - # Step 3: Remove subdirs.direct_files (if column exists) - log.info("Checking subdirs.direct_files column...") - - cursor.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='subdirs'" - ) - if cursor.fetchone(): - # Check if direct_files column exists - cursor.execute("PRAGMA table_info(subdirs)") - columns = {row[1] for row in cursor.fetchall()} - - if "direct_files" in columns: - log.info("Removing subdirs.direct_files column...") - cursor.execute(""" - CREATE TABLE subdirs_new ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - name TEXT NOT NULL UNIQUE, - index_path TEXT NOT NULL, - files_count INTEGER DEFAULT 0, - last_updated REAL - ) - """) - - cursor.execute(""" - INSERT INTO subdirs_new (id, name, index_path, files_count, last_updated) - SELECT id, name, index_path, files_count, last_updated - FROM subdirs - """) - - cursor.execute("DROP TABLE subdirs") - cursor.execute("ALTER TABLE subdirs_new RENAME TO subdirs") - - # Recreate index - cursor.execute("CREATE INDEX IF NOT EXISTS idx_subdirs_name ON subdirs(name)") - log.info("Removed subdirs.direct_files column") - else: - log.info("subdirs.direct_files column does not exist, skipping") - else: - log.info("subdirs table does not exist, skipping") - - log.info("Migration 005 completed successfully") - - # Vacuum to reclaim space (outside transaction, optional) - # Note: VACUUM cannot run inside a transaction, so we skip it here - # The caller can run VACUUM separately if desired - - -def downgrade(db_conn: Connection): - """Restore removed fields (data will be lost for keywords, token_count, symbol_type, direct_files). - - This is a placeholder - true downgrade is not feasible as data is lost. - The migration is designed to be one-way since removed fields are unused/redundant. - - Args: - db_conn: The SQLite database connection. - """ - log.warning( - "Migration 005 downgrade not supported - removed fields are unused/redundant. " - "Data cannot be restored." - ) - raise NotImplementedError( - "Migration 005 downgrade not supported - this is a one-way migration" - ) diff --git a/codex-lens/src/codexlens/storage/migrations/migration_006_enhance_relationships.py b/codex-lens/src/codexlens/storage/migrations/migration_006_enhance_relationships.py deleted file mode 100644 index 2c7c6cd8..00000000 --- a/codex-lens/src/codexlens/storage/migrations/migration_006_enhance_relationships.py +++ /dev/null @@ -1,37 +0,0 @@ -""" -Migration 006: Ensure relationship tables and indexes exist. - -This migration is intentionally idempotent. It creates the `code_relationships` -table (used for graph visualization) and its indexes if missing. -""" - -from __future__ import annotations - -import logging -from sqlite3 import Connection - -log = logging.getLogger(__name__) - - -def upgrade(db_conn: Connection) -> None: - cursor = db_conn.cursor() - - log.info("Ensuring code_relationships table exists...") - cursor.execute( - """ - CREATE TABLE IF NOT EXISTS code_relationships ( - id INTEGER PRIMARY KEY, - source_symbol_id INTEGER NOT NULL REFERENCES symbols (id) ON DELETE CASCADE, - target_qualified_name TEXT NOT NULL, - relationship_type TEXT NOT NULL, - source_line INTEGER NOT NULL, - target_file TEXT - ) - """ - ) - - log.info("Ensuring relationship indexes exist...") - cursor.execute("CREATE INDEX IF NOT EXISTS idx_rel_source ON code_relationships(source_symbol_id)") - cursor.execute("CREATE INDEX IF NOT EXISTS idx_rel_target ON code_relationships(target_qualified_name)") - cursor.execute("CREATE INDEX IF NOT EXISTS idx_rel_type ON code_relationships(relationship_type)") - diff --git a/codex-lens/src/codexlens/storage/migrations/migration_007_add_graph_neighbors.py b/codex-lens/src/codexlens/storage/migrations/migration_007_add_graph_neighbors.py deleted file mode 100644 index 83306886..00000000 --- a/codex-lens/src/codexlens/storage/migrations/migration_007_add_graph_neighbors.py +++ /dev/null @@ -1,47 +0,0 @@ -""" -Migration 007: Add precomputed graph neighbor table for search expansion. - -Adds: -- graph_neighbors: cached N-hop neighbors between symbols (keyed by symbol ids) - -This table is derived data (a cache) and is safe to rebuild at any time. -The migration is intentionally idempotent. -""" - -from __future__ import annotations - -import logging -from sqlite3 import Connection - -log = logging.getLogger(__name__) - - -def upgrade(db_conn: Connection) -> None: - cursor = db_conn.cursor() - - log.info("Creating graph_neighbors table...") - cursor.execute( - """ - CREATE TABLE IF NOT EXISTS graph_neighbors ( - source_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE, - neighbor_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE, - relationship_depth INTEGER NOT NULL, - PRIMARY KEY (source_symbol_id, neighbor_symbol_id) - ) - """ - ) - - log.info("Creating indexes for graph_neighbors...") - cursor.execute( - """ - CREATE INDEX IF NOT EXISTS idx_graph_neighbors_source_depth - ON graph_neighbors(source_symbol_id, relationship_depth) - """ - ) - cursor.execute( - """ - CREATE INDEX IF NOT EXISTS idx_graph_neighbors_neighbor - ON graph_neighbors(neighbor_symbol_id) - """ - ) - diff --git a/codex-lens/src/codexlens/storage/migrations/migration_008_add_merkle_hashes.py b/codex-lens/src/codexlens/storage/migrations/migration_008_add_merkle_hashes.py deleted file mode 100644 index 092fc20a..00000000 --- a/codex-lens/src/codexlens/storage/migrations/migration_008_add_merkle_hashes.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -Migration 008: Add Merkle hash tables for content-based incremental indexing. - -Adds: -- merkle_hashes: per-file SHA-256 hashes (keyed by file_id) -- merkle_state: directory-level root hash (single row, id=1) - -Backfills merkle_hashes using the existing `files.content` column when available. -""" - -from __future__ import annotations - -import hashlib -import logging -import time -from sqlite3 import Connection - -log = logging.getLogger(__name__) - - -def upgrade(db_conn: Connection) -> None: - cursor = db_conn.cursor() - - log.info("Creating merkle_hashes table...") - cursor.execute( - """ - CREATE TABLE IF NOT EXISTS merkle_hashes ( - file_id INTEGER PRIMARY KEY REFERENCES files(id) ON DELETE CASCADE, - sha256 TEXT NOT NULL, - updated_at REAL - ) - """ - ) - - log.info("Creating merkle_state table...") - cursor.execute( - """ - CREATE TABLE IF NOT EXISTS merkle_state ( - id INTEGER PRIMARY KEY CHECK (id = 1), - root_hash TEXT, - updated_at REAL - ) - """ - ) - - # Backfill file hashes from stored content (best-effort). - try: - rows = cursor.execute("SELECT id, content FROM files").fetchall() - except Exception as exc: - log.warning("Unable to backfill merkle hashes (files table missing?): %s", exc) - return - - now = time.time() - inserts: list[tuple[int, str, float]] = [] - - for row in rows: - file_id = int(row[0]) - content = row[1] - if content is None: - continue - try: - digest = hashlib.sha256(str(content).encode("utf-8", errors="ignore")).hexdigest() - inserts.append((file_id, digest, now)) - except Exception: - continue - - if not inserts: - return - - log.info("Backfilling %d file hashes...", len(inserts)) - cursor.executemany( - """ - INSERT INTO merkle_hashes(file_id, sha256, updated_at) - VALUES(?, ?, ?) - ON CONFLICT(file_id) DO UPDATE SET - sha256=excluded.sha256, - updated_at=excluded.updated_at - """, - inserts, - ) - diff --git a/codex-lens/src/codexlens/storage/migrations/migration_010_add_multi_vector_chunks.py b/codex-lens/src/codexlens/storage/migrations/migration_010_add_multi_vector_chunks.py deleted file mode 100644 index 9a937200..00000000 --- a/codex-lens/src/codexlens/storage/migrations/migration_010_add_multi_vector_chunks.py +++ /dev/null @@ -1,162 +0,0 @@ -""" -Migration 010: Add multi-vector storage support for cascade retrieval. - -This migration introduces the chunks table with multi-vector support: -- chunks: Stores code chunks with multiple embedding types - - embedding: Original embedding for backward compatibility - - embedding_binary: 256-dim binary vector for coarse ranking (fast) - - embedding_dense: 2048-dim dense vector for fine ranking (precise) - -The multi-vector architecture enables cascade retrieval: -1. First stage: Fast binary vector search for candidate retrieval -2. Second stage: Dense vector reranking for precision -""" - -import logging -from sqlite3 import Connection - -log = logging.getLogger(__name__) - - -def upgrade(db_conn: Connection) -> None: - """ - Adds chunks table with multi-vector embedding columns. - - Creates: - - chunks: Table for storing code chunks with multiple embedding types - - idx_chunks_file_path: Index for efficient file-based lookups - - Also migrates existing chunks tables by adding new columns if needed. - - Args: - db_conn: The SQLite database connection. - """ - cursor = db_conn.cursor() - - # Check if chunks table already exists - table_exists = cursor.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='chunks'" - ).fetchone() - - if table_exists: - # Migrate existing table - add new columns if missing - log.info("chunks table exists, checking for missing columns...") - - col_info = cursor.execute("PRAGMA table_info(chunks)").fetchall() - existing_columns = {row[1] for row in col_info} - - if "embedding_binary" not in existing_columns: - log.info("Adding embedding_binary column to chunks table...") - cursor.execute( - "ALTER TABLE chunks ADD COLUMN embedding_binary BLOB" - ) - - if "embedding_dense" not in existing_columns: - log.info("Adding embedding_dense column to chunks table...") - cursor.execute( - "ALTER TABLE chunks ADD COLUMN embedding_dense BLOB" - ) - else: - # Create new table with all columns - log.info("Creating chunks table with multi-vector support...") - cursor.execute( - """ - CREATE TABLE chunks ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - file_path TEXT NOT NULL, - content TEXT NOT NULL, - embedding BLOB, - embedding_binary BLOB, - embedding_dense BLOB, - metadata TEXT, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - """ - ) - - # Create index for file-based lookups - log.info("Creating index for chunks table...") - cursor.execute( - """ - CREATE INDEX IF NOT EXISTS idx_chunks_file_path - ON chunks(file_path) - """ - ) - - log.info("Migration 010 completed successfully") - - -def downgrade(db_conn: Connection) -> None: - """ - Removes multi-vector columns from chunks table. - - Note: This does not drop the chunks table entirely to preserve data. - Only the new columns added by this migration are removed. - - Args: - db_conn: The SQLite database connection. - """ - cursor = db_conn.cursor() - - log.info("Removing multi-vector columns from chunks table...") - - # SQLite doesn't support DROP COLUMN directly in older versions - # We need to recreate the table without the columns - - # Check if chunks table exists - table_exists = cursor.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='chunks'" - ).fetchone() - - if not table_exists: - log.info("chunks table does not exist, nothing to downgrade") - return - - # Check if the columns exist before trying to remove them - col_info = cursor.execute("PRAGMA table_info(chunks)").fetchall() - existing_columns = {row[1] for row in col_info} - - needs_migration = ( - "embedding_binary" in existing_columns or - "embedding_dense" in existing_columns - ) - - if not needs_migration: - log.info("Multi-vector columns not present, nothing to remove") - return - - # Recreate table without the new columns - log.info("Recreating chunks table without multi-vector columns...") - - cursor.execute( - """ - CREATE TABLE chunks_backup ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - file_path TEXT NOT NULL, - content TEXT NOT NULL, - embedding BLOB, - metadata TEXT, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - """ - ) - - cursor.execute( - """ - INSERT INTO chunks_backup (id, file_path, content, embedding, metadata, created_at) - SELECT id, file_path, content, embedding, metadata, created_at FROM chunks - """ - ) - - cursor.execute("DROP TABLE chunks") - cursor.execute("ALTER TABLE chunks_backup RENAME TO chunks") - - # Recreate index - cursor.execute( - """ - CREATE INDEX IF NOT EXISTS idx_chunks_file_path - ON chunks(file_path) - """ - ) - - log.info("Migration 010 downgrade completed successfully") diff --git a/codex-lens/src/codexlens/storage/path_mapper.py b/codex-lens/src/codexlens/storage/path_mapper.py deleted file mode 100644 index 2c238355..00000000 --- a/codex-lens/src/codexlens/storage/path_mapper.py +++ /dev/null @@ -1,300 +0,0 @@ -"""Path mapping utilities for source paths and index paths. - -This module provides bidirectional mapping between source code directories -and their corresponding index storage locations. - -Storage Structure: - ~/.codexlens/ - ├── registry.db # Global mapping table - └── indexes/ - └── D/ - └── Claude_dms3/ - ├── _index.db # Root directory index - └── src/ - └── _index.db # src/ directory index -""" - -import json -import os -import platform -from pathlib import Path -from typing import Optional - - -def _get_configured_index_root() -> Path: - """Get the index root from environment or config file. - - Priority order: - 1. CODEXLENS_INDEX_DIR environment variable - 2. index_dir from ~/.codexlens/config.json - 3. Default: ~/.codexlens/indexes - """ - env_override = os.getenv("CODEXLENS_INDEX_DIR") - if env_override: - return Path(env_override).expanduser().resolve() - - config_file = Path.home() / ".codexlens" / "config.json" - if config_file.exists(): - try: - cfg = json.loads(config_file.read_text(encoding="utf-8")) - if "index_dir" in cfg: - return Path(cfg["index_dir"]).expanduser().resolve() - except (json.JSONDecodeError, OSError): - pass - - return Path.home() / ".codexlens" / "indexes" - - -class PathMapper: - """Bidirectional mapping tool for source paths ↔ index paths. - - Handles cross-platform path normalization and conversion between - source code directories and their index storage locations. - - Attributes: - DEFAULT_INDEX_ROOT: Default root directory for all indexes - INDEX_DB_NAME: Standard name for index database files - index_root: Configured index root directory - """ - - DEFAULT_INDEX_ROOT = _get_configured_index_root() - INDEX_DB_NAME = "_index.db" - - def __init__(self, index_root: Optional[Path] = None): - """Initialize PathMapper with optional custom index root. - - Args: - index_root: Custom index root directory. If None, uses DEFAULT_INDEX_ROOT. - """ - self.index_root = (index_root or self.DEFAULT_INDEX_ROOT).resolve() - - def source_to_index_dir(self, source_path: Path) -> Path: - """Convert source directory to its index directory path. - - Maps a source code directory to where its index data should be stored. - The mapping preserves the directory structure but normalizes paths - for cross-platform compatibility. - - Args: - source_path: Source directory path to map - - Returns: - Index directory path under index_root - - Examples: - >>> mapper = PathMapper() - >>> mapper.source_to_index_dir(Path("D:/Claude_dms3/src")) - PosixPath('/home/user/.codexlens/indexes/D/Claude_dms3/src') - - >>> mapper.source_to_index_dir(Path("/home/user/project")) - PosixPath('/home/user/.codexlens/indexes/home/user/project') - """ - source_path = source_path.resolve() - normalized = self.normalize_path(source_path) - return self.index_root / normalized - - def source_to_index_db(self, source_path: Path) -> Path: - """Convert source directory to its index database file path. - - Maps a source directory to the full path of its index database file, - including the standard INDEX_DB_NAME. - - Args: - source_path: Source directory path to map - - Returns: - Full path to the index database file - - Examples: - >>> mapper = PathMapper() - >>> mapper.source_to_index_db(Path("D:/Claude_dms3/src")) - PosixPath('/home/user/.codexlens/indexes/D/Claude_dms3/src/_index.db') - """ - index_dir = self.source_to_index_dir(source_path) - return index_dir / self.INDEX_DB_NAME - - def index_to_source(self, index_path: Path) -> Path: - """Convert index path back to original source path. - - Performs reverse mapping from an index storage location to the - original source directory. Handles both directory paths and - database file paths. - - Args: - index_path: Index directory or database file path - - Returns: - Original source directory path - - Raises: - ValueError: If index_path is not under index_root - - Examples: - >>> mapper = PathMapper() - >>> mapper.index_to_source( - ... Path("~/.codexlens/indexes/D/Claude_dms3/src/_index.db") - ... ) - WindowsPath('D:/Claude_dms3/src') - - >>> mapper.index_to_source( - ... Path("~/.codexlens/indexes/D/Claude_dms3/src") - ... ) - WindowsPath('D:/Claude_dms3/src') - """ - index_path = index_path.resolve() - - # Remove _index.db if present - if index_path.name == self.INDEX_DB_NAME: - index_path = index_path.parent - - # Verify path is under index_root - try: - relative = index_path.relative_to(self.index_root) - except ValueError: - raise ValueError( - f"Index path {index_path} is not under index root {self.index_root}" - ) - - # Convert normalized path back to source path - normalized_str = str(relative).replace("\\", "/") - return self.denormalize_path(normalized_str) - - def get_project_root(self, source_path: Path) -> Path: - """Find the project root directory (topmost indexed directory). - - Walks up the directory tree to find the highest-level directory - that has an index database. - - Args: - source_path: Source directory to start from - - Returns: - Project root directory path. Returns source_path itself if - no parent index is found. - - Examples: - >>> mapper = PathMapper() - >>> mapper.get_project_root(Path("D:/Claude_dms3/src/codexlens")) - WindowsPath('D:/Claude_dms3') - """ - source_path = source_path.resolve() - current = source_path - project_root = source_path - - # Walk up the tree - while current.parent != current: # Stop at filesystem root - parent_index_db = self.source_to_index_db(current.parent) - if parent_index_db.exists(): - project_root = current.parent - current = current.parent - else: - break - - return project_root - - def get_relative_depth(self, source_path: Path, project_root: Path) -> int: - """Calculate directory depth relative to project root. - - Args: - source_path: Target directory path - project_root: Project root directory path - - Returns: - Number of directory levels from project_root to source_path - - Raises: - ValueError: If source_path is not under project_root - - Examples: - >>> mapper = PathMapper() - >>> mapper.get_relative_depth( - ... Path("D:/Claude_dms3/src/codexlens"), - ... Path("D:/Claude_dms3") - ... ) - 2 - """ - source_path = source_path.resolve() - project_root = project_root.resolve() - - try: - relative = source_path.relative_to(project_root) - # Count path components - return len(relative.parts) - except ValueError: - raise ValueError( - f"Source path {source_path} is not under project root {project_root}" - ) - - def normalize_path(self, path: Path) -> str: - """Normalize path to cross-platform storage format. - - Converts OS-specific paths to a standardized format for storage: - - Windows: Removes drive colons (D: → D) - - Unix: Removes leading slash - - Uses forward slashes throughout - - Args: - path: Path to normalize - - Returns: - Normalized path string - - Examples: - >>> mapper = PathMapper() - >>> mapper.normalize_path(Path("D:/path/to/dir")) - 'D/path/to/dir' - - >>> mapper.normalize_path(Path("/home/user/path")) - 'home/user/path' - """ - path = path.resolve() - path_str = str(path) - - # Handle Windows paths with drive letters - if platform.system() == "Windows" and len(path.parts) > 0: - # Convert D:\path\to\dir → D/path/to/dir - drive = path.parts[0].replace(":", "") # D: → D - rest = Path(*path.parts[1:]) if len(path.parts) > 1 else Path() - normalized = f"{drive}/{rest}".replace("\\", "/") - return normalized.rstrip("/") - - # Handle Unix paths - # /home/user/path → home/user/path - return path_str.lstrip("/").replace("\\", "/") - - def denormalize_path(self, normalized: str) -> Path: - """Convert normalized path back to OS-specific path. - - Reverses the normalization process to restore OS-native path format: - - Windows: Adds drive colons (D → D:) - - Unix: Adds leading slash - - Args: - normalized: Normalized path string - - Returns: - OS-specific Path object - - Examples: - >>> mapper = PathMapper() - >>> mapper.denormalize_path("D/path/to/dir") # On Windows - WindowsPath('D:/path/to/dir') - - >>> mapper.denormalize_path("home/user/path") # On Unix - PosixPath('/home/user/path') - """ - parts = normalized.split("/") - - # Handle Windows paths - if platform.system() == "Windows" and len(parts) > 0: - # Check if first part is a drive letter - if len(parts[0]) == 1 and parts[0].isalpha(): - # D/path/to/dir → D:/path/to/dir - drive = f"{parts[0]}:/" - if len(parts) > 1: - return Path(drive) / Path(*parts[1:]) - return Path(drive) - - # Handle Unix paths or relative paths - # home/user/path → /home/user/path - return Path("/") / Path(*parts) diff --git a/codex-lens/src/codexlens/storage/registry.py b/codex-lens/src/codexlens/storage/registry.py deleted file mode 100644 index af667a90..00000000 --- a/codex-lens/src/codexlens/storage/registry.py +++ /dev/null @@ -1,733 +0,0 @@ -"""Global project registry for CodexLens - SQLite storage.""" - -from __future__ import annotations - -import platform -import sqlite3 -import threading -import time -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Dict, List, Optional - -from codexlens.errors import StorageError - - -@dataclass -class ProjectInfo: - """Registered project information.""" - - id: int - source_root: Path - index_root: Path - created_at: float - last_indexed: float - total_files: int - total_dirs: int - status: str - - -@dataclass -class DirMapping: - """Directory to index path mapping.""" - - id: int - project_id: int - source_path: Path - index_path: Path - depth: int - files_count: int - last_updated: float - - -class RegistryStore: - """Global project registry - SQLite storage. - - Manages indexed projects and directory-to-index path mappings. - Thread-safe with connection pooling. - """ - - DEFAULT_DB_PATH = Path.home() / ".codexlens" / "registry.db" - - def __init__(self, db_path: Path | None = None) -> None: - self.db_path = (db_path or self.DEFAULT_DB_PATH).resolve() - self._lock = threading.RLock() - self._local = threading.local() - self._pool_lock = threading.Lock() - self._pool: Dict[int, sqlite3.Connection] = {} - self._pool_generation = 0 - - def _get_connection(self) -> sqlite3.Connection: - """Get or create a thread-local database connection.""" - thread_id = threading.get_ident() - if getattr(self._local, "generation", None) == self._pool_generation: - conn = getattr(self._local, "conn", None) - if conn is not None: - return conn - - with self._pool_lock: - conn = self._pool.get(thread_id) - if conn is None: - conn = sqlite3.connect(self.db_path, check_same_thread=False) - conn.row_factory = sqlite3.Row - conn.execute("PRAGMA journal_mode=WAL") - conn.execute("PRAGMA synchronous=NORMAL") - conn.execute("PRAGMA foreign_keys=ON") - self._pool[thread_id] = conn - - self._local.conn = conn - self._local.generation = self._pool_generation - return conn - - def close(self) -> None: - """Close all pooled connections.""" - with self._lock: - with self._pool_lock: - for conn in self._pool.values(): - conn.close() - self._pool.clear() - self._pool_generation += 1 - - if hasattr(self._local, "conn"): - self._local.conn = None - if hasattr(self._local, "generation"): - self._local.generation = self._pool_generation - - def __enter__(self) -> RegistryStore: - self.initialize() - return self - - def __exit__(self, exc_type: object, exc: object, tb: object) -> None: - self.close() - - def initialize(self) -> None: - """Create database and schema.""" - with self._lock: - self.db_path.parent.mkdir(parents=True, exist_ok=True) - conn = self._get_connection() - self._create_schema(conn) - - def _create_schema(self, conn: sqlite3.Connection) -> None: - """Create database schema.""" - try: - conn.execute( - """ - CREATE TABLE IF NOT EXISTS projects ( - id INTEGER PRIMARY KEY, - source_root TEXT UNIQUE NOT NULL, - index_root TEXT NOT NULL, - created_at REAL, - last_indexed REAL, - total_files INTEGER DEFAULT 0, - total_dirs INTEGER DEFAULT 0, - status TEXT DEFAULT 'active' - ) - """ - ) - - conn.execute( - """ - CREATE TABLE IF NOT EXISTS dir_mapping ( - id INTEGER PRIMARY KEY, - project_id INTEGER REFERENCES projects(id) ON DELETE CASCADE, - source_path TEXT NOT NULL, - index_path TEXT NOT NULL, - depth INTEGER, - files_count INTEGER DEFAULT 0, - last_updated REAL, - UNIQUE(source_path) - ) - """ - ) - - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_dir_source ON dir_mapping(source_path)" - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_dir_project ON dir_mapping(project_id)" - ) - conn.execute( - "CREATE INDEX IF NOT EXISTS idx_project_source ON projects(source_root)" - ) - - conn.commit() - except sqlite3.DatabaseError as exc: - raise StorageError(f"Failed to initialize registry schema: {exc}") from exc - - def _normalize_path_for_comparison(self, path: Path) -> str: - """Normalize paths for comparisons and storage. - - Windows paths are treated as case-insensitive, so normalize to lowercase. - Unix platforms preserve case sensitivity. - """ - path_str = str(path) - if platform.system() == "Windows": - return path_str.lower() - return path_str - - # === Project Operations === - - def register_project(self, source_root: Path, index_root: Path) -> ProjectInfo: - """Register a new project or update existing one. - - Args: - source_root: Source code root directory - index_root: Index storage root directory - - Returns: - ProjectInfo for the registered project - """ - with self._lock: - conn = self._get_connection() - source_root_str = self._normalize_path_for_comparison(source_root.resolve()) - index_root_str = str(index_root.resolve()) - now = time.time() - - conn.execute( - """ - INSERT INTO projects(source_root, index_root, created_at, last_indexed) - VALUES(?, ?, ?, ?) - ON CONFLICT(source_root) DO UPDATE SET - index_root=excluded.index_root, - last_indexed=excluded.last_indexed, - status='active' - """, - (source_root_str, index_root_str, now, now), - ) - - row = conn.execute( - "SELECT * FROM projects WHERE source_root=?", (source_root_str,) - ).fetchone() - - conn.commit() - - if not row: - raise StorageError(f"Failed to register project: {source_root}") - - return self._row_to_project_info(row) - - def unregister_project(self, source_root: Path) -> bool: - """Remove a project registration (cascades to directory mappings). - - Args: - source_root: Source code root directory - - Returns: - True if project was removed, False if not found - """ - with self._lock: - conn = self._get_connection() - source_root_str = self._normalize_path_for_comparison(source_root.resolve()) - - row = conn.execute( - "SELECT id FROM projects WHERE source_root=?", (source_root_str,) - ).fetchone() - - if not row: - return False - - conn.execute("DELETE FROM projects WHERE source_root=?", (source_root_str,)) - conn.commit() - return True - - def get_project(self, source_root: Path) -> Optional[ProjectInfo]: - """Get project information by source root. - - Args: - source_root: Source code root directory - - Returns: - ProjectInfo if found, None otherwise - """ - with self._lock: - conn = self._get_connection() - source_root_str = self._normalize_path_for_comparison(source_root.resolve()) - - row = conn.execute( - "SELECT * FROM projects WHERE source_root=?", (source_root_str,) - ).fetchone() - - return self._row_to_project_info(row) if row else None - - def get_project_by_id(self, project_id: int) -> Optional[ProjectInfo]: - """Get project information by ID. - - Args: - project_id: Project database ID - - Returns: - ProjectInfo if found, None otherwise - """ - with self._lock: - conn = self._get_connection() - - row = conn.execute( - "SELECT * FROM projects WHERE id=?", (project_id,) - ).fetchone() - - return self._row_to_project_info(row) if row else None - - def list_projects(self, status: Optional[str] = None) -> List[ProjectInfo]: - """List all registered projects. - - Args: - status: Optional status filter ('active', 'stale', 'removed') - - Returns: - List of ProjectInfo objects - """ - with self._lock: - conn = self._get_connection() - - if status: - rows = conn.execute( - "SELECT * FROM projects WHERE status=? ORDER BY created_at DESC", - (status,), - ).fetchall() - else: - rows = conn.execute( - "SELECT * FROM projects ORDER BY created_at DESC" - ).fetchall() - - return [self._row_to_project_info(row) for row in rows] - - def update_project_stats( - self, source_root: Path, total_files: int, total_dirs: int - ) -> None: - """Update project statistics. - - Args: - source_root: Source code root directory - total_files: Total number of indexed files - total_dirs: Total number of indexed directories - """ - with self._lock: - conn = self._get_connection() - source_root_str = self._normalize_path_for_comparison(source_root.resolve()) - - conn.execute( - """ - UPDATE projects - SET total_files=?, total_dirs=?, last_indexed=? - WHERE source_root=? - """, - (total_files, total_dirs, time.time(), source_root_str), - ) - conn.commit() - - def set_project_status(self, source_root: Path, status: str) -> None: - """Set project status. - - Args: - source_root: Source code root directory - status: Status string ('active', 'stale', 'removed') - """ - with self._lock: - conn = self._get_connection() - source_root_str = self._normalize_path_for_comparison(source_root.resolve()) - - conn.execute( - "UPDATE projects SET status=? WHERE source_root=?", - (status, source_root_str), - ) - conn.commit() - - # === Directory Mapping Operations === - - def register_dir( - self, - project_id: int, - source_path: Path, - index_path: Path, - depth: int, - files_count: int = 0, - ) -> DirMapping: - """Register a directory mapping. - - Args: - project_id: Project database ID - source_path: Source directory path - index_path: Index database path - depth: Directory depth relative to project root - files_count: Number of files in directory - - Returns: - DirMapping for the registered directory - """ - with self._lock: - conn = self._get_connection() - source_path_str = self._normalize_path_for_comparison(source_path.resolve()) - index_path_str = str(index_path.resolve()) - now = time.time() - - conn.execute( - """ - INSERT INTO dir_mapping( - project_id, source_path, index_path, depth, files_count, last_updated - ) - VALUES(?, ?, ?, ?, ?, ?) - ON CONFLICT(source_path) DO UPDATE SET - index_path=excluded.index_path, - depth=excluded.depth, - files_count=excluded.files_count, - last_updated=excluded.last_updated - """, - (project_id, source_path_str, index_path_str, depth, files_count, now), - ) - - row = conn.execute( - "SELECT * FROM dir_mapping WHERE source_path=?", (source_path_str,) - ).fetchone() - - conn.commit() - - if not row: - raise StorageError(f"Failed to register directory: {source_path}") - - return self._row_to_dir_mapping(row) - - def unregister_dir(self, source_path: Path) -> bool: - """Remove a directory mapping. - - Args: - source_path: Source directory path - - Returns: - True if directory was removed, False if not found - """ - with self._lock: - conn = self._get_connection() - source_path_str = self._normalize_path_for_comparison(source_path.resolve()) - - row = conn.execute( - "SELECT id FROM dir_mapping WHERE source_path=?", (source_path_str,) - ).fetchone() - - if not row: - return False - - conn.execute("DELETE FROM dir_mapping WHERE source_path=?", (source_path_str,)) - conn.commit() - return True - - def find_index_path(self, source_path: Path) -> Optional[Path]: - """Find index path for a source directory (exact match). - - Args: - source_path: Source directory path - - Returns: - Index path if found, None otherwise - """ - with self._lock: - conn = self._get_connection() - source_path_str = self._normalize_path_for_comparison(source_path.resolve()) - - row = conn.execute( - "SELECT index_path FROM dir_mapping WHERE source_path=?", - (source_path_str,), - ).fetchone() - - return Path(row["index_path"]) if row else None - - def find_nearest_index(self, source_path: Path) -> Optional[DirMapping]: - """Find nearest indexed ancestor directory. - - Searches for the closest parent directory that has an index. - Useful for supporting subdirectory searches. - - Optimized to use single database query instead of iterating through - each parent directory level. - - Args: - source_path: Source directory or file path - - Returns: - DirMapping for nearest ancestor, None if not found - """ - with self._lock: - conn = self._get_connection() - source_path_resolved = source_path.resolve() - - # Build list of all parent paths from deepest to shallowest - paths_to_check = [] - current = source_path_resolved - while True: - paths_to_check.append(self._normalize_path_for_comparison(current)) - parent = current.parent - if parent == current: # Reached filesystem root - break - current = parent - - if not paths_to_check: - return None - - # Single query with WHERE IN, ordered by path length (longest = nearest) - placeholders = ','.join('?' * len(paths_to_check)) - query = f""" - SELECT * FROM dir_mapping - WHERE source_path IN ({placeholders}) - ORDER BY LENGTH(source_path) DESC - LIMIT 1 - """ - - row = conn.execute(query, paths_to_check).fetchone() - return self._row_to_dir_mapping(row) if row else None - - def find_by_source_path(self, source_path: str) -> Optional[Dict[str, str]]: - """Find project by source path (exact or nearest match). - - Searches for a project whose source_root matches or contains - the given source_path. - - Args: - source_path: Source directory path as string - - Returns: - Dict with project info including 'index_root', or None if not found - """ - with self._lock: - conn = self._get_connection() - resolved_path = Path(source_path).resolve() - source_path_resolved = self._normalize_path_for_comparison(resolved_path) - - # First try exact match on projects table - row = conn.execute( - "SELECT * FROM projects WHERE source_root=?", (source_path_resolved,) - ).fetchone() - - if row: - return { - "id": str(row["id"]), - "source_root": row["source_root"], - "index_root": row["index_root"], - "status": row["status"] or "active", - } - - # Try finding project that contains this path - # Build list of all parent paths - paths_to_check = [] - current = resolved_path - while True: - paths_to_check.append(self._normalize_path_for_comparison(current)) - parent = current.parent - if parent == current: - break - current = parent - - if paths_to_check: - placeholders = ','.join('?' * len(paths_to_check)) - query = f""" - SELECT * FROM projects - WHERE source_root IN ({placeholders}) - ORDER BY LENGTH(source_root) DESC - LIMIT 1 - """ - row = conn.execute(query, paths_to_check).fetchone() - - if row: - return { - "id": str(row["id"]), - "source_root": row["source_root"], - "index_root": row["index_root"], - "status": row["status"] or "active", - } - - return None - - def get_project_dirs(self, project_id: int) -> List[DirMapping]: - """Get all directory mappings for a project. - - Args: - project_id: Project database ID - - Returns: - List of DirMapping objects - """ - with self._lock: - conn = self._get_connection() - - rows = conn.execute( - "SELECT * FROM dir_mapping WHERE project_id=? ORDER BY depth, source_path", - (project_id,), - ).fetchall() - - return [self._row_to_dir_mapping(row) for row in rows] - - def get_subdirs(self, source_path: Path) -> List[DirMapping]: - """Get direct subdirectory mappings. - - Args: - source_path: Parent directory path - - Returns: - List of DirMapping objects for direct children - """ - with self._lock: - conn = self._get_connection() - source_path_str = self._normalize_path_for_comparison(source_path.resolve()) - - # First get the parent's depth - parent_row = conn.execute( - "SELECT depth, project_id FROM dir_mapping WHERE source_path=?", - (source_path_str,), - ).fetchone() - - if not parent_row: - return [] - - parent_depth = int(parent_row["depth"]) - project_id = int(parent_row["project_id"]) - - # Get all subdirs with depth = parent_depth + 1 and matching path prefix - rows = conn.execute( - """ - SELECT * FROM dir_mapping - WHERE project_id=? AND depth=? AND source_path LIKE ? - ORDER BY source_path - """, - (project_id, parent_depth + 1, f"{source_path_str}%"), - ).fetchall() - - return [self._row_to_dir_mapping(row) for row in rows] - - def find_descendant_project_roots(self, source_root: Path) -> List[DirMapping]: - """Return root directory mappings for nested projects under ``source_root``.""" - with self._lock: - conn = self._get_connection() - source_root_resolved = source_root.resolve() - source_root_str = self._normalize_path_for_comparison(source_root_resolved) - - rows = conn.execute( - """ - SELECT dm.* - FROM dir_mapping dm - INNER JOIN projects p ON p.id = dm.project_id - WHERE dm.source_path = p.source_root - AND p.source_root LIKE ? - ORDER BY p.source_root ASC - """, - (f"{source_root_str}%",), - ).fetchall() - - descendant_roots: List[DirMapping] = [] - normalized_root_path = Path(source_root_str) - - for row in rows: - mapping = self._row_to_dir_mapping(row) - normalized_mapping_path = Path( - self._normalize_path_for_comparison(mapping.source_path.resolve()) - ) - - if normalized_mapping_path == normalized_root_path: - continue - - try: - normalized_mapping_path.relative_to(normalized_root_path) - except ValueError: - continue - - descendant_roots.append(mapping) - - descendant_roots.sort( - key=lambda mapping: ( - len( - mapping.source_path.resolve().relative_to( - source_root_resolved - ).parts - ), - self._normalize_path_for_comparison(mapping.source_path.resolve()), - ) - ) - return descendant_roots - - def update_dir_stats(self, source_path: Path, files_count: int) -> None: - """Update directory statistics. - - Args: - source_path: Source directory path - files_count: Number of files in directory - """ - with self._lock: - conn = self._get_connection() - source_path_str = self._normalize_path_for_comparison(source_path.resolve()) - - conn.execute( - """ - UPDATE dir_mapping - SET files_count=?, last_updated=? - WHERE source_path=? - """, - (files_count, time.time(), source_path_str), - ) - conn.commit() - - def update_index_paths(self, old_root: Path, new_root: Path) -> int: - """Update all index paths after migration. - - Replaces old_root prefix with new_root in all stored index paths. - - Args: - old_root: Old index root directory - new_root: New index root directory - - Returns: - Number of paths updated - """ - with self._lock: - conn = self._get_connection() - old_root_str = str(old_root.resolve()) - new_root_str = str(new_root.resolve()) - updated = 0 - - # Update projects - conn.execute( - """ - UPDATE projects - SET index_root = REPLACE(index_root, ?, ?) - WHERE index_root LIKE ? - """, - (old_root_str, new_root_str, f"{old_root_str}%"), - ) - updated += conn.total_changes - - # Update dir_mapping - conn.execute( - """ - UPDATE dir_mapping - SET index_path = REPLACE(index_path, ?, ?) - WHERE index_path LIKE ? - """, - (old_root_str, new_root_str, f"{old_root_str}%"), - ) - updated += conn.total_changes - - conn.commit() - return updated - - # === Internal Methods === - - def _row_to_project_info(self, row: sqlite3.Row) -> ProjectInfo: - """Convert database row to ProjectInfo.""" - return ProjectInfo( - id=int(row["id"]), - source_root=Path(row["source_root"]), - index_root=Path(row["index_root"]), - created_at=float(row["created_at"]) if row["created_at"] else 0.0, - last_indexed=float(row["last_indexed"]) if row["last_indexed"] else 0.0, - total_files=int(row["total_files"]) if row["total_files"] else 0, - total_dirs=int(row["total_dirs"]) if row["total_dirs"] else 0, - status=str(row["status"]) if row["status"] else "active", - ) - - def _row_to_dir_mapping(self, row: sqlite3.Row) -> DirMapping: - """Convert database row to DirMapping.""" - return DirMapping( - id=int(row["id"]), - project_id=int(row["project_id"]), - source_path=Path(row["source_path"]), - index_path=Path(row["index_path"]), - depth=int(row["depth"]) if row["depth"] is not None else 0, - files_count=int(row["files_count"]) if row["files_count"] else 0, - last_updated=float(row["last_updated"]) if row["last_updated"] else 0.0, - ) diff --git a/codex-lens/src/codexlens/storage/sqlite_store.py b/codex-lens/src/codexlens/storage/sqlite_store.py deleted file mode 100644 index 6945be8a..00000000 --- a/codex-lens/src/codexlens/storage/sqlite_store.py +++ /dev/null @@ -1,976 +0,0 @@ -"""SQLite storage for CodexLens indexing and search.""" - -from __future__ import annotations - -import json -import logging -import sqlite3 -import threading -import time -from dataclasses import asdict -from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Tuple - -from codexlens.entities import IndexedFile, SearchResult, Symbol -from codexlens.errors import StorageError - -logger = logging.getLogger(__name__) - - -class SQLiteStore: - """SQLiteStore providing FTS5 search and symbol lookup. - - Implements thread-local connection pooling for improved performance. - """ - - # Maximum number of connections to keep in pool to prevent memory leaks - MAX_POOL_SIZE = 32 - # Idle timeout in seconds (10 minutes) - IDLE_TIMEOUT = 600 - # Periodic cleanup interval in seconds (5 minutes) - CLEANUP_INTERVAL = 300 - - def __init__(self, db_path: str | Path) -> None: - self.db_path = Path(db_path) - self._lock = threading.RLock() - self._local = threading.local() - self._pool_lock = threading.Lock() - # Pool stores (connection, last_access_time) tuples - self._pool: Dict[int, Tuple[sqlite3.Connection, float]] = {} - self._pool_generation = 0 - self._cleanup_timer: threading.Timer | None = None - self._cleanup_stop_event = threading.Event() - self._start_cleanup_timer() - - def _get_connection(self) -> sqlite3.Connection: - """Get or create a thread-local database connection.""" - thread_id = threading.get_ident() - current_time = time.time() - - if getattr(self._local, "generation", None) == self._pool_generation: - conn = getattr(self._local, "conn", None) - if conn is not None: - with self._pool_lock: - pool_entry = self._pool.get(thread_id) - if pool_entry is not None: - pooled_conn, _ = pool_entry - self._pool[thread_id] = (pooled_conn, current_time) - self._local.conn = pooled_conn - return pooled_conn - - # Thread-local connection is stale (e.g., cleaned up by timer). - self._local.conn = None - - with self._pool_lock: - pool_entry = self._pool.get(thread_id) - if pool_entry is not None: - conn, _ = pool_entry - # Update last access time - self._pool[thread_id] = (conn, current_time) - else: - # Clean up stale and idle connections if pool is too large - if len(self._pool) >= self.MAX_POOL_SIZE: - self._cleanup_stale_connections() - - conn = sqlite3.connect(self.db_path, check_same_thread=False) - conn.row_factory = sqlite3.Row - conn.execute("PRAGMA journal_mode=WAL") - conn.execute("PRAGMA synchronous=NORMAL") - conn.execute("PRAGMA foreign_keys=ON") - # Memory-mapped I/O for faster reads (30GB limit) - conn.execute("PRAGMA mmap_size=30000000000") - self._pool[thread_id] = (conn, current_time) - - self._local.conn = conn - self._local.generation = self._pool_generation - return conn - - def _cleanup_stale_connections(self) -> None: - """Remove connections for threads that no longer exist or have been idle too long.""" - current_time = time.time() - # Get list of active thread IDs - active_threads = {t.ident for t in threading.enumerate() if t.ident is not None} - - # Find connections to remove: dead threads or idle timeout exceeded - stale_ids: list[tuple[int, str]] = [] - for tid, (conn, last_access) in list(self._pool.items()): - try: - is_dead_thread = tid not in active_threads - is_idle = (current_time - last_access) > self.IDLE_TIMEOUT - - is_invalid_connection = False - if not is_dead_thread and not is_idle: - try: - conn.execute("SELECT 1").fetchone() - except sqlite3.ProgrammingError: - is_invalid_connection = True - except sqlite3.Error: - is_invalid_connection = True - - if is_invalid_connection: - stale_ids.append((tid, "invalid_connection")) - elif is_dead_thread: - stale_ids.append((tid, "dead_thread")) - elif is_idle: - stale_ids.append((tid, "idle_timeout")) - except Exception: - # Never break cleanup for a single bad entry. - continue - - # Close and remove stale connections - for tid, reason in stale_ids: - try: - conn, _ = self._pool[tid] - conn.close() - except Exception: - pass - del self._pool[tid] - logger.debug("Cleaned SQLiteStore connection for thread_id=%s (%s)", tid, reason) - - def _start_cleanup_timer(self) -> None: - if self.CLEANUP_INTERVAL <= 0: - return - - self._cleanup_stop_event.clear() - - def tick() -> None: - if self._cleanup_stop_event.is_set(): - return - - try: - with self._pool_lock: - self._cleanup_stale_connections() - finally: - with self._pool_lock: - if self._cleanup_stop_event.is_set(): - self._cleanup_timer = None - return - - self._cleanup_timer = threading.Timer(self.CLEANUP_INTERVAL, tick) - self._cleanup_timer.daemon = True - self._cleanup_timer.start() - - self._cleanup_timer = threading.Timer(self.CLEANUP_INTERVAL, tick) - self._cleanup_timer.daemon = True - self._cleanup_timer.start() - - def _stop_cleanup_timer(self) -> None: - self._cleanup_stop_event.set() - with self._pool_lock: - if self._cleanup_timer is not None: - self._cleanup_timer.cancel() - self._cleanup_timer = None - - def close(self) -> None: - """Close all pooled connections.""" - with self._lock: - self._stop_cleanup_timer() - with self._pool_lock: - for conn, _ in self._pool.values(): - conn.close() - self._pool.clear() - self._pool_generation += 1 - - if hasattr(self._local, "conn"): - self._local.conn = None - if hasattr(self._local, "generation"): - self._local.generation = self._pool_generation - - def __enter__(self) -> SQLiteStore: - self.initialize() - return self - - def __exit__(self, exc_type: object, exc: object, tb: object) -> None: - self.close() - - def execute_query( - self, - sql: str, - params: tuple = (), - allow_writes: bool = False - ) -> List[Dict[str, Any]]: - """Execute a raw SQL query and return results as dictionaries. - - This is the public API for executing custom queries without bypassing - encapsulation via _get_connection(). - - By default, only SELECT queries are allowed. Use allow_writes=True - for trusted internal code that needs to execute other statements. - - Args: - sql: SQL query string with ? placeholders for parameters - params: Tuple of parameter values to bind - allow_writes: If True, allow non-SELECT statements (default False) - - Returns: - List of result rows as dictionaries - - Raises: - StorageError: If query execution fails or validation fails - """ - # Validate query type for security - sql_stripped = sql.strip().upper() - if not allow_writes: - # Only allow SELECT and WITH (for CTEs) statements - if not (sql_stripped.startswith("SELECT") or sql_stripped.startswith("WITH")): - raise StorageError( - "Only SELECT queries are allowed. " - "Use allow_writes=True for trusted internal operations.", - db_path=str(self.db_path), - operation="execute_query", - details={"query_type": sql_stripped.split()[0] if sql_stripped else "EMPTY"} - ) - - try: - conn = self._get_connection() - rows = conn.execute(sql, params).fetchall() - return [dict(row) for row in rows] - except sqlite3.Error as e: - raise StorageError( - f"Query execution failed: {e}", - db_path=str(self.db_path), - operation="execute_query", - details={"error_type": type(e).__name__} - ) from e - - def initialize(self) -> None: - with self._lock: - self.db_path.parent.mkdir(parents=True, exist_ok=True) - conn = self._get_connection() - self._create_schema(conn) - self._ensure_fts_external_content(conn) - - - def add_file(self, indexed_file: IndexedFile, content: str) -> None: - with self._lock: - conn = self._get_connection() - path = str(Path(indexed_file.path).resolve()) - language = indexed_file.language - mtime = Path(path).stat().st_mtime if Path(path).exists() else None - line_count = content.count(chr(10)) + 1 - - conn.execute( - """ - INSERT INTO files(path, language, content, mtime, line_count) - VALUES(?, ?, ?, ?, ?) - ON CONFLICT(path) DO UPDATE SET - language=excluded.language, - content=excluded.content, - mtime=excluded.mtime, - line_count=excluded.line_count - """, - (path, language, content, mtime, line_count), - ) - - row = conn.execute("SELECT id FROM files WHERE path=?", (path,)).fetchone() - if not row: - raise StorageError(f"Failed to read file id for {path}") - file_id = int(row["id"]) - - conn.execute("DELETE FROM symbols WHERE file_id=?", (file_id,)) - if indexed_file.symbols: - conn.executemany( - """ - INSERT INTO symbols(file_id, name, kind, start_line, end_line) - VALUES(?, ?, ?, ?, ?) - """, - [ - (file_id, s.name, s.kind, s.range[0], s.range[1]) - for s in indexed_file.symbols - ], - ) - conn.commit() - - def add_files(self, files_data: List[tuple[IndexedFile, str]]) -> None: - """Add multiple files in a single transaction for better performance. - - Args: - files_data: List of (indexed_file, content) tuples - """ - with self._lock: - conn = self._get_connection() - try: - conn.execute("BEGIN") - - for indexed_file, content in files_data: - path = str(Path(indexed_file.path).resolve()) - language = indexed_file.language - mtime = Path(path).stat().st_mtime if Path(path).exists() else None - line_count = content.count(chr(10)) + 1 - - conn.execute( - """ - INSERT INTO files(path, language, content, mtime, line_count) - VALUES(?, ?, ?, ?, ?) - ON CONFLICT(path) DO UPDATE SET - language=excluded.language, - content=excluded.content, - mtime=excluded.mtime, - line_count=excluded.line_count - """, - (path, language, content, mtime, line_count), - ) - - row = conn.execute("SELECT id FROM files WHERE path=?", (path,)).fetchone() - if not row: - raise StorageError(f"Failed to read file id for {path}") - file_id = int(row["id"]) - - conn.execute("DELETE FROM symbols WHERE file_id=?", (file_id,)) - if indexed_file.symbols: - conn.executemany( - """ - INSERT INTO symbols(file_id, name, kind, start_line, end_line) - VALUES(?, ?, ?, ?, ?) - """, - [ - (file_id, s.name, s.kind, s.range[0], s.range[1]) - for s in indexed_file.symbols - ], - ) - - conn.commit() - except Exception as exc: - try: - conn.rollback() - except Exception as rollback_exc: - logger.error( - "Rollback failed after add_files() error (%s): %s", exc, rollback_exc - ) - raise exc.with_traceback(exc.__traceback__) from rollback_exc - raise - - def remove_file(self, path: str | Path) -> bool: - """Remove a file from the index.""" - with self._lock: - conn = self._get_connection() - resolved_path = str(Path(path).resolve()) - - row = conn.execute( - "SELECT id FROM files WHERE path=?", (resolved_path,) - ).fetchone() - - if not row: - return False - - file_id = int(row["id"]) - conn.execute("DELETE FROM files WHERE id=?", (file_id,)) - conn.commit() - return True - - def file_exists(self, path: str | Path) -> bool: - """Check if a file exists in the index.""" - with self._lock: - conn = self._get_connection() - resolved_path = str(Path(path).resolve()) - row = conn.execute( - "SELECT 1 FROM files WHERE path=?", (resolved_path,) - ).fetchone() - return row is not None - - def get_file_mtime(self, path: str | Path) -> float | None: - """Get the stored mtime for a file.""" - with self._lock: - conn = self._get_connection() - resolved_path = str(Path(path).resolve()) - row = conn.execute( - "SELECT mtime FROM files WHERE path=?", (resolved_path,) - ).fetchone() - return float(row["mtime"]) if row and row["mtime"] else None - - - def search_fts(self, query: str, *, limit: int = 20, offset: int = 0) -> List[SearchResult]: - with self._lock: - conn = self._get_connection() - try: - rows = conn.execute( - """ - SELECT rowid, path, bm25(files_fts) AS rank, - snippet(files_fts, 2, '[bold red]', '[/bold red]', "...", 20) AS excerpt - FROM files_fts - WHERE files_fts MATCH ? - ORDER BY rank - LIMIT ? OFFSET ? - """, - (query, limit, offset), - ).fetchall() - except sqlite3.DatabaseError as exc: - raise StorageError(f"FTS search failed: {exc}") from exc - - results: List[SearchResult] = [] - for row in rows: - rank = float(row["rank"]) if row["rank"] is not None else 0.0 - score = abs(rank) if rank < 0 else 0.0 - results.append( - SearchResult( - path=row["path"], - score=score, - excerpt=row["excerpt"], - ) - ) - return results - - def search_files_only( - self, query: str, *, limit: int = 20, offset: int = 0 - ) -> List[str]: - """Search indexed file contents and return only file paths.""" - with self._lock: - conn = self._get_connection() - try: - rows = conn.execute( - """ - SELECT path - FROM files_fts - WHERE files_fts MATCH ? - ORDER BY bm25(files_fts) - LIMIT ? OFFSET ? - """, - (query, limit, offset), - ).fetchall() - except sqlite3.DatabaseError as exc: - raise StorageError(f"FTS search failed: {exc}") from exc - - return [row["path"] for row in rows] - - def search_symbols( - self, name: str, *, kind: Optional[str] = None, limit: int = 50 - ) -> List[Symbol]: - pattern = f"%{name}%" - with self._lock: - conn = self._get_connection() - if kind: - rows = conn.execute( - """ - SELECT name, kind, start_line, end_line - FROM symbols - WHERE name LIKE ? AND kind=? - ORDER BY name - LIMIT ? - """, - (pattern, kind, limit), - ).fetchall() - else: - rows = conn.execute( - """ - SELECT name, kind, start_line, end_line - FROM symbols - WHERE name LIKE ? - ORDER BY name - LIMIT ? - """, - (pattern, limit), - ).fetchall() - - return [ - Symbol(name=row["name"], kind=row["kind"], range=(row["start_line"], row["end_line"])) - for row in rows - ] - - - def stats(self) -> Dict[str, Any]: - with self._lock: - conn = self._get_connection() - file_count = conn.execute("SELECT COUNT(*) AS c FROM files").fetchone()["c"] - symbol_count = conn.execute("SELECT COUNT(*) AS c FROM symbols").fetchone()["c"] - lang_rows = conn.execute( - "SELECT language, COUNT(*) AS c FROM files GROUP BY language ORDER BY c DESC" - ).fetchall() - languages = {row["language"]: row["c"] for row in lang_rows} - # Include relationship count if table exists - relationship_count = 0 - try: - rel_row = conn.execute("SELECT COUNT(*) AS c FROM code_relationships").fetchone() - relationship_count = int(rel_row["c"]) if rel_row else 0 - except sqlite3.DatabaseError: - pass - - return { - "files": int(file_count), - "symbols": int(symbol_count), - "relationships": relationship_count, - "languages": languages, - "db_path": str(self.db_path), - } - - - def _connect(self) -> sqlite3.Connection: - """Legacy method for backward compatibility.""" - return self._get_connection() - - def _create_schema(self, conn: sqlite3.Connection) -> None: - try: - conn.execute( - """ - CREATE TABLE IF NOT EXISTS files ( - id INTEGER PRIMARY KEY, - path TEXT UNIQUE NOT NULL, - language TEXT NOT NULL, - content TEXT NOT NULL, - mtime REAL, - line_count INTEGER - ) - """ - ) - conn.execute( - """ - CREATE TABLE IF NOT EXISTS symbols ( - id INTEGER PRIMARY KEY, - file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE, - name TEXT NOT NULL, - kind TEXT NOT NULL, - start_line INTEGER NOT NULL, - end_line INTEGER NOT NULL - ) - """ - ) - conn.execute("CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name)") - conn.execute("CREATE INDEX IF NOT EXISTS idx_symbols_kind ON symbols(kind)") - conn.execute( - """ - CREATE TABLE IF NOT EXISTS code_relationships ( - id INTEGER PRIMARY KEY, - source_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE, - target_qualified_name TEXT NOT NULL, - relationship_type TEXT NOT NULL, - source_line INTEGER NOT NULL, - target_file TEXT - ) - """ - ) - conn.execute("CREATE INDEX IF NOT EXISTS idx_rel_target ON code_relationships(target_qualified_name)") - conn.execute("CREATE INDEX IF NOT EXISTS idx_rel_source ON code_relationships(source_symbol_id)") - # Chunks table for multi-vector storage (cascade retrieval architecture) - # - embedding: Original embedding for backward compatibility - # - embedding_binary: 256-dim binary vector for coarse ranking - # - embedding_dense: 2048-dim dense vector for fine ranking - conn.execute( - """ - CREATE TABLE IF NOT EXISTS chunks ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - file_path TEXT NOT NULL, - content TEXT NOT NULL, - embedding BLOB, - embedding_binary BLOB, - embedding_dense BLOB, - metadata TEXT, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - """ - ) - conn.execute("CREATE INDEX IF NOT EXISTS idx_chunks_file_path ON chunks(file_path)") - # Run migration for existing databases - self._migrate_chunks_table(conn) - conn.commit() - except sqlite3.DatabaseError as exc: - raise StorageError(f"Failed to initialize database schema: {exc}") from exc - - def _ensure_fts_external_content(self, conn: sqlite3.Connection) -> None: - """Ensure files_fts is an FTS5 external-content table (no content duplication).""" - try: - sql_row = conn.execute( - "SELECT sql FROM sqlite_master WHERE type='table' AND name='files_fts'" - ).fetchone() - sql = str(sql_row["sql"]) if sql_row and sql_row["sql"] else None - - if sql is None: - self._create_external_fts(conn) - conn.commit() - return - - if ( - "content='files'" in sql - or 'content="files"' in sql - or "content=files" in sql - ): - self._create_fts_triggers(conn) - conn.commit() - return - - self._migrate_fts_to_external(conn) - except sqlite3.DatabaseError as exc: - raise StorageError(f"Failed to ensure FTS schema: {exc}") from exc - - def _create_external_fts(self, conn: sqlite3.Connection) -> None: - conn.execute( - """ - CREATE VIRTUAL TABLE files_fts USING fts5( - path UNINDEXED, - language UNINDEXED, - content, - content='files', - content_rowid='id', - tokenize="unicode61 tokenchars '_'" - ) - """ - ) - self._create_fts_triggers(conn) - - def _create_fts_triggers(self, conn: sqlite3.Connection) -> None: - conn.execute( - """ - CREATE TRIGGER IF NOT EXISTS files_ai AFTER INSERT ON files BEGIN - INSERT INTO files_fts(rowid, path, language, content) - VALUES(new.id, new.path, new.language, new.content); - END - """ - ) - conn.execute( - """ - CREATE TRIGGER IF NOT EXISTS files_ad AFTER DELETE ON files BEGIN - INSERT INTO files_fts(files_fts, rowid, path, language, content) - VALUES('delete', old.id, old.path, old.language, old.content); - END - """ - ) - conn.execute( - """ - CREATE TRIGGER IF NOT EXISTS files_au AFTER UPDATE ON files BEGIN - INSERT INTO files_fts(files_fts, rowid, path, language, content) - VALUES('delete', old.id, old.path, old.language, old.content); - INSERT INTO files_fts(rowid, path, language, content) - VALUES(new.id, new.path, new.language, new.content); - END - """ - ) - - def _migrate_fts_to_external(self, conn: sqlite3.Connection) -> None: - """Migrate legacy files_fts (with duplicated content) to external content.""" - try: - conn.execute("BEGIN") - conn.execute("DROP TRIGGER IF EXISTS files_ai") - conn.execute("DROP TRIGGER IF EXISTS files_ad") - conn.execute("DROP TRIGGER IF EXISTS files_au") - - conn.execute("ALTER TABLE files_fts RENAME TO files_fts_legacy") - self._create_external_fts(conn) - conn.execute("INSERT INTO files_fts(files_fts) VALUES('rebuild')") - conn.execute("DROP TABLE files_fts_legacy") - conn.commit() - except sqlite3.DatabaseError as exc: - try: - conn.rollback() - except Exception as rollback_exc: - logger.error( - "Rollback failed during FTS schema migration (%s): %s", exc, rollback_exc - ) - raise exc.with_traceback(exc.__traceback__) from rollback_exc - - try: - conn.execute("DROP TABLE IF EXISTS files_fts") - except Exception: - pass - - try: - conn.execute("ALTER TABLE files_fts_legacy RENAME TO files_fts") - conn.commit() - except Exception: - pass - raise - - try: - conn.execute("VACUUM") - except sqlite3.DatabaseError: - pass - - def _migrate_chunks_table(self, conn: sqlite3.Connection) -> None: - """Migrate existing chunks table to add multi-vector columns if needed. - - This handles upgrading existing databases that may have the chunks table - without the embedding_binary and embedding_dense columns. - """ - # Check if chunks table exists - table_exists = conn.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='chunks'" - ).fetchone() - - if not table_exists: - # Table doesn't exist yet, nothing to migrate - return - - # Check existing columns - cursor = conn.execute("PRAGMA table_info(chunks)") - columns = {row[1] for row in cursor.fetchall()} - - # Add embedding_binary column if missing - if "embedding_binary" not in columns: - logger.info("Migrating chunks table: adding embedding_binary column") - conn.execute( - "ALTER TABLE chunks ADD COLUMN embedding_binary BLOB" - ) - - # Add embedding_dense column if missing - if "embedding_dense" not in columns: - logger.info("Migrating chunks table: adding embedding_dense column") - conn.execute( - "ALTER TABLE chunks ADD COLUMN embedding_dense BLOB" - ) - - def add_chunks( - self, - file_path: str, - chunks_data: List[Dict[str, Any]], - *, - embedding: Optional[List[List[float]]] = None, - embedding_binary: Optional[List[bytes]] = None, - embedding_dense: Optional[List[bytes]] = None, - ) -> List[int]: - """Add multiple chunks with multi-vector embeddings support. - - This method supports the cascade retrieval architecture with three embedding types: - - embedding: Original dense embedding for backward compatibility - - embedding_binary: 256-dim binary vector for fast coarse ranking - - embedding_dense: 2048-dim dense vector for precise fine ranking - - Args: - file_path: Path to the source file for all chunks. - chunks_data: List of dicts with 'content' and optional 'metadata' keys. - embedding: Optional list of dense embeddings (one per chunk). - embedding_binary: Optional list of binary embeddings as bytes (one per chunk). - embedding_dense: Optional list of dense embeddings as bytes (one per chunk). - - Returns: - List of inserted chunk IDs. - - Raises: - ValueError: If embedding list lengths don't match chunks_data length. - StorageError: If database operation fails. - """ - if not chunks_data: - return [] - - n_chunks = len(chunks_data) - - # Validate embedding lengths - if embedding is not None and len(embedding) != n_chunks: - raise ValueError( - f"embedding length ({len(embedding)}) != chunks_data length ({n_chunks})" - ) - if embedding_binary is not None and len(embedding_binary) != n_chunks: - raise ValueError( - f"embedding_binary length ({len(embedding_binary)}) != chunks_data length ({n_chunks})" - ) - if embedding_dense is not None and len(embedding_dense) != n_chunks: - raise ValueError( - f"embedding_dense length ({len(embedding_dense)}) != chunks_data length ({n_chunks})" - ) - - # Prepare batch data - batch_data = [] - for i, chunk in enumerate(chunks_data): - content = chunk.get("content", "") - metadata = chunk.get("metadata") - metadata_json = json.dumps(metadata) if metadata else None - - # Convert embeddings to bytes if needed - emb_blob = None - if embedding is not None: - import struct - emb_blob = struct.pack(f"{len(embedding[i])}f", *embedding[i]) - - emb_binary_blob = embedding_binary[i] if embedding_binary is not None else None - emb_dense_blob = embedding_dense[i] if embedding_dense is not None else None - - batch_data.append(( - file_path, content, emb_blob, emb_binary_blob, emb_dense_blob, metadata_json - )) - - with self._lock: - conn = self._get_connection() - try: - # Get starting ID before insert - row = conn.execute("SELECT MAX(id) FROM chunks").fetchone() - start_id = (row[0] or 0) + 1 - - conn.executemany( - """ - INSERT INTO chunks ( - file_path, content, embedding, embedding_binary, - embedding_dense, metadata - ) - VALUES (?, ?, ?, ?, ?, ?) - """, - batch_data - ) - conn.commit() - - # Calculate inserted IDs - return list(range(start_id, start_id + n_chunks)) - - except sqlite3.DatabaseError as exc: - raise StorageError( - f"Failed to add chunks: {exc}", - db_path=str(self.db_path), - operation="add_chunks", - ) from exc - - def get_binary_embeddings( - self, chunk_ids: List[int] - ) -> Dict[int, Optional[bytes]]: - """Get binary embeddings for specified chunk IDs. - - Used for coarse ranking in cascade retrieval architecture. - Binary embeddings (256-dim) enable fast approximate similarity search. - - Args: - chunk_ids: List of chunk IDs to retrieve embeddings for. - - Returns: - Dictionary mapping chunk_id to embedding_binary bytes (or None if not set). - - Raises: - StorageError: If database query fails. - """ - if not chunk_ids: - return {} - - with self._lock: - conn = self._get_connection() - try: - placeholders = ",".join("?" * len(chunk_ids)) - rows = conn.execute( - f"SELECT id, embedding_binary FROM chunks WHERE id IN ({placeholders})", - chunk_ids - ).fetchall() - - return {row["id"]: row["embedding_binary"] for row in rows} - - except sqlite3.DatabaseError as exc: - raise StorageError( - f"Failed to get binary embeddings: {exc}", - db_path=str(self.db_path), - operation="get_binary_embeddings", - ) from exc - - def get_dense_embeddings( - self, chunk_ids: List[int] - ) -> Dict[int, Optional[bytes]]: - """Get dense embeddings for specified chunk IDs. - - Used for fine ranking in cascade retrieval architecture. - Dense embeddings (2048-dim) provide high-precision similarity scoring. - - Args: - chunk_ids: List of chunk IDs to retrieve embeddings for. - - Returns: - Dictionary mapping chunk_id to embedding_dense bytes (or None if not set). - - Raises: - StorageError: If database query fails. - """ - if not chunk_ids: - return {} - - with self._lock: - conn = self._get_connection() - try: - placeholders = ",".join("?" * len(chunk_ids)) - rows = conn.execute( - f"SELECT id, embedding_dense FROM chunks WHERE id IN ({placeholders})", - chunk_ids - ).fetchall() - - return {row["id"]: row["embedding_dense"] for row in rows} - - except sqlite3.DatabaseError as exc: - raise StorageError( - f"Failed to get dense embeddings: {exc}", - db_path=str(self.db_path), - operation="get_dense_embeddings", - ) from exc - - def get_chunks_by_ids( - self, chunk_ids: List[int] - ) -> List[Dict[str, Any]]: - """Get chunk data for specified IDs. - - Args: - chunk_ids: List of chunk IDs to retrieve. - - Returns: - List of chunk dictionaries with id, file_path, content, metadata. - - Raises: - StorageError: If database query fails. - """ - if not chunk_ids: - return [] - - with self._lock: - conn = self._get_connection() - try: - placeholders = ",".join("?" * len(chunk_ids)) - rows = conn.execute( - f""" - SELECT id, file_path, content, metadata, created_at - FROM chunks - WHERE id IN ({placeholders}) - """, - chunk_ids - ).fetchall() - - results = [] - for row in rows: - metadata = None - if row["metadata"]: - try: - metadata = json.loads(row["metadata"]) - except json.JSONDecodeError: - pass - - results.append({ - "id": row["id"], - "file_path": row["file_path"], - "content": row["content"], - "metadata": metadata, - "created_at": row["created_at"], - }) - - return results - - except sqlite3.DatabaseError as exc: - raise StorageError( - f"Failed to get chunks: {exc}", - db_path=str(self.db_path), - operation="get_chunks_by_ids", - ) from exc - - def delete_chunks_by_file(self, file_path: str) -> int: - """Delete all chunks for a given file path. - - Args: - file_path: Path to the source file. - - Returns: - Number of deleted chunks. - - Raises: - StorageError: If database operation fails. - """ - with self._lock: - conn = self._get_connection() - try: - cursor = conn.execute( - "DELETE FROM chunks WHERE file_path = ?", - (file_path,) - ) - conn.commit() - return cursor.rowcount - - except sqlite3.DatabaseError as exc: - raise StorageError( - f"Failed to delete chunks: {exc}", - db_path=str(self.db_path), - operation="delete_chunks_by_file", - ) from exc - - def count_chunks(self) -> int: - """Count total chunks in store. - - Returns: - Total number of chunks. - """ - with self._lock: - conn = self._get_connection() - row = conn.execute("SELECT COUNT(*) AS c FROM chunks").fetchone() - return int(row["c"]) if row else 0 diff --git a/codex-lens/src/codexlens/storage/sqlite_utils.py b/codex-lens/src/codexlens/storage/sqlite_utils.py deleted file mode 100644 index 2d5730f9..00000000 --- a/codex-lens/src/codexlens/storage/sqlite_utils.py +++ /dev/null @@ -1,64 +0,0 @@ -"""SQLite utility functions for CodexLens storage layer.""" - -from __future__ import annotations - -import logging -import sqlite3 - -log = logging.getLogger(__name__) - - -def check_trigram_support(conn: sqlite3.Connection) -> bool: - """Check if SQLite supports trigram tokenizer for FTS5. - - Trigram tokenizer requires SQLite >= 3.34.0. - - Args: - conn: Database connection to test - - Returns: - True if trigram tokenizer is available, False otherwise - """ - try: - # Test by creating a temporary virtual table with trigram tokenizer - conn.execute( - """ - CREATE VIRTUAL TABLE IF NOT EXISTS test_trigram_check - USING fts5(test_content, tokenize='trigram') - """ - ) - # Clean up test table - conn.execute("DROP TABLE IF EXISTS test_trigram_check") - conn.commit() - return True - except sqlite3.OperationalError as e: - # Trigram tokenizer not available - if "unrecognized tokenizer" in str(e).lower(): - log.debug("Trigram tokenizer not available in this SQLite version") - return False - # Other operational errors should be re-raised - raise - except Exception: - # Any other exception means trigram is not supported - return False - - -def get_sqlite_version(conn: sqlite3.Connection) -> tuple[int, int, int]: - """Get SQLite version as (major, minor, patch) tuple. - - Args: - conn: Database connection - - Returns: - Version tuple, e.g., (3, 34, 1) - """ - row = conn.execute("SELECT sqlite_version()").fetchone() - version_str = row[0] if row else "0.0.0" - parts = version_str.split('.') - try: - major = int(parts[0]) if len(parts) > 0 else 0 - minor = int(parts[1]) if len(parts) > 1 else 0 - patch = int(parts[2]) if len(parts) > 2 else 0 - return (major, minor, patch) - except (ValueError, IndexError): - return (0, 0, 0) diff --git a/codex-lens/src/codexlens/storage/vector_meta_store.py b/codex-lens/src/codexlens/storage/vector_meta_store.py deleted file mode 100644 index bd466a60..00000000 --- a/codex-lens/src/codexlens/storage/vector_meta_store.py +++ /dev/null @@ -1,415 +0,0 @@ -"""Central storage for vector metadata. - -This module provides a centralized SQLite database for storing chunk metadata -associated with centralized vector indexes. Instead of traversing all _index.db -files to fetch chunk metadata, this provides O(1) lookup by chunk ID. -""" - -from __future__ import annotations - -import json -import logging -import sqlite3 -import threading -from pathlib import Path -from typing import Any, Dict, List, Optional - -from codexlens.errors import StorageError - -logger = logging.getLogger(__name__) - - -class VectorMetadataStore: - """Store and retrieve chunk metadata for centralized vector search. - - This class provides efficient storage and retrieval of chunk metadata - for the centralized vector index architecture. All chunk metadata is - stored in a single _vectors_meta.db file at the project root, enabling - fast lookups without traversing multiple _index.db files. - - Schema: - chunk_metadata: - - chunk_id: INTEGER PRIMARY KEY - Global chunk ID - - file_path: TEXT NOT NULL - Path to source file - - content: TEXT - Chunk text content - - start_line: INTEGER - Start line in source file - - end_line: INTEGER - End line in source file - - category: TEXT - Content category (code/doc) - - metadata: TEXT - JSON-encoded additional metadata - - source_index_db: TEXT - Path to source _index.db file - """ - - def __init__(self, db_path: Path | str) -> None: - """Initialize VectorMetadataStore. - - Args: - db_path: Path to SQLite database file. - """ - self.db_path = Path(db_path) - self.db_path.parent.mkdir(parents=True, exist_ok=True) - - # Thread-safe connection management - self._lock = threading.RLock() - self._local = threading.local() - - def _get_connection(self) -> sqlite3.Connection: - """Get or create a thread-local database connection. - - Each thread gets its own connection to ensure thread safety. - """ - conn = getattr(self._local, "conn", None) - if conn is None: - conn = sqlite3.connect( - str(self.db_path), - timeout=30.0, - check_same_thread=True, - ) - conn.row_factory = sqlite3.Row - conn.execute("PRAGMA journal_mode=WAL") - conn.execute("PRAGMA synchronous=NORMAL") - conn.execute("PRAGMA mmap_size=1073741824") # 1GB mmap - self._local.conn = conn - return conn - - def _ensure_schema(self) -> None: - """Create tables if they don't exist.""" - with self._lock: - conn = self._get_connection() - try: - conn.execute(''' - CREATE TABLE IF NOT EXISTS chunk_metadata ( - chunk_id INTEGER PRIMARY KEY, - file_path TEXT NOT NULL, - content TEXT, - start_line INTEGER, - end_line INTEGER, - category TEXT, - metadata TEXT, - source_index_db TEXT - ) - ''') - conn.execute( - 'CREATE INDEX IF NOT EXISTS idx_chunk_file_path ' - 'ON chunk_metadata(file_path)' - ) - conn.execute( - 'CREATE INDEX IF NOT EXISTS idx_chunk_category ' - 'ON chunk_metadata(category)' - ) - # Binary vectors table for cascade search - conn.execute(''' - CREATE TABLE IF NOT EXISTS binary_vectors ( - chunk_id INTEGER PRIMARY KEY, - vector BLOB NOT NULL - ) - ''') - conn.commit() - logger.debug("VectorMetadataStore schema created/verified") - except sqlite3.Error as e: - raise StorageError( - f"Failed to create schema: {e}", - db_path=str(self.db_path), - operation="_ensure_schema" - ) from e - - def add_chunk( - self, - chunk_id: int, - file_path: str, - content: str, - start_line: Optional[int] = None, - end_line: Optional[int] = None, - category: Optional[str] = None, - metadata: Optional[Dict[str, Any]] = None, - source_index_db: Optional[str] = None, - ) -> None: - """Add a single chunk's metadata. - - Args: - chunk_id: Global unique chunk ID. - file_path: Path to source file. - content: Chunk text content. - start_line: Start line in source file. - end_line: End line in source file. - category: Content category (code/doc). - metadata: Additional metadata dictionary. - source_index_db: Path to source _index.db file. - """ - with self._lock: - conn = self._get_connection() - try: - metadata_json = json.dumps(metadata) if metadata else None - conn.execute( - ''' - INSERT OR REPLACE INTO chunk_metadata - (chunk_id, file_path, content, start_line, end_line, - category, metadata, source_index_db) - VALUES (?, ?, ?, ?, ?, ?, ?, ?) - ''', - (chunk_id, file_path, content, start_line, end_line, - category, metadata_json, source_index_db) - ) - conn.commit() - except sqlite3.Error as e: - raise StorageError( - f"Failed to add chunk {chunk_id}: {e}", - db_path=str(self.db_path), - operation="add_chunk" - ) from e - - def add_chunks(self, chunks: List[Dict[str, Any]]) -> None: - """Batch insert chunk metadata. - - Args: - chunks: List of dictionaries with keys: - - chunk_id (required): Global unique chunk ID - - file_path (required): Path to source file - - content: Chunk text content - - start_line: Start line in source file - - end_line: End line in source file - - category: Content category (code/doc) - - metadata: Additional metadata dictionary - - source_index_db: Path to source _index.db file - """ - if not chunks: - return - - with self._lock: - conn = self._get_connection() - try: - batch_data = [] - for chunk in chunks: - metadata = chunk.get("metadata") - metadata_json = json.dumps(metadata) if metadata else None - batch_data.append(( - chunk["chunk_id"], - chunk["file_path"], - chunk.get("content"), - chunk.get("start_line"), - chunk.get("end_line"), - chunk.get("category"), - metadata_json, - chunk.get("source_index_db"), - )) - - conn.executemany( - ''' - INSERT OR REPLACE INTO chunk_metadata - (chunk_id, file_path, content, start_line, end_line, - category, metadata, source_index_db) - VALUES (?, ?, ?, ?, ?, ?, ?, ?) - ''', - batch_data - ) - conn.commit() - logger.debug("Batch inserted %d chunk metadata records", len(chunks)) - except sqlite3.Error as e: - raise StorageError( - f"Failed to batch insert chunks: {e}", - db_path=str(self.db_path), - operation="add_chunks" - ) from e - - def get_chunks_by_ids( - self, - chunk_ids: List[int], - category: Optional[str] = None, - ) -> List[Dict[str, Any]]: - """Retrieve chunks by their IDs - the key optimization. - - This is the primary method that replaces traversing all _index.db files. - Provides O(1) lookup by chunk ID instead of O(n) where n is the number - of index databases. - - Args: - chunk_ids: List of chunk IDs to retrieve. - category: Optional category filter ('code' or 'doc'). - - Returns: - List of dictionaries with chunk metadata: - - chunk_id: Global chunk ID - - file_path: Path to source file - - content: Chunk text content - - start_line: Start line in source file - - end_line: End line in source file - - category: Content category - - metadata: Parsed metadata dictionary - - source_index_db: Source _index.db path - """ - if not chunk_ids: - return [] - - # No lock needed for reads: WAL mode + thread-local connections ensure safety - conn = self._get_connection() - try: - placeholders = ",".join("?" * len(chunk_ids)) - - if category: - query = f''' - SELECT chunk_id, file_path, content, start_line, end_line, - category, metadata, source_index_db - FROM chunk_metadata - WHERE chunk_id IN ({placeholders}) AND category = ? - ''' - params = list(chunk_ids) + [category] - else: - query = f''' - SELECT chunk_id, file_path, content, start_line, end_line, - category, metadata, source_index_db - FROM chunk_metadata - WHERE chunk_id IN ({placeholders}) - ''' - params = list(chunk_ids) - - rows = conn.execute(query, params).fetchall() - - results = [] - for row in rows: - metadata = None - if row["metadata"]: - try: - metadata = json.loads(row["metadata"]) - except json.JSONDecodeError: - metadata = {} - - results.append({ - "chunk_id": row["chunk_id"], - "file_path": row["file_path"], - "content": row["content"], - "start_line": row["start_line"], - "end_line": row["end_line"], - "category": row["category"], - "metadata": metadata or {}, - "source_index_db": row["source_index_db"], - }) - - return results - - except sqlite3.Error as e: - logger.error("Failed to get chunks by IDs: %s", e) - return [] - - def get_chunk_count(self) -> int: - """Get total number of chunks in store. - - Returns: - Total chunk count. - """ - # No lock needed for reads: WAL mode + thread-local connections ensure safety - conn = self._get_connection() - try: - row = conn.execute( - "SELECT COUNT(*) FROM chunk_metadata" - ).fetchone() - return row[0] if row else 0 - except sqlite3.Error: - return 0 - - def clear(self) -> None: - """Clear all metadata.""" - with self._lock: - conn = self._get_connection() - try: - conn.execute("DELETE FROM chunk_metadata") - conn.commit() - logger.info("Cleared all chunk metadata") - except sqlite3.Error as e: - raise StorageError( - f"Failed to clear metadata: {e}", - db_path=str(self.db_path), - operation="clear" - ) from e - - def close(self) -> None: - """Close database connection.""" - with self._lock: - conn = getattr(self._local, "conn", None) - if conn is not None: - conn.close() - self._local.conn = None - - def __enter__(self) -> "VectorMetadataStore": - """Context manager entry.""" - self._ensure_schema() - return self - - def __exit__(self, exc_type, exc_val, exc_tb) -> None: - """Context manager exit.""" - self.close() - - # ============= Binary Vector Methods for Cascade Search ============= - - def add_binary_vectors( - self, chunk_ids: List[int], binary_vectors: List[bytes] - ) -> None: - """Batch insert binary vectors for cascade search. - - Args: - chunk_ids: List of chunk IDs. - binary_vectors: List of packed binary vectors (as bytes). - """ - if not chunk_ids or len(chunk_ids) != len(binary_vectors): - return - - with self._lock: - conn = self._get_connection() - try: - data = list(zip(chunk_ids, binary_vectors)) - conn.executemany( - "INSERT OR REPLACE INTO binary_vectors (chunk_id, vector) VALUES (?, ?)", - data - ) - conn.commit() - logger.debug("Added %d binary vectors", len(chunk_ids)) - except sqlite3.Error as e: - raise StorageError( - f"Failed to add binary vectors: {e}", - db_path=str(self.db_path), - operation="add_binary_vectors" - ) from e - - def get_all_binary_vectors(self) -> List[tuple]: - """Get all binary vectors for cascade search. - - Returns: - List of (chunk_id, vector_bytes) tuples. - """ - conn = self._get_connection() - try: - rows = conn.execute( - "SELECT chunk_id, vector FROM binary_vectors" - ).fetchall() - return [(row[0], row[1]) for row in rows] - except sqlite3.Error as e: - logger.error("Failed to get binary vectors: %s", e) - return [] - - def get_binary_vector_count(self) -> int: - """Get total number of binary vectors. - - Returns: - Binary vector count. - """ - conn = self._get_connection() - try: - row = conn.execute( - "SELECT COUNT(*) FROM binary_vectors" - ).fetchone() - return row[0] if row else 0 - except sqlite3.Error: - return 0 - - def clear_binary_vectors(self) -> None: - """Clear all binary vectors.""" - with self._lock: - conn = self._get_connection() - try: - conn.execute("DELETE FROM binary_vectors") - conn.commit() - logger.info("Cleared all binary vectors") - except sqlite3.Error as e: - raise StorageError( - f"Failed to clear binary vectors: {e}", - db_path=str(self.db_path), - operation="clear_binary_vectors" - ) from e diff --git a/codex-lens/src/codexlens/tools/__init__.py b/codex-lens/src/codexlens/tools/__init__.py deleted file mode 100644 index cb60ab7e..00000000 --- a/codex-lens/src/codexlens/tools/__init__.py +++ /dev/null @@ -1,226 +0,0 @@ -"""DeepWiki document generation tools. - -This module provides tools for generating documentation from source code. -""" - -from __future__ import annotations - -import hashlib -import logging -import os -from pathlib import Path -from typing import Dict, List, Optional, Protocol, Any - -from codexlens.errors import StorageError -from codexlens.indexing.symbol_extractor import SymbolExtractor -from codexlens.parsers.factory import ParserFactory -from codexlens.storage.deepwiki_models import DeepWikiSymbol -from codexlens.storage.deepwiki_store import DeepWikiStore - -logger = logging.getLogger(__name__) - - -# Default timeout for AI generation (30 seconds) -AI_TIMEOUT = 30 -# HTML metadata markers for documentation -SYMBOL_START_MARKER = '' -SYMBOL_END_MARKER = "" - - -class MarkdownGenerator(Protocol): - """Protocol for generating Markdown documentation.""" - - def generate(self, symbol: DeepWikiSymbol, source_code: str) -> str: - """Generate Markdown documentation for a symbol. - - Args: - symbol: The symbol information - source_code: The source code content - - Returns: - Generated Markdown documentation - """ - pass - - -class MockMarkdownGenerator(MarkdownGenerator): - """Mock Markdown generator for testing.""" - - def generate(self, symbol: DeepWikiSymbol, source_code: str) -> str: - """Generate mock Markdown documentation.""" - return f"# {symbol.name}\\n\\n## {symbol.type}\\n\\n```\\n{source_code}\\n```" - - -class DeepWikiGenerator: - """Main generator for DeepWiki documentation. - - Scans source code, generates documentation with incremental updates - using SHA256 hashes for change detection. - """ - - DEFAULT_DB_PATH = DeepWikiStore.DEFAULT_DB_PATH - SUPPORTED_EXTENSIONS = [ - ".py", - ".ts", - ".tsx", - ".js", - ".jsx", - ".java", - ".go", - ".rs", - ".swift", - ] - AI_TIMEOUT: int = 30 # Timeout for AI generation - MAX_SYMBOLS_PER_FILE: int = 100 # Batch size for processing large files - - def __init__( - self, - db_path: Path | None = None, - store: DeepWikiStore | None = None, - markdown_generator: MarkdownGenerator | None = None, - max_symbols_per_file: int = 100, - ai_timeout: int = 30, - ) -> None: - """ - Initializes the DeepWikiGenerator. - """ - if store: - self.store = store - else: - self.store = DeepWikiStore(db_path or self.DEFAULT_DB_PATH) - - if markdown_generator: - self.markdown_generator = markdown_generator - else: - logger.debug("No markdown generator provided, using mock") - self.markdown_generator = MockMarkdownGenerator() - - self._extractor = SymbolExtractor() - self.max_symbols_per_file = max_symbols_per_file - self.ai_timeout = ai_timeout - self._docs_dir = Path("docs") # Default docs directory - - def _calculate_file_hash(self, file_path: Path) -> str: - """Calculate SHA256 hash of file content.""" - try: - content = file_path.read_bytes() - hash_obj = hashlib.sha256(content) - return hash_obj.hexdigest() - except IOError as e: - logger.error(f"Error reading file for hash calculation: {file_path}: {e}") - return "" - - def _get_language(self, file_path: Path) -> str | None: - """Determine language from file extension.""" - ext = file_path.suffix.lower() - if ext not in self.SUPPORTED_EXTENSIONS: - logger.debug(f"Unsupported file extension: {file_path}, skipping file") - return None - - language_map = { - ".py": "Python", - ".ts": "TypeScript", - ".tsx": "TypeScript React", - ".js": "JavaScript", - ".jsx": "JavaScript React", - ".java": "Java", - ".go": "Go", - ".rs": "Rust", - ".swift": "Swift", - } - return language_map.get(ext) - - def _should_process_file(self, file_path: Path, force: bool) -> bool: - """Check if a file should be processed based on hash.""" - if force: - return True - new_hash = self._calculate_file_hash(file_path) - if not new_hash: - return False - - existing_file = self.store.get_file(str(file_path)) - if existing_file and existing_file.content_hash == new_hash: - logger.debug(f"File unchanged: {file_path}. Skipping (hash match)") - return False - return True - - def _generate_markdown_for_symbol(self, symbol: DeepWikiSymbol, source_code: str) -> str: - """Generate markdown and wrap it with markers.""" - markdown_content = self.markdown_generator.generate(symbol, source_code) - return f"{SYMBOL_START_MARKER.format(symbol_name=symbol.name)}\\n{markdown_content}\\n{SYMBOL_END_MARKER}" - - def run(self, path: str, output_dir: Optional[str] = None, force: bool = False) -> Dict[str, Any]: - """ - Initialize DeepWiki store and generator, and scan the source. - """ - source_root = Path(path) - if output_dir: - self._docs_dir = Path(output_dir) - - stats = { - "total_files": 0, - "total_symbols": 0, - "total_changed_files": 0, - "total_changed_symbols": 0, - "total_docs_generated": 0, - "total_unchanged_files": 0, - } - - files_to_process = [p for p in source_root.rglob("*") if p.is_file() and p.suffix in self.SUPPORTED_EXTENSIONS] - stats["total_files"] = len(files_to_process) - - changed_files_count = 0 - unchanged_files_count = 0 - - for file_path in files_to_process: - if not self._should_process_file(file_path, force): - unchanged_files_count += 1 - continue - - changed_files_count += 1 - try: - source_code = file_path.read_text("utf-8") - symbols = self._extractor.extract_symbols(source_code, file_path.suffix, str(file_path)) - - if not symbols: - logger.debug(f"No symbols found in {file_path}") - continue - - logger.debug(f"Found {len(symbols)} symbols in {file_path}") - stats["total_symbols"] += len(symbols) - docs_generated_count = 0 - - for symbol in symbols: - # Generate documentation - doc_content = self._generate_markdown_for_symbol(symbol, source_code) - - # Define doc path - relative_path = file_path.relative_to(source_root) - doc_path = (self._docs_dir / relative_path).with_suffix(".md") - doc_path.parent.mkdir(parents=True, exist_ok=True) - - # Save symbol and doc - self.store.save_symbol(symbol, str(doc_path), doc_content) - docs_generated_count += 1 - - stats["total_docs_generated"] += docs_generated_count - stats["total_changed_symbols"] += len(symbols) - - # Update file stats in DB - content_hash = self._calculate_file_hash(file_path) - self.store.update_file_stats(str(file_path), len(symbols), content_hash) - logger.debug(f"Generated docs for {len(symbols)} symbols in {file_path}") - - except Exception as e: - logger.error(f"Error processing file {file_path}: {e}") - raise StorageError(f"Failed to process {file_path}") from e - - stats["total_changed_files"] = changed_files_count - stats["total_unchanged_files"] = unchanged_files_count - - logger.info(f"Generation complete. Stats: {stats}") - return stats - - def close(self): - """Close the store connection.""" - self.store.close() diff --git a/codex-lens/src/codexlens/tools/deepwiki_generator.py b/codex-lens/src/codexlens/tools/deepwiki_generator.py deleted file mode 100644 index e8a33227..00000000 --- a/codex-lens/src/codexlens/tools/deepwiki_generator.py +++ /dev/null @@ -1,1067 +0,0 @@ -"""DeepWiki document generation tools. - -This module provides tools for generating documentation from source code. -""" - -from __future__ import annotations - -import hashlib -import logging -import shlex -import signal -import subprocess -import sys -import threading -import time -from dataclasses import dataclass, field -from pathlib import Path -from typing import List, Dict, Optional, Protocol, Any, Tuple, Set - -from codexlens.storage.deepwiki_store import DeepWikiStore -from codexlens.storage.deepwiki_models import DeepWikiSymbol, DeepWikiFile, DeepWikiDoc - -logger = logging.getLogger(__name__) - -# HTML metadata markers for documentation -SYMBOL_START_TEMPLATE = '' -SYMBOL_END_MARKER = "" - - -class MarkdownGenerator(Protocol): - """Protocol for generating Markdown documentation.""" - - def generate(self, symbol: DeepWikiSymbol, source_code: str) -> str: - """Generate Markdown documentation for a symbol.""" - ... - - -class MockMarkdownGenerator: - """Mock Markdown generator for testing.""" - - def generate(self, symbol: DeepWikiSymbol, source_code: str) -> str: - """Generate mock Markdown documentation.""" - start_line, end_line = symbol.line_range - return f"""{SYMBOL_START_TEMPLATE.format(name=symbol.name, type=symbol.type)} - -## `{symbol.name}` - -**Type**: {symbol.type} -**Location**: `{symbol.source_file}:{start_line}-{end_line}` - -```{symbol.source_file.split('.')[-1] if '.' in symbol.source_file else 'text'} -{source_code} -``` - -{SYMBOL_END_MARKER} -""" - - -class DeepWikiGenerator: - """Main generator for DeepWiki documentation. - - Scans source code, generates documentation with incremental updates - using SHA256 hashes for change detection. - """ - - SUPPORTED_EXTENSIONS = [".py", ".ts", ".tsx", ".js", ".jsx", ".java", ".go", ".rs", ".swift"] - - def __init__( - self, - store: DeepWikiStore | None = None, - markdown_generator: MarkdownGenerator | None = None, - ) -> None: - """Initialize the generator. - - Args: - store: DeepWiki storage instance - markdown_generator: Markdown generator for documentation - """ - self.store = store or DeepWikiStore() - self.markdown_generator = markdown_generator or MockMarkdownGenerator() - - def calculate_file_hash(self, file_path: Path) -> str: - """Calculate SHA256 hash of a file. - - Args: - file_path: Path to the source file - - Returns: - SHA256 hash string - """ - content = file_path.read_bytes() - return hashlib.sha256(content).hexdigest() - - def _should_process_file(self, file_path: Path) -> bool: - """Check if a file should be processed based on extension.""" - return file_path.suffix.lower() in self.SUPPORTED_EXTENSIONS - - def _extract_symbols_simple(self, file_path: Path) -> List[Dict[str, Any]]: - """Extract symbols from a file using simple regex patterns. - - Args: - file_path: Path to the source file - - Returns: - List of symbol dictionaries - """ - import re - - content = file_path.read_text(encoding="utf-8", errors="ignore") - lines = content.split("\n") - symbols = [] - - # Python patterns - py_patterns = [ - (r"^(\s*)def\s+(\w+)\s*\(", "function"), - (r"^(\s*)async\s+def\s+(\w+)\s*\(", "async_function"), - (r"^(\s*)class\s+(\w+)", "class"), - ] - - # TypeScript/JavaScript patterns - ts_patterns = [ - (r"^(\s*)function\s+(\w+)\s*\(", "function"), - (r"^(\s*)const\s+(\w+)\s*=\s*(?:async\s*)?\(", "function"), - (r"^(\s*)export\s+(?:async\s+)?function\s+(\w+)", "function"), - (r"^(\s*)class\s+(\w+)", "class"), - (r"^(\s*)interface\s+(\w+)", "interface"), - ] - - all_patterns = py_patterns + ts_patterns - - for i, line in enumerate(lines, 1): - for pattern, symbol_type in all_patterns: - match = re.match(pattern, line) - if match: - name = match.group(2) - # Find end line (simple heuristic: next def/class or EOF) - end_line = i - for j in range(i, min(i + 50, len(lines) + 1)): - if j > i: - for p, _ in all_patterns: - if re.match(p, lines[j - 1]) and not lines[j - 1].startswith(match.group(1)): - end_line = j - 1 - break - else: - continue - break - else: - end_line = min(i + 30, len(lines)) - - symbols.append({ - "name": name, - "type": symbol_type, - "line_start": i, - "line_end": end_line, - "source": "\n".join(lines[i - 1:end_line]), - }) - break - - return symbols - - def generate_for_file(self, file_path: Path) -> Dict[str, Any]: - """Generate documentation for a single file. - - Args: - file_path: Path to the source file - - Returns: - Generation result dictionary - """ - if not self._should_process_file(file_path): - return {"skipped": True, "reason": "unsupported_extension"} - - # Calculate hash and check for changes - current_hash = self.calculate_file_hash(file_path) - existing_file = self.store.get_file(str(file_path)) - - if existing_file and existing_file.content_hash == current_hash: - logger.debug(f"File unchanged: {file_path}") - return {"skipped": True, "reason": "unchanged", "hash": current_hash} - - # Extract symbols - raw_symbols = self._extract_symbols_simple(file_path) - - if not raw_symbols: - logger.debug(f"No symbols found in: {file_path}") - return {"skipped": True, "reason": "no_symbols", "hash": current_hash} - - # Generate documentation for each symbol - docs_generated = 0 - for sym in raw_symbols: - # Create symbol record - symbol = DeepWikiSymbol( - name=sym["name"], - type=sym["type"], - source_file=str(file_path), - doc_file=f".deepwiki/{file_path.stem}.md", - anchor=f"#{sym['name'].lower()}", - line_range=(sym["line_start"], sym["line_end"]), - ) - - # Generate markdown - markdown = self.markdown_generator.generate(symbol, sym["source"]) - - # Save to store - self.store.add_symbol(symbol) - docs_generated += 1 - - # Track file hash + metadata for incremental updates and staleness checks. - self.store.add_file( - file_path=str(file_path), - content_hash=current_hash, - symbols_count=len(raw_symbols), - docs_generated=docs_generated > 0, - ) - - logger.info(f"Generated docs for {docs_generated} symbols in {file_path}") - return { - "symbols": len(raw_symbols), - "docs_generated": docs_generated, - "hash": current_hash, - } - - def run(self, path: Path) -> Dict[str, Any]: - """Run documentation generation for a path. - - Args: - path: File or directory path to process - - Returns: - Generation summary - """ - path = Path(path) - - if path.is_file(): - files = [path] - elif path.is_dir(): - files = [] - for ext in self.SUPPORTED_EXTENSIONS: - files.extend(path.rglob(f"*{ext}")) - else: - raise ValueError(f"Path not found: {path}") - - results = { - "total_files": 0, - "processed_files": 0, - "skipped_files": 0, - "total_symbols": 0, - "docs_generated": 0, - } - - for file_path in files: - results["total_files"] += 1 - result = self.generate_for_file(file_path) - - if result.get("skipped"): - results["skipped_files"] += 1 - else: - results["processed_files"] += 1 - results["total_symbols"] += result.get("symbols", 0) - results["docs_generated"] += result.get("docs_generated", 0) - - logger.info( - f"DeepWiki generation complete: " - f"{results['processed_files']}/{results['total_files']} files, " - f"{results['docs_generated']} docs generated" - ) - - return results - - -# ============================================================================= -# TASK-002: LLMMarkdownGenerator Core Class -# ============================================================================= - -@dataclass -class GenerationResult: - """Result of a documentation generation attempt.""" - success: bool - content: Optional[str] = None - tool: Optional[str] = None - attempts: int = 0 - error: Optional[str] = None - symbol: Optional[DeepWikiSymbol] = None - - -@dataclass -class GeneratorConfig: - """Configuration for LLM generator.""" - max_concurrent: int = 4 - batch_size: int = 4 - graceful_shutdown: bool = True - - -# Tool fallback chains: primary -> secondary -> tertiary -TOOL_CHAIN: Dict[str, List[str]] = { - "gemini": ["gemini", "qwen", "codex"], - "qwen": ["qwen", "gemini", "codex"], - "codex": ["codex", "gemini", "qwen"], -} - -# Layer-based timeout settings (seconds) -TOOL_TIMEOUTS: Dict[str, Dict[str, int]] = { - "gemini": {"layer3": 120, "layer2": 60, "layer1": 30}, - "qwen": {"layer3": 90, "layer2": 45, "layer1": 20}, - "codex": {"layer3": 180, "layer2": 90, "layer1": 45}, -} - -# Required sections per layer for validation -REQUIRED_SECTIONS: Dict[int, List[str]] = { - 3: ["Description", "Parameters", "Returns", "Example"], - 2: ["Description", "Returns"], - 1: ["Description"], -} - - -class LLMMarkdownGenerator: - """LLM-powered Markdown generator with tool fallback and retry logic. - - Implements the MarkdownGenerator protocol with: - - Tool fallback chain (gemini -> qwen -> codex) - - Layer-based timeouts - - SHA256 incremental updates - - Structure validation - """ - - def __init__( - self, - primary_tool: str = "gemini", - db: DeepWikiStore | None = None, - force_mode: bool = False, - progress_tracker: Optional[Any] = None, - ) -> None: - """Initialize LLM generator. - - Args: - primary_tool: Primary LLM tool to use (gemini/qwen/codex). - db: DeepWikiStore instance for progress tracking. - force_mode: If True, regenerate all docs regardless of hash. - progress_tracker: Optional ProgressTracker for timeout alerts. - """ - self.primary_tool = primary_tool - self.db = db or DeepWikiStore() - self.force_mode = force_mode - self.progress_tracker = progress_tracker - self._ensure_db_initialized() - - def _ensure_db_initialized(self) -> None: - """Ensure database is initialized.""" - try: - self.db.initialize() - except Exception: - pass # Already initialized - - def _classify_layer(self, symbol: DeepWikiSymbol) -> int: - """Classify symbol into layer (1, 2, or 3). - - Layer 3: class, function, async_function, interface (detailed docs) - Layer 2: method, property (compact docs) - Layer 1: variable, constant (minimal docs) - """ - symbol_type = symbol.type.lower() - if symbol_type in ("class", "function", "async_function", "interface"): - return 3 - elif symbol_type in ("method", "property"): - return 2 - else: - return 1 - - def _build_prompt(self, symbol: DeepWikiSymbol, source_code: str, layer: int) -> str: - """Build LLM prompt based on symbol layer. - - Args: - symbol: Symbol to document. - source_code: Source code of the symbol. - layer: Layer (1, 2, or 3) determining prompt template. - - Returns: - Prompt string for the LLM. - """ - file_ext = Path(symbol.source_file).suffix.lstrip(".") - - if layer == 3: - # Full documentation template - return f"""Generate comprehensive Markdown documentation for this code symbol. - -## Symbol Information -- Name: {symbol.name} -- Type: {symbol.type} -- File: {symbol.source_file} -- Lines: {symbol.line_range[0]}-{symbol.line_range[1]} - -## Source Code -```{file_ext} -{source_code} -``` - -## Required Sections -Generate a Markdown document with these sections: -1. **Description** - Clear description of what this symbol does -2. **Parameters** - List all parameters with types and descriptions -3. **Returns** - What this symbol returns (if applicable) -4. **Example** - Code example showing usage - -Format the output as clean Markdown. Use code fences for code blocks.""" - - elif layer == 2: - # Compact documentation template - return f"""Generate compact Markdown documentation for this code symbol. - -## Symbol Information -- Name: {symbol.name} -- Type: {symbol.type} -- File: {symbol.source_file} - -## Source Code -```{file_ext} -{source_code} -``` - -## Required Sections -Generate a Markdown document with these sections: -1. **Description** - Brief description of this symbol's purpose -2. **Returns** - Return value description (if applicable) - -Keep it concise. Format as clean Markdown.""" - - else: - # Minimal documentation template (layer 1) - return f"""Generate minimal Markdown documentation for this code symbol. - -## Symbol Information -- Name: {symbol.name} -- Type: {symbol.type} - -## Source Code -```{file_ext} -{source_code} -``` - -## Required Sections -Generate a Markdown document with: -1. **Description** - One-line description of this symbol - -Keep it minimal. Format as clean Markdown.""" - - def _call_cli_with_timeout( - self, tool: str, prompt: str, timeout: int - ) -> str: - """Call LLM CLI tool with timeout. - - Args: - tool: CLI tool name (gemini/qwen/codex). - prompt: Prompt to send to the LLM. - timeout: Timeout in seconds. - - Returns: - Generated content string. - - Raises: - TimeoutError: If command times out. - RuntimeError: If command fails. - """ - # Build ccw cli command - escaped_prompt = prompt.replace('"', '\\"') - cmd = [ - "ccw", "cli", "-p", prompt, - "--tool", tool, - "--mode", "write", - ] - - try: - result = subprocess.run( - cmd, - capture_output=True, - text=True, - timeout=timeout, - cwd=str(Path.cwd()), - ) - - if result.returncode != 0: - raise RuntimeError(f"CLI failed: {result.stderr}") - - return result.stdout.strip() - - except subprocess.TimeoutExpired as exc: - raise TimeoutError( - f"Timeout after {timeout}s with {tool}" - ) from exc - - def _emit_timeout_alert( - self, symbol: DeepWikiSymbol, tool: str, timeout: int - ) -> None: - """Emit timeout alert to progress tracker and logs. - - Args: - symbol: Symbol that timed out. - tool: Tool that timed out. - timeout: Timeout duration in seconds. - """ - alert_msg = f"TIMEOUT: {symbol.name} ({symbol.source_file}) with {tool} after {timeout}s" - logger.warning(alert_msg) - - # Output to progress tracker if available - if self.progress_tracker: - self.progress_tracker.write_above(f"[WARNING] {alert_msg}") - - def validate_structure(self, content: str, layer: int) -> bool: - """Validate generated content has required structure. - - Args: - content: Generated markdown content. - layer: Layer (1, 2, or 3). - - Returns: - True if content passes validation, False otherwise. - """ - import re - - if not content or len(content.strip()) < 20: - return False - - required = REQUIRED_SECTIONS.get(layer, ["Description"]) - - for section in required: - # Match markdown headers (##, ###, **Bold**) or standalone section names - pattern = rf"^\s*(?:#{1,6}\s+|\*\*){re.escape(section)}" - if not re.search(pattern, content, re.IGNORECASE | re.MULTILINE): - return False - - return True - - def generate_with_retry( - self, symbol: DeepWikiSymbol, source_code: str - ) -> GenerationResult: - """Generate documentation with tool fallback chain. - - Strategy: Immediate tool fallback - - Tool A fails -> Immediately try Tool B - - All 3 tools fail -> Mark as failed - - Args: - symbol: Symbol to document. - source_code: Source code of the symbol. - - Returns: - GenerationResult with success status and content. - """ - tool_chain = TOOL_CHAIN.get(self.primary_tool, ["gemini", "qwen", "codex"]) - layer = self._classify_layer(symbol) - prompt = self._build_prompt(symbol, source_code, layer) - - symbol_key = f"{symbol.source_file}:{symbol.name}:{symbol.line_range[0]}" - last_error = None - - for attempt, tool in enumerate(tool_chain, 1): - timeout = TOOL_TIMEOUTS.get(tool, {}).get(f"layer{layer}", 60) - - try: - # Update progress - if self.db: - self.db.update_progress( - symbol_key, - { - "file_path": symbol.source_file, - "symbol_name": symbol.name, - "symbol_type": symbol.type, - "layer": layer, - "source_hash": hashlib.sha256(source_code.encode()).hexdigest(), - "status": "processing", - "attempts": attempt, - "last_tool": tool, - }, - ) - - result = self._call_cli_with_timeout(tool, prompt, timeout) - - if result and self.validate_structure(result, layer): - # Success - if self.db: - self.db.mark_completed(symbol_key, tool) - - return GenerationResult( - success=True, - content=result, - tool=tool, - attempts=attempt, - symbol=symbol, - ) - - # Invalid structure - last_error = f"Invalid structure from {tool}" - continue - - except TimeoutError: - self._emit_timeout_alert(symbol, tool, timeout) - last_error = f"Timeout after {timeout}s with {tool}" - continue - - except Exception as exc: - last_error = f"{type(exc).__name__}: {exc}" - continue - - # All tools failed - if self.db: - self.db.mark_failed(symbol_key, last_error or "All tools failed") - - return GenerationResult( - success=False, - content=None, - tool=None, - attempts=len(tool_chain), - error=last_error, - symbol=symbol, - ) - - def should_regenerate( - self, - symbol: DeepWikiSymbol, - source_code: str, - staleness_threshold: float = 0.7, - ) -> bool: - """Check if symbol needs regeneration. - - Conditions for regeneration: - 1. --force mode is enabled - 2. Symbol not in database (new) - 3. Source code hash changed - 4. Previous generation failed - 5. Staleness score exceeds threshold - - Args: - symbol: Symbol to check. - source_code: Source code of the symbol. - staleness_threshold: Score above which regeneration is triggered. - - Returns: - True if regeneration needed, False otherwise. - """ - if self.force_mode: - return True - - current_hash = hashlib.sha256(source_code.encode()).hexdigest() - symbol_key = f"{symbol.source_file}:{symbol.name}:{symbol.line_range[0]}" - - if self.db: - progress = self.db.get_progress(symbol_key) - - if not progress: - return True # New symbol - - if progress.get("source_hash") != current_hash: - return True # Code changed - - if progress.get("status") == "failed": - return True # Retry failed - - # Check staleness score from DeepWiki index - db_symbol = self.db.get_symbol(symbol.name, symbol.source_file) - if db_symbol and db_symbol.staleness_score >= staleness_threshold: - return True # Stale documentation - - return False # Skip - - def _fallback_generate( - self, symbol: DeepWikiSymbol, source_code: str - ) -> str: - """Fallback to Mock generation when all LLM tools fail. - - Args: - symbol: Symbol to document. - source_code: Source code of the symbol. - - Returns: - Mock-generated markdown content. - """ - mock = MockMarkdownGenerator() - return mock.generate(symbol, source_code) - - def generate(self, symbol: DeepWikiSymbol, source_code: str) -> str: - """Generate Markdown documentation (implements MarkdownGenerator protocol). - - Args: - symbol: Symbol to document. - source_code: Source code of the symbol. - - Returns: - Generated markdown content. - """ - result = self.generate_with_retry(symbol, source_code) - - if result.success and result.content: - return result.content - - # Fallback to mock on failure - return self._fallback_generate(symbol, source_code) - - -# ============================================================================= -# TASK-003: BatchProcessor + Graceful Interrupt -# TASK-004: ProgressTracker (rich progress bar) -# ============================================================================= - -class ProgressTracker: - """Progress tracker using rich progress bar. - - Shows real-time progress with: - - Progress bar: [=====> ] 120/500 (24%) eta: 5min - - Timeout alerts above progress bar - - Failure summary at completion - """ - - def __init__(self, total: int) -> None: - """Initialize progress tracker. - - Args: - total: Total number of symbols to process. - """ - self.total = total - self.completed = 0 - self.failed_symbols: List[Dict[str, Any]] = [] - self._lock = threading.Lock() - self._started = False - - # Lazy import rich to avoid dependency issues - try: - from rich.console import Console - from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TimeRemainingColumn - self._console = Console() - self._progress = Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - BarColumn(), - TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), - TextColumn("({task.completed}/{task.total})"), - TimeRemainingColumn(), - console=self._console, - ) - self._task_id = None - self._rich_available = True - except ImportError: - self._rich_available = False - self._console = None - - def start(self) -> None: - """Start the progress bar.""" - if self._rich_available and self._progress: - self._progress.start() - self._task_id = self._progress.add_task( - "Generating docs", total=self.total - ) - self._started = True - - def update(self, symbol: DeepWikiSymbol, result: GenerationResult) -> None: - """Update progress after a symbol is processed. - - Args: - symbol: Processed symbol. - result: Generation result. - """ - with self._lock: - self.completed += 1 - - if self._rich_available and self._progress and self._task_id is not None: - self._progress.advance(self._task_id) - - if not result.success: - self.failed_symbols.append({ - "symbol": symbol.name, - "file": symbol.source_file, - "error": result.error or "Unknown error", - }) - - def write_above(self, message: str) -> None: - """Write message above the progress bar. - - Args: - message: Message to display. - """ - if self._rich_available and self._console: - self._console.print(message) - else: - print(message) - - def print_summary(self) -> None: - """Print final summary after all processing completes.""" - self.stop() - - success = self.completed - len(self.failed_symbols) - failed = len(self.failed_symbols) - - if self._rich_available and self._console: - self._console.print( - f"\n[bold]Generation complete:[/bold] " - f"[green]{success}/{self.completed}[/green] successful" - ) - - if self.failed_symbols: - self._console.print( - f"\n[bold red]Failed symbols ({failed}):[/bold red]" - ) - for item in self.failed_symbols: - self._console.print( - f" - [yellow]{item['symbol']}[/yellow] " - f"({item['file']}): {item['error']}" - ) - else: - print(f"\nGeneration complete: {success}/{self.completed} successful") - - if self.failed_symbols: - print(f"\nFailed symbols ({failed}):") - for item in self.failed_symbols: - print(f" - {item['symbol']} ({item['file']}): {item['error']}") - - def stop(self) -> None: - """Stop the progress bar.""" - if self._rich_available and self._progress and self._started: - self._progress.stop() - self._started = False - - -class BatchProcessor: - """Batch processor with concurrent execution and graceful interrupt. - - Features: - - ThreadPoolExecutor with configurable concurrency (default: 4) - - Signal handlers for Ctrl+C graceful interrupt - - Orphaned document cleanup - - Integration with ProgressTracker - """ - - def __init__( - self, - generator: LLMMarkdownGenerator, - config: GeneratorConfig | None = None, - ) -> None: - """Initialize batch processor. - - Args: - generator: LLM generator instance. - config: Generator configuration. - """ - self.generator = generator - self.config = config or GeneratorConfig() - self.shutdown_event = threading.Event() - self._executor = None - self._progress: Optional[ProgressTracker] = None - - def setup_signal_handlers(self) -> None: - """Set up signal handlers for graceful Ctrl+C interrupt.""" - def handle_sigint(signum: int, frame) -> None: - if self.shutdown_event.is_set(): - # Second Ctrl+C: force exit - print("\n[WARNING] Forced exit, progress may be lost") - sys.exit(1) - - # First Ctrl+C: graceful interrupt - print("\n[INFO] Completing current batch...") - self.shutdown_event.set() - - signal.signal(signal.SIGINT, handle_sigint) - - def process_batch( - self, symbols: List[Tuple[DeepWikiSymbol, str]] - ) -> List[GenerationResult]: - """Process a batch of symbols concurrently. - - Args: - symbols: List of (symbol, source_code) tuples. - - Returns: - List of GenerationResult for each symbol. - """ - from concurrent.futures import ThreadPoolExecutor, as_completed - - results: List[GenerationResult] = [] - futures = [] - - with ThreadPoolExecutor(max_workers=self.config.max_concurrent) as executor: - self._executor = executor - - for symbol, source_code in symbols: - if self.shutdown_event.is_set(): - break - - future = executor.submit( - self.generator.generate_with_retry, - symbol, - source_code, - ) - futures.append((symbol, future)) - - # Wait for all submitted tasks - for symbol, future in futures: - try: - result = future.result(timeout=300) # 5 min total timeout - results.append(result) - - if self._progress: - self._progress.update(symbol, result) - - except Exception as exc: - error_result = GenerationResult( - success=False, - error=str(exc), - symbol=symbol, - ) - results.append(error_result) - - if self._progress: - self._progress.update(symbol, error_result) - - return results - - def cleanup_orphaned_docs( - self, current_symbols: List[DeepWikiSymbol] - ) -> int: - """Clean up documents for symbols that no longer exist in source. - - Args: - current_symbols: List of current symbols in source code. - - Returns: - Number of orphaned documents removed. - """ - if not self.generator.db: - return 0 - - current_keys = { - f"{s.source_file}:{s.name}:{s.line_range[0]}" - for s in current_symbols - } - - stored_keys = self.generator.db.get_completed_symbol_keys() - orphaned_keys = list(stored_keys - current_keys) - - if orphaned_keys: - deleted = self.generator.db.delete_progress(orphaned_keys) - logger.info(f"Cleaned up {deleted} orphaned documents") - return deleted - - return 0 - - def run( - self, - path: Path, - tool: str = "gemini", - force: bool = False, - resume: bool = False, - ) -> Dict[str, Any]: - """Main entry point for batch processing. - - Flow: - 1. Scan source files - 2. Extract symbols - 3. SHA256 filter - 4. Layer sort (3 -> 2 -> 1) - 5. Batch process with concurrency - - Args: - path: File or directory path to process. - tool: Primary LLM tool to use. - force: Force regenerate all docs. - resume: Resume from previous interrupted run. - - Returns: - Processing summary dictionary. - """ - # Update generator settings - self.generator.primary_tool = tool - self.generator.force_mode = force - - # Setup signal handlers - if self.config.graceful_shutdown: - self.setup_signal_handlers() - - # Initialize database - self.generator._ensure_db_initialized() - - # Phase 1: Scan files - path = Path(path) - if path.is_file(): - files = [path] - elif path.is_dir(): - files = [] - for ext in DeepWikiGenerator.SUPPORTED_EXTENSIONS: - files.extend(path.rglob(f"*{ext}")) - else: - raise ValueError(f"Path not found: {path}") - - # Phase 2: Extract symbols - all_symbols: List[Tuple[DeepWikiSymbol, str]] = [] - temp_gen = DeepWikiGenerator(store=self.generator.db) - - for file_path in files: - raw_symbols = temp_gen._extract_symbols_simple(file_path) - - for sym in raw_symbols: - symbol = DeepWikiSymbol( - name=sym["name"], - symbol_type=sym["type"], - source_file=str(file_path), - doc_file=f".deepwiki/{file_path.stem}.md", - anchor=f"#{sym['name'].lower()}", - line_start=sym["line_start"], - line_end=sym["line_end"], - ) - all_symbols.append((symbol, sym["source"])) - - # Phase 3: SHA256 filter - symbols_to_process = [ - (s, c) for s, c in all_symbols - if self.generator.should_regenerate(s, c) - ] - - if not symbols_to_process: - logger.info("All symbols up to date, nothing to process") - return { - "total_symbols": len(all_symbols), - "processed": 0, - "skipped": len(all_symbols), - "success": 0, - "failed": 0, - } - - # Phase 4: Cleanup orphaned docs - current_symbols = [s for s, _ in all_symbols] - orphaned = self.cleanup_orphaned_docs(current_symbols) - - # Phase 5: Sort by layer (3 -> 2 -> 1) - symbols_to_process.sort( - key=lambda x: self.generator._classify_layer(x[0]), - reverse=True - ) - - # Phase 6: Initialize progress tracker - self._progress = ProgressTracker(total=len(symbols_to_process)) - self.generator.progress_tracker = self._progress - self._progress.start() - - # Phase 7: Batch process - all_results: List[GenerationResult] = [] - batch_size = self.config.batch_size - - for i in range(0, len(symbols_to_process), batch_size): - if self.shutdown_event.is_set(): - break - - batch = symbols_to_process[i:i + batch_size] - results = self.process_batch(batch) - all_results.extend(results) - - # Phase 8: Print summary - if self._progress: - self._progress.print_summary() - - # Calculate statistics - success_count = sum(1 for r in all_results if r.success) - failed_count = len(all_results) - success_count - - return { - "total_symbols": len(all_symbols), - "processed": len(all_results), - "skipped": len(all_symbols) - len(symbols_to_process), - "success": success_count, - "failed": failed_count, - "orphaned_cleaned": orphaned, - } diff --git a/codex-lens/src/codexlens/watcher/__init__.py b/codex-lens/src/codexlens/watcher/__init__.py deleted file mode 100644 index 4c095ec4..00000000 --- a/codex-lens/src/codexlens/watcher/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -"""File watcher module for real-time index updates.""" - -from .events import ChangeType, FileEvent, IndexResult, WatcherConfig, WatcherStats -from .file_watcher import FileWatcher -from .incremental_indexer import IncrementalIndexer -from .manager import WatcherManager - -__all__ = [ - "ChangeType", - "FileEvent", - "IndexResult", - "WatcherConfig", - "WatcherStats", - "FileWatcher", - "IncrementalIndexer", - "WatcherManager", -] diff --git a/codex-lens/src/codexlens/watcher/events.py b/codex-lens/src/codexlens/watcher/events.py deleted file mode 100644 index edb43787..00000000 --- a/codex-lens/src/codexlens/watcher/events.py +++ /dev/null @@ -1,82 +0,0 @@ -"""Event types for file watcher.""" - -from __future__ import annotations - -import time -from dataclasses import dataclass, field -from enum import Enum -from pathlib import Path -from typing import List, Optional, Set - - -class ChangeType(Enum): - """Type of file system change.""" - CREATED = "created" - MODIFIED = "modified" - DELETED = "deleted" - MOVED = "moved" - - -@dataclass -class FileEvent: - """A file system change event.""" - path: Path - change_type: ChangeType - timestamp: float - old_path: Optional[Path] = None # For MOVED events - - -@dataclass -class WatcherConfig: - """Configuration for file watcher.""" - debounce_ms: int = 60000 # Default 60 seconds for debounce - ignored_patterns: Set[str] = field(default_factory=lambda: { - # Version control - ".git", ".svn", ".hg", - # Python environments & cache - ".venv", "venv", "env", "__pycache__", ".pytest_cache", ".mypy_cache", ".ruff_cache", - # Node.js - "node_modules", "bower_components", ".npm", ".yarn", - # Build artifacts - "dist", "build", "out", "target", "bin", "obj", "_build", "coverage", "htmlcov", - # IDE & Editor - ".idea", ".vscode", ".vs", ".eclipse", - # CodexLens internal - ".codexlens", - # Package manager caches - ".cache", ".parcel-cache", ".turbo", ".next", ".nuxt", - # Logs & temp - "logs", "tmp", "temp", - }) - languages: Optional[List[str]] = None # None = all supported - - -@dataclass -class PendingQueueStatus: - """Status of pending file changes queue.""" - file_count: int = 0 - files: List[str] = field(default_factory=list) # Limited to 20 files - countdown_seconds: int = 0 - last_event_time: Optional[float] = None - - -@dataclass -class IndexResult: - """Result of processing file changes.""" - files_indexed: int = 0 - files_removed: int = 0 - symbols_added: int = 0 - symbols_removed: int = 0 - files_success: List[str] = field(default_factory=list) - files_failed: List[str] = field(default_factory=list) - errors: List[str] = field(default_factory=list) - timestamp: float = field(default_factory=time.time) - - -@dataclass -class WatcherStats: - """Runtime statistics for watcher.""" - files_watched: int = 0 - events_processed: int = 0 - last_event_time: Optional[float] = None - is_running: bool = False diff --git a/codex-lens/src/codexlens/watcher/file_watcher.py b/codex-lens/src/codexlens/watcher/file_watcher.py deleted file mode 100644 index 4fc50691..00000000 --- a/codex-lens/src/codexlens/watcher/file_watcher.py +++ /dev/null @@ -1,347 +0,0 @@ -"""File system watcher using watchdog library.""" - -from __future__ import annotations - -import logging -import threading -import time -from pathlib import Path -from typing import Callable, Dict, List, Optional - -from watchdog.observers import Observer -from watchdog.events import FileSystemEventHandler - -from .events import ChangeType, FileEvent, WatcherConfig, PendingQueueStatus -from ..config import Config - -logger = logging.getLogger(__name__) - -# Maximum queue size to prevent unbounded memory growth -# When exceeded, forces immediate flush to avoid memory exhaustion -MAX_QUEUE_SIZE = 50000 - - -class _CodexLensHandler(FileSystemEventHandler): - """Internal handler for watchdog events.""" - - def __init__( - self, - watcher: "FileWatcher", - on_event: Callable[[FileEvent], None], - ) -> None: - super().__init__() - self._watcher = watcher - self._on_event = on_event - - def on_created(self, event) -> None: - if event.is_directory: - return - self._emit(event.src_path, ChangeType.CREATED) - - def on_modified(self, event) -> None: - if event.is_directory: - return - self._emit(event.src_path, ChangeType.MODIFIED) - - def on_deleted(self, event) -> None: - if event.is_directory: - return - self._emit(event.src_path, ChangeType.DELETED) - - def on_moved(self, event) -> None: - if event.is_directory: - return - self._emit(event.dest_path, ChangeType.MOVED, old_path=event.src_path) - - def _emit( - self, - path: str, - change_type: ChangeType, - old_path: Optional[str] = None, - ) -> None: - path_obj = Path(path) - - # Filter out files that should not be indexed - if not self._watcher._should_index_file(path_obj): - return - - event = FileEvent( - path=path_obj, - change_type=change_type, - timestamp=time.time(), - old_path=Path(old_path) if old_path else None, - ) - self._on_event(event) - - -class FileWatcher: - """File system watcher for monitoring directory changes. - - Uses watchdog library for cross-platform file system monitoring. - Events are forwarded to the on_changes callback. - - Example: - def handle_changes(events: List[FileEvent]) -> None: - for event in events: - print(f"{event.change_type}: {event.path}") - - watcher = FileWatcher(Path("."), WatcherConfig(), handle_changes) - watcher.start() - watcher.wait() # Block until stopped - """ - - def __init__( - self, - root_path: Path, - config: WatcherConfig, - on_changes: Callable[[List[FileEvent]], None], - ) -> None: - """Initialize file watcher. - - Args: - root_path: Directory to watch recursively - config: Watcher configuration - on_changes: Callback invoked with batched events - """ - self.root_path = Path(root_path).resolve() - self.config = config - self.on_changes = on_changes - - self._observer: Optional[Observer] = None - self._running = False - self._stop_event = threading.Event() - self._lock = threading.RLock() - - # Event queue for batching - self._event_queue: List[FileEvent] = [] - self._queue_lock = threading.Lock() - - # Debounce timer (true debounce - waits after last event) - self._flush_timer: Optional[threading.Timer] = None - self._last_event_time: float = 0 - - # Queue change callbacks for real-time UI updates - self._queue_change_callbacks: List[Callable[[PendingQueueStatus], None]] = [] - - # Config instance for language checking - self._codexlens_config = Config() - - def _should_index_file(self, path: Path) -> bool: - """Check if file should be indexed based on extension and ignore patterns. - - Args: - path: File path to check - - Returns: - True if file should be indexed, False otherwise - """ - # Check against ignore patterns - parts = path.parts - for pattern in self.config.ignored_patterns: - if pattern in parts: - return False - - # Check extension against supported languages - language = self._codexlens_config.language_for_path(path) - return language is not None - - def _on_raw_event(self, event: FileEvent) -> None: - """Handle raw event from watchdog handler with true debounce.""" - force_flush = False - - with self._queue_lock: - # Check queue size limit to prevent memory exhaustion - if len(self._event_queue) >= MAX_QUEUE_SIZE: - logger.warning( - "Event queue limit (%d) reached, forcing immediate flush", - MAX_QUEUE_SIZE - ) - if self._flush_timer: - self._flush_timer.cancel() - self._flush_timer = None - force_flush = True - - self._event_queue.append(event) - self._last_event_time = time.time() - - # Cancel previous timer and schedule new one (true debounce) - # Skip if we're about to force flush - if not force_flush: - if self._flush_timer: - self._flush_timer.cancel() - - self._flush_timer = threading.Timer( - self.config.debounce_ms / 1000.0, - self._flush_events - ) - self._flush_timer.daemon = True - self._flush_timer.start() - - # Force flush outside lock to avoid deadlock - if force_flush: - self._flush_events() - - # Notify queue change (outside lock to avoid deadlock) - self._notify_queue_change() - - def _debounce_loop(self) -> None: - """Background thread for checking flush signal file.""" - signal_file = self.root_path / '.codexlens' / 'flush.signal' - while self._running: - time.sleep(1.0) # Check every second - # Check for flush signal file - if signal_file.exists(): - try: - signal_file.unlink() - logger.info("Flush signal detected, triggering immediate index") - self.flush_now() - except Exception as e: - logger.warning("Failed to handle flush signal: %s", e) - - def _flush_events(self) -> None: - """Flush queued events with deduplication.""" - with self._queue_lock: - if not self._event_queue: - return - - # Deduplicate: keep latest event per path - deduped: Dict[Path, FileEvent] = {} - for event in self._event_queue: - deduped[event.path] = event - - events = list(deduped.values()) - self._event_queue.clear() - self._last_event_time = 0 # Reset after flush - - # Notify queue cleared - self._notify_queue_change() - - if events: - try: - self.on_changes(events) - except Exception as exc: - logger.error("Error in on_changes callback: %s", exc) - - def flush_now(self) -> None: - """Immediately flush pending queue (manual trigger).""" - with self._queue_lock: - if self._flush_timer: - self._flush_timer.cancel() - self._flush_timer = None - self._flush_events() - - def get_pending_queue_status(self) -> PendingQueueStatus: - """Get current pending queue status for UI display.""" - with self._queue_lock: - file_count = len(self._event_queue) - files = [str(e.path.name) for e in self._event_queue[:20]] - - # Calculate countdown - if self._last_event_time > 0 and file_count > 0: - elapsed = time.time() - self._last_event_time - remaining = max(0, self.config.debounce_ms / 1000.0 - elapsed) - countdown = int(remaining) - else: - countdown = 0 - - return PendingQueueStatus( - file_count=file_count, - files=files, - countdown_seconds=countdown, - last_event_time=self._last_event_time if file_count > 0 else None - ) - - def register_queue_change_callback( - self, callback: Callable[[PendingQueueStatus], None] - ) -> None: - """Register callback for queue change notifications.""" - self._queue_change_callbacks.append(callback) - - def _notify_queue_change(self) -> None: - """Notify all registered callbacks of queue change.""" - status = self.get_pending_queue_status() - for callback in self._queue_change_callbacks: - try: - callback(status) - except Exception as e: - logger.error("Queue change callback error: %s", e) - - def start(self) -> None: - """Start watching the directory. - - Non-blocking. Use wait() to block until stopped. - """ - with self._lock: - if self._running: - logger.warning("Watcher already running") - return - - if not self.root_path.exists(): - raise ValueError(f"Root path does not exist: {self.root_path}") - - self._observer = Observer() - handler = _CodexLensHandler(self, self._on_raw_event) - self._observer.schedule(handler, str(self.root_path), recursive=True) - - self._running = True - self._stop_event.clear() - self._observer.start() - - # Start signal check thread (for flush.signal file) - self._signal_check_thread = threading.Thread( - target=self._debounce_loop, - daemon=True, - name="FileWatcher-SignalCheck", - ) - self._signal_check_thread.start() - - logger.info("Started watching: %s", self.root_path) - - def stop(self) -> None: - """Stop watching the directory. - - Gracefully stops the observer and flushes remaining events. - """ - with self._lock: - if not self._running: - return - - self._running = False - self._stop_event.set() - - # Cancel pending flush timer - if self._flush_timer: - self._flush_timer.cancel() - self._flush_timer = None - - if self._observer: - self._observer.stop() - self._observer.join(timeout=5.0) - self._observer = None - - # Wait for signal check thread to finish - if hasattr(self, '_signal_check_thread') and self._signal_check_thread and self._signal_check_thread.is_alive(): - self._signal_check_thread.join(timeout=2.0) - self._signal_check_thread = None - - # Flush any remaining events - self._flush_events() - - logger.info("Stopped watching: %s", self.root_path) - - def wait(self) -> None: - """Block until watcher is stopped. - - Use Ctrl+C or call stop() from another thread to unblock. - """ - try: - while self._running: - self._stop_event.wait(timeout=1.0) - except KeyboardInterrupt: - logger.info("Received interrupt, stopping watcher...") - self.stop() - - @property - def is_running(self) -> bool: - """Check if watcher is currently running.""" - return self._running diff --git a/codex-lens/src/codexlens/watcher/incremental_indexer.py b/codex-lens/src/codexlens/watcher/incremental_indexer.py deleted file mode 100644 index 39888115..00000000 --- a/codex-lens/src/codexlens/watcher/incremental_indexer.py +++ /dev/null @@ -1,423 +0,0 @@ -"""Incremental indexer for processing file changes.""" - -from __future__ import annotations - -import logging -from dataclasses import dataclass -from pathlib import Path -from typing import List, Optional - -from codexlens.config import Config -from codexlens.parsers.factory import ParserFactory -from codexlens.storage.dir_index import DirIndexStore -from codexlens.storage.global_index import GlobalSymbolIndex -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore - -from .events import ChangeType, FileEvent, IndexResult - -logger = logging.getLogger(__name__) - - -@dataclass -class FileIndexResult: - """Result of indexing a single file.""" - path: Path - symbols_count: int - success: bool - error: Optional[str] = None - - -class IncrementalIndexer: - """Incremental indexer for processing file change events. - - Processes file events (create, modify, delete, move) and updates - the corresponding index databases incrementally. - - Reuses existing infrastructure: - - ParserFactory for symbol extraction - - DirIndexStore for per-directory storage - - GlobalSymbolIndex for cross-file symbols - - PathMapper for source-to-index path conversion - - Example: - indexer = IncrementalIndexer(registry, mapper, config) - result = indexer.process_changes([ - FileEvent(Path("foo.py"), ChangeType.MODIFIED, time.time()), - ]) - print(f"Indexed {result.files_indexed} files") - """ - - def __init__( - self, - registry: RegistryStore, - mapper: PathMapper, - config: Optional[Config] = None, - ) -> None: - """Initialize incremental indexer. - - Args: - registry: Global project registry - mapper: Path mapper for source-to-index conversion - config: CodexLens configuration (uses defaults if None) - """ - self.registry = registry - self.mapper = mapper - self.config = config or Config() - self.parser_factory = ParserFactory(self.config) - - self._global_index: Optional[GlobalSymbolIndex] = None - self._dir_stores: dict[Path, DirIndexStore] = {} - self._lock = __import__("threading").RLock() - - def _get_global_index(self, index_root: Path, source_root: Optional[Path] = None) -> Optional[GlobalSymbolIndex]: - """Get or create global symbol index. - - Args: - index_root: Root directory containing the global symbol index DB - source_root: Source directory root for looking up project_id from registry - """ - if not self.config.global_symbol_index_enabled: - return None - - if self._global_index is None: - global_db_path = index_root / GlobalSymbolIndex.DEFAULT_DB_NAME - if global_db_path.exists(): - # Get project_id from registry using source_root - project_id = 0 # Default fallback - if source_root: - project_info = self.registry.get_project(source_root) - if project_info: - project_id = project_info.id - try: - self._global_index = GlobalSymbolIndex(global_db_path, project_id=project_id) - # Ensure schema exists (best-effort). The DB should already be initialized - # by `codexlens index init`, but watcher/index-update should be robust. - self._global_index.initialize() - except Exception as exc: - logger.debug( - "Failed to initialize global symbol index at %s: %s", - global_db_path, - exc, - ) - self._global_index = None - - return self._global_index - - def _get_dir_store(self, dir_path: Path) -> Optional[DirIndexStore]: - """Get DirIndexStore for a directory, if indexed.""" - with self._lock: - if dir_path in self._dir_stores: - return self._dir_stores[dir_path] - - index_db = self.mapper.source_to_index_db(dir_path) - if not index_db.exists(): - logger.debug("No index found for directory: %s", dir_path) - return None - - # Get index root for global index - source_root = self.mapper.get_project_root(dir_path) or dir_path - index_root = self.mapper.source_to_index_dir(source_root) - global_index = self._get_global_index(index_root, source_root=source_root) - - store = DirIndexStore( - index_db, - config=self.config, - global_index=global_index, - ) - self._dir_stores[dir_path] = store - return store - - def process_changes(self, events: List[FileEvent]) -> IndexResult: - """Process a batch of file change events. - - Args: - events: List of file events to process - - Returns: - IndexResult with statistics - """ - result = IndexResult() - - for event in events: - try: - if event.change_type == ChangeType.CREATED: - file_result = self._index_file(event.path) - if file_result.success: - result.files_indexed += 1 - result.symbols_added += file_result.symbols_count - else: - result.errors.append(file_result.error or f"Failed to index: {event.path}") - - elif event.change_type == ChangeType.MODIFIED: - file_result = self._index_file(event.path) - if file_result.success: - result.files_indexed += 1 - result.symbols_added += file_result.symbols_count - else: - result.errors.append(file_result.error or f"Failed to index: {event.path}") - - elif event.change_type == ChangeType.DELETED: - self._remove_file(event.path) - result.files_removed += 1 - - elif event.change_type == ChangeType.MOVED: - # Remove from old location, add at new location - if event.old_path: - self._remove_file(event.old_path) - result.files_removed += 1 - file_result = self._index_file(event.path) - if file_result.success: - result.files_indexed += 1 - result.symbols_added += file_result.symbols_count - else: - result.errors.append(file_result.error or f"Failed to index: {event.path}") - - except Exception as exc: - error_msg = f"Error processing {event.path}: {type(exc).__name__}: {exc}" - logger.error(error_msg) - result.errors.append(error_msg) - - return result - - def _index_file(self, path: Path) -> FileIndexResult: - """Index a single file. - - Args: - path: Path to the file to index - - Returns: - FileIndexResult with status - """ - path = Path(path).resolve() - - # Check if file exists - if not path.exists(): - return FileIndexResult( - path=path, - symbols_count=0, - success=False, - error=f"File not found: {path}", - ) - - # Check if language is supported - language = self.config.language_for_path(path) - if not language: - return FileIndexResult( - path=path, - symbols_count=0, - success=False, - error=f"Unsupported language for: {path}", - ) - - # Get directory store - dir_path = path.parent - store = self._get_dir_store(dir_path) - if store is None: - return FileIndexResult( - path=path, - symbols_count=0, - success=False, - error=f"Directory not indexed: {dir_path}", - ) - - # Read file content with fallback encodings - try: - content = path.read_text(encoding="utf-8") - except UnicodeDecodeError: - logger.debug("UTF-8 decode failed for %s, using fallback with errors='ignore'", path) - try: - content = path.read_text(encoding="utf-8", errors="ignore") - except Exception as exc: - return FileIndexResult( - path=path, - symbols_count=0, - success=False, - error=f"Failed to read file: {exc}", - ) - except Exception as exc: - return FileIndexResult( - path=path, - symbols_count=0, - success=False, - error=f"Failed to read file: {exc}", - ) - - # Parse symbols - try: - parser = self.parser_factory.get_parser(language) - indexed_file = parser.parse(content, path) - except Exception as exc: - error_msg = f"Failed to parse {path}: {type(exc).__name__}: {exc}" - logger.error(error_msg) - return FileIndexResult( - path=path, - symbols_count=0, - success=False, - error=error_msg, - ) - - # Update store with retry logic for transient database errors - max_retries = 3 - for attempt in range(max_retries): - try: - store.add_file( - name=path.name, - full_path=str(path), - content=content, - language=language, - symbols=indexed_file.symbols, - relationships=indexed_file.relationships, - ) - - # Update merkle root - store.update_merkle_root() - - # Update global relationships for static graph expansion (best-effort). - if getattr(self.config, "static_graph_enabled", False): - try: - source_root = self.mapper.get_project_root(path) or dir_path - index_root = self.mapper.source_to_index_dir(source_root) - global_index = self._get_global_index(index_root, source_root=source_root) - if global_index is not None: - allowed_types = set( - getattr( - self.config, - "static_graph_relationship_types", - ["imports", "inherits"], - ) - or [] - ) - filtered_rels = [ - r - for r in (indexed_file.relationships or []) - if r.relationship_type.value in allowed_types - ] - global_index.update_file_relationships(path, filtered_rels) - except Exception as exc: - logger.debug( - "Failed to update global relationships for %s: %s", - path, - exc, - ) - - logger.debug("Indexed file: %s (%d symbols)", path, len(indexed_file.symbols)) - - return FileIndexResult( - path=path, - symbols_count=len(indexed_file.symbols), - success=True, - ) - - except __import__("sqlite3").OperationalError as exc: - # Transient database errors (e.g., database locked) - if attempt < max_retries - 1: - import time - wait_time = 0.1 * (2 ** attempt) # Exponential backoff - logger.debug("Database operation failed (attempt %d/%d), retrying in %.2fs: %s", - attempt + 1, max_retries, wait_time, exc) - time.sleep(wait_time) - continue - else: - error_msg = f"Failed to store {path} after {max_retries} attempts: {exc}" - logger.error(error_msg) - return FileIndexResult( - path=path, - symbols_count=0, - success=False, - error=error_msg, - ) - except Exception as exc: - error_msg = f"Failed to store {path}: {type(exc).__name__}: {exc}" - logger.error(error_msg) - return FileIndexResult( - path=path, - symbols_count=0, - success=False, - error=error_msg, - ) - - # Should never reach here - return FileIndexResult( - path=path, - symbols_count=0, - success=False, - error="Unexpected error in indexing loop", - ) - - def _remove_file(self, path: Path) -> bool: - """Remove a file from the index. - - Args: - path: Path to the file to remove - - Returns: - True if removed successfully - """ - path = Path(path).resolve() - dir_path = path.parent - - store = self._get_dir_store(dir_path) - if store is None: - logger.debug("Cannot remove file, directory not indexed: %s", dir_path) - return False - - # Retry logic for transient database errors - max_retries = 3 - for attempt in range(max_retries): - try: - store.remove_file(str(path)) - store.update_merkle_root() - - # Best-effort cleanup of static graph relationships (keeps global DB consistent). - if getattr(self.config, "static_graph_enabled", False): - try: - source_root = self.mapper.get_project_root(path) or dir_path - index_root = self.mapper.source_to_index_dir(source_root) - global_index = self._get_global_index(index_root, source_root=source_root) - if global_index is not None: - global_index.delete_file_relationships(path) - except Exception as exc: - logger.debug( - "Failed to delete global relationships for %s: %s", - path, - exc, - ) - logger.debug("Removed file from index: %s", path) - return True - - except __import__("sqlite3").OperationalError as exc: - # Transient database errors (e.g., database locked) - if attempt < max_retries - 1: - import time - wait_time = 0.1 * (2 ** attempt) # Exponential backoff - logger.debug("Database operation failed (attempt %d/%d), retrying in %.2fs: %s", - attempt + 1, max_retries, wait_time, exc) - time.sleep(wait_time) - continue - else: - logger.error("Failed to remove %s after %d attempts: %s", path, max_retries, exc) - return False - except Exception as exc: - logger.error("Failed to remove %s: %s", path, exc) - return False - - # Should never reach here - return False - - def close(self) -> None: - """Close all open stores.""" - with self._lock: - for store in self._dir_stores.values(): - try: - store.close() - except Exception: - pass - self._dir_stores.clear() - - if self._global_index: - try: - self._global_index.close() - except Exception: - pass - self._global_index = None diff --git a/codex-lens/src/codexlens/watcher/manager.py b/codex-lens/src/codexlens/watcher/manager.py deleted file mode 100644 index 5a5653d4..00000000 --- a/codex-lens/src/codexlens/watcher/manager.py +++ /dev/null @@ -1,255 +0,0 @@ -"""Watcher manager for coordinating file watching and incremental indexing.""" - -from __future__ import annotations - -import json -import logging -import signal -import threading -import time -from pathlib import Path -from typing import Callable, List, Optional - -from codexlens.config import Config -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore - -from .events import FileEvent, IndexResult, PendingQueueStatus, WatcherConfig, WatcherStats -from .file_watcher import FileWatcher -from .incremental_indexer import IncrementalIndexer - -logger = logging.getLogger(__name__) - - -class WatcherManager: - """High-level manager for file watching and incremental indexing. - - Coordinates FileWatcher and IncrementalIndexer with: - - Lifecycle management (start/stop) - - Signal handling (SIGINT/SIGTERM) - - Statistics tracking - - Graceful shutdown - """ - - def __init__( - self, - root_path: Path, - config: Optional[Config] = None, - watcher_config: Optional[WatcherConfig] = None, - on_indexed: Optional[Callable[[IndexResult], None]] = None, - on_queue_change: Optional[Callable[[PendingQueueStatus], None]] = None, - ) -> None: - self.root_path = Path(root_path).resolve() - self.config = config or Config() - self.watcher_config = watcher_config or WatcherConfig() - self.on_indexed = on_indexed - self.on_queue_change = on_queue_change - - self._registry: Optional[RegistryStore] = None - self._mapper: Optional[PathMapper] = None - self._watcher: Optional[FileWatcher] = None - self._indexer: Optional[IncrementalIndexer] = None - - self._running = False - self._stop_event = threading.Event() - self._lock = threading.RLock() - - # Statistics - self._stats = WatcherStats() - self._original_sigint = None - self._original_sigterm = None - - # Index history for tracking recent results - self._index_history: List[IndexResult] = [] - self._max_history_size = 10 - - def _handle_changes(self, events: List[FileEvent]) -> None: - """Handle file change events from watcher.""" - if not self._indexer or not events: - return - - logger.info("Processing %d file changes", len(events)) - result = self._indexer.process_changes(events) - - # Update stats - self._stats.events_processed += len(events) - self._stats.last_event_time = time.time() - - # Save to history - self._index_history.append(result) - if len(self._index_history) > self._max_history_size: - self._index_history.pop(0) - - if result.files_indexed > 0 or result.files_removed > 0: - logger.info( - "Indexed %d files, removed %d files, %d errors", - result.files_indexed, result.files_removed, len(result.errors) - ) - - # Output JSON for TypeScript backend parsing - result_data = { - "files_indexed": result.files_indexed, - "files_removed": result.files_removed, - "symbols_added": result.symbols_added, - "symbols_removed": result.symbols_removed, - "files_success": result.files_success[:20], # Limit output - "files_failed": result.files_failed[:20], - "errors": result.errors[:10], - "timestamp": result.timestamp - } - print(f"[INDEX_RESULT] {json.dumps(result_data)}", flush=True) - - if self.on_indexed: - try: - self.on_indexed(result) - except Exception as exc: - logger.error("Error in on_indexed callback: %s", exc) - - def _signal_handler(self, signum, frame) -> None: - """Handle shutdown signals.""" - logger.info("Received signal %d, stopping...", signum) - self.stop() - - def _install_signal_handlers(self) -> None: - """Install signal handlers for graceful shutdown.""" - try: - self._original_sigint = signal.signal(signal.SIGINT, self._signal_handler) - if hasattr(signal, 'SIGTERM'): - self._original_sigterm = signal.signal(signal.SIGTERM, self._signal_handler) - except (ValueError, OSError): - # Signal handling not available (e.g., not main thread) - pass - - def _restore_signal_handlers(self) -> None: - """Restore original signal handlers.""" - try: - if self._original_sigint is not None: - signal.signal(signal.SIGINT, self._original_sigint) - if self._original_sigterm is not None and hasattr(signal, 'SIGTERM'): - signal.signal(signal.SIGTERM, self._original_sigterm) - except (ValueError, OSError): - pass - - def start(self) -> None: - """Start watching and indexing.""" - with self._lock: - if self._running: - logger.warning("WatcherManager already running") - return - - # Validate path - if not self.root_path.exists(): - raise ValueError(f"Root path does not exist: {self.root_path}") - - # Initialize components - self._registry = RegistryStore() - self._registry.initialize() - self._mapper = PathMapper() - - self._indexer = IncrementalIndexer( - self._registry, self._mapper, self.config - ) - - self._watcher = FileWatcher( - self.root_path, self.watcher_config, self._handle_changes - ) - - # Always register queue change callback for stdout output (TypeScript backend) - # The wrapper prints [QUEUE_STATUS] JSON and optionally calls on_queue_change - self._watcher.register_queue_change_callback(self._on_queue_change_wrapper) - - # Install signal handlers - self._install_signal_handlers() - - # Start watcher - self._running = True - self._stats.is_running = True - self._stop_event.clear() - self._watcher.start() - - logger.info("WatcherManager started for: %s", self.root_path) - - def stop(self) -> None: - """Stop watching and clean up.""" - with self._lock: - if not self._running: - return - - self._running = False - self._stats.is_running = False - self._stop_event.set() - - # Stop watcher - if self._watcher: - self._watcher.stop() - self._watcher = None - - # Close indexer - if self._indexer: - self._indexer.close() - self._indexer = None - - # Close registry - if self._registry: - self._registry.close() - self._registry = None - - # Restore signal handlers - self._restore_signal_handlers() - - logger.info("WatcherManager stopped") - - def wait(self) -> None: - """Block until stopped.""" - try: - while self._running: - self._stop_event.wait(timeout=1.0) - except KeyboardInterrupt: - logger.info("Interrupted, stopping...") - self.stop() - - @property - def is_running(self) -> bool: - """Check if manager is running.""" - return self._running - - def get_stats(self) -> WatcherStats: - """Get runtime statistics.""" - return WatcherStats( - files_watched=self._stats.files_watched, - events_processed=self._stats.events_processed, - last_event_time=self._stats.last_event_time, - is_running=self._running, - ) - - def _on_queue_change_wrapper(self, status: PendingQueueStatus) -> None: - """Wrapper for queue change callback with JSON output.""" - # Output JSON for TypeScript backend parsing - status_data = { - "file_count": status.file_count, - "files": status.files, - "countdown_seconds": status.countdown_seconds, - "last_event_time": status.last_event_time - } - print(f"[QUEUE_STATUS] {json.dumps(status_data)}", flush=True) - - if self.on_queue_change: - try: - self.on_queue_change(status) - except Exception as exc: - logger.error("Error in on_queue_change callback: %s", exc) - - def flush_now(self) -> None: - """Immediately flush pending queue (manual trigger).""" - if self._watcher: - self._watcher.flush_now() - - def get_pending_queue_status(self) -> Optional[PendingQueueStatus]: - """Get current pending queue status.""" - if self._watcher: - return self._watcher.get_pending_queue_status() - return None - - def get_index_history(self, limit: int = 5) -> List[IndexResult]: - """Get recent index history.""" - return self._index_history[-limit:] diff --git a/codex-lens/test_chain_search.py b/codex-lens/test_chain_search.py deleted file mode 100644 index d2ed55c0..00000000 --- a/codex-lens/test_chain_search.py +++ /dev/null @@ -1,146 +0,0 @@ -"""Test script for chain search engine functionality.""" - -from pathlib import Path -from codexlens.search import ChainSearchEngine, SearchOptions, quick_search -from codexlens.storage.registry import RegistryStore -from codexlens.storage.path_mapper import PathMapper - - -def test_basic_search(): - """Test basic chain search functionality.""" - print("=== Testing Chain Search Engine ===\n") - - # Initialize components - registry = RegistryStore() - registry.initialize() - mapper = PathMapper() - - # Create engine - engine = ChainSearchEngine(registry, mapper) - print(f"[OK] ChainSearchEngine initialized") - - # Test search options - options = SearchOptions( - depth=-1, - max_workers=4, - limit_per_dir=10, - total_limit=50, - include_symbols=False, - files_only=False - ) - print(f"[OK] SearchOptions configured: depth={options.depth}, workers={options.max_workers}") - - # Test path that exists in the current project - test_path = Path("D:/Claude_dms3/codex-lens/src/codexlens") - - if test_path.exists(): - print(f"\n[OK] Test path exists: {test_path}") - - # Perform search - result = engine.search("search", test_path, options) - - print(f"\n=== Search Results ===") - print(f"Query: '{result.query}'") - print(f"Directories searched: {result.stats.dirs_searched}") - print(f"Files matched: {result.stats.files_matched}") - print(f"Time: {result.stats.time_ms:.2f}ms") - - if result.stats.errors: - print(f"Errors: {len(result.stats.errors)}") - for err in result.stats.errors[:3]: - print(f" - {err}") - - print(f"\nTop Results (showing first 5):") - for i, res in enumerate(result.results[:5], 1): - print(f"{i}. {res.path}") - print(f" Score: {res.score:.2f}") - if res.excerpt: - excerpt = res.excerpt.replace('\n', ' ')[:100] - print(f" Excerpt: {excerpt}...") - else: - print(f"\n[SKIP] Test path does not exist: {test_path}") - print(" (Index may not be built yet)") - - registry.close() - print("\n[OK] Test completed") - - -def test_quick_search(): - """Test quick_search convenience function.""" - print("\n\n=== Testing Quick Search ===\n") - - test_path = Path("D:/Claude_dms3/codex-lens/src") - - if test_path.exists(): - results = quick_search("index", test_path, depth=2) - print(f"[OK] Quick search completed") - print(f" Found {len(results)} results") - if results: - print(f" Top result: {results[0].path}") - else: - print(f"[SKIP] Test path does not exist: {test_path}") - - print("\n[OK] Quick search test completed") - - -def test_symbol_search(): - """Test symbol search functionality.""" - print("\n\n=== Testing Symbol Search ===\n") - - registry = RegistryStore() - registry.initialize() - mapper = PathMapper() - engine = ChainSearchEngine(registry, mapper) - - test_path = Path("D:/Claude_dms3/codex-lens/src/codexlens") - - if test_path.exists(): - symbols = engine.search_symbols("search", test_path, kind=None) - print(f"[OK] Symbol search completed") - print(f" Found {len(symbols)} symbols") - for i, sym in enumerate(symbols[:5], 1): - print(f" {i}. {sym.name} ({sym.kind}) - lines {sym.range[0]}-{sym.range[1]}") - else: - print(f"[SKIP] Test path does not exist: {test_path}") - - registry.close() - print("\n[OK] Symbol search test completed") - - -def test_files_only_search(): - """Test files-only search mode.""" - print("\n\n=== Testing Files-Only Search ===\n") - - registry = RegistryStore() - registry.initialize() - mapper = PathMapper() - engine = ChainSearchEngine(registry, mapper) - - test_path = Path("D:/Claude_dms3/codex-lens/src") - - if test_path.exists(): - file_paths = engine.search_files_only("class", test_path) - print(f"[OK] Files-only search completed") - print(f" Found {len(file_paths)} files") - for i, path in enumerate(file_paths[:5], 1): - print(f" {i}. {path}") - else: - print(f"[SKIP] Test path does not exist: {test_path}") - - registry.close() - print("\n[OK] Files-only search test completed") - - -if __name__ == "__main__": - try: - test_basic_search() - test_quick_search() - test_symbol_search() - test_files_only_search() - print("\n" + "=" * 50) - print("All tests completed successfully!") - print("=" * 50) - except Exception as e: - print(f"\n[ERROR] Test failed with error: {e}") - import traceback - traceback.print_exc() diff --git a/codex-lens/test_simple_function.py b/codex-lens/test_simple_function.py deleted file mode 100644 index 19fbf4ab..00000000 --- a/codex-lens/test_simple_function.py +++ /dev/null @@ -1,19 +0,0 @@ -"""Simple test file with clear function definitions.""" - -def hello_world(): - """A simple function.""" - return "Hello, World!" - -def greet(name: str) -> str: - """Greet someone by name.""" - return f"Hello, {name}!" - -def main(): - """Main function that calls other functions.""" - msg = hello_world() - greeting = greet("Alice") - print(msg) - print(greeting) - -if __name__ == "__main__": - main() diff --git a/codex-lens/tests/TEST_SUITE_SUMMARY.md b/codex-lens/tests/TEST_SUITE_SUMMARY.md deleted file mode 100644 index 889372b2..00000000 --- a/codex-lens/tests/TEST_SUITE_SUMMARY.md +++ /dev/null @@ -1,347 +0,0 @@ -# Hybrid Search Test Suite Summary - -## Overview - -Comprehensive test suite for hybrid search components covering Dual-FTS schema, encoding detection, incremental indexing, RRF fusion, query parsing, and end-to-end workflows. - -## Test Coverage - -### ✅ test_rrf_fusion.py (29 tests - 100% passing) -**Module Tested**: `codexlens.search.ranking` - -**Coverage**: -- ✅ Reciprocal Rank Fusion algorithm (9 tests) - - Single/multiple source ranking - - RRF score calculation with custom k values - - Weight handling and normalization - - Fusion score metadata storage -- ✅ Synthetic ranking scenarios (4 tests) - - Perfect agreement between sources - - Complete disagreement handling - - Partial overlap fusion - - Three-source fusion (exact, fuzzy, vector) -- ✅ BM25 score normalization (4 tests) - - Negative score handling - - 0-1 range normalization - - Better match = higher score validation -- ✅ Search source tagging (4 tests) - - Metadata preservation - - Source tracking for RRF -- ✅ Parameterized k-value tests (3 tests) -- ✅ Edge cases (5 tests) - - Duplicate paths - - Large result lists (1000 items) - - Missing weights handling - -**Key Test Examples**: -```python -def test_two_sources_fusion(): - """Test RRF combines rankings from two sources.""" - exact_results = [SearchResult(path="a.py", score=10.0, ...)] - fuzzy_results = [SearchResult(path="b.py", score=9.0, ...)] - fused = reciprocal_rank_fusion({"exact": exact, "fuzzy": fuzzy}) - # Items in both sources rank highest -``` - ---- - -### ✅ test_query_parser.py (47 tests - 100% passing) -**Module Tested**: `codexlens.search.query_parser` - -**Coverage**: -- ✅ CamelCase splitting (4 tests) - - `UserAuth` → `UserAuth OR User OR Auth` - - lowerCamelCase handling - - ALL_CAPS acronym preservation -- ✅ snake_case splitting (3 tests) - - `get_user_data` → `get_user_data OR get OR user OR data` -- ✅ kebab-case splitting (2 tests) -- ✅ Query expansion logic (5 tests) - - OR operator insertion - - Original query preservation - - Token deduplication - - min_token_length filtering -- ✅ FTS5 operator preservation (7 tests) - - Quoted phrases not expanded - - OR/AND/NOT/NEAR operators preserved - - Wildcard queries (`auth*`) preserved -- ✅ Multi-word queries (2 tests) -- ✅ Parameterized splitting (5 tests covering all formats) -- ✅ Edge cases (6 tests) - - Unicode identifiers - - Very long identifiers - - Mixed case styles -- ✅ Token extraction internals (4 tests) -- ✅ Integration tests (2 tests) - - Real-world query examples - - Performance (1000 queries) -- ✅ Min token length configuration (3 tests) - -**Key Test Examples**: -```python -@pytest.mark.parametrize("query,expected_tokens", [ - ("UserAuth", ["UserAuth", "User", "Auth"]), - ("get_user_data", ["get_user_data", "get", "user", "data"]), -]) -def test_identifier_splitting(query, expected_tokens): - parser = QueryParser() - result = parser.preprocess_query(query) - for token in expected_tokens: - assert token in result -``` - ---- - -### ⚠️ test_encoding.py (34 tests - 24 passing, 7 failing, 3 skipped) -**Module Tested**: `codexlens.parsers.encoding` - -**Passing Coverage**: -- ✅ Encoding availability detection (2 tests) -- ✅ Basic encoding detection (3 tests) -- ✅ read_file_safe functionality (9 tests) - - UTF-8, GBK, Latin-1 file reading - - Error replacement with `errors='replace'` - - Empty files, nonexistent files, directories -- ✅ Binary file detection (7 tests) - - Null byte detection - - Non-text character ratio - - Sample size parameter -- ✅ Parameterized encoding tests (4 tests) - - UTF-8, GBK, ISO-8859-1, Windows-1252 - -**Known Issues** (7 failing tests): -- Chardet-specific tests failing due to mock/patch issues -- Tests expect exact encoding detection behavior -- **Resolution**: Tests work correctly when chardet is available, mock issues are minor - ---- - -### ⚠️ test_dual_fts.py (17 tests - needs API fixes) -**Module Tested**: `codexlens.storage.dir_index` (Dual-FTS schema) - -**Test Structure**: -- 🔧 Dual FTS schema creation (4 tests) - - `files_fts_exact` and `files_fts_fuzzy` table existence - - Tokenizer validation (unicode61 for exact, trigram for fuzzy) -- 🔧 Trigger synchronization (3 tests) - - INSERT/UPDATE/DELETE triggers - - Content sync between tables -- 🔧 Migration tests (4 tests) - - v2 → v4 migration - - Data preservation - - Schema version updates - - Idempotency -- 🔧 Trigram availability (1 test) - - Fallback to unicode61 when trigram unavailable -- 🔧 Performance benchmarks (2 tests) - - INSERT overhead measurement - - Search performance on exact/fuzzy FTS - -**Required Fix**: Replace `_connect()` with `_get_connection()` to match DirIndexStore API - ---- - -### ⚠️ test_incremental_indexing.py (14 tests - needs API fixes) -**Module Tested**: `codexlens.storage.dir_index` (mtime tracking) - -**Test Structure**: -- 🔧 Mtime tracking (4 tests) - - needs_reindex() logic for new/unchanged/modified files - - mtime column validation -- 🔧 Incremental update workflows (3 tests) - - ≥90% skip rate verification - - Modified file detection - - New file detection -- 🔧 Deleted file cleanup (2 tests) - - Nonexistent file removal - - Existing file preservation -- 🔧 Mtime edge cases (3 tests) - - Floating-point precision - - NULL mtime handling - - Future mtime (clock skew) -- 🔧 Performance benchmarks (2 tests) - - Skip rate on 1000 files - - Cleanup performance - -**Required Fix**: Same as dual_fts.py - API method name correction - ---- - -### ⚠️ test_hybrid_search_e2e.py (30 tests - needs API fixes) -**Module Tested**: `codexlens.search.hybrid_search` + full pipeline - -**Test Structure**: -- 🔧 Basic engine tests (3 tests) - - Initialization with default/custom weights - - Empty index handling -- 🔧 Sample project tests (7 tests) - - Exact/fuzzy/hybrid search modes - - Python + TypeScript project structure - - CamelCase/snake_case query expansion - - Partial identifier matching -- 🔧 Relevance ranking (3 tests) - - Exact match ranking - - Hybrid RRF fusion improvement -- 🔧 Performance tests (2 tests) - - Search latency benchmarks - - Hybrid overhead (<2x exact search) -- 🔧 Edge cases (5 tests) - - Empty index - - No matches - - Special characters - - Unicode queries - - Very long queries -- 🔧 Integration workflows (2 tests) - - Index → search → refine - - Result consistency - -**Required Fix**: API method corrections - ---- - -## Test Statistics - -| Test File | Total | Passing | Failing | Skipped | -|-----------|-------|---------|---------|---------| -| test_rrf_fusion.py | 29 | 29 | 0 | 0 | -| test_query_parser.py | 47 | 47 | 0 | 0 | -| test_encoding.py | 34 | 24 | 7 | 3 | -| test_dual_fts.py | 17 | 0* | 17* | 0 | -| test_incremental_indexing.py | 14 | 0* | 14* | 0 | -| test_hybrid_search_e2e.py | 30 | 0* | 30* | 0 | -| **TOTAL** | **171** | **100** | **68** | **3** | - -*Requires minor API fixes (method name corrections) - ---- - -## Accomplishments - -### ✅ Fully Implemented -1. **RRF Fusion Testing** (29 tests) - - Complete coverage of reciprocal rank fusion algorithm - - Synthetic ranking scenarios validation - - BM25 normalization testing - - Weight handling and edge cases - -2. **Query Parser Testing** (47 tests) - - Comprehensive identifier splitting coverage - - CamelCase, snake_case, kebab-case expansion - - FTS5 operator preservation - - Parameterized tests for all formats - - Performance and integration tests - -3. **Encoding Detection Testing** (34 tests - 24 passing) - - UTF-8, GBK, Latin-1, Windows-1252 support - - Binary file detection heuristics - - Safe file reading with error replacement - - Chardet integration tests - -### 🔧 Implemented (Needs Minor Fixes) -4. **Dual-FTS Schema Testing** (17 tests) - - Schema creation and migration - - Trigger synchronization - - Trigram tokenizer availability - - Performance benchmarks - -5. **Incremental Indexing Testing** (14 tests) - - Mtime-based change detection - - ≥90% skip rate validation - - Deleted file cleanup - - Edge case handling - -6. **Hybrid Search E2E Testing** (30 tests) - - Complete workflow testing - - Sample project structure - - Relevance ranking validation - - Performance benchmarks - ---- - -## Test Execution Examples - -### Run All Working Tests -```bash -cd codex-lens -python -m pytest tests/test_rrf_fusion.py tests/test_query_parser.py -v -``` - -### Run Encoding Tests (with optional dependencies) -```bash -pip install chardet # Optional for encoding detection -python -m pytest tests/test_encoding.py -v -``` - -### Run All Tests (including failing ones for debugging) -```bash -python -m pytest tests/test_*.py -v --tb=short -``` - -### Run with Coverage -```bash -python -m pytest tests/test_rrf_fusion.py tests/test_query_parser.py --cov=codexlens.search --cov-report=term -``` - ---- - -## Quick Fixes Required - -### Fix DirIndexStore API References -All database-related tests need one change: -- Replace: `with store._connect() as conn:` -- With: `conn = store._get_connection()` - -**Files to Fix**: -1. `test_dual_fts.py` - 17 tests -2. `test_incremental_indexing.py` - 14 tests -3. `test_hybrid_search_e2e.py` - 30 tests - -**Example Fix**: -```python -# Before (incorrect) -with index_store._connect() as conn: - conn.execute("SELECT * FROM files") - -# After (correct) -conn = index_store._get_connection() -conn.execute("SELECT * FROM files") -``` - ---- - -## Coverage Goals Achieved - -✅ **50+ test cases** across all components (171 total) -✅ **90%+ code coverage** on new modules (RRF, query parser) -✅ **Integration tests** verify end-to-end workflows -✅ **Performance benchmarks** measure latency and overhead -✅ **Parameterized tests** cover multiple input variations -✅ **Edge case handling** for Unicode, special chars, empty inputs - ---- - -## Next Steps - -1. **Apply API fixes** to database tests (est. 15 min) -2. **Run full test suite** with `pytest --cov` -3. **Verify ≥90% coverage** on hybrid search modules -4. **Document any optional dependencies** (chardet for encoding) -5. **Add pytest markers** for benchmark tests - ---- - -## Test Quality Features - -- ✅ **Fixture-based setup** for database isolation -- ✅ **Temporary files** prevent test pollution -- ✅ **Parameterized tests** reduce duplication -- ✅ **Benchmark markers** for performance tests -- ✅ **Skip markers** for optional dependencies -- ✅ **Clear assertions** with descriptive messages -- ✅ **Mocking** for external dependencies (chardet) - ---- - -**Generated**: 2025-12-16 -**Test Framework**: pytest 8.4.2 -**Python Version**: 3.13.5 diff --git a/codex-lens/tests/__init__.py b/codex-lens/tests/__init__.py deleted file mode 100644 index 263cbbec..00000000 --- a/codex-lens/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""CodexLens test suite.""" diff --git a/codex-lens/tests/api/test_references.py b/codex-lens/tests/api/test_references.py deleted file mode 100644 index b50bed9b..00000000 --- a/codex-lens/tests/api/test_references.py +++ /dev/null @@ -1,282 +0,0 @@ -"""Tests for codexlens.api.references module.""" - -import os -import tempfile -from pathlib import Path -from unittest.mock import MagicMock, patch - -import pytest - -from codexlens.api.references import ( - find_references, - _read_line_from_file, - _proximity_score, - _group_references_by_definition, - _transform_to_reference_result, -) -from codexlens.api.models import ( - DefinitionResult, - ReferenceResult, - GroupedReferences, -) - - -class TestReadLineFromFile: - """Tests for _read_line_from_file helper.""" - - def test_read_existing_line(self, tmp_path): - """Test reading an existing line from a file.""" - test_file = tmp_path / "test.py" - test_file.write_text("line 1\nline 2\nline 3\n") - - assert _read_line_from_file(str(test_file), 1) == "line 1" - assert _read_line_from_file(str(test_file), 2) == "line 2" - assert _read_line_from_file(str(test_file), 3) == "line 3" - - def test_read_nonexistent_line(self, tmp_path): - """Test reading a line that doesn't exist.""" - test_file = tmp_path / "test.py" - test_file.write_text("line 1\nline 2\n") - - assert _read_line_from_file(str(test_file), 10) == "" - - def test_read_nonexistent_file(self): - """Test reading from a file that doesn't exist.""" - assert _read_line_from_file("/nonexistent/path/file.py", 1) == "" - - def test_strips_trailing_whitespace(self, tmp_path): - """Test that trailing whitespace is stripped.""" - test_file = tmp_path / "test.py" - test_file.write_text("line with spaces \n") - - assert _read_line_from_file(str(test_file), 1) == "line with spaces" - - -class TestProximityScore: - """Tests for _proximity_score helper.""" - - def test_same_file(self): - """Same file should return highest score.""" - score = _proximity_score("/a/b/c.py", "/a/b/c.py") - assert score == 1000 - - def test_same_directory(self): - """Same directory should return 100.""" - score = _proximity_score("/a/b/x.py", "/a/b/y.py") - assert score == 100 - - def test_different_directories(self): - """Different directories should return common prefix length.""" - score = _proximity_score("/a/b/c/x.py", "/a/b/d/y.py") - # Common path is /a/b - assert score > 0 - - def test_empty_paths(self): - """Empty paths should return 0.""" - assert _proximity_score("", "/a/b/c.py") == 0 - assert _proximity_score("/a/b/c.py", "") == 0 - assert _proximity_score("", "") == 0 - - -class TestGroupReferencesByDefinition: - """Tests for _group_references_by_definition helper.""" - - def test_single_definition(self): - """Single definition should have all references.""" - definition = DefinitionResult( - name="foo", - kind="function", - file_path="/a/b/c.py", - line=10, - end_line=20, - ) - references = [ - ReferenceResult( - file_path="/a/b/d.py", - line=5, - column=0, - context_line="foo()", - relationship="call", - ), - ReferenceResult( - file_path="/a/x/y.py", - line=10, - column=0, - context_line="foo()", - relationship="call", - ), - ] - - result = _group_references_by_definition([definition], references) - - assert len(result) == 1 - assert result[0].definition == definition - assert len(result[0].references) == 2 - - def test_multiple_definitions(self): - """Multiple definitions should group by proximity.""" - def1 = DefinitionResult( - name="foo", - kind="function", - file_path="/a/b/c.py", - line=10, - end_line=20, - ) - def2 = DefinitionResult( - name="foo", - kind="function", - file_path="/x/y/z.py", - line=10, - end_line=20, - ) - - # Reference closer to def1 - ref1 = ReferenceResult( - file_path="/a/b/d.py", - line=5, - column=0, - context_line="foo()", - relationship="call", - ) - # Reference closer to def2 - ref2 = ReferenceResult( - file_path="/x/y/w.py", - line=10, - column=0, - context_line="foo()", - relationship="call", - ) - - result = _group_references_by_definition( - [def1, def2], [ref1, ref2], include_definition=True - ) - - assert len(result) == 2 - # Each definition should have the closer reference - def1_refs = [g for g in result if g.definition == def1][0].references - def2_refs = [g for g in result if g.definition == def2][0].references - - assert any(r.file_path == "/a/b/d.py" for r in def1_refs) - assert any(r.file_path == "/x/y/w.py" for r in def2_refs) - - def test_empty_definitions(self): - """Empty definitions should return empty result.""" - result = _group_references_by_definition([], []) - assert result == [] - - -class TestTransformToReferenceResult: - """Tests for _transform_to_reference_result helper.""" - - def test_normalizes_relationship_type(self, tmp_path): - """Test that relationship type is normalized.""" - test_file = tmp_path / "test.py" - test_file.write_text("def foo(): pass\n") - - # Create a mock raw reference - raw_ref = MagicMock() - raw_ref.file_path = str(test_file) - raw_ref.line = 1 - raw_ref.column = 0 - raw_ref.relationship_type = "calls" # Plural form - - result = _transform_to_reference_result(raw_ref) - - assert result.relationship == "call" # Normalized form - assert result.context_line == "def foo(): pass" - - -class TestFindReferences: - """Tests for find_references API function.""" - - def test_raises_for_invalid_project_root(self): - """Test that ValueError is raised for invalid project root.""" - with pytest.raises(ValueError, match="does not exist"): - find_references("/nonexistent/path", "some_symbol") - - @patch("codexlens.search.chain_search.ChainSearchEngine") - @patch("codexlens.storage.registry.RegistryStore") - @patch("codexlens.storage.path_mapper.PathMapper") - @patch("codexlens.config.Config") - def test_returns_grouped_references( - self, mock_config, mock_mapper, mock_registry, mock_engine_class, tmp_path - ): - """Test that find_references returns GroupedReferences.""" - # Setup mocks - mock_engine = MagicMock() - mock_engine_class.return_value = mock_engine - - # Mock symbol search (for definitions) - mock_symbol = MagicMock() - mock_symbol.name = "test_func" - mock_symbol.kind = "function" - mock_symbol.file = str(tmp_path / "test.py") - mock_symbol.range = (10, 20) - mock_engine.search_symbols.return_value = [mock_symbol] - - # Mock reference search - mock_ref = MagicMock() - mock_ref.file_path = str(tmp_path / "caller.py") - mock_ref.line = 5 - mock_ref.column = 0 - mock_ref.relationship_type = "call" - mock_engine.search_references.return_value = [mock_ref] - - # Create test files - test_file = tmp_path / "test.py" - test_file.write_text("def test_func():\n pass\n") - caller_file = tmp_path / "caller.py" - caller_file.write_text("test_func()\n") - - # Call find_references - result = find_references(str(tmp_path), "test_func") - - # Verify result structure - assert isinstance(result, list) - assert len(result) == 1 - assert isinstance(result[0], GroupedReferences) - assert result[0].definition.name == "test_func" - assert len(result[0].references) == 1 - - @patch("codexlens.search.chain_search.ChainSearchEngine") - @patch("codexlens.storage.registry.RegistryStore") - @patch("codexlens.storage.path_mapper.PathMapper") - @patch("codexlens.config.Config") - def test_respects_include_definition_false( - self, mock_config, mock_mapper, mock_registry, mock_engine_class, tmp_path - ): - """Test include_definition=False behavior.""" - mock_engine = MagicMock() - mock_engine_class.return_value = mock_engine - mock_engine.search_symbols.return_value = [] - mock_engine.search_references.return_value = [] - - result = find_references( - str(tmp_path), "test_func", include_definition=False - ) - - # Should still return a result with placeholder definition - assert len(result) == 1 - assert result[0].definition.name == "test_func" - - -class TestImports: - """Tests for module imports and exports.""" - - def test_find_references_exported_from_api(self): - """Test that find_references is exported from codexlens.api.""" - from codexlens.api import find_references as api_find_references - - assert callable(api_find_references) - - def test_models_exported_from_api(self): - """Test that result models are exported from codexlens.api.""" - from codexlens.api import ( - GroupedReferences, - ReferenceResult, - DefinitionResult, - ) - - assert GroupedReferences is not None - assert ReferenceResult is not None - assert DefinitionResult is not None diff --git a/codex-lens/tests/api/test_semantic_integration.py b/codex-lens/tests/api/test_semantic_integration.py deleted file mode 100644 index 3c54f82b..00000000 --- a/codex-lens/tests/api/test_semantic_integration.py +++ /dev/null @@ -1,264 +0,0 @@ -"""Integration tests for semantic.py API - fusion strategy routing and result transform. - -Tests cover: -- _execute_search: Strategy routing for rrf, binary, staged, hybrid (compat), dense_rerank -- _transform_results: Score extraction and kind filtering -""" - -from __future__ import annotations - -from pathlib import Path -from typing import List, Optional -from unittest.mock import MagicMock, Mock, patch - -import pytest - -from codexlens.api.models import SemanticResult -from codexlens.api.semantic import _execute_search, _transform_results -from codexlens.entities import SearchResult -from codexlens.search.chain_search import ( - ChainSearchEngine, - ChainSearchResult, - SearchOptions, - SearchStats, -) - - -# ============================================================================= -# Test Fixtures -# ============================================================================= - - -@pytest.fixture -def mock_engine(): - """Create mock ChainSearchEngine.""" - engine = MagicMock(spec=ChainSearchEngine) - return engine - - -@pytest.fixture -def mock_chain_result(): - """Create mock ChainSearchResult with sample data.""" - return ChainSearchResult( - query="test query", - results=[ - SearchResult( - path="auth.py", - score=0.9, - excerpt="def authenticate(user):", - symbol_name="authenticate", - symbol_kind="function", - start_line=10, - end_line=20, - ), - SearchResult( - path="login.py", - score=0.7, - excerpt="class LoginHandler:", - symbol_name="LoginHandler", - symbol_kind="class", - start_line=5, - end_line=50, - ), - ], - symbols=[], - stats=SearchStats(), - ) - - -@pytest.fixture -def mock_options(): - """Create mock SearchOptions.""" - return SearchOptions( - hybrid_mode=True, - enable_vector=True, - enable_fuzzy=True, - ) - - -# ============================================================================= -# Tests: _execute_search strategy routing -# ============================================================================= - - -class TestExecuteSearchStrategyRouting: - """Tests for _execute_search() fusion strategy routing.""" - - def test_fusion_strategy_rrf(self, mock_engine, mock_chain_result, mock_options): - """Default 'rrf' strategy should call engine.search().""" - mock_engine.search.return_value = mock_chain_result - - result = _execute_search( - engine=mock_engine, - query="test", - source_path=Path("/project"), - fusion_strategy="rrf", - options=mock_options, - limit=20, - ) - - mock_engine.search.assert_called_once() - assert isinstance(result, ChainSearchResult) - - def test_fusion_strategy_binary(self, mock_engine, mock_chain_result, mock_options): - """'binary' strategy should call engine.binary_cascade_search().""" - mock_engine.binary_cascade_search.return_value = mock_chain_result - - result = _execute_search( - engine=mock_engine, - query="test", - source_path=Path("/project"), - fusion_strategy="binary", - options=mock_options, - limit=20, - ) - - mock_engine.binary_cascade_search.assert_called_once() - # Verify k and coarse_k parameters - call_kwargs = mock_engine.binary_cascade_search.call_args - assert call_kwargs[1]["k"] == 20 - assert call_kwargs[1]["coarse_k"] == 100 # limit * 5 - - def test_fusion_strategy_staged(self, mock_engine, mock_chain_result, mock_options): - """'staged' strategy should call engine.staged_cascade_search().""" - mock_engine.staged_cascade_search.return_value = mock_chain_result - - result = _execute_search( - engine=mock_engine, - query="test", - source_path=Path("/project"), - fusion_strategy="staged", - options=mock_options, - limit=20, - ) - - mock_engine.staged_cascade_search.assert_called_once() - - def test_fusion_strategy_hybrid_compat( - self, mock_engine, mock_chain_result, mock_options - ): - """'hybrid' strategy should map to binary_rerank_cascade_search (backward compat).""" - mock_engine.binary_rerank_cascade_search.return_value = mock_chain_result - - result = _execute_search( - engine=mock_engine, - query="test", - source_path=Path("/project"), - fusion_strategy="hybrid", - options=mock_options, - limit=20, - ) - - mock_engine.binary_rerank_cascade_search.assert_called_once() - - def test_fusion_strategy_dense_rerank( - self, mock_engine, mock_chain_result, mock_options - ): - """'dense_rerank' strategy should call engine.search() (default fallback).""" - # In the current implementation, dense_rerank is not explicitly handled, - # so it falls through to the default (rrf) branch - mock_engine.search.return_value = mock_chain_result - - result = _execute_search( - engine=mock_engine, - query="test", - source_path=Path("/project"), - fusion_strategy="dense_rerank", - options=mock_options, - limit=20, - ) - - # dense_rerank falls to default (else branch -> engine.search) - mock_engine.search.assert_called_once() - - -# ============================================================================= -# Tests: _transform_results -# ============================================================================= - - -class TestTransformResults: - """Tests for _transform_results().""" - - def test_transform_results_basic(self): - """_transform_results should convert SearchResult to SemanticResult.""" - results = [ - SearchResult( - path="auth.py", - score=0.9, - excerpt="def authenticate(user):", - symbol_name="authenticate", - symbol_kind="function", - start_line=10, - end_line=20, - ), - SearchResult( - path="models.py", - score=0.7, - excerpt="class UserModel:", - symbol_name="UserModel", - symbol_kind="class", - start_line=1, - end_line=30, - ), - ] - - semantic_results = _transform_results( - results=results, - mode="fusion", - vector_weight=0.5, - structural_weight=0.3, - keyword_weight=0.2, - kind_filter=None, - include_match_reason=False, - query="authentication", - ) - - assert len(semantic_results) == 2 - assert all(isinstance(r, SemanticResult) for r in semantic_results) - - # Check first result - first = semantic_results[0] - assert first.fusion_score == 0.9 - assert first.symbol_name == "authenticate" - assert first.kind == "function" - assert first.file_path == "auth.py" - assert first.line == 10 - - # Should be sorted by fusion_score descending - scores = [r.fusion_score for r in semantic_results] - assert scores == sorted(scores, reverse=True) - - def test_transform_results_kind_filter(self): - """_transform_results should filter by kind when kind_filter is set.""" - results = [ - SearchResult( - path="auth.py", - score=0.9, - excerpt="def auth():", - symbol_name="auth", - symbol_kind="function", - ), - SearchResult( - path="models.py", - score=0.8, - excerpt="class User:", - symbol_name="User", - symbol_kind="class", - ), - ] - - # Filter to only functions - semantic_results = _transform_results( - results=results, - mode="fusion", - vector_weight=0.5, - structural_weight=0.3, - keyword_weight=0.2, - kind_filter=["function"], - include_match_reason=False, - query="test", - ) - - assert len(semantic_results) == 1 - assert semantic_results[0].kind == "function" diff --git a/codex-lens/tests/api/test_semantic_search.py b/codex-lens/tests/api/test_semantic_search.py deleted file mode 100644 index 02720f88..00000000 --- a/codex-lens/tests/api/test_semantic_search.py +++ /dev/null @@ -1,530 +0,0 @@ -"""Tests for semantic_search API.""" -import tempfile -from pathlib import Path -from unittest.mock import MagicMock, patch - -import pytest - -from codexlens.api import SemanticResult -from codexlens.api.semantic import ( - semantic_search, - _build_search_options, - _generate_match_reason, - _split_camel_case, - _transform_results, -) - - -class TestSemanticSearchFunctionSignature: - """Test that semantic_search has the correct function signature.""" - - def test_function_accepts_all_parameters(self): - """Verify function signature matches spec.""" - import inspect - sig = inspect.signature(semantic_search) - params = list(sig.parameters.keys()) - - expected_params = [ - "project_root", - "query", - "mode", - "vector_weight", - "structural_weight", - "keyword_weight", - "fusion_strategy", - "staged_stage2_mode", - "kind_filter", - "limit", - "include_match_reason", - ] - - assert params == expected_params - - def test_default_parameter_values(self): - """Verify default parameter values match spec.""" - import inspect - sig = inspect.signature(semantic_search) - - assert sig.parameters["mode"].default == "fusion" - assert sig.parameters["vector_weight"].default == 0.5 - assert sig.parameters["structural_weight"].default == 0.3 - assert sig.parameters["keyword_weight"].default == 0.2 - assert sig.parameters["fusion_strategy"].default == "rrf" - assert sig.parameters["staged_stage2_mode"].default is None - assert sig.parameters["kind_filter"].default is None - assert sig.parameters["limit"].default == 20 - assert sig.parameters["include_match_reason"].default is False - - -class TestBuildSearchOptions: - """Test _build_search_options helper function.""" - - def test_vector_mode_options(self): - """Test options for pure vector mode.""" - options = _build_search_options( - mode="vector", - vector_weight=1.0, - structural_weight=0.0, - keyword_weight=0.0, - limit=20, - ) - - assert options.hybrid_mode is True - assert options.enable_vector is True - assert options.pure_vector is True - assert options.enable_fuzzy is False - - def test_structural_mode_options(self): - """Test options for structural mode.""" - options = _build_search_options( - mode="structural", - vector_weight=0.0, - structural_weight=1.0, - keyword_weight=0.0, - limit=20, - ) - - assert options.hybrid_mode is True - assert options.enable_vector is False - assert options.enable_fuzzy is True - assert options.include_symbols is True - - def test_fusion_mode_options(self): - """Test options for fusion mode (default).""" - options = _build_search_options( - mode="fusion", - vector_weight=0.5, - structural_weight=0.3, - keyword_weight=0.2, - limit=20, - ) - - assert options.hybrid_mode is True - assert options.enable_vector is True # vector_weight > 0 - assert options.enable_fuzzy is True # keyword_weight > 0 - assert options.include_symbols is True # structural_weight > 0 - - -class TestTransformResults: - """Test _transform_results helper function.""" - - def test_transforms_basic_result(self): - """Test basic result transformation.""" - mock_result = MagicMock() - mock_result.path = "/project/src/auth.py" - mock_result.score = 0.85 - mock_result.excerpt = "def authenticate():" - mock_result.symbol_name = "authenticate" - mock_result.symbol_kind = "function" - mock_result.start_line = 10 - mock_result.symbol = None - mock_result.metadata = {} - - results = _transform_results( - results=[mock_result], - mode="fusion", - vector_weight=0.5, - structural_weight=0.3, - keyword_weight=0.2, - kind_filter=None, - include_match_reason=False, - query="auth", - ) - - assert len(results) == 1 - assert results[0].symbol_name == "authenticate" - assert results[0].kind == "function" - assert results[0].file_path == "/project/src/auth.py" - assert results[0].line == 10 - assert results[0].fusion_score == 0.85 - - def test_kind_filter_excludes_non_matching(self): - """Test that kind_filter excludes non-matching results.""" - mock_result = MagicMock() - mock_result.path = "/project/src/auth.py" - mock_result.score = 0.85 - mock_result.excerpt = "AUTH_TOKEN = 'secret'" - mock_result.symbol_name = "AUTH_TOKEN" - mock_result.symbol_kind = "variable" - mock_result.start_line = 5 - mock_result.symbol = None - mock_result.metadata = {} - - results = _transform_results( - results=[mock_result], - mode="fusion", - vector_weight=0.5, - structural_weight=0.3, - keyword_weight=0.2, - kind_filter=["function", "class"], # Exclude variable - include_match_reason=False, - query="auth", - ) - - assert len(results) == 0 - - def test_kind_filter_includes_matching(self): - """Test that kind_filter includes matching results.""" - mock_result = MagicMock() - mock_result.path = "/project/src/auth.py" - mock_result.score = 0.85 - mock_result.excerpt = "class AuthManager:" - mock_result.symbol_name = "AuthManager" - mock_result.symbol_kind = "class" - mock_result.start_line = 1 - mock_result.symbol = None - mock_result.metadata = {} - - results = _transform_results( - results=[mock_result], - mode="fusion", - vector_weight=0.5, - structural_weight=0.3, - keyword_weight=0.2, - kind_filter=["function", "class"], # Include class - include_match_reason=False, - query="auth", - ) - - assert len(results) == 1 - assert results[0].symbol_name == "AuthManager" - - def test_include_match_reason_generates_reason(self): - """Test that include_match_reason generates match reasons.""" - mock_result = MagicMock() - mock_result.path = "/project/src/auth.py" - mock_result.score = 0.85 - mock_result.excerpt = "def authenticate(user, password):" - mock_result.symbol_name = "authenticate" - mock_result.symbol_kind = "function" - mock_result.start_line = 10 - mock_result.symbol = None - mock_result.metadata = {} - - results = _transform_results( - results=[mock_result], - mode="fusion", - vector_weight=0.5, - structural_weight=0.3, - keyword_weight=0.2, - kind_filter=None, - include_match_reason=True, - query="authenticate", - ) - - assert len(results) == 1 - assert results[0].match_reason is not None - assert "authenticate" in results[0].match_reason.lower() - - -class TestGenerateMatchReason: - """Test _generate_match_reason helper function.""" - - def test_direct_name_match(self): - """Test match reason for direct name match.""" - reason = _generate_match_reason( - query="authenticate", - symbol_name="authenticate", - symbol_kind="function", - snippet="def authenticate(user): pass", - vector_score=0.8, - structural_score=None, - ) - - assert "authenticate" in reason.lower() - - def test_keyword_match(self): - """Test match reason for keyword match in snippet.""" - reason = _generate_match_reason( - query="password validation", - symbol_name="verify_user", - symbol_kind="function", - snippet="def verify_user(password): validate(password)", - vector_score=0.6, - structural_score=None, - ) - - assert "password" in reason.lower() or "validation" in reason.lower() - - def test_high_semantic_similarity(self): - """Test match reason mentions semantic similarity for high vector score.""" - reason = _generate_match_reason( - query="authentication", - symbol_name="login_handler", - symbol_kind="function", - snippet="def login_handler(): pass", - vector_score=0.85, - structural_score=None, - ) - - assert "semantic" in reason.lower() - - def test_returns_string_even_with_no_matches(self): - """Test that a reason string is always returned.""" - reason = _generate_match_reason( - query="xyz123", - symbol_name="abc456", - symbol_kind="function", - snippet="completely unrelated code", - vector_score=0.3, - structural_score=None, - ) - - assert isinstance(reason, str) - assert len(reason) > 0 - - -class TestSplitCamelCase: - """Test _split_camel_case helper function.""" - - def test_camel_case(self): - """Test splitting camelCase.""" - result = _split_camel_case("authenticateUser") - assert "authenticate" in result.lower() - assert "user" in result.lower() - - def test_pascal_case(self): - """Test splitting PascalCase.""" - result = _split_camel_case("AuthManager") - assert "auth" in result.lower() - assert "manager" in result.lower() - - def test_snake_case(self): - """Test splitting snake_case.""" - result = _split_camel_case("auth_manager") - assert "auth" in result.lower() - assert "manager" in result.lower() - - def test_mixed_case(self): - """Test splitting mixed case.""" - result = _split_camel_case("HTTPRequestHandler") - # Should handle acronyms - assert "http" in result.lower() or "request" in result.lower() - - -class TestSemanticResultDataclass: - """Test SemanticResult dataclass structure.""" - - def test_semantic_result_fields(self): - """Test SemanticResult has all required fields.""" - result = SemanticResult( - symbol_name="test", - kind="function", - file_path="/test.py", - line=1, - vector_score=0.8, - structural_score=0.6, - fusion_score=0.7, - snippet="def test(): pass", - match_reason="Test match", - ) - - assert result.symbol_name == "test" - assert result.kind == "function" - assert result.file_path == "/test.py" - assert result.line == 1 - assert result.vector_score == 0.8 - assert result.structural_score == 0.6 - assert result.fusion_score == 0.7 - assert result.snippet == "def test(): pass" - assert result.match_reason == "Test match" - - def test_semantic_result_optional_fields(self): - """Test SemanticResult with optional None fields.""" - result = SemanticResult( - symbol_name="test", - kind="function", - file_path="/test.py", - line=1, - vector_score=None, # Degraded - no vector index - structural_score=None, # Degraded - no relationships - fusion_score=0.5, - snippet="def test(): pass", - match_reason=None, # Not requested - ) - - assert result.vector_score is None - assert result.structural_score is None - assert result.match_reason is None - - def test_semantic_result_to_dict(self): - """Test SemanticResult.to_dict() filters None values.""" - result = SemanticResult( - symbol_name="test", - kind="function", - file_path="/test.py", - line=1, - vector_score=None, - structural_score=0.6, - fusion_score=0.7, - snippet="def test(): pass", - match_reason=None, - ) - - d = result.to_dict() - - assert "symbol_name" in d - assert "vector_score" not in d # None values filtered - assert "structural_score" in d - assert "match_reason" not in d # None values filtered - - -class TestFusionStrategyMapping: - """Test fusion_strategy parameter mapping via _execute_search.""" - - def test_rrf_strategy_calls_search(self): - """Test that rrf strategy maps to standard search.""" - from codexlens.api.semantic import _execute_search - - mock_engine = MagicMock() - mock_engine.search.return_value = MagicMock(results=[]) - mock_options = MagicMock() - - _execute_search( - engine=mock_engine, - query="test query", - source_path=Path("/test"), - fusion_strategy="rrf", - options=mock_options, - limit=20, - ) - - mock_engine.search.assert_called_once() - - def test_staged_strategy_calls_staged_cascade_search(self): - """Test that staged strategy maps to staged_cascade_search.""" - from codexlens.api.semantic import _execute_search - - mock_engine = MagicMock() - mock_engine.staged_cascade_search.return_value = MagicMock(results=[]) - mock_options = MagicMock() - - _execute_search( - engine=mock_engine, - query="test query", - source_path=Path("/test"), - fusion_strategy="staged", - options=mock_options, - limit=20, - ) - - mock_engine.staged_cascade_search.assert_called_once() - - def test_binary_strategy_calls_binary_cascade_search(self): - """Test that binary strategy maps to binary_cascade_search.""" - from codexlens.api.semantic import _execute_search - - mock_engine = MagicMock() - mock_engine.binary_cascade_search.return_value = MagicMock(results=[]) - mock_options = MagicMock() - - _execute_search( - engine=mock_engine, - query="test query", - source_path=Path("/test"), - fusion_strategy="binary", - options=mock_options, - limit=20, - ) - - mock_engine.binary_cascade_search.assert_called_once() - - def test_hybrid_strategy_maps_to_binary_rerank(self): - """Test that hybrid strategy maps to binary_rerank_cascade_search (backward compat).""" - from codexlens.api.semantic import _execute_search - - mock_engine = MagicMock() - mock_engine.binary_rerank_cascade_search.return_value = MagicMock(results=[]) - mock_options = MagicMock() - - _execute_search( - engine=mock_engine, - query="test query", - source_path=Path("/test"), - fusion_strategy="hybrid", - options=mock_options, - limit=20, - ) - - mock_engine.binary_rerank_cascade_search.assert_called_once() - - def test_unknown_strategy_defaults_to_rrf(self): - """Test that unknown strategy defaults to standard search (rrf).""" - from codexlens.api.semantic import _execute_search - - mock_engine = MagicMock() - mock_engine.search.return_value = MagicMock(results=[]) - mock_options = MagicMock() - - _execute_search( - engine=mock_engine, - query="test query", - source_path=Path("/test"), - fusion_strategy="unknown_strategy", - options=mock_options, - limit=20, - ) - - mock_engine.search.assert_called_once() - - -class TestGracefulDegradation: - """Test graceful degradation behavior.""" - - def test_vector_score_none_when_no_vector_index(self): - """Test vector_score=None when vector index unavailable.""" - mock_result = MagicMock() - mock_result.path = "/project/src/auth.py" - mock_result.score = 0.5 - mock_result.excerpt = "def auth(): pass" - mock_result.symbol_name = "auth" - mock_result.symbol_kind = "function" - mock_result.start_line = 1 - mock_result.symbol = None - mock_result.metadata = {} # No vector score in metadata - - results = _transform_results( - results=[mock_result], - mode="fusion", - vector_weight=0.5, - structural_weight=0.3, - keyword_weight=0.2, - kind_filter=None, - include_match_reason=False, - query="auth", - ) - - assert len(results) == 1 - # When no source_scores in metadata, vector_score should be None - assert results[0].vector_score is None - - def test_structural_score_extracted_from_fts(self): - """Test structural_score extracted from FTS scores.""" - mock_result = MagicMock() - mock_result.path = "/project/src/auth.py" - mock_result.score = 0.8 - mock_result.excerpt = "def auth(): pass" - mock_result.symbol_name = "auth" - mock_result.symbol_kind = "function" - mock_result.start_line = 1 - mock_result.symbol = None - mock_result.metadata = { - "source_scores": { - "exact": 0.9, - "fuzzy": 0.7, - } - } - - results = _transform_results( - results=[mock_result], - mode="fusion", - vector_weight=0.5, - structural_weight=0.3, - keyword_weight=0.2, - kind_filter=None, - include_match_reason=False, - query="auth", - ) - - assert len(results) == 1 - assert results[0].structural_score == 0.9 # max of exact/fuzzy diff --git a/codex-lens/tests/conftest.py b/codex-lens/tests/conftest.py deleted file mode 100644 index 40915fff..00000000 --- a/codex-lens/tests/conftest.py +++ /dev/null @@ -1,291 +0,0 @@ -"""Pytest configuration and shared fixtures for codex-lens tests. - -This module provides common fixtures and test utilities to reduce code duplication -across the test suite. Using fixtures ensures consistent test setup and makes tests -more maintainable. - -Common Fixtures: -- temp_dir: Temporary directory for test files -- sample_index_db: Sample index database with test data -- mock_config: Mock configuration object -- sample_code_files: Factory for creating sample code files -""" - -import sqlite3 -import shutil -import tempfile -import warnings -from pathlib import Path -from typing import Any, Dict - -import pytest - -warnings.filterwarnings( - "ignore", - message=r"'BaseCommand' is deprecated and will be removed in Click 9\.0\..*", - category=DeprecationWarning, -) -warnings.filterwarnings( - "ignore", - message=r"The '__version__' attribute is deprecated and will be removed in Click 9\.1\..*", - category=DeprecationWarning, -) - - -@pytest.fixture -def temp_dir(): - """Create a temporary directory for test files. - - The directory is automatically cleaned up after the test. - - Yields: - Path: Path to the temporary directory. - """ - temp_path = Path(tempfile.mkdtemp()) - yield temp_path - # Cleanup - if temp_path.exists(): - shutil.rmtree(temp_path) - - -@pytest.fixture -def sample_index_db(temp_dir): - """Create a sample index database with test data. - - The database has a basic schema with files and chunks tables - populated with sample data. - - Args: - temp_dir: Temporary directory fixture. - - Yields: - Path: Path to the sample index database. - """ - db_path = temp_dir / "_index.db" - - # Create database with basic schema - conn = sqlite3.connect(db_path) - cursor = conn.cursor() - - # Files table - cursor.execute(""" - CREATE TABLE files ( - id INTEGER PRIMARY KEY, - path TEXT NOT NULL UNIQUE, - content TEXT, - language TEXT, - hash TEXT, - indexed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - """) - - # Insert sample files - sample_files = [ - ("test.py", "def hello():\n print('world')", "python", "hash1"), - ("test.js", "function hello() { console.log('world'); }", "javascript", "hash2"), - ("README.md", "# Test Project", "markdown", "hash3"), - ] - cursor.executemany( - "INSERT INTO files (path, content, language, hash) VALUES (?, ?, ?, ?)", - sample_files - ) - - conn.commit() - conn.close() - - yield db_path - - -@pytest.fixture -def mock_config(): - """Create a mock configuration object with default values. - - Returns: - Mock: Mock object with common config attributes. - """ - from unittest.mock import Mock - - config = Mock() - config.index_path = Path("/tmp/test_index") - config.chunk_size = 2000 - config.overlap = 200 - config.embedding_backend = "fastembed" - config.embedding_model = "code" - config.max_results = 10 - - return config - - -@pytest.fixture -def sample_code_factory(temp_dir): - """Factory for creating sample code files. - - Args: - temp_dir: Temporary directory fixture. - - Returns: - callable: Function that creates sample code files. - """ - def _create_file(filename: str, content: str, language: str = "python") -> Path: - """Create a sample code file. - - Args: - filename: Name of the file to create. - content: Content of the file. - language: Programming language (default: python). - - Returns: - Path: Path to the created file. - """ - file_path = temp_dir / filename - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text(content) - return file_path - - return _create_file - - -@pytest.fixture -def sample_python_code(): - """Sample Python code for testing. - - Returns: - str: Sample Python code snippet. - """ - return ''' -def calculate_sum(a: int, b: int) -> int: - """Calculate the sum of two integers.""" - return a + b - -class Calculator: - """A simple calculator class.""" - - def __init__(self): - self.value = 0 - - def add(self, x: int) -> None: - """Add a value to the calculator.""" - self.value += x - -if __name__ == "__main__": - calc = Calculator() - calc.add(5) - print(f"Result: {calc.value}") -''' - - -@pytest.fixture -def sample_javascript_code(): - """Sample JavaScript code for testing. - - Returns: - str: Sample JavaScript code snippet. - """ - return ''' -// Simple utility functions -function add(a, b) { - return a + b; -} - -const Calculator = class { - constructor() { - this.value = 0; - } - - add(x) { - this.value += x; - } -}; - -// Example usage -const calc = new Calculator(); -calc.add(5); -console.log(`Result: ${calc.value}`); -''' - - -class CodeSampleFactory: - """Factory class for generating various code samples. - - This class provides methods to generate code samples in different - languages with various patterns (classes, functions, imports, etc.). - """ - - @staticmethod - def python_function(name: str = "example", docstring: bool = True) -> str: - """Generate a Python function sample. - - Args: - name: Function name. - docstring: Whether to include docstring. - - Returns: - str: Python function code. - """ - doc = f' """Example function."""\n' if docstring else '' - return f''' -def {name}(param1: str, param2: int = 10) -> str: -{doc} return param1 * param2 -'''.strip() - - @staticmethod - def python_class(name: str = "Example") -> str: - """Generate a Python class sample. - - Args: - name: Class name. - - Returns: - str: Python class code. - """ - return f''' -class {name}: - """Example class.""" - - def __init__(self, value: int = 0): - self.value = value - - def increment(self) -> None: - """Increment the value.""" - self.value += 1 -'''.strip() - - @staticmethod - def javascript_function(name: str = "example") -> str: - """Generate a JavaScript function sample. - - Args: - name: Function name. - - Returns: - str: JavaScript function code. - """ - return f'''function {name}(param1, param2 = 10) {{ - return param1 * param2; -}}'''.strip() - - @staticmethod - def typescript_interface(name: str = "Example") -> str: - """Generate a TypeScript interface sample. - - Args: - name: Interface name. - - Returns: - str: TypeScript interface code. - """ - return f'''interface {name} {{ - id: number; - name: string; - getValue(): number; -}}'''.strip() - - -@pytest.fixture -def code_sample_factory(): - """Create a code sample factory instance. - - Returns: - CodeSampleFactory: Factory for generating code samples. - """ - return CodeSampleFactory() diff --git a/codex-lens/tests/fix_sql.py b/codex-lens/tests/fix_sql.py deleted file mode 100644 index 55e66fa8..00000000 --- a/codex-lens/tests/fix_sql.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python3 -"""Fix SQL statements in test files to match new schema.""" -import re -from pathlib import Path - -def fix_insert_statement(line): - """Fix INSERT statements to provide both name and full_path.""" - # Match pattern: (test_path, test_content, "python") - # or ("test/file1.py", "content1", "python") - pattern = r'\(([^,]+),\s*([^,]+),\s*([^)]+)\)' - - def replace_values(match): - path_var, content_var, lang_var = match.groups() - # If it's a variable, we need to extract name from it - # For now, use path_var for both name and full_path - return f'({path_var}.split("/")[-1] if "/" in {path_var} else {path_var}, {path_var}, {content_var}, {lang_var}, 1234567890.0)' - - # Check if this is an INSERT VALUES line - if 'INSERT INTO files' in line and 'VALUES' in line: - # Simple string values like ("test/file1.py", "content1", "python") - if re.search(r'\("[^"]+",\s*"[^"]+",\s*"[^"]+"\)', line): - def replace_str_values(match): - parts = match.group(0)[1:-1].split('", "') - if len(parts) == 3: - path = parts[0].strip('"') - content = parts[1] - lang = parts[2].strip('"') - name = path.split('/')[-1] - return f'("{name}", "{path}", "{content}", "{lang}", 1234567890.0)' - return match.group(0) - - line = re.sub(r'\("[^"]+",\s*"[^"]+",\s*"[^"]+"\)', replace_str_values, line) - - return line - -def main(): - test_files = [ - Path("test_dual_fts.py"), - Path("test_incremental_indexing.py"), - Path("test_hybrid_search_e2e.py") - ] - - for test_file in test_files: - if not test_file.exists(): - continue - - lines = test_file.read_text(encoding='utf-8').splitlines(keepends=True) - - # Fix tuple values in execute calls - new_lines = [] - i = 0 - while i < len(lines): - line = lines[i] - - # Check if this is an execute with VALUES and tuple on next line - if 'conn.execute(' in line or 'conn.executemany(' in line: - # Look ahead for VALUES pattern - if i + 2 < len(lines) and 'VALUES' in lines[i+1]: - # Check for tuple pattern on line after VALUES - if i + 2 < len(lines) and re.search(r'^\s*\([^)]+\)\s*$', lines[i+2]): - tuple_line = lines[i+2] - # Extract values: (test_path, test_content, "python") - match = re.search(r'\(([^,]+),\s*([^,]+),\s*"([^"]+)"\)', tuple_line) - if match: - var1, var2, var3 = match.groups() - var1 = var1.strip() - var2 = var2.strip() - # Create new tuple with name extraction - indent = re.match(r'^(\s*)', tuple_line).group(1) - new_tuple = f'{indent}({var1}.split("/")[-1], {var1}, {var2}, "{var3}", 1234567890.0)\n' - new_lines.append(line) - new_lines.append(lines[i+1]) - new_lines.append(new_tuple) - i += 3 - continue - - new_lines.append(line) - i += 1 - - test_file.write_text(''.join(new_lines), encoding='utf-8') - print(f"Fixed {test_file}") - -if __name__ == "__main__": - main() diff --git a/codex-lens/tests/integration/__init__.py b/codex-lens/tests/integration/__init__.py deleted file mode 100644 index 35c99c66..00000000 --- a/codex-lens/tests/integration/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Integration tests for CodexLens.""" diff --git a/codex-lens/tests/integration/test_lsp_search_integration.py b/codex-lens/tests/integration/test_lsp_search_integration.py deleted file mode 100644 index f6b68bc0..00000000 --- a/codex-lens/tests/integration/test_lsp_search_integration.py +++ /dev/null @@ -1,583 +0,0 @@ -"""Integration tests for HybridSearchEngine LSP graph search. - -Tests the _search_lsp_graph method which orchestrates: -1. Seed retrieval via vector/exact fallback chain -2. LSP graph expansion via LspBridge and LspGraphBuilder -3. Result deduplication and merging - -Test Priority: -- P0: Critical path tests (e2e success, fallback chain) -- P1: Important edge cases (no seeds, bridge failures) -- P2: Supplementary tests (deduplication) -""" - -from __future__ import annotations - -import asyncio -import logging -import tempfile -from pathlib import Path -from typing import Any, Dict, List, Optional -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from codexlens.entities import SearchResult -from codexlens.hybrid_search.data_structures import ( - CallHierarchyItem, - CodeAssociationGraph, - CodeSymbolNode, - Range, -) -from codexlens.search.hybrid_search import HybridSearchEngine - - -# ----------------------------------------------------------------------------- -# Fixtures -# ----------------------------------------------------------------------------- - - -@pytest.fixture -def tmp_index_path(tmp_path: Path) -> Path: - """Create a temporary index database path.""" - db_path = tmp_path / "_index.db" - # Create empty file to satisfy existence checks - db_path.write_bytes(b"") - return db_path - - -@pytest.fixture -def sample_search_result() -> SearchResult: - """Create a sample SearchResult for use as seed.""" - return SearchResult( - path="/path/to/file.py", - content="def auth_flow(): ...", - excerpt="def auth_flow(): ...", - start_line=10, - end_line=20, - symbol_name="auth_flow", - symbol_kind="function", - score=0.9, - ) - - -@pytest.fixture -def sample_search_result_2() -> SearchResult: - """Create a second sample SearchResult.""" - return SearchResult( - path="/path/to/other.py", - content="def init_db(): ...", - excerpt="def init_db(): ...", - start_line=5, - end_line=15, - symbol_name="init_db", - symbol_kind="function", - score=0.85, - ) - - -@pytest.fixture -def sample_code_symbol_node() -> CodeSymbolNode: - """Create a sample CodeSymbolNode for graph expansion.""" - return CodeSymbolNode( - id="/path/to/related.py:helper_func:30", - name="helper_func", - kind="function", - file_path="/path/to/related.py", - range=Range( - start_line=30, - start_character=0, - end_line=40, - end_character=0, - ), - raw_code="def helper_func(): pass", - docstring="Helper function", - ) - - -@pytest.fixture -def sample_code_symbol_node_2() -> CodeSymbolNode: - """Create another sample CodeSymbolNode.""" - return CodeSymbolNode( - id="/path/to/util.py:validate:50", - name="validate", - kind="function", - file_path="/path/to/util.py", - range=Range( - start_line=50, - start_character=0, - end_line=60, - end_character=0, - ), - raw_code="def validate(): pass", - docstring="Validation function", - ) - - -@pytest.fixture -def mock_search_engine() -> HybridSearchEngine: - """Create a HybridSearchEngine with default settings.""" - return HybridSearchEngine() - - -def create_mock_graph_with_seed_and_related( - seed_result: SearchResult, - related_nodes: List[CodeSymbolNode], -) -> CodeAssociationGraph: - """Helper to create a mock graph with seed and related nodes.""" - graph = CodeAssociationGraph() - - # Add seed node - seed_node_id = f"{seed_result.path}:{seed_result.symbol_name or 'unknown'}:{seed_result.start_line or 0}" - seed_node = CodeSymbolNode( - id=seed_node_id, - name=seed_result.symbol_name or "unknown", - kind=seed_result.symbol_kind or "unknown", - file_path=seed_result.path, - range=Range( - start_line=seed_result.start_line or 1, - start_character=0, - end_line=seed_result.end_line or 1, - end_character=0, - ), - ) - graph.add_node(seed_node) - - # Add related nodes - for node in related_nodes: - graph.add_node(node) - - return graph - - -# ----------------------------------------------------------------------------- -# P0: Critical Tests -# ----------------------------------------------------------------------------- - - -class TestP0CriticalLspSearch: - """P0 Critical: Core E2E tests for LSP graph search.""" - - def test_e2e_lsp_search_vector_seed_success( - self, - tmp_index_path: Path, - sample_search_result: SearchResult, - sample_code_symbol_node: CodeSymbolNode, - sample_code_symbol_node_2: CodeSymbolNode, - ) -> None: - """Test E2E LSP search with vector providing seed, returning graph-expanded results. - - Input: query="authentication flow" - Mock: _search_vector returns 1 SearchResult as seed - Mock: LspBridge/LspGraphBuilder returns 2 related symbols - Assert: Returns 2 new results (seed is filtered from final results) - """ - engine = HybridSearchEngine() - - # Create mock graph with seed and 2 related nodes - mock_graph = create_mock_graph_with_seed_and_related( - sample_search_result, - [sample_code_symbol_node, sample_code_symbol_node_2], - ) - - # Patch seed search methods - with patch.object( - engine, "_search_vector", return_value=[sample_search_result] - ) as mock_vector, patch.object( - engine, "_search_exact", return_value=[] - ): - # Patch LSP module at the import location - with patch.dict("sys.modules", {"codexlens.lsp": MagicMock()}): - # Patch the module-level HAS_LSP check - with patch("codexlens.search.hybrid_search.HAS_LSP", True): - # Create mock LspBridge class - mock_bridge_instance = AsyncMock() - mock_bridge_class = MagicMock() - mock_bridge_class.return_value.__aenter__ = AsyncMock( - return_value=mock_bridge_instance - ) - mock_bridge_class.return_value.__aexit__ = AsyncMock( - return_value=None - ) - - # Create mock LspGraphBuilder - async def mock_build(seeds, bridge): - return mock_graph - - mock_builder_instance = MagicMock() - mock_builder_instance.build_from_seeds = mock_build - mock_builder_class = MagicMock(return_value=mock_builder_instance) - - # Patch at module level - with patch( - "codexlens.search.hybrid_search.LspBridge", - mock_bridge_class, - ), patch( - "codexlens.search.hybrid_search.LspGraphBuilder", - mock_builder_class, - ): - results = engine._search_lsp_graph( - index_path=tmp_index_path, - query="authentication flow", - limit=10, - max_depth=1, - max_nodes=20, - ) - - # Verify vector search was called first - mock_vector.assert_called_once() - - # Should return 2 results (the two non-seed nodes) - assert len(results) == 2 - - # Verify seed is not in results - seed_node_id = f"{sample_search_result.path}:{sample_search_result.symbol_name or 'unknown'}:{sample_search_result.start_line or 0}" - result_node_ids = { - f"{r.path}:{r.symbol_name or 'unknown'}:{r.start_line or 0}" - for r in results - } - assert seed_node_id not in result_node_ids - - # Verify the returned results are the graph-expanded nodes - result_paths = {r.path for r in results} - assert sample_code_symbol_node.file_path in result_paths - assert sample_code_symbol_node_2.file_path in result_paths - - def test_seed_fallback_chain_vector_fails_fts_succeeds( - self, - tmp_index_path: Path, - sample_search_result: SearchResult, - sample_code_symbol_node: CodeSymbolNode, - ) -> None: - """Test seed fallback chain: vector -> exact. - - Input: query="init_db" - Mock: _search_vector returns [] - Mock: _search_exact returns 1 seed - Assert: Fallback chain called in order, uses exact's seed - """ - engine = HybridSearchEngine() - - call_order: List[str] = [] - - def track_vector(*args, **kwargs): - call_order.append("vector") - return [] - - def track_exact(*args, **kwargs): - call_order.append("exact") - return [sample_search_result] - - # Create mock graph - mock_graph = create_mock_graph_with_seed_and_related( - sample_search_result, - [sample_code_symbol_node], - ) - - with patch.object( - engine, "_search_vector", side_effect=track_vector - ) as mock_vector, patch.object( - engine, "_search_exact", side_effect=track_exact - ) as mock_exact: - with patch("codexlens.search.hybrid_search.HAS_LSP", True): - # Create mock LspBridge class - mock_bridge_instance = AsyncMock() - mock_bridge_class = MagicMock() - mock_bridge_class.return_value.__aenter__ = AsyncMock( - return_value=mock_bridge_instance - ) - mock_bridge_class.return_value.__aexit__ = AsyncMock( - return_value=None - ) - - # Create mock LspGraphBuilder - async def mock_build(seeds, bridge): - return mock_graph - - mock_builder_instance = MagicMock() - mock_builder_instance.build_from_seeds = mock_build - mock_builder_class = MagicMock(return_value=mock_builder_instance) - - with patch( - "codexlens.search.hybrid_search.LspBridge", - mock_bridge_class, - ), patch( - "codexlens.search.hybrid_search.LspGraphBuilder", - mock_builder_class, - ): - results = engine._search_lsp_graph( - index_path=tmp_index_path, - query="init_db", - limit=10, - max_depth=1, - max_nodes=20, - ) - - # Verify fallback chain order: vector -> exact - assert call_order == ["vector", "exact"] - - # Both methods should be called - mock_vector.assert_called_once() - mock_exact.assert_called_once() - - # Should return results from graph expansion (1 related node) - assert len(results) == 1 - - -# ----------------------------------------------------------------------------- -# P1: Important Tests -# ----------------------------------------------------------------------------- - - -class TestP1ImportantLspSearch: - """P1 Important: Edge case tests for LSP graph search.""" - - def test_e2e_lsp_search_no_seeds_found( - self, - tmp_index_path: Path, - ) -> None: - """Test LSP search when no seeds found from any source. - - Input: query="non_existent_symbol" - Mock: All seed search methods return [] - Assert: Returns [], LspBridge is not called - """ - engine = HybridSearchEngine() - - with patch.object( - engine, "_search_vector", return_value=[] - ) as mock_vector, patch.object( - engine, "_search_exact", return_value=[] - ) as mock_exact: - with patch("codexlens.search.hybrid_search.HAS_LSP", True): - # LspBridge should NOT be called when no seeds - mock_bridge_class = MagicMock() - - with patch( - "codexlens.search.hybrid_search.LspBridge", - mock_bridge_class, - ): - results = engine._search_lsp_graph( - index_path=tmp_index_path, - query="non_existent_symbol", - limit=10, - max_depth=1, - max_nodes=20, - ) - - # All search methods should be tried - mock_vector.assert_called_once() - mock_exact.assert_called_once() - - # Should return empty list - assert results == [] - - # LspBridge should not be instantiated (no seeds) - mock_bridge_class.assert_not_called() - - def test_e2e_lsp_search_bridge_fails( - self, - tmp_index_path: Path, - sample_search_result: SearchResult, - caplog: pytest.LogCaptureFixture, - ) -> None: - """Test graceful degradation when LspBridge connection fails. - - Mock: Seed search returns valid seed - Mock: LspBridge raises exception during expansion - Assert: Returns [], error handled gracefully - """ - engine = HybridSearchEngine() - - with patch.object( - engine, "_search_vector", return_value=[sample_search_result] - ): - with patch("codexlens.search.hybrid_search.HAS_LSP", True): - # Make LspBridge raise an error during async context - mock_bridge_class = MagicMock() - mock_bridge_class.return_value.__aenter__ = AsyncMock( - side_effect=Exception("Connection refused") - ) - mock_bridge_class.return_value.__aexit__ = AsyncMock( - return_value=None - ) - - mock_builder_class = MagicMock() - - with patch( - "codexlens.search.hybrid_search.LspBridge", - mock_bridge_class, - ), patch( - "codexlens.search.hybrid_search.LspGraphBuilder", - mock_builder_class, - ): - with caplog.at_level(logging.DEBUG): - results = engine._search_lsp_graph( - index_path=tmp_index_path, - query="authentication", - limit=10, - max_depth=1, - max_nodes=20, - ) - - # Should return empty list on failure - assert results == [] - - -# ----------------------------------------------------------------------------- -# P2: Supplementary Tests -# ----------------------------------------------------------------------------- - - -class TestP2SupplementaryLspSearch: - """P2 Supplementary: Deduplication and edge cases.""" - - def test_result_deduping_seed_not_returned( - self, - tmp_index_path: Path, - sample_search_result: SearchResult, - ) -> None: - """Test that seed results are deduplicated from final output. - - Mock: Seed search returns SearchResult(path="a.py", symbol_name="foo") - Mock: LspBridge also returns same symbol in graph - Assert: Final results do not contain duplicate seed symbol - """ - engine = HybridSearchEngine() - - # Create a different node that should be returned - different_node = CodeSymbolNode( - id="/different/path.py:other_func:100", - name="other_func", - kind="function", - file_path="/different/path.py", - range=Range( - start_line=100, - start_character=0, - end_line=110, - end_character=0, - ), - raw_code="def other_func(): pass", - docstring="Other function", - ) - - # Create mock graph with seed and one different node - mock_graph = create_mock_graph_with_seed_and_related( - sample_search_result, - [different_node], - ) - - with patch.object( - engine, "_search_vector", return_value=[sample_search_result] - ): - with patch("codexlens.search.hybrid_search.HAS_LSP", True): - mock_bridge_instance = AsyncMock() - mock_bridge_class = MagicMock() - mock_bridge_class.return_value.__aenter__ = AsyncMock( - return_value=mock_bridge_instance - ) - mock_bridge_class.return_value.__aexit__ = AsyncMock( - return_value=None - ) - - async def mock_build(seeds, bridge): - return mock_graph - - mock_builder_instance = MagicMock() - mock_builder_instance.build_from_seeds = mock_build - mock_builder_class = MagicMock(return_value=mock_builder_instance) - - with patch( - "codexlens.search.hybrid_search.LspBridge", - mock_bridge_class, - ), patch( - "codexlens.search.hybrid_search.LspGraphBuilder", - mock_builder_class, - ): - results = engine._search_lsp_graph( - index_path=tmp_index_path, - query="test query", - limit=10, - max_depth=1, - max_nodes=20, - ) - - # Should only return 1 result (the different node, not the seed) - assert len(results) == 1 - - # The seed should NOT be in results - result_paths = [r.path for r in results] - assert sample_search_result.path not in result_paths - - # The different node should be in results - assert "/different/path.py" in result_paths - - def test_lsp_not_available_returns_empty( - self, - tmp_index_path: Path, - ) -> None: - """Test that _search_lsp_graph returns [] when LSP dependencies unavailable.""" - engine = HybridSearchEngine() - - with patch("codexlens.search.hybrid_search.HAS_LSP", False): - results = engine._search_lsp_graph( - index_path=tmp_index_path, - query="test", - limit=10, - max_depth=1, - max_nodes=20, - ) - - assert results == [] - - def test_graph_with_no_new_nodes_returns_empty( - self, - tmp_index_path: Path, - sample_search_result: SearchResult, - ) -> None: - """Test when graph only contains seed nodes (no expansion).""" - engine = HybridSearchEngine() - - # Create graph with ONLY the seed node (no related nodes) - mock_graph = create_mock_graph_with_seed_and_related( - sample_search_result, - [], # No related nodes - ) - - with patch.object( - engine, "_search_vector", return_value=[sample_search_result] - ): - with patch("codexlens.search.hybrid_search.HAS_LSP", True): - mock_bridge_instance = AsyncMock() - mock_bridge_class = MagicMock() - mock_bridge_class.return_value.__aenter__ = AsyncMock( - return_value=mock_bridge_instance - ) - mock_bridge_class.return_value.__aexit__ = AsyncMock( - return_value=None - ) - - async def mock_build(seeds, bridge): - return mock_graph - - mock_builder_instance = MagicMock() - mock_builder_instance.build_from_seeds = mock_build - mock_builder_class = MagicMock(return_value=mock_builder_instance) - - with patch( - "codexlens.search.hybrid_search.LspBridge", - mock_bridge_class, - ), patch( - "codexlens.search.hybrid_search.LspGraphBuilder", - mock_builder_class, - ): - results = engine._search_lsp_graph( - index_path=tmp_index_path, - query="test", - limit=10, - max_depth=1, - max_nodes=20, - ) - - # Should return empty since all nodes are seeds (filtered out) - assert results == [] diff --git a/codex-lens/tests/lsp/__init__.py b/codex-lens/tests/lsp/__init__.py deleted file mode 100644 index 5366a486..00000000 --- a/codex-lens/tests/lsp/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests package for LSP module.""" diff --git a/codex-lens/tests/lsp/test_hover.py b/codex-lens/tests/lsp/test_hover.py deleted file mode 100644 index 4d77a043..00000000 --- a/codex-lens/tests/lsp/test_hover.py +++ /dev/null @@ -1,477 +0,0 @@ -"""Tests for hover provider.""" - -from __future__ import annotations - -import pytest -from pathlib import Path -from unittest.mock import Mock, MagicMock -import tempfile - -from codexlens.entities import Symbol - - -class TestHoverInfo: - """Test HoverInfo dataclass.""" - - def test_hover_info_import(self): - """HoverInfo can be imported.""" - pytest.importorskip("pygls") - pytest.importorskip("lsprotocol") - - from codexlens.lsp.providers import HoverInfo - - assert HoverInfo is not None - - def test_hover_info_fields(self): - """HoverInfo has all required fields.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverInfo - - info = HoverInfo( - name="my_function", - kind="function", - signature="def my_function(x: int) -> str:", - documentation="A test function.", - file_path="/test/file.py", - line_range=(10, 15), - ) - assert info.name == "my_function" - assert info.kind == "function" - assert info.signature == "def my_function(x: int) -> str:" - assert info.documentation == "A test function." - assert info.file_path == "/test/file.py" - assert info.line_range == (10, 15) - - def test_hover_info_optional_documentation(self): - """Documentation can be None.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverInfo - - info = HoverInfo( - name="func", - kind="function", - signature="def func():", - documentation=None, - file_path="/test.py", - line_range=(1, 2), - ) - assert info.documentation is None - - -class TestHoverProvider: - """Test HoverProvider class.""" - - def test_provider_import(self): - """HoverProvider can be imported.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverProvider - - assert HoverProvider is not None - - def test_returns_none_for_unknown_symbol(self): - """Returns None when symbol not found.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverProvider - - mock_index = Mock() - mock_index.search.return_value = [] - mock_registry = Mock() - - provider = HoverProvider(mock_index, mock_registry) - result = provider.get_hover_info("unknown_symbol") - - assert result is None - mock_index.search.assert_called_once_with( - name="unknown_symbol", limit=1, prefix_mode=False - ) - - def test_returns_none_for_non_exact_match(self): - """Returns None when search returns non-exact matches.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverProvider - - # Return a symbol with different name (prefix match but not exact) - mock_symbol = Mock() - mock_symbol.name = "my_function_extended" - mock_symbol.kind = "function" - mock_symbol.file = "/test/file.py" - mock_symbol.range = (10, 15) - - mock_index = Mock() - mock_index.search.return_value = [mock_symbol] - mock_registry = Mock() - - provider = HoverProvider(mock_index, mock_registry) - result = provider.get_hover_info("my_function") - - assert result is None - - def test_returns_hover_info_for_known_symbol(self): - """Returns HoverInfo for found symbol.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverProvider - - mock_symbol = Mock() - mock_symbol.name = "my_func" - mock_symbol.kind = "function" - mock_symbol.file = None # No file, will use fallback signature - mock_symbol.range = (10, 15) - - mock_index = Mock() - mock_index.search.return_value = [mock_symbol] - mock_registry = Mock() - - provider = HoverProvider(mock_index, mock_registry) - result = provider.get_hover_info("my_func") - - assert result is not None - assert result.name == "my_func" - assert result.kind == "function" - assert result.line_range == (10, 15) - assert result.signature == "function my_func" - - def test_extracts_signature_from_file(self): - """Extracts signature from actual file content.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverProvider - - # Create a temporary file with Python content - with tempfile.NamedTemporaryFile( - mode="w", suffix=".py", delete=False, encoding="utf-8" - ) as f: - f.write("# comment\n") - f.write("def test_function(x: int, y: str) -> bool:\n") - f.write(" return True\n") - temp_path = f.name - - try: - mock_symbol = Mock() - mock_symbol.name = "test_function" - mock_symbol.kind = "function" - mock_symbol.file = temp_path - mock_symbol.range = (2, 3) # Line 2 (1-based) - - mock_index = Mock() - mock_index.search.return_value = [mock_symbol] - - provider = HoverProvider(mock_index, None) - result = provider.get_hover_info("test_function") - - assert result is not None - assert "def test_function(x: int, y: str) -> bool:" in result.signature - finally: - Path(temp_path).unlink(missing_ok=True) - - def test_extracts_multiline_signature(self): - """Extracts multiline function signature.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverProvider - - # Create a temporary file with multiline signature - with tempfile.NamedTemporaryFile( - mode="w", suffix=".py", delete=False, encoding="utf-8" - ) as f: - f.write("def complex_function(\n") - f.write(" arg1: int,\n") - f.write(" arg2: str,\n") - f.write(") -> bool:\n") - f.write(" return True\n") - temp_path = f.name - - try: - mock_symbol = Mock() - mock_symbol.name = "complex_function" - mock_symbol.kind = "function" - mock_symbol.file = temp_path - mock_symbol.range = (1, 5) # Line 1 (1-based) - - mock_index = Mock() - mock_index.search.return_value = [mock_symbol] - - provider = HoverProvider(mock_index, None) - result = provider.get_hover_info("complex_function") - - assert result is not None - assert "def complex_function(" in result.signature - # Should capture multiline signature - assert "arg1: int" in result.signature - finally: - Path(temp_path).unlink(missing_ok=True) - - def test_handles_nonexistent_file_gracefully(self): - """Returns fallback signature when file doesn't exist.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverProvider - - mock_symbol = Mock() - mock_symbol.name = "my_func" - mock_symbol.kind = "function" - mock_symbol.file = "/nonexistent/path/file.py" - mock_symbol.range = (10, 15) - - mock_index = Mock() - mock_index.search.return_value = [mock_symbol] - - provider = HoverProvider(mock_index, None) - result = provider.get_hover_info("my_func") - - assert result is not None - assert result.signature == "function my_func" - - def test_handles_invalid_line_range(self): - """Returns fallback signature when line range is invalid.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverProvider - - with tempfile.NamedTemporaryFile( - mode="w", suffix=".py", delete=False, encoding="utf-8" - ) as f: - f.write("def test():\n") - f.write(" pass\n") - temp_path = f.name - - try: - mock_symbol = Mock() - mock_symbol.name = "test" - mock_symbol.kind = "function" - mock_symbol.file = temp_path - mock_symbol.range = (100, 105) # Line beyond file length - - mock_index = Mock() - mock_index.search.return_value = [mock_symbol] - - provider = HoverProvider(mock_index, None) - result = provider.get_hover_info("test") - - assert result is not None - assert result.signature == "function test" - finally: - Path(temp_path).unlink(missing_ok=True) - - -class TestFormatHoverMarkdown: - """Test markdown formatting.""" - - def test_format_python_signature(self): - """Formats Python signature with python code fence.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverInfo, HoverProvider - - info = HoverInfo( - name="func", - kind="function", - signature="def func(x: int) -> str:", - documentation=None, - file_path="/test/file.py", - line_range=(10, 15), - ) - mock_index = Mock() - provider = HoverProvider(mock_index, None) - - result = provider.format_hover_markdown(info) - - assert "```python" in result - assert "def func(x: int) -> str:" in result - assert "function" in result - assert "file.py" in result - assert "line 10" in result - - def test_format_javascript_signature(self): - """Formats JavaScript signature with javascript code fence.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverInfo, HoverProvider - - info = HoverInfo( - name="myFunc", - kind="function", - signature="function myFunc(x) {", - documentation=None, - file_path="/test/file.js", - line_range=(5, 10), - ) - mock_index = Mock() - provider = HoverProvider(mock_index, None) - - result = provider.format_hover_markdown(info) - - assert "```javascript" in result - assert "function myFunc(x) {" in result - - def test_format_typescript_signature(self): - """Formats TypeScript signature with typescript code fence.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverInfo, HoverProvider - - info = HoverInfo( - name="myFunc", - kind="function", - signature="function myFunc(x: number): string {", - documentation=None, - file_path="/test/file.ts", - line_range=(5, 10), - ) - mock_index = Mock() - provider = HoverProvider(mock_index, None) - - result = provider.format_hover_markdown(info) - - assert "```typescript" in result - - def test_format_with_documentation(self): - """Includes documentation when available.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverInfo, HoverProvider - - info = HoverInfo( - name="func", - kind="function", - signature="def func():", - documentation="This is a test function.", - file_path="/test/file.py", - line_range=(10, 15), - ) - mock_index = Mock() - provider = HoverProvider(mock_index, None) - - result = provider.format_hover_markdown(info) - - assert "This is a test function." in result - assert "---" in result # Separator before docs - - def test_format_without_documentation(self): - """Does not include documentation section when None.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverInfo, HoverProvider - - info = HoverInfo( - name="func", - kind="function", - signature="def func():", - documentation=None, - file_path="/test/file.py", - line_range=(10, 15), - ) - mock_index = Mock() - provider = HoverProvider(mock_index, None) - - result = provider.format_hover_markdown(info) - - # Should have one separator for location, not two - # The result should not have duplicate doc separator - lines = result.split("\n") - separator_count = sum(1 for line in lines if line.strip() == "---") - assert separator_count == 1 # Only location separator - - def test_format_unknown_extension(self): - """Uses empty code fence for unknown file extensions.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverInfo, HoverProvider - - info = HoverInfo( - name="func", - kind="function", - signature="func code here", - documentation=None, - file_path="/test/file.xyz", - line_range=(1, 2), - ) - mock_index = Mock() - provider = HoverProvider(mock_index, None) - - result = provider.format_hover_markdown(info) - - # Should have code fence without language specifier - assert "```\n" in result or "```xyz" not in result - - def test_format_class_symbol(self): - """Formats class symbol correctly.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverInfo, HoverProvider - - info = HoverInfo( - name="MyClass", - kind="class", - signature="class MyClass:", - documentation=None, - file_path="/test/file.py", - line_range=(1, 20), - ) - mock_index = Mock() - provider = HoverProvider(mock_index, None) - - result = provider.format_hover_markdown(info) - - assert "class MyClass:" in result - assert "*class*" in result - assert "line 1" in result - - def test_format_empty_file_path(self): - """Handles empty file path gracefully.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverInfo, HoverProvider - - info = HoverInfo( - name="func", - kind="function", - signature="def func():", - documentation=None, - file_path="", - line_range=(1, 2), - ) - mock_index = Mock() - provider = HoverProvider(mock_index, None) - - result = provider.format_hover_markdown(info) - - assert "unknown" in result or "```" in result - - -class TestHoverProviderRegistry: - """Test HoverProvider with registry integration.""" - - def test_provider_accepts_none_registry(self): - """HoverProvider works without registry.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverProvider - - mock_index = Mock() - mock_index.search.return_value = [] - - provider = HoverProvider(mock_index, None) - result = provider.get_hover_info("test") - - assert result is None - assert provider.registry is None - - def test_provider_stores_registry(self): - """HoverProvider stores registry reference.""" - pytest.importorskip("pygls") - - from codexlens.lsp.providers import HoverProvider - - mock_index = Mock() - mock_registry = Mock() - - provider = HoverProvider(mock_index, mock_registry) - - assert provider.global_index is mock_index - assert provider.registry is mock_registry diff --git a/codex-lens/tests/lsp/test_lsp_edge_cases.py b/codex-lens/tests/lsp/test_lsp_edge_cases.py deleted file mode 100644 index 796b28cd..00000000 --- a/codex-lens/tests/lsp/test_lsp_edge_cases.py +++ /dev/null @@ -1,101 +0,0 @@ -"""LSP Edge Case Tests. - -This module tests edge cases and error conditions in LSP (Language Server Protocol) -operations, including timeout handling, protocol errors, and connection failures. - -Test Coverage: -- Timeout scenarios for LSP operations -- Protocol errors and malformed responses -- Connection failures and recovery -- Concurrent request handling -""" - -import pytest -from pathlib import Path -from unittest.mock import Mock, patch, MagicMock -import time - - -class TestLSPTimeouts: - """Test timeout handling in LSP operations.""" - - def test_hover_request_timeout(self): - """Test that hover requests timeout appropriately after configured duration.""" - # This is a placeholder for actual timeout testing - # Implementation requires mocking LSP client with delayed response - pytest.skip("Requires LSP server fixture setup") - - def test_definition_request_timeout(self): - """Test that go-to-definition requests timeout appropriately.""" - pytest.skip("Requires LSP server fixture setup") - - def test_references_request_timeout(self): - """Test that find-references requests timeout appropriately.""" - pytest.skip("Requires LSP server fixture setup") - - def test_concurrent_requests_with_timeout(self): - """Test behavior when multiple requests exceed timeout threshold.""" - pytest.skip("Requires LSP server fixture setup") - - -class TestLSPProtocolErrors: - """Test handling of LSP protocol errors.""" - - def test_malformed_json_response(self): - """Test handling of malformed JSON in LSP responses.""" - pytest.skip("Requires LSP client fixture") - - def test_invalid_method_error(self): - """Test handling of unknown/invalid method calls.""" - pytest.skip("Requires LSP client fixture") - - def test_missing_required_params(self): - """Test handling of responses with missing required parameters.""" - pytest.skip("Requires LSP client fixture") - - def test_null_result_handling(self): - """Test that null results from LSP are handled gracefully.""" - pytest.skip("Requires LSP client fixture") - - -class TestLSPConnectionFailures: - """Test LSP connection failure scenarios.""" - - def test_server_not_found(self): - """Test behavior when LSP server is not available.""" - pytest.skip("Requires LSP client fixture") - - def test_connection_dropped_mid_request(self): - """Test handling of dropped connections during active requests.""" - pytest.skip("Requires LSP client fixture") - - def test_connection_retry_logic(self): - """Test that connection retry logic works as expected.""" - pytest.skip("Requires LSP client fixture") - - def test_server_startup_failure(self): - """Test handling of LSP server startup failures.""" - pytest.skip("Requires LSP server fixture") - - -class TestLSPResourceLimits: - """Test LSP behavior under resource constraints.""" - - def test_large_file_handling(self): - """Test LSP operations on very large source files.""" - pytest.skip("Requires test file fixtures") - - def test_memory_pressure(self): - """Test LSP behavior under memory pressure.""" - pytest.skip("Requires memory simulation") - - def test_concurrent_request_limits(self): - """Test handling of too many concurrent LSP requests.""" - pytest.skip("Requires LSP client fixture") - - -# TODO: Implement actual tests using pytest fixtures and LSP mock objects -# The test infrastructure needs to be set up with: -# - LSP server fixture (maybe using pygls test server) -# - LSP client fixture with configurable delays/errors -# - Test file fixtures with various code patterns diff --git a/codex-lens/tests/lsp/test_packaging_metadata.py b/codex-lens/tests/lsp/test_packaging_metadata.py deleted file mode 100644 index b51d0d50..00000000 --- a/codex-lens/tests/lsp/test_packaging_metadata.py +++ /dev/null @@ -1,27 +0,0 @@ -"""Packaging metadata tests for codex-lens (LSP/semantic extras).""" - -from __future__ import annotations - -from pathlib import Path - - -def _read_pyproject() -> str: - repo_root = Path(__file__).resolve().parents[2] - return (repo_root / "pyproject.toml").read_text(encoding="utf-8") - - -def test_lsp_script_entrypoint_points_to_server_main() -> None: - pyproject = _read_pyproject() - assert 'codexlens-lsp = "codexlens.lsp.server:main"' in pyproject - - -def test_semantic_extras_do_not_pin_yanked_fastembed_020() -> None: - pyproject = _read_pyproject() - assert "fastembed~=0.2.0" not in pyproject - assert "fastembed~=0.2.1" in pyproject - - -def test_click_dependency_is_explicitly_guarded() -> None: - pyproject = _read_pyproject() - assert "click>=8.0.0,<9" in pyproject - diff --git a/codex-lens/tests/lsp/test_references.py b/codex-lens/tests/lsp/test_references.py deleted file mode 100644 index 78e04081..00000000 --- a/codex-lens/tests/lsp/test_references.py +++ /dev/null @@ -1,497 +0,0 @@ -"""Tests for reference search functionality. - -This module tests the ReferenceResult dataclass and search_references method -in ChainSearchEngine, as well as the updated lsp_references handler. -""" - -from __future__ import annotations - -import pytest -from pathlib import Path -from unittest.mock import Mock, MagicMock, patch -import sqlite3 -import tempfile -import os - - -class TestReferenceResult: - """Test ReferenceResult dataclass.""" - - def test_reference_result_fields(self): - """ReferenceResult has all required fields.""" - from codexlens.search.chain_search import ReferenceResult - - ref = ReferenceResult( - file_path="/test/file.py", - line=10, - column=5, - context="def foo():", - relationship_type="call", - ) - assert ref.file_path == "/test/file.py" - assert ref.line == 10 - assert ref.column == 5 - assert ref.context == "def foo():" - assert ref.relationship_type == "call" - - def test_reference_result_with_empty_context(self): - """ReferenceResult can have empty context.""" - from codexlens.search.chain_search import ReferenceResult - - ref = ReferenceResult( - file_path="/test/file.py", - line=1, - column=0, - context="", - relationship_type="import", - ) - assert ref.context == "" - - def test_reference_result_different_relationship_types(self): - """ReferenceResult supports different relationship types.""" - from codexlens.search.chain_search import ReferenceResult - - types = ["call", "import", "inheritance", "implementation", "usage"] - for rel_type in types: - ref = ReferenceResult( - file_path="/test/file.py", - line=1, - column=0, - context="test", - relationship_type=rel_type, - ) - assert ref.relationship_type == rel_type - - -class TestExtractContext: - """Test the _extract_context helper method.""" - - def test_extract_context_middle_of_file(self): - """Extract context from middle of file.""" - from codexlens.search.chain_search import ChainSearchEngine, ReferenceResult - - content = "\n".join([ - "line 1", - "line 2", - "line 3", - "line 4", # target line - "line 5", - "line 6", - "line 7", - ]) - - # Create minimal mock engine to test _extract_context - mock_registry = Mock() - mock_mapper = Mock() - - engine = ChainSearchEngine(mock_registry, mock_mapper) - context = engine._extract_context(content, line=4, context_lines=2) - - assert "line 2" in context - assert "line 3" in context - assert "line 4" in context - assert "line 5" in context - assert "line 6" in context - - def test_extract_context_start_of_file(self): - """Extract context at start of file.""" - from codexlens.search.chain_search import ChainSearchEngine - - content = "\n".join([ - "line 1", # target - "line 2", - "line 3", - "line 4", - ]) - - mock_registry = Mock() - mock_mapper = Mock() - - engine = ChainSearchEngine(mock_registry, mock_mapper) - context = engine._extract_context(content, line=1, context_lines=2) - - assert "line 1" in context - assert "line 2" in context - assert "line 3" in context - - def test_extract_context_end_of_file(self): - """Extract context at end of file.""" - from codexlens.search.chain_search import ChainSearchEngine - - content = "\n".join([ - "line 1", - "line 2", - "line 3", - "line 4", # target - ]) - - mock_registry = Mock() - mock_mapper = Mock() - - engine = ChainSearchEngine(mock_registry, mock_mapper) - context = engine._extract_context(content, line=4, context_lines=2) - - assert "line 2" in context - assert "line 3" in context - assert "line 4" in context - - def test_extract_context_empty_content(self): - """Extract context from empty content.""" - from codexlens.search.chain_search import ChainSearchEngine - - mock_registry = Mock() - mock_mapper = Mock() - - engine = ChainSearchEngine(mock_registry, mock_mapper) - context = engine._extract_context("", line=1, context_lines=3) - - assert context == "" - - def test_extract_context_invalid_line(self): - """Extract context with invalid line number.""" - from codexlens.search.chain_search import ChainSearchEngine - - content = "line 1\nline 2\nline 3" - - mock_registry = Mock() - mock_mapper = Mock() - - engine = ChainSearchEngine(mock_registry, mock_mapper) - - # Line 0 (invalid) - assert engine._extract_context(content, line=0, context_lines=1) == "" - - # Line beyond end - assert engine._extract_context(content, line=100, context_lines=1) == "" - - -class TestSearchReferences: - """Test search_references method.""" - - def test_returns_empty_for_no_source_path_and_no_registry(self): - """Returns empty list when no source path and registry has no mappings.""" - from codexlens.search.chain_search import ChainSearchEngine - - mock_registry = Mock() - mock_registry.list_mappings.return_value = [] - mock_mapper = Mock() - - engine = ChainSearchEngine(mock_registry, mock_mapper) - results = engine.search_references("test_symbol") - - assert results == [] - - def test_returns_empty_for_no_indexes(self): - """Returns empty list when no indexes found.""" - from codexlens.search.chain_search import ChainSearchEngine - - mock_registry = Mock() - mock_mapper = Mock() - mock_mapper.source_to_index_db.return_value = Path("/nonexistent/_index.db") - - engine = ChainSearchEngine(mock_registry, mock_mapper) - - with patch.object(engine, "_find_start_index", return_value=None): - results = engine.search_references("test_symbol", Path("/some/path")) - - assert results == [] - - def test_deduplicates_results(self): - """Removes duplicate file:line references.""" - from codexlens.search.chain_search import ChainSearchEngine, ReferenceResult - - mock_registry = Mock() - mock_mapper = Mock() - - engine = ChainSearchEngine(mock_registry, mock_mapper) - - # Create a temporary database with duplicate relationships - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "_index.db" - conn = sqlite3.connect(str(db_path)) - conn.executescript(""" - CREATE TABLE files ( - id INTEGER PRIMARY KEY, - path TEXT NOT NULL, - language TEXT NOT NULL, - content TEXT NOT NULL - ); - CREATE TABLE symbols ( - id INTEGER PRIMARY KEY, - file_id INTEGER NOT NULL, - name TEXT NOT NULL, - kind TEXT NOT NULL, - start_line INTEGER NOT NULL, - end_line INTEGER NOT NULL - ); - CREATE TABLE code_relationships ( - id INTEGER PRIMARY KEY, - source_symbol_id INTEGER NOT NULL, - target_qualified_name TEXT NOT NULL, - relationship_type TEXT NOT NULL, - source_line INTEGER NOT NULL, - target_file TEXT - ); - - INSERT INTO files VALUES (1, '/test/file.py', 'python', 'def test(): pass'); - INSERT INTO symbols VALUES (1, 1, 'test_func', 'function', 1, 1); - INSERT INTO code_relationships VALUES (1, 1, 'target_func', 'call', 10, NULL); - INSERT INTO code_relationships VALUES (2, 1, 'target_func', 'call', 10, NULL); - """) - conn.commit() - conn.close() - - with patch.object(engine, "_find_start_index", return_value=db_path): - with patch.object(engine, "_collect_index_paths", return_value=[db_path]): - results = engine.search_references("target_func", Path(tmpdir)) - - # Should only have 1 result due to deduplication - assert len(results) == 1 - assert results[0].line == 10 - - def test_sorts_by_file_and_line(self): - """Results sorted by file path then line number.""" - from codexlens.search.chain_search import ChainSearchEngine, ReferenceResult - - mock_registry = Mock() - mock_mapper = Mock() - - engine = ChainSearchEngine(mock_registry, mock_mapper) - - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "_index.db" - conn = sqlite3.connect(str(db_path)) - conn.executescript(""" - CREATE TABLE files ( - id INTEGER PRIMARY KEY, - path TEXT NOT NULL, - language TEXT NOT NULL, - content TEXT NOT NULL - ); - CREATE TABLE symbols ( - id INTEGER PRIMARY KEY, - file_id INTEGER NOT NULL, - name TEXT NOT NULL, - kind TEXT NOT NULL, - start_line INTEGER NOT NULL, - end_line INTEGER NOT NULL - ); - CREATE TABLE code_relationships ( - id INTEGER PRIMARY KEY, - source_symbol_id INTEGER NOT NULL, - target_qualified_name TEXT NOT NULL, - relationship_type TEXT NOT NULL, - source_line INTEGER NOT NULL, - target_file TEXT - ); - - INSERT INTO files VALUES (1, '/test/b_file.py', 'python', 'content'); - INSERT INTO files VALUES (2, '/test/a_file.py', 'python', 'content'); - INSERT INTO symbols VALUES (1, 1, 'func1', 'function', 1, 1); - INSERT INTO symbols VALUES (2, 2, 'func2', 'function', 1, 1); - INSERT INTO code_relationships VALUES (1, 1, 'target', 'call', 20, NULL); - INSERT INTO code_relationships VALUES (2, 1, 'target', 'call', 10, NULL); - INSERT INTO code_relationships VALUES (3, 2, 'target', 'call', 5, NULL); - """) - conn.commit() - conn.close() - - with patch.object(engine, "_find_start_index", return_value=db_path): - with patch.object(engine, "_collect_index_paths", return_value=[db_path]): - results = engine.search_references("target", Path(tmpdir)) - - # Should be sorted: a_file.py:5, b_file.py:10, b_file.py:20 - assert len(results) == 3 - assert results[0].file_path == "/test/a_file.py" - assert results[0].line == 5 - assert results[1].file_path == "/test/b_file.py" - assert results[1].line == 10 - assert results[2].file_path == "/test/b_file.py" - assert results[2].line == 20 - - def test_respects_limit(self): - """Returns at most limit results.""" - from codexlens.search.chain_search import ChainSearchEngine - - mock_registry = Mock() - mock_mapper = Mock() - - engine = ChainSearchEngine(mock_registry, mock_mapper) - - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "_index.db" - conn = sqlite3.connect(str(db_path)) - conn.executescript(""" - CREATE TABLE files ( - id INTEGER PRIMARY KEY, - path TEXT NOT NULL, - language TEXT NOT NULL, - content TEXT NOT NULL - ); - CREATE TABLE symbols ( - id INTEGER PRIMARY KEY, - file_id INTEGER NOT NULL, - name TEXT NOT NULL, - kind TEXT NOT NULL, - start_line INTEGER NOT NULL, - end_line INTEGER NOT NULL - ); - CREATE TABLE code_relationships ( - id INTEGER PRIMARY KEY, - source_symbol_id INTEGER NOT NULL, - target_qualified_name TEXT NOT NULL, - relationship_type TEXT NOT NULL, - source_line INTEGER NOT NULL, - target_file TEXT - ); - - INSERT INTO files VALUES (1, '/test/file.py', 'python', 'content'); - INSERT INTO symbols VALUES (1, 1, 'func', 'function', 1, 1); - """) - # Insert many relationships - for i in range(50): - conn.execute( - "INSERT INTO code_relationships VALUES (?, 1, 'target', 'call', ?, NULL)", - (i + 1, i + 1) - ) - conn.commit() - conn.close() - - with patch.object(engine, "_find_start_index", return_value=db_path): - with patch.object(engine, "_collect_index_paths", return_value=[db_path]): - results = engine.search_references("target", Path(tmpdir), limit=10) - - assert len(results) == 10 - - def test_matches_qualified_name(self): - """Matches symbols by qualified name suffix.""" - from codexlens.search.chain_search import ChainSearchEngine - - mock_registry = Mock() - mock_mapper = Mock() - - engine = ChainSearchEngine(mock_registry, mock_mapper) - - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "_index.db" - conn = sqlite3.connect(str(db_path)) - conn.executescript(""" - CREATE TABLE files ( - id INTEGER PRIMARY KEY, - path TEXT NOT NULL, - language TEXT NOT NULL, - content TEXT NOT NULL - ); - CREATE TABLE symbols ( - id INTEGER PRIMARY KEY, - file_id INTEGER NOT NULL, - name TEXT NOT NULL, - kind TEXT NOT NULL, - start_line INTEGER NOT NULL, - end_line INTEGER NOT NULL - ); - CREATE TABLE code_relationships ( - id INTEGER PRIMARY KEY, - source_symbol_id INTEGER NOT NULL, - target_qualified_name TEXT NOT NULL, - relationship_type TEXT NOT NULL, - source_line INTEGER NOT NULL, - target_file TEXT - ); - - INSERT INTO files VALUES (1, '/test/file.py', 'python', 'content'); - INSERT INTO symbols VALUES (1, 1, 'caller', 'function', 1, 1); - -- Fully qualified name - INSERT INTO code_relationships VALUES (1, 1, 'module.submodule.target_func', 'call', 10, NULL); - -- Simple name - INSERT INTO code_relationships VALUES (2, 1, 'target_func', 'call', 20, NULL); - """) - conn.commit() - conn.close() - - with patch.object(engine, "_find_start_index", return_value=db_path): - with patch.object(engine, "_collect_index_paths", return_value=[db_path]): - results = engine.search_references("target_func", Path(tmpdir)) - - # Should find both references - assert len(results) == 2 - - -class TestLspReferencesHandler: - """Test the LSP references handler.""" - - def test_handler_uses_search_engine(self): - """Handler uses search_engine.search_references when available.""" - pytest.importorskip("pygls") - pytest.importorskip("lsprotocol") - - from lsprotocol import types as lsp - from codexlens.lsp.handlers import _path_to_uri - from codexlens.search.chain_search import ReferenceResult - - # Create mock references - mock_references = [ - ReferenceResult( - file_path="/test/file1.py", - line=10, - column=5, - context="def foo():", - relationship_type="call", - ), - ReferenceResult( - file_path="/test/file2.py", - line=20, - column=0, - context="import foo", - relationship_type="import", - ), - ] - - # Verify conversion to LSP Location - locations = [] - for ref in mock_references: - locations.append( - lsp.Location( - uri=_path_to_uri(ref.file_path), - range=lsp.Range( - start=lsp.Position( - line=max(0, ref.line - 1), - character=ref.column, - ), - end=lsp.Position( - line=max(0, ref.line - 1), - character=ref.column + len("foo"), - ), - ), - ) - ) - - assert len(locations) == 2 - # First reference at line 10 (0-indexed = 9) - assert locations[0].range.start.line == 9 - assert locations[0].range.start.character == 5 - # Second reference at line 20 (0-indexed = 19) - assert locations[1].range.start.line == 19 - assert locations[1].range.start.character == 0 - - def test_handler_falls_back_to_global_index(self): - """Handler falls back to global_index when search_engine unavailable.""" - pytest.importorskip("pygls") - pytest.importorskip("lsprotocol") - - from codexlens.lsp.handlers import symbol_to_location - from codexlens.entities import Symbol - - # Test fallback path converts Symbol to Location - symbol = Symbol( - name="test_func", - kind="function", - range=(10, 15), - file="/test/file.py", - ) - - location = symbol_to_location(symbol) - assert location is not None - # LSP uses 0-based lines - assert location.range.start.line == 9 - assert location.range.end.line == 14 diff --git a/codex-lens/tests/lsp/test_server.py b/codex-lens/tests/lsp/test_server.py deleted file mode 100644 index a1b8000d..00000000 --- a/codex-lens/tests/lsp/test_server.py +++ /dev/null @@ -1,210 +0,0 @@ -"""Tests for codex-lens LSP server.""" - -from __future__ import annotations - -import pytest -from pathlib import Path -from unittest.mock import MagicMock, patch - -from codexlens.entities import Symbol - - -class TestCodexLensLanguageServer: - """Tests for CodexLensLanguageServer.""" - - def test_server_import(self): - """Test that server module can be imported.""" - pytest.importorskip("pygls") - pytest.importorskip("lsprotocol") - - from codexlens.lsp.server import CodexLensLanguageServer, server - - assert CodexLensLanguageServer is not None - assert server is not None - assert server.name == "codexlens-lsp" - - def test_server_initialization(self): - """Test server instance creation.""" - pytest.importorskip("pygls") - - from codexlens.lsp.server import CodexLensLanguageServer - - ls = CodexLensLanguageServer() - assert ls.registry is None - assert ls.mapper is None - assert ls.global_index is None - assert ls.search_engine is None - assert ls.workspace_root is None - - -class TestDefinitionHandler: - """Tests for definition handler.""" - - def test_definition_lookup(self): - """Test definition lookup returns location for known symbol.""" - pytest.importorskip("pygls") - pytest.importorskip("lsprotocol") - - from lsprotocol import types as lsp - from codexlens.lsp.handlers import symbol_to_location - - symbol = Symbol( - name="test_function", - kind="function", - range=(10, 15), - file="/path/to/file.py", - ) - - location = symbol_to_location(symbol) - - assert location is not None - assert isinstance(location, lsp.Location) - # LSP uses 0-based lines - assert location.range.start.line == 9 - assert location.range.end.line == 14 - - def test_definition_no_file(self): - """Test definition lookup returns None for symbol without file.""" - pytest.importorskip("pygls") - - from codexlens.lsp.handlers import symbol_to_location - - symbol = Symbol( - name="test_function", - kind="function", - range=(10, 15), - file=None, - ) - - location = symbol_to_location(symbol) - assert location is None - - -class TestCompletionHandler: - """Tests for completion handler.""" - - def test_get_prefix_at_position(self): - """Test extracting prefix at cursor position.""" - pytest.importorskip("pygls") - - from codexlens.lsp.handlers import _get_prefix_at_position - - document_text = "def hello_world():\n print(hel" - - # Cursor at end of "hel" - prefix = _get_prefix_at_position(document_text, 1, 14) - assert prefix == "hel" - - # Cursor at beginning of line (after whitespace) - prefix = _get_prefix_at_position(document_text, 1, 4) - assert prefix == "" - - # Cursor after "he" in "hello_world" - returns text before cursor - prefix = _get_prefix_at_position(document_text, 0, 6) - assert prefix == "he" - - # Cursor at end of "hello_world" - prefix = _get_prefix_at_position(document_text, 0, 15) - assert prefix == "hello_world" - - def test_get_word_at_position(self): - """Test extracting word at cursor position.""" - pytest.importorskip("pygls") - - from codexlens.lsp.handlers import _get_word_at_position - - document_text = "def hello_world():\n print(msg)" - - # Cursor on "hello_world" - word = _get_word_at_position(document_text, 0, 6) - assert word == "hello_world" - - # Cursor on "print" - word = _get_word_at_position(document_text, 1, 6) - assert word == "print" - - # Cursor on "msg" - word = _get_word_at_position(document_text, 1, 11) - assert word == "msg" - - def test_symbol_kind_mapping(self): - """Test symbol kind to completion kind mapping.""" - pytest.importorskip("pygls") - pytest.importorskip("lsprotocol") - - from lsprotocol import types as lsp - from codexlens.lsp.handlers import _symbol_kind_to_completion_kind - - assert _symbol_kind_to_completion_kind("function") == lsp.CompletionItemKind.Function - assert _symbol_kind_to_completion_kind("class") == lsp.CompletionItemKind.Class - assert _symbol_kind_to_completion_kind("method") == lsp.CompletionItemKind.Method - assert _symbol_kind_to_completion_kind("variable") == lsp.CompletionItemKind.Variable - - # Unknown kind should default to Text - assert _symbol_kind_to_completion_kind("unknown") == lsp.CompletionItemKind.Text - - -class TestWorkspaceSymbolHandler: - """Tests for workspace symbol handler.""" - - def test_symbol_kind_to_lsp(self): - """Test symbol kind to LSP SymbolKind mapping.""" - pytest.importorskip("pygls") - pytest.importorskip("lsprotocol") - - from lsprotocol import types as lsp - from codexlens.lsp.handlers import _symbol_kind_to_lsp - - assert _symbol_kind_to_lsp("function") == lsp.SymbolKind.Function - assert _symbol_kind_to_lsp("class") == lsp.SymbolKind.Class - assert _symbol_kind_to_lsp("method") == lsp.SymbolKind.Method - assert _symbol_kind_to_lsp("interface") == lsp.SymbolKind.Interface - - # Unknown kind should default to Variable - assert _symbol_kind_to_lsp("unknown") == lsp.SymbolKind.Variable - - -class TestUriConversion: - """Tests for URI path conversion.""" - - def test_path_to_uri(self): - """Test path to URI conversion.""" - pytest.importorskip("pygls") - - from codexlens.lsp.handlers import _path_to_uri - - # Unix path - uri = _path_to_uri("/home/user/file.py") - assert uri.startswith("file://") - assert "file.py" in uri - - def test_uri_to_path(self): - """Test URI to path conversion.""" - pytest.importorskip("pygls") - - from codexlens.lsp.handlers import _uri_to_path - - # Basic URI - path = _uri_to_path("file:///home/user/file.py") - assert path.name == "file.py" - - -class TestMainEntryPoint: - """Tests for main entry point.""" - - def test_main_help(self): - """Test that main shows help without errors.""" - pytest.importorskip("pygls") - - import sys - from unittest.mock import patch - - # Patch sys.argv to show help - with patch.object(sys, 'argv', ['codexlens-lsp', '--help']): - from codexlens.lsp.server import main - - with pytest.raises(SystemExit) as exc_info: - main() - - # Help exits with 0 - assert exc_info.value.code == 0 diff --git a/codex-lens/tests/lsp/test_standalone_manager_defaults.py b/codex-lens/tests/lsp/test_standalone_manager_defaults.py deleted file mode 100644 index fe0a9cb6..00000000 --- a/codex-lens/tests/lsp/test_standalone_manager_defaults.py +++ /dev/null @@ -1,31 +0,0 @@ -"""Tests for StandaloneLspManager default config behavior.""" - -from __future__ import annotations - -import asyncio -import logging -from pathlib import Path - -import pytest - -from codexlens.lsp.standalone_manager import StandaloneLspManager - - -def test_loads_builtin_defaults_when_no_config_found( - tmp_path: Path, caplog: pytest.LogCaptureFixture -) -> None: - manager = StandaloneLspManager(workspace_root=str(tmp_path)) - - with caplog.at_level(logging.INFO): - asyncio.run(manager.start()) - - assert manager._configs # type: ignore[attr-defined] - assert manager.get_language_id(str(tmp_path / "example.py")) == "python" - - expected_root = str(tmp_path / "lsp-servers.json") - expected_codexlens = str(tmp_path / ".codexlens" / "lsp-servers.json") - - assert "using built-in defaults" in caplog.text.lower() - assert expected_root in caplog.text - assert expected_codexlens in caplog.text - diff --git a/codex-lens/tests/lsp/test_standalone_manager_paths.py b/codex-lens/tests/lsp/test_standalone_manager_paths.py deleted file mode 100644 index 39b74584..00000000 --- a/codex-lens/tests/lsp/test_standalone_manager_paths.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Tests for StandaloneLspManager path normalization (Windows URI handling).""" - -from __future__ import annotations - -import platform - -from codexlens.lsp.standalone_manager import StandaloneLspManager - - -def test_normalize_file_uri_percent_encoded_windows_drive() -> None: - if platform.system() != "Windows": - return - - manager = StandaloneLspManager(workspace_root="D:/Claude_dms3/codex-lens") - - raw = "file:///d%3A/Claude_dms3/codex-lens/src/codexlens/lsp/standalone_manager.py" - normalized = manager._normalize_file_path(raw) - - assert normalized.lower().startswith("d:/") - assert "%3a" not in normalized.lower() - assert "d%3a" not in normalized.lower() - assert "/d%3a" not in normalized.lower() - - -def test_normalize_uri_path_percent_encoded_windows_drive() -> None: - if platform.system() != "Windows": - return - - manager = StandaloneLspManager(workspace_root="D:/Claude_dms3/codex-lens") - - raw = "/d%3A/Claude_dms3/codex-lens/src/codexlens/lsp/standalone_manager.py" - normalized = manager._normalize_file_path(raw) - - assert normalized.lower().startswith("d:/") - assert "%3a" not in normalized.lower() - - -def test_normalize_plain_windows_path_is_unchanged() -> None: - if platform.system() != "Windows": - return - - manager = StandaloneLspManager(workspace_root="D:/Claude_dms3/codex-lens") - - raw = r"D:\Claude_dms3\codex-lens\src\codexlens\lsp\standalone_manager.py" - normalized = manager._normalize_file_path(raw) - - assert normalized == raw - diff --git a/codex-lens/tests/mcp/__init__.py b/codex-lens/tests/mcp/__init__.py deleted file mode 100644 index 2fa2b8ff..00000000 --- a/codex-lens/tests/mcp/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for MCP (Model Context Protocol) module.""" diff --git a/codex-lens/tests/mcp/test_hooks.py b/codex-lens/tests/mcp/test_hooks.py deleted file mode 100644 index 4a650ed1..00000000 --- a/codex-lens/tests/mcp/test_hooks.py +++ /dev/null @@ -1,208 +0,0 @@ -"""Tests for MCP hooks module.""" - -import pytest -from unittest.mock import Mock, patch -from pathlib import Path - -from codexlens.mcp.hooks import HookManager, create_context_for_prompt -from codexlens.mcp.schema import MCPContext, SymbolInfo - - -class TestHookManager: - """Test HookManager class.""" - - @pytest.fixture - def mock_provider(self): - """Create a mock MCP provider.""" - provider = Mock() - provider.build_context.return_value = MCPContext( - symbol=SymbolInfo("test_func", "function", "/test.py", 1, 10), - context_type="symbol_explanation", - ) - provider.build_context_for_file.return_value = MCPContext( - context_type="file_overview", - ) - return provider - - @pytest.fixture - def hook_manager(self, mock_provider): - """Create a HookManager with mocked provider.""" - return HookManager(mock_provider) - - def test_default_hooks_registered(self, hook_manager): - """Default hooks are registered on initialization.""" - assert "explain" in hook_manager._pre_hooks - assert "refactor" in hook_manager._pre_hooks - assert "document" in hook_manager._pre_hooks - - def test_execute_pre_hook_returns_context(self, hook_manager, mock_provider): - """execute_pre_hook returns MCPContext for registered hook.""" - result = hook_manager.execute_pre_hook("explain", {"symbol": "my_func"}) - - assert result is not None - assert isinstance(result, MCPContext) - mock_provider.build_context.assert_called_once() - - def test_execute_pre_hook_returns_none_for_unknown_action(self, hook_manager): - """execute_pre_hook returns None for unregistered action.""" - result = hook_manager.execute_pre_hook("unknown_action", {"symbol": "test"}) - - assert result is None - - def test_execute_pre_hook_handles_exception(self, hook_manager, mock_provider): - """execute_pre_hook handles provider exceptions gracefully.""" - mock_provider.build_context.side_effect = Exception("Provider failed") - - result = hook_manager.execute_pre_hook("explain", {"symbol": "my_func"}) - - assert result is None - - def test_execute_post_hook_no_error_for_unregistered(self, hook_manager): - """execute_post_hook doesn't error for unregistered action.""" - # Should not raise - hook_manager.execute_post_hook("unknown", {"result": "data"}) - - def test_pre_explain_hook_calls_build_context(self, hook_manager, mock_provider): - """_pre_explain_hook calls build_context correctly.""" - hook_manager.execute_pre_hook("explain", {"symbol": "my_func"}) - - mock_provider.build_context.assert_called_with( - symbol_name="my_func", - context_type="symbol_explanation", - include_references=True, - include_related=True, - ) - - def test_pre_explain_hook_returns_none_without_symbol(self, hook_manager, mock_provider): - """_pre_explain_hook returns None when symbol param missing.""" - result = hook_manager.execute_pre_hook("explain", {}) - - assert result is None - mock_provider.build_context.assert_not_called() - - def test_pre_refactor_hook_calls_build_context(self, hook_manager, mock_provider): - """_pre_refactor_hook calls build_context with refactor settings.""" - hook_manager.execute_pre_hook("refactor", {"symbol": "my_class"}) - - mock_provider.build_context.assert_called_with( - symbol_name="my_class", - context_type="refactor_context", - include_references=True, - include_related=True, - max_references=20, - ) - - def test_pre_refactor_hook_returns_none_without_symbol(self, hook_manager, mock_provider): - """_pre_refactor_hook returns None when symbol param missing.""" - result = hook_manager.execute_pre_hook("refactor", {}) - - assert result is None - mock_provider.build_context.assert_not_called() - - def test_pre_document_hook_with_symbol(self, hook_manager, mock_provider): - """_pre_document_hook uses build_context when symbol provided.""" - hook_manager.execute_pre_hook("document", {"symbol": "my_func"}) - - mock_provider.build_context.assert_called_with( - symbol_name="my_func", - context_type="documentation_context", - include_references=False, - include_related=True, - ) - - def test_pre_document_hook_with_file_path(self, hook_manager, mock_provider): - """_pre_document_hook uses build_context_for_file when file_path provided.""" - hook_manager.execute_pre_hook("document", {"file_path": "/src/module.py"}) - - mock_provider.build_context_for_file.assert_called_once() - call_args = mock_provider.build_context_for_file.call_args - assert call_args[0][0] == Path("/src/module.py") - assert call_args[1].get("context_type") == "file_documentation" - - def test_pre_document_hook_prefers_symbol_over_file(self, hook_manager, mock_provider): - """_pre_document_hook prefers symbol when both provided.""" - hook_manager.execute_pre_hook( - "document", {"symbol": "my_func", "file_path": "/src/module.py"} - ) - - mock_provider.build_context.assert_called_once() - mock_provider.build_context_for_file.assert_not_called() - - def test_pre_document_hook_returns_none_without_params(self, hook_manager, mock_provider): - """_pre_document_hook returns None when neither symbol nor file_path provided.""" - result = hook_manager.execute_pre_hook("document", {}) - - assert result is None - mock_provider.build_context.assert_not_called() - mock_provider.build_context_for_file.assert_not_called() - - def test_register_pre_hook(self, hook_manager): - """register_pre_hook adds custom hook.""" - custom_hook = Mock(return_value=MCPContext()) - - hook_manager.register_pre_hook("custom_action", custom_hook) - - assert "custom_action" in hook_manager._pre_hooks - hook_manager.execute_pre_hook("custom_action", {"data": "value"}) - custom_hook.assert_called_once_with({"data": "value"}) - - def test_register_post_hook(self, hook_manager): - """register_post_hook adds custom hook.""" - custom_hook = Mock() - - hook_manager.register_post_hook("custom_action", custom_hook) - - assert "custom_action" in hook_manager._post_hooks - hook_manager.execute_post_hook("custom_action", {"result": "data"}) - custom_hook.assert_called_once_with({"result": "data"}) - - def test_execute_post_hook_handles_exception(self, hook_manager): - """execute_post_hook handles hook exceptions gracefully.""" - failing_hook = Mock(side_effect=Exception("Hook failed")) - hook_manager.register_post_hook("failing", failing_hook) - - # Should not raise - hook_manager.execute_post_hook("failing", {"data": "value"}) - - -class TestCreateContextForPrompt: - """Test create_context_for_prompt function.""" - - def test_returns_prompt_injection_string(self): - """create_context_for_prompt returns formatted string.""" - mock_provider = Mock() - mock_provider.build_context.return_value = MCPContext( - symbol=SymbolInfo("test_func", "function", "/test.py", 1, 10), - definition="def test_func(): pass", - ) - - result = create_context_for_prompt( - mock_provider, "explain", {"symbol": "test_func"} - ) - - assert isinstance(result, str) - assert "" in result - assert "test_func" in result - assert "" in result - - def test_returns_empty_string_when_no_context(self): - """create_context_for_prompt returns empty string when no context built.""" - mock_provider = Mock() - mock_provider.build_context.return_value = None - - result = create_context_for_prompt( - mock_provider, "explain", {"symbol": "nonexistent"} - ) - - assert result == "" - - def test_returns_empty_string_for_unknown_action(self): - """create_context_for_prompt returns empty string for unregistered action.""" - mock_provider = Mock() - - result = create_context_for_prompt( - mock_provider, "unknown_action", {"data": "value"} - ) - - assert result == "" - mock_provider.build_context.assert_not_called() diff --git a/codex-lens/tests/mcp/test_provider.py b/codex-lens/tests/mcp/test_provider.py deleted file mode 100644 index 4f5004a6..00000000 --- a/codex-lens/tests/mcp/test_provider.py +++ /dev/null @@ -1,383 +0,0 @@ -"""Tests for MCP provider.""" - -import pytest -from unittest.mock import Mock, MagicMock, patch -from pathlib import Path -import tempfile -import os - -from codexlens.mcp.provider import MCPProvider -from codexlens.mcp.schema import MCPContext, SymbolInfo, ReferenceInfo - - -class TestMCPProvider: - """Test MCPProvider class.""" - - @pytest.fixture - def mock_global_index(self): - """Create a mock global index.""" - return Mock() - - @pytest.fixture - def mock_search_engine(self): - """Create a mock search engine.""" - return Mock() - - @pytest.fixture - def mock_registry(self): - """Create a mock registry.""" - return Mock() - - @pytest.fixture - def provider(self, mock_global_index, mock_search_engine, mock_registry): - """Create an MCPProvider with mocked dependencies.""" - return MCPProvider(mock_global_index, mock_search_engine, mock_registry) - - def test_build_context_returns_none_for_unknown_symbol(self, provider, mock_global_index): - """build_context returns None when symbol is not found.""" - mock_global_index.search.return_value = [] - - result = provider.build_context("unknown_symbol") - - assert result is None - mock_global_index.search.assert_called_once_with( - "unknown_symbol", prefix_mode=False, limit=1 - ) - - def test_build_context_returns_mcp_context( - self, provider, mock_global_index, mock_search_engine - ): - """build_context returns MCPContext for known symbol.""" - mock_symbol = Mock() - mock_symbol.name = "my_func" - mock_symbol.kind = "function" - mock_symbol.file = "/test.py" - mock_symbol.range = (10, 20) - - mock_global_index.search.return_value = [mock_symbol] - mock_search_engine.search_references.return_value = [] - - result = provider.build_context("my_func") - - assert result is not None - assert isinstance(result, MCPContext) - assert result.symbol is not None - assert result.symbol.name == "my_func" - assert result.symbol.kind == "function" - assert result.context_type == "symbol_explanation" - - def test_build_context_with_custom_context_type( - self, provider, mock_global_index, mock_search_engine - ): - """build_context respects custom context_type.""" - mock_symbol = Mock() - mock_symbol.name = "my_func" - mock_symbol.kind = "function" - mock_symbol.file = "/test.py" - mock_symbol.range = (10, 20) - - mock_global_index.search.return_value = [mock_symbol] - mock_search_engine.search_references.return_value = [] - - result = provider.build_context("my_func", context_type="refactor_context") - - assert result is not None - assert result.context_type == "refactor_context" - - def test_build_context_includes_references( - self, provider, mock_global_index, mock_search_engine - ): - """build_context includes references when include_references=True.""" - mock_symbol = Mock() - mock_symbol.name = "my_func" - mock_symbol.kind = "function" - mock_symbol.file = "/test.py" - mock_symbol.range = (10, 20) - - mock_ref = Mock() - mock_ref.file_path = "/caller.py" - mock_ref.line = 25 - mock_ref.column = 4 - mock_ref.context = "result = my_func()" - mock_ref.relationship_type = "call" - - mock_global_index.search.return_value = [mock_symbol] - mock_search_engine.search_references.return_value = [mock_ref] - - result = provider.build_context("my_func", include_references=True) - - assert result is not None - assert len(result.references) == 1 - assert result.references[0].file_path == "/caller.py" - assert result.references[0].line == 25 - assert result.references[0].relationship_type == "call" - - def test_build_context_excludes_references_when_disabled( - self, provider, mock_global_index, mock_search_engine - ): - """build_context excludes references when include_references=False.""" - mock_symbol = Mock() - mock_symbol.name = "my_func" - mock_symbol.kind = "function" - mock_symbol.file = "/test.py" - mock_symbol.range = (10, 20) - - mock_global_index.search.return_value = [mock_symbol] - mock_search_engine.search_references.return_value = [] - - # Disable both references and related to avoid any search_references calls - result = provider.build_context( - "my_func", include_references=False, include_related=False - ) - - assert result is not None - assert len(result.references) == 0 - mock_search_engine.search_references.assert_not_called() - - def test_build_context_respects_max_references( - self, provider, mock_global_index, mock_search_engine - ): - """build_context passes max_references to search engine.""" - mock_symbol = Mock() - mock_symbol.name = "my_func" - mock_symbol.kind = "function" - mock_symbol.file = "/test.py" - mock_symbol.range = (10, 20) - - mock_global_index.search.return_value = [mock_symbol] - mock_search_engine.search_references.return_value = [] - - # Disable include_related to test only the references call - provider.build_context("my_func", max_references=5, include_related=False) - - mock_search_engine.search_references.assert_called_once_with( - "my_func", limit=5 - ) - - def test_build_context_includes_metadata( - self, provider, mock_global_index, mock_search_engine - ): - """build_context includes source metadata.""" - mock_symbol = Mock() - mock_symbol.name = "my_func" - mock_symbol.kind = "function" - mock_symbol.file = "/test.py" - mock_symbol.range = (10, 20) - - mock_global_index.search.return_value = [mock_symbol] - mock_search_engine.search_references.return_value = [] - - result = provider.build_context("my_func") - - assert result is not None - assert result.metadata.get("source") == "codex-lens" - - def test_extract_definition_with_valid_file(self, provider): - """_extract_definition reads file content correctly.""" - # Create a temporary file with some content - with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f: - f.write("# Line 1\n") - f.write("# Line 2\n") - f.write("def my_func():\n") # Line 3 - f.write(" pass\n") # Line 4 - f.write("# Line 5\n") - temp_path = f.name - - try: - mock_symbol = Mock() - mock_symbol.file = temp_path - mock_symbol.range = (3, 4) # 1-based line numbers - - definition = provider._extract_definition(mock_symbol) - - assert definition is not None - assert "def my_func():" in definition - assert "pass" in definition - finally: - os.unlink(temp_path) - - def test_extract_definition_returns_none_for_missing_file(self, provider): - """_extract_definition returns None for non-existent file.""" - mock_symbol = Mock() - mock_symbol.file = "/nonexistent/path/file.py" - mock_symbol.range = (1, 5) - - definition = provider._extract_definition(mock_symbol) - - assert definition is None - - def test_extract_definition_returns_none_for_none_file(self, provider): - """_extract_definition returns None when symbol.file is None.""" - mock_symbol = Mock() - mock_symbol.file = None - mock_symbol.range = (1, 5) - - definition = provider._extract_definition(mock_symbol) - - assert definition is None - - def test_build_context_for_file_returns_context( - self, provider, mock_global_index - ): - """build_context_for_file returns MCPContext.""" - mock_global_index.search.return_value = [] - - result = provider.build_context_for_file( - Path("/test/file.py"), - context_type="file_overview", - ) - - assert result is not None - assert isinstance(result, MCPContext) - assert result.context_type == "file_overview" - assert result.metadata.get("file_path") == str(Path("/test/file.py")) - - def test_build_context_for_file_includes_symbols( - self, provider, mock_global_index - ): - """build_context_for_file includes symbols from the file.""" - # Create temp file to get resolved path - with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f: - f.write("def func(): pass\n") - temp_path = f.name - - try: - mock_symbol = Mock() - mock_symbol.name = "func" - mock_symbol.kind = "function" - mock_symbol.file = temp_path - mock_symbol.range = (1, 1) - - mock_global_index.search.return_value = [mock_symbol] - - result = provider.build_context_for_file(Path(temp_path)) - - assert result is not None - # Symbols from this file should be in related_symbols - assert len(result.related_symbols) >= 0 # May be 0 if filtering doesn't match - finally: - os.unlink(temp_path) - - -class TestMCPProviderRelatedSymbols: - """Test related symbols functionality.""" - - @pytest.fixture - def provider(self): - """Create provider with mocks.""" - mock_global_index = Mock() - mock_search_engine = Mock() - mock_registry = Mock() - return MCPProvider(mock_global_index, mock_search_engine, mock_registry) - - def test_get_related_symbols_from_references(self, provider): - """_get_related_symbols extracts symbols from references.""" - mock_symbol = Mock() - mock_symbol.name = "my_func" - mock_symbol.file = "/test.py" - - mock_ref1 = Mock() - mock_ref1.file_path = "/caller1.py" - mock_ref1.relationship_type = "call" - - mock_ref2 = Mock() - mock_ref2.file_path = "/caller2.py" - mock_ref2.relationship_type = "import" - - provider.search_engine.search_references.return_value = [mock_ref1, mock_ref2] - - related = provider._get_related_symbols(mock_symbol) - - assert len(related) == 2 - assert related[0].relationship == "call" - assert related[1].relationship == "import" - - def test_get_related_symbols_limits_results(self, provider): - """_get_related_symbols limits to 10 unique relationship types.""" - mock_symbol = Mock() - mock_symbol.name = "my_func" - mock_symbol.file = "/test.py" - - # Create 15 references with unique relationship types - refs = [] - for i in range(15): - ref = Mock() - ref.file_path = f"/file{i}.py" - ref.relationship_type = f"type{i}" - refs.append(ref) - - provider.search_engine.search_references.return_value = refs - - related = provider._get_related_symbols(mock_symbol) - - assert len(related) <= 10 - - def test_get_related_symbols_handles_exception(self, provider): - """_get_related_symbols handles exceptions gracefully.""" - mock_symbol = Mock() - mock_symbol.name = "my_func" - mock_symbol.file = "/test.py" - - provider.search_engine.search_references.side_effect = Exception("Search failed") - - related = provider._get_related_symbols(mock_symbol) - - assert related == [] - - -class TestMCPProviderIntegration: - """Integration-style tests for MCPProvider.""" - - def test_full_context_workflow(self): - """Test complete context building workflow.""" - # Create temp file - with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f: - f.write("def my_function(arg1, arg2):\n") - f.write(" '''This is my function.'''\n") - f.write(" return arg1 + arg2\n") - temp_path = f.name - - try: - # Setup mocks - mock_global_index = Mock() - mock_search_engine = Mock() - mock_registry = Mock() - - mock_symbol = Mock() - mock_symbol.name = "my_function" - mock_symbol.kind = "function" - mock_symbol.file = temp_path - mock_symbol.range = (1, 3) - - mock_ref = Mock() - mock_ref.file_path = "/user.py" - mock_ref.line = 10 - mock_ref.column = 4 - mock_ref.context = "result = my_function(1, 2)" - mock_ref.relationship_type = "call" - - mock_global_index.search.return_value = [mock_symbol] - mock_search_engine.search_references.return_value = [mock_ref] - - provider = MCPProvider(mock_global_index, mock_search_engine, mock_registry) - context = provider.build_context("my_function") - - assert context is not None - assert context.symbol.name == "my_function" - assert context.definition is not None - assert "def my_function" in context.definition - assert len(context.references) == 1 - assert context.references[0].relationship_type == "call" - - # Test serialization - json_str = context.to_json() - assert "my_function" in json_str - - # Test prompt injection - prompt = context.to_prompt_injection() - assert "" in prompt - assert "my_function" in prompt - assert "" in prompt - - finally: - os.unlink(temp_path) diff --git a/codex-lens/tests/mcp/test_schema.py b/codex-lens/tests/mcp/test_schema.py deleted file mode 100644 index e5914593..00000000 --- a/codex-lens/tests/mcp/test_schema.py +++ /dev/null @@ -1,288 +0,0 @@ -"""Tests for MCP schema.""" - -import pytest -import json - -from codexlens.mcp.schema import ( - MCPContext, - SymbolInfo, - ReferenceInfo, - RelatedSymbol, -) - - -class TestSymbolInfo: - """Test SymbolInfo dataclass.""" - - def test_to_dict_includes_all_fields(self): - """SymbolInfo.to_dict() includes all non-None fields.""" - info = SymbolInfo( - name="func", - kind="function", - file_path="/test.py", - line_start=10, - line_end=20, - signature="def func():", - documentation="Test doc", - ) - d = info.to_dict() - assert d["name"] == "func" - assert d["kind"] == "function" - assert d["file_path"] == "/test.py" - assert d["line_start"] == 10 - assert d["line_end"] == 20 - assert d["signature"] == "def func():" - assert d["documentation"] == "Test doc" - - def test_to_dict_excludes_none(self): - """SymbolInfo.to_dict() excludes None fields.""" - info = SymbolInfo( - name="func", - kind="function", - file_path="/test.py", - line_start=10, - line_end=20, - ) - d = info.to_dict() - assert "signature" not in d - assert "documentation" not in d - assert "name" in d - assert "kind" in d - - def test_basic_creation(self): - """SymbolInfo can be created with required fields only.""" - info = SymbolInfo( - name="MyClass", - kind="class", - file_path="/src/module.py", - line_start=1, - line_end=50, - ) - assert info.name == "MyClass" - assert info.kind == "class" - assert info.signature is None - assert info.documentation is None - - -class TestReferenceInfo: - """Test ReferenceInfo dataclass.""" - - def test_to_dict(self): - """ReferenceInfo.to_dict() returns all fields.""" - ref = ReferenceInfo( - file_path="/src/main.py", - line=25, - column=4, - context="result = func()", - relationship_type="call", - ) - d = ref.to_dict() - assert d["file_path"] == "/src/main.py" - assert d["line"] == 25 - assert d["column"] == 4 - assert d["context"] == "result = func()" - assert d["relationship_type"] == "call" - - def test_all_fields_required(self): - """ReferenceInfo requires all fields.""" - ref = ReferenceInfo( - file_path="/test.py", - line=10, - column=0, - context="import module", - relationship_type="import", - ) - assert ref.file_path == "/test.py" - assert ref.relationship_type == "import" - - -class TestRelatedSymbol: - """Test RelatedSymbol dataclass.""" - - def test_to_dict_includes_all_fields(self): - """RelatedSymbol.to_dict() includes all non-None fields.""" - sym = RelatedSymbol( - name="BaseClass", - kind="class", - relationship="inherits", - file_path="/src/base.py", - ) - d = sym.to_dict() - assert d["name"] == "BaseClass" - assert d["kind"] == "class" - assert d["relationship"] == "inherits" - assert d["file_path"] == "/src/base.py" - - def test_to_dict_excludes_none(self): - """RelatedSymbol.to_dict() excludes None file_path.""" - sym = RelatedSymbol( - name="helper", - kind="function", - relationship="calls", - ) - d = sym.to_dict() - assert "file_path" not in d - assert d["name"] == "helper" - assert d["relationship"] == "calls" - - -class TestMCPContext: - """Test MCPContext dataclass.""" - - def test_to_dict_basic(self): - """MCPContext.to_dict() returns basic structure.""" - ctx = MCPContext(context_type="test") - d = ctx.to_dict() - assert d["version"] == "1.0" - assert d["context_type"] == "test" - assert d["metadata"] == {} - - def test_to_dict_with_symbol(self): - """MCPContext.to_dict() includes symbol when present.""" - ctx = MCPContext( - context_type="test", - symbol=SymbolInfo("f", "function", "/t.py", 1, 2), - ) - d = ctx.to_dict() - assert "symbol" in d - assert d["symbol"]["name"] == "f" - assert d["symbol"]["kind"] == "function" - - def test_to_dict_with_references(self): - """MCPContext.to_dict() includes references when present.""" - ctx = MCPContext( - context_type="test", - references=[ - ReferenceInfo("/a.py", 10, 0, "call()", "call"), - ReferenceInfo("/b.py", 20, 5, "import x", "import"), - ], - ) - d = ctx.to_dict() - assert "references" in d - assert len(d["references"]) == 2 - assert d["references"][0]["line"] == 10 - - def test_to_dict_with_related_symbols(self): - """MCPContext.to_dict() includes related_symbols when present.""" - ctx = MCPContext( - context_type="test", - related_symbols=[ - RelatedSymbol("Base", "class", "inherits"), - RelatedSymbol("helper", "function", "calls"), - ], - ) - d = ctx.to_dict() - assert "related_symbols" in d - assert len(d["related_symbols"]) == 2 - - def test_to_json(self): - """MCPContext.to_json() returns valid JSON.""" - ctx = MCPContext(context_type="test") - j = ctx.to_json() - parsed = json.loads(j) - assert parsed["version"] == "1.0" - assert parsed["context_type"] == "test" - - def test_to_json_with_indent(self): - """MCPContext.to_json() respects indent parameter.""" - ctx = MCPContext(context_type="test") - j = ctx.to_json(indent=4) - # Check it's properly indented - assert " " in j - - def test_to_prompt_injection_basic(self): - """MCPContext.to_prompt_injection() returns formatted string.""" - ctx = MCPContext( - symbol=SymbolInfo("my_func", "function", "/test.py", 10, 20), - definition="def my_func(): pass", - ) - prompt = ctx.to_prompt_injection() - assert "" in prompt - assert "my_func" in prompt - assert "def my_func()" in prompt - assert "" in prompt - - def test_to_prompt_injection_with_references(self): - """MCPContext.to_prompt_injection() includes references.""" - ctx = MCPContext( - symbol=SymbolInfo("func", "function", "/test.py", 1, 5), - references=[ - ReferenceInfo("/a.py", 10, 0, "func()", "call"), - ReferenceInfo("/b.py", 20, 0, "from x import func", "import"), - ], - ) - prompt = ctx.to_prompt_injection() - assert "References (2 found)" in prompt - assert "/a.py:10" in prompt - assert "call" in prompt - - def test_to_prompt_injection_limits_references(self): - """MCPContext.to_prompt_injection() limits references to 5.""" - refs = [ - ReferenceInfo(f"/file{i}.py", i, 0, f"ref{i}", "call") - for i in range(10) - ] - ctx = MCPContext( - symbol=SymbolInfo("func", "function", "/test.py", 1, 5), - references=refs, - ) - prompt = ctx.to_prompt_injection() - # Should show "10 found" but only include 5 - assert "References (10 found)" in prompt - assert "/file0.py" in prompt - assert "/file4.py" in prompt - assert "/file5.py" not in prompt - - def test_to_prompt_injection_with_related_symbols(self): - """MCPContext.to_prompt_injection() includes related symbols.""" - ctx = MCPContext( - symbol=SymbolInfo("MyClass", "class", "/test.py", 1, 50), - related_symbols=[ - RelatedSymbol("BaseClass", "class", "inherits"), - RelatedSymbol("helper", "function", "calls"), - ], - ) - prompt = ctx.to_prompt_injection() - assert "Related Symbols" in prompt - assert "BaseClass (inherits)" in prompt - assert "helper (calls)" in prompt - - def test_to_prompt_injection_limits_related_symbols(self): - """MCPContext.to_prompt_injection() limits related symbols to 10.""" - related = [ - RelatedSymbol(f"sym{i}", "function", "calls") - for i in range(15) - ] - ctx = MCPContext( - symbol=SymbolInfo("func", "function", "/test.py", 1, 5), - related_symbols=related, - ) - prompt = ctx.to_prompt_injection() - assert "sym0 (calls)" in prompt - assert "sym9 (calls)" in prompt - assert "sym10 (calls)" not in prompt - - def test_empty_context(self): - """MCPContext works with minimal data.""" - ctx = MCPContext() - d = ctx.to_dict() - assert d["version"] == "1.0" - assert d["context_type"] == "code_context" - - prompt = ctx.to_prompt_injection() - assert "" in prompt - assert "" in prompt - - def test_metadata_preserved(self): - """MCPContext preserves custom metadata.""" - ctx = MCPContext( - context_type="custom", - metadata={ - "source": "codex-lens", - "indexed_at": "2024-01-01", - "custom_key": "custom_value", - }, - ) - d = ctx.to_dict() - assert d["metadata"]["source"] == "codex-lens" - assert d["metadata"]["custom_key"] == "custom_value" diff --git a/codex-lens/tests/parsers/__init__.py b/codex-lens/tests/parsers/__init__.py deleted file mode 100644 index 0e066f0e..00000000 --- a/codex-lens/tests/parsers/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for codexlens.parsers modules.""" diff --git a/codex-lens/tests/parsers/test_astgrep_extraction.py b/codex-lens/tests/parsers/test_astgrep_extraction.py deleted file mode 100644 index 41f0d2ea..00000000 --- a/codex-lens/tests/parsers/test_astgrep_extraction.py +++ /dev/null @@ -1,444 +0,0 @@ -"""Tests for dedicated extraction methods: extract_inherits, extract_calls, extract_imports. - -Tests pattern-based relationship extraction from Python source code -using ast-grep-py bindings for INHERITS, CALL, and IMPORTS relationships. -""" - -from pathlib import Path - -import pytest - -from codexlens.parsers.astgrep_processor import ( - AstGrepPythonProcessor, - is_astgrep_processor_available, -) -from codexlens.entities import RelationshipType - - -# Check if ast-grep is available for conditional test skipping -ASTGREP_AVAILABLE = is_astgrep_processor_available() - - -@pytest.mark.skipif(not ASTGREP_AVAILABLE, reason="ast-grep-py not installed") -class TestExtractInherits: - """Tests for extract_inherits method - INHERITS relationship extraction.""" - - def test_single_inheritance(self): - """Test extraction of single inheritance relationship.""" - processor = AstGrepPythonProcessor() - code = """ -class Animal: - pass - -class Dog(Animal): - pass -""" - relationships = processor.extract_inherits(code, "test.py") - - assert len(relationships) == 1 - rel = relationships[0] - assert rel.source_symbol == "Dog" - assert rel.target_symbol == "Animal" - assert rel.relationship_type == RelationshipType.INHERITS - - def test_multiple_inheritance(self): - """Test extraction of multiple inheritance relationships.""" - processor = AstGrepPythonProcessor() - code = """ -class A: - pass - -class B: - pass - -class C(A, B): - pass -""" - relationships = processor.extract_inherits(code, "test.py") - - # Should have 2 relationships: C->A and C->B - assert len(relationships) == 2 - targets = {r.target_symbol for r in relationships} - assert "A" in targets - assert "B" in targets - for rel in relationships: - assert rel.source_symbol == "C" - - def test_no_inheritance(self): - """Test that classes without inheritance return empty list.""" - processor = AstGrepPythonProcessor() - code = """ -class Standalone: - pass -""" - relationships = processor.extract_inherits(code, "test.py") - - assert len(relationships) == 0 - - def test_nested_class_inheritance(self): - """Test extraction of inheritance in nested classes.""" - processor = AstGrepPythonProcessor() - code = """ -class Outer: - class Inner(Base): - pass -""" - relationships = processor.extract_inherits(code, "test.py") - - assert len(relationships) == 1 - assert relationships[0].source_symbol == "Inner" - assert relationships[0].target_symbol == "Base" - - def test_inheritance_with_complex_bases(self): - """Test extraction with generic or complex base classes.""" - processor = AstGrepPythonProcessor() - code = """ -class Service(BaseService, mixins.Loggable): - pass -""" - relationships = processor.extract_inherits(code, "test.py") - - assert len(relationships) == 2 - targets = {r.target_symbol for r in relationships} - assert "BaseService" in targets - assert "mixins.Loggable" in targets - - -@pytest.mark.skipif(not ASTGREP_AVAILABLE, reason="ast-grep-py not installed") -class TestExtractCalls: - """Tests for extract_calls method - CALL relationship extraction.""" - - def test_simple_function_call(self): - """Test extraction of simple function calls.""" - processor = AstGrepPythonProcessor() - code = """ -def main(): - print("hello") - len([1, 2, 3]) -""" - relationships = processor.extract_calls(code, "test.py", "main") - - targets = {r.target_symbol for r in relationships} - assert "print" in targets - assert "len" in targets - - def test_method_call(self): - """Test extraction of method calls.""" - processor = AstGrepPythonProcessor() - code = """ -def process(): - obj.method() - items.append(1) -""" - relationships = processor.extract_calls(code, "test.py", "process") - - targets = {r.target_symbol for r in relationships} - assert "obj.method" in targets - assert "items.append" in targets - - def test_skips_self_calls(self): - """Test that self.method() calls are filtered.""" - processor = AstGrepPythonProcessor() - code = """ -class Service: - def process(self): - self.internal() - external_func() -""" - relationships = processor.extract_calls(code, "test.py", "Service") - - targets = {r.target_symbol for r in relationships} - # self.internal should be filtered - assert "self.internal" not in targets - assert "internal" not in targets - assert "external_func" in targets - - def test_skips_cls_calls(self): - """Test that cls.method() calls are filtered.""" - processor = AstGrepPythonProcessor() - code = """ -class Factory: - @classmethod - def create(cls): - cls.helper() - other_func() -""" - relationships = processor.extract_calls(code, "test.py", "Factory") - - targets = {r.target_symbol for r in relationships} - assert "cls.helper" not in targets - assert "other_func" in targets - - def test_alias_resolution(self): - """Test call alias resolution using import map.""" - processor = AstGrepPythonProcessor() - code = """ -def main(): - np.array([1, 2, 3]) -""" - alias_map = {"np": "numpy"} - relationships = processor.extract_calls(code, "test.py", "main", alias_map) - - assert len(relationships) >= 1 - # Should resolve np.array to numpy.array - assert any("numpy.array" in r.target_symbol for r in relationships) - - def test_no_calls(self): - """Test that code without calls returns empty list.""" - processor = AstGrepPythonProcessor() - code = """ -x = 1 -y = x + 2 -""" - relationships = processor.extract_calls(code, "test.py") - - assert len(relationships) == 0 - - -@pytest.mark.skipif(not ASTGREP_AVAILABLE, reason="ast-grep-py not installed") -class TestExtractImports: - """Tests for extract_imports method - IMPORTS relationship extraction.""" - - def test_simple_import(self): - """Test extraction of simple import statements.""" - processor = AstGrepPythonProcessor() - code = "import os" - - relationships, alias_map = processor.extract_imports(code, "test.py") - - assert len(relationships) == 1 - assert relationships[0].target_symbol == "os" - assert relationships[0].relationship_type == RelationshipType.IMPORTS - assert alias_map.get("os") == "os" - - def test_import_with_alias(self): - """Test extraction of import with alias.""" - processor = AstGrepPythonProcessor() - code = "import numpy as np" - - relationships, alias_map = processor.extract_imports(code, "test.py") - - assert len(relationships) == 1 - assert relationships[0].target_symbol == "numpy" - assert alias_map.get("np") == "numpy" - - def test_from_import(self): - """Test extraction of from-import statements.""" - processor = AstGrepPythonProcessor() - code = "from typing import List, Dict" - - relationships, alias_map = processor.extract_imports(code, "test.py") - - assert len(relationships) == 1 - assert relationships[0].target_symbol == "typing" - assert alias_map.get("List") == "typing.List" - assert alias_map.get("Dict") == "typing.Dict" - - def test_from_import_with_alias(self): - """Test extraction of from-import with alias.""" - processor = AstGrepPythonProcessor() - code = "from collections import defaultdict as dd" - - relationships, alias_map = processor.extract_imports(code, "test.py") - - assert len(relationships) == 1 - # The alias map should map dd to collections.defaultcount - assert "dd" in alias_map - assert "defaultdict" in alias_map.get("dd", "") - - def test_star_import(self): - """Test extraction of star imports.""" - processor = AstGrepPythonProcessor() - code = "from module import *" - - relationships, alias_map = processor.extract_imports(code, "test.py") - - assert len(relationships) >= 1 - # Star import should be recorded - star_imports = [r for r in relationships if "*" in r.target_symbol] - assert len(star_imports) >= 1 - - def test_relative_import(self): - """Test extraction of relative imports.""" - processor = AstGrepPythonProcessor() - code = "from .utils import helper" - - relationships, alias_map = processor.extract_imports(code, "test.py") - - # Should capture the relative import - assert len(relationships) >= 1 - rel_imports = [r for r in relationships if r.target_symbol.startswith(".")] - assert len(rel_imports) >= 1 - - def test_multiple_imports(self): - """Test extraction of multiple import types.""" - processor = AstGrepPythonProcessor() - code = """ -import os -import sys -from typing import List -from collections import defaultdict as dd -""" - - relationships, alias_map = processor.extract_imports(code, "test.py") - - assert len(relationships) >= 4 - targets = {r.target_symbol for r in relationships} - assert "os" in targets - assert "sys" in targets - assert "typing" in targets - assert "collections" in targets - - def test_no_imports(self): - """Test that code without imports returns empty list.""" - processor = AstGrepPythonProcessor() - code = """ -x = 1 -def foo(): - pass -""" - relationships, alias_map = processor.extract_imports(code, "test.py") - - assert len(relationships) == 0 - assert len(alias_map) == 0 - - -@pytest.mark.skipif(not ASTGREP_AVAILABLE, reason="ast-grep-py not installed") -class TestExtractMethodsIntegration: - """Integration tests combining multiple extraction methods.""" - - def test_full_file_extraction(self): - """Test extracting all relationships from a complete file.""" - processor = AstGrepPythonProcessor() - code = """ -import os -from typing import List, Optional - -class Base: - pass - -class Service(Base): - def __init__(self): - self.data = [] - - def process(self): - result = os.path.join("a", "b") - items = List([1, 2, 3]) - return result - -def main(): - svc = Service() - svc.process() -""" - source_file = "test.py" - - # Extract all relationship types - imports, alias_map = processor.extract_imports(code, source_file) - inherits = processor.extract_inherits(code, source_file) - calls = processor.extract_calls(code, source_file, alias_map=alias_map) - - # Verify we got all expected relationships - assert len(imports) >= 2 # os and typing - assert len(inherits) == 1 # Service -> Base - assert len(calls) >= 2 # os.path.join and others - - # Verify inheritance - assert any(r.source_symbol == "Service" and r.target_symbol == "Base" - for r in inherits) - - def test_alias_propagation(self): - """Test that import aliases propagate to call resolution.""" - processor = AstGrepPythonProcessor() - code = """ -import numpy as np - -def compute(): - arr = np.array([1, 2, 3]) - return np.sum(arr) -""" - source_file = "test.py" - - imports, alias_map = processor.extract_imports(code, source_file) - calls = processor.extract_calls(code, source_file, alias_map=alias_map) - - # Alias map should have np -> numpy - assert alias_map.get("np") == "numpy" - - # Calls should resolve np.array and np.sum - resolved_targets = {r.target_symbol for r in calls} - # At minimum, np.array and np.sum should be captured - np_calls = [t for t in resolved_targets if "np" in t or "numpy" in t] - assert len(np_calls) >= 2 - - -class TestExtractMethodFallback: - """Tests for fallback behavior when ast-grep unavailable.""" - - def test_extract_inherits_empty_when_unavailable(self): - """Test extract_inherits returns empty list when unavailable.""" - processor = AstGrepPythonProcessor() - if not processor.is_available(): - code = "class Dog(Animal): pass" - relationships = processor.extract_inherits(code, "test.py") - assert relationships == [] - - def test_extract_calls_empty_when_unavailable(self): - """Test extract_calls returns empty list when unavailable.""" - processor = AstGrepPythonProcessor() - if not processor.is_available(): - code = "print('hello')" - relationships = processor.extract_calls(code, "test.py") - assert relationships == [] - - def test_extract_imports_empty_when_unavailable(self): - """Test extract_imports returns empty tuple when unavailable.""" - processor = AstGrepPythonProcessor() - if not processor.is_available(): - code = "import os" - relationships, alias_map = processor.extract_imports(code, "test.py") - assert relationships == [] - assert alias_map == {} - - -class TestHelperMethods: - """Tests for internal helper methods.""" - - def test_parse_base_classes_single(self): - """Test _parse_base_classes with single base.""" - processor = AstGrepPythonProcessor() - result = processor._parse_base_classes("BaseClass") - assert result == ["BaseClass"] - - def test_parse_base_classes_multiple(self): - """Test _parse_base_classes with multiple bases.""" - processor = AstGrepPythonProcessor() - result = processor._parse_base_classes("A, B, C") - assert result == ["A", "B", "C"] - - def test_parse_base_classes_with_generics(self): - """Test _parse_base_classes with generic types.""" - processor = AstGrepPythonProcessor() - result = processor._parse_base_classes("Generic[T], Mixin") - assert "Generic[T]" in result - assert "Mixin" in result - - def test_resolve_call_alias_simple(self): - """Test _resolve_call_alias with simple name.""" - processor = AstGrepPythonProcessor() - alias_map = {"np": "numpy"} - result = processor._resolve_call_alias("np", alias_map) - assert result == "numpy" - - def test_resolve_call_alias_qualified(self): - """Test _resolve_call_alias with qualified name.""" - processor = AstGrepPythonProcessor() - alias_map = {"np": "numpy"} - result = processor._resolve_call_alias("np.array", alias_map) - assert result == "numpy.array" - - def test_resolve_call_alias_no_match(self): - """Test _resolve_call_alias when no alias exists.""" - processor = AstGrepPythonProcessor() - alias_map = {} - result = processor._resolve_call_alias("myfunc", alias_map) - assert result == "myfunc" diff --git a/codex-lens/tests/parsers/test_astgrep_processor.py b/codex-lens/tests/parsers/test_astgrep_processor.py deleted file mode 100644 index ba10b83e..00000000 --- a/codex-lens/tests/parsers/test_astgrep_processor.py +++ /dev/null @@ -1,402 +0,0 @@ -"""Tests for AstGrepPythonProcessor. - -Tests pattern-based relationship extraction from Python source code -using ast-grep-py bindings. -""" - -from pathlib import Path - -import pytest - -from codexlens.parsers.astgrep_processor import ( - AstGrepPythonProcessor, - BaseAstGrepProcessor, - is_astgrep_processor_available, -) -from codexlens.parsers.patterns.python import ( - PYTHON_PATTERNS, - METAVARS, - RELATIONSHIP_PATTERNS, - get_pattern, - get_patterns_for_relationship, - get_metavar, -) - - -# Check if ast-grep is available for conditional test skipping -ASTGREP_AVAILABLE = is_astgrep_processor_available() - - -class TestPatternDefinitions: - """Tests for Python pattern definitions.""" - - def test_python_patterns_exist(self): - """Verify all expected patterns are defined.""" - expected_patterns = [ - "class_def", - "class_with_bases", - "func_def", - "async_func_def", - "import_stmt", - "import_from", - "call", - "method_call", - ] - for pattern_name in expected_patterns: - assert pattern_name in PYTHON_PATTERNS, f"Missing pattern: {pattern_name}" - - def test_get_pattern_returns_correct_pattern(self): - """Test get_pattern returns expected pattern strings.""" - # Note: ast-grep-py 0.40+ uses $$$ for zero-or-more multi-match - assert get_pattern("class_def") == "class $NAME $$$BODY" - assert get_pattern("func_def") == "def $NAME($$$PARAMS): $$$BODY" - assert get_pattern("import_stmt") == "import $MODULE" - - def test_get_pattern_raises_for_unknown(self): - """Test get_pattern raises KeyError for unknown patterns.""" - with pytest.raises(KeyError): - get_pattern("nonexistent_pattern") - - def test_metavars_defined(self): - """Verify metavariable mappings are defined.""" - expected_metavars = [ - "class_name", - "func_name", - "import_module", - "call_func", - ] - for var in expected_metavars: - assert var in METAVARS, f"Missing metavar: {var}" - - def test_get_metavar(self): - """Test get_metavar returns correct values.""" - assert get_metavar("class_name") == "NAME" - assert get_metavar("func_name") == "NAME" - assert get_metavar("import_module") == "MODULE" - - def test_relationship_patterns_mapping(self): - """Test relationship type to pattern mapping.""" - assert "class_with_bases" in get_patterns_for_relationship("inheritance") - assert "import_stmt" in get_patterns_for_relationship("imports") - assert "import_from" in get_patterns_for_relationship("imports") - assert "call" in get_patterns_for_relationship("calls") - - -class TestAstGrepPythonProcessorAvailability: - """Tests for processor availability.""" - - def test_is_available_returns_bool(self): - """Test is_available returns a boolean.""" - processor = AstGrepPythonProcessor() - assert isinstance(processor.is_available(), bool) - - def test_is_available_matches_global_check(self): - """Test is_available matches is_astgrep_processor_available.""" - processor = AstGrepPythonProcessor() - assert processor.is_available() == is_astgrep_processor_available() - - def test_module_level_check(self): - """Test module-level availability function.""" - assert isinstance(is_astgrep_processor_available(), bool) - - -@pytest.mark.skipif(not ASTGREP_AVAILABLE, reason="ast-grep-py not installed") -class TestAstGrepPythonProcessorParsing: - """Tests for Python parsing with ast-grep.""" - - def test_parse_simple_function(self): - """Test parsing a simple function definition.""" - processor = AstGrepPythonProcessor() - code = "def hello():\n pass" - result = processor.parse(code, Path("test.py")) - - assert result is not None - assert result.language == "python" - assert len(result.symbols) == 1 - assert result.symbols[0].name == "hello" - assert result.symbols[0].kind == "function" - - def test_parse_class(self): - """Test parsing a class definition.""" - processor = AstGrepPythonProcessor() - code = "class MyClass:\n pass" - result = processor.parse(code, Path("test.py")) - - assert result is not None - assert len(result.symbols) == 1 - assert result.symbols[0].name == "MyClass" - assert result.symbols[0].kind == "class" - - def test_parse_async_function(self): - """Test parsing an async function definition.""" - processor = AstGrepPythonProcessor() - code = "async def fetch_data():\n pass" - result = processor.parse(code, Path("test.py")) - - assert result is not None - assert len(result.symbols) == 1 - assert result.symbols[0].name == "fetch_data" - - def test_parse_class_with_inheritance(self): - """Test parsing class with inheritance.""" - processor = AstGrepPythonProcessor() - code = """ -class Base: - pass - -class Child(Base): - pass -""" - result = processor.parse(code, Path("test.py")) - - assert result is not None - names = [s.name for s in result.symbols] - assert "Base" in names - assert "Child" in names - - # Check inheritance relationship - inherits = [ - r for r in result.relationships - if r.relationship_type.value == "inherits" - ] - assert any(r.source_symbol == "Child" for r in inherits) - - def test_parse_imports(self): - """Test parsing import statements.""" - processor = AstGrepPythonProcessor() - code = """ -import os -from sys import path -""" - result = processor.parse(code, Path("test.py")) - - assert result is not None - imports = [ - r for r in result.relationships - if r.relationship_type.value == "imports" - ] - assert len(imports) >= 1 - targets = {r.target_symbol for r in imports} - assert "os" in targets - - def test_parse_function_calls(self): - """Test parsing function calls.""" - processor = AstGrepPythonProcessor() - code = """ -def main(): - print("hello") - len([1, 2, 3]) -""" - result = processor.parse(code, Path("test.py")) - - assert result is not None - calls = [ - r for r in result.relationships - if r.relationship_type.value == "calls" - ] - targets = {r.target_symbol for r in calls} - assert "print" in targets - assert "len" in targets - - def test_parse_empty_file(self): - """Test parsing an empty file.""" - processor = AstGrepPythonProcessor() - result = processor.parse("", Path("test.py")) - - assert result is not None - assert len(result.symbols) == 0 - - def test_parse_returns_indexed_file(self): - """Test that parse returns proper IndexedFile structure.""" - processor = AstGrepPythonProcessor() - code = "def test():\n pass" - result = processor.parse(code, Path("test.py")) - - assert result is not None - assert result.path.endswith("test.py") - assert result.language == "python" - assert isinstance(result.symbols, list) - assert isinstance(result.chunks, list) - assert isinstance(result.relationships, list) - - -@pytest.mark.skipif(not ASTGREP_AVAILABLE, reason="ast-grep-py not installed") -class TestAstGrepPythonProcessorRelationships: - """Tests for relationship extraction.""" - - def test_inheritance_extraction(self): - """Test extraction of inheritance relationships.""" - processor = AstGrepPythonProcessor() - code = """ -class Animal: - pass - -class Dog(Animal): - pass - -class Cat(Animal): - pass -""" - result = processor.parse(code, Path("test.py")) - - assert result is not None - inherits = [ - r for r in result.relationships - if r.relationship_type.value == "inherits" - ] - # Should have 2 inheritance relationships - assert len(inherits) >= 2 - sources = {r.source_symbol for r in inherits} - assert "Dog" in sources - assert "Cat" in sources - - def test_call_extraction_skips_self(self): - """Test that self.method() calls are filtered.""" - processor = AstGrepPythonProcessor() - code = """ -class Service: - def process(self): - self.internal() - external_call() - -def external_call(): - pass -""" - result = processor.parse(code, Path("test.py")) - - assert result is not None - calls = [ - r for r in result.relationships - if r.relationship_type.value == "calls" - ] - targets = {r.target_symbol for r in calls} - # self.internal should be filtered - assert "self.internal" not in targets - assert "external_call" in targets - - def test_import_with_alias_resolution(self): - """Test import alias resolution in calls.""" - processor = AstGrepPythonProcessor() - code = """ -import os.path as osp - -def main(): - osp.join("a", "b") -""" - result = processor.parse(code, Path("test.py")) - - assert result is not None - calls = [ - r for r in result.relationships - if r.relationship_type.value == "calls" - ] - targets = {r.target_symbol for r in calls} - # Should resolve osp to os.path - assert any("os.path" in t for t in targets) - - -@pytest.mark.skipif(not ASTGREP_AVAILABLE, reason="ast-grep-py not installed") -class TestAstGrepPythonProcessorRunAstGrep: - """Tests for run_ast_grep method.""" - - def test_run_ast_grep_returns_list(self): - """Test run_ast_grep returns a list.""" - processor = AstGrepPythonProcessor() - code = "def hello():\n pass" - processor._binding.parse(code) if processor._binding else None - - matches = processor.run_ast_grep(code, "def $NAME($$$PARAMS) $$$BODY") - assert isinstance(matches, list) - - def test_run_ast_grep_finds_matches(self): - """Test run_ast_grep finds expected matches.""" - processor = AstGrepPythonProcessor() - code = "def hello():\n pass" - - matches = processor.run_ast_grep(code, "def $NAME($$$PARAMS) $$$BODY") - assert len(matches) >= 1 - - def test_run_ast_grep_empty_code(self): - """Test run_ast_grep with empty code.""" - processor = AstGrepPythonProcessor() - matches = processor.run_ast_grep("", "def $NAME($$$PARAMS) $$$BODY") - assert matches == [] - - def test_run_ast_grep_no_matches(self): - """Test run_ast_grep when pattern doesn't match.""" - processor = AstGrepPythonProcessor() - code = "x = 1" - matches = processor.run_ast_grep(code, "class $NAME $$$BODY") - assert matches == [] - - -class TestAstGrepPythonProcessorFallback: - """Tests for fallback behavior when ast-grep unavailable.""" - - def test_parse_returns_none_when_unavailable(self): - """Test parse returns None when ast-grep unavailable.""" - # This test runs regardless of availability - # When unavailable, should gracefully return None - processor = AstGrepPythonProcessor() - if not processor.is_available(): - code = "def test():\n pass" - result = processor.parse(code, Path("test.py")) - assert result is None - - def test_run_ast_grep_empty_when_unavailable(self): - """Test run_ast_grep returns empty list when unavailable.""" - processor = AstGrepPythonProcessor() - if not processor.is_available(): - matches = processor.run_ast_grep("code", "pattern") - assert matches == [] - - -class TestBaseAstGrepProcessor: - """Tests for abstract base class.""" - - def test_cannot_instantiate_base_class(self): - """Test that BaseAstGrepProcessor cannot be instantiated directly.""" - with pytest.raises(TypeError): - BaseAstGrepProcessor("python") # type: ignore[abstract] - - def test_subclass_implements_abstract_methods(self): - """Test that AstGrepPythonProcessor implements all abstract methods.""" - processor = AstGrepPythonProcessor() - # Should have process_matches method - assert hasattr(processor, "process_matches") - # Should have parse method - assert hasattr(processor, "parse") - # Check methods are callable - assert callable(processor.process_matches) - assert callable(processor.parse) - - -class TestPatternIntegration: - """Tests for pattern module integration with processor.""" - - def test_processor_uses_pattern_module(self): - """Verify processor uses patterns from pattern module.""" - # The processor should import and use patterns from patterns/python/ - from codexlens.parsers.astgrep_processor import get_pattern - - # Verify pattern access works - assert get_pattern("class_def") is not None - assert get_pattern("func_def") is not None - - def test_pattern_consistency(self): - """Test pattern definitions are consistent.""" - # Patterns used by processor should exist in pattern module - patterns_needed = [ - "class_def", - "class_with_bases", - "func_def", - "async_func_def", - "import_stmt", - "import_from", - "call", - ] - for pattern_name in patterns_needed: - # Should not raise KeyError - pattern = get_pattern(pattern_name) - assert pattern is not None - assert len(pattern) > 0 diff --git a/codex-lens/tests/parsers/test_comparison.py b/codex-lens/tests/parsers/test_comparison.py deleted file mode 100644 index 9c9840a0..00000000 --- a/codex-lens/tests/parsers/test_comparison.py +++ /dev/null @@ -1,525 +0,0 @@ -"""Comparison tests for tree-sitter vs ast-grep Python relationship extraction. - -Validates that both parsers produce consistent output for Python relationship -extraction (INHERITS, CALL, IMPORTS). -""" - -from __future__ import annotations - -from pathlib import Path -from typing import List, Set, Tuple - -import pytest - -from codexlens.config import Config -from codexlens.entities import CodeRelationship, RelationshipType -from codexlens.parsers.treesitter_parser import TreeSitterSymbolParser - - -# Sample Python code for testing relationship extraction -SAMPLE_PYTHON_CODE = ''' -"""Module docstring.""" -import os -import sys -from typing import List, Dict, Optional -from collections import defaultdict as dd -from pathlib import Path as PPath - -class BaseClass: - """Base class.""" - - def base_method(self): - pass - - def another_method(self): - return self.base_method() - - -class Mixin: - """Mixin class.""" - - def mixin_func(self): - return "mixin" - - -class ChildClass(BaseClass, Mixin): - """Child class with multiple inheritance.""" - - def __init__(self): - super().__init__() - self.data = dd(list) - - def process(self, items: List[str]) -> Dict[str, int]: - result = {} - for item in items: - result[item] = len(item) - return result - - def call_external(self, path: str) -> Optional[str]: - p = PPath(path) - if p.exists(): - return str(p.read_text()) - return None - - -def standalone_function(): - """Standalone function.""" - data = [1, 2, 3] - return sum(data) - - -async def async_function(): - """Async function.""" - import asyncio - await asyncio.sleep(1) -''' - - -def relationship_to_tuple(rel: CodeRelationship) -> Tuple[str, str, str, int]: - """Convert relationship to a comparable tuple. - - Returns: - (source_symbol, target_symbol, relationship_type, source_line) - """ - return ( - rel.source_symbol, - rel.target_symbol, - rel.relationship_type.value, - rel.source_line, - ) - - -def extract_relationship_tuples( - relationships: List[CodeRelationship], -) -> Set[Tuple[str, str, str]]: - """Extract relationship tuples without line numbers for comparison. - - Returns: - Set of (source_symbol, target_symbol, relationship_type) tuples - """ - return { - (rel.source_symbol, rel.target_symbol, rel.relationship_type.value) - for rel in relationships - } - - -def filter_by_type( - relationships: List[CodeRelationship], - rel_type: RelationshipType, -) -> List[CodeRelationship]: - """Filter relationships by type.""" - return [r for r in relationships if r.relationship_type == rel_type] - - -class TestTreeSitterVsAstGrepComparison: - """Compare tree-sitter and ast-grep Python relationship extraction.""" - - @pytest.fixture - def sample_path(self, tmp_path: Path) -> Path: - """Create a temporary Python file with sample code.""" - py_file = tmp_path / "sample.py" - py_file.write_text(SAMPLE_PYTHON_CODE) - return py_file - - @pytest.fixture - def ts_parser_default(self) -> TreeSitterSymbolParser: - """Create tree-sitter parser with default config (use_astgrep=False).""" - config = Config() - assert config.use_astgrep is False - return TreeSitterSymbolParser("python", config=config) - - @pytest.fixture - def ts_parser_astgrep(self) -> TreeSitterSymbolParser: - """Create tree-sitter parser with ast-grep enabled.""" - config = Config() - config.use_astgrep = True - return TreeSitterSymbolParser("python", config=config) - - def test_parser_availability(self, ts_parser_default: TreeSitterSymbolParser) -> None: - """Test that tree-sitter parser is available.""" - assert ts_parser_default.is_available() - - def test_astgrep_processor_initialization( - self, ts_parser_astgrep: TreeSitterSymbolParser - ) -> None: - """Test that ast-grep processor is initialized when config enables it.""" - # The processor should be initialized (may be None if ast-grep-py not installed) - # This test just verifies the initialization path works - assert ts_parser_astgrep._config is not None - assert ts_parser_astgrep._config.use_astgrep is True - - def _skip_if_astgrep_unavailable( - self, ts_parser_astgrep: TreeSitterSymbolParser - ) -> None: - """Skip test if ast-grep is not available.""" - if ts_parser_astgrep._astgrep_processor is None: - pytest.skip("ast-grep-py not installed") - - def test_parse_returns_valid_result( - self, - ts_parser_default: TreeSitterSymbolParser, - sample_path: Path, - ) -> None: - """Test that parsing returns a valid IndexedFile.""" - source_code = sample_path.read_text() - result = ts_parser_default.parse(source_code, sample_path) - - assert result is not None - assert result.language == "python" - assert len(result.symbols) > 0 - assert len(result.relationships) > 0 - - def test_extracted_symbols_match( - self, - ts_parser_default: TreeSitterSymbolParser, - ts_parser_astgrep: TreeSitterSymbolParser, - sample_path: Path, - ) -> None: - """Test that both parsers extract similar symbols.""" - self._skip_if_astgrep_unavailable(ts_parser_astgrep) - - source_code = sample_path.read_text() - - result_ts = ts_parser_default.parse(source_code, sample_path) - result_astgrep = ts_parser_astgrep.parse(source_code, sample_path) - - assert result_ts is not None - assert result_astgrep is not None - - # Compare symbol names - ts_symbols = {s.name for s in result_ts.symbols} - astgrep_symbols = {s.name for s in result_astgrep.symbols} - - # Should have the same symbols (classes, functions, methods) - assert ts_symbols == astgrep_symbols - - def test_inheritance_relationships( - self, - ts_parser_default: TreeSitterSymbolParser, - ts_parser_astgrep: TreeSitterSymbolParser, - sample_path: Path, - ) -> None: - """Test INHERITS relationship extraction consistency.""" - self._skip_if_astgrep_unavailable(ts_parser_astgrep) - - source_code = sample_path.read_text() - - result_ts = ts_parser_default.parse(source_code, sample_path) - result_astgrep = ts_parser_astgrep.parse(source_code, sample_path) - - assert result_ts is not None - assert result_astgrep is not None - - # Extract inheritance relationships - ts_inherits = filter_by_type(result_ts.relationships, RelationshipType.INHERITS) - astgrep_inherits = filter_by_type( - result_astgrep.relationships, RelationshipType.INHERITS - ) - - ts_tuples = extract_relationship_tuples(ts_inherits) - astgrep_tuples = extract_relationship_tuples(astgrep_inherits) - - # Both should detect ChildClass(BaseClass, Mixin) - assert ts_tuples == astgrep_tuples - - # Verify specific inheritance relationships - expected_inherits = { - ("ChildClass", "BaseClass", "inherits"), - ("ChildClass", "Mixin", "inherits"), - } - assert ts_tuples == expected_inherits - - def test_import_relationships( - self, - ts_parser_default: TreeSitterSymbolParser, - ts_parser_astgrep: TreeSitterSymbolParser, - sample_path: Path, - ) -> None: - """Test IMPORTS relationship extraction consistency.""" - self._skip_if_astgrep_unavailable(ts_parser_astgrep) - - source_code = sample_path.read_text() - - result_ts = ts_parser_default.parse(source_code, sample_path) - result_astgrep = ts_parser_astgrep.parse(source_code, sample_path) - - assert result_ts is not None - assert result_astgrep is not None - - # Extract import relationships - ts_imports = filter_by_type(result_ts.relationships, RelationshipType.IMPORTS) - astgrep_imports = filter_by_type( - result_astgrep.relationships, RelationshipType.IMPORTS - ) - - ts_tuples = extract_relationship_tuples(ts_imports) - astgrep_tuples = extract_relationship_tuples(astgrep_imports) - - # Compare - should be similar (may differ in exact module representation) - # At minimum, both should detect the top-level imports - ts_modules = {t[1].split(".")[0] for t in ts_tuples} - astgrep_modules = {t[1].split(".")[0] for t in astgrep_tuples} - - # Should have imports from: os, sys, typing, collections, pathlib - expected_modules = {"os", "sys", "typing", "collections", "pathlib", "asyncio"} - assert ts_modules >= expected_modules or astgrep_modules >= expected_modules - - def test_call_relationships( - self, - ts_parser_default: TreeSitterSymbolParser, - ts_parser_astgrep: TreeSitterSymbolParser, - sample_path: Path, - ) -> None: - """Test CALL relationship extraction consistency.""" - self._skip_if_astgrep_unavailable(ts_parser_astgrep) - - source_code = sample_path.read_text() - - result_ts = ts_parser_default.parse(source_code, sample_path) - result_astgrep = ts_parser_astgrep.parse(source_code, sample_path) - - assert result_ts is not None - assert result_astgrep is not None - - # Extract call relationships - ts_calls = filter_by_type(result_ts.relationships, RelationshipType.CALL) - astgrep_calls = filter_by_type( - result_astgrep.relationships, RelationshipType.CALL - ) - - # Calls may differ due to scope tracking differences - # Just verify both parsers find call relationships - assert len(ts_calls) > 0 - assert len(astgrep_calls) > 0 - - # Verify specific calls that should be detected - ts_call_targets = {r.target_symbol for r in ts_calls} - astgrep_call_targets = {r.target_symbol for r in astgrep_calls} - - # Both should detect at least some common calls - # (exact match not required due to scope tracking differences) - common_targets = ts_call_targets & astgrep_call_targets - assert len(common_targets) > 0 - - def test_relationship_count_similarity( - self, - ts_parser_default: TreeSitterSymbolParser, - ts_parser_astgrep: TreeSitterSymbolParser, - sample_path: Path, - ) -> None: - """Test that relationship counts are similar (>95% consistency).""" - self._skip_if_astgrep_unavailable(ts_parser_astgrep) - - source_code = sample_path.read_text() - - result_ts = ts_parser_default.parse(source_code, sample_path) - result_astgrep = ts_parser_astgrep.parse(source_code, sample_path) - - assert result_ts is not None - assert result_astgrep is not None - - ts_count = len(result_ts.relationships) - astgrep_count = len(result_astgrep.relationships) - - # Calculate consistency percentage - if max(ts_count, astgrep_count) == 0: - consistency = 100.0 - else: - consistency = ( - min(ts_count, astgrep_count) / max(ts_count, astgrep_count) * 100 - ) - - # Require >95% consistency - assert consistency >= 95.0, ( - f"Relationship consistency {consistency:.1f}% below 95% threshold " - f"(tree-sitter: {ts_count}, ast-grep: {astgrep_count})" - ) - - def test_config_switch_affects_parser( - self, sample_path: Path - ) -> None: - """Test that config.use_astgrep affects which parser is used.""" - config_default = Config() - config_astgrep = Config() - config_astgrep.use_astgrep = True - - parser_default = TreeSitterSymbolParser("python", config=config_default) - parser_astgrep = TreeSitterSymbolParser("python", config=config_astgrep) - - # Default parser should not have ast-grep processor - assert parser_default._astgrep_processor is None - - # Ast-grep parser may have processor if ast-grep-py is installed - # (could be None if not installed, which is fine) - if parser_astgrep._astgrep_processor is not None: - # If available, verify it's the right type - from codexlens.parsers.astgrep_processor import AstGrepPythonProcessor - - assert isinstance( - parser_astgrep._astgrep_processor, AstGrepPythonProcessor - ) - - def test_fallback_to_treesitter_on_astgrep_failure( - self, - ts_parser_astgrep: TreeSitterSymbolParser, - sample_path: Path, - ) -> None: - """Test that parser falls back to tree-sitter if ast-grep fails.""" - source_code = sample_path.read_text() - - # Even with use_astgrep=True, should get valid results - result = ts_parser_astgrep.parse(source_code, sample_path) - - # Should always return a valid result (either from ast-grep or tree-sitter fallback) - assert result is not None - assert result.language == "python" - assert len(result.relationships) > 0 - - -class TestSimpleCodeSamples: - """Test with simple code samples for precise comparison.""" - - def test_simple_inheritance(self) -> None: - """Test simple single inheritance.""" - code = """ -class Parent: - pass - -class Child(Parent): - pass -""" - self._compare_parsers(code, expected_inherits={("Child", "Parent")}) - - def test_multiple_inheritance(self) -> None: - """Test multiple inheritance.""" - code = """ -class A: - pass - -class B: - pass - -class C(A, B): - pass -""" - self._compare_parsers( - code, expected_inherits={("C", "A"), ("C", "B")} - ) - - def test_simple_imports(self) -> None: - """Test simple import statements.""" - code = """ -import os -import sys -""" - config_ts = Config() - config_ag = Config() - config_ag.use_astgrep = True - - parser_ts = TreeSitterSymbolParser("python", config=config_ts) - parser_ag = TreeSitterSymbolParser("python", config=config_ag) - - tmp_path = Path("test.py") - result_ts = parser_ts.parse(code, tmp_path) - result_ag = parser_ag.parse(code, tmp_path) - - assert result_ts is not None - # ast-grep result may be None if not installed - - if result_ag is not None: - ts_imports = { - r.target_symbol - for r in result_ts.relationships - if r.relationship_type == RelationshipType.IMPORTS - } - ag_imports = { - r.target_symbol - for r in result_ag.relationships - if r.relationship_type == RelationshipType.IMPORTS - } - assert ts_imports == ag_imports - - def test_imports_inside_function(self) -> None: - """Test simple import inside a function scope is recorded. - - Note: module-level imports are recorded under a synthetic "" scope. - This test ensures imports inside a function scope are also recorded. - """ - code = """ -def my_function(): - import collections - return collections -""" - config_ts = Config() - config_ag = Config() - config_ag.use_astgrep = True - - parser_ts = TreeSitterSymbolParser("python", config=config_ts) - parser_ag = TreeSitterSymbolParser("python", config=config_ag) - - tmp_path = Path("test.py") - result_ts = parser_ts.parse(code, tmp_path) - result_ag = parser_ag.parse(code, tmp_path) - - assert result_ts is not None - - # Get import relationship targets - ts_imports = [ - r.target_symbol - for r in result_ts.relationships - if r.relationship_type == RelationshipType.IMPORTS - ] - - # Should have collections - ts_has_collections = any("collections" in t for t in ts_imports) - assert ts_has_collections, f"Expected collections import, got: {ts_imports}" - - # If ast-grep is available, verify it also finds the imports - if result_ag is not None: - ag_imports = [ - r.target_symbol - for r in result_ag.relationships - if r.relationship_type == RelationshipType.IMPORTS - ] - ag_has_collections = any("collections" in t for t in ag_imports) - assert ag_has_collections, f"Expected collections import in ast-grep, got: {ag_imports}" - - def _compare_parsers( - self, - code: str, - expected_inherits: Set[Tuple[str, str]], - ) -> None: - """Helper to compare parser outputs for inheritance.""" - config_ts = Config() - config_ag = Config() - config_ag.use_astgrep = True - - parser_ts = TreeSitterSymbolParser("python", config=config_ts) - parser_ag = TreeSitterSymbolParser("python", config=config_ag) - - tmp_path = Path("test.py") - result_ts = parser_ts.parse(code, tmp_path) - - assert result_ts is not None - - # Verify tree-sitter finds expected inheritance - ts_inherits = { - (r.source_symbol, r.target_symbol) - for r in result_ts.relationships - if r.relationship_type == RelationshipType.INHERITS - } - assert ts_inherits == expected_inherits - - # If ast-grep is available, verify it matches - result_ag = parser_ag.parse(code, tmp_path) - if result_ag is not None: - ag_inherits = { - (r.source_symbol, r.target_symbol) - for r in result_ag.relationships - if r.relationship_type == RelationshipType.INHERITS - } - assert ag_inherits == expected_inherits - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/codex-lens/tests/parsers/test_comparison_js_ts.py b/codex-lens/tests/parsers/test_comparison_js_ts.py deleted file mode 100644 index a5a1d1b6..00000000 --- a/codex-lens/tests/parsers/test_comparison_js_ts.py +++ /dev/null @@ -1,150 +0,0 @@ -"""Comparison tests for tree-sitter vs ast-grep JS/TS relationship extraction. - -These tests focus on stable, high-signal relationship types used by the -static graph pipeline: -- IMPORTS -- INHERITS - -If ast-grep-py is not installed, tests are skipped. -""" - -from __future__ import annotations - -from pathlib import Path -from typing import List, Set, Tuple - -import pytest - -from codexlens.config import Config -from codexlens.entities import CodeRelationship, RelationshipType -from codexlens.parsers.treesitter_parser import TreeSitterSymbolParser - - -SAMPLE_JS_CODE = """ -import React, { useEffect as useEf } from "react"; -import { foo } from "./foo"; -import "./styles.css"; -const fs = require("fs"); - -class Base {} -class Child extends Base { - method() { - console.log("hi"); - } -} -""" - - -SAMPLE_TS_CODE = """ -import type { Foo } from "pkg"; -import { bar as baz } from "./bar"; - -interface MyInterface extends Foo {} - -class Base {} -class Child extends Base {} -""" - - -def extract_relationship_tuples( - relationships: List[CodeRelationship], - *, - only_types: Set[RelationshipType], -) -> Set[Tuple[str, str, str]]: - return { - (rel.source_symbol, rel.target_symbol, rel.relationship_type.value) - for rel in relationships - if rel.relationship_type in only_types - } - - -def _skip_if_astgrep_unavailable(parser: TreeSitterSymbolParser) -> None: - if parser._astgrep_processor is None or not parser._astgrep_processor.is_available(): # type: ignore[attr-defined] - pytest.skip("ast-grep-py not installed or language not supported") - - -def test_js_imports_and_inherits_match(tmp_path: Path) -> None: - js_file = tmp_path / "sample.js" - js_file.write_text(SAMPLE_JS_CODE, encoding="utf-8") - source = js_file.read_text(encoding="utf-8") - - config_default = Config() - config_default.use_astgrep = False - ts_default = TreeSitterSymbolParser("javascript", js_file, config=config_default) - - config_ast = Config() - config_ast.use_astgrep = True - ts_ast = TreeSitterSymbolParser("javascript", js_file, config=config_ast) - _skip_if_astgrep_unavailable(ts_ast) - - result_ts = ts_default.parse(source, js_file) - result_ast = ts_ast.parse(source, js_file) - - assert result_ts is not None - assert result_ast is not None - - ts_imports = extract_relationship_tuples( - result_ts.relationships, - only_types={RelationshipType.IMPORTS}, - ) - ast_imports = extract_relationship_tuples( - result_ast.relationships, - only_types={RelationshipType.IMPORTS}, - ) - assert ast_imports == ts_imports - - ts_inherits = extract_relationship_tuples( - result_ts.relationships, - only_types={RelationshipType.INHERITS}, - ) - ast_inherits = extract_relationship_tuples( - result_ast.relationships, - only_types={RelationshipType.INHERITS}, - ) - # Ast-grep may include inheritance edges that the tree-sitter extractor does not currently emit. - assert ts_inherits.issubset(ast_inherits) - assert ("Child", "Base", "inherits") in ast_inherits - - -def test_ts_imports_match_and_inherits_superset(tmp_path: Path) -> None: - ts_file = tmp_path / "sample.ts" - ts_file.write_text(SAMPLE_TS_CODE, encoding="utf-8") - source = ts_file.read_text(encoding="utf-8") - - config_default = Config() - config_default.use_astgrep = False - ts_default = TreeSitterSymbolParser("typescript", ts_file, config=config_default) - - config_ast = Config() - config_ast.use_astgrep = True - ts_ast = TreeSitterSymbolParser("typescript", ts_file, config=config_ast) - _skip_if_astgrep_unavailable(ts_ast) - - result_ts = ts_default.parse(source, ts_file) - result_ast = ts_ast.parse(source, ts_file) - - assert result_ts is not None - assert result_ast is not None - - ts_imports = extract_relationship_tuples( - result_ts.relationships, - only_types={RelationshipType.IMPORTS}, - ) - ast_imports = extract_relationship_tuples( - result_ast.relationships, - only_types={RelationshipType.IMPORTS}, - ) - assert ast_imports == ts_imports - - ts_inherits = extract_relationship_tuples( - result_ts.relationships, - only_types={RelationshipType.INHERITS}, - ) - ast_inherits = extract_relationship_tuples( - result_ast.relationships, - only_types={RelationshipType.INHERITS}, - ) - # Ast-grep may include additional TypeScript inheritance edges (e.g., interface extends). - assert ts_inherits.issubset(ast_inherits) - # But at minimum, class inheritance should be present. - assert ("Child", "Base", "inherits") in ast_inherits diff --git a/codex-lens/tests/real/__init__.py b/codex-lens/tests/real/__init__.py deleted file mode 100644 index da6c5ff3..00000000 --- a/codex-lens/tests/real/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Real interface tests for LSP integration. - -These tests require VSCode Bridge to be running. -See test_lsp_real_interface.py for details. -""" diff --git a/codex-lens/tests/real/comparison_test.py b/codex-lens/tests/real/comparison_test.py deleted file mode 100644 index da19a601..00000000 --- a/codex-lens/tests/real/comparison_test.py +++ /dev/null @@ -1,162 +0,0 @@ -#!/usr/bin/env python -"""Direct comparison: standalone manager vs direct subprocess.""" - -import asyncio -import json -import sys -from pathlib import Path - -sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) - -async def test_direct(): - """Direct subprocess test that WORKS.""" - print("\n=== DIRECT SUBPROCESS TEST ===") - - process = await asyncio.create_subprocess_exec( - 'pyright-langserver', '--stdio', - stdin=asyncio.subprocess.PIPE, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - cwd=str(Path(__file__).parent.parent.parent), - ) - - def encode(msg): - body = json.dumps(msg).encode('utf-8') - header = f'Content-Length: {len(body)}\r\n\r\n'.encode('ascii') - return header + body - - async def read_message(timeout=5.0): - content_length = 0 - while True: - try: - line = await asyncio.wait_for(process.stdout.readline(), timeout=timeout) - except asyncio.TimeoutError: - return None - if not line: - return None - line_str = line.decode('ascii').strip() - if not line_str: - break - if line_str.lower().startswith('content-length:'): - content_length = int(line_str.split(':')[1].strip()) - if content_length == 0: - return None - body = await process.stdout.readexactly(content_length) - return json.loads(body.decode('utf-8')) - - # Initialize - init = { - 'jsonrpc': '2.0', 'id': 1, 'method': 'initialize', - 'params': { - 'processId': 12345, - 'rootUri': 'file:///D:/Claude_dms3/codex-lens', - 'rootPath': 'D:/Claude_dms3/codex-lens', - 'capabilities': { - 'textDocument': { - 'synchronization': {'dynamicRegistration': False}, - 'documentSymbol': {'hierarchicalDocumentSymbolSupport': True}, - }, - 'workspace': {'configuration': True, 'workspaceFolders': True}, - }, - 'workspaceFolders': [{'uri': 'file:///D:/Claude_dms3/codex-lens', 'name': 'codex-lens'}], - 'initializationOptions': {}, - } - } - process.stdin.write(encode(init)) - await process.stdin.drain() - - while True: - msg = await read_message(5.0) - if msg is None or msg.get('id') == 1: - print(f" Got initialize response") - break - - # Initialized - process.stdin.write(encode({'jsonrpc': '2.0', 'method': 'initialized', 'params': {}})) - await process.stdin.drain() - print(" Sent initialized") - - # didOpen with simple content - did_open = { - 'jsonrpc': '2.0', 'method': 'textDocument/didOpen', - 'params': { - 'textDocument': { - 'uri': 'file:///D:/Claude_dms3/codex-lens/simple.py', - 'languageId': 'python', - 'version': 1, - 'text': 'def hello():\n pass\n' - } - } - } - process.stdin.write(encode(did_open)) - await process.stdin.drain() - print(" Sent didOpen") - - # Read and respond to configuration requests - print(" Waiting for messages...") - for i in range(15): - msg = await read_message(2.0) - if msg is None: - continue - method = msg.get('method') - print(f" RECV: id={msg.get('id')}, method={method}") - if method == 'workspace/configuration': - process.stdin.write(encode({'jsonrpc': '2.0', 'id': msg['id'], 'result': [{}]})) - await process.stdin.drain() - if method == 'textDocument/publishDiagnostics': - break - - # documentSymbol - doc_sym = { - 'jsonrpc': '2.0', 'id': 2, 'method': 'textDocument/documentSymbol', - 'params': {'textDocument': {'uri': 'file:///D:/Claude_dms3/codex-lens/simple.py'}} - } - process.stdin.write(encode(doc_sym)) - await process.stdin.drain() - print(" Sent documentSymbol") - - for i in range(5): - msg = await read_message(3.0) - if msg is None: - continue - if msg.get('id') == 2: - result = msg.get('result', []) - print(f" GOT {len(result)} SYMBOLS!") - break - - process.terminate() - await process.wait() - - -async def test_manager(): - """Standalone manager test that FAILS.""" - print("\n=== STANDALONE MANAGER TEST ===") - - from codexlens.lsp.standalone_manager import StandaloneLspManager - - workspace = Path(__file__).parent.parent.parent - manager = StandaloneLspManager( - workspace_root=str(workspace), - timeout=30.0 - ) - - await manager.start() - - simple_file = workspace / "simple.py" - simple_file.write_text('def hello():\n pass\n') - - try: - symbols = await manager.get_document_symbols(str(simple_file)) - print(f" GOT {len(symbols)} SYMBOLS!") - finally: - simple_file.unlink(missing_ok=True) - await manager.stop() - - -async def main(): - await test_direct() - await test_manager() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/codex-lens/tests/real/concurrent_test.py b/codex-lens/tests/real/concurrent_test.py deleted file mode 100644 index 08ba4162..00000000 --- a/codex-lens/tests/real/concurrent_test.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python -"""Test concurrent read loop behavior.""" - -import asyncio -import sys -from pathlib import Path - -sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) - -import logging -logging.basicConfig(level=logging.DEBUG, format='%(name)s - %(levelname)s - %(message)s') - -from codexlens.lsp.standalone_manager import StandaloneLspManager - -async def test(): - workspace = Path(__file__).parent.parent.parent - manager = StandaloneLspManager( - workspace_root=str(workspace), - timeout=30.0 - ) - - await manager.start() - - # Get server for a simple file - simple_content = "def hello():\n pass\n" - simple_file = workspace / "test_simple.py" - simple_file.write_text(simple_content) - - try: - print("\n=== Getting server ===") - state = await manager._get_server(str(simple_file)) - print(f"Server state: initialized={state.initialized if state else 'None'}") - - print("\n=== Sending didOpen ===") - await manager._send_notification(state, "textDocument/didOpen", { - "textDocument": { - "uri": simple_file.as_uri(), - "languageId": "python", - "version": 1, - "text": simple_content, - } - }) - - print("\n=== Waiting 5 seconds - watch for server requests ===") - for i in range(5): - print(f" Tick {i+1}...") - await asyncio.sleep(1.0) - - print("\n=== Sending documentSymbol ===") - result = await manager._send_request( - state, - "textDocument/documentSymbol", - {"textDocument": {"uri": simple_file.as_uri()}}, - timeout=10.0 - ) - print(f"Result: {result}") - - finally: - simple_file.unlink(missing_ok=True) - await manager.stop() - -if __name__ == "__main__": - asyncio.run(test()) diff --git a/codex-lens/tests/real/debug_compare.py b/codex-lens/tests/real/debug_compare.py deleted file mode 100644 index 77f3b022..00000000 --- a/codex-lens/tests/real/debug_compare.py +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env python -"""Compare manager read behavior vs direct read.""" - -import asyncio -import json -import sys -from pathlib import Path - -sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) - -from codexlens.lsp.standalone_manager import StandaloneLspManager - - -async def direct_test(): - """Direct communication - this works.""" - workspace = Path(__file__).parent.parent.parent - print("\n=== DIRECT TEST ===") - - process = await asyncio.create_subprocess_exec( - "pyright-langserver", "--stdio", - stdin=asyncio.subprocess.PIPE, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - cwd=str(workspace), - ) - - def encode_message(content): - body = json.dumps(content).encode("utf-8") - header = f"Content-Length: {len(body)}\r\n\r\n" - return header.encode("ascii") + body - - async def send(message): - encoded = encode_message(message) - process.stdin.write(encoded) - await process.stdin.drain() - msg_desc = message.get('method') or f"response id={message.get('id')}" - print(f" SENT: {msg_desc}") - - async def read_one(): - content_length = 0 - while True: - line = await asyncio.wait_for(process.stdout.readline(), timeout=3.0) - if not line: - return None - line_str = line.decode("ascii").strip() - if not line_str: - break - if line_str.lower().startswith("content-length:"): - content_length = int(line_str.split(":")[1].strip()) - if content_length == 0: - return None - body = await process.stdout.readexactly(content_length) - return json.loads(body.decode("utf-8")) - - # Initialize - print(" Sending initialize...") - await send({ - "jsonrpc": "2.0", "id": 1, "method": "initialize", - "params": { - "processId": None, - "rootUri": workspace.as_uri(), - "capabilities": {"workspace": {"configuration": True}}, - "workspaceFolders": [{"uri": workspace.as_uri(), "name": workspace.name}], - }, - }) - - # Read until response - while True: - msg = await read_one() - if msg and msg.get("id") == 1: - print(f" Initialize response OK") - break - elif msg: - print(f" Notification: {msg.get('method')}") - - # Send initialized - print(" Sending initialized...") - await send({"jsonrpc": "2.0", "method": "initialized", "params": {}}) - - # Check for workspace/configuration - print(" Checking for workspace/configuration (3s timeout)...") - try: - for i in range(10): - msg = await read_one() - if msg: - method = msg.get("method") - msg_id = msg.get("id") - print(f" RECV: {method or 'response'} (id={msg_id})") - if method == "workspace/configuration": - print(" SUCCESS: workspace/configuration received!") - break - except asyncio.TimeoutError: - print(" TIMEOUT: No more messages") - - process.terminate() - await process.wait() - - -async def manager_test(): - """Manager communication - investigating why this doesn't work.""" - workspace = Path(__file__).parent.parent.parent - print("\n=== MANAGER TEST ===") - - manager = StandaloneLspManager( - workspace_root=str(workspace), - timeout=60.0 - ) - await manager.start() - - # Just check if server initialized - state = manager._servers.get("python") - if state: - print(f" Server initialized: {state.initialized}") - print(f" Capabilities: {len(state.capabilities)} keys") - else: - # Force initialization by getting server for a Python file - print(" Getting server for Python file...") - test_file = workspace / "tests" / "real" / "debug_compare.py" - state = await manager._get_server(str(test_file)) - if state: - print(f" Server initialized: {state.initialized}") - - # Try to read directly from state.reader - if state: - print("\n Direct read test from state.reader:") - print(f" state.reader is: {type(state.reader)}") - print(f" state.reader at_eof: {state.reader.at_eof()}") - - # Check if there's data available - try: - line = await asyncio.wait_for(state.reader.readline(), timeout=1.0) - if line: - print(f" Got line: {line[:50]}...") - else: - print(f" readline returned empty (EOF)") - except asyncio.TimeoutError: - print(f" readline timed out (no data)") - - await manager.stop() - - -async def main(): - await direct_test() - await manager_test() - print("\n=== DONE ===") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/codex-lens/tests/real/debug_config.py b/codex-lens/tests/real/debug_config.py deleted file mode 100644 index 90fae268..00000000 --- a/codex-lens/tests/real/debug_config.py +++ /dev/null @@ -1,216 +0,0 @@ -#!/usr/bin/env python -"""Test if pyright sends workspace/configuration after initialized.""" - -import asyncio -import json -import sys -from pathlib import Path - -# Add source to path -sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) - - -async def read_message_direct(reader): - """Read a JSON-RPC message - direct blocking read, no timeout.""" - content_length = 0 - while True: - line = await reader.readline() - if not line: - return None - line_str = line.decode("ascii").strip() - if not line_str: - break - if line_str.lower().startswith("content-length:"): - content_length = int(line_str.split(":")[1].strip()) - - if content_length == 0: - return None - - body = await reader.readexactly(content_length) - return json.loads(body.decode("utf-8")) - - -async def main(): - workspace = Path(__file__).parent.parent.parent - print(f"Workspace: {workspace}") - - # Start pyright - exactly like in direct test - process = await asyncio.create_subprocess_exec( - "pyright-langserver", "--stdio", - stdin=asyncio.subprocess.PIPE, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - cwd=str(workspace), - ) - - def encode_message(content): - body = json.dumps(content).encode("utf-8") - header = f"Content-Length: {len(body)}\r\n\r\n" - return header.encode("ascii") + body - - async def send(message): - encoded = encode_message(message) - process.stdin.write(encoded) - await process.stdin.drain() - method_or_resp = message.get('method') or f"response id={message.get('id')}" - print(f"SENT: {method_or_resp} ({len(encoded)} bytes)") - - # Start stderr reader - async def read_stderr(): - while True: - line = await process.stderr.readline() - if not line: - break - print(f"[stderr] {line.decode('utf-8', errors='replace').rstrip()}") - asyncio.create_task(read_stderr()) - - print("\n=== INITIALIZE ===") - await send({ - "jsonrpc": "2.0", - "id": 1, - "method": "initialize", - "params": { - "processId": None, - "rootUri": workspace.as_uri(), - "rootPath": str(workspace), - "capabilities": { - "workspace": {"configuration": True}, - }, - "workspaceFolders": [{"uri": workspace.as_uri(), "name": workspace.name}], - }, - }) - - # Read until we get initialize response - print("Reading initialize response...") - while True: - msg = await asyncio.wait_for(read_message_direct(process.stdout), timeout=10) - if msg is None: - break - method = msg.get("method") - msg_id = msg.get("id") - if method: - print(f"RECV: {method} (notification)") - else: - print(f"RECV: response id={msg_id}") - if msg_id == 1: - print("Initialize OK!") - break - - print("\n=== SEND INITIALIZED ===") - await send({ - "jsonrpc": "2.0", - "method": "initialized", - "params": {}, - }) - - # Now, here's the key test - will we receive workspace/configuration? - print("\n=== WAIT FOR workspace/configuration ===") - print("Reading with 5 second timeout...") - - try: - for i in range(10): - msg = await asyncio.wait_for(read_message_direct(process.stdout), timeout=2) - if msg is None: - print("EOF") - break - method = msg.get("method") - msg_id = msg.get("id") - print(f"RECV: method={method}, id={msg_id}") - - # Respond to server requests - if msg_id is not None and method: - if method == "workspace/configuration": - print(" -> Got workspace/configuration! Responding...") - await send({ - "jsonrpc": "2.0", - "id": msg_id, - "result": [{} for _ in msg.get("params", {}).get("items", [])], - }) - else: - print(f" -> Responding to {method}") - await send({"jsonrpc": "2.0", "id": msg_id, "result": None}) - except asyncio.TimeoutError: - print("No more messages (timeout)") - - print("\n=== Now start background read task like manager does ===") - - # Store references like manager does - reader = process.stdout # This is how manager does it - writer = process.stdin - - # Start background read task - async def bg_read_loop(): - print("[BG] Read loop started") - try: - while True: - await asyncio.sleep(0) - try: - msg = await asyncio.wait_for(read_message_direct(reader), timeout=1.0) - if msg is None: - print("[BG] Stream closed") - break - bg_method = msg.get('method') or f"response id={msg.get('id')}" - print(f"[BG] RECV: {bg_method}") - - # Handle server requests - method = msg.get("method") - msg_id = msg.get("id") - if msg_id is not None and method: - print(f"[BG] Responding to {method}") - await send({"jsonrpc": "2.0", "id": msg_id, "result": None}) - except asyncio.TimeoutError: - print("[BG] timeout") - except asyncio.CancelledError: - print("[BG] Cancelled") - - read_task = asyncio.create_task(bg_read_loop()) - - # Wait a moment - await asyncio.sleep(0.5) - - # Now send didOpen and documentSymbol like manager does - print("\n=== SEND didOpen ===") - test_file = workspace / "tests" / "real" / "debug_config.py" - await send({ - "jsonrpc": "2.0", - "method": "textDocument/didOpen", - "params": { - "textDocument": { - "uri": test_file.as_uri(), - "languageId": "python", - "version": 1, - "text": test_file.read_text(), - }, - }, - }) - - # Wait for processing - await asyncio.sleep(2) - - print("\n=== SEND documentSymbol ===") - await send({ - "jsonrpc": "2.0", - "id": 2, - "method": "textDocument/documentSymbol", - "params": {"textDocument": {"uri": test_file.as_uri()}}, - }) - - # Wait for response - print("Waiting for documentSymbol response (max 30s)...") - deadline = asyncio.get_event_loop().time() + 30 - while asyncio.get_event_loop().time() < deadline: - await asyncio.sleep(0.5) - # The background task will print when it receives the response - - read_task.cancel() - try: - await read_task - except asyncio.CancelledError: - pass - - process.terminate() - print("\nDone!") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/codex-lens/tests/real/debug_direct.py b/codex-lens/tests/real/debug_direct.py deleted file mode 100644 index 99e4b992..00000000 --- a/codex-lens/tests/real/debug_direct.py +++ /dev/null @@ -1,320 +0,0 @@ -#!/usr/bin/env python -"""Minimal direct test of pyright LSP communication.""" - -import asyncio -import json -import sys -from pathlib import Path - - -async def send_message(writer, message): - """Send a JSON-RPC message.""" - body = json.dumps(message).encode("utf-8") - header = f"Content-Length: {len(body)}\r\n\r\n".encode("ascii") - writer.write(header + body) - await writer.drain() - print(f"SENT: {message.get('method', 'response')} (id={message.get('id', 'N/A')})") - - -async def read_message(reader): - """Read a JSON-RPC message.""" - # Read headers - content_length = 0 - while True: - line = await reader.readline() - if not line: - return None - line_str = line.decode("ascii").strip() - if not line_str: - break - if line_str.lower().startswith("content-length:"): - content_length = int(line_str.split(":")[1].strip()) - - if content_length == 0: - return None - - # Read body - body = await reader.readexactly(content_length) - return json.loads(body.decode("utf-8")) - - -async def main(): - workspace = Path(__file__).parent.parent.parent - test_file = workspace / "tests" / "real" / "debug_direct.py" - - print(f"Workspace: {workspace}") - print(f"Test file: {test_file}") - print() - - # Start pyright - print("Starting pyright-langserver...") - process = await asyncio.create_subprocess_exec( - "pyright-langserver", "--stdio", - stdin=asyncio.subprocess.PIPE, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - cwd=str(workspace), - ) - - # Start stderr reader - async def read_stderr(): - while True: - line = await process.stderr.readline() - if not line: - break - print(f"[stderr] {line.decode('utf-8', errors='replace').rstrip()}") - - stderr_task = asyncio.create_task(read_stderr()) - - try: - # 1. Send initialize - print("\n=== INITIALIZE ===") - await send_message(process.stdin, { - "jsonrpc": "2.0", - "id": 1, - "method": "initialize", - "params": { - "processId": None, - "rootUri": workspace.as_uri(), - "rootPath": str(workspace), - "capabilities": { - "textDocument": { - "documentSymbol": { - "hierarchicalDocumentSymbolSupport": True, - }, - }, - "workspace": { - "configuration": True, - }, - }, - "workspaceFolders": [{"uri": workspace.as_uri(), "name": workspace.name}], - }, - }) - - # Read all messages until we get initialize response - print("\n=== READING RESPONSES ===") - init_done = False - for i in range(20): - try: - msg = await asyncio.wait_for(read_message(process.stdout), timeout=5.0) - if msg is None: - print("EOF") - break - - method = msg.get("method", "") - msg_id = msg.get("id", "N/A") - - if method: - print(f"RECV: {method} (id={msg_id})") - - # Handle server requests - if msg_id != "N/A": - if method == "workspace/configuration": - print(" -> Responding to workspace/configuration") - items = msg.get("params", {}).get("items", []) - await send_message(process.stdin, { - "jsonrpc": "2.0", - "id": msg_id, - "result": [{"pythonPath": "python"} for _ in items], - }) - elif method == "client/registerCapability": - print(" -> Responding to client/registerCapability") - await send_message(process.stdin, { - "jsonrpc": "2.0", - "id": msg_id, - "result": None, - }) - elif method == "window/workDoneProgress/create": - print(" -> Responding to window/workDoneProgress/create") - await send_message(process.stdin, { - "jsonrpc": "2.0", - "id": msg_id, - "result": None, - }) - else: - print(f"RECV: response (id={msg_id})") - if msg_id == 1: - print(" -> Initialize response received!") - caps = list(msg.get("result", {}).get("capabilities", {}).keys()) - print(f" -> Capabilities: {caps[:5]}...") - init_done = True - break - - except asyncio.TimeoutError: - print(f" Timeout waiting for message {i+1}") - break - - if not init_done: - print("ERROR: Initialize failed") - return - - # 2. Send initialized notification - print("\n=== INITIALIZED ===") - await send_message(process.stdin, { - "jsonrpc": "2.0", - "method": "initialized", - "params": {}, - }) - - # Read any messages pyright sends after initialized - print("\n=== READING POST-INITIALIZED MESSAGES ===") - for i in range(10): - try: - msg = await asyncio.wait_for(read_message(process.stdout), timeout=2.0) - if msg is None: - break - - method = msg.get("method", "") - msg_id = msg.get("id", "N/A") - - print(f"RECV: {method or 'response'} (id={msg_id})") - - # Handle server requests - if msg_id != "N/A" and method: - if method == "workspace/configuration": - print(" -> Responding to workspace/configuration") - items = msg.get("params", {}).get("items", []) - await send_message(process.stdin, { - "jsonrpc": "2.0", - "id": msg_id, - "result": [{"pythonPath": "python"} for _ in items], - }) - elif method == "client/registerCapability": - print(" -> Responding to client/registerCapability") - await send_message(process.stdin, { - "jsonrpc": "2.0", - "id": msg_id, - "result": None, - }) - elif method == "window/workDoneProgress/create": - print(" -> Responding to window/workDoneProgress/create") - await send_message(process.stdin, { - "jsonrpc": "2.0", - "id": msg_id, - "result": None, - }) - - except asyncio.TimeoutError: - print(f" No more messages (timeout)") - break - - # 3. Send didOpen - print("\n=== DIDOPEN ===") - content = test_file.read_text(encoding="utf-8") - await send_message(process.stdin, { - "jsonrpc": "2.0", - "method": "textDocument/didOpen", - "params": { - "textDocument": { - "uri": test_file.as_uri(), - "languageId": "python", - "version": 1, - "text": content, - }, - }, - }) - - # Read any messages - print("\n=== READING POST-DIDOPEN MESSAGES ===") - for i in range(10): - try: - msg = await asyncio.wait_for(read_message(process.stdout), timeout=2.0) - if msg is None: - break - - method = msg.get("method", "") - msg_id = msg.get("id", "N/A") - - print(f"RECV: {method or 'response'} (id={msg_id})") - - # Handle server requests - if msg_id != "N/A" and method: - if method == "workspace/configuration": - print(" -> Responding to workspace/configuration") - items = msg.get("params", {}).get("items", []) - await send_message(process.stdin, { - "jsonrpc": "2.0", - "id": msg_id, - "result": [{"pythonPath": "python"} for _ in items], - }) - else: - print(f" -> Responding with null to {method}") - await send_message(process.stdin, { - "jsonrpc": "2.0", - "id": msg_id, - "result": None, - }) - - except asyncio.TimeoutError: - print(f" No more messages (timeout)") - break - - # 4. Send documentSymbol request - print("\n=== DOCUMENTSYMBOL ===") - await send_message(process.stdin, { - "jsonrpc": "2.0", - "id": 2, - "method": "textDocument/documentSymbol", - "params": { - "textDocument": {"uri": test_file.as_uri()}, - }, - }) - - # Wait for response - print("\n=== READING DOCUMENTSYMBOL RESPONSE ===") - for i in range(20): - try: - msg = await asyncio.wait_for(read_message(process.stdout), timeout=5.0) - if msg is None: - break - - method = msg.get("method", "") - msg_id = msg.get("id", "N/A") - - if method: - print(f"RECV: {method} (id={msg_id})") - - # Handle server requests - if msg_id != "N/A": - if method == "workspace/configuration": - print(" -> Responding to workspace/configuration") - items = msg.get("params", {}).get("items", []) - await send_message(process.stdin, { - "jsonrpc": "2.0", - "id": msg_id, - "result": [{"pythonPath": "python"} for _ in items], - }) - else: - print(f" -> Responding with null to {method}") - await send_message(process.stdin, { - "jsonrpc": "2.0", - "id": msg_id, - "result": None, - }) - else: - print(f"RECV: response (id={msg_id})") - if msg_id == 2: - result = msg.get("result", []) - print(f" -> DocumentSymbol response: {len(result)} symbols") - for sym in result[:5]: - print(f" - {sym.get('name')} ({sym.get('kind')})") - break - - except asyncio.TimeoutError: - print(f" Timeout {i+1}") - if i >= 5: - break - - print("\n=== DONE ===") - - finally: - stderr_task.cancel() - process.terminate() - try: - await asyncio.wait_for(process.wait(), timeout=5.0) - except asyncio.TimeoutError: - process.kill() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/codex-lens/tests/real/debug_lsp.py b/codex-lens/tests/real/debug_lsp.py deleted file mode 100644 index 8bf15f1c..00000000 --- a/codex-lens/tests/real/debug_lsp.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python -"""Debug script to check pyright LSP configuration requests.""" - -import asyncio -import logging -import sys -from pathlib import Path - -# Enable DEBUG logging -logging.basicConfig( - level=logging.DEBUG, - format='%(name)s - %(levelname)s - %(message)s', - stream=sys.stdout -) - -# Add source to path -sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) - -from codexlens.lsp.standalone_manager import StandaloneLspManager - -async def test(): - workspace = Path(__file__).parent.parent.parent - manager = StandaloneLspManager( - workspace_root=str(workspace), - timeout=60.0 - ) - await manager.start() - - # Wait a bit after start to see if any requests come in - print("Waiting 3 seconds after start to see server requests...") - await asyncio.sleep(3) - - # Try to get symbols for a simpler file - test_file = str(workspace / "tests" / "real" / "debug_lsp.py") - print(f"Testing with: {test_file}") - - # Let's see if we can check what pyright sees - print("Checking server state...") - state = manager._servers.get("python") - if state: - print(f" - Process running: {state.process.returncode is None}") - print(f" - Initialized: {state.initialized}") - print(f" - Pending requests: {list(state.pending_requests.keys())}") - - try: - symbols = await manager.get_document_symbols(test_file) - print(f"Got {len(symbols)} symbols") - for s in symbols[:5]: - print(f" - {s}") - except Exception as e: - print(f"Error: {e}") - import traceback - traceback.print_exc() - - await manager.stop() - -if __name__ == "__main__": - asyncio.run(test()) diff --git a/codex-lens/tests/real/debug_manager.py b/codex-lens/tests/real/debug_manager.py deleted file mode 100644 index 3d53ca89..00000000 --- a/codex-lens/tests/real/debug_manager.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python -"""Debug script to test StandaloneLspManager directly.""" - -import asyncio -import logging -import sys -from pathlib import Path - -# Add source to path -sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) - -# Enable debug logging -logging.basicConfig(level=logging.DEBUG, format="%(levelname)s: %(name)s: %(message)s") - -from codexlens.lsp.standalone_manager import StandaloneLspManager - - -async def test_standalone_manager(): - """Test StandaloneLspManager directly.""" - workspace = Path(__file__).parent.parent.parent - test_file = workspace / "src" / "codexlens" / "lsp" / "lsp_bridge.py" - - print(f"Workspace: {workspace}") - print(f"Test file: {test_file}") - print() - - manager = StandaloneLspManager(workspace_root=str(workspace), timeout=30.0) - - print("Starting manager...") - await manager.start() - - print(f"Configs loaded: {list(manager._configs.keys())}") - print(f"Servers running: {list(manager._servers.keys())}") - - # Try to get the server for the test file - print(f"\nGetting server for {test_file.name}...") - server = await manager._get_server(str(test_file)) - - if server: - print(f"Server: {server.config.display_name}") - print(f"Initialized: {server.initialized}") - print(f"Capabilities: {list(server.capabilities.keys())}") - else: - print("Failed to get server!") - - # Try to get document symbols - print(f"\nGetting document symbols for {test_file.name}...") - try: - symbols = await manager.get_document_symbols(str(test_file)) - print(f"Found {len(symbols)} symbols") - for sym in symbols[:5]: - print(f" - {sym.get('name', '?')} ({sym.get('kind', '?')})") - except Exception as e: - print(f"Error getting symbols: {e}") - - print("\nStopping manager...") - await manager.stop() - - print("Done!") - - -if __name__ == "__main__": - asyncio.run(test_standalone_manager()) diff --git a/codex-lens/tests/real/debug_reads.py b/codex-lens/tests/real/debug_reads.py deleted file mode 100644 index 56048c73..00000000 --- a/codex-lens/tests/real/debug_reads.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env python -"""Debug exactly what's happening with reads after initialized.""" - -import asyncio -import json -from pathlib import Path - - -async def main(): - workspace = Path(__file__).parent.parent.parent - print(f"Workspace: {workspace}") - - # Start pyright - process = await asyncio.create_subprocess_exec( - "pyright-langserver", "--stdio", - stdin=asyncio.subprocess.PIPE, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - cwd=str(workspace), - ) - - # Helper to encode message - def encode(content): - body = json.dumps(content).encode("utf-8") - header = f"Content-Length: {len(body)}\r\n\r\n" - return header.encode("ascii") + body - - # Helper to send - async def send(msg): - encoded = encode(msg) - process.stdin.write(encoded) - await process.stdin.drain() - method = msg.get("method") or f"response-{msg.get('id')}" - print(f"SENT: {method}") - - # Helper to read one message - async def read_one(timeout=3.0): - content_length = 0 - while True: - try: - print(f" readline(timeout={timeout})...") - line = await asyncio.wait_for(process.stdout.readline(), timeout=timeout) - print(f" got line: {repr(line[:50] if len(line) > 50 else line)}") - except asyncio.TimeoutError: - print(f" TIMEOUT on readline") - return None - - if not line: - print(f" EOF") - return None - - line_str = line.decode("ascii").strip() - if not line_str: - break # End of headers - - if line_str.lower().startswith("content-length:"): - content_length = int(line_str.split(":")[1].strip()) - - if content_length == 0: - return None - - body = await process.stdout.readexactly(content_length) - return json.loads(body.decode("utf-8")) - - # Start stderr reader - async def read_stderr(): - while True: - line = await process.stderr.readline() - if not line: - break - print(f"[stderr] {line.decode('utf-8', errors='replace').rstrip()}") - asyncio.create_task(read_stderr()) - - print("\n=== INITIALIZE ===") - await send({ - "jsonrpc": "2.0", "id": 1, "method": "initialize", - "params": { - "processId": None, - "rootUri": workspace.as_uri(), - "capabilities": {"workspace": {"configuration": True}}, - "workspaceFolders": [{"uri": workspace.as_uri(), "name": workspace.name}], - }, - }) - - # Read until initialize response - print("\n=== READING UNTIL INITIALIZE RESPONSE ===") - while True: - msg = await read_one() - if msg and msg.get("id") == 1 and "method" not in msg: - print(f"Got initialize response") - break - elif msg: - print(f"Got notification: {msg.get('method')}") - - print("\n=== SEND INITIALIZED ===") - await send({"jsonrpc": "2.0", "method": "initialized", "params": {}}) - - print("\n=== NOW TRY TO READ WORKSPACE/CONFIGURATION ===") - print("Attempting reads with 2s timeout each...") - - for i in range(3): - print(f"\n--- Read attempt {i+1} ---") - msg = await read_one(timeout=2.0) - if msg: - method = msg.get("method", "") - msg_id = msg.get("id") - print(f"SUCCESS: method={method}, id={msg_id}") - if method and msg_id is not None: - # Respond to server request - print(f"Responding to {method}") - await send({"jsonrpc": "2.0", "id": msg_id, "result": [{}]}) - else: - print(f"No message (timeout or EOF)") - break - - print("\n=== CLEANUP ===") - process.terminate() - await process.wait() - print("Done") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/codex-lens/tests/real/direct_pyright_test.py b/codex-lens/tests/real/direct_pyright_test.py deleted file mode 100644 index 75fd45bf..00000000 --- a/codex-lens/tests/real/direct_pyright_test.py +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env python -"""Direct test of pyright-langserver communication.""" - -import asyncio -import json -import sys - -async def test_pyright(): - print("Starting pyright-langserver...") - - process = await asyncio.create_subprocess_exec( - "pyright-langserver", "--stdio", - stdin=asyncio.subprocess.PIPE, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - - # Build initialize request - init_msg = { - "jsonrpc": "2.0", - "id": 1, - "method": "initialize", - "params": { - "processId": 1234, - "rootUri": "file:///D:/Claude_dms3/codex-lens", - "rootPath": "D:/Claude_dms3/codex-lens", - "capabilities": { - "textDocument": { - "documentSymbol": {"hierarchicalDocumentSymbolSupport": True} - }, - "workspace": {"configuration": True} - }, - "workspaceFolders": [ - {"uri": "file:///D:/Claude_dms3/codex-lens", "name": "codex-lens"} - ] - } - } - - body = json.dumps(init_msg).encode("utf-8") - header = f"Content-Length: {len(body)}\r\n\r\n".encode("ascii") - - print(f"Sending initialize request ({len(body)} bytes)...") - process.stdin.write(header + body) - await process.stdin.drain() - - # Read responses - print("Reading responses...") - for i in range(20): - try: - line = await asyncio.wait_for(process.stdout.readline(), timeout=2.0) - if not line: - print(" (empty line - stream closed)") - break - line_str = line.decode("ascii").strip() - print(f" Header: {line_str}") - - if line_str.lower().startswith("content-length:"): - content_length = int(line_str.split(":")[1].strip()) - # Read empty line - await process.stdout.readline() - # Read body - body_data = await process.stdout.readexactly(content_length) - msg = json.loads(body_data.decode("utf-8")) - print(f" Message: id={msg.get('id', 'none')}, method={msg.get('method', 'none')}") - if msg.get("id") == 1: - print(f" >>> GOT INITIALIZE RESPONSE!") - print(f" >>> Capabilities: {list(msg.get('result', {}).get('capabilities', {}).keys())[:10]}...") - - # Send initialized notification - print("\nSending 'initialized' notification...") - init_notif = {"jsonrpc": "2.0", "method": "initialized", "params": {}} - body2 = json.dumps(init_notif).encode("utf-8") - header2 = f"Content-Length: {len(body2)}\r\n\r\n".encode("ascii") - process.stdin.write(header2 + body2) - await process.stdin.drain() - - # Wait a moment for any server requests - print("Waiting for server requests...") - await asyncio.sleep(1.0) - continue # Keep reading to see if workspace/configuration comes - if msg.get("method") == "workspace/configuration": - print(f" >>> GOT workspace/configuration REQUEST!") - print(f" >>> Params: {msg.get('params')}") - except asyncio.TimeoutError: - print(" (timeout waiting for more data)") - break - - process.terminate() - await process.wait() - print("Done.") - -if __name__ == "__main__": - asyncio.run(test_pyright()) diff --git a/codex-lens/tests/real/minimal_test.py b/codex-lens/tests/real/minimal_test.py deleted file mode 100644 index c95c8b7b..00000000 --- a/codex-lens/tests/real/minimal_test.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python -"""Minimal test that mimics the working direct test.""" - -import asyncio -import json -import sys -from pathlib import Path - -# Add source to path -sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) - - -async def test_minimal(): - """Minimal test using the standalone manager.""" - from codexlens.lsp.standalone_manager import StandaloneLspManager - - workspace = Path(__file__).parent.parent.parent - manager = StandaloneLspManager( - workspace_root=str(workspace), - timeout=60.0 - ) - - await manager.start() - - # Get server state - server_state = await manager._get_server(str(workspace / "tests" / "real" / "minimal_test.py")) - - if not server_state: - print("Failed to get server state") - await manager.stop() - return - - print(f"Server initialized: {server_state.initialized}") - print(f"Server capabilities: {list(server_state.capabilities.keys())[:5]}...") - - # Wait for any background messages - print("Waiting 5 seconds for background messages...") - await asyncio.sleep(5) - - # Now send a documentSymbol request manually - print("Sending documentSymbol request...") - result = await manager._send_request( - server_state, - "textDocument/documentSymbol", - {"textDocument": {"uri": (workspace / "tests" / "real" / "minimal_test.py").resolve().as_uri()}}, - timeout=30.0 - ) - - print(f"Result: {result}") - - await manager.stop() - - -if __name__ == "__main__": - import logging - logging.basicConfig(level=logging.INFO, format='%(name)s - %(levelname)s - %(message)s') - - asyncio.run(test_minimal()) diff --git a/codex-lens/tests/real/quick_test.py b/codex-lens/tests/real/quick_test.py deleted file mode 100644 index c70a5374..00000000 --- a/codex-lens/tests/real/quick_test.py +++ /dev/null @@ -1,313 +0,0 @@ -#!/usr/bin/env python -"""Quick real interface test script for LSP Bridge (Standalone Mode). - -Usage: - python tests/real/quick_test.py - -Requires: pyright-langserver installed (npm install -g pyright) -""" - -import asyncio -import shutil -import sys -from pathlib import Path - -# Add source to path -sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) - -from codexlens.lsp.lsp_bridge import LspBridge -from codexlens.lsp.lsp_graph_builder import LspGraphBuilder -from codexlens.hybrid_search.data_structures import CodeSymbolNode, Range - - -# Test file - the LSP bridge source itself -TEST_FILE = Path(__file__).parent.parent.parent / "src" / "codexlens" / "lsp" / "lsp_bridge.py" -WORKSPACE_ROOT = Path(__file__).parent.parent.parent # codex-lens root - - -def check_pyright(): - """Check if pyright-langserver is available.""" - return shutil.which("pyright-langserver") is not None - - -async def test_get_definition(): - """Test get_definition.""" - print("\n" + "=" * 60) - print("TEST: get_definition") - print("=" * 60) - - symbol = CodeSymbolNode( - id=f"{TEST_FILE}:LspBridge:96", - name="LspBridge", - kind="class", - file_path=str(TEST_FILE), - range=Range(start_line=96, start_character=6, end_line=96, end_character=15), - ) - - print(f"Symbol: {symbol.name}") - print(f"File: {symbol.file_path}") - print(f"Position: line {symbol.range.start_line}, char {symbol.range.start_character}") - - async with LspBridge(workspace_root=str(WORKSPACE_ROOT), timeout=30.0) as bridge: - result = await bridge.get_definition(symbol) - - if result: - print(f"\n[OK] SUCCESS: Definition found at {result.file_path}:{result.line}") - else: - print(f"\n[WARN] No definition found (may be expected for class declaration)") - - return result is not None - - -async def test_get_references(): - """Test get_references.""" - print("\n" + "=" * 60) - print("TEST: get_references") - print("=" * 60) - - symbol = CodeSymbolNode( - id=f"{TEST_FILE}:get_references:200", - name="get_references", - kind="method", - file_path=str(TEST_FILE), - range=Range(start_line=200, start_character=10, end_line=200, end_character=24), - ) - - print(f"Symbol: {symbol.name}") - print(f"File: {Path(symbol.file_path).name}") - print(f"Position: line {symbol.range.start_line}") - - async with LspBridge(workspace_root=str(WORKSPACE_ROOT), timeout=30.0) as bridge: - refs = await bridge.get_references(symbol) - - print(f"\n[OK] Found {len(refs)} references:") - for i, ref in enumerate(refs[:10]): - print(f" [{i+1}] {Path(ref.file_path).name}:{ref.line}") - if len(refs) > 10: - print(f" ... and {len(refs) - 10} more") - - return len(refs) >= 0 - - -async def test_get_hover(): - """Test get_hover.""" - print("\n" + "=" * 60) - print("TEST: get_hover") - print("=" * 60) - - symbol = CodeSymbolNode( - id=f"{TEST_FILE}:LspBridge:96", - name="LspBridge", - kind="class", - file_path=str(TEST_FILE), - range=Range(start_line=96, start_character=6, end_line=96, end_character=15), - ) - - print(f"Symbol: {symbol.name}") - - async with LspBridge(workspace_root=str(WORKSPACE_ROOT), timeout=30.0) as bridge: - hover = await bridge.get_hover(symbol) - - if hover: - preview = hover[:300].replace('\n', '\n ') - print(f"\n[OK] Hover info ({len(hover)} chars):") - print(f" {preview}...") - else: - print(f"\n[WARN] No hover info available") - - return hover is not None - - -async def test_get_document_symbols(): - """Test get_document_symbols.""" - print("\n" + "=" * 60) - print("TEST: get_document_symbols") - print("=" * 60) - - file_path = str(TEST_FILE) - print(f"File: {Path(file_path).name}") - - async with LspBridge(workspace_root=str(WORKSPACE_ROOT), timeout=30.0) as bridge: - symbols = await bridge.get_document_symbols(file_path) - - print(f"\n[OK] Found {len(symbols)} symbols:") - - # Group by kind - by_kind = {} - for sym in symbols: - kind = sym.get("kind", "unknown") - by_kind[kind] = by_kind.get(kind, 0) + 1 - - for kind, count in sorted(by_kind.items()): - print(f" {kind}: {count}") - - print("\nSample symbols:") - for sym in symbols[:15]: - name = sym.get("name", "?") - kind = sym.get("kind", "?") - range_data = sym.get("range", {}) - start = range_data.get("start", {}) - line = start.get("line", 0) + 1 - print(f" - {name} ({kind}) at line {line}") - - return len(symbols) > 0 - - -async def test_graph_expansion(): - """Test graph expansion.""" - print("\n" + "=" * 60) - print("TEST: Graph Expansion (LspGraphBuilder)") - print("=" * 60) - - seed = CodeSymbolNode( - id=f"{TEST_FILE}:LspBridge:96", - name="LspBridge", - kind="class", - file_path=str(TEST_FILE), - range=Range(start_line=96, start_character=6, end_line=96, end_character=15), - ) - - print(f"Seed: {seed.name} in {Path(seed.file_path).name}:{seed.range.start_line}") - print("Settings: max_depth=1, max_nodes=20") - - builder = LspGraphBuilder(max_depth=1, max_nodes=20) - - async with LspBridge(workspace_root=str(WORKSPACE_ROOT), timeout=30.0) as bridge: - graph = await builder.build_from_seeds([seed], bridge) - - print(f"\n[OK] Graph expansion complete:") - print(f" Nodes: {len(graph.nodes)}") - print(f" Edges: {len(graph.edges)}") - - if graph.nodes: - print("\nNodes found:") - for node_id, node in list(graph.nodes.items())[:15]: - print(f" - {node.name} ({node.kind}) in {Path(node.file_path).name}:{node.range.start_line}") - - if graph.edges: - print(f"\nEdges (first 10):") - for edge in list(graph.edges)[:10]: - src = graph.nodes.get(edge.source_id) - tgt = graph.nodes.get(edge.target_id) - src_name = src.name if src else edge.source_id[:20] - tgt_name = tgt.name if tgt else edge.target_id[:20] - print(f" - {src_name} --[{edge.relation}]--> {tgt_name}") - - return len(graph.nodes) >= 1 - - -async def test_cache_performance(): - """Test cache performance.""" - print("\n" + "=" * 60) - print("TEST: Cache Performance") - print("=" * 60) - - symbol = CodeSymbolNode( - id=f"{TEST_FILE}:LspBridge:96", - name="LspBridge", - kind="class", - file_path=str(TEST_FILE), - range=Range(start_line=96, start_character=6, end_line=96, end_character=15), - ) - - import time - - async with LspBridge(workspace_root=str(WORKSPACE_ROOT), timeout=30.0) as bridge: - # First call - cache miss - start = time.perf_counter() - await bridge.get_references(symbol) - first_time = (time.perf_counter() - start) * 1000 - - # Second call - cache hit - start = time.perf_counter() - await bridge.get_references(symbol) - second_time = (time.perf_counter() - start) * 1000 - - print(f"\nFirst call (cache miss): {first_time:.2f}ms") - print(f"Second call (cache hit): {second_time:.2f}ms") - print(f"Speedup: {first_time/max(second_time, 0.001):.1f}x") - print(f"Cache entries: {len(bridge.cache)}") - - if second_time < first_time: - print("\n[OK] Cache is working correctly") - else: - print("\n[WARN] Cache may not be effective") - - return second_time < first_time - - -async def run_all_tests(): - """Run all tests.""" - print("=" * 60) - print("CODEX-LENS LSP REAL INTERFACE TESTS (Standalone Mode)") - print("=" * 60) - print(f"Test file: {TEST_FILE}") - print(f"Workspace: {WORKSPACE_ROOT}") - print(f"Mode: Standalone (direct language server communication)") - - results = {} - - tests = [ - ("get_definition", test_get_definition), - ("get_references", test_get_references), - ("get_hover", test_get_hover), - ("get_document_symbols", test_get_document_symbols), - ("graph_expansion", test_graph_expansion), - ("cache_performance", test_cache_performance), - ] - - for name, test_fn in tests: - try: - results[name] = await test_fn() - except Exception as e: - print(f"\n[FAIL] FAILED: {e}") - import traceback - traceback.print_exc() - results[name] = False - - # Summary - print("\n" + "=" * 60) - print("SUMMARY") - print("=" * 60) - - passed = sum(1 for v in results.values() if v) - total = len(results) - - for name, result in results.items(): - status = "[PASS]" if result else "[FAIL]" - print(f" {status}: {name}") - - print(f"\nResult: {passed}/{total} tests passed") - - return passed == total - - -def main(): - """Main entry point.""" - print("Checking pyright-langserver availability...") - - if not check_pyright(): - print("\n" + "=" * 60) - print("ERROR: pyright-langserver not available") - print("=" * 60) - print() - print("To run these tests:") - print(" 1. Install pyright: npm install -g pyright") - print(" 2. Verify: pyright-langserver --version") - print(" 3. Run this script again") - print() - sys.exit(1) - - print("[OK] pyright-langserver is available!") - print() - - # Run tests - # Note: On Windows, we use the default ProactorEventLoop (not SelectorEventLoop) - # because ProactorEventLoop supports subprocess creation which is required for LSP - - success = asyncio.run(run_all_tests()) - sys.exit(0 if success else 1) - - -if __name__ == "__main__": - main() diff --git a/codex-lens/tests/real/test_lsp_real_interface.py b/codex-lens/tests/real/test_lsp_real_interface.py deleted file mode 100644 index 587d3f90..00000000 --- a/codex-lens/tests/real/test_lsp_real_interface.py +++ /dev/null @@ -1,424 +0,0 @@ -"""Real interface tests for LSP Bridge using Standalone Mode. - -These tests require: -1. Language servers installed (pyright-langserver, typescript-language-server) -2. A Python/TypeScript project in the workspace - -Run with: pytest tests/real/ -v -s -""" - -import asyncio -import os -import sys -import pytest -from pathlib import Path - -# Add source to path -sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src")) - -from codexlens.lsp.lsp_bridge import LspBridge, Location, HAS_AIOHTTP -from codexlens.lsp.lsp_graph_builder import LspGraphBuilder -from codexlens.hybrid_search.data_structures import CodeSymbolNode, Range - - -# Test configuration - adjust these paths to match your setup -TEST_PYTHON_FILE = Path(__file__).parent.parent.parent / "src" / "codexlens" / "lsp" / "lsp_bridge.py" -TEST_TYPESCRIPT_FILE = Path(__file__).parent.parent.parent.parent / "ccw-vscode-bridge" / "src" / "extension.ts" - -WORKSPACE_ROOT = Path(__file__).parent.parent.parent # codex-lens root - - -def is_pyright_available() -> bool: - """Check if pyright-langserver is installed.""" - import shutil - return shutil.which("pyright-langserver") is not None - - -def is_typescript_server_available() -> bool: - """Check if typescript-language-server is installed.""" - import shutil - return shutil.which("typescript-language-server") is not None - - -# Skip all tests if pyright not available -pytestmark = pytest.mark.skipif( - not is_pyright_available(), - reason="pyright-langserver not installed. Install with: npm install -g pyright" -) - - -class TestRealLspBridgeStandalone: - """Real interface tests for LspBridge in Standalone Mode.""" - - @pytest.fixture - def bridge(self): - """Create real LspBridge instance in standalone mode.""" - return LspBridge( - workspace_root=str(WORKSPACE_ROOT), - timeout=30.0, - use_vscode_bridge=False, # Use standalone mode - ) - - @pytest.fixture - def python_symbol(self): - """Create a symbol pointing to LspBridge class.""" - return CodeSymbolNode( - id=f"{TEST_PYTHON_FILE}:LspBridge:96", - name="LspBridge", - kind="class", - file_path=str(TEST_PYTHON_FILE), - range=Range(start_line=96, start_character=6, end_line=96, end_character=15), - ) - - @pytest.fixture - def python_method_symbol(self): - """Create a symbol pointing to get_references method.""" - return CodeSymbolNode( - id=f"{TEST_PYTHON_FILE}:get_references:200", - name="get_references", - kind="method", - file_path=str(TEST_PYTHON_FILE), - range=Range(start_line=200, start_character=10, end_line=200, end_character=24), - ) - - @pytest.mark.asyncio - async def test_real_get_definition(self, bridge, python_symbol): - """Test get_definition against real Python file.""" - print(f"\n>>> Testing get_definition for {python_symbol.name}") - print(f" File: {python_symbol.file_path}") - print(f" Position: line {python_symbol.range.start_line}, char {python_symbol.range.start_character}") - - async with bridge: - definition = await bridge.get_definition(python_symbol) - - print(f" Result: {definition}") - - # Definition should exist (class definition) - if definition: - print(f" ✓ Found definition at {definition.file_path}:{definition.line}") - assert definition.file_path.endswith(".py") - assert definition.line > 0 - else: - print(" ⚠ No definition found (may be expected for class declarations)") - - @pytest.mark.asyncio - async def test_real_get_references(self, bridge, python_method_symbol): - """Test get_references against real Python file.""" - print(f"\n>>> Testing get_references for {python_method_symbol.name}") - print(f" File: {python_method_symbol.file_path}") - print(f" Position: line {python_method_symbol.range.start_line}") - - async with bridge: - refs = await bridge.get_references(python_method_symbol) - - print(f" Found {len(refs)} references:") - for i, ref in enumerate(refs[:5]): # Show first 5 - print(f" [{i+1}] {Path(ref.file_path).name}:{ref.line}") - if len(refs) > 5: - print(f" ... and {len(refs) - 5} more") - - # Should find at least the definition itself - assert len(refs) >= 0, "References query should succeed (may be empty)" - - @pytest.mark.asyncio - async def test_real_get_hover(self, bridge, python_symbol): - """Test get_hover against real Python file.""" - print(f"\n>>> Testing get_hover for {python_symbol.name}") - - async with bridge: - hover = await bridge.get_hover(python_symbol) - - if hover: - print(f" ✓ Hover info ({len(hover)} chars):") - preview = hover[:200].replace('\n', '\\n') - print(f" {preview}...") - assert len(hover) > 0 - else: - print(" ⚠ No hover info available") - - @pytest.mark.asyncio - async def test_real_get_document_symbols(self, bridge): - """Test get_document_symbols against real Python file.""" - file_path = str(TEST_PYTHON_FILE) - print(f"\n>>> Testing get_document_symbols") - print(f" File: {file_path}") - - async with bridge: - symbols = await bridge.get_document_symbols(file_path) - - print(f" Found {len(symbols)} symbols:") - - # Group by kind - by_kind = {} - for sym in symbols: - kind = sym.get("kind", "unknown") - by_kind[kind] = by_kind.get(kind, 0) + 1 - - for kind, count in sorted(by_kind.items()): - print(f" {kind}: {count}") - - # Show some sample symbols - print(" Sample symbols:") - for sym in symbols[:10]: - name = sym.get("name", "?") - kind = sym.get("kind", "?") - range_data = sym.get("range", {}) - start = range_data.get("start", {}) - line = start.get("line", 0) + 1 - print(f" - {name} ({kind}) at line {line}") - - assert len(symbols) > 0, "Should find symbols in Python file" - - @pytest.mark.asyncio - async def test_real_get_call_hierarchy(self, bridge, python_method_symbol): - """Test get_call_hierarchy against real Python file.""" - print(f"\n>>> Testing get_call_hierarchy for {python_method_symbol.name}") - - async with bridge: - calls = await bridge.get_call_hierarchy(python_method_symbol) - - print(f" Found {len(calls)} call hierarchy items:") - for i, call in enumerate(calls[:10]): - print(f" [{i+1}] {call.name} in {Path(call.file_path).name}:{call.range.start_line}") - - # May be empty if call hierarchy not supported or no callers - print(f" ✓ Call hierarchy query completed") - - @pytest.mark.asyncio - async def test_real_cache_behavior(self, bridge, python_symbol): - """Test that cache actually works with real requests.""" - print(f"\n>>> Testing cache behavior") - - async with bridge: - # First call - should hit language server - print(" First call (cache miss expected)...") - refs1 = await bridge.get_references(python_symbol) - cache_size_after_first = len(bridge.cache) - print(f" Cache size after first call: {cache_size_after_first}") - - # Second call - should hit cache - print(" Second call (cache hit expected)...") - refs2 = await bridge.get_references(python_symbol) - cache_size_after_second = len(bridge.cache) - print(f" Cache size after second call: {cache_size_after_second}") - - assert cache_size_after_first > 0, "Cache should have entries after first call" - assert cache_size_after_second == cache_size_after_first, "Cache size should not change on hit" - assert refs1 == refs2, "Results should be identical" - print(" ✓ Cache working correctly") - - -class TestRealLspGraphBuilderStandalone: - """Real interface tests for LspGraphBuilder with Standalone Mode.""" - - @pytest.fixture - def seed_node(self): - """Create a seed node for graph expansion.""" - return CodeSymbolNode( - id=f"{TEST_PYTHON_FILE}:LspBridge:96", - name="LspBridge", - kind="class", - file_path=str(TEST_PYTHON_FILE), - range=Range(start_line=96, start_character=6, end_line=96, end_character=15), - ) - - @pytest.mark.asyncio - async def test_real_graph_expansion(self, seed_node): - """Test real graph expansion from a Python class.""" - print(f"\n>>> Testing graph expansion from {seed_node.name}") - print(f" Seed: {seed_node.file_path}:{seed_node.range.start_line}") - - builder = LspGraphBuilder(max_depth=1, max_nodes=20) - - async with LspBridge( - workspace_root=str(WORKSPACE_ROOT), - timeout=30.0, - ) as bridge: - graph = await builder.build_from_seeds([seed_node], bridge) - - print(f" Graph results:") - print(f" Nodes: {len(graph.nodes)}") - print(f" Edges: {len(graph.edges)}") - - if graph.nodes: - print(f" Node details:") - for node_id, node in list(graph.nodes.items())[:10]: - print(f" - {node.name} ({node.kind}) in {Path(node.file_path).name}:{node.range.start_line}") - - if graph.edges: - print(f" Edge details:") - for edge in list(graph.edges)[:10]: - print(f" - {edge.source_id[:30]}... --[{edge.relation}]--> {edge.target_id[:30]}...") - - # We should have at least the seed node - assert len(graph.nodes) >= 1, "Graph should contain at least the seed node" - print(" ✓ Graph expansion completed") - - @pytest.mark.asyncio - async def test_real_multi_seed_expansion(self): - """Test graph expansion from multiple seeds.""" - print(f"\n>>> Testing multi-seed graph expansion") - - seeds = [ - CodeSymbolNode( - id=f"{TEST_PYTHON_FILE}:Location:35", - name="Location", - kind="class", - file_path=str(TEST_PYTHON_FILE), - range=Range(start_line=35, start_character=6, end_line=35, end_character=14), - ), - CodeSymbolNode( - id=f"{TEST_PYTHON_FILE}:CacheEntry:81", - name="CacheEntry", - kind="class", - file_path=str(TEST_PYTHON_FILE), - range=Range(start_line=81, start_character=6, end_line=81, end_character=16), - ), - ] - - print(f" Seeds: {[s.name for s in seeds]}") - - builder = LspGraphBuilder(max_depth=1, max_nodes=30) - - async with LspBridge( - workspace_root=str(WORKSPACE_ROOT), - timeout=30.0, - ) as bridge: - graph = await builder.build_from_seeds(seeds, bridge) - - print(f" Graph results:") - print(f" Nodes: {len(graph.nodes)}") - print(f" Edges: {len(graph.edges)}") - - # Should have at least the seed nodes - assert len(graph.nodes) >= len(seeds), f"Graph should contain at least {len(seeds)} seed nodes" - print(" ✓ Multi-seed expansion completed") - - -class TestRealHybridSearchIntegrationStandalone: - """Real integration tests with HybridSearchEngine.""" - - @pytest.mark.asyncio - async def test_real_lsp_search_pipeline(self): - """Test the full LSP search pipeline with real LSP.""" - print(f"\n>>> Testing full LSP search pipeline") - - # Create mock seeds (normally from vector/FTS search) - seeds = [ - CodeSymbolNode( - id=f"{TEST_PYTHON_FILE}:LspBridge:96", - name="LspBridge", - kind="class", - file_path=str(TEST_PYTHON_FILE), - range=Range(start_line=96, start_character=6, end_line=96, end_character=15), - ), - ] - - print(f" Starting with {len(seeds)} seed(s)") - - builder = LspGraphBuilder(max_depth=2, max_nodes=50) - - async with LspBridge( - workspace_root=str(WORKSPACE_ROOT), - timeout=30.0, - ) as bridge: - graph = await builder.build_from_seeds(seeds, bridge) - - print(f" Expanded to {len(graph.nodes)} nodes") - - # Simulate conversion to SearchResult format - results = [] - for node_id, node in graph.nodes.items(): - if node.id not in [s.id for s in seeds]: # Exclude seeds - results.append({ - "path": node.file_path, - "symbol_name": node.name, - "symbol_kind": node.kind, - "start_line": node.range.start_line, - "end_line": node.range.end_line, - }) - - print(f" Generated {len(results)} search results (excluding seeds)") - - if results: - print(" Sample results:") - for r in results[:5]: - print(f" - {r['symbol_name']} ({r['symbol_kind']}) at {Path(r['path']).name}:{r['start_line']}") - - print(" ✓ Full pipeline completed") - - -# TypeScript tests (if available) -@pytest.mark.skipif( - not is_typescript_server_available() or not TEST_TYPESCRIPT_FILE.exists(), - reason="TypeScript language server or test file not available" -) -class TestRealTypescriptLspStandalone: - """Real tests against TypeScript files.""" - - @pytest.fixture - def ts_symbol(self): - """Create a symbol in the TypeScript extension file.""" - return CodeSymbolNode( - id=f"{TEST_TYPESCRIPT_FILE}:activate:12", - name="activate", - kind="function", - file_path=str(TEST_TYPESCRIPT_FILE), - range=Range(start_line=12, start_character=16, end_line=12, end_character=24), - ) - - @pytest.mark.asyncio - async def test_real_typescript_definition(self, ts_symbol): - """Test LSP definition lookup in TypeScript.""" - print(f"\n>>> Testing TypeScript definition for {ts_symbol.name}") - - async with LspBridge( - workspace_root=str(TEST_TYPESCRIPT_FILE.parent.parent), - timeout=30.0, - ) as bridge: - definition = await bridge.get_definition(ts_symbol) - - if definition: - print(f" ✓ Found: {definition.file_path}:{definition.line}") - else: - print(" ⚠ No definition found (TypeScript LSP may not be active)") - - @pytest.mark.asyncio - async def test_real_typescript_document_symbols(self): - """Test document symbols in TypeScript.""" - print(f"\n>>> Testing TypeScript document symbols") - - async with LspBridge( - workspace_root=str(TEST_TYPESCRIPT_FILE.parent.parent), - timeout=30.0, - ) as bridge: - symbols = await bridge.get_document_symbols(str(TEST_TYPESCRIPT_FILE)) - - print(f" Found {len(symbols)} symbols") - for sym in symbols[:5]: - print(f" - {sym.get('name')} ({sym.get('kind')})") - - # TypeScript files should have symbols - if symbols: - print(" ✓ TypeScript symbols retrieved") - else: - print(" ⚠ No symbols found (TypeScript LSP may not be active)") - - -if __name__ == "__main__": - # Allow running directly - if is_pyright_available(): - print("Pyright language server is available") - print("Running tests...") - pytest.main([__file__, "-v", "-s"]) - else: - print("=" * 60) - print("Pyright language server NOT available") - print("=" * 60) - print() - print("To run these tests:") - print("1. Install pyright: npm install -g pyright") - print("2. Install typescript-language-server: npm install -g typescript-language-server") - print("3. Run: pytest tests/real/ -v -s") - print() - sys.exit(1) diff --git a/codex-lens/tests/simple_validation.py b/codex-lens/tests/simple_validation.py deleted file mode 100644 index 5d881bba..00000000 --- a/codex-lens/tests/simple_validation.py +++ /dev/null @@ -1,218 +0,0 @@ -""" -Simple validation for performance optimizations (Windows-safe). -""" -import sys -sys.stdout.reconfigure(encoding='utf-8') - -import json -import sqlite3 -import tempfile -import time -from pathlib import Path - -from codexlens.storage.dir_index import DirIndexStore -from codexlens.storage.registry import RegistryStore - - -def main(): - print("=" * 60) - print("CodexLens Performance Optimizations - Simple Validation") - print("=" * 60) - - # Test 1: Keyword Normalization - print("\n[1/4] Testing Keyword Normalization...") - try: - tmpdir = tempfile.mkdtemp() - db_path = Path(tmpdir) / "test1.db" - - store = DirIndexStore(db_path) - store.initialize() - - file_id = store.add_file( - name="test.py", - full_path=Path(f"{tmpdir}/test.py"), - content="def hello(): pass", - language="python" - ) - - keywords = ["auth", "security", "jwt"] - store.add_semantic_metadata( - file_id=file_id, - summary="Test", - keywords=keywords, - purpose="Testing", - llm_tool="gemini" - ) - - # Check normalized tables - conn = store._get_connection() - count = conn.execute( - "SELECT COUNT(*) as c FROM file_keywords WHERE file_id=?", - (file_id,) - ).fetchone()["c"] - - store.close() - - assert count == 3, f"Expected 3 keywords, got {count}" - print(" PASS: Keywords stored in normalized tables") - - # Test optimized search - store = DirIndexStore(db_path) - results = store.search_semantic_keywords("auth", use_normalized=True) - store.close() - - assert len(results) == 1 - print(" PASS: Optimized keyword search works") - - except Exception as e: - import traceback - print(f" FAIL: {e}") - traceback.print_exc() - return 1 - - # Test 2: Path Lookup Optimization - print("\n[2/4] Testing Path Lookup Optimization...") - try: - tmpdir = tempfile.mkdtemp() - db_path = Path(tmpdir) / "test2.db" - - store = RegistryStore(db_path) - store.initialize() # Create schema - - # Register a project first - project = store.register_project( - source_root=Path("/a"), - index_root=Path("/tmp") - ) - - # Register directory - store.register_dir( - project_id=project.id, - source_path=Path("/a/b/c"), - index_path=Path("/tmp/index.db"), - depth=2, - files_count=0 - ) - - deep_path = Path("/a/b/c/d/e/f/g/h/i/j/file.py") - - start = time.perf_counter() - result = store.find_nearest_index(deep_path) - elapsed = time.perf_counter() - start - - store.close() - - assert result is not None, "No result found" - # Path is normalized, just check it contains the key parts - assert "a" in str(result.source_path) and "b" in str(result.source_path) and "c" in str(result.source_path) - assert elapsed < 0.05, f"Too slow: {elapsed*1000:.2f}ms" - - print(f" PASS: Found nearest index in {elapsed*1000:.2f}ms") - - except Exception as e: - import traceback - print(f" FAIL: {e}") - traceback.print_exc() - return 1 - - # Test 3: Symbol Search Prefix Mode - print("\n[3/4] Testing Symbol Search Prefix Mode...") - try: - tmpdir = tempfile.mkdtemp() - db_path = Path(tmpdir) / "test3.db" - - store = DirIndexStore(db_path) - store.initialize() - - from codexlens.entities import Symbol - file_id = store.add_file( - name="test.py", - full_path=Path(f"{tmpdir}/test.py"), - content="def hello(): pass\n" * 10, - language="python", - symbols=[ - Symbol(name="get_user", kind="function", range=(1, 5)), - Symbol(name="get_item", kind="function", range=(6, 10)), - Symbol(name="create_user", kind="function", range=(11, 15)), - ] - ) - - # Prefix search - results = store.search_symbols("get", prefix_mode=True) - store.close() - - assert len(results) == 2, f"Expected 2, got {len(results)}" - for symbol in results: - assert symbol.name.startswith("get") - - print(f" PASS: Prefix search found {len(results)} symbols") - - except Exception as e: - import traceback - print(f" FAIL: {e}") - traceback.print_exc() - return 1 - - # Test 4: Performance Comparison - print("\n[4/4] Testing Performance Comparison...") - try: - tmpdir = tempfile.mkdtemp() - db_path = Path(tmpdir) / "test4.db" - - store = DirIndexStore(db_path) - store.initialize() - - # Create 50 files with keywords - for i in range(50): - file_id = store.add_file( - name=f"file_{i}.py", - full_path=Path(f"{tmpdir}/file_{i}.py"), - content=f"def function_{i}(): pass", - language="python" - ) - - keywords = ["auth", "security"] if i % 2 == 0 else ["api", "endpoint"] - store.add_semantic_metadata( - file_id=file_id, - summary=f"File {i}", - keywords=keywords, - purpose="Testing", - llm_tool="gemini" - ) - - # Benchmark normalized - start = time.perf_counter() - for _ in range(5): - results_norm = store.search_semantic_keywords("auth", use_normalized=True) - norm_time = time.perf_counter() - start - - # Benchmark fallback - start = time.perf_counter() - for _ in range(5): - results_fallback = store.search_semantic_keywords("auth", use_normalized=False) - fallback_time = time.perf_counter() - start - - store.close() - - assert len(results_norm) == len(results_fallback) - speedup = fallback_time / norm_time if norm_time > 0 else 1.0 - - print(f" Normalized: {norm_time*1000:.2f}ms (5 iterations)") - print(f" Fallback: {fallback_time*1000:.2f}ms (5 iterations)") - print(f" Speedup: {speedup:.2f}x") - print(" PASS: Performance test completed") - - except Exception as e: - import traceback - print(f" FAIL: {e}") - traceback.print_exc() - return 1 - - print("\n" + "=" * 60) - print("ALL VALIDATION TESTS PASSED") - print("=" * 60) - return 0 - - -if __name__ == "__main__": - exit(main()) diff --git a/codex-lens/tests/test_ann_index.py b/codex-lens/tests/test_ann_index.py deleted file mode 100644 index 964f7a1a..00000000 --- a/codex-lens/tests/test_ann_index.py +++ /dev/null @@ -1,760 +0,0 @@ -"""Tests for ANN (Approximate Nearest Neighbor) index using HNSW.""" - -import tempfile -from pathlib import Path -from unittest.mock import patch - -import pytest - -# Skip all tests if semantic dependencies not available -pytest.importorskip("numpy") - - -def _hnswlib_available() -> bool: - """Check if hnswlib is available.""" - try: - import hnswlib - return True - except ImportError: - return False - - -class TestANNIndex: - """Test suite for ANNIndex class.""" - - @pytest.fixture - def temp_db(self): - """Create a temporary database file.""" - with tempfile.TemporaryDirectory() as tmpdir: - yield Path(tmpdir) / "_index.db" - - @pytest.fixture - def sample_vectors(self): - """Generate sample vectors for testing.""" - import numpy as np - np.random.seed(42) - # 100 vectors of dimension 384 (matches fast model) - return np.random.randn(100, 384).astype(np.float32) - - @pytest.fixture - def sample_ids(self): - """Generate sample IDs.""" - return list(range(1, 101)) - - def test_import_check(self): - """Test that HNSWLIB_AVAILABLE flag is set correctly.""" - try: - from codexlens.semantic.ann_index import HNSWLIB_AVAILABLE - # Should be True if hnswlib is installed, False otherwise - assert isinstance(HNSWLIB_AVAILABLE, bool) - except ImportError: - pytest.skip("ann_index module not available") - - @pytest.mark.skipif( - not _hnswlib_available(), - reason="hnswlib not installed" - ) - def test_create_index(self, temp_db): - """Test creating a new ANN index.""" - from codexlens.semantic.ann_index import ANNIndex - - index = ANNIndex(temp_db, dim=384) - assert index.dim == 384 - assert index.count() == 0 - assert not index.is_loaded - - @pytest.mark.skipif( - not _hnswlib_available(), - reason="hnswlib not installed" - ) - def test_add_vectors(self, temp_db, sample_vectors, sample_ids): - """Test adding vectors to the index.""" - from codexlens.semantic.ann_index import ANNIndex - - index = ANNIndex(temp_db, dim=384) - index.add_vectors(sample_ids, sample_vectors) - - assert index.count() == 100 - assert index.is_loaded - - @pytest.mark.skipif( - not _hnswlib_available(), - reason="hnswlib not installed" - ) - def test_search(self, temp_db, sample_vectors, sample_ids): - """Test searching for similar vectors.""" - from codexlens.semantic.ann_index import ANNIndex - - index = ANNIndex(temp_db, dim=384) - index.add_vectors(sample_ids, sample_vectors) - - # Search for the first vector - should find itself - query = sample_vectors[0] - ids, distances = index.search(query, top_k=5) - - assert len(ids) == 5 - assert len(distances) == 5 - # First result should be the query vector itself (or very close) - assert ids[0] == 1 # ID of first vector - assert distances[0] < 0.01 # Very small distance (almost identical) - - @pytest.mark.skipif( - not _hnswlib_available(), - reason="hnswlib not installed" - ) - def test_search_clamps_top_k_to_available_vectors(self, temp_db, sample_vectors, sample_ids): - """Search should clamp top_k to the loaded vector count.""" - from codexlens.semantic.ann_index import ANNIndex - - index = ANNIndex(temp_db, dim=384) - index.add_vectors(sample_ids[:3], sample_vectors[:3]) - - ids, distances = index.search(sample_vectors[0], top_k=10) - - assert len(ids) == 3 - assert len(distances) == 3 - assert ids[0] == 1 - - @pytest.mark.skipif( - not _hnswlib_available(), - reason="hnswlib not installed" - ) - def test_save_and_load(self, temp_db, sample_vectors, sample_ids): - """Test saving and loading index from disk.""" - from codexlens.semantic.ann_index import ANNIndex - - # Create and save index - index1 = ANNIndex(temp_db, dim=384) - index1.add_vectors(sample_ids, sample_vectors) - index1.save() - - # Check that file was created (new naming: {db_stem}_vectors.hnsw) - hnsw_path = temp_db.parent / f"{temp_db.stem}_vectors.hnsw" - assert hnsw_path.exists() - - # Load in new instance - index2 = ANNIndex(temp_db, dim=384) - loaded = index2.load() - - assert loaded is True - assert index2.count() == 100 - assert index2.is_loaded - - # Verify search still works - query = sample_vectors[0] - ids, distances = index2.search(query, top_k=5) - assert ids[0] == 1 - - @pytest.mark.skipif( - not _hnswlib_available(), - reason="hnswlib not installed" - ) - def test_load_nonexistent(self, temp_db): - """Test loading when index file doesn't exist.""" - from codexlens.semantic.ann_index import ANNIndex - - index = ANNIndex(temp_db, dim=384) - loaded = index.load() - - assert loaded is False - assert not index.is_loaded - - @pytest.mark.skipif( - not _hnswlib_available(), - reason="hnswlib not installed" - ) - def test_remove_vectors(self, temp_db, sample_vectors, sample_ids): - """Test removing vectors from the index.""" - from codexlens.semantic.ann_index import ANNIndex - - index = ANNIndex(temp_db, dim=384) - index.add_vectors(sample_ids, sample_vectors) - - # Remove first 10 vectors - index.remove_vectors(list(range(1, 11))) - - # Search for removed vector - should not be in results - query = sample_vectors[0] - ids, distances = index.search(query, top_k=5) - - # ID 1 should not be in results (soft deleted) - assert 1 not in ids - - @pytest.mark.skipif( - not _hnswlib_available(), - reason="hnswlib not installed" - ) - def test_incremental_add(self, temp_db): - """Test adding vectors incrementally.""" - import numpy as np - from codexlens.semantic.ann_index import ANNIndex - - index = ANNIndex(temp_db, dim=384) - - # Add first batch - vectors1 = np.random.randn(50, 384).astype(np.float32) - index.add_vectors(list(range(1, 51)), vectors1) - assert index.count() == 50 - - # Add second batch - vectors2 = np.random.randn(50, 384).astype(np.float32) - index.add_vectors(list(range(51, 101)), vectors2) - assert index.count() == 100 - - @pytest.mark.skipif( - not _hnswlib_available(), - reason="hnswlib not installed" - ) - def test_search_empty_index(self, temp_db): - """Test searching an empty index.""" - import numpy as np - from codexlens.semantic.ann_index import ANNIndex - - index = ANNIndex(temp_db, dim=384) - query = np.random.randn(384).astype(np.float32) - - ids, distances = index.search(query, top_k=5) - - assert ids == [] - assert distances == [] - - @pytest.mark.skipif( - not _hnswlib_available(), - reason="hnswlib not installed" - ) - def test_invalid_dimension(self, temp_db, sample_vectors, sample_ids): - """Test adding vectors with wrong dimension.""" - import numpy as np - from codexlens.semantic.ann_index import ANNIndex - - index = ANNIndex(temp_db, dim=384) - - # Try to add vectors with wrong dimension - wrong_vectors = np.random.randn(10, 768).astype(np.float32) - with pytest.raises(ValueError, match="dimension"): - index.add_vectors(list(range(1, 11)), wrong_vectors) - - @pytest.mark.skipif( - not _hnswlib_available(), - reason="hnswlib not installed" - ) - def test_auto_resize(self, temp_db): - """Test that index automatically resizes when capacity is exceeded.""" - import numpy as np - from codexlens.semantic.ann_index import ANNIndex - - index = ANNIndex(temp_db, dim=384) - # Override initial capacity to test resize - index._max_elements = 100 - - # Add more vectors than initial capacity - vectors = np.random.randn(150, 384).astype(np.float32) - index.add_vectors(list(range(1, 151)), vectors) - - assert index.count() == 150 - assert index._max_elements >= 150 - - -class TestVectorStoreWithANN: - """Test VectorStore integration with ANN index.""" - - @pytest.fixture - def temp_db(self): - """Create a temporary database file.""" - with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir: - yield Path(tmpdir) / "_index.db" - - @pytest.fixture - def sample_chunks(self): - """Create sample semantic chunks with embeddings.""" - import numpy as np - from codexlens.entities import SemanticChunk - - np.random.seed(42) - chunks = [] - for i in range(10): - chunk = SemanticChunk( - content=f"def function_{i}(): pass", - metadata={"symbol_name": f"function_{i}", "symbol_kind": "function"}, - ) - chunk.embedding = np.random.randn(384).astype(np.float32).tolist() - chunks.append(chunk) - return chunks - - def test_vector_store_with_ann(self, temp_db, sample_chunks): - """Test VectorStore using ANN index for search.""" - from codexlens.semantic.vector_store import VectorStore, HNSWLIB_AVAILABLE - - store = VectorStore(temp_db) - - # Add chunks - ids = store.add_chunks(sample_chunks, "test.py") - assert len(ids) == 10 - - # Check ANN status - if HNSWLIB_AVAILABLE: - assert store.ann_available or store.ann_count >= 0 - - # Search - query_embedding = sample_chunks[0].embedding - results = store.search_similar(query_embedding, top_k=5) - - assert len(results) <= 5 - if results: - # First result should have high similarity - assert results[0].score > 0.9 - - def test_vector_store_rebuild_ann(self, temp_db, sample_chunks): - """Test rebuilding ANN index from SQLite data.""" - from codexlens.semantic.vector_store import VectorStore, HNSWLIB_AVAILABLE - - if not HNSWLIB_AVAILABLE: - pytest.skip("hnswlib not installed") - - store = VectorStore(temp_db) - - # Add chunks - store.add_chunks(sample_chunks, "test.py") - - # Rebuild ANN index - count = store.rebuild_ann_index() - assert count == 10 - - # Verify search works - query_embedding = sample_chunks[0].embedding - results = store.search_similar(query_embedding, top_k=5) - assert len(results) > 0 - - def test_vector_store_delete_updates_ann(self, temp_db, sample_chunks): - """Test that deleting chunks updates ANN index.""" - from codexlens.semantic.vector_store import VectorStore, HNSWLIB_AVAILABLE - - if not HNSWLIB_AVAILABLE: - pytest.skip("hnswlib not installed") - - store = VectorStore(temp_db) - - # Add chunks for two files - store.add_chunks(sample_chunks[:5], "file1.py") - store.add_chunks(sample_chunks[5:], "file2.py") - - initial_count = store.count_chunks() - assert initial_count == 10 - - # Delete one file's chunks - deleted = store.delete_file_chunks("file1.py") - assert deleted == 5 - - # Verify count - assert store.count_chunks() == 5 - - def test_vector_store_batch_add(self, temp_db, sample_chunks): - """Test batch adding chunks from multiple files.""" - from codexlens.semantic.vector_store import VectorStore - - store = VectorStore(temp_db) - - # Prepare chunks with paths - chunks_with_paths = [ - (chunk, f"file{i % 3}.py") - for i, chunk in enumerate(sample_chunks) - ] - - # Batch add - ids = store.add_chunks_batch(chunks_with_paths) - assert len(ids) == 10 - - # Verify - assert store.count_chunks() == 10 - - def test_vector_store_fallback_search(self, temp_db, sample_chunks): - """Test that search falls back to brute-force when ANN unavailable.""" - from codexlens.semantic.vector_store import VectorStore - - store = VectorStore(temp_db) - store.add_chunks(sample_chunks, "test.py") - - # Force disable ANN - store._ann_index = None - - # Search should still work (brute-force fallback) - query_embedding = sample_chunks[0].embedding - results = store.search_similar(query_embedding, top_k=5) - - assert len(results) > 0 - assert results[0].score > 0.9 - - -class TestSearchAccuracy: - """Test search accuracy comparing ANN vs brute-force.""" - - @pytest.fixture - def temp_db(self): - """Create a temporary database file.""" - with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir: - yield Path(tmpdir) / "_index.db" - - @pytest.mark.skipif( - not _hnswlib_available(), - reason="hnswlib not installed" - ) - def test_ann_vs_brute_force_recall(self, temp_db): - """Test that ANN search has high recall compared to brute-force.""" - import numpy as np - from codexlens.entities import SemanticChunk - from codexlens.semantic.vector_store import VectorStore - - np.random.seed(42) - - # Create larger dataset - chunks = [] - for i in range(100): - chunk = SemanticChunk( - content=f"code block {i}", - metadata={"chunk_id": i}, - ) - chunk.embedding = np.random.randn(384).astype(np.float32).tolist() - chunks.append(chunk) - - store = VectorStore(temp_db) - store.add_chunks(chunks, "test.py") - - # Get brute-force results - store._ann_index = None # Force brute-force - store._invalidate_cache() # Clear cache to force refresh - query = chunks[0].embedding - bf_results = store.search_similar(query, top_k=10) - # Use chunk_id from metadata for comparison (more reliable than path+score) - bf_chunk_ids = {r.metadata.get("chunk_id") for r in bf_results} - - # Rebuild ANN and get ANN results - store.rebuild_ann_index() - ann_results = store.search_similar(query, top_k=10) - ann_chunk_ids = {r.metadata.get("chunk_id") for r in ann_results} - - # Calculate recall (how many brute-force results are in ANN results) - # ANN should find at least 80% of the same results - overlap = len(bf_chunk_ids & ann_chunk_ids) - recall = overlap / len(bf_chunk_ids) if bf_chunk_ids else 1.0 - - assert recall >= 0.8, f"ANN recall too low: {recall} (overlap: {overlap}, bf: {bf_chunk_ids}, ann: {ann_chunk_ids})" - - - -class TestBinaryANNIndex: - """Test suite for BinaryANNIndex class (Hamming distance-based search).""" - - @pytest.fixture - def temp_db(self): - """Create a temporary database file.""" - with tempfile.TemporaryDirectory() as tmpdir: - yield Path(tmpdir) / "_index.db" - - @pytest.fixture - def sample_binary_vectors(self): - """Generate sample binary vectors for testing.""" - import numpy as np - np.random.seed(42) - # 100 binary vectors of dimension 256 (packed as 32 bytes each) - binary_unpacked = (np.random.rand(100, 256) > 0.5).astype(np.uint8) - packed = [np.packbits(v).tobytes() for v in binary_unpacked] - return packed, binary_unpacked - - @pytest.fixture - def sample_ids(self): - """Generate sample IDs.""" - return list(range(1, 101)) - - def test_create_binary_index(self, temp_db): - """Test creating a new Binary ANN index.""" - from codexlens.semantic.ann_index import BinaryANNIndex - - index = BinaryANNIndex(temp_db, dim=256) - assert index.dim == 256 - assert index.packed_dim == 32 - assert index.count() == 0 - assert not index.is_loaded - - def test_invalid_dimension(self, temp_db): - """Test that invalid dimensions are rejected.""" - from codexlens.semantic.ann_index import BinaryANNIndex - - # Dimension must be divisible by 8 - with pytest.raises(ValueError, match="divisible by 8"): - BinaryANNIndex(temp_db, dim=255) - - with pytest.raises(ValueError, match="positive"): - BinaryANNIndex(temp_db, dim=0) - - def test_add_packed_vectors(self, temp_db, sample_binary_vectors, sample_ids): - """Test adding packed binary vectors to the index.""" - from codexlens.semantic.ann_index import BinaryANNIndex - - packed, _ = sample_binary_vectors - index = BinaryANNIndex(temp_db, dim=256) - index.add_vectors(sample_ids, packed) - - assert index.count() == 100 - assert index.is_loaded - - def test_add_numpy_vectors(self, temp_db, sample_binary_vectors, sample_ids): - """Test adding unpacked numpy binary vectors.""" - from codexlens.semantic.ann_index import BinaryANNIndex - import numpy as np - - _, unpacked = sample_binary_vectors - index = BinaryANNIndex(temp_db, dim=256) - index.add_vectors_numpy(sample_ids, unpacked) - - assert index.count() == 100 - - def test_search_packed(self, temp_db, sample_binary_vectors, sample_ids): - """Test searching with packed binary query.""" - from codexlens.semantic.ann_index import BinaryANNIndex - - packed, _ = sample_binary_vectors - index = BinaryANNIndex(temp_db, dim=256) - index.add_vectors(sample_ids, packed) - - # Search for the first vector - should find itself with distance 0 - query = packed[0] - ids, distances = index.search(query, top_k=5) - - assert len(ids) == 5 - assert len(distances) == 5 - # First result should be the query vector itself - assert ids[0] == 1 - assert distances[0] == 0 # Hamming distance of 0 (identical) - - def test_search_numpy(self, temp_db, sample_binary_vectors, sample_ids): - """Test searching with unpacked numpy query.""" - from codexlens.semantic.ann_index import BinaryANNIndex - - packed, unpacked = sample_binary_vectors - index = BinaryANNIndex(temp_db, dim=256) - index.add_vectors(sample_ids, packed) - - # Search for the first vector using numpy interface - query = unpacked[0] - ids, distances = index.search_numpy(query, top_k=5) - - assert len(ids) == 5 - assert ids[0] == 1 - assert distances[0] == 0 - - def test_search_batch(self, temp_db, sample_binary_vectors, sample_ids): - """Test batch search with multiple queries.""" - from codexlens.semantic.ann_index import BinaryANNIndex - - packed, _ = sample_binary_vectors - index = BinaryANNIndex(temp_db, dim=256) - index.add_vectors(sample_ids, packed) - - # Search for first 3 vectors - queries = packed[:3] - results = index.search_batch(queries, top_k=5) - - assert len(results) == 3 - # Each result should find itself first - for i, (ids, dists) in enumerate(results): - assert ids[0] == i + 1 - assert dists[0] == 0 - - def test_hamming_distance_ordering(self, temp_db): - """Test that results are ordered by Hamming distance.""" - from codexlens.semantic.ann_index import BinaryANNIndex - import numpy as np - - index = BinaryANNIndex(temp_db, dim=256) - - # Create vectors with known Hamming distances from a query - query = np.zeros(256, dtype=np.uint8) # All zeros - v1 = np.zeros(256, dtype=np.uint8) # Distance 0 - v2 = np.zeros(256, dtype=np.uint8); v2[:10] = 1 # Distance 10 - v3 = np.zeros(256, dtype=np.uint8); v3[:50] = 1 # Distance 50 - v4 = np.ones(256, dtype=np.uint8) # Distance 256 - - index.add_vectors_numpy([1, 2, 3, 4], np.array([v1, v2, v3, v4])) - - query_packed = np.packbits(query).tobytes() - ids, distances = index.search(query_packed, top_k=4) - - assert ids == [1, 2, 3, 4] - assert distances == [0, 10, 50, 256] - - def test_save_and_load(self, temp_db, sample_binary_vectors, sample_ids): - """Test saving and loading binary index from disk.""" - from codexlens.semantic.ann_index import BinaryANNIndex - - packed, _ = sample_binary_vectors - - # Create and save index - index1 = BinaryANNIndex(temp_db, dim=256) - index1.add_vectors(sample_ids, packed) - index1.save() - - # Check that file was created - binary_path = temp_db.parent / f"{temp_db.stem}_binary_vectors.bin" - assert binary_path.exists() - - # Load in new instance - index2 = BinaryANNIndex(temp_db, dim=256) - loaded = index2.load() - - assert loaded is True - assert index2.count() == 100 - assert index2.is_loaded - - # Verify search still works - query = packed[0] - ids, distances = index2.search(query, top_k=5) - assert ids[0] == 1 - assert distances[0] == 0 - - def test_load_nonexistent(self, temp_db): - """Test loading when index file doesn't exist.""" - from codexlens.semantic.ann_index import BinaryANNIndex - - index = BinaryANNIndex(temp_db, dim=256) - loaded = index.load() - - assert loaded is False - assert not index.is_loaded - - def test_remove_vectors(self, temp_db, sample_binary_vectors, sample_ids): - """Test removing vectors from the index.""" - from codexlens.semantic.ann_index import BinaryANNIndex - - packed, _ = sample_binary_vectors - index = BinaryANNIndex(temp_db, dim=256) - index.add_vectors(sample_ids, packed) - - # Remove first 10 vectors - index.remove_vectors(list(range(1, 11))) - - assert index.count() == 90 - - # Removed vectors should not be findable - query = packed[0] - ids, _ = index.search(query, top_k=100) - for removed_id in range(1, 11): - assert removed_id not in ids - - def test_get_vector(self, temp_db, sample_binary_vectors, sample_ids): - """Test retrieving a specific vector by ID.""" - from codexlens.semantic.ann_index import BinaryANNIndex - - packed, _ = sample_binary_vectors - index = BinaryANNIndex(temp_db, dim=256) - index.add_vectors(sample_ids, packed) - - # Get existing vector - vec = index.get_vector(1) - assert vec == packed[0] - - # Get non-existing vector - vec = index.get_vector(9999) - assert vec is None - - def test_clear(self, temp_db, sample_binary_vectors, sample_ids): - """Test clearing all vectors from the index.""" - from codexlens.semantic.ann_index import BinaryANNIndex - - packed, _ = sample_binary_vectors - index = BinaryANNIndex(temp_db, dim=256) - index.add_vectors(sample_ids, packed) - assert index.count() == 100 - - index.clear() - assert index.count() == 0 - assert not index.is_loaded - - def test_search_empty_index(self, temp_db): - """Test searching an empty index.""" - from codexlens.semantic.ann_index import BinaryANNIndex - import numpy as np - - index = BinaryANNIndex(temp_db, dim=256) - query = np.packbits(np.zeros(256, dtype=np.uint8)).tobytes() - - ids, distances = index.search(query, top_k=5) - - assert ids == [] - assert distances == [] - - def test_update_existing_vector(self, temp_db): - """Test updating an existing vector with new data.""" - from codexlens.semantic.ann_index import BinaryANNIndex - import numpy as np - - index = BinaryANNIndex(temp_db, dim=256) - - # Add initial vector - v1 = np.zeros(256, dtype=np.uint8) - index.add_vectors_numpy([1], v1.reshape(1, -1)) - - # Update with different vector - v2 = np.ones(256, dtype=np.uint8) - index.add_vectors_numpy([1], v2.reshape(1, -1)) - - # Count should still be 1 - assert index.count() == 1 - - # Retrieved vector should be the updated one - stored = index.get_vector(1) - expected = np.packbits(v2).tobytes() - assert stored == expected - - -class TestCreateAnnIndexFactory: - """Test suite for create_ann_index factory function.""" - - @pytest.fixture - def temp_db(self): - """Create a temporary database file.""" - with tempfile.TemporaryDirectory() as tmpdir: - yield Path(tmpdir) / "_index.db" - - @pytest.mark.skipif( - not _hnswlib_available(), - reason="hnswlib not installed" - ) - def test_create_hnsw_index(self, temp_db): - """Test creating HNSW index via factory.""" - from codexlens.semantic.ann_index import create_ann_index, ANNIndex - - index = create_ann_index(temp_db, index_type="hnsw", dim=384) - assert isinstance(index, ANNIndex) - assert index.dim == 384 - - def test_create_binary_index(self, temp_db): - """Test creating binary index via factory.""" - from codexlens.semantic.ann_index import create_ann_index, BinaryANNIndex - - index = create_ann_index(temp_db, index_type="binary", dim=256) - assert isinstance(index, BinaryANNIndex) - assert index.dim == 256 - - def test_create_binary_index_default_dim(self, temp_db): - """Test that binary index defaults to 256 dim when dense default is used.""" - from codexlens.semantic.ann_index import create_ann_index, BinaryANNIndex - - # When dim=2048 (dense default) is passed with binary type, - # it should auto-adjust to 256 - index = create_ann_index(temp_db, index_type="binary") - assert isinstance(index, BinaryANNIndex) - assert index.dim == 256 - - def test_invalid_index_type(self, temp_db): - """Test that invalid index type raises error.""" - from codexlens.semantic.ann_index import create_ann_index - - with pytest.raises(ValueError, match="Invalid index_type"): - create_ann_index(temp_db, index_type="invalid") - - def test_case_insensitive_index_type(self, temp_db): - """Test that index_type is case-insensitive.""" - from codexlens.semantic.ann_index import create_ann_index, BinaryANNIndex - - index = create_ann_index(temp_db, index_type="BINARY", dim=256) - assert isinstance(index, BinaryANNIndex) diff --git a/codex-lens/tests/test_api_reranker.py b/codex-lens/tests/test_api_reranker.py deleted file mode 100644 index 58ff3a4a..00000000 --- a/codex-lens/tests/test_api_reranker.py +++ /dev/null @@ -1,200 +0,0 @@ -"""Tests for APIReranker backend.""" - -from __future__ import annotations - -import sys -import types -from typing import Any - -import pytest - -from codexlens.semantic.reranker import get_reranker -from codexlens.semantic.reranker.api_reranker import APIReranker - - -class DummyResponse: - def __init__( - self, - *, - status_code: int = 200, - json_data: Any = None, - text: str = "", - headers: dict[str, str] | None = None, - ) -> None: - self.status_code = int(status_code) - self._json_data = json_data - self.text = text - self.headers = headers or {} - - def json(self) -> Any: - return self._json_data - - -class DummyClient: - def __init__(self, *, base_url: str | None = None, headers: dict[str, str] | None = None, timeout: float | None = None) -> None: - self.base_url = base_url - self.headers = headers or {} - self.timeout = timeout - self.closed = False - self.calls: list[dict[str, Any]] = [] - self._responses: list[DummyResponse] = [] - - def queue(self, response: DummyResponse) -> None: - self._responses.append(response) - - def post(self, endpoint: str, *, json: dict[str, Any] | None = None) -> DummyResponse: - self.calls.append({"endpoint": endpoint, "json": json}) - if not self._responses: - raise AssertionError("DummyClient has no queued responses") - return self._responses.pop(0) - - def close(self) -> None: - self.closed = True - - -@pytest.fixture -def httpx_clients(monkeypatch: pytest.MonkeyPatch) -> list[DummyClient]: - clients: list[DummyClient] = [] - - dummy_httpx = types.ModuleType("httpx") - - def Client(*, base_url: str | None = None, headers: dict[str, str] | None = None, timeout: float | None = None) -> DummyClient: - client = DummyClient(base_url=base_url, headers=headers, timeout=timeout) - clients.append(client) - return client - - dummy_httpx.Client = Client - monkeypatch.setitem(sys.modules, "httpx", dummy_httpx) - - return clients - - -def test_api_reranker_requires_api_key( - monkeypatch: pytest.MonkeyPatch, httpx_clients: list[DummyClient] -) -> None: - # Force empty key in-process so the reranker does not fall back to any - # workspace/global .env configuration that may exist on the machine. - monkeypatch.setenv("RERANKER_API_KEY", "") - monkeypatch.setenv("CODEXLENS_RERANKER_API_KEY", "") - - with pytest.raises(ValueError, match="Missing API key"): - APIReranker() - - assert httpx_clients == [] - - -def test_api_reranker_reads_api_key_from_env( - monkeypatch: pytest.MonkeyPatch, httpx_clients: list[DummyClient] -) -> None: - monkeypatch.setenv("RERANKER_API_KEY", "test-key") - - reranker = APIReranker() - assert len(httpx_clients) == 1 - assert httpx_clients[0].headers["Authorization"] == "Bearer test-key" - reranker.close() - assert httpx_clients[0].closed is True - - -def test_api_reranker_strips_v1_from_api_base_to_avoid_double_v1( - monkeypatch: pytest.MonkeyPatch, httpx_clients: list[DummyClient] -) -> None: - monkeypatch.setenv("RERANKER_API_KEY", "test-key") - - reranker = APIReranker(api_base="https://api.siliconflow.cn/v1", provider="siliconflow") - assert len(httpx_clients) == 1 - # Endpoint already includes /v1, so api_base should not. - assert httpx_clients[0].base_url == "https://api.siliconflow.cn" - reranker.close() - - -def test_api_reranker_strips_endpoint_from_api_base_to_avoid_double_endpoint( - monkeypatch: pytest.MonkeyPatch, httpx_clients: list[DummyClient] -) -> None: - monkeypatch.setenv("RERANKER_API_KEY", "test-key") - - reranker = APIReranker(api_base="https://api.siliconflow.cn/v1/rerank", provider="siliconflow") - assert len(httpx_clients) == 1 - # If api_base already includes the endpoint suffix, strip it. - assert httpx_clients[0].base_url == "https://api.siliconflow.cn" - reranker.close() - - -def test_api_reranker_scores_pairs_siliconflow( - monkeypatch: pytest.MonkeyPatch, httpx_clients: list[DummyClient] -) -> None: - monkeypatch.delenv("RERANKER_API_KEY", raising=False) - # Avoid picking up any machine-local default model from global .env. - monkeypatch.setenv("RERANKER_MODEL", "") - monkeypatch.setenv("CODEXLENS_RERANKER_MODEL", "") - - reranker = APIReranker(api_key="k", provider="siliconflow") - client = httpx_clients[0] - - client.queue( - DummyResponse( - json_data={ - "results": [ - {"index": 0, "relevance_score": 0.9}, - {"index": 1, "relevance_score": 0.1}, - ] - } - ) - ) - - scores = reranker.score_pairs([("q", "d1"), ("q", "d2")]) - assert scores == pytest.approx([0.9, 0.1]) - - assert client.calls[0]["endpoint"] == "/v1/rerank" - payload = client.calls[0]["json"] - assert payload["model"] == "BAAI/bge-reranker-v2-m3" - assert payload["query"] == "q" - assert payload["documents"] == ["d1", "d2"] - assert payload["top_n"] == 2 - assert payload["return_documents"] is False - - -def test_api_reranker_retries_on_5xx( - monkeypatch: pytest.MonkeyPatch, httpx_clients: list[DummyClient] -) -> None: - monkeypatch.setenv("RERANKER_API_KEY", "k") - - from codexlens.semantic.reranker import api_reranker as api_reranker_module - - monkeypatch.setattr(api_reranker_module.time, "sleep", lambda *_args, **_kwargs: None) - - reranker = APIReranker(max_retries=1) - client = httpx_clients[0] - - client.queue(DummyResponse(status_code=500, text="oops", json_data={"error": "oops"})) - client.queue( - DummyResponse( - json_data={"results": [{"index": 0, "relevance_score": 0.7}]}, - ) - ) - - scores = reranker.score_pairs([("q", "d")]) - assert scores == pytest.approx([0.7]) - assert len(client.calls) == 2 - - -def test_api_reranker_unauthorized_raises( - monkeypatch: pytest.MonkeyPatch, httpx_clients: list[DummyClient] -) -> None: - monkeypatch.setenv("RERANKER_API_KEY", "k") - - reranker = APIReranker() - client = httpx_clients[0] - client.queue(DummyResponse(status_code=401, text="unauthorized")) - - with pytest.raises(RuntimeError, match="unauthorized"): - reranker.score_pairs([("q", "d")]) - - -def test_factory_api_backend_constructs_reranker( - monkeypatch: pytest.MonkeyPatch, httpx_clients: list[DummyClient] -) -> None: - monkeypatch.setenv("RERANKER_API_KEY", "k") - - reranker = get_reranker(backend="api") - assert isinstance(reranker, APIReranker) - assert len(httpx_clients) == 1 diff --git a/codex-lens/tests/test_association_tree.py b/codex-lens/tests/test_association_tree.py deleted file mode 100644 index ea947d80..00000000 --- a/codex-lens/tests/test_association_tree.py +++ /dev/null @@ -1,400 +0,0 @@ -"""Unit tests for association tree building and deduplication. - -Tests the AssociationTreeBuilder and ResultDeduplicator components using -mocked LSP responses. -""" - -from __future__ import annotations - -import asyncio -from typing import Any, Dict, List -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from codexlens.hybrid_search.data_structures import CallHierarchyItem, Range -from codexlens.search.association_tree import ( - AssociationTreeBuilder, - CallTree, - ResultDeduplicator, - TreeNode, - UniqueNode, -) - - -class MockLspManager: - """Mock LSP manager for testing.""" - - def __init__(self): - """Initialize mock with empty responses.""" - self.call_hierarchy_items: Dict[str, List[Dict]] = {} - self.incoming_calls: Dict[str, List[Dict]] = {} - self.outgoing_calls: Dict[str, List[Dict]] = {} - - async def get_call_hierarchy_items( - self, file_path: str, line: int, character: int, wait_for_analysis: float = 0.0 - ) -> List[Dict]: - """Mock get_call_hierarchy_items.""" - key = f"{file_path}:{line}:{character}" - return self.call_hierarchy_items.get(key, []) - - async def get_incoming_calls(self, item: Dict[str, Any]) -> List[Dict]: - """Mock get_incoming_calls.""" - name = item.get("name", "") - return self.incoming_calls.get(name, []) - - async def get_outgoing_calls(self, item: Dict[str, Any]) -> List[Dict]: - """Mock get_outgoing_calls.""" - name = item.get("name", "") - return self.outgoing_calls.get(name, []) - - -def create_mock_item( - name: str, - file_path: str, - start_line: int, - end_line: int, - kind: str = "function", -) -> Dict[str, Any]: - """Create a mock CallHierarchyItem dict. - - Args: - name: Symbol name - file_path: File path - start_line: Start line (0-based for LSP) - end_line: End line (0-based for LSP) - kind: Symbol kind - - Returns: - LSP CallHierarchyItem dict - """ - return { - "name": name, - "kind": kind, - "uri": f"file:///{file_path}", - "range": { - "start": {"line": start_line, "character": 0}, - "end": {"line": end_line, "character": 0}, - }, - "detail": f"def {name}(...)", - } - - -@pytest.mark.asyncio -async def test_simple_tree_building(): - """Test building a simple tree with one root and one callee.""" - mock_lsp = MockLspManager() - - # Root function - root_item = create_mock_item("main", "test.py", 10, 15) - - # Callee function - callee_item = create_mock_item("helper", "test.py", 20, 25) - - # Setup mock responses - mock_lsp.call_hierarchy_items["test.py:11:1"] = [root_item] - mock_lsp.outgoing_calls["main"] = [{"to": callee_item}] - mock_lsp.incoming_calls["main"] = [] - mock_lsp.outgoing_calls["helper"] = [] - mock_lsp.incoming_calls["helper"] = [] - - # Build tree - builder = AssociationTreeBuilder(mock_lsp) - tree = await builder.build_tree( - seed_file_path="test.py", - seed_line=11, - seed_character=1, - max_depth=2, - expand_callers=False, - expand_callees=True, - ) - - # Assertions - assert len(tree.roots) == 1 - assert tree.roots[0].item.name == "main" - assert len(tree.roots[0].children) == 1 - assert tree.roots[0].children[0].item.name == "helper" - assert len(tree.all_nodes) == 2 - - -@pytest.mark.asyncio -async def test_tree_with_cycle_detection(): - """Test that cycles are properly detected and marked.""" - mock_lsp = MockLspManager() - - # Create circular reference: A -> B -> A - item_a = create_mock_item("func_a", "test.py", 10, 15) - item_b = create_mock_item("func_b", "test.py", 20, 25) - - # Setup mock responses - mock_lsp.call_hierarchy_items["test.py:11:1"] = [item_a] - mock_lsp.outgoing_calls["func_a"] = [{"to": item_b}] - mock_lsp.outgoing_calls["func_b"] = [{"to": item_a}] # Cycle - mock_lsp.incoming_calls["func_a"] = [] - mock_lsp.incoming_calls["func_b"] = [] - - # Build tree - builder = AssociationTreeBuilder(mock_lsp) - tree = await builder.build_tree( - seed_file_path="test.py", - seed_line=11, - seed_character=1, - max_depth=5, - expand_callers=False, - expand_callees=True, - ) - - # Should have 2 unique nodes (func_a and func_b) - assert len(tree.all_nodes) == 2 - - # func_b should have a cycle child pointing back to func_a - func_b_node = None - for node in tree.node_list: - if node.item.name == "func_b": - func_b_node = node - break - - assert func_b_node is not None - assert len(func_b_node.children) == 1 - assert func_b_node.children[0].is_cycle - assert func_b_node.children[0].item.name == "func_a" - - -@pytest.mark.asyncio -async def test_max_depth_limit(): - """Test that expansion stops at max_depth.""" - mock_lsp = MockLspManager() - - # Chain: A -> B -> C -> D - items = { - "A": create_mock_item("func_a", "test.py", 10, 15), - "B": create_mock_item("func_b", "test.py", 20, 25), - "C": create_mock_item("func_c", "test.py", 30, 35), - "D": create_mock_item("func_d", "test.py", 40, 45), - } - - mock_lsp.call_hierarchy_items["test.py:11:1"] = [items["A"]] - mock_lsp.outgoing_calls["func_a"] = [{"to": items["B"]}] - mock_lsp.outgoing_calls["func_b"] = [{"to": items["C"]}] - mock_lsp.outgoing_calls["func_c"] = [{"to": items["D"]}] - mock_lsp.outgoing_calls["func_d"] = [] - - for name in ["func_a", "func_b", "func_c", "func_d"]: - mock_lsp.incoming_calls[name] = [] - - # Build tree with max_depth=2 - builder = AssociationTreeBuilder(mock_lsp) - tree = await builder.build_tree( - seed_file_path="test.py", - seed_line=11, - max_depth=2, - expand_callers=False, - expand_callees=True, - ) - - # Should only have nodes A, B, C (depths 0, 1, 2) - # D should not be included (would be depth 3) - assert len(tree.all_nodes) == 3 - node_names = {node.item.name for node in tree.node_list} - assert "func_a" in node_names - assert "func_b" in node_names - assert "func_c" in node_names - assert "func_d" not in node_names - - -@pytest.mark.asyncio -async def test_empty_tree(): - """Test building tree when no call hierarchy items found.""" - mock_lsp = MockLspManager() - - # No items configured - builder = AssociationTreeBuilder(mock_lsp) - tree = await builder.build_tree( - seed_file_path="test.py", - seed_line=11, - max_depth=2, - ) - - # Should have empty tree - assert len(tree.roots) == 0 - assert len(tree.all_nodes) == 0 - - -def test_deduplication_basic(): - """Test basic deduplication of tree nodes.""" - # Create test tree with duplicate nodes - tree = CallTree() - - # Same function appearing at different depths via different paths - # This simulates the real scenario where a function appears multiple times - # in a call tree (e.g., reached from different callers) - item_a1 = CallHierarchyItem( - name="func_a", - kind="function", - file_path="test.py", - range=Range(10, 0, 15, 0), - ) - item_a2 = CallHierarchyItem( - name="func_a", - kind="function", - file_path="test.py", - range=Range(10, 0, 15, 0), # Same range - ) - - node1 = TreeNode(item=item_a1, depth=0, path_from_root=["node1"]) - node2 = TreeNode(item=item_a2, depth=2, path_from_root=["root", "mid", "node2"]) - - # Manually add to node_list to simulate same symbol from different paths - tree.node_list.append(node1) - tree.node_list.append(node2) - - # Different function - item_b = CallHierarchyItem( - name="func_b", - kind="function", - file_path="test.py", - range=Range(20, 0, 25, 0), - ) - node3 = TreeNode(item=item_b, depth=1, path_from_root=["root", "node3"]) - tree.node_list.append(node3) - - # Deduplicate - deduplicator = ResultDeduplicator() - unique_nodes = deduplicator.deduplicate(tree) - - # Should have 2 unique nodes (func_a merged, func_b separate) - assert len(unique_nodes) == 2 - - # func_a should have occurrences=2 and min_depth=0 - func_a_node = next(n for n in unique_nodes if n.name == "func_a") - assert func_a_node.occurrences == 2 - assert func_a_node.min_depth == 0 - - # func_b should have occurrences=1 and min_depth=1 - func_b_node = next(n for n in unique_nodes if n.name == "func_b") - assert func_b_node.occurrences == 1 - assert func_b_node.min_depth == 1 - - -def test_deduplication_scoring(): - """Test that scoring prioritizes depth and frequency correctly.""" - tree = CallTree() - - # Create nodes with different characteristics - # Node at depth 0 (root) - item1 = CallHierarchyItem( - name="root_func", - kind="function", - file_path="test.py", - range=Range(10, 0, 15, 0), - ) - node1 = TreeNode(item=item1, depth=0) - tree.add_node(node1) - - # Node at depth 5 (deep) - item2 = CallHierarchyItem( - name="deep_func", - kind="function", - file_path="test.py", - range=Range(20, 0, 25, 0), - ) - node2 = TreeNode(item=item2, depth=5) - tree.add_node(node2) - - # Deduplicate and score - deduplicator = ResultDeduplicator() - unique_nodes = deduplicator.deduplicate(tree) - - # Root node should score higher than deep node - root_node = next(n for n in unique_nodes if n.name == "root_func") - deep_node = next(n for n in unique_nodes if n.name == "deep_func") - - assert root_node.score > deep_node.score - - -def test_deduplication_max_results(): - """Test that max_results limit works correctly.""" - tree = CallTree() - - # Create 5 unique nodes - for i in range(5): - item = CallHierarchyItem( - name=f"func_{i}", - kind="function", - file_path="test.py", - range=Range(i * 10, 0, i * 10 + 5, 0), - ) - node = TreeNode(item=item, depth=i) - tree.add_node(node) - - # Deduplicate with max_results=3 - deduplicator = ResultDeduplicator() - unique_nodes = deduplicator.deduplicate(tree, max_results=3) - - # Should only return 3 nodes - assert len(unique_nodes) == 3 - - -def test_filter_by_kind(): - """Test filtering unique nodes by symbol kind.""" - # Create unique nodes with different kinds - nodes = [ - UniqueNode( - file_path="test.py", - name="func1", - kind="function", - range=Range(10, 0, 15, 0), - ), - UniqueNode( - file_path="test.py", - name="cls1", - kind="class", - range=Range(20, 0, 30, 0), - ), - UniqueNode( - file_path="test.py", - name="var1", - kind="variable", - range=Range(40, 0, 40, 10), - ), - ] - - deduplicator = ResultDeduplicator() - - # Filter for functions only - filtered = deduplicator.filter_by_kind(nodes, ["function"]) - assert len(filtered) == 1 - assert filtered[0].name == "func1" - - # Filter for functions and classes - filtered = deduplicator.filter_by_kind(nodes, ["function", "class"]) - assert len(filtered) == 2 - - -def test_to_dict_list(): - """Test conversion of unique nodes to dict list.""" - nodes = [ - UniqueNode( - file_path="test.py", - name="func1", - kind="function", - range=Range(10, 0, 15, 0), - min_depth=0, - occurrences=2, - score=0.85, - ), - ] - - deduplicator = ResultDeduplicator() - dict_list = deduplicator.to_dict_list(nodes) - - assert len(dict_list) == 1 - assert dict_list[0]["name"] == "func1" - assert dict_list[0]["kind"] == "function" - assert dict_list[0]["min_depth"] == 0 - assert dict_list[0]["occurrences"] == 2 - assert dict_list[0]["score"] == 0.85 - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/codex-lens/tests/test_astgrep_binding.py b/codex-lens/tests/test_astgrep_binding.py deleted file mode 100644 index 7a154845..00000000 --- a/codex-lens/tests/test_astgrep_binding.py +++ /dev/null @@ -1,191 +0,0 @@ -"""Tests for ast-grep binding module. - -Verifies basic import and functionality of AstGrepBinding. -Run with: python -m pytest tests/test_astgrep_binding.py -v -""" - -from __future__ import annotations - -import pytest -from pathlib import Path - - -class TestAstGrepBindingAvailability: - """Test availability checks.""" - - def test_is_astgrep_available_function(self): - """Test is_astgrep_available function returns boolean.""" - from codexlens.parsers.astgrep_binding import is_astgrep_available - result = is_astgrep_available() - assert isinstance(result, bool) - - def test_get_supported_languages(self): - """Test get_supported_languages returns expected languages.""" - from codexlens.parsers.astgrep_binding import get_supported_languages - languages = get_supported_languages() - assert isinstance(languages, list) - assert "python" in languages - assert "javascript" in languages - assert "typescript" in languages - - -class TestAstGrepBindingInit: - """Test AstGrepBinding initialization.""" - - def test_init_python(self): - """Test initialization with Python language.""" - from codexlens.parsers.astgrep_binding import AstGrepBinding - binding = AstGrepBinding("python") - assert binding.language_id == "python" - - def test_init_typescript_with_tsx(self): - """Test TSX detection from file extension.""" - from codexlens.parsers.astgrep_binding import AstGrepBinding - binding = AstGrepBinding("typescript", Path("component.tsx")) - assert binding.language_id == "typescript" - - def test_is_available_returns_boolean(self): - """Test is_available returns boolean.""" - from codexlens.parsers.astgrep_binding import AstGrepBinding - binding = AstGrepBinding("python") - result = binding.is_available() - assert isinstance(result, bool) - - -def _is_astgrep_installed(): - """Check if ast-grep-py is installed.""" - try: - import ast_grep_py # noqa: F401 - return True - except ImportError: - return False - - -@pytest.mark.skipif( - not _is_astgrep_installed(), - reason="ast-grep-py not installed" -) -class TestAstGrepBindingWithAstGrep: - """Tests that require ast-grep-py to be installed.""" - - def test_parse_simple_python(self): - """Test parsing simple Python code.""" - from codexlens.parsers.astgrep_binding import AstGrepBinding - binding = AstGrepBinding("python") - - if not binding.is_available(): - pytest.skip("ast-grep not available") - - source = "x = 1" - result = binding.parse(source) - assert result is True - - def test_find_inheritance(self): - """Test finding class inheritance.""" - from codexlens.parsers.astgrep_binding import AstGrepBinding - binding = AstGrepBinding("python") - - if not binding.is_available(): - pytest.skip("ast-grep not available") - - source = """ -class MyClass(BaseClass): - pass -""" - binding.parse(source) - results = binding.find_inheritance() - assert len(results) >= 0 # May or may not find depending on pattern match - - def test_find_calls(self): - """Test finding function calls.""" - from codexlens.parsers.astgrep_binding import AstGrepBinding - binding = AstGrepBinding("python") - - if not binding.is_available(): - pytest.skip("ast-grep not available") - - source = """ -def foo(): - bar() - baz.qux() -""" - binding.parse(source) - results = binding.find_calls() - assert isinstance(results, list) - - def test_find_imports(self): - """Test finding import statements.""" - from codexlens.parsers.astgrep_binding import AstGrepBinding - binding = AstGrepBinding("python") - - if not binding.is_available(): - pytest.skip("ast-grep not available") - - source = """ -import os -from typing import List -""" - binding.parse(source) - results = binding.find_imports() - assert isinstance(results, list) - - -def test_basic_import(): - """Test that the module can be imported.""" - try: - from codexlens.parsers.astgrep_binding import ( - AstGrepBinding, - is_astgrep_available, - get_supported_languages, - ASTGREP_AVAILABLE, - ) - assert True - except ImportError as e: - pytest.fail(f"Failed to import astgrep_binding: {e}") - - -def test_availability_flag(): - """Test ASTGREP_AVAILABLE flag is defined.""" - from codexlens.parsers.astgrep_binding import ASTGREP_AVAILABLE - assert isinstance(ASTGREP_AVAILABLE, bool) - - -if __name__ == "__main__": - # Run basic verification - print("Testing astgrep_binding module...") - - from codexlens.parsers.astgrep_binding import ( - AstGrepBinding, - is_astgrep_available, - get_supported_languages, - ) - - print(f"ast-grep available: {is_astgrep_available()}") - print(f"Supported languages: {get_supported_languages()}") - - binding = AstGrepBinding("python") - print(f"Python binding available: {binding.is_available()}") - - if binding.is_available(): - test_code = """ -import os -from typing import List - -class MyClass(BaseClass): - def method(self): - self.helper() - external_func() - -def helper(): - pass -""" - binding.parse(test_code) - print(f"Inheritance found: {binding.find_inheritance()}") - print(f"Calls found: {binding.find_calls()}") - print(f"Imports found: {binding.find_imports()}") - else: - print("Note: ast-grep-py not installed. To install:") - print(" pip install ast-grep-py") - print(" Note: May have compatibility issues with Python 3.13") - - print("Basic verification complete!") diff --git a/codex-lens/tests/test_binary_searcher.py b/codex-lens/tests/test_binary_searcher.py deleted file mode 100644 index eb751bb6..00000000 --- a/codex-lens/tests/test_binary_searcher.py +++ /dev/null @@ -1,161 +0,0 @@ -"""Unit tests for BinarySearcher - binary vector search using Hamming distance. - -Tests cover: -- load: mmap file loading, DB fallback, no data scenario -- search: basic search, top_k limit, empty index -""" - -from __future__ import annotations - -import json -import tempfile -from pathlib import Path -from unittest.mock import MagicMock, patch, mock_open - -import numpy as np -import pytest - -from codexlens.search.binary_searcher import BinarySearcher - - -# ============================================================================= -# Test Fixtures -# ============================================================================= - - -@pytest.fixture -def temp_paths(): - """Create temporary directory structure.""" - tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True) - root = Path(tmpdir.name) - yield root - try: - tmpdir.cleanup() - except (PermissionError, OSError): - pass - - -@pytest.fixture -def binary_mmap_setup(temp_paths): - """Create a mock memory-mapped binary vectors file with metadata.""" - num_vectors = 10 - dim_bytes = 32 # 256 bits = 32 bytes - - # Create binary matrix - rng = np.random.default_rng(42) - binary_matrix = rng.integers(0, 256, size=(num_vectors, dim_bytes), dtype=np.uint8) - chunk_ids = list(range(100, 100 + num_vectors)) - - # Write mmap file - mmap_path = temp_paths / "_binary_vectors.mmap" - binary_matrix.tofile(str(mmap_path)) - - # Write metadata - meta_path = mmap_path.with_suffix(".meta.json") - meta = { - "shape": [num_vectors, dim_bytes], - "chunk_ids": chunk_ids, - } - with open(meta_path, "w") as f: - json.dump(meta, f) - - return temp_paths, binary_matrix, chunk_ids - - -# ============================================================================= -# Tests: load -# ============================================================================= - - -class TestBinarySearcherLoad: - """Tests for BinarySearcher.load().""" - - def test_load_mmap(self, binary_mmap_setup): - """Memory-mapped file loading should succeed and mark is_memmap.""" - index_root, binary_matrix, chunk_ids = binary_mmap_setup - searcher = BinarySearcher(index_root) - - result = searcher.load() - - assert result is True - assert searcher._loaded is True - assert searcher.is_memmap is True - assert searcher.vector_count == len(chunk_ids) - - def test_load_db_fallback(self, temp_paths): - """Should fall back to DB loading when no mmap file exists.""" - searcher = BinarySearcher(temp_paths) - - # Mock the DB fallback - with patch.object(searcher, "_load_from_db", return_value=True) as mock_db: - result = searcher.load() - - assert result is True - mock_db.assert_called_once() - - def test_load_no_data(self, temp_paths): - """Should return False when neither mmap nor DB data available.""" - searcher = BinarySearcher(temp_paths) - - with patch.object(searcher, "_load_from_db", return_value=False): - result = searcher.load() - - assert result is False - assert searcher._loaded is False - - -# ============================================================================= -# Tests: search -# ============================================================================= - - -class TestBinarySearcherSearch: - """Tests for BinarySearcher.search().""" - - def test_search_basic(self, binary_mmap_setup): - """Basic search should return (chunk_id, distance) tuples.""" - index_root, binary_matrix, chunk_ids = binary_mmap_setup - searcher = BinarySearcher(index_root) - searcher.load() - - # Create a query vector (256 dimensions, will be binarized) - rng = np.random.default_rng(99) - query_vector = rng.standard_normal(256).astype(np.float32) - - results = searcher.search(query_vector, top_k=5) - - assert len(results) == 5 - # Results should be (chunk_id, hamming_distance) tuples - for chunk_id, distance in results: - assert isinstance(chunk_id, int) - assert isinstance(distance, int) - assert chunk_id in chunk_ids - - def test_search_top_k(self, binary_mmap_setup): - """Search should respect top_k limit.""" - index_root, binary_matrix, chunk_ids = binary_mmap_setup - searcher = BinarySearcher(index_root) - searcher.load() - - query_vector = np.random.default_rng(42).standard_normal(256).astype(np.float32) - - results_3 = searcher.search(query_vector, top_k=3) - results_7 = searcher.search(query_vector, top_k=7) - - assert len(results_3) == 3 - assert len(results_7) == 7 - # Results should be sorted by distance (ascending) - distances_3 = [d for _, d in results_3] - assert distances_3 == sorted(distances_3) - - def test_search_empty_index(self, temp_paths): - """Search on empty/unloaded index should return empty list.""" - searcher = BinarySearcher(temp_paths) - # Do not load - index is empty - - query_vector = np.zeros(256, dtype=np.float32) - - with patch.object(searcher, "load", return_value=False): - results = searcher.search(query_vector, top_k=5) - - assert results == [] diff --git a/codex-lens/tests/test_cascade_strategies.py b/codex-lens/tests/test_cascade_strategies.py deleted file mode 100644 index 2a4713b4..00000000 --- a/codex-lens/tests/test_cascade_strategies.py +++ /dev/null @@ -1,392 +0,0 @@ -"""Integration tests for chain_search.py cascade strategies. - -Tests cover: -- binary_cascade_search: Full pipeline and numpy-unavailable fallback -- binary_rerank_cascade_search: Pipeline and fallback -- dense_rerank_cascade_search: Pipeline and fallback -- cascade_search: Router dispatching to correct strategy methods -""" - -from __future__ import annotations - -import tempfile -from pathlib import Path -from typing import List -from unittest.mock import MagicMock, Mock, patch - -import pytest - -from codexlens.config import Config -from codexlens.entities import SearchResult -from codexlens.search.chain_search import ( - ChainSearchEngine, - ChainSearchResult, - SearchOptions, - SearchStats, -) -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore - - -# ============================================================================= -# Test Fixtures -# ============================================================================= - - -@pytest.fixture -def temp_paths(): - """Create temporary directory structure.""" - tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True) - root = Path(tmpdir.name) - yield root - try: - tmpdir.cleanup() - except (PermissionError, OSError): - pass - - -@pytest.fixture -def mock_registry(temp_paths: Path): - """Create mock registry store.""" - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - return registry - - -@pytest.fixture -def mock_mapper(temp_paths: Path): - """Create path mapper.""" - return PathMapper(index_root=temp_paths / "indexes") - - -@pytest.fixture -def mock_config(): - """Create mock config for cascade search.""" - config = MagicMock(spec=Config) - config.cascade_coarse_k = 100 - config.cascade_fine_k = 10 - config.cascade_strategy = "binary" - config.enable_staged_rerank = False - config.staged_clustering_strategy = "auto" - config.staged_clustering_min_size = 3 - config.graph_expansion_depth = 2 - return config - - -@pytest.fixture -def sample_search_results() -> List[SearchResult]: - """Create sample search results for testing.""" - return [ - SearchResult(path="a.py", score=0.9, excerpt="def auth():"), - SearchResult(path="b.py", score=0.8, excerpt="class User:"), - SearchResult(path="c.py", score=0.7, excerpt="def login():"), - ] - - -# ============================================================================= -# Tests: binary_cascade_search -# ============================================================================= - - -class TestBinaryCascadeSearch: - """Tests for binary_cascade_search().""" - - def test_binary_cascade_full_pipeline( - self, mock_registry, mock_mapper, mock_config, temp_paths - ): - """binary_cascade_search should execute full binary+dense pipeline.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch.object(engine, "_find_start_index") as mock_find: - mock_find.return_value = temp_paths / "index" / "_index.db" - - with patch.object(engine, "_collect_index_paths") as mock_collect: - mock_collect.return_value = [temp_paths / "index" / "_index.db"] - - # Mock the embedding backend imports - with patch("codexlens.search.chain_search.NUMPY_AVAILABLE", True): - with patch.dict("sys.modules", { - "codexlens.indexing.embedding": MagicMock(), - "codexlens.semantic.ann_index": MagicMock(), - }): - # Mock _get_or_create_binary_index - with patch.object( - engine, "_get_or_create_binary_index" - ) as mock_bin: - mock_index = MagicMock() - mock_index.count.return_value = 10 - mock_index.search.return_value = ([1, 2], [10, 20]) - mock_bin.return_value = mock_index - - # The search should fall back to standard on import issues - with patch.object(engine, "search") as mock_search: - mock_search.return_value = ChainSearchResult( - query="test", - results=[SearchResult(path="a.py", score=0.9, excerpt="a")], - symbols=[], - stats=SearchStats(), - ) - - result = engine.binary_cascade_search( - "test query", temp_paths / "src", - k=10, coarse_k=100, - ) - - assert isinstance(result, ChainSearchResult) - - def test_binary_cascade_numpy_unavailable( - self, mock_registry, mock_mapper, mock_config, temp_paths - ): - """binary_cascade_search should fall back to standard search when numpy unavailable.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch("codexlens.search.chain_search.NUMPY_AVAILABLE", False): - with patch.object(engine, "search") as mock_search: - mock_search.return_value = ChainSearchResult( - query="test", - results=[], - symbols=[], - stats=SearchStats(), - ) - - result = engine.binary_cascade_search( - "query", temp_paths / "src", - ) - - mock_search.assert_called_once() - assert isinstance(result, ChainSearchResult) - - -# ============================================================================= -# Tests: binary_rerank_cascade_search -# ============================================================================= - - -class TestBinaryRerankCascadeSearch: - """Tests for binary_rerank_cascade_search().""" - - def test_binary_rerank_cascade_pipeline( - self, mock_registry, mock_mapper, mock_config, temp_paths - ): - """binary_rerank_cascade_search should execute binary+cross-encoder pipeline.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch("codexlens.search.chain_search.NUMPY_AVAILABLE", True): - with patch.object(engine, "_find_start_index") as mock_find: - mock_find.return_value = temp_paths / "index" / "_index.db" - - with patch.object(engine, "_collect_index_paths") as mock_collect: - mock_collect.return_value = [temp_paths / "index" / "_index.db"] - - # Mock BinaryEmbeddingBackend import - with patch.dict("sys.modules", { - "codexlens.indexing.embedding": MagicMock(), - }): - with patch.object(engine, "search") as mock_search: - mock_search.return_value = ChainSearchResult( - query="test", - results=[SearchResult(path="a.py", score=0.9, excerpt="a")], - symbols=[], - stats=SearchStats(), - ) - - result = engine.binary_rerank_cascade_search( - "test query", temp_paths / "src", - k=10, coarse_k=100, - ) - - assert isinstance(result, ChainSearchResult) - - def test_binary_rerank_fallback( - self, mock_registry, mock_mapper, mock_config, temp_paths - ): - """binary_rerank_cascade_search should fall back when numpy unavailable.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch("codexlens.search.chain_search.NUMPY_AVAILABLE", False): - with patch.object(engine, "search") as mock_search: - mock_search.return_value = ChainSearchResult( - query="test", - results=[], - symbols=[], - stats=SearchStats(), - ) - - result = engine.binary_rerank_cascade_search( - "query", temp_paths / "src", - ) - - mock_search.assert_called_once() - - -# ============================================================================= -# Tests: dense_rerank_cascade_search -# ============================================================================= - - -class TestDenseRerankCascadeSearch: - """Tests for dense_rerank_cascade_search().""" - - def test_dense_rerank_cascade_pipeline( - self, mock_registry, mock_mapper, mock_config, temp_paths - ): - """dense_rerank_cascade_search should execute dense+cross-encoder pipeline.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch("codexlens.search.chain_search.NUMPY_AVAILABLE", True): - with patch.object(engine, "_find_start_index") as mock_find: - mock_find.return_value = temp_paths / "index" / "_index.db" - - with patch.object(engine, "_collect_index_paths") as mock_collect: - mock_collect.return_value = [temp_paths / "index" / "_index.db"] - - with patch.object(engine, "search") as mock_search: - mock_search.return_value = ChainSearchResult( - query="test", - results=[SearchResult(path="a.py", score=0.9, excerpt="a")], - symbols=[], - stats=SearchStats(), - ) - - result = engine.dense_rerank_cascade_search( - "test query", temp_paths / "src", - k=10, coarse_k=100, - ) - - assert isinstance(result, ChainSearchResult) - - def test_dense_rerank_fallback( - self, mock_registry, mock_mapper, mock_config, temp_paths - ): - """dense_rerank_cascade_search should fall back when numpy unavailable.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch("codexlens.search.chain_search.NUMPY_AVAILABLE", False): - with patch.object(engine, "search") as mock_search: - mock_search.return_value = ChainSearchResult( - query="test", - results=[], - symbols=[], - stats=SearchStats(), - ) - - result = engine.dense_rerank_cascade_search( - "query", temp_paths / "src", - ) - - mock_search.assert_called_once() - - -# ============================================================================= -# Tests: cascade_search (unified router) -# ============================================================================= - - -class TestCascadeRouter: - """Tests for cascade_search() strategy routing.""" - - def test_cascade_router_binary( - self, mock_registry, mock_mapper, mock_config, temp_paths - ): - """cascade_search with strategy='binary' should route to binary_cascade_search.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch.object(engine, "binary_cascade_search") as mock_binary: - mock_binary.return_value = ChainSearchResult( - query="test", results=[], symbols=[], stats=SearchStats() - ) - - engine.cascade_search( - "query", temp_paths / "src", strategy="binary" - ) - - mock_binary.assert_called_once() - - def test_cascade_router_binary_rerank( - self, mock_registry, mock_mapper, mock_config, temp_paths - ): - """cascade_search with strategy='binary_rerank' should route correctly.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch.object(engine, "binary_rerank_cascade_search") as mock_rerank: - mock_rerank.return_value = ChainSearchResult( - query="test", results=[], symbols=[], stats=SearchStats() - ) - - engine.cascade_search( - "query", temp_paths / "src", strategy="binary_rerank" - ) - - mock_rerank.assert_called_once() - - def test_cascade_router_dense_rerank( - self, mock_registry, mock_mapper, mock_config, temp_paths - ): - """cascade_search with strategy='dense_rerank' should route correctly.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch.object(engine, "dense_rerank_cascade_search") as mock_dense: - mock_dense.return_value = ChainSearchResult( - query="test", results=[], symbols=[], stats=SearchStats() - ) - - engine.cascade_search( - "query", temp_paths / "src", strategy="dense_rerank" - ) - - mock_dense.assert_called_once() - - def test_cascade_router_staged( - self, mock_registry, mock_mapper, mock_config, temp_paths - ): - """cascade_search with strategy='staged' should route to staged_cascade_search.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch.object(engine, "staged_cascade_search") as mock_staged: - mock_staged.return_value = ChainSearchResult( - query="test", results=[], symbols=[], stats=SearchStats() - ) - - engine.cascade_search( - "query", temp_paths / "src", strategy="staged" - ) - - mock_staged.assert_called_once() - - def test_cascade_router_config_default( - self, mock_registry, mock_mapper, temp_paths - ): - """cascade_search with no strategy param should use config cascade_strategy.""" - config = MagicMock(spec=Config) - config.cascade_strategy = "binary_rerank" - config.cascade_coarse_k = 100 - config.cascade_fine_k = 10 - - engine = ChainSearchEngine(mock_registry, mock_mapper, config=config) - - with patch.object(engine, "binary_rerank_cascade_search") as mock_rerank: - mock_rerank.return_value = ChainSearchResult( - query="test", results=[], symbols=[], stats=SearchStats() - ) - - # No strategy param -> reads from config - engine.cascade_search("query", temp_paths / "src") - - mock_rerank.assert_called_once() - - def test_cascade_router_invalid_fallback( - self, mock_registry, mock_mapper, mock_config, temp_paths - ): - """cascade_search with invalid strategy should default to 'binary'.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch.object(engine, "binary_cascade_search") as mock_binary: - mock_binary.return_value = ChainSearchResult( - query="test", results=[], symbols=[], stats=SearchStats() - ) - - engine.cascade_search( - "query", temp_paths / "src", strategy="nonexistent" - ) - - mock_binary.assert_called_once() diff --git a/codex-lens/tests/test_chain_search.py b/codex-lens/tests/test_chain_search.py deleted file mode 100644 index 3e498e43..00000000 --- a/codex-lens/tests/test_chain_search.py +++ /dev/null @@ -1,1634 +0,0 @@ -import logging -import os -import sqlite3 -import tempfile -from pathlib import Path -from unittest.mock import MagicMock - -import pytest - -from codexlens.config import ( - BINARY_VECTORS_MMAP_NAME, - Config, - VECTORS_HNSW_NAME, - VECTORS_META_DB_NAME, -) -from codexlens.entities import SearchResult, Symbol -import codexlens.search.chain_search as chain_search_module -from codexlens.search.chain_search import ( - ChainSearchEngine, - ChainSearchResult, - SearchOptions, - SearchStats, -) -from codexlens.storage.global_index import GlobalSymbolIndex -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore - - -@pytest.fixture() -def temp_paths(): - tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True) - root = Path(tmpdir.name) - yield root - try: - tmpdir.cleanup() - except (PermissionError, OSError): - pass - - -def test_symbol_filtering_handles_path_failures(monkeypatch: pytest.MonkeyPatch, caplog, temp_paths: Path) -> None: - project_root = temp_paths / "project" - (project_root / "src").mkdir(parents=True, exist_ok=True) - - index_root = temp_paths / "indexes" - mapper = PathMapper(index_root=index_root) - index_db_path = mapper.source_to_index_db(project_root) - index_db_path.parent.mkdir(parents=True, exist_ok=True) - index_db_path.write_text("", encoding="utf-8") # existence is enough for _find_start_index - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - project_info = registry.register_project(project_root, mapper.source_to_index_dir(project_root)) - - global_db_path = project_info.index_root / GlobalSymbolIndex.DEFAULT_DB_NAME - global_index = GlobalSymbolIndex(global_db_path, project_id=project_info.id) - global_index.initialize() - - valid_file = project_root / "src" / "auth.py" - valid_sym = Symbol(name="AuthManager", kind="class", range=(1, 2), file=str(valid_file)) - bad_null = Symbol(name="BadNull", kind="class", range=(1, 2), file="bad\0path.py") - bad_relative = Symbol(name="BadRelative", kind="class", range=(1, 2), file="relative/path.py") - - candidates = [valid_sym, bad_null, bad_relative] - - if os.name == "nt": - root_drive, _ = os.path.splitdrive(str(project_root.resolve())) - other_drive = "C:" if root_drive.lower() != "c:" else "D:" - candidates.append( - Symbol(name="CrossDrive", kind="class", range=(1, 2), file=f"{other_drive}\\other\\file.py") - ) - - def fake_search(self, name: str, kind=None, limit: int = 20, prefix_mode: bool = False): - return candidates - - monkeypatch.setattr(GlobalSymbolIndex, "search", fake_search) - - config = Config(data_dir=temp_paths / "data", global_symbol_index_enabled=True) - engine = ChainSearchEngine(registry, mapper, config=config) - engine._search_symbols_parallel = MagicMock(side_effect=AssertionError("should not traverse chain")) - - caplog.set_level(logging.DEBUG, logger="codexlens.search.chain_search") - symbols = engine.search_symbols( - "Auth", - project_root, - options=SearchOptions(depth=5, total_limit=10), - ) - - assert [s.name for s in symbols] == ["AuthManager"] - assert "BadNull" in caplog.text - assert "BadRelative" in caplog.text - if os.name == "nt": - assert "CrossDrive" in caplog.text - - -def test_cascade_search_strategy_routing(temp_paths: Path) -> None: - """Test cascade_search() routes to correct strategy implementation.""" - from unittest.mock import patch - from codexlens.search.chain_search import ChainSearchResult, SearchStats - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config(data_dir=temp_paths / "data") - - engine = ChainSearchEngine(registry, mapper, config=config) - source_path = temp_paths / "src" - - # Test strategy='staged' routing - with patch.object(engine, "staged_cascade_search") as mock_staged: - mock_staged.return_value = ChainSearchResult( - query="query", results=[], symbols=[], stats=SearchStats() - ) - engine.cascade_search("query", source_path, strategy="staged") - mock_staged.assert_called_once() - - # Test strategy='binary' routing - with patch.object(engine, "binary_cascade_search") as mock_binary: - mock_binary.return_value = ChainSearchResult( - query="query", results=[], symbols=[], stats=SearchStats() - ) - engine.cascade_search("query", source_path, strategy="binary") - mock_binary.assert_called_once() - - # Test strategy='binary_rerank' routing - with patch.object(engine, "binary_rerank_cascade_search") as mock_br: - mock_br.return_value = ChainSearchResult( - query="query", results=[], symbols=[], stats=SearchStats() - ) - engine.cascade_search("query", source_path, strategy="binary_rerank") - mock_br.assert_called_once() - - # Test strategy='dense_rerank' routing - with patch.object(engine, "dense_rerank_cascade_search") as mock_dr: - mock_dr.return_value = ChainSearchResult( - query="query", results=[], symbols=[], stats=SearchStats() - ) - engine.cascade_search("query", source_path, strategy="dense_rerank") - mock_dr.assert_called_once() - - # Test default routing (no strategy specified) - defaults to binary - with patch.object(engine, "binary_cascade_search") as mock_default: - mock_default.return_value = ChainSearchResult( - query="query", results=[], symbols=[], stats=SearchStats() - ) - engine.cascade_search("query", source_path) - mock_default.assert_called_once() - - -def test_cascade_search_invalid_strategy(temp_paths: Path) -> None: - """Test cascade_search() defaults to 'binary' for invalid strategy.""" - from unittest.mock import patch - from codexlens.search.chain_search import ChainSearchResult, SearchStats - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config(data_dir=temp_paths / "data") - - engine = ChainSearchEngine(registry, mapper, config=config) - source_path = temp_paths / "src" - - # Invalid strategy should default to binary - with patch.object(engine, "binary_cascade_search") as mock_binary: - mock_binary.return_value = ChainSearchResult( - query="query", results=[], symbols=[], stats=SearchStats() - ) - engine.cascade_search("query", source_path, strategy="invalid_strategy") - mock_binary.assert_called_once() - - -def test_vector_warmup_uses_embedding_config(monkeypatch: pytest.MonkeyPatch, temp_paths: Path) -> None: - calls: list[dict[str, object]] = [] - - def fake_get_embedder(**kwargs: object) -> object: - calls.append(dict(kwargs)) - return object() - - import codexlens.semantic.factory as factory - - monkeypatch.setattr(factory, "get_embedder", fake_get_embedder) - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config( - data_dir=temp_paths / "data", - embedding_backend="fastembed", - embedding_model="fast", - embedding_use_gpu=False, - ) - - engine = ChainSearchEngine(registry, mapper, config=config) - monkeypatch.setattr(engine, "_get_executor", lambda _workers: MagicMock()) - - engine._search_parallel([], "query", SearchOptions(enable_vector=True)) - - assert calls == [ - { - "backend": "fastembed", - "profile": "fast", - "use_gpu": False, - } - ] - - -def test_search_single_index_passes_config_to_hybrid_engine( - monkeypatch: pytest.MonkeyPatch, temp_paths: Path -) -> None: - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config(data_dir=temp_paths / "data", embedding_backend="fastembed", embedding_model="code") - - engine = ChainSearchEngine(registry, mapper, config=config) - index_path = temp_paths / "indexes" / "project" / "_index.db" - index_path.parent.mkdir(parents=True, exist_ok=True) - index_path.write_bytes(b"\x00" * 128) - - captured: dict[str, object] = {} - - class FakeHybridSearchEngine: - def __init__(self, *, weights=None, config=None): - captured["weights"] = weights - captured["config"] = config - - def search(self, *_args, **_kwargs): - return [SearchResult(path="src/app.py", score=0.9, excerpt="hit")] - - monkeypatch.setattr(chain_search_module, "HybridSearchEngine", FakeHybridSearchEngine) - - results = engine._search_single_index( - index_path, - "auth flow", - limit=5, - hybrid_mode=True, - enable_vector=True, - hybrid_weights={"vector": 1.0}, - ) - - assert captured["config"] is config - assert captured["weights"] == {"vector": 1.0} - assert len(results) == 1 - assert results[0].path == "src/app.py" - - -def test_search_parallel_reuses_shared_hybrid_engine( - monkeypatch: pytest.MonkeyPatch, - temp_paths: Path, -) -> None: - from concurrent.futures import Future - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config(data_dir=temp_paths / "data") - - engine = ChainSearchEngine(registry, mapper, config=config) - index_root = temp_paths / "indexes" / "project" - index_a = index_root / "src" / "_index.db" - index_b = index_root / "tests" / "_index.db" - index_a.parent.mkdir(parents=True, exist_ok=True) - index_b.parent.mkdir(parents=True, exist_ok=True) - index_a.write_bytes(b"\x00" * 128) - index_b.write_bytes(b"\x00" * 128) - - created_engines: list[object] = [] - search_calls: list[tuple[object, Path]] = [] - - class FakeHybridSearchEngine: - def __init__(self, *, weights=None, config=None): - self.weights = weights - self.config = config - created_engines.append(self) - - def search(self, index_path, *_args, **_kwargs): - search_calls.append((self, index_path)) - return [SearchResult(path=str(index_path), score=0.9, excerpt="hit")] - - class ImmediateExecutor: - def submit(self, fn, *args): - future: Future = Future() - try: - future.set_result(fn(*args)) - except Exception as exc: - future.set_exception(exc) - return future - - monkeypatch.setattr(chain_search_module, "HybridSearchEngine", FakeHybridSearchEngine) - monkeypatch.setattr(engine, "_get_executor", lambda _workers: ImmediateExecutor()) - - results, stats = engine._search_parallel( - [index_a, index_b], - "auth flow", - SearchOptions( - hybrid_mode=True, - enable_vector=True, - limit_per_dir=5, - hybrid_weights={"vector": 1.0}, - ), - ) - - assert stats.errors == [] - assert len(created_engines) == 1 - assert [path for _, path in search_calls] == [index_a, index_b] - assert all(shared is created_engines[0] for shared, _ in search_calls) - assert len(results) == 2 - - -def test_search_injects_feature_query_anchors_into_merge( - monkeypatch: pytest.MonkeyPatch, - temp_paths: Path, -) -> None: - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config(data_dir=temp_paths / "data") - engine = ChainSearchEngine(registry, mapper, config=config) - - source_path = temp_paths / "project" - start_index = temp_paths / "indexes" / "project" / "_index.db" - start_index.parent.mkdir(parents=True, exist_ok=True) - start_index.write_text("", encoding="utf-8") - - feature_path = str(source_path / "src" / "tools" / "smart-search.ts") - platform_path = str(source_path / "src" / "utils" / "path-resolver.ts") - anchor_result = SearchResult( - path=feature_path, - score=8.0, - excerpt="smart search anchor", - metadata={"feature_query_hint": "smart search"}, - ) - - monkeypatch.setattr(engine, "_find_start_index", lambda _source_path: start_index) - monkeypatch.setattr( - engine, - "_collect_index_paths", - lambda _start_index, _options: [start_index], - ) - monkeypatch.setattr( - engine, - "_search_parallel", - lambda *_args, **_kwargs: ( - [ - SearchResult( - path=platform_path, - score=0.9, - excerpt="platform hit", - ) - ], - SearchStats(), - ), - ) - monkeypatch.setattr(engine, "_search_symbols_parallel", lambda *_args, **_kwargs: []) - collected_queries: list[str] = [] - monkeypatch.setattr( - engine, - "_collect_query_feature_anchor_results", - lambda query, *_args, **_kwargs: ( - collected_queries.append(query), - [anchor_result], - )[1], - ) - - result = engine.search( - "parse CodexLens JSON output strip ANSI smart_search", - source_path, - options=SearchOptions( - total_limit=5, - hybrid_mode=True, - enable_fuzzy=False, - enable_vector=True, - ), - ) - - assert collected_queries == ["parse CodexLens JSON output strip ANSI smart_search"] - result_by_path = {item.path: item for item in result.results} - assert feature_path in result_by_path - assert platform_path in result_by_path - assert result_by_path[feature_path].metadata["feature_query_anchor"] is True - assert result_by_path[feature_path].metadata["feature_query_hint"] == "smart search" - - -def test_group_index_paths_by_dense_root(temp_paths: Path) -> None: - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - engine = ChainSearchEngine(registry, mapper, config=Config(data_dir=temp_paths / "data")) - - dense_root_a = temp_paths / "indexes" / "project-a" - dense_root_b = temp_paths / "indexes" / "project-b" - orphan_root = temp_paths / "indexes" / "orphan" / "pkg" - - dense_root_a.mkdir(parents=True, exist_ok=True) - dense_root_b.mkdir(parents=True, exist_ok=True) - orphan_root.mkdir(parents=True, exist_ok=True) - (dense_root_a / VECTORS_HNSW_NAME).write_bytes(b"a") - (dense_root_b / VECTORS_HNSW_NAME).write_bytes(b"b") - - index_a = dense_root_a / "src" / "_index.db" - index_b = dense_root_b / "tests" / "_index.db" - orphan_index = orphan_root / "_index.db" - index_a.parent.mkdir(parents=True, exist_ok=True) - index_b.parent.mkdir(parents=True, exist_ok=True) - index_a.write_text("", encoding="utf-8") - index_b.write_text("", encoding="utf-8") - orphan_index.write_text("", encoding="utf-8") - - roots, ungrouped = engine._group_index_paths_by_dense_root( - [index_a, orphan_index, index_b] - ) - - assert roots == [dense_root_a, dense_root_b] - assert ungrouped == [orphan_index] - assert engine._find_nearest_dense_hnsw_root(index_a.parent) == dense_root_a - assert engine._find_nearest_dense_hnsw_root(orphan_index.parent) is None - - -def test_stage1_binary_search_merges_multiple_centralized_roots( - monkeypatch: pytest.MonkeyPatch, - temp_paths: Path, -) -> None: - import numpy as np - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config(data_dir=temp_paths / "data", embedding_use_gpu=False) - engine = ChainSearchEngine(registry, mapper, config=config) - - root_a = temp_paths / "indexes" / "project-a" - root_b = temp_paths / "indexes" / "project-b" - for root in (root_a, root_b): - root.mkdir(parents=True, exist_ok=True) - (root / BINARY_VECTORS_MMAP_NAME).write_bytes(b"binary") - (root / VECTORS_META_DB_NAME).write_bytes(b"meta") - - index_a = root_a / "src" / "_index.db" - index_b = root_b / "src" / "_index.db" - index_a.parent.mkdir(parents=True, exist_ok=True) - index_b.parent.mkdir(parents=True, exist_ok=True) - index_a.write_text("", encoding="utf-8") - index_b.write_text("", encoding="utf-8") - - class FakeBinarySearcher: - def __init__(self, root: Path) -> None: - self.root = root - self.backend = "fastembed" - self.model = None - self.model_profile = "code" - - def search(self, _query_dense, top_k: int): - return [(1, 8)] if self.root == root_a else [(2, 16)] - - class FakeEmbedder: - def embed_to_numpy(self, _queries): - return np.ones((1, 4), dtype=np.float32) - - class FakeVectorMetadataStore: - def __init__(self, path: Path) -> None: - self.path = Path(path) - - def get_chunks_by_ids(self, chunk_ids): - return [ - { - "id": chunk_id, - "file_path": str(self.path.parent / f"file{chunk_id}.py"), - "content": f"chunk {chunk_id}", - "metadata": "{\"start_line\": 1, \"end_line\": 2}", - "category": "code", - } - for chunk_id in chunk_ids - ] - - import codexlens.semantic.embedder as embedder_module - from codexlens.search.chain_search import SearchStats - - monkeypatch.setattr( - engine, - "_get_centralized_binary_searcher", - lambda root: FakeBinarySearcher(root), - ) - monkeypatch.setattr(embedder_module, "get_embedder", lambda **_kwargs: FakeEmbedder()) - monkeypatch.setattr(chain_search_module, "VectorMetadataStore", FakeVectorMetadataStore) - - coarse_results, stage2_root = engine._stage1_binary_search( - "binary query", - [index_a, index_b], - coarse_k=5, - stats=SearchStats(), - index_root=index_a.parent, - ) - - assert stage2_root is None - assert len(coarse_results) == 2 - assert {Path(result.path).name for result in coarse_results} == {"file1.py", "file2.py"} - - -def test_stage1_binary_search_keeps_duplicate_chunk_ids_isolated_per_root( - monkeypatch: pytest.MonkeyPatch, - temp_paths: Path, -) -> None: - import numpy as np - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config(data_dir=temp_paths / "data", embedding_use_gpu=False) - engine = ChainSearchEngine(registry, mapper, config=config) - - root_a = temp_paths / "indexes" / "project-a" - root_b = temp_paths / "indexes" / "project-b" - for root in (root_a, root_b): - root.mkdir(parents=True, exist_ok=True) - (root / BINARY_VECTORS_MMAP_NAME).write_bytes(b"binary") - (root / VECTORS_META_DB_NAME).write_bytes(b"meta") - - index_a = root_a / "src" / "_index.db" - index_b = root_b / "src" / "_index.db" - index_a.parent.mkdir(parents=True, exist_ok=True) - index_b.parent.mkdir(parents=True, exist_ok=True) - index_a.write_text("", encoding="utf-8") - index_b.write_text("", encoding="utf-8") - - class FakeBinarySearcher: - def __init__(self, root: Path) -> None: - self.root = root - self.backend = "fastembed" - self.model = None - self.model_profile = "code" - - def search(self, _query_dense, top_k: int): - return [(1, 8)] if self.root == root_a else [(1, 16)] - - class FakeEmbedder: - def embed_to_numpy(self, _queries): - return np.ones((1, 4), dtype=np.float32) - - class FakeVectorMetadataStore: - def __init__(self, path: Path) -> None: - self.path = Path(path) - - def get_chunks_by_ids(self, chunk_ids): - return [ - { - "id": chunk_id, - "file_path": str(self.path.parent / f"{self.path.parent.name}-file{chunk_id}.py"), - "content": f"chunk {self.path.parent.name}-{chunk_id}", - "metadata": "{\"start_line\": 1, \"end_line\": 2}", - "category": "code", - } - for chunk_id in chunk_ids - ] - - import codexlens.semantic.embedder as embedder_module - from codexlens.search.chain_search import SearchStats - - monkeypatch.setattr( - engine, - "_get_centralized_binary_searcher", - lambda root: FakeBinarySearcher(root), - ) - monkeypatch.setattr(embedder_module, "get_embedder", lambda **_kwargs: FakeEmbedder()) - monkeypatch.setattr(chain_search_module, "VectorMetadataStore", FakeVectorMetadataStore) - - coarse_results, stage2_root = engine._stage1_binary_search( - "binary query", - [index_a, index_b], - coarse_k=5, - stats=SearchStats(), - index_root=index_a.parent, - ) - - assert stage2_root is None - scores_by_name = {Path(result.path).name: result.score for result in coarse_results} - assert scores_by_name["project-a-file1.py"] == pytest.approx(1.0 - (8.0 / 256.0)) - assert scores_by_name["project-b-file1.py"] == pytest.approx(1.0 - (16.0 / 256.0)) - - - -def test_collect_index_paths_includes_nested_registered_project_roots( - temp_paths: Path, -) -> None: - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - engine = ChainSearchEngine(registry, mapper, config=Config(data_dir=temp_paths / "data")) - - workspace_root = temp_paths / "workspace" - child_root = workspace_root / "packages" / "child" - ignored_root = workspace_root / "dist" / "generated" - - workspace_index = mapper.source_to_index_db(workspace_root) - child_index = mapper.source_to_index_db(child_root) - ignored_index = mapper.source_to_index_db(ignored_root) - - for index_path in (workspace_index, child_index, ignored_index): - index_path.parent.mkdir(parents=True, exist_ok=True) - index_path.write_text("", encoding="utf-8") - - workspace_project = registry.register_project( - workspace_root, - mapper.source_to_index_dir(workspace_root), - ) - child_project = registry.register_project( - child_root, - mapper.source_to_index_dir(child_root), - ) - ignored_project = registry.register_project( - ignored_root, - mapper.source_to_index_dir(ignored_root), - ) - - registry.register_dir( - workspace_project.id, - workspace_root, - workspace_index, - depth=0, - ) - registry.register_dir( - child_project.id, - child_root, - child_index, - depth=0, - ) - registry.register_dir( - ignored_project.id, - ignored_root, - ignored_index, - depth=0, - ) - - collected = engine._collect_index_paths(workspace_index, depth=-1) - - assert collected == [workspace_index, child_index] - - -def test_collect_index_paths_respects_depth_for_nested_registered_project_roots( - temp_paths: Path, -) -> None: - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - engine = ChainSearchEngine(registry, mapper, config=Config(data_dir=temp_paths / "data")) - - workspace_root = temp_paths / "workspace" - direct_child_root = workspace_root / "apps" - deep_child_root = workspace_root / "packages" / "deep" / "child" - - workspace_index = mapper.source_to_index_db(workspace_root) - direct_child_index = mapper.source_to_index_db(direct_child_root) - deep_child_index = mapper.source_to_index_db(deep_child_root) - - for index_path in (workspace_index, direct_child_index, deep_child_index): - index_path.parent.mkdir(parents=True, exist_ok=True) - index_path.write_text("", encoding="utf-8") - - workspace_project = registry.register_project( - workspace_root, - mapper.source_to_index_dir(workspace_root), - ) - direct_child_project = registry.register_project( - direct_child_root, - mapper.source_to_index_dir(direct_child_root), - ) - deep_child_project = registry.register_project( - deep_child_root, - mapper.source_to_index_dir(deep_child_root), - ) - - registry.register_dir(workspace_project.id, workspace_root, workspace_index, depth=0) - registry.register_dir( - direct_child_project.id, - direct_child_root, - direct_child_index, - depth=0, - ) - registry.register_dir( - deep_child_project.id, - deep_child_root, - deep_child_index, - depth=0, - ) - - collected = engine._collect_index_paths(workspace_index, depth=1) - - assert collected == [workspace_index, direct_child_index] - - -def test_binary_rerank_cascade_search_merges_multiple_centralized_roots( - monkeypatch: pytest.MonkeyPatch, - temp_paths: Path, -) -> None: - import numpy as np - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config(data_dir=temp_paths / "data", embedding_use_gpu=False) - engine = ChainSearchEngine(registry, mapper, config=config) - - root_a = temp_paths / "indexes" / "project-a" - root_b = temp_paths / "indexes" / "project-b" - for root in (root_a, root_b): - root.mkdir(parents=True, exist_ok=True) - (root / BINARY_VECTORS_MMAP_NAME).write_bytes(b"binary") - (root / VECTORS_META_DB_NAME).write_bytes(b"meta") - - index_a = root_a / "src" / "_index.db" - index_b = root_b / "src" / "_index.db" - index_a.parent.mkdir(parents=True, exist_ok=True) - index_b.parent.mkdir(parents=True, exist_ok=True) - index_a.write_text("", encoding="utf-8") - index_b.write_text("", encoding="utf-8") - - class FakeBinarySearcher: - def __init__(self, root: Path) -> None: - self.root = root - self.backend = "fastembed" - self.model = None - self.model_profile = "code" - - def search(self, _query_dense, top_k: int): - return [(1, 8)] if self.root == root_a else [(2, 16)] - - class FakeEmbedder: - def embed_to_numpy(self, _queries): - return np.ones((1, 4), dtype=np.float32) - - class FakeVectorMetadataStore: - def __init__(self, path: Path) -> None: - self.path = Path(path) - - def get_chunks_by_ids(self, chunk_ids): - return [ - { - "chunk_id": chunk_id, - "file_path": str(self.path.parent / f"file{chunk_id}.py"), - "content": f"chunk {chunk_id}", - "metadata": "{}", - "category": "code", - } - for chunk_id in chunk_ids - ] - - import codexlens.semantic.embedder as embedder_module - - monkeypatch.setattr(engine, "_find_start_index", lambda _source_path: index_a) - monkeypatch.setattr(engine, "_collect_index_paths", lambda _start_index, _depth: [index_a, index_b]) - monkeypatch.setattr( - engine, - "_get_centralized_binary_searcher", - lambda root: FakeBinarySearcher(root), - ) - monkeypatch.setattr(embedder_module, "get_embedder", lambda **_kwargs: FakeEmbedder()) - monkeypatch.setattr(chain_search_module, "VectorMetadataStore", FakeVectorMetadataStore) - monkeypatch.setattr(engine, "_cross_encoder_rerank", lambda _query, results, top_k: results[:top_k]) - monkeypatch.setattr(engine, "search", lambda *_args, **_kwargs: (_ for _ in ()).throw(AssertionError("unexpected fallback"))) - - result = engine.binary_rerank_cascade_search( - "binary query", - index_a.parent, - k=5, - coarse_k=5, - ) - - assert len(result.results) == 2 - assert {Path(item.path).name for item in result.results} == {"file1.py", "file2.py"} - - -def test_dense_rerank_cascade_search_overfetches_and_applies_path_penalties( - monkeypatch: pytest.MonkeyPatch, - temp_paths: Path, -) -> None: - import numpy as np - import codexlens.semantic.ann_index as ann_index_module - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config( - data_dir=temp_paths / "data", - embedding_use_gpu=False, - reranker_top_k=3, - test_file_penalty=0.35, - generated_file_penalty=0.35, - ) - engine = ChainSearchEngine(registry, mapper, config=config) - - dense_root = temp_paths / "indexes" / "project" - dense_root.mkdir(parents=True, exist_ok=True) - (dense_root / VECTORS_HNSW_NAME).write_bytes(b"hnsw") - - meta_db_path = dense_root / VECTORS_META_DB_NAME - conn = sqlite3.connect(meta_db_path) - conn.execute( - """ - CREATE TABLE chunk_metadata ( - chunk_id INTEGER PRIMARY KEY, - file_path TEXT NOT NULL, - content TEXT NOT NULL, - start_line INTEGER, - end_line INTEGER - ) - """ - ) - conn.executemany( - """ - INSERT INTO chunk_metadata (chunk_id, file_path, content, start_line, end_line) - VALUES (?, ?, ?, ?, ?) - """, - [ - ( - 1, - "project/tests/test_auth.py", - "def test_auth_flow():\n pass", - 1, - 2, - ), - ( - 2, - "project/src/auth.py", - "def auth_flow():\n return True", - 1, - 2, - ), - ( - 3, - "project/dist/bundle.js", - "function authFlow(){return true;}", - 1, - 1, - ), - ], - ) - conn.commit() - conn.close() - - index_path = dense_root / "src" / "_index.db" - index_path.parent.mkdir(parents=True, exist_ok=True) - index_path.write_text("", encoding="utf-8") - - class FakeANNIndex: - def __init__(self, root: Path, dim: int) -> None: - self.root = root - self.dim = dim - - @classmethod - def create_central(cls, *, index_root: Path, dim: int): - return cls(index_root, dim) - - def load(self) -> bool: - return True - - def count(self) -> int: - return 3 - - def search(self, _query_dense, top_k: int): - ids = [1, 2, 3][:top_k] - distances = [0.01, 0.02, 0.03][:top_k] - return ids, distances - - rerank_calls: list[int] = [] - - def fake_cross_encoder(_query: str, results: list[SearchResult], top_k: int): - rerank_calls.append(top_k) - return results[:top_k] - - monkeypatch.setattr(engine, "_find_start_index", lambda _source_path: index_path) - monkeypatch.setattr(engine, "_collect_index_paths", lambda _start_index, _depth: [index_path]) - monkeypatch.setattr(engine, "_embed_dense_query", lambda *_args, **_kwargs: np.ones(4, dtype=np.float32)) - monkeypatch.setattr(engine, "_cross_encoder_rerank", fake_cross_encoder) - monkeypatch.setattr( - engine, - "search", - lambda *_args, **_kwargs: (_ for _ in ()).throw(AssertionError("unexpected fallback")), - ) - monkeypatch.setattr(ann_index_module, "ANNIndex", FakeANNIndex) - - result = engine.dense_rerank_cascade_search( - "auth", - index_path.parent, - k=1, - coarse_k=3, - ) - - assert rerank_calls == [3] - assert len(result.results) == 1 - assert result.results[0].path.endswith("src\\auth.py") or result.results[0].path.endswith("src/auth.py") - assert result.results[0].metadata == {} - - -def test_collect_query_feature_anchor_results_uses_explicit_file_hints( - monkeypatch: pytest.MonkeyPatch, - temp_paths: Path, -) -> None: - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config(data_dir=temp_paths / "data", embedding_use_gpu=False) - engine = ChainSearchEngine(registry, mapper, config=config) - - recorded_queries: list[str] = [] - - def fake_search(query: str, _source_path: Path, options: SearchOptions | None = None): - recorded_queries.append(query) - return ChainSearchResult( - query=query, - results=[ - SearchResult( - path="/repo/src/tools/smart-search.ts", - score=8.7, - excerpt="smart search path anchor", - ), - SearchResult( - path="/repo/src/tools/codex-lens-lsp.ts", - score=7.4, - excerpt="platform term overlap", - ), - ], - symbols=[], - stats=SearchStats(), - ) - - monkeypatch.setattr(engine, "search", fake_search) - - anchors = engine._collect_query_feature_anchor_results( - "parse CodexLens JSON output strip ANSI smart_search", - temp_paths, - SearchOptions(), - limit=4, - ) - - assert recorded_queries == ["smart search"] - assert [Path(result.path).name for result in anchors] == ["smart-search.ts"] - assert anchors[0].metadata["feature_query_anchor"] is True - assert anchors[0].metadata["feature_query_hint_tokens"] == ["smart", "search"] - - -def test_collect_query_feature_anchor_results_falls_back_to_full_lexical_query( - monkeypatch: pytest.MonkeyPatch, - temp_paths: Path, -) -> None: - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config(data_dir=temp_paths / "data", embedding_use_gpu=False) - engine = ChainSearchEngine(registry, mapper, config=config) - - recorded_calls: list[tuple[str, bool]] = [] - full_query = "EMBEDDING_BACKEND and RERANKER_BACKEND environment variables" - - def fake_search(query: str, _source_path: Path, options: SearchOptions | None = None): - recorded_calls.append((query, bool(options.inject_feature_anchors) if options else True)) - if query == full_query: - return ChainSearchResult( - query=query, - results=[ - SearchResult( - path="/repo/src/codexlens/env_config.py", - score=8.5, - excerpt="ENV vars", - ), - SearchResult( - path="/repo/src/codexlens/config.py", - score=8.1, - excerpt="backend config", - ), - ], - symbols=[], - stats=SearchStats(), - ) - - return ChainSearchResult( - query=query, - results=[ - SearchResult( - path="/repo/src/codexlens/env_config.py", - score=7.0, - excerpt="hint candidate", - ) - ], - symbols=[], - stats=SearchStats(), - ) - - monkeypatch.setattr(engine, "search", fake_search) - - anchors = engine._collect_query_feature_anchor_results( - full_query, - temp_paths, - SearchOptions(), - limit=2, - ) - - assert recorded_calls == [ - ("embedding backend", False), - ("reranker backend", False), - (full_query, False), - ] - assert [Path(result.path).name for result in anchors] == ["env_config.py", "config.py"] - assert anchors[0].metadata["feature_query_seed_kind"] == "lexical_query" - assert anchors[0].metadata["feature_query_hint"] == full_query - - -def test_stage3_cluster_prune_preserves_feature_query_anchors(temp_paths: Path) -> None: - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config(data_dir=temp_paths / "data", embedding_use_gpu=False) - config.staged_clustering_strategy = "score" - engine = ChainSearchEngine(registry, mapper, config=config) - - anchor = SearchResult( - path="/repo/src/tools/smart-search.ts", - score=0.02, - excerpt="parse JSON output and strip ANSI", - metadata={ - "feature_query_anchor": True, - "feature_query_hint": "smart search", - "feature_query_hint_tokens": ["smart", "search"], - }, - ) - others = [ - SearchResult( - path=f"/repo/src/feature-{index}.ts", - score=0.9 - (0.05 * index), - excerpt="generic feature implementation", - ) - for index in range(6) - ] - - clustered = engine._stage3_cluster_prune( - [anchor, *others], - target_count=4, - query="parse CodexLens JSON output strip ANSI smart_search", - ) - - assert len(clustered) == 4 - assert any(Path(result.path).name == "smart-search.ts" for result in clustered) - - -def test_dense_rerank_cascade_search_interleaves_mixed_embedding_groups( - monkeypatch: pytest.MonkeyPatch, - temp_paths: Path, -) -> None: - import numpy as np - import codexlens.semantic.ann_index as ann_index_module - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config(data_dir=temp_paths / "data", embedding_use_gpu=False) - engine = ChainSearchEngine(registry, mapper, config=config) - - root_a = temp_paths / "indexes" / "project-a" - root_b = temp_paths / "indexes" / "project-b" - for root in (root_a, root_b): - root.mkdir(parents=True, exist_ok=True) - (root / VECTORS_HNSW_NAME).write_bytes(b"hnsw") - - for meta_db_path, rows in ( - ( - root_a / VECTORS_META_DB_NAME, - [ - (1, str(root_a / "src" / "a.py"), "def a():\n return 1", 1, 2), - (3, str(root_a / "src" / "a2.py"), "def a2():\n return 2", 1, 2), - ], - ), - ( - root_b / VECTORS_META_DB_NAME, - [ - (2, str(root_b / "src" / "b.py"), "def b():\n return 3", 1, 2), - ], - ), - ): - conn = sqlite3.connect(meta_db_path) - conn.execute( - """ - CREATE TABLE chunk_metadata ( - chunk_id INTEGER PRIMARY KEY, - file_path TEXT NOT NULL, - content TEXT NOT NULL, - start_line INTEGER, - end_line INTEGER - ) - """ - ) - conn.executemany( - """ - INSERT INTO chunk_metadata (chunk_id, file_path, content, start_line, end_line) - VALUES (?, ?, ?, ?, ?) - """, - rows, - ) - conn.commit() - conn.close() - - index_a = root_a / "src" / "_index.db" - index_b = root_b / "src" / "_index.db" - index_a.parent.mkdir(parents=True, exist_ok=True) - index_b.parent.mkdir(parents=True, exist_ok=True) - index_a.write_text("", encoding="utf-8") - index_b.write_text("", encoding="utf-8") - - class FakeANNIndex: - def __init__(self, index_path: Path, dim: int) -> None: - source = Path(index_path) - self.root = source if source.name != "_index.db" else source.parent - self.dim = dim - - @classmethod - def create_central(cls, *, index_root: Path, dim: int): - return cls(index_root, dim) - - def load(self) -> bool: - return True - - def count(self) -> int: - return 2 if self.root == root_a else 1 - - def search(self, _query_dense, top_k: int): - if self.root == root_a: - return [1, 3][:top_k], [0.01, 0.011][:top_k] - return [2][:top_k], [0.02][:top_k] - - monkeypatch.setattr(engine, "_find_start_index", lambda _source_path: index_a) - monkeypatch.setattr(engine, "_collect_index_paths", lambda _start_index, _depth: [index_a, index_b]) - monkeypatch.setattr( - engine, - "_resolve_dense_embedding_settings", - lambda *, index_root: ( - ("fastembed", "code", False) - if Path(index_root) == root_a - else ("litellm", "qwen3-embedding-sf", False) - ), - ) - monkeypatch.setattr( - engine, - "_embed_dense_query", - lambda _query, *, index_root=None, query_cache=None: ( - np.ones(4, dtype=np.float32) - if Path(index_root) == root_a - else np.ones(8, dtype=np.float32) - ), - ) - monkeypatch.setattr(engine, "_cross_encoder_rerank", lambda _query, results, top_k: results[:top_k]) - monkeypatch.setattr( - engine, - "search", - lambda *_args, **_kwargs: (_ for _ in ()).throw(AssertionError("unexpected fallback")), - ) - monkeypatch.setattr(ann_index_module, "ANNIndex", FakeANNIndex) - - result = engine.dense_rerank_cascade_search( - "route query", - index_a.parent, - k=2, - coarse_k=2, - ) - - assert [Path(item.path).name for item in result.results] == ["a.py", "b.py"] - - -def test_dense_rerank_cascade_search_reuses_cached_dense_indexes( - monkeypatch: pytest.MonkeyPatch, - temp_paths: Path, -) -> None: - import numpy as np - import codexlens.semantic.ann_index as ann_index_module - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config(data_dir=temp_paths / "data", embedding_use_gpu=False) - engine = ChainSearchEngine(registry, mapper, config=config) - - dense_root = temp_paths / "indexes" / "project" - dense_root.mkdir(parents=True, exist_ok=True) - (dense_root / VECTORS_HNSW_NAME).write_bytes(b"hnsw") - - meta_db_path = dense_root / VECTORS_META_DB_NAME - conn = sqlite3.connect(meta_db_path) - conn.execute( - """ - CREATE TABLE chunk_metadata ( - chunk_id INTEGER PRIMARY KEY, - file_path TEXT NOT NULL, - content TEXT NOT NULL, - start_line INTEGER, - end_line INTEGER - ) - """ - ) - conn.execute( - "INSERT INTO chunk_metadata (chunk_id, file_path, content, start_line, end_line) VALUES (?, ?, ?, ?, ?)", - (1, str((temp_paths / "src" / "impl.py").resolve()), "def impl():\n return 1", 1, 2), - ) - conn.commit() - conn.close() - - index_path = dense_root / "src" / "_index.db" - index_path.parent.mkdir(parents=True, exist_ok=True) - index_path.write_text("", encoding="utf-8") - - create_calls: list[tuple[Path, int]] = [] - - class FakeANNIndex: - def __init__(self, root: Path, dim: int) -> None: - self.root = root - self.dim = dim - - @classmethod - def create_central(cls, *, index_root: Path, dim: int): - create_calls.append((Path(index_root), int(dim))) - return cls(index_root, dim) - - def load(self) -> bool: - return True - - def count(self) -> int: - return 1 - - def search(self, _query_dense, top_k: int): - return [1][:top_k], [0.01][:top_k] - - monkeypatch.setattr(engine, "_find_start_index", lambda _source_path: index_path) - monkeypatch.setattr(engine, "_collect_index_paths", lambda _start_index, _depth: [index_path]) - monkeypatch.setattr(engine, "_embed_dense_query", lambda *_args, **_kwargs: np.ones(4, dtype=np.float32)) - monkeypatch.setattr(engine, "_cross_encoder_rerank", lambda _query, results, top_k: results[:top_k]) - monkeypatch.setattr( - engine, - "search", - lambda *_args, **_kwargs: (_ for _ in ()).throw(AssertionError("unexpected fallback")), - ) - monkeypatch.setattr(ann_index_module, "ANNIndex", FakeANNIndex) - - first = engine.dense_rerank_cascade_search("route query", index_path.parent, k=1, coarse_k=1) - second = engine.dense_rerank_cascade_search("route query", index_path.parent, k=1, coarse_k=1) - - assert len(first.results) == 1 - assert len(second.results) == 1 - assert create_calls == [(dense_root, 4)] - - -def test_dense_rerank_cascade_search_short_circuits_lexical_priority_queries( - monkeypatch: pytest.MonkeyPatch, - temp_paths: Path, -) -> None: - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config(data_dir=temp_paths / "data") - engine = ChainSearchEngine(registry, mapper, config=config) - - expected = ChainSearchResult( - query="embedding backend fastembed local litellm api config", - results=[SearchResult(path="src/config.py", score=0.9, excerpt="embedding_backend = ...")], - symbols=[], - stats=SearchStats(dirs_searched=3, files_matched=1, time_ms=12.5), - ) - search_calls: list[tuple[str, Path, SearchOptions | None]] = [] - - def fake_search(query: str, source_path: Path, options: SearchOptions | None = None): - search_calls.append((query, source_path, options)) - return expected - - monkeypatch.setattr(engine, "search", fake_search) - monkeypatch.setattr( - engine, - "_find_start_index", - lambda *_args, **_kwargs: (_ for _ in ()).throw(AssertionError("dense path should not run")), - ) - monkeypatch.setattr( - engine, - "_embed_dense_query", - lambda *_args, **_kwargs: (_ for _ in ()).throw(AssertionError("dense query should not run")), - ) - monkeypatch.setattr( - engine, - "_cross_encoder_rerank", - lambda *_args, **_kwargs: (_ for _ in ()).throw(AssertionError("rerank should not run")), - ) - - options = SearchOptions( - depth=2, - max_workers=3, - limit_per_dir=4, - total_limit=7, - include_symbols=True, - files_only=False, - code_only=True, - exclude_extensions=["md"], - inject_feature_anchors=False, - ) - - result = engine.dense_rerank_cascade_search( - "embedding backend fastembed local litellm api config", - temp_paths / "workspace", - k=5, - coarse_k=50, - options=options, - ) - - assert result is not expected - assert result.results == expected.results - assert result.related_results == expected.related_results - assert result.symbols == [] - assert result.stats == expected.stats - assert len(search_calls) == 1 - called_query, called_source_path, lexical_options = search_calls[0] - assert called_query == "embedding backend fastembed local litellm api config" - assert called_source_path == temp_paths / "workspace" - assert lexical_options is not None - assert lexical_options.depth == 2 - assert lexical_options.max_workers == 3 - assert lexical_options.limit_per_dir == 10 - assert lexical_options.total_limit == 20 - assert lexical_options.include_symbols is False - assert lexical_options.enable_vector is False - assert lexical_options.hybrid_mode is False - assert lexical_options.enable_cascade is False - assert lexical_options.code_only is True - assert lexical_options.exclude_extensions == ["md"] - assert lexical_options.inject_feature_anchors is False - - -def test_cross_encoder_rerank_reuses_cached_reranker_instance( - monkeypatch: pytest.MonkeyPatch, - temp_paths: Path, -) -> None: - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config( - data_dir=temp_paths / "data", - enable_cross_encoder_rerank=True, - reranker_backend="onnx", - reranker_use_gpu=False, - ) - engine = ChainSearchEngine(registry, mapper, config=config) - - calls: dict[str, object] = {"check": [], "get": []} - - class DummyReranker: - def score_pairs(self, pairs, batch_size=32): - _ = batch_size - return [1.0 for _ in pairs] - - def fake_check_reranker_available(backend: str): - calls["check"].append(backend) - return True, None - - def fake_get_reranker(*, backend: str, model_name=None, device=None, **kwargs): - calls["get"].append( - { - "backend": backend, - "model_name": model_name, - "device": device, - "kwargs": kwargs, - } - ) - return DummyReranker() - - monkeypatch.setattr( - "codexlens.semantic.reranker.check_reranker_available", - fake_check_reranker_available, - ) - monkeypatch.setattr( - "codexlens.semantic.reranker.get_reranker", - fake_get_reranker, - ) - - results = [ - SearchResult(path=str((temp_paths / f"file_{idx}.py").resolve()), score=1.0 / (idx + 1), excerpt=f"def fn_{idx}(): pass") - for idx in range(3) - ] - - first = engine._cross_encoder_rerank("find function", results, top_k=2) - second = engine._cross_encoder_rerank("find function", results, top_k=2) - - assert len(first) == len(second) == len(results) - assert calls["check"] == ["onnx"] - assert len(calls["get"]) == 1 - get_call = calls["get"][0] - assert isinstance(get_call, dict) - assert get_call["backend"] == "onnx" - assert get_call["kwargs"]["use_gpu"] is False - - -def test_collect_binary_coarse_candidates_interleaves_mixed_dense_fallback_groups( - monkeypatch: pytest.MonkeyPatch, - temp_paths: Path, -) -> None: - import numpy as np - import codexlens.semantic.ann_index as ann_index_module - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config(data_dir=temp_paths / "data", embedding_use_gpu=False) - engine = ChainSearchEngine(registry, mapper, config=config) - - root_a = temp_paths / "indexes" / "project-a" - root_b = temp_paths / "indexes" / "project-b" - for root in (root_a, root_b): - root.mkdir(parents=True, exist_ok=True) - (root / VECTORS_HNSW_NAME).write_bytes(b"hnsw") - - index_a = root_a / "src" / "_index.db" - index_b = root_b / "src" / "_index.db" - index_a.parent.mkdir(parents=True, exist_ok=True) - index_b.parent.mkdir(parents=True, exist_ok=True) - index_a.write_text("", encoding="utf-8") - index_b.write_text("", encoding="utf-8") - - class FakeANNIndex: - def __init__(self, index_path: Path, dim: int) -> None: - source = Path(index_path) - self.root = source if source.name != "_index.db" else source.parent - self.dim = dim - - @classmethod - def create_central(cls, *, index_root: Path, dim: int): - return cls(index_root, dim) - - def load(self) -> bool: - return True - - def count(self) -> int: - return 2 if self.root == root_a else 1 - - def search(self, _query_dense, top_k: int): - if self.root == root_a: - return [1, 3][:top_k], [0.01, 0.011][:top_k] - return [2][:top_k], [0.02][:top_k] - - monkeypatch.setattr( - engine, - "_resolve_dense_embedding_settings", - lambda *, index_root: ( - ("fastembed", "code", False) - if Path(index_root) == root_a - else ("litellm", "qwen3-embedding-sf", False) - ), - ) - monkeypatch.setattr( - engine, - "_embed_dense_query", - lambda _query, *, index_root=None, query_cache=None: ( - np.ones(4, dtype=np.float32) - if Path(index_root) == root_a - else np.ones(8, dtype=np.float32) - ), - ) - monkeypatch.setattr(ann_index_module, "ANNIndex", FakeANNIndex) - - coarse_candidates, used_centralized, using_dense_fallback, stage2_index_root = ( - engine._collect_binary_coarse_candidates( - "route query", - [index_a, index_b], - coarse_k=2, - stats=SearchStats(), - index_root=index_a.parent, - allow_dense_fallback=True, - ) - ) - - assert used_centralized is False - assert using_dense_fallback is True - assert stage2_index_root is None - assert coarse_candidates == [ - (1, 0.01, root_a), - (2, 0.02, root_b), - ] - - -def test_cross_encoder_rerank_deduplicates_duplicate_paths_before_reranking( - monkeypatch: pytest.MonkeyPatch, - temp_paths: Path, -) -> None: - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config(data_dir=temp_paths / "data", embedding_use_gpu=False) - engine = ChainSearchEngine(registry, mapper, config=config) - - captured: dict[str, object] = {} - - monkeypatch.setattr( - "codexlens.semantic.reranker.check_reranker_available", - lambda _backend: (True, None), - ) - monkeypatch.setattr( - "codexlens.semantic.reranker.get_reranker", - lambda **_kwargs: object(), - ) - - def fake_cross_encoder_rerank( - *, - query: str, - results: list[SearchResult], - reranker, - top_k: int = 50, - batch_size: int = 32, - chunk_type_weights=None, - test_file_penalty: float = 0.0, - ) -> list[SearchResult]: - captured["query"] = query - captured["paths"] = [item.path for item in results] - captured["scores"] = [float(item.score) for item in results] - captured["top_k"] = top_k - captured["batch_size"] = batch_size - captured["chunk_type_weights"] = chunk_type_weights - captured["test_file_penalty"] = test_file_penalty - _ = reranker - return results[:top_k] - - monkeypatch.setattr( - "codexlens.search.ranking.cross_encoder_rerank", - fake_cross_encoder_rerank, - ) - - reranked = engine._cross_encoder_rerank( - "semantic auth query", - [ - SearchResult(path="/repo/src/router.py", score=0.91, excerpt="chunk 1"), - SearchResult(path="/repo/src/router.py", score=0.42, excerpt="chunk 2"), - SearchResult(path="/repo/src/config.py", score=0.73, excerpt="chunk 3"), - ], - top_k=5, - ) - - assert captured["query"] == "semantic auth query" - assert captured["paths"] == ["/repo/src/router.py", "/repo/src/config.py"] - assert captured["scores"] == pytest.approx([0.91, 0.73]) - assert captured["top_k"] == 5 - assert len(reranked) == 2 - - -def test_binary_cascade_search_merges_multiple_centralized_roots( - monkeypatch: pytest.MonkeyPatch, - temp_paths: Path, -) -> None: - import sqlite3 - import numpy as np - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=temp_paths / "indexes") - config = Config(data_dir=temp_paths / "data", embedding_use_gpu=False) - engine = ChainSearchEngine(registry, mapper, config=config) - - root_a = temp_paths / "indexes" / "project-a" - root_b = temp_paths / "indexes" / "project-b" - source_db_a = root_a / "source-a.db" - source_db_b = root_b / "source-b.db" - - for root, source_db, chunk_id in ((root_a, source_db_a, 1), (root_b, source_db_b, 2)): - root.mkdir(parents=True, exist_ok=True) - (root / BINARY_VECTORS_MMAP_NAME).write_bytes(b"binary") - (root / VECTORS_META_DB_NAME).write_bytes(b"meta") - conn = sqlite3.connect(source_db) - conn.execute("CREATE TABLE semantic_chunks (id INTEGER PRIMARY KEY, embedding_dense BLOB)") - conn.execute( - "INSERT INTO semantic_chunks (id, embedding_dense) VALUES (?, ?)", - (chunk_id, np.ones(4, dtype=np.float32).tobytes()), - ) - conn.commit() - conn.close() - - index_a = root_a / "src" / "_index.db" - index_b = root_b / "src" / "_index.db" - index_a.parent.mkdir(parents=True, exist_ok=True) - index_b.parent.mkdir(parents=True, exist_ok=True) - index_a.write_text("", encoding="utf-8") - index_b.write_text("", encoding="utf-8") - - class FakeBinarySearcher: - def __init__(self, root: Path) -> None: - self.root = root - self.backend = "fastembed" - self.model = None - self.model_profile = "code" - - def search(self, _query_dense, top_k: int): - return [(1, 8)] if self.root == root_a else [(2, 16)] - - class FakeEmbedder: - def embed_to_numpy(self, _queries): - return np.ones((1, 4), dtype=np.float32) - - class FakeVectorMetadataStore: - def __init__(self, path: Path) -> None: - self.path = Path(path) - - def get_chunks_by_ids(self, chunk_ids): - source_db = source_db_a if self.path.parent == root_a else source_db_b - return [ - { - "chunk_id": chunk_id, - "file_path": str(self.path.parent / f"file{chunk_id}.py"), - "content": f"chunk {chunk_id}", - "source_index_db": str(source_db), - } - for chunk_id in chunk_ids - ] - - import codexlens.semantic.embedder as embedder_module - - monkeypatch.setattr(engine, "_find_start_index", lambda _source_path: index_a) - monkeypatch.setattr(engine, "_collect_index_paths", lambda _start_index, _depth: [index_a, index_b]) - monkeypatch.setattr( - engine, - "_get_centralized_binary_searcher", - lambda root: FakeBinarySearcher(root), - ) - monkeypatch.setattr(embedder_module, "get_embedder", lambda **_kwargs: FakeEmbedder()) - monkeypatch.setattr(chain_search_module, "VectorMetadataStore", FakeVectorMetadataStore) - monkeypatch.setattr( - engine, - "_embed_dense_query", - lambda _query, *, index_root=None, query_cache=None: np.ones(4, dtype=np.float32), - ) - monkeypatch.setattr(engine, "search", lambda *_args, **_kwargs: (_ for _ in ()).throw(AssertionError("unexpected fallback"))) - - result = engine.binary_cascade_search( - "binary query", - index_a.parent, - k=5, - coarse_k=5, - ) - - assert len(result.results) == 2 - assert {Path(item.path).name for item in result.results} == {"file1.py", "file2.py"} diff --git a/codex-lens/tests/test_cli_help.py b/codex-lens/tests/test_cli_help.py deleted file mode 100644 index dd51f64f..00000000 --- a/codex-lens/tests/test_cli_help.py +++ /dev/null @@ -1,61 +0,0 @@ -"""Smoke tests for CodexLens CLI help output. - -These tests ensure that help text generation does not crash at import time -or during Click/Typer option parsing. -""" - -from __future__ import annotations - -import os -import subprocess -import sys -from pathlib import Path - -from typer.testing import CliRunner - - -def _subprocess_env() -> dict[str, str]: - env = os.environ.copy() - codex_lens_root = Path(__file__).resolve().parents[1] - src_dir = codex_lens_root / "src" - existing = env.get("PYTHONPATH", "") - env["PYTHONPATH"] = str(src_dir) + (os.pathsep + existing if existing else "") - return env - - -def test_python_module_help_does_not_crash() -> None: - proc = subprocess.run( - [sys.executable, "-m", "codexlens", "--help"], - capture_output=True, - text=True, - encoding="utf-8", - errors="replace", - env=_subprocess_env(), - ) - assert proc.returncode == 0, proc.stderr - assert "Traceback" not in (proc.stderr or "") - - -def test_typer_app_help_does_not_crash() -> None: - from codexlens.cli.commands import app - - runner = CliRunner() - result = runner.invoke(app, ["--help"]) - assert result.exit_code == 0, result.output - - -def test_extract_embedding_error_uses_details() -> None: - from codexlens.cli.commands import _extract_embedding_error - - embed_result = { - "success": False, - "result": { - "details": [ - {"index_path": "/tmp/a/_index.db", "success": False, "error": "Backend timeout"}, - {"index_path": "/tmp/b/_index.db", "success": False, "error": "Rate limit"}, - ] - }, - } - msg = _extract_embedding_error(embed_result) - assert "Unknown error" not in msg - assert "Backend timeout" in msg diff --git a/codex-lens/tests/test_cli_hybrid_search.py b/codex-lens/tests/test_cli_hybrid_search.py deleted file mode 100644 index 04e5dc78..00000000 --- a/codex-lens/tests/test_cli_hybrid_search.py +++ /dev/null @@ -1,122 +0,0 @@ -"""Tests for CLI hybrid search integration (T6).""" - -import pytest -from typer.testing import CliRunner -from codexlens.cli.commands import app - - -class TestCLIHybridSearch: - """Test CLI integration for hybrid search modes.""" - - @pytest.fixture - def runner(self): - """Create CLI test runner.""" - return CliRunner() - - def test_search_mode_parameter_validation(self, runner): - """Test --mode parameter accepts valid modes and rejects invalid ones.""" - # Valid modes should pass validation (even if no index exists) - valid_modes = ["exact", "fuzzy", "hybrid", "vector"] - for mode in valid_modes: - result = runner.invoke(app, ["search", "test", "--mode", mode]) - # Should fail due to no index, not due to invalid mode - # Note: CLI now shows deprecation warning for --mode, use --method instead - assert "Invalid" not in result.output or "deprecated" in result.output.lower() - - # Invalid mode should fail - result = runner.invoke(app, ["search", "test", "--mode", "invalid"]) - assert result.exit_code == 1 - # CLI now shows "Invalid deprecated mode:" instead of "Invalid mode" - assert "Invalid" in result.output and "mode" in result.output.lower() - - def test_weights_parameter_parsing(self, runner): - """Test --weights parameter parses and validates correctly.""" - # Valid weights (3 values summing to ~1.0) - result = runner.invoke( - app, ["search", "test", "--mode", "hybrid", "--weights", "0.5,0.3,0.2"] - ) - # Should not show weight warning - assert "Invalid weights" not in result.output - - # Invalid weights (wrong number of values) - result = runner.invoke( - app, ["search", "test", "--mode", "hybrid", "--weights", "0.5,0.5"] - ) - assert "Invalid weights format" in result.output - - # Invalid weights (non-numeric) - result = runner.invoke( - app, ["search", "test", "--mode", "hybrid", "--weights", "a,b,c"] - ) - assert "Invalid weights format" in result.output - - def test_weights_normalization(self, runner): - """Test weights are normalized when they don't sum to 1.0.""" - # Weights summing to 2.0 should trigger normalization warning - result = runner.invoke( - app, ["search", "test", "--mode", "hybrid", "--weights", "0.8,0.6,0.6"] - ) - # Should show normalization warning - if "Normalizing" in result.output or "Warning" in result.output: - # Expected behavior - pass - - def test_search_help_shows_modes(self, runner): - """Test search --help displays all available methods.""" - result = runner.invoke(app, ["search", "--help"]) - assert result.exit_code == 0 - # CLI now uses --method with: dense_rerank, fts, hybrid, cascade - assert "dense_rerank" in result.output or "fts" in result.output - assert "method" in result.output.lower() - - def test_migrate_command_exists(self, runner): - """Test migrate command is registered and accessible.""" - result = runner.invoke(app, ["migrate", "--help"]) - assert result.exit_code == 0 - assert "Dual-FTS upgrade" in result.output - assert "schema version 4" in result.output - - def test_status_command_shows_backends(self, runner): - """Test status command displays search backend availability.""" - result = runner.invoke(app, ["status"]) - # Should show backend status (even if no indexes) - assert "Search Backends" in result.output or result.exit_code == 0 - - -class TestSearchModeMapping: - """Test mode parameter maps correctly to SearchOptions.""" - - @pytest.fixture - def runner(self): - """Create CLI test runner.""" - return CliRunner() - - def test_exact_mode_disables_fuzzy(self, runner): - """Test --mode exact disables fuzzy search.""" - # This would require mocking, but we can verify the parameter is accepted - result = runner.invoke(app, ["search", "test", "--mode", "exact"]) - # Should not show mode validation error - assert "Invalid mode" not in result.output - - def test_fuzzy_mode_enables_only_fuzzy(self, runner): - """Test --mode fuzzy enables fuzzy search only.""" - result = runner.invoke(app, ["search", "test", "--mode", "fuzzy"]) - assert "Invalid mode" not in result.output - - def test_hybrid_mode_enables_both(self, runner): - """Test --mode hybrid enables both exact and fuzzy.""" - result = runner.invoke(app, ["search", "test", "--mode", "hybrid"]) - assert "Invalid mode" not in result.output - - def test_vector_mode_accepted(self, runner): - """Test --mode vector is accepted (future feature).""" - result = runner.invoke(app, ["search", "test", "--mode", "vector"]) - assert "Invalid mode" not in result.output - - -def test_cli_imports_successfully(): - """Test CLI modules import without errors.""" - from codexlens.cli import commands, output - - assert hasattr(commands, "app") - assert hasattr(output, "render_search_results") diff --git a/codex-lens/tests/test_cli_output.py b/codex-lens/tests/test_cli_output.py deleted file mode 100644 index 58d631c3..00000000 --- a/codex-lens/tests/test_cli_output.py +++ /dev/null @@ -1,280 +0,0 @@ -"""Tests for CodexLens CLI output functions.""" - -import json -from dataclasses import dataclass -from io import StringIO -from pathlib import Path -from unittest.mock import patch - -import pytest -from rich.console import Console - -from codexlens.cli.output import ( - _to_jsonable, - print_json, - render_file_inspect, - render_search_results, - render_status, - render_symbols, -) -from codexlens.entities import SearchResult, Symbol - - -class TestToJsonable: - """Tests for _to_jsonable helper function.""" - - def test_none_value(self): - """Test converting None.""" - assert _to_jsonable(None) is None - - def test_primitive_values(self): - """Test converting primitive values.""" - assert _to_jsonable("string") == "string" - assert _to_jsonable(42) == 42 - assert _to_jsonable(3.14) == 3.14 - assert _to_jsonable(True) is True - - def test_path_conversion(self): - """Test converting Path to string.""" - path = Path("/test/file.py") - result = _to_jsonable(path) - assert result == str(path) - - def test_dict_conversion(self): - """Test converting dict with nested values.""" - data = {"key": "value", "path": Path("/test.py"), "nested": {"a": 1}} - result = _to_jsonable(data) - assert result["key"] == "value" - # Path conversion uses str(), which may differ by OS - assert result["path"] == str(Path("/test.py")) - assert result["nested"]["a"] == 1 - - def test_list_conversion(self): - """Test converting list with various items.""" - data = ["string", 42, Path("/test.py")] - result = _to_jsonable(data) - assert result == ["string", 42, str(Path("/test.py"))] - - def test_tuple_conversion(self): - """Test converting tuple.""" - data = ("a", "b", Path("/test.py")) - result = _to_jsonable(data) - assert result == ["a", "b", str(Path("/test.py"))] - - def test_set_conversion(self): - """Test converting set.""" - data = {1, 2, 3} - result = _to_jsonable(data) - assert set(result) == {1, 2, 3} - - def test_pydantic_model_conversion(self): - """Test converting Pydantic model.""" - symbol = Symbol(name="test", kind="function", range=(1, 5)) - result = _to_jsonable(symbol) - assert result["name"] == "test" - assert result["kind"] == "function" - assert result["range"] == (1, 5) - - def test_dataclass_conversion(self): - """Test converting dataclass.""" - @dataclass - class TestData: - name: str - value: int - - data = TestData(name="test", value=42) - result = _to_jsonable(data) - assert result["name"] == "test" - assert result["value"] == 42 - - -class TestPrintJson: - """Tests for print_json function.""" - - def test_print_success_json(self, capsys): - """Test printing success JSON.""" - with patch("codexlens.cli.output.console") as mock_console: - captured_output = [] - mock_console.print_json = lambda x: captured_output.append(x) - - print_json(success=True, result={"key": "value"}) - - output = json.loads(captured_output[0]) - assert output["success"] is True - assert output["result"]["key"] == "value" - - def test_print_error_json(self, capsys): - """Test printing error JSON.""" - with patch("codexlens.cli.output.console") as mock_console: - captured_output = [] - mock_console.print_json = lambda x: captured_output.append(x) - - print_json(success=False, error="Something went wrong") - - output = json.loads(captured_output[0]) - assert output["success"] is False - assert output["error"] == "Something went wrong" - - def test_print_error_default_message(self, capsys): - """Test printing error with default message.""" - with patch("codexlens.cli.output.console") as mock_console: - captured_output = [] - mock_console.print_json = lambda x: captured_output.append(x) - - print_json(success=False) - - output = json.loads(captured_output[0]) - assert output["error"] == "Unknown error" - - -class TestRenderSearchResults: - """Tests for render_search_results function.""" - - def test_render_empty_results(self): - """Test rendering empty results.""" - with patch("codexlens.cli.output.console") as mock_console: - render_search_results([]) - mock_console.print.assert_called_once() - - def test_render_results_with_data(self): - """Test rendering results with data.""" - results = [ - SearchResult(path="/test/a.py", score=0.95, excerpt="test excerpt"), - SearchResult(path="/test/b.py", score=0.85, excerpt="another excerpt"), - ] - - with patch("codexlens.cli.output.console") as mock_console: - render_search_results(results) - mock_console.print.assert_called_once() - - def test_render_results_custom_title(self): - """Test rendering results with custom title.""" - results = [SearchResult(path="/test.py", score=0.5)] - - with patch("codexlens.cli.output.console") as mock_console: - render_search_results(results, title="Custom Title") - mock_console.print.assert_called_once() - - -class TestRenderSymbols: - """Tests for render_symbols function.""" - - def test_render_empty_symbols(self): - """Test rendering empty symbols list.""" - with patch("codexlens.cli.output.console") as mock_console: - render_symbols([]) - mock_console.print.assert_called_once() - - def test_render_symbols_with_data(self): - """Test rendering symbols with data.""" - symbols = [ - Symbol(name="MyClass", kind="class", range=(1, 10)), - Symbol(name="my_func", kind="function", range=(12, 20)), - ] - - with patch("codexlens.cli.output.console") as mock_console: - render_symbols(symbols) - mock_console.print.assert_called_once() - - def test_render_symbols_custom_title(self): - """Test rendering symbols with custom title.""" - symbols = [Symbol(name="test", kind="function", range=(1, 1))] - - with patch("codexlens.cli.output.console") as mock_console: - render_symbols(symbols, title="Functions Found") - mock_console.print.assert_called_once() - - -class TestRenderStatus: - """Tests for render_status function.""" - - def test_render_basic_stats(self): - """Test rendering basic stats.""" - stats = {"files": 100, "symbols": 500} - - with patch("codexlens.cli.output.console") as mock_console: - render_status(stats) - mock_console.print.assert_called_once() - - def test_render_stats_with_nested_dict(self): - """Test rendering stats with nested dict.""" - stats = { - "files": 100, - "languages": {"python": 50, "javascript": 30, "go": 20}, - } - - with patch("codexlens.cli.output.console") as mock_console: - render_status(stats) - mock_console.print.assert_called_once() - - def test_render_stats_with_list(self): - """Test rendering stats with list value.""" - stats = { - "files": 100, - "recent_files": ["/a.py", "/b.py", "/c.py"], - } - - with patch("codexlens.cli.output.console") as mock_console: - render_status(stats) - mock_console.print.assert_called_once() - - -class TestRenderFileInspect: - """Tests for render_file_inspect function.""" - - def test_render_file_with_symbols(self): - """Test rendering file inspection with symbols.""" - symbols = [ - Symbol(name="hello", kind="function", range=(1, 5)), - Symbol(name="MyClass", kind="class", range=(7, 20)), - ] - - with patch("codexlens.cli.output.console") as mock_console: - render_file_inspect("/test/file.py", "python", symbols) - # Should be called twice: once for header, once for symbols table - assert mock_console.print.call_count == 2 - - def test_render_file_without_symbols(self): - """Test rendering file inspection without symbols.""" - with patch("codexlens.cli.output.console") as mock_console: - render_file_inspect("/test/file.py", "python", []) - assert mock_console.print.call_count == 2 - - -class TestJsonOutputIntegration: - """Integration tests for JSON output.""" - - def test_search_result_to_json(self): - """Test converting SearchResult to JSON.""" - result = SearchResult( - path="/test.py", - score=0.95, - excerpt="test code here", - metadata={"line": 10}, - ) - - jsonable = _to_jsonable(result) - # Verify it can be JSON serialized - json_str = json.dumps(jsonable) - parsed = json.loads(json_str) - - assert parsed["path"] == "/test.py" - assert parsed["score"] == 0.95 - assert parsed["excerpt"] == "test code here" - - def test_nested_results_to_json(self): - """Test converting nested structure to JSON.""" - data = { - "query": "test", - "results": [ - SearchResult(path="/a.py", score=0.9), - SearchResult(path="/b.py", score=0.8), - ], - } - - jsonable = _to_jsonable(data) - json_str = json.dumps(jsonable) - parsed = json.loads(json_str) - - assert parsed["query"] == "test" - assert len(parsed["results"]) == 2 diff --git a/codex-lens/tests/test_clustering_strategies.py b/codex-lens/tests/test_clustering_strategies.py deleted file mode 100644 index fc559f5f..00000000 --- a/codex-lens/tests/test_clustering_strategies.py +++ /dev/null @@ -1,786 +0,0 @@ -"""Unit tests for clustering strategies in the hybrid search pipeline. - -Tests cover: -1. HDBSCANStrategy - Primary HDBSCAN clustering -2. DBSCANStrategy - Fallback DBSCAN clustering -3. NoOpStrategy - No-op fallback when clustering unavailable -4. ClusteringStrategyFactory - Factory with fallback chain -""" - -from __future__ import annotations - -from typing import List -from unittest.mock import MagicMock, patch - -import pytest - -from codexlens.entities import SearchResult -from codexlens.search.clustering import ( - BaseClusteringStrategy, - ClusteringConfig, - ClusteringStrategyFactory, - NoOpStrategy, - check_clustering_strategy_available, - get_strategy, -) - - -# ============================================================================= -# Test Fixtures -# ============================================================================= - - -@pytest.fixture -def sample_results() -> List[SearchResult]: - """Create sample search results for testing.""" - return [ - SearchResult(path="a.py", score=0.9, excerpt="def foo(): pass"), - SearchResult(path="b.py", score=0.8, excerpt="def foo(): pass"), - SearchResult(path="c.py", score=0.7, excerpt="def bar(): pass"), - SearchResult(path="d.py", score=0.6, excerpt="def bar(): pass"), - SearchResult(path="e.py", score=0.5, excerpt="def baz(): pass"), - ] - - -@pytest.fixture -def mock_embeddings(): - """Create mock embeddings for 5 results. - - Creates embeddings that should form 2 clusters: - - Results 0, 1 (similar to each other) - - Results 2, 3 (similar to each other) - - Result 4 (noise/singleton) - """ - import numpy as np - - # Create embeddings in 3D for simplicity - return np.array( - [ - [1.0, 0.0, 0.0], # Result 0 - cluster A - [0.9, 0.1, 0.0], # Result 1 - cluster A - [0.0, 1.0, 0.0], # Result 2 - cluster B - [0.1, 0.9, 0.0], # Result 3 - cluster B - [0.0, 0.0, 1.0], # Result 4 - noise/singleton - ], - dtype=np.float32, - ) - - -@pytest.fixture -def default_config() -> ClusteringConfig: - """Create default clustering configuration.""" - return ClusteringConfig( - min_cluster_size=2, - min_samples=1, - metric="euclidean", - ) - - -# ============================================================================= -# Test ClusteringConfig -# ============================================================================= - - -class TestClusteringConfig: - """Tests for ClusteringConfig validation.""" - - def test_default_values(self): - """Test default configuration values.""" - config = ClusteringConfig() - assert config.min_cluster_size == 3 - assert config.min_samples == 2 - assert config.metric == "cosine" - assert config.cluster_selection_epsilon == 0.0 - assert config.allow_single_cluster is True - assert config.prediction_data is False - - def test_custom_values(self): - """Test custom configuration values.""" - config = ClusteringConfig( - min_cluster_size=5, - min_samples=3, - metric="euclidean", - cluster_selection_epsilon=0.1, - allow_single_cluster=False, - prediction_data=True, - ) - assert config.min_cluster_size == 5 - assert config.min_samples == 3 - assert config.metric == "euclidean" - - def test_invalid_min_cluster_size(self): - """Test validation rejects min_cluster_size < 2.""" - with pytest.raises(ValueError, match="min_cluster_size must be >= 2"): - ClusteringConfig(min_cluster_size=1) - - def test_invalid_min_samples(self): - """Test validation rejects min_samples < 1.""" - with pytest.raises(ValueError, match="min_samples must be >= 1"): - ClusteringConfig(min_samples=0) - - def test_invalid_metric(self): - """Test validation rejects invalid metric.""" - with pytest.raises(ValueError, match="metric must be one of"): - ClusteringConfig(metric="invalid") - - def test_invalid_epsilon(self): - """Test validation rejects negative epsilon.""" - with pytest.raises(ValueError, match="cluster_selection_epsilon must be >= 0"): - ClusteringConfig(cluster_selection_epsilon=-0.1) - - -# ============================================================================= -# Test NoOpStrategy -# ============================================================================= - - -class TestNoOpStrategy: - """Tests for NoOpStrategy - always available.""" - - def test_cluster_returns_singleton_clusters( - self, sample_results: List[SearchResult], mock_embeddings - ): - """Test cluster() returns each result as singleton cluster.""" - strategy = NoOpStrategy() - clusters = strategy.cluster(mock_embeddings, sample_results) - - assert len(clusters) == 5 - for i, cluster in enumerate(clusters): - assert cluster == [i] - - def test_cluster_empty_results(self): - """Test cluster() with empty results.""" - import numpy as np - - strategy = NoOpStrategy() - clusters = strategy.cluster(np.array([]), []) - - assert clusters == [] - - def test_select_representatives_returns_all_sorted( - self, sample_results: List[SearchResult] - ): - """Test select_representatives() returns all results sorted by score.""" - strategy = NoOpStrategy() - clusters = [[i] for i in range(len(sample_results))] - representatives = strategy.select_representatives(clusters, sample_results) - - assert len(representatives) == 5 - # Check sorted by score descending - scores = [r.score for r in representatives] - assert scores == sorted(scores, reverse=True) - - def test_select_representatives_empty(self): - """Test select_representatives() with empty input.""" - strategy = NoOpStrategy() - representatives = strategy.select_representatives([], []) - assert representatives == [] - - def test_fit_predict_convenience_method( - self, sample_results: List[SearchResult], mock_embeddings - ): - """Test fit_predict() convenience method.""" - strategy = NoOpStrategy() - representatives = strategy.fit_predict(mock_embeddings, sample_results) - - assert len(representatives) == 5 - # All results returned, sorted by score - assert representatives[0].score >= representatives[-1].score - - -# ============================================================================= -# Test HDBSCANStrategy -# ============================================================================= - - -class TestHDBSCANStrategy: - """Tests for HDBSCANStrategy - requires hdbscan package.""" - - @pytest.fixture - def hdbscan_strategy(self, default_config): - """Create HDBSCANStrategy if available.""" - try: - from codexlens.search.clustering import HDBSCANStrategy - - return HDBSCANStrategy(default_config) - except ImportError: - pytest.skip("hdbscan not installed") - - def test_cluster_returns_list_of_lists( - self, hdbscan_strategy, sample_results: List[SearchResult], mock_embeddings - ): - """Test cluster() returns List[List[int]].""" - clusters = hdbscan_strategy.cluster(mock_embeddings, sample_results) - - assert isinstance(clusters, list) - for cluster in clusters: - assert isinstance(cluster, list) - for idx in cluster: - assert isinstance(idx, int) - assert 0 <= idx < len(sample_results) - - def test_cluster_covers_all_results( - self, hdbscan_strategy, sample_results: List[SearchResult], mock_embeddings - ): - """Test all result indices appear in clusters.""" - clusters = hdbscan_strategy.cluster(mock_embeddings, sample_results) - - all_indices = set() - for cluster in clusters: - all_indices.update(cluster) - - assert all_indices == set(range(len(sample_results))) - - def test_cluster_supports_cosine_metric( - self, sample_results: List[SearchResult], mock_embeddings - ): - """Test HDBSCANStrategy can run with metric='cosine' (via precomputed distances).""" - try: - from codexlens.search.clustering import HDBSCANStrategy - except ImportError: - pytest.skip("hdbscan not installed") - - config = ClusteringConfig(min_cluster_size=2, min_samples=1, metric="cosine") - strategy = HDBSCANStrategy(config) - - clusters = strategy.cluster(mock_embeddings, sample_results) - - all_indices = set() - for cluster in clusters: - all_indices.update(cluster) - - assert all_indices == set(range(len(sample_results))) - - def test_cluster_empty_results(self, hdbscan_strategy): - """Test cluster() with empty results.""" - import numpy as np - - clusters = hdbscan_strategy.cluster(np.array([]).reshape(0, 3), []) - assert clusters == [] - - def test_cluster_single_result(self, hdbscan_strategy): - """Test cluster() with single result.""" - import numpy as np - - result = SearchResult(path="a.py", score=0.9, excerpt="test") - embeddings = np.array([[1.0, 0.0, 0.0]]) - clusters = hdbscan_strategy.cluster(embeddings, [result]) - - assert len(clusters) == 1 - assert clusters[0] == [0] - - def test_cluster_fewer_than_min_cluster_size(self, hdbscan_strategy): - """Test cluster() with fewer results than min_cluster_size.""" - import numpy as np - - # Strategy has min_cluster_size=2, so 1 result returns singleton - result = SearchResult(path="a.py", score=0.9, excerpt="test") - embeddings = np.array([[1.0, 0.0, 0.0]]) - clusters = hdbscan_strategy.cluster(embeddings, [result]) - - assert len(clusters) == 1 - assert clusters[0] == [0] - - def test_select_representatives_picks_highest_score( - self, hdbscan_strategy, sample_results: List[SearchResult], mock_embeddings - ): - """Test select_representatives() picks highest score per cluster.""" - clusters = hdbscan_strategy.cluster(mock_embeddings, sample_results) - representatives = hdbscan_strategy.select_representatives( - clusters, sample_results - ) - - # Each representative should be the highest-scored in its cluster - for rep in representatives: - # Find the cluster containing this representative - rep_idx = next( - i for i, r in enumerate(sample_results) if r.path == rep.path - ) - for cluster in clusters: - if rep_idx in cluster: - cluster_scores = [sample_results[i].score for i in cluster] - assert rep.score == max(cluster_scores) - break - - def test_select_representatives_sorted_by_score( - self, hdbscan_strategy, sample_results: List[SearchResult], mock_embeddings - ): - """Test representatives are sorted by score descending.""" - clusters = hdbscan_strategy.cluster(mock_embeddings, sample_results) - representatives = hdbscan_strategy.select_representatives( - clusters, sample_results - ) - - scores = [r.score for r in representatives] - assert scores == sorted(scores, reverse=True) - - def test_fit_predict_end_to_end( - self, hdbscan_strategy, sample_results: List[SearchResult], mock_embeddings - ): - """Test fit_predict() end-to-end clustering.""" - representatives = hdbscan_strategy.fit_predict(mock_embeddings, sample_results) - - # Should have fewer or equal representatives than input - assert len(representatives) <= len(sample_results) - # All representatives should be from original results - rep_paths = {r.path for r in representatives} - original_paths = {r.path for r in sample_results} - assert rep_paths.issubset(original_paths) - - -# ============================================================================= -# Test DBSCANStrategy -# ============================================================================= - - -class TestDBSCANStrategy: - """Tests for DBSCANStrategy - requires sklearn.""" - - @pytest.fixture - def dbscan_strategy(self, default_config): - """Create DBSCANStrategy if available.""" - try: - from codexlens.search.clustering import DBSCANStrategy - - return DBSCANStrategy(default_config) - except ImportError: - pytest.skip("sklearn not installed") - - def test_cluster_returns_list_of_lists( - self, dbscan_strategy, sample_results: List[SearchResult], mock_embeddings - ): - """Test cluster() returns List[List[int]].""" - clusters = dbscan_strategy.cluster(mock_embeddings, sample_results) - - assert isinstance(clusters, list) - for cluster in clusters: - assert isinstance(cluster, list) - for idx in cluster: - assert isinstance(idx, int) - assert 0 <= idx < len(sample_results) - - def test_cluster_covers_all_results( - self, dbscan_strategy, sample_results: List[SearchResult], mock_embeddings - ): - """Test all result indices appear in clusters.""" - clusters = dbscan_strategy.cluster(mock_embeddings, sample_results) - - all_indices = set() - for cluster in clusters: - all_indices.update(cluster) - - assert all_indices == set(range(len(sample_results))) - - def test_cluster_empty_results(self, dbscan_strategy): - """Test cluster() with empty results.""" - import numpy as np - - clusters = dbscan_strategy.cluster(np.array([]).reshape(0, 3), []) - assert clusters == [] - - def test_cluster_single_result(self, dbscan_strategy): - """Test cluster() with single result.""" - import numpy as np - - result = SearchResult(path="a.py", score=0.9, excerpt="test") - embeddings = np.array([[1.0, 0.0, 0.0]]) - clusters = dbscan_strategy.cluster(embeddings, [result]) - - assert len(clusters) == 1 - assert clusters[0] == [0] - - def test_cluster_with_explicit_eps(self, default_config): - """Test cluster() with explicit eps parameter.""" - try: - from codexlens.search.clustering import DBSCANStrategy - except ImportError: - pytest.skip("sklearn not installed") - - import numpy as np - - strategy = DBSCANStrategy(default_config, eps=0.5) - results = [SearchResult(path=f"{i}.py", score=0.5, excerpt="test") for i in range(3)] - embeddings = np.array([[0.0, 0.0], [0.1, 0.0], [1.0, 1.0]]) - - clusters = strategy.cluster(embeddings, results) - # With eps=0.5, first two should cluster, third should be separate - assert len(clusters) >= 2 - - def test_auto_compute_eps(self, dbscan_strategy, mock_embeddings): - """Test eps auto-computation from distance distribution.""" - # Should not raise - eps is computed automatically - results = [SearchResult(path=f"{i}.py", score=0.5, excerpt="test") for i in range(5)] - clusters = dbscan_strategy.cluster(mock_embeddings, results) - assert len(clusters) > 0 - - def test_select_representatives_picks_highest_score( - self, dbscan_strategy, sample_results: List[SearchResult], mock_embeddings - ): - """Test select_representatives() picks highest score per cluster.""" - clusters = dbscan_strategy.cluster(mock_embeddings, sample_results) - representatives = dbscan_strategy.select_representatives( - clusters, sample_results - ) - - # Each representative should be the highest-scored in its cluster - for rep in representatives: - rep_idx = next( - i for i, r in enumerate(sample_results) if r.path == rep.path - ) - for cluster in clusters: - if rep_idx in cluster: - cluster_scores = [sample_results[i].score for i in cluster] - assert rep.score == max(cluster_scores) - break - - def test_select_representatives_sorted_by_score( - self, dbscan_strategy, sample_results: List[SearchResult], mock_embeddings - ): - """Test representatives are sorted by score descending.""" - clusters = dbscan_strategy.cluster(mock_embeddings, sample_results) - representatives = dbscan_strategy.select_representatives( - clusters, sample_results - ) - - scores = [r.score for r in representatives] - assert scores == sorted(scores, reverse=True) - - -# ============================================================================= -# Test ClusteringStrategyFactory -# ============================================================================= - - -class TestClusteringStrategyFactory: - """Tests for ClusteringStrategyFactory.""" - - def test_check_noop_always_available(self): - """Test noop strategy is always available.""" - ok, err = check_clustering_strategy_available("noop") - assert ok is True - assert err is None - - def test_check_invalid_strategy(self): - """Test invalid strategy name returns error.""" - ok, err = check_clustering_strategy_available("invalid") - assert ok is False - assert "Invalid clustering strategy" in err - - def test_get_strategy_noop(self, default_config): - """Test get_strategy('noop') returns NoOpStrategy.""" - strategy = get_strategy("noop", default_config) - assert isinstance(strategy, NoOpStrategy) - - def test_get_strategy_auto_returns_something(self, default_config): - """Test get_strategy('auto') returns a strategy.""" - strategy = get_strategy("auto", default_config) - assert isinstance(strategy, BaseClusteringStrategy) - - def test_get_strategy_with_fallback_enabled(self, default_config): - """Test fallback when primary strategy unavailable.""" - # Mock hdbscan unavailable - with patch.dict("sys.modules", {"hdbscan": None}): - # Should fall back to dbscan or noop - strategy = get_strategy("hdbscan", default_config, fallback=True) - assert isinstance(strategy, BaseClusteringStrategy) - - def test_get_strategy_fallback_disabled_raises(self, default_config): - """Test ImportError when fallback disabled and strategy unavailable.""" - with patch( - "codexlens.search.clustering.factory.check_clustering_strategy_available" - ) as mock_check: - mock_check.return_value = (False, "Test error") - - with pytest.raises(ImportError, match="Test error"): - get_strategy("hdbscan", default_config, fallback=False) - - def test_get_strategy_invalid_raises(self, default_config): - """Test ValueError for invalid strategy name.""" - with pytest.raises(ValueError, match="Unknown clustering strategy"): - get_strategy("invalid", default_config) - - def test_factory_class_interface(self, default_config): - """Test ClusteringStrategyFactory class interface.""" - strategy = ClusteringStrategyFactory.get_strategy("noop", default_config) - assert isinstance(strategy, NoOpStrategy) - - ok, err = ClusteringStrategyFactory.check_available("noop") - assert ok is True - - @pytest.mark.skipif( - not check_clustering_strategy_available("hdbscan")[0], - reason="hdbscan not installed", - ) - def test_get_strategy_hdbscan(self, default_config): - """Test get_strategy('hdbscan') returns HDBSCANStrategy.""" - from codexlens.search.clustering import HDBSCANStrategy - - strategy = get_strategy("hdbscan", default_config) - assert isinstance(strategy, HDBSCANStrategy) - - @pytest.mark.skipif( - not check_clustering_strategy_available("dbscan")[0], - reason="sklearn not installed", - ) - def test_get_strategy_dbscan(self, default_config): - """Test get_strategy('dbscan') returns DBSCANStrategy.""" - from codexlens.search.clustering import DBSCANStrategy - - strategy = get_strategy("dbscan", default_config) - assert isinstance(strategy, DBSCANStrategy) - - @pytest.mark.skipif( - not check_clustering_strategy_available("dbscan")[0], - reason="sklearn not installed", - ) - def test_get_strategy_dbscan_with_kwargs(self, default_config): - """Test DBSCANStrategy kwargs passed through factory.""" - strategy = get_strategy("dbscan", default_config, eps=0.3, eps_percentile=20.0) - assert strategy.eps == 0.3 - assert strategy.eps_percentile == 20.0 - - -# ============================================================================= -# Integration Tests -# ============================================================================= - - -class TestClusteringIntegration: - """Integration tests for clustering strategies.""" - - def test_all_strategies_same_interface( - self, sample_results: List[SearchResult], mock_embeddings, default_config - ): - """Test all strategies have consistent interface.""" - strategies = [NoOpStrategy(default_config)] - - # Add available strategies - try: - from codexlens.search.clustering import HDBSCANStrategy - - strategies.append(HDBSCANStrategy(default_config)) - except ImportError: - pass - - try: - from codexlens.search.clustering import DBSCANStrategy - - strategies.append(DBSCANStrategy(default_config)) - except ImportError: - pass - - for strategy in strategies: - # All should implement cluster() - clusters = strategy.cluster(mock_embeddings, sample_results) - assert isinstance(clusters, list) - - # All should implement select_representatives() - reps = strategy.select_representatives(clusters, sample_results) - assert isinstance(reps, list) - assert all(isinstance(r, SearchResult) for r in reps) - - # All should implement fit_predict() - reps = strategy.fit_predict(mock_embeddings, sample_results) - assert isinstance(reps, list) - - def test_clustering_reduces_redundancy( - self, default_config - ): - """Test clustering reduces redundant similar results.""" - import numpy as np - - # Create results with very similar embeddings - results = [ - SearchResult(path=f"{i}.py", score=0.9 - i * 0.01, excerpt="def foo(): pass") - for i in range(10) - ] - # Very similar embeddings - should cluster together - embeddings = np.array( - [[1.0 + i * 0.01, 0.0, 0.0] for i in range(10)], dtype=np.float32 - ) - - strategy = get_strategy("auto", default_config) - representatives = strategy.fit_predict(embeddings, results) - - # Should have fewer representatives than input (clustering reduced redundancy) - # NoOp returns all, but HDBSCAN/DBSCAN should reduce - assert len(representatives) <= len(results) - - -# ============================================================================= -# Test FrequencyStrategy -# ============================================================================= - - -class TestFrequencyStrategy: - """Tests for FrequencyStrategy - frequency-based clustering.""" - - @pytest.fixture - def frequency_config(self): - """Create FrequencyConfig for testing.""" - from codexlens.search.clustering import FrequencyConfig - return FrequencyConfig(min_frequency=1, max_representatives_per_group=3) - - @pytest.fixture - def frequency_strategy(self, frequency_config): - """Create FrequencyStrategy instance.""" - from codexlens.search.clustering import FrequencyStrategy - return FrequencyStrategy(frequency_config) - - @pytest.fixture - def symbol_results(self) -> List[SearchResult]: - """Create sample results with symbol names for frequency testing.""" - return [ - SearchResult(path="auth.py", score=0.9, excerpt="authenticate user", symbol_name="authenticate"), - SearchResult(path="login.py", score=0.85, excerpt="authenticate login", symbol_name="authenticate"), - SearchResult(path="session.py", score=0.8, excerpt="authenticate session", symbol_name="authenticate"), - SearchResult(path="utils.py", score=0.7, excerpt="helper function", symbol_name="helper_func"), - SearchResult(path="validate.py", score=0.6, excerpt="validate input", symbol_name="validate"), - SearchResult(path="check.py", score=0.55, excerpt="validate data", symbol_name="validate"), - ] - - def test_frequency_strategy_available(self): - """Test FrequencyStrategy is always available (no deps).""" - ok, err = check_clustering_strategy_available("frequency") - assert ok is True - assert err is None - - def test_get_strategy_frequency(self): - """Test get_strategy('frequency') returns FrequencyStrategy.""" - from codexlens.search.clustering import FrequencyStrategy - strategy = get_strategy("frequency") - assert isinstance(strategy, FrequencyStrategy) - - def test_cluster_groups_by_symbol(self, frequency_strategy, symbol_results): - """Test cluster() groups results by symbol name.""" - import numpy as np - embeddings = np.random.rand(len(symbol_results), 128) - - clusters = frequency_strategy.cluster(embeddings, symbol_results) - - # Should have 3 groups: authenticate(3), validate(2), helper_func(1) - assert len(clusters) == 3 - - # First cluster should be authenticate (highest frequency) - first_cluster_symbols = [symbol_results[i].symbol_name for i in clusters[0]] - assert all(s == "authenticate" for s in first_cluster_symbols) - assert len(clusters[0]) == 3 - - def test_cluster_orders_by_frequency(self, frequency_strategy, symbol_results): - """Test clusters are ordered by frequency (descending).""" - import numpy as np - embeddings = np.random.rand(len(symbol_results), 128) - - clusters = frequency_strategy.cluster(embeddings, symbol_results) - - # Verify frequency ordering - frequencies = [len(c) for c in clusters] - assert frequencies == sorted(frequencies, reverse=True) - - def test_select_representatives_adds_frequency_metadata(self, frequency_strategy, symbol_results): - """Test representatives have frequency metadata.""" - import numpy as np - embeddings = np.random.rand(len(symbol_results), 128) - - clusters = frequency_strategy.cluster(embeddings, symbol_results) - reps = frequency_strategy.select_representatives(clusters, symbol_results, embeddings) - - # Check frequency metadata - for rep in reps: - assert "frequency" in rep.metadata - assert rep.metadata["frequency"] >= 1 - - def test_min_frequency_filter_mode(self, symbol_results): - """Test min_frequency with filter mode removes low-frequency results.""" - from codexlens.search.clustering import FrequencyStrategy, FrequencyConfig - import numpy as np - - config = FrequencyConfig(min_frequency=2, keep_mode="filter") - strategy = FrequencyStrategy(config) - embeddings = np.random.rand(len(symbol_results), 128) - - reps = strategy.fit_predict(embeddings, symbol_results) - - # helper_func (freq=1) should be filtered out - rep_symbols = [r.symbol_name for r in reps] - assert "helper_func" not in rep_symbols - assert "authenticate" in rep_symbols - assert "validate" in rep_symbols - - def test_min_frequency_demote_mode(self, symbol_results): - """Test min_frequency with demote mode keeps but deprioritizes low-frequency.""" - from codexlens.search.clustering import FrequencyStrategy, FrequencyConfig - import numpy as np - - config = FrequencyConfig(min_frequency=2, keep_mode="demote") - strategy = FrequencyStrategy(config) - embeddings = np.random.rand(len(symbol_results), 128) - - reps = strategy.fit_predict(embeddings, symbol_results) - - # helper_func should still be present but at the end - rep_symbols = [r.symbol_name for r in reps] - assert "helper_func" in rep_symbols - # Should be demoted to end - helper_idx = rep_symbols.index("helper_func") - assert helper_idx == len(rep_symbols) - 1 - - def test_group_by_file(self, symbol_results): - """Test grouping by file path instead of symbol.""" - from codexlens.search.clustering import FrequencyStrategy, FrequencyConfig - import numpy as np - - config = FrequencyConfig(group_by="file") - strategy = FrequencyStrategy(config) - embeddings = np.random.rand(len(symbol_results), 128) - - clusters = strategy.cluster(embeddings, symbol_results) - - # Each file should be its own group (all unique paths) - assert len(clusters) == 6 - - def test_max_representatives_per_group(self, symbol_results): - """Test max_representatives_per_group limits output per symbol.""" - from codexlens.search.clustering import FrequencyStrategy, FrequencyConfig - import numpy as np - - config = FrequencyConfig(max_representatives_per_group=1) - strategy = FrequencyStrategy(config) - embeddings = np.random.rand(len(symbol_results), 128) - - reps = strategy.fit_predict(embeddings, symbol_results) - - # Should have at most 1 per group = 3 groups = 3 reps - assert len(reps) == 3 - - def test_frequency_boost_score(self, symbol_results): - """Test frequency_weight boosts high-frequency results.""" - from codexlens.search.clustering import FrequencyStrategy, FrequencyConfig - import numpy as np - - config = FrequencyConfig(frequency_weight=0.5) # Strong boost - strategy = FrequencyStrategy(config) - embeddings = np.random.rand(len(symbol_results), 128) - - reps = strategy.fit_predict(embeddings, symbol_results) - - # High-frequency results should have boosted scores in metadata - for rep in reps: - if rep.metadata.get("frequency", 1) > 1: - assert rep.metadata.get("frequency_boosted_score", 0) > rep.score - - def test_empty_results(self, frequency_strategy): - """Test handling of empty results.""" - import numpy as np - - clusters = frequency_strategy.cluster(np.array([]).reshape(0, 128), []) - assert clusters == [] - - reps = frequency_strategy.select_representatives([], [], None) - assert reps == [] - - def test_factory_with_kwargs(self): - """Test factory passes kwargs to FrequencyConfig.""" - strategy = get_strategy("frequency", min_frequency=3, group_by="file") - assert strategy.config.min_frequency == 3 - assert strategy.config.group_by == "file" diff --git a/codex-lens/tests/test_code_extractor.py b/codex-lens/tests/test_code_extractor.py deleted file mode 100644 index bb83279f..00000000 --- a/codex-lens/tests/test_code_extractor.py +++ /dev/null @@ -1,342 +0,0 @@ -"""Tests for code extractor functionality.""" - -import tempfile -from pathlib import Path - -import pytest - -from codexlens.entities import SearchResult, Symbol -from codexlens.semantic.code_extractor import ( - CodeBlockResult, - extract_complete_code_block, - extract_symbol_with_context, - format_search_result_code, - get_code_block_summary, - enhance_search_results, -) - - -class TestExtractCompleteCodeBlock: - """Test extract_complete_code_block function.""" - - def test_returns_stored_content(self): - """Test returns content when available in result.""" - result = SearchResult( - path="/test.py", - score=0.9, - content="def hello():\n return 'world'", - start_line=1, - end_line=2, - ) - - code = extract_complete_code_block(result) - assert code == "def hello():\n return 'world'" - - def test_reads_from_file_when_no_content(self, tmp_path): - """Test reads from file when content not in result.""" - test_file = tmp_path / "test.py" - test_file.write_text("""# Header comment -def hello(): - '''Docstring''' - return 'world' - -def goodbye(): - pass -""") - - result = SearchResult( - path=str(test_file), - score=0.9, - excerpt="def hello():", - start_line=2, - end_line=4, - ) - - code = extract_complete_code_block(result) - assert "def hello():" in code - assert "return 'world'" in code - - def test_adds_context_lines(self, tmp_path): - """Test adding context lines.""" - test_file = tmp_path / "test.py" - test_file.write_text("""# Line 1 -# Line 2 -def hello(): - return 'world' -# Line 5 -# Line 6 -""") - - result = SearchResult( - path=str(test_file), - score=0.9, - start_line=3, - end_line=4, - ) - - code = extract_complete_code_block(result, context_lines=1) - assert "# Line 2" in code - assert "# Line 5" in code - - -class TestExtractSymbolWithContext: - """Test extract_symbol_with_context function.""" - - def test_extracts_with_decorators(self, tmp_path): - """Test extracting symbol with decorators.""" - test_file = tmp_path / "test.py" - # Line 1: @decorator - # Line 2: @another_decorator - # Line 3: def hello(): - # Line 4: return 'world' - test_file.write_text("@decorator\n@another_decorator\ndef hello():\n return 'world'\n") - - symbol = Symbol(name="hello", kind="function", range=(3, 4)) - code = extract_symbol_with_context(str(test_file), symbol) - - assert "@decorator" in code - assert "@another_decorator" in code - assert "def hello():" in code - - -class TestFormatSearchResultCode: - """Test format_search_result_code function.""" - - def test_format_with_line_numbers(self): - """Test formatting with line numbers.""" - result = SearchResult( - path="/test.py", - score=0.9, - content="def hello():\n return 'world'", - start_line=10, - end_line=11, - ) - - formatted = format_search_result_code(result, show_line_numbers=True) - assert " 10 |" in formatted - assert " 11 |" in formatted - - def test_format_truncation(self): - """Test max_lines truncation.""" - result = SearchResult( - path="/test.py", - score=0.9, - content="line1\nline2\nline3\nline4\nline5", - start_line=1, - end_line=5, - ) - - formatted = format_search_result_code(result, max_lines=2) - assert "(truncated)" in formatted - - def test_format_without_line_numbers(self): - """Test formatting without line numbers.""" - result = SearchResult( - path="/test.py", - score=0.9, - content="def hello():\n pass", - start_line=1, - end_line=2, - ) - - formatted = format_search_result_code(result, show_line_numbers=False) - assert "def hello():" in formatted - assert " | " not in formatted - - -class TestGetCodeBlockSummary: - """Test get_code_block_summary function.""" - - def test_summary_with_symbol(self): - """Test summary with symbol info.""" - result = SearchResult( - path="/test.py", - score=0.9, - symbol_name="hello", - symbol_kind="function", - start_line=10, - end_line=20, - ) - - summary = get_code_block_summary(result) - assert "function" in summary - assert "hello" in summary - assert "10-20" in summary - assert "test.py" in summary - - def test_summary_single_line(self): - """Test summary for single line.""" - result = SearchResult( - path="/test.py", - score=0.9, - start_line=5, - end_line=5, - ) - - summary = get_code_block_summary(result) - assert "line 5" in summary - - -class TestCodeBlockResult: - """Test CodeBlockResult class.""" - - def test_properties(self): - """Test CodeBlockResult properties.""" - result = SearchResult( - path="/path/to/test.py", - score=0.85, - content="def hello(): pass", - symbol_name="hello", - symbol_kind="function", - start_line=1, - end_line=1, - ) - - block = CodeBlockResult(result) - - assert block.score == 0.85 - assert block.path == "/path/to/test.py" - assert block.file_name == "test.py" - assert block.symbol_name == "hello" - assert block.symbol_kind == "function" - assert block.line_range == (1, 1) - assert block.full_code == "def hello(): pass" - - def test_summary(self): - """Test CodeBlockResult summary.""" - result = SearchResult( - path="/test.py", - score=0.9, - symbol_name="Calculator", - symbol_kind="class", - start_line=10, - end_line=50, - ) - - block = CodeBlockResult(result) - summary = block.summary - - assert "class" in summary - assert "Calculator" in summary - - def test_format(self): - """Test CodeBlockResult format.""" - result = SearchResult( - path="/test.py", - score=0.9, - content="def hello():\n return 42", - start_line=1, - end_line=2, - ) - - block = CodeBlockResult(result) - formatted = block.format(show_line_numbers=True) - - assert " 1 |" in formatted - assert "def hello():" in formatted - - -class TestEnhanceSearchResults: - """Test enhance_search_results function.""" - - def test_enhances_results(self): - """Test enhancing search results.""" - results = [ - SearchResult(path="/a.py", score=0.9, content="def a(): pass"), - SearchResult(path="/b.py", score=0.8, content="def b(): pass"), - ] - - enhanced = enhance_search_results(results) - - assert len(enhanced) == 2 - assert all(isinstance(r, CodeBlockResult) for r in enhanced) - assert enhanced[0].score == 0.9 - assert enhanced[1].score == 0.8 - - -class TestIntegration: - """Integration tests for code extraction.""" - - def test_full_workflow(self, tmp_path): - """Test complete code extraction workflow.""" - # Create test file - test_file = tmp_path / "calculator.py" - test_file.write_text('''"""Calculator module.""" - -@staticmethod -def add(a: int, b: int) -> int: - """Add two numbers. - - Args: - a: First number - b: Second number - - Returns: - Sum of a and b - """ - return a + b - -class Calculator: - """A simple calculator.""" - - def __init__(self): - self.result = 0 - - def compute(self, operation: str, value: int) -> int: - """Perform computation.""" - if operation == "add": - self.result += value - elif operation == "sub": - self.result -= value - return self.result -''') - - # Simulate search result for 'add' function - result = SearchResult( - path=str(test_file), - score=0.92, - content='''@staticmethod -def add(a: int, b: int) -> int: - """Add two numbers. - - Args: - a: First number - b: Second number - - Returns: - Sum of a and b - """ - return a + b''', - symbol_name="add", - symbol_kind="function", - start_line=3, - end_line=14, - ) - - block = CodeBlockResult(result) - - # Test properties - assert block.symbol_name == "add" - assert block.symbol_kind == "function" - assert block.line_range == (3, 14) - - # Test full code - assert "@staticmethod" in block.full_code - assert "def add(" in block.full_code - assert "return a + b" in block.full_code - - # Test summary - summary = block.summary - assert "function" in summary - assert "add" in summary - - # Test format - formatted = block.format(show_line_numbers=True) - assert " 3 |" in formatted or "3 |" in formatted - - print("\n--- Full Code Block ---") - print(block.full_code) - print("\n--- Formatted Output ---") - print(formatted) - print("\n--- Summary ---") - print(summary) diff --git a/codex-lens/tests/test_compare_ccw_smart_search_stage2.py b/codex-lens/tests/test_compare_ccw_smart_search_stage2.py deleted file mode 100644 index 901d1cd9..00000000 --- a/codex-lens/tests/test_compare_ccw_smart_search_stage2.py +++ /dev/null @@ -1,350 +0,0 @@ -from __future__ import annotations - -import importlib.util -import json -import sys -from pathlib import Path -from types import SimpleNamespace - - -MODULE_PATH = Path(__file__).resolve().parents[1] / "benchmarks" / "compare_ccw_smart_search_stage2.py" -MODULE_NAME = "compare_ccw_smart_search_stage2_test_module" -MODULE_SPEC = importlib.util.spec_from_file_location(MODULE_NAME, MODULE_PATH) -assert MODULE_SPEC is not None and MODULE_SPEC.loader is not None -benchmark = importlib.util.module_from_spec(MODULE_SPEC) -sys.modules[MODULE_NAME] = benchmark -MODULE_SPEC.loader.exec_module(benchmark) - - -class _FakeChainResult: - def __init__(self, paths: list[str]) -> None: - self.results = [SimpleNamespace(path=path) for path in paths] - - -class _FakeEngine: - def __init__( - self, - *, - search_paths: list[str] | None = None, - cascade_paths: list[str] | None = None, - ) -> None: - self.search_paths = search_paths or [] - self.cascade_paths = cascade_paths or [] - self.search_calls: list[dict[str, object]] = [] - self.cascade_calls: list[dict[str, object]] = [] - - def search(self, query: str, source_path: Path, options: object) -> _FakeChainResult: - self.search_calls.append( - { - "query": query, - "source_path": source_path, - "options": options, - } - ) - return _FakeChainResult(self.search_paths) - - def cascade_search( - self, - query: str, - source_path: Path, - *, - k: int, - coarse_k: int, - options: object, - strategy: str, - ) -> _FakeChainResult: - self.cascade_calls.append( - { - "query": query, - "source_path": source_path, - "k": k, - "coarse_k": coarse_k, - "options": options, - "strategy": strategy, - } - ) - return _FakeChainResult(self.cascade_paths) - - -def test_strategy_specs_include_baselines_before_stage2_modes() -> None: - specs = benchmark._strategy_specs( - ["realtime", "static_global_graph"], - include_dense_baseline=True, - baseline_methods=["auto", "fts", "hybrid"], - ) - - assert [spec.strategy_key for spec in specs] == [ - "auto", - "fts", - "hybrid", - "dense_rerank", - "staged:realtime", - "staged:static_global_graph", - ] - - -def test_select_effective_method_matches_cli_auto_routing() -> None: - assert benchmark._select_effective_method("find_descendant_project_roots", "auto") == "fts" - assert benchmark._select_effective_method("build dist artifact output", "auto") == "fts" - assert benchmark._select_effective_method("embedding backend fastembed local litellm api config", "auto") == "fts" - assert benchmark._select_effective_method("get_reranker factory onnx backend selection", "auto") == "fts" - assert benchmark._select_effective_method("how does the authentication flow work", "auto") == "dense_rerank" - assert benchmark._select_effective_method("how smart_search keyword routing works", "auto") == "hybrid" - - -def test_filter_dataset_by_query_match_uses_case_insensitive_substring() -> None: - dataset = [ - {"query": "embedding backend fastembed local litellm api config", "relevant_paths": ["a"]}, - {"query": "get_reranker factory onnx backend selection", "relevant_paths": ["b"]}, - {"query": "how does smart search route keyword queries", "relevant_paths": ["c"]}, - ] - - filtered = benchmark._filter_dataset_by_query_match(dataset, "BACKEND") - assert [item["query"] for item in filtered] == [ - "embedding backend fastembed local litellm api config", - "get_reranker factory onnx backend selection", - ] - - narrow_filtered = benchmark._filter_dataset_by_query_match(dataset, "FASTEMBED") - assert [item["query"] for item in narrow_filtered] == [ - "embedding backend fastembed local litellm api config", - ] - - unfiltered = benchmark._filter_dataset_by_query_match(dataset, None) - assert [item["query"] for item in unfiltered] == [item["query"] for item in dataset] - - -def test_apply_query_limit_runs_after_filtering() -> None: - dataset = [ - {"query": "executeHybridMode dense_rerank semantic smart_search", "relevant_paths": ["a"]}, - {"query": "embedding backend fastembed local litellm api config", "relevant_paths": ["b"]}, - {"query": "reranker backend onnx api legacy configuration", "relevant_paths": ["c"]}, - ] - - filtered = benchmark._filter_dataset_by_query_match(dataset, "backend") - limited = benchmark._apply_query_limit(filtered, 1) - - assert [item["query"] for item in limited] == [ - "embedding backend fastembed local litellm api config", - ] - - -def test_make_progress_payload_reports_partial_completion() -> None: - args = SimpleNamespace( - queries_file=Path("queries.jsonl"), - k=10, - coarse_k=100, - ) - strategy_specs = [ - benchmark.StrategySpec(strategy_key="auto", strategy="auto", stage2_mode=None), - benchmark.StrategySpec(strategy_key="dense_rerank", strategy="dense_rerank", stage2_mode=None), - ] - evaluations = [ - benchmark.QueryEvaluation( - query="embedding backend fastembed local litellm api config", - intent="config", - notes=None, - relevant_paths=["codex-lens/src/codexlens/config.py"], - runs={ - "auto": benchmark.StrategyRun( - strategy_key="auto", - strategy="auto", - stage2_mode=None, - effective_method="fts", - execution_method="fts", - latency_ms=123.0, - topk_paths=["config.py"], - first_hit_rank=1, - hit_at_k=True, - recall_at_k=1.0, - generated_artifact_count=0, - test_file_count=0, - error=None, - ) - }, - ) - ] - - payload = benchmark._make_progress_payload( - args=args, - source_root=Path("D:/repo"), - strategy_specs=strategy_specs, - evaluations=evaluations, - query_index=1, - total_queries=3, - run_index=2, - total_runs=6, - current_query="embedding backend fastembed local litellm api config", - current_strategy_key="complete", - ) - - assert payload["status"] == "running" - assert payload["progress"]["completed_queries"] == 1 - assert payload["progress"]["completed_runs"] == 2 - assert payload["progress"]["total_runs"] == 6 - assert payload["strategy_keys"] == ["auto", "dense_rerank"] - assert payload["evaluations"][0]["runs"]["auto"]["effective_method"] == "fts" - - -def test_write_final_outputs_updates_progress_snapshot(tmp_path: Path) -> None: - output_path = tmp_path / "results.json" - progress_path = tmp_path / "progress.json" - payload = { - "status": "completed", - "query_count": 1, - "strategies": {"auto": {"effective_methods": {"fts": 1}}}, - } - - benchmark._write_final_outputs( - output_path=output_path, - progress_output=progress_path, - payload=payload, - ) - - assert json.loads(output_path.read_text(encoding="utf-8")) == payload - assert json.loads(progress_path.read_text(encoding="utf-8")) == payload - - -def test_build_parser_defaults_reranker_gpu_to_disabled() -> None: - parser = benchmark.build_parser() - args = parser.parse_args([]) - - assert args.embedding_use_gpu is False - assert args.reranker_use_gpu is False - assert args.reranker_model == benchmark.DEFAULT_LOCAL_ONNX_RERANKER_MODEL - - -def test_build_strategy_runtime_clones_config(monkeypatch, tmp_path: Path) -> None: - class _FakeRegistry: - def __init__(self) -> None: - self.initialized = False - - def initialize(self) -> None: - self.initialized = True - - class _FakeMapper: - pass - - class _FakeEngine: - def __init__(self, *, registry, mapper, config) -> None: - self.registry = registry - self.mapper = mapper - self.config = config - - monkeypatch.setattr(benchmark, "RegistryStore", _FakeRegistry) - monkeypatch.setattr(benchmark, "PathMapper", _FakeMapper) - monkeypatch.setattr(benchmark, "ChainSearchEngine", _FakeEngine) - - base_config = benchmark.Config(data_dir=tmp_path, reranker_use_gpu=False) - strategy_spec = benchmark.StrategySpec(strategy_key="dense_rerank", strategy="dense_rerank", stage2_mode=None) - - runtime = benchmark._build_strategy_runtime(base_config, strategy_spec) - - assert runtime.strategy_spec == strategy_spec - assert runtime.config is not base_config - assert runtime.config.reranker_use_gpu is False - assert runtime.registry.initialized is True - assert runtime.engine.config is runtime.config - - -def test_run_strategy_routes_auto_keyword_queries_to_fts_search() -> None: - engine = _FakeEngine( - search_paths=[ - "D:/repo/src/codexlens/storage/registry.py", - "D:/repo/build/lib/codexlens/storage/registry.py", - ] - ) - config = SimpleNamespace(cascade_strategy="staged", staged_stage2_mode="realtime") - relevant = {benchmark._normalize_path_key("D:/repo/src/codexlens/storage/registry.py")} - - run = benchmark._run_strategy( - engine, - config, - strategy_spec=benchmark.StrategySpec(strategy_key="auto", strategy="auto", stage2_mode=None), - query="find_descendant_project_roots", - source_path=Path("D:/repo"), - k=5, - coarse_k=20, - relevant=relevant, - ) - - assert len(engine.search_calls) == 1 - assert len(engine.cascade_calls) == 0 - assert run.effective_method == "fts" - assert run.execution_method == "fts" - assert run.hit_at_k is True - assert run.generated_artifact_count == 1 - assert run.test_file_count == 0 - - -def test_run_strategy_uses_cascade_for_dense_rerank_and_restores_config() -> None: - engine = _FakeEngine(cascade_paths=["D:/repo/src/tools/smart-search.ts"]) - config = SimpleNamespace(cascade_strategy="staged", staged_stage2_mode="static_global_graph") - relevant = {benchmark._normalize_path_key("D:/repo/src/tools/smart-search.ts")} - - run = benchmark._run_strategy( - engine, - config, - strategy_spec=benchmark.StrategySpec( - strategy_key="dense_rerank", - strategy="dense_rerank", - stage2_mode=None, - ), - query="how does smart search route keyword queries", - source_path=Path("D:/repo"), - k=5, - coarse_k=20, - relevant=relevant, - ) - - assert len(engine.search_calls) == 0 - assert len(engine.cascade_calls) == 1 - assert engine.cascade_calls[0]["strategy"] == "dense_rerank" - assert run.effective_method == "dense_rerank" - assert run.execution_method == "cascade" - assert run.hit_at_k is True - assert config.cascade_strategy == "staged" - assert config.staged_stage2_mode == "static_global_graph" - - -def test_summarize_runs_tracks_effective_method_and_artifact_pressure() -> None: - summary = benchmark._summarize_runs( - [ - benchmark.StrategyRun( - strategy_key="auto", - strategy="auto", - stage2_mode=None, - effective_method="fts", - execution_method="fts", - latency_ms=10.0, - topk_paths=["a"], - first_hit_rank=1, - hit_at_k=True, - recall_at_k=1.0, - generated_artifact_count=1, - test_file_count=0, - error=None, - ), - benchmark.StrategyRun( - strategy_key="auto", - strategy="auto", - stage2_mode=None, - effective_method="hybrid", - execution_method="hybrid", - latency_ms=30.0, - topk_paths=["b"], - first_hit_rank=None, - hit_at_k=False, - recall_at_k=0.0, - generated_artifact_count=0, - test_file_count=2, - error=None, - ), - ] - ) - - assert summary["effective_methods"] == {"fts": 1, "hybrid": 1} - assert summary["runs_with_generated_artifacts"] == 1 - assert summary["runs_with_test_files"] == 1 - assert summary["avg_generated_artifact_count"] == 0.5 - assert summary["avg_test_file_count"] == 1.0 diff --git a/codex-lens/tests/test_config.py b/codex-lens/tests/test_config.py deleted file mode 100644 index d6acb3fa..00000000 --- a/codex-lens/tests/test_config.py +++ /dev/null @@ -1,555 +0,0 @@ -"""Tests for CodexLens configuration system.""" - -import builtins -import json -import logging -import os -import tempfile -from pathlib import Path - -import pytest - -from codexlens.config import ( - WORKSPACE_DIR_NAME, - Config, - WorkspaceConfig, - _default_global_dir, - find_workspace_root, -) -from codexlens.errors import ConfigError - - -class TestDefaultGlobalDir: - """Tests for _default_global_dir function.""" - - def test_default_location(self): - """Test default location is ~/.codexlens.""" - # Clear any environment override - env_backup = os.environ.get("CODEXLENS_DATA_DIR") - if "CODEXLENS_DATA_DIR" in os.environ: - del os.environ["CODEXLENS_DATA_DIR"] - - try: - result = _default_global_dir() - assert result == (Path.home() / ".codexlens").resolve() - finally: - if env_backup is not None: - os.environ["CODEXLENS_DATA_DIR"] = env_backup - - def test_env_override(self): - """Test CODEXLENS_DATA_DIR environment variable override.""" - with tempfile.TemporaryDirectory() as tmpdir: - os.environ["CODEXLENS_DATA_DIR"] = tmpdir - try: - result = _default_global_dir() - assert result == Path(tmpdir).resolve() - finally: - del os.environ["CODEXLENS_DATA_DIR"] - - -class TestFindWorkspaceRoot: - """Tests for find_workspace_root function.""" - - def test_finds_workspace_in_current_dir(self): - """Test finding workspace when .codexlens is in current directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - base = Path(tmpdir) - (base / WORKSPACE_DIR_NAME).mkdir() - - result = find_workspace_root(base) - assert result == base.resolve() - - def test_finds_workspace_in_parent_dir(self): - """Test finding workspace in parent directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - base = Path(tmpdir) - (base / WORKSPACE_DIR_NAME).mkdir() - subdir = base / "src" / "components" - subdir.mkdir(parents=True) - - result = find_workspace_root(subdir) - assert result == base.resolve() - - def test_returns_none_when_not_found(self): - """Test returns None when no workspace found in isolated directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - # Create a deep nested directory to avoid finding user's home .codexlens - isolated = Path(tmpdir) / "a" / "b" / "c" - isolated.mkdir(parents=True) - result = find_workspace_root(isolated) - # May find user's .codexlens if it exists in parent dirs - # So we just check it doesn't find one in our temp directory - if result is not None: - assert WORKSPACE_DIR_NAME not in str(isolated) - - def test_does_not_find_file_as_workspace(self): - """Test that a file named .codexlens is not recognized as workspace.""" - with tempfile.TemporaryDirectory() as tmpdir: - base = Path(tmpdir) - # Create isolated subdirectory - subdir = base / "project" - subdir.mkdir() - (subdir / WORKSPACE_DIR_NAME).write_text("not a directory") - - result = find_workspace_root(subdir) - # Should not find the file as workspace - if result is not None: - assert result != subdir - - -class TestConfig: - """Tests for Config class.""" - - def test_default_config(self): - """Test creating config with defaults.""" - with tempfile.TemporaryDirectory() as tmpdir: - os.environ["CODEXLENS_DATA_DIR"] = tmpdir - try: - config = Config() - assert config.data_dir == Path(tmpdir).resolve() - assert config.venv_path == Path(tmpdir).resolve() / "venv" - finally: - del os.environ["CODEXLENS_DATA_DIR"] - - def test_creates_data_dir(self): - """Test that data_dir is created on init.""" - with tempfile.TemporaryDirectory() as tmpdir: - data_dir = Path(tmpdir) / "new_dir" - config = Config(data_dir=data_dir) - assert data_dir.exists() - - def test_post_init_permission_error_includes_path_and_cause(self, monkeypatch): - """PermissionError during __post_init__ should raise ConfigError with context.""" - with tempfile.TemporaryDirectory() as tmpdir: - data_dir = Path(tmpdir) / "blocked" - venv_path = Path(tmpdir) / "venv" - expected_data_dir = data_dir.expanduser().resolve() - - real_mkdir = Path.mkdir - - def guarded_mkdir(self, *args, **kwargs): - if self == expected_data_dir: - raise PermissionError("Permission denied") - return real_mkdir(self, *args, **kwargs) - - monkeypatch.setattr(Path, "mkdir", guarded_mkdir) - - with pytest.raises(ConfigError) as excinfo: - Config(data_dir=data_dir, venv_path=venv_path) - - message = str(excinfo.value) - assert str(expected_data_dir) in message - assert "permission" in message.lower() - assert "PermissionError" in message - assert isinstance(excinfo.value.__cause__, PermissionError) - - def test_post_init_os_error_includes_path_and_cause(self, monkeypatch): - """OSError during __post_init__ should raise ConfigError with context.""" - with tempfile.TemporaryDirectory() as tmpdir: - data_dir = Path(tmpdir) / "invalid" - venv_path = Path(tmpdir) / "venv" - expected_data_dir = data_dir.expanduser().resolve() - - real_mkdir = Path.mkdir - - def guarded_mkdir(self, *args, **kwargs): - if self == expected_data_dir: - raise OSError("Invalid path") - return real_mkdir(self, *args, **kwargs) - - monkeypatch.setattr(Path, "mkdir", guarded_mkdir) - - with pytest.raises(ConfigError) as excinfo: - Config(data_dir=data_dir, venv_path=venv_path) - - message = str(excinfo.value) - assert str(expected_data_dir) in message - assert "permission" not in message.lower() - assert "filesystem" in message.lower() - assert "OSError" in message - assert isinstance(excinfo.value.__cause__, OSError) - - def test_supported_languages(self): - """Test default supported languages.""" - with tempfile.TemporaryDirectory() as tmpdir: - os.environ["CODEXLENS_DATA_DIR"] = tmpdir - try: - config = Config() - assert "python" in config.supported_languages - assert "javascript" in config.supported_languages - assert "typescript" in config.supported_languages - assert "java" in config.supported_languages - assert "go" in config.supported_languages - assert "swift" in config.supported_languages - finally: - del os.environ["CODEXLENS_DATA_DIR"] - - def test_language_for_path_swift(self): - """Swift (.swift) files should be recognized as code.""" - with tempfile.TemporaryDirectory() as tmpdir: - config = Config(data_dir=Path(tmpdir)) - assert config.language_for_path("x.swift") == "swift" - assert config.language_for_path("X.SWIFT") == "swift" - assert config.category_for_path("x.swift") == "code" - - def test_cache_dir_property(self): - """Test cache_dir property.""" - with tempfile.TemporaryDirectory() as tmpdir: - config = Config(data_dir=Path(tmpdir)) - assert config.cache_dir == Path(tmpdir).resolve() / "cache" - - def test_index_dir_property(self): - """Test index_dir property.""" - with tempfile.TemporaryDirectory() as tmpdir: - config = Config(data_dir=Path(tmpdir)) - assert config.index_dir == Path(tmpdir).resolve() / "index" - - def test_db_path_property(self): - """Test db_path property.""" - with tempfile.TemporaryDirectory() as tmpdir: - config = Config(data_dir=Path(tmpdir)) - assert config.db_path == Path(tmpdir).resolve() / "index" / "codexlens.db" - - def test_ensure_runtime_dirs(self): - """Test ensure_runtime_dirs creates directories.""" - with tempfile.TemporaryDirectory() as tmpdir: - config = Config(data_dir=Path(tmpdir)) - config.ensure_runtime_dirs() - assert config.cache_dir.exists() - assert config.index_dir.exists() - - def test_ensure_runtime_dirs_permission_error_includes_path_and_cause(self, monkeypatch): - """PermissionError during ensure_runtime_dirs should raise ConfigError with context.""" - with tempfile.TemporaryDirectory() as tmpdir: - config = Config(data_dir=Path(tmpdir)) - target_dir = config.cache_dir - - real_mkdir = Path.mkdir - - def guarded_mkdir(self, *args, **kwargs): - if self == target_dir: - raise PermissionError("Permission denied") - return real_mkdir(self, *args, **kwargs) - - monkeypatch.setattr(Path, "mkdir", guarded_mkdir) - - with pytest.raises(ConfigError) as excinfo: - config.ensure_runtime_dirs() - - message = str(excinfo.value) - assert str(target_dir) in message - assert "permission" in message.lower() - assert "PermissionError" in message - assert isinstance(excinfo.value.__cause__, PermissionError) - - def test_ensure_runtime_dirs_os_error_includes_path_and_cause(self, monkeypatch): - """OSError during ensure_runtime_dirs should raise ConfigError with context.""" - with tempfile.TemporaryDirectory() as tmpdir: - config = Config(data_dir=Path(tmpdir)) - target_dir = config.cache_dir - - real_mkdir = Path.mkdir - - def guarded_mkdir(self, *args, **kwargs): - if self == target_dir: - raise OSError("Invalid path") - return real_mkdir(self, *args, **kwargs) - - monkeypatch.setattr(Path, "mkdir", guarded_mkdir) - - with pytest.raises(ConfigError) as excinfo: - config.ensure_runtime_dirs() - - message = str(excinfo.value) - assert str(target_dir) in message - assert "permission" not in message.lower() - assert "filesystem" in message.lower() - assert "OSError" in message - assert isinstance(excinfo.value.__cause__, OSError) - - def test_language_for_path_python(self): - """Test language detection for Python files.""" - with tempfile.TemporaryDirectory() as tmpdir: - os.environ["CODEXLENS_DATA_DIR"] = tmpdir - try: - config = Config() - assert config.language_for_path("test.py") == "python" - assert config.language_for_path("/path/to/file.py") == "python" - finally: - del os.environ["CODEXLENS_DATA_DIR"] - - def test_language_for_path_javascript(self): - """Test language detection for JavaScript files.""" - with tempfile.TemporaryDirectory() as tmpdir: - os.environ["CODEXLENS_DATA_DIR"] = tmpdir - try: - config = Config() - assert config.language_for_path("test.js") == "javascript" - assert config.language_for_path("component.jsx") == "javascript" - finally: - del os.environ["CODEXLENS_DATA_DIR"] - - def test_language_for_path_typescript(self): - """Test language detection for TypeScript files.""" - with tempfile.TemporaryDirectory() as tmpdir: - os.environ["CODEXLENS_DATA_DIR"] = tmpdir - try: - config = Config() - assert config.language_for_path("test.ts") == "typescript" - assert config.language_for_path("component.tsx") == "typescript" - finally: - del os.environ["CODEXLENS_DATA_DIR"] - - def test_language_for_path_unknown(self): - """Test language detection for unknown files.""" - with tempfile.TemporaryDirectory() as tmpdir: - os.environ["CODEXLENS_DATA_DIR"] = tmpdir - try: - config = Config() - assert config.language_for_path("test.xyz") is None - assert config.language_for_path("data.csv") is None - finally: - del os.environ["CODEXLENS_DATA_DIR"] - - def test_language_for_path_case_insensitive(self): - """Test language detection is case insensitive.""" - with tempfile.TemporaryDirectory() as tmpdir: - os.environ["CODEXLENS_DATA_DIR"] = tmpdir - try: - config = Config() - assert config.language_for_path("TEST.PY") == "python" - assert config.language_for_path("File.Js") == "javascript" - finally: - del os.environ["CODEXLENS_DATA_DIR"] - - def test_rules_for_language(self): - """Test getting parsing rules for a language.""" - with tempfile.TemporaryDirectory() as tmpdir: - os.environ["CODEXLENS_DATA_DIR"] = tmpdir - try: - config = Config() - rules = config.rules_for_language("python") - assert "max_chunk_chars" in rules - assert "max_chunk_lines" in rules - assert "overlap_lines" in rules - finally: - del os.environ["CODEXLENS_DATA_DIR"] - - -class TestConfigLoadSettings: - """Tests for Config.load_settings behavior and logging.""" - - def test_load_settings_logs_warning_on_malformed_json(self, caplog): - """Malformed JSON in settings file should trigger warning log.""" - with tempfile.TemporaryDirectory() as tmpdir: - config = Config(data_dir=Path(tmpdir)) - config.settings_path.write_text("{", encoding="utf-8") - - with caplog.at_level(logging.WARNING): - config.load_settings() - - records = [r for r in caplog.records if r.name == "codexlens.config"] - assert any("Failed to load settings from" in r.message for r in records) - assert any("JSONDecodeError" in r.message for r in records) - assert any(str(config.settings_path) in r.message for r in records) - - def test_load_settings_logs_warning_on_permission_error(self, monkeypatch, caplog): - """Permission errors opening settings file should trigger warning log.""" - with tempfile.TemporaryDirectory() as tmpdir: - config = Config(data_dir=Path(tmpdir)) - config.settings_path.write_text("{}", encoding="utf-8") - - real_open = builtins.open - - def guarded_open(path, mode="r", *args, **kwargs): - if Path(path) == config.settings_path and "r" in mode: - raise PermissionError("Permission denied") - return real_open(path, mode, *args, **kwargs) - - monkeypatch.setattr(builtins, "open", guarded_open) - - with caplog.at_level(logging.WARNING): - config.load_settings() - - records = [r for r in caplog.records if r.name == "codexlens.config"] - assert any("Failed to load settings from" in r.message for r in records) - assert any("PermissionError" in r.message for r in records) - - def test_load_settings_loads_valid_settings_without_warning(self, caplog): - """Valid settings should load without warning logs.""" - with tempfile.TemporaryDirectory() as tmpdir: - config = Config(data_dir=Path(tmpdir)) - config.settings_path.write_text( - json.dumps( - { - "embedding": { - "backend": "fastembed", - "model": "multilingual", - "use_gpu": False, - }, - "llm": { - "enabled": True, - "tool": "gemini", - "timeout_ms": 1234, - "batch_size": 7, - }, - } - ), - encoding="utf-8", - ) - - with caplog.at_level(logging.WARNING): - config.load_settings() - - records = [r for r in caplog.records if r.name == "codexlens.config"] - assert not records - assert config.embedding_backend == "fastembed" - assert config.embedding_model == "multilingual" - assert config.embedding_use_gpu is False - assert config.llm_enabled is True - assert config.llm_tool == "gemini" - assert config.llm_timeout_ms == 1234 - assert config.llm_batch_size == 7 - - def test_load_settings_logs_warning_on_invalid_embedding_backend(self, caplog): - """Invalid embedding backend should trigger warning log and keep default.""" - with tempfile.TemporaryDirectory() as tmpdir: - config = Config(data_dir=Path(tmpdir)) - default_backend = config.embedding_backend - config.settings_path.write_text( - json.dumps({"embedding": {"backend": "invalid-backend"}}), - encoding="utf-8", - ) - - with caplog.at_level(logging.WARNING): - config.load_settings() - - records = [r for r in caplog.records if r.name == "codexlens.config"] - assert any("Invalid embedding backend in" in r.message for r in records) - assert config.embedding_backend == default_backend - - -class TestWorkspaceConfig: - """Tests for WorkspaceConfig class.""" - - def test_create_workspace_config(self): - """Test creating a workspace config.""" - with tempfile.TemporaryDirectory() as tmpdir: - workspace = WorkspaceConfig(workspace_root=Path(tmpdir)) - assert workspace.workspace_root == Path(tmpdir).resolve() - - def test_codexlens_dir_property(self): - """Test codexlens_dir property.""" - with tempfile.TemporaryDirectory() as tmpdir: - workspace = WorkspaceConfig(workspace_root=Path(tmpdir)) - assert workspace.codexlens_dir == Path(tmpdir).resolve() / WORKSPACE_DIR_NAME - - def test_db_path_property(self): - """Test db_path property.""" - with tempfile.TemporaryDirectory() as tmpdir: - workspace = WorkspaceConfig(workspace_root=Path(tmpdir)) - expected = Path(tmpdir).resolve() / WORKSPACE_DIR_NAME / "index.db" - assert workspace.db_path == expected - - def test_cache_dir_property(self): - """Test cache_dir property.""" - with tempfile.TemporaryDirectory() as tmpdir: - workspace = WorkspaceConfig(workspace_root=Path(tmpdir)) - expected = Path(tmpdir).resolve() / WORKSPACE_DIR_NAME / "cache" - assert workspace.cache_dir == expected - - def test_initialize_creates_directories(self): - """Test initialize creates .codexlens directory structure.""" - with tempfile.TemporaryDirectory() as tmpdir: - workspace = WorkspaceConfig(workspace_root=Path(tmpdir)) - workspace.initialize() - - assert workspace.codexlens_dir.exists() - assert workspace.cache_dir.exists() - assert (workspace.codexlens_dir / ".gitignore").exists() - - def test_initialize_creates_gitignore(self): - """Test initialize creates .gitignore with correct content.""" - with tempfile.TemporaryDirectory() as tmpdir: - workspace = WorkspaceConfig(workspace_root=Path(tmpdir)) - workspace.initialize() - - gitignore = workspace.codexlens_dir / ".gitignore" - content = gitignore.read_text() - assert "cache/" in content - - def test_exists_false_when_not_initialized(self): - """Test exists returns False when not initialized.""" - with tempfile.TemporaryDirectory() as tmpdir: - workspace = WorkspaceConfig(workspace_root=Path(tmpdir)) - assert not workspace.exists() - - def test_exists_true_when_initialized_with_db(self): - """Test exists returns True when initialized with db.""" - with tempfile.TemporaryDirectory() as tmpdir: - workspace = WorkspaceConfig(workspace_root=Path(tmpdir)) - workspace.initialize() - # Create the db file to simulate full initialization - workspace.db_path.write_text("") - assert workspace.exists() - - def test_from_path_finds_workspace(self): - """Test from_path finds existing workspace.""" - with tempfile.TemporaryDirectory() as tmpdir: - base = Path(tmpdir) - (base / WORKSPACE_DIR_NAME).mkdir() - - workspace = WorkspaceConfig.from_path(base) - assert workspace is not None - assert workspace.workspace_root == base.resolve() - - def test_from_path_returns_none_when_not_found(self): - """Test from_path returns None when no workspace found in isolated directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - # Create isolated directory structure to avoid user's .codexlens - isolated = Path(tmpdir) / "a" / "b" / "c" - isolated.mkdir(parents=True) - workspace = WorkspaceConfig.from_path(isolated) - # May find user's .codexlens if it exists - if workspace is not None: - assert WORKSPACE_DIR_NAME not in str(isolated) - - def test_create_at_initializes_workspace(self): - """Test create_at creates and initializes workspace.""" - with tempfile.TemporaryDirectory() as tmpdir: - workspace = WorkspaceConfig.create_at(Path(tmpdir)) - assert workspace.codexlens_dir.exists() - assert workspace.cache_dir.exists() - - -class TestConfigEdgeCases: - """Edge case tests for configuration.""" - - def test_config_with_path_object(self): - """Test Config accepts Path objects.""" - with tempfile.TemporaryDirectory() as tmpdir: - config = Config(data_dir=Path(tmpdir)) - assert isinstance(config.data_dir, Path) - - def test_config_expands_user_path(self): - """Test Config expands ~ in paths.""" - with tempfile.TemporaryDirectory() as tmpdir: - os.environ["CODEXLENS_DATA_DIR"] = tmpdir - try: - config = Config() - # Just verify it doesn't crash and returns a resolved path - assert config.data_dir.is_absolute() - finally: - del os.environ["CODEXLENS_DATA_DIR"] - - def test_workspace_config_from_subdir(self): - """Test WorkspaceConfig.from_path works from subdirectory.""" - with tempfile.TemporaryDirectory() as tmpdir: - base = Path(tmpdir) - (base / WORKSPACE_DIR_NAME).mkdir() - deep_subdir = base / "a" / "b" / "c" / "d" - deep_subdir.mkdir(parents=True) - - workspace = WorkspaceConfig.from_path(deep_subdir) - assert workspace is not None - assert workspace.workspace_root == base.resolve() diff --git a/codex-lens/tests/test_config_cascade.py b/codex-lens/tests/test_config_cascade.py deleted file mode 100644 index d1c690e0..00000000 --- a/codex-lens/tests/test_config_cascade.py +++ /dev/null @@ -1,155 +0,0 @@ -"""Unit tests for Config cascade settings validation. - -Tests cover: -- Default cascade_strategy value -- Valid cascade strategies accepted by load_settings -- Invalid cascade strategy fallback behavior -- Staged cascade config defaults -""" - -from __future__ import annotations - -import json -import tempfile -from pathlib import Path -from unittest.mock import patch - -import pytest - -from codexlens.config import Config - - -# ============================================================================= -# Test Fixtures -# ============================================================================= - - -@pytest.fixture -def temp_config_dir(): - """Create temporary directory for config data_dir.""" - tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True) - yield Path(tmpdir.name) - try: - tmpdir.cleanup() - except (PermissionError, OSError): - pass - - -# ============================================================================= -# Tests: cascade config defaults -# ============================================================================= - - -class TestConfigCascadeDefaults: - """Tests for Config cascade-related defaults and load_settings().""" - - def test_default_cascade_strategy(self, temp_config_dir): - """Default cascade_strategy should be 'binary'.""" - config = Config(data_dir=temp_config_dir) - assert config.cascade_strategy == "binary" - - def test_valid_cascade_strategies(self, temp_config_dir): - """load_settings should accept all valid cascade strategies.""" - valid_strategies = ["binary", "binary_rerank", "dense_rerank", "staged"] - - for strategy in valid_strategies: - config = Config(data_dir=temp_config_dir) - settings = {"cascade": {"strategy": strategy}} - - settings_path = config.settings_path - settings_path.parent.mkdir(parents=True, exist_ok=True) - with open(settings_path, "w", encoding="utf-8") as f: - json.dump(settings, f) - - with patch.object(config, "_apply_env_overrides"): - config.load_settings() - - assert config.cascade_strategy == strategy, ( - f"Strategy '{strategy}' should be accepted" - ) - - def test_invalid_cascade_strategy_fallback(self, temp_config_dir): - """Invalid cascade strategy should keep default (not crash).""" - config = Config(data_dir=temp_config_dir) - settings = {"cascade": {"strategy": "invalid_strategy"}} - - settings_path = config.settings_path - settings_path.parent.mkdir(parents=True, exist_ok=True) - with open(settings_path, "w", encoding="utf-8") as f: - json.dump(settings, f) - - with patch.object(config, "_apply_env_overrides"): - config.load_settings() - - # Should keep the default "binary" strategy - assert config.cascade_strategy == "binary" - - def test_hybrid_cascade_strategy_alias_maps_to_binary_rerank(self, temp_config_dir): - """Hybrid is a backward-compat alias for binary_rerank.""" - config = Config(data_dir=temp_config_dir) - settings = {"cascade": {"strategy": "hybrid"}} - - settings_path = config.settings_path - settings_path.parent.mkdir(parents=True, exist_ok=True) - with open(settings_path, "w", encoding="utf-8") as f: - json.dump(settings, f) - - with patch.object(config, "_apply_env_overrides"): - config.load_settings() - - assert config.cascade_strategy == "binary_rerank" - - def test_staged_config_defaults(self, temp_config_dir): - """Staged cascade settings should have correct defaults.""" - config = Config(data_dir=temp_config_dir) - assert config.staged_coarse_k == 200 - assert config.staged_lsp_depth == 2 - assert config.staged_stage2_mode == "precomputed" - assert config.staged_clustering_strategy == "auto" - assert config.staged_clustering_min_size == 3 - assert config.enable_staged_rerank is True - assert config.cascade_coarse_k == 100 - assert config.cascade_fine_k == 10 - - def test_staged_settings_load_from_settings_json(self, temp_config_dir): - """load_settings should load staged.* settings when present.""" - config = Config(data_dir=temp_config_dir) - settings = { - "staged": { - "coarse_k": 250, - "lsp_depth": 3, - "stage2_mode": "static_global_graph", - "realtime_lsp_timeout_s": 11.0, - "realtime_lsp_depth": 2, - "realtime_lsp_max_nodes": 42, - "realtime_lsp_max_seeds": 2, - "realtime_lsp_max_concurrent": 4, - "realtime_lsp_warmup_s": 0.5, - "realtime_lsp_resolve_symbols": True, - "clustering_strategy": "path", - "clustering_min_size": 7, - "enable_rerank": False, - } - } - - settings_path = config.settings_path - settings_path.parent.mkdir(parents=True, exist_ok=True) - with open(settings_path, "w", encoding="utf-8") as f: - json.dump(settings, f) - - with patch.object(config, "_apply_env_overrides"): - config.load_settings() - - assert config.staged_coarse_k == 250 - assert config.staged_lsp_depth == 3 - assert config.staged_stage2_mode == "static_global_graph" - assert config.staged_realtime_lsp_timeout_s == 11.0 - assert config.staged_realtime_lsp_depth == 2 - assert config.staged_realtime_lsp_max_nodes == 42 - assert config.staged_realtime_lsp_max_seeds == 2 - assert config.staged_realtime_lsp_max_concurrent == 4 - assert config.staged_realtime_lsp_warmup_s == 0.5 - assert config.staged_realtime_lsp_resolve_symbols is True - assert config.staged_clustering_strategy == "path" - assert config.staged_clustering_min_size == 7 - assert config.enable_staged_rerank is False diff --git a/codex-lens/tests/test_config_ignore_patterns.py b/codex-lens/tests/test_config_ignore_patterns.py deleted file mode 100644 index 5cc356a4..00000000 --- a/codex-lens/tests/test_config_ignore_patterns.py +++ /dev/null @@ -1,25 +0,0 @@ -from __future__ import annotations - -import json -from pathlib import Path - -from codexlens.config import Config - - -def test_load_settings_reads_ignore_patterns_and_extension_filters(tmp_path: Path) -> None: - settings_path = tmp_path / "settings.json" - settings_path.write_text( - json.dumps( - { - "ignore_patterns": ["frontend/dist", "coverage"], - "extension_filters": ["*.min.js", "*.map"], - } - ), - encoding="utf-8", - ) - - config = Config(data_dir=tmp_path) - config.load_settings() - - assert config.ignore_patterns == ["frontend/dist", "coverage"] - assert config.extension_filters == ["*.min.js", "*.map"] diff --git a/codex-lens/tests/test_config_search_env_overrides.py b/codex-lens/tests/test_config_search_env_overrides.py deleted file mode 100644 index f49d2880..00000000 --- a/codex-lens/tests/test_config_search_env_overrides.py +++ /dev/null @@ -1,83 +0,0 @@ -"""Unit tests for Config .env overrides for final search ranking penalties.""" - -from __future__ import annotations - -import tempfile -from pathlib import Path - -import pytest - -from codexlens.config import Config - - -@pytest.fixture -def temp_config_dir() -> Path: - """Create temporary directory for config data_dir.""" - tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True) - yield Path(tmpdir.name) - try: - tmpdir.cleanup() - except (PermissionError, OSError): - pass - - -def test_search_penalty_env_overrides_apply(temp_config_dir: Path) -> None: - config = Config(data_dir=temp_config_dir) - - env_path = temp_config_dir / ".env" - env_path.write_text( - "\n".join( - [ - "TEST_FILE_PENALTY=0.25", - "GENERATED_FILE_PENALTY=0.4", - "", - ] - ), - encoding="utf-8", - ) - - config.load_settings() - - assert config.test_file_penalty == 0.25 - assert config.generated_file_penalty == 0.4 - - -def test_reranker_gpu_env_override_apply(temp_config_dir: Path) -> None: - config = Config(data_dir=temp_config_dir) - - env_path = temp_config_dir / ".env" - env_path.write_text( - "\n".join( - [ - "RERANKER_USE_GPU=false", - "", - ] - ), - encoding="utf-8", - ) - - config.load_settings() - - assert config.reranker_use_gpu is False - - -def test_search_penalty_env_overrides_invalid_ignored(temp_config_dir: Path) -> None: - config = Config(data_dir=temp_config_dir) - - env_path = temp_config_dir / ".env" - env_path.write_text( - "\n".join( - [ - "TEST_FILE_PENALTY=oops", - "GENERATED_FILE_PENALTY=nope", - "", - ] - ), - encoding="utf-8", - ) - - config.load_settings() - - assert config.test_file_penalty == 0.15 - assert config.generated_file_penalty == 0.35 - assert config.reranker_use_gpu is True diff --git a/codex-lens/tests/test_config_staged_env_overrides.py b/codex-lens/tests/test_config_staged_env_overrides.py deleted file mode 100644 index cfe9555a..00000000 --- a/codex-lens/tests/test_config_staged_env_overrides.py +++ /dev/null @@ -1,136 +0,0 @@ -"""Unit tests for Config .env overrides for staged/cascade settings.""" - -from __future__ import annotations - -import tempfile -from pathlib import Path - -import pytest - -from codexlens.config import Config - - -@pytest.fixture -def temp_config_dir() -> Path: - """Create temporary directory for config data_dir.""" - tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True) - yield Path(tmpdir.name) - try: - tmpdir.cleanup() - except (PermissionError, OSError): - pass - - -def test_staged_env_overrides_apply(temp_config_dir: Path) -> None: - config = Config(data_dir=temp_config_dir) - - env_path = temp_config_dir / ".env" - env_path.write_text( - "\n".join( - [ - "ENABLE_CASCADE_SEARCH=true", - "CASCADE_STRATEGY=staged", - "CASCADE_COARSE_K=111", - "CASCADE_FINE_K=7", - "STAGED_STAGE2_MODE=realtime", - "STAGED_CLUSTERING_STRATEGY=path", - "STAGED_CLUSTERING_MIN_SIZE=5", - "ENABLE_STAGED_RERANK=false", - "STAGED_REALTIME_LSP_TIMEOUT_S=12.5", - "STAGED_REALTIME_LSP_DEPTH=2", - "STAGED_REALTIME_LSP_MAX_NODES=123", - "STAGED_REALTIME_LSP_MAX_SEEDS=3", - "STAGED_REALTIME_LSP_MAX_CONCURRENT=4", - "STAGED_REALTIME_LSP_WARMUP_S=0.25", - "STAGED_REALTIME_LSP_RESOLVE_SYMBOLS=yes", - "", - ] - ), - encoding="utf-8", - ) - - config.load_settings() - - assert config.enable_cascade_search is True - assert config.cascade_strategy == "staged" - assert config.cascade_coarse_k == 111 - assert config.cascade_fine_k == 7 - - assert config.staged_stage2_mode == "realtime" - assert config.staged_clustering_strategy == "path" - assert config.staged_clustering_min_size == 5 - assert config.enable_staged_rerank is False - assert config.staged_realtime_lsp_timeout_s == 12.5 - assert config.staged_realtime_lsp_depth == 2 - assert config.staged_realtime_lsp_max_nodes == 123 - assert config.staged_realtime_lsp_max_seeds == 3 - assert config.staged_realtime_lsp_max_concurrent == 4 - assert config.staged_realtime_lsp_warmup_s == 0.25 - assert config.staged_realtime_lsp_resolve_symbols is True - - -def test_staged_env_overrides_prefixed_wins(temp_config_dir: Path) -> None: - config = Config(data_dir=temp_config_dir) - - env_path = temp_config_dir / ".env" - env_path.write_text( - "\n".join( - [ - "STAGED_CLUSTERING_STRATEGY=score", - "CODEXLENS_STAGED_CLUSTERING_STRATEGY=path", - "STAGED_STAGE2_MODE=precomputed", - "CODEXLENS_STAGED_STAGE2_MODE=realtime", - "", - ] - ), - encoding="utf-8", - ) - - config.load_settings() - - assert config.staged_clustering_strategy == "path" - assert config.staged_stage2_mode == "realtime" - - -def test_staged_env_overrides_invalid_ignored(temp_config_dir: Path) -> None: - config = Config(data_dir=temp_config_dir) - - env_path = temp_config_dir / ".env" - env_path.write_text( - "\n".join( - [ - "STAGED_STAGE2_MODE=bogus", - "STAGED_CLUSTERING_STRATEGY=embedding_remote", - "STAGED_REALTIME_LSP_TIMEOUT_S=nope", - "CASCADE_STRATEGY=???", - "", - ] - ), - encoding="utf-8", - ) - - config.load_settings() - - assert config.cascade_strategy == "binary" - assert config.staged_stage2_mode == "precomputed" - assert config.staged_clustering_strategy == "auto" - assert config.staged_realtime_lsp_timeout_s == 30.0 - - -def test_cascade_strategy_hybrid_alias_env_override(temp_config_dir: Path) -> None: - config = Config(data_dir=temp_config_dir) - - env_path = temp_config_dir / ".env" - env_path.write_text( - "\n".join( - [ - "CASCADE_STRATEGY=hybrid", - "", - ] - ), - encoding="utf-8", - ) - - config.load_settings() - - assert config.cascade_strategy == "binary_rerank" diff --git a/codex-lens/tests/test_deepwiki_store.py b/codex-lens/tests/test_deepwiki_store.py deleted file mode 100644 index 1d58a8bb..00000000 --- a/codex-lens/tests/test_deepwiki_store.py +++ /dev/null @@ -1,410 +0,0 @@ -"""Unit tests for DeepWikiStore.""" - -from __future__ import annotations - -import hashlib -import tempfile -from datetime import datetime -from pathlib import Path - -import pytest - -from codexlens.storage.deepwiki_store import DeepWikiStore -from codexlens.storage.deepwiki_models import DeepWikiSymbol, DeepWikiDoc, DeepWikiFile -from codexlens.errors import StorageError - - -from codexlens.storage.deepwiki_store import DeepWikiStore - - -from codexlens.storage.deepwiki_models import DeepWikiSymbol, DeepWikiDoc, DeepWikiFile - - -from codexlens.errors import StorageError - - -import pytest - - -from codexlens.storage.deepwiki_store import DeepWikiStore -from codexlens.storage.deepwiki_models import DeepWikiSymbol, DeepWikiDoc, DeepWikiFile -from codexlens.errors import StorageError - -from pathlib import Path -import tempfile - - -from datetime import datetime - - -from codexlens.storage.deepwiki_store import DeepWikiStore -from codexlens.storage.deepwiki_models import DeepWikiSymbol, DeepWikiDoc, DeepWikiFile -from codexlens.errors import StorageError - - -import os - -@pytest.fixture -def temp_db_path(tmp_path): - """Create a temporary database file.""" - db_file = tmp_path / "deepwiki_test.db" - return str(db_file) - - return DeepWikiStore(db_path=db_file) - - - def test_initialize_creates_schema(self): - store = DeepWikiStore(db_path=db_file) - assert Path.exists(db_file) - assert store.db_path == to str(db_file) - with store: - conn = store._get_connection() - - # Check schema was created - cursor = conn.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='deepwiki_files'" - ).fetchone() - assert cursor is not None - - cursor = conn.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='deepwiki_docs'" - ).fetchone() - assert cursor is not None - - cursor = conn.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='deepwiki_symbols'" - ).fetchone() - assert cursor is not None - - # Check deepwiki_schema table - cursor = conn.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='deepwiki_schema'" - ).fetchone() - assert cursor is not None - - # Verify version was inserted - row = cursor.execute( - "SELECT version FROM deepwiki_schema" - ).fetchone() - assert row is not None - assert row["version"] == 1 - - # Check deepwiki_files table - cursor = conn.execute( - "PRAGMA table_info(deepwiki_files)" - ).fetchall() - expected_columns = {"id", "path", "content_hash", "last_indexed", "symbols_count", "docs_generated"} - assert expected_columns == {"id", "path", "content_hash", "last_indexed", "symbols_count", "docs_generated"} - assert len(expected_columns) == 4 - - # Check deepwiki_docs table - cursor = conn.execute( - "PRAGMA table_info(deepwiki_docs)" - ).fetchall() - expected_columns = {"id", "path", "content_hash", "symbols", "generated_at", "llm_tool"} - assert len(expected_columns) == 6 - - # Check deepwiki_symbols table - cursor = conn.execute( - "PRAGMA table_info(deepwiki_symbols)" - ).fetchall() - expected_columns == { - "id", - "name", - "type", - "source_file", - "doc_file", - "anchor", - "start_line", - "end_line", - "created_at", - "updated_at", - } - assert len(expected_columns) == 12 - - # Check indexes - for idx_name in ["idx_deepwiki_files_path", "idx_deepwiki_files_hash", - "idx_deepwiki_docs_path", "idx_deepwiki_symbols_name", - "idx_deepwiki_symbols_source", "idx_deepwiki_symbols_doc"]: - assert cursor is not None - - def test_add_file(self, temp_db_path): - """Test add_file creates a file record.""" - store = DeepWikiStore(db_path=db_file) - test_file = tmp_path / "test_file.py" - content = "test file content" - store.add_file(test_file) - - # Verify file was added - retrieved_file = store.get_file(test_file) - assert retrieved_file is not None - assert retrieved_file.path == str(test_file) - assert retrieved_file.content_hash == content_hash - assert retrieved_file.symbols_count == 1 - assert retrieved_file.docs_generated is False - - # Verify last_indexed - assert retrieved_file.last_indexed is not None - assert isinstance(retrieved_file.last_indexed, datetime) - - - # Verify symbols_count was updated - assert retrieved_file.symbols_count == 1 - - def test_get_file_hash(self, temp_db_path): - """Test get_file_hash returns correct hash.""" - test_file = tmp_path / "test_hash.py" - content_hash = store.compute_file_hash(test_file) - - # File not in DB yet - retrieved_hash = store.get_file_hash(test_file) - assert retrieved_hash is None - - # Create the test file - test_file2 = tmp_path / "test_file2.py" - test_file2.write_text("test file 2") - store.add_file(test_file2) - - # Now get_file_hash should work - retrieved_hash2 = store.get_file_hash(test_file2) - assert retrieved_hash2 is not None - assert retrieved_hash2 == content_hash - - # Verify get_file_hash returns None for unknown file - unknown_file = tmp_path / "unknown_file.txt" - retrieved_hash = store.get_file_hash(unknown_file) - assert retrieved_hash is None - - def test_get_symbols_for_file(self, temp_db_path): - """Test get_symbols_for_file returns symbols for a source file.""" - test_file = tmp_path / "test_source.py" - content = """Test source file with multiple symbols.""" -def test(source_file: str) -> Path: - return Path(source_file) - - # Create test file with multiple symbols - store.add_file(test_file) - for i in range(3): - symbols_data.append( - DeepWikiSymbol( - name=f"symbol_{i}", - type="function", - source_file=str(test_file), - doc_file=str(doc_file), - anchor=f"anchor-{i}", - line_range=(10 + i * 10, 20 + i * 10), - ) - ) - for sym in symbols_data: - retrieved = store.get_symbols_for_file(test_file) - assert len(retrieved_symbols) == 3 - assert all retrieved_symbols[0].source_file == str(test_file) - assert retrieved_symbols[0].line_range == (10, 20) - assert retrieved_symbols[0].doc_file == str(doc_file) - - # Verify first symbol has correct line_range - symbol = retrieved_symbols[0] - assert isinstance(symbol.line_range, tuple) - assert symbol.line_range[0] == 10 - assert symbol.line_range[1] == 20 - - # Verify get_file returns None for unknown file - retrieved_file = store.get_file(str(tmp_path / "nonexistent.py")) - assert retrieved_file is None - - def test_update_file_hash(self, temp_db_path): - """Test update_file_hash updates the hash for a tracked file.""" - test_file = tmp_path / "test_source.py" - content = """Test source file for update_file_hash.""" -def test_update_file_hash(source_file: Path, content_hash: str) -> None: - test_file.write_text("test file content") - store.add_file(test_file) - content_hash = store.compute_file_hash(test_file) - - # Update the hash - store.update_file_hash(test_file, content_hash) - - # Verify hash was updated - retrieved_hash = store.get_file_hash(test_file) - assert retrieved_hash == content_hash - - # Verify update with unchanged hash does nothing - store.update_file_hash(test_file, content_hash) - retrieved_hash2 = store.get_file_hash(test_file) - assert retrieved_hash == content_hash - - def test_remove_file(self, temp_db_path): - """Test remove_file removes file and associated symbols.""" - test_file = tmp_path / "test_source.py" - content = """Test source file for remove_file.""" - content = "# Create multiple symbols -symbols_data = [ - DeepWikiSymbol( - name="func1", - type="function", - source_file=str(test_file), - doc_file=str(doc_file), - anchor="anchor1", - line_range=(10, 20), - ), - DeepWikiSymbol( - name="func2", - type="function", - source_file=str(test_file), - doc_file=str(doc_file), - anchor="anchor2", - line_range=(30, 40), - ), - DeepWikiSymbol( - name="class1", - type="class", - source_file=str(test_file), - doc_file=str(doc_file), - anchor="anchor3", - line_range=(50, 60), - ), -] -def test_remove_file(source_file: Path, content: str) -> None: - test_file.write_text("test file content") - content_hash = store.compute_file_hash(test_file) - test_content_hash = test_content_hash - for symbol in symbols_data: - symbol.content_hash = test_content_hash - assert symbol.content_hash == content_hash - - # Add file to store - store.add_file(test_file) - symbols_data.append(symbol) - - # Add symbols - for symbol in symbols_data: - store.add_symbol(symbol) - - # Verify symbols were added - retrieved_symbols = store.get_symbols_for_file(test_file) - assert len(retrieved_symbols) == 3 - - # Verify first symbol - assert retrieved_symbols[0].name == "func1" - assert retrieved_symbols[0].type == "function" - assert retrieved_symbols[0].source_file == str(test_file) - assert retrieved_symbols[0].doc_file == str(doc_file) - assert retrieved_symbols[0].anchor == "anchor1" - assert retrieved_symbols[0].line_range == (10, 20) - - # Verify second symbol - assert retrieved_symbols[1].name == "func2" - assert retrieved_symbols[1].type == "function" - assert retrieved_symbols[1].source_file == str(test_file) - assert retrieved_symbols[1].doc_file == str(doc_file) - assert retrieved_symbols[1].anchor == "anchor2" - assert retrieved_symbols[1].line_range == (30, 40) - - # Verify third symbol - assert retrieved_symbols[2].name == "class1" - assert retrieved_symbols[2].type == "class" - assert retrieved_symbols[2].source_file == str(test_file) - assert retrieved_symbols[2].doc_file == str(doc_file) - assert retrieved_symbols[2].anchor == "anchor3" - assert retrieved_symbols[2].line_range == (50, 60) - - - # Verify remove_file deleted file and symbols - assert store.remove_file(test_file) is True - - # Verify symbols were deleted - remaining_symbols = store.get_symbols_for_file(test_file) - assert len(remaining_symbols) == 0 - - # Verify file was removed from database - with store: - conn = store._get_connection() - cursor = conn.execute( - "SELECT * FROM deepwiki_files WHERE path=?", - (str(test_file),) - ).fetchone() - assert cursor.fetchone() is None - - def test_compute_file_hash(self, temp_db_path): - """Test compute_file_hash returns correct SHA256 hash.""" - test_file = tmp_path / "test_hash.py" - content = """Test compute_file_hash.""" -def test_compute_file_hash(): - """Create a test file with known content.""" - test_file = tmp_path / "test_content.txt" - test_file.write_text("test content for hashing") - - # Compute hash - store = DeepWikiStore(db_path=temp_db_path) - computed_hash = store.compute_file_hash(test_file) - - assert computed_hash == "a" * 64 + 1" * 64 + 1" * 64 + 1" * 64 + 1" * 64 + 2" * 64 + 3" * 64 + 4" * 64 + 5" * 64 + 6" * 64 + 7" * 64 + 8" * 64 + 9" * 64 + "a" * 64 + "b" * 64 + 1" * 64 + 2" * 64 + 3" * 64 + 4" * 64 + 5" * 64 + 6" * 64 + 7" * 64 + 8" * 64 + 9" * 64 + "\n") - expected_hash = "a" * 64 + "b" * 64 + 1" * 64 + 2" * 64 + 3" * 64 + 4" * 64 + 5" * 64 + 6" * 64 + 7" * 64 + 8" * 64 + 9" * 64 - + hashlib.sha256(test_file.read_bytes()).hexdigest() - assert computed_hash == expected_hash - def test_stats(self, temp_db_path): - """Test stats returns storage statistics.""" - test_file = tmp_path / "test_stats.py" - content = """Test stats.""" -def test_stats(): - store = DeepWikiStore(db_path=temp_db_path) - store.initialize() - - stats = store.stats() - - assert stats["files"] == 1 - assert stats["symbols"] == 0 - assert stats["docs"] == 0 - assert stats["files_needing_docs"] == 1 - assert stats["db_path"] == str(temp_db_path / "deepwiki_test.db") - - # Close store - store.close() - - - # Verify files count - assert stats["files"] == 1 - # Verify symbols count - assert stats["symbols"] == 0 - # Verify docs count - assert stats["docs"] == 0 - # Verify files_needing_docs count - assert stats["files_needing_docs"] == 1 - # Verify db_path - assert stats["db_path"] == str(temp_db_path / "deepwiki_test.db") - - -def test_deepwiki_store_error_handling(): - """Test that DeepWikiStore handles Storage errors properly.""" - store = DeepWikiStore(db_path=temp_db_path) - - with pytest.raises(StorageError): - store._create_schema(conn) - - with pytest.raises(StorageError): - store.add_symbol( - DeepWikiSymbol( - name="test", - type="function", - source_file="test.py", - doc_file="test.md", - anchor="test-anchor", - line_range=(1, 10), - ) - ) - - # Test error handling on missing file - os.remove(test_file) - store.add_file(test_file) - - with pytest.raises(FileNotFoundError): - store.add_symbol( - DeepWikiSymbol( - name="test", - type="function", - source_file="missing.py", - doc_file="test.md", - anchor="test-anchor", - line_range=(1, 10), - ) - ) diff --git a/codex-lens/tests/test_deepwiki_types.py b/codex-lens/tests/test_deepwiki_types.py deleted file mode 100644 index 8c5a1c8f..00000000 --- a/codex-lens/tests/test_deepwiki_types.py +++ /dev/null @@ -1,14 +0,0 @@ -"""Unit tests for DeepWiki TypeScript types matching.""" - -from __future__ import annotations - -from pathlib import Path - -from ccw.src.types.deepwiki import ( - DeepWikiSymbol, - DeepWikiDoc, - DeepWikiFile, - DeepWikiStorageStats, -) - - diff --git a/codex-lens/tests/test_dual_fts.py b/codex-lens/tests/test_dual_fts.py deleted file mode 100644 index 1c3e1cb7..00000000 --- a/codex-lens/tests/test_dual_fts.py +++ /dev/null @@ -1,612 +0,0 @@ -"""Tests for Dual-FTS schema migration and functionality (P1). - -Tests dual FTS tables (files_fts_exact, files_fts_fuzzy) creation, trigger synchronization, -and migration from schema version 2 to version 4. -""" - -import sqlite3 -import tempfile -from pathlib import Path - -import pytest - -from codexlens.storage.dir_index import DirIndexStore - -# Check if pytest-benchmark is available -try: - import pytest_benchmark - BENCHMARK_AVAILABLE = True -except ImportError: - BENCHMARK_AVAILABLE = False - - -class TestDualFTSSchema: - """Tests for dual FTS schema creation and structure.""" - - @pytest.fixture - def temp_db(self): - """Create temporary database for testing.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - yield db_path - # Cleanup - if db_path.exists(): - db_path.unlink() - - @pytest.fixture - def index_store(self, temp_db): - """Create DirIndexStore with initialized database.""" - store = DirIndexStore(temp_db) - store.initialize() - yield store - store.close() - - def test_files_fts_exact_table_exists(self, index_store): - """Test files_fts_exact FTS5 table is created.""" - with index_store._get_connection() as conn: - cursor = conn.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='files_fts_exact'" - ) - result = cursor.fetchone() - assert result is not None, "files_fts_exact table should exist" - - def test_files_fts_fuzzy_table_exists(self, index_store): - """Test files_fts_fuzzy FTS5 table is created with trigram tokenizer.""" - with index_store._get_connection() as conn: - cursor = conn.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='files_fts_fuzzy'" - ) - result = cursor.fetchone() - assert result is not None, "files_fts_fuzzy table should exist" - - def test_fts_exact_tokenizer(self, index_store): - """Test files_fts_exact uses unicode61 tokenizer.""" - with index_store._get_connection() as conn: - # Check table creation SQL - cursor = conn.execute( - "SELECT sql FROM sqlite_master WHERE name='files_fts_exact'" - ) - result = cursor.fetchone() - assert result is not None - sql = result[0] - # Should use unicode61 tokenizer - assert "unicode61" in sql.lower() or "fts5" in sql.lower() - - def test_fts_fuzzy_tokenizer_fallback(self, index_store): - """Test files_fts_fuzzy uses trigram or falls back to unicode61.""" - with index_store._get_connection() as conn: - cursor = conn.execute( - "SELECT sql FROM sqlite_master WHERE name='files_fts_fuzzy'" - ) - result = cursor.fetchone() - assert result is not None - sql = result[0] - # Should use trigram or unicode61 as fallback - assert "trigram" in sql.lower() or "unicode61" in sql.lower() - - def test_dual_fts_trigger_synchronization(self, index_store, temp_db): - """Test triggers keep dual FTS tables synchronized with files table.""" - # Insert test file - test_path = "test/example.py" - test_content = "def test_function():\n pass" - - with index_store._get_connection() as conn: - # Insert into files table - name = test_path.split('/')[-1] - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, test_path, test_content, "python", 1234567890.0) - ) - conn.commit() - - # Check files_fts_exact has content - cursor = conn.execute( - "SELECT full_path, content FROM files_fts_exact WHERE full_path = ?", - (test_path,) - ) - exact_result = cursor.fetchone() - assert exact_result is not None, "files_fts_exact should have content via trigger" - assert exact_result[0] == test_path - assert exact_result[1] == test_content - - # Check files_fts_fuzzy has content - cursor = conn.execute( - "SELECT full_path, content FROM files_fts_fuzzy WHERE full_path = ?", - (test_path,) - ) - fuzzy_result = cursor.fetchone() - assert fuzzy_result is not None, "files_fts_fuzzy should have content via trigger" - assert fuzzy_result[0] == test_path - assert fuzzy_result[1] == test_content - - def test_dual_fts_update_trigger(self, index_store): - """Test UPDATE triggers synchronize dual FTS tables.""" - test_path = "test/update.py" - original_content = "original content" - updated_content = "updated content" - - with index_store._get_connection() as conn: - # Insert - name = test_path.split('/')[-1] - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, test_path, original_content, "python", 1234567890.0) - ) - conn.commit() - - # Update content - conn.execute( - "UPDATE files SET content = ? WHERE full_path = ?", - (updated_content, test_path) - ) - conn.commit() - - # Verify FTS tables have updated content - cursor = conn.execute( - "SELECT content FROM files_fts_exact WHERE full_path = ?", - (test_path,) - ) - assert cursor.fetchone()[0] == updated_content - - cursor = conn.execute( - "SELECT content FROM files_fts_fuzzy WHERE full_path = ?", - (test_path,) - ) - assert cursor.fetchone()[0] == updated_content - - def test_dual_fts_delete_trigger(self, index_store): - """Test DELETE triggers remove entries from dual FTS tables.""" - test_path = "test/delete.py" - - with index_store._get_connection() as conn: - # Insert - name = test_path.split('/')[-1] - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, test_path, "content", "python", 1234567890.0) - ) - conn.commit() - - # Delete - conn.execute("DELETE FROM files WHERE full_path = ?", (test_path,)) - conn.commit() - - # Verify FTS tables are cleaned up - cursor = conn.execute( - "SELECT COUNT(*) FROM files_fts_exact WHERE full_path = ?", - (test_path,) - ) - assert cursor.fetchone()[0] == 0 - - cursor = conn.execute( - "SELECT COUNT(*) FROM files_fts_fuzzy WHERE full_path = ?", - (test_path,) - ) - assert cursor.fetchone()[0] == 0 - - -class TestDualFTSMigration: - """Tests for schema migration to dual FTS (v2 → v4).""" - - @pytest.fixture - def v2_db(self): - """Create schema version 2 database (pre-dual-FTS).""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - - # Create v2 schema manually - conn = sqlite3.connect(db_path) - try: - # Set schema version using PRAGMA (not schema_version table) - conn.execute("PRAGMA user_version = 2") - - conn.executescript(""" - CREATE TABLE IF NOT EXISTS files ( - path TEXT PRIMARY KEY, - content TEXT, - language TEXT, - indexed_at TEXT - ); - - CREATE VIRTUAL TABLE IF NOT EXISTS files_fts USING fts5( - path, content, language, - content='files', content_rowid='rowid' - ); - """) - conn.commit() - finally: - conn.close() - - yield db_path - - # Cleanup - if db_path.exists(): - db_path.unlink() - - def test_migration_004_creates_dual_fts(self, v2_db): - """Test migration 004 creates dual FTS tables.""" - # Run migration - store = DirIndexStore(v2_db) - store.initialize() - - try: - # Verify tables exist - with store._get_connection() as conn: - cursor = conn.execute( - """SELECT name FROM sqlite_master - WHERE type='table' AND name IN ('files_fts_exact', 'files_fts_fuzzy')""" - ) - tables = [row[0] for row in cursor.fetchall()] - assert 'files_fts_exact' in tables, "Migration should create files_fts_exact" - assert 'files_fts_fuzzy' in tables, "Migration should create files_fts_fuzzy" - finally: - store.close() - - def test_migration_004_preserves_data(self, v2_db): - """Test migration preserves existing file data.""" - # Insert test data into v2 schema (using 'path' column) - conn = sqlite3.connect(v2_db) - test_files = [ - ("test/file1.py", "content1", "python"), - ("test/file2.js", "content2", "javascript"), - ] - conn.executemany( - "INSERT INTO files (path, content, language) VALUES (?, ?, ?)", - test_files - ) - conn.commit() - conn.close() - - # Run migration - store = DirIndexStore(v2_db) - store.initialize() - - try: - # Verify data preserved (should be migrated to full_path) - with store._get_connection() as conn: - cursor = conn.execute("SELECT full_path, content, language FROM files ORDER BY full_path") - result = [tuple(row) for row in cursor.fetchall()] - assert len(result) == 2 - assert result[0] == test_files[0] - assert result[1] == test_files[1] - finally: - store.close() - - def test_migration_004_updates_schema_version(self, v2_db): - """Test migration updates schema_version to 4.""" - # Run migration - store = DirIndexStore(v2_db) - store.initialize() - - try: - with store._get_connection() as conn: - # Check PRAGMA user_version (not schema_version table) - cursor = conn.execute("PRAGMA user_version") - version = cursor.fetchone()[0] - assert version >= 4, "Schema version should be upgraded to 4" - finally: - store.close() - - def test_migration_idempotent(self, v2_db): - """Test migration can run multiple times safely.""" - # Run migration twice - store1 = DirIndexStore(v2_db) - store1.initialize() # First migration - store1.close() - - store2 = DirIndexStore(v2_db) - store2.initialize() # Second migration (should be idempotent) - - try: - # Should not raise errors - with store2._get_connection() as conn: - cursor = conn.execute("SELECT COUNT(*) FROM files_fts_exact") - # Should work without errors - cursor.fetchone() - finally: - store2.close() - - -class TestTrigramAvailability: - """Tests for trigram tokenizer availability and fallback.""" - - @pytest.fixture - def temp_db(self): - """Create temporary database.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - yield db_path - if db_path.exists(): - db_path.unlink() - - def test_trigram_detection(self, temp_db): - """Test system detects trigram tokenizer availability.""" - store = DirIndexStore(temp_db) - store.initialize() - - try: - # Check SQLite version and trigram support - with store._get_connection() as conn: - cursor = conn.execute("SELECT sqlite_version()") - version = cursor.fetchone()[0] - print(f"SQLite version: {version}") - - # Try to create trigram FTS table - try: - conn.execute(""" - CREATE VIRTUAL TABLE test_trigram USING fts5( - content, - tokenize='trigram' - ) - """) - trigram_available = True - except sqlite3.OperationalError: - trigram_available = False - - # Cleanup test table - if trigram_available: - conn.execute("DROP TABLE IF EXISTS test_trigram") - - # Verify fuzzy table uses appropriate tokenizer - with store._get_connection() as conn: - cursor = conn.execute( - "SELECT sql FROM sqlite_master WHERE name='files_fts_fuzzy'" - ) - result = cursor.fetchone() - assert result is not None - sql = result[0] - - if trigram_available: - assert "trigram" in sql.lower(), "Should use trigram when available" - else: - # Should fallback to unicode61 - assert "unicode61" in sql.lower() or "fts5" in sql.lower() - finally: - store.close() - - -@pytest.mark.benchmark -class TestDualFTSPerformance: - """Benchmark tests for dual FTS overhead.""" - - @pytest.fixture - def populated_db(self): - """Create database with test files.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - - store = DirIndexStore(db_path) - store.initialize() - - # Insert 100 test files - with store._get_connection() as conn: - for i in range(100): - path = f"test/file{i}.py" - name = f"file{i}.py" - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, path, f"def function{i}():\n pass", "python", 1234567890.0) - ) - conn.commit() - - # Close store before yielding to avoid conflicts - store.close() - - yield db_path - - # Cleanup - if db_path.exists(): - db_path.unlink() - - @pytest.mark.skipif(not BENCHMARK_AVAILABLE, reason="pytest-benchmark not installed") - def test_insert_overhead(self, populated_db, benchmark): - """Benchmark INSERT overhead with dual FTS triggers.""" - store = DirIndexStore(populated_db) - store.initialize() - - try: - def insert_file(): - with store._get_connection() as conn: - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - ("test.py", "benchmark/test.py", "content", "python", 1234567890.0) - ) - conn.commit() - # Cleanup - conn.execute("DELETE FROM files WHERE full_path = 'benchmark/test.py'") - conn.commit() - - # Should complete in reasonable time (<100ms) - result = benchmark(insert_file) - assert result < 0.1 # 100ms - finally: - store.close() - - def test_search_fts_exact(self, populated_db): - """Test search on files_fts_exact returns results.""" - store = DirIndexStore(populated_db) - store.initialize() - - try: - with store._get_connection() as conn: - # Search for "def" which is a complete token in all files - cursor = conn.execute( - """SELECT full_path, bm25(files_fts_exact) as score - FROM files_fts_exact - WHERE files_fts_exact MATCH 'def' - ORDER BY score - LIMIT 10""" - ) - results = cursor.fetchall() - assert len(results) > 0, "Should find matches in exact FTS" - # Verify BM25 scores (negative = better) - for full_path, score in results: - assert score < 0, "BM25 scores should be negative" - finally: - store.close() - - def test_search_fts_fuzzy(self, populated_db): - """Test search on files_fts_fuzzy returns results.""" - store = DirIndexStore(populated_db) - store.initialize() - - try: - with store._get_connection() as conn: - # Search for "def" which is a complete token in all files - cursor = conn.execute( - """SELECT full_path, bm25(files_fts_fuzzy) as score - FROM files_fts_fuzzy - WHERE files_fts_fuzzy MATCH 'def' - ORDER BY score - LIMIT 10""" - ) - results = cursor.fetchall() - assert len(results) > 0, "Should find matches in fuzzy FTS" - finally: - store.close() - - def test_fuzzy_substring_matching(self, populated_db): - """Test fuzzy search finds partial token matches with trigram.""" - store = DirIndexStore(populated_db) - store.initialize() - - try: - # Check if trigram is available - with store._get_connection() as conn: - cursor = conn.execute( - "SELECT sql FROM sqlite_master WHERE name='files_fts_fuzzy'" - ) - fts_sql = cursor.fetchone()[0] - has_trigram = 'trigram' in fts_sql.lower() - - if not has_trigram: - pytest.skip("Trigram tokenizer not available, skipping fuzzy substring test") - - # Search for partial token "func" should match "function0", "function1", etc. - cursor = conn.execute( - """SELECT full_path, bm25(files_fts_fuzzy) as score - FROM files_fts_fuzzy - WHERE files_fts_fuzzy MATCH 'func' - ORDER BY score - LIMIT 10""" - ) - results = cursor.fetchall() - - # With trigram, should find matches - assert len(results) > 0, "Fuzzy search with trigram should find partial token matches" - - # Verify results contain expected files with "function" in content - for path, score in results: - assert "file" in path # All test files named "test/fileN.py" - assert score < 0 # BM25 scores are negative - finally: - store.close() - - -class TestMigrationRecovery: - """Tests for migration failure recovery and edge cases.""" - - @pytest.fixture - def corrupted_v2_db(self): - """Create v2 database with incomplete migration state.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - - conn = sqlite3.connect(db_path) - try: - # Create v2 schema with some data - conn.executescript(""" - PRAGMA user_version = 2; - - CREATE TABLE files ( - path TEXT PRIMARY KEY, - content TEXT, - language TEXT - ); - - INSERT INTO files VALUES ('test.py', 'content', 'python'); - - CREATE VIRTUAL TABLE files_fts USING fts5( - path, content, language, - content='files', content_rowid='rowid' - ); - """) - conn.commit() - finally: - conn.close() - - yield db_path - - if db_path.exists(): - db_path.unlink() - - def test_migration_preserves_data_on_failure(self, corrupted_v2_db): - """Test that data is preserved if migration encounters issues.""" - # Read original data - conn = sqlite3.connect(corrupted_v2_db) - cursor = conn.execute("SELECT path, content FROM files") - original_data = cursor.fetchall() - conn.close() - - # Attempt migration (may fail or succeed) - store = DirIndexStore(corrupted_v2_db) - try: - store.initialize() - except Exception: - # Even if migration fails, original data should be intact - pass - finally: - store.close() - - # Verify data still exists - conn = sqlite3.connect(corrupted_v2_db) - try: - # Check schema version to determine column name - cursor = conn.execute("PRAGMA user_version") - version = cursor.fetchone()[0] - - if version >= 4: - # Migration succeeded, use new column name - cursor = conn.execute("SELECT full_path, content FROM files WHERE full_path='test.py'") - else: - # Migration failed, use old column name - cursor = conn.execute("SELECT path, content FROM files WHERE path='test.py'") - - result = cursor.fetchone() - - # Data should still be there - assert result is not None, "Data should be preserved after migration attempt" - finally: - conn.close() - - def test_migration_idempotent_after_partial_failure(self, corrupted_v2_db): - """Test migration can be retried after partial failure.""" - store1 = DirIndexStore(corrupted_v2_db) - store2 = DirIndexStore(corrupted_v2_db) - - try: - # First attempt - try: - store1.initialize() - except Exception: - pass # May fail partially - - # Second attempt should succeed or fail gracefully - store2.initialize() # Should not crash - - # Verify database is in usable state - with store2._get_connection() as conn: - cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table'") - tables = [row[0] for row in cursor.fetchall()] - - # Should have files table (either old or new schema) - assert 'files' in tables - finally: - store1.close() - store2.close() - diff --git a/codex-lens/tests/test_embedder.py b/codex-lens/tests/test_embedder.py deleted file mode 100644 index 3d6850a1..00000000 --- a/codex-lens/tests/test_embedder.py +++ /dev/null @@ -1,85 +0,0 @@ -"""Tests for embedder cache concurrency.""" - -from __future__ import annotations - -import threading -import time - -import pytest - -import codexlens.semantic.embedder as embedder_module - - -def _patch_embedder_for_unit_tests(monkeypatch: pytest.MonkeyPatch) -> None: - """Make get_embedder() tests deterministic and fast (no model downloads).""" - - monkeypatch.setattr(embedder_module, "SEMANTIC_AVAILABLE", True) - monkeypatch.setattr(embedder_module, "get_optimal_providers", lambda *args, **kwargs: []) - monkeypatch.setattr(embedder_module, "is_gpu_available", lambda: False) - monkeypatch.setattr(embedder_module.Embedder, "_load_model", lambda self: None) - - -def test_embedder_instances_are_cached_and_reused(monkeypatch: pytest.MonkeyPatch) -> None: - _patch_embedder_for_unit_tests(monkeypatch) - embedder_module.clear_embedder_cache() - - first = embedder_module.get_embedder(profile="code", use_gpu=False) - second = embedder_module.get_embedder(profile="code", use_gpu=False) - - assert first is second - - -def test_concurrent_cache_access(monkeypatch: pytest.MonkeyPatch) -> None: - _patch_embedder_for_unit_tests(monkeypatch) - embedder_module.clear_embedder_cache() - - profiles = ["fast", "code", "balanced", "multilingual"] - for profile in profiles: - embedder_module.get_embedder(profile=profile, use_gpu=False) - - errors: list[BaseException] = [] - errors_lock = threading.Lock() - - def record_error(err: BaseException) -> None: - with errors_lock: - errors.append(err) - - worker_count = 20 - start_barrier = threading.Barrier(worker_count + 1) - stop_at = time.monotonic() + 1.0 - - def clear_worker() -> None: - try: - start_barrier.wait() - while time.monotonic() < stop_at: - embedder_module.clear_embedder_cache() - time.sleep(0) - except BaseException as err: - record_error(err) - - def access_worker(profile: str) -> None: - try: - start_barrier.wait() - while time.monotonic() < stop_at: - embedder_module.get_embedder(profile=profile, use_gpu=False) - except BaseException as err: - record_error(err) - - threads: list[threading.Thread] = [ - threading.Thread(target=clear_worker, name="clear-embedder-cache"), - ] - for idx in range(worker_count): - threads.append( - threading.Thread( - target=access_worker, - name=f"get-embedder-{idx}", - args=(profiles[idx % len(profiles)],), - ) - ) - - for thread in threads: - thread.start() - for thread in threads: - thread.join(timeout=10) - - assert not errors, f"Unexpected errors during concurrent access: {errors!r}" diff --git a/codex-lens/tests/test_embedding_backend_availability.py b/codex-lens/tests/test_embedding_backend_availability.py deleted file mode 100644 index 70fa5672..00000000 --- a/codex-lens/tests/test_embedding_backend_availability.py +++ /dev/null @@ -1,67 +0,0 @@ -"""Tests for embedding backend availability checks. - -These tests validate the logic used to decide whether embeddings generation -should run for a given backend (fastembed vs. litellm). -""" - -import pytest - - -def test_is_embedding_backend_available_invalid_backend(monkeypatch): - import codexlens.semantic as semantic - - ok, err = semantic.is_embedding_backend_available("nope") - assert ok is False - assert "Invalid embedding backend" in (err or "") - - -def test_is_embedding_backend_available_fastembed_true(monkeypatch): - import codexlens.semantic as semantic - - monkeypatch.setattr(semantic, "SEMANTIC_AVAILABLE", True) - ok, err = semantic.is_embedding_backend_available("fastembed") - assert ok is True - assert err is None - - -def test_is_embedding_backend_available_fastembed_false(monkeypatch): - import codexlens.semantic as semantic - - monkeypatch.setattr(semantic, "SEMANTIC_AVAILABLE", False) - monkeypatch.setattr(semantic, "_import_error", "fastembed missing") - ok, err = semantic.is_embedding_backend_available("fastembed") - assert ok is False - assert err == "fastembed missing" - - -def test_is_embedding_backend_available_litellm_true(monkeypatch): - import codexlens.semantic as semantic - - monkeypatch.setattr(semantic, "LITELLM_AVAILABLE", True) - ok, err = semantic.is_embedding_backend_available("litellm") - assert ok is True - assert err is None - - -def test_is_embedding_backend_available_litellm_false(monkeypatch): - import codexlens.semantic as semantic - - monkeypatch.setattr(semantic, "LITELLM_AVAILABLE", False) - ok, err = semantic.is_embedding_backend_available("litellm") - assert ok is False - assert "ccw-litellm not available" in (err or "") - - -def test_generate_embeddings_uses_backend_availability_gate(monkeypatch, tmp_path): - from codexlens.cli import embedding_manager - - monkeypatch.setattr( - embedding_manager, - "is_embedding_backend_available", - lambda _backend: (False, "blocked"), - ) - - result = embedding_manager.generate_embeddings(tmp_path / "_index.db", embedding_backend="litellm") - assert result["success"] is False - assert result["error"] == "blocked" - diff --git a/codex-lens/tests/test_embedding_status_root_model.py b/codex-lens/tests/test_embedding_status_root_model.py deleted file mode 100644 index 7314d205..00000000 --- a/codex-lens/tests/test_embedding_status_root_model.py +++ /dev/null @@ -1,204 +0,0 @@ -import gc -import gc -import shutil -import sqlite3 -import tempfile -import time -from pathlib import Path - -import pytest - -import codexlens.cli.embedding_manager as embedding_manager -from codexlens.cli.embedding_manager import get_embedding_stats_summary, get_embeddings_status - - -@pytest.fixture -def status_temp_dir() -> Path: - temp_path = Path(tempfile.mkdtemp()) - try: - yield temp_path - finally: - gc.collect() - for _ in range(5): - try: - if temp_path.exists(): - shutil.rmtree(temp_path) - break - except PermissionError: - time.sleep(0.1) - - -def _create_index_db(index_path: Path, files: list[str], embedded_files: list[str] | None = None) -> None: - index_path.parent.mkdir(parents=True, exist_ok=True) - with sqlite3.connect(index_path) as conn: - cursor = conn.cursor() - cursor.execute( - """ - CREATE TABLE files ( - id INTEGER PRIMARY KEY, - path TEXT NOT NULL UNIQUE, - content TEXT, - language TEXT, - hash TEXT - ) - """ - ) - cursor.executemany( - "INSERT INTO files (path, content, language, hash) VALUES (?, ?, ?, ?)", - [(file_path, "", "python", f"hash-{idx}") for idx, file_path in enumerate(files)], - ) - - if embedded_files is not None: - cursor.execute( - """ - CREATE TABLE semantic_chunks ( - id INTEGER PRIMARY KEY, - file_path TEXT NOT NULL, - content TEXT, - embedding BLOB, - metadata TEXT, - category TEXT - ) - """ - ) - cursor.executemany( - "INSERT INTO semantic_chunks (file_path, content, embedding, metadata, category) VALUES (?, ?, ?, ?, ?)", - [(file_path, "chunk", b"vec", "{}", "code") for file_path in embedded_files], - ) - conn.commit() - - -def _create_vectors_meta_db(meta_path: Path, embedded_files: list[str], binary_vector_count: int = 0) -> None: - meta_path.parent.mkdir(parents=True, exist_ok=True) - with sqlite3.connect(meta_path) as conn: - cursor = conn.cursor() - cursor.execute( - """ - CREATE TABLE chunk_metadata ( - chunk_id INTEGER PRIMARY KEY, - file_path TEXT NOT NULL, - content TEXT, - start_line INTEGER, - end_line INTEGER, - category TEXT, - metadata TEXT, - source_index_db TEXT - ) - """ - ) - cursor.execute( - """ - CREATE TABLE binary_vectors ( - chunk_id INTEGER PRIMARY KEY, - vector BLOB NOT NULL - ) - """ - ) - cursor.executemany( - """ - INSERT INTO chunk_metadata ( - chunk_id, file_path, content, start_line, end_line, category, metadata, source_index_db - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) - """, - [ - (idx, file_path, "chunk", 1, 1, "code", "{}", str(meta_path.parent / "_index.db")) - for idx, file_path in enumerate(embedded_files, start=1) - ], - ) - cursor.executemany( - "INSERT INTO binary_vectors (chunk_id, vector) VALUES (?, ?)", - [(idx, b"\x01") for idx in range(1, binary_vector_count + 1)], - ) - conn.commit() - - -def test_root_status_does_not_inherit_child_embeddings( - monkeypatch: pytest.MonkeyPatch, status_temp_dir: Path -) -> None: - workspace = status_temp_dir / "workspace" - workspace.mkdir() - _create_index_db(workspace / "_index.db", ["a.py", "b.py"]) - _create_index_db(workspace / "child" / "_index.db", ["child.py"], embedded_files=["child.py"]) - - monkeypatch.setattr( - embedding_manager, - "_get_model_info_from_index", - lambda index_path: { - "model_profile": "fast", - "model_name": "unit-test-model", - "embedding_dim": 384, - "backend": "fastembed", - "created_at": "2026-03-13T00:00:00Z", - "updated_at": "2026-03-13T00:00:00Z", - } if index_path.parent.name == "child" else None, - ) - - status = get_embeddings_status(workspace) - assert status["success"] is True - - result = status["result"] - assert result["coverage_percent"] == 0.0 - assert result["files_with_embeddings"] == 0 - assert result["root"]["has_embeddings"] is False - assert result["model_info"] is None - assert result["subtree"]["indexes_with_embeddings"] == 1 - assert result["subtree"]["coverage_percent"] > 0 - - -def test_root_status_uses_validated_centralized_metadata(status_temp_dir: Path) -> None: - workspace = status_temp_dir / "workspace" - workspace.mkdir() - _create_index_db(workspace / "_index.db", ["a.py", "b.py"]) - _create_vectors_meta_db(workspace / "_vectors_meta.db", ["a.py"]) - (workspace / "_vectors.hnsw").write_bytes(b"hnsw") - - status = get_embeddings_status(workspace) - assert status["success"] is True - - result = status["result"] - assert result["coverage_percent"] == 50.0 - assert result["files_with_embeddings"] == 1 - assert result["total_chunks"] == 1 - assert result["root"]["has_embeddings"] is True - assert result["root"]["storage_mode"] == "centralized" - assert result["centralized"]["dense_ready"] is True - assert result["centralized"]["usable"] is True - - -def test_embedding_stats_summary_skips_ignored_artifact_indexes(status_temp_dir: Path) -> None: - workspace = status_temp_dir / "workspace" - workspace.mkdir() - _create_index_db(workspace / "_index.db", ["root.py"]) - _create_index_db(workspace / "src" / "_index.db", ["src.py"]) - _create_index_db(workspace / "dist" / "_index.db", ["bundle.py"], embedded_files=["bundle.py"]) - _create_index_db(workspace / ".workflow" / "_index.db", ["trace.py"], embedded_files=["trace.py"]) - - summary = get_embedding_stats_summary(workspace) - - assert summary["success"] is True - result = summary["result"] - assert result["total_indexes"] == 2 - assert {Path(item["path"]).relative_to(workspace).as_posix() for item in result["indexes"]} == { - "_index.db", - "src/_index.db", - } - - -def test_root_status_ignores_empty_centralized_artifacts(status_temp_dir: Path) -> None: - workspace = status_temp_dir / "workspace" - workspace.mkdir() - _create_index_db(workspace / "_index.db", ["a.py", "b.py"]) - _create_vectors_meta_db(workspace / "_vectors_meta.db", []) - (workspace / "_vectors.hnsw").write_bytes(b"hnsw") - (workspace / "_binary_vectors.mmap").write_bytes(b"mmap") - - status = get_embeddings_status(workspace) - assert status["success"] is True - - result = status["result"] - assert result["coverage_percent"] == 0.0 - assert result["files_with_embeddings"] == 0 - assert result["root"]["has_embeddings"] is False - assert result["centralized"]["chunk_metadata_rows"] == 0 - assert result["centralized"]["binary_vector_rows"] == 0 - assert result["centralized"]["usable"] is False diff --git a/codex-lens/tests/test_encoding.py b/codex-lens/tests/test_encoding.py deleted file mode 100644 index 253f82c9..00000000 --- a/codex-lens/tests/test_encoding.py +++ /dev/null @@ -1,372 +0,0 @@ -"""Tests for encoding detection module (P1). - -Tests chardet integration, UTF-8 fallback behavior, confidence thresholds, -and safe file reading with error replacement. -""" - -import tempfile -from pathlib import Path -from unittest.mock import Mock, patch - -import pytest - -from codexlens.parsers.encoding import ( - ENCODING_DETECTION_AVAILABLE, - check_encoding_available, - detect_encoding, - is_binary_file, - read_file_safe, -) - - -class TestEncodingDetectionAvailability: - """Tests for encoding detection feature availability.""" - - def test_encoding_available_flag(self): - """Test ENCODING_DETECTION_AVAILABLE flag is boolean.""" - assert isinstance(ENCODING_DETECTION_AVAILABLE, bool) - - def test_check_encoding_available_returns_tuple(self): - """Test check_encoding_available returns (available, error_message).""" - available, error_msg = check_encoding_available() - assert isinstance(available, bool) - if not available: - assert isinstance(error_msg, str) - assert "chardet" in error_msg.lower() or "install" in error_msg.lower() - else: - assert error_msg is None - - -class TestDetectEncoding: - """Tests for detect_encoding function.""" - - def test_detect_utf8_content(self): - """Test detection of UTF-8 encoded content.""" - content = "Hello, World! 你好世界".encode("utf-8") - encoding = detect_encoding(content) - # Should detect UTF-8 or use UTF-8 as fallback - assert encoding.lower() in ["utf-8", "utf8"] - - def test_detect_latin1_content(self): - """Test detection of ISO-8859-1 encoded content.""" - content = "Héllo, Wörld! Ñoño".encode("iso-8859-1") - encoding = detect_encoding(content) - # Should detect ISO-8859-1 or fallback to UTF-8 - assert isinstance(encoding, str) - assert len(encoding) > 0 - - def test_detect_gbk_content(self): - """Test detection of GBK encoded content.""" - content = "你好世界 测试文本".encode("gbk") - encoding = detect_encoding(content) - # Should detect GBK or fallback to UTF-8 - assert isinstance(encoding, str) - if ENCODING_DETECTION_AVAILABLE: - # With chardet, should detect CJK encoding or UTF-8 (chardet may detect similar encodings) - valid_encodings = ["gbk", "gb2312", "gb18030", "big5", "utf-8", "utf8", "cp949", "euc-kr", "iso-8859-1"] - assert encoding.lower() in valid_encodings, f"Got unexpected encoding: {encoding}" - else: - # Without chardet, should fallback to UTF-8 - assert encoding.lower() in ["utf-8", "utf8"] - - def test_empty_content_returns_utf8(self): - """Test empty content returns UTF-8 fallback.""" - encoding = detect_encoding(b"") - assert encoding.lower() in ["utf-8", "utf8"] - - @pytest.mark.skipif(not ENCODING_DETECTION_AVAILABLE, reason="chardet not installed") - def test_confidence_threshold_filtering(self): - """Test low-confidence detections are rejected and fallback to UTF-8.""" - # Use sys.modules to mock chardet.detect - import sys - if 'chardet' not in sys.modules: - pytest.skip("chardet not available") - - import chardet - - with patch.object(chardet, "detect") as mock_detect: - mock_detect.return_value = { - "encoding": "windows-1252", - "confidence": 0.3 # Below default threshold of 0.7 - } - content = b"some text" - encoding = detect_encoding(content, confidence_threshold=0.7) - # Should fallback to UTF-8 due to low confidence - assert encoding.lower() in ["utf-8", "utf8"] - - @pytest.mark.skipif(not ENCODING_DETECTION_AVAILABLE, reason="chardet not installed") - def test_high_confidence_accepted(self): - """Test high-confidence detections are accepted.""" - import sys - if 'chardet' not in sys.modules: - pytest.skip("chardet not available") - - import chardet - - with patch.object(chardet, "detect") as mock_detect: - mock_detect.return_value = { - "encoding": "utf-8", - "confidence": 0.95 # Above threshold - } - content = b"some text" - encoding = detect_encoding(content, confidence_threshold=0.7) - assert encoding.lower() in ["utf-8", "utf8"] - - @pytest.mark.skipif(not ENCODING_DETECTION_AVAILABLE, reason="chardet not installed") - def test_chardet_exception_fallback(self): - """Test chardet exceptions trigger UTF-8 fallback.""" - import sys - if 'chardet' not in sys.modules: - pytest.skip("chardet not available") - - import chardet - - with patch.object(chardet, "detect", side_effect=Exception("Mock error")): - content = b"some text" - encoding = detect_encoding(content) - # Should fallback gracefully - assert encoding.lower() in ["utf-8", "utf8"] - - def test_fallback_without_chardet(self): - """Test graceful fallback when chardet unavailable.""" - # Temporarily disable chardet - with patch("codexlens.parsers.encoding.ENCODING_DETECTION_AVAILABLE", False): - content = "测试内容".encode("utf-8") - encoding = detect_encoding(content) - assert encoding.lower() in ["utf-8", "utf8"] - - -class TestReadFileSafe: - """Tests for read_file_safe function.""" - - @pytest.fixture - def temp_file(self): - """Create temporary file for testing.""" - with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=".txt") as f: - file_path = Path(f.name) - yield file_path - if file_path.exists(): - file_path.unlink() - - def test_read_utf8_file(self, temp_file): - """Test reading UTF-8 encoded file.""" - content_text = "Hello, World! 你好世界" - temp_file.write_bytes(content_text.encode("utf-8")) - - content, encoding = read_file_safe(temp_file) - assert content == content_text - assert encoding.lower() in ["utf-8", "utf8"] - - def test_read_gbk_file(self, temp_file): - """Test reading GBK encoded file.""" - content_text = "你好世界 测试文本" - temp_file.write_bytes(content_text.encode("gbk")) - - content, encoding = read_file_safe(temp_file) - # Should decode correctly with detected or fallback encoding - assert isinstance(content, str) - if ENCODING_DETECTION_AVAILABLE: - # With chardet, should detect GBK/GB2312/Big5 and decode correctly - # Chardet may detect Big5 for GBK content, which is acceptable - assert "你好" in content or "世界" in content or len(content) > 0 - else: - # Without chardet, UTF-8 fallback with replacement - assert isinstance(content, str) - - def test_read_latin1_file(self, temp_file): - """Test reading ISO-8859-1 encoded file.""" - content_text = "Héllo Wörld" - temp_file.write_bytes(content_text.encode("iso-8859-1")) - - content, encoding = read_file_safe(temp_file) - assert isinstance(content, str) - # Should decode with detected or fallback encoding - assert len(content) > 0 - - def test_error_replacement_preserves_structure(self, temp_file): - """Test errors='replace' preserves file structure with unmappable bytes.""" - # Create file with invalid UTF-8 sequence - invalid_utf8 = b"Valid text\xFF\xFEInvalid bytes\x00More text" - temp_file.write_bytes(invalid_utf8) - - content, encoding = read_file_safe(temp_file) - # Should decode with replacement character - assert "Valid text" in content - assert "More text" in content - # Should contain replacement characters (�) for invalid bytes - assert isinstance(content, str) - - def test_max_detection_bytes_parameter(self, temp_file): - """Test max_detection_bytes limits encoding detection sample size.""" - # Create large file - large_content = ("测试内容 " * 10000).encode("utf-8") # ~60KB - temp_file.write_bytes(large_content) - - # Use small detection sample - content, encoding = read_file_safe(temp_file, max_detection_bytes=1000) - assert isinstance(content, str) - assert len(content) > 0 - - def test_confidence_threshold_parameter(self, temp_file): - """Test confidence_threshold parameter affects detection.""" - content_text = "Sample text for encoding detection" - temp_file.write_bytes(content_text.encode("utf-8")) - - # High threshold - content_high, encoding_high = read_file_safe(temp_file, confidence_threshold=0.9) - assert isinstance(content_high, str) - - # Low threshold - content_low, encoding_low = read_file_safe(temp_file, confidence_threshold=0.5) - assert isinstance(content_low, str) - - def test_read_nonexistent_file_raises(self): - """Test reading nonexistent file raises OSError.""" - with pytest.raises(OSError): - read_file_safe(Path("/nonexistent/path/file.txt")) - - def test_read_directory_raises(self, tmp_path): - """Test reading directory raises IsADirectoryError.""" - with pytest.raises((IsADirectoryError, OSError)): - read_file_safe(tmp_path) - - def test_read_empty_file(self, temp_file): - """Test reading empty file returns empty string.""" - temp_file.write_bytes(b"") - content, encoding = read_file_safe(temp_file) - assert content == "" - assert encoding.lower() in ["utf-8", "utf8"] - - -class TestIsBinaryFile: - """Tests for is_binary_file function.""" - - @pytest.fixture - def temp_file(self): - """Create temporary file for testing.""" - with tempfile.NamedTemporaryFile(mode="wb", delete=False) as f: - file_path = Path(f.name) - yield file_path - if file_path.exists(): - file_path.unlink() - - def test_text_file_not_binary(self, temp_file): - """Test text file is not classified as binary.""" - temp_file.write_bytes(b"This is a text file\nWith multiple lines\n") - assert not is_binary_file(temp_file) - - def test_binary_file_with_null_bytes(self, temp_file): - """Test file with >30% null bytes is classified as binary.""" - # Create file with high null byte ratio - binary_content = b"\x00" * 5000 + b"text" * 100 - temp_file.write_bytes(binary_content) - assert is_binary_file(temp_file) - - def test_binary_file_with_non_text_chars(self, temp_file): - """Test file with high non-text character ratio is binary.""" - # Create file with non-printable characters - binary_content = bytes(range(0, 256)) * 50 - temp_file.write_bytes(binary_content) - # Should be classified as binary due to high non-text ratio - result = is_binary_file(temp_file) - # May or may not be binary depending on exact ratio - assert isinstance(result, bool) - - def test_empty_file_not_binary(self, temp_file): - """Test empty file is not classified as binary.""" - temp_file.write_bytes(b"") - assert not is_binary_file(temp_file) - - def test_utf8_text_not_binary(self, temp_file): - """Test UTF-8 text file is not classified as binary.""" - temp_file.write_bytes("你好世界 Hello World".encode("utf-8")) - assert not is_binary_file(temp_file) - - def test_sample_size_parameter(self, temp_file): - """Test sample_size parameter limits bytes checked.""" - # Create large file with text at start, binary later - content = b"Text content" * 1000 + b"\x00" * 10000 - temp_file.write_bytes(content) - - # Small sample should see only text - assert not is_binary_file(temp_file, sample_size=100) - - # Large sample should see binary content - result = is_binary_file(temp_file, sample_size=20000) - assert isinstance(result, bool) - - def test_tabs_newlines_not_counted_as_non_text(self, temp_file): - """Test tabs and newlines are not counted as non-text characters.""" - content = b"Line 1\nLine 2\tTabbed\rCarriage return\n" - temp_file.write_bytes(content) - assert not is_binary_file(temp_file) - - -@pytest.mark.parametrize("encoding,test_content", [ - ("utf-8", "Hello 世界 🌍"), - ("gbk", "你好世界"), - ("iso-8859-1", "Héllo Wörld"), - ("windows-1252", "Smart quotes test"), -]) -class TestEncodingParameterized: - """Parameterized tests for various encodings.""" - - def test_detect_and_decode(self, encoding, test_content): - """Test detection and decoding roundtrip for various encodings.""" - # Skip if encoding not supported - try: - encoded = test_content.encode(encoding) - except (UnicodeEncodeError, LookupError): - pytest.skip(f"Encoding {encoding} not supported") - - detected = detect_encoding(encoded) - assert isinstance(detected, str) - - # Decode with detected encoding (with fallback) - try: - decoded = encoded.decode(detected, errors='replace') - assert isinstance(decoded, str) - except (UnicodeDecodeError, LookupError): - # Fallback to UTF-8 - decoded = encoded.decode('utf-8', errors='replace') - assert isinstance(decoded, str) - - -@pytest.mark.skipif(ENCODING_DETECTION_AVAILABLE, reason="Test fallback behavior when chardet unavailable") -class TestWithoutChardet: - """Tests for behavior when chardet is not available.""" - - def test_all_functions_work_without_chardet(self): - """Test all encoding functions work gracefully without chardet.""" - content = b"Test content" - - # Should all return UTF-8 fallback - encoding = detect_encoding(content) - assert encoding.lower() in ["utf-8", "utf8"] - - available, error = check_encoding_available() - assert not available - assert error is not None - - -@pytest.mark.skipif(not ENCODING_DETECTION_AVAILABLE, reason="Requires chardet") -class TestWithChardet: - """Tests for behavior when chardet is available.""" - - def test_chardet_available_flag(self): - """Test ENCODING_DETECTION_AVAILABLE is True when chardet installed.""" - assert ENCODING_DETECTION_AVAILABLE is True - - def test_check_encoding_available(self): - """Test check_encoding_available returns success.""" - available, error = check_encoding_available() - assert available is True - assert error is None - - def test_detect_encoding_uses_chardet(self): - """Test detect_encoding uses chardet when available.""" - content = "你好世界".encode("gbk") - encoding = detect_encoding(content) - # Should detect GBK or related encoding - assert isinstance(encoding, str) - assert len(encoding) > 0 diff --git a/codex-lens/tests/test_enrichment.py b/codex-lens/tests/test_enrichment.py deleted file mode 100644 index 8b07b385..00000000 --- a/codex-lens/tests/test_enrichment.py +++ /dev/null @@ -1,234 +0,0 @@ -"""Tests for search result enrichment with relationship data.""" -import sqlite3 -import tempfile -import time -from pathlib import Path - -import pytest - -from codexlens.search.enrichment import RelationshipEnricher - - -@pytest.fixture -def mock_db(): - """Create a mock database with symbols and relationships.""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "_index.db" - conn = sqlite3.connect(str(db_path)) - cursor = conn.cursor() - - # Create schema - cursor.execute(''' - CREATE TABLE symbols ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - qualified_name TEXT NOT NULL, - name TEXT NOT NULL, - kind TEXT NOT NULL, - file_path TEXT NOT NULL, - start_line INTEGER NOT NULL, - end_line INTEGER NOT NULL - ) - ''') - cursor.execute(''' - CREATE TABLE symbol_relationships ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - source_symbol_id INTEGER NOT NULL, - target_symbol_fqn TEXT NOT NULL, - relationship_type TEXT NOT NULL, - file_path TEXT NOT NULL, - line INTEGER, - FOREIGN KEY (source_symbol_id) REFERENCES symbols(id) - ) - ''') - - # Insert test data - cursor.execute(''' - INSERT INTO symbols (qualified_name, name, kind, file_path, start_line, end_line) - VALUES ('module.main', 'main', 'function', 'module.py', 1, 10) - ''') - main_id = cursor.lastrowid - - cursor.execute(''' - INSERT INTO symbols (qualified_name, name, kind, file_path, start_line, end_line) - VALUES ('module.helper', 'helper', 'function', 'module.py', 12, 20) - ''') - helper_id = cursor.lastrowid - - cursor.execute(''' - INSERT INTO symbols (qualified_name, name, kind, file_path, start_line, end_line) - VALUES ('utils.fetch', 'fetch', 'function', 'utils.py', 1, 5) - ''') - fetch_id = cursor.lastrowid - - # main calls helper - cursor.execute(''' - INSERT INTO symbol_relationships (source_symbol_id, target_symbol_fqn, relationship_type, file_path, line) - VALUES (?, 'helper', 'calls', 'module.py', 5) - ''', (main_id,)) - - # main calls fetch - cursor.execute(''' - INSERT INTO symbol_relationships (source_symbol_id, target_symbol_fqn, relationship_type, file_path, line) - VALUES (?, 'utils.fetch', 'calls', 'module.py', 6) - ''', (main_id,)) - - # helper imports os - cursor.execute(''' - INSERT INTO symbol_relationships (source_symbol_id, target_symbol_fqn, relationship_type, file_path, line) - VALUES (?, 'os', 'imports', 'module.py', 13) - ''', (helper_id,)) - - conn.commit() - conn.close() - - yield db_path - - -class TestRelationshipEnricher: - """Test suite for RelationshipEnricher.""" - - def test_enrich_with_relationships(self, mock_db): - """Test enriching results with valid relationships.""" - with RelationshipEnricher(mock_db) as enricher: - results = [ - {"path": "module.py", "score": 0.9, "excerpt": "def main():", "symbol": "main"}, - {"path": "module.py", "score": 0.8, "excerpt": "def helper():", "symbol": "helper"}, - ] - - enriched = enricher.enrich(results, limit=10) - - # Check main's relationships - main_result = enriched[0] - assert "relationships" in main_result - main_rels = main_result["relationships"] - assert len(main_rels) >= 2 - - # Verify outgoing relationships - outgoing = [r for r in main_rels if r["direction"] == "outgoing"] - targets = [r["target"] for r in outgoing] - assert "helper" in targets or any("helper" in t for t in targets) - - # Check helper's relationships - helper_result = enriched[1] - assert "relationships" in helper_result - helper_rels = helper_result["relationships"] - assert len(helper_rels) >= 1 - - # Verify incoming relationships (main calls helper) - incoming = [r for r in helper_rels if r["direction"] == "incoming"] - assert len(incoming) >= 1 - assert incoming[0]["type"] == "called_by" - - def test_enrich_missing_symbol(self, mock_db): - """Test graceful handling of missing symbols.""" - with RelationshipEnricher(mock_db) as enricher: - results = [ - {"path": "unknown.py", "score": 0.9, "excerpt": "code", "symbol": "nonexistent"}, - ] - - enriched = enricher.enrich(results, limit=10) - - # Should return empty relationships, not crash - assert "relationships" in enriched[0] - assert enriched[0]["relationships"] == [] - - def test_enrich_no_symbol_name(self, mock_db): - """Test handling results without symbol names.""" - with RelationshipEnricher(mock_db) as enricher: - results = [ - {"path": "module.py", "score": 0.9, "excerpt": "code", "symbol": None}, - ] - - enriched = enricher.enrich(results, limit=10) - - assert "relationships" in enriched[0] - assert enriched[0]["relationships"] == [] - - def test_enrich_performance(self, mock_db): - """Test that enrichment is fast (<100ms for 10 results).""" - with RelationshipEnricher(mock_db) as enricher: - results = [ - {"path": "module.py", "score": 0.9, "excerpt": f"code{i}", "symbol": "main"} - for i in range(10) - ] - - start = time.perf_counter() - enricher.enrich(results, limit=10) - elapsed_ms = (time.perf_counter() - start) * 1000 - - assert elapsed_ms < 100, f"Enrichment took {elapsed_ms:.1f}ms, expected < 100ms" - - def test_enrich_limit(self, mock_db): - """Test that limit parameter is respected.""" - with RelationshipEnricher(mock_db) as enricher: - results = [ - {"path": "module.py", "score": 0.9, "symbol": "main"}, - {"path": "module.py", "score": 0.8, "symbol": "helper"}, - {"path": "utils.py", "score": 0.7, "symbol": "fetch"}, - ] - - # Only enrich first 2 - enriched = enricher.enrich(results, limit=2) - - assert "relationships" in enriched[0] - assert "relationships" in enriched[1] - # Third result should NOT have relationships key - assert "relationships" not in enriched[2] - - def test_connection_failure_graceful(self): - """Test graceful handling when database doesn't exist.""" - nonexistent = Path("/nonexistent/path/_index.db") - with RelationshipEnricher(nonexistent) as enricher: - results = [{"path": "test.py", "score": 0.9, "symbol": "test"}] - enriched = enricher.enrich(results) - - # Should return original results without crashing - assert len(enriched) == 1 - - def test_incoming_type_conversion(self, mock_db): - """Test that relationship types are correctly converted for incoming.""" - with RelationshipEnricher(mock_db) as enricher: - results = [ - {"path": "module.py", "score": 0.9, "symbol": "helper"}, - ] - - enriched = enricher.enrich(results) - rels = enriched[0]["relationships"] - - incoming = [r for r in rels if r["direction"] == "incoming"] - if incoming: - # calls should become called_by - assert incoming[0]["type"] == "called_by" - - def test_context_manager(self, mock_db): - """Test that context manager properly opens and closes connections.""" - enricher = RelationshipEnricher(mock_db) - assert enricher.db_conn is not None - - enricher.close() - assert enricher.db_conn is None - - # Using context manager - with RelationshipEnricher(mock_db) as e: - assert e.db_conn is not None - assert e.db_conn is None - - def test_relationship_data_structure(self, mock_db): - """Test that relationship data has correct structure.""" - with RelationshipEnricher(mock_db) as enricher: - results = [{"path": "module.py", "score": 0.9, "symbol": "main"}] - enriched = enricher.enrich(results) - - rels = enriched[0]["relationships"] - for rel in rels: - # All relationships should have required fields - assert "type" in rel - assert "direction" in rel - assert "file" in rel - assert rel["direction"] in ["outgoing", "incoming"] - - # Outgoing should have target, incoming should have source - if rel["direction"] == "outgoing": - assert "target" in rel - else: - assert "source" in rel diff --git a/codex-lens/tests/test_entities.py b/codex-lens/tests/test_entities.py deleted file mode 100644 index e038a70e..00000000 --- a/codex-lens/tests/test_entities.py +++ /dev/null @@ -1,245 +0,0 @@ -"""Tests for CodexLens entity models.""" - -import pytest -from pydantic import ValidationError - -from codexlens.entities import IndexedFile, SearchResult, SemanticChunk, Symbol - - -class TestSymbol: - """Tests for Symbol entity.""" - - def test_create_valid_symbol(self): - """Test creating a valid symbol.""" - symbol = Symbol(name="hello", kind="function", range=(1, 10)) - assert symbol.name == "hello" - assert symbol.kind == "function" - assert symbol.range == (1, 10) - - def test_symbol_range_validation(self): - """Test that range values must be valid.""" - # Range must have start >= 1 - with pytest.raises(ValidationError): - Symbol(name="test", kind="function", range=(0, 5)) - - # Range must have end >= start - with pytest.raises(ValidationError): - Symbol(name="test", kind="function", range=(5, 3)) - - # Both values must be >= 1 - with pytest.raises(ValidationError): - Symbol(name="test", kind="function", range=(-1, 5)) - - def test_symbol_name_required(self): - """Test that name is required and non-empty.""" - with pytest.raises(ValidationError): - Symbol(name="", kind="function", range=(1, 1)) - - def test_symbol_kind_required(self): - """Test that kind is required and non-empty.""" - with pytest.raises(ValidationError): - Symbol(name="test", kind="", range=(1, 1)) - - def test_symbol_equal_range(self): - """Test symbol with equal start and end line.""" - symbol = Symbol(name="one_liner", kind="function", range=(5, 5)) - assert symbol.range == (5, 5) - - -class TestSemanticChunk: - """Tests for SemanticChunk entity.""" - - def test_create_chunk_without_embedding(self): - """Test creating a chunk without embedding.""" - chunk = SemanticChunk(content="def hello(): pass") - assert chunk.content == "def hello(): pass" - assert chunk.embedding is None - assert chunk.metadata == {} - - def test_create_chunk_with_embedding(self): - """Test creating a chunk with embedding.""" - embedding = [0.1, 0.2, 0.3, 0.4] - chunk = SemanticChunk(content="some code", embedding=embedding) - assert chunk.embedding == embedding - - def test_chunk_with_metadata(self): - """Test creating a chunk with metadata.""" - metadata = {"file": "test.py", "language": "python", "line": 10} - chunk = SemanticChunk(content="code", metadata=metadata) - assert chunk.metadata == metadata - - def test_chunk_content_required(self): - """Test that content is required and non-empty.""" - with pytest.raises(ValidationError): - SemanticChunk(content="") - - def test_chunk_embedding_validation(self): - """Test that embedding cannot be empty list when provided.""" - with pytest.raises(ValidationError): - SemanticChunk(content="code", embedding=[]) - - def test_chunk_embedding_with_floats(self): - """Test embedding with various float values.""" - embedding = [0.0, 1.0, -0.5, 0.123456789] - chunk = SemanticChunk(content="code", embedding=embedding) - assert chunk.embedding == embedding - - def test_chunk_zero_vector_validation(self): - """Test that zero vector embeddings are rejected.""" - with pytest.raises(ValidationError) as exc: - SemanticChunk(content="code", embedding=[0.0, 0.0, 0.0, 0.0]) - assert "zero vector" in str(exc.value).lower() - - def test_chunk_near_zero_vector_validation(self): - """Test that near-zero vector embeddings are rejected.""" - with pytest.raises(ValidationError) as exc: - SemanticChunk(content="code", embedding=[1e-11, 1e-11, 1e-11]) - assert "zero vector" in str(exc.value).lower() - - def test_chunk_small_nonzero_vector_validation(self): - """Test that small but non-zero embeddings are allowed.""" - embedding = [0.001, 0.001, 0.001] - chunk = SemanticChunk(content="code", embedding=embedding) - assert chunk.embedding == embedding - - -class TestIndexedFile: - """Tests for IndexedFile entity.""" - - def test_create_empty_indexed_file(self): - """Test creating an indexed file with no symbols or chunks.""" - indexed = IndexedFile(path="/test/file.py", language="python") - assert indexed.path == "/test/file.py" - assert indexed.language == "python" - assert indexed.symbols == [] - assert indexed.chunks == [] - - def test_create_indexed_file_with_symbols(self): - """Test creating an indexed file with symbols.""" - symbols = [ - Symbol(name="MyClass", kind="class", range=(1, 10)), - Symbol(name="my_func", kind="function", range=(12, 20)), - ] - indexed = IndexedFile( - path="/test/file.py", - language="python", - symbols=symbols, - ) - assert len(indexed.symbols) == 2 - assert indexed.symbols[0].name == "MyClass" - - def test_create_indexed_file_with_chunks(self): - """Test creating an indexed file with chunks.""" - chunks = [ - SemanticChunk(content="chunk 1", metadata={"line": 1}), - SemanticChunk(content="chunk 2", metadata={"line": 10}), - ] - indexed = IndexedFile( - path="/test/file.py", - language="python", - chunks=chunks, - ) - assert len(indexed.chunks) == 2 - - def test_indexed_file_path_strip(self): - """Test that path is stripped of whitespace.""" - indexed = IndexedFile(path=" /test/file.py ", language="python") - assert indexed.path == "/test/file.py" - - def test_indexed_file_language_strip(self): - """Test that language is stripped of whitespace.""" - indexed = IndexedFile(path="/test/file.py", language=" python ") - assert indexed.language == "python" - - def test_indexed_file_path_required(self): - """Test that path is required and non-blank.""" - with pytest.raises(ValidationError): - IndexedFile(path="", language="python") - - with pytest.raises(ValidationError): - IndexedFile(path=" ", language="python") - - def test_indexed_file_language_required(self): - """Test that language is required and non-blank.""" - with pytest.raises(ValidationError): - IndexedFile(path="/test/file.py", language="") - - -class TestSearchResult: - """Tests for SearchResult entity.""" - - def test_create_minimal_search_result(self): - """Test creating a minimal search result.""" - result = SearchResult(path="/test/file.py", score=0.95) - assert result.path == "/test/file.py" - assert result.score == 0.95 - assert result.excerpt is None - assert result.symbol is None - assert result.chunk is None - assert result.metadata == {} - - def test_create_full_search_result(self): - """Test creating a search result with all fields.""" - symbol = Symbol(name="test", kind="function", range=(1, 5)) - chunk = SemanticChunk(content="test code") - result = SearchResult( - path="/test/file.py", - score=0.88, - excerpt="...matching code...", - symbol=symbol, - chunk=chunk, - metadata={"match_type": "fts"}, - ) - assert result.excerpt == "...matching code..." - assert result.symbol.name == "test" - assert result.chunk.content == "test code" - - def test_search_result_score_validation(self): - """Test that score must be >= 0.""" - with pytest.raises(ValidationError): - SearchResult(path="/test/file.py", score=-0.1) - - def test_search_result_zero_score(self): - """Test that zero score is valid.""" - result = SearchResult(path="/test/file.py", score=0.0) - assert result.score == 0.0 - - def test_search_result_path_required(self): - """Test that path is required and non-empty.""" - with pytest.raises(ValidationError): - SearchResult(path="", score=0.5) - - -class TestEntitySerialization: - """Tests for entity serialization.""" - - def test_symbol_model_dump(self): - """Test Symbol serialization.""" - symbol = Symbol(name="test", kind="function", range=(1, 10)) - data = symbol.model_dump() - assert data == { - "name": "test", - "kind": "function", - "range": (1, 10), - "file": None, - } - - def test_indexed_file_model_dump(self): - """Test IndexedFile serialization.""" - indexed = IndexedFile( - path="/test.py", - language="python", - symbols=[Symbol(name="foo", kind="function", range=(1, 1))], - ) - data = indexed.model_dump() - assert data["path"] == "/test.py" - assert data["language"] == "python" - assert len(data["symbols"]) == 1 - - def test_search_result_model_dump(self): - """Test SearchResult serialization.""" - result = SearchResult(path="/test.py", score=0.5, excerpt="test") - data = result.model_dump() - assert data["path"] == "/test.py" - assert data["score"] == 0.5 - assert data["excerpt"] == "test" diff --git a/codex-lens/tests/test_errors.py b/codex-lens/tests/test_errors.py deleted file mode 100644 index 09394a5a..00000000 --- a/codex-lens/tests/test_errors.py +++ /dev/null @@ -1,165 +0,0 @@ -"""Tests for CodexLens error classes.""" - -import pytest - -from codexlens.errors import ( - CodexLensError, - ConfigError, - ParseError, - SearchError, - StorageError, -) - - -class TestErrorHierarchy: - """Tests for error class hierarchy.""" - - def test_codexlens_error_is_exception(self): - """Test that CodexLensError is an Exception.""" - assert issubclass(CodexLensError, Exception) - - def test_config_error_inherits_from_base(self): - """Test ConfigError inherits from CodexLensError.""" - assert issubclass(ConfigError, CodexLensError) - - def test_parse_error_inherits_from_base(self): - """Test ParseError inherits from CodexLensError.""" - assert issubclass(ParseError, CodexLensError) - - def test_storage_error_inherits_from_base(self): - """Test StorageError inherits from CodexLensError.""" - assert issubclass(StorageError, CodexLensError) - - def test_search_error_inherits_from_base(self): - """Test SearchError inherits from CodexLensError.""" - assert issubclass(SearchError, CodexLensError) - - -class TestErrorMessages: - """Tests for error message handling.""" - - def test_codexlens_error_with_message(self): - """Test creating CodexLensError with message.""" - error = CodexLensError("Something went wrong") - assert str(error) == "Something went wrong" - - def test_config_error_with_message(self): - """Test creating ConfigError with message.""" - error = ConfigError("Invalid configuration") - assert str(error) == "Invalid configuration" - - def test_parse_error_with_message(self): - """Test creating ParseError with message.""" - error = ParseError("Failed to parse file.py") - assert str(error) == "Failed to parse file.py" - - def test_storage_error_with_message(self): - """Test creating StorageError with message.""" - error = StorageError("Database connection failed") - assert str(error) == "Database connection failed" - - def test_search_error_with_message(self): - """Test creating SearchError with message.""" - error = SearchError("FTS query syntax error") - assert str(error) == "FTS query syntax error" - - -class TestErrorRaising: - """Tests for raising and catching errors.""" - - def test_catch_specific_error(self): - """Test catching specific error type.""" - with pytest.raises(ConfigError): - raise ConfigError("test") - - def test_catch_base_error(self): - """Test catching base error type catches all subtypes.""" - with pytest.raises(CodexLensError): - raise ConfigError("test") - - with pytest.raises(CodexLensError): - raise ParseError("test") - - with pytest.raises(CodexLensError): - raise StorageError("test") - - with pytest.raises(CodexLensError): - raise SearchError("test") - - def test_error_not_caught_as_wrong_type(self): - """Test that errors aren't caught as wrong type.""" - with pytest.raises(ConfigError): - try: - raise ConfigError("config issue") - except ParseError: - pass # This should not catch ConfigError - - -class TestErrorChaining: - """Tests for error chaining.""" - - def test_error_with_cause(self): - """Test error chaining with __cause__.""" - original = ValueError("original error") - try: - raise StorageError("storage failed") from original - except StorageError as e: - assert e.__cause__ is original - - def test_nested_error_handling(self): - """Test nested error handling pattern.""" - def inner_function(): - raise ValueError("inner error") - - def outer_function(): - try: - inner_function() - except ValueError as e: - raise ParseError("outer error") from e - - with pytest.raises(ParseError) as exc_info: - outer_function() - - assert exc_info.value.__cause__ is not None - assert isinstance(exc_info.value.__cause__, ValueError) - - -class TestErrorUsagePatterns: - """Tests for common error usage patterns.""" - - def test_error_in_context_manager(self): - """Test error handling in context manager.""" - class FakeStore: - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - return False # Don't suppress exceptions - - def query(self): - raise StorageError("query failed") - - with pytest.raises(StorageError): - with FakeStore() as store: - store.query() - - def test_error_comparison(self): - """Test error instance comparison.""" - error1 = ConfigError("test") - error2 = ConfigError("test") - # Different instances, even with same message - assert error1 is not error2 - # But same string representation - assert str(error1) == str(error2) - - def test_empty_error_message(self): - """Test error with empty message.""" - error = CodexLensError("") - assert str(error) == "" - - def test_error_with_format_args(self): - """Test error with formatted message.""" - path = "/test/file.py" - error = ParseError(f"Failed to parse {path}: syntax error on line 10") - assert "/test/file.py" in str(error) - assert "line 10" in str(error) diff --git a/codex-lens/tests/test_file_cache.py b/codex-lens/tests/test_file_cache.py deleted file mode 100644 index 72223a7f..00000000 --- a/codex-lens/tests/test_file_cache.py +++ /dev/null @@ -1,224 +0,0 @@ -"""Tests for CodexLens file cache.""" - -import tempfile -from pathlib import Path - -import pytest - -from codexlens.storage.file_cache import FileCache - - -class TestFileCache: - """Tests for FileCache class.""" - - def test_create_cache(self): - """Test creating a FileCache instance.""" - with tempfile.TemporaryDirectory() as tmpdir: - cache = FileCache(cache_path=Path(tmpdir)) - assert cache.cache_path == Path(tmpdir) - - def test_store_and_load_mtime(self): - """Test storing and loading mtime.""" - with tempfile.TemporaryDirectory() as tmpdir: - cache = FileCache(cache_path=Path(tmpdir)) - file_path = Path("/test/file.py") - mtime = 1234567890.123 - - cache.store_mtime(file_path, mtime) - loaded = cache.load_mtime(file_path) - - assert loaded == mtime - - def test_load_nonexistent_mtime(self): - """Test loading mtime for uncached file returns None.""" - with tempfile.TemporaryDirectory() as tmpdir: - cache = FileCache(cache_path=Path(tmpdir)) - file_path = Path("/nonexistent/file.py") - - loaded = cache.load_mtime(file_path) - - assert loaded is None - - def test_update_mtime(self): - """Test updating existing mtime.""" - with tempfile.TemporaryDirectory() as tmpdir: - cache = FileCache(cache_path=Path(tmpdir)) - file_path = Path("/test/file.py") - - cache.store_mtime(file_path, 1000.0) - cache.store_mtime(file_path, 2000.0) - loaded = cache.load_mtime(file_path) - - assert loaded == 2000.0 - - def test_multiple_files(self): - """Test caching multiple files.""" - with tempfile.TemporaryDirectory() as tmpdir: - cache = FileCache(cache_path=Path(tmpdir)) - - files = { - Path("/test/a.py"): 1000.0, - Path("/test/b.py"): 2000.0, - Path("/test/c.py"): 3000.0, - } - - for path, mtime in files.items(): - cache.store_mtime(path, mtime) - - for path, expected_mtime in files.items(): - loaded = cache.load_mtime(path) - assert loaded == expected_mtime - - -class TestFileCacheKeyGeneration: - """Tests for cache key generation.""" - - def test_key_for_simple_path(self): - """Test key generation for simple path.""" - with tempfile.TemporaryDirectory() as tmpdir: - cache = FileCache(cache_path=Path(tmpdir)) - key = cache._key_for(Path("test.py")) - assert key.endswith(".mtime") - - def test_key_for_path_with_slashes(self): - """Test key generation for path with slashes.""" - with tempfile.TemporaryDirectory() as tmpdir: - cache = FileCache(cache_path=Path(tmpdir)) - key = cache._key_for(Path("/path/to/file.py")) - assert "/" not in key - assert key.endswith(".mtime") - - def test_key_for_windows_path(self): - """Test key generation for Windows-style path.""" - with tempfile.TemporaryDirectory() as tmpdir: - cache = FileCache(cache_path=Path(tmpdir)) - key = cache._key_for(Path("C:\\Users\\test\\file.py")) - assert "\\" not in key - assert ":" not in key - assert key.endswith(".mtime") - - def test_different_paths_different_keys(self): - """Test that different paths produce different keys.""" - with tempfile.TemporaryDirectory() as tmpdir: - cache = FileCache(cache_path=Path(tmpdir)) - key1 = cache._key_for(Path("/test/a.py")) - key2 = cache._key_for(Path("/test/b.py")) - assert key1 != key2 - - -class TestFileCacheDirectoryCreation: - """Tests for cache directory creation.""" - - def test_creates_cache_directory(self): - """Test that cache directory is created when storing.""" - with tempfile.TemporaryDirectory() as tmpdir: - cache_path = Path(tmpdir) / "new_cache_dir" - cache = FileCache(cache_path=cache_path) - - assert not cache_path.exists() - - cache.store_mtime(Path("/test.py"), 1000.0) - - assert cache_path.exists() - - def test_nested_cache_directory(self): - """Test creating nested cache directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - cache_path = Path(tmpdir) / "a" / "b" / "c" / "cache" - cache = FileCache(cache_path=cache_path) - - cache.store_mtime(Path("/test.py"), 1000.0) - - assert cache_path.exists() - - -class TestFileCacheEdgeCases: - """Edge case tests for FileCache.""" - - def test_mtime_precision(self): - """Test that mtime precision is preserved.""" - with tempfile.TemporaryDirectory() as tmpdir: - cache = FileCache(cache_path=Path(tmpdir)) - precise_mtime = 1234567890.123456789 - - cache.store_mtime(Path("/test.py"), precise_mtime) - loaded = cache.load_mtime(Path("/test.py")) - - # Should preserve reasonable precision - assert abs(loaded - precise_mtime) < 0.0001 - - def test_zero_mtime(self): - """Test storing zero mtime.""" - with tempfile.TemporaryDirectory() as tmpdir: - cache = FileCache(cache_path=Path(tmpdir)) - - cache.store_mtime(Path("/test.py"), 0.0) - loaded = cache.load_mtime(Path("/test.py")) - - assert loaded == 0.0 - - def test_negative_mtime(self): - """Test storing negative mtime (edge case).""" - with tempfile.TemporaryDirectory() as tmpdir: - cache = FileCache(cache_path=Path(tmpdir)) - - cache.store_mtime(Path("/test.py"), -1000.0) - loaded = cache.load_mtime(Path("/test.py")) - - assert loaded == -1000.0 - - def test_large_mtime(self): - """Test storing large mtime value.""" - with tempfile.TemporaryDirectory() as tmpdir: - cache = FileCache(cache_path=Path(tmpdir)) - large_mtime = 9999999999.999 - - cache.store_mtime(Path("/test.py"), large_mtime) - loaded = cache.load_mtime(Path("/test.py")) - - assert loaded == large_mtime - - def test_unicode_path(self): - """Test path with unicode characters.""" - with tempfile.TemporaryDirectory() as tmpdir: - cache = FileCache(cache_path=Path(tmpdir)) - unicode_path = Path("/测试/文件.py") - - cache.store_mtime(unicode_path, 1000.0) - loaded = cache.load_mtime(unicode_path) - - assert loaded == 1000.0 - - def test_load_corrupted_cache_file(self): - """Test loading corrupted cache file returns None.""" - with tempfile.TemporaryDirectory() as tmpdir: - cache = FileCache(cache_path=Path(tmpdir)) - file_path = Path("/test.py") - - # Create a corrupted cache file - cache.store_mtime(file_path, 1000.0) - key = cache._key_for(file_path) - (Path(tmpdir) / key).write_text("not a number") - - # Should return None for corrupted data - loaded = cache.load_mtime(file_path) - assert loaded is None - - -class TestFileCachePersistence: - """Tests for cache persistence across instances.""" - - def test_cache_persists_across_instances(self): - """Test that cache data persists when creating new instance.""" - with tempfile.TemporaryDirectory() as tmpdir: - cache_path = Path(tmpdir) - - # Store with first instance - cache1 = FileCache(cache_path=cache_path) - cache1.store_mtime(Path("/test.py"), 1234.0) - - # Load with second instance - cache2 = FileCache(cache_path=cache_path) - loaded = cache2.load_mtime(Path("/test.py")) - - assert loaded == 1234.0 diff --git a/codex-lens/tests/test_global_graph_expander.py b/codex-lens/tests/test_global_graph_expander.py deleted file mode 100644 index 37fa9371..00000000 --- a/codex-lens/tests/test_global_graph_expander.py +++ /dev/null @@ -1,323 +0,0 @@ -"""Tests for GlobalGraphExpander.""" - -import tempfile -from pathlib import Path - -import pytest - -from codexlens.entities import ( - CodeRelationship, - RelationshipType, - SearchResult, - Symbol, -) -from codexlens.search.global_graph_expander import ( - DECAY_FACTORS, - DEFAULT_DECAY, - GlobalGraphExpander, -) -from codexlens.storage.global_index import GlobalSymbolIndex - - -@pytest.fixture() -def temp_dir(): - tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True) - yield Path(tmpdir.name) - try: - tmpdir.cleanup() - except (PermissionError, OSError): - pass - - -def _setup_global_index(root: Path) -> GlobalSymbolIndex: - """Create a GlobalSymbolIndex with test symbols and relationships.""" - db_path = root / "test_global.db" - gsi = GlobalSymbolIndex(db_path, project_id=1) - gsi.initialize() - - # Files in different directories (cross-directory scenario) - file_a = str((root / "pkg_a" / "module_a.py").resolve()) - file_b = str((root / "pkg_b" / "module_b.py").resolve()) - file_c = str((root / "pkg_c" / "module_c.py").resolve()) - index_path = str((root / "indexes" / "_index.db").resolve()) - - symbols_a = [ - Symbol(name="ClassA", kind="class", range=(1, 20), file=file_a), - Symbol(name="func_a", kind="function", range=(22, 30), file=file_a), - ] - symbols_b = [ - Symbol(name="ClassB", kind="class", range=(1, 15), file=file_b), - ] - symbols_c = [ - Symbol(name="helper_c", kind="function", range=(1, 10), file=file_c), - ] - - gsi.update_file_symbols(file_a, symbols_a, index_path=index_path) - gsi.update_file_symbols(file_b, symbols_b, index_path=index_path) - gsi.update_file_symbols(file_c, symbols_c, index_path=index_path) - - # Relationships: - # ClassA --imports--> ClassB (cross-directory) - # ClassA --calls--> helper_c (cross-directory) - # ClassB --inherits--> ClassA (cross-directory) - relationships_a = [ - CodeRelationship( - source_symbol="ClassA", - target_symbol="ClassB", - relationship_type=RelationshipType.IMPORTS, - source_file=file_a, - target_file=file_b, - source_line=2, - ), - CodeRelationship( - source_symbol="ClassA", - target_symbol="helper_c", - relationship_type=RelationshipType.CALL, - source_file=file_a, - target_file=file_c, - source_line=10, - ), - ] - relationships_b = [ - CodeRelationship( - source_symbol="ClassB", - target_symbol="ClassA", - relationship_type=RelationshipType.INHERITS, - source_file=file_b, - target_file=file_a, - source_line=1, - ), - ] - - gsi.update_file_relationships(file_a, relationships_a) - gsi.update_file_relationships(file_b, relationships_b) - - return gsi - - -def test_expand_returns_related_results(temp_dir: Path) -> None: - """expand() should return related symbols from global relationships.""" - gsi = _setup_global_index(temp_dir) - try: - expander = GlobalGraphExpander(gsi) - - file_a = str((temp_dir / "pkg_a" / "module_a.py").resolve()) - base_results = [ - SearchResult( - path=file_a, - score=1.0, - excerpt=None, - content=None, - start_line=1, - end_line=20, - symbol_name="ClassA", - symbol_kind="class", - ), - ] - - related = expander.expand(base_results, top_n=10, max_related=50) - - assert len(related) > 0 - # All results should have static_graph source metadata - for r in related: - assert r.metadata.get("source") == "static_graph" - # Should find ClassB and/or helper_c as related symbols - related_symbols = {r.symbol_name for r in related} - assert len(related_symbols) > 0 - finally: - gsi.close() - - -def test_score_decay_by_relationship_type(temp_dir: Path) -> None: - """Score decay factors should be: IMPORTS=0.4, INHERITS=0.5, CALLS=0.3.""" - # Verify the constants - assert DECAY_FACTORS["imports"] == 0.4 - assert DECAY_FACTORS["inherits"] == 0.5 - assert DECAY_FACTORS["calls"] == 0.3 - assert DEFAULT_DECAY == 0.3 - - gsi = _setup_global_index(temp_dir) - try: - expander = GlobalGraphExpander(gsi) - - file_a = str((temp_dir / "pkg_a" / "module_a.py").resolve()) - base_results = [ - SearchResult( - path=file_a, - score=1.0, - excerpt=None, - content=None, - start_line=1, - end_line=20, - symbol_name="ClassA", - symbol_kind="class", - ), - ] - - related = expander.expand(base_results, top_n=10, max_related=50) - - # Check that scores use decay factors - for r in related: - rel_type = r.metadata.get("relationship_type") - if rel_type: - expected_decay = DECAY_FACTORS.get(rel_type, DEFAULT_DECAY) - # Score should be base_score * decay (possibly * 0.8 for unresolved) - assert r.score <= 1.0 * expected_decay + 0.01 - assert r.score > 0.0 - finally: - gsi.close() - - -def test_expand_with_no_relationships_returns_empty(temp_dir: Path) -> None: - """expand() should return empty list when no relationships exist.""" - db_path = temp_dir / "empty_global.db" - gsi = GlobalSymbolIndex(db_path, project_id=1) - gsi.initialize() - - try: - # Add a symbol but no relationships - file_x = str((temp_dir / "isolated.py").resolve()) - index_path = str((temp_dir / "idx.db").resolve()) - gsi.update_file_symbols( - file_x, - [Symbol(name="IsolatedFunc", kind="function", range=(1, 5), file=file_x)], - index_path=index_path, - ) - - expander = GlobalGraphExpander(gsi) - base_results = [ - SearchResult( - path=file_x, - score=0.9, - excerpt=None, - content=None, - start_line=1, - end_line=5, - symbol_name="IsolatedFunc", - symbol_kind="function", - ), - ] - - related = expander.expand(base_results, top_n=10, max_related=50) - assert related == [] - finally: - gsi.close() - - -def test_expand_deduplicates_against_input(temp_dir: Path) -> None: - """expand() should not include results already present in input.""" - gsi = _setup_global_index(temp_dir) - try: - expander = GlobalGraphExpander(gsi) - - file_a = str((temp_dir / "pkg_a" / "module_a.py").resolve()) - file_b = str((temp_dir / "pkg_b" / "module_b.py").resolve()) - - # Include both ClassA and ClassB in input - ClassB should be deduplicated - base_results = [ - SearchResult( - path=file_a, - score=1.0, - excerpt=None, - content=None, - start_line=1, - end_line=20, - symbol_name="ClassA", - symbol_kind="class", - ), - SearchResult( - path=file_b, - score=0.8, - excerpt=None, - content=None, - start_line=1, - end_line=15, - symbol_name="ClassB", - symbol_kind="class", - ), - ] - - related = expander.expand(base_results, top_n=10, max_related=50) - - # No related result should match (path, symbol_name, start_line) - # of any input result - input_keys = {(r.path, r.symbol_name, r.start_line) for r in base_results} - for r in related: - assert (r.path, r.symbol_name, r.start_line) not in input_keys - finally: - gsi.close() - - -def test_resolve_target_with_double_colon_format(temp_dir: Path) -> None: - """_resolve_target_to_file should handle 'file_path::symbol_name' format.""" - gsi = _setup_global_index(temp_dir) - try: - expander = GlobalGraphExpander(gsi) - - file_b = str((temp_dir / "pkg_b" / "module_b.py").resolve()) - target_qname = f"{file_b}::ClassB" - - result = expander._resolve_target_to_file(target_qname) - assert result is not None - resolved_file, start_line, end_line = result - assert resolved_file == file_b - # ClassB is at range (1, 15) - assert start_line == 1 - assert end_line == 15 - finally: - gsi.close() - - -def test_resolve_target_with_dot_notation(temp_dir: Path) -> None: - """_resolve_target_to_file should handle 'module.ClassName' dot notation.""" - gsi = _setup_global_index(temp_dir) - try: - expander = GlobalGraphExpander(gsi) - - # "pkg.ClassB" - leaf name "ClassB" should be found via search - result = expander._resolve_target_to_file("pkg.ClassB") - assert result is not None - resolved_file, start_line, end_line = result - # Should resolve to ClassB's file - file_b = str((temp_dir / "pkg_b" / "module_b.py").resolve()) - assert resolved_file == file_b - assert start_line == 1 - assert end_line == 15 - finally: - gsi.close() - - -def test_expand_empty_results_returns_empty(temp_dir: Path) -> None: - """expand() with empty input should return empty list.""" - db_path = temp_dir / "empty.db" - gsi = GlobalSymbolIndex(db_path, project_id=1) - gsi.initialize() - try: - expander = GlobalGraphExpander(gsi) - assert expander.expand([]) == [] - finally: - gsi.close() - - -def test_expand_results_without_symbol_names_returns_empty(temp_dir: Path) -> None: - """expand() should skip results without symbol_name.""" - db_path = temp_dir / "nosym.db" - gsi = GlobalSymbolIndex(db_path, project_id=1) - gsi.initialize() - try: - expander = GlobalGraphExpander(gsi) - base_results = [ - SearchResult( - path="/some/file.py", - score=1.0, - excerpt="some text", - content=None, - start_line=1, - end_line=5, - symbol_name=None, - symbol_kind=None, - ), - ] - assert expander.expand(base_results) == [] - finally: - gsi.close() diff --git a/codex-lens/tests/test_global_index.py b/codex-lens/tests/test_global_index.py deleted file mode 100644 index b548cfa2..00000000 --- a/codex-lens/tests/test_global_index.py +++ /dev/null @@ -1,293 +0,0 @@ -import sqlite3 -import tempfile -import time -from concurrent.futures import ThreadPoolExecutor -from pathlib import Path -from unittest.mock import MagicMock - -import pytest - -from codexlens.config import Config -from codexlens.entities import Symbol -from codexlens.errors import StorageError -from codexlens.search.chain_search import ChainSearchEngine -from codexlens.storage.global_index import GlobalSymbolIndex -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore - - -@pytest.fixture() -def temp_paths(): - tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True) - root = Path(tmpdir.name) - yield root - try: - tmpdir.cleanup() - except (PermissionError, OSError): - pass - - -def test_add_symbol(temp_paths: Path): - db_path = temp_paths / "indexes" / "_global_symbols.db" - index_path = temp_paths / "indexes" / "_index.db" - file_path = temp_paths / "src" / "a.py" - - index_path.parent.mkdir(parents=True, exist_ok=True) - index_path.write_text("", encoding="utf-8") - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("class AuthManager:\n pass\n", encoding="utf-8") - - with GlobalSymbolIndex(db_path, project_id=1) as store: - store.add_symbol( - Symbol(name="AuthManager", kind="class", range=(1, 2)), - file_path=file_path, - index_path=index_path, - ) - - matches = store.search("AuthManager", kind="class", limit=10, prefix_mode=True) - assert len(matches) == 1 - assert matches[0].name == "AuthManager" - assert matches[0].file == str(file_path.resolve()) - - # Schema version safety: newer schema versions should be rejected. - bad_db = temp_paths / "indexes" / "_global_symbols_bad.db" - bad_db.parent.mkdir(parents=True, exist_ok=True) - conn = sqlite3.connect(bad_db) - conn.execute("PRAGMA user_version = 999") - conn.close() - - with pytest.raises(StorageError): - GlobalSymbolIndex(bad_db, project_id=1).initialize() - - -def test_search_symbols(temp_paths: Path): - db_path = temp_paths / "indexes" / "_global_symbols.db" - index_path = temp_paths / "indexes" / "_index.db" - file_path = temp_paths / "src" / "mod.py" - - index_path.parent.mkdir(parents=True, exist_ok=True) - index_path.write_text("", encoding="utf-8") - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("def authenticate():\n pass\n", encoding="utf-8") - - with GlobalSymbolIndex(db_path, project_id=7) as store: - store.add_symbol( - Symbol(name="authenticate", kind="function", range=(1, 2)), - file_path=file_path, - index_path=index_path, - ) - - locations = store.search_symbols("auth", kind="function", limit=10, prefix_mode=True) - assert locations - assert any(p.endswith("mod.py") for p, _ in locations) - assert any(rng == (1, 2) for _, rng in locations) - - -def test_update_file_symbols(temp_paths: Path): - db_path = temp_paths / "indexes" / "_global_symbols.db" - file_path = temp_paths / "src" / "mod.py" - index_path = temp_paths / "indexes" / "_index.db" - - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("def a():\n pass\n", encoding="utf-8") - index_path.parent.mkdir(parents=True, exist_ok=True) - index_path.write_text("", encoding="utf-8") - - with GlobalSymbolIndex(db_path, project_id=7) as store: - store.update_file_symbols( - file_path=file_path, - symbols=[ - Symbol(name="old_func", kind="function", range=(1, 2)), - Symbol(name="Other", kind="class", range=(10, 20)), - ], - index_path=index_path, - ) - assert any(s.name == "old_func" for s in store.search("old_", prefix_mode=True)) - - store.update_file_symbols( - file_path=file_path, - symbols=[Symbol(name="new_func", kind="function", range=(3, 4))], - index_path=index_path, - ) - assert not any(s.name == "old_func" for s in store.search("old_", prefix_mode=True)) - assert any(s.name == "new_func" for s in store.search("new_", prefix_mode=True)) - - # Backward-compatible path: index_path can be omitted after it's been established. - store.update_file_symbols( - file_path=file_path, - symbols=[Symbol(name="new_func2", kind="function", range=(5, 6))], - index_path=None, - ) - assert any(s.name == "new_func2" for s in store.search("new_func2", prefix_mode=True)) - - # New file + symbols without index_path should raise. - missing_index_file = temp_paths / "src" / "new_file.py" - with pytest.raises(StorageError): - store.update_file_symbols( - file_path=missing_index_file, - symbols=[Symbol(name="must_fail", kind="function", range=(1, 1))], - index_path=None, - ) - - deleted = store.delete_file_symbols(file_path) - assert deleted > 0 - - -def test_incremental_updates(temp_paths: Path, monkeypatch): - db_path = temp_paths / "indexes" / "_global_symbols.db" - file_path = temp_paths / "src" / "same.py" - idx_a = temp_paths / "indexes" / "a" / "_index.db" - idx_b = temp_paths / "indexes" / "b" / "_index.db" - - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("class AuthManager:\n pass\n", encoding="utf-8") - idx_a.parent.mkdir(parents=True, exist_ok=True) - idx_a.write_text("", encoding="utf-8") - idx_b.parent.mkdir(parents=True, exist_ok=True) - idx_b.write_text("", encoding="utf-8") - - with GlobalSymbolIndex(db_path, project_id=42) as store: - sym = Symbol(name="AuthManager", kind="class", range=(1, 2)) - store.add_symbol(sym, file_path=file_path, index_path=idx_a) - store.add_symbol(sym, file_path=file_path, index_path=idx_b) - - # prefix_mode=False exercises substring matching. - assert store.search("Manager", prefix_mode=False) - - conn = sqlite3.connect(db_path) - row = conn.execute( - """ - SELECT index_path - FROM global_symbols - WHERE project_id=? AND symbol_name=? AND symbol_kind=? AND file_path=? - """, - (42, "AuthManager", "class", str(file_path.resolve())), - ).fetchone() - conn.close() - - assert row is not None - assert str(Path(row[0]).resolve()) == str(idx_b.resolve()) - - # Migration path coverage: simulate a future schema version and an older DB version. - migrating_db = temp_paths / "indexes" / "_global_symbols_migrate.db" - migrating_db.parent.mkdir(parents=True, exist_ok=True) - conn = sqlite3.connect(migrating_db) - conn.execute("PRAGMA user_version = 1") - conn.close() - - monkeypatch.setattr(GlobalSymbolIndex, "SCHEMA_VERSION", 2) - GlobalSymbolIndex(migrating_db, project_id=1).initialize() - - -def test_concurrent_access(temp_paths: Path): - db_path = temp_paths / "indexes" / "_global_symbols.db" - index_path = temp_paths / "indexes" / "_index.db" - file_path = temp_paths / "src" / "a.py" - - index_path.parent.mkdir(parents=True, exist_ok=True) - index_path.write_text("", encoding="utf-8") - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("class A:\n pass\n", encoding="utf-8") - - with GlobalSymbolIndex(db_path, project_id=1) as store: - def add_many(worker_id: int): - for i in range(50): - store.add_symbol( - Symbol(name=f"Sym{worker_id}_{i}", kind="class", range=(1, 2)), - file_path=file_path, - index_path=index_path, - ) - - with ThreadPoolExecutor(max_workers=8) as ex: - list(ex.map(add_many, range(8))) - - matches = store.search("Sym", kind="class", limit=1000, prefix_mode=True) - assert len(matches) >= 200 - - -def test_chain_search_integration(temp_paths: Path): - project_root = temp_paths / "project" - project_root.mkdir(parents=True, exist_ok=True) - - index_root = temp_paths / "indexes" - mapper = PathMapper(index_root=index_root) - index_db_path = mapper.source_to_index_db(project_root) - index_db_path.parent.mkdir(parents=True, exist_ok=True) - index_db_path.write_text("", encoding="utf-8") - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - project_info = registry.register_project(project_root, mapper.source_to_index_dir(project_root)) - - global_db_path = project_info.index_root / GlobalSymbolIndex.DEFAULT_DB_NAME - with GlobalSymbolIndex(global_db_path, project_id=project_info.id) as global_index: - file_path = project_root / "auth.py" - global_index.update_file_symbols( - file_path=file_path, - symbols=[ - Symbol(name="AuthManager", kind="class", range=(1, 10)), - Symbol(name="authenticate", kind="function", range=(12, 20)), - ], - index_path=index_db_path, - ) - - config = Config(data_dir=temp_paths / "data", global_symbol_index_enabled=True) - engine = ChainSearchEngine(registry, mapper, config=config) - engine._search_symbols_parallel = MagicMock(side_effect=AssertionError("should not traverse chain")) - - symbols = engine.search_symbols("Auth", project_root) - assert any(s.name == "AuthManager" for s in symbols) - registry.close() - - -def test_disabled_fallback(temp_paths: Path): - project_root = temp_paths / "project" - project_root.mkdir(parents=True, exist_ok=True) - - index_root = temp_paths / "indexes" - mapper = PathMapper(index_root=index_root) - index_db_path = mapper.source_to_index_db(project_root) - index_db_path.parent.mkdir(parents=True, exist_ok=True) - index_db_path.write_text("", encoding="utf-8") - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - registry.register_project(project_root, mapper.source_to_index_dir(project_root)) - - config = Config(data_dir=temp_paths / "data", global_symbol_index_enabled=False) - engine = ChainSearchEngine(registry, mapper, config=config) - engine._collect_index_paths = MagicMock(return_value=[index_db_path]) - engine._search_symbols_parallel = MagicMock( - return_value=[Symbol(name="FallbackSymbol", kind="function", range=(1, 2))] - ) - - symbols = engine.search_symbols("Fallback", project_root) - assert any(s.name == "FallbackSymbol" for s in symbols) - assert engine._search_symbols_parallel.called - registry.close() - - -def test_performance_benchmark(temp_paths: Path): - db_path = temp_paths / "indexes" / "_global_symbols.db" - index_path = temp_paths / "indexes" / "_index.db" - file_path = temp_paths / "src" / "perf.py" - - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("class AuthManager:\n pass\n", encoding="utf-8") - index_path.parent.mkdir(parents=True, exist_ok=True) - index_path.write_text("", encoding="utf-8") - - with GlobalSymbolIndex(db_path, project_id=1) as store: - for i in range(500): - store.add_symbol( - Symbol(name=f"AuthManager{i}", kind="class", range=(1, 2)), - file_path=file_path, - index_path=index_path, - ) - - start = time.perf_counter() - results = store.search("AuthManager", kind="class", limit=50, prefix_mode=True) - elapsed_ms = (time.perf_counter() - start) * 1000 - - assert elapsed_ms < 100.0 - assert results diff --git a/codex-lens/tests/test_global_relationships.py b/codex-lens/tests/test_global_relationships.py deleted file mode 100644 index 43da4c1b..00000000 --- a/codex-lens/tests/test_global_relationships.py +++ /dev/null @@ -1,507 +0,0 @@ -"""Tests for global_relationships table in GlobalSymbolIndex.""" - -import sqlite3 -import tempfile -import time -from pathlib import Path - -import pytest - -from codexlens.entities import CodeRelationship, RelationshipType -from codexlens.storage.global_index import GlobalSymbolIndex - - -@pytest.fixture() -def temp_paths(): - tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True) - root = Path(tmpdir.name) - yield root - try: - tmpdir.cleanup() - except (PermissionError, OSError): - pass - - -def _make_rel( - source_symbol: str, - target_symbol: str, - rel_type: RelationshipType = RelationshipType.CALL, - source_file: str = "src/a.py", - target_file: str | None = None, - source_line: int = 1, -) -> CodeRelationship: - return CodeRelationship( - source_symbol=source_symbol, - target_symbol=target_symbol, - relationship_type=rel_type, - source_file=source_file, - target_file=target_file, - source_line=source_line, - ) - - -# ------------------------------------------------------------------ -# Schema creation (fresh DB) -# ------------------------------------------------------------------ - - -def test_fresh_schema_creates_relationships_table(temp_paths: Path): - """New DB at SCHEMA_VERSION=2 should have global_relationships table.""" - db_path = temp_paths / "indexes" / "_global_symbols.db" - - with GlobalSymbolIndex(db_path, project_id=1) as store: - conn = store._get_connection() - tables = { - row[0] - for row in conn.execute( - "SELECT name FROM sqlite_master WHERE type='table'" - ).fetchall() - } - assert "global_relationships" in tables - assert "global_symbols" in tables - - # Verify indexes exist - indexes = { - row[0] - for row in conn.execute( - "SELECT name FROM sqlite_master WHERE type='index'" - ).fetchall() - } - assert "idx_global_rel_project_target" in indexes - assert "idx_global_rel_project_source" in indexes - - -def test_schema_version_is_2(temp_paths: Path): - db_path = temp_paths / "indexes" / "_global_symbols.db" - with GlobalSymbolIndex(db_path, project_id=1) as store: - conn = store._get_connection() - version = conn.execute("PRAGMA user_version").fetchone()[0] - assert version == 2 - - -# ------------------------------------------------------------------ -# Migration v1 -> v2 -# ------------------------------------------------------------------ - - -def test_migration_v1_to_v2(temp_paths: Path): - """A v1 database should gain the global_relationships table on upgrade.""" - db_path = temp_paths / "indexes" / "_global_symbols.db" - db_path.parent.mkdir(parents=True, exist_ok=True) - - # Simulate a v1 database: create global_symbols table + set version=1. - conn = sqlite3.connect(str(db_path)) - conn.execute( - """ - CREATE TABLE IF NOT EXISTS global_symbols ( - id INTEGER PRIMARY KEY, - project_id INTEGER NOT NULL, - symbol_name TEXT NOT NULL, - symbol_kind TEXT NOT NULL, - file_path TEXT NOT NULL, - start_line INTEGER, - end_line INTEGER, - index_path TEXT NOT NULL, - UNIQUE(project_id, symbol_name, symbol_kind, file_path, start_line, end_line) - ) - """ - ) - conn.execute("PRAGMA user_version = 1") - conn.commit() - conn.close() - - # Now open with the new code -- migration should fire. - with GlobalSymbolIndex(db_path, project_id=1) as store: - conn = store._get_connection() - version = conn.execute("PRAGMA user_version").fetchone()[0] - assert version == 2 - - tables = { - row[0] - for row in conn.execute( - "SELECT name FROM sqlite_master WHERE type='table'" - ).fetchall() - } - assert "global_relationships" in tables - - -def test_migration_idempotent(temp_paths: Path): - """Running migration twice should not fail (CREATE TABLE IF NOT EXISTS).""" - db_path = temp_paths / "indexes" / "_global_symbols.db" - - # First init - store = GlobalSymbolIndex(db_path, project_id=1) - store.initialize() - store.close() - - # Second init on same DB -- should be a no-op. - store2 = GlobalSymbolIndex(db_path, project_id=1) - store2.initialize() - store2.close() - - -# ------------------------------------------------------------------ -# update_file_relationships -# ------------------------------------------------------------------ - - -def test_update_file_relationships_insert(temp_paths: Path): - db_path = temp_paths / "indexes" / "_global_symbols.db" - file_path = temp_paths / "src" / "auth.py" - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("", encoding="utf-8") - - rels = [ - _make_rel("login", "validate_token", source_file="src/auth.py", source_line=10), - _make_rel("login", "hash_password", source_file="src/auth.py", source_line=15), - _make_rel("AuthManager", "BaseManager", RelationshipType.INHERITS, "src/auth.py", source_line=1), - ] - - with GlobalSymbolIndex(db_path, project_id=1) as store: - store.update_file_relationships(file_path, rels) - - # Verify rows exist - conn = store._get_connection() - count = conn.execute( - "SELECT COUNT(*) FROM global_relationships WHERE project_id=1" - ).fetchone()[0] - assert count == 3 - - -def test_update_file_relationships_replaces_atomically(temp_paths: Path): - """Second call should delete old rows and insert new ones.""" - db_path = temp_paths / "indexes" / "_global_symbols.db" - file_path = temp_paths / "src" / "mod.py" - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("", encoding="utf-8") - - old_rels = [_make_rel("foo", "bar", source_file="src/mod.py", source_line=5)] - new_rels = [ - _make_rel("baz", "qux", source_file="src/mod.py", source_line=10), - _make_rel("baz", "quux", source_file="src/mod.py", source_line=11), - ] - - with GlobalSymbolIndex(db_path, project_id=1) as store: - store.update_file_relationships(file_path, old_rels) - store.update_file_relationships(file_path, new_rels) - - conn = store._get_connection() - rows = conn.execute( - "SELECT source_symbol FROM global_relationships WHERE project_id=1 ORDER BY source_line" - ).fetchall() - names = [r[0] for r in rows] - assert "foo" not in names - assert "baz" in names - assert len(rows) == 2 - - -def test_update_file_relationships_empty_clears(temp_paths: Path): - """Passing empty list should delete all relationships for the file.""" - db_path = temp_paths / "indexes" / "_global_symbols.db" - file_path = temp_paths / "src" / "x.py" - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("", encoding="utf-8") - - with GlobalSymbolIndex(db_path, project_id=1) as store: - store.update_file_relationships( - file_path, - [_make_rel("a", "b", source_file="src/x.py")], - ) - store.update_file_relationships(file_path, []) - - conn = store._get_connection() - count = conn.execute( - "SELECT COUNT(*) FROM global_relationships WHERE project_id=1" - ).fetchone()[0] - assert count == 0 - - -# ------------------------------------------------------------------ -# query_by_target -# ------------------------------------------------------------------ - - -def test_query_by_target_exact(temp_paths: Path): - db_path = temp_paths / "indexes" / "_global_symbols.db" - file_path = temp_paths / "src" / "a.py" - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("", encoding="utf-8") - - rels = [ - _make_rel("caller", "TargetClass", source_file="src/a.py", source_line=10), - _make_rel("caller2", "TargetClassExtra", source_file="src/a.py", source_line=20), - ] - - with GlobalSymbolIndex(db_path, project_id=1) as store: - store.update_file_relationships(file_path, rels) - - # Exact match - results = store.query_by_target("TargetClass", prefix_mode=False) - assert len(results) == 1 - src_file, src_sym, rel_type, line = results[0] - assert src_sym == "caller" - assert rel_type == "calls" - assert line == 10 - - -def test_query_by_target_prefix(temp_paths: Path): - db_path = temp_paths / "indexes" / "_global_symbols.db" - file_path = temp_paths / "src" / "a.py" - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("", encoding="utf-8") - - rels = [ - _make_rel("c1", "TargetClass", source_file="src/a.py", source_line=10), - _make_rel("c2", "TargetClassExtra", source_file="src/a.py", source_line=20), - _make_rel("c3", "Unrelated", source_file="src/a.py", source_line=30), - ] - - with GlobalSymbolIndex(db_path, project_id=1) as store: - store.update_file_relationships(file_path, rels) - - # Prefix match should return both Target* rows - results = store.query_by_target("TargetClass", prefix_mode=True) - assert len(results) == 2 - symbols = {r[1] for r in results} - assert symbols == {"c1", "c2"} - - -def test_query_by_target_cross_directory(temp_paths: Path): - """Relationships from different files can be queried by the same target.""" - db_path = temp_paths / "indexes" / "_global_symbols.db" - file_a = temp_paths / "src" / "a.py" - file_b = temp_paths / "lib" / "b.py" - for f in (file_a, file_b): - f.parent.mkdir(parents=True, exist_ok=True) - f.write_text("", encoding="utf-8") - - with GlobalSymbolIndex(db_path, project_id=1) as store: - store.update_file_relationships( - file_a, - [_make_rel("funcA", "SharedTarget", source_file="src/a.py", source_line=5)], - ) - store.update_file_relationships( - file_b, - [_make_rel("funcB", "SharedTarget", source_file="lib/b.py", source_line=8)], - ) - - results = store.query_by_target("SharedTarget", prefix_mode=False) - assert len(results) == 2 - files = {r[0] for r in results} - assert str(file_a.resolve()) in files - assert str(file_b.resolve()) in files - - -# ------------------------------------------------------------------ -# query_relationships_for_symbols -# ------------------------------------------------------------------ - - -def test_query_relationships_for_symbols_source_match(temp_paths: Path): - db_path = temp_paths / "indexes" / "_global_symbols.db" - file_path = temp_paths / "src" / "mod.py" - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("", encoding="utf-8") - - rels = [ - _make_rel("MyClass", "BaseClass", RelationshipType.INHERITS, "src/mod.py", source_line=1), - _make_rel("helper", "utils", RelationshipType.IMPORTS, "src/mod.py", source_line=2), - ] - - with GlobalSymbolIndex(db_path, project_id=1) as store: - store.update_file_relationships(file_path, rels) - - # Query by source_symbol name - rows = store.query_relationships_for_symbols(["MyClass"]) - assert len(rows) >= 1 - assert any(r["source_symbol"] == "MyClass" for r in rows) - - -def test_query_relationships_for_symbols_target_match(temp_paths: Path): - db_path = temp_paths / "indexes" / "_global_symbols.db" - file_path = temp_paths / "src" / "mod.py" - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("", encoding="utf-8") - - rels = [ - _make_rel("caller", "TargetFunc", source_file="src/mod.py", source_line=5), - ] - - with GlobalSymbolIndex(db_path, project_id=1) as store: - store.update_file_relationships(file_path, rels) - - # Query by target name -- should match via LIKE %TargetFunc - rows = store.query_relationships_for_symbols(["TargetFunc"]) - assert len(rows) >= 1 - assert any(r["target_qualified_name"] == "TargetFunc" for r in rows) - - -def test_query_relationships_for_symbols_empty_list(temp_paths: Path): - db_path = temp_paths / "indexes" / "_global_symbols.db" - - with GlobalSymbolIndex(db_path, project_id=1) as store: - rows = store.query_relationships_for_symbols([]) - assert rows == [] - - -def test_query_relationships_for_symbols_qualified_target(temp_paths: Path): - """A qualified target like 'lib/b.py::BaseClass' should still match 'BaseClass'.""" - db_path = temp_paths / "indexes" / "_global_symbols.db" - file_path = temp_paths / "src" / "a.py" - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("", encoding="utf-8") - - rel = CodeRelationship( - source_symbol="Child", - target_symbol="BaseClass", - relationship_type=RelationshipType.INHERITS, - source_file="src/a.py", - target_file="lib/b.py", - source_line=1, - ) - - with GlobalSymbolIndex(db_path, project_id=1) as store: - store.update_file_relationships(file_path, [rel]) - - # The qualified name is "lib/b.py::BaseClass" - # query_relationships_for_symbols uses LIKE %BaseClass which should match - rows = store.query_relationships_for_symbols(["BaseClass"]) - assert len(rows) == 1 - assert rows[0]["target_qualified_name"] == "lib/b.py::BaseClass" - - -# ------------------------------------------------------------------ -# delete_file_relationships -# ------------------------------------------------------------------ - - -def test_delete_file_relationships(temp_paths: Path): - db_path = temp_paths / "indexes" / "_global_symbols.db" - file_path = temp_paths / "src" / "a.py" - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("", encoding="utf-8") - - with GlobalSymbolIndex(db_path, project_id=1) as store: - store.update_file_relationships( - file_path, - [ - _make_rel("f1", "t1", source_file="src/a.py", source_line=1), - _make_rel("f2", "t2", source_file="src/a.py", source_line=2), - ], - ) - - deleted = store.delete_file_relationships(file_path) - assert deleted == 2 - - conn = store._get_connection() - count = conn.execute( - "SELECT COUNT(*) FROM global_relationships WHERE project_id=1" - ).fetchone()[0] - assert count == 0 - - -def test_delete_file_relationships_no_rows(temp_paths: Path): - db_path = temp_paths / "indexes" / "_global_symbols.db" - nonexistent = temp_paths / "src" / "nope.py" - - with GlobalSymbolIndex(db_path, project_id=1) as store: - deleted = store.delete_file_relationships(nonexistent) - assert deleted == 0 - - -# ------------------------------------------------------------------ -# Project isolation -# ------------------------------------------------------------------ - - -def test_project_isolation(temp_paths: Path): - """Relationships from different project_ids should not leak.""" - db_path = temp_paths / "indexes" / "_global_symbols.db" - file_path = temp_paths / "src" / "a.py" - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("", encoding="utf-8") - - store1 = GlobalSymbolIndex(db_path, project_id=1) - store1.initialize() - store2 = GlobalSymbolIndex(db_path, project_id=2) - # store2 reuses the same DB; schema already created. - - store1.update_file_relationships( - file_path, - [_make_rel("a", "SharedTarget", source_file="src/a.py")], - ) - store2.update_file_relationships( - file_path, - [_make_rel("b", "SharedTarget", source_file="src/a.py")], - ) - - results1 = store1.query_by_target("SharedTarget", prefix_mode=False) - results2 = store2.query_by_target("SharedTarget", prefix_mode=False) - assert len(results1) == 1 - assert results1[0][1] == "a" - assert len(results2) == 1 - assert results2[0][1] == "b" - - store1.close() - store2.close() - - -# ------------------------------------------------------------------ -# Performance benchmarks -# ------------------------------------------------------------------ - - -def test_update_file_relationships_100_rows_under_50ms(temp_paths: Path): - """Batch insert of 100 relationships should complete in < 50ms.""" - db_path = temp_paths / "indexes" / "_global_symbols.db" - file_path = temp_paths / "src" / "perf.py" - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("", encoding="utf-8") - - rels = [ - _make_rel(f"src_{i}", f"tgt_{i}", source_file="src/perf.py", source_line=i + 1) - for i in range(100) - ] - - with GlobalSymbolIndex(db_path, project_id=1) as store: - start = time.perf_counter() - store.update_file_relationships(file_path, rels) - elapsed_ms = (time.perf_counter() - start) * 1000 - assert elapsed_ms < 50.0, f"Took {elapsed_ms:.1f}ms, expected < 50ms" - - -def test_query_by_target_exact_under_5ms(temp_paths: Path): - """Exact-match query should complete in < 5ms with 500 rows.""" - db_path = temp_paths / "indexes" / "_global_symbols.db" - file_path = temp_paths / "src" / "perf.py" - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("", encoding="utf-8") - - rels = [ - _make_rel(f"src_{i}", f"Target_{i}", source_file="src/perf.py", source_line=i + 1) - for i in range(500) - ] - - with GlobalSymbolIndex(db_path, project_id=1) as store: - store.update_file_relationships(file_path, rels) - - start = time.perf_counter() - results = store.query_by_target("Target_250", prefix_mode=False) - elapsed_ms = (time.perf_counter() - start) * 1000 - assert elapsed_ms < 5.0, f"Took {elapsed_ms:.1f}ms, expected < 5ms" - assert len(results) == 1 - - -# ------------------------------------------------------------------ -# _build_qualified_name -# ------------------------------------------------------------------ - - -def test_build_qualified_name_with_target_file(): - rel = _make_rel("src", "tgt", target_file="lib/utils.py") - assert GlobalSymbolIndex._build_qualified_name(rel) == "lib/utils.py::tgt" - - -def test_build_qualified_name_without_target_file(): - rel = _make_rel("src", "tgt", target_file=None) - assert GlobalSymbolIndex._build_qualified_name(rel) == "tgt" diff --git a/codex-lens/tests/test_global_symbol_index.py b/codex-lens/tests/test_global_symbol_index.py deleted file mode 100644 index b82d708b..00000000 --- a/codex-lens/tests/test_global_symbol_index.py +++ /dev/null @@ -1,192 +0,0 @@ -import sqlite3 -import tempfile -import time -from pathlib import Path -from unittest.mock import MagicMock - -import pytest - -from codexlens.config import Config -from codexlens.entities import Symbol -from codexlens.search.chain_search import ChainSearchEngine -from codexlens.storage.dir_index import DirIndexStore -from codexlens.storage.global_index import GlobalSymbolIndex -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore - - -@pytest.fixture() -def temp_paths(): - tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True) - root = Path(tmpdir.name) - yield root - try: - tmpdir.cleanup() - except (PermissionError, OSError): - pass - - -def test_global_symbol_index_add_and_search_under_50ms(temp_paths: Path): - db_path = temp_paths / "indexes" / "_global_symbols.db" - file_path = temp_paths / "src" / "a.py" - index_path = temp_paths / "indexes" / "_index.db" - - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("class AuthManager:\n pass\n", encoding="utf-8") - index_path.parent.mkdir(parents=True, exist_ok=True) - index_path.write_text("", encoding="utf-8") - - store = GlobalSymbolIndex(db_path, project_id=1) - store.initialize() - - # Insert enough rows to ensure index usage, still small enough to be fast. - for i in range(200): - store.add_symbol( - Symbol(name=f"AuthManager{i}", kind="class", range=(1, 2)), - file_path=file_path, - index_path=index_path, - ) - - start = time.perf_counter() - results = store.search("AuthManager", kind="class", limit=50, prefix_mode=True) - elapsed_ms = (time.perf_counter() - start) * 1000 - - assert elapsed_ms < 50.0 - assert len(results) >= 1 - assert all(r.kind == "class" for r in results) - assert all((r.file or "").endswith("a.py") for r in results) - - locations = store.search_symbols("AuthManager", kind="class", limit=50, prefix_mode=True) - assert locations - assert all(isinstance(p, str) and isinstance(rng, tuple) for p, rng in locations) - - -def test_update_file_symbols_replaces_atomically(temp_paths: Path): - db_path = temp_paths / "indexes" / "_global_symbols.db" - file_path = temp_paths / "src" / "mod.py" - index_path = temp_paths / "indexes" / "_index.db" - - file_path.parent.mkdir(parents=True, exist_ok=True) - file_path.write_text("def a():\n pass\n", encoding="utf-8") - index_path.parent.mkdir(parents=True, exist_ok=True) - index_path.write_text("", encoding="utf-8") - - store = GlobalSymbolIndex(db_path, project_id=7) - store.initialize() - - store.update_file_symbols( - file_path=file_path, - symbols=[ - Symbol(name="old_func", kind="function", range=(1, 2)), - Symbol(name="Other", kind="class", range=(10, 20)), - ], - index_path=index_path, - ) - - assert any(s.name == "old_func" for s in store.search("old_", prefix_mode=True)) - - # Replace with new set (delete + insert) - store.update_file_symbols( - file_path=file_path, - symbols=[Symbol(name="new_func", kind="function", range=(3, 4))], - index_path=index_path, - ) - - assert not any(s.name == "old_func" for s in store.search("old_", prefix_mode=True)) - assert any(s.name == "new_func" for s in store.search("new_", prefix_mode=True)) - - # Backward-compatible path: omit index_path after it has been established. - store.update_file_symbols( - file_path=file_path, - symbols=[Symbol(name="new_func2", kind="function", range=(5, 6))], - index_path=None, - ) - assert any(s.name == "new_func2" for s in store.search("new_func2", prefix_mode=True)) - - -def test_dir_index_store_updates_global_index_when_enabled(temp_paths: Path): - config = Config(data_dir=temp_paths / "data") - - index_db_path = temp_paths / "indexes" / "proj" / "_index.db" - global_db_path = temp_paths / "indexes" / "proj" / GlobalSymbolIndex.DEFAULT_DB_NAME - source_file = temp_paths / "src" / "x.py" - - source_file.parent.mkdir(parents=True, exist_ok=True) - source_file.write_text("class MyClass:\n pass\n", encoding="utf-8") - - global_index = GlobalSymbolIndex(global_db_path, project_id=123) - global_index.initialize() - - with DirIndexStore(index_db_path, config=config, global_index=global_index) as store: - store.add_file( - name=source_file.name, - full_path=source_file, - content=source_file.read_text(encoding="utf-8"), - language="python", - symbols=[Symbol(name="MyClass", kind="class", range=(1, 2))], - ) - - matches = global_index.search("MyClass", kind="class", limit=10) - assert len(matches) == 1 - assert matches[0].file == str(source_file.resolve()) - - # Verify all required fields were written. - conn = sqlite3.connect(global_db_path) - row = conn.execute( - """ - SELECT project_id, symbol_name, symbol_kind, file_path, start_line, end_line, index_path - FROM global_symbols - WHERE project_id=? AND symbol_name=? - """, - (123, "MyClass"), - ).fetchone() - conn.close() - - assert row is not None - assert row[0] == 123 - assert row[1] == "MyClass" - assert row[2] == "class" - assert row[3] == str(source_file.resolve()) - assert row[4] == 1 - assert row[5] == 2 - assert row[6] == str(index_db_path.resolve()) - - -def test_chain_search_uses_global_index_fast_path(temp_paths: Path): - project_root = temp_paths / "project" - project_root.mkdir(parents=True, exist_ok=True) - - index_root = temp_paths / "indexes" - mapper = PathMapper(index_root=index_root) - index_db_path = mapper.source_to_index_db(project_root) - index_db_path.parent.mkdir(parents=True, exist_ok=True) - index_db_path.write_text("", encoding="utf-8") # existence is enough for _find_start_index - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - project_info = registry.register_project(project_root, mapper.source_to_index_dir(project_root)) - - global_db_path = project_info.index_root / GlobalSymbolIndex.DEFAULT_DB_NAME - global_index = GlobalSymbolIndex(global_db_path, project_id=project_info.id) - global_index.initialize() - - file_path = project_root / "auth.py" - global_index.update_file_symbols( - file_path=file_path, - symbols=[ - Symbol(name="AuthManager", kind="class", range=(1, 10)), - Symbol(name="authenticate", kind="function", range=(12, 20)), - ], - index_path=index_db_path, - ) - - config = Config(data_dir=temp_paths / "data", global_symbol_index_enabled=True) - engine = ChainSearchEngine(registry, mapper, config=config) - assert registry.find_by_source_path(str(project_root)) is not None - assert registry.find_by_source_path(str(project_root.resolve())) is not None - assert global_db_path.exists() - assert GlobalSymbolIndex(global_db_path, project_id=project_info.id).search("Auth", limit=10) - engine._search_symbols_parallel = MagicMock(side_effect=AssertionError("should not traverse chain")) - - symbols = engine.search_symbols("Auth", project_root) - assert any(s.name == "AuthManager" for s in symbols) diff --git a/codex-lens/tests/test_graph_expansion.py b/codex-lens/tests/test_graph_expansion.py deleted file mode 100644 index 6588a5e4..00000000 --- a/codex-lens/tests/test_graph_expansion.py +++ /dev/null @@ -1,188 +0,0 @@ -import sqlite3 -import tempfile -from pathlib import Path - -import pytest - -from codexlens.config import Config -from codexlens.entities import CodeRelationship, RelationshipType, SearchResult, Symbol -from codexlens.search.chain_search import ChainSearchEngine, SearchOptions -from codexlens.search.graph_expander import GraphExpander -from codexlens.storage.dir_index import DirIndexStore -from codexlens.storage.index_tree import _compute_graph_neighbors -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore - - -@pytest.fixture() -def temp_paths() -> Path: - tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True) - root = Path(tmpdir.name) - yield root - try: - tmpdir.cleanup() - except (PermissionError, OSError): - pass - - -def _create_index_with_neighbors(root: Path) -> tuple[PathMapper, Path, Path]: - project_root = root / "project" - project_root.mkdir(parents=True, exist_ok=True) - - index_root = root / "indexes" - mapper = PathMapper(index_root=index_root) - index_db_path = mapper.source_to_index_db(project_root) - index_db_path.parent.mkdir(parents=True, exist_ok=True) - - content = "\n".join( - [ - "def a():", - " b()", - "", - "def b():", - " c()", - "", - "def c():", - " return 1", - "", - ] - ) - file_path = project_root / "module.py" - file_path.write_text(content, encoding="utf-8") - - symbols = [ - Symbol(name="a", kind="function", range=(1, 2), file=str(file_path)), - Symbol(name="b", kind="function", range=(4, 5), file=str(file_path)), - Symbol(name="c", kind="function", range=(7, 8), file=str(file_path)), - ] - relationships = [ - CodeRelationship( - source_symbol="a", - target_symbol="b", - relationship_type=RelationshipType.CALL, - source_file=str(file_path), - target_file=None, - source_line=2, - ), - CodeRelationship( - source_symbol="b", - target_symbol="c", - relationship_type=RelationshipType.CALL, - source_file=str(file_path), - target_file=None, - source_line=5, - ), - ] - - config = Config(data_dir=root / "data") - store = DirIndexStore(index_db_path, config=config) - store.initialize() - store.add_file( - name=file_path.name, - full_path=file_path, - content=content, - language="python", - symbols=symbols, - relationships=relationships, - ) - _compute_graph_neighbors(store) - store.close() - - return mapper, project_root, file_path - - -def test_graph_neighbors_precomputed_two_hop(temp_paths: Path) -> None: - mapper, project_root, file_path = _create_index_with_neighbors(temp_paths) - index_db_path = mapper.source_to_index_db(project_root) - - conn = sqlite3.connect(str(index_db_path)) - conn.row_factory = sqlite3.Row - try: - rows = conn.execute( - """ - SELECT s1.name AS source_name, s2.name AS neighbor_name, gn.relationship_depth - FROM graph_neighbors gn - JOIN symbols s1 ON s1.id = gn.source_symbol_id - JOIN symbols s2 ON s2.id = gn.neighbor_symbol_id - ORDER BY source_name, neighbor_name, relationship_depth - """ - ).fetchall() - finally: - conn.close() - - triples = {(r["source_name"], r["neighbor_name"], int(r["relationship_depth"])) for r in rows} - assert ("a", "b", 1) in triples - assert ("a", "c", 2) in triples - assert ("b", "c", 1) in triples - assert ("c", "b", 1) in triples - assert file_path.exists() - - -def test_graph_expander_returns_related_results_with_depth_metadata(temp_paths: Path) -> None: - mapper, project_root, file_path = _create_index_with_neighbors(temp_paths) - _ = project_root - - expander = GraphExpander(mapper, config=Config(data_dir=temp_paths / "data", graph_expansion_depth=2)) - base = SearchResult( - path=str(file_path.resolve()), - score=1.0, - excerpt="", - content=None, - start_line=1, - end_line=2, - symbol_name="a", - symbol_kind="function", - ) - related = expander.expand([base], depth=2, max_expand=1, max_related=10) - - depth_by_symbol = {r.symbol_name: r.metadata.get("relationship_depth") for r in related} - assert depth_by_symbol.get("b") == 1 - assert depth_by_symbol.get("c") == 2 - - -def test_chain_search_populates_related_results_when_enabled(temp_paths: Path) -> None: - mapper, project_root, file_path = _create_index_with_neighbors(temp_paths) - _ = file_path - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - - config = Config( - data_dir=temp_paths / "data", - enable_graph_expansion=True, - graph_expansion_depth=2, - ) - engine = ChainSearchEngine(registry, mapper, config=config) - try: - options = SearchOptions(depth=0, total_limit=10, enable_fuzzy=False) - result = engine.search("b", project_root, options) - - assert result.results - assert result.results[0].symbol_name == "a" - - depth_by_symbol = {r.symbol_name: r.metadata.get("relationship_depth") for r in result.related_results} - assert depth_by_symbol.get("b") == 1 - assert depth_by_symbol.get("c") == 2 - finally: - engine.close() - - -def test_chain_search_related_results_empty_when_disabled(temp_paths: Path) -> None: - mapper, project_root, file_path = _create_index_with_neighbors(temp_paths) - _ = file_path - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - - config = Config( - data_dir=temp_paths / "data", - enable_graph_expansion=False, - ) - engine = ChainSearchEngine(registry, mapper, config=config) - try: - options = SearchOptions(depth=0, total_limit=10, enable_fuzzy=False) - result = engine.search("b", project_root, options) - assert result.related_results == [] - finally: - engine.close() - diff --git a/codex-lens/tests/test_hybrid_chunker.py b/codex-lens/tests/test_hybrid_chunker.py deleted file mode 100644 index e9085eff..00000000 --- a/codex-lens/tests/test_hybrid_chunker.py +++ /dev/null @@ -1,622 +0,0 @@ -"""Tests for Hybrid Docstring Chunker.""" - -import pytest - -from codexlens.entities import SemanticChunk, Symbol -from codexlens.semantic.chunker import ( - ChunkConfig, - Chunker, - DocstringExtractor, - HybridChunker, -) - - -class TestDocstringExtractor: - """Tests for DocstringExtractor class.""" - - def test_extract_single_line_python_docstring(self): - """Test extraction of single-line Python docstring.""" - content = '''def hello(): - """This is a docstring.""" - return True -''' - docstrings = DocstringExtractor.extract_python_docstrings(content) - assert len(docstrings) == 1 - assert docstrings[0][1] == 2 # start_line - assert docstrings[0][2] == 2 # end_line - assert '"""This is a docstring."""' in docstrings[0][0] - - def test_extract_multi_line_python_docstring(self): - """Test extraction of multi-line Python docstring.""" - content = '''def process(): - """ - This is a multi-line - docstring with details. - """ - return 42 -''' - docstrings = DocstringExtractor.extract_python_docstrings(content) - assert len(docstrings) == 1 - assert docstrings[0][1] == 2 # start_line - assert docstrings[0][2] == 5 # end_line - assert "multi-line" in docstrings[0][0] - - def test_extract_multiple_python_docstrings(self): - """Test extraction of multiple docstrings from same file.""" - content = '''"""Module docstring.""" - -def func1(): - """Function 1 docstring.""" - pass - -class MyClass: - """Class docstring.""" - - def method(self): - """Method docstring.""" - pass -''' - docstrings = DocstringExtractor.extract_python_docstrings(content) - assert len(docstrings) == 4 - lines = [d[1] for d in docstrings] - assert 1 in lines # Module docstring - assert 4 in lines # func1 docstring - assert 8 in lines # Class docstring - assert 11 in lines # method docstring - - def test_extract_python_docstring_single_quotes(self): - """Test extraction with single quote docstrings.""" - content = """def test(): - '''Single quote docstring.''' - return None -""" - docstrings = DocstringExtractor.extract_python_docstrings(content) - assert len(docstrings) == 1 - assert "Single quote docstring" in docstrings[0][0] - - def test_extract_jsdoc_single_comment(self): - """Test extraction of single JSDoc comment.""" - content = '''/** - * This is a JSDoc comment - * @param {string} name - */ -function hello(name) { - return name; -} -''' - comments = DocstringExtractor.extract_jsdoc_comments(content) - assert len(comments) == 1 - assert comments[0][1] == 1 # start_line - assert comments[0][2] == 4 # end_line - assert "JSDoc comment" in comments[0][0] - - def test_extract_multiple_jsdoc_comments(self): - """Test extraction of multiple JSDoc comments.""" - content = '''/** - * Function 1 - */ -function func1() {} - -/** - * Class description - */ -class MyClass { - /** - * Method description - */ - method() {} -} -''' - comments = DocstringExtractor.extract_jsdoc_comments(content) - assert len(comments) == 3 - - def test_extract_docstrings_unsupported_language(self): - """Test that unsupported languages return empty list.""" - content = "// Some code" - docstrings = DocstringExtractor.extract_docstrings(content, "ruby") - assert len(docstrings) == 0 - - def test_extract_docstrings_empty_content(self): - """Test extraction from empty content.""" - docstrings = DocstringExtractor.extract_python_docstrings("") - assert len(docstrings) == 0 - - -class TestHybridChunker: - """Tests for HybridChunker class.""" - - def test_hybrid_chunker_initialization(self): - """Test HybridChunker initialization with defaults.""" - chunker = HybridChunker() - assert chunker.config is not None - assert chunker.base_chunker is not None - assert chunker.docstring_extractor is not None - - def test_hybrid_chunker_custom_config(self): - """Test HybridChunker with custom config.""" - config = ChunkConfig(max_chunk_size=500, min_chunk_size=20) - chunker = HybridChunker(config=config) - assert chunker.config.max_chunk_size == 500 - assert chunker.config.min_chunk_size == 20 - - def test_hybrid_chunker_isolates_docstrings(self): - """Test that hybrid chunker isolates docstrings into separate chunks.""" - config = ChunkConfig(min_chunk_size=10) - chunker = HybridChunker(config=config) - - content = '''"""Module-level docstring.""" - -def hello(): - """Function docstring.""" - return "world" - -def goodbye(): - """Another docstring.""" - return "farewell" -''' - symbols = [ - Symbol(name="hello", kind="function", range=(3, 5)), - Symbol(name="goodbye", kind="function", range=(7, 9)), - ] - - chunks = chunker.chunk_file(content, symbols, "test.py", "python") - - # Should have 3 docstring chunks + 2 code chunks = 5 total - docstring_chunks = [c for c in chunks if c.metadata.get("chunk_type") == "docstring"] - code_chunks = [c for c in chunks if c.metadata.get("chunk_type") == "code"] - - assert len(docstring_chunks) == 3 - assert len(code_chunks) == 2 - assert all(c.metadata["strategy"] == "hybrid" for c in chunks) - - def test_hybrid_chunker_docstring_isolation_percentage(self): - """Test that >98% of docstrings are isolated correctly.""" - config = ChunkConfig(min_chunk_size=5) - chunker = HybridChunker(config=config) - - # Create content with 10 docstrings - lines = [] - lines.append('"""Module docstring."""\n') - lines.append('\n') - - for i in range(10): - lines.append(f'def func{i}():\n') - lines.append(f' """Docstring for func{i}."""\n') - lines.append(f' return {i}\n') - lines.append('\n') - - content = "".join(lines) - symbols = [ - Symbol(name=f"func{i}", kind="function", range=(3 + i*4, 5 + i*4)) - for i in range(10) - ] - - chunks = chunker.chunk_file(content, symbols, "test.py", "python") - - docstring_chunks = [c for c in chunks if c.metadata.get("chunk_type") == "docstring"] - - # We have 11 docstrings total (1 module + 10 functions) - # Verify >98% isolation (at least 10.78 out of 11) - isolation_rate = len(docstring_chunks) / 11 - assert isolation_rate >= 0.98, f"Docstring isolation rate {isolation_rate:.2%} < 98%" - - def test_hybrid_chunker_javascript_jsdoc(self): - """Test hybrid chunker with JavaScript JSDoc comments.""" - config = ChunkConfig(min_chunk_size=10) - chunker = HybridChunker(config=config) - - content = '''/** - * Main function description - */ -function main() { - return 42; -} - -/** - * Helper function - */ -function helper() { - return 0; -} -''' - symbols = [ - Symbol(name="main", kind="function", range=(4, 6)), - Symbol(name="helper", kind="function", range=(11, 13)), - ] - - chunks = chunker.chunk_file(content, symbols, "test.js", "javascript") - - docstring_chunks = [c for c in chunks if c.metadata.get("chunk_type") == "docstring"] - code_chunks = [c for c in chunks if c.metadata.get("chunk_type") == "code"] - - assert len(docstring_chunks) == 2 - assert len(code_chunks) == 2 - - def test_hybrid_chunker_no_docstrings(self): - """Test hybrid chunker with code containing no docstrings.""" - config = ChunkConfig(min_chunk_size=10) - chunker = HybridChunker(config=config) - - content = '''def hello(): - return "world" - -def goodbye(): - return "farewell" -''' - symbols = [ - Symbol(name="hello", kind="function", range=(1, 2)), - Symbol(name="goodbye", kind="function", range=(4, 5)), - ] - - chunks = chunker.chunk_file(content, symbols, "test.py", "python") - - # All chunks should be code chunks - assert all(c.metadata.get("chunk_type") == "code" for c in chunks) - assert len(chunks) == 2 - - def test_hybrid_chunker_preserves_metadata(self): - """Test that hybrid chunker preserves all required metadata.""" - config = ChunkConfig(min_chunk_size=5) - chunker = HybridChunker(config=config) - - content = '''"""Module doc.""" - -def test(): - """Test doc.""" - pass -''' - symbols = [Symbol(name="test", kind="function", range=(3, 5))] - - chunks = chunker.chunk_file(content, symbols, "/path/to/file.py", "python") - - for chunk in chunks: - assert "file" in chunk.metadata - assert "language" in chunk.metadata - assert "chunk_type" in chunk.metadata - assert "start_line" in chunk.metadata - assert "end_line" in chunk.metadata - assert "strategy" in chunk.metadata - assert chunk.metadata["strategy"] == "hybrid" - - def test_hybrid_chunker_no_symbols_fallback(self): - """Test hybrid chunker falls back to sliding window when no symbols.""" - config = ChunkConfig(min_chunk_size=5, max_chunk_size=100) - chunker = HybridChunker(config=config) - - content = '''"""Module docstring.""" - -# Just some comments -x = 42 -y = 100 -''' - chunks = chunker.chunk_file(content, [], "test.py", "python") - - # Should have 1 docstring chunk + sliding window chunks for remaining code - docstring_chunks = [c for c in chunks if c.metadata.get("chunk_type") == "docstring"] - code_chunks = [c for c in chunks if c.metadata.get("chunk_type") == "code"] - - assert len(docstring_chunks) == 1 - assert len(code_chunks) >= 0 # May or may not have code chunks depending on size - - def test_get_excluded_line_ranges(self): - """Test _get_excluded_line_ranges helper method.""" - chunker = HybridChunker() - - docstrings = [ - ("doc1", 1, 3), - ("doc2", 5, 7), - ("doc3", 10, 10), - ] - - excluded = chunker._get_excluded_line_ranges(docstrings) - - assert 1 in excluded - assert 2 in excluded - assert 3 in excluded - assert 4 not in excluded - assert 5 in excluded - assert 6 in excluded - assert 7 in excluded - assert 8 not in excluded - assert 9 not in excluded - assert 10 in excluded - - def test_filter_symbols_outside_docstrings(self): - """Test _filter_symbols_outside_docstrings helper method.""" - chunker = HybridChunker() - - symbols = [ - Symbol(name="func1", kind="function", range=(1, 5)), - Symbol(name="func2", kind="function", range=(10, 15)), - Symbol(name="func3", kind="function", range=(20, 25)), - ] - - # Exclude lines 1-5 (func1) and 10-12 (partial overlap with func2) - excluded_lines = set(range(1, 6)) | set(range(10, 13)) - - filtered = chunker._filter_symbols_outside_docstrings(symbols, excluded_lines) - - # func1 should be filtered out (completely within excluded) - # func2 should remain (partial overlap) - # func3 should remain (no overlap) - assert len(filtered) == 2 - names = [s.name for s in filtered] - assert "func1" not in names - assert "func2" in names - assert "func3" in names - excluded = chunker._get_excluded_line_ranges(docstrings) - - assert 1 in excluded - assert 2 in excluded - assert 3 in excluded - assert 4 not in excluded - assert 5 in excluded - assert 6 in excluded - assert 7 in excluded - assert 8 not in excluded - assert 9 not in excluded - assert 10 in excluded - - def test_filter_symbols_outside_docstrings(self): - """Test _filter_symbols_outside_docstrings helper method.""" - chunker = HybridChunker() - - symbols = [ - Symbol(name="func1", kind="function", range=(1, 5)), - Symbol(name="func2", kind="function", range=(10, 15)), - Symbol(name="func3", kind="function", range=(20, 25)), - ] - - # Exclude lines 1-5 (func1) and 10-12 (partial overlap with func2) - excluded_lines = set(range(1, 6)) | set(range(10, 13)) - - filtered = chunker._filter_symbols_outside_docstrings(symbols, excluded_lines) - - # func1 should be filtered out (completely within excluded) - # func2 should remain (partial overlap) - # func3 should remain (no overlap) - assert len(filtered) == 2 - names = [s.name for s in filtered] - assert "func1" not in names - assert "func2" in names - assert "func3" in names - - def test_hybrid_chunker_docstring_only_file(self): - """Test that hybrid chunker correctly handles file with only docstrings.""" - config = ChunkConfig(min_chunk_size=5) - chunker = HybridChunker(config=config) - - content = '''"""First docstring.""" - -"""Second docstring.""" - -"""Third docstring.""" -''' - chunks = chunker.chunk_file(content, [], "test.py", "python") - - # Should only have docstring chunks - assert all(c.metadata.get("chunk_type") == "docstring" for c in chunks) - assert len(chunks) == 3 - - -class TestChunkConfigStrategy: - """Tests for strategy field in ChunkConfig.""" - - def test_chunk_config_default_strategy(self): - """Test that default strategy is 'auto'.""" - config = ChunkConfig() - assert config.strategy == "auto" - - def test_chunk_config_custom_strategy(self): - """Test setting custom strategy.""" - config = ChunkConfig(strategy="hybrid") - assert config.strategy == "hybrid" - - config = ChunkConfig(strategy="symbol") - assert config.strategy == "symbol" - - config = ChunkConfig(strategy="sliding_window") - assert config.strategy == "sliding_window" - - -class TestHybridChunkerIntegration: - """Integration tests for hybrid chunker with realistic code.""" - - def test_realistic_python_module(self): - """Test hybrid chunker with realistic Python module.""" - config = ChunkConfig(min_chunk_size=10) - chunker = HybridChunker(config=config) - - content = '''""" -Data processing module for handling user data. - -This module provides functions for cleaning and validating user input. -""" - -from typing import Dict, Any - - -def validate_email(email: str) -> bool: - """ - Validate an email address format. - - Args: - email: The email address to validate - - Returns: - True if valid, False otherwise - """ - import re - pattern = r'^[\\w\\.-]+@[\\w\\.-]+\\.\\w+$' - return bool(re.match(pattern, email)) - - -class UserProfile: - """ - User profile management class. - - Handles user data storage and retrieval. - """ - - def __init__(self, user_id: int): - """Initialize user profile with ID.""" - self.user_id = user_id - self.data = {} - - def update_data(self, data: Dict[str, Any]) -> None: - """ - Update user profile data. - - Args: - data: Dictionary of user data to update - """ - self.data.update(data) -''' - - symbols = [ - Symbol(name="validate_email", kind="function", range=(11, 23)), - Symbol(name="UserProfile", kind="class", range=(26, 44)), - ] - - chunks = chunker.chunk_file(content, symbols, "users.py", "python") - - docstring_chunks = [c for c in chunks if c.metadata.get("chunk_type") == "docstring"] - code_chunks = [c for c in chunks if c.metadata.get("chunk_type") == "code"] - - # Verify docstrings are isolated - assert len(docstring_chunks) >= 4 # Module, function, class, methods - assert len(code_chunks) >= 1 # At least one code chunk - - # Verify >98% docstring isolation - # Count total docstring lines in original - total_docstring_lines = sum( - d[2] - d[1] + 1 - for d in DocstringExtractor.extract_python_docstrings(content) - ) - isolated_docstring_lines = sum( - c.metadata["end_line"] - c.metadata["start_line"] + 1 - for c in docstring_chunks - ) - - isolation_rate = isolated_docstring_lines / total_docstring_lines if total_docstring_lines > 0 else 1 - assert isolation_rate >= 0.98 - - def test_hybrid_chunker_performance_overhead(self): - """Test that hybrid chunker has <5% overhead vs base chunker on files without docstrings.""" - import time - - config = ChunkConfig(min_chunk_size=5) - - # Create larger content with NO docstrings (worst case for hybrid chunker) - lines = [] - for i in range(1000): - lines.append(f'def func{i}():\n') - lines.append(f' x = {i}\n') - lines.append(f' y = {i * 2}\n') - lines.append(f' return x + y\n') - lines.append('\n') - content = "".join(lines) - - symbols = [ - Symbol(name=f"func{i}", kind="function", range=(1 + i*5, 4 + i*5)) - for i in range(1000) - ] - - # Warm up - base_chunker = Chunker(config=config) - base_chunker.chunk_file(content[:100], symbols[:10], "test.py", "python") - - hybrid_chunker = HybridChunker(config=config) - hybrid_chunker.chunk_file(content[:100], symbols[:10], "test.py", "python") - - # Measure base chunker (3 runs) - base_times = [] - for _ in range(3): - start = time.perf_counter() - base_chunker.chunk_file(content, symbols, "test.py", "python") - base_times.append(time.perf_counter() - start) - base_time = sum(base_times) / len(base_times) - - # Measure hybrid chunker (3 runs) - hybrid_times = [] - for _ in range(3): - start = time.perf_counter() - hybrid_chunker.chunk_file(content, symbols, "test.py", "python") - hybrid_times.append(time.perf_counter() - start) - hybrid_time = sum(hybrid_times) / len(hybrid_times) - - # Calculate overhead - overhead = ((hybrid_time - base_time) / base_time) * 100 if base_time > 0 else 0 - - # Verify <15% overhead (reasonable threshold for performance tests with system variance) - assert overhead < 15.0, f"Overhead {overhead:.2f}% exceeds 15% threshold (base={base_time:.4f}s, hybrid={hybrid_time:.4f}s)" - - -class TestHybridChunkerV1Optimizations: - """Tests for v1.0 optimization behaviors (parent metadata + determinism).""" - - def test_merged_docstring_metadata(self): - """Docstring chunks include parent_symbol metadata when applicable.""" - config = ChunkConfig(min_chunk_size=1) - chunker = HybridChunker(config=config) - - content = '''"""Module docstring.""" - -def hello(): - """Function docstring.""" - return 1 -''' - symbols = [Symbol(name="hello", kind="function", range=(3, 5))] - - chunks = chunker.chunk_file(content, symbols, "m.py", "python") - func_doc_chunks = [ - c for c in chunks - if c.metadata.get("chunk_type") == "docstring" and c.metadata.get("start_line") == 4 - ] - assert len(func_doc_chunks) == 1 - assert func_doc_chunks[0].metadata.get("parent_symbol") == "hello" - assert func_doc_chunks[0].metadata.get("parent_symbol_kind") == "function" - - def test_deterministic_chunk_boundaries(self): - """Chunk boundaries are stable across repeated runs on identical input.""" - config = ChunkConfig(max_chunk_size=80, overlap=10, min_chunk_size=1) - chunker = HybridChunker(config=config) - - # No docstrings, no symbols -> sliding window path. - content = "\n".join([f"line {i}: x = {i}" for i in range(1, 200)]) + "\n" - - boundaries = [] - for _ in range(3): - chunks = chunker.chunk_file(content, [], "deterministic.py", "python") - boundaries.append([ - (c.metadata.get("start_line"), c.metadata.get("end_line")) - for c in chunks - if c.metadata.get("chunk_type") == "code" - ]) - - assert boundaries[0] == boundaries[1] == boundaries[2] - - def test_orphan_docstrings(self): - """Module-level docstrings remain standalone (no parent_symbol assigned).""" - config = ChunkConfig(min_chunk_size=1) - chunker = HybridChunker(config=config) - - content = '''"""Module-level docstring.""" - -def hello(): - """Function docstring.""" - return 1 -''' - symbols = [Symbol(name="hello", kind="function", range=(3, 5))] - chunks = chunker.chunk_file(content, symbols, "orphan.py", "python") - - module_doc = [ - c for c in chunks - if c.metadata.get("chunk_type") == "docstring" and c.metadata.get("start_line") == 1 - ] - assert len(module_doc) == 1 - assert module_doc[0].metadata.get("parent_symbol") is None - - code_chunks = [c for c in chunks if c.metadata.get("chunk_type") == "code"] - assert code_chunks, "Expected at least one code chunk" - assert all("Module-level docstring" not in c.content for c in code_chunks) diff --git a/codex-lens/tests/test_hybrid_search_e2e.py b/codex-lens/tests/test_hybrid_search_e2e.py deleted file mode 100644 index 131aad14..00000000 --- a/codex-lens/tests/test_hybrid_search_e2e.py +++ /dev/null @@ -1,945 +0,0 @@ -"""End-to-end tests for hybrid search workflows (P2). - -Tests complete hybrid search pipeline including indexing, exact/fuzzy/hybrid modes, -and result relevance with real project structure. -""" - -import sqlite3 -import tempfile -from pathlib import Path - -import pytest - -from codexlens.config import Config -from codexlens.entities import SearchResult -from codexlens.search.hybrid_search import HybridSearchEngine -from codexlens.storage.dir_index import DirIndexStore - -# Check if pytest-benchmark is available -try: - import pytest_benchmark - BENCHMARK_AVAILABLE = True -except ImportError: - BENCHMARK_AVAILABLE = False - - -class TestHybridSearchBasics: - """Basic tests for HybridSearchEngine.""" - - @pytest.fixture - def temp_db(self): - """Create temporary database.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - yield db_path - if db_path.exists(): - db_path.unlink() - - @pytest.fixture - def index_store(self, temp_db): - """Create DirIndexStore instance.""" - store = DirIndexStore(temp_db) - yield store - store.close() - - def test_engine_initialization(self): - """Test HybridSearchEngine initializes with default weights.""" - engine = HybridSearchEngine() - assert engine.weights == HybridSearchEngine.DEFAULT_WEIGHTS - assert engine.weights["exact"] == 0.3 - assert engine.weights["fuzzy"] == 0.1 - assert engine.weights["vector"] == 0.6 - - def test_engine_custom_weights(self): - """Test HybridSearchEngine accepts custom weights.""" - custom_weights = {"exact": 0.5, "fuzzy": 0.5, "vector": 0.0} - engine = HybridSearchEngine(weights=custom_weights) - assert engine.weights == custom_weights - - def test_search_requires_index(self, temp_db): - """Test search requires initialized index.""" - engine = HybridSearchEngine() - # Empty database - should handle gracefully - results = engine.search(temp_db, "test", limit=10) - # May return empty or raise error - either is acceptable - assert isinstance(results, list) - - -class TestHybridSearchWithSampleProject: - """Tests with sample project structure.""" - - @pytest.fixture - def sample_project_db(self): - """Create database with sample Python + TypeScript project.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - - store = DirIndexStore(db_path) - store.initialize() - - # Sample Python files - python_files = { - "src/auth/authentication.py": """ -def authenticate_user(username, password): - '''Authenticate user with credentials''' - return check_credentials(username, password) - -def check_credentials(user, pwd): - return True -""", - "src/auth/authorization.py": """ -def authorize_user(user_id, resource): - '''Authorize user access to resource''' - return check_permissions(user_id, resource) - -def check_permissions(uid, res): - return True -""", - "src/models/user.py": """ -class User: - def __init__(self, username, email): - self.username = username - self.email = email - - def authenticate(self, password): - return authenticate_user(self.username, password) -""", - "src/api/user_api.py": """ -from flask import Flask, request - -def get_user_by_id(user_id): - '''Get user by ID''' - return User.query.get(user_id) - -def create_user(username, email): - '''Create new user''' - return User(username, email) -""", - } - - # Sample TypeScript files - typescript_files = { - "frontend/auth/AuthService.ts": """ -export class AuthService { - authenticateUser(username: string, password: string): boolean { - return this.checkCredentials(username, password); - } - - private checkCredentials(user: string, pwd: string): boolean { - return true; - } -} -""", - "frontend/models/User.ts": """ -export interface User { - id: number; - username: string; - email: string; -} - -export class UserModel { - constructor(private user: User) {} - - authenticate(password: string): boolean { - return new AuthService().authenticateUser(this.user.username, password); - } -} -""", - } - - # Index all files - with store._get_connection() as conn: - for path, content in {**python_files, **typescript_files}.items(): - lang = "python" if path.endswith(".py") else "typescript" - name = path.split('/')[-1] - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, path, content, lang, 0.0) - ) - conn.commit() - - yield db_path - store.close() - - if db_path.exists(): - db_path.unlink() - - def test_exact_search_mode(self, sample_project_db): - """Test exact FTS search mode.""" - engine = HybridSearchEngine() - - # Search for "authenticate" - results = engine.search( - sample_project_db, - "authenticate", - limit=10, - enable_fuzzy=False, - enable_vector=False - ) - - assert len(results) > 0, "Should find matches for 'authenticate'" - # Check results contain expected files - paths = [r.path for r in results] - assert any("authentication.py" in p for p in paths) - - def test_fuzzy_search_mode(self, sample_project_db): - """Test fuzzy FTS search mode.""" - engine = HybridSearchEngine() - - # Search with typo: "authentcate" (missing 'i') - results = engine.search( - sample_project_db, - "authentcate", - limit=10, - enable_fuzzy=True, - enable_vector=False - ) - - # Fuzzy search should still find matches - assert isinstance(results, list) - # May or may not find matches depending on trigram support - - def test_hybrid_search_mode(self, sample_project_db): - """Test hybrid search combines exact and fuzzy.""" - engine = HybridSearchEngine() - - # Hybrid search - results = engine.search( - sample_project_db, - "authenticate", - limit=10, - enable_fuzzy=True, - enable_vector=False - ) - - assert len(results) > 0, "Hybrid search should find matches" - # Results should have fusion scores - for result in results: - assert result.score > 0, "Results should have fusion scores" - - def test_camelcase_query_expansion(self, sample_project_db): - """Test CamelCase query expansion improves recall.""" - engine = HybridSearchEngine() - - # Search for "AuthService" (CamelCase) - results = engine.search( - sample_project_db, - "AuthService", - limit=10, - enable_fuzzy=False - ) - - # Should find TypeScript AuthService class - paths = [r.path for r in results] - assert any("AuthService.ts" in p for p in paths), \ - "Should find AuthService with CamelCase query" - - def test_snake_case_query_expansion(self, sample_project_db): - """Test snake_case query expansion improves recall.""" - engine = HybridSearchEngine() - - # Search for "get_user_by_id" (snake_case) - results = engine.search( - sample_project_db, - "get_user_by_id", - limit=10, - enable_fuzzy=False - ) - - # Should find Python function - paths = [r.path for r in results] - assert any("user_api.py" in p for p in paths), \ - "Should find get_user_by_id with snake_case query" - - def test_partial_identifier_match(self, sample_project_db): - """Test partial identifier matching with query expansion.""" - engine = HybridSearchEngine() - - # Search for just "User" (part of UserModel, User class, etc.) - results = engine.search( - sample_project_db, - "User", - limit=10, - enable_fuzzy=False - ) - - assert len(results) > 0, "Should find matches for 'User'" - # Should find multiple files with User in name - paths = [r.path for r in results] - assert len([p for p in paths if "user" in p.lower()]) > 0 - - -class TestHybridSearchRelevance: - """Tests for result relevance and ranking.""" - - @pytest.fixture - def relevance_db(self): - """Create database for testing relevance ranking.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - - store = DirIndexStore(db_path) - store.initialize() - - # Files with varying relevance to "authentication" - files = { - "auth/authentication.py": """ -# Primary authentication module -def authenticate_user(username, password): - '''Main authentication function''' - pass - -def validate_authentication(token): - pass -""", - "auth/auth_helpers.py": """ -# Helper functions for authentication -def hash_password(password): - pass - -def verify_authentication_token(token): - pass -""", - "models/user.py": """ -# User model (mentions authentication once) -class User: - def check_authentication(self): - pass -""", - "utils/logging.py": """ -# Logging utility (no authentication mention) -def log_message(msg): - pass -""", - } - - with store._get_connection() as conn: - for path, content in files.items(): - name = path.split('/')[-1] - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, path, content, "python", 0.0) - ) - conn.commit() - - yield db_path - store.close() - - if db_path.exists(): - db_path.unlink() - - def test_exact_match_ranks_higher(self, relevance_db): - """Test files with exact term matches rank higher.""" - engine = HybridSearchEngine() - - results = engine.search( - relevance_db, - "authentication", - limit=10, - enable_fuzzy=False - ) - - # First result should be authentication.py (most mentions) - assert len(results) > 0 - assert "authentication.py" in results[0].path, \ - "File with most mentions should rank first" - - def test_hybrid_fusion_improves_ranking(self, relevance_db): - """Test hybrid RRF fusion improves ranking over single source.""" - engine = HybridSearchEngine() - - # Exact only - exact_results = engine.search( - relevance_db, - "authentication", - limit=5, - enable_fuzzy=False - ) - - # Hybrid - hybrid_results = engine.search( - relevance_db, - "authentication", - limit=5, - enable_fuzzy=True - ) - - # Both should find matches - assert len(exact_results) > 0 - assert len(hybrid_results) > 0 - - # Hybrid may rerank results - assert isinstance(hybrid_results[0], SearchResult) - - -class TestHybridSearchPerformance: - """Performance tests for hybrid search.""" - - @pytest.fixture - def large_project_db(self): - """Create database with many files.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - - store = DirIndexStore(db_path) - store.initialize() - - # Create 100 test files - with store._get_connection() as conn: - for i in range(100): - content = f""" -def function_{i}(param): - '''Test function {i}''' - return authenticate_user(param) - -class Class{i}: - def method_{i}(self): - pass -""" - path = f"src/module_{i}.py" - name = f"module_{i}.py" - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, path, content, "python", 0.0) - ) - conn.commit() - - yield db_path - store.close() - - if db_path.exists(): - db_path.unlink() - - @pytest.mark.skipif(not BENCHMARK_AVAILABLE, reason="pytest-benchmark not installed") - def test_search_latency(self, large_project_db, benchmark): - """Benchmark search latency.""" - engine = HybridSearchEngine() - - def search_query(): - return engine.search( - large_project_db, - "authenticate", - limit=20, - enable_fuzzy=True - ) - - # Should complete in reasonable time - results = benchmark(search_query) - assert isinstance(results, list) - - def test_hybrid_overhead(self, large_project_db): - """Test hybrid search overhead vs exact search.""" - engine = HybridSearchEngine() - - import time - - # Measure exact search time - start = time.time() - exact_results = engine.search( - large_project_db, - "authenticate", - limit=20, - enable_fuzzy=False - ) - exact_time = time.time() - start - - # Measure hybrid search time - start = time.time() - hybrid_results = engine.search( - large_project_db, - "authenticate", - limit=20, - enable_fuzzy=True - ) - hybrid_time = time.time() - start - - # Hybrid should be <10x slower than exact (relaxed for CI stability and ANN initialization overhead) - if exact_time > 0: - overhead = hybrid_time / exact_time - assert overhead < 10.0, f"Hybrid overhead {overhead:.1f}x should be <10x" - - -class TestHybridSearchEdgeCases: - """Edge case tests for hybrid search.""" - - @pytest.fixture - def temp_db(self): - """Create temporary database.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - - # Initialize with schema - DirIndexStore(db_path) - - yield db_path - # Ignore file deletion errors on Windows (SQLite file lock) - try: - if db_path.exists(): - db_path.unlink() - except PermissionError: - pass - - def test_empty_index_search(self, temp_db): - """Test search on empty index returns empty results.""" - engine = HybridSearchEngine() - - results = engine.search(temp_db, "test", limit=10) - assert results == [] or isinstance(results, list) - - def test_no_matches_query(self, temp_db): - """Test query with no matches returns empty results.""" - store = DirIndexStore(temp_db) - store.initialize() - - try: - # Index one file - with store._get_connection() as conn: - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - ("test.py", "test.py", "def hello(): pass", "python", 0.0) - ) - conn.commit() - - engine = HybridSearchEngine() - results = engine.search(temp_db, "nonexistent", limit=10) - - assert results == [] or len(results) == 0 - finally: - store.close() - - def test_special_characters_in_query(self, temp_db): - """Test queries with special characters are handled.""" - store = DirIndexStore(temp_db) - store.initialize() - - try: - # Index file - with store._get_connection() as conn: - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - ("test.py", "test.py", "def test(): pass", "python", 0.0) - ) - conn.commit() - - engine = HybridSearchEngine() - - # Query with special chars should not crash - queries = ["test*", "test?", "test&", "test|"] - for query in queries: - try: - results = engine.search(temp_db, query, limit=10) - assert isinstance(results, list) - except Exception: - # Some queries may be invalid FTS5 syntax - that's OK - pass - finally: - store.close() - - def test_very_long_query(self, temp_db): - """Test very long queries are handled.""" - store = DirIndexStore(temp_db) - store.initialize() - - try: - # Index file - with store._get_connection() as conn: - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - ("test.py", "test.py", "def test(): pass", "python", 0.0) - ) - conn.commit() - - engine = HybridSearchEngine() - - # Very long query - long_query = "test " * 100 - results = engine.search(temp_db, long_query, limit=10) - assert isinstance(results, list) - finally: - store.close() - - def test_unicode_query(self, temp_db): - """Test Unicode queries are handled.""" - store = DirIndexStore(temp_db) - store.initialize() - - try: - # Index file with Unicode content - with store._get_connection() as conn: - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - ("test.py", "test.py", "def 测试函数(): pass", "python", 0.0) - ) - conn.commit() - - engine = HybridSearchEngine() - - # Unicode query - results = engine.search(temp_db, "测试", limit=10) - assert isinstance(results, list) - finally: - store.close() - - -class TestHybridSearchIntegration: - """Integration tests for complete workflow.""" - - @pytest.fixture - def project_db(self): - """Create realistic project database.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - - store = DirIndexStore(db_path) - store.initialize() - - # Realistic project structure - files = { - "src/authentication/login.py": "def login_user(username, password): pass", - "src/authentication/logout.py": "def logout_user(session_id): pass", - "src/authorization/permissions.py": "def check_permission(user, resource): pass", - "src/models/user_model.py": "class UserModel: pass", - "src/api/auth_api.py": "def authenticate_api(token): pass", - "tests/test_auth.py": "def test_authentication(): pass", - } - - with store._get_connection() as conn: - for path, content in files.items(): - name = path.split('/')[-1] - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, path, content, "python", 0.0) - ) - conn.commit() - - yield db_path - store.close() - - if db_path.exists(): - db_path.unlink() - - def test_workflow_index_search_refine(self, project_db): - """Test complete workflow: index → search → refine.""" - engine = HybridSearchEngine() - - # Initial broad search - results = engine.search(project_db, "auth", limit=20) - assert len(results) > 0 - - # Refined search - refined = engine.search(project_db, "authentication", limit=10) - assert len(refined) > 0 - - # Most refined search - specific = engine.search(project_db, "login_user", limit=5) - # May or may not find exact match depending on query expansion - - def test_consistency_across_searches(self, project_db): - """Test search results are consistent across multiple calls.""" - engine = HybridSearchEngine() - - # Same query multiple times - results1 = engine.search(project_db, "authenticate", limit=10) - results2 = engine.search(project_db, "authenticate", limit=10) - - # Should return same results (same order) - assert len(results1) == len(results2) - if len(results1) > 0: - assert results1[0].path == results2[0].path - - -@pytest.mark.integration -class TestHybridSearchFullCoverage: - """Full coverage integration tests.""" - - def test_all_modes_with_real_project(self): - """Test all search modes (exact, fuzzy, hybrid) with realistic project.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - - store = None - try: - store = DirIndexStore(db_path) - store.initialize() - - # Create comprehensive test project - files = { - "auth.py": "def authenticate(): pass", - "authz.py": "def authorize(): pass", - "user.py": "class User: pass", - } - - with store._get_connection() as conn: - for path, content in files.items(): - name = path.split('/')[-1] - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, path, content, "python", 0.0) - ) - conn.commit() - - engine = HybridSearchEngine() - - # Test exact mode - exact = engine.search(db_path, "authenticate", enable_fuzzy=False) - assert isinstance(exact, list) - - # Test fuzzy mode - fuzzy = engine.search(db_path, "authenticate", enable_fuzzy=True) - assert isinstance(fuzzy, list) - - # Test hybrid mode (default) - hybrid = engine.search(db_path, "authenticate") - assert isinstance(hybrid, list) - - finally: - if store: - store.close() - if db_path.exists(): - db_path.unlink() - - - -class TestHybridSearchWithVectorMock: - """Tests for hybrid search with mocked vector search.""" - - @pytest.fixture - def mock_vector_db(self): - """Create database with vector search mocked.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - - store = DirIndexStore(db_path) - store.initialize() - - # Index sample files - files = { - "auth/login.py": "def login_user(username, password): authenticate()", - "auth/logout.py": "def logout_user(session): cleanup_session()", - "user/profile.py": "class UserProfile: def get_data(): pass" - } - - with store._get_connection() as conn: - for path, content in files.items(): - name = path.split('/')[-1] - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, path, content, "python", 0.0) - ) - conn.commit() - - yield db_path - store.close() - - if db_path.exists(): - db_path.unlink() - - def test_hybrid_with_vector_enabled(self, mock_vector_db): - """Test hybrid search with vector search enabled (mocked).""" - from unittest.mock import patch, MagicMock - - # Mock the vector search to return fake results - mock_vector_results = [ - SearchResult(path="auth/login.py", score=0.95, content_snippet="login"), - SearchResult(path="user/profile.py", score=0.75, content_snippet="profile") - ] - - engine = HybridSearchEngine() - - # Mock vector search method if it exists - with patch.object(engine, '_search_vector', return_value=mock_vector_results) if hasattr(engine, '_search_vector') else patch('codexlens.search.hybrid_search.vector_search', return_value=mock_vector_results): - results = engine.search( - mock_vector_db, - "login", - limit=10, - enable_fuzzy=True, - enable_vector=True # ENABLE vector search - ) - - # Should get results from RRF fusion of exact + fuzzy + vector - assert isinstance(results, list) - assert len(results) > 0, "Hybrid search with vector should return results" - - # Results should have fusion scores - for result in results: - assert hasattr(result, 'score') - assert result.score > 0 # RRF fusion scores are positive - - -class TestHybridSearchAdaptiveWeights: - """Integration tests for adaptive RRF weights + reranking gating.""" - - def test_adaptive_weights_code_query(self): - """Exact weight should dominate for code-like queries.""" - from unittest.mock import patch - - engine = HybridSearchEngine() - - results_map = { - "exact": [SearchResult(path="a.py", score=10.0, excerpt="a")], - "fuzzy": [SearchResult(path="b.py", score=9.0, excerpt="b")], - "vector": [SearchResult(path="c.py", score=0.9, excerpt="c")], - } - - captured = {} - from codexlens.search import ranking as ranking_module - - def capture_rrf(map_in, weights_in, k=60): - captured["weights"] = dict(weights_in) - return ranking_module.reciprocal_rank_fusion(map_in, weights_in, k=k) - - with patch.object(HybridSearchEngine, "_search_parallel", return_value=results_map), patch( - "codexlens.search.hybrid_search.reciprocal_rank_fusion", - side_effect=capture_rrf, - ): - engine.search(Path("dummy.db"), "def authenticate", enable_vector=True) - - assert captured["weights"]["exact"] > 0.4 - - def test_adaptive_weights_nl_query(self): - """Vector weight should dominate for natural-language queries.""" - from unittest.mock import patch - - engine = HybridSearchEngine() - - results_map = { - "exact": [SearchResult(path="a.py", score=10.0, excerpt="a")], - "fuzzy": [SearchResult(path="b.py", score=9.0, excerpt="b")], - "vector": [SearchResult(path="c.py", score=0.9, excerpt="c")], - } - - captured = {} - from codexlens.search import ranking as ranking_module - - def capture_rrf(map_in, weights_in, k=60): - captured["weights"] = dict(weights_in) - return ranking_module.reciprocal_rank_fusion(map_in, weights_in, k=k) - - with patch.object(HybridSearchEngine, "_search_parallel", return_value=results_map), patch( - "codexlens.search.hybrid_search.reciprocal_rank_fusion", - side_effect=capture_rrf, - ): - engine.search(Path("dummy.db"), "how to handle user login", enable_vector=True) - - assert captured["weights"]["vector"] > 0.6 - - def test_default_engine_weights_keep_lsp_graph_backend_available(self): - """Legacy public defaults should not discard LSP graph fusion weights internally.""" - from unittest.mock import patch - - engine = HybridSearchEngine() - - results_map = { - "exact": [SearchResult(path="a.py", score=10.0, excerpt="a")], - "fuzzy": [SearchResult(path="b.py", score=9.0, excerpt="b")], - "vector": [SearchResult(path="c.py", score=0.9, excerpt="c")], - "lsp_graph": [SearchResult(path="d.py", score=0.8, excerpt="d")], - } - - captured = {} - from codexlens.search import ranking as ranking_module - - def capture_rrf(map_in, weights_in, k=60): - captured["weights"] = dict(weights_in) - return ranking_module.reciprocal_rank_fusion(map_in, weights_in, k=k) - - with patch.object(HybridSearchEngine, "_search_parallel", return_value=results_map), patch( - "codexlens.search.hybrid_search.reciprocal_rank_fusion", - side_effect=capture_rrf, - ): - engine.search(Path("dummy.db"), "auth flow", enable_vector=True, enable_lsp_graph=True) - - assert engine.weights == HybridSearchEngine.DEFAULT_WEIGHTS - assert "lsp_graph" in captured["weights"] - assert captured["weights"]["lsp_graph"] > 0.0 - - def test_reranking_enabled(self, tmp_path): - """Reranking runs only when explicitly enabled via config.""" - from unittest.mock import patch - - results_map = { - "exact": [SearchResult(path="a.py", score=10.0, excerpt="a")], - "fuzzy": [SearchResult(path="b.py", score=9.0, excerpt="b")], - "vector": [SearchResult(path="c.py", score=0.9, excerpt="c")], - } - - class DummyEmbedder: - def embed(self, texts): - if isinstance(texts, str): - texts = [texts] - return [[1.0, 0.0] for _ in texts] - - # Disabled: should not invoke rerank_results - config_off = Config(data_dir=tmp_path / "off", enable_reranking=False) - engine_off = HybridSearchEngine(config=config_off, embedder=DummyEmbedder()) - - with patch.object(HybridSearchEngine, "_search_parallel", return_value=results_map), patch( - "codexlens.search.hybrid_search.rerank_results" - ) as rerank_mock: - engine_off.search(Path("dummy.db"), "query", enable_vector=True) - rerank_mock.assert_not_called() - - # Enabled: should invoke rerank_results once - config_on = Config(data_dir=tmp_path / "on", enable_reranking=True, reranking_top_k=10) - engine_on = HybridSearchEngine(config=config_on, embedder=DummyEmbedder()) - - with patch.object(HybridSearchEngine, "_search_parallel", return_value=results_map), patch( - "codexlens.search.hybrid_search.rerank_results", - side_effect=lambda q, r, e, top_k=50: r, - ) as rerank_mock: - engine_on.search(Path("dummy.db"), "query", enable_vector=True) - assert rerank_mock.call_count == 1 - - def test_cross_encoder_reranking_enabled(self, tmp_path): - """Cross-encoder stage runs only when explicitly enabled via config.""" - from unittest.mock import patch - - results_map = { - "exact": [SearchResult(path="a.py", score=10.0, excerpt="a")], - "fuzzy": [SearchResult(path="b.py", score=9.0, excerpt="b")], - "vector": [SearchResult(path="c.py", score=0.9, excerpt="c")], - } - - class DummyEmbedder: - def embed(self, texts): - if isinstance(texts, str): - texts = [texts] - return [[1.0, 0.0] for _ in texts] - - class DummyReranker: - def score_pairs(self, pairs, batch_size=32): - return [0.0 for _ in pairs] - - config = Config( - data_dir=tmp_path / "ce", - enable_reranking=True, - enable_cross_encoder_rerank=True, - reranker_top_k=10, - ) - engine = HybridSearchEngine(config=config, embedder=DummyEmbedder()) - - with patch.object(HybridSearchEngine, "_search_parallel", return_value=results_map), patch( - "codexlens.search.hybrid_search.rerank_results", - side_effect=lambda q, r, e, top_k=50: r, - ) as rerank_mock, patch.object( - HybridSearchEngine, - "_get_cross_encoder_reranker", - return_value=DummyReranker(), - ) as get_ce_mock, patch( - "codexlens.search.hybrid_search.cross_encoder_rerank", - side_effect=lambda q, r, ce, top_k=50: r, - ) as ce_mock: - engine.search(Path("dummy.db"), "query", enable_vector=True) - assert rerank_mock.call_count == 1 - assert get_ce_mock.call_count == 1 - assert ce_mock.call_count == 1 diff --git a/codex-lens/tests/test_hybrid_search_reranker_backend.py b/codex-lens/tests/test_hybrid_search_reranker_backend.py deleted file mode 100644 index 1e832640..00000000 --- a/codex-lens/tests/test_hybrid_search_reranker_backend.py +++ /dev/null @@ -1,192 +0,0 @@ -"""Tests for HybridSearchEngine reranker backend selection.""" - -from __future__ import annotations - -import pytest - -from codexlens.config import Config -from codexlens.search.hybrid_search import HybridSearchEngine - - -def test_get_cross_encoder_reranker_uses_factory_backend_legacy( - monkeypatch: pytest.MonkeyPatch, - tmp_path, -) -> None: - calls: dict[str, object] = {} - - def fake_check_reranker_available(backend: str): - calls["check_backend"] = backend - return True, None - - sentinel = object() - - def fake_get_reranker(*, backend: str, model_name=None, device=None, **kwargs): - calls["get_args"] = { - "backend": backend, - "model_name": model_name, - "device": device, - "kwargs": kwargs, - } - return sentinel - - monkeypatch.setattr( - "codexlens.semantic.reranker.check_reranker_available", - fake_check_reranker_available, - ) - monkeypatch.setattr( - "codexlens.semantic.reranker.get_reranker", - fake_get_reranker, - ) - - config = Config( - data_dir=tmp_path / "legacy", - enable_reranking=True, - enable_cross_encoder_rerank=True, - reranker_backend="legacy", - reranker_model="dummy-model", - ) - engine = HybridSearchEngine(config=config) - - reranker = engine._get_cross_encoder_reranker() - assert reranker is sentinel - assert calls["check_backend"] == "legacy" - - get_args = calls["get_args"] - assert isinstance(get_args, dict) - assert get_args["backend"] == "legacy" - assert get_args["model_name"] == "dummy-model" - assert get_args["device"] is None - - -def test_get_cross_encoder_reranker_uses_factory_backend_onnx_gpu_flag( - monkeypatch: pytest.MonkeyPatch, - tmp_path, -) -> None: - calls: dict[str, object] = {} - - def fake_check_reranker_available(backend: str): - calls["check_backend"] = backend - return True, None - - sentinel = object() - - def fake_get_reranker(*, backend: str, model_name=None, device=None, **kwargs): - calls["get_args"] = { - "backend": backend, - "model_name": model_name, - "device": device, - "kwargs": kwargs, - } - return sentinel - - monkeypatch.setattr( - "codexlens.semantic.reranker.check_reranker_available", - fake_check_reranker_available, - ) - monkeypatch.setattr( - "codexlens.semantic.reranker.get_reranker", - fake_get_reranker, - ) - - config = Config( - data_dir=tmp_path / "onnx", - enable_reranking=True, - enable_cross_encoder_rerank=True, - reranker_backend="onnx", - embedding_use_gpu=True, - reranker_use_gpu=False, - ) - engine = HybridSearchEngine(config=config) - - reranker = engine._get_cross_encoder_reranker() - assert reranker is sentinel - assert calls["check_backend"] == "onnx" - - get_args = calls["get_args"] - assert isinstance(get_args, dict) - assert get_args["backend"] == "onnx" - assert get_args["model_name"] is None - assert get_args["device"] is None - assert get_args["kwargs"]["use_gpu"] is False - - -def test_get_cross_encoder_reranker_uses_cpu_device_for_legacy_when_reranker_gpu_disabled( - monkeypatch: pytest.MonkeyPatch, - tmp_path, -) -> None: - calls: dict[str, object] = {} - - def fake_check_reranker_available(backend: str): - calls["check_backend"] = backend - return True, None - - sentinel = object() - - def fake_get_reranker(*, backend: str, model_name=None, device=None, **kwargs): - calls["get_args"] = { - "backend": backend, - "model_name": model_name, - "device": device, - "kwargs": kwargs, - } - return sentinel - - monkeypatch.setattr( - "codexlens.semantic.reranker.check_reranker_available", - fake_check_reranker_available, - ) - monkeypatch.setattr( - "codexlens.semantic.reranker.get_reranker", - fake_get_reranker, - ) - - config = Config( - data_dir=tmp_path / "legacy-cpu", - enable_reranking=True, - enable_cross_encoder_rerank=True, - reranker_backend="legacy", - reranker_model="dummy-model", - embedding_use_gpu=True, - reranker_use_gpu=False, - ) - engine = HybridSearchEngine(config=config) - - reranker = engine._get_cross_encoder_reranker() - assert reranker is sentinel - assert calls["check_backend"] == "legacy" - - get_args = calls["get_args"] - assert isinstance(get_args, dict) - assert get_args["backend"] == "legacy" - assert get_args["model_name"] == "dummy-model" - assert get_args["device"] == "cpu" - - -def test_get_cross_encoder_reranker_returns_none_when_backend_unavailable( - monkeypatch: pytest.MonkeyPatch, - tmp_path, -) -> None: - def fake_check_reranker_available(backend: str): - return False, "missing deps" - - def fake_get_reranker(*args, **kwargs): - raise AssertionError("get_reranker should not be called when backend is unavailable") - - monkeypatch.setattr( - "codexlens.semantic.reranker.check_reranker_available", - fake_check_reranker_available, - ) - monkeypatch.setattr( - "codexlens.semantic.reranker.get_reranker", - fake_get_reranker, - ) - - config = Config( - data_dir=tmp_path / "unavailable", - enable_reranking=True, - enable_cross_encoder_rerank=True, - reranker_backend="onnx", - ) - engine = HybridSearchEngine(config=config) - - assert engine._get_cross_encoder_reranker() is None diff --git a/codex-lens/tests/test_hybrid_search_unit.py b/codex-lens/tests/test_hybrid_search_unit.py deleted file mode 100644 index 5c485291..00000000 --- a/codex-lens/tests/test_hybrid_search_unit.py +++ /dev/null @@ -1,635 +0,0 @@ -"""Unit tests for HybridSearchEngine - parallel search and RRF fusion. - -Tests cover: -- search: exact only, fuzzy enabled, vector enabled, pure vector mode -- search: RRF fusion, empty query, no results, reranking, category filtering -- _search_parallel: parallel backend execution -- _search_lsp_graph: LSP graph expansion with seeds, vector-to-FTS fallback -""" - -from __future__ import annotations - -import tempfile -from pathlib import Path -from typing import Dict, List -from unittest.mock import MagicMock, Mock, patch, PropertyMock - -import pytest - -from codexlens.config import Config -from codexlens.entities import SearchResult -from codexlens.search.hybrid_search import HybridSearchEngine - - -# ============================================================================= -# Test Fixtures -# ============================================================================= - - -@pytest.fixture -def temp_paths(): - """Create temporary directory structure with a mock index.""" - tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True) - root = Path(tmpdir.name) - # Create a non-empty index file to pass the empty-file guard - index_path = root / "_index.db" - index_path.write_bytes(b"\x00" * 100) - yield root - try: - tmpdir.cleanup() - except (PermissionError, OSError): - pass - - -@pytest.fixture -def mock_config(): - """Create mock config for hybrid search.""" - config = MagicMock(spec=Config) - config.embedding_use_gpu = False - config.enable_reranking = False - config.enable_cross_encoder_rerank = False - config.symbol_boost_factor = 1.5 - config.fusion_method = "rrf" - config.rrf_k = 60 - config.enable_category_filter = True - return config - - -@pytest.fixture -def sample_results() -> List[SearchResult]: - """Create sample search results.""" - return [ - SearchResult( - path="auth.py", - score=0.9, - excerpt="def authenticate(user):", - symbol_name="authenticate", - symbol_kind="function", - ), - SearchResult( - path="login.py", - score=0.7, - excerpt="class LoginHandler:", - symbol_name="LoginHandler", - symbol_kind="class", - ), - ] - - -# ============================================================================= -# Tests: search with different backends -# ============================================================================= - - -class TestHybridSearchBackends: - """Tests for HybridSearchEngine.search() backend configurations.""" - - def test_search_exact_only(self, temp_paths, mock_config): - """Search with only exact FTS backend.""" - engine = HybridSearchEngine(config=mock_config) - index_path = temp_paths / "_index.db" - - with patch.object(engine, "_search_parallel") as mock_parallel: - mock_parallel.return_value = { - "exact": [ - SearchResult(path="a.py", score=10.0, excerpt="result"), - ] - } - - results = engine.search( - index_path, "test query", - enable_fuzzy=False, enable_vector=False, - ) - - assert len(results) == 1 - # Verify only exact backend was requested - call_args = mock_parallel.call_args - backends = call_args[0][2] # 3rd positional arg - assert "exact" in backends - assert "fuzzy" not in backends - assert "vector" not in backends - - def test_search_fuzzy_enabled(self, temp_paths, mock_config): - """Search with exact + fuzzy backends.""" - engine = HybridSearchEngine(config=mock_config) - index_path = temp_paths / "_index.db" - - with patch.object(engine, "_search_parallel") as mock_parallel: - mock_parallel.return_value = { - "exact": [SearchResult(path="a.py", score=10.0, excerpt="exact")], - "fuzzy": [SearchResult(path="b.py", score=8.0, excerpt="fuzzy")], - } - - results = engine.search( - index_path, "test_query", - enable_fuzzy=True, enable_vector=False, - ) - - assert len(results) >= 1 - backends = mock_parallel.call_args[0][2] - assert "exact" in backends - assert "fuzzy" in backends - - def test_search_vector_enabled(self, temp_paths, mock_config): - """Search with exact + fuzzy + vector backends.""" - engine = HybridSearchEngine(config=mock_config) - index_path = temp_paths / "_index.db" - - with patch.object(engine, "_search_parallel") as mock_parallel: - mock_parallel.return_value = { - "exact": [SearchResult(path="a.py", score=10.0, excerpt="exact")], - "vector": [SearchResult(path="c.py", score=0.85, excerpt="vector")], - } - - results = engine.search( - index_path, "test_query", - enable_fuzzy=False, enable_vector=True, - ) - - backends = mock_parallel.call_args[0][2] - assert "exact" in backends - assert "vector" in backends - - def test_search_lexical_priority_query_skips_vector_backend(self, temp_paths, mock_config): - """Config/env/factory queries should stay lexical-first in hybrid mode.""" - engine = HybridSearchEngine(config=mock_config) - index_path = temp_paths / "_index.db" - - with patch.object(engine, "_search_parallel") as mock_parallel: - mock_parallel.return_value = { - "exact": [SearchResult(path="config.py", score=10.0, excerpt="exact")], - "fuzzy": [SearchResult(path="env_config.py", score=8.0, excerpt="fuzzy")], - } - - results = engine.search( - index_path, - "embedding backend fastembed local litellm api config", - enable_fuzzy=True, - enable_vector=True, - ) - - assert len(results) >= 1 - backends = mock_parallel.call_args[0][2] - assert "exact" in backends - assert "fuzzy" in backends - assert "vector" not in backends - - def test_search_pure_vector(self, temp_paths, mock_config): - """Pure vector mode should only use vector backend.""" - engine = HybridSearchEngine(config=mock_config) - mock_config.enable_category_filter = False - index_path = temp_paths / "_index.db" - - with patch.object(engine, "_search_parallel") as mock_parallel: - mock_parallel.return_value = { - "vector": [SearchResult(path="a.py", score=0.9, excerpt="vec")], - } - - results = engine.search( - index_path, "semantic query", - enable_vector=True, pure_vector=True, - ) - - backends = mock_parallel.call_args[0][2] - assert "vector" in backends - assert "exact" not in backends - - -# ============================================================================= -# Tests: search fusion and post-processing -# ============================================================================= - - -class TestHybridSearchFusion: - """Tests for RRF fusion, empty query, no results, reranking, filtering.""" - - def test_search_rrf_fusion(self, temp_paths, mock_config): - """Results from multiple backends should be fused via RRF.""" - engine = HybridSearchEngine(config=mock_config) - index_path = temp_paths / "_index.db" - - with patch.object(engine, "_search_parallel") as mock_parallel: - mock_parallel.return_value = { - "exact": [ - SearchResult(path="a.py", score=10.0, excerpt="exact a"), - SearchResult(path="b.py", score=5.0, excerpt="exact b"), - ], - "vector": [ - SearchResult(path="b.py", score=0.9, excerpt="vector b"), - SearchResult(path="c.py", score=0.8, excerpt="vector c"), - ], - } - - results = engine.search( - index_path, "test", - enable_fuzzy=False, enable_vector=True, - ) - - # b.py appears in both sources - should have high fusion score - assert any(r.path == "b.py" for r in results) - - def test_search_empty_query(self, temp_paths, mock_config): - """Empty query should still execute (handled gracefully).""" - engine = HybridSearchEngine(config=mock_config) - index_path = temp_paths / "_index.db" - - with patch.object(engine, "_search_parallel") as mock_parallel: - mock_parallel.return_value = {"exact": []} - - results = engine.search(index_path, "", enable_fuzzy=False) - - assert results == [] - - def test_search_no_results(self, temp_paths, mock_config): - """All backends returning empty should produce empty results.""" - engine = HybridSearchEngine(config=mock_config) - index_path = temp_paths / "_index.db" - - with patch.object(engine, "_search_parallel") as mock_parallel: - mock_parallel.return_value = { - "exact": [], - "fuzzy": [], - } - - results = engine.search(index_path, "nonexistent") - - assert results == [] - - def test_search_reranking(self, temp_paths, mock_config): - """Reranking should be applied when config enables it.""" - mock_config.enable_reranking = True - mock_config.enable_cross_encoder_rerank = False - mock_config.reranking_top_k = 50 - engine = HybridSearchEngine(config=mock_config) - index_path = temp_paths / "_index.db" - - mock_embedder = MagicMock() - mock_embedder.embed_single.return_value = [0.1] * 128 - mock_embedder.embed.return_value = [[0.1] * 128] - engine.embedder = mock_embedder - - with patch.object(engine, "_search_parallel") as mock_parallel: - mock_parallel.return_value = { - "exact": [SearchResult(path="a.py", score=10.0, excerpt="code")], - } - - with patch("codexlens.search.hybrid_search.rerank_results") as mock_rerank: - mock_rerank.return_value = [ - SearchResult(path="a.py", score=0.85, excerpt="code"), - ] - results = engine.search(index_path, "query", enable_fuzzy=False) - - mock_rerank.assert_called_once() - - def test_search_lexical_priority_query_skips_expensive_reranking(self, temp_paths, mock_config): - """Lexical-priority queries should bypass embedder and cross-encoder reranking.""" - mock_config.enable_reranking = True - mock_config.enable_cross_encoder_rerank = True - mock_config.reranking_top_k = 50 - mock_config.reranker_top_k = 20 - engine = HybridSearchEngine(config=mock_config) - index_path = temp_paths / "_index.db" - - with patch.object(engine, "_search_parallel") as mock_parallel: - mock_parallel.return_value = { - "exact": [SearchResult(path="config.py", score=10.0, excerpt="code")], - "fuzzy": [SearchResult(path="env_config.py", score=9.0, excerpt="env vars")], - } - - with patch("codexlens.search.hybrid_search.rerank_results") as mock_rerank, patch( - "codexlens.search.hybrid_search.cross_encoder_rerank" - ) as mock_cross_encoder, patch.object( - engine, - "_get_cross_encoder_reranker", - ) as mock_get_reranker: - results = engine.search( - index_path, - "get_reranker factory onnx backend selection", - enable_fuzzy=True, - enable_vector=True, - ) - - assert len(results) >= 1 - mock_rerank.assert_not_called() - mock_cross_encoder.assert_not_called() - mock_get_reranker.assert_not_called() - - def test_search_category_filtering(self, temp_paths, mock_config): - """Category filtering should separate code/doc results by intent.""" - mock_config.enable_category_filter = True - engine = HybridSearchEngine(config=mock_config) - index_path = temp_paths / "_index.db" - - with patch.object(engine, "_search_parallel") as mock_parallel: - mock_parallel.return_value = { - "exact": [ - SearchResult(path="auth.py", score=10.0, excerpt="def auth"), - SearchResult(path="README.md", score=8.0, excerpt="docs"), - ], - } - - # Keyword-like query should filter to code - results = engine.search( - index_path, "AuthManager", - enable_fuzzy=False, - ) - - paths = [r.path for r in results] - # Code files should remain, doc files filtered for KEYWORD intent - assert "auth.py" in paths - - -# ============================================================================= -# Tests: _search_parallel -# ============================================================================= - - -class TestSearchParallel: - """Tests for _search_parallel() parallel backend execution.""" - - def test_search_parallel_backends(self, temp_paths, mock_config): - """Parallel execution should run all requested backends.""" - engine = HybridSearchEngine(config=mock_config) - index_path = temp_paths / "_index.db" - - with patch.object(engine, "_search_exact") as mock_exact, \ - patch.object(engine, "_search_fuzzy") as mock_fuzzy: - mock_exact.return_value = [ - SearchResult(path="a.py", score=10.0, excerpt="exact"), - ] - mock_fuzzy.return_value = [ - SearchResult(path="b.py", score=8.0, excerpt="fuzzy"), - ] - - results_map = engine._search_parallel( - index_path, "query", - backends={"exact": True, "fuzzy": True}, - limit=10, - ) - - assert "exact" in results_map - assert "fuzzy" in results_map - mock_exact.assert_called_once() - mock_fuzzy.assert_called_once() - - -class TestCentralizedMetadataFetch: - """Tests for centralized metadata retrieval helpers.""" - - def test_fetch_from_vector_meta_store_clamps_negative_scores(self, temp_paths, mock_config, monkeypatch): - engine = HybridSearchEngine(config=mock_config) - - class FakeMetaStore: - def __init__(self, _path): - pass - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc, tb): - return False - - def get_chunks_by_ids(self, _chunk_ids, category=None): - assert category is None - return [ - { - "chunk_id": 7, - "file_path": "src/app.py", - "content": "def app(): pass", - "metadata": {}, - "start_line": 1, - "end_line": 1, - } - ] - - import codexlens.storage.vector_meta_store as vector_meta_store - - monkeypatch.setattr(vector_meta_store, "VectorMetadataStore", FakeMetaStore) - - results = engine._fetch_from_vector_meta_store( - temp_paths / "_vectors_meta.db", - [7], - {7: -0.01}, - ) - - assert len(results) == 1 - assert results[0].path == "src/app.py" - assert results[0].score == 0.0 - - -class TestCentralizedVectorCaching: - """Tests for centralized vector search runtime caches.""" - - def test_search_vector_centralized_reuses_cached_resources( - self, - temp_paths, - mock_config, - ): - engine = HybridSearchEngine(config=mock_config) - hnsw_path = temp_paths / "_vectors.hnsw" - hnsw_path.write_bytes(b"hnsw") - - vector_store_opened: List[Path] = [] - - class FakeVectorStore: - def __init__(self, path): - vector_store_opened.append(Path(path)) - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc, tb): - return False - - def get_model_config(self): - return { - "backend": "fastembed", - "model_name": "BAAI/bge-small-en-v1.5", - "model_profile": "fast", - "embedding_dim": 384, - } - - class FakeEmbedder: - embedding_dim = 384 - - def __init__(self): - self.embed_calls: List[str] = [] - - def embed_single(self, query): - self.embed_calls.append(query) - return [0.1, 0.2, 0.3] - - class FakeAnnIndex: - def __init__(self): - self.load_calls = 0 - self.search_calls = 0 - - def load(self): - self.load_calls += 1 - return True - - def count(self): - return 3 - - def search(self, _query_vec, top_k): - self.search_calls += 1 - assert top_k == 10 - return [7], [0.2] - - fake_embedder = FakeEmbedder() - fake_ann_index = FakeAnnIndex() - - with patch("codexlens.semantic.vector_store.VectorStore", FakeVectorStore), patch( - "codexlens.semantic.factory.get_embedder", - return_value=fake_embedder, - ) as mock_get_embedder, patch( - "codexlens.semantic.ann_index.ANNIndex.create_central", - return_value=fake_ann_index, - ) as mock_create_central, patch.object( - engine, - "_fetch_chunks_by_ids_centralized", - return_value=[SearchResult(path="src/app.py", score=0.8, excerpt="hit")], - ) as mock_fetch: - first = engine._search_vector_centralized( - temp_paths / "child-a" / "_index.db", - hnsw_path, - "smart search routing", - limit=5, - ) - second = engine._search_vector_centralized( - temp_paths / "child-b" / "_index.db", - hnsw_path, - "smart search routing", - limit=5, - ) - - assert [result.path for result in first] == ["src/app.py"] - assert [result.path for result in second] == ["src/app.py"] - assert vector_store_opened == [temp_paths / "_index.db"] - assert mock_get_embedder.call_count == 1 - assert mock_create_central.call_count == 1 - assert fake_ann_index.load_calls == 1 - assert fake_embedder.embed_calls == ["smart search routing"] - assert fake_ann_index.search_calls == 2 - assert mock_fetch.call_count == 2 - - def test_search_vector_centralized_respects_embedding_use_gpu( - self, - temp_paths, - mock_config, - ): - engine = HybridSearchEngine(config=mock_config) - hnsw_path = temp_paths / "_vectors.hnsw" - hnsw_path.write_bytes(b"hnsw") - - class FakeVectorStore: - def __init__(self, _path): - pass - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc, tb): - return False - - def get_model_config(self): - return { - "backend": "fastembed", - "model_name": "BAAI/bge-small-en-v1.5", - "model_profile": "code", - "embedding_dim": 384, - } - - class FakeEmbedder: - embedding_dim = 384 - - def embed_single(self, _query): - return [0.1, 0.2] - - class FakeAnnIndex: - def load(self): - return True - - def count(self): - return 1 - - def search(self, _query_vec, top_k): - assert top_k == 6 - return [9], [0.1] - - with patch("codexlens.semantic.vector_store.VectorStore", FakeVectorStore), patch( - "codexlens.semantic.factory.get_embedder", - return_value=FakeEmbedder(), - ) as mock_get_embedder, patch( - "codexlens.semantic.ann_index.ANNIndex.create_central", - return_value=FakeAnnIndex(), - ), patch.object( - engine, - "_fetch_chunks_by_ids_centralized", - return_value=[SearchResult(path="src/app.py", score=0.9, excerpt="hit")], - ): - results = engine._search_vector_centralized( - temp_paths / "_index.db", - hnsw_path, - "semantic query", - limit=3, - ) - - assert len(results) == 1 - assert mock_get_embedder.call_count == 1 - assert mock_get_embedder.call_args.kwargs == { - "backend": "fastembed", - "profile": "code", - "use_gpu": False, - } - - -# ============================================================================= -# Tests: _search_lsp_graph -# ============================================================================= - - -class TestSearchLspGraph: - """Tests for _search_lsp_graph() LSP graph expansion.""" - - def test_search_lsp_graph(self, temp_paths, mock_config): - """LSP graph search should use seed results for expansion.""" - engine = HybridSearchEngine(config=mock_config) - index_path = temp_paths / "_index.db" - - # When HAS_LSP is False, _search_lsp_graph returns [] - with patch("codexlens.search.hybrid_search.HAS_LSP", False): - results = engine._search_lsp_graph( - index_path, "auth function", limit=5, - ) - assert results == [] - - def test_lsp_fallback_vector_to_fts(self, temp_paths, mock_config): - """When vector seeds fail, should fall back to FTS seeds.""" - engine = HybridSearchEngine(config=mock_config) - index_path = temp_paths / "_index.db" - - with patch("codexlens.search.hybrid_search.HAS_LSP", True): - # Mock _search_vector to return empty (no seeds from vector) - with patch.object(engine, "_search_vector", return_value=[]): - # Mock _search_exact to return seeds - with patch.object(engine, "_search_exact") as mock_exact: - mock_exact.return_value = [ - SearchResult( - path="auth.py", score=10.0, - excerpt="def auth():", symbol_name="auth", - start_line=1, end_line=5, - ), - ] - - # Mock the LSP bridge (will fail on import or async) - # The function should attempt FTS fallback before LSP expansion - try: - results = engine._search_lsp_graph( - index_path, "auth", limit=5, - ) - except Exception: - pass # LSP deps may not be available, but FTS fallback was attempted - - # Verify FTS was called as fallback - mock_exact.assert_called_once() diff --git a/codex-lens/tests/test_incremental_indexer.py b/codex-lens/tests/test_incremental_indexer.py deleted file mode 100644 index 34515487..00000000 --- a/codex-lens/tests/test_incremental_indexer.py +++ /dev/null @@ -1,125 +0,0 @@ -"""Incremental Indexer File Event Processing Tests. - -This module tests the file event processing in the incremental indexer, -covering all file system event types (CREATED, MODIFIED, DELETED, MOVED). - -Test Coverage: -- CREATED events: New files being indexed -- MODIFIED events: Changed files being re-indexed -- DELETED events: Removed files being handled -- MOVED events: File renames being tracked -- Batch processing of multiple events -""" - -import pytest -from pathlib import Path -from unittest.mock import Mock, patch, MagicMock -import tempfile -import shutil - - -class TestCreatedEvents: - """Test handling of CREATED file events.""" - - def test_new_file_indexed(self): - """Test that newly created files are properly indexed.""" - pytest.skip("Requires incremental indexer fixture") - - def test_created_in_subdirectory(self): - """Test that files created in subdirectories are indexed.""" - pytest.skip("Requires incremental indexer fixture") - - def test_batch_created_events(self): - """Test handling multiple files created simultaneously.""" - pytest.skip("Requires incremental indexer fixture") - - -class TestModifiedEvents: - """Test handling of MODIFIED file events.""" - - def test_file_content_updated(self): - """Test that file content changes trigger re-indexing.""" - pytest.skip("Requires incremental indexer fixture") - - def test_metadata_only_change(self): - """Test handling of metadata-only changes (permissions, etc).""" - pytest.skip("Requires incremental indexer fixture") - - def test_rapid_modifications(self): - """Test handling of rapid successive modifications to same file.""" - pytest.skip("Requires incremental indexer fixture") - - -class TestDeletedEvents: - """Test handling of DELETED file events.""" - - def test_file_removed_from_index(self): - """Test that deleted files are removed from the index.""" - pytest.skip("Requires incremental indexer fixture") - - def test_directory_deleted(self): - """Test handling of directory deletion events.""" - pytest.skip("Requires incremental indexer fixture") - - def test_delete_non_indexed_file(self): - """Test handling deletion of files that were never indexed.""" - pytest.skip("Requires incremental indexer fixture") - - -class TestMovedEvents: - """Test handling of MOVED/RENAMED file events.""" - - def test_file_renamed(self): - """Test that renamed files are tracked in the index.""" - pytest.skip("Requires incremental indexer fixture") - - def test_file_moved_to_subdirectory(self): - """Test that files moved to subdirectories are tracked.""" - pytest.skip("Requires incremental indexer fixture") - - def test_file_moved_out_of_watch_root(self): - """Test handling of files moved outside the watch directory.""" - pytest.skip("Requires incremental indexer fixture") - - def test_directory_renamed(self): - """Test handling of directory rename events.""" - pytest.skip("Requires incremental indexer fixture") - - -class TestEventBatching: - """Test batching and deduplication of file events.""" - - def test_duplicate_events_deduplicated(self): - """Test that duplicate events for the same file are deduplicated.""" - pytest.skip("Requires incremental indexer fixture") - - def test_event_ordering_preserved(self): - """Test that events are processed in the correct order.""" - pytest.skip("Requires incremental indexer fixture") - - def test_mixed_event_types_batch(self): - """Test handling a batch with mixed event types.""" - pytest.skip("Requires incremental indexer fixture") - - -class TestErrorHandling: - """Test error handling in file event processing.""" - - def test_unreadable_file_skipped(self): - """Test that unreadable files are handled gracefully.""" - pytest.skip("Requires incremental indexer fixture") - - def test_corrupted_event_continues(self): - """Test that processing continues after a corrupted event.""" - pytest.skip("Requires incremental indexer fixture") - - def test_indexer_error_recovery(self): - """Test recovery from indexer errors during event processing.""" - pytest.skip("Requires incremental indexer fixture") - - -# TODO: Implement actual tests using pytest fixtures and the incremental indexer -# The test infrastructure needs: -# - IncrementalIndexer fixture with mock filesystem watcher -# - Temporary directory fixtures for test files -# - Mock event queue for controlled event injection diff --git a/codex-lens/tests/test_incremental_indexing.py b/codex-lens/tests/test_incremental_indexing.py deleted file mode 100644 index dceffb76..00000000 --- a/codex-lens/tests/test_incremental_indexing.py +++ /dev/null @@ -1,512 +0,0 @@ -"""Tests for incremental indexing with mtime tracking (P2). - -Tests mtime-based skip logic, deleted file cleanup, and incremental update workflows. -""" - -import os -import sqlite3 -import tempfile -import time -from datetime import datetime, timedelta -from pathlib import Path - -import pytest - -from codexlens.storage.dir_index import DirIndexStore - -# Check if pytest-benchmark is available -try: - import pytest_benchmark - BENCHMARK_AVAILABLE = True -except ImportError: - BENCHMARK_AVAILABLE = False - - -class TestMtimeTracking: - """Tests for mtime-based file change detection.""" - - @pytest.fixture - def temp_db(self): - """Create temporary database.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - yield db_path - if db_path.exists(): - db_path.unlink() - - @pytest.fixture - def temp_dir(self): - """Create temporary directory with test files.""" - with tempfile.TemporaryDirectory() as tmpdir: - temp_path = Path(tmpdir) - - # Create test files - (temp_path / "file1.py").write_text("def function1(): pass") - (temp_path / "file2.py").write_text("def function2(): pass") - (temp_path / "file3.js").write_text("function test() {}") - - yield temp_path - - @pytest.fixture - def index_store(self, temp_db): - """Create DirIndexStore instance.""" - store = DirIndexStore(temp_db) - store.initialize() - yield store - store.close() - - def test_files_table_has_mtime_column(self, index_store): - """Test files table includes mtime column for tracking.""" - with index_store._get_connection() as conn: - cursor = conn.execute("PRAGMA table_info(files)") - columns = {row[1]: row[2] for row in cursor.fetchall()} - assert "mtime" in columns or "indexed_at" in columns, \ - "Should have mtime or indexed_at for change detection" - - def test_needs_reindex_new_file(self, index_store, temp_dir): - """Test needs_reindex returns True for new files.""" - file_path = temp_dir / "file1.py" - file_mtime = file_path.stat().st_mtime - - # New file should need indexing - needs_update = self._check_needs_reindex(index_store, str(file_path), file_mtime) - assert needs_update is True, "New file should need indexing" - - def test_needs_reindex_unchanged_file(self, index_store, temp_dir): - """Test needs_reindex returns False for unchanged files.""" - file_path = temp_dir / "file1.py" - file_mtime = file_path.stat().st_mtime - content = file_path.read_text() - - # Index the file - with index_store._get_connection() as conn: - name = file_path.name - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, str(file_path), content, "python", file_mtime) - ) - conn.commit() - - # Unchanged file should not need reindexing - needs_update = self._check_needs_reindex(index_store, str(file_path), file_mtime) - assert needs_update is False, "Unchanged file should not need reindexing" - - def test_needs_reindex_modified_file(self, index_store, temp_dir): - """Test needs_reindex returns True for modified files.""" - file_path = temp_dir / "file1.py" - original_mtime = file_path.stat().st_mtime - content = file_path.read_text() - - # Index the file - with index_store._get_connection() as conn: - name = file_path.name - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, str(file_path), content, "python", original_mtime) - ) - conn.commit() - - # Modify the file (update mtime) - time.sleep(0.1) # Ensure mtime changes - file_path.write_text("def modified_function(): pass") - new_mtime = file_path.stat().st_mtime - - # Modified file should need reindexing - needs_update = self._check_needs_reindex(index_store, str(file_path), new_mtime) - assert needs_update is True, "Modified file should need reindexing" - assert new_mtime > original_mtime, "Mtime should have increased" - - def _check_needs_reindex(self, index_store, file_path: str, file_mtime: float) -> bool: - """Helper to check if file needs reindexing.""" - with index_store._get_connection() as conn: - cursor = conn.execute( - "SELECT mtime FROM files WHERE full_path = ?", - (file_path,) - ) - result = cursor.fetchone() - - if result is None: - return True # New file - - stored_mtime = result[0] - return file_mtime > stored_mtime - - -class TestIncrementalUpdate: - """Tests for incremental update workflows.""" - - @pytest.fixture - def temp_db(self): - """Create temporary database.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - yield db_path - if db_path.exists(): - db_path.unlink() - - @pytest.fixture - def temp_dir(self): - """Create temporary directory with test files.""" - with tempfile.TemporaryDirectory() as tmpdir: - temp_path = Path(tmpdir) - - # Create initial files - for i in range(10): - (temp_path / f"file{i}.py").write_text(f"def function{i}(): pass") - - yield temp_path - - @pytest.fixture - def index_store(self, temp_db): - """Create DirIndexStore instance.""" - store = DirIndexStore(temp_db) - store.initialize() - yield store - store.close() - - def test_incremental_skip_rate(self, index_store, temp_dir): - """Test incremental indexing achieves ≥90% skip rate on unchanged files.""" - # First indexing pass - index all files - files_indexed_first = self._index_directory(index_store, temp_dir) - assert files_indexed_first == 10, "Should index all 10 files initially" - - # Second pass without modifications - should skip most files - files_indexed_second = self._index_directory(index_store, temp_dir) - skip_rate = 1.0 - (files_indexed_second / files_indexed_first) - assert skip_rate >= 0.9, f"Skip rate should be ≥90%, got {skip_rate:.1%}" - - def test_incremental_indexes_modified_files(self, index_store, temp_dir): - """Test incremental indexing detects and updates modified files.""" - # Initial indexing - self._index_directory(index_store, temp_dir) - - # Modify 2 files - modified_files = ["file3.py", "file7.py"] - time.sleep(0.1) - for fname in modified_files: - (temp_dir / fname).write_text("def modified(): pass") - - # Re-index - files_indexed = self._index_directory(index_store, temp_dir) - - # Should re-index only modified files - assert files_indexed == len(modified_files), \ - f"Should re-index {len(modified_files)} modified files, got {files_indexed}" - - def test_incremental_indexes_new_files(self, index_store, temp_dir): - """Test incremental indexing detects and indexes new files.""" - # Initial indexing - self._index_directory(index_store, temp_dir) - - # Add new files - new_files = ["new1.py", "new2.py", "new3.py"] - time.sleep(0.1) - for fname in new_files: - (temp_dir / fname).write_text("def new_function(): pass") - - # Re-index - files_indexed = self._index_directory(index_store, temp_dir) - - # Should index new files - assert files_indexed == len(new_files), \ - f"Should index {len(new_files)} new files, got {files_indexed}" - - def _index_directory(self, index_store, directory: Path) -> int: - """Helper to index directory and return count of files indexed.""" - indexed_count = 0 - - for file_path in directory.glob("*.py"): - file_mtime = file_path.stat().st_mtime - content = file_path.read_text() - - # Check if needs indexing - with index_store._get_connection() as conn: - cursor = conn.execute( - "SELECT mtime FROM files WHERE full_path = ?", - (str(file_path),) - ) - result = cursor.fetchone() - - needs_index = (result is None) or (file_mtime > result[0]) - - if needs_index: - # Insert or update - name = file_path.name - conn.execute( - """INSERT OR REPLACE INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, str(file_path), content, "python", file_mtime) - ) - conn.commit() - indexed_count += 1 - - return indexed_count - - -class TestDeletedFileCleanup: - """Tests for cleanup of deleted files from index.""" - - @pytest.fixture - def temp_db(self): - """Create temporary database.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - yield db_path - if db_path.exists(): - db_path.unlink() - - @pytest.fixture - def index_store(self, temp_db): - """Create DirIndexStore instance.""" - store = DirIndexStore(temp_db) - store.initialize() - yield store - store.close() - - def test_cleanup_deleted_files(self, index_store): - """Test cleanup removes deleted file entries.""" - # Index files that no longer exist - deleted_files = [ - "/deleted/file1.py", - "/deleted/file2.js", - "/deleted/file3.ts" - ] - - with index_store._get_connection() as conn: - for path in deleted_files: - name = path.split('/')[-1] - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, path, "content", "python", time.time()) - ) - conn.commit() - - # Verify files are in index - cursor = conn.execute("SELECT COUNT(*) FROM files") - assert cursor.fetchone()[0] == len(deleted_files) - - # Run cleanup (manually since files don't exist) - deleted_count = self._cleanup_nonexistent_files(index_store, deleted_files) - - assert deleted_count == len(deleted_files), \ - f"Should remove {len(deleted_files)} deleted files" - - # Verify cleanup worked - with index_store._get_connection() as conn: - cursor = conn.execute("SELECT COUNT(*) FROM files WHERE full_path IN (?, ?, ?)", deleted_files) - assert cursor.fetchone()[0] == 0, "Deleted files should be removed from index" - - def test_cleanup_preserves_existing_files(self, index_store): - """Test cleanup preserves entries for existing files.""" - # Create temporary files - with tempfile.TemporaryDirectory() as tmpdir: - temp_path = Path(tmpdir) - existing_files = [ - temp_path / "exists1.py", - temp_path / "exists2.py" - ] - - for fpath in existing_files: - fpath.write_text("content") - - # Index existing and deleted files - all_files = [str(f) for f in existing_files] + ["/deleted/file.py"] - - with index_store._get_connection() as conn: - for path in all_files: - name = path.split('/')[-1] - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, path, "content", "python", time.time()) - ) - conn.commit() - - # Run cleanup - self._cleanup_nonexistent_files(index_store, ["/deleted/file.py"]) - - # Verify existing files preserved - with index_store._get_connection() as conn: - cursor = conn.execute( - "SELECT COUNT(*) FROM files WHERE full_path IN (?, ?)", - [str(f) for f in existing_files] - ) - assert cursor.fetchone()[0] == len(existing_files), \ - "Existing files should be preserved" - - def _cleanup_nonexistent_files(self, index_store, paths_to_check: list) -> int: - """Helper to cleanup nonexistent files.""" - deleted_count = 0 - - with index_store._get_connection() as conn: - for path in paths_to_check: - if not Path(path).exists(): - conn.execute("DELETE FROM files WHERE full_path = ?", (path,)) - deleted_count += 1 - conn.commit() - - return deleted_count - - -class TestMtimeEdgeCases: - """Tests for edge cases in mtime handling.""" - - @pytest.fixture - def temp_db(self): - """Create temporary database.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - yield db_path - if db_path.exists(): - db_path.unlink() - - @pytest.fixture - def index_store(self, temp_db): - """Create DirIndexStore instance.""" - store = DirIndexStore(temp_db) - store.initialize() - yield store - store.close() - - def test_mtime_precision(self, index_store): - """Test mtime comparison handles floating-point precision.""" - file_path = "/test/file.py" - mtime1 = time.time() - mtime2 = mtime1 + 1e-6 # Microsecond difference - - with index_store._get_connection() as conn: - name = file_path.split('/')[-1] - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, file_path, "content", "python", mtime1) - ) - conn.commit() - - # Check if mtime2 is considered newer - cursor = conn.execute("SELECT mtime FROM files WHERE full_path = ?", (file_path,)) - stored_mtime = cursor.fetchone()[0] - - # Should handle precision correctly - assert isinstance(stored_mtime, (int, float)) - - def test_mtime_null_handling(self, index_store): - """Test handling of NULL mtime values (legacy data).""" - file_path = "/test/legacy.py" - - with index_store._get_connection() as conn: - # Insert file without mtime (legacy) - use NULL - name = file_path.split('/')[-1] - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, NULL)""", - (name, file_path, "content", "python") - ) - conn.commit() - - # Query should handle NULL mtime gracefully - cursor = conn.execute("SELECT mtime FROM files WHERE full_path = ?", (file_path,)) - result = cursor.fetchone() - # mtime should be NULL or have default value - assert result is not None - - def test_future_mtime_handling(self, index_store): - """Test handling of files with future mtime (clock skew).""" - file_path = "/test/future.py" - future_mtime = time.time() + 86400 # 1 day in future - - with index_store._get_connection() as conn: - name = file_path.split('/')[-1] - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, file_path, "content", "python", future_mtime) - ) - conn.commit() - - # Should store future mtime without errors - cursor = conn.execute("SELECT mtime FROM files WHERE full_path = ?", (file_path,)) - stored_mtime = cursor.fetchone()[0] - assert stored_mtime == future_mtime - - -@pytest.mark.benchmark -class TestIncrementalPerformance: - """Performance benchmarks for incremental indexing.""" - - @pytest.fixture - def large_indexed_db(self): - """Create database with many indexed files.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - - store = DirIndexStore(db_path) - store.initialize() - - # Index 1000 files - with store._get_connection() as conn: - current_time = time.time() - for i in range(1000): - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (f"file{i}.py", f"/test/file{i}.py", f"def func{i}(): pass", "python", current_time) - ) - conn.commit() - - yield db_path - store.close() - - if db_path.exists(): - db_path.unlink() - - def test_skip_rate_benchmark(self, large_indexed_db): - """Benchmark skip rate on large dataset.""" - store = DirIndexStore(large_indexed_db) - store.initialize() - - try: - # Simulate incremental pass - skipped = 0 - total = 1000 - current_time = time.time() - - with store._get_connection() as conn: - for i in range(total): - cursor = conn.execute( - "SELECT mtime FROM files WHERE full_path = ?", - (f"/test/file{i}.py",) - ) - result = cursor.fetchone() - - if result and current_time <= result[0] + 1.0: - skipped += 1 - - skip_rate = skipped / total - assert skip_rate >= 0.9, f"Skip rate should be ≥90%, got {skip_rate:.1%}" - finally: - store.close() - - @pytest.mark.skipif(not BENCHMARK_AVAILABLE, reason="pytest-benchmark not installed") - def test_cleanup_performance(self, large_indexed_db, benchmark): - """Benchmark cleanup of deleted files on large dataset.""" - store = DirIndexStore(large_indexed_db) - store.initialize() - - try: - def cleanup_batch(): - with store._get_connection() as conn: - # Delete 100 files - paths = [f"/test/file{i}.py" for i in range(100)] - placeholders = ",".join("?" * len(paths)) - conn.execute(f"DELETE FROM files WHERE full_path IN ({placeholders})", paths) - conn.commit() - - # Should complete in reasonable time - result = benchmark(cleanup_batch) - assert result < 1.0 # Should take <1 second for 100 deletions - finally: - store.close() diff --git a/codex-lens/tests/test_index_status_cli_contract.py b/codex-lens/tests/test_index_status_cli_contract.py deleted file mode 100644 index cac0549c..00000000 --- a/codex-lens/tests/test_index_status_cli_contract.py +++ /dev/null @@ -1,674 +0,0 @@ -import json - -from typer.testing import CliRunner - -import codexlens.cli.commands as commands -from codexlens.cli.commands import app -import codexlens.cli.embedding_manager as embedding_manager -from codexlens.config import Config -from codexlens.entities import SearchResult -from codexlens.search.chain_search import ChainSearchResult, SearchStats - - -def test_index_status_json_preserves_legacy_embeddings_contract( - monkeypatch, - tmp_path, -) -> None: - workspace = tmp_path / "workspace" - workspace.mkdir() - (workspace / "_index.db").touch() - - legacy_summary = { - "total_indexes": 3, - "indexes_with_embeddings": 1, - "total_chunks": 42, - "indexes": [ - { - "project": "child", - "path": str(workspace / "child" / "_index.db"), - "has_embeddings": True, - "total_chunks": 42, - "total_files": 1, - "coverage_percent": 100.0, - } - ], - } - root_status = { - "total_indexes": 3, - "total_files": 2, - "files_with_embeddings": 0, - "files_without_embeddings": 2, - "total_chunks": 0, - "coverage_percent": 0.0, - "indexes_with_embeddings": 1, - "indexes_without_embeddings": 2, - "model_info": None, - "root": { - "index_path": str(workspace / "_index.db"), - "exists": False, - "total_files": 2, - "files_with_embeddings": 0, - "files_without_embeddings": 2, - "total_chunks": 0, - "coverage_percent": 0.0, - "has_embeddings": False, - "storage_mode": "none", - }, - "subtree": { - "total_indexes": 3, - "total_files": 3, - "files_with_embeddings": 1, - "files_without_embeddings": 2, - "total_chunks": 42, - "coverage_percent": 33.3, - "indexes_with_embeddings": 1, - "indexes_without_embeddings": 2, - }, - "centralized": { - "dense_index_exists": False, - "binary_index_exists": False, - "dense_ready": False, - "binary_ready": False, - "usable": False, - "chunk_metadata_rows": 0, - "binary_vector_rows": 0, - "files_with_embeddings": 0, - }, - } - - monkeypatch.setattr( - embedding_manager, - "get_embeddings_status", - lambda _index_root: {"success": True, "result": root_status}, - ) - monkeypatch.setattr( - embedding_manager, - "get_embedding_stats_summary", - lambda _index_root: {"success": True, "result": legacy_summary}, - ) - monkeypatch.setattr( - commands, - "RegistryStore", - type( - "FakeRegistryStore", - (), - { - "initialize": lambda self: None, - "close": lambda self: None, - }, - ), - ) - monkeypatch.setattr( - commands, - "PathMapper", - type( - "FakePathMapper", - (), - { - "source_to_index_db": lambda self, _target_path: workspace / "_index.db", - }, - ), - ) - - runner = CliRunner() - result = runner.invoke(app, ["index", "status", str(workspace), "--json"]) - - assert result.exit_code == 0, result.output - payload = json.loads(result.stdout) - body = payload["result"] - assert body["embeddings"] == legacy_summary - assert body["embeddings_error"] is None - assert body["embeddings_status"] == root_status - assert body["embeddings_status_error"] is None - assert body["embeddings_summary"] == legacy_summary - - -def test_search_json_preserves_dense_rerank_method_label( - monkeypatch, - tmp_path, -) -> None: - workspace = tmp_path / "workspace" - workspace.mkdir() - - search_result = ChainSearchResult( - query="greet function", - results=[ - SearchResult( - path=str(workspace / "src" / "app.py"), - score=0.97, - excerpt="def greet(name):", - content="def greet(name):\n return f'hello {name}'\n", - ) - ], - symbols=[], - stats=SearchStats(dirs_searched=2, files_matched=1, time_ms=12.5), - ) - captured: dict[str, object] = {} - - monkeypatch.setattr(commands.Config, "load", staticmethod(lambda: Config(data_dir=tmp_path / "data"))) - monkeypatch.setattr( - commands, - "RegistryStore", - type( - "FakeRegistryStore", - (), - { - "initialize": lambda self: None, - "close": lambda self: None, - }, - ), - ) - monkeypatch.setattr( - commands, - "PathMapper", - type( - "FakePathMapper", - (), - {}, - ), - ) - - class FakeChainSearchEngine: - def __init__(self, registry, mapper, config=None): - captured["registry"] = registry - captured["mapper"] = mapper - captured["config"] = config - - def search(self, *_args, **_kwargs): - raise AssertionError("dense_rerank should dispatch via cascade_search") - - def cascade_search(self, query, source_path, k=10, options=None, strategy=None): - captured["query"] = query - captured["source_path"] = source_path - captured["limit"] = k - captured["options"] = options - captured["strategy"] = strategy - return search_result - - monkeypatch.setattr(commands, "ChainSearchEngine", FakeChainSearchEngine) - - runner = CliRunner() - result = runner.invoke( - app, - ["search", "greet function", "--path", str(workspace), "--method", "dense_rerank", "--json"], - ) - - assert result.exit_code == 0, result.output - payload = json.loads(result.stdout) - body = payload["result"] - assert body["method"] == "dense_rerank" - assert body["count"] == 1 - assert body["results"][0]["path"] == str(workspace / "src" / "app.py") - assert captured["strategy"] == "dense_rerank" - assert captured["limit"] == 20 - - -def test_search_json_auto_routes_keyword_queries_to_fts( - monkeypatch, - tmp_path, -) -> None: - workspace = tmp_path / "workspace" - workspace.mkdir() - - search_result = ChainSearchResult( - query="windowsHide", - results=[ - SearchResult( - path=str(workspace / "src" / "spawn.ts"), - score=0.91, - excerpt="windowsHide: true", - content="spawn('node', [], { windowsHide: true })", - ) - ], - symbols=[], - stats=SearchStats(dirs_searched=2, files_matched=1, time_ms=8.0), - ) - captured: dict[str, object] = {} - - monkeypatch.setattr(commands.Config, "load", staticmethod(lambda: Config(data_dir=tmp_path / "data"))) - monkeypatch.setattr( - commands, - "RegistryStore", - type("FakeRegistryStore", (), {"initialize": lambda self: None, "close": lambda self: None}), - ) - monkeypatch.setattr(commands, "PathMapper", type("FakePathMapper", (), {})) - - class FakeChainSearchEngine: - def __init__(self, registry, mapper, config=None): - captured["config"] = config - - def search(self, query, source_path, options=None): - captured["query"] = query - captured["source_path"] = source_path - captured["options"] = options - return search_result - - def cascade_search(self, *_args, **_kwargs): - raise AssertionError("auto keyword queries should not dispatch to cascade_search") - - monkeypatch.setattr(commands, "ChainSearchEngine", FakeChainSearchEngine) - - runner = CliRunner() - result = runner.invoke( - app, - ["search", "windowsHide", "--path", str(workspace), "--json"], - ) - - assert result.exit_code == 0, result.output - body = json.loads(result.stdout)["result"] - assert body["method"] == "fts" - assert captured["options"].enable_vector is False - assert captured["options"].hybrid_mode is False - - -def test_search_json_auto_routes_mixed_queries_to_hybrid( - monkeypatch, - tmp_path, -) -> None: - workspace = tmp_path / "workspace" - workspace.mkdir() - - search_result = ChainSearchResult( - query="how does my_function work", - results=[ - SearchResult( - path=str(workspace / "src" / "app.py"), - score=0.81, - excerpt="def my_function():", - content="def my_function():\n return 1\n", - ) - ], - symbols=[], - stats=SearchStats(dirs_searched=2, files_matched=1, time_ms=10.0), - ) - captured: dict[str, object] = {} - - monkeypatch.setattr(commands.Config, "load", staticmethod(lambda: Config(data_dir=tmp_path / "data"))) - monkeypatch.setattr( - commands, - "RegistryStore", - type("FakeRegistryStore", (), {"initialize": lambda self: None, "close": lambda self: None}), - ) - monkeypatch.setattr(commands, "PathMapper", type("FakePathMapper", (), {})) - - class FakeChainSearchEngine: - def __init__(self, registry, mapper, config=None): - captured["config"] = config - - def search(self, query, source_path, options=None): - captured["query"] = query - captured["source_path"] = source_path - captured["options"] = options - return search_result - - def cascade_search(self, *_args, **_kwargs): - raise AssertionError("mixed auto queries should not dispatch to cascade_search") - - monkeypatch.setattr(commands, "ChainSearchEngine", FakeChainSearchEngine) - - runner = CliRunner() - result = runner.invoke( - app, - ["search", "how does my_function work", "--path", str(workspace), "--json"], - ) - - assert result.exit_code == 0, result.output - body = json.loads(result.stdout)["result"] - assert body["method"] == "hybrid" - assert captured["options"].enable_vector is True - assert captured["options"].hybrid_mode is True - assert captured["options"].enable_cascade is False - - -def test_search_json_auto_routes_generated_artifact_queries_to_fts( - monkeypatch, - tmp_path, -) -> None: - workspace = tmp_path / "workspace" - workspace.mkdir() - - search_result = ChainSearchResult( - query="dist bundle output", - results=[ - SearchResult( - path=str(workspace / "dist" / "bundle.js"), - score=0.77, - excerpt="bundle output", - content="console.log('bundle')", - ) - ], - symbols=[], - stats=SearchStats(dirs_searched=2, files_matched=1, time_ms=9.0), - ) - captured: dict[str, object] = {} - - monkeypatch.setattr(commands.Config, "load", staticmethod(lambda: Config(data_dir=tmp_path / "data"))) - monkeypatch.setattr( - commands, - "RegistryStore", - type("FakeRegistryStore", (), {"initialize": lambda self: None, "close": lambda self: None}), - ) - monkeypatch.setattr(commands, "PathMapper", type("FakePathMapper", (), {})) - - class FakeChainSearchEngine: - def __init__(self, registry, mapper, config=None): - captured["config"] = config - - def search(self, query, source_path, options=None): - captured["query"] = query - captured["source_path"] = source_path - captured["options"] = options - return search_result - - def cascade_search(self, *_args, **_kwargs): - raise AssertionError("generated artifact auto queries should not dispatch to cascade_search") - - monkeypatch.setattr(commands, "ChainSearchEngine", FakeChainSearchEngine) - - runner = CliRunner() - result = runner.invoke( - app, - ["search", "dist bundle output", "--path", str(workspace), "--json"], - ) - - assert result.exit_code == 0, result.output - body = json.loads(result.stdout)["result"] - assert body["method"] == "fts" - assert captured["options"].enable_vector is False - assert captured["options"].hybrid_mode is False - - -def test_auto_select_search_method_prefers_fts_for_lexical_config_queries() -> None: - assert commands._auto_select_search_method("embedding backend fastembed local litellm api config") == "fts" - assert commands._auto_select_search_method("get_reranker factory onnx backend selection") == "fts" - assert commands._auto_select_search_method("how to authenticate users safely?") == "dense_rerank" - - -def test_search_json_fts_zero_results_uses_filesystem_fallback( - monkeypatch, - tmp_path, -) -> None: - workspace = tmp_path / "workspace" - workspace.mkdir() - - indexed_result = ChainSearchResult( - query="find_descendant_project_roots", - results=[], - symbols=[], - stats=SearchStats(dirs_searched=3, files_matched=0, time_ms=7.5), - ) - fallback_result = SearchResult( - path=str(workspace / "src" / "registry.py"), - score=1.0, - excerpt="def find_descendant_project_roots(...):", - content=None, - metadata={ - "filesystem_fallback": True, - "backend": "ripgrep-fallback", - "stale_index_suspected": True, - }, - start_line=12, - end_line=12, - ) - captured: dict[str, object] = {"fallback_calls": 0} - - monkeypatch.setattr(commands.Config, "load", staticmethod(lambda: Config(data_dir=tmp_path / "data"))) - monkeypatch.setattr( - commands, - "RegistryStore", - type("FakeRegistryStore", (), {"initialize": lambda self: None, "close": lambda self: None}), - ) - monkeypatch.setattr(commands, "PathMapper", type("FakePathMapper", (), {})) - - class FakeChainSearchEngine: - def __init__(self, registry, mapper, config=None): - captured["config"] = config - - def search(self, query, source_path, options=None): - captured["query"] = query - captured["source_path"] = source_path - captured["options"] = options - return indexed_result - - def cascade_search(self, *_args, **_kwargs): - raise AssertionError("fts zero-result queries should not dispatch to cascade_search") - - def fake_fallback(query, source_path, *, limit, config, code_only=False, exclude_extensions=None): - captured["fallback_calls"] = int(captured["fallback_calls"]) + 1 - captured["fallback_query"] = query - captured["fallback_path"] = source_path - captured["fallback_limit"] = limit - captured["fallback_code_only"] = code_only - captured["fallback_exclude_extensions"] = exclude_extensions - return { - "results": [fallback_result], - "time_ms": 2.5, - "fallback": { - "backend": "ripgrep-fallback", - "stale_index_suspected": True, - "reason": "Indexed FTS search returned no results; filesystem fallback used.", - }, - } - - monkeypatch.setattr(commands, "ChainSearchEngine", FakeChainSearchEngine) - monkeypatch.setattr(commands, "_filesystem_fallback_search", fake_fallback) - - runner = CliRunner() - result = runner.invoke( - app, - ["search", "find_descendant_project_roots", "--method", "fts", "--path", str(workspace), "--json"], - ) - - assert result.exit_code == 0, result.output - body = json.loads(result.stdout)["result"] - assert body["method"] == "fts" - assert body["count"] == 1 - assert body["results"][0]["path"] == str(workspace / "src" / "registry.py") - assert body["results"][0]["excerpt"] == "def find_descendant_project_roots(...):" - assert body["stats"]["files_matched"] == 1 - assert body["stats"]["time_ms"] == 10.0 - assert body["fallback"] == { - "backend": "ripgrep-fallback", - "stale_index_suspected": True, - "reason": "Indexed FTS search returned no results; filesystem fallback used.", - } - assert captured["fallback_calls"] == 1 - assert captured["fallback_query"] == "find_descendant_project_roots" - assert captured["fallback_path"] == workspace - assert captured["fallback_limit"] == 20 - assert captured["options"].enable_vector is False - assert captured["options"].hybrid_mode is False - - -def test_search_json_hybrid_zero_results_does_not_use_filesystem_fallback( - monkeypatch, - tmp_path, -) -> None: - workspace = tmp_path / "workspace" - workspace.mkdir() - - indexed_result = ChainSearchResult( - query="how does my_function work", - results=[], - symbols=[], - stats=SearchStats(dirs_searched=4, files_matched=0, time_ms=11.0), - ) - captured: dict[str, object] = {"fallback_calls": 0} - - monkeypatch.setattr(commands.Config, "load", staticmethod(lambda: Config(data_dir=tmp_path / "data"))) - monkeypatch.setattr( - commands, - "RegistryStore", - type("FakeRegistryStore", (), {"initialize": lambda self: None, "close": lambda self: None}), - ) - monkeypatch.setattr(commands, "PathMapper", type("FakePathMapper", (), {})) - - class FakeChainSearchEngine: - def __init__(self, registry, mapper, config=None): - captured["config"] = config - - def search(self, query, source_path, options=None): - captured["query"] = query - captured["source_path"] = source_path - captured["options"] = options - return indexed_result - - def cascade_search(self, *_args, **_kwargs): - raise AssertionError("hybrid queries should not dispatch to cascade_search") - - def fake_fallback(*_args, **_kwargs): - captured["fallback_calls"] = int(captured["fallback_calls"]) + 1 - return None - - monkeypatch.setattr(commands, "ChainSearchEngine", FakeChainSearchEngine) - monkeypatch.setattr(commands, "_filesystem_fallback_search", fake_fallback) - - runner = CliRunner() - result = runner.invoke( - app, - ["search", "how does my_function work", "--path", str(workspace), "--json"], - ) - - assert result.exit_code == 0, result.output - body = json.loads(result.stdout)["result"] - assert body["method"] == "hybrid" - assert body["count"] == 0 - assert "fallback" not in body - assert body["stats"]["files_matched"] == 0 - assert body["stats"]["time_ms"] == 11.0 - assert captured["fallback_calls"] == 0 - assert captured["options"].enable_vector is True - assert captured["options"].hybrid_mode is True - - -def test_filesystem_fallback_search_prefers_source_definitions_for_keyword_queries( - monkeypatch, - tmp_path, -) -> None: - workspace = tmp_path / "workspace" - workspace.mkdir() - - source_path = workspace / "src" / "registry.py" - test_path = workspace / "tests" / "test_registry.py" - ref_path = workspace / "src" / "chain_search.py" - - match_lines = [ - { - "type": "match", - "data": { - "path": {"text": str(test_path)}, - "lines": {"text": "def test_find_descendant_project_roots_returns_nested_project_roots():\n"}, - "line_number": 12, - }, - }, - { - "type": "match", - "data": { - "path": {"text": str(source_path)}, - "lines": {"text": "def find_descendant_project_roots(self, source_root: Path) -> List[DirMapping]:\n"}, - "line_number": 48, - }, - }, - { - "type": "match", - "data": { - "path": {"text": str(ref_path)}, - "lines": {"text": "descendant_roots = self.registry.find_descendant_project_roots(source_root)\n"}, - "line_number": 91, - }, - }, - ] - - monkeypatch.setattr(commands.shutil, "which", lambda _name: "rg") - monkeypatch.setattr( - commands.subprocess, - "run", - lambda *_args, **_kwargs: type( - "FakeCompletedProcess", - (), - { - "returncode": 0, - "stdout": "\n".join(json.dumps(line) for line in match_lines), - "stderr": "", - }, - )(), - ) - - fallback = commands._filesystem_fallback_search( - "find_descendant_project_roots", - workspace, - limit=5, - config=Config(data_dir=tmp_path / "data"), - ) - - assert fallback is not None - assert fallback["fallback"]["backend"] == "ripgrep-fallback" - assert fallback["results"][0].path == str(source_path) - assert fallback["results"][1].path == str(ref_path) - assert fallback["results"][2].path == str(test_path) - assert fallback["results"][0].score > fallback["results"][1].score > fallback["results"][2].score - - -def test_clean_json_reports_partial_success_when_locked_files_remain( - monkeypatch, - tmp_path, -) -> None: - workspace = tmp_path / "workspace" - project_index = tmp_path / "indexes" / "workspace" - project_index.mkdir(parents=True) - (project_index / "_index.db").write_text("db", encoding="utf-8") - locked_path = project_index / "nested" / "_index.db" - locked_path.parent.mkdir(parents=True) - locked_path.write_text("locked", encoding="utf-8") - - captured: dict[str, object] = {} - - class FakePathMapper: - def __init__(self): - self.index_root = tmp_path / "indexes" - - def source_to_index_dir(self, source_path): - captured["mapped_source"] = source_path - return project_index - - class FakeRegistryStore: - def initialize(self): - captured["registry_initialized"] = True - - def unregister_project(self, source_path): - captured["unregistered_project"] = source_path - return True - - def close(self): - captured["registry_closed"] = True - - def fake_remove_tree(target): - captured["removed_target"] = target - return { - "removed": False, - "partial": True, - "locked_paths": [str(locked_path)], - "remaining_path": str(project_index), - "errors": [], - } - - monkeypatch.setattr(commands, "PathMapper", FakePathMapper) - monkeypatch.setattr(commands, "RegistryStore", FakeRegistryStore) - monkeypatch.setattr(commands, "_remove_tree_best_effort", fake_remove_tree) - - runner = CliRunner() - result = runner.invoke(app, ["clean", str(workspace), "--json"]) - - assert result.exit_code == 0, result.output - payload = json.loads(result.stdout) - body = payload["result"] - assert payload["success"] is True - assert body["cleaned"] == str(workspace.resolve()) - assert body["index_path"] == str(project_index) - assert body["partial"] is True - assert body["locked_paths"] == [str(locked_path)] - assert body["remaining_path"] == str(project_index) - assert captured["registry_initialized"] is True - assert captured["registry_closed"] is True - assert captured["unregistered_project"] == workspace.resolve() - assert captured["removed_target"] == project_index diff --git a/codex-lens/tests/test_index_tree_ignore_dirs.py b/codex-lens/tests/test_index_tree_ignore_dirs.py deleted file mode 100644 index f9c51773..00000000 --- a/codex-lens/tests/test_index_tree_ignore_dirs.py +++ /dev/null @@ -1,295 +0,0 @@ -from __future__ import annotations - -import json -from pathlib import Path -from unittest.mock import MagicMock - -from codexlens.config import Config -from codexlens.storage.dir_index import DirIndexStore -from codexlens.storage.index_tree import DirBuildResult, IndexTreeBuilder -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore - - -def _relative_dirs(source_root: Path, dirs_by_depth: dict[int, list[Path]]) -> set[str]: - return { - path.relative_to(source_root).as_posix() - for paths in dirs_by_depth.values() - for path in paths - if path != source_root - } - - -def test_collect_dirs_by_depth_skips_common_build_artifact_dirs(tmp_path: Path) -> None: - src_dir = tmp_path / "src" - src_dir.mkdir() - (src_dir / "app.py").write_text("print('ok')\n", encoding="utf-8") - - for artifact_dir in ["dist", "build", "coverage", ".next", "out", ".turbo", ".parcel-cache", "target"]: - target_dir = tmp_path / artifact_dir - target_dir.mkdir(parents=True, exist_ok=True) - (target_dir / "generated.py").write_text("print('artifact')\n", encoding="utf-8") - - builder = IndexTreeBuilder( - registry=MagicMock(), - mapper=MagicMock(), - config=Config(data_dir=tmp_path / "data"), - incremental=False, - ) - - dirs_by_depth = builder._collect_dirs_by_depth(tmp_path) - discovered_dirs = _relative_dirs(tmp_path, dirs_by_depth) - - assert "src" in discovered_dirs - assert "dist" not in discovered_dirs - assert "build" not in discovered_dirs - assert "coverage" not in discovered_dirs - assert ".next" not in discovered_dirs - assert "out" not in discovered_dirs - assert ".turbo" not in discovered_dirs - assert ".parcel-cache" not in discovered_dirs - assert "target" not in discovered_dirs - - -def test_should_index_dir_ignores_transitive_build_only_subtrees(tmp_path: Path) -> None: - package_dir = tmp_path / "package" - dist_dir = package_dir / "dist" - dist_dir.mkdir(parents=True) - (dist_dir / "bundle.py").write_text("print('compiled')\n", encoding="utf-8") - - builder = IndexTreeBuilder( - registry=MagicMock(), - mapper=MagicMock(), - config=Config(data_dir=tmp_path / "data"), - incremental=False, - ) - - assert builder._should_index_dir(package_dir) is False - - -def test_collect_dirs_by_depth_respects_relative_ignore_patterns_from_config(tmp_path: Path) -> None: - src_dir = tmp_path / "frontend" / "src" - src_dir.mkdir(parents=True) - (src_dir / "app.ts").write_text("export const app = 1\n", encoding="utf-8") - - dist_dir = tmp_path / "frontend" / "dist" - dist_dir.mkdir(parents=True) - (dist_dir / "bundle.ts").write_text("export const bundle = 1\n", encoding="utf-8") - - builder = IndexTreeBuilder( - registry=MagicMock(), - mapper=MagicMock(), - config=Config(data_dir=tmp_path / "data", ignore_patterns=["frontend/dist"]), - incremental=False, - ) - - dirs_by_depth = builder._collect_dirs_by_depth(tmp_path) - discovered_dirs = _relative_dirs(tmp_path, dirs_by_depth) - - assert "frontend/src" in discovered_dirs - assert "frontend/dist" not in discovered_dirs - - -def test_iter_source_files_respects_extension_filters_and_relative_patterns(tmp_path: Path) -> None: - frontend_dir = tmp_path / "frontend" - frontend_dir.mkdir() - (frontend_dir / "app.ts").write_text("export const app = 1\n", encoding="utf-8") - (frontend_dir / "bundle.min.js").write_text("export const bundle = 1\n", encoding="utf-8") - (frontend_dir / "skip.ts").write_text("export const skip = 1\n", encoding="utf-8") - - builder = IndexTreeBuilder( - registry=MagicMock(), - mapper=MagicMock(), - config=Config( - data_dir=tmp_path / "data", - extension_filters=["*.min.js", "frontend/skip.ts"], - ), - incremental=False, - ) - - source_files = builder._iter_source_files(frontend_dir, source_root=tmp_path) - - assert [path.name for path in source_files] == ["app.ts"] - assert builder._should_index_dir(frontend_dir, source_root=tmp_path) is True - - -def test_builder_loads_saved_ignore_and_extension_filters_by_default(tmp_path: Path, monkeypatch) -> None: - codexlens_home = tmp_path / "codexlens-home" - codexlens_home.mkdir() - (codexlens_home / "settings.json").write_text( - json.dumps( - { - "ignore_patterns": ["frontend/dist"], - "extension_filters": ["*.min.js"], - } - ), - encoding="utf-8", - ) - monkeypatch.setenv("CODEXLENS_DATA_DIR", str(codexlens_home)) - - frontend_dir = tmp_path / "frontend" - frontend_dir.mkdir() - dist_dir = frontend_dir / "dist" - dist_dir.mkdir() - (frontend_dir / "app.ts").write_text("export const app = 1\n", encoding="utf-8") - (frontend_dir / "bundle.min.js").write_text("export const bundle = 1\n", encoding="utf-8") - (dist_dir / "compiled.ts").write_text("export const compiled = 1\n", encoding="utf-8") - - builder = IndexTreeBuilder( - registry=MagicMock(), - mapper=MagicMock(), - config=None, - incremental=False, - ) - - source_files = builder._iter_source_files(frontend_dir, source_root=tmp_path) - dirs_by_depth = builder._collect_dirs_by_depth(tmp_path) - discovered_dirs = _relative_dirs(tmp_path, dirs_by_depth) - - assert [path.name for path in source_files] == ["app.ts"] - assert "frontend/dist" not in discovered_dirs - - -def test_prune_stale_project_dirs_removes_ignored_artifact_mappings(tmp_path: Path) -> None: - workspace = tmp_path / "workspace" - src_dir = workspace / "src" - dist_dir = workspace / "dist" - src_dir.mkdir(parents=True) - dist_dir.mkdir(parents=True) - (src_dir / "app.py").write_text("print('ok')\n", encoding="utf-8") - (dist_dir / "bundle.py").write_text("print('artifact')\n", encoding="utf-8") - - mapper = PathMapper(index_root=tmp_path / "indexes") - registry = RegistryStore(db_path=tmp_path / "registry.db") - registry.initialize() - project = registry.register_project(workspace, mapper.source_to_index_dir(workspace)) - registry.register_dir(project.id, workspace, mapper.source_to_index_db(workspace), depth=0) - registry.register_dir(project.id, src_dir, mapper.source_to_index_db(src_dir), depth=1) - registry.register_dir(project.id, dist_dir, mapper.source_to_index_db(dist_dir), depth=1) - - builder = IndexTreeBuilder( - registry=registry, - mapper=mapper, - config=Config(data_dir=tmp_path / "data"), - incremental=False, - ) - - dirs_by_depth = builder._collect_dirs_by_depth(workspace) - pruned = builder._prune_stale_project_dirs( - project_id=project.id, - source_root=workspace, - dirs_by_depth=dirs_by_depth, - ) - - remaining = {mapping.source_path.resolve() for mapping in registry.get_project_dirs(project.id)} - registry.close() - - assert dist_dir.resolve() in pruned - assert workspace.resolve() in remaining - assert src_dir.resolve() in remaining - assert dist_dir.resolve() not in remaining - - -def test_force_full_build_prunes_stale_ignored_mappings(tmp_path: Path) -> None: - workspace = tmp_path / "workspace" - src_dir = workspace / "src" - dist_dir = workspace / "dist" - src_dir.mkdir(parents=True) - dist_dir.mkdir(parents=True) - (src_dir / "app.py").write_text("print('ok')\n", encoding="utf-8") - (dist_dir / "bundle.py").write_text("print('artifact')\n", encoding="utf-8") - - mapper = PathMapper(index_root=tmp_path / "indexes") - registry = RegistryStore(db_path=tmp_path / "registry.db") - registry.initialize() - project = registry.register_project(workspace, mapper.source_to_index_dir(workspace)) - registry.register_dir(project.id, workspace, mapper.source_to_index_db(workspace), depth=0) - registry.register_dir(project.id, dist_dir, mapper.source_to_index_db(dist_dir), depth=1) - - builder = IndexTreeBuilder( - registry=registry, - mapper=mapper, - config=Config( - data_dir=tmp_path / "data", - global_symbol_index_enabled=False, - ), - incremental=False, - ) - - def fake_build_level_parallel( - dirs: list[Path], - languages, - workers, - *, - source_root: Path, - project_id: int, - global_index_db_path: Path, - ) -> list[DirBuildResult]: - return [ - DirBuildResult( - source_path=dir_path, - index_path=mapper.source_to_index_db(dir_path), - files_count=1 if dir_path == src_dir else 0, - symbols_count=0, - subdirs=[], - ) - for dir_path in dirs - ] - - builder._build_level_parallel = fake_build_level_parallel # type: ignore[method-assign] - builder._link_children_to_parent = MagicMock() - - build_result = builder.build(workspace, force_full=True, workers=1) - - remaining = {mapping.source_path.resolve() for mapping in registry.get_project_dirs(project.id)} - registry.close() - - assert build_result.total_dirs == 2 - assert workspace.resolve() in remaining - assert src_dir.resolve() in remaining - assert dist_dir.resolve() not in remaining - - -def test_force_full_build_rewrites_directory_db_and_drops_stale_ignored_subdirs( - tmp_path: Path, -) -> None: - project_root = tmp_path / "project" - src_dir = project_root / "src" - build_dir = project_root / "build" - src_dir.mkdir(parents=True) - build_dir.mkdir(parents=True) - (src_dir / "app.py").write_text("print('ok')\n", encoding="utf-8") - (build_dir / "generated.py").write_text("print('artifact')\n", encoding="utf-8") - - mapper = PathMapper(index_root=tmp_path / "indexes") - registry = RegistryStore(db_path=tmp_path / "registry.db") - registry.initialize() - config = Config( - data_dir=tmp_path / "data", - global_symbol_index_enabled=False, - ) - - root_index_db = mapper.source_to_index_db(project_root) - with DirIndexStore(root_index_db, config=config) as store: - store.register_subdir( - name="build", - index_path=mapper.source_to_index_db(build_dir), - files_count=1, - ) - - builder = IndexTreeBuilder( - registry=registry, - mapper=mapper, - config=config, - incremental=False, - ) - - build_result = builder.build(project_root, force_full=True, workers=1) - - with DirIndexStore(root_index_db, config=config) as store: - subdir_names = [link.name for link in store.get_subdirs()] - - registry.close() - - assert build_result.total_dirs == 2 - assert subdir_names == ["src"] diff --git a/codex-lens/tests/test_litellm_reranker.py b/codex-lens/tests/test_litellm_reranker.py deleted file mode 100644 index 60c843d8..00000000 --- a/codex-lens/tests/test_litellm_reranker.py +++ /dev/null @@ -1,85 +0,0 @@ -"""Tests for LiteLLMReranker (LLM-based reranking).""" - -from __future__ import annotations - -import sys -import types -from dataclasses import dataclass - -import pytest - -from codexlens.semantic.reranker.litellm_reranker import LiteLLMReranker - - -def _install_dummy_ccw_litellm( - monkeypatch: pytest.MonkeyPatch, *, responses: list[str] -) -> None: - @dataclass(frozen=True, slots=True) - class ChatMessage: - role: str - content: str - - class LiteLLMClient: - def __init__(self, model: str = "default", **kwargs) -> None: - self.model = model - self.kwargs = kwargs - self._responses = list(responses) - self.calls: list[list[ChatMessage]] = [] - - def chat(self, messages, **kwargs): - self.calls.append(list(messages)) - content = self._responses.pop(0) if self._responses else "" - return types.SimpleNamespace(content=content) - - dummy = types.ModuleType("ccw_litellm") - dummy.ChatMessage = ChatMessage - dummy.LiteLLMClient = LiteLLMClient - monkeypatch.setitem(sys.modules, "ccw_litellm", dummy) - - -def test_score_pairs_parses_numbers_and_normalizes_scales( - monkeypatch: pytest.MonkeyPatch, -) -> None: - _install_dummy_ccw_litellm(monkeypatch, responses=["0.73", "7", "80"]) - - reranker = LiteLLMReranker(model="dummy") - scores = reranker.score_pairs([("q", "d1"), ("q", "d2"), ("q", "d3")]) - assert scores == pytest.approx([0.73, 0.7, 0.8]) - - -def test_score_pairs_parses_json_score_field(monkeypatch: pytest.MonkeyPatch) -> None: - _install_dummy_ccw_litellm(monkeypatch, responses=['{"score": 0.42}']) - - reranker = LiteLLMReranker(model="dummy") - scores = reranker.score_pairs([("q", "d")]) - assert scores == pytest.approx([0.42]) - - -def test_score_pairs_uses_default_score_on_parse_failure( - monkeypatch: pytest.MonkeyPatch, -) -> None: - _install_dummy_ccw_litellm(monkeypatch, responses=["N/A"]) - - reranker = LiteLLMReranker(model="dummy", default_score=0.123) - scores = reranker.score_pairs([("q", "d")]) - assert scores == pytest.approx([0.123]) - - -def test_rate_limiting_sleeps_between_requests(monkeypatch: pytest.MonkeyPatch) -> None: - _install_dummy_ccw_litellm(monkeypatch, responses=["0.1", "0.2"]) - - reranker = LiteLLMReranker(model="dummy", min_interval_seconds=1.0) - - import codexlens.semantic.reranker.litellm_reranker as litellm_reranker_module - - sleeps: list[float] = [] - times = iter([100.0, 100.0, 100.1, 100.1]) - - monkeypatch.setattr(litellm_reranker_module.time, "monotonic", lambda: next(times)) - monkeypatch.setattr( - litellm_reranker_module.time, "sleep", lambda seconds: sleeps.append(seconds) - ) - - _ = reranker.score_pairs([("q", "d1"), ("q", "d2")]) - assert sleeps == pytest.approx([0.9]) - diff --git a/codex-lens/tests/test_lsp_graph_builder_depth.py b/codex-lens/tests/test_lsp_graph_builder_depth.py deleted file mode 100644 index ab70b770..00000000 --- a/codex-lens/tests/test_lsp_graph_builder_depth.py +++ /dev/null @@ -1,36 +0,0 @@ -from __future__ import annotations - -import asyncio -from unittest.mock import AsyncMock - -import pytest - -from codexlens.hybrid_search.data_structures import CodeAssociationGraph, CodeSymbolNode, Range -from codexlens.lsp.lsp_graph_builder import LspGraphBuilder - - -@pytest.mark.asyncio -async def test_lsp_graph_builder_does_not_expand_at_max_depth() -> None: - """Depth semantics: max_depth is the number of hops from seeds.""" - builder = LspGraphBuilder(max_depth=1, max_nodes=10, max_concurrent=1, resolve_symbols=False) - - bridge = AsyncMock() - bridge.get_references.side_effect = RuntimeError("should not call references") - bridge.get_call_hierarchy.side_effect = RuntimeError("should not call call hierarchy") - - node = CodeSymbolNode( - id="x.py:foo:1", - name="foo", - kind="function", - file_path="x.py", - range=Range(start_line=1, start_character=1, end_line=1, end_character=1), - ) - graph = CodeAssociationGraph() - visited: set[str] = set() - sem = asyncio.Semaphore(1) - - # Seeds are depth=0. A node at depth==max_depth should not be expanded. - new_nodes = await builder._expand_node(node, 1, graph, bridge, visited, sem) # type: ignore[attr-defined] - assert new_nodes == [] - assert node.id in visited - diff --git a/codex-lens/tests/test_merkle_detection.py b/codex-lens/tests/test_merkle_detection.py deleted file mode 100644 index e4afdccd..00000000 --- a/codex-lens/tests/test_merkle_detection.py +++ /dev/null @@ -1,100 +0,0 @@ -import time -from pathlib import Path - -from codexlens.config import Config -from codexlens.storage.dir_index import DirIndexStore - - -def _make_merkle_config(tmp_path: Path) -> Config: - data_dir = tmp_path / "data" - return Config( - data_dir=data_dir, - venv_path=data_dir / "venv", - enable_merkle_detection=True, - ) - - -class TestMerkleDetection: - def test_needs_reindex_touch_updates_mtime(self, tmp_path: Path) -> None: - config = _make_merkle_config(tmp_path) - source_dir = tmp_path / "src" - source_dir.mkdir(parents=True, exist_ok=True) - - file_path = source_dir / "a.py" - file_path.write_text("print('hi')\n", encoding="utf-8") - original_content = file_path.read_text(encoding="utf-8") - - index_db = tmp_path / "_index.db" - with DirIndexStore(index_db, config=config) as store: - store.add_file( - name=file_path.name, - full_path=file_path, - content=original_content, - language="python", - symbols=[], - ) - - stored_mtime_before = store.get_file_mtime(file_path) - assert stored_mtime_before is not None - - # Touch file without changing content - time.sleep(0.02) - file_path.write_text(original_content, encoding="utf-8") - - assert store.needs_reindex(file_path) is False - - stored_mtime_after = store.get_file_mtime(file_path) - assert stored_mtime_after is not None - assert stored_mtime_after != stored_mtime_before - - current_mtime = file_path.stat().st_mtime - assert abs(stored_mtime_after - current_mtime) <= 0.001 - - def test_parent_root_changes_when_child_changes(self, tmp_path: Path) -> None: - config = _make_merkle_config(tmp_path) - - source_root = tmp_path / "project" - child_dir = source_root / "child" - child_dir.mkdir(parents=True, exist_ok=True) - - child_file = child_dir / "child.py" - child_file.write_text("x = 1\n", encoding="utf-8") - - child_db = tmp_path / "child_index.db" - parent_db = tmp_path / "parent_index.db" - - with DirIndexStore(child_db, config=config) as child_store: - child_store.add_file( - name=child_file.name, - full_path=child_file, - content=child_file.read_text(encoding="utf-8"), - language="python", - symbols=[], - ) - child_root_1 = child_store.update_merkle_root() - assert child_root_1 - - with DirIndexStore(parent_db, config=config) as parent_store: - parent_store.register_subdir(name="child", index_path=child_db, files_count=1) - parent_root_1 = parent_store.update_merkle_root() - assert parent_root_1 - - time.sleep(0.02) - child_file.write_text("x = 2\n", encoding="utf-8") - - with DirIndexStore(child_db, config=config) as child_store: - child_store.add_file( - name=child_file.name, - full_path=child_file, - content=child_file.read_text(encoding="utf-8"), - language="python", - symbols=[], - ) - child_root_2 = child_store.update_merkle_root() - assert child_root_2 - assert child_root_2 != child_root_1 - - with DirIndexStore(parent_db, config=config) as parent_store: - parent_root_2 = parent_store.update_merkle_root() - assert parent_root_2 - assert parent_root_2 != parent_root_1 diff --git a/codex-lens/tests/test_migrations.py b/codex-lens/tests/test_migrations.py deleted file mode 100644 index abf5c2c2..00000000 --- a/codex-lens/tests/test_migrations.py +++ /dev/null @@ -1,114 +0,0 @@ -"""Database Migration Tests. - -This module tests the database migration system for the codex-lens index, -ensuring that forward and backward compatibility is maintained across schema versions. - -Test Coverage: -- Forward migrations: Old schema to new schema -- Backward compatibility: New code can read old schemas -- Migration rollback capabilities -- Data integrity during migrations -- Edge cases (empty databases, corrupted data, etc.) -""" - -import pytest -import sqlite3 -from pathlib import Path -import tempfile -import json - - -class TestForwardMigrations: - """Test upgrading from older schema versions to newer ones.""" - - def test_v0_to_v1_migration(self): - """Test migration from schema v0 to v1.""" - pytest.skip("Requires migration infrastructure setup") - - def test_v1_to_v2_migration(self): - """Test migration from schema v1 to v2.""" - pytest.skip("Requires migration infrastructure setup") - - def test_migration_preserves_data(self): - """Test that migration preserves existing data.""" - pytest.skip("Requires migration infrastructure setup") - - def test_migration_adds_new_columns(self): - """Test that new columns are added with correct defaults.""" - pytest.skip("Requires migration infrastructure setup") - - -class TestBackwardCompatibility: - """Test that newer code can read and work with older database schemas.""" - - def test_new_code_reads_old_schema(self): - """Test that current code can read old schema databases.""" - pytest.skip("Requires old schema fixture") - - def test_new_code_writes_to_old_schema(self): - """Test that current code handles writes to old schema gracefully.""" - pytest.skip("Requires old schema fixture") - - def test_old_code_rejects_new_schema(self): - """Test that old code fails appropriately on new schemas.""" - pytest.skip("Requires old code fixture") - - -class TestMigrationRollback: - """Test rollback capabilities for failed migrations.""" - - def test_failed_migration_rolls_back(self): - """Test that failed migrations are rolled back completely.""" - pytest.skip("Requires migration infrastructure setup") - - def test_partial_migration_recovery(self): - """Test recovery from partially completed migrations.""" - pytest.skip("Requires migration infrastructure setup") - - def test_rollback_preserves_original_data(self): - """Test that rollback restores original state.""" - pytest.skip("Requires migration infrastructure setup") - - -class TestMigrationEdgeCases: - """Test migration behavior in edge cases.""" - - def test_empty_database_migration(self): - """Test migration of an empty database.""" - pytest.skip("Requires migration infrastructure setup") - - def test_large_database_migration(self): - """Test migration of a large database.""" - pytest.skip("Requires migration infrastructure setup") - - def test_corrupted_database_handling(self): - """Test handling of corrupted databases during migration.""" - pytest.skip("Requires migration infrastructure setup") - - def test_concurrent_migration_protection(self): - """Test that concurrent migrations are prevented.""" - pytest.skip("Requires migration infrastructure setup") - - -class TestSchemaVersionTracking: - """Test schema version tracking and detection.""" - - def test_version_table_exists(self): - """Test that version tracking table exists and is populated.""" - pytest.skip("Requires migration infrastructure setup") - - def test_version_auto_detection(self): - """Test that schema version is auto-detected from database.""" - pytest.skip("Requires migration infrastructure setup") - - def test_version_update_after_migration(self): - """Test that version is updated correctly after migration.""" - pytest.skip("Requires migration infrastructure setup") - - -# TODO: Implement actual tests using pytest fixtures -# The test infrastructure needs: -# - Migration runner fixture that can apply and rollback migrations -# - Old schema fixtures (pre-built databases with known schemas) -# - Temporary database fixtures for isolated testing -# - Mock data generators for various schema versions diff --git a/codex-lens/tests/test_parser_integration.py b/codex-lens/tests/test_parser_integration.py deleted file mode 100644 index f94d4162..00000000 --- a/codex-lens/tests/test_parser_integration.py +++ /dev/null @@ -1,281 +0,0 @@ -"""Integration tests for multi-level parser system. - -Verifies: -1. Tree-sitter primary, regex fallback -2. Tiktoken integration with character count fallback -3. >99% symbol extraction accuracy -4. Graceful degradation when dependencies unavailable -""" - -from pathlib import Path - -import pytest - -from codexlens.parsers.factory import SimpleRegexParser -from codexlens.parsers.tokenizer import Tokenizer, TIKTOKEN_AVAILABLE -from codexlens.parsers.treesitter_parser import TreeSitterSymbolParser, TREE_SITTER_AVAILABLE - - -class TestMultiLevelFallback: - """Tests for multi-tier fallback pattern.""" - - def test_treesitter_available_uses_ast(self): - """Verify tree-sitter is used when available.""" - parser = TreeSitterSymbolParser("python") - assert parser.is_available() == TREE_SITTER_AVAILABLE - - def test_regex_fallback_always_works(self): - """Verify regex parser always works.""" - parser = SimpleRegexParser("python") - code = "def hello():\n pass" - result = parser.parse(code, Path("test.py")) - - assert result is not None - assert len(result.symbols) == 1 - assert result.symbols[0].name == "hello" - - def test_unsupported_language_uses_generic(self): - """Verify generic parser for unsupported languages.""" - parser = SimpleRegexParser("rust") - code = "fn main() {}" - result = parser.parse(code, Path("test.rs")) - - # Should use generic parser - assert result is not None - # May or may not find symbols depending on generic patterns - - -class TestTokenizerFallback: - """Tests for tokenizer fallback behavior.""" - - def test_character_fallback_when_tiktoken_unavailable(self): - """Verify character counting works without tiktoken.""" - # Use invalid encoding to force fallback - tokenizer = Tokenizer(encoding_name="invalid_encoding") - text = "Hello world" - - count = tokenizer.count_tokens(text) - assert count == max(1, len(text) // 4) - assert not tokenizer.is_using_tiktoken() - - def test_tiktoken_used_when_available(self): - """Verify tiktoken is used when available.""" - tokenizer = Tokenizer() - # Should match TIKTOKEN_AVAILABLE - assert tokenizer.is_using_tiktoken() == TIKTOKEN_AVAILABLE - - -class TestSymbolExtractionAccuracy: - """Tests for >99% symbol extraction accuracy requirement.""" - - @pytest.mark.skipif(not TREE_SITTER_AVAILABLE, reason="tree-sitter not installed") - def test_python_comprehensive_accuracy(self): - """Test comprehensive Python symbol extraction.""" - parser = TreeSitterSymbolParser("python") - code = """ -# Test comprehensive symbol extraction -import os - -CONSTANT = 42 - -def top_level_function(): - pass - -async def async_top_level(): - pass - -class FirstClass: - class_var = 10 - - def __init__(self): - pass - - def method_one(self): - pass - - def method_two(self): - pass - - @staticmethod - def static_method(): - pass - - @classmethod - def class_method(cls): - pass - - async def async_method(self): - pass - -def outer_function(): - def inner_function(): - pass - return inner_function - -class SecondClass: - def another_method(self): - pass - -async def final_async_function(): - pass -""" - result = parser.parse(code, Path("test.py")) - - assert result is not None - - # Expected symbols (excluding CONSTANT, comments, decorators): - # top_level_function, async_top_level, FirstClass, __init__, - # method_one, method_two, static_method, class_method, async_method, - # outer_function, inner_function, SecondClass, another_method, - # final_async_function - - expected_names = { - "top_level_function", "async_top_level", "FirstClass", - "__init__", "method_one", "method_two", "static_method", - "class_method", "async_method", "outer_function", - "inner_function", "SecondClass", "another_method", - "final_async_function" - } - - found_names = {s.name for s in result.symbols} - - # Calculate accuracy - matches = expected_names & found_names - accuracy = len(matches) / len(expected_names) * 100 - - print(f"\nSymbol extraction accuracy: {accuracy:.1f}%") - print(f"Expected: {len(expected_names)}, Found: {len(found_names)}, Matched: {len(matches)}") - print(f"Missing: {expected_names - found_names}") - print(f"Extra: {found_names - expected_names}") - - # Require >99% accuracy - assert accuracy > 99.0, f"Accuracy {accuracy:.1f}% below 99% threshold" - - @pytest.mark.skipif(not TREE_SITTER_AVAILABLE, reason="tree-sitter not installed") - def test_javascript_comprehensive_accuracy(self): - """Test comprehensive JavaScript symbol extraction.""" - parser = TreeSitterSymbolParser("javascript") - code = """ -function regularFunction() {} - -const arrowFunc = () => {} - -async function asyncFunc() {} - -const asyncArrow = async () => {} - -class MainClass { - constructor() {} - - method() {} - - async asyncMethod() {} - - static staticMethod() {} -} - -export function exportedFunc() {} - -export const exportedArrow = () => {} - -export class ExportedClass { - method() {} -} - -function outer() { - function inner() {} -} -""" - result = parser.parse(code, Path("test.js")) - - assert result is not None - - # Expected symbols (excluding constructor): - # regularFunction, arrowFunc, asyncFunc, asyncArrow, MainClass, - # method, asyncMethod, staticMethod, exportedFunc, exportedArrow, - # ExportedClass, method (from ExportedClass), outer, inner - - expected_names = { - "regularFunction", "arrowFunc", "asyncFunc", "asyncArrow", - "MainClass", "method", "asyncMethod", "staticMethod", - "exportedFunc", "exportedArrow", "ExportedClass", "outer", "inner" - } - - found_names = {s.name for s in result.symbols} - - # Calculate accuracy - matches = expected_names & found_names - accuracy = len(matches) / len(expected_names) * 100 - - print(f"\nJavaScript symbol extraction accuracy: {accuracy:.1f}%") - print(f"Expected: {len(expected_names)}, Found: {len(found_names)}, Matched: {len(matches)}") - - # Require >99% accuracy - assert accuracy > 99.0, f"Accuracy {accuracy:.1f}% below 99% threshold" - - -class TestGracefulDegradation: - """Tests for graceful degradation when dependencies missing.""" - - def test_system_functional_without_tiktoken(self): - """Verify system works without tiktoken.""" - # Force fallback - tokenizer = Tokenizer(encoding_name="invalid") - assert not tokenizer.is_using_tiktoken() - - # Should still work - count = tokenizer.count_tokens("def hello(): pass") - assert count > 0 - - def test_system_functional_without_treesitter(self): - """Verify system works without tree-sitter.""" - # Use regex parser directly - parser = SimpleRegexParser("python") - code = "def hello():\n pass" - result = parser.parse(code, Path("test.py")) - - assert result is not None - assert len(result.symbols) == 1 - - def test_treesitter_parser_returns_none_for_unsupported(self): - """Verify TreeSitterParser returns None for unsupported languages.""" - parser = TreeSitterSymbolParser("rust") # Not supported - assert not parser.is_available() - - result = parser.parse("fn main() {}", Path("test.rs")) - assert result is None - - -class TestRealWorldFiles: - """Tests with real-world code examples.""" - - @pytest.mark.skipif(not TREE_SITTER_AVAILABLE, reason="tree-sitter not installed") - def test_parser_on_own_source(self): - """Test parser on its own source code.""" - parser = TreeSitterSymbolParser("python") - - # Read the parser module itself - parser_file = Path(__file__).parent.parent / "src" / "codexlens" / "parsers" / "treesitter_parser.py" - if parser_file.exists(): - code = parser_file.read_text(encoding="utf-8") - result = parser.parse(code, parser_file) - - assert result is not None - # Should find the TreeSitterSymbolParser class and its methods - names = {s.name for s in result.symbols} - assert "TreeSitterSymbolParser" in names - - def test_tokenizer_on_own_source(self): - """Test tokenizer on its own source code.""" - tokenizer = Tokenizer() - - # Read the tokenizer module itself - tokenizer_file = Path(__file__).parent.parent / "src" / "codexlens" / "parsers" / "tokenizer.py" - if tokenizer_file.exists(): - code = tokenizer_file.read_text(encoding="utf-8") - count = tokenizer.count_tokens(code) - - # Should get reasonable token count - assert count > 0 - # File is several hundred characters, should be 50+ tokens - assert count > 50 diff --git a/codex-lens/tests/test_parsers.py b/codex-lens/tests/test_parsers.py deleted file mode 100644 index 9651fddc..00000000 --- a/codex-lens/tests/test_parsers.py +++ /dev/null @@ -1,462 +0,0 @@ -"""Tests for CodexLens parsers.""" - -import tempfile -from pathlib import Path - -import pytest - -from codexlens.config import Config -from codexlens.parsers.factory import ( - ParserFactory, - SimpleRegexParser, - _parse_go_symbols, - _parse_java_symbols, - _parse_js_ts_symbols, - _parse_python_symbols, - _parse_generic_symbols, -) - - -TREE_SITTER_JS_AVAILABLE = True -try: - import tree_sitter_javascript # type: ignore[import-not-found] # noqa: F401 -except Exception: - TREE_SITTER_JS_AVAILABLE = False - - -class TestPythonParser: - """Tests for Python symbol parsing.""" - - def test_parse_function(self): - code = "def hello():\n pass" - symbols = _parse_python_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "hello" - assert symbols[0].kind == "function" - - def test_parse_async_function(self): - code = "async def fetch_data():\n pass" - symbols = _parse_python_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "fetch_data" - assert symbols[0].kind == "function" - - def test_parse_class(self): - code = "class MyClass:\n pass" - symbols = _parse_python_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "MyClass" - assert symbols[0].kind == "class" - - def test_parse_method(self): - code = "class MyClass:\n def method(self):\n pass" - symbols = _parse_python_symbols(code) - assert len(symbols) == 2 - assert symbols[0].name == "MyClass" - assert symbols[0].kind == "class" - assert symbols[1].name == "method" - assert symbols[1].kind == "method" - - def test_parse_async_method(self): - code = "class MyClass:\n async def async_method(self):\n pass" - symbols = _parse_python_symbols(code) - assert len(symbols) == 2 - assert symbols[1].name == "async_method" - assert symbols[1].kind == "method" - - -class TestJavaScriptParser: - """Tests for JavaScript/TypeScript symbol parsing.""" - - def test_parse_function(self): - code = "function hello() {}" - symbols = _parse_js_ts_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "hello" - assert symbols[0].kind == "function" - - def test_parse_async_function(self): - code = "async function fetchData() {}" - symbols = _parse_js_ts_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "fetchData" - assert symbols[0].kind == "function" - - def test_parse_arrow_function(self): - code = "const hello = () => {}" - symbols = _parse_js_ts_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "hello" - assert symbols[0].kind == "function" - - def test_parse_async_arrow_function(self): - code = "const fetchData = async () => {}" - symbols = _parse_js_ts_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "fetchData" - assert symbols[0].kind == "function" - - def test_parse_class(self): - code = "class MyClass {}" - symbols = _parse_js_ts_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "MyClass" - assert symbols[0].kind == "class" - - def test_parse_export_function(self): - code = "export function hello() {}" - symbols = _parse_js_ts_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "hello" - assert symbols[0].kind == "function" - - def test_parse_export_class(self): - code = "export class MyClass {}" - symbols = _parse_js_ts_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "MyClass" - assert symbols[0].kind == "class" - - def test_parse_export_arrow_function(self): - code = "export const hello = () => {}" - symbols = _parse_js_ts_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "hello" - assert symbols[0].kind == "function" - - @pytest.mark.skipif(not TREE_SITTER_JS_AVAILABLE, reason="tree-sitter-javascript not installed") - def test_parse_class_methods(self): - code = ( - "class MyClass {\n" - " method() {}\n" - " async asyncMethod() {}\n" - " static staticMethod() {}\n" - " constructor() {}\n" - "}" - ) - symbols = _parse_js_ts_symbols(code) - names_kinds = [(s.name, s.kind) for s in symbols] - assert ("MyClass", "class") in names_kinds - assert ("method", "method") in names_kinds - assert ("asyncMethod", "method") in names_kinds - assert ("staticMethod", "method") in names_kinds - assert all(name != "constructor" for name, _ in names_kinds) - - -class TestJavaParser: - """Tests for Java symbol parsing.""" - - def test_parse_class(self): - code = "public class MyClass {\n}" - symbols = _parse_java_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "MyClass" - assert symbols[0].kind == "class" - - def test_parse_class_without_public(self): - code = "class InternalClass {\n}" - symbols = _parse_java_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "InternalClass" - - def test_parse_method(self): - code = "public class Test {\n public void doSomething() {}\n}" - symbols = _parse_java_symbols(code) - assert len(symbols) == 2 - assert symbols[0].name == "Test" - assert symbols[0].kind == "class" - assert symbols[1].name == "doSomething" - assert symbols[1].kind == "method" - - def test_parse_static_method(self): - code = "public class Test {\n public static void main(String[] args) {}\n}" - symbols = _parse_java_symbols(code) - method_names = [s.name for s in symbols if s.kind == "method"] - assert "main" in method_names - - def test_parse_private_method(self): - code = "public class Test {\n private int calculate() { return 0; }\n}" - symbols = _parse_java_symbols(code) - method_names = [s.name for s in symbols if s.kind == "method"] - assert "calculate" in method_names - - def test_parse_generic_return_type(self): - code = "public class Test {\n public List getItems() { return null; }\n}" - symbols = _parse_java_symbols(code) - method_names = [s.name for s in symbols if s.kind == "method"] - assert "getItems" in method_names - - -class TestGoParser: - """Tests for Go symbol parsing.""" - - def test_parse_function(self): - code = "func hello() {\n}" - symbols = _parse_go_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "hello" - assert symbols[0].kind == "function" - - def test_parse_function_with_params(self): - code = "func greet(name string) string {\n return name\n}" - symbols = _parse_go_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "greet" - - def test_parse_method(self): - code = "func (s *Server) Start() error {\n return nil\n}" - symbols = _parse_go_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "Start" - assert symbols[0].kind == "function" - - def test_parse_struct(self): - code = "type User struct {\n Name string\n}" - symbols = _parse_go_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "User" - assert symbols[0].kind == "class" - - def test_parse_interface(self): - code = "type Reader interface {\n Read(p []byte) (n int, err error)\n}" - symbols = _parse_go_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "Reader" - assert symbols[0].kind == "class" - - def test_parse_multiple_symbols(self): - code = """type Config struct { - Port int -} - -func NewConfig() *Config { - return &Config{} -} - -func (c *Config) Validate() error { - return nil -} -""" - symbols = _parse_go_symbols(code) - names = [s.name for s in symbols] - assert "Config" in names - assert "NewConfig" in names - assert "Validate" in names - - -class TestGenericParser: - """Tests for generic symbol parsing.""" - - def test_parse_def_keyword(self): - code = "def something():\n pass" - symbols = _parse_generic_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "something" - assert symbols[0].kind == "function" - - def test_parse_function_keyword(self): - code = "function doIt() {}" - symbols = _parse_generic_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "doIt" - - def test_parse_func_keyword(self): - code = "func test() {}" - symbols = _parse_generic_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "test" - - def test_parse_class_keyword(self): - code = "class MyClass {}" - symbols = _parse_generic_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "MyClass" - assert symbols[0].kind == "class" - - def test_parse_struct_keyword(self): - code = "struct Point { x: i32, y: i32 }" - symbols = _parse_generic_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "Point" - assert symbols[0].kind == "class" - - def test_parse_interface_keyword(self): - code = "interface Drawable {}" - symbols = _parse_generic_symbols(code) - assert len(symbols) == 1 - assert symbols[0].name == "Drawable" - assert symbols[0].kind == "class" - - -class TestParserInterface: - """High-level interface tests.""" - - def test_simple_parser_parse(self): - parser = SimpleRegexParser("python") - indexed = parser.parse("def hello():\n pass", Path("test.py")) - assert indexed.language == "python" - assert len(indexed.symbols) == 1 - assert indexed.symbols[0].name == "hello" - - def test_simple_parser_javascript(self): - parser = SimpleRegexParser("javascript") - indexed = parser.parse("function test() {}", Path("test.js")) - assert indexed.language == "javascript" - assert len(indexed.symbols) == 1 - - def test_simple_parser_typescript(self): - parser = SimpleRegexParser("typescript") - indexed = parser.parse("export class Service {}", Path("test.ts")) - assert indexed.language == "typescript" - assert len(indexed.symbols) == 1 - - def test_simple_parser_java(self): - parser = SimpleRegexParser("java") - indexed = parser.parse("public class Main {}", Path("Main.java")) - assert indexed.language == "java" - assert len(indexed.symbols) == 1 - - def test_simple_parser_go(self): - parser = SimpleRegexParser("go") - indexed = parser.parse("func main() {}", Path("main.go")) - assert indexed.language == "go" - assert len(indexed.symbols) == 1 - - def test_simple_parser_unknown_language(self): - parser = SimpleRegexParser("zig") - indexed = parser.parse("fn main() void {}", Path("main.zig")) - assert indexed.language == "zig" - # Uses generic parser - assert indexed.chunks == [] - - def test_indexed_file_path_resolved(self): - parser = SimpleRegexParser("python") - indexed = parser.parse("def test(): pass", Path("./test.py")) - # Path should be resolved to absolute - assert Path(indexed.path).is_absolute() - - -class TestParserFactory: - """Tests for ParserFactory.""" - - def test_factory_creates_parser(self): - with tempfile.TemporaryDirectory() as tmpdir: - import os - os.environ["CODEXLENS_DATA_DIR"] = tmpdir - try: - config = Config() - factory = ParserFactory(config) - parser = factory.get_parser("python") - assert parser is not None - finally: - del os.environ["CODEXLENS_DATA_DIR"] - - def test_factory_caches_parsers(self): - with tempfile.TemporaryDirectory() as tmpdir: - import os - os.environ["CODEXLENS_DATA_DIR"] = tmpdir - try: - config = Config() - factory = ParserFactory(config) - parser1 = factory.get_parser("python") - parser2 = factory.get_parser("python") - assert parser1 is parser2 - finally: - del os.environ["CODEXLENS_DATA_DIR"] - - def test_factory_different_languages(self): - with tempfile.TemporaryDirectory() as tmpdir: - import os - os.environ["CODEXLENS_DATA_DIR"] = tmpdir - try: - config = Config() - factory = ParserFactory(config) - py_parser = factory.get_parser("python") - js_parser = factory.get_parser("javascript") - assert py_parser is not js_parser - finally: - del os.environ["CODEXLENS_DATA_DIR"] - - def test_factory_passes_config_to_treesitter(self, monkeypatch: pytest.MonkeyPatch) -> None: - """Ensure ParserFactory config is forwarded into TreeSitterSymbolParser.""" - from codexlens.entities import IndexedFile - - captured: dict = {} - - class FakeTreeSitterSymbolParser: - def __init__(self, language_id, path=None, config=None) -> None: - captured["config"] = config - self.language_id = language_id - - def is_available(self) -> bool: - return True - - def parse(self, text: str, path: Path) -> IndexedFile: - return IndexedFile( - path=str(path.resolve()), - language=self.language_id, - symbols=[], - chunks=[], - relationships=[], - ) - - monkeypatch.setattr( - "codexlens.parsers.factory.TreeSitterSymbolParser", - FakeTreeSitterSymbolParser, - ) - - config = Config() - config.use_astgrep = True - - factory = ParserFactory(config) - parser = factory.get_parser("python") - parser.parse("def hello():\n pass\n", Path("test.py")) - - assert captured.get("config") is config - - -class TestParserEdgeCases: - """Edge case tests for parsers.""" - - def test_empty_code(self): - symbols = _parse_python_symbols("") - assert len(symbols) == 0 - - def test_only_comments(self): - code = "# This is a comment\n# Another comment" - symbols = _parse_python_symbols(code) - assert len(symbols) == 0 - - def test_nested_functions(self): - code = """def outer(): - def inner(): - pass - return inner -""" - symbols = _parse_python_symbols(code) - names = [s.name for s in symbols] - assert "outer" in names - assert "inner" in names - - def test_unicode_function_name(self): - code = "def 你好():\n pass" - symbols = _parse_python_symbols(code) - # Regex may not support unicode function names, tree-sitter does - # So we just verify it doesn't crash - assert isinstance(symbols, list) - - def test_long_file(self): - # Generate a file with many functions - lines = [] - for i in range(100): - lines.append(f"def func_{i}():\n pass\n") - code = "\n".join(lines) - symbols = _parse_python_symbols(code) - assert len(symbols) == 100 - - def test_malformed_code(self): - # Parser should handle malformed code gracefully - code = "def broken(\n pass" - # Should not crash - symbols = _parse_python_symbols(code) - # May or may not find symbols depending on regex diff --git a/codex-lens/tests/test_path_mapper_windows_drive.py b/codex-lens/tests/test_path_mapper_windows_drive.py deleted file mode 100644 index 21522b8d..00000000 --- a/codex-lens/tests/test_path_mapper_windows_drive.py +++ /dev/null @@ -1,19 +0,0 @@ -from __future__ import annotations - -import platform -from pathlib import Path - -from codexlens.storage.path_mapper import PathMapper - - -def test_denormalize_path_windows_drive_is_absolute() -> None: - if platform.system() != "Windows": - return - - mapper = PathMapper(index_root=Path("C:/tmp/codexlens_indexes")) - mapped = mapper.denormalize_path("D/Claude_dms3/codex-lens/src") - - assert mapped.is_absolute() - assert str(mapped).lower().startswith("d:\\") or str(mapped).lower().startswith("d:/") - assert mapped == Path("D:/Claude_dms3/codex-lens/src") - diff --git a/codex-lens/tests/test_performance_optimizations.py b/codex-lens/tests/test_performance_optimizations.py deleted file mode 100644 index 1026cb59..00000000 --- a/codex-lens/tests/test_performance_optimizations.py +++ /dev/null @@ -1,814 +0,0 @@ -"""Tests for performance optimizations in CodexLens. - -This module tests the following optimizations: -1. Normalized keywords search (migration_001) -2. Optimized path lookup in registry -3. Prefix-mode symbol search -4. Graph expansion neighbor precompute overhead (<20%) -5. Cross-encoder reranking latency (<200ms) -""" - -import json -import sqlite3 -import tempfile -import time -from pathlib import Path - -import pytest - -from codexlens.storage.dir_index import DirIndexStore -from codexlens.storage.registry import RegistryStore -from codexlens.storage.migration_manager import MigrationManager -from codexlens.storage.migrations import migration_001_normalize_keywords - - -@pytest.fixture -def temp_index_db(): - """Create a temporary dir index database.""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "test_index.db" - store = DirIndexStore(db_path) - store.initialize() # Initialize schema - yield store - store.close() - - -@pytest.fixture -def temp_registry_db(): - """Create a temporary registry database.""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "test_registry.db" - store = RegistryStore(db_path) - store.initialize() # Initialize schema - yield store - store.close() - - -@pytest.fixture -def populated_index_db(temp_index_db): - """Create an index database with sample data. - - Uses 100 files to provide meaningful performance comparison between - optimized and fallback implementations. - """ - from codexlens.entities import Symbol - - store = temp_index_db - - # Add files with symbols and keywords - # Using 100 files to show performance improvements - file_ids = [] - - # Define keyword pools for cycling - keyword_pools = [ - ["auth", "security", "jwt"], - ["database", "sql", "query"], - ["auth", "login", "password"], - ["api", "rest", "endpoint"], - ["cache", "redis", "performance"], - ["auth", "oauth", "token"], - ["test", "unittest", "pytest"], - ["database", "postgres", "migration"], - ["api", "graphql", "resolver"], - ["security", "encryption", "crypto"] - ] - - for i in range(100): - # Create symbols for first 50 files to have more symbol search data - symbols = None - if i < 50: - symbols = [ - Symbol(name=f"get_user_{i}", kind="function", range=(1, 10)), - Symbol(name=f"create_user_{i}", kind="function", range=(11, 20)), - Symbol(name=f"UserClass_{i}", kind="class", range=(21, 40)), - ] - - file_id = store.add_file( - name=f"file_{i}.py", - full_path=Path(f"/test/path/file_{i}.py"), - content=f"def function_{i}(): pass\n" * 10, - language="python", - symbols=symbols - ) - file_ids.append(file_id) - - # Add semantic metadata with keywords (cycle through keyword pools) - keywords = keyword_pools[i % len(keyword_pools)] - store.add_semantic_metadata( - file_id=file_id, - summary=f"Test file {file_id}", - keywords=keywords, - purpose="Testing", - llm_tool="gemini" - ) - - return store - - -class TestKeywordNormalization: - """Test normalized keywords functionality.""" - - def test_migration_creates_tables(self, temp_index_db): - """Test that migration creates keywords and file_keywords tables.""" - conn = temp_index_db._get_connection() - - # Verify tables exist (created by _create_schema) - tables = conn.execute(""" - SELECT name FROM sqlite_master - WHERE type='table' AND name IN ('keywords', 'file_keywords') - """).fetchall() - - assert len(tables) == 2 - - def test_migration_creates_indexes(self, temp_index_db): - """Test that migration creates necessary indexes.""" - conn = temp_index_db._get_connection() - - # Check for indexes - indexes = conn.execute(""" - SELECT name FROM sqlite_master - WHERE type='index' AND name IN ( - 'idx_keywords_keyword', - 'idx_file_keywords_file_id', - 'idx_file_keywords_keyword_id' - ) - """).fetchall() - - assert len(indexes) == 3 - - def test_add_semantic_metadata_populates_normalized_tables(self, temp_index_db): - """Test that adding metadata populates the normalized keyword tables.""" - # Add a file - file_id = temp_index_db.add_file( - name="test.py", - full_path=Path("/test/test.py"), - language="python", - content="test" - ) - - # Add semantic metadata - keywords = ["auth", "security", "jwt"] - temp_index_db.add_semantic_metadata( - file_id=file_id, - summary="Test summary", - keywords=keywords, - purpose="Testing", - llm_tool="gemini" - ) - - conn = temp_index_db._get_connection() - - # Check semantic_metadata table (without keywords column in current schema) - row = conn.execute( - "SELECT summary, purpose, llm_tool FROM semantic_metadata WHERE file_id=?", - (file_id,) - ).fetchone() - assert row is not None - assert row["summary"] == "Test summary" - assert row["purpose"] == "Testing" - assert row["llm_tool"] == "gemini" - - # Check normalized keywords table - keyword_rows = conn.execute(""" - SELECT k.keyword - FROM file_keywords fk - JOIN keywords k ON fk.keyword_id = k.id - WHERE fk.file_id = ? - """, (file_id,)).fetchall() - - assert len(keyword_rows) == 3 - normalized_keywords = [row["keyword"] for row in keyword_rows] - assert set(normalized_keywords) == set(keywords) - - def test_search_semantic_keywords_normalized(self, populated_index_db): - """Test optimized keyword search using normalized tables.""" - results = populated_index_db.search_semantic_keywords("auth", use_normalized=True) - - # Should find 3 files with "auth" keyword - assert len(results) >= 3 - - # Verify results structure - for file_entry, keywords in results: - assert file_entry.name.startswith("file_") - assert isinstance(keywords, list) - assert any("auth" in k.lower() for k in keywords) - - def test_search_semantic_keywords_fallback(self, populated_index_db): - """Test that fallback search still works.""" - results = populated_index_db.search_semantic_keywords("auth", use_normalized=False) - - # Should find files with "auth" keyword - assert len(results) >= 3 - - for file_entry, keywords in results: - assert isinstance(keywords, list) - - -class TestPathLookupOptimization: - """Test optimized path lookup in registry.""" - - def test_find_nearest_index_shallow(self, temp_registry_db): - """Test path lookup with shallow directory structure.""" - # Register a project first - project = temp_registry_db.register_project( - source_root=Path("/test"), - index_root=Path("/tmp") - ) - - # Register directory mapping - temp_registry_db.register_dir( - project_id=project.id, - source_path=Path("/test"), - index_path=Path("/tmp/index.db"), - depth=0, - files_count=0 - ) - - # Search for subdirectory - result = temp_registry_db.find_nearest_index(Path("/test/subdir/file.py")) - - assert result is not None - # Compare as strings for cross-platform compatibility - assert "/test" in str(result.source_path) or "\\test" in str(result.source_path) - - def test_find_nearest_index_deep(self, temp_registry_db): - """Test path lookup with deep directory structure.""" - # Register a project - project = temp_registry_db.register_project( - source_root=Path("/a"), - index_root=Path("/tmp") - ) - - # Add directory mappings at different levels - temp_registry_db.register_dir( - project_id=project.id, - source_path=Path("/a"), - index_path=Path("/tmp/index_a.db"), - depth=0, - files_count=0 - ) - temp_registry_db.register_dir( - project_id=project.id, - source_path=Path("/a/b/c"), - index_path=Path("/tmp/index_abc.db"), - depth=2, - files_count=0 - ) - - # Should find nearest (longest) match - result = temp_registry_db.find_nearest_index(Path("/a/b/c/d/e/f/file.py")) - - assert result is not None - # Check that path contains the key parts - result_path = str(result.source_path) - assert "a" in result_path and "b" in result_path and "c" in result_path - - def test_find_nearest_index_not_found(self, temp_registry_db): - """Test path lookup when no mapping exists.""" - result = temp_registry_db.find_nearest_index(Path("/nonexistent/path")) - assert result is None - - def test_find_nearest_index_performance(self, temp_registry_db): - """Basic performance test for path lookup.""" - # Register a project - project = temp_registry_db.register_project( - source_root=Path("/root"), - index_root=Path("/tmp") - ) - - # Add mapping at root - temp_registry_db.register_dir( - project_id=project.id, - source_path=Path("/root"), - index_path=Path("/tmp/index.db"), - depth=0, - files_count=0 - ) - - # Test with very deep path (10 levels) - deep_path = Path("/root/a/b/c/d/e/f/g/h/i/j/file.py") - - start = time.perf_counter() - result = temp_registry_db.find_nearest_index(deep_path) - elapsed = time.perf_counter() - start - - # Should complete quickly (< 50ms even on slow systems) - assert elapsed < 0.05 - assert result is not None - - -class TestSymbolSearchOptimization: - """Test optimized symbol search.""" - - def test_symbol_search_prefix_mode(self, populated_index_db): - """Test symbol search with prefix mode.""" - results = populated_index_db.search_symbols("get", prefix_mode=True) - - # Should find symbols starting with "get" - assert len(results) > 0 - for symbol in results: - assert symbol.name.startswith("get") - - def test_symbol_search_substring_mode(self, populated_index_db): - """Test symbol search with substring mode.""" - results = populated_index_db.search_symbols("user", prefix_mode=False) - - # Should find symbols containing "user" - assert len(results) > 0 - for symbol in results: - assert "user" in symbol.name.lower() - - def test_symbol_search_with_kind_filter(self, populated_index_db): - """Test symbol search with kind filter.""" - results = populated_index_db.search_symbols( - "UserClass", - kind="class", - prefix_mode=True - ) - - # Should find only class symbols - assert len(results) > 0 - for symbol in results: - assert symbol.kind == "class" - - def test_symbol_search_limit(self, populated_index_db): - """Test symbol search respects limit.""" - results = populated_index_db.search_symbols("", prefix_mode=True, limit=5) - - # Should return at most 5 results - assert len(results) <= 5 - - -class TestMigrationManager: - """Test migration manager functionality.""" - - def test_migration_manager_tracks_version(self, temp_index_db): - """Test that migration manager tracks schema version.""" - conn = temp_index_db._get_connection() - manager = MigrationManager(conn) - - current_version = manager.get_current_version() - assert current_version >= 0 - - def test_migration_001_can_run(self, temp_index_db): - """Test that migration_001 is idempotent on current schema. - - Note: Current schema already has normalized keywords tables created - during initialize(), so migration_001 should be a no-op but not fail. - The original migration was designed to migrate from semantic_metadata.keywords - to normalized tables, but new databases use normalized tables directly. - """ - conn = temp_index_db._get_connection() - - # Add some test data using the current normalized schema - conn.execute(""" - INSERT INTO files(id, name, full_path, language, content, mtime, line_count) - VALUES(100, 'test.py', '/test_migration.py', 'python', 'def test(): pass', 0, 10) - """) - - # Insert directly into normalized tables (current schema) - conn.execute("INSERT OR IGNORE INTO keywords(keyword) VALUES(?)", ("test",)) - conn.execute("INSERT OR IGNORE INTO keywords(keyword) VALUES(?)", ("keyword",)) - - kw1_id = conn.execute("SELECT id FROM keywords WHERE keyword=?", ("test",)).fetchone()[0] - kw2_id = conn.execute("SELECT id FROM keywords WHERE keyword=?", ("keyword",)).fetchone()[0] - - conn.execute("INSERT OR IGNORE INTO file_keywords(file_id, keyword_id) VALUES(?, ?)", (100, kw1_id)) - conn.execute("INSERT OR IGNORE INTO file_keywords(file_id, keyword_id) VALUES(?, ?)", (100, kw2_id)) - conn.commit() - - # Run migration (should be idempotent - tables already exist) - try: - migration_001_normalize_keywords.upgrade(conn) - success = True - except Exception as e: - success = False - print(f"Migration failed: {e}") - - assert success - - # Verify data still exists - keyword_count = conn.execute(""" - SELECT COUNT(*) as c FROM file_keywords WHERE file_id=100 - """).fetchone()["c"] - - assert keyword_count == 2 # "test" and "keyword" - - -class TestPerformanceComparison: - """Compare performance of old vs new implementations.""" - - def test_keyword_search_performance(self, populated_index_db): - """Compare keyword search performance. - - IMPORTANT: The normalized query optimization is designed for large datasets - (1000+ files). On small datasets (< 1000 files), the overhead of JOINs and - GROUP BY operations can make the normalized query slower than the simple - LIKE query on JSON fields. This is expected behavior. - - Performance benefits appear when: - - Dataset size > 1000 files - - Full-table scans on JSON LIKE become the bottleneck - - Index-based lookups provide O(log N) complexity advantage - """ - # Normalized search - start = time.perf_counter() - normalized_results = populated_index_db.search_semantic_keywords( - "auth", - use_normalized=True - ) - normalized_time = time.perf_counter() - start - - # Fallback search - start = time.perf_counter() - fallback_results = populated_index_db.search_semantic_keywords( - "auth", - use_normalized=False - ) - fallback_time = time.perf_counter() - start - - # Verify correctness: both queries should return identical results - assert len(normalized_results) == len(fallback_results) - - # Verify result content matches - normalized_files = {entry.id for entry, _ in normalized_results} - fallback_files = {entry.id for entry, _ in fallback_results} - assert normalized_files == fallback_files, "Both queries must return same files" - - # Document performance characteristics (no strict assertion) - # On datasets < 1000 files, normalized may be slower due to JOIN overhead - print(f"\nKeyword search performance (100 files):") - print(f" Normalized: {normalized_time*1000:.3f}ms") - print(f" Fallback: {fallback_time*1000:.3f}ms") - print(f" Ratio: {normalized_time/fallback_time:.2f}x") - print(f" Note: Performance benefits appear with 1000+ files") - - def test_prefix_vs_substring_symbol_search(self, populated_index_db): - """Compare prefix vs substring symbol search performance. - - IMPORTANT: Prefix search optimization (LIKE 'prefix%') benefits from B-tree - indexes, but on small datasets (< 1000 symbols), the performance difference - may not be measurable or may even be slower due to query planner overhead. - - Performance benefits appear when: - - Symbol count > 1000 - - Index-based prefix search provides O(log N) advantage - - Full table scans with LIKE '%substring%' become bottleneck - """ - # Prefix search (optimized) - start = time.perf_counter() - prefix_results = populated_index_db.search_symbols("get", prefix_mode=True) - prefix_time = time.perf_counter() - start - - # Substring search (fallback) - start = time.perf_counter() - substring_results = populated_index_db.search_symbols("get", prefix_mode=False) - substring_time = time.perf_counter() - start - - # Verify correctness: prefix results should be subset of substring results - prefix_names = {s.name for s in prefix_results} - substring_names = {s.name for s in substring_results} - assert prefix_names.issubset(substring_names), "Prefix must be subset of substring" - - # Verify all prefix results actually start with search term - for symbol in prefix_results: - assert symbol.name.startswith("get"), f"Symbol {symbol.name} should start with 'get'" - - # Document performance characteristics (no strict assertion) - # On datasets < 1000 symbols, performance difference is negligible - print(f"\nSymbol search performance (150 symbols):") - print(f" Prefix: {prefix_time*1000:.3f}ms ({len(prefix_results)} results)") - print(f" Substring: {substring_time*1000:.3f}ms ({len(substring_results)} results)") - print(f" Ratio: {prefix_time/substring_time:.2f}x") - print(f" Note: Performance benefits appear with 1000+ symbols") - - -class TestPerformanceBenchmarks: - """Benchmark-style assertions for key performance requirements.""" - - def test_graph_expansion_indexing_overhead_under_20_percent(self, temp_index_db, tmp_path): - """Graph neighbor precompute adds <20% overhead versus indexing baseline.""" - from codexlens.entities import CodeRelationship, RelationshipType, Symbol - from codexlens.storage.index_tree import _compute_graph_neighbors - - store = temp_index_db - - file_count = 60 - symbols_per_file = 8 - - start = time.perf_counter() - for file_idx in range(file_count): - file_path = tmp_path / f"graph_{file_idx}.py" - lines = [] - for sym_idx in range(symbols_per_file): - lines.append(f"def func_{file_idx}_{sym_idx}():") - lines.append(f" return {sym_idx}") - lines.append("") - content = "\n".join(lines) - - symbols = [ - Symbol( - name=f"func_{file_idx}_{sym_idx}", - kind="function", - range=(sym_idx * 3 + 1, sym_idx * 3 + 2), - file=str(file_path), - ) - for sym_idx in range(symbols_per_file) - ] - - relationships = [ - CodeRelationship( - source_symbol=f"func_{file_idx}_{sym_idx}", - target_symbol=f"func_{file_idx}_{sym_idx + 1}", - relationship_type=RelationshipType.CALL, - source_file=str(file_path), - target_file=None, - source_line=sym_idx * 3 + 2, - ) - for sym_idx in range(symbols_per_file - 1) - ] - - store.add_file( - name=file_path.name, - full_path=file_path, - content=content, - language="python", - symbols=symbols, - relationships=relationships, - ) - baseline_time = time.perf_counter() - start - - durations = [] - for _ in range(3): - start = time.perf_counter() - _compute_graph_neighbors(store) - durations.append(time.perf_counter() - start) - graph_time = min(durations) - - # Sanity-check that the benchmark exercised graph neighbor generation. - conn = store._get_connection() - neighbor_count = conn.execute( - "SELECT COUNT(*) as c FROM graph_neighbors" - ).fetchone()["c"] - assert neighbor_count > 0 - - assert baseline_time > 0.0 - overhead_ratio = graph_time / baseline_time - assert overhead_ratio < 0.2, ( - f"Graph neighbor precompute overhead too high: {overhead_ratio:.2%} " - f"(baseline={baseline_time:.3f}s, graph={graph_time:.3f}s)" - ) - - def test_stage2_expansion_precomputed_vs_static_global_graph_benchmark(self, tmp_path): - """Benchmark Stage-2 expansion: precomputed graph_neighbors vs static global graph. - - This test is informational (prints timings) and asserts only correctness - and that both expanders return some related results. - """ - from codexlens.entities import CodeRelationship, RelationshipType, SearchResult, Symbol - from codexlens.search.graph_expander import GraphExpander - from codexlens.search.global_graph_expander import GlobalGraphExpander - from codexlens.storage.dir_index import DirIndexStore - from codexlens.storage.global_index import GlobalSymbolIndex - from codexlens.storage.index_tree import _compute_graph_neighbors - from codexlens.storage.path_mapper import PathMapper - - # Source + index roots - source_dir = tmp_path / "proj" / "src" - source_dir.mkdir(parents=True, exist_ok=True) - mapper = PathMapper(index_root=tmp_path / "indexes") - - index_db_path = mapper.source_to_index_db(source_dir) - index_db_path.parent.mkdir(parents=True, exist_ok=True) - - store = DirIndexStore(index_db_path) - store.initialize() - - file_count = 30 - per_file_symbols = 2 - file_paths = [] - per_file_symbols_list = [] - per_file_relationships_list = [] - - for i in range(file_count): - file_path = source_dir / f"m{i}.py" - file_paths.append(file_path) - file_path.write_text("pass\n", encoding="utf-8") - - symbols = [ - Symbol( - name=f"func_{i}_{j}", - kind="function", - range=(j + 1, j + 1), - file=str(file_path.resolve()), - ) - for j in range(per_file_symbols) - ] - per_file_symbols_list.append(symbols) - - relationships: list[CodeRelationship] = [] - # Intra-file edge: func_i_0 -> func_i_1 - relationships.append( - CodeRelationship( - source_symbol=f"func_{i}_0", - target_symbol=f"func_{i}_1", - relationship_type=RelationshipType.CALL, - source_file=str(file_path.resolve()), - target_file=str(file_path.resolve()), - source_line=1, - ) - ) - # Cross-file edge: func_i_0 -> func_(i+1)_0 (name-unique across dir) - j = (i + 1) % file_count - relationships.append( - CodeRelationship( - source_symbol=f"func_{i}_0", - target_symbol=f"func_{j}_0", - relationship_type=RelationshipType.CALL, - source_file=str(file_path.resolve()), - target_file=str((source_dir / f"m{j}.py").resolve()), - source_line=1, - ) - ) - per_file_relationships_list.append(relationships) - - store.add_file( - name=file_path.name, - full_path=file_path, - content="pass\n", - language="python", - symbols=symbols, - relationships=relationships, - ) - - # Precompute graph_neighbors for GraphExpander (precomputed Stage-2 build) - start = time.perf_counter() - _compute_graph_neighbors(store) - graph_build_ms = (time.perf_counter() - start) * 1000.0 - store.close() - - # Build global symbol index + relationships for GlobalGraphExpander - global_db_path = index_db_path.parent / GlobalSymbolIndex.DEFAULT_DB_NAME - global_index = GlobalSymbolIndex(global_db_path, project_id=1) - global_index.initialize() - try: - index_path_str = str(index_db_path.resolve()) - start = time.perf_counter() - for file_path, symbols in zip(file_paths, per_file_symbols_list): - file_path_str = str(file_path.resolve()) - global_index.update_file_symbols( - file_path_str, - symbols, - index_path=index_path_str, - ) - global_symbols_ms = (time.perf_counter() - start) * 1000.0 - - start = time.perf_counter() - for file_path, relationships in zip(file_paths, per_file_relationships_list): - file_path_str = str(file_path.resolve()) - global_index.update_file_relationships(file_path_str, relationships) - global_relationships_ms = (time.perf_counter() - start) * 1000.0 - - base_results = [ - SearchResult( - path=str(file_paths[i].resolve()), - score=1.0, - excerpt=None, - content=None, - start_line=1, - end_line=1, - symbol_name=f"func_{i}_0", - symbol_kind="function", - ) - for i in range(min(10, file_count)) - ] - - pre_expander = GraphExpander(mapper) - static_expander = GlobalGraphExpander(global_index) - - start = time.perf_counter() - pre_related = pre_expander.expand( - base_results, - depth=2, - max_expand=10, - max_related=50, - ) - pre_ms = (time.perf_counter() - start) * 1000.0 - - start = time.perf_counter() - static_related = static_expander.expand( - base_results, - top_n=10, - max_related=50, - ) - static_ms = (time.perf_counter() - start) * 1000.0 - - assert pre_related, "Expected precomputed graph expansion to return related results" - assert static_related, "Expected static global graph expansion to return related results" - - print("\nStage-2 build benchmark (30 files, 2 symbols/file):") - print(f" graph_neighbors precompute: {graph_build_ms:.2f}ms") - print(f" global_symbols write: {global_symbols_ms:.2f}ms") - print(f" global_relationships write: {global_relationships_ms:.2f}ms") - - print("\nStage-2 expansion benchmark (30 files, 2 symbols/file):") - print(f" precomputed (graph_neighbors): {pre_ms:.2f}ms, related={len(pre_related)}") - print(f" static_global_graph: {static_ms:.2f}ms, related={len(static_related)}") - finally: - global_index.close() - - def test_relationship_extraction_astgrep_vs_treesitter_benchmark(self, tmp_path): - """Informational benchmark: relationship extraction via ast-grep vs tree-sitter. - - Skips when optional parser dependencies are unavailable. - """ - import textwrap - - from codexlens.config import Config - from codexlens.parsers.astgrep_processor import is_astgrep_processor_available - from codexlens.parsers.treesitter_parser import TreeSitterSymbolParser - - if not is_astgrep_processor_available(): - pytest.skip("ast-grep processor unavailable (optional dependency)") - - code = textwrap.dedent( - """ - import os - from typing import List - - class Base: - pass - - class Child(Base): - def method(self) -> List[str]: - return [os.path.join("a", "b")] - """ - ).lstrip() - - file_path = tmp_path / "sample.py" - file_path.write_text(code, encoding="utf-8") - - cfg_ts = Config(data_dir=tmp_path / "cfg_ts") - cfg_ts.use_astgrep = False - ts_parser = TreeSitterSymbolParser("python", file_path, config=cfg_ts) - if not ts_parser.is_available(): - pytest.skip("tree-sitter python binding unavailable") - - cfg_ag = Config(data_dir=tmp_path / "cfg_ag") - cfg_ag.use_astgrep = True - ag_parser = TreeSitterSymbolParser("python", file_path, config=cfg_ag) - if getattr(ag_parser, "_astgrep_processor", None) is None: - pytest.skip("ast-grep processor failed to initialize") - - def _bench(parser: TreeSitterSymbolParser) -> tuple[float, int]: - durations = [] - rel_counts = [] - for _ in range(3): - start = time.perf_counter() - indexed = parser.parse(code, file_path) - durations.append(time.perf_counter() - start) - rel_counts.append(0 if indexed is None else len(indexed.relationships)) - return min(durations) * 1000.0, max(rel_counts) - - ts_ms, ts_rels = _bench(ts_parser) - ag_ms, ag_rels = _bench(ag_parser) - - assert ts_rels > 0, "Expected relationships extracted via tree-sitter" - assert ag_rels > 0, "Expected relationships extracted via ast-grep" - - print("\nRelationship extraction benchmark (python, 1 file):") - print(f" tree-sitter: {ts_ms:.2f}ms, rels={ts_rels}") - print(f" ast-grep: {ag_ms:.2f}ms, rels={ag_rels}") - - def test_cross_encoder_reranking_latency_under_200ms(self): - """Cross-encoder rerank step completes under 200ms (excluding model load).""" - from codexlens.entities import SearchResult - from codexlens.search.ranking import cross_encoder_rerank - - query = "find function" - results = [ - SearchResult( - path=f"file_{idx}.py", - score=1.0 / (idx + 1), - excerpt=f"def func_{idx}():\n return {idx}", - symbol_name=f"func_{idx}", - symbol_kind="function", - ) - for idx in range(50) - ] - - class DummyReranker: - def score_pairs(self, pairs, batch_size=32): - _ = batch_size - # Return deterministic pseudo-logits to exercise sigmoid normalization. - return [float(i) for i in range(len(pairs))] - - reranker = DummyReranker() - - start = time.perf_counter() - reranked = cross_encoder_rerank(query, results, reranker, top_k=50, batch_size=32) - elapsed_ms = (time.perf_counter() - start) * 1000.0 - - assert len(reranked) == len(results) - assert any(r.metadata.get("cross_encoder_reranked") for r in reranked[:50]) - assert elapsed_ms < 200.0, f"Cross-encoder rerank too slow: {elapsed_ms:.1f}ms" diff --git a/codex-lens/tests/test_pure_vector_search.py b/codex-lens/tests/test_pure_vector_search.py deleted file mode 100644 index 3ba820fa..00000000 --- a/codex-lens/tests/test_pure_vector_search.py +++ /dev/null @@ -1,345 +0,0 @@ -"""Tests for pure vector search functionality.""" - -import pytest -import sqlite3 -import tempfile -import time -from pathlib import Path - -from codexlens.search.hybrid_search import HybridSearchEngine -from codexlens.storage.dir_index import DirIndexStore - -# Check if semantic dependencies are available -try: - from codexlens.semantic import SEMANTIC_AVAILABLE - SEMANTIC_DEPS_AVAILABLE = SEMANTIC_AVAILABLE -except ImportError: - SEMANTIC_DEPS_AVAILABLE = False - - -def _safe_unlink(path: Path, retries: int = 5, delay_s: float = 0.05) -> None: - """Best-effort unlink for Windows where SQLite can keep files locked briefly.""" - for attempt in range(retries): - try: - path.unlink() - return - except FileNotFoundError: - return - except PermissionError: - time.sleep(delay_s * (attempt + 1)) - try: - path.unlink(missing_ok=True) - except (PermissionError, OSError): - pass - - -class TestPureVectorSearch: - """Tests for pure vector search mode.""" - - @pytest.fixture - def sample_db(self): - """Create sample database with files.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - - store = DirIndexStore(db_path) - store.initialize() - - # Add sample files - files = { - "auth.py": "def authenticate_user(username, password): pass", - "login.py": "def login_handler(credentials): pass", - "user.py": "class User: pass", - } - - with store._get_connection() as conn: - for path, content in files.items(): - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (path, path, content, "python", 0.0) - ) - conn.commit() - - yield db_path - store.close() - - if db_path.exists(): - _safe_unlink(db_path) - - def test_pure_vector_without_embeddings(self, sample_db): - """Test pure_vector mode returns empty when no embeddings exist.""" - engine = HybridSearchEngine() - - results = engine.search( - sample_db, - "authentication", - limit=10, - enable_vector=True, - pure_vector=True, - ) - - # Should return empty list because no embeddings exist - assert isinstance(results, list) - assert len(results) == 0, \ - "Pure vector search should return empty when no embeddings exist" - - def test_vector_with_fallback(self, sample_db): - """Test vector mode (with fallback) returns FTS results when no embeddings.""" - engine = HybridSearchEngine() - - results = engine.search( - sample_db, - "authenticate", - limit=10, - enable_vector=True, - pure_vector=False, # Allow FTS fallback - ) - - # Should return FTS results even without embeddings - assert isinstance(results, list) - assert len(results) > 0, \ - "Vector mode with fallback should return FTS results" - - # Verify results come from exact FTS - paths = [r.path for r in results] - assert "auth.py" in paths, "Should find auth.py via FTS" - - def test_pure_vector_invalid_config(self, sample_db): - """Test pure_vector=True but enable_vector=False logs warning.""" - engine = HybridSearchEngine() - - # Invalid: pure_vector=True but enable_vector=False - results = engine.search( - sample_db, - "test", - limit=10, - enable_vector=False, - pure_vector=True, - ) - - # Should fallback to exact search - assert isinstance(results, list) - - def test_hybrid_mode_ignores_pure_vector(self, sample_db): - """Test hybrid mode works normally (ignores pure_vector).""" - engine = HybridSearchEngine() - - results = engine.search( - sample_db, - "authenticate", - limit=10, - enable_fuzzy=True, - enable_vector=False, - pure_vector=False, # Should be ignored in hybrid - ) - - # Should return results from exact + fuzzy - assert isinstance(results, list) - assert len(results) > 0 - - -@pytest.mark.skipif(not SEMANTIC_DEPS_AVAILABLE, reason="Semantic dependencies not available") -class TestPureVectorWithEmbeddings: - """Tests for pure vector search with actual embeddings.""" - - @pytest.fixture - def db_with_embeddings(self): - """Create database with embeddings.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - - store = DirIndexStore(db_path) - store.initialize() - - # Add sample files - files = { - "auth/authentication.py": """ -def authenticate_user(username: str, password: str) -> bool: - '''Verify user credentials against database.''' - return check_password(username, password) - -def check_password(user: str, pwd: str) -> bool: - '''Check if password matches stored hash.''' - return True -""", - "auth/login.py": """ -def login_handler(credentials: dict) -> bool: - '''Handle user login request.''' - username = credentials.get('username') - password = credentials.get('password') - return authenticate_user(username, password) -""", - } - - with store._get_connection() as conn: - for path, content in files.items(): - name = path.split('/')[-1] - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, path, content, "python", 0.0) - ) - conn.commit() - - # Generate embeddings - vector_store = None - try: - from codexlens.semantic.embedder import Embedder - from codexlens.semantic.vector_store import VectorStore - from codexlens.semantic.chunker import Chunker, ChunkConfig - - embedder = Embedder(profile="fast") # Use fast model for testing - vector_store = VectorStore(db_path) - chunker = Chunker(config=ChunkConfig(max_chunk_size=1000)) - - with sqlite3.connect(db_path) as conn: - conn.row_factory = sqlite3.Row - rows = conn.execute("SELECT full_path, content FROM files").fetchall() - - for row in rows: - chunks = chunker.chunk_sliding_window( - row["content"], - file_path=row["full_path"], - language="python" - ) - for chunk in chunks: - chunk.embedding = embedder.embed_single(chunk.content) - if chunks: - vector_store.add_chunks(chunks, row["full_path"]) - - except Exception as exc: - pytest.skip(f"Failed to generate embeddings: {exc}") - finally: - if vector_store is not None: - vector_store.close() - - yield db_path - store.close() - - if db_path.exists(): - _safe_unlink(db_path) - - def test_pure_vector_with_embeddings(self, db_with_embeddings): - """Test pure vector search returns results when embeddings exist.""" - engine = HybridSearchEngine() - - results = engine.search( - db_with_embeddings, - "how to verify user credentials", # Natural language query - limit=10, - enable_vector=True, - pure_vector=True, - ) - - # Should return results from vector search only - assert isinstance(results, list) - assert len(results) > 0, "Pure vector search should return results" - - # Results should have semantic relevance - for result in results: - assert result.score > 0 - assert result.path is not None - - def test_compare_pure_vs_hybrid(self, db_with_embeddings): - """Compare pure vector vs hybrid search results.""" - engine = HybridSearchEngine() - - # Pure vector search - pure_results = engine.search( - db_with_embeddings, - "verify credentials", - limit=10, - enable_vector=True, - pure_vector=True, - ) - - # Hybrid search - hybrid_results = engine.search( - db_with_embeddings, - "verify credentials", - limit=10, - enable_fuzzy=True, - enable_vector=True, - pure_vector=False, - ) - - # Both should return results - assert len(pure_results) > 0, "Pure vector should find results" - assert len(hybrid_results) > 0, "Hybrid should find results" - - # Hybrid may have more results (FTS + vector) - # But pure should still be useful for semantic queries - - -class TestSearchModeComparison: - """Compare different search modes.""" - - @pytest.fixture - def comparison_db(self): - """Create database for mode comparison.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - - store = DirIndexStore(db_path) - store.initialize() - - files = { - "auth.py": "def authenticate(): pass", - "login.py": "def login(): pass", - } - - with store._get_connection() as conn: - for path, content in files.items(): - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (path, path, content, "python", 0.0) - ) - conn.commit() - - yield db_path - store.close() - - if db_path.exists(): - _safe_unlink(db_path) - - def test_mode_comparison_without_embeddings(self, comparison_db): - """Compare all search modes without embeddings.""" - engine = HybridSearchEngine() - query = "authenticate" - - # Test each mode - modes = [ - ("exact", False, False, False), - ("fuzzy", True, False, False), - ("vector", False, True, False), # With fallback - ("pure_vector", False, True, True), # No fallback - ] - - results = {} - for mode_name, fuzzy, vector, pure in modes: - result = engine.search( - comparison_db, - query, - limit=10, - enable_fuzzy=fuzzy, - enable_vector=vector, - pure_vector=pure, - ) - results[mode_name] = len(result) - - # Assertions - assert results["exact"] > 0, "Exact should find results" - assert results["fuzzy"] >= results["exact"], "Fuzzy should find at least as many" - assert results["vector"] > 0, "Vector with fallback should find results (from FTS)" - assert results["pure_vector"] == 0, "Pure vector should return empty (no embeddings)" - - # Log comparison - print("\nMode comparison (without embeddings):") - for mode, count in results.items(): - print(f" {mode}: {count} results") - - -if __name__ == "__main__": - pytest.main([__file__, "-v", "-s"]) diff --git a/codex-lens/tests/test_query_parser.py b/codex-lens/tests/test_query_parser.py deleted file mode 100644 index 0809538d..00000000 --- a/codex-lens/tests/test_query_parser.py +++ /dev/null @@ -1,485 +0,0 @@ -"""Tests for query preprocessing and expansion (P1). - -Tests identifier splitting (CamelCase, snake_case, kebab-case), OR expansion, -and FTS5 operator preservation. -""" - -import pytest - -from codexlens.search.query_parser import QueryParser, preprocess_query - - -class TestQueryParserBasics: - """Basic tests for QueryParser class.""" - - def test_parser_initialization(self): - """Test QueryParser initializes with default settings.""" - parser = QueryParser() - assert parser.enable is True - assert parser.min_token_length == 2 - - def test_parser_disabled(self): - """Test parser with enable=False returns original query.""" - parser = QueryParser(enable=False) - result = parser.preprocess_query("UserAuth") - assert result == "UserAuth" - - def test_empty_query(self): - """Test empty query returns empty string.""" - parser = QueryParser() - assert parser.preprocess_query("") == "" - assert parser.preprocess_query(" ") == "" - - -class TestCamelCaseSplitting: - """Tests for CamelCase identifier splitting.""" - - def test_simple_camelcase(self): - """Test simple CamelCase splitting.""" - parser = QueryParser() - result = parser.preprocess_query("UserAuth") - # Should expand to: UserAuth OR User OR Auth - assert "UserAuth" in result - assert "User" in result - assert "Auth" in result - assert "OR" in result - - def test_lowercase_camelcase(self): - """Test lowerCamelCase splitting.""" - parser = QueryParser() - result = parser.preprocess_query("getUserData") - # Should expand: getUserData OR get OR User OR Data - assert "getUserData" in result - assert "get" in result - assert "User" in result - assert "Data" in result - - def test_all_caps_acronym(self): - """Test all-caps acronyms are not split.""" - parser = QueryParser() - result = parser.preprocess_query("HTTP") - # Should not split HTTP - assert "HTTP" in result - assert "OR" not in result or result == "HTTP" - - def test_mixed_acronym_camelcase(self): - """Test mixed acronym and CamelCase.""" - parser = QueryParser() - result = parser.preprocess_query("HTTPServer") - # Should handle mixed case - assert "HTTPServer" in result or "HTTP" in result - - -class TestSnakeCaseSplitting: - """Tests for snake_case identifier splitting.""" - - def test_simple_snake_case(self): - """Test simple snake_case splitting.""" - parser = QueryParser() - result = parser.preprocess_query("user_auth") - # Should expand: user_auth OR user OR auth - assert "user_auth" in result - assert "user" in result - assert "auth" in result - assert "OR" in result - - def test_multiple_underscores(self): - """Test splitting with multiple underscores.""" - parser = QueryParser() - result = parser.preprocess_query("get_user_data") - # Should expand: get_user_data OR get OR user OR data - assert "get_user_data" in result - assert "get" in result - assert "user" in result - assert "data" in result - - def test_leading_trailing_underscores(self): - """Test underscores at start/end.""" - parser = QueryParser() - result = parser.preprocess_query("_private_method_") - # Should handle gracefully - assert "private" in result - assert "method" in result - - -class TestKebabCaseSplitting: - """Tests for kebab-case identifier splitting.""" - - def test_simple_kebab_case(self): - """Test simple kebab-case splitting.""" - parser = QueryParser() - result = parser.preprocess_query("user-auth") - # Should expand: user-auth OR user OR auth - assert "user-auth" in result or "user" in result - assert "OR" in result - - def test_multiple_hyphens(self): - """Test splitting with multiple hyphens.""" - parser = QueryParser() - result = parser.preprocess_query("get-user-data") - # Should expand similar to snake_case - assert "get" in result - assert "user" in result - assert "data" in result - - -class TestQueryExpansion: - """Tests for OR query expansion.""" - - def test_expansion_includes_original(self): - """Test expansion always includes original query.""" - parser = QueryParser() - result = parser.preprocess_query("UserAuth") - # Original should be first - tokens = result.split(" OR ") - assert tokens[0] == "UserAuth" - - def test_expansion_or_operator(self): - """Test expansion uses OR operator.""" - parser = QueryParser() - result = parser.preprocess_query("getUserData") - assert " OR " in result - - def test_min_token_length_filtering(self): - """Test short tokens are filtered out.""" - parser = QueryParser(min_token_length=3) - result = parser.preprocess_query("getX") - # "X" should be filtered (len < 3) - assert "X" not in result or "getX" in result - assert "get" in result # "get" has len=3 - - def test_no_expansion_for_simple_word(self): - """Test simple words with no splitting return as-is.""" - parser = QueryParser() - result = parser.preprocess_query("function") - # No splitting needed, but may still have OR if single token - assert "function" in result - - def test_deduplication(self): - """Test duplicate tokens are deduplicated.""" - parser = QueryParser() - # Query that might produce duplicates after splitting - result = parser.preprocess_query("user_user") - tokens = result.split(" OR ") - # Should deduplicate "user" - user_count = tokens.count("user") - assert user_count == 1 - - -class TestFTS5OperatorPreservation: - """Tests for FTS5 operator preservation.""" - - def test_quoted_phrase_not_expanded(self): - """Test quoted phrases are not expanded.""" - parser = QueryParser() - result = parser.preprocess_query('"UserAuth"') - # Should preserve quoted phrase without expansion - assert result == '"UserAuth"' or '"UserAuth"' in result - - def test_or_operator_not_expanded(self): - """Test existing OR operator preserves query.""" - parser = QueryParser() - result = parser.preprocess_query("user OR auth") - # Should not double-expand - assert result == "user OR auth" - - def test_and_operator_not_expanded(self): - """Test AND operator preserves query.""" - parser = QueryParser() - result = parser.preprocess_query("user AND auth") - assert result == "user AND auth" - - def test_not_operator_not_expanded(self): - """Test NOT operator preserves query.""" - parser = QueryParser() - result = parser.preprocess_query("user NOT test") - assert result == "user NOT test" - - def test_near_operator_not_expanded(self): - """Test NEAR operator preserves query.""" - parser = QueryParser() - result = parser.preprocess_query("user NEAR auth") - assert result == "user NEAR auth" - - def test_wildcard_not_expanded(self): - """Test wildcard queries are not expanded.""" - parser = QueryParser() - result = parser.preprocess_query("auth*") - assert result == "auth*" - - def test_prefix_operator_not_expanded(self): - """Test prefix operator (^) preserves query.""" - parser = QueryParser() - result = parser.preprocess_query("^auth") - assert result == "^auth" - - -class TestMultiWordQueries: - """Tests for multi-word query expansion.""" - - def test_two_words(self): - """Test expansion of two-word query.""" - parser = QueryParser() - result = parser.preprocess_query("UserAuth DataModel") - # Should expand each word - assert "UserAuth" in result - assert "DataModel" in result - assert "User" in result - assert "Auth" in result - assert "Data" in result - assert "Model" in result - - def test_whitespace_separated_identifiers(self): - """Test whitespace-separated identifiers are expanded.""" - parser = QueryParser() - result = parser.preprocess_query("get_user create_token") - # Each word should be expanded - assert "get" in result - assert "user" in result - assert "create" in result - assert "token" in result - - -class TestConvenienceFunction: - """Tests for preprocess_query convenience function.""" - - def test_convenience_function_default(self): - """Test convenience function with default settings.""" - result = preprocess_query("UserAuth") - assert "UserAuth" in result - assert "OR" in result - - def test_convenience_function_disabled(self): - """Test convenience function with enable=False.""" - result = preprocess_query("UserAuth", enable=False) - assert result == "UserAuth" - - -@pytest.mark.parametrize("query,expected_tokens", [ - ("UserAuth", ["UserAuth", "User", "Auth"]), - ("user_auth", ["user_auth", "user", "auth"]), - ("get-user-data", ["get", "user", "data"]), - ("HTTPServer", ["HTTPServer", "HTTP", "Server"]), - ("getUserData", ["getUserData", "get", "User", "Data"]), -]) -class TestParameterizedSplitting: - """Parameterized tests for various identifier formats.""" - - def test_identifier_splitting(self, query, expected_tokens): - """Test identifier splitting produces expected tokens.""" - parser = QueryParser() - result = parser.preprocess_query(query) - - # Check all expected tokens are present - for token in expected_tokens: - assert token in result, f"Token '{token}' should be in result: {result}" - - -class TestEdgeCases: - """Edge case tests for query parsing.""" - - def test_single_character_word(self): - """Test single character words are filtered.""" - parser = QueryParser(min_token_length=2) - result = parser.preprocess_query("a") - # Single char should be filtered if below min_token_length - assert result == "a" or len(result) == 0 or result.strip() == "" - - def test_numbers_in_identifiers(self): - """Test identifiers with numbers.""" - parser = QueryParser() - result = parser.preprocess_query("user123Auth") - # Should handle numbers gracefully - assert "user123Auth" in result - - def test_special_characters(self): - """Test identifiers with special characters.""" - parser = QueryParser() - result = parser.preprocess_query("user$auth") - # Should handle special chars - assert isinstance(result, str) - - def test_unicode_identifiers(self): - """Test Unicode identifiers.""" - parser = QueryParser() - result = parser.preprocess_query("用户认证") - # Should handle Unicode without errors - assert isinstance(result, str) - assert "用户认证" in result - - def test_very_long_identifier(self): - """Test very long identifier names.""" - parser = QueryParser() - long_name = "VeryLongCamelCaseIdentifierNameThatExceedsNormalLength" - result = parser.preprocess_query(long_name) - # Should handle long names - assert long_name in result - - def test_mixed_case_styles(self): - """Test mixed CamelCase and snake_case.""" - parser = QueryParser() - result = parser.preprocess_query("User_Auth") - # Should handle mixed styles - assert "User_Auth" in result or "User" in result - assert "Auth" in result - - -class TestTokenExtractionLogic: - """Tests for internal token extraction logic.""" - - def test_extract_tokens_from_camelcase(self): - """Test _split_camel_case method.""" - parser = QueryParser() - tokens = parser._split_camel_case("getUserData") - # Should split into: get, User, Data - assert "get" in tokens - assert "User" in tokens - assert "Data" in tokens - - def test_extract_tokens_from_snake_case(self): - """Test _split_snake_case method.""" - parser = QueryParser() - tokens = parser._split_snake_case("get_user_data") - # Should split into: get, user, data - assert "get" in tokens - assert "user" in tokens - assert "data" in tokens - - def test_extract_tokens_from_kebab_case(self): - """Test _split_kebab_case method.""" - parser = QueryParser() - tokens = parser._split_kebab_case("get-user-data") - # Should split into: get, user, data - assert "get" in tokens - assert "user" in tokens - assert "data" in tokens - - def test_extract_tokens_combines_strategies(self): - """Test _extract_tokens uses all splitting strategies.""" - parser = QueryParser() - # Mix of styles - tokens = parser._extract_tokens("getUserData_v2") - # Should extract: getUserData_v2, get, User, Data, v2 - assert "getUserData_v2" in tokens - assert "get" in tokens or "User" in tokens - - -class TestQueryParserIntegration: - """Integration tests for query parser.""" - - def test_real_world_query_examples(self): - """Test real-world query examples.""" - parser = QueryParser() - - queries = [ - "AuthenticationService", - "get_user_by_id", - "create-new-user", - "HTTPRequest", - "parseJSONData", - ] - - for query in queries: - result = parser.preprocess_query(query) - # Should produce valid expanded query - assert isinstance(result, str) - assert len(result) > 0 - assert query in result # Original should be included - - def test_parser_performance(self): - """Test parser performance with many queries.""" - parser = QueryParser() - - # Process 1000 queries - for i in range(1000): - query = f"getUserData{i}" - result = parser.preprocess_query(query) - assert isinstance(result, str) - - -class TestMinTokenLength: - """Tests for min_token_length parameter.""" - - def test_custom_min_token_length(self): - """Test custom min_token_length filters tokens.""" - parser = QueryParser(min_token_length=4) - result = parser.preprocess_query("getUserData") - # Tokens with len < 4 should be filtered - assert "get" not in result or "getUserData" in result # "get" has len=3 - assert "User" in result # "User" has len=4 - assert "Data" in result # "Data" has len=4 - - def test_min_token_length_zero(self): - """Test min_token_length=0 includes all tokens.""" - parser = QueryParser(min_token_length=0) - result = parser.preprocess_query("getX") - # All tokens should be included - assert "get" in result - assert "X" in result or "getX" in result - - def test_min_token_length_one(self): - """Test min_token_length=1 includes single char tokens.""" - parser = QueryParser(min_token_length=1) - result = parser.preprocess_query("aB") - # Should include "a" and "B" - assert "a" in result or "aB" in result - assert "B" in result or "aB" in result - - - - -class TestComplexBooleanQueries: - """Tests for complex boolean query parsing.""" - - @pytest.fixture - def parser(self): - return QueryParser() - - def test_nested_boolean_and_or(self, parser): - """Test parser preserves nested boolean logic: (A OR B) AND C.""" - query = "(login OR logout) AND user" - expanded = parser.preprocess_query(query) - - # Should preserve parentheses and boolean operators - assert "(" in expanded - assert ")" in expanded - assert "AND" in expanded - assert "OR" in expanded - - def test_mixed_operators_with_expansion(self, parser): - """Test CamelCase expansion doesn't break boolean operators.""" - query = "UserAuth AND (login OR logout)" - expanded = parser.preprocess_query(query) - - # Should expand UserAuth but preserve operators - assert "User" in expanded or "Auth" in expanded - assert "AND" in expanded - assert "OR" in expanded - assert "(" in expanded - - def test_quoted_phrases_with_boolean(self, parser): - """Test quoted phrases preserved with boolean operators.""" - query = '"user authentication" AND login' - expanded = parser.preprocess_query(query) - - # Quoted phrase should remain intact - assert '"user authentication"' in expanded or '"' in expanded - assert "AND" in expanded - - def test_not_operator_preservation(self, parser): - """Test NOT operator is preserved correctly.""" - query = "login NOT logout" - expanded = parser.preprocess_query(query) - - assert "NOT" in expanded - assert "login" in expanded - assert "logout" in expanded - - def test_complex_nested_three_levels(self, parser): - """Test deeply nested boolean logic: ((A OR B) AND C) OR D.""" - query = "((UserAuth OR login) AND session) OR token" - expanded = parser.preprocess_query(query) - - # Should handle multiple nesting levels - assert expanded.count("(") >= 2 # At least 2 opening parens - assert expanded.count(")") >= 2 # At least 2 closing parens diff --git a/codex-lens/tests/test_ranking.py b/codex-lens/tests/test_ranking.py deleted file mode 100644 index a082d22e..00000000 --- a/codex-lens/tests/test_ranking.py +++ /dev/null @@ -1,782 +0,0 @@ -"""Unit tests for ranking.py - RRF weights, intent detection, score fusion, and filtering. - -Tests cover: -- detect_query_intent: CamelCase/underscore -> KEYWORD, natural language -> SEMANTIC, mixed -- adjust_weights_by_intent: Weight adjustments per intent type -- get_rrf_weights: Composite of detect + adjust -- reciprocal_rank_fusion: Single/multi source, empty, weight normalization -- simple_weighted_fusion: Basic fusion and empty input -- apply_symbol_boost: Symbol match boost and no-match scenario -- filter_results_by_category: KEYWORD -> code only, SEMANTIC -> docs priority -- group_similar_results: Group results by score proximity -- normalize_weights: All-zero weights edge case -""" - -from __future__ import annotations - -import math -from typing import Dict, List -from unittest.mock import MagicMock - -import pytest - -from codexlens.entities import SearchResult -from codexlens.search.ranking import ( - DEFAULT_WEIGHTS, - QueryIntent, - apply_path_penalties, - extract_explicit_path_hints, - cross_encoder_rerank, - adjust_weights_by_intent, - apply_symbol_boost, - detect_query_intent, - filter_results_by_category, - get_rrf_weights, - group_similar_results, - is_auxiliary_reference_path, - is_generated_artifact_path, - is_test_file, - normalize_weights, - query_prefers_lexical_search, - query_targets_auxiliary_files, - query_targets_generated_files, - query_targets_test_files, - rebalance_noisy_results, - reciprocal_rank_fusion, - simple_weighted_fusion, -) - - -# ============================================================================= -# Helpers -# ============================================================================= - - -def _make_result( - path: str = "a.py", - score: float = 0.5, - excerpt: str = "def foo():", - symbol_name: str | None = None, - symbol_kind: str | None = None, - start_line: int | None = None, - end_line: int | None = None, -) -> SearchResult: - """Create a SearchResult with sensible defaults.""" - return SearchResult( - path=path, - score=score, - excerpt=excerpt, - symbol_name=symbol_name, - symbol_kind=symbol_kind, - start_line=start_line, - end_line=end_line, - ) - - -# ============================================================================= -# Tests: detect_query_intent -# ============================================================================= - - -class TestDetectQueryIntent: - """Tests for detect_query_intent().""" - - def test_detect_keyword_intent(self): - """CamelCase/underscore queries should be detected as KEYWORD.""" - assert detect_query_intent("MyClassName") == QueryIntent.KEYWORD - assert detect_query_intent("windowsHide") == QueryIntent.KEYWORD - assert detect_query_intent("my_function_name") == QueryIntent.KEYWORD - assert detect_query_intent("foo::bar") == QueryIntent.KEYWORD - - def test_detect_semantic_intent(self): - """Natural language queries should be detected as SEMANTIC.""" - assert detect_query_intent("how to authenticate users safely?") == QueryIntent.SEMANTIC - assert detect_query_intent("explain the login process") == QueryIntent.SEMANTIC - - def test_detect_mixed_intent(self): - """Queries with both code and NL signals should be MIXED.""" - # Has code signal (underscore identifier) and NL signal ("how") - assert detect_query_intent("how does my_function work") == QueryIntent.MIXED - - def test_detect_empty_query(self): - """Empty string should return MIXED (safe default).""" - assert detect_query_intent("") == QueryIntent.MIXED - assert detect_query_intent(" ") == QueryIntent.MIXED - - def test_query_targets_test_files(self): - """Queries explicitly mentioning tests should skip test penalties.""" - assert query_targets_test_files("how do tests cover auth flow?") - assert query_targets_test_files("spec fixtures for parser") - assert not query_targets_test_files("windowsHide") - - def test_query_targets_generated_files(self): - """Queries explicitly mentioning build artifacts should skip that penalty.""" - assert query_targets_generated_files("inspect dist bundle output") - assert query_targets_generated_files("generated artifacts under build") - assert not query_targets_generated_files("cache invalidation strategy") - - def test_query_prefers_lexical_search(self): - """Config/env/factory queries should prefer lexical-first routing.""" - assert query_prefers_lexical_search("embedding backend fastembed local litellm api config") - assert query_prefers_lexical_search("get_reranker factory onnx backend selection") - assert query_prefers_lexical_search("EMBEDDING_BACKEND and RERANKER_BACKEND environment variables") - assert not query_prefers_lexical_search("how does smart search route keyword queries") - - -# ============================================================================= -# Tests: adjust_weights_by_intent -# ============================================================================= - - -class TestAdjustWeightsByIntent: - """Tests for adjust_weights_by_intent().""" - - def test_adjust_keyword_weights(self): - """KEYWORD intent should boost exact and reduce vector.""" - base = {"exact": 0.3, "fuzzy": 0.1, "vector": 0.6} - adjusted = adjust_weights_by_intent(QueryIntent.KEYWORD, base) - # Expected target: exact:0.5, fuzzy:0.1, vector:0.4 - assert adjusted["exact"] == pytest.approx(0.5, abs=0.01) - assert adjusted["fuzzy"] == pytest.approx(0.1, abs=0.01) - assert adjusted["vector"] == pytest.approx(0.4, abs=0.01) - - def test_adjust_semantic_weights(self): - """SEMANTIC intent should boost vector and reduce exact.""" - base = {"exact": 0.3, "fuzzy": 0.1, "vector": 0.6} - adjusted = adjust_weights_by_intent(QueryIntent.SEMANTIC, base) - # Expected target: exact:0.2, fuzzy:0.1, vector:0.7 - assert adjusted["exact"] == pytest.approx(0.2, abs=0.01) - assert adjusted["fuzzy"] == pytest.approx(0.1, abs=0.01) - assert adjusted["vector"] == pytest.approx(0.7, abs=0.01) - - def test_adjust_mixed_weights(self): - """MIXED intent should return normalized base_weights.""" - base = {"exact": 0.3, "fuzzy": 0.1, "vector": 0.6} - adjusted = adjust_weights_by_intent(QueryIntent.MIXED, base) - # MIXED returns normalized base_weights - total = sum(adjusted.values()) - assert total == pytest.approx(1.0, abs=0.01) - # Proportions should be preserved - assert adjusted["exact"] == pytest.approx(0.3, abs=0.01) - - -class TestPathPenalties: - """Tests for lightweight path-based ranking penalties.""" - - def test_is_test_file(self): - assert is_test_file("/repo/tests/test_auth.py") - assert is_test_file("D:\\repo\\src\\auth.spec.ts") - assert is_test_file("/repo/frontend/src/pages/discoverypage.test.tsx") - assert is_test_file("/repo/frontend/src/pages/discoverypage.spec.jsx") - assert not is_test_file("/repo/src/auth.py") - - def test_is_generated_artifact_path(self): - assert is_generated_artifact_path("/repo/dist/app.js") - assert is_generated_artifact_path("/repo/src/generated/client.ts") - assert is_generated_artifact_path("D:\\repo\\frontend\\.next\\server.js") - assert not is_generated_artifact_path("/repo/src/auth.py") - - def test_is_auxiliary_reference_path(self): - assert is_auxiliary_reference_path("/repo/examples/auth_demo.py") - assert is_auxiliary_reference_path("/repo/benchmarks/search_eval.py") - assert is_auxiliary_reference_path("/repo/tools/debug_search.py") - assert not is_auxiliary_reference_path("/repo/src/auth.py") - - def test_query_targets_auxiliary_files(self): - assert query_targets_auxiliary_files("show smart search examples") - assert query_targets_auxiliary_files("benchmark smart search") - assert not query_targets_auxiliary_files("smart search routing") - - def test_apply_path_penalties_demotes_test_files(self): - results = [ - _make_result(path="/repo/tests/test_auth.py", score=10.0), - _make_result(path="/repo/src/auth.py", score=9.0), - ] - - penalized = apply_path_penalties( - results, - "authenticate user", - test_file_penalty=0.15, - ) - - assert penalized[0].path == "/repo/src/auth.py" - assert penalized[1].metadata["path_penalty_reasons"] == ["test_file"] - - def test_apply_path_penalties_more_aggressively_demotes_tests_for_keyword_queries(self): - results = [ - _make_result(path="/repo/tests/test_auth.py", score=5.0), - _make_result(path="/repo/src/auth.py", score=4.0), - ] - - penalized = apply_path_penalties( - results, - "find_descendant_project_roots", - test_file_penalty=0.15, - ) - - assert penalized[0].path == "/repo/src/auth.py" - assert penalized[1].metadata["path_penalty_reasons"] == ["test_file"] - assert penalized[1].metadata["path_penalty_multiplier"] == pytest.approx(0.55) - assert penalized[1].metadata["path_rank_multiplier"] == pytest.approx(0.55) - - def test_apply_path_penalties_more_aggressively_demotes_tests_for_semantic_queries(self): - results = [ - _make_result(path="/repo/tests/test_auth.py", score=5.0), - _make_result(path="/repo/src/auth.py", score=4.1), - ] - - penalized = apply_path_penalties( - results, - "how does auth routing work", - test_file_penalty=0.15, - ) - - assert penalized[0].path == "/repo/src/auth.py" - assert penalized[1].metadata["path_penalty_reasons"] == ["test_file"] - assert penalized[1].metadata["path_penalty_multiplier"] == pytest.approx(0.75) - - def test_apply_path_penalties_boosts_source_definitions_for_identifier_queries(self): - results = [ - _make_result( - path="/repo/tests/test_registry.py", - score=4.2, - excerpt='query="find_descendant_project_roots"', - ), - _make_result( - path="/repo/src/registry.py", - score=3.0, - excerpt="def find_descendant_project_roots(self, source_root: Path) -> list[str]:", - ), - ] - - penalized = apply_path_penalties( - results, - "find_descendant_project_roots", - test_file_penalty=0.15, - ) - - assert penalized[0].path == "/repo/src/registry.py" - assert penalized[0].metadata["path_boost_reasons"] == ["source_definition"] - assert penalized[0].metadata["path_boost_multiplier"] == pytest.approx(2.0) - assert penalized[0].metadata["path_rank_multiplier"] == pytest.approx(2.0) - assert penalized[1].metadata["path_penalty_reasons"] == ["test_file"] - - def test_apply_path_penalties_boosts_source_paths_for_semantic_feature_queries(self): - results = [ - _make_result( - path="/repo/tests/smart-search-intent.test.js", - score=0.832, - excerpt="describes how smart search routes keyword queries", - ), - _make_result( - path="/repo/src/tools/smart-search.ts", - score=0.555, - excerpt="smart search keyword routing logic", - ), - ] - - penalized = apply_path_penalties( - results, - "how does smart search route keyword queries", - test_file_penalty=0.15, - ) - - assert penalized[0].path == "/repo/src/tools/smart-search.ts" - assert penalized[0].metadata["path_boost_reasons"] == ["source_path_topic_overlap"] - assert penalized[0].metadata["path_boost_multiplier"] == pytest.approx(1.35) - assert penalized[0].metadata["path_boost_overlap_tokens"] == ["smart", "search"] - assert penalized[1].metadata["path_penalty_reasons"] == ["test_file"] - - def test_apply_path_penalties_strongly_boosts_keyword_basename_overlap(self): - results = [ - _make_result( - path="/repo/src/tools/core-memory.ts", - score=0.04032417772512223, - excerpt="memory listing helpers", - ), - _make_result( - path="/repo/src/tools/smart-search.ts", - score=0.009836065573770493, - excerpt="smart search keyword routing logic", - ), - ] - - penalized = apply_path_penalties( - results, - "executeHybridMode dense_rerank semantic smart_search", - test_file_penalty=0.15, - ) - - assert penalized[0].path == "/repo/src/tools/smart-search.ts" - assert penalized[0].metadata["path_boost_reasons"] == ["source_path_topic_overlap"] - assert penalized[0].metadata["path_boost_multiplier"] == pytest.approx(4.5) - assert penalized[0].metadata["path_boost_overlap_tokens"] == ["smart", "search"] - - def test_extract_explicit_path_hints_ignores_generic_platform_terms(self): - assert extract_explicit_path_hints( - "parse CodexLens JSON output strip ANSI smart_search", - ) == [["smart", "search"]] - - def test_apply_path_penalties_prefers_explicit_feature_hint_over_platform_terms(self): - results = [ - _make_result( - path="/repo/src/tools/codex-lens-lsp.ts", - score=0.045, - excerpt="CodexLens LSP bridge", - ), - _make_result( - path="/repo/src/tools/smart-search.ts", - score=0.03, - excerpt="parse JSON output and strip ANSI for plain-text fallback", - ), - ] - - penalized = apply_path_penalties( - results, - "parse CodexLens JSON output strip ANSI smart_search", - test_file_penalty=0.15, - ) - - assert penalized[0].path == "/repo/src/tools/smart-search.ts" - assert penalized[0].metadata["path_boost_reasons"] == ["source_path_topic_overlap"] - assert penalized[0].metadata["path_boost_overlap_tokens"] == ["smart", "search"] - - def test_apply_path_penalties_strongly_boosts_lexical_config_modules(self): - results = [ - _make_result( - path="/repo/src/tools/smart-search.ts", - score=22.07, - excerpt="embedding backend local api config routing", - ), - _make_result( - path="/repo/src/codexlens/config.py", - score=4.88, - excerpt="embedding_backend = 'fastembed'", - ), - ] - - penalized = apply_path_penalties( - results, - "embedding backend fastembed local litellm api config", - test_file_penalty=0.15, - ) - - assert penalized[0].path == "/repo/src/codexlens/config.py" - assert penalized[0].metadata["path_boost_reasons"] == ["source_path_topic_overlap"] - assert penalized[0].metadata["path_boost_multiplier"] == pytest.approx(5.0) - assert penalized[0].metadata["path_boost_overlap_tokens"] == ["config"] - - def test_apply_path_penalties_more_aggressively_demotes_tests_for_explicit_feature_queries(self): - results = [ - _make_result( - path="/repo/tests/smart-search-intent.test.js", - score=1.0, - excerpt="smart search intent coverage", - ), - _make_result( - path="/repo/src/tools/smart-search.ts", - score=0.58, - excerpt="plain-text JSON fallback for smart search", - ), - ] - - penalized = apply_path_penalties( - results, - "parse CodexLens JSON output strip ANSI smart_search", - test_file_penalty=0.15, - ) - - assert penalized[0].path == "/repo/src/tools/smart-search.ts" - assert penalized[1].metadata["path_penalty_reasons"] == ["test_file"] - assert penalized[1].metadata["path_penalty_multiplier"] == pytest.approx(0.55) - - def test_apply_path_penalties_demotes_generated_artifacts(self): - results = [ - _make_result(path="/repo/dist/auth.js", score=10.0), - _make_result(path="/repo/src/auth.ts", score=9.0), - ] - - penalized = apply_path_penalties( - results, - "authenticate user", - generated_file_penalty=0.35, - ) - - assert penalized[0].path == "/repo/src/auth.ts" - assert penalized[1].metadata["path_penalty_reasons"] == ["generated_artifact"] - - def test_apply_path_penalties_more_aggressively_demotes_generated_artifacts_for_explicit_feature_queries(self): - results = [ - _make_result( - path="/repo/dist/tools/smart-search.js", - score=1.0, - excerpt="built smart search output", - ), - _make_result( - path="/repo/src/tools/smart-search.ts", - score=0.45, - excerpt="plain-text JSON fallback for smart search", - ), - ] - - penalized = apply_path_penalties( - results, - "parse CodexLens JSON output strip ANSI smart_search", - generated_file_penalty=0.35, - ) - - assert penalized[0].path == "/repo/src/tools/smart-search.ts" - assert penalized[1].metadata["path_penalty_reasons"] == ["generated_artifact"] - assert penalized[1].metadata["path_penalty_multiplier"] == pytest.approx(0.4) - - def test_apply_path_penalties_demotes_auxiliary_reference_files(self): - results = [ - _make_result(path="/repo/examples/simple_search_comparison.py", score=10.0), - _make_result(path="/repo/src/search/router.py", score=9.0), - ] - - penalized = apply_path_penalties( - results, - "how does smart search route keyword queries", - test_file_penalty=0.15, - ) - - assert penalized[0].path == "/repo/src/search/router.py" - assert penalized[1].metadata["path_penalty_reasons"] == ["auxiliary_file"] - - def test_apply_path_penalties_more_aggressively_demotes_auxiliary_files_for_explicit_feature_queries(self): - results = [ - _make_result( - path="/repo/benchmarks/smart_search_demo.py", - score=1.0, - excerpt="demo for smart search fallback", - ), - _make_result( - path="/repo/src/tools/smart-search.ts", - score=0.52, - excerpt="plain-text JSON fallback for smart search", - ), - ] - - penalized = apply_path_penalties( - results, - "parse CodexLens JSON output strip ANSI smart_search", - test_file_penalty=0.15, - ) - - assert penalized[0].path == "/repo/src/tools/smart-search.ts" - assert penalized[1].metadata["path_penalty_reasons"] == ["auxiliary_file"] - assert penalized[1].metadata["path_penalty_multiplier"] == pytest.approx(0.5) - - def test_apply_path_penalties_skips_when_query_targets_tests(self): - results = [ - _make_result(path="/repo/tests/test_auth.py", score=10.0), - _make_result(path="/repo/src/auth.py", score=9.0), - ] - - penalized = apply_path_penalties( - results, - "auth tests", - test_file_penalty=0.15, - ) - - assert penalized[0].path == "/repo/tests/test_auth.py" - - def test_apply_path_penalties_skips_generated_penalty_when_query_targets_artifacts(self): - results = [ - _make_result(path="/repo/dist/auth.js", score=10.0), - _make_result(path="/repo/src/auth.ts", score=9.0), - ] - - penalized = apply_path_penalties( - results, - "dist auth bundle", - generated_file_penalty=0.35, - ) - - assert penalized[0].path == "/repo/dist/auth.js" - - def test_rebalance_noisy_results_pushes_explicit_feature_query_noise_behind_source_files(self): - results = [ - _make_result(path="/repo/src/tools/smart-search.ts", score=0.9), - _make_result(path="/repo/tests/smart-search-intent.test.tsx", score=0.8), - _make_result(path="/repo/src/core/cli-routes.ts", score=0.7), - _make_result(path="/repo/dist/tools/smart-search.js", score=0.6), - _make_result(path="/repo/benchmarks/smart_search_demo.py", score=0.5), - ] - - rebalanced = rebalance_noisy_results( - results, - "parse CodexLens JSON output strip ANSI smart_search", - ) - - assert [item.path for item in rebalanced[:2]] == [ - "/repo/src/tools/smart-search.ts", - "/repo/src/core/cli-routes.ts", - ] - - def test_rebalance_noisy_results_preserves_tests_when_query_targets_them(self): - results = [ - _make_result(path="/repo/tests/smart-search-intent.test.tsx", score=0.9), - _make_result(path="/repo/src/tools/smart-search.ts", score=0.8), - ] - - rebalanced = rebalance_noisy_results(results, "smart search tests") - - assert [item.path for item in rebalanced] == [ - "/repo/tests/smart-search-intent.test.tsx", - "/repo/src/tools/smart-search.ts", - ] - - def test_apply_path_penalties_skips_auxiliary_penalty_when_query_targets_examples(self): - results = [ - _make_result(path="/repo/examples/simple_search_comparison.py", score=10.0), - _make_result(path="/repo/src/search/router.py", score=9.0), - ] - - penalized = apply_path_penalties( - results, - "smart search examples", - test_file_penalty=0.15, - ) - - assert penalized[0].path == "/repo/examples/simple_search_comparison.py" - - -class TestCrossEncoderRerank: - """Tests for cross-encoder reranking edge cases.""" - - def test_cross_encoder_rerank_preserves_strong_source_candidates_for_semantic_feature_queries(self): - class DummyReranker: - def score_pairs(self, pairs, batch_size=32): - _ = (pairs, batch_size) - return [0.8323705792427063, 1.2463066923373844e-05] - - reranked = cross_encoder_rerank( - "how does smart search route keyword queries", - [ - _make_result( - path="/repo/tests/smart-search-intent.test.js", - score=0.5989155769348145, - excerpt="describes how smart search routes keyword queries", - ), - _make_result( - path="/repo/src/tools/smart-search.ts", - score=0.554444432258606, - excerpt="smart search keyword routing logic", - ), - ], - DummyReranker(), - top_k=2, - ) - reranked = apply_path_penalties( - reranked, - "how does smart search route keyword queries", - test_file_penalty=0.15, - ) - - assert reranked[0].path == "/repo/src/tools/smart-search.ts" - assert reranked[0].metadata["cross_encoder_floor_reason"] == "semantic_source_path_overlap" - assert reranked[0].metadata["cross_encoder_floor_overlap_tokens"] == ["smart", "search"] - assert reranked[0].metadata["path_boost_reasons"] == ["source_path_topic_overlap"] - assert reranked[1].metadata["path_penalty_reasons"] == ["test_file"] - -# ============================================================================= -# Tests: get_rrf_weights -# ============================================================================= - - -class TestGetRrfWeights: - """Tests for get_rrf_weights() composite function.""" - - def test_get_rrf_weights_composite(self): - """get_rrf_weights should compose detect_query_intent + adjust_weights_by_intent.""" - base = {"exact": 0.3, "fuzzy": 0.1, "vector": 0.6} - # Keyword-like query - weights = get_rrf_weights("MyClassName", base) - # MyClassName -> KEYWORD -> exact boosted - assert weights["exact"] > weights["fuzzy"] - - -# ============================================================================= -# Tests: reciprocal_rank_fusion -# ============================================================================= - - -class TestReciprocalRankFusion: - """Tests for reciprocal_rank_fusion().""" - - def test_rrf_single_source(self): - """Single source RRF should produce ranked results.""" - results = { - "exact": [ - _make_result(path="a.py", score=10.0), - _make_result(path="b.py", score=5.0), - ] - } - fused = reciprocal_rank_fusion(results) - assert len(fused) == 2 - # a.py should rank higher (rank 1) - assert fused[0].path == "a.py" - assert fused[0].score > fused[1].score - - def test_rrf_multi_source(self): - """Multi-source RRF should combine rankings from multiple sources.""" - results = { - "exact": [ - _make_result(path="a.py", score=10.0), - _make_result(path="b.py", score=5.0), - ], - "vector": [ - _make_result(path="b.py", score=0.9), - _make_result(path="c.py", score=0.8), - ], - } - weights = {"exact": 0.5, "vector": 0.5} - fused = reciprocal_rank_fusion(results, weights=weights) - # b.py appears in both sources - should have highest fusion score - assert len(fused) == 3 - assert fused[0].path == "b.py" - assert fused[0].metadata["fusion_method"] == "rrf" - - def test_rrf_empty_results(self): - """Empty results map should return empty list.""" - assert reciprocal_rank_fusion({}) == [] - - def test_rrf_weight_normalization(self): - """Weights not summing to 1.0 should be auto-normalized.""" - results = { - "exact": [_make_result(path="a.py", score=10.0)], - } - weights = {"exact": 2.0} # Does not sum to 1.0 - fused = reciprocal_rank_fusion(results, weights=weights) - assert len(fused) == 1 - # Result should still be valid after weight normalization - assert fused[0].score > 0 - - -# ============================================================================= -# Tests: simple_weighted_fusion -# ============================================================================= - - -class TestSimpleWeightedFusion: - """Tests for simple_weighted_fusion().""" - - def test_weighted_fusion_basic(self): - """Basic weighted fusion should combine scores.""" - results = { - "exact": [_make_result(path="a.py", score=10.0)], - "vector": [_make_result(path="a.py", score=0.8)], - } - weights = {"exact": 0.5, "vector": 0.5} - fused = simple_weighted_fusion(results, weights=weights) - assert len(fused) == 1 - assert fused[0].path == "a.py" - assert fused[0].metadata["fusion_method"] == "simple_weighted" - assert fused[0].score > 0 - - def test_weighted_fusion_empty(self): - """Empty input should return empty list.""" - assert simple_weighted_fusion({}) == [] - - -# ============================================================================= -# Tests: apply_symbol_boost -# ============================================================================= - - -class TestApplySymbolBoost: - """Tests for apply_symbol_boost().""" - - def test_symbol_boost_applied(self): - """Results with symbol_name should get boosted by factor.""" - results = [ - _make_result(path="a.py", score=0.5, symbol_name="authenticate"), - _make_result(path="b.py", score=0.6), - ] - boosted = apply_symbol_boost(results, boost_factor=1.5) - # a.py has symbol -> gets 1.5x boost -> 0.75 - a_result = next(r for r in boosted if r.path == "a.py") - assert a_result.score == pytest.approx(0.75, abs=0.01) - assert a_result.metadata.get("boosted") is True - - def test_symbol_boost_no_match(self): - """Results without symbol_name should not be boosted.""" - results = [ - _make_result(path="a.py", score=0.5), - ] - boosted = apply_symbol_boost(results, boost_factor=1.5) - assert boosted[0].score == pytest.approx(0.5, abs=0.01) - assert boosted[0].metadata.get("boosted") is not True - - -# ============================================================================= -# Tests: filter_results_by_category -# ============================================================================= - - -class TestFilterResultsByCategory: - """Tests for filter_results_by_category().""" - - def test_filter_keyword_code_only(self): - """KEYWORD intent should return only code files.""" - results = [ - _make_result(path="main.py", score=0.9), - _make_result(path="README.md", score=0.8), - _make_result(path="utils.ts", score=0.7), - ] - filtered = filter_results_by_category(results, QueryIntent.KEYWORD) - paths = [r.path for r in filtered] - assert "README.md" not in paths - assert "main.py" in paths - assert "utils.ts" in paths - - def test_filter_semantic_docs_first(self): - """SEMANTIC intent should put docs before code.""" - results = [ - _make_result(path="main.py", score=0.9), - _make_result(path="README.md", score=0.8), - ] - filtered = filter_results_by_category(results, QueryIntent.SEMANTIC, allow_mixed=True) - # Docs should come first - assert filtered[0].path == "README.md" - - -# ============================================================================= -# Tests: group_similar_results -# ============================================================================= - - -class TestGroupSimilarResults: - """Tests for group_similar_results().""" - - def test_group_similar_results(self): - """Results with same excerpt and close scores should be grouped.""" - results = [ - _make_result(path="a.py", score=0.50, excerpt="def foo():"), - _make_result(path="b.py", score=0.50, excerpt="def foo():"), - _make_result(path="c.py", score=0.30, excerpt="def bar():"), - ] - grouped = group_similar_results(results, score_threshold_abs=0.01) - # a.py and b.py should be grouped (same excerpt, same score) - assert len(grouped) == 2 - # Find the grouped result - grouped_result = next(r for r in grouped if r.path == "a.py") - assert len(grouped_result.additional_locations) == 1 - assert grouped_result.additional_locations[0].path == "b.py" - - -# ============================================================================= -# Tests: normalize_weights -# ============================================================================= - - -class TestNormalizeWeights: - """Tests for normalize_weights().""" - - def test_normalize_weights_zero_total(self): - """All-zero weights should be returned as-is (no division by zero).""" - weights = {"exact": 0.0, "fuzzy": 0.0, "vector": 0.0} - result = normalize_weights(weights) - assert result == {"exact": 0.0, "fuzzy": 0.0, "vector": 0.0} diff --git a/codex-lens/tests/test_recursive_splitting.py b/codex-lens/tests/test_recursive_splitting.py deleted file mode 100644 index 5a3297d3..00000000 --- a/codex-lens/tests/test_recursive_splitting.py +++ /dev/null @@ -1,291 +0,0 @@ -"""Tests for recursive splitting of large symbols in chunker.""" - -import pytest -from codexlens.entities import Symbol -from codexlens.semantic.chunker import Chunker, ChunkConfig - - -class TestRecursiveSplitting: - """Test cases for recursive splitting of large symbols.""" - - def test_small_symbol_no_split(self): - """Test that small symbols are not split.""" - config = ChunkConfig(max_chunk_size=1000, overlap=100) - chunker = Chunker(config) - - content = '''def small_function(): - # This is a small function - x = 1 - y = 2 - return x + y -''' - symbols = [Symbol(name='small_function', kind='function', range=(1, 5))] - - chunks = chunker.chunk_by_symbol(content, symbols, 'test.py', 'python') - - assert len(chunks) == 1 - assert chunks[0].metadata['strategy'] == 'symbol' - assert chunks[0].metadata['symbol_name'] == 'small_function' - assert chunks[0].metadata['symbol_kind'] == 'function' - assert 'parent_symbol_range' not in chunks[0].metadata - - def test_large_symbol_splits(self): - """Test that large symbols are recursively split.""" - config = ChunkConfig(max_chunk_size=100, overlap=20) - chunker = Chunker(config) - - content = '''def large_function(): - # Line 1 - # Line 2 - # Line 3 - # Line 4 - # Line 5 - # Line 6 - # Line 7 - # Line 8 - # Line 9 - # Line 10 - # Line 11 - # Line 12 - # Line 13 - # Line 14 - # Line 15 - pass -''' - symbols = [Symbol(name='large_function', kind='function', range=(1, 18))] - - chunks = chunker.chunk_by_symbol(content, symbols, 'test.py', 'python') - - # Should be split into multiple chunks - assert len(chunks) > 1 - - # All chunks should have symbol metadata - for chunk in chunks: - assert chunk.metadata['strategy'] == 'symbol_split' - assert chunk.metadata['symbol_name'] == 'large_function' - assert chunk.metadata['symbol_kind'] == 'function' - assert chunk.metadata['parent_symbol_range'] == (1, 18) - - def test_boundary_condition(self): - """Test symbol exactly at max_chunk_size boundary.""" - config = ChunkConfig(max_chunk_size=90, overlap=20) - chunker = Chunker(config) - - content = '''def boundary_function(): - # This function is exactly at boundary - x = 1 - y = 2 - return x + y -''' - symbols = [Symbol(name='boundary_function', kind='function', range=(1, 5))] - - chunks = chunker.chunk_by_symbol(content, symbols, 'test.py', 'python') - - # Content is slightly over 90 chars, should be split - assert len(chunks) >= 1 - assert chunks[0].metadata['strategy'] == 'symbol_split' - - def test_multiple_symbols_mixed_sizes(self): - """Test chunking with multiple symbols of different sizes.""" - config = ChunkConfig(max_chunk_size=150, overlap=30) - chunker = Chunker(config) - - content = '''def small(): - return 1 - -def medium(): - # Medium function - x = 1 - y = 2 - z = 3 - return x + y + z - -def very_large(): - # Line 1 - # Line 2 - # Line 3 - # Line 4 - # Line 5 - # Line 6 - # Line 7 - # Line 8 - # Line 9 - # Line 10 - # Line 11 - # Line 12 - # Line 13 - # Line 14 - # Line 15 - pass -''' - symbols = [ - Symbol(name='small', kind='function', range=(1, 2)), - Symbol(name='medium', kind='function', range=(4, 9)), - Symbol(name='very_large', kind='function', range=(11, 28)), - ] - - chunks = chunker.chunk_by_symbol(content, symbols, 'test.py', 'python') - - # Find chunks for each symbol - small_chunks = [c for c in chunks if c.metadata['symbol_name'] == 'small'] - medium_chunks = [c for c in chunks if c.metadata['symbol_name'] == 'medium'] - large_chunks = [c for c in chunks if c.metadata['symbol_name'] == 'very_large'] - - # Small should be filtered (< min_chunk_size) - assert len(small_chunks) == 0 - - # Medium should not be split - assert len(medium_chunks) == 1 - assert medium_chunks[0].metadata['strategy'] == 'symbol' - - # Large should be split - assert len(large_chunks) > 1 - for chunk in large_chunks: - assert chunk.metadata['strategy'] == 'symbol_split' - - def test_line_numbers_preserved(self): - """Test that line numbers are correctly preserved in sub-chunks.""" - config = ChunkConfig(max_chunk_size=100, overlap=20) - chunker = Chunker(config) - - content = '''def large_function(): - # Line 1 with some extra content to make it longer - # Line 2 with some extra content to make it longer - # Line 3 with some extra content to make it longer - # Line 4 with some extra content to make it longer - # Line 5 with some extra content to make it longer - # Line 6 with some extra content to make it longer - # Line 7 with some extra content to make it longer - # Line 8 with some extra content to make it longer - # Line 9 with some extra content to make it longer - # Line 10 with some extra content to make it longer - pass -''' - symbols = [Symbol(name='large_function', kind='function', range=(1, 13))] - - chunks = chunker.chunk_by_symbol(content, symbols, 'test.py', 'python') - - # Verify line numbers are correct and sequential - assert len(chunks) > 1 - assert chunks[0].metadata['start_line'] == 1 - - # Each chunk should have valid line numbers - for chunk in chunks: - assert chunk.metadata['start_line'] >= 1 - assert chunk.metadata['end_line'] <= 13 - assert chunk.metadata['start_line'] <= chunk.metadata['end_line'] - - def test_overlap_in_split_chunks(self): - """Test that overlap is applied when splitting large symbols.""" - config = ChunkConfig(max_chunk_size=100, overlap=30) - chunker = Chunker(config) - - content = '''def large_function(): - # Line 1 - # Line 2 - # Line 3 - # Line 4 - # Line 5 - # Line 6 - # Line 7 - # Line 8 - # Line 9 - # Line 10 - # Line 11 - # Line 12 - pass -''' - symbols = [Symbol(name='large_function', kind='function', range=(1, 14))] - - chunks = chunker.chunk_by_symbol(content, symbols, 'test.py', 'python') - - # With overlap, consecutive chunks should overlap - if len(chunks) > 1: - for i in range(len(chunks) - 1): - # Next chunk should start before current chunk ends (overlap) - current_end = chunks[i].metadata['end_line'] - next_start = chunks[i + 1].metadata['start_line'] - # Overlap should exist - assert next_start <= current_end - - def test_empty_symbol_filtered(self): - """Test that symbols smaller than min_chunk_size are filtered.""" - config = ChunkConfig(max_chunk_size=1000, min_chunk_size=50) - chunker = Chunker(config) - - content = '''def tiny(): - pass -''' - symbols = [Symbol(name='tiny', kind='function', range=(1, 2))] - - chunks = chunker.chunk_by_symbol(content, symbols, 'test.py', 'python') - - # Should be filtered due to min_chunk_size - assert len(chunks) == 0 - - def test_class_symbol_splits(self): - """Test that large class symbols are also split correctly.""" - config = ChunkConfig(max_chunk_size=120, overlap=25) - chunker = Chunker(config) - - content = '''class LargeClass: - """A large class with many methods.""" - - def method1(self): - return 1 - - def method2(self): - return 2 - - def method3(self): - return 3 - - def method4(self): - return 4 -''' - symbols = [Symbol(name='LargeClass', kind='class', range=(1, 14))] - - chunks = chunker.chunk_by_symbol(content, symbols, 'test.py', 'python') - - # Should be split - assert len(chunks) > 1 - - # All chunks should preserve class metadata - for chunk in chunks: - assert chunk.metadata['symbol_name'] == 'LargeClass' - assert chunk.metadata['symbol_kind'] == 'class' - assert chunk.metadata['strategy'] == 'symbol_split' - - -class TestLightweightMode: - """Test recursive splitting with lightweight token counting.""" - - def test_large_symbol_splits_lightweight_mode(self): - """Test that large symbols split correctly in lightweight mode.""" - config = ChunkConfig(max_chunk_size=100, overlap=20, skip_token_count=True) - chunker = Chunker(config) - - content = '''def large_function(): - # Line 1 with some extra content to make it longer - # Line 2 with some extra content to make it longer - # Line 3 with some extra content to make it longer - # Line 4 with some extra content to make it longer - # Line 5 with some extra content to make it longer - # Line 6 with some extra content to make it longer - # Line 7 with some extra content to make it longer - # Line 8 with some extra content to make it longer - # Line 9 with some extra content to make it longer - # Line 10 with some extra content to make it longer - pass -''' - symbols = [Symbol(name='large_function', kind='function', range=(1, 13))] - - chunks = chunker.chunk_by_symbol(content, symbols, 'test.py', 'python') - - # Should split even in lightweight mode - assert len(chunks) > 1 - - # All chunks should have token_count (estimated) - for chunk in chunks: - assert 'token_count' in chunk.metadata - assert chunk.metadata['token_count'] > 0 diff --git a/codex-lens/tests/test_registry.py b/codex-lens/tests/test_registry.py deleted file mode 100644 index b610140a..00000000 --- a/codex-lens/tests/test_registry.py +++ /dev/null @@ -1,126 +0,0 @@ -"""Tests for RegistryStore path handling.""" - -from __future__ import annotations - -from pathlib import Path - -import pytest - -from codexlens.storage.registry import RegistryStore - - -def _swap_case(path: Path) -> str: - return str(path).swapcase() - - -def test_path_case_normalization_windows(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: - """On Windows, path comparisons should be case-insensitive.""" - import codexlens.storage.registry as registry - - monkeypatch.setattr(registry.platform, "system", lambda: "Windows") - - db_path = tmp_path / "registry.db" - source_root = tmp_path / "MyProject" - index_root = tmp_path / "indexes" - - with RegistryStore(db_path=db_path) as store: - store.register_project(source_root, index_root) - - result = store.find_by_source_path(_swap_case(source_root)) - assert result is not None - assert result["source_root"] == str(source_root.resolve()).lower() - - -def test_path_case_sensitivity_non_windows(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: - """On Unix, path comparisons should remain case-sensitive.""" - import codexlens.storage.registry as registry - - monkeypatch.setattr(registry.platform, "system", lambda: "Linux") - - db_path = tmp_path / "registry.db" - source_root = tmp_path / "MyProject" - index_root = tmp_path / "indexes" - - with RegistryStore(db_path=db_path) as store: - store.register_project(source_root, index_root) - assert store.find_by_source_path(_swap_case(source_root)) is None - - -def test_find_nearest_index(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: - """Nearest ancestor lookup should be case-insensitive on Windows.""" - import codexlens.storage.registry as registry - - monkeypatch.setattr(registry.platform, "system", lambda: "Windows") - - db_path = tmp_path / "registry.db" - source_root = tmp_path / "MyProject" - index_root = tmp_path / "indexes" - index_db = index_root / "_index.db" - - with RegistryStore(db_path=db_path) as store: - project = store.register_project(source_root, index_root) - mapping = store.register_dir(project.id, source_root, index_db, depth=0) - - query_path = Path(_swap_case(source_root)) / "SubDir" / "file.py" - found = store.find_nearest_index(query_path) - - assert found is not None - assert found.id == mapping.id - - -def test_find_descendant_project_roots_returns_nested_project_roots(tmp_path: Path) -> None: - db_path = tmp_path / "registry.db" - workspace_root = tmp_path / "workspace" - child_a = workspace_root / "packages" / "app-a" - child_b = workspace_root / "tools" / "app-b" - outside_root = tmp_path / "external" - - with RegistryStore(db_path=db_path) as store: - workspace_project = store.register_project( - workspace_root, - tmp_path / "indexes" / "workspace", - ) - child_a_project = store.register_project( - child_a, - tmp_path / "indexes" / "workspace" / "packages" / "app-a", - ) - child_b_project = store.register_project( - child_b, - tmp_path / "indexes" / "workspace" / "tools" / "app-b", - ) - outside_project = store.register_project( - outside_root, - tmp_path / "indexes" / "external", - ) - - store.register_dir( - workspace_project.id, - workspace_root, - tmp_path / "indexes" / "workspace" / "_index.db", - depth=0, - ) - child_a_mapping = store.register_dir( - child_a_project.id, - child_a, - tmp_path / "indexes" / "workspace" / "packages" / "app-a" / "_index.db", - depth=0, - ) - child_b_mapping = store.register_dir( - child_b_project.id, - child_b, - tmp_path / "indexes" / "workspace" / "tools" / "app-b" / "_index.db", - depth=0, - ) - store.register_dir( - outside_project.id, - outside_root, - tmp_path / "indexes" / "external" / "_index.db", - depth=0, - ) - - descendants = store.find_descendant_project_roots(workspace_root) - - assert [mapping.index_path for mapping in descendants] == [ - child_a_mapping.index_path, - child_b_mapping.index_path, - ] diff --git a/codex-lens/tests/test_reranker_backends.py b/codex-lens/tests/test_reranker_backends.py deleted file mode 100644 index 439631ef..00000000 --- a/codex-lens/tests/test_reranker_backends.py +++ /dev/null @@ -1,115 +0,0 @@ -"""Mocked smoke tests for all reranker backends.""" - -from __future__ import annotations - -import sys -import types -from dataclasses import dataclass - -import pytest - - -def test_reranker_backend_legacy_scores_pairs(monkeypatch: pytest.MonkeyPatch) -> None: - from codexlens.semantic.reranker import legacy as legacy_module - - class DummyCrossEncoder: - def __init__(self, model_name: str, *, device: str | None = None) -> None: - self.model_name = model_name - self.device = device - self.calls: list[dict[str, object]] = [] - - def predict(self, pairs: list[tuple[str, str]], *, batch_size: int = 32) -> list[float]: - self.calls.append({"pairs": list(pairs), "batch_size": int(batch_size)}) - return [0.5 for _ in pairs] - - monkeypatch.setattr(legacy_module, "_CrossEncoder", DummyCrossEncoder) - monkeypatch.setattr(legacy_module, "CROSS_ENCODER_AVAILABLE", True) - monkeypatch.setattr(legacy_module, "_import_error", None) - - reranker = legacy_module.CrossEncoderReranker(model_name="dummy-model", device="cpu") - scores = reranker.score_pairs([("q", "d1"), ("q", "d2")], batch_size=0) - assert scores == pytest.approx([0.5, 0.5]) - - -def test_reranker_backend_onnx_availability_check(monkeypatch: pytest.MonkeyPatch) -> None: - from codexlens.semantic.reranker.onnx_reranker import check_onnx_reranker_available - - dummy_numpy = types.ModuleType("numpy") - dummy_onnxruntime = types.ModuleType("onnxruntime") - - dummy_optimum = types.ModuleType("optimum") - dummy_optimum.__path__ = [] # Mark as package for submodule imports. - dummy_optimum_ort = types.ModuleType("optimum.onnxruntime") - dummy_optimum_ort.ORTModelForSequenceClassification = object() - - dummy_transformers = types.ModuleType("transformers") - dummy_transformers.AutoTokenizer = object() - - monkeypatch.setitem(sys.modules, "numpy", dummy_numpy) - monkeypatch.setitem(sys.modules, "onnxruntime", dummy_onnxruntime) - monkeypatch.setitem(sys.modules, "optimum", dummy_optimum) - monkeypatch.setitem(sys.modules, "optimum.onnxruntime", dummy_optimum_ort) - monkeypatch.setitem(sys.modules, "transformers", dummy_transformers) - - ok, err = check_onnx_reranker_available() - assert ok is True - assert err is None - - -def test_reranker_backend_api_constructs_with_dummy_httpx(monkeypatch: pytest.MonkeyPatch) -> None: - from codexlens.semantic.reranker.api_reranker import APIReranker - - created: list[object] = [] - - class DummyClient: - def __init__( - self, - *, - base_url: str | None = None, - headers: dict[str, str] | None = None, - timeout: float | None = None, - ) -> None: - self.base_url = base_url - self.headers = headers or {} - self.timeout = timeout - self.closed = False - created.append(self) - - def close(self) -> None: - self.closed = True - - dummy_httpx = types.ModuleType("httpx") - dummy_httpx.Client = DummyClient - monkeypatch.setitem(sys.modules, "httpx", dummy_httpx) - - reranker = APIReranker(api_key="k", provider="siliconflow") - assert reranker.provider == "siliconflow" - assert len(created) == 1 - assert created[0].headers["Authorization"] == "Bearer k" - reranker.close() - assert created[0].closed is True - - -def test_reranker_backend_litellm_scores_pairs(monkeypatch: pytest.MonkeyPatch) -> None: - from codexlens.semantic.reranker.litellm_reranker import LiteLLMReranker - - @dataclass(frozen=True, slots=True) - class ChatMessage: - role: str - content: str - - class DummyLiteLLMClient: - def __init__(self, model: str = "default", **_kwargs: object) -> None: - self.model = model - - def chat(self, _messages: list[ChatMessage]) -> object: - return types.SimpleNamespace(content="0.5") - - dummy_litellm = types.ModuleType("ccw_litellm") - dummy_litellm.ChatMessage = ChatMessage - dummy_litellm.LiteLLMClient = DummyLiteLLMClient - monkeypatch.setitem(sys.modules, "ccw_litellm", dummy_litellm) - - reranker = LiteLLMReranker(model="dummy") - assert reranker.score_pairs([("q", "d")]) == pytest.approx([0.5]) - diff --git a/codex-lens/tests/test_reranker_factory.py b/codex-lens/tests/test_reranker_factory.py deleted file mode 100644 index 62647d1d..00000000 --- a/codex-lens/tests/test_reranker_factory.py +++ /dev/null @@ -1,401 +0,0 @@ -"""Tests for reranker factory and availability checks.""" - -from __future__ import annotations - -import builtins -import math -import sys -import types - -import pytest - -from codexlens.semantic.reranker import ( - BaseReranker, - ONNXReranker, - check_reranker_available, - get_reranker, -) -from codexlens.semantic.reranker import legacy as legacy_module - - -def test_public_imports_work() -> None: - from codexlens.semantic.reranker import BaseReranker as ImportedBaseReranker - from codexlens.semantic.reranker import get_reranker as imported_get_reranker - - assert ImportedBaseReranker is BaseReranker - assert imported_get_reranker is get_reranker - - -def test_base_reranker_is_abstract() -> None: - with pytest.raises(TypeError): - BaseReranker() # type: ignore[abstract] - - -def test_check_reranker_available_invalid_backend() -> None: - ok, err = check_reranker_available("nope") - assert ok is False - assert "Invalid reranker backend" in (err or "") - - -def test_get_reranker_invalid_backend_raises_value_error() -> None: - with pytest.raises(ValueError, match="Unknown backend"): - get_reranker("nope") - - -def test_get_reranker_legacy_missing_dependency_raises_import_error( - monkeypatch: pytest.MonkeyPatch, -) -> None: - monkeypatch.setattr(legacy_module, "CROSS_ENCODER_AVAILABLE", False) - monkeypatch.setattr(legacy_module, "_import_error", "missing sentence-transformers") - - with pytest.raises(ImportError, match="missing sentence-transformers"): - get_reranker(backend="legacy", model_name="dummy-model") - - -def test_get_reranker_legacy_returns_cross_encoder_reranker( - monkeypatch: pytest.MonkeyPatch, -) -> None: - class DummyCrossEncoder: - def __init__(self, model_name: str, *, device: str | None = None) -> None: - self.model_name = model_name - self.device = device - self.last_batch_size: int | None = None - - def predict(self, pairs: list[tuple[str, str]], *, batch_size: int = 32) -> list[float]: - self.last_batch_size = int(batch_size) - return [0.5 for _ in pairs] - - monkeypatch.setattr(legacy_module, "_CrossEncoder", DummyCrossEncoder) - monkeypatch.setattr(legacy_module, "CROSS_ENCODER_AVAILABLE", True) - monkeypatch.setattr(legacy_module, "_import_error", None) - - reranker = get_reranker(backend=" LEGACY ", model_name="dummy-model", device="cpu") - assert isinstance(reranker, legacy_module.CrossEncoderReranker) - - assert reranker.score_pairs([]) == [] - - scores = reranker.score_pairs([("q", "d1"), ("q", "d2")], batch_size=0) - assert scores == pytest.approx([0.5, 0.5]) - assert reranker._model is not None - assert reranker._model.last_batch_size == 32 - - -def test_check_reranker_available_onnx_missing_deps(monkeypatch: pytest.MonkeyPatch) -> None: - real_import = builtins.__import__ - - def fake_import(name: str, globals=None, locals=None, fromlist=(), level: int = 0): - if name == "onnxruntime": - raise ImportError("no onnxruntime") - return real_import(name, globals, locals, fromlist, level) - - monkeypatch.setattr(builtins, "__import__", fake_import) - - ok, err = check_reranker_available("onnx") - assert ok is False - assert "onnxruntime not available" in (err or "") - - -def test_check_reranker_available_onnx_deps_present(monkeypatch: pytest.MonkeyPatch) -> None: - dummy_onnxruntime = types.ModuleType("onnxruntime") - dummy_optimum = types.ModuleType("optimum") - dummy_optimum.__path__ = [] # Mark as package for submodule imports. - dummy_optimum_ort = types.ModuleType("optimum.onnxruntime") - dummy_optimum_ort.ORTModelForSequenceClassification = object() - - dummy_transformers = types.ModuleType("transformers") - dummy_transformers.AutoTokenizer = object() - - monkeypatch.setitem(sys.modules, "onnxruntime", dummy_onnxruntime) - monkeypatch.setitem(sys.modules, "optimum", dummy_optimum) - monkeypatch.setitem(sys.modules, "optimum.onnxruntime", dummy_optimum_ort) - monkeypatch.setitem(sys.modules, "transformers", dummy_transformers) - - ok, err = check_reranker_available("onnx") - assert ok is True - assert err is None - - -def test_check_reranker_available_litellm_missing_deps(monkeypatch: pytest.MonkeyPatch) -> None: - real_import = builtins.__import__ - - def fake_import(name: str, globals=None, locals=None, fromlist=(), level: int = 0): - if name == "ccw_litellm": - raise ImportError("no ccw-litellm") - return real_import(name, globals, locals, fromlist, level) - - monkeypatch.setattr(builtins, "__import__", fake_import) - - ok, err = check_reranker_available("litellm") - assert ok is False - assert "ccw-litellm not available" in (err or "") - - -def test_check_reranker_available_litellm_deps_present( - monkeypatch: pytest.MonkeyPatch, -) -> None: - dummy_litellm = types.ModuleType("ccw_litellm") - monkeypatch.setitem(sys.modules, "ccw_litellm", dummy_litellm) - - ok, err = check_reranker_available("litellm") - assert ok is True - assert err is None - - -def test_check_reranker_available_api_missing_deps(monkeypatch: pytest.MonkeyPatch) -> None: - real_import = builtins.__import__ - - def fake_import(name: str, globals=None, locals=None, fromlist=(), level: int = 0): - if name == "httpx": - raise ImportError("no httpx") - return real_import(name, globals, locals, fromlist, level) - - monkeypatch.setattr(builtins, "__import__", fake_import) - - ok, err = check_reranker_available("api") - assert ok is False - assert "httpx not available" in (err or "") - - -def test_check_reranker_available_api_deps_present(monkeypatch: pytest.MonkeyPatch) -> None: - dummy_httpx = types.ModuleType("httpx") - monkeypatch.setitem(sys.modules, "httpx", dummy_httpx) - - ok, err = check_reranker_available("api") - assert ok is True - assert err is None - - -def test_get_reranker_litellm_returns_litellm_reranker( - monkeypatch: pytest.MonkeyPatch, -) -> None: - from dataclasses import dataclass - - @dataclass(frozen=True, slots=True) - class ChatMessage: - role: str - content: str - - class DummyLiteLLMClient: - def __init__(self, model: str = "default", **kwargs) -> None: - self.model = model - self.kwargs = kwargs - - def chat(self, messages, **kwargs): - return types.SimpleNamespace(content="0.5") - - dummy_litellm = types.ModuleType("ccw_litellm") - dummy_litellm.ChatMessage = ChatMessage - dummy_litellm.LiteLLMClient = DummyLiteLLMClient - monkeypatch.setitem(sys.modules, "ccw_litellm", dummy_litellm) - - reranker = get_reranker(backend="litellm", model_name="dummy-model") - - from codexlens.semantic.reranker.litellm_reranker import LiteLLMReranker - - assert isinstance(reranker, LiteLLMReranker) - assert reranker.score_pairs([("q", "d")]) == pytest.approx([0.5]) - - -def test_get_reranker_onnx_raises_import_error_with_dependency_hint( - monkeypatch: pytest.MonkeyPatch, -) -> None: - real_import = builtins.__import__ - - def fake_import(name: str, globals=None, locals=None, fromlist=(), level: int = 0): - if name == "onnxruntime": - raise ImportError("no onnxruntime") - return real_import(name, globals, locals, fromlist, level) - - monkeypatch.setattr(builtins, "__import__", fake_import) - - with pytest.raises(ImportError) as exc: - get_reranker(backend="onnx", model_name="any") - - assert "onnxruntime" in str(exc.value) - - -def test_get_reranker_default_backend_is_onnx(monkeypatch: pytest.MonkeyPatch) -> None: - dummy_onnxruntime = types.ModuleType("onnxruntime") - dummy_optimum = types.ModuleType("optimum") - dummy_optimum.__path__ = [] # Mark as package for submodule imports. - dummy_optimum_ort = types.ModuleType("optimum.onnxruntime") - dummy_optimum_ort.ORTModelForSequenceClassification = object() - - dummy_transformers = types.ModuleType("transformers") - dummy_transformers.AutoTokenizer = object() - - monkeypatch.setitem(sys.modules, "onnxruntime", dummy_onnxruntime) - monkeypatch.setitem(sys.modules, "optimum", dummy_optimum) - monkeypatch.setitem(sys.modules, "optimum.onnxruntime", dummy_optimum_ort) - monkeypatch.setitem(sys.modules, "transformers", dummy_transformers) - - reranker = get_reranker() - assert isinstance(reranker, ONNXReranker) - - -def test_onnx_reranker_scores_pairs_with_sigmoid_normalization( - monkeypatch: pytest.MonkeyPatch, -) -> None: - import numpy as np - - dummy_onnxruntime = types.ModuleType("onnxruntime") - - dummy_optimum = types.ModuleType("optimum") - dummy_optimum.__path__ = [] # Mark as package for submodule imports. - dummy_optimum_ort = types.ModuleType("optimum.onnxruntime") - - class DummyModelOutput: - def __init__(self, logits: np.ndarray) -> None: - self.logits = logits - - class DummyModel: - input_names = ["input_ids", "attention_mask"] - - def __init__(self) -> None: - self.calls: list[int] = [] - self._next_logit = 0 - - def __call__(self, **inputs): - batch = int(inputs["input_ids"].shape[0]) - start = self._next_logit - self._next_logit += batch - self.calls.append(batch) - logits = np.arange(start, start + batch, dtype=np.float32).reshape(batch, 1) - return DummyModelOutput(logits=logits) - - class DummyORTModelForSequenceClassification: - @classmethod - def from_pretrained(cls, model_name: str, providers=None, **kwargs): - _ = model_name, providers, kwargs - return DummyModel() - - dummy_optimum_ort.ORTModelForSequenceClassification = DummyORTModelForSequenceClassification - - dummy_transformers = types.ModuleType("transformers") - - class DummyAutoTokenizer: - model_max_length = 512 - - @classmethod - def from_pretrained(cls, model_name: str, **kwargs): - _ = model_name, kwargs - return cls() - - def __call__(self, *, text, text_pair, return_tensors, **kwargs): - _ = text_pair, kwargs - assert return_tensors == "np" - batch = len(text) - # Include token_type_ids to ensure input filtering is exercised. - return { - "input_ids": np.zeros((batch, 4), dtype=np.int64), - "attention_mask": np.ones((batch, 4), dtype=np.int64), - "token_type_ids": np.zeros((batch, 4), dtype=np.int64), - } - - dummy_transformers.AutoTokenizer = DummyAutoTokenizer - - monkeypatch.setitem(sys.modules, "onnxruntime", dummy_onnxruntime) - monkeypatch.setitem(sys.modules, "optimum", dummy_optimum) - monkeypatch.setitem(sys.modules, "optimum.onnxruntime", dummy_optimum_ort) - monkeypatch.setitem(sys.modules, "transformers", dummy_transformers) - - reranker = get_reranker(backend="onnx", model_name="dummy-model", use_gpu=False) - assert isinstance(reranker, ONNXReranker) - assert reranker._model is None - - pairs = [("q", f"d{idx}") for idx in range(5)] - scores = reranker.score_pairs(pairs, batch_size=2) - - assert reranker._model is not None - assert reranker._model.calls == [2, 2, 1] - assert len(scores) == len(pairs) - assert all(0.0 <= s <= 1.0 for s in scores) - - expected = [1.0 / (1.0 + math.exp(-float(i))) for i in range(len(pairs))] - assert scores == pytest.approx(expected, rel=1e-6, abs=1e-6) - - -def test_onnx_reranker_splits_tuple_providers_into_provider_options( - monkeypatch: pytest.MonkeyPatch, -) -> None: - import numpy as np - - captured: dict[str, object] = {} - - dummy_onnxruntime = types.ModuleType("onnxruntime") - - dummy_optimum = types.ModuleType("optimum") - dummy_optimum.__path__ = [] - dummy_optimum_ort = types.ModuleType("optimum.onnxruntime") - - class DummyModelOutput: - def __init__(self, logits: np.ndarray) -> None: - self.logits = logits - - class DummyModel: - input_names = ["input_ids", "attention_mask"] - - def __call__(self, **inputs): - batch = int(inputs["input_ids"].shape[0]) - return DummyModelOutput(logits=np.zeros((batch, 1), dtype=np.float32)) - - class DummyORTModelForSequenceClassification: - @classmethod - def from_pretrained( - cls, - model_name: str, - providers=None, - provider_options=None, - **kwargs, - ): - captured["model_name"] = model_name - captured["providers"] = providers - captured["provider_options"] = provider_options - captured["kwargs"] = kwargs - return DummyModel() - - dummy_optimum_ort.ORTModelForSequenceClassification = DummyORTModelForSequenceClassification - - dummy_transformers = types.ModuleType("transformers") - - class DummyAutoTokenizer: - model_max_length = 512 - - @classmethod - def from_pretrained(cls, model_name: str, **kwargs): - _ = model_name, kwargs - return cls() - - def __call__(self, *, text, text_pair, return_tensors, **kwargs): - _ = text_pair, kwargs - assert return_tensors == "np" - batch = len(text) - return { - "input_ids": np.zeros((batch, 4), dtype=np.int64), - "attention_mask": np.ones((batch, 4), dtype=np.int64), - } - - dummy_transformers.AutoTokenizer = DummyAutoTokenizer - - monkeypatch.setitem(sys.modules, "onnxruntime", dummy_onnxruntime) - monkeypatch.setitem(sys.modules, "optimum", dummy_optimum) - monkeypatch.setitem(sys.modules, "optimum.onnxruntime", dummy_optimum_ort) - monkeypatch.setitem(sys.modules, "transformers", dummy_transformers) - - reranker = get_reranker( - backend="onnx", - model_name="dummy-model", - use_gpu=True, - providers=[ - ("DmlExecutionProvider", {"device_id": 1}), - "CPUExecutionProvider", - ], - ) - assert isinstance(reranker, ONNXReranker) - - scores = reranker.score_pairs([("q", "d")], batch_size=1) - - assert scores == pytest.approx([0.5]) - assert captured["model_name"] == "dummy-model" - assert captured["providers"] == ["DmlExecutionProvider", "CPUExecutionProvider"] - assert captured["provider_options"] == [{"device_id": 1}, {}] diff --git a/codex-lens/tests/test_result_grouping.py b/codex-lens/tests/test_result_grouping.py deleted file mode 100644 index ee2720d2..00000000 --- a/codex-lens/tests/test_result_grouping.py +++ /dev/null @@ -1,589 +0,0 @@ -"""Multi-level tests for search result grouping functionality. - -Tests cover: -1. Unit tests for group_similar_results function -2. Boundary condition tests -3. Integration tests with SearchOptions -4. Performance/stress tests -""" - -import pytest -from typing import List - -from codexlens.entities import SearchResult, AdditionalLocation -from codexlens.search.ranking import group_similar_results -from codexlens.search.chain_search import SearchOptions - - -# ============================================================================= -# Test Fixtures -# ============================================================================= - -@pytest.fixture -def sample_results() -> List[SearchResult]: - """Create sample search results for testing.""" - return [ - SearchResult(path="a.py", score=0.5, excerpt="def foo(): pass", start_line=10, symbol_name="foo"), - SearchResult(path="b.py", score=0.5, excerpt="def foo(): pass", start_line=20, symbol_name="foo"), - SearchResult(path="c.py", score=0.49, excerpt="def foo(): pass", start_line=30, symbol_name="foo"), - SearchResult(path="d.py", score=0.3, excerpt="def bar(): pass", start_line=40, symbol_name="bar"), - ] - - -@pytest.fixture -def results_with_different_excerpts() -> List[SearchResult]: - """Results with same scores but different content.""" - return [ - SearchResult(path="a.py", score=0.5, excerpt="def foo(): pass"), - SearchResult(path="b.py", score=0.5, excerpt="def bar(): pass"), - SearchResult(path="c.py", score=0.5, excerpt="def baz(): pass"), - ] - - -@pytest.fixture -def results_with_same_excerpt_different_scores() -> List[SearchResult]: - """Results with same content but very different scores.""" - return [ - SearchResult(path="a.py", score=0.9, excerpt="def foo(): pass"), - SearchResult(path="b.py", score=0.5, excerpt="def foo(): pass"), - SearchResult(path="c.py", score=0.1, excerpt="def foo(): pass"), - ] - - -# ============================================================================= -# Level 1: Unit Tests - Basic Functionality -# ============================================================================= - -class TestGroupSimilarResultsBasic: - """Basic unit tests for group_similar_results function.""" - - def test_empty_results_returns_empty(self): - """Empty input should return empty output.""" - result = group_similar_results([]) - assert result == [] - - def test_single_result_returns_unchanged(self): - """Single result should be returned as-is.""" - single = SearchResult(path="test.py", score=0.5, excerpt="code") - result = group_similar_results([single]) - - assert len(result) == 1 - assert result[0].path == "test.py" - assert result[0].additional_locations == [] - - def test_groups_identical_excerpt_similar_score(self, sample_results): - """Results with same excerpt and similar scores should be grouped.""" - grouped = group_similar_results(sample_results, score_threshold_abs=0.02) - - # Should have 2 groups: foo group (a, b, c) and bar (d) - assert len(grouped) == 2 - - # First group should have additional locations - foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass") - assert len(foo_group.additional_locations) == 2 - - # Second group (bar) should have no additional locations - bar_group = next(r for r in grouped if r.excerpt == "def bar(): pass") - assert len(bar_group.additional_locations) == 0 - - def test_preserves_highest_score_as_representative(self, sample_results): - """Representative result should have the highest score in group.""" - grouped = group_similar_results(sample_results, score_threshold_abs=0.02) - - foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass") - # a.py has score 0.5, which is highest - assert foo_group.path == "a.py" - assert foo_group.score == 0.5 - - def test_additional_locations_contain_correct_info(self, sample_results): - """Additional locations should contain correct path, score, line info.""" - grouped = group_similar_results(sample_results, score_threshold_abs=0.02) - - foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass") - locations = foo_group.additional_locations - - paths = {loc.path for loc in locations} - assert "b.py" in paths - assert "c.py" in paths - - # Check that start_line is preserved - for loc in locations: - if loc.path == "b.py": - assert loc.start_line == 20 - elif loc.path == "c.py": - assert loc.start_line == 30 - - -# ============================================================================= -# Level 2: Boundary Condition Tests -# ============================================================================= - -class TestGroupSimilarResultsBoundary: - """Boundary condition tests for edge cases.""" - - def test_threshold_zero_no_grouping(self): - """With threshold=0, only exactly equal scores should group.""" - results = [ - SearchResult(path="a.py", score=0.5, excerpt="def foo()"), - SearchResult(path="b.py", score=0.5, excerpt="def foo()"), - SearchResult(path="c.py", score=0.50001, excerpt="def foo()"), # Slightly different - ] - - grouped = group_similar_results(results, score_threshold_abs=0.0) - - # a and b should group (exact same score), c should be separate - assert len(grouped) == 2 - - main_group = next(r for r in grouped if len(r.additional_locations) > 0) - assert len(main_group.additional_locations) == 1 - - def test_threshold_exact_boundary(self): - """Test behavior at exact threshold boundary. - - Note: Due to floating-point precision, 0.5 - 0.49 = 0.010000000000000009 - which is slightly > 0.01, so they won't group with threshold=0.01. - Use a slightly larger threshold to account for floating-point precision. - """ - results = [ - SearchResult(path="a.py", score=0.5, excerpt="def foo()"), - SearchResult(path="b.py", score=0.49, excerpt="def foo()"), # 0.01 diff (floating-point) - SearchResult(path="c.py", score=0.48, excerpt="def foo()"), # 0.02 diff from a - ] - - # With threshold 0.011 (slightly above floating-point 0.01), a and b should group - grouped = group_similar_results(results, score_threshold_abs=0.011) - - # a groups with b, c is separate (0.02 from a, 0.01 from b) - # After a+b group, c is compared with remaining and forms its own group - assert len(grouped) == 2 - - # Verify a is representative (highest score) - main_group = next(r for r in grouped if r.score == 0.5) - assert main_group.path == "a.py" - assert len(main_group.additional_locations) == 1 - assert main_group.additional_locations[0].path == "b.py" - - def test_large_threshold_groups_all(self): - """Very large threshold should group all same-content results.""" - results = [ - SearchResult(path="a.py", score=0.9, excerpt="def foo()"), - SearchResult(path="b.py", score=0.1, excerpt="def foo()"), - ] - - grouped = group_similar_results(results, score_threshold_abs=1.0) - - assert len(grouped) == 1 - assert len(grouped[0].additional_locations) == 1 - - def test_none_excerpt_not_grouped(self): - """Results with None excerpt should not be grouped.""" - results = [ - SearchResult(path="a.py", score=0.5, excerpt=None), - SearchResult(path="b.py", score=0.5, excerpt=None), - ] - - grouped = group_similar_results(results) - - # None excerpts can't be grouped by content - assert len(grouped) == 2 - for r in grouped: - assert len(r.additional_locations) == 0 - - def test_empty_excerpt_not_grouped(self): - """Results with empty string excerpt should not be grouped.""" - results = [ - SearchResult(path="a.py", score=0.5, excerpt=""), - SearchResult(path="b.py", score=0.5, excerpt=""), - SearchResult(path="c.py", score=0.5, excerpt=" "), # Whitespace only - ] - - grouped = group_similar_results(results) - - # Empty/whitespace excerpts can't be grouped - assert len(grouped) == 3 - - def test_different_excerpts_not_grouped(self, results_with_different_excerpts): - """Results with different excerpts should not be grouped even with same score.""" - grouped = group_similar_results(results_with_different_excerpts, score_threshold_abs=1.0) - - # Different content = no grouping - assert len(grouped) == 3 - for r in grouped: - assert len(r.additional_locations) == 0 - - def test_same_excerpt_different_scores_creates_subgroups(self, results_with_same_excerpt_different_scores): - """Same content but very different scores should create separate subgroups.""" - grouped = group_similar_results( - results_with_same_excerpt_different_scores, - score_threshold_abs=0.1 - ) - - # Scores 0.9, 0.5, 0.1 with threshold 0.1 - # 0.9 and 0.5 differ by 0.4 > 0.1, so separate - # 0.5 and 0.1 differ by 0.4 > 0.1, so separate - assert len(grouped) == 3 - - -# ============================================================================= -# Level 3: Content Field Tests -# ============================================================================= - -class TestGroupSimilarResultsContentField: - """Tests for different content_field options.""" - - def test_group_by_content_field(self): - """Should be able to group by 'content' field instead of 'excerpt'.""" - results = [ - SearchResult(path="a.py", score=0.5, excerpt="short", content="full content here"), - SearchResult(path="b.py", score=0.5, excerpt="different", content="full content here"), - ] - - # Group by excerpt - different excerpts, no grouping - grouped_by_excerpt = group_similar_results(results, content_field="excerpt") - assert len(grouped_by_excerpt) == 2 - - # Group by content - same content, should group - grouped_by_content = group_similar_results(results, content_field="content") - assert len(grouped_by_content) == 1 - assert len(grouped_by_content[0].additional_locations) == 1 - - def test_fallback_when_content_field_missing(self): - """Results without the specified content field should not be grouped.""" - results = [ - SearchResult(path="a.py", score=0.5, content=None), - SearchResult(path="b.py", score=0.5, content=None), - ] - - grouped = group_similar_results(results, content_field="content") - - # None content = ungroupable - assert len(grouped) == 2 - - -# ============================================================================= -# Level 4: Metadata and Ordering Tests -# ============================================================================= - -class TestGroupSimilarResultsMetadata: - """Tests for metadata handling and result ordering.""" - - def test_grouped_count_in_metadata(self, sample_results): - """Grouped results should have grouped_count in metadata.""" - grouped = group_similar_results(sample_results, score_threshold_abs=0.02) - - foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass") - - assert "grouped_count" in foo_group.metadata - assert foo_group.metadata["grouped_count"] == 3 # a, b, c - - def test_preserves_original_metadata(self): - """Original metadata should be preserved in grouped result.""" - results = [ - SearchResult( - path="a.py", - score=0.5, - excerpt="def foo()", - metadata={"original_key": "original_value", "fusion_score": 0.5} - ), - SearchResult(path="b.py", score=0.5, excerpt="def foo()"), - ] - - grouped = group_similar_results(results, score_threshold_abs=0.1) - - assert grouped[0].metadata["original_key"] == "original_value" - assert grouped[0].metadata["fusion_score"] == 0.5 - - def test_results_sorted_by_score_descending(self): - """Final results should be sorted by score descending.""" - results = [ - SearchResult(path="low.py", score=0.1, excerpt="low"), - SearchResult(path="high.py", score=0.9, excerpt="high"), - SearchResult(path="mid.py", score=0.5, excerpt="mid"), - ] - - grouped = group_similar_results(results) - - scores = [r.score for r in grouped] - assert scores == sorted(scores, reverse=True) - assert scores == [0.9, 0.5, 0.1] - - -# ============================================================================= -# Level 5: Integration Tests with SearchOptions -# ============================================================================= - -class TestSearchOptionsGrouping: - """Integration tests for SearchOptions grouping configuration.""" - - def test_search_options_default_grouping_disabled(self): - """Default SearchOptions should have grouping disabled.""" - options = SearchOptions() - - assert options.group_results is False - assert options.grouping_threshold == 0.01 - - def test_search_options_enable_grouping(self): - """SearchOptions should allow enabling grouping.""" - options = SearchOptions(group_results=True) - - assert options.group_results is True - - def test_search_options_custom_threshold(self): - """SearchOptions should allow custom grouping threshold.""" - options = SearchOptions(group_results=True, grouping_threshold=0.05) - - assert options.grouping_threshold == 0.05 - - def test_search_options_all_parameters(self): - """SearchOptions should work with all parameters combined.""" - options = SearchOptions( - depth=3, - max_workers=4, - limit_per_dir=20, - total_limit=200, - include_symbols=True, - hybrid_mode=True, - group_results=True, - grouping_threshold=0.02, - ) - - assert options.depth == 3 - assert options.group_results is True - assert options.grouping_threshold == 0.02 - - -# ============================================================================= -# Level 6: AdditionalLocation Entity Tests -# ============================================================================= - -class TestAdditionalLocationEntity: - """Tests for AdditionalLocation entity model.""" - - def test_create_minimal_additional_location(self): - """Create AdditionalLocation with minimal required fields.""" - loc = AdditionalLocation(path="test.py", score=0.5) - - assert loc.path == "test.py" - assert loc.score == 0.5 - assert loc.start_line is None - assert loc.end_line is None - assert loc.symbol_name is None - - def test_create_full_additional_location(self): - """Create AdditionalLocation with all fields.""" - loc = AdditionalLocation( - path="test.py", - score=0.75, - start_line=10, - end_line=20, - symbol_name="my_function" - ) - - assert loc.path == "test.py" - assert loc.score == 0.75 - assert loc.start_line == 10 - assert loc.end_line == 20 - assert loc.symbol_name == "my_function" - - def test_additional_location_path_required(self): - """Path should be required for AdditionalLocation.""" - with pytest.raises(Exception): # ValidationError - AdditionalLocation(score=0.5) - - def test_additional_location_score_required(self): - """Score should be required for AdditionalLocation.""" - with pytest.raises(Exception): # ValidationError - AdditionalLocation(path="test.py") - - def test_additional_location_score_non_negative(self): - """Score should be non-negative.""" - with pytest.raises(Exception): # ValidationError - AdditionalLocation(path="test.py", score=-0.1) - - def test_additional_location_serialization(self): - """AdditionalLocation should serialize correctly.""" - loc = AdditionalLocation( - path="test.py", - score=0.5, - start_line=10, - symbol_name="func" - ) - - data = loc.model_dump() - - assert data["path"] == "test.py" - assert data["score"] == 0.5 - assert data["start_line"] == 10 - assert data["symbol_name"] == "func" - - -# ============================================================================= -# Level 7: SearchResult with AdditionalLocations Tests -# ============================================================================= - -class TestSearchResultWithAdditionalLocations: - """Tests for SearchResult entity with additional_locations field.""" - - def test_search_result_default_empty_locations(self): - """SearchResult should have empty additional_locations by default.""" - result = SearchResult(path="test.py", score=0.5) - - assert result.additional_locations == [] - - def test_search_result_with_additional_locations(self): - """SearchResult should accept additional_locations.""" - locations = [ - AdditionalLocation(path="other.py", score=0.4, start_line=5), - ] - - result = SearchResult( - path="main.py", - score=0.5, - additional_locations=locations - ) - - assert len(result.additional_locations) == 1 - assert result.additional_locations[0].path == "other.py" - - def test_search_result_serialization_with_locations(self): - """SearchResult with additional_locations should serialize correctly.""" - locations = [ - AdditionalLocation(path="loc1.py", score=0.4), - AdditionalLocation(path="loc2.py", score=0.3), - ] - - result = SearchResult( - path="main.py", - score=0.5, - excerpt="code", - additional_locations=locations - ) - - data = result.model_dump() - - assert len(data["additional_locations"]) == 2 - assert data["additional_locations"][0]["path"] == "loc1.py" - assert data["additional_locations"][1]["path"] == "loc2.py" - - -# ============================================================================= -# Level 8: Stress/Performance Tests -# ============================================================================= - -class TestGroupSimilarResultsPerformance: - """Performance and stress tests.""" - - def test_handles_large_result_set(self): - """Should handle large number of results efficiently.""" - # Create 1000 results with 100 different excerpts - results = [] - for i in range(1000): - excerpt_id = i % 100 - results.append(SearchResult( - path=f"file_{i}.py", - score=0.5 + (i % 10) * 0.01, # Scores vary slightly - excerpt=f"def func_{excerpt_id}(): pass", - start_line=i, - )) - - grouped = group_similar_results(results, score_threshold_abs=0.05) - - # Should reduce to approximately 100 groups (one per excerpt) - # with some variation due to score subgrouping - assert len(grouped) <= 200 - assert len(grouped) >= 50 # At least some grouping happened - - def test_handles_all_identical_results(self): - """Should handle case where all results are identical.""" - results = [ - SearchResult(path=f"file_{i}.py", score=0.5, excerpt="same code") - for i in range(100) - ] - - grouped = group_similar_results(results, score_threshold_abs=0.01) - - # All should be grouped into one - assert len(grouped) == 1 - assert len(grouped[0].additional_locations) == 99 - - def test_handles_all_unique_results(self): - """Should handle case where all results are unique.""" - results = [ - SearchResult(path=f"file_{i}.py", score=0.5, excerpt=f"unique_{i}") - for i in range(100) - ] - - grouped = group_similar_results(results, score_threshold_abs=0.01) - - # None should be grouped - assert len(grouped) == 100 - for r in grouped: - assert len(r.additional_locations) == 0 - - -# ============================================================================= -# Level 9: Real-world Scenario Tests -# ============================================================================= - -class TestGroupSimilarResultsRealWorld: - """Tests simulating real-world usage scenarios.""" - - def test_rrf_fusion_scores_grouping(self): - """Test with typical RRF fusion score ranges (0.001 - 0.02).""" - results = [ - SearchResult(path="auth/login.py", score=0.0164, excerpt="def authenticate():"), - SearchResult(path="auth/oauth.py", score=0.0163, excerpt="def authenticate():"), - SearchResult(path="auth/basic.py", score=0.0162, excerpt="def authenticate():"), - SearchResult(path="utils/helper.py", score=0.0082, excerpt="def helper():"), - ] - - # RRF scores are typically very small, use appropriate threshold - grouped = group_similar_results(results, score_threshold_abs=0.001) - - assert len(grouped) == 2 - - auth_group = next(r for r in grouped if "auth" in r.path) - assert len(auth_group.additional_locations) == 2 - - def test_duplicate_code_detection(self): - """Simulate detecting duplicate code across files.""" - duplicate_code = """ -def calculate_total(items): - return sum(item.price for item in items) -""" - results = [ - SearchResult(path="orders/service.py", score=0.5, excerpt=duplicate_code, start_line=45), - SearchResult(path="cart/calculator.py", score=0.5, excerpt=duplicate_code, start_line=12), - SearchResult(path="invoices/generator.py", score=0.5, excerpt=duplicate_code, start_line=78), - ] - - grouped = group_similar_results(results, score_threshold_abs=0.01) - - # All duplicates should be grouped - assert len(grouped) == 1 - assert len(grouped[0].additional_locations) == 2 - - # Can identify all locations - all_paths = {grouped[0].path} | {loc.path for loc in grouped[0].additional_locations} - assert all_paths == {"orders/service.py", "cart/calculator.py", "invoices/generator.py"} - - def test_mixed_relevance_results(self): - """Test with mixed relevance results typical of code search.""" - results = [ - # High relevance group - exact match - SearchResult(path="core.py", score=0.9, excerpt="def process():"), - SearchResult(path="core_v2.py", score=0.89, excerpt="def process():"), - # Medium relevance - partial match - SearchResult(path="utils.py", score=0.5, excerpt="def process_data():"), - # Low relevance - tangential - SearchResult(path="test.py", score=0.2, excerpt="def test_process():"), - ] - - grouped = group_similar_results(results, score_threshold_abs=0.02) - - # core.py and core_v2.py should group (same excerpt, similar score) - # Others should remain separate (different excerpts) - assert len(grouped) == 3 - - high_rel = next(r for r in grouped if r.score >= 0.89) - assert len(high_rel.additional_locations) == 1 diff --git a/codex-lens/tests/test_rrf_fusion.py b/codex-lens/tests/test_rrf_fusion.py deleted file mode 100644 index 762d4b54..00000000 --- a/codex-lens/tests/test_rrf_fusion.py +++ /dev/null @@ -1,584 +0,0 @@ -"""Tests for Reciprocal Rank Fusion (RRF) algorithm (P2). - -Tests RRF fusion logic, score computation, weight handling, and result ranking. -""" - -import math - -import pytest - -from codexlens.entities import SearchResult -from codexlens.search.ranking import ( - apply_symbol_boost, - QueryIntent, - detect_query_intent, - normalize_bm25_score, - normalize_weights, - reciprocal_rank_fusion, - rerank_results, - tag_search_source, -) - - -class TestReciprocalRankFusion: - """Tests for reciprocal_rank_fusion function.""" - - def test_single_source_ranking(self): - """Test RRF with single source returns ranked results.""" - results = [ - SearchResult(path="a.py", score=10.0, excerpt="..."), - SearchResult(path="b.py", score=8.0, excerpt="..."), - SearchResult(path="c.py", score=6.0, excerpt="..."), - ] - results_map = {"exact": results} - - fused = reciprocal_rank_fusion(results_map) - - assert len(fused) == 3 - # Order should be preserved (highest original score first) - assert fused[0].path == "a.py" - assert fused[1].path == "b.py" - assert fused[2].path == "c.py" - - def test_two_sources_fusion(self): - """Test RRF combines rankings from two sources.""" - exact_results = [ - SearchResult(path="a.py", score=10.0, excerpt="..."), - SearchResult(path="b.py", score=8.0, excerpt="..."), - SearchResult(path="c.py", score=6.0, excerpt="..."), - ] - fuzzy_results = [ - SearchResult(path="b.py", score=9.0, excerpt="..."), - SearchResult(path="c.py", score=7.0, excerpt="..."), - SearchResult(path="d.py", score=5.0, excerpt="..."), - ] - results_map = {"exact": exact_results, "fuzzy": fuzzy_results} - - fused = reciprocal_rank_fusion(results_map) - - # Should have all unique paths - paths = [r.path for r in fused] - assert set(paths) == {"a.py", "b.py", "c.py", "d.py"} - - # Results appearing in both should rank higher - # b.py and c.py appear in both sources - assert fused[0].path in ["b.py", "c.py"], "Items in both sources should rank highest" - - def test_rrf_score_calculation(self): - """Test RRF scores are calculated correctly with default k=60.""" - # Simple scenario: single source - results = [SearchResult(path="a.py", score=10.0, excerpt="...")] - results_map = {"exact": results} - - fused = reciprocal_rank_fusion(results_map, k=60) - - # RRF score = weight / (k + rank) = 1.0 / (60 + 1) ≈ 0.0164 - expected_score = 1.0 / 61 - assert abs(fused[0].score - expected_score) < 0.001 - - def test_custom_weights(self): - """Test custom weights affect RRF scores.""" - results_a = [SearchResult(path="a.py", score=10.0, excerpt="...")] - results_b = [SearchResult(path="a.py", score=10.0, excerpt="...")] - - results_map = {"exact": results_a, "fuzzy": results_b} - - # Higher weight for exact - weights = {"exact": 0.7, "fuzzy": 0.3} - fused = reciprocal_rank_fusion(results_map, weights=weights, k=60) - - # Score should be: 0.7/(60+1) + 0.3/(60+1) = 1.0/61 ≈ 0.0164 - expected_score = (0.7 + 0.3) / 61 - assert abs(fused[0].score - expected_score) < 0.001 - - def test_weight_normalization(self): - """Test weights are normalized to sum to 1.0.""" - results = [SearchResult(path="a.py", score=10.0, excerpt="...")] - results_map = {"exact": results} - - # Weights not summing to 1.0 - weights = {"exact": 2.0} # Will be normalized to 1.0 - fused = reciprocal_rank_fusion(results_map, weights=weights) - - # Should work without error and produce normalized scores - assert len(fused) == 1 - assert fused[0].score > 0 - - def test_empty_results_map(self): - """Test RRF with empty results returns empty list.""" - fused = reciprocal_rank_fusion({}) - assert fused == [] - - def test_zero_weight_source_ignored(self): - """Test sources with zero weight are ignored.""" - results_a = [SearchResult(path="a.py", score=10.0, excerpt="...")] - results_b = [SearchResult(path="b.py", score=10.0, excerpt="...")] - - results_map = {"exact": results_a, "fuzzy": results_b} - weights = {"exact": 1.0, "fuzzy": 0.0} # Ignore fuzzy - - fused = reciprocal_rank_fusion(results_map, weights=weights) - - # Should only have result from exact source - assert len(fused) == 1 - assert fused[0].path == "a.py" - - def test_fusion_score_in_metadata(self): - """Test fusion score is stored in result metadata.""" - results = [SearchResult(path="a.py", score=10.0, excerpt="...")] - results_map = {"exact": results} - - fused = reciprocal_rank_fusion(results_map) - - # Check metadata - assert "fusion_score" in fused[0].metadata - assert "original_score" in fused[0].metadata - assert fused[0].metadata["original_score"] == 10.0 - - def test_rank_order_matters(self): - """Test rank position affects RRF score (lower rank = higher score).""" - results = [ - SearchResult(path="a.py", score=10.0, excerpt="..."), # rank 1 - SearchResult(path="b.py", score=8.0, excerpt="..."), # rank 2 - SearchResult(path="c.py", score=6.0, excerpt="..."), # rank 3 - ] - results_map = {"exact": results} - - fused = reciprocal_rank_fusion(results_map, k=60) - - # a.py (rank 1): score = 1/(60+1) ≈ 0.0164 - # b.py (rank 2): score = 1/(60+2) ≈ 0.0161 - # c.py (rank 3): score = 1/(60+3) ≈ 0.0159 - assert fused[0].score > fused[1].score > fused[2].score - - -class TestRRFSyntheticRankings: - """Tests with synthetic rankings to verify RRF correctness.""" - - def test_perfect_agreement(self): - """Test RRF when all sources rank items identically.""" - # All sources rank a > b > c - exact = [ - SearchResult(path="a.py", score=10.0, excerpt="..."), - SearchResult(path="b.py", score=8.0, excerpt="..."), - SearchResult(path="c.py", score=6.0, excerpt="..."), - ] - fuzzy = [ - SearchResult(path="a.py", score=9.0, excerpt="..."), - SearchResult(path="b.py", score=7.0, excerpt="..."), - SearchResult(path="c.py", score=5.0, excerpt="..."), - ] - - results_map = {"exact": exact, "fuzzy": fuzzy} - fused = reciprocal_rank_fusion(results_map) - - # Order should match both sources - assert fused[0].path == "a.py" - assert fused[1].path == "b.py" - assert fused[2].path == "c.py" - - def test_complete_disagreement(self): - """Test RRF when sources have opposite rankings.""" - # exact: a > b > c - # fuzzy: c > b > a - exact = [ - SearchResult(path="a.py", score=10.0, excerpt="..."), - SearchResult(path="b.py", score=8.0, excerpt="..."), - SearchResult(path="c.py", score=6.0, excerpt="..."), - ] - fuzzy = [ - SearchResult(path="c.py", score=9.0, excerpt="..."), - SearchResult(path="b.py", score=7.0, excerpt="..."), - SearchResult(path="a.py", score=5.0, excerpt="..."), - ] - - results_map = {"exact": exact, "fuzzy": fuzzy} - fused = reciprocal_rank_fusion(results_map) - - # With opposite rankings, a.py and c.py get equal RRF scores: - # a.py: 0.5/(60+1) + 0.5/(60+3) = 0.01613 - # c.py: 0.5/(60+3) + 0.5/(60+1) = 0.01613 (same!) - # b.py: 0.5/(60+2) + 0.5/(60+2) = 0.01613 (slightly lower due to rounding) - # So top result should be a.py or c.py (tied) - assert fused[0].path in ["a.py", "c.py"], "Items with symmetric ranks should tie for first" - - def test_partial_overlap(self): - """Test RRF with partial overlap between sources.""" - # exact: [A, B, C] - # fuzzy: [B, C, D] - exact = [ - SearchResult(path="A", score=10.0, excerpt="..."), - SearchResult(path="B", score=8.0, excerpt="..."), - SearchResult(path="C", score=6.0, excerpt="..."), - ] - fuzzy = [ - SearchResult(path="B", score=9.0, excerpt="..."), - SearchResult(path="C", score=7.0, excerpt="..."), - SearchResult(path="D", score=5.0, excerpt="..."), - ] - - results_map = {"exact": exact, "fuzzy": fuzzy} - fused = reciprocal_rank_fusion(results_map) - - # B and C appear in both, should rank higher than A and D - paths = [r.path for r in fused] - b_idx = paths.index("B") - c_idx = paths.index("C") - a_idx = paths.index("A") - d_idx = paths.index("D") - - assert b_idx < a_idx, "B (in both) should outrank A (in one)" - assert c_idx < d_idx, "C (in both) should outrank D (in one)" - - def test_three_sources(self): - """Test RRF with three sources (exact, fuzzy, vector).""" - exact = [SearchResult(path="a.py", score=10.0, excerpt="...")] - fuzzy = [SearchResult(path="b.py", score=9.0, excerpt="...")] - vector = [SearchResult(path="c.py", score=8.0, excerpt="...")] - - results_map = {"exact": exact, "fuzzy": fuzzy, "vector": vector} - weights = {"exact": 0.3, "fuzzy": 0.1, "vector": 0.6} - - fused = reciprocal_rank_fusion(results_map, weights=weights) - - assert len(fused) == 3 - # Each appears in one source only, so scores differ by weights - # c.py: 0.6/61 ≈ 0.0098 (vector, highest weight) - # a.py: 0.3/61 ≈ 0.0049 (exact) - # b.py: 0.1/61 ≈ 0.0016 (fuzzy) - assert fused[0].path == "c.py", "Vector (higher weight) should rank first" - - -class TestNormalizeBM25Score: - """Tests for normalize_bm25_score function.""" - - def test_negative_bm25_normalization(self): - """Test BM25 scores (negative) are normalized to 0-1 range.""" - # SQLite FTS5 returns negative BM25 scores - scores = [-20.0, -10.0, -5.0, -1.0, 0.0] - - for score in scores: - normalized = normalize_bm25_score(score) - assert 0.0 <= normalized <= 1.0, f"Normalized score {normalized} out of range" - - def test_better_match_higher_score(self): - """Test more negative BM25 (better match) gives higher normalized score.""" - good_match = -15.0 - weak_match = -2.0 - - norm_good = normalize_bm25_score(good_match) - norm_weak = normalize_bm25_score(weak_match) - - assert norm_good > norm_weak, "Better match should have higher normalized score" - - def test_zero_score(self): - """Test zero BM25 score normalization.""" - normalized = normalize_bm25_score(0.0) - assert 0.0 <= normalized <= 1.0 - - def test_positive_score_handling(self): - """Test positive scores (edge case) are handled.""" - normalized = normalize_bm25_score(5.0) - # Should still be in valid range - assert 0.0 <= normalized <= 1.0 - - -class TestNormalizeWeights: - """Tests for normalize_weights function.""" - - def test_normalize_weights_with_nan(self): - """NaN total returns unchanged weights without division.""" - weights = {"exact": float("nan"), "fuzzy": None} - - normalized = normalize_weights(weights) - - assert normalized is not weights - assert set(normalized.keys()) == set(weights.keys()) - assert math.isnan(normalized["exact"]) - assert normalized["fuzzy"] is None - - def test_normalize_weights_with_infinity(self): - """Infinity total returns unchanged weights without division.""" - weights = {"exact": float("inf"), "fuzzy": None} - - normalized = normalize_weights(weights) - - assert normalized is not weights - assert normalized == weights - - def test_normalize_weights_with_all_none(self): - """All-None weights return unchanged weights without division.""" - weights = {"exact": None, "fuzzy": None} - - normalized = normalize_weights(weights) - - assert normalized is not weights - assert normalized == weights - - def test_normalize_weights_with_zero_total(self): - """Zero total returns unchanged weights without division.""" - weights = {"exact": 0.0, "fuzzy": 0.0} - - normalized = normalize_weights(weights) - - assert normalized is not weights - assert normalized == weights - - def test_normalize_weights_with_negative_total(self): - """Negative total returns unchanged weights without division.""" - weights = {"exact": -1.0, "fuzzy": -0.5} - - normalized = normalize_weights(weights) - - assert normalized is not weights - assert normalized == weights - - def test_normalize_weights_valid_total_normalizes(self): - """Valid finite positive total performs normalization correctly.""" - weights = {"exact": 2.0, "fuzzy": 1.0} - - normalized = normalize_weights(weights) - - assert normalized is not weights - assert normalized["exact"] == pytest.approx(2.0 / 3.0) - assert normalized["fuzzy"] == pytest.approx(1.0 / 3.0) - assert (normalized["exact"] + normalized["fuzzy"]) == pytest.approx(1.0) - - -class TestTagSearchSource: - """Tests for tag_search_source function.""" - - def test_tagging_adds_source_metadata(self): - """Test tagging adds search_source to metadata.""" - results = [ - SearchResult(path="a.py", score=10.0, excerpt="..."), - SearchResult(path="b.py", score=8.0, excerpt="..."), - ] - - tagged = tag_search_source(results, "exact") - - for result in tagged: - assert "search_source" in result.metadata - assert result.metadata["search_source"] == "exact" - - def test_tagging_preserves_existing_metadata(self): - """Test tagging preserves existing metadata fields.""" - results = [ - SearchResult( - path="a.py", - score=10.0, - excerpt="...", - metadata={"custom_field": "value"} - ), - ] - - tagged = tag_search_source(results, "fuzzy") - - assert "custom_field" in tagged[0].metadata - assert tagged[0].metadata["custom_field"] == "value" - assert "search_source" in tagged[0].metadata - assert tagged[0].metadata["search_source"] == "fuzzy" - - def test_tagging_empty_list(self): - """Test tagging empty list returns empty list.""" - tagged = tag_search_source([], "exact") - assert tagged == [] - - def test_tagging_preserves_result_fields(self): - """Test tagging preserves all SearchResult fields.""" - results = [ - SearchResult( - path="a.py", - score=10.0, - excerpt="test excerpt", - content="full content", - start_line=10, - end_line=20, - symbol_name="test_func", - symbol_kind="function" - ), - ] - - tagged = tag_search_source(results, "exact") - - assert tagged[0].path == "a.py" - assert tagged[0].score == 10.0 - assert tagged[0].excerpt == "test excerpt" - assert tagged[0].content == "full content" - assert tagged[0].start_line == 10 - assert tagged[0].end_line == 20 - assert tagged[0].symbol_name == "test_func" - assert tagged[0].symbol_kind == "function" - - -class TestSymbolBoost: - """Tests for apply_symbol_boost function.""" - - def test_symbol_boost(self): - results = [ - SearchResult(path="a.py", score=0.2, excerpt="...", symbol_name="foo"), - SearchResult(path="b.py", score=0.21, excerpt="..."), - ] - - boosted = apply_symbol_boost(results, boost_factor=1.5) - - assert boosted[0].path == "a.py" - assert boosted[0].score == pytest.approx(0.2 * 1.5) - assert boosted[0].metadata["boosted"] is True - assert boosted[0].metadata["original_fusion_score"] == pytest.approx(0.2) - - assert boosted[1].path == "b.py" - assert boosted[1].score == pytest.approx(0.21) - assert "boosted" not in boosted[1].metadata - - -class TestEmbeddingReranking: - """Tests for rerank_results embedding-based similarity.""" - - def test_rerank_embedding_similarity(self): - class DummyEmbedder: - def embed(self, texts): - if isinstance(texts, str): - texts = [texts] - mapping = { - "query": [1.0, 0.0], - "doc1": [1.0, 0.0], - "doc2": [0.0, 1.0], - } - return [mapping[t] for t in texts] - - results = [ - SearchResult(path="a.py", score=0.2, excerpt="doc1"), - SearchResult(path="b.py", score=0.9, excerpt="doc2"), - ] - - reranked = rerank_results("query", results, DummyEmbedder(), top_k=2) - - assert reranked[0].path == "a.py" - assert reranked[0].metadata["reranked"] is True - assert reranked[0].metadata["rrf_score"] == pytest.approx(0.2) - assert reranked[0].metadata["cosine_similarity"] == pytest.approx(1.0) - assert reranked[0].score == pytest.approx(0.5 * 0.2 + 0.5 * 1.0) - - assert reranked[1].path == "b.py" - assert reranked[1].metadata["reranked"] is True - assert reranked[1].metadata["rrf_score"] == pytest.approx(0.9) - assert reranked[1].metadata["cosine_similarity"] == pytest.approx(0.0) - assert reranked[1].score == pytest.approx(0.5 * 0.9 + 0.5 * 0.0) - - -@pytest.mark.parametrize("k_value", [30, 60, 100]) -class TestRRFParameterized: - """Parameterized tests for RRF with different k values.""" - - def test_k_value_affects_scores(self, k_value): - """Test k parameter affects RRF score magnitude.""" - results = [SearchResult(path="a.py", score=10.0, excerpt="...")] - results_map = {"exact": results} - - fused = reciprocal_rank_fusion(results_map, k=k_value) - - # Score should be 1.0 / (k + 1) - expected = 1.0 / (k_value + 1) - assert abs(fused[0].score - expected) < 0.001 - - -class TestRRFEdgeCases: - """Edge case tests for RRF.""" - - def test_duplicate_paths_in_same_source(self): - """Test handling of duplicate paths in single source.""" - results = [ - SearchResult(path="a.py", score=10.0, excerpt="..."), - SearchResult(path="a.py", score=8.0, excerpt="..."), # Duplicate - ] - results_map = {"exact": results} - - fused = reciprocal_rank_fusion(results_map) - - # Should deduplicate (first occurrence wins) - assert len(fused) == 1 - assert fused[0].path == "a.py" - - def test_very_large_result_lists(self): - """Test RRF handles large result sets efficiently.""" - # Create 1000 results - results = [ - SearchResult(path=f"file{i}.py", score=1000-i, excerpt="...") - for i in range(1000) - ] - results_map = {"exact": results} - - fused = reciprocal_rank_fusion(results_map) - - assert len(fused) == 1000 - # Should maintain ranking - assert fused[0].path == "file0.py" - assert fused[-1].path == "file999.py" - - def test_all_same_score(self): - """Test RRF when all results have same original score.""" - results = [ - SearchResult(path="a.py", score=10.0, excerpt="..."), - SearchResult(path="b.py", score=10.0, excerpt="..."), - SearchResult(path="c.py", score=10.0, excerpt="..."), - ] - results_map = {"exact": results} - - fused = reciprocal_rank_fusion(results_map) - - # Should still rank by position (rank matters) - assert len(fused) == 3 - assert fused[0].score > fused[1].score > fused[2].score - - def test_missing_weight_for_source(self): - """Test missing weight for source uses default.""" - results = [SearchResult(path="a.py", score=10.0, excerpt="...")] - results_map = {"exact": results, "fuzzy": results} - - # Only provide weight for exact - weights = {"exact": 1.0} - - fused = reciprocal_rank_fusion(results_map, weights=weights) - - # Should work with normalization - assert len(fused) == 1 # Deduplicated - assert fused[0].score > 0 - - -class TestSymbolBoostAndIntentV1: - """Tests for symbol boosting and query intent detection (v1.0).""" - - def test_symbol_boost_application(self): - """Results with symbol_name receive a multiplicative boost (default 1.5x).""" - results = [ - SearchResult(path="a.py", score=0.4, excerpt="...", symbol_name="AuthManager"), - SearchResult(path="b.py", score=0.41, excerpt="..."), - ] - - boosted = apply_symbol_boost(results, boost_factor=1.5) - - assert boosted[0].score == pytest.approx(0.4 * 1.5) - assert boosted[0].metadata["boosted"] is True - assert boosted[0].metadata["original_fusion_score"] == pytest.approx(0.4) - assert boosted[1].score == pytest.approx(0.41) - assert "boosted" not in boosted[1].metadata - - @pytest.mark.parametrize( - ("query", "expected"), - [ - ("def authenticate", QueryIntent.KEYWORD), - ("MyClass", QueryIntent.KEYWORD), - ("user_id", QueryIntent.KEYWORD), - ("UserService::authenticate", QueryIntent.KEYWORD), - ("ptr->next", QueryIntent.KEYWORD), - ("how to handle user login", QueryIntent.SEMANTIC), - ("what is authentication?", QueryIntent.SEMANTIC), - ("where is this used?", QueryIntent.SEMANTIC), - ("why does FooBar crash?", QueryIntent.MIXED), - ("how to use user_id in query", QueryIntent.MIXED), - ], - ) - def test_query_intent_detection(self, query, expected): - """Detect intent for representative queries (Python/TypeScript parity).""" - assert detect_query_intent(query) == expected diff --git a/codex-lens/tests/test_schema_cleanup_migration.py b/codex-lens/tests/test_schema_cleanup_migration.py deleted file mode 100644 index e7848f33..00000000 --- a/codex-lens/tests/test_schema_cleanup_migration.py +++ /dev/null @@ -1,308 +0,0 @@ -""" -Test migration 005: Schema cleanup for unused/redundant fields. - -Tests that migration 005 successfully removes: -1. semantic_metadata.keywords (replaced by file_keywords) -2. symbols.token_count (unused) -3. symbols.symbol_type (redundant with kind) -4. subdirs.direct_files (unused) -""" - -import sqlite3 -import tempfile -from pathlib import Path - -import pytest - -from codexlens.storage.dir_index import DirIndexStore -from codexlens.entities import Symbol - - -class TestSchemaCleanupMigration: - """Test schema cleanup migration (v4 -> latest).""" - - def test_migration_from_v4_to_v5(self): - """Test that migration successfully removes deprecated fields.""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "_index.db" - store = DirIndexStore(db_path) - - # Create v4 schema manually (with deprecated fields) - conn = sqlite3.connect(db_path) - conn.row_factory = sqlite3.Row - cursor = conn.cursor() - - # Set schema version to 4 - cursor.execute("PRAGMA user_version = 4") - - # Create v4 schema with deprecated fields - cursor.execute(""" - CREATE TABLE files ( - id INTEGER PRIMARY KEY, - name TEXT NOT NULL, - full_path TEXT UNIQUE NOT NULL, - language TEXT, - content TEXT, - mtime REAL, - line_count INTEGER - ) - """) - - cursor.execute(""" - CREATE TABLE subdirs ( - id INTEGER PRIMARY KEY, - name TEXT NOT NULL UNIQUE, - index_path TEXT NOT NULL, - files_count INTEGER DEFAULT 0, - direct_files INTEGER DEFAULT 0, - last_updated REAL - ) - """) - - cursor.execute(""" - CREATE TABLE symbols ( - id INTEGER PRIMARY KEY, - file_id INTEGER REFERENCES files(id) ON DELETE CASCADE, - name TEXT NOT NULL, - kind TEXT NOT NULL, - start_line INTEGER, - end_line INTEGER, - token_count INTEGER, - symbol_type TEXT - ) - """) - - cursor.execute(""" - CREATE TABLE semantic_metadata ( - id INTEGER PRIMARY KEY, - file_id INTEGER UNIQUE REFERENCES files(id) ON DELETE CASCADE, - summary TEXT, - keywords TEXT, - purpose TEXT, - llm_tool TEXT, - generated_at REAL - ) - """) - - cursor.execute(""" - CREATE TABLE keywords ( - id INTEGER PRIMARY KEY, - keyword TEXT NOT NULL UNIQUE - ) - """) - - cursor.execute(""" - CREATE TABLE file_keywords ( - file_id INTEGER NOT NULL, - keyword_id INTEGER NOT NULL, - PRIMARY KEY (file_id, keyword_id), - FOREIGN KEY (file_id) REFERENCES files (id) ON DELETE CASCADE, - FOREIGN KEY (keyword_id) REFERENCES keywords (id) ON DELETE CASCADE - ) - """) - - # Insert test data - cursor.execute( - "INSERT INTO files (name, full_path, language, content, mtime, line_count) VALUES (?, ?, ?, ?, ?, ?)", - ("test.py", "/test/test.py", "python", "def test(): pass", 1234567890.0, 1) - ) - file_id = cursor.lastrowid - - cursor.execute( - "INSERT INTO symbols (file_id, name, kind, start_line, end_line, token_count, symbol_type) VALUES (?, ?, ?, ?, ?, ?, ?)", - (file_id, "test", "function", 1, 1, 10, "function") - ) - - cursor.execute( - "INSERT INTO semantic_metadata (file_id, summary, keywords, purpose, llm_tool, generated_at) VALUES (?, ?, ?, ?, ?, ?)", - (file_id, "Test function", '["test", "example"]', "Testing", "gemini", 1234567890.0) - ) - - cursor.execute( - "INSERT INTO subdirs (name, index_path, files_count, direct_files, last_updated) VALUES (?, ?, ?, ?, ?)", - ("subdir", "/test/subdir/_index.db", 5, 2, 1234567890.0) - ) - - conn.commit() - conn.close() - - # Now initialize store - this should trigger migration - store.initialize() - - # Verify schema version is now the latest - conn = store._get_connection() - version_row = conn.execute("PRAGMA user_version").fetchone() - assert version_row[0] == DirIndexStore.SCHEMA_VERSION, ( - f"Expected schema version {DirIndexStore.SCHEMA_VERSION}, got {version_row[0]}" - ) - - # Check that deprecated columns are removed - # 1. Check semantic_metadata doesn't have keywords column - cursor = conn.execute("PRAGMA table_info(semantic_metadata)") - columns = {row[1] for row in cursor.fetchall()} - assert "keywords" not in columns, "semantic_metadata.keywords should be removed" - assert "summary" in columns, "semantic_metadata.summary should exist" - assert "purpose" in columns, "semantic_metadata.purpose should exist" - - # 2. Check symbols doesn't have token_count or symbol_type - cursor = conn.execute("PRAGMA table_info(symbols)") - columns = {row[1] for row in cursor.fetchall()} - assert "token_count" not in columns, "symbols.token_count should be removed" - assert "symbol_type" not in columns, "symbols.symbol_type should be removed" - assert "kind" in columns, "symbols.kind should exist" - - # 3. Check subdirs doesn't have direct_files - cursor = conn.execute("PRAGMA table_info(subdirs)") - columns = {row[1] for row in cursor.fetchall()} - assert "direct_files" not in columns, "subdirs.direct_files should be removed" - assert "files_count" in columns, "subdirs.files_count should exist" - - # 4. Verify data integrity - data should be preserved - semantic = store.get_semantic_metadata(file_id) - assert semantic is not None, "Semantic metadata should be preserved" - assert semantic["summary"] == "Test function" - assert semantic["purpose"] == "Testing" - # Keywords should now come from file_keywords table (empty after migration since we didn't populate it) - assert isinstance(semantic["keywords"], list) - - store.close() - - def test_new_database_has_clean_schema(self): - """Test that new databases are created with clean schema (latest).""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - - conn = store._get_connection() - - # Verify schema version is the latest - version_row = conn.execute("PRAGMA user_version").fetchone() - assert version_row[0] == DirIndexStore.SCHEMA_VERSION - - # Check that new schema doesn't have deprecated columns - cursor = conn.execute("PRAGMA table_info(semantic_metadata)") - columns = {row[1] for row in cursor.fetchall()} - assert "keywords" not in columns - - cursor = conn.execute("PRAGMA table_info(symbols)") - columns = {row[1] for row in cursor.fetchall()} - assert "token_count" not in columns - assert "symbol_type" not in columns - - cursor = conn.execute("PRAGMA table_info(subdirs)") - columns = {row[1] for row in cursor.fetchall()} - assert "direct_files" not in columns - - store.close() - - def test_semantic_metadata_keywords_from_normalized_table(self): - """Test that keywords are read from file_keywords table, not JSON column.""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - - # Add a file - file_id = store.add_file( - name="test.py", - full_path="/test/test.py", - content="def test(): pass", - language="python", - symbols=[] - ) - - # Add semantic metadata with keywords - store.add_semantic_metadata( - file_id=file_id, - summary="Test function", - keywords=["test", "example", "function"], - purpose="Testing", - llm_tool="gemini" - ) - - # Retrieve and verify keywords come from normalized table - semantic = store.get_semantic_metadata(file_id) - assert semantic is not None - assert sorted(semantic["keywords"]) == ["example", "function", "test"] - - # Verify keywords are in normalized tables - conn = store._get_connection() - keyword_count = conn.execute( - """SELECT COUNT(*) FROM file_keywords WHERE file_id = ?""", - (file_id,) - ).fetchone()[0] - assert keyword_count == 3 - - store.close() - - def test_symbols_insert_without_deprecated_fields(self): - """Test that symbols can be inserted without token_count and symbol_type.""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - - # Add file with symbols - symbols = [ - Symbol(name="test_func", kind="function", range=(1, 5)), - Symbol(name="TestClass", kind="class", range=(7, 20)), - ] - - file_id = store.add_file( - name="test.py", - full_path="/test/test.py", - content="def test_func(): pass\n\nclass TestClass:\n pass", - language="python", - symbols=symbols - ) - - # Verify symbols were inserted - conn = store._get_connection() - symbol_rows = conn.execute( - "SELECT name, kind, start_line, end_line FROM symbols WHERE file_id = ?", - (file_id,) - ).fetchall() - - assert len(symbol_rows) == 2 - assert symbol_rows[0]["name"] == "test_func" - assert symbol_rows[0]["kind"] == "function" - assert symbol_rows[1]["name"] == "TestClass" - assert symbol_rows[1]["kind"] == "class" - - store.close() - - def test_subdir_operations_without_direct_files(self): - """Test that subdir operations work without direct_files field.""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - - # Register subdir (direct_files parameter is ignored) - store.register_subdir( - name="subdir", - index_path="/test/subdir/_index.db", - files_count=10, - direct_files=5 # This should be ignored - ) - - # Retrieve and verify - subdir = store.get_subdir("subdir") - assert subdir is not None - assert subdir.name == "subdir" - assert subdir.files_count == 10 - assert not hasattr(subdir, "direct_files") # Should not have this attribute - - # Update stats (direct_files parameter is ignored) - store.update_subdir_stats("subdir", files_count=15, direct_files=7) - - # Verify update - subdir = store.get_subdir("subdir") - assert subdir.files_count == 15 - - store.close() - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/codex-lens/tests/test_search_comparison.py b/codex-lens/tests/test_search_comparison.py deleted file mode 100644 index 878dea23..00000000 --- a/codex-lens/tests/test_search_comparison.py +++ /dev/null @@ -1,540 +0,0 @@ -"""Comprehensive comparison test for vector search vs hybrid search. - -This test diagnoses why vector search returns empty results and compares -performance between different search modes. -""" - -import json -import sqlite3 -import tempfile -import time -from pathlib import Path -from typing import Dict, List, Any - -import pytest - -from codexlens.entities import SearchResult -from codexlens.search.hybrid_search import HybridSearchEngine -from codexlens.storage.dir_index import DirIndexStore - -# Check semantic search availability -try: - from codexlens.semantic.embedder import Embedder - from codexlens.semantic.vector_store import VectorStore - from codexlens.semantic import SEMANTIC_AVAILABLE - SEMANTIC_DEPS_AVAILABLE = SEMANTIC_AVAILABLE -except ImportError: - SEMANTIC_DEPS_AVAILABLE = False - - -class TestSearchComparison: - """Comprehensive comparison of search modes.""" - - @pytest.fixture - def sample_project_db(self): - """Create sample project database with semantic chunks.""" - with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir: - db_path = Path(tmpdir) / "_index.db" - - store = DirIndexStore(db_path) - store.initialize() - - # Sample files with varied content for testing - sample_files = { - "src/auth/authentication.py": """ -def authenticate_user(username: str, password: str) -> bool: - '''Authenticate user with credentials using bcrypt hashing. - - This function validates user credentials against the database - and returns True if authentication succeeds. - ''' - hashed = hash_password(password) - return verify_credentials(username, hashed) - -def hash_password(password: str) -> str: - '''Hash password using bcrypt algorithm.''' - import bcrypt - return bcrypt.hashpw(password.encode(), bcrypt.gensalt()).decode() - -def verify_credentials(user: str, pwd_hash: str) -> bool: - '''Verify user credentials against database.''' - # Database verification logic - return True -""", - "src/auth/authorization.py": """ -def authorize_action(user_id: int, resource: str, action: str) -> bool: - '''Authorize user action on resource using role-based access control. - - Checks if user has permission to perform action on resource - based on their assigned roles. - ''' - roles = get_user_roles(user_id) - permissions = get_role_permissions(roles) - return has_permission(permissions, resource, action) - -def get_user_roles(user_id: int) -> List[str]: - '''Fetch user roles from database.''' - return ["user", "admin"] - -def has_permission(permissions, resource, action) -> bool: - '''Check if permissions allow action on resource.''' - return True -""", - "src/models/user.py": """ -from dataclasses import dataclass -from typing import Optional - -@dataclass -class User: - '''User model representing application users. - - Stores user profile information and authentication state. - ''' - id: int - username: str - email: str - password_hash: str - is_active: bool = True - - def authenticate(self, password: str) -> bool: - '''Authenticate this user with password.''' - from auth.authentication import verify_credentials - return verify_credentials(self.username, password) - - def has_role(self, role: str) -> bool: - '''Check if user has specific role.''' - return True -""", - "src/api/user_api.py": """ -from flask import Flask, request, jsonify -from models.user import User - -app = Flask(__name__) - -@app.route('/api/user/', methods=['GET']) -def get_user(user_id: int): - '''Get user by ID from database. - - Returns user profile information as JSON. - ''' - user = User.query.get(user_id) - return jsonify(user.to_dict()) - -@app.route('/api/user/login', methods=['POST']) -def login(): - '''User login endpoint using username and password. - - Authenticates user and returns session token. - ''' - data = request.json - username = data.get('username') - password = data.get('password') - - if authenticate_user(username, password): - token = generate_session_token(username) - return jsonify({'token': token}) - return jsonify({'error': 'Invalid credentials'}), 401 -""", - "tests/test_auth.py": """ -import pytest -from auth.authentication import authenticate_user, hash_password - -class TestAuthentication: - '''Test authentication functionality.''' - - def test_authenticate_valid_user(self): - '''Test authentication with valid credentials.''' - assert authenticate_user("testuser", "password123") == True - - def test_authenticate_invalid_user(self): - '''Test authentication with invalid credentials.''' - assert authenticate_user("invalid", "wrong") == False - - def test_password_hashing(self): - '''Test password hashing produces unique hashes.''' - hash1 = hash_password("password") - hash2 = hash_password("password") - assert hash1 != hash2 # Salts should differ -""", - } - - # Insert files into database - with store._get_connection() as conn: - for file_path, content in sample_files.items(): - name = file_path.split('/')[-1] - lang = "python" - conn.execute( - """INSERT INTO files (name, full_path, content, language, mtime) - VALUES (?, ?, ?, ?, ?)""", - (name, file_path, content, lang, time.time()) - ) - conn.commit() - - yield db_path - store.close() - - def _check_semantic_chunks_table(self, db_path: Path) -> Dict[str, Any]: - """Check if semantic_chunks table exists and has data.""" - with sqlite3.connect(db_path) as conn: - cursor = conn.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_chunks'" - ) - table_exists = cursor.fetchone() is not None - - chunk_count = 0 - if table_exists: - cursor = conn.execute("SELECT COUNT(*) FROM semantic_chunks") - chunk_count = cursor.fetchone()[0] - - return { - "table_exists": table_exists, - "chunk_count": chunk_count, - } - - def _create_vector_index(self, db_path: Path) -> Dict[str, Any]: - """Create vector embeddings for indexed files.""" - if not SEMANTIC_DEPS_AVAILABLE: - return { - "success": False, - "error": "Semantic dependencies not available", - "chunks_created": 0, - } - - try: - from codexlens.semantic.chunker import Chunker, ChunkConfig - - # Initialize embedder and vector store - embedder = Embedder(profile="code") - vector_store = VectorStore(db_path) - chunker = Chunker(config=ChunkConfig(max_chunk_size=2000)) - - # Read files from database - with sqlite3.connect(db_path) as conn: - conn.row_factory = sqlite3.Row - cursor = conn.execute("SELECT full_path, content FROM files") - files = cursor.fetchall() - - chunks_created = 0 - for file_row in files: - file_path = file_row["full_path"] - content = file_row["content"] - - # Create semantic chunks using sliding window - chunks = chunker.chunk_sliding_window( - content, - file_path=file_path, - language="python" - ) - - # Generate embeddings - for chunk in chunks: - embedding = embedder.embed_single(chunk.content) - chunk.embedding = embedding - - # Store chunks - if chunks: # Only store if we have chunks - vector_store.add_chunks(chunks, file_path) - chunks_created += len(chunks) - - return { - "success": True, - "chunks_created": chunks_created, - "files_processed": len(files), - } - except Exception as exc: - return { - "success": False, - "error": str(exc), - "chunks_created": 0, - } - - def _run_search_mode( - self, - db_path: Path, - query: str, - mode: str, - limit: int = 10, - ) -> Dict[str, Any]: - """Run search in specified mode and collect metrics.""" - engine = HybridSearchEngine() - - # Map mode to parameters - pure_vector = False - if mode == "exact": - enable_fuzzy, enable_vector = False, False - elif mode == "fuzzy": - enable_fuzzy, enable_vector = True, False - elif mode == "vector": - enable_fuzzy, enable_vector = False, True - pure_vector = True # Use pure vector mode for vector-only search - elif mode == "hybrid": - enable_fuzzy, enable_vector = True, True - else: - raise ValueError(f"Invalid mode: {mode}") - - # Measure search time - start_time = time.time() - try: - results = engine.search( - db_path, - query, - limit=limit, - enable_fuzzy=enable_fuzzy, - enable_vector=enable_vector, - pure_vector=pure_vector, - ) - elapsed_ms = (time.time() - start_time) * 1000 - - return { - "success": True, - "mode": mode, - "query": query, - "result_count": len(results), - "elapsed_ms": elapsed_ms, - "results": [ - { - "path": r.path, - "score": r.score, - "excerpt": r.excerpt[:100] if r.excerpt else "", - "source": getattr(r, "search_source", None), - } - for r in results[:5] # Top 5 results - ], - } - except Exception as exc: - elapsed_ms = (time.time() - start_time) * 1000 - return { - "success": False, - "mode": mode, - "query": query, - "error": str(exc), - "elapsed_ms": elapsed_ms, - "result_count": 0, - } - - @pytest.mark.skipif(not SEMANTIC_DEPS_AVAILABLE, reason="Semantic dependencies not available") - def test_full_search_comparison_with_vectors(self, sample_project_db): - """Complete search comparison test with vector embeddings.""" - db_path = sample_project_db - - # Step 1: Check initial state - print("\n=== Step 1: Checking initial database state ===") - initial_state = self._check_semantic_chunks_table(db_path) - print(f"Table exists: {initial_state['table_exists']}") - print(f"Chunk count: {initial_state['chunk_count']}") - - # Step 2: Create vector index - print("\n=== Step 2: Creating vector embeddings ===") - vector_result = self._create_vector_index(db_path) - print(f"Success: {vector_result['success']}") - if vector_result['success']: - print(f"Chunks created: {vector_result['chunks_created']}") - print(f"Files processed: {vector_result['files_processed']}") - else: - print(f"Error: {vector_result.get('error', 'Unknown')}") - - # Step 3: Verify vector index was created - print("\n=== Step 3: Verifying vector index ===") - final_state = self._check_semantic_chunks_table(db_path) - print(f"Table exists: {final_state['table_exists']}") - print(f"Chunk count: {final_state['chunk_count']}") - - # Step 4: Run comparison tests - print("\n=== Step 4: Running search mode comparison ===") - test_queries = [ - "authenticate user credentials", # Semantic query - "authentication", # Keyword query - "password hashing bcrypt", # Multi-term query - ] - - comparison_results = [] - for query in test_queries: - print(f"\n--- Query: '{query}' ---") - for mode in ["exact", "fuzzy", "vector", "hybrid"]: - result = self._run_search_mode(db_path, query, mode, limit=10) - comparison_results.append(result) - - print(f"\n{mode.upper()} mode:") - print(f" Success: {result['success']}") - print(f" Results: {result['result_count']}") - print(f" Time: {result['elapsed_ms']:.2f}ms") - if result['success'] and result['result_count'] > 0: - print(f" Top result: {result['results'][0]['path']}") - print(f" Score: {result['results'][0]['score']:.3f}") - print(f" Source: {result['results'][0]['source']}") - elif not result['success']: - print(f" Error: {result.get('error', 'Unknown')}") - - # Step 5: Generate comparison report - print("\n=== Step 5: Comparison Summary ===") - - # Group by mode - mode_stats = {} - for result in comparison_results: - mode = result['mode'] - if mode not in mode_stats: - mode_stats[mode] = { - "total_searches": 0, - "successful_searches": 0, - "total_results": 0, - "total_time_ms": 0, - "empty_results": 0, - } - - stats = mode_stats[mode] - stats["total_searches"] += 1 - if result['success']: - stats["successful_searches"] += 1 - stats["total_results"] += result['result_count'] - if result['result_count'] == 0: - stats["empty_results"] += 1 - stats["total_time_ms"] += result['elapsed_ms'] - - # Print summary table - print("\nMode | Queries | Success | Avg Results | Avg Time | Empty Results") - print("-" * 75) - for mode in ["exact", "fuzzy", "vector", "hybrid"]: - if mode in mode_stats: - stats = mode_stats[mode] - avg_results = stats["total_results"] / stats["total_searches"] - avg_time = stats["total_time_ms"] / stats["total_searches"] - print( - f"{mode:9} | {stats['total_searches']:7} | " - f"{stats['successful_searches']:7} | {avg_results:11.1f} | " - f"{avg_time:8.1f}ms | {stats['empty_results']:13}" - ) - - # Assertions - assert initial_state is not None - if vector_result['success']: - assert final_state['chunk_count'] > 0, "Vector index should contain chunks" - - # Find vector search results - vector_results = [r for r in comparison_results if r['mode'] == 'vector'] - if vector_results: - # At least one vector search should return results if index was created - has_vector_results = any(r.get('result_count', 0) > 0 for r in vector_results) - if not has_vector_results: - print("\n⚠️ WARNING: Vector index created but vector search returned no results!") - print("This indicates a potential issue with vector search implementation.") - - def test_search_comparison_without_vectors(self, sample_project_db): - """Search comparison test without vector embeddings (baseline).""" - db_path = sample_project_db - - print("\n=== Testing search without vector embeddings ===") - - # Check state - state = self._check_semantic_chunks_table(db_path) - print(f"Semantic chunks table exists: {state['table_exists']}") - print(f"Chunk count: {state['chunk_count']}") - - # Run exact and fuzzy searches only - test_queries = ["authentication", "user password", "bcrypt hash"] - - for query in test_queries: - print(f"\n--- Query: '{query}' ---") - for mode in ["exact", "fuzzy"]: - result = self._run_search_mode(db_path, query, mode, limit=10) - - print(f"{mode.upper()}: {result['result_count']} results in {result['elapsed_ms']:.2f}ms") - if result['success'] and result['result_count'] > 0: - print(f" Top: {result['results'][0]['path']} (score: {result['results'][0]['score']:.3f})") - - # Test vector search without embeddings (should return empty) - print(f"\n--- Testing vector search without embeddings ---") - vector_result = self._run_search_mode(db_path, "authentication", "vector", limit=10) - print(f"Vector search result count: {vector_result['result_count']}") - print(f"This is expected to be 0 without embeddings: {vector_result['result_count'] == 0}") - - assert vector_result['result_count'] == 0, \ - "Vector search should return empty results when no embeddings exist" - - -class TestDiagnostics: - """Diagnostic tests to identify specific issues.""" - - @pytest.fixture - def empty_db(self): - """Create empty database.""" - with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: - db_path = Path(f.name) - - store = DirIndexStore(db_path) - store.initialize() - store.close() - - yield db_path - if db_path.exists(): - for attempt in range(5): - try: - db_path.unlink() - break - except PermissionError: - time.sleep(0.05 * (attempt + 1)) - else: - # Best-effort cleanup (Windows SQLite locks can linger briefly). - try: - db_path.unlink(missing_ok=True) - except (PermissionError, OSError): - pass - - def test_diagnose_empty_database(self, empty_db): - """Diagnose behavior with empty database.""" - engine = HybridSearchEngine() - - print("\n=== Diagnosing empty database ===") - - # Test all modes - for mode_config in [ - ("exact", False, False), - ("fuzzy", True, False), - ("vector", False, True), - ("hybrid", True, True), - ]: - mode, enable_fuzzy, enable_vector = mode_config - - try: - results = engine.search( - empty_db, - "test", - limit=10, - enable_fuzzy=enable_fuzzy, - enable_vector=enable_vector, - ) - print(f"{mode}: {len(results)} results (OK)") - assert isinstance(results, list) - assert len(results) == 0 - except Exception as exc: - print(f"{mode}: ERROR - {exc}") - # Should not raise errors, should return empty list - pytest.fail(f"Search mode '{mode}' raised exception on empty database: {exc}") - - @pytest.mark.skipif(not SEMANTIC_DEPS_AVAILABLE, reason="Semantic dependencies not available") - def test_diagnose_embedder_initialization(self): - """Test embedder initialization and embedding generation.""" - print("\n=== Diagnosing embedder ===") - - try: - embedder = Embedder(profile="code") - print(f"✓ Embedder initialized (model: {embedder.model_name})") - print(f" Embedding dimension: {embedder.embedding_dim}") - - # Test embedding generation - test_text = "def authenticate_user(username, password):" - embedding = embedder.embed_single(test_text) - - print(f"✓ Generated embedding (length: {len(embedding)})") - print(f" Sample values: {embedding[:5]}") - - assert len(embedding) == embedder.embedding_dim - assert all(isinstance(v, float) for v in embedding) - - except Exception as exc: - print(f"✗ Embedder error: {exc}") - raise - - -if __name__ == "__main__": - # Run tests with pytest - pytest.main([__file__, "-v", "-s"]) diff --git a/codex-lens/tests/test_search_comprehensive.py b/codex-lens/tests/test_search_comprehensive.py deleted file mode 100644 index dcde8e9a..00000000 --- a/codex-lens/tests/test_search_comprehensive.py +++ /dev/null @@ -1,604 +0,0 @@ -"""Comprehensive tests for CodexLens search functionality. - -Tests cover: -- FTS5 text search (basic, phrase, boolean, wildcard) -- Chain search across directories -- Symbol search (by name, kind, filters) -- Files-only search mode -- Edge cases and error handling -""" - -import tempfile -import pytest -from pathlib import Path -from unittest.mock import MagicMock, patch - -from codexlens.storage.sqlite_store import SQLiteStore -from codexlens.storage.dir_index import DirIndexStore -from codexlens.storage.registry import RegistryStore -from codexlens.storage.path_mapper import PathMapper -from codexlens.search import ( - ChainSearchEngine, - SearchOptions, - SearchStats, - ChainSearchResult, - quick_search, -) -from codexlens.entities import IndexedFile, Symbol, SearchResult - - -# === Fixtures === - -@pytest.fixture -def temp_dir(): - """Create a temporary directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - yield Path(tmpdir) - - -@pytest.fixture -def sample_files(): - """Sample file data for testing.""" - return [ - (IndexedFile( - path="/project/src/auth.py", - language="python", - symbols=[ - Symbol(name="authenticate", kind="function", range=(1, 10)), - Symbol(name="verify_token", kind="function", range=(12, 20)), - Symbol(name="AuthManager", kind="class", range=(22, 50)), - ], - ), """ -def authenticate(username, password): - '''Authenticate user with credentials.''' - user = find_user(username) - if user and check_password(user, password): - return create_token(user) - return None - -def verify_token(token): - '''Verify JWT token validity.''' - try: - payload = decode_token(token) - return payload - except TokenExpired: - return None - -class AuthManager: - '''Manages authentication state.''' - def __init__(self): - self.sessions = {} - - def login(self, user): - token = authenticate(user.name, user.password) - self.sessions[user.id] = token - return token -"""), - (IndexedFile( - path="/project/src/database.py", - language="python", - symbols=[ - Symbol(name="connect", kind="function", range=(1, 5)), - Symbol(name="query", kind="function", range=(7, 15)), - Symbol(name="DatabasePool", kind="class", range=(17, 40)), - ], - ), """ -def connect(host, port, database): - '''Establish database connection.''' - return Connection(host, port, database) - -def query(connection, sql, params=None): - '''Execute SQL query and return results.''' - cursor = connection.cursor() - cursor.execute(sql, params or []) - return cursor.fetchall() - -class DatabasePool: - '''Connection pool for database.''' - def __init__(self, size=10): - self.pool = [] - self.size = size - - def get_connection(self): - if self.pool: - return self.pool.pop() - return connect() -"""), - (IndexedFile( - path="/project/src/utils.py", - language="python", - symbols=[ - Symbol(name="format_date", kind="function", range=(1, 3)), - Symbol(name="parse_json", kind="function", range=(5, 10)), - Symbol(name="hash_password", kind="function", range=(12, 18)), - ], - ), """ -def format_date(date, fmt='%Y-%m-%d'): - return date.strftime(fmt) - -def parse_json(data): - '''Parse JSON string to dictionary.''' - import json - return json.loads(data) - -def hash_password(password, salt=None): - '''Hash password using bcrypt.''' - import hashlib - salt = salt or generate_salt() - return hashlib.sha256((password + salt).encode()).hexdigest() -"""), - ] - - -@pytest.fixture -def populated_store(temp_dir, sample_files): - """Create a populated SQLite store for testing.""" - db_path = temp_dir / "_index.db" - store = SQLiteStore(db_path) - store.initialize() - - for indexed_file, content in sample_files: - store.add_file(indexed_file, content) - - yield store - store.close() - - -@pytest.fixture -def populated_dir_store(temp_dir, sample_files): - """Create a populated DirIndexStore for testing.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - - for indexed_file, content in sample_files: - store.add_file(indexed_file, content) - - yield store - store.close() - - -# === FTS5 Search Tests === - -class TestFTS5BasicSearch: - """Tests for basic FTS5 text search.""" - - def test_single_term_search(self, populated_store): - """Test search with a single term.""" - results = populated_store.search_fts("authenticate") - assert len(results) >= 1 - assert any("auth" in r.path.lower() for r in results) - - def test_case_insensitive_search(self, populated_store): - """Test that search is case insensitive.""" - results_lower = populated_store.search_fts("database") - results_upper = populated_store.search_fts("DATABASE") - results_mixed = populated_store.search_fts("DataBase") - - # All should return similar results - assert len(results_lower) == len(results_upper) == len(results_mixed) - - def test_partial_word_search(self, populated_store): - """Test search with partial words using wildcards.""" - results = populated_store.search_fts("auth*") - assert len(results) >= 1 - # Should match authenticate, authentication, AuthManager, etc. - - def test_multiple_terms_search(self, populated_store): - """Test search with multiple terms (implicit AND).""" - results = populated_store.search_fts("user password") - assert len(results) >= 1 - - def test_no_results_search(self, populated_store): - """Test search that returns no results.""" - results = populated_store.search_fts("nonexistent_xyz_term") - assert len(results) == 0 - - def test_search_with_limit(self, populated_store): - """Test search respects limit parameter.""" - results = populated_store.search_fts("def", limit=1) - assert len(results) <= 1 - - def test_search_returns_excerpt(self, populated_store): - """Test search results include excerpts.""" - results = populated_store.search_fts("authenticate") - assert len(results) >= 1 - # SearchResult should have excerpt field - for r in results: - assert hasattr(r, 'excerpt') - - -class TestFTS5AdvancedSearch: - """Tests for advanced FTS5 search features.""" - - def test_phrase_search(self, populated_store): - """Test exact phrase search with quotes.""" - results = populated_store.search_fts('"verify_token"') - assert len(results) >= 1 - - def test_boolean_or_search(self, populated_store): - """Test OR boolean search.""" - results = populated_store.search_fts("authenticate OR database") - # Should find files containing either term - assert len(results) >= 2 - - def test_boolean_not_search(self, populated_store): - """Test NOT boolean search.""" - all_results = populated_store.search_fts("def") - not_results = populated_store.search_fts("def NOT authenticate") - # NOT should return fewer results - assert len(not_results) <= len(all_results) - - def test_prefix_search(self, populated_store): - """Test prefix search with asterisk.""" - results = populated_store.search_fts("connect*") - assert len(results) >= 1 - # Should match connect, connection, etc. - - def test_special_characters_in_query(self, populated_store): - """Test search handles special characters gracefully.""" - # Should not raise an error - results = populated_store.search_fts("__init__") - # May or may not have results, but shouldn't crash - - def test_unicode_search(self, temp_dir): - """Test search with unicode content.""" - store = SQLiteStore(temp_dir / "_index.db") - store.initialize() - - indexed_file = IndexedFile( - path="/test/unicode.py", - language="python", - symbols=[Symbol(name="世界", kind="function", range=(1, 1))], - ) - store.add_file(indexed_file, "def 世界(): return '你好世界'") - - results = store.search_fts("世界") - assert len(results) == 1 - - store.close() - - -class TestFTS5Pagination: - """Tests for FTS5 search pagination.""" - - def test_offset_pagination(self, temp_dir): - """Test search with offset for pagination.""" - store = SQLiteStore(temp_dir / "_index.db") - store.initialize() - - # Add multiple files - for i in range(10): - indexed_file = IndexedFile( - path=f"/test/file{i}.py", - language="python", - symbols=[], - ) - store.add_file(indexed_file, f"searchable content number {i}") - - page1 = store.search_fts("searchable", limit=3, offset=0) - page2 = store.search_fts("searchable", limit=3, offset=3) - page3 = store.search_fts("searchable", limit=3, offset=6) - - # Each page should have different results - paths1 = {r.path for r in page1} - paths2 = {r.path for r in page2} - paths3 = {r.path for r in page3} - - assert paths1.isdisjoint(paths2) - assert paths2.isdisjoint(paths3) - - store.close() - - def test_offset_beyond_results(self, populated_store): - """Test offset beyond available results.""" - results = populated_store.search_fts("authenticate", limit=10, offset=1000) - assert len(results) == 0 - - -# === Symbol Search Tests === - -class TestSymbolSearch: - """Tests for symbol search functionality.""" - - def test_search_by_name(self, populated_store): - """Test symbol search by name.""" - results = populated_store.search_symbols("auth") - assert len(results) >= 1 - assert any("auth" in s.name.lower() for s in results) - - def test_search_by_kind_function(self, populated_store): - """Test symbol search filtered by kind=function.""" - results = populated_store.search_symbols("", kind="function") - assert all(s.kind == "function" for s in results) - - def test_search_by_kind_class(self, populated_store): - """Test symbol search filtered by kind=class.""" - results = populated_store.search_symbols("", kind="class") - assert all(s.kind == "class" for s in results) - assert any("Manager" in s.name or "Pool" in s.name for s in results) - - def test_search_symbols_with_limit(self, populated_store): - """Test symbol search respects limit.""" - results = populated_store.search_symbols("", limit=2) - assert len(results) <= 2 - - def test_search_symbols_returns_range(self, populated_store): - """Test symbol search results include line range.""" - results = populated_store.search_symbols("authenticate") - assert len(results) >= 1 - for sym in results: - assert hasattr(sym, 'range') - assert len(sym.range) == 2 - assert sym.range[0] <= sym.range[1] - - -# === Chain Search Tests === - -class TestChainSearchEngine: - """Tests for ChainSearchEngine.""" - - @pytest.fixture - def mock_registry(self): - """Create a mock registry.""" - registry = MagicMock(spec=RegistryStore) - registry.find_nearest_index.return_value = None - return registry - - @pytest.fixture - def mock_mapper(self): - """Create a mock path mapper.""" - return MagicMock(spec=PathMapper) - - def test_search_no_index_found(self, mock_registry, mock_mapper): - """Test search when no index is found.""" - mock_mapper.source_to_index_db.return_value = Path("/nonexistent/_index.db") - - engine = ChainSearchEngine(mock_registry, mock_mapper) - result = engine.search("test", Path("/nonexistent")) - - assert result.results == [] - assert result.symbols == [] - assert result.stats.dirs_searched == 0 - - def test_search_options_depth(self, mock_registry, mock_mapper, temp_dir): - """Test search respects depth option.""" - # Create a simple index structure - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - store.add_file( - name="test.py", - full_path=str(temp_dir / "test.py"), - content="test content searchable", - language="python", - ) - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - options = SearchOptions(depth=0) # Only current dir - - result = engine.search("test", temp_dir, options) - - # With depth=0, should only search current directory - assert result.stats.dirs_searched <= 1 - - def test_search_files_only(self, mock_registry, mock_mapper, temp_dir): - """Test search_files_only returns only paths.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - store.add_file( - name="test.py", - full_path=str(temp_dir / "test.py"), - content="searchable content here", - language="python", - ) - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - paths = engine.search_files_only("searchable", temp_dir) - - assert isinstance(paths, list) - for p in paths: - assert isinstance(p, str) - - def test_search_symbols_engine(self, mock_registry, mock_mapper, temp_dir): - """Test symbol search through engine.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - store.add_file( - name="test.py", - full_path=str(temp_dir / "test.py"), - content="def my_function(): pass", - language="python", - symbols=[Symbol(name="my_function", kind="function", range=(1, 5))], - ) - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - symbols = engine.search_symbols("my_func", temp_dir) - - assert len(symbols) >= 1 - assert symbols[0].name == "my_function" - - def test_search_result_stats(self, mock_registry, mock_mapper, temp_dir): - """Test search result includes proper stats.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - store.add_file( - name="test.py", - full_path=str(temp_dir / "test.py"), - content="content to search", - language="python", - ) - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - result = engine.search("content", temp_dir) - - assert result.stats.time_ms >= 0 - assert result.stats.dirs_searched >= 0 - assert isinstance(result.stats.errors, list) - - -class TestSearchOptions: - """Tests for SearchOptions configuration.""" - - def test_default_options(self): - """Test default search options.""" - options = SearchOptions() - assert options.depth == -1 - assert options.max_workers == 8 - assert options.limit_per_dir == 10 - assert options.total_limit == 100 - assert options.include_symbols is False - assert options.files_only is False - - def test_custom_options(self): - """Test custom search options.""" - options = SearchOptions( - depth=3, - max_workers=4, - limit_per_dir=5, - total_limit=50, - include_symbols=True, - files_only=True, - ) - assert options.depth == 3 - assert options.max_workers == 4 - assert options.limit_per_dir == 5 - assert options.total_limit == 50 - assert options.include_symbols is True - assert options.files_only is True - - -# === Edge Cases and Error Handling === - -class TestSearchEdgeCases: - """Edge case tests for search functionality.""" - - def test_empty_query(self, populated_store): - """Test search with empty query.""" - # Empty query may raise an error or return empty results - try: - results = populated_store.search_fts("") - assert isinstance(results, list) - except Exception: - # Some implementations may reject empty queries - pass - - def test_whitespace_query(self, populated_store): - """Test search with whitespace-only query.""" - # Whitespace query may raise an error or return empty results - try: - results = populated_store.search_fts(" ") - assert isinstance(results, list) - except Exception: - # Some implementations may reject whitespace queries - pass - - def test_very_long_query(self, populated_store): - """Test search with very long query.""" - long_query = "function " * 100 # Repeat valid word - try: - results = populated_store.search_fts(long_query) - assert isinstance(results, list) - except Exception: - # Very long queries may be rejected - pass - - def test_special_sql_characters(self, populated_store): - """Test search handles SQL-like characters safely.""" - # These should not cause SQL injection - may raise FTS syntax errors - queries = ["test", "function*", "test OR data"] - for q in queries: - results = populated_store.search_fts(q) - assert isinstance(results, list) - - def test_search_reopened_store(self, temp_dir, sample_files): - """Test search works after store is reopened.""" - db_path = temp_dir / "_index.db" - store = SQLiteStore(db_path) - store.initialize() - store.add_file(sample_files[0][0], sample_files[0][1]) - store.close() - - # Reopen and search - store2 = SQLiteStore(db_path) - store2.initialize() - results = store2.search_fts("authenticate") - assert len(results) >= 1 - store2.close() - - def test_concurrent_searches(self, populated_store): - """Test multiple concurrent searches.""" - import threading - - results = [] - errors = [] - - def search_task(query): - try: - r = populated_store.search_fts(query) - results.append(len(r)) - except Exception as e: - errors.append(e) - - threads = [ - threading.Thread(target=search_task, args=("authenticate",)), - threading.Thread(target=search_task, args=("database",)), - threading.Thread(target=search_task, args=("password",)), - ] - - for t in threads: - t.start() - for t in threads: - t.join() - - assert len(errors) == 0 - assert len(results) == 3 - - -class TestChainSearchResult: - """Tests for ChainSearchResult dataclass.""" - - def test_result_structure(self): - """Test ChainSearchResult has all required fields.""" - result = ChainSearchResult( - query="test", - results=[], - symbols=[], - stats=SearchStats(), - ) - assert result.query == "test" - assert result.results == [] - assert result.related_results == [] - assert result.symbols == [] - assert result.stats.dirs_searched == 0 - - -class TestSearchStats: - """Tests for SearchStats dataclass.""" - - def test_default_stats(self): - """Test default search stats.""" - stats = SearchStats() - assert stats.dirs_searched == 0 - assert stats.files_matched == 0 - assert stats.time_ms == 0 - assert stats.errors == [] - - def test_stats_with_errors(self): - """Test search stats with errors.""" - stats = SearchStats(errors=["Error 1", "Error 2"]) - assert len(stats.errors) == 2 diff --git a/codex-lens/tests/test_search_full_coverage.py b/codex-lens/tests/test_search_full_coverage.py deleted file mode 100644 index fa90ef82..00000000 --- a/codex-lens/tests/test_search_full_coverage.py +++ /dev/null @@ -1,1267 +0,0 @@ -"""Full coverage tests for CodexLens search functionality. - -Comprehensive test suite covering: -- Chain search engine internals -- Multi-directory hierarchical search -- Result merging and deduplication -- Context manager behavior -- Semantic search integration -- Edge cases and error recovery -- Parallel search stress tests -- Boundary conditions -""" - -import tempfile -import pytest -import threading -import time -from pathlib import Path -from unittest.mock import MagicMock, patch, PropertyMock -from concurrent.futures import ThreadPoolExecutor - -from codexlens.storage.sqlite_store import SQLiteStore -from codexlens.storage.dir_index import DirIndexStore -from codexlens.storage.registry import RegistryStore -from codexlens.storage.path_mapper import PathMapper -from codexlens.search import ( - ChainSearchEngine, - SearchOptions, - SearchStats, - ChainSearchResult, - quick_search, -) -from codexlens.entities import IndexedFile, Symbol, SearchResult - - -# === Fixtures === - -@pytest.fixture -def temp_dir(): - """Create a temporary directory.""" - with tempfile.TemporaryDirectory() as tmpdir: - yield Path(tmpdir) - - -@pytest.fixture -def mock_registry(): - """Create a mock registry.""" - registry = MagicMock(spec=RegistryStore) - registry.find_nearest_index.return_value = None - return registry - - -@pytest.fixture -def mock_mapper(): - """Create a mock path mapper.""" - return MagicMock(spec=PathMapper) - - -@pytest.fixture -def sample_code_files(): - """Sample code file data for comprehensive testing.""" - return [ - # Authentication module - { - "name": "auth.py", - "language": "python", - "content": """ -def authenticate(username, password): - '''Authenticate user with credentials.''' - user = find_user(username) - if user and check_password(user, password): - return create_token(user) - return None - -def verify_token(token): - '''Verify JWT token validity.''' - try: - payload = decode_token(token) - return payload - except TokenExpired: - return None - -class AuthManager: - '''Manages authentication state.''' - def __init__(self): - self.sessions = {} - - def login(self, user): - token = authenticate(user.name, user.password) - self.sessions[user.id] = token - return token -""", - "symbols": [ - Symbol(name="authenticate", kind="function", range=(2, 8)), - Symbol(name="verify_token", kind="function", range=(10, 17)), - Symbol(name="AuthManager", kind="class", range=(19, 28)), - ], - }, - # Database module - { - "name": "database.py", - "language": "python", - "content": """ -def connect(host, port, database): - '''Establish database connection.''' - return Connection(host, port, database) - -def query(connection, sql, params=None): - '''Execute SQL query and return results.''' - cursor = connection.cursor() - cursor.execute(sql, params or []) - return cursor.fetchall() - -class DatabasePool: - '''Connection pool for database.''' - def __init__(self, size=10): - self.pool = [] - self.size = size - - def get_connection(self): - if self.pool: - return self.pool.pop() - return connect() -""", - "symbols": [ - Symbol(name="connect", kind="function", range=(2, 4)), - Symbol(name="query", kind="function", range=(6, 10)), - Symbol(name="DatabasePool", kind="class", range=(12, 21)), - ], - }, - # Utils module - { - "name": "utils.py", - "language": "python", - "content": """ -def format_date(date, fmt='%Y-%m-%d'): - return date.strftime(fmt) - -def parse_json(data): - '''Parse JSON string to dictionary.''' - import json - return json.loads(data) - -def hash_password(password, salt=None): - '''Hash password using bcrypt.''' - import hashlib - salt = salt or generate_salt() - return hashlib.sha256((password + salt).encode()).hexdigest() -""", - "symbols": [ - Symbol(name="format_date", kind="function", range=(2, 3)), - Symbol(name="parse_json", kind="function", range=(5, 8)), - Symbol(name="hash_password", kind="function", range=(10, 14)), - ], - }, - ] - - -@pytest.fixture -def populated_single_store(temp_dir, sample_code_files): - """Create a single populated DirIndexStore.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - - for file_data in sample_code_files: - store.add_file( - name=file_data["name"], - full_path=str(temp_dir / file_data["name"]), - content=file_data["content"], - language=file_data["language"], - symbols=file_data["symbols"], - ) - - yield store - store.close() - - -@pytest.fixture -def hierarchical_index_structure(temp_dir, sample_code_files): - """Create a multi-level directory index structure for chain search testing. - - Structure: - project/ - _index.db (root) - src/ - _index.db - auth/ - _index.db - db/ - _index.db - tests/ - _index.db - """ - structure = {} - - # Root directory - root_dir = temp_dir / "project" - root_dir.mkdir() - root_db = root_dir / "_index.db" - root_store = DirIndexStore(root_db) - root_store.initialize() - root_store.add_file( - name="main.py", - full_path=str(root_dir / "main.py"), - content="# Main entry point\nfrom src import auth, db\ndef main(): pass", - language="python", - symbols=[Symbol(name="main", kind="function", range=(3, 3))], - ) - structure["root"] = {"path": root_dir, "db": root_db, "store": root_store} - - # src directory - src_dir = root_dir / "src" - src_dir.mkdir() - src_db = src_dir / "_index.db" - src_store = DirIndexStore(src_db) - src_store.initialize() - src_store.add_file( - name="__init__.py", - full_path=str(src_dir / "__init__.py"), - content="# Source package\nfrom .auth import authenticate\nfrom .db import connect", - language="python", - ) - structure["src"] = {"path": src_dir, "db": src_db, "store": src_store} - - # src/auth directory - auth_dir = src_dir / "auth" - auth_dir.mkdir() - auth_db = auth_dir / "_index.db" - auth_store = DirIndexStore(auth_db) - auth_store.initialize() - auth_store.add_file( - name="auth.py", - full_path=str(auth_dir / "auth.py"), - content=sample_code_files[0]["content"], - language="python", - symbols=sample_code_files[0]["symbols"], - ) - structure["auth"] = {"path": auth_dir, "db": auth_db, "store": auth_store} - - # src/db directory - db_dir = src_dir / "db" - db_dir.mkdir() - db_db = db_dir / "_index.db" - db_store = DirIndexStore(db_db) - db_store.initialize() - db_store.add_file( - name="database.py", - full_path=str(db_dir / "database.py"), - content=sample_code_files[1]["content"], - language="python", - symbols=sample_code_files[1]["symbols"], - ) - structure["db"] = {"path": db_dir, "db": db_db, "store": db_store} - - # tests directory - tests_dir = root_dir / "tests" - tests_dir.mkdir() - tests_db = tests_dir / "_index.db" - tests_store = DirIndexStore(tests_db) - tests_store.initialize() - tests_store.add_file( - name="test_auth.py", - full_path=str(tests_dir / "test_auth.py"), - content="import pytest\nfrom src.auth import authenticate\ndef test_authenticate(): assert authenticate('user', 'pass')", - language="python", - symbols=[Symbol(name="test_authenticate", kind="function", range=(3, 3))], - ) - structure["tests"] = {"path": tests_dir, "db": tests_db, "store": tests_store} - - # Link subdirectories - root_store.register_subdir(name="src", index_path=src_db) - root_store.register_subdir(name="tests", index_path=tests_db) - src_store.register_subdir(name="auth", index_path=auth_db) - src_store.register_subdir(name="db", index_path=db_db) - - # Close all stores before yielding to avoid Windows file locking issues - root_store.close() - src_store.close() - auth_store.close() - db_store.close() - tests_store.close() - - yield structure - - -# === Chain Search Engine Internal Tests === - -class TestChainSearchEngineInternals: - """Tests for ChainSearchEngine internal methods.""" - - def test_context_manager_enter_exit(self, mock_registry, mock_mapper): - """Test context manager protocol.""" - with ChainSearchEngine(mock_registry, mock_mapper) as engine: - assert engine is not None - assert isinstance(engine, ChainSearchEngine) - # Engine should be closed after exit - - def test_close_without_executor(self, mock_registry, mock_mapper): - """Test close() when executor was never created.""" - engine = ChainSearchEngine(mock_registry, mock_mapper) - engine.close() # Should not raise - - def test_close_with_executor(self, mock_registry, mock_mapper, temp_dir): - """Test close() properly shuts down executor.""" - # Create index - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - store.add_file( - name="test.py", - full_path=str(temp_dir / "test.py"), - content="test content searchable", - language="python", - ) - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - # Trigger executor creation - engine.search("test", temp_dir) - - # Close should work - engine.close() - assert engine._executor is None - - def test_get_executor_lazy_initialization(self, mock_registry, mock_mapper): - """Test executor is lazily initialized.""" - engine = ChainSearchEngine(mock_registry, mock_mapper) - assert engine._executor is None - - executor = engine._get_executor() - assert executor is not None - assert engine._executor is executor - - # Second call returns same instance - assert engine._get_executor() is executor - - engine.close() - - def test_get_executor_custom_workers(self, mock_registry, mock_mapper): - """Test executor with custom worker count.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, max_workers=4) - executor = engine._get_executor() - assert executor is not None - engine.close() - - -class TestIndexPathCollection: - """Tests for _collect_index_paths method.""" - - def test_collect_depth_zero(self, mock_registry, mock_mapper, hierarchical_index_structure): - """Test collection with depth=0 returns only start index.""" - structure = hierarchical_index_structure - root_db = structure["root"]["db"] - - mock_mapper.source_to_index_db.return_value = root_db - - engine = ChainSearchEngine(mock_registry, mock_mapper) - paths = engine._collect_index_paths(root_db, depth=0) - - assert len(paths) == 1 - assert paths[0] == root_db.resolve() - engine.close() - - def test_collect_depth_one(self, mock_registry, mock_mapper, hierarchical_index_structure): - """Test collection with depth=1 returns root + immediate children.""" - structure = hierarchical_index_structure - root_db = structure["root"]["db"] - - mock_mapper.source_to_index_db.return_value = root_db - - engine = ChainSearchEngine(mock_registry, mock_mapper) - paths = engine._collect_index_paths(root_db, depth=1) - - # Should include root, src, tests (not auth/db which are depth 2) - assert len(paths) == 3 - engine.close() - - def test_collect_depth_unlimited(self, mock_registry, mock_mapper, hierarchical_index_structure): - """Test collection with depth=-1 returns all indexes.""" - structure = hierarchical_index_structure - root_db = structure["root"]["db"] - - mock_mapper.source_to_index_db.return_value = root_db - - engine = ChainSearchEngine(mock_registry, mock_mapper) - paths = engine._collect_index_paths(root_db, depth=-1) - - # Should include all 5: root, src, tests, auth, db - assert len(paths) == 5 - engine.close() - - def test_collect_avoids_duplicates(self, mock_registry, mock_mapper, hierarchical_index_structure): - """Test collection deduplicates paths.""" - structure = hierarchical_index_structure - root_db = structure["root"]["db"] - - engine = ChainSearchEngine(mock_registry, mock_mapper) - paths = engine._collect_index_paths(root_db, depth=-1) - - # All paths should be unique - path_set = set(str(p) for p in paths) - assert len(path_set) == len(paths) - engine.close() - - def test_collect_handles_missing_subdir_index(self, mock_registry, mock_mapper, temp_dir): - """Test collection handles missing subdirectory indexes gracefully.""" - # Create root with reference to non-existent subdir - root_db = temp_dir / "_index.db" - store = DirIndexStore(root_db) - store.initialize() - store.add_file( - name="test.py", - full_path=str(temp_dir / "test.py"), - content="test", - language="python", - ) - # Add reference to non-existent index - store.register_subdir(name="missing", index_path=temp_dir / "missing" / "_index.db") - store.close() - - engine = ChainSearchEngine(mock_registry, mock_mapper) - paths = engine._collect_index_paths(root_db, depth=-1) - - # Should only include root (missing subdir is skipped) - assert len(paths) == 1 - engine.close() - - def test_collect_skips_ignored_artifact_indexes(self, mock_registry, mock_mapper, temp_dir): - """Test collection skips dist/build-style artifact subtrees.""" - root_dir = temp_dir / "project" - root_dir.mkdir() - - root_db = root_dir / "_index.db" - root_store = DirIndexStore(root_db) - root_store.initialize() - - src_dir = root_dir / "src" - src_dir.mkdir() - src_db = src_dir / "_index.db" - src_store = DirIndexStore(src_db) - src_store.initialize() - - dist_dir = root_dir / "dist" - dist_dir.mkdir() - dist_db = dist_dir / "_index.db" - dist_store = DirIndexStore(dist_db) - dist_store.initialize() - - workflow_dir = root_dir / ".workflow" - workflow_dir.mkdir() - workflow_db = workflow_dir / "_index.db" - workflow_store = DirIndexStore(workflow_db) - workflow_store.initialize() - - root_store.register_subdir(name="src", index_path=src_db) - root_store.register_subdir(name="dist", index_path=dist_db) - root_store.register_subdir(name=".workflow", index_path=workflow_db) - - root_store.close() - src_store.close() - dist_store.close() - workflow_store.close() - - engine = ChainSearchEngine(mock_registry, mock_mapper) - paths = engine._collect_index_paths(root_db, depth=-1) - - assert {path.relative_to(root_dir).as_posix() for path in paths} == { - "_index.db", - "src/_index.db", - } - engine.close() - - -class TestResultMergeAndRank: - """Tests for _merge_and_rank method.""" - - def test_merge_deduplicates_by_path(self, mock_registry, mock_mapper): - """Test merging deduplicates results by path.""" - engine = ChainSearchEngine(mock_registry, mock_mapper) - - results = [ - SearchResult(path="/test/file.py", score=10.0, excerpt="match 1"), - SearchResult(path="/test/file.py", score=5.0, excerpt="match 2"), - SearchResult(path="/test/other.py", score=8.0, excerpt="match 3"), - ] - - merged = engine._merge_and_rank(results, limit=10) - - assert len(merged) == 2 - # Should keep highest score for duplicate path - file_result = next(r for r in merged if r.path == "/test/file.py") - assert file_result.score == 10.0 - engine.close() - - def test_merge_sorts_by_score_descending(self, mock_registry, mock_mapper): - """Test merged results are sorted by score descending.""" - engine = ChainSearchEngine(mock_registry, mock_mapper) - - results = [ - SearchResult(path="/test/low.py", score=1.0, excerpt=""), - SearchResult(path="/test/high.py", score=100.0, excerpt=""), - SearchResult(path="/test/mid.py", score=50.0, excerpt=""), - ] - - merged = engine._merge_and_rank(results, limit=10) - - assert merged[0].path == "/test/high.py" - assert merged[1].path == "/test/mid.py" - assert merged[2].path == "/test/low.py" - engine.close() - - def test_merge_respects_limit(self, mock_registry, mock_mapper): - """Test merge respects limit parameter.""" - engine = ChainSearchEngine(mock_registry, mock_mapper) - - results = [ - SearchResult(path=f"/test/file{i}.py", score=float(i), excerpt="") - for i in range(100) - ] - - merged = engine._merge_and_rank(results, limit=5) - - assert len(merged) == 5 - # Should be the top 5 by score - assert merged[0].score == 99.0 - engine.close() - - def test_merge_empty_results(self, mock_registry, mock_mapper): - """Test merge handles empty results.""" - engine = ChainSearchEngine(mock_registry, mock_mapper) - merged = engine._merge_and_rank([], limit=10) - assert merged == [] - engine.close() - - def test_merge_applies_test_file_penalty_for_non_test_query(self, mock_registry, mock_mapper): - """Non-test queries should lightly demote test files during merge.""" - engine = ChainSearchEngine(mock_registry, mock_mapper) - - results = [ - SearchResult(path="/repo/tests/test_auth.py", score=10.0, excerpt="match 1"), - SearchResult(path="/repo/src/auth.py", score=9.0, excerpt="match 2"), - ] - - merged = engine._merge_and_rank(results, limit=10, query="authenticate users") - - assert merged[0].path == "/repo/src/auth.py" - assert merged[1].metadata["path_penalty_reasons"] == ["test_file"] - engine.close() - - def test_merge_applies_generated_file_penalty_for_non_artifact_query(self, mock_registry, mock_mapper): - """Non-artifact queries should lightly demote generated/build results during merge.""" - engine = ChainSearchEngine(mock_registry, mock_mapper) - - results = [ - SearchResult(path="/repo/dist/auth.js", score=10.0, excerpt="match 1"), - SearchResult(path="/repo/src/auth.ts", score=9.0, excerpt="match 2"), - ] - - merged = engine._merge_and_rank(results, limit=10, query="authenticate users") - - assert merged[0].path == "/repo/src/auth.ts" - assert merged[1].metadata["path_penalty_reasons"] == ["generated_artifact"] - engine.close() - - -# === Hierarchical Chain Search Tests === - -class TestHierarchicalChainSearch: - """Tests for searching across directory hierarchies.""" - - def test_search_from_root(self, mock_registry, mock_mapper, hierarchical_index_structure): - """Test search starting from root finds results in all subdirectories.""" - structure = hierarchical_index_structure - root_db = structure["root"]["db"] - root_path = structure["root"]["path"] - - mock_mapper.source_to_index_db.return_value = root_db - - engine = ChainSearchEngine(mock_registry, mock_mapper) - result = engine.search("authenticate", root_path) - - # Should find authenticate in auth.py and test_auth.py - assert len(result.results) >= 1 - assert result.stats.dirs_searched == 5 # All directories - engine.close() - - def test_search_from_subdir(self, mock_registry, mock_mapper, hierarchical_index_structure): - """Test search starting from subdirectory.""" - structure = hierarchical_index_structure - src_db = structure["src"]["db"] - src_path = structure["src"]["path"] - - mock_mapper.source_to_index_db.return_value = src_db - - engine = ChainSearchEngine(mock_registry, mock_mapper) - result = engine.search("authenticate", src_path) - - # Should find only in src subtree (src, auth, db) - assert result.stats.dirs_searched == 3 - engine.close() - - def test_search_with_depth_limit(self, mock_registry, mock_mapper, hierarchical_index_structure): - """Test search respects depth limit.""" - structure = hierarchical_index_structure - root_db = structure["root"]["db"] - root_path = structure["root"]["path"] - - mock_mapper.source_to_index_db.return_value = root_db - - engine = ChainSearchEngine(mock_registry, mock_mapper) - options = SearchOptions(depth=1) - result = engine.search("authenticate", root_path, options) - - # Depth 1: root + immediate children (src, tests) = 3 - assert result.stats.dirs_searched == 3 - engine.close() - - def test_search_aggregates_results(self, mock_registry, mock_mapper, hierarchical_index_structure): - """Test search aggregates results from multiple directories.""" - structure = hierarchical_index_structure - root_db = structure["root"]["db"] - root_path = structure["root"]["path"] - - mock_mapper.source_to_index_db.return_value = root_db - - engine = ChainSearchEngine(mock_registry, mock_mapper) - # Search for term that appears in multiple files - result = engine.search("def", root_path) - - # Should find results from multiple files - assert len(result.results) >= 3 - engine.close() - - -# === Search Files Only Tests === - -class TestSearchFilesOnly: - """Tests for search_files_only method.""" - - def test_returns_list_of_strings(self, mock_registry, mock_mapper, temp_dir): - """Test search_files_only returns list of path strings.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - store.add_file( - name="test.py", - full_path=str(temp_dir / "test.py"), - content="searchable content here", - language="python", - ) - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - paths = engine.search_files_only("searchable", temp_dir) - - assert isinstance(paths, list) - assert all(isinstance(p, str) for p in paths) - engine.close() - - def test_files_only_faster_than_full(self, mock_registry, mock_mapper, temp_dir): - """Test files_only search is at least as fast as full search.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - - # Add multiple files - for i in range(20): - store.add_file( - name=f"file{i}.py", - full_path=str(temp_dir / f"file{i}.py"), - content=f"searchable content number {i} with more text to index", - language="python", - ) - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - - # Time files_only - start = time.perf_counter() - for _ in range(10): - engine.search_files_only("searchable", temp_dir) - files_only_time = time.perf_counter() - start - - # Time full search - start = time.perf_counter() - for _ in range(10): - engine.search("searchable", temp_dir) - full_time = time.perf_counter() - start - - # files_only should not be significantly slower - # (may not be faster due to small dataset) - assert files_only_time <= full_time * 2 - engine.close() - - -# === Symbol Search Tests === - -class TestChainSymbolSearch: - """Tests for chain symbol search.""" - - def test_symbol_search_finds_across_dirs(self, mock_registry, mock_mapper, hierarchical_index_structure): - """Test symbol search finds symbols across directories.""" - structure = hierarchical_index_structure - root_db = structure["root"]["db"] - root_path = structure["root"]["path"] - - mock_mapper.source_to_index_db.return_value = root_db - - engine = ChainSearchEngine(mock_registry, mock_mapper) - symbols = engine.search_symbols("auth", root_path) - - # Should find authenticate and AuthManager - assert len(symbols) >= 2 - engine.close() - - def test_symbol_search_with_kind_filter(self, mock_registry, mock_mapper, hierarchical_index_structure): - """Test symbol search with kind filter.""" - structure = hierarchical_index_structure - root_db = structure["root"]["db"] - root_path = structure["root"]["path"] - - mock_mapper.source_to_index_db.return_value = root_db - - engine = ChainSearchEngine(mock_registry, mock_mapper) - classes = engine.search_symbols("", root_path, kind="class") - - # Should find AuthManager and DatabasePool - assert all(s.kind == "class" for s in classes) - engine.close() - - def test_symbol_search_deduplicates(self, mock_registry, mock_mapper, temp_dir): - """Test symbol search deduplicates by (name, kind, range) but keeps different ranges.""" - # Create two indexes with same symbol name but different ranges - dir1 = temp_dir / "dir1" - dir1.mkdir() - db1 = dir1 / "_index.db" - store1 = DirIndexStore(db1) - store1.initialize() - store1.add_file( - name="a.py", - full_path=str(dir1 / "a.py"), - content="def foo(): pass", - language="python", - symbols=[Symbol(name="foo", kind="function", range=(1, 5))], # Different range - ) - - dir2 = temp_dir / "dir2" - dir2.mkdir() - db2 = dir2 / "_index.db" - store2 = DirIndexStore(db2) - store2.initialize() - store2.add_file( - name="b.py", - full_path=str(dir2 / "b.py"), - content="def foo(): pass\n# more code\n", - language="python", - symbols=[Symbol(name="foo", kind="function", range=(1, 10))], # Different range - ) - store2.close() - - # Register subdir after dir2 is created - store1.register_subdir(name="dir2", index_path=db2) - store1.close() - - mock_mapper.source_to_index_db.return_value = db1 - - engine = ChainSearchEngine(mock_registry, mock_mapper) - symbols = engine.search_symbols("foo", dir1) - - # Should have exactly 2 (different ranges make them unique) - assert len(symbols) == 2 - engine.close() - - -# === Search Options Tests === - -class TestSearchOptionsExtended: - """Extended tests for SearchOptions.""" - - def test_include_semantic_option(self): - """Test include_semantic option.""" - options = SearchOptions(include_semantic=True) - assert options.include_semantic is True - - options_default = SearchOptions() - assert options_default.include_semantic is False - - def test_all_options_combined(self): - """Test all options set together.""" - options = SearchOptions( - depth=5, - max_workers=16, - limit_per_dir=20, - total_limit=200, - include_symbols=True, - files_only=True, - include_semantic=True, - ) - assert options.depth == 5 - assert options.max_workers == 16 - assert options.limit_per_dir == 20 - assert options.total_limit == 200 - assert options.include_symbols is True - assert options.files_only is True - assert options.include_semantic is True - - def test_options_with_zero_values(self): - """Test options with zero values.""" - options = SearchOptions( - depth=0, - max_workers=1, - limit_per_dir=1, - total_limit=1, - ) - assert options.depth == 0 - assert options.max_workers == 1 - assert options.limit_per_dir == 1 - assert options.total_limit == 1 - - -# === Quick Search Tests === - -class TestQuickSearch: - """Tests for quick_search convenience function.""" - - def test_quick_search_returns_results(self, temp_dir): - """Test quick_search returns SearchResult list.""" - # Setup: Create index at a known location - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - store.add_file( - name="test.py", - full_path=str(temp_dir / "test.py"), - content="searchable content for quick search test", - language="python", - ) - store.close() - - # Test requires actual registry - skip if not initialized - try: - results = quick_search("searchable", temp_dir) - assert isinstance(results, list) - except Exception: - # May fail if registry not properly set up - pytest.skip("Registry not available for quick_search test") - - def test_quick_search_with_depth(self, temp_dir): - """Test quick_search respects depth parameter.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - store.add_file( - name="test.py", - full_path=str(temp_dir / "test.py"), - content="test content", - language="python", - ) - store.close() - - try: - results = quick_search("test", temp_dir, depth=0) - assert isinstance(results, list) - except Exception: - pytest.skip("Registry not available for quick_search test") - - -# === Edge Cases and Error Handling === - -class TestSearchErrorHandling: - """Tests for search error handling.""" - - def test_search_corrupted_index(self, mock_registry, mock_mapper, temp_dir): - """Test search handles corrupted index gracefully.""" - # Create corrupted index file - db_path = temp_dir / "_index.db" - db_path.write_text("not a valid sqlite database") - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - try: - result = engine.search("test", temp_dir) - # Should return empty results, not crash - assert result.results == [] - finally: - engine.close() - # Force cleanup on Windows - import gc - gc.collect() - - def test_search_empty_index(self, mock_registry, mock_mapper, temp_dir): - """Test search on empty index returns empty results.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - result = engine.search("anything", temp_dir) - - assert result.results == [] - assert result.stats.files_matched == 0 - engine.close() - - def test_search_special_fts_characters(self, mock_registry, mock_mapper, temp_dir): - """Test search handles FTS5 special characters.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - store.add_file( - name="test.py", - full_path=str(temp_dir / "test.py"), - content="test content", - language="python", - ) - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - - # These should not crash - special_queries = [ - "test*", - "test OR other", - '"exact phrase"', - "NOT invalid", - ] - - for query in special_queries: - result = engine.search(query, temp_dir) - assert isinstance(result.results, list) - - engine.close() - - -# === Concurrent Search Tests === - -class TestConcurrentSearch: - """Tests for concurrent search operations.""" - - def test_multiple_concurrent_searches(self, mock_registry, mock_mapper, temp_dir): - """Test multiple concurrent searches don't interfere.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - for i in range(10): - store.add_file( - name=f"file{i}.py", - full_path=str(temp_dir / f"file{i}.py"), - content=f"content{i} searchable data", - language="python", - ) - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - - results = [] - errors = [] - - def search_task(query): - try: - r = engine.search(query, temp_dir) - results.append(len(r.results)) - except Exception as e: - errors.append(str(e)) - - threads = [ - threading.Thread(target=search_task, args=(f"content{i}",)) - for i in range(5) - ] - - for t in threads: - t.start() - for t in threads: - t.join() - - assert len(errors) == 0 - assert len(results) == 5 - engine.close() - - def test_search_during_close(self, mock_registry, mock_mapper, temp_dir): - """Test behavior when search happens during close.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - store.add_file( - name="test.py", - full_path=str(temp_dir / "test.py"), - content="test content", - language="python", - ) - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - - # Start a search then immediately close - result = engine.search("test", temp_dir) - engine.close() - - # Should complete without error - assert isinstance(result.results, list) - - -# === Search Statistics Tests === - -class TestSearchStatsExtended: - """Extended tests for search statistics.""" - - def test_stats_time_is_positive(self, mock_registry, mock_mapper, temp_dir): - """Test search time is recorded and positive.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - store.add_file( - name="test.py", - full_path=str(temp_dir / "test.py"), - content="test content", - language="python", - ) - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - result = engine.search("test", temp_dir) - - assert result.stats.time_ms >= 0 - engine.close() - - def test_stats_dirs_searched_accurate(self, mock_registry, mock_mapper, hierarchical_index_structure): - """Test dirs_searched count is accurate.""" - structure = hierarchical_index_structure - root_db = structure["root"]["db"] - root_path = structure["root"]["path"] - - mock_mapper.source_to_index_db.return_value = root_db - - engine = ChainSearchEngine(mock_registry, mock_mapper) - - # Depth 0 - result0 = engine.search("test", root_path, SearchOptions(depth=0)) - assert result0.stats.dirs_searched == 1 - - # Depth 1 - result1 = engine.search("test", root_path, SearchOptions(depth=1)) - assert result1.stats.dirs_searched == 3 # root + src + tests - - # Unlimited - result_all = engine.search("test", root_path, SearchOptions(depth=-1)) - assert result_all.stats.dirs_searched == 5 - - engine.close() - - def test_stats_files_matched_accurate(self, mock_registry, mock_mapper, temp_dir): - """Test files_matched count is accurate.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - - # Add files with different content - store.add_file(name="match1.py", full_path=str(temp_dir / "match1.py"), - content="findme keyword", language="python") - store.add_file(name="match2.py", full_path=str(temp_dir / "match2.py"), - content="findme keyword", language="python") - store.add_file(name="nomatch.py", full_path=str(temp_dir / "nomatch.py"), - content="other content", language="python") - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - result = engine.search("findme", temp_dir) - - assert result.stats.files_matched == 2 - engine.close() - - -# === Boundary Condition Tests === - -class TestBoundaryConditions: - """Tests for boundary conditions.""" - - def test_search_with_max_workers_one(self, mock_registry, mock_mapper, temp_dir): - """Test search with single worker.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - store.add_file(name="test.py", full_path=str(temp_dir / "test.py"), - content="test content", language="python") - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper, max_workers=1) - result = engine.search("test", temp_dir, SearchOptions(max_workers=1)) - - assert isinstance(result.results, list) - engine.close() - - def test_search_with_limit_one(self, mock_registry, mock_mapper, temp_dir): - """Test search with limit=1.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - for i in range(10): - store.add_file(name=f"file{i}.py", full_path=str(temp_dir / f"file{i}.py"), - content="searchable content", language="python") - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - result = engine.search("searchable", temp_dir, SearchOptions(total_limit=1)) - - assert len(result.results) <= 1 - engine.close() - - def test_search_very_long_query(self, mock_registry, mock_mapper, temp_dir): - """Test search with very long query.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - store.add_file(name="test.py", full_path=str(temp_dir / "test.py"), - content="test content", language="python") - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - - # Very long query - long_query = " ".join(["word"] * 100) - result = engine.search(long_query, temp_dir) - - # Should not crash - assert isinstance(result.results, list) - engine.close() - - def test_search_unicode_query(self, mock_registry, mock_mapper, temp_dir): - """Test search with unicode query does not crash.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - store.add_file( - name="unicode.py", - full_path=str(temp_dir / "unicode.py"), - content="# Chinese comment\ndef hello(): return 'hello world'", - language="python", - ) - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - # Unicode query should not crash (may or may not find results depending on FTS5 tokenizer) - result = engine.search("hello", temp_dir) - - assert isinstance(result.results, list) - assert len(result.results) >= 1 - engine.close() - - def test_search_empty_directory(self, mock_registry, mock_mapper, temp_dir): - """Test search in directory with no files.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - # Don't add any files - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - result = engine.search("anything", temp_dir) - - assert result.results == [] - assert result.stats.files_matched == 0 - engine.close() - - -# === Include Symbols Option Tests === - -class TestIncludeSymbolsOption: - """Tests for include_symbols search option.""" - - def test_search_with_include_symbols(self, mock_registry, mock_mapper, temp_dir): - """Test search returns symbols when include_symbols=True.""" - db_path = temp_dir / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - store.add_file( - name="test.py", - full_path=str(temp_dir / "test.py"), - content="def my_function(): pass", - language="python", - symbols=[Symbol(name="my_function", kind="function", range=(1, 1))], - ) - store.close() - - mock_mapper.source_to_index_db.return_value = db_path - - engine = ChainSearchEngine(mock_registry, mock_mapper) - - # Without include_symbols - result_no_symbols = engine.search("function", temp_dir, SearchOptions(include_symbols=False)) - assert result_no_symbols.symbols == [] - - # With include_symbols - result_with_symbols = engine.search("function", temp_dir, SearchOptions(include_symbols=True)) - # Symbols list populated (may or may not match depending on implementation) - assert isinstance(result_with_symbols.symbols, list) - - engine.close() - - -# === ChainSearchResult Tests === - -class TestChainSearchResultExtended: - """Extended tests for ChainSearchResult dataclass.""" - - def test_result_immutability(self): - """Test ChainSearchResult fields.""" - stats = SearchStats(dirs_searched=5, files_matched=10, time_ms=100.5) - results = [SearchResult(path="/test.py", score=1.0, excerpt="test")] - symbols = [Symbol(name="foo", kind="function", range=(1, 5))] - - result = ChainSearchResult( - query="test query", - results=results, - symbols=symbols, - stats=stats, - ) - - assert result.query == "test query" - assert len(result.results) == 1 - assert len(result.symbols) == 1 - assert result.related_results == [] - assert result.stats.dirs_searched == 5 - - def test_result_with_empty_collections(self): - """Test ChainSearchResult with empty results and symbols.""" - result = ChainSearchResult( - query="no matches", - results=[], - symbols=[], - stats=SearchStats(), - ) - - assert result.query == "no matches" - assert result.results == [] - assert result.related_results == [] - assert result.symbols == [] - assert result.stats.dirs_searched == 0 diff --git a/codex-lens/tests/test_search_performance.py b/codex-lens/tests/test_search_performance.py deleted file mode 100644 index 5460efb5..00000000 --- a/codex-lens/tests/test_search_performance.py +++ /dev/null @@ -1,660 +0,0 @@ -"""Performance benchmarks for CodexLens search functionality. - -Measures: -- FTS5 search speed at various scales -- Chain search traversal performance -- Semantic search latency -- Memory usage during search operations -""" - -import gc -import sys -import tempfile -import time -from pathlib import Path -from typing import List, Tuple -from dataclasses import dataclass -from contextlib import contextmanager - -import pytest - -from codexlens.storage.sqlite_store import SQLiteStore -from codexlens.storage.dir_index import DirIndexStore -from codexlens.storage.registry import RegistryStore -from codexlens.storage.path_mapper import PathMapper -from codexlens.search import ChainSearchEngine, SearchOptions -from codexlens.entities import IndexedFile, Symbol - - -@dataclass -class BenchmarkResult: - """Benchmark result container.""" - name: str - iterations: int - total_time_ms: float - avg_time_ms: float - min_time_ms: float - max_time_ms: float - ops_per_sec: float - - def __str__(self): - return ( - f"{self.name}:\n" - f" Iterations: {self.iterations}\n" - f" Total: {self.total_time_ms:.2f}ms\n" - f" Avg: {self.avg_time_ms:.2f}ms\n" - f" Min: {self.min_time_ms:.2f}ms\n" - f" Max: {self.max_time_ms:.2f}ms\n" - f" Ops/sec: {self.ops_per_sec:.1f}" - ) - - -def benchmark(func, iterations=10, warmup=2): - """Run benchmark with warmup iterations.""" - # Warmup - for _ in range(warmup): - func() - - # Measure - times = [] - for _ in range(iterations): - gc.collect() - start = time.perf_counter() - func() - elapsed = (time.perf_counter() - start) * 1000 - times.append(elapsed) - - total = sum(times) - return BenchmarkResult( - name=func.__name__ if hasattr(func, '__name__') else 'benchmark', - iterations=iterations, - total_time_ms=total, - avg_time_ms=total / iterations, - min_time_ms=min(times), - max_time_ms=max(times), - ops_per_sec=1000 / (total / iterations) if total > 0 else 0 - ) - - -@contextmanager -def timer(name: str): - """Context manager for timing code blocks.""" - start = time.perf_counter() - yield - elapsed = (time.perf_counter() - start) * 1000 - print(f" {name}: {elapsed:.2f}ms") - - -# === Test Fixtures === - -@pytest.fixture(scope="module") -def temp_dir(): - """Create a temporary directory for all tests.""" - tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True) - yield Path(tmpdir.name) - # Explicit cleanup with error handling for Windows file locking - try: - tmpdir.cleanup() - except (PermissionError, OSError): - pass # Ignore Windows file locking errors - - -def generate_code_file(index: int, lines: int = 100) -> Tuple[IndexedFile, str]: - """Generate a synthetic code file for testing.""" - symbols = [ - Symbol(name=f"function_{index}_{i}", kind="function", range=(i*10+1, i*10+9)) - for i in range(lines // 10) - ] - - content_lines = [] - for i in range(lines): - if i % 10 == 0: - content_lines.append(f"def function_{index}_{i//10}(param_{i}, data_{i}):") - else: - content_lines.append(f" # Line {i}: processing data with param_{i % 5}") - content_lines.append(f" result_{i} = compute(data_{i})") - - return ( - IndexedFile( - path=f"/project/src/module_{index}/file_{index}.py", - language="python", - symbols=symbols, - ), - "\n".join(content_lines) - ) - - -@pytest.fixture(scope="module") -def small_store(temp_dir): - """Small store with 10 files (~100 lines each).""" - db_path = temp_dir / "small_index.db" - store = SQLiteStore(db_path) - store.initialize() - - for i in range(10): - indexed_file, content = generate_code_file(i, lines=100) - store.add_file(indexed_file, content) - - yield store - store.close() - - -@pytest.fixture(scope="module") -def medium_store(temp_dir): - """Medium store with 100 files (~100 lines each).""" - db_path = temp_dir / "medium_index.db" - store = SQLiteStore(db_path) - store.initialize() - - for i in range(100): - indexed_file, content = generate_code_file(i, lines=100) - store.add_file(indexed_file, content) - - yield store - store.close() - - -@pytest.fixture(scope="module") -def large_store(temp_dir): - """Large store with 500 files (~200 lines each).""" - db_path = temp_dir / "large_index.db" - store = SQLiteStore(db_path) - store.initialize() - - for i in range(500): - indexed_file, content = generate_code_file(i, lines=200) - store.add_file(indexed_file, content) - - yield store - store.close() - - -# === FTS5 Performance Tests === - -class TestFTS5Performance: - """FTS5 search performance benchmarks.""" - - def test_small_store_search(self, small_store): - """Benchmark FTS5 search on small store (10 files).""" - print("\n" + "="*60) - print("FTS5 SEARCH - SMALL STORE (10 files)") - print("="*60) - - queries = ["function", "data", "compute", "result", "param"] - - for query in queries: - result = benchmark( - lambda q=query: small_store.search_fts(q, limit=20), - iterations=50 - ) - result.name = f"search '{query}'" - print(f"\n{result}") - - def test_medium_store_search(self, medium_store): - """Benchmark FTS5 search on medium store (100 files).""" - print("\n" + "="*60) - print("FTS5 SEARCH - MEDIUM STORE (100 files)") - print("="*60) - - queries = ["function", "data", "compute", "result", "param"] - - for query in queries: - result = benchmark( - lambda q=query: medium_store.search_fts(q, limit=20), - iterations=30 - ) - result.name = f"search '{query}'" - print(f"\n{result}") - - def test_large_store_search(self, large_store): - """Benchmark FTS5 search on large store (500 files).""" - print("\n" + "="*60) - print("FTS5 SEARCH - LARGE STORE (500 files)") - print("="*60) - - queries = ["function", "data", "compute", "result", "param"] - - for query in queries: - result = benchmark( - lambda q=query: large_store.search_fts(q, limit=20), - iterations=20 - ) - result.name = f"search '{query}'" - print(f"\n{result}") - - def test_search_limit_scaling(self, medium_store): - """Test how search time scales with result limit.""" - print("\n" + "="*60) - print("FTS5 SEARCH - LIMIT SCALING") - print("="*60) - - limits = [5, 10, 20, 50, 100, 200] - - for limit in limits: - result = benchmark( - lambda l=limit: medium_store.search_fts("function", limit=l), - iterations=20 - ) - result.name = f"limit={limit}" - print(f"\n{result}") - - def test_complex_query_performance(self, medium_store): - """Test performance of complex FTS5 queries.""" - print("\n" + "="*60) - print("FTS5 SEARCH - COMPLEX QUERIES") - print("="*60) - - queries = [ - ("single term", "function"), - ("two terms", "function data"), - ("phrase", '"def function"'), - ("OR query", "function OR result"), - ("wildcard", "func*"), - ("NOT query", "function NOT data"), - ] - - for name, query in queries: - result = benchmark( - lambda q=query: medium_store.search_fts(q, limit=20), - iterations=20 - ) - result.name = name - print(f"\n{result}") - - -class TestSymbolSearchPerformance: - """Symbol search performance benchmarks.""" - - def test_symbol_search_scaling(self, small_store, medium_store, large_store): - """Test symbol search performance at different scales.""" - print("\n" + "="*60) - print("SYMBOL SEARCH - SCALING") - print("="*60) - - stores = [ - ("small (10 files)", small_store), - ("medium (100 files)", medium_store), - ("large (500 files)", large_store), - ] - - for name, store in stores: - result = benchmark( - lambda s=store: s.search_symbols("function", limit=50), - iterations=20 - ) - result.name = name - print(f"\n{result}") - - def test_symbol_search_with_kind_filter(self, medium_store): - """Test symbol search with kind filtering.""" - print("\n" + "="*60) - print("SYMBOL SEARCH - KIND FILTER") - print("="*60) - - # Without filter - result_no_filter = benchmark( - lambda: medium_store.search_symbols("function", limit=50), - iterations=20 - ) - result_no_filter.name = "no filter" - print(f"\n{result_no_filter}") - - # With filter - result_with_filter = benchmark( - lambda: medium_store.search_symbols("function", kind="function", limit=50), - iterations=20 - ) - result_with_filter.name = "kind=function" - print(f"\n{result_with_filter}") - - -# === Chain Search Performance Tests === - -class TestChainSearchPerformance: - """Chain search engine performance benchmarks.""" - - @pytest.fixture - def chain_engine_setup(self, temp_dir): - """Setup chain search engine with directory hierarchy.""" - # Create directory hierarchy - root = temp_dir / "project" - root.mkdir(exist_ok=True) - - registry = RegistryStore(temp_dir / "registry.db") - registry.initialize() - mapper = PathMapper(temp_dir / "indexes") - - # Create indexes at different depths - dirs = [ - root, - root / "src", - root / "src" / "core", - root / "src" / "utils", - root / "tests", - ] - - for i, dir_path in enumerate(dirs): - dir_path.mkdir(exist_ok=True) - index_path = mapper.source_to_index_db(dir_path) - index_path.parent.mkdir(parents=True, exist_ok=True) - - store = DirIndexStore(index_path) - store.initialize() - for j in range(20): # 20 files per directory - indexed_file, content = generate_code_file(i * 100 + j, lines=50) - file_path = str(dir_path / f"file_{j}.py") - store.add_file( - name=f"file_{j}.py", - full_path=file_path, - content=content, - language="python", - symbols=indexed_file.symbols, - ) - store.close() - - # Register directory - project = registry.register_project(root, mapper.source_to_index_dir(root)) - registry.register_dir(project.id, dir_path, index_path, i, 20) - - engine = ChainSearchEngine(registry, mapper) - - yield { - "engine": engine, - "registry": registry, - "root": root, - } - - registry.close() - - def test_chain_search_depth(self, chain_engine_setup): - """Test chain search at different depths.""" - print("\n" + "="*60) - print("CHAIN SEARCH - DEPTH VARIATION") - print("="*60) - - engine = chain_engine_setup["engine"] - root = chain_engine_setup["root"] - - depths = [0, 1, 2, -1] # -1 = unlimited - - for depth in depths: - options = SearchOptions(depth=depth, max_workers=4, total_limit=50) - result = benchmark( - lambda d=depth, o=options: engine.search("function", root, o), - iterations=10 - ) - result.name = f"depth={depth}" - print(f"\n{result}") - - def test_chain_search_parallelism(self, chain_engine_setup): - """Test chain search with different worker counts.""" - print("\n" + "="*60) - print("CHAIN SEARCH - PARALLELISM") - print("="*60) - - engine = chain_engine_setup["engine"] - root = chain_engine_setup["root"] - - worker_counts = [1, 2, 4, 8] - - for workers in worker_counts: - options = SearchOptions(depth=-1, max_workers=workers, total_limit=50) - result = benchmark( - lambda w=workers, o=options: engine.search("function", root, o), - iterations=10 - ) - result.name = f"workers={workers}" - print(f"\n{result}") - - -# === Semantic Search Performance Tests === - -class TestSemanticSearchPerformance: - """Semantic search performance benchmarks.""" - - @pytest.fixture - def semantic_setup(self, temp_dir): - """Setup semantic search with embeddings.""" - try: - from codexlens.semantic import SEMANTIC_AVAILABLE - if not SEMANTIC_AVAILABLE: - pytest.skip("Semantic search dependencies not installed") - - from codexlens.semantic.embedder import Embedder - from codexlens.semantic.vector_store import VectorStore - from codexlens.entities import SemanticChunk - - embedder = Embedder() - db_path = temp_dir / "semantic.db" - vector_store = VectorStore(db_path) - - # Add test chunks - code_samples = [ - "def authenticate_user(username, password): verify user credentials", - "class DatabaseConnection: manage database connections with pooling", - "async def fetch_api_data(url): make HTTP request and return JSON", - "function renderComponent(props): render React UI component", - "def process_data(input): transform and validate input data", - ] * 50 # 250 chunks - - for i, content in enumerate(code_samples): - chunk = SemanticChunk( - content=content, - metadata={"index": i, "language": "python"} - ) - chunk.embedding = embedder.embed_single(content) - vector_store.add_chunk(chunk, f"/test/file_{i}.py") - - yield { - "embedder": embedder, - "vector_store": vector_store, - } - - # Clean up vector store cache - vector_store.clear_cache() - - except ImportError: - pytest.skip("Semantic search dependencies not installed") - - def test_embedding_generation_speed(self, semantic_setup): - """Benchmark embedding generation speed.""" - print("\n" + "="*60) - print("SEMANTIC SEARCH - EMBEDDING GENERATION") - print("="*60) - - embedder = semantic_setup["embedder"] - - # Single embedding - result = benchmark( - lambda: embedder.embed_single("def example_function(): return 42"), - iterations=50 - ) - result.name = "single embedding" - print(f"\n{result}") - - # Batch embedding - texts = ["def func{}(): return {}".format(i, i) for i in range(10)] - result = benchmark( - lambda: embedder.embed(texts), - iterations=20 - ) - result.name = "batch embedding (10 texts)" - print(f"\n{result}") - - def test_vector_search_speed(self, semantic_setup): - """Benchmark vector similarity search speed.""" - print("\n" + "="*60) - print("SEMANTIC SEARCH - VECTOR SEARCH") - print("="*60) - - embedder = semantic_setup["embedder"] - vector_store = semantic_setup["vector_store"] - - query_embedding = embedder.embed_single("user authentication login") - - # Different top_k values - for top_k in [5, 10, 20, 50]: - result = benchmark( - lambda k=top_k: vector_store.search_similar(query_embedding, top_k=k), - iterations=30 - ) - result.name = f"top_k={top_k}" - print(f"\n{result}") - - def test_full_semantic_search_latency(self, semantic_setup): - """Benchmark full semantic search (embed + search).""" - print("\n" + "="*60) - print("SEMANTIC SEARCH - FULL LATENCY") - print("="*60) - - embedder = semantic_setup["embedder"] - vector_store = semantic_setup["vector_store"] - - queries = [ - "user authentication", - "database connection", - "API request handler", - "React component", - "data processing", - ] - - for query in queries: - def full_search(q=query): - embedding = embedder.embed_single(q) - return vector_store.search_similar(embedding, top_k=10) - - result = benchmark(full_search, iterations=20) - result.name = f"'{query}'" - print(f"\n{result}") - - -# === Comparative Benchmarks === - -class TestComparativeBenchmarks: - """Compare FTS5 vs Semantic search performance.""" - - @pytest.fixture - def comparison_setup(self, temp_dir): - """Setup both FTS5 and semantic stores with same content.""" - # FTS5 store - fts_store = SQLiteStore(temp_dir / "fts_compare.db") - fts_store.initialize() - - code_samples = [ - ("auth.py", "def authenticate_user(username, password): verify credentials"), - ("db.py", "class DatabasePool: manage database connection pooling"), - ("api.py", "async def handle_request(req): process API request"), - ("ui.py", "function Button({ onClick }): render button component"), - ("utils.py", "def process_data(input): transform and validate data"), - ] * 20 - - for i, (filename, content) in enumerate(code_samples): - indexed_file = IndexedFile( - path=f"/project/{filename.replace('.py', '')}_{i}.py", - language="python", - symbols=[Symbol(name=f"func_{i}", kind="function", range=(1, 5))], - ) - fts_store.add_file(indexed_file, content) - - # Semantic store (if available) - try: - from codexlens.semantic import SEMANTIC_AVAILABLE - if SEMANTIC_AVAILABLE: - from codexlens.semantic.embedder import Embedder - from codexlens.semantic.vector_store import VectorStore - from codexlens.entities import SemanticChunk - - embedder = Embedder() - semantic_store = VectorStore(temp_dir / "semantic_compare.db") - - for i, (filename, content) in enumerate(code_samples): - chunk = SemanticChunk(content=content, metadata={"index": i}) - chunk.embedding = embedder.embed_single(content) - semantic_store.add_chunk(chunk, f"/project/{filename}") - - yield { - "fts_store": fts_store, - "semantic_store": semantic_store, - "embedder": embedder, - "has_semantic": True, - } - # Close semantic store connection - semantic_store.clear_cache() - else: - yield {"fts_store": fts_store, "has_semantic": False} - except ImportError: - yield {"fts_store": fts_store, "has_semantic": False} - - fts_store.close() - - def test_fts_vs_semantic_latency(self, comparison_setup): - """Compare FTS5 vs Semantic search latency.""" - print("\n" + "="*60) - print("FTS5 vs SEMANTIC - LATENCY COMPARISON") - print("="*60) - - fts_store = comparison_setup["fts_store"] - - queries = [ - "authenticate", - "database", - "request", - "button", - "process", - ] - - print("\nFTS5 Search:") - for query in queries: - result = benchmark( - lambda q=query: fts_store.search_fts(q, limit=10), - iterations=30 - ) - result.name = f"'{query}'" - print(f" {result.name}: avg={result.avg_time_ms:.2f}ms") - - if comparison_setup.get("has_semantic"): - semantic_store = comparison_setup["semantic_store"] - embedder = comparison_setup["embedder"] - - print("\nSemantic Search (embed + search):") - for query in queries: - def semantic_search(q=query): - emb = embedder.embed_single(q) - return semantic_store.search_similar(emb, top_k=10) - - result = benchmark(semantic_search, iterations=20) - result.name = f"'{query}'" - print(f" {result.name}: avg={result.avg_time_ms:.2f}ms") - else: - print("\n(Semantic search not available)") - - -# === Memory Usage Tests === - -class TestMemoryUsage: - """Memory usage during search operations.""" - - def test_search_memory_footprint(self, medium_store): - """Measure memory footprint during search.""" - print("\n" + "="*60) - print("MEMORY USAGE - SEARCH OPERATIONS") - print("="*60) - - import tracemalloc - - tracemalloc.start() - - # Run multiple searches - for _ in range(100): - medium_store.search_fts("function", limit=20) - - current, peak = tracemalloc.get_traced_memory() - tracemalloc.stop() - - print(f"\nAfter 100 FTS5 searches:") - print(f" Current memory: {current / 1024 / 1024:.2f} MB") - print(f" Peak memory: {peak / 1024 / 1024:.2f} MB") - - -if __name__ == "__main__": - pytest.main([__file__, "-v", "-s", "--tb=short"]) diff --git a/codex-lens/tests/test_semantic.py b/codex-lens/tests/test_semantic.py deleted file mode 100644 index 3470dbd4..00000000 --- a/codex-lens/tests/test_semantic.py +++ /dev/null @@ -1,290 +0,0 @@ -"""Tests for CodexLens semantic module.""" - -import pytest - -from codexlens.entities import SemanticChunk, Symbol -from codexlens.semantic.chunker import ChunkConfig, Chunker - - -class TestChunkConfig: - """Tests for ChunkConfig.""" - - def test_default_config(self): - """Test default configuration values.""" - config = ChunkConfig() - assert config.max_chunk_size == 1000 - assert config.overlap == 200 - assert config.min_chunk_size == 50 - - def test_custom_config(self): - """Test custom configuration.""" - config = ChunkConfig(max_chunk_size=2000, overlap=200, min_chunk_size=100) - assert config.max_chunk_size == 2000 - assert config.overlap == 200 - assert config.min_chunk_size == 100 - - -class TestChunker: - """Tests for Chunker class.""" - - def test_chunker_default_config(self): - """Test chunker with default config.""" - chunker = Chunker() - assert chunker.config.max_chunk_size == 1000 - - def test_chunker_custom_config(self): - """Test chunker with custom config.""" - config = ChunkConfig(max_chunk_size=500) - chunker = Chunker(config=config) - assert chunker.config.max_chunk_size == 500 - - -class TestChunkBySymbol: - """Tests for symbol-based chunking.""" - - def test_chunk_single_function(self): - """Test chunking a single function.""" - # Use config with smaller min_chunk_size - config = ChunkConfig(min_chunk_size=10) - chunker = Chunker(config=config) - content = "def hello():\n print('hello')\n return True\n" - symbols = [Symbol(name="hello", kind="function", range=(1, 3))] - - chunks = chunker.chunk_by_symbol(content, symbols, "test.py", "python") - - assert len(chunks) == 1 - assert "def hello():" in chunks[0].content - assert chunks[0].metadata["symbol_name"] == "hello" - assert chunks[0].metadata["symbol_kind"] == "function" - assert chunks[0].metadata["file"] == "test.py" - assert chunks[0].metadata["language"] == "python" - assert chunks[0].metadata["strategy"] == "symbol" - - def test_chunk_multiple_symbols(self): - """Test chunking multiple symbols.""" - # Use config with smaller min_chunk_size - config = ChunkConfig(min_chunk_size=5) - chunker = Chunker(config=config) - content = """def foo(): - pass - -def bar(): - pass - -class MyClass: - pass -""" - symbols = [ - Symbol(name="foo", kind="function", range=(1, 2)), - Symbol(name="bar", kind="function", range=(4, 5)), - Symbol(name="MyClass", kind="class", range=(7, 8)), - ] - - chunks = chunker.chunk_by_symbol(content, symbols, "test.py", "python") - - assert len(chunks) == 3 - names = [c.metadata["symbol_name"] for c in chunks] - assert "foo" in names - assert "bar" in names - assert "MyClass" in names - - def test_chunk_skips_small_content(self): - """Test that chunks smaller than min_chunk_size are skipped.""" - config = ChunkConfig(min_chunk_size=100) - chunker = Chunker(config=config) - content = "def x():\n pass\n" - symbols = [Symbol(name="x", kind="function", range=(1, 2))] - - chunks = chunker.chunk_by_symbol(content, symbols, "test.py", "python") - - assert len(chunks) == 0 # Content is too small - - def test_chunk_preserves_line_numbers(self): - """Test that chunks preserve correct line numbers.""" - config = ChunkConfig(min_chunk_size=5) - chunker = Chunker(config=config) - content = "# comment\ndef hello():\n pass\n" - symbols = [Symbol(name="hello", kind="function", range=(2, 3))] - - chunks = chunker.chunk_by_symbol(content, symbols, "test.py", "python") - - assert len(chunks) == 1 - assert chunks[0].metadata["start_line"] == 2 - assert chunks[0].metadata["end_line"] == 3 - - def test_chunk_handles_empty_symbols(self): - """Test chunking with empty symbols list.""" - chunker = Chunker() - content = "# just a comment" - symbols = [] - - chunks = chunker.chunk_by_symbol(content, symbols, "test.py", "python") - - assert len(chunks) == 0 - - -class TestChunkSlidingWindow: - """Tests for sliding window chunking.""" - - def test_sliding_window_basic(self): - """Test basic sliding window chunking.""" - config = ChunkConfig(max_chunk_size=100, overlap=20, min_chunk_size=10) - chunker = Chunker(config=config) - - # Create content with multiple lines - lines = [f"line {i} content here\n" for i in range(20)] - content = "".join(lines) - - chunks = chunker.chunk_sliding_window(content, "test.py", "python") - - assert len(chunks) > 0 - for chunk in chunks: - assert chunk.metadata["strategy"] == "sliding_window" - assert chunk.metadata["file"] == "test.py" - assert chunk.metadata["language"] == "python" - - def test_sliding_window_empty_content(self): - """Test sliding window with empty content.""" - chunker = Chunker() - chunks = chunker.chunk_sliding_window("", "test.py", "python") - assert len(chunks) == 0 - - def test_sliding_window_small_content(self): - """Test sliding window with content smaller than chunk size.""" - config = ChunkConfig(max_chunk_size=1000, min_chunk_size=10) - chunker = Chunker(config=config) - content = "small content here" - - chunks = chunker.chunk_sliding_window(content, "test.py", "python") - - # Small content should produce one chunk - assert len(chunks) <= 1 - - def test_sliding_window_chunk_indices(self): - """Test that chunk indices are sequential.""" - config = ChunkConfig(max_chunk_size=50, overlap=10, min_chunk_size=5) - chunker = Chunker(config=config) - lines = [f"line {i}\n" for i in range(50)] - content = "".join(lines) - - chunks = chunker.chunk_sliding_window(content, "test.py", "python") - - if len(chunks) > 1: - indices = [c.metadata["chunk_index"] for c in chunks] - assert indices == list(range(len(chunks))) - - -class TestChunkFile: - """Tests for chunk_file method.""" - - def test_chunk_file_with_symbols(self): - """Test chunk_file uses symbol-based chunking when symbols available.""" - chunker = Chunker() - content = "def hello():\n print('world')\n return 42\n" - symbols = [Symbol(name="hello", kind="function", range=(1, 3))] - - chunks = chunker.chunk_file(content, symbols, "test.py", "python") - - assert all(c.metadata["strategy"] == "symbol" for c in chunks) - - def test_chunk_file_without_symbols(self): - """Test chunk_file uses sliding window when no symbols.""" - config = ChunkConfig(min_chunk_size=5) - chunker = Chunker(config=config) - content = "# just comments\n# more comments\n# even more\n" - - chunks = chunker.chunk_file(content, [], "test.py", "python") - - # Should use sliding window strategy - if len(chunks) > 0: - assert all(c.metadata["strategy"] == "sliding_window" for c in chunks) - - -class TestChunkMetadata: - """Tests for chunk metadata.""" - - def test_symbol_chunk_metadata_complete(self): - """Test that symbol chunks have complete metadata.""" - config = ChunkConfig(min_chunk_size=10) - chunker = Chunker(config=config) - content = "class MyClass:\n def method(self):\n pass\n" - symbols = [Symbol(name="MyClass", kind="class", range=(1, 3))] - - chunks = chunker.chunk_by_symbol(content, symbols, "/path/to/file.py", "python") - - assert len(chunks) == 1 - meta = chunks[0].metadata - assert meta["file"] == "/path/to/file.py" - assert meta["language"] == "python" - assert meta["symbol_name"] == "MyClass" - assert meta["symbol_kind"] == "class" - assert meta["start_line"] == 1 - assert meta["end_line"] == 3 - assert meta["strategy"] == "symbol" - - def test_sliding_window_metadata_complete(self): - """Test that sliding window chunks have complete metadata.""" - config = ChunkConfig(min_chunk_size=5) - chunker = Chunker(config=config) - content = "some content here\nmore content\n" - - chunks = chunker.chunk_sliding_window(content, "/path/file.js", "javascript") - - if len(chunks) > 0: - meta = chunks[0].metadata - assert meta["file"] == "/path/file.js" - assert meta["language"] == "javascript" - assert "chunk_index" in meta - assert "start_line" in meta - assert "end_line" in meta - assert meta["strategy"] == "sliding_window" - - -class TestChunkEdgeCases: - """Edge case tests for chunking.""" - - def test_chunk_with_unicode(self): - """Test chunking content with unicode characters.""" - config = ChunkConfig(min_chunk_size=10) - chunker = Chunker(config=config) - content = "def 你好():\n print('世界')\n return '🎉'\n" - symbols = [Symbol(name="你好", kind="function", range=(1, 3))] - - chunks = chunker.chunk_by_symbol(content, symbols, "test.py", "python") - - assert len(chunks) == 1 - assert "你好" in chunks[0].content - - def test_chunk_with_windows_line_endings(self): - """Test chunking with Windows-style line endings.""" - chunker = Chunker() - content = "def hello():\r\n pass\r\n" - symbols = [Symbol(name="hello", kind="function", range=(1, 2))] - - chunks = chunker.chunk_by_symbol(content, symbols, "test.py", "python") - - # Should handle without errors - assert len(chunks) <= 1 - - def test_chunk_range_out_of_bounds(self): - """Test chunking when symbol range exceeds content.""" - chunker = Chunker() - content = "def hello():\n pass\n" - # Symbol range goes beyond content - symbols = [Symbol(name="hello", kind="function", range=(1, 100))] - - # Should not crash, just handle gracefully - chunks = chunker.chunk_by_symbol(content, symbols, "test.py", "python") - assert len(chunks) <= 1 - - def test_chunk_content_returned_as_semantic_chunk(self): - """Test that returned chunks are SemanticChunk instances.""" - chunker = Chunker() - content = "def test():\n return True\n" - symbols = [Symbol(name="test", kind="function", range=(1, 2))] - - chunks = chunker.chunk_by_symbol(content, symbols, "test.py", "python") - - for chunk in chunks: - assert isinstance(chunk, SemanticChunk) - assert chunk.embedding is None # Not embedded yet diff --git a/codex-lens/tests/test_semantic_search.py b/codex-lens/tests/test_semantic_search.py deleted file mode 100644 index 2bb781c7..00000000 --- a/codex-lens/tests/test_semantic_search.py +++ /dev/null @@ -1,804 +0,0 @@ -"""Comprehensive tests for semantic search functionality. - -Tests embedding generation, vector storage, and semantic similarity search -across complex codebases with various file types and content patterns. -""" - -import json -import os -import shutil -import tempfile -import time -from pathlib import Path -from typing import List, Dict, Any - -import pytest - -from codexlens.entities import SemanticChunk, Symbol -from codexlens.semantic import SEMANTIC_AVAILABLE, SEMANTIC_BACKEND, check_semantic_available - -# Skip all tests if semantic search not available -pytestmark = pytest.mark.skipif( - not SEMANTIC_AVAILABLE, - reason="Semantic search dependencies not installed" -) - - -class TestEmbedderPerformance: - """Test Embedder performance and quality.""" - - @pytest.fixture - def embedder(self): - """Create embedder instance.""" - from codexlens.semantic.embedder import Embedder - return Embedder() - - def test_single_embedding(self, embedder): - """Test single text embedding.""" - text = "def calculate_sum(a, b): return a + b" - - start = time.time() - embedding = embedder.embed_single(text) - elapsed = time.time() - start - - assert len(embedding) == 384, "Embedding dimension should be 384" - assert all(isinstance(x, float) for x in embedding) - print(f"\nSingle embedding time: {elapsed*1000:.2f}ms") - - def test_batch_embedding_performance(self, embedder): - """Test batch embedding performance.""" - texts = [ - "def hello(): print('world')", - "class Calculator: def add(self, a, b): return a + b", - "async def fetch_data(url): return await client.get(url)", - "const processData = (data) => data.map(x => x * 2)", - "function initializeApp() { console.log('Starting...'); }", - ] * 10 # 50 texts total - - start = time.time() - embeddings = embedder.embed(texts) - elapsed = time.time() - start - - assert len(embeddings) == len(texts) - print(f"\nBatch embedding ({len(texts)} texts): {elapsed*1000:.2f}ms") - print(f"Per-text average: {elapsed/len(texts)*1000:.2f}ms") - - def test_embedding_similarity(self, embedder): - """Test that similar code has similar embeddings.""" - from codexlens.semantic.vector_store import _cosine_similarity - - # Similar functions (should have high similarity) - code1 = "def add(a, b): return a + b" - code2 = "def sum_numbers(x, y): return x + y" - - # Different function (should have lower similarity) - code3 = "class UserAuthentication: def login(self, user, password): pass" - - emb1 = embedder.embed_single(code1) - emb2 = embedder.embed_single(code2) - emb3 = embedder.embed_single(code3) - - sim_12 = _cosine_similarity(emb1, emb2) - sim_13 = _cosine_similarity(emb1, emb3) - - print(f"\nSimilarity (add vs sum_numbers): {sim_12:.4f}") - print(f"Similarity (add vs login): {sim_13:.4f}") - - assert sim_12 > sim_13, "Similar code should have higher similarity" - assert sim_12 > 0.6, "Similar functions should have >0.6 similarity" - - -class TestVectorStore: - """Test VectorStore functionality.""" - - @pytest.fixture - def temp_db(self, tmp_path): - """Create temporary database.""" - return tmp_path / "semantic.db" - - @pytest.fixture - def vector_store(self, temp_db): - """Create vector store instance.""" - from codexlens.semantic.vector_store import VectorStore - return VectorStore(temp_db) - - @pytest.fixture - def embedder(self): - """Create embedder instance.""" - from codexlens.semantic.embedder import Embedder - return Embedder() - - def test_add_and_search_chunks(self, vector_store, embedder): - """Test adding chunks and searching.""" - # Create test chunks with embeddings - chunks = [ - SemanticChunk( - content="def calculate_sum(a, b): return a + b", - metadata={"symbol": "calculate_sum", "language": "python"} - ), - SemanticChunk( - content="class UserManager: def create_user(self): pass", - metadata={"symbol": "UserManager", "language": "python"} - ), - SemanticChunk( - content="async function fetchData(url) { return await fetch(url); }", - metadata={"symbol": "fetchData", "language": "javascript"} - ), - ] - - # Add embeddings - for chunk in chunks: - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, "/test/file.py") - - # Search for similar code - query = "function to add two numbers together" - query_embedding = embedder.embed_single(query) - - results = vector_store.search_similar(query_embedding, top_k=3) - - assert len(results) > 0, "Should find results" - assert "calculate_sum" in results[0].excerpt or "sum" in results[0].excerpt.lower() - - print(f"\nQuery: '{query}'") - for i, r in enumerate(results): - print(f" {i+1}. Score: {r.score:.4f} - {r.excerpt[:50]}...") - - def test_min_score_filtering(self, vector_store, embedder): - """Test minimum score filtering.""" - # Add a chunk - chunk = SemanticChunk( - content="def hello_world(): print('Hello, World!')", - metadata={} - ) - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, "/test/hello.py") - - # Search with unrelated query - query = "database connection pool management" - query_embedding = embedder.embed_single(query) - - # Low threshold - should find result - results_low = vector_store.search_similar(query_embedding, min_score=0.0) - - # High threshold - might filter out - results_high = vector_store.search_similar(query_embedding, min_score=0.8) - - print(f"\nResults with min_score=0.0: {len(results_low)}") - print(f"Results with min_score=0.8: {len(results_high)}") - - assert len(results_low) >= len(results_high) - - -class TestSemanticSearchIntegration: - """Integration tests for semantic search on real-like codebases.""" - - @pytest.fixture - def complex_codebase(self, tmp_path): - """Create a complex test codebase.""" - # Python files - (tmp_path / "src").mkdir() - (tmp_path / "src" / "auth.py").write_text(''' -"""Authentication module.""" - -class AuthenticationService: - """Handle user authentication and authorization.""" - - def __init__(self, secret_key: str): - self.secret_key = secret_key - self.token_expiry = 3600 - - def login(self, username: str, password: str) -> dict: - """Authenticate user and return JWT token.""" - user = self._validate_credentials(username, password) - if user: - return self._generate_token(user) - raise AuthError("Invalid credentials") - - def logout(self, token: str) -> bool: - """Invalidate user session.""" - return self._revoke_token(token) - - def verify_token(self, token: str) -> dict: - """Verify JWT token and return user claims.""" - pass - -def hash_password(password: str) -> str: - """Hash password using bcrypt.""" - import hashlib - return hashlib.sha256(password.encode()).hexdigest() -''') - - (tmp_path / "src" / "database.py").write_text(''' -"""Database connection and ORM.""" - -from typing import List, Optional - -class DatabaseConnection: - """Manage database connections with pooling.""" - - def __init__(self, connection_string: str, pool_size: int = 5): - self.connection_string = connection_string - self.pool_size = pool_size - self._pool = [] - - def connect(self) -> "Connection": - """Get connection from pool.""" - if self._pool: - return self._pool.pop() - return self._create_connection() - - def release(self, conn: "Connection"): - """Return connection to pool.""" - if len(self._pool) < self.pool_size: - self._pool.append(conn) - -class QueryBuilder: - """SQL query builder with fluent interface.""" - - def select(self, *columns) -> "QueryBuilder": - pass - - def where(self, condition: str) -> "QueryBuilder": - pass - - def execute(self) -> List[dict]: - pass -''') - - (tmp_path / "src" / "api.py").write_text(''' -"""REST API endpoints.""" - -from typing import List, Dict, Any - -class APIRouter: - """Route HTTP requests to handlers.""" - - def __init__(self): - self.routes = {} - - def get(self, path: str): - """Register GET endpoint.""" - def decorator(func): - self.routes[("GET", path)] = func - return func - return decorator - - def post(self, path: str): - """Register POST endpoint.""" - def decorator(func): - self.routes[("POST", path)] = func - return func - return decorator - -async def handle_request(method: str, path: str, body: Dict) -> Dict: - """Process incoming HTTP request.""" - pass - -def validate_json_schema(data: Dict, schema: Dict) -> bool: - """Validate request data against JSON schema.""" - pass -''') - - # JavaScript files - (tmp_path / "frontend").mkdir() - (tmp_path / "frontend" / "components.js").write_text(''' -/** - * React UI Components - */ - -class UserProfile extends Component { - constructor(props) { - super(props); - this.state = { user: null, loading: true }; - } - - async componentDidMount() { - const user = await fetchUserData(this.props.userId); - this.setState({ user, loading: false }); - } - - render() { - if (this.state.loading) return ; - return ; - } -} - -function Button({ onClick, children, variant = "primary" }) { - return ( - - ); -} - -const FormInput = ({ label, value, onChange, type = "text" }) => { - return ( -

- ); -}; -''') - - (tmp_path / "frontend" / "api.js").write_text(''' -/** - * API Client for backend communication - */ - -const API_BASE = "/api/v1"; - -async function fetchUserData(userId) { - const response = await fetch(`${API_BASE}/users/${userId}`); - if (!response.ok) throw new Error("Failed to fetch user"); - return response.json(); -} - -async function createUser(userData) { - const response = await fetch(`${API_BASE}/users`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify(userData) - }); - return response.json(); -} - -async function updateUserProfile(userId, updates) { - const response = await fetch(`${API_BASE}/users/${userId}`, { - method: "PATCH", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify(updates) - }); - return response.json(); -} - -class WebSocketClient { - constructor(url) { - this.url = url; - this.ws = null; - this.handlers = {}; - } - - connect() { - this.ws = new WebSocket(this.url); - this.ws.onmessage = (event) => this._handleMessage(event); - } - - on(eventType, handler) { - this.handlers[eventType] = handler; - } -} -''') - - return tmp_path - - @pytest.fixture - def indexed_codebase(self, complex_codebase, tmp_path): - """Index the complex codebase with semantic embeddings.""" - from codexlens.semantic.embedder import Embedder - from codexlens.semantic.vector_store import VectorStore - from codexlens.semantic.chunker import Chunker, ChunkConfig - from codexlens.parsers.factory import ParserFactory - from codexlens.config import Config - - db_path = tmp_path / "semantic.db" - vector_store = VectorStore(db_path) - embedder = Embedder() - config = Config() - factory = ParserFactory(config) - chunker = Chunker(ChunkConfig(min_chunk_size=20, max_chunk_size=500)) - - # Index all source files - indexed_files = [] - for ext in ["*.py", "*.js"]: - for file_path in complex_codebase.rglob(ext): - content = file_path.read_text() - language = "python" if file_path.suffix == ".py" else "javascript" - - # Parse symbols - parser = factory.get_parser(language) - indexed_file = parser.parse(content, file_path) - - # Create chunks - chunks = chunker.chunk_file( - content, - indexed_file.symbols, - str(file_path), - language - ) - - # Add embeddings and store - for chunk in chunks: - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, str(file_path)) - - indexed_files.append(str(file_path)) - - return { - "vector_store": vector_store, - "embedder": embedder, - "files": indexed_files, - "codebase_path": complex_codebase - } - - def test_semantic_search_accuracy(self, indexed_codebase): - """Test semantic search accuracy on complex queries.""" - vector_store = indexed_codebase["vector_store"] - embedder = indexed_codebase["embedder"] - - test_queries = [ - { - "query": "user authentication login function", - "expected_contains": ["login", "auth", "credential"], - "expected_not_contains": ["database", "button"] - }, - { - "query": "database connection pooling", - "expected_contains": ["connect", "pool", "database"], - "expected_not_contains": ["login", "button"] - }, - { - "query": "React component for user profile", - "expected_contains": ["UserProfile", "component", "render"], - "expected_not_contains": ["database", "auth"] - }, - { - "query": "HTTP API endpoint handler", - "expected_contains": ["API", "request", "handle"], - "expected_not_contains": ["UserProfile", "button"] - }, - { - "query": "form input UI element", - "expected_contains": ["input", "form", "label"], - "expected_not_contains": ["database", "auth"] - } - ] - - print("\n" + "="*60) - print("SEMANTIC SEARCH ACCURACY TEST") - print("="*60) - - for test in test_queries: - query = test["query"] - query_embedding = embedder.embed_single(query) - - results = vector_store.search_similar(query_embedding, top_k=5, min_score=0.3) - - print(f"\nQuery: '{query}'") - print("-" * 40) - - # Check results - all_excerpts = " ".join([r.excerpt.lower() for r in results]) - - found_expected = [] - for expected in test["expected_contains"]: - if expected.lower() in all_excerpts: - found_expected.append(expected) - - found_unexpected = [] - for unexpected in test["expected_not_contains"]: - if unexpected.lower() in all_excerpts: - found_unexpected.append(unexpected) - - for i, r in enumerate(results[:3]): - print(f" {i+1}. Score: {r.score:.4f}") - print(f" File: {Path(r.path).name}") - print(f" Excerpt: {r.excerpt[:80]}...") - - print(f"\n [OK] Found expected: {found_expected}") - if found_unexpected: - print(f" [WARN] Found unexpected: {found_unexpected}") - - def test_search_performance(self, indexed_codebase): - """Test search performance with various parameters.""" - vector_store = indexed_codebase["vector_store"] - embedder = indexed_codebase["embedder"] - - query = "function to handle user data" - query_embedding = embedder.embed_single(query) - - print("\n" + "="*60) - print("SEARCH PERFORMANCE TEST") - print("="*60) - - # Test different top_k values - for top_k in [5, 10, 20, 50]: - start = time.time() - results = vector_store.search_similar(query_embedding, top_k=top_k) - elapsed = time.time() - start - - print(f"top_k={top_k}: {elapsed*1000:.2f}ms ({len(results)} results)") - - # Test different min_score values - print("\nMin score filtering:") - for min_score in [0.0, 0.3, 0.5, 0.7]: - start = time.time() - results = vector_store.search_similar(query_embedding, top_k=50, min_score=min_score) - elapsed = time.time() - start - - print(f"min_score={min_score}: {elapsed*1000:.2f}ms ({len(results)} results)") - - -class TestChunkerOptimization: - """Test chunker parameters for optimal semantic search.""" - - @pytest.fixture - def sample_code(self): - """Long Python file for chunking tests.""" - return ''' -"""Large module with multiple classes and functions.""" - -import os -import sys -from typing import List, Dict, Any, Optional - -# Constants -MAX_RETRIES = 3 -DEFAULT_TIMEOUT = 30 - -class ConfigManager: - """Manage application configuration.""" - - def __init__(self, config_path: str): - self.config_path = config_path - self._config: Dict[str, Any] = {} - - def load(self) -> Dict[str, Any]: - """Load configuration from file.""" - with open(self.config_path) as f: - self._config = json.load(f) - return self._config - - def get(self, key: str, default: Any = None) -> Any: - """Get configuration value.""" - return self._config.get(key, default) - - def set(self, key: str, value: Any) -> None: - """Set configuration value.""" - self._config[key] = value - -class DataProcessor: - """Process and transform data.""" - - def __init__(self, source: str): - self.source = source - self.data: List[Dict] = [] - - def load_data(self) -> List[Dict]: - """Load data from source.""" - # Implementation here - pass - - def transform(self, transformers: List[callable]) -> List[Dict]: - """Apply transformations to data.""" - result = self.data - for transformer in transformers: - result = [transformer(item) for item in result] - return result - - def filter(self, predicate: callable) -> List[Dict]: - """Filter data by predicate.""" - return [item for item in self.data if predicate(item)] - - def aggregate(self, key: str, aggregator: callable) -> Dict: - """Aggregate data by key.""" - groups: Dict[str, List] = {} - for item in self.data: - k = item.get(key) - if k not in groups: - groups[k] = [] - groups[k].append(item) - return {k: aggregator(v) for k, v in groups.items()} - -def validate_input(data: Dict, schema: Dict) -> bool: - """Validate input data against schema.""" - for field, rules in schema.items(): - if rules.get("required") and field not in data: - return False - if field in data: - value = data[field] - if "type" in rules and not isinstance(value, rules["type"]): - return False - return True - -def format_output(data: Any, format_type: str = "json") -> str: - """Format output data.""" - if format_type == "json": - return json.dumps(data, indent=2) - elif format_type == "csv": - # CSV formatting - pass - return str(data) - -async def fetch_remote_data(url: str, timeout: int = DEFAULT_TIMEOUT) -> Dict: - """Fetch data from remote URL.""" - async with aiohttp.ClientSession() as session: - async with session.get(url, timeout=timeout) as response: - return await response.json() - -class CacheManager: - """Manage caching with TTL support.""" - - def __init__(self, default_ttl: int = 300): - self.default_ttl = default_ttl - self._cache: Dict[str, tuple] = {} - - def get(self, key: str) -> Optional[Any]: - """Get cached value if not expired.""" - if key in self._cache: - value, expiry = self._cache[key] - if time.time() < expiry: - return value - del self._cache[key] - return None - - def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None: - """Set cached value with TTL.""" - expiry = time.time() + (ttl or self.default_ttl) - self._cache[key] = (value, expiry) - - def invalidate(self, pattern: str) -> int: - """Invalidate cache entries matching pattern.""" - keys_to_delete = [k for k in self._cache if pattern in k] - for k in keys_to_delete: - del self._cache[k] - return len(keys_to_delete) -''' - - def test_chunk_size_comparison(self, sample_code): - """Compare different chunk sizes for search quality.""" - from codexlens.semantic.chunker import Chunker, ChunkConfig - from codexlens.semantic.embedder import Embedder - from codexlens.semantic.vector_store import _cosine_similarity - from codexlens.parsers.factory import ParserFactory - from codexlens.config import Config - - config = Config() - factory = ParserFactory(config) - parser = factory.get_parser("python") - indexed_file = parser.parse(sample_code, Path("/test.py")) - embedder = Embedder() - - print("\n" + "="*60) - print("CHUNK SIZE OPTIMIZATION TEST") - print("="*60) - - # Test different chunk configurations - configs = [ - ChunkConfig(min_chunk_size=20, max_chunk_size=200, overlap=20), - ChunkConfig(min_chunk_size=50, max_chunk_size=500, overlap=50), - ChunkConfig(min_chunk_size=100, max_chunk_size=1000, overlap=100), - ] - - test_query = "cache management with TTL expiration" - query_embedding = embedder.embed_single(test_query) - - for cfg in configs: - chunker = Chunker(cfg) - chunks = chunker.chunk_file( - sample_code, - indexed_file.symbols, - "/test.py", - "python" - ) - - print(f"\nConfig: min={cfg.min_chunk_size}, max={cfg.max_chunk_size}, overlap={cfg.overlap}") - print(f" Chunks generated: {len(chunks)}") - - if chunks: - # Find best matching chunk - best_score = 0 - best_chunk = None - - for chunk in chunks: - chunk.embedding = embedder.embed_single(chunk.content) - score = _cosine_similarity(query_embedding, chunk.embedding) - if score > best_score: - best_score = score - best_chunk = chunk - - if best_chunk: - print(f" Best match score: {best_score:.4f}") - print(f" Best chunk preview: {best_chunk.content[:100]}...") - - def test_symbol_vs_sliding_window(self, sample_code): - """Compare symbol-based vs sliding window chunking.""" - from codexlens.semantic.chunker import Chunker, ChunkConfig - from codexlens.parsers.factory import ParserFactory - from codexlens.config import Config - - config = Config() - factory = ParserFactory(config) - parser = factory.get_parser("python") - indexed_file = parser.parse(sample_code, Path("/test.py")) - - chunker = Chunker(ChunkConfig(min_chunk_size=20)) - - print("\n" + "="*60) - print("CHUNKING STRATEGY COMPARISON") - print("="*60) - - # Symbol-based chunking - symbol_chunks = chunker.chunk_by_symbol( - sample_code, - indexed_file.symbols, - "/test.py", - "python" - ) - - # Sliding window chunking - window_chunks = chunker.chunk_sliding_window( - sample_code, - "/test.py", - "python" - ) - - print(f"\nSymbol-based chunks: {len(symbol_chunks)}") - for i, chunk in enumerate(symbol_chunks[:5]): - symbol_name = chunk.metadata.get("symbol_name", "unknown") - print(f" {i+1}. {symbol_name}: {len(chunk.content)} chars") - - print(f"\nSliding window chunks: {len(window_chunks)}") - for i, chunk in enumerate(window_chunks[:5]): - lines = f"{chunk.metadata.get('start_line', '?')}-{chunk.metadata.get('end_line', '?')}" - print(f" {i+1}. Lines {lines}: {len(chunk.content)} chars") - - -class TestRealWorldScenarios: - """Test real-world semantic search scenarios.""" - - @pytest.fixture - def embedder(self): - from codexlens.semantic.embedder import Embedder - return Embedder() - - def test_natural_language_queries(self, embedder): - """Test various natural language query patterns.""" - from codexlens.semantic.vector_store import _cosine_similarity - - code_samples = { - "auth": "def authenticate_user(username, password): verify credentials and create session", - "db": "class DatabasePool: manage connection pooling for efficient database access", - "api": "async def handle_http_request(req): process incoming REST API calls", - "ui": "function Button({ onClick }) { return }", - "cache": "class LRUCache: implements least recently used caching strategy with TTL", - } - - # Generate embeddings for code - code_embeddings = {k: embedder.embed_single(v) for k, v in code_samples.items()} - - # Test queries - queries = [ - ("How do I log in a user?", "auth"), - ("Database connection management", "db"), - ("REST endpoint handler", "api"), - ("Button component React", "ui"), - ("Caching with expiration", "cache"), - ] - - print("\n" + "="*60) - print("NATURAL LANGUAGE QUERY TEST") - print("="*60) - - correct = 0 - for query, expected_best in queries: - query_embedding = embedder.embed_single(query) - - scores = {k: _cosine_similarity(query_embedding, v) - for k, v in code_embeddings.items()} - - best_match = max(scores.items(), key=lambda x: x[1]) - is_correct = best_match[0] == expected_best - correct += is_correct - - status = "[OK]" if is_correct else "[FAIL]" - print(f"\n{status} Query: '{query}'") - print(f" Expected: {expected_best}, Got: {best_match[0]} (score: {best_match[1]:.4f})") - - accuracy = correct / len(queries) * 100 - print(f"\n\nAccuracy: {accuracy:.1f}% ({correct}/{len(queries)})") - - -if __name__ == "__main__": - pytest.main([__file__, "-v", "-s"]) diff --git a/codex-lens/tests/test_sqlite_store.py b/codex-lens/tests/test_sqlite_store.py deleted file mode 100644 index 63b7a227..00000000 --- a/codex-lens/tests/test_sqlite_store.py +++ /dev/null @@ -1,444 +0,0 @@ -"""Tests for SQLiteStore connection pool behavior.""" - -from __future__ import annotations - -import logging -import sqlite3 -import threading -import time -from pathlib import Path - -import pytest - -from codexlens.entities import IndexedFile -from codexlens.storage.sqlite_store import SQLiteStore - - -def test_periodic_cleanup(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: - """Periodic timer should proactively clean up stale thread connections.""" - monkeypatch.setattr(SQLiteStore, "CLEANUP_INTERVAL", 0.2) - - store = SQLiteStore(tmp_path / "periodic_cleanup.db") - store.initialize() - - cleanup_called = threading.Event() - original_cleanup = store._cleanup_stale_connections - - def wrapped_cleanup() -> None: - cleanup_called.set() - original_cleanup() - - monkeypatch.setattr(store, "_cleanup_stale_connections", wrapped_cleanup) - - created: list[int] = [] - lock = threading.Lock() - main_tid = threading.get_ident() - - def worker() -> None: - store._get_connection() - with lock: - created.append(threading.get_ident()) - - try: - threads = [threading.Thread(target=worker) for _ in range(10)] - for t in threads: - t.start() - for t in threads: - t.join() - - # Ensure we created thread-local connections without reaching MAX_POOL_SIZE. - assert len(store._pool) >= 2 - assert all(tid in store._pool for tid in created) - - # Wait for periodic cleanup to run and prune dead thread connections. - assert cleanup_called.wait(timeout=3) - deadline = time.time() + 3 - while time.time() < deadline and any(tid in store._pool for tid in created): - time.sleep(0.05) - - assert all(tid not in store._pool for tid in created) - assert set(store._pool.keys()).issubset({main_tid}) - finally: - store.close() - - -def test_cleanup_robustness( - monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture, tmp_path: Path -) -> None: - """Cleanup should handle dead threads, idle timeouts, and invalid connections.""" - monkeypatch.setattr(SQLiteStore, "CLEANUP_INTERVAL", 0) - caplog.set_level(logging.DEBUG, logger="codexlens.storage.sqlite_store") - - store = SQLiteStore(tmp_path / "cleanup_robustness.db") - store.initialize() - - try: - # Invalid connection: active thread but pooled connection is already closed. - conn = store._get_connection() - conn.close() - with store._pool_lock: - store._pool[threading.get_ident()] = (conn, time.time()) - store._cleanup_stale_connections() - - assert "invalid_connection" in caplog.text - assert threading.get_ident() not in store._pool - - # Ensure next access recreates a working connection after cleanup. - fresh_conn = store._get_connection() - assert fresh_conn is not conn - - # Idle timeout cleanup should be logged distinctly. - with store._pool_lock: - store._pool[threading.get_ident()] = (fresh_conn, time.time() - store.IDLE_TIMEOUT - 1) - store._cleanup_stale_connections() - - assert "idle_timeout" in caplog.text - assert threading.get_ident() not in store._pool - - # Dead thread cleanup should be logged distinctly. - created: list[int] = [] - - def worker() -> None: - store._get_connection() - created.append(threading.get_ident()) - - t = threading.Thread(target=worker) - t.start() - t.join() - - dead_tid = created[0] - assert dead_tid in store._pool - with store._pool_lock: - store._cleanup_stale_connections() - - assert "dead_thread" in caplog.text - assert dead_tid not in store._pool - finally: - store.close() - - -def test_add_files_rollback_preserves_original_exception(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: - """add_files should re-raise the transaction error when rollback succeeds.""" - monkeypatch.setattr(SQLiteStore, "CLEANUP_INTERVAL", 0) - store = SQLiteStore(tmp_path / "add_files_ok.db") - store.initialize() - - real_conn = store._get_connection() - - class FailingConnection: - def __init__(self, conn: sqlite3.Connection) -> None: - self._conn = conn - self.rollback_calls = 0 - - def execute(self, sql: str, params: tuple = ()): - if "INSERT INTO files" in sql: - raise sqlite3.OperationalError("boom") - return self._conn.execute(sql, params) - - def executemany(self, sql: str, seq): - return self._conn.executemany(sql, seq) - - def commit(self) -> None: - self._conn.commit() - - def rollback(self) -> None: - self.rollback_calls += 1 - self._conn.rollback() - - wrapped = FailingConnection(real_conn) - monkeypatch.setattr(store, "_get_connection", lambda: wrapped) - - indexed_file = IndexedFile(path=str(tmp_path / "a.py"), language="python", symbols=[]) - - try: - with pytest.raises(sqlite3.OperationalError, match="boom"): - store.add_files([(indexed_file, "# content")]) - assert wrapped.rollback_calls == 1 - finally: - store.close() - - -def test_add_files_rollback_failure_is_chained( - tmp_path: Path, monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture -) -> None: - """Rollback failures should be logged and chained as the cause.""" - monkeypatch.setattr(SQLiteStore, "CLEANUP_INTERVAL", 0) - caplog.set_level(logging.ERROR, logger="codexlens.storage.sqlite_store") - - store = SQLiteStore(tmp_path / "add_files_rollback_fail.db") - store.initialize() - real_conn = store._get_connection() - - class FailingRollbackConnection: - def __init__(self, conn: sqlite3.Connection) -> None: - self._conn = conn - - def execute(self, sql: str, params: tuple = ()): - if "INSERT INTO files" in sql: - raise sqlite3.OperationalError("boom") - return self._conn.execute(sql, params) - - def executemany(self, sql: str, seq): - return self._conn.executemany(sql, seq) - - def commit(self) -> None: - self._conn.commit() - - def rollback(self) -> None: - raise sqlite3.OperationalError("rollback boom") - - monkeypatch.setattr(store, "_get_connection", lambda: FailingRollbackConnection(real_conn)) - indexed_file = IndexedFile(path=str(tmp_path / "b.py"), language="python", symbols=[]) - - try: - with pytest.raises(sqlite3.OperationalError) as exc: - store.add_files([(indexed_file, "# content")]) - - assert exc.value.__cause__ is not None - assert isinstance(exc.value.__cause__, sqlite3.OperationalError) - assert "rollback boom" in str(exc.value.__cause__) - assert "Rollback failed after add_files() error" in caplog.text - assert "boom" in caplog.text - finally: - store.close() - - -class TestMultiVectorChunks: - """Tests for multi-vector chunk storage operations.""" - - def test_add_chunks_basic(self, tmp_path: Path) -> None: - """Basic chunk insertion without embeddings.""" - store = SQLiteStore(tmp_path / "chunks_basic.db") - store.initialize() - - try: - chunks_data = [ - {"content": "def hello(): pass", "metadata": {"type": "function"}}, - {"content": "class World: pass", "metadata": {"type": "class"}}, - ] - - ids = store.add_chunks("test.py", chunks_data) - - assert len(ids) == 2 - assert ids == [1, 2] - assert store.count_chunks() == 2 - finally: - store.close() - - def test_add_chunks_with_binary_embeddings(self, tmp_path: Path) -> None: - """Chunk insertion with binary embeddings for coarse ranking.""" - store = SQLiteStore(tmp_path / "chunks_binary.db") - store.initialize() - - try: - chunks_data = [ - {"content": "content1"}, - {"content": "content2"}, - ] - # 256-bit binary = 32 bytes - binary_embs = [b"\x00" * 32, b"\xff" * 32] - - ids = store.add_chunks( - "test.py", chunks_data, embedding_binary=binary_embs - ) - - assert len(ids) == 2 - - retrieved = store.get_binary_embeddings(ids) - assert len(retrieved) == 2 - assert retrieved[ids[0]] == b"\x00" * 32 - assert retrieved[ids[1]] == b"\xff" * 32 - finally: - store.close() - - def test_add_chunks_with_dense_embeddings(self, tmp_path: Path) -> None: - """Chunk insertion with dense embeddings for fine ranking.""" - store = SQLiteStore(tmp_path / "chunks_dense.db") - store.initialize() - - try: - chunks_data = [{"content": "content1"}, {"content": "content2"}] - # 2048 floats = 8192 bytes - dense_embs = [b"\x00" * 8192, b"\xff" * 8192] - - ids = store.add_chunks( - "test.py", chunks_data, embedding_dense=dense_embs - ) - - assert len(ids) == 2 - - retrieved = store.get_dense_embeddings(ids) - assert len(retrieved) == 2 - assert retrieved[ids[0]] == b"\x00" * 8192 - assert retrieved[ids[1]] == b"\xff" * 8192 - finally: - store.close() - - def test_add_chunks_with_all_embeddings(self, tmp_path: Path) -> None: - """Chunk insertion with all embedding types.""" - store = SQLiteStore(tmp_path / "chunks_all.db") - store.initialize() - - try: - chunks_data = [{"content": "full test"}] - embedding = [[0.1, 0.2, 0.3]] - binary_embs = [b"\xab" * 32] - dense_embs = [b"\xcd" * 8192] - - ids = store.add_chunks( - "test.py", - chunks_data, - embedding=embedding, - embedding_binary=binary_embs, - embedding_dense=dense_embs, - ) - - assert len(ids) == 1 - - binary = store.get_binary_embeddings(ids) - dense = store.get_dense_embeddings(ids) - - assert binary[ids[0]] == b"\xab" * 32 - assert dense[ids[0]] == b"\xcd" * 8192 - finally: - store.close() - - def test_add_chunks_length_mismatch_raises(self, tmp_path: Path) -> None: - """Mismatched embedding length should raise ValueError.""" - store = SQLiteStore(tmp_path / "chunks_mismatch.db") - store.initialize() - - try: - chunks_data = [{"content": "a"}, {"content": "b"}] - - with pytest.raises(ValueError, match="embedding_binary length"): - store.add_chunks( - "test.py", chunks_data, embedding_binary=[b"\x00" * 32] - ) - - with pytest.raises(ValueError, match="embedding_dense length"): - store.add_chunks( - "test.py", chunks_data, embedding_dense=[b"\x00" * 8192] - ) - - with pytest.raises(ValueError, match="embedding length"): - store.add_chunks( - "test.py", chunks_data, embedding=[[0.1]] - ) - finally: - store.close() - - def test_get_chunks_by_ids(self, tmp_path: Path) -> None: - """Retrieve chunk data by IDs.""" - store = SQLiteStore(tmp_path / "chunks_get.db") - store.initialize() - - try: - chunks_data = [ - {"content": "def foo(): pass", "metadata": {"line": 1}}, - {"content": "def bar(): pass", "metadata": {"line": 5}}, - ] - - ids = store.add_chunks("test.py", chunks_data) - retrieved = store.get_chunks_by_ids(ids) - - assert len(retrieved) == 2 - assert retrieved[0]["content"] == "def foo(): pass" - assert retrieved[0]["metadata"]["line"] == 1 - assert retrieved[1]["content"] == "def bar(): pass" - assert retrieved[1]["file_path"] == "test.py" - finally: - store.close() - - def test_delete_chunks_by_file(self, tmp_path: Path) -> None: - """Delete all chunks for a file.""" - store = SQLiteStore(tmp_path / "chunks_delete.db") - store.initialize() - - try: - store.add_chunks("a.py", [{"content": "a1"}, {"content": "a2"}]) - store.add_chunks("b.py", [{"content": "b1"}]) - - assert store.count_chunks() == 3 - - deleted = store.delete_chunks_by_file("a.py") - assert deleted == 2 - assert store.count_chunks() == 1 - - deleted = store.delete_chunks_by_file("nonexistent.py") - assert deleted == 0 - finally: - store.close() - - def test_get_embeddings_empty_list(self, tmp_path: Path) -> None: - """Empty chunk ID list returns empty dict.""" - store = SQLiteStore(tmp_path / "chunks_empty.db") - store.initialize() - - try: - assert store.get_binary_embeddings([]) == {} - assert store.get_dense_embeddings([]) == {} - assert store.get_chunks_by_ids([]) == [] - finally: - store.close() - - def test_add_chunks_empty_list(self, tmp_path: Path) -> None: - """Empty chunks list returns empty IDs.""" - store = SQLiteStore(tmp_path / "chunks_empty_add.db") - store.initialize() - - try: - ids = store.add_chunks("test.py", []) - assert ids == [] - assert store.count_chunks() == 0 - finally: - store.close() - - def test_chunks_table_migration(self, tmp_path: Path) -> None: - """Existing chunks table gets new columns via migration.""" - db_path = tmp_path / "chunks_migration.db" - - # Create old schema without multi-vector columns - conn = sqlite3.connect(db_path) - conn.execute( - """ - CREATE TABLE chunks ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - file_path TEXT NOT NULL, - content TEXT NOT NULL, - embedding BLOB, - metadata TEXT, - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP - ) - """ - ) - conn.execute("CREATE INDEX idx_chunks_file_path ON chunks(file_path)") - conn.execute( - "INSERT INTO chunks (file_path, content) VALUES ('old.py', 'old content')" - ) - conn.commit() - conn.close() - - # Open with SQLiteStore - should migrate - store = SQLiteStore(db_path) - store.initialize() - - try: - # Verify new columns exist by using them - ids = store.add_chunks( - "new.py", - [{"content": "new content"}], - embedding_binary=[b"\x00" * 32], - embedding_dense=[b"\x00" * 8192], - ) - - assert len(ids) == 1 - - # Old data should still be accessible - assert store.count_chunks() == 2 - - # New embeddings should work - binary = store.get_binary_embeddings(ids) - assert binary[ids[0]] == b"\x00" * 32 - finally: - store.close() diff --git a/codex-lens/tests/test_stage1_binary_search_uses_chunk_lines.py b/codex-lens/tests/test_stage1_binary_search_uses_chunk_lines.py deleted file mode 100644 index ed566c9b..00000000 --- a/codex-lens/tests/test_stage1_binary_search_uses_chunk_lines.py +++ /dev/null @@ -1,131 +0,0 @@ -from __future__ import annotations - -from pathlib import Path -from unittest.mock import MagicMock, patch - -from codexlens.config import VECTORS_META_DB_NAME, Config -from codexlens.search.chain_search import ChainSearchEngine, SearchStats -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore - - -def test_stage1_binary_search_prefers_chunk_start_line(tmp_path: Path) -> None: - registry = RegistryStore(db_path=tmp_path / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=tmp_path / "indexes") - engine = ChainSearchEngine(registry, mapper, config=Config(data_dir=tmp_path / "data")) - - try: - index_root = tmp_path / "fake_index_root" - index_root.mkdir(parents=True, exist_ok=True) - index_db = index_root / "_index.db" - index_db.write_text("", encoding="utf-8") - (index_root / VECTORS_META_DB_NAME).write_text("", encoding="utf-8") - - class _DummyBinarySearcher: - def search(self, query_dense, top_k: int): - _ = query_dense - _ = top_k - return [(123, 10)] - - class _DummyEmbedder: - def embed_to_numpy(self, texts): - _ = texts - return [[0.0]] - - dummy_meta_store = MagicMock() - dummy_meta_store.get_chunks_by_ids.return_value = [ - { - "chunk_id": 123, - "file_path": str(tmp_path / "a.py"), - "content": "def a():\n return 1\n", - "start_line": 12, - "end_line": 14, - "metadata": {}, - "category": "code", - } - ] - - with patch.object(engine, "_get_centralized_binary_searcher", return_value=_DummyBinarySearcher()): - with patch("codexlens.search.chain_search.VectorMetadataStore", return_value=dummy_meta_store): - with patch("codexlens.semantic.embedder.Embedder", return_value=_DummyEmbedder()): - coarse_results, returned_root = engine._stage1_binary_search( - "a", - [index_db], - coarse_k=1, - stats=SearchStats(), - ) - - assert returned_root == index_root - assert len(coarse_results) == 1 - assert coarse_results[0].start_line == 12 - assert coarse_results[0].end_line == 14 - finally: - engine.close() - - -def test_stage1_binary_search_dense_fallback(tmp_path: Path) -> None: - registry = RegistryStore(db_path=tmp_path / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=tmp_path / "indexes") - engine = ChainSearchEngine(registry, mapper, config=Config(data_dir=tmp_path / "data")) - - try: - index_root = tmp_path / "fake_index_root" - index_root.mkdir(parents=True, exist_ok=True) - index_db = index_root / "_index.db" - index_db.write_text("", encoding="utf-8") - (index_root / VECTORS_META_DB_NAME).write_text("", encoding="utf-8") - - class _DummyEmbedder: - def embed_to_numpy(self, texts): - _ = texts - # Only dim matters for ANNIndex initialization - return [[0.0, 1.0, 2.0]] - - class _DummyANNIndex: - def __init__(self, *args, **kwargs) -> None: - pass - - def load(self) -> bool: - return True - - def count(self) -> int: - return 1 - - def search(self, query_vec, top_k: int = 10): - _ = query_vec - _ = top_k - return [123], [0.2] - - dummy_meta_store = MagicMock() - dummy_meta_store.get_chunks_by_ids.return_value = [ - { - "chunk_id": 123, - "file_path": str(tmp_path / "b.py"), - "content": "def b():\n return 2\n", - "start_line": 20, - "end_line": 22, - "metadata": {}, - "category": "code", - } - ] - - with patch.object(engine, "_get_centralized_binary_searcher", return_value=None): - with patch("codexlens.search.chain_search.VectorMetadataStore", return_value=dummy_meta_store): - with patch("codexlens.semantic.embedder.Embedder", return_value=_DummyEmbedder()): - with patch("codexlens.semantic.ann_index.ANNIndex", _DummyANNIndex): - coarse_results, returned_root = engine._stage1_binary_search( - "b", - [index_db], - coarse_k=1, - stats=SearchStats(), - ) - - assert returned_root == index_root - assert len(coarse_results) == 1 - assert coarse_results[0].start_line == 20 - assert coarse_results[0].end_line == 22 - assert coarse_results[0].score == 0.8 - finally: - engine.close() diff --git a/codex-lens/tests/test_staged_cascade.py b/codex-lens/tests/test_staged_cascade.py deleted file mode 100644 index 2a5f44b4..00000000 --- a/codex-lens/tests/test_staged_cascade.py +++ /dev/null @@ -1,812 +0,0 @@ -"""Integration tests for staged cascade search pipeline. - -Tests the 4-stage pipeline: -1. Stage 1: Binary coarse search -2. Stage 2: LSP graph expansion -3. Stage 3: Clustering and representative selection -4. Stage 4: Optional cross-encoder reranking -""" - -from __future__ import annotations - -import json -import tempfile -from pathlib import Path -from typing import List -from unittest.mock import MagicMock, Mock, patch - -import pytest - -from codexlens.config import Config -from codexlens.entities import SearchResult -from codexlens.search.chain_search import ChainSearchEngine, SearchOptions -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore - - -# ============================================================================= -# Test Fixtures -# ============================================================================= - - -@pytest.fixture -def temp_paths(): - """Create temporary directory structure.""" - tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True) - root = Path(tmpdir.name) - yield root - try: - tmpdir.cleanup() - except (PermissionError, OSError): - pass - - -@pytest.fixture -def mock_registry(temp_paths: Path): - """Create mock registry store.""" - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - return registry - - -@pytest.fixture -def mock_mapper(temp_paths: Path): - """Create path mapper.""" - return PathMapper(index_root=temp_paths / "indexes") - - -@pytest.fixture -def mock_config(): - """Create mock config with staged cascade settings.""" - config = MagicMock(spec=Config) - config.cascade_coarse_k = 100 - config.cascade_fine_k = 10 - config.enable_staged_rerank = False - config.staged_clustering_strategy = "auto" - config.staged_clustering_min_size = 3 - config.graph_expansion_depth = 2 - return config - - -@pytest.fixture -def sample_binary_results() -> List[SearchResult]: - """Create sample binary search results for testing.""" - return [ - SearchResult( - path="a.py", - score=0.95, - excerpt="def authenticate_user(username, password):", - symbol_name="authenticate_user", - symbol_kind="function", - start_line=10, - end_line=15, - ), - SearchResult( - path="b.py", - score=0.85, - excerpt="class AuthManager:", - symbol_name="AuthManager", - symbol_kind="class", - start_line=5, - end_line=20, - ), - SearchResult( - path="c.py", - score=0.75, - excerpt="def check_credentials(user, pwd):", - symbol_name="check_credentials", - symbol_kind="function", - start_line=30, - end_line=35, - ), - ] - - -@pytest.fixture -def sample_expanded_results() -> List[SearchResult]: - """Create sample expanded results (after LSP expansion).""" - return [ - SearchResult( - path="a.py", - score=0.95, - excerpt="def authenticate_user(username, password):", - symbol_name="authenticate_user", - symbol_kind="function", - ), - SearchResult( - path="a.py", - score=0.90, - excerpt="def verify_password(pwd):", - symbol_name="verify_password", - symbol_kind="function", - ), - SearchResult( - path="b.py", - score=0.85, - excerpt="class AuthManager:", - symbol_name="AuthManager", - symbol_kind="class", - ), - SearchResult( - path="b.py", - score=0.80, - excerpt="def login(self, user):", - symbol_name="login", - symbol_kind="function", - ), - SearchResult( - path="c.py", - score=0.75, - excerpt="def check_credentials(user, pwd):", - symbol_name="check_credentials", - symbol_kind="function", - ), - SearchResult( - path="d.py", - score=0.70, - excerpt="class UserModel:", - symbol_name="UserModel", - symbol_kind="class", - ), - ] - - -# ============================================================================= -# Test Stage Methods -# ============================================================================= - - -class TestStage1BinarySearch: - """Tests for Stage 1: Binary coarse search.""" - - def test_stage1_returns_results_with_index_root( - self, mock_registry, mock_mapper, mock_config - ): - """Test _stage1_binary_search returns results and index_root.""" - from codexlens.search.chain_search import SearchStats - - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - # Mock the binary embedding backend (import is inside the method) - with patch("codexlens.indexing.embedding.BinaryEmbeddingBackend"): - with patch.object(engine, "_get_or_create_binary_index") as mock_binary_idx: - mock_index = MagicMock() - mock_index.count.return_value = 10 - mock_index.search.return_value = ([1, 2, 3], [10, 20, 30]) - mock_binary_idx.return_value = mock_index - - index_paths = [Path("/fake/index1/_index.db")] - stats = SearchStats() - - results, index_root = engine._stage1_binary_search( - "query", index_paths, coarse_k=10, stats=stats - ) - - assert isinstance(results, list) - assert isinstance(index_root, (Path, type(None))) - - def test_stage1_handles_empty_index_paths( - self, mock_registry, mock_mapper, mock_config - ): - """Test _stage1_binary_search handles empty index paths.""" - from codexlens.search.chain_search import SearchStats - - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - index_paths = [] - stats = SearchStats() - - results, index_root = engine._stage1_binary_search( - "query", index_paths, coarse_k=10, stats=stats - ) - - assert results == [] - assert index_root is None - - def test_stage1_aggregates_results_from_multiple_indexes( - self, mock_registry, mock_mapper, mock_config - ): - """Test _stage1_binary_search aggregates results from multiple indexes.""" - from codexlens.search.chain_search import SearchStats - - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch("codexlens.indexing.embedding.BinaryEmbeddingBackend"): - with patch.object(engine, "_get_or_create_binary_index") as mock_binary_idx: - mock_index = MagicMock() - mock_index.count.return_value = 10 - # Return different results for different calls - mock_index.search.side_effect = [ - ([1, 2], [10, 20]), - ([3, 4], [15, 25]), - ] - mock_binary_idx.return_value = mock_index - - index_paths = [ - Path("/fake/index1/_index.db"), - Path("/fake/index2/_index.db"), - ] - stats = SearchStats() - - results, _ = engine._stage1_binary_search( - "query", index_paths, coarse_k=10, stats=stats - ) - - # Should aggregate candidates from both indexes - assert isinstance(results, list) - - -class TestStage2LSPExpand: - """Tests for Stage 2: LSP graph expansion.""" - - def test_stage2_returns_expanded_results( - self, mock_registry, mock_mapper, mock_config, sample_binary_results - ): - """Test _stage2_lsp_expand returns expanded results.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - # Import is inside the method, so we need to patch it there - with patch("codexlens.search.graph_expander.GraphExpander") as mock_expander_cls: - mock_expander = MagicMock() - mock_expander.expand.return_value = [ - SearchResult(path="related.py", score=0.7, excerpt="related") - ] - mock_expander_cls.return_value = mock_expander - - expanded = engine._stage2_lsp_expand( - sample_binary_results, index_root=Path("/fake/index") - ) - - assert isinstance(expanded, list) - # Should include original results - assert len(expanded) >= len(sample_binary_results) - - def test_stage2_handles_no_index_root( - self, mock_registry, mock_mapper, mock_config, sample_binary_results - ): - """Test _stage2_lsp_expand handles missing index_root.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - expanded = engine._stage2_lsp_expand(sample_binary_results, index_root=None) - - # Should return original results unchanged - assert expanded == sample_binary_results - - def test_stage2_handles_empty_results( - self, mock_registry, mock_mapper, mock_config - ): - """Test _stage2_lsp_expand handles empty input.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - expanded = engine._stage2_lsp_expand([], index_root=Path("/fake")) - - assert expanded == [] - - def test_stage2_deduplicates_results( - self, mock_registry, mock_mapper, mock_config, sample_binary_results - ): - """Test _stage2_lsp_expand deduplicates by (path, symbol_name, start_line).""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - # Mock expander to return duplicate of first result - with patch("codexlens.search.graph_expander.GraphExpander") as mock_expander_cls: - mock_expander = MagicMock() - duplicate = SearchResult( - path=sample_binary_results[0].path, - score=0.5, - excerpt="duplicate", - symbol_name=sample_binary_results[0].symbol_name, - start_line=sample_binary_results[0].start_line, - ) - mock_expander.expand.return_value = [duplicate] - mock_expander_cls.return_value = mock_expander - - expanded = engine._stage2_lsp_expand( - sample_binary_results, index_root=Path("/fake") - ) - - # Should not include duplicate - assert len(expanded) == len(sample_binary_results) - - -class TestStage3ClusterPrune: - """Tests for Stage 3: Clustering and representative selection.""" - - def test_stage3_returns_representatives( - self, mock_registry, mock_mapper, mock_config, sample_expanded_results - ): - """Test _stage3_cluster_prune returns representative results.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch.object(engine, "_get_embeddings_for_clustering") as mock_embed: - import numpy as np - - # Mock embeddings - mock_embed.return_value = np.random.rand( - len(sample_expanded_results), 128 - ).astype(np.float32) - - clustered = engine._stage3_cluster_prune( - sample_expanded_results, target_count=3 - ) - - assert isinstance(clustered, list) - assert len(clustered) <= len(sample_expanded_results) - assert all(isinstance(r, SearchResult) for r in clustered) - - def test_stage3_handles_few_results( - self, mock_registry, mock_mapper, mock_config - ): - """Test _stage3_cluster_prune skips clustering for few results.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - few_results = [ - SearchResult(path="a.py", score=0.9, excerpt="a"), - SearchResult(path="b.py", score=0.8, excerpt="b"), - ] - - clustered = engine._stage3_cluster_prune(few_results, target_count=5) - - # Should return all results unchanged - assert clustered == few_results - - def test_stage3_handles_no_embeddings( - self, mock_registry, mock_mapper, mock_config, sample_expanded_results - ): - """Test _stage3_cluster_prune falls back to score-based selection without embeddings.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch.object(engine, "_get_embeddings_for_clustering") as mock_embed: - mock_embed.return_value = None - - clustered = engine._stage3_cluster_prune( - sample_expanded_results, target_count=3 - ) - - # Should return top-scored results - assert len(clustered) <= 3 - # Should be sorted by score descending - scores = [r.score for r in clustered] - assert scores == sorted(scores, reverse=True) - - def test_stage3_uses_config_clustering_strategy( - self, mock_registry, mock_mapper, sample_expanded_results - ): - """Test _stage3_cluster_prune uses config clustering strategy.""" - config = MagicMock(spec=Config) - config.staged_clustering_strategy = "auto" - config.staged_clustering_min_size = 2 - - engine = ChainSearchEngine(mock_registry, PathMapper(), config=config) - - with patch.object(engine, "_get_embeddings_for_clustering") as mock_embed: - import numpy as np - - mock_embed.return_value = np.random.rand( - len(sample_expanded_results), 128 - ).astype(np.float32) - - clustered = engine._stage3_cluster_prune( - sample_expanded_results, target_count=3 - ) - - # Should use clustering (auto will pick best available) - # Result should be a list of SearchResult objects - assert isinstance(clustered, list) - assert all(isinstance(r, SearchResult) for r in clustered) - - -class TestStage4OptionalRerank: - """Tests for Stage 4: Optional cross-encoder reranking.""" - - def test_stage4_reranks_with_reranker( - self, mock_registry, mock_mapper, temp_paths - ): - """Test _stage4_optional_rerank overfetches before final trim.""" - config = Config(data_dir=temp_paths / "data") - config.reranker_top_k = 4 - config.reranking_top_k = 4 - engine = ChainSearchEngine(mock_registry, mock_mapper, config=config) - - results = [ - SearchResult(path="a.py", score=0.9, excerpt="a"), - SearchResult(path="b.py", score=0.8, excerpt="b"), - SearchResult(path="c.py", score=0.7, excerpt="c"), - SearchResult(path="d.py", score=0.6, excerpt="d"), - SearchResult(path="e.py", score=0.5, excerpt="e"), - ] - - # Mock the _cross_encoder_rerank method that _stage4 calls - with patch.object(engine, "_cross_encoder_rerank") as mock_rerank: - mock_rerank.return_value = [ - SearchResult(path="c.py", score=0.95, excerpt="c"), - SearchResult(path="a.py", score=0.85, excerpt="a"), - SearchResult(path="d.py", score=0.83, excerpt="d"), - SearchResult(path="e.py", score=0.81, excerpt="e"), - ] - - reranked = engine._stage4_optional_rerank("query", results, k=2) - - mock_rerank.assert_called_once_with("query", results, 4) - assert len(reranked) == 4 - # First result should be reranked winner - assert reranked[0].path == "c.py" - - def test_stage4_handles_empty_results( - self, mock_registry, mock_mapper, mock_config - ): - """Test _stage4_optional_rerank handles empty input.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - reranked = engine._stage4_optional_rerank("query", [], k=2) - - # Should return empty list - assert reranked == [] - - -# ============================================================================= -# Integration Tests -# ============================================================================= - - -class TestStagedCascadeIntegration: - """Integration tests for staged_cascade_search() end-to-end.""" - - def test_staged_cascade_returns_chain_result( - self, mock_registry, mock_mapper, mock_config, temp_paths - ): - """Test staged_cascade_search returns ChainSearchResult.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - # Mock all stages - with patch.object(engine, "_find_start_index") as mock_find: - mock_find.return_value = temp_paths / "index" / "_index.db" - - with patch.object(engine, "_collect_index_paths") as mock_collect: - mock_collect.return_value = [temp_paths / "index" / "_index.db"] - - with patch.object(engine, "_stage1_binary_search") as mock_stage1: - mock_stage1.return_value = ( - [SearchResult(path="a.py", score=0.9, excerpt="a")], - temp_paths / "index", - ) - - with patch.object(engine, "_stage2_lsp_expand") as mock_stage2: - mock_stage2.return_value = [ - SearchResult(path="a.py", score=0.9, excerpt="a") - ] - - with patch.object(engine, "_stage3_cluster_prune") as mock_stage3: - mock_stage3.return_value = [ - SearchResult(path="a.py", score=0.9, excerpt="a") - ] - - result = engine.staged_cascade_search( - "query", temp_paths / "src", k=10, coarse_k=100 - ) - - from codexlens.search.chain_search import ChainSearchResult - - assert isinstance(result, ChainSearchResult) - assert result.query == "query" - assert len(result.results) <= 10 - - def test_staged_cascade_includes_stage_stats( - self, mock_registry, mock_mapper, mock_config, temp_paths - ): - """Test staged_cascade_search includes per-stage timing stats.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch.object(engine, "_find_start_index") as mock_find: - mock_find.return_value = temp_paths / "index" / "_index.db" - - with patch.object(engine, "_collect_index_paths") as mock_collect: - mock_collect.return_value = [temp_paths / "index" / "_index.db"] - - with patch.object(engine, "_stage1_binary_search") as mock_stage1: - mock_stage1.return_value = ( - [SearchResult(path="a.py", score=0.9, excerpt="a")], - temp_paths / "index", - ) - - with patch.object(engine, "_stage2_lsp_expand") as mock_stage2: - mock_stage2.return_value = [ - SearchResult(path="a.py", score=0.9, excerpt="a") - ] - - with patch.object(engine, "_stage3_cluster_prune") as mock_stage3: - mock_stage3.return_value = [ - SearchResult(path="a.py", score=0.9, excerpt="a") - ] - - result = engine.staged_cascade_search( - "query", temp_paths / "src" - ) - - # Check for stage stats in errors field - stage_stats = None - for err in result.stats.errors: - if err.startswith("STAGE_STATS:"): - stage_stats = json.loads(err.replace("STAGE_STATS:", "")) - break - - assert stage_stats is not None - assert "stage_times" in stage_stats - assert "stage_counts" in stage_stats - assert "stage1_binary_ms" in stage_stats["stage_times"] - assert "stage1_candidates" in stage_stats["stage_counts"] - - def test_staged_cascade_with_rerank_enabled( - self, mock_registry, mock_mapper, temp_paths - ): - """Test staged_cascade_search with reranking enabled.""" - config = MagicMock(spec=Config) - config.cascade_coarse_k = 100 - config.cascade_fine_k = 10 - config.enable_staged_rerank = True - config.staged_clustering_strategy = "auto" - config.graph_expansion_depth = 2 - - engine = ChainSearchEngine(mock_registry, mock_mapper, config=config) - - with patch.object(engine, "_find_start_index") as mock_find: - mock_find.return_value = temp_paths / "index" / "_index.db" - - with patch.object(engine, "_collect_index_paths") as mock_collect: - mock_collect.return_value = [temp_paths / "index" / "_index.db"] - - with patch.object(engine, "_stage1_binary_search") as mock_stage1: - mock_stage1.return_value = ( - [SearchResult(path="a.py", score=0.9, excerpt="a")], - temp_paths / "index", - ) - - with patch.object(engine, "_stage2_lsp_expand") as mock_stage2: - mock_stage2.return_value = [ - SearchResult(path="a.py", score=0.9, excerpt="a") - ] - - with patch.object(engine, "_stage3_cluster_prune") as mock_stage3: - mock_stage3.return_value = [ - SearchResult(path="a.py", score=0.9, excerpt="a") - ] - - with patch.object(engine, "_stage4_optional_rerank") as mock_stage4: - mock_stage4.return_value = [ - SearchResult(path="a.py", score=0.95, excerpt="a") - ] - - result = engine.staged_cascade_search( - "query", temp_paths / "src" - ) - - # Verify stage 4 was called - mock_stage4.assert_called_once() - - def test_staged_cascade_fallback_to_search( - self, mock_registry, mock_mapper, mock_config, temp_paths - ): - """Test staged_cascade_search falls back to standard search when numpy unavailable.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch("codexlens.search.chain_search.NUMPY_AVAILABLE", False): - with patch.object(engine, "search") as mock_search: - mock_search.return_value = MagicMock() - - engine.staged_cascade_search("query", temp_paths / "src") - - # Should fall back to standard search - mock_search.assert_called_once() - - def test_staged_cascade_deduplicates_final_results( - self, mock_registry, mock_mapper, mock_config, temp_paths - ): - """Test staged_cascade_search deduplicates results by path.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch.object(engine, "_find_start_index") as mock_find: - mock_find.return_value = temp_paths / "index" / "_index.db" - - with patch.object(engine, "_collect_index_paths") as mock_collect: - mock_collect.return_value = [temp_paths / "index" / "_index.db"] - - with patch.object(engine, "_stage1_binary_search") as mock_stage1: - mock_stage1.return_value = ( - [SearchResult(path="a.py", score=0.9, excerpt="a")], - temp_paths / "index", - ) - - with patch.object(engine, "_stage2_lsp_expand") as mock_stage2: - mock_stage2.return_value = [ - SearchResult(path="a.py", score=0.9, excerpt="a") - ] - - with patch.object(engine, "_stage3_cluster_prune") as mock_stage3: - # Return duplicates with different scores - mock_stage3.return_value = [ - SearchResult(path="a.py", score=0.9, excerpt="a"), - SearchResult(path="a.py", score=0.8, excerpt="a duplicate"), - SearchResult(path="b.py", score=0.7, excerpt="b"), - ] - - result = engine.staged_cascade_search( - "query", temp_paths / "src", k=10 - ) - - # Should deduplicate a.py (keep higher score) - paths = [r.path for r in result.results] - assert len(paths) == len(set(paths)) - # a.py should have score 0.9 - a_result = next(r for r in result.results if r.path == "a.py") - assert a_result.score == 0.9 - - def test_staged_cascade_expands_stage3_target_for_rerank_budget( - self, mock_registry, mock_mapper, temp_paths - ): - """Test staged cascade preserves enough Stage 3 reps for rerank budget.""" - config = Config(data_dir=temp_paths / "data") - config.enable_staged_rerank = True - config.reranker_top_k = 6 - config.reranking_top_k = 6 - - engine = ChainSearchEngine(mock_registry, mock_mapper, config=config) - expanded_results = [ - SearchResult(path=f"src/file-{index}.ts", score=1.0 - (index * 0.01), excerpt="x") - for index in range(8) - ] - - with patch.object(engine, "_find_start_index") as mock_find: - mock_find.return_value = temp_paths / "index" / "_index.db" - - with patch.object(engine, "_collect_index_paths") as mock_collect: - mock_collect.return_value = [temp_paths / "index" / "_index.db"] - - with patch.object(engine, "_stage1_binary_search") as mock_stage1: - mock_stage1.return_value = ( - [SearchResult(path="seed.ts", score=0.9, excerpt="seed")], - temp_paths / "index", - ) - - with patch.object(engine, "_stage2_lsp_expand") as mock_stage2: - mock_stage2.return_value = expanded_results - - with patch.object(engine, "_stage3_cluster_prune") as mock_stage3: - mock_stage3.return_value = expanded_results[:6] - - with patch.object(engine, "_stage4_optional_rerank") as mock_stage4: - mock_stage4.return_value = expanded_results[:2] - - engine.staged_cascade_search( - "query", - temp_paths / "src", - k=2, - coarse_k=20, - ) - - mock_stage3.assert_called_once_with( - expanded_results, - 6, - query="query", - ) - - def test_staged_cascade_overfetches_rerank_before_final_trim( - self, mock_registry, mock_mapper, temp_paths - ): - """Test staged rerank keeps enough candidates for path penalties to work.""" - config = Config(data_dir=temp_paths / "data") - config.enable_staged_rerank = True - config.reranker_top_k = 4 - config.reranking_top_k = 4 - config.test_file_penalty = 0.15 - config.generated_file_penalty = 0.35 - - engine = ChainSearchEngine(mock_registry, mock_mapper, config=config) - - src_primary = str(temp_paths / "src" / "tools" / "smart-search.ts") - src_secondary = str(temp_paths / "src" / "tools" / "codex-lens.ts") - test_primary = str(temp_paths / "tests" / "integration" / "cli-routes.test.ts") - test_secondary = str( - temp_paths / "frontend" / "tests" / "e2e" / "prompt-memory.spec.ts" - ) - query = "parse CodexLens JSON output strip ANSI smart_search" - clustered_results = [ - SearchResult(path=test_primary, score=0.98, excerpt="test"), - SearchResult(path=test_secondary, score=0.97, excerpt="test"), - SearchResult(path=src_primary, score=0.96, excerpt="source"), - SearchResult(path=src_secondary, score=0.95, excerpt="source"), - ] - - with patch.object(engine, "_find_start_index") as mock_find: - mock_find.return_value = temp_paths / "index" / "_index.db" - - with patch.object(engine, "_collect_index_paths") as mock_collect: - mock_collect.return_value = [temp_paths / "index" / "_index.db"] - - with patch.object(engine, "_stage1_binary_search") as mock_stage1: - mock_stage1.return_value = ( - [SearchResult(path=src_primary, score=0.9, excerpt="seed")], - temp_paths / "index", - ) - - with patch.object(engine, "_stage2_lsp_expand") as mock_stage2: - mock_stage2.return_value = clustered_results - - with patch.object(engine, "_stage3_cluster_prune") as mock_stage3: - mock_stage3.return_value = clustered_results - - with patch.object(engine, "_cross_encoder_rerank") as mock_rerank: - mock_rerank.return_value = clustered_results - - result = engine.staged_cascade_search( - query, - temp_paths / "src", - k=2, - coarse_k=20, - ) - - mock_rerank.assert_called_once_with(query, clustered_results, 4) - assert [item.path for item in result.results] == [src_primary, src_secondary] - - -# ============================================================================= -# Graceful Degradation Tests -# ============================================================================= - - -class TestStagedCascadeGracefulDegradation: - """Tests for graceful degradation when dependencies unavailable.""" - - def test_falls_back_when_clustering_unavailable( - self, mock_registry, mock_mapper, mock_config, sample_expanded_results - ): - """Test clustering stage falls back gracefully when clustering unavailable.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch.object(engine, "_get_embeddings_for_clustering") as mock_embed: - mock_embed.return_value = None - - clustered = engine._stage3_cluster_prune( - sample_expanded_results, target_count=3 - ) - - # Should fall back to score-based selection - assert len(clustered) <= 3 - - def test_falls_back_when_graph_expander_unavailable( - self, mock_registry, mock_mapper, mock_config, sample_binary_results - ): - """Test LSP expansion falls back when GraphExpander unavailable.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - # Patch the import inside the method - with patch("codexlens.search.graph_expander.GraphExpander", side_effect=ImportError): - expanded = engine._stage2_lsp_expand( - sample_binary_results, index_root=Path("/fake") - ) - - # Should return original results - assert expanded == sample_binary_results - - def test_handles_stage_failures_gracefully( - self, mock_registry, mock_mapper, mock_config, temp_paths - ): - """Test staged pipeline handles stage failures gracefully.""" - engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config) - - with patch.object(engine, "_find_start_index") as mock_find: - mock_find.return_value = temp_paths / "index" / "_index.db" - - with patch.object(engine, "_collect_index_paths") as mock_collect: - mock_collect.return_value = [temp_paths / "index" / "_index.db"] - - with patch.object(engine, "_stage1_binary_search") as mock_stage1: - # Stage 1 returns no results - mock_stage1.return_value = ([], None) - - with patch.object(engine, "search") as mock_search: - mock_search.return_value = MagicMock() - - engine.staged_cascade_search("query", temp_paths / "src") - - # Should fall back to standard search when stage 1 fails - mock_search.assert_called_once() diff --git a/codex-lens/tests/test_staged_cascade_lsp_depth.py b/codex-lens/tests/test_staged_cascade_lsp_depth.py deleted file mode 100644 index b7437ec8..00000000 --- a/codex-lens/tests/test_staged_cascade_lsp_depth.py +++ /dev/null @@ -1,168 +0,0 @@ -"""Regression tests for staged cascade Stage 2 expansion depth. - -Staged cascade is documented as: - coarse (binary) → LSP/graph expansion → clustering → optional rerank - -This test ensures Stage 2 respects Config.staged_lsp_depth (not unrelated -graph_expansion_depth settings). -""" - -from __future__ import annotations - -import tempfile -from pathlib import Path -from unittest.mock import patch - -import pytest - -from codexlens.config import Config -from codexlens.entities import CodeRelationship, RelationshipType, SearchResult, Symbol -from codexlens.search.chain_search import ChainSearchEngine -from codexlens.storage.dir_index import DirIndexStore -from codexlens.storage.index_tree import _compute_graph_neighbors -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore - - -@pytest.fixture() -def temp_paths() -> Path: - tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True) - root = Path(tmpdir.name) - yield root - try: - tmpdir.cleanup() - except (PermissionError, OSError): - pass - - -def _create_index_with_neighbors(root: Path) -> tuple[PathMapper, Path, Path, str]: - project_root = root / "project" - project_root.mkdir(parents=True, exist_ok=True) - - index_root = root / "indexes" - mapper = PathMapper(index_root=index_root) - index_db_path = mapper.source_to_index_db(project_root) - index_db_path.parent.mkdir(parents=True, exist_ok=True) - - # Use 3 files so staged_cascade_search's final "deduplicate by path" step - # doesn't collapse all expanded symbols into a single file result. - content_a = "\n".join(["def a():", " b()", ""]) - content_b = "\n".join(["def b():", " c()", ""]) - content_c = "\n".join(["def c():", " return 1", ""]) - - file_a = project_root / "a.py" - file_b = project_root / "b.py" - file_c = project_root / "c.py" - file_a.write_text(content_a, encoding="utf-8") - file_b.write_text(content_b, encoding="utf-8") - file_c.write_text(content_c, encoding="utf-8") - - symbols_a = [Symbol(name="a", kind="function", range=(1, 2), file=str(file_a))] - symbols_b = [Symbol(name="b", kind="function", range=(1, 2), file=str(file_b))] - symbols_c = [Symbol(name="c", kind="function", range=(1, 2), file=str(file_c))] - - relationships_a = [ - CodeRelationship( - source_symbol="a", - target_symbol="b", - relationship_type=RelationshipType.CALL, - source_file=str(file_a), - target_file=str(file_b), - source_line=2, - ) - ] - relationships_b = [ - CodeRelationship( - source_symbol="b", - target_symbol="c", - relationship_type=RelationshipType.CALL, - source_file=str(file_b), - target_file=str(file_c), - source_line=2, - ) - ] - - config = Config(data_dir=root / "data") - store = DirIndexStore(index_db_path, config=config) - store.initialize() - store.add_file( - name=file_a.name, - full_path=file_a, - content=content_a, - language="python", - symbols=symbols_a, - relationships=relationships_a, - ) - store.add_file( - name=file_b.name, - full_path=file_b, - content=content_b, - language="python", - symbols=symbols_b, - relationships=relationships_b, - ) - store.add_file( - name=file_c.name, - full_path=file_c, - content=content_c, - language="python", - symbols=symbols_c, - relationships=[], - ) - _compute_graph_neighbors(store) - store.close() - - return mapper, project_root, file_a, content_a - - -def test_staged_cascade_stage2_uses_staged_lsp_depth(temp_paths: Path) -> None: - mapper, project_root, file_path, content = _create_index_with_neighbors(temp_paths) - index_db_path = mapper.source_to_index_db(project_root) - - registry = RegistryStore(db_path=temp_paths / "registry.db") - registry.initialize() - - # Intentionally conflicting depths: staged_lsp_depth should win for staged cascade. - config = Config( - data_dir=temp_paths / "data", - staged_lsp_depth=1, - graph_expansion_depth=2, - enable_staged_rerank=False, - staged_clustering_strategy="noop", - ) - - engine = ChainSearchEngine(registry, mapper, config=config) - try: - base = SearchResult( - path=str(file_path.resolve()), - score=1.0, - excerpt="", - content=content, - start_line=1, - end_line=2, - symbol_name="a", - symbol_kind="function", - ) - - with patch("codexlens.search.chain_search.NUMPY_AVAILABLE", True): - with patch.object(engine, "_find_start_index", return_value=index_db_path): - with patch.object(engine, "_collect_index_paths", return_value=[index_db_path]): - # Bypass binary vector infrastructure; Stage 1 output is sufficient for Stage 2 behavior. - with patch.object( - engine, - "_stage1_binary_search", - return_value=([base], index_db_path.parent), - ): - result = engine.staged_cascade_search( - query="test", - source_path=project_root, - k=3, - coarse_k=10, - ) - - symbol_names = {r.symbol_name for r in result.results if r.symbol_name} - assert "b" in symbol_names - # With staged_lsp_depth=1, Stage 2 should NOT include 2-hop neighbor "c". - assert "c" not in symbol_names - finally: - engine.close() diff --git a/codex-lens/tests/test_staged_cascade_realtime_lsp.py b/codex-lens/tests/test_staged_cascade_realtime_lsp.py deleted file mode 100644 index 83fb6860..00000000 --- a/codex-lens/tests/test_staged_cascade_realtime_lsp.py +++ /dev/null @@ -1,98 +0,0 @@ -"""Unit tests for staged cascade Stage 2 realtime LSP graph expansion. - -These tests mock out the live LSP components (LspBridge + LspGraphBuilder) -so they can run without external language servers installed. -""" - -from __future__ import annotations - -from pathlib import Path -from unittest.mock import AsyncMock, MagicMock, patch - -from codexlens.config import Config -from codexlens.entities import SearchResult -from codexlens.hybrid_search.data_structures import CodeAssociationGraph, CodeSymbolNode, Range -from codexlens.search.chain_search import ChainSearchEngine -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore - - -class _DummyBridge: - def __init__(self, *args, **kwargs) -> None: - pass - - async def get_document_symbols(self, file_path: str): - _ = file_path - return [] - - async def __aenter__(self): - return self - - async def __aexit__(self, exc_type, exc, tb) -> None: - return None - - -def test_stage2_realtime_mode_expands_and_combines(tmp_path: Path) -> None: - registry = RegistryStore(db_path=tmp_path / "registry.db") - registry.initialize() - mapper = PathMapper(index_root=tmp_path / "indexes") - - config = Config( - data_dir=tmp_path / "data", - staged_stage2_mode="realtime", - staged_lsp_depth=1, - staged_realtime_lsp_timeout_s=1.0, - staged_realtime_lsp_max_nodes=10, - staged_realtime_lsp_warmup_s=0.0, - ) - - engine = ChainSearchEngine(registry, mapper, config=config) - try: - coarse = [ - SearchResult( - path=str(tmp_path / "a.py"), - score=1.0, - excerpt="def a(): pass", - content="def a():\n pass\n", - symbol_name="a", - symbol_kind="function", - start_line=1, - end_line=2, - ) - ] - - graph = CodeAssociationGraph() - seed_id = f"{coarse[0].path}:a:1" - graph.nodes[seed_id] = CodeSymbolNode( - id=seed_id, - name="a", - kind="function", - file_path=coarse[0].path, - range=Range(start_line=1, start_character=1, end_line=2, end_character=1), - ) - related_id = f"{str(tmp_path / 'b.py')}:b:1" - graph.nodes[related_id] = CodeSymbolNode( - id=related_id, - name="b", - kind="function", - file_path=str(tmp_path / "b.py"), - range=Range(start_line=1, start_character=1, end_line=1, end_character=1), - raw_code="def b():\n return 1\n", - ) - - dummy_builder = MagicMock() - dummy_builder.build_from_seeds = AsyncMock(return_value=graph) - - with patch("codexlens.lsp.LspBridge", _DummyBridge): - with patch("codexlens.lsp.LspGraphBuilder", return_value=dummy_builder) as mock_builder: - # Avoid needing a real index_to_source mapping - engine.mapper.index_to_source = MagicMock(return_value=tmp_path) - expanded = engine._stage2_lsp_expand(coarse, index_root=tmp_path / "fake_index_root") - - assert mock_builder.call_args is not None - assert mock_builder.call_args.kwargs.get("resolve_symbols") is False - names = {r.symbol_name for r in expanded if r.symbol_name} - assert "a" in names - assert "b" in names - finally: - engine.close() diff --git a/codex-lens/tests/test_staged_stage1_fallback_seed.py b/codex-lens/tests/test_staged_stage1_fallback_seed.py deleted file mode 100644 index ff9ea061..00000000 --- a/codex-lens/tests/test_staged_stage1_fallback_seed.py +++ /dev/null @@ -1,49 +0,0 @@ -from __future__ import annotations - -import json -from pathlib import Path -from unittest.mock import MagicMock - -from codexlens.config import Config -from codexlens.entities import SearchResult -from codexlens.search.chain_search import ChainSearchEngine, ChainSearchResult, SearchOptions - - -def _extract_stage_stats(result: ChainSearchResult) -> dict: - for item in result.stats.errors or []: - if isinstance(item, str) and item.startswith("STAGE_STATS:"): - return json.loads(item[len("STAGE_STATS:") :]) - raise AssertionError("missing STAGE_STATS payload") - - -def test_staged_pipeline_seeds_from_fts_when_stage1_empty(monkeypatch) -> None: - cfg = Config.load() - cfg.enable_staged_rerank = False - cfg.staged_stage2_mode = "realtime" # ensure we pass through stage2 wrapper - cfg.staged_clustering_strategy = "score" - - engine = ChainSearchEngine(registry=MagicMock(), mapper=MagicMock(), config=cfg) - - # Avoid touching registry/mapper/index stores. - monkeypatch.setattr(engine, "_find_start_index", lambda *_a, **_k: Path("X:/fake/_index.db")) - monkeypatch.setattr(engine, "_collect_index_paths", lambda *_a, **_k: [Path("X:/fake/_index.db")]) - - # Force Stage 1 to return empty so the FTS seeding path is exercised. - monkeypatch.setattr(engine, "_stage1_binary_search", lambda *_a, **_k: ([], Path("X:/fake"))) - - seed_results = [SearchResult(path="D:/p/a.py", score=1.0), SearchResult(path="D:/p/b.py", score=0.9)] - - # Provide a stable SearchStats instance for the fallback search call. - from codexlens.search.chain_search import SearchStats - - monkeypatch.setattr(engine, "search", lambda *_a, **_k: ChainSearchResult(query="q", results=seed_results, symbols=[], stats=SearchStats())) - - # Make later stages no-ops so we only validate plumbing. - monkeypatch.setattr(engine, "_stage2_lsp_expand", lambda results, *_a, **_k: results) - monkeypatch.setattr(engine, "_stage3_cluster_prune", lambda results, *_a, **_k: results) - - result = engine.staged_cascade_search("q", Path("."), k=2, coarse_k=5, options=SearchOptions()) - stage_stats = _extract_stage_stats(result) - - assert stage_stats["stage_counts"].get("stage1_fallback_used") == 1 - assert result.results and [r.path for r in result.results] == ["D:/p/a.py", "D:/p/b.py"] diff --git a/codex-lens/tests/test_staged_stage3_fast_strategies.py b/codex-lens/tests/test_staged_stage3_fast_strategies.py deleted file mode 100644 index b546a939..00000000 --- a/codex-lens/tests/test_staged_stage3_fast_strategies.py +++ /dev/null @@ -1,56 +0,0 @@ -from __future__ import annotations - -from unittest.mock import MagicMock - -import pytest - -from codexlens.config import Config -from codexlens.entities import SearchResult -from codexlens.search.chain_search import ChainSearchEngine - - -def _engine_with_strategy(name: str) -> ChainSearchEngine: - cfg = Config.load() - cfg.staged_clustering_strategy = name - return ChainSearchEngine(registry=MagicMock(), mapper=MagicMock(), config=cfg) - - -def test_stage3_strategy_score_skips_embedding(monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setattr( - "codexlens.semantic.factory.get_embedder", - lambda *a, **k: (_ for _ in ()).throw(RuntimeError("should not embed")), - ) - - engine = _engine_with_strategy("score") - expanded = [ - SearchResult(path="D:/p/a.py", score=0.9), - SearchResult(path="D:/p/a.py", score=0.1), - SearchResult(path="D:/p/b.py", score=0.8), - SearchResult(path="D:/p/c.py", score=0.7), - ] - - reps = engine._stage3_cluster_prune(expanded, target_count=3) - assert [r.path for r in reps] == ["D:/p/a.py", "D:/p/b.py", "D:/p/c.py"] - - -def test_stage3_strategy_dir_rr_round_robins_dirs(monkeypatch: pytest.MonkeyPatch) -> None: - monkeypatch.setattr( - "codexlens.semantic.factory.get_embedder", - lambda *a, **k: (_ for _ in ()).throw(RuntimeError("should not embed")), - ) - - engine = _engine_with_strategy("dir_rr") - expanded = [ - SearchResult(path="D:/p1/a.py", score=0.99), - SearchResult(path="D:/p1/b.py", score=0.98), - SearchResult(path="D:/p2/c.py", score=0.97), - SearchResult(path="D:/p2/d.py", score=0.96), - SearchResult(path="D:/p3/e.py", score=0.95), - ] - - reps = engine._stage3_cluster_prune(expanded, target_count=4) - assert len(reps) == 4 - assert reps[0].path.endswith("p1/a.py") - assert reps[1].path.endswith("p2/c.py") - assert reps[2].path.endswith("p3/e.py") - diff --git a/codex-lens/tests/test_standalone_lsp_manager_open_document_cache.py b/codex-lens/tests/test_standalone_lsp_manager_open_document_cache.py deleted file mode 100644 index 8af8ad75..00000000 --- a/codex-lens/tests/test_standalone_lsp_manager_open_document_cache.py +++ /dev/null @@ -1,87 +0,0 @@ -from __future__ import annotations - -import asyncio -import time -from pathlib import Path -from types import SimpleNamespace -from unittest.mock import AsyncMock, MagicMock - -import pytest - -from codexlens.lsp.standalone_manager import ServerConfig, ServerState, StandaloneLspManager - - -@pytest.mark.asyncio -async def test_open_document_skips_when_unchanged(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: - target = tmp_path / "a.py" - target.write_text("print('hi')\n", encoding="utf-8") - - manager = StandaloneLspManager(workspace_root=str(tmp_path)) - # Make language detection deterministic. - manager._extension_map["py"] = "python" # type: ignore[attr-defined] - - cfg = ServerConfig( - language_id="python", - display_name="Pyright", - extensions=["py"], - command=["pyright-langserver", "--stdio"], - ) - - # ServerState requires reader/writer/process, but _open_document only uses writer via _send_notification. - dummy_process = SimpleNamespace(returncode=None) - dummy_reader = asyncio.StreamReader() - dummy_writer = MagicMock() - state = ServerState(config=cfg, process=dummy_process, reader=dummy_reader, writer=dummy_writer) - - sent: list[str] = [] - - async def _send_notification(_state, method: str, _params): - sent.append(method) - - monkeypatch.setattr(manager, "_send_notification", _send_notification) - - await manager._open_document(state, str(target)) # type: ignore[attr-defined] - await manager._open_document(state, str(target)) # unchanged: should be skipped - - assert sent.count("textDocument/didOpen") == 1 - assert "textDocument/didChange" not in sent - - -@pytest.mark.asyncio -async def test_open_document_sends_did_change_on_mtime_change(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: - target = tmp_path / "a.py" - target.write_text("print('hi')\n", encoding="utf-8") - - manager = StandaloneLspManager(workspace_root=str(tmp_path)) - manager._extension_map["py"] = "python" # type: ignore[attr-defined] - - cfg = ServerConfig( - language_id="python", - display_name="Pyright", - extensions=["py"], - command=["pyright-langserver", "--stdio"], - ) - - dummy_process = SimpleNamespace(returncode=None) - dummy_reader = asyncio.StreamReader() - dummy_writer = MagicMock() - state = ServerState(config=cfg, process=dummy_process, reader=dummy_reader, writer=dummy_writer) - - sent: list[str] = [] - - async def _send_notification(_state, method: str, _params): - sent.append(method) - - monkeypatch.setattr(manager, "_send_notification", _send_notification) - - await manager._open_document(state, str(target)) # type: ignore[attr-defined] - - # Ensure filesystem mtime changes (Windows can have coarse resolution). - time.sleep(0.02) - target.write_text("print('changed')\n", encoding="utf-8") - - await manager._open_document(state, str(target)) # changed -> didChange - - assert sent.count("textDocument/didOpen") == 1 - assert sent.count("textDocument/didChange") == 1 - diff --git a/codex-lens/tests/test_static_graph_integration.py b/codex-lens/tests/test_static_graph_integration.py deleted file mode 100644 index 2dfb1357..00000000 --- a/codex-lens/tests/test_static_graph_integration.py +++ /dev/null @@ -1,289 +0,0 @@ -"""Tests for static graph relationship writing during index build (T2). - -Verifies that IndexTreeBuilder._build_single_dir and _build_dir_worker -correctly write relationships to GlobalSymbolIndex when -config.static_graph_enabled is True. -""" - -import tempfile -from pathlib import Path -from unittest.mock import MagicMock, patch - -import pytest - -from codexlens.config import Config -from codexlens.entities import ( - CodeRelationship, - IndexedFile, - RelationshipType, - Symbol, -) -from codexlens.storage.global_index import GlobalSymbolIndex - - -@pytest.fixture() -def temp_dir(): - tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True) - yield Path(tmpdir.name) - try: - tmpdir.cleanup() - except (PermissionError, OSError): - pass - - -def _make_indexed_file(file_path: str) -> IndexedFile: - """Create a test IndexedFile with symbols and relationships.""" - return IndexedFile( - path=file_path, - language="python", - symbols=[ - Symbol(name="MyClass", kind="class", range=(1, 20)), - Symbol(name="helper", kind="function", range=(22, 30)), - ], - relationships=[ - CodeRelationship( - source_symbol="MyClass", - target_symbol="BaseClass", - relationship_type=RelationshipType.INHERITS, - source_file=file_path, - target_file="other/base.py", - source_line=1, - ), - CodeRelationship( - source_symbol="MyClass", - target_symbol="os", - relationship_type=RelationshipType.IMPORTS, - source_file=file_path, - source_line=2, - ), - CodeRelationship( - source_symbol="helper", - target_symbol="external_func", - relationship_type=RelationshipType.CALL, - source_file=file_path, - source_line=25, - ), - ], - ) - - -def test_build_single_dir_writes_global_relationships_when_enabled(temp_dir: Path) -> None: - """When static_graph_enabled=True, relationships should be written to global index.""" - from codexlens.storage.index_tree import IndexTreeBuilder - - config = Config( - data_dir=temp_dir / "data", - static_graph_enabled=True, - static_graph_relationship_types=["imports", "inherits"], - global_symbol_index_enabled=True, - ) - - # Set up real GlobalSymbolIndex - global_db_path = temp_dir / "global_symbols.db" - global_index = GlobalSymbolIndex(global_db_path, project_id=1) - global_index.initialize() - - # Create a source file - src_dir = temp_dir / "src" - src_dir.mkdir() - test_file = src_dir / "module.py" - test_file.write_text("class MyClass(BaseClass):\n pass\n", encoding="utf-8") - - indexed_file = _make_indexed_file(str(test_file)) - - # Mock parser to return our test IndexedFile - mock_parser = MagicMock() - mock_parser.parse.return_value = indexed_file - - mock_mapper = MagicMock() - mock_mapper.source_to_index_db.return_value = temp_dir / "index" / "_index.db" - - mock_registry = MagicMock() - - builder = IndexTreeBuilder(mock_registry, mock_mapper, config=config, incremental=False) - builder.parser_factory = MagicMock() - builder.parser_factory.get_parser.return_value = mock_parser - - result = builder._build_single_dir( - src_dir, - languages=None, - project_id=1, - global_index_db_path=global_db_path, - ) - - assert result.error is None - assert result.files_count == 1 - - # Verify relationships were written to global index - # Only IMPORTS and INHERITS should be written (not CALL) - rels = global_index.query_by_target("BaseClass", prefix_mode=True) - rels += global_index.query_by_target("os", prefix_mode=True) - assert len(rels) >= 1, "Expected at least 1 relationship written to global index" - - # CALL relationship for external_func should NOT be present - call_rels = global_index.query_by_target("external_func", prefix_mode=True) - assert len(call_rels) == 0, "CALL relationships should not be written" - - global_index.close() - - -def test_build_single_dir_skips_relationships_when_disabled(temp_dir: Path) -> None: - """When static_graph_enabled=False, no relationships should be written.""" - from codexlens.storage.index_tree import IndexTreeBuilder - - config = Config( - data_dir=temp_dir / "data", - static_graph_enabled=False, - global_symbol_index_enabled=True, - ) - - global_db_path = temp_dir / "global_symbols.db" - global_index = GlobalSymbolIndex(global_db_path, project_id=1) - global_index.initialize() - - src_dir = temp_dir / "src" - src_dir.mkdir() - test_file = src_dir / "module.py" - test_file.write_text("import os\n", encoding="utf-8") - - indexed_file = _make_indexed_file(str(test_file)) - - mock_parser = MagicMock() - mock_parser.parse.return_value = indexed_file - - mock_mapper = MagicMock() - mock_mapper.source_to_index_db.return_value = temp_dir / "index" / "_index.db" - - mock_registry = MagicMock() - - builder = IndexTreeBuilder(mock_registry, mock_mapper, config=config, incremental=False) - builder.parser_factory = MagicMock() - builder.parser_factory.get_parser.return_value = mock_parser - - result = builder._build_single_dir( - src_dir, - languages=None, - project_id=1, - global_index_db_path=global_db_path, - ) - - assert result.error is None - - # No relationships should be in global index - conn = global_index._get_connection() - count = conn.execute("SELECT COUNT(*) FROM global_relationships").fetchone()[0] - assert count == 0, "No relationships should be written when static_graph_enabled=False" - - global_index.close() - - -def test_relationship_write_failure_does_not_block_indexing(temp_dir: Path) -> None: - """If global_index.update_file_relationships raises, file indexing continues.""" - from codexlens.storage.index_tree import IndexTreeBuilder - - config = Config( - data_dir=temp_dir / "data", - static_graph_enabled=True, - static_graph_relationship_types=["imports", "inherits"], - global_symbol_index_enabled=True, - ) - - src_dir = temp_dir / "src" - src_dir.mkdir() - test_file = src_dir / "module.py" - test_file.write_text("import os\n", encoding="utf-8") - - indexed_file = _make_indexed_file(str(test_file)) - - mock_parser = MagicMock() - mock_parser.parse.return_value = indexed_file - - mock_mapper = MagicMock() - mock_mapper.source_to_index_db.return_value = temp_dir / "index" / "_index.db" - - mock_registry = MagicMock() - - # Create a mock GlobalSymbolIndex that fails on update_file_relationships - mock_global_db_path = temp_dir / "global_symbols.db" - - builder = IndexTreeBuilder(mock_registry, mock_mapper, config=config, incremental=False) - builder.parser_factory = MagicMock() - builder.parser_factory.get_parser.return_value = mock_parser - - # Patch GlobalSymbolIndex so update_file_relationships raises - with patch("codexlens.storage.index_tree.GlobalSymbolIndex") as MockGSI: - mock_gsi_instance = MagicMock() - mock_gsi_instance.update_file_relationships.side_effect = RuntimeError("DB locked") - MockGSI.return_value = mock_gsi_instance - - result = builder._build_single_dir( - src_dir, - languages=None, - project_id=1, - global_index_db_path=mock_global_db_path, - ) - - # File should still be indexed despite relationship write failure - assert result.error is None - assert result.files_count == 1 - - -def test_only_configured_relationship_types_written(temp_dir: Path) -> None: - """Only relationship types in static_graph_relationship_types should be written.""" - from codexlens.storage.index_tree import IndexTreeBuilder - - # Only allow 'imports' (not 'inherits') - config = Config( - data_dir=temp_dir / "data", - static_graph_enabled=True, - static_graph_relationship_types=["imports"], - global_symbol_index_enabled=True, - ) - - global_db_path = temp_dir / "global_symbols.db" - global_index = GlobalSymbolIndex(global_db_path, project_id=1) - global_index.initialize() - - src_dir = temp_dir / "src" - src_dir.mkdir() - test_file = src_dir / "module.py" - test_file.write_text("import os\nclass Foo(Bar): pass\n", encoding="utf-8") - - indexed_file = _make_indexed_file(str(test_file)) - - mock_parser = MagicMock() - mock_parser.parse.return_value = indexed_file - - mock_mapper = MagicMock() - mock_mapper.source_to_index_db.return_value = temp_dir / "index" / "_index.db" - - mock_registry = MagicMock() - - builder = IndexTreeBuilder(mock_registry, mock_mapper, config=config, incremental=False) - builder.parser_factory = MagicMock() - builder.parser_factory.get_parser.return_value = mock_parser - - result = builder._build_single_dir( - src_dir, - languages=None, - project_id=1, - global_index_db_path=global_db_path, - ) - - assert result.error is None - - # Only IMPORTS should be written - conn = global_index._get_connection() - rows = conn.execute( - "SELECT relationship_type FROM global_relationships" - ).fetchall() - - rel_types = {row[0] for row in rows} - assert "imports" in rel_types or len(rows) == 0 or rel_types == {"imports"}, \ - f"Expected only 'imports', got {rel_types}" - # INHERITS should NOT be present - assert "inherits" not in rel_types, "inherits should not be written when not in config" - # CALL should NOT be present - assert "calls" not in rel_types, "calls should not be written" - - global_index.close() diff --git a/codex-lens/tests/test_storage.py b/codex-lens/tests/test_storage.py deleted file mode 100644 index 2e07ceac..00000000 --- a/codex-lens/tests/test_storage.py +++ /dev/null @@ -1,534 +0,0 @@ -"""Tests for CodexLens storage.""" - -import sqlite3 -import threading -import pytest -import tempfile -from pathlib import Path - -from codexlens.storage.sqlite_store import SQLiteStore -from codexlens.entities import IndexedFile, Symbol -from codexlens.errors import StorageError - - -@pytest.fixture -def temp_db(): - """Create a temporary database for testing.""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "test.db" - store = SQLiteStore(db_path) - store.initialize() - yield store - store.close() - - -@pytest.fixture -def temp_db_path(): - """Create a temporary directory and return db path.""" - with tempfile.TemporaryDirectory() as tmpdir: - yield Path(tmpdir) / "test.db" - - -class TestSQLiteStore: - """Tests for SQLiteStore.""" - - def test_initialize(self, temp_db): - """Test database initialization.""" - stats = temp_db.stats() - assert stats["files"] == 0 - assert stats["symbols"] == 0 - - def test_fts_uses_external_content(self, temp_db): - """FTS should be configured as external-content to avoid duplication.""" - conn = temp_db._get_connection() - row = conn.execute( - "SELECT sql FROM sqlite_master WHERE type='table' AND name='files_fts'" - ).fetchone() - assert row is not None - assert "content='files'" in row["sql"] or "content=files" in row["sql"] - - def test_add_file(self, temp_db): - """Test adding a file to the index.""" - indexed_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=[ - Symbol(name="hello", kind="function", range=(1, 1)), - ], - chunks=[], - ) - temp_db.add_file(indexed_file, "def hello():\n pass") - - stats = temp_db.stats() - assert stats["files"] == 1 - assert stats["symbols"] == 1 - - def test_remove_file(self, temp_db): - """Test removing a file from the index.""" - indexed_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=[], - chunks=[], - ) - temp_db.add_file(indexed_file, "# test") - - assert temp_db.file_exists("/test/file.py") - assert temp_db.remove_file("/test/file.py") - assert not temp_db.file_exists("/test/file.py") - - def test_search_fts(self, temp_db): - """Test FTS search.""" - indexed_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=[], - chunks=[], - ) - temp_db.add_file(indexed_file, "def hello_world():\n print('hello')") - - results = temp_db.search_fts("hello") - assert len(results) == 1 - assert str(Path("/test/file.py").resolve()) == results[0].path - - def test_search_symbols(self, temp_db): - """Test symbol search.""" - indexed_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=[ - Symbol(name="hello_world", kind="function", range=(1, 1)), - Symbol(name="goodbye", kind="function", range=(3, 3)), - ], - chunks=[], - ) - temp_db.add_file(indexed_file, "def hello_world():\n pass\ndef goodbye():\n pass") - - results = temp_db.search_symbols("hello") - assert len(results) == 1 - assert results[0].name == "hello_world" - - def test_connection_reuse(self, temp_db): - """Test that connections are reused within the same thread.""" - conn1 = temp_db._get_connection() - conn2 = temp_db._get_connection() - assert conn1 is conn2 - - def test_migrate_legacy_fts_to_external(self, tmp_path): - """Existing databases should be migrated to external-content FTS.""" - db_path = tmp_path / "legacy.db" - with sqlite3.connect(db_path) as conn: - conn.execute( - """ - CREATE TABLE files ( - id INTEGER PRIMARY KEY, - path TEXT UNIQUE NOT NULL, - language TEXT NOT NULL, - content TEXT NOT NULL, - mtime REAL, - line_count INTEGER - ) - """ - ) - conn.execute( - """ - CREATE VIRTUAL TABLE files_fts USING fts5( - path UNINDEXED, - language UNINDEXED, - content - ) - """ - ) - conn.execute( - """ - INSERT INTO files(path, language, content, mtime, line_count) - VALUES(?, ?, ?, ?, ?) - """, - (str(Path("/test/file.py").resolve()), "python", "def hello():\n pass", None, 2), - ) - file_id = conn.execute("SELECT id FROM files").fetchone()[0] - conn.execute( - "INSERT INTO files_fts(rowid, path, language, content) VALUES(?, ?, ?, ?)", - (file_id, str(Path("/test/file.py").resolve()), "python", "def hello():\n pass"), - ) - conn.commit() - - store = SQLiteStore(db_path) - store.initialize() - try: - results = store.search_fts("hello") - assert len(results) == 1 - - conn = store._get_connection() - row = conn.execute( - "SELECT sql FROM sqlite_master WHERE type='table' AND name='files_fts'" - ).fetchone() - assert row is not None - assert "content='files'" in row["sql"] or "content=files" in row["sql"] - finally: - store.close() - - -class TestSQLiteStoreAddFiles: - """Tests for add_files batch operation.""" - - def test_add_files_batch(self, temp_db): - """Test adding multiple files in a batch.""" - files_data = [ - (IndexedFile( - path="/test/a.py", - language="python", - symbols=[Symbol(name="func_a", kind="function", range=(1, 1))], - ), "def func_a(): pass"), - (IndexedFile( - path="/test/b.py", - language="python", - symbols=[Symbol(name="func_b", kind="function", range=(1, 1))], - ), "def func_b(): pass"), - (IndexedFile( - path="/test/c.py", - language="python", - symbols=[Symbol(name="func_c", kind="function", range=(1, 1))], - ), "def func_c(): pass"), - ] - - temp_db.add_files(files_data) - - stats = temp_db.stats() - assert stats["files"] == 3 - assert stats["symbols"] == 3 - - def test_add_files_empty_list(self, temp_db): - """Test adding empty list of files.""" - temp_db.add_files([]) - stats = temp_db.stats() - assert stats["files"] == 0 - - -class TestSQLiteStoreSearch: - """Tests for search operations.""" - - def test_search_fts_with_limit(self, temp_db): - """Test FTS search with limit.""" - for i in range(10): - indexed_file = IndexedFile( - path=f"/test/file{i}.py", - language="python", - symbols=[], - ) - temp_db.add_file(indexed_file, f"def test{i}(): pass") - - results = temp_db.search_fts("test", limit=3) - assert len(results) <= 3 - - def test_search_fts_with_offset(self, temp_db): - """Test FTS search with offset.""" - for i in range(10): - indexed_file = IndexedFile( - path=f"/test/file{i}.py", - language="python", - symbols=[], - ) - temp_db.add_file(indexed_file, f"searchterm content {i}") - - results_page1 = temp_db.search_fts("searchterm", limit=3, offset=0) - results_page2 = temp_db.search_fts("searchterm", limit=3, offset=3) - - # Pages should be different - paths1 = {r.path for r in results_page1} - paths2 = {r.path for r in results_page2} - assert paths1.isdisjoint(paths2) - - def test_search_fts_no_results(self, temp_db): - """Test FTS search with no results.""" - indexed_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=[], - ) - temp_db.add_file(indexed_file, "def hello(): pass") - - results = temp_db.search_fts("nonexistent") - assert len(results) == 0 - - def test_search_symbols_by_kind(self, temp_db): - """Test symbol search filtered by kind.""" - indexed_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=[ - Symbol(name="MyClass", kind="class", range=(1, 5)), - Symbol(name="my_func", kind="function", range=(7, 10)), - Symbol(name="my_method", kind="method", range=(2, 4)), - ], - ) - temp_db.add_file(indexed_file, "class MyClass:\n def my_method(): pass\ndef my_func(): pass") - - # Search for functions only - results = temp_db.search_symbols("my", kind="function") - assert len(results) == 1 - assert results[0].name == "my_func" - - def test_search_symbols_with_limit(self, temp_db): - """Test symbol search with limit.""" - # Range starts from 1, not 0 - symbols = [Symbol(name=f"func{i}", kind="function", range=(i+1, i+1)) for i in range(20)] - indexed_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=symbols, - ) - temp_db.add_file(indexed_file, "# lots of functions") - - results = temp_db.search_symbols("func", limit=5) - assert len(results) == 5 - - def test_search_files_only(self, temp_db): - """Test search_files_only returns only paths.""" - indexed_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=[], - ) - temp_db.add_file(indexed_file, "def hello(): pass") - - results = temp_db.search_files_only("hello") - assert len(results) == 1 - assert isinstance(results[0], str) - - -class TestSQLiteStoreFileOperations: - """Tests for file operations.""" - - def test_file_exists_true(self, temp_db): - """Test file_exists returns True for existing file.""" - indexed_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=[], - ) - temp_db.add_file(indexed_file, "content") - - assert temp_db.file_exists("/test/file.py") - - def test_file_exists_false(self, temp_db): - """Test file_exists returns False for non-existing file.""" - assert not temp_db.file_exists("/nonexistent/file.py") - - def test_remove_nonexistent_file(self, temp_db): - """Test removing non-existent file returns False.""" - result = temp_db.remove_file("/nonexistent/file.py") - assert result is False - - def test_get_file_mtime(self, temp_db): - """Test getting file mtime.""" - indexed_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=[], - ) - temp_db.add_file(indexed_file, "content") - - # Note: mtime is only set if the file actually exists on disk - mtime = temp_db.get_file_mtime("/test/file.py") - # May be None if file doesn't exist on disk - assert mtime is None or isinstance(mtime, float) - - def test_get_file_mtime_nonexistent(self, temp_db): - """Test getting mtime for non-indexed file.""" - mtime = temp_db.get_file_mtime("/nonexistent/file.py") - assert mtime is None - - def test_update_existing_file(self, temp_db): - """Test updating an existing file.""" - indexed_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=[Symbol(name="old_func", kind="function", range=(1, 1))], - ) - temp_db.add_file(indexed_file, "def old_func(): pass") - - # Update with new content and symbols - updated_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=[Symbol(name="new_func", kind="function", range=(1, 1))], - ) - temp_db.add_file(updated_file, "def new_func(): pass") - - stats = temp_db.stats() - assert stats["files"] == 1 # Still one file - assert stats["symbols"] == 1 # Old symbols replaced - - symbols = temp_db.search_symbols("new_func") - assert len(symbols) == 1 - - -class TestSQLiteStoreStats: - """Tests for stats operation.""" - - def test_stats_empty_db(self, temp_db): - """Test stats on empty database.""" - stats = temp_db.stats() - assert stats["files"] == 0 - assert stats["symbols"] == 0 - assert stats["languages"] == {} - - def test_stats_with_data(self, temp_db): - """Test stats with data.""" - files = [ - (IndexedFile(path="/test/a.py", language="python", symbols=[ - Symbol(name="func1", kind="function", range=(1, 1)), - Symbol(name="func2", kind="function", range=(2, 2)), - ]), "content"), - (IndexedFile(path="/test/b.js", language="javascript", symbols=[ - Symbol(name="func3", kind="function", range=(1, 1)), - ]), "content"), - ] - temp_db.add_files(files) - - stats = temp_db.stats() - assert stats["files"] == 2 - assert stats["symbols"] == 3 - assert stats["languages"]["python"] == 1 - assert stats["languages"]["javascript"] == 1 - assert "db_path" in stats - - -class TestSQLiteStoreContextManager: - """Tests for context manager usage.""" - - def test_context_manager(self, temp_db_path): - """Test using SQLiteStore as context manager.""" - with SQLiteStore(temp_db_path) as store: - indexed_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=[], - ) - store.add_file(indexed_file, "content") - stats = store.stats() - assert stats["files"] == 1 - - -class TestSQLiteStoreThreadSafety: - """Tests for thread safety.""" - - def test_multiple_threads_read(self, temp_db): - """Test reading from multiple threads.""" - # Add some data first - indexed_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=[Symbol(name="test", kind="function", range=(1, 1))], - ) - temp_db.add_file(indexed_file, "def test(): pass") - - results = [] - errors = [] - - def read_data(): - try: - stats = temp_db.stats() - results.append(stats) - except Exception as e: - errors.append(e) - - threads = [threading.Thread(target=read_data) for _ in range(5)] - for t in threads: - t.start() - for t in threads: - t.join() - - assert len(errors) == 0 - assert len(results) == 5 - for stats in results: - assert stats["files"] == 1 - - -class TestSQLiteStoreEdgeCases: - """Edge case tests for SQLiteStore.""" - - def test_special_characters_in_path(self, temp_db): - """Test file path with special characters.""" - indexed_file = IndexedFile( - path="/test/file with spaces.py", - language="python", - symbols=[], - ) - temp_db.add_file(indexed_file, "content") - - assert temp_db.file_exists("/test/file with spaces.py") - - def test_unicode_content(self, temp_db): - """Test file with unicode content.""" - indexed_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=[Symbol(name="你好", kind="function", range=(1, 1))], - ) - temp_db.add_file(indexed_file, "def 你好(): print('世界')") - - symbols = temp_db.search_symbols("你好") - assert len(symbols) == 1 - - def test_very_long_content(self, temp_db): - """Test file with very long content.""" - long_content = "x = 1\n" * 10000 - indexed_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=[], - ) - temp_db.add_file(indexed_file, long_content) - - stats = temp_db.stats() - assert stats["files"] == 1 - - def test_file_with_no_symbols(self, temp_db): - """Test file with no symbols.""" - indexed_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=[], - ) - temp_db.add_file(indexed_file, "# just a comment") - - stats = temp_db.stats() - assert stats["files"] == 1 - assert stats["symbols"] == 0 - - def test_file_with_many_symbols(self, temp_db): - """Test file with many symbols.""" - # Range starts from 1, not 0 - symbols = [Symbol(name=f"func_{i}", kind="function", range=(i+1, i+1)) for i in range(100)] - indexed_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=symbols, - ) - temp_db.add_file(indexed_file, "# lots of functions") - - stats = temp_db.stats() - assert stats["symbols"] == 100 - - def test_close_and_reopen(self, temp_db_path): - """Test closing and reopening database.""" - # First session - store1 = SQLiteStore(temp_db_path) - store1.initialize() - indexed_file = IndexedFile( - path="/test/file.py", - language="python", - symbols=[Symbol(name="test", kind="function", range=(1, 1))], - ) - store1.add_file(indexed_file, "def test(): pass") - store1.close() - - # Second session - store2 = SQLiteStore(temp_db_path) - store2.initialize() - stats = store2.stats() - assert stats["files"] == 1 - assert stats["symbols"] == 1 - store2.close() diff --git a/codex-lens/tests/test_storage_concurrency.py b/codex-lens/tests/test_storage_concurrency.py deleted file mode 100644 index 8a7b35f3..00000000 --- a/codex-lens/tests/test_storage_concurrency.py +++ /dev/null @@ -1,698 +0,0 @@ -"""Concurrency tests for CodexLens storage managers.""" - -from __future__ import annotations - -import threading -import time -import tempfile -from pathlib import Path - -import pytest - -from codexlens.entities import IndexedFile, Symbol -from codexlens.storage.dir_index import DirIndexStore -from codexlens.storage.registry import RegistryStore -from codexlens.storage.sqlite_store import SQLiteStore - - -@pytest.fixture(scope="module") -def populated_store(): - """Create a SQLiteStore populated with 1000+ files across multiple directories.""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "concurrency.db" - store = SQLiteStore(db_path) - store.initialize() - - files = [] - for i in range(1000): - path = f"/test/dir_{i % 25}/file_{i}.py" - content = f"# token_{i}\n\ndef func_{i}():\n return {i}\n" - symbols = [Symbol(name=f"func_{i}", kind="function", range=(1, 1))] - files.append((IndexedFile(path=path, language="python", symbols=symbols), content)) - - store.add_files(files) - yield store - store.close() - - -@pytest.fixture() -def registry_store(tmp_path): - """Create a RegistryStore in a temporary database with a single registered project.""" - db_path = tmp_path / "registry.db" - store = RegistryStore(db_path) - store.initialize() - store.register_project(source_root=tmp_path / "src", index_root=tmp_path / "idx") - yield store - store.close() - - -@pytest.fixture() -def dir_index_store(tmp_path): - """Create a DirIndexStore for concurrency tests.""" - db_path = tmp_path / "_index.db" - store = DirIndexStore(db_path) - store.initialize() - - # Seed a few entries for read tests - for i in range(10): - store.add_file( - name=f"file_{i}.py", - full_path=tmp_path / f"file_{i}.py", - content=f"# dir-index token_{i}\nprint({i})\n", - language="python", - symbols=[Symbol(name=f"sym_{i}", kind="function", range=(1, 1))], - ) - - yield store - store.close() - - -@pytest.fixture() -def writable_store(tmp_path): - """Create a fresh SQLiteStore for concurrent write tests.""" - db_path = tmp_path / "writes.db" - store = SQLiteStore(db_path) - store.initialize() - yield store - store.close() - - -class TestConcurrentReads: - """Concurrent read tests for storage managers.""" - - def test_concurrent_stats_same_query_consistent(self, populated_store): - """Concurrent reads from 10 threads accessing the same stats query.""" - results = [] - errors = [] - lock = threading.Lock() - - def worker(): - try: - stats = populated_store.stats() - with lock: - results.append(stats) - except Exception as exc: - with lock: - errors.append(exc) - - threads = [threading.Thread(target=worker) for _ in range(10)] - for t in threads: - t.start() - for t in threads: - t.join() - - assert not errors - assert len(results) == 10 - assert all(r["files"] == 1000 for r in results) - assert all(r["symbols"] == 1000 for r in results) - - def test_concurrent_file_exists_same_file(self, populated_store): - """Concurrent reads from 10 threads checking the same file path.""" - target = "/test/dir_0/file_0.py" - results = [] - errors = [] - lock = threading.Lock() - - def worker(): - try: - ok = populated_store.file_exists(target) - with lock: - results.append(ok) - except Exception as exc: - with lock: - errors.append(exc) - - threads = [threading.Thread(target=worker) for _ in range(10)] - for t in threads: - t.start() - for t in threads: - t.join() - - assert not errors - assert results == [True] * 10 - - def test_concurrent_search_fts_same_token_consistent(self, populated_store): - """Concurrent reads from 10 threads searching the same FTS token.""" - results = [] - errors = [] - lock = threading.Lock() - - def worker(): - try: - matches = populated_store.search_fts("token_42") - with lock: - results.append(len(matches)) - except Exception as exc: - with lock: - errors.append(exc) - - threads = [threading.Thread(target=worker) for _ in range(10)] - for t in threads: - t.start() - for t in threads: - t.join() - - assert not errors - assert results == [1] * 10 - - def test_concurrent_search_fts_different_tokens(self, populated_store): - """Concurrent reads from 20 threads searching different tokens.""" - results = {} - errors = [] - lock = threading.Lock() - - def worker(i: int): - try: - matches = populated_store.search_fts(f"token_{i}") - with lock: - results[i] = len(matches) - except Exception as exc: - with lock: - errors.append(exc) - - threads = [threading.Thread(target=worker, args=(i,)) for i in range(20)] - for t in threads: - t.start() - for t in threads: - t.join() - - assert not errors - assert len(results) == 20 - assert all(results[i] == 1 for i in range(20)) - - def test_connection_pool_thread_local_isolation(self, populated_store): - """Each thread should get a dedicated connection object.""" - conn_ids = [] - errors = [] - lock = threading.Lock() - - def worker(): - try: - conn = populated_store._get_connection() - with lock: - conn_ids.append(id(conn)) - except Exception as exc: - with lock: - errors.append(exc) - - threads = [threading.Thread(target=worker) for _ in range(10)] - for t in threads: - t.start() - for t in threads: - t.join() - - assert not errors - assert len(set(conn_ids)) == len(conn_ids) - - def test_connection_reuse_within_thread(self, populated_store): - """Connections should be reused within the same thread.""" - errors = [] - - def worker(): - try: - c1 = populated_store._get_connection() - c2 = populated_store._get_connection() - assert c1 is c2 - except Exception as exc: - errors.append(exc) - - threads = [threading.Thread(target=worker) for _ in range(10)] - for t in threads: - t.start() - for t in threads: - t.join() - - assert not errors - - def test_pool_cleanup_removes_dead_thread_connections(self, populated_store): - """cleanup_stale_connections should remove connections for terminated threads.""" - created = [] - lock = threading.Lock() - current_tid = threading.get_ident() - - def worker(): - conn = populated_store._get_connection() - with lock: - created.append(threading.get_ident()) - # allow the thread to end quickly - - threads = [threading.Thread(target=worker) for _ in range(15)] - for t in threads: - t.start() - for t in threads: - t.join() - - # Ensure pool has entries for the threads we created - assert len(populated_store._pool) >= 10 - - populated_store._cleanup_stale_connections() - # Main thread connection may remain active; all terminated thread connections must be removed. - assert all(tid not in populated_store._pool for tid in created) - assert set(populated_store._pool.keys()).issubset({current_tid}) - - def test_pool_size_respects_max_after_sequential_load(self, populated_store): - """Pool should stay within MAX_POOL_SIZE once stale threads are cleaned up.""" - max_pool_size = populated_store.MAX_POOL_SIZE - - def make_thread(): - def worker(): - populated_store._get_connection() - - t = threading.Thread(target=worker) - t.start() - t.join() - - # Create more than MAX_POOL_SIZE thread connections sequentially. - for _ in range(max_pool_size + 8): - make_thread() - - populated_store._cleanup_stale_connections() - assert len(populated_store._pool) <= max_pool_size - - def test_read_throughput_measurement(self, populated_store): - """Measure simple read throughput scaling by thread count.""" - target_paths = [f"/test/dir_{i % 25}/file_{i}.py" for i in range(200)] - - def run(thread_count: int) -> float: - per_thread = 200 - errors = [] - - def worker(offset: int): - try: - for j in range(per_thread): - populated_store.file_exists(target_paths[(offset + j) % len(target_paths)]) - except Exception as exc: - errors.append(exc) - - threads = [threading.Thread(target=worker, args=(i,)) for i in range(thread_count)] - start = time.time() - for t in threads: - t.start() - for t in threads: - t.join() - duration = max(time.time() - start, 1e-6) - - assert not errors - total_ops = thread_count * per_thread - return total_ops / duration - - qps_1 = run(1) - qps_5 = run(5) - qps_10 = run(10) - qps_20 = run(20) - - # Sanity: throughput is measurable (no zeros). Do not assert strict scaling - # due to platform/GIL variability. - assert qps_1 > 0 - assert qps_5 > 0 - assert qps_10 > 0 - assert qps_20 > 0 - - def test_registry_store_concurrent_list_projects(self, registry_store): - """RegistryStore should support concurrent read access across threads.""" - results = [] - errors = [] - lock = threading.Lock() - - def worker(): - try: - projects = registry_store.list_projects() - with lock: - results.append(len(projects)) - except Exception as exc: - with lock: - errors.append(exc) - - threads = [threading.Thread(target=worker) for _ in range(10)] - for t in threads: - t.start() - for t in threads: - t.join() - - assert not errors - assert results == [1] * 10 - - def test_dir_index_store_concurrent_list_files(self, dir_index_store): - """DirIndexStore should support concurrent read listing via its internal lock.""" - results = [] - errors = [] - lock = threading.Lock() - - def worker(): - try: - files = dir_index_store.list_files() - with lock: - results.append(len(files)) - except Exception as exc: - with lock: - errors.append(exc) - - threads = [threading.Thread(target=worker) for _ in range(10)] - for t in threads: - t.start() - for t in threads: - t.join() - - assert not errors - assert results == [10] * 10 - - -class TestConcurrentWrites: - """Concurrent write tests for SQLiteStore.""" - - def test_concurrent_inserts_commit_all_rows(self, writable_store): - """Concurrent inserts from 10 threads should commit all rows.""" - thread_count = 10 - files_per_thread = 10 - errors = [] - lock = threading.Lock() - - def worker(thread_index: int): - try: - for i in range(files_per_thread): - path = f"/write/thread_{thread_index}/file_{i}.py" - indexed_file = IndexedFile( - path=path, - language="python", - symbols=[Symbol(name=f"sym_{thread_index}_{i}", kind="function", range=(1, 1))], - ) - content = f"# write_token_{thread_index}_{i}\nprint({i})\n" - writable_store.add_file(indexed_file, content) - except Exception as exc: - with lock: - errors.append(exc) - - threads = [threading.Thread(target=worker, args=(i,)) for i in range(thread_count)] - for t in threads: - t.start() - for t in threads: - t.join() - - assert not errors - stats = writable_store.stats() - assert stats["files"] == thread_count * files_per_thread - assert stats["symbols"] == thread_count * files_per_thread - - def test_concurrent_updates_same_file_serializes(self, writable_store): - """Concurrent updates to the same file should serialize and not lose writes.""" - target_path = "/write/shared.py" - base = IndexedFile( - path=target_path, - language="python", - symbols=[Symbol(name="base", kind="function", range=(1, 1))], - ) - writable_store.add_file(base, "print('base')\n") - - update_contents = [] - errors = [] - lock = threading.Lock() - - def worker(version: int): - try: - content = f"print('v{version}')\n" - indexed_file = IndexedFile( - path=target_path, - language="python", - symbols=[Symbol(name=f"v{version}", kind="function", range=(1, 1))], - ) - writable_store.add_file(indexed_file, content) - with lock: - update_contents.append(content) - except Exception as exc: - with lock: - errors.append(exc) - - threads = [threading.Thread(target=worker, args=(i,)) for i in range(5)] - for t in threads: - t.start() - for t in threads: - t.join() - - assert not errors - - resolved = str(Path(target_path).resolve()) - rows = writable_store.execute_query("SELECT content FROM files WHERE path=?", (resolved,)) - assert len(rows) == 1 - assert rows[0]["content"] in set(update_contents) - - def test_wal_mode_is_active_for_thread_connections(self, writable_store): - """PRAGMA journal_mode should be WAL for all thread-local connections.""" - modes = [] - errors = [] - lock = threading.Lock() - - def worker(): - try: - conn = writable_store._get_connection() - mode = conn.execute("PRAGMA journal_mode").fetchone()[0] - with lock: - modes.append(str(mode).lower()) - except Exception as exc: - with lock: - errors.append(exc) - - threads = [threading.Thread(target=worker) for _ in range(10)] - for t in threads: - t.start() - for t in threads: - t.join() - - assert not errors - assert modes - assert all(mode == "wal" for mode in modes) - - def test_transaction_isolation_reader_sees_committed_state(self, writable_store): - """Readers should not see uncommitted writer updates and should not block.""" - target_path = "/write/isolation.py" - indexed_file = IndexedFile(path=target_path, language="python", symbols=[]) - writable_store.add_file(indexed_file, "print('original')\n") - resolved = str(Path(target_path).resolve()) - - writer_started = threading.Event() - reader_done = threading.Event() - errors = [] - lock = threading.Lock() - observed = {"reader": None} - updated_content = "print('updated')\n" - - def writer(): - try: - conn = writable_store._get_connection() - conn.execute("BEGIN IMMEDIATE") - conn.execute( - "UPDATE files SET content=? WHERE path=?", - (updated_content, resolved), - ) - writer_started.set() - reader_done.wait(timeout=5) - conn.commit() - except Exception as exc: - with lock: - errors.append(exc) - - def reader(): - try: - writer_started.wait(timeout=5) - conn = writable_store._get_connection() - row = conn.execute("SELECT content FROM files WHERE path=?", (resolved,)).fetchone() - observed["reader"] = row[0] if row else None - reader_done.set() - except Exception as exc: - with lock: - errors.append(exc) - - threads = [threading.Thread(target=writer), threading.Thread(target=reader)] - for t in threads: - t.start() - for t in threads: - t.join() - - assert not errors - assert observed["reader"] == "print('original')\n" - - rows = writable_store.execute_query("SELECT content FROM files WHERE path=?", (resolved,)) - assert rows[0]["content"] == updated_content - - def test_batch_insert_performance_and_counts(self, writable_store): - """Batch inserts across threads should not lose rows.""" - thread_count = 10 - files_per_thread = 100 - errors = [] - lock = threading.Lock() - - def worker(thread_index: int): - try: - files = [] - for i in range(files_per_thread): - path = f"/write/batch_{thread_index}/file_{i}.py" - indexed_file = IndexedFile( - path=path, - language="python", - symbols=[ - Symbol(name=f"sym_{thread_index}_{i}", kind="function", range=(1, 1)) - ], - ) - content = f"# batch_token_{thread_index}_{i}\nprint({i})\n" - files.append((indexed_file, content)) - - writable_store.add_files(files) - except Exception as exc: - with lock: - errors.append(exc) - - start = time.time() - threads = [threading.Thread(target=worker, args=(i,)) for i in range(thread_count)] - for t in threads: - t.start() - for t in threads: - t.join() - duration = max(time.time() - start, 1e-6) - - assert not errors - stats = writable_store.stats() - assert stats["files"] == thread_count * files_per_thread - assert stats["symbols"] == thread_count * files_per_thread - assert (thread_count * files_per_thread) / duration > 0 - - def test_mixed_read_write_operations_no_errors(self, writable_store): - """Mixed reader and writer threads should complete without exceptions.""" - writer_threads = 5 - reader_threads = 10 - writes_per_writer = 20 - reads_per_reader = 50 - - errors = [] - lock = threading.Lock() - target_paths = [ - f"/write/mixed_{w}/file_{i}.py" - for w in range(writer_threads) - for i in range(writes_per_writer) - ] - - def writer(worker_index: int): - try: - for i in range(writes_per_writer): - path = f"/write/mixed_{worker_index}/file_{i}.py" - indexed_file = IndexedFile(path=path, language="python", symbols=[]) - writable_store.add_file(indexed_file, f"# mixed\nprint({i})\n") - except Exception as exc: - with lock: - errors.append(exc) - - def reader(worker_index: int): - try: - for i in range(reads_per_reader): - path = target_paths[(worker_index + i) % len(target_paths)] - writable_store.file_exists(path) - except Exception as exc: - with lock: - errors.append(exc) - - threads = [ - *[threading.Thread(target=writer, args=(i,)) for i in range(writer_threads)], - *[threading.Thread(target=reader, args=(i,)) for i in range(reader_threads)], - ] - for t in threads: - t.start() - for t in threads: - t.join() - - assert not errors - stats = writable_store.stats() - assert stats["files"] == writer_threads * writes_per_writer - - -class TestConnectionPooling: - """Stress tests for SQLiteStore thread-local connection pooling.""" - - def test_pool_size_never_exceeds_max_during_sequential_churn(self, writable_store): - """Pool should remain bounded when threads churn and stale connections are cleaned.""" - max_pool_size = writable_store.MAX_POOL_SIZE - - def make_thread(): - def worker(): - writable_store._get_connection() - - t = threading.Thread(target=worker) - t.start() - t.join() - - for _ in range(max_pool_size + 50): - make_thread() - writable_store._cleanup_stale_connections() - assert len(writable_store._pool) <= max_pool_size - - def test_pool_shrinks_after_threads_terminate(self, writable_store): - """After threads terminate, cleanup should remove their pooled connections.""" - thread_count = 20 - barrier = threading.Barrier(thread_count + 1) - created = [] - errors = [] - lock = threading.Lock() - current_tid = threading.get_ident() - - def worker(): - try: - writable_store._get_connection() - with lock: - created.append(threading.get_ident()) - barrier.wait(timeout=5) - except Exception as exc: - with lock: - errors.append(exc) - - threads = [threading.Thread(target=worker) for _ in range(thread_count)] - for t in threads: - t.start() - - barrier.wait(timeout=5) - assert not errors - assert len(writable_store._pool) >= thread_count - - for t in threads: - t.join() - - writable_store._cleanup_stale_connections() - assert all(tid not in writable_store._pool for tid in created) - assert set(writable_store._pool.keys()).issubset({current_tid}) - - def test_connection_identity_remains_stable_for_active_thread(self, writable_store): - """An active thread should keep using the same connection object.""" - main_conn = writable_store._get_connection() - errors = [] - lock = threading.Lock() - - def worker(): - try: - writable_store._get_connection() - except Exception as exc: - with lock: - errors.append(exc) - - threads = [threading.Thread(target=worker) for _ in range(15)] - for t in threads: - t.start() - for t in threads: - t.join() - - assert not errors - assert writable_store._get_connection() is main_conn - - def test_close_invalidates_connections_and_generation(self, tmp_path): - """close() should clear the pool and force new connections via generation increment.""" - store = SQLiteStore(tmp_path / "pool-close.db") - store.initialize() - try: - conn_before = store._get_connection() - generation_before = store._pool_generation - - store.close() - - assert store._pool_generation == generation_before + 1 - assert store._pool == {} - - conn_after = store._get_connection() - assert conn_after is not conn_before - assert getattr(store._local, "generation", None) == store._pool_generation - finally: - store.close() diff --git a/codex-lens/tests/test_symbol_extractor.py b/codex-lens/tests/test_symbol_extractor.py deleted file mode 100644 index be26e606..00000000 --- a/codex-lens/tests/test_symbol_extractor.py +++ /dev/null @@ -1,238 +0,0 @@ -"""Tests for symbol extraction and relationship tracking.""" -import tempfile -from pathlib import Path - -import pytest - -from codexlens.indexing.symbol_extractor import SymbolExtractor - - -@pytest.fixture -def extractor(): - """Create a temporary symbol extractor for testing.""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "test.db" - ext = SymbolExtractor(db_path) - ext.connect() - yield ext - ext.close() - - -class TestSymbolExtractor: - """Test suite for SymbolExtractor.""" - - def test_database_schema_creation(self, extractor): - """Test that database tables and indexes are created correctly.""" - cursor = extractor.db_conn.cursor() - - # Check symbols table exists - cursor.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='symbols'" - ) - assert cursor.fetchone() is not None - - # Check symbol_relationships table exists - cursor.execute( - "SELECT name FROM sqlite_master WHERE type='table' AND name='symbol_relationships'" - ) - assert cursor.fetchone() is not None - - # Check indexes exist - cursor.execute( - "SELECT COUNT(*) FROM sqlite_master WHERE type='index' AND name LIKE 'idx_%'" - ) - idx_count = cursor.fetchone()[0] - assert idx_count == 5 - - def test_python_function_extraction(self, extractor): - """Test extracting functions from Python code.""" - code = """ -def hello(): - pass - -async def world(): - pass -""" - symbols, _ = extractor.extract_from_file(Path("test.py"), code) - - assert len(symbols) == 2 - assert symbols[0]["name"] == "hello" - assert symbols[0]["kind"] == "function" - assert symbols[1]["name"] == "world" - assert symbols[1]["kind"] == "function" - - def test_python_class_extraction(self, extractor): - """Test extracting classes from Python code.""" - code = """ -class MyClass: - pass - -class AnotherClass(BaseClass): - pass -""" - symbols, _ = extractor.extract_from_file(Path("test.py"), code) - - assert len(symbols) == 2 - assert symbols[0]["name"] == "MyClass" - assert symbols[0]["kind"] == "class" - assert symbols[1]["name"] == "AnotherClass" - assert symbols[1]["kind"] == "class" - - def test_typescript_extraction(self, extractor): - """Test extracting symbols from TypeScript code.""" - code = """ -export function calculateSum(a: number, b: number): number { - return a + b; -} - -export class Calculator { - multiply(x: number, y: number) { - return x * y; - } -} -""" - symbols, _ = extractor.extract_from_file(Path("test.ts"), code) - - assert len(symbols) == 2 - assert symbols[0]["name"] == "calculateSum" - assert symbols[0]["kind"] == "function" - assert symbols[1]["name"] == "Calculator" - assert symbols[1]["kind"] == "class" - - def test_javascript_extraction(self, extractor): - """Test extracting symbols from JavaScript code.""" - code = """ -function processData(data) { - return data; -} - -class DataProcessor { - transform(input) { - return input; - } -} -""" - symbols, _ = extractor.extract_from_file(Path("test.js"), code) - - assert len(symbols) == 2 - assert symbols[0]["name"] == "processData" - assert symbols[1]["name"] == "DataProcessor" - - def test_relationship_extraction(self, extractor): - """Test extracting relationships between symbols.""" - code = """ -def helper(): - pass - -def main(): - helper() - print("done") -""" - _, relationships = extractor.extract_from_file(Path("test.py"), code) - - # Should find calls to helper and print - call_targets = [r["target"] for r in relationships if r["type"] == "calls"] - assert "helper" in call_targets - - def test_save_and_query_symbols(self, extractor): - """Test saving symbols to database and querying them.""" - code = """ -def test_func(): - pass - -class TestClass: - pass -""" - symbols, _ = extractor.extract_from_file(Path("test.py"), code) - name_to_id = extractor.save_symbols(symbols) - - assert len(name_to_id) == 2 - assert "test_func" in name_to_id - assert "TestClass" in name_to_id - - # Query database - cursor = extractor.db_conn.cursor() - cursor.execute("SELECT COUNT(*) FROM symbols") - count = cursor.fetchone()[0] - assert count == 2 - - def test_save_relationships(self, extractor): - """Test saving relationships to database.""" - code = """ -def caller(): - callee() - -def callee(): - pass -""" - symbols, relationships = extractor.extract_from_file(Path("test.py"), code) - name_to_id = extractor.save_symbols(symbols) - extractor.save_relationships(relationships, name_to_id) - - # Query database - cursor = extractor.db_conn.cursor() - cursor.execute("SELECT COUNT(*) FROM symbol_relationships") - count = cursor.fetchone()[0] - assert count > 0 - - def test_qualified_name_generation(self, extractor): - """Test that qualified names are generated correctly.""" - code = """ -class MyClass: - pass -""" - symbols, _ = extractor.extract_from_file(Path("module.py"), code) - - assert symbols[0]["qualified_name"] == "module.MyClass" - - def test_unsupported_language(self, extractor): - """Test that unsupported languages return empty results.""" - code = "some random code" - symbols, relationships = extractor.extract_from_file(Path("test.txt"), code) - - assert len(symbols) == 0 - assert len(relationships) == 0 - - def test_empty_file(self, extractor): - """Test handling empty files.""" - symbols, relationships = extractor.extract_from_file(Path("test.py"), "") - - assert len(symbols) == 0 - assert len(relationships) == 0 - - def test_complete_workflow(self, extractor): - """Test complete workflow: extract, save, and verify.""" - code = """ -class UserService: - def get_user(self, user_id): - return fetch_user(user_id) - -def main(): - service = UserService() - service.get_user(1) -""" - file_path = Path("service.py") - symbols, relationships = extractor.extract_from_file(file_path, code) - - # Save to database - name_to_id = extractor.save_symbols(symbols) - extractor.save_relationships(relationships, name_to_id) - - # Verify symbols - cursor = extractor.db_conn.cursor() - cursor.execute("SELECT name, kind FROM symbols ORDER BY start_line") - db_symbols = cursor.fetchall() - assert len(db_symbols) == 2 - assert db_symbols[0][0] == "UserService" - assert db_symbols[1][0] == "main" - - # Verify relationships - cursor.execute( - """ - SELECT s.name, r.target_symbol_fqn, r.relationship_type - FROM symbol_relationships r - JOIN symbols s ON r.source_symbol_id = s.id - """ - ) - db_rels = cursor.fetchall() - assert len(db_rels) > 0 diff --git a/codex-lens/tests/test_token_chunking.py b/codex-lens/tests/test_token_chunking.py deleted file mode 100644 index 39be7aa0..00000000 --- a/codex-lens/tests/test_token_chunking.py +++ /dev/null @@ -1,190 +0,0 @@ -"""Tests for token-aware chunking functionality.""" - -import pytest - -from codexlens.entities import SemanticChunk, Symbol -from codexlens.semantic.chunker import ChunkConfig, Chunker, HybridChunker -from codexlens.parsers.tokenizer import get_default_tokenizer - - -class TestTokenAwareChunking: - """Tests for token counting integration in chunking.""" - - def test_chunker_adds_token_count_to_chunks(self): - """Test that chunker adds token_count metadata to chunks.""" - config = ChunkConfig(min_chunk_size=5) - chunker = Chunker(config=config) - - content = '''def hello(): - return "world" - -def goodbye(): - return "farewell" -''' - symbols = [ - Symbol(name="hello", kind="function", range=(1, 2)), - Symbol(name="goodbye", kind="function", range=(4, 5)), - ] - - chunks = chunker.chunk_file(content, symbols, "test.py", "python") - - # All chunks should have token_count metadata - assert all("token_count" in c.metadata for c in chunks) - - # Token counts should be positive integers - for chunk in chunks: - token_count = chunk.metadata["token_count"] - assert isinstance(token_count, int) - assert token_count > 0 - - def test_chunker_accepts_precomputed_token_counts(self): - """Test that chunker can accept precomputed token counts.""" - config = ChunkConfig(min_chunk_size=5) - chunker = Chunker(config=config) - - content = '''def hello(): - return "world" -''' - symbols = [Symbol(name="hello", kind="function", range=(1, 2))] - - # Provide precomputed token count - symbol_token_counts = {"hello": 42} - - chunks = chunker.chunk_file(content, symbols, "test.py", "python", symbol_token_counts) - - assert len(chunks) == 1 - assert chunks[0].metadata["token_count"] == 42 - - def test_sliding_window_includes_token_count(self): - """Test that sliding window chunking includes token counts.""" - config = ChunkConfig(min_chunk_size=5, max_chunk_size=100) - chunker = Chunker(config=config) - - # Create content without symbols to trigger sliding window - content = "x = 1\ny = 2\nz = 3\n" * 20 - - chunks = chunker.chunk_sliding_window(content, "test.py", "python") - - assert len(chunks) > 0 - for chunk in chunks: - assert "token_count" in chunk.metadata - assert chunk.metadata["token_count"] > 0 - - def test_hybrid_chunker_adds_token_count(self): - """Test that hybrid chunker adds token counts to all chunk types.""" - config = ChunkConfig(min_chunk_size=5) - chunker = HybridChunker(config=config) - - content = '''"""Module docstring.""" - -def hello(): - """Function docstring.""" - return "world" -''' - symbols = [Symbol(name="hello", kind="function", range=(3, 5))] - - chunks = chunker.chunk_file(content, symbols, "test.py", "python") - - # All chunks (docstrings and code) should have token_count - assert all("token_count" in c.metadata for c in chunks) - - docstring_chunks = [c for c in chunks if c.metadata.get("chunk_type") == "docstring"] - code_chunks = [c for c in chunks if c.metadata.get("chunk_type") == "code"] - - assert len(docstring_chunks) > 0 - assert len(code_chunks) > 0 - - # Verify all have valid token counts - for chunk in chunks: - assert chunk.metadata["token_count"] > 0 - - def test_token_count_matches_tiktoken(self): - """Test that token counts match tiktoken output.""" - config = ChunkConfig(min_chunk_size=5) - chunker = Chunker(config=config) - tokenizer = get_default_tokenizer() - - content = '''def calculate(x, y): - """Calculate sum of x and y.""" - return x + y -''' - symbols = [Symbol(name="calculate", kind="function", range=(1, 3))] - - chunks = chunker.chunk_file(content, symbols, "test.py", "python") - - assert len(chunks) == 1 - chunk = chunks[0] - - # Manually count tokens for verification - expected_count = tokenizer.count_tokens(chunk.content) - assert chunk.metadata["token_count"] == expected_count - - def test_token_count_fallback_to_calculation(self): - """Test that token count is calculated when not precomputed.""" - config = ChunkConfig(min_chunk_size=5) - chunker = Chunker(config=config) - - content = '''def test(): - pass -''' - symbols = [Symbol(name="test", kind="function", range=(1, 2))] - - # Don't provide symbol_token_counts - should calculate automatically - chunks = chunker.chunk_file(content, symbols, "test.py", "python") - - assert len(chunks) == 1 - assert "token_count" in chunks[0].metadata - assert chunks[0].metadata["token_count"] > 0 - - -class TestTokenCountPerformance: - """Tests for token counting performance optimization.""" - - def test_precomputed_tokens_avoid_recalculation(self): - """Test that providing precomputed token counts avoids recalculation.""" - import time - - config = ChunkConfig(min_chunk_size=5) - chunker = Chunker(config=config) - tokenizer = get_default_tokenizer() - - # Create larger content - lines = [] - for i in range(100): - lines.append(f'def func{i}(x):\n') - lines.append(f' return x * {i}\n') - lines.append('\n') - content = "".join(lines) - - symbols = [ - Symbol(name=f"func{i}", kind="function", range=(1 + i*3, 2 + i*3)) - for i in range(100) - ] - - # Precompute token counts - symbol_token_counts = {} - for symbol in symbols: - start_idx = symbol.range[0] - 1 - end_idx = symbol.range[1] - chunk_content = "".join(content.splitlines(keepends=True)[start_idx:end_idx]) - symbol_token_counts[symbol.name] = tokenizer.count_tokens(chunk_content) - - # Time with precomputed counts (3 runs) - precomputed_times = [] - for _ in range(3): - start = time.perf_counter() - chunker.chunk_file(content, symbols, "test.py", "python", symbol_token_counts) - precomputed_times.append(time.perf_counter() - start) - precomputed_time = sum(precomputed_times) / len(precomputed_times) - - # Time without precomputed counts (3 runs) - computed_times = [] - for _ in range(3): - start = time.perf_counter() - chunker.chunk_file(content, symbols, "test.py", "python") - computed_times.append(time.perf_counter() - start) - computed_time = sum(computed_times) / len(computed_times) - - # Precomputed should be at least 10% faster - speedup = ((computed_time - precomputed_time) / computed_time) * 100 - assert speedup >= 10.0, f"Speedup {speedup:.2f}% < 10% (computed={computed_time:.4f}s, precomputed={precomputed_time:.4f}s)" diff --git a/codex-lens/tests/test_token_storage.py b/codex-lens/tests/test_token_storage.py deleted file mode 100644 index 68391ca7..00000000 --- a/codex-lens/tests/test_token_storage.py +++ /dev/null @@ -1,368 +0,0 @@ -"""Integration tests for token metadata storage and retrieval.""" - -import pytest -import tempfile -from pathlib import Path - -from codexlens.entities import Symbol, IndexedFile -from codexlens.storage.sqlite_store import SQLiteStore -from codexlens.storage.dir_index import DirIndexStore -from codexlens.storage.migration_manager import MigrationManager - - -class TestTokenMetadataStorage: - """Tests for storing and retrieving token metadata.""" - - def test_sqlite_store_saves_token_count(self): - """Test that SQLiteStore saves token_count for symbols.""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "test.db" - store = SQLiteStore(db_path) - - with store: - # Create indexed file with symbols containing token counts - symbols = [ - Symbol( - name="func1", - kind="function", - range=(1, 5), - token_count=42, - symbol_type="function_definition" - ), - Symbol( - name="func2", - kind="function", - range=(7, 12), - token_count=73, - symbol_type="function_definition" - ), - ] - - indexed_file = IndexedFile( - path=str(Path(tmpdir) / "test.py"), - language="python", - symbols=symbols - ) - - content = "def func1():\n pass\n\ndef func2():\n pass\n" - store.add_file(indexed_file, content) - - # Retrieve symbols and verify token_count is saved - retrieved_symbols = store.search_symbols("func", limit=10) - - assert len(retrieved_symbols) == 2 - - # Check that symbols have token_count attribute - # Note: search_symbols currently doesn't return token_count - # This test verifies the data is stored correctly in the database - - def test_dir_index_store_saves_token_count(self): - """Test that DirIndexStore saves token_count for symbols.""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "_index.db" - store = DirIndexStore(db_path) - - with store: - symbols = [ - Symbol( - name="calculate", - kind="function", - range=(1, 10), - token_count=128, - symbol_type="function_definition" - ), - ] - - file_id = store.add_file( - name="math.py", - full_path=Path(tmpdir) / "math.py", - content="def calculate(x, y):\n return x + y\n", - language="python", - symbols=symbols - ) - - assert file_id > 0 - - # Verify file was stored - file_entry = store.get_file(Path(tmpdir) / "math.py") - assert file_entry is not None - assert file_entry.name == "math.py" - - def test_migration_adds_token_columns(self): - """Test that migrations properly handle token_count and symbol_type columns. - - Note: Migration 002 adds these columns, but migration 005 removes them - as they were identified as unused/redundant. New databases should not - have these columns. - """ - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "test.db" - store = SQLiteStore(db_path) - - with store: - # Apply migrations - conn = store._get_connection() - manager = MigrationManager(conn) - manager.apply_migrations() - - # Verify columns do NOT exist after all migrations - # (migration_005 removes token_count and symbol_type) - cursor = conn.execute("PRAGMA table_info(symbols)") - columns = {row[1] for row in cursor.fetchall()} - - # These columns should NOT be present after migration_005 - assert "token_count" not in columns, "token_count should be removed by migration_005" - assert "symbol_type" not in columns, "symbol_type should be removed by migration_005" - - # Index on symbol_type should also not exist - cursor = conn.execute( - "SELECT name FROM sqlite_master WHERE type='index' AND name='idx_symbols_type'" - ) - index = cursor.fetchone() - assert index is None, "idx_symbols_type should not exist after migration_005" - - def test_batch_insert_preserves_token_metadata(self): - """Test that batch insert preserves token metadata.""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "test.db" - store = SQLiteStore(db_path) - - with store: - files_data = [] - - for i in range(5): - symbols = [ - Symbol( - name=f"func{i}", - kind="function", - range=(1, 3), - token_count=10 + i, - symbol_type="function_definition" - ), - ] - - indexed_file = IndexedFile( - path=str(Path(tmpdir) / f"test{i}.py"), - language="python", - symbols=symbols - ) - - content = f"def func{i}():\n pass\n" - files_data.append((indexed_file, content)) - - # Batch insert - store.add_files(files_data) - - # Verify all files were stored - stats = store.stats() - assert stats["files"] == 5 - assert stats["symbols"] == 5 - - def test_symbol_type_defaults_to_kind(self): - """Test that symbol_type defaults to kind when not specified.""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "_index.db" - store = DirIndexStore(db_path) - - with store: - # Symbol without explicit symbol_type - symbols = [ - Symbol( - name="MyClass", - kind="class", - range=(1, 10), - token_count=200 - ), - ] - - store.add_file( - name="module.py", - full_path=Path(tmpdir) / "module.py", - content="class MyClass:\n pass\n", - language="python", - symbols=symbols - ) - - # Verify it was stored (symbol_type should default to 'class') - file_entry = store.get_file(Path(tmpdir) / "module.py") - assert file_entry is not None - - def test_null_token_count_allowed(self): - """Test that NULL token_count is allowed for backward compatibility.""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "test.db" - store = SQLiteStore(db_path) - - with store: - # Symbol without token_count (None) - symbols = [ - Symbol( - name="legacy_func", - kind="function", - range=(1, 5) - ), - ] - - indexed_file = IndexedFile( - path=str(Path(tmpdir) / "legacy.py"), - language="python", - symbols=symbols - ) - - content = "def legacy_func():\n pass\n" - store.add_file(indexed_file, content) - - # Should not raise an error - stats = store.stats() - assert stats["symbols"] == 1 - - def test_search_by_symbol_type(self): - """Test searching/filtering symbols by symbol_type.""" - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "_index.db" - store = DirIndexStore(db_path) - - with store: - # Add symbols with different types - symbols = [ - Symbol( - name="MyClass", - kind="class", - range=(1, 10), - symbol_type="class_definition" - ), - Symbol( - name="my_function", - kind="function", - range=(12, 15), - symbol_type="function_definition" - ), - Symbol( - name="my_method", - kind="method", - range=(5, 8), - symbol_type="method_definition" - ), - ] - - store.add_file( - name="code.py", - full_path=Path(tmpdir) / "code.py", - content="class MyClass:\n def my_method(self):\n pass\n\ndef my_function():\n pass\n", - language="python", - symbols=symbols - ) - - # Search for functions only - function_symbols = store.search_symbols("my", kind="function", limit=10) - assert len(function_symbols) == 1 - assert function_symbols[0].name == "my_function" - - # Search for methods only - method_symbols = store.search_symbols("my", kind="method", limit=10) - assert len(method_symbols) == 1 - assert method_symbols[0].name == "my_method" - - -class TestTokenCountAccuracy: - """Tests for symbol storage accuracy. - - Note: token_count and symbol_type columns were removed in migration_005 - as they were identified as unused/redundant. These tests now verify - that symbols are stored correctly with their basic fields. - """ - - def test_stored_token_count_matches_original(self): - """Test that symbols are stored correctly (token_count no longer stored). - - Note: token_count field was removed from schema. This test verifies - that symbols are still stored correctly with basic fields. - """ - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "test.db" - store = SQLiteStore(db_path) - - with store: - symbols = [ - Symbol( - name="complex_func", - kind="function", - range=(1, 20), - token_count=256 # This field is accepted but not stored - ), - ] - - indexed_file = IndexedFile( - path=str(Path(tmpdir) / "test.py"), - language="python", - symbols=symbols - ) - - content = "def complex_func():\n # Some complex logic\n pass\n" - store.add_file(indexed_file, content) - - # Verify symbol is stored with basic fields - conn = store._get_connection() - cursor = conn.execute( - "SELECT name, kind, start_line, end_line FROM symbols WHERE name = ?", - ("complex_func",) - ) - row = cursor.fetchone() - - assert row is not None - assert row["name"] == "complex_func" - assert row["kind"] == "function" - assert row["start_line"] == 1 - assert row["end_line"] == 20 - - def test_100_percent_storage_accuracy(self): - """Test that 100% of symbols are stored correctly. - - Note: token_count field was removed from schema. This test verifies - that symbols are stored correctly with basic fields. - """ - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "_index.db" - store = DirIndexStore(db_path) - - with store: - # Store symbols - file_entries = [] - for i in range(100): - symbol_name = f"func{i}" - - symbols = [ - Symbol( - name=symbol_name, - kind="function", - range=(1, 2), - token_count=10 + i * 3 # Accepted but not stored - ) - ] - - file_path = Path(tmpdir) / f"file{i}.py" - file_entries.append(( - f"file{i}.py", - file_path, - f"def {symbol_name}():\n pass\n", - "python", - symbols - )) - - count = store.add_files_batch(file_entries) - assert count == 100 - - # Verify all symbols are stored correctly - conn = store._get_connection() - cursor = conn.execute( - "SELECT name, kind, start_line, end_line FROM symbols ORDER BY name" - ) - rows = cursor.fetchall() - - assert len(rows) == 100 - - # Verify each symbol has correct basic fields - for row in rows: - assert row["kind"] == "function" - assert row["start_line"] == 1 - assert row["end_line"] == 2 diff --git a/codex-lens/tests/test_tokenizer.py b/codex-lens/tests/test_tokenizer.py deleted file mode 100644 index edf086d1..00000000 --- a/codex-lens/tests/test_tokenizer.py +++ /dev/null @@ -1,162 +0,0 @@ -"""Tests for tokenizer module.""" - -import pytest - -from codexlens.parsers.tokenizer import ( - Tokenizer, - count_tokens, - get_default_tokenizer, -) - - -class TestTokenizer: - """Tests for Tokenizer class.""" - - def test_empty_text(self): - tokenizer = Tokenizer() - assert tokenizer.count_tokens("") == 0 - - def test_simple_text(self): - tokenizer = Tokenizer() - text = "Hello world" - count = tokenizer.count_tokens(text) - assert count > 0 - # Should be roughly text length / 4 for fallback - assert count >= len(text) // 5 - - def test_long_text(self): - tokenizer = Tokenizer() - text = "def hello():\n pass\n" * 100 - count = tokenizer.count_tokens(text) - assert count > 0 - # Verify it's proportional to length - assert count >= len(text) // 5 - - def test_code_text(self): - tokenizer = Tokenizer() - code = """ -def calculate_fibonacci(n): - if n <= 1: - return n - return calculate_fibonacci(n-1) + calculate_fibonacci(n-2) - -class MathHelper: - def factorial(self, n): - if n <= 1: - return 1 - return n * self.factorial(n - 1) -""" - count = tokenizer.count_tokens(code) - assert count > 0 - - def test_unicode_text(self): - tokenizer = Tokenizer() - text = "你好世界 Hello World" - count = tokenizer.count_tokens(text) - assert count > 0 - - def test_special_characters(self): - tokenizer = Tokenizer() - text = "!@#$%^&*()_+-=[]{}|;':\",./<>?" - count = tokenizer.count_tokens(text) - assert count > 0 - - def test_is_using_tiktoken_check(self): - tokenizer = Tokenizer() - # Should return bool indicating if tiktoken is available - result = tokenizer.is_using_tiktoken() - assert isinstance(result, bool) - - -class TestTokenizerFallback: - """Tests for character count fallback.""" - - def test_character_count_fallback(self): - # Test with potentially unavailable encoding - tokenizer = Tokenizer(encoding_name="nonexistent_encoding") - text = "Hello world" - count = tokenizer.count_tokens(text) - # Should fall back to character counting - assert count == max(1, len(text) // 4) - - def test_fallback_minimum_count(self): - tokenizer = Tokenizer(encoding_name="nonexistent_encoding") - # Very short text should still return at least 1 - assert tokenizer.count_tokens("hi") >= 1 - - -class TestGlobalTokenizer: - """Tests for global tokenizer functions.""" - - def test_get_default_tokenizer(self): - tokenizer1 = get_default_tokenizer() - tokenizer2 = get_default_tokenizer() - # Should return the same instance - assert tokenizer1 is tokenizer2 - - def test_count_tokens_default(self): - text = "Hello world" - count = count_tokens(text) - assert count > 0 - - def test_count_tokens_custom_tokenizer(self): - custom_tokenizer = Tokenizer() - text = "Hello world" - count = count_tokens(text, tokenizer=custom_tokenizer) - assert count > 0 - - -class TestTokenizerPerformance: - """Performance-related tests.""" - - def test_large_file_tokenization(self): - """Test tokenization of large file content.""" - tokenizer = Tokenizer() - # Simulate a 1MB file - each line is ~126 chars, need ~8000 lines - large_text = "def function_{}():\n pass\n".format("x" * 100) * 8000 - assert len(large_text) > 1_000_000 - - count = tokenizer.count_tokens(large_text) - assert count > 0 - # Verify reasonable token count (at least 10k tokens for 1MB) - # Note: Modern tokenizers compress repetitive content efficiently - assert count >= 10000 - - def test_multiple_tokenizations(self): - """Test multiple tokenization calls.""" - tokenizer = Tokenizer() - text = "def hello(): pass" - - # Multiple calls should return same result - count1 = tokenizer.count_tokens(text) - count2 = tokenizer.count_tokens(text) - assert count1 == count2 - - -class TestTokenizerEdgeCases: - """Edge case tests.""" - - def test_only_whitespace(self): - tokenizer = Tokenizer() - count = tokenizer.count_tokens(" \n\t ") - assert count >= 0 - - def test_very_long_line(self): - tokenizer = Tokenizer() - long_line = "a" * 10000 - count = tokenizer.count_tokens(long_line) - assert count > 0 - - def test_mixed_content(self): - tokenizer = Tokenizer() - mixed = """ -# Comment -def func(): - '''Docstring''' - pass - -123.456 -"string" -""" - count = tokenizer.count_tokens(mixed) - assert count > 0 diff --git a/codex-lens/tests/test_tokenizer_performance.py b/codex-lens/tests/test_tokenizer_performance.py deleted file mode 100644 index bfee530f..00000000 --- a/codex-lens/tests/test_tokenizer_performance.py +++ /dev/null @@ -1,127 +0,0 @@ -"""Performance benchmarks for tokenizer. - -Verifies that tiktoken-based tokenization is at least 50% faster than -pure Python implementation for files >1MB. -""" - -import time -from pathlib import Path - -import pytest - -from codexlens.parsers.tokenizer import Tokenizer, TIKTOKEN_AVAILABLE - - -def pure_python_token_count(text: str) -> int: - """Pure Python token counting fallback (character count / 4).""" - if not text: - return 0 - return max(1, len(text) // 4) - - -@pytest.mark.skipif(not TIKTOKEN_AVAILABLE, reason="tiktoken not installed") -class TestTokenizerPerformance: - """Performance benchmarks comparing tiktoken vs pure Python.""" - - def test_performance_improvement_large_file(self): - """Verify tiktoken is at least 50% faster for files >1MB.""" - # Create a large file (>1MB) - large_text = "def function_{}():\n pass\n".format("x" * 100) * 8000 - assert len(large_text) > 1_000_000 - - # Warm up - tokenizer = Tokenizer() - tokenizer.count_tokens(large_text[:1000]) - pure_python_token_count(large_text[:1000]) - - # Benchmark tiktoken - tiktoken_times = [] - for _ in range(10): - start = time.perf_counter() - tokenizer.count_tokens(large_text) - end = time.perf_counter() - tiktoken_times.append(end - start) - - tiktoken_avg = sum(tiktoken_times) / len(tiktoken_times) - - # Benchmark pure Python - python_times = [] - for _ in range(10): - start = time.perf_counter() - pure_python_token_count(large_text) - end = time.perf_counter() - python_times.append(end - start) - - python_avg = sum(python_times) / len(python_times) - - # Calculate speed improvement - # tiktoken should be at least 50% faster (meaning python takes at least 1.5x longer) - speedup = python_avg / tiktoken_avg - - print(f"\nPerformance results for {len(large_text):,} byte file:") - print(f" Tiktoken avg: {tiktoken_avg*1000:.2f}ms") - print(f" Pure Python avg: {python_avg*1000:.2f}ms") - print(f" Speedup: {speedup:.2f}x") - - # For pure character counting, Python is actually faster since it's simpler - # The real benefit of tiktoken is ACCURACY, not speed - # So we adjust the test to verify tiktoken works correctly - assert tiktoken_avg < 1.0, "Tiktoken should complete in reasonable time" - assert speedup > 0, "Should have valid performance measurement" - - def test_accuracy_comparison(self): - """Verify tiktoken provides more accurate token counts.""" - code = """ -class Calculator: - def __init__(self): - self.value = 0 - - def add(self, x, y): - return x + y - - def multiply(self, x, y): - return x * y -""" - tokenizer = Tokenizer() - if tokenizer.is_using_tiktoken(): - tiktoken_count = tokenizer.count_tokens(code) - python_count = pure_python_token_count(code) - - # Tiktoken should give different (more accurate) count than naive char/4 - # They might be close, but tiktoken accounts for token boundaries - assert tiktoken_count > 0 - assert python_count > 0 - - # Both should be in reasonable range for this code - assert 20 < tiktoken_count < 100 - assert 20 < python_count < 100 - - def test_consistent_results(self): - """Verify tiktoken gives consistent results.""" - code = "def hello(): pass" - tokenizer = Tokenizer() - - if tokenizer.is_using_tiktoken(): - results = [tokenizer.count_tokens(code) for _ in range(100)] - # All results should be identical - assert len(set(results)) == 1 - - -class TestTokenizerWithoutTiktoken: - """Tests for behavior when tiktoken is unavailable.""" - - def test_fallback_performance(self): - """Verify fallback is still fast.""" - # Use invalid encoding to force fallback - tokenizer = Tokenizer(encoding_name="invalid_encoding") - large_text = "x" * 1_000_000 - - start = time.perf_counter() - count = tokenizer.count_tokens(large_text) - end = time.perf_counter() - - elapsed = end - start - - # Character counting should be very fast - assert elapsed < 0.1 # Should take less than 100ms - assert count == len(large_text) // 4 diff --git a/codex-lens/tests/test_treesitter_parser.py b/codex-lens/tests/test_treesitter_parser.py deleted file mode 100644 index 62303fc5..00000000 --- a/codex-lens/tests/test_treesitter_parser.py +++ /dev/null @@ -1,377 +0,0 @@ -"""Tests for TreeSitterSymbolParser.""" - -from pathlib import Path - -import pytest - -from codexlens.parsers.treesitter_parser import TreeSitterSymbolParser, TREE_SITTER_AVAILABLE - - -@pytest.mark.skipif(not TREE_SITTER_AVAILABLE, reason="tree-sitter not installed") -class TestTreeSitterPythonParser: - """Tests for Python parsing with tree-sitter.""" - - def test_parse_simple_function(self): - parser = TreeSitterSymbolParser("python") - code = "def hello():\n pass" - result = parser.parse(code, Path("test.py")) - - assert result is not None - assert result.language == "python" - assert len(result.symbols) == 1 - assert result.symbols[0].name == "hello" - assert result.symbols[0].kind == "function" - - def test_parse_async_function(self): - parser = TreeSitterSymbolParser("python") - code = "async def fetch_data():\n pass" - result = parser.parse(code, Path("test.py")) - - assert result is not None - assert len(result.symbols) == 1 - assert result.symbols[0].name == "fetch_data" - assert result.symbols[0].kind == "function" - - def test_parse_class(self): - parser = TreeSitterSymbolParser("python") - code = "class MyClass:\n pass" - result = parser.parse(code, Path("test.py")) - - assert result is not None - assert len(result.symbols) == 1 - assert result.symbols[0].name == "MyClass" - assert result.symbols[0].kind == "class" - - def test_parse_method(self): - parser = TreeSitterSymbolParser("python") - code = """ -class MyClass: - def method(self): - pass -""" - result = parser.parse(code, Path("test.py")) - - assert result is not None - assert len(result.symbols) == 2 - assert result.symbols[0].name == "MyClass" - assert result.symbols[0].kind == "class" - assert result.symbols[1].name == "method" - assert result.symbols[1].kind == "method" - - def test_parse_nested_functions(self): - parser = TreeSitterSymbolParser("python") - code = """ -def outer(): - def inner(): - pass - return inner -""" - result = parser.parse(code, Path("test.py")) - - assert result is not None - names = [s.name for s in result.symbols] - assert "outer" in names - assert "inner" in names - - def test_parse_complex_file(self): - parser = TreeSitterSymbolParser("python") - code = """ -class Calculator: - def add(self, a, b): - return a + b - - def subtract(self, a, b): - return a - b - -def standalone_function(): - pass - -class DataProcessor: - async def process(self, data): - pass -""" - result = parser.parse(code, Path("test.py")) - - assert result is not None - assert len(result.symbols) >= 5 - - names_kinds = [(s.name, s.kind) for s in result.symbols] - assert ("Calculator", "class") in names_kinds - assert ("add", "method") in names_kinds - assert ("subtract", "method") in names_kinds - assert ("standalone_function", "function") in names_kinds - assert ("DataProcessor", "class") in names_kinds - assert ("process", "method") in names_kinds - - def test_parse_empty_file(self): - parser = TreeSitterSymbolParser("python") - result = parser.parse("", Path("test.py")) - - assert result is not None - assert len(result.symbols) == 0 - - def test_extracts_relationships_with_alias_resolution(self): - parser = TreeSitterSymbolParser("python") - code = """ -import os.path as osp -from math import sqrt as sq - -class Base: - pass - -class Child(Base): - pass - -def main(): - osp.join("a", "b") - sq(4) -""" - result = parser.parse(code, Path("test.py")) - - assert result is not None - - rels = [r for r in result.relationships if r.source_symbol == "main"] - targets = {r.target_symbol for r in rels if r.relationship_type.value == "calls"} - assert "os.path.join" in targets - assert "math.sqrt" in targets - - inherits = [ - r for r in result.relationships - if r.source_symbol == "Child" and r.relationship_type.value == "inherits" - ] - assert any(r.target_symbol == "Base" for r in inherits) - - -@pytest.mark.skipif(not TREE_SITTER_AVAILABLE, reason="tree-sitter not installed") -class TestTreeSitterJavaScriptParser: - """Tests for JavaScript parsing with tree-sitter.""" - - def test_parse_function(self): - parser = TreeSitterSymbolParser("javascript") - code = "function hello() {}" - result = parser.parse(code, Path("test.js")) - - assert result is not None - assert len(result.symbols) == 1 - assert result.symbols[0].name == "hello" - assert result.symbols[0].kind == "function" - - def test_parse_arrow_function(self): - parser = TreeSitterSymbolParser("javascript") - code = "const hello = () => {}" - result = parser.parse(code, Path("test.js")) - - assert result is not None - assert len(result.symbols) == 1 - assert result.symbols[0].name == "hello" - assert result.symbols[0].kind == "function" - - def test_parse_class(self): - parser = TreeSitterSymbolParser("javascript") - code = "class MyClass {}" - result = parser.parse(code, Path("test.js")) - - assert result is not None - assert len(result.symbols) == 1 - assert result.symbols[0].name == "MyClass" - assert result.symbols[0].kind == "class" - - def test_parse_class_with_methods(self): - parser = TreeSitterSymbolParser("javascript") - code = """ -class MyClass { - method() {} - async asyncMethod() {} -} -""" - result = parser.parse(code, Path("test.js")) - - assert result is not None - names_kinds = [(s.name, s.kind) for s in result.symbols] - assert ("MyClass", "class") in names_kinds - assert ("method", "method") in names_kinds - assert ("asyncMethod", "method") in names_kinds - - def test_parse_export_functions(self): - parser = TreeSitterSymbolParser("javascript") - code = """ -export function exported() {} -export const arrowFunc = () => {} -""" - result = parser.parse(code, Path("test.js")) - - assert result is not None - assert len(result.symbols) >= 2 - names = [s.name for s in result.symbols] - assert "exported" in names - assert "arrowFunc" in names - - def test_extracts_relationships_with_import_alias(self): - parser = TreeSitterSymbolParser("javascript") - code = """ -import { readFile as rf } from "fs"; - -function main() { - rf("a"); -} -""" - result = parser.parse(code, Path("test.js")) - - assert result is not None - rels = [r for r in result.relationships if r.source_symbol == "main"] - targets = {r.target_symbol for r in rels if r.relationship_type.value == "calls"} - assert "fs.readFile" in targets - - -@pytest.mark.skipif(not TREE_SITTER_AVAILABLE, reason="tree-sitter not installed") -class TestTreeSitterTypeScriptParser: - """Tests for TypeScript parsing with tree-sitter.""" - - def test_parse_typescript_function(self): - parser = TreeSitterSymbolParser("typescript") - code = "function greet(name: string): string { return name; }" - result = parser.parse(code, Path("test.ts")) - - assert result is not None - assert len(result.symbols) >= 1 - assert any(s.name == "greet" for s in result.symbols) - - def test_parse_typescript_class(self): - parser = TreeSitterSymbolParser("typescript") - code = """ -class Service { - process(data: string): void {} -} -""" - result = parser.parse(code, Path("test.ts")) - - assert result is not None - names = [s.name for s in result.symbols] - assert "Service" in names - - -class TestTreeSitterParserAvailability: - """Tests for parser availability checking.""" - - def test_is_available_python(self): - parser = TreeSitterSymbolParser("python") - # Should match TREE_SITTER_AVAILABLE - assert parser.is_available() == TREE_SITTER_AVAILABLE - - def test_is_available_javascript(self): - parser = TreeSitterSymbolParser("javascript") - assert isinstance(parser.is_available(), bool) - - def test_unsupported_language(self): - parser = TreeSitterSymbolParser("rust") - # Rust not configured, so should not be available - assert parser.is_available() is False - - -class TestTreeSitterParserFallback: - """Tests for fallback behavior when tree-sitter unavailable.""" - - def test_parse_returns_none_when_unavailable(self): - parser = TreeSitterSymbolParser("rust") # Unsupported language - code = "fn main() {}" - result = parser.parse(code, Path("test.rs")) - - # Should return None when parser unavailable - assert result is None - - -class TestTreeSitterTokenCounting: - """Tests for token counting functionality.""" - - @pytest.mark.skipif(not TREE_SITTER_AVAILABLE, reason="tree-sitter not installed") - def test_count_tokens(self): - parser = TreeSitterSymbolParser("python") - code = "def hello():\n pass" - count = parser.count_tokens(code) - - assert count > 0 - assert isinstance(count, int) - - @pytest.mark.skipif(not TREE_SITTER_AVAILABLE, reason="tree-sitter not installed") - def test_count_tokens_large_file(self): - parser = TreeSitterSymbolParser("python") - # Generate large code - code = "def func_{}():\n pass\n".format("x" * 100) * 1000 - - count = parser.count_tokens(code) - assert count > 0 - - -class TestTreeSitterAccuracy: - """Tests for >99% symbol extraction accuracy.""" - - @pytest.mark.skipif(not TREE_SITTER_AVAILABLE, reason="tree-sitter not installed") - def test_comprehensive_python_file(self): - parser = TreeSitterSymbolParser("python") - code = """ -# Module-level function -def module_func(): - pass - -class FirstClass: - def method1(self): - pass - - def method2(self): - pass - - async def async_method(self): - pass - -def another_function(): - def nested(): - pass - return nested - -class SecondClass: - class InnerClass: - def inner_method(self): - pass - - def outer_method(self): - pass - -async def async_function(): - pass -""" - result = parser.parse(code, Path("test.py")) - - assert result is not None - # Expected symbols: module_func, FirstClass, method1, method2, async_method, - # another_function, nested, SecondClass, InnerClass, inner_method, - # outer_method, async_function - # Should find at least 12 symbols with >99% accuracy - assert len(result.symbols) >= 12 - - @pytest.mark.skipif(not TREE_SITTER_AVAILABLE, reason="tree-sitter not installed") - def test_comprehensive_javascript_file(self): - parser = TreeSitterSymbolParser("javascript") - code = """ -function regularFunc() {} - -const arrowFunc = () => {} - -class MainClass { - method1() {} - async method2() {} - static staticMethod() {} -} - -export function exportedFunc() {} - -export class ExportedClass { - method() {} -} -""" - result = parser.parse(code, Path("test.js")) - - assert result is not None - # Expected: regularFunc, arrowFunc, MainClass, method1, method2, - # staticMethod, exportedFunc, ExportedClass, method - # Should find at least 9 symbols - assert len(result.symbols) >= 9 diff --git a/codex-lens/tests/test_vector_search_full.py b/codex-lens/tests/test_vector_search_full.py deleted file mode 100644 index cc22c5f9..00000000 --- a/codex-lens/tests/test_vector_search_full.py +++ /dev/null @@ -1,812 +0,0 @@ -"""Full coverage tests for vector/semantic search functionality. - -Tests cover: -- Embedder model loading and embedding generation -- VectorStore CRUD operations and caching -- Cosine similarity computation -- Semantic search accuracy and relevance -- Performance benchmarks -- Edge cases and error handling -- Thread safety and concurrent access -""" - -import json -import math -import tempfile -import threading -import time -from pathlib import Path -from typing import List - -import pytest - -from codexlens.entities import SemanticChunk, Symbol, SearchResult -from codexlens.semantic import SEMANTIC_AVAILABLE, check_semantic_available - -# Only skip if NumPy is unavailable (some tests exercise vector math without fastembed). -try: - import numpy as np # noqa: F401 - NUMPY_AVAILABLE = True -except ImportError: - NUMPY_AVAILABLE = False - -# Skip all tests if NumPy is unavailable -pytestmark = pytest.mark.skipif( - not NUMPY_AVAILABLE, - reason="NumPy not installed (pip install codexlens[semantic])" -) - - -# === Fixtures === - -@pytest.fixture -def temp_db(tmp_path): - """Create temporary database path.""" - return tmp_path / "test_semantic.db" - - -@pytest.fixture -def embedder(): - """Create Embedder instance.""" - available, error = check_semantic_available() - if not available: - pytest.skip(error or "Semantic search dependencies not installed (pip install codexlens[semantic])") - from codexlens.semantic.embedder import Embedder - return Embedder() - - -@pytest.fixture -def vector_store(temp_db): - """Create VectorStore instance.""" - from codexlens.semantic.vector_store import VectorStore - return VectorStore(temp_db) - - -@pytest.fixture -def sample_code_chunks(): - """Sample code chunks for testing.""" - return [ - { - "content": "def authenticate(username, password): return check_credentials(username, password)", - "metadata": {"symbol_name": "authenticate", "symbol_kind": "function", "start_line": 1, "end_line": 1, "language": "python"}, - }, - { - "content": "class DatabaseConnection:\n def connect(self, host, port): pass\n def execute(self, query): pass", - "metadata": {"symbol_name": "DatabaseConnection", "symbol_kind": "class", "start_line": 1, "end_line": 3, "language": "python"}, - }, - { - "content": "async function fetchUserData(userId) { return await api.get('/users/' + userId); }", - "metadata": {"symbol_name": "fetchUserData", "symbol_kind": "function", "start_line": 1, "end_line": 1, "language": "javascript"}, - }, - { - "content": "def calculate_sum(numbers): return sum(numbers)", - "metadata": {"symbol_name": "calculate_sum", "symbol_kind": "function", "start_line": 1, "end_line": 1, "language": "python"}, - }, - { - "content": "class UserProfile:\n def __init__(self, name, email):\n self.name = name\n self.email = email", - "metadata": {"symbol_name": "UserProfile", "symbol_kind": "class", "start_line": 1, "end_line": 4, "language": "python"}, - }, - ] - - -# === Embedder Tests === - -class TestEmbedder: - """Tests for Embedder class.""" - - def test_embedder_initialization(self, embedder): - """Test embedder initializes correctly.""" - assert embedder.model_name == "BAAI/bge-small-en-v1.5" - assert embedder.embedding_dim == 384 - assert embedder._model is None # Lazy loading - - def test_embed_single_returns_correct_dimension(self, embedder): - """Test single embedding has correct dimension.""" - text = "def hello(): print('world')" - embedding = embedder.embed_single(text) - - assert isinstance(embedding, list) - assert len(embedding) == 384 - assert all(isinstance(x, float) for x in embedding) - - def test_embed_batch_returns_correct_count(self, embedder): - """Test batch embedding returns correct number of embeddings.""" - texts = [ - "def foo(): pass", - "def bar(): pass", - "def baz(): pass", - ] - embeddings = embedder.embed(texts) - - assert len(embeddings) == len(texts) - assert all(len(e) == 384 for e in embeddings) - - def test_embed_empty_string(self, embedder): - """Test embedding empty string.""" - embedding = embedder.embed_single("") - assert len(embedding) == 384 - - def test_embed_unicode_text(self, embedder): - """Test embedding unicode text.""" - text = "def 你好(): return '世界'" - embedding = embedder.embed_single(text) - assert len(embedding) == 384 - - def test_embed_long_text(self, embedder): - """Test embedding long text.""" - text = "def process(): pass\n" * 100 - embedding = embedder.embed_single(text) - assert len(embedding) == 384 - - def test_embed_special_characters(self, embedder): - """Test embedding text with special characters.""" - text = "def test(): return {'key': 'value', '@decorator': True}" - embedding = embedder.embed_single(text) - assert len(embedding) == 384 - - def test_lazy_model_loading(self, embedder): - """Test model loads lazily on first embed call.""" - assert embedder._model is None - embedder.embed_single("test") - assert embedder._model is not None - - def test_model_reuse(self, embedder): - """Test model is reused across multiple calls.""" - embedder.embed_single("test1") - model_ref = embedder._model - embedder.embed_single("test2") - assert embedder._model is model_ref # Same instance - - -class TestEmbeddingSimilarity: - """Tests for embedding similarity.""" - - def test_identical_text_similarity(self, embedder): - """Test identical text has similarity ~1.0.""" - from codexlens.semantic.vector_store import _cosine_similarity - - text = "def calculate_sum(a, b): return a + b" - emb1 = embedder.embed_single(text) - emb2 = embedder.embed_single(text) - - similarity = _cosine_similarity(emb1, emb2) - assert similarity > 0.99, "Identical text should have ~1.0 similarity" - - def test_similar_code_high_similarity(self, embedder): - """Test similar code has high similarity.""" - from codexlens.semantic.vector_store import _cosine_similarity - - code1 = "def add(a, b): return a + b" - code2 = "def sum_numbers(x, y): return x + y" - - emb1 = embedder.embed_single(code1) - emb2 = embedder.embed_single(code2) - - similarity = _cosine_similarity(emb1, emb2) - assert similarity > 0.6, "Similar functions should have high similarity" - - def test_different_code_lower_similarity(self, embedder): - """Test different code has lower similarity than similar code.""" - from codexlens.semantic.vector_store import _cosine_similarity - - code1 = "def add(a, b): return a + b" - code2 = "def sum_numbers(x, y): return x + y" - code3 = "class UserAuth: def login(self, user, pwd): pass" - - emb1 = embedder.embed_single(code1) - emb2 = embedder.embed_single(code2) - emb3 = embedder.embed_single(code3) - - sim_similar = _cosine_similarity(emb1, emb2) - sim_different = _cosine_similarity(emb1, emb3) - - assert sim_similar > sim_different, "Similar code should have higher similarity" - - def test_zero_vector_similarity(self): - """Test cosine similarity with zero vector.""" - from codexlens.semantic.vector_store import _cosine_similarity - - zero_vec = [0.0] * 384 - normal_vec = [1.0] * 384 - - similarity = _cosine_similarity(zero_vec, normal_vec) - assert similarity == 0.0, "Zero vector should have 0 similarity" - - def test_cosine_similarity_near_zero_norm_vectors(self): - """Near-zero norm vectors (< epsilon) should return 0.0 similarity.""" - from codexlens.semantic.vector_store import _cosine_similarity - - near_zero_vec = [1e-12] * 384 - normal_vec = [1.0] * 384 - - similarity = _cosine_similarity(near_zero_vec, normal_vec) - assert similarity == 0.0 - - def test_cosine_similarity_product_underflow_returns_zero(self): - """Product underflow (norm_a * norm_b < epsilon) should return 0.0.""" - from codexlens.semantic.vector_store import _cosine_similarity - - underflow_vec = [1e-7] * 384 - - similarity = _cosine_similarity(underflow_vec, underflow_vec) - assert similarity == 0.0 - - def test_cosine_similarity_small_valid_vectors(self): - """Small-but-valid vectors should compute similarity correctly.""" - from codexlens.semantic.vector_store import _cosine_similarity - - small_vec = [1e-6] * 384 - - similarity = _cosine_similarity(small_vec, small_vec) - assert similarity == pytest.approx(1.0) - - def test_cosine_similarity_no_inf_nan_results(self): - """Epsilon edge cases should never produce inf/nan results.""" - from codexlens.semantic.vector_store import _cosine_similarity - - cases = [ - ([0.0] * 384, [1.0] * 384), - ([1e-12] * 384, [1.0] * 384), - ([1e-7] * 384, [1e-7] * 384), - ([1e-6] * 384, [1e-6] * 384), - ([1.0] * 384, [1.0] * 384), - ] - - for a, b in cases: - similarity = _cosine_similarity(a, b) - assert math.isfinite(similarity) - - -# === VectorStore Tests === - -class TestVectorStoreCRUD: - """Tests for VectorStore CRUD operations.""" - - def test_add_chunk(self, vector_store, embedder): - """Test adding a single chunk.""" - chunk = SemanticChunk( - content="def test(): pass", - metadata={"language": "python"}, - ) - chunk.embedding = embedder.embed_single(chunk.content) - - chunk_id = vector_store.add_chunk(chunk, "/test/file.py") - - assert chunk_id > 0 - assert vector_store.count_chunks() == 1 - - def test_add_chunk_without_embedding_raises(self, vector_store): - """Test adding chunk without embedding raises error.""" - chunk = SemanticChunk(content="def test(): pass", metadata={}) - - with pytest.raises(ValueError, match="must have embedding"): - vector_store.add_chunk(chunk, "/test/file.py") - - def test_add_chunks_batch(self, vector_store, embedder, sample_code_chunks): - """Test batch adding chunks.""" - chunks = [] - for data in sample_code_chunks: - chunk = SemanticChunk(content=data["content"], metadata=data["metadata"]) - chunk.embedding = embedder.embed_single(chunk.content) - chunks.append(chunk) - - ids = vector_store.add_chunks(chunks, "/test/multi.py") - - assert len(ids) == len(chunks) - assert vector_store.count_chunks() == len(chunks) - - def test_add_empty_batch(self, vector_store): - """Test adding empty batch returns empty list.""" - ids = vector_store.add_chunks([], "/test/empty.py") - assert ids == [] - - def test_delete_file_chunks(self, vector_store, embedder): - """Test deleting chunks by file path.""" - # Add chunks for two files - chunk1 = SemanticChunk(content="def a(): pass", metadata={}) - chunk1.embedding = embedder.embed_single(chunk1.content) - vector_store.add_chunk(chunk1, "/test/file1.py") - - chunk2 = SemanticChunk(content="def b(): pass", metadata={}) - chunk2.embedding = embedder.embed_single(chunk2.content) - vector_store.add_chunk(chunk2, "/test/file2.py") - - assert vector_store.count_chunks() == 2 - - # Delete one file's chunks - deleted = vector_store.delete_file_chunks("/test/file1.py") - - assert deleted == 1 - assert vector_store.count_chunks() == 1 - - def test_delete_nonexistent_file(self, vector_store): - """Test deleting non-existent file returns 0.""" - deleted = vector_store.delete_file_chunks("/nonexistent/file.py") - assert deleted == 0 - - def test_count_chunks_empty(self, vector_store): - """Test count on empty store.""" - assert vector_store.count_chunks() == 0 - - -class TestVectorStoreSearch: - """Tests for VectorStore search functionality.""" - - def test_search_similar_basic(self, vector_store, embedder, sample_code_chunks): - """Test basic similarity search.""" - # Add chunks - for data in sample_code_chunks: - chunk = SemanticChunk(content=data["content"], metadata=data["metadata"]) - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, "/test/file.py") - - # Search - query = "function to authenticate user login" - query_embedding = embedder.embed_single(query) - results = vector_store.search_similar(query_embedding, top_k=3) - - assert len(results) > 0 - assert all(isinstance(r, SearchResult) for r in results) - # Top result should be auth-related - assert "authenticate" in results[0].excerpt.lower() or "auth" in results[0].path.lower() - - def test_search_respects_top_k(self, vector_store, embedder, sample_code_chunks): - """Test search respects top_k parameter.""" - # Add all chunks - for data in sample_code_chunks: - chunk = SemanticChunk(content=data["content"], metadata=data["metadata"]) - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, "/test/file.py") - - query_embedding = embedder.embed_single("code") - - results_2 = vector_store.search_similar(query_embedding, top_k=2) - results_5 = vector_store.search_similar(query_embedding, top_k=5) - - assert len(results_2) <= 2 - assert len(results_5) <= 5 - - def test_search_min_score_filtering(self, vector_store, embedder): - """Test min_score filtering.""" - chunk = SemanticChunk( - content="def hello(): print('hello world')", - metadata={}, - ) - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, "/test/hello.py") - - query_embedding = embedder.embed_single("database connection pool") - - results_no_filter = vector_store.search_similar(query_embedding, min_score=0.0) - results_high_filter = vector_store.search_similar(query_embedding, min_score=0.9) - - assert len(results_no_filter) >= len(results_high_filter) - - def test_search_returns_sorted_by_score(self, vector_store, embedder, sample_code_chunks): - """Test results are sorted by score descending.""" - for data in sample_code_chunks: - chunk = SemanticChunk(content=data["content"], metadata=data["metadata"]) - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, "/test/file.py") - - query_embedding = embedder.embed_single("function") - results = vector_store.search_similar(query_embedding, top_k=5) - - if len(results) > 1: - for i in range(len(results) - 1): - assert results[i].score >= results[i + 1].score - - def test_search_includes_metadata(self, vector_store, embedder): - """Test search results include metadata.""" - chunk = SemanticChunk( - content="def test_function(): pass", - metadata={ - "symbol_name": "test_function", - "symbol_kind": "function", - "start_line": 10, - "end_line": 15, - }, - ) - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, "/test/func.py") - - query_embedding = embedder.embed_single("test function") - results = vector_store.search_similar(query_embedding, top_k=1) - - assert len(results) == 1 - assert results[0].symbol_name == "test_function" - assert results[0].symbol_kind == "function" - assert results[0].start_line == 10 - assert results[0].end_line == 15 - - def test_search_empty_store_returns_empty(self, vector_store, embedder): - """Test search on empty store returns empty list.""" - query_embedding = embedder.embed_single("anything") - results = vector_store.search_similar(query_embedding) - assert results == [] - - def test_search_with_return_full_content_false(self, vector_store, embedder): - """Test search with return_full_content=False.""" - chunk = SemanticChunk( - content="def long_function(): " + "pass\n" * 100, - metadata={}, - ) - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, "/test/long.py") - - query_embedding = embedder.embed_single("function") - results = vector_store.search_similar( - query_embedding, top_k=1, return_full_content=False - ) - - assert len(results) == 1 - assert results[0].content is None - assert results[0].excerpt is not None - - -class TestVectorStoreCache: - """Tests for VectorStore caching behavior.""" - - def test_cache_invalidation_on_add(self, vector_store, embedder): - """Test cache is invalidated when chunks are added.""" - chunk1 = SemanticChunk(content="def a(): pass", metadata={}) - chunk1.embedding = embedder.embed_single(chunk1.content) - vector_store.add_chunk(chunk1, "/test/a.py") - - # Trigger cache population - query_embedding = embedder.embed_single("function") - vector_store.search_similar(query_embedding) - - initial_version = vector_store._cache_version - - # Add another chunk - chunk2 = SemanticChunk(content="def b(): pass", metadata={}) - chunk2.embedding = embedder.embed_single(chunk2.content) - vector_store.add_chunk(chunk2, "/test/b.py") - - assert vector_store._cache_version > initial_version - assert vector_store._embedding_matrix is None - - def test_cache_invalidation_on_delete(self, vector_store, embedder): - """Test cache is invalidated when chunks are deleted.""" - chunk = SemanticChunk(content="def a(): pass", metadata={}) - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, "/test/a.py") - - # Trigger cache population - query_embedding = embedder.embed_single("function") - vector_store.search_similar(query_embedding) - - initial_version = vector_store._cache_version - - # Delete chunk - vector_store.delete_file_chunks("/test/a.py") - - assert vector_store._cache_version > initial_version - - def test_manual_cache_clear(self, vector_store, embedder): - """Test manual cache clearing.""" - chunk = SemanticChunk(content="def a(): pass", metadata={}) - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, "/test/a.py") - - # Force brute-force mode to populate cache (disable ANN) - original_ann = vector_store._ann_index - vector_store._ann_index = None - - # Trigger cache population - query_embedding = embedder.embed_single("function") - vector_store.search_similar(query_embedding) - - assert vector_store._embedding_matrix is not None - - vector_store.clear_cache() - - assert vector_store._embedding_matrix is None - - # Restore ANN index - vector_store._ann_index = original_ann - - -# === Semantic Search Accuracy Tests === - -class TestSemanticSearchAccuracy: - """Tests for semantic search accuracy and relevance.""" - - def test_auth_query_finds_auth_code(self, vector_store, embedder, sample_code_chunks): - """Test authentication query finds auth code.""" - for data in sample_code_chunks: - chunk = SemanticChunk(content=data["content"], metadata=data["metadata"]) - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, "/test/file.py") - - query = "user authentication login" - query_embedding = embedder.embed_single(query) - results = vector_store.search_similar(query_embedding, top_k=1) - - assert len(results) > 0 - assert "authenticate" in results[0].excerpt.lower() - - def test_database_query_finds_db_code(self, vector_store, embedder, sample_code_chunks): - """Test database query finds database code.""" - for data in sample_code_chunks: - chunk = SemanticChunk(content=data["content"], metadata=data["metadata"]) - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, "/test/file.py") - - query = "database connection execute query" - query_embedding = embedder.embed_single(query) - results = vector_store.search_similar(query_embedding, top_k=1) - - assert len(results) > 0 - assert "database" in results[0].excerpt.lower() or "connect" in results[0].excerpt.lower() - - def test_math_query_finds_calculation_code(self, vector_store, embedder, sample_code_chunks): - """Test math query finds calculation code.""" - for data in sample_code_chunks: - chunk = SemanticChunk(content=data["content"], metadata=data["metadata"]) - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, "/test/file.py") - - query = "sum numbers add calculation" - query_embedding = embedder.embed_single(query) - results = vector_store.search_similar(query_embedding, top_k=1) - - assert len(results) > 0 - assert "sum" in results[0].excerpt.lower() or "calculate" in results[0].excerpt.lower() - - -# === Performance Tests === - -class TestVectorSearchPerformance: - """Performance tests for vector search.""" - - def test_embedding_performance(self, embedder): - """Test embedding generation performance.""" - text = "def calculate_sum(a, b): return a + b" - - # Warm up - embedder.embed_single(text) - - # Measure - start = time.perf_counter() - iterations = 10 - for _ in range(iterations): - embedder.embed_single(text) - elapsed = time.perf_counter() - start - - avg_ms = (elapsed / iterations) * 1000 - assert avg_ms < 100, f"Single embedding should be <100ms, got {avg_ms:.2f}ms" - - def test_batch_embedding_performance(self, embedder): - """Test batch embedding performance.""" - texts = [f"def function_{i}(): pass" for i in range(50)] - - # Warm up - embedder.embed(texts[:5]) - - # Measure - start = time.perf_counter() - embedder.embed(texts) - elapsed = time.perf_counter() - start - - total_ms = elapsed * 1000 - per_text_ms = total_ms / len(texts) - assert per_text_ms < 20, f"Per-text embedding should be <20ms, got {per_text_ms:.2f}ms" - - def test_search_performance_small(self, vector_store, embedder): - """Test search performance with small dataset.""" - # Add 100 chunks - for i in range(100): - chunk = SemanticChunk( - content=f"def function_{i}(): return {i}", - metadata={"index": i}, - ) - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, f"/test/file_{i}.py") - - query_embedding = embedder.embed_single("function return value") - - # Warm up - vector_store.search_similar(query_embedding) - - # Measure - start = time.perf_counter() - iterations = 10 - for _ in range(iterations): - vector_store.search_similar(query_embedding) - elapsed = time.perf_counter() - start - - avg_ms = (elapsed / iterations) * 1000 - assert avg_ms < 50, f"Search with 100 chunks should be <50ms, got {avg_ms:.2f}ms" - - def test_search_performance_medium(self, vector_store, embedder): - """Test search performance with medium dataset.""" - # Add 500 chunks in batch - chunks = [] - for i in range(500): - chunk = SemanticChunk( - content=f"def function_{i}(x): return x * {i}", - metadata={"index": i}, - ) - chunk.embedding = embedder.embed_single(chunk.content) - chunks.append(chunk) - - vector_store.add_chunks(chunks, "/test/bulk.py") - - query_embedding = embedder.embed_single("multiply value") - - # Warm up - vector_store.search_similar(query_embedding) - - # Measure - start = time.perf_counter() - iterations = 5 - for _ in range(iterations): - vector_store.search_similar(query_embedding) - elapsed = time.perf_counter() - start - - avg_ms = (elapsed / iterations) * 1000 - assert avg_ms < 100, f"Search with 500 chunks should be <100ms, got {avg_ms:.2f}ms" - - -# === Thread Safety Tests === - -class TestThreadSafety: - """Tests for thread safety.""" - - def test_concurrent_searches(self, vector_store, embedder, sample_code_chunks): - """Test concurrent searches are thread-safe.""" - # Populate store - for data in sample_code_chunks: - chunk = SemanticChunk(content=data["content"], metadata=data["metadata"]) - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, "/test/file.py") - - results_list = [] - errors = [] - - def search_task(query): - try: - query_embedding = embedder.embed_single(query) - results = vector_store.search_similar(query_embedding, top_k=3) - results_list.append(len(results)) - except Exception as e: - errors.append(str(e)) - - queries = ["authentication", "database", "function", "class", "async"] - threads = [threading.Thread(target=search_task, args=(q,)) for q in queries] - - for t in threads: - t.start() - for t in threads: - t.join() - - assert len(errors) == 0, f"Errors during concurrent search: {errors}" - assert len(results_list) == len(queries) - - def test_concurrent_add_and_search(self, vector_store, embedder): - """Test concurrent add and search operations.""" - errors = [] - - def add_task(idx): - try: - chunk = SemanticChunk( - content=f"def task_{idx}(): pass", - metadata={"idx": idx}, - ) - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, f"/test/task_{idx}.py") - except Exception as e: - errors.append(f"Add error: {e}") - - def search_task(): - try: - query_embedding = embedder.embed_single("function task") - vector_store.search_similar(query_embedding) - except Exception as e: - errors.append(f"Search error: {e}") - - threads = [] - for i in range(10): - threads.append(threading.Thread(target=add_task, args=(i,))) - threads.append(threading.Thread(target=search_task)) - - for t in threads: - t.start() - for t in threads: - t.join() - - assert len(errors) == 0, f"Errors during concurrent ops: {errors}" - - -# === Edge Cases === - -class TestEdgeCases: - """Tests for edge cases.""" - - def test_very_short_content(self, vector_store, embedder): - """Test handling very short content.""" - chunk = SemanticChunk(content="x", metadata={}) - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, "/test/short.py") - - query_embedding = embedder.embed_single("x") - results = vector_store.search_similar(query_embedding) - - assert len(results) == 1 - - def test_special_characters_in_path(self, vector_store, embedder): - """Test handling special characters in file path.""" - chunk = SemanticChunk(content="def test(): pass", metadata={}) - chunk.embedding = embedder.embed_single(chunk.content) - - special_path = "/test/path with spaces/file-name_v2.py" - vector_store.add_chunk(chunk, special_path) - - query_embedding = embedder.embed_single("test function") - results = vector_store.search_similar(query_embedding) - - assert len(results) == 1 - assert results[0].path == special_path - - def test_json_metadata_special_chars(self, vector_store, embedder): - """Test metadata with special JSON characters.""" - metadata = { - "description": 'Test "quoted" text with \'single\' quotes', - "path": "C:\\Users\\test\\file.py", - "tags": ["tag1", "tag2"], - } - chunk = SemanticChunk(content="def test(): pass", metadata=metadata) - chunk.embedding = embedder.embed_single(chunk.content) - - vector_store.add_chunk(chunk, "/test/special.py") - - query_embedding = embedder.embed_single("test") - results = vector_store.search_similar(query_embedding) - - assert len(results) == 1 - assert results[0].metadata["description"] == metadata["description"] - - def test_search_zero_top_k(self, vector_store, embedder): - """Test search with top_k=0.""" - chunk = SemanticChunk(content="def test(): pass", metadata={}) - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, "/test/file.py") - - query_embedding = embedder.embed_single("test") - results = vector_store.search_similar(query_embedding, top_k=0) - - assert results == [] - - def test_search_very_high_min_score(self, vector_store, embedder): - """Test search with very high min_score filters all results.""" - chunk = SemanticChunk(content="def hello(): print('world')", metadata={}) - chunk.embedding = embedder.embed_single(chunk.content) - vector_store.add_chunk(chunk, "/test/hello.py") - - # Query something unrelated with very high threshold - query_embedding = embedder.embed_single("database connection") - results = vector_store.search_similar(query_embedding, min_score=0.99) - - # Should filter out since unrelated - assert len(results) == 0 - - -# === Availability Check Tests === - -class TestAvailabilityCheck: - """Tests for semantic availability checking.""" - - def test_check_semantic_available(self): - """Test check_semantic_available function.""" - available, error = check_semantic_available() - assert available is SEMANTIC_AVAILABLE - if available: - assert error is None - else: - assert error is not None - - def test_semantic_available_flag(self): - """Test SEMANTIC_AVAILABLE flag is True when deps installed.""" - assert isinstance(SEMANTIC_AVAILABLE, bool) diff --git a/codex-lens/tests/test_vector_store.py b/codex-lens/tests/test_vector_store.py deleted file mode 100644 index 3da2ab0f..00000000 --- a/codex-lens/tests/test_vector_store.py +++ /dev/null @@ -1,386 +0,0 @@ -import sqlite3 -import sys -import tempfile -import threading -import time -from pathlib import Path - -import numpy as np -import pytest - -from codexlens.entities import SemanticChunk -import codexlens.semantic.vector_store as vector_store_module -from codexlens.semantic.vector_store import VectorStore - - -@pytest.fixture() -def temp_db(): - with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as tmpdir: - yield Path(tmpdir) / "semantic.db" - - -def test_concurrent_bulk_insert(monkeypatch: pytest.MonkeyPatch, temp_db: Path) -> None: - """Concurrent batch inserts in bulk mode should not corrupt accumulation state.""" - store = VectorStore(temp_db) - monkeypatch.setattr(store, "_ensure_ann_index", lambda dim: True) - - store.begin_bulk_insert() - - errors: list[Exception] = [] - lock = threading.Lock() - threads: list[threading.Thread] = [] - - def make_chunks(count: int, dim: int) -> list[SemanticChunk]: - chunks: list[SemanticChunk] = [] - for i in range(count): - chunk = SemanticChunk(content=f"chunk {i}", metadata={}) - chunk.embedding = np.random.randn(dim).astype(np.float32) - chunks.append(chunk) - return chunks - - def worker(idx: int) -> None: - try: - dim = 8 - if idx % 2 == 0: - chunks = make_chunks(5, dim) - store.add_chunks_batch([(c, f"file_{idx}.py") for c in chunks], auto_save_ann=False) - else: - chunks = [SemanticChunk(content=f"chunk {i}") for i in range(5)] - embeddings = np.random.randn(5, dim).astype(np.float32) - store.add_chunks_batch_numpy( - [(c, f"file_{idx}.py") for c in chunks], - embeddings_matrix=embeddings, - auto_save_ann=False, - ) - except Exception as exc: - with lock: - errors.append(exc) - - for i in range(10): - threads.append(threading.Thread(target=worker, args=(i,))) - - for t in threads: - t.start() - for t in threads: - t.join() - - assert not errors - assert len(store._bulk_insert_ids) == 50 - assert len(store._bulk_insert_embeddings) == 50 - assert store.count_chunks() == 50 - - -def test_bulk_insert_mode_transitions(monkeypatch: pytest.MonkeyPatch, temp_db: Path) -> None: - """begin/end bulk insert should be thread-safe with concurrent add operations.""" - store = VectorStore(temp_db) - - class DummyAnn: - def __init__(self) -> None: - self.total_added = 0 - self.save_calls = 0 - - def add_vectors(self, ids, embeddings) -> None: - self.total_added += len(ids) - - def save(self) -> None: - self.save_calls += 1 - - dummy_ann = DummyAnn() - store._ann_index = dummy_ann - monkeypatch.setattr(store, "_ensure_ann_index", lambda dim: True) - - errors: list[Exception] = [] - lock = threading.Lock() - stop_event = threading.Event() - - def adder(worker_id: int) -> None: - try: - while not stop_event.is_set(): - chunk = SemanticChunk(content=f"chunk {worker_id}", metadata={}) - chunk.embedding = np.random.randn(8).astype(np.float32) - store.add_chunks_batch([(chunk, f"file_{worker_id}.py")], auto_save_ann=False) - except Exception as exc: - with lock: - errors.append(exc) - - def toggler() -> None: - try: - for _ in range(5): - store.begin_bulk_insert() - time.sleep(0.05) - store.end_bulk_insert() - time.sleep(0.05) - except Exception as exc: - with lock: - errors.append(exc) - - threads = [threading.Thread(target=adder, args=(i,)) for i in range(3)] - toggle_thread = threading.Thread(target=toggler) - - for t in threads: - t.start() - toggle_thread.start() - - toggle_thread.join(timeout=10) - stop_event.set() - for t in threads: - t.join(timeout=10) - - assert not errors - assert toggle_thread.is_alive() is False - assert store._bulk_insert_mode is False - assert store._bulk_insert_ids == [] - assert store._bulk_insert_embeddings == [] - assert dummy_ann.total_added == store.count_chunks() - - -def test_search_similar_min_score_validation(monkeypatch: pytest.MonkeyPatch, temp_db: Path) -> None: - """search_similar should validate min_score is within [0.0, 1.0].""" - monkeypatch.setattr(vector_store_module, "HNSWLIB_AVAILABLE", False) - store = VectorStore(temp_db) - - chunk_a = SemanticChunk(content="chunk A", metadata={}) - chunk_a.embedding = np.array([1.0, 0.0, 0.0], dtype=np.float32) - chunk_b = SemanticChunk(content="chunk B", metadata={}) - chunk_b.embedding = np.array([0.0, 1.0, 0.0], dtype=np.float32) - store.add_chunks_batch([(chunk_a, "a.py"), (chunk_b, "b.py")]) - - query = [1.0, 0.0, 0.0] - - with pytest.raises(ValueError, match=r"min_score.*\[0\.0, 1\.0\].*cosine"): - store.search_similar(query, min_score=-0.5) - - with pytest.raises(ValueError, match=r"min_score.*\[0\.0, 1\.0\].*cosine"): - store.search_similar(query, min_score=1.5) - - store.search_similar(query, min_score=0.0) - store.search_similar(query, min_score=1.0) - - results = store.search_similar(query, min_score=0.5, return_full_content=False) - assert [r.path for r in results] == ["a.py"] - - -def test_search_similar(monkeypatch: pytest.MonkeyPatch, temp_db: Path) -> None: - """search_similar returns results ordered by descending similarity.""" - monkeypatch.setattr(vector_store_module, "HNSWLIB_AVAILABLE", False) - store = VectorStore(temp_db) - - chunk_a = SemanticChunk(content="chunk A", metadata={}) - chunk_a.embedding = np.array([1.0, 0.0, 0.0], dtype=np.float32) - chunk_b = SemanticChunk(content="chunk B", metadata={}) - chunk_b.embedding = np.array([0.0, 1.0, 0.0], dtype=np.float32) - store.add_chunks_batch([(chunk_a, "a.py"), (chunk_b, "b.py")]) - - results = store.search_similar([1.0, 0.0, 0.0], top_k=10, min_score=0.0, return_full_content=False) - - assert [r.path for r in results] == ["a.py", "b.py"] - assert results[0].score == pytest.approx(1.0) - assert results[1].score == pytest.approx(0.0) - - -def test_search_with_ann_null_results(monkeypatch: pytest.MonkeyPatch, temp_db: Path) -> None: - """_search_with_ann should return [] when ANN search returns null results.""" - monkeypatch.setattr(vector_store_module, "HNSWLIB_AVAILABLE", False) - store = VectorStore(temp_db) - - class DummyAnn: - def count(self) -> int: - return 1 - - def search(self, query_vec: np.ndarray, top_k: int): - return None, None - - store._ann_index = DummyAnn() - - results = store._search_with_ann(np.array([1.0, 0.0, 0.0], dtype=np.float32), top_k=10, min_score=0.0, return_full_content=False) - assert results == [] - - -def test_search_with_ann_empty_results(monkeypatch: pytest.MonkeyPatch, temp_db: Path) -> None: - """_search_with_ann should return [] when ANN search returns empty results.""" - monkeypatch.setattr(vector_store_module, "HNSWLIB_AVAILABLE", False) - store = VectorStore(temp_db) - - class DummyAnn: - def count(self) -> int: - return 1 - - def search(self, query_vec: np.ndarray, top_k: int): - return [], [] - - store._ann_index = DummyAnn() - - results = store._search_with_ann(np.array([1.0, 0.0, 0.0], dtype=np.float32), top_k=10, min_score=0.0, return_full_content=False) - assert results == [] - - -def test_search_with_ann_mismatched_results(monkeypatch: pytest.MonkeyPatch, temp_db: Path) -> None: - """_search_with_ann should return [] when ANN search returns mismatched results.""" - monkeypatch.setattr(vector_store_module, "HNSWLIB_AVAILABLE", False) - store = VectorStore(temp_db) - - class DummyAnn: - def count(self) -> int: - return 2 - - def search(self, query_vec: np.ndarray, top_k: int): - return [1, 2], [0.5] - - store._ann_index = DummyAnn() - - results = store._search_with_ann(np.array([1.0, 0.0, 0.0], dtype=np.float32), top_k=10, min_score=0.0, return_full_content=False) - assert results == [] - - -def test_search_with_ann_valid_results(monkeypatch: pytest.MonkeyPatch, temp_db: Path) -> None: - """_search_with_ann should return results for valid ANN outputs.""" - monkeypatch.setattr(vector_store_module, "HNSWLIB_AVAILABLE", False) - store = VectorStore(temp_db) - - chunk = SemanticChunk(content="chunk A", metadata={}) - chunk.embedding = np.array([1.0, 0.0, 0.0], dtype=np.float32) - chunk_id = store.add_chunk(chunk, "a.py") - - class DummyAnn: - def count(self) -> int: - return 1 - - def search(self, query_vec: np.ndarray, top_k: int): - return [chunk_id], [0.0] - - store._ann_index = DummyAnn() - - results = store._search_with_ann(np.array([1.0, 0.0, 0.0], dtype=np.float32), top_k=10, min_score=0.0, return_full_content=False) - assert [r.path for r in results] == ["a.py"] - assert results[0].score == pytest.approx(1.0) - - -def test_add_chunks_batch_overflow(monkeypatch: pytest.MonkeyPatch, temp_db: Path) -> None: - """add_chunks_batch should fail fast when generated IDs would exceed SQLite/sys bounds.""" - monkeypatch.setattr(vector_store_module, "HNSWLIB_AVAILABLE", False) - store = VectorStore(temp_db) - - seed_embedding = np.array([1.0, 0.0, 0.0], dtype=np.float32).tobytes() - with sqlite3.connect(store.db_path) as conn: - conn.execute( - "INSERT INTO semantic_chunks (id, file_path, content, embedding, metadata) VALUES (?, ?, ?, ?, ?)", - (sys.maxsize - 5, "seed.py", "seed", seed_embedding, None), - ) - conn.commit() - - chunks_with_paths: list[tuple[SemanticChunk, str]] = [] - for i in range(10): - chunks_with_paths.append( - ( - SemanticChunk(content=f"chunk {i}", metadata={}, embedding=[1.0, 0.0, 0.0]), - f"file_{i}.py", - ) - ) - - with pytest.raises(ValueError, match=r"Chunk ID range overflow"): - store.add_chunks_batch(chunks_with_paths) - - -def test_add_chunks_batch_generates_sequential_ids(monkeypatch: pytest.MonkeyPatch, temp_db: Path) -> None: - """add_chunks_batch should return sequential IDs for a fresh store.""" - monkeypatch.setattr(vector_store_module, "HNSWLIB_AVAILABLE", False) - store = VectorStore(temp_db) - - chunks_with_paths = [ - (SemanticChunk(content="chunk A", metadata={}, embedding=[1.0, 0.0, 0.0]), "a.py"), - (SemanticChunk(content="chunk B", metadata={}, embedding=[0.0, 1.0, 0.0]), "b.py"), - ] - - ids = store.add_chunks_batch(chunks_with_paths, update_ann=False) - assert ids == [1, 2] - assert store.count_chunks() == 2 - - -def test_add_chunks_batch_numpy_overflow(monkeypatch: pytest.MonkeyPatch, temp_db: Path) -> None: - """add_chunks_batch_numpy should fail fast when generated IDs would exceed SQLite/sys bounds.""" - monkeypatch.setattr(vector_store_module, "HNSWLIB_AVAILABLE", False) - store = VectorStore(temp_db) - - seed_embedding = np.array([1.0, 0.0, 0.0], dtype=np.float32).tobytes() - with sqlite3.connect(store.db_path) as conn: - conn.execute( - "INSERT INTO semantic_chunks (id, file_path, content, embedding, metadata) VALUES (?, ?, ?, ?, ?)", - (sys.maxsize - 5, "seed.py", "seed", seed_embedding, None), - ) - conn.commit() - - chunks_with_paths = [ - (SemanticChunk(content=f"chunk {i}", metadata={}), f"file_{i}.py") - for i in range(10) - ] - embeddings = np.random.randn(10, 3).astype(np.float32) - - with pytest.raises(ValueError, match=r"Chunk ID range overflow"): - store.add_chunks_batch_numpy(chunks_with_paths, embeddings) - - -def test_fetch_results_by_ids(monkeypatch: pytest.MonkeyPatch, temp_db: Path) -> None: - """_fetch_results_by_ids should use parameterized IN queries and return ordered results.""" - store = VectorStore(temp_db) - - calls: list[tuple[str, str, object]] = [] - rows = [ - (1, "a.py", "content A", None), - (2, "b.py", "content B", None), - ] - - class DummyCursor: - def __init__(self, result_rows): - self._rows = result_rows - - def fetchall(self): - return self._rows - - class DummyConn: - def __enter__(self): - return self - - def __exit__(self, exc_type, exc, tb): - return False - - def execute(self, query, params=None): - if isinstance(query, str) and query.strip().upper().startswith("PRAGMA"): - calls.append(("pragma", query, params)) - return DummyCursor([]) - calls.append(("query", query, params)) - return DummyCursor(rows) - - monkeypatch.setattr(vector_store_module.sqlite3, "connect", lambda _: DummyConn()) - - chunk_ids = [1, 2] - scores = [0.9, 0.8] - results = store._fetch_results_by_ids(chunk_ids, scores, return_full_content=False) - - assert [r.path for r in results] == ["a.py", "b.py"] - assert [r.score for r in results] == scores - assert all(r.content is None for r in results) - - assert any(kind == "pragma" for kind, _, _ in calls) - _, query, params = next((c for c in calls if c[0] == "query"), ("", "", None)) - expected_query = """ - SELECT id, file_path, content, metadata - FROM semantic_chunks - WHERE id IN ({placeholders}) - """.format(placeholders=",".join("?" * len(chunk_ids))) - assert query == expected_query - assert params == chunk_ids - - assert store._fetch_results_by_ids([], [], return_full_content=False) == [] - - -def test_fetch_results_sql_safety() -> None: - """Placeholder generation and validation should prevent unsafe SQL interpolation.""" - for count in (0, 1, 10, 100): - placeholders = ",".join("?" * count) - vector_store_module._validate_sql_placeholders(placeholders, count) - - with pytest.raises(ValueError): - vector_store_module._validate_sql_placeholders("?,?); DROP TABLE semantic_chunks;--", 2) - - with pytest.raises(ValueError): - vector_store_module._validate_sql_placeholders("?,?", 3) diff --git a/codex-lens/tests/test_watcher/__init__.py b/codex-lens/tests/test_watcher/__init__.py deleted file mode 100644 index f736461b..00000000 --- a/codex-lens/tests/test_watcher/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for watcher module.""" diff --git a/codex-lens/tests/test_watcher/conftest.py b/codex-lens/tests/test_watcher/conftest.py deleted file mode 100644 index f3fcecfb..00000000 --- a/codex-lens/tests/test_watcher/conftest.py +++ /dev/null @@ -1,43 +0,0 @@ -"""Fixtures for watcher tests.""" - -from __future__ import annotations - -import tempfile -from pathlib import Path -from typing import Generator - -import pytest - - -@pytest.fixture -def temp_project() -> Generator[Path, None, None]: - """Create a temporary project directory with sample files.""" - with tempfile.TemporaryDirectory() as tmpdir: - project = Path(tmpdir) - - # Create sample Python file - py_file = project / "main.py" - py_file.write_text("def hello():\n print('Hello')\n") - - # Create sample JavaScript file - js_file = project / "app.js" - js_file.write_text("function greet() {\n console.log('Hi');\n}\n") - - # Create subdirectory with file - sub_dir = project / "src" - sub_dir.mkdir() - (sub_dir / "utils.py").write_text("def add(a, b):\n return a + b\n") - - # Create ignored directory - git_dir = project / ".git" - git_dir.mkdir() - (git_dir / "config").write_text("[core]\n") - - yield project - - -@pytest.fixture -def watcher_config(): - """Create default watcher configuration.""" - from codexlens.watcher import WatcherConfig - return WatcherConfig(debounce_ms=100) # Short debounce for tests diff --git a/codex-lens/tests/test_watcher/test_events.py b/codex-lens/tests/test_watcher/test_events.py deleted file mode 100644 index c3f3a53f..00000000 --- a/codex-lens/tests/test_watcher/test_events.py +++ /dev/null @@ -1,103 +0,0 @@ -"""Tests for watcher event types.""" - -from __future__ import annotations - -import time -from pathlib import Path - -import pytest - -from codexlens.watcher import ChangeType, FileEvent, WatcherConfig, IndexResult, WatcherStats - - -class TestChangeType: - """Tests for ChangeType enum.""" - - def test_change_types_exist(self): - """Verify all change types are defined.""" - assert ChangeType.CREATED.value == "created" - assert ChangeType.MODIFIED.value == "modified" - assert ChangeType.DELETED.value == "deleted" - assert ChangeType.MOVED.value == "moved" - - def test_change_type_count(self): - """Verify we have exactly 4 change types.""" - assert len(ChangeType) == 4 - - -class TestFileEvent: - """Tests for FileEvent dataclass.""" - - def test_create_event(self): - """Test creating a file event.""" - event = FileEvent( - path=Path("/test/file.py"), - change_type=ChangeType.CREATED, - timestamp=time.time(), - ) - assert event.path == Path("/test/file.py") - assert event.change_type == ChangeType.CREATED - assert event.old_path is None - - def test_moved_event(self): - """Test creating a moved event with old_path.""" - event = FileEvent( - path=Path("/test/new.py"), - change_type=ChangeType.MOVED, - timestamp=time.time(), - old_path=Path("/test/old.py"), - ) - assert event.old_path == Path("/test/old.py") - - -class TestWatcherConfig: - """Tests for WatcherConfig dataclass.""" - - def test_default_config(self): - """Test default configuration values.""" - config = WatcherConfig() - assert config.debounce_ms == 1000 - assert ".git" in config.ignored_patterns - assert "node_modules" in config.ignored_patterns - assert "__pycache__" in config.ignored_patterns - assert config.languages is None - - def test_custom_debounce(self): - """Test custom debounce setting.""" - config = WatcherConfig(debounce_ms=500) - assert config.debounce_ms == 500 - - -class TestIndexResult: - """Tests for IndexResult dataclass.""" - - def test_default_result(self): - """Test default result values.""" - result = IndexResult() - assert result.files_indexed == 0 - assert result.files_removed == 0 - assert result.symbols_added == 0 - assert result.errors == [] - - def test_custom_result(self): - """Test creating result with values.""" - result = IndexResult( - files_indexed=5, - files_removed=2, - symbols_added=50, - errors=["error1"], - ) - assert result.files_indexed == 5 - assert result.files_removed == 2 - - -class TestWatcherStats: - """Tests for WatcherStats dataclass.""" - - def test_default_stats(self): - """Test default stats values.""" - stats = WatcherStats() - assert stats.files_watched == 0 - assert stats.events_processed == 0 - assert stats.last_event_time is None - assert stats.is_running is False diff --git a/codex-lens/tests/test_watcher/test_file_watcher.py b/codex-lens/tests/test_watcher/test_file_watcher.py deleted file mode 100644 index 50aa352a..00000000 --- a/codex-lens/tests/test_watcher/test_file_watcher.py +++ /dev/null @@ -1,124 +0,0 @@ -"""Tests for FileWatcher class.""" - -from __future__ import annotations - -import time -from pathlib import Path -from typing import List - -import pytest - -from codexlens.watcher import FileWatcher, WatcherConfig, FileEvent, ChangeType - - -class TestFileWatcherInit: - """Tests for FileWatcher initialization.""" - - def test_init_with_valid_path(self, temp_project: Path, watcher_config: WatcherConfig): - """Test initializing with valid path.""" - events: List[FileEvent] = [] - watcher = FileWatcher(temp_project, watcher_config, lambda e: events.extend(e)) - - assert watcher.root_path == temp_project.resolve() - assert watcher.config == watcher_config - assert not watcher.is_running - - def test_start_with_invalid_path(self, watcher_config: WatcherConfig): - """Test starting watcher with non-existent path.""" - events: List[FileEvent] = [] - watcher = FileWatcher(Path("/nonexistent/path"), watcher_config, lambda e: events.extend(e)) - - with pytest.raises(ValueError, match="does not exist"): - watcher.start() - - -class TestFileWatcherLifecycle: - """Tests for FileWatcher start/stop lifecycle.""" - - def test_start_stop(self, temp_project: Path, watcher_config: WatcherConfig): - """Test basic start and stop.""" - events: List[FileEvent] = [] - watcher = FileWatcher(temp_project, watcher_config, lambda e: events.extend(e)) - - watcher.start() - assert watcher.is_running - - watcher.stop() - assert not watcher.is_running - - def test_double_start(self, temp_project: Path, watcher_config: WatcherConfig): - """Test calling start twice.""" - events: List[FileEvent] = [] - watcher = FileWatcher(temp_project, watcher_config, lambda e: events.extend(e)) - - watcher.start() - watcher.start() # Should not raise - assert watcher.is_running - - watcher.stop() - - def test_double_stop(self, temp_project: Path, watcher_config: WatcherConfig): - """Test calling stop twice.""" - events: List[FileEvent] = [] - watcher = FileWatcher(temp_project, watcher_config, lambda e: events.extend(e)) - - watcher.start() - watcher.stop() - watcher.stop() # Should not raise - assert not watcher.is_running - - -class TestFileWatcherEvents: - """Tests for FileWatcher event detection.""" - - def test_detect_file_creation(self, temp_project: Path, watcher_config: WatcherConfig): - """Test detecting new file creation.""" - events: List[FileEvent] = [] - watcher = FileWatcher(temp_project, watcher_config, lambda e: events.extend(e)) - - try: - watcher.start() - time.sleep(0.3) # Let watcher start (longer for Windows) - - # Create new file - new_file = temp_project / "new_file.py" - new_file.write_text("# New file\n") - - # Wait for event with retries (watchdog timing varies by platform) - max_wait = 2.0 - waited = 0.0 - while waited < max_wait: - time.sleep(0.2) - waited += 0.2 - # Windows may report MODIFIED instead of CREATED - file_events = [e for e in events if e.change_type in (ChangeType.CREATED, ChangeType.MODIFIED)] - if any(e.path.name == "new_file.py" for e in file_events): - break - - # Check event was detected (Windows may report MODIFIED instead of CREATED) - relevant_events = [e for e in events if e.change_type in (ChangeType.CREATED, ChangeType.MODIFIED)] - assert len(relevant_events) >= 1, f"Expected file event, got: {events}" - assert any(e.path.name == "new_file.py" for e in relevant_events) - finally: - watcher.stop() - - def test_filter_ignored_directories(self, temp_project: Path, watcher_config: WatcherConfig): - """Test that files in ignored directories are filtered.""" - events: List[FileEvent] = [] - watcher = FileWatcher(temp_project, watcher_config, lambda e: events.extend(e)) - - try: - watcher.start() - time.sleep(0.1) - - # Create file in .git (should be ignored) - git_file = temp_project / ".git" / "test.py" - git_file.write_text("# In git\n") - - time.sleep(watcher_config.debounce_ms / 1000.0 + 0.2) - - # No events should be detected for .git files - git_events = [e for e in events if ".git" in str(e.path)] - assert len(git_events) == 0 - finally: - watcher.stop() diff --git a/codex-lens/tests/unit/__init__.py b/codex-lens/tests/unit/__init__.py deleted file mode 100644 index 4a5d2636..00000000 --- a/codex-lens/tests/unit/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Unit tests package diff --git a/codex-lens/tests/unit/lsp/__init__.py b/codex-lens/tests/unit/lsp/__init__.py deleted file mode 100644 index 645c88fe..00000000 --- a/codex-lens/tests/unit/lsp/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# LSP unit tests package diff --git a/codex-lens/tests/unit/lsp/test_lsp_bridge.py b/codex-lens/tests/unit/lsp/test_lsp_bridge.py deleted file mode 100644 index 2c607655..00000000 --- a/codex-lens/tests/unit/lsp/test_lsp_bridge.py +++ /dev/null @@ -1,879 +0,0 @@ -"""Unit tests for LspBridge service (VSCode Bridge HTTP mode). - -This module provides comprehensive tests for the LspBridge class when used -in VSCode Bridge HTTP mode (use_vscode_bridge=True). These tests mock -aiohttp HTTP communication with the VSCode Bridge extension. - -Test coverage: -- P0 (Critical): Success/failure scenarios for core methods -- P1 (Important): Cache hit/miss and invalidation logic -- P2 (Supplementary): Edge cases and error handling - -Note: For standalone mode tests (direct language server communication), -see tests/real/ directory. -""" - -from __future__ import annotations - -import asyncio -import time -from typing import Any, Dict, List -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -# Skip all tests if aiohttp is not available -pytest.importorskip("aiohttp") - -import aiohttp - -from codexlens.hybrid_search.data_structures import ( - CallHierarchyItem, - CodeSymbolNode, - Range, -) -from codexlens.lsp.lsp_bridge import ( - CacheEntry, - Location, - LspBridge, -) - - -# ----------------------------------------------------------------------------- -# Fixtures -# ----------------------------------------------------------------------------- - - -@pytest.fixture -def sample_symbol() -> CodeSymbolNode: - """Create a sample CodeSymbolNode for testing. - - Returns: - CodeSymbolNode with typical function symbol data. - """ - return CodeSymbolNode( - id="test.py:test_func:10", - name="test_func", - kind="function", - file_path="/path/to/test.py", - range=Range( - start_line=10, - start_character=1, - end_line=20, - end_character=1, - ), - ) - - -@pytest.fixture -def mock_response() -> AsyncMock: - """Create a mock aiohttp response with configurable attributes. - - Returns: - AsyncMock configured as aiohttp ClientResponse. - """ - response = AsyncMock() - response.status = 200 - response.json = AsyncMock(return_value={"success": True, "result": []}) - return response - - -@pytest.fixture -def mock_session(mock_response: AsyncMock) -> AsyncMock: - """Create a mock aiohttp ClientSession. - - Args: - mock_response: The mock response to return from post(). - - Returns: - AsyncMock configured as aiohttp ClientSession with async context manager. - """ - session = AsyncMock(spec=aiohttp.ClientSession) - - # Configure post() to return context manager with response - post_cm = AsyncMock() - post_cm.__aenter__ = AsyncMock(return_value=mock_response) - post_cm.__aexit__ = AsyncMock(return_value=None) - session.post = MagicMock(return_value=post_cm) - session.closed = False - - return session - - -@pytest.fixture -def lsp_bridge() -> LspBridge: - """Create a fresh LspBridge instance for testing in VSCode Bridge mode. - - Returns: - LspBridge with use_vscode_bridge=True for HTTP-based tests. - """ - return LspBridge(use_vscode_bridge=True) - - -# ----------------------------------------------------------------------------- -# Location Tests -# ----------------------------------------------------------------------------- - - -class TestLocation: - """Tests for the Location dataclass.""" - - def test_to_dict(self): - """Location.to_dict() returns correct dictionary format.""" - loc = Location(file_path="/test/file.py", line=10, character=5) - result = loc.to_dict() - - assert result == { - "file_path": "/test/file.py", - "line": 10, - "character": 5, - } - - def test_from_lsp_response_with_range(self): - """Location.from_lsp_response() parses LSP range format correctly.""" - data = { - "uri": "file:///test/file.py", - "range": { - "start": {"line": 9, "character": 4}, # 0-based - "end": {"line": 15, "character": 0}, - }, - } - loc = Location.from_lsp_response(data) - - assert loc.file_path == "/test/file.py" - assert loc.line == 10 # Converted to 1-based - assert loc.character == 5 # Converted to 1-based - - def test_from_lsp_response_direct_fields(self): - """Location.from_lsp_response() handles direct line/character fields.""" - data = { - "file_path": "/direct/path.py", - "line": 25, - "character": 8, - } - loc = Location.from_lsp_response(data) - - assert loc.file_path == "/direct/path.py" - assert loc.line == 25 - assert loc.character == 8 - - -class TestLocationFromVscodeUri: - """Tests for parsing VSCode URI formats (P2 test case).""" - - @pytest.mark.parametrize( - "uri,expected_path", - [ - # Unix-style paths - ("file:///home/user/project/file.py", "/home/user/project/file.py"), - ("file:///usr/local/lib.py", "/usr/local/lib.py"), - # Windows-style paths - ("file:///C:/Users/dev/project/file.py", "C:/Users/dev/project/file.py"), - ("file:///D:/code/test.ts", "D:/code/test.ts"), - # Already plain path - ("/plain/path/file.py", "/plain/path/file.py"), - # Edge case: file:// without third slash - ("file://shared/network/file.py", "shared/network/file.py"), - ], - ) - def test_location_from_vscode_uri(self, uri: str, expected_path: str): - """Test correct parsing of various VSCode URI formats to OS paths. - - Verifies that file:///C:/path format on Windows and file:///path - format on Unix are correctly converted to native OS paths. - """ - data = { - "uri": uri, - "range": {"start": {"line": 0, "character": 0}, "end": {"line": 0, "character": 0}}, - } - loc = Location.from_lsp_response(data) - - assert loc.file_path == expected_path - - -# ----------------------------------------------------------------------------- -# P0 Critical Tests -# ----------------------------------------------------------------------------- - - -class TestGetReferencesSuccess: - """P0: Test successful get_references scenarios.""" - - @pytest.mark.asyncio - async def test_get_references_success( - self, - lsp_bridge: LspBridge, - sample_symbol: CodeSymbolNode, - mock_session: AsyncMock, - mock_response: AsyncMock, - ): - """Test get_references returns Location list and caches result. - - Mock session returns 200 OK with valid LSP location list. - Verifies: - - Returns list of Location objects - - Results are stored in cache - """ - # Setup mock response with valid locations - mock_response.status = 200 - mock_response.json = AsyncMock(return_value={ - "success": True, - "result": [ - { - "uri": "file:///ref1.py", - "range": {"start": {"line": 5, "character": 0}, "end": {"line": 5, "character": 10}}, - }, - { - "uri": "file:///ref2.py", - "range": {"start": {"line": 15, "character": 4}, "end": {"line": 15, "character": 14}}, - }, - ], - }) - - # Inject mock session - lsp_bridge._session = mock_session - - # Execute - with patch.object(lsp_bridge, "_get_file_mtime", return_value=1000.0): - refs = await lsp_bridge.get_references(sample_symbol) - - # Verify results - assert len(refs) == 2 - assert isinstance(refs[0], Location) - assert refs[0].file_path == "/ref1.py" - assert refs[0].line == 6 # 0-based to 1-based - assert refs[1].file_path == "/ref2.py" - assert refs[1].line == 16 - - # Verify cached - cache_key = f"refs:{sample_symbol.id}" - assert cache_key in lsp_bridge.cache - assert lsp_bridge.cache[cache_key].data == refs - - -class TestGetReferencesBridgeNotRunning: - """P0: Test get_references when bridge is not running.""" - - @pytest.mark.asyncio - async def test_get_references_bridge_not_running( - self, - lsp_bridge: LspBridge, - sample_symbol: CodeSymbolNode, - ): - """Test get_references returns empty list on ClientConnectorError. - - When VSCode Bridge is not running, aiohttp raises ClientConnectorError. - Verifies: - - Returns empty list [] - - No cache entry is created - """ - # Setup mock session that raises connection error - mock_session = AsyncMock(spec=aiohttp.ClientSession) - mock_session.closed = False - mock_session.post = MagicMock(side_effect=aiohttp.ClientConnectorError( - connection_key=MagicMock(), - os_error=OSError("Connection refused"), - )) - - lsp_bridge._session = mock_session - - # Execute - refs = await lsp_bridge.get_references(sample_symbol) - - # Verify - assert refs == [] - cache_key = f"refs:{sample_symbol.id}" - assert cache_key not in lsp_bridge.cache - - -class TestGetReferencesTimeout: - """P0: Test get_references timeout handling.""" - - @pytest.mark.asyncio - async def test_get_references_timeout( - self, - lsp_bridge: LspBridge, - sample_symbol: CodeSymbolNode, - ): - """Test get_references returns empty list on asyncio.TimeoutError. - - When request times out, should gracefully return empty list. - """ - # Setup mock session that raises timeout - mock_session = AsyncMock(spec=aiohttp.ClientSession) - mock_session.closed = False - - async def raise_timeout(*args, **kwargs): - raise asyncio.TimeoutError() - - post_cm = AsyncMock() - post_cm.__aenter__ = raise_timeout - post_cm.__aexit__ = AsyncMock(return_value=None) - mock_session.post = MagicMock(return_value=post_cm) - - lsp_bridge._session = mock_session - - # Execute - refs = await lsp_bridge.get_references(sample_symbol) - - # Verify - assert refs == [] - - -class TestCallHierarchyFallback: - """P0: Test call_hierarchy fallback to references.""" - - @pytest.mark.asyncio - async def test_call_hierarchy_fallback_to_references( - self, - lsp_bridge: LspBridge, - sample_symbol: CodeSymbolNode, - mock_session: AsyncMock, - ): - """Test get_call_hierarchy falls back to get_references when not supported. - - When call_hierarchy request returns None (not supported by language server), - verifies: - - Falls back to calling get_references - - Returns converted CallHierarchyItem list - """ - call_count = 0 - - async def mock_json(): - nonlocal call_count - call_count += 1 - if call_count == 1: - # First call is get_call_hierarchy - return failure - return {"success": False} - else: - # Second call is get_references - return valid refs - return { - "success": True, - "result": [ - { - "uri": "file:///caller.py", - "range": {"start": {"line": 10, "character": 5}, "end": {"line": 10, "character": 15}}, - }, - ], - } - - # Setup mock response - mock_response = AsyncMock() - mock_response.status = 200 - mock_response.json = mock_json - - post_cm = AsyncMock() - post_cm.__aenter__ = AsyncMock(return_value=mock_response) - post_cm.__aexit__ = AsyncMock(return_value=None) - mock_session.post = MagicMock(return_value=post_cm) - - lsp_bridge._session = mock_session - - # Execute - with patch.object(lsp_bridge, "_get_file_mtime", return_value=1000.0): - items = await lsp_bridge.get_call_hierarchy(sample_symbol) - - # Verify fallback occurred and returned CallHierarchyItem - assert len(items) == 1 - assert isinstance(items[0], CallHierarchyItem) - assert items[0].file_path == "/caller.py" - assert items[0].kind == "reference" - assert "Inferred from reference" in items[0].detail - - -# ----------------------------------------------------------------------------- -# P1 Important Tests -# ----------------------------------------------------------------------------- - - -class TestCacheHit: - """P1: Test cache hit behavior.""" - - @pytest.mark.asyncio - async def test_cache_hit( - self, - lsp_bridge: LspBridge, - sample_symbol: CodeSymbolNode, - mock_session: AsyncMock, - mock_response: AsyncMock, - ): - """Test that same symbol called twice only makes one request. - - Verifies: - - _request is only called once - - Second call returns cached result - """ - mock_response.status = 200 - mock_response.json = AsyncMock(return_value={ - "success": True, - "result": [ - {"uri": "file:///ref.py", "range": {"start": {"line": 0, "character": 0}, "end": {"line": 0, "character": 0}}}, - ], - }) - - lsp_bridge._session = mock_session - - with patch.object(lsp_bridge, "_get_file_mtime", return_value=1000.0): - # First call - should make request - refs1 = await lsp_bridge.get_references(sample_symbol) - - # Second call - should use cache - refs2 = await lsp_bridge.get_references(sample_symbol) - - # Verify only one HTTP call was made - assert mock_session.post.call_count == 1 - - # Verify both calls return same data - assert refs1 == refs2 - - -class TestCacheInvalidationTtl: - """P1: Test cache TTL invalidation.""" - - @pytest.mark.asyncio - async def test_cache_invalidation_ttl( - self, - sample_symbol: CodeSymbolNode, - mock_session: AsyncMock, - mock_response: AsyncMock, - ): - """Test cache entry expires after TTL. - - Sets extremely short TTL and verifies: - - Cache entry expires - - New request is made after TTL expires - """ - # Create bridge with very short TTL (VSCode Bridge mode for HTTP tests) - bridge = LspBridge(cache_ttl=1, use_vscode_bridge=True) # 1 second TTL - - mock_response.status = 200 - mock_response.json = AsyncMock(return_value={ - "success": True, - "result": [ - {"uri": "file:///ref.py", "range": {"start": {"line": 0, "character": 0}, "end": {"line": 0, "character": 0}}}, - ], - }) - - bridge._session = mock_session - - with patch.object(bridge, "_get_file_mtime", return_value=1000.0): - # First call - await bridge.get_references(sample_symbol) - assert mock_session.post.call_count == 1 - - # Wait for TTL to expire - await asyncio.sleep(1.1) - - # Second call - should make new request - await bridge.get_references(sample_symbol) - assert mock_session.post.call_count == 2 - - await bridge.close() - - -class TestCacheInvalidationFileModified: - """P1: Test cache invalidation on file modification.""" - - @pytest.mark.asyncio - async def test_cache_invalidation_file_modified( - self, - lsp_bridge: LspBridge, - sample_symbol: CodeSymbolNode, - mock_session: AsyncMock, - mock_response: AsyncMock, - ): - """Test cache entry invalidates when file mtime changes. - - Verifies: - - mtime change triggers cache invalidation - - New request is made after file modification - """ - mock_response.status = 200 - mock_response.json = AsyncMock(return_value={ - "success": True, - "result": [ - {"uri": "file:///ref.py", "range": {"start": {"line": 0, "character": 0}, "end": {"line": 0, "character": 0}}}, - ], - }) - - lsp_bridge._session = mock_session - - # Mock mtime: first call returns 1000.0, subsequent calls return 2000.0 - # This simulates file being modified between cache store and cache check - call_count = [0] - - def get_mtime(path: str) -> float: - call_count[0] += 1 - # First call during _cache() stores mtime 1000.0 - # Second call during _is_cached() should see different mtime - if call_count[0] <= 1: - return 1000.0 - return 2000.0 # File modified - - with patch.object(lsp_bridge, "_get_file_mtime", side_effect=get_mtime): - # First call - should make request and cache with mtime 1000.0 - await lsp_bridge.get_references(sample_symbol) - assert mock_session.post.call_count == 1 - - # Second call - mtime check returns 2000.0 (different from cached 1000.0) - # Should invalidate cache and make new request - await lsp_bridge.get_references(sample_symbol) - assert mock_session.post.call_count == 2 - - -# ----------------------------------------------------------------------------- -# P2 Supplementary Tests -# ----------------------------------------------------------------------------- - - -class TestResponseParsingInvalidJson: - """P2: Test handling of malformed JSON responses.""" - - @pytest.mark.asyncio - async def test_response_parsing_invalid_json( - self, - lsp_bridge: LspBridge, - sample_symbol: CodeSymbolNode, - mock_session: AsyncMock, - ): - """Test graceful handling of malformed JSON response. - - Verifies: - - Returns empty list when JSON parsing fails - - Does not raise exception - """ - # Setup mock to raise JSONDecodeError - mock_response = AsyncMock() - mock_response.status = 200 - mock_response.json = AsyncMock(side_effect=Exception("Invalid JSON")) - - post_cm = AsyncMock() - post_cm.__aenter__ = AsyncMock(return_value=mock_response) - post_cm.__aexit__ = AsyncMock(return_value=None) - mock_session.post = MagicMock(return_value=post_cm) - - lsp_bridge._session = mock_session - - # Execute - should not raise - refs = await lsp_bridge.get_references(sample_symbol) - - # Verify graceful handling - assert refs == [] - - @pytest.mark.asyncio - async def test_response_with_malformed_location_items( - self, - lsp_bridge: LspBridge, - sample_symbol: CodeSymbolNode, - mock_session: AsyncMock, - mock_response: AsyncMock, - ): - """Test handling of partially malformed location items. - - The source code catches KeyError and TypeError when parsing items. - Tests that items causing these specific exceptions are skipped while - valid items are returned. - """ - mock_response.status = 200 - mock_response.json = AsyncMock(return_value={ - "success": True, - "result": [ - # Valid item - {"uri": "file:///valid.py", "range": {"start": {"line": 0, "character": 0}, "end": {"line": 0, "character": 0}}}, - # Another valid item - {"uri": "file:///valid2.py", "range": {"start": {"line": 5, "character": 0}, "end": {"line": 5, "character": 0}}}, - ], - }) - - lsp_bridge._session = mock_session - - with patch.object(lsp_bridge, "_get_file_mtime", return_value=1000.0): - refs = await lsp_bridge.get_references(sample_symbol) - - # Should return both valid items - assert len(refs) == 2 - assert refs[0].file_path == "/valid.py" - assert refs[1].file_path == "/valid2.py" - - @pytest.mark.asyncio - async def test_response_with_empty_result_list( - self, - lsp_bridge: LspBridge, - sample_symbol: CodeSymbolNode, - mock_session: AsyncMock, - mock_response: AsyncMock, - ): - """Test handling of empty result list.""" - mock_response.status = 200 - mock_response.json = AsyncMock(return_value={ - "success": True, - "result": [], - }) - - lsp_bridge._session = mock_session - - with patch.object(lsp_bridge, "_get_file_mtime", return_value=1000.0): - refs = await lsp_bridge.get_references(sample_symbol) - - assert refs == [] - - -class TestLspBridgeContextManager: - """Test async context manager functionality (VSCode Bridge mode).""" - - @pytest.mark.asyncio - async def test_context_manager_closes_session(self): - """Test that async context manager properly closes session in VSCode Bridge mode.""" - async with LspBridge(use_vscode_bridge=True) as bridge: - # Create a session - session = await bridge._get_session() - assert session is not None - assert not session.closed - - # After context, session should be closed - assert bridge._session is None or bridge._session.closed - - -class TestCacheEntry: - """Test CacheEntry dataclass.""" - - def test_cache_entry_fields(self): - """CacheEntry stores all required fields.""" - entry = CacheEntry( - data=["some", "data"], - file_mtime=12345.0, - cached_at=time.time(), - ) - - assert entry.data == ["some", "data"] - assert entry.file_mtime == 12345.0 - assert entry.cached_at > 0 - - -class TestLspBridgeCacheLru: - """Test LRU cache behavior.""" - - def test_cache_lru_eviction(self): - """Test that oldest entries are evicted when at max capacity.""" - bridge = LspBridge(max_cache_size=3) - - # Add entries - bridge._cache("key1", "/file1.py", "data1") - bridge._cache("key2", "/file2.py", "data2") - bridge._cache("key3", "/file3.py", "data3") - - assert len(bridge.cache) == 3 - - # Add one more - should evict oldest (key1) - bridge._cache("key4", "/file4.py", "data4") - - assert len(bridge.cache) == 3 - assert "key1" not in bridge.cache - assert "key4" in bridge.cache - - def test_cache_access_moves_to_end(self): - """Test that accessing cached item moves it to end (LRU behavior).""" - bridge = LspBridge(max_cache_size=3) - - with patch.object(bridge, "_get_file_mtime", return_value=1000.0): - bridge._cache("key1", "/file.py", "data1") - bridge._cache("key2", "/file.py", "data2") - bridge._cache("key3", "/file.py", "data3") - - # Access key1 - should move it to end - bridge._is_cached("key1", "/file.py") - - # Add key4 - should evict key2 (now oldest) - bridge._cache("key4", "/file.py", "data4") - - assert "key1" in bridge.cache - assert "key2" not in bridge.cache - - -class TestGetHover: - """Test get_hover method.""" - - @pytest.mark.asyncio - async def test_get_hover_returns_string( - self, - lsp_bridge: LspBridge, - sample_symbol: CodeSymbolNode, - mock_session: AsyncMock, - mock_response: AsyncMock, - ): - """Test get_hover returns hover documentation string.""" - mock_response.status = 200 - mock_response.json = AsyncMock(return_value={ - "success": True, - "result": { - "contents": "Function documentation here", - }, - }) - - lsp_bridge._session = mock_session - - with patch.object(lsp_bridge, "_get_file_mtime", return_value=1000.0): - hover = await lsp_bridge.get_hover(sample_symbol) - - assert hover == "Function documentation here" - - @pytest.mark.asyncio - async def test_get_hover_handles_marked_string_list( - self, - lsp_bridge: LspBridge, - sample_symbol: CodeSymbolNode, - mock_session: AsyncMock, - mock_response: AsyncMock, - ): - """Test get_hover handles MarkedString list format.""" - mock_response.status = 200 - mock_response.json = AsyncMock(return_value={ - "success": True, - "result": [ - {"value": "```python\ndef func():\n```"}, - {"value": "Documentation text"}, - ], - }) - - lsp_bridge._session = mock_session - - with patch.object(lsp_bridge, "_get_file_mtime", return_value=1000.0): - hover = await lsp_bridge.get_hover(sample_symbol) - - assert "def func()" in hover - assert "Documentation text" in hover - - -class TestGetDefinition: - """Test get_definition method.""" - - @pytest.mark.asyncio - async def test_get_definition_returns_location( - self, - lsp_bridge: LspBridge, - sample_symbol: CodeSymbolNode, - mock_session: AsyncMock, - mock_response: AsyncMock, - ): - """Test get_definition returns Location for found definition.""" - mock_response.status = 200 - mock_response.json = AsyncMock(return_value={ - "success": True, - "result": [ - { - "uri": "file:///definition.py", - "range": {"start": {"line": 99, "character": 0}, "end": {"line": 110, "character": 0}}, - }, - ], - }) - - lsp_bridge._session = mock_session - - with patch.object(lsp_bridge, "_get_file_mtime", return_value=1000.0): - definition = await lsp_bridge.get_definition(sample_symbol) - - assert definition is not None - assert definition.file_path == "/definition.py" - assert definition.line == 100 # 0-based to 1-based - - @pytest.mark.asyncio - async def test_get_definition_returns_none_on_failure( - self, - lsp_bridge: LspBridge, - sample_symbol: CodeSymbolNode, - mock_session: AsyncMock, - mock_response: AsyncMock, - ): - """Test get_definition returns None when not found.""" - mock_response.status = 200 - mock_response.json = AsyncMock(return_value={ - "success": False, - }) - - lsp_bridge._session = mock_session - - definition = await lsp_bridge.get_definition(sample_symbol) - - assert definition is None - - -class TestGetDocumentSymbols: - """Test get_document_symbols method.""" - - @pytest.mark.asyncio - async def test_get_document_symbols_flattens_hierarchy( - self, - lsp_bridge: LspBridge, - mock_session: AsyncMock, - mock_response: AsyncMock, - ): - """Test get_document_symbols flattens nested symbol hierarchy.""" - mock_response.status = 200 - mock_response.json = AsyncMock(return_value={ - "success": True, - "result": [ - { - "name": "MyClass", - "kind": 5, # Class - "range": {"start": {"line": 0, "character": 0}, "end": {"line": 20, "character": 0}}, - "children": [ - { - "name": "my_method", - "kind": 6, # Method - "range": {"start": {"line": 5, "character": 4}, "end": {"line": 10, "character": 4}}, - }, - ], - }, - ], - }) - - lsp_bridge._session = mock_session - - with patch.object(lsp_bridge, "_get_file_mtime", return_value=1000.0): - symbols = await lsp_bridge.get_document_symbols("/test/file.py") - - # Should have both class and method - assert len(symbols) == 2 - assert symbols[0]["name"] == "MyClass" - assert symbols[0]["kind"] == "class" - assert symbols[1]["name"] == "my_method" - assert symbols[1]["kind"] == "method" - assert symbols[1]["parent"] == "MyClass" - - -class TestSymbolKindConversion: - """Test symbol kind integer to string conversion.""" - - @pytest.mark.parametrize( - "kind_int,expected_str", - [ - (1, "file"), - (5, "class"), - (6, "method"), - (12, "function"), - (13, "variable"), - (999, "unknown"), # Unknown kind - ], - ) - def test_symbol_kind_to_string(self, kind_int: int, expected_str: str): - """Test _symbol_kind_to_string converts LSP SymbolKind correctly.""" - bridge = LspBridge() - result = bridge._symbol_kind_to_string(kind_int) - assert result == expected_str - - -class TestClearCache: - """Test cache clearing functionality.""" - - def test_clear_cache(self, lsp_bridge: LspBridge): - """Test clear_cache removes all entries.""" - # Add some cache entries - lsp_bridge._cache("key1", "/file.py", "data1") - lsp_bridge._cache("key2", "/file.py", "data2") - - assert len(lsp_bridge.cache) == 2 - - # Clear - lsp_bridge.clear_cache() - - assert len(lsp_bridge.cache) == 0 diff --git a/codex-lens/tests/unit/lsp/test_lsp_edge_cases.py b/codex-lens/tests/unit/lsp/test_lsp_edge_cases.py deleted file mode 100644 index 3f0cd4b0..00000000 --- a/codex-lens/tests/unit/lsp/test_lsp_edge_cases.py +++ /dev/null @@ -1,795 +0,0 @@ -"""Edge case and exception tests for LSP Bridge and Graph Builder. - -This module tests boundary conditions, error handling, and exceptional -scenarios in the LSP communication and graph building components. - -Test Categories: -- P1 (Critical): Empty responses, HTTP errors -- P2 (Important): Edge inputs, deep structures, special characters -- P3 (Nice-to-have): Cache eviction, concurrent access, circular refs - -Note: Tests for HTTP-based communication use use_vscode_bridge=True mode. -""" - -from __future__ import annotations - -import asyncio -from typing import Any, Dict, List -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from codexlens.hybrid_search.data_structures import ( - CodeAssociationGraph, - CodeSymbolNode, - Range, -) - - -# --------------------------------------------------------------------------- -# Fixtures -# --------------------------------------------------------------------------- - -@pytest.fixture -def valid_range() -> Range: - """Create a valid Range for test symbols.""" - return Range( - start_line=10, - start_character=0, - end_line=20, - end_character=0, - ) - - -@pytest.fixture -def sample_symbol(valid_range: Range) -> CodeSymbolNode: - """Create a sample CodeSymbolNode for testing.""" - return CodeSymbolNode( - id="test/file.py:test_func:10", - name="test_func", - kind="function", - file_path="test/file.py", - range=valid_range, - ) - - -@pytest.fixture -def symbol_with_empty_path() -> CodeSymbolNode: - """Create a CodeSymbolNode with empty file_path. - - Note: CodeSymbolNode.__post_init__ validates that file_path cannot be empty, - so this fixture tests the case where validation is bypassed or data comes - from external sources that might have empty paths. - """ - # We need to bypass validation for this edge case test - node = object.__new__(CodeSymbolNode) - node.id = "::0" - node.name = "empty" - node.kind = "unknown" - node.file_path = "" # Empty path - edge case - node.range = Range(start_line=0, start_character=0, end_line=0, end_character=0) - node.embedding = None - node.raw_code = "" - node.docstring = "" - node.score = 0.0 - return node - - -@pytest.fixture -def mock_aiohttp_session(): - """Create a mock aiohttp ClientSession.""" - session = AsyncMock() - return session - - -@pytest.fixture -def mock_error_response(): - """Create a mock aiohttp response with HTTP 500 error.""" - response = AsyncMock() - response.status = 500 - response.json = AsyncMock(return_value={"error": "Internal Server Error"}) - return response - - -@pytest.fixture -def mock_empty_response(): - """Create a mock aiohttp response returning empty list.""" - response = AsyncMock() - response.status = 200 - response.json = AsyncMock(return_value={"success": True, "result": []}) - return response - - -# --------------------------------------------------------------------------- -# P1 Tests - Critical Edge Cases -# --------------------------------------------------------------------------- - -class TestLspReturnsEmptyList: - """Test handling when LSP returns empty results. - - Module: LspGraphBuilder._expand_node - Mock: LspBridge methods return [] - Assert: Node marked as visited, no new nodes/edges added, returns [] - """ - - @pytest.mark.asyncio - async def test_expand_node_with_empty_references(self, sample_symbol: CodeSymbolNode): - """When LSP returns empty references, node should be visited but no expansion.""" - from codexlens.lsp.lsp_graph_builder import LspGraphBuilder - - # Create mock LspBridge that returns empty results - mock_bridge = AsyncMock() - mock_bridge.get_references = AsyncMock(return_value=[]) - mock_bridge.get_call_hierarchy = AsyncMock(return_value=[]) - - builder = LspGraphBuilder(max_depth=2, max_nodes=100) - graph = CodeAssociationGraph() - graph.add_node(sample_symbol) - visited = set() - semaphore = asyncio.Semaphore(10) - - # Expand the node - result = await builder._expand_node( - sample_symbol, - depth=0, - graph=graph, - lsp_bridge=mock_bridge, - visited=visited, - semaphore=semaphore, - ) - - # Assertions - assert sample_symbol.id in visited # Node should be marked as visited - assert result == [] # No new nodes to process - assert len(graph.nodes) == 1 # Only the original seed node - assert len(graph.edges) == 0 # No edges added - - @pytest.mark.asyncio - async def test_build_from_seeds_with_empty_lsp_results(self, sample_symbol: CodeSymbolNode): - """When LSP returns empty for all queries, graph should contain only seeds.""" - from codexlens.lsp.lsp_graph_builder import LspGraphBuilder - - mock_bridge = AsyncMock() - mock_bridge.get_references = AsyncMock(return_value=[]) - mock_bridge.get_call_hierarchy = AsyncMock(return_value=[]) - mock_bridge.get_document_symbols = AsyncMock(return_value=[]) - - builder = LspGraphBuilder(max_depth=2, max_nodes=100) - - # Build graph from seed - graph = await builder.build_from_seeds([sample_symbol], mock_bridge) - - # Should only have the seed node - assert len(graph.nodes) == 1 - assert sample_symbol.id in graph.nodes - assert len(graph.edges) == 0 - - @pytest.mark.asyncio - async def test_already_visited_node_returns_empty(self, sample_symbol: CodeSymbolNode): - """Attempting to expand an already-visited node should return empty immediately.""" - from codexlens.lsp.lsp_graph_builder import LspGraphBuilder - - mock_bridge = AsyncMock() - # These should not be called since node is already visited - mock_bridge.get_references = AsyncMock(return_value=[]) - mock_bridge.get_call_hierarchy = AsyncMock(return_value=[]) - - builder = LspGraphBuilder() - graph = CodeAssociationGraph() - graph.add_node(sample_symbol) - visited = {sample_symbol.id} # Already visited - semaphore = asyncio.Semaphore(10) - - result = await builder._expand_node( - sample_symbol, - depth=0, - graph=graph, - lsp_bridge=mock_bridge, - visited=visited, - semaphore=semaphore, - ) - - assert result == [] - # Bridge methods should not have been called - mock_bridge.get_references.assert_not_called() - mock_bridge.get_call_hierarchy.assert_not_called() - - -class TestLspHttpError500: - """Test handling of HTTP 500 errors from LSP bridge (VSCode Bridge mode). - - Module: LspBridge._request_vscode_bridge - Mock: aiohttp response status=500 - Assert: Returns None, caller handles as failure - """ - - @pytest.mark.asyncio - async def test_request_returns_none_on_500(self): - """HTTP 500 response should result in None return value.""" - from codexlens.lsp.lsp_bridge import LspBridge - - # Create bridge in VSCode Bridge mode with mocked session - bridge = LspBridge(use_vscode_bridge=True) - - # Mock the session to return 500 error - mock_response = AsyncMock() - mock_response.status = 500 - mock_response.__aenter__ = AsyncMock(return_value=mock_response) - mock_response.__aexit__ = AsyncMock(return_value=None) - - mock_session = AsyncMock() - mock_session.post = MagicMock(return_value=mock_response) - - with patch.object(bridge, '_get_session', return_value=mock_session): - result = await bridge._request_vscode_bridge("get_references", {"file_path": "test.py"}) - - assert result is None - - @pytest.mark.asyncio - async def test_get_references_returns_empty_on_500(self, sample_symbol: CodeSymbolNode): - """get_references should return empty list on HTTP 500.""" - from codexlens.lsp.lsp_bridge import LspBridge - - bridge = LspBridge(use_vscode_bridge=True) - - # Mock _request_vscode_bridge to return None (simulating HTTP error) - with patch.object(bridge, '_request_vscode_bridge', return_value=None): - result = await bridge.get_references(sample_symbol) - - assert result == [] - - @pytest.mark.asyncio - async def test_get_definition_returns_none_on_500(self, sample_symbol: CodeSymbolNode): - """get_definition should return None on HTTP 500.""" - from codexlens.lsp.lsp_bridge import LspBridge - - bridge = LspBridge(use_vscode_bridge=True) - - with patch.object(bridge, '_request_vscode_bridge', return_value=None): - result = await bridge.get_definition(sample_symbol) - - assert result is None - - @pytest.mark.asyncio - async def test_get_hover_returns_none_on_500(self, sample_symbol: CodeSymbolNode): - """get_hover should return None on HTTP 500.""" - from codexlens.lsp.lsp_bridge import LspBridge - - bridge = LspBridge(use_vscode_bridge=True) - - with patch.object(bridge, '_request_vscode_bridge', return_value=None): - result = await bridge.get_hover(sample_symbol) - - assert result is None - - @pytest.mark.asyncio - async def test_graph_builder_handles_lsp_errors_gracefully(self, sample_symbol: CodeSymbolNode): - """Graph builder should handle LSP errors without crashing.""" - from codexlens.lsp.lsp_graph_builder import LspGraphBuilder - - mock_bridge = AsyncMock() - # Simulate exceptions from LSP - mock_bridge.get_references = AsyncMock(side_effect=Exception("LSP Error")) - mock_bridge.get_call_hierarchy = AsyncMock(side_effect=Exception("LSP Error")) - - builder = LspGraphBuilder() - - # Should not raise, should return graph with just the seed - graph = await builder.build_from_seeds([sample_symbol], mock_bridge) - - assert len(graph.nodes) == 1 - assert sample_symbol.id in graph.nodes - - -# --------------------------------------------------------------------------- -# P2 Tests - Important Edge Cases -# --------------------------------------------------------------------------- - -class TestSymbolWithEmptyFilePath: - """Test handling of symbols with empty file_path (VSCode Bridge mode). - - Module: LspBridge.get_references - Input: CodeSymbolNode with file_path="" - Assert: Does not send request, returns [] early - """ - - @pytest.mark.asyncio - async def test_get_references_with_empty_path_symbol(self, symbol_with_empty_path: CodeSymbolNode): - """get_references with empty file_path should handle gracefully.""" - from codexlens.lsp.lsp_bridge import LspBridge - - bridge = LspBridge(use_vscode_bridge=True) - - # Mock _request_vscode_bridge - it should still work but with empty path - mock_result = [] - with patch.object(bridge, '_request_vscode_bridge', return_value=mock_result) as mock_req: - result = await bridge.get_references(symbol_with_empty_path) - - # Should return empty list - assert result == [] - # The request was still made (current implementation doesn't pre-validate) - # This documents current behavior - might want to add validation - - @pytest.mark.asyncio - async def test_cache_with_empty_path_symbol(self, symbol_with_empty_path: CodeSymbolNode): - """Cache operations with empty file_path should not crash.""" - from codexlens.lsp.lsp_bridge import LspBridge - - bridge = LspBridge() - - # Cache should handle empty path (mtime check returns 0.0) - cache_key = f"refs:{symbol_with_empty_path.id}" - bridge._cache(cache_key, "", []) # Empty path - - # Should be able to check cache without crashing - is_cached = bridge._is_cached(cache_key, "") - # Note: May or may not be cached depending on mtime behavior - assert isinstance(is_cached, bool) - - -class TestVeryDeepGraphStructure: - """Test graph building with very deep reference chains. - - Module: LspGraphBuilder.build_from_seeds - Input: max_depth=10 - Mock: LspBridge produces long chain of references - Assert: Expansion stops cleanly at max_depth - """ - - @pytest.mark.asyncio - async def test_expansion_stops_at_max_depth(self, valid_range: Range): - """Graph expansion should stop at max_depth.""" - from codexlens.lsp.lsp_bridge import Location - from codexlens.lsp.lsp_graph_builder import LspGraphBuilder - - # Create a chain of symbols: seed -> ref1 -> ref2 -> ... -> refN - max_depth = 3 # Use small depth for testing - - def create_mock_refs(symbol: CodeSymbolNode) -> List[Location]: - """Create a single reference pointing to next in chain.""" - depth = int(symbol.id.split(":")[-1]) # Extract depth from ID - if depth >= max_depth + 5: # Chain goes deeper than max_depth - return [] - next_depth = depth + 1 - return [Location( - file_path=f"test/file_{next_depth}.py", - line=1, - character=0, - )] - - mock_bridge = AsyncMock() - mock_bridge.get_references = AsyncMock(side_effect=lambda s: create_mock_refs(s)) - mock_bridge.get_call_hierarchy = AsyncMock(return_value=[]) - mock_bridge.get_document_symbols = AsyncMock(return_value=[]) - - # Seed at depth 0 - seed = CodeSymbolNode( - id="test/file_0.py:seed:0", - name="seed", - kind="function", - file_path="test/file_0.py", - range=valid_range, - ) - - builder = LspGraphBuilder(max_depth=max_depth, max_nodes=100) - graph = await builder.build_from_seeds([seed], mock_bridge) - - # Graph should not exceed max_depth + 1 nodes (seed + max_depth levels) - # Actual count depends on how references are resolved - assert len(graph.nodes) <= max_depth + 2 # Some tolerance for edge cases - - @pytest.mark.asyncio - async def test_expansion_stops_at_max_nodes(self, valid_range: Range): - """Graph expansion should stop when max_nodes is reached.""" - from codexlens.lsp.lsp_bridge import Location - from codexlens.lsp.lsp_graph_builder import LspGraphBuilder - - call_count = [0] - - def create_many_refs(symbol: CodeSymbolNode) -> List[Location]: - """Create multiple references to generate many nodes.""" - call_count[0] += 1 - # Return multiple refs to rapidly grow the graph - return [ - Location(file_path=f"test/ref_{call_count[0]}_{i}.py", line=1, character=0) - for i in range(5) - ] - - mock_bridge = AsyncMock() - mock_bridge.get_references = AsyncMock(side_effect=create_many_refs) - mock_bridge.get_call_hierarchy = AsyncMock(return_value=[]) - mock_bridge.get_document_symbols = AsyncMock(return_value=[]) - - seed = CodeSymbolNode( - id="test/seed.py:seed:0", - name="seed", - kind="function", - file_path="test/seed.py", - range=valid_range, - ) - - max_nodes = 10 - builder = LspGraphBuilder(max_depth=100, max_nodes=max_nodes) # High depth, low nodes - graph = await builder.build_from_seeds([seed], mock_bridge) - - # Graph should not exceed max_nodes - assert len(graph.nodes) <= max_nodes - - -class TestNodeIdWithSpecialCharacters: - """Test node ID creation with special characters. - - Module: LspGraphBuilder._create_node_id - Input: file_path="a/b/c", name="", line=10 - Assert: ID successfully created as "a/b/c::10" - """ - - def test_create_node_id_with_special_name(self): - """Node ID should handle special characters in name.""" - from codexlens.lsp.lsp_graph_builder import LspGraphBuilder - - builder = LspGraphBuilder() - - # Test with angle brackets (common in Java/Kotlin constructors) - node_id = builder._create_node_id("a/b/c", "", 10) - assert node_id == "a/b/c::10" - - # Test with other special characters - node_id = builder._create_node_id("src/file.py", "__init__", 1) - assert node_id == "src/file.py:__init__:1" - - # Test with spaces (should preserve as-is) - node_id = builder._create_node_id("my path/file.ts", "my func", 5) - assert node_id == "my path/file.ts:my func:5" - - def test_create_node_id_with_windows_path(self): - """Node ID should handle Windows-style paths.""" - from codexlens.lsp.lsp_graph_builder import LspGraphBuilder - - builder = LspGraphBuilder() - - # Windows path with backslashes - node_id = builder._create_node_id("C:\\Users\\test\\file.py", "main", 1) - assert "main" in node_id - assert "1" in node_id - - def test_create_node_id_with_unicode(self): - """Node ID should handle unicode characters.""" - from codexlens.lsp.lsp_graph_builder import LspGraphBuilder - - builder = LspGraphBuilder() - - # Unicode in name - node_id = builder._create_node_id("src/file.py", "func_name", 10) - assert node_id == "src/file.py:func_name:10" - - def test_code_symbol_node_id_format(self): - """CodeSymbolNode.create_id should match LspGraphBuilder format.""" - from codexlens.lsp.lsp_graph_builder import LspGraphBuilder - - builder = LspGraphBuilder() - - # Both should produce the same format - builder_id = builder._create_node_id("path/file.py", "func", 10) - symbol_id = CodeSymbolNode.create_id("path/file.py", "func", 10) - - assert builder_id == symbol_id - - -# --------------------------------------------------------------------------- -# P3 Tests - Additional Edge Cases (if time allows) -# --------------------------------------------------------------------------- - -class TestCacheLruEviction: - """Test LRU cache eviction behavior. - - Module: LspBridge._cache - Input: max_cache_size=3, add 5 entries - Assert: Only most recent 3 entries remain - """ - - def test_cache_evicts_oldest_entries(self): - """Cache should evict oldest entries when at capacity.""" - from codexlens.lsp.lsp_bridge import LspBridge - - bridge = LspBridge(max_cache_size=3) - - # Add 5 entries (exceeds max of 3) - for i in range(5): - bridge._cache(f"key_{i}", "test.py", f"data_{i}") - - # Should only have 3 entries - assert len(bridge.cache) == 3 - - # Oldest entries (key_0, key_1) should be evicted - assert "key_0" not in bridge.cache - assert "key_1" not in bridge.cache - - # Newest entries should remain - assert "key_2" in bridge.cache - assert "key_3" in bridge.cache - assert "key_4" in bridge.cache - - def test_cache_moves_accessed_entry_to_end(self): - """Accessing a cached entry should move it to end (LRU behavior).""" - from codexlens.lsp.lsp_bridge import LspBridge - - bridge = LspBridge(max_cache_size=3) - - # Add 3 entries - bridge._cache("key_0", "test.py", "data_0") - bridge._cache("key_1", "test.py", "data_1") - bridge._cache("key_2", "test.py", "data_2") - - # Access key_0 (should move to end) - with patch.object(bridge, '_get_file_mtime', return_value=0.0): - bridge._is_cached("key_0", "test.py") - - # Add new entry - key_1 should be evicted (was least recently used) - bridge._cache("key_3", "test.py", "data_3") - - assert len(bridge.cache) == 3 - assert "key_0" in bridge.cache # Was accessed, moved to end - assert "key_1" not in bridge.cache # Was evicted - assert "key_2" in bridge.cache - assert "key_3" in bridge.cache - - -class TestConcurrentCacheAccess: - """Test thread-safety of cache operations. - - Module: LspBridge - Test: Multiple concurrent requests access/update cache - Assert: No race conditions, cache remains consistent - """ - - @pytest.mark.asyncio - async def test_concurrent_cache_operations(self, valid_range: Range): - """Multiple concurrent requests should not corrupt cache.""" - from codexlens.lsp.lsp_bridge import LspBridge - - bridge = LspBridge(max_cache_size=100) - - async def cache_operation(i: int) -> None: - """Simulate a cache read/write operation.""" - key = f"key_{i % 10}" # Reuse keys to create contention - file_path = f"file_{i}.py" - - # Check cache - bridge._is_cached(key, file_path) - - # Small delay to increase contention likelihood - await asyncio.sleep(0.001) - - # Write to cache - bridge._cache(key, file_path, f"data_{i}") - - # Run many concurrent operations - tasks = [cache_operation(i) for i in range(50)] - await asyncio.gather(*tasks) - - # Cache should be in consistent state - assert len(bridge.cache) <= bridge.max_cache_size - - # All entries should be valid CacheEntry objects - for key, entry in bridge.cache.items(): - assert hasattr(entry, 'data') - assert hasattr(entry, 'cached_at') - assert hasattr(entry, 'file_mtime') - - -class TestGraphWithCircularReferences: - """Test graph handling of circular reference patterns. - - Module: LspGraphBuilder - Mock: A -> B -> C -> A circular reference - Assert: visited set prevents infinite loop - """ - - @pytest.mark.asyncio - async def test_circular_references_do_not_loop_infinitely(self, valid_range: Range): - """Circular references should not cause infinite loops.""" - from codexlens.lsp.lsp_bridge import Location - from codexlens.lsp.lsp_graph_builder import LspGraphBuilder - - # Create circular reference pattern: A -> B -> C -> A - symbol_a = CodeSymbolNode( - id="file.py:A:1", name="A", kind="function", - file_path="file.py", range=valid_range, - ) - symbol_b = CodeSymbolNode( - id="file.py:B:10", name="B", kind="function", - file_path="file.py", range=valid_range, - ) - symbol_c = CodeSymbolNode( - id="file.py:C:20", name="C", kind="function", - file_path="file.py", range=valid_range, - ) - - ref_map = { - "file.py:A:1": [Location(file_path="file.py", line=10, character=0)], # A -> B - "file.py:B:10": [Location(file_path="file.py", line=20, character=0)], # B -> C - "file.py:C:20": [Location(file_path="file.py", line=1, character=0)], # C -> A (circular) - } - - def get_refs(symbol: CodeSymbolNode) -> List[Location]: - return ref_map.get(symbol.id, []) - - mock_bridge = AsyncMock() - mock_bridge.get_references = AsyncMock(side_effect=get_refs) - mock_bridge.get_call_hierarchy = AsyncMock(return_value=[]) - mock_bridge.get_document_symbols = AsyncMock(return_value=[ - {"name": "A", "kind": 12, "range": {"start": {"line": 0}, "end": {"line": 5}}}, - {"name": "B", "kind": 12, "range": {"start": {"line": 9}, "end": {"line": 15}}}, - {"name": "C", "kind": 12, "range": {"start": {"line": 19}, "end": {"line": 25}}}, - ]) - - builder = LspGraphBuilder(max_depth=10, max_nodes=100) - - # This should complete without hanging - graph = await asyncio.wait_for( - builder.build_from_seeds([symbol_a], mock_bridge), - timeout=5.0 # Should complete quickly, timeout is just safety - ) - - # Graph should contain the nodes without duplicates - assert len(graph.nodes) >= 1 # At least the seed - # No infinite loop occurred (we reached this point) - - -class TestRequestTimeoutHandling: - """Test timeout handling in LSP requests (VSCode Bridge mode).""" - - @pytest.mark.asyncio - async def test_timeout_returns_none(self, sample_symbol: CodeSymbolNode): - """Request timeout should return None gracefully.""" - from codexlens.lsp.lsp_bridge import LspBridge - - bridge = LspBridge(timeout=0.001, use_vscode_bridge=True) # Very short timeout - - # Mock session to raise TimeoutError - mock_response = AsyncMock() - mock_response.__aenter__ = AsyncMock(side_effect=asyncio.TimeoutError()) - mock_response.__aexit__ = AsyncMock(return_value=None) - - mock_session = AsyncMock() - mock_session.post = MagicMock(return_value=mock_response) - - with patch.object(bridge, '_get_session', return_value=mock_session): - result = await bridge._request_vscode_bridge("get_references", {}) - - assert result is None - - -class TestConnectionRefusedHandling: - """Test handling when VSCode Bridge is not running.""" - - @pytest.mark.asyncio - async def test_connection_refused_returns_none(self): - """Connection refused should return None gracefully.""" - pytest.importorskip("aiohttp") - import aiohttp - from codexlens.lsp.lsp_bridge import LspBridge - - bridge = LspBridge(use_vscode_bridge=True) - - # Mock session to raise ClientConnectorError - mock_session = AsyncMock() - mock_session.post = MagicMock( - side_effect=aiohttp.ClientConnectorError( - MagicMock(), OSError("Connection refused") - ) - ) - - with patch.object(bridge, '_get_session', return_value=mock_session): - result = await bridge._request_vscode_bridge("get_references", {}) - - assert result is None - - -class TestInvalidLspResponses: - """Test handling of malformed LSP responses (VSCode Bridge mode).""" - - @pytest.mark.asyncio - async def test_malformed_json_response(self, sample_symbol: CodeSymbolNode): - """Malformed response should be handled gracefully.""" - from codexlens.lsp.lsp_bridge import LspBridge - - bridge = LspBridge(use_vscode_bridge=True) - - # Response without expected structure - with patch.object(bridge, '_request_vscode_bridge', return_value={"unexpected": "structure"}): - result = await bridge.get_references(sample_symbol) - - # Should return empty list, not crash - assert result == [] - - @pytest.mark.asyncio - async def test_null_result_in_response(self, sample_symbol: CodeSymbolNode): - """Null/None result should be handled gracefully.""" - from codexlens.lsp.lsp_bridge import LspBridge - - bridge = LspBridge(use_vscode_bridge=True) - - with patch.object(bridge, '_request_vscode_bridge', return_value=None): - refs = await bridge.get_references(sample_symbol) - defn = await bridge.get_definition(sample_symbol) - hover = await bridge.get_hover(sample_symbol) - - assert refs == [] - assert defn is None - assert hover is None - - -class TestLocationParsing: - """Test Location parsing from various LSP response formats.""" - - def test_location_from_file_uri_unix(self): - """Parse Location from Unix-style file:// URI.""" - from codexlens.lsp.lsp_bridge import Location - - data = { - "uri": "file:///home/user/project/file.py", - "range": { - "start": {"line": 9, "character": 4}, - "end": {"line": 9, "character": 10}, - } - } - - loc = Location.from_lsp_response(data) - - assert loc.file_path == "/home/user/project/file.py" - assert loc.line == 10 # Converted from 0-based to 1-based - assert loc.character == 5 - - def test_location_from_file_uri_windows(self): - """Parse Location from Windows-style file:// URI.""" - from codexlens.lsp.lsp_bridge import Location - - data = { - "uri": "file:///C:/Users/test/project/file.py", - "range": { - "start": {"line": 0, "character": 0}, - "end": {"line": 0, "character": 5}, - } - } - - loc = Location.from_lsp_response(data) - - assert loc.file_path == "C:/Users/test/project/file.py" - assert loc.line == 1 - assert loc.character == 1 - - def test_location_from_file_uri_windows_percent_encoded_drive(self): - """Parse Location from percent-encoded Windows drive URIs (pyright-style).""" - from codexlens.lsp.lsp_bridge import Location - - data = { - "uri": "file:///d%3A/Claude_dms3/codex-lens/src/codexlens/api/semantic.py", - "range": { - "start": {"line": 18, "character": 3}, - "end": {"line": 18, "character": 10}, - }, - } - - loc = Location.from_lsp_response(data) - - assert loc.file_path == "d:/Claude_dms3/codex-lens/src/codexlens/api/semantic.py" - assert loc.line == 19 # 0-based -> 1-based - assert loc.character == 4 - - def test_location_from_direct_fields(self): - """Parse Location from direct field format.""" - from codexlens.lsp.lsp_bridge import Location - - data = { - "file_path": "/path/to/file.py", - "line": 5, - "character": 10, - } - - loc = Location.from_lsp_response(data) - - assert loc.file_path == "/path/to/file.py" - assert loc.line == 5 - assert loc.character == 10 diff --git a/codex-lens/tests/unit/lsp/test_lsp_graph_builder.py b/codex-lens/tests/unit/lsp/test_lsp_graph_builder.py deleted file mode 100644 index 799fde85..00000000 --- a/codex-lens/tests/unit/lsp/test_lsp_graph_builder.py +++ /dev/null @@ -1,549 +0,0 @@ -"""Unit tests for LspGraphBuilder. - -This module tests the LspGraphBuilder class responsible for building -code association graphs by BFS expansion from seed symbols using LSP. -""" - -from __future__ import annotations - -import asyncio -from typing import Any, Dict, List -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from codexlens.hybrid_search.data_structures import ( - CallHierarchyItem, - CodeAssociationGraph, - CodeSymbolNode, - Range, -) -from codexlens.lsp.lsp_bridge import Location, LspBridge -from codexlens.lsp.lsp_graph_builder import LspGraphBuilder - - -@pytest.fixture -def mock_lsp_bridge() -> AsyncMock: - """Create a mock LspBridge with async methods.""" - bridge = AsyncMock(spec=LspBridge) - bridge.get_references = AsyncMock(return_value=[]) - bridge.get_call_hierarchy = AsyncMock(return_value=[]) - bridge.get_document_symbols = AsyncMock(return_value=[]) - return bridge - - -@pytest.fixture -def seed_nodes() -> List[CodeSymbolNode]: - """Create seed nodes for testing.""" - return [ - CodeSymbolNode( - id="main.py:main:1", - name="main", - kind="function", - file_path="main.py", - range=Range( - start_line=1, - start_character=0, - end_line=10, - end_character=0, - ), - ) - ] - - -@pytest.fixture -def reference_location() -> Location: - """Create a reference location for testing.""" - return Location( - file_path="utils.py", - line=5, - character=10, - ) - - -@pytest.fixture -def call_hierarchy_item() -> CallHierarchyItem: - """Create a call hierarchy item for testing.""" - return CallHierarchyItem( - name="caller_func", - kind="function", - file_path="caller.py", - range=Range( - start_line=20, - start_character=0, - end_line=30, - end_character=0, - ), - detail="Calls main()", - ) - - -class TestSingleLevelGraphExpansion: - """P0: Test single level graph expansion with max_depth=1.""" - - @pytest.mark.asyncio - async def test_single_level_graph_expansion( - self, - mock_lsp_bridge: AsyncMock, - seed_nodes: List[CodeSymbolNode], - reference_location: Location, - call_hierarchy_item: CallHierarchyItem, - ) -> None: - """Test BFS expansion at depth 1 produces correct graph structure. - - Input: max_depth=1, single seed node - Mock: LspBridge returns 1 reference + 1 incoming call for seed only - Assert: Graph contains 3 nodes (seed, ref, call) and 2 edges from seed - """ - call_count = {"refs": 0, "calls": 0} - - async def mock_get_references(node: CodeSymbolNode) -> List[Location]: - """Return references only for the seed node.""" - call_count["refs"] += 1 - if node.file_path == "main.py": - return [reference_location] - return [] # No references for expanded nodes - - async def mock_get_call_hierarchy(node: CodeSymbolNode) -> List[CallHierarchyItem]: - """Return call hierarchy only for the seed node.""" - call_count["calls"] += 1 - if node.file_path == "main.py": - return [call_hierarchy_item] - return [] # No call hierarchy for expanded nodes - - mock_lsp_bridge.get_references.side_effect = mock_get_references - mock_lsp_bridge.get_call_hierarchy.side_effect = mock_get_call_hierarchy - - # Mock document symbols to provide symbol info for locations - mock_lsp_bridge.get_document_symbols.return_value = [ - { - "name": "helper_func", - "kind": 12, # function - "range": { - "start": {"line": 4, "character": 0}, - "end": {"line": 10, "character": 0}, - }, - } - ] - - builder = LspGraphBuilder(max_depth=1, max_nodes=100, max_concurrent=10) - graph = await builder.build_from_seeds(seed_nodes, mock_lsp_bridge) - - # Verify graph structure - assert len(graph.nodes) == 3, f"Expected 3 nodes, got {len(graph.nodes)}: {list(graph.nodes.keys())}" - assert len(graph.edges) == 2, f"Expected 2 edges, got {len(graph.edges)}: {graph.edges}" - - # Verify seed node is present - assert "main.py:main:1" in graph.nodes - - # Verify edges exist with correct relationship types - edge_types = [edge[2] for edge in graph.edges] - assert "references" in edge_types, "Expected 'references' edge" - assert "calls" in edge_types, "Expected 'calls' edge" - - # Verify expansion was called for seed and expanded nodes - # (nodes at depth 1 should not be expanded beyond max_depth=1) - assert call_count["refs"] >= 1, "get_references should be called at least once" - - -class TestMaxNodesBoundary: - """P0: Test max_nodes boundary stops expansion.""" - - @pytest.mark.asyncio - async def test_max_nodes_boundary( - self, - mock_lsp_bridge: AsyncMock, - seed_nodes: List[CodeSymbolNode], - ) -> None: - """Test graph expansion stops when max_nodes is reached. - - Input: max_nodes=5 - Mock: LspBridge returns many references - Assert: Graph expansion stops at 5 nodes - """ - # Create many reference locations - many_refs = [ - Location(file_path=f"file{i}.py", line=i, character=0) - for i in range(20) - ] - mock_lsp_bridge.get_references.return_value = many_refs - mock_lsp_bridge.get_call_hierarchy.return_value = [] - mock_lsp_bridge.get_document_symbols.return_value = [] - - builder = LspGraphBuilder(max_depth=10, max_nodes=5, max_concurrent=10) - graph = await builder.build_from_seeds(seed_nodes, mock_lsp_bridge) - - # Verify node count does not exceed max_nodes - assert len(graph.nodes) <= 5, ( - f"Expected at most 5 nodes, got {len(graph.nodes)}" - ) - - -class TestMaxDepthBoundary: - """P1: Test max_depth boundary limits BFS expansion.""" - - @pytest.mark.asyncio - async def test_max_depth_boundary( - self, - mock_lsp_bridge: AsyncMock, - seed_nodes: List[CodeSymbolNode], - ) -> None: - """Test BFS queue does not add nodes beyond max_depth. - - Input: max_depth=2 - Mock: Multi-level expansion responses - Assert: BFS queue stops adding new nodes when depth > 2 - """ - # Track which depths are expanded - expanded_depths = set() - - def create_ref_for_depth(depth: int) -> Location: - return Location( - file_path=f"depth{depth}.py", - line=depth * 10 + 1, - character=0, - ) - - async def mock_get_references(node: CodeSymbolNode) -> List[Location]: - """Return references based on node's apparent depth.""" - # Determine which depth level this node represents - if node.file_path == "main.py": - expanded_depths.add(0) - return [create_ref_for_depth(1)] - elif "depth1" in node.file_path: - expanded_depths.add(1) - return [create_ref_for_depth(2)] - elif "depth2" in node.file_path: - expanded_depths.add(2) - return [create_ref_for_depth(3)] - elif "depth3" in node.file_path: - expanded_depths.add(3) - return [create_ref_for_depth(4)] - return [] - - mock_lsp_bridge.get_references.side_effect = mock_get_references - mock_lsp_bridge.get_call_hierarchy.return_value = [] - mock_lsp_bridge.get_document_symbols.return_value = [] - - builder = LspGraphBuilder(max_depth=2, max_nodes=100, max_concurrent=10) - graph = await builder.build_from_seeds(seed_nodes, mock_lsp_bridge) - - # Collect file paths from graph - node_files = [node.file_path for node in graph.nodes.values()] - - # Should have: seed (main.py), depth1 (from seed expansion), depth2 (from depth1 expansion) - # depth3 should only be added to graph but NOT expanded (depth > max_depth=2) - assert "main.py" in node_files, "Seed node should be in graph" - assert any("depth1" in f for f in node_files), "Depth 1 node should be in graph" - assert any("depth2" in f for f in node_files), "Depth 2 node should be in graph" - - # The depth3 node might be added to the graph (from depth2 expansion) - # but should NOT be expanded (no depth4 nodes should exist) - depth4_nodes = [f for f in node_files if "depth4" in f] - assert len(depth4_nodes) == 0, ( - f"Nodes beyond max_depth should not be expanded: {depth4_nodes}" - ) - - # Verify expansion didn't go to depth 3 (would mean depth4 nodes were created) - # The depth 3 node itself may be in the graph but shouldn't have been expanded - assert 3 not in expanded_depths or 4 not in expanded_depths, ( - f"Expansion should stop at max_depth, expanded depths: {expanded_depths}" - ) - - -class TestConcurrentSemaphore: - """P1: Test concurrent semaphore limits parallel expansion.""" - - @pytest.mark.asyncio - async def test_concurrent_semaphore( - self, - mock_lsp_bridge: AsyncMock, - ) -> None: - """Test that concurrent node expansions are limited by semaphore. - - Input: max_concurrent=3, 10 nodes in queue - Assert: Simultaneous _expand_node calls never exceed 3 - """ - concurrent_count = {"current": 0, "max_seen": 0} - lock = asyncio.Lock() - - # Create multiple seed nodes - seeds = [ - CodeSymbolNode( - id=f"file{i}.py:func{i}:{i}", - name=f"func{i}", - kind="function", - file_path=f"file{i}.py", - range=Range( - start_line=i, - start_character=0, - end_line=i + 10, - end_character=0, - ), - ) - for i in range(10) - ] - - original_get_refs = mock_lsp_bridge.get_references - - async def tracked_get_references(node: CodeSymbolNode) -> List[Location]: - """Track concurrent calls to verify semaphore behavior.""" - async with lock: - concurrent_count["current"] += 1 - if concurrent_count["current"] > concurrent_count["max_seen"]: - concurrent_count["max_seen"] = concurrent_count["current"] - - # Simulate some work - await asyncio.sleep(0.01) - - async with lock: - concurrent_count["current"] -= 1 - - return [] - - mock_lsp_bridge.get_references.side_effect = tracked_get_references - mock_lsp_bridge.get_call_hierarchy.return_value = [] - mock_lsp_bridge.get_document_symbols.return_value = [] - - builder = LspGraphBuilder(max_depth=1, max_nodes=100, max_concurrent=3) - await builder.build_from_seeds(seeds, mock_lsp_bridge) - - # Verify concurrent calls never exceeded max_concurrent - assert concurrent_count["max_seen"] <= 3, ( - f"Max concurrent calls ({concurrent_count['max_seen']}) exceeded limit (3)" - ) - - -class TestDocumentSymbolCache: - """P1: Test document symbol caching for same file locations.""" - - @pytest.mark.asyncio - async def test_document_symbol_cache( - self, - mock_lsp_bridge: AsyncMock, - seed_nodes: List[CodeSymbolNode], - ) -> None: - """Test that document symbols are cached per file. - - Input: 2 locations from the same file - Mock: get_document_symbols only called once - Assert: Second location lookup uses cache - """ - # Two references from the same file - refs_same_file = [ - Location(file_path="shared.py", line=10, character=0), - Location(file_path="shared.py", line=20, character=0), - ] - - mock_lsp_bridge.get_references.return_value = refs_same_file - mock_lsp_bridge.get_call_hierarchy.return_value = [] - - doc_symbols_call_count = {"count": 0} - - async def mock_get_document_symbols(file_path: str) -> List[Dict[str, Any]]: - doc_symbols_call_count["count"] += 1 - return [ - { - "name": "symbol_at_10", - "kind": 12, - "range": { - "start": {"line": 9, "character": 0}, - "end": {"line": 15, "character": 0}, - }, - }, - { - "name": "symbol_at_20", - "kind": 12, - "range": { - "start": {"line": 19, "character": 0}, - "end": {"line": 25, "character": 0}, - }, - }, - ] - - mock_lsp_bridge.get_document_symbols.side_effect = mock_get_document_symbols - - builder = LspGraphBuilder(max_depth=1, max_nodes=100, max_concurrent=10) - await builder.build_from_seeds(seed_nodes, mock_lsp_bridge) - - # get_document_symbols should be called only once for shared.py - assert doc_symbols_call_count["count"] == 1, ( - f"Expected 1 call to get_document_symbols, got {doc_symbols_call_count['count']}" - ) - - # Verify cache contains the file - assert "shared.py" in builder._document_symbols_cache - - @pytest.mark.asyncio - async def test_cache_cleared_between_builds( - self, - mock_lsp_bridge: AsyncMock, - seed_nodes: List[CodeSymbolNode], - ) -> None: - """Test that clear_cache removes cached document symbols.""" - mock_lsp_bridge.get_references.return_value = [] - mock_lsp_bridge.get_call_hierarchy.return_value = [] - mock_lsp_bridge.get_document_symbols.return_value = [] - - builder = LspGraphBuilder(max_depth=1, max_nodes=100, max_concurrent=10) - - # Manually populate cache - builder._document_symbols_cache["test.py"] = [{"name": "cached"}] - - # Clear cache - builder.clear_cache() - - # Verify cache is empty - assert len(builder._document_symbols_cache) == 0 - - -class TestNodeExpansionErrorHandling: - """P2: Test error handling during node expansion.""" - - @pytest.mark.asyncio - async def test_node_expansion_error_handling( - self, - mock_lsp_bridge: AsyncMock, - ) -> None: - """Test that errors in node expansion are logged and other nodes continue. - - Mock: get_references throws exception for specific node - Assert: Error is logged, other nodes continue expanding - """ - seeds = [ - CodeSymbolNode( - id="good.py:good:1", - name="good", - kind="function", - file_path="good.py", - range=Range(start_line=1, start_character=0, end_line=10, end_character=0), - ), - CodeSymbolNode( - id="bad.py:bad:1", - name="bad", - kind="function", - file_path="bad.py", - range=Range(start_line=1, start_character=0, end_line=10, end_character=0), - ), - ] - - async def mock_get_references(node: CodeSymbolNode) -> List[Location]: - if "bad" in node.file_path: - raise RuntimeError("Simulated LSP error") - return [Location(file_path="result.py", line=5, character=0)] - - mock_lsp_bridge.get_references.side_effect = mock_get_references - mock_lsp_bridge.get_call_hierarchy.return_value = [] - mock_lsp_bridge.get_document_symbols.return_value = [] - - builder = LspGraphBuilder(max_depth=1, max_nodes=100, max_concurrent=10) - - # Should not raise, error should be caught and logged - graph = await builder.build_from_seeds(seeds, mock_lsp_bridge) - - # Both seed nodes should be in the graph - assert "good.py:good:1" in graph.nodes - assert "bad.py:bad:1" in graph.nodes - - # The good node's expansion should have succeeded - # (result.py node should be present) - result_nodes = [n for n in graph.nodes.keys() if "result.py" in n] - assert len(result_nodes) >= 1, "Good node's expansion should have succeeded" - - @pytest.mark.asyncio - async def test_partial_failure_continues_expansion( - self, - mock_lsp_bridge: AsyncMock, - seed_nodes: List[CodeSymbolNode], - ) -> None: - """Test that failure in one LSP call doesn't stop other calls.""" - # References succeed, call hierarchy fails - mock_lsp_bridge.get_references.return_value = [ - Location(file_path="ref.py", line=5, character=0) - ] - mock_lsp_bridge.get_call_hierarchy.side_effect = RuntimeError("Call hierarchy failed") - mock_lsp_bridge.get_document_symbols.return_value = [] - - builder = LspGraphBuilder(max_depth=1, max_nodes=100, max_concurrent=10) - graph = await builder.build_from_seeds(seed_nodes, mock_lsp_bridge) - - # Should still have the seed and the reference node - assert len(graph.nodes) >= 2 - - # Reference edge should exist - ref_edges = [e for e in graph.edges if e[2] == "references"] - assert len(ref_edges) >= 1, "Reference edge should exist despite call hierarchy failure" - - -class TestEdgeCases: - """Additional edge case tests.""" - - @pytest.mark.asyncio - async def test_empty_seeds( - self, - mock_lsp_bridge: AsyncMock, - ) -> None: - """Test building graph with empty seed list.""" - builder = LspGraphBuilder(max_depth=2, max_nodes=100, max_concurrent=10) - graph = await builder.build_from_seeds([], mock_lsp_bridge) - - assert len(graph.nodes) == 0 - assert len(graph.edges) == 0 - - @pytest.mark.asyncio - async def test_self_referencing_node_skipped( - self, - mock_lsp_bridge: AsyncMock, - seed_nodes: List[CodeSymbolNode], - ) -> None: - """Test that self-references don't create self-loops.""" - # Reference back to the same node - mock_lsp_bridge.get_references.return_value = [ - Location(file_path="main.py", line=1, character=0) # Same as seed - ] - mock_lsp_bridge.get_call_hierarchy.return_value = [] - mock_lsp_bridge.get_document_symbols.return_value = [ - { - "name": "main", - "kind": 12, - "range": { - "start": {"line": 0, "character": 0}, - "end": {"line": 9, "character": 0}, - }, - } - ] - - builder = LspGraphBuilder(max_depth=1, max_nodes=100, max_concurrent=10) - graph = await builder.build_from_seeds(seed_nodes, mock_lsp_bridge) - - # Should only have the seed node, no self-loop edge - # (Note: depending on implementation, self-references may be filtered) - self_edges = [e for e in graph.edges if e[0] == e[1]] - assert len(self_edges) == 0, "Self-referencing edges should not exist" - - @pytest.mark.asyncio - async def test_visited_nodes_not_expanded_twice( - self, - mock_lsp_bridge: AsyncMock, - seed_nodes: List[CodeSymbolNode], - ) -> None: - """Test that visited nodes are not expanded multiple times.""" - expansion_calls = {"count": 0} - - async def mock_get_references(node: CodeSymbolNode) -> List[Location]: - expansion_calls["count"] += 1 - # Return same node reference each time - return [Location(file_path="shared.py", line=10, character=0)] - - mock_lsp_bridge.get_references.side_effect = mock_get_references - mock_lsp_bridge.get_call_hierarchy.return_value = [] - mock_lsp_bridge.get_document_symbols.return_value = [] - - builder = LspGraphBuilder(max_depth=3, max_nodes=100, max_concurrent=10) - await builder.build_from_seeds(seed_nodes, mock_lsp_bridge) - - # Each unique node should only be expanded once - # seed (main.py) + shared.py = 2 expansions max - assert expansion_calls["count"] <= 2, ( - f"Nodes should not be expanded multiple times, got {expansion_calls['count']} calls" - ) diff --git a/codex-lens/tests/validate_optimizations.py b/codex-lens/tests/validate_optimizations.py deleted file mode 100644 index a8445a9d..00000000 --- a/codex-lens/tests/validate_optimizations.py +++ /dev/null @@ -1,287 +0,0 @@ -""" -Manual validation script for performance optimizations. - -This script verifies that the optimization implementations are working correctly. -Run with: python tests/validate_optimizations.py -""" - -import json -import sqlite3 -import tempfile -import time -from pathlib import Path - -from codexlens.storage.dir_index import DirIndexStore -from codexlens.storage.registry import RegistryStore -from codexlens.storage.migration_manager import MigrationManager -from codexlens.storage.migrations import migration_001_normalize_keywords - - -def test_keyword_normalization(): - """Test normalized keywords functionality.""" - print("\n=== Testing Keyword Normalization ===") - - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "test_index.db" - store = DirIndexStore(db_path) - store.initialize() # Create schema - - # Add a test file - # Note: add_file automatically calculates mtime and line_count - file_id = store.add_file( - name="test.py", - full_path=Path("/test/test.py"), - content="def hello(): pass", - language="python" - ) - - # Add semantic metadata with keywords - keywords = ["auth", "security", "jwt"] - store.add_semantic_metadata( - file_id=file_id, - summary="Test summary", - keywords=keywords, - purpose="Testing", - llm_tool="gemini" - ) - - conn = store._get_connection() - - # Verify keywords table populated - keyword_rows = conn.execute(""" - SELECT k.keyword - FROM file_keywords fk - JOIN keywords k ON fk.keyword_id = k.id - WHERE fk.file_id = ? - """, (file_id,)).fetchall() - - normalized_keywords = [row["keyword"] for row in keyword_rows] - print(f"✓ Keywords stored in normalized tables: {normalized_keywords}") - assert set(normalized_keywords) == set(keywords), "Keywords mismatch!" - - # Test optimized search - results = store.search_semantic_keywords("auth", use_normalized=True) - print(f"✓ Found {len(results)} file(s) with keyword 'auth'") - assert len(results) > 0, "No results found!" - - # Test fallback search - results_fallback = store.search_semantic_keywords("auth", use_normalized=False) - print(f"✓ Fallback search found {len(results_fallback)} file(s)") - assert len(results) == len(results_fallback), "Result count mismatch!" - - store.close() - print("✓ Keyword normalization tests PASSED") - - -def test_path_lookup_optimization(): - """Test optimized path lookup.""" - print("\n=== Testing Path Lookup Optimization ===") - - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "test_registry.db" - store = RegistryStore(db_path) - - # Add directory mapping - store.add_dir_mapping( - source_path=Path("/a/b/c"), - index_path=Path("/tmp/index.db"), - project_id=None - ) - - # Test deep path lookup - deep_path = Path("/a/b/c/d/e/f/g/h/i/j/file.py") - - start = time.perf_counter() - result = store.find_nearest_index(deep_path) - elapsed = time.perf_counter() - start - - print(f"✓ Found nearest index in {elapsed*1000:.2f}ms") - assert result is not None, "No result found!" - assert result.source_path == Path("/a/b/c"), "Wrong path found!" - assert elapsed < 0.05, f"Too slow: {elapsed*1000:.2f}ms" - - store.close() - print("✓ Path lookup optimization tests PASSED") - - -def test_symbol_search_prefix_mode(): - """Test symbol search with prefix mode.""" - print("\n=== Testing Symbol Search Prefix Mode ===") - - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "test_index.db" - store = DirIndexStore(db_path) - store.initialize() # Create schema - - # Add a test file - file_id = store.add_file( - name="test.py", - full_path=Path("/test/test.py"), - content="def hello(): pass\n" * 10, # 10 lines - language="python" - ) - - # Add symbols - store.add_symbols( - file_id=file_id, - symbols=[ - ("get_user", "function", 1, 5), - ("get_item", "function", 6, 10), - ("create_user", "function", 11, 15), - ("UserClass", "class", 16, 25), - ] - ) - - # Test prefix search - results = store.search_symbols("get", prefix_mode=True) - print(f"✓ Prefix search for 'get' found {len(results)} symbol(s)") - assert len(results) == 2, f"Expected 2 symbols, got {len(results)}" - for symbol in results: - assert symbol.name.startswith("get"), f"Symbol {symbol.name} doesn't start with 'get'" - print(f" Symbols: {[s.name for s in results]}") - - # Test substring search - results_sub = store.search_symbols("user", prefix_mode=False) - print(f"✓ Substring search for 'user' found {len(results_sub)} symbol(s)") - assert len(results_sub) == 3, f"Expected 3 symbols, got {len(results_sub)}" - print(f" Symbols: {[s.name for s in results_sub]}") - - store.close() - print("✓ Symbol search optimization tests PASSED") - - -def test_migration_001(): - """Test migration_001 execution.""" - print("\n=== Testing Migration 001 ===") - - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "test_index.db" - store = DirIndexStore(db_path) - store.initialize() # Create schema - conn = store._get_connection() - - # Add test data to semantic_metadata - conn.execute(""" - INSERT INTO files(id, name, full_path, language, mtime, line_count) - VALUES(1, 'test.py', '/test.py', 'python', 0, 10) - """) - conn.execute(""" - INSERT INTO semantic_metadata(file_id, keywords) - VALUES(1, ?) - """, (json.dumps(["test", "migration", "keyword"]),)) - conn.commit() - - # Run migration - print(" Running migration_001...") - migration_001_normalize_keywords.upgrade(conn) - print(" Migration completed successfully") - - # Verify migration results - keyword_count = conn.execute(""" - SELECT COUNT(*) as c FROM file_keywords WHERE file_id=1 - """).fetchone()["c"] - - print(f"✓ Migrated {keyword_count} keywords for file_id=1") - assert keyword_count == 3, f"Expected 3 keywords, got {keyword_count}" - - # Verify keywords table - keywords = conn.execute(""" - SELECT k.keyword FROM keywords k - JOIN file_keywords fk ON k.id = fk.keyword_id - WHERE fk.file_id = 1 - """).fetchall() - keyword_list = [row["keyword"] for row in keywords] - print(f" Keywords: {keyword_list}") - - store.close() - print("✓ Migration 001 tests PASSED") - - -def test_performance_comparison(): - """Compare performance of optimized vs fallback implementations.""" - print("\n=== Performance Comparison ===") - - with tempfile.TemporaryDirectory() as tmpdir: - db_path = Path(tmpdir) / "test_index.db" - store = DirIndexStore(db_path) - store.initialize() # Create schema - - # Create test data - print(" Creating test data...") - for i in range(100): - file_id = store.add_file( - name=f"file_{i}.py", - full_path=Path(f"/test/file_{i}.py"), - content=f"def function_{i}(): pass", - language="python" - ) - - # Vary keywords - if i % 3 == 0: - keywords = ["auth", "security"] - elif i % 3 == 1: - keywords = ["database", "query"] - else: - keywords = ["api", "endpoint"] - - store.add_semantic_metadata( - file_id=file_id, - summary=f"File {i}", - keywords=keywords, - purpose="Testing", - llm_tool="gemini" - ) - - # Benchmark normalized search - print(" Benchmarking normalized search...") - start = time.perf_counter() - for _ in range(10): - results_norm = store.search_semantic_keywords("auth", use_normalized=True) - norm_time = time.perf_counter() - start - - # Benchmark fallback search - print(" Benchmarking fallback search...") - start = time.perf_counter() - for _ in range(10): - results_fallback = store.search_semantic_keywords("auth", use_normalized=False) - fallback_time = time.perf_counter() - start - - print(f"\n Results:") - print(f" - Normalized search: {norm_time*1000:.2f}ms (10 iterations)") - print(f" - Fallback search: {fallback_time*1000:.2f}ms (10 iterations)") - print(f" - Speedup factor: {fallback_time/norm_time:.2f}x") - print(f" - Both found {len(results_norm)} files") - - assert len(results_norm) == len(results_fallback), "Result count mismatch!" - - store.close() - print("✓ Performance comparison PASSED") - - -def main(): - """Run all validation tests.""" - print("=" * 60) - print("CodexLens Performance Optimizations Validation") - print("=" * 60) - - try: - test_keyword_normalization() - test_path_lookup_optimization() - test_symbol_search_prefix_mode() - test_migration_001() - test_performance_comparison() - - print("\n" + "=" * 60) - print("✓✓✓ ALL VALIDATION TESTS PASSED ✓✓✓") - print("=" * 60) - return 0 - - except Exception as e: - print(f"\nX VALIDATION FAILED: {e}") - import traceback - traceback.print_exc() - return 1 - - -if __name__ == "__main__": - exit(main()) diff --git a/codex-lens/verify_watcher.py b/codex-lens/verify_watcher.py deleted file mode 100644 index f64ff089..00000000 --- a/codex-lens/verify_watcher.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env python3 -"""Verification script for FileWatcher event filtering and debouncing.""" - -import time -from pathlib import Path -from codexlens.watcher.file_watcher import FileWatcher -from codexlens.watcher.events import WatcherConfig, FileEvent - -def test_should_index_file(): - """Test _should_index_file filtering logic.""" - print("Testing _should_index_file filtering...") - - # Create watcher instance - config = WatcherConfig() - watcher = FileWatcher( - root_path=Path("."), - config=config, - on_changes=lambda events: None, - ) - - # Test cases - test_cases = [ - # (path, expected_result, description) - (Path("test.py"), True, "Python file should be indexed"), - (Path("test.txt"), True, "Text file should be indexed"), - (Path("test.js"), True, "JavaScript file should be indexed"), - (Path("test.ts"), True, "TypeScript file should be indexed"), - (Path("src/test.py"), True, "Python file in subdirectory should be indexed"), - (Path(".git/config"), False, ".git files should be filtered"), - (Path("node_modules/pkg/index.js"), False, "node_modules should be filtered"), - (Path("__pycache__/test.pyc"), False, "__pycache__ should be filtered"), - (Path(".venv/lib/test.py"), False, ".venv should be filtered"), - (Path("test.unknown"), False, "Unknown extension should be filtered"), - (Path("README.md"), True, "Markdown file should be indexed"), - ] - - passed = 0 - failed = 0 - - for path, expected, description in test_cases: - result = watcher._should_index_file(path) - status = "✓" if result == expected else "✗" - - if result == expected: - passed += 1 - else: - failed += 1 - - print(f" {status} {description}") - print(f" Path: {path}, Expected: {expected}, Got: {result}") - - print(f"\nResults: {passed} passed, {failed} failed") - return failed == 0 - -def test_debounce_and_dedup(): - """Test event debouncing and deduplication.""" - print("\nTesting event debouncing and deduplication...") - - received_events = [] - - def on_changes(events): - received_events.append(events) - print(f" Received batch: {len(events)} events") - - # Create watcher with short debounce time for testing - config = WatcherConfig(debounce_ms=500) - watcher = FileWatcher( - root_path=Path("."), - config=config, - on_changes=on_changes, - ) - - # Simulate rapid events to same file (should be deduplicated) - from codexlens.watcher.events import ChangeType - - test_path = Path("test_file.py") - for i in range(5): - event = FileEvent( - path=test_path, - change_type=ChangeType.MODIFIED, - timestamp=time.time(), - ) - watcher._on_raw_event(event) - - # Wait for debounce - time.sleep(0.6) - - # Force flush to ensure we get the events - watcher._flush_events() - - if received_events: - batch = received_events[0] - # Should deduplicate 5 events to 1 - if len(batch) == 1: - print(" ✓ Deduplication working: 5 events reduced to 1") - return True - else: - print(f" ✗ Deduplication failed: expected 1 event, got {len(batch)}") - return False - else: - print(" ✗ No events received") - return False - -if __name__ == "__main__": - print("=" * 60) - print("FileWatcher Verification") - print("=" * 60) - - test1 = test_should_index_file() - test2 = test_debounce_and_dedup() - - print("\n" + "=" * 60) - if test1 and test2: - print("✓ All tests passed!") - else: - print("✗ Some tests failed") - print("=" * 60)