Add comprehensive tests for schema cleanup migration and search comparison

- Implement tests for migration 005 to verify removal of deprecated fields in the database schema.
- Ensure that new databases are created with a clean schema.
- Validate that keywords are correctly extracted from the normalized file_keywords table.
- Test symbol insertion without deprecated fields and subdir operations without direct_files.
- Create a detailed search comparison test to evaluate vector search vs hybrid search performance.
- Add a script for reindexing projects to extract code relationships and verify GraphAnalyzer functionality.
- Include a test script to check TreeSitter parser availability and relationship extraction from sample files.
This commit is contained in:
catlog22
2025-12-16 19:27:05 +08:00
parent 3da0ef2adb
commit df23975a0b
61 changed files with 13114 additions and 366 deletions

View File

@@ -18,3 +18,7 @@ Requires-Dist: pathspec>=0.11
Provides-Extra: semantic
Requires-Dist: numpy>=1.24; extra == "semantic"
Requires-Dist: fastembed>=0.2; extra == "semantic"
Provides-Extra: encoding
Requires-Dist: chardet>=5.0; extra == "encoding"
Provides-Extra: full
Requires-Dist: tiktoken>=0.5.0; extra == "full"

View File

@@ -11,15 +11,23 @@ src/codexlens/entities.py
src/codexlens/errors.py
src/codexlens/cli/__init__.py
src/codexlens/cli/commands.py
src/codexlens/cli/model_manager.py
src/codexlens/cli/output.py
src/codexlens/parsers/__init__.py
src/codexlens/parsers/encoding.py
src/codexlens/parsers/factory.py
src/codexlens/parsers/tokenizer.py
src/codexlens/parsers/treesitter_parser.py
src/codexlens/search/__init__.py
src/codexlens/search/chain_search.py
src/codexlens/search/hybrid_search.py
src/codexlens/search/query_parser.py
src/codexlens/search/ranking.py
src/codexlens/semantic/__init__.py
src/codexlens/semantic/chunker.py
src/codexlens/semantic/code_extractor.py
src/codexlens/semantic/embedder.py
src/codexlens/semantic/graph_analyzer.py
src/codexlens/semantic/llm_enhancer.py
src/codexlens/semantic/vector_store.py
src/codexlens/storage/__init__.py
@@ -30,21 +38,45 @@ src/codexlens/storage/migration_manager.py
src/codexlens/storage/path_mapper.py
src/codexlens/storage/registry.py
src/codexlens/storage/sqlite_store.py
src/codexlens/storage/sqlite_utils.py
src/codexlens/storage/migrations/__init__.py
src/codexlens/storage/migrations/migration_001_normalize_keywords.py
src/codexlens/storage/migrations/migration_002_add_token_metadata.py
src/codexlens/storage/migrations/migration_003_code_relationships.py
src/codexlens/storage/migrations/migration_004_dual_fts.py
src/codexlens/storage/migrations/migration_005_cleanup_unused_fields.py
tests/test_chain_search_engine.py
tests/test_cli_hybrid_search.py
tests/test_cli_output.py
tests/test_code_extractor.py
tests/test_config.py
tests/test_dual_fts.py
tests/test_encoding.py
tests/test_entities.py
tests/test_errors.py
tests/test_file_cache.py
tests/test_graph_analyzer.py
tests/test_graph_cli.py
tests/test_graph_storage.py
tests/test_hybrid_chunker.py
tests/test_hybrid_search_e2e.py
tests/test_incremental_indexing.py
tests/test_llm_enhancer.py
tests/test_parser_integration.py
tests/test_parsers.py
tests/test_performance_optimizations.py
tests/test_query_parser.py
tests/test_rrf_fusion.py
tests/test_schema_cleanup_migration.py
tests/test_search_comprehensive.py
tests/test_search_full_coverage.py
tests/test_search_performance.py
tests/test_semantic.py
tests/test_semantic_search.py
tests/test_storage.py
tests/test_token_chunking.py
tests/test_token_storage.py
tests/test_tokenizer.py
tests/test_tokenizer_performance.py
tests/test_treesitter_parser.py
tests/test_vector_search_full.py

View File

@@ -7,6 +7,12 @@ tree-sitter-javascript>=0.25
tree-sitter-typescript>=0.23
pathspec>=0.11
[encoding]
chardet>=5.0
[full]
tiktoken>=0.5.0
[semantic]
numpy>=1.24
fastembed>=0.2