Add graph expansion and cross-encoder reranking features

- Implemented GraphExpander to enhance search results with related symbols using precomputed neighbors.
- Added CrossEncoderReranker for second-stage search ranking, allowing for improved result scoring.
- Created migrations to establish necessary database tables for relationships and graph neighbors.
- Developed tests for graph expansion functionality, ensuring related results are populated correctly.
- Enhanced performance benchmarks for cross-encoder reranking latency and graph expansion overhead.
- Updated schema cleanup tests to reflect changes in versioning and deprecated fields.
- Added new test cases for Treesitter parser to validate relationship extraction with alias resolution.
This commit is contained in:
catlog22
2025-12-31 16:58:59 +08:00
parent 4bde13e83a
commit 31a45f1f30
27 changed files with 2566 additions and 97 deletions

View File

@@ -19,7 +19,7 @@ from codexlens.entities import Symbol
class TestSchemaCleanupMigration:
"""Test schema cleanup migration (v4 -> v5)."""
"""Test schema cleanup migration (v4 -> latest)."""
def test_migration_from_v4_to_v5(self):
"""Test that migration successfully removes deprecated fields."""
@@ -129,10 +129,12 @@ class TestSchemaCleanupMigration:
# Now initialize store - this should trigger migration
store.initialize()
# Verify schema version is now 5
# Verify schema version is now the latest
conn = store._get_connection()
version_row = conn.execute("PRAGMA user_version").fetchone()
assert version_row[0] == 5, f"Expected schema version 5, got {version_row[0]}"
assert version_row[0] == DirIndexStore.SCHEMA_VERSION, (
f"Expected schema version {DirIndexStore.SCHEMA_VERSION}, got {version_row[0]}"
)
# Check that deprecated columns are removed
# 1. Check semantic_metadata doesn't have keywords column
@@ -166,7 +168,7 @@ class TestSchemaCleanupMigration:
store.close()
def test_new_database_has_clean_schema(self):
"""Test that new databases are created with clean schema (v5)."""
"""Test that new databases are created with clean schema (latest)."""
with tempfile.TemporaryDirectory() as tmpdir:
db_path = Path(tmpdir) / "_index.db"
store = DirIndexStore(db_path)
@@ -174,9 +176,9 @@ class TestSchemaCleanupMigration:
conn = store._get_connection()
# Verify schema version is 5
# Verify schema version is the latest
version_row = conn.execute("PRAGMA user_version").fetchone()
assert version_row[0] == 5
assert version_row[0] == DirIndexStore.SCHEMA_VERSION
# Check that new schema doesn't have deprecated columns
cursor = conn.execute("PRAGMA table_info(semantic_metadata)")