Add graph expansion and cross-encoder reranking features

- Implemented GraphExpander to enhance search results with related symbols using precomputed neighbors.
- Added CrossEncoderReranker for second-stage search ranking, allowing for improved result scoring.
- Created migrations to establish necessary database tables for relationships and graph neighbors.
- Developed tests for graph expansion functionality, ensuring related results are populated correctly.
- Enhanced performance benchmarks for cross-encoder reranking latency and graph expansion overhead.
- Updated schema cleanup tests to reflect changes in versioning and deprecated fields.
- Added new test cases for the Tree-sitter parser to validate relationship extraction with alias resolution.
This commit is contained in:
catlog22
2025-12-31 16:58:59 +08:00
parent 4bde13e83a
commit 31a45f1f30
27 changed files with 2566 additions and 97 deletions

View File

@@ -110,6 +110,37 @@ class DataProcessor:
assert result is not None
assert len(result.symbols) == 0
def test_extracts_relationships_with_alias_resolution(self):
# Verifies that parsing Python source yields "calls" relationships whose
# targets use the original dotted names rather than the import aliases,
# and an "inherits" relationship from a subclass to its base class.
parser = TreeSitterSymbolParser("python")
# Sample source: two aliased imports (osp, sq), a Base/Child class pair,
# and a main() that calls through both aliases.
code = """
import os.path as osp
from math import sqrt as sq
class Base:
pass
class Child(Base):
pass
def main():
osp.join("a", "b")
sq(4)
"""
result = parser.parse(code, Path("test.py"))
assert result is not None
# Keep only the relationships whose source symbol is main.
rels = [r for r in result.relationships if r.source_symbol == "main"]
# Collect the target names of main's "calls" relationships.
targets = {r.target_symbol for r in rels if r.relationship_type.value == "calls"}
# The aliases osp and sq are expected to be reported as os.path / math.sqrt.
assert "os.path.join" in targets
assert "math.sqrt" in targets
# Child(Base) is expected to produce an "inherits" relationship to Base.
inherits = [
r for r in result.relationships
if r.source_symbol == "Child" and r.relationship_type.value == "inherits"
]
assert any(r.target_symbol == "Base" for r in inherits)
@pytest.mark.skipif(not TREE_SITTER_AVAILABLE, reason="tree-sitter not installed")
class TestTreeSitterJavaScriptParser:
@@ -175,6 +206,22 @@ export const arrowFunc = () => {}
assert "exported" in names
assert "arrowFunc" in names
def test_extracts_relationships_with_import_alias(self):
# Verifies that a call made through an aliased named import in JavaScript
# is reported as a "calls" relationship targeting "<module>.<original name>".
parser = TreeSitterSymbolParser("javascript")
# Sample source: readFile is imported from "fs" under the alias rf and
# called from main().
code = """
import { readFile as rf } from "fs";
function main() {
rf("a");
}
"""
result = parser.parse(code, Path("test.js"))
assert result is not None
# Keep only the relationships whose source symbol is main.
rels = [r for r in result.relationships if r.source_symbol == "main"]
# Collect the target names of main's "calls" relationships.
targets = {r.target_symbol for r in rels if r.relationship_type.value == "calls"}
# The alias rf is expected to be reported as fs.readFile.
assert "fs.readFile" in targets
@pytest.mark.skipif(not TREE_SITTER_AVAILABLE, reason="tree-sitter not installed")
class TestTreeSitterTypeScriptParser: