Files
Claude-Code-Workflow/codex-lens/tests/test_graph_analyzer.py
catlog22 0fe16963cd Add comprehensive tests for tokenizer, performance benchmarks, and TreeSitter parser functionality
- Implemented unit tests for the Tokenizer class, covering various text inputs, edge cases, and fallback mechanisms.
- Created performance benchmarks comparing tiktoken and pure Python implementations for token counting.
- Developed extensive tests for TreeSitterSymbolParser across Python, JavaScript, and TypeScript, ensuring accurate symbol extraction and parsing.
- Added configuration documentation for MCP integration and custom prompts, enhancing usability and flexibility.
- Introduced a refactor script for GraphAnalyzer to streamline future improvements.
2025-12-15 14:36:09 +08:00

436 lines
12 KiB
Python

"""Tests for GraphAnalyzer - code relationship extraction."""
from pathlib import Path
import pytest
from codexlens.semantic.graph_analyzer import GraphAnalyzer
# Probe the optional tree-sitter grammar packages once at import time.  Each
# flag records whether the corresponding import succeeded and is consumed by
# the ``skipif`` markers on the tests below.
try:
    import tree_sitter_python  # type: ignore[import-not-found]  # noqa: F401
except Exception:
    TREE_SITTER_PYTHON_AVAILABLE = False
else:
    TREE_SITTER_PYTHON_AVAILABLE = True

try:
    import tree_sitter_javascript  # type: ignore[import-not-found]  # noqa: F401
except Exception:
    TREE_SITTER_JS_AVAILABLE = False
else:
    TREE_SITTER_JS_AVAILABLE = True
@pytest.mark.skipif(not TREE_SITTER_PYTHON_AVAILABLE, reason="tree-sitter-python not installed")
class TestPythonGraphAnalyzer:
    """Tests for Python relationship extraction.

    Every test passes a small Python snippet to ``GraphAnalyzer("python")``
    and inspects the relationships returned by ``analyze_file``.  The
    snippets are written flush-left inside their triple-quoted strings so
    that the indentation seen by the analyzer is exactly the indentation of
    the snippet itself (Python block structure depends on it).
    """

    def test_simple_function_call(self):
        """Test extraction of simple function call."""
        code = """def helper():
    pass

def main():
    helper()
"""
        analyzer = GraphAnalyzer("python")
        relationships = analyzer.analyze_file(code, Path("test.py"))

        # Should find main -> helper call
        assert len(relationships) == 1
        rel = relationships[0]
        assert rel.source_symbol == "main"
        assert rel.target_symbol == "helper"
        assert rel.relationship_type == "call"
        # The helper() call is on the fifth line of the snippet above.
        assert rel.source_line == 5

    def test_multiple_calls_in_function(self):
        """Test extraction of multiple calls from same function."""
        code = """def foo():
    pass

def bar():
    pass

def main():
    foo()
    bar()
"""
        analyzer = GraphAnalyzer("python")
        relationships = analyzer.analyze_file(code, Path("test.py"))

        # Should find main -> foo and main -> bar
        assert len(relationships) == 2
        targets = {rel.target_symbol for rel in relationships}
        assert targets == {"foo", "bar"}
        assert all(rel.source_symbol == "main" for rel in relationships)

    def test_nested_function_calls(self):
        """Test extraction of calls from nested functions."""
        code = """def inner_helper():
    pass

def outer():
    def inner():
        inner_helper()
    inner()
"""
        analyzer = GraphAnalyzer("python")
        relationships = analyzer.analyze_file(code, Path("test.py"))

        # Should find inner -> inner_helper and outer -> inner
        assert len(relationships) == 2
        call_pairs = {(rel.source_symbol, rel.target_symbol) for rel in relationships}
        assert ("inner", "inner_helper") in call_pairs
        assert ("outer", "inner") in call_pairs

    def test_method_call_in_class(self):
        """Test extraction of method calls within class."""
        code = """class Calculator:
    def add(self, a, b):
        return a + b

    def compute(self, x, y):
        result = self.add(x, y)
        return result
"""
        analyzer = GraphAnalyzer("python")
        relationships = analyzer.analyze_file(code, Path("test.py"))

        # Should find compute -> add
        assert len(relationships) == 1
        rel = relationships[0]
        assert rel.source_symbol == "compute"
        assert rel.target_symbol == "add"

    def test_module_level_call(self):
        """Test extraction of module-level function calls."""
        code = """def setup():
    pass

setup()
"""
        analyzer = GraphAnalyzer("python")
        relationships = analyzer.analyze_file(code, Path("test.py"))

        # Should find <module> -> setup
        assert len(relationships) == 1
        rel = relationships[0]
        assert rel.source_symbol == "<module>"
        assert rel.target_symbol == "setup"

    def test_async_function_call(self):
        """Test extraction of calls involving async functions."""
        code = """async def fetch_data():
    pass

async def process():
    await fetch_data()
"""
        analyzer = GraphAnalyzer("python")
        relationships = analyzer.analyze_file(code, Path("test.py"))

        # Should find process -> fetch_data
        assert len(relationships) == 1
        rel = relationships[0]
        assert rel.source_symbol == "process"
        assert rel.target_symbol == "fetch_data"

    def test_complex_python_file(self):
        """Test extraction from realistic Python file with multiple patterns."""
        code = """class DataProcessor:
    def __init__(self):
        self.data = []

    def load(self, filename):
        self.data = read_file(filename)

    def process(self):
        self.validate()
        self.transform()

    def validate(self):
        pass

    def transform(self):
        pass

def read_file(filename):
    pass

def main():
    processor = DataProcessor()
    processor.load("data.txt")
    processor.process()

main()
"""
        analyzer = GraphAnalyzer("python")
        relationships = analyzer.analyze_file(code, Path("test.py"))

        # Extract call pairs
        call_pairs = {(rel.source_symbol, rel.target_symbol) for rel in relationships}

        # Expected relationships
        expected = {
            ("load", "read_file"),
            ("process", "validate"),
            ("process", "transform"),
            ("main", "DataProcessor"),
            ("main", "load"),
            ("main", "process"),
            ("<module>", "main"),
        }

        # Superset check: extra relationships are tolerated, missing ones are not.
        assert call_pairs >= expected

    def test_empty_file(self):
        """Test handling of empty file."""
        code = ""
        analyzer = GraphAnalyzer("python")
        relationships = analyzer.analyze_file(code, Path("test.py"))
        assert len(relationships) == 0

    def test_file_with_no_calls(self):
        """Test handling of file with definitions but no calls."""
        code = """def func1():
    pass

def func2():
    pass
"""
        analyzer = GraphAnalyzer("python")
        relationships = analyzer.analyze_file(code, Path("test.py"))
        assert len(relationships) == 0
@pytest.mark.skipif(not TREE_SITTER_JS_AVAILABLE, reason="tree-sitter-javascript not installed")
class TestJavaScriptGraphAnalyzer:
    """Checks on the call relationships extracted from JavaScript sources."""

    def test_simple_function_call(self):
        """One function declaration calling another gives a single call edge."""
        source = """function helper() {}
function main() {
helper();
}
"""
        edges = GraphAnalyzer("javascript").analyze_file(source, Path("test.js"))

        # Exactly one edge is expected: main -> helper.
        assert len(edges) == 1
        edge = edges[0]
        assert edge.source_symbol == "main"
        assert edge.target_symbol == "helper"
        assert edge.relationship_type == "call"

    def test_arrow_function_call(self):
        """A call made inside an arrow function is attributed to its binding."""
        source = """const helper = () => {};
const main = () => {
helper();
};
"""
        edges = GraphAnalyzer("javascript").analyze_file(source, Path("test.js"))

        # Exactly one edge is expected: main -> helper.
        assert len(edges) == 1
        edge = edges[0]
        assert edge.source_symbol == "main"
        assert edge.target_symbol == "helper"

    def test_class_method_call(self):
        """A ``this.method()`` call links the calling method to the callee."""
        source = """class Calculator {
add(a, b) {
return a + b;
}
compute(x, y) {
return this.add(x, y);
}
}
"""
        edges = GraphAnalyzer("javascript").analyze_file(source, Path("test.js"))

        # Exactly one edge is expected: compute -> add.
        assert len(edges) == 1
        edge = edges[0]
        assert edge.source_symbol == "compute"
        assert edge.target_symbol == "add"

    def test_complex_javascript_file(self):
        """A file mixing functions, a class and a top-level call is covered."""
        source = """function readFile(filename) {
return "";
}
class DataProcessor {
constructor() {
this.data = [];
}
load(filename) {
this.data = readFile(filename);
}
process() {
this.validate();
this.transform();
}
validate() {}
transform() {}
}
function main() {
const processor = new DataProcessor();
processor.load("data.txt");
processor.process();
}
main();
"""
        edges = GraphAnalyzer("javascript").analyze_file(source, Path("test.js"))
        found = {(edge.source_symbol, edge.target_symbol) for edge in edges}

        # Constructor calls such as "new DataProcessor()" are not tracked, so
        # no (main, DataProcessor) pair is listed here.
        required = {
            ("load", "readFile"),
            ("process", "validate"),
            ("process", "transform"),
            ("main", "load"),
            ("main", "process"),
            ("<module>", "main"),
        }

        # Every required pair must be present; extra pairs are allowed.
        assert found >= required
class TestGraphAnalyzerEdgeCases:
    """Edge case tests for GraphAnalyzer.

    Python snippets are written flush-left inside their triple-quoted strings
    so the analyzer sees the snippet's own indentation unchanged.
    """

    # NOTE(review): this test requests "rust" but is gated on the Python
    # grammar being installed -- confirm whether the guard is needed here.
    @pytest.mark.skipif(not TREE_SITTER_PYTHON_AVAILABLE, reason="tree-sitter-python not installed")
    def test_unavailable_language(self):
        """Test handling of unsupported language."""
        code = "some code"
        analyzer = GraphAnalyzer("rust")
        relationships = analyzer.analyze_file(code, Path("test.rs"))
        assert len(relationships) == 0

    @pytest.mark.skipif(not TREE_SITTER_PYTHON_AVAILABLE, reason="tree-sitter-python not installed")
    def test_malformed_python_code(self):
        """Test handling of malformed Python code."""
        # Unclosed parenthesis: the snippet is deliberately not valid Python.
        code = "def broken(\n pass"
        analyzer = GraphAnalyzer("python")

        # Should not crash
        relationships = analyzer.analyze_file(code, Path("test.py"))
        assert isinstance(relationships, list)

    @pytest.mark.skipif(not TREE_SITTER_PYTHON_AVAILABLE, reason="tree-sitter-python not installed")
    def test_file_path_in_relationship(self):
        """Test that file path is correctly set in relationships."""
        code = """def foo():
    pass

def bar():
    foo()
"""
        test_path = Path("test.py")
        analyzer = GraphAnalyzer("python")
        relationships = analyzer.analyze_file(code, test_path)

        assert len(relationships) == 1
        rel = relationships[0]
        # The source file is compared against the resolved form of the path.
        assert rel.source_file == str(test_path.resolve())
        assert rel.target_file is None  # Intra-file

    @pytest.mark.skipif(not TREE_SITTER_PYTHON_AVAILABLE, reason="tree-sitter-python not installed")
    def test_performance_large_file(self):
        """Test performance on a larger generated file.

        The fixture consists of 100 two-line functions (200 lines in total);
        every function except ``func_0`` calls its predecessor.
        """
        import time

        # Generate file with many functions and calls
        lines = []
        for i in range(100):
            lines.append(f"def func_{i}():")
            if i > 0:
                lines.append(f" func_{i-1}()")
            else:
                lines.append(" pass")
        code = "\n".join(lines)

        analyzer = GraphAnalyzer("python")

        # perf_counter() is monotonic, so the measured interval cannot be
        # distorted by wall-clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        relationships = analyzer.analyze_file(code, Path("test.py"))
        elapsed_ms = (time.perf_counter() - start_time) * 1000

        # Should complete in under 500ms
        assert elapsed_ms < 500, f"analysis took {elapsed_ms:.1f} ms"

        # Should find 99 calls (func_1 -> func_0, func_2 -> func_1, ...)
        assert len(relationships) == 99

    @pytest.mark.skipif(not TREE_SITTER_PYTHON_AVAILABLE, reason="tree-sitter-python not installed")
    def test_call_accuracy_rate(self):
        """Test >95% accuracy on known call graph."""
        code = """def a(): pass
def b(): pass
def c(): pass
def d(): pass
def e(): pass

def test1():
    a()
    b()

def test2():
    c()
    d()

def test3():
    e()

def main():
    test1()
    test2()
    test3()
"""
        analyzer = GraphAnalyzer("python")
        relationships = analyzer.analyze_file(code, Path("test.py"))

        # Expected calls: test1->a, test1->b, test2->c, test2->d, test3->e,
        # main->test1, main->test2, main->test3
        expected_calls = {
            ("test1", "a"),
            ("test1", "b"),
            ("test2", "c"),
            ("test2", "d"),
            ("test3", "e"),
            ("main", "test1"),
            ("main", "test2"),
            ("main", "test3"),
        }
        found_calls = {(rel.source_symbol, rel.target_symbol) for rel in relationships}

        # Calculate accuracy; expected_calls is a non-empty literal, so the
        # division needs no zero guard.
        missing_calls = expected_calls - found_calls
        correct = len(expected_calls & found_calls)
        total = len(expected_calls)
        accuracy = (correct / total) * 100

        # Should have >95% accuracy
        assert accuracy >= 95.0, f"missing calls: {sorted(missing_calls)}"
        # Should be 100% for this simple case
        assert correct == total, f"missing calls: {sorted(missing_calls)}"