mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-05 01:50:27 +08:00
- Implemented tests for the ChunkConfig and Chunker classes, covering default and custom configurations. - Added tests for symbol-based chunking, including single and multiple symbols, handling of empty symbols, and preservation of line numbers. - Developed tests for sliding window chunking, ensuring correct chunking behavior with various content sizes and configurations. - Created integration tests for semantic search, validating embedding generation, vector storage, and search accuracy across a complex codebase. - Included performance tests for embedding generation and search operations. - Established tests for chunking strategies, comparing symbol-based and sliding window approaches. - Enhanced test coverage for edge cases, including handling of unicode characters and out-of-bounds symbol ranges.
426 lines
14 KiB
Python
426 lines
14 KiB
Python
"""Tests for CodexLens parsers."""
|
|
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from codexlens.config import Config
|
|
from codexlens.parsers.factory import (
|
|
ParserFactory,
|
|
SimpleRegexParser,
|
|
_parse_go_symbols,
|
|
_parse_java_symbols,
|
|
_parse_js_ts_symbols,
|
|
_parse_python_symbols,
|
|
_parse_generic_symbols,
|
|
)
|
|
|
|
|
|
# Probe for the optional tree-sitter JavaScript grammar. Any failure while
# importing it (not only ImportError) marks the grammar as unavailable.
try:
    import tree_sitter_javascript  # type: ignore[import-not-found]  # noqa: F401
except Exception:
    TREE_SITTER_JS_AVAILABLE = False
else:
    TREE_SITTER_JS_AVAILABLE = True
|
|
|
|
|
|
class TestPythonParser:
    """Exercises ``_parse_python_symbols`` on short Python snippets."""

    def test_parse_function(self):
        """A plain ``def`` produces one symbol of kind ``function``."""
        found = _parse_python_symbols("def hello():\n pass")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("hello", "function")

    def test_parse_async_function(self):
        """An ``async def`` is reported with kind ``function`` as well."""
        found = _parse_python_symbols("async def fetch_data():\n pass")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("fetch_data", "function")

    def test_parse_class(self):
        """A ``class`` statement produces one symbol of kind ``class``."""
        found = _parse_python_symbols("class MyClass:\n pass")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("MyClass", "class")

    def test_parse_method(self):
        """A ``def`` inside a class is reported after the class, as ``method``."""
        found = _parse_python_symbols("class MyClass:\n def method(self):\n pass")
        assert len(found) == 2
        described = [(sym.name, sym.kind) for sym in found]
        assert described == [("MyClass", "class"), ("method", "method")]

    def test_parse_async_method(self):
        """An ``async def`` inside a class is reported with kind ``method``."""
        found = _parse_python_symbols(
            "class MyClass:\n async def async_method(self):\n pass"
        )
        assert len(found) == 2
        second = found[1]
        assert (second.name, second.kind) == ("async_method", "method")
|
|
|
|
|
|
class TestJavaScriptParser:
    """Exercises ``_parse_js_ts_symbols`` on JavaScript/TypeScript snippets."""

    def test_parse_function(self):
        """A ``function`` declaration yields one ``function`` symbol."""
        found = _parse_js_ts_symbols("function hello() {}")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("hello", "function")

    def test_parse_async_function(self):
        """An ``async function`` declaration yields one ``function`` symbol."""
        found = _parse_js_ts_symbols("async function fetchData() {}")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("fetchData", "function")

    def test_parse_arrow_function(self):
        """A ``const`` bound to an arrow function is reported as ``function``."""
        found = _parse_js_ts_symbols("const hello = () => {}")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("hello", "function")

    def test_parse_async_arrow_function(self):
        """A ``const`` bound to an async arrow function is a ``function``."""
        found = _parse_js_ts_symbols("const fetchData = async () => {}")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("fetchData", "function")

    def test_parse_class(self):
        """A ``class`` declaration yields one ``class`` symbol."""
        found = _parse_js_ts_symbols("class MyClass {}")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("MyClass", "class")

    def test_parse_export_function(self):
        """An exported function declaration is still found by name."""
        found = _parse_js_ts_symbols("export function hello() {}")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("hello", "function")

    def test_parse_export_class(self):
        """An exported class declaration is still found by name."""
        found = _parse_js_ts_symbols("export class MyClass {}")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("MyClass", "class")

    def test_parse_export_arrow_function(self):
        """An exported ``const`` arrow function is reported as ``function``."""
        found = _parse_js_ts_symbols("export const hello = () => {}")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("hello", "function")

    @pytest.mark.skipif(not TREE_SITTER_JS_AVAILABLE, reason="tree-sitter-javascript not installed")
    def test_parse_class_methods(self):
        """Class members are reported as ``method``; ``constructor`` is not."""
        source = "".join(
            [
                "class MyClass {\n",
                " method() {}\n",
                " async asyncMethod() {}\n",
                " static staticMethod() {}\n",
                " constructor() {}\n",
                "}",
            ]
        )
        pairs = [(sym.name, sym.kind) for sym in _parse_js_ts_symbols(source)]
        for expected in (
            ("MyClass", "class"),
            ("method", "method"),
            ("asyncMethod", "method"),
            ("staticMethod", "method"),
        ):
            assert expected in pairs
        # The constructor must not show up as a symbol of any kind.
        assert "constructor" not in [name for name, _ in pairs]
|
|
|
|
|
|
class TestJavaParser:
    """Exercises ``_parse_java_symbols`` on small Java snippets."""

    @staticmethod
    def _method_names(source):
        """Return the names of all symbols of kind ``method`` found in *source*."""
        return [sym.name for sym in _parse_java_symbols(source) if sym.kind == "method"]

    def test_parse_class(self):
        """A ``public class`` yields one ``class`` symbol."""
        found = _parse_java_symbols("public class MyClass {\n}")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("MyClass", "class")

    def test_parse_class_without_public(self):
        """A class without an access modifier is still found by name."""
        found = _parse_java_symbols("class InternalClass {\n}")
        assert len(found) == 1
        assert found[0].name == "InternalClass"

    def test_parse_method(self):
        """A method inside a class is reported after the class, as ``method``."""
        found = _parse_java_symbols(
            "public class Test {\n public void doSomething() {}\n}"
        )
        assert len(found) == 2
        described = [(sym.name, sym.kind) for sym in found]
        assert described == [("Test", "class"), ("doSomething", "method")]

    def test_parse_static_method(self):
        """A ``static`` method with an array parameter is found."""
        names = self._method_names(
            "public class Test {\n public static void main(String[] args) {}\n}"
        )
        assert "main" in names

    def test_parse_private_method(self):
        """A ``private`` method with a primitive return type is found."""
        names = self._method_names(
            "public class Test {\n private int calculate() { return 0; }\n}"
        )
        assert "calculate" in names

    def test_parse_generic_return_type(self):
        """A method whose return type carries generics is found."""
        names = self._method_names(
            "public class Test {\n public List<String> getItems() { return null; }\n}"
        )
        assert "getItems" in names
|
|
|
|
|
|
class TestGoParser:
    """Exercises ``_parse_go_symbols`` on small Go snippets."""

    def test_parse_function(self):
        """A parameterless ``func`` yields one ``function`` symbol."""
        found = _parse_go_symbols("func hello() {\n}")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("hello", "function")

    def test_parse_function_with_params(self):
        """Parameters and a return type do not hide the function name."""
        found = _parse_go_symbols("func greet(name string) string {\n return name\n}")
        assert len(found) == 1
        assert found[0].name == "greet"

    def test_parse_method(self):
        """A receiver method is reported under its own name, kind ``function``."""
        found = _parse_go_symbols("func (s *Server) Start() error {\n return nil\n}")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("Start", "function")

    def test_parse_struct(self):
        """A ``struct`` type declaration is reported with kind ``class``."""
        found = _parse_go_symbols("type User struct {\n Name string\n}")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("User", "class")

    def test_parse_interface(self):
        """An ``interface`` type declaration is reported with kind ``class``."""
        found = _parse_go_symbols(
            "type Reader interface {\n Read(p []byte) (n int, err error)\n}"
        )
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("Reader", "class")

    def test_parse_multiple_symbols(self):
        """A struct, a constructor function and a method are all found."""
        source = (
            "type Config struct {\n"
            "    Port int\n"
            "}\n"
            "\n"
            "func NewConfig() *Config {\n"
            "    return &Config{}\n"
            "}\n"
            "\n"
            "func (c *Config) Validate() error {\n"
            "    return nil\n"
            "}\n"
        )
        found_names = [sym.name for sym in _parse_go_symbols(source)]
        for expected in ("Config", "NewConfig", "Validate"):
            assert expected in found_names
|
|
|
|
|
|
class TestGenericParser:
    """Exercises ``_parse_generic_symbols`` with one keyword per test."""

    def test_parse_def_keyword(self):
        """``def`` introduces a ``function`` symbol."""
        found = _parse_generic_symbols("def something():\n pass")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("something", "function")

    def test_parse_function_keyword(self):
        """``function`` introduces a symbol with the declared name."""
        found = _parse_generic_symbols("function doIt() {}")
        assert len(found) == 1
        assert found[0].name == "doIt"

    def test_parse_func_keyword(self):
        """``func`` introduces a symbol with the declared name."""
        found = _parse_generic_symbols("func test() {}")
        assert len(found) == 1
        assert found[0].name == "test"

    def test_parse_class_keyword(self):
        """``class`` introduces a ``class`` symbol."""
        found = _parse_generic_symbols("class MyClass {}")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("MyClass", "class")

    def test_parse_struct_keyword(self):
        """``struct`` is reported with kind ``class``."""
        found = _parse_generic_symbols("struct Point { x: i32, y: i32 }")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("Point", "class")

    def test_parse_interface_keyword(self):
        """``interface`` is reported with kind ``class``."""
        found = _parse_generic_symbols("interface Drawable {}")
        assert len(found) == 1
        only = found[0]
        assert (only.name, only.kind) == ("Drawable", "class")
|
|
|
|
|
|
class TestParserInterface:
    """End-to-end checks of ``SimpleRegexParser.parse`` per language."""

    def test_simple_parser_parse(self):
        """Parsing Python source records the language and the one symbol."""
        result = SimpleRegexParser("python").parse(
            "def hello():\n pass", Path("test.py")
        )
        assert result.language == "python"
        assert len(result.symbols) == 1
        assert result.symbols[0].name == "hello"

    def test_simple_parser_javascript(self):
        """Parsing JavaScript source records the language and one symbol."""
        result = SimpleRegexParser("javascript").parse(
            "function test() {}", Path("test.js")
        )
        assert result.language == "javascript"
        assert len(result.symbols) == 1

    def test_simple_parser_typescript(self):
        """Parsing TypeScript source records the language and one symbol."""
        result = SimpleRegexParser("typescript").parse(
            "export class Service {}", Path("test.ts")
        )
        assert result.language == "typescript"
        assert len(result.symbols) == 1

    def test_simple_parser_java(self):
        """Parsing Java source records the language and one symbol."""
        result = SimpleRegexParser("java").parse(
            "public class Main {}", Path("Main.java")
        )
        assert result.language == "java"
        assert len(result.symbols) == 1

    def test_simple_parser_go(self):
        """Parsing Go source records the language and one symbol."""
        result = SimpleRegexParser("go").parse("func main() {}", Path("main.go"))
        assert result.language == "go"
        assert len(result.symbols) == 1

    def test_simple_parser_unknown_language(self):
        """An unrecognised language id is kept as-is and yields no chunks."""
        result = SimpleRegexParser("zig").parse(
            "fn main() void {}", Path("main.zig")
        )
        assert result.language == "zig"
        # Uses generic parser
        assert result.chunks == []

    def test_indexed_file_path_resolved(self):
        """A relative input path comes back as an absolute path."""
        result = SimpleRegexParser("python").parse(
            "def test(): pass", Path("./test.py")
        )
        # Path should be resolved to absolute
        assert Path(result.path).is_absolute()
|
|
|
|
|
|
class TestParserFactory:
|
|
"""Tests for ParserFactory."""
|
|
|
|
def test_factory_creates_parser(self):
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
import os
|
|
os.environ["CODEXLENS_DATA_DIR"] = tmpdir
|
|
try:
|
|
config = Config()
|
|
factory = ParserFactory(config)
|
|
parser = factory.get_parser("python")
|
|
assert parser is not None
|
|
finally:
|
|
del os.environ["CODEXLENS_DATA_DIR"]
|
|
|
|
def test_factory_caches_parsers(self):
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
import os
|
|
os.environ["CODEXLENS_DATA_DIR"] = tmpdir
|
|
try:
|
|
config = Config()
|
|
factory = ParserFactory(config)
|
|
parser1 = factory.get_parser("python")
|
|
parser2 = factory.get_parser("python")
|
|
assert parser1 is parser2
|
|
finally:
|
|
del os.environ["CODEXLENS_DATA_DIR"]
|
|
|
|
def test_factory_different_languages(self):
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
import os
|
|
os.environ["CODEXLENS_DATA_DIR"] = tmpdir
|
|
try:
|
|
config = Config()
|
|
factory = ParserFactory(config)
|
|
py_parser = factory.get_parser("python")
|
|
js_parser = factory.get_parser("javascript")
|
|
assert py_parser is not js_parser
|
|
finally:
|
|
del os.environ["CODEXLENS_DATA_DIR"]
|
|
|
|
|
|
class TestParserEdgeCases:
|
|
"""Edge case tests for parsers."""
|
|
|
|
def test_empty_code(self):
    """An empty source string yields no symbols."""
    found = _parse_python_symbols("")
    assert len(found) == 0
|
|
|
|
def test_only_comments(self):
    """Source consisting solely of comment lines yields no symbols."""
    found = _parse_python_symbols("# This is a comment\n# Another comment")
    assert len(found) == 0
|
|
|
|
def test_nested_functions(self):
    """Both the outer function and the function nested in it are found."""
    source = (
        "def outer():\n"
        "    def inner():\n"
        "        pass\n"
        "    return inner\n"
    )
    found_names = [sym.name for sym in _parse_python_symbols(source)]
    for expected in ("outer", "inner"):
        assert expected in found_names
|
|
|
|
def test_unicode_function_name(self):
    """A non-ASCII function name must not make the parser raise."""
    found = _parse_python_symbols("def 你好():\n pass")
    # Regex may not support unicode function names, tree-sitter does,
    # so only the return type is checked, not the contents.
    assert isinstance(found, list)
|
|
|
|
def test_long_file(self):
    """All 100 generated function definitions are reported."""
    # Build a source text with many functions, one snippet per index.
    source = "\n".join(f"def func_{i}():\n pass\n" for i in range(100))
    found = _parse_python_symbols(source)
    assert len(found) == 100
|
|
|
|
def test_malformed_code(self):
|
|
# Parser should handle malformed code gracefully
|
|
code = "def broken(\n pass"
|
|
# Should not crash
|
|
symbols = _parse_python_symbols(code)
|
|
# May or may not find symbols depending on regex
|