mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-11 02:33:51 +08:00
Add comprehensive tests for semantic chunking and search functionality
- Implemented tests for the ChunkConfig and Chunker classes, covering default and custom configurations. - Added tests for symbol-based chunking, including single and multiple symbols, handling of empty symbols, and preservation of line numbers. - Developed tests for sliding window chunking, ensuring correct chunking behavior with various content sizes and configurations. - Created integration tests for semantic search, validating embedding generation, vector storage, and search accuracy across a complex codebase. - Included performance tests for embedding generation and search operations. - Established tests for chunking strategies, comparing symbol-based and sliding window approaches. - Enhanced test coverage for edge cases, including handling of unicode characters and out-of-bounds symbol ranges.
This commit is contained in:
@@ -1,12 +1,14 @@
|
||||
"""Tests for CodexLens storage."""
|
||||
|
||||
import sqlite3
|
||||
import threading
|
||||
import pytest
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from codexlens.storage.sqlite_store import SQLiteStore
|
||||
from codexlens.entities import IndexedFile, Symbol
|
||||
from codexlens.errors import StorageError
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -20,6 +22,13 @@ def temp_db():
|
||||
store.close()
|
||||
|
||||
|
||||
@pytest.fixture
def temp_db_path():
    """Yield a database file path located inside a throwaway directory.

    The yielded path is ``test.db`` within a fresh temporary directory; the
    file itself is not created here. The directory is removed when the
    ``with`` block exits after the consuming test has finished.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        db_file = Path(scratch_dir) / "test.db"
        yield db_file
|
||||
|
||||
|
||||
class TestSQLiteStore:
|
||||
"""Tests for SQLiteStore."""
|
||||
|
||||
@@ -158,3 +167,368 @@ class TestSQLiteStore:
|
||||
assert "content='files'" in row["sql"] or "content=files" in row["sql"]
|
||||
finally:
|
||||
store.close()
|
||||
|
||||
|
||||
class TestSQLiteStoreAddFiles:
    """Exercise the batch ``add_files`` entry point."""

    def test_add_files_batch(self, temp_db):
        """Three files with one symbol each are all counted after a batch add."""
        batch = [
            (
                IndexedFile(
                    path=f"/test/{stem}.py",
                    language="python",
                    symbols=[
                        Symbol(name=f"func_{stem}", kind="function", range=(1, 1)),
                    ],
                ),
                f"def func_{stem}(): pass",
            )
            for stem in ("a", "b", "c")
        ]

        temp_db.add_files(batch)

        counts = temp_db.stats()
        assert counts["files"] == 3
        assert counts["symbols"] == 3

    def test_add_files_empty_list(self, temp_db):
        """An empty batch leaves the reported file count at zero."""
        temp_db.add_files([])

        assert temp_db.stats()["files"] == 0
|
||||
|
||||
|
||||
class TestSQLiteStoreSearch:
    """Tests for the full-text and symbol search operations of SQLiteStore."""

    def test_search_fts_with_limit(self, temp_db):
        """FTS search never returns more rows than ``limit``."""
        for i in range(10):
            indexed_file = IndexedFile(
                path=f"/test/file{i}.py",
                language="python",
                symbols=[],
            )
            temp_db.add_file(indexed_file, f"def test{i}(): pass")

        results = temp_db.search_fts("test", limit=3)
        # NOTE(review): this is only an upper bound. Whether "test" matches
        # tokens such as "test0" depends on how search_fts builds its query,
        # which is not visible from this file, so no exact count is asserted.
        assert len(results) <= 3

    def test_search_fts_with_offset(self, temp_db):
        """Consecutive FTS pages are non-empty and do not share any path."""
        for i in range(10):
            indexed_file = IndexedFile(
                path=f"/test/file{i}.py",
                language="python",
                symbols=[],
            )
            temp_db.add_file(indexed_file, f"searchterm content {i}")

        results_page1 = temp_db.search_fts("searchterm", limit=3, offset=0)
        results_page2 = temp_db.search_fts("searchterm", limit=3, offset=3)

        paths1 = {r.path for r in results_page1}
        paths2 = {r.path for r in results_page2}

        # Two empty sets are trivially disjoint, so require real hits on both
        # pages; otherwise a broken search would still let this test pass.
        assert paths1, "first page returned no results"
        assert paths2, "second page returned no results"

        # Pages should be different
        assert paths1.isdisjoint(paths2)

    def test_search_fts_no_results(self, temp_db):
        """A term absent from the indexed content yields an empty result."""
        indexed_file = IndexedFile(
            path="/test/file.py",
            language="python",
            symbols=[],
        )
        temp_db.add_file(indexed_file, "def hello(): pass")

        results = temp_db.search_fts("nonexistent")
        assert len(results) == 0

    def test_search_symbols_by_kind(self, temp_db):
        """Symbol search with ``kind`` returns only symbols of that kind."""
        indexed_file = IndexedFile(
            path="/test/file.py",
            language="python",
            symbols=[
                Symbol(name="MyClass", kind="class", range=(1, 5)),
                Symbol(name="my_func", kind="function", range=(7, 10)),
                Symbol(name="my_method", kind="method", range=(2, 4)),
            ],
        )
        temp_db.add_file(
            indexed_file,
            "class MyClass:\n    def my_method(): pass\ndef my_func(): pass",
        )

        # Search for functions only
        results = temp_db.search_symbols("my", kind="function")
        assert len(results) == 1
        assert results[0].name == "my_func"

    def test_search_symbols_with_limit(self, temp_db):
        """Symbol search returns exactly ``limit`` rows when more match."""
        # Range starts from 1, not 0
        symbols = [
            Symbol(name=f"func{i}", kind="function", range=(i + 1, i + 1))
            for i in range(20)
        ]
        indexed_file = IndexedFile(
            path="/test/file.py",
            language="python",
            symbols=symbols,
        )
        temp_db.add_file(indexed_file, "# lots of functions")

        results = temp_db.search_symbols("func", limit=5)
        assert len(results) == 5

    def test_search_files_only(self, temp_db):
        """``search_files_only`` returns plain string entries for matches."""
        indexed_file = IndexedFile(
            path="/test/file.py",
            language="python",
            symbols=[],
        )
        temp_db.add_file(indexed_file, "def hello(): pass")

        results = temp_db.search_files_only("hello")
        assert len(results) == 1
        assert isinstance(results[0], str)
|
||||
|
||||
|
||||
class TestSQLiteStoreFileOperations:
    """Cover existence checks, removal, mtime lookup and re-indexing of a path."""

    def test_file_exists_true(self, temp_db):
        """A path that has been indexed is reported as existing."""
        entry = IndexedFile(path="/test/file.py", language="python", symbols=[])
        temp_db.add_file(entry, "content")

        assert temp_db.file_exists("/test/file.py")

    def test_file_exists_false(self, temp_db):
        """A path that was never indexed is reported as missing."""
        assert not temp_db.file_exists("/nonexistent/file.py")

    def test_remove_nonexistent_file(self, temp_db):
        """Removing a path that was never indexed returns ``False``."""
        assert temp_db.remove_file("/nonexistent/file.py") is False

    def test_get_file_mtime(self, temp_db):
        """The mtime of an indexed path is either ``None`` or a float."""
        entry = IndexedFile(path="/test/file.py", language="python", symbols=[])
        temp_db.add_file(entry, "content")

        # The indexed path is not a real file on disk, so the store may have
        # no mtime recorded for it; both outcomes are accepted here.
        recorded = temp_db.get_file_mtime("/test/file.py")
        if recorded is not None:
            assert isinstance(recorded, float)

    def test_get_file_mtime_nonexistent(self, temp_db):
        """Asking for the mtime of a path that was never indexed gives ``None``."""
        assert temp_db.get_file_mtime("/nonexistent/file.py") is None

    def test_update_existing_file(self, temp_db):
        """Re-adding the same path keeps one file row and swaps its symbols."""
        original = IndexedFile(
            path="/test/file.py",
            language="python",
            symbols=[Symbol(name="old_func", kind="function", range=(1, 1))],
        )
        temp_db.add_file(original, "def old_func(): pass")

        # Same path again, now carrying different content and a different symbol.
        replacement = IndexedFile(
            path="/test/file.py",
            language="python",
            symbols=[Symbol(name="new_func", kind="function", range=(1, 1))],
        )
        temp_db.add_file(replacement, "def new_func(): pass")

        counts = temp_db.stats()
        assert counts["files"] == 1  # the path is not duplicated
        assert counts["symbols"] == 1  # the earlier symbol is gone

        matches = temp_db.search_symbols("new_func")
        assert len(matches) == 1
|
||||
|
||||
|
||||
class TestSQLiteStoreStats:
    """Check the summary dictionary returned by ``stats``."""

    def test_stats_empty_db(self, temp_db):
        """A fresh store reports zero files, zero symbols and no languages."""
        summary = temp_db.stats()

        assert summary["files"] == 0
        assert summary["symbols"] == 0
        assert summary["languages"] == {}

    def test_stats_with_data(self, temp_db):
        """Counts reflect two files in two languages with three symbols total."""
        python_file = IndexedFile(
            path="/test/a.py",
            language="python",
            symbols=[
                Symbol(name="func1", kind="function", range=(1, 1)),
                Symbol(name="func2", kind="function", range=(2, 2)),
            ],
        )
        javascript_file = IndexedFile(
            path="/test/b.js",
            language="javascript",
            symbols=[Symbol(name="func3", kind="function", range=(1, 1))],
        )
        temp_db.add_files([(python_file, "content"), (javascript_file, "content")])

        summary = temp_db.stats()
        assert summary["files"] == 2
        assert summary["symbols"] == 3

        per_language = summary["languages"]
        assert per_language["python"] == 1
        assert per_language["javascript"] == 1

        assert "db_path" in summary
|
||||
|
||||
|
||||
class TestSQLiteStoreContextManager:
    """Check that SQLiteStore can be driven through a ``with`` statement."""

    def test_context_manager(self, temp_db_path):
        """A store opened via ``with`` accepts a file and counts it."""
        with SQLiteStore(temp_db_path) as managed_store:
            entry = IndexedFile(path="/test/file.py", language="python", symbols=[])
            managed_store.add_file(entry, "content")

            summary = managed_store.stats()
            assert summary["files"] == 1
|
||||
|
||||
|
||||
class TestSQLiteStoreThreadSafety:
    """Check that one store instance can be read from several threads."""

    def test_multiple_threads_read(self, temp_db):
        """Five threads each call ``stats`` and all see the single seeded file."""
        # Seed the store so that every reader has something to count.
        seed = IndexedFile(
            path="/test/file.py",
            language="python",
            symbols=[Symbol(name="test", kind="function", range=(1, 1))],
        )
        temp_db.add_file(seed, "def test(): pass")

        snapshots = []
        failures = []

        def reader():
            # Record exceptions instead of raising: an exception inside a
            # worker thread would not fail the test on its own.
            try:
                snapshots.append(temp_db.stats())
            except Exception as exc:
                failures.append(exc)

        workers = [threading.Thread(target=reader) for _ in range(5)]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        assert not failures
        assert len(snapshots) == 5
        assert all(snapshot["files"] == 1 for snapshot in snapshots)
|
||||
|
||||
|
||||
class TestSQLiteStoreEdgeCases:
    """Edge case tests for SQLiteStore."""

    def test_special_characters_in_path(self, temp_db):
        """A path containing spaces can be indexed and found again."""
        indexed_file = IndexedFile(
            path="/test/file with spaces.py",
            language="python",
            symbols=[],
        )
        temp_db.add_file(indexed_file, "content")

        assert temp_db.file_exists("/test/file with spaces.py")

    def test_unicode_content(self, temp_db):
        """A non-ASCII symbol name can be stored and searched for."""
        indexed_file = IndexedFile(
            path="/test/file.py",
            language="python",
            symbols=[Symbol(name="你好", kind="function", range=(1, 1))],
        )
        temp_db.add_file(indexed_file, "def 你好(): print('世界')")

        symbols = temp_db.search_symbols("你好")
        assert len(symbols) == 1

    def test_very_long_content(self, temp_db):
        """A file with 10,000 lines of content is indexed as one file."""
        long_content = "x = 1\n" * 10000
        indexed_file = IndexedFile(
            path="/test/file.py",
            language="python",
            symbols=[],
        )
        temp_db.add_file(indexed_file, long_content)

        stats = temp_db.stats()
        assert stats["files"] == 1

    def test_file_with_no_symbols(self, temp_db):
        """A file without symbols counts as a file and adds no symbols."""
        indexed_file = IndexedFile(
            path="/test/file.py",
            language="python",
            symbols=[],
        )
        temp_db.add_file(indexed_file, "# just a comment")

        stats = temp_db.stats()
        assert stats["files"] == 1
        assert stats["symbols"] == 0

    def test_file_with_many_symbols(self, temp_db):
        """All 100 symbols of a single file are counted."""
        # Range starts from 1, not 0
        symbols = [
            Symbol(name=f"func_{i}", kind="function", range=(i + 1, i + 1))
            for i in range(100)
        ]
        indexed_file = IndexedFile(
            path="/test/file.py",
            language="python",
            symbols=symbols,
        )
        temp_db.add_file(indexed_file, "# lots of functions")

        stats = temp_db.stats()
        assert stats["symbols"] == 100

    def test_close_and_reopen(self, temp_db_path):
        """Data written in one session is visible after reopening the file.

        Each session closes its store in a ``finally`` clause so that a
        failing call or assertion does not leave the database open while the
        temporary directory holding it is being removed.
        """
        # First session
        store1 = SQLiteStore(temp_db_path)
        try:
            store1.initialize()
            indexed_file = IndexedFile(
                path="/test/file.py",
                language="python",
                symbols=[Symbol(name="test", kind="function", range=(1, 1))],
            )
            store1.add_file(indexed_file, "def test(): pass")
        finally:
            store1.close()

        # Second session
        store2 = SQLiteStore(temp_db_path)
        try:
            store2.initialize()
            stats = store2.stats()
            assert stats["files"] == 1
            assert stats["symbols"] == 1
        finally:
            store2.close()
|
||||
|
||||
Reference in New Issue
Block a user