mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-12 02:37:45 +08:00
Add comprehensive tests for schema cleanup migration and search comparison
- Implement tests for migration 005 to verify removal of deprecated fields in the database schema.
- Ensure that new databases are created with a clean schema.
- Validate that keywords are correctly extracted from the normalized file_keywords table.
- Test symbol insertion without deprecated fields and subdir operations without direct_files.
- Create a detailed search comparison test to evaluate vector search vs hybrid search performance.
- Add a script for reindexing projects to extract code relationships and verify GraphAnalyzer functionality.
- Include a test script to check TreeSitter parser availability and relationship extraction from sample files.
This commit is contained in:
306
codex-lens/tests/test_schema_cleanup_migration.py
Normal file
306
codex-lens/tests/test_schema_cleanup_migration.py
Normal file
@@ -0,0 +1,306 @@
|
||||
"""
|
||||
Test migration 005: Schema cleanup for unused/redundant fields.
|
||||
|
||||
Tests that migration 005 successfully removes:
|
||||
1. semantic_metadata.keywords (replaced by file_keywords)
|
||||
2. symbols.token_count (unused)
|
||||
3. symbols.symbol_type (redundant with kind)
|
||||
4. subdirs.direct_files (unused)
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from codexlens.storage.dir_index import DirIndexStore
|
||||
from codexlens.entities import Symbol
|
||||
|
||||
|
||||
class TestSchemaCleanupMigration:
    """Test schema cleanup migration (v4 -> v5).

    Every test works on a throwaway ``_index.db`` inside a temporary
    directory and closes its store in a ``finally`` block, so a failing
    assertion cannot leave the SQLite file open while the directory is
    being removed.
    """

    @staticmethod
    def _column_names(conn, table):
        """Return the set of column names SQLite reports for *table*."""
        # PRAGMA statements cannot take bound parameters; *table* is always a
        # literal supplied by the tests in this class.
        rows = conn.execute(f"PRAGMA table_info({table})").fetchall()
        # Field 1 of every table_info row is the column name.
        return {row[1] for row in rows}

    @staticmethod
    def _create_v4_database(db_path):
        """Build a v4-style database at *db_path* and return the seeded file id.

        The schema still contains the four columns that migration 005 is
        expected to drop (semantic_metadata.keywords, symbols.token_count,
        symbols.symbol_type, subdirs.direct_files). One row each is inserted
        into files, symbols, semantic_metadata and subdirs.
        """
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()

            # Set schema version to 4
            cursor.execute("PRAGMA user_version = 4")

            # Create v4 schema with deprecated fields
            cursor.execute("""
                CREATE TABLE files (
                    id INTEGER PRIMARY KEY,
                    name TEXT NOT NULL,
                    full_path TEXT UNIQUE NOT NULL,
                    language TEXT,
                    content TEXT,
                    mtime REAL,
                    line_count INTEGER
                )
            """)

            cursor.execute("""
                CREATE TABLE subdirs (
                    id INTEGER PRIMARY KEY,
                    name TEXT NOT NULL UNIQUE,
                    index_path TEXT NOT NULL,
                    files_count INTEGER DEFAULT 0,
                    direct_files INTEGER DEFAULT 0,
                    last_updated REAL
                )
            """)

            cursor.execute("""
                CREATE TABLE symbols (
                    id INTEGER PRIMARY KEY,
                    file_id INTEGER REFERENCES files(id) ON DELETE CASCADE,
                    name TEXT NOT NULL,
                    kind TEXT NOT NULL,
                    start_line INTEGER,
                    end_line INTEGER,
                    token_count INTEGER,
                    symbol_type TEXT
                )
            """)

            cursor.execute("""
                CREATE TABLE semantic_metadata (
                    id INTEGER PRIMARY KEY,
                    file_id INTEGER UNIQUE REFERENCES files(id) ON DELETE CASCADE,
                    summary TEXT,
                    keywords TEXT,
                    purpose TEXT,
                    llm_tool TEXT,
                    generated_at REAL
                )
            """)

            cursor.execute("""
                CREATE TABLE keywords (
                    id INTEGER PRIMARY KEY,
                    keyword TEXT NOT NULL UNIQUE
                )
            """)

            cursor.execute("""
                CREATE TABLE file_keywords (
                    file_id INTEGER NOT NULL,
                    keyword_id INTEGER NOT NULL,
                    PRIMARY KEY (file_id, keyword_id),
                    FOREIGN KEY (file_id) REFERENCES files (id) ON DELETE CASCADE,
                    FOREIGN KEY (keyword_id) REFERENCES keywords (id) ON DELETE CASCADE
                )
            """)

            # Insert test data
            cursor.execute(
                "INSERT INTO files (name, full_path, language, content, mtime, line_count) VALUES (?, ?, ?, ?, ?, ?)",
                ("test.py", "/test/test.py", "python", "def test(): pass", 1234567890.0, 1),
            )
            file_id = cursor.lastrowid

            cursor.execute(
                "INSERT INTO symbols (file_id, name, kind, start_line, end_line, token_count, symbol_type) VALUES (?, ?, ?, ?, ?, ?, ?)",
                (file_id, "test", "function", 1, 1, 10, "function"),
            )

            cursor.execute(
                "INSERT INTO semantic_metadata (file_id, summary, keywords, purpose, llm_tool, generated_at) VALUES (?, ?, ?, ?, ?, ?)",
                (file_id, "Test function", '["test", "example"]', "Testing", "gemini", 1234567890.0),
            )

            cursor.execute(
                "INSERT INTO subdirs (name, index_path, files_count, direct_files, last_updated) VALUES (?, ?, ?, ?, ?)",
                ("subdir", "/test/subdir/_index.db", 5, 2, 1234567890.0),
            )

            conn.commit()
            return file_id
        finally:
            # Release the file handle even if the fixture setup itself fails.
            conn.close()

    def test_migration_from_v4_to_v5(self):
        """Test that migration successfully removes deprecated fields."""
        with tempfile.TemporaryDirectory() as tmpdir:
            db_path = Path(tmpdir) / "_index.db"
            store = DirIndexStore(db_path)
            try:
                # Create v4 schema manually (with deprecated fields)
                file_id = self._create_v4_database(db_path)

                # Now initialize store - this should trigger migration
                store.initialize()

                # Verify schema version is now 5
                conn = store._get_connection()
                version_row = conn.execute("PRAGMA user_version").fetchone()
                assert version_row[0] == 5, f"Expected schema version 5, got {version_row[0]}"

                # Check that deprecated columns are removed
                # 1. Check semantic_metadata doesn't have keywords column
                columns = self._column_names(conn, "semantic_metadata")
                assert "keywords" not in columns, "semantic_metadata.keywords should be removed"
                assert "summary" in columns, "semantic_metadata.summary should exist"
                assert "purpose" in columns, "semantic_metadata.purpose should exist"

                # 2. Check symbols doesn't have token_count or symbol_type
                columns = self._column_names(conn, "symbols")
                assert "token_count" not in columns, "symbols.token_count should be removed"
                assert "symbol_type" not in columns, "symbols.symbol_type should be removed"
                assert "kind" in columns, "symbols.kind should exist"

                # 3. Check subdirs doesn't have direct_files
                columns = self._column_names(conn, "subdirs")
                assert "direct_files" not in columns, "subdirs.direct_files should be removed"
                assert "files_count" in columns, "subdirs.files_count should exist"

                # 4. Verify data integrity - data should be preserved
                semantic = store.get_semantic_metadata(file_id)
                assert semantic is not None, "Semantic metadata should be preserved"
                assert semantic["summary"] == "Test function"
                assert semantic["purpose"] == "Testing"
                # Keywords are now served from the file_keywords table, which
                # this fixture never populated, so only the type is checked.
                assert isinstance(semantic["keywords"], list)
            finally:
                store.close()

    def test_new_database_has_clean_schema(self):
        """Test that new databases are created with clean schema (v5)."""
        with tempfile.TemporaryDirectory() as tmpdir:
            db_path = Path(tmpdir) / "_index.db"
            store = DirIndexStore(db_path)
            try:
                store.initialize()
                conn = store._get_connection()

                # Verify schema version is 5
                version_row = conn.execute("PRAGMA user_version").fetchone()
                assert version_row[0] == 5

                # Check that new schema doesn't have deprecated columns
                assert "keywords" not in self._column_names(conn, "semantic_metadata")

                columns = self._column_names(conn, "symbols")
                assert "token_count" not in columns
                assert "symbol_type" not in columns

                assert "direct_files" not in self._column_names(conn, "subdirs")
            finally:
                store.close()

    def test_semantic_metadata_keywords_from_normalized_table(self):
        """Test that keywords are read from file_keywords table, not JSON column."""
        with tempfile.TemporaryDirectory() as tmpdir:
            db_path = Path(tmpdir) / "_index.db"
            store = DirIndexStore(db_path)
            try:
                store.initialize()

                # Add a file
                file_id = store.add_file(
                    name="test.py",
                    full_path="/test/test.py",
                    content="def test(): pass",
                    language="python",
                    symbols=[],
                )

                # Add semantic metadata with keywords
                store.add_semantic_metadata(
                    file_id=file_id,
                    summary="Test function",
                    keywords=["test", "example", "function"],
                    purpose="Testing",
                    llm_tool="gemini",
                )

                # Retrieve and verify keywords come from normalized table
                semantic = store.get_semantic_metadata(file_id)
                assert semantic is not None
                assert sorted(semantic["keywords"]) == ["example", "function", "test"]

                # Verify keywords are in normalized tables
                conn = store._get_connection()
                keyword_count = conn.execute(
                    """SELECT COUNT(*) FROM file_keywords WHERE file_id = ?""",
                    (file_id,),
                ).fetchone()[0]
                assert keyword_count == 3
            finally:
                store.close()

    def test_symbols_insert_without_deprecated_fields(self):
        """Test that symbols can be inserted without token_count and symbol_type."""
        with tempfile.TemporaryDirectory() as tmpdir:
            db_path = Path(tmpdir) / "_index.db"
            store = DirIndexStore(db_path)
            try:
                store.initialize()

                # Add file with symbols
                symbols = [
                    Symbol(name="test_func", kind="function", range=(1, 5)),
                    Symbol(name="TestClass", kind="class", range=(7, 20)),
                ]

                file_id = store.add_file(
                    name="test.py",
                    full_path="/test/test.py",
                    content="def test_func(): pass\n\nclass TestClass:\n    pass",
                    language="python",
                    symbols=symbols,
                )

                # Verify symbols were inserted
                conn = store._get_connection()
                symbol_rows = conn.execute(
                    "SELECT name, kind, start_line, end_line FROM symbols WHERE file_id = ?",
                    (file_id,),
                ).fetchall()

                assert len(symbol_rows) == 2
                # The query has no ORDER BY, so SQLite does not guarantee the
                # row order; compare by name instead of by position.
                kinds_by_name = {row["name"]: row["kind"] for row in symbol_rows}
                assert kinds_by_name == {"test_func": "function", "TestClass": "class"}
            finally:
                store.close()

    def test_subdir_operations_without_direct_files(self):
        """Test that subdir operations work without direct_files field."""
        with tempfile.TemporaryDirectory() as tmpdir:
            db_path = Path(tmpdir) / "_index.db"
            store = DirIndexStore(db_path)
            try:
                store.initialize()

                # Register subdir (direct_files parameter is ignored)
                store.register_subdir(
                    name="subdir",
                    index_path="/test/subdir/_index.db",
                    files_count=10,
                    direct_files=5,  # This should be ignored
                )

                # Retrieve and verify
                subdir = store.get_subdir("subdir")
                assert subdir is not None
                assert subdir.name == "subdir"
                assert subdir.files_count == 10
                assert not hasattr(subdir, "direct_files")  # Should not have this attribute

                # Update stats (direct_files parameter is ignored)
                store.update_subdir_stats("subdir", files_count=15, direct_files=7)

                # Verify update
                subdir = store.get_subdir("subdir")
                assert subdir is not None
                assert subdir.files_count == 15
            finally:
                store.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate pytest's exit code so that running this file directly exits
    # non-zero when a test fails instead of always reporting success.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
||||
Reference in New Issue
Block a user