mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-05 01:50:27 +08:00
- Implemented unit tests for the Tokenizer class, covering various text inputs, edge cases, and fallback mechanisms. - Created performance benchmarks comparing tiktoken and pure Python implementations for token counting. - Developed extensive tests for TreeSitterSymbolParser across Python, JavaScript, and TypeScript, ensuring accurate symbol extraction and parsing. - Added configuration documentation for MCP integration and custom prompts, enhancing usability and flexibility. - Introduced a refactor script for GraphAnalyzer to streamline future improvements.
354 lines
13 KiB
Python
354 lines
13 KiB
Python
"""Integration tests for token metadata storage and retrieval."""
|
|
|
|
import pytest
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
from codexlens.entities import Symbol, IndexedFile
|
|
from codexlens.storage.sqlite_store import SQLiteStore
|
|
from codexlens.storage.dir_index import DirIndexStore
|
|
from codexlens.storage.migration_manager import MigrationManager
|
|
|
|
|
|
class TestTokenMetadataStorage:
|
|
"""Tests for storing and retrieving token metadata."""
|
|
|
|
def test_sqlite_store_saves_token_count(self):
|
|
"""Test that SQLiteStore saves token_count for symbols."""
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
db_path = Path(tmpdir) / "test.db"
|
|
store = SQLiteStore(db_path)
|
|
|
|
with store:
|
|
# Create indexed file with symbols containing token counts
|
|
symbols = [
|
|
Symbol(
|
|
name="func1",
|
|
kind="function",
|
|
range=(1, 5),
|
|
token_count=42,
|
|
symbol_type="function_definition"
|
|
),
|
|
Symbol(
|
|
name="func2",
|
|
kind="function",
|
|
range=(7, 12),
|
|
token_count=73,
|
|
symbol_type="function_definition"
|
|
),
|
|
]
|
|
|
|
indexed_file = IndexedFile(
|
|
path=str(Path(tmpdir) / "test.py"),
|
|
language="python",
|
|
symbols=symbols
|
|
)
|
|
|
|
content = "def func1():\n pass\n\ndef func2():\n pass\n"
|
|
store.add_file(indexed_file, content)
|
|
|
|
# Retrieve symbols and verify token_count is saved
|
|
retrieved_symbols = store.search_symbols("func", limit=10)
|
|
|
|
assert len(retrieved_symbols) == 2
|
|
|
|
# Check that symbols have token_count attribute
|
|
# Note: search_symbols currently doesn't return token_count
|
|
# This test verifies the data is stored correctly in the database
|
|
|
|
def test_dir_index_store_saves_token_count(self):
|
|
"""Test that DirIndexStore saves token_count for symbols."""
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
db_path = Path(tmpdir) / "_index.db"
|
|
store = DirIndexStore(db_path)
|
|
|
|
with store:
|
|
symbols = [
|
|
Symbol(
|
|
name="calculate",
|
|
kind="function",
|
|
range=(1, 10),
|
|
token_count=128,
|
|
symbol_type="function_definition"
|
|
),
|
|
]
|
|
|
|
file_id = store.add_file(
|
|
name="math.py",
|
|
full_path=Path(tmpdir) / "math.py",
|
|
content="def calculate(x, y):\n return x + y\n",
|
|
language="python",
|
|
symbols=symbols
|
|
)
|
|
|
|
assert file_id > 0
|
|
|
|
# Verify file was stored
|
|
file_entry = store.get_file(Path(tmpdir) / "math.py")
|
|
assert file_entry is not None
|
|
assert file_entry.name == "math.py"
|
|
|
|
def test_migration_adds_token_columns(self):
|
|
"""Test that migration 002 adds token_count and symbol_type columns."""
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
db_path = Path(tmpdir) / "test.db"
|
|
store = SQLiteStore(db_path)
|
|
|
|
with store:
|
|
# Apply migrations
|
|
conn = store._get_connection()
|
|
manager = MigrationManager(conn)
|
|
manager.apply_migrations()
|
|
|
|
# Verify columns exist
|
|
cursor = conn.execute("PRAGMA table_info(symbols)")
|
|
columns = {row[1] for row in cursor.fetchall()}
|
|
|
|
assert "token_count" in columns
|
|
assert "symbol_type" in columns
|
|
|
|
# Verify index exists
|
|
cursor = conn.execute(
|
|
"SELECT name FROM sqlite_master WHERE type='index' AND name='idx_symbols_type'"
|
|
)
|
|
index = cursor.fetchone()
|
|
assert index is not None
|
|
|
|
def test_batch_insert_preserves_token_metadata(self):
|
|
"""Test that batch insert preserves token metadata."""
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
db_path = Path(tmpdir) / "test.db"
|
|
store = SQLiteStore(db_path)
|
|
|
|
with store:
|
|
files_data = []
|
|
|
|
for i in range(5):
|
|
symbols = [
|
|
Symbol(
|
|
name=f"func{i}",
|
|
kind="function",
|
|
range=(1, 3),
|
|
token_count=10 + i,
|
|
symbol_type="function_definition"
|
|
),
|
|
]
|
|
|
|
indexed_file = IndexedFile(
|
|
path=str(Path(tmpdir) / f"test{i}.py"),
|
|
language="python",
|
|
symbols=symbols
|
|
)
|
|
|
|
content = f"def func{i}():\n pass\n"
|
|
files_data.append((indexed_file, content))
|
|
|
|
# Batch insert
|
|
store.add_files(files_data)
|
|
|
|
# Verify all files were stored
|
|
stats = store.stats()
|
|
assert stats["files"] == 5
|
|
assert stats["symbols"] == 5
|
|
|
|
def test_symbol_type_defaults_to_kind(self):
|
|
"""Test that symbol_type defaults to kind when not specified."""
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
db_path = Path(tmpdir) / "_index.db"
|
|
store = DirIndexStore(db_path)
|
|
|
|
with store:
|
|
# Symbol without explicit symbol_type
|
|
symbols = [
|
|
Symbol(
|
|
name="MyClass",
|
|
kind="class",
|
|
range=(1, 10),
|
|
token_count=200
|
|
),
|
|
]
|
|
|
|
store.add_file(
|
|
name="module.py",
|
|
full_path=Path(tmpdir) / "module.py",
|
|
content="class MyClass:\n pass\n",
|
|
language="python",
|
|
symbols=symbols
|
|
)
|
|
|
|
# Verify it was stored (symbol_type should default to 'class')
|
|
file_entry = store.get_file(Path(tmpdir) / "module.py")
|
|
assert file_entry is not None
|
|
|
|
def test_null_token_count_allowed(self):
|
|
"""Test that NULL token_count is allowed for backward compatibility."""
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
db_path = Path(tmpdir) / "test.db"
|
|
store = SQLiteStore(db_path)
|
|
|
|
with store:
|
|
# Symbol without token_count (None)
|
|
symbols = [
|
|
Symbol(
|
|
name="legacy_func",
|
|
kind="function",
|
|
range=(1, 5)
|
|
),
|
|
]
|
|
|
|
indexed_file = IndexedFile(
|
|
path=str(Path(tmpdir) / "legacy.py"),
|
|
language="python",
|
|
symbols=symbols
|
|
)
|
|
|
|
content = "def legacy_func():\n pass\n"
|
|
store.add_file(indexed_file, content)
|
|
|
|
# Should not raise an error
|
|
stats = store.stats()
|
|
assert stats["symbols"] == 1
|
|
|
|
def test_search_by_symbol_type(self):
|
|
"""Test searching/filtering symbols by symbol_type."""
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
db_path = Path(tmpdir) / "_index.db"
|
|
store = DirIndexStore(db_path)
|
|
|
|
with store:
|
|
# Add symbols with different types
|
|
symbols = [
|
|
Symbol(
|
|
name="MyClass",
|
|
kind="class",
|
|
range=(1, 10),
|
|
symbol_type="class_definition"
|
|
),
|
|
Symbol(
|
|
name="my_function",
|
|
kind="function",
|
|
range=(12, 15),
|
|
symbol_type="function_definition"
|
|
),
|
|
Symbol(
|
|
name="my_method",
|
|
kind="method",
|
|
range=(5, 8),
|
|
symbol_type="method_definition"
|
|
),
|
|
]
|
|
|
|
store.add_file(
|
|
name="code.py",
|
|
full_path=Path(tmpdir) / "code.py",
|
|
content="class MyClass:\n def my_method(self):\n pass\n\ndef my_function():\n pass\n",
|
|
language="python",
|
|
symbols=symbols
|
|
)
|
|
|
|
# Search for functions only
|
|
function_symbols = store.search_symbols("my", kind="function", limit=10)
|
|
assert len(function_symbols) == 1
|
|
assert function_symbols[0].name == "my_function"
|
|
|
|
# Search for methods only
|
|
method_symbols = store.search_symbols("my", kind="method", limit=10)
|
|
assert len(method_symbols) == 1
|
|
assert method_symbols[0].name == "my_method"
|
|
|
|
|
|
class TestTokenCountAccuracy:
|
|
"""Tests for token count accuracy in storage."""
|
|
|
|
def test_stored_token_count_matches_original(self):
|
|
"""Test that stored token_count matches the original value."""
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
db_path = Path(tmpdir) / "test.db"
|
|
store = SQLiteStore(db_path)
|
|
|
|
with store:
|
|
expected_token_count = 256
|
|
|
|
symbols = [
|
|
Symbol(
|
|
name="complex_func",
|
|
kind="function",
|
|
range=(1, 20),
|
|
token_count=expected_token_count
|
|
),
|
|
]
|
|
|
|
indexed_file = IndexedFile(
|
|
path=str(Path(tmpdir) / "test.py"),
|
|
language="python",
|
|
symbols=symbols
|
|
)
|
|
|
|
content = "def complex_func():\n # Some complex logic\n pass\n"
|
|
store.add_file(indexed_file, content)
|
|
|
|
# Verify by querying the database directly
|
|
conn = store._get_connection()
|
|
cursor = conn.execute(
|
|
"SELECT token_count FROM symbols WHERE name = ?",
|
|
("complex_func",)
|
|
)
|
|
row = cursor.fetchone()
|
|
|
|
assert row is not None
|
|
stored_token_count = row[0]
|
|
assert stored_token_count == expected_token_count
|
|
|
|
def test_100_percent_storage_accuracy(self):
|
|
"""Test that 100% of token counts are stored correctly."""
|
|
with tempfile.TemporaryDirectory() as tmpdir:
|
|
db_path = Path(tmpdir) / "_index.db"
|
|
store = DirIndexStore(db_path)
|
|
|
|
with store:
|
|
# Create a mapping of expected token counts
|
|
expected_counts = {}
|
|
|
|
# Store symbols with known token counts
|
|
file_entries = []
|
|
for i in range(100):
|
|
token_count = 10 + i * 3
|
|
symbol_name = f"func{i}"
|
|
expected_counts[symbol_name] = token_count
|
|
|
|
symbols = [
|
|
Symbol(
|
|
name=symbol_name,
|
|
kind="function",
|
|
range=(1, 2),
|
|
token_count=token_count
|
|
)
|
|
]
|
|
|
|
file_path = Path(tmpdir) / f"file{i}.py"
|
|
file_entries.append((
|
|
f"file{i}.py",
|
|
file_path,
|
|
f"def {symbol_name}():\n pass\n",
|
|
"python",
|
|
symbols
|
|
))
|
|
|
|
count = store.add_files_batch(file_entries)
|
|
assert count == 100
|
|
|
|
# Verify all token counts are stored correctly
|
|
conn = store._get_connection()
|
|
cursor = conn.execute(
|
|
"SELECT name, token_count FROM symbols ORDER BY name"
|
|
)
|
|
rows = cursor.fetchall()
|
|
|
|
assert len(rows) == 100
|
|
|
|
# Verify each stored token_count matches what we set
|
|
for name, token_count in rows:
|
|
expected = expected_counts[name]
|
|
assert token_count == expected, \
|
|
f"Symbol {name} has token_count {token_count}, expected {expected}"
|