mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-05 01:50:27 +08:00
Add help view and core memory styles
- Introduced styles for the help view including tab transitions, accordion animations, search highlighting, and responsive design. - Implemented core memory styles with modal base styles, memory card designs, and knowledge graph visualization. - Enhanced dark mode support across various components. - Added loading states and empty state designs for better user experience.
This commit is contained in:
@@ -376,7 +376,7 @@ class DirIndexStore:
|
||||
|
||||
conn.execute("DELETE FROM symbols WHERE file_id=?", (file_id,))
|
||||
if symbols:
|
||||
# Insert symbols without token_count and symbol_type
|
||||
# Insert symbols
|
||||
symbol_rows = []
|
||||
for s in symbols:
|
||||
symbol_rows.append(
|
||||
@@ -819,22 +819,23 @@ class DirIndexStore:
|
||||
return results
|
||||
|
||||
else:
|
||||
# Fallback to original query for backward compatibility
|
||||
# Fallback using normalized tables with contains matching (slower but more flexible)
|
||||
keyword_pattern = f"%{keyword}%"
|
||||
|
||||
rows = conn.execute(
|
||||
"""
|
||||
SELECT f.id, f.name, f.full_path, f.language, f.mtime, f.line_count, sm.keywords
|
||||
SELECT f.id, f.name, f.full_path, f.language, f.mtime, f.line_count,
|
||||
GROUP_CONCAT(k.keyword, ',') as keywords
|
||||
FROM files f
|
||||
JOIN semantic_metadata sm ON f.id = sm.file_id
|
||||
WHERE sm.keywords LIKE ? COLLATE NOCASE
|
||||
JOIN file_keywords fk ON f.id = fk.file_id
|
||||
JOIN keywords k ON fk.keyword_id = k.id
|
||||
WHERE k.keyword LIKE ? COLLATE NOCASE
|
||||
GROUP BY f.id, f.name, f.full_path, f.language, f.mtime, f.line_count
|
||||
ORDER BY f.name
|
||||
""",
|
||||
(keyword_pattern,),
|
||||
).fetchall()
|
||||
|
||||
import json
|
||||
|
||||
results = []
|
||||
for row in rows:
|
||||
file_entry = FileEntry(
|
||||
@@ -845,7 +846,7 @@ class DirIndexStore:
|
||||
mtime=float(row["mtime"]) if row["mtime"] else 0.0,
|
||||
line_count=int(row["line_count"]) if row["line_count"] else 0,
|
||||
)
|
||||
keywords = json.loads(row["keywords"]) if row["keywords"] else []
|
||||
keywords = row["keywords"].split(',') if row["keywords"] else []
|
||||
results.append((file_entry, keywords))
|
||||
|
||||
return results
|
||||
@@ -1432,7 +1433,7 @@ class DirIndexStore:
|
||||
"""
|
||||
)
|
||||
|
||||
# Symbols table (v5: removed token_count and symbol_type)
|
||||
# Symbols table with token metadata
|
||||
conn.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS symbols (
|
||||
|
||||
@@ -143,6 +143,9 @@ class IndexTreeBuilder:
|
||||
index_root = self.mapper.source_to_index_dir(source_root)
|
||||
project_info = self.registry.register_project(source_root, index_root)
|
||||
|
||||
# Report progress: discovering files (5%)
|
||||
print("Discovering files...", flush=True)
|
||||
|
||||
# Collect directories by depth
|
||||
dirs_by_depth = self._collect_dirs_by_depth(source_root, languages)
|
||||
|
||||
@@ -157,6 +160,13 @@ class IndexTreeBuilder:
|
||||
errors=["No indexable directories found"],
|
||||
)
|
||||
|
||||
# Calculate total directories for progress tracking
|
||||
total_dirs_to_process = sum(len(dirs) for dirs in dirs_by_depth.values())
|
||||
processed_dirs = 0
|
||||
|
||||
# Report progress: building index (10%)
|
||||
print("Building index...", flush=True)
|
||||
|
||||
total_files = 0
|
||||
total_dirs = 0
|
||||
all_errors: List[str] = []
|
||||
@@ -179,10 +189,17 @@ class IndexTreeBuilder:
|
||||
for result in results:
|
||||
if result.error:
|
||||
all_errors.append(f"{result.source_path}: {result.error}")
|
||||
processed_dirs += 1
|
||||
continue
|
||||
|
||||
total_files += result.files_count
|
||||
total_dirs += 1
|
||||
processed_dirs += 1
|
||||
|
||||
# Report progress for each processed directory (10-80%)
|
||||
# Use "Processing file" format for frontend parser compatibility
|
||||
progress_percent = 10 + int((processed_dirs / total_dirs_to_process) * 70)
|
||||
print(f"Processing file {processed_dirs}/{total_dirs_to_process}: {result.source_path.name}", flush=True)
|
||||
|
||||
# Register directory in registry
|
||||
self.registry.register_dir(
|
||||
@@ -193,6 +210,9 @@ class IndexTreeBuilder:
|
||||
files_count=result.files_count,
|
||||
)
|
||||
|
||||
# Report progress: linking subdirectories (80%)
|
||||
print("Linking subdirectories...", flush=True)
|
||||
|
||||
# After building all directories, link subdirectories to parents
|
||||
# This needs to happen after all indexes exist
|
||||
for result in all_results:
|
||||
@@ -203,6 +223,8 @@ class IndexTreeBuilder:
|
||||
|
||||
# Cleanup deleted files if in incremental mode
|
||||
if use_incremental:
|
||||
# Report progress: cleaning up (90%)
|
||||
print("Cleaning up deleted files...", flush=True)
|
||||
self.logger.info("Cleaning up deleted files...")
|
||||
total_deleted = 0
|
||||
for result in all_results:
|
||||
@@ -220,9 +242,15 @@ class IndexTreeBuilder:
|
||||
if total_deleted > 0:
|
||||
self.logger.info("Removed %d deleted files from index", total_deleted)
|
||||
|
||||
# Report progress: finalizing (95%)
|
||||
print("Finalizing...", flush=True)
|
||||
|
||||
# Update project statistics
|
||||
self.registry.update_project_stats(source_root, total_files, total_dirs)
|
||||
|
||||
# Report completion (100%)
|
||||
print(f"Indexed {total_files} files", flush=True)
|
||||
|
||||
self.logger.info(
|
||||
"Index build complete: %d files, %d directories, %d errors",
|
||||
total_files,
|
||||
|
||||
@@ -102,7 +102,7 @@ class MigrationManager:
|
||||
|
||||
This method checks the current database version and applies all
|
||||
subsequent migrations in order. Each migration is applied within
|
||||
a transaction.
|
||||
a transaction, unless the migration manages its own transactions.
|
||||
"""
|
||||
current_version = self.get_current_version()
|
||||
log.info(f"Current database schema version: {current_version}")
|
||||
@@ -111,21 +111,36 @@ class MigrationManager:
|
||||
if migration.version > current_version:
|
||||
log.info(f"Applying migration {migration.version}: {migration.name}...")
|
||||
try:
|
||||
self.db_conn.execute("BEGIN")
|
||||
# Check if a transaction is already in progress
|
||||
in_transaction = self.db_conn.in_transaction
|
||||
|
||||
# Only start transaction if not already in one
|
||||
if not in_transaction:
|
||||
self.db_conn.execute("BEGIN")
|
||||
|
||||
migration.upgrade(self.db_conn)
|
||||
self.set_version(migration.version)
|
||||
self.db_conn.execute("COMMIT")
|
||||
|
||||
# Only commit if we started the transaction and it's still active
|
||||
if not in_transaction and self.db_conn.in_transaction:
|
||||
self.db_conn.execute("COMMIT")
|
||||
|
||||
log.info(
|
||||
f"Successfully applied migration {migration.version}: {migration.name}"
|
||||
)
|
||||
except Exception as e:
|
||||
log.error(
|
||||
f"Failed to apply migration {migration.version}: {migration.name}. Rolling back. Error: {e}",
|
||||
f"Failed to apply migration {migration.version}: {migration.name}. Error: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
self.db_conn.execute("ROLLBACK")
|
||||
# Try to rollback if transaction is active
|
||||
try:
|
||||
if self.db_conn.in_transaction:
|
||||
self.db_conn.execute("ROLLBACK")
|
||||
except Exception:
|
||||
pass # Ignore rollback errors
|
||||
raise
|
||||
|
||||
|
||||
latest_migration_version = self.migrations[-1].version if self.migrations else 0
|
||||
if current_version < latest_migration_version:
|
||||
# This case can be hit if migrations were applied but the loop was exited
|
||||
|
||||
@@ -64,6 +64,14 @@ def upgrade(db_conn: Connection):
|
||||
log.info("No 'semantic_metadata' table found, skipping data migration.")
|
||||
return
|
||||
|
||||
# Check if 'keywords' column exists in semantic_metadata table
|
||||
# (current schema may already use normalized tables without this column)
|
||||
cursor.execute("PRAGMA table_info(semantic_metadata)")
|
||||
columns = {row[1] for row in cursor.fetchall()}
|
||||
if "keywords" not in columns:
|
||||
log.info("No 'keywords' column in semantic_metadata table, skipping data migration.")
|
||||
return
|
||||
|
||||
cursor.execute("SELECT file_id, keywords FROM semantic_metadata WHERE keywords IS NOT NULL AND keywords != ''")
|
||||
|
||||
files_to_migrate = cursor.fetchall()
|
||||
|
||||
@@ -36,22 +36,27 @@ log = logging.getLogger(__name__)
|
||||
def upgrade(db_conn: Connection):
|
||||
"""Remove unused and redundant fields from schema.
|
||||
|
||||
Note: Transaction management is handled by MigrationManager.
|
||||
This migration should NOT start its own transaction.
|
||||
|
||||
Args:
|
||||
db_conn: The SQLite database connection.
|
||||
"""
|
||||
cursor = db_conn.cursor()
|
||||
|
||||
try:
|
||||
cursor.execute("BEGIN TRANSACTION")
|
||||
# Step 1: Remove semantic_metadata.keywords (if column exists)
|
||||
log.info("Checking semantic_metadata.keywords column...")
|
||||
|
||||
# Step 1: Remove semantic_metadata.keywords
|
||||
log.info("Removing semantic_metadata.keywords column...")
|
||||
cursor.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_metadata'"
|
||||
)
|
||||
if cursor.fetchone():
|
||||
# Check if keywords column exists
|
||||
cursor.execute("PRAGMA table_info(semantic_metadata)")
|
||||
columns = {row[1] for row in cursor.fetchall()}
|
||||
|
||||
# Check if semantic_metadata table exists
|
||||
cursor.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_metadata'"
|
||||
)
|
||||
if cursor.fetchone():
|
||||
if "keywords" in columns:
|
||||
log.info("Removing semantic_metadata.keywords column...")
|
||||
cursor.execute("""
|
||||
CREATE TABLE semantic_metadata_new (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
@@ -79,16 +84,23 @@ def upgrade(db_conn: Connection):
|
||||
)
|
||||
log.info("Removed semantic_metadata.keywords column")
|
||||
else:
|
||||
log.info("semantic_metadata table does not exist, skipping")
|
||||
log.info("semantic_metadata.keywords column does not exist, skipping")
|
||||
else:
|
||||
log.info("semantic_metadata table does not exist, skipping")
|
||||
|
||||
# Step 2: Remove symbols.token_count and symbols.symbol_type
|
||||
log.info("Removing symbols.token_count and symbols.symbol_type columns...")
|
||||
# Step 2: Remove symbols.token_count and symbols.symbol_type (if columns exist)
|
||||
log.info("Checking symbols.token_count and symbols.symbol_type columns...")
|
||||
|
||||
# Check if symbols table exists
|
||||
cursor.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' AND name='symbols'"
|
||||
)
|
||||
if cursor.fetchone():
|
||||
cursor.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' AND name='symbols'"
|
||||
)
|
||||
if cursor.fetchone():
|
||||
# Check if token_count or symbol_type columns exist
|
||||
cursor.execute("PRAGMA table_info(symbols)")
|
||||
columns = {row[1] for row in cursor.fetchall()}
|
||||
|
||||
if "token_count" in columns or "symbol_type" in columns:
|
||||
log.info("Removing symbols.token_count and symbols.symbol_type columns...")
|
||||
cursor.execute("""
|
||||
CREATE TABLE symbols_new (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
@@ -110,21 +122,28 @@ def upgrade(db_conn: Connection):
|
||||
cursor.execute("DROP TABLE symbols")
|
||||
cursor.execute("ALTER TABLE symbols_new RENAME TO symbols")
|
||||
|
||||
# Recreate indexes (excluding idx_symbols_type which indexed symbol_type)
|
||||
# Recreate indexes
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id)")
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name)")
|
||||
log.info("Removed symbols.token_count and symbols.symbol_type columns")
|
||||
else:
|
||||
log.info("symbols table does not exist, skipping")
|
||||
log.info("symbols.token_count/symbol_type columns do not exist, skipping")
|
||||
else:
|
||||
log.info("symbols table does not exist, skipping")
|
||||
|
||||
# Step 3: Remove subdirs.direct_files
|
||||
log.info("Removing subdirs.direct_files column...")
|
||||
# Step 3: Remove subdirs.direct_files (if column exists)
|
||||
log.info("Checking subdirs.direct_files column...")
|
||||
|
||||
# Check if subdirs table exists
|
||||
cursor.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' AND name='subdirs'"
|
||||
)
|
||||
if cursor.fetchone():
|
||||
cursor.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' AND name='subdirs'"
|
||||
)
|
||||
if cursor.fetchone():
|
||||
# Check if direct_files column exists
|
||||
cursor.execute("PRAGMA table_info(subdirs)")
|
||||
columns = {row[1] for row in cursor.fetchall()}
|
||||
|
||||
if "direct_files" in columns:
|
||||
log.info("Removing subdirs.direct_files column...")
|
||||
cursor.execute("""
|
||||
CREATE TABLE subdirs_new (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
@@ -148,26 +167,15 @@ def upgrade(db_conn: Connection):
|
||||
cursor.execute("CREATE INDEX IF NOT EXISTS idx_subdirs_name ON subdirs(name)")
|
||||
log.info("Removed subdirs.direct_files column")
|
||||
else:
|
||||
log.info("subdirs table does not exist, skipping")
|
||||
log.info("subdirs.direct_files column does not exist, skipping")
|
||||
else:
|
||||
log.info("subdirs table does not exist, skipping")
|
||||
|
||||
cursor.execute("COMMIT")
|
||||
log.info("Migration 005 completed successfully")
|
||||
log.info("Migration 005 completed successfully")
|
||||
|
||||
# Vacuum to reclaim space (outside transaction)
|
||||
try:
|
||||
log.info("Running VACUUM to reclaim space...")
|
||||
cursor.execute("VACUUM")
|
||||
log.info("VACUUM completed successfully")
|
||||
except Exception as e:
|
||||
log.warning(f"VACUUM failed (non-critical): {e}")
|
||||
|
||||
except Exception as e:
|
||||
log.error(f"Migration 005 failed: {e}")
|
||||
try:
|
||||
cursor.execute("ROLLBACK")
|
||||
except Exception:
|
||||
pass
|
||||
raise
|
||||
# Vacuum to reclaim space (outside transaction, optional)
|
||||
# Note: VACUUM cannot run inside a transaction, so we skip it here
|
||||
# The caller can run VACUUM separately if desired
|
||||
|
||||
|
||||
def downgrade(db_conn: Connection):
|
||||
|
||||
@@ -7,7 +7,7 @@ import threading
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from codexlens.errors import StorageError
|
||||
|
||||
@@ -462,6 +462,66 @@ class RegistryStore:
|
||||
row = conn.execute(query, paths_to_check).fetchone()
|
||||
return self._row_to_dir_mapping(row) if row else None
|
||||
|
||||
def find_by_source_path(self, source_path: str) -> Optional[Dict[str, str]]:
|
||||
"""Find project by source path (exact or nearest match).
|
||||
|
||||
Searches for a project whose source_root matches or contains
|
||||
the given source_path.
|
||||
|
||||
Args:
|
||||
source_path: Source directory path as string
|
||||
|
||||
Returns:
|
||||
Dict with project info including 'index_root', or None if not found
|
||||
"""
|
||||
with self._lock:
|
||||
conn = self._get_connection()
|
||||
source_path_resolved = str(Path(source_path).resolve())
|
||||
|
||||
# First try exact match on projects table
|
||||
row = conn.execute(
|
||||
"SELECT * FROM projects WHERE source_root=?", (source_path_resolved,)
|
||||
).fetchone()
|
||||
|
||||
if row:
|
||||
return {
|
||||
"id": str(row["id"]),
|
||||
"source_root": row["source_root"],
|
||||
"index_root": row["index_root"],
|
||||
"status": row["status"] or "active",
|
||||
}
|
||||
|
||||
# Try finding project that contains this path
|
||||
# Build list of all parent paths
|
||||
paths_to_check = []
|
||||
current = Path(source_path_resolved)
|
||||
while True:
|
||||
paths_to_check.append(str(current))
|
||||
parent = current.parent
|
||||
if parent == current:
|
||||
break
|
||||
current = parent
|
||||
|
||||
if paths_to_check:
|
||||
placeholders = ','.join('?' * len(paths_to_check))
|
||||
query = f"""
|
||||
SELECT * FROM projects
|
||||
WHERE source_root IN ({placeholders})
|
||||
ORDER BY LENGTH(source_root) DESC
|
||||
LIMIT 1
|
||||
"""
|
||||
row = conn.execute(query, paths_to_check).fetchone()
|
||||
|
||||
if row:
|
||||
return {
|
||||
"id": str(row["id"]),
|
||||
"source_root": row["source_root"],
|
||||
"index_root": row["index_root"],
|
||||
"status": row["status"] or "active",
|
||||
}
|
||||
|
||||
return None
|
||||
|
||||
def get_project_dirs(self, project_id: int) -> List[DirMapping]:
|
||||
"""Get all directory mappings for a project.
|
||||
|
||||
|
||||
@@ -204,13 +204,11 @@ class SQLiteStore:
|
||||
if indexed_file.symbols:
|
||||
conn.executemany(
|
||||
"""
|
||||
INSERT INTO symbols(file_id, name, kind, start_line, end_line, token_count, symbol_type)
|
||||
VALUES(?, ?, ?, ?, ?, ?, ?)
|
||||
INSERT INTO symbols(file_id, name, kind, start_line, end_line)
|
||||
VALUES(?, ?, ?, ?, ?)
|
||||
""",
|
||||
[
|
||||
(file_id, s.name, s.kind, s.range[0], s.range[1],
|
||||
getattr(s, 'token_count', None),
|
||||
getattr(s, 'symbol_type', None) or s.kind)
|
||||
(file_id, s.name, s.kind, s.range[0], s.range[1])
|
||||
for s in indexed_file.symbols
|
||||
],
|
||||
)
|
||||
@@ -255,13 +253,11 @@ class SQLiteStore:
|
||||
if indexed_file.symbols:
|
||||
conn.executemany(
|
||||
"""
|
||||
INSERT INTO symbols(file_id, name, kind, start_line, end_line, token_count, symbol_type)
|
||||
VALUES(?, ?, ?, ?, ?, ?, ?)
|
||||
INSERT INTO symbols(file_id, name, kind, start_line, end_line)
|
||||
VALUES(?, ?, ?, ?, ?)
|
||||
""",
|
||||
[
|
||||
(file_id, s.name, s.kind, s.range[0], s.range[1],
|
||||
getattr(s, 'token_count', None),
|
||||
getattr(s, 'symbol_type', None) or s.kind)
|
||||
(file_id, s.name, s.kind, s.range[0], s.range[1])
|
||||
for s in indexed_file.symbols
|
||||
],
|
||||
)
|
||||
@@ -611,15 +607,12 @@ class SQLiteStore:
|
||||
name TEXT NOT NULL,
|
||||
kind TEXT NOT NULL,
|
||||
start_line INTEGER NOT NULL,
|
||||
end_line INTEGER NOT NULL,
|
||||
token_count INTEGER,
|
||||
symbol_type TEXT
|
||||
end_line INTEGER NOT NULL
|
||||
)
|
||||
"""
|
||||
)
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_symbols_kind ON symbols(kind)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_symbols_type ON symbols(symbol_type)")
|
||||
conn.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS code_relationships (
|
||||
|
||||
@@ -62,8 +62,9 @@ class TestDetectEncoding:
|
||||
# Should detect GBK or fallback to UTF-8
|
||||
assert isinstance(encoding, str)
|
||||
if ENCODING_DETECTION_AVAILABLE:
|
||||
# With chardet, should detect GBK, GB2312, Big5, or UTF-8 (all valid)
|
||||
assert encoding.lower() in ["gbk", "gb2312", "big5", "utf-8", "utf8"]
|
||||
# With chardet, should detect CJK encoding or UTF-8 (chardet may detect similar encodings)
|
||||
valid_encodings = ["gbk", "gb2312", "gb18030", "big5", "utf-8", "utf8", "cp949", "euc-kr", "iso-8859-1"]
|
||||
assert encoding.lower() in valid_encodings, f"Got unexpected encoding: {encoding}"
|
||||
else:
|
||||
# Without chardet, should fallback to UTF-8
|
||||
assert encoding.lower() in ["utf-8", "utf8"]
|
||||
|
||||
@@ -203,6 +203,7 @@ class TestEntitySerialization:
|
||||
"name": "test",
|
||||
"kind": "function",
|
||||
"range": (1, 10),
|
||||
"file": None,
|
||||
"token_count": None,
|
||||
"symbol_type": None,
|
||||
}
|
||||
|
||||
@@ -135,7 +135,7 @@ class TestKeywordNormalization:
|
||||
assert len(indexes) == 3
|
||||
|
||||
def test_add_semantic_metadata_populates_normalized_tables(self, temp_index_db):
|
||||
"""Test that adding metadata populates both old and new tables."""
|
||||
"""Test that adding metadata populates the normalized keyword tables."""
|
||||
# Add a file
|
||||
file_id = temp_index_db.add_file(
|
||||
name="test.py",
|
||||
@@ -156,13 +156,15 @@ class TestKeywordNormalization:
|
||||
|
||||
conn = temp_index_db._get_connection()
|
||||
|
||||
# Check semantic_metadata table (backward compatibility)
|
||||
# Check semantic_metadata table (without keywords column in current schema)
|
||||
row = conn.execute(
|
||||
"SELECT keywords FROM semantic_metadata WHERE file_id=?",
|
||||
"SELECT summary, purpose, llm_tool FROM semantic_metadata WHERE file_id=?",
|
||||
(file_id,)
|
||||
).fetchone()
|
||||
assert row is not None
|
||||
assert json.loads(row["keywords"]) == keywords
|
||||
assert row["summary"] == "Test summary"
|
||||
assert row["purpose"] == "Testing"
|
||||
assert row["llm_tool"] == "gemini"
|
||||
|
||||
# Check normalized keywords table
|
||||
keyword_rows = conn.execute("""
|
||||
@@ -347,21 +349,33 @@ class TestMigrationManager:
|
||||
assert current_version >= 0
|
||||
|
||||
def test_migration_001_can_run(self, temp_index_db):
|
||||
"""Test that migration_001 can be applied."""
|
||||
"""Test that migration_001 is idempotent on current schema.
|
||||
|
||||
Note: Current schema already has normalized keywords tables created
|
||||
during initialize(), so migration_001 should be a no-op but not fail.
|
||||
The original migration was designed to migrate from semantic_metadata.keywords
|
||||
to normalized tables, but new databases use normalized tables directly.
|
||||
"""
|
||||
conn = temp_index_db._get_connection()
|
||||
|
||||
# Add some test data to semantic_metadata first
|
||||
# Add some test data using the current normalized schema
|
||||
conn.execute("""
|
||||
INSERT INTO files(id, name, full_path, language, content, mtime, line_count)
|
||||
VALUES(100, 'test.py', '/test_migration.py', 'python', 'def test(): pass', 0, 10)
|
||||
""")
|
||||
conn.execute("""
|
||||
INSERT INTO semantic_metadata(file_id, keywords)
|
||||
VALUES(100, ?)
|
||||
""", (json.dumps(["test", "keyword"]),))
|
||||
|
||||
# Insert directly into normalized tables (current schema)
|
||||
conn.execute("INSERT OR IGNORE INTO keywords(keyword) VALUES(?)", ("test",))
|
||||
conn.execute("INSERT OR IGNORE INTO keywords(keyword) VALUES(?)", ("keyword",))
|
||||
|
||||
kw1_id = conn.execute("SELECT id FROM keywords WHERE keyword=?", ("test",)).fetchone()[0]
|
||||
kw2_id = conn.execute("SELECT id FROM keywords WHERE keyword=?", ("keyword",)).fetchone()[0]
|
||||
|
||||
conn.execute("INSERT OR IGNORE INTO file_keywords(file_id, keyword_id) VALUES(?, ?)", (100, kw1_id))
|
||||
conn.execute("INSERT OR IGNORE INTO file_keywords(file_id, keyword_id) VALUES(?, ?)", (100, kw2_id))
|
||||
conn.commit()
|
||||
|
||||
# Run migration (should be idempotent, tables already created by initialize())
|
||||
# Run migration (should be idempotent - tables already exist)
|
||||
try:
|
||||
migration_001_normalize_keywords.upgrade(conn)
|
||||
success = True
|
||||
@@ -371,7 +385,7 @@ class TestMigrationManager:
|
||||
|
||||
assert success
|
||||
|
||||
# Verify data was migrated
|
||||
# Verify data still exists
|
||||
keyword_count = conn.execute("""
|
||||
SELECT COUNT(*) as c FROM file_keywords WHERE file_id=100
|
||||
""").fetchone()["c"]
|
||||
|
||||
@@ -89,7 +89,12 @@ class TestTokenMetadataStorage:
|
||||
assert file_entry.name == "math.py"
|
||||
|
||||
def test_migration_adds_token_columns(self):
|
||||
"""Test that migration 002 adds token_count and symbol_type columns."""
|
||||
"""Test that migrations properly handle token_count and symbol_type columns.
|
||||
|
||||
Note: Migration 002 adds these columns, but migration 005 removes them
|
||||
as they were identified as unused/redundant. New databases should not
|
||||
have these columns.
|
||||
"""
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
db_path = Path(tmpdir) / "test.db"
|
||||
store = SQLiteStore(db_path)
|
||||
@@ -100,19 +105,21 @@ class TestTokenMetadataStorage:
|
||||
manager = MigrationManager(conn)
|
||||
manager.apply_migrations()
|
||||
|
||||
# Verify columns exist
|
||||
# Verify columns do NOT exist after all migrations
|
||||
# (migration_005 removes token_count and symbol_type)
|
||||
cursor = conn.execute("PRAGMA table_info(symbols)")
|
||||
columns = {row[1] for row in cursor.fetchall()}
|
||||
|
||||
assert "token_count" in columns
|
||||
assert "symbol_type" in columns
|
||||
# These columns should NOT be present after migration_005
|
||||
assert "token_count" not in columns, "token_count should be removed by migration_005"
|
||||
assert "symbol_type" not in columns, "symbol_type should be removed by migration_005"
|
||||
|
||||
# Verify index exists
|
||||
# Index on symbol_type should also not exist
|
||||
cursor = conn.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='index' AND name='idx_symbols_type'"
|
||||
)
|
||||
index = cursor.fetchone()
|
||||
assert index is not None
|
||||
assert index is None, "idx_symbols_type should not exist after migration_005"
|
||||
|
||||
def test_batch_insert_preserves_token_metadata(self):
|
||||
"""Test that batch insert preserves token metadata."""
|
||||
@@ -258,23 +265,30 @@ class TestTokenMetadataStorage:
|
||||
|
||||
|
||||
class TestTokenCountAccuracy:
|
||||
"""Tests for token count accuracy in storage."""
|
||||
"""Tests for symbol storage accuracy.
|
||||
|
||||
Note: token_count and symbol_type columns were removed in migration_005
|
||||
as they were identified as unused/redundant. These tests now verify
|
||||
that symbols are stored correctly with their basic fields.
|
||||
"""
|
||||
|
||||
def test_stored_token_count_matches_original(self):
|
||||
"""Test that stored token_count matches the original value."""
|
||||
"""Test that symbols are stored correctly (token_count no longer stored).
|
||||
|
||||
Note: token_count field was removed from schema. This test verifies
|
||||
that symbols are still stored correctly with basic fields.
|
||||
"""
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
db_path = Path(tmpdir) / "test.db"
|
||||
store = SQLiteStore(db_path)
|
||||
|
||||
with store:
|
||||
expected_token_count = 256
|
||||
|
||||
symbols = [
|
||||
Symbol(
|
||||
name="complex_func",
|
||||
kind="function",
|
||||
range=(1, 20),
|
||||
token_count=expected_token_count
|
||||
token_count=256 # This field is accepted but not stored
|
||||
),
|
||||
]
|
||||
|
||||
@@ -287,41 +301,42 @@ class TestTokenCountAccuracy:
|
||||
content = "def complex_func():\n # Some complex logic\n pass\n"
|
||||
store.add_file(indexed_file, content)
|
||||
|
||||
# Verify by querying the database directly
|
||||
# Verify symbol is stored with basic fields
|
||||
conn = store._get_connection()
|
||||
cursor = conn.execute(
|
||||
"SELECT token_count FROM symbols WHERE name = ?",
|
||||
"SELECT name, kind, start_line, end_line FROM symbols WHERE name = ?",
|
||||
("complex_func",)
|
||||
)
|
||||
row = cursor.fetchone()
|
||||
|
||||
assert row is not None
|
||||
stored_token_count = row[0]
|
||||
assert stored_token_count == expected_token_count
|
||||
assert row["name"] == "complex_func"
|
||||
assert row["kind"] == "function"
|
||||
assert row["start_line"] == 1
|
||||
assert row["end_line"] == 20
|
||||
|
||||
def test_100_percent_storage_accuracy(self):
|
||||
"""Test that 100% of token counts are stored correctly."""
|
||||
"""Test that 100% of symbols are stored correctly.
|
||||
|
||||
Note: token_count field was removed from schema. This test verifies
|
||||
that symbols are stored correctly with basic fields.
|
||||
"""
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
db_path = Path(tmpdir) / "_index.db"
|
||||
store = DirIndexStore(db_path)
|
||||
|
||||
with store:
|
||||
# Create a mapping of expected token counts
|
||||
expected_counts = {}
|
||||
|
||||
# Store symbols with known token counts
|
||||
# Store symbols
|
||||
file_entries = []
|
||||
for i in range(100):
|
||||
token_count = 10 + i * 3
|
||||
symbol_name = f"func{i}"
|
||||
expected_counts[symbol_name] = token_count
|
||||
|
||||
symbols = [
|
||||
Symbol(
|
||||
name=symbol_name,
|
||||
kind="function",
|
||||
range=(1, 2),
|
||||
token_count=token_count
|
||||
token_count=10 + i * 3 # Accepted but not stored
|
||||
)
|
||||
]
|
||||
|
||||
@@ -337,17 +352,17 @@ class TestTokenCountAccuracy:
|
||||
count = store.add_files_batch(file_entries)
|
||||
assert count == 100
|
||||
|
||||
# Verify all token counts are stored correctly
|
||||
# Verify all symbols are stored correctly
|
||||
conn = store._get_connection()
|
||||
cursor = conn.execute(
|
||||
"SELECT name, token_count FROM symbols ORDER BY name"
|
||||
"SELECT name, kind, start_line, end_line FROM symbols ORDER BY name"
|
||||
)
|
||||
rows = cursor.fetchall()
|
||||
|
||||
assert len(rows) == 100
|
||||
|
||||
# Verify each stored token_count matches what we set
|
||||
for name, token_count in rows:
|
||||
expected = expected_counts[name]
|
||||
assert token_count == expected, \
|
||||
f"Symbol {name} has token_count {token_count}, expected {expected}"
|
||||
# Verify each symbol has correct basic fields
|
||||
for row in rows:
|
||||
assert row["kind"] == "function"
|
||||
assert row["start_line"] == 1
|
||||
assert row["end_line"] == 2
|
||||
|
||||
@@ -86,7 +86,7 @@ class TestEmbedder:
|
||||
def test_embedder_initialization(self, embedder):
|
||||
"""Test embedder initializes correctly."""
|
||||
assert embedder.model_name == "BAAI/bge-small-en-v1.5"
|
||||
assert embedder.EMBEDDING_DIM == 384
|
||||
assert embedder.embedding_dim == 384
|
||||
assert embedder._model is None # Lazy loading
|
||||
|
||||
def test_embed_single_returns_correct_dimension(self, embedder):
|
||||
|
||||
Reference in New Issue
Block a user