mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-10 02:24:35 +08:00
Add comprehensive tests for tokenizer, performance benchmarks, and TreeSitter parser functionality
- Implemented unit tests for the Tokenizer class, covering various text inputs, edge cases, and fallback mechanisms. - Created performance benchmarks comparing tiktoken and pure Python implementations for token counting. - Developed extensive tests for TreeSitterSymbolParser across Python, JavaScript, and TypeScript, ensuring accurate symbol extraction and parsing. - Added configuration documentation for MCP integration and custom prompts, enhancing usability and flexibility. - Introduced a refactor script for GraphAnalyzer to streamline future improvements.
This commit is contained in:
@@ -149,15 +149,21 @@ class DirIndexStore:
|
||||
# Replace symbols
|
||||
conn.execute("DELETE FROM symbols WHERE file_id=?", (file_id,))
|
||||
if symbols:
|
||||
# Extract token_count and symbol_type from symbol metadata if available
|
||||
symbol_rows = []
|
||||
for s in symbols:
|
||||
token_count = getattr(s, 'token_count', None)
|
||||
symbol_type = getattr(s, 'symbol_type', None) or s.kind
|
||||
symbol_rows.append(
|
||||
(file_id, s.name, s.kind, s.range[0], s.range[1], token_count, symbol_type)
|
||||
)
|
||||
|
||||
conn.executemany(
|
||||
"""
|
||||
INSERT INTO symbols(file_id, name, kind, start_line, end_line)
|
||||
VALUES(?, ?, ?, ?, ?)
|
||||
INSERT INTO symbols(file_id, name, kind, start_line, end_line, token_count, symbol_type)
|
||||
VALUES(?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
[
|
||||
(file_id, s.name, s.kind, s.range[0], s.range[1])
|
||||
for s in symbols
|
||||
],
|
||||
symbol_rows,
|
||||
)
|
||||
|
||||
conn.commit()
|
||||
@@ -216,15 +222,21 @@ class DirIndexStore:
|
||||
|
||||
conn.execute("DELETE FROM symbols WHERE file_id=?", (file_id,))
|
||||
if symbols:
|
||||
# Extract token_count and symbol_type from symbol metadata if available
|
||||
symbol_rows = []
|
||||
for s in symbols:
|
||||
token_count = getattr(s, 'token_count', None)
|
||||
symbol_type = getattr(s, 'symbol_type', None) or s.kind
|
||||
symbol_rows.append(
|
||||
(file_id, s.name, s.kind, s.range[0], s.range[1], token_count, symbol_type)
|
||||
)
|
||||
|
||||
conn.executemany(
|
||||
"""
|
||||
INSERT INTO symbols(file_id, name, kind, start_line, end_line)
|
||||
VALUES(?, ?, ?, ?, ?)
|
||||
INSERT INTO symbols(file_id, name, kind, start_line, end_line, token_count, symbol_type)
|
||||
VALUES(?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
[
|
||||
(file_id, s.name, s.kind, s.range[0], s.range[1])
|
||||
for s in symbols
|
||||
],
|
||||
symbol_rows,
|
||||
)
|
||||
|
||||
conn.commit()
|
||||
@@ -1021,7 +1033,9 @@ class DirIndexStore:
|
||||
name TEXT NOT NULL,
|
||||
kind TEXT NOT NULL,
|
||||
start_line INTEGER,
|
||||
end_line INTEGER
|
||||
end_line INTEGER,
|
||||
token_count INTEGER,
|
||||
symbol_type TEXT
|
||||
)
|
||||
"""
|
||||
)
|
||||
@@ -1083,6 +1097,7 @@ class DirIndexStore:
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_subdirs_name ON subdirs(name)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_symbols_type ON symbols(symbol_type)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_semantic_file ON semantic_metadata(file_id)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_keywords_keyword ON keywords(keyword)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_file_keywords_file_id ON file_keywords(file_id)")
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
"""
|
||||
Migration 002: Add token_count and symbol_type to symbols table.
|
||||
|
||||
This migration adds token counting metadata to symbols for accurate chunk
|
||||
splitting and performance optimization. It also adds symbol_type for better
|
||||
filtering in searches.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from sqlite3 import Connection
|
||||
|
||||
log = logging.getLogger(__name__)


def upgrade(db_conn: Connection):
    """Apply migration 002: add token metadata columns to ``symbols``.

    - Adds the ``token_count`` INTEGER column if it is missing.
    - Adds the ``symbol_type`` TEXT column if it is missing.
    - Creates the ``idx_symbols_type`` index on ``symbol_type``.

    Rows that already exist end up with NULL in both new columns, because
    ``ALTER TABLE ... ADD COLUMN`` without a DEFAULT leaves existing rows NULL.

    The migration is idempotent: existing columns are detected up front and
    skipped, so running it twice is harmless. Any other database error (for
    example a missing ``symbols`` table) propagates to the caller instead of
    being logged as "might already exist".

    Args:
        db_conn: The SQLite database connection.

    Raises:
        sqlite3.Error: If one of the schema statements fails.
    """
    cursor = db_conn.cursor()

    # The column name is the second field of every PRAGMA table_info row.
    existing_columns = {
        row[1] for row in cursor.execute("PRAGMA table_info(symbols)")
    }

    new_columns = (("token_count", "INTEGER"), ("symbol_type", "TEXT"))
    for column_name, column_type in new_columns:
        log.info("Adding %s column to symbols table...", column_name)
        if column_name in existing_columns:
            log.warning(
                "Column %s already exists on symbols table; skipping.",
                column_name,
            )
            continue
        # Identifiers come from the fixed tuple above, never from user input.
        cursor.execute(
            f"ALTER TABLE symbols ADD COLUMN {column_name} {column_type}"
        )
        log.info("Successfully added %s column.", column_name)

    log.info("Creating index on symbol_type for efficient filtering...")
    cursor.execute("CREATE INDEX IF NOT EXISTS idx_symbols_type ON symbols(symbol_type)")

    log.info("Migration 002 completed successfully.")
|
||||
@@ -0,0 +1,57 @@
|
||||
"""
|
||||
Migration 003: Add code relationships storage.
|
||||
|
||||
This migration introduces the `code_relationships` table to store semantic
|
||||
relationships between code symbols (function calls, inheritance, imports).
|
||||
This enables graph-based code navigation and dependency analysis.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from sqlite3 import Connection
|
||||
|
||||
log = logging.getLogger(__name__)


def upgrade(db_conn: Connection):
    """Apply migration 003: create the ``code_relationships`` table.

    - Creates ``code_relationships`` (if absent) with a cascading foreign key
      from ``source_symbol_id`` to ``symbols(id)``.
    - Creates indexes on the source symbol id, the target qualified name,
      the relationship type and the source line.

    The target of a relationship is stored as a qualified name
    (``target_qualified_name``) rather than as a symbol id.

    Args:
        db_conn: The SQLite database connection.
    """
    cur = db_conn.cursor()

    log.info("Creating 'code_relationships' table...")
    create_table_sql = """
        CREATE TABLE IF NOT EXISTS code_relationships (
            id INTEGER PRIMARY KEY,
            source_symbol_id INTEGER NOT NULL,
            target_qualified_name TEXT NOT NULL,
            relationship_type TEXT NOT NULL,
            source_line INTEGER NOT NULL,
            target_file TEXT,
            FOREIGN KEY (source_symbol_id) REFERENCES symbols (id) ON DELETE CASCADE
        )
        """
    cur.execute(create_table_sql)

    log.info("Creating indexes for code_relationships...")
    # Executed in the same order as the four individual statements they replace.
    index_statements = (
        "CREATE INDEX IF NOT EXISTS idx_relationships_source ON code_relationships (source_symbol_id)",
        "CREATE INDEX IF NOT EXISTS idx_relationships_target ON code_relationships (target_qualified_name)",
        "CREATE INDEX IF NOT EXISTS idx_relationships_type ON code_relationships (relationship_type)",
        "CREATE INDEX IF NOT EXISTS idx_relationships_source_line ON code_relationships (source_line)",
    )
    for statement in index_statements:
        cur.execute(statement)

    log.info("Finished creating code_relationships table and indexes.")
|
||||
@@ -9,7 +9,7 @@ from dataclasses import asdict
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional
|
||||
|
||||
from codexlens.entities import IndexedFile, SearchResult, Symbol
|
||||
from codexlens.entities import CodeRelationship, IndexedFile, SearchResult, Symbol
|
||||
from codexlens.errors import StorageError
|
||||
|
||||
|
||||
@@ -309,13 +309,184 @@ class SQLiteStore:
|
||||
"SELECT language, COUNT(*) AS c FROM files GROUP BY language ORDER BY c DESC"
|
||||
).fetchall()
|
||||
languages = {row["language"]: row["c"] for row in lang_rows}
|
||||
# Include relationship count if table exists
|
||||
relationship_count = 0
|
||||
try:
|
||||
rel_row = conn.execute("SELECT COUNT(*) AS c FROM code_relationships").fetchone()
|
||||
relationship_count = int(rel_row["c"]) if rel_row else 0
|
||||
except sqlite3.DatabaseError:
|
||||
pass
|
||||
|
||||
return {
|
||||
"files": int(file_count),
|
||||
"symbols": int(symbol_count),
|
||||
"relationships": relationship_count,
|
||||
"languages": languages,
|
||||
"db_path": str(self.db_path),
|
||||
}
|
||||
|
||||
|
||||
def add_relationships(self, file_path: str | Path, relationships: List[CodeRelationship]) -> None:
    """Replace the stored code relationships for a file.

    Every relationship currently attached to a symbol of the file is deleted
    and the given ones are inserted instead. A relationship is attached to
    the smallest symbol of the file that is named ``rel.source_symbol`` and
    whose line range contains ``rel.source_line``; relationships for which no
    such symbol exists are skipped.

    The delete and the inserts are applied as one unit: the commit happens
    whether or not any row was inserted, and if anything fails after the
    delete the pending changes are rolled back before the error is re-raised.

    Args:
        file_path: Path to the file containing the relationships.
        relationships: List of CodeRelationship objects to store. An empty
            list is a no-op; existing relationships are left untouched.

    Raises:
        StorageError: If the file is not present in the index.
    """
    if not relationships:
        return

    with self._lock:
        conn = self._get_connection()
        resolved_path = str(Path(file_path).resolve())

        # Get file_id
        row = conn.execute("SELECT id FROM files WHERE path=?", (resolved_path,)).fetchone()
        if not row:
            raise StorageError(f"File not found in index: {file_path}")
        file_id = int(row["id"])

        try:
            # Delete existing relationships for symbols in this file
            conn.execute(
                """
                DELETE FROM code_relationships
                WHERE source_symbol_id IN (
                    SELECT id FROM symbols WHERE file_id=?
                )
                """,
                (file_id,)
            )

            # Resolve each relationship to the id of its source symbol
            relationship_rows = []
            for rel in relationships:
                # Narrowest enclosing symbol wins when ranges are nested.
                symbol_row = conn.execute(
                    """
                    SELECT id FROM symbols
                    WHERE file_id=? AND name=? AND start_line <= ? AND end_line >= ?
                    ORDER BY (end_line - start_line) ASC
                    LIMIT 1
                    """,
                    (file_id, rel.source_symbol, rel.source_line, rel.source_line)
                ).fetchone()

                if not symbol_row:
                    continue

                relationship_rows.append((
                    int(symbol_row["id"]),
                    rel.target_symbol,
                    rel.relationship_type,
                    rel.source_line,
                    rel.target_file,
                ))

            if relationship_rows:
                conn.executemany(
                    """
                    INSERT INTO code_relationships(
                        source_symbol_id, target_qualified_name, relationship_type,
                        source_line, target_file
                    )
                    VALUES(?, ?, ?, ?, ?)
                    """,
                    relationship_rows
                )

            # Commit even when nothing was inserted so the delete is persisted.
            conn.commit()
        except Exception:
            # Do not leave the delete pending in a half-finished transaction.
            conn.rollback()
            raise
|
||||
|
||||
def query_relationships_by_target(
    self, target_name: str, *, limit: int = 100
) -> List[Dict[str, Any]]:
    """Look up relationships that point at a target symbol (find all callers).

    The match is an exact comparison against the stored target qualified
    name. Results are ordered by source file path, then by source line.

    Args:
        target_name: Name of the target symbol
        limit: Maximum number of results

    Returns:
        List of dicts with the keys ``source_symbol``, ``target_symbol``,
        ``relationship_type``, ``source_line``, ``source_file`` and
        ``target_file``.
    """
    sql = """
        SELECT
            s.name AS source_symbol,
            r.target_qualified_name,
            r.relationship_type,
            r.source_line,
            f.path AS source_file,
            r.target_file
        FROM code_relationships r
        JOIN symbols s ON r.source_symbol_id = s.id
        JOIN files f ON s.file_id = f.id
        WHERE r.target_qualified_name = ?
        ORDER BY f.path, r.source_line
        LIMIT ?
        """
    # (result key, result-set column) pairs, in the order the keys are emitted.
    key_to_column = (
        ("source_symbol", "source_symbol"),
        ("target_symbol", "target_qualified_name"),
        ("relationship_type", "relationship_type"),
        ("source_line", "source_line"),
        ("source_file", "source_file"),
        ("target_file", "target_file"),
    )

    with self._lock:
        connection = self._get_connection()
        matches = connection.execute(sql, (target_name, limit)).fetchall()

        results = []
        for match in matches:
            results.append({key: match[column] for key, column in key_to_column})
        return results
|
||||
|
||||
def query_relationships_by_source(
|
||||
self, source_symbol: str, source_file: str | Path, *, limit: int = 100
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Query relationships by source symbol (find what a symbol calls).
|
||||
|
||||
Args:
|
||||
source_symbol: Name of the source symbol
|
||||
source_file: File path containing the source symbol
|
||||
limit: Maximum number of results
|
||||
|
||||
Returns:
|
||||
List of dicts containing relationship info
|
||||
"""
|
||||
with self._lock:
|
||||
conn = self._get_connection()
|
||||
resolved_path = str(Path(source_file).resolve())
|
||||
|
||||
rows = conn.execute(
|
||||
"""
|
||||
SELECT
|
||||
s.name AS source_symbol,
|
||||
r.target_qualified_name,
|
||||
r.relationship_type,
|
||||
r.source_line,
|
||||
f.path AS source_file,
|
||||
r.target_file
|
||||
FROM code_relationships r
|
||||
JOIN symbols s ON r.source_symbol_id = s.id
|
||||
JOIN files f ON s.file_id = f.id
|
||||
WHERE s.name = ? AND f.path = ?
|
||||
ORDER BY r.source_line
|
||||
LIMIT ?
|
||||
""",
|
||||
(source_symbol, resolved_path, limit)
|
||||
).fetchall()
|
||||
|
||||
return [
|
||||
{
|
||||
"source_symbol": row["source_symbol"],
|
||||
"target_symbol": row["target_qualified_name"],
|
||||
"relationship_type": row["relationship_type"],
|
||||
"source_line": row["source_line"],
|
||||
"source_file": row["source_file"],
|
||||
"target_file": row["target_file"],
|
||||
}
|
||||
for row in rows
|
||||
]
|
||||
|
||||
def _connect(self) -> sqlite3.Connection:
    """Legacy alias kept for backward compatibility.

    Simply hands back whatever ``_get_connection`` returns.
    """
    connection = self._get_connection()
    return connection
|
||||
@@ -348,6 +519,20 @@ class SQLiteStore:
|
||||
)
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_symbols_kind ON symbols(kind)")
|
||||
conn.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS code_relationships (
|
||||
id INTEGER PRIMARY KEY,
|
||||
source_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
|
||||
target_qualified_name TEXT NOT NULL,
|
||||
relationship_type TEXT NOT NULL,
|
||||
source_line INTEGER NOT NULL,
|
||||
target_file TEXT
|
||||
)
|
||||
"""
|
||||
)
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_rel_target ON code_relationships(target_qualified_name)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_rel_source ON code_relationships(source_symbol_id)")
|
||||
conn.commit()
|
||||
except sqlite3.DatabaseError as exc:
|
||||
raise StorageError(f"Failed to initialize database schema: {exc}") from exc
|
||||
|
||||
Reference in New Issue
Block a user