Mirror of https://github.com/catlog22/Claude-Code-Workflow.git (synced 2026-02-10 02:24:35 +08:00)
feat(storage): implement storage manager for centralized management and cleanup
- Added a new Storage Manager component to handle storage statistics, project cleanup, and configuration for CCW centralized storage.
- Introduced functions to calculate directory sizes, get project storage stats, and clean specific or all storage.
- Enhanced SQLiteStore with a public API for executing queries securely.
- Updated tests to use the new execute_query method and validate storage management functionality.
- Improved performance by implementing connection pooling with idle-timeout management in SQLiteStore.
- Added new fields (token_count, symbol_type) to the symbols table and adjusted related insertions.
- Enhanced error handling and logging for storage operations.
@@ -16,7 +16,38 @@ class ParseError(CodexLensError):
 
 
 class StorageError(CodexLensError):
-    """Raised when reading/writing index storage fails."""
+    """Raised when reading/writing index storage fails.
+
+    Attributes:
+        message: Human-readable error description
+        db_path: Path to the database file (if applicable)
+        operation: The operation that failed (e.g., 'query', 'initialize', 'migrate')
+        details: Additional context for debugging
+    """
+
+    def __init__(
+        self,
+        message: str,
+        db_path: str | None = None,
+        operation: str | None = None,
+        details: dict | None = None
+    ) -> None:
+        super().__init__(message)
+        self.message = message
+        self.db_path = db_path
+        self.operation = operation
+        self.details = details or {}
+
+    def __str__(self) -> str:
+        parts = [self.message]
+        if self.db_path:
+            parts.append(f"[db: {self.db_path}]")
+        if self.operation:
+            parts.append(f"[op: {self.operation}]")
+        if self.details:
+            detail_str = ", ".join(f"{k}={v}" for k, v in self.details.items())
+            parts.append(f"[{detail_str}]")
+        return " ".join(parts)
+
+
 class SearchError(CodexLensError):
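Note: a quick sketch of how the enriched error renders via __str__ (all values invented for illustration):

    err = StorageError(
        "Failed to open index",
        db_path="/tmp/index.db",
        operation="initialize",
        details={"retries": 3},
    )
    str(err)  # 'Failed to open index [db: /tmp/index.db] [op: initialize] [retries=3]'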
@@ -778,29 +778,39 @@ class ChainSearchEngine:
             List of callee relationship dicts (empty on error)
         """
         try:
             # Use the connection pool via SQLiteStore
             with SQLiteStore(index_path) as store:
-                # Search across all files containing the symbol
-                # Get all files that have this symbol
-                conn = store._get_connection()
-                file_rows = conn.execute(
+                # Single JOIN query to get all callees (fixes N+1 query problem)
+                # Uses public execute_query API instead of _get_connection bypass
+                rows = store.execute_query(
                     """
-                    SELECT DISTINCT f.path
-                    FROM symbols s
-                    JOIN files f ON s.file_id = f.id
-                    WHERE s.name = ?
+                    SELECT
+                        s.name AS source_symbol,
+                        r.target_qualified_name AS target_symbol,
+                        r.relationship_type,
+                        r.source_line,
+                        f.path AS source_file,
+                        r.target_file
+                    FROM code_relationships r
+                    JOIN symbols s ON r.source_symbol_id = s.id
+                    JOIN files f ON s.file_id = f.id
+                    WHERE s.name = ? AND r.relationship_type = 'call'
+                    ORDER BY f.path, r.source_line
+                    LIMIT 100
                     """,
                     (source_symbol,)
-                ).fetchall()
-
-                # Collect results from all matching files
-                all_results = []
-                for file_row in file_rows:
-                    file_path = file_row["path"]
-                    results = store.query_relationships_by_source(source_symbol, file_path)
-                    all_results.extend(results)
-
-                return all_results
+                )
+                return [
+                    {
+                        "source_symbol": row["source_symbol"],
+                        "target_symbol": row["target_symbol"],
+                        "relationship_type": row["relationship_type"],
+                        "source_line": row["source_line"],
+                        "source_file": row["source_file"],
+                        "target_file": row["target_file"],
+                    }
+                    for row in rows
+                ]
         except Exception as exc:
             self.logger.debug(f"Callee search error in {index_path}: {exc}")
             return []
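Note: with the single JOIN, one query replaces the old one-query-per-file loop and the helper returns row dicts directly. A sketch of the expected result shape (method and symbol names are illustrative, not taken from the diff):

    callees = engine._search_callees(index_path, "process_request")
    # [{"source_symbol": "process_request",
    #   "target_symbol": "pkg.utils.validate",
    #   "relationship_type": "call",
    #   "source_line": 42,
    #   "source_file": "src/server.py",
    #   "target_file": "src/utils.py"},
    #  ...]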
@@ -864,10 +874,11 @@ class ChainSearchEngine:
         """
         try:
             with SQLiteStore(index_path) as store:
-                conn = store._get_connection()
-
-                # Search both as base class (target) and derived class (source)
-                rows = conn.execute(
+                # Use UNION to find relationships where class is either:
+                # 1. The base class (target) - find derived classes
+                # 2. The derived class (source) - find parent classes
+                # Uses public execute_query API instead of _get_connection bypass
+                rows = store.execute_query(
                     """
                     SELECT
                         s.name AS source_symbol,
@@ -879,13 +890,23 @@ class ChainSearchEngine:
                     FROM code_relationships r
                     JOIN symbols s ON r.source_symbol_id = s.id
                     JOIN files f ON s.file_id = f.id
-                    WHERE (s.name = ? OR r.target_qualified_name LIKE ?)
-                      AND r.relationship_type = 'inherits'
-                    ORDER BY f.path, r.source_line
+                    WHERE r.target_qualified_name = ? AND r.relationship_type = 'inherits'
+                    UNION
+                    SELECT
+                        s.name AS source_symbol,
+                        r.target_qualified_name,
+                        r.relationship_type,
+                        r.source_line,
+                        f.path AS source_file,
+                        r.target_file
+                    FROM code_relationships r
+                    JOIN symbols s ON r.source_symbol_id = s.id
+                    JOIN files f ON s.file_id = f.id
+                    WHERE s.name = ? AND r.relationship_type = 'inherits'
                     LIMIT 100
                     """,
-                    (class_name, f"%{class_name}%")
-                ).fetchall()
+                    (class_name, class_name)
+                )
 
                 return [
                     {
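Note: the UNION lets one call serve both directions of the hierarchy. A sketch of what each branch contributes, assuming a made-up BaseHandler class with one parent and one subclass (query_sql stands in for the SQL above):

    # Branch 1 (target match): rows where BaseHandler is the base class,
    #   i.e. the classes that inherit from it (JsonHandler -> BaseHandler).
    # Branch 2 (source match): rows where BaseHandler is the derived class,
    #   i.e. its own parents (BaseHandler -> AbstractHandler).
    rows = store.execute_query(query_sql, ("BaseHandler", "BaseHandler"))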
@@ -111,6 +111,8 @@ class Chunker:
         avg_line_len = len(content) / max(len(lines), 1)
         lines_per_chunk = max(10, int(self.config.max_chunk_size / max(avg_line_len, 1)))
         overlap_lines = max(2, int(self.config.overlap / max(avg_line_len, 1)))
+        # Ensure overlap is less than chunk size to prevent infinite loop
+        overlap_lines = min(overlap_lines, lines_per_chunk - 1)
 
         start = 0
         chunk_idx = 0
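Note: a worked example of the failure mode the clamp prevents (config numbers invented):

    # avg_line_len = 40, max_chunk_size = 400  -> lines_per_chunk = 10
    # overlap = 480                            -> overlap_lines = 12
    # Each pass would advance start by 10 - 12 = -2, so the loop never terminates.
    # After the clamp, overlap_lines = min(12, 10 - 1) = 9 and start advances by 1.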
@@ -55,6 +55,10 @@ class DirIndexStore:
     Thread-safe operations with WAL mode enabled.
     """
 
+    # Schema version for migration tracking
+    # Increment this when schema changes require migration
+    SCHEMA_VERSION = 2
+
     def __init__(self, db_path: str | Path) -> None:
         """Initialize directory index store.
@@ -70,10 +74,58 @@ class DirIndexStore:
         with self._lock:
             self.db_path.parent.mkdir(parents=True, exist_ok=True)
             conn = self._get_connection()
+
+            # Check current schema version
+            current_version = self._get_schema_version(conn)
+
+            # Fail gracefully if database is from a newer version
+            if current_version > self.SCHEMA_VERSION:
+                raise StorageError(
+                    f"Database schema version {current_version} is newer than "
+                    f"supported version {self.SCHEMA_VERSION}. "
+                    f"Please update the application or use a compatible database.",
+                    db_path=str(self.db_path),
+                    operation="initialize",
+                    details={
+                        "current_version": current_version,
+                        "supported_version": self.SCHEMA_VERSION
+                    }
+                )
+
+            # Create or migrate schema
             self._create_schema(conn)
             self._create_fts_triggers(conn)
+
+            # Apply versioned migrations if needed
+            if current_version < self.SCHEMA_VERSION:
+                self._apply_migrations(conn, current_version)
+                self._set_schema_version(conn, self.SCHEMA_VERSION)
+
             conn.commit()
 
+    def _get_schema_version(self, conn: sqlite3.Connection) -> int:
+        """Get current schema version from database."""
+        try:
+            row = conn.execute("PRAGMA user_version").fetchone()
+            return row[0] if row else 0
+        except Exception:
+            return 0
+
+    def _set_schema_version(self, conn: sqlite3.Connection, version: int) -> None:
+        """Set schema version in database."""
+        conn.execute(f"PRAGMA user_version = {version}")
+
+    def _apply_migrations(self, conn: sqlite3.Connection, from_version: int) -> None:
+        """Apply schema migrations from current version to latest.
+
+        Args:
+            conn: Database connection
+            from_version: Current schema version
+        """
+        # Migration v0/v1 -> v2: Add 'name' column to files table
+        if from_version < 2:
+            self._migrate_v2_add_name_column(conn)
+
     def close(self) -> None:
         """Close database connection."""
         with self._lock:
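Note: the version tracking rides on SQLite's built-in user_version pragma, which is 0 on a fresh database, so unversioned legacy databases automatically fall into the migration path:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("PRAGMA user_version").fetchone()[0]   # 0 on a brand-new database
    conn.execute("PRAGMA user_version = 2")
    conn.execute("PRAGMA user_version").fetchone()[0]   # 2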
@@ -1106,6 +1158,37 @@ class DirIndexStore:
         except sqlite3.DatabaseError as exc:
             raise StorageError(f"Failed to create schema: {exc}") from exc
 
+    def _migrate_v2_add_name_column(self, conn: sqlite3.Connection) -> None:
+        """Migration v2: Add 'name' column to files table.
+
+        Required for FTS5 external content table.
+
+        Args:
+            conn: Database connection
+        """
+        # Check if files table exists and has columns
+        cursor = conn.execute("PRAGMA table_info(files)")
+        files_columns = {row[1] for row in cursor.fetchall()}
+
+        if not files_columns:
+            return  # No files table yet, will be created fresh
+
+        # Skip if 'name' column already exists
+        if "name" in files_columns:
+            return
+
+        # Add 'name' column with default value
+        conn.execute("ALTER TABLE files ADD COLUMN name TEXT NOT NULL DEFAULT ''")
+
+        # Populate 'name' column from full_path using pathlib for robustness
+        rows = conn.execute("SELECT id, full_path FROM files WHERE name = ''").fetchall()
+        for row in rows:
+            file_id = row[0]
+            full_path = row[1]
+            # Use pathlib.Path.name for cross-platform compatibility
+            name = Path(full_path).name if full_path else ""
+            conn.execute("UPDATE files SET name = ? WHERE id = ?", (name, file_id))
+
     def _create_fts_triggers(self, conn: sqlite3.Connection) -> None:
         """Create FTS5 external content triggers.
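Note: the backfill leans on pathlib's name attribute; PureWindowsPath is shown here only to make the cross-platform point explicit:

    from pathlib import Path, PureWindowsPath

    Path("src/utils/helpers.py").name               # 'helpers.py'
    PureWindowsPath("C:\\repo\\src\\main.py").name  # 'main.py'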
@@ -57,6 +57,13 @@ def upgrade(db_conn: Connection):
     cursor.execute("CREATE INDEX IF NOT EXISTS idx_file_keywords_keyword_id ON file_keywords (keyword_id)")
 
     log.info("Migrating existing keywords from 'semantic_metadata' table...")
+
+    # Check if semantic_metadata table exists before querying
+    cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_metadata'")
+    if not cursor.fetchone():
+        log.info("No 'semantic_metadata' table found, skipping data migration.")
+        return
+
     cursor.execute("SELECT file_id, keywords FROM semantic_metadata WHERE keywords IS NOT NULL AND keywords != ''")
 
     files_to_migrate = cursor.fetchall()
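Note: the guard is the standard sqlite_master existence probe; on a database that never had the table, it returns None and the data migration exits early instead of raising:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    row = conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_metadata'"
    ).fetchone()
    row is None  # True -> skip the data migration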
@@ -5,9 +5,10 @@ from __future__ import annotations
 
 import json
 import sqlite3
 import threading
+import time
 from dataclasses import asdict
 from pathlib import Path
-from typing import Any, Dict, Iterable, List, Optional
+from typing import Any, Dict, Iterable, List, Optional, Tuple
 
 from codexlens.entities import CodeRelationship, IndexedFile, SearchResult, Symbol
 from codexlens.errors import StorageError
@@ -15,29 +16,49 @@ from codexlens.errors import StorageError
 
 
 class SQLiteStore:
     """SQLiteStore providing FTS5 search and symbol lookup.
 
     Implements thread-local connection pooling for improved performance.
     """
 
     # Maximum number of connections to keep in pool to prevent memory leaks
     MAX_POOL_SIZE = 32
+    # Idle timeout in seconds (10 minutes)
+    IDLE_TIMEOUT = 600
 
     def __init__(self, db_path: str | Path) -> None:
         self.db_path = Path(db_path)
         self._lock = threading.RLock()
         self._local = threading.local()
         self._pool_lock = threading.Lock()
-        self._pool: Dict[int, sqlite3.Connection] = {}
+        # Pool stores (connection, last_access_time) tuples
+        self._pool: Dict[int, Tuple[sqlite3.Connection, float]] = {}
         self._pool_generation = 0
 
     def _get_connection(self) -> sqlite3.Connection:
         """Get or create a thread-local database connection."""
         thread_id = threading.get_ident()
+        current_time = time.time()
 
         if getattr(self._local, "generation", None) == self._pool_generation:
             conn = getattr(self._local, "conn", None)
             if conn is not None:
+                # Update last access time
+                with self._pool_lock:
+                    if thread_id in self._pool:
+                        self._pool[thread_id] = (conn, current_time)
                 return conn
 
         with self._pool_lock:
-            conn = self._pool.get(thread_id)
-            if conn is None:
+            pool_entry = self._pool.get(thread_id)
+            if pool_entry is not None:
+                conn, _ = pool_entry
+                # Update last access time
+                self._pool[thread_id] = (conn, current_time)
+            else:
+                # Clean up stale and idle connections if pool is too large
+                if len(self._pool) >= self.MAX_POOL_SIZE:
+                    self._cleanup_stale_connections()
+
                 conn = sqlite3.connect(self.db_path, check_same_thread=False)
                 conn.row_factory = sqlite3.Row
                 conn.execute("PRAGMA journal_mode=WAL")
@@ -45,17 +66,40 @@
                 conn.execute("PRAGMA foreign_keys=ON")
                 # Memory-mapped I/O for faster reads (30GB limit)
                 conn.execute("PRAGMA mmap_size=30000000000")
-            self._pool[thread_id] = conn
+                self._pool[thread_id] = (conn, current_time)
 
         self._local.conn = conn
         self._local.generation = self._pool_generation
         return conn
 
+    def _cleanup_stale_connections(self) -> None:
+        """Remove connections for threads that no longer exist or have been idle too long."""
+        current_time = time.time()
+        # Get list of active thread IDs
+        active_threads = {t.ident for t in threading.enumerate() if t.ident is not None}
+
+        # Find connections to remove: dead threads or idle timeout exceeded
+        stale_ids = []
+        for tid, (conn, last_access) in list(self._pool.items()):
+            is_dead_thread = tid not in active_threads
+            is_idle = (current_time - last_access) > self.IDLE_TIMEOUT
+            if is_dead_thread or is_idle:
+                stale_ids.append(tid)
+
+        # Close and remove stale connections
+        for tid in stale_ids:
+            try:
+                conn, _ = self._pool[tid]
+                conn.close()
+            except Exception:
+                pass
+            del self._pool[tid]
+
     def close(self) -> None:
         """Close all pooled connections."""
         with self._lock:
             with self._pool_lock:
-                for conn in self._pool.values():
+                for conn, _ in self._pool.values():
                     conn.close()
                 self._pool.clear()
                 self._pool_generation += 1
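Note: a minimal sketch of the access pattern the pool is built for (file name and worker count assumed); each thread lazily creates and then reuses its own connection, and close() reclaims them all:

    from concurrent.futures import ThreadPoolExecutor

    store = SQLiteStore("index.db")
    with ThreadPoolExecutor(max_workers=4) as ex:
        # Four threads -> at most four pooled connections, reused across tasks.
        list(ex.map(lambda _: store.execute_query("SELECT 1"), range(16)))
    store.close()  # closes every pooled connection and bumps the generation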
@@ -72,6 +116,56 @@ class SQLiteStore:
     def __exit__(self, exc_type: object, exc: object, tb: object) -> None:
         self.close()
 
+    def execute_query(
+        self,
+        sql: str,
+        params: tuple = (),
+        allow_writes: bool = False
+    ) -> List[Dict[str, Any]]:
+        """Execute a raw SQL query and return results as dictionaries.
+
+        This is the public API for executing custom queries without bypassing
+        encapsulation via _get_connection().
+
+        By default, only SELECT queries are allowed. Use allow_writes=True
+        for trusted internal code that needs to execute other statements.
+
+        Args:
+            sql: SQL query string with ? placeholders for parameters
+            params: Tuple of parameter values to bind
+            allow_writes: If True, allow non-SELECT statements (default False)
+
+        Returns:
+            List of result rows as dictionaries
+
+        Raises:
+            StorageError: If query execution fails or validation fails
+        """
+        # Validate query type for security
+        sql_stripped = sql.strip().upper()
+        if not allow_writes:
+            # Only allow SELECT and WITH (for CTEs) statements
+            if not (sql_stripped.startswith("SELECT") or sql_stripped.startswith("WITH")):
+                raise StorageError(
+                    "Only SELECT queries are allowed. "
+                    "Use allow_writes=True for trusted internal operations.",
+                    db_path=str(self.db_path),
+                    operation="execute_query",
+                    details={"query_type": sql_stripped.split()[0] if sql_stripped else "EMPTY"}
+                )
+
+        try:
+            conn = self._get_connection()
+            rows = conn.execute(sql, params).fetchall()
+            return [dict(row) for row in rows]
+        except sqlite3.Error as e:
+            raise StorageError(
+                f"Query execution failed: {e}",
+                db_path=str(self.db_path),
+                operation="execute_query",
+                details={"error_type": type(e).__name__}
+            ) from e
+
     def initialize(self) -> None:
         with self._lock:
             self.db_path.parent.mkdir(parents=True, exist_ok=True)
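Note: how the read-only guard behaves in practice (index.db and the returned row are assumed for illustration):

    store = SQLiteStore("index.db")
    store.execute_query("SELECT name FROM symbols WHERE kind = ?", ("function",))
    # -> [{"name": "parse_file"}, ...]

    store.execute_query("DELETE FROM symbols")
    # -> raises StorageError: Only SELECT queries are allowed. ...

    store.execute_query("DELETE FROM symbols", allow_writes=True)
    # -> permitted, intended for trusted internal code only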
@@ -110,11 +204,13 @@ class SQLiteStore:
             if indexed_file.symbols:
                 conn.executemany(
                     """
-                    INSERT INTO symbols(file_id, name, kind, start_line, end_line)
-                    VALUES(?, ?, ?, ?, ?)
+                    INSERT INTO symbols(file_id, name, kind, start_line, end_line, token_count, symbol_type)
+                    VALUES(?, ?, ?, ?, ?, ?, ?)
                     """,
                     [
-                        (file_id, s.name, s.kind, s.range[0], s.range[1])
+                        (file_id, s.name, s.kind, s.range[0], s.range[1],
+                         getattr(s, 'token_count', None),
+                         getattr(s, 'symbol_type', None) or s.kind)
                        for s in indexed_file.symbols
                    ],
                )
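Note: the getattr defaults keep older Symbol objects insertable after the schema change; a sketch (the Symbol construction is shown schematically, the real signature may differ):

    s = Symbol(name="run", kind="function", range=(10, 42))  # no new attributes set
    getattr(s, "token_count", None)              # -> None, stored as NULL
    getattr(s, "symbol_type", None) or s.kind    # -> "function", falls back to kind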
@@ -159,11 +255,13 @@ class SQLiteStore:
             if indexed_file.symbols:
                 conn.executemany(
                     """
-                    INSERT INTO symbols(file_id, name, kind, start_line, end_line)
-                    VALUES(?, ?, ?, ?, ?)
+                    INSERT INTO symbols(file_id, name, kind, start_line, end_line, token_count, symbol_type)
+                    VALUES(?, ?, ?, ?, ?, ?, ?)
                     """,
                     [
-                        (file_id, s.name, s.kind, s.range[0], s.range[1])
+                        (file_id, s.name, s.kind, s.range[0], s.range[1],
+                         getattr(s, 'token_count', None),
+                         getattr(s, 'symbol_type', None) or s.kind)
                        for s in indexed_file.symbols
                    ],
                )
@@ -513,12 +611,15 @@ class SQLiteStore:
                     name TEXT NOT NULL,
                     kind TEXT NOT NULL,
                     start_line INTEGER NOT NULL,
-                    end_line INTEGER NOT NULL
+                    end_line INTEGER NOT NULL,
+                    token_count INTEGER,
+                    symbol_type TEXT
                 )
                 """
             )
             conn.execute("CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name)")
             conn.execute("CREATE INDEX IF NOT EXISTS idx_symbols_kind ON symbols(kind)")
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_symbols_type ON symbols(symbol_type)")
             conn.execute(
                 """
                 CREATE TABLE IF NOT EXISTS code_relationships (
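Note: the new column and idx_symbols_type index exist to serve type-filtered lookups through the public API, for example:

    rows = store.execute_query(
        "SELECT name, token_count FROM symbols WHERE symbol_type = ?",
        ("class",),
    )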