Refactor code structure and remove redundant changes

catlog22
2026-01-24 14:47:47 +08:00
parent cf5fecd66d
commit f2b0a5bbc9
113 changed files with 43217 additions and 235 deletions

View File

@@ -0,0 +1 @@
# This file makes the 'migrations' directory a Python package.

View File

@@ -0,0 +1,123 @@
"""
Migration 001: Normalize keywords into separate tables.
This migration introduces two new tables, `keywords` and `file_keywords`, to
store semantic keywords in a normalized fashion. It then migrates the existing
keywords from the JSON-encoded `keywords` column of the `semantic_metadata`
table into these new tables, which is intended to speed up keyword-based
searches significantly.
"""
import json
import logging
from sqlite3 import Connection
log = logging.getLogger(__name__)
def upgrade(db_conn: Connection):
"""
Applies the migration to normalize keywords.
- Creates `keywords` and `file_keywords` tables.
- Creates indexes for efficient querying.
    - Migrates data from `semantic_metadata.keywords` to the new tables.
Args:
db_conn: The SQLite database connection.
"""
cursor = db_conn.cursor()
log.info("Creating 'keywords' and 'file_keywords' tables...")
# Create a table to store unique keywords
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS keywords (
id INTEGER PRIMARY KEY,
keyword TEXT NOT NULL UNIQUE
)
"""
)
# Create a join table to link files and keywords (many-to-many)
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS file_keywords (
file_id INTEGER NOT NULL,
keyword_id INTEGER NOT NULL,
PRIMARY KEY (file_id, keyword_id),
FOREIGN KEY (file_id) REFERENCES files (id) ON DELETE CASCADE,
FOREIGN KEY (keyword_id) REFERENCES keywords (id) ON DELETE CASCADE
)
"""
)
log.info("Creating indexes for new keyword tables...")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_keywords_keyword ON keywords (keyword)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_file_keywords_file_id ON file_keywords (file_id)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_file_keywords_keyword_id ON file_keywords (keyword_id)")
log.info("Migrating existing keywords from 'semantic_metadata' table...")
# Check if semantic_metadata table exists before querying
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_metadata'")
if not cursor.fetchone():
log.info("No 'semantic_metadata' table found, skipping data migration.")
return
# Check if 'keywords' column exists in semantic_metadata table
# (current schema may already use normalized tables without this column)
cursor.execute("PRAGMA table_info(semantic_metadata)")
columns = {row[1] for row in cursor.fetchall()}
if "keywords" not in columns:
log.info("No 'keywords' column in semantic_metadata table, skipping data migration.")
return
cursor.execute("SELECT file_id, keywords FROM semantic_metadata WHERE keywords IS NOT NULL AND keywords != ''")
files_to_migrate = cursor.fetchall()
if not files_to_migrate:
log.info("No existing files with semantic metadata to migrate.")
return
log.info(f"Found {len(files_to_migrate)} files with semantic metadata to migrate.")
for file_id, keywords_json in files_to_migrate:
if not keywords_json:
continue
try:
keywords = json.loads(keywords_json)
if not isinstance(keywords, list):
log.warning(f"Keywords for file_id {file_id} is not a list, skipping.")
continue
for keyword in keywords:
if not isinstance(keyword, str):
log.warning(f"Non-string keyword '{keyword}' found for file_id {file_id}, skipping.")
continue
keyword = keyword.strip()
if not keyword:
continue
# Get or create keyword_id
cursor.execute("INSERT OR IGNORE INTO keywords (keyword) VALUES (?)", (keyword,))
cursor.execute("SELECT id FROM keywords WHERE keyword = ?", (keyword,))
keyword_id_result = cursor.fetchone()
if keyword_id_result:
keyword_id = keyword_id_result[0]
# Link file to keyword
cursor.execute(
"INSERT OR IGNORE INTO file_keywords (file_id, keyword_id) VALUES (?, ?)",
(file_id, keyword_id),
)
else:
log.error(f"Failed to retrieve or create keyword_id for keyword: {keyword}")
except json.JSONDecodeError as e:
log.warning(f"Could not parse keywords for file_id {file_id}: {e}")
except Exception as e:
log.error(f"An unexpected error occurred during migration for file_id {file_id}: {e}", exc_info=True)
log.info("Finished migrating keywords.")

View File

@@ -0,0 +1,48 @@
"""
Migration 002: Add token_count and symbol_type to symbols table.
This migration adds token counting metadata to symbols for accurate chunk
splitting and performance optimization. It also adds symbol_type for better
filtering in searches.
"""
import logging
from sqlite3 import Connection, OperationalError
log = logging.getLogger(__name__)
def upgrade(db_conn: Connection):
"""
Applies the migration to add token metadata to symbols.
- Adds token_count column to symbols table
- Adds symbol_type column to symbols table (for future use)
- Creates index on symbol_type for efficient filtering
    - Leaves token_count NULL for existing symbols (to be calculated lazily)
Args:
db_conn: The SQLite database connection.
"""
cursor = db_conn.cursor()
log.info("Adding token_count column to symbols table...")
try:
cursor.execute("ALTER TABLE symbols ADD COLUMN token_count INTEGER")
log.info("Successfully added token_count column.")
    except OperationalError as e:
        # Column might already exist
        log.warning(f"Could not add token_count column (might already exist): {e}")
log.info("Adding symbol_type column to symbols table...")
try:
cursor.execute("ALTER TABLE symbols ADD COLUMN symbol_type TEXT")
log.info("Successfully added symbol_type column.")
    except OperationalError as e:
        # Column might already exist
        log.warning(f"Could not add symbol_type column (might already exist): {e}")
log.info("Creating index on symbol_type for efficient filtering...")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_symbols_type ON symbols(symbol_type)")
log.info("Migration 002 completed successfully.")

View File

@@ -0,0 +1,232 @@
"""
Migration 004: Add dual FTS tables for exact and fuzzy matching.
This migration introduces two FTS5 tables:
- files_fts_exact: Uses unicode61 tokenizer for exact token matching
- files_fts_fuzzy: Uses trigram tokenizer (or extended unicode61) for substring/fuzzy matching
Both tables are synchronized with the files table via triggers for automatic updates.
"""
import logging
from sqlite3 import Connection
from codexlens.storage.sqlite_utils import check_trigram_support, get_sqlite_version
log = logging.getLogger(__name__)
def upgrade(db_conn: Connection):
"""
Applies the migration to add dual FTS tables.
- Drops old files_fts table and triggers
- Creates files_fts_exact with unicode61 tokenizer
- Creates files_fts_fuzzy with trigram or extended unicode61 tokenizer
- Creates synchronized triggers for both tables
- Rebuilds FTS indexes from files table
Args:
db_conn: The SQLite database connection.
"""
cursor = db_conn.cursor()
try:
# Check trigram support
has_trigram = check_trigram_support(db_conn)
version = get_sqlite_version(db_conn)
log.info(f"SQLite version: {'.'.join(map(str, version))}")
if has_trigram:
log.info("Trigram tokenizer available, using for fuzzy FTS table")
fuzzy_tokenizer = "trigram"
else:
log.warning(
f"Trigram tokenizer not available (requires SQLite >= 3.34), "
f"using extended unicode61 tokenizer for fuzzy matching"
)
fuzzy_tokenizer = "unicode61 tokenchars '_-.'"
# Start transaction
cursor.execute("BEGIN TRANSACTION")
# Check if files table has 'name' column (v2 schema doesn't have it)
cursor.execute("PRAGMA table_info(files)")
columns = {row[1] for row in cursor.fetchall()}
if 'name' not in columns:
log.info("Adding 'name' column to files table (v2 schema upgrade)...")
# Add name column
cursor.execute("ALTER TABLE files ADD COLUMN name TEXT")
# Populate name from path (extract filename from last '/')
# Use Python to do the extraction since SQLite doesn't have reverse()
cursor.execute("SELECT rowid, path FROM files")
rows = cursor.fetchall()
for rowid, path in rows:
                # Extract filename from path (handle both '/' and '\' separators)
                name = path.replace('\\', '/').split('/')[-1]
cursor.execute("UPDATE files SET name = ? WHERE rowid = ?", (name, rowid))
# Rename 'path' column to 'full_path' if needed
if 'path' in columns and 'full_path' not in columns:
log.info("Renaming 'path' to 'full_path' (v2 schema upgrade)...")
# Check if indexed_at column exists in v2 schema
has_indexed_at = 'indexed_at' in columns
has_mtime = 'mtime' in columns
# SQLite doesn't support RENAME COLUMN before 3.25, so use table recreation
cursor.execute("""
CREATE TABLE files_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
full_path TEXT NOT NULL UNIQUE,
content TEXT,
language TEXT,
mtime REAL,
indexed_at TEXT
)
""")
# Build INSERT statement based on available columns
            # Note: id is omitted so AUTOINCREMENT assigns fresh integer ids
            # (the v2 schema keyed rows by path, not an integer id)
if has_indexed_at and has_mtime:
cursor.execute("""
INSERT INTO files_new (name, full_path, content, language, mtime, indexed_at)
SELECT name, path, content, language, mtime, indexed_at FROM files
""")
elif has_indexed_at:
cursor.execute("""
INSERT INTO files_new (name, full_path, content, language, indexed_at)
SELECT name, path, content, language, indexed_at FROM files
""")
elif has_mtime:
cursor.execute("""
INSERT INTO files_new (name, full_path, content, language, mtime)
SELECT name, path, content, language, mtime FROM files
""")
else:
cursor.execute("""
INSERT INTO files_new (name, full_path, content, language)
SELECT name, path, content, language FROM files
""")
cursor.execute("DROP TABLE files")
cursor.execute("ALTER TABLE files_new RENAME TO files")
log.info("Dropping old FTS triggers and table...")
# Drop old triggers
cursor.execute("DROP TRIGGER IF EXISTS files_ai")
cursor.execute("DROP TRIGGER IF EXISTS files_ad")
cursor.execute("DROP TRIGGER IF EXISTS files_au")
# Drop old FTS table
cursor.execute("DROP TABLE IF EXISTS files_fts")
# Create exact FTS table (unicode61 with underscores/hyphens/dots as token chars)
# Note: tokenchars includes '.' to properly tokenize qualified names like PortRole.FLOW
log.info("Creating files_fts_exact table with unicode61 tokenizer...")
cursor.execute(
"""
CREATE VIRTUAL TABLE files_fts_exact USING fts5(
name, full_path UNINDEXED, content,
content='files',
content_rowid='id',
tokenize="unicode61 tokenchars '_-.'"
)
"""
)
# Create fuzzy FTS table (trigram or extended unicode61)
log.info(f"Creating files_fts_fuzzy table with {fuzzy_tokenizer} tokenizer...")
cursor.execute(
f"""
CREATE VIRTUAL TABLE files_fts_fuzzy USING fts5(
name, full_path UNINDEXED, content,
content='files',
content_rowid='id',
tokenize="{fuzzy_tokenizer}"
)
"""
)
# Create synchronized triggers for files_fts_exact
log.info("Creating triggers for files_fts_exact...")
cursor.execute(
"""
CREATE TRIGGER files_exact_ai AFTER INSERT ON files BEGIN
INSERT INTO files_fts_exact(rowid, name, full_path, content)
VALUES(new.id, new.name, new.full_path, new.content);
END
"""
)
cursor.execute(
"""
CREATE TRIGGER files_exact_ad AFTER DELETE ON files BEGIN
INSERT INTO files_fts_exact(files_fts_exact, rowid, name, full_path, content)
VALUES('delete', old.id, old.name, old.full_path, old.content);
END
"""
)
cursor.execute(
"""
CREATE TRIGGER files_exact_au AFTER UPDATE ON files BEGIN
INSERT INTO files_fts_exact(files_fts_exact, rowid, name, full_path, content)
VALUES('delete', old.id, old.name, old.full_path, old.content);
INSERT INTO files_fts_exact(rowid, name, full_path, content)
VALUES(new.id, new.name, new.full_path, new.content);
END
"""
)
# Create synchronized triggers for files_fts_fuzzy
log.info("Creating triggers for files_fts_fuzzy...")
cursor.execute(
"""
CREATE TRIGGER files_fuzzy_ai AFTER INSERT ON files BEGIN
INSERT INTO files_fts_fuzzy(rowid, name, full_path, content)
VALUES(new.id, new.name, new.full_path, new.content);
END
"""
)
cursor.execute(
"""
CREATE TRIGGER files_fuzzy_ad AFTER DELETE ON files BEGIN
INSERT INTO files_fts_fuzzy(files_fts_fuzzy, rowid, name, full_path, content)
VALUES('delete', old.id, old.name, old.full_path, old.content);
END
"""
)
cursor.execute(
"""
CREATE TRIGGER files_fuzzy_au AFTER UPDATE ON files BEGIN
INSERT INTO files_fts_fuzzy(files_fts_fuzzy, rowid, name, full_path, content)
VALUES('delete', old.id, old.name, old.full_path, old.content);
INSERT INTO files_fts_fuzzy(rowid, name, full_path, content)
VALUES(new.id, new.name, new.full_path, new.content);
END
"""
)
# Rebuild FTS indexes from files table
log.info("Rebuilding FTS indexes from files table...")
cursor.execute("INSERT INTO files_fts_exact(files_fts_exact) VALUES('rebuild')")
cursor.execute("INSERT INTO files_fts_fuzzy(files_fts_fuzzy) VALUES('rebuild')")
# Commit transaction
cursor.execute("COMMIT")
log.info("Migration 004 completed successfully")
# Vacuum to reclaim space (outside transaction)
try:
log.info("Running VACUUM to reclaim space...")
cursor.execute("VACUUM")
except Exception as e:
log.warning(f"VACUUM failed (non-critical): {e}")
except Exception as e:
log.error(f"Migration 004 failed: {e}")
try:
cursor.execute("ROLLBACK")
except Exception:
pass
raise
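
A hedged sketch of how the two tables divide work at query time: exact token matches go to files_fts_exact, substring-style matches to files_fts_fuzzy (the trigram tokenizer needs query terms of at least three characters). bm25() is FTS5's built-in ranking function; the helper itself is hypothetical:

def fts_search(conn, query: str, fuzzy: bool = False, limit: int = 20):
    table = "files_fts_fuzzy" if fuzzy else "files_fts_exact"
    # bm25() returns lower-is-better scores, so ascending order ranks best first.
    sql = (
        f"SELECT full_path, bm25({table}) AS score "
        f"FROM {table} WHERE {table} MATCH ? "
        f"ORDER BY score LIMIT ?"
    )
    return conn.execute(sql, (query, limit)).fetchall()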

View File

@@ -0,0 +1,196 @@
"""
Migration 005: Remove unused and redundant database fields.
This migration removes four problematic fields identified by Gemini analysis:
1. **semantic_metadata.keywords** (deprecated - replaced by file_keywords table)
- Data: Migrated to normalized file_keywords table in migration 001
- Impact: Column now redundant, remove to prevent sync issues
2. **symbols.token_count** (unused - always NULL)
- Data: Never populated, always NULL
- Impact: No data loss, just removes unused column
3. **symbols.symbol_type** (redundant - duplicates kind)
- Data: Redundant with symbols.kind field
- Impact: No data loss, kind field contains same information
4. **subdirs.direct_files** (unused - never displayed)
- Data: Never used in queries or display logic
- Impact: No data loss, just removes unused column
Schema changes use table recreation pattern (SQLite best practice):
- Create new table without deprecated columns
- Copy data from old table
- Drop old table
- Rename new table
- Recreate indexes
"""
import logging
from sqlite3 import Connection
log = logging.getLogger(__name__)
def upgrade(db_conn: Connection):
"""Remove unused and redundant fields from schema.
Note: Transaction management is handled by MigrationManager.
This migration should NOT start its own transaction.
Args:
db_conn: The SQLite database connection.
"""
cursor = db_conn.cursor()
# Step 1: Remove semantic_metadata.keywords (if column exists)
log.info("Checking semantic_metadata.keywords column...")
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_metadata'"
)
if cursor.fetchone():
# Check if keywords column exists
cursor.execute("PRAGMA table_info(semantic_metadata)")
columns = {row[1] for row in cursor.fetchall()}
if "keywords" in columns:
log.info("Removing semantic_metadata.keywords column...")
cursor.execute("""
CREATE TABLE semantic_metadata_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_id INTEGER NOT NULL UNIQUE,
summary TEXT,
purpose TEXT,
llm_tool TEXT,
generated_at REAL,
FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
)
""")
cursor.execute("""
INSERT INTO semantic_metadata_new (id, file_id, summary, purpose, llm_tool, generated_at)
SELECT id, file_id, summary, purpose, llm_tool, generated_at
FROM semantic_metadata
""")
cursor.execute("DROP TABLE semantic_metadata")
cursor.execute("ALTER TABLE semantic_metadata_new RENAME TO semantic_metadata")
# Recreate index
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_semantic_file ON semantic_metadata(file_id)"
)
log.info("Removed semantic_metadata.keywords column")
else:
log.info("semantic_metadata.keywords column does not exist, skipping")
else:
log.info("semantic_metadata table does not exist, skipping")
# Step 2: Remove symbols.token_count and symbols.symbol_type (if columns exist)
log.info("Checking symbols.token_count and symbols.symbol_type columns...")
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='symbols'"
)
if cursor.fetchone():
# Check if token_count or symbol_type columns exist
cursor.execute("PRAGMA table_info(symbols)")
columns = {row[1] for row in cursor.fetchall()}
if "token_count" in columns or "symbol_type" in columns:
log.info("Removing symbols.token_count and symbols.symbol_type columns...")
cursor.execute("""
CREATE TABLE symbols_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_id INTEGER NOT NULL,
name TEXT NOT NULL,
kind TEXT,
start_line INTEGER,
end_line INTEGER,
FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
)
""")
cursor.execute("""
INSERT INTO symbols_new (id, file_id, name, kind, start_line, end_line)
SELECT id, file_id, name, kind, start_line, end_line
FROM symbols
""")
cursor.execute("DROP TABLE symbols")
cursor.execute("ALTER TABLE symbols_new RENAME TO symbols")
# Recreate indexes
cursor.execute("CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name)")
log.info("Removed symbols.token_count and symbols.symbol_type columns")
else:
log.info("symbols.token_count/symbol_type columns do not exist, skipping")
else:
log.info("symbols table does not exist, skipping")
# Step 3: Remove subdirs.direct_files (if column exists)
log.info("Checking subdirs.direct_files column...")
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='subdirs'"
)
if cursor.fetchone():
# Check if direct_files column exists
cursor.execute("PRAGMA table_info(subdirs)")
columns = {row[1] for row in cursor.fetchall()}
if "direct_files" in columns:
log.info("Removing subdirs.direct_files column...")
cursor.execute("""
CREATE TABLE subdirs_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
index_path TEXT NOT NULL,
files_count INTEGER DEFAULT 0,
last_updated REAL
)
""")
cursor.execute("""
INSERT INTO subdirs_new (id, name, index_path, files_count, last_updated)
SELECT id, name, index_path, files_count, last_updated
FROM subdirs
""")
cursor.execute("DROP TABLE subdirs")
cursor.execute("ALTER TABLE subdirs_new RENAME TO subdirs")
# Recreate index
cursor.execute("CREATE INDEX IF NOT EXISTS idx_subdirs_name ON subdirs(name)")
log.info("Removed subdirs.direct_files column")
else:
log.info("subdirs.direct_files column does not exist, skipping")
else:
log.info("subdirs table does not exist, skipping")
log.info("Migration 005 completed successfully")
# Vacuum to reclaim space (outside transaction, optional)
# Note: VACUUM cannot run inside a transaction, so we skip it here
# The caller can run VACUUM separately if desired
def downgrade(db_conn: Connection):
"""Restore removed fields (data will be lost for keywords, token_count, symbol_type, direct_files).
This is a placeholder - true downgrade is not feasible as data is lost.
The migration is designed to be one-way since removed fields are unused/redundant.
Args:
db_conn: The SQLite database connection.
"""
log.warning(
"Migration 005 downgrade not supported - removed fields are unused/redundant. "
"Data cannot be restored."
)
raise NotImplementedError(
"Migration 005 downgrade not supported - this is a one-way migration"
)
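
A sketch of the transaction contract noted in the upgrade docstring: the MigrationManager (referenced above but not shown in this commit) owns BEGIN/COMMIT, so upgrade() issues only DDL/DML. The manager API here is illustrative, not the actual implementation:

from sqlite3 import Connection

def run_migration(db_conn: Connection, migration) -> None:
    try:
        db_conn.execute("BEGIN")
        migration.upgrade(db_conn)
        db_conn.execute("COMMIT")
    except Exception:
        db_conn.execute("ROLLBACK")
        raise
    # VACUUM cannot run inside a transaction (see the note above).
    db_conn.execute("VACUUM")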

View File

@@ -0,0 +1,37 @@
"""
Migration 006: Ensure relationship tables and indexes exist.
This migration is intentionally idempotent. It creates the `code_relationships`
table (used for graph visualization) and its indexes if missing.
"""
from __future__ import annotations
import logging
from sqlite3 import Connection
log = logging.getLogger(__name__)
def upgrade(db_conn: Connection) -> None:
cursor = db_conn.cursor()
log.info("Ensuring code_relationships table exists...")
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS code_relationships (
id INTEGER PRIMARY KEY,
source_symbol_id INTEGER NOT NULL REFERENCES symbols (id) ON DELETE CASCADE,
target_qualified_name TEXT NOT NULL,
relationship_type TEXT NOT NULL,
source_line INTEGER NOT NULL,
target_file TEXT
)
"""
)
log.info("Ensuring relationship indexes exist...")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_rel_source ON code_relationships(source_symbol_id)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_rel_target ON code_relationships(target_qualified_name)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_rel_type ON code_relationships(relationship_type)")

View File

@@ -0,0 +1,47 @@
"""
Migration 007: Add precomputed graph neighbor table for search expansion.
Adds:
- graph_neighbors: cached N-hop neighbors between symbols (keyed by symbol ids)
This table is derived data (a cache) and is safe to rebuild at any time.
The migration is intentionally idempotent.
"""
from __future__ import annotations
import logging
from sqlite3 import Connection
log = logging.getLogger(__name__)
def upgrade(db_conn: Connection) -> None:
cursor = db_conn.cursor()
log.info("Creating graph_neighbors table...")
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS graph_neighbors (
source_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
neighbor_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
relationship_depth INTEGER NOT NULL,
PRIMARY KEY (source_symbol_id, neighbor_symbol_id)
)
"""
)
log.info("Creating indexes for graph_neighbors...")
cursor.execute(
"""
CREATE INDEX IF NOT EXISTS idx_graph_neighbors_source_depth
ON graph_neighbors(source_symbol_id, relationship_depth)
"""
)
cursor.execute(
"""
CREATE INDEX IF NOT EXISTS idx_graph_neighbors_neighbor
ON graph_neighbors(neighbor_symbol_id)
"""
)
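
A sketch of how search expansion would consume the cache, assuming relationship_depth of 1 means a direct edge; the helper is hypothetical:

def expand_symbols(conn, seed_ids, max_depth: int = 2) -> set[int]:
    # Collect cached neighbors within max_depth hops of any seed symbol.
    expanded = set(seed_ids)
    for seed in seed_ids:
        rows = conn.execute(
            """
            SELECT neighbor_symbol_id FROM graph_neighbors
            WHERE source_symbol_id = ? AND relationship_depth <= ?
            """,
            (seed, max_depth),
        )
        expanded.update(r[0] for r in rows)
    return expanded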

View File

@@ -0,0 +1,81 @@
"""
Migration 008: Add Merkle hash tables for content-based incremental indexing.
Adds:
- merkle_hashes: per-file SHA-256 hashes (keyed by file_id)
- merkle_state: directory-level root hash (single row, id=1)
Backfills merkle_hashes using the existing `files.content` column when available.
"""
from __future__ import annotations
import hashlib
import logging
import time
from sqlite3 import Connection
log = logging.getLogger(__name__)
def upgrade(db_conn: Connection) -> None:
cursor = db_conn.cursor()
log.info("Creating merkle_hashes table...")
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS merkle_hashes (
file_id INTEGER PRIMARY KEY REFERENCES files(id) ON DELETE CASCADE,
sha256 TEXT NOT NULL,
updated_at REAL
)
"""
)
log.info("Creating merkle_state table...")
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS merkle_state (
id INTEGER PRIMARY KEY CHECK (id = 1),
root_hash TEXT,
updated_at REAL
)
"""
)
# Backfill file hashes from stored content (best-effort).
try:
rows = cursor.execute("SELECT id, content FROM files").fetchall()
except Exception as exc:
log.warning("Unable to backfill merkle hashes (files table missing?): %s", exc)
return
now = time.time()
inserts: list[tuple[int, str, float]] = []
for row in rows:
file_id = int(row[0])
content = row[1]
if content is None:
continue
try:
digest = hashlib.sha256(str(content).encode("utf-8", errors="ignore")).hexdigest()
inserts.append((file_id, digest, now))
except Exception:
continue
if not inserts:
return
log.info("Backfilling %d file hashes...", len(inserts))
cursor.executemany(
"""
INSERT INTO merkle_hashes(file_id, sha256, updated_at)
VALUES(?, ?, ?)
ON CONFLICT(file_id) DO UPDATE SET
sha256=excluded.sha256,
updated_at=excluded.updated_at
""",
inserts,
)
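
The migration backfills per-file hashes but leaves merkle_state empty. One simple scheme for deriving the root, hashing the per-file digests in a stable order, is sketched below; the scheme the indexer actually uses may differ:

import hashlib
import time

def compute_root_hash(db_conn) -> str:
    cursor = db_conn.cursor()
    rows = cursor.execute(
        "SELECT sha256 FROM merkle_hashes ORDER BY file_id"
    ).fetchall()
    # Fold the per-file digests into a single root digest.
    h = hashlib.sha256()
    for (digest,) in rows:
        h.update(digest.encode("ascii"))
    root = h.hexdigest()
    cursor.execute(
        """
        INSERT INTO merkle_state(id, root_hash, updated_at) VALUES(1, ?, ?)
        ON CONFLICT(id) DO UPDATE SET
            root_hash=excluded.root_hash, updated_at=excluded.updated_at
        """,
        (root, time.time()),
    )
    return root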

View File

@@ -0,0 +1,103 @@
"""
Migration 009: Add SPLADE sparse retrieval tables.
This migration introduces SPLADE (SParse Lexical AnD Expansion) support:
- splade_metadata: Model configuration (model name, vocab size, ONNX path)
- splade_posting_list: Inverted index mapping token_id -> (chunk_id, weight)
The SPLADE tables are designed for efficient sparse vector retrieval:
- Token-based lookup for query expansion
- Chunk-based deletion for index maintenance
- Maintains backward compatibility with existing FTS tables
"""
import logging
from sqlite3 import Connection
log = logging.getLogger(__name__)
def upgrade(db_conn: Connection) -> None:
"""
Adds SPLADE tables for sparse retrieval.
Creates:
- splade_metadata: Stores model configuration and ONNX path
- splade_posting_list: Inverted index with token_id -> (chunk_id, weight) mappings
- Indexes for efficient token-based and chunk-based lookups
Args:
db_conn: The SQLite database connection.
"""
cursor = db_conn.cursor()
log.info("Creating splade_metadata table...")
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS splade_metadata (
id INTEGER PRIMARY KEY DEFAULT 1,
model_name TEXT NOT NULL,
vocab_size INTEGER NOT NULL,
onnx_path TEXT,
created_at REAL
)
"""
)
log.info("Creating splade_posting_list table...")
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS splade_posting_list (
token_id INTEGER NOT NULL,
chunk_id INTEGER NOT NULL,
weight REAL NOT NULL,
PRIMARY KEY (token_id, chunk_id),
FOREIGN KEY (chunk_id) REFERENCES semantic_chunks(id) ON DELETE CASCADE
)
"""
)
log.info("Creating indexes for splade_posting_list...")
# Index for efficient chunk-based lookups (deletion, updates)
cursor.execute(
"""
CREATE INDEX IF NOT EXISTS idx_splade_by_chunk
ON splade_posting_list(chunk_id)
"""
)
# Index for efficient term-based retrieval
cursor.execute(
"""
CREATE INDEX IF NOT EXISTS idx_splade_by_token
ON splade_posting_list(token_id)
"""
)
log.info("Migration 009 completed successfully")
def downgrade(db_conn: Connection) -> None:
"""
Removes SPLADE tables.
Drops:
- splade_posting_list (and associated indexes)
- splade_metadata
Args:
db_conn: The SQLite database connection.
"""
cursor = db_conn.cursor()
log.info("Dropping SPLADE indexes...")
cursor.execute("DROP INDEX IF EXISTS idx_splade_by_chunk")
cursor.execute("DROP INDEX IF EXISTS idx_splade_by_token")
log.info("Dropping splade_posting_list table...")
cursor.execute("DROP TABLE IF EXISTS splade_posting_list")
log.info("Dropping splade_metadata table...")
cursor.execute("DROP TABLE IF EXISTS splade_metadata")
log.info("Migration 009 downgrade completed successfully")

View File

@@ -0,0 +1,162 @@
"""
Migration 010: Add multi-vector storage support for cascade retrieval.
This migration introduces the chunks table with multi-vector support:
- chunks: Stores code chunks with multiple embedding types
- embedding: Original embedding for backward compatibility
- embedding_binary: 256-dim binary vector for coarse ranking (fast)
- embedding_dense: 2048-dim dense vector for fine ranking (precise)
The multi-vector architecture enables cascade retrieval:
1. First stage: Fast binary vector search for candidate retrieval
2. Second stage: Dense vector reranking for precision
"""
import logging
from sqlite3 import Connection
log = logging.getLogger(__name__)
def upgrade(db_conn: Connection) -> None:
"""
Adds chunks table with multi-vector embedding columns.
Creates:
- chunks: Table for storing code chunks with multiple embedding types
- idx_chunks_file_path: Index for efficient file-based lookups
Also migrates existing chunks tables by adding new columns if needed.
Args:
db_conn: The SQLite database connection.
"""
cursor = db_conn.cursor()
# Check if chunks table already exists
table_exists = cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='chunks'"
).fetchone()
if table_exists:
# Migrate existing table - add new columns if missing
log.info("chunks table exists, checking for missing columns...")
col_info = cursor.execute("PRAGMA table_info(chunks)").fetchall()
existing_columns = {row[1] for row in col_info}
if "embedding_binary" not in existing_columns:
log.info("Adding embedding_binary column to chunks table...")
cursor.execute(
"ALTER TABLE chunks ADD COLUMN embedding_binary BLOB"
)
if "embedding_dense" not in existing_columns:
log.info("Adding embedding_dense column to chunks table...")
cursor.execute(
"ALTER TABLE chunks ADD COLUMN embedding_dense BLOB"
)
else:
# Create new table with all columns
log.info("Creating chunks table with multi-vector support...")
cursor.execute(
"""
CREATE TABLE chunks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_path TEXT NOT NULL,
content TEXT NOT NULL,
embedding BLOB,
embedding_binary BLOB,
embedding_dense BLOB,
metadata TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""
)
# Create index for file-based lookups
log.info("Creating index for chunks table...")
cursor.execute(
"""
CREATE INDEX IF NOT EXISTS idx_chunks_file_path
ON chunks(file_path)
"""
)
log.info("Migration 010 completed successfully")
def downgrade(db_conn: Connection) -> None:
"""
Removes multi-vector columns from chunks table.
Note: This does not drop the chunks table entirely to preserve data.
Only the new columns added by this migration are removed.
Args:
db_conn: The SQLite database connection.
"""
cursor = db_conn.cursor()
log.info("Removing multi-vector columns from chunks table...")
# SQLite doesn't support DROP COLUMN directly in older versions
# We need to recreate the table without the columns
# Check if chunks table exists
table_exists = cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='chunks'"
).fetchone()
if not table_exists:
log.info("chunks table does not exist, nothing to downgrade")
return
# Check if the columns exist before trying to remove them
col_info = cursor.execute("PRAGMA table_info(chunks)").fetchall()
existing_columns = {row[1] for row in col_info}
needs_migration = (
"embedding_binary" in existing_columns or
"embedding_dense" in existing_columns
)
if not needs_migration:
log.info("Multi-vector columns not present, nothing to remove")
return
# Recreate table without the new columns
log.info("Recreating chunks table without multi-vector columns...")
cursor.execute(
"""
CREATE TABLE chunks_backup (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_path TEXT NOT NULL,
content TEXT NOT NULL,
embedding BLOB,
metadata TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""
)
cursor.execute(
"""
INSERT INTO chunks_backup (id, file_path, content, embedding, metadata, created_at)
SELECT id, file_path, content, embedding, metadata, created_at FROM chunks
"""
)
cursor.execute("DROP TABLE chunks")
cursor.execute("ALTER TABLE chunks_backup RENAME TO chunks")
# Recreate index
cursor.execute(
"""
CREATE INDEX IF NOT EXISTS idx_chunks_file_path
ON chunks(file_path)
"""
)
log.info("Migration 010 downgrade completed successfully")