Refactor code structure and remove redundant changes

catlog22
2026-01-24 14:47:47 +08:00
parent cf5fecd66d
commit f2b0a5bbc9
113 changed files with 43217 additions and 235 deletions


@@ -0,0 +1,32 @@
"""Storage backends for CodexLens."""
from __future__ import annotations
from .sqlite_store import SQLiteStore
from .path_mapper import PathMapper
from .registry import RegistryStore, ProjectInfo, DirMapping
from .dir_index import DirIndexStore, SubdirLink, FileEntry
from .index_tree import IndexTreeBuilder, BuildResult, DirBuildResult
from .vector_meta_store import VectorMetadataStore
__all__ = [
# Legacy (workspace-local)
"SQLiteStore",
# Path mapping
"PathMapper",
# Global registry
"RegistryStore",
"ProjectInfo",
"DirMapping",
# Directory index
"DirIndexStore",
"SubdirLink",
"FileEntry",
# Tree builder
"IndexTreeBuilder",
"BuildResult",
"DirBuildResult",
# Vector metadata
"VectorMetadataStore",
]
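
A minimal consumption sketch for the exports above; the import path follows the package layout, while the project directory and registry location are illustrative:

from pathlib import Path

from codexlens.storage import PathMapper, RegistryStore

mapper = PathMapper()                                        # source path <-> index path mapping
index_db = mapper.source_to_index_db(Path("/tmp/project"))   # hypothetical project directory

with RegistryStore() as registry:                            # global registry (~/.codexlens/registry.db)
    ...                                                      # register projects / look up mappings here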

File diff suppressed because it is too large.


@@ -0,0 +1,32 @@
"""Simple filesystem cache helpers."""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
@dataclass
class FileCache:
"""Caches file mtimes for incremental indexing."""
cache_path: Path
def load_mtime(self, path: Path) -> Optional[float]:
try:
key = self._key_for(path)
record = (self.cache_path / key).read_text(encoding="utf-8")
return float(record)
except Exception:
return None
def store_mtime(self, path: Path, mtime: float) -> None:
self.cache_path.mkdir(parents=True, exist_ok=True)
key = self._key_for(path)
(self.cache_path / key).write_text(str(mtime), encoding="utf-8")
def _key_for(self, path: Path) -> str:
safe = str(path).replace(":", "_").replace("\\", "_").replace("/", "_")
return f"{safe}.mtime"
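
A usage sketch for the cache above, with illustrative paths: compare the stored mtime against the file's current mtime to decide whether re-indexing is needed.

from pathlib import Path

cache = FileCache(cache_path=Path.home() / ".codexlens" / "cache")
source = Path("src/app.py")

current_mtime = source.stat().st_mtime
if cache.load_mtime(source) != current_mtime:
    # ... re-index the file here, then record the new mtime ...
    cache.store_mtime(source, current_mtime)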


@@ -0,0 +1,398 @@
"""Global cross-directory symbol index for fast lookups.
Stores symbols for an entire project in a single SQLite database so symbol search
does not require traversing every directory _index.db.
This index is updated incrementally during file indexing (delete+insert per file)
to avoid expensive batch rebuilds.
"""
from __future__ import annotations
import logging
import sqlite3
import threading
from pathlib import Path
from typing import List, Optional, Tuple
from codexlens.entities import Symbol
from codexlens.errors import StorageError
class GlobalSymbolIndex:
"""Project-wide symbol index with incremental updates."""
SCHEMA_VERSION = 1
DEFAULT_DB_NAME = "_global_symbols.db"
def __init__(self, db_path: str | Path, project_id: int) -> None:
self.db_path = Path(db_path).resolve()
self.project_id = int(project_id)
self._lock = threading.RLock()
self._conn: Optional[sqlite3.Connection] = None
self.logger = logging.getLogger(__name__)
def initialize(self) -> None:
"""Create database and schema if not exists."""
with self._lock:
self.db_path.parent.mkdir(parents=True, exist_ok=True)
conn = self._get_connection()
current_version = self._get_schema_version(conn)
if current_version > self.SCHEMA_VERSION:
raise StorageError(
f"Database schema version {current_version} is newer than "
f"supported version {self.SCHEMA_VERSION}. "
f"Please update the application or use a compatible database.",
db_path=str(self.db_path),
operation="initialize",
details={
"current_version": current_version,
"supported_version": self.SCHEMA_VERSION,
},
)
if current_version == 0:
self._create_schema(conn)
self._set_schema_version(conn, self.SCHEMA_VERSION)
elif current_version < self.SCHEMA_VERSION:
self._apply_migrations(conn, current_version)
self._set_schema_version(conn, self.SCHEMA_VERSION)
conn.commit()
def close(self) -> None:
"""Close database connection."""
with self._lock:
if self._conn is not None:
try:
self._conn.close()
except Exception:
pass
finally:
self._conn = None
def __enter__(self) -> "GlobalSymbolIndex":
self.initialize()
return self
def __exit__(self, exc_type: object, exc: object, tb: object) -> None:
self.close()
def add_symbol(self, symbol: Symbol, file_path: str | Path, index_path: str | Path) -> None:
"""Insert a single symbol (idempotent) for incremental updates."""
file_path_str = str(Path(file_path).resolve())
index_path_str = str(Path(index_path).resolve())
with self._lock:
conn = self._get_connection()
try:
conn.execute(
"""
INSERT INTO global_symbols(
project_id, symbol_name, symbol_kind,
file_path, start_line, end_line, index_path
)
VALUES(?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(
project_id, symbol_name, symbol_kind,
file_path, start_line, end_line
)
DO UPDATE SET
index_path=excluded.index_path
""",
(
self.project_id,
symbol.name,
symbol.kind,
file_path_str,
symbol.range[0],
symbol.range[1],
index_path_str,
),
)
conn.commit()
except sqlite3.DatabaseError as exc:
conn.rollback()
raise StorageError(
f"Failed to add symbol {symbol.name}: {exc}",
db_path=str(self.db_path),
operation="add_symbol",
) from exc
def update_file_symbols(
self,
file_path: str | Path,
symbols: List[Symbol],
index_path: str | Path | None = None,
) -> None:
"""Replace all symbols for a file atomically (delete + insert)."""
file_path_str = str(Path(file_path).resolve())
index_path_str: Optional[str]
if index_path is not None:
index_path_str = str(Path(index_path).resolve())
else:
index_path_str = self._get_existing_index_path(file_path_str)
with self._lock:
conn = self._get_connection()
try:
conn.execute("BEGIN")
conn.execute(
"DELETE FROM global_symbols WHERE project_id=? AND file_path=?",
(self.project_id, file_path_str),
)
if symbols:
if not index_path_str:
raise StorageError(
"index_path is required when inserting symbols for a new file",
db_path=str(self.db_path),
operation="update_file_symbols",
details={"file_path": file_path_str},
)
rows = [
(
self.project_id,
s.name,
s.kind,
file_path_str,
s.range[0],
s.range[1],
index_path_str,
)
for s in symbols
]
conn.executemany(
"""
INSERT INTO global_symbols(
project_id, symbol_name, symbol_kind,
file_path, start_line, end_line, index_path
)
VALUES(?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(
project_id, symbol_name, symbol_kind,
file_path, start_line, end_line
)
DO UPDATE SET
index_path=excluded.index_path
""",
rows,
)
conn.commit()
except sqlite3.DatabaseError as exc:
conn.rollback()
raise StorageError(
f"Failed to update symbols for {file_path_str}: {exc}",
db_path=str(self.db_path),
operation="update_file_symbols",
) from exc
def delete_file_symbols(self, file_path: str | Path) -> int:
"""Remove all symbols for a file. Returns number of rows deleted."""
file_path_str = str(Path(file_path).resolve())
with self._lock:
conn = self._get_connection()
try:
cur = conn.execute(
"DELETE FROM global_symbols WHERE project_id=? AND file_path=?",
(self.project_id, file_path_str),
)
conn.commit()
return int(cur.rowcount or 0)
except sqlite3.DatabaseError as exc:
conn.rollback()
raise StorageError(
f"Failed to delete symbols for {file_path_str}: {exc}",
db_path=str(self.db_path),
operation="delete_file_symbols",
) from exc
def search(
self,
name: str,
kind: Optional[str] = None,
limit: int = 50,
prefix_mode: bool = True,
) -> List[Symbol]:
"""Search symbols and return full Symbol objects."""
if prefix_mode:
pattern = f"{name}%"
else:
pattern = f"%{name}%"
with self._lock:
conn = self._get_connection()
if kind:
rows = conn.execute(
"""
SELECT symbol_name, symbol_kind, file_path, start_line, end_line
FROM global_symbols
WHERE project_id=? AND symbol_name LIKE ? AND symbol_kind=?
ORDER BY symbol_name
LIMIT ?
""",
(self.project_id, pattern, kind, limit),
).fetchall()
else:
rows = conn.execute(
"""
SELECT symbol_name, symbol_kind, file_path, start_line, end_line
FROM global_symbols
WHERE project_id=? AND symbol_name LIKE ?
ORDER BY symbol_name
LIMIT ?
""",
(self.project_id, pattern, limit),
).fetchall()
return [
Symbol(
name=row["symbol_name"],
kind=row["symbol_kind"],
range=(row["start_line"], row["end_line"]),
file=row["file_path"],
)
for row in rows
]
def search_symbols(
self,
name: str,
kind: Optional[str] = None,
limit: int = 50,
prefix_mode: bool = True,
) -> List[Tuple[str, Tuple[int, int]]]:
"""Search symbols and return only (file_path, (start_line, end_line))."""
symbols = self.search(name=name, kind=kind, limit=limit, prefix_mode=prefix_mode)
return [(s.file or "", s.range) for s in symbols]
def get_file_symbols(self, file_path: str | Path) -> List[Symbol]:
"""Get all symbols in a specific file, sorted by start_line.
Args:
file_path: Full path to the file
Returns:
List of Symbol objects sorted by start_line
"""
file_path_str = str(Path(file_path).resolve())
with self._lock:
conn = self._get_connection()
rows = conn.execute(
"""
SELECT symbol_name, symbol_kind, file_path, start_line, end_line
FROM global_symbols
WHERE project_id=? AND file_path=?
ORDER BY start_line
""",
(self.project_id, file_path_str),
).fetchall()
return [
Symbol(
name=row["symbol_name"],
kind=row["symbol_kind"],
range=(row["start_line"], row["end_line"]),
file=row["file_path"],
)
for row in rows
]
def _get_existing_index_path(self, file_path_str: str) -> Optional[str]:
with self._lock:
conn = self._get_connection()
row = conn.execute(
"""
SELECT index_path
FROM global_symbols
WHERE project_id=? AND file_path=?
LIMIT 1
""",
(self.project_id, file_path_str),
).fetchone()
return str(row["index_path"]) if row else None
def _get_schema_version(self, conn: sqlite3.Connection) -> int:
try:
row = conn.execute("PRAGMA user_version").fetchone()
return int(row[0]) if row else 0
except Exception:
return 0
def _set_schema_version(self, conn: sqlite3.Connection, version: int) -> None:
conn.execute(f"PRAGMA user_version = {int(version)}")
def _apply_migrations(self, conn: sqlite3.Connection, from_version: int) -> None:
# No migrations yet (v1).
_ = (conn, from_version)
return
def _get_connection(self) -> sqlite3.Connection:
if self._conn is None:
self._conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
self._conn.row_factory = sqlite3.Row
self._conn.execute("PRAGMA journal_mode=WAL")
self._conn.execute("PRAGMA synchronous=NORMAL")
self._conn.execute("PRAGMA foreign_keys=ON")
self._conn.execute("PRAGMA mmap_size=30000000000")
return self._conn
def _create_schema(self, conn: sqlite3.Connection) -> None:
try:
conn.execute(
"""
CREATE TABLE IF NOT EXISTS global_symbols (
id INTEGER PRIMARY KEY,
project_id INTEGER NOT NULL,
symbol_name TEXT NOT NULL,
symbol_kind TEXT NOT NULL,
file_path TEXT NOT NULL,
start_line INTEGER,
end_line INTEGER,
index_path TEXT NOT NULL,
UNIQUE(
project_id, symbol_name, symbol_kind,
file_path, start_line, end_line
)
)
"""
)
# Required by optimization spec.
conn.execute(
"""
CREATE INDEX IF NOT EXISTS idx_global_symbols_name_kind
ON global_symbols(symbol_name, symbol_kind)
"""
)
# Used by common queries (project-scoped name lookups).
conn.execute(
"""
CREATE INDEX IF NOT EXISTS idx_global_symbols_project_name_kind
ON global_symbols(project_id, symbol_name, symbol_kind)
"""
)
conn.execute(
"""
CREATE INDEX IF NOT EXISTS idx_global_symbols_project_file
ON global_symbols(project_id, file_path)
"""
)
conn.execute(
"""
CREATE INDEX IF NOT EXISTS idx_global_symbols_project_index_path
ON global_symbols(project_id, index_path)
"""
)
except sqlite3.DatabaseError as exc:
raise StorageError(
f"Failed to initialize global symbol schema: {exc}",
db_path=str(self.db_path),
operation="_create_schema",
) from exc
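
A usage sketch for the index above. The Symbol constructor fields (name, kind, range, file) are inferred from how this module reads them; the paths and project id are illustrative.

from pathlib import Path

from codexlens.entities import Symbol

index_root = Path.home() / ".codexlens" / "indexes" / "proj"   # illustrative layout
symbols = [Symbol(name="parse_config", kind="function", range=(10, 42), file="src/config.py")]

with GlobalSymbolIndex(index_root / "_global_symbols.db", project_id=1) as gsi:
    gsi.update_file_symbols("src/config.py", symbols, index_path=index_root / "src" / "_index.db")
    hits = gsi.search("parse_", kind="function", limit=10)     # prefix match by default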

File diff suppressed because it is too large.


@@ -0,0 +1,136 @@
"""Merkle tree utilities for change detection.
This module provides a generic, file-system based Merkle tree implementation
that can be used to efficiently diff directory states.
"""
from __future__ import annotations
import hashlib
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, Iterable, List, Optional
def sha256_bytes(data: bytes) -> str:
return hashlib.sha256(data).hexdigest()
def sha256_text(text: str) -> str:
return sha256_bytes(text.encode("utf-8", errors="ignore"))
@dataclass
class MerkleNode:
"""A Merkle node representing either a file (leaf) or directory (internal)."""
name: str
rel_path: str
hash: str
is_dir: bool
children: Dict[str, "MerkleNode"] = field(default_factory=dict)
def iter_files(self) -> Iterable["MerkleNode"]:
if not self.is_dir:
yield self
return
for child in self.children.values():
yield from child.iter_files()
@dataclass
class MerkleTree:
"""Merkle tree for a directory snapshot."""
root: MerkleNode
@classmethod
def build_from_directory(cls, root_dir: Path) -> "MerkleTree":
root_dir = Path(root_dir).resolve()
node = cls._build_node(root_dir, base=root_dir)
return cls(root=node)
@classmethod
def _build_node(cls, path: Path, *, base: Path) -> MerkleNode:
if path.is_file():
rel = str(path.relative_to(base)).replace("\\", "/")
return MerkleNode(
name=path.name,
rel_path=rel,
hash=sha256_bytes(path.read_bytes()),
is_dir=False,
)
if not path.is_dir():
rel = str(path.relative_to(base)).replace("\\", "/")
return MerkleNode(name=path.name, rel_path=rel, hash="", is_dir=False)
children: Dict[str, MerkleNode] = {}
for child in sorted(path.iterdir(), key=lambda p: p.name):
child_node = cls._build_node(child, base=base)
children[child_node.name] = child_node
items = [
f"{'d' if n.is_dir else 'f'}:{name}:{n.hash}"
for name, n in sorted(children.items(), key=lambda kv: kv[0])
]
dir_hash = sha256_text("\n".join(items))
rel_path = "." if path == base else str(path.relative_to(base)).replace("\\", "/")
return MerkleNode(
name="." if path == base else path.name,
rel_path=rel_path,
hash=dir_hash,
is_dir=True,
children=children,
)
@staticmethod
def find_changed_files(old: Optional["MerkleTree"], new: Optional["MerkleTree"]) -> List[str]:
"""Find changed/added/removed files between two trees.
Returns:
List of relative file paths (POSIX-style separators).
"""
if old is None and new is None:
return []
if old is None:
return sorted({n.rel_path for n in new.root.iter_files()}) # type: ignore[union-attr]
if new is None:
return sorted({n.rel_path for n in old.root.iter_files()})
changed: set[str] = set()
def walk(old_node: Optional[MerkleNode], new_node: Optional[MerkleNode]) -> None:
if old_node is None and new_node is None:
return
if old_node is None and new_node is not None:
changed.update(n.rel_path for n in new_node.iter_files())
return
if new_node is None and old_node is not None:
changed.update(n.rel_path for n in old_node.iter_files())
return
assert old_node is not None and new_node is not None
if old_node.hash == new_node.hash:
return
if not old_node.is_dir and not new_node.is_dir:
changed.add(new_node.rel_path)
return
if old_node.is_dir != new_node.is_dir:
changed.update(n.rel_path for n in old_node.iter_files())
changed.update(n.rel_path for n in new_node.iter_files())
return
names = set(old_node.children.keys()) | set(new_node.children.keys())
for name in names:
walk(old_node.children.get(name), new_node.children.get(name))
walk(old.root, new.root)
return sorted(changed)
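
A usage sketch for the tree above: snapshot a directory before and after edits, then diff the snapshots to get the relative paths that need re-indexing (the directory path is illustrative).

from pathlib import Path

project = Path("src")

before = MerkleTree.build_from_directory(project)
# ... files are edited, added, or removed here ...
after = MerkleTree.build_from_directory(project)

changed = MerkleTree.find_changed_files(before, after)   # relative POSIX-style paths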


@@ -0,0 +1,154 @@
"""
Manages database schema migrations.
This module provides a framework for applying versioned migrations to the SQLite
database. Migrations are discovered from the `codexlens.storage.migrations`
package and applied sequentially. The database schema version is tracked using
the `user_version` pragma.
"""
import importlib
import logging
import pkgutil
from pathlib import Path
from sqlite3 import Connection
from typing import Callable, List, NamedTuple
log = logging.getLogger(__name__)
class Migration(NamedTuple):
"""Represents a single database migration."""
version: int
name: str
upgrade: Callable[[Connection], None]
def discover_migrations() -> List[Migration]:
"""
Discovers and returns a sorted list of database migrations.
Migrations are expected to be in the `codexlens.storage.migrations` package,
with filenames in the format `migration_XXX_description.py`, where XXX is
the version number. Each migration module must contain an `upgrade` function
that takes a `sqlite3.Connection` object as its argument.
Returns:
A list of Migration objects, sorted by version.
"""
import codexlens.storage.migrations
migrations = []
package_path = Path(codexlens.storage.migrations.__file__).parent
for _, name, _ in pkgutil.iter_modules([str(package_path)]):
if name.startswith("migration_"):
try:
version = int(name.split("_")[1])
module = importlib.import_module(f"codexlens.storage.migrations.{name}")
if hasattr(module, "upgrade"):
migrations.append(
Migration(version=version, name=name, upgrade=module.upgrade)
)
else:
log.warning(f"Migration {name} is missing 'upgrade' function.")
except (ValueError, IndexError) as e:
log.warning(f"Could not parse migration name {name}: {e}")
except ImportError as e:
log.warning(f"Could not import migration {name}: {e}")
migrations.sort(key=lambda m: m.version)
return migrations
class MigrationManager:
"""
Manages the application of migrations to a database.
"""
def __init__(self, db_conn: Connection):
"""
Initializes the MigrationManager.
Args:
db_conn: The SQLite database connection.
"""
self.db_conn = db_conn
self.migrations = discover_migrations()
def get_current_version(self) -> int:
"""
Gets the current version of the database schema.
Returns:
The current schema version number.
"""
return self.db_conn.execute("PRAGMA user_version").fetchone()[0]
def set_version(self, version: int):
"""
Sets the database schema version.
Args:
version: The version number to set.
"""
self.db_conn.execute(f"PRAGMA user_version = {version}")
log.info(f"Database schema version set to {version}")
def apply_migrations(self):
"""
Applies all pending migrations to the database.
This method checks the current database version and applies all
subsequent migrations in order. Each migration is applied within
a transaction, unless the migration manages its own transactions.
"""
current_version = self.get_current_version()
log.info(f"Current database schema version: {current_version}")
for migration in self.migrations:
if migration.version > current_version:
log.info(f"Applying migration {migration.version}: {migration.name}...")
try:
# Check if a transaction is already in progress
in_transaction = self.db_conn.in_transaction
# Only start transaction if not already in one
if not in_transaction:
self.db_conn.execute("BEGIN")
migration.upgrade(self.db_conn)
self.set_version(migration.version)
# Only commit if we started the transaction and it's still active
if not in_transaction and self.db_conn.in_transaction:
self.db_conn.execute("COMMIT")
log.info(
f"Successfully applied migration {migration.version}: {migration.name}"
)
except Exception as e:
log.error(
f"Failed to apply migration {migration.version}: {migration.name}. Error: {e}",
exc_info=True,
)
# Try to rollback if transaction is active
try:
if self.db_conn.in_transaction:
self.db_conn.execute("ROLLBACK")
except Exception:
pass # Ignore rollback errors
raise
latest_migration_version = self.migrations[-1].version if self.migrations else 0
if current_version < latest_migration_version:
# Sanity check: after the loop, the recorded schema version should match the
# latest known migration. If it does not, the loop was interrupted before
# set_version ran for the final migration, so warn rather than guess.
final_version = self.get_current_version()
if final_version != latest_migration_version:
log.warning(f"Database version ({final_version}) is not the latest migration version ({latest_migration_version}). This may indicate a problem.")
log.info("All pending migrations applied successfully.")


@@ -0,0 +1 @@
# This file makes the 'migrations' directory a Python package.


@@ -0,0 +1,123 @@
"""
Migration 001: Normalize keywords into separate tables.
This migration introduces two new tables, `keywords` and `file_keywords`, to
store semantic keywords in a normalized fashion. It then migrates the existing
keywords from the `semantic_data` JSON blob in the `files` table into these
new tables. This is intended to speed up keyword-based searches significantly.
"""
import json
import logging
from sqlite3 import Connection
log = logging.getLogger(__name__)
def upgrade(db_conn: Connection):
"""
Applies the migration to normalize keywords.
- Creates `keywords` and `file_keywords` tables.
- Creates indexes for efficient querying.
- Migrates data from `files.semantic_data` to the new tables.
Args:
db_conn: The SQLite database connection.
"""
cursor = db_conn.cursor()
log.info("Creating 'keywords' and 'file_keywords' tables...")
# Create a table to store unique keywords
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS keywords (
id INTEGER PRIMARY KEY,
keyword TEXT NOT NULL UNIQUE
)
"""
)
# Create a join table to link files and keywords (many-to-many)
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS file_keywords (
file_id INTEGER NOT NULL,
keyword_id INTEGER NOT NULL,
PRIMARY KEY (file_id, keyword_id),
FOREIGN KEY (file_id) REFERENCES files (id) ON DELETE CASCADE,
FOREIGN KEY (keyword_id) REFERENCES keywords (id) ON DELETE CASCADE
)
"""
)
log.info("Creating indexes for new keyword tables...")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_keywords_keyword ON keywords (keyword)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_file_keywords_file_id ON file_keywords (file_id)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_file_keywords_keyword_id ON file_keywords (keyword_id)")
log.info("Migrating existing keywords from 'semantic_metadata' table...")
# Check if semantic_metadata table exists before querying
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_metadata'")
if not cursor.fetchone():
log.info("No 'semantic_metadata' table found, skipping data migration.")
return
# Check if 'keywords' column exists in semantic_metadata table
# (current schema may already use normalized tables without this column)
cursor.execute("PRAGMA table_info(semantic_metadata)")
columns = {row[1] for row in cursor.fetchall()}
if "keywords" not in columns:
log.info("No 'keywords' column in semantic_metadata table, skipping data migration.")
return
cursor.execute("SELECT file_id, keywords FROM semantic_metadata WHERE keywords IS NOT NULL AND keywords != ''")
files_to_migrate = cursor.fetchall()
if not files_to_migrate:
log.info("No existing files with semantic metadata to migrate.")
return
log.info(f"Found {len(files_to_migrate)} files with semantic metadata to migrate.")
for file_id, keywords_json in files_to_migrate:
if not keywords_json:
continue
try:
keywords = json.loads(keywords_json)
if not isinstance(keywords, list):
log.warning(f"Keywords for file_id {file_id} is not a list, skipping.")
continue
for keyword in keywords:
if not isinstance(keyword, str):
log.warning(f"Non-string keyword '{keyword}' found for file_id {file_id}, skipping.")
continue
keyword = keyword.strip()
if not keyword:
continue
# Get or create keyword_id
cursor.execute("INSERT OR IGNORE INTO keywords (keyword) VALUES (?)", (keyword,))
cursor.execute("SELECT id FROM keywords WHERE keyword = ?", (keyword,))
keyword_id_result = cursor.fetchone()
if keyword_id_result:
keyword_id = keyword_id_result[0]
# Link file to keyword
cursor.execute(
"INSERT OR IGNORE INTO file_keywords (file_id, keyword_id) VALUES (?, ?)",
(file_id, keyword_id),
)
else:
log.error(f"Failed to retrieve or create keyword_id for keyword: {keyword}")
except json.JSONDecodeError as e:
log.warning(f"Could not parse keywords for file_id {file_id}: {e}")
except Exception as e:
log.error(f"An unexpected error occurred during migration for file_id {file_id}: {e}", exc_info=True)
log.info("Finished migrating keywords.")


@@ -0,0 +1,48 @@
"""
Migration 002: Add token_count and symbol_type to symbols table.
This migration adds token counting metadata to symbols for accurate chunk
splitting and performance optimization. It also adds symbol_type for better
filtering in searches.
"""
import logging
from sqlite3 import Connection
log = logging.getLogger(__name__)
def upgrade(db_conn: Connection):
"""
Applies the migration to add token metadata to symbols.
- Adds token_count column to symbols table
- Adds symbol_type column to symbols table (for future use)
- Creates index on symbol_type for efficient filtering
- Leaves token_count NULL for existing symbols (to be calculated lazily)
Args:
db_conn: The SQLite database connection.
"""
cursor = db_conn.cursor()
log.info("Adding token_count column to symbols table...")
try:
cursor.execute("ALTER TABLE symbols ADD COLUMN token_count INTEGER")
log.info("Successfully added token_count column.")
except Exception as e:
# Column might already exist
log.warning(f"Could not add token_count column (might already exist): {e}")
log.info("Adding symbol_type column to symbols table...")
try:
cursor.execute("ALTER TABLE symbols ADD COLUMN symbol_type TEXT")
log.info("Successfully added symbol_type column.")
except Exception as e:
# Column might already exist
log.warning(f"Could not add symbol_type column (might already exist): {e}")
log.info("Creating index on symbol_type for efficient filtering...")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_symbols_type ON symbols(symbol_type)")
log.info("Migration 002 completed successfully.")


@@ -0,0 +1,232 @@
"""
Migration 004: Add dual FTS tables for exact and fuzzy matching.
This migration introduces two FTS5 tables:
- files_fts_exact: Uses unicode61 tokenizer for exact token matching
- files_fts_fuzzy: Uses trigram tokenizer (or extended unicode61) for substring/fuzzy matching
Both tables are synchronized with the files table via triggers for automatic updates.
"""
import logging
from sqlite3 import Connection
from codexlens.storage.sqlite_utils import check_trigram_support, get_sqlite_version
log = logging.getLogger(__name__)
def upgrade(db_conn: Connection):
"""
Applies the migration to add dual FTS tables.
- Drops old files_fts table and triggers
- Creates files_fts_exact with unicode61 tokenizer
- Creates files_fts_fuzzy with trigram or extended unicode61 tokenizer
- Creates synchronized triggers for both tables
- Rebuilds FTS indexes from files table
Args:
db_conn: The SQLite database connection.
"""
cursor = db_conn.cursor()
try:
# Check trigram support
has_trigram = check_trigram_support(db_conn)
version = get_sqlite_version(db_conn)
log.info(f"SQLite version: {'.'.join(map(str, version))}")
if has_trigram:
log.info("Trigram tokenizer available, using for fuzzy FTS table")
fuzzy_tokenizer = "trigram"
else:
log.warning(
f"Trigram tokenizer not available (requires SQLite >= 3.34), "
f"using extended unicode61 tokenizer for fuzzy matching"
)
fuzzy_tokenizer = "unicode61 tokenchars '_-.'"
# Start transaction
cursor.execute("BEGIN TRANSACTION")
# Check if files table has 'name' column (v2 schema doesn't have it)
cursor.execute("PRAGMA table_info(files)")
columns = {row[1] for row in cursor.fetchall()}
if 'name' not in columns:
log.info("Adding 'name' column to files table (v2 schema upgrade)...")
# Add name column
cursor.execute("ALTER TABLE files ADD COLUMN name TEXT")
# Populate name from path (extract filename from last '/')
# Use Python to do the extraction since SQLite doesn't have reverse()
cursor.execute("SELECT rowid, path FROM files")
rows = cursor.fetchall()
for rowid, path in rows:
# Extract filename from path
name = path.split('/')[-1] if '/' in path else path
cursor.execute("UPDATE files SET name = ? WHERE rowid = ?", (name, rowid))
# Rename 'path' column to 'full_path' if needed
if 'path' in columns and 'full_path' not in columns:
log.info("Renaming 'path' to 'full_path' (v2 schema upgrade)...")
# Check if indexed_at column exists in v2 schema
has_indexed_at = 'indexed_at' in columns
has_mtime = 'mtime' in columns
# SQLite doesn't support RENAME COLUMN before 3.25, so use table recreation
cursor.execute("""
CREATE TABLE files_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
full_path TEXT NOT NULL UNIQUE,
content TEXT,
language TEXT,
mtime REAL,
indexed_at TEXT
)
""")
# Build INSERT statement based on available columns
# Note: the v2 schema keyed files by path, so let AUTOINCREMENT assign new ids here
if has_indexed_at and has_mtime:
cursor.execute("""
INSERT INTO files_new (name, full_path, content, language, mtime, indexed_at)
SELECT name, path, content, language, mtime, indexed_at FROM files
""")
elif has_indexed_at:
cursor.execute("""
INSERT INTO files_new (name, full_path, content, language, indexed_at)
SELECT name, path, content, language, indexed_at FROM files
""")
elif has_mtime:
cursor.execute("""
INSERT INTO files_new (name, full_path, content, language, mtime)
SELECT name, path, content, language, mtime FROM files
""")
else:
cursor.execute("""
INSERT INTO files_new (name, full_path, content, language)
SELECT name, path, content, language FROM files
""")
cursor.execute("DROP TABLE files")
cursor.execute("ALTER TABLE files_new RENAME TO files")
log.info("Dropping old FTS triggers and table...")
# Drop old triggers
cursor.execute("DROP TRIGGER IF EXISTS files_ai")
cursor.execute("DROP TRIGGER IF EXISTS files_ad")
cursor.execute("DROP TRIGGER IF EXISTS files_au")
# Drop old FTS table
cursor.execute("DROP TABLE IF EXISTS files_fts")
# Create exact FTS table (unicode61 with underscores/hyphens/dots as token chars)
# Note: tokenchars includes '.' to properly tokenize qualified names like PortRole.FLOW
log.info("Creating files_fts_exact table with unicode61 tokenizer...")
cursor.execute(
"""
CREATE VIRTUAL TABLE files_fts_exact USING fts5(
name, full_path UNINDEXED, content,
content='files',
content_rowid='id',
tokenize="unicode61 tokenchars '_-.'"
)
"""
)
# Create fuzzy FTS table (trigram or extended unicode61)
log.info(f"Creating files_fts_fuzzy table with {fuzzy_tokenizer} tokenizer...")
cursor.execute(
f"""
CREATE VIRTUAL TABLE files_fts_fuzzy USING fts5(
name, full_path UNINDEXED, content,
content='files',
content_rowid='id',
tokenize="{fuzzy_tokenizer}"
)
"""
)
# Create synchronized triggers for files_fts_exact
log.info("Creating triggers for files_fts_exact...")
cursor.execute(
"""
CREATE TRIGGER files_exact_ai AFTER INSERT ON files BEGIN
INSERT INTO files_fts_exact(rowid, name, full_path, content)
VALUES(new.id, new.name, new.full_path, new.content);
END
"""
)
cursor.execute(
"""
CREATE TRIGGER files_exact_ad AFTER DELETE ON files BEGIN
INSERT INTO files_fts_exact(files_fts_exact, rowid, name, full_path, content)
VALUES('delete', old.id, old.name, old.full_path, old.content);
END
"""
)
cursor.execute(
"""
CREATE TRIGGER files_exact_au AFTER UPDATE ON files BEGIN
INSERT INTO files_fts_exact(files_fts_exact, rowid, name, full_path, content)
VALUES('delete', old.id, old.name, old.full_path, old.content);
INSERT INTO files_fts_exact(rowid, name, full_path, content)
VALUES(new.id, new.name, new.full_path, new.content);
END
"""
)
# Create synchronized triggers for files_fts_fuzzy
log.info("Creating triggers for files_fts_fuzzy...")
cursor.execute(
"""
CREATE TRIGGER files_fuzzy_ai AFTER INSERT ON files BEGIN
INSERT INTO files_fts_fuzzy(rowid, name, full_path, content)
VALUES(new.id, new.name, new.full_path, new.content);
END
"""
)
cursor.execute(
"""
CREATE TRIGGER files_fuzzy_ad AFTER DELETE ON files BEGIN
INSERT INTO files_fts_fuzzy(files_fts_fuzzy, rowid, name, full_path, content)
VALUES('delete', old.id, old.name, old.full_path, old.content);
END
"""
)
cursor.execute(
"""
CREATE TRIGGER files_fuzzy_au AFTER UPDATE ON files BEGIN
INSERT INTO files_fts_fuzzy(files_fts_fuzzy, rowid, name, full_path, content)
VALUES('delete', old.id, old.name, old.full_path, old.content);
INSERT INTO files_fts_fuzzy(rowid, name, full_path, content)
VALUES(new.id, new.name, new.full_path, new.content);
END
"""
)
# Rebuild FTS indexes from files table
log.info("Rebuilding FTS indexes from files table...")
cursor.execute("INSERT INTO files_fts_exact(files_fts_exact) VALUES('rebuild')")
cursor.execute("INSERT INTO files_fts_fuzzy(files_fts_fuzzy) VALUES('rebuild')")
# Commit transaction
cursor.execute("COMMIT")
log.info("Migration 004 completed successfully")
# Vacuum to reclaim space (outside transaction)
try:
log.info("Running VACUUM to reclaim space...")
cursor.execute("VACUUM")
except Exception as e:
log.warning(f"VACUUM failed (non-critical): {e}")
except Exception as e:
log.error(f"Migration 004 failed: {e}")
try:
cursor.execute("ROLLBACK")
except Exception:
pass
raise
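
With both tables in place, callers can pick exact or substring-style matching per query. A minimal sketch against the columns created above (the search-term handling is an assumption, not code from this commit):

import sqlite3

def dual_fts_search(conn: sqlite3.Connection, term: str, limit: int = 10):
    # Exact token lookup: unicode61 with tokenchars '_-.' keeps qualified names whole.
    exact = conn.execute(
        "SELECT full_path FROM files_fts_exact WHERE files_fts_exact MATCH ? LIMIT ?",
        (f'"{term}"', limit),
    ).fetchall()
    # Substring-style lookup against the trigram-backed (or extended unicode61) table.
    fuzzy = conn.execute(
        "SELECT full_path FROM files_fts_fuzzy WHERE files_fts_fuzzy MATCH ? LIMIT ?",
        (f'"{term}"', limit),
    ).fetchall()
    return exact, fuzzy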


@@ -0,0 +1,196 @@
"""
Migration 005: Remove unused and redundant database fields.
This migration removes four problematic fields identified by Gemini analysis:
1. **semantic_metadata.keywords** (deprecated - replaced by file_keywords table)
- Data: Migrated to normalized file_keywords table in migration 001
- Impact: Column now redundant, remove to prevent sync issues
2. **symbols.token_count** (unused - always NULL)
- Data: Never populated, always NULL
- Impact: No data loss, just removes unused column
3. **symbols.symbol_type** (redundant - duplicates kind)
- Data: Redundant with symbols.kind field
- Impact: No data loss, kind field contains same information
4. **subdirs.direct_files** (unused - never displayed)
- Data: Never used in queries or display logic
- Impact: No data loss, just removes unused column
Schema changes use table recreation pattern (SQLite best practice):
- Create new table without deprecated columns
- Copy data from old table
- Drop old table
- Rename new table
- Recreate indexes
"""
import logging
from sqlite3 import Connection
log = logging.getLogger(__name__)
def upgrade(db_conn: Connection):
"""Remove unused and redundant fields from schema.
Note: Transaction management is handled by MigrationManager.
This migration should NOT start its own transaction.
Args:
db_conn: The SQLite database connection.
"""
cursor = db_conn.cursor()
# Step 1: Remove semantic_metadata.keywords (if column exists)
log.info("Checking semantic_metadata.keywords column...")
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_metadata'"
)
if cursor.fetchone():
# Check if keywords column exists
cursor.execute("PRAGMA table_info(semantic_metadata)")
columns = {row[1] for row in cursor.fetchall()}
if "keywords" in columns:
log.info("Removing semantic_metadata.keywords column...")
cursor.execute("""
CREATE TABLE semantic_metadata_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_id INTEGER NOT NULL UNIQUE,
summary TEXT,
purpose TEXT,
llm_tool TEXT,
generated_at REAL,
FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
)
""")
cursor.execute("""
INSERT INTO semantic_metadata_new (id, file_id, summary, purpose, llm_tool, generated_at)
SELECT id, file_id, summary, purpose, llm_tool, generated_at
FROM semantic_metadata
""")
cursor.execute("DROP TABLE semantic_metadata")
cursor.execute("ALTER TABLE semantic_metadata_new RENAME TO semantic_metadata")
# Recreate index
cursor.execute(
"CREATE INDEX IF NOT EXISTS idx_semantic_file ON semantic_metadata(file_id)"
)
log.info("Removed semantic_metadata.keywords column")
else:
log.info("semantic_metadata.keywords column does not exist, skipping")
else:
log.info("semantic_metadata table does not exist, skipping")
# Step 2: Remove symbols.token_count and symbols.symbol_type (if columns exist)
log.info("Checking symbols.token_count and symbols.symbol_type columns...")
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='symbols'"
)
if cursor.fetchone():
# Check if token_count or symbol_type columns exist
cursor.execute("PRAGMA table_info(symbols)")
columns = {row[1] for row in cursor.fetchall()}
if "token_count" in columns or "symbol_type" in columns:
log.info("Removing symbols.token_count and symbols.symbol_type columns...")
cursor.execute("""
CREATE TABLE symbols_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_id INTEGER NOT NULL,
name TEXT NOT NULL,
kind TEXT,
start_line INTEGER,
end_line INTEGER,
FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
)
""")
cursor.execute("""
INSERT INTO symbols_new (id, file_id, name, kind, start_line, end_line)
SELECT id, file_id, name, kind, start_line, end_line
FROM symbols
""")
cursor.execute("DROP TABLE symbols")
cursor.execute("ALTER TABLE symbols_new RENAME TO symbols")
# Recreate indexes
cursor.execute("CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_id)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name)")
log.info("Removed symbols.token_count and symbols.symbol_type columns")
else:
log.info("symbols.token_count/symbol_type columns do not exist, skipping")
else:
log.info("symbols table does not exist, skipping")
# Step 3: Remove subdirs.direct_files (if column exists)
log.info("Checking subdirs.direct_files column...")
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='subdirs'"
)
if cursor.fetchone():
# Check if direct_files column exists
cursor.execute("PRAGMA table_info(subdirs)")
columns = {row[1] for row in cursor.fetchall()}
if "direct_files" in columns:
log.info("Removing subdirs.direct_files column...")
cursor.execute("""
CREATE TABLE subdirs_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
index_path TEXT NOT NULL,
files_count INTEGER DEFAULT 0,
last_updated REAL
)
""")
cursor.execute("""
INSERT INTO subdirs_new (id, name, index_path, files_count, last_updated)
SELECT id, name, index_path, files_count, last_updated
FROM subdirs
""")
cursor.execute("DROP TABLE subdirs")
cursor.execute("ALTER TABLE subdirs_new RENAME TO subdirs")
# Recreate index
cursor.execute("CREATE INDEX IF NOT EXISTS idx_subdirs_name ON subdirs(name)")
log.info("Removed subdirs.direct_files column")
else:
log.info("subdirs.direct_files column does not exist, skipping")
else:
log.info("subdirs table does not exist, skipping")
log.info("Migration 005 completed successfully")
# Vacuum to reclaim space (outside transaction, optional)
# Note: VACUUM cannot run inside a transaction, so we skip it here
# The caller can run VACUUM separately if desired
def downgrade(db_conn: Connection):
"""Restore removed fields (data will be lost for keywords, token_count, symbol_type, direct_files).
This is a placeholder - true downgrade is not feasible as data is lost.
The migration is designed to be one-way since removed fields are unused/redundant.
Args:
db_conn: The SQLite database connection.
"""
log.warning(
"Migration 005 downgrade not supported - removed fields are unused/redundant. "
"Data cannot be restored."
)
raise NotImplementedError(
"Migration 005 downgrade not supported - this is a one-way migration"
)


@@ -0,0 +1,37 @@
"""
Migration 006: Ensure relationship tables and indexes exist.
This migration is intentionally idempotent. It creates the `code_relationships`
table (used for graph visualization) and its indexes if missing.
"""
from __future__ import annotations
import logging
from sqlite3 import Connection
log = logging.getLogger(__name__)
def upgrade(db_conn: Connection) -> None:
cursor = db_conn.cursor()
log.info("Ensuring code_relationships table exists...")
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS code_relationships (
id INTEGER PRIMARY KEY,
source_symbol_id INTEGER NOT NULL REFERENCES symbols (id) ON DELETE CASCADE,
target_qualified_name TEXT NOT NULL,
relationship_type TEXT NOT NULL,
source_line INTEGER NOT NULL,
target_file TEXT
)
"""
)
log.info("Ensuring relationship indexes exist...")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_rel_source ON code_relationships(source_symbol_id)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_rel_target ON code_relationships(target_qualified_name)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_rel_type ON code_relationships(relationship_type)")


@@ -0,0 +1,47 @@
"""
Migration 007: Add precomputed graph neighbor table for search expansion.
Adds:
- graph_neighbors: cached N-hop neighbors between symbols (keyed by symbol ids)
This table is derived data (a cache) and is safe to rebuild at any time.
The migration is intentionally idempotent.
"""
from __future__ import annotations
import logging
from sqlite3 import Connection
log = logging.getLogger(__name__)
def upgrade(db_conn: Connection) -> None:
cursor = db_conn.cursor()
log.info("Creating graph_neighbors table...")
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS graph_neighbors (
source_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
neighbor_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
relationship_depth INTEGER NOT NULL,
PRIMARY KEY (source_symbol_id, neighbor_symbol_id)
)
"""
)
log.info("Creating indexes for graph_neighbors...")
cursor.execute(
"""
CREATE INDEX IF NOT EXISTS idx_graph_neighbors_source_depth
ON graph_neighbors(source_symbol_id, relationship_depth)
"""
)
cursor.execute(
"""
CREATE INDEX IF NOT EXISTS idx_graph_neighbors_neighbor
ON graph_neighbors(neighbor_symbol_id)
"""
)
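
A lookup sketch over the cache table above: fetch the precomputed neighbors of a symbol up to a maximum hop depth.

import sqlite3

def neighbors_within(conn: sqlite3.Connection, symbol_id: int, max_depth: int = 2):
    # Served entirely from the precomputed cache; rebuild the table if it is stale.
    return conn.execute(
        """
        SELECT neighbor_symbol_id, relationship_depth
        FROM graph_neighbors
        WHERE source_symbol_id = ? AND relationship_depth <= ?
        ORDER BY relationship_depth
        """,
        (symbol_id, max_depth),
    ).fetchall()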


@@ -0,0 +1,81 @@
"""
Migration 008: Add Merkle hash tables for content-based incremental indexing.
Adds:
- merkle_hashes: per-file SHA-256 hashes (keyed by file_id)
- merkle_state: directory-level root hash (single row, id=1)
Backfills merkle_hashes using the existing `files.content` column when available.
"""
from __future__ import annotations
import hashlib
import logging
import time
from sqlite3 import Connection
log = logging.getLogger(__name__)
def upgrade(db_conn: Connection) -> None:
cursor = db_conn.cursor()
log.info("Creating merkle_hashes table...")
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS merkle_hashes (
file_id INTEGER PRIMARY KEY REFERENCES files(id) ON DELETE CASCADE,
sha256 TEXT NOT NULL,
updated_at REAL
)
"""
)
log.info("Creating merkle_state table...")
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS merkle_state (
id INTEGER PRIMARY KEY CHECK (id = 1),
root_hash TEXT,
updated_at REAL
)
"""
)
# Backfill file hashes from stored content (best-effort).
try:
rows = cursor.execute("SELECT id, content FROM files").fetchall()
except Exception as exc:
log.warning("Unable to backfill merkle hashes (files table missing?): %s", exc)
return
now = time.time()
inserts: list[tuple[int, str, float]] = []
for row in rows:
file_id = int(row[0])
content = row[1]
if content is None:
continue
try:
digest = hashlib.sha256(str(content).encode("utf-8", errors="ignore")).hexdigest()
inserts.append((file_id, digest, now))
except Exception:
continue
if not inserts:
return
log.info("Backfilling %d file hashes...", len(inserts))
cursor.executemany(
"""
INSERT INTO merkle_hashes(file_id, sha256, updated_at)
VALUES(?, ?, ?)
ON CONFLICT(file_id) DO UPDATE SET
sha256=excluded.sha256,
updated_at=excluded.updated_at
""",
inserts,
)
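
A sketch of how the backfilled hashes can drive change detection later: hash the current content the same way as the backfill above and compare against the stored digest.

import hashlib
import sqlite3

def content_changed(conn: sqlite3.Connection, file_id: int, current_content: str) -> bool:
    digest = hashlib.sha256(current_content.encode("utf-8", errors="ignore")).hexdigest()
    row = conn.execute(
        "SELECT sha256 FROM merkle_hashes WHERE file_id = ?", (file_id,)
    ).fetchone()
    return row is None or row[0] != digest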


@@ -0,0 +1,103 @@
"""
Migration 009: Add SPLADE sparse retrieval tables.
This migration introduces SPLADE (Sparse Lexical AnD Expansion) support:
- splade_metadata: Model configuration (model name, vocab size, ONNX path)
- splade_posting_list: Inverted index mapping token_id -> (chunk_id, weight)
The SPLADE tables are designed for efficient sparse vector retrieval:
- Token-based lookup for query expansion
- Chunk-based deletion for index maintenance
- Maintains backward compatibility with existing FTS tables
"""
import logging
from sqlite3 import Connection
log = logging.getLogger(__name__)
def upgrade(db_conn: Connection) -> None:
"""
Adds SPLADE tables for sparse retrieval.
Creates:
- splade_metadata: Stores model configuration and ONNX path
- splade_posting_list: Inverted index with token_id -> (chunk_id, weight) mappings
- Indexes for efficient token-based and chunk-based lookups
Args:
db_conn: The SQLite database connection.
"""
cursor = db_conn.cursor()
log.info("Creating splade_metadata table...")
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS splade_metadata (
id INTEGER PRIMARY KEY DEFAULT 1,
model_name TEXT NOT NULL,
vocab_size INTEGER NOT NULL,
onnx_path TEXT,
created_at REAL
)
"""
)
log.info("Creating splade_posting_list table...")
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS splade_posting_list (
token_id INTEGER NOT NULL,
chunk_id INTEGER NOT NULL,
weight REAL NOT NULL,
PRIMARY KEY (token_id, chunk_id),
FOREIGN KEY (chunk_id) REFERENCES semantic_chunks(id) ON DELETE CASCADE
)
"""
)
log.info("Creating indexes for splade_posting_list...")
# Index for efficient chunk-based lookups (deletion, updates)
cursor.execute(
"""
CREATE INDEX IF NOT EXISTS idx_splade_by_chunk
ON splade_posting_list(chunk_id)
"""
)
# Index for efficient term-based retrieval
cursor.execute(
"""
CREATE INDEX IF NOT EXISTS idx_splade_by_token
ON splade_posting_list(token_id)
"""
)
log.info("Migration 009 completed successfully")
def downgrade(db_conn: Connection) -> None:
"""
Removes SPLADE tables.
Drops:
- splade_posting_list (and associated indexes)
- splade_metadata
Args:
db_conn: The SQLite database connection.
"""
cursor = db_conn.cursor()
log.info("Dropping SPLADE indexes...")
cursor.execute("DROP INDEX IF EXISTS idx_splade_by_chunk")
cursor.execute("DROP INDEX IF EXISTS idx_splade_by_token")
log.info("Dropping splade_posting_list table...")
cursor.execute("DROP TABLE IF EXISTS splade_posting_list")
log.info("Dropping splade_metadata table...")
cursor.execute("DROP TABLE IF EXISTS splade_metadata")
log.info("Migration 009 downgrade completed successfully")


@@ -0,0 +1,162 @@
"""
Migration 010: Add multi-vector storage support for cascade retrieval.
This migration introduces the chunks table with multi-vector support:
- chunks: Stores code chunks with multiple embedding types
- embedding: Original embedding for backward compatibility
- embedding_binary: 256-dim binary vector for coarse ranking (fast)
- embedding_dense: 2048-dim dense vector for fine ranking (precise)
The multi-vector architecture enables cascade retrieval:
1. First stage: Fast binary vector search for candidate retrieval
2. Second stage: Dense vector reranking for precision
"""
import logging
from sqlite3 import Connection
log = logging.getLogger(__name__)
def upgrade(db_conn: Connection) -> None:
"""
Adds chunks table with multi-vector embedding columns.
Creates:
- chunks: Table for storing code chunks with multiple embedding types
- idx_chunks_file_path: Index for efficient file-based lookups
Also migrates existing chunks tables by adding new columns if needed.
Args:
db_conn: The SQLite database connection.
"""
cursor = db_conn.cursor()
# Check if chunks table already exists
table_exists = cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='chunks'"
).fetchone()
if table_exists:
# Migrate existing table - add new columns if missing
log.info("chunks table exists, checking for missing columns...")
col_info = cursor.execute("PRAGMA table_info(chunks)").fetchall()
existing_columns = {row[1] for row in col_info}
if "embedding_binary" not in existing_columns:
log.info("Adding embedding_binary column to chunks table...")
cursor.execute(
"ALTER TABLE chunks ADD COLUMN embedding_binary BLOB"
)
if "embedding_dense" not in existing_columns:
log.info("Adding embedding_dense column to chunks table...")
cursor.execute(
"ALTER TABLE chunks ADD COLUMN embedding_dense BLOB"
)
else:
# Create new table with all columns
log.info("Creating chunks table with multi-vector support...")
cursor.execute(
"""
CREATE TABLE chunks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_path TEXT NOT NULL,
content TEXT NOT NULL,
embedding BLOB,
embedding_binary BLOB,
embedding_dense BLOB,
metadata TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""
)
# Create index for file-based lookups
log.info("Creating index for chunks table...")
cursor.execute(
"""
CREATE INDEX IF NOT EXISTS idx_chunks_file_path
ON chunks(file_path)
"""
)
log.info("Migration 010 completed successfully")
def downgrade(db_conn: Connection) -> None:
"""
Removes multi-vector columns from chunks table.
Note: This does not drop the chunks table entirely to preserve data.
Only the new columns added by this migration are removed.
Args:
db_conn: The SQLite database connection.
"""
cursor = db_conn.cursor()
log.info("Removing multi-vector columns from chunks table...")
# SQLite doesn't support DROP COLUMN directly in older versions
# We need to recreate the table without the columns
# Check if chunks table exists
table_exists = cursor.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='chunks'"
).fetchone()
if not table_exists:
log.info("chunks table does not exist, nothing to downgrade")
return
# Check if the columns exist before trying to remove them
col_info = cursor.execute("PRAGMA table_info(chunks)").fetchall()
existing_columns = {row[1] for row in col_info}
needs_migration = (
"embedding_binary" in existing_columns or
"embedding_dense" in existing_columns
)
if not needs_migration:
log.info("Multi-vector columns not present, nothing to remove")
return
# Recreate table without the new columns
log.info("Recreating chunks table without multi-vector columns...")
cursor.execute(
"""
CREATE TABLE chunks_backup (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_path TEXT NOT NULL,
content TEXT NOT NULL,
embedding BLOB,
metadata TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""
)
cursor.execute(
"""
INSERT INTO chunks_backup (id, file_path, content, embedding, metadata, created_at)
SELECT id, file_path, content, embedding, metadata, created_at FROM chunks
"""
)
cursor.execute("DROP TABLE chunks")
cursor.execute("ALTER TABLE chunks_backup RENAME TO chunks")
# Recreate index
cursor.execute(
"""
CREATE INDEX IF NOT EXISTS idx_chunks_file_path
ON chunks(file_path)
"""
)
log.info("Migration 010 downgrade completed successfully")


@@ -0,0 +1,300 @@
"""Path mapping utilities for source paths and index paths.
This module provides bidirectional mapping between source code directories
and their corresponding index storage locations.
Storage Structure:
~/.codexlens/
├── registry.db # Global mapping table
└── indexes/
└── D/
└── Claude_dms3/
├── _index.db # Root directory index
└── src/
└── _index.db # src/ directory index
"""
import json
import os
import platform
from pathlib import Path
from typing import Optional
def _get_configured_index_root() -> Path:
"""Get the index root from environment or config file.
Priority order:
1. CODEXLENS_INDEX_DIR environment variable
2. index_dir from ~/.codexlens/config.json
3. Default: ~/.codexlens/indexes
"""
env_override = os.getenv("CODEXLENS_INDEX_DIR")
if env_override:
return Path(env_override).expanduser().resolve()
config_file = Path.home() / ".codexlens" / "config.json"
if config_file.exists():
try:
cfg = json.loads(config_file.read_text(encoding="utf-8"))
if "index_dir" in cfg:
return Path(cfg["index_dir"]).expanduser().resolve()
except (json.JSONDecodeError, OSError):
pass
return Path.home() / ".codexlens" / "indexes"
class PathMapper:
"""Bidirectional mapping tool for source paths ↔ index paths.
Handles cross-platform path normalization and conversion between
source code directories and their index storage locations.
Attributes:
DEFAULT_INDEX_ROOT: Default root directory for all indexes
INDEX_DB_NAME: Standard name for index database files
index_root: Configured index root directory
"""
DEFAULT_INDEX_ROOT = _get_configured_index_root()
INDEX_DB_NAME = "_index.db"
def __init__(self, index_root: Optional[Path] = None):
"""Initialize PathMapper with optional custom index root.
Args:
index_root: Custom index root directory. If None, uses DEFAULT_INDEX_ROOT.
"""
self.index_root = (index_root or self.DEFAULT_INDEX_ROOT).resolve()
def source_to_index_dir(self, source_path: Path) -> Path:
"""Convert source directory to its index directory path.
Maps a source code directory to where its index data should be stored.
The mapping preserves the directory structure but normalizes paths
for cross-platform compatibility.
Args:
source_path: Source directory path to map
Returns:
Index directory path under index_root
Examples:
>>> mapper = PathMapper()
>>> mapper.source_to_index_dir(Path("D:/Claude_dms3/src"))
PosixPath('/home/user/.codexlens/indexes/D/Claude_dms3/src')
>>> mapper.source_to_index_dir(Path("/home/user/project"))
PosixPath('/home/user/.codexlens/indexes/home/user/project')
"""
source_path = source_path.resolve()
normalized = self.normalize_path(source_path)
return self.index_root / normalized
def source_to_index_db(self, source_path: Path) -> Path:
"""Convert source directory to its index database file path.
Maps a source directory to the full path of its index database file,
including the standard INDEX_DB_NAME.
Args:
source_path: Source directory path to map
Returns:
Full path to the index database file
Examples:
>>> mapper = PathMapper()
>>> mapper.source_to_index_db(Path("D:/Claude_dms3/src"))
PosixPath('/home/user/.codexlens/indexes/D/Claude_dms3/src/_index.db')
"""
index_dir = self.source_to_index_dir(source_path)
return index_dir / self.INDEX_DB_NAME
def index_to_source(self, index_path: Path) -> Path:
"""Convert index path back to original source path.
Performs reverse mapping from an index storage location to the
original source directory. Handles both directory paths and
database file paths.
Args:
index_path: Index directory or database file path
Returns:
Original source directory path
Raises:
ValueError: If index_path is not under index_root
Examples:
>>> mapper = PathMapper()
>>> mapper.index_to_source(
... Path("~/.codexlens/indexes/D/Claude_dms3/src/_index.db")
... )
WindowsPath('D:/Claude_dms3/src')
>>> mapper.index_to_source(
... Path("~/.codexlens/indexes/D/Claude_dms3/src")
... )
WindowsPath('D:/Claude_dms3/src')
"""
index_path = index_path.resolve()
# Remove _index.db if present
if index_path.name == self.INDEX_DB_NAME:
index_path = index_path.parent
# Verify path is under index_root
try:
relative = index_path.relative_to(self.index_root)
except ValueError:
raise ValueError(
f"Index path {index_path} is not under index root {self.index_root}"
)
# Convert normalized path back to source path
normalized_str = str(relative).replace("\\", "/")
return self.denormalize_path(normalized_str)
def get_project_root(self, source_path: Path) -> Path:
"""Find the project root directory (topmost indexed directory).
Walks up the directory tree to find the highest-level directory
that has an index database.
Args:
source_path: Source directory to start from
Returns:
Project root directory path. Returns source_path itself if
no parent index is found.
Examples:
>>> mapper = PathMapper()
>>> mapper.get_project_root(Path("D:/Claude_dms3/src/codexlens"))
WindowsPath('D:/Claude_dms3')
"""
source_path = source_path.resolve()
current = source_path
project_root = source_path
# Walk up the tree
while current.parent != current: # Stop at filesystem root
parent_index_db = self.source_to_index_db(current.parent)
if parent_index_db.exists():
project_root = current.parent
current = current.parent
else:
break
return project_root
def get_relative_depth(self, source_path: Path, project_root: Path) -> int:
"""Calculate directory depth relative to project root.
Args:
source_path: Target directory path
project_root: Project root directory path
Returns:
Number of directory levels from project_root to source_path
Raises:
ValueError: If source_path is not under project_root
Examples:
>>> mapper = PathMapper()
>>> mapper.get_relative_depth(
... Path("D:/Claude_dms3/src/codexlens"),
... Path("D:/Claude_dms3")
... )
2
"""
source_path = source_path.resolve()
project_root = project_root.resolve()
try:
relative = source_path.relative_to(project_root)
# Count path components
return len(relative.parts)
except ValueError:
raise ValueError(
f"Source path {source_path} is not under project root {project_root}"
)
def normalize_path(self, path: Path) -> str:
"""Normalize path to cross-platform storage format.
Converts OS-specific paths to a standardized format for storage:
- Windows: Removes drive colons (D: → D)
- Unix: Removes leading slash
- Uses forward slashes throughout
Args:
path: Path to normalize
Returns:
Normalized path string
Examples:
>>> mapper = PathMapper()
>>> mapper.normalize_path(Path("D:/path/to/dir"))
'D/path/to/dir'
>>> mapper.normalize_path(Path("/home/user/path"))
'home/user/path'
"""
path = path.resolve()
path_str = str(path)
# Handle Windows paths with drive letters
if platform.system() == "Windows" and len(path.parts) > 0:
# Convert D:\path\to\dir → D/path/to/dir
drive = path.parts[0].replace(":", "") # D: → D
rest = Path(*path.parts[1:]) if len(path.parts) > 1 else Path()
normalized = f"{drive}/{rest}".replace("\\", "/")
return normalized.rstrip("/")
# Handle Unix paths
# /home/user/path → home/user/path
return path_str.lstrip("/").replace("\\", "/")
def denormalize_path(self, normalized: str) -> Path:
"""Convert normalized path back to OS-specific path.
Reverses the normalization process to restore OS-native path format:
- Windows: Adds drive colons (D → D:)
- Unix: Adds leading slash
Args:
normalized: Normalized path string
Returns:
OS-specific Path object
Examples:
>>> mapper = PathMapper()
>>> mapper.denormalize_path("D/path/to/dir") # On Windows
WindowsPath('D:/path/to/dir')
>>> mapper.denormalize_path("home/user/path") # On Unix
PosixPath('/home/user/path')
"""
parts = normalized.split("/")
# Handle Windows paths
if platform.system() == "Windows" and len(parts) > 0:
# Check if first part is a drive letter
if len(parts[0]) == 1 and parts[0].isalpha():
# D/path/to/dir → D:/path/to/dir
drive = f"{parts[0]}:"
if len(parts) > 1:
return Path(drive) / Path(*parts[1:])
return Path(drive)
# Handle Unix paths or relative paths
# home/user/path → /home/user/path
return Path("/") / Path(*parts)
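
# Usage sketch (illustrative only; assumes PathMapper() can be constructed with
# defaults, as in the doctests above).
if __name__ == "__main__":
    mapper = PathMapper()
    src = Path.cwd()
    normalized = mapper.normalize_path(src)             # e.g. 'D/Claude_dms3' or 'home/user/project'
    restored = mapper.denormalize_path(normalized)      # round-trips to an OS-native Path
    depth = mapper.get_relative_depth(src, src.parent)  # one level below its parent -> 1
    print(normalized, restored, depth)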

View File

@@ -0,0 +1,683 @@
"""Global project registry for CodexLens - SQLite storage."""
from __future__ import annotations
import platform
import sqlite3
import threading
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional
from codexlens.errors import StorageError
@dataclass
class ProjectInfo:
"""Registered project information."""
id: int
source_root: Path
index_root: Path
created_at: float
last_indexed: float
total_files: int
total_dirs: int
status: str
@dataclass
class DirMapping:
"""Directory to index path mapping."""
id: int
project_id: int
source_path: Path
index_path: Path
depth: int
files_count: int
last_updated: float
class RegistryStore:
"""Global project registry - SQLite storage.
Manages indexed projects and directory-to-index path mappings.
Thread-safe with connection pooling.
"""
DEFAULT_DB_PATH = Path.home() / ".codexlens" / "registry.db"
def __init__(self, db_path: Path | None = None) -> None:
self.db_path = (db_path or self.DEFAULT_DB_PATH).resolve()
self._lock = threading.RLock()
self._local = threading.local()
self._pool_lock = threading.Lock()
self._pool: Dict[int, sqlite3.Connection] = {}
self._pool_generation = 0
def _get_connection(self) -> sqlite3.Connection:
"""Get or create a thread-local database connection."""
thread_id = threading.get_ident()
if getattr(self._local, "generation", None) == self._pool_generation:
conn = getattr(self._local, "conn", None)
if conn is not None:
return conn
with self._pool_lock:
conn = self._pool.get(thread_id)
if conn is None:
conn = sqlite3.connect(self.db_path, check_same_thread=False)
conn.row_factory = sqlite3.Row
conn.execute("PRAGMA journal_mode=WAL")
conn.execute("PRAGMA synchronous=NORMAL")
conn.execute("PRAGMA foreign_keys=ON")
self._pool[thread_id] = conn
self._local.conn = conn
self._local.generation = self._pool_generation
return conn
def close(self) -> None:
"""Close all pooled connections."""
with self._lock:
with self._pool_lock:
for conn in self._pool.values():
conn.close()
self._pool.clear()
self._pool_generation += 1
if hasattr(self._local, "conn"):
self._local.conn = None
if hasattr(self._local, "generation"):
self._local.generation = self._pool_generation
def __enter__(self) -> RegistryStore:
self.initialize()
return self
def __exit__(self, exc_type: object, exc: object, tb: object) -> None:
self.close()
def initialize(self) -> None:
"""Create database and schema."""
with self._lock:
self.db_path.parent.mkdir(parents=True, exist_ok=True)
conn = self._get_connection()
self._create_schema(conn)
def _create_schema(self, conn: sqlite3.Connection) -> None:
"""Create database schema."""
try:
conn.execute(
"""
CREATE TABLE IF NOT EXISTS projects (
id INTEGER PRIMARY KEY,
source_root TEXT UNIQUE NOT NULL,
index_root TEXT NOT NULL,
created_at REAL,
last_indexed REAL,
total_files INTEGER DEFAULT 0,
total_dirs INTEGER DEFAULT 0,
status TEXT DEFAULT 'active'
)
"""
)
conn.execute(
"""
CREATE TABLE IF NOT EXISTS dir_mapping (
id INTEGER PRIMARY KEY,
project_id INTEGER REFERENCES projects(id) ON DELETE CASCADE,
source_path TEXT NOT NULL,
index_path TEXT NOT NULL,
depth INTEGER,
files_count INTEGER DEFAULT 0,
last_updated REAL,
UNIQUE(source_path)
)
"""
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_dir_source ON dir_mapping(source_path)"
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_dir_project ON dir_mapping(project_id)"
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_project_source ON projects(source_root)"
)
conn.commit()
except sqlite3.DatabaseError as exc:
raise StorageError(f"Failed to initialize registry schema: {exc}") from exc
def _normalize_path_for_comparison(self, path: Path) -> str:
"""Normalize paths for comparisons and storage.
Windows paths are treated as case-insensitive, so normalize to lowercase.
Unix platforms preserve case sensitivity.
"""
path_str = str(path)
if platform.system() == "Windows":
return path_str.lower()
return path_str
# === Project Operations ===
def register_project(self, source_root: Path, index_root: Path) -> ProjectInfo:
"""Register a new project or update existing one.
Args:
source_root: Source code root directory
index_root: Index storage root directory
Returns:
ProjectInfo for the registered project
"""
with self._lock:
conn = self._get_connection()
source_root_str = self._normalize_path_for_comparison(source_root.resolve())
index_root_str = str(index_root.resolve())
now = time.time()
conn.execute(
"""
INSERT INTO projects(source_root, index_root, created_at, last_indexed)
VALUES(?, ?, ?, ?)
ON CONFLICT(source_root) DO UPDATE SET
index_root=excluded.index_root,
last_indexed=excluded.last_indexed,
status='active'
""",
(source_root_str, index_root_str, now, now),
)
row = conn.execute(
"SELECT * FROM projects WHERE source_root=?", (source_root_str,)
).fetchone()
conn.commit()
if not row:
raise StorageError(f"Failed to register project: {source_root}")
return self._row_to_project_info(row)
def unregister_project(self, source_root: Path) -> bool:
"""Remove a project registration (cascades to directory mappings).
Args:
source_root: Source code root directory
Returns:
True if project was removed, False if not found
"""
with self._lock:
conn = self._get_connection()
source_root_str = self._normalize_path_for_comparison(source_root.resolve())
row = conn.execute(
"SELECT id FROM projects WHERE source_root=?", (source_root_str,)
).fetchone()
if not row:
return False
conn.execute("DELETE FROM projects WHERE source_root=?", (source_root_str,))
conn.commit()
return True
def get_project(self, source_root: Path) -> Optional[ProjectInfo]:
"""Get project information by source root.
Args:
source_root: Source code root directory
Returns:
ProjectInfo if found, None otherwise
"""
with self._lock:
conn = self._get_connection()
source_root_str = self._normalize_path_for_comparison(source_root.resolve())
row = conn.execute(
"SELECT * FROM projects WHERE source_root=?", (source_root_str,)
).fetchone()
return self._row_to_project_info(row) if row else None
def get_project_by_id(self, project_id: int) -> Optional[ProjectInfo]:
"""Get project information by ID.
Args:
project_id: Project database ID
Returns:
ProjectInfo if found, None otherwise
"""
with self._lock:
conn = self._get_connection()
row = conn.execute(
"SELECT * FROM projects WHERE id=?", (project_id,)
).fetchone()
return self._row_to_project_info(row) if row else None
def list_projects(self, status: Optional[str] = None) -> List[ProjectInfo]:
"""List all registered projects.
Args:
status: Optional status filter ('active', 'stale', 'removed')
Returns:
List of ProjectInfo objects
"""
with self._lock:
conn = self._get_connection()
if status:
rows = conn.execute(
"SELECT * FROM projects WHERE status=? ORDER BY created_at DESC",
(status,),
).fetchall()
else:
rows = conn.execute(
"SELECT * FROM projects ORDER BY created_at DESC"
).fetchall()
return [self._row_to_project_info(row) for row in rows]
def update_project_stats(
self, source_root: Path, total_files: int, total_dirs: int
) -> None:
"""Update project statistics.
Args:
source_root: Source code root directory
total_files: Total number of indexed files
total_dirs: Total number of indexed directories
"""
with self._lock:
conn = self._get_connection()
source_root_str = self._normalize_path_for_comparison(source_root.resolve())
conn.execute(
"""
UPDATE projects
SET total_files=?, total_dirs=?, last_indexed=?
WHERE source_root=?
""",
(total_files, total_dirs, time.time(), source_root_str),
)
conn.commit()
def set_project_status(self, source_root: Path, status: str) -> None:
"""Set project status.
Args:
source_root: Source code root directory
status: Status string ('active', 'stale', 'removed')
"""
with self._lock:
conn = self._get_connection()
source_root_str = self._normalize_path_for_comparison(source_root.resolve())
conn.execute(
"UPDATE projects SET status=? WHERE source_root=?",
(status, source_root_str),
)
conn.commit()
# === Directory Mapping Operations ===
def register_dir(
self,
project_id: int,
source_path: Path,
index_path: Path,
depth: int,
files_count: int = 0,
) -> DirMapping:
"""Register a directory mapping.
Args:
project_id: Project database ID
source_path: Source directory path
index_path: Index database path
depth: Directory depth relative to project root
files_count: Number of files in directory
Returns:
DirMapping for the registered directory
"""
with self._lock:
conn = self._get_connection()
source_path_str = self._normalize_path_for_comparison(source_path.resolve())
index_path_str = str(index_path.resolve())
now = time.time()
conn.execute(
"""
INSERT INTO dir_mapping(
project_id, source_path, index_path, depth, files_count, last_updated
)
VALUES(?, ?, ?, ?, ?, ?)
ON CONFLICT(source_path) DO UPDATE SET
index_path=excluded.index_path,
depth=excluded.depth,
files_count=excluded.files_count,
last_updated=excluded.last_updated
""",
(project_id, source_path_str, index_path_str, depth, files_count, now),
)
row = conn.execute(
"SELECT * FROM dir_mapping WHERE source_path=?", (source_path_str,)
).fetchone()
conn.commit()
if not row:
raise StorageError(f"Failed to register directory: {source_path}")
return self._row_to_dir_mapping(row)
def unregister_dir(self, source_path: Path) -> bool:
"""Remove a directory mapping.
Args:
source_path: Source directory path
Returns:
True if directory was removed, False if not found
"""
with self._lock:
conn = self._get_connection()
source_path_str = self._normalize_path_for_comparison(source_path.resolve())
row = conn.execute(
"SELECT id FROM dir_mapping WHERE source_path=?", (source_path_str,)
).fetchone()
if not row:
return False
conn.execute("DELETE FROM dir_mapping WHERE source_path=?", (source_path_str,))
conn.commit()
return True
def find_index_path(self, source_path: Path) -> Optional[Path]:
"""Find index path for a source directory (exact match).
Args:
source_path: Source directory path
Returns:
Index path if found, None otherwise
"""
with self._lock:
conn = self._get_connection()
source_path_str = self._normalize_path_for_comparison(source_path.resolve())
row = conn.execute(
"SELECT index_path FROM dir_mapping WHERE source_path=?",
(source_path_str,),
).fetchone()
return Path(row["index_path"]) if row else None
def find_nearest_index(self, source_path: Path) -> Optional[DirMapping]:
"""Find nearest indexed ancestor directory.
Searches for the closest parent directory that has an index.
Useful for supporting subdirectory searches.
Optimized to use single database query instead of iterating through
each parent directory level.
Args:
source_path: Source directory or file path
Returns:
DirMapping for nearest ancestor, None if not found
"""
with self._lock:
conn = self._get_connection()
source_path_resolved = source_path.resolve()
# Build list of all parent paths from deepest to shallowest
paths_to_check = []
current = source_path_resolved
while True:
paths_to_check.append(self._normalize_path_for_comparison(current))
parent = current.parent
if parent == current: # Reached filesystem root
break
current = parent
if not paths_to_check:
return None
# Single query with WHERE IN, ordered by path length (longest = nearest)
placeholders = ','.join('?' * len(paths_to_check))
query = f"""
SELECT * FROM dir_mapping
WHERE source_path IN ({placeholders})
ORDER BY LENGTH(source_path) DESC
LIMIT 1
"""
row = conn.execute(query, paths_to_check).fetchone()
return self._row_to_dir_mapping(row) if row else None
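
    # Worked example of the single-query lookup above: for
    #   source_path = /repo/src/pkg/module.py
    # paths_to_check is built deepest-first:
    #   /repo/src/pkg/module.py, /repo/src/pkg, /repo/src, /repo, /
    # and ORDER BY LENGTH(source_path) DESC returns the deepest registered
    # ancestor (e.g. the mapping for /repo/src when only /repo and /repo/src
    # are indexed).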
def find_by_source_path(self, source_path: str) -> Optional[Dict[str, str]]:
"""Find project by source path (exact or nearest match).
Searches for a project whose source_root matches or contains
the given source_path.
Args:
source_path: Source directory path as string
Returns:
Dict with project info including 'index_root', or None if not found
"""
with self._lock:
conn = self._get_connection()
resolved_path = Path(source_path).resolve()
source_path_resolved = self._normalize_path_for_comparison(resolved_path)
# First try exact match on projects table
row = conn.execute(
"SELECT * FROM projects WHERE source_root=?", (source_path_resolved,)
).fetchone()
if row:
return {
"id": str(row["id"]),
"source_root": row["source_root"],
"index_root": row["index_root"],
"status": row["status"] or "active",
}
# Try finding project that contains this path
# Build list of all parent paths
paths_to_check = []
current = resolved_path
while True:
paths_to_check.append(self._normalize_path_for_comparison(current))
parent = current.parent
if parent == current:
break
current = parent
if paths_to_check:
placeholders = ','.join('?' * len(paths_to_check))
query = f"""
SELECT * FROM projects
WHERE source_root IN ({placeholders})
ORDER BY LENGTH(source_root) DESC
LIMIT 1
"""
row = conn.execute(query, paths_to_check).fetchone()
if row:
return {
"id": str(row["id"]),
"source_root": row["source_root"],
"index_root": row["index_root"],
"status": row["status"] or "active",
}
return None
def get_project_dirs(self, project_id: int) -> List[DirMapping]:
"""Get all directory mappings for a project.
Args:
project_id: Project database ID
Returns:
List of DirMapping objects
"""
with self._lock:
conn = self._get_connection()
rows = conn.execute(
"SELECT * FROM dir_mapping WHERE project_id=? ORDER BY depth, source_path",
(project_id,),
).fetchall()
return [self._row_to_dir_mapping(row) for row in rows]
def get_subdirs(self, source_path: Path) -> List[DirMapping]:
"""Get direct subdirectory mappings.
Args:
source_path: Parent directory path
Returns:
List of DirMapping objects for direct children
"""
with self._lock:
conn = self._get_connection()
source_path_str = self._normalize_path_for_comparison(source_path.resolve())
# First get the parent's depth
parent_row = conn.execute(
"SELECT depth, project_id FROM dir_mapping WHERE source_path=?",
(source_path_str,),
).fetchone()
if not parent_row:
return []
parent_depth = int(parent_row["depth"])
project_id = int(parent_row["project_id"])
            # Get all subdirs with depth = parent_depth + 1 and matching path prefix.
            # Append the path separator so directories that merely share a name
            # prefix (e.g. "src" vs "src2") are not matched.
            sep = "\\" if platform.system() == "Windows" else "/"
            rows = conn.execute(
                """
                SELECT * FROM dir_mapping
                WHERE project_id=? AND depth=? AND source_path LIKE ?
                ORDER BY source_path
                """,
                (project_id, parent_depth + 1, f"{source_path_str}{sep}%"),
).fetchall()
return [self._row_to_dir_mapping(row) for row in rows]
def update_dir_stats(self, source_path: Path, files_count: int) -> None:
"""Update directory statistics.
Args:
source_path: Source directory path
files_count: Number of files in directory
"""
with self._lock:
conn = self._get_connection()
source_path_str = self._normalize_path_for_comparison(source_path.resolve())
conn.execute(
"""
UPDATE dir_mapping
SET files_count=?, last_updated=?
WHERE source_path=?
""",
(files_count, time.time(), source_path_str),
)
conn.commit()
def update_index_paths(self, old_root: Path, new_root: Path) -> int:
"""Update all index paths after migration.
Replaces old_root prefix with new_root in all stored index paths.
Args:
old_root: Old index root directory
new_root: New index root directory
Returns:
Number of paths updated
"""
with self._lock:
conn = self._get_connection()
old_root_str = str(old_root.resolve())
new_root_str = str(new_root.resolve())
updated = 0
            # Update projects (cursor.rowcount reports per-statement changes;
            # conn.total_changes is cumulative for the connection and would overcount)
            cursor = conn.execute(
                """
                UPDATE projects
                SET index_root = REPLACE(index_root, ?, ?)
                WHERE index_root LIKE ?
                """,
                (old_root_str, new_root_str, f"{old_root_str}%"),
            )
            updated += cursor.rowcount
            # Update dir_mapping
            cursor = conn.execute(
                """
                UPDATE dir_mapping
                SET index_path = REPLACE(index_path, ?, ?)
                WHERE index_path LIKE ?
                """,
                (old_root_str, new_root_str, f"{old_root_str}%"),
            )
            updated += cursor.rowcount
conn.commit()
return updated
# === Internal Methods ===
def _row_to_project_info(self, row: sqlite3.Row) -> ProjectInfo:
"""Convert database row to ProjectInfo."""
return ProjectInfo(
id=int(row["id"]),
source_root=Path(row["source_root"]),
index_root=Path(row["index_root"]),
created_at=float(row["created_at"]) if row["created_at"] else 0.0,
last_indexed=float(row["last_indexed"]) if row["last_indexed"] else 0.0,
total_files=int(row["total_files"]) if row["total_files"] else 0,
total_dirs=int(row["total_dirs"]) if row["total_dirs"] else 0,
status=str(row["status"]) if row["status"] else "active",
)
def _row_to_dir_mapping(self, row: sqlite3.Row) -> DirMapping:
"""Convert database row to DirMapping."""
return DirMapping(
id=int(row["id"]),
project_id=int(row["project_id"]),
source_path=Path(row["source_path"]),
index_path=Path(row["index_path"]),
depth=int(row["depth"]) if row["depth"] is not None else 0,
files_count=int(row["files_count"]) if row["files_count"] else 0,
last_updated=float(row["last_updated"]) if row["last_updated"] else 0.0,
)
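
# Usage sketch (illustrative only; the registry and project paths below are
# hypothetical examples).
if __name__ == "__main__":
    with RegistryStore(Path("/tmp/codexlens-registry.db")) as registry:
        project = registry.register_project(
            source_root=Path("/repo"),
            index_root=Path("/repo/.codexlens"),
        )
        registry.register_dir(
            project_id=project.id,
            source_path=Path("/repo/src"),
            index_path=Path("/repo/.codexlens/src/_index.db"),
            depth=1,
            files_count=12,
        )
        nearest = registry.find_nearest_index(Path("/repo/src/pkg/module.py"))
        print(nearest.index_path if nearest else "no index found")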

View File

@@ -0,0 +1,578 @@
"""SPLADE inverted index storage for sparse vector retrieval.
This module implements SQLite-based inverted index for SPLADE sparse vectors,
enabling efficient sparse retrieval using dot-product scoring.
"""
from __future__ import annotations
import logging
import sqlite3
import threading
import time
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from codexlens.entities import SearchResult
from codexlens.errors import StorageError
logger = logging.getLogger(__name__)
class SpladeIndex:
"""SQLite-based inverted index for SPLADE sparse vectors.
Stores sparse vectors as posting lists mapping token_id -> (chunk_id, weight).
Supports efficient dot-product retrieval using SQL joins.
"""
def __init__(self, db_path: Path | str) -> None:
"""Initialize SPLADE index.
Args:
db_path: Path to SQLite database file.
"""
self.db_path = Path(db_path)
self.db_path.parent.mkdir(parents=True, exist_ok=True)
# Thread-safe connection management
self._lock = threading.RLock()
self._local = threading.local()
def _get_connection(self) -> sqlite3.Connection:
"""Get or create a thread-local database connection.
Each thread gets its own connection to ensure thread safety.
Connections are stored in thread-local storage.
"""
conn = getattr(self._local, "conn", None)
if conn is None:
# Thread-local connection - each thread has its own
conn = sqlite3.connect(
self.db_path,
timeout=30.0, # Wait up to 30s for locks
check_same_thread=True, # Enforce thread safety
)
conn.row_factory = sqlite3.Row
conn.execute("PRAGMA journal_mode=WAL")
conn.execute("PRAGMA synchronous=NORMAL")
conn.execute("PRAGMA foreign_keys=ON")
# Limit mmap to 1GB to avoid OOM on smaller systems
conn.execute("PRAGMA mmap_size=1073741824")
# Increase cache size for better query performance (20MB = -20000 pages)
conn.execute("PRAGMA cache_size=-20000")
self._local.conn = conn
return conn
def close(self) -> None:
"""Close thread-local database connection."""
with self._lock:
conn = getattr(self._local, "conn", None)
if conn is not None:
conn.close()
self._local.conn = None
def __enter__(self) -> SpladeIndex:
"""Context manager entry."""
self.create_tables()
return self
def __exit__(self, exc_type, exc, tb) -> None:
"""Context manager exit."""
self.close()
def has_index(self) -> bool:
"""Check if SPLADE tables exist in database.
Returns:
True if tables exist, False otherwise.
"""
with self._lock:
conn = self._get_connection()
try:
cursor = conn.execute(
"""
SELECT name FROM sqlite_master
WHERE type='table' AND name='splade_posting_list'
"""
)
return cursor.fetchone() is not None
except sqlite3.Error as e:
logger.error("Failed to check index existence: %s", e)
return False
def create_tables(self) -> None:
"""Create SPLADE schema if not exists.
Note: When used with distributed indexes (multiple _index.db files),
the SPLADE database stores chunk IDs from multiple sources. In this case,
foreign key constraints are not enforced to allow cross-database references.
"""
with self._lock:
conn = self._get_connection()
try:
# Inverted index for sparse vectors
# Note: No FOREIGN KEY constraint to support distributed index architecture
# where chunks may come from multiple _index.db files
conn.execute("""
CREATE TABLE IF NOT EXISTS splade_posting_list (
token_id INTEGER NOT NULL,
chunk_id INTEGER NOT NULL,
weight REAL NOT NULL,
PRIMARY KEY (token_id, chunk_id)
)
""")
# Indexes for efficient lookups
conn.execute("""
CREATE INDEX IF NOT EXISTS idx_splade_by_chunk
ON splade_posting_list(chunk_id)
""")
conn.execute("""
CREATE INDEX IF NOT EXISTS idx_splade_by_token
ON splade_posting_list(token_id)
""")
# Model metadata
conn.execute("""
CREATE TABLE IF NOT EXISTS splade_metadata (
id INTEGER PRIMARY KEY DEFAULT 1,
model_name TEXT NOT NULL,
vocab_size INTEGER NOT NULL,
onnx_path TEXT,
created_at REAL
)
""")
# Chunk metadata for self-contained search results
# Stores all chunk info needed to build SearchResult without querying _index.db
conn.execute("""
CREATE TABLE IF NOT EXISTS splade_chunks (
id INTEGER PRIMARY KEY,
file_path TEXT NOT NULL,
content TEXT NOT NULL,
metadata TEXT,
source_db TEXT
)
""")
conn.commit()
logger.debug("SPLADE schema created successfully")
except sqlite3.Error as e:
raise StorageError(
f"Failed to create SPLADE schema: {e}",
db_path=str(self.db_path),
operation="create_tables"
) from e
def add_posting(self, chunk_id: int, sparse_vec: Dict[int, float]) -> None:
"""Add a single document to inverted index.
Args:
chunk_id: Chunk ID (foreign key to semantic_chunks.id).
sparse_vec: Sparse vector as {token_id: weight} mapping.
"""
if not sparse_vec:
logger.warning("Empty sparse vector for chunk_id=%d, skipping", chunk_id)
return
with self._lock:
conn = self._get_connection()
try:
# Insert all non-zero weights for this chunk
postings = [
(token_id, chunk_id, weight)
for token_id, weight in sparse_vec.items()
if weight > 0 # Only store non-zero weights
]
if postings:
conn.executemany(
"""
INSERT OR REPLACE INTO splade_posting_list
(token_id, chunk_id, weight)
VALUES (?, ?, ?)
""",
postings
)
conn.commit()
logger.debug(
"Added %d postings for chunk_id=%d", len(postings), chunk_id
)
except sqlite3.Error as e:
raise StorageError(
f"Failed to add posting for chunk_id={chunk_id}: {e}",
db_path=str(self.db_path),
operation="add_posting"
) from e
def add_postings_batch(
self, postings: List[Tuple[int, Dict[int, float]]]
) -> None:
"""Batch insert postings for multiple chunks.
Args:
postings: List of (chunk_id, sparse_vec) tuples.
"""
if not postings:
return
with self._lock:
conn = self._get_connection()
try:
# Flatten all postings into single batch
batch_data = []
for chunk_id, sparse_vec in postings:
for token_id, weight in sparse_vec.items():
if weight > 0: # Only store non-zero weights
batch_data.append((token_id, chunk_id, weight))
if batch_data:
conn.executemany(
"""
INSERT OR REPLACE INTO splade_posting_list
(token_id, chunk_id, weight)
VALUES (?, ?, ?)
""",
batch_data
)
conn.commit()
logger.debug(
"Batch inserted %d postings for %d chunks",
len(batch_data),
len(postings)
)
except sqlite3.Error as e:
raise StorageError(
f"Failed to batch insert postings: {e}",
db_path=str(self.db_path),
operation="add_postings_batch"
) from e
def add_chunk_metadata(
self,
chunk_id: int,
file_path: str,
content: str,
metadata: Optional[str] = None,
source_db: Optional[str] = None
) -> None:
"""Store chunk metadata for self-contained search results.
Args:
chunk_id: Global chunk ID.
file_path: Path to source file.
content: Chunk text content.
metadata: JSON metadata string.
source_db: Path to source _index.db.
"""
with self._lock:
conn = self._get_connection()
try:
conn.execute(
"""
INSERT OR REPLACE INTO splade_chunks
(id, file_path, content, metadata, source_db)
VALUES (?, ?, ?, ?, ?)
""",
(chunk_id, file_path, content, metadata, source_db)
)
conn.commit()
except sqlite3.Error as e:
raise StorageError(
f"Failed to add chunk metadata for chunk_id={chunk_id}: {e}",
db_path=str(self.db_path),
operation="add_chunk_metadata"
) from e
def add_chunks_metadata_batch(
self,
chunks: List[Tuple[int, str, str, Optional[str], Optional[str]]]
) -> None:
"""Batch insert chunk metadata.
Args:
chunks: List of (chunk_id, file_path, content, metadata, source_db) tuples.
"""
if not chunks:
return
with self._lock:
conn = self._get_connection()
try:
conn.executemany(
"""
INSERT OR REPLACE INTO splade_chunks
(id, file_path, content, metadata, source_db)
VALUES (?, ?, ?, ?, ?)
""",
chunks
)
conn.commit()
logger.debug("Batch inserted %d chunk metadata records", len(chunks))
except sqlite3.Error as e:
raise StorageError(
f"Failed to batch insert chunk metadata: {e}",
db_path=str(self.db_path),
operation="add_chunks_metadata_batch"
) from e
def get_chunks_by_ids(self, chunk_ids: List[int]) -> List[Dict]:
"""Get chunk metadata by IDs.
Args:
chunk_ids: List of chunk IDs to retrieve.
Returns:
List of dicts with id, file_path, content, metadata, source_db.
"""
if not chunk_ids:
return []
with self._lock:
conn = self._get_connection()
try:
placeholders = ",".join("?" * len(chunk_ids))
rows = conn.execute(
f"""
SELECT id, file_path, content, metadata, source_db
FROM splade_chunks
WHERE id IN ({placeholders})
""",
chunk_ids
).fetchall()
return [
{
"id": row["id"],
"file_path": row["file_path"],
"content": row["content"],
"metadata": row["metadata"],
"source_db": row["source_db"]
}
for row in rows
]
except sqlite3.Error as e:
logger.error("Failed to get chunks by IDs: %s", e)
return []
def remove_chunk(self, chunk_id: int) -> int:
"""Remove all postings for a chunk.
Args:
chunk_id: Chunk ID to remove.
Returns:
Number of deleted postings.
"""
with self._lock:
conn = self._get_connection()
try:
cursor = conn.execute(
"DELETE FROM splade_posting_list WHERE chunk_id = ?",
(chunk_id,)
)
conn.commit()
deleted = cursor.rowcount
logger.debug("Removed %d postings for chunk_id=%d", deleted, chunk_id)
return deleted
except sqlite3.Error as e:
raise StorageError(
f"Failed to remove chunk_id={chunk_id}: {e}",
db_path=str(self.db_path),
operation="remove_chunk"
) from e
def search(
self,
query_sparse: Dict[int, float],
limit: int = 50,
min_score: float = 0.0,
max_query_terms: int = 64
) -> List[Tuple[int, float]]:
"""Search for similar chunks using dot-product scoring.
Implements efficient sparse dot-product via SQL JOIN:
score(q, d) = sum(q[t] * d[t]) for all tokens t
Args:
query_sparse: Query sparse vector as {token_id: weight}.
limit: Maximum number of results.
min_score: Minimum score threshold.
max_query_terms: Maximum query terms to use (default: 64).
Pruning to top-K terms reduces search time with minimal impact on quality.
Set to 0 or negative to disable pruning (use all terms).
Returns:
List of (chunk_id, score) tuples, ordered by score descending.
"""
if not query_sparse:
logger.warning("Empty query sparse vector")
return []
with self._lock:
conn = self._get_connection()
try:
# Build VALUES clause for query terms
# Each term: (token_id, weight)
query_terms = [
(token_id, weight)
for token_id, weight in query_sparse.items()
if weight > 0
]
if not query_terms:
logger.warning("No non-zero query terms")
return []
# Query pruning: keep only top-K terms by weight
# max_query_terms <= 0 means no limit (use all terms)
if max_query_terms > 0 and len(query_terms) > max_query_terms:
query_terms = sorted(query_terms, key=lambda x: x[1], reverse=True)[:max_query_terms]
logger.debug(
"Query pruned from %d to %d terms",
len(query_sparse),
len(query_terms)
)
# Create CTE for query terms using parameterized VALUES
# Build placeholders and params to prevent SQL injection
params = []
placeholders = []
for token_id, weight in query_terms:
placeholders.append("(?, ?)")
params.extend([token_id, weight])
values_placeholders = ", ".join(placeholders)
sql = f"""
WITH query_terms(token_id, weight) AS (
VALUES {values_placeholders}
)
SELECT
p.chunk_id,
SUM(p.weight * q.weight) as score
FROM splade_posting_list p
INNER JOIN query_terms q ON p.token_id = q.token_id
GROUP BY p.chunk_id
HAVING score >= ?
ORDER BY score DESC
LIMIT ?
"""
# Append min_score and limit to params
params.extend([min_score, limit])
rows = conn.execute(sql, params).fetchall()
results = [(row["chunk_id"], float(row["score"])) for row in rows]
logger.debug(
"SPLADE search: %d query terms, %d results",
len(query_terms),
len(results)
)
return results
except sqlite3.Error as e:
raise StorageError(
f"SPLADE search failed: {e}",
db_path=str(self.db_path),
operation="search"
) from e
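
    # Worked example of the dot-product scoring above (hypothetical weights):
    #   query q = {101: 0.8, 205: 0.5}
    #   chunk d = {101: 0.6, 205: 0.2, 999: 0.4}
    #   score(q, d) = 0.8*0.6 + 0.5*0.2 = 0.58
    # (token 999 contributes nothing because it never joins against a query term)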
def get_metadata(self) -> Optional[Dict]:
"""Get SPLADE model metadata.
Returns:
Dictionary with model_name, vocab_size, onnx_path, created_at,
or None if not set.
"""
with self._lock:
conn = self._get_connection()
try:
row = conn.execute(
"""
SELECT model_name, vocab_size, onnx_path, created_at
FROM splade_metadata
WHERE id = 1
"""
).fetchone()
if row:
return {
"model_name": row["model_name"],
"vocab_size": row["vocab_size"],
"onnx_path": row["onnx_path"],
"created_at": row["created_at"]
}
return None
except sqlite3.Error as e:
logger.error("Failed to get metadata: %s", e)
return None
def set_metadata(
self,
model_name: str,
vocab_size: int,
onnx_path: Optional[str] = None
) -> None:
"""Set SPLADE model metadata.
Args:
model_name: SPLADE model name.
vocab_size: Vocabulary size (typically ~30k for BERT vocab).
onnx_path: Optional path to ONNX model file.
"""
with self._lock:
conn = self._get_connection()
try:
current_time = time.time()
conn.execute(
"""
INSERT OR REPLACE INTO splade_metadata
(id, model_name, vocab_size, onnx_path, created_at)
VALUES (1, ?, ?, ?, ?)
""",
(model_name, vocab_size, onnx_path, current_time)
)
conn.commit()
logger.info(
"Set SPLADE metadata: model=%s, vocab_size=%d",
model_name,
vocab_size
)
except sqlite3.Error as e:
raise StorageError(
f"Failed to set metadata: {e}",
db_path=str(self.db_path),
operation="set_metadata"
) from e
def get_stats(self) -> Dict:
"""Get index statistics.
Returns:
Dictionary with total_postings, unique_tokens, unique_chunks.
"""
with self._lock:
conn = self._get_connection()
try:
row = conn.execute("""
SELECT
COUNT(*) as total_postings,
COUNT(DISTINCT token_id) as unique_tokens,
COUNT(DISTINCT chunk_id) as unique_chunks
FROM splade_posting_list
""").fetchone()
return {
"total_postings": row["total_postings"],
"unique_tokens": row["unique_tokens"],
"unique_chunks": row["unique_chunks"]
}
except sqlite3.Error as e:
logger.error("Failed to get stats: %s", e)
return {
"total_postings": 0,
"unique_tokens": 0,
"unique_chunks": 0
}
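
# Usage sketch (illustrative only; token ids and weights are hypothetical values
# that a real SPLADE encoder would normally produce).
if __name__ == "__main__":
    with SpladeIndex(Path("/tmp/splade_index.db")) as index:
        index.set_metadata(model_name="example-splade", vocab_size=30522)
        index.add_postings_batch([
            (1, {101: 0.6, 205: 0.2}),
            (2, {101: 0.1, 999: 0.9}),
        ])
        index.add_chunks_metadata_batch([
            (1, "src/app.py", "def handle_request(): ...", None, None),
            (2, "docs/usage.md", "How to configure the client", None, None),
        ])
        hits = index.search({101: 0.8, 205: 0.5}, limit=10)  # chunk 1 scores 0.58, chunk 2 scores 0.08
        chunks = index.get_chunks_by_ids([chunk_id for chunk_id, _ in hits])
        print(hits, [c["file_path"] for c in chunks])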

View File

@@ -0,0 +1,976 @@
"""SQLite storage for CodexLens indexing and search."""
from __future__ import annotations
import json
import logging
import sqlite3
import threading
import time
from dataclasses import asdict
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple
from codexlens.entities import IndexedFile, SearchResult, Symbol
from codexlens.errors import StorageError
logger = logging.getLogger(__name__)
class SQLiteStore:
"""SQLiteStore providing FTS5 search and symbol lookup.
Implements thread-local connection pooling for improved performance.
"""
# Maximum number of connections to keep in pool to prevent memory leaks
MAX_POOL_SIZE = 32
# Idle timeout in seconds (10 minutes)
IDLE_TIMEOUT = 600
# Periodic cleanup interval in seconds (5 minutes)
CLEANUP_INTERVAL = 300
def __init__(self, db_path: str | Path) -> None:
self.db_path = Path(db_path)
self._lock = threading.RLock()
self._local = threading.local()
self._pool_lock = threading.Lock()
# Pool stores (connection, last_access_time) tuples
self._pool: Dict[int, Tuple[sqlite3.Connection, float]] = {}
self._pool_generation = 0
self._cleanup_timer: threading.Timer | None = None
self._cleanup_stop_event = threading.Event()
self._start_cleanup_timer()
def _get_connection(self) -> sqlite3.Connection:
"""Get or create a thread-local database connection."""
thread_id = threading.get_ident()
current_time = time.time()
if getattr(self._local, "generation", None) == self._pool_generation:
conn = getattr(self._local, "conn", None)
if conn is not None:
with self._pool_lock:
pool_entry = self._pool.get(thread_id)
if pool_entry is not None:
pooled_conn, _ = pool_entry
self._pool[thread_id] = (pooled_conn, current_time)
self._local.conn = pooled_conn
return pooled_conn
# Thread-local connection is stale (e.g., cleaned up by timer).
self._local.conn = None
with self._pool_lock:
pool_entry = self._pool.get(thread_id)
if pool_entry is not None:
conn, _ = pool_entry
# Update last access time
self._pool[thread_id] = (conn, current_time)
else:
# Clean up stale and idle connections if pool is too large
if len(self._pool) >= self.MAX_POOL_SIZE:
self._cleanup_stale_connections()
conn = sqlite3.connect(self.db_path, check_same_thread=False)
conn.row_factory = sqlite3.Row
conn.execute("PRAGMA journal_mode=WAL")
conn.execute("PRAGMA synchronous=NORMAL")
conn.execute("PRAGMA foreign_keys=ON")
# Memory-mapped I/O for faster reads (30GB limit)
conn.execute("PRAGMA mmap_size=30000000000")
self._pool[thread_id] = (conn, current_time)
self._local.conn = conn
self._local.generation = self._pool_generation
return conn
def _cleanup_stale_connections(self) -> None:
"""Remove connections for threads that no longer exist or have been idle too long."""
current_time = time.time()
# Get list of active thread IDs
active_threads = {t.ident for t in threading.enumerate() if t.ident is not None}
# Find connections to remove: dead threads or idle timeout exceeded
stale_ids: list[tuple[int, str]] = []
for tid, (conn, last_access) in list(self._pool.items()):
try:
is_dead_thread = tid not in active_threads
is_idle = (current_time - last_access) > self.IDLE_TIMEOUT
is_invalid_connection = False
if not is_dead_thread and not is_idle:
try:
conn.execute("SELECT 1").fetchone()
except sqlite3.ProgrammingError:
is_invalid_connection = True
except sqlite3.Error:
is_invalid_connection = True
if is_invalid_connection:
stale_ids.append((tid, "invalid_connection"))
elif is_dead_thread:
stale_ids.append((tid, "dead_thread"))
elif is_idle:
stale_ids.append((tid, "idle_timeout"))
except Exception:
# Never break cleanup for a single bad entry.
continue
# Close and remove stale connections
for tid, reason in stale_ids:
try:
conn, _ = self._pool[tid]
conn.close()
except Exception:
pass
del self._pool[tid]
logger.debug("Cleaned SQLiteStore connection for thread_id=%s (%s)", tid, reason)
def _start_cleanup_timer(self) -> None:
if self.CLEANUP_INTERVAL <= 0:
return
self._cleanup_stop_event.clear()
def tick() -> None:
if self._cleanup_stop_event.is_set():
return
try:
with self._pool_lock:
self._cleanup_stale_connections()
finally:
with self._pool_lock:
if self._cleanup_stop_event.is_set():
self._cleanup_timer = None
return
self._cleanup_timer = threading.Timer(self.CLEANUP_INTERVAL, tick)
self._cleanup_timer.daemon = True
self._cleanup_timer.start()
self._cleanup_timer = threading.Timer(self.CLEANUP_INTERVAL, tick)
self._cleanup_timer.daemon = True
self._cleanup_timer.start()
def _stop_cleanup_timer(self) -> None:
self._cleanup_stop_event.set()
with self._pool_lock:
if self._cleanup_timer is not None:
self._cleanup_timer.cancel()
self._cleanup_timer = None
def close(self) -> None:
"""Close all pooled connections."""
with self._lock:
self._stop_cleanup_timer()
with self._pool_lock:
for conn, _ in self._pool.values():
conn.close()
self._pool.clear()
self._pool_generation += 1
if hasattr(self._local, "conn"):
self._local.conn = None
if hasattr(self._local, "generation"):
self._local.generation = self._pool_generation
def __enter__(self) -> SQLiteStore:
self.initialize()
return self
def __exit__(self, exc_type: object, exc: object, tb: object) -> None:
self.close()
def execute_query(
self,
sql: str,
params: tuple = (),
allow_writes: bool = False
) -> List[Dict[str, Any]]:
"""Execute a raw SQL query and return results as dictionaries.
This is the public API for executing custom queries without bypassing
encapsulation via _get_connection().
By default, only SELECT queries are allowed. Use allow_writes=True
for trusted internal code that needs to execute other statements.
Args:
sql: SQL query string with ? placeholders for parameters
params: Tuple of parameter values to bind
allow_writes: If True, allow non-SELECT statements (default False)
Returns:
List of result rows as dictionaries
Raises:
StorageError: If query execution fails or validation fails
"""
# Validate query type for security
sql_stripped = sql.strip().upper()
if not allow_writes:
# Only allow SELECT and WITH (for CTEs) statements
if not (sql_stripped.startswith("SELECT") or sql_stripped.startswith("WITH")):
raise StorageError(
"Only SELECT queries are allowed. "
"Use allow_writes=True for trusted internal operations.",
db_path=str(self.db_path),
operation="execute_query",
details={"query_type": sql_stripped.split()[0] if sql_stripped else "EMPTY"}
)
try:
conn = self._get_connection()
rows = conn.execute(sql, params).fetchall()
return [dict(row) for row in rows]
except sqlite3.Error as e:
raise StorageError(
f"Query execution failed: {e}",
db_path=str(self.db_path),
operation="execute_query",
details={"error_type": type(e).__name__}
) from e
def initialize(self) -> None:
with self._lock:
self.db_path.parent.mkdir(parents=True, exist_ok=True)
conn = self._get_connection()
self._create_schema(conn)
self._ensure_fts_external_content(conn)
def add_file(self, indexed_file: IndexedFile, content: str) -> None:
with self._lock:
conn = self._get_connection()
path = str(Path(indexed_file.path).resolve())
language = indexed_file.language
mtime = Path(path).stat().st_mtime if Path(path).exists() else None
line_count = content.count(chr(10)) + 1
conn.execute(
"""
INSERT INTO files(path, language, content, mtime, line_count)
VALUES(?, ?, ?, ?, ?)
ON CONFLICT(path) DO UPDATE SET
language=excluded.language,
content=excluded.content,
mtime=excluded.mtime,
line_count=excluded.line_count
""",
(path, language, content, mtime, line_count),
)
row = conn.execute("SELECT id FROM files WHERE path=?", (path,)).fetchone()
if not row:
raise StorageError(f"Failed to read file id for {path}")
file_id = int(row["id"])
conn.execute("DELETE FROM symbols WHERE file_id=?", (file_id,))
if indexed_file.symbols:
conn.executemany(
"""
INSERT INTO symbols(file_id, name, kind, start_line, end_line)
VALUES(?, ?, ?, ?, ?)
""",
[
(file_id, s.name, s.kind, s.range[0], s.range[1])
for s in indexed_file.symbols
],
)
conn.commit()
def add_files(self, files_data: List[tuple[IndexedFile, str]]) -> None:
"""Add multiple files in a single transaction for better performance.
Args:
files_data: List of (indexed_file, content) tuples
"""
with self._lock:
conn = self._get_connection()
try:
conn.execute("BEGIN")
for indexed_file, content in files_data:
path = str(Path(indexed_file.path).resolve())
language = indexed_file.language
mtime = Path(path).stat().st_mtime if Path(path).exists() else None
line_count = content.count(chr(10)) + 1
conn.execute(
"""
INSERT INTO files(path, language, content, mtime, line_count)
VALUES(?, ?, ?, ?, ?)
ON CONFLICT(path) DO UPDATE SET
language=excluded.language,
content=excluded.content,
mtime=excluded.mtime,
line_count=excluded.line_count
""",
(path, language, content, mtime, line_count),
)
row = conn.execute("SELECT id FROM files WHERE path=?", (path,)).fetchone()
if not row:
raise StorageError(f"Failed to read file id for {path}")
file_id = int(row["id"])
conn.execute("DELETE FROM symbols WHERE file_id=?", (file_id,))
if indexed_file.symbols:
conn.executemany(
"""
INSERT INTO symbols(file_id, name, kind, start_line, end_line)
VALUES(?, ?, ?, ?, ?)
""",
[
(file_id, s.name, s.kind, s.range[0], s.range[1])
for s in indexed_file.symbols
],
)
conn.commit()
except Exception as exc:
try:
conn.rollback()
except Exception as rollback_exc:
logger.error(
"Rollback failed after add_files() error (%s): %s", exc, rollback_exc
)
raise exc.with_traceback(exc.__traceback__) from rollback_exc
raise
def remove_file(self, path: str | Path) -> bool:
"""Remove a file from the index."""
with self._lock:
conn = self._get_connection()
resolved_path = str(Path(path).resolve())
row = conn.execute(
"SELECT id FROM files WHERE path=?", (resolved_path,)
).fetchone()
if not row:
return False
file_id = int(row["id"])
conn.execute("DELETE FROM files WHERE id=?", (file_id,))
conn.commit()
return True
def file_exists(self, path: str | Path) -> bool:
"""Check if a file exists in the index."""
with self._lock:
conn = self._get_connection()
resolved_path = str(Path(path).resolve())
row = conn.execute(
"SELECT 1 FROM files WHERE path=?", (resolved_path,)
).fetchone()
return row is not None
def get_file_mtime(self, path: str | Path) -> float | None:
"""Get the stored mtime for a file."""
with self._lock:
conn = self._get_connection()
resolved_path = str(Path(path).resolve())
row = conn.execute(
"SELECT mtime FROM files WHERE path=?", (resolved_path,)
).fetchone()
return float(row["mtime"]) if row and row["mtime"] else None
def search_fts(self, query: str, *, limit: int = 20, offset: int = 0) -> List[SearchResult]:
with self._lock:
conn = self._get_connection()
try:
rows = conn.execute(
"""
SELECT rowid, path, bm25(files_fts) AS rank,
                           snippet(files_fts, 2, '[bold red]', '[/bold red]', '...', 20) AS excerpt
FROM files_fts
WHERE files_fts MATCH ?
ORDER BY rank
LIMIT ? OFFSET ?
""",
(query, limit, offset),
).fetchall()
except sqlite3.DatabaseError as exc:
raise StorageError(f"FTS search failed: {exc}") from exc
results: List[SearchResult] = []
for row in rows:
rank = float(row["rank"]) if row["rank"] is not None else 0.0
score = abs(rank) if rank < 0 else 0.0
results.append(
SearchResult(
path=row["path"],
score=score,
excerpt=row["excerpt"],
)
)
return results
def search_files_only(
self, query: str, *, limit: int = 20, offset: int = 0
) -> List[str]:
"""Search indexed file contents and return only file paths."""
with self._lock:
conn = self._get_connection()
try:
rows = conn.execute(
"""
SELECT path
FROM files_fts
WHERE files_fts MATCH ?
ORDER BY bm25(files_fts)
LIMIT ? OFFSET ?
""",
(query, limit, offset),
).fetchall()
except sqlite3.DatabaseError as exc:
raise StorageError(f"FTS search failed: {exc}") from exc
return [row["path"] for row in rows]
def search_symbols(
self, name: str, *, kind: Optional[str] = None, limit: int = 50
) -> List[Symbol]:
pattern = f"%{name}%"
with self._lock:
conn = self._get_connection()
if kind:
rows = conn.execute(
"""
SELECT name, kind, start_line, end_line
FROM symbols
WHERE name LIKE ? AND kind=?
ORDER BY name
LIMIT ?
""",
(pattern, kind, limit),
).fetchall()
else:
rows = conn.execute(
"""
SELECT name, kind, start_line, end_line
FROM symbols
WHERE name LIKE ?
ORDER BY name
LIMIT ?
""",
(pattern, limit),
).fetchall()
return [
Symbol(name=row["name"], kind=row["kind"], range=(row["start_line"], row["end_line"]))
for row in rows
]
def stats(self) -> Dict[str, Any]:
with self._lock:
conn = self._get_connection()
file_count = conn.execute("SELECT COUNT(*) AS c FROM files").fetchone()["c"]
symbol_count = conn.execute("SELECT COUNT(*) AS c FROM symbols").fetchone()["c"]
lang_rows = conn.execute(
"SELECT language, COUNT(*) AS c FROM files GROUP BY language ORDER BY c DESC"
).fetchall()
languages = {row["language"]: row["c"] for row in lang_rows}
# Include relationship count if table exists
relationship_count = 0
try:
rel_row = conn.execute("SELECT COUNT(*) AS c FROM code_relationships").fetchone()
relationship_count = int(rel_row["c"]) if rel_row else 0
except sqlite3.DatabaseError:
pass
return {
"files": int(file_count),
"symbols": int(symbol_count),
"relationships": relationship_count,
"languages": languages,
"db_path": str(self.db_path),
}
def _connect(self) -> sqlite3.Connection:
"""Legacy method for backward compatibility."""
return self._get_connection()
def _create_schema(self, conn: sqlite3.Connection) -> None:
try:
conn.execute(
"""
CREATE TABLE IF NOT EXISTS files (
id INTEGER PRIMARY KEY,
path TEXT UNIQUE NOT NULL,
language TEXT NOT NULL,
content TEXT NOT NULL,
mtime REAL,
line_count INTEGER
)
"""
)
conn.execute(
"""
CREATE TABLE IF NOT EXISTS symbols (
id INTEGER PRIMARY KEY,
file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
name TEXT NOT NULL,
kind TEXT NOT NULL,
start_line INTEGER NOT NULL,
end_line INTEGER NOT NULL
)
"""
)
conn.execute("CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_symbols_kind ON symbols(kind)")
conn.execute(
"""
CREATE TABLE IF NOT EXISTS code_relationships (
id INTEGER PRIMARY KEY,
source_symbol_id INTEGER NOT NULL REFERENCES symbols(id) ON DELETE CASCADE,
target_qualified_name TEXT NOT NULL,
relationship_type TEXT NOT NULL,
source_line INTEGER NOT NULL,
target_file TEXT
)
"""
)
conn.execute("CREATE INDEX IF NOT EXISTS idx_rel_target ON code_relationships(target_qualified_name)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_rel_source ON code_relationships(source_symbol_id)")
# Chunks table for multi-vector storage (cascade retrieval architecture)
# - embedding: Original embedding for backward compatibility
# - embedding_binary: 256-dim binary vector for coarse ranking
# - embedding_dense: 2048-dim dense vector for fine ranking
conn.execute(
"""
CREATE TABLE IF NOT EXISTS chunks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
file_path TEXT NOT NULL,
content TEXT NOT NULL,
embedding BLOB,
embedding_binary BLOB,
embedding_dense BLOB,
metadata TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""
)
conn.execute("CREATE INDEX IF NOT EXISTS idx_chunks_file_path ON chunks(file_path)")
# Run migration for existing databases
self._migrate_chunks_table(conn)
conn.commit()
except sqlite3.DatabaseError as exc:
raise StorageError(f"Failed to initialize database schema: {exc}") from exc
def _ensure_fts_external_content(self, conn: sqlite3.Connection) -> None:
"""Ensure files_fts is an FTS5 external-content table (no content duplication)."""
try:
sql_row = conn.execute(
"SELECT sql FROM sqlite_master WHERE type='table' AND name='files_fts'"
).fetchone()
sql = str(sql_row["sql"]) if sql_row and sql_row["sql"] else None
if sql is None:
self._create_external_fts(conn)
conn.commit()
return
if (
"content='files'" in sql
or 'content="files"' in sql
or "content=files" in sql
):
self._create_fts_triggers(conn)
conn.commit()
return
self._migrate_fts_to_external(conn)
except sqlite3.DatabaseError as exc:
raise StorageError(f"Failed to ensure FTS schema: {exc}") from exc
def _create_external_fts(self, conn: sqlite3.Connection) -> None:
conn.execute(
"""
CREATE VIRTUAL TABLE files_fts USING fts5(
path UNINDEXED,
language UNINDEXED,
content,
content='files',
content_rowid='id',
tokenize="unicode61 tokenchars '_'"
)
"""
)
self._create_fts_triggers(conn)
def _create_fts_triggers(self, conn: sqlite3.Connection) -> None:
conn.execute(
"""
CREATE TRIGGER IF NOT EXISTS files_ai AFTER INSERT ON files BEGIN
INSERT INTO files_fts(rowid, path, language, content)
VALUES(new.id, new.path, new.language, new.content);
END
"""
)
conn.execute(
"""
CREATE TRIGGER IF NOT EXISTS files_ad AFTER DELETE ON files BEGIN
INSERT INTO files_fts(files_fts, rowid, path, language, content)
VALUES('delete', old.id, old.path, old.language, old.content);
END
"""
)
conn.execute(
"""
CREATE TRIGGER IF NOT EXISTS files_au AFTER UPDATE ON files BEGIN
INSERT INTO files_fts(files_fts, rowid, path, language, content)
VALUES('delete', old.id, old.path, old.language, old.content);
INSERT INTO files_fts(rowid, path, language, content)
VALUES(new.id, new.path, new.language, new.content);
END
"""
)
def _migrate_fts_to_external(self, conn: sqlite3.Connection) -> None:
"""Migrate legacy files_fts (with duplicated content) to external content."""
try:
conn.execute("BEGIN")
conn.execute("DROP TRIGGER IF EXISTS files_ai")
conn.execute("DROP TRIGGER IF EXISTS files_ad")
conn.execute("DROP TRIGGER IF EXISTS files_au")
conn.execute("ALTER TABLE files_fts RENAME TO files_fts_legacy")
self._create_external_fts(conn)
conn.execute("INSERT INTO files_fts(files_fts) VALUES('rebuild')")
conn.execute("DROP TABLE files_fts_legacy")
conn.commit()
except sqlite3.DatabaseError as exc:
try:
conn.rollback()
except Exception as rollback_exc:
logger.error(
"Rollback failed during FTS schema migration (%s): %s", exc, rollback_exc
)
raise exc.with_traceback(exc.__traceback__) from rollback_exc
try:
conn.execute("DROP TABLE IF EXISTS files_fts")
except Exception:
pass
try:
conn.execute("ALTER TABLE files_fts_legacy RENAME TO files_fts")
conn.commit()
except Exception:
pass
raise
try:
conn.execute("VACUUM")
except sqlite3.DatabaseError:
pass
def _migrate_chunks_table(self, conn: sqlite3.Connection) -> None:
"""Migrate existing chunks table to add multi-vector columns if needed.
This handles upgrading existing databases that may have the chunks table
without the embedding_binary and embedding_dense columns.
"""
# Check if chunks table exists
table_exists = conn.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name='chunks'"
).fetchone()
if not table_exists:
# Table doesn't exist yet, nothing to migrate
return
# Check existing columns
cursor = conn.execute("PRAGMA table_info(chunks)")
columns = {row[1] for row in cursor.fetchall()}
# Add embedding_binary column if missing
if "embedding_binary" not in columns:
logger.info("Migrating chunks table: adding embedding_binary column")
conn.execute(
"ALTER TABLE chunks ADD COLUMN embedding_binary BLOB"
)
# Add embedding_dense column if missing
if "embedding_dense" not in columns:
logger.info("Migrating chunks table: adding embedding_dense column")
conn.execute(
"ALTER TABLE chunks ADD COLUMN embedding_dense BLOB"
)
def add_chunks(
self,
file_path: str,
chunks_data: List[Dict[str, Any]],
*,
embedding: Optional[List[List[float]]] = None,
embedding_binary: Optional[List[bytes]] = None,
embedding_dense: Optional[List[bytes]] = None,
) -> List[int]:
"""Add multiple chunks with multi-vector embeddings support.
This method supports the cascade retrieval architecture with three embedding types:
- embedding: Original dense embedding for backward compatibility
- embedding_binary: 256-dim binary vector for fast coarse ranking
- embedding_dense: 2048-dim dense vector for precise fine ranking
Args:
file_path: Path to the source file for all chunks.
chunks_data: List of dicts with 'content' and optional 'metadata' keys.
embedding: Optional list of dense embeddings (one per chunk).
embedding_binary: Optional list of binary embeddings as bytes (one per chunk).
embedding_dense: Optional list of dense embeddings as bytes (one per chunk).
Returns:
List of inserted chunk IDs.
Raises:
ValueError: If embedding list lengths don't match chunks_data length.
StorageError: If database operation fails.
"""
if not chunks_data:
return []
n_chunks = len(chunks_data)
# Validate embedding lengths
if embedding is not None and len(embedding) != n_chunks:
raise ValueError(
f"embedding length ({len(embedding)}) != chunks_data length ({n_chunks})"
)
if embedding_binary is not None and len(embedding_binary) != n_chunks:
raise ValueError(
f"embedding_binary length ({len(embedding_binary)}) != chunks_data length ({n_chunks})"
)
if embedding_dense is not None and len(embedding_dense) != n_chunks:
raise ValueError(
f"embedding_dense length ({len(embedding_dense)}) != chunks_data length ({n_chunks})"
)
# Prepare batch data
batch_data = []
for i, chunk in enumerate(chunks_data):
content = chunk.get("content", "")
metadata = chunk.get("metadata")
metadata_json = json.dumps(metadata) if metadata else None
# Convert embeddings to bytes if needed
emb_blob = None
if embedding is not None:
import struct
emb_blob = struct.pack(f"{len(embedding[i])}f", *embedding[i])
emb_binary_blob = embedding_binary[i] if embedding_binary is not None else None
emb_dense_blob = embedding_dense[i] if embedding_dense is not None else None
batch_data.append((
file_path, content, emb_blob, emb_binary_blob, emb_dense_blob, metadata_json
))
with self._lock:
conn = self._get_connection()
try:
# Get starting ID before insert
row = conn.execute("SELECT MAX(id) FROM chunks").fetchone()
start_id = (row[0] or 0) + 1
conn.executemany(
"""
INSERT INTO chunks (
file_path, content, embedding, embedding_binary,
embedding_dense, metadata
)
VALUES (?, ?, ?, ?, ?, ?)
""",
batch_data
)
conn.commit()
# Calculate inserted IDs
return list(range(start_id, start_id + n_chunks))
except sqlite3.DatabaseError as exc:
raise StorageError(
f"Failed to add chunks: {exc}",
db_path=str(self.db_path),
operation="add_chunks",
) from exc
def get_binary_embeddings(
self, chunk_ids: List[int]
) -> Dict[int, Optional[bytes]]:
"""Get binary embeddings for specified chunk IDs.
Used for coarse ranking in cascade retrieval architecture.
Binary embeddings (256-dim) enable fast approximate similarity search.
Args:
chunk_ids: List of chunk IDs to retrieve embeddings for.
Returns:
Dictionary mapping chunk_id to embedding_binary bytes (or None if not set).
Raises:
StorageError: If database query fails.
"""
if not chunk_ids:
return {}
with self._lock:
conn = self._get_connection()
try:
placeholders = ",".join("?" * len(chunk_ids))
rows = conn.execute(
f"SELECT id, embedding_binary FROM chunks WHERE id IN ({placeholders})",
chunk_ids
).fetchall()
return {row["id"]: row["embedding_binary"] for row in rows}
except sqlite3.DatabaseError as exc:
raise StorageError(
f"Failed to get binary embeddings: {exc}",
db_path=str(self.db_path),
operation="get_binary_embeddings",
) from exc
def get_dense_embeddings(
self, chunk_ids: List[int]
) -> Dict[int, Optional[bytes]]:
"""Get dense embeddings for specified chunk IDs.
Used for fine ranking in cascade retrieval architecture.
Dense embeddings (2048-dim) provide high-precision similarity scoring.
Args:
chunk_ids: List of chunk IDs to retrieve embeddings for.
Returns:
Dictionary mapping chunk_id to embedding_dense bytes (or None if not set).
Raises:
StorageError: If database query fails.
"""
if not chunk_ids:
return {}
with self._lock:
conn = self._get_connection()
try:
placeholders = ",".join("?" * len(chunk_ids))
rows = conn.execute(
f"SELECT id, embedding_dense FROM chunks WHERE id IN ({placeholders})",
chunk_ids
).fetchall()
return {row["id"]: row["embedding_dense"] for row in rows}
except sqlite3.DatabaseError as exc:
raise StorageError(
f"Failed to get dense embeddings: {exc}",
db_path=str(self.db_path),
operation="get_dense_embeddings",
) from exc
def get_chunks_by_ids(
self, chunk_ids: List[int]
) -> List[Dict[str, Any]]:
"""Get chunk data for specified IDs.
Args:
chunk_ids: List of chunk IDs to retrieve.
Returns:
List of chunk dictionaries with id, file_path, content, metadata.
Raises:
StorageError: If database query fails.
"""
if not chunk_ids:
return []
with self._lock:
conn = self._get_connection()
try:
placeholders = ",".join("?" * len(chunk_ids))
rows = conn.execute(
f"""
SELECT id, file_path, content, metadata, created_at
FROM chunks
WHERE id IN ({placeholders})
""",
chunk_ids
).fetchall()
results = []
for row in rows:
metadata = None
if row["metadata"]:
try:
metadata = json.loads(row["metadata"])
except json.JSONDecodeError:
pass
results.append({
"id": row["id"],
"file_path": row["file_path"],
"content": row["content"],
"metadata": metadata,
"created_at": row["created_at"],
})
return results
except sqlite3.DatabaseError as exc:
raise StorageError(
f"Failed to get chunks: {exc}",
db_path=str(self.db_path),
operation="get_chunks_by_ids",
) from exc
def delete_chunks_by_file(self, file_path: str) -> int:
"""Delete all chunks for a given file path.
Args:
file_path: Path to the source file.
Returns:
Number of deleted chunks.
Raises:
StorageError: If database operation fails.
"""
with self._lock:
conn = self._get_connection()
try:
cursor = conn.execute(
"DELETE FROM chunks WHERE file_path = ?",
(file_path,)
)
conn.commit()
return cursor.rowcount
except sqlite3.DatabaseError as exc:
raise StorageError(
f"Failed to delete chunks: {exc}",
db_path=str(self.db_path),
operation="delete_chunks_by_file",
) from exc
def count_chunks(self) -> int:
"""Count total chunks in store.
Returns:
Total number of chunks.
"""
with self._lock:
conn = self._get_connection()
row = conn.execute("SELECT COUNT(*) AS c FROM chunks").fetchone()
return int(row["c"]) if row else 0
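
# Usage sketch (illustrative only). Assumes IndexedFile is constructible from the
# path/language/symbols fields that add_file() reads; the file path and embedding
# bytes below are hypothetical placeholders.
if __name__ == "__main__":
    source = "def greet(name):\n    return f'hello {name}'\n"
    indexed = IndexedFile(
        path="/repo/src/greet.py",
        language="python",
        symbols=[Symbol(name="greet", kind="function", range=(1, 2))],
    )
    with SQLiteStore(Path("/tmp/codexlens_index.db")) as store:
        store.add_file(indexed, source)
        print(store.search_fts("greet", limit=5))            # FTS5 hits with excerpts
        print(store.search_symbols("gre", kind="function"))  # Symbol(name='greet', ...)
        chunk_ids = store.add_chunks(
            "/repo/src/greet.py",
            [{"content": source, "metadata": {"category": "code"}}],
            embedding_binary=[bytes(32)],  # 256-bit coarse vector packed into 32 bytes
        )
        print(store.get_binary_embeddings(chunk_ids))
        print(store.stats())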

View File

@@ -0,0 +1,64 @@
"""SQLite utility functions for CodexLens storage layer."""
from __future__ import annotations
import logging
import sqlite3
log = logging.getLogger(__name__)
def check_trigram_support(conn: sqlite3.Connection) -> bool:
"""Check if SQLite supports trigram tokenizer for FTS5.
Trigram tokenizer requires SQLite >= 3.34.0.
Args:
conn: Database connection to test
Returns:
True if trigram tokenizer is available, False otherwise
"""
try:
# Test by creating a temporary virtual table with trigram tokenizer
conn.execute(
"""
CREATE VIRTUAL TABLE IF NOT EXISTS test_trigram_check
USING fts5(test_content, tokenize='trigram')
"""
)
# Clean up test table
conn.execute("DROP TABLE IF EXISTS test_trigram_check")
conn.commit()
return True
except sqlite3.OperationalError as e:
# Trigram tokenizer not available
if "unrecognized tokenizer" in str(e).lower():
log.debug("Trigram tokenizer not available in this SQLite version")
return False
# Other operational errors should be re-raised
raise
except Exception:
# Any other exception means trigram is not supported
return False
def get_sqlite_version(conn: sqlite3.Connection) -> tuple[int, int, int]:
"""Get SQLite version as (major, minor, patch) tuple.
Args:
conn: Database connection
Returns:
Version tuple, e.g., (3, 34, 1)
"""
row = conn.execute("SELECT sqlite_version()").fetchone()
version_str = row[0] if row else "0.0.0"
parts = version_str.split('.')
try:
major = int(parts[0]) if len(parts) > 0 else 0
minor = int(parts[1]) if len(parts) > 1 else 0
patch = int(parts[2]) if len(parts) > 2 else 0
return (major, minor, patch)
except (ValueError, IndexError):
return (0, 0, 0)
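# --- Hypothetical usage sketch (illustration only, not part of this commit) ---
# Shows how these helpers are meant to be combined: probe the running SQLite build
# and pick an FTS5 tokenizer accordingly. The table name `code_fts` and the column
# `content` are made up for the example; only check_trigram_support and
# get_sqlite_version come from this module.
def create_fts_table(conn: sqlite3.Connection) -> None:
    """Create an FTS5 table, preferring the trigram tokenizer when available."""
    major, minor, patch = get_sqlite_version(conn)
    log.debug("SQLite version: %d.%d.%d", major, minor, patch)
    # Fall back to the default unicode61 tokenizer on builds older than 3.34.0.
    tokenize = "trigram" if check_trigram_support(conn) else "unicode61"
    conn.execute(
        f"""
        CREATE VIRTUAL TABLE IF NOT EXISTS code_fts
        USING fts5(content, tokenize='{tokenize}')
        """
    )
    conn.commit()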

View File

@@ -0,0 +1,415 @@
"""Central storage for vector metadata.
This module provides a centralized SQLite database for storing chunk metadata
associated with centralized vector indexes. Instead of traversing all _index.db
files to fetch chunk metadata, this provides O(1) lookup by chunk ID.
"""
from __future__ import annotations
import json
import logging
import sqlite3
import threading
from pathlib import Path
from typing import Any, Dict, List, Optional
from codexlens.errors import StorageError
logger = logging.getLogger(__name__)
class VectorMetadataStore:
"""Store and retrieve chunk metadata for centralized vector search.
This class provides efficient storage and retrieval of chunk metadata
for the centralized vector index architecture. All chunk metadata is
stored in a single _vectors_meta.db file at the project root, enabling
fast lookups without traversing multiple _index.db files.
Schema:
chunk_metadata:
- chunk_id: INTEGER PRIMARY KEY - Global chunk ID
- file_path: TEXT NOT NULL - Path to source file
- content: TEXT - Chunk text content
- start_line: INTEGER - Start line in source file
- end_line: INTEGER - End line in source file
- category: TEXT - Content category (code/doc)
- metadata: TEXT - JSON-encoded additional metadata
- source_index_db: TEXT - Path to source _index.db file
"""
def __init__(self, db_path: Path | str) -> None:
"""Initialize VectorMetadataStore.
Args:
db_path: Path to SQLite database file.
"""
self.db_path = Path(db_path)
self.db_path.parent.mkdir(parents=True, exist_ok=True)
# Thread-safe connection management
self._lock = threading.RLock()
self._local = threading.local()
def _get_connection(self) -> sqlite3.Connection:
"""Get or create a thread-local database connection.
Each thread gets its own connection to ensure thread safety.
"""
conn = getattr(self._local, "conn", None)
if conn is None:
conn = sqlite3.connect(
str(self.db_path),
timeout=30.0,
check_same_thread=True,
)
conn.row_factory = sqlite3.Row
conn.execute("PRAGMA journal_mode=WAL")
conn.execute("PRAGMA synchronous=NORMAL")
conn.execute("PRAGMA mmap_size=1073741824") # 1GB mmap
self._local.conn = conn
return conn
def _ensure_schema(self) -> None:
"""Create tables if they don't exist."""
with self._lock:
conn = self._get_connection()
try:
conn.execute('''
CREATE TABLE IF NOT EXISTS chunk_metadata (
chunk_id INTEGER PRIMARY KEY,
file_path TEXT NOT NULL,
content TEXT,
start_line INTEGER,
end_line INTEGER,
category TEXT,
metadata TEXT,
source_index_db TEXT
)
''')
conn.execute(
'CREATE INDEX IF NOT EXISTS idx_chunk_file_path '
'ON chunk_metadata(file_path)'
)
conn.execute(
'CREATE INDEX IF NOT EXISTS idx_chunk_category '
'ON chunk_metadata(category)'
)
# Binary vectors table for cascade search
conn.execute('''
CREATE TABLE IF NOT EXISTS binary_vectors (
chunk_id INTEGER PRIMARY KEY,
vector BLOB NOT NULL
)
''')
conn.commit()
logger.debug("VectorMetadataStore schema created/verified")
except sqlite3.Error as e:
raise StorageError(
f"Failed to create schema: {e}",
db_path=str(self.db_path),
operation="_ensure_schema"
) from e
def add_chunk(
self,
chunk_id: int,
file_path: str,
content: str,
start_line: Optional[int] = None,
end_line: Optional[int] = None,
category: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
source_index_db: Optional[str] = None,
) -> None:
"""Add a single chunk's metadata.
Args:
chunk_id: Global unique chunk ID.
file_path: Path to source file.
content: Chunk text content.
start_line: Start line in source file.
end_line: End line in source file.
category: Content category (code/doc).
metadata: Additional metadata dictionary.
source_index_db: Path to source _index.db file.
"""
with self._lock:
conn = self._get_connection()
try:
metadata_json = json.dumps(metadata) if metadata else None
conn.execute(
'''
INSERT OR REPLACE INTO chunk_metadata
(chunk_id, file_path, content, start_line, end_line,
category, metadata, source_index_db)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
''',
(chunk_id, file_path, content, start_line, end_line,
category, metadata_json, source_index_db)
)
conn.commit()
except sqlite3.Error as e:
raise StorageError(
f"Failed to add chunk {chunk_id}: {e}",
db_path=str(self.db_path),
operation="add_chunk"
) from e
def add_chunks(self, chunks: List[Dict[str, Any]]) -> None:
"""Batch insert chunk metadata.
Args:
chunks: List of dictionaries with keys:
- chunk_id (required): Global unique chunk ID
- file_path (required): Path to source file
- content: Chunk text content
- start_line: Start line in source file
- end_line: End line in source file
- category: Content category (code/doc)
- metadata: Additional metadata dictionary
- source_index_db: Path to source _index.db file
"""
if not chunks:
return
with self._lock:
conn = self._get_connection()
try:
batch_data = []
for chunk in chunks:
metadata = chunk.get("metadata")
metadata_json = json.dumps(metadata) if metadata else None
batch_data.append((
chunk["chunk_id"],
chunk["file_path"],
chunk.get("content"),
chunk.get("start_line"),
chunk.get("end_line"),
chunk.get("category"),
metadata_json,
chunk.get("source_index_db"),
))
conn.executemany(
'''
INSERT OR REPLACE INTO chunk_metadata
(chunk_id, file_path, content, start_line, end_line,
category, metadata, source_index_db)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
''',
batch_data
)
conn.commit()
logger.debug("Batch inserted %d chunk metadata records", len(chunks))
except sqlite3.Error as e:
raise StorageError(
f"Failed to batch insert chunks: {e}",
db_path=str(self.db_path),
operation="add_chunks"
) from e
def get_chunks_by_ids(
self,
chunk_ids: List[int],
category: Optional[str] = None,
) -> List[Dict[str, Any]]:
"""Retrieve chunks by their IDs - the key optimization.
This is the primary method that replaces traversing all _index.db files.
Provides O(1) lookup by chunk ID instead of O(n) where n is the number
of index databases.
Args:
chunk_ids: List of chunk IDs to retrieve.
category: Optional category filter ('code' or 'doc').
Returns:
List of dictionaries with chunk metadata:
- chunk_id: Global chunk ID
- file_path: Path to source file
- content: Chunk text content
- start_line: Start line in source file
- end_line: End line in source file
- category: Content category
- metadata: Parsed metadata dictionary
- source_index_db: Source _index.db path
"""
if not chunk_ids:
return []
# No lock needed for reads: WAL mode + thread-local connections ensure safety
conn = self._get_connection()
try:
placeholders = ",".join("?" * len(chunk_ids))
if category:
query = f'''
SELECT chunk_id, file_path, content, start_line, end_line,
category, metadata, source_index_db
FROM chunk_metadata
WHERE chunk_id IN ({placeholders}) AND category = ?
'''
params = list(chunk_ids) + [category]
else:
query = f'''
SELECT chunk_id, file_path, content, start_line, end_line,
category, metadata, source_index_db
FROM chunk_metadata
WHERE chunk_id IN ({placeholders})
'''
params = list(chunk_ids)
rows = conn.execute(query, params).fetchall()
results = []
for row in rows:
metadata = None
if row["metadata"]:
try:
metadata = json.loads(row["metadata"])
except json.JSONDecodeError:
metadata = {}
results.append({
"chunk_id": row["chunk_id"],
"file_path": row["file_path"],
"content": row["content"],
"start_line": row["start_line"],
"end_line": row["end_line"],
"category": row["category"],
"metadata": metadata or {},
"source_index_db": row["source_index_db"],
})
return results
except sqlite3.Error as e:
logger.error("Failed to get chunks by IDs: %s", e)
return []
def get_chunk_count(self) -> int:
"""Get total number of chunks in store.
Returns:
Total chunk count.
"""
# No lock needed for reads: WAL mode + thread-local connections ensure safety
conn = self._get_connection()
try:
row = conn.execute(
"SELECT COUNT(*) FROM chunk_metadata"
).fetchone()
return row[0] if row else 0
except sqlite3.Error:
return 0
def clear(self) -> None:
"""Clear all metadata."""
with self._lock:
conn = self._get_connection()
try:
conn.execute("DELETE FROM chunk_metadata")
conn.commit()
logger.info("Cleared all chunk metadata")
except sqlite3.Error as e:
raise StorageError(
f"Failed to clear metadata: {e}",
db_path=str(self.db_path),
operation="clear"
) from e
def close(self) -> None:
"""Close database connection."""
with self._lock:
conn = getattr(self._local, "conn", None)
if conn is not None:
conn.close()
self._local.conn = None
def __enter__(self) -> "VectorMetadataStore":
"""Context manager entry."""
self._ensure_schema()
return self
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
"""Context manager exit."""
self.close()
# ============= Binary Vector Methods for Cascade Search =============
def add_binary_vectors(
self, chunk_ids: List[int], binary_vectors: List[bytes]
) -> None:
"""Batch insert binary vectors for cascade search.
Args:
chunk_ids: List of chunk IDs.
binary_vectors: List of packed binary vectors (as bytes).
"""
if not chunk_ids or len(chunk_ids) != len(binary_vectors):
return
with self._lock:
conn = self._get_connection()
try:
data = list(zip(chunk_ids, binary_vectors))
conn.executemany(
"INSERT OR REPLACE INTO binary_vectors (chunk_id, vector) VALUES (?, ?)",
data
)
conn.commit()
logger.debug("Added %d binary vectors", len(chunk_ids))
except sqlite3.Error as e:
raise StorageError(
f"Failed to add binary vectors: {e}",
db_path=str(self.db_path),
operation="add_binary_vectors"
) from e
def get_all_binary_vectors(self) -> List[tuple]:
"""Get all binary vectors for cascade search.
Returns:
List of (chunk_id, vector_bytes) tuples.
"""
conn = self._get_connection()
try:
rows = conn.execute(
"SELECT chunk_id, vector FROM binary_vectors"
).fetchall()
return [(row[0], row[1]) for row in rows]
except sqlite3.Error as e:
logger.error("Failed to get binary vectors: %s", e)
return []
def get_binary_vector_count(self) -> int:
"""Get total number of binary vectors.
Returns:
Binary vector count.
"""
conn = self._get_connection()
try:
row = conn.execute(
"SELECT COUNT(*) FROM binary_vectors"
).fetchone()
return row[0] if row else 0
except sqlite3.Error:
return 0
def clear_binary_vectors(self) -> None:
"""Clear all binary vectors."""
with self._lock:
conn = self._get_connection()
try:
conn.execute("DELETE FROM binary_vectors")
conn.commit()
logger.info("Cleared all binary vectors")
except sqlite3.Error as e:
raise StorageError(
f"Failed to clear binary vectors: {e}",
db_path=str(self.db_path),
operation="clear_binary_vectors"
) from e
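# --- Hypothetical usage sketch (illustration only, not part of this commit) ---
# Shows the intended lifecycle: the context manager creates the schema via
# _ensure_schema(), chunks are batch-inserted, and candidates are later fetched by
# global chunk ID. All paths, IDs, and the 32-byte binary vector below are made-up
# example values.
def _demo(project_root: Path) -> None:
    meta_db = project_root / "_vectors_meta.db"
    with VectorMetadataStore(meta_db) as store:
        store.add_chunks([
            {
                "chunk_id": 1,
                "file_path": "src/app.py",
                "content": "def main() -> None: ...",
                "start_line": 1,
                "end_line": 3,
                "category": "code",
                "metadata": {"language": "python"},
                "source_index_db": "src/_index.db",
            },
        ])
        # Coarse-stage binary vectors live in the same database; how the bits are
        # packed into bytes is up to the caller.
        store.add_binary_vectors([1], [b"\x0f" * 32])
        for chunk in store.get_chunks_by_ids([1], category="code"):
            print(chunk["file_path"], chunk["start_line"], chunk["end_line"])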