refactor: 移除图索引功能，修复内存泄漏，优化嵌入生成

主要更改:

1. 移除图索引功能 (graph indexing)
   - 删除 graph_analyzer.py 及相关迁移文件
   - 移除 CLI 的 graph 命令和 --enrich 标志
   - 清理 chain_search.py 中的图查询方法 (370行)
   - 删除相关测试文件

2. 修复嵌入生成内存问题
   - 重构 generate_embeddings.py 使用流式批处理
   - 改用 embedding_manager 的内存安全实现
   - 文件从 548 行精简到 259 行 (52.7% 减少)

3. 修复内存泄漏
   - chain_search.py: quick_search 使用 with 语句管理 ChainSearchEngine
   - embedding_manager.py: 使用 with 语句管理 VectorStore
   - vector_store.py: 添加暴力搜索内存警告

4. 代码清理
   - 移除 Symbol 模型的 token_count 和 symbol_type 字段
   - 清理相关测试用例

测试: 760 passed, 7 skipped

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-11 02:33:51 +08:00 · 2025-12-21 16:22:03 +08:00
parent 15d5890861
commit 3e9a309079
19 changed files with 165 additions and 3909 deletions
--- a/codex-lens/src/codexlens/storage/dir_index.py
+++ b/codex-lens/src/codexlens/storage/dir_index.py
@@ -17,7 +17,7 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple

-from codexlens.entities import CodeRelationship, SearchResult, Symbol
+from codexlens.entities import SearchResult, Symbol
 from codexlens.errors import StorageError


@@ -237,116 +237,6 @@ class DirIndexStore:
                conn.rollback()
                raise StorageError(f"Failed to add file {name}: {exc}") from exc

-    def add_relationships(
-        self,
-        file_path: str | Path,
-        relationships: List[CodeRelationship],
-    ) -> int:
-        """Store code relationships for a file.
-
-        Args:
-            file_path: Path to the source file
-            relationships: List of CodeRelationship objects to store
-
-        Returns:
-            Number of relationships stored
-
-        Raises:
-            StorageError: If database operations fail
-        """
-        if not relationships:
-            return 0
-
-        with self._lock:
-            conn = self._get_connection()
-            file_path_str = str(Path(file_path).resolve())
-
-            try:
-                # Get file_id
-                row = conn.execute(
-                    "SELECT id FROM files WHERE full_path=?", (file_path_str,)
-                ).fetchone()
-                if not row:
-                    return 0
-
-                file_id = int(row["id"])
-
-                # Delete existing relationships for symbols in this file
-                conn.execute(
-                    """
-                    DELETE FROM code_relationships
-                    WHERE source_symbol_id IN (
-                        SELECT id FROM symbols WHERE file_id=?
-                    )
-                    """,
-                    (file_id,),
-                )
-
-                # Insert new relationships
-                relationship_rows = []
-                skipped_relationships = []
-                for rel in relationships:
-                    # Extract simple name from fully qualified name (e.g., "MyClass.my_method" -> "my_method")
-                    # This handles cases where GraphAnalyzer generates qualified names but symbols table stores simple names
-                    source_symbol_simple = rel.source_symbol.split(".")[-1] if "." in rel.source_symbol else rel.source_symbol
-
-                    # Find symbol_id by name and file
-                    symbol_row = conn.execute(
-                        """
-                        SELECT id FROM symbols
-                        WHERE file_id=? AND name=? AND start_line<=? AND end_line>=?
-                        LIMIT 1
-                        """,
-                        (file_id, source_symbol_simple, rel.source_line, rel.source_line),
-                    ).fetchone()
-
-                    if not symbol_row:
-                        # Try matching by simple name only
-                        symbol_row = conn.execute(
-                            "SELECT id FROM symbols WHERE file_id=? AND name=? LIMIT 1",
-                            (file_id, source_symbol_simple),
-                        ).fetchone()
-
-                    if symbol_row:
-                        relationship_rows.append((
-                            int(symbol_row["id"]),
-                            rel.target_symbol,
-                            rel.relationship_type,
-                            rel.source_line,
-                            rel.target_file,
-                        ))
-                    else:
-                        # Log warning when symbol lookup fails
-                        skipped_relationships.append(rel.source_symbol)
-
-                # Log skipped relationships for debugging
-                if skipped_relationships:
-                    self.logger.warning(
-                        "Failed to find source symbol IDs for %d relationships in %s: %s",
-                        len(skipped_relationships),
-                        file_path_str,
-                        ", ".join(set(skipped_relationships))
-                    )
-
-                if relationship_rows:
-                    conn.executemany(
-                        """
-                        INSERT INTO code_relationships(
-                            source_symbol_id, target_qualified_name, relationship_type,
-                            source_line, target_file
-                        )
-                        VALUES(?, ?, ?, ?, ?)
-                        """,
-                        relationship_rows,
-                    )
-
-                conn.commit()
-                return len(relationship_rows)
-
-            except sqlite3.DatabaseError as exc:
-                conn.rollback()
-                raise StorageError(f"Failed to add relationships: {exc}") from exc
-
    def add_files_batch(
        self, files: List[Tuple[str, Path, str, str, Optional[List[Symbol]]]]
    ) -> int:
--- a/codex-lens/src/codexlens/storage/index_tree.py
+++ b/codex-lens/src/codexlens/storage/index_tree.py
@@ -16,7 +16,6 @@ from typing import Dict, List, Optional, Set

 from codexlens.config import Config
 from codexlens.parsers.factory import ParserFactory
-from codexlens.semantic.graph_analyzer import GraphAnalyzer
 from codexlens.storage.dir_index import DirIndexStore
 from codexlens.storage.path_mapper import PathMapper
 from codexlens.storage.registry import ProjectInfo, RegistryStore
@@ -525,16 +524,6 @@ class IndexTreeBuilder:
                        symbols=indexed_file.symbols,
                    )

-                    # Extract and store code relationships for graph visualization
-                    if language_id in {"python", "javascript", "typescript"}:
-                        graph_analyzer = GraphAnalyzer(language_id)
-                        if graph_analyzer.is_available():
-                            relationships = graph_analyzer.analyze_with_symbols(
-                                text, file_path, indexed_file.symbols
-                            )
-                            if relationships:
-                                store.add_relationships(file_path, relationships)
-
                    files_count += 1
                    symbols_count += len(indexed_file.symbols)

@@ -742,16 +731,6 @@ def _build_dir_worker(args: tuple) -> DirBuildResult:
                    symbols=indexed_file.symbols,
                )

-                # Extract and store code relationships for graph visualization
-                if language_id in {"python", "javascript", "typescript"}:
-                    graph_analyzer = GraphAnalyzer(language_id)
-                    if graph_analyzer.is_available():
-                        relationships = graph_analyzer.analyze_with_symbols(
-                            text, item, indexed_file.symbols
-                        )
-                        if relationships:
-                            store.add_relationships(item, relationships)
-
                files_count += 1
                symbols_count += len(indexed_file.symbols)

--- a/codex-lens/src/codexlens/storage/migrations/migration_003_code_relationships.py
+++ b/codex-lens/src/codexlens/storage/migrations/migration_003_code_relationships.py
@@ -1,57 +0,0 @@
-"""
-Migration 003: Add code relationships storage.
-
-This migration introduces the `code_relationships` table to store semantic
-relationships between code symbols (function calls, inheritance, imports).
-This enables graph-based code navigation and dependency analysis.
-"""
-
-import logging
-from sqlite3 import Connection
-
-log = logging.getLogger(__name__)
-
-
-def upgrade(db_conn: Connection):
-    """
-    Applies the migration to add code relationships table.
-
-    - Creates `code_relationships` table with foreign key to symbols
-    - Creates indexes for efficient relationship queries
-    - Supports lazy expansion with target_symbol being qualified names
-
-    Args:
-        db_conn: The SQLite database connection.
-    """
-    cursor = db_conn.cursor()
-
-    log.info("Creating 'code_relationships' table...")
-    cursor.execute(
-        """
-        CREATE TABLE IF NOT EXISTS code_relationships (
-            id INTEGER PRIMARY KEY,
-            source_symbol_id INTEGER NOT NULL,
-            target_qualified_name TEXT NOT NULL,
-            relationship_type TEXT NOT NULL,
-            source_line INTEGER NOT NULL,
-            target_file TEXT,
-            FOREIGN KEY (source_symbol_id) REFERENCES symbols (id) ON DELETE CASCADE
-        )
-        """
-    )
-
-    log.info("Creating indexes for code_relationships...")
-    cursor.execute(
-        "CREATE INDEX IF NOT EXISTS idx_relationships_source ON code_relationships (source_symbol_id)"
-    )
-    cursor.execute(
-        "CREATE INDEX IF NOT EXISTS idx_relationships_target ON code_relationships (target_qualified_name)"
-    )
-    cursor.execute(
-        "CREATE INDEX IF NOT EXISTS idx_relationships_type ON code_relationships (relationship_type)"
-    )
-    cursor.execute(
-        "CREATE INDEX IF NOT EXISTS idx_relationships_source_line ON code_relationships (source_line)"
-    )
-
-    log.info("Finished creating code_relationships table and indexes.")
--- a/codex-lens/src/codexlens/storage/sqlite_store.py
+++ b/codex-lens/src/codexlens/storage/sqlite_store.py
@@ -10,7 +10,7 @@ from dataclasses import asdict
 from pathlib import Path
 from typing import Any, Dict, Iterable, List, Optional, Tuple

-from codexlens.entities import CodeRelationship, IndexedFile, SearchResult, Symbol
+from codexlens.entities import IndexedFile, SearchResult, Symbol
 from codexlens.errors import StorageError


@@ -420,167 +420,6 @@ class SQLiteStore:
            }


-    def add_relationships(self, file_path: str | Path, relationships: List[CodeRelationship]) -> None:
-        """Store code relationships for a file.
-
-        Args:
-            file_path: Path to the file containing the relationships
-            relationships: List of CodeRelationship objects to store
-        """
-        if not relationships:
-            return
-
-        with self._lock:
-            conn = self._get_connection()
-            resolved_path = str(Path(file_path).resolve())
-
-            # Get file_id
-            row = conn.execute("SELECT id FROM files WHERE path=?", (resolved_path,)).fetchone()
-            if not row:
-                raise StorageError(f"File not found in index: {file_path}")
-            file_id = int(row["id"])
-
-            # Delete existing relationships for symbols in this file
-            conn.execute(
-                """
-                DELETE FROM code_relationships
-                WHERE source_symbol_id IN (
-                    SELECT id FROM symbols WHERE file_id=?
-                )
-                """,
-                (file_id,)
-            )
-
-            # Insert new relationships
-            relationship_rows = []
-            for rel in relationships:
-                # Find source symbol ID
-                symbol_row = conn.execute(
-                    """
-                    SELECT id FROM symbols
-                    WHERE file_id=? AND name=? AND start_line <= ? AND end_line >= ?
-                    ORDER BY (end_line - start_line) ASC
-                    LIMIT 1
-                    """,
-                    (file_id, rel.source_symbol, rel.source_line, rel.source_line)
-                ).fetchone()
-
-                if symbol_row:
-                    source_symbol_id = int(symbol_row["id"])
-                    relationship_rows.append((
-                        source_symbol_id,
-                        rel.target_symbol,
-                        rel.relationship_type,
-                        rel.source_line,
-                        rel.target_file
-                    ))
-
-            if relationship_rows:
-                conn.executemany(
-                    """
-                    INSERT INTO code_relationships(
-                        source_symbol_id, target_qualified_name, relationship_type,
-                        source_line, target_file
-                    )
-                    VALUES(?, ?, ?, ?, ?)
-                    """,
-                    relationship_rows
-                )
-            conn.commit()
-
-    def query_relationships_by_target(
-        self, target_name: str, *, limit: int = 100
-    ) -> List[Dict[str, Any]]:
-        """Query relationships by target symbol name (find all callers).
-
-        Args:
-            target_name: Name of the target symbol
-            limit: Maximum number of results
-
-        Returns:
-            List of dicts containing relationship info with file paths and line numbers
-        """
-        with self._lock:
-            conn = self._get_connection()
-            rows = conn.execute(
-                """
-                SELECT
-                    s.name AS source_symbol,
-                    r.target_qualified_name,
-                    r.relationship_type,
-                    r.source_line,
-                    f.full_path AS source_file,
-                    r.target_file
-                FROM code_relationships r
-                JOIN symbols s ON r.source_symbol_id = s.id
-                JOIN files f ON s.file_id = f.id
-                WHERE r.target_qualified_name = ?
-                ORDER BY f.full_path, r.source_line
-                LIMIT ?
-                """,
-                (target_name, limit)
-            ).fetchall()
-
-            return [
-                {
-                    "source_symbol": row["source_symbol"],
-                    "target_symbol": row["target_qualified_name"],
-                    "relationship_type": row["relationship_type"],
-                    "source_line": row["source_line"],
-                    "source_file": row["source_file"],
-                    "target_file": row["target_file"],
-                }
-                for row in rows
-            ]
-
-    def query_relationships_by_source(
-        self, source_symbol: str, source_file: str | Path, *, limit: int = 100
-    ) -> List[Dict[str, Any]]:
-        """Query relationships by source symbol (find what a symbol calls).
-
-        Args:
-            source_symbol: Name of the source symbol
-            source_file: File path containing the source symbol
-            limit: Maximum number of results
-
-        Returns:
-            List of dicts containing relationship info
-        """
-        with self._lock:
-            conn = self._get_connection()
-            resolved_path = str(Path(source_file).resolve())
-
-            rows = conn.execute(
-                """
-                SELECT
-                    s.name AS source_symbol,
-                    r.target_qualified_name,
-                    r.relationship_type,
-                    r.source_line,
-                    f.path AS source_file,
-                    r.target_file
-                FROM code_relationships r
-                JOIN symbols s ON r.source_symbol_id = s.id
-                JOIN files f ON s.file_id = f.id
-                WHERE s.name = ? AND f.path = ?
-                ORDER BY r.source_line
-                LIMIT ?
-                """,
-                (source_symbol, resolved_path, limit)
-            ).fetchall()
-
-            return [
-                {
-                    "source_symbol": row["source_symbol"],
-                    "target_symbol": row["target_qualified_name"],
-                    "relationship_type": row["relationship_type"],
-                    "source_line": row["source_line"],
-                    "source_file": row["source_file"],
-                    "target_file": row["target_file"],
-                }
-                for row in rows
-            ]
-
    def _connect(self) -> sqlite3.Connection:
        """Legacy method for backward compatibility."""
        return self._get_connection()