refactor: 移除图索引功能,修复内存泄露,优化嵌入生成

主要更改:

1. 移除图索引功能 (graph indexing)
   - 删除 graph_analyzer.py 及相关迁移文件
   - 移除 CLI 的 graph 命令和 --enrich 标志
   - 清理 chain_search.py 中的图查询方法 (370行)
   - 删除相关测试文件

2. 修复嵌入生成内存问题
   - 重构 generate_embeddings.py 使用流式批处理
   - 改用 embedding_manager 的内存安全实现
   - 文件从 548 行精简到 259 行 (52.7% 减少)

3. 修复内存泄露
   - chain_search.py: quick_search 使用 with 语句管理 ChainSearchEngine
   - embedding_manager.py: 使用 with 语句管理 VectorStore
   - vector_store.py: 添加暴力搜索内存警告

4. 代码清理
   - 移除 Symbol 模型的 token_count 和 symbol_type 字段
   - 清理相关测试用例

测试: 760 passed, 7 skipped

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
catlog22
2025-12-21 16:22:03 +08:00
parent 15d5890861
commit 3e9a309079
19 changed files with 165 additions and 3909 deletions

View File

@@ -10,7 +10,7 @@ from dataclasses import asdict
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple
from codexlens.entities import CodeRelationship, IndexedFile, SearchResult, Symbol
from codexlens.entities import IndexedFile, SearchResult, Symbol
from codexlens.errors import StorageError
@@ -420,167 +420,6 @@ class SQLiteStore:
}
def add_relationships(self, file_path: str | Path, relationships: List[CodeRelationship]) -> None:
"""Store code relationships for a file.
Args:
file_path: Path to the file containing the relationships
relationships: List of CodeRelationship objects to store
"""
if not relationships:
return
with self._lock:
conn = self._get_connection()
resolved_path = str(Path(file_path).resolve())
# Get file_id
row = conn.execute("SELECT id FROM files WHERE path=?", (resolved_path,)).fetchone()
if not row:
raise StorageError(f"File not found in index: {file_path}")
file_id = int(row["id"])
# Delete existing relationships for symbols in this file
conn.execute(
"""
DELETE FROM code_relationships
WHERE source_symbol_id IN (
SELECT id FROM symbols WHERE file_id=?
)
""",
(file_id,)
)
# Insert new relationships
relationship_rows = []
for rel in relationships:
# Find source symbol ID
symbol_row = conn.execute(
"""
SELECT id FROM symbols
WHERE file_id=? AND name=? AND start_line <= ? AND end_line >= ?
ORDER BY (end_line - start_line) ASC
LIMIT 1
""",
(file_id, rel.source_symbol, rel.source_line, rel.source_line)
).fetchone()
if symbol_row:
source_symbol_id = int(symbol_row["id"])
relationship_rows.append((
source_symbol_id,
rel.target_symbol,
rel.relationship_type,
rel.source_line,
rel.target_file
))
if relationship_rows:
conn.executemany(
"""
INSERT INTO code_relationships(
source_symbol_id, target_qualified_name, relationship_type,
source_line, target_file
)
VALUES(?, ?, ?, ?, ?)
""",
relationship_rows
)
conn.commit()
def query_relationships_by_target(
self, target_name: str, *, limit: int = 100
) -> List[Dict[str, Any]]:
"""Query relationships by target symbol name (find all callers).
Args:
target_name: Name of the target symbol
limit: Maximum number of results
Returns:
List of dicts containing relationship info with file paths and line numbers
"""
with self._lock:
conn = self._get_connection()
rows = conn.execute(
"""
SELECT
s.name AS source_symbol,
r.target_qualified_name,
r.relationship_type,
r.source_line,
f.full_path AS source_file,
r.target_file
FROM code_relationships r
JOIN symbols s ON r.source_symbol_id = s.id
JOIN files f ON s.file_id = f.id
WHERE r.target_qualified_name = ?
ORDER BY f.full_path, r.source_line
LIMIT ?
""",
(target_name, limit)
).fetchall()
return [
{
"source_symbol": row["source_symbol"],
"target_symbol": row["target_qualified_name"],
"relationship_type": row["relationship_type"],
"source_line": row["source_line"],
"source_file": row["source_file"],
"target_file": row["target_file"],
}
for row in rows
]
def query_relationships_by_source(
self, source_symbol: str, source_file: str | Path, *, limit: int = 100
) -> List[Dict[str, Any]]:
"""Query relationships by source symbol (find what a symbol calls).
Args:
source_symbol: Name of the source symbol
source_file: File path containing the source symbol
limit: Maximum number of results
Returns:
List of dicts containing relationship info
"""
with self._lock:
conn = self._get_connection()
resolved_path = str(Path(source_file).resolve())
rows = conn.execute(
"""
SELECT
s.name AS source_symbol,
r.target_qualified_name,
r.relationship_type,
r.source_line,
f.path AS source_file,
r.target_file
FROM code_relationships r
JOIN symbols s ON r.source_symbol_id = s.id
JOIN files f ON s.file_id = f.id
WHERE s.name = ? AND f.path = ?
ORDER BY r.source_line
LIMIT ?
""",
(source_symbol, resolved_path, limit)
).fetchall()
return [
{
"source_symbol": row["source_symbol"],
"target_symbol": row["target_qualified_name"],
"relationship_type": row["relationship_type"],
"source_line": row["source_line"],
"source_file": row["source_file"],
"target_file": row["target_file"],
}
for row in rows
]
def _connect(self) -> sqlite3.Connection:
"""Legacy method for backward compatibility."""
return self._get_connection()