mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-13 02:41:50 +08:00
refactor: 移除图索引功能,修复内存泄露,优化嵌入生成
主要更改:

1. 移除图索引功能 (graph indexing)
   - 删除 graph_analyzer.py 及相关迁移文件
   - 移除 CLI 的 graph 命令和 --enrich 标志
   - 清理 chain_search.py 中的图查询方法 (370行)
   - 删除相关测试文件
2. 修复嵌入生成内存问题
   - 重构 generate_embeddings.py 使用流式批处理
   - 改用 embedding_manager 的内存安全实现
   - 文件从 548 行精简到 259 行 (52.7% 减少)
3. 修复内存泄露
   - chain_search.py: quick_search 使用 with 语句管理 ChainSearchEngine
   - embedding_manager.py: 使用 with 语句管理 VectorStore
   - vector_store.py: 添加暴力搜索内存警告
4. 代码清理
   - 移除 Symbol 模型的 token_count 和 symbol_type 字段
   - 清理相关测试用例

测试: 760 passed, 7 skipped

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -302,108 +302,6 @@ class ChainSearchEngine:
|
||||
index_paths, name, kind, options.total_limit
|
||||
)
|
||||
|
||||
def search_callers(self, target_symbol: str,
                   source_path: Path,
                   options: Optional[SearchOptions] = None) -> List[Dict[str, Any]]:
    """Look up every caller of a symbol, starting from a directory's index.

    The starting index is resolved with ``_find_start_index``; the set of
    indexes to query comes from ``_collect_index_paths`` using
    ``options.depth``; the actual lookup is delegated to
    ``_search_callers_parallel`` capped at ``options.total_limit``.

    Args:
        target_symbol: Name of the symbol whose callers are wanted
        source_path: Directory the search starts from
        options: Search configuration (a default ``SearchOptions()`` is
            used when this is falsy)

    Returns:
        List of relationship dicts with caller information. Empty when no
        starting index exists (a warning is logged) or when no index paths
        are collected.

    Examples:
        >>> engine = ChainSearchEngine(registry, mapper)
        >>> callers = engine.search_callers("my_function", Path("D:/project"))
        >>> for caller in callers:
        ...     print(f"{caller['source_symbol']} in {caller['source_file']}:{caller['source_line']}")
    """
    opts = options if options else SearchOptions()

    root_index = self._find_start_index(source_path)
    if root_index:
        candidates = self._collect_index_paths(root_index, opts.depth)
        if candidates:
            return self._search_callers_parallel(
                candidates, target_symbol, opts.total_limit
            )
        return []

    # Nothing indexed at or above source_path: report it and bail out.
    self.logger.warning(f"No index found for {source_path}")
    return []
|
||||
|
||||
def search_callees(self, source_symbol: str,
                   source_path: Path,
                   options: Optional[SearchOptions] = None) -> List[Dict[str, Any]]:
    """Look up everything a symbol calls, starting from a directory's index.

    The starting index is resolved with ``_find_start_index``; the set of
    indexes to query comes from ``_collect_index_paths`` using
    ``options.depth``; the actual lookup is delegated to
    ``_search_callees_parallel`` capped at ``options.total_limit``.

    Args:
        source_symbol: Name of the symbol whose callees are wanted
        source_path: Directory the search starts from
        options: Search configuration (a default ``SearchOptions()`` is
            used when this is falsy)

    Returns:
        List of relationship dicts with callee information. Empty when no
        starting index exists (a warning is logged) or when no index paths
        are collected.

    Examples:
        >>> engine = ChainSearchEngine(registry, mapper)
        >>> callees = engine.search_callees("MyClass.method", Path("D:/project"))
        >>> for callee in callees:
        ...     print(f"Calls {callee['target_symbol']} at line {callee['source_line']}")
    """
    opts = options if options else SearchOptions()

    root_index = self._find_start_index(source_path)
    if root_index:
        candidates = self._collect_index_paths(root_index, opts.depth)
        if candidates:
            return self._search_callees_parallel(
                candidates, source_symbol, opts.total_limit
            )
        return []

    # Nothing indexed at or above source_path: report it and bail out.
    self.logger.warning(f"No index found for {source_path}")
    return []
|
||||
|
||||
def search_inheritance(self, class_name: str,
                       source_path: Path,
                       options: Optional[SearchOptions] = None) -> List[Dict[str, Any]]:
    """Look up inheritance relationships for a class, starting from a directory's index.

    The starting index is resolved with ``_find_start_index``; the set of
    indexes to query comes from ``_collect_index_paths`` using
    ``options.depth``; the actual lookup is delegated to
    ``_search_inheritance_parallel`` capped at ``options.total_limit``.

    Args:
        class_name: Name of the class whose inheritance links are wanted
        source_path: Directory the search starts from
        options: Search configuration (a default ``SearchOptions()`` is
            used when this is falsy)

    Returns:
        List of relationship dicts with inheritance information. Empty when
        no starting index exists (a warning is logged) or when no index
        paths are collected.

    Examples:
        >>> engine = ChainSearchEngine(registry, mapper)
        >>> inheritance = engine.search_inheritance("BaseClass", Path("D:/project"))
        >>> for rel in inheritance:
        ...     print(f"{rel['source_symbol']} extends {rel['target_symbol']}")
    """
    opts = options if options else SearchOptions()

    root_index = self._find_start_index(source_path)
    if root_index:
        candidates = self._collect_index_paths(root_index, opts.depth)
        if candidates:
            return self._search_inheritance_parallel(
                candidates, class_name, opts.total_limit
            )
        return []

    # Nothing indexed at or above source_path: report it and bail out.
    self.logger.warning(f"No index found for {source_path}")
    return []
|
||||
|
||||
# === Internal Methods ===
|
||||
|
||||
def _find_start_index(self, source_path: Path) -> Optional[Path]:
|
||||
@@ -711,273 +609,6 @@ class ChainSearchEngine:
|
||||
self.logger.debug(f"Symbol search error in {index_path}: {exc}")
|
||||
return []
|
||||
|
||||
def _search_callers_parallel(self, index_paths: List[Path],
                             target_symbol: str,
                             limit: int) -> List[Dict[str, Any]]:
    """Search for callers across multiple indexes in parallel.

    One ``_search_callers_single`` task per index is submitted to the
    executor returned by ``self._get_executor()``. Results are merged,
    deduplicated, sorted and truncated to ``limit``. A task that raises is
    logged (with the index path it was running against) and skipped; the
    remaining indexes still contribute.

    Args:
        index_paths: List of _index.db paths to search
        target_symbol: Target symbol name
        limit: Total result limit

    Returns:
        Caller relationship dicts, deduplicated by
        (source_file, source_line), sorted by source file then line, and
        at most ``limit`` entries long.
    """
    if not index_paths:
        # Nothing to query; avoid touching the executor at all.
        return []

    executor = self._get_executor()
    future_to_path = {
        executor.submit(self._search_callers_single, idx_path, target_symbol): idx_path
        for idx_path in index_paths
    }

    all_callers: List[Dict[str, Any]] = []
    for future in as_completed(future_to_path):
        try:
            all_callers.extend(future.result())
        except Exception as exc:
            self.logger.error(
                f"Caller search failed for {future_to_path[future]}: {exc}"
            )

    # Deduplicate by (source_file, source_line)
    seen = set()
    unique_callers = []
    for caller in all_callers:
        key = (caller.get("source_file"), caller.get("source_line"))
        if key not in seen:
            seen.add(key)
            unique_callers.append(caller)

    # Sort by source file and line. ``dict.get(key, default)`` still returns
    # None when the key is present with a NULL value, and None does not
    # compare with str/int, so normalise explicitly before comparing.
    unique_callers.sort(
        key=lambda c: (c.get("source_file") or "", c.get("source_line") or 0)
    )

    return unique_callers[:limit]
|
||||
|
||||
def _search_callers_single(self, index_path: Path,
                           target_symbol: str) -> List[Dict[str, Any]]:
    """Query one index for relationships that target ``target_symbol``.

    Opens ``SQLiteStore(index_path)`` as a context manager and returns
    whatever ``query_relationships_by_target`` yields. Any exception is
    logged at debug level and turned into an empty result, so one
    unreadable index does not abort a multi-index search.

    Args:
        index_path: Path to _index.db file
        target_symbol: Target symbol name

    Returns:
        List of caller relationship dicts (empty on error)
    """
    try:
        with SQLiteStore(index_path) as db:
            return db.query_relationships_by_target(target_symbol)
    except Exception as exc:
        # Best-effort lookup: record the failure and report "no callers".
        self.logger.debug(f"Caller search error in {index_path}: {exc}")
        return []
|
||||
|
||||
def _search_callees_parallel(self, index_paths: List[Path],
                             source_symbol: str,
                             limit: int) -> List[Dict[str, Any]]:
    """Search for callees across multiple indexes in parallel.

    One ``_search_callees_single`` task per index is submitted to the
    executor returned by ``self._get_executor()``. Results are merged,
    deduplicated, sorted and truncated to ``limit``. A task that raises is
    logged (with the index path it was running against) and skipped; the
    remaining indexes still contribute.

    Args:
        index_paths: List of _index.db paths to search
        source_symbol: Source symbol name
        limit: Total result limit

    Returns:
        Callee relationship dicts, deduplicated by
        (source_file, target_symbol, source_line), sorted by source line
        with source file as tie-breaker, and at most ``limit`` entries long.
    """
    if not index_paths:
        # Nothing to query; avoid touching the executor at all.
        return []

    executor = self._get_executor()
    future_to_path = {
        executor.submit(self._search_callees_single, idx_path, source_symbol): idx_path
        for idx_path in index_paths
    }

    all_callees: List[Dict[str, Any]] = []
    for future in as_completed(future_to_path):
        try:
            all_callees.extend(future.result())
        except Exception as exc:
            self.logger.error(
                f"Callee search failed for {future_to_path[future]}: {exc}"
            )

    # Deduplicate by (source_file, target_symbol, source_line). The file is
    # part of the key because results from several indexes are merged here:
    # two different files can call the same target at the same line number,
    # and those are distinct call sites.
    seen = set()
    unique_callees = []
    for callee in all_callees:
        key = (
            callee.get("source_file"),
            callee.get("target_symbol"),
            callee.get("source_line"),
        )
        if key not in seen:
            seen.add(key)
            unique_callees.append(callee)

    # Sort by source line; the file is a tie-breaker so the order does not
    # depend on which worker finished first. Values are normalised because
    # ``dict.get(key, default)`` returns None for a present-but-NULL value
    # and None cannot be compared with int/str.
    unique_callees.sort(
        key=lambda c: (c.get("source_line") or 0, c.get("source_file") or "")
    )

    return unique_callees[:limit]
|
||||
|
||||
def _search_callees_single(self, index_path: Path,
                           source_symbol: str) -> List[Dict[str, Any]]:
    """Query one index for the calls made by ``source_symbol``.

    Runs a single JOIN over ``code_relationships``, ``symbols`` and
    ``files`` through the store's ``execute_query`` API, restricted to
    relationships of type ``'call'`` whose source symbol name matches,
    ordered by file path and line and capped at 100 rows by the query.
    Any exception is logged at debug level and turned into an empty result.

    Args:
        index_path: Path to _index.db file
        source_symbol: Source symbol name

    Returns:
        List of callee relationship dicts (empty on error)
    """
    # One JOIN instead of a per-symbol follow-up query (avoids N+1 lookups).
    callee_sql = """
                    SELECT
                        s.name AS source_symbol,
                        r.target_qualified_name AS target_symbol,
                        r.relationship_type,
                        r.source_line,
                        f.full_path AS source_file,
                        r.target_file
                    FROM code_relationships r
                    JOIN symbols s ON r.source_symbol_id = s.id
                    JOIN files f ON s.file_id = f.id
                    WHERE s.name = ? AND r.relationship_type = 'call'
                    ORDER BY f.full_path, r.source_line
                    LIMIT 100
                    """
    # Result columns, in the order they appear in each returned dict.
    result_columns = (
        "source_symbol",
        "target_symbol",
        "relationship_type",
        "source_line",
        "source_file",
        "target_file",
    )

    try:
        with SQLiteStore(index_path) as db:
            records = db.execute_query(callee_sql, (source_symbol,))
            return [
                {column: record[column] for column in result_columns}
                for record in records
            ]
    except Exception as exc:
        # Best-effort lookup: record the failure and report "no callees".
        self.logger.debug(f"Callee search error in {index_path}: {exc}")
        return []
|
||||
|
||||
def _search_inheritance_parallel(self, index_paths: List[Path],
                                 class_name: str,
                                 limit: int) -> List[Dict[str, Any]]:
    """Search for inheritance relationships across multiple indexes in parallel.

    One ``_search_inheritance_single`` task per index is submitted to the
    executor returned by ``self._get_executor()``. Results are merged,
    deduplicated, sorted and truncated to ``limit``. A task that raises is
    logged (with the index path it was running against) and skipped; the
    remaining indexes still contribute.

    Args:
        index_paths: List of _index.db paths to search
        class_name: Class name to search for
        limit: Total result limit

    Returns:
        Inheritance relationship dicts, deduplicated by
        (source_file, source_symbol, target_symbol), sorted by source file
        with source line as tie-breaker, and at most ``limit`` entries long.
    """
    if not index_paths:
        # Nothing to query; avoid touching the executor at all.
        return []

    executor = self._get_executor()
    future_to_path = {
        executor.submit(self._search_inheritance_single, idx_path, class_name): idx_path
        for idx_path in index_paths
    }

    all_inheritance: List[Dict[str, Any]] = []
    for future in as_completed(future_to_path):
        try:
            all_inheritance.extend(future.result())
        except Exception as exc:
            self.logger.error(
                f"Inheritance search failed for {future_to_path[future]}: {exc}"
            )

    # Deduplicate by (source_file, source_symbol, target_symbol). The file
    # is part of the key because same-named classes defined in different
    # files are distinct relationships once several indexes are merged.
    seen = set()
    unique_inheritance = []
    for rel in all_inheritance:
        key = (
            rel.get("source_file"),
            rel.get("source_symbol"),
            rel.get("target_symbol"),
        )
        if key not in seen:
            seen.add(key)
            unique_inheritance.append(rel)

    # Sort by source file; the line is a tie-breaker so the order does not
    # depend on which worker finished first. Values are normalised because
    # ``dict.get(key, default)`` returns None for a present-but-NULL value
    # and None cannot be compared with str/int.
    unique_inheritance.sort(
        key=lambda r: (r.get("source_file") or "", r.get("source_line") or 0)
    )

    return unique_inheritance[:limit]
|
||||
|
||||
def _search_inheritance_single(self, index_path: Path,
                               class_name: str) -> List[Dict[str, Any]]:
    """Query one index for ``'inherits'`` relationships involving ``class_name``.

    A single UNION query, run through the store's ``execute_query`` API,
    matches the class on either side of the relationship:

    1. as the target (``target_qualified_name``) - finds derived classes
    2. as the source symbol name - finds parent classes

    The query caps the combined result at 100 rows. Any exception is logged
    at debug level and turned into an empty result.

    Args:
        index_path: Path to _index.db file
        class_name: Class name to search for

    Returns:
        List of inheritance relationship dicts (empty on error)
    """
    inheritance_sql = """
                    SELECT
                        s.name AS source_symbol,
                        r.target_qualified_name,
                        r.relationship_type,
                        r.source_line,
                        f.full_path AS source_file,
                        r.target_file
                    FROM code_relationships r
                    JOIN symbols s ON r.source_symbol_id = s.id
                    JOIN files f ON s.file_id = f.id
                    WHERE r.target_qualified_name = ? AND r.relationship_type = 'inherits'
                    UNION
                    SELECT
                        s.name AS source_symbol,
                        r.target_qualified_name,
                        r.relationship_type,
                        r.source_line,
                        f.full_path AS source_file,
                        r.target_file
                    FROM code_relationships r
                    JOIN symbols s ON r.source_symbol_id = s.id
                    JOIN files f ON s.file_id = f.id
                    WHERE s.name = ? AND r.relationship_type = 'inherits'
                    LIMIT 100
                    """

    try:
        with SQLiteStore(index_path) as db:
            # The same name is bound twice: once per branch of the UNION.
            records = db.execute_query(inheritance_sql, (class_name, class_name))

            relationships = []
            for record in records:
                relationships.append({
                    "source_symbol": record["source_symbol"],
                    # Exposed to callers under the shorter "target_symbol" key.
                    "target_symbol": record["target_qualified_name"],
                    "relationship_type": record["relationship_type"],
                    "source_line": record["source_line"],
                    "source_file": record["source_file"],
                    "target_file": record["target_file"],
                })
            return relationships
    except Exception as exc:
        # Best-effort lookup: record the failure and report "no relationships".
        self.logger.debug(f"Inheritance search error in {index_path}: {exc}")
        return []
|
||||
|
||||
|
||||
# === Convenience Functions ===
|
||||
|
||||
@@ -1007,10 +638,9 @@ def quick_search(query: str,
|
||||
|
||||
mapper = PathMapper()
|
||||
|
||||
engine = ChainSearchEngine(registry, mapper)
|
||||
options = SearchOptions(depth=depth)
|
||||
|
||||
result = engine.search(query, source_path, options)
|
||||
with ChainSearchEngine(registry, mapper) as engine:
|
||||
options = SearchOptions(depth=depth)
|
||||
result = engine.search(query, source_path, options)
|
||||
|
||||
registry.close()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user