Add comprehensive tests for tokenizer, performance benchmarks, and TreeSitter parser functionality

- Implemented unit tests for the Tokenizer class, covering various text inputs, edge cases, and fallback mechanisms.
- Created performance benchmarks comparing tiktoken and pure Python implementations for token counting.
- Developed extensive tests for TreeSitterSymbolParser across Python, JavaScript, and TypeScript, ensuring accurate symbol extraction and parsing.
- Added configuration documentation for MCP integration and custom prompts, enhancing usability and flexibility.
- Introduced a refactor script for GraphAnalyzer to streamline future improvements.
This commit is contained in:
catlog22
2025-12-15 14:36:09 +08:00
parent 82dcafff00
commit 0fe16963cd
49 changed files with 9307 additions and 438 deletions

View File

@@ -17,6 +17,7 @@ from codexlens.entities import SearchResult, Symbol
from codexlens.storage.registry import RegistryStore, DirMapping
from codexlens.storage.dir_index import DirIndexStore, SubdirLink
from codexlens.storage.path_mapper import PathMapper
from codexlens.storage.sqlite_store import SQLiteStore
@dataclass
@@ -278,6 +279,108 @@ class ChainSearchEngine:
index_paths, name, kind, options.total_limit
)
def search_callers(self, target_symbol: str,
                   source_path: Path,
                   options: Optional[SearchOptions] = None) -> List[Dict[str, Any]]:
    """Locate the callers of a symbol, starting from a directory.

    Resolves the index that serves ``source_path``, collects the index
    files reachable from it (``options.depth`` is passed through to the
    collector) and delegates the lookup to ``_search_callers_parallel``.

    Args:
        target_symbol: Name of the symbol whose callers are wanted
        source_path: Directory the search starts from
        options: Search configuration; ``SearchOptions()`` is used when
            this is None

    Returns:
        Relationship dicts describing the callers, limited to
        ``options.total_limit`` entries. Empty when no start index is
        found or no index paths are collected.

    Examples:
        >>> engine = ChainSearchEngine(registry, mapper)
        >>> callers = engine.search_callers("my_function", Path("D:/project"))
        >>> for caller in callers:
        ...     print(f"{caller['source_symbol']} in {caller['source_file']}:{caller['source_line']}")
    """
    opts = options or SearchOptions()

    root_index = self._find_start_index(source_path)
    if not root_index:
        self.logger.warning(f"No index found for {source_path}")
        return []

    candidate_indexes = self._collect_index_paths(root_index, opts.depth)
    return (
        self._search_callers_parallel(
            candidate_indexes, target_symbol, opts.total_limit
        )
        if candidate_indexes
        else []
    )
def search_callees(self, source_symbol: str,
                   source_path: Path,
                   options: Optional[SearchOptions] = None) -> List[Dict[str, Any]]:
    """Locate what a symbol calls, starting from a directory.

    Resolves the index that serves ``source_path``, collects the index
    files reachable from it (``options.depth`` is passed through to the
    collector) and delegates the lookup to ``_search_callees_parallel``.

    Args:
        source_symbol: Name of the symbol whose callees are wanted
        source_path: Directory the search starts from
        options: Search configuration; ``SearchOptions()`` is used when
            this is None

    Returns:
        Relationship dicts describing the callees, limited to
        ``options.total_limit`` entries. Empty when no start index is
        found or no index paths are collected.

    Examples:
        >>> engine = ChainSearchEngine(registry, mapper)
        >>> callees = engine.search_callees("MyClass.method", Path("D:/project"))
        >>> for callee in callees:
        ...     print(f"Calls {callee['target_symbol']} at line {callee['source_line']}")
    """
    opts = options or SearchOptions()

    root_index = self._find_start_index(source_path)
    if not root_index:
        self.logger.warning(f"No index found for {source_path}")
        return []

    candidate_indexes = self._collect_index_paths(root_index, opts.depth)
    return (
        self._search_callees_parallel(
            candidate_indexes, source_symbol, opts.total_limit
        )
        if candidate_indexes
        else []
    )
def search_inheritance(self, class_name: str,
                       source_path: Path,
                       options: Optional[SearchOptions] = None) -> List[Dict[str, Any]]:
    """Locate inheritance relationships for a class, starting from a directory.

    Resolves the index that serves ``source_path``, collects the index
    files reachable from it (``options.depth`` is passed through to the
    collector) and delegates the lookup to ``_search_inheritance_parallel``.

    Args:
        class_name: Name of the class whose inheritance links are wanted
        source_path: Directory the search starts from
        options: Search configuration; ``SearchOptions()`` is used when
            this is None

    Returns:
        Relationship dicts describing the inheritance links, limited to
        ``options.total_limit`` entries. Empty when no start index is
        found or no index paths are collected.

    Examples:
        >>> engine = ChainSearchEngine(registry, mapper)
        >>> inheritance = engine.search_inheritance("BaseClass", Path("D:/project"))
        >>> for rel in inheritance:
        ...     print(f"{rel['source_symbol']} extends {rel['target_symbol']}")
    """
    opts = options or SearchOptions()

    root_index = self._find_start_index(source_path)
    if not root_index:
        self.logger.warning(f"No index found for {source_path}")
        return []

    candidate_indexes = self._collect_index_paths(root_index, opts.depth)
    return (
        self._search_inheritance_parallel(
            candidate_indexes, class_name, opts.total_limit
        )
        if candidate_indexes
        else []
    )
# === Internal Methods ===
def _find_start_index(self, source_path: Path) -> Optional[Path]:
@@ -553,6 +656,252 @@ class ChainSearchEngine:
self.logger.debug(f"Symbol search error in {index_path}: {exc}")
return []
def _search_callers_parallel(self, index_paths: List[Path],
                             target_symbol: str,
                             limit: int) -> List[Dict[str, Any]]:
    """Search for callers across multiple indexes in parallel.

    One ``_search_callers_single`` task per index is submitted to the
    executor returned by ``_get_executor``. A task that raises is logged
    at error level and skipped; the remaining results are still merged.

    Args:
        index_paths: List of _index.db paths to search
        target_symbol: Target symbol name
        limit: Total result limit

    Returns:
        Caller relationships deduplicated by ``(source_file, source_line)``,
        sorted by source file then source line, truncated to ``limit``.
    """
    executor = self._get_executor()
    futures = [
        executor.submit(self._search_callers_single, idx_path, target_symbol)
        for idx_path in index_paths
    ]

    all_callers: List[Dict[str, Any]] = []
    for future in as_completed(futures):
        try:
            all_callers.extend(future.result())
        except Exception as exc:
            self.logger.error(f"Caller search failed: {exc}")

    # Deduplicate by (source_file, source_line); the first occurrence wins.
    seen = set()
    unique_callers = []
    for caller in all_callers:
        key = (caller.get("source_file"), caller.get("source_line"))
        if key not in seen:
            seen.add(key)
            unique_callers.append(caller)

    # Sort by source file and line. ``or`` (rather than a ``.get`` default)
    # also covers keys that are present but hold None, which would otherwise
    # raise TypeError when compared against str/int values.
    unique_callers.sort(
        key=lambda c: (c.get("source_file") or "", c.get("source_line") or 0)
    )
    return unique_callers[:limit]
def _search_callers_single(self, index_path: Path,
                           target_symbol: str) -> List[Dict[str, Any]]:
    """Look up the callers of a symbol inside one index database.

    Opens ``index_path`` through ``SQLiteStore`` and returns whatever
    ``query_relationships_by_target`` yields for ``target_symbol``.

    Args:
        index_path: Path to _index.db file
        target_symbol: Target symbol name

    Returns:
        List of caller relationship dicts; an empty list when opening or
        querying the store raises (the error is logged at debug level).
    """
    try:
        with SQLiteStore(index_path) as db:
            caller_rows = db.query_relationships_by_target(target_symbol)
            return caller_rows
    except Exception as exc:
        # Best effort: one unreadable index must not abort the whole search.
        self.logger.debug(f"Caller search error in {index_path}: {exc}")
        return []
def _search_callees_parallel(self, index_paths: List[Path],
                             source_symbol: str,
                             limit: int) -> List[Dict[str, Any]]:
    """Search for callees across multiple indexes in parallel.

    One ``_search_callees_single`` task per index is submitted to the
    executor returned by ``_get_executor``. A task that raises is logged
    at error level and skipped; the remaining results are still merged.

    Args:
        index_paths: List of _index.db paths to search
        source_symbol: Source symbol name
        limit: Total result limit

    Returns:
        Callee relationships deduplicated by
        ``(source_file, target_symbol, source_line)``, sorted by source
        line (ties broken by source file, then target symbol), truncated
        to ``limit``.
    """
    executor = self._get_executor()
    futures = [
        executor.submit(self._search_callees_single, idx_path, source_symbol)
        for idx_path in index_paths
    ]

    all_callees: List[Dict[str, Any]] = []
    for future in as_completed(futures):
        try:
            all_callees.extend(future.result())
        except Exception as exc:
            self.logger.error(f"Callee search failed: {exc}")

    # Deduplicate by (source_file, target_symbol, source_line). The file is
    # part of the key because the lookup is by bare symbol name: same-named
    # symbols in different files may call the same target on the same line
    # number, and those are distinct relationships. Records that carry no
    # source_file fall back to (None, target_symbol, source_line).
    seen = set()
    unique_callees = []
    for callee in all_callees:
        key = (
            callee.get("source_file"),
            callee.get("target_symbol"),
            callee.get("source_line"),
        )
        if key not in seen:
            seen.add(key)
            unique_callees.append(callee)

    # Sort by source line first. ``or`` also covers keys that are present
    # but None (a ``.get`` default does not), and the tie-breakers keep the
    # order independent of which worker finished first.
    unique_callees.sort(
        key=lambda c: (
            c.get("source_line") or 0,
            c.get("source_file") or "",
            c.get("target_symbol") or "",
        )
    )
    return unique_callees[:limit]
def _search_callees_single(self, index_path: Path,
                           source_symbol: str) -> List[Dict[str, Any]]:
    """Look up what a symbol calls inside one index database.

    The symbol name is first resolved to every file path that defines a
    symbol with that exact name; ``query_relationships_by_source`` is then
    called once per file and the results are concatenated.

    Args:
        index_path: Path to _index.db file
        source_symbol: Source symbol name

    Returns:
        List of callee relationship dicts; an empty list when opening or
        querying the store raises (the error is logged at debug level).
    """
    try:
        with SQLiteStore(index_path) as db:
            # Files that contain a symbol with this name.
            defining_files = db._get_connection().execute(
                """
                SELECT DISTINCT f.path
                FROM symbols s
                JOIN files f ON s.file_id = f.id
                WHERE s.name = ?
                """,
                (source_symbol,)
            ).fetchall()

            # Gather the outgoing relationships file by file.
            callees = []
            for record in defining_files:
                callees += db.query_relationships_by_source(
                    source_symbol, record["path"]
                )
            return callees
    except Exception as exc:
        # Best effort: one unreadable index must not abort the whole search.
        self.logger.debug(f"Callee search error in {index_path}: {exc}")
        return []
def _search_inheritance_parallel(self, index_paths: List[Path],
                                 class_name: str,
                                 limit: int) -> List[Dict[str, Any]]:
    """Search for inheritance relationships across multiple indexes in parallel.

    One ``_search_inheritance_single`` task per index is submitted to the
    executor returned by ``_get_executor``. A task that raises is logged
    at error level and skipped; the remaining results are still merged.

    Args:
        index_paths: List of _index.db paths to search
        class_name: Class name to search for
        limit: Total result limit

    Returns:
        Inheritance relationships deduplicated by
        ``(source_file, source_symbol, target_symbol)``, sorted by source
        file (ties broken by source line, then source symbol), truncated
        to ``limit``.
    """
    executor = self._get_executor()
    futures = [
        executor.submit(self._search_inheritance_single, idx_path, class_name)
        for idx_path in index_paths
    ]

    all_inheritance: List[Dict[str, Any]] = []
    for future in as_completed(futures):
        try:
            all_inheritance.extend(future.result())
        except Exception as exc:
            self.logger.error(f"Inheritance search failed: {exc}")

    # Deduplicate by (source_file, source_symbol, target_symbol). The file
    # is part of the key so that same-named classes defined in different
    # files, each extending the same base, are all reported. Records that
    # carry no source_file fall back to (None, source_symbol, target_symbol).
    seen = set()
    unique_inheritance = []
    for rel in all_inheritance:
        key = (
            rel.get("source_file"),
            rel.get("source_symbol"),
            rel.get("target_symbol"),
        )
        if key not in seen:
            seen.add(key)
            unique_inheritance.append(rel)

    # Sort by source file. ``or`` also covers keys that are present but
    # None (a ``.get`` default does not), and the tie-breakers keep the
    # order independent of which worker finished first.
    unique_inheritance.sort(
        key=lambda r: (
            r.get("source_file") or "",
            r.get("source_line") or 0,
            r.get("source_symbol") or "",
        )
    )
    return unique_inheritance[:limit]
def _search_inheritance_single(self, index_path: Path,
                               class_name: str) -> List[Dict[str, Any]]:
    """Search for inheritance relationships in a single index.

    Matches 'inherits' relationships where the class is the derived side
    (source symbol name equals ``class_name``) or the base side (the
    target qualified name contains ``class_name`` as a substring).

    Args:
        index_path: Path to _index.db file
        class_name: Class name to search for; matched literally, so any
            percent sign or underscore in it is not treated as a wildcard

    Returns:
        List of inheritance relationship dicts (at most 100 per index),
        each with the keys source_symbol, target_symbol, relationship_type,
        source_line, source_file and target_file. Empty when ``class_name``
        is empty or when opening/querying the store raises (the error is
        logged at debug level).
    """
    # An empty name would produce the pattern '%%', which matches every
    # inheritance row in the index.
    if not class_name:
        return []

    # Escape the LIKE metacharacters so that e.g. "My_Base" does not also
    # match "MyXBase". The escape character itself is escaped first.
    escaped_name = (
        class_name.replace("\\", "\\\\")
        .replace("%", "\\%")
        .replace("_", "\\_")
    )

    try:
        with SQLiteStore(index_path) as store:
            conn = store._get_connection()

            # Search both as base class (target) and derived class (source)
            rows = conn.execute(
                """
                SELECT
                    s.name AS source_symbol,
                    r.target_qualified_name,
                    r.relationship_type,
                    r.source_line,
                    f.path AS source_file,
                    r.target_file
                FROM code_relationships r
                JOIN symbols s ON r.source_symbol_id = s.id
                JOIN files f ON s.file_id = f.id
                WHERE (s.name = ? OR r.target_qualified_name LIKE ? ESCAPE '\\')
                    AND r.relationship_type = 'inherits'
                ORDER BY f.path, r.source_line
                LIMIT 100
                """,
                (class_name, f"%{escaped_name}%")
            ).fetchall()

            return [
                {
                    "source_symbol": row["source_symbol"],
                    "target_symbol": row["target_qualified_name"],
                    "relationship_type": row["relationship_type"],
                    "source_line": row["source_line"],
                    "source_file": row["source_file"],
                    "target_file": row["target_file"],
                }
                for row in rows
            ]
    except Exception as exc:
        # Best effort: one unreadable index must not abort the whole search.
        self.logger.debug(f"Inheritance search error in {index_path}: {exc}")
        return []
# === Convenience Functions ===