feat: Enhance CodexLens indexing and search capabilities with new CLI options and improved error handling

This commit is contained in:
catlog22
2025-12-19 15:10:37 +08:00
parent c7ced2bfbb
commit 2f0cce0089
18 changed files with 480 additions and 128 deletions

View File

@@ -831,13 +831,13 @@ class ChainSearchEngine:
r.target_qualified_name AS target_symbol,
r.relationship_type,
r.source_line,
f.path AS source_file,
f.full_path AS source_file,
r.target_file
FROM code_relationships r
JOIN symbols s ON r.source_symbol_id = s.id
JOIN files f ON s.file_id = f.id
WHERE s.name = ? AND r.relationship_type = 'call'
ORDER BY f.path, r.source_line
ORDER BY f.full_path, r.source_line
LIMIT 100
""",
(source_symbol,)
@@ -928,7 +928,7 @@ class ChainSearchEngine:
r.target_qualified_name,
r.relationship_type,
r.source_line,
f.path AS source_file,
f.full_path AS source_file,
r.target_file
FROM code_relationships r
JOIN symbols s ON r.source_symbol_id = s.id
@@ -940,7 +940,7 @@ class ChainSearchEngine:
r.target_qualified_name,
r.relationship_type,
r.source_line,
f.path AS source_file,
f.full_path AS source_file,
r.target_file
FROM code_relationships r
JOIN symbols s ON r.source_symbol_id = s.id

View File

@@ -434,20 +434,31 @@ class GraphAnalyzer:
def _find_enclosing_symbol(self, node: TreeSitterNode, symbols: List[dict]) -> Optional[str]:
"""Find the enclosing function/method/class for a node.
Returns fully qualified name (e.g., "MyClass.my_method") by traversing up
the AST tree and collecting parent class/function names.
Args:
node: AST node to find enclosure for
symbols: List of defined symbols
Returns:
Name of enclosing symbol, or None if at module level
Fully qualified name of enclosing symbol, or None if at module level
"""
# Walk up the tree to find enclosing symbol
# Walk up the tree to find all enclosing symbols
enclosing_names = []
parent = node.parent
while parent is not None:
for symbol in symbols:
if symbol["node"] == parent:
return symbol["name"]
# Prepend while walking upward so the list ends up ordered outermost to innermost
enclosing_names.insert(0, symbol["name"])
break
parent = parent.parent
# Return fully qualified name or None if at module level
if enclosing_names:
return ".".join(enclosing_names)
return None
def _extract_call_target(self, source_bytes: bytes, node: TreeSitterNode) -> Optional[str]:

View File

@@ -1226,17 +1226,14 @@ class DirIndexStore:
query: str,
limit: int = 20,
enhance_query: bool = False,
return_full_content: bool = True,
return_full_content: bool = False,
context_lines: int = 10,
) -> List[SearchResult]:
"""Full-text search in current directory files with complete method blocks.
"""Full-text search in current directory files.
Uses files_fts_exact (unicode61 tokenizer) for exact token matching.
For fuzzy/substring search, use search_fts_fuzzy() instead.
Returns complete code blocks (functions/methods/classes) containing the match,
rather than just a short snippet.
Best Practice (from industry analysis of Codanna/Code-Index-MCP):
- Default: Respects exact user input without modification
- Users can manually add wildcards (e.g., "loadPack*") for prefix matching
@@ -1248,11 +1245,12 @@ class DirIndexStore:
limit: Maximum results to return
enhance_query: If True, automatically add prefix wildcards for simple queries.
Default False to respect exact user input.
return_full_content: If True, include full code block in content field
return_full_content: If True, include full code block in content field.
Default False for fast location-only results.
context_lines: Lines of context when no symbol contains the match; ignored when return_full_content is False
Returns:
List of SearchResult objects with complete code blocks
List of SearchResult objects (location-only by default, with content if requested)
Raises:
StorageError: If FTS search fails
@@ -1263,8 +1261,39 @@ class DirIndexStore:
with self._lock:
conn = self._get_connection()
# Fast path: location-only results (no content processing)
if not return_full_content:
try:
rows = conn.execute(
"""
SELECT rowid, full_path, bm25(files_fts_exact) AS rank,
snippet(files_fts_exact, 2, '', '', '...', 30) AS excerpt
FROM files_fts_exact
WHERE files_fts_exact MATCH ?
ORDER BY rank
LIMIT ?
""",
(final_query, limit),
).fetchall()
except sqlite3.DatabaseError as exc:
raise StorageError(f"FTS search failed: {exc}") from exc
results: List[SearchResult] = []
for row in rows:
rank = float(row["rank"]) if row["rank"] is not None else 0.0
score = abs(rank) if rank < 0 else 0.0
results.append(
SearchResult(
path=row["full_path"],
score=score,
excerpt=row["excerpt"],
)
)
return results
# Full content path: fetch content and find containing symbols
try:
# Join with files table to get content and file_id
rows = conn.execute(
"""
SELECT f.id AS file_id, f.full_path, f.content,
@@ -1319,7 +1348,7 @@ class DirIndexStore:
path=file_path,
score=score,
excerpt=excerpt,
content=block_content if return_full_content else None,
content=block_content,
start_line=start_line,
end_line=end_line,
symbol_name=symbol_name,
@@ -1332,31 +1361,59 @@ class DirIndexStore:
self,
query: str,
limit: int = 20,
return_full_content: bool = True,
return_full_content: bool = False,
context_lines: int = 10,
) -> List[SearchResult]:
"""Full-text search using exact token matching with complete method blocks.
Returns complete code blocks (functions/methods/classes) containing the match,
rather than just a short snippet. If no symbol contains the match, returns
context lines around the match.
"""Full-text search using exact token matching.
Args:
query: FTS5 query string
limit: Maximum results to return
return_full_content: If True, include full code block in content field
return_full_content: If True, include full code block in content field.
Default False for fast location-only results.
context_lines: Lines of context when no symbol contains the match; ignored when return_full_content is False
Returns:
List of SearchResult objects with complete code blocks
List of SearchResult objects (location-only by default, with content if requested)
Raises:
StorageError: If FTS search fails
"""
with self._lock:
conn = self._get_connection()
# Fast path: location-only results (no content processing)
if not return_full_content:
try:
rows = conn.execute(
"""
SELECT rowid, full_path, bm25(files_fts_exact) AS rank,
snippet(files_fts_exact, 2, '', '', '...', 30) AS excerpt
FROM files_fts_exact
WHERE files_fts_exact MATCH ?
ORDER BY rank
LIMIT ?
""",
(query, limit),
).fetchall()
except sqlite3.DatabaseError as exc:
raise StorageError(f"FTS exact search failed: {exc}") from exc
results: List[SearchResult] = []
for row in rows:
rank = float(row["rank"]) if row["rank"] is not None else 0.0
score = abs(rank) if rank < 0 else 0.0
results.append(
SearchResult(
path=row["full_path"],
score=score,
excerpt=row["excerpt"],
)
)
return results
# Full content path: fetch content and find containing symbols
try:
# Join with files table to get content and file_id
rows = conn.execute(
"""
SELECT f.id AS file_id, f.full_path, f.content,
@@ -1411,7 +1468,7 @@ class DirIndexStore:
path=file_path,
score=score,
excerpt=excerpt,
content=block_content if return_full_content else None,
content=block_content,
start_line=start_line,
end_line=end_line,
symbol_name=symbol_name,
@@ -1424,31 +1481,59 @@ class DirIndexStore:
self,
query: str,
limit: int = 20,
return_full_content: bool = True,
return_full_content: bool = False,
context_lines: int = 10,
) -> List[SearchResult]:
"""Full-text search using fuzzy/substring matching with complete method blocks.
Returns complete code blocks (functions/methods/classes) containing the match,
rather than just a short snippet. If no symbol contains the match, returns
context lines around the match.
"""Full-text search using fuzzy/substring matching.
Args:
query: FTS5 query string
limit: Maximum results to return
return_full_content: If True, include full code block in content field
return_full_content: If True, include full code block in content field.
Default False for fast location-only results.
context_lines: Lines of context when no symbol contains the match; ignored when return_full_content is False
Returns:
List of SearchResult objects with complete code blocks
List of SearchResult objects (location-only by default, with content if requested)
Raises:
StorageError: If FTS search fails
"""
with self._lock:
conn = self._get_connection()
# Fast path: location-only results (no content processing)
if not return_full_content:
try:
rows = conn.execute(
"""
SELECT rowid, full_path, bm25(files_fts_fuzzy) AS rank,
snippet(files_fts_fuzzy, 2, '', '', '...', 30) AS excerpt
FROM files_fts_fuzzy
WHERE files_fts_fuzzy MATCH ?
ORDER BY rank
LIMIT ?
""",
(query, limit),
).fetchall()
except sqlite3.DatabaseError as exc:
raise StorageError(f"FTS fuzzy search failed: {exc}") from exc
results: List[SearchResult] = []
for row in rows:
rank = float(row["rank"]) if row["rank"] is not None else 0.0
score = abs(rank) if rank < 0 else 0.0
results.append(
SearchResult(
path=row["full_path"],
score=score,
excerpt=row["excerpt"],
)
)
return results
# Full content path: fetch content and find containing symbols
try:
# Join with files table to get content and file_id
rows = conn.execute(
"""
SELECT f.id AS file_id, f.full_path, f.content,
@@ -1503,7 +1588,7 @@ class DirIndexStore:
path=file_path,
score=score,
excerpt=excerpt,
content=block_content if return_full_content else None,
content=block_content,
start_line=start_line,
end_line=end_line,
symbol_name=symbol_name,

View File

@@ -527,19 +527,13 @@ class IndexTreeBuilder:
# Extract and store code relationships for graph visualization
if language_id in {"python", "javascript", "typescript"}:
try:
graph_analyzer = GraphAnalyzer(language_id)
if graph_analyzer.is_available():
relationships = graph_analyzer.analyze_with_symbols(
text, file_path, indexed_file.symbols
)
if relationships:
store.add_relationships(file_path, relationships)
except Exception as rel_exc:
self.logger.debug(
"Failed to extract relationships from %s: %s",
file_path, rel_exc
graph_analyzer = GraphAnalyzer(language_id)
if graph_analyzer.is_available():
relationships = graph_analyzer.analyze_with_symbols(
text, file_path, indexed_file.symbols
)
if relationships:
store.add_relationships(file_path, relationships)
files_count += 1
symbols_count += len(indexed_file.symbols)
@@ -750,16 +744,13 @@ def _build_dir_worker(args: tuple) -> DirBuildResult:
# Extract and store code relationships for graph visualization
if language_id in {"python", "javascript", "typescript"}:
try:
graph_analyzer = GraphAnalyzer(language_id)
if graph_analyzer.is_available():
relationships = graph_analyzer.analyze_with_symbols(
text, item, indexed_file.symbols
)
if relationships:
store.add_relationships(item, relationships)
except Exception:
pass # Silently skip relationship extraction errors
graph_analyzer = GraphAnalyzer(language_id)
if graph_analyzer.is_available():
relationships = graph_analyzer.analyze_with_symbols(
text, item, indexed_file.symbols
)
if relationships:
store.add_relationships(item, relationships)
files_count += 1
symbols_count += len(indexed_file.symbols)

View File

@@ -509,13 +509,13 @@ class SQLiteStore:
r.target_qualified_name,
r.relationship_type,
r.source_line,
f.path AS source_file,
f.full_path AS source_file,
r.target_file
FROM code_relationships r
JOIN symbols s ON r.source_symbol_id = s.id
JOIN files f ON s.file_id = f.id
WHERE r.target_qualified_name = ?
ORDER BY f.path, r.source_line
ORDER BY f.full_path, r.source_line
LIMIT ?
""",
(target_name, limit)

View File

@@ -78,10 +78,10 @@ def outer():
analyzer = GraphAnalyzer("python")
relationships = analyzer.analyze_file(code, Path("test.py"))
# Should find inner -> inner_helper and outer -> inner
# Should find outer.inner -> inner_helper and outer -> inner (with fully qualified names)
assert len(relationships) == 2
call_pairs = {(rel.source_symbol, rel.target_symbol) for rel in relationships}
assert ("inner", "inner_helper") in call_pairs
assert ("outer.inner", "inner_helper") in call_pairs
assert ("outer", "inner") in call_pairs
def test_method_call_in_class(self):
@@ -97,10 +97,10 @@ def outer():
analyzer = GraphAnalyzer("python")
relationships = analyzer.analyze_file(code, Path("test.py"))
# Should find compute -> add
# Should find Calculator.compute -> add (with fully qualified source)
assert len(relationships) == 1
rel = relationships[0]
assert rel.source_symbol == "compute"
assert rel.source_symbol == "Calculator.compute"
assert rel.target_symbol == "add"
def test_module_level_call(self):
@@ -171,11 +171,11 @@ main()
# Extract call pairs
call_pairs = {(rel.source_symbol, rel.target_symbol) for rel in relationships}
# Expected relationships
# Expected relationships (with fully qualified source symbols for methods)
expected = {
("load", "read_file"),
("process", "validate"),
("process", "transform"),
("DataProcessor.load", "read_file"),
("DataProcessor.process", "validate"),
("DataProcessor.process", "transform"),
("main", "DataProcessor"),
("main", "load"),
("main", "process"),
@@ -259,10 +259,10 @@ const main = () => {
analyzer = GraphAnalyzer("javascript")
relationships = analyzer.analyze_file(code, Path("test.js"))
# Should find compute -> add
# Should find Calculator.compute -> add (with fully qualified source)
assert len(relationships) == 1
rel = relationships[0]
assert rel.source_symbol == "compute"
assert rel.source_symbol == "Calculator.compute"
assert rel.target_symbol == "add"
def test_complex_javascript_file(self):
@@ -304,11 +304,12 @@ main();
# Extract call pairs
call_pairs = {(rel.source_symbol, rel.target_symbol) for rel in relationships}
# Expected relationships (note: constructor calls like "new DataProcessor()" are not tracked)
# Expected relationships (with fully qualified source symbols for methods)
# Note: constructor calls like "new DataProcessor()" are not tracked
expected = {
("load", "readFile"),
("process", "validate"),
("process", "transform"),
("DataProcessor.load", "readFile"),
("DataProcessor.process", "validate"),
("DataProcessor.process", "transform"),
("main", "load"),
("main", "process"),
("<module>", "main"),