feat: Enhance CodexLens indexing and search capabilities with new CLI options and improved error handling

This commit is contained in:
catlog22
2025-12-19 15:10:37 +08:00
parent c7ced2bfbb
commit 2f0cce0089
18 changed files with 480 additions and 128 deletions

View File

@@ -1226,17 +1226,14 @@ class DirIndexStore:
query: str,
limit: int = 20,
enhance_query: bool = False,
return_full_content: bool = True,
return_full_content: bool = False,
context_lines: int = 10,
) -> List[SearchResult]:
"""Full-text search in current directory files with complete method blocks.
"""Full-text search in current directory files.
Uses files_fts_exact (unicode61 tokenizer) for exact token matching.
For fuzzy/substring search, use search_fts_fuzzy() instead.
Returns complete code blocks (functions/methods/classes) containing the match,
rather than just a short snippet.
Best Practice (from industry analysis of Codanna/Code-Index-MCP):
- Default: Respects exact user input without modification
- Users can manually add wildcards (e.g., "loadPack*") for prefix matching
@@ -1248,11 +1245,12 @@ class DirIndexStore:
limit: Maximum results to return
enhance_query: If True, automatically add prefix wildcards for simple queries.
Default False to respect exact user input.
return_full_content: If True, include full code block in content field
return_full_content: If True, include full code block in content field.
Default False for fast location-only results.
context_lines: Lines of context when no symbol contains the match
Returns:
List of SearchResult objects with complete code blocks
List of SearchResult objects (location-only by default, with content if requested)
Raises:
StorageError: If FTS search fails
@@ -1263,8 +1261,39 @@ class DirIndexStore:
with self._lock:
conn = self._get_connection()
# Fast path: location-only results (no content processing)
if not return_full_content:
try:
rows = conn.execute(
"""
SELECT rowid, full_path, bm25(files_fts_exact) AS rank,
snippet(files_fts_exact, 2, '', '', '...', 30) AS excerpt
FROM files_fts_exact
WHERE files_fts_exact MATCH ?
ORDER BY rank
LIMIT ?
""",
(final_query, limit),
).fetchall()
except sqlite3.DatabaseError as exc:
raise StorageError(f"FTS search failed: {exc}") from exc
results: List[SearchResult] = []
for row in rows:
rank = float(row["rank"]) if row["rank"] is not None else 0.0
score = abs(rank) if rank < 0 else 0.0
results.append(
SearchResult(
path=row["full_path"],
score=score,
excerpt=row["excerpt"],
)
)
return results
# Full content path: fetch content and find containing symbols
try:
# Join with files table to get content and file_id
rows = conn.execute(
"""
SELECT f.id AS file_id, f.full_path, f.content,
@@ -1319,7 +1348,7 @@ class DirIndexStore:
path=file_path,
score=score,
excerpt=excerpt,
content=block_content if return_full_content else None,
content=block_content,
start_line=start_line,
end_line=end_line,
symbol_name=symbol_name,
@@ -1332,31 +1361,59 @@ class DirIndexStore:
self,
query: str,
limit: int = 20,
return_full_content: bool = True,
return_full_content: bool = False,
context_lines: int = 10,
) -> List[SearchResult]:
"""Full-text search using exact token matching with complete method blocks.
Returns complete code blocks (functions/methods/classes) containing the match,
rather than just a short snippet. If no symbol contains the match, returns
context lines around the match.
"""Full-text search using exact token matching.
Args:
query: FTS5 query string
limit: Maximum results to return
return_full_content: If True, include full code block in content field
return_full_content: If True, include full code block in content field.
Default False for fast location-only results.
context_lines: Lines of context when no symbol contains the match
Returns:
List of SearchResult objects with complete code blocks
List of SearchResult objects (location-only by default, with content if requested)
Raises:
StorageError: If FTS search fails
"""
with self._lock:
conn = self._get_connection()
# Fast path: location-only results (no content processing)
if not return_full_content:
try:
rows = conn.execute(
"""
SELECT rowid, full_path, bm25(files_fts_exact) AS rank,
snippet(files_fts_exact, 2, '', '', '...', 30) AS excerpt
FROM files_fts_exact
WHERE files_fts_exact MATCH ?
ORDER BY rank
LIMIT ?
""",
(query, limit),
).fetchall()
except sqlite3.DatabaseError as exc:
raise StorageError(f"FTS exact search failed: {exc}") from exc
results: List[SearchResult] = []
for row in rows:
rank = float(row["rank"]) if row["rank"] is not None else 0.0
score = abs(rank) if rank < 0 else 0.0
results.append(
SearchResult(
path=row["full_path"],
score=score,
excerpt=row["excerpt"],
)
)
return results
# Full content path: fetch content and find containing symbols
try:
# Join with files table to get content and file_id
rows = conn.execute(
"""
SELECT f.id AS file_id, f.full_path, f.content,
@@ -1411,7 +1468,7 @@ class DirIndexStore:
path=file_path,
score=score,
excerpt=excerpt,
content=block_content if return_full_content else None,
content=block_content,
start_line=start_line,
end_line=end_line,
symbol_name=symbol_name,
@@ -1424,31 +1481,59 @@ class DirIndexStore:
self,
query: str,
limit: int = 20,
return_full_content: bool = True,
return_full_content: bool = False,
context_lines: int = 10,
) -> List[SearchResult]:
"""Full-text search using fuzzy/substring matching with complete method blocks.
Returns complete code blocks (functions/methods/classes) containing the match,
rather than just a short snippet. If no symbol contains the match, returns
context lines around the match.
"""Full-text search using fuzzy/substring matching.
Args:
query: FTS5 query string
limit: Maximum results to return
return_full_content: If True, include full code block in content field
return_full_content: If True, include full code block in content field.
Default False for fast location-only results.
context_lines: Lines of context when no symbol contains the match
Returns:
List of SearchResult objects with complete code blocks
List of SearchResult objects (location-only by default, with content if requested)
Raises:
StorageError: If FTS search fails
"""
with self._lock:
conn = self._get_connection()
# Fast path: location-only results (no content processing)
if not return_full_content:
try:
rows = conn.execute(
"""
SELECT rowid, full_path, bm25(files_fts_fuzzy) AS rank,
snippet(files_fts_fuzzy, 2, '', '', '...', 30) AS excerpt
FROM files_fts_fuzzy
WHERE files_fts_fuzzy MATCH ?
ORDER BY rank
LIMIT ?
""",
(query, limit),
).fetchall()
except sqlite3.DatabaseError as exc:
raise StorageError(f"FTS fuzzy search failed: {exc}") from exc
results: List[SearchResult] = []
for row in rows:
rank = float(row["rank"]) if row["rank"] is not None else 0.0
score = abs(rank) if rank < 0 else 0.0
results.append(
SearchResult(
path=row["full_path"],
score=score,
excerpt=row["excerpt"],
)
)
return results
# Full content path: fetch content and find containing symbols
try:
# Join with files table to get content and file_id
rows = conn.execute(
"""
SELECT f.id AS file_id, f.full_path, f.content,
@@ -1503,7 +1588,7 @@ class DirIndexStore:
path=file_path,
score=score,
excerpt=excerpt,
content=block_content if return_full_content else None,
content=block_content,
start_line=start_line,
end_line=end_line,
symbol_name=symbol_name,

View File

@@ -527,19 +527,13 @@ class IndexTreeBuilder:
# Extract and store code relationships for graph visualization
if language_id in {"python", "javascript", "typescript"}:
try:
graph_analyzer = GraphAnalyzer(language_id)
if graph_analyzer.is_available():
relationships = graph_analyzer.analyze_with_symbols(
text, file_path, indexed_file.symbols
)
if relationships:
store.add_relationships(file_path, relationships)
except Exception as rel_exc:
self.logger.debug(
"Failed to extract relationships from %s: %s",
file_path, rel_exc
graph_analyzer = GraphAnalyzer(language_id)
if graph_analyzer.is_available():
relationships = graph_analyzer.analyze_with_symbols(
text, file_path, indexed_file.symbols
)
if relationships:
store.add_relationships(file_path, relationships)
files_count += 1
symbols_count += len(indexed_file.symbols)
@@ -750,16 +744,13 @@ def _build_dir_worker(args: tuple) -> DirBuildResult:
# Extract and store code relationships for graph visualization
if language_id in {"python", "javascript", "typescript"}:
try:
graph_analyzer = GraphAnalyzer(language_id)
if graph_analyzer.is_available():
relationships = graph_analyzer.analyze_with_symbols(
text, item, indexed_file.symbols
)
if relationships:
store.add_relationships(item, relationships)
except Exception:
pass # Silently skip relationship extraction errors
graph_analyzer = GraphAnalyzer(language_id)
if graph_analyzer.is_available():
relationships = graph_analyzer.analyze_with_symbols(
text, item, indexed_file.symbols
)
if relationships:
store.add_relationships(item, relationships)
files_count += 1
symbols_count += len(indexed_file.symbols)

View File

@@ -509,13 +509,13 @@ class SQLiteStore:
r.target_qualified_name,
r.relationship_type,
r.source_line,
f.path AS source_file,
f.full_path AS source_file,
r.target_file
FROM code_relationships r
JOIN symbols s ON r.source_symbol_id = s.id
JOIN files f ON s.file_id = f.id
WHERE r.target_qualified_name = ?
ORDER BY f.path, r.source_line
ORDER BY f.full_path, r.source_line
LIMIT ?
""",
(target_name, limit)