feat: Enhance CodexLens indexing and search capabilities with new CLI options and improved error handling

2026-02-11 02:33:51 +08:00 · 2025-12-19 15:10:37 +08:00
parent c7ced2bfbb
commit 2f0cce0089
18 changed files with 480 additions and 128 deletions
--- a/codex-lens/src/codexlens/storage/dir_index.py
+++ b/codex-lens/src/codexlens/storage/dir_index.py
@@ -1226,17 +1226,14 @@ class DirIndexStore:
        query: str,
        limit: int = 20,
        enhance_query: bool = False,
-        return_full_content: bool = True,
+        return_full_content: bool = False,
        context_lines: int = 10,
    ) -> List[SearchResult]:
-        """Full-text search in current directory files with complete method blocks.
+        """Full-text search in current directory files.

        Uses files_fts_exact (unicode61 tokenizer) for exact token matching.
        For fuzzy/substring search, use search_fts_fuzzy() instead.

-        Returns complete code blocks (functions/methods/classes) containing the match,
-        rather than just a short snippet.
-
        Best Practice (from industry analysis of Codanna/Code-Index-MCP):
        - Default: Respects exact user input without modification
        - Users can manually add wildcards (e.g., "loadPack*") for prefix matching
@@ -1248,11 +1245,12 @@ class DirIndexStore:
            limit: Maximum results to return
            enhance_query: If True, automatically add prefix wildcards for simple queries.
                          Default False to respect exact user input.
-            return_full_content: If True, include full code block in content field
+            return_full_content: If True, include full code block in content field.
+                                Default False for fast location-only results.
            context_lines: Lines of context when no symbol contains the match

        Returns:
-            List of SearchResult objects with complete code blocks
+            List of SearchResult objects (path, score, excerpt; plus content if requested)

        Raises:
            StorageError: If FTS search fails
@@ -1263,8 +1261,39 @@ class DirIndexStore:

        with self._lock:
            conn = self._get_connection()
+
+            # Fast path: location-only results (no content processing)
+            if not return_full_content:
+                try:
+                    rows = conn.execute(
+                        """
+                        SELECT rowid, full_path, bm25(files_fts_exact) AS rank,
+                               snippet(files_fts_exact, 2, '', '', '...', 30) AS excerpt
+                        FROM files_fts_exact
+                        WHERE files_fts_exact MATCH ?
+                        ORDER BY rank
+                        LIMIT ?
+                        """,
+                        (final_query, limit),
+                    ).fetchall()
+                except sqlite3.DatabaseError as exc:
+                    raise StorageError(f"FTS search failed: {exc}") from exc
+
+                results: List[SearchResult] = []
+                for row in rows:
+                    rank = float(row["rank"]) if row["rank"] is not None else 0.0
+                    score = abs(rank) if rank < 0 else 0.0
+                    results.append(
+                        SearchResult(
+                            path=row["full_path"],
+                            score=score,
+                            excerpt=row["excerpt"],
+                        )
+                    )
+                return results
+
+            # Full content path: fetch content and find containing symbols
            try:
-                # Join with files table to get content and file_id
                rows = conn.execute(
                    """
                    SELECT f.id AS file_id, f.full_path, f.content,
@@ -1319,7 +1348,7 @@ class DirIndexStore:
                        path=file_path,
                        score=score,
                        excerpt=excerpt,
-                        content=block_content if return_full_content else None,
+                        content=block_content,
                        start_line=start_line,
                        end_line=end_line,
                        symbol_name=symbol_name,
@@ -1332,31 +1361,59 @@ class DirIndexStore:
        self,
        query: str,
        limit: int = 20,
-        return_full_content: bool = True,
+        return_full_content: bool = False,
        context_lines: int = 10,
    ) -> List[SearchResult]:
-        """Full-text search using exact token matching with complete method blocks.
-
-        Returns complete code blocks (functions/methods/classes) containing the match,
-        rather than just a short snippet. If no symbol contains the match, returns
-        context lines around the match.
+        """Full-text search using exact token matching.

        Args:
            query: FTS5 query string
            limit: Maximum results to return
-            return_full_content: If True, include full code block in content field
+            return_full_content: If True, include full code block in content field.
+                                Default False for fast location-only results.
            context_lines: Lines of context when no symbol contains the match

        Returns:
-            List of SearchResult objects with complete code blocks
+            List of SearchResult objects (path, score, excerpt; plus content if requested)

        Raises:
            StorageError: If FTS search fails
        """
        with self._lock:
            conn = self._get_connection()
+
+            # Fast path: location-only results (no content processing)
+            if not return_full_content:
+                try:
+                    rows = conn.execute(
+                        """
+                        SELECT rowid, full_path, bm25(files_fts_exact) AS rank,
+                               snippet(files_fts_exact, 2, '', '', '...', 30) AS excerpt
+                        FROM files_fts_exact
+                        WHERE files_fts_exact MATCH ?
+                        ORDER BY rank
+                        LIMIT ?
+                        """,
+                        (query, limit),
+                    ).fetchall()
+                except sqlite3.DatabaseError as exc:
+                    raise StorageError(f"FTS exact search failed: {exc}") from exc
+
+                results: List[SearchResult] = []
+                for row in rows:
+                    rank = float(row["rank"]) if row["rank"] is not None else 0.0
+                    score = abs(rank) if rank < 0 else 0.0
+                    results.append(
+                        SearchResult(
+                            path=row["full_path"],
+                            score=score,
+                            excerpt=row["excerpt"],
+                        )
+                    )
+                return results
+
+            # Full content path: fetch content and find containing symbols
            try:
-                # Join with files table to get content and file_id
                rows = conn.execute(
                    """
                    SELECT f.id AS file_id, f.full_path, f.content,
@@ -1411,7 +1468,7 @@ class DirIndexStore:
                        path=file_path,
                        score=score,
                        excerpt=excerpt,
-                        content=block_content if return_full_content else None,
+                        content=block_content,
                        start_line=start_line,
                        end_line=end_line,
                        symbol_name=symbol_name,
@@ -1424,31 +1481,59 @@ class DirIndexStore:
        self,
        query: str,
        limit: int = 20,
-        return_full_content: bool = True,
+        return_full_content: bool = False,
        context_lines: int = 10,
    ) -> List[SearchResult]:
-        """Full-text search using fuzzy/substring matching with complete method blocks.
-
-        Returns complete code blocks (functions/methods/classes) containing the match,
-        rather than just a short snippet. If no symbol contains the match, returns
-        context lines around the match.
+        """Full-text search using fuzzy/substring matching.

        Args:
            query: FTS5 query string
            limit: Maximum results to return
-            return_full_content: If True, include full code block in content field
+            return_full_content: If True, include full code block in content field.
+                                Default False for fast location-only results.
            context_lines: Lines of context when no symbol contains the match

        Returns:
-            List of SearchResult objects with complete code blocks
+            List of SearchResult objects (path, score, excerpt; plus content if requested)

        Raises:
            StorageError: If FTS search fails
        """
        with self._lock:
            conn = self._get_connection()
+
+            # Fast path: location-only results (no content processing)
+            if not return_full_content:
+                try:
+                    rows = conn.execute(
+                        """
+                        SELECT rowid, full_path, bm25(files_fts_fuzzy) AS rank,
+                               snippet(files_fts_fuzzy, 2, '', '', '...', 30) AS excerpt
+                        FROM files_fts_fuzzy
+                        WHERE files_fts_fuzzy MATCH ?
+                        ORDER BY rank
+                        LIMIT ?
+                        """,
+                        (query, limit),
+                    ).fetchall()
+                except sqlite3.DatabaseError as exc:
+                    raise StorageError(f"FTS fuzzy search failed: {exc}") from exc
+
+                results: List[SearchResult] = []
+                for row in rows:
+                    rank = float(row["rank"]) if row["rank"] is not None else 0.0
+                    score = abs(rank) if rank < 0 else 0.0
+                    results.append(
+                        SearchResult(
+                            path=row["full_path"],
+                            score=score,
+                            excerpt=row["excerpt"],
+                        )
+                    )
+                return results
+
+            # Full content path: fetch content and find containing symbols
            try:
-                # Join with files table to get content and file_id
                rows = conn.execute(
                    """
                    SELECT f.id AS file_id, f.full_path, f.content,
@@ -1503,7 +1588,7 @@ class DirIndexStore:
                        path=file_path,
                        score=score,
                        excerpt=excerpt,
-                        content=block_content if return_full_content else None,
+                        content=block_content,
                        start_line=start_line,
                        end_line=end_line,
                        symbol_name=symbol_name,
--- a/codex-lens/src/codexlens/storage/index_tree.py
+++ b/codex-lens/src/codexlens/storage/index_tree.py
@@ -527,19 +527,13 @@ class IndexTreeBuilder:

                    # Extract and store code relationships for graph visualization
                    if language_id in {"python", "javascript", "typescript"}:
-                        try:
-                            graph_analyzer = GraphAnalyzer(language_id)
-                            if graph_analyzer.is_available():
-                                relationships = graph_analyzer.analyze_with_symbols(
-                                    text, file_path, indexed_file.symbols
-                                )
-                                if relationships:
-                                    store.add_relationships(file_path, relationships)
-                        except Exception as rel_exc:
-                            self.logger.debug(
-                                "Failed to extract relationships from %s: %s",
-                                file_path, rel_exc
+                        graph_analyzer = GraphAnalyzer(language_id)
+                        if graph_analyzer.is_available():
+                            relationships = graph_analyzer.analyze_with_symbols(
+                                text, file_path, indexed_file.symbols
                            )
+                            if relationships:
+                                store.add_relationships(file_path, relationships)

                    files_count += 1
                    symbols_count += len(indexed_file.symbols)
@@ -750,16 +744,13 @@ def _build_dir_worker(args: tuple) -> DirBuildResult:

                # Extract and store code relationships for graph visualization
                if language_id in {"python", "javascript", "typescript"}:
-                    try:
-                        graph_analyzer = GraphAnalyzer(language_id)
-                        if graph_analyzer.is_available():
-                            relationships = graph_analyzer.analyze_with_symbols(
-                                text, item, indexed_file.symbols
-                            )
-                            if relationships:
-                                store.add_relationships(item, relationships)
-                    except Exception:
-                        pass  # Silently skip relationship extraction errors
+                    graph_analyzer = GraphAnalyzer(language_id)
+                    if graph_analyzer.is_available():
+                        relationships = graph_analyzer.analyze_with_symbols(
+                            text, item, indexed_file.symbols
+                        )
+                        if relationships:
+                            store.add_relationships(item, relationships)

                files_count += 1
                symbols_count += len(indexed_file.symbols)
--- a/codex-lens/src/codexlens/storage/sqlite_store.py
+++ b/codex-lens/src/codexlens/storage/sqlite_store.py
@@ -509,13 +509,13 @@ class SQLiteStore:
                    r.target_qualified_name,
                    r.relationship_type,
                    r.source_line,
-                    f.path AS source_file,
+                    f.full_path AS source_file,
                    r.target_file
                FROM code_relationships r
                JOIN symbols s ON r.source_symbol_id = s.id
                JOIN files f ON s.file_id = f.id
                WHERE r.target_qualified_name = ?
-                ORDER BY f.path, r.source_line
+                ORDER BY f.full_path, r.source_line
                LIMIT ?
                """,
                (target_name, limit)