feat: Enhance CodexLens indexing and search capabilities with new CLI options and improved error handling

2026-02-10 02:24:35 +08:00 · 2025-12-19 15:10:37 +08:00
parent c7ced2bfbb
commit 2f0cce0089
18 changed files with 480 additions and 128 deletions
--- a/codex-lens/src/codexlens/search/chain_search.py
+++ b/codex-lens/src/codexlens/search/chain_search.py
@@ -831,13 +831,13 @@ class ChainSearchEngine:
                        r.target_qualified_name AS target_symbol,
                        r.relationship_type,
                        r.source_line,
-                        f.path AS source_file,
+                        f.full_path AS source_file,
                        r.target_file
                    FROM code_relationships r
                    JOIN symbols s ON r.source_symbol_id = s.id
                    JOIN files f ON s.file_id = f.id
                    WHERE s.name = ? AND r.relationship_type = 'call'
-                    ORDER BY f.path, r.source_line
+                    ORDER BY f.full_path, r.source_line
                    LIMIT 100
                    """,
                    (source_symbol,)
@@ -928,7 +928,7 @@ class ChainSearchEngine:
                        r.target_qualified_name,
                        r.relationship_type,
                        r.source_line,
-                        f.path AS source_file,
+                        f.full_path AS source_file,
                        r.target_file
                    FROM code_relationships r
                    JOIN symbols s ON r.source_symbol_id = s.id
@@ -940,7 +940,7 @@ class ChainSearchEngine:
                        r.target_qualified_name,
                        r.relationship_type,
                        r.source_line,
-                        f.path AS source_file,
+                        f.full_path AS source_file,
                        r.target_file
                    FROM code_relationships r
                    JOIN symbols s ON r.source_symbol_id = s.id
--- a/codex-lens/src/codexlens/semantic/graph_analyzer.py
+++ b/codex-lens/src/codexlens/semantic/graph_analyzer.py
@@ -434,20 +434,31 @@ class GraphAnalyzer:
    def _find_enclosing_symbol(self, node: TreeSitterNode, symbols: List[dict]) -> Optional[str]:
        """Find the enclosing function/method/class for a node.

+        Returns the fully qualified name (e.g., "MyClass.my_method") by walking up
+        the syntax tree and joining the enclosing symbol names, outermost first.
+
        Args:
            node: AST node to find enclosure for
            symbols: List of defined symbols

        Returns:
-            Name of enclosing symbol, or None if at module level
+            Fully qualified name of enclosing symbol, or None if at module level
        """
-        # Walk up the tree to find enclosing symbol
+        # Walk up the tree to find all enclosing symbols
+        enclosing_names = []
        parent = node.parent
+
        while parent is not None:
            for symbol in symbols:
                if symbol["node"] == parent:
-                    return symbol["name"]
+                    # Prepend: we walk innermost -> outermost, so the list ends up outermost first
+                    enclosing_names.insert(0, symbol["name"])
+                    break
            parent = parent.parent
+
+        # Return fully qualified name or None if at module level
+        if enclosing_names:
+            return ".".join(enclosing_names)
        return None

    def _extract_call_target(self, source_bytes: bytes, node: TreeSitterNode) -> Optional[str]:
--- a/codex-lens/src/codexlens/storage/dir_index.py
+++ b/codex-lens/src/codexlens/storage/dir_index.py
@@ -1226,17 +1226,14 @@ class DirIndexStore:
        query: str,
        limit: int = 20,
        enhance_query: bool = False,
-        return_full_content: bool = True,
+        return_full_content: bool = False,
        context_lines: int = 10,
    ) -> List[SearchResult]:
-        """Full-text search in current directory files with complete method blocks.
+        """Full-text search in current directory files.

        Uses files_fts_exact (unicode61 tokenizer) for exact token matching.
        For fuzzy/substring search, use search_fts_fuzzy() instead.

-        Returns complete code blocks (functions/methods/classes) containing the match,
-        rather than just a short snippet.
-
        Best Practice (from industry analysis of Codanna/Code-Index-MCP):
        - Default: Respects exact user input without modification
        - Users can manually add wildcards (e.g., "loadPack*") for prefix matching
@@ -1248,11 +1245,12 @@ class DirIndexStore:
            limit: Maximum results to return
            enhance_query: If True, automatically add prefix wildcards for simple queries.
                          Default False to respect exact user input.
-            return_full_content: If True, include full code block in content field
+            return_full_content: If True, include full code block in content field.
+                                Default False for fast location-only results.
            context_lines: Lines of context when no symbol contains the match

        Returns:
-            List of SearchResult objects with complete code blocks
+            List of SearchResult objects (location-only by default, with content if requested)

        Raises:
            StorageError: If FTS search fails
@@ -1263,8 +1261,39 @@ class DirIndexStore:

        with self._lock:
            conn = self._get_connection()
+
+            # Fast path: location-only results (no content processing)
+            if not return_full_content:
+                try:
+                    rows = conn.execute(
+                        """
+                        SELECT rowid, full_path, bm25(files_fts_exact) AS rank,
+                               snippet(files_fts_exact, 2, '', '', '...', 30) AS excerpt
+                        FROM files_fts_exact
+                        WHERE files_fts_exact MATCH ?
+                        ORDER BY rank
+                        LIMIT ?
+                        """,
+                        (final_query, limit),
+                    ).fetchall()
+                except sqlite3.DatabaseError as exc:
+                    raise StorageError(f"FTS search failed: {exc}") from exc
+
+                results: List[SearchResult] = []
+                for row in rows:
+                    rank = float(row["rank"]) if row["rank"] is not None else 0.0
+                    score = abs(rank) if rank < 0 else 0.0
+                    results.append(
+                        SearchResult(
+                            path=row["full_path"],
+                            score=score,
+                            excerpt=row["excerpt"],
+                        )
+                    )
+                return results
+
+            # Full content path: fetch content and find containing symbols
            try:
-                # Join with files table to get content and file_id
                rows = conn.execute(
                    """
                    SELECT f.id AS file_id, f.full_path, f.content,
@@ -1319,7 +1348,7 @@ class DirIndexStore:
                        path=file_path,
                        score=score,
                        excerpt=excerpt,
-                        content=block_content if return_full_content else None,
+                        content=block_content,
                        start_line=start_line,
                        end_line=end_line,
                        symbol_name=symbol_name,
@@ -1332,31 +1361,59 @@ class DirIndexStore:
        self,
        query: str,
        limit: int = 20,
-        return_full_content: bool = True,
+        return_full_content: bool = False,
        context_lines: int = 10,
    ) -> List[SearchResult]:
-        """Full-text search using exact token matching with complete method blocks.
-
-        Returns complete code blocks (functions/methods/classes) containing the match,
-        rather than just a short snippet. If no symbol contains the match, returns
-        context lines around the match.
+        """Full-text search using exact token matching.

        Args:
            query: FTS5 query string
            limit: Maximum results to return
-            return_full_content: If True, include full code block in content field
+            return_full_content: If True, include full code block in content field.
+                                Default False for fast location-only results.
            context_lines: Lines of context when no symbol contains the match

        Returns:
-            List of SearchResult objects with complete code blocks
+            List of SearchResult objects (location-only by default, with content if requested)

        Raises:
            StorageError: If FTS search fails
        """
        with self._lock:
            conn = self._get_connection()
+
+            # Fast path: location-only results (no content processing)
+            if not return_full_content:
+                try:
+                    rows = conn.execute(
+                        """
+                        SELECT rowid, full_path, bm25(files_fts_exact) AS rank,
+                               snippet(files_fts_exact, 2, '', '', '...', 30) AS excerpt
+                        FROM files_fts_exact
+                        WHERE files_fts_exact MATCH ?
+                        ORDER BY rank
+                        LIMIT ?
+                        """,
+                        (query, limit),
+                    ).fetchall()
+                except sqlite3.DatabaseError as exc:
+                    raise StorageError(f"FTS exact search failed: {exc}") from exc
+
+                results: List[SearchResult] = []
+                for row in rows:
+                    rank = float(row["rank"]) if row["rank"] is not None else 0.0
+                    score = abs(rank) if rank < 0 else 0.0
+                    results.append(
+                        SearchResult(
+                            path=row["full_path"],
+                            score=score,
+                            excerpt=row["excerpt"],
+                        )
+                    )
+                return results
+
+            # Full content path: fetch content and find containing symbols
            try:
-                # Join with files table to get content and file_id
                rows = conn.execute(
                    """
                    SELECT f.id AS file_id, f.full_path, f.content,
@@ -1411,7 +1468,7 @@ class DirIndexStore:
                        path=file_path,
                        score=score,
                        excerpt=excerpt,
-                        content=block_content if return_full_content else None,
+                        content=block_content,
                        start_line=start_line,
                        end_line=end_line,
                        symbol_name=symbol_name,
@@ -1424,31 +1481,59 @@ class DirIndexStore:
        self,
        query: str,
        limit: int = 20,
-        return_full_content: bool = True,
+        return_full_content: bool = False,
        context_lines: int = 10,
    ) -> List[SearchResult]:
-        """Full-text search using fuzzy/substring matching with complete method blocks.
-
-        Returns complete code blocks (functions/methods/classes) containing the match,
-        rather than just a short snippet. If no symbol contains the match, returns
-        context lines around the match.
+        """Full-text search using fuzzy/substring matching.

        Args:
            query: FTS5 query string
            limit: Maximum results to return
-            return_full_content: If True, include full code block in content field
+            return_full_content: If True, include full code block in content field.
+                                Default False for fast location-only results.
            context_lines: Lines of context when no symbol contains the match

        Returns:
-            List of SearchResult objects with complete code blocks
+            List of SearchResult objects (location-only by default, with content if requested)

        Raises:
            StorageError: If FTS search fails
        """
        with self._lock:
            conn = self._get_connection()
+
+            # Fast path: location-only results (no content processing)
+            if not return_full_content:
+                try:
+                    rows = conn.execute(
+                        """
+                        SELECT rowid, full_path, bm25(files_fts_fuzzy) AS rank,
+                               snippet(files_fts_fuzzy, 2, '', '', '...', 30) AS excerpt
+                        FROM files_fts_fuzzy
+                        WHERE files_fts_fuzzy MATCH ?
+                        ORDER BY rank
+                        LIMIT ?
+                        """,
+                        (query, limit),
+                    ).fetchall()
+                except sqlite3.DatabaseError as exc:
+                    raise StorageError(f"FTS fuzzy search failed: {exc}") from exc
+
+                results: List[SearchResult] = []
+                for row in rows:
+                    rank = float(row["rank"]) if row["rank"] is not None else 0.0
+                    score = abs(rank) if rank < 0 else 0.0
+                    results.append(
+                        SearchResult(
+                            path=row["full_path"],
+                            score=score,
+                            excerpt=row["excerpt"],
+                        )
+                    )
+                return results
+
+            # Full content path: fetch content and find containing symbols
            try:
-                # Join with files table to get content and file_id
                rows = conn.execute(
                    """
                    SELECT f.id AS file_id, f.full_path, f.content,
@@ -1503,7 +1588,7 @@ class DirIndexStore:
                        path=file_path,
                        score=score,
                        excerpt=excerpt,
-                        content=block_content if return_full_content else None,
+                        content=block_content,
                        start_line=start_line,
                        end_line=end_line,
                        symbol_name=symbol_name,
--- a/codex-lens/src/codexlens/storage/index_tree.py
+++ b/codex-lens/src/codexlens/storage/index_tree.py
@@ -527,19 +527,13 @@ class IndexTreeBuilder:

                    # Extract and store code relationships for graph visualization
                    if language_id in {"python", "javascript", "typescript"}:
-                        try:
-                            graph_analyzer = GraphAnalyzer(language_id)
-                            if graph_analyzer.is_available():
-                                relationships = graph_analyzer.analyze_with_symbols(
-                                    text, file_path, indexed_file.symbols
-                                )
-                                if relationships:
-                                    store.add_relationships(file_path, relationships)
-                        except Exception as rel_exc:
-                            self.logger.debug(
-                                "Failed to extract relationships from %s: %s",
-                                file_path, rel_exc
+                        graph_analyzer = GraphAnalyzer(language_id)
+                        if graph_analyzer.is_available():
+                            relationships = graph_analyzer.analyze_with_symbols(
+                                text, file_path, indexed_file.symbols
                            )
+                            if relationships:
+                                store.add_relationships(file_path, relationships)

                    files_count += 1
                    symbols_count += len(indexed_file.symbols)
@@ -750,16 +744,13 @@ def _build_dir_worker(args: tuple) -> DirBuildResult:

                # Extract and store code relationships for graph visualization
                if language_id in {"python", "javascript", "typescript"}:
-                    try:
-                        graph_analyzer = GraphAnalyzer(language_id)
-                        if graph_analyzer.is_available():
-                            relationships = graph_analyzer.analyze_with_symbols(
-                                text, item, indexed_file.symbols
-                            )
-                            if relationships:
-                                store.add_relationships(item, relationships)
-                    except Exception:
-                        pass  # Silently skip relationship extraction errors
+                    graph_analyzer = GraphAnalyzer(language_id)
+                    if graph_analyzer.is_available():
+                        relationships = graph_analyzer.analyze_with_symbols(
+                            text, item, indexed_file.symbols
+                        )
+                        if relationships:
+                            store.add_relationships(item, relationships)

                files_count += 1
                symbols_count += len(indexed_file.symbols)
--- a/codex-lens/src/codexlens/storage/sqlite_store.py
+++ b/codex-lens/src/codexlens/storage/sqlite_store.py
@@ -509,13 +509,13 @@ class SQLiteStore:
                    r.target_qualified_name,
                    r.relationship_type,
                    r.source_line,
-                    f.path AS source_file,
+                    f.full_path AS source_file,
                    r.target_file
                FROM code_relationships r
                JOIN symbols s ON r.source_symbol_id = s.id
                JOIN files f ON s.file_id = f.id
                WHERE r.target_qualified_name = ?
-                ORDER BY f.path, r.source_line
+                ORDER BY f.full_path, r.source_line
                LIMIT ?
                """,
                (target_name, limit)
--- a/codex-lens/tests/test_graph_analyzer.py
+++ b/codex-lens/tests/test_graph_analyzer.py
@@ -78,10 +78,10 @@ def outer():
        analyzer = GraphAnalyzer("python")
        relationships = analyzer.analyze_file(code, Path("test.py"))

-        # Should find inner -> inner_helper and outer -> inner
+        # Should find outer.inner -> inner_helper and outer -> inner (with fully qualified names)
        assert len(relationships) == 2
        call_pairs = {(rel.source_symbol, rel.target_symbol) for rel in relationships}
-        assert ("inner", "inner_helper") in call_pairs
+        assert ("outer.inner", "inner_helper") in call_pairs
        assert ("outer", "inner") in call_pairs

    def test_method_call_in_class(self):
@@ -97,10 +97,10 @@ def outer():
        analyzer = GraphAnalyzer("python")
        relationships = analyzer.analyze_file(code, Path("test.py"))

-        # Should find compute -> add
+        # Should find Calculator.compute -> add (with fully qualified source)
        assert len(relationships) == 1
        rel = relationships[0]
-        assert rel.source_symbol == "compute"
+        assert rel.source_symbol == "Calculator.compute"
        assert rel.target_symbol == "add"

    def test_module_level_call(self):
@@ -171,11 +171,11 @@ main()
        # Extract call pairs
        call_pairs = {(rel.source_symbol, rel.target_symbol) for rel in relationships}

-        # Expected relationships
+        # Expected relationships (with fully qualified source symbols for methods)
        expected = {
-            ("load", "read_file"),
-            ("process", "validate"),
-            ("process", "transform"),
+            ("DataProcessor.load", "read_file"),
+            ("DataProcessor.process", "validate"),
+            ("DataProcessor.process", "transform"),
            ("main", "DataProcessor"),
            ("main", "load"),
            ("main", "process"),
@@ -259,10 +259,10 @@ const main = () => {
        analyzer = GraphAnalyzer("javascript")
        relationships = analyzer.analyze_file(code, Path("test.js"))

-        # Should find compute -> add
+        # Should find Calculator.compute -> add (with fully qualified source)
        assert len(relationships) == 1
        rel = relationships[0]
-        assert rel.source_symbol == "compute"
+        assert rel.source_symbol == "Calculator.compute"
        assert rel.target_symbol == "add"

    def test_complex_javascript_file(self):
@@ -304,11 +304,12 @@ main();
        # Extract call pairs
        call_pairs = {(rel.source_symbol, rel.target_symbol) for rel in relationships}

-        # Expected relationships (note: constructor calls like "new DataProcessor()" are not tracked)
+        # Expected relationships (with fully qualified source symbols for methods)
+        # Note: constructor calls like "new DataProcessor()" are not tracked
        expected = {
-            ("load", "readFile"),
-            ("process", "validate"),
-            ("process", "transform"),
+            ("DataProcessor.load", "readFile"),
+            ("DataProcessor.process", "validate"),
+            ("DataProcessor.process", "transform"),
            ("main", "load"),
            ("main", "process"),
            ("<module>", "main"),