feat: Implement Cross-CLI Sync Panel for MCP servers

- Added CrossCliSyncPanel component for synchronizing MCP servers between Claude and Codex.
- Implemented server selection, copy operations, and result handling.
- Added tests for path mapping on Windows drives.
- Created E2E tests for ask_question Answer Broker functionality.
- Introduced MCP Tools Test Script for validating modified read_file and edit_file tools.
- Updated path_mapper to ensure correct drive formatting on Windows.
- Added .gitignore for ace-tool directory.
Author: catlog22
Date: 2026-02-08 23:19:19 +08:00
Parent: b9b2932f50
Commit: dfe153778c
24 changed files with 1911 additions and 168 deletions

View File

@@ -146,9 +146,12 @@ class Config:
staged_coarse_k: int = 200 # Number of coarse candidates from Stage 1 binary search
staged_lsp_depth: int = 2 # LSP relationship expansion depth in Stage 2
staged_stage2_mode: str = "precomputed" # "precomputed" (graph_neighbors) | "realtime" (LSP)
staged_realtime_lsp_timeout_s: float = 10.0 # Max time budget for realtime LSP expansion
staged_realtime_lsp_max_nodes: int = 100 # Node cap for realtime graph expansion
staged_realtime_lsp_warmup_s: float = 2.0 # Wait for server analysis after opening seed docs
staged_realtime_lsp_timeout_s: float = 30.0 # Max time budget for realtime LSP expansion
staged_realtime_lsp_depth: int = 1 # BFS depth for realtime LSP expansion
staged_realtime_lsp_max_nodes: int = 50 # Node cap for realtime graph expansion
staged_realtime_lsp_max_seeds: int = 1 # Seed cap for realtime graph expansion
staged_realtime_lsp_max_concurrent: int = 2 # Max concurrent LSP requests during graph expansion
staged_realtime_lsp_warmup_s: float = 3.0 # Wait for server analysis after opening seed docs
staged_realtime_lsp_resolve_symbols: bool = False # If True, resolves symbol names via documentSymbol (slower)
staged_clustering_strategy: str = "auto" # "auto", "hdbscan", "dbscan", "frequency", "noop"
staged_clustering_min_size: int = 3 # Minimum cluster size for Stage 3 grouping
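
For reference, a minimal usage sketch of the new Stage 2 realtime knobs. The import path and the ability to instantiate Config with defaults and override fields directly are assumptions; the field names and values mirror the block above.

from codexlens.config import Config  # assumed module path

config = Config()                               # assumes all fields have defaults
config.staged_stage2_mode = "realtime"          # use live LSP servers in Stage 2
config.staged_realtime_lsp_depth = 1            # shallow BFS keeps latency bounded
config.staged_realtime_lsp_timeout_s = 30.0     # overall budget for graph expansion
config.staged_realtime_lsp_max_seeds = 1        # expand only the best coarse candidate
config.staged_realtime_lsp_max_concurrent = 2   # cap concurrent LSP requests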

View File

@@ -14,6 +14,7 @@ Features:
from __future__ import annotations
import asyncio
import logging
import os
import time
from collections import OrderedDict
@@ -22,6 +23,8 @@ from pathlib import Path
from typing import Any, Dict, List, Optional, TYPE_CHECKING
from urllib.parse import unquote
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from codexlens.lsp.standalone_manager import StandaloneLspManager
@@ -362,6 +365,14 @@ class LspBridge:
except (KeyError, TypeError):
continue
logger.debug(
"LSP references for %s (%s:%s:%s): %d",
symbol.id,
symbol.file_path,
symbol.range.start_line,
symbol.range.start_character,
len(locations),
)
self._cache(cache_key, symbol.file_path, locations)
return locations
@@ -542,6 +553,14 @@ class LspBridge:
detail="Inferred from reference",
))
logger.debug(
"LSP call hierarchy for %s (%s:%s:%s): %d",
symbol.id,
symbol.file_path,
symbol.range.start_line,
symbol.range.start_character,
len(items),
)
self._cache(cache_key, symbol.file_path, items)
return items
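
The new per-symbol lines are emitted at DEBUG level, so they only appear when the bridge's logger is enabled for DEBUG. A minimal sketch, assuming the standard library logging module; the logger name follows the module's logging.getLogger(__name__) call and is assumed to live under the codexlens.lsp package.

import logging

logging.basicConfig(level=logging.INFO)
# Turn on the new reference/call-hierarchy debug lines for the LSP bridge only;
# adjust the logger name if the module path differs.
logging.getLogger("codexlens.lsp").setLevel(logging.DEBUG)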

View File

@@ -854,7 +854,7 @@ class ChainSearchEngine:
# ========== Stage 2: LSP Graph Expansion ==========
stage2_start = time.time()
expanded_results = self._stage2_lsp_expand(coarse_results, index_root)
expanded_results = self._stage2_lsp_expand(coarse_results, index_root, query=query)
stage_times["stage2_expand_ms"] = (time.time() - stage2_start) * 1000
stage_counts["stage2_expanded"] = len(expanded_results)
@@ -969,8 +969,9 @@ class ChainSearchEngine:
# Try centralized BinarySearcher first (preferred for mmap indexes)
index_root = index_paths[0].parent if index_paths else None
coarse_candidates: List[Tuple[int, int, Path]] = [] # (chunk_id, distance, index_path)
coarse_candidates: List[Tuple[int, float, Path]] = [] # (chunk_id, distance, index_path)
used_centralized = False
using_dense_fallback = False
if index_root:
binary_searcher = self._get_centralized_binary_searcher(index_root)
@@ -992,30 +993,78 @@ class ChainSearchEngine:
self.logger.debug(f"Centralized binary search failed: {exc}")
if not used_centralized:
# Fallback to per-directory binary indexes
use_gpu = True
if self._config is not None:
use_gpu = getattr(self._config, "embedding_use_gpu", True)
# Fallback to per-directory binary indexes (legacy BinaryANNIndex).
#
# Generating the query binary embedding can be expensive (depending on embedding backend).
# If no legacy binary vector files exist, skip this path and fall back to dense ANN search.
has_legacy_binary_vectors = any(
(p.parent / f"{p.stem}_binary_vectors.bin").exists() for p in index_paths
)
if not has_legacy_binary_vectors:
self.logger.debug(
"No legacy binary vector files found; skipping legacy binary search fallback"
)
else:
use_gpu = True
if self._config is not None:
use_gpu = getattr(self._config, "embedding_use_gpu", True)
try:
binary_backend = BinaryEmbeddingBackend(use_gpu=use_gpu)
query_binary = binary_backend.embed_packed([query])[0]
except Exception as exc:
self.logger.warning(f"Failed to generate binary query embedding: {exc}")
return [], index_root
for index_path in index_paths:
query_binary = None
try:
binary_index = self._get_or_create_binary_index(index_path)
if binary_index is None or binary_index.count() == 0:
continue
ids, distances = binary_index.search(query_binary, coarse_k)
for chunk_id, dist in zip(ids, distances):
coarse_candidates.append((chunk_id, dist, index_path))
binary_backend = BinaryEmbeddingBackend(use_gpu=use_gpu)
query_binary = binary_backend.embed_packed([query])[0]
except Exception as exc:
self.logger.debug(
"Binary search failed for %s: %s", index_path, exc
)
self.logger.warning(f"Failed to generate binary query embedding: {exc}")
query_binary = None
if query_binary is not None:
for index_path in index_paths:
try:
binary_index = self._get_or_create_binary_index(index_path)
if binary_index is None or binary_index.count() == 0:
continue
ids, distances = binary_index.search(query_binary, coarse_k)
for chunk_id, dist in zip(ids, distances):
coarse_candidates.append((chunk_id, float(dist), index_path))
except Exception as exc:
self.logger.debug(
"Binary search failed for %s: %s", index_path, exc
)
if not coarse_candidates:
# Final fallback: dense ANN coarse search (HNSW) over existing dense vector indexes.
#
# This allows the staged pipeline (LSP expansion + clustering) to run even when
# binary vectors are not generated for the current project.
dense_candidates: List[Tuple[int, float, Path]] = []
try:
from codexlens.semantic.ann_index import ANNIndex
from codexlens.semantic.embedder import Embedder
embedder = Embedder()
query_dense = embedder.embed_to_numpy([query])[0]
dim = int(getattr(query_dense, "shape", (len(query_dense),))[0])
for index_path in index_paths:
try:
ann_index = ANNIndex(index_path, dim=dim)
if not ann_index.load() or ann_index.count() == 0:
continue
ids, distances = ann_index.search(query_dense, top_k=coarse_k)
for chunk_id, dist in zip(ids, distances):
dense_candidates.append((chunk_id, float(dist), index_path))
except Exception as exc:
self.logger.debug(
"Dense coarse search failed for %s: %s", index_path, exc
)
except Exception as exc:
self.logger.debug("Dense coarse search fallback unavailable: %s", exc)
dense_candidates = []
if dense_candidates:
dense_candidates.sort(key=lambda x: x[1])
coarse_candidates = dense_candidates[:coarse_k]
using_dense_fallback = True
if not coarse_candidates:
return [], index_root
@@ -1086,7 +1135,11 @@ class ChainSearchEngine:
(d for cid, d, _ in coarse_candidates if cid == chunk_id),
256
)
score = 1.0 - (distance / 256.0)
if using_dense_fallback:
# Cosine distance in [0, 2] -> clamp to [0, 1] score
score = max(0.0, 1.0 - float(distance))
else:
score = 1.0 - (int(distance) / 256.0)
content = chunk.get("content", "")
@@ -1129,6 +1182,7 @@ class ChainSearchEngine:
self,
coarse_results: List[SearchResult],
index_root: Optional[Path],
query: Optional[str] = None,
) -> List[SearchResult]:
"""Stage 2: LSP/graph expansion for staged cascade.
@@ -1152,7 +1206,11 @@ class ChainSearchEngine:
mode = (getattr(self._config, "staged_stage2_mode", "precomputed") or "precomputed").strip().lower()
if mode in {"realtime", "live"}:
return self._stage2_realtime_lsp_expand(coarse_results, index_root=index_root)
return self._stage2_realtime_lsp_expand(
coarse_results,
index_root=index_root,
query=query,
)
return self._stage2_precomputed_graph_expand(coarse_results, index_root=index_root)
@@ -1209,6 +1267,7 @@ class ChainSearchEngine:
coarse_results: List[SearchResult],
*,
index_root: Path,
query: Optional[str] = None,
) -> List[SearchResult]:
"""Stage 2 (realtime): compute expansion graph via live LSP servers."""
import asyncio
@@ -1217,16 +1276,27 @@ class ChainSearchEngine:
from codexlens.hybrid_search.data_structures import CodeSymbolNode, Range
from codexlens.lsp import LspBridge, LspGraphBuilder
max_depth = 2
timeout_s = 10.0
max_nodes = 100
warmup_s = 2.0
max_depth = 1
timeout_s = 30.0
max_nodes = 50
max_seeds = 1
max_concurrent = 2
warmup_s = 3.0
resolve_symbols = False
if self._config is not None:
max_depth = int(getattr(self._config, "staged_lsp_depth", 2) or 2)
timeout_s = float(getattr(self._config, "staged_realtime_lsp_timeout_s", 10.0) or 10.0)
max_nodes = int(getattr(self._config, "staged_realtime_lsp_max_nodes", 100) or 100)
warmup_s = float(getattr(self._config, "staged_realtime_lsp_warmup_s", 2.0) or 0.0)
max_depth = int(
getattr(
self._config,
"staged_realtime_lsp_depth",
getattr(self._config, "staged_lsp_depth", 1),
)
or 1
)
timeout_s = float(getattr(self._config, "staged_realtime_lsp_timeout_s", 30.0) or 30.0)
max_nodes = int(getattr(self._config, "staged_realtime_lsp_max_nodes", 50) or 50)
warmup_s = float(getattr(self._config, "staged_realtime_lsp_warmup_s", 3.0) or 0.0)
max_seeds = int(getattr(self._config, "staged_realtime_lsp_max_seeds", 1) or 1)
max_concurrent = int(getattr(self._config, "staged_realtime_lsp_max_concurrent", 2) or 2)
resolve_symbols = bool(getattr(self._config, "staged_realtime_lsp_resolve_symbols", False))
try:
@@ -1234,13 +1304,189 @@ class ChainSearchEngine:
except Exception:
source_root = Path(coarse_results[0].path).resolve().parent
workspace_root = self._find_lsp_workspace_root(source_root)
lsp_config_file = self._find_lsp_config_file(source_root)
workspace_root = Path(source_root).resolve()
max_expand = min(10, len(coarse_results))
max_expand = min(max(1, max_seeds), len(coarse_results))
seed_nodes: List[CodeSymbolNode] = []
seed_ids: set[str] = set()
for seed in list(coarse_results)[:max_expand]:
selected_results = list(coarse_results)
if query:
import re
terms = {
t.lower()
for t in re.findall(r"[A-Za-z_][A-Za-z0-9_]*", query)
if t
}
def _priority(result: SearchResult) -> float:
sym = (result.symbol_name or "").strip().lower()
stem = Path(result.path).stem.lower() if result.path else ""
score = 0.0
if sym and sym in terms:
score += 5.0
if sym:
score += 2.0
if stem and stem in terms:
score += 1.0
if result.symbol_kind:
score += 0.5
if result.start_line:
score += 0.2
return score
indexed = list(enumerate(selected_results))
indexed.sort(
key=lambda pair: (
_priority(pair[1]),
float(pair[1].score),
-pair[0],
),
reverse=True,
)
selected_results = [r for _, r in indexed]
else:
indexed = list(enumerate(selected_results))
indexed.sort(
key=lambda pair: (
1.0 if pair[1].symbol_name else 0.0,
float(pair[1].score),
-pair[0],
),
reverse=True,
)
selected_results = [r for _, r in indexed]
# Prefer symbol-definition seeds when possible (improves LSP reference/call-hierarchy results).
#
# NOTE: We avoid relying purely on the stored symbol index here because its ranges may be
# imprecise in some projects. Instead, we attempt a lightweight definition-line detection
# for query identifiers within the top coarse candidate files.
if query:
try:
import re
terms_raw = [
t for t in re.findall(r"[A-Za-z_][A-Za-z0-9_]*", query) if t
]
stopwords = {
"class", "def", "function", "method", "import", "from", "return",
"async", "await", "public", "private", "protected", "static",
"const", "let", "var", "new",
}
candidate_terms = [
t for t in terms_raw
if t.lower() not in stopwords and len(t) >= 3
]
candidate_terms.sort(key=len, reverse=True)
# Candidate files (best-first): de-dupe while preserving ordering.
candidate_files: List[str] = []
seen_files: set[str] = set()
for r in selected_results:
if r.path and r.path not in seen_files:
seen_files.add(r.path)
candidate_files.append(r.path)
if len(candidate_files) >= 50:
break
# Also consider files whose *names* match query identifiers (helps when coarse retrieval
# misses the defining file for a symbol like `Config`).
try:
if source_root and candidate_terms:
allow_suffix = {".py", ".ts", ".tsx", ".js", ".jsx"}
name_terms = [t.lower() for t in candidate_terms[:3]]
for dirpath, _, filenames in os.walk(source_root):
for filename in filenames:
suffix = Path(filename).suffix.lower()
if suffix not in allow_suffix:
continue
lowered = filename.lower()
if any(t in lowered for t in name_terms):
fp = str(Path(dirpath) / filename)
if fp not in seen_files:
seen_files.add(fp)
candidate_files.append(fp)
if len(candidate_files) >= 120:
break
except Exception:
pass
for term in candidate_terms[:5]:
if len(seed_nodes) >= max_expand:
break
escaped = re.escape(term)
py_class = re.compile(rf"^\s*class\s+{escaped}\b")
py_def = re.compile(rf"^\s*(?:async\s+)?def\s+{escaped}\b")
ts_class = re.compile(rf"^\s*(?:export\s+)?class\s+{escaped}\b")
ts_func = re.compile(rf"^\s*(?:export\s+)?(?:async\s+)?function\s+{escaped}\b")
for file_path in candidate_files:
if len(seed_nodes) >= max_expand:
break
suffix = Path(file_path).suffix.lower()
if suffix not in {".py", ".ts", ".tsx", ".js", ".jsx"}:
continue
try:
lines = Path(file_path).read_text(encoding="utf-8", errors="ignore").splitlines()
except Exception:
continue
for i, line in enumerate(lines):
kind = None
if suffix == ".py":
if py_class.search(line):
kind = "class"
elif py_def.search(line):
kind = "function"
else:
if ts_class.search(line):
kind = "class"
elif ts_func.search(line):
kind = "function"
if not kind:
continue
start_line = i + 1
idx = line.find(term)
if idx >= 0:
start_character = idx + 1
else:
stripped = line.lstrip()
start_character = (len(line) - len(stripped)) + 1 if stripped else 1
node_id = f"{file_path}:{term}:{start_line}"
if node_id in seed_ids:
break
seed_ids.add(node_id)
seed_nodes.append(
CodeSymbolNode(
id=node_id,
name=term,
kind=kind,
file_path=file_path,
range=Range(
start_line=start_line,
start_character=start_character,
end_line=start_line,
end_character=start_character,
),
)
)
break
except Exception:
pass
for seed in selected_results:
if len(seed_nodes) >= max_expand:
break
if not seed.path:
continue
name = seed.symbol_name or Path(seed.path).stem
@@ -1249,14 +1495,21 @@ class ChainSearchEngine:
end_line = int(seed.end_line or start_line)
start_character = 1
try:
if seed.symbol_name and start_line >= 1:
if start_line >= 1:
line_text = Path(seed.path).read_text(encoding="utf-8", errors="ignore").splitlines()[start_line - 1]
idx = line_text.find(seed.symbol_name)
if idx >= 0:
start_character = idx + 1 # 1-based for StandaloneLspManager
if seed.symbol_name:
idx = line_text.find(seed.symbol_name)
if idx >= 0:
start_character = idx + 1 # 1-based for StandaloneLspManager
else:
stripped = line_text.lstrip()
if stripped:
start_character = (len(line_text) - len(stripped)) + 1
except Exception:
start_character = 1
node_id = f"{seed.path}:{name}:{start_line}"
if node_id in seed_ids:
continue
seed_ids.add(node_id)
seed_nodes.append(
CodeSymbolNode(
@@ -1268,7 +1521,7 @@ class ChainSearchEngine:
start_line=start_line,
start_character=start_character,
end_line=end_line,
end_character=1,
end_character=start_character if end_line == start_line else 1,
),
raw_code=seed.content or "",
docstring=seed.excerpt or "",
@@ -1279,7 +1532,11 @@ class ChainSearchEngine:
return coarse_results
async def expand_graph():
async with LspBridge(workspace_root=str(workspace_root), timeout=timeout_s) as bridge:
async with LspBridge(
workspace_root=str(workspace_root),
config_file=str(lsp_config_file) if lsp_config_file else None,
timeout=timeout_s,
) as bridge:
# Warm up analysis: open seed docs and wait a bit so references/call hierarchy are populated.
if warmup_s > 0:
for seed in seed_nodes[:3]:
@@ -1288,12 +1545,14 @@ class ChainSearchEngine:
except Exception:
continue
try:
await asyncio.sleep(min(warmup_s, max(0.0, timeout_s - 0.5)))
warmup_budget = min(warmup_s, max(0.0, timeout_s * 0.1))
await asyncio.sleep(min(warmup_budget, max(0.0, timeout_s - 0.5)))
except Exception:
pass
builder = LspGraphBuilder(
max_depth=max_depth,
max_nodes=max_nodes,
max_concurrent=max(1, max_concurrent),
resolve_symbols=resolve_symbols,
)
return await builder.build_from_seeds(seed_nodes, bridge)
@@ -1314,9 +1573,21 @@ class ChainSearchEngine:
else:
graph = run_coro_blocking()
except Exception as exc:
self.logger.debug("Stage 2 (realtime) expansion failed: %s", exc)
self.logger.debug("Stage 2 (realtime) expansion failed: %r", exc)
return coarse_results
try:
node_count = len(getattr(graph, "nodes", {}) or {})
edge_count = len(getattr(graph, "edges", []) or [])
except Exception:
node_count, edge_count = 0, 0
self.logger.debug(
"Stage 2 (realtime) graph built: seeds=%d nodes=%d edges=%d",
len(seed_nodes),
node_count,
edge_count,
)
related_results: List[SearchResult] = []
for node_id, node in getattr(graph, "nodes", {}).items():
if node_id in seed_ids or getattr(node, "id", "") in seed_ids:
@@ -1395,6 +1666,21 @@ class ChainSearchEngine:
return start
def _find_lsp_config_file(self, start_path: Path) -> Optional[Path]:
"""Find a lsp-servers.json by walking up from start_path."""
start = Path(start_path).resolve()
if start.is_file():
start = start.parent
for current in [start, *list(start.parents)]:
try:
candidate = current / "lsp-servers.json"
if candidate.is_file():
return candidate
except OSError:
continue
return None
def _stage3_cluster_prune(
self,
expanded_results: List[SearchResult],

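As a compact summary of the coarse-score normalization introduced above, a minimal sketch (illustrative only; the 256 divisor mirrors the existing binary-distance normalization and is assumed to match the packed binary vector width):

def coarse_score(distance: float, using_dense_fallback: bool) -> float:
    """Map a coarse-search distance to a [0, 1] relevance score."""
    if using_dense_fallback:
        # Dense ANN fallback returns cosine distance in [0, 2]; clamp to [0, 1].
        return max(0.0, 1.0 - float(distance))
    # Legacy binary (Hamming) distance; 256 matches the existing normalization.
    return 1.0 - (int(distance) / 256.0)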
View File

@@ -290,7 +290,7 @@ class PathMapper:
# Check if first part is a drive letter
if len(parts[0]) == 1 and parts[0].isalpha():
# D/path/to/dir → D:/path/to/dir
drive = f"{parts[0]}:"
drive = f"{parts[0]}:/"
if len(parts) > 1:
return Path(drive) / Path(*parts[1:])
return Path(drive)
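
The trailing slash matters because pathlib treats a bare drive letter as a drive-relative path, so joined segments are not rooted on the drive. A quick illustration, runnable on any platform via PureWindowsPath:

from pathlib import PureWindowsPath

print(PureWindowsPath("D:") / "path" / "to" / "dir")   # D:path\to\dir   (drive-relative)
print(PureWindowsPath("D:/") / "path" / "to" / "dir")  # D:\path\to\dir  (rooted on D:)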