Files
Claude-Code-Workflow/codex-lens/src/codexlens/lsp/lsp_bridge.py
catlog22 2f3a14e946 Add unit tests for LspGraphBuilder class
- Implement comprehensive unit tests for the LspGraphBuilder class to validate its functionality in building code association graphs.
- Tests cover various scenarios including single level graph expansion, max nodes and depth boundaries, concurrent expansion limits, document symbol caching, error handling during node expansion, and edge cases such as empty seed lists and self-referencing nodes.
- Utilize pytest and asyncio for asynchronous testing and mocking of LspBridge methods.
2026-01-20 12:49:31 +08:00

835 lines
30 KiB
Python

"""LspBridge service for real-time LSP communication with caching.
This module provides a bridge to communicate with language servers either via:
1. Standalone LSP Manager (direct subprocess communication - default)
2. VSCode Bridge extension (HTTP-based, legacy mode)
Features:
- Direct communication with language servers (no VSCode dependency)
- Cache with TTL and file modification time invalidation
- Graceful error handling with empty results on failure
- Support for definition, references, hover, and call hierarchy
"""
from __future__ import annotations
import asyncio
import os
import time
from collections import OrderedDict
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, TYPE_CHECKING
if TYPE_CHECKING:
from codexlens.lsp.standalone_manager import StandaloneLspManager
# Check for optional dependencies
try:
import aiohttp
HAS_AIOHTTP = True
except ImportError:
HAS_AIOHTTP = False
from codexlens.hybrid_search.data_structures import (
CallHierarchyItem,
CodeSymbolNode,
Range,
)
@dataclass
class Location:
    """A 1-based position in a source file, built from an LSP response.

    Attributes:
        file_path: Filesystem path of the file (never a ``file://`` URI).
        line: 1-based line number.
        character: 1-based column number.
    """

    file_path: str
    line: int
    character: int

    def to_dict(self) -> Dict[str, Any]:
        """Return the location as a plain dictionary."""
        return {
            "file_path": self.file_path,
            "line": self.line,
            "character": self.character,
        }

    @classmethod
    def from_lsp_response(cls, data: Dict[str, Any]) -> "Location":
        """Create a Location from an LSP-style or direct-format mapping.

        Two input shapes are accepted:

        * LSP format: ``{"uri": "file:///...", "range": {"start": {...}}}``
          where positions are 0-based and are converted to 1-based.
        * Direct format: ``{"file_path": ..., "line": ..., "character": ...}``
          where positions are taken as-is (defaulting to 1).

        Args:
            data: Mapping describing the location.

        Returns:
            The parsed Location. ``file://`` URIs are converted to plain
            paths and percent-decoded; any other value is used unchanged.
        """
        # Function-scope import keeps this block self-contained.
        from urllib.parse import unquote

        uri = data.get("uri", data.get("file_path", ""))
        if uri.startswith("file://"):
            # URIs are percent-encoded ("my%20file.py", "c%3A/..."), so decode
            # before using the value as a filesystem path.
            file_path = unquote(uri[len("file://"):])
            # Windows: file:///C:/path -> "/C:/path" -> "C:/path"
            # Unix:    file:///path    -> "/path" (leading slash kept)
            has_drive = (
                file_path[:1] == "/"
                and file_path[1:2].isalpha()
                and file_path[2:3] == ":"
            )
            if has_drive:
                file_path = file_path[1:]
        else:
            file_path = uri

        # Position comes from the LSP range when present, else direct fields.
        if "range" in data:
            start = data["range"].get("start", {})
            line = start.get("line", 0) + 1  # LSP is 0-based, convert to 1-based
            character = start.get("character", 0) + 1
        else:
            line = data.get("line", 1)
            character = data.get("character", 1)

        return cls(file_path=file_path, line=line, character=character)
@dataclass
class CacheEntry:
    """One cached LSP response plus the metadata used to invalidate it.

    Attributes:
        data: The response payload being cached.
        file_mtime: Modification time of the source file at caching time;
            compared against the current mtime to detect edits.
        cached_at: Unix timestamp at which the entry was stored; compared
            against the cache TTL.
    """

    data: Any
    file_mtime: float
    cached_at: float
class LspBridge:
    """Bridge for real-time LSP communication with language servers.

    By default, uses StandaloneLspManager to directly spawn and communicate
    with language servers via JSON-RPC over stdio. No VSCode dependency
    required. For legacy mode, can use the VSCode Bridge HTTP server
    (set ``use_vscode_bridge=True``; requires ``aiohttp``).

    Responses are cached per query in an LRU-ordered dict. A cached entry is
    discarded when it is older than ``cache_ttl`` seconds or when the source
    file's modification time differs from the one recorded at caching time.
    Failures are reported as empty results (``[]`` / ``None``) rather than
    exceptions wherever the individual method documents it.

    Example:
        # Default: standalone mode (no VSCode needed)
        async with LspBridge() as bridge:
            refs = await bridge.get_references(symbol)
            definition = await bridge.get_definition(symbol)

        # Legacy: VSCode Bridge mode
        async with LspBridge(use_vscode_bridge=True) as bridge:
            refs = await bridge.get_references(symbol)
    """

    DEFAULT_BRIDGE_URL = "http://127.0.0.1:3457"
    DEFAULT_TIMEOUT = 30.0  # seconds (increased for standalone mode)
    DEFAULT_CACHE_TTL = 300  # 5 minutes
    DEFAULT_MAX_CACHE_SIZE = 1000  # Maximum cache entries

    # LSP SymbolKind enum (1-indexed) mapped to readable names. Kept at class
    # level so the table is built once instead of on every lookup.
    _SYMBOL_KINDS = {
        1: "file",
        2: "module",
        3: "namespace",
        4: "package",
        5: "class",
        6: "method",
        7: "property",
        8: "field",
        9: "constructor",
        10: "enum",
        11: "interface",
        12: "function",
        13: "variable",
        14: "constant",
        15: "string",
        16: "number",
        17: "boolean",
        18: "array",
        19: "object",
        20: "key",
        21: "null",
        22: "enum_member",
        23: "struct",
        24: "event",
        25: "operator",
        26: "type_parameter",
    }

    def __init__(
        self,
        bridge_url: str = DEFAULT_BRIDGE_URL,
        timeout: float = DEFAULT_TIMEOUT,
        cache_ttl: int = DEFAULT_CACHE_TTL,
        max_cache_size: int = DEFAULT_MAX_CACHE_SIZE,
        use_vscode_bridge: bool = False,
        workspace_root: Optional[str] = None,
        config_file: Optional[str] = None,
    ):
        """Initialize LspBridge.

        Args:
            bridge_url: URL of the VSCode Bridge HTTP server (legacy mode only)
            timeout: Request timeout in seconds
            cache_ttl: Cache time-to-live in seconds
            max_cache_size: Maximum number of cache entries (LRU eviction);
                a value <= 0 disables caching
            use_vscode_bridge: If True, use VSCode Bridge HTTP mode (requires aiohttp)
            workspace_root: Root directory for standalone LSP manager
            config_file: Path to lsp-servers.json configuration file

        Raises:
            ImportError: If ``use_vscode_bridge`` is True and aiohttp is not
                installed.
        """
        self.bridge_url = bridge_url
        self.timeout = timeout
        self.cache_ttl = cache_ttl
        self.max_cache_size = max_cache_size
        self.use_vscode_bridge = use_vscode_bridge
        self.workspace_root = workspace_root
        self.config_file = config_file
        self.cache: "OrderedDict[str, CacheEntry]" = OrderedDict()

        # VSCode Bridge mode (legacy)
        self._session: Optional["aiohttp.ClientSession"] = None

        # Standalone mode (default)
        self._manager: Optional["StandaloneLspManager"] = None
        self._manager_started = False

        # Validate dependencies
        if use_vscode_bridge and not HAS_AIOHTTP:
            raise ImportError(
                "aiohttp is required for VSCode Bridge mode: pip install aiohttp"
            )

    # ------------------------------------------------------------------
    # Transport lifecycle
    # ------------------------------------------------------------------

    async def _ensure_manager(self) -> "StandaloneLspManager":
        """Create (once) and start (once) the standalone LSP manager."""
        if self._manager is None:
            # Imported lazily so the module can be loaded without it.
            from codexlens.lsp.standalone_manager import StandaloneLspManager

            self._manager = StandaloneLspManager(
                workspace_root=self.workspace_root,
                config_file=self.config_file,
                timeout=self.timeout,
            )
        if not self._manager_started:
            await self._manager.start()
            self._manager_started = True
        return self._manager

    async def _get_session(self) -> "aiohttp.ClientSession":
        """Get or create the aiohttp session (VSCode Bridge mode only).

        Raises:
            ImportError: If aiohttp is not installed.
        """
        if not HAS_AIOHTTP:
            raise ImportError("aiohttp required for VSCode Bridge mode")
        if self._session is None or self._session.closed:
            timeout = aiohttp.ClientTimeout(total=self.timeout)
            self._session = aiohttp.ClientSession(timeout=timeout)
        return self._session

    async def close(self) -> None:
        """Close the HTTP session (if any) and stop the standalone manager."""
        try:
            # Close VSCode Bridge session
            if self._session and not self._session.closed:
                await self._session.close()
                self._session = None
        finally:
            # Stop the manager even when closing the session failed, so the
            # started manager is never left running.
            if self._manager and self._manager_started:
                await self._manager.stop()
                self._manager_started = False

    # ------------------------------------------------------------------
    # Cache
    # ------------------------------------------------------------------

    def _get_file_mtime(self, file_path: str) -> float:
        """Get file modification time, or 0.0 if the file cannot be stat'ed."""
        try:
            return os.path.getmtime(file_path)
        except OSError:
            return 0.0

    def _is_cached(self, cache_key: str, file_path: str) -> bool:
        """Check whether a usable cache entry exists for ``cache_key``.

        An entry is unusable (and is removed) when its TTL has expired or the
        file's modification time differs from the one stored with the entry.
        A valid entry is marked as most recently used.

        Args:
            cache_key: The cache key to check
            file_path: Path to source file for mtime check

        Returns:
            True if cache is valid and can be used
        """
        entry = self.cache.get(cache_key)
        if entry is None:
            return False

        expired = time.time() - entry.cached_at > self.cache_ttl
        # The mtime is only consulted when the TTL check passes.
        if expired or self._get_file_mtime(file_path) != entry.file_mtime:
            del self.cache[cache_key]
            return False

        # Move to end on access (LRU behavior)
        self.cache.move_to_end(cache_key)
        return True

    def _cache(self, key: str, file_path: str, data: Any) -> None:
        """Store data in the cache, evicting least-recently-used entries.

        Args:
            key: Cache key
            file_path: Path to source file (for mtime tracking)
            data: Data to cache
        """
        if self.max_cache_size <= 0:
            # Nothing can be stored; without this guard the eviction loop
            # below would pop from an empty dict and raise KeyError.
            return

        # Drop any stale entry for this key first: refreshing an existing key
        # does not grow the cache, so it must not evict an unrelated entry.
        self.cache.pop(key, None)

        # Remove oldest entries until there is room for one more.
        while len(self.cache) >= self.max_cache_size:
            self.cache.popitem(last=False)

        # A fresh insert lands at the most-recently-used end.
        self.cache[key] = CacheEntry(
            data=data,
            file_mtime=self._get_file_mtime(file_path),
            cached_at=time.time(),
        )

    def clear_cache(self) -> None:
        """Clear all cached entries."""
        self.cache.clear()

    # ------------------------------------------------------------------
    # Shared parsing helpers
    # ------------------------------------------------------------------

    async def _request_vscode_bridge(self, action: str, params: Dict[str, Any]) -> Any:
        """Make HTTP request to VSCode Bridge (legacy mode).

        Args:
            action: The endpoint/action name (e.g., "get_definition")
            params: Request parameters (sent as the JSON body)

        Returns:
            The ``result`` field of the response on success; None on a
            non-200 status, ``success: false``, timeout, or any other error.
        """
        url = f"{self.bridge_url}/{action}"
        try:
            session = await self._get_session()
            async with session.post(url, json=params) as response:
                if response.status != 200:
                    return None
                data = await response.json()
                if data.get("success") is False:
                    return None
                return data.get("result")
        except Exception:
            # Deliberate best-effort: timeouts, connection errors and malformed
            # payloads all degrade to "no result".
            return None

    @staticmethod
    def _position_params(symbol: "CodeSymbolNode") -> Dict[str, Any]:
        """Build the file/line/character arguments shared by all position queries."""
        return {
            "file_path": symbol.file_path,
            "line": symbol.range.start_line,
            "character": symbol.range.start_character,
        }

    @staticmethod
    def _uri_to_path(uri: str) -> str:
        """Convert a ``file://`` URI to a path; other strings pass through.

        Delegates to ``Location.from_lsp_response`` so URI handling lives in
        one place.
        """
        return Location.from_lsp_response({"uri": uri}).file_path

    @staticmethod
    def _parse_locations(items: Any) -> List["Location"]:
        """Convert an iterable of LSP location dicts, skipping malformed items."""
        locations: List["Location"] = []
        for item in items or []:
            try:
                locations.append(Location.from_lsp_response(item))
            except (KeyError, TypeError, AttributeError):
                continue
        return locations

    @staticmethod
    def _first_location(result: Any) -> Optional["Location"]:
        """Parse a definition result that may be a dict or a list of dicts."""
        if not result:
            return None
        candidate = result[0] if isinstance(result, list) else result
        if not isinstance(candidate, dict):
            return None
        try:
            return Location.from_lsp_response(candidate)
        except (KeyError, TypeError, AttributeError):
            return None

    @staticmethod
    def _range_from_lsp(range_data: Dict[str, Any]) -> "Range":
        """Convert a 0-based LSP range dict into a 1-based ``Range``."""
        start = range_data.get("start", {})
        end = range_data.get("end", {})
        return Range(
            start_line=start.get("line", 0) + 1,
            start_character=start.get("character", 0) + 1,
            end_line=end.get("line", 0) + 1,
            end_character=end.get("character", 0) + 1,
        )

    @staticmethod
    def _call_items_from_references(refs: List["Location"]) -> List["CallHierarchyItem"]:
        """Represent each reference location as an inferred caller item."""
        return [
            CallHierarchyItem(
                name=f"caller@{ref.line}",
                kind="reference",
                file_path=ref.file_path,
                range=Range(
                    start_line=ref.line,
                    start_character=ref.character,
                    end_line=ref.line,
                    end_character=ref.character,
                ),
                detail="Inferred from reference",
            )
            for ref in refs
        ]

    # ------------------------------------------------------------------
    # Queries
    # ------------------------------------------------------------------

    async def get_references(self, symbol: "CodeSymbolNode") -> List["Location"]:
        """Get all references to a symbol via real-time LSP.

        Args:
            symbol: The code symbol to find references for

        Returns:
            List of Location objects where the symbol is referenced.
            Returns empty list on error or timeout.
        """
        cache_key = f"refs:{symbol.id}"
        if self._is_cached(cache_key, symbol.file_path):
            return self.cache[cache_key].data

        params = self._position_params(symbol)
        if self.use_vscode_bridge:
            # Legacy: VSCode Bridge HTTP mode
            result = await self._request_vscode_bridge("get_references", params)
            # Don't cache on connection error (result is None)
            if result is None:
                return []
            locations = self._parse_locations(result) if isinstance(result, list) else []
        else:
            # Default: Standalone mode
            manager = await self._ensure_manager()
            result = await manager.get_references(**params)
            locations = self._parse_locations(result)

        self._cache(cache_key, symbol.file_path, locations)
        return locations

    async def get_definition(self, symbol: "CodeSymbolNode") -> Optional["Location"]:
        """Get symbol definition location.

        Args:
            symbol: The code symbol to find definition for

        Returns:
            Location of the definition, or None if not found
        """
        cache_key = f"def:{symbol.id}"
        if self._is_cached(cache_key, symbol.file_path):
            return self.cache[cache_key].data

        params = self._position_params(symbol)
        if self.use_vscode_bridge:
            # Legacy: VSCode Bridge HTTP mode
            result = await self._request_vscode_bridge("get_definition", params)
        else:
            # Default: Standalone mode
            manager = await self._ensure_manager()
            result = await manager.get_definition(**params)
        location = self._first_location(result)

        self._cache(cache_key, symbol.file_path, location)
        return location

    async def get_call_hierarchy(self, symbol: "CodeSymbolNode") -> List["CallHierarchyItem"]:
        """Get incoming calls for a symbol.

        If no call hierarchy data is available, falls back to representing
        each reference to the symbol as an inferred caller.

        Args:
            symbol: The code symbol to get call hierarchy for

        Returns:
            List of CallHierarchyItem representing callers.
            Returns empty list on error or if not supported.
        """
        cache_key = f"calls:{symbol.id}"
        if self._is_cached(cache_key, symbol.file_path):
            return self.cache[cache_key].data

        items: List["CallHierarchyItem"] = []
        params = self._position_params(symbol)

        if self.use_vscode_bridge:
            # Legacy: VSCode Bridge HTTP mode
            result = await self._request_vscode_bridge("get_call_hierarchy", params)
            if result is None:
                # Fallback: use references
                items = self._call_items_from_references(
                    await self.get_references(symbol)
                )
            elif isinstance(result, list):
                for item in result:
                    try:
                        raw_path = item.get("file_path", item.get("uri", ""))
                        items.append(CallHierarchyItem(
                            name=item.get("name", "unknown"),
                            kind=item.get("kind", "unknown"),
                            # The bridge may hand back a file:// URI here;
                            # normalise it like the standalone branch does.
                            file_path=self._uri_to_path(raw_path),
                            range=self._range_from_lsp(item.get("range", {})),
                            detail=item.get("detail"),
                        ))
                    except (KeyError, TypeError, AttributeError):
                        continue
        else:
            # Default: Standalone mode
            manager = await self._ensure_manager()
            hierarchy_items = await manager.get_call_hierarchy_items(**params)
            if hierarchy_items:
                # Get incoming calls for each item
                for h_item in hierarchy_items:
                    incoming = await manager.get_incoming_calls(h_item)
                    for call in incoming or []:
                        try:
                            from_item = call.get("from", {})
                            items.append(CallHierarchyItem(
                                name=from_item.get("name", "unknown"),
                                kind=str(from_item.get("kind", "unknown")),
                                file_path=self._uri_to_path(from_item.get("uri", "")),
                                range=self._range_from_lsp(from_item.get("range", {})),
                                detail=from_item.get("detail"),
                            ))
                        except (KeyError, TypeError, AttributeError):
                            continue
            else:
                # Fallback: use references
                items = self._call_items_from_references(
                    await self.get_references(symbol)
                )

        self._cache(cache_key, symbol.file_path, items)
        return items

    async def get_document_symbols(self, file_path: str) -> List[Dict[str, Any]]:
        """Get all symbols in a document (batch operation).

        This is more efficient than individual hover queries when processing
        multiple locations in the same file.

        Args:
            file_path: Path to the source file

        Returns:
            Flat list of symbol dictionaries (see
            ``_flatten_document_symbols`` for the keys).
            Returns empty list on error or timeout.
        """
        cache_key = f"symbols:{file_path}"
        if self._is_cached(cache_key, file_path):
            return self.cache[cache_key].data

        symbols: List[Dict[str, Any]] = []
        if self.use_vscode_bridge:
            # Legacy: VSCode Bridge HTTP mode
            result = await self._request_vscode_bridge("get_document_symbols", {
                "file_path": file_path,
            })
            if isinstance(result, list):
                symbols = self._flatten_document_symbols(result)
        else:
            # Default: Standalone mode
            manager = await self._ensure_manager()
            result = await manager.get_document_symbols(file_path)
            if result:
                symbols = self._flatten_document_symbols(result)

        self._cache(cache_key, file_path, symbols)
        return symbols

    def _flatten_document_symbols(
        self, symbols: List[Dict[str, Any]], parent_name: str = ""
    ) -> List[Dict[str, Any]]:
        """Flatten nested document symbols into a flat list.

        Document symbols can be nested (e.g., methods inside classes).
        Each symbol is emitted before its children (pre-order), and every
        entry records the dotted name of its ancestors in ``parent``.

        Args:
            symbols: List of symbol dictionaries (may be nested)
            parent_name: Qualified name of the enclosing symbol ("" at top level)

        Returns:
            Flat list of dicts with keys ``name``, ``kind``, ``range``,
            ``selection_range``, ``detail`` and ``parent``.
        """
        flat: List[Dict[str, Any]] = []
        for sym in symbols:
            if "range" in sym:
                symbol_range = sym["range"]
            else:
                # Entries without "range" may carry it under "location";
                # tolerate a missing or null "location".
                symbol_range = (sym.get("location") or {}).get("range", {})

            flat.append({
                "name": sym.get("name", "unknown"),
                "kind": self._symbol_kind_to_string(sym.get("kind", 0)),
                "range": symbol_range,
                "selection_range": sym.get("selectionRange", {}),
                "detail": sym.get("detail", ""),
                "parent": parent_name,
            })

            # Recursively process children
            children = sym.get("children", [])
            if children:
                qualified_name = sym.get("name", "")
                if parent_name:
                    qualified_name = f"{parent_name}.{qualified_name}"
                flat.extend(self._flatten_document_symbols(children, qualified_name))
        return flat

    def _symbol_kind_to_string(self, kind: int) -> str:
        """Convert LSP SymbolKind integer to string.

        Args:
            kind: LSP SymbolKind enum value

        Returns:
            Human-readable name, or "unknown" for values outside the table
        """
        return self._SYMBOL_KINDS.get(kind, "unknown")

    async def get_hover(self, symbol: "CodeSymbolNode") -> Optional[str]:
        """Get hover documentation for a symbol.

        Args:
            symbol: The code symbol to get hover info for

        Returns:
            Hover documentation as string, or None if not available
        """
        cache_key = f"hover:{symbol.id}"
        if self._is_cached(cache_key, symbol.file_path):
            return self.cache[cache_key].data

        hover_text: Optional[str] = None
        params = self._position_params(symbol)
        if self.use_vscode_bridge:
            # Legacy: VSCode Bridge HTTP mode
            result = await self._request_vscode_bridge("get_hover", params)
            if result:
                hover_text = self._parse_hover_result(result)
        else:
            # Default: Standalone mode (the manager's result is returned as-is)
            manager = await self._ensure_manager()
            hover_text = await manager.get_hover(**params)

        self._cache(cache_key, symbol.file_path, hover_text)
        return hover_text

    @staticmethod
    def _join_hover_parts(items: List[Any]) -> Optional[str]:
        """Join hover fragments (strings or ``{"value": ...}`` dicts).

        Dict fragments with no usable text are skipped; returns None when no
        fragment was collected.
        """
        parts: List[str] = []
        for item in items:
            if isinstance(item, str):
                parts.append(item)
            elif isinstance(item, dict):
                value = item.get("value", item.get("contents", ""))
                if value:
                    parts.append(str(value))
        return "\n\n".join(parts) if parts else None

    def _parse_hover_result(self, result: Any) -> Optional[str]:
        """Parse a hover result into a single string.

        Accepted shapes:
            * a plain string;
            * a list of strings / ``{"value": ...}`` dicts;
            * a dict whose ``contents`` (or ``value``) is a string, such a
              list, or a single ``{"kind"|"language": ..., "value": ...}``
              dict.

        Returns:
            The hover text, or None if nothing usable was found.
        """
        if isinstance(result, str):
            return result
        if isinstance(result, list):
            return self._join_hover_parts(result)
        if isinstance(result, dict):
            contents = result.get("contents", result.get("value", ""))
            if isinstance(contents, str):
                return contents
            if isinstance(contents, list):
                return self._join_hover_parts(contents)
            if isinstance(contents, dict):
                # A single {"value": ...} object was previously dropped,
                # losing the hover text entirely.
                value = contents.get("value")
                return str(value) if value else None
        return None

    async def __aenter__(self) -> "LspBridge":
        """Async context manager entry."""
        return self

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Async context manager exit - close connections."""
        await self.close()
# Manual smoke test: run this module directly to exercise each query once.
if __name__ == "__main__":
    import sys

    async def test_lsp_bridge():
        """Run every LspBridge query against this file and print the results.

        Each step is wrapped in its own try/except so one failing query does
        not prevent the remaining ones from running.
        """
        print("Testing LspBridge (Standalone Mode)...")
        print(f"Timeout: {LspBridge.DEFAULT_TIMEOUT}s")
        print(f"Cache TTL: {LspBridge.DEFAULT_CACHE_TTL}s")
        print()

        # Build a probe symbol that points at this very file.
        this_file = os.path.abspath(__file__)
        probe_range = Range(
            start_line=96,
            start_character=1,
            end_line=200,
            end_character=1,
        )
        probe = CodeSymbolNode(
            id=f"{this_file}:LspBridge:96",
            name="LspBridge",
            kind="class",
            file_path=this_file,
            range=probe_range,
        )
        print(f"Test symbol: {probe.name} in {os.path.basename(probe.file_path)}")
        print()

        # Workspace root is four directory levels above this file.
        root_dir = Path(__file__)
        for _ in range(4):
            root_dir = root_dir.parent

        # Use standalone mode (default)
        async with LspBridge(workspace_root=str(root_dir)) as bridge:
            print("1. Testing get_document_symbols...")
            try:
                found_symbols = await bridge.get_document_symbols(this_file)
                print(f" Found {len(found_symbols)} symbols")
                for entry in found_symbols[:5]:
                    print(f" - {entry.get('name')} ({entry.get('kind')})")
            except Exception as exc:
                print(f" Error: {exc}")
            print()

            print("2. Testing get_definition...")
            try:
                target = await bridge.get_definition(probe)
                if not target:
                    print(" No definition found")
                else:
                    print(f" Definition: {os.path.basename(target.file_path)}:{target.line}")
            except Exception as exc:
                print(f" Error: {exc}")
            print()

            print("3. Testing get_references...")
            try:
                references = await bridge.get_references(probe)
                print(f" Found {len(references)} references")
                for location in references[:3]:
                    print(f" - {os.path.basename(location.file_path)}:{location.line}")
            except Exception as exc:
                print(f" Error: {exc}")
            print()

            print("4. Testing get_hover...")
            try:
                hover_text = await bridge.get_hover(probe)
                if not hover_text:
                    print(" No hover info found")
                else:
                    print(f" Hover: {hover_text[:100]}...")
            except Exception as exc:
                print(f" Error: {exc}")
            print()

            print("5. Testing get_call_hierarchy...")
            try:
                callers = await bridge.get_call_hierarchy(probe)
                print(f" Found {len(callers)} call hierarchy items")
                for caller in callers[:3]:
                    print(f" - {caller.name} in {os.path.basename(caller.file_path)}")
            except Exception as exc:
                print(f" Error: {exc}")
            print()

            print("6. Testing cache...")
            print(f" Cache entries: {len(bridge.cache)}")
            for cache_key in list(bridge.cache)[:5]:
                print(f" - {cache_key}")
            print()

        print("Test complete!")

    # Run the test
    # Note: On Windows, use default ProactorEventLoop (supports subprocess creation)
    asyncio.run(test_lsp_bridge())