Files
Claude-Code-Workflow/codex-lens/tests/integration/test_lsp_search_integration.py
catlog22 2f3a14e946 Add unit tests for LspGraphBuilder class
- Implement comprehensive unit tests for the LspGraphBuilder class to validate its functionality in building code association graphs.
- Tests cover various scenarios including single level graph expansion, max nodes and depth boundaries, concurrent expansion limits, document symbol caching, error handling during node expansion, and edge cases such as empty seed lists and self-referencing nodes.
- Utilize pytest and asyncio for asynchronous testing and mocking of LspBridge methods.
2026-01-20 12:49:31 +08:00

597 lines
20 KiB
Python

"""Integration tests for HybridSearchEngine LSP graph search.
Tests the _search_lsp_graph method which orchestrates:
1. Seed retrieval via vector/splade/exact fallback chain
2. LSP graph expansion via LspBridge and LspGraphBuilder
3. Result deduplication and merging
Test Priority:
- P0: Critical path tests (e2e success, fallback chain)
- P1: Important edge cases (no seeds, bridge failures)
- P2: Supplementary tests (deduplication)
"""
from __future__ import annotations
import asyncio
import logging
import tempfile
from pathlib import Path
from typing import Any, Dict, List, Optional
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from codexlens.entities import SearchResult
from codexlens.hybrid_search.data_structures import (
CallHierarchyItem,
CodeAssociationGraph,
CodeSymbolNode,
Range,
)
from codexlens.search.hybrid_search import HybridSearchEngine
# -----------------------------------------------------------------------------
# Fixtures
# -----------------------------------------------------------------------------
@pytest.fixture
def tmp_index_path(tmp_path: Path) -> Path:
    """Return the path of an empty ``_index.db`` file created under ``tmp_path``.

    The file is written to disk with zero bytes so that existence checks on
    the index path succeed.
    """
    index_file = tmp_path.joinpath("_index.db")
    index_file.write_bytes(b"")
    return index_file
@pytest.fixture
def sample_search_result() -> SearchResult:
    """Return the seed hit used by the tests: ``auth_flow`` in ``/path/to/file.py``.

    The result spans lines 10-20, is a ``function`` symbol and carries a
    score of 0.9; ``content`` and ``excerpt`` hold the same snippet.
    """
    snippet = "def auth_flow(): ..."
    seed_fields = {
        "path": "/path/to/file.py",
        "content": snippet,
        "excerpt": snippet,
        "start_line": 10,
        "end_line": 20,
        "symbol_name": "auth_flow",
        "symbol_kind": "function",
        "score": 0.9,
    }
    return SearchResult(**seed_fields)
@pytest.fixture
def sample_search_result_2() -> SearchResult:
    """Return a second hit: ``init_db`` in ``/path/to/other.py``.

    The result spans lines 5-15, is a ``function`` symbol and carries a
    score of 0.85; ``content`` and ``excerpt`` hold the same snippet.
    """
    snippet = "def init_db(): ..."
    hit_fields = {
        "path": "/path/to/other.py",
        "content": snippet,
        "excerpt": snippet,
        "start_line": 5,
        "end_line": 15,
        "symbol_name": "init_db",
        "symbol_kind": "function",
        "score": 0.85,
    }
    return SearchResult(**hit_fields)
@pytest.fixture
def sample_code_symbol_node() -> CodeSymbolNode:
    """Return a graph node for ``helper_func`` in ``/path/to/related.py`` (lines 30-40).

    Tests add this node to the mocked graph as a symbol that is not a seed.
    """
    span = Range(start_line=30, start_character=0, end_line=40, end_character=0)
    return CodeSymbolNode(
        id="/path/to/related.py:helper_func:30",
        name="helper_func",
        kind="function",
        file_path="/path/to/related.py",
        range=span,
        raw_code="def helper_func(): pass",
        docstring="Helper function",
    )
@pytest.fixture
def sample_code_symbol_node_2() -> CodeSymbolNode:
    """Return a graph node for ``validate`` in ``/path/to/util.py`` (lines 50-60).

    Tests add this node to the mocked graph as a second non-seed symbol.
    """
    span = Range(start_line=50, start_character=0, end_line=60, end_character=0)
    return CodeSymbolNode(
        id="/path/to/util.py:validate:50",
        name="validate",
        kind="function",
        file_path="/path/to/util.py",
        range=span,
        raw_code="def validate(): pass",
        docstring="Validation function",
    )
@pytest.fixture
def mock_search_engine() -> HybridSearchEngine:
    """Return a ``HybridSearchEngine`` constructed with no arguments."""
    engine = HybridSearchEngine()
    return engine
def create_mock_graph_with_seed_and_related(
    seed_result: SearchResult,
    related_nodes: List[CodeSymbolNode],
) -> CodeAssociationGraph:
    """Build a graph holding a node derived from *seed_result* plus *related_nodes*.

    The seed node's id has the form ``"<path>:<symbol_name>:<start_line>"``,
    with ``"unknown"`` substituted for a missing symbol name and ``0`` for a
    missing start line. The seed node's range falls back to line ``1`` when
    the seed has no start or end line.

    Args:
        seed_result: Search hit that is converted into the seed node.
        related_nodes: Nodes added after the seed node, in the given order.

    Returns:
        A ``CodeAssociationGraph`` containing the seed node followed by every
        node in *related_nodes*.
    """
    graph = CodeAssociationGraph()

    # Compute the fallback name once; both the id and the node name use it.
    symbol = seed_result.symbol_name or "unknown"
    seed_span = Range(
        start_line=seed_result.start_line or 1,
        start_character=0,
        end_line=seed_result.end_line or 1,
        end_character=0,
    )
    seed_node = CodeSymbolNode(
        id=f"{seed_result.path}:{symbol}:{seed_result.start_line or 0}",
        name=symbol,
        kind=seed_result.symbol_kind or "unknown",
        file_path=seed_result.path,
        range=seed_span,
    )

    # Seed first, then the related nodes in caller-supplied order.
    for node in (seed_node, *related_nodes):
        graph.add_node(node)
    return graph
# -----------------------------------------------------------------------------
# P0: Critical Tests
# -----------------------------------------------------------------------------
class TestP0CriticalLspSearch:
    """P0 Critical: Core E2E tests for LSP graph search."""

    @staticmethod
    def _fake_lsp_classes(graph: CodeAssociationGraph) -> tuple:
        """Return ``(bridge_class, builder_class)`` mocks that hand back *graph*.

        Instances of the bridge class mock have async ``__aenter__`` and
        ``__aexit__`` configured; ``__aenter__`` resolves to an ``AsyncMock``.
        Instances of the builder class mock expose a ``build_from_seeds``
        coroutine that ignores its arguments and returns *graph*.
        """
        bridge_cls = MagicMock()
        bridge_ctx = bridge_cls.return_value
        bridge_ctx.__aenter__ = AsyncMock(return_value=AsyncMock())
        bridge_ctx.__aexit__ = AsyncMock(return_value=None)

        async def build_from_seeds(seeds, bridge):
            return graph

        builder = MagicMock()
        builder.build_from_seeds = build_from_seeds
        return bridge_cls, MagicMock(return_value=builder)

    def test_e2e_lsp_search_vector_seed_success(
        self,
        tmp_index_path: Path,
        sample_search_result: SearchResult,
        sample_code_symbol_node: CodeSymbolNode,
        sample_code_symbol_node_2: CodeSymbolNode,
    ) -> None:
        """E2E LSP search where the vector search supplies the seed.

        Input: query="authentication flow"
        Mock: _search_vector returns one SearchResult (the seed);
              _search_splade and _search_exact return []
        Mock: LspGraphBuilder returns a graph with the seed and 2 other symbols
        Assert: exactly the 2 non-seed symbols are returned
        """
        engine = HybridSearchEngine()
        expanded_nodes = [sample_code_symbol_node, sample_code_symbol_node_2]
        graph = create_mock_graph_with_seed_and_related(
            sample_search_result, expanded_nodes
        )
        bridge_cls, builder_cls = self._fake_lsp_classes(graph)

        # Seed searches are patched on the engine; the LSP names are patched
        # in the hybrid_search module, and codexlens.lsp is stubbed in
        # sys.modules for the duration of the call.
        with patch.object(
            engine, "_search_vector", return_value=[sample_search_result]
        ) as vector_search, patch.object(
            engine, "_search_splade", return_value=[]
        ), patch.object(
            engine, "_search_exact", return_value=[]
        ), patch.dict(
            "sys.modules", {"codexlens.lsp": MagicMock()}
        ), patch(
            "codexlens.search.hybrid_search.HAS_LSP", True
        ), patch(
            "codexlens.search.hybrid_search.LspBridge", bridge_cls
        ), patch(
            "codexlens.search.hybrid_search.LspGraphBuilder", builder_cls
        ):
            results = engine._search_lsp_graph(
                index_path=tmp_index_path,
                query="authentication flow",
                limit=10,
                max_depth=1,
                max_nodes=20,
            )

        def node_key(hit):
            # Same "<path>:<name>:<line>" shape the graph helper uses for ids.
            return f"{hit.path}:{hit.symbol_name or 'unknown'}:{hit.start_line or 0}"

        # The vector search must have been consulted exactly once.
        vector_search.assert_called_once()

        # Only the two expanded symbols come back.
        assert len(results) == 2

        # The seed itself is filtered out of the output.
        returned_keys = {node_key(hit) for hit in results}
        assert node_key(sample_search_result) not in returned_keys

        # Both graph-expanded files are represented.
        returned_paths = {hit.path for hit in results}
        assert sample_code_symbol_node.file_path in returned_paths
        assert sample_code_symbol_node_2.file_path in returned_paths

    def test_seed_fallback_chain_vector_fails_fts_succeeds(
        self,
        tmp_index_path: Path,
        sample_search_result: SearchResult,
        sample_code_symbol_node: CodeSymbolNode,
    ) -> None:
        """Seed lookup falls through vector -> splade -> exact.

        Input: query="init_db"
        Mock: _search_vector returns []
        Mock: _search_splade returns []
        Mock: _search_exact returns 1 seed
        Assert: the three searches run once each, in that order, and the
                single non-seed graph node is returned
        """
        engine = HybridSearchEngine()
        call_order: List[str] = []

        def recording_search(stage, hits):
            """Make a side effect that logs *stage* and returns a copy of *hits*."""

            def _search(*args, **kwargs):
                call_order.append(stage)
                return list(hits)

            return _search

        graph = create_mock_graph_with_seed_and_related(
            sample_search_result, [sample_code_symbol_node]
        )
        bridge_cls, builder_cls = self._fake_lsp_classes(graph)

        with patch.object(
            engine, "_search_vector", side_effect=recording_search("vector", [])
        ) as vector_search, patch.object(
            engine, "_search_splade", side_effect=recording_search("splade", [])
        ) as splade_search, patch.object(
            engine,
            "_search_exact",
            side_effect=recording_search("exact", [sample_search_result]),
        ) as exact_search, patch(
            "codexlens.search.hybrid_search.HAS_LSP", True
        ), patch(
            "codexlens.search.hybrid_search.LspBridge", bridge_cls
        ), patch(
            "codexlens.search.hybrid_search.LspGraphBuilder", builder_cls
        ):
            results = engine._search_lsp_graph(
                index_path=tmp_index_path,
                query="init_db",
                limit=10,
                max_depth=1,
                max_nodes=20,
            )

        # Fallback order: vector -> splade -> exact.
        assert call_order == ["vector", "splade", "exact"]

        # Every stage ran exactly once.
        for search_mock in (vector_search, splade_search, exact_search):
            search_mock.assert_called_once()

        # Graph expansion contributed the single related node.
        assert len(results) == 1
# -----------------------------------------------------------------------------
# P1: Important Tests
# -----------------------------------------------------------------------------
class TestP1ImportantLspSearch:
    """P1 Important: Edge case tests for LSP graph search."""

    def test_e2e_lsp_search_no_seeds_found(
        self,
        tmp_index_path: Path,
    ) -> None:
        """Test LSP search when no seeds are found from any source.

        Input: query="non_existent_symbol"
        Mock: All seed search methods return []
        Assert: Returns [], every seed search ran once, LspBridge is never
                instantiated
        """
        engine = HybridSearchEngine()

        # With no seeds, this class mock must never be called.
        mock_bridge_class = MagicMock()

        with patch.object(
            engine, "_search_vector", return_value=[]
        ) as mock_vector, patch.object(
            engine, "_search_splade", return_value=[]
        ) as mock_splade, patch.object(
            engine, "_search_exact", return_value=[]
        ) as mock_exact, patch(
            "codexlens.search.hybrid_search.HAS_LSP", True
        ), patch(
            "codexlens.search.hybrid_search.LspBridge", mock_bridge_class
        ):
            results = engine._search_lsp_graph(
                index_path=tmp_index_path,
                query="non_existent_symbol",
                limit=10,
                max_depth=1,
                max_nodes=20,
            )

        # All search methods should be tried
        mock_vector.assert_called_once()
        mock_splade.assert_called_once()
        mock_exact.assert_called_once()

        # Should return empty list
        assert results == []

        # LspBridge should not be instantiated (no seeds)
        mock_bridge_class.assert_not_called()

    def test_e2e_lsp_search_bridge_fails(
        self,
        tmp_index_path: Path,
        sample_search_result: SearchResult,
        caplog: pytest.LogCaptureFixture,
    ) -> None:
        """Test graceful degradation when LspBridge connection fails.

        Mock: _search_vector returns a valid seed
        Mock: entering the LspBridge async context raises
              Exception("Connection refused")
        Assert: Returns [] without propagating the exception, and the bridge
                class was actually instantiated (so the empty result comes
                from the failure path, not from an earlier bail-out)
        """
        engine = HybridSearchEngine()

        # Bridge whose ``async with`` entry fails.
        mock_bridge_class = MagicMock()
        mock_bridge_class.return_value.__aenter__ = AsyncMock(
            side_effect=Exception("Connection refused")
        )
        mock_bridge_class.return_value.__aexit__ = AsyncMock(return_value=None)
        mock_builder_class = MagicMock()

        with patch.object(
            engine, "_search_vector", return_value=[sample_search_result]
        ), patch(
            "codexlens.search.hybrid_search.HAS_LSP", True
        ), patch(
            "codexlens.search.hybrid_search.LspBridge", mock_bridge_class
        ), patch(
            "codexlens.search.hybrid_search.LspGraphBuilder", mock_builder_class
        ), caplog.at_level(logging.DEBUG):
            results = engine._search_lsp_graph(
                index_path=tmp_index_path,
                query="authentication",
                limit=10,
                max_depth=1,
                max_nodes=20,
            )

        # Should return empty list on failure
        assert results == []

        # Without this check the test would also pass if the search returned
        # early and never touched the bridge, which would leave the failure
        # path untested.
        assert mock_bridge_class.called, "LspBridge was never instantiated"
# -----------------------------------------------------------------------------
# P2: Supplementary Tests
# -----------------------------------------------------------------------------
class TestP2SupplementaryLspSearch:
    """P2 Supplementary: Deduplication and edge cases."""

    @staticmethod
    def _fake_lsp_classes(graph: CodeAssociationGraph) -> tuple:
        """Return ``(bridge_class, builder_class)`` mocks that hand back *graph*.

        Instances of the bridge class mock have async ``__aenter__`` and
        ``__aexit__`` configured; instances of the builder class mock expose
        a ``build_from_seeds`` coroutine that returns *graph* regardless of
        its arguments.
        """
        bridge_cls = MagicMock()
        bridge_ctx = bridge_cls.return_value
        bridge_ctx.__aenter__ = AsyncMock(return_value=AsyncMock())
        bridge_ctx.__aexit__ = AsyncMock(return_value=None)

        async def build_from_seeds(seeds, bridge):
            return graph

        builder = MagicMock()
        builder.build_from_seeds = build_from_seeds
        return bridge_cls, MagicMock(return_value=builder)

    def test_result_deduping_seed_not_returned(
        self,
        tmp_index_path: Path,
        sample_search_result: SearchResult,
    ) -> None:
        """Seed symbols present in the graph are dropped from the output.

        Mock: _search_vector returns the ``sample_search_result`` seed
        Mock: the graph contains that seed plus ``other_func`` from
              ``/different/path.py``
        Assert: only ``other_func``'s file appears in the results
        """
        engine = HybridSearchEngine()

        # The one node that is not a seed and therefore should be returned.
        other_span = Range(
            start_line=100, start_character=0, end_line=110, end_character=0
        )
        different_node = CodeSymbolNode(
            id="/different/path.py:other_func:100",
            name="other_func",
            kind="function",
            file_path="/different/path.py",
            range=other_span,
            raw_code="def other_func(): pass",
            docstring="Other function",
        )
        graph = create_mock_graph_with_seed_and_related(
            sample_search_result, [different_node]
        )
        bridge_cls, builder_cls = self._fake_lsp_classes(graph)

        with patch.object(
            engine, "_search_vector", return_value=[sample_search_result]
        ), patch(
            "codexlens.search.hybrid_search.HAS_LSP", True
        ), patch(
            "codexlens.search.hybrid_search.LspBridge", bridge_cls
        ), patch(
            "codexlens.search.hybrid_search.LspGraphBuilder", builder_cls
        ):
            results = engine._search_lsp_graph(
                index_path=tmp_index_path,
                query="test query",
                limit=10,
                max_depth=1,
                max_nodes=20,
            )

        # Exactly one hit: the non-seed node.
        assert len(results) == 1

        returned_paths = [hit.path for hit in results]
        # The seed's file must be absent ...
        assert sample_search_result.path not in returned_paths
        # ... and the other node's file must be present.
        assert "/different/path.py" in returned_paths

    def test_lsp_not_available_returns_empty(
        self,
        tmp_index_path: Path,
    ) -> None:
        """``_search_lsp_graph`` returns [] when ``HAS_LSP`` is patched to False."""
        engine = HybridSearchEngine()
        lsp_disabled = patch("codexlens.search.hybrid_search.HAS_LSP", False)
        with lsp_disabled:
            results = engine._search_lsp_graph(
                index_path=tmp_index_path,
                query="test",
                limit=10,
                max_depth=1,
                max_nodes=20,
            )
        assert results == []

    def test_graph_with_no_new_nodes_returns_empty(
        self,
        tmp_index_path: Path,
        sample_search_result: SearchResult,
    ) -> None:
        """A graph that holds only the seed node yields no results."""
        engine = HybridSearchEngine()

        # No related nodes: the graph consists of the seed alone.
        seed_only_graph = create_mock_graph_with_seed_and_related(
            sample_search_result, []
        )
        bridge_cls, builder_cls = self._fake_lsp_classes(seed_only_graph)

        with patch.object(
            engine, "_search_vector", return_value=[sample_search_result]
        ), patch(
            "codexlens.search.hybrid_search.HAS_LSP", True
        ), patch(
            "codexlens.search.hybrid_search.LspBridge", bridge_cls
        ), patch(
            "codexlens.search.hybrid_search.LspGraphBuilder", builder_cls
        ):
            results = engine._search_lsp_graph(
                index_path=tmp_index_path,
                query="test",
                limit=10,
                max_depth=1,
                max_nodes=20,
            )

        # Every node in the graph is a seed, so nothing survives filtering.
        assert results == []