mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-10 02:24:35 +08:00
删除 SPLADE 稀疏神经搜索后端和 hybrid_cascade 策略,
将搜索架构从 6 种后端简化为 4 种(FTS Exact/Fuzzy, Binary Vector, Dense Vector, LSP)。
主要变更:
- 删除 splade_encoder.py, splade_index.py, migration_009 等 4 个文件
- 移除 config.py 中 SPLADE 相关配置(enable_splade, splade_model 等)
- DEFAULT_WEIGHTS 改为 FTS 权重 {exact:0.25, fuzzy:0.1, vector:0.5, lsp:0.15}
- 删除 hybrid_cascade_search(),所有 cascade fallback 改为 self.search()
- API fusion_strategy='hybrid' 向后兼容映射到 binary_rerank
- 删除 CLI index_splade/splade_status 命令和 --method splade
- 更新测试、基准测试和文档
584 lines
20 KiB
Python
584 lines
20 KiB
Python
"""Integration tests for HybridSearchEngine LSP graph search.

Tests the _search_lsp_graph method which orchestrates:
1. Seed retrieval via vector/exact fallback chain
2. LSP graph expansion via LspBridge and LspGraphBuilder
3. Result deduplication and merging

Test Priority:
- P0: Critical path tests (e2e success, fallback chain)
- P1: Important edge cases (no seeds, bridge failures)
- P2: Supplementary tests (deduplication)
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import tempfile
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from codexlens.entities import SearchResult
|
|
from codexlens.hybrid_search.data_structures import (
|
|
CallHierarchyItem,
|
|
CodeAssociationGraph,
|
|
CodeSymbolNode,
|
|
Range,
|
|
)
|
|
from codexlens.search.hybrid_search import HybridSearchEngine
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Fixtures
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.fixture
def tmp_index_path(tmp_path: Path) -> Path:
    """Return the path of a zero-byte ``_index.db`` placeholder.

    The file lives inside pytest's per-test ``tmp_path`` directory and is
    written empty so that existence checks on the index path succeed.
    """
    index_file = tmp_path.joinpath("_index.db")
    # An empty file is enough: only the path's existence matters here.
    index_file.write_bytes(b"")
    return index_file
|
|
|
|
|
|
@pytest.fixture
def sample_search_result() -> SearchResult:
    """Return the ``auth_flow`` SearchResult that the tests use as a seed."""
    # Content and excerpt intentionally carry the same one-line snippet.
    snippet = "def auth_flow(): ..."
    return SearchResult(
        path="/path/to/file.py",
        content=snippet,
        excerpt=snippet,
        start_line=10,
        end_line=20,
        symbol_name="auth_flow",
        symbol_kind="function",
        score=0.9,
    )
|
|
|
|
|
|
@pytest.fixture
def sample_search_result_2() -> SearchResult:
    """Return a second SearchResult, describing ``init_db`` in another file."""
    # Content and excerpt intentionally carry the same one-line snippet.
    snippet = "def init_db(): ..."
    return SearchResult(
        path="/path/to/other.py",
        content=snippet,
        excerpt=snippet,
        start_line=5,
        end_line=15,
        symbol_name="init_db",
        symbol_kind="function",
        score=0.85,
    )
|
|
|
|
|
|
@pytest.fixture
def sample_code_symbol_node() -> CodeSymbolNode:
    """Return a ``helper_func`` node used as a graph-expansion result."""
    span = Range(
        start_line=30,
        start_character=0,
        end_line=40,
        end_character=0,
    )
    return CodeSymbolNode(
        id="/path/to/related.py:helper_func:30",
        name="helper_func",
        kind="function",
        file_path="/path/to/related.py",
        range=span,
        raw_code="def helper_func(): pass",
        docstring="Helper function",
    )
|
|
|
|
|
|
@pytest.fixture
def sample_code_symbol_node_2() -> CodeSymbolNode:
    """Return a ``validate`` node, a second graph-expansion result."""
    span = Range(
        start_line=50,
        start_character=0,
        end_line=60,
        end_character=0,
    )
    return CodeSymbolNode(
        id="/path/to/util.py:validate:50",
        name="validate",
        kind="function",
        file_path="/path/to/util.py",
        range=span,
        raw_code="def validate(): pass",
        docstring="Validation function",
    )
|
|
|
|
|
|
@pytest.fixture
def mock_search_engine() -> HybridSearchEngine:
    """Return a HybridSearchEngine built with its default arguments.

    Despite the fixture name, this is a real engine instance, not a mock.
    """
    engine = HybridSearchEngine()
    return engine
|
|
|
|
|
|
def create_mock_graph_with_seed_and_related(
    seed_result: SearchResult,
    related_nodes: List[CodeSymbolNode],
) -> CodeAssociationGraph:
    """Build a CodeAssociationGraph holding a seed node plus related nodes.

    Args:
        seed_result: Search hit converted into the graph's seed node. Missing
            symbol name/kind fall back to ``"unknown"``.
        related_nodes: Nodes added to the graph after the seed, in order.

    Returns:
        The populated graph (nodes only; no edges are added here).
    """
    symbol = seed_result.symbol_name or "unknown"

    # The node id falls back to line 0 while the Range falls back to line 1;
    # both fallbacks are kept exactly as-is.
    node_id = f"{seed_result.path}:{symbol}:{seed_result.start_line or 0}"
    span = Range(
        start_line=seed_result.start_line or 1,
        start_character=0,
        end_line=seed_result.end_line or 1,
        end_character=0,
    )

    graph = CodeAssociationGraph()
    graph.add_node(
        CodeSymbolNode(
            id=node_id,
            name=symbol,
            kind=seed_result.symbol_kind or "unknown",
            file_path=seed_result.path,
            range=span,
        )
    )
    for related in related_nodes:
        graph.add_node(related)

    return graph
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# P0: Critical Tests
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
class TestP0CriticalLspSearch:
    """P0 Critical: Core E2E tests for LSP graph search."""

    @staticmethod
    def _lsp_doubles(graph: CodeAssociationGraph) -> tuple:
        """Build stand-ins for ``LspBridge`` and ``LspGraphBuilder``.

        Args:
            graph: Graph that the fake builder's ``build_from_seeds`` returns.

        Returns:
            ``(bridge_class, builder_class)``. Calling ``bridge_class`` gives
            an object whose ``__aenter__`` resolves to an ``AsyncMock`` and
            whose ``__aexit__`` resolves to ``None``. Calling
            ``builder_class`` gives an object whose ``build_from_seeds`` is a
            coroutine function returning ``graph``.
        """
        bridge_instance = AsyncMock()
        bridge_class = MagicMock()
        bridge_class.return_value.__aenter__ = AsyncMock(
            return_value=bridge_instance
        )
        bridge_class.return_value.__aexit__ = AsyncMock(return_value=None)

        # Parameter names are kept as-is in case the engine passes them by
        # keyword.
        async def mock_build(seeds, bridge):
            return graph

        builder_instance = MagicMock()
        builder_instance.build_from_seeds = mock_build
        builder_class = MagicMock(return_value=builder_instance)
        return bridge_class, builder_class

    def test_e2e_lsp_search_vector_seed_success(
        self,
        tmp_index_path: Path,
        sample_search_result: SearchResult,
        sample_code_symbol_node: CodeSymbolNode,
        sample_code_symbol_node_2: CodeSymbolNode,
    ) -> None:
        """Test E2E LSP search with vector providing seed, returning graph-expanded results.

        Input: query="authentication flow"
        Mock: _search_vector returns 1 SearchResult as seed
        Mock: LspBridge/LspGraphBuilder returns 2 related symbols
        Assert: Returns 2 new results (seed is filtered from final results)
        """
        engine = HybridSearchEngine()

        # Graph contains the seed plus two related nodes.
        mock_graph = create_mock_graph_with_seed_and_related(
            sample_search_result,
            [sample_code_symbol_node, sample_code_symbol_node_2],
        )
        mock_bridge_class, mock_builder_class = self._lsp_doubles(mock_graph)

        # Patches are entered left to right: seed searches first, then the
        # LSP module stub and availability flag, then the LSP classes.
        with patch.object(
            engine, "_search_vector", return_value=[sample_search_result]
        ) as mock_vector, patch.object(
            engine, "_search_exact", return_value=[]
        ), patch.dict(
            "sys.modules", {"codexlens.lsp": MagicMock()}
        ), patch(
            "codexlens.search.hybrid_search.HAS_LSP", True
        ), patch(
            "codexlens.search.hybrid_search.LspBridge", mock_bridge_class
        ), patch(
            "codexlens.search.hybrid_search.LspGraphBuilder", mock_builder_class
        ):
            results = engine._search_lsp_graph(
                index_path=tmp_index_path,
                query="authentication flow",
                limit=10,
                max_depth=1,
                max_nodes=20,
            )

        # Vector search is the first seed source and must have been used.
        mock_vector.assert_called_once()

        # Only the two non-seed nodes come back.
        assert len(results) == 2

        # The seed itself must not be part of the results.
        seed_node_id = f"{sample_search_result.path}:{sample_search_result.symbol_name or 'unknown'}:{sample_search_result.start_line or 0}"
        result_node_ids = {
            f"{r.path}:{r.symbol_name or 'unknown'}:{r.start_line or 0}"
            for r in results
        }
        assert seed_node_id not in result_node_ids

        # The results are the graph-expanded nodes.
        result_paths = {r.path for r in results}
        assert sample_code_symbol_node.file_path in result_paths
        assert sample_code_symbol_node_2.file_path in result_paths

    def test_seed_fallback_chain_vector_fails_fts_succeeds(
        self,
        tmp_index_path: Path,
        sample_search_result: SearchResult,
        sample_code_symbol_node: CodeSymbolNode,
    ) -> None:
        """Test seed fallback chain: vector -> exact.

        Input: query="init_db"
        Mock: _search_vector returns []
        Mock: _search_exact returns 1 seed
        Assert: Fallback chain called in order, uses exact's seed
        """
        engine = HybridSearchEngine()

        # Records which seed source ran, in call order.
        call_order: List[str] = []

        def track_vector(*args, **kwargs):
            call_order.append("vector")
            return []

        def track_exact(*args, **kwargs):
            call_order.append("exact")
            return [sample_search_result]

        mock_graph = create_mock_graph_with_seed_and_related(
            sample_search_result,
            [sample_code_symbol_node],
        )
        mock_bridge_class, mock_builder_class = self._lsp_doubles(mock_graph)

        with patch.object(
            engine, "_search_vector", side_effect=track_vector
        ) as mock_vector, patch.object(
            engine, "_search_exact", side_effect=track_exact
        ) as mock_exact, patch(
            "codexlens.search.hybrid_search.HAS_LSP", True
        ), patch(
            "codexlens.search.hybrid_search.LspBridge", mock_bridge_class
        ), patch(
            "codexlens.search.hybrid_search.LspGraphBuilder", mock_builder_class
        ):
            results = engine._search_lsp_graph(
                index_path=tmp_index_path,
                query="init_db",
                limit=10,
                max_depth=1,
                max_nodes=20,
            )

        # Fallback order: vector -> exact.
        assert call_order == ["vector", "exact"]

        # Each seed source is consulted exactly once.
        mock_vector.assert_called_once()
        mock_exact.assert_called_once()

        # Graph expansion yields the single related node.
        assert len(results) == 1
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# P1: Important Tests
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
class TestP1ImportantLspSearch:
    """P1 Important: Edge case tests for LSP graph search."""

    def test_e2e_lsp_search_no_seeds_found(
        self,
        tmp_index_path: Path,
    ) -> None:
        """Test LSP search when no seeds found from any source.

        Input: query="non_existent_symbol"
        Mock: All seed search methods return []
        Assert: Returns [], LspBridge is not called
        """
        engine = HybridSearchEngine()

        # Stand-in class; the test asserts it is never instantiated.
        mock_bridge_class = MagicMock()

        with patch.object(
            engine, "_search_vector", return_value=[]
        ) as mock_vector, patch.object(
            engine, "_search_exact", return_value=[]
        ) as mock_exact, patch(
            "codexlens.search.hybrid_search.HAS_LSP", True
        ), patch(
            "codexlens.search.hybrid_search.LspBridge", mock_bridge_class
        ):
            results = engine._search_lsp_graph(
                index_path=tmp_index_path,
                query="non_existent_symbol",
                limit=10,
                max_depth=1,
                max_nodes=20,
            )

        # Every seed source is tried once.
        mock_vector.assert_called_once()
        mock_exact.assert_called_once()

        # No seeds means no results ...
        assert results == []

        # ... and no bridge is ever constructed.
        mock_bridge_class.assert_not_called()

    def test_e2e_lsp_search_bridge_fails(
        self,
        tmp_index_path: Path,
        sample_search_result: SearchResult,
        caplog: pytest.LogCaptureFixture,
    ) -> None:
        """Test graceful degradation when LspBridge connection fails.

        Mock: Seed search returns valid seed
        Mock: LspBridge raises exception during expansion
        Assert: Returns [], error handled gracefully
        """
        engine = HybridSearchEngine()

        # Entering the bridge's async context raises.
        mock_bridge_class = MagicMock()
        mock_bridge_class.return_value.__aenter__ = AsyncMock(
            side_effect=Exception("Connection refused")
        )
        mock_bridge_class.return_value.__aexit__ = AsyncMock(return_value=None)

        mock_builder_class = MagicMock()

        # caplog only lowers the capture level to DEBUG; no log content is
        # asserted by this test.
        with patch.object(
            engine, "_search_vector", return_value=[sample_search_result]
        ), patch(
            "codexlens.search.hybrid_search.HAS_LSP", True
        ), patch(
            "codexlens.search.hybrid_search.LspBridge", mock_bridge_class
        ), patch(
            "codexlens.search.hybrid_search.LspGraphBuilder", mock_builder_class
        ), caplog.at_level(
            logging.DEBUG
        ):
            results = engine._search_lsp_graph(
                index_path=tmp_index_path,
                query="authentication",
                limit=10,
                max_depth=1,
                max_nodes=20,
            )

        # Failure degrades to an empty result list.
        assert results == []
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# P2: Supplementary Tests
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
class TestP2SupplementaryLspSearch:
    """P2 Supplementary: Deduplication and edge cases."""

    @staticmethod
    def _lsp_doubles(graph: CodeAssociationGraph) -> tuple:
        """Build stand-ins for ``LspBridge`` and ``LspGraphBuilder``.

        Args:
            graph: Graph that the fake builder's ``build_from_seeds`` returns.

        Returns:
            ``(bridge_class, builder_class)``. Calling ``bridge_class`` gives
            an object whose ``__aenter__`` resolves to an ``AsyncMock`` and
            whose ``__aexit__`` resolves to ``None``. Calling
            ``builder_class`` gives an object whose ``build_from_seeds`` is a
            coroutine function returning ``graph``.
        """
        bridge_instance = AsyncMock()
        bridge_class = MagicMock()
        bridge_class.return_value.__aenter__ = AsyncMock(
            return_value=bridge_instance
        )
        bridge_class.return_value.__aexit__ = AsyncMock(return_value=None)

        # Parameter names are kept as-is in case the engine passes them by
        # keyword.
        async def mock_build(seeds, bridge):
            return graph

        builder_instance = MagicMock()
        builder_instance.build_from_seeds = mock_build
        builder_class = MagicMock(return_value=builder_instance)
        return bridge_class, builder_class

    def test_result_deduping_seed_not_returned(
        self,
        tmp_index_path: Path,
        sample_search_result: SearchResult,
    ) -> None:
        """Test that seed results are deduplicated from final output.

        Mock: Seed search returns the ``sample_search_result`` fixture
        Mock: Graph builder returns a graph containing that seed plus one
            different node
        Assert: Final results contain only the different node, not the seed
        """
        engine = HybridSearchEngine()

        # The only node that is expected to survive deduplication.
        different_node = CodeSymbolNode(
            id="/different/path.py:other_func:100",
            name="other_func",
            kind="function",
            file_path="/different/path.py",
            range=Range(
                start_line=100,
                start_character=0,
                end_line=110,
                end_character=0,
            ),
            raw_code="def other_func(): pass",
            docstring="Other function",
        )

        mock_graph = create_mock_graph_with_seed_and_related(
            sample_search_result,
            [different_node],
        )
        mock_bridge_class, mock_builder_class = self._lsp_doubles(mock_graph)

        with patch.object(
            engine, "_search_vector", return_value=[sample_search_result]
        ), patch(
            "codexlens.search.hybrid_search.HAS_LSP", True
        ), patch(
            "codexlens.search.hybrid_search.LspBridge", mock_bridge_class
        ), patch(
            "codexlens.search.hybrid_search.LspGraphBuilder", mock_builder_class
        ):
            results = engine._search_lsp_graph(
                index_path=tmp_index_path,
                query="test query",
                limit=10,
                max_depth=1,
                max_nodes=20,
            )

        # Exactly one result: the different node, not the seed.
        assert len(results) == 1

        result_paths = [r.path for r in results]
        # The seed's file must be absent ...
        assert sample_search_result.path not in result_paths
        # ... and the different node's file must be present.
        assert "/different/path.py" in result_paths

    def test_lsp_not_available_returns_empty(
        self,
        tmp_index_path: Path,
    ) -> None:
        """Test that _search_lsp_graph returns [] when LSP dependencies unavailable."""
        engine = HybridSearchEngine()

        with patch("codexlens.search.hybrid_search.HAS_LSP", False):
            results = engine._search_lsp_graph(
                index_path=tmp_index_path,
                query="test",
                limit=10,
                max_depth=1,
                max_nodes=20,
            )

        assert results == []

    def test_graph_with_no_new_nodes_returns_empty(
        self,
        tmp_index_path: Path,
        sample_search_result: SearchResult,
    ) -> None:
        """Test when graph only contains seed nodes (no expansion)."""
        engine = HybridSearchEngine()

        # Graph holds ONLY the seed node; nothing was expanded.
        mock_graph = create_mock_graph_with_seed_and_related(
            sample_search_result,
            [],  # No related nodes
        )
        mock_bridge_class, mock_builder_class = self._lsp_doubles(mock_graph)

        with patch.object(
            engine, "_search_vector", return_value=[sample_search_result]
        ), patch(
            "codexlens.search.hybrid_search.HAS_LSP", True
        ), patch(
            "codexlens.search.hybrid_search.LspBridge", mock_bridge_class
        ), patch(
            "codexlens.search.hybrid_search.LspGraphBuilder", mock_builder_class
        ):
            results = engine._search_lsp_graph(
                index_path=tmp_index_path,
                query="test",
                limit=10,
                max_depth=1,
                max_nodes=20,
            )

        # Every node in the graph is a seed, so all are filtered out.
        assert results == []
|