refactor: 移除 SPLADE 和 hybrid_cascade,精简搜索架构

删除 SPLADE 稀疏神经搜索后端和 hybrid_cascade 策略,
将搜索架构从 6 种后端简化为 4 种:FTS(Exact/Fuzzy)、Binary Vector、Dense Vector、LSP。

主要变更:
- 删除 splade_encoder.py, splade_index.py, migration_009 等 4 个文件
- 移除 config.py 中 SPLADE 相关配置(enable_splade, splade_model 等)
- DEFAULT_WEIGHTS 改为 FTS 权重 {exact:0.25, fuzzy:0.1, vector:0.5, lsp:0.15}
- 删除 hybrid_cascade_search(),所有 cascade fallback 改为 self.search()
- API fusion_strategy='hybrid' 向后兼容映射到 binary_rerank
- 删除 CLI index_splade/splade_status 命令和 --method splade
- 更新测试、基准测试和文档
This commit is contained in:
catlog22
2026-02-08 12:07:41 +08:00
parent 72d2ae750b
commit 71faaf43a8
22 changed files with 126 additions and 2883 deletions

View File

@@ -427,12 +427,12 @@ class TestFusionStrategyMapping:
mock_engine.binary_cascade_search.assert_called_once()
def test_hybrid_strategy_calls_hybrid_cascade_search(self):
"""Test that hybrid strategy maps to hybrid_cascade_search."""
def test_hybrid_strategy_maps_to_binary_rerank(self):
"""Test that hybrid strategy maps to binary_rerank_cascade_search (backward compat)."""
from codexlens.api.semantic import _execute_search
mock_engine = MagicMock()
mock_engine.hybrid_cascade_search.return_value = MagicMock(results=[])
mock_engine.binary_rerank_cascade_search.return_value = MagicMock(results=[])
mock_options = MagicMock()
_execute_search(
@@ -444,7 +444,7 @@ class TestFusionStrategyMapping:
limit=20,
)
mock_engine.hybrid_cascade_search.assert_called_once()
mock_engine.binary_rerank_cascade_search.assert_called_once()
def test_unknown_strategy_defaults_to_rrf(self):
"""Test that unknown strategy defaults to standard search (rrf)."""

View File

@@ -1,7 +1,7 @@
"""Integration tests for HybridSearchEngine LSP graph search.
Tests the _search_lsp_graph method which orchestrates:
1. Seed retrieval via vector/splade/exact fallback chain
1. Seed retrieval via vector/exact fallback chain
2. LSP graph expansion via LspBridge and LspGraphBuilder
3. Result deduplication and merging
@@ -184,8 +184,6 @@ class TestP0CriticalLspSearch:
with patch.object(
engine, "_search_vector", return_value=[sample_search_result]
) as mock_vector, patch.object(
engine, "_search_splade", return_value=[]
), patch.object(
engine, "_search_exact", return_value=[]
):
# Patch LSP module at the import location
@@ -251,11 +249,10 @@ class TestP0CriticalLspSearch:
sample_search_result: SearchResult,
sample_code_symbol_node: CodeSymbolNode,
) -> None:
"""Test seed fallback chain: vector -> splade -> exact.
"""Test seed fallback chain: vector -> exact.
Input: query="init_db"
Mock: _search_vector returns []
Mock: _search_splade returns []
Mock: _search_exact returns 1 seed
Assert: Fallback chain called in order, uses exact's seed
"""
@@ -267,10 +264,6 @@ class TestP0CriticalLspSearch:
call_order.append("vector")
return []
def track_splade(*args, **kwargs):
call_order.append("splade")
return []
def track_exact(*args, **kwargs):
call_order.append("exact")
return [sample_search_result]
@@ -284,8 +277,6 @@ class TestP0CriticalLspSearch:
with patch.object(
engine, "_search_vector", side_effect=track_vector
) as mock_vector, patch.object(
engine, "_search_splade", side_effect=track_splade
) as mock_splade, patch.object(
engine, "_search_exact", side_effect=track_exact
) as mock_exact:
with patch("codexlens.search.hybrid_search.HAS_LSP", True):
@@ -322,12 +313,11 @@ class TestP0CriticalLspSearch:
max_nodes=20,
)
# Verify fallback chain order: vector -> splade -> exact
assert call_order == ["vector", "splade", "exact"]
# Verify fallback chain order: vector -> exact
assert call_order == ["vector", "exact"]
# All three methods should be called
# Both methods should be called
mock_vector.assert_called_once()
mock_splade.assert_called_once()
mock_exact.assert_called_once()
# Should return results from graph expansion (1 related node)
@@ -357,8 +347,6 @@ class TestP1ImportantLspSearch:
with patch.object(
engine, "_search_vector", return_value=[]
) as mock_vector, patch.object(
engine, "_search_splade", return_value=[]
) as mock_splade, patch.object(
engine, "_search_exact", return_value=[]
) as mock_exact:
with patch("codexlens.search.hybrid_search.HAS_LSP", True):
@@ -379,7 +367,6 @@ class TestP1ImportantLspSearch:
# All search methods should be tried
mock_vector.assert_called_once()
mock_splade.assert_called_once()
mock_exact.assert_called_once()
# Should return empty list

View File

@@ -303,7 +303,7 @@ class TestRealHybridSearchIntegrationStandalone:
"""Test the full LSP search pipeline with real LSP."""
print(f"\n>>> Testing full LSP search pipeline")
# Create mock seeds (normally from vector/splade search)
# Create mock seeds (normally from vector/FTS search)
seeds = [
CodeSymbolNode(
id=f"{TEST_PYTHON_FILE}:LspBridge:96",

View File

@@ -109,14 +109,6 @@ def test_cascade_search_strategy_routing(temp_paths: Path) -> None:
engine.cascade_search("query", source_path, strategy="binary")
mock_binary.assert_called_once()
# Test strategy='hybrid' routing
with patch.object(engine, "hybrid_cascade_search") as mock_hybrid:
mock_hybrid.return_value = ChainSearchResult(
query="query", results=[], symbols=[], stats=SearchStats()
)
engine.cascade_search("query", source_path, strategy="hybrid")
mock_hybrid.assert_called_once()
# Test strategy='binary_rerank' routing
with patch.object(engine, "binary_rerank_cascade_search") as mock_br:
mock_br.return_value = ChainSearchResult(

View File

@@ -576,20 +576,20 @@ class TestStagedCascadeIntegration:
# Verify stage 4 was called
mock_stage4.assert_called_once()
def test_staged_cascade_fallback_to_hybrid(
def test_staged_cascade_fallback_to_search(
self, mock_registry, mock_mapper, mock_config, temp_paths
):
"""Test staged_cascade_search falls back to hybrid when numpy unavailable."""
"""Test staged_cascade_search falls back to standard search when numpy unavailable."""
engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config)
with patch("codexlens.search.chain_search.NUMPY_AVAILABLE", False):
with patch.object(engine, "hybrid_cascade_search") as mock_hybrid:
mock_hybrid.return_value = MagicMock()
with patch.object(engine, "search") as mock_search:
mock_search.return_value = MagicMock()
engine.staged_cascade_search("query", temp_paths / "src")
# Should fall back to hybrid cascade
mock_hybrid.assert_called_once()
# Should fall back to standard search
mock_search.assert_called_once()
def test_staged_cascade_deduplicates_final_results(
self, mock_registry, mock_mapper, mock_config, temp_paths
@@ -689,10 +689,10 @@ class TestStagedCascadeGracefulDegradation:
# Stage 1 returns no results
mock_stage1.return_value = ([], None)
with patch.object(engine, "hybrid_cascade_search") as mock_hybrid:
mock_hybrid.return_value = MagicMock()
with patch.object(engine, "search") as mock_search:
mock_search.return_value = MagicMock()
engine.staged_cascade_search("query", temp_paths / "src")
# Should fall back to hybrid when stage 1 fails
mock_hybrid.assert_called_once()
# Should fall back to standard search when stage 1 fails
mock_search.assert_called_once()