refactor: 移除 SPLADE 和 hybrid_cascade，精简搜索架构

删除 SPLADE 稀疏神经搜索后端和 hybrid_cascade 策略，将搜索架构从 6 种后端简化为 4 种（FTS Exact/Fuzzy, Binary Vector, Dense Vector, LSP）。

主要变更：
- 删除 splade_encoder.py, splade_index.py, migration_009 等 4 个文件
- 移除 config.py 中 SPLADE 相关配置（enable_splade, splade_model 等）
- DEFAULT_WEIGHTS 改为 FTS 权重 {exact:0.25, fuzzy:0.1, vector:0.5, lsp:0.15}
- 删除 hybrid_cascade_search()，所有 cascade fallback 改为 self.search()
- API fusion_strategy='hybrid' 向后兼容映射到 binary_rerank
- 删除 CLI index_splade/splade_status 命令和 --method splade
- 更新测试、基准测试和文档
2026-02-11 02:33:51 +08:00 · 2026-02-08 12:07:41 +08:00
parent 72d2ae750b
commit 71faaf43a8
22 changed files with 126 additions and 2883 deletions
--- a/codex-lens/tests/api/test_semantic_search.py
+++ b/codex-lens/tests/api/test_semantic_search.py
@@ -427,12 +427,12 @@ class TestFusionStrategyMapping:

        mock_engine.binary_cascade_search.assert_called_once()

-    def test_hybrid_strategy_calls_hybrid_cascade_search(self):
-        """Test that hybrid strategy maps to hybrid_cascade_search."""
+    def test_hybrid_strategy_maps_to_binary_rerank(self):
+        """Test that hybrid strategy maps to binary_rerank_cascade_search (backward compat)."""
        from codexlens.api.semantic import _execute_search

        mock_engine = MagicMock()
-        mock_engine.hybrid_cascade_search.return_value = MagicMock(results=[])
+        mock_engine.binary_rerank_cascade_search.return_value = MagicMock(results=[])
        mock_options = MagicMock()

        _execute_search(
@@ -444,7 +444,7 @@ class TestFusionStrategyMapping:
            limit=20,
        )

-        mock_engine.hybrid_cascade_search.assert_called_once()
+        mock_engine.binary_rerank_cascade_search.assert_called_once()

    def test_unknown_strategy_defaults_to_rrf(self):
        """Test that unknown strategy defaults to standard search (rrf)."""
--- a/codex-lens/tests/integration/test_lsp_search_integration.py
+++ b/codex-lens/tests/integration/test_lsp_search_integration.py
@@ -1,7 +1,7 @@
 """Integration tests for HybridSearchEngine LSP graph search.

 Tests the _search_lsp_graph method which orchestrates:
-1. Seed retrieval via vector/splade/exact fallback chain
+1. Seed retrieval via vector/exact fallback chain
 2. LSP graph expansion via LspBridge and LspGraphBuilder
 3. Result deduplication and merging

@@ -184,8 +184,6 @@ class TestP0CriticalLspSearch:
        with patch.object(
            engine, "_search_vector", return_value=[sample_search_result]
        ) as mock_vector, patch.object(
-            engine, "_search_splade", return_value=[]
-        ), patch.object(
            engine, "_search_exact", return_value=[]
        ):
            # Patch LSP module at the import location
@@ -251,11 +249,10 @@ class TestP0CriticalLspSearch:
        sample_search_result: SearchResult,
        sample_code_symbol_node: CodeSymbolNode,
    ) -> None:
-        """Test seed fallback chain: vector -> splade -> exact.
+        """Test seed fallback chain: vector -> exact.

        Input: query="init_db"
        Mock: _search_vector returns []
-        Mock: _search_splade returns []
        Mock: _search_exact returns 1 seed
        Assert: Fallback chain called in order, uses exact's seed
        """
@@ -267,10 +264,6 @@ class TestP0CriticalLspSearch:
            call_order.append("vector")
            return []

-        def track_splade(*args, **kwargs):
-            call_order.append("splade")
-            return []
-
        def track_exact(*args, **kwargs):
            call_order.append("exact")
            return [sample_search_result]
@@ -284,8 +277,6 @@ class TestP0CriticalLspSearch:
        with patch.object(
            engine, "_search_vector", side_effect=track_vector
        ) as mock_vector, patch.object(
-            engine, "_search_splade", side_effect=track_splade
-        ) as mock_splade, patch.object(
            engine, "_search_exact", side_effect=track_exact
        ) as mock_exact:
            with patch("codexlens.search.hybrid_search.HAS_LSP", True):
@@ -322,12 +313,11 @@ class TestP0CriticalLspSearch:
                        max_nodes=20,
                    )

-        # Verify fallback chain order: vector -> splade -> exact
-        assert call_order == ["vector", "splade", "exact"]
+        # Verify fallback chain order: vector -> exact
+        assert call_order == ["vector", "exact"]

-        # All three methods should be called
+        # Both methods should be called
        mock_vector.assert_called_once()
-        mock_splade.assert_called_once()
        mock_exact.assert_called_once()

        # Should return results from graph expansion (1 related node)
@@ -357,8 +347,6 @@ class TestP1ImportantLspSearch:
        with patch.object(
            engine, "_search_vector", return_value=[]
        ) as mock_vector, patch.object(
-            engine, "_search_splade", return_value=[]
-        ) as mock_splade, patch.object(
            engine, "_search_exact", return_value=[]
        ) as mock_exact:
            with patch("codexlens.search.hybrid_search.HAS_LSP", True):
@@ -379,7 +367,6 @@ class TestP1ImportantLspSearch:

        # All search methods should be tried
        mock_vector.assert_called_once()
-        mock_splade.assert_called_once()
        mock_exact.assert_called_once()

        # Should return empty list
--- a/codex-lens/tests/real/test_lsp_real_interface.py
+++ b/codex-lens/tests/real/test_lsp_real_interface.py
@@ -303,7 +303,7 @@ class TestRealHybridSearchIntegrationStandalone:
        """Test the full LSP search pipeline with real LSP."""
        print(f"\n>>> Testing full LSP search pipeline")

-        # Create mock seeds (normally from vector/splade search)
+        # Create mock seeds (normally from vector/FTS search)
        seeds = [
            CodeSymbolNode(
                id=f"{TEST_PYTHON_FILE}:LspBridge:96",
--- a/codex-lens/tests/test_chain_search.py
+++ b/codex-lens/tests/test_chain_search.py
@@ -109,14 +109,6 @@ def test_cascade_search_strategy_routing(temp_paths: Path) -> None:
        engine.cascade_search("query", source_path, strategy="binary")
        mock_binary.assert_called_once()

-    # Test strategy='hybrid' routing
-    with patch.object(engine, "hybrid_cascade_search") as mock_hybrid:
-        mock_hybrid.return_value = ChainSearchResult(
-            query="query", results=[], symbols=[], stats=SearchStats()
-        )
-        engine.cascade_search("query", source_path, strategy="hybrid")
-        mock_hybrid.assert_called_once()
-
    # Test strategy='binary_rerank' routing
    with patch.object(engine, "binary_rerank_cascade_search") as mock_br:
        mock_br.return_value = ChainSearchResult(
--- a/codex-lens/tests/test_staged_cascade.py
+++ b/codex-lens/tests/test_staged_cascade.py
@@ -576,20 +576,20 @@ class TestStagedCascadeIntegration:
                                # Verify stage 4 was called
                                mock_stage4.assert_called_once()

-    def test_staged_cascade_fallback_to_hybrid(
+    def test_staged_cascade_fallback_to_search(
        self, mock_registry, mock_mapper, mock_config, temp_paths
    ):
-        """Test staged_cascade_search falls back to hybrid when numpy unavailable."""
+        """Test staged_cascade_search falls back to standard search when numpy unavailable."""
        engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config)

        with patch("codexlens.search.chain_search.NUMPY_AVAILABLE", False):
-            with patch.object(engine, "hybrid_cascade_search") as mock_hybrid:
-                mock_hybrid.return_value = MagicMock()
+            with patch.object(engine, "search") as mock_search:
+                mock_search.return_value = MagicMock()

                engine.staged_cascade_search("query", temp_paths / "src")

-                # Should fall back to hybrid cascade
-                mock_hybrid.assert_called_once()
+                # Should fall back to standard search
+                mock_search.assert_called_once()

    def test_staged_cascade_deduplicates_final_results(
        self, mock_registry, mock_mapper, mock_config, temp_paths
@@ -689,10 +689,10 @@ class TestStagedCascadeGracefulDegradation:
                    # Stage 1 returns no results
                    mock_stage1.return_value = ([], None)

-                    with patch.object(engine, "hybrid_cascade_search") as mock_hybrid:
-                        mock_hybrid.return_value = MagicMock()
+                    with patch.object(engine, "search") as mock_search:
+                        mock_search.return_value = MagicMock()

                        engine.staged_cascade_search("query", temp_paths / "src")

-                        # Should fall back to hybrid when stage 1 fails
-                        mock_hybrid.assert_called_once()
+                        # Should fall back to standard search when stage 1 fails
+                        mock_search.assert_called_once()