Files
Claude-Code-Workflow/codex-lens/tests/test_chain_search.py
catlog22 71faaf43a8 refactor: 移除 SPLADE 和 hybrid_cascade,精简搜索架构
删除 SPLADE 稀疏神经搜索后端和 hybrid_cascade 策略,
将搜索架构从 6 种后端简化为 4 种(FTS Exact/Fuzzy, Binary Vector, Dense Vector, LSP)。

主要变更:
- 删除 splade_encoder.py, splade_index.py, migration_009 等 4 个文件
- 移除 config.py 中 SPLADE 相关配置(enable_splade, splade_model 等)
- DEFAULT_WEIGHTS 改为 FTS 权重 {exact:0.25, fuzzy:0.1, vector:0.5, lsp:0.15}
- 删除 hybrid_cascade_search(),所有 cascade fallback 改为 self.search()
- API fusion_strategy='hybrid' 向后兼容映射到 binary_rerank
- 删除 CLI index_splade/splade_status 命令和 --method splade
- 更新测试、基准测试和文档
2026-02-08 12:07:41 +08:00

158 lines
6.3 KiB
Python

import logging
import os
import tempfile
from pathlib import Path
from unittest.mock import MagicMock
import pytest
from codexlens.config import Config
from codexlens.entities import Symbol
from codexlens.search.chain_search import ChainSearchEngine, SearchOptions
from codexlens.storage.global_index import GlobalSymbolIndex
from codexlens.storage.path_mapper import PathMapper
from codexlens.storage.registry import RegistryStore
@pytest.fixture()
def temp_paths():
    """Yield a fresh temporary directory as a ``Path``; remove it on teardown.

    Removal is best-effort: the directory is created with
    ``ignore_cleanup_errors=True`` and any ``OSError`` raised by the explicit
    ``cleanup()`` call is swallowed, so a failed removal never fails a test.
    """
    scratch = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
    yield Path(scratch.name)
    try:
        scratch.cleanup()
    except OSError:  # also covers PermissionError, which subclasses OSError
        pass
def test_symbol_filtering_handles_path_failures(monkeypatch: pytest.MonkeyPatch, caplog, temp_paths: Path) -> None:
    """Check that ``search_symbols`` returns only the usable global-index hit.

    ``GlobalSymbolIndex.search`` is patched to return one symbol whose file
    lives under the project root plus symbols with problematic ``file``
    values: an embedded NUL, a relative path and, on Windows only, a path on
    a different drive. The test expects just the valid symbol to come back,
    expects the names of the other symbols to show up in the DEBUG log of
    ``codexlens.search.chain_search``, and makes ``_search_symbols_parallel``
    raise if it is ever called.
    """

    def make_symbol(name: str, file: str) -> Symbol:
        # Every candidate differs only by name and file path.
        return Symbol(name=name, kind="class", range=(1, 2), file=file)

    on_windows = os.name == "nt"

    # Project layout on disk plus a placeholder index database.
    src_root = temp_paths / "project"
    (src_root / "src").mkdir(parents=True, exist_ok=True)
    path_mapper = PathMapper(index_root=temp_paths / "indexes")
    start_db = path_mapper.source_to_index_db(src_root)
    start_db.parent.mkdir(parents=True, exist_ok=True)
    start_db.write_text("", encoding="utf-8")  # existence is enough for _find_start_index

    # Register the project and create its global symbol index.
    store = RegistryStore(db_path=temp_paths / "registry.db")
    store.initialize()
    registered = store.register_project(src_root, path_mapper.source_to_index_dir(src_root))
    symbol_index = GlobalSymbolIndex(
        registered.index_root / GlobalSymbolIndex.DEFAULT_DB_NAME,
        project_id=registered.id,
    )
    symbol_index.initialize()

    # One good candidate followed by the problematic ones.
    candidates = [
        make_symbol("AuthManager", str(src_root / "src" / "auth.py")),
        make_symbol("BadNull", "bad\0path.py"),
        make_symbol("BadRelative", "relative/path.py"),
    ]
    if on_windows:
        # Pick a drive letter that differs from the one hosting the project.
        project_drive, _ = os.path.splitdrive(str(src_root.resolve()))
        foreign_drive = "D:" if project_drive.lower() == "c:" else "C:"
        candidates.append(make_symbol("CrossDrive", f"{foreign_drive}\\other\\file.py"))

    def stub_search(self, name: str, kind=None, limit: int = 20, prefix_mode: bool = False):
        # Ignore the query entirely and hand back the canned candidates.
        return candidates

    monkeypatch.setattr(GlobalSymbolIndex, "search", stub_search)

    engine = ChainSearchEngine(
        store,
        path_mapper,
        config=Config(data_dir=temp_paths / "data", global_symbol_index_enabled=True),
    )
    # Any call into the chain traversal fails the test.
    engine._search_symbols_parallel = MagicMock(side_effect=AssertionError("should not traverse chain"))
    caplog.set_level(logging.DEBUG, logger="codexlens.search.chain_search")

    found = engine.search_symbols(
        "Auth",
        src_root,
        options=SearchOptions(depth=5, total_limit=10),
    )

    found_names = [hit.name for hit in found]
    assert found_names == ["AuthManager"]

    expected_in_log = ["BadNull", "BadRelative"]
    if on_windows:
        expected_in_log.append("CrossDrive")
    for rejected_name in expected_in_log:
        assert rejected_name in caplog.text
def test_cascade_search_strategy_routing(temp_paths: Path) -> None:
    """Verify that ``cascade_search()`` calls the method matching ``strategy``.

    Each strategy name is paired with the engine method expected to handle
    it. That method is patched with a mock returning an empty
    ``ChainSearchResult``, ``cascade_search`` is invoked, and the mock must
    have been called exactly once. The final case omits ``strategy`` and
    expects ``binary_cascade_search`` to be used.
    """
    from unittest.mock import patch
    from codexlens.search.chain_search import ChainSearchResult, SearchStats

    store = RegistryStore(db_path=temp_paths / "registry.db")
    store.initialize()
    engine = ChainSearchEngine(
        store,
        PathMapper(index_root=temp_paths / "indexes"),
        config=Config(data_dir=temp_paths / "data"),
    )
    target = temp_paths / "src"

    # (strategy argument, engine method expected to receive the call)
    routes = (
        ("staged", "staged_cascade_search"),
        ("binary", "binary_cascade_search"),
        ("binary_rerank", "binary_rerank_cascade_search"),
        ("dense_rerank", "dense_rerank_cascade_search"),
        (None, "binary_cascade_search"),  # no strategy specified - defaults to binary
    )

    for strategy, handler_name in routes:
        # None means "call without the keyword at all", not strategy=None.
        call_kwargs = {} if strategy is None else {"strategy": strategy}
        with patch.object(engine, handler_name) as handler:
            handler.return_value = ChainSearchResult(
                query="query", results=[], symbols=[], stats=SearchStats()
            )
            engine.cascade_search("query", target, **call_kwargs)
            handler.assert_called_once()
def test_cascade_search_invalid_strategy(temp_paths: Path) -> None:
    """Verify an unrecognised strategy name is handled by ``binary_cascade_search``.

    ``binary_cascade_search`` is patched with a mock returning an empty
    ``ChainSearchResult``; calling ``cascade_search`` with
    ``strategy="invalid_strategy"`` must invoke that mock exactly once.
    """
    from unittest.mock import patch
    from codexlens.search.chain_search import ChainSearchResult, SearchStats

    store = RegistryStore(db_path=temp_paths / "registry.db")
    store.initialize()
    engine = ChainSearchEngine(
        store,
        PathMapper(index_root=temp_paths / "indexes"),
        config=Config(data_dir=temp_paths / "data"),
    )
    target = temp_paths / "src"

    empty_result = ChainSearchResult(
        query="query", results=[], symbols=[], stats=SearchStats()
    )
    # Invalid strategy should default to binary
    with patch.object(engine, "binary_cascade_search") as fallback:
        fallback.return_value = empty_result
        engine.cascade_search("query", target, strategy="invalid_strategy")
        fallback.assert_called_once()