mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-10 02:24:35 +08:00
368 lines
14 KiB
Python
368 lines
14 KiB
Python
"""Unit tests for HybridSearchEngine - parallel search and RRF fusion.
|
|
|
|
Tests cover:
|
|
- search: exact only, fuzzy enabled, vector enabled, pure vector mode
|
|
- search: RRF fusion, empty query, no results, reranking, category filtering
|
|
- _search_parallel: parallel backend execution
|
|
- _search_lsp_graph: LSP graph expansion with seeds, vector-to-FTS fallback
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import tempfile
|
|
from pathlib import Path
|
|
from typing import Dict, List
|
|
from unittest.mock import MagicMock, Mock, patch, PropertyMock
|
|
|
|
import pytest
|
|
|
|
from codexlens.config import Config
|
|
from codexlens.entities import SearchResult
|
|
from codexlens.search.hybrid_search import HybridSearchEngine
|
|
|
|
|
|
# =============================================================================
|
|
# Test Fixtures
|
|
# =============================================================================
|
|
|
|
|
|
@pytest.fixture
|
|
def temp_paths():
|
|
"""Create temporary directory structure with a mock index."""
|
|
tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
|
|
root = Path(tmpdir.name)
|
|
# Create a non-empty index file to pass the empty-file guard
|
|
index_path = root / "_index.db"
|
|
index_path.write_bytes(b"\x00" * 100)
|
|
yield root
|
|
try:
|
|
tmpdir.cleanup()
|
|
except (PermissionError, OSError):
|
|
pass
|
|
|
|
|
|
@pytest.fixture
|
|
def mock_config():
|
|
"""Create mock config for hybrid search."""
|
|
config = MagicMock(spec=Config)
|
|
config.embedding_use_gpu = False
|
|
config.enable_reranking = False
|
|
config.enable_cross_encoder_rerank = False
|
|
config.symbol_boost_factor = 1.5
|
|
config.fusion_method = "rrf"
|
|
config.rrf_k = 60
|
|
config.enable_category_filter = True
|
|
return config
|
|
|
|
|
|
@pytest.fixture
|
|
def sample_results() -> List[SearchResult]:
|
|
"""Create sample search results."""
|
|
return [
|
|
SearchResult(
|
|
path="auth.py",
|
|
score=0.9,
|
|
excerpt="def authenticate(user):",
|
|
symbol_name="authenticate",
|
|
symbol_kind="function",
|
|
),
|
|
SearchResult(
|
|
path="login.py",
|
|
score=0.7,
|
|
excerpt="class LoginHandler:",
|
|
symbol_name="LoginHandler",
|
|
symbol_kind="class",
|
|
),
|
|
]
|
|
|
|
|
|
# =============================================================================
|
|
# Tests: search with different backends
|
|
# =============================================================================
|
|
|
|
|
|
class TestHybridSearchBackends:
|
|
"""Tests for HybridSearchEngine.search() backend configurations."""
|
|
|
|
def test_search_exact_only(self, temp_paths, mock_config):
|
|
"""Search with only exact FTS backend."""
|
|
engine = HybridSearchEngine(config=mock_config)
|
|
index_path = temp_paths / "_index.db"
|
|
|
|
with patch.object(engine, "_search_parallel") as mock_parallel:
|
|
mock_parallel.return_value = {
|
|
"exact": [
|
|
SearchResult(path="a.py", score=10.0, excerpt="result"),
|
|
]
|
|
}
|
|
|
|
results = engine.search(
|
|
index_path, "test query",
|
|
enable_fuzzy=False, enable_vector=False,
|
|
)
|
|
|
|
assert len(results) == 1
|
|
# Verify only exact backend was requested
|
|
call_args = mock_parallel.call_args
|
|
backends = call_args[0][2] # 3rd positional arg
|
|
assert "exact" in backends
|
|
assert "fuzzy" not in backends
|
|
assert "vector" not in backends
|
|
|
|
def test_search_fuzzy_enabled(self, temp_paths, mock_config):
|
|
"""Search with exact + fuzzy backends."""
|
|
engine = HybridSearchEngine(config=mock_config)
|
|
index_path = temp_paths / "_index.db"
|
|
|
|
with patch.object(engine, "_search_parallel") as mock_parallel:
|
|
mock_parallel.return_value = {
|
|
"exact": [SearchResult(path="a.py", score=10.0, excerpt="exact")],
|
|
"fuzzy": [SearchResult(path="b.py", score=8.0, excerpt="fuzzy")],
|
|
}
|
|
|
|
results = engine.search(
|
|
index_path, "test_query",
|
|
enable_fuzzy=True, enable_vector=False,
|
|
)
|
|
|
|
assert len(results) >= 1
|
|
backends = mock_parallel.call_args[0][2]
|
|
assert "exact" in backends
|
|
assert "fuzzy" in backends
|
|
|
|
def test_search_vector_enabled(self, temp_paths, mock_config):
|
|
"""Search with exact + fuzzy + vector backends."""
|
|
engine = HybridSearchEngine(config=mock_config)
|
|
index_path = temp_paths / "_index.db"
|
|
|
|
with patch.object(engine, "_search_parallel") as mock_parallel:
|
|
mock_parallel.return_value = {
|
|
"exact": [SearchResult(path="a.py", score=10.0, excerpt="exact")],
|
|
"vector": [SearchResult(path="c.py", score=0.85, excerpt="vector")],
|
|
}
|
|
|
|
results = engine.search(
|
|
index_path, "test_query",
|
|
enable_fuzzy=False, enable_vector=True,
|
|
)
|
|
|
|
backends = mock_parallel.call_args[0][2]
|
|
assert "exact" in backends
|
|
assert "vector" in backends
|
|
|
|
def test_search_pure_vector(self, temp_paths, mock_config):
|
|
"""Pure vector mode should only use vector backend."""
|
|
engine = HybridSearchEngine(config=mock_config)
|
|
mock_config.enable_category_filter = False
|
|
index_path = temp_paths / "_index.db"
|
|
|
|
with patch.object(engine, "_search_parallel") as mock_parallel:
|
|
mock_parallel.return_value = {
|
|
"vector": [SearchResult(path="a.py", score=0.9, excerpt="vec")],
|
|
}
|
|
|
|
results = engine.search(
|
|
index_path, "semantic query",
|
|
enable_vector=True, pure_vector=True,
|
|
)
|
|
|
|
backends = mock_parallel.call_args[0][2]
|
|
assert "vector" in backends
|
|
assert "exact" not in backends
|
|
|
|
|
|
# =============================================================================
|
|
# Tests: search fusion and post-processing
|
|
# =============================================================================
|
|
|
|
|
|
class TestHybridSearchFusion:
|
|
"""Tests for RRF fusion, empty query, no results, reranking, filtering."""
|
|
|
|
def test_search_rrf_fusion(self, temp_paths, mock_config):
|
|
"""Results from multiple backends should be fused via RRF."""
|
|
engine = HybridSearchEngine(config=mock_config)
|
|
index_path = temp_paths / "_index.db"
|
|
|
|
with patch.object(engine, "_search_parallel") as mock_parallel:
|
|
mock_parallel.return_value = {
|
|
"exact": [
|
|
SearchResult(path="a.py", score=10.0, excerpt="exact a"),
|
|
SearchResult(path="b.py", score=5.0, excerpt="exact b"),
|
|
],
|
|
"vector": [
|
|
SearchResult(path="b.py", score=0.9, excerpt="vector b"),
|
|
SearchResult(path="c.py", score=0.8, excerpt="vector c"),
|
|
],
|
|
}
|
|
|
|
results = engine.search(
|
|
index_path, "test",
|
|
enable_fuzzy=False, enable_vector=True,
|
|
)
|
|
|
|
# b.py appears in both sources - should have high fusion score
|
|
assert any(r.path == "b.py" for r in results)
|
|
|
|
def test_search_empty_query(self, temp_paths, mock_config):
|
|
"""Empty query should still execute (handled gracefully)."""
|
|
engine = HybridSearchEngine(config=mock_config)
|
|
index_path = temp_paths / "_index.db"
|
|
|
|
with patch.object(engine, "_search_parallel") as mock_parallel:
|
|
mock_parallel.return_value = {"exact": []}
|
|
|
|
results = engine.search(index_path, "", enable_fuzzy=False)
|
|
|
|
assert results == []
|
|
|
|
def test_search_no_results(self, temp_paths, mock_config):
|
|
"""All backends returning empty should produce empty results."""
|
|
engine = HybridSearchEngine(config=mock_config)
|
|
index_path = temp_paths / "_index.db"
|
|
|
|
with patch.object(engine, "_search_parallel") as mock_parallel:
|
|
mock_parallel.return_value = {
|
|
"exact": [],
|
|
"fuzzy": [],
|
|
}
|
|
|
|
results = engine.search(index_path, "nonexistent")
|
|
|
|
assert results == []
|
|
|
|
def test_search_reranking(self, temp_paths, mock_config):
|
|
"""Reranking should be applied when config enables it."""
|
|
mock_config.enable_reranking = True
|
|
mock_config.enable_cross_encoder_rerank = False
|
|
mock_config.reranking_top_k = 50
|
|
engine = HybridSearchEngine(config=mock_config)
|
|
index_path = temp_paths / "_index.db"
|
|
|
|
mock_embedder = MagicMock()
|
|
mock_embedder.embed_single.return_value = [0.1] * 128
|
|
mock_embedder.embed.return_value = [[0.1] * 128]
|
|
engine.embedder = mock_embedder
|
|
|
|
with patch.object(engine, "_search_parallel") as mock_parallel:
|
|
mock_parallel.return_value = {
|
|
"exact": [SearchResult(path="a.py", score=10.0, excerpt="code")],
|
|
}
|
|
|
|
with patch("codexlens.search.hybrid_search.rerank_results") as mock_rerank:
|
|
mock_rerank.return_value = [
|
|
SearchResult(path="a.py", score=0.85, excerpt="code"),
|
|
]
|
|
results = engine.search(index_path, "query", enable_fuzzy=False)
|
|
|
|
mock_rerank.assert_called_once()
|
|
|
|
def test_search_category_filtering(self, temp_paths, mock_config):
|
|
"""Category filtering should separate code/doc results by intent."""
|
|
mock_config.enable_category_filter = True
|
|
engine = HybridSearchEngine(config=mock_config)
|
|
index_path = temp_paths / "_index.db"
|
|
|
|
with patch.object(engine, "_search_parallel") as mock_parallel:
|
|
mock_parallel.return_value = {
|
|
"exact": [
|
|
SearchResult(path="auth.py", score=10.0, excerpt="def auth"),
|
|
SearchResult(path="README.md", score=8.0, excerpt="docs"),
|
|
],
|
|
}
|
|
|
|
# Keyword-like query should filter to code
|
|
results = engine.search(
|
|
index_path, "AuthManager",
|
|
enable_fuzzy=False,
|
|
)
|
|
|
|
paths = [r.path for r in results]
|
|
# Code files should remain, doc files filtered for KEYWORD intent
|
|
assert "auth.py" in paths
|
|
|
|
|
|
# =============================================================================
|
|
# Tests: _search_parallel
|
|
# =============================================================================
|
|
|
|
|
|
class TestSearchParallel:
|
|
"""Tests for _search_parallel() parallel backend execution."""
|
|
|
|
def test_search_parallel_backends(self, temp_paths, mock_config):
|
|
"""Parallel execution should run all requested backends."""
|
|
engine = HybridSearchEngine(config=mock_config)
|
|
index_path = temp_paths / "_index.db"
|
|
|
|
with patch.object(engine, "_search_exact") as mock_exact, \
|
|
patch.object(engine, "_search_fuzzy") as mock_fuzzy:
|
|
mock_exact.return_value = [
|
|
SearchResult(path="a.py", score=10.0, excerpt="exact"),
|
|
]
|
|
mock_fuzzy.return_value = [
|
|
SearchResult(path="b.py", score=8.0, excerpt="fuzzy"),
|
|
]
|
|
|
|
results_map = engine._search_parallel(
|
|
index_path, "query",
|
|
backends={"exact": True, "fuzzy": True},
|
|
limit=10,
|
|
)
|
|
|
|
assert "exact" in results_map
|
|
assert "fuzzy" in results_map
|
|
mock_exact.assert_called_once()
|
|
mock_fuzzy.assert_called_once()
|
|
|
|
|
|
# =============================================================================
|
|
# Tests: _search_lsp_graph
|
|
# =============================================================================
|
|
|
|
|
|
class TestSearchLspGraph:
|
|
"""Tests for _search_lsp_graph() LSP graph expansion."""
|
|
|
|
def test_search_lsp_graph(self, temp_paths, mock_config):
|
|
"""LSP graph search should use seed results for expansion."""
|
|
engine = HybridSearchEngine(config=mock_config)
|
|
index_path = temp_paths / "_index.db"
|
|
|
|
# When HAS_LSP is False, _search_lsp_graph returns []
|
|
with patch("codexlens.search.hybrid_search.HAS_LSP", False):
|
|
results = engine._search_lsp_graph(
|
|
index_path, "auth function", limit=5,
|
|
)
|
|
assert results == []
|
|
|
|
def test_lsp_fallback_vector_to_fts(self, temp_paths, mock_config):
|
|
"""When vector seeds fail, should fall back to FTS seeds."""
|
|
engine = HybridSearchEngine(config=mock_config)
|
|
index_path = temp_paths / "_index.db"
|
|
|
|
with patch("codexlens.search.hybrid_search.HAS_LSP", True):
|
|
# Mock _search_vector to return empty (no seeds from vector)
|
|
with patch.object(engine, "_search_vector", return_value=[]):
|
|
# Mock _search_exact to return seeds
|
|
with patch.object(engine, "_search_exact") as mock_exact:
|
|
mock_exact.return_value = [
|
|
SearchResult(
|
|
path="auth.py", score=10.0,
|
|
excerpt="def auth():", symbol_name="auth",
|
|
start_line=1, end_line=5,
|
|
),
|
|
]
|
|
|
|
# Mock the LSP bridge (will fail on import or async)
|
|
# The function should attempt FTS fallback before LSP expansion
|
|
try:
|
|
results = engine._search_lsp_graph(
|
|
index_path, "auth", limit=5,
|
|
)
|
|
except Exception:
|
|
pass # LSP deps may not be available, but FTS fallback was attempted
|
|
|
|
# Verify FTS was called as fallback
|
|
mock_exact.assert_called_once()
|