mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-09 02:24:11 +08:00
Refactor code structure for improved readability and maintainability
This commit is contained in:
264
codex-lens/tests/api/test_semantic_integration.py
Normal file
264
codex-lens/tests/api/test_semantic_integration.py
Normal file
@@ -0,0 +1,264 @@
|
||||
"""Integration tests for semantic.py API - fusion strategy routing and result transform.
|
||||
|
||||
Tests cover:
|
||||
- _execute_search: Strategy routing for rrf, binary, staged, hybrid (compat), dense_rerank
|
||||
- _transform_results: Score extraction and kind filtering
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
from unittest.mock import MagicMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from codexlens.api.models import SemanticResult
|
||||
from codexlens.api.semantic import _execute_search, _transform_results
|
||||
from codexlens.entities import SearchResult
|
||||
from codexlens.search.chain_search import (
|
||||
ChainSearchEngine,
|
||||
ChainSearchResult,
|
||||
SearchOptions,
|
||||
SearchStats,
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test Fixtures
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_engine():
|
||||
"""Create mock ChainSearchEngine."""
|
||||
engine = MagicMock(spec=ChainSearchEngine)
|
||||
return engine
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_chain_result():
|
||||
"""Create mock ChainSearchResult with sample data."""
|
||||
return ChainSearchResult(
|
||||
query="test query",
|
||||
results=[
|
||||
SearchResult(
|
||||
path="auth.py",
|
||||
score=0.9,
|
||||
excerpt="def authenticate(user):",
|
||||
symbol_name="authenticate",
|
||||
symbol_kind="function",
|
||||
start_line=10,
|
||||
end_line=20,
|
||||
),
|
||||
SearchResult(
|
||||
path="login.py",
|
||||
score=0.7,
|
||||
excerpt="class LoginHandler:",
|
||||
symbol_name="LoginHandler",
|
||||
symbol_kind="class",
|
||||
start_line=5,
|
||||
end_line=50,
|
||||
),
|
||||
],
|
||||
symbols=[],
|
||||
stats=SearchStats(),
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_options():
|
||||
"""Create mock SearchOptions."""
|
||||
return SearchOptions(
|
||||
hybrid_mode=True,
|
||||
enable_vector=True,
|
||||
enable_fuzzy=True,
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: _execute_search strategy routing
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestExecuteSearchStrategyRouting:
|
||||
"""Tests for _execute_search() fusion strategy routing."""
|
||||
|
||||
def test_fusion_strategy_rrf(self, mock_engine, mock_chain_result, mock_options):
|
||||
"""Default 'rrf' strategy should call engine.search()."""
|
||||
mock_engine.search.return_value = mock_chain_result
|
||||
|
||||
result = _execute_search(
|
||||
engine=mock_engine,
|
||||
query="test",
|
||||
source_path=Path("/project"),
|
||||
fusion_strategy="rrf",
|
||||
options=mock_options,
|
||||
limit=20,
|
||||
)
|
||||
|
||||
mock_engine.search.assert_called_once()
|
||||
assert isinstance(result, ChainSearchResult)
|
||||
|
||||
def test_fusion_strategy_binary(self, mock_engine, mock_chain_result, mock_options):
|
||||
"""'binary' strategy should call engine.binary_cascade_search()."""
|
||||
mock_engine.binary_cascade_search.return_value = mock_chain_result
|
||||
|
||||
result = _execute_search(
|
||||
engine=mock_engine,
|
||||
query="test",
|
||||
source_path=Path("/project"),
|
||||
fusion_strategy="binary",
|
||||
options=mock_options,
|
||||
limit=20,
|
||||
)
|
||||
|
||||
mock_engine.binary_cascade_search.assert_called_once()
|
||||
# Verify k and coarse_k parameters
|
||||
call_kwargs = mock_engine.binary_cascade_search.call_args
|
||||
assert call_kwargs[1]["k"] == 20
|
||||
assert call_kwargs[1]["coarse_k"] == 100 # limit * 5
|
||||
|
||||
def test_fusion_strategy_staged(self, mock_engine, mock_chain_result, mock_options):
|
||||
"""'staged' strategy should call engine.staged_cascade_search()."""
|
||||
mock_engine.staged_cascade_search.return_value = mock_chain_result
|
||||
|
||||
result = _execute_search(
|
||||
engine=mock_engine,
|
||||
query="test",
|
||||
source_path=Path("/project"),
|
||||
fusion_strategy="staged",
|
||||
options=mock_options,
|
||||
limit=20,
|
||||
)
|
||||
|
||||
mock_engine.staged_cascade_search.assert_called_once()
|
||||
|
||||
def test_fusion_strategy_hybrid_compat(
|
||||
self, mock_engine, mock_chain_result, mock_options
|
||||
):
|
||||
"""'hybrid' strategy should map to binary_rerank_cascade_search (backward compat)."""
|
||||
mock_engine.binary_rerank_cascade_search.return_value = mock_chain_result
|
||||
|
||||
result = _execute_search(
|
||||
engine=mock_engine,
|
||||
query="test",
|
||||
source_path=Path("/project"),
|
||||
fusion_strategy="hybrid",
|
||||
options=mock_options,
|
||||
limit=20,
|
||||
)
|
||||
|
||||
mock_engine.binary_rerank_cascade_search.assert_called_once()
|
||||
|
||||
def test_fusion_strategy_dense_rerank(
|
||||
self, mock_engine, mock_chain_result, mock_options
|
||||
):
|
||||
"""'dense_rerank' strategy should call engine.search() (default fallback)."""
|
||||
# In the current implementation, dense_rerank is not explicitly handled,
|
||||
# so it falls through to the default (rrf) branch
|
||||
mock_engine.search.return_value = mock_chain_result
|
||||
|
||||
result = _execute_search(
|
||||
engine=mock_engine,
|
||||
query="test",
|
||||
source_path=Path("/project"),
|
||||
fusion_strategy="dense_rerank",
|
||||
options=mock_options,
|
||||
limit=20,
|
||||
)
|
||||
|
||||
# dense_rerank falls to default (else branch -> engine.search)
|
||||
mock_engine.search.assert_called_once()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: _transform_results
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestTransformResults:
|
||||
"""Tests for _transform_results()."""
|
||||
|
||||
def test_transform_results_basic(self):
|
||||
"""_transform_results should convert SearchResult to SemanticResult."""
|
||||
results = [
|
||||
SearchResult(
|
||||
path="auth.py",
|
||||
score=0.9,
|
||||
excerpt="def authenticate(user):",
|
||||
symbol_name="authenticate",
|
||||
symbol_kind="function",
|
||||
start_line=10,
|
||||
end_line=20,
|
||||
),
|
||||
SearchResult(
|
||||
path="models.py",
|
||||
score=0.7,
|
||||
excerpt="class UserModel:",
|
||||
symbol_name="UserModel",
|
||||
symbol_kind="class",
|
||||
start_line=1,
|
||||
end_line=30,
|
||||
),
|
||||
]
|
||||
|
||||
semantic_results = _transform_results(
|
||||
results=results,
|
||||
mode="fusion",
|
||||
vector_weight=0.5,
|
||||
structural_weight=0.3,
|
||||
keyword_weight=0.2,
|
||||
kind_filter=None,
|
||||
include_match_reason=False,
|
||||
query="authentication",
|
||||
)
|
||||
|
||||
assert len(semantic_results) == 2
|
||||
assert all(isinstance(r, SemanticResult) for r in semantic_results)
|
||||
|
||||
# Check first result
|
||||
first = semantic_results[0]
|
||||
assert first.fusion_score == 0.9
|
||||
assert first.symbol_name == "authenticate"
|
||||
assert first.kind == "function"
|
||||
assert first.file_path == "auth.py"
|
||||
assert first.line == 10
|
||||
|
||||
# Should be sorted by fusion_score descending
|
||||
scores = [r.fusion_score for r in semantic_results]
|
||||
assert scores == sorted(scores, reverse=True)
|
||||
|
||||
def test_transform_results_kind_filter(self):
|
||||
"""_transform_results should filter by kind when kind_filter is set."""
|
||||
results = [
|
||||
SearchResult(
|
||||
path="auth.py",
|
||||
score=0.9,
|
||||
excerpt="def auth():",
|
||||
symbol_name="auth",
|
||||
symbol_kind="function",
|
||||
),
|
||||
SearchResult(
|
||||
path="models.py",
|
||||
score=0.8,
|
||||
excerpt="class User:",
|
||||
symbol_name="User",
|
||||
symbol_kind="class",
|
||||
),
|
||||
]
|
||||
|
||||
# Filter to only functions
|
||||
semantic_results = _transform_results(
|
||||
results=results,
|
||||
mode="fusion",
|
||||
vector_weight=0.5,
|
||||
structural_weight=0.3,
|
||||
keyword_weight=0.2,
|
||||
kind_filter=["function"],
|
||||
include_match_reason=False,
|
||||
query="test",
|
||||
)
|
||||
|
||||
assert len(semantic_results) == 1
|
||||
assert semantic_results[0].kind == "function"
|
||||
161
codex-lens/tests/test_binary_searcher.py
Normal file
161
codex-lens/tests/test_binary_searcher.py
Normal file
@@ -0,0 +1,161 @@
|
||||
"""Unit tests for BinarySearcher - binary vector search using Hamming distance.
|
||||
|
||||
Tests cover:
|
||||
- load: mmap file loading, DB fallback, no data scenario
|
||||
- search: basic search, top_k limit, empty index
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch, mock_open
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from codexlens.search.binary_searcher import BinarySearcher
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test Fixtures
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def temp_paths():
|
||||
"""Create temporary directory structure."""
|
||||
tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
|
||||
root = Path(tmpdir.name)
|
||||
yield root
|
||||
try:
|
||||
tmpdir.cleanup()
|
||||
except (PermissionError, OSError):
|
||||
pass
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def binary_mmap_setup(temp_paths):
|
||||
"""Create a mock memory-mapped binary vectors file with metadata."""
|
||||
num_vectors = 10
|
||||
dim_bytes = 32 # 256 bits = 32 bytes
|
||||
|
||||
# Create binary matrix
|
||||
rng = np.random.default_rng(42)
|
||||
binary_matrix = rng.integers(0, 256, size=(num_vectors, dim_bytes), dtype=np.uint8)
|
||||
chunk_ids = list(range(100, 100 + num_vectors))
|
||||
|
||||
# Write mmap file
|
||||
mmap_path = temp_paths / "_binary_vectors.mmap"
|
||||
binary_matrix.tofile(str(mmap_path))
|
||||
|
||||
# Write metadata
|
||||
meta_path = mmap_path.with_suffix(".meta.json")
|
||||
meta = {
|
||||
"shape": [num_vectors, dim_bytes],
|
||||
"chunk_ids": chunk_ids,
|
||||
}
|
||||
with open(meta_path, "w") as f:
|
||||
json.dump(meta, f)
|
||||
|
||||
return temp_paths, binary_matrix, chunk_ids
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: load
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestBinarySearcherLoad:
|
||||
"""Tests for BinarySearcher.load()."""
|
||||
|
||||
def test_load_mmap(self, binary_mmap_setup):
|
||||
"""Memory-mapped file loading should succeed and mark is_memmap."""
|
||||
index_root, binary_matrix, chunk_ids = binary_mmap_setup
|
||||
searcher = BinarySearcher(index_root)
|
||||
|
||||
result = searcher.load()
|
||||
|
||||
assert result is True
|
||||
assert searcher._loaded is True
|
||||
assert searcher.is_memmap is True
|
||||
assert searcher.vector_count == len(chunk_ids)
|
||||
|
||||
def test_load_db_fallback(self, temp_paths):
|
||||
"""Should fall back to DB loading when no mmap file exists."""
|
||||
searcher = BinarySearcher(temp_paths)
|
||||
|
||||
# Mock the DB fallback
|
||||
with patch.object(searcher, "_load_from_db", return_value=True) as mock_db:
|
||||
result = searcher.load()
|
||||
|
||||
assert result is True
|
||||
mock_db.assert_called_once()
|
||||
|
||||
def test_load_no_data(self, temp_paths):
|
||||
"""Should return False when neither mmap nor DB data available."""
|
||||
searcher = BinarySearcher(temp_paths)
|
||||
|
||||
with patch.object(searcher, "_load_from_db", return_value=False):
|
||||
result = searcher.load()
|
||||
|
||||
assert result is False
|
||||
assert searcher._loaded is False
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: search
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestBinarySearcherSearch:
|
||||
"""Tests for BinarySearcher.search()."""
|
||||
|
||||
def test_search_basic(self, binary_mmap_setup):
|
||||
"""Basic search should return (chunk_id, distance) tuples."""
|
||||
index_root, binary_matrix, chunk_ids = binary_mmap_setup
|
||||
searcher = BinarySearcher(index_root)
|
||||
searcher.load()
|
||||
|
||||
# Create a query vector (256 dimensions, will be binarized)
|
||||
rng = np.random.default_rng(99)
|
||||
query_vector = rng.standard_normal(256).astype(np.float32)
|
||||
|
||||
results = searcher.search(query_vector, top_k=5)
|
||||
|
||||
assert len(results) == 5
|
||||
# Results should be (chunk_id, hamming_distance) tuples
|
||||
for chunk_id, distance in results:
|
||||
assert isinstance(chunk_id, int)
|
||||
assert isinstance(distance, int)
|
||||
assert chunk_id in chunk_ids
|
||||
|
||||
def test_search_top_k(self, binary_mmap_setup):
|
||||
"""Search should respect top_k limit."""
|
||||
index_root, binary_matrix, chunk_ids = binary_mmap_setup
|
||||
searcher = BinarySearcher(index_root)
|
||||
searcher.load()
|
||||
|
||||
query_vector = np.random.default_rng(42).standard_normal(256).astype(np.float32)
|
||||
|
||||
results_3 = searcher.search(query_vector, top_k=3)
|
||||
results_7 = searcher.search(query_vector, top_k=7)
|
||||
|
||||
assert len(results_3) == 3
|
||||
assert len(results_7) == 7
|
||||
# Results should be sorted by distance (ascending)
|
||||
distances_3 = [d for _, d in results_3]
|
||||
assert distances_3 == sorted(distances_3)
|
||||
|
||||
def test_search_empty_index(self, temp_paths):
|
||||
"""Search on empty/unloaded index should return empty list."""
|
||||
searcher = BinarySearcher(temp_paths)
|
||||
# Do not load - index is empty
|
||||
|
||||
query_vector = np.zeros(256, dtype=np.float32)
|
||||
|
||||
with patch.object(searcher, "load", return_value=False):
|
||||
results = searcher.search(query_vector, top_k=5)
|
||||
|
||||
assert results == []
|
||||
392
codex-lens/tests/test_cascade_strategies.py
Normal file
392
codex-lens/tests/test_cascade_strategies.py
Normal file
@@ -0,0 +1,392 @@
|
||||
"""Integration tests for chain_search.py cascade strategies.
|
||||
|
||||
Tests cover:
|
||||
- binary_cascade_search: Full pipeline and numpy-unavailable fallback
|
||||
- binary_rerank_cascade_search: Pipeline and fallback
|
||||
- dense_rerank_cascade_search: Pipeline and fallback
|
||||
- cascade_search: Router dispatching to correct strategy methods
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
from unittest.mock import MagicMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from codexlens.config import Config
|
||||
from codexlens.entities import SearchResult
|
||||
from codexlens.search.chain_search import (
|
||||
ChainSearchEngine,
|
||||
ChainSearchResult,
|
||||
SearchOptions,
|
||||
SearchStats,
|
||||
)
|
||||
from codexlens.storage.path_mapper import PathMapper
|
||||
from codexlens.storage.registry import RegistryStore
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test Fixtures
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def temp_paths():
|
||||
"""Create temporary directory structure."""
|
||||
tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
|
||||
root = Path(tmpdir.name)
|
||||
yield root
|
||||
try:
|
||||
tmpdir.cleanup()
|
||||
except (PermissionError, OSError):
|
||||
pass
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_registry(temp_paths: Path):
|
||||
"""Create mock registry store."""
|
||||
registry = RegistryStore(db_path=temp_paths / "registry.db")
|
||||
registry.initialize()
|
||||
return registry
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_mapper(temp_paths: Path):
|
||||
"""Create path mapper."""
|
||||
return PathMapper(index_root=temp_paths / "indexes")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_config():
|
||||
"""Create mock config for cascade search."""
|
||||
config = MagicMock(spec=Config)
|
||||
config.cascade_coarse_k = 100
|
||||
config.cascade_fine_k = 10
|
||||
config.cascade_strategy = "binary"
|
||||
config.enable_staged_rerank = False
|
||||
config.staged_clustering_strategy = "auto"
|
||||
config.staged_clustering_min_size = 3
|
||||
config.graph_expansion_depth = 2
|
||||
return config
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_search_results() -> List[SearchResult]:
|
||||
"""Create sample search results for testing."""
|
||||
return [
|
||||
SearchResult(path="a.py", score=0.9, excerpt="def auth():"),
|
||||
SearchResult(path="b.py", score=0.8, excerpt="class User:"),
|
||||
SearchResult(path="c.py", score=0.7, excerpt="def login():"),
|
||||
]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: binary_cascade_search
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestBinaryCascadeSearch:
|
||||
"""Tests for binary_cascade_search()."""
|
||||
|
||||
def test_binary_cascade_full_pipeline(
|
||||
self, mock_registry, mock_mapper, mock_config, temp_paths
|
||||
):
|
||||
"""binary_cascade_search should execute full binary+dense pipeline."""
|
||||
engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config)
|
||||
|
||||
with patch.object(engine, "_find_start_index") as mock_find:
|
||||
mock_find.return_value = temp_paths / "index" / "_index.db"
|
||||
|
||||
with patch.object(engine, "_collect_index_paths") as mock_collect:
|
||||
mock_collect.return_value = [temp_paths / "index" / "_index.db"]
|
||||
|
||||
# Mock the embedding backend imports
|
||||
with patch("codexlens.search.chain_search.NUMPY_AVAILABLE", True):
|
||||
with patch.dict("sys.modules", {
|
||||
"codexlens.indexing.embedding": MagicMock(),
|
||||
"codexlens.semantic.ann_index": MagicMock(),
|
||||
}):
|
||||
# Mock _get_or_create_binary_index
|
||||
with patch.object(
|
||||
engine, "_get_or_create_binary_index"
|
||||
) as mock_bin:
|
||||
mock_index = MagicMock()
|
||||
mock_index.count.return_value = 10
|
||||
mock_index.search.return_value = ([1, 2], [10, 20])
|
||||
mock_bin.return_value = mock_index
|
||||
|
||||
# The search should fall back to standard on import issues
|
||||
with patch.object(engine, "search") as mock_search:
|
||||
mock_search.return_value = ChainSearchResult(
|
||||
query="test",
|
||||
results=[SearchResult(path="a.py", score=0.9, excerpt="a")],
|
||||
symbols=[],
|
||||
stats=SearchStats(),
|
||||
)
|
||||
|
||||
result = engine.binary_cascade_search(
|
||||
"test query", temp_paths / "src",
|
||||
k=10, coarse_k=100,
|
||||
)
|
||||
|
||||
assert isinstance(result, ChainSearchResult)
|
||||
|
||||
def test_binary_cascade_numpy_unavailable(
|
||||
self, mock_registry, mock_mapper, mock_config, temp_paths
|
||||
):
|
||||
"""binary_cascade_search should fall back to standard search when numpy unavailable."""
|
||||
engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config)
|
||||
|
||||
with patch("codexlens.search.chain_search.NUMPY_AVAILABLE", False):
|
||||
with patch.object(engine, "search") as mock_search:
|
||||
mock_search.return_value = ChainSearchResult(
|
||||
query="test",
|
||||
results=[],
|
||||
symbols=[],
|
||||
stats=SearchStats(),
|
||||
)
|
||||
|
||||
result = engine.binary_cascade_search(
|
||||
"query", temp_paths / "src",
|
||||
)
|
||||
|
||||
mock_search.assert_called_once()
|
||||
assert isinstance(result, ChainSearchResult)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: binary_rerank_cascade_search
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestBinaryRerankCascadeSearch:
|
||||
"""Tests for binary_rerank_cascade_search()."""
|
||||
|
||||
def test_binary_rerank_cascade_pipeline(
|
||||
self, mock_registry, mock_mapper, mock_config, temp_paths
|
||||
):
|
||||
"""binary_rerank_cascade_search should execute binary+cross-encoder pipeline."""
|
||||
engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config)
|
||||
|
||||
with patch("codexlens.search.chain_search.NUMPY_AVAILABLE", True):
|
||||
with patch.object(engine, "_find_start_index") as mock_find:
|
||||
mock_find.return_value = temp_paths / "index" / "_index.db"
|
||||
|
||||
with patch.object(engine, "_collect_index_paths") as mock_collect:
|
||||
mock_collect.return_value = [temp_paths / "index" / "_index.db"]
|
||||
|
||||
# Mock BinaryEmbeddingBackend import
|
||||
with patch.dict("sys.modules", {
|
||||
"codexlens.indexing.embedding": MagicMock(),
|
||||
}):
|
||||
with patch.object(engine, "search") as mock_search:
|
||||
mock_search.return_value = ChainSearchResult(
|
||||
query="test",
|
||||
results=[SearchResult(path="a.py", score=0.9, excerpt="a")],
|
||||
symbols=[],
|
||||
stats=SearchStats(),
|
||||
)
|
||||
|
||||
result = engine.binary_rerank_cascade_search(
|
||||
"test query", temp_paths / "src",
|
||||
k=10, coarse_k=100,
|
||||
)
|
||||
|
||||
assert isinstance(result, ChainSearchResult)
|
||||
|
||||
def test_binary_rerank_fallback(
|
||||
self, mock_registry, mock_mapper, mock_config, temp_paths
|
||||
):
|
||||
"""binary_rerank_cascade_search should fall back when numpy unavailable."""
|
||||
engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config)
|
||||
|
||||
with patch("codexlens.search.chain_search.NUMPY_AVAILABLE", False):
|
||||
with patch.object(engine, "search") as mock_search:
|
||||
mock_search.return_value = ChainSearchResult(
|
||||
query="test",
|
||||
results=[],
|
||||
symbols=[],
|
||||
stats=SearchStats(),
|
||||
)
|
||||
|
||||
result = engine.binary_rerank_cascade_search(
|
||||
"query", temp_paths / "src",
|
||||
)
|
||||
|
||||
mock_search.assert_called_once()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: dense_rerank_cascade_search
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestDenseRerankCascadeSearch:
|
||||
"""Tests for dense_rerank_cascade_search()."""
|
||||
|
||||
def test_dense_rerank_cascade_pipeline(
|
||||
self, mock_registry, mock_mapper, mock_config, temp_paths
|
||||
):
|
||||
"""dense_rerank_cascade_search should execute dense+cross-encoder pipeline."""
|
||||
engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config)
|
||||
|
||||
with patch("codexlens.search.chain_search.NUMPY_AVAILABLE", True):
|
||||
with patch.object(engine, "_find_start_index") as mock_find:
|
||||
mock_find.return_value = temp_paths / "index" / "_index.db"
|
||||
|
||||
with patch.object(engine, "_collect_index_paths") as mock_collect:
|
||||
mock_collect.return_value = [temp_paths / "index" / "_index.db"]
|
||||
|
||||
with patch.object(engine, "search") as mock_search:
|
||||
mock_search.return_value = ChainSearchResult(
|
||||
query="test",
|
||||
results=[SearchResult(path="a.py", score=0.9, excerpt="a")],
|
||||
symbols=[],
|
||||
stats=SearchStats(),
|
||||
)
|
||||
|
||||
result = engine.dense_rerank_cascade_search(
|
||||
"test query", temp_paths / "src",
|
||||
k=10, coarse_k=100,
|
||||
)
|
||||
|
||||
assert isinstance(result, ChainSearchResult)
|
||||
|
||||
def test_dense_rerank_fallback(
|
||||
self, mock_registry, mock_mapper, mock_config, temp_paths
|
||||
):
|
||||
"""dense_rerank_cascade_search should fall back when numpy unavailable."""
|
||||
engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config)
|
||||
|
||||
with patch("codexlens.search.chain_search.NUMPY_AVAILABLE", False):
|
||||
with patch.object(engine, "search") as mock_search:
|
||||
mock_search.return_value = ChainSearchResult(
|
||||
query="test",
|
||||
results=[],
|
||||
symbols=[],
|
||||
stats=SearchStats(),
|
||||
)
|
||||
|
||||
result = engine.dense_rerank_cascade_search(
|
||||
"query", temp_paths / "src",
|
||||
)
|
||||
|
||||
mock_search.assert_called_once()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: cascade_search (unified router)
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestCascadeRouter:
|
||||
"""Tests for cascade_search() strategy routing."""
|
||||
|
||||
def test_cascade_router_binary(
|
||||
self, mock_registry, mock_mapper, mock_config, temp_paths
|
||||
):
|
||||
"""cascade_search with strategy='binary' should route to binary_cascade_search."""
|
||||
engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config)
|
||||
|
||||
with patch.object(engine, "binary_cascade_search") as mock_binary:
|
||||
mock_binary.return_value = ChainSearchResult(
|
||||
query="test", results=[], symbols=[], stats=SearchStats()
|
||||
)
|
||||
|
||||
engine.cascade_search(
|
||||
"query", temp_paths / "src", strategy="binary"
|
||||
)
|
||||
|
||||
mock_binary.assert_called_once()
|
||||
|
||||
def test_cascade_router_binary_rerank(
|
||||
self, mock_registry, mock_mapper, mock_config, temp_paths
|
||||
):
|
||||
"""cascade_search with strategy='binary_rerank' should route correctly."""
|
||||
engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config)
|
||||
|
||||
with patch.object(engine, "binary_rerank_cascade_search") as mock_rerank:
|
||||
mock_rerank.return_value = ChainSearchResult(
|
||||
query="test", results=[], symbols=[], stats=SearchStats()
|
||||
)
|
||||
|
||||
engine.cascade_search(
|
||||
"query", temp_paths / "src", strategy="binary_rerank"
|
||||
)
|
||||
|
||||
mock_rerank.assert_called_once()
|
||||
|
||||
def test_cascade_router_dense_rerank(
|
||||
self, mock_registry, mock_mapper, mock_config, temp_paths
|
||||
):
|
||||
"""cascade_search with strategy='dense_rerank' should route correctly."""
|
||||
engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config)
|
||||
|
||||
with patch.object(engine, "dense_rerank_cascade_search") as mock_dense:
|
||||
mock_dense.return_value = ChainSearchResult(
|
||||
query="test", results=[], symbols=[], stats=SearchStats()
|
||||
)
|
||||
|
||||
engine.cascade_search(
|
||||
"query", temp_paths / "src", strategy="dense_rerank"
|
||||
)
|
||||
|
||||
mock_dense.assert_called_once()
|
||||
|
||||
def test_cascade_router_staged(
|
||||
self, mock_registry, mock_mapper, mock_config, temp_paths
|
||||
):
|
||||
"""cascade_search with strategy='staged' should route to staged_cascade_search."""
|
||||
engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config)
|
||||
|
||||
with patch.object(engine, "staged_cascade_search") as mock_staged:
|
||||
mock_staged.return_value = ChainSearchResult(
|
||||
query="test", results=[], symbols=[], stats=SearchStats()
|
||||
)
|
||||
|
||||
engine.cascade_search(
|
||||
"query", temp_paths / "src", strategy="staged"
|
||||
)
|
||||
|
||||
mock_staged.assert_called_once()
|
||||
|
||||
def test_cascade_router_config_default(
|
||||
self, mock_registry, mock_mapper, temp_paths
|
||||
):
|
||||
"""cascade_search with no strategy param should use config cascade_strategy."""
|
||||
config = MagicMock(spec=Config)
|
||||
config.cascade_strategy = "binary_rerank"
|
||||
config.cascade_coarse_k = 100
|
||||
config.cascade_fine_k = 10
|
||||
|
||||
engine = ChainSearchEngine(mock_registry, mock_mapper, config=config)
|
||||
|
||||
with patch.object(engine, "binary_rerank_cascade_search") as mock_rerank:
|
||||
mock_rerank.return_value = ChainSearchResult(
|
||||
query="test", results=[], symbols=[], stats=SearchStats()
|
||||
)
|
||||
|
||||
# No strategy param -> reads from config
|
||||
engine.cascade_search("query", temp_paths / "src")
|
||||
|
||||
mock_rerank.assert_called_once()
|
||||
|
||||
def test_cascade_router_invalid_fallback(
|
||||
self, mock_registry, mock_mapper, mock_config, temp_paths
|
||||
):
|
||||
"""cascade_search with invalid strategy should default to 'binary'."""
|
||||
engine = ChainSearchEngine(mock_registry, mock_mapper, config=mock_config)
|
||||
|
||||
with patch.object(engine, "binary_cascade_search") as mock_binary:
|
||||
mock_binary.return_value = ChainSearchResult(
|
||||
query="test", results=[], symbols=[], stats=SearchStats()
|
||||
)
|
||||
|
||||
engine.cascade_search(
|
||||
"query", temp_paths / "src", strategy="nonexistent"
|
||||
)
|
||||
|
||||
mock_binary.assert_called_once()
|
||||
96
codex-lens/tests/test_config_cascade.py
Normal file
96
codex-lens/tests/test_config_cascade.py
Normal file
@@ -0,0 +1,96 @@
|
||||
"""Unit tests for Config cascade settings validation.
|
||||
|
||||
Tests cover:
|
||||
- Default cascade_strategy value
|
||||
- Valid cascade strategies accepted by load_settings
|
||||
- Invalid cascade strategy fallback behavior
|
||||
- Staged cascade config defaults
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from codexlens.config import Config
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test Fixtures
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def temp_config_dir():
|
||||
"""Create temporary directory for config data_dir."""
|
||||
tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
|
||||
yield Path(tmpdir.name)
|
||||
try:
|
||||
tmpdir.cleanup()
|
||||
except (PermissionError, OSError):
|
||||
pass
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: cascade config defaults
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestConfigCascadeDefaults:
    """Tests for Config cascade-related defaults and load_settings()."""

    @staticmethod
    def _write_settings(config, settings):
        """Serialize *settings* as JSON to the config's settings_path.

        Extracted helper: the identical write sequence was duplicated in
        the valid-strategy and invalid-strategy tests.
        """
        settings_path = config.settings_path
        settings_path.parent.mkdir(parents=True, exist_ok=True)
        with open(settings_path, "w", encoding="utf-8") as f:
            json.dump(settings, f)

    def test_default_cascade_strategy(self, temp_config_dir):
        """Default cascade_strategy should be 'binary'."""
        config = Config(data_dir=temp_config_dir)
        assert config.cascade_strategy == "binary"

    def test_valid_cascade_strategies(self, temp_config_dir):
        """load_settings should accept all valid cascade strategies."""
        valid_strategies = ["binary", "binary_rerank", "dense_rerank", "staged"]

        for strategy in valid_strategies:
            config = Config(data_dir=temp_config_dir)
            self._write_settings(config, {"cascade": {"strategy": strategy}})

            # Env overrides are patched out so only the settings file is read.
            with patch.object(config, "_apply_env_overrides"):
                config.load_settings()

            assert config.cascade_strategy == strategy, (
                f"Strategy '{strategy}' should be accepted"
            )

    def test_invalid_cascade_strategy_fallback(self, temp_config_dir):
        """Invalid cascade strategy should keep default (not crash)."""
        config = Config(data_dir=temp_config_dir)
        self._write_settings(config, {"cascade": {"strategy": "invalid_strategy"}})

        with patch.object(config, "_apply_env_overrides"):
            config.load_settings()

        # Should keep the default "binary" strategy
        assert config.cascade_strategy == "binary"

    def test_staged_config_defaults(self, temp_config_dir):
        """Staged cascade settings should have correct defaults."""
        config = Config(data_dir=temp_config_dir)
        assert config.staged_coarse_k == 200
        assert config.staged_lsp_depth == 2
        assert config.staged_clustering_strategy == "auto"
        assert config.staged_clustering_min_size == 3
        assert config.enable_staged_rerank is True
        assert config.cascade_coarse_k == 100
        assert config.cascade_fine_k == 10
|
||||
367
codex-lens/tests/test_hybrid_search_unit.py
Normal file
367
codex-lens/tests/test_hybrid_search_unit.py
Normal file
@@ -0,0 +1,367 @@
|
||||
"""Unit tests for HybridSearchEngine - parallel search and RRF fusion.
|
||||
|
||||
Tests cover:
|
||||
- search: exact only, fuzzy enabled, vector enabled, pure vector mode
|
||||
- search: RRF fusion, empty query, no results, reranking, category filtering
|
||||
- _search_parallel: parallel backend execution
|
||||
- _search_lsp_graph: LSP graph expansion with seeds, vector-to-FTS fallback
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
from unittest.mock import MagicMock, Mock, patch, PropertyMock
|
||||
|
||||
import pytest
|
||||
|
||||
from codexlens.config import Config
|
||||
from codexlens.entities import SearchResult
|
||||
from codexlens.search.hybrid_search import HybridSearchEngine
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test Fixtures
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@pytest.fixture
def temp_paths():
    """Create temporary directory structure with a mock index.

    Yields the directory root; the index lives at <root>/_index.db.
    """
    tmpdir = tempfile.TemporaryDirectory(ignore_cleanup_errors=True)
    root = Path(tmpdir.name)
    # Create a non-empty index file to pass the empty-file guard
    index_path = root / "_index.db"
    index_path.write_bytes(b"\x00" * 100)
    yield root
    try:
        tmpdir.cleanup()
    except OSError:
        # PermissionError is already an OSError subclass; one catch suffices.
        pass
|
||||
|
||||
|
||||
@pytest.fixture
def mock_config():
    """Create mock config for hybrid search."""
    defaults = {
        "embedding_use_gpu": False,
        "enable_reranking": False,
        "enable_cross_encoder_rerank": False,
        "symbol_boost_factor": 1.5,
        "fusion_method": "rrf",
        "rrf_k": 60,
        "enable_category_filter": True,
    }
    cfg = MagicMock(spec=Config)
    for attr, value in defaults.items():
        setattr(cfg, attr, value)
    return cfg
|
||||
|
||||
|
||||
@pytest.fixture
def sample_results() -> List[SearchResult]:
    """Create sample search results."""
    specs = [
        ("auth.py", 0.9, "def authenticate(user):", "authenticate", "function"),
        ("login.py", 0.7, "class LoginHandler:", "LoginHandler", "class"),
    ]
    return [
        SearchResult(
            path=path,
            score=score,
            excerpt=excerpt,
            symbol_name=name,
            symbol_kind=kind,
        )
        for path, score, excerpt, name, kind in specs
    ]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: search with different backends
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestHybridSearchBackends:
    """Tests for HybridSearchEngine.search() backend configurations.

    Each test stubs out ``_search_parallel`` so only the routing logic of
    ``search()`` runs, then inspects which backends were requested.
    NOTE(review): the backends mapping is read as the 3rd positional
    argument of ``_search_parallel`` — keep in sync with its signature.
    """

    def test_search_exact_only(self, temp_paths, mock_config):
        """Search with only exact FTS backend."""
        engine = HybridSearchEngine(config=mock_config)
        index_path = temp_paths / "_index.db"

        with patch.object(engine, "_search_parallel") as mock_parallel:
            mock_parallel.return_value = {
                "exact": [
                    SearchResult(path="a.py", score=10.0, excerpt="result"),
                ]
            }

            # Both fuzzy and vector disabled: only "exact" should be routed.
            results = engine.search(
                index_path, "test query",
                enable_fuzzy=False, enable_vector=False,
            )

            assert len(results) == 1
            # Verify only exact backend was requested
            call_args = mock_parallel.call_args
            backends = call_args[0][2]  # 3rd positional arg
            assert "exact" in backends
            assert "fuzzy" not in backends
            assert "vector" not in backends

    def test_search_fuzzy_enabled(self, temp_paths, mock_config):
        """Search with exact + fuzzy backends."""
        engine = HybridSearchEngine(config=mock_config)
        index_path = temp_paths / "_index.db"

        with patch.object(engine, "_search_parallel") as mock_parallel:
            mock_parallel.return_value = {
                "exact": [SearchResult(path="a.py", score=10.0, excerpt="exact")],
                "fuzzy": [SearchResult(path="b.py", score=8.0, excerpt="fuzzy")],
            }

            results = engine.search(
                index_path, "test_query",
                enable_fuzzy=True, enable_vector=False,
            )

            # Fusion of the two stubbed sources must yield at least one hit.
            assert len(results) >= 1
            backends = mock_parallel.call_args[0][2]
            assert "exact" in backends
            assert "fuzzy" in backends

    def test_search_vector_enabled(self, temp_paths, mock_config):
        """Search with exact + fuzzy + vector backends."""
        engine = HybridSearchEngine(config=mock_config)
        index_path = temp_paths / "_index.db"

        with patch.object(engine, "_search_parallel") as mock_parallel:
            mock_parallel.return_value = {
                "exact": [SearchResult(path="a.py", score=10.0, excerpt="exact")],
                "vector": [SearchResult(path="c.py", score=0.85, excerpt="vector")],
            }

            results = engine.search(
                index_path, "test_query",
                enable_fuzzy=False, enable_vector=True,
            )

            backends = mock_parallel.call_args[0][2]
            assert "exact" in backends
            assert "vector" in backends

    def test_search_pure_vector(self, temp_paths, mock_config):
        """Pure vector mode should only use vector backend."""
        engine = HybridSearchEngine(config=mock_config)
        # Category filtering disabled so the single vector hit survives.
        mock_config.enable_category_filter = False
        index_path = temp_paths / "_index.db"

        with patch.object(engine, "_search_parallel") as mock_parallel:
            mock_parallel.return_value = {
                "vector": [SearchResult(path="a.py", score=0.9, excerpt="vec")],
            }

            results = engine.search(
                index_path, "semantic query",
                enable_vector=True, pure_vector=True,
            )

            # pure_vector=True must suppress the FTS ("exact") backend entirely.
            backends = mock_parallel.call_args[0][2]
            assert "vector" in backends
            assert "exact" not in backends
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: search fusion and post-processing
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestHybridSearchFusion:
    """Tests for RRF fusion, empty query, no results, reranking, filtering.

    All tests stub ``_search_parallel`` and exercise the post-processing
    pipeline of ``search()`` (fusion, reranking, category filtering).
    """

    def test_search_rrf_fusion(self, temp_paths, mock_config):
        """Results from multiple backends should be fused via RRF."""
        engine = HybridSearchEngine(config=mock_config)
        index_path = temp_paths / "_index.db"

        with patch.object(engine, "_search_parallel") as mock_parallel:
            mock_parallel.return_value = {
                "exact": [
                    SearchResult(path="a.py", score=10.0, excerpt="exact a"),
                    SearchResult(path="b.py", score=5.0, excerpt="exact b"),
                ],
                "vector": [
                    SearchResult(path="b.py", score=0.9, excerpt="vector b"),
                    SearchResult(path="c.py", score=0.8, excerpt="vector c"),
                ],
            }

            results = engine.search(
                index_path, "test",
                enable_fuzzy=False, enable_vector=True,
            )

            # b.py appears in both sources - should have high fusion score
            assert any(r.path == "b.py" for r in results)

    def test_search_empty_query(self, temp_paths, mock_config):
        """Empty query should still execute (handled gracefully)."""
        engine = HybridSearchEngine(config=mock_config)
        index_path = temp_paths / "_index.db"

        with patch.object(engine, "_search_parallel") as mock_parallel:
            mock_parallel.return_value = {"exact": []}

            results = engine.search(index_path, "", enable_fuzzy=False)

            assert results == []

    def test_search_no_results(self, temp_paths, mock_config):
        """All backends returning empty should produce empty results."""
        engine = HybridSearchEngine(config=mock_config)
        index_path = temp_paths / "_index.db"

        with patch.object(engine, "_search_parallel") as mock_parallel:
            mock_parallel.return_value = {
                "exact": [],
                "fuzzy": [],
            }

            results = engine.search(index_path, "nonexistent")

            assert results == []

    def test_search_reranking(self, temp_paths, mock_config):
        """Reranking should be applied when config enables it."""
        # Enable embedding-based reranking but not the cross-encoder path.
        mock_config.enable_reranking = True
        mock_config.enable_cross_encoder_rerank = False
        mock_config.reranking_top_k = 50
        engine = HybridSearchEngine(config=mock_config)
        index_path = temp_paths / "_index.db"

        # Fake embedder so reranking does not require a real model.
        mock_embedder = MagicMock()
        mock_embedder.embed_single.return_value = [0.1] * 128
        mock_embedder.embed.return_value = [[0.1] * 128]
        engine.embedder = mock_embedder

        with patch.object(engine, "_search_parallel") as mock_parallel:
            mock_parallel.return_value = {
                "exact": [SearchResult(path="a.py", score=10.0, excerpt="code")],
            }

            # Patch at the hybrid_search module level, where search() resolves it.
            with patch("codexlens.search.hybrid_search.rerank_results") as mock_rerank:
                mock_rerank.return_value = [
                    SearchResult(path="a.py", score=0.85, excerpt="code"),
                ]
                results = engine.search(index_path, "query", enable_fuzzy=False)

            mock_rerank.assert_called_once()

    def test_search_category_filtering(self, temp_paths, mock_config):
        """Category filtering should separate code/doc results by intent."""
        mock_config.enable_category_filter = True
        engine = HybridSearchEngine(config=mock_config)
        index_path = temp_paths / "_index.db"

        with patch.object(engine, "_search_parallel") as mock_parallel:
            mock_parallel.return_value = {
                "exact": [
                    SearchResult(path="auth.py", score=10.0, excerpt="def auth"),
                    SearchResult(path="README.md", score=8.0, excerpt="docs"),
                ],
            }

            # Keyword-like query should filter to code
            results = engine.search(
                index_path, "AuthManager",
                enable_fuzzy=False,
            )

            paths = [r.path for r in results]
            # Code files should remain, doc files filtered for KEYWORD intent
            assert "auth.py" in paths
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: _search_parallel
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestSearchParallel:
    """Tests for _search_parallel() parallel backend execution."""

    def test_search_parallel_backends(self, temp_paths, mock_config):
        """Parallel execution should run all requested backends."""
        engine = HybridSearchEngine(config=mock_config)
        db_path = temp_paths / "_index.db"

        exact_hits = [SearchResult(path="a.py", score=10.0, excerpt="exact")]
        fuzzy_hits = [SearchResult(path="b.py", score=8.0, excerpt="fuzzy")]

        with patch.object(engine, "_search_exact", return_value=exact_hits) as spy_exact:
            with patch.object(engine, "_search_fuzzy", return_value=fuzzy_hits) as spy_fuzzy:
                results_map = engine._search_parallel(
                    db_path,
                    "query",
                    backends={"exact": True, "fuzzy": True},
                    limit=10,
                )

        # Every requested backend must appear in the map, and each
        # underlying search helper must have run exactly once.
        for backend, spy in (("exact", spy_exact), ("fuzzy", spy_fuzzy)):
            assert backend in results_map
            spy.assert_called_once()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: _search_lsp_graph
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestSearchLspGraph:
    """Tests for _search_lsp_graph() LSP graph expansion."""

    def test_search_lsp_graph(self, temp_paths, mock_config):
        """LSP graph search should use seed results for expansion."""
        engine = HybridSearchEngine(config=mock_config)
        index_path = temp_paths / "_index.db"

        # When HAS_LSP is False, _search_lsp_graph returns []
        with patch("codexlens.search.hybrid_search.HAS_LSP", False):
            results = engine._search_lsp_graph(
                index_path, "auth function", limit=5,
            )
            assert results == []

    def test_lsp_fallback_vector_to_fts(self, temp_paths, mock_config):
        """When vector seeds fail, should fall back to FTS seeds."""
        engine = HybridSearchEngine(config=mock_config)
        index_path = temp_paths / "_index.db"

        # Force the LSP code path even if LSP deps are absent in this env.
        with patch("codexlens.search.hybrid_search.HAS_LSP", True):
            # Mock _search_vector to return empty (no seeds from vector)
            with patch.object(engine, "_search_vector", return_value=[]):
                # Mock _search_exact to return seeds
                with patch.object(engine, "_search_exact") as mock_exact:
                    mock_exact.return_value = [
                        SearchResult(
                            path="auth.py", score=10.0,
                            excerpt="def auth():", symbol_name="auth",
                            start_line=1, end_line=5,
                        ),
                    ]

                    # Mock the LSP bridge (will fail on import or async)
                    # The function should attempt FTS fallback before LSP expansion
                    try:
                        results = engine._search_lsp_graph(
                            index_path, "auth", limit=5,
                        )
                    except Exception:
                        pass  # LSP deps may not be available, but FTS fallback was attempted

                    # Verify FTS was called as fallback
                    # (the assertion is the real check; the try/except above
                    # only shields it from environment-dependent LSP errors)
                    mock_exact.assert_called_once()
|
||||
330
codex-lens/tests/test_ranking.py
Normal file
330
codex-lens/tests/test_ranking.py
Normal file
@@ -0,0 +1,330 @@
|
||||
"""Unit tests for ranking.py - RRF weights, intent detection, score fusion, and filtering.
|
||||
|
||||
Tests cover:
|
||||
- detect_query_intent: CamelCase/underscore -> KEYWORD, natural language -> SEMANTIC, mixed
|
||||
- adjust_weights_by_intent: Weight adjustments per intent type
|
||||
- get_rrf_weights: Composite of detect + adjust
|
||||
- reciprocal_rank_fusion: Single/multi source, empty, weight normalization
|
||||
- simple_weighted_fusion: Basic fusion and empty input
|
||||
- apply_symbol_boost: Symbol match boost and no-match scenario
|
||||
- filter_results_by_category: KEYWORD -> code only, SEMANTIC -> docs priority
|
||||
- group_similar_results: Group results by score proximity
|
||||
- normalize_weights: All-zero weights edge case
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from typing import Dict, List
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from codexlens.entities import SearchResult
|
||||
from codexlens.search.ranking import (
|
||||
DEFAULT_WEIGHTS,
|
||||
QueryIntent,
|
||||
adjust_weights_by_intent,
|
||||
apply_symbol_boost,
|
||||
detect_query_intent,
|
||||
filter_results_by_category,
|
||||
get_rrf_weights,
|
||||
group_similar_results,
|
||||
normalize_weights,
|
||||
reciprocal_rank_fusion,
|
||||
simple_weighted_fusion,
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Helpers
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def _make_result(
    path: str = "a.py",
    score: float = 0.5,
    excerpt: str = "def foo():",
    symbol_name: str | None = None,
    symbol_kind: str | None = None,
    start_line: int | None = None,
    end_line: int | None = None,
) -> SearchResult:
    """Build a SearchResult pre-filled with sensible test defaults."""
    fields = dict(
        path=path,
        score=score,
        excerpt=excerpt,
        symbol_name=symbol_name,
        symbol_kind=symbol_kind,
        start_line=start_line,
        end_line=end_line,
    )
    return SearchResult(**fields)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: detect_query_intent
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestDetectQueryIntent:
    """Tests for detect_query_intent()."""

    def test_detect_keyword_intent(self):
        """CamelCase/underscore queries should be detected as KEYWORD."""
        for query in ("MyClassName", "my_function_name", "foo::bar"):
            assert detect_query_intent(query) == QueryIntent.KEYWORD

    def test_detect_semantic_intent(self):
        """Natural language queries should be detected as SEMANTIC."""
        natural_language = (
            "how to authenticate users safely?",
            "explain the login process",
        )
        for query in natural_language:
            assert detect_query_intent(query) == QueryIntent.SEMANTIC

    def test_detect_mixed_intent(self):
        """Queries with both code and NL signals should be MIXED."""
        # "my_function" is a code signal (underscore identifier); "how" is NL.
        assert detect_query_intent("how does my_function work") == QueryIntent.MIXED

    def test_detect_empty_query(self):
        """Empty string should return MIXED (safe default)."""
        for blank in ("", "   "):
            assert detect_query_intent(blank) == QueryIntent.MIXED
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: adjust_weights_by_intent
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestAdjustWeightsByIntent:
    """Tests for adjust_weights_by_intent()."""

    # Shared starting point; copied per-test so mutation cannot leak.
    BASE = {"exact": 0.3, "fuzzy": 0.1, "vector": 0.6}

    def test_adjust_keyword_weights(self):
        """KEYWORD intent should boost exact and reduce vector."""
        adjusted = adjust_weights_by_intent(QueryIntent.KEYWORD, dict(self.BASE))
        # Expected target: exact:0.5, fuzzy:0.1, vector:0.4
        expected = {"exact": 0.5, "fuzzy": 0.1, "vector": 0.4}
        for backend, target in expected.items():
            assert adjusted[backend] == pytest.approx(target, abs=0.01)

    def test_adjust_semantic_weights(self):
        """SEMANTIC intent should boost vector and reduce exact."""
        adjusted = adjust_weights_by_intent(QueryIntent.SEMANTIC, dict(self.BASE))
        # Expected target: exact:0.2, fuzzy:0.1, vector:0.7
        expected = {"exact": 0.2, "fuzzy": 0.1, "vector": 0.7}
        for backend, target in expected.items():
            assert adjusted[backend] == pytest.approx(target, abs=0.01)

    def test_adjust_mixed_weights(self):
        """MIXED intent should return normalized base_weights."""
        adjusted = adjust_weights_by_intent(QueryIntent.MIXED, dict(self.BASE))
        # MIXED returns normalized base_weights: sums to 1.0 with
        # the original proportions preserved.
        assert sum(adjusted.values()) == pytest.approx(1.0, abs=0.01)
        assert adjusted["exact"] == pytest.approx(0.3, abs=0.01)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: get_rrf_weights
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestGetRrfWeights:
    """Tests for get_rrf_weights() composite function."""

    def test_get_rrf_weights_composite(self):
        """get_rrf_weights should compose detect_query_intent + adjust_weights_by_intent."""
        base_weights = {"exact": 0.3, "fuzzy": 0.1, "vector": 0.6}
        # "MyClassName" is CamelCase -> KEYWORD intent -> exact boosted.
        weights = get_rrf_weights("MyClassName", base_weights)
        assert weights["exact"] > weights["fuzzy"]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: reciprocal_rank_fusion
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestReciprocalRankFusion:
    """Tests for reciprocal_rank_fusion()."""

    def test_rrf_single_source(self):
        """Single source RRF should produce ranked results."""
        source_map = {
            "exact": [
                _make_result(path="a.py", score=10.0),
                _make_result(path="b.py", score=5.0),
            ]
        }
        fused = reciprocal_rank_fusion(source_map)
        assert len(fused) == 2
        top, runner_up = fused
        # a.py held rank 1 in the only source, so it stays on top.
        assert top.path == "a.py"
        assert top.score > runner_up.score

    def test_rrf_multi_source(self):
        """Multi-source RRF should combine rankings from multiple sources."""
        source_map = {
            "exact": [
                _make_result(path="a.py", score=10.0),
                _make_result(path="b.py", score=5.0),
            ],
            "vector": [
                _make_result(path="b.py", score=0.9),
                _make_result(path="c.py", score=0.8),
            ],
        }
        fused = reciprocal_rank_fusion(source_map, weights={"exact": 0.5, "vector": 0.5})
        assert len(fused) == 3
        # b.py is the only path present in both sources, so it wins fusion.
        assert fused[0].path == "b.py"
        assert fused[0].metadata["fusion_method"] == "rrf"

    def test_rrf_empty_results(self):
        """Empty results map should return empty list."""
        assert reciprocal_rank_fusion({}) == []

    def test_rrf_weight_normalization(self):
        """Weights not summing to 1.0 should be auto-normalized."""
        source_map = {"exact": [_make_result(path="a.py", score=10.0)]}
        # Weight of 2.0 deliberately does not sum to 1.0.
        fused = reciprocal_rank_fusion(source_map, weights={"exact": 2.0})
        assert len(fused) == 1
        # Result should still be valid after weight normalization.
        assert fused[0].score > 0
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: simple_weighted_fusion
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestSimpleWeightedFusion:
    """Tests for simple_weighted_fusion()."""

    def test_weighted_fusion_basic(self):
        """Basic weighted fusion should combine scores."""
        source_map = {
            "exact": [_make_result(path="a.py", score=10.0)],
            "vector": [_make_result(path="a.py", score=0.8)],
        }
        fused = simple_weighted_fusion(source_map, weights={"exact": 0.5, "vector": 0.5})
        assert len(fused) == 1
        merged = fused[0]
        assert merged.path == "a.py"
        assert merged.metadata["fusion_method"] == "simple_weighted"
        assert merged.score > 0

    def test_weighted_fusion_empty(self):
        """Empty input should return empty list."""
        assert simple_weighted_fusion({}) == []
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: apply_symbol_boost
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestApplySymbolBoost:
    """Tests for apply_symbol_boost()."""

    def test_symbol_boost_applied(self):
        """Results with symbol_name should get boosted by factor."""
        with_symbol = _make_result(path="a.py", score=0.5, symbol_name="authenticate")
        without_symbol = _make_result(path="b.py", score=0.6)
        boosted = apply_symbol_boost([with_symbol, without_symbol], boost_factor=1.5)
        # a.py carries a symbol -> 0.5 * 1.5 == 0.75
        a_result = next(r for r in boosted if r.path == "a.py")
        assert a_result.score == pytest.approx(0.75, abs=0.01)
        assert a_result.metadata.get("boosted") is True

    def test_symbol_boost_no_match(self):
        """Results without symbol_name should not be boosted."""
        plain = [_make_result(path="a.py", score=0.5)]
        boosted = apply_symbol_boost(plain, boost_factor=1.5)
        assert boosted[0].score == pytest.approx(0.5, abs=0.01)
        assert boosted[0].metadata.get("boosted") is not True
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: filter_results_by_category
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestFilterResultsByCategory:
    """Tests for filter_results_by_category()."""

    def test_filter_keyword_code_only(self):
        """KEYWORD intent should return only code files."""
        candidates = [
            _make_result(path="main.py", score=0.9),
            _make_result(path="README.md", score=0.8),
            _make_result(path="utils.ts", score=0.7),
        ]
        surviving = {r.path for r in filter_results_by_category(candidates, QueryIntent.KEYWORD)}
        assert "README.md" not in surviving
        assert "main.py" in surviving
        assert "utils.ts" in surviving

    def test_filter_semantic_docs_first(self):
        """SEMANTIC intent should put docs before code."""
        candidates = [
            _make_result(path="main.py", score=0.9),
            _make_result(path="README.md", score=0.8),
        ]
        ordered = filter_results_by_category(candidates, QueryIntent.SEMANTIC, allow_mixed=True)
        # Documentation is promoted ahead of code for semantic queries.
        assert ordered[0].path == "README.md"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: group_similar_results
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestGroupSimilarResults:
    """Tests for group_similar_results()."""

    def test_group_similar_results(self):
        """Results with same excerpt and close scores should be grouped."""
        two_dupes_one_distinct = [
            _make_result(path="a.py", score=0.50, excerpt="def foo():"),
            _make_result(path="b.py", score=0.50, excerpt="def foo():"),
            _make_result(path="c.py", score=0.30, excerpt="def bar():"),
        ]
        grouped = group_similar_results(two_dupes_one_distinct, score_threshold_abs=0.01)
        # a.py absorbs b.py (same excerpt, same score); c.py stays separate.
        assert len(grouped) == 2
        primary = next(r for r in grouped if r.path == "a.py")
        assert len(primary.additional_locations) == 1
        assert primary.additional_locations[0].path == "b.py"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Tests: normalize_weights
|
||||
# =============================================================================
|
||||
|
||||
|
||||
class TestNormalizeWeights:
    """Tests for normalize_weights()."""

    def test_normalize_weights_zero_total(self):
        """All-zero weights should be returned as-is (no division by zero)."""
        result = normalize_weights(dict.fromkeys(("exact", "fuzzy", "vector"), 0.0))
        assert result == {"exact": 0.0, "fuzzy": 0.0, "vector": 0.0}
|
||||
Reference in New Issue
Block a user