"""Tests for Reciprocal Rank Fusion (RRF) algorithm (P2). Tests RRF fusion logic, score computation, weight handling, and result ranking. """ import pytest from codexlens.entities import SearchResult from codexlens.search.ranking import ( apply_symbol_boost, QueryIntent, detect_query_intent, normalize_bm25_score, reciprocal_rank_fusion, rerank_results, tag_search_source, ) class TestReciprocalRankFusion: """Tests for reciprocal_rank_fusion function.""" def test_single_source_ranking(self): """Test RRF with single source returns ranked results.""" results = [ SearchResult(path="a.py", score=10.0, excerpt="..."), SearchResult(path="b.py", score=8.0, excerpt="..."), SearchResult(path="c.py", score=6.0, excerpt="..."), ] results_map = {"exact": results} fused = reciprocal_rank_fusion(results_map) assert len(fused) == 3 # Order should be preserved (highest original score first) assert fused[0].path == "a.py" assert fused[1].path == "b.py" assert fused[2].path == "c.py" def test_two_sources_fusion(self): """Test RRF combines rankings from two sources.""" exact_results = [ SearchResult(path="a.py", score=10.0, excerpt="..."), SearchResult(path="b.py", score=8.0, excerpt="..."), SearchResult(path="c.py", score=6.0, excerpt="..."), ] fuzzy_results = [ SearchResult(path="b.py", score=9.0, excerpt="..."), SearchResult(path="c.py", score=7.0, excerpt="..."), SearchResult(path="d.py", score=5.0, excerpt="..."), ] results_map = {"exact": exact_results, "fuzzy": fuzzy_results} fused = reciprocal_rank_fusion(results_map) # Should have all unique paths paths = [r.path for r in fused] assert set(paths) == {"a.py", "b.py", "c.py", "d.py"} # Results appearing in both should rank higher # b.py and c.py appear in both sources assert fused[0].path in ["b.py", "c.py"], "Items in both sources should rank highest" def test_rrf_score_calculation(self): """Test RRF scores are calculated correctly with default k=60.""" # Simple scenario: single source results = [SearchResult(path="a.py", score=10.0, excerpt="...")] results_map = {"exact": results} fused = reciprocal_rank_fusion(results_map, k=60) # RRF score = weight / (k + rank) = 1.0 / (60 + 1) ≈ 0.0164 expected_score = 1.0 / 61 assert abs(fused[0].score - expected_score) < 0.001 def test_custom_weights(self): """Test custom weights affect RRF scores.""" results_a = [SearchResult(path="a.py", score=10.0, excerpt="...")] results_b = [SearchResult(path="a.py", score=10.0, excerpt="...")] results_map = {"exact": results_a, "fuzzy": results_b} # Higher weight for exact weights = {"exact": 0.7, "fuzzy": 0.3} fused = reciprocal_rank_fusion(results_map, weights=weights, k=60) # Score should be: 0.7/(60+1) + 0.3/(60+1) = 1.0/61 ≈ 0.0164 expected_score = (0.7 + 0.3) / 61 assert abs(fused[0].score - expected_score) < 0.001 def test_weight_normalization(self): """Test weights are normalized to sum to 1.0.""" results = [SearchResult(path="a.py", score=10.0, excerpt="...")] results_map = {"exact": results} # Weights not summing to 1.0 weights = {"exact": 2.0} # Will be normalized to 1.0 fused = reciprocal_rank_fusion(results_map, weights=weights) # Should work without error and produce normalized scores assert len(fused) == 1 assert fused[0].score > 0 def test_empty_results_map(self): """Test RRF with empty results returns empty list.""" fused = reciprocal_rank_fusion({}) assert fused == [] def test_zero_weight_source_ignored(self): """Test sources with zero weight are ignored.""" results_a = [SearchResult(path="a.py", score=10.0, excerpt="...")] results_b = [SearchResult(path="b.py", score=10.0, excerpt="...")] results_map = {"exact": results_a, "fuzzy": results_b} weights = {"exact": 1.0, "fuzzy": 0.0} # Ignore fuzzy fused = reciprocal_rank_fusion(results_map, weights=weights) # Should only have result from exact source assert len(fused) == 1 assert fused[0].path == "a.py" def test_fusion_score_in_metadata(self): """Test fusion score is stored in result metadata.""" results = [SearchResult(path="a.py", score=10.0, excerpt="...")] results_map = {"exact": results} fused = reciprocal_rank_fusion(results_map) # Check metadata assert "fusion_score" in fused[0].metadata assert "original_score" in fused[0].metadata assert fused[0].metadata["original_score"] == 10.0 def test_rank_order_matters(self): """Test rank position affects RRF score (lower rank = higher score).""" results = [ SearchResult(path="a.py", score=10.0, excerpt="..."), # rank 1 SearchResult(path="b.py", score=8.0, excerpt="..."), # rank 2 SearchResult(path="c.py", score=6.0, excerpt="..."), # rank 3 ] results_map = {"exact": results} fused = reciprocal_rank_fusion(results_map, k=60) # a.py (rank 1): score = 1/(60+1) ≈ 0.0164 # b.py (rank 2): score = 1/(60+2) ≈ 0.0161 # c.py (rank 3): score = 1/(60+3) ≈ 0.0159 assert fused[0].score > fused[1].score > fused[2].score class TestRRFSyntheticRankings: """Tests with synthetic rankings to verify RRF correctness.""" def test_perfect_agreement(self): """Test RRF when all sources rank items identically.""" # All sources rank a > b > c exact = [ SearchResult(path="a.py", score=10.0, excerpt="..."), SearchResult(path="b.py", score=8.0, excerpt="..."), SearchResult(path="c.py", score=6.0, excerpt="..."), ] fuzzy = [ SearchResult(path="a.py", score=9.0, excerpt="..."), SearchResult(path="b.py", score=7.0, excerpt="..."), SearchResult(path="c.py", score=5.0, excerpt="..."), ] results_map = {"exact": exact, "fuzzy": fuzzy} fused = reciprocal_rank_fusion(results_map) # Order should match both sources assert fused[0].path == "a.py" assert fused[1].path == "b.py" assert fused[2].path == "c.py" def test_complete_disagreement(self): """Test RRF when sources have opposite rankings.""" # exact: a > b > c # fuzzy: c > b > a exact = [ SearchResult(path="a.py", score=10.0, excerpt="..."), SearchResult(path="b.py", score=8.0, excerpt="..."), SearchResult(path="c.py", score=6.0, excerpt="..."), ] fuzzy = [ SearchResult(path="c.py", score=9.0, excerpt="..."), SearchResult(path="b.py", score=7.0, excerpt="..."), SearchResult(path="a.py", score=5.0, excerpt="..."), ] results_map = {"exact": exact, "fuzzy": fuzzy} fused = reciprocal_rank_fusion(results_map) # With opposite rankings, a.py and c.py get equal RRF scores: # a.py: 0.5/(60+1) + 0.5/(60+3) = 0.01613 # c.py: 0.5/(60+3) + 0.5/(60+1) = 0.01613 (same!) # b.py: 0.5/(60+2) + 0.5/(60+2) = 0.01613 (slightly lower due to rounding) # So top result should be a.py or c.py (tied) assert fused[0].path in ["a.py", "c.py"], "Items with symmetric ranks should tie for first" def test_partial_overlap(self): """Test RRF with partial overlap between sources.""" # exact: [A, B, C] # fuzzy: [B, C, D] exact = [ SearchResult(path="A", score=10.0, excerpt="..."), SearchResult(path="B", score=8.0, excerpt="..."), SearchResult(path="C", score=6.0, excerpt="..."), ] fuzzy = [ SearchResult(path="B", score=9.0, excerpt="..."), SearchResult(path="C", score=7.0, excerpt="..."), SearchResult(path="D", score=5.0, excerpt="..."), ] results_map = {"exact": exact, "fuzzy": fuzzy} fused = reciprocal_rank_fusion(results_map) # B and C appear in both, should rank higher than A and D paths = [r.path for r in fused] b_idx = paths.index("B") c_idx = paths.index("C") a_idx = paths.index("A") d_idx = paths.index("D") assert b_idx < a_idx, "B (in both) should outrank A (in one)" assert c_idx < d_idx, "C (in both) should outrank D (in one)" def test_three_sources(self): """Test RRF with three sources (exact, fuzzy, vector).""" exact = [SearchResult(path="a.py", score=10.0, excerpt="...")] fuzzy = [SearchResult(path="b.py", score=9.0, excerpt="...")] vector = [SearchResult(path="c.py", score=8.0, excerpt="...")] results_map = {"exact": exact, "fuzzy": fuzzy, "vector": vector} weights = {"exact": 0.3, "fuzzy": 0.1, "vector": 0.6} fused = reciprocal_rank_fusion(results_map, weights=weights) assert len(fused) == 3 # Each appears in one source only, so scores differ by weights # c.py: 0.6/61 ≈ 0.0098 (vector, highest weight) # a.py: 0.3/61 ≈ 0.0049 (exact) # b.py: 0.1/61 ≈ 0.0016 (fuzzy) assert fused[0].path == "c.py", "Vector (higher weight) should rank first" class TestNormalizeBM25Score: """Tests for normalize_bm25_score function.""" def test_negative_bm25_normalization(self): """Test BM25 scores (negative) are normalized to 0-1 range.""" # SQLite FTS5 returns negative BM25 scores scores = [-20.0, -10.0, -5.0, -1.0, 0.0] for score in scores: normalized = normalize_bm25_score(score) assert 0.0 <= normalized <= 1.0, f"Normalized score {normalized} out of range" def test_better_match_higher_score(self): """Test more negative BM25 (better match) gives higher normalized score.""" good_match = -15.0 weak_match = -2.0 norm_good = normalize_bm25_score(good_match) norm_weak = normalize_bm25_score(weak_match) assert norm_good > norm_weak, "Better match should have higher normalized score" def test_zero_score(self): """Test zero BM25 score normalization.""" normalized = normalize_bm25_score(0.0) assert 0.0 <= normalized <= 1.0 def test_positive_score_handling(self): """Test positive scores (edge case) are handled.""" normalized = normalize_bm25_score(5.0) # Should still be in valid range assert 0.0 <= normalized <= 1.0 class TestTagSearchSource: """Tests for tag_search_source function.""" def test_tagging_adds_source_metadata(self): """Test tagging adds search_source to metadata.""" results = [ SearchResult(path="a.py", score=10.0, excerpt="..."), SearchResult(path="b.py", score=8.0, excerpt="..."), ] tagged = tag_search_source(results, "exact") for result in tagged: assert "search_source" in result.metadata assert result.metadata["search_source"] == "exact" def test_tagging_preserves_existing_metadata(self): """Test tagging preserves existing metadata fields.""" results = [ SearchResult( path="a.py", score=10.0, excerpt="...", metadata={"custom_field": "value"} ), ] tagged = tag_search_source(results, "fuzzy") assert "custom_field" in tagged[0].metadata assert tagged[0].metadata["custom_field"] == "value" assert "search_source" in tagged[0].metadata assert tagged[0].metadata["search_source"] == "fuzzy" def test_tagging_empty_list(self): """Test tagging empty list returns empty list.""" tagged = tag_search_source([], "exact") assert tagged == [] def test_tagging_preserves_result_fields(self): """Test tagging preserves all SearchResult fields.""" results = [ SearchResult( path="a.py", score=10.0, excerpt="test excerpt", content="full content", start_line=10, end_line=20, symbol_name="test_func", symbol_kind="function" ), ] tagged = tag_search_source(results, "exact") assert tagged[0].path == "a.py" assert tagged[0].score == 10.0 assert tagged[0].excerpt == "test excerpt" assert tagged[0].content == "full content" assert tagged[0].start_line == 10 assert tagged[0].end_line == 20 assert tagged[0].symbol_name == "test_func" assert tagged[0].symbol_kind == "function" class TestSymbolBoost: """Tests for apply_symbol_boost function.""" def test_symbol_boost(self): results = [ SearchResult(path="a.py", score=0.2, excerpt="...", symbol_name="foo"), SearchResult(path="b.py", score=0.21, excerpt="..."), ] boosted = apply_symbol_boost(results, boost_factor=1.5) assert boosted[0].path == "a.py" assert boosted[0].score == pytest.approx(0.2 * 1.5) assert boosted[0].metadata["boosted"] is True assert boosted[0].metadata["original_fusion_score"] == pytest.approx(0.2) assert boosted[1].path == "b.py" assert boosted[1].score == pytest.approx(0.21) assert "boosted" not in boosted[1].metadata class TestEmbeddingReranking: """Tests for rerank_results embedding-based similarity.""" def test_rerank_embedding_similarity(self): class DummyEmbedder: def embed(self, texts): if isinstance(texts, str): texts = [texts] mapping = { "query": [1.0, 0.0], "doc1": [1.0, 0.0], "doc2": [0.0, 1.0], } return [mapping[t] for t in texts] results = [ SearchResult(path="a.py", score=0.2, excerpt="doc1"), SearchResult(path="b.py", score=0.9, excerpt="doc2"), ] reranked = rerank_results("query", results, DummyEmbedder(), top_k=2) assert reranked[0].path == "a.py" assert reranked[0].metadata["reranked"] is True assert reranked[0].metadata["rrf_score"] == pytest.approx(0.2) assert reranked[0].metadata["cosine_similarity"] == pytest.approx(1.0) assert reranked[0].score == pytest.approx(0.5 * 0.2 + 0.5 * 1.0) assert reranked[1].path == "b.py" assert reranked[1].metadata["reranked"] is True assert reranked[1].metadata["rrf_score"] == pytest.approx(0.9) assert reranked[1].metadata["cosine_similarity"] == pytest.approx(0.0) assert reranked[1].score == pytest.approx(0.5 * 0.9 + 0.5 * 0.0) @pytest.mark.parametrize("k_value", [30, 60, 100]) class TestRRFParameterized: """Parameterized tests for RRF with different k values.""" def test_k_value_affects_scores(self, k_value): """Test k parameter affects RRF score magnitude.""" results = [SearchResult(path="a.py", score=10.0, excerpt="...")] results_map = {"exact": results} fused = reciprocal_rank_fusion(results_map, k=k_value) # Score should be 1.0 / (k + 1) expected = 1.0 / (k_value + 1) assert abs(fused[0].score - expected) < 0.001 class TestRRFEdgeCases: """Edge case tests for RRF.""" def test_duplicate_paths_in_same_source(self): """Test handling of duplicate paths in single source.""" results = [ SearchResult(path="a.py", score=10.0, excerpt="..."), SearchResult(path="a.py", score=8.0, excerpt="..."), # Duplicate ] results_map = {"exact": results} fused = reciprocal_rank_fusion(results_map) # Should deduplicate (first occurrence wins) assert len(fused) == 1 assert fused[0].path == "a.py" def test_very_large_result_lists(self): """Test RRF handles large result sets efficiently.""" # Create 1000 results results = [ SearchResult(path=f"file{i}.py", score=1000-i, excerpt="...") for i in range(1000) ] results_map = {"exact": results} fused = reciprocal_rank_fusion(results_map) assert len(fused) == 1000 # Should maintain ranking assert fused[0].path == "file0.py" assert fused[-1].path == "file999.py" def test_all_same_score(self): """Test RRF when all results have same original score.""" results = [ SearchResult(path="a.py", score=10.0, excerpt="..."), SearchResult(path="b.py", score=10.0, excerpt="..."), SearchResult(path="c.py", score=10.0, excerpt="..."), ] results_map = {"exact": results} fused = reciprocal_rank_fusion(results_map) # Should still rank by position (rank matters) assert len(fused) == 3 assert fused[0].score > fused[1].score > fused[2].score def test_missing_weight_for_source(self): """Test missing weight for source uses default.""" results = [SearchResult(path="a.py", score=10.0, excerpt="...")] results_map = {"exact": results, "fuzzy": results} # Only provide weight for exact weights = {"exact": 1.0} fused = reciprocal_rank_fusion(results_map, weights=weights) # Should work with normalization assert len(fused) == 1 # Deduplicated assert fused[0].score > 0 class TestSymbolBoostAndIntentV1: """Tests for symbol boosting and query intent detection (v1.0).""" def test_symbol_boost_application(self): """Results with symbol_name receive a multiplicative boost (default 1.5x).""" results = [ SearchResult(path="a.py", score=0.4, excerpt="...", symbol_name="AuthManager"), SearchResult(path="b.py", score=0.41, excerpt="..."), ] boosted = apply_symbol_boost(results, boost_factor=1.5) assert boosted[0].score == pytest.approx(0.4 * 1.5) assert boosted[0].metadata["boosted"] is True assert boosted[0].metadata["original_fusion_score"] == pytest.approx(0.4) assert boosted[1].score == pytest.approx(0.41) assert "boosted" not in boosted[1].metadata @pytest.mark.parametrize( ("query", "expected"), [ ("def authenticate", QueryIntent.KEYWORD), ("MyClass", QueryIntent.KEYWORD), ("user_id", QueryIntent.KEYWORD), ("UserService::authenticate", QueryIntent.KEYWORD), ("ptr->next", QueryIntent.KEYWORD), ("how to handle user login", QueryIntent.SEMANTIC), ("what is authentication?", QueryIntent.SEMANTIC), ("where is this used?", QueryIntent.SEMANTIC), ("why does FooBar crash?", QueryIntent.MIXED), ("how to use user_id in query", QueryIntent.MIXED), ], ) def test_query_intent_detection(self, query, expected): """Detect intent for representative queries (Python/TypeScript parity).""" assert detect_query_intent(query) == expected