"""Tests for Reciprocal Rank Fusion (RRF) algorithm (P2).

Tests RRF fusion logic, score computation, weight handling, and result ranking.
"""

import pytest

from codexlens.entities import SearchResult
from codexlens.search.ranking import (
    apply_symbol_boost,
    QueryIntent,
    detect_query_intent,
    normalize_bm25_score,
    reciprocal_rank_fusion,
    rerank_results,
    tag_search_source,
)


class TestReciprocalRankFusion:
    """Tests for reciprocal_rank_fusion function."""

    def test_single_source_ranking(self):
        """Test RRF with single source returns ranked results."""
        results = [
            SearchResult(path="a.py", score=10.0, excerpt="..."),
            SearchResult(path="b.py", score=8.0, excerpt="..."),
            SearchResult(path="c.py", score=6.0, excerpt="..."),
        ]
        results_map = {"exact": results}

        fused = reciprocal_rank_fusion(results_map)

        assert len(fused) == 3
        # Order should be preserved (highest original score first)
        assert fused[0].path == "a.py"
        assert fused[1].path == "b.py"
        assert fused[2].path == "c.py"

    def test_two_sources_fusion(self):
        """Test RRF combines rankings from two sources."""
        exact_results = [
            SearchResult(path="a.py", score=10.0, excerpt="..."),
            SearchResult(path="b.py", score=8.0, excerpt="..."),
            SearchResult(path="c.py", score=6.0, excerpt="..."),
        ]
        fuzzy_results = [
            SearchResult(path="b.py", score=9.0, excerpt="..."),
            SearchResult(path="c.py", score=7.0, excerpt="..."),
            SearchResult(path="d.py", score=5.0, excerpt="..."),
        ]
        results_map = {"exact": exact_results, "fuzzy": fuzzy_results}

        fused = reciprocal_rank_fusion(results_map)

        # Should have all unique paths
        paths = [r.path for r in fused]
        assert set(paths) == {"a.py", "b.py", "c.py", "d.py"}

        # Results appearing in both should rank higher
        # b.py and c.py appear in both sources
        assert fused[0].path in ["b.py", "c.py"], "Items in both sources should rank highest"

    def test_rrf_score_calculation(self):
        """Test RRF scores are calculated correctly with default k=60."""
        # Simple scenario: single source
        results = [SearchResult(path="a.py", score=10.0, excerpt="...")]
        results_map = {"exact": results}

        fused = reciprocal_rank_fusion(results_map, k=60)

        # RRF score = weight / (k + rank) = 1.0 / (60 + 1) ≈ 0.0164
        expected_score = 1.0 / 61
        assert abs(fused[0].score - expected_score) < 0.001

    def test_custom_weights(self):
        """Test custom weights affect RRF scores."""
        results_a = [SearchResult(path="a.py", score=10.0, excerpt="...")]
        results_b = [SearchResult(path="a.py", score=10.0, excerpt="...")]

        results_map = {"exact": results_a, "fuzzy": results_b}

        # Higher weight for exact
        weights = {"exact": 0.7, "fuzzy": 0.3}
        fused = reciprocal_rank_fusion(results_map, weights=weights, k=60)

        # Score should be: 0.7/(60+1) + 0.3/(60+1) = 1.0/61 ≈ 0.0164
        expected_score = (0.7 + 0.3) / 61
        assert abs(fused[0].score - expected_score) < 0.001

    def test_weight_normalization(self):
        """Test weights are normalized to sum to 1.0."""
        results = [SearchResult(path="a.py", score=10.0, excerpt="...")]
        results_map = {"exact": results}

        # Weights not summing to 1.0
        weights = {"exact": 2.0}  # Will be normalized to 1.0
        fused = reciprocal_rank_fusion(results_map, weights=weights)

        # Should work without error and produce normalized scores
        assert len(fused) == 1
        assert fused[0].score > 0

    def test_empty_results_map(self):
        """Test RRF with empty results returns empty list."""
        fused = reciprocal_rank_fusion({})
        assert fused == []

    def test_zero_weight_source_ignored(self):
        """Test sources with zero weight are ignored."""
        results_a = [SearchResult(path="a.py", score=10.0, excerpt="...")]
        results_b = [SearchResult(path="b.py", score=10.0, excerpt="...")]

        results_map = {"exact": results_a, "fuzzy": results_b}
        weights = {"exact": 1.0, "fuzzy": 0.0}  # Ignore fuzzy

        fused = reciprocal_rank_fusion(results_map, weights=weights)

        # Should only have result from exact source
        assert len(fused) == 1
        assert fused[0].path == "a.py"

    def test_fusion_score_in_metadata(self):
        """Test fusion score is stored in result metadata."""
        results = [SearchResult(path="a.py", score=10.0, excerpt="...")]
        results_map = {"exact": results}

        fused = reciprocal_rank_fusion(results_map)

        # Check metadata
        assert "fusion_score" in fused[0].metadata
        assert "original_score" in fused[0].metadata
        assert fused[0].metadata["original_score"] == 10.0

    def test_rank_order_matters(self):
        """Test rank position affects RRF score (lower rank = higher score)."""
        results = [
            SearchResult(path="a.py", score=10.0, excerpt="..."),  # rank 1
            SearchResult(path="b.py", score=8.0, excerpt="..."),   # rank 2
            SearchResult(path="c.py", score=6.0, excerpt="..."),   # rank 3
        ]
        results_map = {"exact": results}

        fused = reciprocal_rank_fusion(results_map, k=60)

        # a.py (rank 1): score = 1/(60+1) ≈ 0.0164
        # b.py (rank 2): score = 1/(60+2) ≈ 0.0161
        # c.py (rank 3): score = 1/(60+3) ≈ 0.0159
        assert fused[0].score > fused[1].score > fused[2].score


class TestRRFSyntheticRankings:
    """Tests with synthetic rankings to verify RRF correctness."""

    def test_perfect_agreement(self):
        """Test RRF when all sources rank items identically."""
        # All sources rank a > b > c
        exact = [
            SearchResult(path="a.py", score=10.0, excerpt="..."),
            SearchResult(path="b.py", score=8.0, excerpt="..."),
            SearchResult(path="c.py", score=6.0, excerpt="..."),
        ]
        fuzzy = [
            SearchResult(path="a.py", score=9.0, excerpt="..."),
            SearchResult(path="b.py", score=7.0, excerpt="..."),
            SearchResult(path="c.py", score=5.0, excerpt="..."),
        ]

        results_map = {"exact": exact, "fuzzy": fuzzy}
        fused = reciprocal_rank_fusion(results_map)

        # Order should match both sources
        assert fused[0].path == "a.py"
        assert fused[1].path == "b.py"
        assert fused[2].path == "c.py"

    def test_complete_disagreement(self):
        """Test RRF when sources have opposite rankings."""
        # exact: a > b > c
        # fuzzy: c > b > a
        exact = [
            SearchResult(path="a.py", score=10.0, excerpt="..."),
            SearchResult(path="b.py", score=8.0, excerpt="..."),
            SearchResult(path="c.py", score=6.0, excerpt="..."),
        ]
        fuzzy = [
            SearchResult(path="c.py", score=9.0, excerpt="..."),
            SearchResult(path="b.py", score=7.0, excerpt="..."),
            SearchResult(path="a.py", score=5.0, excerpt="..."),
        ]

        results_map = {"exact": exact, "fuzzy": fuzzy}
        fused = reciprocal_rank_fusion(results_map)

        # With opposite rankings, a.py and c.py get equal RRF scores:
        # a.py: 0.5/(60+1) + 0.5/(60+3) = 0.01613
        # c.py: 0.5/(60+3) + 0.5/(60+1) = 0.01613 (same!)
        # b.py: 0.5/(60+2) + 0.5/(60+2) = 0.01613 (slightly lower due to rounding)
        # So top result should be a.py or c.py (tied)
        assert fused[0].path in ["a.py", "c.py"], "Items with symmetric ranks should tie for first"

    def test_partial_overlap(self):
        """Test RRF with partial overlap between sources."""
        # exact: [A, B, C]
        # fuzzy: [B, C, D]
        exact = [
            SearchResult(path="A", score=10.0, excerpt="..."),
            SearchResult(path="B", score=8.0, excerpt="..."),
            SearchResult(path="C", score=6.0, excerpt="..."),
        ]
        fuzzy = [
            SearchResult(path="B", score=9.0, excerpt="..."),
            SearchResult(path="C", score=7.0, excerpt="..."),
            SearchResult(path="D", score=5.0, excerpt="..."),
        ]

        results_map = {"exact": exact, "fuzzy": fuzzy}
        fused = reciprocal_rank_fusion(results_map)

        # B and C appear in both, should rank higher than A and D
        paths = [r.path for r in fused]
        b_idx = paths.index("B")
        c_idx = paths.index("C")
        a_idx = paths.index("A")
        d_idx = paths.index("D")

        assert b_idx < a_idx, "B (in both) should outrank A (in one)"
        assert c_idx < d_idx, "C (in both) should outrank D (in one)"

    def test_three_sources(self):
        """Test RRF with three sources (exact, fuzzy, vector)."""
        exact = [SearchResult(path="a.py", score=10.0, excerpt="...")]
        fuzzy = [SearchResult(path="b.py", score=9.0, excerpt="...")]
        vector = [SearchResult(path="c.py", score=8.0, excerpt="...")]

        results_map = {"exact": exact, "fuzzy": fuzzy, "vector": vector}
        weights = {"exact": 0.3, "fuzzy": 0.1, "vector": 0.6}

        fused = reciprocal_rank_fusion(results_map, weights=weights)

        assert len(fused) == 3
        # Each appears in one source only, so scores differ by weights
        # c.py: 0.6/61 ≈ 0.0098 (vector, highest weight)
        # a.py: 0.3/61 ≈ 0.0049 (exact)
        # b.py: 0.1/61 ≈ 0.0016 (fuzzy)
        assert fused[0].path == "c.py", "Vector (higher weight) should rank first"


class TestNormalizeBM25Score:
    """Tests for normalize_bm25_score function."""

    def test_negative_bm25_normalization(self):
        """Test BM25 scores (negative) are normalized to 0-1 range."""
        # SQLite FTS5 returns negative BM25 scores
        scores = [-20.0, -10.0, -5.0, -1.0, 0.0]

        for score in scores:
            normalized = normalize_bm25_score(score)
            assert 0.0 <= normalized <= 1.0, f"Normalized score {normalized} out of range"

    def test_better_match_higher_score(self):
        """Test more negative BM25 (better match) gives higher normalized score."""
        good_match = -15.0
        weak_match = -2.0

        norm_good = normalize_bm25_score(good_match)
        norm_weak = normalize_bm25_score(weak_match)

        assert norm_good > norm_weak, "Better match should have higher normalized score"

    def test_zero_score(self):
        """Test zero BM25 score normalization."""
        normalized = normalize_bm25_score(0.0)
        assert 0.0 <= normalized <= 1.0

    def test_positive_score_handling(self):
        """Test positive scores (edge case) are handled."""
        normalized = normalize_bm25_score(5.0)
        # Should still be in valid range
        assert 0.0 <= normalized <= 1.0


class TestTagSearchSource:
    """Tests for tag_search_source function."""

    def test_tagging_adds_source_metadata(self):
        """Test tagging adds search_source to metadata."""
        results = [
            SearchResult(path="a.py", score=10.0, excerpt="..."),
            SearchResult(path="b.py", score=8.0, excerpt="..."),
        ]

        tagged = tag_search_source(results, "exact")

        for result in tagged:
            assert "search_source" in result.metadata
            assert result.metadata["search_source"] == "exact"

    def test_tagging_preserves_existing_metadata(self):
        """Test tagging preserves existing metadata fields."""
        results = [
            SearchResult(
                path="a.py",
                score=10.0,
                excerpt="...",
                metadata={"custom_field": "value"}
            ),
        ]

        tagged = tag_search_source(results, "fuzzy")

        assert "custom_field" in tagged[0].metadata
        assert tagged[0].metadata["custom_field"] == "value"
        assert "search_source" in tagged[0].metadata
        assert tagged[0].metadata["search_source"] == "fuzzy"

    def test_tagging_empty_list(self):
        """Test tagging empty list returns empty list."""
        tagged = tag_search_source([], "exact")
        assert tagged == []

    def test_tagging_preserves_result_fields(self):
        """Test tagging preserves all SearchResult fields."""
        results = [
            SearchResult(
                path="a.py",
                score=10.0,
                excerpt="test excerpt",
                content="full content",
                start_line=10,
                end_line=20,
                symbol_name="test_func",
                symbol_kind="function"
            ),
        ]

        tagged = tag_search_source(results, "exact")

        assert tagged[0].path == "a.py"
        assert tagged[0].score == 10.0
        assert tagged[0].excerpt == "test excerpt"
        assert tagged[0].content == "full content"
        assert tagged[0].start_line == 10
        assert tagged[0].end_line == 20
        assert tagged[0].symbol_name == "test_func"
        assert tagged[0].symbol_kind == "function"


class TestSymbolBoost:
    """Tests for apply_symbol_boost function."""

    def test_symbol_boost(self):
        results = [
            SearchResult(path="a.py", score=0.2, excerpt="...", symbol_name="foo"),
            SearchResult(path="b.py", score=0.21, excerpt="..."),
        ]

        boosted = apply_symbol_boost(results, boost_factor=1.5)

        assert boosted[0].path == "a.py"
        assert boosted[0].score == pytest.approx(0.2 * 1.5)
        assert boosted[0].metadata["boosted"] is True
        assert boosted[0].metadata["original_fusion_score"] == pytest.approx(0.2)

        assert boosted[1].path == "b.py"
        assert boosted[1].score == pytest.approx(0.21)
        assert "boosted" not in boosted[1].metadata


class TestEmbeddingReranking:
    """Tests for rerank_results embedding-based similarity."""

    def test_rerank_embedding_similarity(self):
        class DummyEmbedder:
            def embed(self, texts):
                if isinstance(texts, str):
                    texts = [texts]
                mapping = {
                    "query": [1.0, 0.0],
                    "doc1": [1.0, 0.0],
                    "doc2": [0.0, 1.0],
                }
                return [mapping[t] for t in texts]

        results = [
            SearchResult(path="a.py", score=0.2, excerpt="doc1"),
            SearchResult(path="b.py", score=0.9, excerpt="doc2"),
        ]

        reranked = rerank_results("query", results, DummyEmbedder(), top_k=2)

        assert reranked[0].path == "a.py"
        assert reranked[0].metadata["reranked"] is True
        assert reranked[0].metadata["rrf_score"] == pytest.approx(0.2)
        assert reranked[0].metadata["cosine_similarity"] == pytest.approx(1.0)
        assert reranked[0].score == pytest.approx(0.5 * 0.2 + 0.5 * 1.0)

        assert reranked[1].path == "b.py"
        assert reranked[1].metadata["reranked"] is True
        assert reranked[1].metadata["rrf_score"] == pytest.approx(0.9)
        assert reranked[1].metadata["cosine_similarity"] == pytest.approx(0.0)
        assert reranked[1].score == pytest.approx(0.5 * 0.9 + 0.5 * 0.0)


@pytest.mark.parametrize("k_value", [30, 60, 100])
class TestRRFParameterized:
    """Parameterized tests for RRF with different k values."""

    def test_k_value_affects_scores(self, k_value):
        """Test k parameter affects RRF score magnitude."""
        results = [SearchResult(path="a.py", score=10.0, excerpt="...")]
        results_map = {"exact": results}

        fused = reciprocal_rank_fusion(results_map, k=k_value)

        # Score should be 1.0 / (k + 1)
        expected = 1.0 / (k_value + 1)
        assert abs(fused[0].score - expected) < 0.001


class TestRRFEdgeCases:
    """Edge case tests for RRF."""

    def test_duplicate_paths_in_same_source(self):
        """Test handling of duplicate paths in single source."""
        results = [
            SearchResult(path="a.py", score=10.0, excerpt="..."),
            SearchResult(path="a.py", score=8.0, excerpt="..."),  # Duplicate
        ]
        results_map = {"exact": results}

        fused = reciprocal_rank_fusion(results_map)

        # Should deduplicate (first occurrence wins)
        assert len(fused) == 1
        assert fused[0].path == "a.py"

    def test_very_large_result_lists(self):
        """Test RRF handles large result sets efficiently."""
        # Create 1000 results
        results = [
            SearchResult(path=f"file{i}.py", score=1000-i, excerpt="...")
            for i in range(1000)
        ]
        results_map = {"exact": results}

        fused = reciprocal_rank_fusion(results_map)

        assert len(fused) == 1000
        # Should maintain ranking
        assert fused[0].path == "file0.py"
        assert fused[-1].path == "file999.py"

    def test_all_same_score(self):
        """Test RRF when all results have same original score."""
        results = [
            SearchResult(path="a.py", score=10.0, excerpt="..."),
            SearchResult(path="b.py", score=10.0, excerpt="..."),
            SearchResult(path="c.py", score=10.0, excerpt="..."),
        ]
        results_map = {"exact": results}

        fused = reciprocal_rank_fusion(results_map)

        # Should still rank by position (rank matters)
        assert len(fused) == 3
        assert fused[0].score > fused[1].score > fused[2].score

    def test_missing_weight_for_source(self):
        """Test missing weight for source uses default."""
        results = [SearchResult(path="a.py", score=10.0, excerpt="...")]
        results_map = {"exact": results, "fuzzy": results}

        # Only provide weight for exact
        weights = {"exact": 1.0}

        fused = reciprocal_rank_fusion(results_map, weights=weights)

        # Should work with normalization
        assert len(fused) == 1  # Deduplicated
        assert fused[0].score > 0


class TestSymbolBoostAndIntentV1:
    """Tests for symbol boosting and query intent detection (v1.0)."""

    def test_symbol_boost_application(self):
        """Results with symbol_name receive a multiplicative boost (default 1.5x)."""
        results = [
            SearchResult(path="a.py", score=0.4, excerpt="...", symbol_name="AuthManager"),
            SearchResult(path="b.py", score=0.41, excerpt="..."),
        ]

        boosted = apply_symbol_boost(results, boost_factor=1.5)

        assert boosted[0].score == pytest.approx(0.4 * 1.5)
        assert boosted[0].metadata["boosted"] is True
        assert boosted[0].metadata["original_fusion_score"] == pytest.approx(0.4)
        assert boosted[1].score == pytest.approx(0.41)
        assert "boosted" not in boosted[1].metadata

    @pytest.mark.parametrize(
        ("query", "expected"),
        [
            ("def authenticate", QueryIntent.KEYWORD),
            ("MyClass", QueryIntent.KEYWORD),
            ("user_id", QueryIntent.KEYWORD),
            ("UserService::authenticate", QueryIntent.KEYWORD),
            ("ptr->next", QueryIntent.KEYWORD),
            ("how to handle user login", QueryIntent.SEMANTIC),
            ("what is authentication?", QueryIntent.SEMANTIC),
            ("where is this used?", QueryIntent.SEMANTIC),
            ("why does FooBar crash?", QueryIntent.MIXED),
            ("how to use user_id in query", QueryIntent.MIXED),
        ],
    )
    def test_query_intent_detection(self, query, expected):
        """Detect intent for representative queries (Python/TypeScript parity)."""
        assert detect_query_intent(query) == expected