Claude-Code-Workflow/codex-lens/tests/test_result_grouping.py

"""Multi-level tests for search result grouping functionality.

Tests cover:
1. Unit tests for group_similar_results function
2. Boundary condition tests
3. Integration tests with SearchOptions
4. Performance/stress tests
"""

import pytest
from typing import List

from codexlens.entities import SearchResult, AdditionalLocation
from codexlens.search.ranking import group_similar_results
from codexlens.search.chain_search import SearchOptions


# =============================================================================
# Test Fixtures
# =============================================================================

@pytest.fixture
def sample_results() -> List[SearchResult]:
    """Create sample search results for testing."""
    return [
        SearchResult(path="a.py", score=0.5, excerpt="def foo(): pass", start_line=10, symbol_name="foo"),
        SearchResult(path="b.py", score=0.5, excerpt="def foo(): pass", start_line=20, symbol_name="foo"),
        SearchResult(path="c.py", score=0.49, excerpt="def foo(): pass", start_line=30, symbol_name="foo"),
        SearchResult(path="d.py", score=0.3, excerpt="def bar(): pass", start_line=40, symbol_name="bar"),
    ]


@pytest.fixture
def results_with_different_excerpts() -> List[SearchResult]:
    """Results with same scores but different content."""
    return [
        SearchResult(path="a.py", score=0.5, excerpt="def foo(): pass"),
        SearchResult(path="b.py", score=0.5, excerpt="def bar(): pass"),
        SearchResult(path="c.py", score=0.5, excerpt="def baz(): pass"),
    ]


@pytest.fixture
def results_with_same_excerpt_different_scores() -> List[SearchResult]:
    """Results with same content but very different scores."""
    return [
        SearchResult(path="a.py", score=0.9, excerpt="def foo(): pass"),
        SearchResult(path="b.py", score=0.5, excerpt="def foo(): pass"),
        SearchResult(path="c.py", score=0.1, excerpt="def foo(): pass"),
    ]


# =============================================================================
# Level 1: Unit Tests - Basic Functionality
# =============================================================================

class TestGroupSimilarResultsBasic:
    """Basic unit tests for group_similar_results function."""

    def test_empty_results_returns_empty(self):
        """Empty input should return empty output."""
        result = group_similar_results([])
        assert result == []

    def test_single_result_returns_unchanged(self):
        """Single result should be returned as-is."""
        single = SearchResult(path="test.py", score=0.5, excerpt="code")
        result = group_similar_results([single])

        assert len(result) == 1
        assert result[0].path == "test.py"
        assert result[0].additional_locations == []

    def test_groups_identical_excerpt_similar_score(self, sample_results):
        """Results with same excerpt and similar scores should be grouped."""
        grouped = group_similar_results(sample_results, score_threshold_abs=0.02)

        # Should have 2 groups: foo group (a, b, c) and bar (d)
        assert len(grouped) == 2

        # First group should have additional locations
        foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass")
        assert len(foo_group.additional_locations) == 2

        # Second group (bar) should have no additional locations
        bar_group = next(r for r in grouped if r.excerpt == "def bar(): pass")
        assert len(bar_group.additional_locations) == 0

    def test_preserves_highest_score_as_representative(self, sample_results):
        """Representative result should have the highest score in group."""
        grouped = group_similar_results(sample_results, score_threshold_abs=0.02)

        foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass")
        # a.py has score 0.5, which is highest
        assert foo_group.path == "a.py"
        assert foo_group.score == 0.5

    def test_additional_locations_contain_correct_info(self, sample_results):
        """Additional locations should contain correct path, score, line info."""
        grouped = group_similar_results(sample_results, score_threshold_abs=0.02)

        foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass")
        locations = foo_group.additional_locations

        paths = {loc.path for loc in locations}
        assert "b.py" in paths
        assert "c.py" in paths

        # Check that start_line is preserved
        for loc in locations:
            if loc.path == "b.py":
                assert loc.start_line == 20
            elif loc.path == "c.py":
                assert loc.start_line == 30


# =============================================================================
# Level 2: Boundary Condition Tests
# =============================================================================

class TestGroupSimilarResultsBoundary:
    """Boundary condition tests for edge cases."""

    def test_threshold_zero_no_grouping(self):
        """With threshold=0, only exactly equal scores should group."""
        results = [
            SearchResult(path="a.py", score=0.5, excerpt="def foo()"),
            SearchResult(path="b.py", score=0.5, excerpt="def foo()"),
            SearchResult(path="c.py", score=0.50001, excerpt="def foo()"),  # Slightly different
        ]

        grouped = group_similar_results(results, score_threshold_abs=0.0)

        # a and b should group (exact same score), c should be separate
        assert len(grouped) == 2

        main_group = next(r for r in grouped if len(r.additional_locations) > 0)
        assert len(main_group.additional_locations) == 1

    def test_threshold_exact_boundary(self):
        """Test behavior at exact threshold boundary.

        Note: Due to floating-point precision, 0.5 - 0.49 = 0.010000000000000009
        which is slightly > 0.01, so they won't group with threshold=0.01.
        Use a slightly larger threshold to account for floating-point precision.
        """
        results = [
            SearchResult(path="a.py", score=0.5, excerpt="def foo()"),
            SearchResult(path="b.py", score=0.49, excerpt="def foo()"),  # 0.01 diff (floating-point)
            SearchResult(path="c.py", score=0.48, excerpt="def foo()"),  # 0.02 diff from a
        ]

        # With threshold 0.011 (slightly above floating-point 0.01), a and b should group
        grouped = group_similar_results(results, score_threshold_abs=0.011)

        # a groups with b, c is separate (0.02 from a, 0.01 from b)
        # After a+b group, c is compared with remaining and forms its own group
        assert len(grouped) == 2

        # Verify a is representative (highest score)
        main_group = next(r for r in grouped if r.score == 0.5)
        assert main_group.path == "a.py"
        assert len(main_group.additional_locations) == 1
        assert main_group.additional_locations[0].path == "b.py"

    def test_large_threshold_groups_all(self):
        """Very large threshold should group all same-content results."""
        results = [
            SearchResult(path="a.py", score=0.9, excerpt="def foo()"),
            SearchResult(path="b.py", score=0.1, excerpt="def foo()"),
        ]

        grouped = group_similar_results(results, score_threshold_abs=1.0)

        assert len(grouped) == 1
        assert len(grouped[0].additional_locations) == 1

    def test_none_excerpt_not_grouped(self):
        """Results with None excerpt should not be grouped."""
        results = [
            SearchResult(path="a.py", score=0.5, excerpt=None),
            SearchResult(path="b.py", score=0.5, excerpt=None),
        ]

        grouped = group_similar_results(results)

        # None excerpts can't be grouped by content
        assert len(grouped) == 2
        for r in grouped:
            assert len(r.additional_locations) == 0

    def test_empty_excerpt_not_grouped(self):
        """Results with empty string excerpt should not be grouped."""
        results = [
            SearchResult(path="a.py", score=0.5, excerpt=""),
            SearchResult(path="b.py", score=0.5, excerpt=""),
            SearchResult(path="c.py", score=0.5, excerpt="   "),  # Whitespace only
        ]

        grouped = group_similar_results(results)

        # Empty/whitespace excerpts can't be grouped
        assert len(grouped) == 3

    def test_different_excerpts_not_grouped(self, results_with_different_excerpts):
        """Results with different excerpts should not be grouped even with same score."""
        grouped = group_similar_results(results_with_different_excerpts, score_threshold_abs=1.0)

        # Different content = no grouping
        assert len(grouped) == 3
        for r in grouped:
            assert len(r.additional_locations) == 0

    def test_same_excerpt_different_scores_creates_subgroups(self, results_with_same_excerpt_different_scores):
        """Same content but very different scores should create separate subgroups."""
        grouped = group_similar_results(
            results_with_same_excerpt_different_scores,
            score_threshold_abs=0.1
        )

        # Scores 0.9, 0.5, 0.1 with threshold 0.1
        # 0.9 and 0.5 differ by 0.4 > 0.1, so separate
        # 0.5 and 0.1 differ by 0.4 > 0.1, so separate
        assert len(grouped) == 3


# =============================================================================
# Level 3: Content Field Tests
# =============================================================================

class TestGroupSimilarResultsContentField:
    """Tests for different content_field options."""

    def test_group_by_content_field(self):
        """Should be able to group by 'content' field instead of 'excerpt'."""
        results = [
            SearchResult(path="a.py", score=0.5, excerpt="short", content="full content here"),
            SearchResult(path="b.py", score=0.5, excerpt="different", content="full content here"),
        ]

        # Group by excerpt - different excerpts, no grouping
        grouped_by_excerpt = group_similar_results(results, content_field="excerpt")
        assert len(grouped_by_excerpt) == 2

        # Group by content - same content, should group
        grouped_by_content = group_similar_results(results, content_field="content")
        assert len(grouped_by_content) == 1
        assert len(grouped_by_content[0].additional_locations) == 1

    def test_fallback_when_content_field_missing(self):
        """Results without the specified content field should not be grouped."""
        results = [
            SearchResult(path="a.py", score=0.5, content=None),
            SearchResult(path="b.py", score=0.5, content=None),
        ]

        grouped = group_similar_results(results, content_field="content")

        # None content = ungroupable
        assert len(grouped) == 2


# =============================================================================
# Level 4: Metadata and Ordering Tests
# =============================================================================

class TestGroupSimilarResultsMetadata:
    """Tests for metadata handling and result ordering."""

    def test_grouped_count_in_metadata(self, sample_results):
        """Grouped results should have grouped_count in metadata."""
        grouped = group_similar_results(sample_results, score_threshold_abs=0.02)

        foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass")

        assert "grouped_count" in foo_group.metadata
        assert foo_group.metadata["grouped_count"] == 3  # a, b, c

    def test_preserves_original_metadata(self):
        """Original metadata should be preserved in grouped result."""
        results = [
            SearchResult(
                path="a.py",
                score=0.5,
                excerpt="def foo()",
                metadata={"original_key": "original_value", "fusion_score": 0.5}
            ),
            SearchResult(path="b.py", score=0.5, excerpt="def foo()"),
        ]

        grouped = group_similar_results(results, score_threshold_abs=0.1)

        assert grouped[0].metadata["original_key"] == "original_value"
        assert grouped[0].metadata["fusion_score"] == 0.5

    def test_results_sorted_by_score_descending(self):
        """Final results should be sorted by score descending."""
        results = [
            SearchResult(path="low.py", score=0.1, excerpt="low"),
            SearchResult(path="high.py", score=0.9, excerpt="high"),
            SearchResult(path="mid.py", score=0.5, excerpt="mid"),
        ]

        grouped = group_similar_results(results)

        scores = [r.score for r in grouped]
        assert scores == sorted(scores, reverse=True)
        assert scores == [0.9, 0.5, 0.1]


# =============================================================================
# Level 5: Integration Tests with SearchOptions
# =============================================================================

class TestSearchOptionsGrouping:
    """Integration tests for SearchOptions grouping configuration."""

    def test_search_options_default_grouping_disabled(self):
        """Default SearchOptions should have grouping disabled."""
        options = SearchOptions()

        assert options.group_results is False
        assert options.grouping_threshold == 0.01

    def test_search_options_enable_grouping(self):
        """SearchOptions should allow enabling grouping."""
        options = SearchOptions(group_results=True)

        assert options.group_results is True

    def test_search_options_custom_threshold(self):
        """SearchOptions should allow custom grouping threshold."""
        options = SearchOptions(group_results=True, grouping_threshold=0.05)

        assert options.grouping_threshold == 0.05

    def test_search_options_all_parameters(self):
        """SearchOptions should work with all parameters combined."""
        options = SearchOptions(
            depth=3,
            max_workers=4,
            limit_per_dir=20,
            total_limit=200,
            include_symbols=True,
            hybrid_mode=True,
            group_results=True,
            grouping_threshold=0.02,
        )

        assert options.depth == 3
        assert options.group_results is True
        assert options.grouping_threshold == 0.02


# =============================================================================
# Level 6: AdditionalLocation Entity Tests
# =============================================================================

class TestAdditionalLocationEntity:
    """Tests for AdditionalLocation entity model."""

    def test_create_minimal_additional_location(self):
        """Create AdditionalLocation with minimal required fields."""
        loc = AdditionalLocation(path="test.py", score=0.5)

        assert loc.path == "test.py"
        assert loc.score == 0.5
        assert loc.start_line is None
        assert loc.end_line is None
        assert loc.symbol_name is None

    def test_create_full_additional_location(self):
        """Create AdditionalLocation with all fields."""
        loc = AdditionalLocation(
            path="test.py",
            score=0.75,
            start_line=10,
            end_line=20,
            symbol_name="my_function"
        )

        assert loc.path == "test.py"
        assert loc.score == 0.75
        assert loc.start_line == 10
        assert loc.end_line == 20
        assert loc.symbol_name == "my_function"

    def test_additional_location_path_required(self):
        """Path should be required for AdditionalLocation."""
        with pytest.raises(Exception):  # ValidationError
            AdditionalLocation(score=0.5)

    def test_additional_location_score_required(self):
        """Score should be required for AdditionalLocation."""
        with pytest.raises(Exception):  # ValidationError
            AdditionalLocation(path="test.py")

    def test_additional_location_score_non_negative(self):
        """Score should be non-negative."""
        with pytest.raises(Exception):  # ValidationError
            AdditionalLocation(path="test.py", score=-0.1)

    def test_additional_location_serialization(self):
        """AdditionalLocation should serialize correctly."""
        loc = AdditionalLocation(
            path="test.py",
            score=0.5,
            start_line=10,
            symbol_name="func"
        )

        data = loc.model_dump()

        assert data["path"] == "test.py"
        assert data["score"] == 0.5
        assert data["start_line"] == 10
        assert data["symbol_name"] == "func"


# =============================================================================
# Level 7: SearchResult with AdditionalLocations Tests
# =============================================================================

class TestSearchResultWithAdditionalLocations:
    """Tests for SearchResult entity with additional_locations field."""

    def test_search_result_default_empty_locations(self):
        """SearchResult should have empty additional_locations by default."""
        result = SearchResult(path="test.py", score=0.5)

        assert result.additional_locations == []

    def test_search_result_with_additional_locations(self):
        """SearchResult should accept additional_locations."""
        locations = [
            AdditionalLocation(path="other.py", score=0.4, start_line=5),
        ]

        result = SearchResult(
            path="main.py",
            score=0.5,
            additional_locations=locations
        )

        assert len(result.additional_locations) == 1
        assert result.additional_locations[0].path == "other.py"

    def test_search_result_serialization_with_locations(self):
        """SearchResult with additional_locations should serialize correctly."""
        locations = [
            AdditionalLocation(path="loc1.py", score=0.4),
            AdditionalLocation(path="loc2.py", score=0.3),
        ]

        result = SearchResult(
            path="main.py",
            score=0.5,
            excerpt="code",
            additional_locations=locations
        )

        data = result.model_dump()

        assert len(data["additional_locations"]) == 2
        assert data["additional_locations"][0]["path"] == "loc1.py"
        assert data["additional_locations"][1]["path"] == "loc2.py"


# =============================================================================
# Level 8: Stress/Performance Tests
# =============================================================================

class TestGroupSimilarResultsPerformance:
    """Performance and stress tests."""

    def test_handles_large_result_set(self):
        """Should handle large number of results efficiently."""
        # Create 1000 results with 100 different excerpts
        results = []
        for i in range(1000):
            excerpt_id = i % 100
            results.append(SearchResult(
                path=f"file_{i}.py",
                score=0.5 + (i % 10) * 0.01,  # Scores vary slightly
                excerpt=f"def func_{excerpt_id}(): pass",
                start_line=i,
            ))

        grouped = group_similar_results(results, score_threshold_abs=0.05)

        # Should reduce to approximately 100 groups (one per excerpt)
        # with some variation due to score subgrouping
        assert len(grouped) <= 200
        assert len(grouped) >= 50  # At least some grouping happened

    def test_handles_all_identical_results(self):
        """Should handle case where all results are identical."""
        results = [
            SearchResult(path=f"file_{i}.py", score=0.5, excerpt="same code")
            for i in range(100)
        ]

        grouped = group_similar_results(results, score_threshold_abs=0.01)

        # All should be grouped into one
        assert len(grouped) == 1
        assert len(grouped[0].additional_locations) == 99

    def test_handles_all_unique_results(self):
        """Should handle case where all results are unique."""
        results = [
            SearchResult(path=f"file_{i}.py", score=0.5, excerpt=f"unique_{i}")
            for i in range(100)
        ]

        grouped = group_similar_results(results, score_threshold_abs=0.01)

        # None should be grouped
        assert len(grouped) == 100
        for r in grouped:
            assert len(r.additional_locations) == 0


# =============================================================================
# Level 9: Real-world Scenario Tests
# =============================================================================

class TestGroupSimilarResultsRealWorld:
    """Tests simulating real-world usage scenarios."""

    def test_rrf_fusion_scores_grouping(self):
        """Test with typical RRF fusion score ranges (0.001 - 0.02)."""
        results = [
            SearchResult(path="auth/login.py", score=0.0164, excerpt="def authenticate():"),
            SearchResult(path="auth/oauth.py", score=0.0163, excerpt="def authenticate():"),
            SearchResult(path="auth/basic.py", score=0.0162, excerpt="def authenticate():"),
            SearchResult(path="utils/helper.py", score=0.0082, excerpt="def helper():"),
        ]

        # RRF scores are typically very small, use appropriate threshold
        grouped = group_similar_results(results, score_threshold_abs=0.001)

        assert len(grouped) == 2

        auth_group = next(r for r in grouped if "auth" in r.path)
        assert len(auth_group.additional_locations) == 2

    def test_duplicate_code_detection(self):
        """Simulate detecting duplicate code across files."""
        duplicate_code = """
def calculate_total(items):
    return sum(item.price for item in items)
"""
        results = [
            SearchResult(path="orders/service.py", score=0.5, excerpt=duplicate_code, start_line=45),
            SearchResult(path="cart/calculator.py", score=0.5, excerpt=duplicate_code, start_line=12),
            SearchResult(path="invoices/generator.py", score=0.5, excerpt=duplicate_code, start_line=78),
        ]

        grouped = group_similar_results(results, score_threshold_abs=0.01)

        # All duplicates should be grouped
        assert len(grouped) == 1
        assert len(grouped[0].additional_locations) == 2

        # Can identify all locations
        all_paths = {grouped[0].path} | {loc.path for loc in grouped[0].additional_locations}
        assert all_paths == {"orders/service.py", "cart/calculator.py", "invoices/generator.py"}

    def test_mixed_relevance_results(self):
        """Test with mixed relevance results typical of code search."""
        results = [
            # High relevance group - exact match
            SearchResult(path="core.py", score=0.9, excerpt="def process():"),
            SearchResult(path="core_v2.py", score=0.89, excerpt="def process():"),
            # Medium relevance - partial match
            SearchResult(path="utils.py", score=0.5, excerpt="def process_data():"),
            # Low relevance - tangential
            SearchResult(path="test.py", score=0.2, excerpt="def test_process():"),
        ]

        grouped = group_similar_results(results, score_threshold_abs=0.02)

        # core.py and core_v2.py should group (same excerpt, similar score)
        # Others should remain separate (different excerpts)
        assert len(grouped) == 3

        high_rel = next(r for r in grouped if r.score >= 0.89)
        assert len(high_rel.additional_locations) == 1