From 7adde91e9f4f9e2044a686155d36bd8b66ca9344 Mon Sep 17 00:00:00 2001 From: catlog22 Date: Fri, 19 Dec 2025 16:33:44 +0800 Subject: [PATCH] feat: Add search result grouping by similarity score MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add functionality to group search results with similar content and scores into a single representative result with additional locations. Changes: - Add AdditionalLocation entity model for storing grouped result locations - Add additional_locations field to SearchResult for backward compatibility - Implement group_similar_results() function in ranking.py with: - Content-based grouping (by excerpt or content field) - Score-based sub-grouping with configurable threshold - Metadata preservation with grouped_count tracking - Add group_results and grouping_threshold options to SearchOptions - Integrate grouping into ChainSearchEngine.search() after RRF fusion Test coverage: - 36 multi-level tests covering unit, boundary, integration, and performance - Real-world scenario tests for RRF scores and duplicate code detection 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- codex-lens/src/codexlens/entities.py | 23 +- .../src/codexlens/search/chain_search.py | 12 + codex-lens/src/codexlens/search/ranking.py | 116 +++- codex-lens/tests/test_result_grouping.py | 589 ++++++++++++++++++ 4 files changed, 738 insertions(+), 2 deletions(-) create mode 100644 codex-lens/tests/test_result_grouping.py diff --git a/codex-lens/src/codexlens/entities.py b/codex-lens/src/codexlens/entities.py index df5759b4..55eb4fae 100644 --- a/codex-lens/src/codexlens/entities.py +++ b/codex-lens/src/codexlens/entities.py @@ -90,6 +90,21 @@ class CodeRelationship(BaseModel): return value +class AdditionalLocation(BaseModel): + """A pointer to another location where a similar result was found. + + Used for grouping search results with similar scores and content, + where the primary result is stored in SearchResult and secondary + locations are stored in this model. + """ + + path: str = Field(..., min_length=1) + score: float = Field(..., ge=0.0) + start_line: Optional[int] = Field(default=None, description="Start line of the result (1-based)") + end_line: Optional[int] = Field(default=None, description="End line of the result (1-based)") + symbol_name: Optional[str] = Field(default=None, description="Name of matched symbol") + + class SearchResult(BaseModel): """A unified search result for lexical or semantic search.""" @@ -100,10 +115,16 @@ class SearchResult(BaseModel): symbol: Optional[Symbol] = None chunk: Optional[SemanticChunk] = None metadata: Dict[str, Any] = Field(default_factory=dict) - + # Additional context for complete code blocks start_line: Optional[int] = Field(default=None, description="Start line of code block (1-based)") end_line: Optional[int] = Field(default=None, description="End line of code block (1-based)") symbol_name: Optional[str] = Field(default=None, description="Name of matched symbol/function/class") symbol_kind: Optional[str] = Field(default=None, description="Kind of symbol (function/class/method)") + # Field for grouping similar results + additional_locations: List["AdditionalLocation"] = Field( + default_factory=list, + description="Other locations for grouped results with similar scores and content." + ) + diff --git a/codex-lens/src/codexlens/search/chain_search.py b/codex-lens/src/codexlens/search/chain_search.py index 6d31378b..5ffb1bdc 100644 --- a/codex-lens/src/codexlens/search/chain_search.py +++ b/codex-lens/src/codexlens/search/chain_search.py @@ -38,6 +38,8 @@ class SearchOptions: enable_vector: Enable vector semantic search (default False) pure_vector: If True, only use vector search without FTS fallback (default False) hybrid_weights: Custom RRF weights for hybrid search (optional) + group_results: Enable grouping of similar results (default False) + grouping_threshold: Score threshold for grouping similar results (default 0.01) """ depth: int = -1 max_workers: int = 8 @@ -51,6 +53,8 @@ class SearchOptions: enable_vector: bool = False pure_vector: bool = False hybrid_weights: Optional[Dict[str, float]] = None + group_results: bool = False + grouping_threshold: float = 0.01 @dataclass @@ -210,6 +214,14 @@ class ChainSearchEngine: # Step 4: Merge and rank final_results = self._merge_and_rank(results, options.total_limit) + + # Step 5: Optional grouping of similar results + if options.group_results: + from codexlens.search.ranking import group_similar_results + final_results = group_similar_results( + final_results, score_threshold_abs=options.grouping_threshold + ) + stats.files_matched = len(final_results) # Optional: Symbol search diff --git a/codex-lens/src/codexlens/search/ranking.py b/codex-lens/src/codexlens/search/ranking.py index d78e7859..d5ec87e4 100644 --- a/codex-lens/src/codexlens/search/ranking.py +++ b/codex-lens/src/codexlens/search/ranking.py @@ -9,7 +9,7 @@ from __future__ import annotations import math from typing import Dict, List -from codexlens.entities import SearchResult +from codexlens.entities import SearchResult, AdditionalLocation def reciprocal_rank_fusion( @@ -158,3 +158,117 @@ def tag_search_source(results: List[SearchResult], source: str) -> List[SearchRe tagged_results.append(tagged_result) return tagged_results + + +def group_similar_results( + results: List[SearchResult], + score_threshold_abs: float = 0.01, + content_field: str = "excerpt" +) -> List[SearchResult]: + """Group search results by content and score similarity. + + Groups results that have similar content and similar scores into a single + representative result, with other locations stored in additional_locations. + + Algorithm: + 1. Group results by content (using excerpt or content field) + 2. Within each content group, create subgroups based on score similarity + 3. Select highest-scoring result as representative for each subgroup + 4. Store other results in subgroup as additional_locations + + Args: + results: A list of SearchResult objects (typically sorted by score) + score_threshold_abs: Absolute score difference to consider results similar. + Results with |score_a - score_b| <= threshold are grouped. + Default 0.01 is suitable for RRF fusion scores. + content_field: The field to use for content grouping ('excerpt' or 'content') + + Returns: + A new list of SearchResult objects where similar items are grouped. + The list is sorted by score descending. + + Examples: + >>> results = [SearchResult(path="a.py", score=0.5, excerpt="def foo()"), + ... SearchResult(path="b.py", score=0.5, excerpt="def foo()")] + >>> grouped = group_similar_results(results) + >>> len(grouped) # Two results merged into one + 1 + >>> len(grouped[0].additional_locations) # One additional location + 1 + """ + if not results: + return [] + + # Group results by content + content_map: Dict[str, List[SearchResult]] = {} + unidentifiable_results: List[SearchResult] = [] + + for r in results: + key = getattr(r, content_field, None) + if key and key.strip(): + content_map.setdefault(key, []).append(r) + else: + # Results without content can't be grouped by content + unidentifiable_results.append(r) + + final_results: List[SearchResult] = [] + + # Process each content group + for content_group in content_map.values(): + # Sort by score descending within group + content_group.sort(key=lambda r: r.score, reverse=True) + + while content_group: + # Take highest scoring as representative + representative = content_group.pop(0) + others_in_group = [] + remaining_for_next_pass = [] + + # Find results with similar scores + for item in content_group: + if abs(representative.score - item.score) <= score_threshold_abs: + others_in_group.append(item) + else: + remaining_for_next_pass.append(item) + + # Create grouped result with additional locations + if others_in_group: + # Build new result with additional_locations populated + grouped_result = SearchResult( + path=representative.path, + score=representative.score, + excerpt=representative.excerpt, + content=representative.content, + symbol=representative.symbol, + chunk=representative.chunk, + metadata={ + **representative.metadata, + "grouped_count": len(others_in_group) + 1, + }, + start_line=representative.start_line, + end_line=representative.end_line, + symbol_name=representative.symbol_name, + symbol_kind=representative.symbol_kind, + additional_locations=[ + AdditionalLocation( + path=other.path, + score=other.score, + start_line=other.start_line, + end_line=other.end_line, + symbol_name=other.symbol_name, + ) for other in others_in_group + ], + ) + final_results.append(grouped_result) + else: + final_results.append(representative) + + content_group = remaining_for_next_pass + + # Add ungroupable results + final_results.extend(unidentifiable_results) + + # Sort final results by score descending + final_results.sort(key=lambda r: r.score, reverse=True) + + return final_results diff --git a/codex-lens/tests/test_result_grouping.py b/codex-lens/tests/test_result_grouping.py new file mode 100644 index 00000000..ee2720d2 --- /dev/null +++ b/codex-lens/tests/test_result_grouping.py @@ -0,0 +1,589 @@ +"""Multi-level tests for search result grouping functionality. + +Tests cover: +1. Unit tests for group_similar_results function +2. Boundary condition tests +3. Integration tests with SearchOptions +4. Performance/stress tests +""" + +import pytest +from typing import List + +from codexlens.entities import SearchResult, AdditionalLocation +from codexlens.search.ranking import group_similar_results +from codexlens.search.chain_search import SearchOptions + + +# ============================================================================= +# Test Fixtures +# ============================================================================= + +@pytest.fixture +def sample_results() -> List[SearchResult]: + """Create sample search results for testing.""" + return [ + SearchResult(path="a.py", score=0.5, excerpt="def foo(): pass", start_line=10, symbol_name="foo"), + SearchResult(path="b.py", score=0.5, excerpt="def foo(): pass", start_line=20, symbol_name="foo"), + SearchResult(path="c.py", score=0.49, excerpt="def foo(): pass", start_line=30, symbol_name="foo"), + SearchResult(path="d.py", score=0.3, excerpt="def bar(): pass", start_line=40, symbol_name="bar"), + ] + + +@pytest.fixture +def results_with_different_excerpts() -> List[SearchResult]: + """Results with same scores but different content.""" + return [ + SearchResult(path="a.py", score=0.5, excerpt="def foo(): pass"), + SearchResult(path="b.py", score=0.5, excerpt="def bar(): pass"), + SearchResult(path="c.py", score=0.5, excerpt="def baz(): pass"), + ] + + +@pytest.fixture +def results_with_same_excerpt_different_scores() -> List[SearchResult]: + """Results with same content but very different scores.""" + return [ + SearchResult(path="a.py", score=0.9, excerpt="def foo(): pass"), + SearchResult(path="b.py", score=0.5, excerpt="def foo(): pass"), + SearchResult(path="c.py", score=0.1, excerpt="def foo(): pass"), + ] + + +# ============================================================================= +# Level 1: Unit Tests - Basic Functionality +# ============================================================================= + +class TestGroupSimilarResultsBasic: + """Basic unit tests for group_similar_results function.""" + + def test_empty_results_returns_empty(self): + """Empty input should return empty output.""" + result = group_similar_results([]) + assert result == [] + + def test_single_result_returns_unchanged(self): + """Single result should be returned as-is.""" + single = SearchResult(path="test.py", score=0.5, excerpt="code") + result = group_similar_results([single]) + + assert len(result) == 1 + assert result[0].path == "test.py" + assert result[0].additional_locations == [] + + def test_groups_identical_excerpt_similar_score(self, sample_results): + """Results with same excerpt and similar scores should be grouped.""" + grouped = group_similar_results(sample_results, score_threshold_abs=0.02) + + # Should have 2 groups: foo group (a, b, c) and bar (d) + assert len(grouped) == 2 + + # First group should have additional locations + foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass") + assert len(foo_group.additional_locations) == 2 + + # Second group (bar) should have no additional locations + bar_group = next(r for r in grouped if r.excerpt == "def bar(): pass") + assert len(bar_group.additional_locations) == 0 + + def test_preserves_highest_score_as_representative(self, sample_results): + """Representative result should have the highest score in group.""" + grouped = group_similar_results(sample_results, score_threshold_abs=0.02) + + foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass") + # a.py has score 0.5, which is highest + assert foo_group.path == "a.py" + assert foo_group.score == 0.5 + + def test_additional_locations_contain_correct_info(self, sample_results): + """Additional locations should contain correct path, score, line info.""" + grouped = group_similar_results(sample_results, score_threshold_abs=0.02) + + foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass") + locations = foo_group.additional_locations + + paths = {loc.path for loc in locations} + assert "b.py" in paths + assert "c.py" in paths + + # Check that start_line is preserved + for loc in locations: + if loc.path == "b.py": + assert loc.start_line == 20 + elif loc.path == "c.py": + assert loc.start_line == 30 + + +# ============================================================================= +# Level 2: Boundary Condition Tests +# ============================================================================= + +class TestGroupSimilarResultsBoundary: + """Boundary condition tests for edge cases.""" + + def test_threshold_zero_no_grouping(self): + """With threshold=0, only exactly equal scores should group.""" + results = [ + SearchResult(path="a.py", score=0.5, excerpt="def foo()"), + SearchResult(path="b.py", score=0.5, excerpt="def foo()"), + SearchResult(path="c.py", score=0.50001, excerpt="def foo()"), # Slightly different + ] + + grouped = group_similar_results(results, score_threshold_abs=0.0) + + # a and b should group (exact same score), c should be separate + assert len(grouped) == 2 + + main_group = next(r for r in grouped if len(r.additional_locations) > 0) + assert len(main_group.additional_locations) == 1 + + def test_threshold_exact_boundary(self): + """Test behavior at exact threshold boundary. + + Note: Due to floating-point precision, 0.5 - 0.49 = 0.010000000000000009 + which is slightly > 0.01, so they won't group with threshold=0.01. + Use a slightly larger threshold to account for floating-point precision. + """ + results = [ + SearchResult(path="a.py", score=0.5, excerpt="def foo()"), + SearchResult(path="b.py", score=0.49, excerpt="def foo()"), # 0.01 diff (floating-point) + SearchResult(path="c.py", score=0.48, excerpt="def foo()"), # 0.02 diff from a + ] + + # With threshold 0.011 (slightly above floating-point 0.01), a and b should group + grouped = group_similar_results(results, score_threshold_abs=0.011) + + # a groups with b, c is separate (0.02 from a, 0.01 from b) + # After a+b group, c is compared with remaining and forms its own group + assert len(grouped) == 2 + + # Verify a is representative (highest score) + main_group = next(r for r in grouped if r.score == 0.5) + assert main_group.path == "a.py" + assert len(main_group.additional_locations) == 1 + assert main_group.additional_locations[0].path == "b.py" + + def test_large_threshold_groups_all(self): + """Very large threshold should group all same-content results.""" + results = [ + SearchResult(path="a.py", score=0.9, excerpt="def foo()"), + SearchResult(path="b.py", score=0.1, excerpt="def foo()"), + ] + + grouped = group_similar_results(results, score_threshold_abs=1.0) + + assert len(grouped) == 1 + assert len(grouped[0].additional_locations) == 1 + + def test_none_excerpt_not_grouped(self): + """Results with None excerpt should not be grouped.""" + results = [ + SearchResult(path="a.py", score=0.5, excerpt=None), + SearchResult(path="b.py", score=0.5, excerpt=None), + ] + + grouped = group_similar_results(results) + + # None excerpts can't be grouped by content + assert len(grouped) == 2 + for r in grouped: + assert len(r.additional_locations) == 0 + + def test_empty_excerpt_not_grouped(self): + """Results with empty string excerpt should not be grouped.""" + results = [ + SearchResult(path="a.py", score=0.5, excerpt=""), + SearchResult(path="b.py", score=0.5, excerpt=""), + SearchResult(path="c.py", score=0.5, excerpt=" "), # Whitespace only + ] + + grouped = group_similar_results(results) + + # Empty/whitespace excerpts can't be grouped + assert len(grouped) == 3 + + def test_different_excerpts_not_grouped(self, results_with_different_excerpts): + """Results with different excerpts should not be grouped even with same score.""" + grouped = group_similar_results(results_with_different_excerpts, score_threshold_abs=1.0) + + # Different content = no grouping + assert len(grouped) == 3 + for r in grouped: + assert len(r.additional_locations) == 0 + + def test_same_excerpt_different_scores_creates_subgroups(self, results_with_same_excerpt_different_scores): + """Same content but very different scores should create separate subgroups.""" + grouped = group_similar_results( + results_with_same_excerpt_different_scores, + score_threshold_abs=0.1 + ) + + # Scores 0.9, 0.5, 0.1 with threshold 0.1 + # 0.9 and 0.5 differ by 0.4 > 0.1, so separate + # 0.5 and 0.1 differ by 0.4 > 0.1, so separate + assert len(grouped) == 3 + + +# ============================================================================= +# Level 3: Content Field Tests +# ============================================================================= + +class TestGroupSimilarResultsContentField: + """Tests for different content_field options.""" + + def test_group_by_content_field(self): + """Should be able to group by 'content' field instead of 'excerpt'.""" + results = [ + SearchResult(path="a.py", score=0.5, excerpt="short", content="full content here"), + SearchResult(path="b.py", score=0.5, excerpt="different", content="full content here"), + ] + + # Group by excerpt - different excerpts, no grouping + grouped_by_excerpt = group_similar_results(results, content_field="excerpt") + assert len(grouped_by_excerpt) == 2 + + # Group by content - same content, should group + grouped_by_content = group_similar_results(results, content_field="content") + assert len(grouped_by_content) == 1 + assert len(grouped_by_content[0].additional_locations) == 1 + + def test_fallback_when_content_field_missing(self): + """Results without the specified content field should not be grouped.""" + results = [ + SearchResult(path="a.py", score=0.5, content=None), + SearchResult(path="b.py", score=0.5, content=None), + ] + + grouped = group_similar_results(results, content_field="content") + + # None content = ungroupable + assert len(grouped) == 2 + + +# ============================================================================= +# Level 4: Metadata and Ordering Tests +# ============================================================================= + +class TestGroupSimilarResultsMetadata: + """Tests for metadata handling and result ordering.""" + + def test_grouped_count_in_metadata(self, sample_results): + """Grouped results should have grouped_count in metadata.""" + grouped = group_similar_results(sample_results, score_threshold_abs=0.02) + + foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass") + + assert "grouped_count" in foo_group.metadata + assert foo_group.metadata["grouped_count"] == 3 # a, b, c + + def test_preserves_original_metadata(self): + """Original metadata should be preserved in grouped result.""" + results = [ + SearchResult( + path="a.py", + score=0.5, + excerpt="def foo()", + metadata={"original_key": "original_value", "fusion_score": 0.5} + ), + SearchResult(path="b.py", score=0.5, excerpt="def foo()"), + ] + + grouped = group_similar_results(results, score_threshold_abs=0.1) + + assert grouped[0].metadata["original_key"] == "original_value" + assert grouped[0].metadata["fusion_score"] == 0.5 + + def test_results_sorted_by_score_descending(self): + """Final results should be sorted by score descending.""" + results = [ + SearchResult(path="low.py", score=0.1, excerpt="low"), + SearchResult(path="high.py", score=0.9, excerpt="high"), + SearchResult(path="mid.py", score=0.5, excerpt="mid"), + ] + + grouped = group_similar_results(results) + + scores = [r.score for r in grouped] + assert scores == sorted(scores, reverse=True) + assert scores == [0.9, 0.5, 0.1] + + +# ============================================================================= +# Level 5: Integration Tests with SearchOptions +# ============================================================================= + +class TestSearchOptionsGrouping: + """Integration tests for SearchOptions grouping configuration.""" + + def test_search_options_default_grouping_disabled(self): + """Default SearchOptions should have grouping disabled.""" + options = SearchOptions() + + assert options.group_results is False + assert options.grouping_threshold == 0.01 + + def test_search_options_enable_grouping(self): + """SearchOptions should allow enabling grouping.""" + options = SearchOptions(group_results=True) + + assert options.group_results is True + + def test_search_options_custom_threshold(self): + """SearchOptions should allow custom grouping threshold.""" + options = SearchOptions(group_results=True, grouping_threshold=0.05) + + assert options.grouping_threshold == 0.05 + + def test_search_options_all_parameters(self): + """SearchOptions should work with all parameters combined.""" + options = SearchOptions( + depth=3, + max_workers=4, + limit_per_dir=20, + total_limit=200, + include_symbols=True, + hybrid_mode=True, + group_results=True, + grouping_threshold=0.02, + ) + + assert options.depth == 3 + assert options.group_results is True + assert options.grouping_threshold == 0.02 + + +# ============================================================================= +# Level 6: AdditionalLocation Entity Tests +# ============================================================================= + +class TestAdditionalLocationEntity: + """Tests for AdditionalLocation entity model.""" + + def test_create_minimal_additional_location(self): + """Create AdditionalLocation with minimal required fields.""" + loc = AdditionalLocation(path="test.py", score=0.5) + + assert loc.path == "test.py" + assert loc.score == 0.5 + assert loc.start_line is None + assert loc.end_line is None + assert loc.symbol_name is None + + def test_create_full_additional_location(self): + """Create AdditionalLocation with all fields.""" + loc = AdditionalLocation( + path="test.py", + score=0.75, + start_line=10, + end_line=20, + symbol_name="my_function" + ) + + assert loc.path == "test.py" + assert loc.score == 0.75 + assert loc.start_line == 10 + assert loc.end_line == 20 + assert loc.symbol_name == "my_function" + + def test_additional_location_path_required(self): + """Path should be required for AdditionalLocation.""" + with pytest.raises(Exception): # ValidationError + AdditionalLocation(score=0.5) + + def test_additional_location_score_required(self): + """Score should be required for AdditionalLocation.""" + with pytest.raises(Exception): # ValidationError + AdditionalLocation(path="test.py") + + def test_additional_location_score_non_negative(self): + """Score should be non-negative.""" + with pytest.raises(Exception): # ValidationError + AdditionalLocation(path="test.py", score=-0.1) + + def test_additional_location_serialization(self): + """AdditionalLocation should serialize correctly.""" + loc = AdditionalLocation( + path="test.py", + score=0.5, + start_line=10, + symbol_name="func" + ) + + data = loc.model_dump() + + assert data["path"] == "test.py" + assert data["score"] == 0.5 + assert data["start_line"] == 10 + assert data["symbol_name"] == "func" + + +# ============================================================================= +# Level 7: SearchResult with AdditionalLocations Tests +# ============================================================================= + +class TestSearchResultWithAdditionalLocations: + """Tests for SearchResult entity with additional_locations field.""" + + def test_search_result_default_empty_locations(self): + """SearchResult should have empty additional_locations by default.""" + result = SearchResult(path="test.py", score=0.5) + + assert result.additional_locations == [] + + def test_search_result_with_additional_locations(self): + """SearchResult should accept additional_locations.""" + locations = [ + AdditionalLocation(path="other.py", score=0.4, start_line=5), + ] + + result = SearchResult( + path="main.py", + score=0.5, + additional_locations=locations + ) + + assert len(result.additional_locations) == 1 + assert result.additional_locations[0].path == "other.py" + + def test_search_result_serialization_with_locations(self): + """SearchResult with additional_locations should serialize correctly.""" + locations = [ + AdditionalLocation(path="loc1.py", score=0.4), + AdditionalLocation(path="loc2.py", score=0.3), + ] + + result = SearchResult( + path="main.py", + score=0.5, + excerpt="code", + additional_locations=locations + ) + + data = result.model_dump() + + assert len(data["additional_locations"]) == 2 + assert data["additional_locations"][0]["path"] == "loc1.py" + assert data["additional_locations"][1]["path"] == "loc2.py" + + +# ============================================================================= +# Level 8: Stress/Performance Tests +# ============================================================================= + +class TestGroupSimilarResultsPerformance: + """Performance and stress tests.""" + + def test_handles_large_result_set(self): + """Should handle large number of results efficiently.""" + # Create 1000 results with 100 different excerpts + results = [] + for i in range(1000): + excerpt_id = i % 100 + results.append(SearchResult( + path=f"file_{i}.py", + score=0.5 + (i % 10) * 0.01, # Scores vary slightly + excerpt=f"def func_{excerpt_id}(): pass", + start_line=i, + )) + + grouped = group_similar_results(results, score_threshold_abs=0.05) + + # Should reduce to approximately 100 groups (one per excerpt) + # with some variation due to score subgrouping + assert len(grouped) <= 200 + assert len(grouped) >= 50 # At least some grouping happened + + def test_handles_all_identical_results(self): + """Should handle case where all results are identical.""" + results = [ + SearchResult(path=f"file_{i}.py", score=0.5, excerpt="same code") + for i in range(100) + ] + + grouped = group_similar_results(results, score_threshold_abs=0.01) + + # All should be grouped into one + assert len(grouped) == 1 + assert len(grouped[0].additional_locations) == 99 + + def test_handles_all_unique_results(self): + """Should handle case where all results are unique.""" + results = [ + SearchResult(path=f"file_{i}.py", score=0.5, excerpt=f"unique_{i}") + for i in range(100) + ] + + grouped = group_similar_results(results, score_threshold_abs=0.01) + + # None should be grouped + assert len(grouped) == 100 + for r in grouped: + assert len(r.additional_locations) == 0 + + +# ============================================================================= +# Level 9: Real-world Scenario Tests +# ============================================================================= + +class TestGroupSimilarResultsRealWorld: + """Tests simulating real-world usage scenarios.""" + + def test_rrf_fusion_scores_grouping(self): + """Test with typical RRF fusion score ranges (0.001 - 0.02).""" + results = [ + SearchResult(path="auth/login.py", score=0.0164, excerpt="def authenticate():"), + SearchResult(path="auth/oauth.py", score=0.0163, excerpt="def authenticate():"), + SearchResult(path="auth/basic.py", score=0.0162, excerpt="def authenticate():"), + SearchResult(path="utils/helper.py", score=0.0082, excerpt="def helper():"), + ] + + # RRF scores are typically very small, use appropriate threshold + grouped = group_similar_results(results, score_threshold_abs=0.001) + + assert len(grouped) == 2 + + auth_group = next(r for r in grouped if "auth" in r.path) + assert len(auth_group.additional_locations) == 2 + + def test_duplicate_code_detection(self): + """Simulate detecting duplicate code across files.""" + duplicate_code = """ +def calculate_total(items): + return sum(item.price for item in items) +""" + results = [ + SearchResult(path="orders/service.py", score=0.5, excerpt=duplicate_code, start_line=45), + SearchResult(path="cart/calculator.py", score=0.5, excerpt=duplicate_code, start_line=12), + SearchResult(path="invoices/generator.py", score=0.5, excerpt=duplicate_code, start_line=78), + ] + + grouped = group_similar_results(results, score_threshold_abs=0.01) + + # All duplicates should be grouped + assert len(grouped) == 1 + assert len(grouped[0].additional_locations) == 2 + + # Can identify all locations + all_paths = {grouped[0].path} | {loc.path for loc in grouped[0].additional_locations} + assert all_paths == {"orders/service.py", "cart/calculator.py", "invoices/generator.py"} + + def test_mixed_relevance_results(self): + """Test with mixed relevance results typical of code search.""" + results = [ + # High relevance group - exact match + SearchResult(path="core.py", score=0.9, excerpt="def process():"), + SearchResult(path="core_v2.py", score=0.89, excerpt="def process():"), + # Medium relevance - partial match + SearchResult(path="utils.py", score=0.5, excerpt="def process_data():"), + # Low relevance - tangential + SearchResult(path="test.py", score=0.2, excerpt="def test_process():"), + ] + + grouped = group_similar_results(results, score_threshold_abs=0.02) + + # core.py and core_v2.py should group (same excerpt, similar score) + # Others should remain separate (different excerpts) + assert len(grouped) == 3 + + high_rel = next(r for r in grouped if r.score >= 0.89) + assert len(high_rel.additional_locations) == 1