Files
Claude-Code-Workflow/codex-lens/tests/test_result_grouping.py
catlog22 7adde91e9f feat: Add search result grouping by similarity score
Add functionality to group search results with similar content and scores
into a single representative result with additional locations.

Changes:
- Add AdditionalLocation entity model for storing grouped result locations
- Add additional_locations field to SearchResult for backward compatibility
- Implement group_similar_results() function in ranking.py with:
  - Content-based grouping (by excerpt or content field)
  - Score-based sub-grouping with configurable threshold
  - Metadata preservation with grouped_count tracking
- Add group_results and grouping_threshold options to SearchOptions
- Integrate grouping into ChainSearchEngine.search() after RRF fusion

Test coverage:
- 36 multi-level tests covering unit, boundary, integration, and performance
- Real-world scenario tests for RRF scores and duplicate code detection

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-19 16:33:44 +08:00

590 lines
23 KiB
Python

"""Multi-level tests for search result grouping functionality.
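Tests cover:
1. Unit tests for group_similar_results function (basics, content field, metadata/ordering)
2. Boundary condition tests
3. Integration tests with SearchOptions
4. Entity tests for AdditionalLocation and SearchResult.additional_locations
5. Performance/stress tests
6. Real-world scenario tests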
"""
import pytest
from typing import List
from codexlens.entities import SearchResult, AdditionalLocation
from codexlens.search.ranking import group_similar_results
from codexlens.search.chain_search import SearchOptions
# =============================================================================
# Test Fixtures
# =============================================================================
@pytest.fixture
def sample_results() -> List[SearchResult]:
    """Provide three ``foo`` hits with near-equal scores plus one ``bar`` hit.

    The ``foo`` results share the same excerpt (scores 0.5, 0.5, 0.49); the
    ``bar`` result has a different excerpt and a lower score (0.3).
    """
    # Columns: path, score, excerpt, start_line, symbol_name
    rows = [
        ("a.py", 0.5, "def foo(): pass", 10, "foo"),
        ("b.py", 0.5, "def foo(): pass", 20, "foo"),
        ("c.py", 0.49, "def foo(): pass", 30, "foo"),
        ("d.py", 0.3, "def bar(): pass", 40, "bar"),
    ]
    return [
        SearchResult(
            path=path,
            score=score,
            excerpt=excerpt,
            start_line=start_line,
            symbol_name=symbol_name,
        )
        for path, score, excerpt, start_line, symbol_name in rows
    ]
@pytest.fixture
def results_with_different_excerpts() -> List[SearchResult]:
    """Provide three results that tie on score (0.5) but differ in excerpt."""
    stems_and_functions = (("a", "foo"), ("b", "bar"), ("c", "baz"))
    return [
        SearchResult(path=f"{stem}.py", score=0.5, excerpt=f"def {func}(): pass")
        for stem, func in stems_and_functions
    ]
@pytest.fixture
def results_with_same_excerpt_different_scores() -> List[SearchResult]:
    """Provide three results sharing one excerpt with widely spread scores."""
    shared_excerpt = "def foo(): pass"
    # Insertion order of the dict is the order of the returned list.
    score_by_path = {"a.py": 0.9, "b.py": 0.5, "c.py": 0.1}
    return [
        SearchResult(path=path, score=score, excerpt=shared_excerpt)
        for path, score in score_by_path.items()
    ]
# =============================================================================
# Level 1: Unit Tests - Basic Functionality
# =============================================================================
class TestGroupSimilarResultsBasic:
    """Basic unit tests for group_similar_results function."""

    def test_empty_results_returns_empty(self):
        """Empty input should return empty output."""
        result = group_similar_results([])
        assert result == []

    def test_single_result_returns_unchanged(self):
        """Single result should be returned as-is."""
        single = SearchResult(path="test.py", score=0.5, excerpt="code")
        result = group_similar_results([single])
        assert len(result) == 1
        assert result[0].path == "test.py"
        assert result[0].additional_locations == []

    def test_groups_identical_excerpt_similar_score(self, sample_results):
        """Results with same excerpt and similar scores should be grouped."""
        grouped = group_similar_results(sample_results, score_threshold_abs=0.02)
        # Should have 2 groups: foo group (a, b, c) and bar (d)
        assert len(grouped) == 2
        # First group should have additional locations
        foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass")
        assert len(foo_group.additional_locations) == 2
        # Second group (bar) should have no additional locations
        bar_group = next(r for r in grouped if r.excerpt == "def bar(): pass")
        assert len(bar_group.additional_locations) == 0

    def test_preserves_highest_score_as_representative(self, sample_results):
        """Representative result should have the highest score in group."""
        grouped = group_similar_results(sample_results, score_threshold_abs=0.02)
        foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass")
        # a.py has score 0.5, which is highest
        assert foo_group.path == "a.py"
        assert foo_group.score == 0.5

    def test_additional_locations_contain_correct_info(self, sample_results):
        """Additional locations should contain correct path, score, line info."""
        grouped = group_similar_results(sample_results, score_threshold_abs=0.02)
        foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass")
        locations = foo_group.additional_locations
        by_path = {loc.path: loc for loc in locations}
        # Exactly the two non-representative foo hits, with no duplicates
        # and no stray entries (a membership check would tolerate extras).
        assert len(locations) == 2
        assert set(by_path) == {"b.py", "c.py"}
        # Check unconditionally that line and score carry over from the
        # original results defined in the sample_results fixture.
        assert by_path["b.py"].start_line == 20
        assert by_path["b.py"].score == 0.5
        assert by_path["c.py"].start_line == 30
        assert by_path["c.py"].score == 0.49
# =============================================================================
# Level 2: Boundary Condition Tests
# =============================================================================
class TestGroupSimilarResultsBoundary:
    """Edge-case tests: threshold extremes and ungroupable content."""

    def test_threshold_zero_no_grouping(self):
        """A zero threshold groups only results whose scores are exactly equal."""
        shared = "def foo()"
        candidates = [
            SearchResult(path="a.py", score=0.5, excerpt=shared),
            SearchResult(path="b.py", score=0.5, excerpt=shared),
            # Differs from the others by a tiny, non-zero amount.
            SearchResult(path="c.py", score=0.50001, excerpt=shared),
        ]
        grouped = group_similar_results(candidates, score_threshold_abs=0.0)
        # a + b collapse into one entry; c stays on its own.
        assert len(grouped) == 2
        merged = next(r for r in grouped if len(r.additional_locations) > 0)
        assert len(merged.additional_locations) == 1

    def test_threshold_exact_boundary(self):
        """Check grouping right at the threshold boundary.

        Note: in floating point, 0.5 - 0.49 evaluates to 0.010000000000000009,
        which is slightly greater than 0.01, so a threshold of exactly 0.01
        would not group those two scores. The test therefore uses a threshold
        a little above 0.01.
        """
        shared = "def foo()"
        candidates = [
            SearchResult(path="a.py", score=0.5, excerpt=shared),
            SearchResult(path="b.py", score=0.49, excerpt=shared),  # ~0.01 below a
            SearchResult(path="c.py", score=0.48, excerpt=shared),  # 0.02 below a
        ]
        # 0.011 sits just above the floating-point value of 0.5 - 0.49.
        grouped = group_similar_results(candidates, score_threshold_abs=0.011)
        # Expected: a absorbs b; c (0.02 away from a, ~0.01 away from b)
        # ends up in a group of its own.
        assert len(grouped) == 2
        # The 0.5-scored entry must be a.py, carrying b.py as its only extra location.
        top = next(r for r in grouped if r.score == 0.5)
        assert top.path == "a.py"
        extras = top.additional_locations
        assert len(extras) == 1
        assert extras[0].path == "b.py"

    def test_large_threshold_groups_all(self):
        """A threshold wider than the score spread merges all same-content results."""
        far_apart = [
            SearchResult(path="a.py", score=0.9, excerpt="def foo()"),
            SearchResult(path="b.py", score=0.1, excerpt="def foo()"),
        ]
        grouped = group_similar_results(far_apart, score_threshold_abs=1.0)
        assert len(grouped) == 1
        only = grouped[0]
        assert len(only.additional_locations) == 1

    def test_none_excerpt_not_grouped(self):
        """Results whose excerpt is None must stay separate."""
        without_excerpt = [
            SearchResult(path=name, score=0.5, excerpt=None)
            for name in ("a.py", "b.py")
        ]
        grouped = group_similar_results(without_excerpt)
        # No content to compare, so nothing may be merged.
        assert len(grouped) == 2
        assert all(len(r.additional_locations) == 0 for r in grouped)

    def test_empty_excerpt_not_grouped(self):
        """Results whose excerpt is empty or whitespace-only must stay separate."""
        blank_excerpts = ("", "", " ")  # last one is whitespace only
        paths = ("a.py", "b.py", "c.py")
        blanks = [
            SearchResult(path=path, score=0.5, excerpt=excerpt)
            for path, excerpt in zip(paths, blank_excerpts)
        ]
        grouped = group_similar_results(blanks)
        # Empty/whitespace excerpts are not usable as a grouping key.
        assert len(grouped) == 3

    def test_different_excerpts_not_grouped(self, results_with_different_excerpts):
        """Equal scores alone never cause grouping when the excerpts differ."""
        grouped = group_similar_results(
            results_with_different_excerpts,
            score_threshold_abs=1.0,
        )
        # Three distinct excerpts -> three untouched results.
        assert len(grouped) == 3
        assert all(len(r.additional_locations) == 0 for r in grouped)

    def test_same_excerpt_different_scores_creates_subgroups(self, results_with_same_excerpt_different_scores):
        """Identical content with widely spread scores splits into separate subgroups."""
        threshold = 0.1
        grouped = group_similar_results(
            results_with_same_excerpt_different_scores,
            score_threshold_abs=threshold,
        )
        # Fixture scores are 0.9, 0.5 and 0.1: each neighbouring pair is 0.4
        # apart, which exceeds the 0.1 threshold, so nothing merges.
        assert len(grouped) == 3
# =============================================================================
# Level 3: Content Field Tests
# =============================================================================
class TestGroupSimilarResultsContentField:
    """Tests for the ``content_field`` option of group_similar_results."""

    def test_group_by_content_field(self):
        """Grouping can key on 'content' rather than 'excerpt'."""
        same_content = "full content here"
        pair = [
            SearchResult(path="a.py", score=0.5, excerpt="short", content=same_content),
            SearchResult(path="b.py", score=0.5, excerpt="different", content=same_content),
        ]

        # Keyed on excerpt: the excerpts differ, so both results survive.
        by_excerpt = group_similar_results(pair, content_field="excerpt")
        assert len(by_excerpt) == 2

        # Keyed on content: identical content, so they merge into one.
        by_content = group_similar_results(pair, content_field="content")
        assert len(by_content) == 1
        merged = by_content[0]
        assert len(merged.additional_locations) == 1

    def test_fallback_when_content_field_missing(self):
        """Results lacking a value for the chosen content field stay ungrouped."""
        no_content = [
            SearchResult(path=name, score=0.5, content=None)
            for name in ("a.py", "b.py")
        ]
        grouped = group_similar_results(no_content, content_field="content")
        # A None content value gives nothing to group on.
        assert len(grouped) == 2
# =============================================================================
# Level 4: Metadata and Ordering Tests
# =============================================================================
class TestGroupSimilarResultsMetadata:
    """Tests for metadata propagation and output ordering."""

    def test_grouped_count_in_metadata(self, sample_results):
        """A merged result records how many results it stands for."""
        grouped = group_similar_results(sample_results, score_threshold_abs=0.02)
        foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass")
        metadata = foo_group.metadata
        assert "grouped_count" in metadata
        # a.py, b.py and c.py
        assert metadata["grouped_count"] == 3

    def test_preserves_original_metadata(self):
        """Metadata on the representative result survives grouping."""
        with_metadata = SearchResult(
            path="a.py",
            score=0.5,
            excerpt="def foo()",
            metadata={"original_key": "original_value", "fusion_score": 0.5},
        )
        plain = SearchResult(path="b.py", score=0.5, excerpt="def foo()")
        grouped = group_similar_results([with_metadata, plain], score_threshold_abs=0.1)
        first_metadata = grouped[0].metadata
        assert first_metadata["original_key"] == "original_value"
        assert first_metadata["fusion_score"] == 0.5

    def test_results_sorted_by_score_descending(self):
        """Output is ordered from highest to lowest score."""
        # Deliberately supplied out of order; label doubles as the excerpt.
        unsorted_input = [
            SearchResult(path=f"{label}.py", score=score, excerpt=label)
            for label, score in (("low", 0.1), ("high", 0.9), ("mid", 0.5))
        ]
        grouped = group_similar_results(unsorted_input)
        scores = [r.score for r in grouped]
        assert scores == sorted(scores, reverse=True)
        assert scores == [0.9, 0.5, 0.1]
# =============================================================================
# Level 5: Integration Tests with SearchOptions
# =============================================================================
class TestSearchOptionsGrouping:
    """Tests for the grouping-related fields of SearchOptions."""

    def test_search_options_default_grouping_disabled(self):
        """Out of the box, grouping is off and the threshold is 0.01."""
        defaults = SearchOptions()
        assert defaults.group_results is False
        assert defaults.grouping_threshold == 0.01

    def test_search_options_enable_grouping(self):
        """Grouping can be switched on via the constructor."""
        enabled = SearchOptions(group_results=True)
        assert enabled.group_results is True

    def test_search_options_custom_threshold(self):
        """A caller-supplied grouping threshold is stored as given."""
        custom = SearchOptions(group_results=True, grouping_threshold=0.05)
        assert custom.grouping_threshold == 0.05

    def test_search_options_all_parameters(self):
        """Grouping options coexist with the other SearchOptions parameters."""
        settings = {
            "depth": 3,
            "max_workers": 4,
            "limit_per_dir": 20,
            "total_limit": 200,
            "include_symbols": True,
            "hybrid_mode": True,
            "group_results": True,
            "grouping_threshold": 0.02,
        }
        options = SearchOptions(**settings)
        assert options.depth == 3
        assert options.group_results is True
        assert options.grouping_threshold == 0.02
# =============================================================================
# Level 6: AdditionalLocation Entity Tests
# =============================================================================
class TestAdditionalLocationEntity:
    """Tests for constructing and serializing AdditionalLocation."""

    def test_create_minimal_additional_location(self):
        """Only path and score are supplied; optional fields default to None."""
        loc = AdditionalLocation(path="test.py", score=0.5)
        assert loc.path == "test.py"
        assert loc.score == 0.5
        optional_values = (loc.start_line, loc.end_line, loc.symbol_name)
        assert all(value is None for value in optional_values)

    def test_create_full_additional_location(self):
        """Every field supplied at construction is readable back unchanged."""
        fields = {
            "path": "test.py",
            "score": 0.75,
            "start_line": 10,
            "end_line": 20,
            "symbol_name": "my_function",
        }
        loc = AdditionalLocation(**fields)
        assert loc.path == "test.py"
        assert loc.score == 0.75
        assert loc.start_line == 10
        assert loc.end_line == 20
        assert loc.symbol_name == "my_function"

    def test_additional_location_path_required(self):
        """Omitting path must be rejected."""
        # NOTE(review): presumably a validation error from the model layer;
        # the broad Exception match is kept as in the original test.
        with pytest.raises(Exception):  # ValidationError
            AdditionalLocation(score=0.5)

    def test_additional_location_score_required(self):
        """Omitting score must be rejected."""
        with pytest.raises(Exception):  # ValidationError
            AdditionalLocation(path="test.py")

    def test_additional_location_score_non_negative(self):
        """A negative score must be rejected."""
        with pytest.raises(Exception):  # ValidationError
            AdditionalLocation(path="test.py", score=-0.1)

    def test_additional_location_serialization(self):
        """model_dump() exposes the values the location was built with."""
        loc = AdditionalLocation(
            path="test.py",
            score=0.5,
            start_line=10,
            symbol_name="func",
        )
        dumped = loc.model_dump()
        expected = {
            "path": "test.py",
            "score": 0.5,
            "start_line": 10,
            "symbol_name": "func",
        }
        # Compare only the keys set above; other dumped keys are not checked.
        assert {key: dumped[key] for key in expected} == expected
# =============================================================================
# Level 7: SearchResult with AdditionalLocations Tests
# =============================================================================
class TestSearchResultWithAdditionalLocations:
    """Tests for the ``additional_locations`` field on SearchResult."""

    def test_search_result_default_empty_locations(self):
        """Without an explicit value, additional_locations is an empty list."""
        bare = SearchResult(path="test.py", score=0.5)
        assert bare.additional_locations == []

    def test_search_result_with_additional_locations(self):
        """A list of AdditionalLocation objects can be passed at construction."""
        extra = AdditionalLocation(path="other.py", score=0.4, start_line=5)
        result = SearchResult(path="main.py", score=0.5, additional_locations=[extra])
        stored = result.additional_locations
        assert len(stored) == 1
        assert stored[0].path == "other.py"

    def test_search_result_serialization_with_locations(self):
        """model_dump() includes the nested locations, in order."""
        result = SearchResult(
            path="main.py",
            score=0.5,
            excerpt="code",
            additional_locations=[
                AdditionalLocation(path="loc1.py", score=0.4),
                AdditionalLocation(path="loc2.py", score=0.3),
            ],
        )
        dumped_locations = result.model_dump()["additional_locations"]
        assert len(dumped_locations) == 2
        assert dumped_locations[0]["path"] == "loc1.py"
        assert dumped_locations[1]["path"] == "loc2.py"
# =============================================================================
# Level 8: Stress/Performance Tests
# =============================================================================
class TestGroupSimilarResultsPerformance:
    """Performance and stress tests."""

    def test_handles_large_result_set(self):
        """Should handle large number of results and group them exactly."""
        # 1000 results spread over 100 distinct excerpts (10 results each).
        # Because i % 100 determines i % 10, every result sharing an excerpt
        # also shares the exact same score, so no score sub-grouping can
        # occur: the expected outcome is exactly one group per excerpt.
        results = [
            SearchResult(
                path=f"file_{i}.py",
                score=0.5 + (i % 10) * 0.01,
                excerpt=f"def func_{i % 100}(): pass",
                start_line=i,
            )
            for i in range(1000)
        ]
        grouped = group_similar_results(results, score_threshold_abs=0.05)
        # One representative per excerpt ...
        assert len(grouped) == 100
        # ... each carrying the other nine occurrences, so no result is lost.
        assert all(len(r.additional_locations) == 9 for r in grouped)

    def test_handles_all_identical_results(self):
        """Should handle case where all results are identical."""
        results = [
            SearchResult(path=f"file_{i}.py", score=0.5, excerpt="same code")
            for i in range(100)
        ]
        grouped = group_similar_results(results, score_threshold_abs=0.01)
        # All should be grouped into one
        assert len(grouped) == 1
        assert len(grouped[0].additional_locations) == 99

    def test_handles_all_unique_results(self):
        """Should handle case where all results are unique."""
        results = [
            SearchResult(path=f"file_{i}.py", score=0.5, excerpt=f"unique_{i}")
            for i in range(100)
        ]
        grouped = group_similar_results(results, score_threshold_abs=0.01)
        # None should be grouped
        assert len(grouped) == 100
        for r in grouped:
            assert len(r.additional_locations) == 0
# =============================================================================
# Level 9: Real-world Scenario Tests
# =============================================================================
class TestGroupSimilarResultsRealWorld:
    """Scenario tests modelled on realistic search output."""

    def test_rrf_fusion_scores_grouping(self):
        """Grouping works on small RRF-style fusion scores (0.001 - 0.02)."""
        auth_excerpt = "def authenticate():"
        hits = [
            SearchResult(path="auth/login.py", score=0.0164, excerpt=auth_excerpt),
            SearchResult(path="auth/oauth.py", score=0.0163, excerpt=auth_excerpt),
            SearchResult(path="auth/basic.py", score=0.0162, excerpt=auth_excerpt),
            SearchResult(path="utils/helper.py", score=0.0082, excerpt="def helper():"),
        ]
        # Scores this small need a correspondingly small threshold.
        grouped = group_similar_results(hits, score_threshold_abs=0.001)
        assert len(grouped) == 2
        auth_group = next(r for r in grouped if "auth" in r.path)
        assert len(auth_group.additional_locations) == 2

    def test_duplicate_code_detection(self):
        """The same snippet found in several files collapses into one result."""
        # Only equality between the excerpts matters for this test.
        duplicate_code = (
            "\n"
            "def calculate_total(items):\n"
            "return sum(item.price for item in items)\n"
        )
        occurrences = (
            ("orders/service.py", 45),
            ("cart/calculator.py", 12),
            ("invoices/generator.py", 78),
        )
        hits = [
            SearchResult(path=path, score=0.5, excerpt=duplicate_code, start_line=line)
            for path, line in occurrences
        ]
        grouped = group_similar_results(hits, score_threshold_abs=0.01)
        # Every duplicate folds into a single representative.
        assert len(grouped) == 1
        representative = grouped[0]
        assert len(representative.additional_locations) == 2
        # Representative plus extras together name every original file.
        all_paths = {representative.path} | {
            loc.path for loc in representative.additional_locations
        }
        assert all_paths == {"orders/service.py", "cart/calculator.py", "invoices/generator.py"}

    def test_mixed_relevance_results(self):
        """Only the same-excerpt, close-score pair merges in a mixed result list."""
        hits = [
            # High relevance: identical excerpt, scores 0.01 apart
            SearchResult(path="core.py", score=0.9, excerpt="def process():"),
            SearchResult(path="core_v2.py", score=0.89, excerpt="def process():"),
            # Medium relevance: partial match, its own excerpt
            SearchResult(path="utils.py", score=0.5, excerpt="def process_data():"),
            # Low relevance: tangential, its own excerpt
            SearchResult(path="test.py", score=0.2, excerpt="def test_process():"),
        ]
        grouped = group_similar_results(hits, score_threshold_abs=0.02)
        # core.py + core_v2.py merge; the other two have unique excerpts.
        assert len(grouped) == 3
        top_group = next(r for r in grouped if r.score >= 0.89)
        assert len(top_group.additional_locations) == 1