"""Multi-level tests for search result grouping functionality.
|
|
|
|
Tests cover:
|
|
1. Unit tests for group_similar_results function
|
|
2. Boundary condition tests
|
|
3. Integration tests with SearchOptions
|
|
4. Performance/stress tests
|
|
"""
|
|
|
|
import pytest
|
|
from typing import List
|
|
|
|
from codexlens.entities import SearchResult, AdditionalLocation
|
|
from codexlens.search.ranking import group_similar_results
|
|
from codexlens.search.chain_search import SearchOptions
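
# NOTE (added for orientation, not part of the implementation): the calls in this file
# assume group_similar_results takes roughly the following shape. This is inferred from
# usage below, so treat it as a sketch rather than the authoritative signature:
#
#   group_similar_results(
#       results: List[SearchResult],
#       score_threshold_abs: float = 0.01,   # assumed default; matches SearchOptions.grouping_threshold
#       content_field: str = "excerpt",      # assumed default; "content" is also exercised below
#   ) -> List[SearchResult]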


# =============================================================================
# Test Fixtures
# =============================================================================

@pytest.fixture
def sample_results() -> List[SearchResult]:
    """Create sample search results for testing."""
    return [
        SearchResult(path="a.py", score=0.5, excerpt="def foo(): pass", start_line=10, symbol_name="foo"),
        SearchResult(path="b.py", score=0.5, excerpt="def foo(): pass", start_line=20, symbol_name="foo"),
        SearchResult(path="c.py", score=0.49, excerpt="def foo(): pass", start_line=30, symbol_name="foo"),
        SearchResult(path="d.py", score=0.3, excerpt="def bar(): pass", start_line=40, symbol_name="bar"),
    ]


@pytest.fixture
def results_with_different_excerpts() -> List[SearchResult]:
    """Results with same scores but different content."""
    return [
        SearchResult(path="a.py", score=0.5, excerpt="def foo(): pass"),
        SearchResult(path="b.py", score=0.5, excerpt="def bar(): pass"),
        SearchResult(path="c.py", score=0.5, excerpt="def baz(): pass"),
    ]


@pytest.fixture
def results_with_same_excerpt_different_scores() -> List[SearchResult]:
    """Results with same content but very different scores."""
    return [
        SearchResult(path="a.py", score=0.9, excerpt="def foo(): pass"),
        SearchResult(path="b.py", score=0.5, excerpt="def foo(): pass"),
        SearchResult(path="c.py", score=0.1, excerpt="def foo(): pass"),
    ]


# =============================================================================
# Level 1: Unit Tests - Basic Functionality
# =============================================================================

class TestGroupSimilarResultsBasic:
    """Basic unit tests for group_similar_results function."""

    def test_empty_results_returns_empty(self):
        """Empty input should return empty output."""
        result = group_similar_results([])
        assert result == []

    def test_single_result_returns_unchanged(self):
        """Single result should be returned as-is."""
        single = SearchResult(path="test.py", score=0.5, excerpt="code")
        result = group_similar_results([single])

        assert len(result) == 1
        assert result[0].path == "test.py"
        assert result[0].additional_locations == []

    def test_groups_identical_excerpt_similar_score(self, sample_results):
        """Results with same excerpt and similar scores should be grouped."""
        grouped = group_similar_results(sample_results, score_threshold_abs=0.02)

        # Should have 2 groups: foo group (a, b, c) and bar (d)
        assert len(grouped) == 2

        # First group should have additional locations
        foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass")
        assert len(foo_group.additional_locations) == 2

        # Second group (bar) should have no additional locations
        bar_group = next(r for r in grouped if r.excerpt == "def bar(): pass")
        assert len(bar_group.additional_locations) == 0

    def test_preserves_highest_score_as_representative(self, sample_results):
        """Representative result should have the highest score in group."""
        grouped = group_similar_results(sample_results, score_threshold_abs=0.02)

        foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass")
        # a.py has score 0.5, which is highest
        assert foo_group.path == "a.py"
        assert foo_group.score == 0.5

    def test_additional_locations_contain_correct_info(self, sample_results):
        """Additional locations should contain correct path, score, line info."""
        grouped = group_similar_results(sample_results, score_threshold_abs=0.02)

        foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass")
        locations = foo_group.additional_locations

        paths = {loc.path for loc in locations}
        assert "b.py" in paths
        assert "c.py" in paths

        # Check that start_line is preserved
        for loc in locations:
            if loc.path == "b.py":
                assert loc.start_line == 20
            elif loc.path == "c.py":
                assert loc.start_line == 30


# =============================================================================
# Level 2: Boundary Condition Tests
# =============================================================================

class TestGroupSimilarResultsBoundary:
    """Boundary condition tests for edge cases."""

    def test_threshold_zero_no_grouping(self):
        """With threshold=0, only exactly equal scores should group."""
        results = [
            SearchResult(path="a.py", score=0.5, excerpt="def foo()"),
            SearchResult(path="b.py", score=0.5, excerpt="def foo()"),
            SearchResult(path="c.py", score=0.50001, excerpt="def foo()"),  # Slightly different
        ]

        grouped = group_similar_results(results, score_threshold_abs=0.0)

        # a and b should group (exact same score), c should be separate
        assert len(grouped) == 2

        main_group = next(r for r in grouped if len(r.additional_locations) > 0)
        assert len(main_group.additional_locations) == 1

    def test_threshold_exact_boundary(self):
        """Test behavior at exact threshold boundary.

        Note: Due to floating-point precision, 0.5 - 0.49 = 0.010000000000000009
        which is slightly > 0.01, so they won't group with threshold=0.01.
        Use a slightly larger threshold to account for floating-point precision.
        """
        results = [
            SearchResult(path="a.py", score=0.5, excerpt="def foo()"),
            SearchResult(path="b.py", score=0.49, excerpt="def foo()"),  # 0.01 diff (floating-point)
            SearchResult(path="c.py", score=0.48, excerpt="def foo()"),  # 0.02 diff from a
        ]

        # With threshold 0.011 (slightly above floating-point 0.01), a and b should group
        grouped = group_similar_results(results, score_threshold_abs=0.011)

        # a groups with b, c is separate (0.02 from a, 0.01 from b)
        # After a+b group, c is compared with remaining and forms its own group
        assert len(grouped) == 2

        # Verify a is representative (highest score)
        main_group = next(r for r in grouped if r.score == 0.5)
        assert main_group.path == "a.py"
        assert len(main_group.additional_locations) == 1
        assert main_group.additional_locations[0].path == "b.py"
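
    # Added for context (not in the original suite): a minimal check documenting the
    # floating-point gap that motivates the 0.011 threshold above. Pure-Python arithmetic,
    # no assumptions about codexlens internals.
    def test_floating_point_gap_behind_threshold_note(self):
        """Illustrative only: 0.5 - 0.49 overshoots 0.01 by a tiny float error."""
        diff = 0.5 - 0.49
        assert diff > 0.01  # the strict comparison that defeats a 0.01 threshold
        assert diff == pytest.approx(0.01)  # but it is 0.01 for practical purposes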

    def test_large_threshold_groups_all(self):
        """Very large threshold should group all same-content results."""
        results = [
            SearchResult(path="a.py", score=0.9, excerpt="def foo()"),
            SearchResult(path="b.py", score=0.1, excerpt="def foo()"),
        ]

        grouped = group_similar_results(results, score_threshold_abs=1.0)

        assert len(grouped) == 1
        assert len(grouped[0].additional_locations) == 1

    def test_none_excerpt_not_grouped(self):
        """Results with None excerpt should not be grouped."""
        results = [
            SearchResult(path="a.py", score=0.5, excerpt=None),
            SearchResult(path="b.py", score=0.5, excerpt=None),
        ]

        grouped = group_similar_results(results)

        # None excerpts can't be grouped by content
        assert len(grouped) == 2
        for r in grouped:
            assert len(r.additional_locations) == 0

    def test_empty_excerpt_not_grouped(self):
        """Results with empty string excerpt should not be grouped."""
        results = [
            SearchResult(path="a.py", score=0.5, excerpt=""),
            SearchResult(path="b.py", score=0.5, excerpt=""),
            SearchResult(path="c.py", score=0.5, excerpt=" "),  # Whitespace only
        ]

        grouped = group_similar_results(results)

        # Empty/whitespace excerpts can't be grouped
        assert len(grouped) == 3

    def test_different_excerpts_not_grouped(self, results_with_different_excerpts):
        """Results with different excerpts should not be grouped even with same score."""
        grouped = group_similar_results(results_with_different_excerpts, score_threshold_abs=1.0)

        # Different content = no grouping
        assert len(grouped) == 3
        for r in grouped:
            assert len(r.additional_locations) == 0

    def test_same_excerpt_different_scores_creates_subgroups(self, results_with_same_excerpt_different_scores):
        """Same content but very different scores should create separate subgroups."""
        grouped = group_similar_results(
            results_with_same_excerpt_different_scores,
            score_threshold_abs=0.1
        )

        # Scores 0.9, 0.5, 0.1 with threshold 0.1
        # 0.9 and 0.5 differ by 0.4 > 0.1, so separate
        # 0.5 and 0.1 differ by 0.4 > 0.1, so separate
        assert len(grouped) == 3


# =============================================================================
# Level 3: Content Field Tests
# =============================================================================

class TestGroupSimilarResultsContentField:
    """Tests for different content_field options."""

    def test_group_by_content_field(self):
        """Should be able to group by 'content' field instead of 'excerpt'."""
        results = [
            SearchResult(path="a.py", score=0.5, excerpt="short", content="full content here"),
            SearchResult(path="b.py", score=0.5, excerpt="different", content="full content here"),
        ]

        # Group by excerpt - different excerpts, no grouping
        grouped_by_excerpt = group_similar_results(results, content_field="excerpt")
        assert len(grouped_by_excerpt) == 2

        # Group by content - same content, should group
        grouped_by_content = group_similar_results(results, content_field="content")
        assert len(grouped_by_content) == 1
        assert len(grouped_by_content[0].additional_locations) == 1

    def test_fallback_when_content_field_missing(self):
        """Results without the specified content field should not be grouped."""
        results = [
            SearchResult(path="a.py", score=0.5, content=None),
            SearchResult(path="b.py", score=0.5, content=None),
        ]

        grouped = group_similar_results(results, content_field="content")

        # None content = ungroupable
        assert len(grouped) == 2


# =============================================================================
# Level 4: Metadata and Ordering Tests
# =============================================================================

class TestGroupSimilarResultsMetadata:
    """Tests for metadata handling and result ordering."""

    def test_grouped_count_in_metadata(self, sample_results):
        """Grouped results should have grouped_count in metadata."""
        grouped = group_similar_results(sample_results, score_threshold_abs=0.02)

        foo_group = next(r for r in grouped if r.excerpt == "def foo(): pass")

        assert "grouped_count" in foo_group.metadata
        assert foo_group.metadata["grouped_count"] == 3  # a, b, c

    def test_preserves_original_metadata(self):
        """Original metadata should be preserved in grouped result."""
        results = [
            SearchResult(
                path="a.py",
                score=0.5,
                excerpt="def foo()",
                metadata={"original_key": "original_value", "fusion_score": 0.5}
            ),
            SearchResult(path="b.py", score=0.5, excerpt="def foo()"),
        ]

        grouped = group_similar_results(results, score_threshold_abs=0.1)

        assert grouped[0].metadata["original_key"] == "original_value"
        assert grouped[0].metadata["fusion_score"] == 0.5

    def test_results_sorted_by_score_descending(self):
        """Final results should be sorted by score descending."""
        results = [
            SearchResult(path="low.py", score=0.1, excerpt="low"),
            SearchResult(path="high.py", score=0.9, excerpt="high"),
            SearchResult(path="mid.py", score=0.5, excerpt="mid"),
        ]

        grouped = group_similar_results(results)

        scores = [r.score for r in grouped]
        assert scores == sorted(scores, reverse=True)
        assert scores == [0.9, 0.5, 0.1]


# =============================================================================
# Level 5: Integration Tests with SearchOptions
# =============================================================================

class TestSearchOptionsGrouping:
    """Integration tests for SearchOptions grouping configuration."""

    def test_search_options_default_grouping_disabled(self):
        """Default SearchOptions should have grouping disabled."""
        options = SearchOptions()

        assert options.group_results is False
        assert options.grouping_threshold == 0.01

    def test_search_options_enable_grouping(self):
        """SearchOptions should allow enabling grouping."""
        options = SearchOptions(group_results=True)

        assert options.group_results is True

    def test_search_options_custom_threshold(self):
        """SearchOptions should allow custom grouping threshold."""
        options = SearchOptions(group_results=True, grouping_threshold=0.05)

        assert options.grouping_threshold == 0.05

    def test_search_options_all_parameters(self):
        """SearchOptions should work with all parameters combined."""
        options = SearchOptions(
            depth=3,
            max_workers=4,
            limit_per_dir=20,
            total_limit=200,
            include_symbols=True,
            hybrid_mode=True,
            group_results=True,
            grouping_threshold=0.02,
        )

        assert options.depth == 3
        assert options.group_results is True
        assert options.grouping_threshold == 0.02
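
# Illustrative end-to-end sketch (added for context; not executed by this suite).
# It assumes ChainSearchEngine lives in codexlens.search.chain_search and that its
# search() accepts a SearchOptions instance -- verify against the real API before use:
#
#   from codexlens.search.chain_search import ChainSearchEngine
#
#   engine = ChainSearchEngine(...)                      # construction details omitted
#   options = SearchOptions(group_results=True, grouping_threshold=0.02)
#   results = engine.search("authenticate", options)     # grouping runs after RRF fusion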


# =============================================================================
# Level 6: AdditionalLocation Entity Tests
# =============================================================================

class TestAdditionalLocationEntity:
    """Tests for AdditionalLocation entity model."""

    def test_create_minimal_additional_location(self):
        """Create AdditionalLocation with minimal required fields."""
        loc = AdditionalLocation(path="test.py", score=0.5)

        assert loc.path == "test.py"
        assert loc.score == 0.5
        assert loc.start_line is None
        assert loc.end_line is None
        assert loc.symbol_name is None

    def test_create_full_additional_location(self):
        """Create AdditionalLocation with all fields."""
        loc = AdditionalLocation(
            path="test.py",
            score=0.75,
            start_line=10,
            end_line=20,
            symbol_name="my_function"
        )

        assert loc.path == "test.py"
        assert loc.score == 0.75
        assert loc.start_line == 10
        assert loc.end_line == 20
        assert loc.symbol_name == "my_function"

    def test_additional_location_path_required(self):
        """Path should be required for AdditionalLocation."""
        with pytest.raises(Exception):  # ValidationError
            AdditionalLocation(score=0.5)

    def test_additional_location_score_required(self):
        """Score should be required for AdditionalLocation."""
        with pytest.raises(Exception):  # ValidationError
            AdditionalLocation(path="test.py")

    def test_additional_location_score_non_negative(self):
        """Score should be non-negative."""
        with pytest.raises(Exception):  # ValidationError
            AdditionalLocation(path="test.py", score=-0.1)

    def test_additional_location_serialization(self):
        """AdditionalLocation should serialize correctly."""
        loc = AdditionalLocation(
            path="test.py",
            score=0.5,
            start_line=10,
            symbol_name="func"
        )

        data = loc.model_dump()

        assert data["path"] == "test.py"
        assert data["score"] == 0.5
        assert data["start_line"] == 10
        assert data["symbol_name"] == "func"


# =============================================================================
# Level 7: SearchResult with AdditionalLocations Tests
# =============================================================================

class TestSearchResultWithAdditionalLocations:
    """Tests for SearchResult entity with additional_locations field."""

    def test_search_result_default_empty_locations(self):
        """SearchResult should have empty additional_locations by default."""
        result = SearchResult(path="test.py", score=0.5)

        assert result.additional_locations == []

    def test_search_result_with_additional_locations(self):
        """SearchResult should accept additional_locations."""
        locations = [
            AdditionalLocation(path="other.py", score=0.4, start_line=5),
        ]

        result = SearchResult(
            path="main.py",
            score=0.5,
            additional_locations=locations
        )

        assert len(result.additional_locations) == 1
        assert result.additional_locations[0].path == "other.py"

    def test_search_result_serialization_with_locations(self):
        """SearchResult with additional_locations should serialize correctly."""
        locations = [
            AdditionalLocation(path="loc1.py", score=0.4),
            AdditionalLocation(path="loc2.py", score=0.3),
        ]

        result = SearchResult(
            path="main.py",
            score=0.5,
            excerpt="code",
            additional_locations=locations
        )

        data = result.model_dump()

        assert len(data["additional_locations"]) == 2
        assert data["additional_locations"][0]["path"] == "loc1.py"
        assert data["additional_locations"][1]["path"] == "loc2.py"


# =============================================================================
# Level 8: Stress/Performance Tests
# =============================================================================

class TestGroupSimilarResultsPerformance:
    """Performance and stress tests."""

    def test_handles_large_result_set(self):
        """Should handle large number of results efficiently."""
        # Create 1000 results with 100 different excerpts
        results = []
        for i in range(1000):
            excerpt_id = i % 100
            results.append(SearchResult(
                path=f"file_{i}.py",
                score=0.5 + (i % 10) * 0.01,  # Scores vary slightly
                excerpt=f"def func_{excerpt_id}(): pass",
                start_line=i,
            ))

        grouped = group_similar_results(results, score_threshold_abs=0.05)

        # Should reduce to approximately 100 groups (one per excerpt)
        # with some variation due to score subgrouping
        assert len(grouped) <= 200
        assert len(grouped) >= 50  # At least some grouping happened
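
    # Illustrative sketch (added; not part of the original suite): if a hard latency
    # budget is ever required, the large-set case above could be timed explicitly.
    # The 1.0-second bound below is an assumed, arbitrary budget.
    #
    #   import time
    #   start = time.perf_counter()
    #   group_similar_results(results, score_threshold_abs=0.05)
    #   assert time.perf_counter() - start < 1.0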

    def test_handles_all_identical_results(self):
        """Should handle case where all results are identical."""
        results = [
            SearchResult(path=f"file_{i}.py", score=0.5, excerpt="same code")
            for i in range(100)
        ]

        grouped = group_similar_results(results, score_threshold_abs=0.01)

        # All should be grouped into one
        assert len(grouped) == 1
        assert len(grouped[0].additional_locations) == 99

    def test_handles_all_unique_results(self):
        """Should handle case where all results are unique."""
        results = [
            SearchResult(path=f"file_{i}.py", score=0.5, excerpt=f"unique_{i}")
            for i in range(100)
        ]

        grouped = group_similar_results(results, score_threshold_abs=0.01)

        # None should be grouped
        assert len(grouped) == 100
        for r in grouped:
            assert len(r.additional_locations) == 0


# =============================================================================
# Level 9: Real-world Scenario Tests
# =============================================================================

class TestGroupSimilarResultsRealWorld:
    """Tests simulating real-world usage scenarios."""

    def test_rrf_fusion_scores_grouping(self):
        """Test with typical RRF fusion score ranges (0.001 - 0.02)."""
        results = [
            SearchResult(path="auth/login.py", score=0.0164, excerpt="def authenticate():"),
            SearchResult(path="auth/oauth.py", score=0.0163, excerpt="def authenticate():"),
            SearchResult(path="auth/basic.py", score=0.0162, excerpt="def authenticate():"),
            SearchResult(path="utils/helper.py", score=0.0082, excerpt="def helper():"),
        ]

        # RRF scores are typically very small, use appropriate threshold
        grouped = group_similar_results(results, score_threshold_abs=0.001)

        assert len(grouped) == 2

        auth_group = next(r for r in grouped if "auth" in r.path)
        assert len(auth_group.additional_locations) == 2
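
    # Added for context (not in the original suite): a minimal arithmetic check of why
    # RRF scores land in the 0.01-0.02 range. Assumes the common reciprocal-rank-fusion
    # form 1 / (k + rank) with the conventional k = 60; codexlens's exact constant may differ.
    def test_rrf_score_magnitude_note(self):
        """Illustrative only: a rank-1 hit from a single ranker scores about 0.0164."""
        k = 60
        rank = 1
        assert 1 / (k + rank) == pytest.approx(0.0164, abs=1e-4)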

    def test_duplicate_code_detection(self):
        """Simulate detecting duplicate code across files."""
        duplicate_code = """
def calculate_total(items):
    return sum(item.price for item in items)
"""
        results = [
            SearchResult(path="orders/service.py", score=0.5, excerpt=duplicate_code, start_line=45),
            SearchResult(path="cart/calculator.py", score=0.5, excerpt=duplicate_code, start_line=12),
            SearchResult(path="invoices/generator.py", score=0.5, excerpt=duplicate_code, start_line=78),
        ]

        grouped = group_similar_results(results, score_threshold_abs=0.01)

        # All duplicates should be grouped
        assert len(grouped) == 1
        assert len(grouped[0].additional_locations) == 2

        # Can identify all locations
        all_paths = {grouped[0].path} | {loc.path for loc in grouped[0].additional_locations}
        assert all_paths == {"orders/service.py", "cart/calculator.py", "invoices/generator.py"}

    def test_mixed_relevance_results(self):
        """Test with mixed relevance results typical of code search."""
        results = [
            # High relevance group - exact match
            SearchResult(path="core.py", score=0.9, excerpt="def process():"),
            SearchResult(path="core_v2.py", score=0.89, excerpt="def process():"),
            # Medium relevance - partial match
            SearchResult(path="utils.py", score=0.5, excerpt="def process_data():"),
            # Low relevance - tangential
            SearchResult(path="test.py", score=0.2, excerpt="def test_process():"),
        ]

        grouped = group_similar_results(results, score_threshold_abs=0.02)

        # core.py and core_v2.py should group (same excerpt, similar score)
        # Others should remain separate (different excerpts)
        assert len(grouped) == 3

        high_rel = next(r for r in grouped if r.score >= 0.89)
        assert len(high_rel.additional_locations) == 1