mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-13 02:41:50 +08:00
feat: Add search result grouping by similarity score
Add functionality to group search results with similar content and scores into a single representative result with additional locations.

Changes:
- Add AdditionalLocation entity model for storing grouped result locations
- Add additional_locations field to SearchResult for backward compatibility
- Implement group_similar_results() function in ranking.py with:
  - Content-based grouping (by excerpt or content field)
  - Score-based sub-grouping with configurable threshold
  - Metadata preservation with grouped_count tracking
- Add group_results and grouping_threshold options to SearchOptions
- Integrate grouping into ChainSearchEngine.search() after RRF fusion

Test coverage:
- 36 multi-level tests covering unit, boundary, integration, and performance
- Real-world scenario tests for RRF scores and duplicate code detection

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -38,6 +38,8 @@ class SearchOptions:
|
||||
enable_vector: Enable vector semantic search (default False)
|
||||
pure_vector: If True, only use vector search without FTS fallback (default False)
|
||||
hybrid_weights: Custom RRF weights for hybrid search (optional)
|
||||
group_results: Enable grouping of similar results (default False)
|
||||
grouping_threshold: Score threshold for grouping similar results (default 0.01)
|
||||
"""
|
||||
depth: int = -1
|
||||
max_workers: int = 8
|
||||
@@ -51,6 +53,8 @@ class SearchOptions:
|
||||
enable_vector: bool = False
|
||||
pure_vector: bool = False
|
||||
hybrid_weights: Optional[Dict[str, float]] = None
|
||||
group_results: bool = False
|
||||
grouping_threshold: float = 0.01
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -210,6 +214,14 @@ class ChainSearchEngine:
|
||||
|
||||
# Step 4: Merge and rank
|
||||
final_results = self._merge_and_rank(results, options.total_limit)
|
||||
|
||||
# Step 5: Optional grouping of similar results
|
||||
if options.group_results:
|
||||
from codexlens.search.ranking import group_similar_results
|
||||
final_results = group_similar_results(
|
||||
final_results, score_threshold_abs=options.grouping_threshold
|
||||
)
|
||||
|
||||
stats.files_matched = len(final_results)
|
||||
|
||||
# Optional: Symbol search
|
||||
|
||||
Reference in New Issue
Block a user