Add comprehensive tests for query parsing and Reciprocal Rank Fusion

- Implemented tests for the QueryParser class, covering various identifier splitting methods (CamelCase, snake_case, kebab-case), OR expansion, and FTS5 operator preservation.
- Added parameterized tests to validate expected token outputs for different query formats.
- Created edge case tests to ensure robustness against unusual input scenarios.
- Developed tests for the Reciprocal Rank Fusion (RRF) algorithm, including score computation, weight handling, and result ranking across multiple sources.
- Included tests for normalizing BM25 scores and for tagging search results with source metadata.
This commit is contained in:
catlog22
2025-12-16 10:20:19 +08:00
parent 35485bbbb1
commit 3da0ef2adb
39 changed files with 6171 additions and 240 deletions

View File

@@ -41,15 +41,45 @@ def print_json(*, success: bool, result: Any = None, error: str | None = None) -
console.print_json(json.dumps(payload, ensure_ascii=False))
def render_search_results(
    results: Sequence[SearchResult], *, title: str = "Search Results", verbose: bool = False
) -> None:
    """Render search results as a table on the console.

    In verbose mode an extra leading "Source" column shows a short tag
    derived from each result's ``search_source`` attribute:
    ``exact`` -> ``[E]``, ``fuzzy`` -> ``[F]``, ``vector`` -> ``[V]``,
    ``fusion`` -> ``[RRF]``. Results without that attribute, or with any
    other value, get an empty tag.

    Args:
        results: Search results to display.
        title: Table title.
        verbose: If True, prepend the "Source" column with the tag above.
    """
    table = Table(title=title, show_lines=False)

    if verbose:
        # The source column must be declared first so that it lines up with
        # the 4-cell rows added below.
        table.add_column("Source", style="dim", width=6, justify="center")
    table.add_column("Path", style="cyan", no_wrap=True)
    table.add_column("Score", style="magenta", justify="right")
    table.add_column("Excerpt", style="white")

    for res in results:
        excerpt = res.excerpt or ""
        score_str = f"{res.score:.3f}"

        if not verbose:
            table.add_row(res.path, score_str, excerpt)
            continue

        # ``search_source`` is optional on the result object, hence getattr.
        # Equality comparisons (rather than a dict lookup) are kept on
        # purpose so that str-like values compare the same way as before.
        source = getattr(res, "search_source", None)
        if source == "exact":
            source_tag = "[E]"
        elif source == "fuzzy":
            source_tag = "[F]"
        elif source == "vector":
            source_tag = "[V]"
        elif source == "fusion":
            source_tag = "[RRF]"
        else:
            source_tag = ""
        table.add_row(source_tag, res.path, score_str, excerpt)

    console.print(table)