mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-03-19 18:58:47 +08:00
Implement search and reranking functionality with FTS and embedding support
- Added BaseReranker abstract class for defining reranking interfaces. - Implemented FastEmbedReranker using fastembed's TextCrossEncoder for scoring document-query pairs. - Introduced FTSEngine for full-text search capabilities using SQLite FTS5. - Developed SearchPipeline to integrate embedding, binary search, ANN indexing, FTS, and reranking. - Added fusion methods for combining results from different search strategies using Reciprocal Rank Fusion. - Created unit and integration tests for the new search and reranking components. - Established configuration management for search parameters and models.
This commit is contained in:
44
codex-lens-v2/tests/integration/test_search_pipeline.py
Normal file
44
codex-lens-v2/tests/integration/test_search_pipeline.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""Integration tests for SearchPipeline using real components and mock embedder/reranker."""
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
def test_vector_search_returns_results(search_pipeline):
|
||||
results = search_pipeline.search("authentication middleware")
|
||||
assert len(results) > 0
|
||||
assert all(isinstance(r.score, float) for r in results)
|
||||
|
||||
|
||||
def test_exact_keyword_search(search_pipeline):
|
||||
results = search_pipeline.search("authenticate")
|
||||
assert len(results) > 0
|
||||
result_ids = {r.id for r in results}
|
||||
# Doc 0 and 10 both contain "authenticate"
|
||||
assert result_ids & {0, 10}, f"Expected doc 0 or 10 in results, got {result_ids}"
|
||||
|
||||
|
||||
def test_pipeline_top_k_limit(search_pipeline):
|
||||
results = search_pipeline.search("user", top_k=5)
|
||||
assert len(results) <= 5
|
||||
|
||||
|
||||
def test_search_result_fields_populated(search_pipeline):
|
||||
results = search_pipeline.search("password")
|
||||
assert len(results) > 0
|
||||
for r in results:
|
||||
assert r.id >= 0
|
||||
assert r.score >= 0
|
||||
assert isinstance(r.path, str)
|
||||
|
||||
|
||||
def test_empty_query_handled(search_pipeline):
|
||||
results = search_pipeline.search("")
|
||||
assert isinstance(results, list) # no exception
|
||||
|
||||
|
||||
def test_different_queries_give_different_results(search_pipeline):
|
||||
r1 = search_pipeline.search("authenticate user")
|
||||
r2 = search_pipeline.search("cache redis")
|
||||
# Results should differ (different top IDs or scores), unless both are empty
|
||||
ids1 = [r.id for r in r1]
|
||||
ids2 = [r.id for r in r2]
|
||||
assert ids1 != ids2 or len(r1) == 0
|
||||
Reference in New Issue
Block a user