mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-14 02:42:04 +08:00
feat: Add multi-type embedding backends for cascade retrieval
- Implemented BinaryEmbeddingBackend for fast coarse filtering using 256-dimensional binary vectors.
- Developed DenseEmbeddingBackend for high-precision dense vectors (2048 dimensions) for reranking.
- Created CascadeEmbeddingBackend to combine binary and dense embeddings for two-stage retrieval.
- Introduced utility functions for embedding conversion and distance computation.

chore: Migration 010 - Add multi-vector storage support
- Added 'chunks' table to support multi-vector embeddings for cascade retrieval.
- Included new columns: embedding_binary (256-dim) and embedding_dense (2048-dim) for efficient storage.
- Implemented upgrade and downgrade functions to manage schema changes and data migration.
This commit is contained in:
@@ -40,11 +40,20 @@ from codexlens.search.ranking import (
|
||||
get_rrf_weights,
|
||||
reciprocal_rank_fusion,
|
||||
rerank_results,
|
||||
simple_weighted_fusion,
|
||||
tag_search_source,
|
||||
)
|
||||
from codexlens.storage.dir_index import DirIndexStore
|
||||
|
||||
|
||||
# Three-way fusion weights (FTS + Vector + SPLADE)
|
||||
THREE_WAY_WEIGHTS = {
|
||||
"exact": 0.2,
|
||||
"splade": 0.3,
|
||||
"vector": 0.5,
|
||||
}
|
||||
|
||||
|
||||
class HybridSearchEngine:
|
||||
"""Hybrid search engine with parallel execution and RRF fusion.
|
||||
|
||||
@@ -193,9 +202,22 @@ class HybridSearchEngine:
|
||||
if source in results_map
|
||||
}
|
||||
|
||||
- with timer("rrf_fusion", self.logger):
|
||||
+ # Determine fusion method from config (default: rrf)
|
||||
+ fusion_method = "rrf"
|
||||
+ rrf_k = 60
|
||||
+ if self._config is not None:
|
||||
+ fusion_method = getattr(self._config, "fusion_method", "rrf") or "rrf"
|
||||
+ rrf_k = getattr(self._config, "rrf_k", 60) or 60
|
||||
|
||||
+ with timer("fusion", self.logger):
|
||||
adaptive_weights = get_rrf_weights(query, active_weights)
|
||||
- fused_results = reciprocal_rank_fusion(results_map, adaptive_weights)
|
||||
+ if fusion_method == "simple":
|
||||
+ fused_results = simple_weighted_fusion(results_map, adaptive_weights)
|
||||
+ else:
|
||||
+ # Default to RRF
|
||||
+ fused_results = reciprocal_rank_fusion(
|
||||
+ results_map, adaptive_weights, k=rrf_k
|
||||
+ )
|
||||
|
||||
# Optional: boost results that include explicit symbol matches
|
||||
boost_factor = (
|
||||
|
||||
Reference in New Issue
Block a user