mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-12 02:37:45 +08:00
feat: Implement CodexLens multi-provider embedding rotation management
- Added functions to get and update CodexLens embedding rotation configuration.
- Introduced functionality to retrieve enabled embedding providers for rotation.
- Created endpoints for managing rotation configuration via API.
- Enhanced dashboard UI to support multi-provider rotation configuration.
- Updated internationalization strings for new rotation features.
- Adjusted CLI commands and embedding manager to support increased concurrency limits (max workers raised from 16 to 32).
- Modified hybrid search weights for improved ranking behavior (exact/fuzzy/vector: 0.4/0.3/0.3 → 0.3/0.1/0.6).
This commit is contained in:
@@ -103,12 +103,12 @@ def init(
|
||||
"-l",
|
||||
help="Limit indexing to specific languages (repeat or comma-separated).",
|
||||
),
|
||||
workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, max=16, help="Parallel worker processes (default: auto-detect based on CPU count, max 16)."),
|
||||
workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, max=32, help="Parallel worker processes (default: auto-detect based on CPU count, max 32)."),
|
||||
force: bool = typer.Option(False, "--force", "-f", help="Force full reindex (skip incremental mode)."),
|
||||
no_embeddings: bool = typer.Option(False, "--no-embeddings", help="Skip automatic embedding generation (if semantic deps installed)."),
|
||||
embedding_backend: str = typer.Option("fastembed", "--embedding-backend", help="Embedding backend: fastembed (local) or litellm (remote API)."),
|
||||
embedding_model: str = typer.Option("code", "--embedding-model", help="Embedding model: profile name for fastembed (fast/code/multilingual/balanced) or model name for litellm (e.g. text-embedding-3-small)."),
|
||||
max_workers: int = typer.Option(1, "--max-workers", min=1, max=16, help="Max concurrent API calls for embedding generation. Recommended: 4-8 for litellm backend."),
|
||||
max_workers: int = typer.Option(1, "--max-workers", min=1, max=32, help="Max concurrent API calls for embedding generation. Recommended: 4-8 for litellm backend."),
|
||||
json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
|
||||
verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."),
|
||||
) -> None:
|
||||
@@ -351,7 +351,7 @@ def search(
|
||||
Use 'codexlens embeddings-generate' to create embeddings first.
|
||||
|
||||
Hybrid Mode:
|
||||
Default weights: exact=0.4, fuzzy=0.3, vector=0.3
|
||||
Default weights: exact=0.3, fuzzy=0.1, vector=0.6
|
||||
Use --weights to customize (e.g., --weights 0.5,0.3,0.2)
|
||||
|
||||
Examples:
|
||||
@@ -1852,7 +1852,7 @@ def embeddings_generate(
|
||||
"--max-workers",
|
||||
"-w",
|
||||
min=1,
|
||||
max=16,
|
||||
max=32,
|
||||
help="Max concurrent API calls. Recommended: 4-8 for litellm backend. Default: 1 (sequential).",
|
||||
),
|
||||
json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
|
||||
|
||||
@@ -331,7 +331,7 @@ def generate_embeddings(
|
||||
if max_workers is None:
|
||||
if embedding_backend == "litellm":
|
||||
if endpoint_count > 1:
|
||||
max_workers = min(endpoint_count * 2, 16) # Cap at 16 workers
|
||||
max_workers = min(endpoint_count * 2, 32) # Cap at 32 workers
|
||||
else:
|
||||
max_workers = 4
|
||||
else:
|
||||
@@ -806,7 +806,7 @@ def generate_embeddings_recursive(
|
||||
if max_workers is None:
|
||||
if embedding_backend == "litellm":
|
||||
if endpoint_count > 1:
|
||||
max_workers = min(endpoint_count * 2, 16)
|
||||
max_workers = min(endpoint_count * 2, 32)
|
||||
else:
|
||||
max_workers = 4
|
||||
else:
|
||||
|
||||
@@ -27,11 +27,11 @@ class HybridSearchEngine:
|
||||
default_weights: Default RRF weights for each source
|
||||
"""
|
||||
|
||||
# Default RRF weights (exact: 40%, fuzzy: 30%, vector: 30%)
|
||||
# Default RRF weights (vector: 60%, exact: 30%, fuzzy: 10%)
|
||||
DEFAULT_WEIGHTS = {
|
||||
"exact": 0.4,
|
||||
"fuzzy": 0.3,
|
||||
"vector": 0.3,
|
||||
"exact": 0.3,
|
||||
"fuzzy": 0.1,
|
||||
"vector": 0.6,
|
||||
}
|
||||
|
||||
def __init__(self, weights: Optional[Dict[str, float]] = None):
|
||||
|
||||
@@ -25,7 +25,7 @@ def reciprocal_rank_fusion(
|
||||
results_map: Dictionary mapping source name to list of SearchResult objects
|
||||
Sources: 'exact', 'fuzzy', 'vector'
|
||||
weights: Dictionary mapping source name to weight (default: equal weights)
|
||||
Example: {'exact': 0.4, 'fuzzy': 0.3, 'vector': 0.3}
|
||||
Example: {'exact': 0.3, 'fuzzy': 0.1, 'vector': 0.6}
|
||||
k: Constant to avoid division by zero and control rank influence (default 60)
|
||||
|
||||
Returns:
|
||||
|
||||
@@ -45,9 +45,9 @@ class TestHybridSearchBasics:
|
||||
"""Test HybridSearchEngine initializes with default weights."""
|
||||
engine = HybridSearchEngine()
|
||||
assert engine.weights == HybridSearchEngine.DEFAULT_WEIGHTS
|
||||
assert engine.weights["exact"] == 0.4
|
||||
assert engine.weights["fuzzy"] == 0.3
|
||||
assert engine.weights["vector"] == 0.3
|
||||
assert engine.weights["exact"] == 0.3
|
||||
assert engine.weights["fuzzy"] == 0.1
|
||||
assert engine.weights["vector"] == 0.6
|
||||
|
||||
def test_engine_custom_weights(self):
|
||||
"""Test HybridSearchEngine accepts custom weights."""
|
||||
|
||||
@@ -230,16 +230,16 @@ class TestRRFSyntheticRankings:
|
||||
vector = [SearchResult(path="c.py", score=8.0, excerpt="...")]
|
||||
|
||||
results_map = {"exact": exact, "fuzzy": fuzzy, "vector": vector}
|
||||
weights = {"exact": 0.4, "fuzzy": 0.3, "vector": 0.3}
|
||||
weights = {"exact": 0.3, "fuzzy": 0.1, "vector": 0.6}
|
||||
|
||||
fused = reciprocal_rank_fusion(results_map, weights=weights)
|
||||
|
||||
assert len(fused) == 3
|
||||
# Each appears in one source only, so scores differ by weights
|
||||
# a.py: 0.4/61 ≈ 0.0066
|
||||
# b.py: 0.3/61 ≈ 0.0049
|
||||
# c.py: 0.3/61 ≈ 0.0049
|
||||
assert fused[0].path == "a.py", "Exact (higher weight) should rank first"
|
||||
# c.py: 0.6/61 ≈ 0.0098 (vector, highest weight)
|
||||
# a.py: 0.3/61 ≈ 0.0049 (exact)
|
||||
# b.py: 0.1/61 ≈ 0.0016 (fuzzy)
|
||||
assert fused[0].path == "c.py", "Vector (higher weight) should rank first"
|
||||
|
||||
|
||||
class TestNormalizeBM25Score:
|
||||
|
||||
Reference in New Issue
Block a user