Mirror of https://github.com/catlog22/Claude-Code-Workflow.git (synced 2026-02-05 01:50:27 +08:00)
feat: Unified Embedding Pool with auto-discovery
Architecture refactoring for multi-provider rotation:

Backend:
- Add EmbeddingPoolConfig type with autoDiscover support
- Implement discoverProvidersForModel() for auto-aggregation
- Add GET/PUT /api/litellm-api/embedding-pool endpoints
- Add GET /api/litellm-api/embedding-pool/discover/:model preview
- Convert ccw-litellm status check to async with 5-min cache
- Maintain backward compatibility with legacy rotation config

Frontend:
- Add "Embedding Pool" tab in API Settings
- Auto-discover providers when target model selected
- Show provider/key count with include/exclude controls
- Increase sidebar width (280px → 320px)
- Add sync result feedback on save

Other:
- Remove worker count limits (was max=32)
- Add i18n translations (EN/CN)
- Update .gitignore for .mcp.json

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
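For context, a minimal sketch of how the new discovery preview route might be exercised from a client. Only the route itself comes from the commit message above; the base URL and the response shape are assumptions, not taken from this diff.

```python
# Hypothetical client for the discovery preview endpoint named in the commit
# message; base URL and response fields are assumptions, not from this diff.
import requests

BASE = "http://localhost:3000"  # assumed dev-server address
model = "text-embedding-3-small"

resp = requests.get(f"{BASE}/api/litellm-api/embedding-pool/discover/{model}")
resp.raise_for_status()
print(resp.json())  # expected: the providers/keys that would be auto-aggregated
```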
@@ -103,12 +103,12 @@ def init(
         "-l",
         help="Limit indexing to specific languages (repeat or comma-separated).",
     ),
-    workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, max=32, help="Parallel worker processes (default: auto-detect based on CPU count, max 32)."),
+    workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, help="Parallel worker processes (default: auto-detect based on CPU count)."),
     force: bool = typer.Option(False, "--force", "-f", help="Force full reindex (skip incremental mode)."),
     no_embeddings: bool = typer.Option(False, "--no-embeddings", help="Skip automatic embedding generation (if semantic deps installed)."),
     embedding_backend: str = typer.Option("fastembed", "--embedding-backend", help="Embedding backend: fastembed (local) or litellm (remote API)."),
     embedding_model: str = typer.Option("code", "--embedding-model", help="Embedding model: profile name for fastembed (fast/code/multilingual/balanced) or model name for litellm (e.g. text-embedding-3-small)."),
-    max_workers: int = typer.Option(1, "--max-workers", min=1, max=32, help="Max concurrent API calls for embedding generation. Recommended: 4-8 for litellm backend."),
+    max_workers: int = typer.Option(1, "--max-workers", min=1, help="Max concurrent API calls for embedding generation. Recommended: 4-8 for litellm backend."),
     json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
     verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."),
 ) -> None:
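As a standalone illustration of the pattern in the hunk above: with Typer, omitting `max=` from a numeric `typer.Option` lifts the upper bound while `min=1` still rejects non-positive values. A minimal sketch (the command body is invented for demonstration):

```python
# Minimal sketch: `min=1` keeps the lower bound; dropping `max=` removes the cap.
from typing import Optional

import typer

app = typer.Typer()

@app.command()
def init(
    workers: Optional[int] = typer.Option(
        None, "--workers", "-w", min=1,
        help="Parallel worker processes (default: auto-detect based on CPU count).",
    ),
) -> None:
    typer.echo(f"workers={workers}")  # placeholder body for illustration

if __name__ == "__main__":
    app()
```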
@@ -478,6 +478,7 @@ def search(
             "path": r.path,
             "score": r.score,
             "excerpt": r.excerpt,
+            "content": r.content,  # Full function/class body
             "source": getattr(r, "search_source", None),
             "symbol": getattr(r, "symbol", None),
         }
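Illustratively, one serialized search hit after this change might look like the following; only the keys come from the hunk above, all values are invented:

```python
# Example shape of a single search result (values invented for illustration):
hit = {
    "path": "src/indexer.py",
    "score": 0.87,
    "excerpt": "def build_index(paths): ...",
    "content": "def build_index(paths):\n    ...",  # new: full function/class body
    "source": "semantic",     # getattr(r, "search_source", None) may be None
    "symbol": "build_index",  # getattr(r, "symbol", None) may be None
}
```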
@@ -1852,7 +1853,6 @@ def embeddings_generate(
         "--max-workers",
         "-w",
         min=1,
-        max=32,
         help="Max concurrent API calls. Recommended: 4-8 for litellm backend. Default: 1 (sequential).",
     ),
     json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
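To see the effect of dropping `max=32` here, a hedged test sketch using Typer's test runner; the `app` import path and the `embeddings-generate` command name are assumptions, not taken from this diff:

```python
# Hypothetical regression check: values above the old 32-call ceiling should
# now pass option parsing. The import path and command name are assumptions.
from typer.testing import CliRunner

from mytool.cli import app  # assumed module path, not from this diff

runner = CliRunner()
result = runner.invoke(app, ["embeddings-generate", "--max-workers", "64"])
# Before this change the min/max range check rejected 64 at parse time;
# whether the command then succeeds depends on the environment.
assert result.exit_code != 2  # click uses exit code 2 for usage/validation errors
```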
@@ -331,7 +331,7 @@ def generate_embeddings(
     if max_workers is None:
         if embedding_backend == "litellm":
             if endpoint_count > 1:
-                max_workers = min(endpoint_count * 2, 32)  # Cap at 32 workers
+                max_workers = endpoint_count * 2  # No cap, scale with endpoints
             else:
                 max_workers = 4
     else:
@@ -806,7 +806,7 @@ def generate_embeddings_recursive(
     if max_workers is None:
         if embedding_backend == "litellm":
             if endpoint_count > 1:
-                max_workers = min(endpoint_count * 2, 32)
+                max_workers = endpoint_count * 2  # No cap, scale with endpoints
             else:
                 max_workers = 4
     else:
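The practical effect of both hunks above, worked through with illustrative endpoint counts:

```python
# Worked example of the cap removal (endpoint counts are illustrative):
for endpoint_count in (3, 20):
    old = min(endpoint_count * 2, 32)  # before: capped at 32 workers
    new = endpoint_count * 2           # after: scales linearly with endpoints
    print(endpoint_count, old, new)    # 3 -> 6, 6; 20 -> 32, 40
```

For small pools the behavior is unchanged; the difference only appears once a pool has more than 16 endpoints, where the old code silently clamped concurrency at 32.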