Mirror of https://github.com/catlog22/Claude-Code-Workflow.git (synced 2026-02-10 02:24:35 +08:00)
feat: Add unified LiteLLM API management with dashboard UI and CLI integration
- Create ccw-litellm Python package with AbstractEmbedder and AbstractLLMClient interfaces
- Add BaseEmbedder abstraction and factory pattern to codex-lens for pluggable backends
- Implement API Settings dashboard page for provider credentials and custom endpoints
- Add REST API routes for CRUD operations on providers and endpoints
- Extend CLI with --model parameter for custom endpoint routing
- Integrate existing context-cache for @pattern file resolution
- Add provider model registry with predefined models per provider type
- Include i18n translations (en/zh) for all new UI elements

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
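The ccw-litellm package itself is not part of the diff excerpted below; only the interface names appear in the commit message. A minimal sketch of what AbstractEmbedder and AbstractLLMClient could look like — every method name here is an assumption, not confirmed by the diff:

```python
# Hypothetical sketch: the commit message names AbstractEmbedder and
# AbstractLLMClient, but their definitions are not shown in this diff.
from abc import ABC, abstractmethod
from typing import List


class AbstractEmbedder(ABC):
    """Common surface for pluggable embedding backends."""

    @abstractmethod
    def embed(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding vector per input text."""


class AbstractLLMClient(ABC):
    """Common surface for chat/completion backends."""

    @abstractmethod
    def complete(self, prompt: str) -> str:
        """Return the model's completion for a prompt."""
```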
@@ -106,7 +106,8 @@ def init(
     workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, max=16, help="Parallel worker processes (default: auto-detect based on CPU count, max 16)."),
     force: bool = typer.Option(False, "--force", "-f", help="Force full reindex (skip incremental mode)."),
     no_embeddings: bool = typer.Option(False, "--no-embeddings", help="Skip automatic embedding generation (if semantic deps installed)."),
-    embedding_model: str = typer.Option("code", "--embedding-model", help="Embedding model profile: fast, code, multilingual, balanced."),
+    embedding_backend: str = typer.Option("fastembed", "--embedding-backend", help="Embedding backend: fastembed (local) or litellm (remote API)."),
+    embedding_model: str = typer.Option("code", "--embedding-model", help="Embedding model: profile name for fastembed (fast/code/multilingual/balanced) or model name for litellm (e.g. text-embedding-3-small)."),
     json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
     verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."),
 ) -> None:
@@ -120,6 +121,14 @@ def init(
 
     If semantic search dependencies are installed, automatically generates embeddings
     after indexing completes. Use --no-embeddings to skip this step.
+
+    Embedding Backend Options:
+    - fastembed: Local ONNX-based embeddings (default, no API calls)
+    - litellm: Remote API embeddings via ccw-litellm (requires API keys)
+
+    Embedding Model Options:
+    - For fastembed backend: Use profile names (fast, code, multilingual, balanced)
+    - For litellm backend: Use model names (e.g., text-embedding-3-small, text-embedding-ada-002)
     """
     _configure_logging(verbose, json_mode)
     config = Config()
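As background on the default backend: fastembed runs ONNX embedding models locally, with no network calls. Independent of codexlens, a bare fastembed call looks roughly like this — the model name is the one the fast profile maps to, per the profile list later in this diff:

```python
# Standalone fastembed usage, independent of codexlens. TextEmbedding
# downloads an ONNX model on first use and then runs entirely locally.
from fastembed import TextEmbedding

model = TextEmbedding(model_name="BAAI/bge-small-en-v1.5")  # the "fast" profile's model
vectors = list(model.embed(["def add(a, b): return a + b"]))
print(len(vectors[0]))  # 384 dimensions for this model
```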
@@ -171,11 +180,22 @@ def init(
     from codexlens.cli.embedding_manager import generate_embeddings_recursive, get_embeddings_status
 
     if SEMANTIC_AVAILABLE:
+        # Validate embedding backend
+        valid_backends = ["fastembed", "litellm"]
+        if embedding_backend not in valid_backends:
+            error_msg = f"Invalid embedding backend: {embedding_backend}. Must be one of: {', '.join(valid_backends)}"
+            if json_mode:
+                print_json(success=False, error=error_msg)
+            else:
+                console.print(f"[red]Error:[/red] {error_msg}")
+            raise typer.Exit(code=1)
+
         # Use the index root directory (not the _index.db file)
         index_root = Path(build_result.index_root)
 
         if not json_mode:
             console.print("\n[bold]Generating embeddings...[/bold]")
+            console.print(f"Backend: [cyan]{embedding_backend}[/cyan]")
             console.print(f"Model: [cyan]{embedding_model}[/cyan]")
         else:
             # Output progress message for JSON mode (parsed by Node.js)
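The comment above notes that JSON-mode progress output is parsed by a Node.js consumer; the actual message is cut off at the hunk boundary. This sketch only illustrates the general newline-delimited-JSON pattern such a bridge typically uses — every field name here is hypothetical:

```python
# Hypothetical shape of a JSON-mode progress message. The real schema is
# truncated out of this hunk; "type"/"stage"/"message" are illustrative only.
import json
import sys

def emit_progress(stage: str, message: str) -> None:
    # One JSON object per line on stdout, so the Node.js side can parse
    # the stream line by line.
    print(json.dumps({"type": "progress", "stage": stage, "message": message}))
    sys.stdout.flush()

emit_progress("embeddings", "Generating embeddings...")
```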
@@ -196,6 +216,7 @@ def init(
 
         embed_result = generate_embeddings_recursive(
             index_root,
+            embedding_backend=embedding_backend,
            model_profile=embedding_model,
            force=False,  # Don't force regenerate during init
            chunk_size=2000,
@@ -1781,11 +1802,17 @@ def embeddings_generate(
         exists=True,
         help="Path to _index.db file or project directory.",
     ),
+    backend: str = typer.Option(
+        "fastembed",
+        "--backend",
+        "-b",
+        help="Embedding backend: fastembed (local) or litellm (remote API).",
+    ),
     model: str = typer.Option(
         "code",
         "--model",
         "-m",
-        help="Model profile: fast, code, multilingual, balanced.",
+        help="Model: profile name for fastembed (fast/code/multilingual/balanced) or model name for litellm (e.g. text-embedding-3-small).",
     ),
     force: bool = typer.Option(
         False,
@@ -1813,21 +1840,43 @@ def embeddings_generate(
     semantic search capabilities. Embeddings are stored in the same
     database as the FTS index.
 
-    Model Profiles:
-    - fast: BAAI/bge-small-en-v1.5 (384 dims, ~80MB)
-    - code: jinaai/jina-embeddings-v2-base-code (768 dims, ~150MB) [recommended]
-    - multilingual: intfloat/multilingual-e5-large (1024 dims, ~1GB)
-    - balanced: mixedbread-ai/mxbai-embed-large-v1 (1024 dims, ~600MB)
+    Embedding Backend Options:
+    - fastembed: Local ONNX-based embeddings (default, no API calls)
+    - litellm: Remote API embeddings via ccw-litellm (requires API keys)
+
+    Model Options:
+    For fastembed backend (profiles):
+    - fast: BAAI/bge-small-en-v1.5 (384 dims, ~80MB)
+    - code: jinaai/jina-embeddings-v2-base-code (768 dims, ~150MB) [recommended]
+    - multilingual: intfloat/multilingual-e5-large (1024 dims, ~1GB)
+    - balanced: mixedbread-ai/mxbai-embed-large-v1 (1024 dims, ~600MB)
+
+    For litellm backend (model names):
+    - text-embedding-3-small, text-embedding-3-large (OpenAI)
+    - text-embedding-ada-002 (OpenAI legacy)
+    - Any model supported by ccw-litellm
 
     Examples:
-        codexlens embeddings-generate ~/projects/my-app  # Auto-find index for project
+        codexlens embeddings-generate ~/projects/my-app  # Auto-find index (fastembed, code profile)
         codexlens embeddings-generate ~/.codexlens/indexes/project/_index.db  # Specific index
-        codexlens embeddings-generate ~/projects/my-app --model fast --force  # Regenerate with fast model
+        codexlens embeddings-generate ~/projects/my-app --backend litellm --model text-embedding-3-small  # Use LiteLLM
+        codexlens embeddings-generate ~/projects/my-app --model fast --force  # Regenerate with fast profile
     """
     _configure_logging(verbose, json_mode)
 
     from codexlens.cli.embedding_manager import generate_embeddings, generate_embeddings_recursive
 
+    # Validate backend
+    valid_backends = ["fastembed", "litellm"]
+    if backend not in valid_backends:
+        error_msg = f"Invalid backend: {backend}. Must be one of: {', '.join(valid_backends)}"
+        if json_mode:
+            print_json(success=False, error=error_msg)
+        else:
+            console.print(f"[red]Error:[/red] {error_msg}")
+            console.print(f"[dim]Valid backends: {', '.join(valid_backends)}[/dim]")
+        raise typer.Exit(code=1)
+
     # Resolve path
     target_path = path.expanduser().resolve()
 
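The Model Options section above implies that, for fastembed, the --model value names a profile that maps to a concrete ONNX model, while for litellm it passes straight through as a provider model name. A sketch of that resolution — the profile-to-model table is taken verbatim from the docstring above, but the helper itself is illustrative, not codexlens's actual implementation:

```python
# Illustrative resolution of the --model value. Only the table contents are
# confirmed by this diff; resolve_model() is a hypothetical helper.
FASTEMBED_PROFILES = {
    "fast": "BAAI/bge-small-en-v1.5",                  # 384 dims, ~80MB
    "code": "jinaai/jina-embeddings-v2-base-code",     # 768 dims, ~150MB
    "multilingual": "intfloat/multilingual-e5-large",  # 1024 dims, ~1GB
    "balanced": "mixedbread-ai/mxbai-embed-large-v1",  # 1024 dims, ~600MB
}

def resolve_model(backend: str, model: str) -> str:
    if backend == "fastembed":
        # For the local backend, --model must be a profile name.
        return FASTEMBED_PROFILES[model]
    # For litellm, --model is forwarded as-is, e.g. text-embedding-3-small.
    return model
```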
@@ -1877,11 +1926,13 @@ def embeddings_generate(
         console.print(f"Mode: [yellow]Recursive[/yellow]")
     else:
         console.print(f"Index: [dim]{index_path}[/dim]")
+    console.print(f"Backend: [cyan]{backend}[/cyan]")
     console.print(f"Model: [cyan]{model}[/cyan]\n")
 
     if use_recursive:
         result = generate_embeddings_recursive(
             index_root,
+            embedding_backend=backend,
             model_profile=model,
             force=force,
             chunk_size=chunk_size,
@@ -1890,6 +1941,7 @@ def embeddings_generate(
     else:
         result = generate_embeddings(
             index_path,
+            embedding_backend=backend,
            model_profile=model,
            force=force,
            chunk_size=chunk_size,
@@ -191,6 +191,7 @@ def check_index_embeddings(index_path: Path) -> Dict[str, any]:
 
 def generate_embeddings(
     index_path: Path,
+    embedding_backend: str = "fastembed",
    model_profile: str = "code",
    force: bool = False,
    chunk_size: int = 2000,
@@ -203,7 +204,9 @@ def generate_embeddings(
 
     Args:
         index_path: Path to _index.db file
-        model_profile: Model profile (fast, code, multilingual, balanced)
+        embedding_backend: Embedding backend to use (fastembed or litellm)
+        model_profile: Model profile for fastembed (fast, code, multilingual, balanced)
+            or model name for litellm (e.g., text-embedding-3-small)
         force: If True, regenerate even if embeddings exist
         chunk_size: Maximum chunk size in characters
         progress_callback: Optional callback for progress updates
@@ -253,8 +256,22 @@ def generate_embeddings(
 
     # Initialize components
     try:
-        # Initialize embedder (singleton, reused throughout the function)
-        embedder = get_embedder(profile=model_profile)
+        # Import factory function to support both backends
+        from codexlens.semantic.factory import get_embedder as get_embedder_factory
+
+        # Initialize embedder using factory (supports both fastembed and litellm)
+        # For fastembed: model_profile is a profile name (fast/code/multilingual/balanced)
+        # For litellm: model_profile is a model name (e.g., text-embedding-3-small)
+        if embedding_backend == "fastembed":
+            embedder = get_embedder_factory(backend="fastembed", profile=model_profile, use_gpu=True)
+        elif embedding_backend == "litellm":
+            embedder = get_embedder_factory(backend="litellm", model=model_profile)
+        else:
+            return {
+                "success": False,
+                "error": f"Invalid embedding backend: {embedding_backend}. Must be 'fastembed' or 'litellm'.",
+            }
+
         # skip_token_count=True: Use fast estimation (len/4) instead of expensive tiktoken
         # This significantly reduces CPU usage with minimal impact on metadata accuracy
         chunker = Chunker(config=ChunkConfig(max_chunk_size=chunk_size, skip_token_count=True))
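The hunk above shows only the call sites of the factory; codexlens.semantic.factory.get_embedder itself is not part of this diff. A self-contained sketch of the dispatch those two call signatures imply — only the keyword signatures (backend=/profile=/use_gpu= and backend=/model=) are confirmed, and the embedder classes here are stand-ins, not real codexlens types:

```python
# Sketch of the factory dispatch, inferred from the two call sites above.
from dataclasses import dataclass
from typing import Optional


@dataclass
class _FastEmbedEmbedder:   # stand-in for the real local backend
    profile: str
    use_gpu: bool


@dataclass
class _LiteLLMEmbedder:     # stand-in for the real remote backend
    model: str


def get_embedder(backend: str = "fastembed", *, profile: Optional[str] = None,
                 model: Optional[str] = None, use_gpu: bool = False):
    """Return an embedder for the requested backend, mirroring the diff's calls."""
    if backend == "fastembed":
        return _FastEmbedEmbedder(profile=profile or "code", use_gpu=use_gpu)
    if backend == "litellm":
        return _LiteLLMEmbedder(model=model or "text-embedding-3-small")
    raise ValueError(f"Unknown embedding backend: {backend!r}")
```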
@@ -428,6 +445,7 @@ def find_all_indexes(scan_dir: Path) -> List[Path]:
 
 def generate_embeddings_recursive(
     index_root: Path,
+    embedding_backend: str = "fastembed",
    model_profile: str = "code",
    force: bool = False,
    chunk_size: int = 2000,
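The hunk header above names find_all_indexes(scan_dir: Path) -> List[Path], whose body lies outside this diff. A minimal implementation consistent with "recursive generation over every _index.db under a root" could be:

```python
# Minimal sketch: only the signature is confirmed by the hunk header above;
# the rglob-based body is an assumption.
from pathlib import Path
from typing import List

def find_all_indexes(scan_dir: Path) -> List[Path]:
    """Collect every _index.db beneath scan_dir, sorted for stable ordering."""
    return sorted(scan_dir.rglob("_index.db"))
```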
@@ -437,7 +455,9 @@ def generate_embeddings_recursive(
 
     Args:
         index_root: Root index directory containing _index.db files
-        model_profile: Model profile (fast, code, multilingual, balanced)
+        embedding_backend: Embedding backend to use (fastembed or litellm)
+        model_profile: Model profile for fastembed (fast, code, multilingual, balanced)
+            or model name for litellm (e.g., text-embedding-3-small)
         force: If True, regenerate even if embeddings exist
         chunk_size: Maximum chunk size in characters
         progress_callback: Optional callback for progress updates
@@ -474,6 +494,7 @@ def generate_embeddings_recursive(
 
         result = generate_embeddings(
             index_path,
+            embedding_backend=embedding_backend,
            model_profile=model_profile,
            force=force,
            chunk_size=chunk_size,