feat: Add unified LiteLLM API management with dashboard UI and CLI integration

- Create ccw-litellm Python package with AbstractEmbedder and AbstractLLMClient interfaces
- Add BaseEmbedder abstraction and factory pattern to codex-lens for pluggable backends
- Implement API Settings dashboard page for provider credentials and custom endpoints
- Add REST API routes for CRUD operations on providers and endpoints
- Extend CLI with --model parameter for custom endpoint routing
- Integrate existing context-cache for @pattern file resolution
- Add provider model registry with predefined models per provider type
- Include i18n translations (en/zh) for all new UI elements

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Author: catlog22
Date: 2025-12-23 20:36:32 +08:00
parent 5228581324
commit bf66b095c7
44 changed files with 4948 additions and 19 deletions

View File

@@ -106,7 +106,8 @@ def init(
workers: Optional[int] = typer.Option(None, "--workers", "-w", min=1, max=16, help="Parallel worker processes (default: auto-detect based on CPU count, max 16)."),
force: bool = typer.Option(False, "--force", "-f", help="Force full reindex (skip incremental mode)."),
no_embeddings: bool = typer.Option(False, "--no-embeddings", help="Skip automatic embedding generation (if semantic deps installed)."),
- embedding_model: str = typer.Option("code", "--embedding-model", help="Embedding model profile: fast, code, multilingual, balanced."),
+ embedding_backend: str = typer.Option("fastembed", "--embedding-backend", help="Embedding backend: fastembed (local) or litellm (remote API)."),
+ embedding_model: str = typer.Option("code", "--embedding-model", help="Embedding model: profile name for fastembed (fast/code/multilingual/balanced) or model name for litellm (e.g. text-embedding-3-small)."),
json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."),
) -> None:
@@ -120,6 +121,14 @@ def init(
If semantic search dependencies are installed, automatically generates embeddings
after indexing completes. Use --no-embeddings to skip this step.
+ Embedding Backend Options:
+ - fastembed: Local ONNX-based embeddings (default, no API calls)
+ - litellm: Remote API embeddings via ccw-litellm (requires API keys)
+ Embedding Model Options:
+ - For fastembed backend: Use profile names (fast, code, multilingual, balanced)
+ - For litellm backend: Use model names (e.g., text-embedding-3-small, text-embedding-ada-002)
"""
_configure_logging(verbose, json_mode)
config = Config()
@@ -171,11 +180,22 @@ def init(
from codexlens.cli.embedding_manager import generate_embeddings_recursive, get_embeddings_status
if SEMANTIC_AVAILABLE:
+ # Validate embedding backend
+ valid_backends = ["fastembed", "litellm"]
+ if embedding_backend not in valid_backends:
+     error_msg = f"Invalid embedding backend: {embedding_backend}. Must be one of: {', '.join(valid_backends)}"
+     if json_mode:
+         print_json(success=False, error=error_msg)
+     else:
+         console.print(f"[red]Error:[/red] {error_msg}")
+     raise typer.Exit(code=1)
# Use the index root directory (not the _index.db file)
index_root = Path(build_result.index_root)
if not json_mode:
console.print("\n[bold]Generating embeddings...[/bold]")
console.print(f"Backend: [cyan]{embedding_backend}[/cyan]")
console.print(f"Model: [cyan]{embedding_model}[/cyan]")
else:
# Output progress message for JSON mode (parsed by Node.js)
@@ -196,6 +216,7 @@ def init(
embed_result = generate_embeddings_recursive(
index_root,
+ embedding_backend=embedding_backend,
model_profile=embedding_model,
force=False, # Don't force regenerate during init
chunk_size=2000,
@@ -1781,11 +1802,17 @@ def embeddings_generate(
exists=True,
help="Path to _index.db file or project directory.",
),
+ backend: str = typer.Option(
+     "fastembed",
+     "--backend",
+     "-b",
+     help="Embedding backend: fastembed (local) or litellm (remote API).",
+ ),
model: str = typer.Option(
"code",
"--model",
"-m",
help="Model profile: fast, code, multilingual, balanced.",
help="Model: profile name for fastembed (fast/code/multilingual/balanced) or model name for litellm (e.g. text-embedding-3-small).",
),
force: bool = typer.Option(
False,
@@ -1813,21 +1840,43 @@ def embeddings_generate(
semantic search capabilities. Embeddings are stored in the same
database as the FTS index.
- Model Profiles:
- - fast: BAAI/bge-small-en-v1.5 (384 dims, ~80MB)
- - code: jinaai/jina-embeddings-v2-base-code (768 dims, ~150MB) [recommended]
- - multilingual: intfloat/multilingual-e5-large (1024 dims, ~1GB)
- - balanced: mixedbread-ai/mxbai-embed-large-v1 (1024 dims, ~600MB)
+ Embedding Backend Options:
+ - fastembed: Local ONNX-based embeddings (default, no API calls)
+ - litellm: Remote API embeddings via ccw-litellm (requires API keys)
+ Model Options:
+ For fastembed backend (profiles):
+ - fast: BAAI/bge-small-en-v1.5 (384 dims, ~80MB)
+ - code: jinaai/jina-embeddings-v2-base-code (768 dims, ~150MB) [recommended]
+ - multilingual: intfloat/multilingual-e5-large (1024 dims, ~1GB)
+ - balanced: mixedbread-ai/mxbai-embed-large-v1 (1024 dims, ~600MB)
+ For litellm backend (model names):
+ - text-embedding-3-small, text-embedding-3-large (OpenAI)
+ - text-embedding-ada-002 (OpenAI legacy)
+ - Any model supported by ccw-litellm
Examples:
- codexlens embeddings-generate ~/projects/my-app # Auto-find index for project
+ codexlens embeddings-generate ~/projects/my-app # Auto-find index (fastembed, code profile)
codexlens embeddings-generate ~/.codexlens/indexes/project/_index.db # Specific index
- codexlens embeddings-generate ~/projects/my-app --model fast --force # Regenerate with fast model
+ codexlens embeddings-generate ~/projects/my-app --backend litellm --model text-embedding-3-small # Use LiteLLM
+ codexlens embeddings-generate ~/projects/my-app --model fast --force # Regenerate with fast profile
"""
_configure_logging(verbose, json_mode)
from codexlens.cli.embedding_manager import generate_embeddings, generate_embeddings_recursive
+ # Validate backend
+ valid_backends = ["fastembed", "litellm"]
+ if backend not in valid_backends:
+     error_msg = f"Invalid backend: {backend}. Must be one of: {', '.join(valid_backends)}"
+     if json_mode:
+         print_json(success=False, error=error_msg)
+     else:
+         console.print(f"[red]Error:[/red] {error_msg}")
+         console.print(f"[dim]Valid backends: {', '.join(valid_backends)}[/dim]")
+     raise typer.Exit(code=1)
# Resolve path
target_path = path.expanduser().resolve()
@@ -1877,11 +1926,13 @@ def embeddings_generate(
console.print(f"Mode: [yellow]Recursive[/yellow]")
else:
console.print(f"Index: [dim]{index_path}[/dim]")
console.print(f"Backend: [cyan]{backend}[/cyan]")
console.print(f"Model: [cyan]{model}[/cyan]\n")
if use_recursive:
result = generate_embeddings_recursive(
index_root,
+ embedding_backend=backend,
model_profile=model,
force=force,
chunk_size=chunk_size,
@@ -1890,6 +1941,7 @@ def embeddings_generate(
else:
result = generate_embeddings(
index_path,
+ embedding_backend=backend,
model_profile=model,
force=force,
chunk_size=chunk_size,
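Taken together, the CLI changes follow one pattern in both commands: a backend flag selects the embedding implementation, and the model flag is interpreted per backend. A minimal standalone sketch of that pattern (hypothetical command and option names, not part of this diff):

import typer

app = typer.Typer()
VALID_BACKENDS = ["fastembed", "litellm"]

@app.command()
def generate(
    backend: str = typer.Option("fastembed", "--backend", "-b", help="fastembed (local) or litellm (remote API)."),
    model: str = typer.Option("code", "--model", "-m", help="Profile name for fastembed, model name for litellm."),
) -> None:
    # Fail fast on unknown backends, mirroring the validation added above.
    if backend not in VALID_BACKENDS:
        typer.echo(f"Invalid backend: {backend}. Must be one of: {', '.join(VALID_BACKENDS)}", err=True)
        raise typer.Exit(code=1)
    typer.echo(f"backend={backend} model={model}")

if __name__ == "__main__":
    app()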

View File

@@ -191,6 +191,7 @@ def check_index_embeddings(index_path: Path) -> Dict[str, any]:
def generate_embeddings(
index_path: Path,
embedding_backend: str = "fastembed",
model_profile: str = "code",
force: bool = False,
chunk_size: int = 2000,
@@ -203,7 +204,9 @@ def generate_embeddings(
Args:
index_path: Path to _index.db file
- model_profile: Model profile (fast, code, multilingual, balanced)
+ embedding_backend: Embedding backend to use (fastembed or litellm)
+ model_profile: Model profile for fastembed (fast, code, multilingual, balanced)
+     or model name for litellm (e.g., text-embedding-3-small)
force: If True, regenerate even if embeddings exist
chunk_size: Maximum chunk size in characters
progress_callback: Optional callback for progress updates
@@ -253,8 +256,22 @@ def generate_embeddings(
# Initialize components
try:
# Initialize embedder (singleton, reused throughout the function)
- embedder = get_embedder(profile=model_profile)
+ # Import factory function to support both backends
+ from codexlens.semantic.factory import get_embedder as get_embedder_factory
+ # Initialize embedder using factory (supports both fastembed and litellm)
+ # For fastembed: model_profile is a profile name (fast/code/multilingual/balanced)
+ # For litellm: model_profile is a model name (e.g., text-embedding-3-small)
+ if embedding_backend == "fastembed":
+     embedder = get_embedder_factory(backend="fastembed", profile=model_profile, use_gpu=True)
+ elif embedding_backend == "litellm":
+     embedder = get_embedder_factory(backend="litellm", model=model_profile)
+ else:
+     return {
+         "success": False,
+         "error": f"Invalid embedding backend: {embedding_backend}. Must be 'fastembed' or 'litellm'.",
+     }
# skip_token_count=True: Use fast estimation (len/4) instead of expensive tiktoken
# This significantly reduces CPU usage with minimal impact on metadata accuracy
chunker = Chunker(config=ChunkConfig(max_chunk_size=chunk_size, skip_token_count=True))
@@ -428,6 +445,7 @@ def find_all_indexes(scan_dir: Path) -> List[Path]:
def generate_embeddings_recursive(
index_root: Path,
embedding_backend: str = "fastembed",
model_profile: str = "code",
force: bool = False,
chunk_size: int = 2000,
@@ -437,7 +455,9 @@ def generate_embeddings_recursive(
Args:
index_root: Root index directory containing _index.db files
- model_profile: Model profile (fast, code, multilingual, balanced)
+ embedding_backend: Embedding backend to use (fastembed or litellm)
+ model_profile: Model profile for fastembed (fast, code, multilingual, balanced)
+     or model name for litellm (e.g., text-embedding-3-small)
force: If True, regenerate even if embeddings exist
chunk_size: Maximum chunk size in characters
progress_callback: Optional callback for progress updates
@@ -474,6 +494,7 @@ def generate_embeddings_recursive(
result = generate_embeddings(
index_path,
+ embedding_backend=embedding_backend,
model_profile=model_profile,
force=force,
chunk_size=chunk_size,
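A hypothetical call site for the updated signature, using only the parameters and result keys visible in this diff:

from pathlib import Path

from codexlens.cli.embedding_manager import generate_embeddings

# model_profile carries a fastembed profile name, or a model name when
# embedding_backend is "litellm".
result = generate_embeddings(
    Path("~/.codexlens/indexes/project/_index.db").expanduser(),
    embedding_backend="litellm",
    model_profile="text-embedding-3-small",
    force=False,
    chunk_size=2000,
)
if not result.get("success", True):
    print(result["error"])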

View File

@@ -67,10 +67,29 @@ def check_gpu_available() -> tuple[bool, str]:
return False, "GPU support module not available"
# Export embedder components
+ # BaseEmbedder is always available (abstract base class)
+ from .base import BaseEmbedder
+ # Factory function for creating embedders
+ from .factory import get_embedder as get_embedder_factory
+ # Optional: LiteLLMEmbedderWrapper (only if ccw-litellm is installed)
+ try:
+     from .litellm_embedder import LiteLLMEmbedderWrapper
+     _LITELLM_AVAILABLE = True
+ except ImportError:
+     LiteLLMEmbedderWrapper = None
+     _LITELLM_AVAILABLE = False
__all__ = [
"SEMANTIC_AVAILABLE",
"SEMANTIC_BACKEND",
"GPU_AVAILABLE",
"check_semantic_available",
"check_gpu_available",
"BaseEmbedder",
"get_embedder_factory",
"LiteLLMEmbedderWrapper",
]
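Because LiteLLMEmbedderWrapper is exported as None when ccw-litellm is missing, callers can feature-detect the remote backend before selecting it. A sketch under that assumption (helper name is hypothetical):

from codexlens import semantic

def pick_backend(preferred: str = "litellm") -> str:
    # Fall back to the local backend when the optional dependency is absent.
    if preferred == "litellm" and semantic.LiteLLMEmbedderWrapper is None:
        return "fastembed"
    return preferred

embedder = semantic.get_embedder_factory(backend=pick_backend())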

View File

@@ -0,0 +1,51 @@
"""Base class for embedders.
Defines the interface that all embedders must implement.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Iterable
import numpy as np
class BaseEmbedder(ABC):
"""Base class for all embedders.
All embedder implementations must inherit from this class and implement
the abstract methods to ensure a consistent interface.
"""
@property
@abstractmethod
def embedding_dim(self) -> int:
"""Return embedding dimensions.
Returns:
int: Dimension of the embedding vectors.
"""
...
@property
@abstractmethod
def model_name(self) -> str:
"""Return model name.
Returns:
str: Name or identifier of the underlying model.
"""
...
@abstractmethod
def embed_to_numpy(self, texts: str | Iterable[str]) -> np.ndarray:
"""Embed texts to numpy array.
Args:
texts: Single text or iterable of texts to embed.
Returns:
numpy.ndarray: Array of shape (n_texts, embedding_dim) containing embeddings.
"""
...
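To illustrate the contract, here is a toy concrete subclass (hash-seeded pseudo-random vectors, purely illustrative, not a real model):

from __future__ import annotations

from typing import Iterable

import numpy as np

from codexlens.semantic.base import BaseEmbedder

class HashEmbedder(BaseEmbedder):
    """Toy embedder producing hash-seeded pseudo-random vectors."""

    @property
    def embedding_dim(self) -> int:
        return 8

    @property
    def model_name(self) -> str:
        return "toy-hash"

    def embed_to_numpy(self, texts: str | Iterable[str]) -> np.ndarray:
        items = [texts] if isinstance(texts, str) else list(texts)
        # Seed per text so identical inputs map to identical vectors within a run.
        rows = [
            np.random.default_rng(abs(hash(t)) % 2**32).standard_normal(self.embedding_dim)
            for t in items
        ]
        return np.stack(rows)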

View File

@@ -14,6 +14,7 @@ from typing import Dict, Iterable, List, Optional
import numpy as np
from . import SEMANTIC_AVAILABLE
+ from .base import BaseEmbedder
from .gpu_support import get_optimal_providers, is_gpu_available, get_gpu_summary, get_selected_device_id
logger = logging.getLogger(__name__)
@@ -84,7 +85,7 @@ def clear_embedder_cache() -> None:
gc.collect()
- class Embedder:
+ class Embedder(BaseEmbedder):
"""Generate embeddings for code chunks using fastembed (ONNX-based).
Supported Model Profiles:
@@ -138,11 +139,11 @@ class Embedder:
# Resolve model name from profile or use explicit name
if model_name:
- self.model_name = model_name
+ self._model_name = model_name
elif profile and profile in self.MODELS:
- self.model_name = self.MODELS[profile]
+ self._model_name = self.MODELS[profile]
else:
- self.model_name = self.DEFAULT_MODEL
+ self._model_name = self.DEFAULT_MODEL
# Configure ONNX execution providers with device_id options for GPU selection
# Using with_device_options=True ensures DirectML/CUDA device_id is passed correctly
@@ -154,10 +155,15 @@ class Embedder:
self._use_gpu = use_gpu
self._model = None
+ @property
+ def model_name(self) -> str:
+     """Get model name."""
+     return self._model_name
@property
def embedding_dim(self) -> int:
"""Get embedding dimension for current model."""
- return self.MODEL_DIMS.get(self.model_name, 768) # Default to 768 if unknown
+ return self.MODEL_DIMS.get(self._model_name, 768) # Default to 768 if unknown
@property
def providers(self) -> List[str]:
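The rename from self.model_name to self._model_name is what makes the property override work: once model_name is a read-only property on the class, assigning it through the instance raises AttributeError, so the value must live in a private attribute. A minimal reproduction of that constraint:

class Base:
    @property
    def model_name(self) -> str:
        raise NotImplementedError

class Impl(Base):
    def __init__(self, name: str) -> None:
        # self.model_name = name would raise AttributeError (property has no setter)
        self._model_name = name

    @property
    def model_name(self) -> str:
        return self._model_name

print(Impl("demo").model_name)  # demo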

View File

@@ -0,0 +1,61 @@
"""Factory for creating embedders.
Provides a unified interface for instantiating different embedder backends.
"""
from __future__ import annotations
from typing import Any
from .base import BaseEmbedder
def get_embedder(
backend: str = "fastembed",
profile: str = "code",
model: str = "default",
use_gpu: bool = True,
**kwargs: Any,
) -> BaseEmbedder:
"""Factory function to create embedder based on backend.
Args:
backend: Embedder backend to use. Options:
- "fastembed": Use fastembed (ONNX-based) embedder (default)
- "litellm": Use ccw-litellm embedder
profile: Model profile for fastembed backend ("fast", "code", "multilingual", "balanced")
Used only when backend="fastembed". Default: "code"
model: Model identifier for litellm backend.
Used only when backend="litellm". Default: "default"
use_gpu: Whether to use GPU acceleration when available (default: True).
Used only when backend="fastembed".
**kwargs: Additional backend-specific arguments
Returns:
BaseEmbedder: Configured embedder instance
Raises:
ValueError: If backend is not recognized
ImportError: If required backend dependencies are not installed
Examples:
Create fastembed embedder with code profile:
>>> embedder = get_embedder(backend="fastembed", profile="code")
Create fastembed embedder with fast profile and CPU only:
>>> embedder = get_embedder(backend="fastembed", profile="fast", use_gpu=False)
Create litellm embedder:
>>> embedder = get_embedder(backend="litellm", model="text-embedding-3-small")
"""
if backend == "fastembed":
from .embedder import Embedder
return Embedder(profile=profile, use_gpu=use_gpu, **kwargs)
elif backend == "litellm":
from .litellm_embedder import LiteLLMEmbedderWrapper
return LiteLLMEmbedderWrapper(model=model, **kwargs)
else:
raise ValueError(
f"Unknown backend: {backend}. "
f"Supported backends: 'fastembed', 'litellm'"
)
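Because both branches import lazily, a missing dependency only surfaces when that backend is actually requested, which lets callers degrade gracefully. A hypothetical fallback helper (name assumed, not part of this diff):

from codexlens.semantic.factory import get_embedder

def get_embedder_with_fallback(model: str = "text-embedding-3-small"):
    try:
        # Raises ImportError if ccw-litellm is not installed.
        return get_embedder(backend="litellm", model=model)
    except ImportError:
        return get_embedder(backend="fastembed", profile="code")

embedder = get_embedder_with_fallback()
print(embedder.model_name, embedder.embedding_dim)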

View File

@@ -0,0 +1,79 @@
"""LiteLLM embedder wrapper for CodexLens.
Provides integration with ccw-litellm's LiteLLMEmbedder for embedding generation.
"""
from __future__ import annotations
from typing import Iterable
import numpy as np
from .base import BaseEmbedder
class LiteLLMEmbedderWrapper(BaseEmbedder):
"""Wrapper for ccw-litellm LiteLLMEmbedder.
This wrapper adapts the ccw-litellm LiteLLMEmbedder to the CodexLens
BaseEmbedder interface, enabling seamless integration with CodexLens
semantic search functionality.
Args:
model: Model identifier for LiteLLM (default: "default")
**kwargs: Additional arguments passed to LiteLLMEmbedder
Raises:
ImportError: If ccw-litellm package is not installed
"""
def __init__(self, model: str = "default", **kwargs) -> None:
"""Initialize LiteLLM embedder wrapper.
Args:
model: Model identifier for LiteLLM (default: "default")
**kwargs: Additional arguments passed to LiteLLMEmbedder
Raises:
ImportError: If ccw-litellm package is not installed
"""
try:
from ccw_litellm import LiteLLMEmbedder
self._embedder = LiteLLMEmbedder(model=model, **kwargs)
except ImportError as e:
raise ImportError(
"ccw-litellm not installed. Install with: pip install ccw-litellm"
) from e
@property
def embedding_dim(self) -> int:
"""Return embedding dimensions from LiteLLMEmbedder.
Returns:
int: Dimension of the embedding vectors.
"""
return self._embedder.dimensions
@property
def model_name(self) -> str:
"""Return model name from LiteLLMEmbedder.
Returns:
str: Name or identifier of the underlying model.
"""
return self._embedder.model_name
def embed_to_numpy(self, texts: str | Iterable[str]) -> np.ndarray:
"""Embed texts to numpy array using LiteLLMEmbedder.
Args:
texts: Single text or iterable of texts to embed.
Returns:
numpy.ndarray: Array of shape (n_texts, embedding_dim) containing embeddings.
"""
if isinstance(texts, str):
texts = [texts]
else:
texts = list(texts)
return self._embedder.embed(texts)
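End-to-end usage of the wrapper, assuming ccw-litellm is installed and provider credentials (e.g. an OpenAI API key for the model shown) are configured:

from codexlens.semantic.litellm_embedder import LiteLLMEmbedderWrapper

embedder = LiteLLMEmbedderWrapper(model="text-embedding-3-small")
vectors = embedder.embed_to_numpy([
    "def add(a: int, b: int) -> int: return a + b",
    "class Point: ...",
])
# A single string is normalized to a one-element batch, so the result
# is always 2-D: (n_texts, embedding_dim).
assert vectors.shape == (2, embedder.embedding_dim)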