feat: 添加多端点支持和负载均衡功能，增强 LiteLLM 嵌入管理

2026-02-10 02:24:35 +08:00 · 2025-12-25 11:01:08 +08:00
parent 3c3ce55842
commit 40e61b30d6
7 changed files with 727 additions and 29 deletions
--- a/codex-lens/src/codexlens/config.py
+++ b/codex-lens/src/codexlens/config.py
@@ -100,6 +100,12 @@ class Config:
                                   # For litellm: model name from config (e.g., "qwen3-embedding")
    embedding_use_gpu: bool = True  # For fastembed: whether to use GPU acceleration

+    # Multi-endpoint configuration for litellm backend
+    embedding_endpoints: List[Dict[str, Any]] = field(default_factory=list)
+    # Each embedding_endpoints entry is a dict such as: {"model": "...", "api_key": "...", "api_base": "...", "weight": 1.0}
+    embedding_strategy: str = "latency_aware"  # round_robin, latency_aware, weighted_random
+    embedding_cooldown: float = 60.0  # Default cooldown seconds for rate-limited endpoints
+
    def __post_init__(self) -> None:
        try:
            self.data_dir = self.data_dir.expanduser().resolve()
@@ -151,12 +157,19 @@ class Config:

    def save_settings(self) -> None:
        """Save embedding and other settings to file."""
+        embedding_config = {
+            "backend": self.embedding_backend,
+            "model": self.embedding_model,
+            "use_gpu": self.embedding_use_gpu,
+        }
+        # Persist endpoints, strategy and cooldown only when at least one endpoint is configured
+        if self.embedding_endpoints:
+            embedding_config["endpoints"] = self.embedding_endpoints
+            embedding_config["strategy"] = self.embedding_strategy
+            embedding_config["cooldown"] = self.embedding_cooldown
+
        settings = {
-            "embedding": {
-                "backend": self.embedding_backend,
-                "model": self.embedding_model,
-                "use_gpu": self.embedding_use_gpu,
-            },
+            "embedding": embedding_config,
            "llm": {
                "enabled": self.llm_enabled,
                "tool": self.llm_tool,
@@ -185,6 +198,14 @@ class Config:
            if "use_gpu" in embedding:
                self.embedding_use_gpu = embedding["use_gpu"]

+            # Load multi-endpoint configuration
+            if "endpoints" in embedding:
+                self.embedding_endpoints = embedding["endpoints"]
+            if "strategy" in embedding:
+                self.embedding_strategy = embedding["strategy"]
+            if "cooldown" in embedding:
+                self.embedding_cooldown = embedding["cooldown"]
+
            # Load LLM settings
            llm = settings.get("llm", {})
            if "enabled" in llm: