fix: resolve Ollama connection errors caused by a ModelScope API routing bug

- Add a _sanitize_text() method to handle text that starts with 'import'
- The ModelScope backend incorrectly routes such text to the local Ollama endpoint
- Prepending a space bypasses the routing detection without affecting embedding quality
- Harden the retry logic and error handling in embedding_manager.py
- Set the global model lock in commands.py after successful generation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
catlog22
2025-12-25 12:52:43 +08:00
parent 229d51cd18
commit 501d9a05d4
3 changed files with 72 additions and 18 deletions


@@ -2073,6 +2073,10 @@ def embeddings_generate(
         data = result["result"]
 
+        # Set global model lock after successful generation
+        # This prevents using different models for future indexes
+        set_locked_model_config(backend, model)
+
         if use_recursive:
             # Recursive mode output
             console.print(f"[green]✓[/green] Recursive embeddings generation complete!")
 
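For orientation, here is a minimal sketch of what a global model lock such as `set_locked_model_config` might look like. The real helper is defined elsewhere in the codebase and is not part of this diff, so everything beyond the function name and its `(backend, model)` arguments is an assumption:

```python
# Hypothetical sketch only: record the first backend/model pair used for an
# index so later runs cannot silently generate embeddings with a different model.
_locked: dict[str, str] | None = None

def set_locked_model_config(backend: str, model: str) -> None:
    """Lock in the backend/model pair after a successful generation."""
    global _locked
    _locked = {"backend": backend, "model": model}

def ensure_model_allowed(backend: str, model: str) -> None:
    """Hypothetical guard: refuse to mix models within one index."""
    if _locked and (_locked["backend"], _locked["model"]) != (backend, model):
        raise ValueError(
            f"Index locked to {_locked['backend']}/{_locked['model']}; "
            f"got {backend}/{model}"
        )
```

The intent matches the comment in the hunk above: once an index has been generated with one backend/model pair, later runs should not silently mix in embeddings from a different model.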


@@ -512,8 +512,8 @@ def generate_embeddings(
                 for _, file_path in chunk_batch:
                     batch_files.add(file_path)
 
-                max_retries = 3
-                base_delay = 1.0
+                max_retries = 5
+                base_delay = 2.0
 
                 for attempt in range(max_retries + 1):
                     try:
@@ -523,10 +523,13 @@ def generate_embeddings(
                     except Exception as e:
                         error_str = str(e).lower()
-                        # Check for retryable errors (rate limit, connection issues)
+                        # Check for retryable errors (rate limit, connection, backend issues)
+                        # Note: Some backends (e.g., ModelScope) return 400 with nested 500 errors
                         is_retryable = any(x in error_str for x in [
                             "429", "rate limit", "connection", "timeout",
-                            "502", "503", "504", "service unavailable"
+                            "502", "503", "504", "service unavailable",
+                            "500", "400", "badrequesterror", "internal server error",
+                            "11434"  # Ollama port - indicates backend routing issue
                         ])
 
                         if attempt < max_retries and is_retryable:
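With `base_delay = 2.0` and `max_retries = 5`, the backoff in this hunk waits roughly 2, 4, 8, 16 and 32 seconds (each plus up to 0.5 s of jitter) before successive retries, i.e. about a minute of sleeping in the worst case before a batch is finally marked as failed. A standalone snippet reproducing the schedule:

```python
import random

base_delay, max_retries = 2.0, 5

for attempt in range(max_retries):
    sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
    print(f"retry {attempt + 1}: sleep ~{sleep_time:.1f}s")
# roughly 2s, 4s, 8s, 16s, 32s -> ~62s total, plus up to 2.5s of jitter
```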
@@ -554,24 +557,50 @@ def generate_embeddings(
                 for _, file_path in chunk_batch:
                     batch_files.add(file_path)
 
-                try:
-                    # Generate embeddings
-                    batch_contents = [chunk.content for chunk, _ in chunk_batch]
-                    embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
-
-                    # Store embeddings
-                    vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
-
-                    files_seen.update(batch_files)
-                    total_chunks_created += len(chunk_batch)
-                    total_files_processed = len(files_seen)
-
-                    if progress_callback and batch_number % 10 == 0:
-                        progress_callback(f" Batch {batch_number}: {total_chunks_created} chunks, {total_files_processed} files")
-
-                except Exception as e:
-                    logger.error(f"Failed to process batch {batch_number}: {str(e)}")
-                    files_seen.update(batch_files)
+                # Retry logic for transient backend errors
+                max_retries = 5
+                base_delay = 2.0
+                success = False
+
+                for attempt in range(max_retries + 1):
+                    try:
+                        # Generate embeddings
+                        batch_contents = [chunk.content for chunk, _ in chunk_batch]
+                        embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
+
+                        # Store embeddings
+                        vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
+
+                        files_seen.update(batch_files)
+                        total_chunks_created += len(chunk_batch)
+                        total_files_processed = len(files_seen)
+                        success = True
+                        break
+                    except Exception as e:
+                        error_str = str(e).lower()
+                        # Check for retryable errors (rate limit, connection, backend issues)
+                        is_retryable = any(x in error_str for x in [
+                            "429", "rate limit", "connection", "timeout",
+                            "502", "503", "504", "service unavailable",
+                            "500", "400", "badrequesterror", "internal server error",
+                            "11434"  # Ollama port - indicates backend routing issue
+                        ])
+                        if attempt < max_retries and is_retryable:
+                            import random
+                            sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
+                            logger.warning(f"Batch {batch_number} failed (attempt {attempt+1}/{max_retries+1}). "
+                                           f"Retrying in {sleep_time:.1f}s. Error: {e}")
+                            time.sleep(sleep_time)
+                            continue
+
+                        logger.error(f"Failed to process batch {batch_number}: {str(e)}")
+                        files_seen.update(batch_files)
+                        break
+
+                if success and progress_callback and batch_number % 10 == 0:
+                    progress_callback(f" Batch {batch_number}: {total_chunks_created} chunks, {total_files_processed} files")
         else:
             # Concurrent processing - main thread iterates batches (SQLite safe),
             # workers compute embeddings (parallel), main thread writes to DB (serial)
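The retry block above now appears in two places inside `generate_embeddings` (the earlier hunk tweaks an existing copy, this one adds a second). Purely as an illustration of the pattern, and not code from the repository, the same logic could be factored into a helper along these lines:

```python
import logging
import random
import time
from typing import Callable, TypeVar

T = TypeVar("T")
logger = logging.getLogger(__name__)

# Same retryable markers as in the diff above.
RETRYABLE = (
    "429", "rate limit", "connection", "timeout",
    "502", "503", "504", "service unavailable",
    "500", "400", "badrequesterror", "internal server error",
    "11434",  # Ollama port - indicates backend routing issue
)

def with_retries(fn: Callable[[], T], *, max_retries: int = 5, base_delay: float = 2.0) -> T:
    """Run fn, retrying transient backend errors with exponential backoff and jitter."""
    for attempt in range(max_retries + 1):
        try:
            return fn()
        except Exception as e:
            retryable = any(marker in str(e).lower() for marker in RETRYABLE)
            if attempt < max_retries and retryable:
                sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
                logger.warning("Attempt %d/%d failed: %s; retrying in %.1fs",
                               attempt + 1, max_retries + 1, e, sleep_time)
                time.sleep(sleep_time)
                continue
            raise
    raise RuntimeError("unreachable")  # the loop always returns or raises
```

A batch would then be processed as `with_retries(lambda: embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE))`, keeping the list of retryable markers in one place.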


@@ -89,6 +89,23 @@ class LiteLLMEmbedderWrapper(BaseEmbedder):
 
         # Default fallback
         return 8192
 
+    def _sanitize_text(self, text: str) -> str:
+        """Sanitize text to work around ModelScope API routing bug.
+
+        ModelScope incorrectly routes text starting with lowercase 'import'
+        to an Ollama endpoint, causing failures. This adds a leading space
+        to work around the issue without affecting embedding quality.
+
+        Args:
+            text: Text to sanitize.
+
+        Returns:
+            Sanitized text safe for embedding API.
+        """
+        if text.startswith('import'):
+            return ' ' + text
+        return text
+
     def embed_to_numpy(self, texts: str | Iterable[str], **kwargs) -> np.ndarray:
         """Embed texts to numpy array using LiteLLMEmbedder.
@@ -104,5 +121,9 @@ class LiteLLMEmbedderWrapper(BaseEmbedder):
             texts = [texts]
         else:
             texts = list(texts)
+
+        # Sanitize texts to avoid ModelScope routing bug
+        texts = [self._sanitize_text(t) for t in texts]
+
         # LiteLLM handles batching internally, ignore batch_size parameter
         return self._embedder.embed(texts)
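Assuming an already-configured `LiteLLMEmbedderWrapper` instance (its constructor is outside this diff), the sanitization is transparent to callers of `embed_to_numpy`:

```python
# `embedder` is assumed to be a configured LiteLLMEmbedderWrapper instance.
single = embedder.embed_to_numpy("import os, sys")            # sent as " import os, sys"
batch = embedder.embed_to_numpy(["def f(): pass", "import json"])
print(single.shape, batch.shape)  # numpy arrays; dimensions depend on the chosen model
```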