Mirror of https://github.com/catlog22/Claude-Code-Workflow.git, synced 2026-02-11 02:33:51 +08:00
fix: Fix Ollama connection errors caused by a ModelScope API routing bug
- Add a _sanitize_text() method to handle text that starts with 'import'
- The ModelScope backend incorrectly routes such text to the local Ollama endpoint
- Prepending a space to the text bypasses the routing check without affecting embedding quality
- Strengthen the retry logic and error handling in embedding_manager.py
- Call the global model lock in commands.py after successful generation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
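The workaround itself is a one-line prefix check. A minimal standalone sketch of the rule described above (the real method is _sanitize_text on LiteLLMEmbedderWrapper, shown in the diff below); note that the check is a plain prefix match, so any text beginning with 'import', including words like 'important', gets the padding, which is harmless:

def sanitize(text: str) -> str:
    # Texts starting with a lowercase 'import' trip ModelScope's router;
    # a leading space bypasses the check and is negligible for the embedding.
    return ' ' + text if text.startswith('import') else text

assert sanitize("import os") == " import os"
assert sanitize("from os import path") == "from os import path"   # unchanged
assert sanitize("important: read me") == " important: read me"    # prefix match, also padded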
@@ -2073,6 +2073,10 @@ def embeddings_generate(
     data = result["result"]
 
+    # Set global model lock after successful generation
+    # This prevents using different models for future indexes
+    set_locked_model_config(backend, model)
+
     if use_recursive:
         # Recursive mode output
         console.print(f"[green]✓[/green] Recursive embeddings generation complete!")
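set_locked_model_config(backend, model) is the repository's own helper and its implementation is not part of this diff. As a hypothetical illustration of the stated intent (preventing different models from being used for future indexes), such a lock could persist the backend/model pair and reject mismatched runs:

import json
from pathlib import Path

# Hypothetical lock file location; the real helper may store this elsewhere.
LOCK_PATH = Path(".embeddings/model_lock.json")

def set_locked_model_config(backend: str, model: str) -> None:
    # Record the backend/model pair used for the first successful generation.
    LOCK_PATH.parent.mkdir(parents=True, exist_ok=True)
    LOCK_PATH.write_text(json.dumps({"backend": backend, "model": model}))

def check_locked_model_config(backend: str, model: str) -> None:
    # Refuse to index with a different model than the one locked in,
    # since mixing embedding models would make stored vectors incomparable.
    if LOCK_PATH.exists():
        locked = json.loads(LOCK_PATH.read_text())
        if (locked["backend"], locked["model"]) != (backend, model):
            raise ValueError(
                f"Index was built with {locked['backend']}/{locked['model']}; "
                f"refusing to continue with {backend}/{model}."
            )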
@@ -512,8 +512,8 @@ def generate_embeddings(
             for _, file_path in chunk_batch:
                 batch_files.add(file_path)
 
-            max_retries = 3
-            base_delay = 1.0
+            max_retries = 5
+            base_delay = 2.0
 
             for attempt in range(max_retries + 1):
                 try:
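Raising max_retries from 3 to 5 and base_delay from 1.0 to 2.0 widens the worst-case retry window considerably, since the sleep before retry N is base_delay * 2**N plus up to 0.5 s of jitter (see the backoff code later in this diff):

# Total sleep time if every attempt fails (jitter ignored):
old_schedule = [1.0 * 2 ** a for a in range(3)]   # [1.0, 2.0, 4.0]
new_schedule = [2.0 * 2 ** a for a in range(5)]   # [2.0, 4.0, 8.0, 16.0, 32.0]
print(sum(old_schedule), sum(new_schedule))       # 7.0 62.0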
@@ -523,10 +523,13 @@ def generate_embeddings(
 
                 except Exception as e:
                     error_str = str(e).lower()
-                    # Check for retryable errors (rate limit, connection issues)
+                    # Check for retryable errors (rate limit, connection, backend issues)
+                    # Note: Some backends (e.g., ModelScope) return 400 with nested 500 errors
                     is_retryable = any(x in error_str for x in [
                         "429", "rate limit", "connection", "timeout",
-                        "502", "503", "504", "service unavailable"
+                        "502", "503", "504", "service unavailable",
+                        "500", "400", "badrequesterror", "internal server error",
+                        "11434"  # Ollama port - indicates backend routing issue
                     ])
 
                     if attempt < max_retries and is_retryable:
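The retryability test is a substring scan over the lowercased exception text, which is why adding "400", "500", "badrequesterror" and the Ollama port "11434" catches the ModelScope failure mode (a 400 response wrapping a nested 500) as well as the misrouted local-Ollama connection error. A quick sketch with illustrative, non-verbatim error messages:

RETRYABLE_MARKERS = [
    "429", "rate limit", "connection", "timeout",
    "502", "503", "504", "service unavailable",
    "500", "400", "badrequesterror", "internal server error",
    "11434",  # Ollama port - indicates backend routing issue
]

def is_retryable(error: Exception) -> bool:
    error_str = str(error).lower()
    return any(marker in error_str for marker in RETRYABLE_MARKERS)

# Illustrative messages, not verbatim backend output:
assert is_retryable(Exception("BadRequestError: 400 {'error': {'code': 500}}"))
assert is_retryable(Exception("Connection refused: http://localhost:11434/api/embed"))
assert not is_retryable(Exception("Invalid API key"))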
@@ -554,24 +557,50 @@ def generate_embeddings(
             for _, file_path in chunk_batch:
                 batch_files.add(file_path)
 
-            try:
-                # Generate embeddings
-                batch_contents = [chunk.content for chunk, _ in chunk_batch]
-                embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
-
-                # Store embeddings
-                vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
-
-                files_seen.update(batch_files)
-                total_chunks_created += len(chunk_batch)
-                total_files_processed = len(files_seen)
-
-                if progress_callback and batch_number % 10 == 0:
-                    progress_callback(f" Batch {batch_number}: {total_chunks_created} chunks, {total_files_processed} files")
-
-            except Exception as e:
-                logger.error(f"Failed to process batch {batch_number}: {str(e)}")
-                files_seen.update(batch_files)
+            # Retry logic for transient backend errors
+            max_retries = 5
+            base_delay = 2.0
+            success = False
+
+            for attempt in range(max_retries + 1):
+                try:
+                    # Generate embeddings
+                    batch_contents = [chunk.content for chunk, _ in chunk_batch]
+                    embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
+
+                    # Store embeddings
+                    vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
+
+                    files_seen.update(batch_files)
+                    total_chunks_created += len(chunk_batch)
+                    total_files_processed = len(files_seen)
+                    success = True
+                    break
+
+                except Exception as e:
+                    error_str = str(e).lower()
+                    # Check for retryable errors (rate limit, connection, backend issues)
+                    is_retryable = any(x in error_str for x in [
+                        "429", "rate limit", "connection", "timeout",
+                        "502", "503", "504", "service unavailable",
+                        "500", "400", "badrequesterror", "internal server error",
+                        "11434"  # Ollama port - indicates backend routing issue
+                    ])
+
+                    if attempt < max_retries and is_retryable:
+                        import random
+                        sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
+                        logger.warning(f"Batch {batch_number} failed (attempt {attempt+1}/{max_retries+1}). "
+                                       f"Retrying in {sleep_time:.1f}s. Error: {e}")
+                        time.sleep(sleep_time)
+                        continue
+
+                    logger.error(f"Failed to process batch {batch_number}: {str(e)}")
+                    files_seen.update(batch_files)
+                    break
+
+            if success and progress_callback and batch_number % 10 == 0:
+                progress_callback(f" Batch {batch_number}: {total_chunks_created} chunks, {total_files_processed} files")
     else:
         # Concurrent processing - main thread iterates batches (SQLite safe),
         # workers compute embeddings (parallel), main thread writes to DB (serial)
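The sequential path now uses the same retry-with-backoff shape as the earlier hunk, with a success flag so the progress callback only fires for batches that actually landed. Extracted as a standalone helper (a hypothetical refactor, not part of this commit, and it re-raises on final failure instead of logging and moving on to the next batch as the loop above does), the pattern looks like this:

import logging
import random
import time
from typing import Callable, TypeVar

T = TypeVar("T")
logger = logging.getLogger(__name__)

def with_backoff(operation: Callable[[], T], *, max_retries: int = 5,
                 base_delay: float = 2.0,
                 retryable: Callable[[Exception], bool] = lambda e: True) -> T:
    """Run operation, retrying retryable failures with exponential backoff and jitter."""
    for attempt in range(max_retries + 1):
        try:
            return operation()
        except Exception as e:
            if attempt < max_retries and retryable(e):
                sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
                logger.warning("Attempt %d/%d failed (%s); retrying in %.1fs",
                               attempt + 1, max_retries + 1, e, sleep_time)
                time.sleep(sleep_time)
                continue
            raise
    raise RuntimeError("unreachable")  # loop always returns or raises before this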
@@ -89,6 +89,23 @@ class LiteLLMEmbedderWrapper(BaseEmbedder):
         # Default fallback
         return 8192
 
+    def _sanitize_text(self, text: str) -> str:
+        """Sanitize text to work around ModelScope API routing bug.
+
+        ModelScope incorrectly routes text starting with lowercase 'import'
+        to an Ollama endpoint, causing failures. This adds a leading space
+        to work around the issue without affecting embedding quality.
+
+        Args:
+            text: Text to sanitize.
+
+        Returns:
+            Sanitized text safe for embedding API.
+        """
+        if text.startswith('import'):
+            return ' ' + text
+        return text
+
     def embed_to_numpy(self, texts: str | Iterable[str], **kwargs) -> np.ndarray:
         """Embed texts to numpy array using LiteLLMEmbedder.
 
@@ -104,5 +121,9 @@ class LiteLLMEmbedderWrapper(BaseEmbedder):
             texts = [texts]
         else:
             texts = list(texts)
 
+        # Sanitize texts to avoid ModelScope routing bug
+        texts = [self._sanitize_text(t) for t in texts]
+
         # LiteLLM handles batching internally, ignore batch_size parameter
         return self._embedder.embed(texts)
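Taken together, the last two hunks mean every input passes through _sanitize_text before reaching LiteLLM. A minimal sketch of that flow with a stub standing in for the real LiteLLMEmbedder (whose constructor and model configuration are not shown in this diff):

import numpy as np

class StubEmbedder:
    """Stand-in for LiteLLMEmbedder: returns a zero vector per input text."""
    def embed(self, texts: list[str]) -> np.ndarray:
        return np.zeros((len(texts), 4), dtype=np.float32)

def embed_to_numpy(embedder: StubEmbedder, texts) -> np.ndarray:
    # Mirrors the wrapper logic: accept a single string or any iterable of strings.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)
    # Sanitize texts to avoid the ModelScope routing bug.
    texts = [' ' + t if t.startswith('import') else t for t in texts]
    return embedder.embed(texts)

vectors = embed_to_numpy(StubEmbedder(), ["import numpy as np", "def main(): ..."])
print(vectors.shape)  # (2, 4)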