fix: resolve Ollama connection errors caused by a ModelScope API routing bug

- Add a _sanitize_text() method to handle text that starts with 'import'
- The ModelScope backend incorrectly routes such text to the local Ollama endpoint
- Prepending a space bypasses the routing detection without affecting embedding quality
- Harden the retry logic and error handling in embedding_manager.py
- Set the global model lock in commands.py after successful generation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
catlog22
2025-12-25 12:52:43 +08:00
parent 229d51cd18
commit 501d9a05d4
3 changed files with 72 additions and 18 deletions


@@ -2073,6 +2073,10 @@ def embeddings_generate(
         data = result["result"]
 
+        # Set global model lock after successful generation
+        # This prevents using different models for future indexes
+        set_locked_model_config(backend, model)
+
         if use_recursive:
             # Recursive mode output
             console.print(f"[green]✓[/green] Recursive embeddings generation complete!")
 
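For orientation, here is a minimal sketch of what a global model lock such as `set_locked_model_config` might look like. The real helper is defined elsewhere in the codebase and is not part of this diff, so everything beyond the function name and its `(backend, model)` arguments is an assumption:

```python
# Hypothetical sketch only: record the first backend/model pair used for an
# index so later runs cannot silently generate embeddings with a different model.
_locked: dict[str, str] | None = None

def set_locked_model_config(backend: str, model: str) -> None:
    """Lock in the backend/model pair after a successful generation."""
    global _locked
    _locked = {"backend": backend, "model": model}

def ensure_model_allowed(backend: str, model: str) -> None:
    """Hypothetical guard: refuse to mix models within one index."""
    if _locked and (_locked["backend"], _locked["model"]) != (backend, model):
        raise ValueError(
            f"Index locked to {_locked['backend']}/{_locked['model']}; "
            f"got {backend}/{model}"
        )
```

The intent matches the comment in the hunk above: once an index has been generated with one backend/model pair, later runs should not silently mix in embeddings from a different model.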


@@ -512,8 +512,8 @@ def generate_embeddings(
                 for _, file_path in chunk_batch:
                     batch_files.add(file_path)
 
-                max_retries = 3
-                base_delay = 1.0
+                max_retries = 5
+                base_delay = 2.0
 
                 for attempt in range(max_retries + 1):
                     try:
@@ -523,10 +523,13 @@ def generate_embeddings(
                     except Exception as e:
                         error_str = str(e).lower()
-                        # Check for retryable errors (rate limit, connection issues)
+                        # Check for retryable errors (rate limit, connection, backend issues)
+                        # Note: Some backends (e.g., ModelScope) return 400 with nested 500 errors
                         is_retryable = any(x in error_str for x in [
                             "429", "rate limit", "connection", "timeout",
-                            "502", "503", "504", "service unavailable"
+                            "502", "503", "504", "service unavailable",
+                            "500", "400", "badrequesterror", "internal server error",
+                            "11434"  # Ollama port - indicates backend routing issue
                         ])
 
                         if attempt < max_retries and is_retryable:
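With `base_delay = 2.0` and `max_retries = 5`, the backoff in this hunk waits roughly 2, 4, 8, 16 and 32 seconds (each plus up to 0.5 s of jitter) before successive retries, i.e. about a minute of sleeping in the worst case before a batch is finally marked as failed. A standalone snippet reproducing the schedule:

```python
import random

base_delay, max_retries = 2.0, 5

for attempt in range(max_retries):
    sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
    print(f"retry {attempt + 1}: sleep ~{sleep_time:.1f}s")
# roughly 2s, 4s, 8s, 16s, 32s -> ~62s total, plus up to 2.5s of jitter
```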
@@ -554,24 +557,50 @@ def generate_embeddings(
                 for _, file_path in chunk_batch:
                     batch_files.add(file_path)
 
-                try:
-                    # Generate embeddings
-                    batch_contents = [chunk.content for chunk, _ in chunk_batch]
-                    embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
-
-                    # Store embeddings
-                    vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
-
-                    files_seen.update(batch_files)
-                    total_chunks_created += len(chunk_batch)
-                    total_files_processed = len(files_seen)
-
-                    if progress_callback and batch_number % 10 == 0:
-                        progress_callback(f" Batch {batch_number}: {total_chunks_created} chunks, {total_files_processed} files")
-
-                except Exception as e:
-                    logger.error(f"Failed to process batch {batch_number}: {str(e)}")
-                    files_seen.update(batch_files)
+                # Retry logic for transient backend errors
+                max_retries = 5
+                base_delay = 2.0
+                success = False
+
+                for attempt in range(max_retries + 1):
+                    try:
+                        # Generate embeddings
+                        batch_contents = [chunk.content for chunk, _ in chunk_batch]
+                        embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
+
+                        # Store embeddings
+                        vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
+
+                        files_seen.update(batch_files)
+                        total_chunks_created += len(chunk_batch)
+                        total_files_processed = len(files_seen)
+                        success = True
+                        break
+                    except Exception as e:
+                        error_str = str(e).lower()
+                        # Check for retryable errors (rate limit, connection, backend issues)
+                        is_retryable = any(x in error_str for x in [
+                            "429", "rate limit", "connection", "timeout",
+                            "502", "503", "504", "service unavailable",
+                            "500", "400", "badrequesterror", "internal server error",
+                            "11434"  # Ollama port - indicates backend routing issue
+                        ])
+                        if attempt < max_retries and is_retryable:
+                            import random
+                            sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
+                            logger.warning(f"Batch {batch_number} failed (attempt {attempt+1}/{max_retries+1}). "
+                                           f"Retrying in {sleep_time:.1f}s. Error: {e}")
+                            time.sleep(sleep_time)
+                            continue
+
+                        logger.error(f"Failed to process batch {batch_number}: {str(e)}")
+                        files_seen.update(batch_files)
+                        break
+
+                if success and progress_callback and batch_number % 10 == 0:
+                    progress_callback(f" Batch {batch_number}: {total_chunks_created} chunks, {total_files_processed} files")
         else:
             # Concurrent processing - main thread iterates batches (SQLite safe),
             # workers compute embeddings (parallel), main thread writes to DB (serial)
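The retry block above now appears in two places inside `generate_embeddings` (the earlier hunk tweaks an existing copy, this one adds a second). Purely as an illustration of the pattern, and not code from the repository, the same logic could be factored into a helper along these lines:

```python
import logging
import random
import time
from typing import Callable, TypeVar

T = TypeVar("T")
logger = logging.getLogger(__name__)

# Same retryable markers as in the diff above.
RETRYABLE = (
    "429", "rate limit", "connection", "timeout",
    "502", "503", "504", "service unavailable",
    "500", "400", "badrequesterror", "internal server error",
    "11434",  # Ollama port - indicates backend routing issue
)

def with_retries(fn: Callable[[], T], *, max_retries: int = 5, base_delay: float = 2.0) -> T:
    """Run fn, retrying transient backend errors with exponential backoff and jitter."""
    for attempt in range(max_retries + 1):
        try:
            return fn()
        except Exception as e:
            retryable = any(marker in str(e).lower() for marker in RETRYABLE)
            if attempt < max_retries and retryable:
                sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
                logger.warning("Attempt %d/%d failed: %s; retrying in %.1fs",
                               attempt + 1, max_retries + 1, e, sleep_time)
                time.sleep(sleep_time)
                continue
            raise
    raise RuntimeError("unreachable")  # the loop always returns or raises
```

A batch would then be processed as `with_retries(lambda: embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE))`, keeping the list of retryable markers in one place.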


@@ -89,6 +89,23 @@ class LiteLLMEmbedderWrapper(BaseEmbedder):
 
         # Default fallback
         return 8192
 
+    def _sanitize_text(self, text: str) -> str:
+        """Sanitize text to work around ModelScope API routing bug.
+
+        ModelScope incorrectly routes text starting with lowercase 'import'
+        to an Ollama endpoint, causing failures. This adds a leading space
+        to work around the issue without affecting embedding quality.
+
+        Args:
+            text: Text to sanitize.
+
+        Returns:
+            Sanitized text safe for embedding API.
+        """
+        if text.startswith('import'):
+            return ' ' + text
+        return text
+
     def embed_to_numpy(self, texts: str | Iterable[str], **kwargs) -> np.ndarray:
         """Embed texts to numpy array using LiteLLMEmbedder.
@@ -104,5 +121,9 @@ class LiteLLMEmbedderWrapper(BaseEmbedder):
             texts = [texts]
         else:
             texts = list(texts)
+
+        # Sanitize texts to avoid ModelScope routing bug
+        texts = [self._sanitize_text(t) for t in texts]
+
         # LiteLLM handles batching internally, ignore batch_size parameter
         return self._embedder.embed(texts)
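Assuming an already-configured `LiteLLMEmbedderWrapper` instance (its constructor is outside this diff), the sanitization is transparent to callers of `embed_to_numpy`:

```python
# `embedder` is assumed to be a configured LiteLLMEmbedderWrapper instance.
single = embedder.embed_to_numpy("import os, sys")            # sent as " import os, sys"
batch = embedder.embed_to_numpy(["def f(): pass", "import json"])
print(single.shape, batch.shape)  # numpy arrays; dimensions depend on the chosen model
```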