Mirror of https://github.com/catlog22/Claude-Code-Workflow.git (synced 2026-02-05 01:50:27 +08:00)
fix: resolve Ollama connection errors caused by a ModelScope API routing bug

- Add a _sanitize_text() method to handle text that starts with 'import'
- The ModelScope backend incorrectly routed such text to the local Ollama endpoint
- Prepending a space to the text bypasses the routing detection without affecting embedding quality
- Strengthen the retry logic and error handling in embedding_manager.py
- Call the global model lock in commands.py after successful generation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
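For context, here is a minimal standalone sketch of the workaround described above. The function name below is invented for illustration; the commit's actual method, `_sanitize_text()` on the wrapper class, appears in the diff further down. The idea is simply to prepend a space to any text that begins with 'import':

```python
def sanitize_for_modelscope(text: str) -> str:
    """Prepend a space when text starts with 'import', so the ModelScope
    backend does not misroute the request to a local Ollama endpoint."""
    return ' ' + text if text.startswith('import') else text

# A leading space is semantically neutral for embedding models,
# so downstream similarity scores are effectively unchanged.
assert sanitize_for_modelscope("import os\nprint(os.getcwd())").startswith(" import")
assert sanitize_for_modelscope("print('hello')") == "print('hello')"
```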
@@ -2073,6 +2073,10 @@ def embeddings_generate(
 
         data = result["result"]
 
+        # Set global model lock after successful generation
+        # This prevents using different models for future indexes
+        set_locked_model_config(backend, model)
+
     if use_recursive:
         # Recursive mode output
         console.print(f"[green]✓[/green] Recursive embeddings generation complete!")
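The hunk calls the project's set_locked_model_config(backend, model); its implementation is not part of this commit. As a hedged illustration only, a lock of this kind could persist the backend/model pair and refuse mismatches on later runs. The file path and checking function below are hypothetical, not the repository's actual code:

```python
import json
from pathlib import Path

LOCK_FILE = Path(".embeddings_model_lock.json")  # hypothetical location

def set_locked_model_config(backend: str, model: str) -> None:
    # Record which backend/model produced the existing index.
    LOCK_FILE.write_text(json.dumps({"backend": backend, "model": model}))

def check_locked_model_config(backend: str, model: str) -> None:
    # Refuse to mix embeddings from different models in one index.
    if LOCK_FILE.exists():
        locked = json.loads(LOCK_FILE.read_text())
        if (locked["backend"], locked["model"]) != (backend, model):
            raise RuntimeError(
                f"Index was built with {locked['backend']}/{locked['model']}; "
                f"re-index before switching to {backend}/{model}."
            )
```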
@@ -512,8 +512,8 @@ def generate_embeddings(
                 for _, file_path in chunk_batch:
                     batch_files.add(file_path)
 
-                max_retries = 3
-                base_delay = 1.0
+                max_retries = 5
+                base_delay = 2.0
 
                 for attempt in range(max_retries + 1):
                     try:
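With the new values (max_retries = 5, base_delay = 2.0), the exponential backoff used later in this function waits roughly 2, 4, 8, 16, and 32 seconds (plus up to 0.5 s of jitter) before the final attempt, about a minute of retry budget per batch. A quick check:

```python
base_delay, max_retries = 2.0, 5
delays = [base_delay * (2 ** attempt) for attempt in range(max_retries)]
print(delays)       # [2.0, 4.0, 8.0, 16.0, 32.0]
print(sum(delays))  # 62.0 seconds of backoff before the last (6th) attempt
```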
@@ -523,10 +523,13 @@ def generate_embeddings(
 
                     except Exception as e:
                         error_str = str(e).lower()
-                        # Check for retryable errors (rate limit, connection issues)
+                        # Check for retryable errors (rate limit, connection, backend issues)
+                        # Note: Some backends (e.g., ModelScope) return 400 with nested 500 errors
                         is_retryable = any(x in error_str for x in [
                             "429", "rate limit", "connection", "timeout",
-                            "502", "503", "504", "service unavailable"
+                            "502", "503", "504", "service unavailable",
+                            "500", "400", "badrequesterror", "internal server error",
+                            "11434"  # Ollama port - indicates backend routing issue
                         ])
 
                         if attempt < max_retries and is_retryable:
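To illustrate why the extra markers matter (the error strings below are made up for the example, not captured from the real backends): a ModelScope failure that surfaces as a 400 wrapping an internal 500, or one that mentions the Ollama port 11434, now counts as retryable, while an unrelated error such as a bad API key still fails immediately.

```python
RETRYABLE_MARKERS = [
    "429", "rate limit", "connection", "timeout",
    "502", "503", "504", "service unavailable",
    "500", "400", "badrequesterror", "internal server error",
    "11434",  # Ollama port - indicates backend routing issue
]

def is_retryable(error: Exception) -> bool:
    error_str = str(error).lower()
    return any(marker in error_str for marker in RETRYABLE_MARKERS)

print(is_retryable(Exception("BadRequestError: 400 - upstream returned 500")))            # True
print(is_retryable(Exception("Connection refused to http://127.0.0.1:11434/api/embed")))  # True
print(is_retryable(Exception("Invalid API key")))                                         # False
```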
@@ -554,24 +557,50 @@ def generate_embeddings(
                 for _, file_path in chunk_batch:
                     batch_files.add(file_path)
 
-                try:
-                    # Generate embeddings
-                    batch_contents = [chunk.content for chunk, _ in chunk_batch]
-                    embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
-
-                    # Store embeddings
-                    vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
-
-                    files_seen.update(batch_files)
-                    total_chunks_created += len(chunk_batch)
-                    total_files_processed = len(files_seen)
-
-                    if progress_callback and batch_number % 10 == 0:
-                        progress_callback(f"  Batch {batch_number}: {total_chunks_created} chunks, {total_files_processed} files")
-
-                except Exception as e:
-                    logger.error(f"Failed to process batch {batch_number}: {str(e)}")
-                    files_seen.update(batch_files)
+                # Retry logic for transient backend errors
+                max_retries = 5
+                base_delay = 2.0
+                success = False
+
+                for attempt in range(max_retries + 1):
+                    try:
+                        # Generate embeddings
+                        batch_contents = [chunk.content for chunk, _ in chunk_batch]
+                        embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
+
+                        # Store embeddings
+                        vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
+
+                        files_seen.update(batch_files)
+                        total_chunks_created += len(chunk_batch)
+                        total_files_processed = len(files_seen)
+                        success = True
+                        break
+
+                    except Exception as e:
+                        error_str = str(e).lower()
+                        # Check for retryable errors (rate limit, connection, backend issues)
+                        is_retryable = any(x in error_str for x in [
+                            "429", "rate limit", "connection", "timeout",
+                            "502", "503", "504", "service unavailable",
+                            "500", "400", "badrequesterror", "internal server error",
+                            "11434"  # Ollama port - indicates backend routing issue
+                        ])
+
+                        if attempt < max_retries and is_retryable:
+                            import random
+                            sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
+                            logger.warning(f"Batch {batch_number} failed (attempt {attempt+1}/{max_retries+1}). "
+                                           f"Retrying in {sleep_time:.1f}s. Error: {e}")
+                            time.sleep(sleep_time)
+                            continue
+
+                        logger.error(f"Failed to process batch {batch_number}: {str(e)}")
+                        files_seen.update(batch_files)
+                        break
+
+                if success and progress_callback and batch_number % 10 == 0:
+                    progress_callback(f"  Batch {batch_number}: {total_chunks_created} chunks, {total_files_processed} files")
         else:
             # Concurrent processing - main thread iterates batches (SQLite safe),
             # workers compute embeddings (parallel), main thread writes to DB (serial)
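The same retry-with-jitter pattern, extracted into a small standalone helper for illustration (the repository keeps the loop inline as shown above; names here are not the project's API):

```python
import logging
import random
import time
from typing import Callable, TypeVar

T = TypeVar("T")
logger = logging.getLogger(__name__)

def retry_with_backoff(fn: Callable[[], T], *, max_retries: int = 5,
                       base_delay: float = 2.0,
                       retryable: Callable[[Exception], bool] = lambda e: True) -> T:
    """Run fn, retrying transient failures with exponential backoff plus jitter."""
    for attempt in range(max_retries + 1):
        try:
            return fn()
        except Exception as e:
            if attempt < max_retries and retryable(e):
                sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
                logger.warning("Attempt %d/%d failed (%s); retrying in %.1fs",
                               attempt + 1, max_retries + 1, e, sleep_time)
                time.sleep(sleep_time)
                continue
            raise
    raise RuntimeError("unreachable")

# Usage sketch: retry_with_backoff(lambda: embedder.embed_to_numpy(batch_contents))
```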
@@ -89,6 +89,23 @@ class LiteLLMEmbedderWrapper(BaseEmbedder):
         # Default fallback
         return 8192
 
+    def _sanitize_text(self, text: str) -> str:
+        """Sanitize text to work around ModelScope API routing bug.
+
+        ModelScope incorrectly routes text starting with lowercase 'import'
+        to an Ollama endpoint, causing failures. This adds a leading space
+        to work around the issue without affecting embedding quality.
+
+        Args:
+            text: Text to sanitize.
+
+        Returns:
+            Sanitized text safe for embedding API.
+        """
+        if text.startswith('import'):
+            return ' ' + text
+        return text
+
     def embed_to_numpy(self, texts: str | Iterable[str], **kwargs) -> np.ndarray:
         """Embed texts to numpy array using LiteLLMEmbedder.
 
@@ -104,5 +121,9 @@ class LiteLLMEmbedderWrapper(BaseEmbedder):
             texts = [texts]
         else:
             texts = list(texts)
 
+        # Sanitize texts to avoid ModelScope routing bug
+        texts = [self._sanitize_text(t) for t in texts]
+
         # LiteLLM handles batching internally, ignore batch_size parameter
         return self._embedder.embed(texts)