Mirror of https://github.com/catlog22/Claude-Code-Workflow.git, synced 2026-02-11 02:33:51 +08:00
fix: Fix Ollama connection errors caused by a ModelScope API routing bug
- Add a _sanitize_text() method to handle text that starts with 'import'
- The ModelScope backend incorrectly routes such text to the local Ollama endpoint
- Prepending a space to the text bypasses the routing check without affecting embedding quality
- Strengthen the retry logic and error handling in embedding_manager.py
- Call the global model lock in commands.py after successful generation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
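The workaround itself is a one-line prefix check. A minimal standalone sketch of the rule described above (the real method is _sanitize_text on LiteLLMEmbedderWrapper, shown in the diff below); note that the check is a plain prefix match, so any text beginning with 'import', including words like 'important', gets the padding, which is harmless:

def sanitize(text: str) -> str:
    # Texts starting with a lowercase 'import' trip ModelScope's router;
    # a leading space bypasses the check and is negligible for the embedding.
    return ' ' + text if text.startswith('import') else text

assert sanitize("import os") == " import os"
assert sanitize("from os import path") == "from os import path"   # unchanged
assert sanitize("important: read me") == " important: read me"    # prefix match, also padded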
@@ -2073,6 +2073,10 @@ def embeddings_generate(
     data = result["result"]
 
+    # Set global model lock after successful generation
+    # This prevents using different models for future indexes
+    set_locked_model_config(backend, model)
+
     if use_recursive:
         # Recursive mode output
         console.print(f"[green]✓[/green] Recursive embeddings generation complete!")
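set_locked_model_config(backend, model) is the repository's own helper and its implementation is not part of this diff. As a hypothetical illustration of the stated intent (preventing different models from being used for future indexes), such a lock could persist the backend/model pair and reject mismatched runs:

import json
from pathlib import Path

# Hypothetical lock file location; the real helper may store this elsewhere.
LOCK_PATH = Path(".embeddings/model_lock.json")

def set_locked_model_config(backend: str, model: str) -> None:
    # Record the backend/model pair used for the first successful generation.
    LOCK_PATH.parent.mkdir(parents=True, exist_ok=True)
    LOCK_PATH.write_text(json.dumps({"backend": backend, "model": model}))

def check_locked_model_config(backend: str, model: str) -> None:
    # Refuse to index with a different model than the one locked in,
    # since mixing embedding models would make stored vectors incomparable.
    if LOCK_PATH.exists():
        locked = json.loads(LOCK_PATH.read_text())
        if (locked["backend"], locked["model"]) != (backend, model):
            raise ValueError(
                f"Index was built with {locked['backend']}/{locked['model']}; "
                f"refusing to continue with {backend}/{model}."
            )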
@@ -512,8 +512,8 @@ def generate_embeddings(
             for _, file_path in chunk_batch:
                 batch_files.add(file_path)
 
-            max_retries = 3
-            base_delay = 1.0
+            max_retries = 5
+            base_delay = 2.0
 
             for attempt in range(max_retries + 1):
                 try:
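Raising max_retries from 3 to 5 and base_delay from 1.0 to 2.0 widens the worst-case retry window considerably, since the sleep before retry N is base_delay * 2**N plus up to 0.5 s of jitter (see the backoff code later in this diff):

# Total sleep time if every attempt fails (jitter ignored):
old_schedule = [1.0 * 2 ** a for a in range(3)]   # [1.0, 2.0, 4.0]
new_schedule = [2.0 * 2 ** a for a in range(5)]   # [2.0, 4.0, 8.0, 16.0, 32.0]
print(sum(old_schedule), sum(new_schedule))       # 7.0 62.0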
@@ -523,10 +523,13 @@ def generate_embeddings(
 
                 except Exception as e:
                     error_str = str(e).lower()
-                    # Check for retryable errors (rate limit, connection issues)
+                    # Check for retryable errors (rate limit, connection, backend issues)
+                    # Note: Some backends (e.g., ModelScope) return 400 with nested 500 errors
                     is_retryable = any(x in error_str for x in [
                         "429", "rate limit", "connection", "timeout",
-                        "502", "503", "504", "service unavailable"
+                        "502", "503", "504", "service unavailable",
+                        "500", "400", "badrequesterror", "internal server error",
+                        "11434"  # Ollama port - indicates backend routing issue
                     ])
 
                     if attempt < max_retries and is_retryable:
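The retryability test is a substring scan over the lowercased exception text, which is why adding "400", "500", "badrequesterror" and the Ollama port "11434" catches the ModelScope failure mode (a 400 response wrapping a nested 500) as well as the misrouted local-Ollama connection error. A quick sketch with illustrative, non-verbatim error messages:

RETRYABLE_MARKERS = [
    "429", "rate limit", "connection", "timeout",
    "502", "503", "504", "service unavailable",
    "500", "400", "badrequesterror", "internal server error",
    "11434",  # Ollama port - indicates backend routing issue
]

def is_retryable(error: Exception) -> bool:
    error_str = str(error).lower()
    return any(marker in error_str for marker in RETRYABLE_MARKERS)

# Illustrative messages, not verbatim backend output:
assert is_retryable(Exception("BadRequestError: 400 {'error': {'code': 500}}"))
assert is_retryable(Exception("Connection refused: http://localhost:11434/api/embed"))
assert not is_retryable(Exception("Invalid API key"))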
@@ -554,24 +557,50 @@ def generate_embeddings(
             for _, file_path in chunk_batch:
                 batch_files.add(file_path)
 
-            try:
-                # Generate embeddings
-                batch_contents = [chunk.content for chunk, _ in chunk_batch]
-                embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
-
-                # Store embeddings
-                vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
-
-                files_seen.update(batch_files)
-                total_chunks_created += len(chunk_batch)
-                total_files_processed = len(files_seen)
-
-                if progress_callback and batch_number % 10 == 0:
-                    progress_callback(f" Batch {batch_number}: {total_chunks_created} chunks, {total_files_processed} files")
-
-            except Exception as e:
-                logger.error(f"Failed to process batch {batch_number}: {str(e)}")
-                files_seen.update(batch_files)
+            # Retry logic for transient backend errors
+            max_retries = 5
+            base_delay = 2.0
+            success = False
+
+            for attempt in range(max_retries + 1):
+                try:
+                    # Generate embeddings
+                    batch_contents = [chunk.content for chunk, _ in chunk_batch]
+                    embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
+
+                    # Store embeddings
+                    vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
+
+                    files_seen.update(batch_files)
+                    total_chunks_created += len(chunk_batch)
+                    total_files_processed = len(files_seen)
+                    success = True
+                    break
+
+                except Exception as e:
+                    error_str = str(e).lower()
+                    # Check for retryable errors (rate limit, connection, backend issues)
+                    is_retryable = any(x in error_str for x in [
+                        "429", "rate limit", "connection", "timeout",
+                        "502", "503", "504", "service unavailable",
+                        "500", "400", "badrequesterror", "internal server error",
+                        "11434"  # Ollama port - indicates backend routing issue
+                    ])
+
+                    if attempt < max_retries and is_retryable:
+                        import random
+                        sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
+                        logger.warning(f"Batch {batch_number} failed (attempt {attempt+1}/{max_retries+1}). "
+                                       f"Retrying in {sleep_time:.1f}s. Error: {e}")
+                        time.sleep(sleep_time)
+                        continue
+
+                    logger.error(f"Failed to process batch {batch_number}: {str(e)}")
+                    files_seen.update(batch_files)
+                    break
+
+            if success and progress_callback and batch_number % 10 == 0:
+                progress_callback(f" Batch {batch_number}: {total_chunks_created} chunks, {total_files_processed} files")
     else:
         # Concurrent processing - main thread iterates batches (SQLite safe),
         # workers compute embeddings (parallel), main thread writes to DB (serial)
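The sequential path now uses the same retry-with-backoff shape as the earlier hunk, with a success flag so the progress callback only fires for batches that actually landed. Extracted as a standalone helper (a hypothetical refactor, not part of this commit, and it re-raises on final failure instead of logging and moving on to the next batch as the loop above does), the pattern looks like this:

import logging
import random
import time
from typing import Callable, TypeVar

T = TypeVar("T")
logger = logging.getLogger(__name__)

def with_backoff(operation: Callable[[], T], *, max_retries: int = 5,
                 base_delay: float = 2.0,
                 retryable: Callable[[Exception], bool] = lambda e: True) -> T:
    """Run operation, retrying retryable failures with exponential backoff and jitter."""
    for attempt in range(max_retries + 1):
        try:
            return operation()
        except Exception as e:
            if attempt < max_retries and retryable(e):
                sleep_time = base_delay * (2 ** attempt) + random.uniform(0, 0.5)
                logger.warning("Attempt %d/%d failed (%s); retrying in %.1fs",
                               attempt + 1, max_retries + 1, e, sleep_time)
                time.sleep(sleep_time)
                continue
            raise
    raise RuntimeError("unreachable")  # loop always returns or raises before this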
@@ -89,6 +89,23 @@ class LiteLLMEmbedderWrapper(BaseEmbedder):
         # Default fallback
         return 8192
 
+    def _sanitize_text(self, text: str) -> str:
+        """Sanitize text to work around ModelScope API routing bug.
+
+        ModelScope incorrectly routes text starting with lowercase 'import'
+        to an Ollama endpoint, causing failures. This adds a leading space
+        to work around the issue without affecting embedding quality.
+
+        Args:
+            text: Text to sanitize.
+
+        Returns:
+            Sanitized text safe for embedding API.
+        """
+        if text.startswith('import'):
+            return ' ' + text
+        return text
+
     def embed_to_numpy(self, texts: str | Iterable[str], **kwargs) -> np.ndarray:
         """Embed texts to numpy array using LiteLLMEmbedder.
 
@@ -104,5 +121,9 @@ class LiteLLMEmbedderWrapper(BaseEmbedder):
             texts = [texts]
         else:
             texts = list(texts)
 
+        # Sanitize texts to avoid ModelScope routing bug
+        texts = [self._sanitize_text(t) for t in texts]
+
         # LiteLLM handles batching internally, ignore batch_size parameter
         return self._embedder.embed(texts)
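Taken together, the last two hunks mean every input passes through _sanitize_text before reaching LiteLLM. A minimal sketch of that flow with a stub standing in for the real LiteLLMEmbedder (whose constructor and model configuration are not shown in this diff):

import numpy as np

class StubEmbedder:
    """Stand-in for LiteLLMEmbedder: returns a zero vector per input text."""
    def embed(self, texts: list[str]) -> np.ndarray:
        return np.zeros((len(texts), 4), dtype=np.float32)

def embed_to_numpy(embedder: StubEmbedder, texts) -> np.ndarray:
    # Mirrors the wrapper logic: accept a single string or any iterable of strings.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)
    # Sanitize texts to avoid the ModelScope routing bug.
    texts = [' ' + t if t.startswith('import') else t for t in texts]
    return embedder.embed(texts)

vectors = embed_to_numpy(StubEmbedder(), ["import numpy as np", "def main(): ..."])
print(vectors.shape)  # (2, 4)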