feat: 添加全局模型锁定功能,防止不同模型混合使用,增强嵌入生成的稳定性

This commit is contained in:
catlog22
2025-12-25 11:20:05 +08:00
parent 40e61b30d6
commit 229d51cd18
2 changed files with 143 additions and 2 deletions

View File

@@ -1888,7 +1888,13 @@ def embeddings_generate(
"""
_configure_logging(verbose, json_mode)
from codexlens.cli.embedding_manager import generate_embeddings, generate_embeddings_recursive, scan_for_model_conflicts
from codexlens.cli.embedding_manager import (
generate_embeddings,
generate_embeddings_recursive,
scan_for_model_conflicts,
check_global_model_lock,
set_locked_model_config,
)
# Validate backend
valid_backends = ["fastembed", "litellm"]
@@ -1956,6 +1962,31 @@ def embeddings_generate(
console.print(f"Concurrency: [cyan]{max_workers} workers[/cyan]")
console.print()
# Check global model lock (prevents mixing different models)
if not force:
lock_result = check_global_model_lock(backend, model)
if lock_result["has_conflict"]:
locked = lock_result["locked_config"]
if json_mode:
print_json(
success=False,
error="Global model lock conflict",
code="MODEL_LOCKED",
locked_config=locked,
target_config=lock_result["target_config"],
hint="Use --force to override the lock and switch to a different model (will regenerate all embeddings)",
)
raise typer.Exit(code=1)
else:
console.print("[red]⛔ Global Model Lock Active[/red]")
console.print(f" Locked model: [cyan]{locked['backend']}/{locked['model']}[/cyan]")
console.print(f" Requested: [yellow]{backend}/{model}[/yellow]")
console.print(f" Locked at: {locked.get('locked_at', 'unknown')}")
console.print()
console.print("[dim]All indexes must use the same embedding model.[/dim]")
console.print("[dim]Use --force to switch models (will regenerate all embeddings).[/dim]")
raise typer.Exit(code=1)
# Pre-check for model conflicts (only if not forcing)
if not force:
# Determine the index root for conflict scanning

View File

@@ -1,13 +1,14 @@
"""Embedding Manager - Manage semantic embeddings for code indexes."""
import gc
import json
import logging
import sqlite3
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from itertools import islice
from pathlib import Path
from typing import Dict, Generator, List, Optional, Tuple
from typing import Any, Dict, Generator, List, Optional, Tuple
try:
from codexlens.semantic import SEMANTIC_AVAILABLE, is_embedding_backend_available
@@ -1097,4 +1098,113 @@ def scan_for_model_conflicts(
"target_config": {"backend": target_backend, "model": target_model},
"conflicts": conflicts,
"indexes_with_embeddings": indexes_with_embeddings,
}
def _get_global_settings_path() -> Path:
"""Get the path to global embedding settings file."""
return Path.home() / ".codexlens" / "embedding_lock.json"
def get_locked_model_config() -> Optional[Dict[str, Any]]:
    """Read the globally locked embedding model configuration.

    Returns:
        A dict with ``backend``, ``model`` and ``locked_at`` keys when a
        lock is active; ``None`` when the settings file is missing,
        unreadable, malformed, or not marked as locked.
    """
    lock_path = _get_global_settings_path()
    try:
        # A missing file raises FileNotFoundError (an OSError), so no
        # separate exists() check is needed.
        payload = json.loads(lock_path.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError):
        # Best-effort read: any failure is treated as "no lock".
        return None
    if not payload.get("locked"):
        return None
    return {
        "backend": payload.get("backend"),
        "model": payload.get("model"),
        "locked_at": payload.get("locked_at"),
    }
def set_locked_model_config(backend: str, model: str) -> None:
    """Persist the global embedding model lock.

    Called after the first successful embedding generation to lock the
    model for all future operations.

    Args:
        backend: Embedding backend (fastembed or litellm)
        model: Model profile/name
    """
    from datetime import datetime, timezone

    settings_path = _get_global_settings_path()
    settings_path.parent.mkdir(parents=True, exist_ok=True)
    data = {
        "locked": True,
        "backend": backend,
        "model": model,
        # Timezone-aware UTC timestamp (still an ISO-8601 string, so
        # existing readers that only display the value are unaffected).
        "locked_at": datetime.now(timezone.utc).isoformat(),
    }
    # Write atomically (temp file + rename) so a crash mid-write or a
    # concurrent reader never sees a truncated/corrupt lock file.
    tmp_path = settings_path.with_name(settings_path.name + ".tmp")
    tmp_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
    tmp_path.replace(settings_path)
def clear_locked_model_config() -> bool:
    """Remove the global embedding model lock, if present.

    Returns:
        True if a lock file was removed, False if none existed.
    """
    settings_path = _get_global_settings_path()
    # EAFP: unlink directly instead of exists()+unlink(), which has a
    # race window if two processes clear the lock concurrently.
    try:
        settings_path.unlink()
    except FileNotFoundError:
        return False
    return True
def check_global_model_lock(
    target_backend: str,
    target_model: str,
) -> Dict[str, Any]:
    """Compare the requested embedding model against the global lock.

    Args:
        target_backend: Requested embedding backend
        target_model: Requested model profile/name

    Returns:
        Dictionary with:
            - is_locked: True if a global lock exists
            - has_conflict: True if target differs from locked config
            - locked_config: The locked configuration (if any)
            - target_config: The requested configuration
    """
    target = {"backend": target_backend, "model": target_model}
    locked = get_locked_model_config()

    if locked is None:
        # No lock on disk: nothing can conflict.
        return {
            "is_locked": False,
            "has_conflict": False,
            "locked_config": None,
            "target_config": target,
        }

    # Both backend and model must match for the request to be allowed.
    mismatch = (locked["backend"], locked["model"]) != (target_backend, target_model)
    return {
        "is_locked": True,
        "has_conflict": mismatch,
        "locked_config": locked,
        "target_config": target,
    }