feat: Add global model lock to prevent mixing different models and improve embedding generation stability

This commit is contained in:
catlog22
2025-12-25 11:20:05 +08:00
parent 40e61b30d6
commit 229d51cd18
2 changed files with 143 additions and 2 deletions
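
Note: the lock introduced here is a single JSON file in the user's home directory, shared by all indexes. A minimal sketch of inspecting it, assuming only the path and schema added in this commit (the backend/model values below are illustrative):

import json
from pathlib import Path

lock_path = Path.home() / ".codexlens" / "embedding_lock.json"
if lock_path.exists():
    data = json.loads(lock_path.read_text(encoding="utf-8"))
    # set_locked_model_config() writes, e.g.:
    # {"locked": true, "backend": "fastembed", "model": "bge-small",
    #  "locked_at": "2025-12-25T11:20:05.123456"}
    print(f"Locked to {data.get('backend')}/{data.get('model')}")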

View File

@@ -1888,7 +1888,13 @@ def embeddings_generate(
     """
     _configure_logging(verbose, json_mode)
 
-    from codexlens.cli.embedding_manager import generate_embeddings, generate_embeddings_recursive, scan_for_model_conflicts
+    from codexlens.cli.embedding_manager import (
+        generate_embeddings,
+        generate_embeddings_recursive,
+        scan_for_model_conflicts,
+        check_global_model_lock,
+        set_locked_model_config,
+    )
 
     # Validate backend
     valid_backends = ["fastembed", "litellm"]
@@ -1956,6 +1962,31 @@ def embeddings_generate(
     console.print(f"Concurrency: [cyan]{max_workers} workers[/cyan]")
     console.print()
 
+    # Check global model lock (prevents mixing different models)
+    if not force:
+        lock_result = check_global_model_lock(backend, model)
+        if lock_result["has_conflict"]:
+            locked = lock_result["locked_config"]
+            if json_mode:
+                print_json(
+                    success=False,
+                    error="Global model lock conflict",
+                    code="MODEL_LOCKED",
+                    locked_config=locked,
+                    target_config=lock_result["target_config"],
+                    hint="Use --force to override the lock and switch to a different model (will regenerate all embeddings)",
+                )
+                raise typer.Exit(code=1)
+            else:
+                console.print("[red]⛔ Global Model Lock Active[/red]")
+                console.print(f"  Locked model: [cyan]{locked['backend']}/{locked['model']}[/cyan]")
+                console.print(f"  Requested: [yellow]{backend}/{model}[/yellow]")
+                console.print(f"  Locked at: {locked.get('locked_at', 'unknown')}")
+                console.print()
+                console.print("[dim]All indexes must use the same embedding model.[/dim]")
+                console.print("[dim]Use --force to switch models (will regenerate all embeddings).[/dim]")
+                raise typer.Exit(code=1)
+
     # Pre-check for model conflicts (only if not forcing)
     if not force:
         # Determine the index root for conflict scanning
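
Reviewer note: a minimal sketch of how the new lock is meant to wrap a generation run, using only names added in this diff; the backend/model values are illustrative and the generation call itself is elided:

from codexlens.cli.embedding_manager import (
    check_global_model_lock,
    set_locked_model_config,
)

result = check_global_model_lock("fastembed", "bge-small")
if result["has_conflict"]:
    # The CLI path above raises typer.Exit(code=1) here.
    raise SystemExit(1)

# ... run embedding generation ...

# Per the docstring below, the lock is set after the first successful
# generation so later runs cannot mix in a different model's embeddings.
set_locked_model_config("fastembed", "bge-small")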

View File

@@ -1,13 +1,14 @@
 """Embedding Manager - Manage semantic embeddings for code indexes."""
 
 import gc
+import json
 import logging
 import sqlite3
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from itertools import islice
 from pathlib import Path
-from typing import Dict, Generator, List, Optional, Tuple
+from typing import Any, Dict, Generator, List, Optional, Tuple
 
 try:
     from codexlens.semantic import SEMANTIC_AVAILABLE, is_embedding_backend_available
@@ -1098,3 +1099,112 @@ def scan_for_model_conflicts(
         "conflicts": conflicts,
         "indexes_with_embeddings": indexes_with_embeddings,
     }
+
+
+def _get_global_settings_path() -> Path:
+    """Get the path to the global embedding settings file."""
+    return Path.home() / ".codexlens" / "embedding_lock.json"
+
+
+def get_locked_model_config() -> Optional[Dict[str, Any]]:
+    """Get the globally locked embedding model configuration.
+
+    Returns:
+        Dictionary with backend and model if locked, None otherwise.
+    """
+    settings_path = _get_global_settings_path()
+    if not settings_path.exists():
+        return None
+    try:
+        with open(settings_path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+        if data.get("locked"):
+            return {
+                "backend": data.get("backend"),
+                "model": data.get("model"),
+                "locked_at": data.get("locked_at"),
+            }
+    except (json.JSONDecodeError, OSError):
+        pass
+    return None
+
+
+def set_locked_model_config(backend: str, model: str) -> None:
+    """Set the globally locked embedding model configuration.
+
+    This is called after the first successful embedding generation
+    to lock the model for all future operations.
+
+    Args:
+        backend: Embedding backend (fastembed or litellm)
+        model: Model profile/name
+    """
+    import datetime
+
+    settings_path = _get_global_settings_path()
+    settings_path.parent.mkdir(parents=True, exist_ok=True)
+    data = {
+        "locked": True,
+        "backend": backend,
+        "model": model,
+        "locked_at": datetime.datetime.now().isoformat(),
+    }
+    with open(settings_path, "w", encoding="utf-8") as f:
+        json.dump(data, f, indent=2)
+
+
+def clear_locked_model_config() -> bool:
+    """Clear the globally locked embedding model configuration.
+
+    Returns:
+        True if lock was cleared, False if no lock existed.
+    """
+    settings_path = _get_global_settings_path()
+    if settings_path.exists():
+        settings_path.unlink()
+        return True
+    return False
+
+
+def check_global_model_lock(
+    target_backend: str,
+    target_model: str,
+) -> Dict[str, Any]:
+    """Check if the target model conflicts with the global lock.
+
+    Args:
+        target_backend: Requested embedding backend
+        target_model: Requested model profile/name
+
+    Returns:
+        Dictionary with:
+        - is_locked: True if a global lock exists
+        - has_conflict: True if target differs from locked config
+        - locked_config: The locked configuration (if any)
+        - target_config: The requested configuration
+    """
+    locked_config = get_locked_model_config()
+    if locked_config is None:
+        return {
+            "is_locked": False,
+            "has_conflict": False,
+            "locked_config": None,
+            "target_config": {"backend": target_backend, "model": target_model},
+        }
+
+    has_conflict = (
+        locked_config["backend"] != target_backend or
+        locked_config["model"] != target_model
+    )
+    return {
+        "is_locked": True,
+        "has_conflict": has_conflict,
+        "locked_config": locked_config,
+        "target_config": {"backend": target_backend, "model": target_model},
+    }
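
As a sanity check on the new API, the three lock states behave as follows (a sketch; model names are illustrative):

from codexlens.cli.embedding_manager import (
    check_global_model_lock,
    clear_locked_model_config,
    set_locked_model_config,
)

clear_locked_model_config()
assert check_global_model_lock("fastembed", "bge-small")["is_locked"] is False

set_locked_model_config("fastembed", "bge-small")
same = check_global_model_lock("fastembed", "bge-small")
assert same["is_locked"] and not same["has_conflict"]

other = check_global_model_lock("litellm", "text-embedding-3-small")
assert other["is_locked"] and other["has_conflict"]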