feat: 添加全局模型锁定功能,防止不同模型混合使用,增强嵌入生成的稳定性

This commit is contained in:
catlog22
2025-12-25 11:20:05 +08:00
parent 40e61b30d6
commit 229d51cd18
2 changed files with 143 additions and 2 deletions

View File

@@ -1888,7 +1888,13 @@ def embeddings_generate(
"""
_configure_logging(verbose, json_mode)
from codexlens.cli.embedding_manager import generate_embeddings, generate_embeddings_recursive, scan_for_model_conflicts
from codexlens.cli.embedding_manager import (
generate_embeddings,
generate_embeddings_recursive,
scan_for_model_conflicts,
check_global_model_lock,
set_locked_model_config,
)
# Validate backend
valid_backends = ["fastembed", "litellm"]
@@ -1956,6 +1962,31 @@ def embeddings_generate(
console.print(f"Concurrency: [cyan]{max_workers} workers[/cyan]")
console.print()
# Check global model lock (prevents mixing different models)
if not force:
lock_result = check_global_model_lock(backend, model)
if lock_result["has_conflict"]:
locked = lock_result["locked_config"]
if json_mode:
print_json(
success=False,
error="Global model lock conflict",
code="MODEL_LOCKED",
locked_config=locked,
target_config=lock_result["target_config"],
hint="Use --force to override the lock and switch to a different model (will regenerate all embeddings)",
)
raise typer.Exit(code=1)
else:
console.print("[red]⛔ Global Model Lock Active[/red]")
console.print(f" Locked model: [cyan]{locked['backend']}/{locked['model']}[/cyan]")
console.print(f" Requested: [yellow]{backend}/{model}[/yellow]")
console.print(f" Locked at: {locked.get('locked_at', 'unknown')}")
console.print()
console.print("[dim]All indexes must use the same embedding model.[/dim]")
console.print("[dim]Use --force to switch models (will regenerate all embeddings).[/dim]")
raise typer.Exit(code=1)
# Pre-check for model conflicts (only if not forcing)
if not force:
# Determine the index root for conflict scanning

View File

@@ -1,13 +1,14 @@
"""Embedding Manager - Manage semantic embeddings for code indexes."""
import gc
import json
import logging
import sqlite3
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from itertools import islice
from pathlib import Path
from typing import Dict, Generator, List, Optional, Tuple
from typing import Any, Dict, Generator, List, Optional, Tuple
try:
from codexlens.semantic import SEMANTIC_AVAILABLE, is_embedding_backend_available
@@ -1097,4 +1098,113 @@ def scan_for_model_conflicts(
"target_config": {"backend": target_backend, "model": target_model},
"conflicts": conflicts,
"indexes_with_embeddings": indexes_with_embeddings,
}
def _get_global_settings_path() -> Path:
"""Get the path to global embedding settings file."""
return Path.home() / ".codexlens" / "embedding_lock.json"
def get_locked_model_config() -> Optional[Dict[str, Any]]:
    """Read the globally locked embedding model configuration.

    Returns:
        A dict with ``backend``, ``model`` and ``locked_at`` keys when a
        lock is active; ``None`` when the settings file is missing,
        unreadable, malformed, or not marked as locked.
    """
    lock_path = _get_global_settings_path()
    try:
        # A missing file raises FileNotFoundError (an OSError), so no
        # separate exists() check is needed.
        payload = json.loads(lock_path.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError):
        # Best-effort read: any failure is treated as "no lock".
        return None
    if not payload.get("locked"):
        return None
    return {
        "backend": payload.get("backend"),
        "model": payload.get("model"),
        "locked_at": payload.get("locked_at"),
    }
def set_locked_model_config(backend: str, model: str) -> None:
    """Persist the global embedding model lock.

    Called after the first successful embedding generation to lock the
    model for all future operations.

    Args:
        backend: Embedding backend (fastembed or litellm)
        model: Model profile/name
    """
    from datetime import datetime, timezone

    settings_path = _get_global_settings_path()
    settings_path.parent.mkdir(parents=True, exist_ok=True)
    data = {
        "locked": True,
        "backend": backend,
        "model": model,
        # Timezone-aware UTC timestamp (still an ISO-8601 string, so
        # existing readers that only display the value are unaffected).
        "locked_at": datetime.now(timezone.utc).isoformat(),
    }
    # Write atomically (temp file + rename) so a crash mid-write or a
    # concurrent reader never sees a truncated/corrupt lock file.
    tmp_path = settings_path.with_name(settings_path.name + ".tmp")
    tmp_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
    tmp_path.replace(settings_path)
def clear_locked_model_config() -> bool:
    """Remove the global embedding model lock, if present.

    Returns:
        True if a lock file was removed, False if none existed.
    """
    settings_path = _get_global_settings_path()
    # EAFP: unlink directly instead of exists()+unlink(), which has a
    # race window if two processes clear the lock concurrently.
    try:
        settings_path.unlink()
    except FileNotFoundError:
        return False
    return True
def check_global_model_lock(
    target_backend: str,
    target_model: str,
) -> Dict[str, Any]:
    """Compare the requested embedding model against the global lock.

    Args:
        target_backend: Requested embedding backend
        target_model: Requested model profile/name

    Returns:
        Dictionary with:
            - is_locked: True if a global lock exists
            - has_conflict: True if target differs from locked config
            - locked_config: The locked configuration (if any)
            - target_config: The requested configuration
    """
    target = {"backend": target_backend, "model": target_model}
    locked = get_locked_model_config()

    if locked is None:
        # No lock on disk: nothing can conflict.
        return {
            "is_locked": False,
            "has_conflict": False,
            "locked_config": None,
            "target_config": target,
        }

    # Both backend and model must match for the request to be allowed.
    mismatch = (locked["backend"], locked["model"]) != (target_backend, target_model)
    return {
        "is_locked": True,
        "has_conflict": mismatch,
        "locked_config": locked,
        "target_config": target,
    }