From 229d51cd18af8bd6615f0d218b29998f080cddeb Mon Sep 17 00:00:00 2001
From: catlog22
Date: Thu, 25 Dec 2025 11:20:05 +0800
Subject: [PATCH] feat: add global model locking to prevent mixing different
 models and improve embedding generation stability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 codex-lens/src/codexlens/cli/commands.py           |  33 +++++-
 .../src/codexlens/cli/embedding_manager.py         | 112 +++++++++++++++++-
 2 files changed, 143 insertions(+), 2 deletions(-)

diff --git a/codex-lens/src/codexlens/cli/commands.py b/codex-lens/src/codexlens/cli/commands.py
index 111ed5db..5f90e08d 100644
--- a/codex-lens/src/codexlens/cli/commands.py
+++ b/codex-lens/src/codexlens/cli/commands.py
@@ -1888,7 +1888,13 @@ def embeddings_generate(
     """
     _configure_logging(verbose, json_mode)
 
-    from codexlens.cli.embedding_manager import generate_embeddings, generate_embeddings_recursive, scan_for_model_conflicts
+    from codexlens.cli.embedding_manager import (
+        generate_embeddings,
+        generate_embeddings_recursive,
+        scan_for_model_conflicts,
+        check_global_model_lock,
+        set_locked_model_config,
+    )
 
     # Validate backend
     valid_backends = ["fastembed", "litellm"]
@@ -1956,6 +1962,31 @@
     console.print(f"Concurrency: [cyan]{max_workers} workers[/cyan]")
     console.print()
 
+    # Check global model lock (prevents mixing different models)
+    if not force:
+        lock_result = check_global_model_lock(backend, model)
+        if lock_result["has_conflict"]:
+            locked = lock_result["locked_config"]
+            if json_mode:
+                print_json(
+                    success=False,
+                    error="Global model lock conflict",
+                    code="MODEL_LOCKED",
+                    locked_config=locked,
+                    target_config=lock_result["target_config"],
+                    hint="Use --force to override the lock and switch to a different model (will regenerate all embeddings)",
+                )
+                raise typer.Exit(code=1)
+            else:
+                console.print("[red]⛔ Global Model Lock Active[/red]")
+                console.print(f"  Locked model: [cyan]{locked['backend']}/{locked['model']}[/cyan]")
+                console.print(f"  Requested: [yellow]{backend}/{model}[/yellow]")
+                console.print(f"  Locked at: {locked.get('locked_at', 'unknown')}")
+                console.print()
+                console.print("[dim]All indexes must use the same embedding model.[/dim]")
+                console.print("[dim]Use --force to switch models (will regenerate all embeddings).[/dim]")
+                raise typer.Exit(code=1)
+
     # Pre-check for model conflicts (only if not forcing)
     if not force:
         # Determine the index root for conflict scanning
diff --git a/codex-lens/src/codexlens/cli/embedding_manager.py b/codex-lens/src/codexlens/cli/embedding_manager.py
index e28439ca..c8168869 100644
--- a/codex-lens/src/codexlens/cli/embedding_manager.py
+++ b/codex-lens/src/codexlens/cli/embedding_manager.py
@@ -1,13 +1,14 @@
 """Embedding Manager - Manage semantic embeddings for code indexes."""
 
 import gc
+import json
 import logging
 import sqlite3
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from itertools import islice
 from pathlib import Path
-from typing import Dict, Generator, List, Optional, Tuple
+from typing import Any, Dict, Generator, List, Optional, Tuple
 
 try:
     from codexlens.semantic import SEMANTIC_AVAILABLE, is_embedding_backend_available
@@ -1097,4 +1098,113 @@ def scan_for_model_conflicts(
         "target_config": {"backend": target_backend, "model": target_model},
         "conflicts": conflicts,
         "indexes_with_embeddings": indexes_with_embeddings,
     }
+
+
+def _get_global_settings_path() -> Path:
+    """Get the path to the global embedding settings file."""
+    return Path.home() / ".codexlens" / "embedding_lock.json"
+
+
+def get_locked_model_config() -> Optional[Dict[str, Any]]:
+    """Get the globally locked embedding model configuration.
+
+    Returns:
+        Dictionary with backend and model if locked, None otherwise.
+    """
+    settings_path = _get_global_settings_path()
+    if not settings_path.exists():
+        return None
+
+    try:
+        with open(settings_path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+        if data.get("locked"):
+            return {
+                "backend": data.get("backend"),
+                "model": data.get("model"),
+                "locked_at": data.get("locked_at"),
+            }
+    except (json.JSONDecodeError, OSError):
+        pass
+
+    return None
+
+
+def set_locked_model_config(backend: str, model: str) -> None:
+    """Set the globally locked embedding model configuration.
+
+    This is called after the first successful embedding generation
+    to lock the model for all future operations.
+
+    Args:
+        backend: Embedding backend (fastembed or litellm)
+        model: Model profile/name
+    """
+    import datetime
+
+    settings_path = _get_global_settings_path()
+    settings_path.parent.mkdir(parents=True, exist_ok=True)
+
+    data = {
+        "locked": True,
+        "backend": backend,
+        "model": model,
+        "locked_at": datetime.datetime.now().isoformat(),
+    }
+
+    with open(settings_path, "w", encoding="utf-8") as f:
+        json.dump(data, f, indent=2)
+
+
+def clear_locked_model_config() -> bool:
+    """Clear the globally locked embedding model configuration.
+
+    Returns:
+        True if a lock was cleared, False if no lock existed.
+    """
+    settings_path = _get_global_settings_path()
+    if settings_path.exists():
+        settings_path.unlink()
+        return True
+    return False
+
+
+def check_global_model_lock(
+    target_backend: str,
+    target_model: str,
+) -> Dict[str, Any]:
+    """Check whether the target model conflicts with the global lock.
+
+    Args:
+        target_backend: Requested embedding backend
+        target_model: Requested model profile/name
+
+    Returns:
+        Dictionary with:
+        - is_locked: True if a global lock exists
+        - has_conflict: True if the target differs from the locked config
+        - locked_config: The locked configuration (if any)
+        - target_config: The requested configuration
+    """
+    locked_config = get_locked_model_config()
+
+    if locked_config is None:
+        return {
+            "is_locked": False,
+            "has_conflict": False,
+            "locked_config": None,
+            "target_config": {"backend": target_backend, "model": target_model},
+        }
+
+    has_conflict = (
+        locked_config["backend"] != target_backend or
+        locked_config["model"] != target_model
+    )
+
+    return {
+        "is_locked": True,
+        "has_conflict": has_conflict,
+        "locked_config": locked_config,
+        "target_config": {"backend": target_backend, "model": target_model},
+    }
\ No newline at end of file