From 8203d690cbbff1c303f5740912f43b38b6439626 Mon Sep 17 00:00:00 2001
From: catlog22
Date: Mon, 22 Dec 2025 21:49:10 +0800
Subject: [PATCH] fix: CodexLens model detection, hybrid search stability, and JSON logging
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Fix model installation detection using fastembed ONNX cache names
- Add embeddings_config table for model metadata tracking
- Fix hybrid search segfault by using single-threaded GPU mode
- Suppress INFO logs in JSON mode to prevent error display
- Add model dropdown filtering to show only installed models

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
---
 .../dashboard-js/components/cli-status.js     | 37 ++++++-
 .../dashboard-js/views/cli-manager.js         | 51 ++++++++--
 codex-lens/src/codex_lens.egg-info/PKG-INFO   | 12 +--
 .../src/codex_lens.egg-info/requires.txt      | 10 +-
 codex-lens/src/codexlens/cli/commands.py      | 37 ++++---
 .../src/codexlens/cli/embedding_manager.py    | 15 +++
 codex-lens/src/codexlens/cli/model_manager.py | 46 +++++++--
 .../src/codexlens/search/chain_search.py      | 15 ++-
 .../src/codexlens/search/hybrid_search.py     | 37 ++++---
 .../src/codexlens/semantic/vector_store.py    | 97 +++++++++++++++++++
 package.json                                  |  2 +-
 11 files changed, 302 insertions(+), 57 deletions(-)

diff --git a/ccw/src/templates/dashboard-js/components/cli-status.js b/ccw/src/templates/dashboard-js/components/cli-status.js
index e3740b5f..a90d5b47 100644
--- a/ccw/src/templates/dashboard-js/components/cli-status.js
+++ b/ccw/src/templates/dashboard-js/components/cli-status.js
@@ -98,15 +98,17 @@ async function loadCodexLensStatus() {
     }
     window.cliToolsStatus.codexlens = {
       installed: data.ready || false,
-      version: data.version || null
+      version: data.version || null,
+      installedModels: []  // Will be populated by loadInstalledModels
     };
 
     // Update CodexLens badge
     updateCodexLensBadge();
 
-    // If CodexLens is ready, also check semantic status
+    // If CodexLens is ready, also check semantic status and models
     if (data.ready) {
       await loadSemanticStatus();
+      await loadInstalledModels();
     }
 
     return data;
@@ -132,6 +134,37 @@ async function loadSemanticStatus() {
   }
 }
 
+/**
+ * Load installed embedding models
+ */
+async function loadInstalledModels() {
+  try {
+    const response = await fetch('/api/codexlens/models');
+    if (!response.ok) throw new Error('Failed to load models');
+    const data = await response.json();
+
+    if (data.success && data.result && data.result.models) {
+      // Filter to only installed models
+      const installedModels = data.result.models
+        .filter(m => m.installed)
+        .map(m => m.profile);
+
+      // Update window.cliToolsStatus
+      if (window.cliToolsStatus && window.cliToolsStatus.codexlens) {
+        window.cliToolsStatus.codexlens.installedModels = installedModels;
+        window.cliToolsStatus.codexlens.allModels = data.result.models;
+      }
+
+      console.log('[CLI Status] Installed models:', installedModels);
+      return installedModels;
+    }
+    return [];
+  } catch (err) {
+    console.error('Failed to load installed models:', err);
+    return [];
+  }
+}
+
 // ========== Badge Update ==========
 function updateCliBadge() {
   const badge = document.getElementById('badgeCliTools');
diff --git a/ccw/src/templates/dashboard-js/views/cli-manager.js b/ccw/src/templates/dashboard-js/views/cli-manager.js
index 10515e53..ca9c86c4 100644
--- a/ccw/src/templates/dashboard-js/views/cli-manager.js
+++ b/ccw/src/templates/dashboard-js/views/cli-manager.js
@@ -349,6 +349,50 @@ function getSelectedModel() {
   return select ? select.value : 'code';
 }
 
+/**
+ * Build model select options HTML, showing only installed models
+ * @returns {string} HTML string for select options
+ */
+function buildModelSelectOptions() {
+  var installedModels = window.cliToolsStatus?.codexlens?.installedModels || [];
+  var allModels = window.cliToolsStatus?.codexlens?.allModels || [];
+
+  // Model display configuration
+  var modelConfig = {
+    'code': { label: t('index.modelCode') || 'Code (768d)', star: true },
+    'base': { label: t('index.modelBase') || 'Base (768d)', star: false },
+    'fast': { label: t('index.modelFast') || 'Fast (384d)', star: false },
+    'minilm': { label: t('index.modelMinilm') || 'MiniLM (384d)', star: false },
+    'multilingual': { label: t('index.modelMultilingual') || 'Multilingual (1024d)', warn: true },
+    'balanced': { label: t('index.modelBalanced') || 'Balanced (1024d)', warn: true }
+  };
+
+  // If no models installed, show placeholder
+  if (installedModels.length === 0) {
+    return '<option value="" disabled>No models installed</option>';
+  }
+
+  // Build options for installed models only
+  var options = '';
+  var firstInstalled = null;
+
+  // Preferred order: code, fast, minilm, base, multilingual, balanced
+  var preferredOrder = ['code', 'fast', 'minilm', 'base', 'multilingual', 'balanced'];
+
+  preferredOrder.forEach(function(profile) {
+    if (installedModels.includes(profile) && modelConfig[profile]) {
+      var config = modelConfig[profile];
+      var style = config.warn ? ' style="color: var(--muted-foreground)"' : '';
+      var suffix = config.star ? ' ⭐' : (config.warn ? ' ⚠️' : '');
+      var selected = !firstInstalled ? ' selected' : '';
+      if (!firstInstalled) firstInstalled = profile;
+      options += '<option value="' + profile + '"' + style + selected + '>' + config.label + suffix + '</option>';
+    }
+  });
+
+  return options;
+}
+
 // ========== Tools Section (Left Column) ==========
 function renderToolsSection() {
   var container = document.getElementById('tools-section');
@@ -404,12 +448,7 @@ function renderToolsSection() {
     (codexLensStatus.ready ?
       ' v' + (codexLensStatus.version || 'installed') + '' +
      '' + '' + '' +
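
NOTE: The dropdown above only ever renders profiles that the models endpoint reports
as installed. A minimal Python sketch of the same filtering, handy for checking the
endpoint by hand — the base URL is an assumption (the dashboard port varies per setup):

    import json
    import urllib.request

    BASE_URL = "http://localhost:3000"  # assumption, not taken from this patch

    with urllib.request.urlopen(f"{BASE_URL}/api/codexlens/models") as resp:
        data = json.load(resp)

    # Mirrors the JS: keep only installed profiles, in the payload's order.
    models = data.get("result", {}).get("models", []) if data.get("success") else []
    installed = [m["profile"] for m in models if m.get("installed")]
    print("installed profiles:", installed)
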
diff --git a/codex-lens/src/codex_lens.egg-info/PKG-INFO b/codex-lens/src/codex_lens.egg-info/PKG-INFO
index d11786d3..f7321511 100644
--- a/codex-lens/src/codex_lens.egg-info/PKG-INFO
+++ b/codex-lens/src/codex_lens.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: codex-lens
-Version: 0.2.0
+Version: 0.1.0
 Summary: CodexLens multi-modal code analysis platform
 Author: CodexLens contributors
 License: MIT
@@ -17,18 +17,18 @@ Requires-Dist: tree-sitter-typescript>=0.23
 Requires-Dist: pathspec>=0.11
 Provides-Extra: semantic
 Requires-Dist: numpy>=1.24; extra == "semantic"
-Requires-Dist: fastembed>=0.5; extra == "semantic"
+Requires-Dist: fastembed>=0.2; extra == "semantic"
 Requires-Dist: hnswlib>=0.8.0; extra == "semantic"
 Provides-Extra: semantic-gpu
 Requires-Dist: numpy>=1.24; extra == "semantic-gpu"
-Requires-Dist: fastembed>=0.5; extra == "semantic-gpu"
+Requires-Dist: fastembed>=0.2; extra == "semantic-gpu"
 Requires-Dist: hnswlib>=0.8.0; extra == "semantic-gpu"
-Requires-Dist: onnxruntime-gpu>=1.18.0; extra == "semantic-gpu"
+Requires-Dist: onnxruntime-gpu>=1.15.0; extra == "semantic-gpu"
 Provides-Extra: semantic-directml
 Requires-Dist: numpy>=1.24; extra == "semantic-directml"
-Requires-Dist: fastembed>=0.5; extra == "semantic-directml"
+Requires-Dist: fastembed>=0.2; extra == "semantic-directml"
 Requires-Dist: hnswlib>=0.8.0; extra == "semantic-directml"
-Requires-Dist: onnxruntime-directml>=1.18.0; extra == "semantic-directml"
+Requires-Dist: onnxruntime-directml>=1.15.0; extra == "semantic-directml"
 Provides-Extra: encoding
 Requires-Dist: chardet>=5.0; extra == "encoding"
 Provides-Extra: full
diff --git a/codex-lens/src/codex_lens.egg-info/requires.txt b/codex-lens/src/codex_lens.egg-info/requires.txt
index fa253403..468955df 100644
--- a/codex-lens/src/codex_lens.egg-info/requires.txt
+++ b/codex-lens/src/codex_lens.egg-info/requires.txt
@@ -15,17 +15,17 @@ tiktoken>=0.5.0
 
 [semantic]
 numpy>=1.24
-fastembed>=0.5
+fastembed>=0.2
 hnswlib>=0.8.0
 
 [semantic-directml]
 numpy>=1.24
-fastembed>=0.5
+fastembed>=0.2
 hnswlib>=0.8.0
-onnxruntime-directml>=1.18.0
+onnxruntime-directml>=1.15.0
 
 [semantic-gpu]
 numpy>=1.24
-fastembed>=0.5
+fastembed>=0.2
 hnswlib>=0.8.0
-onnxruntime-gpu>=1.18.0
+onnxruntime-gpu>=1.15.0
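
NOTE: The pins above are relaxations (fastembed>=0.2, onnxruntime->=1.15.0), so older
environments can still satisfy the semantic extras. A quick sanity check that the
onnxruntime build you ended up with actually exposes a GPU execution provider — a
sketch, assuming onnxruntime (any flavor) is importable:

    import onnxruntime as ort

    # Lists execution providers compiled into the installed onnxruntime build.
    providers = ort.get_available_providers()
    print(providers)  # e.g. ['CPUExecutionProvider'] or ['DmlExecutionProvider', ...]

    # 'CUDAExecutionProvider' => onnxruntime-gpu; 'DmlExecutionProvider' => onnxruntime-directml
    has_gpu = any(p in providers for p in ("CUDAExecutionProvider", "DmlExecutionProvider"))
    print("GPU provider available:", has_gpu)
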
""" - _configure_logging(verbose) + _configure_logging(verbose, json_mode) config = Config() languages = _parse_languages(language) base_path = path.expanduser().resolve() @@ -314,7 +323,7 @@ def search( # Force hybrid mode codexlens search "authentication" --mode hybrid """ - _configure_logging(verbose) + _configure_logging(verbose, json_mode) search_path = path.expanduser().resolve() # Validate mode @@ -487,7 +496,7 @@ def symbol( verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), ) -> None: """Look up symbols by name and optional kind.""" - _configure_logging(verbose) + _configure_logging(verbose, json_mode) search_path = path.expanduser().resolve() registry: RegistryStore | None = None @@ -538,7 +547,7 @@ def inspect( verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), ) -> None: """Analyze a single file and display symbols.""" - _configure_logging(verbose) + _configure_logging(verbose, json_mode) config = Config() factory = ParserFactory(config) @@ -588,7 +597,7 @@ def status( verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."), ) -> None: """Show index status and configuration.""" - _configure_logging(verbose) + _configure_logging(verbose, json_mode) registry: RegistryStore | None = None try: @@ -648,7 +657,7 @@ def status( # Embedding manager not available pass except Exception as e: - logger.debug(f"Failed to get embeddings status: {e}") + logging.debug(f"Failed to get embeddings status: {e}") stats = { "index_root": str(index_root), @@ -737,7 +746,7 @@ def projects( - show : Show details for a specific project - remove : Remove a project from the registry """ - _configure_logging(verbose) + _configure_logging(verbose, json_mode) registry: RegistryStore | None = None try: @@ -892,7 +901,7 @@ def config( Config keys: - index_dir: Directory to store indexes (default: ~/.codexlens/indexes) """ - _configure_logging(verbose) + _configure_logging(verbose, json_mode) config_file = Path.home() / ".codexlens" / "config.json" @@ -1057,7 +1066,7 @@ def migrate( This is a safe operation that preserves all existing data. Progress is shown during migration. """ - _configure_logging(verbose) + _configure_logging(verbose, json_mode) base_path = path.expanduser().resolve() registry: RegistryStore | None = None @@ -1183,7 +1192,7 @@ def clean( With path, removes that project's indexes. With --all, removes all indexes (use with caution). """ - _configure_logging(verbose) + _configure_logging(verbose, json_mode) try: mapper = PathMapper() @@ -1329,7 +1338,7 @@ def semantic_list( Shows files that have LLM-generated summaries and keywords. Results are aggregated from all index databases in the project. 
""" - _configure_logging(verbose) + _configure_logging(verbose, json_mode) base_path = path.expanduser().resolve() registry: Optional[RegistryStore] = None @@ -1798,7 +1807,7 @@ def embeddings_generate( codexlens embeddings-generate ~/.codexlens/indexes/project/_index.db # Specific index codexlens embeddings-generate ~/projects/my-app --model fast --force # Regenerate with fast model """ - _configure_logging(verbose) + _configure_logging(verbose, json_mode) from codexlens.cli.embedding_manager import generate_embeddings, generate_embeddings_recursive diff --git a/codex-lens/src/codexlens/cli/embedding_manager.py b/codex-lens/src/codexlens/cli/embedding_manager.py index 68797688..ff9c8640 100644 --- a/codex-lens/src/codexlens/cli/embedding_manager.py +++ b/codex-lens/src/codexlens/cli/embedding_manager.py @@ -279,6 +279,21 @@ def generate_embeddings( try: with VectorStore(index_path) as vector_store: + # Check model compatibility with existing embeddings + if not force: + is_compatible, warning = vector_store.check_model_compatibility( + model_profile, embedder.model_name, embedder.embedding_dim + ) + if not is_compatible: + return { + "success": False, + "error": warning, + } + + # Set/update model configuration for this index + vector_store.set_model_config( + model_profile, embedder.model_name, embedder.embedding_dim + ) # Use bulk insert mode for efficient batch ANN index building # This defers ANN updates until end_bulk_insert() is called with vector_store.bulk_insert(): diff --git a/codex-lens/src/codexlens/cli/model_manager.py b/codex-lens/src/codexlens/cli/model_manager.py index d401fa5a..5369a2d5 100644 --- a/codex-lens/src/codexlens/cli/model_manager.py +++ b/codex-lens/src/codexlens/cli/model_manager.py @@ -16,9 +16,11 @@ except ImportError: # Model profiles with metadata # Note: 768d is max recommended dimension for optimal performance/quality balance # 1024d models are available but not recommended due to higher resource usage +# cache_name: The actual Hugging Face repo name used by fastembed for ONNX caching MODEL_PROFILES = { "fast": { "model_name": "BAAI/bge-small-en-v1.5", + "cache_name": "qdrant/bge-small-en-v1.5-onnx-q", # fastembed uses ONNX version "dimensions": 384, "size_mb": 80, "description": "Fast, lightweight, English-optimized", @@ -27,6 +29,7 @@ MODEL_PROFILES = { }, "base": { "model_name": "BAAI/bge-base-en-v1.5", + "cache_name": "qdrant/bge-base-en-v1.5-onnx-q", # fastembed uses ONNX version "dimensions": 768, "size_mb": 220, "description": "General purpose, good balance of speed and quality", @@ -35,6 +38,7 @@ MODEL_PROFILES = { }, "code": { "model_name": "jinaai/jina-embeddings-v2-base-code", + "cache_name": "jinaai/jina-embeddings-v2-base-code", # Uses original name "dimensions": 768, "size_mb": 150, "description": "Code-optimized, best for programming languages", @@ -43,6 +47,7 @@ MODEL_PROFILES = { }, "minilm": { "model_name": "sentence-transformers/all-MiniLM-L6-v2", + "cache_name": "qdrant/all-MiniLM-L6-v2-onnx", # fastembed uses ONNX version "dimensions": 384, "size_mb": 90, "description": "Popular lightweight model, good quality", @@ -51,6 +56,7 @@ MODEL_PROFILES = { }, "multilingual": { "model_name": "intfloat/multilingual-e5-large", + "cache_name": "qdrant/multilingual-e5-large-onnx", # fastembed uses ONNX version "dimensions": 1024, "size_mb": 1000, "description": "Multilingual + code support (high resource usage)", @@ -59,6 +65,7 @@ MODEL_PROFILES = { }, "balanced": { "model_name": "mixedbread-ai/mxbai-embed-large-v1", + "cache_name": 
"mixedbread-ai/mxbai-embed-large-v1", # Uses original name "dimensions": 1024, "size_mb": 600, "description": "High accuracy, general purpose (high resource usage)", @@ -87,6 +94,23 @@ def get_cache_dir() -> Path: return cache_dir +def _get_model_cache_path(cache_dir: Path, info: Dict) -> Path: + """Get the actual cache path for a model. + + fastembed uses ONNX versions of models with different names than the original. + This function returns the correct path based on the cache_name field. + + Args: + cache_dir: The fastembed cache directory + info: Model profile info dictionary + + Returns: + Path to the model cache directory + """ + cache_name = info.get("cache_name", info["model_name"]) + return cache_dir / f"models--{cache_name.replace('/', '--')}" + + def list_models() -> Dict[str, any]: """List available model profiles and their installation status. @@ -106,13 +130,13 @@ def list_models() -> Dict[str, any]: for profile, info in MODEL_PROFILES.items(): model_name = info["model_name"] - # Check if model is cached + # Check if model is cached using the actual cache name installed = False cache_size_mb = 0 if cache_exists: - # Check for model directory in cache - model_cache_path = cache_dir / f"models--{model_name.replace('/', '--')}" + # Check for model directory in cache using correct cache_name + model_cache_path = _get_model_cache_path(cache_dir, info) if model_cache_path.exists(): installed = True # Calculate cache size @@ -166,7 +190,8 @@ def download_model(profile: str, progress_callback: Optional[callable] = None) - "error": f"Unknown profile: {profile}. Available: {', '.join(MODEL_PROFILES.keys())}", } - model_name = MODEL_PROFILES[profile]["model_name"] + info = MODEL_PROFILES[profile] + model_name = info["model_name"] try: # Download model by instantiating TextEmbedding @@ -179,9 +204,9 @@ def download_model(profile: str, progress_callback: Optional[callable] = None) - if progress_callback: progress_callback(f"Model {model_name} downloaded successfully") - # Get cache info + # Get cache info using correct cache_name cache_dir = get_cache_dir() - model_cache_path = cache_dir / f"models--{model_name.replace('/', '--')}" + model_cache_path = _get_model_cache_path(cache_dir, info) cache_size = 0 if model_cache_path.exists(): @@ -224,9 +249,10 @@ def delete_model(profile: str) -> Dict[str, any]: "error": f"Unknown profile: {profile}. 
diff --git a/codex-lens/src/codexlens/search/chain_search.py b/codex-lens/src/codexlens/search/chain_search.py
index 33e37bff..3d741a8c 100644
--- a/codex-lens/src/codexlens/search/chain_search.py
+++ b/codex-lens/src/codexlens/search/chain_search.py
@@ -396,7 +396,20 @@ class ChainSearchEngine:
         all_results = []
         stats = SearchStats()
 
-        executor = self._get_executor(options.max_workers)
+        # Force single-threaded execution for vector/hybrid search to avoid GPU crashes
+        # DirectML/ONNX have threading issues when multiple threads access GPU resources
+        effective_workers = options.max_workers
+        if options.enable_vector or options.hybrid_mode:
+            effective_workers = 1
+            self.logger.debug("Using single-threaded mode for vector search (GPU safety)")
+            # Pre-load embedder to avoid initialization overhead per-search
+            try:
+                from codexlens.semantic.embedder import get_embedder
+                get_embedder(profile="code", use_gpu=True)
+            except Exception:
+                pass  # Ignore pre-load failures
+
+        executor = self._get_executor(effective_workers)
 
         # Submit all search tasks
         future_to_path = {
             executor.submit(
diff --git a/codex-lens/src/codexlens/search/hybrid_search.py b/codex-lens/src/codexlens/search/hybrid_search.py
index 726dec88..a603c06e 100644
--- a/codex-lens/src/codexlens/search/hybrid_search.py
+++ b/codex-lens/src/codexlens/search/hybrid_search.py
@@ -274,19 +274,32 @@ class HybridSearchEngine:
             )
             return []
 
-        # Auto-detect embedding dimension and select appropriate profile
-        detected_dim = vector_store.dimension
-        if detected_dim is None:
-            self.logger.info("Vector store dimension unknown, using default profile")
-            profile = "code"  # Default fallback
-        elif detected_dim == 384:
-            profile = "fast"
-        elif detected_dim == 768:
-            profile = "code"
-        elif detected_dim == 1024:
-            profile = "multilingual"  # or balanced, both are 1024
+        # Get stored model configuration (preferred) or auto-detect from dimension
+        model_config = vector_store.get_model_config()
+        if model_config:
+            profile = model_config["model_profile"]
+            self.logger.debug(
+                "Using stored model config: %s (%s, %dd)",
+                profile, model_config["model_name"], model_config["embedding_dim"]
+            )
         else:
-            profile = "code"  # Default fallback
+            # Fallback: auto-detect from embedding dimension
+            detected_dim = vector_store.dimension
+            if detected_dim is None:
+                self.logger.info("Vector store dimension unknown, using default profile")
+                profile = "code"  # Default fallback
+            elif detected_dim == 384:
+                profile = "fast"
+            elif detected_dim == 768:
+                profile = "code"
+            elif detected_dim == 1024:
+                profile = "multilingual"  # or balanced, both are 1024
+            else:
+                profile = "code"  # Default fallback
+            self.logger.debug(
+                "No stored model config, auto-detected profile '%s' from dimension %s",
+                profile, detected_dim
+            )
 
         # Use cached embedder (singleton) for performance
         embedder = get_embedder(profile=profile)
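
NOTE: Dimension-based detection stays only as a fallback and is inherently ambiguous
at 1024d (multilingual vs balanced), which is exactly why the stored config wins when
present. The fallback table as a standalone sketch:

    from typing import Optional

    # Fallback mapping used only when no embeddings_config row exists.
    DIM_TO_PROFILE = {384: "fast", 768: "code", 1024: "multilingual"}

    def resolve_profile(stored_profile: Optional[str], detected_dim: Optional[int]) -> str:
        if stored_profile:  # embeddings_config wins when present
            return stored_profile
        return DIM_TO_PROFILE.get(detected_dim, "code")  # "code" is the default

    assert resolve_profile(None, 384) == "fast"
    assert resolve_profile("balanced", 1024) == "balanced"  # no 1024d ambiguity
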
'%s' from dimension %s", + profile, detected_dim + ) # Use cached embedder (singleton) for performance embedder = get_embedder(profile=profile) diff --git a/codex-lens/src/codexlens/semantic/vector_store.py b/codex-lens/src/codexlens/semantic/vector_store.py index 0299f68b..9f86cce0 100644 --- a/codex-lens/src/codexlens/semantic/vector_store.py +++ b/codex-lens/src/codexlens/semantic/vector_store.py @@ -116,6 +116,17 @@ class VectorStore: CREATE INDEX IF NOT EXISTS idx_chunks_file ON semantic_chunks(file_path) """) + # Model configuration table - tracks which model generated the embeddings + conn.execute(""" + CREATE TABLE IF NOT EXISTS embeddings_config ( + id INTEGER PRIMARY KEY CHECK (id = 1), + model_profile TEXT NOT NULL, + model_name TEXT NOT NULL, + embedding_dim INTEGER NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + """) conn.commit() def _init_ann_index(self) -> None: @@ -932,6 +943,92 @@ class VectorStore: return self._ann_index.count() return 0 + def get_model_config(self) -> Optional[Dict[str, Any]]: + """Get the model configuration used for embeddings in this store. + + Returns: + Dictionary with model_profile, model_name, embedding_dim, or None if not set. + """ + with sqlite3.connect(self.db_path) as conn: + row = conn.execute( + "SELECT model_profile, model_name, embedding_dim, created_at, updated_at " + "FROM embeddings_config WHERE id = 1" + ).fetchone() + if row: + return { + "model_profile": row[0], + "model_name": row[1], + "embedding_dim": row[2], + "created_at": row[3], + "updated_at": row[4], + } + return None + + def set_model_config( + self, model_profile: str, model_name: str, embedding_dim: int + ) -> None: + """Set the model configuration for embeddings in this store. + + This should be called when generating new embeddings. If a different + model was previously used, this will update the configuration. + + Args: + model_profile: Model profile name (fast, code, minilm, etc.) + model_name: Full model name (e.g., jinaai/jina-embeddings-v2-base-code) + embedding_dim: Embedding dimension (e.g., 768) + """ + with sqlite3.connect(self.db_path) as conn: + conn.execute( + """ + INSERT INTO embeddings_config (id, model_profile, model_name, embedding_dim) + VALUES (1, ?, ?, ?) + ON CONFLICT(id) DO UPDATE SET + model_profile = excluded.model_profile, + model_name = excluded.model_name, + embedding_dim = excluded.embedding_dim, + updated_at = CURRENT_TIMESTAMP + """, + (model_profile, model_name, embedding_dim) + ) + conn.commit() + + def check_model_compatibility( + self, model_profile: str, model_name: str, embedding_dim: int + ) -> Tuple[bool, Optional[str]]: + """Check if the given model is compatible with existing embeddings. + + Args: + model_profile: Model profile to check + model_name: Model name to check + embedding_dim: Embedding dimension to check + + Returns: + Tuple of (is_compatible, warning_message). + is_compatible is True if no existing config or configs match. + warning_message is a user-friendly message if incompatible. + """ + existing = self.get_model_config() + if existing is None: + return True, None + + # Check dimension first (most critical) + if existing["embedding_dim"] != embedding_dim: + return False, ( + f"Dimension mismatch: existing embeddings use {existing['embedding_dim']}d " + f"({existing['model_profile']}), but requested model uses {embedding_dim}d " + f"({model_profile}). Use --force to regenerate all embeddings." 
diff --git a/package.json b/package.json
index 10ee66e4..beca6410 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "claude-code-workflow",
-  "version": "6.2.6",
+  "version": "6.2.7",
   "description": "JSON-driven multi-agent development framework with intelligent CLI orchestration (Gemini/Qwen/Codex), context-first architecture, and automated workflow execution",
   "type": "module",
   "main": "ccw/src/index.js",