feat: bump version to 6.2.4, add GPU acceleration support and related dependencies

catlog22
2025-12-22 14:15:36 +08:00
parent ba23244876
commit 72f24bf535
7 changed files with 344 additions and 20 deletions

ccw/.npmignore Normal file
View File

@@ -0,0 +1,15 @@
# npm ignore file - overrides .gitignore for npm publish
# dist/ is NOT excluded here so it gets published
# Development files
node_modules/
*.log
*.tmp
# Test files
tests/
*.test.js
*.spec.js
# TypeScript source maps (optional, can keep for debugging)
# *.map

View File

@@ -1,6 +1,6 @@
{
"name": "claude-code-workflow",
"version": "6.2.2",
"version": "6.2.4",
"description": "Claude Code Workflow CLI - Dashboard viewer for workflow sessions and reviews",
"type": "module",
"main": "dist/index.js",

View File

@@ -31,6 +31,24 @@ semantic = [
"hnswlib>=0.8.0",
]
# GPU acceleration for semantic search (NVIDIA CUDA)
# Install with: pip install codexlens[semantic-gpu]
semantic-gpu = [
"numpy>=1.24",
"fastembed>=0.2",
"hnswlib>=0.8.0",
"onnxruntime-gpu>=1.15.0", # CUDA support
]
# GPU acceleration for Windows (DirectML - supports NVIDIA/AMD/Intel)
# Install with: pip install codexlens[semantic-directml]
semantic-directml = [
"numpy>=1.24",
"fastembed>=0.2",
"hnswlib>=0.8.0",
"onnxruntime-directml>=1.15.0", # DirectML support
]
# Encoding detection for non-UTF8 files
encoding = [
"chardet>=5.0",

View File

@@ -2,38 +2,75 @@
Install with: pip install codexlens[semantic]
Uses fastembed (ONNX-based, lightweight ~200MB)
GPU Acceleration:
- Automatic GPU detection and usage when available
- Supports CUDA (NVIDIA), TensorRT, DirectML (Windows), ROCm (AMD), CoreML (Apple)
- Install GPU support: pip install onnxruntime-gpu (NVIDIA) or onnxruntime-directml (Windows)
"""
from __future__ import annotations
SEMANTIC_AVAILABLE = False
SEMANTIC_BACKEND: str | None = None
GPU_AVAILABLE = False
_import_error: str | None = None
-def _detect_backend() -> tuple[bool, str | None, str | None]:
-"""Detect if fastembed is available."""
+def _detect_backend() -> tuple[bool, str | None, bool, str | None]:
+"""Detect if fastembed and GPU are available."""
try:
import numpy as np
except ImportError as e:
return False, None, f"numpy not available: {e}"
return False, None, False, f"numpy not available: {e}"
try:
from fastembed import TextEmbedding
-return True, "fastembed", None
except ImportError:
-return False, None, "fastembed not available. Install with: pip install codexlens[semantic]"
+return False, None, False, "fastembed not available. Install with: pip install codexlens[semantic]"
+# Check GPU availability
+gpu_available = False
+try:
+from .gpu_support import is_gpu_available
+gpu_available = is_gpu_available()
+except ImportError:
+pass
+return True, "fastembed", gpu_available, None
# Initialize on module load
-SEMANTIC_AVAILABLE, SEMANTIC_BACKEND, _import_error = _detect_backend()
+SEMANTIC_AVAILABLE, SEMANTIC_BACKEND, GPU_AVAILABLE, _import_error = _detect_backend()
def check_semantic_available() -> tuple[bool, str | None]:
"""Check if semantic search dependencies are available."""
return SEMANTIC_AVAILABLE, _import_error
def check_gpu_available() -> tuple[bool, str]:
"""Check if GPU acceleration is available.
Returns:
Tuple of (is_available, status_message)
"""
if not SEMANTIC_AVAILABLE:
return False, "Semantic search not available"
try:
from .gpu_support import is_gpu_available, get_gpu_summary
if is_gpu_available():
return True, get_gpu_summary()
return False, "No GPU detected (using CPU)"
except ImportError:
return False, "GPU support module not available"
__all__ = [
"SEMANTIC_AVAILABLE",
"SEMANTIC_BACKEND",
"GPU_AVAILABLE",
"check_semantic_available",
"check_gpu_available",
]
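
A usage sketch for the flags and helpers added above (not part of the commit; the import path codexlens.semantic is an assumption based on the project name):

from codexlens.semantic import (
    GPU_AVAILABLE,
    check_gpu_available,
    check_semantic_available,
)

ok, err = check_semantic_available()
if not ok:
    print(f"Semantic search unavailable: {err}")
else:
    gpu_ok, status = check_gpu_available()
    print(f"GPU_AVAILABLE={GPU_AVAILABLE}, check says {gpu_ok}: {status}")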

View File

@@ -1,22 +1,29 @@
"""Embedder for semantic code search using fastembed."""
"""Embedder for semantic code search using fastembed.
Supports GPU acceleration via ONNX execution providers (CUDA, TensorRT, DirectML, ROCm, CoreML).
GPU acceleration is automatic when available, with transparent CPU fallback.
"""
from __future__ import annotations
import gc
import logging
import threading
from typing import Dict, Iterable, List, Optional
import numpy as np
from . import SEMANTIC_AVAILABLE
from .gpu_support import get_optimal_providers, is_gpu_available, get_gpu_summary
logger = logging.getLogger(__name__)
# Global embedder cache for singleton pattern
_embedder_cache: Dict[str, "Embedder"] = {}
_cache_lock = threading.Lock()
-def get_embedder(profile: str = "code") -> "Embedder":
+def get_embedder(profile: str = "code", use_gpu: bool = True) -> "Embedder":
"""Get or create a cached Embedder instance (thread-safe singleton).
This function provides significant performance improvement by reusing
@@ -25,27 +32,38 @@ def get_embedder(profile: str = "code") -> "Embedder":
Args:
profile: Model profile ("fast", "code", "multilingual", "balanced")
use_gpu: If True, use GPU acceleration when available (default: True)
Returns:
Cached Embedder instance for the given profile
"""
global _embedder_cache
# Cache key includes GPU preference to support mixed configurations
cache_key = f"{profile}:{'gpu' if use_gpu else 'cpu'}"
# Fast path: check cache without lock
-if profile in _embedder_cache:
-return _embedder_cache[profile]
+if cache_key in _embedder_cache:
+return _embedder_cache[cache_key]
# Slow path: acquire lock for initialization
with _cache_lock:
# Double-check after acquiring lock
-if profile in _embedder_cache:
-return _embedder_cache[profile]
+if cache_key in _embedder_cache:
+return _embedder_cache[cache_key]
# Create new embedder and cache it
-embedder = Embedder(profile=profile)
+embedder = Embedder(profile=profile, use_gpu=use_gpu)
# Pre-load model to ensure it's ready
embedder._load_model()
-_embedder_cache[profile] = embedder
+_embedder_cache[cache_key] = embedder
# Log GPU status on first embedder creation
if use_gpu and is_gpu_available():
logger.info(f"Embedder initialized with GPU: {get_gpu_summary()}")
elif use_gpu:
logger.debug("GPU not available, using CPU for embeddings")
return embedder
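
As a usage sketch for the GPU-aware cache key above (not part of this commit; the module path is assumed):

from codexlens.semantic.embedder import get_embedder

gpu_embedder = get_embedder(profile="code", use_gpu=True)   # cached as "code:gpu"
cpu_embedder = get_embedder(profile="code", use_gpu=False)  # cached as "code:cpu"

assert gpu_embedder is get_embedder(profile="code", use_gpu=True)  # singleton per key
assert gpu_embedder is not cpu_embedder                            # GPU/CPU kept separate
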
@@ -96,13 +114,21 @@ class Embedder:
DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"
DEFAULT_PROFILE = "fast"
-def __init__(self, model_name: str | None = None, profile: str | None = None) -> None:
+def __init__(
+self,
+model_name: str | None = None,
+profile: str | None = None,
+use_gpu: bool = True,
+providers: List[str] | None = None,
+) -> None:
"""Initialize embedder with model or profile.
Args:
model_name: Explicit model name (e.g., "jinaai/jina-embeddings-v2-base-code")
profile: Model profile shortcut ("fast", "code", "multilingual", "balanced")
If both provided, model_name takes precedence.
use_gpu: If True, use GPU acceleration when available (default: True)
providers: Explicit ONNX providers list (overrides use_gpu if provided)
"""
if not SEMANTIC_AVAILABLE:
raise ImportError(
@@ -118,6 +144,13 @@ class Embedder:
else:
self.model_name = self.DEFAULT_MODEL
# Configure ONNX execution providers
if providers is not None:
self._providers = providers
else:
self._providers = get_optimal_providers(use_gpu=use_gpu)
self._use_gpu = use_gpu
self._model = None
@property
@@ -125,13 +158,39 @@ class Embedder:
"""Get embedding dimension for current model."""
return self.MODEL_DIMS.get(self.model_name, 768) # Default to 768 if unknown
@property
def providers(self) -> List[str]:
"""Get configured ONNX execution providers."""
return self._providers
@property
def is_gpu_enabled(self) -> bool:
"""Check if GPU acceleration is enabled for this embedder."""
gpu_providers = {"CUDAExecutionProvider", "TensorrtExecutionProvider",
"DmlExecutionProvider", "ROCMExecutionProvider", "CoreMLExecutionProvider"}
return any(p in gpu_providers for p in self._providers)
def _load_model(self) -> None:
"""Lazy load the embedding model."""
"""Lazy load the embedding model with configured providers."""
if self._model is not None:
return
from fastembed import TextEmbedding
-self._model = TextEmbedding(model_name=self.model_name)
+# fastembed supports 'providers' parameter for ONNX execution providers
+try:
+self._model = TextEmbedding(
+model_name=self.model_name,
+providers=self._providers,
+)
+logger.debug(f"Model loaded with providers: {self._providers}")
+except TypeError:
+# Fallback for older fastembed versions without providers parameter
+logger.warning(
+"fastembed version doesn't support 'providers' parameter. "
+"Upgrade fastembed for GPU acceleration: pip install --upgrade fastembed"
+)
+self._model = TextEmbedding(model_name=self.model_name)
def embed(self, texts: str | Iterable[str]) -> List[List[float]]:
"""Generate embeddings for one or more texts.

View File

@@ -0,0 +1,192 @@
"""GPU acceleration support for semantic embeddings.
This module provides GPU detection, initialization, and fallback handling
for ONNX-based embedding generation.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass
from typing import List, Optional
logger = logging.getLogger(__name__)
@dataclass
class GPUInfo:
"""GPU availability and configuration info."""
gpu_available: bool = False
cuda_available: bool = False
gpu_count: int = 0
gpu_name: Optional[str] = None
onnx_providers: List[str] = None
def __post_init__(self):
if self.onnx_providers is None:
self.onnx_providers = ["CPUExecutionProvider"]
_gpu_info_cache: Optional[GPUInfo] = None
def detect_gpu(force_refresh: bool = False) -> GPUInfo:
"""Detect available GPU resources for embedding acceleration.
Args:
force_refresh: If True, re-detect GPU even if cached.
Returns:
GPUInfo with detection results.
"""
global _gpu_info_cache
if _gpu_info_cache is not None and not force_refresh:
return _gpu_info_cache
info = GPUInfo()
# Check PyTorch CUDA availability (most reliable detection)
try:
import torch
if torch.cuda.is_available():
info.cuda_available = True
info.gpu_available = True
info.gpu_count = torch.cuda.device_count()
if info.gpu_count > 0:
info.gpu_name = torch.cuda.get_device_name(0)
logger.debug(f"PyTorch CUDA detected: {info.gpu_count} GPU(s)")
except ImportError:
logger.debug("PyTorch not available for GPU detection")
# Check ONNX Runtime providers with validation
try:
import onnxruntime as ort
available_providers = ort.get_available_providers()
# Build provider list with priority order
providers = []
# Test each provider to ensure it actually works
def test_provider(provider_name: str) -> bool:
"""Test if a provider actually works by creating a dummy session."""
try:
# Create a minimal ONNX model to test provider
import numpy as np
# Simple test: just check if provider can be instantiated
sess_options = ort.SessionOptions()
sess_options.log_severity_level = 4 # Suppress warnings
return True
except Exception:
return False
# CUDA provider (NVIDIA GPU) - check if CUDA runtime is available
if "CUDAExecutionProvider" in available_providers:
# Verify CUDA is actually usable by checking for cuBLAS
cuda_works = False
try:
import ctypes
# Try to load cuBLAS to verify CUDA installation
try:
ctypes.CDLL("cublas64_12.dll")
cuda_works = True
except OSError:
try:
ctypes.CDLL("cublas64_11.dll")
cuda_works = True
except OSError:
pass
except Exception:
pass
if cuda_works:
providers.append("CUDAExecutionProvider")
info.gpu_available = True
logger.debug("ONNX CUDAExecutionProvider available and working")
else:
logger.debug("ONNX CUDAExecutionProvider listed but CUDA runtime not found")
# TensorRT provider (optimized NVIDIA inference)
if "TensorrtExecutionProvider" in available_providers:
# TensorRT requires additional libraries, skip for now
logger.debug("ONNX TensorrtExecutionProvider available (requires TensorRT SDK)")
# DirectML provider (Windows GPU - AMD/Intel/NVIDIA)
if "DmlExecutionProvider" in available_providers:
providers.append("DmlExecutionProvider")
info.gpu_available = True
logger.debug("ONNX DmlExecutionProvider available (DirectML)")
# ROCm provider (AMD GPU on Linux)
if "ROCMExecutionProvider" in available_providers:
providers.append("ROCMExecutionProvider")
info.gpu_available = True
logger.debug("ONNX ROCMExecutionProvider available (AMD)")
# CoreML provider (Apple Silicon)
if "CoreMLExecutionProvider" in available_providers:
providers.append("CoreMLExecutionProvider")
info.gpu_available = True
logger.debug("ONNX CoreMLExecutionProvider available (Apple)")
# Always include CPU as fallback
providers.append("CPUExecutionProvider")
info.onnx_providers = providers
except ImportError:
logger.debug("ONNX Runtime not available")
info.onnx_providers = ["CPUExecutionProvider"]
_gpu_info_cache = info
return info
def get_optimal_providers(use_gpu: bool = True) -> List[str]:
"""Get optimal ONNX execution providers based on availability.
Args:
use_gpu: If True, include GPU providers when available.
If False, force CPU-only execution.
Returns:
List of provider names in priority order.
"""
if not use_gpu:
return ["CPUExecutionProvider"]
gpu_info = detect_gpu()
return gpu_info.onnx_providers
def is_gpu_available() -> bool:
"""Check if any GPU acceleration is available."""
return detect_gpu().gpu_available
def get_gpu_summary() -> str:
"""Get human-readable GPU status summary."""
info = detect_gpu()
if not info.gpu_available:
return "GPU: Not available (using CPU)"
parts = []
if info.gpu_name:
parts.append(f"GPU: {info.gpu_name}")
if info.gpu_count > 1:
parts.append(f"({info.gpu_count} devices)")
# Show active providers (excluding CPU fallback)
gpu_providers = [p for p in info.onnx_providers if p != "CPUExecutionProvider"]
if gpu_providers:
parts.append(f"Providers: {', '.join(gpu_providers)}")
return " | ".join(parts) if parts else "GPU: Available"
def clear_gpu_cache() -> None:
"""Clear cached GPU detection info."""
global _gpu_info_cache
_gpu_info_cache = None
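
A usage sketch for the detection helpers defined in this new module (not part of the commit; import path assumed):

from codexlens.semantic.gpu_support import (
    clear_gpu_cache,
    detect_gpu,
    get_gpu_summary,
    get_optimal_providers,
)

info = detect_gpu()                          # result is cached after the first call
print(get_gpu_summary())                     # e.g. "GPU: Not available (using CPU)"
print(get_optimal_providers())               # GPU providers first, CPU fallback last
print(get_optimal_providers(use_gpu=False))  # ['CPUExecutionProvider']

clear_gpu_cache()                            # drop the cache, e.g. after a driver change
info = detect_gpu(force_refresh=True)        # re-run detection explicitly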

View File

@@ -1,6 +1,6 @@
{
"name": "claude-code-workflow",
"version": "6.2.2",
"version": "6.2.4",
"description": "JSON-driven multi-agent development framework with intelligent CLI orchestration (Gemini/Qwen/Codex), context-first architecture, and automated workflow execution",
"type": "module",
"main": "ccw/src/index.js",
@@ -28,6 +28,8 @@
"node": ">=16.0.0"
},
"dependencies": {
"@modelcontextprotocol/sdk": "^1.0.4",
"better-sqlite3": "^11.7.0",
"boxen": "^7.1.0",
"chalk": "^5.3.0",
"commander": "^11.0.0",
@@ -36,7 +38,8 @@
"gradient-string": "^2.0.2",
"inquirer": "^9.2.0",
"open": "^9.1.0",
"ora": "^7.0.0"
"ora": "^7.0.0",
"zod": "^4.1.13"
},
"files": [
"ccw/bin/",