mirror of https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-11 02:33:51 +08:00
feat: bump version to 6.2.4, add GPU acceleration support and related dependencies
ccw/.npmignore (new file, 15 lines)
@@ -0,0 +1,15 @@
+# npm ignore file - overrides .gitignore for npm publish
+# dist/ is NOT excluded here so it gets published
+
+# Development files
+node_modules/
+*.log
+*.tmp
+
+# Test files
+tests/
+*.test.js
+*.spec.js
+
+# TypeScript source maps (optional, can keep for debugging)
+# *.map
@@ -1,6 +1,6 @@
 {
   "name": "claude-code-workflow",
-  "version": "6.2.2",
+  "version": "6.2.4",
   "description": "Claude Code Workflow CLI - Dashboard viewer for workflow sessions and reviews",
   "type": "module",
   "main": "dist/index.js",
@@ -31,6 +31,24 @@ semantic = [
     "hnswlib>=0.8.0",
 ]
 
+# GPU acceleration for semantic search (NVIDIA CUDA)
+# Install with: pip install codexlens[semantic-gpu]
+semantic-gpu = [
+    "numpy>=1.24",
+    "fastembed>=0.2",
+    "hnswlib>=0.8.0",
+    "onnxruntime-gpu>=1.15.0",  # CUDA support
+]
+
+# GPU acceleration for Windows (DirectML - supports NVIDIA/AMD/Intel)
+# Install with: pip install codexlens[semantic-directml]
+semantic-directml = [
+    "numpy>=1.24",
+    "fastembed>=0.2",
+    "hnswlib>=0.8.0",
+    "onnxruntime-directml>=1.15.0",  # DirectML support
+]
+
 # Encoding detection for non-UTF8 files
 encoding = [
     "chardet>=5.0",
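After installing either extra, a quick way to confirm which ONNX execution providers the environment actually exposes is to query onnxruntime directly. This is only a sketch: it assumes one of the extras above has already been installed and that the published package name is codexlens, as the install hints suggest.

    # Sanity check after: pip install codexlens[semantic-gpu]  (or [semantic-directml])
    import onnxruntime as ort

    # CUDA builds should list CUDAExecutionProvider; DirectML builds list DmlExecutionProvider.
    # CPUExecutionProvider is always present as the fallback.
    print(ort.get_available_providers())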
@@ -2,38 +2,75 @@
 
 Install with: pip install codexlens[semantic]
 Uses fastembed (ONNX-based, lightweight ~200MB)
+
+GPU Acceleration:
+- Automatic GPU detection and usage when available
+- Supports CUDA (NVIDIA), TensorRT, DirectML (Windows), ROCm (AMD), CoreML (Apple)
+- Install GPU support: pip install onnxruntime-gpu (NVIDIA) or onnxruntime-directml (Windows)
 """
 
 from __future__ import annotations
 
 SEMANTIC_AVAILABLE = False
 SEMANTIC_BACKEND: str | None = None
+GPU_AVAILABLE = False
 _import_error: str | None = None
 
 
-def _detect_backend() -> tuple[bool, str | None, str | None]:
-    """Detect if fastembed is available."""
+def _detect_backend() -> tuple[bool, str | None, bool, str | None]:
+    """Detect if fastembed and GPU are available."""
     try:
         import numpy as np
     except ImportError as e:
-        return False, None, f"numpy not available: {e}"
+        return False, None, False, f"numpy not available: {e}"
 
     try:
         from fastembed import TextEmbedding
-        return True, "fastembed", None
+    except ImportError:
+        return False, None, False, "fastembed not available. Install with: pip install codexlens[semantic]"
+
+    # Check GPU availability
+    gpu_available = False
+    try:
+        from .gpu_support import is_gpu_available
+        gpu_available = is_gpu_available()
     except ImportError:
         pass
 
-    return False, None, "fastembed not available. Install with: pip install codexlens[semantic]"
+    return True, "fastembed", gpu_available, None
 
 
 # Initialize on module load
-SEMANTIC_AVAILABLE, SEMANTIC_BACKEND, _import_error = _detect_backend()
+SEMANTIC_AVAILABLE, SEMANTIC_BACKEND, GPU_AVAILABLE, _import_error = _detect_backend()
 
 
 def check_semantic_available() -> tuple[bool, str | None]:
     """Check if semantic search dependencies are available."""
     return SEMANTIC_AVAILABLE, _import_error
 
 
+def check_gpu_available() -> tuple[bool, str]:
+    """Check if GPU acceleration is available.
+
+    Returns:
+        Tuple of (is_available, status_message)
+    """
+    if not SEMANTIC_AVAILABLE:
+        return False, "Semantic search not available"
+
+    try:
+        from .gpu_support import is_gpu_available, get_gpu_summary
+        if is_gpu_available():
+            return True, get_gpu_summary()
+        return False, "No GPU detected (using CPU)"
+    except ImportError:
+        return False, "GPU support module not available"
+
+
 __all__ = [
     "SEMANTIC_AVAILABLE",
     "SEMANTIC_BACKEND",
+    "GPU_AVAILABLE",
     "check_semantic_available",
+    "check_gpu_available",
 ]
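A minimal usage sketch of the new module-level status API (the import path codexlens.semantic is assumed from the install hints and relative imports above; the printed strings are illustrative):

    from codexlens.semantic import check_semantic_available, check_gpu_available

    ok, err = check_semantic_available()
    if not ok:
        print(f"semantic search unavailable: {err}")
    else:
        gpu_ok, status = check_gpu_available()
        # status is get_gpu_summary() on success, otherwise a CPU-fallback or missing-module message
        print(f"GPU acceleration: {gpu_ok} ({status})")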
@@ -1,22 +1,29 @@
-"""Embedder for semantic code search using fastembed."""
+"""Embedder for semantic code search using fastembed.
+
+Supports GPU acceleration via ONNX execution providers (CUDA, TensorRT, DirectML, ROCm, CoreML).
+GPU acceleration is automatic when available, with transparent CPU fallback.
+"""
 
 from __future__ import annotations
 
 import gc
+import logging
 import threading
 from typing import Dict, Iterable, List, Optional
 
 import numpy as np
 
 from . import SEMANTIC_AVAILABLE
+from .gpu_support import get_optimal_providers, is_gpu_available, get_gpu_summary
+
+logger = logging.getLogger(__name__)
 
 # Global embedder cache for singleton pattern
 _embedder_cache: Dict[str, "Embedder"] = {}
 _cache_lock = threading.Lock()
 
 
-def get_embedder(profile: str = "code") -> "Embedder":
+def get_embedder(profile: str = "code", use_gpu: bool = True) -> "Embedder":
     """Get or create a cached Embedder instance (thread-safe singleton).
 
     This function provides significant performance improvement by reusing
@@ -25,27 +32,38 @@ def get_embedder(profile: str = "code") -> "Embedder":
 
     Args:
         profile: Model profile ("fast", "code", "multilingual", "balanced")
+        use_gpu: If True, use GPU acceleration when available (default: True)
 
     Returns:
         Cached Embedder instance for the given profile
     """
     global _embedder_cache
 
+    # Cache key includes GPU preference to support mixed configurations
+    cache_key = f"{profile}:{'gpu' if use_gpu else 'cpu'}"
+
     # Fast path: check cache without lock
-    if profile in _embedder_cache:
-        return _embedder_cache[profile]
+    if cache_key in _embedder_cache:
+        return _embedder_cache[cache_key]
 
     # Slow path: acquire lock for initialization
     with _cache_lock:
         # Double-check after acquiring lock
-        if profile in _embedder_cache:
-            return _embedder_cache[profile]
+        if cache_key in _embedder_cache:
+            return _embedder_cache[cache_key]
 
         # Create new embedder and cache it
-        embedder = Embedder(profile=profile)
+        embedder = Embedder(profile=profile, use_gpu=use_gpu)
         # Pre-load model to ensure it's ready
         embedder._load_model()
-        _embedder_cache[profile] = embedder
+        _embedder_cache[cache_key] = embedder
 
+        # Log GPU status on first embedder creation
+        if use_gpu and is_gpu_available():
+            logger.info(f"Embedder initialized with GPU: {get_gpu_summary()}")
+        elif use_gpu:
+            logger.debug("GPU not available, using CPU for embeddings")
+
         return embedder
@@ -96,13 +114,21 @@ class Embedder:
     DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"
     DEFAULT_PROFILE = "fast"
 
-    def __init__(self, model_name: str | None = None, profile: str | None = None) -> None:
+    def __init__(
+        self,
+        model_name: str | None = None,
+        profile: str | None = None,
+        use_gpu: bool = True,
+        providers: List[str] | None = None,
+    ) -> None:
         """Initialize embedder with model or profile.
 
         Args:
             model_name: Explicit model name (e.g., "jinaai/jina-embeddings-v2-base-code")
             profile: Model profile shortcut ("fast", "code", "multilingual", "balanced")
                 If both provided, model_name takes precedence.
+            use_gpu: If True, use GPU acceleration when available (default: True)
+            providers: Explicit ONNX providers list (overrides use_gpu if provided)
         """
         if not SEMANTIC_AVAILABLE:
             raise ImportError(
@@ -118,6 +144,13 @@ class Embedder:
         else:
             self.model_name = self.DEFAULT_MODEL
 
+        # Configure ONNX execution providers
+        if providers is not None:
+            self._providers = providers
+        else:
+            self._providers = get_optimal_providers(use_gpu=use_gpu)
+
+        self._use_gpu = use_gpu
         self._model = None
 
     @property
@@ -125,13 +158,39 @@ class Embedder:
         """Get embedding dimension for current model."""
         return self.MODEL_DIMS.get(self.model_name, 768)  # Default to 768 if unknown
 
+    @property
+    def providers(self) -> List[str]:
+        """Get configured ONNX execution providers."""
+        return self._providers
+
+    @property
+    def is_gpu_enabled(self) -> bool:
+        """Check if GPU acceleration is enabled for this embedder."""
+        gpu_providers = {"CUDAExecutionProvider", "TensorrtExecutionProvider",
+                         "DmlExecutionProvider", "ROCMExecutionProvider", "CoreMLExecutionProvider"}
+        return any(p in gpu_providers for p in self._providers)
+
     def _load_model(self) -> None:
-        """Lazy load the embedding model."""
+        """Lazy load the embedding model with configured providers."""
         if self._model is not None:
             return
 
         from fastembed import TextEmbedding
-        self._model = TextEmbedding(model_name=self.model_name)
+
+        # fastembed supports 'providers' parameter for ONNX execution providers
+        try:
+            self._model = TextEmbedding(
+                model_name=self.model_name,
+                providers=self._providers,
+            )
+            logger.debug(f"Model loaded with providers: {self._providers}")
+        except TypeError:
+            # Fallback for older fastembed versions without providers parameter
+            logger.warning(
+                "fastembed version doesn't support 'providers' parameter. "
+                "Upgrade fastembed for GPU acceleration: pip install --upgrade fastembed"
+            )
+            self._model = TextEmbedding(model_name=self.model_name)
 
     def embed(self, texts: str | Iterable[str]) -> List[List[float]]:
         """Generate embeddings for one or more texts.
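A sketch of how a caller opts in or out of GPU acceleration through the updated embedder API (module path assumed from the diff; the input text passed to embed() is arbitrary):

    from codexlens.semantic.embedder import get_embedder

    gpu_embedder = get_embedder(profile="code", use_gpu=True)   # cached under "code:gpu"
    cpu_embedder = get_embedder(profile="code", use_gpu=False)  # separate "code:cpu" cache entry

    print(gpu_embedder.providers)       # e.g. ["CUDAExecutionProvider", "CPUExecutionProvider"]
    print(gpu_embedder.is_gpu_enabled)  # True only if a GPU provider is configured

    vectors = gpu_embedder.embed(["def add(a, b): return a + b"])
    print(len(vectors), len(vectors[0]))  # one embedding, dimension given by the model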
codex-lens/src/codexlens/semantic/gpu_support.py (new file, 192 lines)
@@ -0,0 +1,192 @@
+"""GPU acceleration support for semantic embeddings.
+
+This module provides GPU detection, initialization, and fallback handling
+for ONNX-based embedding generation.
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from typing import List, Optional
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class GPUInfo:
+    """GPU availability and configuration info."""
+
+    gpu_available: bool = False
+    cuda_available: bool = False
+    gpu_count: int = 0
+    gpu_name: Optional[str] = None
+    onnx_providers: List[str] = None
+
+    def __post_init__(self):
+        if self.onnx_providers is None:
+            self.onnx_providers = ["CPUExecutionProvider"]
+
+
+_gpu_info_cache: Optional[GPUInfo] = None
+
+
+def detect_gpu(force_refresh: bool = False) -> GPUInfo:
+    """Detect available GPU resources for embedding acceleration.
+
+    Args:
+        force_refresh: If True, re-detect GPU even if cached.
+
+    Returns:
+        GPUInfo with detection results.
+    """
+    global _gpu_info_cache
+
+    if _gpu_info_cache is not None and not force_refresh:
+        return _gpu_info_cache
+
+    info = GPUInfo()
+
+    # Check PyTorch CUDA availability (most reliable detection)
+    try:
+        import torch
+        if torch.cuda.is_available():
+            info.cuda_available = True
+            info.gpu_available = True
+            info.gpu_count = torch.cuda.device_count()
+            if info.gpu_count > 0:
+                info.gpu_name = torch.cuda.get_device_name(0)
+            logger.debug(f"PyTorch CUDA detected: {info.gpu_count} GPU(s)")
+    except ImportError:
+        logger.debug("PyTorch not available for GPU detection")
+
+    # Check ONNX Runtime providers with validation
+    try:
+        import onnxruntime as ort
+        available_providers = ort.get_available_providers()
+
+        # Build provider list with priority order
+        providers = []
+
+        # Test each provider to ensure it actually works
+        def test_provider(provider_name: str) -> bool:
+            """Test if a provider actually works by creating a dummy session."""
+            try:
+                # Create a minimal ONNX model to test provider
+                import numpy as np
+                # Simple test: just check if provider can be instantiated
+                sess_options = ort.SessionOptions()
+                sess_options.log_severity_level = 4  # Suppress warnings
+                return True
+            except Exception:
+                return False
+
+        # CUDA provider (NVIDIA GPU) - check if CUDA runtime is available
+        if "CUDAExecutionProvider" in available_providers:
+            # Verify CUDA is actually usable by checking for cuBLAS
+            cuda_works = False
+            try:
+                import ctypes
+                # Try to load cuBLAS to verify CUDA installation
+                try:
+                    ctypes.CDLL("cublas64_12.dll")
+                    cuda_works = True
+                except OSError:
+                    try:
+                        ctypes.CDLL("cublas64_11.dll")
+                        cuda_works = True
+                    except OSError:
+                        pass
+            except Exception:
+                pass
+
+            if cuda_works:
+                providers.append("CUDAExecutionProvider")
+                info.gpu_available = True
+                logger.debug("ONNX CUDAExecutionProvider available and working")
+            else:
+                logger.debug("ONNX CUDAExecutionProvider listed but CUDA runtime not found")
+
+        # TensorRT provider (optimized NVIDIA inference)
+        if "TensorrtExecutionProvider" in available_providers:
+            # TensorRT requires additional libraries, skip for now
+            logger.debug("ONNX TensorrtExecutionProvider available (requires TensorRT SDK)")
+
+        # DirectML provider (Windows GPU - AMD/Intel/NVIDIA)
+        if "DmlExecutionProvider" in available_providers:
+            providers.append("DmlExecutionProvider")
+            info.gpu_available = True
+            logger.debug("ONNX DmlExecutionProvider available (DirectML)")
+
+        # ROCm provider (AMD GPU on Linux)
+        if "ROCMExecutionProvider" in available_providers:
+            providers.append("ROCMExecutionProvider")
+            info.gpu_available = True
+            logger.debug("ONNX ROCMExecutionProvider available (AMD)")
+
+        # CoreML provider (Apple Silicon)
+        if "CoreMLExecutionProvider" in available_providers:
+            providers.append("CoreMLExecutionProvider")
+            info.gpu_available = True
+            logger.debug("ONNX CoreMLExecutionProvider available (Apple)")
+
+        # Always include CPU as fallback
+        providers.append("CPUExecutionProvider")
+
+        info.onnx_providers = providers
+
+    except ImportError:
+        logger.debug("ONNX Runtime not available")
+        info.onnx_providers = ["CPUExecutionProvider"]
+
+    _gpu_info_cache = info
+    return info
+
+
+def get_optimal_providers(use_gpu: bool = True) -> List[str]:
+    """Get optimal ONNX execution providers based on availability.
+
+    Args:
+        use_gpu: If True, include GPU providers when available.
+            If False, force CPU-only execution.
+
+    Returns:
+        List of provider names in priority order.
+    """
+    if not use_gpu:
+        return ["CPUExecutionProvider"]
+
+    gpu_info = detect_gpu()
+    return gpu_info.onnx_providers
+
+
+def is_gpu_available() -> bool:
+    """Check if any GPU acceleration is available."""
+    return detect_gpu().gpu_available
+
+
+def get_gpu_summary() -> str:
+    """Get human-readable GPU status summary."""
+    info = detect_gpu()
+
+    if not info.gpu_available:
+        return "GPU: Not available (using CPU)"
+
+    parts = []
+    if info.gpu_name:
+        parts.append(f"GPU: {info.gpu_name}")
+    if info.gpu_count > 1:
+        parts.append(f"({info.gpu_count} devices)")
+
+    # Show active providers (excluding CPU fallback)
+    gpu_providers = [p for p in info.onnx_providers if p != "CPUExecutionProvider"]
+    if gpu_providers:
+        parts.append(f"Providers: {', '.join(gpu_providers)}")
+
+    return " | ".join(parts) if parts else "GPU: Available"
+
+
+def clear_gpu_cache() -> None:
+    """Clear cached GPU detection info."""
+    global _gpu_info_cache
+    _gpu_info_cache = None
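A sketch of how the detection helpers compose (same module-path assumption as above). detect_gpu() caches its result, so clear_gpu_cache() or force_refresh=True is needed to re-probe after driver or environment changes:

    from codexlens.semantic.gpu_support import (
        detect_gpu, get_optimal_providers, get_gpu_summary, is_gpu_available, clear_gpu_cache
    )

    info = detect_gpu()                          # first call probes torch / onnxruntime, then caches
    print(is_gpu_available(), get_gpu_summary())
    print(get_optimal_providers(use_gpu=True))   # GPU providers first, CPUExecutionProvider last
    print(get_optimal_providers(use_gpu=False))  # always ["CPUExecutionProvider"]

    clear_gpu_cache()                            # or detect_gpu(force_refresh=True)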
@@ -1,6 +1,6 @@
 {
   "name": "claude-code-workflow",
-  "version": "6.2.2",
+  "version": "6.2.4",
   "description": "JSON-driven multi-agent development framework with intelligent CLI orchestration (Gemini/Qwen/Codex), context-first architecture, and automated workflow execution",
   "type": "module",
   "main": "ccw/src/index.js",
@@ -28,6 +28,8 @@
     "node": ">=16.0.0"
   },
   "dependencies": {
+    "@modelcontextprotocol/sdk": "^1.0.4",
+    "better-sqlite3": "^11.7.0",
     "boxen": "^7.1.0",
     "chalk": "^5.3.0",
     "commander": "^11.0.0",
@@ -36,7 +38,8 @@
     "gradient-string": "^2.0.2",
     "inquirer": "^9.2.0",
     "open": "^9.1.0",
-    "ora": "^7.0.0"
+    "ora": "^7.0.0",
+    "zod": "^4.1.13"
   },
   "files": [
     "ccw/bin/",