feat: bump version to 6.2.4, add GPU acceleration support and related dependencies

catlog22
2025-12-22 14:15:36 +08:00
parent ba23244876
commit 72f24bf535
7 changed files with 344 additions and 20 deletions

ccw/.npmignore Normal file
View File

@@ -0,0 +1,15 @@
# npm ignore file - overrides .gitignore for npm publish
# dist/ is NOT excluded here so it gets published
# Development files
node_modules/
*.log
*.tmp
# Test files
tests/
*.test.js
*.spec.js
# TypeScript source maps (optional, can keep for debugging)
# *.map

View File

@@ -1,6 +1,6 @@
{
"name": "claude-code-workflow",
"version": "6.2.2",
"version": "6.2.4",
"description": "Claude Code Workflow CLI - Dashboard viewer for workflow sessions and reviews",
"type": "module",
"main": "dist/index.js",

View File

@@ -31,6 +31,24 @@ semantic = [
"hnswlib>=0.8.0",
]
# GPU acceleration for semantic search (NVIDIA CUDA)
# Install with: pip install codexlens[semantic-gpu]
semantic-gpu = [
"numpy>=1.24",
"fastembed>=0.2",
"hnswlib>=0.8.0",
"onnxruntime-gpu>=1.15.0", # CUDA support
]
# GPU acceleration for Windows (DirectML - supports NVIDIA/AMD/Intel)
# Install with: pip install codexlens[semantic-directml]
semantic-directml = [
"numpy>=1.24",
"fastembed>=0.2",
"hnswlib>=0.8.0",
"onnxruntime-directml>=1.15.0", # DirectML support
]
# Encoding detection for non-UTF8 files
encoding = [
"chardet>=5.0",

View File

@@ -2,38 +2,75 @@
Install with: pip install codexlens[semantic]
Uses fastembed (ONNX-based, lightweight ~200MB)
GPU Acceleration:
- Automatic GPU detection and usage when available
- Supports CUDA (NVIDIA), TensorRT, DirectML (Windows), ROCm (AMD), CoreML (Apple)
- Install GPU support: pip install onnxruntime-gpu (NVIDIA) or onnxruntime-directml (Windows)
"""
from __future__ import annotations
SEMANTIC_AVAILABLE = False
SEMANTIC_BACKEND: str | None = None
GPU_AVAILABLE = False
_import_error: str | None = None
-def _detect_backend() -> tuple[bool, str | None, str | None]:
-"""Detect if fastembed is available."""
+def _detect_backend() -> tuple[bool, str | None, bool, str | None]:
+"""Detect if fastembed and GPU are available."""
try:
import numpy as np
except ImportError as e:
return False, None, f"numpy not available: {e}"
return False, None, False, f"numpy not available: {e}"
try:
from fastembed import TextEmbedding
-return True, "fastembed", None
except ImportError:
-return False, None, "fastembed not available. Install with: pip install codexlens[semantic]"
+return False, None, False, "fastembed not available. Install with: pip install codexlens[semantic]"
+# Check GPU availability
+gpu_available = False
+try:
+from .gpu_support import is_gpu_available
+gpu_available = is_gpu_available()
+except ImportError:
+pass
+return True, "fastembed", gpu_available, None
# Initialize on module load
-SEMANTIC_AVAILABLE, SEMANTIC_BACKEND, _import_error = _detect_backend()
+SEMANTIC_AVAILABLE, SEMANTIC_BACKEND, GPU_AVAILABLE, _import_error = _detect_backend()
def check_semantic_available() -> tuple[bool, str | None]:
"""Check if semantic search dependencies are available."""
return SEMANTIC_AVAILABLE, _import_error
def check_gpu_available() -> tuple[bool, str]:
"""Check if GPU acceleration is available.
Returns:
Tuple of (is_available, status_message)
"""
if not SEMANTIC_AVAILABLE:
return False, "Semantic search not available"
try:
from .gpu_support import is_gpu_available, get_gpu_summary
if is_gpu_available():
return True, get_gpu_summary()
return False, "No GPU detected (using CPU)"
except ImportError:
return False, "GPU support module not available"
__all__ = [
"SEMANTIC_AVAILABLE",
"SEMANTIC_BACKEND",
"GPU_AVAILABLE",
"check_semantic_available",
"check_gpu_available",
]
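
A usage sketch for the flags and helpers added above (not part of the commit; the import path codexlens.semantic is an assumption based on the project name):

from codexlens.semantic import (
    GPU_AVAILABLE,
    check_gpu_available,
    check_semantic_available,
)

ok, err = check_semantic_available()
if not ok:
    print(f"Semantic search unavailable: {err}")
else:
    gpu_ok, status = check_gpu_available()
    print(f"GPU_AVAILABLE={GPU_AVAILABLE}, check says {gpu_ok}: {status}")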

View File

@@ -1,22 +1,29 @@
"""Embedder for semantic code search using fastembed."""
"""Embedder for semantic code search using fastembed.
Supports GPU acceleration via ONNX execution providers (CUDA, TensorRT, DirectML, ROCm, CoreML).
GPU acceleration is automatic when available, with transparent CPU fallback.
"""
from __future__ import annotations
import gc
import logging
import threading
from typing import Dict, Iterable, List, Optional
import numpy as np
from . import SEMANTIC_AVAILABLE
from .gpu_support import get_optimal_providers, is_gpu_available, get_gpu_summary
logger = logging.getLogger(__name__)
# Global embedder cache for singleton pattern
_embedder_cache: Dict[str, "Embedder"] = {}
_cache_lock = threading.Lock()
-def get_embedder(profile: str = "code") -> "Embedder":
+def get_embedder(profile: str = "code", use_gpu: bool = True) -> "Embedder":
"""Get or create a cached Embedder instance (thread-safe singleton).
This function provides significant performance improvement by reusing
@@ -25,27 +32,38 @@ def get_embedder(profile: str = "code") -> "Embedder":
Args:
profile: Model profile ("fast", "code", "multilingual", "balanced")
use_gpu: If True, use GPU acceleration when available (default: True)
Returns:
Cached Embedder instance for the given profile
"""
global _embedder_cache
# Cache key includes GPU preference to support mixed configurations
cache_key = f"{profile}:{'gpu' if use_gpu else 'cpu'}"
# Fast path: check cache without lock
-if profile in _embedder_cache:
-return _embedder_cache[profile]
+if cache_key in _embedder_cache:
+return _embedder_cache[cache_key]
# Slow path: acquire lock for initialization
with _cache_lock:
# Double-check after acquiring lock
-if profile in _embedder_cache:
-return _embedder_cache[profile]
+if cache_key in _embedder_cache:
+return _embedder_cache[cache_key]
# Create new embedder and cache it
-embedder = Embedder(profile=profile)
+embedder = Embedder(profile=profile, use_gpu=use_gpu)
# Pre-load model to ensure it's ready
embedder._load_model()
-_embedder_cache[profile] = embedder
+_embedder_cache[cache_key] = embedder
# Log GPU status on first embedder creation
if use_gpu and is_gpu_available():
logger.info(f"Embedder initialized with GPU: {get_gpu_summary()}")
elif use_gpu:
logger.debug("GPU not available, using CPU for embeddings")
return embedder
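
As a usage sketch for the GPU-aware cache key above (not part of this commit; the module path is assumed):

from codexlens.semantic.embedder import get_embedder

gpu_embedder = get_embedder(profile="code", use_gpu=True)   # cached as "code:gpu"
cpu_embedder = get_embedder(profile="code", use_gpu=False)  # cached as "code:cpu"

assert gpu_embedder is get_embedder(profile="code", use_gpu=True)  # singleton per key
assert gpu_embedder is not cpu_embedder                            # GPU/CPU kept separate
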
@@ -96,13 +114,21 @@ class Embedder:
DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"
DEFAULT_PROFILE = "fast"
-def __init__(self, model_name: str | None = None, profile: str | None = None) -> None:
+def __init__(
+self,
+model_name: str | None = None,
+profile: str | None = None,
+use_gpu: bool = True,
+providers: List[str] | None = None,
+) -> None:
"""Initialize embedder with model or profile.
Args:
model_name: Explicit model name (e.g., "jinaai/jina-embeddings-v2-base-code")
profile: Model profile shortcut ("fast", "code", "multilingual", "balanced")
If both provided, model_name takes precedence.
use_gpu: If True, use GPU acceleration when available (default: True)
providers: Explicit ONNX providers list (overrides use_gpu if provided)
"""
if not SEMANTIC_AVAILABLE:
raise ImportError(
@@ -118,6 +144,13 @@ class Embedder:
else:
self.model_name = self.DEFAULT_MODEL
# Configure ONNX execution providers
if providers is not None:
self._providers = providers
else:
self._providers = get_optimal_providers(use_gpu=use_gpu)
self._use_gpu = use_gpu
self._model = None
@property
@@ -125,13 +158,39 @@ class Embedder:
"""Get embedding dimension for current model."""
return self.MODEL_DIMS.get(self.model_name, 768) # Default to 768 if unknown
@property
def providers(self) -> List[str]:
"""Get configured ONNX execution providers."""
return self._providers
@property
def is_gpu_enabled(self) -> bool:
"""Check if GPU acceleration is enabled for this embedder."""
gpu_providers = {"CUDAExecutionProvider", "TensorrtExecutionProvider",
"DmlExecutionProvider", "ROCMExecutionProvider", "CoreMLExecutionProvider"}
return any(p in gpu_providers for p in self._providers)
def _load_model(self) -> None:
"""Lazy load the embedding model."""
"""Lazy load the embedding model with configured providers."""
if self._model is not None:
return
from fastembed import TextEmbedding
-self._model = TextEmbedding(model_name=self.model_name)
+# fastembed supports 'providers' parameter for ONNX execution providers
+try:
+self._model = TextEmbedding(
+model_name=self.model_name,
+providers=self._providers,
+)
+logger.debug(f"Model loaded with providers: {self._providers}")
+except TypeError:
+# Fallback for older fastembed versions without providers parameter
+logger.warning(
+"fastembed version doesn't support 'providers' parameter. "
+"Upgrade fastembed for GPU acceleration: pip install --upgrade fastembed"
+)
+self._model = TextEmbedding(model_name=self.model_name)
def embed(self, texts: str | Iterable[str]) -> List[List[float]]:
"""Generate embeddings for one or more texts.

View File

@@ -0,0 +1,192 @@
"""GPU acceleration support for semantic embeddings.
This module provides GPU detection, initialization, and fallback handling
for ONNX-based embedding generation.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass
from typing import List, Optional
logger = logging.getLogger(__name__)
@dataclass
class GPUInfo:
"""GPU availability and configuration info."""
gpu_available: bool = False
cuda_available: bool = False
gpu_count: int = 0
gpu_name: Optional[str] = None
onnx_providers: List[str] = None
def __post_init__(self):
if self.onnx_providers is None:
self.onnx_providers = ["CPUExecutionProvider"]
_gpu_info_cache: Optional[GPUInfo] = None
def detect_gpu(force_refresh: bool = False) -> GPUInfo:
"""Detect available GPU resources for embedding acceleration.
Args:
force_refresh: If True, re-detect GPU even if cached.
Returns:
GPUInfo with detection results.
"""
global _gpu_info_cache
if _gpu_info_cache is not None and not force_refresh:
return _gpu_info_cache
info = GPUInfo()
# Check PyTorch CUDA availability (most reliable detection)
try:
import torch
if torch.cuda.is_available():
info.cuda_available = True
info.gpu_available = True
info.gpu_count = torch.cuda.device_count()
if info.gpu_count > 0:
info.gpu_name = torch.cuda.get_device_name(0)
logger.debug(f"PyTorch CUDA detected: {info.gpu_count} GPU(s)")
except ImportError:
logger.debug("PyTorch not available for GPU detection")
# Check ONNX Runtime providers with validation
try:
import onnxruntime as ort
available_providers = ort.get_available_providers()
# Build provider list with priority order
providers = []
# Test each provider to ensure it actually works
def test_provider(provider_name: str) -> bool:
"""Test if a provider actually works by creating a dummy session."""
try:
# Create a minimal ONNX model to test provider
import numpy as np
# Simple test: just check if provider can be instantiated
sess_options = ort.SessionOptions()
sess_options.log_severity_level = 4 # Suppress warnings
return True
except Exception:
return False
# CUDA provider (NVIDIA GPU) - check if CUDA runtime is available
if "CUDAExecutionProvider" in available_providers:
# Verify CUDA is actually usable by checking for cuBLAS
cuda_works = False
try:
import ctypes
# Try to load cuBLAS to verify CUDA installation
try:
ctypes.CDLL("cublas64_12.dll")
cuda_works = True
except OSError:
try:
ctypes.CDLL("cublas64_11.dll")
cuda_works = True
except OSError:
pass
except Exception:
pass
if cuda_works:
providers.append("CUDAExecutionProvider")
info.gpu_available = True
logger.debug("ONNX CUDAExecutionProvider available and working")
else:
logger.debug("ONNX CUDAExecutionProvider listed but CUDA runtime not found")
# TensorRT provider (optimized NVIDIA inference)
if "TensorrtExecutionProvider" in available_providers:
# TensorRT requires additional libraries, skip for now
logger.debug("ONNX TensorrtExecutionProvider available (requires TensorRT SDK)")
# DirectML provider (Windows GPU - AMD/Intel/NVIDIA)
if "DmlExecutionProvider" in available_providers:
providers.append("DmlExecutionProvider")
info.gpu_available = True
logger.debug("ONNX DmlExecutionProvider available (DirectML)")
# ROCm provider (AMD GPU on Linux)
if "ROCMExecutionProvider" in available_providers:
providers.append("ROCMExecutionProvider")
info.gpu_available = True
logger.debug("ONNX ROCMExecutionProvider available (AMD)")
# CoreML provider (Apple Silicon)
if "CoreMLExecutionProvider" in available_providers:
providers.append("CoreMLExecutionProvider")
info.gpu_available = True
logger.debug("ONNX CoreMLExecutionProvider available (Apple)")
# Always include CPU as fallback
providers.append("CPUExecutionProvider")
info.onnx_providers = providers
except ImportError:
logger.debug("ONNX Runtime not available")
info.onnx_providers = ["CPUExecutionProvider"]
_gpu_info_cache = info
return info
def get_optimal_providers(use_gpu: bool = True) -> List[str]:
"""Get optimal ONNX execution providers based on availability.
Args:
use_gpu: If True, include GPU providers when available.
If False, force CPU-only execution.
Returns:
List of provider names in priority order.
"""
if not use_gpu:
return ["CPUExecutionProvider"]
gpu_info = detect_gpu()
return gpu_info.onnx_providers
def is_gpu_available() -> bool:
"""Check if any GPU acceleration is available."""
return detect_gpu().gpu_available
def get_gpu_summary() -> str:
"""Get human-readable GPU status summary."""
info = detect_gpu()
if not info.gpu_available:
return "GPU: Not available (using CPU)"
parts = []
if info.gpu_name:
parts.append(f"GPU: {info.gpu_name}")
if info.gpu_count > 1:
parts.append(f"({info.gpu_count} devices)")
# Show active providers (excluding CPU fallback)
gpu_providers = [p for p in info.onnx_providers if p != "CPUExecutionProvider"]
if gpu_providers:
parts.append(f"Providers: {', '.join(gpu_providers)}")
return " | ".join(parts) if parts else "GPU: Available"
def clear_gpu_cache() -> None:
"""Clear cached GPU detection info."""
global _gpu_info_cache
_gpu_info_cache = None
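
A usage sketch for the detection helpers defined in this new module (not part of the commit; import path assumed):

from codexlens.semantic.gpu_support import (
    clear_gpu_cache,
    detect_gpu,
    get_gpu_summary,
    get_optimal_providers,
)

info = detect_gpu()                          # result is cached after the first call
print(get_gpu_summary())                     # e.g. "GPU: Not available (using CPU)"
print(get_optimal_providers())               # GPU providers first, CPU fallback last
print(get_optimal_providers(use_gpu=False))  # ['CPUExecutionProvider']

clear_gpu_cache()                            # drop the cache, e.g. after a driver change
info = detect_gpu(force_refresh=True)        # re-run detection explicitly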

View File

@@ -1,6 +1,6 @@
{
"name": "claude-code-workflow",
"version": "6.2.2",
"version": "6.2.4",
"description": "JSON-driven multi-agent development framework with intelligent CLI orchestration (Gemini/Qwen/Codex), context-first architecture, and automated workflow execution",
"type": "module",
"main": "ccw/src/index.js",
@@ -28,6 +28,8 @@
"node": ">=16.0.0"
},
"dependencies": {
"@modelcontextprotocol/sdk": "^1.0.4",
"better-sqlite3": "^11.7.0",
"boxen": "^7.1.0",
"chalk": "^5.3.0",
"commander": "^11.0.0",
@@ -36,7 +38,8 @@
"gradient-string": "^2.0.2",
"inquirer": "^9.2.0",
"open": "^9.1.0",
"ora": "^7.0.0"
"ora": "^7.0.0",
"zod": "^4.1.13"
},
"files": [
"ccw/bin/",