feat: Add CodexLens Manager to dashboard and enhance GPU management

- Introduced a new CodexLens Manager item in the dashboard for easier access.
- Implemented GPU management commands in the CLI, including listing available GPUs, selecting a specific GPU, and resetting to automatic detection.
- Enhanced the embedding generation process to utilize GPU resources more effectively, including batch size optimization for better performance.
- Updated the embedder to support device ID options for GPU selection, ensuring compatibility with DirectML and CUDA.
- Added detailed logging and error handling for GPU detection and selection processes.
- Updated package version to 6.2.9 and added comprehensive documentation for Codex Agent Execution Protocol.
catlog22
2025-12-23 18:35:30 +08:00
parent 5ff2a43b70
commit 39056292b7
17 changed files with 1834 additions and 78 deletions
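
The three CLI commands introduced here compose into a short workflow (command names and flags are taken from the implementations below; device IDs depend on the machine):

    codexlens gpu-list          # inspect detected devices and the automatic pick
    codexlens gpu-select 1      # pin embedding generation to device 1
    codexlens gpu-reset         # return to automatic selection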

View File

@@ -1955,3 +1955,178 @@ def embeddings_generate(
    console.print("\n[dim]Use vector search with:[/dim]")
    console.print("  [cyan]codexlens search 'your query' --mode pure-vector[/cyan]")


# ==================== GPU Management Commands ====================


@app.command(name="gpu-list")
def gpu_list(
    json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
) -> None:
    """List available GPU devices for embedding acceleration.

    Shows all detected GPU devices with their capabilities and selection status.
    Discrete GPUs (NVIDIA, AMD) are automatically preferred over integrated GPUs.

    Examples:
        codexlens gpu-list         # List all GPUs
        codexlens gpu-list --json  # JSON output for scripting
    """
    from codexlens.semantic.gpu_support import get_gpu_devices, detect_gpu, get_selected_device_id

    gpu_info = detect_gpu()
    devices = get_gpu_devices()
    selected_id = get_selected_device_id()

    if json_mode:
        print_json(
            success=True,
            result={
                "devices": devices,
                "selected_device_id": selected_id,
                "gpu_available": gpu_info.gpu_available,
                "providers": gpu_info.onnx_providers,
            },
        )
    else:
        if not devices:
            console.print("[yellow]No GPU devices detected[/yellow]")
            console.print(f"ONNX Providers: [dim]{', '.join(gpu_info.onnx_providers)}[/dim]")
            return

        console.print("[bold]Available GPU Devices[/bold]\n")
        table = Table(show_header=True, header_style="bold")
        table.add_column("ID", justify="center")
        table.add_column("Name")
        table.add_column("Vendor", justify="center")
        table.add_column("Type", justify="center")
        table.add_column("Status", justify="center")

        for dev in devices:
            type_str = "[green]Discrete[/green]" if dev["is_discrete"] else "[dim]Integrated[/dim]"
            vendor_color = {
                "nvidia": "green",
                "amd": "red",
                "intel": "blue",
            }.get(dev["vendor"], "white")
            vendor_str = f"[{vendor_color}]{dev['vendor'].upper()}[/{vendor_color}]"

            status_parts = []
            if dev["is_preferred"]:
                status_parts.append("[cyan]Auto[/cyan]")
            if dev["is_selected"]:
                status_parts.append("[green]✓ Selected[/green]")
            status_str = " ".join(status_parts) if status_parts else "[dim]—[/dim]"

            table.add_row(
                str(dev["device_id"]),
                dev["name"],
                vendor_str,
                type_str,
                status_str,
            )

        console.print(table)
        console.print(f"\nONNX Providers: [dim]{', '.join(gpu_info.onnx_providers)}[/dim]")
        console.print("\n[dim]Select GPU with:[/dim]")
        console.print("  [cyan]codexlens gpu-select <device_id>[/cyan]")


@app.command(name="gpu-select")
def gpu_select(
    device_id: int = typer.Argument(
        ...,
        help="GPU device ID to use for embeddings. Use 'codexlens gpu-list' to see available IDs.",
    ),
    json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
) -> None:
    """Select a specific GPU device for embedding generation.

    By default, CodexLens automatically selects the most powerful GPU
    (discrete over integrated). Use this command to override the selection.

    Examples:
        codexlens gpu-select 1         # Use GPU device 1
        codexlens gpu-select 0 --json  # Select GPU 0 with JSON output
    """
    from codexlens.semantic.gpu_support import set_selected_device_id, get_gpu_devices
    from codexlens.semantic.embedder import clear_embedder_cache

    devices = get_gpu_devices()
    valid_ids = [dev["device_id"] for dev in devices]

    if device_id not in valid_ids:
        if json_mode:
            print_json(success=False, error=f"Invalid device_id {device_id}. Valid IDs: {valid_ids}")
        else:
            console.print(f"[red]Error:[/red] Invalid device_id {device_id}")
            console.print(f"Valid IDs: {valid_ids}")
            console.print("\n[dim]Use 'codexlens gpu-list' to see available devices[/dim]")
        raise typer.Exit(code=1)

    success = set_selected_device_id(device_id)
    if success:
        # Clear embedder cache to force reload with new GPU
        clear_embedder_cache()
        device_name = next((dev["name"] for dev in devices if dev["device_id"] == device_id), "Unknown")
        if json_mode:
            print_json(
                success=True,
                result={
                    "device_id": device_id,
                    "device_name": device_name,
                    "message": f"GPU selection set to device {device_id}: {device_name}",
                },
            )
        else:
            console.print("[green]✓[/green] GPU selection updated")
            console.print(f"  Device ID: {device_id}")
            console.print(f"  Device: [cyan]{device_name}[/cyan]")
            console.print("\n[dim]New embeddings will use this GPU[/dim]")
    else:
        if json_mode:
            print_json(success=False, error="Failed to set GPU selection")
        else:
            console.print("[red]Error:[/red] Failed to set GPU selection")
        raise typer.Exit(code=1)


@app.command(name="gpu-reset")
def gpu_reset(
    json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
) -> None:
    """Reset GPU selection to automatic detection.

    Clears any manual GPU selection and returns to automatic selection
    (discrete GPU preferred over integrated).

    Examples:
        codexlens gpu-reset  # Reset to auto-detection
    """
    from codexlens.semantic.gpu_support import set_selected_device_id, detect_gpu
    from codexlens.semantic.embedder import clear_embedder_cache

    set_selected_device_id(None)
    clear_embedder_cache()
    gpu_info = detect_gpu(force_refresh=True)

    if json_mode:
        print_json(
            success=True,
            result={
                "message": "GPU selection reset to auto-detection",
                "preferred_device_id": gpu_info.preferred_device_id,
                "preferred_device_name": gpu_info.gpu_name,
            },
        )
    else:
        console.print("[green]✓[/green] GPU selection reset to auto-detection")
        if gpu_info.preferred_device_id is not None:
            console.print(f"  Auto-selected device: {gpu_info.preferred_device_id}")
            console.print(f"  Device: [cyan]{gpu_info.gpu_name}[/cyan]")
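
For scripting, the --json payload mirrors the result dict assembled above. A minimal consumer sketch, assuming print_json emits the success/result envelope used throughout this CLI and that codexlens is on PATH:

    import json
    import subprocess

    # Invoke the new command and parse its JSON payload.
    proc = subprocess.run(
        ["codexlens", "gpu-list", "--json"],
        capture_output=True, text=True, check=True,
    )
    payload = json.loads(proc.stdout)
    for dev in payload["result"]["devices"]:
        print(dev["device_id"], dev["name"], dev["is_selected"])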

View File

@@ -21,7 +21,7 @@ logger = logging.getLogger(__name__)
# Embedding batch size - larger values improve throughput on modern hardware
# Benchmark: 256 gives ~2.35x speedup over 64 with DirectML GPU acceleration
-EMBEDDING_BATCH_SIZE = 256  # Optimized from 64 based on batch size benchmarks
+EMBEDDING_BATCH_SIZE = 256


def _generate_chunks_from_cursor(
@@ -337,7 +337,8 @@ def generate_embeddings(
        # Generate embeddings directly to numpy (no tolist() conversion)
        try:
            batch_contents = [chunk.content for chunk, _ in chunk_batch]
-            embeddings_numpy = embedder.embed_to_numpy(batch_contents)
+            # Pass batch_size to fastembed for optimal GPU utilization
+            embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
            # Use add_chunks_batch_numpy to avoid numpy->list->numpy roundtrip
            vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
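
The ~2.35x figure cited in the comment can be sanity-checked with a quick timing sketch. This is illustrative only; it assumes Embedder() is constructible with defaults (the embedder diff below shows a DEFAULT_MODEL fallback) and that the model is already downloaded:

    import time
    from codexlens.semantic.embedder import Embedder

    embedder = Embedder()  # assumed default construction
    texts = ["def add(a, b): return a + b"] * 2048  # synthetic corpus

    for bs in (64, 256):
        t0 = time.perf_counter()
        embedder.embed_to_numpy(texts, batch_size=bs)
        print(f"batch_size={bs}: {time.perf_counter() - t0:.2f}s")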

View File

@@ -14,7 +14,7 @@ from typing import Dict, Iterable, List, Optional
import numpy as np

from . import SEMANTIC_AVAILABLE
-from .gpu_support import get_optimal_providers, is_gpu_available, get_gpu_summary
+from .gpu_support import get_optimal_providers, is_gpu_available, get_gpu_summary, get_selected_device_id

logger = logging.getLogger(__name__)
@@ -144,11 +144,12 @@ class Embedder:
        else:
            self.model_name = self.DEFAULT_MODEL

-        # Configure ONNX execution providers
+        # Configure ONNX execution providers with device_id options for GPU selection
+        # Using with_device_options=True ensures DirectML/CUDA device_id is passed correctly
        if providers is not None:
            self._providers = providers
        else:
-            self._providers = get_optimal_providers(use_gpu=use_gpu)
+            self._providers = get_optimal_providers(use_gpu=use_gpu, with_device_options=True)

        self._use_gpu = use_gpu
        self._model = None
@@ -168,7 +169,12 @@ class Embedder:
"""Check if GPU acceleration is enabled for this embedder."""
gpu_providers = {"CUDAExecutionProvider", "TensorrtExecutionProvider",
"DmlExecutionProvider", "ROCMExecutionProvider", "CoreMLExecutionProvider"}
return any(p in gpu_providers for p in self._providers)
# Handle both string providers and tuple providers (name, options)
for p in self._providers:
provider_name = p[0] if isinstance(p, tuple) else p
if provider_name in gpu_providers:
return True
return False
def _load_model(self) -> None:
"""Lazy load the embedding model with configured providers."""
@@ -177,7 +183,9 @@ class Embedder:
        from fastembed import TextEmbedding

-        # fastembed supports 'providers' parameter for ONNX execution providers
+        # providers already include device_id options via get_optimal_providers(with_device_options=True)
+        # DO NOT pass device_ids separately - fastembed ignores it when providers is specified
+        # See: fastembed/text/onnx_embedding.py - device_ids is only used with cuda=True
        try:
            self._model = TextEmbedding(
                model_name=self.model_name,
@@ -215,7 +223,7 @@ class Embedder:
        embeddings = list(self._model.embed(texts))
        return [emb.tolist() for emb in embeddings]

-    def embed_to_numpy(self, texts: str | Iterable[str]) -> np.ndarray:
+    def embed_to_numpy(self, texts: str | Iterable[str], batch_size: Optional[int] = None) -> np.ndarray:
        """Generate embeddings for one or more texts (returns numpy arrays).

        This method is more memory-efficient than embed() as it avoids converting
@@ -224,6 +232,8 @@ class Embedder:
        Args:
            texts: Single text or iterable of texts to embed.
+            batch_size: Optional batch size for fastembed processing.
+                Larger values improve GPU utilization but use more memory.

        Returns:
            numpy.ndarray of shape (n_texts, embedding_dim) containing embeddings.
@@ -235,8 +245,12 @@ class Embedder:
        else:
            texts = list(texts)

-        # Return embeddings as numpy array directly (no .tolist() conversion)
-        embeddings = list(self._model.embed(texts))
+        # Pass batch_size to fastembed for optimal GPU utilization
+        # Default batch_size in fastembed is 256, but larger values can improve throughput
+        if batch_size is not None:
+            embeddings = list(self._model.embed(texts, batch_size=batch_size))
+        else:
+            embeddings = list(self._model.embed(texts))
        return np.array(embeddings)

    def embed_single(self, text: str) -> List[float]:
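
The tuple handling added to is_gpu_enabled above exists because a provider list built with device options mixes two shapes. A minimal illustration of the normalization:

    # Mixed provider list, as produced by get_optimal_providers(with_device_options=True)
    providers = [("DmlExecutionProvider", {"device_id": 1}), "CPUExecutionProvider"]

    # Normalize to bare names before membership tests
    names = [p[0] if isinstance(p, tuple) else p for p in providers]
    assert names == ["DmlExecutionProvider", "CPUExecutionProvider"]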

View File

@@ -13,6 +13,15 @@ from typing import List, Optional
logger = logging.getLogger(__name__)


@dataclass
class GPUDevice:
    """Individual GPU device info."""

    device_id: int
    name: str
    is_discrete: bool  # True for discrete GPU (NVIDIA, AMD), False for integrated (Intel UHD)
    vendor: str  # "nvidia", "amd", "intel", "unknown"


@dataclass
class GPUInfo:
    """GPU availability and configuration info."""
@@ -22,15 +31,117 @@ class GPUInfo:
    gpu_count: int = 0
    gpu_name: Optional[str] = None
    onnx_providers: List[str] = None
    devices: List[GPUDevice] = None  # List of detected GPU devices
    preferred_device_id: Optional[int] = None  # Preferred GPU for embedding

    def __post_init__(self):
        if self.onnx_providers is None:
            self.onnx_providers = ["CPUExecutionProvider"]
        if self.devices is None:
            self.devices = []


_gpu_info_cache: Optional[GPUInfo] = None
def _enumerate_gpus() -> List[GPUDevice]:
    """Enumerate available GPU devices using WMI on Windows.

    Returns:
        List of GPUDevice with device info, ordered by device_id.
    """
    devices = []
    try:
        import subprocess
        import sys

        if sys.platform == "win32":
            # Use PowerShell to query GPU information via WMI
            cmd = [
                "powershell", "-NoProfile", "-Command",
                "Get-WmiObject Win32_VideoController | Select-Object DeviceID, Name, AdapterCompatibility | ConvertTo-Json",
            ]
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
            if result.returncode == 0 and result.stdout.strip():
                import json
                gpu_data = json.loads(result.stdout)
                # Handle single GPU case (returns dict instead of list)
                if isinstance(gpu_data, dict):
                    gpu_data = [gpu_data]
                for idx, gpu in enumerate(gpu_data):
                    name = gpu.get("Name", "Unknown GPU")
                    compat = gpu.get("AdapterCompatibility", "").lower()
                    # Determine vendor
                    name_lower = name.lower()
                    if "nvidia" in name_lower or "nvidia" in compat:
                        vendor = "nvidia"
                        is_discrete = True
                    elif "amd" in name_lower or "radeon" in name_lower or "amd" in compat:
                        vendor = "amd"
                        is_discrete = True
                    elif "intel" in name_lower or "intel" in compat:
                        vendor = "intel"
                        # Intel UHD/Iris are integrated, Intel Arc is discrete
                        is_discrete = "arc" in name_lower
                    else:
                        vendor = "unknown"
                        is_discrete = False
                    devices.append(GPUDevice(
                        device_id=idx,
                        name=name,
                        is_discrete=is_discrete,
                        vendor=vendor,
                    ))
                    logger.debug(f"Detected GPU {idx}: {name} (vendor={vendor}, discrete={is_discrete})")
    except Exception as e:
        logger.debug(f"GPU enumeration failed: {e}")
    return devices
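
For reference, the parsed ConvertTo-Json payload looks roughly like this (names and values invented for illustration; a single-GPU machine yields a bare object rather than a list, which is why the dict case is handled above):

    # Hypothetical parsed output for a dual-GPU machine
    gpu_data = [
        {"DeviceID": "VideoController1", "Name": "Intel(R) UHD Graphics 770",
         "AdapterCompatibility": "Intel Corporation"},
        {"DeviceID": "VideoController2", "Name": "NVIDIA GeForce RTX 4070",
         "AdapterCompatibility": "NVIDIA"},
    ]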
def _get_preferred_device_id(devices: List[GPUDevice]) -> Optional[int]:
    """Determine the preferred GPU device_id for embedding.

    Preference order:
        1. NVIDIA discrete GPU (best DirectML/CUDA support)
        2. AMD discrete GPU
        3. Intel Arc (discrete)
        4. Intel integrated (fallback)

    Returns:
        device_id of preferred GPU, or None to use default.
    """
    if not devices:
        return None

    # Priority: NVIDIA > AMD > Intel Arc > Intel integrated
    priority_order = [
        ("nvidia", True),   # NVIDIA discrete
        ("amd", True),      # AMD discrete
        ("intel", True),    # Intel Arc (discrete)
        ("intel", False),   # Intel integrated (fallback)
    ]
    for target_vendor, target_discrete in priority_order:
        for device in devices:
            if device.vendor == target_vendor and device.is_discrete == target_discrete:
                logger.info(f"Preferred GPU: {device.name} (device_id={device.device_id})")
                return device.device_id

    # If no match, use first device
    if devices:
        return devices[0].device_id
    return None
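
Applied to the hypothetical dual-GPU machine above, the priority table resolves to the NVIDIA card even though the integrated GPU enumerates first:

    devices = [
        GPUDevice(device_id=0, name="Intel(R) UHD Graphics 770", is_discrete=False, vendor="intel"),
        GPUDevice(device_id=1, name="NVIDIA GeForce RTX 4070", is_discrete=True, vendor="nvidia"),
    ]
    assert _get_preferred_device_id(devices) == 1  # NVIDIA discrete wins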
def detect_gpu(force_refresh: bool = False) -> GPUInfo:
    """Detect available GPU resources for embedding acceleration.
@@ -47,6 +158,18 @@ def detect_gpu(force_refresh: bool = False) -> GPUInfo:
    info = GPUInfo()

    # Enumerate GPU devices first
    info.devices = _enumerate_gpus()
    info.gpu_count = len(info.devices)
    if info.devices:
        # Set preferred device (discrete GPU preferred over integrated)
        info.preferred_device_id = _get_preferred_device_id(info.devices)
        # Set gpu_name to preferred device name
        for dev in info.devices:
            if dev.device_id == info.preferred_device_id:
                info.gpu_name = dev.name
                break

    # Check PyTorch CUDA availability (most reliable detection)
    try:
        import torch
@@ -143,21 +266,48 @@ def detect_gpu(force_refresh: bool = False) -> GPUInfo:
    return info


-def get_optimal_providers(use_gpu: bool = True) -> List[str]:
+def get_optimal_providers(use_gpu: bool = True, with_device_options: bool = False) -> list:
    """Get optimal ONNX execution providers based on availability.

    Args:
        use_gpu: If True, include GPU providers when available.
            If False, force CPU-only execution.
+        with_device_options: If True, return providers as tuples with device_id options
+            for proper GPU device selection (required for DirectML).

    Returns:
-        List of provider names in priority order.
+        List of provider names or tuples (provider_name, options_dict) in priority order.
    """
    if not use_gpu:
        return ["CPUExecutionProvider"]

    gpu_info = detect_gpu()
-    return gpu_info.onnx_providers
+    if not with_device_options:
+        return gpu_info.onnx_providers
+
+    # Build providers with device_id options for GPU providers
+    device_id = get_selected_device_id()
+    providers = []
+    for provider in gpu_info.onnx_providers:
+        if provider == "DmlExecutionProvider" and device_id is not None:
+            # DirectML requires device_id in provider_options tuple
+            providers.append(("DmlExecutionProvider", {"device_id": device_id}))
+            logger.debug(f"DmlExecutionProvider configured with device_id={device_id}")
+        elif provider == "CUDAExecutionProvider" and device_id is not None:
+            # CUDA also supports device_id in provider_options
+            providers.append(("CUDAExecutionProvider", {"device_id": device_id}))
+            logger.debug(f"CUDAExecutionProvider configured with device_id={device_id}")
+        elif provider == "ROCMExecutionProvider" and device_id is not None:
+            # ROCm supports device_id
+            providers.append(("ROCMExecutionProvider", {"device_id": device_id}))
+            logger.debug(f"ROCMExecutionProvider configured with device_id={device_id}")
+        else:
+            # CPU and other providers don't need device_id
+            providers.append(provider)
+    return providers


def is_gpu_available() -> bool:
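
The (name, options) tuples are the standard ONNX Runtime form for per-provider options, so the list returned here can be handed to an InferenceSession unchanged. A sketch, assuming fastembed forwards the providers list verbatim to onnxruntime ("model.onnx" is a placeholder path):

    import onnxruntime as ort

    session = ort.InferenceSession(
        "model.onnx",  # placeholder model path
        providers=[("DmlExecutionProvider", {"device_id": 1}), "CPUExecutionProvider"],
    )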
@@ -190,3 +340,75 @@ def clear_gpu_cache() -> None:
"""Clear cached GPU detection info."""
global _gpu_info_cache
_gpu_info_cache = None
# User-selected device ID (overrides auto-detection)
_selected_device_id: Optional[int] = None
def get_gpu_devices() -> List[dict]:
"""Get list of available GPU devices for frontend selection.
Returns:
List of dicts with device info for each GPU.
"""
info = detect_gpu()
devices = []
for dev in info.devices:
devices.append({
"device_id": dev.device_id,
"name": dev.name,
"vendor": dev.vendor,
"is_discrete": dev.is_discrete,
"is_preferred": dev.device_id == info.preferred_device_id,
"is_selected": dev.device_id == get_selected_device_id(),
})
return devices
def get_selected_device_id() -> Optional[int]:
"""Get the user-selected GPU device_id.
Returns:
User-selected device_id, or auto-detected preferred device_id if not set.
"""
global _selected_device_id
if _selected_device_id is not None:
return _selected_device_id
# Fall back to auto-detected preferred device
info = detect_gpu()
return info.preferred_device_id
def set_selected_device_id(device_id: Optional[int]) -> bool:
"""Set the GPU device_id to use for embeddings.
Args:
device_id: GPU device_id to use, or None to use auto-detection.
Returns:
True if device_id is valid, False otherwise.
"""
global _selected_device_id
if device_id is None:
_selected_device_id = None
logger.info("GPU selection reset to auto-detection")
return True
# Validate device_id exists
info = detect_gpu()
valid_ids = [dev.device_id for dev in info.devices]
if device_id in valid_ids:
_selected_device_id = device_id
device_name = next((dev.name for dev in info.devices if dev.device_id == device_id), "Unknown")
logger.info(f"GPU selection set to device {device_id}: {device_name}")
return True
else:
logger.warning(f"Invalid device_id {device_id}. Valid IDs: {valid_ids}")
return False
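
Taken together, the module now exposes a small selection API; a hypothetical round-trip using only the functions defined above:

    from codexlens.semantic.gpu_support import (
        get_gpu_devices,
        get_selected_device_id,
        set_selected_device_id,
    )

    for dev in get_gpu_devices():
        mark = "preferred" if dev["is_preferred"] else ""
        print(dev["device_id"], dev["name"], mark)

    if set_selected_device_id(1):      # pin device 1 if it exists
        print("selected:", get_selected_device_id())
    set_selected_device_id(None)       # back to auto-detection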