feat: Add CodexLens Manager to dashboard and enhance GPU management

- Introduced a new CodexLens Manager item in the dashboard for easier access.
- Implemented GPU management commands in the CLI, including listing available GPUs, selecting a specific GPU, and resetting to automatic detection.
- Enhanced the embedding generation process to utilize GPU resources more effectively, including batch size optimization for better performance.
- Updated the embedder to support device ID options for GPU selection, ensuring compatibility with DirectML and CUDA.
- Added detailed logging and error handling for GPU detection and selection processes.
- Updated package version to 6.2.9 and added comprehensive documentation for Codex Agent Execution Protocol.
catlog22
2025-12-23 18:35:30 +08:00
parent 5ff2a43b70
commit 39056292b7
17 changed files with 1834 additions and 78 deletions
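
The three CLI commands introduced here compose into a short workflow (command names and flags are taken from the implementations below; device IDs depend on the machine):

    codexlens gpu-list          # inspect detected devices and the automatic pick
    codexlens gpu-select 1      # pin embedding generation to device 1
    codexlens gpu-reset         # return to automatic selection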

View File

@@ -1955,3 +1955,178 @@ def embeddings_generate(
    console.print("\n[dim]Use vector search with:[/dim]")
    console.print("  [cyan]codexlens search 'your query' --mode pure-vector[/cyan]")


# ==================== GPU Management Commands ====================


@app.command(name="gpu-list")
def gpu_list(
    json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
) -> None:
    """List available GPU devices for embedding acceleration.

    Shows all detected GPU devices with their capabilities and selection status.
    Discrete GPUs (NVIDIA, AMD) are automatically preferred over integrated GPUs.

    Examples:
        codexlens gpu-list         # List all GPUs
        codexlens gpu-list --json  # JSON output for scripting
    """
    from codexlens.semantic.gpu_support import get_gpu_devices, detect_gpu, get_selected_device_id

    gpu_info = detect_gpu()
    devices = get_gpu_devices()
    selected_id = get_selected_device_id()

    if json_mode:
        print_json(
            success=True,
            result={
                "devices": devices,
                "selected_device_id": selected_id,
                "gpu_available": gpu_info.gpu_available,
                "providers": gpu_info.onnx_providers,
            },
        )
    else:
        if not devices:
            console.print("[yellow]No GPU devices detected[/yellow]")
            console.print(f"ONNX Providers: [dim]{', '.join(gpu_info.onnx_providers)}[/dim]")
            return

        console.print("[bold]Available GPU Devices[/bold]\n")
        table = Table(show_header=True, header_style="bold")
        table.add_column("ID", justify="center")
        table.add_column("Name")
        table.add_column("Vendor", justify="center")
        table.add_column("Type", justify="center")
        table.add_column("Status", justify="center")

        for dev in devices:
            type_str = "[green]Discrete[/green]" if dev["is_discrete"] else "[dim]Integrated[/dim]"
            vendor_color = {
                "nvidia": "green",
                "amd": "red",
                "intel": "blue",
            }.get(dev["vendor"], "white")
            vendor_str = f"[{vendor_color}]{dev['vendor'].upper()}[/{vendor_color}]"

            status_parts = []
            if dev["is_preferred"]:
                status_parts.append("[cyan]Auto[/cyan]")
            if dev["is_selected"]:
                status_parts.append("[green]✓ Selected[/green]")
            status_str = " ".join(status_parts) if status_parts else "[dim]—[/dim]"

            table.add_row(
                str(dev["device_id"]),
                dev["name"],
                vendor_str,
                type_str,
                status_str,
            )

        console.print(table)
        console.print(f"\nONNX Providers: [dim]{', '.join(gpu_info.onnx_providers)}[/dim]")
        console.print("\n[dim]Select GPU with:[/dim]")
        console.print("  [cyan]codexlens gpu-select <device_id>[/cyan]")


@app.command(name="gpu-select")
def gpu_select(
    device_id: int = typer.Argument(
        ...,
        help="GPU device ID to use for embeddings. Use 'codexlens gpu-list' to see available IDs.",
    ),
    json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
) -> None:
    """Select a specific GPU device for embedding generation.

    By default, CodexLens automatically selects the most powerful GPU
    (discrete over integrated). Use this command to override the selection.

    Examples:
        codexlens gpu-select 1         # Use GPU device 1
        codexlens gpu-select 0 --json  # Select GPU 0 with JSON output
    """
    from codexlens.semantic.gpu_support import set_selected_device_id, get_gpu_devices
    from codexlens.semantic.embedder import clear_embedder_cache

    devices = get_gpu_devices()
    valid_ids = [dev["device_id"] for dev in devices]

    if device_id not in valid_ids:
        if json_mode:
            print_json(success=False, error=f"Invalid device_id {device_id}. Valid IDs: {valid_ids}")
        else:
            console.print(f"[red]Error:[/red] Invalid device_id {device_id}")
            console.print(f"Valid IDs: {valid_ids}")
            console.print("\n[dim]Use 'codexlens gpu-list' to see available devices[/dim]")
        raise typer.Exit(code=1)

    success = set_selected_device_id(device_id)
    if success:
        # Clear embedder cache to force reload with new GPU
        clear_embedder_cache()
        device_name = next((dev["name"] for dev in devices if dev["device_id"] == device_id), "Unknown")
        if json_mode:
            print_json(
                success=True,
                result={
                    "device_id": device_id,
                    "device_name": device_name,
                    "message": f"GPU selection set to device {device_id}: {device_name}",
                },
            )
        else:
            console.print("[green]✓[/green] GPU selection updated")
            console.print(f"  Device ID: {device_id}")
            console.print(f"  Device: [cyan]{device_name}[/cyan]")
            console.print("\n[dim]New embeddings will use this GPU[/dim]")
    else:
        if json_mode:
            print_json(success=False, error="Failed to set GPU selection")
        else:
            console.print("[red]Error:[/red] Failed to set GPU selection")
        raise typer.Exit(code=1)


@app.command(name="gpu-reset")
def gpu_reset(
    json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
) -> None:
    """Reset GPU selection to automatic detection.

    Clears any manual GPU selection and returns to automatic selection
    (discrete GPU preferred over integrated).

    Examples:
        codexlens gpu-reset  # Reset to auto-detection
    """
    from codexlens.semantic.gpu_support import set_selected_device_id, detect_gpu
    from codexlens.semantic.embedder import clear_embedder_cache

    set_selected_device_id(None)
    clear_embedder_cache()
    gpu_info = detect_gpu(force_refresh=True)

    if json_mode:
        print_json(
            success=True,
            result={
                "message": "GPU selection reset to auto-detection",
                "preferred_device_id": gpu_info.preferred_device_id,
                "preferred_device_name": gpu_info.gpu_name,
            },
        )
    else:
        console.print("[green]✓[/green] GPU selection reset to auto-detection")
        if gpu_info.preferred_device_id is not None:
            console.print(f"  Auto-selected device: {gpu_info.preferred_device_id}")
            console.print(f"  Device: [cyan]{gpu_info.gpu_name}[/cyan]")
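
For scripting, the --json payload mirrors the result dict assembled above. A minimal consumer sketch, assuming print_json emits the success/result envelope used throughout this CLI and that codexlens is on PATH:

    import json
    import subprocess

    # Invoke the new command and parse its JSON payload.
    proc = subprocess.run(
        ["codexlens", "gpu-list", "--json"],
        capture_output=True, text=True, check=True,
    )
    payload = json.loads(proc.stdout)
    for dev in payload["result"]["devices"]:
        print(dev["device_id"], dev["name"], dev["is_selected"])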

View File

@@ -21,7 +21,7 @@ logger = logging.getLogger(__name__)
# Embedding batch size - larger values improve throughput on modern hardware
# Benchmark: 256 gives ~2.35x speedup over 64 with DirectML GPU acceleration
-EMBEDDING_BATCH_SIZE = 256  # Optimized from 64 based on batch size benchmarks
+EMBEDDING_BATCH_SIZE = 256


def _generate_chunks_from_cursor(
@@ -337,7 +337,8 @@ def generate_embeddings(
        # Generate embeddings directly to numpy (no tolist() conversion)
        try:
            batch_contents = [chunk.content for chunk, _ in chunk_batch]
-            embeddings_numpy = embedder.embed_to_numpy(batch_contents)
+            # Pass batch_size to fastembed for optimal GPU utilization
+            embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
            # Use add_chunks_batch_numpy to avoid numpy->list->numpy roundtrip
            vector_store.add_chunks_batch_numpy(chunk_batch, embeddings_numpy)
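
The ~2.35x figure cited in the comment can be sanity-checked with a quick timing sketch. This is illustrative only; it assumes Embedder() is constructible with defaults (the embedder diff below shows a DEFAULT_MODEL fallback) and that the model is already downloaded:

    import time
    from codexlens.semantic.embedder import Embedder

    embedder = Embedder()  # assumed default construction
    texts = ["def add(a, b): return a + b"] * 2048  # synthetic corpus

    for bs in (64, 256):
        t0 = time.perf_counter()
        embedder.embed_to_numpy(texts, batch_size=bs)
        print(f"batch_size={bs}: {time.perf_counter() - t0:.2f}s")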

View File

@@ -14,7 +14,7 @@ from typing import Dict, Iterable, List, Optional
import numpy as np

from . import SEMANTIC_AVAILABLE
-from .gpu_support import get_optimal_providers, is_gpu_available, get_gpu_summary
+from .gpu_support import get_optimal_providers, is_gpu_available, get_gpu_summary, get_selected_device_id

logger = logging.getLogger(__name__)
@@ -144,11 +144,12 @@ class Embedder:
        else:
            self.model_name = self.DEFAULT_MODEL

-        # Configure ONNX execution providers
+        # Configure ONNX execution providers with device_id options for GPU selection
+        # Using with_device_options=True ensures DirectML/CUDA device_id is passed correctly
        if providers is not None:
            self._providers = providers
        else:
-            self._providers = get_optimal_providers(use_gpu=use_gpu)
+            self._providers = get_optimal_providers(use_gpu=use_gpu, with_device_options=True)

        self._use_gpu = use_gpu
        self._model = None
@@ -168,7 +169,12 @@ class Embedder:
"""Check if GPU acceleration is enabled for this embedder."""
gpu_providers = {"CUDAExecutionProvider", "TensorrtExecutionProvider",
"DmlExecutionProvider", "ROCMExecutionProvider", "CoreMLExecutionProvider"}
return any(p in gpu_providers for p in self._providers)
# Handle both string providers and tuple providers (name, options)
for p in self._providers:
provider_name = p[0] if isinstance(p, tuple) else p
if provider_name in gpu_providers:
return True
return False
def _load_model(self) -> None:
"""Lazy load the embedding model with configured providers."""
@@ -177,7 +183,9 @@ class Embedder:
        from fastembed import TextEmbedding

-        # fastembed supports 'providers' parameter for ONNX execution providers
+        # providers already include device_id options via get_optimal_providers(with_device_options=True)
+        # DO NOT pass device_ids separately - fastembed ignores it when providers is specified
+        # See: fastembed/text/onnx_embedding.py - device_ids is only used with cuda=True
        try:
            self._model = TextEmbedding(
                model_name=self.model_name,
@@ -215,7 +223,7 @@ class Embedder:
        embeddings = list(self._model.embed(texts))
        return [emb.tolist() for emb in embeddings]

-    def embed_to_numpy(self, texts: str | Iterable[str]) -> np.ndarray:
+    def embed_to_numpy(self, texts: str | Iterable[str], batch_size: Optional[int] = None) -> np.ndarray:
        """Generate embeddings for one or more texts (returns numpy arrays).

        This method is more memory-efficient than embed() as it avoids converting
@@ -224,6 +232,8 @@ class Embedder:
        Args:
            texts: Single text or iterable of texts to embed.
+            batch_size: Optional batch size for fastembed processing.
+                Larger values improve GPU utilization but use more memory.

        Returns:
            numpy.ndarray of shape (n_texts, embedding_dim) containing embeddings.
@@ -235,8 +245,12 @@ class Embedder:
        else:
            texts = list(texts)

-        # Return embeddings as numpy array directly (no .tolist() conversion)
-        embeddings = list(self._model.embed(texts))
+        # Pass batch_size to fastembed for optimal GPU utilization
+        # Default batch_size in fastembed is 256, but larger values can improve throughput
+        if batch_size is not None:
+            embeddings = list(self._model.embed(texts, batch_size=batch_size))
+        else:
+            embeddings = list(self._model.embed(texts))
        return np.array(embeddings)

    def embed_single(self, text: str) -> List[float]:
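
The tuple handling added to is_gpu_enabled above exists because a provider list built with device options mixes two shapes. A minimal illustration of the normalization:

    # Mixed provider list, as produced by get_optimal_providers(with_device_options=True)
    providers = [("DmlExecutionProvider", {"device_id": 1}), "CPUExecutionProvider"]

    # Normalize to bare names before membership tests
    names = [p[0] if isinstance(p, tuple) else p for p in providers]
    assert names == ["DmlExecutionProvider", "CPUExecutionProvider"]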

View File

@@ -13,6 +13,15 @@ from typing import List, Optional
logger = logging.getLogger(__name__)


@dataclass
class GPUDevice:
    """Individual GPU device info."""

    device_id: int
    name: str
    is_discrete: bool  # True for discrete GPU (NVIDIA, AMD), False for integrated (Intel UHD)
    vendor: str  # "nvidia", "amd", "intel", "unknown"


@dataclass
class GPUInfo:
    """GPU availability and configuration info."""
@@ -22,15 +31,117 @@ class GPUInfo:
    gpu_count: int = 0
    gpu_name: Optional[str] = None
    onnx_providers: List[str] = None
    devices: List[GPUDevice] = None  # List of detected GPU devices
    preferred_device_id: Optional[int] = None  # Preferred GPU for embedding

    def __post_init__(self):
        if self.onnx_providers is None:
            self.onnx_providers = ["CPUExecutionProvider"]
        if self.devices is None:
            self.devices = []


_gpu_info_cache: Optional[GPUInfo] = None
def _enumerate_gpus() -> List[GPUDevice]:
    """Enumerate available GPU devices using WMI on Windows.

    Returns:
        List of GPUDevice with device info, ordered by device_id.
    """
    devices = []
    try:
        import subprocess
        import sys

        if sys.platform == "win32":
            # Use PowerShell to query GPU information via WMI
            cmd = [
                "powershell", "-NoProfile", "-Command",
                "Get-WmiObject Win32_VideoController | Select-Object DeviceID, Name, AdapterCompatibility | ConvertTo-Json",
            ]
            result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
            if result.returncode == 0 and result.stdout.strip():
                import json
                gpu_data = json.loads(result.stdout)
                # Handle single GPU case (returns dict instead of list)
                if isinstance(gpu_data, dict):
                    gpu_data = [gpu_data]
                for idx, gpu in enumerate(gpu_data):
                    name = gpu.get("Name", "Unknown GPU")
                    compat = gpu.get("AdapterCompatibility", "").lower()
                    # Determine vendor
                    name_lower = name.lower()
                    if "nvidia" in name_lower or "nvidia" in compat:
                        vendor = "nvidia"
                        is_discrete = True
                    elif "amd" in name_lower or "radeon" in name_lower or "amd" in compat:
                        vendor = "amd"
                        is_discrete = True
                    elif "intel" in name_lower or "intel" in compat:
                        vendor = "intel"
                        # Intel UHD/Iris are integrated, Intel Arc is discrete
                        is_discrete = "arc" in name_lower
                    else:
                        vendor = "unknown"
                        is_discrete = False
                    devices.append(GPUDevice(
                        device_id=idx,
                        name=name,
                        is_discrete=is_discrete,
                        vendor=vendor,
                    ))
                    logger.debug(f"Detected GPU {idx}: {name} (vendor={vendor}, discrete={is_discrete})")
    except Exception as e:
        logger.debug(f"GPU enumeration failed: {e}")
    return devices
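
For reference, the parsed ConvertTo-Json payload looks roughly like this (names and values invented for illustration; a single-GPU machine yields a bare object rather than a list, which is why the dict case is handled above):

    # Hypothetical parsed output for a dual-GPU machine
    gpu_data = [
        {"DeviceID": "VideoController1", "Name": "Intel(R) UHD Graphics 770",
         "AdapterCompatibility": "Intel Corporation"},
        {"DeviceID": "VideoController2", "Name": "NVIDIA GeForce RTX 4070",
         "AdapterCompatibility": "NVIDIA"},
    ]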
def _get_preferred_device_id(devices: List[GPUDevice]) -> Optional[int]:
    """Determine the preferred GPU device_id for embedding.

    Preference order:
        1. NVIDIA discrete GPU (best DirectML/CUDA support)
        2. AMD discrete GPU
        3. Intel Arc (discrete)
        4. Intel integrated (fallback)

    Returns:
        device_id of preferred GPU, or None to use default.
    """
    if not devices:
        return None

    # Priority: NVIDIA > AMD > Intel Arc > Intel integrated
    priority_order = [
        ("nvidia", True),   # NVIDIA discrete
        ("amd", True),      # AMD discrete
        ("intel", True),    # Intel Arc (discrete)
        ("intel", False),   # Intel integrated (fallback)
    ]
    for target_vendor, target_discrete in priority_order:
        for device in devices:
            if device.vendor == target_vendor and device.is_discrete == target_discrete:
                logger.info(f"Preferred GPU: {device.name} (device_id={device.device_id})")
                return device.device_id

    # If no match, use first device
    if devices:
        return devices[0].device_id
    return None
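
Applied to the hypothetical dual-GPU machine above, the priority table resolves to the NVIDIA card even though the integrated GPU enumerates first:

    devices = [
        GPUDevice(device_id=0, name="Intel(R) UHD Graphics 770", is_discrete=False, vendor="intel"),
        GPUDevice(device_id=1, name="NVIDIA GeForce RTX 4070", is_discrete=True, vendor="nvidia"),
    ]
    assert _get_preferred_device_id(devices) == 1  # NVIDIA discrete wins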
def detect_gpu(force_refresh: bool = False) -> GPUInfo:
    """Detect available GPU resources for embedding acceleration.
@@ -47,6 +158,18 @@ def detect_gpu(force_refresh: bool = False) -> GPUInfo:
    info = GPUInfo()

    # Enumerate GPU devices first
    info.devices = _enumerate_gpus()
    info.gpu_count = len(info.devices)
    if info.devices:
        # Set preferred device (discrete GPU preferred over integrated)
        info.preferred_device_id = _get_preferred_device_id(info.devices)
        # Set gpu_name to preferred device name
        for dev in info.devices:
            if dev.device_id == info.preferred_device_id:
                info.gpu_name = dev.name
                break

    # Check PyTorch CUDA availability (most reliable detection)
    try:
        import torch
@@ -143,21 +266,48 @@ def detect_gpu(force_refresh: bool = False) -> GPUInfo:
    return info


-def get_optimal_providers(use_gpu: bool = True) -> List[str]:
+def get_optimal_providers(use_gpu: bool = True, with_device_options: bool = False) -> list:
    """Get optimal ONNX execution providers based on availability.

    Args:
        use_gpu: If True, include GPU providers when available.
            If False, force CPU-only execution.
+        with_device_options: If True, return providers as tuples with device_id options
+            for proper GPU device selection (required for DirectML).

    Returns:
-        List of provider names in priority order.
+        List of provider names or tuples (provider_name, options_dict) in priority order.
    """
    if not use_gpu:
        return ["CPUExecutionProvider"]

    gpu_info = detect_gpu()
-    return gpu_info.onnx_providers
+    if not with_device_options:
+        return gpu_info.onnx_providers
+
+    # Build providers with device_id options for GPU providers
+    device_id = get_selected_device_id()
+    providers = []
+    for provider in gpu_info.onnx_providers:
+        if provider == "DmlExecutionProvider" and device_id is not None:
+            # DirectML requires device_id in provider_options tuple
+            providers.append(("DmlExecutionProvider", {"device_id": device_id}))
+            logger.debug(f"DmlExecutionProvider configured with device_id={device_id}")
+        elif provider == "CUDAExecutionProvider" and device_id is not None:
+            # CUDA also supports device_id in provider_options
+            providers.append(("CUDAExecutionProvider", {"device_id": device_id}))
+            logger.debug(f"CUDAExecutionProvider configured with device_id={device_id}")
+        elif provider == "ROCMExecutionProvider" and device_id is not None:
+            # ROCm supports device_id
+            providers.append(("ROCMExecutionProvider", {"device_id": device_id}))
+            logger.debug(f"ROCMExecutionProvider configured with device_id={device_id}")
+        else:
+            # CPU and other providers don't need device_id
+            providers.append(provider)
+    return providers


def is_gpu_available() -> bool:
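
The (name, options) tuples are the standard ONNX Runtime form for per-provider options, so the list returned here can be handed to an InferenceSession unchanged. A sketch, assuming fastembed forwards the providers list verbatim to onnxruntime ("model.onnx" is a placeholder path):

    import onnxruntime as ort

    session = ort.InferenceSession(
        "model.onnx",  # placeholder model path
        providers=[("DmlExecutionProvider", {"device_id": 1}), "CPUExecutionProvider"],
    )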
@@ -190,3 +340,75 @@ def clear_gpu_cache() -> None:
"""Clear cached GPU detection info."""
global _gpu_info_cache
_gpu_info_cache = None
# User-selected device ID (overrides auto-detection)
_selected_device_id: Optional[int] = None
def get_gpu_devices() -> List[dict]:
"""Get list of available GPU devices for frontend selection.
Returns:
List of dicts with device info for each GPU.
"""
info = detect_gpu()
devices = []
for dev in info.devices:
devices.append({
"device_id": dev.device_id,
"name": dev.name,
"vendor": dev.vendor,
"is_discrete": dev.is_discrete,
"is_preferred": dev.device_id == info.preferred_device_id,
"is_selected": dev.device_id == get_selected_device_id(),
})
return devices
def get_selected_device_id() -> Optional[int]:
"""Get the user-selected GPU device_id.
Returns:
User-selected device_id, or auto-detected preferred device_id if not set.
"""
global _selected_device_id
if _selected_device_id is not None:
return _selected_device_id
# Fall back to auto-detected preferred device
info = detect_gpu()
return info.preferred_device_id
def set_selected_device_id(device_id: Optional[int]) -> bool:
"""Set the GPU device_id to use for embeddings.
Args:
device_id: GPU device_id to use, or None to use auto-detection.
Returns:
True if device_id is valid, False otherwise.
"""
global _selected_device_id
if device_id is None:
_selected_device_id = None
logger.info("GPU selection reset to auto-detection")
return True
# Validate device_id exists
info = detect_gpu()
valid_ids = [dev.device_id for dev in info.devices]
if device_id in valid_ids:
_selected_device_id = device_id
device_name = next((dev.name for dev in info.devices if dev.device_id == device_id), "Unknown")
logger.info(f"GPU selection set to device {device_id}: {device_name}")
return True
else:
logger.warning(f"Invalid device_id {device_id}. Valid IDs: {valid_ids}")
return False
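
Taken together, the module now exposes a small selection API; a hypothetical round-trip using only the functions defined above:

    from codexlens.semantic.gpu_support import (
        get_gpu_devices,
        get_selected_device_id,
        set_selected_device_id,
    )

    for dev in get_gpu_devices():
        mark = "preferred" if dev["is_preferred"] else ""
        print(dev["device_id"], dev["name"], mark)

    if set_selected_device_id(1):      # pin device 1 if it exists
        print("selected:", get_selected_device_id())
    set_selected_device_id(None)       # back to auto-detection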