Mirror of https://github.com/catlog22/Claude-Code-Workflow.git
Synced 2026-02-13 02:41:50 +08:00
feat: Add dynamic batch size calculation and improve embedding management and the configuration system
@@ -93,6 +93,96 @@ export async function handleCodexLensConfigRoutes(ctx: RouteContext): Promise<bo
     return true;
   }
 
+  // API: CodexLens Workspace Status - Get FTS and Vector index status for current workspace
+  if (pathname === '/api/codexlens/workspace-status') {
+    try {
+      const venvStatus = await checkVenvStatus();
+
+      // Default response when not installed
+      if (!venvStatus.ready) {
+        res.writeHead(200, { 'Content-Type': 'application/json' });
+        res.end(JSON.stringify({
+          success: true,
+          hasIndex: false,
+          fts: { percent: 0, indexedFiles: 0, totalFiles: 0 },
+          vector: { percent: 0, filesWithEmbeddings: 0, totalFiles: 0, totalChunks: 0 }
+        }));
+        return true;
+      }
+
+      // Get project info for current workspace
+      const projectResult = await executeCodexLens(['projects', 'get', initialPath, '--json']);
+
+      if (!projectResult.success) {
+        // No index for this workspace
+        res.writeHead(200, { 'Content-Type': 'application/json' });
+        res.end(JSON.stringify({
+          success: true,
+          hasIndex: false,
+          fts: { percent: 0, indexedFiles: 0, totalFiles: 0 },
+          vector: { percent: 0, filesWithEmbeddings: 0, totalFiles: 0, totalChunks: 0 }
+        }));
+        return true;
+      }
+
+      // Parse project data
+      let projectData: any = null;
+      try {
+        const parsed = extractJSON(projectResult.output ?? '');
+        if (parsed.success && parsed.result) {
+          projectData = parsed.result;
+        }
+      } catch (e: unknown) {
+        console.error('[CodexLens] Failed to parse project data:', e instanceof Error ? e.message : String(e));
+      }
+
+      if (!projectData) {
+        res.writeHead(200, { 'Content-Type': 'application/json' });
+        res.end(JSON.stringify({
+          success: true,
+          hasIndex: false,
+          fts: { percent: 0, indexedFiles: 0, totalFiles: 0 },
+          vector: { percent: 0, filesWithEmbeddings: 0, totalFiles: 0, totalChunks: 0 }
+        }));
+        return true;
+      }
+
+      // Calculate FTS and Vector percentages
+      const totalFiles = projectData.total_files || 0;
+      const indexedFiles = projectData.indexed_files || projectData.total_files || 0;
+      const filesWithEmbeddings = projectData.files_with_embeddings || projectData.embedded_files || 0;
+      const totalChunks = projectData.total_chunks || projectData.embedded_chunks || 0;
+
+      // FTS percentage (all indexed files have FTS)
+      const ftsPercent = totalFiles > 0 ? Math.round((indexedFiles / totalFiles) * 100) : 0;
+
+      // Vector percentage (files with embeddings)
+      const vectorPercent = totalFiles > 0 ? Math.round((filesWithEmbeddings / totalFiles) * 1000) / 10 : 0;
+
+      res.writeHead(200, { 'Content-Type': 'application/json' });
+      res.end(JSON.stringify({
+        success: true,
+        hasIndex: true,
+        path: initialPath,
+        fts: {
+          percent: ftsPercent,
+          indexedFiles,
+          totalFiles
+        },
+        vector: {
+          percent: vectorPercent,
+          filesWithEmbeddings,
+          totalFiles,
+          totalChunks
+        }
+      }));
+    } catch (err: unknown) {
+      res.writeHead(500, { 'Content-Type': 'application/json' });
+      res.end(JSON.stringify({ success: false, error: err instanceof Error ? err.message : String(err) }));
+    }
+    return true;
+  }
+
   // API: CodexLens Bootstrap (Install)
   if (pathname === '/api/codexlens/bootstrap' && req.method === 'POST') {
     handlePostRequest(req, res, async () => {
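For reference, the new endpoint can be exercised with a tiny client; a minimal sketch, assuming the dashboard is served at http://localhost:3000 (host and port are placeholders, not part of this change):

    import json
    from urllib.request import urlopen

    BASE_URL = "http://localhost:3000"  # hypothetical dashboard address

    with urlopen(f"{BASE_URL}/api/codexlens/workspace-status") as resp:
        status = json.load(resp)

    # Fields mirror the handler above: fts.percent is an integer 0-100, while
    # vector.percent keeps one decimal place (Math.round(x * 1000) / 10).
    if status.get("hasIndex"):
        fts, vector = status["fts"], status["vector"]
        print(f"FTS: {fts['percent']}% ({fts['indexedFiles']}/{fts['totalFiles']} files)")
        print(f"Vector: {vector['percent']}% ({vector['filesWithEmbeddings']}/{vector['totalFiles']} files, {vector['totalChunks']} chunks)")
    else:
        print("No CodexLens index for this workspace yet")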
@@ -164,9 +254,10 @@ export async function handleCodexLensConfigRoutes(ctx: RouteContext): Promise<bo
     return true;
   }
 
-  const [configResult, statusResult] = await Promise.all([
+  // Use projects list for accurate index_count (same source as /api/codexlens/indexes)
+  const [configResult, projectsResult] = await Promise.all([
     executeCodexLens(['config', '--json']),
-    executeCodexLens(['status', '--json'])
+    executeCodexLens(['projects', 'list', '--json'])
   ]);
 
   // Parse config (extract JSON from output that may contain log messages)
@@ -190,16 +281,27 @@ export async function handleCodexLensConfigRoutes(ctx: RouteContext): Promise<bo
       }
     }
 
-    // Parse status to get index_count (projects_count)
-    if (statusResult.success) {
+    // Parse projects list to get index_count (consistent with /api/codexlens/indexes)
+    if (projectsResult.success) {
       try {
-        const status = extractJSON(statusResult.output ?? '');
-        if (status.success && status.result) {
-          responseData.index_count = status.result.projects_count || 0;
+        const projectsData = extractJSON(projectsResult.output ?? '');
+        if (projectsData.success && Array.isArray(projectsData.result)) {
+          // Filter out test/temp projects (same logic as /api/codexlens/indexes)
+          const validProjects = projectsData.result.filter((project: any) => {
+            if (project.source_root && (
+              project.source_root.includes('\\Temp\\') ||
+              project.source_root.includes('/tmp/') ||
+              project.total_files === 0
+            )) {
+              return false;
+            }
+            return true;
+          });
+          responseData.index_count = validProjects.length;
         }
       } catch (e: unknown) {
-        console.error('[CodexLens] Failed to parse status:', e instanceof Error ? e.message : String(e));
-        console.error('[CodexLens] Status output:', (statusResult.output ?? '').substring(0, 200));
+        console.error('[CodexLens] Failed to parse projects list:', e instanceof Error ? e.message : String(e));
+        console.error('[CodexLens] Projects output:', (projectsResult.output ?? '').substring(0, 200));
       }
     }
 
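The filtering rule above is small enough to restate on its own; a minimal sketch in Python, where the project dicts mimic the JSON emitted by `codexlens projects list --json` (field names as used in the handler, sample values invented for illustration):

    def count_valid_projects(projects: list) -> int:
        """Count projects, skipping the test/temp entries the dashboard also hides."""
        def is_valid(project: dict) -> bool:
            root = project.get("source_root") or ""
            if root and ("\\Temp\\" in root or "/tmp/" in root or project.get("total_files") == 0):
                return False
            return True
        return sum(1 for p in projects if is_valid(p))

    projects = [
        {"source_root": "C:\\Users\\dev\\AppData\\Local\\Temp\\pytest-1234", "total_files": 3},
        {"source_root": "/tmp/scratch-index", "total_files": 12},
        {"source_root": "/home/dev/workspace", "total_files": 0},
        {"source_root": "/home/dev/workspace", "total_files": 842},
    ]
    assert count_valid_projects(projects) == 1  # only the last entry survives the filter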
@@ -5,6 +5,7 @@
 import {
   cancelIndexing,
   checkVenvStatus,
+  checkSemanticStatus,
   ensureLiteLLMEmbedderReady,
   executeCodexLens,
   isIndexingInProgress,
@@ -230,11 +231,29 @@ export async function handleCodexLensIndexRoutes(ctx: RouteContext): Promise<boo
     const resolvedEmbeddingBackend = typeof embeddingBackend === 'string' && embeddingBackend.trim().length > 0 ? embeddingBackend : 'fastembed';
     const resolvedMaxWorkers = typeof maxWorkers === 'number' ? maxWorkers : Number(maxWorkers);
 
-    // Ensure LiteLLM backend dependencies are installed before running the CLI
-    if (resolvedIndexType !== 'normal' && resolvedEmbeddingBackend === 'litellm') {
-      const installResult = await ensureLiteLLMEmbedderReady();
-      if (!installResult.success) {
-        return { success: false, error: installResult.error || 'Failed to prepare LiteLLM embedder', status: 500 };
+    // Pre-check: Verify embedding backend availability before proceeding with vector indexing
+    // This prevents silent degradation where vector indexing is skipped without error
+    if (resolvedIndexType !== 'normal') {
+      if (resolvedEmbeddingBackend === 'litellm') {
+        // For litellm backend, ensure ccw-litellm is installed
+        const installResult = await ensureLiteLLMEmbedderReady();
+        if (!installResult.success) {
+          return {
+            success: false,
+            error: installResult.error || 'LiteLLM embedding backend is not available. Please install ccw-litellm first.',
+            status: 500
+          };
+        }
+      } else {
+        // For fastembed backend (default), check semantic dependencies
+        const semanticStatus = await checkSemanticStatus();
+        if (!semanticStatus.available) {
+          return {
+            success: false,
+            error: semanticStatus.error || 'FastEmbed semantic backend is not available. Please install semantic dependencies first (CodeLens Settings → Install Semantic).',
+            status: 500
+          };
+        }
       }
     }
 
@@ -9,7 +9,7 @@ import {
   installSemantic,
 } from '../../../tools/codex-lens.js';
 import type { GpuMode } from '../../../tools/codex-lens.js';
-import { loadLiteLLMApiConfig } from '../../../config/litellm-api-config-manager.js';
+import { loadLiteLLMApiConfig, getAvailableModelsForType, getProvider, getAllProviders } from '../../../config/litellm-api-config-manager.js';
 import {
   isUvAvailable,
   createCodexLensUvManager,
@@ -317,16 +317,21 @@ export async function handleCodexLensSemanticRoutes(ctx: RouteContext): Promise<
       config_source: 'default'
     };
 
-    // Load LiteLLM endpoints for dropdown
+    // Load LiteLLM reranker models for dropdown (from litellm-api-config providers)
     try {
-      const litellmConfig = loadLiteLLMApiConfig(initialPath);
-      if (litellmConfig.endpoints && Array.isArray(litellmConfig.endpoints)) {
-        rerankerConfig.litellm_endpoints = litellmConfig.endpoints.map(
-          (ep: any) => ep.alias || ep.name || ep.baseUrl
-        ).filter(Boolean);
+      const availableRerankerModels = getAvailableModelsForType(initialPath, 'reranker');
+      if (availableRerankerModels && Array.isArray(availableRerankerModels)) {
+        // Return full model info for frontend to use
+        (rerankerConfig as any).litellm_models = availableRerankerModels.map((m: any) => ({
+          modelId: m.modelId,
+          modelName: m.modelName,
+          providers: m.providers
+        }));
+        // Keep litellm_endpoints for backward compatibility (just model IDs)
+        rerankerConfig.litellm_endpoints = availableRerankerModels.map((m: any) => m.modelId);
       }
     } catch {
-      // LiteLLM config not available, continue with empty endpoints
+      // LiteLLM config not available, continue with empty models
     }
 
     // If CodexLens is installed, try to get actual config
@@ -407,6 +412,97 @@ export async function handleCodexLensSemanticRoutes(ctx: RouteContext): Promise<
     try {
       const updates: string[] = [];
 
+      // Special handling for litellm backend - auto-configure from litellm-api-config
+      if (resolvedBackend === 'litellm' && (resolvedModelName || resolvedLiteLLMEndpoint)) {
+        const selectedModel = resolvedModelName || resolvedLiteLLMEndpoint;
+
+        // Find the provider that has this model
+        const providers = getAllProviders(initialPath);
+        let providerWithModel: any = null;
+        let foundModel: any = null;
+
+        for (const provider of providers) {
+          if (!provider.enabled || !provider.rerankerModels) continue;
+          const model = provider.rerankerModels.find((m: any) => m.id === selectedModel && m.enabled);
+          if (model) {
+            providerWithModel = provider;
+            foundModel = model;
+            break;
+          }
+        }
+
+        if (providerWithModel) {
+          // Set backend to litellm
+          const backendResult = await executeCodexLens(['config', 'set', 'reranker_backend', 'litellm', '--json']);
+          if (backendResult.success) updates.push('backend');
+
+          // Set model
+          const modelResult = await executeCodexLens(['config', 'set', 'reranker_model', selectedModel, '--json']);
+          if (modelResult.success) updates.push('model_name');
+
+          // Auto-configure API credentials from provider
+          // Write to CodexLens .env file for persistence
+          const { writeFileSync, existsSync, readFileSync } = await import('fs');
+          const { join } = await import('path');
+          const { homedir } = await import('os');
+
+          const codexlensDir = join(homedir(), '.codexlens');
+          const envFile = join(codexlensDir, '.env');
+
+          // Read existing .env content
+          let envContent = '';
+          if (existsSync(envFile)) {
+            envContent = readFileSync(envFile, 'utf-8');
+          }
+
+          // Update or add RERANKER_API_KEY and RERANKER_API_BASE
+          const apiKey = providerWithModel.apiKey;
+          const apiBase = providerWithModel.apiBase;
+
+          // Helper to update env var in content
+          const updateEnvVar = (content: string, key: string, value: string): string => {
+            const regex = new RegExp(`^${key}=.*$`, 'm');
+            const newLine = `${key}="${value}"`;
+            if (regex.test(content)) {
+              return content.replace(regex, newLine);
+            } else {
+              return content.trim() + '\n' + newLine;
+            }
+          };
+
+          if (apiKey) {
+            envContent = updateEnvVar(envContent, 'RERANKER_API_KEY', apiKey);
+            envContent = updateEnvVar(envContent, 'CODEXLENS_RERANKER_API_KEY', apiKey);
+            process.env.RERANKER_API_KEY = apiKey;
+            updates.push('api_key (auto-configured)');
+          }
+          if (apiBase) {
+            envContent = updateEnvVar(envContent, 'RERANKER_API_BASE', apiBase);
+            envContent = updateEnvVar(envContent, 'CODEXLENS_RERANKER_API_BASE', apiBase);
+            process.env.RERANKER_API_BASE = apiBase;
+            updates.push('api_base (auto-configured)');
+          }
+
+          // Write updated .env
+          writeFileSync(envFile, envContent.trim() + '\n', 'utf-8');
+
+          return {
+            success: true,
+            message: `LiteLLM backend configured with model: ${selectedModel}`,
+            updated_fields: updates,
+            provider: providerWithModel.name,
+            auto_configured: true
+          };
+        } else {
+          return {
+            success: false,
+            error: `Model "${selectedModel}" not found in any enabled LiteLLM provider. Please configure it in API Settings first.`,
+            status: 400
+          };
+        }
+      }
+
+      // Standard handling for non-litellm backends
       // Set backend
       if (resolvedBackend) {
         const result = await executeCodexLens(['config', 'set', 'reranker_backend', resolvedBackend, '--json']);
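The updateEnvVar helper above is a plain upsert on KEY=value lines: replace the line if the key already exists, otherwise append it. A minimal Python sketch of the same idea (illustrative only, with placeholder values; not part of the change):

    import re

    def update_env_var(content: str, key: str, value: str) -> str:
        """Replace an existing KEY=... line or append KEY="value" at the end."""
        pattern = re.compile(rf"^{re.escape(key)}=.*$", re.MULTILINE)
        new_line = f'{key}="{value}"'
        if pattern.search(content):
            return pattern.sub(new_line, content, count=1)
        return content.strip() + "\n" + new_line

    env = 'RERANKER_API_BASE="https://old.example"\n'
    env = update_env_var(env, "RERANKER_API_BASE", "https://new.example")
    env = update_env_var(env, "RERANKER_API_KEY", "sk-placeholder")
    print(env)
    # RERANKER_API_BASE="https://new.example"
    # RERANKER_API_KEY="sk-placeholder"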
@@ -425,8 +521,8 @@ export async function handleCodexLensSemanticRoutes(ctx: RouteContext): Promise<
         if (result.success) updates.push('api_provider');
       }
 
-      // Set LiteLLM endpoint
-      if (resolvedLiteLLMEndpoint) {
+      // Set LiteLLM endpoint (for backward compatibility)
+      if (resolvedLiteLLMEndpoint && resolvedBackend !== 'litellm') {
         const result = await executeCodexLens([
           'config',
           'set',
@@ -6057,6 +6057,7 @@ function buildRerankerConfigContent(config) {
   const availableBackends = config.available_backends || ['onnx', 'api', 'litellm', 'legacy'];
   const apiProviders = config.api_providers || ['siliconflow', 'cohere', 'jina'];
   const litellmEndpoints = config.litellm_endpoints || [];
+  const litellmModels = config.litellm_models || []; // Rich model info with providers
 
   // ONNX models
   const onnxModels = [
@@ -6067,11 +6068,12 @@ function buildRerankerConfigContent(config) {
   ];
 
   // Build backend options
+  const hasLitellmModels = litellmModels.length > 0 || litellmEndpoints.length > 0;
   const backendOptions = availableBackends.map(function(b) {
     const labels = {
       'onnx': 'ONNX (Local, Optimum)',
-      'api': 'API (SiliconFlow/Cohere/Jina)',
-      'litellm': 'LiteLLM (Custom Endpoint)',
+      'api': 'API (Manual Config)',
+      'litellm': hasLitellmModels ? 'LiteLLM (Auto-configured)' : 'LiteLLM (Not configured)',
       'legacy': 'Legacy (SentenceTransformers)'
     };
     return '<option value="' + b + '" ' + (backend === b ? 'selected' : '') + '>' + (labels[b] || b) + '</option>';
@@ -6087,12 +6089,21 @@ function buildRerankerConfigContent(config) {
     return '<option value="' + m + '" ' + (modelName === m ? 'selected' : '') + '>' + m + '</option>';
   }).join('');
 
-  // Build LiteLLM endpoint options
-  const litellmOptions = litellmEndpoints.length > 0
-    ? litellmEndpoints.map(function(ep) {
-        return '<option value="' + ep + '">' + ep + '</option>';
+  // Build LiteLLM model options (use rich model data if available)
+  const litellmOptions = litellmModels.length > 0
+    ? litellmModels.map(function(m) {
+        // Display: "ModelName (Provider)" for better UX
+        const providerNames = m.providers && m.providers.length > 0
+          ? m.providers.join(', ')
+          : 'Unknown';
+        const displayName = m.modelName + ' (' + providerNames + ')';
+        return '<option value="' + m.modelId + '">' + displayName + '</option>';
       }).join('')
-    : '<option value="" disabled>No endpoints configured</option>';
+    : (litellmEndpoints.length > 0
+        ? litellmEndpoints.map(function(ep) {
+            return '<option value="' + ep + '">' + ep + '</option>';
+          }).join('')
+        : '<option value="" disabled>No models configured</option>');
 
   return '<div class="modal-backdrop" id="rerankerConfigModal">' +
     '<div class="modal-container max-w-xl">' +
@@ -6162,13 +6173,16 @@ function buildRerankerConfigContent(config) {
 
     // LiteLLM Section (visible when backend=litellm)
     '<div id="rerankerLitellmSection" class="tool-config-section" style="display:' + (backend === 'litellm' ? 'block' : 'none') + '">' +
-      '<h4>' + (t('codexlens.litellmEndpoint') || 'LiteLLM Endpoint') + '</h4>' +
+      '<h4>' + (t('codexlens.litellmModel') || 'Reranker Model') + '</h4>' +
       '<select id="rerankerLitellmEndpoint" class="w-full px-3 py-2 border border-border rounded-lg bg-background text-sm">' +
         litellmOptions +
       '</select>' +
-      (litellmEndpoints.length === 0
-        ? '<p class="text-xs text-warning mt-1">' + (t('codexlens.noEndpointsHint') || 'Configure LiteLLM endpoints in API Settings first') + '</p>'
-        : '') +
+      ((litellmModels.length > 0 || litellmEndpoints.length > 0)
+        ? '<div class="flex items-start gap-2 mt-2 p-2 bg-success/10 border border-success/30 rounded-lg text-xs">' +
+            '<i data-lucide="check-circle" class="w-4 h-4 text-success mt-0.5 flex-shrink-0"></i>' +
+            '<span class="text-muted-foreground">' + (t('codexlens.litellmAutoConfigHint') || 'API key and endpoint will be auto-configured from your LiteLLM API Settings') + '</span>' +
+          '</div>'
+        : '<p class="text-xs text-warning mt-1">' + (t('codexlens.noEndpointsHint') || 'Configure reranker models in API Settings first') + '</p>') +
     '</div>' +
 
     // Legacy Section (visible when backend=legacy)
@@ -43,6 +43,73 @@ logger = logging.getLogger(__name__)
 EMBEDDING_BATCH_SIZE = 256
 
 
+def calculate_dynamic_batch_size(config, embedder) -> int:
+    """Calculate batch size dynamically based on model token capacity.
+
+    This function computes an optimal batch size by considering:
+    - Maximum chunk character size from parsing rules
+    - Estimated tokens per chunk (chars / chars_per_token_estimate)
+    - Model's maximum token capacity
+    - Utilization factor (default 80% to leave headroom)
+
+    Args:
+        config: Config object with api_batch_size_* settings
+        embedder: Embedding model object with max_tokens property
+
+    Returns:
+        Calculated batch size, clamped to [1, api_batch_size_max]
+    """
+    # If dynamic calculation is disabled, return static value
+    if not getattr(config, 'api_batch_size_dynamic', False):
+        return getattr(config, 'api_batch_size', 8)
+
+    # Get maximum chunk character size from parsing rules
+    parsing_rules = getattr(config, 'parsing_rules', {})
+    default_rules = parsing_rules.get('default', {})
+    max_chunk_chars = default_rules.get('max_chunk_chars', 4000)
+
+    # Get characters per token estimate
+    chars_per_token = getattr(config, 'chars_per_token_estimate', 4)
+    if chars_per_token <= 0:
+        chars_per_token = 4  # Safe default
+
+    # Estimate tokens per chunk
+    estimated_tokens_per_chunk = max_chunk_chars / chars_per_token
+
+    # Prevent division by zero
+    if estimated_tokens_per_chunk <= 0:
+        return getattr(config, 'api_batch_size', 8)
+
+    # Get model's maximum token capacity
+    model_max_tokens = getattr(embedder, 'max_tokens', 8192)
+
+    # Get utilization factor (default 80%)
+    utilization_factor = getattr(config, 'api_batch_size_utilization_factor', 0.8)
+    if utilization_factor <= 0 or utilization_factor > 1:
+        utilization_factor = 0.8
+
+    # Calculate safe token limit
+    safe_token_limit = model_max_tokens * utilization_factor
+
+    # Calculate dynamic batch size
+    dynamic_batch_size = int(safe_token_limit / estimated_tokens_per_chunk)
+
+    # Get maximum batch size limit
+    batch_size_max = getattr(config, 'api_batch_size_max', 2048)
+
+    # Clamp to [1, batch_size_max]
+    result = max(1, min(dynamic_batch_size, batch_size_max))
+
+    logger.debug(
+        "Dynamic batch size calculated: %d (max_chunk_chars=%d, chars_per_token=%d, "
+        "model_max_tokens=%d, utilization=%.1f%%, limit=%d)",
+        result, max_chunk_chars, chars_per_token, model_max_tokens,
+        utilization_factor * 100, batch_size_max
+    )
+
+    return result
+
+
 def _build_categories_from_batch(chunk_batch: List[Tuple[Any, str]]) -> List[str]:
     """Build categories list from chunk batch for index-level category filtering.
 
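Worked example with the defaults this commit introduces: max_chunk_chars=4000 and chars_per_token_estimate=4 give roughly 1000 tokens per chunk; with model_max_tokens=8192 and the 0.8 utilization factor the safe budget is 6553 tokens, so the batch size comes out to int(6553.6 / 1000) = 6, well inside the 2048 cap. A minimal sketch to run alongside the function above (the SimpleNamespace stand-ins are illustrative, not the real Config or embedder classes):

    from types import SimpleNamespace

    config = SimpleNamespace(
        api_batch_size=8,
        api_batch_size_dynamic=True,
        api_batch_size_utilization_factor=0.8,
        api_batch_size_max=2048,
        chars_per_token_estimate=4,
        parsing_rules={"default": {"max_chunk_chars": 4000}},
    )
    embedder = SimpleNamespace(max_tokens=8192)

    print(calculate_dynamic_batch_size(config, embedder))  # -> 6

    # With the flag off, the static api_batch_size is returned unchanged.
    config.api_batch_size_dynamic = False
    print(calculate_dynamic_batch_size(config, embedder))  # -> 8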
@@ -464,6 +531,14 @@ def generate_embeddings(
             progress_callback(f"Using {endpoint_count} API endpoints with {strategy} strategy")
             progress_callback(f"Using model: {embedder.model_name} ({embedder.embedding_dim} dimensions)")
 
+        # Calculate dynamic batch size based on model capacity
+        from codexlens.config import Config
+        batch_config = Config.load()
+        effective_batch_size = calculate_dynamic_batch_size(batch_config, embedder)
+
+        if progress_callback and batch_config.api_batch_size_dynamic:
+            progress_callback(f"Dynamic batch size: {effective_batch_size} (model max_tokens={getattr(embedder, 'max_tokens', 8192)})")
+
     except Exception as e:
         return {
             "success": False,
@@ -477,7 +552,7 @@ def generate_embeddings(
     total_chunks_created = 0
     total_files_processed = 0
     FILE_BATCH_SIZE = 100  # Process 100 files at a time
-    # EMBEDDING_BATCH_SIZE is defined at module level (default: 256)
+    # effective_batch_size is calculated above (dynamic or EMBEDDING_BATCH_SIZE fallback)
 
     try:
         with VectorStore(index_path) as vector_store:
@@ -535,7 +610,7 @@ def generate_embeddings(
                 # Fallback to fixed-size batching for backward compatibility
                 def fixed_size_batches():
                     while True:
-                        batch = list(islice(chunk_generator, EMBEDDING_BATCH_SIZE))
+                        batch = list(islice(chunk_generator, effective_batch_size))
                         if not batch:
                             break
                         yield batch
@@ -566,7 +641,7 @@ def generate_embeddings(
                 for attempt in range(max_retries + 1):
                     try:
                         batch_contents = [chunk.content for chunk, _ in chunk_batch]
-                        embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
+                        embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=effective_batch_size)
                         return batch_num, chunk_batch, embeddings_numpy, batch_files, None
 
                     except Exception as e:
@@ -614,7 +689,7 @@ def generate_embeddings(
                 try:
                     # Generate embeddings
                     batch_contents = [chunk.content for chunk, _ in chunk_batch]
-                    embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=EMBEDDING_BATCH_SIZE)
+                    embeddings_numpy = embedder.embed_to_numpy(batch_contents, batch_size=effective_batch_size)
 
                     # Store embeddings with category
                     categories = _build_categories_from_batch(chunk_batch)
@@ -1227,6 +1302,14 @@ def generate_dense_embeddings_centralized(
             progress_callback(f"Using {endpoint_count} API endpoints with {strategy} strategy")
             progress_callback(f"Using model: {embedder.model_name} ({embedder.embedding_dim} dimensions)")
 
+        # Calculate dynamic batch size based on model capacity
+        from codexlens.config import Config
+        batch_config = Config.load()
+        effective_batch_size = calculate_dynamic_batch_size(batch_config, embedder)
+
+        if progress_callback and batch_config.api_batch_size_dynamic:
+            progress_callback(f"Dynamic batch size: {effective_batch_size} (model max_tokens={getattr(embedder, 'max_tokens', 8192)})")
+
     except Exception as e:
         return {
             "success": False,
@@ -170,6 +170,10 @@ class Config:
     # API concurrency settings
    api_max_workers: int = 4  # Max concurrent API calls for embedding/reranking
     api_batch_size: int = 8  # Batch size for API requests
+    api_batch_size_dynamic: bool = False  # Enable dynamic batch size calculation
+    api_batch_size_utilization_factor: float = 0.8  # Use 80% of model token capacity
+    api_batch_size_max: int = 2048  # Absolute upper limit for batch size
+    chars_per_token_estimate: int = 4  # Characters per token estimation ratio
 
     def __post_init__(self) -> None:
         try:
@@ -291,6 +295,10 @@ class Config:
             "api": {
                 "max_workers": self.api_max_workers,
                 "batch_size": self.api_batch_size,
+                "batch_size_dynamic": self.api_batch_size_dynamic,
+                "batch_size_utilization_factor": self.api_batch_size_utilization_factor,
+                "batch_size_max": self.api_batch_size_max,
+                "chars_per_token_estimate": self.chars_per_token_estimate,
             },
         }
         with open(self.settings_path, "w", encoding="utf-8") as f:
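After this change, the "api" block persisted to settings.json carries the dynamic-batching knobs alongside the existing concurrency settings. A minimal sketch of the serialized shape with this commit's defaults (a plain dict for illustration, not the real Config class):

    import json

    api_section = {
        "max_workers": 4,
        "batch_size": 8,
        "batch_size_dynamic": False,
        "batch_size_utilization_factor": 0.8,
        "batch_size_max": 2048,
        "chars_per_token_estimate": 4,
    }
    print(json.dumps({"api": api_section}, indent=2))

Setting "batch_size_dynamic" to true is what switches embedding generation over to calculate_dynamic_batch_size; the utilization factor, maximum, and chars-per-token keys only tune that calculation.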
@@ -309,13 +317,16 @@ class Config:
             embedding = settings.get("embedding", {})
             if "backend" in embedding:
                 backend = embedding["backend"]
+                # Support 'api' as alias for 'litellm'
+                if backend == "api":
+                    backend = "litellm"
                 if backend in {"fastembed", "litellm"}:
                     self.embedding_backend = backend
                 else:
                     log.warning(
                         "Invalid embedding backend in %s: %r (expected 'fastembed' or 'litellm')",
                         self.settings_path,
-                        backend,
+                        embedding["backend"],
                     )
             if "model" in embedding:
                 self.embedding_model = embedding["model"]
@@ -393,6 +404,14 @@ class Config:
                 self.api_max_workers = api["max_workers"]
             if "batch_size" in api:
                 self.api_batch_size = api["batch_size"]
+            if "batch_size_dynamic" in api:
+                self.api_batch_size_dynamic = api["batch_size_dynamic"]
+            if "batch_size_utilization_factor" in api:
+                self.api_batch_size_utilization_factor = api["batch_size_utilization_factor"]
+            if "batch_size_max" in api:
+                self.api_batch_size_max = api["batch_size_max"]
+            if "chars_per_token_estimate" in api:
+                self.chars_per_token_estimate = api["chars_per_token_estimate"]
         except Exception as exc:
             log.warning(
                 "Failed to load settings from %s (%s): %s",
@@ -409,7 +428,7 @@ class Config:
 
         Priority: default → settings.json → .env (highest)
 
-        Supported variables:
+        Supported variables (with or without CODEXLENS_ prefix):
             EMBEDDING_MODEL: Override embedding model/profile
             EMBEDDING_BACKEND: Override embedding backend (fastembed/litellm)
             EMBEDDING_POOL_ENABLED: Enable embedding high availability pool
@@ -428,83 +447,103 @@ class Config:
         if not env_vars:
             return
 
+        def get_env(key: str) -> str | None:
+            """Get env var with or without CODEXLENS_ prefix."""
+            # Check prefixed version first (Dashboard format), then unprefixed
+            return env_vars.get(f"CODEXLENS_{key}") or env_vars.get(key)
+
         # Embedding overrides
-        if "EMBEDDING_MODEL" in env_vars:
-            self.embedding_model = env_vars["EMBEDDING_MODEL"]
+        embedding_model = get_env("EMBEDDING_MODEL")
+        if embedding_model:
+            self.embedding_model = embedding_model
             log.debug("Overriding embedding_model from .env: %s", self.embedding_model)
 
-        if "EMBEDDING_BACKEND" in env_vars:
-            backend = env_vars["EMBEDDING_BACKEND"].lower()
+        embedding_backend = get_env("EMBEDDING_BACKEND")
+        if embedding_backend:
+            backend = embedding_backend.lower()
+            # Support 'api' as alias for 'litellm'
+            if backend == "api":
+                backend = "litellm"
             if backend in {"fastembed", "litellm"}:
                 self.embedding_backend = backend
                 log.debug("Overriding embedding_backend from .env: %s", backend)
             else:
-                log.warning("Invalid EMBEDDING_BACKEND in .env: %r", backend)
+                log.warning("Invalid EMBEDDING_BACKEND in .env: %r", embedding_backend)
 
-        if "EMBEDDING_POOL_ENABLED" in env_vars:
-            value = env_vars["EMBEDDING_POOL_ENABLED"].lower()
+        embedding_pool = get_env("EMBEDDING_POOL_ENABLED")
+        if embedding_pool:
+            value = embedding_pool.lower()
             self.embedding_pool_enabled = value in {"true", "1", "yes", "on"}
             log.debug("Overriding embedding_pool_enabled from .env: %s", self.embedding_pool_enabled)
 
-        if "EMBEDDING_STRATEGY" in env_vars:
-            strategy = env_vars["EMBEDDING_STRATEGY"].lower()
+        embedding_strategy = get_env("EMBEDDING_STRATEGY")
+        if embedding_strategy:
+            strategy = embedding_strategy.lower()
             if strategy in {"round_robin", "latency_aware", "weighted_random"}:
                 self.embedding_strategy = strategy
                 log.debug("Overriding embedding_strategy from .env: %s", strategy)
             else:
-                log.warning("Invalid EMBEDDING_STRATEGY in .env: %r", strategy)
+                log.warning("Invalid EMBEDDING_STRATEGY in .env: %r", embedding_strategy)
 
-        if "EMBEDDING_COOLDOWN" in env_vars:
+        embedding_cooldown = get_env("EMBEDDING_COOLDOWN")
+        if embedding_cooldown:
             try:
-                self.embedding_cooldown = float(env_vars["EMBEDDING_COOLDOWN"])
+                self.embedding_cooldown = float(embedding_cooldown)
                 log.debug("Overriding embedding_cooldown from .env: %s", self.embedding_cooldown)
             except ValueError:
-                log.warning("Invalid EMBEDDING_COOLDOWN in .env: %r", env_vars["EMBEDDING_COOLDOWN"])
+                log.warning("Invalid EMBEDDING_COOLDOWN in .env: %r", embedding_cooldown)
 
         # Reranker overrides
-        if "RERANKER_MODEL" in env_vars:
-            self.reranker_model = env_vars["RERANKER_MODEL"]
+        reranker_model = get_env("RERANKER_MODEL")
+        if reranker_model:
+            self.reranker_model = reranker_model
             log.debug("Overriding reranker_model from .env: %s", self.reranker_model)
 
-        if "RERANKER_BACKEND" in env_vars:
-            backend = env_vars["RERANKER_BACKEND"].lower()
+        reranker_backend = get_env("RERANKER_BACKEND")
+        if reranker_backend:
+            backend = reranker_backend.lower()
             if backend in {"fastembed", "onnx", "api", "litellm", "legacy"}:
                 self.reranker_backend = backend
                 log.debug("Overriding reranker_backend from .env: %s", backend)
             else:
-                log.warning("Invalid RERANKER_BACKEND in .env: %r", backend)
+                log.warning("Invalid RERANKER_BACKEND in .env: %r", reranker_backend)
 
-        if "RERANKER_ENABLED" in env_vars:
-            value = env_vars["RERANKER_ENABLED"].lower()
+        reranker_enabled = get_env("RERANKER_ENABLED")
+        if reranker_enabled:
+            value = reranker_enabled.lower()
             self.enable_cross_encoder_rerank = value in {"true", "1", "yes", "on"}
             log.debug("Overriding reranker_enabled from .env: %s", self.enable_cross_encoder_rerank)
 
-        if "RERANKER_POOL_ENABLED" in env_vars:
-            value = env_vars["RERANKER_POOL_ENABLED"].lower()
+        reranker_pool = get_env("RERANKER_POOL_ENABLED")
+        if reranker_pool:
+            value = reranker_pool.lower()
             self.reranker_pool_enabled = value in {"true", "1", "yes", "on"}
             log.debug("Overriding reranker_pool_enabled from .env: %s", self.reranker_pool_enabled)
 
-        if "RERANKER_STRATEGY" in env_vars:
-            strategy = env_vars["RERANKER_STRATEGY"].lower()
+        reranker_strategy = get_env("RERANKER_STRATEGY")
+        if reranker_strategy:
+            strategy = reranker_strategy.lower()
             if strategy in {"round_robin", "latency_aware", "weighted_random"}:
                 self.reranker_strategy = strategy
                 log.debug("Overriding reranker_strategy from .env: %s", strategy)
             else:
-                log.warning("Invalid RERANKER_STRATEGY in .env: %r", strategy)
+                log.warning("Invalid RERANKER_STRATEGY in .env: %r", reranker_strategy)
 
-        if "RERANKER_COOLDOWN" in env_vars:
+        reranker_cooldown = get_env("RERANKER_COOLDOWN")
+        if reranker_cooldown:
             try:
-                self.reranker_cooldown = float(env_vars["RERANKER_COOLDOWN"])
+                self.reranker_cooldown = float(reranker_cooldown)
                 log.debug("Overriding reranker_cooldown from .env: %s", self.reranker_cooldown)
             except ValueError:
-                log.warning("Invalid RERANKER_COOLDOWN in .env: %r", env_vars["RERANKER_COOLDOWN"])
+                log.warning("Invalid RERANKER_COOLDOWN in .env: %r", reranker_cooldown)
 
-        if "RERANKER_MAX_INPUT_TOKENS" in env_vars:
+        reranker_max_tokens = get_env("RERANKER_MAX_INPUT_TOKENS")
+        if reranker_max_tokens:
             try:
-                self.reranker_max_input_tokens = int(env_vars["RERANKER_MAX_INPUT_TOKENS"])
+                self.reranker_max_input_tokens = int(reranker_max_tokens)
                 log.debug("Overriding reranker_max_input_tokens from .env: %s", self.reranker_max_input_tokens)
             except ValueError:
-                log.warning("Invalid RERANKER_MAX_INPUT_TOKENS in .env: %r", env_vars["RERANKER_MAX_INPUT_TOKENS"])
+                log.warning("Invalid RERANKER_MAX_INPUT_TOKENS in .env: %r", reranker_max_tokens)
 
     @classmethod
     def load(cls) -> "Config":
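The get_env helper gives every override key two spellings; the CODEXLENS_-prefixed form (the one the dashboard writes) takes precedence over the bare name. A standalone sketch of the same lookup, with invented example values:

    env_vars = {
        "CODEXLENS_EMBEDDING_BACKEND": "litellm",
        "EMBEDDING_BACKEND": "fastembed",
        "RERANKER_MODEL": "example-reranker-model",  # placeholder value
    }

    def get_env(key: str) -> str | None:
        """Prefixed (dashboard) form first, then the bare variable name."""
        return env_vars.get(f"CODEXLENS_{key}") or env_vars.get(key)

    assert get_env("EMBEDDING_BACKEND") == "litellm"              # prefixed form wins
    assert get_env("RERANKER_MODEL") == "example-reranker-model"  # falls back to bare name
    assert get_env("RERANKER_BACKEND") is None                    # unset under either spelling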