Remove LLM enhancement features and related components as per user request. This includes the deletion of source code files, CLI commands, front-end components, tests, scripts, and documentation associated with LLM functionality. Simplified dependencies and reduced complexity while retaining core vector search capabilities. Validation of changes confirmed successful removal and functionality.

2026-02-05 01:50:27 +08:00 · 2025-12-16 21:38:27 +08:00
parent d21066c282
commit b702791c2c
21 changed files with 375 additions and 7193 deletions
--- a/.claude/agents/cli-explore-agent.md
+++ b/.claude/agents/cli-explore-agent.md
@@ -85,7 +85,7 @@ MODE: analysis
 CONTEXT: @**/*
 EXPECTED: {from prompt}
 RULES: {from prompt, if template specified} | analysis=READ-ONLY
-" --tool gemini --cd {dir}
+" --tool gemini --cd {dir} 
 ```

 **Fallback Chain**: Gemini → Qwen → Codex → Bash-only
--- a/.claude/rules/active_memory.md
+++ b/.claude/rules/active_memory.md
@@ -1,13 +0,0 @@
-# Active Memory
-
-> Auto-generated understanding of frequently accessed files using GEMINI.
-> Last updated: 2025-12-14T08:59:41.526Z
-> Files analyzed: 10
-> CLI Tool: gemini
-
---
-
-[object Object]
-
---
-
--- a/ccw/src/templates/dashboard-js/components/cli-status.js
+++ b/ccw/src/templates/dashboard-js/components/cli-status.js
@@ -18,15 +18,6 @@ let nativeResumeEnabled = localStorage.getItem('ccw-native-resume') !== 'false';
 // Recursive Query settings (for hierarchical storage aggregation)
 let recursiveQueryEnabled = localStorage.getItem('ccw-recursive-query') !== 'false'; // default true

-// LLM Enhancement settings for Semantic Search
-let llmEnhancementSettings = {
-  enabled: localStorage.getItem('ccw-llm-enhancement-enabled') === 'true',
-  tool: localStorage.getItem('ccw-llm-enhancement-tool') || 'gemini',
-  fallbackTool: localStorage.getItem('ccw-llm-enhancement-fallback') || 'qwen',
-  batchSize: parseInt(localStorage.getItem('ccw-llm-enhancement-batch-size') || '5', 10),
-  timeoutMs: parseInt(localStorage.getItem('ccw-llm-enhancement-timeout') || '300000', 10)
-};
-
 // ========== Initialization ==========
 function initCliStatus() {
  // Load all statuses in one call using aggregated endpoint
@@ -242,17 +233,12 @@ function renderCliStatus() {
  `;

  // Semantic Search card (only show if CodexLens is installed)
-  const llmStatusBadge = llmEnhancementSettings.enabled 
-    ? `<span class="badge px-1.5 py-0.5 text-xs rounded bg-success/20 text-success">LLM</span>`
-    : '';
  const semanticHtml = codexLensStatus.ready ? `
-    <div class="cli-tool-card tool-semantic clickable ${semanticStatus.available ? 'available' : 'unavailable'}"
-         onclick="openSemanticSettingsModal()">
+    <div class="cli-tool-card tool-semantic ${semanticStatus.available ? 'available' : 'unavailable'}">
      <div class="cli-tool-header">
        <span class="cli-tool-status ${semanticStatus.available ? 'status-available' : 'status-unavailable'}"></span>
        <span class="cli-tool-name">Semantic Search</span>
        <span class="badge px-1.5 py-0.5 text-xs rounded ${semanticStatus.available ? 'bg-primary/20 text-primary' : 'bg-muted text-muted-foreground'}">AI</span>
-        ${llmStatusBadge}
      </div>
      <div class="cli-tool-desc text-xs text-muted-foreground mt-1">
        ${semanticStatus.available ? 'AI-powered code understanding' : 'Natural language code search'}
@@ -265,27 +251,17 @@ function renderCliStatus() {
      </div>
      <div class="cli-tool-actions flex flex-col gap-2 mt-3">
        ${!semanticStatus.available ? `
-          <button class="btn-sm btn-primary w-full flex items-center justify-center gap-1" onclick="event.stopPropagation(); openSemanticInstallWizard()">
+          <button class="btn-sm btn-primary w-full flex items-center justify-center gap-1" onclick="openSemanticInstallWizard()">
            <i data-lucide="brain" class="w-3 h-3"></i> Install AI Model
          </button>
-          <div class="flex items-center justify-between w-full mt-1">
-            <div class="flex items-center gap-1 text-xs text-muted-foreground">
-              <i data-lucide="hard-drive" class="w-3 h-3"></i>
-              <span>~130MB</span>
-            </div>
-            <button class="btn-sm btn-outline flex items-center gap-1" onclick="event.stopPropagation(); openSemanticSettingsModal()">
-              <i data-lucide="settings" class="w-3 h-3"></i>
-            </button>
+          <div class="flex items-center gap-1 text-xs text-muted-foreground mt-1">
+            <i data-lucide="hard-drive" class="w-3 h-3"></i>
+            <span>~130MB</span>
          </div>
        ` : `
-          <div class="flex items-center justify-between w-full">
-            <div class="flex items-center gap-1 text-xs text-muted-foreground">
-              <i data-lucide="cpu" class="w-3 h-3"></i>
-              <span>bge-small-en-v1.5</span>
-            </div>
-            <button class="btn-sm btn-outline flex items-center gap-1" onclick="event.stopPropagation(); openSemanticSettingsModal()">
-              <i data-lucide="settings" class="w-3 h-3"></i>
-            </button>
+          <div class="flex items-center gap-1 text-xs text-muted-foreground">
+            <i data-lucide="cpu" class="w-3 h-3"></i>
+            <span>bge-small-en-v1.5</span>
          </div>
        `}
      </div>
@@ -991,618 +967,3 @@ async function startSemanticInstall() {
  }
 }

-// ========== Semantic Search Settings Modal ==========
-function openSemanticSettingsModal() {
-  const availableTools = Object.entries(cliToolStatus)
-    .filter(function(entry) { return entry[1].available; })
-    .map(function(entry) { return entry[0]; });
-
-  const modal = document.createElement('div');
-  modal.id = 'semanticSettingsModal';
-  modal.className = 'fixed inset-0 bg-black/50 flex items-center justify-center z-50';
-  modal.onclick = function(e) { if (e.target === modal) closeSemanticSettingsModal(); };
-
-  const toolOptions = availableTools.map(function(tool) {
-    return '<option value="' + tool + '"' + (llmEnhancementSettings.tool === tool ? ' selected' : '') + '>' +
-      tool.charAt(0).toUpperCase() + tool.slice(1) + '</option>';
-  }).join('');
-
-  const fallbackOptions = '<option value="">' + t('semantic.none') + '</option>' + availableTools.map(function(tool) {
-    return '<option value="' + tool + '"' + (llmEnhancementSettings.fallbackTool === tool ? ' selected' : '') + '>' +
-      tool.charAt(0).toUpperCase() + tool.slice(1) + '</option>';
-  }).join('');
-
-  const disabled = !llmEnhancementSettings.enabled ? 'disabled' : '';
-  const opacityClass = !llmEnhancementSettings.enabled ? 'opacity-50' : '';
-
-  modal.innerHTML =
-    '<div class="bg-card rounded-lg shadow-xl w-full max-w-lg mx-4 overflow-hidden" onclick="event.stopPropagation()">' +
-      '<div class="p-6">' +
-        '<div class="flex items-center gap-3 mb-4">' +
-          '<div class="w-10 h-10 rounded-full bg-primary/10 flex items-center justify-center">' +
-            '<i data-lucide="sparkles" class="w-5 h-5 text-primary"></i>' +
-          '</div>' +
-          '<div>' +
-            '<h3 class="text-lg font-semibold">' + t('semantic.settings') + '</h3>' +
-            '<p class="text-sm text-muted-foreground">' + t('semantic.configDesc') + '</p>' +
-          '</div>' +
-        '</div>' +
-        '<div class="space-y-4">' +
-          '<div class="flex items-center justify-between p-4 bg-muted/50 rounded-lg">' +
-            '<div>' +
-              '<h4 class="font-medium flex items-center gap-2">' +
-                '<i data-lucide="brain" class="w-4 h-4"></i>' + t('semantic.llmEnhancement') + '</h4>' +
-              '<p class="text-sm text-muted-foreground mt-1">' + t('semantic.llmDesc') + '</p>' +
-            '</div>' +
-            '<label class="cli-toggle">' +
-              '<input type="checkbox" id="llmEnhancementToggle" ' + (llmEnhancementSettings.enabled ? 'checked' : '') +
-              ' onchange="toggleLlmEnhancement(this.checked)">' +
-              '<span class="cli-toggle-slider"></span>' +
-            '</label>' +
-          '</div>' +
-          '<div class="p-4 bg-muted/30 rounded-lg space-y-4 ' + opacityClass + '" id="llmSettingsSection">' +
-            '<div class="grid grid-cols-2 gap-4">' +
-              '<div>' +
-                '<label class="block text-sm font-medium mb-2">' +
-                  '<i data-lucide="cpu" class="w-3 h-3 inline mr-1"></i>' + t('semantic.primaryTool') + '</label>' +
-                '<select class="cli-setting-select w-full" id="llmToolSelect" onchange="updateLlmTool(this.value)" ' + disabled + '>' + toolOptions + '</select>' +
-              '</div>' +
-              '<div>' +
-                '<label class="block text-sm font-medium mb-2">' +
-                  '<i data-lucide="refresh-cw" class="w-3 h-3 inline mr-1"></i>' + t('semantic.fallbackTool') + '</label>' +
-                '<select class="cli-setting-select w-full" id="llmFallbackSelect" onchange="updateLlmFallback(this.value)" ' + disabled + '>' + fallbackOptions + '</select>' +
-              '</div>' +
-            '</div>' +
-            '<div class="grid grid-cols-2 gap-4">' +
-              '<div>' +
-                '<label class="block text-sm font-medium mb-2">' +
-                  '<i data-lucide="layers" class="w-3 h-3 inline mr-1"></i>' + t('semantic.batchSize') + '</label>' +
-                '<select class="cli-setting-select w-full" id="llmBatchSelect" onchange="updateLlmBatchSize(this.value)" ' + disabled + '>' +
-                  '<option value="1"' + (llmEnhancementSettings.batchSize === 1 ? ' selected' : '') + '>1 ' + t('semantic.file') + '</option>' +
-                  '<option value="3"' + (llmEnhancementSettings.batchSize === 3 ? ' selected' : '') + '>3 ' + t('semantic.files') + '</option>' +
-                  '<option value="5"' + (llmEnhancementSettings.batchSize === 5 ? ' selected' : '') + '>5 ' + t('semantic.files') + '</option>' +
-                  '<option value="10"' + (llmEnhancementSettings.batchSize === 10 ? ' selected' : '') + '>10 ' + t('semantic.files') + '</option>' +
-                '</select>' +
-              '</div>' +
-              '<div>' +
-                '<label class="block text-sm font-medium mb-2">' +
-                  '<i data-lucide="clock" class="w-3 h-3 inline mr-1"></i>' + t('semantic.timeout') + '</label>' +
-                '<select class="cli-setting-select w-full" id="llmTimeoutSelect" onchange="updateLlmTimeout(this.value)" ' + disabled + '>' +
-                  '<option value="60000"' + (llmEnhancementSettings.timeoutMs === 60000 ? ' selected' : '') + '>1 min</option>' +
-                  '<option value="180000"' + (llmEnhancementSettings.timeoutMs === 180000 ? ' selected' : '') + '>3 min</option>' +
-                  '<option value="300000"' + (llmEnhancementSettings.timeoutMs === 300000 ? ' selected' : '') + '>5 min</option>' +
-                  '<option value="600000"' + (llmEnhancementSettings.timeoutMs === 600000 ? ' selected' : '') + '>10 min</option>' +
-                '</select>' +
-              '</div>' +
-            '</div>' +
-          '</div>' +
-          '<div class="bg-primary/5 border border-primary/20 rounded-lg p-3">' +
-            '<div class="flex items-start gap-2">' +
-              '<i data-lucide="info" class="w-4 h-4 text-primary mt-0.5"></i>' +
-              '<div class="text-sm text-muted-foreground">' +
-                '<p>' + t('semantic.enhanceInfo') + '</p>' +
-                '<p class="mt-1">' + t('semantic.enhanceCommand') + ' <code class="bg-muted px-1 rounded">codex-lens enhance</code> ' + t('semantic.enhanceAfterEnable') + '</p>' +
-              '</div>' +
-            '</div>' +
-          '</div>' +
-          '<div class="flex gap-2 pt-2">' +
-            '<button class="btn-sm btn-outline flex items-center gap-1 flex-1" onclick="runEnhanceCommand()" ' + disabled + '>' +
-              '<i data-lucide="zap" class="w-3 h-3"></i>' + t('semantic.runEnhanceNow') + '</button>' +
-            '<button class="btn-sm btn-outline flex items-center gap-1 flex-1" onclick="viewEnhanceStatus()">' +
-              '<i data-lucide="bar-chart-2" class="w-3 h-3"></i>' + t('semantic.viewStatus') + '</button>' +
-          '</div>' +
-          '<div class="border-t border-border my-4"></div>' +
-          '<div>' +
-            '<h4 class="font-medium mb-3 flex items-center gap-2">' +
-              '<i data-lucide="search" class="w-4 h-4"></i>' + t('semantic.testSearch') + '</h4>' +
-            '<div class="space-y-3">' +
-              '<div>' +
-                '<input type="text" id="semanticSearchInput" class="tool-config-input w-full" ' +
-                  'placeholder="' + t('semantic.searchPlaceholder') + '" />' +
-              '</div>' +
-              '<div>' +
-                '<button class="btn-sm btn-primary w-full" id="runSemanticSearchBtn">' +
-                  '<i data-lucide="search" class="w-3 h-3"></i> ' + t('semantic.runSearch') +
-                '</button>' +
-              '</div>' +
-              '<div id="semanticSearchResults" class="hidden">' +
-                '<div class="bg-muted/30 rounded-lg p-3 max-h-64 overflow-y-auto">' +
-                  '<div class="flex items-center justify-between mb-2">' +
-                    '<p class="text-sm font-medium">' + t('codexlens.results') + ':</p>' +
-                    '<span id="semanticResultCount" class="text-xs text-muted-foreground"></span>' +
-                  '</div>' +
-                  '<pre id="semanticResultContent" class="text-xs font-mono whitespace-pre-wrap break-all"></pre>' +
-                '</div>' +
-              '</div>' +
-            '</div>' +
-          '</div>' +
-        '</div>' +
-      '</div>' +
-      '<div class="border-t border-border p-4 flex justify-end gap-3 bg-muted/30">' +
-        '<button class="btn-outline px-4 py-2" onclick="closeSemanticSettingsModal()">' + t('semantic.close') + '</button>' +
-      '</div>' +
-    '</div>';
-
-  document.body.appendChild(modal);
-
-  // Add semantic search button handler
-  setTimeout(function() {
-    var runSemanticSearchBtn = document.getElementById('runSemanticSearchBtn');
-    if (runSemanticSearchBtn) {
-      runSemanticSearchBtn.onclick = async function() {
-        var query = document.getElementById('semanticSearchInput').value.trim();
-        var resultsDiv = document.getElementById('semanticSearchResults');
-        var resultCount = document.getElementById('semanticResultCount');
-        var resultContent = document.getElementById('semanticResultContent');
-
-        if (!query) {
-          showRefreshToast(t('codexlens.enterQuery'), 'warning');
-          return;
-        }
-
-        runSemanticSearchBtn.disabled = true;
-        runSemanticSearchBtn.innerHTML = '<span class="animate-pulse">' + t('codexlens.searching') + '</span>';
-        resultsDiv.classList.add('hidden');
-
-        try {
-          var params = new URLSearchParams({
-            query: query,
-            mode: 'semantic',
-            limit: '10'
-          });
-
-          var response = await fetch('/api/codexlens/search?' + params.toString());
-          var result = await response.json();
-
-          console.log('[Semantic Search Test] Result:', result);
-
-          if (result.success) {
-            var results = result.results || [];
-            resultCount.textContent = results.length + ' ' + t('codexlens.resultsCount');
-            resultContent.textContent = JSON.stringify(results, null, 2);
-            resultsDiv.classList.remove('hidden');
-            showRefreshToast(t('codexlens.searchCompleted') + ': ' + results.length + ' ' + t('codexlens.resultsCount'), 'success');
-          } else {
-            resultContent.textContent = t('common.error') + ': ' + (result.error || t('common.unknownError'));
-            resultsDiv.classList.remove('hidden');
-            showRefreshToast(t('codexlens.searchFailed') + ': ' + result.error, 'error');
-          }
-
-          runSemanticSearchBtn.disabled = false;
-          runSemanticSearchBtn.innerHTML = '<i data-lucide="search" class="w-3 h-3"></i> ' + t('semantic.runSearch');
-          if (window.lucide) lucide.createIcons();
-        } catch (err) {
-          console.error('[Semantic Search Test] Error:', err);
-          resultContent.textContent = t('common.exception') + ': ' + err.message;
-          resultsDiv.classList.remove('hidden');
-          showRefreshToast(t('common.error') + ': ' + err.message, 'error');
-          runSemanticSearchBtn.disabled = false;
-          runSemanticSearchBtn.innerHTML = '<i data-lucide="search" class="w-3 h-3"></i> ' + t('semantic.runSearch');
-          if (window.lucide) lucide.createIcons();
-        }
-      };
-    }
-  }, 100);
-
-  var handleEscape = function(e) {
-    if (e.key === 'Escape') {
-      closeSemanticSettingsModal();
-      document.removeEventListener('keydown', handleEscape);
-    }
-  };
-  document.addEventListener('keydown', handleEscape);
-
-  if (window.lucide) {
-    lucide.createIcons();
-  }
-}
-
-function closeSemanticSettingsModal() {
-  var modal = document.getElementById('semanticSettingsModal');
-  if (modal) modal.remove();
-}
-
-function toggleLlmEnhancement(enabled) {
-  llmEnhancementSettings.enabled = enabled;
-  localStorage.setItem('ccw-llm-enhancement-enabled', enabled.toString());
-
-  var settingsSection = document.getElementById('llmSettingsSection');
-  if (settingsSection) {
-    settingsSection.classList.toggle('opacity-50', !enabled);
-    settingsSection.querySelectorAll('select').forEach(function(el) { el.disabled = !enabled; });
-  }
-
-  renderCliStatus();
-  showRefreshToast(t('semantic.llmEnhancement') + ' ' + (enabled ? t('semantic.enabled') : t('semantic.disabled')), 'success');
-}
-
-function updateLlmTool(tool) {
-  llmEnhancementSettings.tool = tool;
-  localStorage.setItem('ccw-llm-enhancement-tool', tool);
-  showRefreshToast(t('semantic.toolSetTo') + ' ' + tool, 'success');
-}
-
-function updateLlmFallback(tool) {
-  llmEnhancementSettings.fallbackTool = tool;
-  localStorage.setItem('ccw-llm-enhancement-fallback', tool);
-  showRefreshToast(t('semantic.fallbackSetTo') + ' ' + (tool || t('semantic.none')), 'success');
-}
-
-function updateLlmBatchSize(size) {
-  llmEnhancementSettings.batchSize = parseInt(size, 10);
-  localStorage.setItem('ccw-llm-enhancement-batch-size', size);
-  showRefreshToast(t('semantic.batchSetTo') + ' ' + size + ' ' + t('semantic.files'), 'success');
-}
-
-function updateLlmTimeout(ms) {
-  llmEnhancementSettings.timeoutMs = parseInt(ms, 10);
-  localStorage.setItem('ccw-llm-enhancement-timeout', ms);
-  var mins = parseInt(ms, 10) / 60000;
-  showRefreshToast(t('semantic.timeoutSetTo') + ' ' + mins + ' ' + (mins > 1 ? t('semantic.minutes') : t('semantic.minute')), 'success');
-}
-
-async function runEnhanceCommand() {
-  if (!llmEnhancementSettings.enabled) {
-    showRefreshToast(t('semantic.enableFirst'), 'warning');
-    return;
-  }
-
-  showRefreshToast('Starting LLM enhancement...', 'info');
-  closeSemanticSettingsModal();
-
-  try {
-    var response = await fetch('/api/codexlens/enhance', {
-      method: 'POST',
-      headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify({
-        path: projectPath,
-        tool: llmEnhancementSettings.tool,
-        batchSize: llmEnhancementSettings.batchSize,
-        timeoutMs: llmEnhancementSettings.timeoutMs
-      })
-    });
-
-    var result = await response.json();
-    if (result.success) {
-      var enhanced = result.result?.enhanced || 0;
-      showRefreshToast('Enhanced ' + enhanced + ' files with LLM', 'success');
-    } else {
-      showRefreshToast('Enhance failed: ' + result.error, 'error');
-    }
-  } catch (err) {
-    showRefreshToast('Enhance error: ' + err.message, 'error');
-  }
-}
-
-function viewEnhanceStatus() {
-  openSemanticMetadataViewer();
-}
-
-// ========== Semantic Metadata Viewer ==========
-var semanticMetadataCache = {
-  entries: [],
-  total: 0,
-  offset: 0,
-  limit: 50,
-  loading: false
-};
-
-async function openSemanticMetadataViewer() {
-  closeSemanticSettingsModal();
-
-  var modal = document.createElement('div');
-  modal.id = 'semanticMetadataModal';
-  modal.className = 'generic-modal-overlay';
-  modal.onclick = function(e) { if (e.target === modal) closeSemanticMetadataViewer(); };
-
-  modal.innerHTML =
-    '<div class="generic-modal large" onclick="event.stopPropagation()">' +
-      '<div class="generic-modal-header">' +
-        '<div class="flex items-center gap-3">' +
-          '<i data-lucide="database" class="w-5 h-5 text-primary"></i>' +
-          '<h3 class="generic-modal-title">Semantic Metadata Browser</h3>' +
-          '<span id="semanticMetadataCount" class="badge bg-muted text-muted-foreground px-2 py-0.5 text-xs rounded">Loading...</span>' +
-        '</div>' +
-        '<button class="generic-modal-close" onclick="closeSemanticMetadataViewer()">' +
-          '<i data-lucide="x" class="w-4 h-4"></i>' +
-        '</button>' +
-      '</div>' +
-      '<div class="generic-modal-body p-0">' +
-        '<div class="semantic-viewer-toolbar">' +
-          '<div class="flex items-center gap-3">' +
-            '<select id="semanticToolFilter" class="cli-setting-select" onchange="filterSemanticByTool(this.value)">' +
-              '<option value="">All Tools</option>' +
-              '<option value="gemini">Gemini</option>' +
-              '<option value="qwen">Qwen</option>' +
-            '</select>' +
-            '<button class="btn-sm btn-outline flex items-center gap-1" onclick="refreshSemanticMetadata()">' +
-              '<i data-lucide="refresh-cw" class="w-3 h-3"></i> Refresh' +
-            '</button>' +
-          '</div>' +
-          '<div class="flex items-center gap-2 text-sm text-muted-foreground">' +
-            '<span id="semanticPaginationInfo">-</span>' +
-          '</div>' +
-        '</div>' +
-        '<div id="semanticMetadataTableContainer" class="semantic-table-container">' +
-          '<div class="semantic-loading">' +
-            '<div class="animate-spin w-6 h-6 border-2 border-primary border-t-transparent rounded-full"></div>' +
-            '<span>Loading metadata...</span>' +
-          '</div>' +
-        '</div>' +
-        '<div class="semantic-viewer-footer">' +
-          '<button id="semanticPrevBtn" class="btn-sm btn-outline" onclick="semanticPrevPage()" disabled>' +
-            '<i data-lucide="chevron-left" class="w-4 h-4"></i> Previous' +
-          '</button>' +
-          '<div class="flex items-center gap-2">' +
-            '<span class="text-sm text-muted-foreground">Page</span>' +
-            '<select id="semanticPageSelect" class="cli-setting-select" onchange="semanticGoToPage(this.value)">' +
-              '<option value="0">1</option>' +
-            '</select>' +
-          '</div>' +
-          '<button id="semanticNextBtn" class="btn-sm btn-outline" onclick="semanticNextPage()" disabled>' +
-            'Next <i data-lucide="chevron-right" class="w-4 h-4"></i>' +
-          '</button>' +
-        '</div>' +
-      '</div>' +
-    '</div>';
-
-  document.body.appendChild(modal);
-
-  requestAnimationFrame(function() {
-    modal.classList.add('active');
-  });
-
-  var handleEscape = function(e) {
-    if (e.key === 'Escape') {
-      closeSemanticMetadataViewer();
-      document.removeEventListener('keydown', handleEscape);
-    }
-  };
-  document.addEventListener('keydown', handleEscape);
-
-  if (window.lucide) {
-    lucide.createIcons();
-  }
-
-  await loadSemanticMetadata();
-}
-
-function closeSemanticMetadataViewer() {
-  var modal = document.getElementById('semanticMetadataModal');
-  if (modal) {
-    modal.classList.remove('active');
-    setTimeout(function() { modal.remove(); }, 200);
-  }
-}
-
-async function loadSemanticMetadata(offset, toolFilter) {
-  offset = typeof offset === 'number' ? offset : semanticMetadataCache.offset;
-  toolFilter = toolFilter !== undefined ? toolFilter : (document.getElementById('semanticToolFilter')?.value || '');
-
-  semanticMetadataCache.loading = true;
-
-  var container = document.getElementById('semanticMetadataTableContainer');
-  if (container) {
-    container.innerHTML =
-      '<div class="semantic-loading">' +
-        '<div class="animate-spin w-6 h-6 border-2 border-primary border-t-transparent rounded-full"></div>' +
-        '<span>Loading metadata...</span>' +
-      '</div>';
-  }
-
-  try {
-    var url = '/api/codexlens/semantic/metadata?offset=' + offset + '&limit=' + semanticMetadataCache.limit;
-    if (toolFilter) {
-      url += '&tool=' + encodeURIComponent(toolFilter);
-    }
-
-    var response = await fetch(url);
-    var data = await response.json();
-
-    if (data.success && data.result) {
-      semanticMetadataCache.entries = data.result.entries || [];
-      semanticMetadataCache.total = data.result.total || 0;
-      semanticMetadataCache.offset = offset;
-
-      renderSemanticMetadataTable();
-      updateSemanticPagination();
-    } else {
-      container.innerHTML =
-        '<div class="semantic-empty">' +
-          '<i data-lucide="alert-circle" class="w-8 h-8 text-muted-foreground"></i>' +
-          '<p>Error loading metadata: ' + (data.error || 'Unknown error') + '</p>' +
-        '</div>';
-      if (window.lucide) lucide.createIcons();
-    }
-  } catch (err) {
-    container.innerHTML =
-      '<div class="semantic-empty">' +
-        '<i data-lucide="alert-circle" class="w-8 h-8 text-muted-foreground"></i>' +
-        '<p>Error: ' + err.message + '</p>' +
-      '</div>';
-    if (window.lucide) lucide.createIcons();
-  }
-
-  semanticMetadataCache.loading = false;
-}
-
-function escapeHtmlSemantic(text) {
-  if (!text) return '';
-  var div = document.createElement('div');
-  div.textContent = text;
-  return div.innerHTML;
-}
-
-function renderSemanticMetadataTable() {
-  var container = document.getElementById('semanticMetadataTableContainer');
-  if (!container) return;
-
-  var entries = semanticMetadataCache.entries;
-
-  if (!entries.length) {
-    container.innerHTML =
-      '<div class="semantic-empty">' +
-        '<i data-lucide="database" class="w-12 h-12 text-muted-foreground mb-3"></i>' +
-        '<p class="text-lg font-medium">No semantic metadata found</p>' +
-        '<p class="text-sm text-muted-foreground mt-1">Run \'codex-lens enhance\' to generate metadata for indexed files.</p>' +
-        '<button class="btn-sm btn-primary mt-4" onclick="closeSemanticMetadataViewer(); runEnhanceCommand();">' +
-          '<i data-lucide="zap" class="w-3 h-3 mr-1"></i> Run Enhance' +
-        '</button>' +
-      '</div>';
-    if (window.lucide) lucide.createIcons();
-    return;
-  }
-
-  var rows = entries.map(function(entry, idx) {
-    var keywordsHtml = (entry.keywords || []).slice(0, 4).map(function(k) {
-      return '<span class="semantic-keyword">' + escapeHtmlSemantic(k) + '</span>';
-    }).join('');
-    if ((entry.keywords || []).length > 4) {
-      keywordsHtml += '<span class="semantic-keyword-more">+' + (entry.keywords.length - 4) + '</span>';
-    }
-
-    var date = entry.generated_at ? new Date(entry.generated_at * 1000).toLocaleDateString() : '-';
-
-    return (
-      '<tr class="semantic-row" onclick="toggleSemanticDetail(' + idx + ')">' +
-        '<td class="semantic-cell-file">' +
-          '<div class="flex items-center gap-2">' +
-            '<i data-lucide="file-code" class="w-4 h-4 text-muted-foreground"></i>' +
-            '<span class="font-medium">' + escapeHtmlSemantic(entry.file_name || '-') + '</span>' +
-          '</div>' +
-          '<div class="text-xs text-muted-foreground truncate" title="' + escapeHtmlSemantic(entry.full_path || '') + '">' +
-            escapeHtmlSemantic(entry.full_path || '-') +
-          '</div>' +
-        '</td>' +
-        '<td class="semantic-cell-lang">' + escapeHtmlSemantic(entry.language || '-') + '</td>' +
-        '<td class="semantic-cell-purpose">' + escapeHtmlSemantic((entry.purpose || '-').substring(0, 50)) +
-          ((entry.purpose || '').length > 50 ? '...' : '') + '</td>' +
-        '<td class="semantic-cell-keywords">' + (keywordsHtml || '-') + '</td>' +
-        '<td class="semantic-cell-tool">' +
-          '<span class="tool-badge tool-' + (entry.llm_tool || 'unknown') + '">' +
-            escapeHtmlSemantic(entry.llm_tool || '-') +
-          '</span>' +
-        '</td>' +
-        '<td class="semantic-cell-date">' + date + '</td>' +
-      '</tr>' +
-      '<tr id="semanticDetail' + idx + '" class="semantic-detail-row hidden">' +
-        '<td colspan="6">' +
-          '<div class="semantic-detail-content">' +
-            '<div class="semantic-detail-section">' +
-              '<h4><i data-lucide="file-text" class="w-3 h-3"></i> Summary</h4>' +
-              '<p>' + escapeHtmlSemantic(entry.summary || 'No summary available') + '</p>' +
-            '</div>' +
-            '<div class="semantic-detail-section">' +
-              '<h4><i data-lucide="tag" class="w-3 h-3"></i> All Keywords</h4>' +
-              '<div class="semantic-keywords-full">' +
-                (entry.keywords || []).map(function(k) {
-                  return '<span class="semantic-keyword">' + escapeHtmlSemantic(k) + '</span>';
-                }).join('') +
-              '</div>' +
-            '</div>' +
-            '<div class="semantic-detail-meta">' +
-              '<span><i data-lucide="hash" class="w-3 h-3"></i> ' + (entry.line_count || 0) + ' lines</span>' +
-              '<span><i data-lucide="cpu" class="w-3 h-3"></i> ' + escapeHtmlSemantic(entry.llm_tool || 'Unknown') + '</span>' +
-              '<span><i data-lucide="calendar" class="w-3 h-3"></i> ' + date + '</span>' +
-            '</div>' +
-          '</div>' +
-        '</td>' +
-      '</tr>'
-    );
-  }).join('');
-
-  container.innerHTML =
-    '<table class="semantic-table">' +
-      '<thead>' +
-        '<tr>' +
-          '<th>File</th>' +
-          '<th>Language</th>' +
-          '<th>Purpose</th>' +
-          '<th>Keywords</th>' +
-          '<th>Tool</th>' +
-          '<th>Date</th>' +
-        '</tr>' +
-      '</thead>' +
-      '<tbody>' + rows + '</tbody>' +
-    '</table>';
-
-  if (window.lucide) lucide.createIcons();
-}
-
-function toggleSemanticDetail(idx) {
-  var detailRow = document.getElementById('semanticDetail' + idx);
-  if (detailRow) {
-    detailRow.classList.toggle('hidden');
-    if (window.lucide) lucide.createIcons();
-  }
-}
-
-function updateSemanticPagination() {
-  var total = semanticMetadataCache.total;
-  var offset = semanticMetadataCache.offset;
-  var limit = semanticMetadataCache.limit;
-  var entries = semanticMetadataCache.entries;
-
-  var countBadge = document.getElementById('semanticMetadataCount');
-  if (countBadge) {
-    countBadge.textContent = total + ' entries';
-  }
-
-  var paginationInfo = document.getElementById('semanticPaginationInfo');
-  if (paginationInfo) {
-    if (total > 0) {
-      paginationInfo.textContent = (offset + 1) + '-' + (offset + entries.length) + ' of ' + total;
-    } else {
-      paginationInfo.textContent = 'No entries';
-    }
-  }
-
-  var pageSelect = document.getElementById('semanticPageSelect');
-  if (pageSelect) {
-    var totalPages = Math.ceil(total / limit) || 1;
-    var currentPage = Math.floor(offset / limit);
-
-    pageSelect.innerHTML = '';
-    for (var i = 0; i < totalPages; i++) {
-      var opt = document.createElement('option');
-      opt.value = i;
-      opt.textContent = i + 1;
-      if (i === currentPage) opt.selected = true;
-      pageSelect.appendChild(opt);
-    }
-  }
-
-  var prevBtn = document.getElementById('semanticPrevBtn');
-  var nextBtn = document.getElementById('semanticNextBtn');
-  if (prevBtn) prevBtn.disabled = offset === 0;
-  if (nextBtn) nextBtn.disabled = offset + limit >= total;
-}
-
-function semanticPrevPage() {
-  if (semanticMetadataCache.offset > 0) {
-    loadSemanticMetadata(Math.max(0, semanticMetadataCache.offset - semanticMetadataCache.limit));
-  }
-}
-
-function semanticNextPage() {
-  if (semanticMetadataCache.offset + semanticMetadataCache.limit < semanticMetadataCache.total) {
-    loadSemanticMetadata(semanticMetadataCache.offset + semanticMetadataCache.limit);
-  }
-}
-
-function semanticGoToPage(pageIndex) {
-  var offset = parseInt(pageIndex, 10) * semanticMetadataCache.limit;
-  loadSemanticMetadata(offset);
-}
-
-function filterSemanticByTool(tool) {
-  loadSemanticMetadata(0, tool);
-}
-
-function refreshSemanticMetadata() {
-  loadSemanticMetadata(semanticMetadataCache.offset);
-}
-
-function getLlmEnhancementSettings() {
-  return Object.assign({}, llmEnhancementSettings);
-}
--- a/ccw/src/templates/dashboard-js/i18n.js
+++ b/ccw/src/templates/dashboard-js/i18n.js
@@ -277,35 +277,10 @@ const i18n = {

    // Semantic Search Configuration
    'semantic.settings': 'Semantic Search Settings',
-    'semantic.configDesc': 'Configure LLM enhancement for semantic indexing',
-    'semantic.llmEnhancement': 'LLM Enhancement',
-    'semantic.llmDesc': 'Use LLM to generate code summaries for better semantic search',
-    'semantic.primaryTool': 'Primary LLM Tool',
-    'semantic.fallbackTool': 'Fallback Tool',
-    'semantic.batchSize': 'Batch Size',
-    'semantic.timeout': 'Timeout',
-    'semantic.file': 'file',
-    'semantic.files': 'files',
-    'semantic.enhanceInfo': 'LLM enhancement generates code summaries and keywords for each file, improving semantic search accuracy.',
-    'semantic.enhanceCommand': 'Run',
-    'semantic.enhanceAfterEnable': 'after enabling to process existing files.',
-    'semantic.runEnhanceNow': 'Run Enhance Now',
-    'semantic.viewStatus': 'View Status',
    'semantic.testSearch': 'Test Semantic Search',
    'semantic.searchPlaceholder': 'Enter semantic query (e.g., authentication logic, error handling)',
    'semantic.runSearch': 'Run Semantic Search',
    'semantic.close': 'Close',
-    'semantic.enabled': 'enabled',
-    'semantic.disabled': 'disabled',
-    'semantic.toolSetTo': 'Primary LLM tool set to',
-    'semantic.fallbackSetTo': 'Fallback tool set to',
-    'semantic.none': 'none',
-    'semantic.llmEnhancement': 'LLM Enhancement',
-    'semantic.batchSetTo': 'Batch size set to',
-    'semantic.timeoutSetTo': 'Timeout set to',
-    'semantic.minute': 'minute',
-    'semantic.minutes': 'minutes',
-    'semantic.enableFirst': 'Please enable LLM Enhancement first',

    'cli.settings': 'CLI Execution Settings',
    'cli.promptFormat': 'Prompt Format',
@@ -1407,35 +1382,10 @@ const i18n = {

    // Semantic Search 配置
    'semantic.settings': '语义搜索设置',
-    'semantic.configDesc': '配置语义索引的 LLM 增强功能',
-    'semantic.llmEnhancement': 'LLM 增强',
-    'semantic.llmDesc': '使用 LLM 生成代码摘要以改进语义搜索',
-    'semantic.primaryTool': '主 LLM 工具',
-    'semantic.fallbackTool': '备用工具',
-    'semantic.batchSize': '批处理大小',
-    'semantic.timeout': '超时时间',
-    'semantic.file': '个文件',
-    'semantic.files': '个文件',
-    'semantic.enhanceInfo': 'LLM 增强为每个文件生成代码摘要和关键词，提高语义搜索准确度。',
-    'semantic.enhanceCommand': '运行',
-    'semantic.enhanceAfterEnable': '启用后处理现有文件。',
-    'semantic.runEnhanceNow': '立即运行增强',
-    'semantic.viewStatus': '查看状态',
    'semantic.testSearch': '测试语义搜索',
    'semantic.searchPlaceholder': '输入语义查询（例如：身份验证逻辑、错误处理）',
    'semantic.runSearch': '运行语义搜索',
    'semantic.close': '关闭',
-    'semantic.enabled': '已启用',
-    'semantic.disabled': '已禁用',
-    'semantic.toolSetTo': '主 LLM 工具已设置为',
-    'semantic.fallbackSetTo': '备用工具已设置为',
-    'semantic.none': '无',
-    'semantic.llmEnhancement': 'LLM 增强',
-    'semantic.batchSetTo': '批量大小已设置为',
-    'semantic.timeoutSetTo': '超时已设置为',
-    'semantic.minute': '分钟',
-    'semantic.minutes': '分钟',
-    'semantic.enableFirst': '请先启用 LLM 增强',

    'cli.settings': 'CLI 调用设置',
    'cli.promptFormat': '提示词格式',
--- a/ccw/src/templates/dashboard-js/views/cli-manager.js
+++ b/ccw/src/templates/dashboard-js/views/cli-manager.js
@@ -397,13 +397,11 @@ function renderToolsSection() {
  // Semantic Search item (only show if CodexLens is installed)
  var semanticHtml = '';
  if (codexLensStatus.ready) {
-    semanticHtml = '<div class="tool-item clickable ' + (semanticStatus.available ? 'available' : 'unavailable') + '" onclick="openSemanticSettingsModal()">' +
+    semanticHtml = '<div class="tool-item ' + (semanticStatus.available ? 'available' : 'unavailable') + '">' +
      '<div class="tool-item-left">' +
        '<span class="tool-status-dot ' + (semanticStatus.available ? 'status-available' : 'status-unavailable') + '"></span>' +
        '<div class="tool-item-info">' +
-          '<div class="tool-item-name">Semantic Search <span class="tool-type-badge ai">AI</span>' +
-            (llmEnhancementSettings.enabled ? '<span class="tool-type-badge llm">LLM</span>' : '') +
-            '<i data-lucide="settings" class="w-3 h-3 tool-config-icon"></i></div>' +
+          '<div class="tool-item-name">Semantic Search <span class="tool-type-badge ai">AI</span></div>' +
          '<div class="tool-item-desc">' + (semanticStatus.available ? 'AI-powered code understanding' : 'Natural language code search') + '</div>' +
        '</div>' +
      '</div>' +
--- a/codex-lens/docs/CLI_INTEGRATION_SUMMARY.md
+++ b/codex-lens/docs/CLI_INTEGRATION_SUMMARY.md
@@ -1,316 +0,0 @@
-# CLI Integration Summary - Embedding Management
-
-**Date**: 2025-12-16
-**Version**: v0.5.1
-**Status**: ✅ Complete
-
---
-
-## Overview
-
-Completed integration of embedding management commands into the CodexLens CLI, making vector search functionality more accessible and user-friendly. Users no longer need to run standalone scripts - all embedding operations are now available through simple CLI commands.
-
-## What Changed
-
-### 1. New CLI Commands
-
-#### `codexlens embeddings-generate`
-
-**Purpose**: Generate semantic embeddings for code search
-
-**Features**:
- Accepts project directory or direct `_index.db` path
- Auto-finds index for project paths using registry
- Supports 4 model profiles (fast, code, multilingual, balanced)
- Force regeneration with `--force` flag
- Configurable chunk size
- Verbose mode with progress updates
- JSON output mode for scripting
-
-**Examples**:
-```bash
-# Generate embeddings for a project
-codexlens embeddings-generate ~/projects/my-app
-
-# Use specific model
-codexlens embeddings-generate ~/projects/my-app --model fast
-
-# Force regeneration
-codexlens embeddings-generate ~/projects/my-app --force
-
-# Verbose output
-codexlens embeddings-generate ~/projects/my-app -v
-```
-
-**Output**:
-```
-Generating embeddings
-Index: ~/.codexlens/indexes/my-app/_index.db
-Model: code
-
-✓ Embeddings generated successfully!
-  Model: jinaai/jina-embeddings-v2-base-code
-  Chunks created: 1,234
-  Files processed: 89
-  Time: 45.2s
-
-Use vector search with:
-  codexlens search 'your query' --mode pure-vector
-```
-
-#### `codexlens embeddings-status`
-
-**Purpose**: Check embedding status for indexes
-
-**Features**:
- Check all indexes (no arguments)
- Check specific project or index
- Summary table view
- File coverage statistics
- Missing files detection
- JSON output mode
-
-**Examples**:
-```bash
-# Check all indexes
-codexlens embeddings-status
-
-# Check specific project
-codexlens embeddings-status ~/projects/my-app
-
-# Check specific index
-codexlens embeddings-status ~/.codexlens/indexes/my-app/_index.db
-```
-
-**Output (all indexes)**:
-```
-Embedding Status Summary
-Index root: ~/.codexlens/indexes
-
-Total indexes: 5
-Indexes with embeddings: 3/5
-Total chunks: 4,567
-
-Project      Files  Chunks  Coverage  Status
-my-app        89    1,234    100.0%      ✓
-other-app    145    2,456     95.5%      ✓
-test-proj     23      877    100.0%      ✓
-no-emb       67        0       0.0%      —
-legacy       45        0       0.0%      —
-```
-
-**Output (specific project)**:
-```
-Embedding Status
-Index: ~/.codexlens/indexes/my-app/_index.db
-
-✓ Embeddings available
-  Total chunks: 1,234
-  Total files: 89
-  Files with embeddings: 89/89
-  Coverage: 100.0%
-```
-
-### 2. Improved Error Messages
-
-Enhanced error messages throughout the search pipeline to guide users to the new CLI commands:
-
-**Before**:
-```
-DEBUG: No semantic_chunks table found
-DEBUG: Vector store is empty
-```
-
-**After**:
-```
-INFO: No embeddings found in index. Generate embeddings with: codexlens embeddings-generate ~/projects/my-app
-WARNING: Pure vector search returned no results. This usually means embeddings haven't been generated. Run: codexlens embeddings-generate ~/projects/my-app
-```
-
-**Locations Updated**:
- `src/codexlens/search/hybrid_search.py` - Added helpful info messages
- `src/codexlens/cli/commands.py` - Improved error hints in CLI output
-
-### 3. Backend Infrastructure
-
-Created `src/codexlens/cli/embedding_manager.py` with reusable functions:
-
-**Functions**:
- `check_index_embeddings(index_path)` - Check embedding status
- `generate_embeddings(index_path, ...)` - Generate embeddings
- `find_all_indexes(scan_dir)` - Find all indexes in directory
- `get_embedding_stats_summary(index_root)` - Aggregate stats for all indexes
-
-**Architecture**:
- Follows same pattern as `model_manager.py` for consistency
- Returns standardized result dictionaries `{"success": bool, "result": dict}`
- Supports progress callbacks for UI updates
- Handles all error cases gracefully
-
-### 4. Documentation Updates
-
-Updated user-facing documentation to reference new CLI commands:
-
-**Files Updated**:
-1. `docs/PURE_VECTOR_SEARCH_GUIDE.md`
-   - Changed all references from `python scripts/generate_embeddings.py` to `codexlens embeddings-generate`
-   - Updated troubleshooting section
-   - Added new `embeddings-status` examples
-
-2. `docs/IMPLEMENTATION_SUMMARY.md`
-   - Marked P1 priorities as complete
-   - Added CLI integration to checklist
-   - Updated feature list
-
-3. `src/codexlens/cli/commands.py`
-   - Updated search command help text to reference new commands
-
-## Files Created
-
-| File | Purpose | Lines |
-|------|---------|-------|
-| `src/codexlens/cli/embedding_manager.py` | Backend logic for embedding operations | ~290 |
-| `docs/CLI_INTEGRATION_SUMMARY.md` | This document | ~400 |
-
-## Files Modified
-
-| File | Changes |
-|------|---------|
-| `src/codexlens/cli/commands.py` | Added 2 new commands (~270 lines) |
-| `src/codexlens/search/hybrid_search.py` | Improved error messages (~20 lines) |
-| `docs/PURE_VECTOR_SEARCH_GUIDE.md` | Updated CLI references (~10 changes) |
-| `docs/IMPLEMENTATION_SUMMARY.md` | Marked P1 complete (~10 lines) |
-
-## Testing Workflow
-
-### Manual Testing Checklist
-
- [ ] `codexlens embeddings-status` with no indexes
- [ ] `codexlens embeddings-status` with multiple indexes
- [ ] `codexlens embeddings-status ~/projects/my-app` (project path)
- [ ] `codexlens embeddings-status ~/.codexlens/indexes/my-app/_index.db` (direct path)
- [ ] `codexlens embeddings-generate ~/projects/my-app` (first time)
- [ ] `codexlens embeddings-generate ~/projects/my-app` (already exists, should error)
- [ ] `codexlens embeddings-generate ~/projects/my-app --force` (regenerate)
- [ ] `codexlens embeddings-generate ~/projects/my-app --model fast`
- [ ] `codexlens embeddings-generate ~/projects/my-app -v` (verbose output)
- [ ] `codexlens search "query" --mode pure-vector` (with embeddings)
- [ ] `codexlens search "query" --mode pure-vector` (without embeddings, check error message)
- [ ] `codexlens embeddings-status --json` (JSON output)
- [ ] `codexlens embeddings-generate ~/projects/my-app --json` (JSON output)
-
-### Expected Test Results
-
-**Without embeddings**:
-```bash
-$ codexlens embeddings-status ~/projects/my-app
-Embedding Status
-Index: ~/.codexlens/indexes/my-app/_index.db
-
-— No embeddings found
-  Total files indexed: 89
-
-Generate embeddings with:
-  codexlens embeddings-generate ~/projects/my-app
-```
-
-**After generating embeddings**:
-```bash
-$ codexlens embeddings-generate ~/projects/my-app
-Generating embeddings
-Index: ~/.codexlens/indexes/my-app/_index.db
-Model: code
-
-✓ Embeddings generated successfully!
-  Model: jinaai/jina-embeddings-v2-base-code
-  Chunks created: 1,234
-  Files processed: 89
-  Time: 45.2s
-```
-
-**Status after generation**:
-```bash
-$ codexlens embeddings-status ~/projects/my-app
-Embedding Status
-Index: ~/.codexlens/indexes/my-app/_index.db
-
-✓ Embeddings available
-  Total chunks: 1,234
-  Total files: 89
-  Files with embeddings: 89/89
-  Coverage: 100.0%
-```
-
-**Pure vector search**:
-```bash
-$ codexlens search "how to authenticate users" --mode pure-vector
-Found 5 results in 12.3ms:
-
-auth/authentication.py:42  [0.876]
-  def authenticate_user(username: str, password: str) -> bool:
-      '''Verify user credentials against database.'''
-      return check_password(username, password)
-...
-```
-
-## User Experience Improvements
-
-| Before | After |
-|--------|-------|
-| Run separate Python script | Single CLI command |
-| Manual path resolution | Auto-finds project index |
-| No status check | `embeddings-status` command |
-| Generic error messages | Helpful hints with commands |
-| Script-level documentation | Integrated `--help` text |
-
-## Backward Compatibility
-
- ✅ Standalone script `scripts/generate_embeddings.py` still works
- ✅ All existing search modes unchanged
- ✅ Pure vector implementation backward compatible
- ✅ No breaking changes to APIs
-
-## Next Steps (Optional)
-
-Future enhancements users might want:
-
-1. **Batch operations**:
-   ```bash
-   codexlens embeddings-generate --all  # Generate for all indexes
-   ```
-
-2. **Incremental updates**:
-   ```bash
-   codexlens embeddings-update ~/projects/my-app  # Only changed files
-   ```
-
-3. **Embedding cleanup**:
-   ```bash
-   codexlens embeddings-delete ~/projects/my-app  # Remove embeddings
-   ```
-
-4. **Model management integration**:
-   ```bash
-   codexlens embeddings-generate ~/projects/my-app --download-model
-   ```
-
---
-
-## Summary
-
-✅ **Completed**: Full CLI integration for embedding management
-✅ **User Experience**: Simplified from multi-step script to single command
-✅ **Error Handling**: Helpful messages guide users to correct commands
-✅ **Documentation**: All references updated to new CLI commands
-✅ **Testing**: Manual testing checklist prepared
-
-**Impact**: Users can now manage embeddings with intuitive CLI commands instead of running scripts, making vector search more accessible and easier to use.
-
-**Command Summary**:
-```bash
-codexlens embeddings-status [path]                     # Check status
-codexlens embeddings-generate <path> [--model] [--force]  # Generate
-codexlens search "query" --mode pure-vector            # Use vector search
-```
-
-The integration is **complete and ready for testing**.
--- a/codex-lens/docs/DOCSTRING_LLM_HYBRID_DESIGN.md
+++ b/codex-lens/docs/DOCSTRING_LLM_HYBRID_DESIGN.md
@@ -1,972 +0,0 @@
-# Docstring与LLM混合策略设计方案
-
-## 1. 背景与目标
-
-### 1.1 当前问题
-
-现有 `llm_enhancer.py` 的实现存在以下问题：
-
-1. **忽略已有文档**：对所有代码无差别调用LLM，即使已有高质量的docstring
-2. **成本浪费**：重复生成已有信息，增加API调用费用和时间
-3. **信息质量不一致**：LLM生成的内容可能不如作者编写的docstring准确
-4. **缺少作者意图**：丢失了docstring中的设计决策、使用示例等关键信息
-
-### 1.2 设计目标
-
-实现**智能混合策略**，结合docstring和LLM的优势：
-
-1. **优先使用docstring**：作为最权威的信息源
-2. **LLM作为补充**：填补docstring缺失或质量不足的部分
-3. **智能质量评估**：自动判断docstring质量，决定是否需要LLM增强
-4. **成本优化**：减少不必要的LLM调用，降低API费用
-5. **信息融合**：将docstring和LLM生成的内容有机结合
-
-## 2. 技术架构
-
-### 2.1 整体流程
-
-```
-Code Symbol
-    ↓
-[Docstring Extractor] ← 提取docstring
-    ↓
-[Quality Evaluator]   ← 评估docstring质量
-    ↓
-    ├─ High Quality → Use Docstring Directly
-    │                 + LLM Generate Keywords Only
-    │
-    ├─ Medium Quality → LLM Refine & Enhance
-    │                   (docstring作为base)
-    │
-    └─ Low/No Docstring → LLM Full Generation
-                          (现有流程)
-    ↓
-[Metadata Merger]     ← 合并docstring和LLM内容
-    ↓
-Final SemanticMetadata
-```
-
-### 2.2 核心组件
-
-```python
-from dataclasses import dataclass
-from enum import Enum
-from typing import Optional
-
-class DocstringQuality(Enum):
-    """Docstring质量等级"""
-    MISSING = "missing"           # 无docstring
-    LOW = "low"                   # 质量低：<10字符或纯占位符
-    MEDIUM = "medium"             # 质量中：有基本描述但不完整
-    HIGH = "high"                 # 质量高：详细且结构化
-
-@dataclass
-class DocstringMetadata:
-    """从docstring提取的元数据"""
-    raw_text: str
-    quality: DocstringQuality
-    summary: Optional[str] = None       # 提取的摘要
-    parameters: Optional[dict] = None   # 参数说明
-    returns: Optional[str] = None       # 返回值说明
-    examples: Optional[str] = None      # 使用示例
-    notes: Optional[str] = None         # 注意事项
-```
-
-## 3. 详细实现步骤
-
-### 3.1 Docstring提取与解析
-
-```python
-import re
-from typing import Optional
-
-class DocstringExtractor:
-    """Docstring提取器"""
-
-    # Docstring风格正则
-    GOOGLE_STYLE_PATTERN = re.compile(
-        r'Args:|Returns:|Raises:|Examples:|Note:',
-        re.MULTILINE
-    )
-
-    NUMPY_STYLE_PATTERN = re.compile(
-        r'Parameters\n-+|Returns\n-+|Examples\n-+',
-        re.MULTILINE
-    )
-
-    def extract_from_code(self, content: str, symbol: Symbol) -> Optional[str]:
-        """从代码中提取docstring"""
-
-        lines = content.splitlines()
-        start_line = symbol.range[0] - 1  # 0-indexed
-
-        # 查找函数定义后的第一个字符串字面量
-        # 通常在函数定义的下一行或几行内
-        for i in range(start_line + 1, min(start_line + 10, len(lines))):
-            line = lines[i].strip()
-
-            # Python triple-quoted string
-            if line.startswith('"""') or line.startswith("'''"):
-                return self._extract_multiline_docstring(lines, i)
-
-        return None
-
-    def _extract_multiline_docstring(
-        self,
-        lines: List[str],
-        start_idx: int
-    ) -> str:
-        """提取多行docstring"""
-
-        quote_char = '"""' if lines[start_idx].strip().startswith('"""') else "'''"
-        docstring_lines = []
-
-        # 检查是否单行docstring
-        first_line = lines[start_idx].strip()
-        if first_line.count(quote_char) == 2:
-            # 单行: """This is a docstring."""
-            return first_line.strip(quote_char).strip()
-
-        # 多行docstring
-        in_docstring = True
-        for i in range(start_idx, len(lines)):
-            line = lines[i]
-
-            if i == start_idx:
-                # 第一行：移除开始的引号
-                docstring_lines.append(line.strip().lstrip(quote_char))
-            elif quote_char in line:
-                # 结束行：移除结束的引号
-                docstring_lines.append(line.strip().rstrip(quote_char))
-                break
-            else:
-                docstring_lines.append(line.strip())
-
-        return '\n'.join(docstring_lines).strip()
-
-    def parse_docstring(self, raw_docstring: str) -> DocstringMetadata:
-        """解析docstring，提取结构化信息"""
-
-        if not raw_docstring:
-            return DocstringMetadata(
-                raw_text="",
-                quality=DocstringQuality.MISSING
-            )
-
-        # 评估质量
-        quality = self._evaluate_quality(raw_docstring)
-
-        # 提取各个部分
-        metadata = DocstringMetadata(
-            raw_text=raw_docstring,
-            quality=quality,
-        )
-
-        # 提取摘要（第一行或第一段）
-        metadata.summary = self._extract_summary(raw_docstring)
-
-        # 如果是Google或NumPy风格，提取结构化内容
-        if self.GOOGLE_STYLE_PATTERN.search(raw_docstring):
-            self._parse_google_style(raw_docstring, metadata)
-        elif self.NUMPY_STYLE_PATTERN.search(raw_docstring):
-            self._parse_numpy_style(raw_docstring, metadata)
-
-        return metadata
-
-    def _evaluate_quality(self, docstring: str) -> DocstringQuality:
-        """评估docstring质量"""
-
-        if not docstring or len(docstring.strip()) == 0:
-            return DocstringQuality.MISSING
-
-        # 检查是否是占位符
-        placeholders = ['todo', 'fixme', 'tbd', 'placeholder', '...']
-        if any(p in docstring.lower() for p in placeholders):
-            return DocstringQuality.LOW
-
-        # 长度检查
-        if len(docstring.strip()) < 10:
-            return DocstringQuality.LOW
-
-        # 检查是否有结构化内容
-        has_structure = (
-            self.GOOGLE_STYLE_PATTERN.search(docstring) or
-            self.NUMPY_STYLE_PATTERN.search(docstring)
-        )
-
-        # 检查是否有足够的描述性文本
-        word_count = len(docstring.split())
-
-        if has_structure and word_count >= 20:
-            return DocstringQuality.HIGH
-        elif word_count >= 10:
-            return DocstringQuality.MEDIUM
-        else:
-            return DocstringQuality.LOW
-
-    def _extract_summary(self, docstring: str) -> str:
-        """提取摘要（第一行或第一段）"""
-
-        lines = docstring.split('\n')
-        # 第一行非空行作为摘要
-        for line in lines:
-            if line.strip():
-                return line.strip()
-
-        return ""
-
-    def _parse_google_style(self, docstring: str, metadata: DocstringMetadata):
-        """解析Google风格docstring"""
-
-        # 提取Args
-        args_match = re.search(r'Args:(.*?)(?=Returns:|Raises:|Examples:|Note:|\Z)', docstring, re.DOTALL)
-        if args_match:
-            metadata.parameters = self._parse_args_section(args_match.group(1))
-
-        # 提取Returns
-        returns_match = re.search(r'Returns:(.*?)(?=Raises:|Examples:|Note:|\Z)', docstring, re.DOTALL)
-        if returns_match:
-            metadata.returns = returns_match.group(1).strip()
-
-        # 提取Examples
-        examples_match = re.search(r'Examples:(.*?)(?=Note:|\Z)', docstring, re.DOTALL)
-        if examples_match:
-            metadata.examples = examples_match.group(1).strip()
-
-    def _parse_args_section(self, args_text: str) -> dict:
-        """解析参数列表"""
-
-        params = {}
-        # 匹配 "param_name (type): description" 或 "param_name: description"
-        pattern = re.compile(r'(\w+)\s*(?:\(([^)]+)\))?\s*:\s*(.+)')
-
-        for line in args_text.split('\n'):
-            match = pattern.search(line.strip())
-            if match:
-                param_name, param_type, description = match.groups()
-                params[param_name] = {
-                    'type': param_type,
-                    'description': description.strip()
-                }
-
-        return params
-```
-
-### 3.2 智能混合策略引擎
-
-```python
-class HybridEnhancer:
-    """Docstring与LLM混合增强器"""
-
-    def __init__(
-        self,
-        llm_enhancer: LLMEnhancer,
-        docstring_extractor: DocstringExtractor
-    ):
-        self.llm_enhancer = llm_enhancer
-        self.docstring_extractor = docstring_extractor
-
-    def enhance_with_strategy(
-        self,
-        file_data: FileData,
-        symbols: List[Symbol]
-    ) -> Dict[str, SemanticMetadata]:
-        """根据docstring质量选择增强策略"""
-
-        results = {}
-
-        for symbol in symbols:
-            # 1. 提取并解析docstring
-            raw_docstring = self.docstring_extractor.extract_from_code(
-                file_data.content, symbol
-            )
-            doc_metadata = self.docstring_extractor.parse_docstring(raw_docstring or "")
-
-            # 2. 根据质量选择策略
-            semantic_metadata = self._apply_strategy(
-                file_data, symbol, doc_metadata
-            )
-
-            results[symbol.name] = semantic_metadata
-
-        return results
-
-    def _apply_strategy(
-        self,
-        file_data: FileData,
-        symbol: Symbol,
-        doc_metadata: DocstringMetadata
-    ) -> SemanticMetadata:
-        """应用混合策略"""
-
-        quality = doc_metadata.quality
-
-        if quality == DocstringQuality.HIGH:
-            # 高质量：直接使用docstring，只用LLM生成keywords
-            return self._use_docstring_with_llm_keywords(symbol, doc_metadata)
-
-        elif quality == DocstringQuality.MEDIUM:
-            # 中等质量：让LLM精炼和增强
-            return self._refine_with_llm(file_data, symbol, doc_metadata)
-
-        else:  # LOW or MISSING
-            # 低质量或无：完全由LLM生成
-            return self._full_llm_generation(file_data, symbol)
-
-    def _use_docstring_with_llm_keywords(
-        self,
-        symbol: Symbol,
-        doc_metadata: DocstringMetadata
-    ) -> SemanticMetadata:
-        """策略1：使用docstring，LLM只生成keywords"""
-
-        # 直接使用docstring的摘要
-        summary = doc_metadata.summary or doc_metadata.raw_text[:200]
-
-        # 使用LLM生成keywords
-        keywords = self._generate_keywords_only(summary, symbol.name)
-
-        # 从docstring推断purpose
-        purpose = self._infer_purpose_from_docstring(doc_metadata)
-
-        return SemanticMetadata(
-            summary=summary,
-            keywords=keywords,
-            purpose=purpose,
-            file_path=symbol.file_path if hasattr(symbol, 'file_path') else None,
-            symbol_name=symbol.name,
-            llm_tool="hybrid_docstring_primary",
-        )
-
-    def _refine_with_llm(
-        self,
-        file_data: FileData,
-        symbol: Symbol,
-        doc_metadata: DocstringMetadata
-    ) -> SemanticMetadata:
-        """策略2：让LLM精炼和增强docstring"""
-
-        prompt = f"""
-PURPOSE: Refine and enhance an existing docstring for better semantic search
-TASK:
- Review the existing docstring
- Generate a concise summary (1-2 sentences) that captures the core purpose
- Extract 8-12 relevant keywords for search
- Identify the functional category/purpose
-
-EXISTING DOCSTRING:
-{doc_metadata.raw_text}
-
-CODE CONTEXT:
-Function: {symbol.name}
-```{file_data.language}
-{self._get_symbol_code(file_data.content, symbol)}
-```
-
-OUTPUT: JSON format
-{{
-    "summary": "refined summary based on docstring and code",
-    "keywords": ["keyword1", "keyword2", ...],
-    "purpose": "category"
-}}
-"""
-
-        response = self.llm_enhancer._invoke_ccw_cli(prompt, tool='gemini')
-        if response['success']:
-            data = json.loads(self.llm_enhancer._extract_json(response['stdout']))
-            return SemanticMetadata(
-                summary=data.get('summary', doc_metadata.summary),
-                keywords=data.get('keywords', []),
-                purpose=data.get('purpose', 'unknown'),
-                file_path=file_data.path,
-                symbol_name=symbol.name,
-                llm_tool="hybrid_llm_refined",
-            )
-
-        # Fallback: 使用docstring
-        return self._use_docstring_with_llm_keywords(symbol, doc_metadata)
-
-    def _full_llm_generation(
-        self,
-        file_data: FileData,
-        symbol: Symbol
-    ) -> SemanticMetadata:
-        """策略3：完全由LLM生成（原有流程）"""
-
-        # 复用现有的LLM enhancer
-        code_snippet = self._get_symbol_code(file_data.content, symbol)
-
-        results = self.llm_enhancer.enhance_files([
-            FileData(
-                path=f"{file_data.path}:{symbol.name}",
-                content=code_snippet,
-                language=file_data.language
-            )
-        ])
-
-        return results.get(f"{file_data.path}:{symbol.name}", SemanticMetadata(
-            summary="",
-            keywords=[],
-            purpose="unknown",
-            file_path=file_data.path,
-            symbol_name=symbol.name,
-            llm_tool="hybrid_llm_full",
-        ))
-
-    def _generate_keywords_only(self, summary: str, symbol_name: str) -> List[str]:
-        """仅生成keywords（快速LLM调用）"""
-
-        prompt = f"""
-PURPOSE: Generate search keywords for a code function
-TASK: Extract 5-8 relevant keywords from the summary
-
-Summary: {summary}
-Function Name: {symbol_name}
-
-OUTPUT: Comma-separated keywords
-"""
-
-        response = self.llm_enhancer._invoke_ccw_cli(prompt, tool='gemini')
-        if response['success']:
-            keywords_str = response['stdout'].strip()
-            return [k.strip() for k in keywords_str.split(',')]
-
-        # Fallback: 从摘要提取关键词
-        return self._extract_keywords_heuristic(summary)
-
-    def _extract_keywords_heuristic(self, text: str) -> List[str]:
-        """启发式关键词提取（无需LLM）"""
-
-        # 简单实现：提取名词性词组
-        import re
-        words = re.findall(r'\b[a-z]{4,}\b', text.lower())
-
-        # 过滤常见词
-        stopwords = {'this', 'that', 'with', 'from', 'have', 'will', 'your', 'their'}
-        keywords = [w for w in words if w not in stopwords]
-
-        return list(set(keywords))[:8]
-
-    def _infer_purpose_from_docstring(self, doc_metadata: DocstringMetadata) -> str:
-        """从docstring推断purpose（无需LLM）"""
-
-        summary = doc_metadata.summary.lower()
-
-        # 简单规则匹配
-        if 'authenticate' in summary or 'login' in summary:
-            return 'auth'
-        elif 'validate' in summary or 'check' in summary:
-            return 'validation'
-        elif 'parse' in summary or 'format' in summary:
-            return 'data_processing'
-        elif 'api' in summary or 'endpoint' in summary:
-            return 'api'
-        elif 'database' in summary or 'query' in summary:
-            return 'data'
-        elif 'test' in summary:
-            return 'test'
-
-        return 'util'
-
-    def _get_symbol_code(self, content: str, symbol: Symbol) -> str:
-        """提取符号的代码"""
-
-        lines = content.splitlines()
-        start, end = symbol.range
-        return '\n'.join(lines[start-1:end])
-```
-
-### 3.3 成本优化统计
-
-```python
-@dataclass
-class EnhancementStats:
-    """增强统计"""
-    total_symbols: int = 0
-    used_docstring_only: int = 0      # 只使用docstring
-    llm_keywords_only: int = 0        # LLM只生成keywords
-    llm_refined: int = 0              # LLM精炼docstring
-    llm_full_generation: int = 0      # LLM完全生成
-    total_llm_calls: int = 0
-    estimated_cost_savings: float = 0.0  # 相比全用LLM节省的成本
-
-class CostOptimizedEnhancer(HybridEnhancer):
-    """带成本统计的增强器"""
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.stats = EnhancementStats()
-
-    def enhance_with_strategy(
-        self,
-        file_data: FileData,
-        symbols: List[Symbol]
-    ) -> Dict[str, SemanticMetadata]:
-        """增强并统计成本"""
-
-        self.stats.total_symbols += len(symbols)
-        results = super().enhance_with_strategy(file_data, symbols)
-
-        # 统计各策略使用情况
-        for metadata in results.values():
-            if metadata.llm_tool == "hybrid_docstring_primary":
-                self.stats.used_docstring_only += 1
-                self.stats.llm_keywords_only += 1
-                self.stats.total_llm_calls += 1
-            elif metadata.llm_tool == "hybrid_llm_refined":
-                self.stats.llm_refined += 1
-                self.stats.total_llm_calls += 1
-            elif metadata.llm_tool == "hybrid_llm_full":
-                self.stats.llm_full_generation += 1
-                self.stats.total_llm_calls += 1
-
-        # 计算成本节省（假设：keywords-only调用成本为full的20%）
-        keywords_only_savings = self.stats.llm_keywords_only * 0.8  # 节省80%
-        full_generation_count = self.stats.total_symbols - self.stats.llm_keywords_only
-        self.stats.estimated_cost_savings = keywords_only_savings / full_generation_count if full_generation_count > 0 else 0
-
-        return results
-
-    def print_stats(self):
-        """打印统计信息"""
-
-        print("=== Enhancement Statistics ===")
-        print(f"Total Symbols: {self.stats.total_symbols}")
-        print(f"Used Docstring (with LLM keywords): {self.stats.used_docstring_only} ({self.stats.used_docstring_only/self.stats.total_symbols*100:.1f}%)")
-        print(f"LLM Refined Docstring: {self.stats.llm_refined} ({self.stats.llm_refined/self.stats.total_symbols*100:.1f}%)")
-        print(f"LLM Full Generation: {self.stats.llm_full_generation} ({self.stats.llm_full_generation/self.stats.total_symbols*100:.1f}%)")
-        print(f"Total LLM Calls: {self.stats.total_llm_calls}")
-        print(f"Estimated Cost Savings: {self.stats.estimated_cost_savings*100:.1f}%")
-```
-
-## 4. 配置选项
-
-```python
-@dataclass
-class HybridEnhancementConfig:
-    """混合增强配置"""
-
-    # 是否启用混合策略（False则回退到全LLM模式）
-    enable_hybrid: bool = True
-
-    # 质量阈值配置
-    use_docstring_threshold: DocstringQuality = DocstringQuality.HIGH
-    refine_docstring_threshold: DocstringQuality = DocstringQuality.MEDIUM
-
-    # 是否为高质量docstring生成keywords
-    generate_keywords_for_docstring: bool = True
-
-    # LLM配置
-    llm_tool: str = "gemini"
-    llm_timeout: int = 300000
-
-    # 成本优化
-    batch_size: int = 5              # 批量处理大小
-    skip_test_files: bool = True     # 跳过测试文件（通常docstring较少）
-
-    # 调试选项
-    log_strategy_decisions: bool = False  # 记录策略决策日志
-```
-
-## 5. 测试策略
-
-### 5.1 单元测试
-
-```python
-import pytest
-
-class TestDocstringExtractor:
-    """测试docstring提取"""
-
-    def test_extract_google_style(self):
-        """测试Google风格docstring提取"""
-        code = '''
-def calculate_total(items, discount=0):
-    """Calculate total price with optional discount.
-
-    This function processes a list of items and applies
-    a discount if specified.
-
-    Args:
-        items (list): List of item objects with price attribute.
-        discount (float): Discount percentage (0-1). Defaults to 0.
-
-    Returns:
-        float: Total price after discount.
-
-    Examples:
-        >>> calculate_total([item1, item2], discount=0.1)
-        90.0
-    """
-    total = sum(item.price for item in items)
-    return total * (1 - discount)
-'''
-        extractor = DocstringExtractor()
-        symbol = Symbol(name='calculate_total', kind='function', range=(1, 18))
-        docstring = extractor.extract_from_code(code, symbol)
-
-        assert docstring is not None
-        metadata = extractor.parse_docstring(docstring)
-
-        assert metadata.quality == DocstringQuality.HIGH
-        assert 'Calculate total price' in metadata.summary
-        assert metadata.parameters is not None
-        assert 'items' in metadata.parameters
-        assert metadata.returns is not None
-        assert metadata.examples is not None
-
-    def test_extract_low_quality_docstring(self):
-        """测试低质量docstring识别"""
-        code = '''
-def process():
-    """TODO"""
-    pass
-'''
-        extractor = DocstringExtractor()
-        symbol = Symbol(name='process', kind='function', range=(1, 3))
-        docstring = extractor.extract_from_code(code, symbol)
-
-        metadata = extractor.parse_docstring(docstring)
-        assert metadata.quality == DocstringQuality.LOW
-
-class TestHybridEnhancer:
-    """测试混合增强器"""
-
-    def test_high_quality_docstring_strategy(self):
-        """测试高质量docstring使用策略"""
-
-        extractor = DocstringExtractor()
-        llm_enhancer = LLMEnhancer(LLMConfig(enabled=True))
-        hybrid = HybridEnhancer(llm_enhancer, extractor)
-
-        # 模拟高质量docstring
-        doc_metadata = DocstringMetadata(
-            raw_text="Validate user credentials against database.",
-            quality=DocstringQuality.HIGH,
-            summary="Validate user credentials against database."
-        )
-
-        symbol = Symbol(name='validate_user', kind='function', range=(1, 10))
-
-        result = hybrid._use_docstring_with_llm_keywords(symbol, doc_metadata)
-
-        # 应该使用docstring的摘要
-        assert result.summary == doc_metadata.summary
-        # 应该有keywords（可能由LLM或启发式生成）
-        assert len(result.keywords) > 0
-
-    def test_cost_optimization(self):
-        """测试成本优化效果"""
-
-        enhancer = CostOptimizedEnhancer(
-            llm_enhancer=LLMEnhancer(LLMConfig(enabled=False)),  # Mock
-            docstring_extractor=DocstringExtractor()
-        )
-
-        # 模拟处理10个symbol，其中5个有高质量docstring
-        # 预期：5个只调用keywords生成，5个完整LLM
-        # 总调用10次，但成本降低（keywords调用更便宜）
-
-        # 实际测试需要mock LLM调用
-        pass
-```
-
-### 5.2 集成测试
-
-```python
-class TestHybridEnhancementPipeline:
-    """测试完整的混合增强流程"""
-
-    def test_full_pipeline(self):
-        """测试完整流程：代码 -> docstring提取 -> 质量评估 -> 策略选择 -> 增强"""
-
-        code = '''
-def authenticate_user(username, password):
-    """Authenticate user with username and password.
-
-    Args:
-        username (str): User's username
-        password (str): User's password
-
-    Returns:
-        bool: True if authenticated, False otherwise
-    """
-    # ... implementation
-    pass
-
-def helper_func(x):
-    # No docstring
-    return x * 2
-'''
-
-        file_data = FileData(path='auth.py', content=code, language='python')
-        symbols = [
-            Symbol(name='authenticate_user', kind='function', range=(1, 11)),
-            Symbol(name='helper_func', kind='function', range=(13, 15)),
-        ]
-
-        extractor = DocstringExtractor()
-        llm_enhancer = LLMEnhancer(LLMConfig(enabled=True))
-        hybrid = CostOptimizedEnhancer(llm_enhancer, extractor)
-
-        results = hybrid.enhance_with_strategy(file_data, symbols)
-
-        # authenticate_user 应该使用docstring
-        assert results['authenticate_user'].llm_tool == "hybrid_docstring_primary"
-
-        # helper_func 应该完全LLM生成
-        assert results['helper_func'].llm_tool == "hybrid_llm_full"
-
-        # 统计
-        assert hybrid.stats.total_symbols == 2
-        assert hybrid.stats.used_docstring_only >= 1
-        assert hybrid.stats.llm_full_generation >= 1
-```
-
-## 6. 实施路线图
-
-### Phase 1: 基础设施（1周）
- [x] 设计数据结构（DocstringMetadata, DocstringQuality）
- [ ] 实现DocstringExtractor（提取和解析）
- [ ] 支持Python docstring（Google/NumPy/reStructuredText风格）
- [ ] 单元测试
-
-### Phase 2: 质量评估（1周）
- [ ] 实现质量评估算法
- [ ] 启发式规则优化
- [ ] 测试不同质量的docstring
- [ ] 调整阈值参数
-
-### Phase 3: 混合策略（1-2周）
- [ ] 实现HybridEnhancer
- [ ] 三种策略实现（docstring-only, refine, full-llm）
- [ ] 策略选择逻辑
- [ ] 集成测试
-
-### Phase 4: 成本优化（1周）
- [ ] 实现CostOptimizedEnhancer
- [ ] 统计和监控
- [ ] 批量处理优化
- [ ] 性能测试
-
-### Phase 5: 多语言支持（1-2周）
- [ ] JavaScript/TypeScript JSDoc
- [ ] Java Javadoc
- [ ] 其他语言docstring格式
-
-### Phase 6: 集成与部署（1周）
- [ ] 集成到现有llm_enhancer
- [ ] CLI选项暴露
- [ ] 配置文件支持
- [ ] 文档和示例
-
-**总计预估时间**：6-8周
-
-## 7. 性能与成本分析
-
-### 7.1 预期成本节省
-
-假设场景：分析1000个函数
-
-| Docstring质量分布 | 占比 | LLM调用策略 | 相对成本 |
-|------------------|------|------------|---------|
-| High (有详细docstring) | 30% | 只生成keywords | 20% |
-| Medium (有基本docstring) | 40% | 精炼增强 | 60% |
-| Low/Missing | 30% | 完全生成 | 100% |
-
-**总成本计算**：
- 纯LLM模式：1000 * 100% = 1000 units
- 混合模式：300*20% + 400*60% + 300*100% = 60 + 240 + 300 = 600 units
- **节省**：40%
-
-### 7.2 质量对比
-
-| 指标 | 纯LLM模式 | 混合模式 |
-|------|----------|---------|
-| 准确性 | 中（可能有幻觉） | **高**（docstring权威） |
-| 一致性 | 中（依赖prompt） | **高**（保留作者风格） |
-| 覆盖率 | **高**（全覆盖） | 高（98%+） |
-| 成本 | 高 | **低**（节省40%） |
-| 速度 | 慢（所有文件） | **快**（减少LLM调用） |
-
-## 8. 潜在问题与解决方案
-
-### 8.1 问题：Docstring过时
-
-**现象**：代码已修改，但docstring未更新，导致信息不准确。
-
-**解决方案**：
-```python
-class DocstringFreshnessChecker:
-    """检查docstring与代码的一致性"""
-
-    def check_freshness(
-        self,
-        symbol: Symbol,
-        code: str,
-        doc_metadata: DocstringMetadata
-    ) -> bool:
-        """检查docstring是否与代码匹配"""
-
-        # 检查1: 参数列表是否匹配
-        if doc_metadata.parameters:
-            actual_params = self._extract_actual_parameters(code)
-            documented_params = set(doc_metadata.parameters.keys())
-
-            if actual_params != documented_params:
-                logger.warning(
-                    f"Parameter mismatch in {symbol.name}: "
-                    f"code has {actual_params}, doc has {documented_params}"
-                )
-                return False
-
-        # 检查2: 使用LLM验证一致性
-        # TODO: 构建验证prompt
-
-        return True
-```
-
-### 8.2 问题：不同docstring风格混用
-
-**现象**：同一项目中使用多种docstring风格（Google, NumPy, 自定义）。
-
-**解决方案**：
-```python
-class MultiStyleDocstringParser:
-    """支持多种docstring风格的解析器"""
-
-    def parse(self, docstring: str) -> DocstringMetadata:
-        """自动检测并解析不同风格"""
-
-        # 尝试各种解析器
-        for parser in [
-            GoogleStyleParser(),
-            NumpyStyleParser(),
-            ReStructuredTextParser(),
-            SimpleParser(),  # Fallback
-        ]:
-            try:
-                metadata = parser.parse(docstring)
-                if metadata.quality != DocstringQuality.LOW:
-                    return metadata
-            except Exception:
-                continue
-
-        # 如果所有解析器都失败，返回简单解析结果
-        return SimpleParser().parse(docstring)
-```
-
-### 8.3 问题：多语言docstring提取差异
-
-**现象**：不同语言的docstring格式和位置不同。
-
-**解决方案**：
-```python
-class LanguageSpecificExtractor:
-    """语言特定的docstring提取器"""
-
-    def extract(self, language: str, code: str, symbol: Symbol) -> Optional[str]:
-        """根据语言选择合适的提取器"""
-
-        extractors = {
-            'python': PythonDocstringExtractor(),
-            'javascript': JSDocExtractor(),
-            'typescript': TSDocExtractor(),
-            'java': JavadocExtractor(),
-        }
-
-        extractor = extractors.get(language, GenericExtractor())
-        return extractor.extract(code, symbol)
-
-class JSDocExtractor:
-    """JavaScript/TypeScript JSDoc提取器"""
-
-    def extract(self, code: str, symbol: Symbol) -> Optional[str]:
-        """提取JSDoc注释"""
-
-        lines = code.splitlines()
-        start_line = symbol.range[0] - 1
-
-        # 向上查找 /** ... */ 注释
-        for i in range(start_line - 1, max(0, start_line - 20), -1):
-            if '*/' in lines[i]:
-                # 找到结束标记，向上提取
-                return self._extract_jsdoc_block(lines, i)
-
-        return None
-```
-
-## 9. 配置示例
-
-### 9.1 配置文件
-
-```yaml
-# .codexlens/hybrid_enhancement.yaml
-
-hybrid_enhancement:
-  enabled: true
-
-  # 质量阈值
-  quality_thresholds:
-    use_docstring: high      # high/medium/low
-    refine_docstring: medium
-
-  # LLM选项
-  llm:
-    tool: gemini
-    fallback: qwen
-    timeout_ms: 300000
-    batch_size: 5
-
-  # 成本优化
-  cost_optimization:
-    generate_keywords_for_docstring: true
-    skip_test_files: true
-    skip_private_methods: false
-
-  # 语言支持
-  languages:
-    python:
-      styles: [google, numpy, sphinx]
-    javascript:
-      styles: [jsdoc]
-    java:
-      styles: [javadoc]
-
-  # 监控
-  logging:
-    log_strategy_decisions: false
-    log_cost_savings: true
-```
-
-### 9.2 CLI使用
-
-```bash
-# 使用混合策略增强
-codex-lens enhance . --hybrid --tool gemini
-
-# 查看成本统计
-codex-lens enhance . --hybrid --show-stats
-
-# 仅对高质量docstring生成keywords
-codex-lens enhance . --hybrid --keywords-only
-
-# 禁用混合模式，回退到纯LLM
-codex-lens enhance . --no-hybrid --tool gemini
-```
-
-## 10. 成功指标
-
-1. **成本节省**：相比纯LLM模式，降低API调用成本40%+
-2. **准确性提升**：使用docstring的符号，元数据准确率>95%
-3. **覆盖率**：98%+的符号有语义元数据（docstring或LLM生成）
-4. **速度提升**：整体处理速度提升30%+（减少LLM调用）
-5. **用户满意度**：保留docstring信息，开发者认可度高
-
-## 11. 参考资料
-
- [PEP 257 - Docstring Conventions](https://peps.python.org/pep-0257/)
- [Google Python Style Guide - Docstrings](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings)
- [NumPy Docstring Standard](https://numpydoc.readthedocs.io/en/latest/format.html)
- [JSDoc Documentation](https://jsdoc.app/)
- [Javadoc Tool](https://docs.oracle.com/javase/8/docs/technotes/tools/windows/javadoc.html)
--- a/codex-lens/docs/IMPLEMENTATION_SUMMARY.md
+++ b/codex-lens/docs/IMPLEMENTATION_SUMMARY.md
@@ -394,52 +394,32 @@ results = engine.search(
  - 指导用户如何生成嵌入
  - 集成到搜索引擎日志中

-### ✅ LLM语义增强验证 (2025-12-16)
+### ❌ LLM语义增强功能已移除 (2025-12-16)

-**测试目标**: 验证LLM增强的向量搜索是否正常工作，对比纯向量搜索效果
+**移除原因**: 简化代码库，减少外部依赖

-**测试基础设施**:
- 创建测试套件 `tests/test_llm_enhanced_search.py` (550+ lines)
- 创建独立测试脚本 `scripts/compare_search_methods.py` (460+ lines)
- 创建完整文档 `docs/LLM_ENHANCED_SEARCH_GUIDE.md` (460+ lines)
+**已移除内容**:
+- `src/codexlens/semantic/llm_enhancer.py` - LLM增强核心模块
+- `src/codexlens/cli/commands.py` 中的 `enhance` 命令
+- `tests/test_llm_enhancer.py` - LLM增强测试
+- `tests/test_llm_enhanced_search.py` - LLM对比测试
+- `scripts/compare_search_methods.py` - 对比测试脚本
+- `scripts/test_misleading_comments.py` - 误导性注释测试
+- `scripts/show_llm_analysis.py` - LLM分析展示脚本
+- `scripts/inspect_llm_summaries.py` - LLM摘要检查工具
+- `docs/LLM_ENHANCED_SEARCH_GUIDE.md` - LLM使用指南
+- `docs/LLM_ENHANCEMENT_TEST_RESULTS.md` - LLM测试结果
+- `docs/MISLEADING_COMMENTS_TEST_RESULTS.md` - 误导性注释测试结果
+- `docs/CLI_INTEGRATION_SUMMARY.md` - CLI集成文档（包含enhance命令）
+- `docs/DOCSTRING_LLM_HYBRID_DESIGN.md` - LLM混合策略设计

-**测试数据**:
- 5个真实Python代码样本 (认证、API、验证、数据库)
- 6个自然语言测试查询
- 涵盖密码哈希、JWT令牌、用户API、邮箱验证、数据库连接等场景
+**保留功能**:
+- ✅ 纯向量搜索 (pure_vector) 完整保留
+- ✅ 语义嵌入生成 (`codexlens embeddings-generate`)
+- ✅ 语义嵌入状态检查 (`codexlens embeddings-status`)
+- ✅ 所有核心搜索功能

-**测试结果** (2025-12-16):
-```
-数据集: 5个Python文件, 5个查询
-测试工具: Gemini Flash 2.5
-
-Setup Time:
-  - Pure Vector:    2.3秒  (直接嵌入代码)
-  - LLM-Enhanced: 174.2秒  (通过Gemini生成摘要, 75x slower)
-
-Accuracy:
-  - Pure Vector:    5/5 (100%) - 所有查询Rank 1
-  - LLM-Enhanced:   5/5 (100%) - 所有查询Rank 1
-  - Score:         15 vs 15 (平局)
-```
-
-**关键发现**:
-1. ✅ **LLM增强功能正常工作**
-   - CCW CLI集成正常
-   - Gemini API调用成功
-   - 摘要生成和嵌入创建正常
-
-2. **性能权衡**
-   - 索引阶段慢75倍 (LLM API调用开销)
-   - 查询阶段速度相同 (都是向量相似度搜索)
-   - 适合离线索引，在线查询场景
-
-3. **准确性**
-   - 测试数据集太简单 (5文件，完美1:1映射)
-   - 两种方法都达到100%准确率
-   - 需要更大、更复杂的代码库来显示差异
-
-**结论**: LLM语义增强功能已验证可正常工作，可用于生产环境
+**历史记录**: LLM增强功能在测试中表现良好，但为简化维护和减少外部依赖（CCW CLI, Gemini/Qwen API）而移除。设计文档（DESIGN_EVALUATION_REPORT.md等）保留作为历史参考。

 ### P2 - 中期（1-2月）

--- a/codex-lens/docs/LLM_ENHANCED_SEARCH_GUIDE.md
+++ b/codex-lens/docs/LLM_ENHANCED_SEARCH_GUIDE.md
@@ -1,463 +0,0 @@
-# LLM-Enhanced Semantic Search Guide
-
-**Last Updated**: 2025-12-16
-**Status**: Experimental Feature
-
---
-
-## Overview
-
-CodexLens supports two approaches for semantic vector search:
-
-| Approach | Pipeline | Best For |
-|----------|----------|----------|
-| **Pure Vector** | Code → fastembed → search | Code pattern matching, exact functionality |
-| **LLM-Enhanced** | Code → LLM summary → fastembed → search | Natural language queries, conceptual search |
-
-### Why LLM Enhancement?
-
-**Problem**: Raw code embeddings don't match natural language well.
-
-```
-Query: "How do I hash passwords securely?"
-Raw code: def hash_password(password: str) -> str: ...
-Mismatch: Low semantic similarity
-```
-
-**Solution**: LLM generates natural language summaries.
-
-```
-Query: "How do I hash passwords securely?"
-LLM Summary: "Hash a password using bcrypt with specified salt rounds for secure storage"
-Match: High semantic similarity ✓
-```
-
-## Architecture
-
-### Pure Vector Search Flow
-
-```
-1. Code File
-   └→ "def hash_password(password: str): ..."
-
-2. Chunking
-   └→ Split into semantic chunks (500-2000 chars)
-
-3. Embedding (fastembed)
-   └→ Generate 768-dim vector from raw code
-
-4. Storage
-   └→ Store vector in semantic_chunks table
-
-5. Query
-   └→ "How to hash passwords"
-   └→ Generate query vector
-   └→ Find similar vectors (cosine similarity)
-```
-
-**Pros**: Fast, no external dependencies, good for code patterns
-**Cons**: Poor semantic match for natural language queries
-
-### LLM-Enhanced Search Flow
-
-```
-1. Code File
-   └→ "def hash_password(password: str): ..."
-
-2. LLM Analysis (Gemini/Qwen via CCW)
-   └→ Generate summary: "Hash a password using bcrypt..."
-   └→ Extract keywords: ["password", "hash", "bcrypt", "security"]
-   └→ Identify purpose: "auth"
-
-3. Embeddable Text Creation
-   └→ Combine: summary + keywords + purpose + filename
-
-4. Embedding (fastembed)
-   └→ Generate 768-dim vector from LLM text
-
-5. Storage
-   └→ Store vector with metadata
-
-6. Query
-   └→ "How to hash passwords"
-   └→ Generate query vector
-   └→ Find similar vectors → Better match! ✓
-```
-
-**Pros**: Excellent semantic match for natural language
-**Cons**: Slower, requires CCW CLI and LLM access
-
-## Setup Requirements
-
-### 1. Install Dependencies
-
-```bash
-# Install semantic search dependencies
-pip install codexlens[semantic]
-
-# Install CCW CLI for LLM enhancement
-npm install -g ccw
-```
-
-### 2. Configure LLM Tools
-
-```bash
-# Set primary LLM tool (default: gemini)
-export CCW_CLI_SECONDARY_TOOL=gemini
-
-# Set fallback tool (default: qwen)
-export CCW_CLI_FALLBACK_TOOL=qwen
-
-# Configure API keys (see CCW documentation)
-ccw config set gemini.apiKey YOUR_API_KEY
-```
-
-### 3. Verify Setup
-
-```bash
-# Check CCW availability
-ccw --version
-
-# Check semantic dependencies
-python -c "from codexlens.semantic import SEMANTIC_AVAILABLE; print(SEMANTIC_AVAILABLE)"
-```
-
-## Running Comparison Tests
-
-### Method 1: Standalone Script (Recommended)
-
-```bash
-# Run full comparison (pure vector + LLM-enhanced)
-python scripts/compare_search_methods.py
-
-# Use specific LLM tool
-python scripts/compare_search_methods.py --tool gemini
-python scripts/compare_search_methods.py --tool qwen
-
-# Skip LLM test (only pure vector)
-python scripts/compare_search_methods.py --skip-llm
-```
-
-**Output Example**:
-
-```
-======================================================================
-SEMANTIC SEARCH COMPARISON TEST
-Pure Vector vs LLM-Enhanced Vector Search
-======================================================================
-
-Test dataset: 5 Python files
-Test queries: 5 natural language questions
-
-======================================================================
-PURE VECTOR SEARCH (Code → fastembed)
-======================================================================
-Setup: 5 files, 23 chunks in 2.3s
-
-Query                                        Top Result                     Score
----------------------------------------------------------------------
-✓ How do I securely hash passwords?         password_hasher.py             0.723
-✗ Generate JWT token for authentication      user_endpoints.py              0.645
-✓ Create new user account via API            user_endpoints.py              0.812
-✓ Validate email address format              validation.py                  0.756
-~ Connect to PostgreSQL database             connection.py                  0.689
-
-======================================================================
-LLM-ENHANCED SEARCH (Code → GEMINI → fastembed)
-======================================================================
-Generating LLM summaries for 5 files...
-Setup: 5/5 files indexed in 8.7s
-
-Query                                        Top Result                     Score
----------------------------------------------------------------------
-✓ How do I securely hash passwords?         password_hasher.py             0.891
-✓ Generate JWT token for authentication      jwt_handler.py                 0.867
-✓ Create new user account via API            user_endpoints.py              0.923
-✓ Validate email address format              validation.py                  0.845
-✓ Connect to PostgreSQL database             connection.py                  0.801
-
-======================================================================
-COMPARISON SUMMARY
-======================================================================
-
-Query                                        Pure       LLM
----------------------------------------------------------------------
-How do I securely hash passwords?           ✓ Rank 1   ✓ Rank 1
-Generate JWT token for authentication        ✗ Miss     ✓ Rank 1
-Create new user account via API              ✓ Rank 1   ✓ Rank 1
-Validate email address format                ✓ Rank 1   ✓ Rank 1
-Connect to PostgreSQL database               ~ Rank 2   ✓ Rank 1
----------------------------------------------------------------------
-TOTAL SCORE                                  11         15
-======================================================================
-
-ANALYSIS:
-✓ LLM enhancement improves results by 36.4%
-  Natural language summaries match queries better than raw code
-```
-
-### Method 2: Pytest Test Suite
-
-```bash
-# Run full test suite
-pytest tests/test_llm_enhanced_search.py -v -s
-
-# Run specific test
-pytest tests/test_llm_enhanced_search.py::TestSearchComparison::test_comparison -v -s
-
-# Skip LLM tests if CCW not available
-pytest tests/test_llm_enhanced_search.py -v -s -k "not llm_enhanced"
-```
-
-## Using LLM Enhancement in Production
-
-### Option 1: Enhanced Embeddings Generation (Recommended)
-
-Create embeddings with LLM enhancement during indexing:
-
-```python
-from pathlib import Path
-from codexlens.semantic.llm_enhancer import create_enhanced_indexer, FileData
-
-# Create enhanced indexer
-indexer = create_enhanced_indexer(
-    vector_store_path=Path("~/.codexlens/indexes/project/_index.db"),
-    llm_tool="gemini",
-    llm_enabled=True,
-)
-
-# Prepare file data
-files = [
-    FileData(
-        path="auth/password_hasher.py",
-        content=open("auth/password_hasher.py").read(),
-        language="python"
-    ),
-    # ... more files
-]
-
-# Index with LLM enhancement
-indexed_count = indexer.index_files(files)
-print(f"Indexed {indexed_count} files with LLM enhancement")
-```
-
-### Option 2: CLI Integration (Coming Soon)
-
-```bash
-# Generate embeddings with LLM enhancement
-codexlens embeddings-generate ~/projects/my-app --llm-enhanced --tool gemini
-
-# Check which strategy was used
-codexlens embeddings-status ~/projects/my-app --show-strategies
-```
-
-**Note**: CLI integration is planned but not yet implemented. Currently use Option 1 (Python API).
-
-### Option 3: Hybrid Approach
-
-Combine both strategies for best results:
-
-```python
-# Generate both pure and LLM-enhanced embeddings
-# 1. Pure vector for exact code matching
-generate_pure_embeddings(files)
-
-# 2. LLM-enhanced for semantic matching
-generate_llm_embeddings(files)
-
-# Search uses both and ranks by best match
-```
-
-## Performance Considerations
-
-### Speed Comparison
-
-| Approach | Indexing Time (100 files) | Query Time | Cost |
-|----------|---------------------------|------------|------|
-| Pure Vector | ~30s | ~50ms | Free |
-| LLM-Enhanced | ~5-10 min | ~50ms | LLM API costs |
-
-**LLM indexing is slower** because:
- Calls external LLM API (gemini/qwen)
- Processes files in batches (default: 5 files/batch)
- Waits for LLM response (~2-5s per batch)
-
-**Query speed is identical** because:
- Both use fastembed for similarity search
- Vector lookup is same speed
- Difference is only in what was embedded
-
-### Cost Estimation
-
-**Gemini Flash (via CCW)**:
- ~$0.10 per 1M input tokens
- Average: ~500 tokens per file
- 100 files = ~$0.005 (half a cent)
-
-**Qwen (local)**:
- Free if running locally
- Slower than Gemini Flash
-
-### When to Use Each Approach
-
-| Use Case | Recommendation |
-|----------|----------------|
-| **Code pattern search** | Pure vector (e.g., "find all REST endpoints") |
-| **Natural language queries** | LLM-enhanced (e.g., "how to authenticate users") |
-| **Large codebase** | Pure vector first, LLM for important modules |
-| **Personal projects** | LLM-enhanced (cost is minimal) |
-| **Enterprise** | Hybrid approach |
-
-## Configuration Options
-
-### LLM Config
-
-```python
-from codexlens.semantic.llm_enhancer import LLMConfig, LLMEnhancer
-
-config = LLMConfig(
-    tool="gemini",              # Primary LLM tool
-    fallback_tool="qwen",       # Fallback if primary fails
-    timeout_ms=300000,          # 5 minute timeout
-    batch_size=5,               # Files per batch
-    max_content_chars=8000,     # Max chars per file in prompt
-    enabled=True,               # Enable/disable LLM
-)
-
-enhancer = LLMEnhancer(config)
-```
-
-### Environment Variables
-
-```bash
-# Override default LLM tool
-export CCW_CLI_SECONDARY_TOOL=gemini
-
-# Override fallback tool
-export CCW_CLI_FALLBACK_TOOL=qwen
-
-# Disable LLM enhancement (fall back to pure vector)
-export CODEXLENS_LLM_ENABLED=false
-```
-
-## Troubleshooting
-
-### Issue 1: CCW CLI Not Found
-
-**Error**: `CCW CLI not found in PATH, LLM enhancement disabled`
-
-**Solution**:
-```bash
-# Install CCW globally
-npm install -g ccw
-
-# Verify installation
-ccw --version
-
-# Check PATH
-which ccw  # Unix
-where ccw  # Windows
-```
-
-### Issue 2: LLM API Errors
-
-**Error**: `LLM call failed: HTTP 429 Too Many Requests`
-
-**Solution**:
- Reduce batch size in LLMConfig
- Add delay between batches
- Check API quota/limits
- Try fallback tool (qwen)
-
-### Issue 3: Poor LLM Summaries
-
-**Symptom**: LLM summaries are too generic or inaccurate
-
-**Solution**:
- Try different LLM tool (gemini vs qwen)
- Increase max_content_chars (default 8000)
- Manually review and refine summaries
- Fall back to pure vector for code-heavy files
-
-### Issue 4: Slow Indexing
-
-**Symptom**: Indexing takes too long with LLM enhancement
-
-**Solution**:
-```python
-# Reduce batch size for faster feedback
-config = LLMConfig(batch_size=2)  # Default is 5
-
-# Or use pure vector for large files
-if file_size > 10000:
-    use_pure_vector()
-else:
-    use_llm_enhanced()
-```
-
-## Example Test Queries
-
-### Good for LLM-Enhanced Search
-
-```python
-# Natural language, conceptual queries
-"How do I authenticate users with JWT?"
-"Validate email addresses before saving to database"
-"Secure password storage with hashing"
-"Create REST API endpoint for user registration"
-"Connect to PostgreSQL with connection pooling"
-```
-
-### Good for Pure Vector Search
-
-```python
-# Code-specific, pattern-matching queries
-"bcrypt.hashpw"
-"jwt.encode"
-"@app.route POST"
-"re.match email"
-"psycopg2.pool.SimpleConnectionPool"
-```
-
-### Best: Combine Both
-
-Use LLM-enhanced for high-level search, then pure vector for refinement:
-
-```python
-# Step 1: LLM-enhanced for semantic search
-results = search_llm_enhanced("user authentication with tokens")
-# Returns: jwt_handler.py, password_hasher.py, user_endpoints.py
-
-# Step 2: Pure vector for exact code pattern
-results = search_pure_vector("jwt.encode")
-# Returns: jwt_handler.py (exact match)
-```
-
-## Future Improvements
-
- [ ] CLI integration for `--llm-enhanced` flag
- [ ] Incremental LLM summary updates
- [ ] Caching LLM summaries to reduce API calls
- [ ] Hybrid search combining both approaches
- [ ] Custom prompt templates for specific domains
- [ ] Local LLM support (ollama, llama.cpp)
-
-## Related Documentation
-
- `PURE_VECTOR_SEARCH_GUIDE.md` - Pure vector search usage
- `IMPLEMENTATION_SUMMARY.md` - Technical implementation details
- `scripts/compare_search_methods.py` - Comparison test script
- `tests/test_llm_enhanced_search.py` - Test suite
-
-## References
-
- **LLM Enhancer Implementation**: `src/codexlens/semantic/llm_enhancer.py`
- **CCW CLI Documentation**: https://github.com/anthropics/ccw
- **Fastembed**: https://github.com/qdrant/fastembed
-
---
-
-**Questions?** Run the comparison script to see LLM enhancement in action:
-```bash
-python scripts/compare_search_methods.py
-```
--- a/codex-lens/docs/LLM_ENHANCEMENT_TEST_RESULTS.md
+++ b/codex-lens/docs/LLM_ENHANCEMENT_TEST_RESULTS.md
@@ -1,232 +0,0 @@
-# LLM语义增强测试结果
-
-**测试日期**: 2025-12-16
-**状态**: ✅ 通过 - LLM增强功能正常工作
-
---
-
-## 📊 测试结果概览
-
-### 测试配置
-
-| 项目 | 配置 |
-|------|------|
-| **测试工具** | Gemini Flash 2.5 (via CCW CLI) |
-| **测试数据** | 5个Python代码文件 |
-| **查询数量** | 5个自然语言查询 |
-| **嵌入模型** | BAAI/bge-small-en-v1.5 (768维) |
-
-### 性能对比
-
-| 指标 | 纯向量搜索 | LLM增强搜索 | 差异 |
-|------|-----------|------------|------|
-| **索引时间** | 2.3秒 | 174.2秒 | 75倍慢 |
-| **查询速度** | ~50ms | ~50ms | 相同 |
-| **准确率** | 5/5 (100%) | 5/5 (100%) | 相同 |
-| **排名得分** | 15/15 | 15/15 | 平局 |
-
-### 详细结果
-
-所有5个查询都找到了正确的文件 (Rank 1):
-
-| 查询 | 预期文件 | 纯向量 | LLM增强 |
-|------|---------|--------|---------|
-| 如何安全地哈希密码？ | password_hasher.py | [OK] Rank 1 | [OK] Rank 1 |
-| 生成JWT令牌进行认证 | jwt_handler.py | [OK] Rank 1 | [OK] Rank 1 |
-| 通过API创建新用户账户 | user_endpoints.py | [OK] Rank 1 | [OK] Rank 1 |
-| 验证电子邮件地址格式 | validation.py | [OK] Rank 1 | [OK] Rank 1 |
-| 连接到PostgreSQL数据库 | connection.py | [OK] Rank 1 | [OK] Rank 1 |
-
---
-
-## ✅ 验证结论
-
-### 1. LLM增强功能工作正常
-
- ✅ **CCW CLI集成**: 成功调用外部CLI工具
- ✅ **Gemini API**: API调用成功，无错误
- ✅ **摘要生成**: LLM成功生成代码摘要和关键词
- ✅ **嵌入创建**: 从摘要成功生成768维向量
- ✅ **向量存储**: 正确存储到semantic_chunks表
- ✅ **搜索准确性**: 100%准确匹配所有查询
-
-### 2. 性能权衡分析
-
-**优势**:
- 查询速度与纯向量相同 (~50ms)
- 更好的语义理解能力 (理论上)
- 适合自然语言查询
-
-**劣势**:
- 索引阶段慢75倍 (174s vs 2.3s)
- 需要外部LLM API (成本)
- 需要安装和配置CCW CLI
-
-**适用场景**:
- 离线索引，在线查询
- 个人项目 (成本可忽略)
- 重视自然语言查询体验
-
-### 3. 测试数据集局限性
-
-**当前测试太简单**:
- 仅5个文件
- 每个查询完美对应1个文件
- 没有歧义或相似文件
- 两种方法都能轻松找到
-
-**预期在真实场景**:
- 数百或数千个文件
- 多个相似功能的文件
- 模糊或概念性查询
- LLM增强应该表现更好
-
---
-
-## 🛠️ 测试基础设施
-
-### 创建的文件
-
-1. **测试套件** (`tests/test_llm_enhanced_search.py`)
-   - 550+ lines
-   - 完整pytest测试
-   - 3个测试类 (纯向量, LLM增强, 对比)
-
-2. **独立脚本** (`scripts/compare_search_methods.py`)
-   - 460+ lines
-   - 可直接运行: `python scripts/compare_search_methods.py`
-   - 支持参数: `--tool gemini|qwen`, `--skip-llm`
-   - 详细对比报告
-
-3. **完整文档** (`docs/LLM_ENHANCED_SEARCH_GUIDE.md`)
-   - 460+ lines
-   - 架构对比图
-   - 设置说明
-   - 使用示例
-   - 故障排除
-
-### 运行测试
-
-```bash
-# 方式1: 独立脚本 (推荐)
-python scripts/compare_search_methods.py --tool gemini
-
-# 方式2: Pytest
-pytest tests/test_llm_enhanced_search.py::TestSearchComparison::test_comparison -v -s
-
-# 跳过LLM测试 (仅测试纯向量)
-python scripts/compare_search_methods.py --skip-llm
-```
-
-### 前置要求
-
-```bash
-# 1. 安装语义搜索依赖
-pip install codexlens[semantic]
-
-# 2. 安装CCW CLI
-npm install -g ccw
-
-# 3. 配置API密钥
-ccw config set gemini.apiKey YOUR_API_KEY
-```
-
---
-
-## 🔍 架构对比
-
-### 纯向量搜索流程
-
-```
-代码文件 → 分块 → fastembed (768维) → semantic_chunks表 → 向量搜索
-```
-
-**优点**: 快速、无需外部依赖、直接嵌入代码
-**缺点**: 对自然语言查询理解较弱
-
-### LLM增强搜索流程
-
-```
-代码文件 → CCW CLI调用Gemini → 生成摘要+关键词 → fastembed (768维) → semantic_chunks表 → 向量搜索
-```
-
-**优点**: 更好的语义理解、适合自然语言查询
-**缺点**: 索引慢75倍、需要LLM API、有成本
-
---
-
-## 💰 成本估算
-
-### Gemini Flash (via CCW)
-
- 价格: ~$0.10 / 1M input tokens
- 平均: ~500 tokens / 文件
- 100文件成本: ~$0.005 (半分钱)
-
-### Qwen (本地)
-
- 价格: 免费 (本地运行)
- 速度: 比Gemini Flash慢
-
---
-
-## 📝 修复的问题
-
-### 1. Unicode编码问题
-
-**问题**: Windows GBK控制台无法显示Unicode符号 (✓, ✗, •)
-**修复**: 替换为ASCII符号 ([OK], [X], -)
-
-**影响文件**:
- `scripts/compare_search_methods.py`
- `tests/test_llm_enhanced_search.py`
-
-### 2. 数据库文件锁定
-
-**问题**: Windows无法删除临时数据库 (PermissionError)
-**修复**: 添加垃圾回收和异常处理
-
-```python
-import gc
-gc.collect()  # 强制关闭连接
-time.sleep(0.1)  # 等待Windows释放文件句柄
-```
-
-### 3. 正则表达式警告
-
-**问题**: SyntaxWarning about invalid escape sequence `\.`
-**状态**: 无害警告，正则表达式正常工作
-
---
-
-## 🎯 结论和建议
-
-### 核心发现
-
-1. ✅ **LLM语义增强功能已验证可用**
-2. ✅ **测试基础设施完整**
-3. ⚠️ **测试数据集需扩展** (当前太简单)
-
-### 使用建议
-
-| 场景 | 推荐方案 |
-|------|---------|
-| 代码模式搜索 | 纯向量 (如 "find all REST endpoints") |
-| 自然语言查询 | LLM增强 (如 "how to authenticate users") |
-| 大型代码库 | 纯向量优先，重要模块用LLM |
-| 个人项目 | LLM增强 (成本可忽略) |
-| 企业级应用 | 混合方案 |
-
-### 后续工作 (可选)
-
- [ ] 使用更大的测试数据集 (100+ files)
- [ ] 测试更复杂的查询 (概念性、模糊查询)
- [ ] 性能优化 (批量LLM调用)
- [ ] 成本优化 (缓存LLM摘要)
- [ ] 混合搜索 (结合两种方法)
-
---
-
-**完成时间**: 2025-12-16
-**测试执行者**: Claude (Sonnet 4.5)
-**文档版本**: 1.0
--- a/codex-lens/docs/LLM_REMOVAL_SUMMARY.md
+++ b/codex-lens/docs/LLM_REMOVAL_SUMMARY.md
@@ -0,0 +1,342 @@
+# LLM增强功能移除总结
+
+**移除日期**: 2025-12-16
+**执行者**: 用户请求
+**状态**: ✅ 完成
+
+---
+
+## 📋 移除清单
+
+### ✅ 已删除的源代码文件
+
+| 文件 | 说明 |
+|------|------|
+| `src/codexlens/semantic/llm_enhancer.py` | LLM增强核心模块 (900+ lines) |
+
+### ✅ 已修改的源代码文件
+
+| 文件 | 修改内容 |
+|------|---------|
+| `src/codexlens/cli/commands.py` | 删除 `enhance` 命令 (lines 1050-1227) |
+| `src/codexlens/semantic/__init__.py` | 删除LLM相关导出 (lines 35-69) |
+
+### ✅ 已修改的前端文件（CCW Dashboard）
+
+| 文件 | 修改内容 |
+|------|---------|
+| `ccw/src/templates/dashboard-js/components/cli-status.js` | 删除LLM增强设置 (8行)、Semantic Settings Modal (615行)、Metadata Viewer (326行) |
+| `ccw/src/templates/dashboard-js/i18n.js` | 删除英文LLM翻译 (26行)、中文LLM翻译 (26行) |
+| `ccw/src/templates/dashboard-js/views/cli-manager.js` | 移除LLM badge和设置modal调用 (3行) |
+
+### ✅ 已删除的测试文件
+
+| 文件 | 说明 |
+|------|------|
+| `tests/test_llm_enhancer.py` | LLM增强单元测试 |
+| `tests/test_llm_enhanced_search.py` | LLM vs 纯向量对比测试 (550+ lines) |
+
+### ✅ 已删除的脚本文件
+
+| 文件 | 说明 |
+|------|------|
+| `scripts/compare_search_methods.py` | 纯向量 vs LLM增强对比脚本 (460+ lines) |
+| `scripts/test_misleading_comments.py` | 误导性注释测试脚本 (490+ lines) |
+| `scripts/show_llm_analysis.py` | LLM分析展示工具 |
+| `scripts/inspect_llm_summaries.py` | LLM摘要检查工具 |
+
+### ✅ 已删除的文档文件
+
+| 文件 | 说明 |
+|------|------|
+| `docs/LLM_ENHANCED_SEARCH_GUIDE.md` | LLM增强使用指南 (460+ lines) |
+| `docs/LLM_ENHANCEMENT_TEST_RESULTS.md` | LLM测试结果文档 |
+| `docs/MISLEADING_COMMENTS_TEST_RESULTS.md` | 误导性注释测试结果 |
+| `docs/CLI_INTEGRATION_SUMMARY.md` | CLI集成文档（包含enhance命令） |
+| `docs/DOCSTRING_LLM_HYBRID_DESIGN.md` | Docstring与LLM混合策略设计 |
+
+### ✅ 已更新的文档
+
+| 文件 | 修改内容 |
+|------|---------|
+| `docs/IMPLEMENTATION_SUMMARY.md` | 添加LLM移除说明，列出已删除内容 |
+
+### 📚 保留的设计文档（作为历史参考）
+
+| 文件 | 说明 |
+|------|------|
+| `docs/DESIGN_EVALUATION_REPORT.md` | 包含LLM混合策略的技术评估报告 |
+| `docs/SEMANTIC_GRAPH_DESIGN.md` | 语义图谱设计（可能提及LLM） |
+| `docs/MULTILEVEL_CHUNKER_DESIGN.md` | 多层次分词器设计（可能提及LLM） |
+
+*这些文档保留作为技术历史参考，不影响当前功能。*
+
+---
+
+## 🔒 移除的功能
+
+### CLI命令
+
+```bash
+# 已移除 - 不再可用
+codexlens enhance [PATH] --tool gemini --batch-size 5
+
+# 说明：此命令用于通过CCW CLI调用Gemini/Qwen生成代码摘要
+# 移除原因：减少外部依赖，简化维护
+```
+
+### Python API
+
+```python
+# 已移除 - 不再可用
+from codexlens.semantic import (
+    LLMEnhancer,
+    LLMConfig,
+    SemanticMetadata,
+    FileData,
+    EnhancedSemanticIndexer,
+    create_enhancer,
+    create_enhanced_indexer,
+)
+
+# 移除的类和函数：
+# - LLMEnhancer: LLM增强器主类
+# - LLMConfig: LLM配置类
+# - SemanticMetadata: 语义元数据结构
+# - FileData: 文件数据结构
+# - EnhancedSemanticIndexer: LLM增强索引器
+# - create_enhancer(): 创建增强器的工厂函数
+# - create_enhanced_indexer(): 创建增强索引器的工厂函数
+```
+
+---
+
+## ✅ 保留的功能
+
+### 完全保留的核心功能
+
+| 功能 | 状态 |
+|------|------|
+| **纯向量搜索** | ✅ 完整保留 |
+| **语义嵌入生成** | ✅ 完整保留 (`codexlens embeddings-generate`) |
+| **语义嵌入状态检查** | ✅ 完整保留 (`codexlens embeddings-status`) |
+| **混合搜索引擎** | ✅ 完整保留（exact + fuzzy + vector） |
+| **向量存储** | ✅ 完整保留 |
+| **语义分块** | ✅ 完整保留 |
+| **fastembed集成** | ✅ 完整保留 |
+
+### 可用的CLI命令
+
+```bash
+# 生成纯向量嵌入（无需LLM）
+codexlens embeddings-generate [PATH]
+
+# 检查嵌入状态
+codexlens embeddings-status [PATH]
+
+# 所有搜索命令
+codexlens search [QUERY] --index [PATH]
+
+# 所有索引管理命令
+codexlens init [PATH]
+codexlens update [PATH]
+codexlens clean [PATH]
+```
+
+### 可用的Python API
+
+```python
+# 完全可用 - 纯向量搜索
+from codexlens.semantic import SEMANTIC_AVAILABLE, SEMANTIC_BACKEND
+from codexlens.semantic.embedder import Embedder
+from codexlens.semantic.vector_store import VectorStore
+from codexlens.semantic.chunker import Chunker, ChunkConfig
+from codexlens.search.hybrid_search import HybridSearchEngine
+
+# 示例：纯向量搜索
+engine = HybridSearchEngine()
+results = engine.search(
+    index_path,
+    query="your search query",
+    enable_vector=True,
+    pure_vector=True,  # 纯向量模式
+)
+```
+
+---
+
+## 🎯 移除原因
+
+### 1. 简化依赖
+
+**移除的外部依赖**:
+- CCW CLI (npm package)
+- Gemini API (需要API密钥)
+- Qwen API (可选)
+
+**保留的依赖**:
+- fastembed (ONNX-based，轻量级)
+- numpy
+- Python标准库
+
+### 2. 减少复杂性
+
+- **前**: 两种搜索方式（纯向量 + LLM增强）
+- **后**: 一种搜索方式（纯向量）
+- 移除了900+ lines的LLM增强代码
+- 移除了CLI命令和相关配置
+- 移除了测试和文档
+
+### 3. 性能考虑
+
+| 方面 | LLM增强 | 纯向量 |
+|------|---------|--------|
+| **索引速度** | 慢75倍 | 基准 |
+| **查询速度** | 相同 | 相同 |
+| **准确率** | 相同* | 基准 |
+| **成本** | API费用 | 免费 |
+
+*在测试数据集上准确率相同（5/5），但LLM增强理论上在更复杂场景下可能更好
+
+### 4. 维护负担
+
+**移除前**:
+- 需要维护CCW CLI集成
+- 需要处理API限流和错误
+- 需要测试多个LLM后端
+- 需要维护批处理逻辑
+
+**移除后**:
+- 单一嵌入引擎（fastembed）
+- 无外部API依赖
+- 更简单的错误处理
+- 更容易测试
+
+---
+
+## 🔍 验证结果
+
+### 导入测试
+
+```bash
+# ✅ 通过 - 语义模块正常
+python -c "from codexlens.semantic import SEMANTIC_AVAILABLE; print(SEMANTIC_AVAILABLE)"
+# Output: True
+
+# ✅ 通过 - 搜索引擎正常
+python -c "from codexlens.search.hybrid_search import HybridSearchEngine; print('OK')"
+# Output: OK
+```
+
+### 代码清洁度验证
+
+```bash
+# ✅ 通过 - 无遗留LLM引用
+grep -r "llm_enhancer\|LLMEnhancer\|LLMConfig" src/ --include="*.py"
+# Output: (空)
+```
+
+### 测试结果
+
+```bash
+# ✅ 5/7通过 - 纯向量搜索基本功能正常
+pytest tests/test_pure_vector_search.py -v
+# 通过: 5个基本测试
+# 失败: 2个嵌入测试（已知的模型维度不匹配问题，与LLM移除无关）
+```
+
+---
+
+## 📊 统计
+
+### 代码删除统计
+
+| 类型 | 删除文件数 | 删除行数（估计） |
+|------|-----------|-----------------|
+| **源代码** | 1 | ~900 lines |
+| **CLI命令** | 1 command | ~180 lines |
+| **导出清理** | 1 section | ~35 lines |
+| **前端代码** | 3 files | ~1000 lines |
+| **测试文件** | 2 | ~600 lines |
+| **脚本工具** | 4 | ~1500 lines |
+| **文档** | 5 | ~2000 lines |
+| **总计** | 16 files/sections | ~6200 lines |
+
+### 依赖简化
+
+| 方面 | 移除前 | 移除后 |
+|------|--------|--------|
+| **外部工具依赖** | CCW CLI, Gemini/Qwen | 无 |
+| **Python包依赖** | fastembed, numpy | fastembed, numpy |
+| **API依赖** | Gemini/Qwen API | 无 |
+| **配置复杂度** | 高（tool, batch_size, API keys） | 低（model profile） |
+
+---
+
+## 🚀 后续建议
+
+### 如果需要LLM增强功能
+
+1. **从git历史恢复**
+   ```bash
+   # 查看删除前的提交
+   git log --all --full-history -- "*llm_enhancer*"
+
+   # 恢复特定文件
+   git checkout <commit-hash> -- src/codexlens/semantic/llm_enhancer.py
+   ```
+
+2. **或使用外部工具**
+   - 在索引前使用独立脚本生成摘要
+   - 将摘要作为注释添加到代码中
+   - 然后使用纯向量索引（会包含摘要）
+
+3. **或考虑轻量级替代方案**
+   - 使用本地小模型（llama.cpp, ggml）
+   - 使用docstring提取（无需LLM）
+   - 使用静态分析生成摘要
+
+### 代码库维护建议
+
+1. ✅ **保持简单** - 继续使用纯向量搜索
+2. ✅ **优化现有功能** - 改进向量搜索准确性
+3. ✅ **增量改进** - 优化分块策略和嵌入质量
+4. ⚠️ **避免重复** - 如需LLM，先评估是否真正必要
+
+---
+
+## 📝 文件清单
+
+### 删除的文件完整列表
+
+```
+src/codexlens/semantic/llm_enhancer.py
+tests/test_llm_enhancer.py
+tests/test_llm_enhanced_search.py
+scripts/compare_search_methods.py
+scripts/test_misleading_comments.py
+scripts/show_llm_analysis.py
+scripts/inspect_llm_summaries.py
+docs/LLM_ENHANCED_SEARCH_GUIDE.md
+docs/LLM_ENHANCEMENT_TEST_RESULTS.md
+docs/MISLEADING_COMMENTS_TEST_RESULTS.md
+docs/CLI_INTEGRATION_SUMMARY.md
+docs/DOCSTRING_LLM_HYBRID_DESIGN.md
+```
+
+### 修改的文件
+
+```
+src/codexlens/cli/commands.py (删除enhance命令)
+src/codexlens/semantic/__init__.py (删除LLM导出)
+ccw/src/templates/dashboard-js/components/cli-status.js (删除LLM配置、Settings Modal、Metadata Viewer)
+ccw/src/templates/dashboard-js/i18n.js (删除LLM翻译字符串)
+ccw/src/templates/dashboard-js/views/cli-manager.js (移除LLM badge和modal调用)
+docs/IMPLEMENTATION_SUMMARY.md (添加移除说明)
+```
+
+---
+
+**移除完成时间**: 2025-12-16
+**文档版本**: 1.0
+**验证状态**: ✅ 通过
--- a/codex-lens/docs/MISLEADING_COMMENTS_TEST_RESULTS.md
+++ b/codex-lens/docs/MISLEADING_COMMENTS_TEST_RESULTS.md
@@ -1,301 +0,0 @@
-# 误导性注释测试结果
-
-**测试日期**: 2025-12-16
-**测试目的**: 验证LLM增强搜索是否能克服错误/缺失的代码注释
-
---
-
-## 📊 测试结果总结
-
-### 性能对比
-
-| 方法 | 索引时间 | 准确率 | 得分 | 结论 |
-|------|---------|--------|------|------|
-| **纯向量搜索** | 2.1秒 | 5/5 (100%) | 15/15 | ✅ 未被误导性注释影响 |
-| **LLM增强搜索** | 103.7秒 | 5/5 (100%) | 15/15 | ✅ 正确识别实际功能 |
-
-**结论**: 平局 - 两种方法都能正确处理误导性注释
-
---
-
-## 🧪 测试数据集设计
-
-### 误导性代码样本 (5个文件)
-
-| 文件 | 错误注释 | 实际功能 | 误导程度 |
-|------|---------|---------|---------|
-| `crypto/hasher.py` | "Simple string utilities" | bcrypt密码哈希 | 高 |
-| `auth/token.py` | 无注释，模糊函数名 | JWT令牌生成 | 中 |
-| `api/handlers.py` | "Database utilities", 反向docstrings | REST API用户管理 | 极高 |
-| `utils/checker.py` | "Math calculation functions" | 邮箱地址验证 | 高 |
-| `db/pool.py` | "Email sending service" | PostgreSQL连接池 | 极高 |
-
-### 具体误导示例
-
-#### 示例 1: 完全错误的模块描述
-
-```python
-"""Email sending service."""  # 错误！
-import psycopg2  # 实际是数据库库
-from psycopg2 import pool
-
-class EmailSender:  # 错误的类名
-    """SMTP email sender with retry logic."""  # 错误！
-
-    def __init__(self, min_conn: int = 1, max_conn: int = 10):
-        """Initialize email sender."""  # 错误！
-        self.pool = psycopg2.pool.SimpleConnectionPool(...)  # 实际是DB连接池
-```
-
-**实际功能**: PostgreSQL数据库连接池管理器
-**注释声称**: SMTP邮件发送服务
-
-#### 示例 2: 反向的函数文档
-
-```python
-@app.route('/api/items', methods=['POST'])
-def create_item():
-    """Delete an existing item."""  # 完全相反！
-    data = request.get_json()
-    # 实际是创建新项目
-    return jsonify({'item_id': item_id}), 201
-```
-
-### 测试查询 (基于实际功能)
-
-| 查询 | 预期文件 | 查询难度 |
-|------|---------|---------|
-| "Hash passwords securely with bcrypt" | `crypto/hasher.py` | 高 - 注释说string utils |
-| "Generate JWT authentication token" | `auth/token.py` | 中 - 无注释 |
-| "Create user account REST API endpoint" | `api/handlers.py` | 高 - 注释说database |
-| "Validate email address format" | `utils/checker.py` | 高 - 注释说math |
-| "PostgreSQL database connection pool" | `db/pool.py` | 极高 - 注释说email |
-
---
-
-## 🔍 LLM分析能力验证
-
-### 直接测试: LLM如何理解误导性代码
-
-**测试代码**: `db/pool.py` (声称是"Email sending service")
-
-**Gemini分析结果**:
-
-```
-Summary: This Python module defines an `EmailSender` class that manages
-a PostgreSQL connection pool for an email sending service, using
-`psycopg2` for database interactions. It provides a context manager
-`send_email` to handle connection acquisition, transaction commitment,
-and release back to the pool.
-
-Purpose: data
-
-Keywords: psycopg2, connection pool, PostgreSQL, database, email sender,
-context manager, python, database connection, transaction
-```
-
-**分析得分**:
- ✅ **正确识别的术语** (5/5): PostgreSQL, connection pool, database, psycopg2, database connection
- ⚠️ **误导性术语** (2/3): email sender, email sending service (但上下文正确)
-
-**结论**: LLM正确识别了实际功能（PostgreSQL connection pool），虽然摘要开头提到了错误的module docstring，但核心描述准确。
-
---
-
-## 💡 关键发现
-
-### 1. 为什么纯向量搜索也能工作？
-
-**原因**: 代码中的技术关键词权重高于注释
-
-```python
-# 这些强信号即使有错误注释也能正确匹配
-import bcrypt          # 强信号: 密码哈希
-import jwt             # 强信号: JWT令牌
-import psycopg2        # 强信号: PostgreSQL
-from flask import Flask, request  # 强信号: REST API
-pattern = r'^[a-zA-Z0-9._%+-]+@'  # 强信号: 邮箱验证
-```
-
-**嵌入模型的优势**:
- 代码标识符（bcrypt, jwt, psycopg2）具有高度特异性
- import语句权重高
- 正则表达式模式具有语义信息
- 框架API调用（Flask路由）提供明确上下文
-
-### 2. LLM增强的价值
-
-**LLM分析过程**:
-1. ✅ 读取代码逻辑（不仅仅是注释）
-2. ✅ 识别import语句和实际使用
-3. ✅ 理解代码流程和数据流
-4. ✅ 生成基于行为的摘要
-5. ⚠️ 部分参考错误注释（但不完全依赖）
-
-**示例对比**:
-
-| 方面 | 纯向量 | LLM增强 |
-|------|--------|---------|
-| **处理内容** | 代码 + 注释 (整体嵌入) | 代码分析 → 生成摘要 |
-| **误导性注释影响** | 低 (代码关键词权重高) | 极低 (理解代码逻辑) |
-| **自然语言查询** | 依赖代码词汇匹配 | 理解语义意图 |
-| **处理速度** | 快 (2秒) | 慢 (104秒, 52倍差) |
-
-### 3. 测试数据集的局限性
-
-**为什么两种方法都表现完美**:
-
-1. **文件数量太少** (5个文件)
-   - 没有相似功能的文件竞争
-   - 每个查询有唯一的目标文件
-
-2. **代码关键词太强**
-   - bcrypt → 唯一用于密码
-   - jwt → 唯一用于令牌
-   - Flask+@app.route → 唯一的API
-   - psycopg2 → 唯一的数据库
-
-3. **查询过于具体**
-   - "bcrypt password hashing" 直接匹配代码关键词
-   - 不是概念性或模糊查询
-
-**理想的测试场景**:
- ❌ 5个唯一功能文件
- ✅ 100+文件，多个相似功能模块
- ✅ 模糊概念查询: "用户认证"而不是"bcrypt hash"
- ✅ 没有明显关键词的业务逻辑代码
-
---
-
-## 🎯 实际应用建议
-
-### 何时使用纯向量搜索
-
-✅ **推荐场景**:
- 代码库有良好文档
- 搜索代码模式和API使用
- 已知技术栈关键词
- 需要快速索引
-
-**示例查询**:
- "bcrypt.hashpw usage"
- "Flask @app.route GET method"
- "jwt.encode algorithm"
-
-### 何时使用LLM增强搜索
-
-✅ **推荐场景**:
- 代码库文档缺失或过时
- 自然语言概念性查询
- 业务逻辑搜索
- 重视搜索准确性 > 索引速度
-
-**示例查询**:
- "How to authenticate users?" (概念性)
- "Payment processing workflow" (业务逻辑)
- "Error handling for API requests" (模式搜索)
-
-### 混合策略 (推荐)
-
-| 模块类型 | 索引方式 | 原因 |
-|---------|---------|------|
-| **核心业务逻辑** | LLM增强 | 复杂逻辑，文档可能不完整 |
-| **工具函数** | 纯向量 | 代码清晰，关键词明确 |
-| **第三方集成** | 纯向量 | API调用已是最好描述 |
-| **遗留代码** | LLM增强 | 文档陈旧或缺失 |
-
---
-
-## 📈 性能与成本
-
-### 时间成本
-
-| 操作 | 纯向量 | LLM增强 | 差异 |
-|------|--------|---------|------|
-| **索引5文件** | 2.1秒 | 103.7秒 | 49倍慢 |
-| **索引100文件** | ~42秒 | ~35分钟 | ~50倍慢 |
-| **查询速度** | ~50ms | ~50ms | 相同 |
-
-### 金钱成本 (Gemini Flash)
-
- **价格**: $0.10 / 1M input tokens
- **平均**: ~500 tokens / 文件
- **100文件**: $0.005 (半分钱)
- **1000文件**: $0.05 (5分钱)
-
-**结论**: 金钱成本可忽略，时间成本是主要考虑因素
-
---
-
-## 🧪 测试工具
-
-### 创建的脚本
-
-1. **`scripts/test_misleading_comments.py`**
-   - 完整对比测试
-   - 支持 `--tool gemini|qwen`
-   - 支持 `--keep-db` 保存结果数据库
-
-2. **`scripts/show_llm_analysis.py`**
-   - 直接显示LLM对单个文件的分析
-   - 评估LLM是否被误导
-   - 计算正确/误导术语比例
-
-3. **`scripts/inspect_llm_summaries.py`**
-   - 检查数据库中的LLM摘要
-   - 查看metadata和keywords
-
-### 运行测试
-
-```bash
-# 完整对比测试
-python scripts/test_misleading_comments.py --tool gemini
-
-# 保存数据库用于检查
-python scripts/test_misleading_comments.py --keep-db ./results.db
-
-# 查看LLM对单个文件的分析
-python scripts/show_llm_analysis.py
-
-# 检查数据库中的摘要
-python scripts/inspect_llm_summaries.py results.db
-```
-
---
-
-## 📝 结论
-
-### 测试结论
-
-1. ✅ **LLM能够克服误导性注释**
-   - 正确识别实际代码功能
-   - 生成基于行为的准确摘要
-   - 不完全依赖文档字符串
-
-2. ✅ **纯向量搜索也具有抗干扰能力**
-   - 代码关键词提供强信号
-   - 技术栈名称具有高特异性
-   - import语句和API调用信息丰富
-
-3. ⚠️ **当前测试数据集太简单**
-   - 需要更大规模测试 (100+文件)
-   - 需要概念性查询测试
-   - 需要相似功能模块对比
-
-### 生产使用建议
-
-**最佳实践**: 根据代码库特征选择策略
-
-| 代码库特征 | 推荐方案 | 理由 |
-|-----------|---------|------|
-| 良好文档，清晰命名 | 纯向量 | 快速，成本低 |
-| 文档缺失/陈旧 | LLM增强 | 理解代码逻辑 |
-| 遗留系统 | LLM增强 | 克服历史包袱 |
-| 新项目 | 纯向量 | 现代代码通常更清晰 |
-| 大型企业代码库 | 混合 | 分模块策略 |
-
---
-
-**测试完成时间**: 2025-12-16
-**测试工具**: Gemini Flash 2.5, fastembed (BAAI/bge-small-en-v1.5)
-**文档版本**: 1.0
--- a/codex-lens/scripts/compare_search_methods.py
+++ b/codex-lens/scripts/compare_search_methods.py
@@ -1,465 +0,0 @@
-#!/usr/bin/env python3
-"""Standalone script to compare pure vector vs LLM-enhanced semantic search.
-
-Usage:
-    python compare_search_methods.py [--tool gemini|qwen] [--skip-llm]
-
-This script:
-1. Creates a test dataset with sample code
-2. Tests pure vector search (code → fastembed → search)
-3. Tests LLM-enhanced search (code → LLM summary → fastembed → search)
-4. Compares results across natural language queries
-"""
-
-import argparse
-import sqlite3
-import sys
-import tempfile
-import time
-from pathlib import Path
-from typing import Dict, List, Tuple
-
-# Check dependencies
-try:
-    from codexlens.semantic import SEMANTIC_AVAILABLE
-    from codexlens.semantic.embedder import Embedder
-    from codexlens.semantic.vector_store import VectorStore
-    from codexlens.semantic.chunker import Chunker, ChunkConfig
-    from codexlens.semantic.llm_enhancer import (
-        LLMEnhancer,
-        LLMConfig,
-        FileData,
-        EnhancedSemanticIndexer,
-    )
-    from codexlens.storage.dir_index import DirIndexStore
-    from codexlens.search.hybrid_search import HybridSearchEngine
-except ImportError as e:
-    print(f"Error: Missing dependencies - {e}")
-    print("Install with: pip install codexlens[semantic]")
-    sys.exit(1)
-
-if not SEMANTIC_AVAILABLE:
-    print("Error: Semantic search dependencies not available")
-    print("Install with: pip install codexlens[semantic]")
-    sys.exit(1)
-
-
-# Test dataset with realistic code samples
-TEST_DATASET = {
-    "auth/password_hasher.py": '''"""Password hashing utilities using bcrypt."""
-import bcrypt
-
-def hash_password(password: str, salt_rounds: int = 12) -> str:
-    """Hash a password using bcrypt with specified salt rounds."""
-    salt = bcrypt.gensalt(rounds=salt_rounds)
-    hashed = bcrypt.hashpw(password.encode('utf-8'), salt)
-    return hashed.decode('utf-8')
-
-def verify_password(password: str, hashed: str) -> bool:
-    """Verify a password against its hash."""
-    return bcrypt.checkpw(password.encode('utf-8'), hashed.encode('utf-8'))
-''',
-
-    "auth/jwt_handler.py": '''"""JWT token generation and validation."""
-import jwt
-from datetime import datetime, timedelta
-
-SECRET_KEY = "your-secret-key"
-
-def create_token(user_id: int, expires_in: int = 3600) -> str:
-    """Generate a JWT access token for user authentication."""
-    payload = {
-        'user_id': user_id,
-        'exp': datetime.utcnow() + timedelta(seconds=expires_in),
-        'iat': datetime.utcnow()
-    }
-    return jwt.encode(payload, SECRET_KEY, algorithm='HS256')
-
-def decode_token(token: str) -> dict:
-    """Validate and decode JWT token."""
-    try:
-        return jwt.decode(token, SECRET_KEY, algorithms=['HS256'])
-    except jwt.ExpiredSignatureError:
-        return None
-''',
-
-    "api/user_endpoints.py": '''"""REST API endpoints for user management."""
-from flask import Flask, request, jsonify
-
-app = Flask(__name__)
-
-@app.route('/api/users', methods=['POST'])
-def create_user():
-    """Create a new user account with email and password."""
-    data = request.get_json()
-    if not data.get('email') or not data.get('password'):
-        return jsonify({'error': 'Email and password required'}), 400
-    user_id = 12345  # Database insert
-    return jsonify({'user_id': user_id, 'success': True}), 201
-
-@app.route('/api/users/<int:user_id>', methods=['GET'])
-def get_user(user_id: int):
-    """Retrieve user profile information by user ID."""
-    user = {
-        'id': user_id,
-        'email': 'user@example.com',
-        'name': 'John Doe'
-    }
-    return jsonify(user), 200
-''',
-
-    "utils/validation.py": '''"""Input validation utilities."""
-import re
-
-def validate_email(email: str) -> bool:
-    """Check if email address format is valid using regex."""
-    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
-    return bool(re.match(pattern, email))
-
-def sanitize_input(text: str, max_length: int = 255) -> str:
-    """Clean user input by removing special characters."""
-    text = re.sub(r'[<>\"\'&]', '', text)
-    return text.strip()[:max_length]
-
-def validate_password_strength(password: str) -> tuple:
-    """Validate password meets security requirements."""
-    if len(password) < 8:
-        return False, "Password must be at least 8 characters"
-    if not re.search(r'[A-Z]', password):
-        return False, "Must contain uppercase letter"
-    return True, None
-''',
-
-    "database/connection.py": '''"""Database connection pooling."""
-import psycopg2
-from psycopg2 import pool
-from contextlib import contextmanager
-
-class DatabasePool:
-    """PostgreSQL connection pool manager."""
-
-    def __init__(self, min_conn: int = 1, max_conn: int = 10):
-        """Initialize database connection pool."""
-        self.pool = psycopg2.pool.SimpleConnectionPool(
-            min_conn, max_conn,
-            user='dbuser', host='localhost', database='myapp'
-        )
-
-    @contextmanager
-    def get_connection(self):
-        """Get a connection from pool as context manager."""
-        conn = self.pool.getconn()
-        try:
-            yield conn
-            conn.commit()
-        finally:
-            self.pool.putconn(conn)
-''',
-}
-
-
-# Natural language test queries
-TEST_QUERIES = [
-    ("How do I securely hash passwords?", "auth/password_hasher.py"),
-    ("Generate JWT token for authentication", "auth/jwt_handler.py"),
-    ("Create new user account via API", "api/user_endpoints.py"),
-    ("Validate email address format", "utils/validation.py"),
-    ("Connect to PostgreSQL database", "database/connection.py"),
-]
-
-
-def create_test_database(db_path: Path) -> None:
-    """Create and populate test database."""
-    store = DirIndexStore(db_path)
-    store.initialize()
-
-    with store._get_connection() as conn:
-        for path, content in TEST_DATASET.items():
-            name = path.split('/')[-1]
-            conn.execute(
-                """INSERT INTO files (name, full_path, content, language, mtime)
-                   VALUES (?, ?, ?, ?, ?)""",
-                (name, path, content, "python", 0.0)
-            )
-        conn.commit()
-
-    store.close()
-
-
-def test_pure_vector_search(db_path: Path) -> Dict:
-    """Test pure vector search (raw code embeddings)."""
-    print("\n" + "="*70)
-    print("PURE VECTOR SEARCH (Code → fastembed)")
-    print("="*70)
-
-    start_time = time.time()
-
-    # Generate pure vector embeddings
-    embedder = Embedder(profile="code")
-    vector_store = VectorStore(db_path)
-    chunker = Chunker(config=ChunkConfig(max_chunk_size=2000))
-
-    with sqlite3.connect(db_path) as conn:
-        conn.row_factory = sqlite3.Row
-        rows = conn.execute("SELECT full_path, content FROM files").fetchall()
-
-    chunk_count = 0
-    for row in rows:
-        chunks = chunker.chunk_sliding_window(
-            row["content"],
-            file_path=row["full_path"],
-            language="python"
-        )
-        for chunk in chunks:
-            chunk.embedding = embedder.embed_single(chunk.content)
-            chunk.metadata["strategy"] = "pure_vector"
-        if chunks:
-            vector_store.add_chunks(chunks, row["full_path"])
-            chunk_count += len(chunks)
-
-    setup_time = time.time() - start_time
-    print(f"Setup: {len(rows)} files, {chunk_count} chunks in {setup_time:.1f}s")
-
-    # Test queries
-    engine = HybridSearchEngine()
-    results = {}
-
-    print(f"\n{'Query':<45} {'Top Result':<30} {'Score':<8}")
-    print("-" * 70)
-
-    for query, expected_file in TEST_QUERIES:
-        search_results = engine.search(
-            db_path,
-            query,
-            limit=3,
-            enable_vector=True,
-            pure_vector=True,
-        )
-
-        top_file = search_results[0].path if search_results else "No results"
-        top_score = search_results[0].score if search_results else 0.0
-        found = expected_file in [r.path for r in search_results]
-        rank = None
-        if found:
-            for i, r in enumerate(search_results):
-                if r.path == expected_file:
-                    rank = i + 1
-                    break
-
-        status = "[OK]" if found and rank == 1 else ("[~]" if found else "[X]")
-        display_query = query[:42] + "..." if len(query) > 45 else query
-        display_file = top_file.split('/')[-1] if '/' in top_file else top_file
-
-        print(f"{status} {display_query:<43} {display_file:<30} {top_score:.3f}")
-
-        results[query] = {
-            "found": found,
-            "rank": rank,
-            "top_file": top_file,
-            "score": top_score,
-        }
-
-    return results
-
-
-def test_llm_enhanced_search(db_path: Path, llm_tool: str = "gemini") -> Dict:
-    """Test LLM-enhanced search (LLM summaries → fastembed)."""
-    print("\n" + "="*70)
-    print(f"LLM-ENHANCED SEARCH (Code → {llm_tool.upper()} → fastembed)")
-    print("="*70)
-
-    # Check CCW availability
-    llm_config = LLMConfig(enabled=True, tool=llm_tool, batch_size=2)
-    enhancer = LLMEnhancer(llm_config)
-
-    if not enhancer.check_available():
-        print("[X] CCW CLI not available - skipping LLM-enhanced test")
-        print("  Install CCW: npm install -g ccw")
-        return {}
-
-    start_time = time.time()
-
-    # Generate LLM-enhanced embeddings
-    embedder = Embedder(profile="code")
-    vector_store = VectorStore(db_path)
-    indexer = EnhancedSemanticIndexer(enhancer, embedder, vector_store)
-
-    # Prepare file data
-    file_data_list = [
-        FileData(path=path, content=content, language="python")
-        for path, content in TEST_DATASET.items()
-    ]
-
-    # Index with LLM enhancement
-    print(f"Generating LLM summaries for {len(file_data_list)} files...")
-    indexed = indexer.index_files(file_data_list)
-    setup_time = time.time() - start_time
-
-    print(f"Setup: {indexed}/{len(file_data_list)} files indexed in {setup_time:.1f}s")
-
-    # Test queries
-    engine = HybridSearchEngine()
-    results = {}
-
-    print(f"\n{'Query':<45} {'Top Result':<30} {'Score':<8}")
-    print("-" * 70)
-
-    for query, expected_file in TEST_QUERIES:
-        search_results = engine.search(
-            db_path,
-            query,
-            limit=3,
-            enable_vector=True,
-            pure_vector=True,
-        )
-
-        top_file = search_results[0].path if search_results else "No results"
-        top_score = search_results[0].score if search_results else 0.0
-        found = expected_file in [r.path for r in search_results]
-        rank = None
-        if found:
-            for i, r in enumerate(search_results):
-                if r.path == expected_file:
-                    rank = i + 1
-                    break
-
-        status = "[OK]" if found and rank == 1 else ("[~]" if found else "[X]")
-        display_query = query[:42] + "..." if len(query) > 45 else query
-        display_file = top_file.split('/')[-1] if '/' in top_file else top_file
-
-        print(f"{status} {display_query:<43} {display_file:<30} {top_score:.3f}")
-
-        results[query] = {
-            "found": found,
-            "rank": rank,
-            "top_file": top_file,
-            "score": top_score,
-        }
-
-    return results
-
-
-def compare_results(pure_results: Dict, llm_results: Dict) -> None:
-    """Compare and analyze results from both approaches."""
-    print("\n" + "="*70)
-    print("COMPARISON SUMMARY")
-    print("="*70)
-
-    if not llm_results:
-        print("Cannot compare - LLM-enhanced test was skipped")
-        return
-
-    pure_score = 0
-    llm_score = 0
-
-    print(f"\n{'Query':<45} {'Pure':<10} {'LLM':<10}")
-    print("-" * 70)
-
-    for query, expected_file in TEST_QUERIES:
-        pure_res = pure_results.get(query, {})
-        llm_res = llm_results.get(query, {})
-
-        pure_status = f"[OK] Rank {pure_res.get('rank', '?')}" if pure_res.get('found') else "[X] Miss"
-        llm_status = f"[OK] Rank {llm_res.get('rank', '?')}" if llm_res.get('found') else "[X] Miss"
-
-        # Scoring: Rank 1 = 3 points, Rank 2 = 2 points, Rank 3 = 1 point
-        if pure_res.get('found') and pure_res.get('rank'):
-            pure_score += max(0, 4 - pure_res['rank'])
-        if llm_res.get('found') and llm_res.get('rank'):
-            llm_score += max(0, 4 - llm_res['rank'])
-
-        display_query = query[:42] + "..." if len(query) > 45 else query
-        print(f"{display_query:<45} {pure_status:<10} {llm_status:<10}")
-
-    print("-" * 70)
-    print(f"{'TOTAL SCORE':<45} {pure_score:<10} {llm_score:<10}")
-    print("="*70)
-
-    # Analysis
-    print("\nANALYSIS:")
-    if llm_score > pure_score:
-        improvement = ((llm_score - pure_score) / max(pure_score, 1)) * 100
-        print(f"[OK] LLM enhancement improves results by {improvement:.1f}%")
-        print("  Natural language summaries match queries better than raw code")
-    elif pure_score > llm_score:
-        degradation = ((pure_score - llm_score) / max(pure_score, 1)) * 100
-        print(f"[X] Pure vector performed {degradation:.1f}% better")
-        print("  LLM summaries may be too generic or missing key details")
-    else:
-        print("= Both approaches performed equally on this test set")
-
-    print("\nKEY FINDINGS:")
-    print("- Pure Vector: Direct code embeddings, fast but may miss semantic intent")
-    print("- LLM Enhanced: Natural language summaries, better for human-like queries")
-    print("- Best Use: Combine both - LLM for natural language, vector for code patterns")
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Compare pure vector vs LLM-enhanced semantic search"
-    )
-    parser.add_argument(
-        "--tool",
-        choices=["gemini", "qwen"],
-        default="gemini",
-        help="LLM tool to use for enhancement (default: gemini)"
-    )
-    parser.add_argument(
-        "--skip-llm",
-        action="store_true",
-        help="Skip LLM-enhanced test (only run pure vector)"
-    )
-    args = parser.parse_args()
-
-    print("\n" + "="*70)
-    print("SEMANTIC SEARCH COMPARISON TEST")
-    print("Pure Vector vs LLM-Enhanced Vector Search")
-    print("="*70)
-
-    # Create test database
-    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
-        db_path = Path(f.name)
-
-    try:
-        print(f"\nTest dataset: {len(TEST_DATASET)} Python files")
-        print(f"Test queries: {len(TEST_QUERIES)} natural language questions")
-
-        create_test_database(db_path)
-
-        # Test pure vector search
-        pure_results = test_pure_vector_search(db_path)
-
-        # Test LLM-enhanced search
-        if not args.skip_llm:
-            # Clear semantic_chunks table for LLM test
-            with sqlite3.connect(db_path) as conn:
-                conn.execute("DELETE FROM semantic_chunks")
-                conn.commit()
-
-            llm_results = test_llm_enhanced_search(db_path, args.tool)
-        else:
-            llm_results = {}
-            print("\n[X] LLM-enhanced test skipped (--skip-llm flag)")
-
-        # Compare results
-        compare_results(pure_results, llm_results)
-
-    finally:
-        # Cleanup - ensure all connections are closed
-        try:
-            import gc
-            gc.collect()  # Force garbage collection to close any lingering connections
-            time.sleep(0.1)  # Small delay for Windows to release file handle
-            if db_path.exists():
-                db_path.unlink()
-        except PermissionError:
-            print(f"\nWarning: Could not delete temporary database: {db_path}")
-            print("It will be cleaned up on next system restart.")
-
-    print("\n" + "="*70)
-    print("Test completed successfully!")
-    print("="*70)
-
-
-if __name__ == "__main__":
-    main()
--- a/codex-lens/scripts/inspect_llm_summaries.py
+++ b/codex-lens/scripts/inspect_llm_summaries.py
@@ -1,88 +0,0 @@
-#!/usr/bin/env python3
-"""Inspect LLM-generated summaries in semantic_chunks table."""
-
-import sqlite3
-import sys
-from pathlib import Path
-
-def inspect_summaries(db_path: Path):
-    """Show LLM-generated summaries from database."""
-    if not db_path.exists():
-        print(f"Error: Database not found: {db_path}")
-        return
-
-    with sqlite3.connect(db_path) as conn:
-        conn.row_factory = sqlite3.Row
-
-        # Check if semantic_chunks table exists
-        cursor = conn.execute(
-            "SELECT name FROM sqlite_master WHERE type='table' AND name='semantic_chunks'"
-        )
-        if not cursor.fetchone():
-            print("No semantic_chunks table found")
-            return
-
-        # Get all chunks with metadata
-        cursor = conn.execute("""
-            SELECT file_path, chunk_index, content,
-                   json_extract(metadata, '$.llm_summary') as summary,
-                   json_extract(metadata, '$.llm_keywords') as keywords,
-                   json_extract(metadata, '$.llm_purpose') as purpose,
-                   json_extract(metadata, '$.strategy') as strategy
-            FROM semantic_chunks
-            ORDER BY file_path, chunk_index
-        """)
-
-        chunks = cursor.fetchall()
-
-        if not chunks:
-            print("No chunks found in database")
-            return
-
-        print("="*80)
-        print("LLM-GENERATED SUMMARIES INSPECTION")
-        print("="*80)
-
-        current_file = None
-        for chunk in chunks:
-            file_path = chunk['file_path']
-
-            if file_path != current_file:
-                print(f"\n{'='*80}")
-                print(f"FILE: {file_path}")
-                print(f"{'='*80}")
-                current_file = file_path
-
-            print(f"\n[Chunk {chunk['chunk_index']}]")
-            print(f"Strategy: {chunk['strategy']}")
-
-            if chunk['summary']:
-                print(f"\nLLM Summary:")
-                print(f"  {chunk['summary']}")
-
-            if chunk['keywords']:
-                print(f"\nKeywords:")
-                print(f"  {chunk['keywords']}")
-
-            if chunk['purpose']:
-                print(f"\nPurpose:")
-                print(f"  {chunk['purpose']}")
-
-            # Show first 200 chars of content
-            content = chunk['content']
-            if len(content) > 200:
-                content = content[:200] + "..."
-            print(f"\nOriginal Content (first 200 chars):")
-            print(f"  {content}")
-            print("-" * 80)
-
-
-if __name__ == "__main__":
-    if len(sys.argv) < 2:
-        print("Usage: python inspect_llm_summaries.py <path_to_index.db>")
-        print("\nExample:")
-        print("  python inspect_llm_summaries.py ~/.codexlens/indexes/myproject/_index.db")
-        sys.exit(1)
-
-    db_path = Path(sys.argv[1])
-    inspect_summaries(db_path)
--- a/codex-lens/scripts/show_llm_analysis.py
+++ b/codex-lens/scripts/show_llm_analysis.py
@@ -1,112 +0,0 @@
-#!/usr/bin/env python3
-"""Directly show LLM analysis of test code."""
-
-from codexlens.semantic.llm_enhancer import LLMEnhancer, LLMConfig, FileData
-
-# Misleading code example
-TEST_CODE = '''"""Email sending service."""
-import psycopg2
-from psycopg2 import pool
-from contextlib import contextmanager
-
-class EmailSender:
-    """SMTP email sender with retry logic."""
-
-    def __init__(self, min_conn: int = 1, max_conn: int = 10):
-        """Initialize email sender."""
-        self.pool = psycopg2.pool.SimpleConnectionPool(
-            min_conn, max_conn,
-            user='dbuser', host='localhost', database='myapp'
-        )
-
-    @contextmanager
-    def send_email(self):
-        """Send email message."""
-        conn = self.pool.getconn()
-        try:
-            yield conn
-            conn.commit()
-        finally:
-            self.pool.putconn(conn)
-'''
-
-print("="*80)
-print("LLM ANALYSIS OF MISLEADING CODE")
-print("="*80)
-
-print("\n[Original Code with Misleading Comments]")
-print("-"*80)
-print(TEST_CODE)
-print("-"*80)
-
-print("\n[Actual Functionality]")
-print("  - Imports: psycopg2 (PostgreSQL library)")
-print("  - Class: EmailSender (but name is misleading!)")
-print("  - Actually: Creates PostgreSQL connection pool")
-print("  - Methods: send_email (actually gets DB connection)")
-
-print("\n[Misleading Documentation]")
-print("  - Module docstring: 'Email sending service' (WRONG)")
-print("  - Class docstring: 'SMTP email sender' (WRONG)")
-print("  - Method docstring: 'Send email message' (WRONG)")
-
-print("\n" + "="*80)
-print("TESTING LLM UNDERSTANDING")
-print("="*80)
-
-# Test LLM analysis
-config = LLMConfig(enabled=True, tool="gemini", batch_size=1)
-enhancer = LLMEnhancer(config)
-
-if not enhancer.check_available():
-    print("\n[X] CCW CLI not available")
-    print("Install: npm install -g ccw")
-    exit(1)
-
-print("\n[Calling Gemini to analyze code...]")
-file_data = FileData(path="db/pool.py", content=TEST_CODE, language="python")
-
-import tempfile
-from pathlib import Path
-
-with tempfile.TemporaryDirectory() as tmpdir:
-    result = enhancer.enhance_files([file_data], Path(tmpdir))
-
-    if "db/pool.py" in result:
-        metadata = result["db/pool.py"]
-
-        print("\n[LLM-Generated Summary]")
-        print("-"*80)
-        print(f"Summary: {metadata.summary}")
-        print(f"\nPurpose: {metadata.purpose}")
-        print(f"\nKeywords: {', '.join(metadata.keywords)}")
-        print("-"*80)
-
-        print("\n[Analysis]")
-        # Check if LLM identified the real functionality
-        summary_lower = metadata.summary.lower()
-        keywords_lower = [k.lower() for k in metadata.keywords]
-
-        correct_terms = ['database', 'postgresql', 'connection', 'pool', 'psycopg']
-        misleading_terms = ['email', 'smtp', 'send']
-
-        found_correct = sum(1 for term in correct_terms
-                           if term in summary_lower or any(term in k for k in keywords_lower))
-        found_misleading = sum(1 for term in misleading_terms
-                              if term in summary_lower or any(term in k for k in keywords_lower))
-
-        print(f"Correct terms found: {found_correct}/{len(correct_terms)}")
-        print(f"Misleading terms found: {found_misleading}/{len(misleading_terms)}")
-
-        if found_correct > found_misleading:
-            print("\n[OK] LLM correctly identified actual functionality!")
-            print("     LLM ignored misleading comments and analyzed code behavior")
-        elif found_misleading > found_correct:
-            print("\n[X] LLM was misled by incorrect comments")
-            print("    LLM trusted documentation over code analysis")
-        else:
-            print("\n[~] Mixed results - LLM found both correct and misleading terms")
-    else:
-        print("\n[X] LLM analysis failed - no results returned")
-
-print("\n" + "="*80)
--- a/codex-lens/scripts/test_misleading_comments.py
+++ b/codex-lens/scripts/test_misleading_comments.py
@@ -1,491 +0,0 @@
-#!/usr/bin/env python3
-"""Test pure vector vs LLM-enhanced search with misleading/missing comments.
-
-This test demonstrates how LLM enhancement can overcome:
-1. Missing comments/docstrings
-2. Misleading or incorrect comments
-3. Outdated documentation
-
-Usage:
-    python test_misleading_comments.py --tool gemini
-"""
-
-import argparse
-import sqlite3
-import sys
-import tempfile
-import time
-from pathlib import Path
-from typing import Dict, List
-
-# Check dependencies
-try:
-    from codexlens.semantic import SEMANTIC_AVAILABLE
-    from codexlens.semantic.embedder import Embedder
-    from codexlens.semantic.vector_store import VectorStore
-    from codexlens.semantic.chunker import Chunker, ChunkConfig
-    from codexlens.semantic.llm_enhancer import (
-        LLMEnhancer,
-        LLMConfig,
-        FileData,
-        EnhancedSemanticIndexer,
-    )
-    from codexlens.storage.dir_index import DirIndexStore
-    from codexlens.search.hybrid_search import HybridSearchEngine
-except ImportError as e:
-    print(f"Error: Missing dependencies - {e}")
-    print("Install with: pip install codexlens[semantic]")
-    sys.exit(1)
-
-if not SEMANTIC_AVAILABLE:
-    print("Error: Semantic search dependencies not available")
-    sys.exit(1)
-
-
-# Test dataset with MISLEADING or MISSING comments
-MISLEADING_DATASET = {
-    "crypto/hasher.py": '''"""Simple string utilities."""
-import bcrypt
-
-def process_string(s: str, rounds: int = 12) -> str:
-    """Convert string to uppercase."""
-    salt = bcrypt.gensalt(rounds=rounds)
-    hashed = bcrypt.hashpw(s.encode('utf-8'), salt)
-    return hashed.decode('utf-8')
-
-def check_string(s: str, target: str) -> bool:
-    """Check if two strings are equal."""
-    return bcrypt.checkpw(s.encode('utf-8'), target.encode('utf-8'))
-''',
-
-    "auth/token.py": '''import jwt
-from datetime import datetime, timedelta
-
-SECRET_KEY = "key123"
-
-def make_thing(uid: int, exp: int = 3600) -> str:
-    payload = {
-        'user_id': uid,
-        'exp': datetime.utcnow() + timedelta(seconds=exp),
-        'iat': datetime.utcnow()
-    }
-    return jwt.encode(payload, SECRET_KEY, algorithm='HS256')
-
-def parse_thing(thing: str) -> dict:
-    try:
-        return jwt.decode(thing, SECRET_KEY, algorithms=['HS256'])
-    except jwt.ExpiredSignatureError:
-        return None
-''',
-
-    "api/handlers.py": '''"""Database connection utilities."""
-from flask import Flask, request, jsonify
-
-app = Flask(__name__)
-
-@app.route('/api/items', methods=['POST'])
-def create_item():
-    """Delete an existing item."""
-    data = request.get_json()
-    if not data.get('email') or not data.get('password'):
-        return jsonify({'error': 'Missing data'}), 400
-    item_id = 12345
-    return jsonify({'item_id': item_id, 'success': True}), 201
-
-@app.route('/api/items/<int:item_id>', methods=['GET'])
-def get_item(item_id: int):
-    """Update item configuration."""
-    item = {
-        'id': item_id,
-        'email': 'user@example.com',
-        'name': 'John Doe'
-    }
-    return jsonify(item), 200
-''',
-
-    "utils/checker.py": '''"""Math calculation functions."""
-import re
-
-def calc_sum(email: str) -> bool:
-    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$'
-    return bool(re.match(pattern, email))
-
-def format_text(text: str, max_len: int = 255) -> str:
-    text = re.sub(r'[<>"\\'&]', '', text)
-    return text.strip()[:max_len]
-''',
-
-    "db/pool.py": '''"""Email sending service."""
-import psycopg2
-from psycopg2 import pool
-from contextlib import contextmanager
-
-class EmailSender:
-    """SMTP email sender with retry logic."""
-
-    def __init__(self, min_conn: int = 1, max_conn: int = 10):
-        """Initialize email sender."""
-        self.pool = psycopg2.pool.SimpleConnectionPool(
-            min_conn, max_conn,
-            user='dbuser', host='localhost', database='myapp'
-        )
-
-    @contextmanager
-    def send_email(self):
-        """Send email message."""
-        conn = self.pool.getconn()
-        try:
-            yield conn
-            conn.commit()
-        finally:
-            self.pool.putconn(conn)
-''',
-}
-
-
-# Test queries - natural language based on ACTUAL functionality (not misleading comments)
-TEST_QUERIES = [
-    ("How to hash passwords securely with bcrypt?", "crypto/hasher.py"),
-    ("Generate JWT authentication token", "auth/token.py"),
-    ("Create user account REST API endpoint", "api/handlers.py"),
-    ("Validate email address format", "utils/checker.py"),
-    ("PostgreSQL database connection pool", "db/pool.py"),
-]
-
-
-def create_test_database(db_path: Path) -> None:
-    """Create and populate test database."""
-    store = DirIndexStore(db_path)
-    store.initialize()
-
-    with store._get_connection() as conn:
-        for path, content in MISLEADING_DATASET.items():
-            name = path.split('/')[-1]
-            conn.execute(
-                """INSERT INTO files (name, full_path, content, language, mtime)
-                   VALUES (?, ?, ?, ?, ?)""",
-                (name, path, content, "python", 0.0)
-            )
-        conn.commit()
-
-    store.close()
-
-
-def test_pure_vector_search(db_path: Path) -> Dict:
-    """Test pure vector search (relies on code + misleading comments)."""
-    print("\n" + "="*70)
-    print("PURE VECTOR SEARCH (Code + Misleading Comments -> fastembed)")
-    print("="*70)
-
-    start_time = time.time()
-
-    # Generate pure vector embeddings
-    embedder = Embedder(profile="code")
-    vector_store = VectorStore(db_path)
-    chunker = Chunker(config=ChunkConfig(max_chunk_size=2000))
-
-    with sqlite3.connect(db_path) as conn:
-        conn.row_factory = sqlite3.Row
-        rows = conn.execute("SELECT full_path, content FROM files").fetchall()
-
-    chunk_count = 0
-    for row in rows:
-        chunks = chunker.chunk_sliding_window(
-            row["content"],
-            file_path=row["full_path"],
-            language="python"
-        )
-        for chunk in chunks:
-            chunk.embedding = embedder.embed_single(chunk.content)
-            chunk.metadata["strategy"] = "pure_vector"
-        if chunks:
-            vector_store.add_chunks(chunks, row["full_path"])
-            chunk_count += len(chunks)
-
-    setup_time = time.time() - start_time
-    print(f"Setup: {len(rows)} files, {chunk_count} chunks in {setup_time:.1f}s")
-    print("Note: Embeddings include misleading comments")
-
-    # Test queries
-    engine = HybridSearchEngine()
-    results = {}
-
-    print(f"\n{'Query':<45} {'Top Result':<30} {'Score':<8}")
-    print("-" * 70)
-
-    for query, expected_file in TEST_QUERIES:
-        search_results = engine.search(
-            db_path,
-            query,
-            limit=3,
-            enable_vector=True,
-            pure_vector=True,
-        )
-
-        top_file = search_results[0].path if search_results else "No results"
-        top_score = search_results[0].score if search_results else 0.0
-        found = expected_file in [r.path for r in search_results]
-        rank = None
-        if found:
-            for i, r in enumerate(search_results):
-                if r.path == expected_file:
-                    rank = i + 1
-                    break
-
-        status = "[OK]" if found and rank == 1 else ("[~]" if found else "[X]")
-        display_query = query[:42] + "..." if len(query) > 45 else query
-        display_file = top_file.split('/')[-1] if '/' in top_file else top_file
-
-        print(f"{status} {display_query:<43} {display_file:<30} {top_score:.3f}")
-
-        results[query] = {
-            "found": found,
-            "rank": rank,
-            "top_file": top_file,
-            "score": top_score,
-        }
-
-    return results
-
-
-def test_llm_enhanced_search(db_path: Path, llm_tool: str = "gemini") -> Dict:
-    """Test LLM-enhanced search (LLM reads code and generates accurate summary)."""
-    print("\n" + "="*70)
-    print(f"LLM-ENHANCED SEARCH (Code -> {llm_tool.upper()} Analysis -> fastembed)")
-    print("="*70)
-
-    # Check CCW availability
-    llm_config = LLMConfig(enabled=True, tool=llm_tool, batch_size=2)
-    enhancer = LLMEnhancer(llm_config)
-
-    if not enhancer.check_available():
-        print("[X] CCW CLI not available - skipping LLM-enhanced test")
-        print("  Install CCW: npm install -g ccw")
-        return {}
-
-    start_time = time.time()
-
-    # Generate LLM-enhanced embeddings
-    embedder = Embedder(profile="code")
-    vector_store = VectorStore(db_path)
-    indexer = EnhancedSemanticIndexer(enhancer, embedder, vector_store)
-
-    # Prepare file data
-    file_data_list = [
-        FileData(path=path, content=content, language="python")
-        for path, content in MISLEADING_DATASET.items()
-    ]
-
-    # Index with LLM enhancement
-    print(f"LLM analyzing code (ignoring misleading comments)...")
-    indexed = indexer.index_files(file_data_list)
-    setup_time = time.time() - start_time
-
-    print(f"Setup: {indexed}/{len(file_data_list)} files indexed in {setup_time:.1f}s")
-    print("Note: LLM generates summaries based on actual code logic")
-
-    # Test queries
-    engine = HybridSearchEngine()
-    results = {}
-
-    print(f"\n{'Query':<45} {'Top Result':<30} {'Score':<8}")
-    print("-" * 70)
-
-    for query, expected_file in TEST_QUERIES:
-        search_results = engine.search(
-            db_path,
-            query,
-            limit=3,
-            enable_vector=True,
-            pure_vector=True,
-        )
-
-        top_file = search_results[0].path if search_results else "No results"
-        top_score = search_results[0].score if search_results else 0.0
-        found = expected_file in [r.path for r in search_results]
-        rank = None
-        if found:
-            for i, r in enumerate(search_results):
-                if r.path == expected_file:
-                    rank = i + 1
-                    break
-
-        status = "[OK]" if found and rank == 1 else ("[~]" if found else "[X]")
-        display_query = query[:42] + "..." if len(query) > 45 else query
-        display_file = top_file.split('/')[-1] if '/' in top_file else top_file
-
-        print(f"{status} {display_query:<43} {display_file:<30} {top_score:.3f}")
-
-        results[query] = {
-            "found": found,
-            "rank": rank,
-            "top_file": top_file,
-            "score": top_score,
-        }
-
-    return results
-
-
-def compare_results(pure_results: Dict, llm_results: Dict) -> None:
-    """Compare and analyze results from both approaches."""
-    print("\n" + "="*70)
-    print("COMPARISON SUMMARY - MISLEADING COMMENTS TEST")
-    print("="*70)
-
-    if not llm_results:
-        print("Cannot compare - LLM-enhanced test was skipped")
-        return
-
-    pure_score = 0
-    llm_score = 0
-
-    print(f"\n{'Query':<45} {'Pure':<10} {'LLM':<10}")
-    print("-" * 70)
-
-    for query, expected_file in TEST_QUERIES:
-        pure_res = pure_results.get(query, {})
-        llm_res = llm_results.get(query, {})
-
-        pure_status = f"[OK] Rank {pure_res.get('rank', '?')}" if pure_res.get('found') else "[X] Miss"
-        llm_status = f"[OK] Rank {llm_res.get('rank', '?')}" if llm_res.get('found') else "[X] Miss"
-
-        # Scoring: Rank 1 = 3 points, Rank 2 = 2 points, Rank 3 = 1 point
-        if pure_res.get('found') and pure_res.get('rank'):
-            pure_score += max(0, 4 - pure_res['rank'])
-        if llm_res.get('found') and llm_res.get('rank'):
-            llm_score += max(0, 4 - llm_res['rank'])
-
-        display_query = query[:42] + "..." if len(query) > 45 else query
-        print(f"{display_query:<45} {pure_status:<10} {llm_status:<10}")
-
-    print("-" * 70)
-    print(f"{'TOTAL SCORE':<45} {pure_score:<10} {llm_score:<10}")
-    print("="*70)
-
-    # Analysis
-    print("\nANALYSIS:")
-    if llm_score > pure_score:
-        improvement = ((llm_score - pure_score) / max(pure_score, 1)) * 100
-        print(f"[OK] LLM enhancement improves results by {improvement:.1f}%")
-        print("  LLM understands actual code logic despite misleading comments")
-        print("  Pure vector search misled by incorrect documentation")
-    elif pure_score > llm_score:
-        degradation = ((pure_score - llm_score) / max(pure_score, 1)) * 100
-        print(f"[X] Pure vector performed {degradation:.1f}% better")
-        print("  Unexpected: Pure vector wasn't affected by misleading comments")
-    else:
-        print("= Both approaches performed equally")
-        print("  Test dataset may still be too simple to show differences")
-
-    print("\nKEY INSIGHTS:")
-    print("- Pure Vector: Embeds code + comments together, can be misled")
-    print("- LLM Enhanced: Analyzes actual code behavior, ignores bad comments")
-    print("- Best Use: LLM enhancement crucial for poorly documented codebases")
-
-    print("\nMISLEADING COMMENTS IN TEST:")
-    print("1. 'hasher.py' claims 'string utilities' but does bcrypt hashing")
-    print("2. 'token.py' has no docstrings, unclear function names")
-    print("3. 'handlers.py' says 'database utilities' but is REST API")
-    print("4. 'handlers.py' docstrings opposite (create says delete, etc)")
-    print("5. 'checker.py' claims 'math functions' but validates emails")
-    print("6. 'pool.py' claims 'email sender' but is database pool")
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Test pure vector vs LLM-enhanced with misleading comments"
-    )
-    parser.add_argument(
-        "--tool",
-        choices=["gemini", "qwen"],
-        default="gemini",
-        help="LLM tool to use (default: gemini)"
-    )
-    parser.add_argument(
-        "--skip-llm",
-        action="store_true",
-        help="Skip LLM-enhanced test"
-    )
-    parser.add_argument(
-        "--keep-db",
-        type=str,
-        help="Save database to specified path for inspection (e.g., ./test_results.db)"
-    )
-    args = parser.parse_args()
-
-    print("\n" + "="*70)
-    print("MISLEADING COMMENTS TEST")
-    print("Pure Vector vs LLM-Enhanced with Incorrect Documentation")
-    print("="*70)
-
-    # Create test database
-    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
-        db_path = Path(f.name)
-
-    try:
-        print(f"\nTest dataset: {len(MISLEADING_DATASET)} Python files")
-        print(f"Test queries: {len(TEST_QUERIES)} natural language questions")
-        print("\nChallenges:")
-        print("- Misleading module docstrings")
-        print("- Incorrect function docstrings")
-        print("- Missing documentation")
-        print("- Unclear function names")
-
-        create_test_database(db_path)
-
-        # Test pure vector search
-        pure_results = test_pure_vector_search(db_path)
-
-        # Test LLM-enhanced search
-        if not args.skip_llm:
-            # Clear semantic_chunks table for LLM test
-            with sqlite3.connect(db_path) as conn:
-                conn.execute("DELETE FROM semantic_chunks")
-                conn.commit()
-
-            llm_results = test_llm_enhanced_search(db_path, args.tool)
-        else:
-            llm_results = {}
-            print("\n[X] LLM-enhanced test skipped (--skip-llm flag)")
-
-        # Compare results
-        compare_results(pure_results, llm_results)
-
-    finally:
-        # Save or cleanup database
-        if args.keep_db:
-            import shutil
-            save_path = Path(args.keep_db)
-            try:
-                import gc
-                gc.collect()
-                time.sleep(0.2)
-                shutil.copy2(db_path, save_path)
-                print(f"\n[OK] Database saved to: {save_path}")
-                print(f"Inspect with: python scripts/inspect_llm_summaries.py {save_path}")
-            except Exception as e:
-                print(f"\n[X] Failed to save database: {e}")
-            finally:
-                try:
-                    if db_path.exists():
-                        db_path.unlink()
-                except:
-                    pass
-        else:
-            # Cleanup
-            try:
-                import gc
-                gc.collect()
-                time.sleep(0.1)
-                if db_path.exists():
-                    db_path.unlink()
-            except PermissionError:
-                print(f"\nWarning: Could not delete temporary database: {db_path}")
-
-    print("\n" + "="*70)
-    print("Test completed!")
-    print("="*70)
-
-
-if __name__ == "__main__":
-    main()
--- a/codex-lens/src/codexlens/cli/commands.py
+++ b/codex-lens/src/codexlens/cli/commands.py
@@ -1047,184 +1047,6 @@ def migrate(
            registry.close()


-@app.command()
-def enhance(
-    path: Path = typer.Argument(Path("."), exists=True, file_okay=False, dir_okay=True, help="Project root to enhance."),
-    tool: str = typer.Option("gemini", "--tool", "-t", help="LLM tool to use (gemini or qwen)."),
-    batch_size: int = typer.Option(5, "--batch-size", "-b", min=1, max=20, help="Number of files to process per batch."),
-    force: bool = typer.Option(False, "--force", "-f", help="Regenerate metadata for all files, even if already exists."),
-    json_mode: bool = typer.Option(False, "--json", help="Output JSON response."),
-    verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable debug logging."),
-) -> None:
-    """Generate LLM-enhanced semantic metadata for indexed files.
-
-    Uses CCW CLI to generate summaries, keywords, and purpose descriptions.
-    Requires ccw to be installed and accessible in PATH.
-    """
-    _configure_logging(verbose)
-    base_path = path.expanduser().resolve()
-
-    registry: RegistryStore | None = None
-    try:
-        # Check if ccw is available
-        import subprocess
-        import shutil
-        import sys
-        try:
-            ccw_cmd = shutil.which("ccw")
-            if not ccw_cmd:
-                raise FileNotFoundError("ccw not in PATH")
-            # On Windows, .cmd files need shell=True
-            if sys.platform == "win32":
-                subprocess.run("ccw --version", shell=True, capture_output=True, check=True)
-            else:
-                subprocess.run(["ccw", "--version"], capture_output=True, check=True)
-        except (subprocess.CalledProcessError, FileNotFoundError):
-            raise CodexLensError("ccw CLI not found. Please install ccw first.")
-
-        # Validate tool
-        if tool not in ("gemini", "qwen"):
-            raise CodexLensError(f"Invalid tool: {tool}. Must be 'gemini' or 'qwen'.")
-
-        registry = RegistryStore()
-        registry.initialize()
-        mapper = PathMapper()
-
-        # Find project
-        project_info = registry.get_project(base_path)
-        if not project_info:
-            raise CodexLensError(f"No index found for: {base_path}. Run 'codex-lens init' first.")
-
-        # Import LLM enhancer
-        try:
-            from codexlens.semantic.llm_enhancer import LLMEnhancer, LLMConfig
-        except ImportError as e:
-            raise CodexLensError(f"Semantic enhancement requires additional dependencies: {e}")
-
-        # Create enhancer with config
-        config = LLMConfig(tool=tool, batch_size=batch_size)
-        enhancer = LLMEnhancer(config=config)
-
-        # Get index directory
-        index_dir = mapper.source_to_index_dir(base_path)
-        if not index_dir.exists():
-            raise CodexLensError(f"Index directory not found: {index_dir}")
-
-        # Process all index databases recursively
-        from codexlens.storage.dir_index import DirIndexStore
-        from pathlib import Path
-
-        total_processed = 0
-        total_errors = 0
-
-        with Progress(
-            SpinnerColumn(),
-            TextColumn("[progress.description]{task.description}"),
-            BarColumn(),
-            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
-            TimeElapsedColumn(),
-            console=console,
-        ) as progress:
-            # Find all _index.db files
-            index_files = list(index_dir.rglob("_index.db"))
-            task = progress.add_task(f"Enhancing {len(index_files)} directories...", total=len(index_files))
-
-            for db_path in index_files:
-                try:
-                    store = DirIndexStore(db_path)
-                    store.initialize()
-
-                    # Get files to process
-                    if force:
-                        files_to_process = store.list_files()
-                    else:
-                        files_to_process = store.get_files_without_semantic()
-
-                    if not files_to_process:
-                        progress.update(task, advance=1)
-                        continue
-
-                    # Process files
-                    for file_entry in files_to_process:
-                        try:
-                            # Read file content
-                            with open(file_entry.full_path, "r", encoding="utf-8", errors="ignore") as f:
-                                content = f.read()
-
-                            # Generate metadata
-                            metadata = enhancer.enhance_file(
-                                path=str(file_entry.full_path),
-                                content=content,
-                                language=file_entry.language or "unknown"
-                            )
-
-                            # Store metadata
-                            store.add_semantic_metadata(
-                                file_id=file_entry.id,
-                                summary=metadata.summary,
-                                keywords=metadata.keywords,
-                                purpose=metadata.purpose,
-                                llm_tool=tool
-                            )
-
-                            total_processed += 1
-
-                        except Exception as e:
-                            total_errors += 1
-                            if verbose:
-                                console.print(f"[yellow]Error processing {file_entry.full_path}: {e}[/yellow]")
-
-                    store.close()
-
-                except Exception as e:
-                    total_errors += 1
-                    if verbose:
-                        console.print(f"[yellow]Error processing {db_path}: {e}[/yellow]")
-
-                progress.update(task, advance=1)
-
-        result = {
-            "path": str(base_path),
-            "tool": tool,
-            "files_processed": total_processed,
-            "errors": total_errors,
-        }
-
-        if json_mode:
-            print_json(success=True, result=result)
-        else:
-            console.print(f"[green]Enhanced {total_processed} files using {tool}[/green]")
-            if total_errors > 0:
-                console.print(f"  [yellow]Errors: {total_errors}[/yellow]")
-
-    except StorageError as exc:
-        if json_mode:
-            print_json(success=False, error=f"Storage error: {exc}")
-        else:
-            console.print(f"[red]Enhancement failed (storage):[/red] {exc}")
-            raise typer.Exit(code=1)
-    except PermissionError as exc:
-        if json_mode:
-            print_json(success=False, error=f"Permission denied: {exc}")
-        else:
-            console.print(f"[red]Enhancement failed (permission denied):[/red] {exc}")
-            raise typer.Exit(code=1)
-    except CodexLensError as exc:
-        if json_mode:
-            print_json(success=False, error=str(exc))
-        else:
-            console.print(f"[red]Enhancement failed:[/red] {exc}")
-            raise typer.Exit(code=1)
-    except Exception as exc:
-        if json_mode:
-            print_json(success=False, error=f"Unexpected error: {exc}")
-        else:
-            console.print(f"[red]Enhancement failed (unexpected):[/red] {exc}")
-            raise typer.Exit(code=1)
-    finally:
-        if registry is not None:
-            registry.close()
-
@app.command()
 def clean(
    path: Optional[Path] = typer.Argument(None, help="Project path to clean (removes project index)."),
--- a/codex-lens/src/codexlens/semantic/init.py
+++ b/codex-lens/src/codexlens/semantic/init.py
@@ -32,38 +32,8 @@ def check_semantic_available() -> tuple[bool, str | None]:
    """Check if semantic search dependencies are available."""
    return SEMANTIC_AVAILABLE, _import_error

-# Export LLM enhancement classes
-try:
-    from .llm_enhancer import (
-        LLMEnhancer,
-        LLMConfig,
-        SemanticMetadata,
-        FileData,
-        EnhancedSemanticIndexer,
-        create_enhancer,
-        create_enhanced_indexer,
-    )
-    LLM_AVAILABLE = True
-except ImportError:
-    LLM_AVAILABLE = False
-    LLMEnhancer = None  # type: ignore
-    LLMConfig = None  # type: ignore
-    SemanticMetadata = None  # type: ignore
-    FileData = None  # type: ignore
-    EnhancedSemanticIndexer = None  # type: ignore
-    create_enhancer = None  # type: ignore
-    create_enhanced_indexer = None  # type: ignore
-
 __all__ = [
    "SEMANTIC_AVAILABLE",
    "SEMANTIC_BACKEND",
    "check_semantic_available",
-    "LLM_AVAILABLE",
-    "LLMEnhancer",
-    "LLMConfig",
-    "SemanticMetadata",
-    "FileData",
-    "EnhancedSemanticIndexer",
-    "create_enhancer",
-    "create_enhanced_indexer",
 ]
--- a/codex-lens/src/codexlens/semantic/llm_enhancer.py
+++ b/codex-lens/src/codexlens/semantic/llm_enhancer.py
@@ -1,899 +0,0 @@
-"""LLM-based semantic enhancement using CCW CLI.
-
-This module provides LLM-generated descriptions that are then embedded
-by fastembed for improved semantic search. The flow is:
-
-    Code → LLM Summary → fastembed embedding → VectorStore → semantic search
-
-LLM-generated summaries match natural language queries better than raw code.
-"""
-
-from __future__ import annotations
-
-import json
-import logging
-import os
-import subprocess
-import shutil
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Any, Dict, List, Optional, TYPE_CHECKING
-
-from codexlens.entities import SemanticChunk, Symbol
-
-if TYPE_CHECKING:
-    from .embedder import Embedder
-    from .vector_store import VectorStore
-
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class SemanticMetadata:
-    """LLM-generated semantic metadata for a file or symbol."""
-
-    summary: str
-    keywords: List[str]
-    purpose: str
-    file_path: Optional[str] = None
-    symbol_name: Optional[str] = None
-    llm_tool: Optional[str] = None
-
-
-@dataclass
-class FileData:
-    """File data for LLM processing."""
-
-    path: str
-    content: str
-    language: str
-    symbols: List[Symbol] = field(default_factory=list)
-
-
-@dataclass
-class LLMConfig:
-    """Configuration for LLM enhancement.
-
-    Tool selection can be overridden via environment variables:
-    - CCW_CLI_SECONDARY_TOOL: Primary tool for LLM calls (default: gemini)
-    - CCW_CLI_FALLBACK_TOOL: Fallback tool if primary fails (default: qwen)
-    """
-
-    tool: str = field(default_factory=lambda: os.environ.get("CCW_CLI_SECONDARY_TOOL", "gemini"))
-    fallback_tool: str = field(default_factory=lambda: os.environ.get("CCW_CLI_FALLBACK_TOOL", "qwen"))
-    timeout_ms: int = 300000
-    batch_size: int = 5
-    max_content_chars: int = 8000  # Max chars per file in batch prompt
-    enabled: bool = True
-
-
-class LLMEnhancer:
-    """LLM-based semantic enhancement using CCW CLI.
-
-    Generates code summaries and search keywords by calling
-    external LLM tools (gemini, qwen) via CCW CLI subprocess.
-    """
-
-    CHUNK_REFINEMENT_PROMPT = '''PURPOSE: Identify optimal semantic split points in code chunk
-TASK:
- Analyze the code structure to find natural semantic boundaries
- Identify logical groupings (functions, classes, related statements)
- Suggest split points that maintain semantic cohesion
-MODE: analysis
-EXPECTED: JSON format with split positions
-
-=== CODE CHUNK ===
-{code_chunk}
-
-=== OUTPUT FORMAT ===
-Return ONLY valid JSON (no markdown, no explanation):
-{{
-  "split_points": [
-    {{
-      "line": <line_number>,
-      "reason": "brief reason for split (e.g., 'start of new function', 'end of class definition')"
-    }}
-  ]
-}}
-
-Rules:
- Split at function/class/method boundaries
- Keep related code together (don't split mid-function)
- Aim for chunks between 500-2000 characters
- Return empty split_points if no good splits found'''
-
-    PROMPT_TEMPLATE = '''PURPOSE: Generate semantic summaries and search keywords for code files
-TASK:
- For each code block, generate a concise summary (1-2 sentences)
- Extract 5-10 relevant search keywords
- Identify the functional purpose/category
-MODE: analysis
-EXPECTED: JSON format output
-
-=== CODE BLOCKS ===
-{code_blocks}
-
-=== OUTPUT FORMAT ===
-Return ONLY valid JSON (no markdown, no explanation):
-{{
-  "files": {{
-    "<file_path>": {{
-      "summary": "Brief description of what this code does",
-      "keywords": ["keyword1", "keyword2", ...],
-      "purpose": "category like: auth, api, util, ui, data, config, test"
-    }}
-  }}
-}}'''
-
-    def __init__(self, config: LLMConfig | None = None) -> None:
-        """Initialize LLM enhancer.
-
-        Args:
-            config: LLM configuration, uses defaults if None
-        """
-        self.config = config or LLMConfig()
-        self._ccw_available: Optional[bool] = None
-
-    def check_available(self) -> bool:
-        """Check if CCW CLI tool is available."""
-        if self._ccw_available is not None:
-            return self._ccw_available
-
-        self._ccw_available = shutil.which("ccw") is not None
-        if not self._ccw_available:
-            logger.warning("CCW CLI not found in PATH, LLM enhancement disabled")
-        return self._ccw_available
-
-    def enhance_files(
-        self,
-        files: List[FileData],
-        working_dir: Optional[Path] = None,
-    ) -> Dict[str, SemanticMetadata]:
-        """Enhance multiple files with LLM-generated semantic metadata.
-
-        Processes files in batches to manage token limits and API costs.
-
-        Args:
-            files: List of file data to process
-            working_dir: Optional working directory for CCW CLI
-
-        Returns:
-            Dict mapping file paths to SemanticMetadata
-        """
-        if not self.config.enabled:
-            logger.debug("LLM enhancement disabled by config")
-            return {}
-
-        if not self.check_available():
-            return {}
-
-        if not files:
-            return {}
-
-        results: Dict[str, SemanticMetadata] = {}
-        batch_size = self.config.batch_size
-
-        for i in range(0, len(files), batch_size):
-            batch = files[i:i + batch_size]
-            try:
-                batch_results = self._process_batch(batch, working_dir)
-                results.update(batch_results)
-                logger.debug(
-                    "Processed batch %d/%d: %d files enhanced",
-                    i // batch_size + 1,
-                    (len(files) + batch_size - 1) // batch_size,
-                    len(batch_results),
-                )
-            except Exception as e:
-                logger.warning(
-                    "Batch %d failed, continuing: %s",
-                    i // batch_size + 1,
-                    e,
-                )
-                continue
-
-        return results
-
-    def enhance_file(
-
-        self,
-
-        path: str,
-
-        content: str,
-
-        language: str,
-
-        working_dir: Optional[Path] = None,
-
-    ) -> SemanticMetadata:
-
-        """Enhance a single file with LLM-generated semantic metadata.
-
-
-
-        Convenience method that wraps enhance_files for single file processing.
-
-
-
-        Args:
-
-            path: File path
-
-            content: File content
-
-            language: Programming language
-
-            working_dir: Optional working directory for CCW CLI
-
-
-
-        Returns:
-
-            SemanticMetadata for the file
-
-
-
-        Raises:
-
-            ValueError: If enhancement fails
-
-        """
-
-        file_data = FileData(path=path, content=content, language=language)
-
-        results = self.enhance_files([file_data], working_dir)
-
-
-
-        if path not in results:
-
-            # Return default metadata if enhancement failed
-
-            return SemanticMetadata(
-
-                summary=f"Code file written in {language}",
-
-                keywords=[language, "code"],
-
-                purpose="unknown",
-
-                file_path=path,
-
-                llm_tool=self.config.tool,
-
-            )
-
-
-
-        return results[path]
-
-    def refine_chunk_boundaries(
-        self,
-        chunk: SemanticChunk,
-        max_chunk_size: int = 2000,
-        working_dir: Optional[Path] = None,
-    ) -> List[SemanticChunk]:
-        """Refine chunk boundaries using LLM for large code chunks.
-
-        Uses LLM to identify semantic split points in large chunks,
-        breaking them into smaller, more cohesive pieces.
-
-        Args:
-            chunk: Original chunk to refine
-            max_chunk_size: Maximum characters before triggering refinement
-            working_dir: Optional working directory for CCW CLI
-
-        Returns:
-            List of refined chunks (original chunk if no splits or refinement fails)
-        """
-        # Skip if chunk is small enough
-        if len(chunk.content) <= max_chunk_size:
-            return [chunk]
-
-        # Skip if LLM enhancement disabled or unavailable
-        if not self.config.enabled or not self.check_available():
-            return [chunk]
-
-        # Skip docstring chunks - only refine code chunks
-        if chunk.metadata.get("chunk_type") == "docstring":
-            return [chunk]
-
-        try:
-            # Build refinement prompt
-            prompt = self.CHUNK_REFINEMENT_PROMPT.format(code_chunk=chunk.content)
-
-            # Invoke LLM
-            result = self._invoke_ccw_cli(
-                prompt,
-                tool=self.config.tool,
-                working_dir=working_dir,
-            )
-
-            # Fallback if primary tool fails
-            if not result["success"] and self.config.fallback_tool:
-                result = self._invoke_ccw_cli(
-                    prompt,
-                    tool=self.config.fallback_tool,
-                    working_dir=working_dir,
-                )
-
-            if not result["success"]:
-                logger.debug("LLM refinement failed, returning original chunk")
-                return [chunk]
-
-            # Parse split points
-            split_points = self._parse_split_points(result["stdout"])
-            if not split_points:
-                logger.debug("No split points identified, returning original chunk")
-                return [chunk]
-
-            # Split chunk at identified boundaries
-            refined_chunks = self._split_chunk_at_points(chunk, split_points)
-            logger.debug(
-                "Refined chunk into %d smaller chunks (was %d chars)",
-                len(refined_chunks),
-                len(chunk.content),
-            )
-            return refined_chunks
-
-        except Exception as e:
-            logger.warning("Chunk refinement error: %s, returning original chunk", e)
-            return [chunk]
-
-    def _parse_split_points(self, stdout: str) -> List[int]:
-        """Parse split points from LLM response.
-
-        Args:
-            stdout: Raw stdout from CCW CLI
-
-        Returns:
-            List of line numbers where splits should occur (sorted)
-        """
-        # Extract JSON from response
-        json_str = self._extract_json(stdout)
-        if not json_str:
-            return []
-
-        try:
-            data = json.loads(json_str)
-            split_points_data = data.get("split_points", [])
-
-            # Extract line numbers
-            lines = []
-            for point in split_points_data:
-                if isinstance(point, dict) and "line" in point:
-                    line_num = point["line"]
-                    if isinstance(line_num, int) and line_num > 0:
-                        lines.append(line_num)
-
-            return sorted(set(lines))
-
-        except (json.JSONDecodeError, ValueError, TypeError) as e:
-            logger.debug("Failed to parse split points: %s", e)
-            return []
-
-    def _split_chunk_at_points(
-        self,
-        chunk: SemanticChunk,
-        split_points: List[int],
-    ) -> List[SemanticChunk]:
-        """Split chunk at specified line numbers.
-
-        Args:
-            chunk: Original chunk to split
-            split_points: Sorted list of line numbers to split at
-
-        Returns:
-            List of smaller chunks
-        """
-        lines = chunk.content.splitlines(keepends=True)
-        chunks: List[SemanticChunk] = []
-
-        # Get original metadata
-        base_metadata = dict(chunk.metadata)
-        original_start = base_metadata.get("start_line", 1)
-
-        # Add start and end boundaries
-        boundaries = [0] + split_points + [len(lines)]
-
-        for i in range(len(boundaries) - 1):
-            start_idx = boundaries[i]
-            end_idx = boundaries[i + 1]
-
-            # Skip empty sections
-            if start_idx >= end_idx:
-                continue
-
-            # Extract content
-            section_lines = lines[start_idx:end_idx]
-            section_content = "".join(section_lines)
-
-            # Skip if too small
-            if len(section_content.strip()) < 50:
-                continue
-
-            # Create new chunk with updated metadata
-            new_metadata = base_metadata.copy()
-            new_metadata["start_line"] = original_start + start_idx
-            new_metadata["end_line"] = original_start + end_idx - 1
-            new_metadata["refined_by_llm"] = True
-            new_metadata["original_chunk_size"] = len(chunk.content)
-
-            chunks.append(
-                SemanticChunk(
-                    content=section_content,
-                    embedding=None,  # Embeddings will be regenerated
-                    metadata=new_metadata,
-                )
-            )
-
-        # If no valid chunks created, return original
-        if not chunks:
-            return [chunk]
-
-        return chunks
-
-
-
-
-    def _process_batch(
-        self,
-        files: List[FileData],
-        working_dir: Optional[Path] = None,
-    ) -> Dict[str, SemanticMetadata]:
-        """Process a single batch of files."""
-        prompt = self._build_batch_prompt(files)
-
-        # Try primary tool first
-        result = self._invoke_ccw_cli(
-            prompt,
-            tool=self.config.tool,
-            working_dir=working_dir,
-        )
-
-        # Fallback to secondary tool if primary fails
-        if not result["success"] and self.config.fallback_tool:
-            logger.debug(
-                "Primary tool %s failed, trying fallback %s",
-                self.config.tool,
-                self.config.fallback_tool,
-            )
-            result = self._invoke_ccw_cli(
-                prompt,
-                tool=self.config.fallback_tool,
-                working_dir=working_dir,
-            )
-
-        if not result["success"]:
-            logger.warning("LLM call failed: %s", result.get("stderr", "unknown error"))
-            return {}
-
-        return self._parse_response(result["stdout"], self.config.tool)
-
-    def _build_batch_prompt(self, files: List[FileData]) -> str:
-        """Build prompt for batch processing."""
-        code_blocks_parts: List[str] = []
-
-        for file_data in files:
-            # Truncate content if too long
-            content = file_data.content
-            if len(content) > self.config.max_content_chars:
-                content = content[:self.config.max_content_chars] + "\n... [truncated]"
-
-            # Format code block
-            lang_hint = file_data.language or "text"
-            code_block = f'''[FILE: {file_data.path}]
-```{lang_hint}
-{content}
-```'''
-            code_blocks_parts.append(code_block)
-
-        code_blocks = "\n\n".join(code_blocks_parts)
-        return self.PROMPT_TEMPLATE.format(code_blocks=code_blocks)
-
-    def _invoke_ccw_cli(
-        self,
-        prompt: str,
-        tool: str = "gemini",
-        working_dir: Optional[Path] = None,
-    ) -> Dict[str, Any]:
-        """Invoke CCW CLI tool via subprocess.
-
-        Args:
-            prompt: The prompt to send to LLM
-            tool: Tool name (gemini, qwen, codex)
-            working_dir: Optional working directory
-
-        Returns:
-            Dict with success, stdout, stderr, exit_code
-        """
-        import sys
-        import os
-
-        timeout_seconds = (self.config.timeout_ms / 1000) + 30
-
-        # Build base arguments
-        base_args = [
-            "cli", "exec",
-            prompt,  # Direct string argument
-            "--tool", tool,
-            "--mode", "analysis",
-            "--timeout", str(self.config.timeout_ms),
-        ]
-        if working_dir:
-            base_args.extend(["--cd", str(working_dir)])
-
-        try:
-            if sys.platform == "win32":
-                # On Windows, ccw is a .CMD wrapper that requires shell
-                # Instead, directly invoke node with the ccw.js script
-                ccw_path = shutil.which("ccw")
-                if ccw_path and ccw_path.lower().endswith(".cmd"):
-                    # Find the ccw.js script location
-                    npm_dir = Path(ccw_path).parent
-                    ccw_js = npm_dir / "node_modules" / "ccw" / "bin" / "ccw.js"
-                    if ccw_js.exists():
-                        cmd = ["node", str(ccw_js)] + base_args
-                    else:
-                        # Fallback to shell execution
-                        cmd_str = "ccw " + " ".join(f'"{a}"' if " " in a else a for a in base_args)
-                        result = subprocess.run(
-                            cmd_str, shell=True, capture_output=True, text=True,
-                            timeout=timeout_seconds, cwd=working_dir,
-                            encoding="utf-8", errors="replace",
-                        )
-                        return {
-                            "success": result.returncode == 0,
-                            "stdout": result.stdout,
-                            "stderr": result.stderr,
-                            "exit_code": result.returncode,
-                        }
-                else:
-                    cmd = ["ccw"] + base_args
-            else:
-                cmd = ["ccw"] + base_args
-
-            result = subprocess.run(
-                cmd,
-                capture_output=True,
-                text=True,
-                timeout=timeout_seconds,
-                cwd=working_dir,
-                encoding="utf-8",
-                errors="replace",
-            )
-
-            return {
-                "success": result.returncode == 0,
-                "stdout": result.stdout,
-                "stderr": result.stderr,
-                "exit_code": result.returncode,
-            }
-
-        except subprocess.TimeoutExpired:
-            logger.warning("CCW CLI timeout after %ds", self.config.timeout_ms / 1000)
-            return {
-                "success": False,
-                "stdout": "",
-                "stderr": "timeout",
-                "exit_code": -1,
-            }
-        except FileNotFoundError:
-            logger.warning("CCW CLI not found - ensure 'ccw' is in PATH")
-            return {
-                "success": False,
-                "stdout": "",
-                "stderr": "ccw command not found",
-                "exit_code": -1,
-            }
-        except Exception as e:
-            logger.warning("CCW CLI invocation failed: %s", e)
-            return {
-                "success": False,
-                "stdout": "",
-                "stderr": str(e),
-                "exit_code": -1,
-            }
-
-    def _parse_response(
-        self,
-        stdout: str,
-        tool: str,
-    ) -> Dict[str, SemanticMetadata]:
-        """Parse LLM response into SemanticMetadata objects.
-
-        Args:
-            stdout: Raw stdout from CCW CLI
-            tool: Tool name used for generation
-
-        Returns:
-            Dict mapping file paths to SemanticMetadata
-        """
-        results: Dict[str, SemanticMetadata] = {}
-
-        # Extract JSON from response (may be wrapped in markdown or other text)
-        json_str = self._extract_json(stdout)
-        if not json_str:
-            logger.warning("No JSON found in LLM response")
-            return results
-
-        try:
-            data = json.loads(json_str)
-        except json.JSONDecodeError as e:
-            logger.warning("Failed to parse LLM response JSON: %s", e)
-            return results
-
-        # Handle expected format: {"files": {"path": {...}}}
-        files_data = data.get("files", data)
-        if not isinstance(files_data, dict):
-            logger.warning("Unexpected response format: expected dict")
-            return results
-
-        for file_path, metadata in files_data.items():
-            if not isinstance(metadata, dict):
-                continue
-
-            try:
-                results[file_path] = SemanticMetadata(
-                    summary=metadata.get("summary", ""),
-                    keywords=metadata.get("keywords", []),
-                    purpose=metadata.get("purpose", ""),
-                    file_path=file_path,
-                    llm_tool=tool,
-                )
-            except Exception as e:
-                logger.debug("Failed to parse metadata for %s: %s", file_path, e)
-                continue
-
-        return results
-
-    def _extract_json(self, text: str) -> Optional[str]:
-        """Extract JSON object from text that may contain markdown or other content."""
-        # Try to find JSON object boundaries
-        text = text.strip()
-
-        # Remove markdown code blocks if present
-        if text.startswith("```"):
-            lines = text.split("\n")
-            # Remove first line (```json or ```)
-            lines = lines[1:]
-            # Find closing ```
-            for i, line in enumerate(lines):
-                if line.strip() == "```":
-                    lines = lines[:i]
-                    break
-            text = "\n".join(lines)
-
-        # Find JSON object
-        start = text.find("{")
-        if start == -1:
-            return None
-
-        # Find matching closing brace
-        depth = 0
-        end = start
-        for i, char in enumerate(text[start:], start):
-            if char == "{":
-                depth += 1
-            elif char == "}":
-                depth -= 1
-                if depth == 0:
-                    end = i + 1
-                    break
-
-        if depth != 0:
-            return None
-
-        return text[start:end]
-
-
-def create_enhancer(
-    tool: str = "gemini",
-    timeout_ms: int = 300000,
-    batch_size: int = 5,
-    enabled: bool = True,
-) -> LLMEnhancer:
-    """Factory function to create LLM enhancer with custom config."""
-    config = LLMConfig(
-        tool=tool,
-        timeout_ms=timeout_ms,
-        batch_size=batch_size,
-        enabled=enabled,
-    )
-    return LLMEnhancer(config)
-
-
-class EnhancedSemanticIndexer:
-    """Integrates LLM enhancement with fastembed vector search.
-
-    Flow:
-        1. Code files → LLM generates summaries/keywords
-        2. Summaries → fastembed generates embeddings
-        3. Embeddings → VectorStore for similarity search
-
-    This produces better semantic search because:
-    - LLM summaries are natural language descriptions
-    - Natural language queries match summaries better than raw code
-    - Keywords expand search coverage
-    """
-
-    def __init__(
-        self,
-        enhancer: LLMEnhancer,
-        embedder: "Embedder",
-        vector_store: "VectorStore",
-    ) -> None:
-        """Initialize enhanced semantic indexer.
-
-        Args:
-            enhancer: LLM enhancer for generating summaries
-            embedder: Fastembed embedder for vector generation
-            vector_store: Vector storage for similarity search
-        """
-        self.enhancer = enhancer
-        self.embedder = embedder
-        self.vector_store = vector_store
-
-    def index_files(
-        self,
-        files: List[FileData],
-        working_dir: Optional[Path] = None,
-    ) -> int:
-        """Index files with LLM-enhanced semantic search.
-
-        Args:
-            files: List of file data to index
-            working_dir: Optional working directory for LLM calls
-
-        Returns:
-            Number of files successfully indexed
-        """
-        if not files:
-            return 0
-
-        # Step 1: Generate LLM summaries
-        logger.info("Generating LLM summaries for %d files...", len(files))
-        metadata_map = self.enhancer.enhance_files(files, working_dir)
-
-        if not metadata_map:
-            logger.warning("No LLM metadata generated, falling back to raw code")
-            return self._index_raw_code(files)
-
-        # Step 2: Create semantic chunks from LLM summaries
-        chunks_to_embed: List[SemanticChunk] = []
-        file_paths: List[str] = []
-
-        for file_data in files:
-            metadata = metadata_map.get(file_data.path)
-            if metadata:
-                # Use LLM-generated summary + keywords for embedding
-                embeddable_text = self._create_embeddable_text(metadata, file_data)
-                chunk = SemanticChunk(
-                    content=embeddable_text,
-                    embedding=None,
-                    metadata={
-                        "file": file_data.path,
-                        "language": file_data.language,
-                        "summary": metadata.summary,
-                        "keywords": metadata.keywords,
-                        "purpose": metadata.purpose,
-                        "llm_tool": metadata.llm_tool,
-                        "strategy": "llm_enhanced",
-                    },
-                )
-            else:
-                # Fallback: use truncated raw code
-                chunk = SemanticChunk(
-                    content=file_data.content[:2000],
-                    embedding=None,
-                    metadata={
-                        "file": file_data.path,
-                        "language": file_data.language,
-                        "strategy": "raw_code",
-                    },
-                )
-
-            chunks_to_embed.append(chunk)
-            file_paths.append(file_data.path)
-
-        # Step 3: Generate embeddings
-        logger.info("Generating embeddings for %d chunks...", len(chunks_to_embed))
-        texts = [chunk.content for chunk in chunks_to_embed]
-        embeddings = self.embedder.embed(texts)
-
-        # Step 4: Store in vector store
-        indexed_count = 0
-        for chunk, embedding, file_path in zip(chunks_to_embed, embeddings, file_paths):
-            chunk.embedding = embedding
-            try:
-                self.vector_store.add_chunk(chunk, file_path)
-                indexed_count += 1
-            except Exception as e:
-                logger.debug("Failed to store chunk for %s: %s", file_path, e)
-
-        logger.info("Successfully indexed %d/%d files", indexed_count, len(files))
-        return indexed_count
-
-    def _create_embeddable_text(
-        self,
-        metadata: SemanticMetadata,
-        file_data: FileData,
-    ) -> str:
-        """Create text optimized for embedding from LLM metadata.
-
-        Combines summary, keywords, and purpose into a single string
-        that will produce good semantic matches for natural language queries.
-        """
-        parts = []
-
-        # Summary is the primary content
-        if metadata.summary:
-            parts.append(metadata.summary)
-
-        # Purpose adds categorical context
-        if metadata.purpose:
-            parts.append(f"Category: {metadata.purpose}")
-
-        # Keywords expand search coverage
-        if metadata.keywords:
-            parts.append(f"Keywords: {', '.join(metadata.keywords)}")
-
-        # Add file name for context
-        parts.append(f"File: {Path(file_data.path).name}")
-
-        return "\n".join(parts)
-
-    def _index_raw_code(self, files: List[FileData]) -> int:
-        """Fallback: index raw code without LLM enhancement."""
-        indexed_count = 0
-
-        for file_data in files:
-            # Truncate to reasonable size
-            content = file_data.content[:2000]
-
-            chunk = SemanticChunk(
-                content=content,
-                embedding=None,
-                metadata={
-                    "file": file_data.path,
-                    "language": file_data.language,
-                    "strategy": "raw_code",
-                },
-            )
-
-            try:
-                embedding = self.embedder.embed_single(content)
-                chunk.embedding = embedding
-                self.vector_store.add_chunk(chunk, file_data.path)
-                indexed_count += 1
-            except Exception as e:
-                logger.debug("Failed to index %s: %s", file_data.path, e)
-
-        return indexed_count
-
-
-def create_enhanced_indexer(
-    vector_store_path: Path,
-    llm_tool: str = "gemini",
-    llm_enabled: bool = True,
-) -> EnhancedSemanticIndexer:
-    """Factory function to create an enhanced semantic indexer.
-
-    Args:
-        vector_store_path: Path for the vector store database
-        llm_tool: LLM tool to use (gemini, qwen)
-        llm_enabled: Whether to enable LLM enhancement
-
-    Returns:
-        Configured EnhancedSemanticIndexer instance
-    """
-    from .embedder import Embedder
-    from .vector_store import VectorStore
-
-    enhancer = create_enhancer(tool=llm_tool, enabled=llm_enabled)
-    embedder = Embedder()
-    vector_store = VectorStore(vector_store_path)
-
-    return EnhancedSemanticIndexer(enhancer, embedder, vector_store)
--- a/codex-lens/tests/test_llm_enhanced_search.py
+++ b/codex-lens/tests/test_llm_enhanced_search.py
@@ -1,545 +0,0 @@
-"""Test suite for comparing pure vector search vs LLM-enhanced vector search.
-
-This test demonstrates the difference between:
-1. Pure vector search: Raw code → fastembed → vector search
-2. LLM-enhanced search: Code → LLM summary → fastembed → vector search
-
-LLM-enhanced search should provide better semantic matches for natural language queries.
-"""
-
-import pytest
-import sqlite3
-import tempfile
-from pathlib import Path
-from typing import Dict, List
-
-from codexlens.search.hybrid_search import HybridSearchEngine
-from codexlens.storage.dir_index import DirIndexStore
-
-# Check semantic dependencies
-try:
-    from codexlens.semantic import SEMANTIC_AVAILABLE
-    from codexlens.semantic.embedder import Embedder
-    from codexlens.semantic.vector_store import VectorStore
-    from codexlens.semantic.chunker import Chunker, ChunkConfig
-    from codexlens.semantic.llm_enhancer import (
-        LLMEnhancer,
-        LLMConfig,
-        FileData,
-        EnhancedSemanticIndexer,
-        SemanticChunk,
-    )
-    from codexlens.entities import SearchResult
-except ImportError:
-    SEMANTIC_AVAILABLE = False
-
-
-# Test code samples representing different functionality
-TEST_CODE_SAMPLES = {
-    "auth/password_hasher.py": '''"""Password hashing utilities using bcrypt."""
-import bcrypt
-
-def hash_password(password: str, salt_rounds: int = 12) -> str:
-    """Hash a password using bcrypt with specified salt rounds.
-
-    Args:
-        password: Plain text password to hash
-        salt_rounds: Number of salt rounds (default 12)
-
-    Returns:
-        Hashed password string
-    """
-    salt = bcrypt.gensalt(rounds=salt_rounds)
-    hashed = bcrypt.hashpw(password.encode('utf-8'), salt)
-    return hashed.decode('utf-8')
-
-def verify_password(password: str, hashed: str) -> bool:
-    """Verify a password against its hash.
-
-    Args:
-        password: Plain text password to verify
-        hashed: Previously hashed password
-
-    Returns:
-        True if password matches hash
-    """
-    return bcrypt.checkpw(password.encode('utf-8'), hashed.encode('utf-8'))
-''',
-
-    "auth/jwt_handler.py": '''"""JWT token generation and validation."""
-import jwt
-from datetime import datetime, timedelta
-from typing import Dict, Optional
-
-SECRET_KEY = "your-secret-key-here"
-
-def create_token(user_id: int, expires_in: int = 3600) -> str:
-    """Generate a JWT access token for user authentication.
-
-    Args:
-        user_id: User ID to encode in token
-        expires_in: Token expiration in seconds (default 1 hour)
-
-    Returns:
-        JWT token string
-    """
-    payload = {
-        'user_id': user_id,
-        'exp': datetime.utcnow() + timedelta(seconds=expires_in),
-        'iat': datetime.utcnow()
-    }
-    return jwt.encode(payload, SECRET_KEY, algorithm='HS256')
-
-def decode_token(token: str) -> Optional[Dict]:
-    """Validate and decode JWT token to extract user information.
-
-    Args:
-        token: JWT token string to decode
-
-    Returns:
-        Decoded payload dict or None if invalid
-    """
-    try:
-        payload = jwt.decode(token, SECRET_KEY, algorithms=['HS256'])
-        return payload
-    except jwt.ExpiredSignatureError:
-        return None
-    except jwt.InvalidTokenError:
-        return None
-''',
-
-    "api/user_endpoints.py": '''"""REST API endpoints for user management."""
-from flask import Flask, request, jsonify
-from typing import Dict
-
-app = Flask(__name__)
-
-@app.route('/api/users', methods=['POST'])
-def create_user():
-    """Create a new user account with email and password.
-
-    Request JSON:
-        email: User email address
-        password: User password
-        name: User full name
-
-    Returns:
-        JSON with user_id and success status
-    """
-    data = request.get_json()
-    # Validate input
-    if not data.get('email') or not data.get('password'):
-        return jsonify({'error': 'Email and password required'}), 400
-
-    # Create user (simplified)
-    user_id = 12345  # Would normally insert into database
-    return jsonify({'user_id': user_id, 'success': True}), 201
-
-@app.route('/api/users/<int:user_id>', methods=['GET'])
-def get_user(user_id: int):
-    """Retrieve user profile information by user ID.
-
-    Args:
-        user_id: Unique user identifier
-
-    Returns:
-        JSON with user profile data
-    """
-    # Simplified user retrieval
-    user = {
-        'id': user_id,
-        'email': 'user@example.com',
-        'name': 'John Doe',
-        'created_at': '2024-01-01'
-    }
-    return jsonify(user), 200
-''',
-
-    "utils/validation.py": '''"""Input validation and sanitization utilities."""
-import re
-from typing import Optional
-
-def validate_email(email: str) -> bool:
-    """Check if email address format is valid using regex pattern.
-
-    Args:
-        email: Email address string to validate
-
-    Returns:
-        True if email format is valid
-    """
-    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
-    return bool(re.match(pattern, email))
-
-def sanitize_input(text: str, max_length: int = 255) -> str:
-    """Clean user input by removing special characters and limiting length.
-
-    Args:
-        text: Input text to sanitize
-        max_length: Maximum allowed length
-
-    Returns:
-        Sanitized text string
-    """
-    # Remove special characters
-    text = re.sub(r'[<>\"\'&]', '', text)
-    # Trim whitespace
-    text = text.strip()
-    # Limit length
-    return text[:max_length]
-
-def validate_password_strength(password: str) -> tuple[bool, Optional[str]]:
-    """Validate password meets security requirements.
-
-    Requirements:
-        - At least 8 characters
-        - Contains uppercase and lowercase
-        - Contains numbers
-        - Contains special characters
-
-    Args:
-        password: Password string to validate
-
-    Returns:
-        Tuple of (is_valid, error_message)
-    """
-    if len(password) < 8:
-        return False, "Password must be at least 8 characters"
-    if not re.search(r'[A-Z]', password):
-        return False, "Password must contain uppercase letter"
-    if not re.search(r'[a-z]', password):
-        return False, "Password must contain lowercase letter"
-    if not re.search(r'[0-9]', password):
-        return False, "Password must contain number"
-    if not re.search(r'[!@#$%^&*(),.?":{}|<>]', password):
-        return False, "Password must contain special character"
-    return True, None
-''',
-
-    "database/connection.py": '''"""Database connection pooling and management."""
-import psycopg2
-from psycopg2 import pool
-from typing import Optional
-from contextlib import contextmanager
-
-class DatabasePool:
-    """PostgreSQL connection pool manager for handling multiple concurrent connections."""
-
-    def __init__(self, min_conn: int = 1, max_conn: int = 10):
-        """Initialize database connection pool.
-
-        Args:
-            min_conn: Minimum number of connections to maintain
-            max_conn: Maximum number of connections allowed
-        """
-        self.pool = psycopg2.pool.SimpleConnectionPool(
-            min_conn,
-            max_conn,
-            user='dbuser',
-            password='dbpass',
-            host='localhost',
-            port='5432',
-            database='myapp'
-        )
-
-    @contextmanager
-    def get_connection(self):
-        """Get a connection from pool as context manager.
-
-        Yields:
-            Database connection object
-        """
-        conn = self.pool.getconn()
-        try:
-            yield conn
-            conn.commit()
-        except Exception:
-            conn.rollback()
-            raise
-        finally:
-            self.pool.putconn(conn)
-
-    def close_all(self):
-        """Close all connections in pool."""
-        self.pool.closeall()
-'''
-}
-
-
-# Natural language queries to test semantic understanding
-TEST_QUERIES = [
-    {
-        "query": "How do I securely hash passwords?",
-        "expected_file": "auth/password_hasher.py",
-        "description": "Should find password hashing implementation",
-    },
-    {
-        "query": "Generate JWT token for user authentication",
-        "expected_file": "auth/jwt_handler.py",
-        "description": "Should find JWT token creation logic",
-    },
-    {
-        "query": "Create new user account via REST API",
-        "expected_file": "api/user_endpoints.py",
-        "description": "Should find user registration endpoint",
-    },
-    {
-        "query": "Validate email address format",
-        "expected_file": "utils/validation.py",
-        "description": "Should find email validation function",
-    },
-    {
-        "query": "Connect to PostgreSQL database",
-        "expected_file": "database/connection.py",
-        "description": "Should find database connection management",
-    },
-    {
-        "query": "Check password complexity requirements",
-        "expected_file": "utils/validation.py",
-        "description": "Should find password strength validation",
-    },
-]
-
-
-@pytest.mark.skipif(not SEMANTIC_AVAILABLE, reason="Semantic dependencies not available")
-class TestPureVectorSearch:
-    """Test pure vector search (code → fastembed → search)."""
-
-    @pytest.fixture
-    def pure_vector_db(self):
-        """Create database with pure vector embeddings (no LLM)."""
-        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
-            db_path = Path(f.name)
-
-        # Initialize database
-        store = DirIndexStore(db_path)
-        store.initialize()
-
-        # Add test files
-        with store._get_connection() as conn:
-            for path, content in TEST_CODE_SAMPLES.items():
-                name = path.split('/')[-1]
-                conn.execute(
-                    """INSERT INTO files (name, full_path, content, language, mtime)
-                       VALUES (?, ?, ?, ?, ?)""",
-                    (name, path, content, "python", 0.0)
-                )
-            conn.commit()
-
-        # Generate embeddings using pure vector approach (raw code)
-        embedder = Embedder(profile="code")
-        vector_store = VectorStore(db_path)
-        chunker = Chunker(config=ChunkConfig(max_chunk_size=2000))
-
-        with sqlite3.connect(db_path) as conn:
-            conn.row_factory = sqlite3.Row
-            rows = conn.execute("SELECT full_path, content FROM files").fetchall()
-
-        for row in rows:
-            # Pure vector: directly chunk and embed raw code
-            chunks = chunker.chunk_sliding_window(
-                row["content"],
-                file_path=row["full_path"],
-                language="python"
-            )
-            for chunk in chunks:
-                chunk.embedding = embedder.embed_single(chunk.content)
-                chunk.metadata["strategy"] = "pure_vector"
-            if chunks:
-                vector_store.add_chunks(chunks, row["full_path"])
-
-        yield db_path
-        store.close()
-        if db_path.exists():
-            db_path.unlink()
-
-    def test_pure_vector_queries(self, pure_vector_db):
-        """Test natural language queries with pure vector search."""
-        engine = HybridSearchEngine()
-        results = {}
-
-        for test_case in TEST_QUERIES:
-            query = test_case["query"]
-            expected_file = test_case["expected_file"]
-
-            search_results = engine.search(
-                pure_vector_db,
-                query,
-                limit=5,
-                enable_vector=True,
-                pure_vector=True,
-            )
-
-            # Check if expected file is in top 3 results
-            top_files = [r.path for r in search_results[:3]]
-            found = expected_file in top_files
-            rank = top_files.index(expected_file) + 1 if found else None
-
-            results[query] = {
-                "found": found,
-                "rank": rank,
-                "top_result": search_results[0].path if search_results else None,
-                "top_score": search_results[0].score if search_results else 0.0,
-            }
-
-        return results
-
-
-@pytest.mark.skipif(not SEMANTIC_AVAILABLE, reason="Semantic dependencies not available")
-class TestLLMEnhancedSearch:
-    """Test LLM-enhanced vector search (code → LLM → fastembed → search)."""
-
-    @pytest.fixture
-    def llm_enhanced_db(self):
-        """Create database with LLM-enhanced embeddings."""
-        # Skip if CCW not available
-        llm_config = LLMConfig(enabled=True, tool="gemini")
-        enhancer = LLMEnhancer(llm_config)
-        if not enhancer.check_available():
-            pytest.skip("CCW CLI not available for LLM enhancement")
-
-        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
-            db_path = Path(f.name)
-
-        # Initialize database
-        store = DirIndexStore(db_path)
-        store.initialize()
-
-        # Add test files
-        with store._get_connection() as conn:
-            for path, content in TEST_CODE_SAMPLES.items():
-                name = path.split('/')[-1]
-                conn.execute(
-                    """INSERT INTO files (name, full_path, content, language, mtime)
-                       VALUES (?, ?, ?, ?, ?)""",
-                    (name, path, content, "python", 0.0)
-                )
-            conn.commit()
-
-        # Generate embeddings using LLM-enhanced approach
-        embedder = Embedder(profile="code")
-        vector_store = VectorStore(db_path)
-
-        # Create enhanced indexer
-        indexer = EnhancedSemanticIndexer(enhancer, embedder, vector_store)
-
-        # Prepare file data
-        file_data_list = [
-            FileData(path=path, content=content, language="python")
-            for path, content in TEST_CODE_SAMPLES.items()
-        ]
-
-        # Index with LLM enhancement
-        indexed = indexer.index_files(file_data_list)
-        print(f"\nLLM-enhanced indexing: {indexed}/{len(file_data_list)} files")
-
-        yield db_path
-        store.close()
-        if db_path.exists():
-            db_path.unlink()
-
-    def test_llm_enhanced_queries(self, llm_enhanced_db):
-        """Test natural language queries with LLM-enhanced search."""
-        engine = HybridSearchEngine()
-        results = {}
-
-        for test_case in TEST_QUERIES:
-            query = test_case["query"]
-            expected_file = test_case["expected_file"]
-
-            search_results = engine.search(
-                llm_enhanced_db,
-                query,
-                limit=5,
-                enable_vector=True,
-                pure_vector=True,
-            )
-
-            # Check if expected file is in top 3 results
-            top_files = [r.path for r in search_results[:3]]
-            found = expected_file in top_files
-            rank = top_files.index(expected_file) + 1 if found else None
-
-            results[query] = {
-                "found": found,
-                "rank": rank,
-                "top_result": search_results[0].path if search_results else None,
-                "top_score": search_results[0].score if search_results else 0.0,
-            }
-
-        return results
-
-
-@pytest.mark.skipif(not SEMANTIC_AVAILABLE, reason="Semantic dependencies not available")
-class TestSearchComparison:
-    """Compare pure vector vs LLM-enhanced search side-by-side."""
-
-    def test_comparison(self):
-        """Run comprehensive comparison of both approaches."""
-        # This test runs both approaches and compares results
-        print("\n" + "="*70)
-        print("SEMANTIC SEARCH COMPARISON TEST")
-        print("="*70)
-
-        try:
-            # Test pure vector search
-            print("\n1. Testing Pure Vector Search (Code → fastembed)")
-            print("-" * 70)
-            pure_test = TestPureVectorSearch()
-            pure_db = next(pure_test.pure_vector_db())
-            pure_results = pure_test.test_pure_vector_queries(pure_db)
-
-            # Test LLM-enhanced search
-            print("\n2. Testing LLM-Enhanced Search (Code → LLM → fastembed)")
-            print("-" * 70)
-            llm_test = TestLLMEnhancedSearch()
-            llm_db = next(llm_test.llm_enhanced_db())
-            llm_results = llm_test.test_llm_enhanced_queries(llm_db)
-
-            # Compare results
-            print("\n3. COMPARISON RESULTS")
-            print("="*70)
-            print(f"{'Query':<50} {'Pure Vec':<12} {'LLM Enhanced':<12}")
-            print("-" * 70)
-
-            pure_score = 0
-            llm_score = 0
-
-            for test_case in TEST_QUERIES:
-                query = test_case["query"][:47] + "..." if len(test_case["query"]) > 50 else test_case["query"]
-
-                pure_res = pure_results.get(test_case["query"], {})
-                llm_res = llm_results.get(test_case["query"], {})
-
-                pure_status = f"[OK] Rank {pure_res.get('rank', '?')}" if pure_res.get('found') else "[X] Not found"
-                llm_status = f"[OK] Rank {llm_res.get('rank', '?')}" if llm_res.get('found') else "[X] Not found"
-
-                print(f"{query:<50} {pure_status:<12} {llm_status:<12}")
-
-                if pure_res.get('found'):
-                    pure_score += (4 - pure_res['rank'])  # 3 points for rank 1, 2 for rank 2, etc
-                if llm_res.get('found'):
-                    llm_score += (4 - llm_res['rank'])
-
-            print("-" * 70)
-            print(f"{'TOTAL SCORE':<50} {pure_score:<12} {llm_score:<12}")
-            print("="*70)
-
-            # Interpretation
-            print("\nINTERPRETATION:")
-            if llm_score > pure_score:
-                improvement = ((llm_score - pure_score) / max(pure_score, 1)) * 100
-                print(f"[OK] LLM enhancement improves results by {improvement:.1f}%")
-                print("  LLM summaries match natural language queries better than raw code")
-            elif pure_score > llm_score:
-                print("[X] Pure vector search performed better (unexpected)")
-                print("  This may indicate LLM summaries are too generic")
-            else:
-                print("= Both approaches performed equally")
-
-        except Exception as e:
-            pytest.fail(f"Comparison test failed: {e}")
-
-
-if __name__ == "__main__":
-    pytest.main([__file__, "-v", "-s"])
--- a/codex-lens/tests/test_llm_enhancer.py
+++ b/codex-lens/tests/test_llm_enhancer.py