feat(codexlens): add CodexLens code indexing platform with incremental updates

- Add CodexLens Python package with SQLite FTS5 search and tree-sitter parsing
- Implement workspace-local index storage (.codexlens/ directory)
- Add incremental update CLI command for efficient file-level index refresh
- Integrate CodexLens with CCW tools (codex_lens action: update)
- Add CodexLens Auto-Sync hook template for automatic index updates on file changes
- Add CodexLens status card in CCW Dashboard CLI Manager with install/init buttons
- Add server APIs: /api/codexlens/status, /api/codexlens/bootstrap, /api/codexlens/init

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-09 02:24:11 +08:00 · 2025-12-12 15:02:32 +08:00
parent b74a90b416
commit a393601ec5
31 changed files with 2718 additions and 27 deletions
--- a/ccw/src/tools/codex-lens.js
+++ b/ccw/src/tools/codex-lens.js
@@ -0,0 +1,474 @@
+/**
+ * CodexLens Tool - Bridge between CCW and CodexLens Python package
+ * Provides code indexing and semantic search via spawned Python process
+ *
+ * Features:
+ * - Automatic venv bootstrap at ~/.codexlens/venv
+ * - JSON protocol communication
+ * - Symbol extraction and semantic search
+ * - FTS5 full-text search
+ */
+
+import { spawn, execSync } from 'child_process';
+import { existsSync, mkdirSync } from 'fs';
+import { join, dirname } from 'path';
+import { homedir } from 'os';
+import { fileURLToPath } from 'url';
+
+// Get directory of this module
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+
+// CodexLens configuration
+const CODEXLENS_DATA_DIR = join(homedir(), '.codexlens');
+const CODEXLENS_VENV = join(CODEXLENS_DATA_DIR, 'venv');
+const VENV_PYTHON = process.platform === 'win32'
+  ? join(CODEXLENS_VENV, 'Scripts', 'python.exe')
+  : join(CODEXLENS_VENV, 'bin', 'python');
+
+// Bootstrap status cache
+let bootstrapChecked = false;
+let bootstrapReady = false;
+
+/**
+ * Detect available Python 3 executable
+ * @returns {string} - Python executable command
+ */
+function getSystemPython() {
+  const commands = process.platform === 'win32'
+    ? ['python', 'py', 'python3']
+    : ['python3', 'python'];
+
+  for (const cmd of commands) {
+    try {
+      const version = execSync(`${cmd} --version 2>&1`, { encoding: 'utf8' });
+      if (version.includes('Python 3')) {
+        return cmd;
+      }
+    } catch {
+      // Try next command
+    }
+  }
+  throw new Error('Python 3 not found. Please install Python 3 and ensure it is in PATH.');
+}
+
+/**
+ * Check if CodexLens venv exists and has required packages
+ * @returns {Promise<{ready: boolean, error?: string}>}
+ */
+async function checkVenvStatus() {
+  // Check venv exists
+  if (!existsSync(CODEXLENS_VENV)) {
+    return { ready: false, error: 'Venv not found' };
+  }
+
+  // Check python executable exists
+  if (!existsSync(VENV_PYTHON)) {
+    return { ready: false, error: 'Python executable not found in venv' };
+  }
+
+  // Check codexlens is importable
+  return new Promise((resolve) => {
+    const child = spawn(VENV_PYTHON, ['-c', 'import codexlens; print(codexlens.__version__)'], {
+      stdio: ['ignore', 'pipe', 'pipe'],
+      timeout: 10000
+    });
+
+    let stdout = '';
+    let stderr = '';
+
+    child.stdout.on('data', (data) => { stdout += data.toString(); });
+    child.stderr.on('data', (data) => { stderr += data.toString(); });
+
+    child.on('close', (code) => {
+      if (code === 0) {
+        resolve({ ready: true, version: stdout.trim() });
+      } else {
+        resolve({ ready: false, error: `CodexLens not installed: ${stderr}` });
+      }
+    });
+
+    child.on('error', (err) => {
+      resolve({ ready: false, error: `Failed to check venv: ${err.message}` });
+    });
+  });
+}
+
+/**
+ * Bootstrap CodexLens venv with required packages
+ * @returns {Promise<{success: boolean, error?: string}>}
+ */
+async function bootstrapVenv() {
+  // Ensure data directory exists
+  if (!existsSync(CODEXLENS_DATA_DIR)) {
+    mkdirSync(CODEXLENS_DATA_DIR, { recursive: true });
+  }
+
+  // Create venv if not exists
+  if (!existsSync(CODEXLENS_VENV)) {
+    try {
+      console.log('[CodexLens] Creating virtual environment...');
+      const pythonCmd = getSystemPython();
+      execSync(`${pythonCmd} -m venv "${CODEXLENS_VENV}"`, { stdio: 'inherit' });
+    } catch (err) {
+      return { success: false, error: `Failed to create venv: ${err.message}` };
+    }
+  }
+
+  // Install codexlens with semantic extras
+  try {
+    console.log('[CodexLens] Installing codexlens package...');
+    const pipPath = process.platform === 'win32'
+      ? join(CODEXLENS_VENV, 'Scripts', 'pip.exe')
+      : join(CODEXLENS_VENV, 'bin', 'pip');
+
+    // Try multiple local paths, then fall back to PyPI
+    const possiblePaths = [
+      join(process.cwd(), 'codex-lens'),
+      join(__dirname, '..', '..', '..', 'codex-lens'),  // ccw/src/tools -> project root
+      join(homedir(), 'codex-lens'),
+    ];
+
+    let installed = false;
+    for (const localPath of possiblePaths) {
+      if (existsSync(join(localPath, 'pyproject.toml'))) {
+        console.log(`[CodexLens] Installing from local path: ${localPath}`);
+        execSync(`"${pipPath}" install -e "${localPath}"`, { stdio: 'inherit' });
+        installed = true;
+        break;
+      }
+    }
+
+    if (!installed) {
+      console.log('[CodexLens] Installing from PyPI...');
+      execSync(`"${pipPath}" install codexlens`, { stdio: 'inherit' });
+    }
+
+    return { success: true };
+  } catch (err) {
+    return { success: false, error: `Failed to install codexlens: ${err.message}` };
+  }
+}
+
+/**
+ * Ensure CodexLens is ready to use
+ * @returns {Promise<{ready: boolean, error?: string}>}
+ */
+async function ensureReady() {
+  // Use cached result if already checked
+  if (bootstrapChecked && bootstrapReady) {
+    return { ready: true };
+  }
+
+  // Check current status
+  const status = await checkVenvStatus();
+  if (status.ready) {
+    bootstrapChecked = true;
+    bootstrapReady = true;
+    return { ready: true, version: status.version };
+  }
+
+  // Attempt bootstrap
+  const bootstrap = await bootstrapVenv();
+  if (!bootstrap.success) {
+    return { ready: false, error: bootstrap.error };
+  }
+
+  // Verify after bootstrap
+  const recheck = await checkVenvStatus();
+  bootstrapChecked = true;
+  bootstrapReady = recheck.ready;
+
+  return recheck;
+}
+
+/**
+ * Execute CodexLens CLI command
+ * @param {string[]} args - CLI arguments
+ * @param {Object} options - Execution options
+ * @returns {Promise<{success: boolean, output?: string, error?: string}>}
+ */
+async function executeCodexLens(args, options = {}) {
+  const { timeout = 60000, cwd = process.cwd() } = options;
+
+  // Ensure ready
+  const readyStatus = await ensureReady();
+  if (!readyStatus.ready) {
+    return { success: false, error: readyStatus.error };
+  }
+
+  return new Promise((resolve) => {
+    const child = spawn(VENV_PYTHON, ['-m', 'codexlens', ...args], {
+      cwd,
+      stdio: ['ignore', 'pipe', 'pipe']
+    });
+
+    let stdout = '';
+    let stderr = '';
+    let timedOut = false;
+
+    child.stdout.on('data', (data) => { stdout += data.toString(); });
+    child.stderr.on('data', (data) => { stderr += data.toString(); });
+
+    const timeoutId = setTimeout(() => {
+      timedOut = true;
+      child.kill('SIGTERM');
+    }, timeout);
+
+    child.on('close', (code) => {
+      clearTimeout(timeoutId);
+
+      if (timedOut) {
+        resolve({ success: false, error: 'Command timed out' });
+      } else if (code === 0) {
+        resolve({ success: true, output: stdout.trim() });
+      } else {
+        resolve({ success: false, error: stderr || `Exit code: ${code}` });
+      }
+    });
+
+    child.on('error', (err) => {
+      clearTimeout(timeoutId);
+      resolve({ success: false, error: `Spawn failed: ${err.message}` });
+    });
+  });
+}
+
+/**
+ * Initialize CodexLens index for a directory
+ * @param {Object} params - Parameters
+ * @returns {Promise<Object>}
+ */
+async function initIndex(params) {
+  const { path = '.', languages } = params;
+
+  const args = ['init', path];
+  if (languages && languages.length > 0) {
+    args.push('--languages', languages.join(','));
+  }
+
+  return executeCodexLens(args, { cwd: path });
+}
+
+/**
+ * Search code using CodexLens
+ * @param {Object} params - Search parameters
+ * @returns {Promise<Object>}
+ */
+async function searchCode(params) {
+  const { query, path = '.', mode = 'text', limit = 20 } = params;
+
+  const args = ['search', query, '--limit', limit.toString(), '--json'];
+
+  // Note: semantic mode requires semantic extras to be installed
+  // Currently not exposed via CLI flag, uses standard FTS search
+
+  const result = await executeCodexLens(args, { cwd: path });
+
+  if (result.success) {
+    try {
+      result.results = JSON.parse(result.output);
+      delete result.output;
+    } catch {
+      // Keep raw output if JSON parse fails
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Extract symbols from a file
+ * @param {Object} params - Parameters
+ * @returns {Promise<Object>}
+ */
+async function extractSymbols(params) {
+  const { file } = params;
+
+  const args = ['symbol', file, '--json'];
+
+  const result = await executeCodexLens(args);
+
+  if (result.success) {
+    try {
+      result.symbols = JSON.parse(result.output);
+      delete result.output;
+    } catch {
+      // Keep raw output if JSON parse fails
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Get index status
+ * @param {Object} params - Parameters
+ * @returns {Promise<Object>}
+ */
+async function getStatus(params) {
+  const { path = '.' } = params;
+
+  const args = ['status', '--json'];
+
+  const result = await executeCodexLens(args, { cwd: path });
+
+  if (result.success) {
+    try {
+      result.status = JSON.parse(result.output);
+      delete result.output;
+    } catch {
+      // Keep raw output if JSON parse fails
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Update specific files in the index
+ * @param {Object} params - Parameters
+ * @returns {Promise<Object>}
+ */
+async function updateFiles(params) {
+  const { files, path = '.' } = params;
+
+  if (!files || !Array.isArray(files) || files.length === 0) {
+    return { success: false, error: 'files parameter is required and must be a non-empty array' };
+  }
+
+  const args = ['update', ...files, '--json'];
+
+  const result = await executeCodexLens(args, { cwd: path });
+
+  if (result.success) {
+    try {
+      result.updateResult = JSON.parse(result.output);
+      delete result.output;
+    } catch {
+      // Keep raw output if JSON parse fails
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Main execute function - routes to appropriate handler
+ * @param {Object} params - Execution parameters
+ * @returns {Promise<Object>}
+ */
+async function execute(params) {
+  const { action, ...rest } = params;
+
+  switch (action) {
+    case 'init':
+      return initIndex(rest);
+
+    case 'search':
+      return searchCode(rest);
+
+    case 'symbol':
+      return extractSymbols(rest);
+
+    case 'status':
+      return getStatus(rest);
+
+    case 'update':
+      return updateFiles(rest);
+
+    case 'bootstrap':
+      // Force re-bootstrap
+      bootstrapChecked = false;
+      bootstrapReady = false;
+      const bootstrapResult = await bootstrapVenv();
+      return bootstrapResult.success
+        ? { success: true, message: 'CodexLens bootstrapped successfully' }
+        : { success: false, error: bootstrapResult.error };
+
+    case 'check':
+      // Check venv status
+      return checkVenvStatus();
+
+    default:
+      throw new Error(`Unknown action: ${action}. Valid actions: init, search, symbol, status, update, bootstrap, check`);
+  }
+}
+
+/**
+ * CodexLens Tool Definition
+ *
+ * Descriptor object for the `codex_lens` tool: a name, a human-readable
+ * description, a JSON-Schema-style `parameters` object, and the `execute`
+ * dispatcher defined in this module. Only `action` is required; which of the
+ * other properties are read depends on the action.
+ */
+export const codexLensTool = {
+  name: 'codex_lens',
+  description: `Code indexing and semantic search via CodexLens Python package.
+
+Actions:
+- init: Initialize index for a directory
+- search: Search code (text or semantic mode)
+- symbol: Extract symbols from a file
+- status: Get index status
+- update: Incrementally update specific files (add/modify/remove)
+- bootstrap: Force re-install CodexLens venv
+- check: Check venv readiness
+
+Features:
+- Automatic venv bootstrap at ~/.codexlens/venv
+- SQLite FTS5 full-text search
+- Tree-sitter symbol extraction
+- Incremental updates for changed files
+- Optional semantic search with embeddings`,
+  parameters: {
+    type: 'object',
+    properties: {
+      // Must match the actions dispatched by execute()
+      action: {
+        type: 'string',
+        enum: ['init', 'search', 'symbol', 'status', 'update', 'bootstrap', 'check'],
+        description: 'Action to perform'
+      },
+      // Used as the child process working directory by the handlers that accept it
+      path: {
+        type: 'string',
+        description: 'Target path (for init, search, status, update)'
+      },
+      query: {
+        type: 'string',
+        description: 'Search query (for search action)'
+      },
+      // NOTE: searchCode destructures `mode` but does not forward it to the CLI,
+      // so both values currently run the same search
+      mode: {
+        type: 'string',
+        enum: ['text', 'semantic'],
+        description: 'Search mode (default: text)',
+        default: 'text'
+      },
+      file: {
+        type: 'string',
+        description: 'File path (for symbol action)'
+      },
+      files: {
+        type: 'array',
+        items: { type: 'string' },
+        description: 'File paths to update (for update action)'
+      },
+      // Joined with commas and passed as --languages by initIndex
+      languages: {
+        type: 'array',
+        items: { type: 'string' },
+        description: 'Languages to index (for init action)'
+      },
+      limit: {
+        type: 'number',
+        description: 'Maximum results (for search action)',
+        default: 20
+      },
+      // NOTE: no handler in this module reads `format`; search, symbol, status
+      // and update always pass --json to the CLI
+      format: {
+        type: 'string',
+        enum: ['json', 'table', 'plain'],
+        description: 'Output format',
+        default: 'json'
+      }
+    },
+    required: ['action']
+  },
+  execute
+};
+
+// Export for direct usage
+export { ensureReady, executeCodexLens, checkVenvStatus, bootstrapVenv };
--- a/ccw/src/tools/index.js
+++ b/ccw/src/tools/index.js
@@ -18,6 +18,7 @@ import { convertTokensToCssTool } from './convert-tokens-to-css.js';
 import { sessionManagerTool } from './session-manager.js';
 import { cliExecutorTool } from './cli-executor.js';
 import { smartSearchTool } from './smart-search.js';
+import { codexLensTool } from './codex-lens.js';

 // Tool registry - add new tools here
 const tools = new Map();
@@ -264,6 +265,7 @@ registerTool(convertTokensToCssTool);
 registerTool(sessionManagerTool);
 registerTool(cliExecutorTool);
 registerTool(smartSearchTool);
+registerTool(codexLensTool);

 // Export for external tool registration
 export { registerTool };
--- a/ccw/src/tools/smart-search.js
+++ b/ccw/src/tools/smart-search.js
@@ -12,6 +12,7 @@
 import { spawn, execSync } from 'child_process';
 import { existsSync, readdirSync, statSync } from 'fs';
 import { join, resolve, isAbsolute } from 'path';
+import { ensureReady as ensureCodexLensReady, executeCodexLens } from './codex-lens.js';

 // Search mode constants
 const SEARCH_MODES = ['auto', 'exact', 'fuzzy', 'semantic', 'graph'];
@@ -195,12 +196,10 @@ async function executeAutoMode(params) {
      };

    case 'fuzzy':
-    case 'semantic':
-    case 'graph':
-      // These modes not yet implemented
+      // Fuzzy mode not yet implemented
      return {
        success: false,
-        error: `${classification.mode} mode not yet implemented`,
+        error: 'Fuzzy mode not yet implemented',
        metadata: {
          classified_as: classification.mode,
          confidence: classification.confidence,
@@ -208,6 +207,32 @@ async function executeAutoMode(params) {
        }
      };

+    case 'semantic':
+      // Execute semantic mode via CodexLens
+      const semanticResult = await executeSemanticMode(params);
+      return {
+        ...semanticResult,
+        metadata: {
+          ...semanticResult.metadata,
+          classified_as: classification.mode,
+          confidence: classification.confidence,
+          reasoning: classification.reasoning
+        }
+      };
+
+    case 'graph':
+      // Execute graph mode via CodexLens
+      const graphResult = await executeGraphMode(params);
+      return {
+        ...graphResult,
+        metadata: {
+          ...graphResult.metadata,
+          classified_as: classification.mode,
+          confidence: classification.confidence,
+          reasoning: classification.reasoning
+        }
+      };
+
    default:
      // Fallback to exact mode with warning
      const fallbackResult = await executeExactMode(params);
@@ -346,41 +371,166 @@ async function executeFuzzyMode(params) {

 /**
 * Mode: semantic - Natural language understanding search
- * Uses LLM or embeddings for semantic similarity
+ * Uses CodexLens embeddings for semantic similarity
 */
 async function executeSemanticMode(params) {
  const { query, paths = [], maxResults = 100 } = params;

-  // TODO: Implement semantic search
-  // - Option 1: Use Gemini CLI via cli-executor.js
-  // - Option 2: Use local embeddings (transformers.js)
-  // - Generate query embedding
-  // - Compare with code embeddings
-  // - Return semantically similar results
+  // Check CodexLens availability
+  const readyStatus = await ensureCodexLensReady();
+  if (!readyStatus.ready) {
+    return {
+      success: false,
+      error: `CodexLens not available: ${readyStatus.error}. Run 'ccw tool exec codex_lens {"action":"bootstrap"}' to install.`
+    };
+  }
+
+  // Determine search path
+  const searchPath = paths.length > 0 ? paths[0] : '.';
+
+  // Execute CodexLens semantic search
+  const result = await executeCodexLens(
+    ['search', query, '--limit', maxResults.toString(), '--json'],
+    { cwd: searchPath }
+  );
+
+  if (!result.success) {
+    return {
+      success: false,
+      error: result.error,
+      metadata: {
+        mode: 'semantic',
+        backend: 'codexlens'
+      }
+    };
+  }
+
+  // Parse and transform results
+  let results = [];
+  try {
+    // Handle CRLF in output
+    const cleanOutput = result.output.replace(/\r\n/g, '\n');
+    const parsed = JSON.parse(cleanOutput);
+    const data = parsed.result || parsed;
+    results = (data.results || []).map(item => ({
+      file: item.path || item.file,
+      score: item.score || 0,
+      content: item.excerpt || item.content || '',
+      symbol: item.symbol || null
+    }));
+  } catch {
+    // Return raw output if JSON parsing fails
+    return {
+      success: true,
+      results: [],
+      output: result.output,
+      metadata: {
+        mode: 'semantic',
+        backend: 'codexlens',
+        count: 0,
+        query,
+        warning: 'Failed to parse JSON output'
+      }
+    };
+  }

  return {
-    success: false,
-    error: 'Semantic mode not implemented - LLM/embedding integration pending'
+    success: true,
+    results,
+    metadata: {
+      mode: 'semantic',
+      backend: 'codexlens',
+      count: results.length,
+      query
+    }
  };
 }

 /**
 * Mode: graph - Dependency and relationship traversal
- * Analyzes code relationships (imports, exports, dependencies)
+ * Uses CodexLens symbol extraction for code analysis
 */
 async function executeGraphMode(params) {
  const { query, paths = [], maxResults = 100 } = params;

-  // TODO: Implement graph search
-  // - Parse import/export statements
-  // - Build dependency graph
-  // - Traverse relationships
-  // - Find related modules
-  // - Return graph results
+  // Check CodexLens availability
+  const readyStatus = await ensureCodexLensReady();
+  if (!readyStatus.ready) {
+    return {
+      success: false,
+      error: `CodexLens not available: ${readyStatus.error}. Run 'ccw tool exec codex_lens {"action":"bootstrap"}' to install.`
+    };
+  }
+
+  // First, search for relevant files using text search
+  const searchPath = paths.length > 0 ? paths[0] : '.';
+
+  // Execute text search to find files matching the query
+  const textResult = await executeCodexLens(
+    ['search', query, '--limit', maxResults.toString(), '--json'],
+    { cwd: searchPath }
+  );
+
+  if (!textResult.success) {
+    return {
+      success: false,
+      error: textResult.error,
+      metadata: {
+        mode: 'graph',
+        backend: 'codexlens'
+      }
+    };
+  }
+
+  // Parse results and extract symbols from top files
+  let results = [];
+  try {
+    const parsed = JSON.parse(textResult.output);
+    const files = [...new Set((parsed.results || parsed).map(item => item.path || item.file))].slice(0, 10);
+
+    // Extract symbols from files in parallel
+    const symbolPromises = files.map(file =>
+      executeCodexLens(['symbol', file, '--json'], { cwd: searchPath })
+        .then(result => ({ file, result }))
+    );
+
+    const symbolResults = await Promise.all(symbolPromises);
+
+    for (const { file, result } of symbolResults) {
+      if (result.success) {
+        try {
+          const symbols = JSON.parse(result.output);
+          results.push({
+            file,
+            symbols: symbols.symbols || symbols,
+            relationships: []
+          });
+        } catch {
+          // Skip files with parse errors
+        }
+      }
+    }
+  } catch {
+    return {
+      success: false,
+      error: 'Failed to parse search results',
+      metadata: {
+        mode: 'graph',
+        backend: 'codexlens'
+      }
+    };
+  }

  return {
-    success: false,
-    error: 'Graph mode not implemented - dependency analysis pending'
+    success: true,
+    results,
+    metadata: {
+      mode: 'graph',
+      backend: 'codexlens',
+      count: results.length,
+      query,
+      note: 'Graph mode provides symbol extraction; full dependency graph analysis pending'
+    }
  };
 }