Add parallel search mode and index progress bar

Features:
- CCW smart_search: Add 'parallel' mode that runs hybrid + exact + ripgrep simultaneously with RRF (Reciprocal Rank Fusion) for result merging
- Dashboard: Add real-time progress bar for CodexLens index initialization
- MCP: Return progress metadata in init action response
- Codex-lens: Auto-detect optimal worker count for parallel indexing

Changes:
- smart-search.ts: Add parallel mode, RRF fusion, progress tracking
- codex-lens.ts: Add onProgress callback support, progress parsing
- codexlens-routes.ts: Broadcast index progress via WebSocket
- codexlens-manager.js: New index progress modal with real-time updates
- notifications.js: Add WebSocket event handler registration system
- i18n.js: Add English/Chinese translations for progress UI
- index_tree.py: Workers parameter now auto-detects CPU count (max 16)
- commands.py: CLI --workers parameter supports auto-detection

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-10 02:24:35 +08:00 · 2025-12-17 23:17:15 +08:00
parent 44d84116c3
commit 51a61bef31
8 changed files with 569 additions and 21 deletions
--- a/ccw/src/tools/codex-lens.ts
+++ b/ccw/src/tools/codex-lens.ts
@@ -91,6 +91,15 @@ interface ExecuteResult {
 interface ExecuteOptions {
+  // Time limit for the CLI run; executeCodexLens falls back to 60000 when omitted
+  // (presumably milliseconds — confirm against the timer setup in executeCodexLens).
  timeout?: number;
+  // Working directory for the command; executeCodexLens falls back to process.cwd().
  cwd?: string;
+  // Invoked once for every stdout/stderr line that parseProgressLine recognizes as progress.
+  onProgress?: (progress: ProgressInfo) => void;
+}
+
+/**
+ * Progress snapshot derived from a single line of CodexLens output
+ * (built by parseProgressLine and handed to ExecuteOptions.onProgress).
+ */
+interface ProgressInfo {
+  /** Pipeline stage; parseProgressLine emits 'discover', 'build', 'indexing', 'symbols', 'embeddings' or 'complete'. */
+  stage: string;
+  /** Human-readable status text for display. */
+  message: string;
+  /** Completion estimate; parseProgressLine uses fixed per-stage values (5, 10, 50, 70, 95, 100) and a 10-90 band while files are processed. */
+  percent: number;
+  /** Number of files handled so far, when the output line reports it. */
+  filesProcessed?: number;
+  /** Total number of files to process, when the output line reports it. */
+  totalFiles?: number;
 }

 /**
@@ -361,6 +370,57 @@ async function ensureReady(): Promise<ReadyStatus> {
  return recheck;
 }

+/**
+ * Parse a single line of CodexLens output into a progress update.
+ *
+ * Patterns are checked in this order, first match wins:
+ *  1. "Processing file X/Y: path" → stage 'indexing', percent scaled into 10-90.
+ *  2. Stage markers: "Discovering files", "Building index", "Extracting symbols",
+ *     "Generating embeddings" / "Creating embeddings", "Finalizing" / "Complete".
+ *  3. "Indexed N files" → stage 'complete', percent 100.
+ *
+ * Because of that order, a line containing both "Complete" and "Indexed N files"
+ * is reported as the 95% finalizing step.
+ *
+ * @param line - Output line to parse (callers pass it already trimmed)
+ * @returns Progress info, or null when the line carries no progress information
+ */
+function parseProgressLine(line: string): ProgressInfo | null {
+  // Parse file processing progress: "Processing file X/Y: path"
+  const fileMatch = line.match(/Processing file (\d+)\/(\d+):\s*(.+)/i);
+  if (fileMatch) {
+    const current = parseInt(fileMatch[1], 10);
+    const total = parseInt(fileMatch[2], 10);
+    // Guard against "0/0" (division by zero would yield NaN) and against
+    // current > total, so the reported percent always stays inside 10-90.
+    const ratio = total > 0 ? Math.min(current / total, 1) : 0;
+    return {
+      stage: 'indexing',
+      message: `Processing ${fileMatch[3]}`,
+      percent: Math.round(ratio * 80) + 10, // 10-90%
+      filesProcessed: current,
+      totalFiles: total,
+    };
+  }
+
+  // Parse stage messages (case-sensitive substring checks)
+  if (line.includes('Discovering files')) {
+    return { stage: 'discover', message: 'Discovering files...', percent: 5 };
+  }
+  if (line.includes('Building index')) {
+    return { stage: 'build', message: 'Building index...', percent: 10 };
+  }
+  if (line.includes('Extracting symbols')) {
+    return { stage: 'symbols', message: 'Extracting symbols...', percent: 50 };
+  }
+  if (line.includes('Generating embeddings') || line.includes('Creating embeddings')) {
+    return { stage: 'embeddings', message: 'Generating embeddings...', percent: 70 };
+  }
+  if (line.includes('Finalizing') || line.includes('Complete')) {
+    return { stage: 'complete', message: 'Finalizing...', percent: 95 };
+  }
+
+  // Parse indexed count: "Indexed X files"
+  const indexedMatch = line.match(/Indexed (\d+) files/i);
+  if (indexedMatch) {
+    return {
+      stage: 'complete',
+      message: `Indexed ${indexedMatch[1]} files`,
+      percent: 100,
+      filesProcessed: parseInt(indexedMatch[1], 10),
+    };
+  }
+
+  return null;
+}
+
 /**
 * Execute CodexLens CLI command
 * @param args - CLI arguments
@@ -368,7 +428,7 @@ async function ensureReady(): Promise<ReadyStatus> {
 * @returns Execution result
 */
 async function executeCodexLens(args: string[], options: ExecuteOptions = {}): Promise<ExecuteResult> {
-  const { timeout = 60000, cwd = process.cwd() } = options;
+  const { timeout = 60000, cwd = process.cwd(), onProgress } = options;

  // Ensure ready
  const readyStatus = await ensureReady();
@@ -387,10 +447,35 @@ async function executeCodexLens(args: string[], options: ExecuteOptions = {}): P
    let timedOut = false;

    child.stdout.on('data', (data) => {
-      stdout += data.toString();
+      const chunk = data.toString();
+      stdout += chunk;
+
+      // Report progress if callback provided
+      if (onProgress) {
+        const lines = chunk.split('\n');
+        for (const line of lines) {
+          const progress = parseProgressLine(line.trim());
+          if (progress) {
+            onProgress(progress);
+          }
+        }
+      }
    });
+
    child.stderr.on('data', (data) => {
-      stderr += data.toString();
+      const chunk = data.toString();
+      stderr += chunk;
+
+      // Also check stderr for progress (some tools output there)
+      if (onProgress) {
+        const lines = chunk.split('\n');
+        for (const line of lines) {
+          const progress = parseProgressLine(line.trim());
+          if (progress) {
+            onProgress(progress);
+          }
+        }
+      }
    });

    const timeoutId = setTimeout(() => {
@@ -803,6 +888,9 @@ async function uninstallCodexLens(): Promise<BootstrapResult> {
  }
 }

+// Export types
+export type { ProgressInfo, ExecuteOptions };
+
 // Export for direct usage
 export { ensureReady, executeCodexLens, checkVenvStatus, bootstrapVenv, checkSemanticStatus, installSemantic, uninstallCodexLens };

--- a/ccw/src/tools/smart-search.ts
+++ b/ccw/src/tools/smart-search.ts
@@ -21,12 +21,13 @@ import {
  ensureReady as ensureCodexLensReady,
  executeCodexLens,
 } from './codex-lens.js';
+import type { ProgressInfo } from './codex-lens.js';

 // Define Zod schema for validation
 const ParamsSchema = z.object({
  action: z.enum(['init', 'search', 'search_files', 'status']).default('search'),
  query: z.string().optional(),
-  mode: z.enum(['auto', 'hybrid', 'exact', 'ripgrep']).default('auto'),
+  mode: z.enum(['auto', 'hybrid', 'exact', 'ripgrep', 'parallel']).default('auto'),
  output_mode: z.enum(['full', 'files_only', 'count']).default('full'),
  path: z.string().optional(),
  paths: z.array(z.string()).default([]),
@@ -35,12 +36,17 @@ const ParamsSchema = z.object({
  includeHidden: z.boolean().default(false),
  languages: z.array(z.string()).optional(),
  limit: z.number().default(100),
+  parallelWeights: z.object({
+    hybrid: z.number().default(0.5),
+    exact: z.number().default(0.3),
+    ripgrep: z.number().default(0.2),
+  }).optional(),
 });

 type Params = z.infer<typeof ParamsSchema>;

 // Search mode constants
-const SEARCH_MODES = ['auto', 'hybrid', 'exact', 'ripgrep'] as const;
+const SEARCH_MODES = ['auto', 'hybrid', 'exact', 'ripgrep', 'parallel'] as const;

 // Classification confidence threshold
 const CONFIDENCE_THRESHOLD = 0.7;
@@ -72,10 +78,10 @@ interface GraphMatch {
 }

 interface SearchMetadata {
-  mode: string;
-  backend: string;
-  count: number;
-  query: string;
+  mode?: string;
+  backend?: string;
+  count?: number;
+  query?: string;
  classified_as?: string;
  confidence?: number;
  reasoning?: string;
@@ -83,6 +89,17 @@ interface SearchMetadata {
  warning?: string;
  note?: string;
  index_status?: 'indexed' | 'not_indexed' | 'partial';
+  // Init action specific
+  action?: string;
+  path?: string;
+  progress?: {
+    stage: string;
+    message: string;
+    percent: number;
+    filesProcessed?: number;
+    totalFiles?: number;
+  };
+  progressHistory?: ProgressInfo[];
 }

 interface SearchResult {
@@ -326,7 +343,39 @@ async function executeInitAction(params: Params): Promise<SearchResult> {
    args.push('--languages', languages.join(','));
  }

-  const result = await executeCodexLens(args, { cwd: path, timeout: 300000 });
+  // Track progress updates
+  const progressUpdates: ProgressInfo[] = [];
+  let lastProgress: ProgressInfo | null = null;
+
+  const result = await executeCodexLens(args, {
+    cwd: path,
+    timeout: 300000,
+    onProgress: (progress: ProgressInfo) => {
+      progressUpdates.push(progress);
+      lastProgress = progress;
+    },
+  });
+
+  // Build metadata with progress info
+  const metadata: SearchMetadata = {
+    action: 'init',
+    path,
+  };
+
+  if (lastProgress !== null) {
+    const p = lastProgress as ProgressInfo;
+    metadata.progress = {
+      stage: p.stage,
+      message: p.message,
+      percent: p.percent,
+      filesProcessed: p.filesProcessed,
+      totalFiles: p.totalFiles,
+    };
+  }
+
+  if (progressUpdates.length > 0) {
+    metadata.progressHistory = progressUpdates.slice(-5); // Keep last 5 progress updates
+  }

  return {
    success: result.success,
@@ -334,6 +383,7 @@ async function executeInitAction(params: Params): Promise<SearchResult> {
    message: result.success
      ? `CodexLens index created successfully for ${path}`
      : undefined,
+    metadata,
  };
 }

@@ -726,17 +776,155 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
  };
 }

+/**
+ * TypeScript implementation of Reciprocal Rank Fusion
+ * Reference: codex-lens/src/codexlens/search/ranking.py
+ * Formula: score(d) = Σ weight_source / (k + rank_source(d))
+ *
+ * Documents are identified by file path (`result.file`, falling back to
+ * `result.path`); results without either are ignored. Each backend contributes
+ * at most once per file, using the file's 1-based rank among that backend's
+ * distinct files, so a file listed many times by one backend is not over-counted.
+ *
+ * @param resultsMap - Backend name → result list; array order is taken as the ranking
+ * @param weights - Backend name → weight; backends with a missing or zero weight are skipped
+ * @param limit - Maximum number of fused results to return
+ * @param k - RRF smoothing constant (default 60)
+ * @returns The first result object seen for each file, extended with `fusion_score`
+ *          and `matched_backends`, sorted by fusion score descending
+ */
+function applyRRFFusion(
+  resultsMap: Map<string, any[]>,
+  weights: Record<string, number>,
+  limit: number,
+  k: number = 60,
+): any[] {
+  const pathScores = new Map<string, { score: number; result: any; sources: string[] }>();
+
+  resultsMap.forEach((results, source) => {
+    const weight = weights[source] || 0;
+    if (weight === 0 || !results) return;
+
+    // Files already ranked for this backend; its size doubles as the current rank.
+    const rankedPaths = new Set<string>();
+
+    for (const result of results) {
+      const path = result?.file || result?.path;
+      // Skip unusable entries and repeat hits for a file this backend already ranked.
+      if (!path || rankedPaths.has(path)) continue;
+      rankedPaths.add(path);
+
+      const rrfContribution = weight / (k + rankedPaths.size);
+
+      const entry = pathScores.get(path);
+      if (entry) {
+        entry.score += rrfContribution;
+        entry.sources.push(source);
+      } else {
+        pathScores.set(path, { score: rrfContribution, result, sources: [source] });
+      }
+    }
+  });
+
+  // Sort by fusion score descending
+  return Array.from(pathScores.values())
+    .sort((a, b) => b.score - a.score)
+    .slice(0, limit)
+    .map(item => ({
+      ...item.result,
+      fusion_score: item.score,
+      matched_backends: item.sources,
+    }));
+}
+
+/**
+ * Mode: parallel - query hybrid, exact and ripgrep backends at the same time
+ * and merge whatever succeeded with RRF fusion.
+ *
+ * A backend that rejects, or resolves with success=false, is recorded in
+ * metadata.backend_status and left out of the fusion. If no backend succeeds
+ * the call returns success=false with error 'All search backends failed'.
+ *
+ * @param params - Validated tool parameters; `query` is required, `limit` and
+ *                 `parallelWeights` are optional
+ * @returns Fused search results plus per-backend status metadata
+ */
+async function executeParallelMode(params: Params): Promise<SearchResult> {
+  const { query, limit = 100, parallelWeights } = params;
+
+  if (!query) {
+    return {
+      success: false,
+      error: 'Query is required for search',
+    };
+  }
+
+  // Fall back to the built-in weighting when the caller supplied none
+  const weights = parallelWeights || {
+    hybrid: 0.5,
+    exact: 0.3,
+    ripgrep: 0.2,
+  };
+
+  // Kick off every backend together and wait for all of them to settle
+  const [hybridOutcome, exactOutcome, ripgrepOutcome] = await Promise.allSettled([
+    executeHybridMode(params),
+    executeCodexLensExactMode(params),
+    executeRipgrepMode(params),
+  ]);
+
+  const outcomes: Array<[string, PromiseSettledResult<SearchResult>]> = [
+    ['hybrid', hybridOutcome],
+    ['exact', exactOutcome],
+    ['ripgrep', ripgrepOutcome],
+  ];
+
+  // Keep the results of successful backends; note why the others were dropped
+  const resultsMap = new Map<string, any[]>();
+  const backendStatus: Record<string, string> = {};
+
+  for (const [backend, outcome] of outcomes) {
+    if (outcome.status === 'rejected') {
+      backendStatus[backend] = `error: ${outcome.reason}`;
+      continue;
+    }
+    if (!outcome.value.success) {
+      backendStatus[backend] = `failed: ${outcome.value.error}`;
+      continue;
+    }
+    resultsMap.set(backend, outcome.value.results as any[]);
+    backendStatus[backend] = 'success';
+  }
+
+  // Nothing usable came back from any backend
+  if (resultsMap.size === 0) {
+    return {
+      success: false,
+      error: 'All search backends failed',
+      metadata: {
+        mode: 'parallel',
+        backend: 'multi-backend',
+        count: 0,
+        query,
+        backend_status: backendStatus,
+      } as any,
+    };
+  }
+
+  // Merge the per-backend rankings into one list
+  const fusedResults = applyRRFFusion(resultsMap, weights, limit);
+
+  return {
+    success: true,
+    results: fusedResults,
+    metadata: {
+      mode: 'parallel',
+      backend: 'multi-backend',
+      count: fusedResults.length,
+      query,
+      backends_used: Array.from(resultsMap.keys()),
+      backend_status: backendStatus,
+      weights,
+      note: 'Parallel mode runs hybrid + exact + ripgrep simultaneously with RRF fusion',
+    } as any,
+  };
+}
+
 // Tool schema for MCP
 export const schema: ToolSchema = {
  name: 'smart_search',
-  description: `Intelligent code search with three optimized modes: hybrid, exact, ripgrep.
+  description: `Intelligent code search with five modes: auto, hybrid, exact, ripgrep, parallel.

 **Quick Start:**
  smart_search(query="authentication logic")           # Auto mode (intelligent routing)
  smart_search(action="init", path=".")                # Initialize index (required for hybrid)
  smart_search(action="status")                        # Check index status

-**Three Core Modes:**
+**Five Modes:**
  1. auto (default): Intelligent routing based on query and index
     - Natural language + index → hybrid
     - Simple query + index → exact
@@ -754,6 +942,10 @@ export const schema: ToolSchema = {
     - Fast, no index required
     - Literal string matching

+  5. parallel: Run all backends simultaneously
+     - Highest recall, runs hybrid + exact + ripgrep in parallel
+     - Results merged using RRF fusion with configurable weights
+
 **Actions:**
  - search (default): Intelligent search with auto routing
  - init: Create CodexLens index (required for hybrid/exact)
@@ -780,7 +972,7 @@ export const schema: ToolSchema = {
      mode: {
        type: 'string',
        enum: SEARCH_MODES,
-        description: 'Search mode: auto (default), hybrid (best quality), exact (CodexLens FTS), ripgrep (fast, no index)',
+        description: 'Search mode: auto (default), hybrid (best quality), exact (CodexLens FTS), ripgrep (fast, no index), parallel (all backends with RRF fusion)',
        default: 'auto',
      },
      output_mode: {
@@ -826,6 +1018,15 @@ export const schema: ToolSchema = {
        items: { type: 'string' },
        description: 'Languages to index (for init action). Example: ["javascript", "typescript"]',
      },
+      parallelWeights: {
+        type: 'object',
+        properties: {
+          hybrid: { type: 'number', default: 0.5 },
+          exact: { type: 'number', default: 0.3 },
+          ripgrep: { type: 'number', default: 0.2 },
+        },
+        description: 'RRF weights for parallel mode. Weights should sum to 1.0. Default: {hybrid: 0.5, exact: 0.3, ripgrep: 0.2}',
+      },
    },
    required: [],
  },
@@ -902,7 +1103,7 @@ export async function handler(params: Record<string, unknown>): Promise<ToolResu

      case 'search':
      default:
-        // Handle search modes: auto | hybrid | exact | ripgrep
+        // Handle search modes: auto | hybrid | exact | ripgrep | parallel
        switch (mode) {
          case 'auto':
            result = await executeAutoMode(parsed.data);
@@ -916,8 +1117,11 @@ export async function handler(params: Record<string, unknown>): Promise<ToolResu
          case 'ripgrep':
            result = await executeRipgrepMode(parsed.data);
            break;
+          case 'parallel':
+            result = await executeParallelMode(parsed.data);
+            break;
          default:
-            throw new Error(`Unsupported mode: ${mode}. Use: auto, hybrid, exact, or ripgrep`);
+            throw new Error(`Unsupported mode: ${mode}. Use: auto, hybrid, exact, ripgrep, or parallel`);
        }
        break;
    }