Claude-Code-Workflow/ccw/src/tools/smart-search.ts

/**
 * Smart Search Tool - Unified intelligent search with CodexLens integration
 *
 * Features:
 * - Fuzzy mode: FTS + ripgrep fusion with RRF ranking (default)
 * - Semantic mode: Dense coarse retrieval + cross-encoder reranking
 * - CodexLens integration (init, dense_rerank, fts)
 * - Ripgrep fallback for exact mode
 * - Index status checking and warnings
 * - Multi-backend search routing with RRF ranking
 *
 * Actions:
 * - init: Initialize CodexLens index
 * - search: Intelligent search with fuzzy (default) or semantic mode
 * - status: Check index status
 * - update: Incremental index update for changed files
 * - watch: Start file watcher for automatic updates
 */

import { z } from 'zod';
import type { ToolSchema, ToolResult } from '../types/tool.js';
import { spawn, execSync } from 'child_process';
import {
  ensureReady as ensureCodexLensReady,
  executeCodexLens,
} from './codex-lens.js';
import type { ProgressInfo } from './codex-lens.js';
import { getProjectRoot } from '../utils/path-validator.js';

// Timing utilities for performance analysis.
// Timing output is switched on when SMART_SEARCH_TIMING is exactly '1' or when the
// DEBUG environment variable contains the substring 'timing'.
// Note: the expression evaluates to `undefined` (not `false`) when neither variable
// is set, so only use this flag in truthiness checks.
const TIMING_ENABLED = process.env.SMART_SEARCH_TIMING === '1' || process.env.DEBUG?.includes('timing');

/** Map of timing label to a duration; createTimer fills it with millisecond values. */
interface TimingData {
  [key: string]: number;
}

/**
 * Create a lightweight stopwatch for measuring the phases of a search.
 *
 * - `mark(name)` records the time elapsed since the previous mark (or since creation).
 * - `getTimings()` returns one entry per mark plus a `_total` entry, all in
 *   milliseconds rounded to two decimals.
 * - `log()` writes the timings to stderr, but only when TIMING_ENABLED is truthy.
 *
 * The returned functions close over local state instead of relying on `this`, so they
 * keep working when destructured or handed over as callbacks.
 *
 * @returns The timer API described above.
 */
function createTimer(): { mark: (name: string) => void; getTimings: () => TimingData; log: () => void } {
  const startTime = performance.now();
  const marks: { name: string; time: number }[] = [];
  let lastMark = startTime;

  // Round to two decimals to keep the logged JSON compact.
  const roundMs = (ms: number): number => Math.round(ms * 100) / 100;

  const mark = (name: string): void => {
    const now = performance.now();
    marks.push({ name, time: now - lastMark });
    lastMark = now;
  };

  const getTimings = (): TimingData => {
    const timings: TimingData = {};
    for (const entry of marks) {
      timings[entry.name] = roundMs(entry.time);
    }
    timings['_total'] = roundMs(performance.now() - startTime);
    return timings;
  };

  const log = (): void => {
    if (TIMING_ENABLED) {
      console.error(`[TIMING] smart-search: ${JSON.stringify(getTimings())}`);
    }
  };

  return { mark, getTimings, log };
}

// Zod schema describing every parameter the tool accepts.
// Fields with `.default(...)` are always present after parsing; `.optional()` fields
// may be undefined and are defaulted by the individual action handlers.
const ParamsSchema = z.object({
  // Action: search (content), find_files (path/name pattern), init, status, update (incremental), watch
  // Note: search_files is deprecated, use search with output_mode='files_only'
  action: z.enum(['init', 'search', 'search_files', 'find_files', 'status', 'update', 'watch']).default('search'),
  query: z.string().optional().describe('Content search query (for action="search")'),
  pattern: z.string().optional().describe('Glob pattern for path matching (for action="find_files")'),
  // Search strategy; same values as SEARCH_MODES
  mode: z.enum(['fuzzy', 'semantic']).default('fuzzy'),
  output_mode: z.enum(['full', 'files_only', 'count']).default('full'),
  // Target directory; the action handlers default it to '.' and use it as cwd for CodexLens
  path: z.string().optional(),
  // Additional search paths (presumably forwarded to ripgrep - confirm in executeRipgrepMode)
  paths: z.array(z.string()).default([]),
  contextLines: z.number().default(0),
  maxResults: z.number().default(5),  // Default 5 with full content
  includeHidden: z.boolean().default(false),
  // Joined with ',' and passed as --language to the CodexLens init/update/watch commands
  languages: z.array(z.string()).optional(),
  // NOTE(review): overlaps with maxResults (same default); confirm which one callers should use
  limit: z.number().default(5),  // Default 5 with full content
  extraFilesCount: z.number().default(10),  // Additional file-only results
  // NOTE(review): the 50-2000 range mentioned here is not enforced by this schema
  maxContentLength: z.number().default(200),  // Max content length for truncation (50-2000)
  offset: z.number().default(0),  // NEW: Pagination offset (start_index)
  enrich: z.boolean().default(false),
  // Search modifiers for ripgrep mode
  regex: z.boolean().default(true),            // Use regex pattern matching (default: enabled)
  caseSensitive: z.boolean().default(true),    // Case sensitivity (default: case-sensitive)
  tokenize: z.boolean().default(true),         // Tokenize multi-word queries for OR matching (default: enabled)
  // File type filtering
  excludeExtensions: z.array(z.string()).optional().describe('File extensions to exclude from results (e.g., ["md", "txt"])'),
  codeOnly: z.boolean().default(false).describe('Only return code files (excludes md, txt, json, yaml, xml, etc.)'),
  // Watcher options
  debounce: z.number().default(1000).describe('Debounce interval in ms for watch action'),
  // Fuzzy matching is implicit in hybrid mode (RRF fusion)
});

// Parsed parameter object (after defaults have been applied by the schema)
type Params = z.infer<typeof ParamsSchema>;

// Search mode constants (same values as the `mode` enum in ParamsSchema)
const SEARCH_MODES = ['fuzzy', 'semantic'] as const;

// Classification confidence threshold
// NOTE(review): not referenced by classifyIntent in the visible part of this file - confirm usage
const CONFIDENCE_THRESHOLD = 0.7;

// File filtering configuration (ported from code-index)
// buildExcludeArgs turns each entry into a negated ripgrep --glob argument.
const FILTER_CONFIG = {
  // Directory names excluded at any depth (emitted as `!**/<name>/**`)
  // NOTE(review): '.DS_Store' and 'Thumbs.db' are files, so the directory-style glob
  // generated for them does not match the files themselves - confirm intent.
  exclude_directories: new Set([
    '.git', '.svn', '.hg', '.bzr',
    'node_modules', '__pycache__', '.venv', 'venv', 'vendor', 'bower_components',
    'dist', 'build', 'target', 'out', 'bin', 'obj',
    '.idea', '.vscode', '.vs', '.sublime-workspace',
    '.pytest_cache', '.coverage', '.tox', '.nyc_output', 'coverage', 'htmlcov',
    '.next', '.nuxt', '.cache', '.parcel-cache',
    '.DS_Store', 'Thumbs.db',
  ]),
  // File name patterns excluded from search (emitted as `!<pattern>`)
  exclude_files: new Set([
    '*.tmp', '*.temp', '*.swp', '*.swo', '*.bak', '*~', '*.orig', '*.log',
    'package-lock.json', 'yarn.lock', 'pnpm-lock.yaml', 'Pipfile.lock',
  ]),
  // Windows device files - must use **/ pattern to match in any directory
  // These cause "os error 1" on Windows when accessed
  windows_device_files: new Set([
    'nul', 'con', 'aux', 'prn',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
  ]),
};

/**
 * Translate FILTER_CONFIG into ripgrep `--glob` exclusion arguments.
 *
 * Order of the generated arguments: excluded directories, excluded file patterns,
 * then Windows device names (each emitted in lower case and in upper case).
 *
 * @returns Flat argument list of alternating `--glob` / `!pattern` entries.
 */
function buildExcludeArgs(): string[] {
  const negated = (glob: string): string[] => ['--glob', `!${glob}`];

  const directoryArgs = [...FILTER_CONFIG.exclude_directories]
    .flatMap(dirName => negated(`**/${dirName}/**`));

  const fileArgs = [...FILTER_CONFIG.exclude_files]
    .flatMap(filePattern => negated(filePattern));

  // Device names are matched in any directory; only the all-lower and all-upper
  // spellings are covered.
  const deviceArgs = [...FILTER_CONFIG.windows_device_files]
    .flatMap(deviceName => [
      ...negated(`**/${deviceName}`),
      ...negated(`**/${deviceName.toUpperCase()}`),
    ]);

  return [...directoryArgs, ...fileArgs, ...deviceArgs];
}

/**
 * Break a query into search tokens for multi-word OR matching.
 *
 * The query is split on runs of whitespace, commas, semicolons and colons. A piece
 * is dropped when it is shorter than two characters, or when it is a stop word
 * written without any underscore or upper-case letter.
 *
 * @param query - The search query
 * @returns Tokens in their original order and casing
 */
function tokenizeQuery(query: string): string[] {
  // Common English words that add noise to an OR search
  const stopWords = new Set([
    'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
    'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
    'should', 'may', 'might', 'must', 'can', 'to', 'of', 'in', 'for', 'on',
    'with', 'at', 'by', 'from', 'as', 'into', 'through', 'and', 'but', 'if',
    'or', 'not', 'this', 'that', 'these', 'those', 'it', 'its', 'how', 'what',
    'where', 'when', 'why', 'which', 'who', 'whom',
  ]);

  // Underscores or capitals mark a piece as identifier-like, which exempts it
  // from stop-word filtering.
  const looksLikeIdentifier = (word: string): boolean =>
    word.includes('_') || /[A-Z]/.test(word);

  const kept: string[] = [];
  for (const piece of query.split(/[\s,;:]+/)) {
    const word = piece.trim();
    if (word.length < 2) continue;
    if (stopWords.has(word.toLowerCase()) && !looksLikeIdentifier(word)) continue;
    kept.push(word);
  }
  return kept;
}

/**
 * Rank results by how many of the query tokens they contain.
 *
 * Each token is matched case-insensitively as a literal substring against
 * "<file> <content>". Results gain `matchCount` and `matchScore`
 * (matchCount / tokens.length) and are ordered by score descending, then by line
 * number ascending. With zero or one token the input array is returned untouched.
 *
 * @param results - Search results
 * @param tokens - Query tokens
 * @returns Results with match scores, best matches first
 */
function scoreByTokenMatch(results: ExactMatch[], tokens: string[]): ExactMatch[] {
  if (tokens.length <= 1) return results;

  // One literal, case-insensitive matcher per token
  const matchers: RegExp[] = [];
  for (const token of tokens) {
    matchers.push(new RegExp(token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'i'));
  }

  const scored = results.map(entry => {
    const haystack = `${entry.file || ''} ${entry.content || ''}`;
    const matchCount = matchers.filter(matcher => matcher.test(haystack)).length;
    return {
      ...entry,
      matchScore: matchCount / tokens.length,
      matchCount,
    };
  });

  scored.sort((left, right) => {
    const byScore = right.matchScore - left.matchScore;
    if (byScore !== 0) return byScore;
    return (left.line || 0) - (right.line || 0);
  });

  return scored;
}

/** Outcome of classifyIntent: the chosen backend plus an explanation. */
interface Classification {
  mode: string;        // classifyIntent assigns 'ripgrep', 'exact' or 'hybrid'
  confidence: number;  // Fixed per routing rule, between 0 and 1
  reasoning: string;   // Human-readable summary of the detected patterns
}

/** A content match with position information, optionally scored by scoreByTokenMatch. */
interface ExactMatch {
  file: string;
  line: number;
  column: number;
  content: string;
  matchScore?: number;  // Token match ratio (0-1) for multi-word queries
  matchCount?: number;  // Number of tokens matched
}

/** A relationship between symbols attached to a semantic match. */
interface RelationshipInfo {
  type: string;           // 'calls', 'imports', 'called_by', 'imported_by'
  direction: 'outgoing' | 'incoming';
  target?: string;        // Target symbol name (for outgoing)
  source?: string;        // Source symbol name (for incoming)
  file: string;           // File path
  line?: number;          // Line number
}

/** A scored match as parsed from CodexLens JSON search output. */
interface SemanticMatch {
  file: string;
  score: number;
  content: string;
  symbol: string | null;  // null when the backend reports no symbol
  relationships?: RelationshipInfo[];
}

// NOTE(review): GraphMatch is not referenced in the visible part of this file - confirm usage
interface GraphMatch {
  file: string;
  symbols: unknown;
  relationships: unknown[];
}

// File match for find_files action (path-based search)
interface FileMatch {
  path: string;
  type: 'file' | 'directory';
  name: string;       // Filename only
  extension?: string; // File extension (without dot)
}

// Pagination details reported in SearchMetadata.pagination
interface PaginationInfo {
  offset: number;     // Starting index of returned results
  limit: number;      // Number of results requested
  total: number;      // Total number of results found
  has_more: boolean;  // True if more results are available
}

/**
 * Descriptive information attached to a SearchResult.
 * Every field is optional; each action or mode fills in the subset it knows about.
 */
interface SearchMetadata {
  mode?: string;
  backend?: string;
  count?: number;
  query?: string;
  pattern?: string;  // For find_files action
  // Classification details (set by executeAutoMode from classifyIntent)
  classified_as?: string;
  confidence?: number;
  reasoning?: string;
  embeddings_coverage_percent?: number;
  warning?: string;
  note?: string;
  index_status?: 'indexed' | 'not_indexed' | 'partial';
  fallback?: string;  // Fallback mode used (e.g., 'fuzzy')
  fallback_history?: string[];
  suggested_weights?: Record<string, number>;
  // Tokenization metadata (ripgrep mode)
  tokens?: string[];   // Query tokens used for multi-word search
  tokenized?: boolean; // Whether tokenization was applied
  // Pagination metadata
  pagination?: PaginationInfo;
  // Performance timing data (when SMART_SEARCH_TIMING=1 or DEBUG includes 'timing')
  timing?: TimingData;
  // Init action specific (also set by the update and watch actions)
  action?: string;
  path?: string;
  // Most recent progress event reported by CodexLens
  progress?: {
    stage: string;
    message: string;
    percent: number;
    filesProcessed?: number;
    totalFiles?: number;
  };
  // Trailing progress events (the init/update actions keep the last 5)
  progressHistory?: ProgressInfo[];
}

/** Envelope returned by every action handler in this module. */
interface SearchResult {
  success: boolean;
  results?: ExactMatch[] | SemanticMatch[] | GraphMatch[] | FileMatch[] | unknown;
  extra_files?: string[];  // Additional file paths without content
  output?: string;
  metadata?: SearchMetadata;
  error?: string;          // Set when success is false
  status?: unknown;        // Index status payload (status action)
  message?: string;
}

/** Embedding model details copied from the CodexLens status output. */
interface ModelInfo {
  model_profile?: string;
  model_name?: string;
  embedding_dim?: number;
  backend?: string;
  created_at?: string;
  updated_at?: string;
}

/** Summary of the CodexLens index state as produced by checkIndexStatus. */
interface IndexStatus {
  indexed: boolean;
  has_embeddings: boolean;               // True when embeddings coverage is at least 50%
  file_count?: number;
  embeddings_coverage_percent?: number;
  total_chunks?: number;
  model_info?: ModelInfo | null;         // null when the status output carries no model info
  warning?: string;                      // Hint for the user when the index is missing or incomplete
}

/**
 * Strip ANSI escape sequences from a string (for JSON parsing).
 *
 * Removes every CSI sequence - ESC, '[', optional parameter bytes (0x30-0x3F),
 * optional intermediate bytes (0x20-0x2F) and one final byte (0x40-0x7E). That
 * covers colour codes (`ESC[31m`) as well as erase/cursor/mode sequences such as
 * `ESC[2K` or `ESC[?25l`, which would otherwise corrupt JSON output.
 *
 * @param str - Raw CLI output
 * @returns The text with all CSI sequences removed
 */
function stripAnsi(str: string): string {
  return str.replace(/\x1b\[[0-?]*[ -\/]*[@-~]/g, '');
}

/** Default maximum content length to return (avoid excessive output) */
const DEFAULT_MAX_CONTENT_LENGTH = 200;

/**
 * Truncate content to the specified length and append an ellipsis.
 *
 * Empty, null or undefined content yields ''. Content that already fits is returned
 * unchanged. The cut never lands inside a UTF-16 surrogate pair: if it would, the
 * dangling high surrogate is dropped so the result never ends in half a character.
 *
 * @param content - The content to truncate
 * @param maxLength - Maximum length in UTF-16 code units (default: 200)
 * @returns The possibly shortened content, followed by '...' when shortened
 */
function truncateContent(content: string | null | undefined, maxLength: number = DEFAULT_MAX_CONTENT_LENGTH): string {
  if (!content) return '';
  if (content.length <= maxLength) return content;

  let cut = maxLength;
  if (cut > 0) {
    const lastKept = content.charCodeAt(cut - 1);
    const firstDropped = content.charCodeAt(cut);
    const splitsPair =
      lastKept >= 0xd800 && lastKept <= 0xdbff &&
      firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    if (splitsPair) cut -= 1;
  }

  return content.slice(0, cut) + '...';
}

/**
 * Partition a ranked result list into "full" results and a list of extra file paths.
 *
 * The first `fullContentLimit` entries are returned as-is. The following
 * `extraFilesCount` entries contribute only their `file` path; duplicate paths among
 * those extras are collapsed, keeping first-seen order.
 *
 * @param allResults - All search results (must have 'file' property)
 * @param fullContentLimit - Number of results with full content (default: 5)
 * @param extraFilesCount - Number of additional file-only results (default: 10)
 * @returns The leading results and the de-duplicated overflow file paths
 */
function splitResultsWithExtraFiles<T extends { file: string }>(
  allResults: T[],
  fullContentLimit: number = 5,
  extraFilesCount: number = 10
): { results: T[]; extra_files: string[] } {
  const overflowEnd = fullContentLimit + extraFilesCount;

  const overflowFiles = new Set<string>();
  for (const overflow of allResults.slice(fullContentLimit, overflowEnd)) {
    overflowFiles.add(overflow.file);
  }

  return {
    results: allResults.slice(0, fullContentLimit),
    extra_files: Array.from(overflowFiles),
  };
}

/**
 * Query CodexLens for the index state of a directory.
 *
 * Runs `status --json` with the directory as cwd and condenses the response into an
 * IndexStatus. This function never throws:
 * - CLI call rejects            -> not indexed, warning 'CodexLens not available'
 * - CLI reports failure         -> not indexed, "no index found" warning
 * - output cannot be processed  -> not indexed, warning 'Failed to parse index status'
 *
 * Embeddings count as available once coverage reaches 50%.
 *
 * @param path - Directory path to check
 * @returns Index status
 */
async function checkIndexStatus(path: string = '.'): Promise<IndexStatus> {
  const missingIndexWarning = 'No CodexLens index found. Run smart_search(action="init") to create index for better search results.';
  const notIndexed = (warning: string): IndexStatus => ({
    indexed: false,
    has_embeddings: false,
    warning,
  });

  try {
    const cli = await executeCodexLens(['status', '--json'], { cwd: path });
    if (!cli.success) {
      return notIndexed(missingIndexWarning);
    }

    try {
      // The CLI may colourise its output; strip escape codes before parsing.
      const payload = JSON.parse(stripAnsi(cli.output || '{}'));
      // The status command may wrap its data as {success, result: {...}}.
      const status = payload.result || payload;
      const embeddings = status.embeddings || {};
      const coverage = embeddings.coverage_percent || 0;
      const rawModelInfo = embeddings.model_info;

      const indexed = status.projects_count > 0 || status.total_files > 0;

      let warning: string | undefined;
      if (!indexed) {
        warning = missingIndexWarning;
      } else if (coverage === 0) {
        warning = 'Index exists but no embeddings generated. Run: codexlens embeddings-generate --recursive';
      } else if (coverage < 50) {
        warning = `Embeddings coverage is ${coverage.toFixed(1)}% (below 50%). Hybrid search will use exact mode. Run: codexlens embeddings-generate --recursive`;
      }

      return {
        indexed,
        has_embeddings: coverage >= 50,
        file_count: status.total_files,
        embeddings_coverage_percent: coverage,
        total_chunks: embeddings.total_chunks || 0,
        // null (rather than undefined) keeps the key present when serialised to JSON
        model_info: rawModelInfo
          ? {
              model_profile: rawModelInfo.model_profile,
              model_name: rawModelInfo.model_name,
              embedding_dim: rawModelInfo.embedding_dim,
              backend: rawModelInfo.backend,
              created_at: rawModelInfo.created_at,
              updated_at: rawModelInfo.updated_at,
            }
          : null,
        warning,
      };
    } catch {
      return notIndexed('Failed to parse index status');
    }
  } catch {
    return notIndexed('CodexLens not available');
  }
}

/**
 * Detection heuristics for intent classification
 */

/**
 * Heuristic: is the query a plain literal?
 * True for a single run of letters, digits, '_' or '-', or for text that starts and
 * ends with a quote character (single or double).
 */
function detectLiteral(query: string): boolean {
  const isBareWord = /^[a-zA-Z0-9_-]+$/.test(query);
  if (isBareWord) return true;
  return /^["'].*["']$/.test(query);
}

/**
 * Heuristic: does the query contain at least one regex metacharacter
 * (. * + ? ^ $ { } ( ) | [ ] or backslash)?
 */
function detectRegex(query: string): boolean {
  return query.search(/[.*+?^${}()|[\]\\]/) !== -1;
}

/**
 * Detect natural language query (sentence structure, questions, multi-word phrases).
 *
 * A query counts as natural language when it has at least three whitespace-separated
 * words or ends with a question mark. Surrounding whitespace is ignored so that a
 * padded single word (e.g. " foo ") is not miscounted as three words.
 *
 * @param query - Search query string
 * @returns True when the query looks like natural language
 */
function detectNaturalLanguage(query: string): boolean {
  const trimmed = query.trim();
  if (trimmed === '') return false;

  const wordCount = trimmed.split(/\s+/).length;
  return wordCount >= 3 || trimmed.endsWith('?');
}

/**
 * Heuristic: does the query look like a file path?
 * True when it contains a '/' or '\' separator, or ends in a dot followed by a
 * 2-4 letter extension (case-insensitive).
 */
function detectFilePath(query: string): boolean {
  const hasSeparator = query.includes('/') || query.includes('\\');
  return hasSeparator || /\.[a-z]{2,4}$/i.test(query);
}

/**
 * Heuristic: does the query mention a code relationship?
 * True when one of the keywords import, export, use(s), depend(s), call(s) or
 * extend(s) occurs (case-insensitive) immediately followed by whitespace.
 */
function detectRelationship(query: string): boolean {
  const relationKeyword = /(import|export|uses?|depends?|calls?|extends?)\s/i;
  return query.match(relationKeyword) !== null;
}

/**
 * Heuristic: does the query look like code rather than prose?
 * True when any of the following holds:
 * - it is a single identifier;
 * - it has at most two whitespace-separated words and contains code punctuation;
 * - it contains a typical regex fragment (`.*`, `\(`, `\[` or `\s`);
 * - it is a dotted pair of identifiers such as `foo.bar`.
 */
function looksLikeCodeQuery(query: string): boolean {
  const wordCount = query.split(/\s+/).length;

  const signals = [
    /^[a-zA-Z_][a-zA-Z0-9_]*$/.test(query),
    /[:.<>\-=(){}[\]]/.test(query) && wordCount <= 2,
    /\.\*|\\\(|\\\[|\\s/.test(query),
    /^[a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z_][a-zA-Z0-9_]*$/.test(query),
  ];

  return signals.some(Boolean);
}

/**
 * Choose a search backend for a query based on its shape and on index availability.
 *
 * Routing rules, evaluated in order:
 * 1. no index                                   -> 'ripgrep' (confidence 1.0)
 * 2. code-like or regex-like query              -> 'exact'   (confidence 0.95)
 * 3. natural language and sufficient embeddings -> 'hybrid'  (confidence 0.9)
 * 4. anything else                              -> 'exact'   (confidence 0.8)
 *
 * @param query - Search query string
 * @param hasIndex - Whether CodexLens index exists
 * @param hasSufficientEmbeddings - Whether embeddings coverage >= 50%
 * @returns Classification result, including a reasoning string listing every detected pattern
 */
function classifyIntent(query: string, hasIndex: boolean = false, hasSufficientEmbeddings: boolean = false): Classification {
  const isCodeQuery = looksLikeCodeQuery(query);
  const isRegexPattern = detectRegex(query);
  const isNaturalLanguage = detectNaturalLanguage(query);

  const route = (): [string, number] => {
    if (!hasIndex) return ['ripgrep', 1.0];
    if (isCodeQuery || isRegexPattern) return ['exact', 0.95];
    if (isNaturalLanguage && hasSufficientEmbeddings) return ['hybrid', 0.9];
    return ['exact', 0.8];
  };
  const [mode, confidence] = route();

  // Label order here determines the order in the reasoning string.
  const signals: Array<[boolean, string]> = [
    [detectLiteral(query), 'literal'],
    [isRegexPattern, 'regex'],
    [isNaturalLanguage, 'natural language'],
    [detectFilePath(query), 'file path'],
    [detectRelationship(query), 'relationship'],
    [isCodeQuery, 'code identifier'],
  ];
  const detectedPatterns = signals
    .filter(([hit]) => hit)
    .map(([, label]) => label);

  const reasoning = `Query classified as ${mode} (confidence: ${confidence.toFixed(2)}, detected: ${detectedPatterns.join(', ')}, index: ${hasIndex ? 'available' : 'not available'}, embeddings: ${hasSufficientEmbeddings ? 'sufficient' : 'insufficient'})`;

  return { mode, confidence, reasoning };
}

/**
 * Report whether an executable can be found on PATH.
 *
 * Runs the platform's lookup command (`where` on Windows, `which` elsewhere) with
 * its output discarded; any failure of that command is treated as "not available".
 *
 * @param toolName - Tool executable name
 * @returns True if available
 */
function checkToolAvailability(toolName: string): boolean {
  const lookupCommand = process.platform === 'win32' ? 'where' : 'which';

  try {
    execSync(`${lookupCommand} ${toolName}`, { stdio: 'ignore' });
  } catch {
    return false;
  }
  return true;
}

/**
 * Build ripgrep command arguments.
 * Supports tokenized multi-word queries with OR matching.
 *
 * Argument layout: fixed flags (`-n --color=never --json`), exclusion globs (unless
 * hidden files are included), optional `--ignore-case`, `-C <n>`, `--max-count <n>`
 * and `--hidden`, then the pattern(s), then the search paths.
 *
 * Patterns are always passed through `-e`. That keeps a query beginning with '-'
 * (for example `--pre=...`) from being interpreted as a ripgrep option, which the
 * previous positional form (`-F <query>`) allowed.
 *
 * Note: ripgrep applies `--max-count` per file, not to the overall result set.
 *
 * @param params - Search parameters
 * @returns Command, arguments, and tokens used
 */
function buildRipgrepCommand(params: {
  query: string;
  paths: string[];
  contextLines: number;
  maxResults: number;
  includeHidden: boolean;
  regex?: boolean;
  caseSensitive?: boolean;
  tokenize?: boolean;
}): { command: string; args: string[]; tokens: string[] } {
  const { query, paths = ['.'], contextLines = 0, maxResults = 10, includeHidden = false, regex = false, caseSensitive = true, tokenize = true } = params;

  const args = [
    '-n',
    '--color=never',
    '--json',
  ];

  // Add file filtering (unless includeHidden is true)
  if (!includeHidden) {
    args.push(...buildExcludeArgs());
  }

  // Case sensitivity
  if (!caseSensitive) {
    args.push('--ignore-case');
  }

  if (contextLines > 0) {
    args.push('-C', contextLines.toString());
  }

  if (maxResults > 0) {
    args.push('--max-count', maxResults.toString());
  }

  if (includeHidden) {
    args.push('--hidden');
  }

  // Tokenize query for multi-word OR matching
  const tokens = tokenize ? tokenizeQuery(query) : [query];

  if (tokens.length > 1) {
    // Multi-token: one -e pattern per token (ripgrep ORs multiple patterns).
    // Outside regex mode each token is escaped so it matches literally.
    for (const token of tokens) {
      const pattern = regex ? token : token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      args.push('-e', pattern);
    }
  } else {
    // Single token or no tokenization: search for the original query.
    // -F switches to fixed-string matching; -e marks the next argument as the
    // pattern even when it starts with a dash.
    if (!regex) {
      args.push('-F');
    }
    args.push('-e', query);
  }

  args.push(...paths);

  return { command: 'rg', args, tokens };
}

/**
 * Action: init - build a CodexLens full-text index without embeddings.
 *
 * Runs `index init <path> --no-embeddings` (plus `--language a,b` when languages are
 * given) with a 30 minute timeout and collects the progress events emitted by the
 * CLI. The result metadata carries the latest progress event and the last five
 * events. Semantic/vector indexing is not performed here.
 *
 * @param params - Parsed tool parameters (`path`, `languages`)
 * @returns Success flag and error from the CLI, a hint message on success, and metadata
 */
async function executeInitAction(params: Params): Promise<SearchResult> {
  const { path = '.', languages } = params;

  const readiness = await ensureCodexLensReady();
  if (!readiness.ready) {
    return {
      success: false,
      error: `CodexLens not available: ${readiness.error}. CodexLens will be auto-installed on first use.`,
    };
  }

  // FTS-only index via the 'index init' subcommand
  const cliArgs = ['index', 'init', path, '--no-embeddings'];
  if (languages && languages.length > 0) {
    cliArgs.push('--language', languages.join(','));
  }

  const progressLog: ProgressInfo[] = [];
  let latestProgress: ProgressInfo | null = null;

  const outcome = await executeCodexLens(cliArgs, {
    cwd: path,
    timeout: 1800000, // 30 minutes for large codebases
    onProgress: (progress: ProgressInfo) => {
      progressLog.push(progress);
      latestProgress = progress;
    },
  });

  const metadata: SearchMetadata = { action: 'init', path };

  if (latestProgress !== null) {
    // Cast needed: the compiler does not see the assignment made inside the callback.
    const { stage, message, percent, filesProcessed, totalFiles } = latestProgress as ProgressInfo;
    metadata.progress = { stage, message, percent, filesProcessed, totalFiles };
  }

  if (progressLog.length > 0) {
    metadata.progressHistory = progressLog.slice(-5); // Keep last 5 progress updates
  }

  let message: string | undefined;
  if (outcome.success) {
    message = `FTS index created for ${path}. Note: For semantic/vector search, create vector index via "ccw view" dashboard or run "codexlens init ${path}" (without --no-embeddings).`;
  }

  return {
    success: outcome.success,
    error: outcome.error,
    message,
    metadata,
  };
}

/**
 * Action: status - report the CodexLens index state for a directory.
 *
 * Always returns success; the IndexStatus is placed in `status`, and `message` is the
 * status warning when there is one, otherwise a one-line summary.
 *
 * @param params - Parsed tool parameters (`path`, defaulting to '.')
 */
async function executeStatusAction(params: Params): Promise<SearchResult> {
  const { path = '.' } = params;
  const indexStatus = await checkIndexStatus(path);

  const indexedLabel = indexStatus.indexed ? 'indexed' : 'not indexed';
  const embeddingsLabel = indexStatus.has_embeddings ? 'available' : 'not available';
  const summary = `Index status: ${indexedLabel}, embeddings: ${embeddingsLabel}`;

  return {
    success: true,
    status: indexStatus,
    message: indexStatus.warning || summary,
  };
}

/**
 * Action: update - refresh an existing index for changed files.
 *
 * Requires CodexLens to be ready and the directory to be indexed already. Runs
 * `index init <path>` (no --force, plus `--language a,b` when languages are given)
 * with a 10 minute timeout and records progress events the same way the init action
 * does.
 *
 * @param params - Parsed tool parameters (`path`, `languages`)
 * @returns Success flag and error from the CLI, a message on success, and metadata
 */
async function executeUpdateAction(params: Params): Promise<SearchResult> {
  const { path = '.', languages } = params;

  const readiness = await ensureCodexLensReady();
  if (!readiness.ready) {
    return {
      success: false,
      error: `CodexLens not available: ${readiness.error}`,
    };
  }

  // An incremental update only makes sense on top of an existing index.
  const { indexed } = await checkIndexStatus(path);
  if (!indexed) {
    return {
      success: false,
      error: `Directory not indexed. Run smart_search(action="init") first.`,
    };
  }

  // Incremental run of the 'index init' subcommand (no --force)
  const cliArgs = ['index', 'init', path];
  if (languages && languages.length > 0) {
    cliArgs.push('--language', languages.join(','));
  }

  const progressLog: ProgressInfo[] = [];
  let latestProgress: ProgressInfo | null = null;

  const outcome = await executeCodexLens(cliArgs, {
    cwd: path,
    timeout: 600000, // 10 minutes for incremental updates
    onProgress: (progress: ProgressInfo) => {
      progressLog.push(progress);
      latestProgress = progress;
    },
  });

  const metadata: SearchMetadata = { action: 'update', path };

  if (latestProgress !== null) {
    // Cast needed: the compiler does not see the assignment made inside the callback.
    const { stage, message, percent, filesProcessed, totalFiles } = latestProgress as ProgressInfo;
    metadata.progress = { stage, message, percent, filesProcessed, totalFiles };
  }

  if (progressLog.length > 0) {
    metadata.progressHistory = progressLog.slice(-5);
  }

  let message: string | undefined;
  if (outcome.success) {
    message = `Incremental update completed for ${path}`;
  }

  return {
    success: outcome.success,
    error: outcome.error,
    message,
    metadata,
  };
}

/**
 * Action: watch - launch the CodexLens file watcher for a directory.
 *
 * Requires CodexLens to be ready and the directory to be indexed. Invokes
 * `watch <path> --debounce <ms>` (plus `--language a,b` when languages are given)
 * with a 5 second timeout and then reports success.
 *
 * NOTE(review): the outcome of the CLI call is not inspected, so this action reports
 * success even if the watcher failed to start - confirm whether that is intended.
 *
 * @param params - Parsed tool parameters (`path`, `languages`, `debounce`)
 */
async function executeWatchAction(params: Params): Promise<SearchResult> {
  const { path = '.', languages, debounce = 1000 } = params;

  const readiness = await ensureCodexLensReady();
  if (!readiness.ready) {
    return {
      success: false,
      error: `CodexLens not available: ${readiness.error}`,
    };
  }

  const { indexed } = await checkIndexStatus(path);
  if (!indexed) {
    return {
      success: false,
      error: `Directory not indexed. Run smart_search(action="init") first.`,
    };
  }

  const cliArgs = ['watch', path, '--debounce', String(debounce)];
  if (languages && languages.length > 0) {
    cliArgs.push('--language', languages.join(','));
  }

  // Short timeout: this only covers the initial startup window.
  await executeCodexLens(cliArgs, {
    cwd: path,
    timeout: 5000,
  });

  const metadata: SearchMetadata = {
    action: 'watch',
    path,
    note: 'Watcher runs in background. Changes are indexed automatically with debounce.',
  };

  return {
    success: true,
    message: `File watcher started for ${path}. Use Ctrl+C or kill the process to stop.`,
    metadata,
  };
}

/**
 * Mode: fuzzy - FTS + ripgrep fusion with RRF ranking.
 *
 * Runs the CodexLens exact (FTS) search and the ripgrep search in parallel, merges
 * whatever succeeded using applyRRFFusion with equal weights, and normalises the
 * fused items into a common shape. The first `maxResults` items are returned in
 * full; up to `extraFilesCount` further items are reported as file paths only.
 *
 * If neither backend produced results the call fails, and the error message lists
 * the reason for each backend - both for rejected promises and for backends that
 * resolved with `success: false`.
 *
 * @param params - Parsed tool parameters (`query` is required)
 * @returns Fused search results with metadata, or a failure result
 */
async function executeFuzzyMode(params: Params): Promise<SearchResult> {
  const { query, maxResults = 5, extraFilesCount = 10 } = params;

  if (!query) {
    return {
      success: false,
      error: 'Query is required for search',
    };
  }

  const timer = createTimer();

  // Run both searches in parallel; allSettled keeps one failure from hiding the other result
  const [ftsResult, ripgrepResult] = await Promise.allSettled([
    executeCodexLensExactMode(params),
    executeRipgrepMode(params),
  ]);
  timer.mark('parallel_search');

  // Collect results from both sources
  const resultsMap = new Map<string, any[]>();

  // Add FTS results if successful
  if (ftsResult.status === 'fulfilled' && ftsResult.value.success && ftsResult.value.results) {
    resultsMap.set('exact', ftsResult.value.results as any[]);
  }

  // Add ripgrep results if successful
  if (ripgrepResult.status === 'fulfilled' && ripgrepResult.value.success && ripgrepResult.value.results) {
    resultsMap.set('ripgrep', ripgrepResult.value.results as any[]);
  }

  // If both failed, return an error that explains why each backend was unusable
  if (resultsMap.size === 0) {
    const describeFailure = (label: string, outcome: PromiseSettledResult<SearchResult>): string => {
      if (outcome.status === 'rejected') return `${label}: ${outcome.reason}`;
      if (!outcome.value.success) return `${label}: ${outcome.value.error ?? 'unknown error'}`;
      return `${label}: no results returned`;
    };
    const errors = [
      describeFailure('FTS', ftsResult),
      describeFailure('Ripgrep', ripgrepResult),
    ];
    return {
      success: false,
      error: `Both search backends failed: ${errors.join('; ')}`,
    };
  }

  // Apply RRF fusion with fuzzy-optimized weights
  // Fuzzy mode: balanced between exact and ripgrep
  const fusionWeights = { exact: 0.5, ripgrep: 0.5 };
  const totalToFetch = maxResults + extraFilesCount;
  const fusedResults = applyRRFFusion(resultsMap, fusionWeights, totalToFetch);
  timer.mark('rrf_fusion');

  // Normalize results format (the backends differ in field names, e.g. file vs path)
  const normalizedResults = fusedResults.map((item: any) => ({
    file: item.file || item.path,
    line: item.line || 0,
    column: item.column || 0,
    content: item.content || '',
    score: item.fusion_score || 0,
    matchCount: item.matchCount,
    matchScore: item.matchScore,
  }));

  // Split results: first N with full content, rest as file paths only
  const { results, extra_files } = splitResultsWithExtraFiles(normalizedResults, maxResults, extraFilesCount);

  // Log timing
  timer.log();
  const timings = timer.getTimings();

  return {
    success: true,
    results,
    extra_files: extra_files.length > 0 ? extra_files : undefined,
    metadata: {
      mode: 'fuzzy',
      backend: 'fts+ripgrep',
      count: results.length,
      query,
      note: `Fuzzy search using RRF fusion of FTS and ripgrep (weights: exact=${fusionWeights.exact}, ripgrep=${fusionWeights.ripgrep})`,
      timing: TIMING_ENABLED ? timings : undefined,
    },
  };
}

/**
 * Mode: auto - classify the query and dispatch to a matching backend.
 *
 * Looks up the index status, asks classifyIntent for a mode and runs:
 * 'hybrid' -> executeHybridMode, 'exact' -> executeCodexLensExactMode, anything
 * else (including 'ripgrep') -> executeRipgrepMode. When the backend result has
 * metadata, the classification details and index information are added to it.
 *
 * @param params - Parsed tool parameters (`query` is required)
 * @returns The backend's result, annotated with classification metadata
 */
async function executeAutoMode(params: Params): Promise<SearchResult> {
  const { query, path = '.' } = params;

  if (!query) {
    return {
      success: false,
      error: 'Query is required for search action',
    };
  }

  const indexStatus = await checkIndexStatus(path);

  // has_embeddings already reflects the 50% coverage threshold
  const { mode, confidence, reasoning } = classifyIntent(
    query,
    indexStatus.indexed,
    indexStatus.has_embeddings
  );

  let result: SearchResult;
  if (mode === 'hybrid') {
    result = await executeHybridMode(params);
  } else if (mode === 'exact') {
    result = await executeCodexLensExactMode(params);
  } else {
    // 'ripgrep' and any unrecognised mode both end up here
    result = await executeRipgrepMode(params);
  }

  const meta = result.metadata;
  if (meta) {
    meta.classified_as = mode;
    meta.confidence = confidence;
    meta.reasoning = reasoning;
    meta.embeddings_coverage_percent = indexStatus.embeddings_coverage_percent;

    if (!indexStatus.indexed) {
      meta.index_status = 'not_indexed';
    } else {
      meta.index_status = indexStatus.has_embeddings ? 'indexed' : 'partial';
    }

    if (indexStatus.warning) {
      meta.warning = indexStatus.warning;
    }
  }

  return result;
}

/**
 * Mode: ripgrep - Fast literal string matching using ripgrep
 * No index required, fallback to CodexLens if ripgrep unavailable
 * Supports tokenized multi-word queries with OR matching and result ranking
 *
 * Flow:
 * 1. If `rg` is not available, run a CodexLens `fts` search instead and return
 *    its results tagged with backend 'codexlens-fallback'.
 * 2. Otherwise spawn ripgrep, collect stdout, and parse it line by line as JSON,
 *    keeping only records whose `type` is 'match'.
 * 3. For multi-token queries the matches are re-ranked with `scoreByTokenMatch`.
 * 4. The ranked list is split into `results` (up to `maxResults`) and
 *    `extra_files` (up to `extraFilesCount`).
 *
 * The returned promise never rejects: spawn failures and unexpected exit codes
 * are reported as `{ success: false, error }`.
 *
 * @param params - Search parameters; `query` is required.
 * @returns Search result with `metadata.mode === 'ripgrep'` on the paths that set metadata.
 */
async function executeRipgrepMode(params: Params): Promise<SearchResult> {
  const { query, paths = [], contextLines = 0, maxResults = 5, extraFilesCount = 10, maxContentLength = 200, includeHidden = false, path = '.', regex = true, caseSensitive = true, tokenize = true } = params;

  if (!query) {
    return {
      success: false,
      error: 'Query is required for search',
    };
  }

  // Check if ripgrep is available
  const hasRipgrep = checkToolAvailability('rg');

  // Calculate total to fetch for split (full content + extra files)
  const totalToFetch = maxResults + extraFilesCount;

  // If ripgrep not available, fall back to CodexLens exact mode
  if (!hasRipgrep) {
    const readyStatus = await ensureCodexLensReady();
    if (!readyStatus.ready) {
      return {
        success: false,
        error: 'Neither ripgrep nor CodexLens available. Install ripgrep (rg) or CodexLens for search functionality.',
      };
    }

    // Use CodexLens fts mode as fallback
    const args = ['search', query, '--limit', totalToFetch.toString(), '--method', 'fts', '--json'];
    const result = await executeCodexLens(args, { cwd: path });

    if (!result.success) {
      return {
        success: false,
        error: result.error,
        metadata: {
          mode: 'ripgrep',
          backend: 'codexlens-fallback',
          count: 0,
          query,
        },
      };
    }

    // Parse results. The payload may be nested under `result.results`, under
    // `results`, or be the array itself; anything else yields an empty list.
    let allResults: SemanticMatch[] = [];
    try {
      const parsed = JSON.parse(stripAnsi(result.output || '{}'));
      const data = parsed.result?.results || parsed.results || parsed;
      allResults = (Array.isArray(data) ? data : []).map((item: any) => ({
        file: item.path || item.file,
        score: item.score || 0,
        content: truncateContent(item.content || item.excerpt, maxContentLength),
        symbol: item.symbol || null,
      }));
    } catch {
      // Keep empty results: unparseable output is reported as a successful search with no hits
    }

    // Split results: first N with full content, rest as file paths only
    const { results, extra_files } = splitResultsWithExtraFiles(allResults, maxResults, extraFilesCount);

    return {
      success: true,
      results,
      extra_files: extra_files.length > 0 ? extra_files : undefined,
      metadata: {
        mode: 'ripgrep',
        backend: 'codexlens-fallback',
        count: results.length,
        query,
        note: 'Using CodexLens exact mode (ripgrep not available)',
      },
    };
  }

  // Use ripgrep - request more results to support split
  // `paths` takes precedence; otherwise the single `path` is searched.
  const { command, args, tokens } = buildRipgrepCommand({
    query,
    paths: paths.length > 0 ? paths : [path],
    contextLines,
    maxResults: totalToFetch,  // Fetch more to support split
    includeHidden,
    regex,
    caseSensitive,
    tokenize,
  });

  // Wrap the child process in a promise that always resolves (never rejects).
  return new Promise((resolve) => {
    const child = spawn(command, args, {
      // `path` defaults to '.', so getProjectRoot() is only used when an empty string is passed explicitly
      cwd: path || getProjectRoot(),
      stdio: ['ignore', 'pipe', 'pipe'],
    });

    let stdout = '';
    let stderr = '';
    let resultLimitReached = false;

    // Buffer both streams completely; parsing happens once the process closes.
    child.stdout.on('data', (data) => {
      stdout += data.toString();
    });

    child.stderr.on('data', (data) => {
      stderr += data.toString();
    });

    child.on('close', (code) => {
      const allResults: ExactMatch[] = [];
      const lines = stdout.split('\n').filter((line) => line.trim());
      // Limit total results to prevent memory overflow (--max-count only limits per-file)
      const effectiveLimit = totalToFetch > 0 ? totalToFetch : 500;

      for (const line of lines) {
        // Stop collecting if we've reached the limit
        if (allResults.length >= effectiveLimit) {
          resultLimitReached = true;
          break;
        }

        try {
          // Each stdout line is expected to be one JSON record
          // (presumably buildRipgrepCommand passes --json - confirm there).
          const item = JSON.parse(line);

          if (item.type === 'match') {
            const match: ExactMatch = {
              file: item.data.path.text,
              line: item.data.line_number,
              // First submatch start shifted by one (presumably 0-based -> 1-based); column 1 if no submatch
              column:
                item.data.submatches && item.data.submatches[0]
                  ? item.data.submatches[0].start + 1
                  : 1,
              content: item.data.lines.text.trim(),
            };
            allResults.push(match);
          }
        } catch {
          // Lines that are not valid JSON (or lack the expected fields) are skipped
          continue;
        }
      }

      // Handle Windows device file errors gracefully (os error 1)
      // If we have results despite the error, return them as partial success
      // The second literal is presumably the localized (Chinese) Windows message for the same error.
      const isWindowsDeviceError = stderr.includes('os error 1') || stderr.includes('函数不正确');

      // Apply token-based scoring and sorting for multi-word queries
      // Results matching more tokens are ranked higher (exact matches first)
      const scoredResults = tokens.length > 1 ? scoreByTokenMatch(allResults, tokens) : allResults;

      // Exit code 0 and 1 are both treated as a successful run
      // (ripgrep conventionally exits 1 when nothing matched).
      if (code === 0 || code === 1 || (isWindowsDeviceError && scoredResults.length > 0)) {
        // Split results: first N with full content, rest as file paths only
        const { results, extra_files } = splitResultsWithExtraFiles(scoredResults, maxResults, extraFilesCount);

        // Build warning message for various conditions
        const warnings: string[] = [];
        if (resultLimitReached) {
          warnings.push(`Result limit reached (${effectiveLimit}). Use a more specific query or increase limit.`);
        }
        if (isWindowsDeviceError) {
          warnings.push('Some Windows device files were skipped');
        }

        resolve({
          success: true,
          results,
          extra_files: extra_files.length > 0 ? extra_files : undefined,
          metadata: {
            mode: 'ripgrep',
            backend: 'ripgrep',
            count: results.length,
            query,
            tokens: tokens.length > 1 ? tokens : undefined,  // Include tokens in metadata for debugging
            tokenized: tokens.length > 1,
            ...(warnings.length > 0 && { warning: warnings.join('; ') }),
          },
        });
      } else if (isWindowsDeviceError && allResults.length === 0) {
        // Windows device error but no results - might be the only issue
        resolve({
          success: true,
          results: [],
          metadata: {
            mode: 'ripgrep',
            backend: 'ripgrep',
            count: 0,
            query,
            warning: 'No matches found (some Windows device files were skipped)',
          },
        });
      } else {
        // Any other exit code is a genuine failure; surface stderr to the caller
        resolve({
          success: false,
          error: `ripgrep execution failed with code ${code}: ${stderr}`,
          results: [],
        });
      }
    });

    // Spawn-level failure (e.g. the binary could not be started)
    child.on('error', (error) => {
      resolve({
        success: false,
        error: `Failed to spawn ripgrep: ${error.message}`,
        results: [],
      });
    });
  });
}

/**
 * Mode: exact - CodexLens FTS search with an automatic fuzzy retry.
 *
 * Runs `search --method fts` through CodexLens. When that run succeeds but
 * yields no parsable hits, the same search is repeated once with
 * `--use-fuzzy`; if the retry finds anything, the response is flagged with
 * `fallback: 'fuzzy'`. Output that cannot be parsed counts as "no hits".
 *
 * @param params - Search parameters; `query` is required.
 * @returns Result with `metadata.mode === 'exact'`, or a failure result when the
 *          query is missing, CodexLens is not ready, or the first search fails.
 */
async function executeCodexLensExactMode(params: Params): Promise<SearchResult> {
  const {
    query,
    path = '.',
    maxResults = 5,
    extraFilesCount = 10,
    maxContentLength = 200,
    enrich = false,
    excludeExtensions,
    codeOnly = false,
    offset = 0,
  } = params;

  if (!query) {
    return { success: false, error: 'Query is required for search' };
  }

  const readyStatus = await ensureCodexLensReady();
  if (!readyStatus.ready) {
    return { success: false, error: `CodexLens not available: ${readyStatus.error}` };
  }

  const indexStatus = await checkIndexStatus(path);

  // Over-fetch so the response can be split into full results + extra file paths.
  const totalToFetch = maxResults + extraFilesCount;

  // Assemble the CLI arguments; `methodFlags` are inserted right after `--method fts`.
  const buildSearchArgs = (methodFlags: string[]): string[] => {
    const cliArgs = [
      'search', query,
      '--limit', totalToFetch.toString(),
      '--offset', offset.toString(),
      '--method', 'fts',
      ...methodFlags,
      '--json',
    ];
    if (enrich) {
      cliArgs.push('--enrich');
    }
    if (codeOnly) {
      cliArgs.push('--code-only');
    }
    if (excludeExtensions && excludeExtensions.length > 0) {
      cliArgs.push('--exclude-extensions', excludeExtensions.join(','));
    }
    return cliArgs;
  };

  const exactRun = await executeCodexLens(buildSearchArgs([]), { cwd: path });

  if (!exactRun.success) {
    return {
      success: false,
      error: exactRun.error,
      metadata: {
        mode: 'exact',
        backend: 'codexlens',
        count: 0,
        query,
        warning: indexStatus.warning,
      },
    };
  }

  // Turn raw CLI output into matches; any parse/shape error yields an empty list.
  const parseMatches = (rawOutput: typeof exactRun.output): SemanticMatch[] => {
    try {
      const parsed = JSON.parse(stripAnsi(rawOutput || '{}'));
      const data = parsed.result?.results || parsed.results || parsed;
      const rows: any[] = Array.isArray(data) ? data : [];
      return rows.map((item: any) => ({
        file: item.path || item.file,
        score: item.score || 0,
        content: truncateContent(item.content || item.excerpt, maxContentLength),
        symbol: item.symbol || null,
      }));
    } catch {
      return [];
    }
  };

  let allResults = parseMatches(exactRun.output);

  // Nothing found: retry once with fuzzy matching enabled.
  if (allResults.length === 0) {
    const fuzzyRun = await executeCodexLens(buildSearchArgs(['--use-fuzzy']), { cwd: path });

    if (fuzzyRun.success) {
      allResults = parseMatches(fuzzyRun.output);

      if (allResults.length > 0) {
        const fuzzySplit = splitResultsWithExtraFiles(allResults, maxResults, extraFilesCount);
        return {
          success: true,
          results: fuzzySplit.results,
          extra_files: fuzzySplit.extra_files.length > 0 ? fuzzySplit.extra_files : undefined,
          metadata: {
            mode: 'exact',
            backend: 'codexlens',
            count: fuzzySplit.results.length,
            query,
            warning: indexStatus.warning,
            note: 'No exact matches found, showing fuzzy results',
            fallback: 'fuzzy',
          },
        };
      }
    }
  }

  // Exact hits, or an empty result when neither attempt produced anything.
  const { results, extra_files } = splitResultsWithExtraFiles(allResults, maxResults, extraFilesCount);

  return {
    success: true,
    results,
    extra_files: extra_files.length > 0 ? extra_files : undefined,
    metadata: {
      mode: 'exact',
      backend: 'codexlens',
      count: results.length,
      query,
      warning: indexStatus.warning,
    },
  };
}

/**
 * Mode: hybrid - Best quality semantic search
 * Uses CodexLens dense_rerank method (dense coarse + cross-encoder rerank)
 * Requires index with embeddings
 *
 * After the CLI call the raw hits go through a post-processing pipeline:
 * baseline ("hot spot") score filtering, noisy-file/extension filtering,
 * keyword boosting, score-diversity penalty, and a final descending sort.
 * Timing marks are recorded at each stage and logged before returning.
 *
 * @param params - Search parameters; `query` is required.
 * @returns Result with `metadata.mode === 'hybrid'`. Output that cannot be
 *          parsed or post-processed is reported as a successful search with
 *          no results and the raw output attached.
 */
async function executeHybridMode(params: Params): Promise<SearchResult> {
  const timer = createTimer();
  const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false, excludeExtensions, codeOnly = false, offset = 0 } = params;

  if (!query) {
    return {
      success: false,
      error: 'Query is required for search',
    };
  }

  // Check CodexLens availability
  const readyStatus = await ensureCodexLensReady();
  timer.mark('codexlens_ready_check');
  if (!readyStatus.ready) {
    return {
      success: false,
      error: `CodexLens not available: ${readyStatus.error}`,
    };
  }

  // Check index status (only its warning is used below)
  const indexStatus = await checkIndexStatus(path);
  timer.mark('index_status_check');

  // Request more results to support split (full content + extra files)
  const totalToFetch = maxResults + extraFilesCount;
  const args = ['search', query, '--limit', totalToFetch.toString(), '--offset', offset.toString(), '--method', 'dense_rerank', '--json'];
  if (enrich) {
    args.push('--enrich');
  }
  // Add code_only filter if requested
  if (codeOnly) {
    args.push('--code-only');
  }
  // Add exclude_extensions filter if provided
  if (excludeExtensions && excludeExtensions.length > 0) {
    args.push('--exclude-extensions', excludeExtensions.join(','));
  }
  const result = await executeCodexLens(args, { cwd: path });
  timer.mark('codexlens_search');

  if (!result.success) {
    timer.log();
    return {
      success: false,
      error: result.error,
      metadata: {
        mode: 'hybrid',
        backend: 'codexlens',
        count: 0,
        query,
        warning: indexStatus.warning,
      },
    };
  }

  // Parse results
  let allResults: SemanticMatch[] = [];
  let baselineInfo: { score: number; count: number } | null = null;

  try {
    const parsed = JSON.parse(stripAnsi(result.output || '{}'));
    const data = parsed.result?.results || parsed.results || parsed;
    allResults = (Array.isArray(data) ? data : []).map((item: any) => {
      const rawScore = item.score || 0;
      // Raw scores are treated as distances (lower is better) and converted to
      // similarities (higher is better): similarity = 1 / (1 + distance).
      // A missing or non-positive raw score maps to the maximum similarity of 1.
      const similarityScore = rawScore > 0 ? 1 / (1 + rawScore) : 1;
      return {
        file: item.path || item.file,
        score: similarityScore,
        content: truncateContent(item.content || item.excerpt, maxContentLength),
        symbol: item.symbol || null,
      };
    });
    timer.mark('parse_results');

    // Post-processing pipeline to improve semantic search quality
    // 0. Filter dominant baseline scores (hot spot detection)
    const baselineResult = filterDominantBaselineScores(allResults);
    allResults = baselineResult.filteredResults;
    baselineInfo = baselineResult.baselineInfo;

    // 1. Filter noisy files (coverage, node_modules, etc.) and excluded extensions
    allResults = filterNoisyFiles(allResults, { excludeExtensions, codeOnly });
    // 2. Boost results containing query keywords
    allResults = applyKeywordBoosting(allResults, query);
    // 3. Enforce score diversity (penalize identical scores)
    allResults = enforceScoreDiversity(allResults);
    // 4. Re-sort by adjusted scores
    allResults.sort((a, b) => b.score - a.score);
    timer.mark('post_processing');
  } catch {
    // Covers both JSON parse failures and errors thrown by the pipeline above.
    timer.log();
    return {
      success: true,
      results: [],
      output: result.output,
      metadata: {
        mode: 'hybrid',
        backend: 'codexlens',
        count: 0,
        query,
        warning: indexStatus.warning || 'Failed to parse JSON output',
      },
    };
  }

  // Split results: first N with full content, rest as file paths only
  const { results, extra_files } = splitResultsWithExtraFiles(allResults, maxResults, extraFilesCount);
  timer.mark('split_results');

  // Build metadata with baseline info if detected
  let note = 'Using dense_rerank (dense coarse + cross-encoder rerank) for semantic search';
  if (baselineInfo) {
    // baselineInfo.count is exactly the number of entries removed by the baseline
    // filter; a before/after length difference would also count files dropped by
    // the noisy-file filter.
    note += ` | Filtered ${baselineInfo.count} hot-spot results with baseline score ~${baselineInfo.score.toFixed(4)}`;
  }

  // Log timing data
  timer.log();
  const timings = timer.getTimings();

  return {
    success: true,
    results,
    extra_files: extra_files.length > 0 ? extra_files : undefined,
    metadata: {
      mode: 'hybrid',
      backend: 'codexlens',
      count: results.length,
      query,
      note,
      warning: indexStatus.warning,
      suggested_weights: getRRFWeights(query),
      timing: TIMING_ENABLED ? timings : undefined,
    },
  };
}

/**
 * Query intent used to adapt RRF weights (Python parity).
 *
 * Keep this logic aligned with CodexLens Python hybrid search:
 * `codex-lens/src/codexlens/search/hybrid_search.py`
 *
 * - 'keyword': the query shows code-like signals only
 * - 'semantic': the query shows natural-language signals only
 * - 'mixed': both kinds of signals, or neither
 */
export type QueryIntent = 'keyword' | 'semantic' | 'mixed';

// Python default: vector 60%, exact 30%, fuzzy 10%
// Used as the default `baseWeights` of getRRFWeights; the keys are the source
// names that applyRRFFusion looks up per result list.
const DEFAULT_RRF_WEIGHTS = {
  exact: 0.3,
  fuzzy: 0.1,
  vector: 0.6,
} as const;

/**
 * Rescales the given weights so that they add up to 1.
 *
 * If the total is not a finite positive number, nothing is rescaled and a
 * shallow copy of the input is returned instead.
 */
function normalizeWeights(weights: Record<string, number>): Record<string, number> {
  let total = 0;
  for (const value of Object.values(weights)) {
    total = total + value;
  }

  const canNormalize = Number.isFinite(total) && total > 0;
  if (!canNormalize) {
    return { ...weights };
  }

  const scaledPairs = Object.entries(weights).map(
    ([name, value]): [string, number] => [name, value / total],
  );
  return Object.fromEntries(scaledPairs);
}

/**
 * Classify a query as code-like, natural-language-like, or mixed.
 *
 * Code signals: `.`, `::`, `->`, a CamelCase hump, a snake_case word, or a
 * common programming keyword. Natural-language signals: more than five words,
 * a question mark, an interrogative, or one of a fixed list of verbs.
 *
 * @param query - Raw user query; surrounding whitespace is ignored.
 * @returns 'keyword' for code signals only, 'semantic' for natural-language
 *          signals only, and 'mixed' when both or neither are present
 *          (including an empty query).
 */
export function detectQueryIntent(query: string): QueryIntent {
  const text = query.trim();
  if (text.length === 0) {
    return 'mixed';
  }

  const lowered = text.toLowerCase();
  const words = text.split(/\s+/).filter(Boolean);

  const codeShapes = [/(::|->|\.)/, /[A-Z][a-z]+[A-Z]/, /\b\w+_\w+\b/];
  const looksLikeCode =
    codeShapes.some(shape => shape.test(text)) ||
    /\b(def|class|function|const|let|var|import|from|return|async|await|interface|type)\b/i.test(lowered);

  const proseShapes = [
    /\?/,
    /\b(how|what|why|when|where)\b/i,
    /\b(handle|explain|fix|implement|create|build|use|find|search|convert|parse|generate|support)\b/i,
  ];
  const looksLikeProse = words.length > 5 || proseShapes.some(shape => shape.test(text));

  // Both signals or neither signal: no clear intent.
  if (looksLikeCode === looksLikeProse) {
    return 'mixed';
  }
  return looksLikeCode ? 'keyword' : 'semantic';
}

/**
 * Map a query intent to normalized RRF weights (Python parity).
 *
 * - keyword: fixed exact-heavy preset (exact 0.5 / fuzzy 0.1 / vector 0.4)
 * - semantic: fixed vector-heavy preset (exact 0.2 / fuzzy 0.1 / vector 0.7)
 * - mixed: the caller's `baseWeights`
 *
 * `baseWeights` is only consulted for the 'mixed' intent. The result always
 * goes through `normalizeWeights`.
 */
export function adjustWeightsByIntent(
  intent: QueryIntent,
  baseWeights: Record<string, number>,
): Record<string, number> {
  switch (intent) {
    case 'keyword':
      return normalizeWeights({ exact: 0.5, fuzzy: 0.1, vector: 0.4 });
    case 'semantic':
      return normalizeWeights({ exact: 0.2, fuzzy: 0.1, vector: 0.7 });
    default:
      return normalizeWeights({ ...baseWeights });
  }
}

/**
 * Convenience wrapper: detect the intent of `query` and return the RRF weights
 * for it, starting from `baseWeights` (defaults to DEFAULT_RRF_WEIGHTS).
 */
export function getRRFWeights(
  query: string,
  baseWeights: Record<string, number> = DEFAULT_RRF_WEIGHTS,
): Record<string, number> {
  const intent = detectQueryIntent(query);
  return adjustWeightsByIntent(intent, baseWeights);
}

/**
 * Supporting data for the filterNoisyFiles post-processing step, which removes
 * irrelevant files from semantic search results using FILTER_CONFIG patterns.
 * The regexes are compiled once at module load rather than per result.
 */
// Pre-compile file exclusion regexes once (avoid recompilation in loop).
// Each pattern is regex-escaped first, then every (escaped) `*` becomes `.*`;
// the result is anchored so it must match the whole tested string.
const FILE_EXCLUDE_REGEXES = [...FILTER_CONFIG.exclude_files].map(pattern =>
  new RegExp('^' + pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&').replace(/\\\*/g, '.*') + '$')
);

// Non-code file extensions (for codeOnly filter).
// Stored lowercase and without a leading dot.
const NON_CODE_EXTENSIONS = new Set([
  'md', 'txt', 'json', 'yaml', 'yml', 'xml', 'csv', 'log',
  'ini', 'cfg', 'conf', 'toml', 'env', 'properties',
  'html', 'htm', 'svg', 'png', 'jpg', 'jpeg', 'gif', 'ico', 'webp',
  'pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx',
  'lock', 'sum', 'mod',
]);

// Options accepted by filterNoisyFiles.
interface FilterOptions {
  // Extensions to drop; compared case-insensitively, a leading dot is optional
  excludeExtensions?: string[];
  // When true, every extension in NON_CODE_EXTENSIONS is dropped as well
  codeOnly?: boolean;
}

/**
 * Remove results whose file should not appear in semantic search output.
 *
 * A result is dropped when:
 * - any path segment equals an entry of FILTER_CONFIG.exclude_directories,
 * - its filename matches one of FILE_EXCLUDE_REGEXES, or
 * - its extension is excluded (via `excludeExtensions`, plus
 *   NON_CODE_EXTENSIONS when `codeOnly` is set).
 *
 * Results without a file path are always kept.
 *
 * @param results - Matches to filter; the input array is not modified.
 * @param options - Optional extension filters.
 * @returns A new array containing only the results that passed every check.
 */
function filterNoisyFiles(results: SemanticMatch[], options: FilterOptions = {}): SemanticMatch[] {
  const { excludeExtensions = [], codeOnly = false } = options;

  // Build extension filter set (lowercase, without leading dot)
  const excludedExtSet = new Set(excludeExtensions.map(ext => ext.toLowerCase().replace(/^\./, '')));
  if (codeOnly) {
    NON_CODE_EXTENSIONS.forEach(ext => excludedExtSet.add(ext));
  }

  return results.filter(r => {
    const filePath = r.file || '';
    if (!filePath) return true;

    // Accept both POSIX and Windows separators
    const segments = filePath.split(/[/\\]/);

    // Accurate directory check: segment must exactly match excluded directory
    if (segments.some(segment => FILTER_CONFIG.exclude_directories.has(segment))) {
      return false;
    }

    // Accurate file check: pattern matches filename only (not full path)
    const filename = segments[segments.length - 1] || '';
    if (FILE_EXCLUDE_REGEXES.some(regex => regex.test(filename))) {
      return false;
    }

    // Extension filter check. Only names that actually contain a dot have an
    // extension; otherwise an extension-less file such as `log` or `lock`
    // would be mistaken for a file *with* that extension and dropped.
    // Dotfiles like `.env` still yield the extension `env`.
    if (excludedExtSet.size > 0) {
      const dotIndex = filename.lastIndexOf('.');
      if (dotIndex !== -1) {
        const ext = filename.slice(dotIndex + 1).toLowerCase();
        if (excludedExtSet.has(ext)) {
          return false;
        }
      }
    }

    return true;
  });
}

// Helper: backslash-escape every regex metacharacter so `str` matches literally.
function escapeRegExp(str: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metaCharacters, '\\$&');
}

/**
 * Post-processing: boost results that mention the query's keywords.
 *
 * Keywords are the lowercased query tokens (split on whitespace and common
 * punctuation) longer than two characters that are not stop words. Each
 * keyword is matched as a whole word, case-insensitively, against a result's
 * content and file path. A result matching a fraction `f` of the keywords has
 * its score multiplied by `1 + 0.3 * f`; results without any match are
 * returned unchanged (same object). If the query has no keywords, the input
 * array itself is returned.
 */
function applyKeywordBoosting(results: SemanticMatch[], query: string): SemanticMatch[] {
  // Common words that carry no search signal
  const stopWords = new Set([
    'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
    'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should',
    'may', 'might', 'must', 'shall', 'can', 'need', 'dare', 'ought', 'used',
    'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by', 'from', 'as',
    'into', 'through', 'during', 'before', 'after', 'above', 'below', 'between', 'under',
    'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how',
    'all', 'each', 'few', 'more', 'most', 'other', 'some', 'such',
    'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 'just',
    'and', 'but', 'if', 'or', 'because', 'until', 'while', 'although', 'though',
    'after', 'before', 'when', 'whenever', 'where', 'wherever', 'whether',
    'which', 'who', 'whom', 'whose', 'what', 'whatever', 'whichever', 'whoever', 'whomever',
    'this', 'that', 'these', 'those', 'it', 'its',
  ]);

  // One whole-word, case-insensitive matcher per surviving keyword
  const keywordMatchers: RegExp[] = [];
  for (const token of query.toLowerCase().split(/[\s,.;:()"{}[\]-]+/)) {
    if (token.length > 2 && !stopWords.has(token)) {
      keywordMatchers.push(new RegExp(`\\b${escapeRegExp(token)}\\b`, 'i'));
    }
  }

  const keywordTotal = keywordMatchers.length;
  if (keywordTotal === 0) return results;

  return results.map(entry => {
    const content = entry.content || '';
    const file = entry.file || '';

    const matchCount = keywordMatchers.filter(
      matcher => matcher.test(content) || matcher.test(file),
    ).length;

    if (matchCount === 0) {
      return entry;
    }

    // Up to 30% boost when every keyword is present
    const matchRatio = matchCount / keywordTotal;
    const boost = 1 + (matchRatio * 0.3);
    return { ...entry, score: entry.score * boost };
  });
}

/**
 * Post-processing: penalize clusters of identical scores.
 *
 * Scores are bucketed by rounding to 3 decimal places. Any result whose bucket
 * holds more than two results has its score multiplied by
 * `max(0.7, 1 - 0.05 * bucketSize)`. Other results are returned as-is (same
 * object), and inputs with fewer than two results are returned untouched.
 */
function enforceScoreDiversity(results: SemanticMatch[]): SemanticMatch[] {
  if (results.length < 2) return results;

  const bucketOf = (score: number): number => Math.round(score * 1000) / 1000;

  // How many results share each rounded score
  const tally = new Map<number, number>();
  results.forEach(({ score }) => {
    const bucket = bucketOf(score);
    tally.set(bucket, (tally.get(bucket) || 0) + 1);
  });

  return results.map(item => {
    const occurrences = tally.get(bucketOf(item.score)) || 1;
    if (occurrences <= 2) {
      return item;
    }
    // Progressive penalty, capped so that a score never loses more than 30%
    const penalty = Math.max(0.7, 1 - (occurrences * 0.05));
    return { ...item, score: item.score * penalty };
  });
}

/**
 * Post-processing: remove "hot spot" results that share one dominant score.
 *
 * Scores are compared after rounding to 4 decimal places. If the most frequent
 * score covers more than half of the results and is above 0.9, every result
 * with that score is dropped and the score and its frequency are reported in
 * `baselineInfo`. Inputs with fewer than 4 results are never filtered.
 *
 * @param results - Matches to inspect; not modified.
 * @returns The (possibly filtered) results plus `baselineInfo`, which is null
 *          when nothing was removed.
 */
function filterDominantBaselineScores(
  results: SemanticMatch[]
): { filteredResults: SemanticMatch[]; baselineInfo: { score: number; count: number } | null } {
  const BASELINE_THRESHOLD = 0.5;   // dominant score must cover >50% of results
  const HIGH_SCORE_THRESHOLD = 0.9; // and be higher than this to look suspicious

  const untouched = { filteredResults: results, baselineInfo: null };
  if (results.length < 4) {
    return untouched;
  }

  const bucketOf = (score: number): number => Math.round(score * 10000) / 10000;

  // Frequency of each rounded score
  const tally = new Map<number, number>();
  for (const { score } of results) {
    const bucket = bucketOf(score);
    tally.set(bucket, (tally.get(bucket) || 0) + 1);
  }

  // Most frequent score; on ties the first one encountered wins
  let dominantScore: number | null = null;
  let dominantCount = 0;
  for (const [score, count] of tally) {
    if (count > dominantCount) {
      dominantCount = count;
      dominantScore = score;
    }
  }

  if (
    dominantScore === null ||
    !(dominantCount > results.length * BASELINE_THRESHOLD) ||
    !(dominantScore > HIGH_SCORE_THRESHOLD)
  ) {
    return untouched;
  }

  const baselineScore = dominantScore;
  return {
    filteredResults: results.filter(r => bucketOf(r.score) !== baselineScore),
    baselineInfo: { score: baselineScore, count: dominantCount },
  };
}

/**
 * TypeScript implementation of Reciprocal Rank Fusion
 * Reference: codex-lens/src/codexlens/search/ranking.py
 * Formula: score(d) = Σ weight_source / (k + rank_source(d))
 *
 * Ranks are 1-based (array index + 1). Results are keyed by `file` (falling
 * back to `path`); entries without either are ignored, as are sources whose
 * weight is missing or zero. The first result object seen for a path is the
 * one returned.
 *
 * @param resultsMap - Ranked result lists keyed by source name.
 * @param weightsOrQuery - Per-source weights, or a query string from which
 *                         weights are derived via getRRFWeights.
 * @param limit - Maximum number of fused results to return.
 * @param k - RRF smoothing constant (default 60).
 * @returns Fused results, best first, each extended with `fusion_score` and
 *          `matched_backends`.
 */
function applyRRFFusion(
  resultsMap: Map<string, any[]>,
  weightsOrQuery: Record<string, number> | string,
  limit: number,
  k: number = 60,
): any[] {
  const weights = typeof weightsOrQuery === 'string' ? getRRFWeights(weightsOrQuery) : weightsOrQuery;
  const fused = new Map<string, { score: number; result: any; sources: string[] }>();

  for (const [source, results] of resultsMap) {
    const weight = weights[source] || 0;
    if (weight === 0 || !results) continue;

    results.forEach((result, rank) => {
      const key = result.file || result.path;
      if (!key) return;

      let entry = fused.get(key);
      if (!entry) {
        entry = { score: 0, result, sources: [] };
        fused.set(key, entry);
      }

      entry.score += weight / (k + rank + 1);
      if (!entry.sources.includes(source)) {
        entry.sources.push(source);
      }
    });
  }

  // Highest fusion score first, then cut to the requested size
  const ranked = Array.from(fused.values());
  ranked.sort((a, b) => b.score - a.score);

  return ranked.slice(0, limit).map(({ result, score, sources }) => ({
    ...result,
    fusion_score: score,
    matched_backends: sources,
  }));
}

/**
 * Race a promise against a timer.
 *
 * The wrapped promise itself is not cancelled on timeout; only the returned
 * promise settles early. The timer is cleared once the wrapped promise settles.
 *
 * @param promise - The promise to wrap
 * @param ms - Timeout in milliseconds
 * @param modeName - Name of the mode, used in the timeout error message
 * @returns A promise that mirrors `promise`, or rejects with an Error if `ms`
 *          elapses first
 */
function withTimeout<T>(promise: Promise<T>, ms: number, modeName: string): Promise<T> {
  return new Promise<T>((resolve, reject) => {
    const onTimeout = (): void => {
      reject(new Error(`'${modeName}' search timed out after ${ms}ms`));
    };
    const handle = setTimeout(onTimeout, ms);
    const disarm = (): void => {
      clearTimeout(handle);
    };

    promise.then(resolve, reject).finally(disarm);
  });
}

/**
 * Mode: priority - try backends in order (hybrid -> exact -> ripgrep) and
 * return the first usable answer.
 *
 * - hybrid runs only when the index exists and has embeddings (90s timeout)
 * - exact runs only when the index exists (10s timeout)
 * - ripgrep always runs last (5s timeout) and its result is returned whether
 *   or not it succeeded or found anything
 *
 * hybrid/exact results are accepted only if successful and non-empty. Every
 * attempt, skip, or failure is appended to `metadata.fallback_history`. The
 * overall failure result is produced only when the ripgrep stage throws or
 * times out.
 */
async function executePriorityFallbackMode(params: Params): Promise<SearchResult> {
  const { query, path = '.' } = params;
  const fallbackHistory: string[] = [];

  if (!query) {
    return { success: false, error: 'Query is required for search' };
  }

  // A stage "wins" only if it succeeded and actually returned something.
  const hasHits = (res: SearchResult) =>
    res.success && res.results && (res.results as any[]).length > 0;

  // Re-label a backend result as coming from priority mode.
  const tagged = (res: SearchResult, note: string): SearchResult => ({
    ...res,
    metadata: {
      ...res.metadata,
      mode: 'priority',
      note,
      fallback_history: fallbackHistory,
    },
  });

  const indexStatus = await checkIndexStatus(path);

  // Stage 1: hybrid (needs index + embeddings)
  if (!(indexStatus.indexed && indexStatus.has_embeddings)) {
    fallbackHistory.push(`hybrid: skipped (${!indexStatus.indexed ? 'no index' : 'no embeddings'})`);
  } else {
    try {
      const hybridResult = await withTimeout(executeHybridMode(params), 90000, 'hybrid');
      if (hasHits(hybridResult)) {
        fallbackHistory.push('hybrid: success');
        return tagged(hybridResult, 'Result from hybrid search (semantic + vector).');
      }
      fallbackHistory.push('hybrid: no results');
    } catch (error) {
      fallbackHistory.push(`hybrid: ${(error as Error).message}`);
    }
  }

  // Stage 2: exact/FTS (needs index)
  if (!indexStatus.indexed) {
    fallbackHistory.push('exact: skipped (no index)');
  } else {
    try {
      const exactResult = await withTimeout(executeCodexLensExactMode(params), 10000, 'exact');
      if (hasHits(exactResult)) {
        fallbackHistory.push('exact: success');
        return tagged(exactResult, 'Result from exact/FTS search (fallback from hybrid).');
      }
      fallbackHistory.push('exact: no results');
    } catch (error) {
      fallbackHistory.push(`exact: ${(error as Error).message}`);
    }
  }

  // Stage 3: ripgrep - whatever it returns is final
  try {
    const ripgrepResult = await withTimeout(executeRipgrepMode(params), 5000, 'ripgrep');
    fallbackHistory.push(ripgrepResult.success ? 'ripgrep: success' : 'ripgrep: failed');
    return tagged(ripgrepResult, 'Result from ripgrep search (final fallback).');
  } catch (error) {
    fallbackHistory.push(`ripgrep: ${(error as Error).message}`);
  }

  // Only reached when the ripgrep stage threw or timed out
  return {
    success: false,
    error: 'All search backends in priority mode failed or returned no results.',
    metadata: {
      mode: 'priority',
      query,
      fallback_history: fallbackHistory,
    } as any,
  };
}

// Tool schema for MCP.
// This is the JSON-schema style description advertised to MCP clients. The
// `action` enum must list every action the handler below dispatches on
// (init, search, find_files, status, update, watch, and the deprecated
// search_files); otherwise clients that validate against this schema reject
// actions that the description documents.
export const schema: ToolSchema = {
  name: 'smart_search',
  description: `Unified code search tool with content search, file discovery, and semantic search capabilities.

**Actions:**
- search: Search file content (default)
- find_files: Find files by path/name pattern (glob matching)
- init: Create FTS index
- status: Check index status
- update: Incremental index update (for changed files)
- watch: Start file watcher for automatic updates

**Content Search (action="search"):**
  smart_search(query="authentication logic")        # fuzzy mode (default) - FTS + ripgrep fusion
  smart_search(query="MyClass", mode="fuzzy")       # fuzzy mode - fast hybrid search
  smart_search(query="how to auth", mode="semantic")  # semantic mode - dense + reranker

**File Discovery (action="find_files"):**
  smart_search(action="find_files", pattern="*.ts")           # find all TypeScript files
  smart_search(action="find_files", pattern="src/**/*.js")    # recursive glob pattern
  smart_search(action="find_files", pattern="test_*.py")      # find test files
  smart_search(action="find_files", pattern="*.tsx", offset=20, limit=10)  # pagination

**Index Maintenance:**
  smart_search(action="update", path="/project")              # incremental index update
  smart_search(action="watch", path="/project")               # start file watcher
  smart_search(action="watch", debounce=2000)                 # custom debounce interval

**Pagination:** All actions support offset/limit for paginated results:
  smart_search(query="auth", limit=10, offset=0)    # first page
  smart_search(query="auth", limit=10, offset=10)   # second page

**Modes:** fuzzy (FTS + ripgrep fusion, default), semantic (dense + reranker)`,
  inputSchema: {
    type: 'object',
    properties: {
      action: {
        type: 'string',
        enum: ['init', 'search', 'find_files', 'status', 'update', 'watch', 'search_files'],
        description: 'Action: search (content search), find_files (path pattern matching), init (create index), status (check index), update (incremental index update), watch (start file watcher). Note: search_files is deprecated.',
        default: 'search',
      },
      query: {
        type: 'string',
        description: 'Content search query (for action="search")',
      },
      pattern: {
        type: 'string',
        description: 'Glob pattern for file discovery (for action="find_files"). Examples: "*.ts", "src/**/*.js", "test_*.py"',
      },
      mode: {
        type: 'string',
        enum: SEARCH_MODES,
        description: 'Search mode: fuzzy (FTS + ripgrep fusion, default), semantic (dense + reranker for natural language queries)',
        default: 'fuzzy',
      },
      output_mode: {
        type: 'string',
        enum: ['full', 'files_only', 'count'],
        description: 'Output format: full (default), files_only (paths only), count (per-file counts)',
        default: 'full',
      },
      path: {
        type: 'string',
        description: 'Directory path for init/search actions (default: current directory)',
      },
      paths: {
        type: 'array',
        description: 'Multiple paths to search within (for search action)',
        items: {
          type: 'string',
        },
        default: [],
      },
      contextLines: {
        type: 'number',
        description: 'Number of context lines around matches (exact mode only)',
        default: 0,
      },
      maxResults: {
        type: 'number',
        description: 'Maximum number of results (default: 20)',
        default: 20,
      },
      limit: {
        type: 'number',
        description: 'Alias for maxResults (default: 20)',
        default: 20,
      },
      extraFilesCount: {
        type: 'number',
        description: 'Number of additional file-only results (paths without content)',
        default: 10,
      },
      maxContentLength: {
        type: 'number',
        description: 'Maximum content length for truncation (50-2000)',
        default: 200,
      },
      offset: {
        type: 'number',
        description: 'Pagination offset - skip first N results (default: 0)',
        default: 0,
      },
      includeHidden: {
        type: 'boolean',
        description: 'Include hidden files/directories',
        default: false,
      },
      languages: {
        type: 'array',
        items: { type: 'string' },
        description: 'Languages to index (for init action). Example: ["javascript", "typescript"]',
      },
      enrich: {
        type: 'boolean',
        description: 'Enrich search results with code graph relationships (calls, imports, called_by, imported_by).',
        default: false,
      },
      regex: {
        type: 'boolean',
        description: 'Use regex pattern matching instead of literal string (ripgrep mode only). Default: enabled. Example: smart_search(query="class.*Builder")',
        default: true,
      },
      caseSensitive: {
        type: 'boolean',
        description: 'Case-sensitive search (default: true). Set to false for case-insensitive matching.',
        default: true,
      },
      tokenize: {
        type: 'boolean',
        description: 'Tokenize multi-word queries for OR matching (ripgrep mode). Default: true. Results are ranked by token match count (exact matches first).',
        default: true,
      },
    },
    required: [],
  },
};

/**
 * Action: find_files - Find files by path/name pattern (glob matching).
 *
 * Unlike search, which looks inside file content, find_files matches file
 * paths. Two backends are used:
 * - ripgrep (`rg --files` with `--glob` / `--iglob`) when `rg` is available;
 * - otherwise the CodexLens `list-files --json` command, whose output is
 *   filtered locally with `globToRegex`.
 *
 * @param params Parsed tool parameters; reads `pattern`, `path`, `limit`,
 *   `offset`, `includeHidden` and `caseSensitive`.
 * @returns A result whose `results` are `FileMatch` entries for the requested
 *   page and whose `metadata.pagination` describes the full match set, or
 *   `success: false` with an `error` message when no pattern was supplied,
 *   no backend is available, or the backend failed.
 */
async function executeFindFilesAction(params: Params): Promise<SearchResult> {
  const { pattern, path = '.', limit = 20, offset = 0, includeHidden = false, caseSensitive = true } = params;

  if (!pattern) {
    return {
      success: false,
      error: 'Pattern is required for find_files action. Use glob patterns like "*.ts", "src/**/*.js", or "test_*.py"',
    };
  }

  // Builds the FileMatch record for a path; the name is the last path segment
  // and the extension is whatever follows the final dot in that name.
  const toFileMatch = (displayPath: string): FileMatch => {
    const segments = displayPath.split(/[/\\]/);
    const name = segments[segments.length - 1] || '';
    const extension = name.includes('.') ? name.split('.').pop() : undefined;
    return {
      path: displayPath,
      type: 'file' as const,
      name,
      extension,
    };
  };

  // Use ripgrep with --files flag for fast file listing with glob pattern
  const hasRipgrep = checkToolAvailability('rg');

  if (!hasRipgrep) {
    // Fallback to CodexLens file listing if available
    const readyStatus = await ensureCodexLensReady();
    if (!readyStatus.ready) {
      return {
        success: false,
        error: 'Neither ripgrep nor CodexLens available for file discovery.',
      };
    }

    // Try CodexLens file list command
    const result = await executeCodexLens(['list-files', '--json'], { cwd: path });

    if (!result.success) {
      return {
        success: false,
        error: `Failed to list files: ${result.error}`,
      };
    }

    // Parse the listing; accept either a bare array or an object with `files`.
    let files: string[] = [];
    try {
      const parsed = JSON.parse(stripAnsi(result.output || '[]'));
      const listed: unknown = Array.isArray(parsed) ? parsed : parsed.files;
      // Keep only string entries so a malformed payload cannot throw below.
      files = Array.isArray(listed)
        ? listed.filter((entry): entry is string => typeof entry === 'string')
        : [];
    } catch {
      return {
        success: false,
        error: 'Failed to parse file list from CodexLens',
      };
    }

    // ripgrep interprets globs like .gitignore patterns: a pattern without a
    // slash matches the file name at any depth. Mirror that here so that
    // "*.ts" finds nested files with this backend as well.
    const globRegex = globToRegex(pattern, caseSensitive);
    const matchNameOnly = !pattern.includes('/');
    const matchedFiles = files.filter((filePath) => {
      const normalized = filePath.replace(/\\/g, '/');
      const candidate = matchNameOnly
        ? normalized.slice(normalized.lastIndexOf('/') + 1)
        : normalized;
      return globRegex.test(candidate);
    });

    const { paginatedResults, pagination } = applyPagination(matchedFiles, offset, limit);
    const results: FileMatch[] = paginatedResults.map(toFileMatch);

    return {
      success: true,
      results,
      metadata: {
        pattern,
        backend: 'codexlens',
        count: results.length,
        pagination,
      },
    };
  }

  // Use ripgrep --files with glob pattern for fast file discovery
  return new Promise((resolve) => {
    const args = ['--files'];

    // Add exclude patterns
    if (!includeHidden) {
      args.push(...buildExcludeArgs());
    } else {
      args.push('--hidden');
    }

    // Choose the glob flag up front. Pushing --glob and then removing "the
    // first --glob" is unsafe: if buildExcludeArgs() itself emits --glob
    // entries, the splice would drop an exclusion instead of the pattern.
    args.push(caseSensitive ? '--glob' : '--iglob', pattern);

    const child = spawn('rg', args, {
      cwd: path || getProjectRoot(),
      stdio: ['ignore', 'pipe', 'pipe'],
    });

    let stdout = '';
    let stderr = '';

    child.stdout.on('data', (data) => {
      stdout += data.toString();
    });

    child.stderr.on('data', (data) => {
      stderr += data.toString();
    });

    child.on('close', (code) => {
      // ripgrep returns 1 when no matches found, which is not an error
      if (code !== 0 && code !== 1 && !stderr.includes('os error 1')) {
        resolve({
          success: false,
          error: `ripgrep file search failed: ${stderr}`,
        });
        return;
      }

      const allFiles = stdout.split('\n').filter(line => line.trim());
      const { paginatedResults, pagination } = applyPagination(allFiles, offset, limit);

      // Report forward-slash paths regardless of platform.
      const results: FileMatch[] = paginatedResults.map(filePath =>
        toFileMatch(filePath.replace(/\\/g, '/'))
      );

      resolve({
        success: true,
        results,
        metadata: {
          pattern,
          backend: 'ripgrep',
          count: results.length,
          pagination,
        },
      });
    });

    child.on('error', (error) => {
      resolve({
        success: false,
        error: `Failed to spawn ripgrep: ${error.message}`,
      });
    });
  });
}

/**
 * Convert a glob pattern to an anchored regular expression for file matching.
 *
 * Supported syntax:
 * - `*`      any run of characters except `/`
 * - `**`     any run of characters including `/`; a `**` followed by `/` at
 *            the start of a path segment matches zero or more whole
 *            directories, so the following text stays on a segment boundary
 * - `?`      a single character except `/`
 * - `[abc]`, `[!abc]`, `[^abc]`  character classes (backslashes inside a
 *            class are treated literally)
 * - `\x`     the literal character `x`
 *
 * An unterminated `[` is matched literally.
 *
 * @param pattern Glob pattern to convert.
 * @param caseSensitive When false the resulting regex carries the `i` flag.
 * @returns A RegExp anchored with `^` and `$`.
 */
function globToRegex(pattern: string, caseSensitive: boolean = true): RegExp {
  // Escapes one character so it is matched literally by the regex.
  const escapeLiteral = (ch: string): string => ch.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  const out: string[] = [];
  let i = 0;

  while (i < pattern.length) {
    const c = pattern.charAt(i);

    if (c === '\\') {
      // Backslash escapes the next character; a trailing backslash is literal.
      const escaped = i + 1 < pattern.length ? pattern.charAt(i + 1) : '\\';
      out.push(escapeLiteral(escaped));
      i += 2;
      continue;
    }

    if (c === '*') {
      if (pattern.charAt(i + 1) === '*') {
        const atSegmentStart = i === 0 || pattern.charAt(i - 1) === '/';
        i += 2;
        if (pattern.charAt(i) === '/') {
          // Consume the slash that follows `**`.
          i++;
          // `**/` at a segment start means "zero or more directories"; a bare
          // `.*` here would let "**/foo.js" match "barfoo.js".
          out.push(atSegmentStart ? '(?:.*/)?' : '.*');
        } else {
          // ** matches any path including /
          out.push('.*');
        }
        continue;
      }
      // * matches any character except /
      out.push('[^/]*');
      i++;
      continue;
    }

    if (c === '?') {
      out.push('[^/]');
      i++;
      continue;
    }

    if (c === '[') {
      let j = i + 1;
      let negated = false;
      if (pattern.charAt(j) === '!' || pattern.charAt(j) === '^') {
        negated = true;
        j++;
      }
      const close = pattern.indexOf(']', j);
      if (close === -1) {
        // No closing bracket: treat `[` as an ordinary character.
        out.push('\\[');
        i++;
        continue;
      }
      // Double backslashes so a class body ending in `\` cannot swallow the
      // closing bracket and produce an invalid regex.
      const classContent = pattern.slice(j, close).replace(/\\/g, '\\\\');
      out.push(negated ? `[^${classContent}]` : `[${classContent}]`);
      i = close + 1;
      continue;
    }

    out.push(escapeLiteral(c));
    i++;
  }

  const flags = caseSensitive ? '' : 'i';
  return new RegExp('^' + out.join('') + '$', flags);
}

/**
 * Slice one page out of a result list and describe that page.
 *
 * @param results Full, unpaginated result list.
 * @param offset Index of the first item to include.
 * @param limit Maximum number of items in the page.
 * @returns The page (`paginatedResults`) together with `pagination`, which
 *   echoes `offset` and `limit`, reports the unpaginated `total`, and sets
 *   `has_more` when items remain beyond `offset + limit`.
 */
function applyPagination<T>(
  results: T[],
  offset: number,
  limit: number
): { paginatedResults: T[]; pagination: PaginationInfo } {
  const pageEnd = offset + limit;
  const itemCount = results.length;

  const pagination: PaginationInfo = {
    offset,
    limit,
    total: itemCount,
    has_more: pageEnd < itemCount,
  };

  return { paginatedResults: results.slice(offset, pageEnd), pagination };
}

/**
 * Reshape search results according to the requested output mode.
 *
 * - `full` (or any unrecognised mode): the input is returned untouched.
 * - `files_only`: `{ files, count }` with the distinct truthy `file` values
 *   in first-seen order.
 * - `count`: `{ files: [{ file, count }], total }` with the number of results
 *   per `file` and the overall number of results.
 *
 * A non-array `results` value is returned as-is.
 */
function transformOutput(
  results: ExactMatch[] | SemanticMatch[] | GraphMatch[] | unknown[],
  outputMode: 'full' | 'files_only' | 'count'
): unknown {
  const needsReshape = outputMode === 'files_only' || outputMode === 'count';
  if (!Array.isArray(results) || !needsReshape) {
    return results;
  }

  const rows: any[] = results;

  if (outputMode === 'files_only') {
    // Collect distinct file values, then drop empty/missing ones.
    const seen = new Set<any>();
    for (const row of rows) {
      seen.add(row.file);
    }
    const files = Array.from(seen).filter(Boolean);
    return { files, count: files.length };
  }

  // outputMode === 'count': tally results per file.
  const tally: Record<string, number> = {};
  rows.forEach((row) => {
    const file = row.file;
    if (!file) {
      return;
    }
    tally[file] = (tally[file] || 0) + 1;
  });

  const perFile = Object.keys(tally).map((file) => ({ file, count: tally[file] }));
  return { files: perFile, total: rows.length };
}

/**
 * Tool handler: validates the raw parameters, dispatches on `action`, and
 * post-processes search results.
 *
 * - Parameters are validated with `ParamsSchema`; validation failures are
 *   returned as `{ success: false, error }`.
 * - `limit` and `maxResults` are aliases. Only values the caller actually
 *   supplied are considered (the larger wins when both are given); when
 *   neither is supplied the limit is 20. Both fields on the parsed data are
 *   set to the resulting value before dispatch.
 * - `search_files` is deprecated: it runs a normal search with
 *   `output_mode` forced to `files_only` and attaches a warning to the
 *   result metadata.
 * - For search actions the results are reshaped with `transformOutput` when
 *   the output mode is not `full`, and default pagination metadata is added
 *   when results are still an array and the backend supplied none.
 *
 * Errors thrown by an action are caught and reported as
 * `{ success: false, error }`.
 *
 * @param params Raw, unvalidated tool parameters.
 * @returns The action result wrapped in a `ToolResult`.
 */
export async function handler(params: Record<string, unknown>): Promise<ToolResult<SearchResult>> {
  const parsed = ParamsSchema.safeParse(params);
  if (!parsed.success) {
    return { success: false, error: `Invalid params: ${parsed.error.message}` };
  }

  const { action, mode } = parsed.data;

  // Sync limit and maxResults. Look at the raw params to tell caller-supplied
  // values from defaults: taking the max against a default of 20 would make
  // it impossible to request fewer than 20 results.
  const explicitLimits = [
    params.limit != null ? parsed.data.limit : undefined,
    params.maxResults != null ? parsed.data.maxResults : undefined,
  ].filter((value): value is number => typeof value === 'number' && value > 0);
  const effectiveLimit = explicitLimits.length > 0 ? Math.max(...explicitLimits) : 20;
  parsed.data.maxResults = effectiveLimit;
  parsed.data.limit = effectiveLimit;

  // Track if search_files was used (deprecated)
  let deprecationWarning: string | undefined;
  if (action === 'search_files') {
    // DEPRECATED: Redirect to search with files_only output
    deprecationWarning = 'action="search_files" is deprecated. Use action="search" with output_mode="files_only" for content-to-files search, or action="find_files" for path pattern matching.';
    parsed.data.output_mode = 'files_only';
  }

  try {
    let result: SearchResult;

    // Handle actions
    switch (action) {
      case 'init':
        result = await executeInitAction(parsed.data);
        break;

      case 'status':
        result = await executeStatusAction(parsed.data);
        break;

      case 'find_files':
        // File path/name pattern matching (glob-based)
        result = await executeFindFilesAction(parsed.data);
        break;

      case 'update':
        // Incremental index update
        result = await executeUpdateAction(parsed.data);
        break;

      case 'watch':
        // Start file watcher (returns status, watcher runs in background)
        result = await executeWatchAction(parsed.data);
        break;

      case 'search_files':
      case 'search':
      default:
        // Handle search modes: fuzzy | semantic
        switch (mode) {
          case 'fuzzy':
            result = await executeFuzzyMode(parsed.data);
            break;
          case 'semantic':
            result = await executeHybridMode(parsed.data);
            break;
          default:
            throw new Error(`Unsupported mode: ${mode}. Use: fuzzy or semantic`);
        }
        break;
    }

    // Transform output based on output_mode (for search actions only)
    if (action === 'search' || action === 'search_files') {
      // Read the mode from parsed.data at this point so the files_only
      // override applied for search_files is honoured.
      const outputMode = parsed.data.output_mode;
      if (result.success && result.results && outputMode !== 'full') {
        result.results = transformOutput(result.results as any[], outputMode);
      }

      // Add pagination metadata for search results if not already present
      if (result.success && result.results && Array.isArray(result.results)) {
        const totalResults = (result.results as any[]).length;
        if (!result.metadata) {
          result.metadata = {};
        }
        if (!result.metadata.pagination) {
          // NOTE(review): offset is reported as 0 here regardless of the
          // requested offset; confirm whether the search backends apply it.
          result.metadata.pagination = {
            offset: 0,
            limit: effectiveLimit,
            total: totalResults,
            has_more: false,  // Already limited by backend
          };
        }
      }
    }

    // Add deprecation warning if applicable, creating metadata when the
    // backend returned none so the warning is never silently dropped.
    if (deprecationWarning) {
      if (!result.metadata) {
        result.metadata = {};
      }
      result.metadata.warning = deprecationWarning;
    }

    return result.success ? { success: true, result } : { success: false, error: result.error };
  } catch (error) {
    return { success: false, error: error instanceof Error ? error.message : String(error) };
  }
}

/**
 * Run the CodexLens `index init` command and forward progress events to an
 * optional external callback (used by the MCP server for streaming progress).
 *
 * @param params Raw parameters; reads `path` (falls back to '.') and
 *   `languages` (joined with commas and passed as `--language` when
 *   non-empty).
 * @param onProgress Optional callback invoked with every progress event.
 * @returns The command's success/error state plus metadata carrying the most
 *   recent progress event and up to the last five progress events.
 */
export async function executeInitWithProgress(
  params: Record<string, unknown>,
  onProgress?: (progress: ProgressInfo) => void
): Promise<SearchResult> {
  const targetPath = (params.path as string) || '.';
  const languageList = params.languages as string[] | undefined;

  // Bail out early when CodexLens is not usable.
  const readiness = await ensureCodexLensReady();
  if (!readiness.ready) {
    return {
      success: false,
      error: `CodexLens not available: ${readiness.error}. CodexLens will be auto-installed on first use.`,
    };
  }

  // 'index init' subcommand (new CLI structure), plus the optional language filter.
  const languageArgs =
    languageList && languageList.length > 0 ? ['--language', languageList.join(',')] : [];
  const cliArgs = ['index', 'init', targetPath, ...languageArgs];

  // Progress seen so far: the full history and the most recent event.
  const tracker: { latest: ProgressInfo | null; history: ProgressInfo[] } = {
    latest: null,
    history: [],
  };

  const outcome = await executeCodexLens(cliArgs, {
    cwd: targetPath,
    timeout: 1800000, // 30 minutes for large codebases
    onProgress: (progress: ProgressInfo) => {
      tracker.history.push(progress);
      tracker.latest = progress;
      // Relay to the external listener when one was supplied.
      onProgress?.(progress);
    },
  });

  // Assemble metadata, attaching progress details only when events arrived.
  const metadata: SearchMetadata = {
    action: 'init',
    path: targetPath,
  };

  const latest = tracker.latest;
  if (latest !== null) {
    const { stage, message, percent, filesProcessed, totalFiles } = latest;
    metadata.progress = { stage, message, percent, filesProcessed, totalFiles };
  }

  if (tracker.history.length > 0) {
    metadata.progressHistory = tracker.history.slice(-5);
  }

  let successMessage: string | undefined;
  if (outcome.success) {
    successMessage = `CodexLens index created successfully for ${targetPath}`;
  }

  return {
    success: outcome.success,
    error: outcome.error,
    message: successMessage,
    metadata,
  };
}