/**
 * Smart Search Tool - Unified search with mode-based execution
 * Modes: auto, exact, fuzzy, semantic, graph
 *
 * Features:
 * - Intent classification (auto mode)
 * - Multi-backend search routing
 * - Result fusion with RRF ranking
 * - Configurable search parameters
 */

import { spawn, execSync } from 'child_process';
import { existsSync, readdirSync, statSync } from 'fs';
import { join, resolve, isAbsolute } from 'path';
import { ensureReady as ensureCodexLensReady, executeCodexLens } from './codex-lens.js';

// Search mode constants
const SEARCH_MODES = ['auto', 'exact', 'fuzzy', 'semantic', 'graph'];

// Classification confidence threshold
const CONFIDENCE_THRESHOLD = 0.7;

/**
 * Detection heuristics for intent classification
 */

/**
 * Detect a literal string query: a single identifier-like token
 * (letters, digits, `_`, `-`) or a string wrapped in quotes.
 * @param {string} query - Search query string
 * @returns {boolean}
 */
function detectLiteral(query) {
  return /^[a-zA-Z0-9_-]+$/.test(query) || /^["'].*["']$/.test(query);
}

/**
 * Detect a regex-like query (contains at least one regex metacharacter).
 * @param {string} query - Search query string
 * @returns {boolean}
 */
function detectRegex(query) {
  return /[.*+?^${}()|[\]\\]/.test(query);
}

/**
 * Detect a natural language query: three or more whitespace-separated
 * words, or a trailing question mark.
 * @param {string} query - Search query string
 * @returns {boolean}
 */
function detectNaturalLanguage(query) {
  // Trim first so leading/trailing whitespace does not inflate the word count.
  const trimmed = query.trim();
  return trimmed.split(/\s+/).length >= 3 || /\?$/.test(trimmed);
}

/**
 * Detect a file path query: contains a path separator (`/` or `\`) or ends
 * with a 2-4 letter file extension.
 * @param {string} query - Search query string
 * @returns {boolean}
 */
function detectFilePath(query) {
  // The backslash must be escaped inside the class; an unescaped `\]` keeps
  // the class open and silently changes what the expression matches.
  return /[/\\]/.test(query) || /\.[a-z]{2,4}$/i.test(query);
}

/**
 * Detect a relationship query (import, export, dependency keywords followed
 * by whitespace).
 * @param {string} query - Search query string
 * @returns {boolean}
 */
function detectRelationship(query) {
  // `\b` prevents matches inside unrelated words (e.g. "because " -> "use ").
  return /\b(import|export|uses?|depends?|calls?|extends?)\s/i.test(query);
}

/**
 * Classify query intent and recommend search mode.
 *
 * Each matching heuristic adds its weight to one mode's score; the mode with
 * the highest score wins. The reported confidence is that additive score, so
 * it can exceed 1 when two heuristics vote for the same mode. When no
 * heuristic matches, every score is 0 and the first mode (`exact`) is
 * returned with confidence 0.
 *
 * @param {string} query - Search query string
 * @returns {{mode: string, confidence: number, reasoning: string}}
 */
function classifyIntent(query) {
  // Evaluate every heuristic exactly once; each votes for one mode.
  const heuristics = [
    { label: 'literal', mode: 'exact', weight: 0.8, matched: detectLiteral(query) },
    { label: 'regex', mode: 'fuzzy', weight: 0.7, matched: detectRegex(query) },
    { label: 'natural language', mode: 'semantic', weight: 0.9, matched: detectNaturalLanguage(query) },
    { label: 'file path', mode: 'exact', weight: 0.6, matched: detectFilePath(query) },
    { label: 'relationship', mode: 'graph', weight: 0.85, matched: detectRelationship(query) }
  ];

  const scores = { exact: 0, fuzzy: 0, semantic: 0, graph: 0 };
  const detectedPatterns = [];
  for (const heuristic of heuristics) {
    if (heuristic.matched) {
      scores[heuristic.mode] += heuristic.weight;
      detectedPatterns.push(heuristic.label);
    }
  }

  // Strict `>` keeps the earlier mode on ties, so an all-zero score falls
  // back to `exact` instead of the last key (`graph`).
  const mode = Object.keys(scores).reduce(
    (best, candidate) => (scores[candidate] > scores[best] ? candidate : best)
  );
  const confidence = scores[mode];

  const detected = detectedPatterns.length > 0 ? detectedPatterns.join(', ') : 'none';
  const reasoning = `Query classified as ${mode} (confidence: ${confidence.toFixed(2)}, detected: ${detected})`;

  return { mode, confidence, reasoning };
}

/**
 * Check if a tool is available in PATH by running `where` (Windows) or
 * `which` (elsewhere) and checking whether the command succeeds.
 *
 * NOTE(review): `toolName` is interpolated into a shell command line; only
 * pass trusted, hard-coded names.
 *
 * @param {string} toolName - Tool executable name
 * @returns {boolean} true when the lookup command exits successfully
 */
function checkToolAvailability(toolName) {
  try {
    const isWindows = process.platform === 'win32';
    const command = isWindows ? 'where' : 'which';
    execSync(`${command} ${toolName}`, { stdio: 'ignore' });
    return true;
  } catch {
    // execSync throws on a non-zero exit status, i.e. the tool was not found.
    return false;
  }
}

/**
 * Build ripgrep command arguments for a fixed-string search.
 *
 * @param {Object} params - Search parameters
 * @param {string} params.query - Literal text to search for
 * @param {string[]} [params.paths=['.']] - Paths to search
 * @param {number} [params.contextLines=0] - Context lines around each match
 * @param {number} [params.maxResults=100] - Passed to `--max-count` when > 0
 *   (ripgrep applies this limit per file)
 * @param {boolean} [params.includeHidden=false] - Search hidden files too
 * @returns {{command: string, args: string[]}}
 */
function buildRipgrepCommand(params) {
  const { query, paths = ['.'], contextLines = 0, maxResults = 100, includeHidden = false } = params;

  const args = [
    '-n', // Show line numbers
    '--color=never', // Disable color output
    '--json' // Output in JSON format
  ];

  // Add context lines if specified
  if (contextLines > 0) {
    args.push('-C', String(contextLines));
  }

  // Add max results limit
  if (maxResults > 0) {
    args.push('--max-count', String(maxResults));
  }

  // Include hidden files if specified
  if (includeHidden) {
    args.push('--hidden');
  }

  // Fixed-string matching. `-e` binds the query as the pattern even when it
  // starts with `-`, and `--` stops option parsing before the paths.
  args.push('-F', '-e', query, '--', ...paths);

  return { command: 'rg', args };
}

/**
 * Mode: auto - Intent classification and mode selection
 * Analyzes query to determine optimal search mode, runs that mode and adds
 * the classification (`classified_as`, `confidence`, `reasoning`) to the
 * result metadata.
 * @param {Object} params - Search parameters (see `execute`)
 * @returns {Promise<Object>} Result of the selected mode plus classification metadata
 */
async function executeAutoMode(params) {
  const { query } = params;

  // Classify intent
  const classification = classifyIntent(query);
  const classificationMetadata = {
    classified_as: classification.mode,
    confidence: classification.confidence,
    reasoning: classification.reasoning
  };

  // Fuzzy mode not yet implemented: report the classification but do not run anything.
  if (classification.mode === 'fuzzy') {
    return {
      success: false,
      error: 'Fuzzy mode not yet implemented',
      metadata: classificationMetadata
    };
  }

  // Built at call time so handlers declared later in the file are available.
  const handlers = {
    exact: executeExactMode,
    semantic: executeSemanticMode,
    graph: executeGraphMode
  };
  const handler = handlers[classification.mode];

  if (!handler) {
    // Fallback to exact mode with warning
    const fallbackResult = await executeExactMode(params);
    return {
      ...fallbackResult,
      metadata: {
        ...fallbackResult.metadata,
        classified_as: 'exact',
        confidence: 0.5,
        reasoning: 'Fallback to exact mode due to unknown classification'
      }
    };
  }

  const result = await handler(params);
  return {
    ...result,
    metadata: { ...result.metadata, ...classificationMetadata }
  };
}

/**
 * Convert ripgrep `--json` output (one JSON message per line) into match records.
 * Only messages of type `match` are kept; lines that fail to parse or lack the
 * expected fields are skipped.
 * @param {string} stdout - Raw ripgrep stdout
 * @returns {Array<{file: string, line: number, column: number, content: string}>}
 */
function parseRipgrepMatches(stdout) {
  const matches = [];
  for (const line of stdout.split('\n')) {
    if (!line.trim()) {
      continue;
    }
    try {
      const item = JSON.parse(line);
      if (item.type !== 'match') {
        continue;
      }
      const firstSubmatch = item.data.submatches && item.data.submatches[0];
      matches.push({
        file: item.data.path.text,
        line: item.data.line_number,
        // Submatch offsets are 0-based; report a 1-based column.
        column: firstSubmatch ? firstSubmatch.start + 1 : 1,
        content: item.data.lines.text.trim()
      });
    } catch (err) {
      // Skip malformed JSON lines and messages without the expected fields
      continue;
    }
  }
  return matches;
}

/**
 * Mode: exact - Precise file path and content matching
 * Uses ripgrep for literal string matching.
 *
 * Never rejects: failures are reported as `{ success: false, error }`.
 * Exit code 0 (matches) and 1 (no matches) are both successful searches;
 * any other exit status is reported as an error.
 *
 * @param {Object} params - Search parameters (see `execute`)
 * @returns {Promise<Object>} `{ success, results, metadata }` or `{ success: false, error }`
 */
async function executeExactMode(params) {
  const { query, paths = [], contextLines = 0, maxResults = 100, includeHidden = false } = params;

  // Check ripgrep availability
  if (!checkToolAvailability('rg')) {
    return {
      success: false,
      error: 'ripgrep not available - please install ripgrep (rg) to use exact search mode'
    };
  }

  // Build ripgrep command
  const { command, args } = buildRipgrepCommand({
    query,
    paths: paths.length > 0 ? paths : ['.'],
    contextLines,
    maxResults,
    includeHidden
  });

  // Named `settle` so it does not shadow `resolve` imported from 'path'.
  return new Promise((settle) => {
    const child = spawn(command, args, {
      cwd: process.cwd(),
      stdio: ['ignore', 'pipe', 'pipe']
    });

    // Decode at the stream level so multi-byte characters split across
    // chunks are reassembled correctly.
    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');

    let stdout = '';
    let stderr = '';

    child.stdout.on('data', (chunk) => {
      stdout += chunk;
    });

    child.stderr.on('data', (chunk) => {
      stderr += chunk;
    });

    // Handle completion
    child.on('close', (code) => {
      if (code !== 0 && code !== 1) {
        settle({
          success: false,
          error: `ripgrep execution failed with code ${code}: ${stderr}`,
          results: []
        });
        return;
      }

      const matches = parseRipgrepMatches(stdout);
      // `--max-count` only limits matches per file; enforce the overall cap here.
      const results = maxResults > 0 ? matches.slice(0, maxResults) : matches;

      settle({
        success: true,
        results,
        metadata: {
          mode: 'exact',
          backend: 'ripgrep',
          count: results.length,
          query
        }
      });
    });

    // Handle spawn errors
    child.on('error', (error) => {
      settle({
        success: false,
        error: `Failed to spawn ripgrep: ${error.message}`,
        results: []
      });
    });
  });
}

/**
 * Mode: fuzzy - Approximate matching with tolerance
 * Placeholder: always reports that fuzzy search is not implemented.
 * @param {Object} params - Search parameters (currently unused)
 * @returns {Promise<{success: boolean, error: string}>}
 */
async function executeFuzzyMode(params) {
  // TODO: Implement fuzzy search
  // - Use fuse.js for content fuzzy matching
  // - Support approximate file path matching
  // - Configure similarity threshold
  // - Return ranked results

  return {
    success: false,
    error: 'Fuzzy mode not implemented - fuzzy matching engine pending'
  };
}
/**
 * Parse the JSON output of a CodexLens `search --json` call into a list of
 * result items. Accepts the three shapes handled in this file: a bare array,
 * `{ results: [...] }`, and either of those wrapped in a `result` property.
 * @param {string} output - Raw CodexLens stdout
 * @returns {Array<Object>} Result items (empty when none are present)
 * @throws {SyntaxError|TypeError} When the output is not usable JSON
 */
function parseCodexLensSearchItems(output) {
  // Handle CRLF in output
  const parsed = JSON.parse(output.replace(/\r\n/g, '\n'));
  const data = parsed.result || parsed;
  if (Array.isArray(data)) {
    return data;
  }
  return Array.isArray(data.results) ? data.results : [];
}

/**
 * Mode: semantic - Natural language understanding search
 * Runs a CodexLens `search` for the query and maps each hit to
 * `{ file, score, content, symbol }`. Only the first entry of `paths` is
 * used, as the working directory of the CodexLens call.
 * @param {Object} params - Search parameters (see `execute`)
 * @returns {Promise<Object>} `{ success, results, metadata }` or an error result
 */
async function executeSemanticMode(params) {
  const { query, paths = [], maxResults = 100 } = params;

  // Check CodexLens availability
  const readyStatus = await ensureCodexLensReady();
  if (!readyStatus.ready) {
    return {
      success: false,
      error: `CodexLens not available: ${readyStatus.error}. Run 'ccw tool exec codex_lens {"action":"bootstrap"}' to install.`
    };
  }

  // Determine search path
  const searchPath = paths.length > 0 ? paths[0] : '.';

  // Execute CodexLens semantic search
  const result = await executeCodexLens(
    ['search', query, '--limit', String(maxResults), '--json'],
    { cwd: searchPath }
  );

  if (!result.success) {
    return {
      success: false,
      error: result.error,
      metadata: { mode: 'semantic', backend: 'codexlens' }
    };
  }

  // Parse and transform results
  let results;
  try {
    results = parseCodexLensSearchItems(result.output).map((item) => ({
      file: item.path || item.file,
      score: item.score || 0,
      content: item.excerpt || item.content || '',
      symbol: item.symbol || null
    }));
  } catch {
    // Return raw output if JSON parsing fails
    return {
      success: true,
      results: [],
      output: result.output,
      metadata: {
        mode: 'semantic',
        backend: 'codexlens',
        count: 0,
        query,
        warning: 'Failed to parse JSON output'
      }
    };
  }

  return {
    success: true,
    results,
    metadata: {
      mode: 'semantic',
      backend: 'codexlens',
      count: results.length,
      query
    }
  };
}

/**
 * Mode: graph - Dependency and relationship traversal
 * Runs a CodexLens `search` for the query, then runs CodexLens `symbol` on up
 * to 10 distinct files from the hits. `relationships` is always an empty
 * array for now. Only the first entry of `paths` is used, as the working
 * directory of the CodexLens calls.
 * @param {Object} params - Search parameters (see `execute`)
 * @returns {Promise<Object>} `{ success, results, metadata }` or an error result
 */
async function executeGraphMode(params) {
  const { query, paths = [], maxResults = 100 } = params;

  // Check CodexLens availability
  const readyStatus = await ensureCodexLensReady();
  if (!readyStatus.ready) {
    return {
      success: false,
      error: `CodexLens not available: ${readyStatus.error}. Run 'ccw tool exec codex_lens {"action":"bootstrap"}' to install.`
    };
  }

  // First, search for relevant files using text search
  const searchPath = paths.length > 0 ? paths[0] : '.';

  const textResult = await executeCodexLens(
    ['search', query, '--limit', String(maxResults), '--json'],
    { cwd: searchPath }
  );

  if (!textResult.success) {
    return {
      success: false,
      error: textResult.error,
      metadata: { mode: 'graph', backend: 'codexlens' }
    };
  }

  // Parse results and extract symbols from top files
  const results = [];
  try {
    // Same envelope handling as semantic mode; entries without a file name
    // are dropped so they are never passed to the `symbol` command.
    const fileNames = parseCodexLensSearchItems(textResult.output)
      .map((item) => item.path || item.file)
      .filter(Boolean);
    const files = [...new Set(fileNames)].slice(0, 10);

    // Extract symbols from files in parallel
    const symbolResults = await Promise.all(
      files.map(async (file) => ({
        file,
        result: await executeCodexLens(['symbol', file, '--json'], { cwd: searchPath })
      }))
    );

    for (const { file, result } of symbolResults) {
      if (!result.success) {
        continue;
      }
      try {
        const symbols = JSON.parse(result.output);
        results.push({
          file,
          symbols: symbols.symbols || symbols,
          relationships: []
        });
      } catch {
        // Skip files with parse errors
      }
    }
  } catch {
    return {
      success: false,
      error: 'Failed to parse search results',
      metadata: { mode: 'graph', backend: 'codexlens' }
    };
  }

  return {
    success: true,
    results,
    metadata: {
      mode: 'graph',
      backend: 'codexlens',
      count: results.length,
      query,
      note: 'Graph mode provides symbol extraction; full dependency graph analysis pending'
    }
  };
}

/**
 * Main execute function - routes to appropriate mode handler
 * @param {Object} params - Search parameters
 * @param {string} params.query - Search query (required, non-empty string)
 * @param {string} [params.mode='auto'] - One of SEARCH_MODES
 * @param {string[]} [params.paths] - Paths to search within
 * @param {number} [params.contextLines] - Context lines around matches
 * @param {number} [params.maxResults] - Maximum number of results
 * @param {boolean} [params.includeHidden] - Include hidden files/directories
 * @returns {Promise<Object>} Result object of the selected mode handler
 * @throws {Error} When `query` is missing/not a string or `mode` is invalid
 *   (surfaced as a rejected promise because this function is async)
 */
async function execute(params) {
  const { query, mode = 'auto' } = params;

  // Validate required parameters
  if (!query || typeof query !== 'string') {
    throw new Error('Parameter "query" is required and must be a string');
  }

  // Validate mode
  if (!SEARCH_MODES.includes(mode)) {
    throw new Error(`Invalid mode: ${mode}. Valid modes: ${SEARCH_MODES.join(', ')}`);
  }

  // Route to mode-specific handler
  switch (mode) {
    case 'auto':
      return executeAutoMode(params);
    case 'exact':
      return executeExactMode(params);
    case 'fuzzy':
      return executeFuzzyMode(params);
    case 'semantic':
      return executeSemanticMode(params);
    case 'graph':
      return executeGraphMode(params);
    default:
      throw new Error(`Unsupported mode: ${mode}`);
  }
}

/**
 * Smart Search Tool Definition
 */
export const smartSearchTool = {
  name: 'smart_search',
  description: `Intelligent code search with multiple modes.

Usage:
  smart_search(query="function main", paths=["."])           # Auto-select mode
  smart_search(query="def init", mode="exact")               # Exact match
  smart_search(query="authentication logic", mode="semantic")  # NL search

Modes: auto (default), exact, fuzzy, semantic, graph`,
  parameters: {
    type: 'object',
    properties: {
      query: {
        type: 'string',
        description: 'Search query (file pattern, text content, or natural language)'
      },
      mode: {
        type: 'string',
        enum: SEARCH_MODES,
        description: 'Search mode (default: auto)',
        default: 'auto'
      },
      paths: {
        type: 'array',
        description: 'Paths to search within (default: current directory)',
        items: { type: 'string' },
        default: []
      },
      contextLines: {
        type: 'number',
        description: 'Number of context lines around matches (default: 0)',
        default: 0
      },
      maxResults: {
        type: 'number',
        description: 'Maximum number of results to return (default: 100)',
        default: 100
      },
      includeHidden: {
        type: 'boolean',
        description: 'Include hidden files/directories (default: false)',
        default: false
      }
    },
    required: ['query']
  },
  execute
};