Files
Claude-Code-Workflow/ccw/src/tools/smart-search.js
catlog22 d4e59770d0 feat: Add CCW MCP server and tools integration
- Introduced `ccw-mcp` command for running CCW tools as an MCP server.
- Updated `package.json` to include new MCP dependencies and scripts.
- Enhanced CLI with new options for `codex_lens` tool.
- Implemented MCP server logic to expose CCW tools via Model Context Protocol.
- Added new tools and updated existing ones for better functionality and documentation.
- Created quick start and full documentation for MCP server usage.
- Added tests for MCP server functionality to ensure reliability.
2025-12-13 09:14:57 +08:00

629 lines
16 KiB
JavaScript

/**
* Smart Search Tool - Unified search with mode-based execution
* Modes: auto, exact, fuzzy, semantic, graph
*
* Features:
* - Intent classification (auto mode)
* - Multi-backend search routing
* - Result fusion with RRF ranking
* - Configurable search parameters
*/
import { spawn, execSync } from 'child_process';
import { existsSync, readdirSync, statSync } from 'fs';
import { join, resolve, isAbsolute } from 'path';
import { ensureReady as ensureCodexLensReady, executeCodexLens } from './codex-lens.js';
// Search mode constants: the values accepted for the `mode` parameter.
// Used both to validate requests and as the `enum` of the tool schema.
const SEARCH_MODES = ['auto', 'exact', 'fuzzy', 'semantic', 'graph'];
// Classification confidence threshold
// NOTE(review): not referenced anywhere in the visible code - confirm whether
// auto mode is supposed to compare classification confidence against it.
const CONFIDENCE_THRESHOLD = 0.7;
/**
* Detection heuristics for intent classification
*/
/**
 * Detect a literal string query.
 *
 * A query counts as literal when it is a single token made only of ASCII
 * letters, digits, underscores and hyphens, or when it starts and ends with a
 * quote character (single or double; the two quotes need not match).
 * @param {string} query - Search query string
 * @returns {boolean} true when the query looks like a literal
 */
function detectLiteral(query) {
  const isBareToken = /^[a-zA-Z0-9_-]+$/.test(query);
  if (isBareToken) {
    return true;
  }
  const isQuoted = /^["'].*["']$/.test(query);
  return isQuoted;
}
/**
 * Detect a regex-style query.
 *
 * Returns true as soon as the query contains any regex metacharacter:
 * . * + ? ^ $ { } ( ) | [ ] or a backslash. Note that a plain dot is enough,
 * so dotted names such as "file.js" are flagged as well.
 * @param {string} query - Search query string
 * @returns {boolean} true when at least one metacharacter is present
 */
function detectRegex(query) {
  const metacharacters = /[.*+?^${}()|[\]\\]/;
  return metacharacters.test(query);
}
/**
 * Detect natural language query (sentence structure, questions, multi-word phrases)
 *
 * A query is treated as natural language when it has at least three
 * whitespace-separated words or ends with a question mark. Surrounding
 * whitespace is ignored so padding neither inflates the word count nor hides a
 * trailing "?".
 * @param {string} query - Search query string
 * @returns {boolean} true when the query reads like a phrase or a question
 */
function detectNaturalLanguage(query) {
  const trimmed = query.trim();
  if (trimmed === '') {
    return false;
  }
  // Splitting the trimmed text avoids the empty leading/trailing tokens that
  // split() produces for padded input.
  const wordCount = trimmed.split(/\s+/).length;
  return wordCount >= 3 || trimmed.endsWith('?');
}
/**
 * Detect file path query (path separators, file extensions)
 *
 * Returns true when the query contains a forward slash or a backslash, or when
 * it ends with a dot followed by a 2-4 letter extension.
 * @param {string} query - Search query string
 * @returns {boolean} true when the query looks like a file path
 */
function detectFilePath(query) {
  // The backslash must be escaped inside the character class; an unescaped
  // "\]" keeps the class open and turns the whole line into a single regex.
  const hasSeparator = /[\\/]/.test(query);
  const hasExtension = /\.[a-z]{2,4}$/i.test(query);
  return hasSeparator || hasExtension;
}
/**
 * Detect relationship query (import, export, dependency keywords)
 *
 * Matches the keywords import(s), export(s), use(s), depend(s), call(s) and
 * extend(s), case-insensitively, when they start at a word boundary and are
 * followed by whitespace. The boundary keeps words that merely contain a
 * keyword ("houses", "recalls") from matching.
 * @param {string} query - Search query string
 * @returns {boolean} true when a relationship keyword is present
 */
function detectRelationship(query) {
  const relationshipKeyword = /\b(imports?|exports?|uses?|depends?|calls?|extends?)\s/i;
  return relationshipKeyword.test(query);
}
/**
 * Classify query intent and recommend search mode
 *
 * Each detection heuristic that fires adds a fixed weight to one mode:
 * literal -> exact (+0.8), regex -> fuzzy (+0.7), natural language ->
 * semantic (+0.9), file path -> exact (+0.6), relationship -> graph (+0.85).
 * The mode with the highest total wins. When no heuristic fires, every score
 * is 0 and the result is `exact` with confidence 0 rather than an arbitrary
 * mode.
 * @param {string} query - Search query string
 * @returns {{mode: string, confidence: number, reasoning: string}}
 */
function classifyIntent(query) {
  // Run every detector exactly once; the outcome drives both the scores and
  // the reasoning text.
  const heuristics = [
    { label: 'literal', matched: detectLiteral(query), target: 'exact', weight: 0.8 },
    { label: 'regex', matched: detectRegex(query), target: 'fuzzy', weight: 0.7 },
    { label: 'natural language', matched: detectNaturalLanguage(query), target: 'semantic', weight: 0.9 },
    { label: 'file path', matched: detectFilePath(query), target: 'exact', weight: 0.6 },
    { label: 'relationship', matched: detectRelationship(query), target: 'graph', weight: 0.85 }
  ];

  const scores = { exact: 0, fuzzy: 0, semantic: 0, graph: 0 };
  const detectedPatterns = [];
  for (const { label, matched, target, weight } of heuristics) {
    if (matched) {
      scores[target] += weight;
      detectedPatterns.push(label);
    }
  }

  // Only a strictly higher score displaces the current winner, so an all-zero
  // tie stays on `exact` instead of drifting to the last key (`graph`).
  let mode = 'exact';
  for (const candidate of Object.keys(scores)) {
    if (scores[candidate] > scores[mode]) {
      mode = candidate;
    }
  }
  const confidence = scores[mode];

  const detected = detectedPatterns.length > 0 ? detectedPatterns.join(', ') : 'none';
  const reasoning = `Query classified as ${mode} (confidence: ${confidence.toFixed(2)}, detected: ${detected})`;
  return { mode, confidence, reasoning };
}
/**
 * Check if a tool is available in PATH
 *
 * Runs the platform lookup command (`where` on Windows, `which` elsewhere)
 * synchronously with all stdio discarded; any failure of that command is
 * reported as "not available".
 * @param {string} toolName - Tool executable name
 * @returns {boolean} true when the lookup command succeeds
 */
function checkToolAvailability(toolName) {
  const locator = process.platform === 'win32' ? 'where' : 'which';
  try {
    execSync(`${locator} ${toolName}`, { stdio: 'ignore' });
  } catch {
    return false;
  }
  return true;
}
/**
 * Build ripgrep command arguments
 *
 * The query is always passed as a fixed string (-F). A `--` delimiter is
 * placed before the query so that a query or path beginning with "-" is taken
 * as a positional argument instead of being parsed as a ripgrep flag.
 * @param {Object} params - Search parameters
 * @param {string} params.query - Literal text to search for
 * @param {string[]} [params.paths=['.']] - Paths to search within
 * @param {number} [params.contextLines=0] - Context lines around each match (-C); omitted when <= 0
 * @param {number} [params.maxResults=100] - Value for --max-count (ripgrep applies it per file); omitted when <= 0
 * @param {boolean} [params.includeHidden=false] - Add --hidden when true
 * @returns {{command: string, args: string[]}}
 */
function buildRipgrepCommand(params) {
  const { query, paths = ['.'], contextLines = 0, maxResults = 100, includeHidden = false } = params;
  const args = [
    '-n', // Show line numbers
    '--color=never', // Disable color output
    '--json' // Output in JSON format
  ];
  if (contextLines > 0) {
    args.push('-C', String(contextLines));
  }
  if (maxResults > 0) {
    args.push('--max-count', String(maxResults));
  }
  if (includeHidden) {
    args.push('--hidden');
  }
  // Everything after `--` is positional: the pattern first, then the paths.
  args.push('-F', '--', query, ...paths);
  return { command: 'rg', args };
}
/**
 * Mode: auto - Intent classification and mode selection
 *
 * Classifies the query, runs the handler for the classified mode and merges
 * the classification (classified_as, confidence, reasoning) into the result's
 * metadata. A `fuzzy` classification returns a "not yet implemented" failure
 * without running a handler. An unrecognised classification falls back to
 * exact mode with a fixed confidence of 0.5.
 * @param {Object} params - Search parameters (see the tool schema)
 * @returns {Promise<Object>} Handler result enriched with classification metadata
 */
async function executeAutoMode(params) {
  const { query } = params;
  const classification = classifyIntent(query);
  const classificationInfo = {
    classified_as: classification.mode,
    confidence: classification.confidence,
    reasoning: classification.reasoning
  };

  // Fuzzy mode not yet implemented
  if (classification.mode === 'fuzzy') {
    return {
      success: false,
      error: 'Fuzzy mode not yet implemented',
      metadata: classificationInfo
    };
  }

  // A Map lookup only ever yields one of the handlers registered here.
  const handlers = new Map([
    ['exact', executeExactMode],
    ['semantic', executeSemanticMode],
    ['graph', executeGraphMode]
  ]);
  const handler = handlers.get(classification.mode);

  if (handler === undefined) {
    // Fallback to exact mode with warning
    const fallbackResult = await executeExactMode(params);
    return {
      ...fallbackResult,
      metadata: {
        ...fallbackResult.metadata,
        classified_as: 'exact',
        confidence: 0.5,
        reasoning: 'Fallback to exact mode due to unknown classification'
      }
    };
  }

  const modeResult = await handler(params);
  return {
    ...modeResult,
    metadata: {
      ...modeResult.metadata,
      ...classificationInfo
    }
  };
}
/**
 * Convert ripgrep `--json` output (one JSON message per line) into match records.
 * Only messages of type "match" are kept; lines that are not valid JSON or do
 * not have the expected shape are skipped.
 * @param {string} stdout - Raw ripgrep stdout
 * @param {number} limit - Maximum number of matches to return; <= 0 means no limit
 * @returns {Array<{file: string, line: number, column: number, content: string}>}
 */
function parseRipgrepMatches(stdout, limit) {
  const matches = [];
  for (const line of stdout.split('\n')) {
    if (limit > 0 && matches.length >= limit) {
      break;
    }
    if (!line.trim()) {
      continue;
    }
    try {
      const item = JSON.parse(line);
      if (item.type !== 'match') {
        continue;
      }
      const firstSubmatch = item.data.submatches && item.data.submatches[0];
      matches.push({
        file: item.data.path.text,
        line: item.data.line_number,
        // Submatch offsets are 0-based; report a 1-based column
        column: firstSubmatch ? firstSubmatch.start + 1 : 1,
        content: item.data.lines.text.trim()
      });
    } catch {
      // Malformed or unexpected message: skip it and keep the remaining matches
    }
  }
  return matches;
}

/**
 * Mode: exact - Precise file path and content matching
 * Uses ripgrep for literal string matching
 *
 * Spawns `rg` in the current working directory and resolves with the parsed
 * matches. ripgrep exit code 0 (matches found) and 1 (no matches) are both
 * successful searches; any other exit code resolves to a failure carrying
 * stderr. The promise never rejects: spawn errors are reported as failure
 * results too.
 * @param {Object} params - Search parameters
 * @param {string} params.query - Literal text to search for
 * @param {string[]} [params.paths=[]] - Paths to search; empty means '.'
 * @param {number} [params.contextLines=0] - Context lines passed to ripgrep
 * @param {number} [params.maxResults=100] - Cap on the total number of returned matches (<= 0 disables the cap)
 * @param {boolean} [params.includeHidden=false] - Search hidden files too
 * @returns {Promise<{success: boolean, results?: Array, error?: string, metadata?: Object}>}
 */
async function executeExactMode(params) {
  const { query, paths = [], contextLines = 0, maxResults = 100, includeHidden = false } = params;
  // Check ripgrep availability
  if (!checkToolAvailability('rg')) {
    return {
      success: false,
      error: 'ripgrep not available - please install ripgrep (rg) to use exact search mode'
    };
  }
  // Build ripgrep command
  const { command, args } = buildRipgrepCommand({
    query,
    paths: paths.length > 0 ? paths : ['.'],
    contextLines,
    maxResults,
    includeHidden
  });
  // The callback is named `settle` so it does not shadow `resolve` imported from 'path'.
  return new Promise((settle) => {
    const child = spawn(command, args, {
      cwd: process.cwd(),
      stdio: ['ignore', 'pipe', 'pipe']
    });
    // Decode at the stream level so a multi-byte character split across two
    // chunks is not corrupted by per-chunk Buffer#toString().
    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');
    let stdout = '';
    let stderr = '';
    child.stdout.on('data', (chunk) => {
      stdout += chunk;
    });
    child.stderr.on('data', (chunk) => {
      stderr += chunk;
    });
    // Handle completion
    child.on('close', (code) => {
      if (code !== 0 && code !== 1) {
        settle({
          success: false,
          error: `ripgrep execution failed with code ${code}: ${stderr}`,
          results: []
        });
        return;
      }
      // --max-count only limits matches per file, so the overall cap is
      // enforced while parsing.
      const results = parseRipgrepMatches(stdout, maxResults);
      settle({
        success: true,
        results,
        metadata: {
          mode: 'exact',
          backend: 'ripgrep',
          count: results.length,
          query
        }
      });
    });
    // Handle spawn errors
    child.on('error', (error) => {
      settle({
        success: false,
        error: `Failed to spawn ripgrep: ${error.message}`,
        results: []
      });
    });
  });
}
/**
 * Mode: fuzzy - Approximate matching with tolerance
 * Uses fuzzy matching algorithms for typo-tolerant search
 *
 * Not implemented yet: always resolves to a failure result and does not read
 * its argument.
 * @param {Object} params - Search parameters (currently unused)
 * @returns {Promise<{success: boolean, error: string}>}
 */
async function executeFuzzyMode(params) {
  // TODO: Implement fuzzy search
  // - Use fuse.js for content fuzzy matching
  // - Support approximate file path matching
  // - Configure similarity threshold
  // - Return ranked results
  return {
    success: false,
    error: 'Fuzzy mode not implemented - fuzzy matching engine pending'
  };
}
/**
 * Mode: semantic - Natural language understanding search
 * Uses CodexLens embeddings for semantic similarity
 *
 * Runs `search <query> --limit <maxResults> --json` through CodexLens with the
 * first entry of `paths` (or '.') as working directory, then maps each
 * returned item to { file, score, content, symbol }. Output that cannot be
 * parsed as JSON still yields success, with empty results, the raw output and
 * a warning in the metadata.
 * @param {Object} params - Search parameters
 * @param {string} params.query - Search query
 * @param {string[]} [params.paths=[]] - Only the first path is used
 * @param {number} [params.maxResults=100] - Limit passed to CodexLens
 * @returns {Promise<Object>} Result object with success flag, results and metadata
 */
async function executeSemanticMode(params) {
  const { query, paths = [], maxResults = 100 } = params;
  const baseMetadata = { mode: 'semantic', backend: 'codexlens' };

  // CodexLens must be installed and ready before searching
  const readyStatus = await ensureCodexLensReady();
  if (!readyStatus.ready) {
    return {
      success: false,
      error: `CodexLens not available: ${readyStatus.error}. Run 'ccw tool exec codex_lens {"action":"bootstrap"}' to install.`
    };
  }

  const searchPath = paths.length === 0 ? '.' : paths[0];
  const searchArgs = ['search', query, '--limit', maxResults.toString(), '--json'];
  const result = await executeCodexLens(searchArgs, { cwd: searchPath });
  if (!result.success) {
    return { success: false, error: result.error, metadata: baseMetadata };
  }

  let results;
  try {
    // Normalise CRLF line endings before parsing
    const normalized = result.output.replace(/\r\n/g, '\n');
    const parsed = JSON.parse(normalized);
    // The payload may be wrapped in a `result` envelope
    const payload = parsed.result || parsed;
    const items = payload.results || [];
    results = items.map((item) => {
      return {
        file: item.path || item.file,
        score: item.score || 0,
        content: item.excerpt || item.content || '',
        symbol: item.symbol || null
      };
    });
  } catch {
    // Unparseable output: hand back the raw text instead of failing
    return {
      success: true,
      results: [],
      output: result.output,
      metadata: {
        ...baseMetadata,
        count: 0,
        query,
        warning: 'Failed to parse JSON output'
      }
    };
  }

  return {
    success: true,
    results,
    metadata: { ...baseMetadata, count: results.length, query }
  };
}
/**
 * Mode: graph - Dependency and relationship traversal
 * Uses CodexLens symbol extraction for code analysis
 *
 * Runs a CodexLens text search for the query, takes the first 10 distinct
 * files from the hits and extracts the symbols of each file in parallel.
 * `relationships` is always an empty array for now. Files whose symbol
 * extraction fails or returns unparseable output are left out.
 * @param {Object} params - Search parameters
 * @param {string} params.query - Search query
 * @param {string[]} [params.paths=[]] - Only the first path is used
 * @param {number} [params.maxResults=100] - Limit passed to the text search
 * @returns {Promise<Object>} Result object with success flag, results and metadata
 */
async function executeGraphMode(params) {
  const { query, paths = [], maxResults = 100 } = params;
  // Check CodexLens availability
  const readyStatus = await ensureCodexLensReady();
  if (!readyStatus.ready) {
    return {
      success: false,
      error: `CodexLens not available: ${readyStatus.error}. Run 'ccw tool exec codex_lens {"action":"bootstrap"}' to install.`
    };
  }
  // First, search for relevant files using text search
  const searchPath = paths.length > 0 ? paths[0] : '.';
  const textResult = await executeCodexLens(
    ['search', query, '--limit', maxResults.toString(), '--json'],
    { cwd: searchPath }
  );
  if (!textResult.success) {
    return {
      success: false,
      error: textResult.error,
      metadata: {
        mode: 'graph',
        backend: 'codexlens'
      }
    };
  }
  // Parse results and extract symbols from top files
  const results = [];
  try {
    const parsed = JSON.parse(textResult.output);
    // Accept the same shapes as semantic mode: a bare array, an object with
    // `results`, or either of those wrapped in a `result` envelope.
    const data = parsed.result || parsed;
    const items = Array.isArray(data) ? data : (data.results || []);
    // Hits without a path cannot be passed to the symbol command, so drop them.
    const hitPaths = items.map((item) => item.path || item.file).filter(Boolean);
    const files = [...new Set(hitPaths)].slice(0, 10);
    // Extract symbols from files in parallel
    const symbolResults = await Promise.all(
      files.map((file) =>
        executeCodexLens(['symbol', file, '--json'], { cwd: searchPath })
          .then((result) => ({ file, result }))
      )
    );
    for (const { file, result } of symbolResults) {
      if (!result.success) {
        continue;
      }
      try {
        const symbols = JSON.parse(result.output);
        results.push({
          file,
          symbols: symbols.symbols || symbols,
          relationships: []
        });
      } catch {
        // Skip files with parse errors
      }
    }
  } catch {
    return {
      success: false,
      error: 'Failed to parse search results',
      metadata: {
        mode: 'graph',
        backend: 'codexlens'
      }
    };
  }
  return {
    success: true,
    results,
    metadata: {
      mode: 'graph',
      backend: 'codexlens',
      count: results.length,
      query,
      note: 'Graph mode provides symbol extraction; full dependency graph analysis pending'
    }
  };
}
/**
 * Main execute function - routes to appropriate mode handler
 *
 * Validates `query` and `mode`, then forwards the untouched `params` object to
 * the handler for the selected mode. A missing `params` argument is reported
 * with the same validation error as a missing query.
 * @param {Object} params - Search parameters (see the tool schema)
 * @param {string} params.query - Search query (required, non-empty string)
 * @param {string} [params.mode='auto'] - One of SEARCH_MODES
 * @returns {Promise<Object>} Result of the selected mode handler
 * @throws {Error} When `query` is missing or not a string, or `mode` is not a valid search mode
 */
async function execute(params) {
  // Only the fields needed for routing are read here; each handler applies
  // its own defaults to the rest.
  const { query, mode = 'auto' } = params || {};
  // Validate required parameters
  if (!query || typeof query !== 'string') {
    throw new Error('Parameter "query" is required and must be a string');
  }
  // Validate mode
  if (!SEARCH_MODES.includes(mode)) {
    throw new Error(`Invalid mode: ${mode}. Valid modes: ${SEARCH_MODES.join(', ')}`);
  }
  // Route to mode-specific handler
  switch (mode) {
    case 'auto':
      return executeAutoMode(params);
    case 'exact':
      return executeExactMode(params);
    case 'fuzzy':
      return executeFuzzyMode(params);
    case 'semantic':
      return executeSemanticMode(params);
    case 'graph':
      return executeGraphMode(params);
    default:
      // Only reachable if SEARCH_MODES gains a mode without a handler here
      throw new Error(`Unsupported mode: ${mode}`);
  }
}
/**
 * Smart Search Tool Definition
 *
 * Bundles the tool name, a usage description, the JSON-schema style parameter
 * definition and the `execute` entry point. Only `query` is required; every
 * other parameter has a default.
 */
export const smartSearchTool = {
  name: 'smart_search',
  // The usage examples must use the parameter names defined under
  // `parameters.properties` (the search scope is `paths`, an array).
  description: `Intelligent code search with multiple modes.
Usage:
smart_search(query="function main", paths=["."]) # Auto-select mode
smart_search(query="def init", mode="exact") # Exact match
smart_search(query="authentication logic", mode="semantic") # NL search
Modes: auto (default), exact, fuzzy, semantic, graph`,
  parameters: {
    type: 'object',
    properties: {
      query: {
        type: 'string',
        description: 'Search query (file pattern, text content, or natural language)'
      },
      mode: {
        type: 'string',
        enum: SEARCH_MODES,
        description: 'Search mode (default: auto)',
        default: 'auto'
      },
      paths: {
        type: 'array',
        description: 'Paths to search within (default: current directory)',
        items: {
          type: 'string'
        },
        default: []
      },
      contextLines: {
        type: 'number',
        description: 'Number of context lines around matches (default: 0)',
        default: 0
      },
      maxResults: {
        type: 'number',
        description: 'Maximum number of results to return (default: 100)',
        default: 100
      },
      includeHidden: {
        type: 'boolean',
        description: 'Include hidden files/directories (default: false)',
        default: false
      }
    },
    required: ['query']
  },
  execute
};