mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-11 02:33:51 +08:00
feat: 增加文件读取功能的行分页支持,优化智能搜索的多词查询匹配
This commit is contained in:
@@ -256,7 +256,7 @@ async function renderMcpManager() {
|
|||||||
</div>
|
</div>
|
||||||
<div class="grid grid-cols-1 gap-2">
|
<div class="grid grid-cols-1 gap-2">
|
||||||
<div class="flex items-center gap-2">
|
<div class="flex items-center gap-2">
|
||||||
<label class="text-xs text-muted-foreground w-28 shrink-0">CCW_PROJECT_ROOT</label>
|
<label class="text-xs text-muted-foreground w-36 shrink-0">CCW_PROJECT_ROOT</label>
|
||||||
<input type="text"
|
<input type="text"
|
||||||
class="ccw-project-root-input flex-1 px-2 py-1 text-xs bg-background border border-border rounded focus:outline-none focus:ring-1 focus:ring-primary"
|
class="ccw-project-root-input flex-1 px-2 py-1 text-xs bg-background border border-border rounded focus:outline-none focus:ring-1 focus:ring-primary"
|
||||||
placeholder="${projectPath || t('mcp.useCurrentDir')}"
|
placeholder="${projectPath || t('mcp.useCurrentDir')}"
|
||||||
@@ -268,7 +268,7 @@ async function renderMcpManager() {
|
|||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
<div class="flex items-center gap-2">
|
<div class="flex items-center gap-2">
|
||||||
<label class="text-xs text-muted-foreground w-28 shrink-0">CCW_ALLOWED_DIRS</label>
|
<label class="text-xs text-muted-foreground w-36 shrink-0">CCW_ALLOWED_DIRS</label>
|
||||||
<input type="text"
|
<input type="text"
|
||||||
class="ccw-allowed-dirs-input flex-1 px-2 py-1 text-xs bg-background border border-border rounded focus:outline-none focus:ring-1 focus:ring-primary"
|
class="ccw-allowed-dirs-input flex-1 px-2 py-1 text-xs bg-background border border-border rounded focus:outline-none focus:ring-1 focus:ring-primary"
|
||||||
placeholder="${t('mcp.allowedDirsPlaceholder')}"
|
placeholder="${t('mcp.allowedDirsPlaceholder')}"
|
||||||
@@ -470,7 +470,7 @@ async function renderMcpManager() {
|
|||||||
</div>
|
</div>
|
||||||
<div class="grid grid-cols-1 gap-2">
|
<div class="grid grid-cols-1 gap-2">
|
||||||
<div class="flex items-center gap-2">
|
<div class="flex items-center gap-2">
|
||||||
<label class="text-xs text-muted-foreground w-28 shrink-0">CCW_PROJECT_ROOT</label>
|
<label class="text-xs text-muted-foreground w-36 shrink-0">CCW_PROJECT_ROOT</label>
|
||||||
<input type="text"
|
<input type="text"
|
||||||
class="ccw-project-root-input flex-1 px-2 py-1 text-xs bg-background border border-border rounded focus:outline-none focus:ring-1 focus:ring-primary"
|
class="ccw-project-root-input flex-1 px-2 py-1 text-xs bg-background border border-border rounded focus:outline-none focus:ring-1 focus:ring-primary"
|
||||||
placeholder="${projectPath || t('mcp.useCurrentDir')}"
|
placeholder="${projectPath || t('mcp.useCurrentDir')}"
|
||||||
@@ -482,7 +482,7 @@ async function renderMcpManager() {
|
|||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
<div class="flex items-center gap-2">
|
<div class="flex items-center gap-2">
|
||||||
<label class="text-xs text-muted-foreground w-28 shrink-0">CCW_ALLOWED_DIRS</label>
|
<label class="text-xs text-muted-foreground w-36 shrink-0">CCW_ALLOWED_DIRS</label>
|
||||||
<input type="text"
|
<input type="text"
|
||||||
class="ccw-allowed-dirs-input flex-1 px-2 py-1 text-xs bg-background border border-border rounded focus:outline-none focus:ring-1 focus:ring-primary"
|
class="ccw-allowed-dirs-input flex-1 px-2 py-1 text-xs bg-background border border-border rounded focus:outline-none focus:ring-1 focus:ring-primary"
|
||||||
placeholder="${t('mcp.allowedDirsPlaceholder')}"
|
placeholder="${t('mcp.allowedDirsPlaceholder')}"
|
||||||
|
|||||||
@@ -30,6 +30,8 @@ const ParamsSchema = z.object({
|
|||||||
maxDepth: z.number().default(3).describe('Max directory depth to traverse'),
|
maxDepth: z.number().default(3).describe('Max directory depth to traverse'),
|
||||||
includeContent: z.boolean().default(true).describe('Include file content in result'),
|
includeContent: z.boolean().default(true).describe('Include file content in result'),
|
||||||
maxFiles: z.number().default(MAX_FILES).describe('Max number of files to return'),
|
maxFiles: z.number().default(MAX_FILES).describe('Max number of files to return'),
|
||||||
|
offset: z.number().min(0).optional().describe('Line offset to start reading from (0-based, for single file only)'),
|
||||||
|
limit: z.number().min(1).optional().describe('Number of lines to read (for single file only)'),
|
||||||
});
|
});
|
||||||
|
|
||||||
type Params = z.infer<typeof ParamsSchema>;
|
type Params = z.infer<typeof ParamsSchema>;
|
||||||
@@ -40,6 +42,8 @@ interface FileEntry {
|
|||||||
content?: string;
|
content?: string;
|
||||||
truncated?: boolean;
|
truncated?: boolean;
|
||||||
matches?: string[];
|
matches?: string[];
|
||||||
|
totalLines?: number;
|
||||||
|
lineRange?: { start: number; end: number };
|
||||||
}
|
}
|
||||||
|
|
||||||
interface ReadResult {
|
interface ReadResult {
|
||||||
@@ -123,23 +127,69 @@ function collectFiles(
|
|||||||
return files;
|
return files;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Options accepted by `readFileContent`.
 */
interface ReadContentOptions {
  /** Maximum number of characters of content to return before it is cut off. */
  maxLength: number;
  /** 0-based index of the first line to return; treated as 0 when only `limit` is given. */
  offset?: number;
  /** Maximum number of lines to return; when omitted, reading continues to the end of the file. */
  limit?: number;
}
|
||||||
|
|
||||||
|
/**
 * Result returned by `readFileContent`.
 */
interface ReadContentResult {
  /** File text (possibly cut off), or a bracketed placeholder for binary files and read errors. */
  content: string;
  /** True when the content was cut at `maxLength`, or when lines remain after the requested range. */
  truncated: boolean;
  /** Number of lines in the file (split on `\n`); not set for binary files or read errors. */
  totalLines?: number;
  /**
   * 0-based range of the lines returned; only set when `offset`/`limit` pagination was used.
   * `end` is the index of the last returned line and equals `start` when no lines were selected.
   */
  lineRange?: { start: number; end: number };
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Read file content with truncation
|
* Read file content with truncation and optional line-based pagination
|
||||||
*/
|
*/
|
||||||
function readFileContent(filePath: string, maxLength: number): { content: string; truncated: boolean } {
|
function readFileContent(filePath: string, options: ReadContentOptions): ReadContentResult {
|
||||||
|
const { maxLength, offset, limit } = options;
|
||||||
|
|
||||||
if (isBinaryFile(filePath)) {
|
if (isBinaryFile(filePath)) {
|
||||||
return { content: '[Binary file]', truncated: false };
|
return { content: '[Binary file]', truncated: false };
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const content = readFileSync(filePath, 'utf8');
|
const content = readFileSync(filePath, 'utf8');
|
||||||
|
const lines = content.split('\n');
|
||||||
|
const totalLines = lines.length;
|
||||||
|
|
||||||
|
// If offset/limit specified, use line-based pagination
|
||||||
|
if (offset !== undefined || limit !== undefined) {
|
||||||
|
const startLine = Math.min(offset ?? 0, totalLines);
|
||||||
|
const endLine = limit !== undefined ? Math.min(startLine + limit, totalLines) : totalLines;
|
||||||
|
const selectedLines = lines.slice(startLine, endLine);
|
||||||
|
const selectedContent = selectedLines.join('\n');
|
||||||
|
|
||||||
|
const actualEnd = endLine;
|
||||||
|
const hasMore = actualEnd < totalLines;
|
||||||
|
|
||||||
|
let finalContent = selectedContent;
|
||||||
|
if (selectedContent.length > maxLength) {
|
||||||
|
finalContent = selectedContent.substring(0, maxLength) + `\n... (+${selectedContent.length - maxLength} chars)`;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate actual line range (handle empty selection)
|
||||||
|
const actualLineEnd = selectedLines.length > 0 ? startLine + selectedLines.length - 1 : startLine;
|
||||||
|
|
||||||
|
return {
|
||||||
|
content: finalContent,
|
||||||
|
truncated: hasMore || selectedContent.length > maxLength,
|
||||||
|
totalLines,
|
||||||
|
lineRange: { start: startLine, end: actualLineEnd },
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Default behavior: truncate by character length
|
||||||
if (content.length > maxLength) {
|
if (content.length > maxLength) {
|
||||||
return {
|
return {
|
||||||
content: content.substring(0, maxLength) + `\n... (+${content.length - maxLength} chars)`,
|
content: content.substring(0, maxLength) + `\n... (+${content.length - maxLength} chars)`,
|
||||||
truncated: true
|
truncated: true,
|
||||||
|
totalLines,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
return { content, truncated: false };
|
return { content, truncated: false, totalLines };
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
return { content: `[Error: ${(error as Error).message}]`, truncated: false };
|
return { content: `[Error: ${(error as Error).message}]`, truncated: false };
|
||||||
}
|
}
|
||||||
@@ -171,15 +221,17 @@ function findMatches(content: string, pattern: string): string[] {
|
|||||||
// Tool schema for MCP
|
// Tool schema for MCP
|
||||||
export const schema: ToolSchema = {
|
export const schema: ToolSchema = {
|
||||||
name: 'read_file',
|
name: 'read_file',
|
||||||
description: `Read files with multi-file, directory, and regex support.
|
description: `Read files with multi-file, directory, regex support, and line-based pagination.
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
read_file(paths="file.ts") # Single file
|
read_file(paths="file.ts") # Single file (full content)
|
||||||
read_file(paths=["a.ts", "b.ts"]) # Multiple files
|
read_file(paths="file.ts", offset=100, limit=50) # Lines 100-149 (0-based)
|
||||||
read_file(paths="src/", pattern="*.ts") # Directory with pattern
|
read_file(paths=["a.ts", "b.ts"]) # Multiple files
|
||||||
read_file(paths="src/", contentPattern="TODO") # Search content
|
read_file(paths="src/", pattern="*.ts") # Directory with pattern
|
||||||
|
read_file(paths="src/", contentPattern="TODO") # Search content
|
||||||
|
|
||||||
Returns compact file list with optional content.`,
|
Supports both absolute and relative paths. Relative paths are resolved from project root.
|
||||||
|
Returns compact file list with optional content. Use offset/limit for large file pagination.`,
|
||||||
inputSchema: {
|
inputSchema: {
|
||||||
type: 'object',
|
type: 'object',
|
||||||
properties: {
|
properties: {
|
||||||
@@ -213,6 +265,16 @@ Returns compact file list with optional content.`,
|
|||||||
description: `Max number of files to return (default: ${MAX_FILES})`,
|
description: `Max number of files to return (default: ${MAX_FILES})`,
|
||||||
default: MAX_FILES,
|
default: MAX_FILES,
|
||||||
},
|
},
|
||||||
|
offset: {
|
||||||
|
type: 'number',
|
||||||
|
description: 'Line offset to start reading from (0-based, for single file only)',
|
||||||
|
minimum: 0,
|
||||||
|
},
|
||||||
|
limit: {
|
||||||
|
type: 'number',
|
||||||
|
description: 'Number of lines to read (for single file only)',
|
||||||
|
minimum: 1,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
required: ['paths'],
|
required: ['paths'],
|
||||||
},
|
},
|
||||||
@@ -232,6 +294,8 @@ export async function handler(params: Record<string, unknown>): Promise<ToolResu
|
|||||||
maxDepth,
|
maxDepth,
|
||||||
includeContent,
|
includeContent,
|
||||||
maxFiles,
|
maxFiles,
|
||||||
|
offset,
|
||||||
|
limit,
|
||||||
} = parsed.data;
|
} = parsed.data;
|
||||||
|
|
||||||
const cwd = getProjectRoot();
|
const cwd = getProjectRoot();
|
||||||
@@ -271,6 +335,10 @@ export async function handler(params: Record<string, unknown>): Promise<ToolResu
|
|||||||
const files: FileEntry[] = [];
|
const files: FileEntry[] = [];
|
||||||
let totalContent = 0;
|
let totalContent = 0;
|
||||||
|
|
||||||
|
// Only apply offset/limit for single file mode
|
||||||
|
const isSingleFile = limitedFiles.length === 1;
|
||||||
|
const useLinePagination = isSingleFile && (offset !== undefined || limit !== undefined);
|
||||||
|
|
||||||
for (const filePath of limitedFiles) {
|
for (const filePath of limitedFiles) {
|
||||||
if (totalContent >= MAX_TOTAL_CONTENT) break;
|
if (totalContent >= MAX_TOTAL_CONTENT) break;
|
||||||
|
|
||||||
@@ -283,7 +351,15 @@ export async function handler(params: Record<string, unknown>): Promise<ToolResu
|
|||||||
if (includeContent) {
|
if (includeContent) {
|
||||||
const remainingSpace = MAX_TOTAL_CONTENT - totalContent;
|
const remainingSpace = MAX_TOTAL_CONTENT - totalContent;
|
||||||
const maxLen = Math.min(MAX_CONTENT_LENGTH, remainingSpace);
|
const maxLen = Math.min(MAX_CONTENT_LENGTH, remainingSpace);
|
||||||
const { content, truncated } = readFileContent(filePath, maxLen);
|
|
||||||
|
// Pass offset/limit only for single file mode
|
||||||
|
const readOptions: ReadContentOptions = { maxLength: maxLen };
|
||||||
|
if (useLinePagination) {
|
||||||
|
if (offset !== undefined) readOptions.offset = offset;
|
||||||
|
if (limit !== undefined) readOptions.limit = limit;
|
||||||
|
}
|
||||||
|
|
||||||
|
const { content, truncated, totalLines, lineRange } = readFileContent(filePath, readOptions);
|
||||||
|
|
||||||
// If contentPattern provided, only include files with matches
|
// If contentPattern provided, only include files with matches
|
||||||
if (contentPattern) {
|
if (contentPattern) {
|
||||||
@@ -292,6 +368,8 @@ export async function handler(params: Record<string, unknown>): Promise<ToolResu
|
|||||||
entry.matches = matches;
|
entry.matches = matches;
|
||||||
entry.content = content;
|
entry.content = content;
|
||||||
entry.truncated = truncated;
|
entry.truncated = truncated;
|
||||||
|
entry.totalLines = totalLines;
|
||||||
|
entry.lineRange = lineRange;
|
||||||
totalContent += content.length;
|
totalContent += content.length;
|
||||||
} else {
|
} else {
|
||||||
continue; // Skip files without matches
|
continue; // Skip files without matches
|
||||||
@@ -299,6 +377,8 @@ export async function handler(params: Record<string, unknown>): Promise<ToolResu
|
|||||||
} else {
|
} else {
|
||||||
entry.content = content;
|
entry.content = content;
|
||||||
entry.truncated = truncated;
|
entry.truncated = truncated;
|
||||||
|
entry.totalLines = totalLines;
|
||||||
|
entry.lineRange = lineRange;
|
||||||
totalContent += content.length;
|
totalContent += content.length;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -311,6 +391,10 @@ export async function handler(params: Record<string, unknown>): Promise<ToolResu
|
|||||||
if (totalFiles > maxFiles) {
|
if (totalFiles > maxFiles) {
|
||||||
message += ` (showing ${maxFiles} of ${totalFiles})`;
|
message += ` (showing ${maxFiles} of ${totalFiles})`;
|
||||||
}
|
}
|
||||||
|
if (useLinePagination && files.length > 0 && files[0].lineRange) {
|
||||||
|
const { start, end } = files[0].lineRange;
|
||||||
|
message += ` [lines ${start}-${end} of ${files[0].totalLines}]`;
|
||||||
|
}
|
||||||
if (contentPattern) {
|
if (contentPattern) {
|
||||||
message += ` matching "${contentPattern}"`;
|
message += ` matching "${contentPattern}"`;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -45,6 +45,7 @@ const ParamsSchema = z.object({
|
|||||||
// Search modifiers for ripgrep mode
|
// Search modifiers for ripgrep mode
|
||||||
regex: z.boolean().default(true), // Use regex pattern matching (default: enabled)
|
regex: z.boolean().default(true), // Use regex pattern matching (default: enabled)
|
||||||
caseSensitive: z.boolean().default(true), // Case sensitivity (default: case-sensitive)
|
caseSensitive: z.boolean().default(true), // Case sensitivity (default: case-sensitive)
|
||||||
|
tokenize: z.boolean().default(true), // Tokenize multi-word queries for OR matching (default: enabled)
|
||||||
// Fuzzy matching is implicit in hybrid mode (RRF fusion)
|
// Fuzzy matching is implicit in hybrid mode (RRF fusion)
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -96,6 +97,87 @@ function buildExcludeArgs(): string[] {
|
|||||||
return args;
|
return args;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Split a search query into tokens for multi-word OR matching.
 *
 * The query is split on runs of whitespace, commas, semicolons and colons.
 * A token is dropped when it is shorter than two characters, or when it is a
 * stop word containing no uppercase A-Z letter. A stop word written with an
 * uppercase letter (e.g. `The`) is kept because it may be an identifier.
 *
 * @param query - The search query
 * @returns The surviving tokens in their original order and casing; may be empty
 */
function tokenizeQuery(query: string): string[] {
  // Stop words for filtering (common English words)
  const stopWords = new Set([
    'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
    'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
    'should', 'may', 'might', 'must', 'can', 'to', 'of', 'in', 'for', 'on',
    'with', 'at', 'by', 'from', 'as', 'into', 'through', 'and', 'but', 'if',
    'or', 'not', 'this', 'that', 'these', 'those', 'it', 'its', 'how', 'what',
    'where', 'when', 'why', 'which', 'who', 'whom',
  ]);

  // Splitting on \s already strips all surrounding whitespace, so no trim is needed.
  // Leading/trailing delimiters yield empty strings, which the length check removes.
  return query.split(/[\s,;:]+/).filter((token) => {
    if (token.length < 2) return false;

    // No stop word contains an underscore, so only the uppercase test can
    // rescue a stop word as an identifier-like token.
    const isPlainStopWord = stopWords.has(token.toLowerCase()) && !/[A-Z]/.test(token);
    return !isPlainStopWord;
  });
}
|
||||||
|
|
||||||
|
/**
 * Rank search results by how many of the query tokens each one contains.
 *
 * Each token is matched literally and case-insensitively against the result's
 * file path and content joined by a space. Every returned result is a copy of
 * the input with `matchCount` (tokens found) and `matchScore`
 * (`matchCount / tokens.length`, 0 to 1) added.
 *
 * Results are ordered by score (highest first). Ties are broken by file path
 * and then by line number, so matches from the same file stay together instead
 * of being interleaved with other files by line number alone.
 *
 * @param results - Search results
 * @param tokens - Query tokens
 * @returns A new, ranked array; the input array itself when there are fewer than two tokens
 */
function scoreByTokenMatch(results: ExactMatch[], tokens: string[]): ExactMatch[] {
  // Ranking is only meaningful when there are several tokens to compare against.
  if (tokens.length <= 1) return results;

  // Escape regex metacharacters so each token is matched as literal text.
  // The patterns carry no `g` flag, so `test` keeps no state between calls.
  const tokenPatterns = tokens.map(
    (token) => new RegExp(token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'i'),
  );

  const scored = results.map((result) => {
    const searchText = `${result.file || ''} ${result.content || ''}`;
    const matchCount = tokenPatterns.filter((pattern) => pattern.test(searchText)).length;

    return {
      ...result,
      matchScore: matchCount / tokens.length,
      matchCount,
    };
  });

  // `scored` is a fresh array, so sorting in place does not disturb the caller's input.
  return scored.sort((a, b) => {
    if (a.matchScore !== b.matchScore) {
      return b.matchScore - a.matchScore;
    }

    // Plain code-unit comparison keeps the order independent of the host locale.
    const fileA = a.file || '';
    const fileB = b.file || '';
    if (fileA !== fileB) {
      return fileA < fileB ? -1 : 1;
    }

    return (a.line || 0) - (b.line || 0);
  });
}
|
||||||
|
|
||||||
interface Classification {
|
interface Classification {
|
||||||
mode: string;
|
mode: string;
|
||||||
confidence: number;
|
confidence: number;
|
||||||
@@ -107,6 +189,8 @@ interface ExactMatch {
|
|||||||
line: number;
|
line: number;
|
||||||
column: number;
|
column: number;
|
||||||
content: string;
|
content: string;
|
||||||
|
matchScore?: number; // Token match ratio (0-1) for multi-word queries
|
||||||
|
matchCount?: number; // Number of tokens matched
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RelationshipInfo {
|
interface RelationshipInfo {
|
||||||
@@ -162,6 +246,9 @@ interface SearchMetadata {
|
|||||||
index_status?: 'indexed' | 'not_indexed' | 'partial';
|
index_status?: 'indexed' | 'not_indexed' | 'partial';
|
||||||
fallback_history?: string[];
|
fallback_history?: string[];
|
||||||
suggested_weights?: Record<string, number>;
|
suggested_weights?: Record<string, number>;
|
||||||
|
// Tokenization metadata (ripgrep mode)
|
||||||
|
tokens?: string[]; // Query tokens used for multi-word search
|
||||||
|
tokenized?: boolean; // Whether tokenization was applied
|
||||||
// Pagination metadata
|
// Pagination metadata
|
||||||
pagination?: PaginationInfo;
|
pagination?: PaginationInfo;
|
||||||
// Init action specific
|
// Init action specific
|
||||||
@@ -373,8 +460,9 @@ function checkToolAvailability(toolName: string): boolean {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Build ripgrep command arguments
|
* Build ripgrep command arguments
|
||||||
|
* Supports tokenized multi-word queries with OR matching
|
||||||
* @param params - Search parameters
|
* @param params - Search parameters
|
||||||
* @returns Command and arguments
|
* @returns Command, arguments, and tokens used
|
||||||
*/
|
*/
|
||||||
function buildRipgrepCommand(params: {
|
function buildRipgrepCommand(params: {
|
||||||
query: string;
|
query: string;
|
||||||
@@ -384,8 +472,9 @@ function buildRipgrepCommand(params: {
|
|||||||
includeHidden: boolean;
|
includeHidden: boolean;
|
||||||
regex?: boolean;
|
regex?: boolean;
|
||||||
caseSensitive?: boolean;
|
caseSensitive?: boolean;
|
||||||
}): { command: string; args: string[] } {
|
tokenize?: boolean;
|
||||||
const { query, paths = ['.'], contextLines = 0, maxResults = 10, includeHidden = false, regex = false, caseSensitive = true } = params;
|
}): { command: string; args: string[]; tokens: string[] } {
|
||||||
|
const { query, paths = ['.'], contextLines = 0, maxResults = 10, includeHidden = false, regex = false, caseSensitive = true, tokenize = true } = params;
|
||||||
|
|
||||||
const args = [
|
const args = [
|
||||||
'-n',
|
'-n',
|
||||||
@@ -415,16 +504,33 @@ function buildRipgrepCommand(params: {
|
|||||||
args.push('--hidden');
|
args.push('--hidden');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Regex mode (-e) vs fixed string mode (-F)
|
// Tokenize query for multi-word OR matching
|
||||||
if (regex) {
|
const tokens = tokenize ? tokenizeQuery(query) : [query];
|
||||||
args.push('-e', query);
|
|
||||||
|
if (tokens.length > 1) {
|
||||||
|
// Multi-token: use multiple -e patterns (OR matching)
|
||||||
|
// Each token is escaped for regex safety unless regex mode is enabled
|
||||||
|
for (const token of tokens) {
|
||||||
|
if (regex) {
|
||||||
|
args.push('-e', token);
|
||||||
|
} else {
|
||||||
|
// Escape regex special chars for literal matching
|
||||||
|
const escaped = token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
||||||
|
args.push('-e', escaped);
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
args.push('-F', query);
|
// Single token or no tokenization: use original behavior
|
||||||
|
if (regex) {
|
||||||
|
args.push('-e', query);
|
||||||
|
} else {
|
||||||
|
args.push('-F', query);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
args.push(...paths);
|
args.push(...paths);
|
||||||
|
|
||||||
return { command: 'rg', args };
|
return { command: 'rg', args, tokens };
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -578,9 +684,10 @@ async function executeAutoMode(params: Params): Promise<SearchResult> {
|
|||||||
/**
|
/**
|
||||||
* Mode: ripgrep - Fast literal string matching using ripgrep
|
* Mode: ripgrep - Fast literal string matching using ripgrep
|
||||||
* No index required, fallback to CodexLens if ripgrep unavailable
|
* No index required, fallback to CodexLens if ripgrep unavailable
|
||||||
|
* Supports tokenized multi-word queries with OR matching and result ranking
|
||||||
*/
|
*/
|
||||||
async function executeRipgrepMode(params: Params): Promise<SearchResult> {
|
async function executeRipgrepMode(params: Params): Promise<SearchResult> {
|
||||||
const { query, paths = [], contextLines = 0, maxResults = 10, includeHidden = false, path = '.', regex = true, caseSensitive = true } = params;
|
const { query, paths = [], contextLines = 0, maxResults = 10, includeHidden = false, path = '.', regex = true, caseSensitive = true, tokenize = true } = params;
|
||||||
|
|
||||||
if (!query) {
|
if (!query) {
|
||||||
return {
|
return {
|
||||||
@@ -648,7 +755,7 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Use ripgrep
|
// Use ripgrep
|
||||||
const { command, args } = buildRipgrepCommand({
|
const { command, args, tokens } = buildRipgrepCommand({
|
||||||
query,
|
query,
|
||||||
paths: paths.length > 0 ? paths : [path],
|
paths: paths.length > 0 ? paths : [path],
|
||||||
contextLines,
|
contextLines,
|
||||||
@@ -656,6 +763,7 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
|
|||||||
includeHidden,
|
includeHidden,
|
||||||
regex,
|
regex,
|
||||||
caseSensitive,
|
caseSensitive,
|
||||||
|
tokenize,
|
||||||
});
|
});
|
||||||
|
|
||||||
return new Promise((resolve) => {
|
return new Promise((resolve) => {
|
||||||
@@ -704,15 +812,21 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
|
|||||||
// If we have results despite the error, return them as partial success
|
// If we have results despite the error, return them as partial success
|
||||||
const isWindowsDeviceError = stderr.includes('os error 1') || stderr.includes('函数不正确');
|
const isWindowsDeviceError = stderr.includes('os error 1') || stderr.includes('函数不正确');
|
||||||
|
|
||||||
if (code === 0 || code === 1 || (isWindowsDeviceError && results.length > 0)) {
|
// Apply token-based scoring and sorting for multi-word queries
|
||||||
|
// Results matching more tokens are ranked higher (exact matches first)
|
||||||
|
const scoredResults = tokens.length > 1 ? scoreByTokenMatch(results, tokens) : results;
|
||||||
|
|
||||||
|
if (code === 0 || code === 1 || (isWindowsDeviceError && scoredResults.length > 0)) {
|
||||||
resolve({
|
resolve({
|
||||||
success: true,
|
success: true,
|
||||||
results,
|
results: scoredResults,
|
||||||
metadata: {
|
metadata: {
|
||||||
mode: 'ripgrep',
|
mode: 'ripgrep',
|
||||||
backend: 'ripgrep',
|
backend: 'ripgrep',
|
||||||
count: results.length,
|
count: scoredResults.length,
|
||||||
query,
|
query,
|
||||||
|
tokens: tokens.length > 1 ? tokens : undefined, // Include tokens in metadata for debugging
|
||||||
|
tokenized: tokens.length > 1,
|
||||||
...(isWindowsDeviceError && { warning: 'Some Windows device files were skipped' }),
|
...(isWindowsDeviceError && { warning: 'Some Windows device files were skipped' }),
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
@@ -1310,12 +1424,17 @@ export const schema: ToolSchema = {
|
|||||||
smart_search(query="auth", limit=10, offset=0) # first page
|
smart_search(query="auth", limit=10, offset=0) # first page
|
||||||
smart_search(query="auth", limit=10, offset=10) # second page
|
smart_search(query="auth", limit=10, offset=10) # second page
|
||||||
|
|
||||||
|
**Multi-Word Search (ripgrep mode with tokenization):**
|
||||||
|
smart_search(query="CCW_PROJECT_ROOT CCW_ALLOWED_DIRS", mode="ripgrep") # tokenized OR matching
|
||||||
|
smart_search(query="auth login user", mode="ripgrep") # matches any token, ranks by match count
|
||||||
|
smart_search(query="exact phrase", mode="ripgrep", tokenize=false) # disable tokenization
|
||||||
|
|
||||||
**Regex Search (ripgrep mode):**
|
**Regex Search (ripgrep mode):**
|
||||||
smart_search(query="class.*Builder") # auto-detects regex pattern
|
smart_search(query="class.*Builder") # auto-detects regex pattern
|
||||||
smart_search(query="def.*\\(.*\\):") # find function definitions
|
smart_search(query="def.*\\(.*\\):") # find function definitions
|
||||||
smart_search(query="import.*from", caseSensitive=false) # case-insensitive
|
smart_search(query="import.*from", caseSensitive=false) # case-insensitive
|
||||||
|
|
||||||
**Modes:** auto (intelligent routing), hybrid (semantic+fuzzy), exact (FTS), ripgrep (fast), priority (fallback chain)`,
|
**Modes:** auto (intelligent routing), hybrid (semantic+fuzzy), exact (FTS), ripgrep (fast with tokenization), priority (fallback chain)`,
|
||||||
inputSchema: {
|
inputSchema: {
|
||||||
type: 'object',
|
type: 'object',
|
||||||
properties: {
|
properties: {
|
||||||
@@ -1402,6 +1521,11 @@ export const schema: ToolSchema = {
|
|||||||
description: 'Case-sensitive search (default: true). Set to false for case-insensitive matching.',
|
description: 'Case-sensitive search (default: true). Set to false for case-insensitive matching.',
|
||||||
default: true,
|
default: true,
|
||||||
},
|
},
|
||||||
|
tokenize: {
|
||||||
|
type: 'boolean',
|
||||||
|
description: 'Tokenize multi-word queries for OR matching (ripgrep mode). Default: true. Results are ranked by token match count (exact matches first).',
|
||||||
|
default: true,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
required: [],
|
required: [],
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user