feat: Add comprehensive tests for contentPattern and glob pattern matching

- Implemented final verification tests for contentPattern to validate behavior with empty strings, dangerous patterns, and normal patterns.
- Created glob pattern matching tests to verify regex conversion and matching functionality.
- Developed infinite-loop risk tests using Worker threads to isolate potential blocking operations.
- Introduced optimized contentPattern tests to validate improvements in the findMatches function.
- Added verification tests to assess the effectiveness of contentPattern optimizations.
- Conducted safety tests for contentPattern to identify edge cases and potential vulnerabilities.
- Implemented unrestricted loop tests to analyze infinite-loop risks without match limits.
- Developed tests for zero-width pattern detection logic to ensure proper handling of dangerous regex patterns.
2026-03-02 15:23:19 +08:00 · 2026-02-09 11:13:01 +08:00
parent dfe153778c
commit 964292ebdb
62 changed files with 7588 additions and 374 deletions
--- a/ccw/src/tools/read-file.ts
+++ b/ccw/src/tools/read-file.ts
@@ -1,417 +1,108 @@
 /**
- * Read File Tool - Read files with multi-file, directory, and regex support
+ * Read File Tool - Single file precise reading with optional line pagination
 *
 * Features:
- * - Read single or multiple files
- * - Read all files in a directory (with depth control)
- * - Filter files by glob/regex pattern
- * - Content search with regex
- * - Compact output format
+ * - Read a single file with full content
+ * - Line-based pagination with offset/limit
+ * - Binary file detection
 */

 import { z } from 'zod';
 import type { ToolSchema, ToolResult } from '../types/tool.js';
-import { readFileSync, readdirSync, statSync, existsSync } from 'fs';
-import { resolve, isAbsolute, join, relative, extname } from 'path';
+import { existsSync, statSync } from 'fs';
+import { relative } from 'path';
 import { validatePath, getProjectRoot } from '../utils/path-validator.js';
+import {
+  MAX_CONTENT_LENGTH,
+  readFileContent,
+  type FileEntry,
+  type ReadResult,
+} from '../utils/file-reader.js';

-// Max content per file (truncate if larger)
-const MAX_CONTENT_LENGTH = 5000;
-// Max files to return
-const MAX_FILES = 50;
-// Max total content length
-const MAX_TOTAL_CONTENT = 50000;
-
-// Define Zod schema for validation
 const ParamsSchema = z.object({
-  paths: z.union([z.string(), z.array(z.string())]).describe('File path(s) or directory'),
-  pattern: z.string().optional().describe('Glob pattern to filter files (e.g., "*.ts", "**/*.js")'),
-  contentPattern: z.string().optional().describe('Regex to search within file content'),
-  maxDepth: z.number().default(3).describe('Max directory depth to traverse'),
-  includeContent: z.boolean().default(true).describe('Include file content in result'),
-  maxFiles: z.number().default(MAX_FILES).describe('Max number of files to return'),
-  offset: z.number().min(0).optional().describe('Line offset to start reading from (0-based, for single file only)'),
-  limit: z.number().min(1).optional().describe('Number of lines to read (for single file only)'),
-}).refine((data) => {
-  // Validate: offset/limit only allowed for single file mode
-  const hasPagination = data.offset !== undefined || data.limit !== undefined;
-  const isMultiple = Array.isArray(data.paths) && data.paths.length > 1;
-  return !(hasPagination && isMultiple);
-}, {
-  message: 'offset/limit parameters are only supported for single file mode. Cannot use with multiple paths.',
-  path: ['offset', 'limit', 'paths'],
+  path: z.string().describe('Single file path to read'),
+  offset: z.number().min(0).optional().describe('Line offset to start reading from (0-based)'),
+  limit: z.number().min(1).optional().describe('Number of lines to read'),
 });

 type Params = z.infer<typeof ParamsSchema>;

-interface FileEntry {
-  path: string;
-  size: number;
-  content?: string;
-  truncated?: boolean;
-  matches?: string[];
-  totalLines?: number;
-  lineRange?: { start: number; end: number };
-}
-
-interface ReadResult {
-  files: FileEntry[];
-  totalFiles: number;
-  message: string;
-}
-
-// Common binary extensions to skip
-const BINARY_EXTENSIONS = new Set([
-  '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.ico', '.webp', '.svg',
-  '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
-  '.zip', '.tar', '.gz', '.rar', '.7z',
-  '.exe', '.dll', '.so', '.dylib',
-  '.mp3', '.mp4', '.wav', '.avi', '.mov',
-  '.woff', '.woff2', '.ttf', '.eot', '.otf',
-  '.pyc', '.class', '.o', '.obj',
-]);
-
-/**
- * Check if file is likely binary
- */
-function isBinaryFile(filePath: string): boolean {
-  const ext = extname(filePath).toLowerCase();
-  return BINARY_EXTENSIONS.has(ext);
-}
-
-/**
- * Convert glob pattern to regex
- */
-function globToRegex(pattern: string): RegExp {
-  const escaped = pattern
-    .replace(/[.+^${}()|[\]\\]/g, '\\$&')
-    .replace(/\*/g, '.*')
-    .replace(/\?/g, '.');
-  return new RegExp(`^${escaped}$`, 'i');
-}
-
-/**
- * Check if filename matches glob pattern
- */
-function matchesPattern(filename: string, pattern: string): boolean {
-  const regex = globToRegex(pattern);
-  return regex.test(filename);
-}
-
-/**
- * Recursively collect files from directory
- */
-function collectFiles(
-  dir: string,
-  pattern: string | undefined,
-  maxDepth: number,
-  currentDepth: number = 0
-): string[] {
-  if (currentDepth > maxDepth) return [];
-
-  const files: string[] = [];
-
-  try {
-    const entries = readdirSync(dir, { withFileTypes: true });
-
-    for (const entry of entries) {
-      // Skip hidden files/dirs and node_modules
-      if (entry.name.startsWith('.') || entry.name === 'node_modules') continue;
-
-      const fullPath = join(dir, entry.name);
-
-      if (entry.isDirectory()) {
-        files.push(...collectFiles(fullPath, pattern, maxDepth, currentDepth + 1));
-      } else if (entry.isFile()) {
-        if (!pattern || matchesPattern(entry.name, pattern)) {
-          files.push(fullPath);
-        }
-      }
-    }
-  } catch {
-    // Skip directories we can't read
-  }
-
-  return files;
-}
-
-interface ReadContentOptions {
-  maxLength: number;
-  offset?: number;
-  limit?: number;
-}
-
-interface ReadContentResult {
-  content: string;
-  truncated: boolean;
-  totalLines?: number;
-  lineRange?: { start: number; end: number };
-}
-
-/**
- * Read file content with truncation and optional line-based pagination
- */
-function readFileContent(filePath: string, options: ReadContentOptions): ReadContentResult {
-  const { maxLength, offset, limit } = options;
-
-  if (isBinaryFile(filePath)) {
-    return { content: '[Binary file]', truncated: false };
-  }
-
-  try {
-    const content = readFileSync(filePath, 'utf8');
-    const lines = content.split('\n');
-    const totalLines = lines.length;
-
-    // If offset/limit specified, use line-based pagination
-    if (offset !== undefined || limit !== undefined) {
-      const startLine = Math.min(offset ?? 0, totalLines);
-      const endLine = limit !== undefined ? Math.min(startLine + limit, totalLines) : totalLines;
-      const selectedLines = lines.slice(startLine, endLine);
-      const selectedContent = selectedLines.join('\n');
-
-      const actualEnd = endLine;
-      const hasMore = actualEnd < totalLines;
-
-      let finalContent = selectedContent;
-      if (selectedContent.length > maxLength) {
-        finalContent = selectedContent.substring(0, maxLength) + `\n... (+${selectedContent.length - maxLength} chars)`;
-      }
-
-      // Calculate actual line range (handle empty selection)
-      const actualLineEnd = selectedLines.length > 0 ? startLine + selectedLines.length - 1 : startLine;
-
-      return {
-        content: finalContent,
-        truncated: hasMore || selectedContent.length > maxLength,
-        totalLines,
-        lineRange: { start: startLine, end: actualLineEnd },
-      };
-    }
-
-    // Default behavior: truncate by character length
-    if (content.length > maxLength) {
-      return {
-        content: content.substring(0, maxLength) + `\n... (+${content.length - maxLength} chars)`,
-        truncated: true,
-        totalLines,
-      };
-    }
-    return { content, truncated: false, totalLines };
-  } catch (error) {
-    return { content: `[Error: ${(error as Error).message}]`, truncated: false };
-  }
-}
-
-/**
- * Find regex matches in content
- */
-function findMatches(content: string, pattern: string): string[] {
-  try {
-    const regex = new RegExp(pattern, 'gm');
-    const matches: string[] = [];
-    let match;
-
-    while ((match = regex.exec(content)) !== null && matches.length < 10) {
-      // Get line containing match
-      const lineStart = content.lastIndexOf('\n', match.index) + 1;
-      const lineEnd = content.indexOf('\n', match.index);
-      const line = content.substring(lineStart, lineEnd === -1 ? undefined : lineEnd).trim();
-      matches.push(line.substring(0, 200)); // Truncate long lines
-    }
-
-    return matches;
-  } catch {
-    return [];
-  }
-}
-
-// Tool schema for MCP
 export const schema: ToolSchema = {
  name: 'read_file',
-  description: `Read files with multi-file, directory, regex support, and line-based pagination.
+  description: `Read a single file with optional line-based pagination.

 Usage:
-  read_file(paths="file.ts")                              # Single file (full content)
-  read_file(paths="file.ts", offset=100, limit=50)        # Lines 100-149 (0-based)
-  read_file(paths=["a.ts", "b.ts"])                       # Multiple files
-  read_file(paths="src/", pattern="*.ts")                 # Directory with pattern
-  read_file(paths="src/", contentPattern="TODO")          # Search content
+  read_file(path="file.ts")                        # Full content
+  read_file(path="file.ts", offset=100, limit=50)  # Lines 100-149 (0-based)

 Supports both absolute and relative paths. Relative paths are resolved from project root.
-Returns compact file list with optional content. Use offset/limit for large file pagination.`,
+Use offset/limit for large file pagination.`,
  inputSchema: {
    type: 'object',
    properties: {
-      paths: {
-        oneOf: [
-          { type: 'string', description: 'Single file or directory path' },
-          { type: 'array', items: { type: 'string' }, description: 'Array of file paths' }
-        ],
-        description: 'File path(s) or directory to read',
-      },
-      pattern: {
+      path: {
        type: 'string',
-        description: 'Glob pattern to filter files (e.g., "*.ts", "*.{js,ts}")',
-      },
-      contentPattern: {
-        type: 'string',
-        description: 'Regex pattern to search within file content',
-      },
-      maxDepth: {
-        type: 'number',
-        description: 'Max directory depth to traverse (default: 3)',
-        default: 3,
-      },
-      includeContent: {
-        type: 'boolean',
-        description: 'Include file content in result (default: true)',
-        default: true,
-      },
-      maxFiles: {
-        type: 'number',
-        description: `Max number of files to return (default: ${MAX_FILES})`,
-        default: MAX_FILES,
+        description: 'Single file path to read',
      },
      offset: {
        type: 'number',
-        description: 'Line offset to start reading from (0-based). **Only for single file mode** - validation error if used with multiple paths.',
+        description: 'Line offset to start reading from (0-based)',
        minimum: 0,
      },
      limit: {
        type: 'number',
-        description: 'Number of lines to read. **Only for single file mode** - validation error if used with multiple paths.',
+        description: 'Number of lines to read',
        minimum: 1,
      },
    },
-    required: ['paths'],
+    required: ['path'],
  },
 };

-// Handler function
 export async function handler(params: Record<string, unknown>): Promise<ToolResult<ReadResult>> {
  const parsed = ParamsSchema.safeParse(params);
  if (!parsed.success) {
    return { success: false, error: `Invalid params: ${parsed.error.message}` };
  }

-  const {
-    paths,
-    pattern,
-    contentPattern,
-    maxDepth,
-    includeContent,
-    maxFiles,
+  const { path: filePath, offset, limit } = parsed.data;
+  const cwd = getProjectRoot();
+  const resolvedPath = await validatePath(filePath);
+
+  if (!existsSync(resolvedPath)) {
+    return { success: false, error: `File not found: ${filePath}` };
+  }
+
+  const stat = statSync(resolvedPath);
+  if (!stat.isFile()) {
+    return { success: false, error: `Not a file: ${filePath}. Use read_many_files for directories.` };
+  }
+
+  const { content, truncated, totalLines, lineRange } = readFileContent(resolvedPath, {
+    maxLength: MAX_CONTENT_LENGTH,
    offset,
    limit,
-  } = parsed.data;
+  });

-  const cwd = getProjectRoot();
+  const entry: FileEntry = {
+    path: relative(cwd, resolvedPath) || filePath,
+    size: stat.size,
+    content,
+    truncated,
+    totalLines,
+    lineRange,
+  };

-  // Normalize paths to array
-  const inputPaths = Array.isArray(paths) ? paths : [paths];
-
-  // Collect all files to read
-  const allFiles: string[] = [];
-
-  for (const inputPath of inputPaths) {
-    const resolvedPath = await validatePath(inputPath);
-
-    if (!existsSync(resolvedPath)) {
-      continue; // Skip non-existent paths
-    }
-
-    const stat = statSync(resolvedPath);
-
-    if (stat.isDirectory()) {
-      // Collect files from directory
-      const dirFiles = collectFiles(resolvedPath, pattern, maxDepth);
-      allFiles.push(...dirFiles);
-    } else if (stat.isFile()) {
-      // Add single file (check pattern if provided)
-      if (!pattern || matchesPattern(relative(cwd, resolvedPath), pattern)) {
-        allFiles.push(resolvedPath);
-      }
-    }
-  }
-
-  // Limit files
-  const limitedFiles = allFiles.slice(0, maxFiles);
-  const totalFiles = allFiles.length;
-
-  // Process files
-  const files: FileEntry[] = [];
-  let totalContent = 0;
-
-  // Only apply offset/limit for single file mode
-  const isSingleFile = limitedFiles.length === 1;
-  const useLinePagination = isSingleFile && (offset !== undefined || limit !== undefined);
-
-  for (const filePath of limitedFiles) {
-    if (totalContent >= MAX_TOTAL_CONTENT) break;
-
-    const stat = statSync(filePath);
-    const entry: FileEntry = {
-      path: relative(cwd, filePath) || filePath,
-      size: stat.size,
-    };
-
-    if (includeContent) {
-      const remainingSpace = MAX_TOTAL_CONTENT - totalContent;
-      const maxLen = Math.min(MAX_CONTENT_LENGTH, remainingSpace);
-
-      // Pass offset/limit only for single file mode
-      const readOptions: ReadContentOptions = { maxLength: maxLen };
-      if (useLinePagination) {
-        if (offset !== undefined) readOptions.offset = offset;
-        if (limit !== undefined) readOptions.limit = limit;
-      }
-
-      const { content, truncated, totalLines, lineRange } = readFileContent(filePath, readOptions);
-
-      // If contentPattern provided, only include files with matches
-      if (contentPattern) {
-        const matches = findMatches(content, contentPattern);
-        if (matches.length > 0) {
-          entry.matches = matches;
-          entry.content = content;
-          entry.truncated = truncated;
-          entry.totalLines = totalLines;
-          entry.lineRange = lineRange;
-          totalContent += content.length;
-        } else {
-          continue; // Skip files without matches
-        }
-      } else {
-        entry.content = content;
-        entry.truncated = truncated;
-        entry.totalLines = totalLines;
-        entry.lineRange = lineRange;
-        totalContent += content.length;
-      }
-    }
-
-    files.push(entry);
-  }
-
-  // Build message
-  let message = `Read ${files.length} file(s)`;
-  if (totalFiles > maxFiles) {
-    message += ` (showing ${maxFiles} of ${totalFiles})`;
-  }
-  if (useLinePagination && files.length > 0 && files[0].lineRange) {
-    const { start, end } = files[0].lineRange;
-    message += ` [lines ${start}-${end} of ${files[0].totalLines}]`;
-  }
-  if (contentPattern) {
-    message += ` matching "${contentPattern}"`;
+  let message = `Read 1 file`;
+  if (lineRange) {
+    message += ` [lines ${lineRange.start}-${lineRange.end} of ${totalLines}]`;
  }

  return {
    success: true,
    result: {
-      files,
-      totalFiles,
+      files: [entry],
+      totalFiles: 1,
      message,
    },
  };