From 6d3f10d1d7b60d26ae244bde1f3f5b3e9b5b96fb Mon Sep 17 00:00:00 2001
From: catlog22
Date: Sun, 21 Dec 2025 21:45:04 +0800
Subject: [PATCH] feat: add line-based pagination to file reading; optimize
 multi-word query matching in smart search
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../dashboard-js/views/mcp-manager.js |   8 +-
 ccw/src/tools/read-file.ts            | 106 ++++++++++--
 ccw/src/tools/smart-search.ts         | 152 ++++++++++++++++--
 3 files changed, 237 insertions(+), 29 deletions(-)

diff --git a/ccw/src/templates/dashboard-js/views/mcp-manager.js b/ccw/src/templates/dashboard-js/views/mcp-manager.js
index 55248cc4..fb0a5c70 100644
--- a/ccw/src/templates/dashboard-js/views/mcp-manager.js
+++ b/ccw/src/templates/dashboard-js/views/mcp-manager.js
@@ -256,7 +256,7 @@ async function renderMcpManager() {
[hunk bodies lost in extraction: the one-line HTML template edits in renderMcpManager() (8 changed lines per the diffstat) and the diff/index header for ccw/src/tools/read-file.ts could not be recovered; the read-file.ts changes begin at the @@ -40,6 +42,8 @@ hunk below]
- + ; @@ -40,6 +42,8 @@ interface FileEntry { content?: string; truncated?: boolean; matches?: string[]; + totalLines?: number; + lineRange?: { start: number; end: number }; } interface ReadResult { @@ -123,23 +127,69 @@ function collectFiles( return files; } +interface ReadContentOptions { + maxLength: number; + offset?: number; + limit?: number; +} + +interface ReadContentResult { + content: string; + truncated: boolean; + totalLines?: number; + lineRange?: { start: number; end: number }; +} + /** - * Read file content with truncation + * Read file content with truncation and optional line-based pagination */ -function readFileContent(filePath: string, maxLength: number): { content: string; truncated: boolean } { +function readFileContent(filePath: string, options: ReadContentOptions): ReadContentResult { + const { maxLength, offset, limit } = options; + if (isBinaryFile(filePath)) { return { content: '[Binary file]', truncated: false }; } try { const content = readFileSync(filePath, 'utf8'); + const lines = content.split('\n'); + const totalLines = lines.length; + + // If offset/limit specified, use line-based pagination + if (offset !== undefined || limit !== undefined) { + const startLine = Math.min(offset ?? 0, totalLines); + const endLine = limit !== undefined ? Math.min(startLine + limit, totalLines) : totalLines; + const selectedLines = lines.slice(startLine, endLine); + const selectedContent = selectedLines.join('\n'); + + const actualEnd = endLine; + const hasMore = actualEnd < totalLines; + + let finalContent = selectedContent; + if (selectedContent.length > maxLength) { + finalContent = selectedContent.substring(0, maxLength) + `\n... (+${selectedContent.length - maxLength} chars)`; + } + + // Calculate actual line range (handle empty selection) + const actualLineEnd = selectedLines.length > 0 ? startLine + selectedLines.length - 1 : startLine; + + return { + content: finalContent, + truncated: hasMore || selectedContent.length > maxLength, + totalLines, + lineRange: { start: startLine, end: actualLineEnd }, + }; + } + + // Default behavior: truncate by character length if (content.length > maxLength) { return { content: content.substring(0, maxLength) + `\n... (+${content.length - maxLength} chars)`, - truncated: true + truncated: true, + totalLines, }; } - return { content, truncated: false }; + return { content, truncated: false, totalLines }; } catch (error) { return { content: `[Error: ${(error as Error).message}]`, truncated: false }; } @@ -171,15 +221,17 @@ function findMatches(content: string, pattern: string): string[] { // Tool schema for MCP export const schema: ToolSchema = { name: 'read_file', - description: `Read files with multi-file, directory, and regex support. + description: `Read files with multi-file, directory, regex support, and line-based pagination. Usage: - read_file(paths="file.ts") # Single file - read_file(paths=["a.ts", "b.ts"]) # Multiple files - read_file(paths="src/", pattern="*.ts") # Directory with pattern - read_file(paths="src/", contentPattern="TODO") # Search content + read_file(paths="file.ts") # Single file (full content) + read_file(paths="file.ts", offset=100, limit=50) # Lines 100-149 (0-based) + read_file(paths=["a.ts", "b.ts"]) # Multiple files + read_file(paths="src/", pattern="*.ts") # Directory with pattern + read_file(paths="src/", contentPattern="TODO") # Search content -Returns compact file list with optional content.`, +Supports both absolute and relative paths. Relative paths are resolved from project root. 
+Returns compact file list with optional content. Use offset/limit for large file pagination.`, inputSchema: { type: 'object', properties: { @@ -213,6 +265,16 @@ Returns compact file list with optional content.`, description: `Max number of files to return (default: ${MAX_FILES})`, default: MAX_FILES, }, + offset: { + type: 'number', + description: 'Line offset to start reading from (0-based, for single file only)', + minimum: 0, + }, + limit: { + type: 'number', + description: 'Number of lines to read (for single file only)', + minimum: 1, + }, }, required: ['paths'], }, @@ -232,6 +294,8 @@ export async function handler(params: Record): Promise): Promise= MAX_TOTAL_CONTENT) break; @@ -283,7 +351,15 @@ export async function handler(params: Record): Promise): Promise): Promise): Promise maxFiles) { message += ` (showing ${maxFiles} of ${totalFiles})`; } + if (useLinePagination && files.length > 0 && files[0].lineRange) { + const { start, end } = files[0].lineRange; + message += ` [lines ${start}-${end} of ${files[0].totalLines}]`; + } if (contentPattern) { message += ` matching "${contentPattern}"`; } diff --git a/ccw/src/tools/smart-search.ts b/ccw/src/tools/smart-search.ts index e18c8545..f609de77 100644 --- a/ccw/src/tools/smart-search.ts +++ b/ccw/src/tools/smart-search.ts @@ -45,6 +45,7 @@ const ParamsSchema = z.object({ // Search modifiers for ripgrep mode regex: z.boolean().default(true), // Use regex pattern matching (default: enabled) caseSensitive: z.boolean().default(true), // Case sensitivity (default: case-sensitive) + tokenize: z.boolean().default(true), // Tokenize multi-word queries for OR matching (default: enabled) // Fuzzy matching is implicit in hybrid mode (RRF fusion) }); @@ -96,6 +97,87 @@ function buildExcludeArgs(): string[] { return args; } +/** + * Tokenize query for multi-word OR matching + * Splits on whitespace and common delimiters, filters stop words and short tokens + * @param query - The search query + * @returns Array of tokens + */ +function tokenizeQuery(query: string): string[] { + // Stop words for filtering (common English + programming keywords) + const stopWords = new Set([ + 'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being', + 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', + 'should', 'may', 'might', 'must', 'can', 'to', 'of', 'in', 'for', 'on', + 'with', 'at', 'by', 'from', 'as', 'into', 'through', 'and', 'but', 'if', + 'or', 'not', 'this', 'that', 'these', 'those', 'it', 'its', 'how', 'what', + 'where', 'when', 'why', 'which', 'who', 'whom', + ]); + + // Split on whitespace and common delimiters, keep meaningful tokens + const tokens = query + .split(/[\s,;:]+/) + .map(token => token.trim()) + .filter(token => { + // Keep tokens that are: + // - At least 2 characters long + // - Not a stop word (case-insensitive) + // - Or look like identifiers (contain underscore/camelCase) + if (token.length < 2) return false; + if (stopWords.has(token.toLowerCase()) && !token.includes('_') && !/[A-Z]/.test(token)) { + return false; + } + return true; + }); + + return tokens; +} + +/** + * Score results based on token match count for ranking + * @param results - Search results + * @param tokens - Query tokens + * @returns Results with match scores + */ +function scoreByTokenMatch(results: ExactMatch[], tokens: string[]): ExactMatch[] { + if (tokens.length <= 1) return results; + + // Create case-insensitive patterns for each token + const tokenPatterns = tokens.map(t => { + const escaped = t.replace(/[.*+?^${}()|[\]\\]/g, 
'\\$&'); + return new RegExp(escaped, 'i'); + }); + + return results.map(r => { + const content = r.content || ''; + const file = r.file || ''; + const searchText = `${file} ${content}`; + + // Count how many tokens match + let matchCount = 0; + for (const pattern of tokenPatterns) { + if (pattern.test(searchText)) { + matchCount++; + } + } + + // Calculate match ratio (0 to 1) + const matchRatio = matchCount / tokens.length; + + return { + ...r, + matchScore: matchRatio, + matchCount, + }; + }).sort((a, b) => { + // Sort by match ratio (descending), then by line number + if (b.matchScore !== a.matchScore) { + return b.matchScore - a.matchScore; + } + return (a.line || 0) - (b.line || 0); + }); +} + interface Classification { mode: string; confidence: number; @@ -107,6 +189,8 @@ interface ExactMatch { line: number; column: number; content: string; + matchScore?: number; // Token match ratio (0-1) for multi-word queries + matchCount?: number; // Number of tokens matched } interface RelationshipInfo { @@ -162,6 +246,9 @@ interface SearchMetadata { index_status?: 'indexed' | 'not_indexed' | 'partial'; fallback_history?: string[]; suggested_weights?: Record; + // Tokenization metadata (ripgrep mode) + tokens?: string[]; // Query tokens used for multi-word search + tokenized?: boolean; // Whether tokenization was applied // Pagination metadata pagination?: PaginationInfo; // Init action specific @@ -373,8 +460,9 @@ function checkToolAvailability(toolName: string): boolean { /** * Build ripgrep command arguments + * Supports tokenized multi-word queries with OR matching * @param params - Search parameters - * @returns Command and arguments + * @returns Command, arguments, and tokens used */ function buildRipgrepCommand(params: { query: string; @@ -384,8 +472,9 @@ function buildRipgrepCommand(params: { includeHidden: boolean; regex?: boolean; caseSensitive?: boolean; -}): { command: string; args: string[] } { - const { query, paths = ['.'], contextLines = 0, maxResults = 10, includeHidden = false, regex = false, caseSensitive = true } = params; + tokenize?: boolean; +}): { command: string; args: string[]; tokens: string[] } { + const { query, paths = ['.'], contextLines = 0, maxResults = 10, includeHidden = false, regex = false, caseSensitive = true, tokenize = true } = params; const args = [ '-n', @@ -415,16 +504,33 @@ function buildRipgrepCommand(params: { args.push('--hidden'); } - // Regex mode (-e) vs fixed string mode (-F) - if (regex) { - args.push('-e', query); + // Tokenize query for multi-word OR matching + const tokens = tokenize ? 
tokenizeQuery(query) : [query]; + + if (tokens.length > 1) { + // Multi-token: use multiple -e patterns (OR matching) + // Each token is escaped for regex safety unless regex mode is enabled + for (const token of tokens) { + if (regex) { + args.push('-e', token); + } else { + // Escape regex special chars for literal matching + const escaped = token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + args.push('-e', escaped); + } + } } else { - args.push('-F', query); + // Single token or no tokenization: use original behavior + if (regex) { + args.push('-e', query); + } else { + args.push('-F', query); + } } args.push(...paths); - return { command: 'rg', args }; + return { command: 'rg', args, tokens }; } /** @@ -578,9 +684,10 @@ async function executeAutoMode(params: Params): Promise { /** * Mode: ripgrep - Fast literal string matching using ripgrep * No index required, fallback to CodexLens if ripgrep unavailable + * Supports tokenized multi-word queries with OR matching and result ranking */ async function executeRipgrepMode(params: Params): Promise { - const { query, paths = [], contextLines = 0, maxResults = 10, includeHidden = false, path = '.', regex = true, caseSensitive = true } = params; + const { query, paths = [], contextLines = 0, maxResults = 10, includeHidden = false, path = '.', regex = true, caseSensitive = true, tokenize = true } = params; if (!query) { return { @@ -648,7 +755,7 @@ async function executeRipgrepMode(params: Params): Promise { } // Use ripgrep - const { command, args } = buildRipgrepCommand({ + const { command, args, tokens } = buildRipgrepCommand({ query, paths: paths.length > 0 ? paths : [path], contextLines, @@ -656,6 +763,7 @@ async function executeRipgrepMode(params: Params): Promise { includeHidden, regex, caseSensitive, + tokenize, }); return new Promise((resolve) => { @@ -704,15 +812,21 @@ async function executeRipgrepMode(params: Params): Promise { // If we have results despite the error, return them as partial success const isWindowsDeviceError = stderr.includes('os error 1') || stderr.includes('函数不正确'); - if (code === 0 || code === 1 || (isWindowsDeviceError && results.length > 0)) { + // Apply token-based scoring and sorting for multi-word queries + // Results matching more tokens are ranked higher (exact matches first) + const scoredResults = tokens.length > 1 ? scoreByTokenMatch(results, tokens) : results; + + if (code === 0 || code === 1 || (isWindowsDeviceError && scoredResults.length > 0)) { resolve({ success: true, - results, + results: scoredResults, metadata: { mode: 'ripgrep', backend: 'ripgrep', - count: results.length, + count: scoredResults.length, query, + tokens: tokens.length > 1 ? 
tokens : undefined, // Include tokens in metadata for debugging + tokenized: tokens.length > 1, ...(isWindowsDeviceError && { warning: 'Some Windows device files were skipped' }), }, }); @@ -1310,12 +1424,17 @@ export const schema: ToolSchema = { smart_search(query="auth", limit=10, offset=0) # first page smart_search(query="auth", limit=10, offset=10) # second page +**Multi-Word Search (ripgrep mode with tokenization):** + smart_search(query="CCW_PROJECT_ROOT CCW_ALLOWED_DIRS", mode="ripgrep") # tokenized OR matching + smart_search(query="auth login user", mode="ripgrep") # matches any token, ranks by match count + smart_search(query="exact phrase", mode="ripgrep", tokenize=false) # disable tokenization + **Regex Search (ripgrep mode):** smart_search(query="class.*Builder") # auto-detects regex pattern smart_search(query="def.*\\(.*\\):") # find function definitions smart_search(query="import.*from", caseSensitive=false) # case-insensitive -**Modes:** auto (intelligent routing), hybrid (semantic+fuzzy), exact (FTS), ripgrep (fast), priority (fallback chain)`, +**Modes:** auto (intelligent routing), hybrid (semantic+fuzzy), exact (FTS), ripgrep (fast with tokenization), priority (fallback chain)`, inputSchema: { type: 'object', properties: { @@ -1402,6 +1521,11 @@ export const schema: ToolSchema = { description: 'Case-sensitive search (default: true). Set to false for case-insensitive matching.', default: true, }, + tokenize: { + type: 'boolean', + description: 'Tokenize multi-word queries for OR matching (ripgrep mode). Default: true. Results are ranked by token match count (exact matches first).', + default: true, + }, }, required: [], },
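
Reviewer note (not part of the patch): a minimal standalone sketch of the two behaviours introduced above, for anyone who wants to poke at the semantics without wiring up the MCP tools. The demo* helpers and the abbreviated stop-word list are illustrative stand-ins, not the exported API; they mirror tokenizeQuery, buildRipgrepCommand's pattern handling, and the read_file offset/limit slice as defined in this patch.

// Illustrative TypeScript sketch only; names prefixed with demo* are assumptions.

const DEMO_STOP_WORDS = new Set(['the', 'a', 'an', 'is', 'how', 'to', 'of', 'in', 'for', 'and', 'or']);

// Mirrors tokenizeQuery: split on whitespace/delimiters, drop short tokens and
// plain stop words, but keep identifier-looking tokens (underscore or uppercase).
function demoTokenize(query: string): string[] {
  return query
    .split(/[\s,;:]+/)
    .map((t) => t.trim())
    .filter((t) =>
      t.length >= 2 &&
      (!DEMO_STOP_WORDS.has(t.toLowerCase()) || t.includes('_') || /[A-Z]/.test(t))
    );
}

// Mirrors buildRipgrepCommand's pattern handling: a multi-token query becomes one
// escaped -e pattern per token (OR matching); a single token keeps the -F / -e behaviour.
function demoRipgrepPatternArgs(query: string, regex = false): string[] {
  const tokens = demoTokenize(query);
  if (tokens.length > 1) {
    return tokens.flatMap((t) => ['-e', regex ? t : t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')]);
  }
  return regex ? ['-e', query] : ['-F', query];
}

// Mirrors the read_file offset/limit slice: 0-based offset, `limit` lines,
// hasMore signals that the caller should request the next page.
function demoPaginate(lines: string[], offset = 0, limit?: number): { page: string[]; hasMore: boolean } {
  const start = Math.min(offset, lines.length);
  const end = limit !== undefined ? Math.min(start + limit, lines.length) : lines.length;
  return { page: lines.slice(start, end), hasMore: end < lines.length };
}

// demoRipgrepPatternArgs('how to read CCW_PROJECT_ROOT')
//   -> ['-e', 'read', '-e', 'CCW_PROJECT_ROOT']
// demoPaginate(['l0', 'l1', 'l2', 'l3'], 1, 2)
//   -> { page: ['l1', 'l2'], hasMore: true }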