diff --git a/.claude/skills/code-reviewer/SKILL.md b/.claude/skills/code-reviewer/SKILL.md
index 6051b741..891ff3b7 100644
--- a/.claude/skills/code-reviewer/SKILL.md
+++ b/.claude/skills/code-reviewer/SKILL.md
@@ -1,22 +1,13 @@
+---
+name: code-reviewer
+description: Comprehensive code review skill for identifying security vulnerabilities and best practices violations. Triggers on "code review", "review code", "security audit", "代码审查".
+allowed-tools: Read, Glob, Grep, mcp__ace-tool__search_context, mcp__ccw-tools__smart_search
+---
+
 # Code Reviewer
 
 Comprehensive code review skill for identifying security vulnerabilities and best practices violations.
 
-## Metadata
-
-```yaml
-name: code-reviewer
-description: 帮助审查代码的安全漏洞和最佳实践
-version: 1.0.0
-execution_mode: sequential
-allowed-tools:
-  - Read
-  - Glob
-  - Grep
-  - mcp__ace-tool__search_context
-  - mcp__ccw-tools__smart_search
-```
-
 ## Architecture Overview
 
 ```
diff --git a/.claude/skills/skill-generator/templates/skill-md.md b/.claude/skills/skill-generator/templates/skill-md.md
index 63fe1e66..8be0c678 100644
--- a/.claude/skills/skill-generator/templates/skill-md.md
+++ b/.claude/skills/skill-generator/templates/skill-md.md
@@ -2,9 +2,20 @@
 
 用于生成新 Skill 入口文件的模板。
 
-## 模板结构
+## ⚠️ 重要:YAML Front Matter 规范
+
+> **CRITICAL**: SKILL.md 文件必须以 YAML front matter 开头,即以 `---` 作为文件第一行。
+>
+> **禁止**使用以下格式:
+> - `# Title` 然后 `## Metadata` + yaml 代码块 ❌
+> - 任何在 `---` 之前的内容 ❌
+>
+> **正确格式**:文件第一行必须是 `---`
+
+## 可直接应用的模板
+
+以下是完整的 SKILL.md 模板。生成时**直接复制应用**,将 `{{变量}}` 替换为实际值:
 
-```markdown
 ---
 name: {{skill_name}}
 description: {{description}}. Triggers on {{triggers}}.
@@ -33,9 +44,9 @@ allowed-tools: {{allowed_tools}}
 
 \`\`\`javascript
 const timestamp = new Date().toISOString().slice(0,19).replace(/[-:T]/g, '');
-const workDir = `{{output_location}}`;
+const workDir = \`{{output_location}}\`;
 
-Bash(`mkdir -p "${workDir}"`);
+Bash(\`mkdir -p "\${workDir}"\`);
 {{additional_dirs}}
 \`\`\`
 
@@ -48,7 +59,8 @@ Bash(`mkdir -p "${workDir}"`);
 
 ## Reference Documents
 
 {{reference_table}}
-```
+
+---
 
 ## 变量说明
diff --git a/ccw/src/tools/smart-search.ts b/ccw/src/tools/smart-search.ts
index 53c908a0..3ca74128 100644
--- a/ccw/src/tools/smart-search.ts
+++ b/ccw/src/tools/smart-search.ts
@@ -1218,7 +1218,7 @@ async function executeRipgrepMode(params: Params): Promise {
  * Requires index
  */
 async function executeCodexLensExactMode(params: Params): Promise {
-  const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false } = params;
+  const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false, excludeExtensions, codeOnly = false, offset = 0 } = params;
 
   if (!query) {
     return {
@@ -1241,10 +1241,18 @@ async function executeCodexLensExactMode(params: Params): Promise
   // Request more results to support split (full content + extra files)
   const totalToFetch = maxResults + extraFilesCount;
 
-  const args = ['search', query, '--limit', totalToFetch.toString(), '--method', 'fts', '--json'];
+  const args = ['search', query, '--limit', totalToFetch.toString(), '--offset', offset.toString(), '--method', 'fts', '--json'];
   if (enrich) {
     args.push('--enrich');
   }
+  // Add code_only filter if requested
+  if (codeOnly) {
+    args.push('--code-only');
+  }
+  // Add exclude_extensions filter if provided
+  if (excludeExtensions && excludeExtensions.length > 0) {
+    args.push('--exclude-extensions', excludeExtensions.join(','));
+  }
 
   const result = await executeCodexLens(args, { cwd: path });
   if (!result.success) {
@@ -1278,10 +1286,18 @@ async function executeCodexLensExactMode(params: Params): Promise
 
   // Fallback to fuzzy mode if exact returns no results
   if (allResults.length === 0) {
-    const fuzzyArgs = ['search', query, '--limit', totalToFetch.toString(), '--method', 'fts', '--use-fuzzy', '--json'];
+    const fuzzyArgs = ['search', query, '--limit', totalToFetch.toString(), '--offset', offset.toString(), '--method', 'fts', '--use-fuzzy', '--json'];
     if (enrich) {
       fuzzyArgs.push('--enrich');
     }
+    // Add code_only filter if requested
+    if (codeOnly) {
+      fuzzyArgs.push('--code-only');
+    }
+    // Add exclude_extensions filter if provided
+    if (excludeExtensions && excludeExtensions.length > 0) {
+      fuzzyArgs.push('--exclude-extensions', excludeExtensions.join(','));
+    }
 
     const fuzzyResult = await executeCodexLens(fuzzyArgs, { cwd: path });
     if (fuzzyResult.success) {
@@ -1343,7 +1359,7 @@ async function executeCodexLensExactMode(params: Params): Promise
  */
 async function executeHybridMode(params: Params): Promise {
   const timer = createTimer();
-  const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false, excludeExtensions, codeOnly = false } = params;
+  const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false, excludeExtensions, codeOnly = false, offset = 0 } = params;
 
   if (!query) {
     return {
@@ -1368,10 +1384,18 @@ async function executeHybridMode(params: Params): Promise {
   // Request more results to support split (full content + extra files)
   const totalToFetch = maxResults + extraFilesCount;
 
-  const args = ['search', query, '--limit', totalToFetch.toString(), '--method', 'dense_rerank', '--json'];
+  const args = ['search', query, '--limit', totalToFetch.toString(), '--offset', offset.toString(), '--method', 'dense_rerank', '--json'];
   if (enrich) {
     args.push('--enrich');
   }
+  // Add code_only filter if requested
+  if (codeOnly) {
+    args.push('--code-only');
+  }
+  // Add exclude_extensions filter if provided
+  if (excludeExtensions && excludeExtensions.length > 0) {
+    args.push('--exclude-extensions', excludeExtensions.join(','));
+  }
 
   const result = await executeCodexLens(args, { cwd: path });
   timer.mark('codexlens_search');
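For context between the two halves of the plumbing: the MCP tool above ends up shelling out to the codex-lens CLI with the new flags. A minimal sketch, assuming the CLI entry point is named `codexlens`, is on PATH, and the project is already indexed (the query string, limit, and exclusions are placeholder values):

```python
# Sketch of the CLI call executeCodexLensExactMode builds for exact/FTS mode.
# Assumptions: the executable is `codexlens` and an index already exists.
import json
import subprocess

args = [
    "codexlens", "search", "SearchOptions",
    "--limit", "15",                      # maxResults (5) + extraFilesCount (10)
    "--offset", "0",                      # new: pagination offset
    "--method", "fts",
    "--code-only",                        # new: drop md/txt/json/... results
    "--exclude-extensions", "toml,lock",  # new: comma-separated extra exclusions
    "--json",
]
proc = subprocess.run(args, capture_output=True, text=True, cwd=".")
results = json.loads(proc.stdout) if proc.returncode == 0 else []
```

The `--exclude-extensions` value is the comma-joined list produced by `excludeExtensions.join(',')` above, and is split back into a list in `commands.py` below.
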
diff --git a/codex-lens/src/codexlens/cli/commands.py b/codex-lens/src/codexlens/cli/commands.py
index de03961b..8bfed9d5 100644
--- a/codex-lens/src/codexlens/cli/commands.py
+++ b/codex-lens/src/codexlens/cli/commands.py
@@ -435,6 +435,8 @@ def search(
     files_only: bool = typer.Option(False, "--files-only", "-f", help="Return only file paths without content snippets."),
     method: str = typer.Option("dense_rerank", "--method", "-m", help="Search method: 'dense_rerank' (semantic, default), 'fts' (exact keyword)."),
     use_fuzzy: bool = typer.Option(False, "--use-fuzzy", help="Enable fuzzy matching in FTS method."),
+    code_only: bool = typer.Option(False, "--code-only", help="Only return code files (excludes md, txt, json, yaml, xml, etc.)."),
+    exclude_extensions: Optional[str] = typer.Option(None, "--exclude-extensions", help="Comma-separated list of file extensions to exclude (e.g., 'md,txt,json')."),
     # Hidden advanced options for backward compatibility
     weights: Optional[str] = typer.Option(
         None,
@@ -654,10 +656,18 @@ def search(
     else:
         raise ValueError(f"Invalid method: {actual_method}")
 
+    # Parse exclude_extensions from comma-separated string
+    exclude_exts_list = None
+    if exclude_extensions:
+        exclude_exts_list = [ext.strip() for ext in exclude_extensions.split(',') if ext.strip()]
+
     options = SearchOptions(
         depth=depth,
         total_limit=limit,
+        offset=offset,
         files_only=files_only,
+        code_only=code_only,
+        exclude_extensions=exclude_exts_list,
         hybrid_mode=hybrid_mode,
         enable_fuzzy=enable_fuzzy,
         enable_vector=enable_vector,
diff --git a/codex-lens/src/codexlens/search/chain_search.py b/codex-lens/src/codexlens/search/chain_search.py
index fa8a19b4..a507458a 100644
--- a/codex-lens/src/codexlens/search/chain_search.py
+++ b/codex-lens/src/codexlens/search/chain_search.py
@@ -44,9 +44,12 @@ class SearchOptions:
         max_workers: Number of parallel worker threads
         limit_per_dir: Maximum results per directory
         total_limit: Total result limit across all directories
+        offset: Pagination offset - skip first N results (default 0)
         include_symbols: Whether to include symbol search results
         files_only: Return only file paths without excerpts
         include_semantic: Whether to include semantic keyword search results
+        code_only: Only return code files (excludes md, txt, json, yaml, xml, etc.)
+        exclude_extensions: List of file extensions to exclude (e.g., ["md", "txt", "json"])
         hybrid_mode: Enable hybrid search with RRF fusion (default False)
         enable_fuzzy: Enable fuzzy FTS in hybrid mode (default True)
         enable_vector: Enable vector semantic search (default False)
@@ -61,9 +64,12 @@ class SearchOptions:
     max_workers: int = 8
     limit_per_dir: int = 10
     total_limit: int = 100
+    offset: int = 0
     include_symbols: bool = False
     files_only: bool = False
    include_semantic: bool = False
+    code_only: bool = False
+    exclude_extensions: Optional[List[str]] = None
     hybrid_mode: bool = False
     enable_fuzzy: bool = True
     enable_vector: bool = False
@@ -234,8 +240,14 @@ class ChainSearchEngine:
         )
         stats.errors = search_stats.errors
 
+        # Step 3.5: Filter by extension if requested
+        if options.code_only or options.exclude_extensions:
+            results = self._filter_by_extension(
+                results, options.code_only, options.exclude_extensions
+            )
+
         # Step 4: Merge and rank
-        final_results = self._merge_and_rank(results, options.total_limit)
+        final_results = self._merge_and_rank(results, options.total_limit, options.offset)
 
         # Step 5: Optional grouping of similar results
         if options.group_results:
@@ -2092,21 +2104,72 @@ class ChainSearchEngine:
             self.logger.debug(f"Search error in {index_path}: {exc}")
             return []
 
+    def _filter_by_extension(self, results: List[SearchResult],
+                             code_only: bool = False,
+                             exclude_extensions: Optional[List[str]] = None) -> List[SearchResult]:
+        """Filter search results by file extension.
+
+        Args:
+            results: Search results to filter
+            code_only: If True, exclude non-code files (md, txt, json, yaml, xml, etc.)
+            exclude_extensions: List of extensions to exclude (e.g., ["md", "txt"])
+
+        Returns:
+            Filtered results
+        """
+        # Non-code file extensions (same as MCP tool smart-search.ts)
+        NON_CODE_EXTENSIONS = {
+            'md', 'txt', 'json', 'yaml', 'yml', 'xml', 'csv', 'log',
+            'ini', 'cfg', 'conf', 'toml', 'env', 'properties',
+            'html', 'htm', 'svg', 'png', 'jpg', 'jpeg', 'gif', 'ico', 'webp',
+            'pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx',
+            'lock', 'sum', 'mod',
+        }
+
+        # Build exclusion set
+        excluded_exts = set()
+        if exclude_extensions:
+            # Normalize extensions (remove leading dots, lowercase)
+            excluded_exts = {ext.lower().lstrip('.') for ext in exclude_extensions}
+        if code_only:
+            excluded_exts.update(NON_CODE_EXTENSIONS)
+
+        if not excluded_exts:
+            return results
+
+        # Filter results
+        filtered = []
+        for result in results:
+            path_str = result.path
+            if not path_str:
+                continue
+
+            # Extract extension from path
+            if '.' in path_str:
+                ext = path_str.rsplit('.', 1)[-1].lower()
+                if ext in excluded_exts:
+                    continue  # Skip this result
+
+            filtered.append(result)
+
+        return filtered
+
     def _merge_and_rank(self, results: List[SearchResult],
-                        limit: int) -> List[SearchResult]:
+                        limit: int, offset: int = 0) -> List[SearchResult]:
         """Aggregate, deduplicate, and rank results.
 
         Process:
             1. Deduplicate by path (keep highest score)
             2. Sort by score descending
-            3. Limit to requested count
+            3. Apply offset and limit for pagination
 
         Args:
             results: Raw results from all indexes
             limit: Maximum results to return
+            offset: Number of results to skip (pagination offset)
 
         Returns:
-            Deduplicated and ranked results
+            Deduplicated and ranked results with pagination
         """
         # Deduplicate by path, keeping best score
         path_to_result: Dict[str, SearchResult] = {}
@@ -2119,8 +2182,8 @@ class ChainSearchEngine:
         unique_results = list(path_to_result.values())
         unique_results.sort(key=lambda r: r.score, reverse=True)
 
-        # Apply limit
-        return unique_results[:limit]
+        # Apply offset and limit for pagination
+        return unique_results[offset:offset + limit]
 
     def _search_symbols_parallel(self, index_paths: List[Path],
                                  name: str,
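Finally, a small sanity-check sketch (not part of the patch) of the paging contract the new `offset` parameter gives `_merge_and_rank`; plain tuples stand in for `SearchResult` objects:

```python
# Mirror of the new slicing in _merge_and_rank: deduplicated, score-sorted
# results are windowed with `unique_results[offset:offset + limit]`.
ranked = [("a.py", 0.9), ("b.py", 0.8), ("c.py", 0.7), ("d.py", 0.6), ("e.py", 0.5)]

def page(results, limit, offset=0):
    return results[offset:offset + limit]

assert page(ranked, limit=2, offset=0) == [("a.py", 0.9), ("b.py", 0.8)]  # page 1
assert page(ranked, limit=2, offset=2) == [("c.py", 0.7), ("d.py", 0.6)]  # page 2
assert page(ranked, limit=2, offset=4) == [("e.py", 0.5)]                 # final, partial page
```

Because the extension filter (Step 3.5) runs before `_merge_and_rank`, the offset indexes into the already-filtered ranking, so changing `code_only` or `exclude_extensions` between requests will shift page boundaries.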