mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-05 01:50:27 +08:00
feat: 增加搜索功能的代码过滤选项,支持排除特定文件扩展名和仅返回代码文件
This commit is contained in:
@@ -1,22 +1,13 @@
|
||||
---
|
||||
name: code-reviewer
|
||||
description: Comprehensive code review skill for identifying security vulnerabilities and best practices violations. Triggers on "code review", "review code", "security audit", "代码审查".
|
||||
allowed-tools: Read, Glob, Grep, mcp__ace-tool__search_context, mcp__ccw-tools__smart_search
|
||||
---
|
||||
|
||||
# Code Reviewer
|
||||
|
||||
Comprehensive code review skill for identifying security vulnerabilities and best practices violations.
|
||||
|
||||
## Metadata
|
||||
|
||||
```yaml
|
||||
name: code-reviewer
|
||||
description: 帮助审查代码的安全漏洞和最佳实践
|
||||
version: 1.0.0
|
||||
execution_mode: sequential
|
||||
allowed-tools:
|
||||
- Read
|
||||
- Glob
|
||||
- Grep
|
||||
- mcp__ace-tool__search_context
|
||||
- mcp__ccw-tools__smart_search
|
||||
```
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
```
|
||||
|
||||
@@ -2,9 +2,20 @@
|
||||
|
||||
用于生成新 Skill 入口文件的模板。
|
||||
|
||||
## 模板结构
|
||||
## ⚠️ 重要:YAML Front Matter 规范
|
||||
|
||||
> **CRITICAL**: SKILL.md 文件必须以 YAML front matter 开头,即以 `---` 作为文件第一行。
|
||||
>
|
||||
> **禁止**使用以下格式:
|
||||
> - `# Title` 然后 `## Metadata` + yaml 代码块 ❌
|
||||
> - 任何在 `---` 之前的内容 ❌
|
||||
>
|
||||
> **正确格式**:文件第一行必须是 `---`
|
||||
|
||||
## 可直接应用的模板
|
||||
|
||||
以下是完整的 SKILL.md 模板。生成时**直接复制应用**,将 `{{变量}}` 替换为实际值:
|
||||
|
||||
```markdown
|
||||
---
|
||||
name: {{skill_name}}
|
||||
description: {{description}}. Triggers on {{triggers}}.
|
||||
@@ -33,9 +44,9 @@ allowed-tools: {{allowed_tools}}
|
||||
|
||||
\`\`\`javascript
|
||||
const timestamp = new Date().toISOString().slice(0,19).replace(/[-:T]/g, '');
|
||||
const workDir = `{{output_location}}`;
|
||||
const workDir = \`{{output_location}}\`;
|
||||
|
||||
Bash(`mkdir -p "${workDir}"`);
|
||||
Bash(\`mkdir -p "\${workDir}"\`);
|
||||
{{additional_dirs}}
|
||||
\`\`\`
|
||||
|
||||
@@ -48,7 +59,8 @@ Bash(`mkdir -p "${workDir}"`);
|
||||
## Reference Documents
|
||||
|
||||
{{reference_table}}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 变量说明
|
||||
|
||||
|
||||
@@ -1218,7 +1218,7 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
|
||||
* Requires index
|
||||
*/
|
||||
async function executeCodexLensExactMode(params: Params): Promise<SearchResult> {
|
||||
const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false } = params;
|
||||
const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false, excludeExtensions, codeOnly = false, offset = 0 } = params;
|
||||
|
||||
if (!query) {
|
||||
return {
|
||||
@@ -1241,10 +1241,18 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
|
||||
|
||||
// Request more results to support split (full content + extra files)
|
||||
const totalToFetch = maxResults + extraFilesCount;
|
||||
const args = ['search', query, '--limit', totalToFetch.toString(), '--method', 'fts', '--json'];
|
||||
const args = ['search', query, '--limit', totalToFetch.toString(), '--offset', offset.toString(), '--method', 'fts', '--json'];
|
||||
if (enrich) {
|
||||
args.push('--enrich');
|
||||
}
|
||||
// Add code_only filter if requested
|
||||
if (codeOnly) {
|
||||
args.push('--code-only');
|
||||
}
|
||||
// Add exclude_extensions filter if provided
|
||||
if (excludeExtensions && excludeExtensions.length > 0) {
|
||||
args.push('--exclude-extensions', excludeExtensions.join(','));
|
||||
}
|
||||
const result = await executeCodexLens(args, { cwd: path });
|
||||
|
||||
if (!result.success) {
|
||||
@@ -1278,10 +1286,18 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
|
||||
|
||||
// Fallback to fuzzy mode if exact returns no results
|
||||
if (allResults.length === 0) {
|
||||
const fuzzyArgs = ['search', query, '--limit', totalToFetch.toString(), '--method', 'fts', '--use-fuzzy', '--json'];
|
||||
const fuzzyArgs = ['search', query, '--limit', totalToFetch.toString(), '--offset', offset.toString(), '--method', 'fts', '--use-fuzzy', '--json'];
|
||||
if (enrich) {
|
||||
fuzzyArgs.push('--enrich');
|
||||
}
|
||||
// Add code_only filter if requested
|
||||
if (codeOnly) {
|
||||
fuzzyArgs.push('--code-only');
|
||||
}
|
||||
// Add exclude_extensions filter if provided
|
||||
if (excludeExtensions && excludeExtensions.length > 0) {
|
||||
fuzzyArgs.push('--exclude-extensions', excludeExtensions.join(','));
|
||||
}
|
||||
const fuzzyResult = await executeCodexLens(fuzzyArgs, { cwd: path });
|
||||
|
||||
if (fuzzyResult.success) {
|
||||
@@ -1343,7 +1359,7 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
|
||||
*/
|
||||
async function executeHybridMode(params: Params): Promise<SearchResult> {
|
||||
const timer = createTimer();
|
||||
const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false, excludeExtensions, codeOnly = false } = params;
|
||||
const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false, excludeExtensions, codeOnly = false, offset = 0 } = params;
|
||||
|
||||
if (!query) {
|
||||
return {
|
||||
@@ -1368,10 +1384,18 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
|
||||
|
||||
// Request more results to support split (full content + extra files)
|
||||
const totalToFetch = maxResults + extraFilesCount;
|
||||
const args = ['search', query, '--limit', totalToFetch.toString(), '--method', 'dense_rerank', '--json'];
|
||||
const args = ['search', query, '--limit', totalToFetch.toString(), '--offset', offset.toString(), '--method', 'dense_rerank', '--json'];
|
||||
if (enrich) {
|
||||
args.push('--enrich');
|
||||
}
|
||||
// Add code_only filter if requested
|
||||
if (codeOnly) {
|
||||
args.push('--code-only');
|
||||
}
|
||||
// Add exclude_extensions filter if provided
|
||||
if (excludeExtensions && excludeExtensions.length > 0) {
|
||||
args.push('--exclude-extensions', excludeExtensions.join(','));
|
||||
}
|
||||
const result = await executeCodexLens(args, { cwd: path });
|
||||
timer.mark('codexlens_search');
|
||||
|
||||
|
||||
@@ -435,6 +435,8 @@ def search(
|
||||
files_only: bool = typer.Option(False, "--files-only", "-f", help="Return only file paths without content snippets."),
|
||||
method: str = typer.Option("dense_rerank", "--method", "-m", help="Search method: 'dense_rerank' (semantic, default), 'fts' (exact keyword)."),
|
||||
use_fuzzy: bool = typer.Option(False, "--use-fuzzy", help="Enable fuzzy matching in FTS method."),
|
||||
code_only: bool = typer.Option(False, "--code-only", help="Only return code files (excludes md, txt, json, yaml, xml, etc.)."),
|
||||
exclude_extensions: Optional[str] = typer.Option(None, "--exclude-extensions", help="Comma-separated list of file extensions to exclude (e.g., 'md,txt,json')."),
|
||||
# Hidden advanced options for backward compatibility
|
||||
weights: Optional[str] = typer.Option(
|
||||
None,
|
||||
@@ -654,10 +656,18 @@ def search(
|
||||
else:
|
||||
raise ValueError(f"Invalid method: {actual_method}")
|
||||
|
||||
# Parse exclude_extensions from comma-separated string
|
||||
exclude_exts_list = None
|
||||
if exclude_extensions:
|
||||
exclude_exts_list = [ext.strip() for ext in exclude_extensions.split(',') if ext.strip()]
|
||||
|
||||
options = SearchOptions(
|
||||
depth=depth,
|
||||
total_limit=limit,
|
||||
offset=offset,
|
||||
files_only=files_only,
|
||||
code_only=code_only,
|
||||
exclude_extensions=exclude_exts_list,
|
||||
hybrid_mode=hybrid_mode,
|
||||
enable_fuzzy=enable_fuzzy,
|
||||
enable_vector=enable_vector,
|
||||
|
||||
@@ -44,9 +44,12 @@ class SearchOptions:
|
||||
max_workers: Number of parallel worker threads
|
||||
limit_per_dir: Maximum results per directory
|
||||
total_limit: Total result limit across all directories
|
||||
offset: Pagination offset - skip first N results (default 0)
|
||||
include_symbols: Whether to include symbol search results
|
||||
files_only: Return only file paths without excerpts
|
||||
include_semantic: Whether to include semantic keyword search results
|
||||
code_only: Only return code files (excludes md, txt, json, yaml, xml, etc.)
|
||||
exclude_extensions: List of file extensions to exclude (e.g., ["md", "txt", "json"])
|
||||
hybrid_mode: Enable hybrid search with RRF fusion (default False)
|
||||
enable_fuzzy: Enable fuzzy FTS in hybrid mode (default True)
|
||||
enable_vector: Enable vector semantic search (default False)
|
||||
@@ -61,9 +64,12 @@ class SearchOptions:
|
||||
max_workers: int = 8
|
||||
limit_per_dir: int = 10
|
||||
total_limit: int = 100
|
||||
offset: int = 0
|
||||
include_symbols: bool = False
|
||||
files_only: bool = False
|
||||
include_semantic: bool = False
|
||||
code_only: bool = False
|
||||
exclude_extensions: Optional[List[str]] = None
|
||||
hybrid_mode: bool = False
|
||||
enable_fuzzy: bool = True
|
||||
enable_vector: bool = False
|
||||
@@ -234,8 +240,14 @@ class ChainSearchEngine:
|
||||
)
|
||||
stats.errors = search_stats.errors
|
||||
|
||||
# Step 3.5: Filter by extension if requested
|
||||
if options.code_only or options.exclude_extensions:
|
||||
results = self._filter_by_extension(
|
||||
results, options.code_only, options.exclude_extensions
|
||||
)
|
||||
|
||||
# Step 4: Merge and rank
|
||||
final_results = self._merge_and_rank(results, options.total_limit)
|
||||
final_results = self._merge_and_rank(results, options.total_limit, options.offset)
|
||||
|
||||
# Step 5: Optional grouping of similar results
|
||||
if options.group_results:
|
||||
@@ -2092,21 +2104,72 @@ class ChainSearchEngine:
|
||||
self.logger.debug(f"Search error in {index_path}: {exc}")
|
||||
return []
|
||||
|
||||
def _filter_by_extension(self, results: List[SearchResult],
|
||||
code_only: bool = False,
|
||||
exclude_extensions: Optional[List[str]] = None) -> List[SearchResult]:
|
||||
"""Filter search results by file extension.
|
||||
|
||||
Args:
|
||||
results: Search results to filter
|
||||
code_only: If True, exclude non-code files (md, txt, json, yaml, xml, etc.)
|
||||
exclude_extensions: List of extensions to exclude (e.g., ["md", "txt"])
|
||||
|
||||
Returns:
|
||||
Filtered results
|
||||
"""
|
||||
# Non-code file extensions (same as MCP tool smart-search.ts)
|
||||
NON_CODE_EXTENSIONS = {
|
||||
'md', 'txt', 'json', 'yaml', 'yml', 'xml', 'csv', 'log',
|
||||
'ini', 'cfg', 'conf', 'toml', 'env', 'properties',
|
||||
'html', 'htm', 'svg', 'png', 'jpg', 'jpeg', 'gif', 'ico', 'webp',
|
||||
'pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx',
|
||||
'lock', 'sum', 'mod',
|
||||
}
|
||||
|
||||
# Build exclusion set
|
||||
excluded_exts = set()
|
||||
if exclude_extensions:
|
||||
# Normalize extensions (remove leading dots, lowercase)
|
||||
excluded_exts = {ext.lower().lstrip('.') for ext in exclude_extensions}
|
||||
if code_only:
|
||||
excluded_exts.update(NON_CODE_EXTENSIONS)
|
||||
|
||||
if not excluded_exts:
|
||||
return results
|
||||
|
||||
# Filter results
|
||||
filtered = []
|
||||
for result in results:
|
||||
path_str = result.path
|
||||
if not path_str:
|
||||
continue
|
||||
|
||||
# Extract extension from path
|
||||
if '.' in path_str:
|
||||
ext = path_str.rsplit('.', 1)[-1].lower()
|
||||
if ext in excluded_exts:
|
||||
continue # Skip this result
|
||||
|
||||
filtered.append(result)
|
||||
|
||||
return filtered
|
||||
|
||||
def _merge_and_rank(self, results: List[SearchResult],
|
||||
limit: int) -> List[SearchResult]:
|
||||
limit: int, offset: int = 0) -> List[SearchResult]:
|
||||
"""Aggregate, deduplicate, and rank results.
|
||||
|
||||
Process:
|
||||
1. Deduplicate by path (keep highest score)
|
||||
2. Sort by score descending
|
||||
3. Limit to requested count
|
||||
3. Apply offset and limit for pagination
|
||||
|
||||
Args:
|
||||
results: Raw results from all indexes
|
||||
limit: Maximum results to return
|
||||
offset: Number of results to skip (pagination offset)
|
||||
|
||||
Returns:
|
||||
Deduplicated and ranked results
|
||||
Deduplicated and ranked results with pagination
|
||||
"""
|
||||
# Deduplicate by path, keeping best score
|
||||
path_to_result: Dict[str, SearchResult] = {}
|
||||
@@ -2119,8 +2182,8 @@ class ChainSearchEngine:
|
||||
unique_results = list(path_to_result.values())
|
||||
unique_results.sort(key=lambda r: r.score, reverse=True)
|
||||
|
||||
# Apply limit
|
||||
return unique_results[:limit]
|
||||
# Apply offset and limit for pagination
|
||||
return unique_results[offset:offset + limit]
|
||||
|
||||
def _search_symbols_parallel(self, index_paths: List[Path],
|
||||
name: str,
|
||||
|
||||
Reference in New Issue
Block a user