feat: 增加搜索功能的代码过滤选项,支持排除特定文件扩展名和仅返回代码文件

This commit is contained in:
catlog22
2026-01-06 23:19:47 +08:00
parent ef770ff29b
commit 1298fdd20f
5 changed files with 131 additions and 31 deletions

View File

@@ -1,22 +1,13 @@
---
name: code-reviewer
description: Comprehensive code review skill for identifying security vulnerabilities and best practices violations. Triggers on "code review", "review code", "security audit", "代码审查".
allowed-tools: Read, Glob, Grep, mcp__ace-tool__search_context, mcp__ccw-tools__smart_search
---
# Code Reviewer # Code Reviewer
Comprehensive code review skill for identifying security vulnerabilities and best practices violations. Comprehensive code review skill for identifying security vulnerabilities and best practices violations.
## Metadata
```yaml
name: code-reviewer
description: 帮助审查代码的安全漏洞和最佳实践
version: 1.0.0
execution_mode: sequential
allowed-tools:
- Read
- Glob
- Grep
- mcp__ace-tool__search_context
- mcp__ccw-tools__smart_search
```
## Architecture Overview ## Architecture Overview
``` ```

View File

@@ -2,9 +2,20 @@
用于生成新 Skill 入口文件的模板。 用于生成新 Skill 入口文件的模板。
## 模板结构 ## ⚠️ 重要:YAML Front Matter 规范
> **CRITICAL**: SKILL.md 文件必须以 YAML front matter 开头,即以 `---` 作为文件第一行。
>
> **禁止**使用以下格式:
> - `# Title` 然后 `## Metadata` + yaml 代码块 ❌
> - 任何在 `---` 之前的内容 ❌
>
> **正确格式**:文件第一行必须是 `---`
## 可直接应用的模板
以下是完整的 SKILL.md 模板。生成时**直接复制应用**,将 `{{变量}}` 替换为实际值:
```markdown
--- ---
name: {{skill_name}} name: {{skill_name}}
description: {{description}}. Triggers on {{triggers}}. description: {{description}}. Triggers on {{triggers}}.
@@ -33,9 +44,9 @@ allowed-tools: {{allowed_tools}}
\`\`\`javascript \`\`\`javascript
const timestamp = new Date().toISOString().slice(0,19).replace(/[-:T]/g, ''); const timestamp = new Date().toISOString().slice(0,19).replace(/[-:T]/g, '');
const workDir = `{{output_location}}`; const workDir = \`{{output_location}}\`;
Bash(`mkdir -p "${workDir}"`); Bash(\`mkdir -p "\${workDir}"\`);
{{additional_dirs}} {{additional_dirs}}
\`\`\` \`\`\`
@@ -48,7 +59,8 @@ Bash(`mkdir -p "${workDir}"`);
## Reference Documents ## Reference Documents
{{reference_table}} {{reference_table}}
```
---
## 变量说明 ## 变量说明

View File

@@ -1218,7 +1218,7 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
* Requires index * Requires index
*/ */
async function executeCodexLensExactMode(params: Params): Promise<SearchResult> { async function executeCodexLensExactMode(params: Params): Promise<SearchResult> {
const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false } = params; const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false, excludeExtensions, codeOnly = false, offset = 0 } = params;
if (!query) { if (!query) {
return { return {
@@ -1241,10 +1241,18 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
// Request more results to support split (full content + extra files) // Request more results to support split (full content + extra files)
const totalToFetch = maxResults + extraFilesCount; const totalToFetch = maxResults + extraFilesCount;
const args = ['search', query, '--limit', totalToFetch.toString(), '--method', 'fts', '--json']; const args = ['search', query, '--limit', totalToFetch.toString(), '--offset', offset.toString(), '--method', 'fts', '--json'];
if (enrich) { if (enrich) {
args.push('--enrich'); args.push('--enrich');
} }
// Add code_only filter if requested
if (codeOnly) {
args.push('--code-only');
}
// Add exclude_extensions filter if provided
if (excludeExtensions && excludeExtensions.length > 0) {
args.push('--exclude-extensions', excludeExtensions.join(','));
}
const result = await executeCodexLens(args, { cwd: path }); const result = await executeCodexLens(args, { cwd: path });
if (!result.success) { if (!result.success) {
@@ -1278,10 +1286,18 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
// Fallback to fuzzy mode if exact returns no results // Fallback to fuzzy mode if exact returns no results
if (allResults.length === 0) { if (allResults.length === 0) {
const fuzzyArgs = ['search', query, '--limit', totalToFetch.toString(), '--method', 'fts', '--use-fuzzy', '--json']; const fuzzyArgs = ['search', query, '--limit', totalToFetch.toString(), '--offset', offset.toString(), '--method', 'fts', '--use-fuzzy', '--json'];
if (enrich) { if (enrich) {
fuzzyArgs.push('--enrich'); fuzzyArgs.push('--enrich');
} }
// Add code_only filter if requested
if (codeOnly) {
fuzzyArgs.push('--code-only');
}
// Add exclude_extensions filter if provided
if (excludeExtensions && excludeExtensions.length > 0) {
fuzzyArgs.push('--exclude-extensions', excludeExtensions.join(','));
}
const fuzzyResult = await executeCodexLens(fuzzyArgs, { cwd: path }); const fuzzyResult = await executeCodexLens(fuzzyArgs, { cwd: path });
if (fuzzyResult.success) { if (fuzzyResult.success) {
@@ -1343,7 +1359,7 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
*/ */
async function executeHybridMode(params: Params): Promise<SearchResult> { async function executeHybridMode(params: Params): Promise<SearchResult> {
const timer = createTimer(); const timer = createTimer();
const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false, excludeExtensions, codeOnly = false } = params; const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false, excludeExtensions, codeOnly = false, offset = 0 } = params;
if (!query) { if (!query) {
return { return {
@@ -1368,10 +1384,18 @@ async function executeHybridMode(params: Params): Promise<SearchResult> {
// Request more results to support split (full content + extra files) // Request more results to support split (full content + extra files)
const totalToFetch = maxResults + extraFilesCount; const totalToFetch = maxResults + extraFilesCount;
const args = ['search', query, '--limit', totalToFetch.toString(), '--method', 'dense_rerank', '--json']; const args = ['search', query, '--limit', totalToFetch.toString(), '--offset', offset.toString(), '--method', 'dense_rerank', '--json'];
if (enrich) { if (enrich) {
args.push('--enrich'); args.push('--enrich');
} }
// Add code_only filter if requested
if (codeOnly) {
args.push('--code-only');
}
// Add exclude_extensions filter if provided
if (excludeExtensions && excludeExtensions.length > 0) {
args.push('--exclude-extensions', excludeExtensions.join(','));
}
const result = await executeCodexLens(args, { cwd: path }); const result = await executeCodexLens(args, { cwd: path });
timer.mark('codexlens_search'); timer.mark('codexlens_search');

View File

@@ -435,6 +435,8 @@ def search(
files_only: bool = typer.Option(False, "--files-only", "-f", help="Return only file paths without content snippets."), files_only: bool = typer.Option(False, "--files-only", "-f", help="Return only file paths without content snippets."),
method: str = typer.Option("dense_rerank", "--method", "-m", help="Search method: 'dense_rerank' (semantic, default), 'fts' (exact keyword)."), method: str = typer.Option("dense_rerank", "--method", "-m", help="Search method: 'dense_rerank' (semantic, default), 'fts' (exact keyword)."),
use_fuzzy: bool = typer.Option(False, "--use-fuzzy", help="Enable fuzzy matching in FTS method."), use_fuzzy: bool = typer.Option(False, "--use-fuzzy", help="Enable fuzzy matching in FTS method."),
code_only: bool = typer.Option(False, "--code-only", help="Only return code files (excludes md, txt, json, yaml, xml, etc.)."),
exclude_extensions: Optional[str] = typer.Option(None, "--exclude-extensions", help="Comma-separated list of file extensions to exclude (e.g., 'md,txt,json')."),
# Hidden advanced options for backward compatibility # Hidden advanced options for backward compatibility
weights: Optional[str] = typer.Option( weights: Optional[str] = typer.Option(
None, None,
@@ -654,10 +656,18 @@ def search(
else: else:
raise ValueError(f"Invalid method: {actual_method}") raise ValueError(f"Invalid method: {actual_method}")
# Parse exclude_extensions from comma-separated string
exclude_exts_list = None
if exclude_extensions:
exclude_exts_list = [ext.strip() for ext in exclude_extensions.split(',') if ext.strip()]
options = SearchOptions( options = SearchOptions(
depth=depth, depth=depth,
total_limit=limit, total_limit=limit,
offset=offset,
files_only=files_only, files_only=files_only,
code_only=code_only,
exclude_extensions=exclude_exts_list,
hybrid_mode=hybrid_mode, hybrid_mode=hybrid_mode,
enable_fuzzy=enable_fuzzy, enable_fuzzy=enable_fuzzy,
enable_vector=enable_vector, enable_vector=enable_vector,

View File

@@ -44,9 +44,12 @@ class SearchOptions:
max_workers: Number of parallel worker threads max_workers: Number of parallel worker threads
limit_per_dir: Maximum results per directory limit_per_dir: Maximum results per directory
total_limit: Total result limit across all directories total_limit: Total result limit across all directories
offset: Pagination offset - skip first N results (default 0)
include_symbols: Whether to include symbol search results include_symbols: Whether to include symbol search results
files_only: Return only file paths without excerpts files_only: Return only file paths without excerpts
include_semantic: Whether to include semantic keyword search results include_semantic: Whether to include semantic keyword search results
code_only: Only return code files (excludes md, txt, json, yaml, xml, etc.)
exclude_extensions: List of file extensions to exclude (e.g., ["md", "txt", "json"])
hybrid_mode: Enable hybrid search with RRF fusion (default False) hybrid_mode: Enable hybrid search with RRF fusion (default False)
enable_fuzzy: Enable fuzzy FTS in hybrid mode (default True) enable_fuzzy: Enable fuzzy FTS in hybrid mode (default True)
enable_vector: Enable vector semantic search (default False) enable_vector: Enable vector semantic search (default False)
@@ -61,9 +64,12 @@ class SearchOptions:
max_workers: int = 8 max_workers: int = 8
limit_per_dir: int = 10 limit_per_dir: int = 10
total_limit: int = 100 total_limit: int = 100
offset: int = 0
include_symbols: bool = False include_symbols: bool = False
files_only: bool = False files_only: bool = False
include_semantic: bool = False include_semantic: bool = False
code_only: bool = False
exclude_extensions: Optional[List[str]] = None
hybrid_mode: bool = False hybrid_mode: bool = False
enable_fuzzy: bool = True enable_fuzzy: bool = True
enable_vector: bool = False enable_vector: bool = False
@@ -234,8 +240,14 @@ class ChainSearchEngine:
) )
stats.errors = search_stats.errors stats.errors = search_stats.errors
# Step 3.5: Filter by extension if requested
if options.code_only or options.exclude_extensions:
results = self._filter_by_extension(
results, options.code_only, options.exclude_extensions
)
# Step 4: Merge and rank # Step 4: Merge and rank
final_results = self._merge_and_rank(results, options.total_limit) final_results = self._merge_and_rank(results, options.total_limit, options.offset)
# Step 5: Optional grouping of similar results # Step 5: Optional grouping of similar results
if options.group_results: if options.group_results:
@@ -2092,21 +2104,72 @@ class ChainSearchEngine:
self.logger.debug(f"Search error in {index_path}: {exc}") self.logger.debug(f"Search error in {index_path}: {exc}")
return [] return []
def _filter_by_extension(self, results: List[SearchResult],
                         code_only: bool = False,
                         exclude_extensions: Optional[List[str]] = None) -> List[SearchResult]:
    """Drop search results whose file extension is on the exclusion list.

    Args:
        results: Search results to filter.
        code_only: When True, also exclude the built-in set of non-code
            extensions (md, txt, json, yaml, xml, images, office docs, ...).
        exclude_extensions: Caller-supplied extensions to exclude; entries
            are case-insensitive and may carry a leading dot (".md" == "md").

    Returns:
        A new list containing only the results that survive filtering.
        Results with an empty/missing path are dropped whenever any
        filtering is active (matches the original behavior).
    """
    # Extensions treated as non-code — kept in sync with the MCP tool
    # smart-search.ts; NOTE(review): verify the two lists stay aligned.
    NON_CODE_EXTENSIONS = {
        'md', 'txt', 'json', 'yaml', 'yml', 'xml', 'csv', 'log',
        'ini', 'cfg', 'conf', 'toml', 'env', 'properties',
        'html', 'htm', 'svg', 'png', 'jpg', 'jpeg', 'gif', 'ico', 'webp',
        'pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx',
        'lock', 'sum', 'mod',
    }

    # Normalize caller-supplied extensions: lowercase, strip leading dots.
    blocked = ({ext.lower().lstrip('.') for ext in exclude_extensions}
               if exclude_extensions else set())
    if code_only:
        blocked |= NON_CODE_EXTENSIONS

    # Nothing to exclude — hand back the input list unchanged.
    if not blocked:
        return results

    def _keep(item: SearchResult) -> bool:
        # Pathless results are discarded while filtering is active.
        target = item.path
        if not target:
            return False
        # rpartition: empty separator means the path has no '.' at all,
        # in which case the result is always kept.
        _, dot, tail = target.rpartition('.')
        return not (dot and tail.lower() in blocked)

    return [entry for entry in results if _keep(entry)]
def _merge_and_rank(self, results: List[SearchResult], def _merge_and_rank(self, results: List[SearchResult],
limit: int) -> List[SearchResult]: limit: int, offset: int = 0) -> List[SearchResult]:
"""Aggregate, deduplicate, and rank results. """Aggregate, deduplicate, and rank results.
Process: Process:
1. Deduplicate by path (keep highest score) 1. Deduplicate by path (keep highest score)
2. Sort by score descending 2. Sort by score descending
3. Limit to requested count 3. Apply offset and limit for pagination
Args: Args:
results: Raw results from all indexes results: Raw results from all indexes
limit: Maximum results to return limit: Maximum results to return
offset: Number of results to skip (pagination offset)
Returns: Returns:
Deduplicated and ranked results Deduplicated and ranked results with pagination
""" """
# Deduplicate by path, keeping best score # Deduplicate by path, keeping best score
path_to_result: Dict[str, SearchResult] = {} path_to_result: Dict[str, SearchResult] = {}
@@ -2119,8 +2182,8 @@ class ChainSearchEngine:
unique_results = list(path_to_result.values()) unique_results = list(path_to_result.values())
unique_results.sort(key=lambda r: r.score, reverse=True) unique_results.sort(key=lambda r: r.score, reverse=True)
# Apply limit # Apply offset and limit for pagination
return unique_results[:limit] return unique_results[offset:offset + limit]
def _search_symbols_parallel(self, index_paths: List[Path], def _search_symbols_parallel(self, index_paths: List[Path],
name: str, name: str,