Mirror of https://github.com/catlog22/Claude-Code-Workflow.git (synced 2026-02-05 01:50:27 +08:00)
fix: Fix SmartSearch ripgrep limit and FTS tokenizer issues

- Ripgrep mode: cap the total number of results to prevent returning more than 2MB of data (a standalone sketch follows this message)
  - --max-count only limits matches per file; the total limit is now applied while collecting results
  - when the limit is reached, a warning is added to the metadata
- FTS tokenizer: add the dot (.) to tokenchars so exact search works for dotted identifiers such as PortRole.FLOW
  - update the tokenize configuration in dir_index.py and migration_004_dual_fts.py
  - the index must be rebuilt for this to take effect
- Exact mode: add a fuzzy fallback that automatically retries with fuzzy search when exact search returns no results
  - the fallback is flagged in the metadata as fallback: 'fuzzy'

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
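The first point is easiest to see outside the diff: ripgrep's --max-count caps matches per file, so a search that hits many files can still produce an unbounded amount of JSON. Below is a minimal Python sketch of the collector-side cap, assuming rg is on PATH; the 500 default and the warning text mirror the diff, the --max-count value of 50 is arbitrary, and everything else is illustrative rather than the project's actual TypeScript code.

import json
import subprocess

def ripgrep_limited(pattern: str, path: str = ".", max_results: int = 500):
    """Run rg --json and cap the *total* number of collected matches.

    --max-count only limits matches per file, so the global cap has to be
    enforced while consuming the output stream.
    """
    proc = subprocess.run(
        ["rg", "--json", "--max-count", "50", pattern, path],
        capture_output=True,
        text=True,
    )
    results, limit_reached = [], False
    for line in proc.stdout.splitlines():
        if len(results) >= max_results:
            limit_reached = True
            break
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            continue
        if event.get("type") == "match":  # rg --json emits begin/match/end/summary events
            results.append(event["data"])
    metadata = {"count": len(results)}
    if limit_reached:
        metadata["warning"] = f"Result limit reached ({max_results}). Use a more specific query or increase limit."
    return results, metadata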
@@ -774,6 +774,7 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {

     let stdout = '';
     let stderr = '';
+    let resultLimitReached = false;

     child.stdout.on('data', (data) => {
       stdout += data.toString();
@@ -786,8 +787,16 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
     child.on('close', (code) => {
       const results: ExactMatch[] = [];
       const lines = stdout.split('\n').filter((line) => line.trim());
+      // Limit total results to prevent memory overflow (--max-count only limits per-file)
+      const effectiveLimit = maxResults > 0 ? maxResults : 500;

       for (const line of lines) {
+        // Stop collecting if we've reached the limit
+        if (results.length >= effectiveLimit) {
+          resultLimitReached = true;
+          break;
+        }
+
         try {
           const item = JSON.parse(line);

@@ -817,6 +826,15 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
       const scoredResults = tokens.length > 1 ? scoreByTokenMatch(results, tokens) : results;

       if (code === 0 || code === 1 || (isWindowsDeviceError && scoredResults.length > 0)) {
+        // Build warning message for various conditions
+        const warnings: string[] = [];
+        if (resultLimitReached) {
+          warnings.push(`Result limit reached (${effectiveLimit}). Use a more specific query or increase limit.`);
+        }
+        if (isWindowsDeviceError) {
+          warnings.push('Some Windows device files were skipped');
+        }
+
         resolve({
           success: true,
           results: scoredResults,
@@ -827,7 +845,7 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
             query,
             tokens: tokens.length > 1 ? tokens : undefined, // Include tokens in metadata for debugging
             tokenized: tokens.length > 1,
-            ...(isWindowsDeviceError && { warning: 'Some Windows device files were skipped' }),
+            ...(warnings.length > 0 && { warning: warnings.join('; ') }),
           },
         });
       } else if (isWindowsDeviceError && results.length === 0) {
@@ -923,6 +941,46 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
       // Keep empty results
     }

+    // Fallback to fuzzy mode if exact returns no results
+    if (results.length === 0) {
+      const fuzzyArgs = ['search', query, '--limit', maxResults.toString(), '--mode', 'fuzzy', '--json'];
+      if (enrich) {
+        fuzzyArgs.push('--enrich');
+      }
+      const fuzzyResult = await executeCodexLens(fuzzyArgs, { cwd: path });
+
+      if (fuzzyResult.success) {
+        try {
+          const parsed = JSON.parse(stripAnsi(fuzzyResult.output || '{}'));
+          const data = parsed.result?.results || parsed.results || parsed;
+          results = (Array.isArray(data) ? data : []).map((item: any) => ({
+            file: item.path || item.file,
+            score: item.score || 0,
+            content: item.excerpt || item.content || '',
+            symbol: item.symbol || null,
+          }));
+        } catch {
+          // Keep empty results
+        }
+
+        if (results.length > 0) {
+          return {
+            success: true,
+            results,
+            metadata: {
+              mode: 'exact',
+              backend: 'codexlens',
+              count: results.length,
+              query,
+              warning: indexStatus.warning,
+              note: 'No exact matches found, showing fuzzy results',
+              fallback: 'fuzzy',
+            },
+          };
+        }
+      }
+    }
+
     return {
       success: true,
       results,
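The fallback added above is a generic pattern: run the exact search, and only if it returns nothing rerun the same query in fuzzy mode, tagging the result so callers can tell the matches are approximate. Here is a small Python sketch of that control flow, with a hypothetical run_search(query, mode, limit) helper standing in for the CodexLens CLI call; only the metadata keys (fallback, note) are taken from the diff.

from typing import Any, Callable, Dict, List

SearchFn = Callable[[str, str, int], List[Dict[str, Any]]]

def exact_with_fuzzy_fallback(query: str, run_search: SearchFn, limit: int = 50) -> Dict[str, Any]:
    # Try the precise search first.
    results = run_search(query, "exact", limit)
    metadata: Dict[str, Any] = {"mode": "exact", "query": query}
    if not results:
        # Nothing exact: retry in fuzzy mode and flag the fallback so callers
        # know these are approximate matches, not exact ones.
        results = run_search(query, "fuzzy", limit)
        if results:
            metadata["fallback"] = "fuzzy"
            metadata["note"] = "No exact matches found, showing fuzzy results"
    metadata["count"] = len(results)
    return {"success": True, "results": results, "metadata": metadata}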
In dir_index.py:

@@ -1651,16 +1651,17 @@ class DirIndexStore:
         from codexlens.storage.sqlite_utils import check_trigram_support

         has_trigram = check_trigram_support(conn)
-        fuzzy_tokenizer = "trigram" if has_trigram else "unicode61 tokenchars '_-'"
+        fuzzy_tokenizer = "trigram" if has_trigram else "unicode61 tokenchars '_-.'"

         # Exact FTS table with unicode61 tokenizer
+        # Note: tokenchars includes '.' to properly tokenize qualified names like PortRole.FLOW
         conn.execute(
             """
             CREATE VIRTUAL TABLE IF NOT EXISTS files_fts_exact USING fts5(
                 name, full_path UNINDEXED, content,
                 content='files',
                 content_rowid='id',
-                tokenize="unicode61 tokenchars '_-'"
+                tokenize="unicode61 tokenchars '_-.'"
             )
             """
         )
In migration_004_dual_fts.py:

@@ -45,7 +45,7 @@ def upgrade(db_conn: Connection):
             f"Trigram tokenizer not available (requires SQLite >= 3.34), "
             f"using extended unicode61 tokenizer for fuzzy matching"
         )
-        fuzzy_tokenizer = "unicode61 tokenchars '_-'"
+        fuzzy_tokenizer = "unicode61 tokenchars '_-.'"

     # Start transaction
     cursor.execute("BEGIN TRANSACTION")
@@ -122,7 +122,8 @@ def upgrade(db_conn: Connection):
     # Drop old FTS table
     cursor.execute("DROP TABLE IF EXISTS files_fts")

-    # Create exact FTS table (unicode61 with underscores/hyphens as token chars)
+    # Create exact FTS table (unicode61 with underscores/hyphens/dots as token chars)
+    # Note: tokenchars includes '.' to properly tokenize qualified names like PortRole.FLOW
     log.info("Creating files_fts_exact table with unicode61 tokenizer...")
     cursor.execute(
         """
@@ -130,7 +131,7 @@ def upgrade(db_conn: Connection):
             name, full_path UNINDEXED, content,
             content='files',
             content_rowid='id',
-            tokenize="unicode61 tokenchars '_-'"
+            tokenize="unicode61 tokenchars '_-.'"
         )
         """
     )
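The effect of adding '.' to tokenchars can be checked with nothing but the sqlite3 standard library, assuming the interpreter's SQLite ships FTS5 (CPython's bundled build does): with the old tokenizer, PortRole.FLOW splits into two tokens, so a query for FLOW alone matches it; with the dot included it stays one token and only the full qualified name matches. A self-contained sketch, not the project's code:

import sqlite3

def hits(tokenize: str, query: str) -> int:
    conn = sqlite3.connect(":memory:")
    conn.execute(f'CREATE VIRTUAL TABLE docs USING fts5(content, tokenize="{tokenize}")')
    conn.execute("INSERT INTO docs(content) VALUES ('role = PortRole.FLOW')")
    return len(conn.execute("SELECT rowid FROM docs WHERE docs MATCH ?", (query,)).fetchall())

# Old tokenizer: '.' is a separator, so the identifier is indexed as two tokens
# ('portrole', 'flow') and a bare FLOW query also hits PortRole.FLOW.
print(hits("unicode61 tokenchars '_-'", '"FLOW"'))            # 1
# New tokenizer: '.' is a token character, so PortRole.FLOW is a single token.
print(hits("unicode61 tokenchars '_-.'", '"FLOW"'))           # 0
print(hits("unicode61 tokenchars '_-.'", '"PortRole.FLOW"'))  # 1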