fix: 修复 SmartSearch 的 ripgrep limit 和 FTS 分词器问题

- Ripgrep 模式: 添加总结果数量限制,防止返回超过 2MB 数据
  - ripgrep 的 --max-count 只限制单个文件内的匹配数,因此现在在收集结果时额外应用总结果数 limit
  - 达到限制时在 metadata 中添加 warning 提示

- FTS 分词器: 将点号(.)添加到 tokenchars,修复 PortRole.FLOW 等带点号标识符的精确搜索
  - 更新 dir_index.py 和 migration_004_dual_fts.py 中的 tokenize 配置
  - 需要重建索引才能生效

- Exact 模式: 添加 fuzzy 回退,当精确搜索无结果时自动尝试模糊搜索
  - 回退时在 metadata 中标注 fallback: 'fuzzy'

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
catlog22
2025-12-22 09:50:29 +08:00
parent 7ecc64614a
commit e60d793c8c
3 changed files with 66 additions and 6 deletions

View File

@@ -774,6 +774,7 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
let stdout = '';
let stderr = '';
let resultLimitReached = false;
child.stdout.on('data', (data) => {
stdout += data.toString();
@@ -786,8 +787,16 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
child.on('close', (code) => {
const results: ExactMatch[] = [];
const lines = stdout.split('\n').filter((line) => line.trim());
// Limit total results to prevent memory overflow (--max-count only limits per-file)
const effectiveLimit = maxResults > 0 ? maxResults : 500;
for (const line of lines) {
// Stop collecting if we've reached the limit
if (results.length >= effectiveLimit) {
resultLimitReached = true;
break;
}
try {
const item = JSON.parse(line);
@@ -817,6 +826,15 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
const scoredResults = tokens.length > 1 ? scoreByTokenMatch(results, tokens) : results;
if (code === 0 || code === 1 || (isWindowsDeviceError && scoredResults.length > 0)) {
// Build warning message for various conditions
const warnings: string[] = [];
if (resultLimitReached) {
warnings.push(`Result limit reached (${effectiveLimit}). Use a more specific query or increase limit.`);
}
if (isWindowsDeviceError) {
warnings.push('Some Windows device files were skipped');
}
resolve({
success: true,
results: scoredResults,
@@ -827,7 +845,7 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
query,
tokens: tokens.length > 1 ? tokens : undefined, // Include tokens in metadata for debugging
tokenized: tokens.length > 1,
...(isWindowsDeviceError && { warning: 'Some Windows device files were skipped' }),
...(warnings.length > 0 && { warning: warnings.join('; ') }),
},
});
} else if (isWindowsDeviceError && results.length === 0) {
@@ -923,6 +941,46 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
// Keep empty results
}
// Fallback to fuzzy mode if exact returns no results
if (results.length === 0) {
const fuzzyArgs = ['search', query, '--limit', maxResults.toString(), '--mode', 'fuzzy', '--json'];
if (enrich) {
fuzzyArgs.push('--enrich');
}
const fuzzyResult = await executeCodexLens(fuzzyArgs, { cwd: path });
if (fuzzyResult.success) {
try {
const parsed = JSON.parse(stripAnsi(fuzzyResult.output || '{}'));
const data = parsed.result?.results || parsed.results || parsed;
results = (Array.isArray(data) ? data : []).map((item: any) => ({
file: item.path || item.file,
score: item.score || 0,
content: item.excerpt || item.content || '',
symbol: item.symbol || null,
}));
} catch {
// Keep empty results
}
if (results.length > 0) {
return {
success: true,
results,
metadata: {
mode: 'exact',
backend: 'codexlens',
count: results.length,
query,
warning: indexStatus.warning,
note: 'No exact matches found, showing fuzzy results',
fallback: 'fuzzy',
},
};
}
}
}
return {
success: true,
results,