mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-14 02:42:04 +08:00
fix: 修复 SmartSearch 的 ripgrep limit 和 FTS 分词器问题
- Ripgrep 模式: 添加总结果数量限制,防止返回超过 2MB 数据
  - --max-count 只限制每个文件的匹配数,现在在收集结果时应用 limit
  - 达到限制时在 metadata 中添加 warning 提示
- FTS 分词器: 将点号(.)添加到 tokenchars,修复 PortRole.FLOW 等带点号标识符的精确搜索
  - 更新 dir_index.py 和 migration_004_dual_fts.py 中的 tokenize 配置
  - 需要重建索引才能生效
- Exact 模式: 添加 fuzzy 回退,当精确搜索无结果时自动尝试模糊搜索
  - 回退时在 metadata 中标注 fallback: 'fuzzy'

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -774,6 +774,7 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
|
|||||||
|
|
||||||
let stdout = '';
|
let stdout = '';
|
||||||
let stderr = '';
|
let stderr = '';
|
||||||
|
let resultLimitReached = false;
|
||||||
|
|
||||||
child.stdout.on('data', (data) => {
|
child.stdout.on('data', (data) => {
|
||||||
stdout += data.toString();
|
stdout += data.toString();
|
||||||
@@ -786,8 +787,16 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
|
|||||||
child.on('close', (code) => {
|
child.on('close', (code) => {
|
||||||
const results: ExactMatch[] = [];
|
const results: ExactMatch[] = [];
|
||||||
const lines = stdout.split('\n').filter((line) => line.trim());
|
const lines = stdout.split('\n').filter((line) => line.trim());
|
||||||
|
// Limit total results to prevent memory overflow (--max-count only limits per-file)
|
||||||
|
const effectiveLimit = maxResults > 0 ? maxResults : 500;
|
||||||
|
|
||||||
for (const line of lines) {
|
for (const line of lines) {
|
||||||
|
// Stop collecting if we've reached the limit
|
||||||
|
if (results.length >= effectiveLimit) {
|
||||||
|
resultLimitReached = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const item = JSON.parse(line);
|
const item = JSON.parse(line);
|
||||||
|
|
||||||
@@ -817,6 +826,15 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
|
|||||||
const scoredResults = tokens.length > 1 ? scoreByTokenMatch(results, tokens) : results;
|
const scoredResults = tokens.length > 1 ? scoreByTokenMatch(results, tokens) : results;
|
||||||
|
|
||||||
if (code === 0 || code === 1 || (isWindowsDeviceError && scoredResults.length > 0)) {
|
if (code === 0 || code === 1 || (isWindowsDeviceError && scoredResults.length > 0)) {
|
||||||
|
// Build warning message for various conditions
|
||||||
|
const warnings: string[] = [];
|
||||||
|
if (resultLimitReached) {
|
||||||
|
warnings.push(`Result limit reached (${effectiveLimit}). Use a more specific query or increase limit.`);
|
||||||
|
}
|
||||||
|
if (isWindowsDeviceError) {
|
||||||
|
warnings.push('Some Windows device files were skipped');
|
||||||
|
}
|
||||||
|
|
||||||
resolve({
|
resolve({
|
||||||
success: true,
|
success: true,
|
||||||
results: scoredResults,
|
results: scoredResults,
|
||||||
@@ -827,7 +845,7 @@ async function executeRipgrepMode(params: Params): Promise<SearchResult> {
|
|||||||
query,
|
query,
|
||||||
tokens: tokens.length > 1 ? tokens : undefined, // Include tokens in metadata for debugging
|
tokens: tokens.length > 1 ? tokens : undefined, // Include tokens in metadata for debugging
|
||||||
tokenized: tokens.length > 1,
|
tokenized: tokens.length > 1,
|
||||||
...(isWindowsDeviceError && { warning: 'Some Windows device files were skipped' }),
|
...(warnings.length > 0 && { warning: warnings.join('; ') }),
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
} else if (isWindowsDeviceError && results.length === 0) {
|
} else if (isWindowsDeviceError && results.length === 0) {
|
||||||
@@ -923,6 +941,46 @@ async function executeCodexLensExactMode(params: Params): Promise<SearchResult>
|
|||||||
// Keep empty results
|
// Keep empty results
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fallback to fuzzy mode if exact returns no results
|
||||||
|
if (results.length === 0) {
|
||||||
|
const fuzzyArgs = ['search', query, '--limit', maxResults.toString(), '--mode', 'fuzzy', '--json'];
|
||||||
|
if (enrich) {
|
||||||
|
fuzzyArgs.push('--enrich');
|
||||||
|
}
|
||||||
|
const fuzzyResult = await executeCodexLens(fuzzyArgs, { cwd: path });
|
||||||
|
|
||||||
|
if (fuzzyResult.success) {
|
||||||
|
try {
|
||||||
|
const parsed = JSON.parse(stripAnsi(fuzzyResult.output || '{}'));
|
||||||
|
const data = parsed.result?.results || parsed.results || parsed;
|
||||||
|
results = (Array.isArray(data) ? data : []).map((item: any) => ({
|
||||||
|
file: item.path || item.file,
|
||||||
|
score: item.score || 0,
|
||||||
|
content: item.excerpt || item.content || '',
|
||||||
|
symbol: item.symbol || null,
|
||||||
|
}));
|
||||||
|
} catch {
|
||||||
|
// Keep empty results
|
||||||
|
}
|
||||||
|
|
||||||
|
if (results.length > 0) {
|
||||||
|
return {
|
||||||
|
success: true,
|
||||||
|
results,
|
||||||
|
metadata: {
|
||||||
|
mode: 'exact',
|
||||||
|
backend: 'codexlens',
|
||||||
|
count: results.length,
|
||||||
|
query,
|
||||||
|
warning: indexStatus.warning,
|
||||||
|
note: 'No exact matches found, showing fuzzy results',
|
||||||
|
fallback: 'fuzzy',
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
success: true,
|
success: true,
|
||||||
results,
|
results,
|
||||||
|
|||||||
@@ -1651,16 +1651,17 @@ class DirIndexStore:
|
|||||||
from codexlens.storage.sqlite_utils import check_trigram_support
|
from codexlens.storage.sqlite_utils import check_trigram_support
|
||||||
|
|
||||||
has_trigram = check_trigram_support(conn)
|
has_trigram = check_trigram_support(conn)
|
||||||
fuzzy_tokenizer = "trigram" if has_trigram else "unicode61 tokenchars '_-'"
|
fuzzy_tokenizer = "trigram" if has_trigram else "unicode61 tokenchars '_-.'"
|
||||||
|
|
||||||
# Exact FTS table with unicode61 tokenizer
|
# Exact FTS table with unicode61 tokenizer
|
||||||
|
# Note: tokenchars includes '.' to properly tokenize qualified names like PortRole.FLOW
|
||||||
conn.execute(
|
conn.execute(
|
||||||
"""
|
"""
|
||||||
CREATE VIRTUAL TABLE IF NOT EXISTS files_fts_exact USING fts5(
|
CREATE VIRTUAL TABLE IF NOT EXISTS files_fts_exact USING fts5(
|
||||||
name, full_path UNINDEXED, content,
|
name, full_path UNINDEXED, content,
|
||||||
content='files',
|
content='files',
|
||||||
content_rowid='id',
|
content_rowid='id',
|
||||||
tokenize="unicode61 tokenchars '_-'"
|
tokenize="unicode61 tokenchars '_-.'"
|
||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ def upgrade(db_conn: Connection):
|
|||||||
f"Trigram tokenizer not available (requires SQLite >= 3.34), "
|
f"Trigram tokenizer not available (requires SQLite >= 3.34), "
|
||||||
f"using extended unicode61 tokenizer for fuzzy matching"
|
f"using extended unicode61 tokenizer for fuzzy matching"
|
||||||
)
|
)
|
||||||
fuzzy_tokenizer = "unicode61 tokenchars '_-'"
|
fuzzy_tokenizer = "unicode61 tokenchars '_-.'"
|
||||||
|
|
||||||
# Start transaction
|
# Start transaction
|
||||||
cursor.execute("BEGIN TRANSACTION")
|
cursor.execute("BEGIN TRANSACTION")
|
||||||
@@ -122,7 +122,8 @@ def upgrade(db_conn: Connection):
|
|||||||
# Drop old FTS table
|
# Drop old FTS table
|
||||||
cursor.execute("DROP TABLE IF EXISTS files_fts")
|
cursor.execute("DROP TABLE IF EXISTS files_fts")
|
||||||
|
|
||||||
# Create exact FTS table (unicode61 with underscores/hyphens as token chars)
|
# Create exact FTS table (unicode61 with underscores/hyphens/dots as token chars)
|
||||||
|
# Note: tokenchars includes '.' to properly tokenize qualified names like PortRole.FLOW
|
||||||
log.info("Creating files_fts_exact table with unicode61 tokenizer...")
|
log.info("Creating files_fts_exact table with unicode61 tokenizer...")
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
"""
|
"""
|
||||||
@@ -130,7 +131,7 @@ def upgrade(db_conn: Connection):
|
|||||||
name, full_path UNINDEXED, content,
|
name, full_path UNINDEXED, content,
|
||||||
content='files',
|
content='files',
|
||||||
content_rowid='id',
|
content_rowid='id',
|
||||||
tokenize="unicode61 tokenchars '_-'"
|
tokenize="unicode61 tokenchars '_-.'"
|
||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user