mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-09 02:24:11 +08:00
fix: 修复 SmartSearch 的 ripgrep limit 和 FTS 分词器问题
- Ripgrep 模式: 添加总结果数量限制,防止返回超过 2MB 数据
  - --max-count 只限制每个文件的匹配数,现在在收集结果时应用 limit
  - 达到限制时在 metadata 中添加 warning 提示
- FTS 分词器: 将点号(.)添加到 tokenchars,修复 PortRole.FLOW 等带点号标识符的精确搜索
  - 更新 dir_index.py 和 migration_004_dual_fts.py 中的 tokenize 配置
  - 需要重建索引才能生效
- Exact 模式: 添加 fuzzy 回退,当精确搜索无结果时自动尝试模糊搜索
  - 回退时在 metadata 中标注 fallback: 'fuzzy'

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -1651,16 +1651,17 @@ class DirIndexStore:
|
||||
from codexlens.storage.sqlite_utils import check_trigram_support
|
||||
|
||||
has_trigram = check_trigram_support(conn)
|
||||
fuzzy_tokenizer = "trigram" if has_trigram else "unicode61 tokenchars '_-'"
|
||||
fuzzy_tokenizer = "trigram" if has_trigram else "unicode61 tokenchars '_-.'"
|
||||
|
||||
# Exact FTS table with unicode61 tokenizer
|
||||
# Note: tokenchars includes '.' to properly tokenize qualified names like PortRole.FLOW
|
||||
conn.execute(
|
||||
"""
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS files_fts_exact USING fts5(
|
||||
name, full_path UNINDEXED, content,
|
||||
content='files',
|
||||
content_rowid='id',
|
||||
tokenize="unicode61 tokenchars '_-'"
|
||||
tokenize="unicode61 tokenchars '_-.'"
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
@@ -45,7 +45,7 @@ def upgrade(db_conn: Connection):
|
||||
f"Trigram tokenizer not available (requires SQLite >= 3.34), "
|
||||
f"using extended unicode61 tokenizer for fuzzy matching"
|
||||
)
|
||||
fuzzy_tokenizer = "unicode61 tokenchars '_-'"
|
||||
fuzzy_tokenizer = "unicode61 tokenchars '_-.'"
|
||||
|
||||
# Start transaction
|
||||
cursor.execute("BEGIN TRANSACTION")
|
||||
@@ -122,7 +122,8 @@ def upgrade(db_conn: Connection):
|
||||
# Drop old FTS table
|
||||
cursor.execute("DROP TABLE IF EXISTS files_fts")
|
||||
|
||||
# Create exact FTS table (unicode61 with underscores/hyphens as token chars)
|
||||
# Create exact FTS table (unicode61 with underscores/hyphens/dots as token chars)
|
||||
# Note: tokenchars includes '.' to properly tokenize qualified names like PortRole.FLOW
|
||||
log.info("Creating files_fts_exact table with unicode61 tokenizer...")
|
||||
cursor.execute(
|
||||
"""
|
||||
@@ -130,7 +131,7 @@ def upgrade(db_conn: Connection):
|
||||
name, full_path UNINDEXED, content,
|
||||
content='files',
|
||||
content_rowid='id',
|
||||
tokenize="unicode61 tokenchars '_-'"
|
||||
tokenize="unicode61 tokenchars '_-.'"
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user