diff --git a/ccw/src/tools/smart-search.ts b/ccw/src/tools/smart-search.ts index f609de77..4841a9bb 100644 --- a/ccw/src/tools/smart-search.ts +++ b/ccw/src/tools/smart-search.ts @@ -774,6 +774,7 @@ async function executeRipgrepMode(params: Params): Promise { let stdout = ''; let stderr = ''; + let resultLimitReached = false; child.stdout.on('data', (data) => { stdout += data.toString(); @@ -786,8 +787,16 @@ async function executeRipgrepMode(params: Params): Promise { child.on('close', (code) => { const results: ExactMatch[] = []; const lines = stdout.split('\n').filter((line) => line.trim()); + // Limit total results to prevent memory overflow (--max-count only limits per-file) + const effectiveLimit = maxResults > 0 ? maxResults : 500; for (const line of lines) { + // Stop collecting if we've reached the limit + if (results.length >= effectiveLimit) { + resultLimitReached = true; + break; + } + try { const item = JSON.parse(line); @@ -817,6 +826,15 @@ async function executeRipgrepMode(params: Params): Promise { const scoredResults = tokens.length > 1 ? scoreByTokenMatch(results, tokens) : results; if (code === 0 || code === 1 || (isWindowsDeviceError && scoredResults.length > 0)) { + // Build warning message for various conditions + const warnings: string[] = []; + if (resultLimitReached) { + warnings.push(`Result limit reached (${effectiveLimit}). Use a more specific query or increase limit.`); + } + if (isWindowsDeviceError) { + warnings.push('Some Windows device files were skipped'); + } + resolve({ success: true, results: scoredResults, @@ -827,7 +845,7 @@ async function executeRipgrepMode(params: Params): Promise { query, tokens: tokens.length > 1 ? tokens : undefined, // Include tokens in metadata for debugging tokenized: tokens.length > 1, - ...(isWindowsDeviceError && { warning: 'Some Windows device files were skipped' }), + ...(warnings.length > 0 && { warning: warnings.join('; ') }), }, }); } else if (isWindowsDeviceError && results.length === 0) { @@ -923,6 +941,46 @@ async function executeCodexLensExactMode(params: Params): Promise // Keep empty results } + // Fallback to fuzzy mode if exact returns no results + if (results.length === 0) { + const fuzzyArgs = ['search', query, '--limit', maxResults.toString(), '--mode', 'fuzzy', '--json']; + if (enrich) { + fuzzyArgs.push('--enrich'); + } + const fuzzyResult = await executeCodexLens(fuzzyArgs, { cwd: path }); + + if (fuzzyResult.success) { + try { + const parsed = JSON.parse(stripAnsi(fuzzyResult.output || '{}')); + const data = parsed.result?.results || parsed.results || parsed; + results = (Array.isArray(data) ? data : []).map((item: any) => ({ + file: item.path || item.file, + score: item.score || 0, + content: item.excerpt || item.content || '', + symbol: item.symbol || null, + })); + } catch { + // Keep empty results + } + + if (results.length > 0) { + return { + success: true, + results, + metadata: { + mode: 'exact', + backend: 'codexlens', + count: results.length, + query, + warning: indexStatus.warning, + note: 'No exact matches found, showing fuzzy results', + fallback: 'fuzzy', + }, + }; + } + } + } + return { success: true, results, diff --git a/codex-lens/src/codexlens/storage/dir_index.py b/codex-lens/src/codexlens/storage/dir_index.py index f30395a8..fbb0a543 100644 --- a/codex-lens/src/codexlens/storage/dir_index.py +++ b/codex-lens/src/codexlens/storage/dir_index.py @@ -1651,16 +1651,17 @@ class DirIndexStore: from codexlens.storage.sqlite_utils import check_trigram_support has_trigram = check_trigram_support(conn) - fuzzy_tokenizer = "trigram" if has_trigram else "unicode61 tokenchars '_-'" + fuzzy_tokenizer = "trigram" if has_trigram else "unicode61 tokenchars '_-.'" # Exact FTS table with unicode61 tokenizer + # Note: tokenchars includes '.' to properly tokenize qualified names like PortRole.FLOW conn.execute( """ CREATE VIRTUAL TABLE IF NOT EXISTS files_fts_exact USING fts5( name, full_path UNINDEXED, content, content='files', content_rowid='id', - tokenize="unicode61 tokenchars '_-'" + tokenize="unicode61 tokenchars '_-.'" ) """ ) diff --git a/codex-lens/src/codexlens/storage/migrations/migration_004_dual_fts.py b/codex-lens/src/codexlens/storage/migrations/migration_004_dual_fts.py index 7bd8c503..502e067d 100644 --- a/codex-lens/src/codexlens/storage/migrations/migration_004_dual_fts.py +++ b/codex-lens/src/codexlens/storage/migrations/migration_004_dual_fts.py @@ -45,7 +45,7 @@ def upgrade(db_conn: Connection): f"Trigram tokenizer not available (requires SQLite >= 3.34), " f"using extended unicode61 tokenizer for fuzzy matching" ) - fuzzy_tokenizer = "unicode61 tokenchars '_-'" + fuzzy_tokenizer = "unicode61 tokenchars '_-.'" # Start transaction cursor.execute("BEGIN TRANSACTION") @@ -122,7 +122,8 @@ def upgrade(db_conn: Connection): # Drop old FTS table cursor.execute("DROP TABLE IF EXISTS files_fts") - # Create exact FTS table (unicode61 with underscores/hyphens as token chars) + # Create exact FTS table (unicode61 with underscores/hyphens/dots as token chars) + # Note: tokenchars includes '.' to properly tokenize qualified names like PortRole.FLOW log.info("Creating files_fts_exact table with unicode61 tokenizer...") cursor.execute( """ @@ -130,7 +131,7 @@ def upgrade(db_conn: Connection): name, full_path UNINDEXED, content, content='files', content_rowid='id', - tokenize="unicode61 tokenchars '_-'" + tokenize="unicode61 tokenchars '_-.'" ) """ )