diff --git a/.claude/workflows/cli-tools-usage.md b/.claude/workflows/cli-tools-usage.md index 8f2f7c0f..39260c42 100644 --- a/.claude/workflows/cli-tools-usage.md +++ b/.claude/workflows/cli-tools-usage.md @@ -466,27 +466,21 @@ RULES: $(cat ~/.claude/workflows/cli-templates/protocols/write-protocol.md) $(ca ``` --- -## Configuration +## ⚙️ Execution Configuration -### Timeout Allocation (Bash) +### Dynamic Timeout Allocation -controlled by external bash `timeout` command: +**Minimum timeout: 5 minutes (300000ms)** - Never set below this threshold. -**Recommended Time Allocation**: +**Timeout Ranges**: +- **Simple** (analysis, search): 5-10min (300000-600000ms) +- **Medium** (refactoring, documentation): 10-20min (600000-1200000ms) +- **Complex** (implementation, migration): 20-60min (1200000-3600000ms) +- **Heavy** (large codebase, multi-file): 60-120min (3600000-7200000ms) -- **Simple** (5-10min): Analysis, search - - `timeout 300` ~ `timeout 600` +**Codex Multiplier**: 3x of allocated time (minimum 15min / 900000ms) -- **Medium** (10-20min): Refactoring, documentation - - `timeout 600` ~ `timeout 1200` - -- **Complex** (20-60min): Implementation, migration - - `timeout 1200` ~ `timeout 3600` - -- **Heavy** (60-120min): Large codebase, multi-file - - `timeout 3600` ~ `timeout 7200` - -**Codex Multiplier**: 3x allocated time (minimum 15min / 900s) +**Auto-detection**: Analyze PURPOSE and TASK fields to determine timeout ### Permission Framework diff --git a/ccw/src/core/routes/codexlens-routes.ts b/ccw/src/core/routes/codexlens-routes.ts index 1d49f27b..091e1b5f 100644 --- a/ccw/src/core/routes/codexlens-routes.ts +++ b/ccw/src/core/routes/codexlens-routes.ts @@ -586,6 +586,8 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise const query = url.searchParams.get('query') || ''; const limit = parseInt(url.searchParams.get('limit') || '20', 10); const mode = url.searchParams.get('mode') || 'exact'; // exact, fuzzy, hybrid, vector + const maxContentLength = parseInt(url.searchParams.get('max_content_length') || '200', 10); + const extraFilesCount = parseInt(url.searchParams.get('extra_files_count') || '10', 10); const projectPath = url.searchParams.get('path') || initialPath; if (!query) { @@ -595,15 +597,46 @@ export async function handleCodexLensRoutes(ctx: RouteContext): Promise } try { - const args = ['search', query, '--path', projectPath, '--limit', limit.toString(), '--mode', mode, '--json']; + // Request more results to support split (full content + extra files) + const totalToFetch = limit + extraFilesCount; + const args = ['search', query, '--path', projectPath, '--limit', totalToFetch.toString(), '--mode', mode, '--json']; const result = await executeCodexLens(args, { cwd: projectPath }); if (result.success) { try { const parsed = extractJSON(result.output); + const allResults = parsed.result?.results || []; + + // Truncate content and split results + const truncateContent = (content: string | null | undefined): string => { + if (!content) return ''; + if (content.length <= maxContentLength) return content; + return content.slice(0, maxContentLength) + '...'; + }; + + // Split results: first N with full content, rest as file paths only + const resultsWithContent = allResults.slice(0, limit).map((r: any) => ({ + ...r, + content: truncateContent(r.content || r.excerpt), + excerpt: truncateContent(r.excerpt || r.content), + })); + + const extraResults = allResults.slice(limit, limit + extraFilesCount); + const extraFiles = [...new Set(extraResults.map((r: any) => r.path || r.file))]; + res.writeHead(200, { 'Content-Type': 'application/json' }); - res.end(JSON.stringify({ success: true, ...parsed.result })); + res.end(JSON.stringify({ + success: true, + results: resultsWithContent, + extra_files: extraFiles.length > 0 ? extraFiles : undefined, + metadata: { + total: allResults.length, + limit, + max_content_length: maxContentLength, + extra_files_count: extraFilesCount, + }, + })); } catch { res.writeHead(200, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ success: true, results: [], output: result.output })); diff --git a/ccw/src/tools/smart-search.ts b/ccw/src/tools/smart-search.ts index 5d54ac48..075ee789 100644 --- a/ccw/src/tools/smart-search.ts +++ b/ccw/src/tools/smart-search.ts @@ -36,10 +36,12 @@ const ParamsSchema = z.object({ path: z.string().optional(), paths: z.array(z.string()).default([]), contextLines: z.number().default(0), - maxResults: z.number().default(20), // Increased default + maxResults: z.number().default(5), // Default 5 with full content includeHidden: z.boolean().default(false), languages: z.array(z.string()).optional(), - limit: z.number().default(20), // Increased default + limit: z.number().default(5), // Default 5 with full content + extraFilesCount: z.number().default(10), // Additional file-only results + maxContentLength: z.number().default(200), // Max content length for truncation (50-2000) offset: z.number().default(0), // NEW: Pagination offset (start_index) enrich: z.boolean().default(false), // Search modifiers for ripgrep mode @@ -268,6 +270,7 @@ interface SearchMetadata { interface SearchResult { success: boolean; results?: ExactMatch[] | SemanticMatch[] | GraphMatch[] | FileMatch[] | unknown; + extra_files?: string[]; // Additional file paths without content output?: string; metadata?: SearchMetadata; error?: string; @@ -301,6 +304,42 @@ function stripAnsi(str: string): string { return str.replace(/\x1b\[[0-9;]*m/g, ''); } +/** Default maximum content length to return (avoid excessive output) */ +const DEFAULT_MAX_CONTENT_LENGTH = 200; + +/** + * Truncate content to specified length with ellipsis + * @param content - The content to truncate + * @param maxLength - Maximum length (default: 200) + */ +function truncateContent(content: string | null | undefined, maxLength: number = DEFAULT_MAX_CONTENT_LENGTH): string { + if (!content) return ''; + if (content.length <= maxLength) return content; + return content.slice(0, maxLength) + '...'; +} + +/** + * Split results into full content results and extra file-only results + * Generic function supporting both SemanticMatch and ExactMatch types + * @param allResults - All search results (must have 'file' property) + * @param fullContentLimit - Number of results with full content (default: 5) + * @param extraFilesCount - Number of additional file-only results (default: 10) + */ +function splitResultsWithExtraFiles( + allResults: T[], + fullContentLimit: number = 5, + extraFilesCount: number = 10 +): { results: T[]; extra_files: string[] } { + // First N results with full content + const results = allResults.slice(0, fullContentLimit); + + // Next M results as file paths only (deduplicated) + const extraResults = allResults.slice(fullContentLimit, fullContentLimit + extraFilesCount); + const extra_files = [...new Set(extraResults.map(r => r.file))]; + + return { results, extra_files }; +} + /** * Check if CodexLens index exists for current directory * @param path - Directory path to check @@ -714,7 +753,7 @@ async function executeAutoMode(params: Params): Promise { * Supports tokenized multi-word queries with OR matching and result ranking */ async function executeRipgrepMode(params: Params): Promise { - const { query, paths = [], contextLines = 0, maxResults = 10, includeHidden = false, path = '.', regex = true, caseSensitive = true, tokenize = true } = params; + const { query, paths = [], contextLines = 0, maxResults = 5, extraFilesCount = 10, maxContentLength = 200, includeHidden = false, path = '.', regex = true, caseSensitive = true, tokenize = true } = params; if (!query) { return { @@ -726,6 +765,9 @@ async function executeRipgrepMode(params: Params): Promise { // Check if ripgrep is available const hasRipgrep = checkToolAvailability('rg'); + // Calculate total to fetch for split (full content + extra files) + const totalToFetch = maxResults + extraFilesCount; + // If ripgrep not available, fall back to CodexLens exact mode if (!hasRipgrep) { const readyStatus = await ensureCodexLensReady(); @@ -737,7 +779,7 @@ async function executeRipgrepMode(params: Params): Promise { } // Use CodexLens exact mode as fallback - const args = ['search', query, '--limit', maxResults.toString(), '--mode', 'exact', '--json']; + const args = ['search', query, '--limit', totalToFetch.toString(), '--mode', 'exact', '--json']; const result = await executeCodexLens(args, { cwd: path }); if (!result.success) { @@ -754,23 +796,27 @@ async function executeRipgrepMode(params: Params): Promise { } // Parse results - let results: SemanticMatch[] = []; + let allResults: SemanticMatch[] = []; try { const parsed = JSON.parse(stripAnsi(result.output || '{}')); const data = parsed.result?.results || parsed.results || parsed; - results = (Array.isArray(data) ? data : []).map((item: any) => ({ + allResults = (Array.isArray(data) ? data : []).map((item: any) => ({ file: item.path || item.file, score: item.score || 0, - content: item.excerpt || item.content || '', + content: truncateContent(item.content || item.excerpt, maxContentLength), symbol: item.symbol || null, })); } catch { // Keep empty results } + // Split results: first N with full content, rest as file paths only + const { results, extra_files } = splitResultsWithExtraFiles(allResults, maxResults, extraFilesCount); + return { success: true, results, + extra_files: extra_files.length > 0 ? extra_files : undefined, metadata: { mode: 'ripgrep', backend: 'codexlens-fallback', @@ -781,12 +827,12 @@ async function executeRipgrepMode(params: Params): Promise { }; } - // Use ripgrep + // Use ripgrep - request more results to support split const { command, args, tokens } = buildRipgrepCommand({ query, paths: paths.length > 0 ? paths : [path], contextLines, - maxResults, + maxResults: totalToFetch, // Fetch more to support split includeHidden, regex, caseSensitive, @@ -812,14 +858,14 @@ async function executeRipgrepMode(params: Params): Promise { }); child.on('close', (code) => { - const results: ExactMatch[] = []; + const allResults: ExactMatch[] = []; const lines = stdout.split('\n').filter((line) => line.trim()); // Limit total results to prevent memory overflow (--max-count only limits per-file) - const effectiveLimit = maxResults > 0 ? maxResults : 500; + const effectiveLimit = totalToFetch > 0 ? totalToFetch : 500; for (const line of lines) { // Stop collecting if we've reached the limit - if (results.length >= effectiveLimit) { + if (allResults.length >= effectiveLimit) { resultLimitReached = true; break; } @@ -837,7 +883,7 @@ async function executeRipgrepMode(params: Params): Promise { : 1, content: item.data.lines.text.trim(), }; - results.push(match); + allResults.push(match); } } catch { continue; @@ -850,9 +896,12 @@ async function executeRipgrepMode(params: Params): Promise { // Apply token-based scoring and sorting for multi-word queries // Results matching more tokens are ranked higher (exact matches first) - const scoredResults = tokens.length > 1 ? scoreByTokenMatch(results, tokens) : results; + const scoredResults = tokens.length > 1 ? scoreByTokenMatch(allResults, tokens) : allResults; if (code === 0 || code === 1 || (isWindowsDeviceError && scoredResults.length > 0)) { + // Split results: first N with full content, rest as file paths only + const { results, extra_files } = splitResultsWithExtraFiles(scoredResults, maxResults, extraFilesCount); + // Build warning message for various conditions const warnings: string[] = []; if (resultLimitReached) { @@ -864,18 +913,19 @@ async function executeRipgrepMode(params: Params): Promise { resolve({ success: true, - results: scoredResults, + results, + extra_files: extra_files.length > 0 ? extra_files : undefined, metadata: { mode: 'ripgrep', backend: 'ripgrep', - count: scoredResults.length, + count: results.length, query, tokens: tokens.length > 1 ? tokens : undefined, // Include tokens in metadata for debugging tokenized: tokens.length > 1, ...(warnings.length > 0 && { warning: warnings.join('; ') }), }, }); - } else if (isWindowsDeviceError && results.length === 0) { + } else if (isWindowsDeviceError && allResults.length === 0) { // Windows device error but no results - might be the only issue resolve({ success: true, @@ -912,7 +962,7 @@ async function executeRipgrepMode(params: Params): Promise { * Requires index */ async function executeCodexLensExactMode(params: Params): Promise { - const { query, path = '.', maxResults = 10, enrich = false } = params; + const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false } = params; if (!query) { return { @@ -933,7 +983,9 @@ async function executeCodexLensExactMode(params: Params): Promise // Check index status const indexStatus = await checkIndexStatus(path); - const args = ['search', query, '--limit', maxResults.toString(), '--mode', 'exact', '--json']; + // Request more results to support split (full content + extra files) + const totalToFetch = maxResults + extraFilesCount; + const args = ['search', query, '--limit', totalToFetch.toString(), '--mode', 'exact', '--json']; if (enrich) { args.push('--enrich'); } @@ -954,14 +1006,14 @@ async function executeCodexLensExactMode(params: Params): Promise } // Parse results - let results: SemanticMatch[] = []; + let allResults: SemanticMatch[] = []; try { const parsed = JSON.parse(stripAnsi(result.output || '{}')); const data = parsed.result?.results || parsed.results || parsed; - results = (Array.isArray(data) ? data : []).map((item: any) => ({ + allResults = (Array.isArray(data) ? data : []).map((item: any) => ({ file: item.path || item.file, score: item.score || 0, - content: item.excerpt || item.content || '', + content: truncateContent(item.content || item.excerpt, maxContentLength), symbol: item.symbol || null, })); } catch { @@ -969,8 +1021,8 @@ async function executeCodexLensExactMode(params: Params): Promise } // Fallback to fuzzy mode if exact returns no results - if (results.length === 0) { - const fuzzyArgs = ['search', query, '--limit', maxResults.toString(), '--mode', 'fuzzy', '--json']; + if (allResults.length === 0) { + const fuzzyArgs = ['search', query, '--limit', totalToFetch.toString(), '--mode', 'fuzzy', '--json']; if (enrich) { fuzzyArgs.push('--enrich'); } @@ -980,20 +1032,23 @@ async function executeCodexLensExactMode(params: Params): Promise try { const parsed = JSON.parse(stripAnsi(fuzzyResult.output || '{}')); const data = parsed.result?.results || parsed.results || parsed; - results = (Array.isArray(data) ? data : []).map((item: any) => ({ + allResults = (Array.isArray(data) ? data : []).map((item: any) => ({ file: item.path || item.file, score: item.score || 0, - content: item.excerpt || item.content || '', + content: truncateContent(item.content || item.excerpt, maxContentLength), symbol: item.symbol || null, })); } catch { // Keep empty results } - if (results.length > 0) { + if (allResults.length > 0) { + // Split results: first N with full content, rest as file paths only + const { results, extra_files } = splitResultsWithExtraFiles(allResults, maxResults, extraFilesCount); return { success: true, results, + extra_files: extra_files.length > 0 ? extra_files : undefined, metadata: { mode: 'exact', backend: 'codexlens', @@ -1008,9 +1063,13 @@ async function executeCodexLensExactMode(params: Params): Promise } } + // Split results: first N with full content, rest as file paths only + const { results, extra_files } = splitResultsWithExtraFiles(allResults, maxResults, extraFilesCount); + return { success: true, results, + extra_files: extra_files.length > 0 ? extra_files : undefined, metadata: { mode: 'exact', backend: 'codexlens', @@ -1027,7 +1086,7 @@ async function executeCodexLensExactMode(params: Params): Promise * Requires index with embeddings */ async function executeHybridMode(params: Params): Promise { - const { query, path = '.', maxResults = 10, enrich = false } = params; + const { query, path = '.', maxResults = 5, extraFilesCount = 10, maxContentLength = 200, enrich = false } = params; if (!query) { return { @@ -1048,7 +1107,9 @@ async function executeHybridMode(params: Params): Promise { // Check index status const indexStatus = await checkIndexStatus(path); - const args = ['search', query, '--limit', maxResults.toString(), '--mode', 'hybrid', '--json']; + // Request more results to support split (full content + extra files) + const totalToFetch = maxResults + extraFilesCount; + const args = ['search', query, '--limit', totalToFetch.toString(), '--mode', 'hybrid', '--json']; if (enrich) { args.push('--enrich'); } @@ -1069,14 +1130,14 @@ async function executeHybridMode(params: Params): Promise { } // Parse results - let results: SemanticMatch[] = []; + let allResults: SemanticMatch[] = []; let baselineInfo: { score: number; count: number } | null = null; let initialCount = 0; try { const parsed = JSON.parse(stripAnsi(result.output || '{}')); const data = parsed.result?.results || parsed.results || parsed; - results = (Array.isArray(data) ? data : []).map((item: any) => { + allResults = (Array.isArray(data) ? data : []).map((item: any) => { const rawScore = item.score || 0; // Hybrid mode returns distance scores (lower is better). // Convert to similarity scores (higher is better) for consistency. @@ -1085,27 +1146,27 @@ async function executeHybridMode(params: Params): Promise { return { file: item.path || item.file, score: similarityScore, - content: item.excerpt || item.content || '', + content: truncateContent(item.content || item.excerpt, maxContentLength), symbol: item.symbol || null, }; }); - initialCount = results.length; + initialCount = allResults.length; // Post-processing pipeline to improve semantic search quality // 0. Filter dominant baseline scores (hot spot detection) - const baselineResult = filterDominantBaselineScores(results); - results = baselineResult.filteredResults; + const baselineResult = filterDominantBaselineScores(allResults); + allResults = baselineResult.filteredResults; baselineInfo = baselineResult.baselineInfo; // 1. Filter noisy files (coverage, node_modules, etc.) - results = filterNoisyFiles(results); + allResults = filterNoisyFiles(allResults); // 2. Boost results containing query keywords - results = applyKeywordBoosting(results, query); + allResults = applyKeywordBoosting(allResults, query); // 3. Enforce score diversity (penalize identical scores) - results = enforceScoreDiversity(results); + allResults = enforceScoreDiversity(allResults); // 4. Re-sort by adjusted scores - results.sort((a, b) => b.score - a.score); + allResults.sort((a, b) => b.score - a.score); } catch { return { success: true, @@ -1121,15 +1182,19 @@ async function executeHybridMode(params: Params): Promise { }; } + // Split results: first N with full content, rest as file paths only + const { results, extra_files } = splitResultsWithExtraFiles(allResults, maxResults, extraFilesCount); + // Build metadata with baseline info if detected let note = 'Hybrid mode uses RRF fusion (exact + fuzzy + vector) for best results'; if (baselineInfo) { - note += ` | Filtered ${initialCount - results.length} hot-spot results with baseline score ~${baselineInfo.score.toFixed(4)}`; + note += ` | Filtered ${initialCount - allResults.length} hot-spot results with baseline score ~${baselineInfo.score.toFixed(4)}`; } return { success: true, results, + extra_files: extra_files.length > 0 ? extra_files : undefined, metadata: { mode: 'hybrid', backend: 'codexlens', @@ -1540,7 +1605,7 @@ export const schema: ToolSchema = { mode: { type: 'string', enum: SEARCH_MODES, - description: 'Search mode: auto (default), hybrid (best quality), exact (CodexLens FTS), ripgrep (fast, no index), priority (fallback: hybrid->exact->ripgrep)', + description: 'Search mode: auto, hybrid (best quality), exact (CodexLens FTS), ripgrep (fast, no index), priority (fallback chain)', default: 'auto', }, output_mode: { @@ -1576,6 +1641,16 @@ export const schema: ToolSchema = { description: 'Alias for maxResults (default: 20)', default: 20, }, + extraFilesCount: { + type: 'number', + description: 'Number of additional file-only results (paths without content)', + default: 10, + }, + maxContentLength: { + type: 'number', + description: 'Maximum content length for truncation (50-2000)', + default: 200, + }, offset: { type: 'number', description: 'Pagination offset - skip first N results (default: 0)', diff --git a/codex-lens/src/codexlens/search/chain_search.py b/codex-lens/src/codexlens/search/chain_search.py index 3d741a8c..1f37c7f6 100644 --- a/codex-lens/src/codexlens/search/chain_search.py +++ b/codex-lens/src/codexlens/search/chain_search.py @@ -494,9 +494,13 @@ class ChainSearchEngine: else: # Use fuzzy FTS if enable_fuzzy=True (mode="fuzzy"), otherwise exact FTS if enable_fuzzy: - fts_results = store.search_fts_fuzzy(query, limit=limit) + fts_results = store.search_fts_fuzzy( + query, limit=limit, return_full_content=True + ) else: - fts_results = store.search_fts(query, limit=limit) + fts_results = store.search_fts_exact( + query, limit=limit, return_full_content=True + ) # Optionally add semantic keyword results if include_semantic: diff --git a/codex-lens/src/codexlens/search/hybrid_search.py b/codex-lens/src/codexlens/search/hybrid_search.py index 8c8c225b..6984daf8 100644 --- a/codex-lens/src/codexlens/search/hybrid_search.py +++ b/codex-lens/src/codexlens/search/hybrid_search.py @@ -200,7 +200,9 @@ class HybridSearchEngine: """ try: with DirIndexStore(index_path) as store: - return store.search_fts_exact(query, limit=limit) + return store.search_fts_exact( + query, limit=limit, return_full_content=True + ) except Exception as exc: self.logger.debug("Exact search error: %s", exc) return [] @@ -220,7 +222,9 @@ class HybridSearchEngine: """ try: with DirIndexStore(index_path) as store: - return store.search_fts_fuzzy(query, limit=limit) + return store.search_fts_fuzzy( + query, limit=limit, return_full_content=True + ) except Exception as exc: self.logger.debug("Fuzzy search error: %s", exc) return [] diff --git a/codex-lens/src/codexlens/semantic/litellm_embedder.py b/codex-lens/src/codexlens/semantic/litellm_embedder.py index 7377853f..27a6137c 100644 --- a/codex-lens/src/codexlens/semantic/litellm_embedder.py +++ b/codex-lens/src/codexlens/semantic/litellm_embedder.py @@ -127,3 +127,18 @@ class LiteLLMEmbedderWrapper(BaseEmbedder): # LiteLLM handles batching internally, ignore batch_size parameter return self._embedder.embed(texts) + + def embed_single(self, text: str) -> list[float]: + """Generate embedding for a single text. + + Args: + text: Text to embed. + + Returns: + list[float]: Embedding vector as a list of floats. + """ + # Sanitize text before embedding + sanitized = self._sanitize_text(text) + embedding = self._embedder.embed([sanitized]) + return embedding[0].tolist() +