/** * Smart Search Tool - Unified intelligent search powered by codexlens-search v2 * * Features: * - Semantic search: 2-stage vector (binary coarse + ANN fine) + FTS5 + RRF fusion + reranking * - Ripgrep fallback for fast exact/regex matching * - File discovery via glob patterns * - Incremental indexing with Mark-and-Filter strategy * - File watcher for automatic index updates * * Actions: * - search: Semantic search via v2 bridge with ripgrep fallback * - init: Initialize v2 index and sync files * - status: Check v2 index statistics * - update: Incremental sync for changed files * - watch: Start file watcher for automatic updates * - find_files: Glob-based file path matching */ import { z } from 'zod'; import type { ToolSchema, ToolResult } from '../types/tool.js'; import { spawn, spawnSync, type SpawnOptions } from 'child_process'; import { existsSync, readFileSync, statSync } from 'fs'; import { dirname, join, resolve } from 'path'; import { ensureReady as ensureCodexLensReady, checkSemanticStatus, ensureLiteLLMEmbedderReady, executeCodexLens, getVenvPythonPath, } from './codex-lens.js'; import { execFile } from 'child_process'; import type { ProgressInfo } from './codex-lens.js'; import { getProjectRoot } from '../utils/path-validator.js'; import { getCodexLensDataDir } from '../utils/codexlens-path.js'; import { EXEC_TIMEOUTS } from '../utils/exec-constants.js'; import { generateRotationEndpoints } from '../config/litellm-api-config-manager.js'; import type { RotationEndpointConfig } from '../config/litellm-api-config-manager.js'; // Timing utilities for performance analysis const TIMING_ENABLED = process.env.SMART_SEARCH_TIMING === '1' || process.env.DEBUG?.includes('timing'); const SEARCH_OUTPUT_MODES = ['full', 'files_only', 'count', 'ace'] as const; type SearchOutputMode = typeof SEARCH_OUTPUT_MODES[number]; interface TimingData { [key: string]: number; } function createTimer(): { mark: (name: string) => void; getTimings: () => TimingData; log: () => void } { const startTime = performance.now(); const marks: { name: string; time: number }[] = []; let lastMark = startTime; return { mark(name: string) { const now = performance.now(); marks.push({ name, time: now - lastMark }); lastMark = now; }, getTimings(): TimingData { const timings: TimingData = {}; marks.forEach(m => { timings[m.name] = Math.round(m.time * 100) / 100; }); timings['_total'] = Math.round((performance.now() - startTime) * 100) / 100; return timings; }, log() { if (TIMING_ENABLED) { const timings = this.getTimings(); console.error(`[TIMING] smart-search: ${JSON.stringify(timings)}`); } } }; } // Define Zod schema for validation const ParamsSchema = z.object({ // Action: search (content), find_files (path/name pattern), init, status, update (incremental sync), watch // Note: search_files is deprecated, use search with output_mode='files_only' action: z.enum(['init', 'search', 'search_files', 'find_files', 'status', 'update', 'watch']).default('search'), query: z.string().optional().describe('Content search query (for action="search")'), pattern: z.string().optional().describe('Glob pattern for path matching (for action="find_files")'), mode: z.enum(['fuzzy', 'semantic']).default('fuzzy'), output_mode: z.enum(SEARCH_OUTPUT_MODES).default('ace'), path: z.string().optional(), paths: z.array(z.string()).default([]), contextLines: z.number().default(0), maxResults: z.number().default(5), // Default 5 with full content includeHidden: z.boolean().default(false), force: z.boolean().default(false).describe('Force full rebuild for action="init".'), limit: z.number().default(5), // Default 5 with full content extraFilesCount: z.number().default(10), // Additional file-only results maxContentLength: z.number().default(200), // Max content length for truncation (50-2000) offset: z.number().default(0), // NEW: Pagination offset (start_index) // Search modifiers for ripgrep mode regex: z.boolean().default(true), // Use regex pattern matching (default: enabled) caseSensitive: z.boolean().default(true), // Case sensitivity (default: case-sensitive) tokenize: z.boolean().default(true), // Tokenize multi-word queries for OR matching (default: enabled) // File type filtering (default: code only) excludeExtensions: z.array(z.string()).optional().describe('File extensions to exclude from results (e.g., ["md", "txt"])'), codeOnly: z.boolean().default(true).describe('Only return code files (excludes md, txt, json, yaml, xml, etc.). Default: true'), withDoc: z.boolean().default(false).describe('Include documentation files (md, txt, rst, etc.). Overrides codeOnly when true'), // Watcher options debounce: z.number().default(1000).describe('Debounce interval in ms for watch action'), // Fuzzy matching is implicit in hybrid mode (RRF fusion) }); type Params = z.infer; // Search mode constants const SEARCH_MODES = ['fuzzy', 'semantic'] as const; // Classification confidence threshold const CONFIDENCE_THRESHOLD = 0.7; // File filtering configuration (ported from code-index) const FILTER_CONFIG = { exclude_directories: new Set([ '.git', '.svn', '.hg', '.bzr', 'node_modules', '__pycache__', '.venv', 'venv', 'vendor', 'bower_components', 'dist', 'build', 'target', 'out', 'bin', 'obj', '.idea', '.vscode', '.vs', '.sublime-workspace', '.pytest_cache', '.coverage', '.tox', '.nyc_output', 'coverage', 'htmlcov', '.next', '.nuxt', '.cache', '.parcel-cache', '.DS_Store', 'Thumbs.db', ]), exclude_files: new Set([ '*.tmp', '*.temp', '*.swp', '*.swo', '*.bak', '*~', '*.orig', '*.log', 'package-lock.json', 'yarn.lock', 'pnpm-lock.yaml', 'Pipfile.lock', ]), // Windows device files - must use **/ pattern to match in any directory // These cause "os error 1" on Windows when accessed windows_device_files: new Set([ 'nul', 'con', 'aux', 'prn', 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9', 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9', ]), }; function buildExcludeArgs(): string[] { const args: string[] = []; for (const dir of FILTER_CONFIG.exclude_directories) { args.push('--glob', `!**/${dir}/**`); } for (const pattern of FILTER_CONFIG.exclude_files) { args.push('--glob', `!${pattern}`); } // Windows device files need case-insensitive matching in any directory for (const device of FILTER_CONFIG.windows_device_files) { args.push('--glob', `!**/${device}`); args.push('--glob', `!**/${device.toUpperCase()}`); } return args; } /** * Tokenize query for multi-word OR matching * Splits on whitespace and common delimiters, filters stop words and short tokens * @param query - The search query * @returns Array of tokens */ function tokenizeQuery(query: string): string[] { // Stop words for filtering (common English + programming keywords) const stopWords = new Set([ 'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'can', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by', 'from', 'as', 'into', 'through', 'and', 'but', 'if', 'or', 'not', 'this', 'that', 'these', 'those', 'it', 'its', 'how', 'what', 'where', 'when', 'why', 'which', 'who', 'whom', ]); // Split on whitespace and common delimiters, keep meaningful tokens const tokens = query .split(/[\s,;:]+/) .map(token => token.trim()) .filter(token => { // Keep tokens that are: // - At least 2 characters long // - Not a stop word (case-insensitive) // - Or look like identifiers (contain underscore/camelCase) if (token.length < 2) return false; if (stopWords.has(token.toLowerCase()) && !token.includes('_') && !/[A-Z]/.test(token)) { return false; } return true; }); return tokens; } /** * Score results based on token match count for ranking * @param results - Search results * @param tokens - Query tokens * @returns Results with match scores */ function scoreByTokenMatch(results: ExactMatch[], tokens: string[]): ExactMatch[] { if (tokens.length <= 1) return results; // Create case-insensitive patterns for each token const tokenPatterns = tokens.map(t => { const escaped = t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); return new RegExp(escaped, 'i'); }); return results.map(r => { const content = r.content || ''; const file = r.file || ''; const searchText = `${file} ${content}`; // Count how many tokens match let matchCount = 0; for (const pattern of tokenPatterns) { if (pattern.test(searchText)) { matchCount++; } } // Calculate match ratio (0 to 1) const matchRatio = matchCount / tokens.length; return { ...r, matchScore: matchRatio, matchCount, }; }).sort((a, b) => { // Sort by match ratio (descending), then by line number if (b.matchScore !== a.matchScore) { return b.matchScore - a.matchScore; } return (a.line || 0) - (b.line || 0); }); } interface Classification { mode: string; confidence: number; reasoning: string; } interface ChunkLine { line: number; text: string; isMatch: boolean; } interface ExactMatch { file: string; line: number; column: number; content: string; endLine?: number; chunkLines?: ChunkLine[]; matchScore?: number; // Token match ratio (0-1) for multi-word queries matchCount?: number; // Number of tokens matched } interface RelationshipInfo { type: string; // 'calls', 'imports', 'called_by', 'imported_by' direction: 'outgoing' | 'incoming'; target?: string; // Target symbol name (for outgoing) source?: string; // Source symbol name (for incoming) file: string; // File path line?: number; // Line number } interface SemanticMatch { file: string; line?: number; column?: number; score: number; content: string; symbol: string | null; relationships?: RelationshipInfo[]; } interface GraphMatch { file: string; symbols: unknown; relationships: unknown[]; } // File match for find_files action (path-based search) interface FileMatch { path: string; type: 'file' | 'directory'; name: string; // Filename only extension?: string; // File extension (without dot) } interface PaginationInfo { offset: number; // Starting index of returned results limit: number; // Number of results requested total: number; // Total number of results found has_more: boolean; // True if more results are available } interface SearchSuggestion { title: string; command: string; reason: string; } interface SearchMetadata { mode?: string; backend?: string; count?: number; query?: string; pattern?: string; // For find_files action classified_as?: string; confidence?: number; reasoning?: string; embeddings_coverage_percent?: number; warning?: string; note?: string; index_status?: 'indexed' | 'not_indexed' | 'partial'; fallback?: string; // Fallback mode used (e.g., 'fuzzy') fallback_history?: string[]; suggested_weights?: Record; // Tokenization metadata (ripgrep mode) tokens?: string[]; // Query tokens used for multi-word search tokenized?: boolean; // Whether tokenization was applied suggestions?: SearchSuggestion[]; // Pagination metadata pagination?: PaginationInfo; // Performance timing data (when SMART_SEARCH_TIMING=1 or DEBUG includes 'timing') timing?: TimingData; // Init action specific action?: string; path?: string; progress?: { stage: string; message: string; percent: number; filesProcessed?: number; totalFiles?: number; }; progressHistory?: ProgressInfo[]; api_max_workers?: number; endpoint_count?: number; use_gpu?: boolean; reranker_enabled?: boolean; reranker_backend?: string; reranker_model?: string; cascade_strategy?: string; staged_stage2_mode?: string; static_graph_enabled?: boolean; preset?: string; } interface SearchResult { success: boolean; results?: ExactMatch[] | SemanticMatch[] | GraphMatch[] | FileMatch[] | AceLikeOutput | unknown; extra_files?: string[]; // Additional file paths without content output?: string; metadata?: SearchMetadata; error?: string; status?: unknown; message?: string; } interface AceLikeSection { path: string; line?: number; endLine?: number; column?: number; score?: number; symbol?: string | null; snippet: string; lines?: ChunkLine[]; } interface AceLikeGroup { path: string; sections: AceLikeSection[]; total_matches: number; } interface AceLikeOutput { format: 'ace'; text: string; groups: AceLikeGroup[]; sections: AceLikeSection[]; total: number; } interface ModelInfo { model_profile?: string; model_name?: string; embedding_dim?: number; backend?: string; created_at?: string; updated_at?: string; } interface CodexLensConfig { config_file?: string; index_dir?: string; embedding_backend?: string; // 'fastembed' (local) or 'litellm' (api) embedding_model?: string; embedding_auto_embed_missing?: boolean; reranker_enabled?: boolean; reranker_backend?: string; // 'onnx' (local) or 'api' reranker_model?: string; reranker_top_k?: number; api_max_workers?: number; api_batch_size?: number; cascade_strategy?: string; staged_stage2_mode?: string; static_graph_enabled?: boolean; } interface IndexStatus { indexed: boolean; has_embeddings: boolean; file_count?: number; embeddings_coverage_percent?: number; total_chunks?: number; model_info?: ModelInfo | null; config?: CodexLensConfig | null; warning?: string; } function readCodexLensSettingsSnapshot(): Partial { const settingsPath = join(getCodexLensDataDir(), 'settings.json'); if (!existsSync(settingsPath)) { return {}; } try { const parsed = JSON.parse(readFileSync(settingsPath, 'utf-8')) as Record; const embedding = (parsed.embedding ?? {}) as Record; const reranker = (parsed.reranker ?? {}) as Record; const api = (parsed.api ?? {}) as Record; const cascade = (parsed.cascade ?? {}) as Record; const staged = (parsed.staged ?? {}) as Record; const indexing = (parsed.indexing ?? {}) as Record; return { embedding_backend: normalizeEmbeddingBackend(typeof embedding.backend === 'string' ? embedding.backend : undefined), embedding_model: typeof embedding.model === 'string' ? embedding.model : undefined, embedding_auto_embed_missing: typeof embedding.auto_embed_missing === 'boolean' ? embedding.auto_embed_missing : undefined, reranker_enabled: typeof reranker.enabled === 'boolean' ? reranker.enabled : undefined, reranker_backend: typeof reranker.backend === 'string' ? reranker.backend : undefined, reranker_model: typeof reranker.model === 'string' ? reranker.model : undefined, reranker_top_k: typeof reranker.top_k === 'number' ? reranker.top_k : undefined, api_max_workers: typeof api.max_workers === 'number' ? api.max_workers : undefined, api_batch_size: typeof api.batch_size === 'number' ? api.batch_size : undefined, cascade_strategy: typeof cascade.strategy === 'string' ? cascade.strategy : undefined, staged_stage2_mode: typeof staged.stage2_mode === 'string' ? staged.stage2_mode : undefined, static_graph_enabled: typeof indexing.static_graph_enabled === 'boolean' ? indexing.static_graph_enabled : undefined, }; } catch { return {}; } } /** * Strip ANSI color codes from string (for JSON parsing) */ function stripAnsi(str: string): string { return str.replace(/\x1b\[[0-9;]*m/g, ''); } /** Default maximum content length to return (avoid excessive output) */ const DEFAULT_MAX_CONTENT_LENGTH = 200; const CODEX_LENS_FTS_COMPATIBILITY_PATTERNS = [ /UsageError:\s*Got unexpected extra arguments?/i, /Option ['"]--method['"] does not take a value/i, /TyperArgument\.make_metavar\(\) takes 1 positional argument but 2 were given/i, ]; let codexLensFtsBackendBroken = false; const autoInitJobs = new Map(); const autoEmbedJobs = new Map(); type SmartSearchRuntimeOverrides = { checkSemanticStatus?: typeof checkSemanticStatus; getVenvPythonPath?: typeof getVenvPythonPath; spawnProcess?: typeof spawn; now?: () => number; }; const runtimeOverrides: SmartSearchRuntimeOverrides = {}; function getSemanticStatusRuntime(): typeof checkSemanticStatus { return runtimeOverrides.checkSemanticStatus ?? checkSemanticStatus; } function getVenvPythonPathRuntime(): typeof getVenvPythonPath { return runtimeOverrides.getVenvPythonPath ?? getVenvPythonPath; } function getSpawnRuntime(): typeof spawn { return runtimeOverrides.spawnProcess ?? spawn; } function getNowRuntime(): number { return (runtimeOverrides.now ?? Date.now)(); } function buildSmartSearchSpawnOptions(cwd: string, overrides: SpawnOptions = {}): SpawnOptions { const { env, ...rest } = overrides; return { cwd, shell: false, windowsHide: true, env: { ...process.env, PYTHONIOENCODING: 'utf-8', ...env }, ...rest, }; } function shouldDetachBackgroundSmartSearchProcess(): boolean { // On Windows, detached Python children can still create a transient console // window even when windowsHide is set. Background warmup only needs to outlive // the current request, not the MCP server process. return process.platform !== 'win32'; } /** * Truncate content to specified length with ellipsis * @param content - The content to truncate * @param maxLength - Maximum length (default: 200) */ function truncateContent(content: string | null | undefined, maxLength: number = DEFAULT_MAX_CONTENT_LENGTH): string { if (!content) return ''; if (content.length <= maxLength) return content; return content.slice(0, maxLength) + '...'; } /** * Split results into full content results and extra file-only results * Generic function supporting both SemanticMatch and ExactMatch types * @param allResults - All search results (must have 'file' property) * @param fullContentLimit - Number of results with full content (default: 5) * @param extraFilesCount - Number of additional file-only results (default: 10) */ function splitResultsWithExtraFiles( allResults: T[], fullContentLimit: number = 5, extraFilesCount: number = 10 ): { results: T[]; extra_files: string[] } { // First N results with full content const results = allResults.slice(0, fullContentLimit); // Next M results as file paths only (deduplicated) const extraResults = allResults.slice(fullContentLimit, fullContentLimit + extraFilesCount); const extra_files = [...new Set(extraResults.map(r => r.file))]; return { results, extra_files }; } interface SearchScope { workingDirectory: string; searchPaths: string[]; targetFile?: string; } interface RipgrepQueryModeResolution { regex: boolean; tokenize: boolean; tokens: string[]; literalFallback: boolean; warning?: string; } const GENERATED_QUERY_RE = /(? sanitizeSearchPath(item) || item); const fallbackPath = normalizedPath || getProjectRoot(); try { const resolvedPath = resolve(fallbackPath); const stats = statSync(resolvedPath); if (stats.isFile()) { return { workingDirectory: dirname(resolvedPath), searchPaths: normalizedPaths.length > 0 ? normalizedPaths : [resolvedPath], targetFile: resolvedPath, }; } return { workingDirectory: resolvedPath, searchPaths: normalizedPaths.length > 0 ? normalizedPaths : ['.'], }; } catch { return { workingDirectory: fallbackPath, searchPaths: normalizedPaths.length > 0 ? normalizedPaths : [normalizedPath || '.'], }; } } function normalizeResultFilePath(filePath: string, workingDirectory: string): string { return resolve(workingDirectory, filePath).replace(/\\/g, '/'); } function filterResultsToTargetFile(results: T[], scope: SearchScope): T[] { if (!scope.targetFile) { return results; } const normalizedTarget = scope.targetFile.replace(/\\/g, '/'); return results.filter((result) => normalizeResultFilePath(result.file, scope.workingDirectory) === normalizedTarget); } function parseCodexLensJsonOutput(output: string | undefined): any | null { const cleanOutput = stripAnsi(output || '').trim(); if (!cleanOutput) { return null; } const candidates = [ cleanOutput, ...cleanOutput.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.startsWith('{') || line.startsWith('[')), ]; const firstBrace = cleanOutput.indexOf('{'); const lastBrace = cleanOutput.lastIndexOf('}'); if (firstBrace !== -1 && lastBrace > firstBrace) { candidates.push(cleanOutput.slice(firstBrace, lastBrace + 1)); } const firstBracket = cleanOutput.indexOf('['); const lastBracket = cleanOutput.lastIndexOf(']'); if (firstBracket !== -1 && lastBracket > firstBracket) { candidates.push(cleanOutput.slice(firstBracket, lastBracket + 1)); } for (const candidate of candidates) { try { return JSON.parse(candidate); } catch { continue; } } return null; } function isValidRegexPattern(pattern: string): boolean { try { new RegExp(pattern); return true; } catch { return false; } } function resolveRipgrepQueryMode(query: string, regex: boolean = true, tokenize: boolean = true): RipgrepQueryModeResolution { const tokens = tokenize ? tokenizeQuery(query) : [query]; if (!regex) { return { regex: false, tokenize, tokens, literalFallback: false, }; } const invalidTokens = tokens.filter((token) => token.length > 0 && !isValidRegexPattern(token)); if (invalidTokens.length === 0) { return { regex: true, tokenize, tokens, literalFallback: false, }; } const preview = truncateContent(invalidTokens[0], 40); return { regex: false, tokenize, tokens, literalFallback: true, warning: invalidTokens.length === 1 ? `Query token "${preview}" is not a valid regular expression. Falling back to literal ripgrep matching.` : 'Query contains invalid regular expression tokens. Falling back to literal ripgrep matching.', }; } function isCodexLensCliCompatibilityError(error: string | undefined): boolean { if (!error) { return false; } const cleanError = stripAnsi(error); return CODEX_LENS_FTS_COMPATIBILITY_PATTERNS.some((pattern) => pattern.test(cleanError)); } function noteCodexLensFtsCompatibility(error: string | undefined): boolean { if (!isCodexLensCliCompatibilityError(error)) { return false; } codexLensFtsBackendBroken = true; return true; } function shouldSurfaceCodexLensFtsCompatibilityWarning(options: { compatibilityTriggeredThisQuery: boolean; skipExactDueToCompatibility: boolean; ripgrepResultCount: number; }): boolean { if (options.ripgrepResultCount > 0) { return false; } return options.compatibilityTriggeredThisQuery || options.skipExactDueToCompatibility; } function summarizeBackendError(error: string | undefined): string { const cleanError = stripAnsi(error || '').trim(); if (!cleanError) { return 'unknown error'; } if (isCodexLensCliCompatibilityError(cleanError)) { return 'CodexLens exact search CLI is incompatible with the current Typer/Click runtime'; } const regexSummary = cleanError.match(/error:\s*([^\r\n]+)/i); if (/regex parse error/i.test(cleanError) && regexSummary?.[1]) { return `invalid regular expression (${regexSummary[1].trim()})`; } const usageSummary = cleanError.match(/UsageError:\s*([^\r\n]+)/i); if (usageSummary?.[1]) { return usageSummary[1].trim(); } const firstMeaningfulLine = cleanError .split(/\r?\n/) .map((line) => line.trim()) .find((line) => line && !line.startsWith('│') && !line.startsWith('┌') && !line.startsWith('└')); return truncateContent(firstMeaningfulLine || cleanError, 180); } function mapCodexLensSemanticMatches(data: any[], scope: SearchScope, maxContentLength: number): SemanticMatch[] { return filterResultsToTargetFile(data.map((item: any) => { const rawScore = item.score || 0; const similarityScore = rawScore > 0 ? 1 / (1 + rawScore) : 1; return { file: item.path || item.file, line: typeof item.line === 'number' ? item.line : undefined, column: typeof item.column === 'number' ? item.column : undefined, score: similarityScore, content: truncateContent(item.content || item.excerpt, maxContentLength), symbol: item.symbol || null, }; }), scope); } function parsePlainTextFileMatches(output: string | undefined, scope: SearchScope): SemanticMatch[] { const lines = stripAnsi(output || '') .split(/\r?\n/) .map((line) => line.trim()) .filter(Boolean); const fileLines = lines.filter((line) => { if (line.includes('RuntimeWarning:') || line.startsWith('warn(') || line.startsWith('Warning:')) { return false; } const resolvedPath = /^[a-zA-Z]:[\\/]|^\//.test(line) ? line : resolve(scope.workingDirectory, line); try { return statSync(resolvedPath).isFile(); } catch { return false; } }); return filterResultsToTargetFile( [...new Set(fileLines)].map((file, index) => ({ file, score: Math.max(0.1, 1 - index * 0.05), content: '', symbol: null, })), scope, ); } function hasCentralizedVectorArtifacts(indexRoot: unknown): boolean { if (typeof indexRoot !== 'string' || !indexRoot.trim()) { return false; } const resolvedRoot = resolve(indexRoot); return [ join(resolvedRoot, '_vectors.hnsw'), join(resolvedRoot, '_vectors_meta.db'), join(resolvedRoot, '_binary_vectors.mmap'), ].every((artifactPath) => existsSync(artifactPath)); } function asObjectRecord(value: unknown): Record | undefined { if (!value || typeof value !== 'object' || Array.isArray(value)) { return undefined; } return value as Record; } function asFiniteNumber(value: unknown): number | undefined { if (typeof value !== 'number' || !Number.isFinite(value)) { return undefined; } return value; } function asBoolean(value: unknown): boolean | undefined { return typeof value === 'boolean' ? value : undefined; } function extractEmbeddingsStatusSummary(embeddingsData: unknown): { coveragePercent: number; totalChunks: number; hasEmbeddings: boolean; } { const embeddings = asObjectRecord(embeddingsData) ?? {}; const root = asObjectRecord(embeddings.root) ?? embeddings; const centralized = asObjectRecord(embeddings.centralized); const totalIndexes = asFiniteNumber(root.total_indexes) ?? asFiniteNumber(embeddings.total_indexes) ?? 0; const indexesWithEmbeddings = asFiniteNumber(root.indexes_with_embeddings) ?? asFiniteNumber(embeddings.indexes_with_embeddings) ?? 0; const totalChunks = asFiniteNumber(root.total_chunks) ?? asFiniteNumber(embeddings.total_chunks) ?? 0; const coveragePercent = asFiniteNumber(root.coverage_percent) ?? asFiniteNumber(embeddings.coverage_percent) ?? (totalIndexes > 0 ? (indexesWithEmbeddings / totalIndexes) * 100 : 0); const hasEmbeddings = asBoolean(root.has_embeddings) ?? asBoolean(centralized?.usable) ?? (totalChunks > 0 || indexesWithEmbeddings > 0 || coveragePercent > 0); return { coveragePercent, totalChunks, hasEmbeddings, }; } function selectEmbeddingsStatusPayload(statusData: unknown): Record { const status = asObjectRecord(statusData) ?? {}; return asObjectRecord(status.embeddings_status) ?? asObjectRecord(status.embeddings) ?? {}; } function collectBackendError( errors: string[], backendName: string, backendResult: PromiseSettledResult, ): void { if (backendResult.status === 'rejected') { errors.push(`${backendName}: ${summarizeBackendError(String(backendResult.reason))}`); return; } if (!backendResult.value.success) { errors.push(`${backendName}: ${summarizeBackendError(backendResult.value.error)}`); } } function mergeWarnings(...warnings: Array): string | undefined { const merged = [...new Set( warnings .filter((warning): warning is string => typeof warning === 'string' && warning.trim().length > 0) .map((warning) => warning.trim()) )]; return merged.length > 0 ? merged.join(' | ') : undefined; } function mergeNotes(...notes: Array): string | undefined { const merged = [...new Set( notes .filter((note): note is string => typeof note === 'string' && note.trim().length > 0) .map((note) => note.trim()) )]; return merged.length > 0 ? merged.join(' | ') : undefined; } function mergeSuggestions(...groups: Array): SearchSuggestion[] | undefined { const merged = new Map(); for (const group of groups) { for (const suggestion of group ?? []) { if (!merged.has(suggestion.command)) { merged.set(suggestion.command, suggestion); } } } return merged.size > 0 ? [...merged.values()] : undefined; } function formatSmartSearchCommand(action: string, pathValue: string, extraParams: Record = {}): string { const normalizedPath = pathValue.replace(/\\/g, '/'); const args = [`action=${JSON.stringify(action)}`, `path=${JSON.stringify(normalizedPath)}`]; for (const [key, value] of Object.entries(extraParams)) { if (value === undefined) { continue; } args.push(`${key}=${JSON.stringify(value)}`); } return `smart_search(${args.join(', ')})`; } function parseOptionalBooleanEnv(raw: string | undefined): boolean | undefined { const normalized = raw?.trim().toLowerCase(); if (!normalized) { return undefined; } if (['1', 'true', 'on', 'yes'].includes(normalized)) { return true; } if (['0', 'false', 'off', 'no'].includes(normalized)) { return false; } return undefined; } function isAutoEmbedMissingEnabled(config: CodexLensConfig | null | undefined): boolean { const envOverride = parseOptionalBooleanEnv(process.env.CODEXLENS_AUTO_EMBED_MISSING); if (envOverride !== undefined) { return envOverride; } if (process.platform === 'win32') { return false; } if (typeof config?.embedding_auto_embed_missing === 'boolean') { return config.embedding_auto_embed_missing; } return true; } function isAutoInitMissingEnabled(): boolean { const envOverride = parseOptionalBooleanEnv(process.env.CODEXLENS_AUTO_INIT_MISSING); if (envOverride !== undefined) { return envOverride; } return process.platform !== 'win32'; } function getAutoEmbedMissingDisabledReason(config: CodexLensConfig | null | undefined): string { const envOverride = parseOptionalBooleanEnv(process.env.CODEXLENS_AUTO_EMBED_MISSING); if (envOverride === false) { return 'Automatic embedding warmup is disabled by CODEXLENS_AUTO_EMBED_MISSING=false.'; } if (config?.embedding_auto_embed_missing === false) { return 'Automatic embedding warmup is disabled by embedding.auto_embed_missing=false.'; } if (process.platform === 'win32') { return 'Automatic embedding warmup is disabled by default on Windows even if CodexLens config resolves auto_embed_missing=true. Set CODEXLENS_AUTO_EMBED_MISSING=true to opt in.'; } return 'Automatic embedding warmup is disabled.'; } function getAutoInitMissingDisabledReason(): string { const envOverride = parseOptionalBooleanEnv(process.env.CODEXLENS_AUTO_INIT_MISSING); if (envOverride === false) { return 'Automatic static index warmup is disabled by CODEXLENS_AUTO_INIT_MISSING=false.'; } if (process.platform === 'win32') { return 'Automatic static index warmup is disabled by default on Windows. Set CODEXLENS_AUTO_INIT_MISSING=true to opt in.'; } return 'Automatic static index warmup is disabled.'; } function buildIndexSuggestions(indexStatus: IndexStatus, scope: SearchScope): SearchSuggestion[] | undefined { const suggestions: SearchSuggestion[] = []; if (!indexStatus.indexed) { suggestions.push({ title: 'Initialize index', command: formatSmartSearchCommand('init', scope.workingDirectory), reason: 'No CodexLens index exists for this path yet.', }); suggestions.push({ title: 'Check index status', command: formatSmartSearchCommand('status', scope.workingDirectory), reason: 'Verify whether the target path is mapped to the expected CodexLens project root.', }); return suggestions; } if (!indexStatus.has_embeddings) { suggestions.push({ title: 'Generate embeddings', command: formatSmartSearchCommand('embed', scope.workingDirectory), reason: 'The index exists, but semantic/vector retrieval is unavailable until embeddings are generated.', }); } else if ((indexStatus.embeddings_coverage_percent ?? 0) < 50) { suggestions.push({ title: 'Rebuild embeddings', command: formatSmartSearchCommand('embed', scope.workingDirectory, { force: true }), reason: `Embedding coverage is only ${(indexStatus.embeddings_coverage_percent ?? 0).toFixed(1)}%, so semantic search quality is degraded.`, }); } if (indexStatus.warning?.includes('Failed to parse index status')) { suggestions.push({ title: 'Re-check status', command: formatSmartSearchCommand('status', scope.workingDirectory), reason: 'The index health payload could not be parsed cleanly.', }); } return suggestions.length > 0 ? suggestions : undefined; } /** * Check if CodexLens index exists for current directory * @param path - Directory path to check * @returns Index status */ async function checkIndexStatus(path: string = '.'): Promise { const scope = resolveSearchScope(path); try { // Fetch both status and config in parallel const [statusResult, configResult] = await Promise.all([ executeCodexLens(['index', 'status', scope.workingDirectory], { cwd: scope.workingDirectory }), executeCodexLens(['config', '--json'], { cwd: scope.workingDirectory }), ]); // Parse config const settingsConfig = readCodexLensSettingsSnapshot(); let config: CodexLensConfig | null = Object.keys(settingsConfig).length > 0 ? { ...settingsConfig } : null; if (configResult.success && configResult.output) { try { const cleanConfigOutput = stripAnsi(configResult.output); const parsedConfig = JSON.parse(cleanConfigOutput); const configData = parsedConfig.result || parsedConfig; config = { ...settingsConfig, config_file: configData.config_file, index_dir: configData.index_dir, embedding_backend: normalizeEmbeddingBackend(configData.embedding_backend) ?? settingsConfig.embedding_backend, embedding_model: typeof configData.embedding_model === 'string' ? configData.embedding_model : settingsConfig.embedding_model, embedding_auto_embed_missing: typeof configData.embedding_auto_embed_missing === 'boolean' ? configData.embedding_auto_embed_missing : settingsConfig.embedding_auto_embed_missing, reranker_enabled: typeof configData.reranker_enabled === 'boolean' ? configData.reranker_enabled : settingsConfig.reranker_enabled, reranker_backend: typeof configData.reranker_backend === 'string' ? configData.reranker_backend : settingsConfig.reranker_backend, reranker_model: typeof configData.reranker_model === 'string' ? configData.reranker_model : settingsConfig.reranker_model, reranker_top_k: typeof configData.reranker_top_k === 'number' ? configData.reranker_top_k : settingsConfig.reranker_top_k, }; } catch { // Config parse failed, continue without it } } if (!statusResult.success) { return { indexed: false, has_embeddings: false, config, warning: 'No CodexLens index found. Run smart_search(action="init") to create index for better search results.', }; } // Parse status output try { // Strip ANSI color codes from JSON output const cleanOutput = stripAnsi(statusResult.output || '{}'); const parsed = JSON.parse(cleanOutput); // Handle both direct and nested response formats (status returns {success, result: {...}}) const status = parsed.result || parsed; // Get embeddings coverage from comprehensive status const embeddingsData = selectEmbeddingsStatusPayload(status); const legacyEmbeddingsData = asObjectRecord(status.embeddings) ?? {}; const embeddingsSummary = extractEmbeddingsStatusSummary(embeddingsData); const totalIndexes = Number(legacyEmbeddingsData.total_indexes || asObjectRecord(embeddingsData)?.total_indexes || 0); const embeddingsCoverage = embeddingsSummary.coveragePercent; const totalChunks = embeddingsSummary.totalChunks; const indexed = Boolean(status.projects_count > 0 || status.total_files > 0 || status.index_root || totalIndexes > 0 || totalChunks > 0); const has_embeddings = embeddingsSummary.hasEmbeddings; // Extract model info if available const modelInfoData = asObjectRecord(embeddingsData.model_info); const modelInfo: ModelInfo | undefined = modelInfoData ? { model_profile: typeof modelInfoData.model_profile === 'string' ? modelInfoData.model_profile : undefined, model_name: typeof modelInfoData.model_name === 'string' ? modelInfoData.model_name : undefined, embedding_dim: typeof modelInfoData.embedding_dim === 'number' ? modelInfoData.embedding_dim : undefined, backend: typeof modelInfoData.backend === 'string' ? modelInfoData.backend : undefined, created_at: typeof modelInfoData.created_at === 'string' ? modelInfoData.created_at : undefined, updated_at: typeof modelInfoData.updated_at === 'string' ? modelInfoData.updated_at : undefined, } : undefined; let warning: string | undefined; if (!indexed) { warning = 'No CodexLens index found. Run smart_search(action="init") to create index for better search results.'; } else if (embeddingsCoverage === 0) { warning = 'Index exists but no embeddings generated. Run smart_search(action="embed") to build the vector index.'; } else if (embeddingsCoverage < 50) { warning = `Embeddings coverage is ${embeddingsCoverage.toFixed(1)}% (below 50%). Hybrid search will degrade. Run smart_search(action="embed") to improve vector coverage.`; } return { indexed, has_embeddings, file_count: status.total_files, embeddings_coverage_percent: embeddingsCoverage, total_chunks: totalChunks, // Ensure model_info is null instead of undefined so it's included in JSON model_info: modelInfo ?? null, config, warning, }; } catch { return { indexed: false, has_embeddings: false, config, warning: 'Failed to parse index status', }; } } catch { return { indexed: false, has_embeddings: false, warning: 'CodexLens not available', }; } } /** * Detection heuristics for intent classification */ /** * Detect literal string query (simple alphanumeric or quoted strings) */ function detectLiteral(query: string): boolean { return /^[a-zA-Z0-9_-]+$/.test(query) || /^["'].*["']$/.test(query); } /** * Detect regex pattern (contains regex metacharacters) */ function detectRegex(query: string): boolean { return /[.*+?^${}()|[\]\\]/.test(query); } /** * Detect natural language query (sentence structure, questions, multi-word phrases) */ function detectNaturalLanguage(query: string): boolean { return query.split(/\s+/).length >= 3 || /\?$/.test(query); } /** * Detect file path query (path separators, file extensions) */ function detectFilePath(query: string): boolean { return /[/\\]/.test(query) || /\.[a-z]{2,4}$/i.test(query); } /** * Detect relationship query (import, export, dependency keywords) */ function detectRelationship(query: string): boolean { return /(import|export|uses?|depends?|calls?|extends?)\s/i.test(query); } function looksLikeCodeQuery(query: string): boolean { if (/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(query)) return true; if (/[:.<>\-=(){}[\]]/.test(query) && query.split(/\s+/).length <= 2) return true; if (/\.\*|\\\(|\\\[|\\s/.test(query)) return true; if (/^[a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z_][a-zA-Z0-9_]*$/.test(query)) return true; return false; } function queryTargetsGeneratedFiles(query: string): boolean { return GENERATED_QUERY_RE.test(query.trim()); } function prefersLexicalPriorityQuery(query: string): boolean { const trimmed = query.trim(); if (!trimmed) return false; if (ENV_STYLE_QUERY_RE.test(trimmed)) return true; const tokens = new Set((trimmed.match(TOPIC_TOKEN_RE) ?? []).map((token) => token.toLowerCase())); if (tokens.size === 0) return false; if (tokens.has('factory') || tokens.has('factories')) return true; if ((tokens.has('environment') || tokens.has('env')) && (tokens.has('variable') || tokens.has('variables'))) { return true; } if ( tokens.has('backend') && ['embedding', 'embeddings', 'reranker', 'rerankers', 'onnx', 'api', 'litellm', 'fastembed', 'local', 'legacy'] .some((token) => tokens.has(token)) ) { return true; } let surfaceHit = false; let focusHit = false; for (const token of tokens) { if (LEXICAL_PRIORITY_SURFACE_TOKENS.has(token)) surfaceHit = true; if (LEXICAL_PRIORITY_FOCUS_TOKENS.has(token)) focusHit = true; if (surfaceHit && focusHit) return true; } return false; } /** * Classify query intent and recommend search mode * Simple mapping: hybrid (NL + index + embeddings) | exact (index or insufficient embeddings) | ripgrep (no index) * @param query - Search query string * @param hasIndex - Whether CodexLens index exists * @param hasSufficientEmbeddings - Whether embeddings coverage >= 50% * @returns Classification result */ function classifyIntent(query: string, hasIndex: boolean = false, hasSufficientEmbeddings: boolean = false): Classification { const isNaturalLanguage = detectNaturalLanguage(query); const isCodeQuery = looksLikeCodeQuery(query); const isRegexPattern = detectRegex(query); const targetsGeneratedFiles = queryTargetsGeneratedFiles(query); const prefersLexicalPriority = prefersLexicalPriorityQuery(query); let mode: string; let confidence: number; if (!hasIndex) { mode = 'ripgrep'; confidence = 1.0; } else if (targetsGeneratedFiles || prefersLexicalPriority || isCodeQuery || isRegexPattern) { mode = 'exact'; confidence = targetsGeneratedFiles ? 0.97 : prefersLexicalPriority ? 0.93 : 0.95; } else if (isNaturalLanguage && hasSufficientEmbeddings) { mode = 'hybrid'; confidence = 0.9; } else { mode = 'exact'; confidence = 0.8; } const detectedPatterns: string[] = []; if (detectLiteral(query)) detectedPatterns.push('literal'); if (detectRegex(query)) detectedPatterns.push('regex'); if (detectNaturalLanguage(query)) detectedPatterns.push('natural language'); if (detectFilePath(query)) detectedPatterns.push('file path'); if (detectRelationship(query)) detectedPatterns.push('relationship'); if (targetsGeneratedFiles) detectedPatterns.push('generated artifact'); if (prefersLexicalPriority) detectedPatterns.push('lexical priority'); if (isCodeQuery) detectedPatterns.push('code identifier'); const reasoning = `Query classified as ${mode} (confidence: ${confidence.toFixed(2)}, detected: ${detectedPatterns.join(', ')}, index: ${hasIndex ? 'available' : 'not available'}, embeddings: ${hasSufficientEmbeddings ? 'sufficient' : 'insufficient'})`; return { mode, confidence, reasoning }; } /** * Check if a tool is available in PATH * @param toolName - Tool executable name * @returns True if available */ function checkToolAvailability( toolName: string, lookupRuntime: typeof spawnSync = spawnSync, ): boolean { try { const isWindows = process.platform === 'win32'; const command = isWindows ? 'where' : 'which'; const result = lookupRuntime(command, [toolName], { shell: false, windowsHide: true, stdio: 'ignore', timeout: EXEC_TIMEOUTS.SYSTEM_INFO, env: { ...process.env, PYTHONIOENCODING: 'utf-8' }, }); return !result.error && result.status === 0; } catch { return false; } } /** * Build ripgrep command arguments * Supports tokenized multi-word queries with OR matching * @param params - Search parameters * @returns Command, arguments, and tokens used */ function buildRipgrepCommand(params: { query: string; paths: string[]; contextLines: number; maxResults: number; includeHidden: boolean; regex?: boolean; caseSensitive?: boolean; tokenize?: boolean; }): { command: string; args: string[]; tokens: string[]; warning?: string; literalFallback: boolean; regex: boolean } { const { query, paths = ['.'], contextLines = 0, maxResults = 10, includeHidden = false, regex = false, caseSensitive = true, tokenize = true } = params; const queryMode = resolveRipgrepQueryMode(query, regex, tokenize); const args = [ '-n', '--color=never', '--json', ]; // Add file filtering (unless includeHidden is true) if (!includeHidden) { args.push(...buildExcludeArgs()); } // Case sensitivity if (!caseSensitive) { args.push('--ignore-case'); } if (contextLines > 0) { args.push('-C', contextLines.toString()); } if (maxResults > 0) { args.push('--max-count', maxResults.toString()); } if (includeHidden) { args.push('--hidden'); } const { tokens } = queryMode; if (tokens.length > 1) { // Multi-token: use multiple -e patterns (OR matching) // Each token is escaped for regex safety unless regex mode is enabled for (const token of tokens) { if (queryMode.regex) { args.push('-e', token); } else { // Escape regex special chars for literal matching const escaped = token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); args.push('-e', escaped); } } } else { // Single token or no tokenization: use original behavior if (queryMode.regex) { args.push('-e', query); } else { args.push('-F', query); } } args.push(...paths); return { command: 'rg', args, tokens, warning: queryMode.warning, literalFallback: queryMode.literalFallback, regex: queryMode.regex, }; } interface RipgrepChunkAccumulator { file: string; chunkLines: ChunkLine[]; firstMatchLine?: number; firstMatchColumn?: number; lastLine?: number; matchCount: number; } function finalizeRipgrepChunk(accumulator: RipgrepChunkAccumulator | undefined): ExactMatch | null { if (!accumulator || accumulator.matchCount === 0 || accumulator.chunkLines.length === 0) { return null; } const firstLine = accumulator.chunkLines[0]?.line ?? accumulator.firstMatchLine ?? 1; const lastLine = accumulator.chunkLines[accumulator.chunkLines.length - 1]?.line ?? accumulator.firstMatchLine ?? firstLine; return { file: accumulator.file, line: accumulator.firstMatchLine ?? firstLine, endLine: lastLine, column: accumulator.firstMatchColumn ?? 1, content: accumulator.chunkLines.map((line) => line.text).join('\n').trim(), chunkLines: [...accumulator.chunkLines], }; } function parseRipgrepJsonResults(stdout: string, effectiveLimit: number): { results: ExactMatch[]; resultLimitReached: boolean } { const allResults: ExactMatch[] = []; const activeChunks = new Map(); const lines = stdout.split('\n').filter((line) => line.trim()); let resultLimitReached = false; const flushChunk = (file: string) => { const finalized = finalizeRipgrepChunk(activeChunks.get(file)); activeChunks.delete(file); if (!finalized) { return; } allResults.push(finalized); if (allResults.length >= effectiveLimit) { resultLimitReached = true; } }; for (const line of lines) { if (resultLimitReached) { break; } try { const item = JSON.parse(line); if (item.type !== 'match' && item.type !== 'context' && item.type !== 'end') { continue; } const file = item.data?.path?.text as string | undefined; if (!file) { continue; } if (item.type === 'end') { flushChunk(file); continue; } const lineNumber = typeof item.data?.line_number === 'number' ? item.data.line_number : undefined; const rawText = typeof item.data?.lines?.text === 'string' ? item.data.lines.text.replace(/\r?\n$/, '') : ''; if (lineNumber === undefined) { continue; } let current = activeChunks.get(file); const isContiguous = current && current.lastLine !== undefined && lineNumber <= current.lastLine + 1; if (!current || !isContiguous) { if (current) { flushChunk(file); if (resultLimitReached) { break; } } current = { file, chunkLines: [], matchCount: 0, }; activeChunks.set(file, current); } const previousLine = current.chunkLines[current.chunkLines.length - 1]; const duplicateLine = previousLine && previousLine.line === lineNumber && previousLine.text === rawText; if (!duplicateLine) { current.chunkLines.push({ line: lineNumber, text: rawText, isMatch: item.type === 'match', }); } else if (item.type === 'match') { previousLine.isMatch = true; } if (item.type === 'match') { current.matchCount += 1; if (current.firstMatchLine === undefined) { current.firstMatchLine = lineNumber; current.firstMatchColumn = item.data.submatches && item.data.submatches[0] ? item.data.submatches[0].start + 1 : 1; } } current.lastLine = lineNumber; } catch { continue; } } if (!resultLimitReached) { for (const file of [...activeChunks.keys()]) { flushChunk(file); if (resultLimitReached) { break; } } } return { results: allResults.slice(0, effectiveLimit), resultLimitReached }; } function normalizeEmbeddingBackend(backend?: string): string | undefined { if (!backend) { return undefined; } const normalized = backend.trim().toLowerCase(); if (!normalized) { return undefined; } if (normalized === 'api') { return 'litellm'; } if (normalized === 'local') { return 'fastembed'; } return normalized; } function buildIndexInitArgs(projectPath: string, options: { force?: boolean; languages?: string[]; noEmbeddings?: boolean } = {}): string[] { const { force = false, languages, noEmbeddings = true } = options; const args = ['index', 'init', projectPath]; if (noEmbeddings) { args.push('--no-embeddings'); } if (force) { args.push('--force'); } if (languages && languages.length > 0) { args.push(...languages.flatMap((language) => ['--language', language])); } return args; } function resolveEmbeddingSelection( requestedBackend: string | undefined, requestedModel: string | undefined, config: CodexLensConfig | null | undefined, ): { backend?: string; model?: string; preset: 'explicit' | 'config' | 'bulk-local-fast'; note?: string } { const normalizedRequestedBackend = normalizeEmbeddingBackend(requestedBackend); const normalizedRequestedModel = requestedModel?.trim() || undefined; if (normalizedRequestedBackend) { return { backend: normalizedRequestedBackend, model: normalizedRequestedModel || config?.embedding_model, preset: 'explicit', }; } if (normalizedRequestedModel) { const inferredBackend = config?.embedding_backend || (['fast', 'code'].includes(normalizedRequestedModel) ? 'fastembed' : undefined); return { backend: inferredBackend, model: normalizedRequestedModel, preset: inferredBackend ? 'config' : 'explicit', }; } return { backend: 'fastembed', model: 'fast', preset: 'bulk-local-fast', note: config?.embedding_backend && config.embedding_backend !== 'fastembed' ? `Using recommended bulk indexing preset: local-fast instead of configured ${config.embedding_backend}. Pass embeddingBackend="api" to force remote API embeddings.` : 'Using recommended bulk indexing preset: local-fast. Pass embeddingBackend="api" to force remote API embeddings.', }; } const EMBED_PROGRESS_PREFIX = '__CCW_EMBED_PROGRESS__'; function resolveEmbeddingEndpoints(backend?: string): RotationEndpointConfig[] { if (backend !== 'litellm') { return []; } try { return generateRotationEndpoints(getProjectRoot()).filter((endpoint) => { const apiKey = endpoint.api_key?.trim() ?? ''; return Boolean( apiKey && apiKey.length > 8 && !/^\*+$/.test(apiKey) && endpoint.api_base?.trim() && endpoint.model?.trim() ); }); } catch { return []; } } function resolveApiWorkerCount( requestedWorkers: number | undefined, backend: string | undefined, endpoints: RotationEndpointConfig[] ): number | undefined { if (backend !== 'litellm') { return undefined; } if (typeof requestedWorkers === 'number' && Number.isFinite(requestedWorkers)) { return Math.max(1, Math.floor(requestedWorkers)); } if (endpoints.length <= 1) { return 4; } return Math.min(16, Math.max(4, endpoints.length * 2)); } function extractEmbedJsonLine(stdout: string): string | undefined { const lines = stdout .split(/\r?\n/) .map((line) => line.trim()) .filter(Boolean) .filter((line) => !line.startsWith(EMBED_PROGRESS_PREFIX)); return [...lines].reverse().find((line) => line.startsWith('{') && line.endsWith('}')); } function buildEmbeddingPythonCode(params: { projectPath: string; backend?: string; model?: string; force: boolean; maxWorkers?: number; endpoints?: RotationEndpointConfig[]; }): string { const { projectPath, backend, model, force, maxWorkers, endpoints = [] } = params; return ` import json import sys from pathlib import Path from codexlens.storage.path_mapper import PathMapper from codexlens.storage.registry import RegistryStore from codexlens.cli.embedding_manager import generate_dense_embeddings_centralized target_path = Path(r"__PROJECT_PATH__").expanduser().resolve() backend = __BACKEND__ model = __MODEL__ force = __FORCE__ max_workers = __MAX_WORKERS__ endpoints = json.loads(r'''__ENDPOINTS_JSON__''') def progress_update(message: str): print("__CCW_EMBED_PROGRESS__" + str(message), flush=True) registry = RegistryStore() registry.initialize() try: project = registry.get_project(target_path) index_root = None if project is not None: index_root = Path(project.index_root) else: mapper = PathMapper() index_db = mapper.source_to_index_db(target_path) if index_db.exists(): index_root = index_db.parent else: nearest = registry.find_nearest_index(target_path) if nearest is not None: index_root = Path(nearest.index_path).parent if index_root is None: print(json.dumps({"success": False, "error": f"No index found for: {target_path}"}), flush=True) sys.exit(1) result = generate_dense_embeddings_centralized( index_root, embedding_backend=backend, model_profile=model, force=force, use_gpu=True, max_workers=max_workers, endpoints=endpoints if endpoints else None, progress_callback=progress_update, ) print(json.dumps(result), flush=True) if not result.get("success"): sys.exit(1) finally: registry.close() ` .replace('__PROJECT_PATH__', projectPath.replace(/\\/g, '\\\\')) .replace('__BACKEND__', backend ? JSON.stringify(backend) : 'None') .replace('__MODEL__', model ? JSON.stringify(model) : 'None') .replace('__FORCE__', force ? 'True' : 'False') .replace('__MAX_WORKERS__', typeof maxWorkers === 'number' ? String(Math.max(1, Math.floor(maxWorkers))) : 'None') .replace('__ENDPOINTS_JSON__', JSON.stringify(endpoints).replace(/\\/g, '\\\\').replace(/'''/g, "\\'\\'\\'")); } function spawnBackgroundEmbeddingsViaPython(params: { projectPath: string; backend?: string; model?: string; force: boolean; maxWorkers?: number; endpoints?: RotationEndpointConfig[]; }): { success: boolean; error?: string } { const { projectPath, backend, model } = params; try { const child = getSpawnRuntime()( getVenvPythonPathRuntime()(), ['-c', buildEmbeddingPythonCode(params)], buildSmartSearchSpawnOptions(projectPath, { detached: shouldDetachBackgroundSmartSearchProcess(), stdio: 'ignore', }), ); autoEmbedJobs.set(projectPath, { startedAt: getNowRuntime(), backend, model, }); const cleanup = () => { autoEmbedJobs.delete(projectPath); }; child.on('error', cleanup); child.on('close', cleanup); child.unref(); return { success: true }; } catch (error) { return { success: false, error: error instanceof Error ? error.message : String(error), }; } } function spawnBackgroundIndexInit(params: { projectPath: string; languages?: string[]; }): { success: boolean; error?: string } { const { projectPath, languages } = params; try { const pythonPath = getVenvPythonPathRuntime()(); if (!existsSync(pythonPath)) { return { success: false, error: 'CodexLens Python environment is not ready yet.', }; } const child = getSpawnRuntime()( pythonPath, ['-m', 'codexlens', ...buildIndexInitArgs(projectPath, { languages })], buildSmartSearchSpawnOptions(projectPath, { detached: shouldDetachBackgroundSmartSearchProcess(), stdio: 'ignore', }), ); autoInitJobs.set(projectPath, { startedAt: getNowRuntime(), languages, }); const cleanup = () => { autoInitJobs.delete(projectPath); }; child.on('error', cleanup); child.on('close', cleanup); child.unref(); return { success: true }; } catch (error) { return { success: false, error: error instanceof Error ? error.message : String(error), }; } } async function maybeStartBackgroundAutoInit( scope: SearchScope, indexStatus: IndexStatus, ): Promise<{ note?: string; warning?: string }> { if (indexStatus.indexed) { return {}; } if (!isAutoInitMissingEnabled()) { return { note: getAutoInitMissingDisabledReason(), }; } if (autoInitJobs.has(scope.workingDirectory)) { return { note: 'Background static index build is already running for this path.', }; } const spawned = spawnBackgroundIndexInit({ projectPath: scope.workingDirectory, }); if (!spawned.success) { return { warning: `Automatic static index warmup could not start: ${spawned.error}`, }; } return { note: 'Background static index build started for this path. Re-run search shortly for indexed FTS results.', }; } async function maybeStartBackgroundAutoEmbed( scope: SearchScope, indexStatus: IndexStatus, ): Promise<{ note?: string; warning?: string }> { if (!indexStatus.indexed || indexStatus.has_embeddings) { return {}; } if (!isAutoEmbedMissingEnabled(indexStatus.config)) { return { note: getAutoEmbedMissingDisabledReason(indexStatus.config), }; } if (autoEmbedJobs.has(scope.workingDirectory)) { return { note: 'Background embedding build is already running for this path.', }; } const backend = normalizeEmbeddingBackend(indexStatus.config?.embedding_backend) ?? 'fastembed'; const model = indexStatus.config?.embedding_model?.trim() || undefined; const semanticStatus = await getSemanticStatusRuntime()(); if (!semanticStatus.available) { return { warning: 'Automatic embedding warmup skipped because semantic dependencies are not ready.', }; } if (backend === 'litellm' && !semanticStatus.litellmAvailable) { return { warning: 'Automatic embedding warmup skipped because the LiteLLM embedder is not ready.', }; } const endpoints = resolveEmbeddingEndpoints(backend); const configuredApiMaxWorkers = indexStatus.config?.api_max_workers; const effectiveApiMaxWorkers = typeof configuredApiMaxWorkers === 'number' ? Math.max(1, Math.floor(configuredApiMaxWorkers)) : resolveApiWorkerCount(undefined, backend, endpoints); const spawned = spawnBackgroundEmbeddingsViaPython({ projectPath: scope.workingDirectory, backend, model, force: false, maxWorkers: effectiveApiMaxWorkers, endpoints, }); if (!spawned.success) { return { warning: `Automatic embedding warmup could not start: ${spawned.error}`, }; } return { note: 'Background embedding build started for this path. Re-run semantic search shortly for vector results.', }; } // v1 executeEmbeddingsViaPython removed — v2 uses built-in fastembed models // v1 executeInitAction removed — replaced by executeInitActionV2 // v1 executeEmbedAction removed — v2 auto-embeds during sync // v1 executeStatusAction removed — replaced by executeStatusActionV2 // v1 executeUpdateAction and executeWatchAction removed — replaced by V2 versions // v1 executeFuzzyMode and executeAutoMode removed — v2 bridge handles all search /** * Mode: ripgrep - Fast literal string matching using ripgrep * No index required, fallback to CodexLens if ripgrep unavailable * Supports tokenized multi-word queries with OR matching and result ranking */ async function executeRipgrepMode(params: Params): Promise { const { query, paths = [], contextLines = 0, maxResults = 5, extraFilesCount = 10, maxContentLength = 200, includeHidden = false, path = '.', regex = true, caseSensitive = true, tokenize = true, codeOnly = true, withDoc = false, excludeExtensions } = params; const scope = resolveSearchScope(path, paths); // withDoc overrides codeOnly const effectiveCodeOnly = withDoc ? false : codeOnly; if (!query) { return { success: false, error: 'Query is required for search', }; } // Check if ripgrep is available const hasRipgrep = checkToolAvailability('rg'); // Calculate total to fetch for split (full content + extra files) const totalToFetch = maxResults + extraFilesCount; // If ripgrep not available, fall back to CodexLens exact mode if (!hasRipgrep) { const readyStatus = await ensureCodexLensReady(); if (!readyStatus.ready) { return { success: false, error: 'Neither ripgrep nor CodexLens available. Install ripgrep (rg) or CodexLens for search functionality.', }; } // Use CodexLens fts mode as fallback const args = ['search', query, '--limit', totalToFetch.toString(), '--method', 'fts', '--json']; const result = await executeCodexLens(args, { cwd: scope.workingDirectory }); if (!result.success) { noteCodexLensFtsCompatibility(result.error); return { success: false, error: summarizeBackendError(result.error), metadata: { mode: 'ripgrep', backend: 'codexlens-fallback', count: 0, query, }, }; } // Parse results let allResults: SemanticMatch[] = []; try { const parsed = JSON.parse(stripAnsi(result.output || '{}')); const data = parsed.result?.results || parsed.results || parsed; allResults = (Array.isArray(data) ? data : []).map((item: any) => ({ file: item.path || item.file, score: item.score || 0, content: truncateContent(item.content || item.excerpt, maxContentLength), symbol: item.symbol || null, })); } catch { // Keep empty results } const scopedResults = filterResultsToTargetFile(allResults, scope); // Split results: first N with full content, rest as file paths only const { results, extra_files } = splitResultsWithExtraFiles(scopedResults, maxResults, extraFilesCount); return { success: true, results, extra_files: extra_files.length > 0 ? extra_files : undefined, metadata: { mode: 'ripgrep', backend: 'codexlens-fallback', count: results.length, query, note: 'Using CodexLens exact mode (ripgrep not available)', }, }; } // Use ripgrep - request more results to support split const { command, args, tokens, warning: queryModeWarning } = buildRipgrepCommand({ query, paths: scope.searchPaths, contextLines, maxResults: totalToFetch, // Fetch more to support split includeHidden, regex, caseSensitive, tokenize, }); return new Promise((resolve) => { const child = getSpawnRuntime()( command, args, buildSmartSearchSpawnOptions(scope.workingDirectory || getProjectRoot(), { stdio: ['ignore', 'pipe', 'pipe'], }), ); let stdout = ''; let stderr = ''; let resultLimitReached = false; child.stdout?.on('data', (data) => { stdout += data.toString(); }); child.stderr?.on('data', (data) => { stderr += data.toString(); }); child.on('close', (code) => { // Limit total results to prevent memory overflow (--max-count only limits per-file) const effectiveLimit = totalToFetch > 0 ? totalToFetch : 500; const parsedResults = parseRipgrepJsonResults(stdout, effectiveLimit); const allResults = parsedResults.results; resultLimitReached = parsedResults.resultLimitReached; // Handle Windows device file errors gracefully (os error 1) // If we have results despite the error, return them as partial success const isWindowsDeviceError = stderr.includes('os error 1') || stderr.includes('函数不正确'); // Apply token-based scoring and sorting for multi-word queries // Results matching more tokens are ranked higher (exact matches first) const scoredResults = tokens.length > 1 ? scoreByTokenMatch(allResults, tokens) : allResults; // Apply code-only and extension filtering const filteredResults = filterNoisyFiles(scoredResults as any[], { codeOnly: effectiveCodeOnly, excludeExtensions }); if (code === 0 || code === 1 || (isWindowsDeviceError && filteredResults.length > 0)) { // Split results: first N with full content, rest as file paths only const { results, extra_files } = splitResultsWithExtraFiles(filteredResults, maxResults, extraFilesCount); // Build warning message for various conditions const warnings: string[] = []; if (queryModeWarning) { warnings.push(queryModeWarning); } if (resultLimitReached) { warnings.push(`Result limit reached (${effectiveLimit}). Use a more specific query or increase limit.`); } if (isWindowsDeviceError) { warnings.push('Some Windows device files were skipped'); } resolve({ success: true, results, extra_files: extra_files.length > 0 ? extra_files : undefined, metadata: { mode: 'ripgrep', backend: 'ripgrep', count: results.length, query, tokens: tokens.length > 1 ? tokens : undefined, // Include tokens in metadata for debugging tokenized: tokens.length > 1, ...(warnings.length > 0 && { warning: warnings.join('; ') }), }, }); } else if (isWindowsDeviceError && allResults.length === 0) { // Windows device error but no results - might be the only issue resolve({ success: true, results: [], metadata: { mode: 'ripgrep', backend: 'ripgrep', count: 0, query, warning: 'No matches found (some Windows device files were skipped)', }, }); } else { resolve({ success: false, error: `ripgrep execution failed with code ${code}: ${stderr}`, results: [], }); } }); child.on('error', (error) => { resolve({ success: false, error: `Failed to spawn ripgrep: ${error.message}`, results: [], }); }); }); } // ======================================== // codexlens-search v2 bridge integration // ======================================== /** * Execute search via codexlens-search (v2) bridge CLI. * Spawns 'codexlens-search search --query X --top-k Y --db-path Z' and parses JSON output. * * @param query - Search query string * @param topK - Number of results to return * @param dbPath - Path to the v2 index database directory * @returns Parsed search results as SemanticMatch array */ async function executeCodexLensV2Bridge( query: string, topK: number, dbPath: string, ): Promise { return new Promise((resolve) => { const args = [ '--db-path', dbPath, 'search', '--query', query, '--top-k', String(topK), ]; execFile('codexlens-search', args, { encoding: 'utf-8', timeout: EXEC_TIMEOUTS.PROCESS_SPAWN, windowsHide: true, env: { ...process.env, PYTHONIOENCODING: 'utf-8' }, }, (error, stdout, stderr) => { if (error) { console.warn(`[CodexLens-v2] Bridge search failed: ${error.message}`); resolve({ success: false, error: `codexlens-search v2 bridge failed: ${error.message}`, }); return; } try { const parsed = JSON.parse(stdout.trim()); // Bridge outputs {"error": string} on failure if (parsed && typeof parsed === 'object' && 'error' in parsed) { resolve({ success: false, error: `codexlens-search v2: ${parsed.error}`, }); return; } // Bridge outputs array of {path, score, line, end_line, snippet, content} const raw: Array<{ path?: string; score?: number; line?: number; end_line?: number; snippet?: string; content?: string; }> = Array.isArray(parsed) ? parsed : []; // Build AceLike sections and group by file const sections: AceLikeSection[] = raw.map(r => ({ path: r.path || '', line: r.line || undefined, endLine: r.end_line || undefined, score: r.score || 0, symbol: null, snippet: r.content || r.snippet || '', })); const groupMap = new Map(); for (const s of sections) { const arr = groupMap.get(s.path) || []; arr.push(s); groupMap.set(s.path, arr); } const groups: AceLikeGroup[] = Array.from(groupMap.entries()).map( ([path, secs]) => ({ path, sections: secs, total_matches: secs.length }) ); // Render text view with line numbers const textParts: string[] = []; for (const s of sections) { const lineInfo = s.line ? `:${s.line}${s.endLine ? `-${s.endLine}` : ''}` : ''; textParts.push(`Path: ${s.path}${lineInfo}\n${s.snippet}\n`); } const aceLikeOutput: AceLikeOutput = { format: 'ace', text: textParts.join('\n'), groups, sections, total: sections.length, }; resolve({ success: true, results: aceLikeOutput, metadata: { mode: 'semantic' as any, backend: 'codexlens-v2', count: sections.length, query, note: 'Using codexlens-search v2 bridge (2-stage vector + reranking)', }, }); } catch (parseErr) { console.warn(`[CodexLens-v2] Failed to parse bridge output: ${(parseErr as Error).message}`); resolve({ success: false, error: `Failed to parse codexlens-search v2 output: ${(parseErr as Error).message}`, output: stdout, }); } }); }); } /** * Execute a generic codexlens-search v2 bridge subcommand (init, status, sync, watch, etc.). * Returns parsed JSON output from the bridge CLI. */ async function executeV2BridgeCommand( subcommand: string, args: string[], options?: { timeout?: number; dbPath?: string }, ): Promise { return new Promise((resolve) => { // --db-path is a global arg and must come BEFORE the subcommand const globalArgs = options?.dbPath ? ['--db-path', options.dbPath] : []; const fullArgs = [...globalArgs, subcommand, ...args]; execFile('codexlens-search', fullArgs, { encoding: 'utf-8', timeout: options?.timeout ?? EXEC_TIMEOUTS.PROCESS_SPAWN, windowsHide: true, env: { ...process.env, PYTHONIOENCODING: 'utf-8' }, }, (error, stdout, stderr) => { if (error) { resolve({ success: false, error: `codexlens-search ${subcommand} failed: ${error.message}`, }); return; } try { const parsed = JSON.parse(stdout.trim()); if (parsed && typeof parsed === 'object' && 'error' in parsed) { resolve({ success: false, error: `codexlens-search: ${parsed.error}` }); return; } resolve({ success: true, status: parsed, message: parsed.status || `${subcommand} completed`, metadata: { action: subcommand } }); } catch { resolve({ success: false, error: `Failed to parse codexlens-search ${subcommand} output`, output: stdout }); } }); }); } /** * Action: init (v2) - Initialize index and sync files. */ async function executeInitActionV2(params: Params): Promise { const { path = '.' } = params; const scope = resolveSearchScope(path); const dbPath = join(scope.workingDirectory, '.codexlens'); // Step 1: init empty index const initResult = await executeV2BridgeCommand('init', [], { dbPath }); if (!initResult.success) return initResult; // Step 2: sync all files const syncResult = await executeV2BridgeCommand('sync', [ '--root', scope.workingDirectory, ], { timeout: 1800000, dbPath }); // 30 min for large codebases return { success: syncResult.success, error: syncResult.error, message: syncResult.success ? `Index initialized and synced for ${scope.workingDirectory}` : undefined, metadata: { action: 'init', path: scope.workingDirectory }, status: syncResult.status, }; } /** * Action: status (v2) - Report index statistics. */ async function executeStatusActionV2(params: Params): Promise { const { path = '.' } = params; const scope = resolveSearchScope(path); const dbPath = join(scope.workingDirectory, '.codexlens'); return executeV2BridgeCommand('status', [], { dbPath }); } /** * Action: update (v2) - Incremental sync (re-sync changed files). */ async function executeUpdateActionV2(params: Params): Promise { const { path = '.' } = params; const scope = resolveSearchScope(path); const dbPath = join(scope.workingDirectory, '.codexlens'); return executeV2BridgeCommand('sync', [ '--root', scope.workingDirectory, ], { timeout: 600000, dbPath }); // 10 min } /** * Action: watch (v2) - Start file watcher for auto-updates. */ async function executeWatchActionV2(params: Params): Promise { const { path = '.', debounce = 1000 } = params; const scope = resolveSearchScope(path); const dbPath = join(scope.workingDirectory, '.codexlens'); // Watch runs indefinitely — start it with a short initial timeout to confirm startup const result = await executeV2BridgeCommand('watch', [ '--root', scope.workingDirectory, '--debounce-ms', debounce.toString(), ], { timeout: 5000, dbPath }); return { success: true, message: `File watcher started for ${scope.workingDirectory}. Changes are indexed automatically.`, metadata: { action: 'watch', path: scope.workingDirectory }, status: result.status, }; } // v1 executeCodexLensExactMode removed — v2 bridge handles search // v1 executeHybridMode removed — v2 bridge handles semantic search // v1 executeHybridMode removed — v2 bridge handles semantic search /** * Query intent used to adapt RRF weights (Python parity). * * Keep this logic aligned with CodexLens Python hybrid search: * `codex-lens/src/codexlens/search/hybrid_search.py` */ export type QueryIntent = 'keyword' | 'semantic' | 'mixed'; // Python default: vector 60%, exact 30%, fuzzy 10% const DEFAULT_RRF_WEIGHTS = { exact: 0.3, fuzzy: 0.1, vector: 0.6, } as const; function normalizeWeights(weights: Record): Record { const sum = Object.values(weights).reduce((acc, v) => acc + v, 0); if (!Number.isFinite(sum) || sum <= 0) return { ...weights }; return Object.fromEntries(Object.entries(weights).map(([k, v]) => [k, v / sum])); } /** * Detect query intent using the same heuristic signals as Python: * - Code patterns: `.`, `::`, `->`, CamelCase, snake_case, common code keywords * - Natural language patterns: >5 words, question marks, interrogatives, common verbs */ export function detectQueryIntent(query: string): QueryIntent { const trimmed = query.trim(); if (!trimmed) return 'mixed'; const lower = trimmed.toLowerCase(); const wordCount = trimmed.split(/\s+/).filter(Boolean).length; const hasCodeSignals = /(::|->|\.)/.test(trimmed) || /[A-Z][a-z]+[A-Z]/.test(trimmed) || /\b\w+_\w+\b/.test(trimmed) || /\b(def|class|function|const|let|var|import|from|return|async|await|interface|type)\b/i.test(lower); const hasNaturalSignals = wordCount > 5 || /\?/.test(trimmed) || /\b(how|what|why|when|where)\b/i.test(trimmed) || /\b(handle|explain|fix|implement|create|build|use|find|search|convert|parse|generate|support)\b/i.test(trimmed); if (hasCodeSignals && hasNaturalSignals) return 'mixed'; if (hasCodeSignals) return 'keyword'; if (hasNaturalSignals) return 'semantic'; return 'mixed'; } /** * Intent → weights mapping (Python parity). * - keyword: exact-heavy * - semantic: vector-heavy * - mixed: keep defaults */ export function adjustWeightsByIntent( intent: QueryIntent, baseWeights: Record, ): Record { if (intent === 'keyword') return normalizeWeights({ exact: 0.5, fuzzy: 0.1, vector: 0.4 }); if (intent === 'semantic') return normalizeWeights({ exact: 0.2, fuzzy: 0.1, vector: 0.7 }); return normalizeWeights({ ...baseWeights }); } export function getRRFWeights( query: string, baseWeights: Record = DEFAULT_RRF_WEIGHTS, ): Record { return adjustWeightsByIntent(detectQueryIntent(query), baseWeights); } /** * Post-processing: Filter noisy files from semantic search results * Uses FILTER_CONFIG patterns to remove irrelevant files. * Optimized: pre-compiled regexes, accurate path segment matching. */ // Pre-compile file exclusion regexes once (avoid recompilation in loop) const FILE_EXCLUDE_REGEXES = [...FILTER_CONFIG.exclude_files].map(pattern => new RegExp('^' + pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&').replace(/\\\*/g, '.*') + '$') ); // Non-code file extensions (for codeOnly filter) const NON_CODE_EXTENSIONS = new Set([ 'md', 'txt', 'json', 'yaml', 'yml', 'xml', 'csv', 'log', 'ini', 'cfg', 'conf', 'toml', 'env', 'properties', 'html', 'htm', 'svg', 'png', 'jpg', 'jpeg', 'gif', 'ico', 'webp', 'pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx', 'lock', 'sum', 'mod', ]); interface FilterOptions { excludeExtensions?: string[]; codeOnly?: boolean; } function filterNoisyFiles(results: SemanticMatch[], options: FilterOptions = {}): SemanticMatch[] { const { excludeExtensions = [], codeOnly = false } = options; // Build extension filter set const excludedExtSet = new Set(excludeExtensions.map(ext => ext.toLowerCase().replace(/^\./, ''))); if (codeOnly) { NON_CODE_EXTENSIONS.forEach(ext => excludedExtSet.add(ext)); } return results.filter(r => { // Support both 'file' and 'path' field names (different backends use different names) const filePath = r.file || (r as any).path || ''; if (!filePath) return true; const segments: string[] = filePath.split(/[/\\]/); // Accurate directory check: segment must exactly match excluded directory if (segments.some((segment: string) => FILTER_CONFIG.exclude_directories.has(segment))) { return false; } // Accurate file check: pattern matches filename only (not full path) const filename = segments.pop() || ''; if (FILE_EXCLUDE_REGEXES.some(regex => regex.test(filename))) { return false; } // Extension filter check if (excludedExtSet.size > 0) { const ext = filename.split('.').pop()?.toLowerCase() || ''; if (excludedExtSet.has(ext)) { return false; } } return true; }); } /** * Post-processing: Boost results containing query keywords * Extracts keywords from query and boosts matching results. * Optimized: uses whole-word matching with regex for accuracy. */ // Helper to escape regex special characters function escapeRegExp(str: string): string { return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); } function applyKeywordBoosting(results: SemanticMatch[], query: string): SemanticMatch[] { // Extract meaningful keywords (ignore common words) const stopWords = new Set(['the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'shall', 'can', 'need', 'dare', 'ought', 'used', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by', 'from', 'as', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'between', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 'just', 'and', 'but', 'if', 'or', 'because', 'until', 'while', 'although', 'though', 'after', 'before', 'when', 'whenever', 'where', 'wherever', 'whether', 'which', 'who', 'whom', 'whose', 'what', 'whatever', 'whichever', 'whoever', 'whomever', 'this', 'that', 'these', 'those', 'it', 'its']); const keywords = query .toLowerCase() .split(/[\s,.;:()"{}[\]-]+/) // More robust splitting on punctuation .filter(word => word.length > 2 && !stopWords.has(word)); if (keywords.length === 0) return results; // Create case-insensitive regexes for whole-word matching const keywordRegexes = keywords.map(kw => new RegExp(`\\b${escapeRegExp(kw)}\\b`, 'i')); return results.map(r => { const content = r.content || ''; const file = r.file || ''; // Count keyword matches using whole-word regex let matchCount = 0; for (const regex of keywordRegexes) { if (regex.test(content) || regex.test(file)) { matchCount++; } } // Apply boost only if there are matches if (matchCount > 0) { const matchRatio = matchCount / keywords.length; const boost = 1 + (matchRatio * 0.3); // Up to 30% boost for full match return { ...r, score: r.score * boost, }; } return r; }); } /** * Post-processing: Enforce score diversity * Penalizes results with identical scores (indicates undifferentiated matching) */ function enforceScoreDiversity(results: SemanticMatch[]): SemanticMatch[] { if (results.length < 2) return results; // Count occurrences of each score (rounded to 3 decimal places for comparison) const scoreCounts = new Map(); for (const r of results) { const roundedScore = Math.round(r.score * 1000) / 1000; scoreCounts.set(roundedScore, (scoreCounts.get(roundedScore) || 0) + 1); } // Apply penalty to scores that appear more than twice return results.map(r => { const roundedScore = Math.round(r.score * 1000) / 1000; const count = scoreCounts.get(roundedScore) || 1; if (count > 2) { // Progressive penalty: more duplicates = bigger penalty const penalty = Math.max(0.7, 1 - (count * 0.05)); return { ...r, score: r.score * penalty }; } return r; }); } /** * Post-processing: Filter results with dominant baseline score (hot spot detection) * When backend returns default "hot spot" files with identical high scores, * this function detects and removes them. * * Detection criteria: * - A single score appears in >50% of results * - That score is suspiciously high (>0.9) * - This indicates fallback mechanism returned placeholder results */ function filterDominantBaselineScores( results: SemanticMatch[] ): { filteredResults: SemanticMatch[]; baselineInfo: { score: number; count: number } | null } { if (results.length < 4) { return { filteredResults: results, baselineInfo: null }; } // Count occurrences of each score (rounded to 4 decimal places) const scoreCounts = new Map(); results.forEach(r => { const rounded = Math.round(r.score * 10000) / 10000; scoreCounts.set(rounded, (scoreCounts.get(rounded) || 0) + 1); }); // Find the most dominant score let dominantScore: number | null = null; let dominantCount = 0; scoreCounts.forEach((count, score) => { if (count > dominantCount) { dominantCount = count; dominantScore = score; } }); // If a single score is present in >50% of results and is high (>0.9), // treat it as a suspicious baseline score and filter it out const BASELINE_THRESHOLD = 0.5; // >50% of results have same score const HIGH_SCORE_THRESHOLD = 0.9; // Score above 0.9 is suspiciously high if ( dominantScore !== null && dominantCount > results.length * BASELINE_THRESHOLD && dominantScore > HIGH_SCORE_THRESHOLD ) { const filteredResults = results.filter(r => { const rounded = Math.round(r.score * 10000) / 10000; return rounded !== dominantScore; }); return { filteredResults, baselineInfo: { score: dominantScore, count: dominantCount }, }; } return { filteredResults: results, baselineInfo: null }; } /** * TypeScript implementation of Reciprocal Rank Fusion * Reference: codex-lens/src/codexlens/search/ranking.py * Formula: score(d) = Σ weight_source / (k + rank_source(d)) */ function normalizeFusionSnippet(value: unknown): string | undefined { if (typeof value !== 'string') { return undefined; } const normalized = value.replace(/\s+/g, ' ').trim(); return normalized ? normalized.slice(0, 240) : undefined; } function buildFusionIdentity(result: any): string | null { const path = typeof result?.file === 'string' ? result.file : typeof result?.path === 'string' ? result.path : undefined; if (!path) { return null; } const line = typeof result?.line === 'number' && Number.isFinite(result.line) ? result.line : undefined; const endLine = typeof result?.endLine === 'number' && Number.isFinite(result.endLine) ? result.endLine : line; const column = typeof result?.column === 'number' && Number.isFinite(result.column) ? result.column : undefined; if (line !== undefined) { return `${path}#L${line}-${endLine ?? line}:C${column ?? 0}`; } const symbol = typeof result?.symbol === 'string' && result.symbol.trim() ? result.symbol.trim() : undefined; const snippet = normalizeFusionSnippet(result?.content); if (symbol && snippet) { return `${path}::${symbol}::${snippet}`; } if (snippet) { return `${path}::${snippet}`; } if (symbol) { return `${path}::${symbol}`; } return path; } function scoreFusionRepresentative(result: any): number { let score = 0; if (typeof result?.line === 'number' && Number.isFinite(result.line)) { score += 1000; } if (typeof result?.endLine === 'number' && Number.isFinite(result.endLine)) { score += 250; } if (typeof result?.column === 'number' && Number.isFinite(result.column)) { score += 50; } if (Array.isArray(result?.chunkLines) && result.chunkLines.length > 0) { score += 500 + result.chunkLines.length; } if (typeof result?.symbol === 'string' && result.symbol.trim()) { score += 50; } if (typeof result?.content === 'string') { score += Math.min(result.content.length, 200); } return score; } function applyRRFFusion( resultsMap: Map, weightsOrQuery: Record | string, limit: number, k: number = 60, ): any[] { const weights = typeof weightsOrQuery === 'string' ? getRRFWeights(weightsOrQuery) : weightsOrQuery; const fusedScores = new Map(); resultsMap.forEach((results, source) => { const weight = weights[source] || 0; if (weight === 0 || !results) return; results.forEach((result, rank) => { const identity = buildFusionIdentity(result); if (!identity) return; const rrfContribution = weight / (k + rank + 1); const representativeScore = scoreFusionRepresentative(result); if (!fusedScores.has(identity)) { fusedScores.set(identity, { score: 0, result, sources: [], representativeScore }); } const entry = fusedScores.get(identity)!; entry.score += rrfContribution; if (representativeScore > entry.representativeScore) { entry.result = result; entry.representativeScore = representativeScore; } if (!entry.sources.includes(source)) { entry.sources.push(source); } }); }); // Sort by fusion score descending return Array.from(fusedScores.values()) .sort((a, b) => b.score - a.score) .slice(0, limit) .map(item => ({ ...item.result, fusion_score: item.score, matched_backends: item.sources, })); } /** * Promise wrapper with timeout support * @param promise - The promise to wrap * @param ms - Timeout in milliseconds * @param modeName - Name of the mode for error message * @returns A new promise that rejects on timeout */ function withTimeout(promise: Promise, ms: number, modeName: string): Promise { return new Promise((resolve, reject) => { const timer = setTimeout(() => { reject(new Error(`'${modeName}' search timed out after ${ms}ms`)); }, ms); promise .then(resolve) .catch(reject) .finally(() => clearTimeout(timer)); }); } // v1 executePriorityFallbackMode removed — v2 bridge + ripgrep fallback handles all search // Tool schema for MCP export const schema: ToolSchema = { name: 'smart_search', description: `Unified code search tool powered by codexlens-search v2 (2-stage vector + FTS5 + reranking). Recommended flow: use **action=\"search\"** for lookups, **action=\"init\"** to build the semantic index, and **action=\"update\"** when files change. **Actions & Required Parameters:** * **search** (default): Semantic code search with ripgrep fallback. * **query** (string, **REQUIRED**): Content to search for. * *limit* (number): Max results (default: 5). * *path* (string): Directory or single file to search (default: current directory). * *contextLines* (number): Context lines around matches (default: 0). * *regex* (boolean): Use regex matching in ripgrep fallback (default: true). * *caseSensitive* (boolean): Case-sensitive search (default: true). * **find_files**: Find files by path/name pattern. * **pattern** (string, **REQUIRED**): Glob pattern (e.g., "*.ts", "src/**/*.js"). * *limit* (number): Max results (default: 20). * *offset* (number): Pagination offset (default: 0). * *includeHidden* (boolean): Include hidden files (default: false). * **init**: Initialize v2 semantic index and sync all files. * *path* (string): Directory to index (default: current). * **status**: Check v2 index statistics. (No required params) * **update**: Incremental sync for changed files. * *path* (string): Directory to update (default: current). * **watch**: Start file watcher for auto-updates. * *path* (string): Directory to watch (default: current). **Examples:** smart_search(query="authentication logic") # Semantic search (default) smart_search(action="init", path="/project") # Build v2 index smart_search(action="update", path="/project") # Sync changed files smart_search(query="auth", limit=10, offset=0) # Paginated search`, inputSchema: { type: 'object', properties: { action: { type: 'string', enum: ['init', 'search', 'find_files', 'status', 'update', 'watch', 'search_files'], description: 'Action: search (semantic search, default), find_files (path pattern matching), init (build v2 index), status (check index), update (incremental sync), watch (auto-update watcher). Note: search_files is deprecated.', default: 'search', }, query: { type: 'string', description: 'Content search query (for action="search").', }, pattern: { type: 'string', description: 'Glob pattern for file discovery (for action="find_files"). Examples: "*.ts", "src/**/*.js", "test_*.py"', }, mode: { type: 'string', enum: SEARCH_MODES, description: 'Search mode: fuzzy (v2 semantic + ripgrep fallback, default) or semantic (v2 semantic search only).', default: 'fuzzy', }, output_mode: { type: 'string', enum: [...SEARCH_OUTPUT_MODES], description: 'Output format: ace (default, ACE-style grouped code sections + rendered text), full (raw matches), files_only (paths only), count (per-file counts)', default: 'ace', }, path: { type: 'string', description: 'Directory path for init/search actions (default: current directory). For action=search, a single file path is also accepted and results are automatically scoped back to that file.', }, paths: { type: 'array', description: 'Multiple paths to search within (for search action)', items: { type: 'string', }, default: [], }, contextLines: { type: 'number', description: 'Number of context lines around matches (exact mode only)', default: 0, }, maxResults: { type: 'number', description: 'Maximum number of full-content results (default: 5)', default: 5, }, limit: { type: 'number', description: 'Alias for maxResults (default: 5)', default: 5, }, extraFilesCount: { type: 'number', description: 'Number of additional file-only results (paths without content)', default: 10, }, maxContentLength: { type: 'number', description: 'Maximum content length for truncation (50-2000)', default: 200, }, offset: { type: 'number', description: 'Pagination offset - skip first N results (default: 0)', default: 0, }, includeHidden: { type: 'boolean', description: 'Include hidden files/directories', default: false, }, force: { type: 'boolean', description: 'Force full rebuild for action="init".', default: false, }, regex: { type: 'boolean', description: 'Use regex pattern matching instead of literal string (ripgrep mode only). Default: enabled. Example: smart_search(query="class.*Builder")', default: true, }, caseSensitive: { type: 'boolean', description: 'Case-sensitive search (default: true). Set to false for case-insensitive matching.', default: true, }, tokenize: { type: 'boolean', description: 'Tokenize multi-word queries for OR matching (ripgrep mode). Default: true. Results are ranked by token match count (exact matches first).', default: true, }, }, required: [], }, }; /** * Action: find_files - Find files by path/name pattern (glob matching) * Unlike search which looks inside file content, find_files matches file paths */ async function executeFindFilesAction(params: Params): Promise { const { pattern, path = '.', limit = 20, offset = 0, includeHidden = false, caseSensitive = true } = params; const scope = resolveSearchScope(path); if (!pattern) { return { success: false, error: 'Pattern is required for find_files action. Use glob patterns like "*.ts", "src/**/*.js", or "test_*.py"', }; } // Use ripgrep with --files flag for fast file listing with glob pattern const hasRipgrep = checkToolAvailability('rg'); if (!hasRipgrep) { // Fallback to CodexLens file listing if available const readyStatus = await ensureCodexLensReady(); if (!readyStatus.ready) { return { success: false, error: 'Neither ripgrep nor CodexLens available for file discovery.', }; } // Try CodexLens file list command const args = ['list-files', '--json']; const result = await executeCodexLens(args, { cwd: scope.workingDirectory }); if (!result.success) { return { success: false, error: `Failed to list files: ${result.error}`, }; } // Parse and filter results by pattern let files: string[] = []; try { const parsed = JSON.parse(stripAnsi(result.output || '[]')); files = Array.isArray(parsed) ? parsed : (parsed.files || []); } catch { return { success: false, error: 'Failed to parse file list from CodexLens', }; } // Apply glob pattern matching using minimatch-style regex const globRegex = globToRegex(pattern, caseSensitive); const matchedFiles = files.filter(f => globRegex.test(f)); // Apply pagination const total = matchedFiles.length; const paginatedFiles = matchedFiles.slice(offset, offset + limit); const results: FileMatch[] = paginatedFiles.map(filePath => { const parts = filePath.split(/[/\\]/); const name = parts[parts.length - 1] || ''; const ext = name.includes('.') ? name.split('.').pop() : undefined; return { path: filePath, type: 'file' as const, name, extension: ext, }; }); return { success: true, results, metadata: { pattern, backend: 'codexlens', count: results.length, pagination: { offset, limit, total, has_more: offset + limit < total, }, }, }; } // Use ripgrep --files with glob pattern for fast file discovery return new Promise((resolve) => { const args = ['--files']; // Add exclude patterns if (!includeHidden) { args.push(...buildExcludeArgs()); } else { args.push('--hidden'); } // Add glob pattern args.push('--glob', pattern); // Case sensitivity for glob matching if (!caseSensitive) { args.push('--iglob', pattern); // Remove the case-sensitive glob and use iglob instead const globIndex = args.indexOf('--glob'); if (globIndex !== -1) { args.splice(globIndex, 2); } } const child = getSpawnRuntime()( 'rg', args, buildSmartSearchSpawnOptions(scope.workingDirectory || getProjectRoot(), { stdio: ['ignore', 'pipe', 'pipe'], }), ); let stdout = ''; let stderr = ''; child.stdout?.on('data', (data) => { stdout += data.toString(); }); child.stderr?.on('data', (data) => { stderr += data.toString(); }); child.on('close', (code) => { // ripgrep returns 1 when no matches found, which is not an error if (code !== 0 && code !== 1 && !stderr.includes('os error 1')) { resolve({ success: false, error: `ripgrep file search failed: ${stderr}`, }); return; } const allFiles = stdout.split('\n').filter(line => line.trim()); const total = allFiles.length; // Apply pagination const paginatedFiles = allFiles.slice(offset, offset + limit); const results: FileMatch[] = paginatedFiles.map(filePath => { const normalizedPath = filePath.replace(/\\/g, '/'); const parts = normalizedPath.split('/'); const name = parts[parts.length - 1] || ''; const ext = name.includes('.') ? name.split('.').pop() : undefined; return { path: normalizedPath, type: 'file' as const, name, extension: ext, }; }); resolve({ success: true, results, metadata: { pattern, backend: 'ripgrep', count: results.length, pagination: { offset, limit, total, has_more: offset + limit < total, }, }, }); }); child.on('error', (error) => { resolve({ success: false, error: `Failed to spawn ripgrep: ${error.message}`, }); }); }); } /** * Convert glob pattern to regex for file matching * Supports: *, **, ?, [abc], [!abc] */ function globToRegex(pattern: string, caseSensitive: boolean = true): RegExp { let i = 0; const out: string[] = []; const special = '.^$+{}|()'; while (i < pattern.length) { const c = pattern[i]; if (c === '*') { if (i + 1 < pattern.length && pattern[i + 1] === '*') { // ** matches any path including / out.push('.*'); i += 2; // Skip following / if present if (pattern[i] === '/') { i++; } continue; } else { // * matches any character except / out.push('[^/]*'); } } else if (c === '?') { out.push('[^/]'); } else if (c === '[') { // Character class let j = i + 1; let negated = false; if (pattern[j] === '!' || pattern[j] === '^') { negated = true; j++; } let classContent = ''; while (j < pattern.length && pattern[j] !== ']') { classContent += pattern[j]; j++; } if (negated) { out.push(`[^${classContent}]`); } else { out.push(`[${classContent}]`); } i = j; } else if (special.includes(c)) { out.push('\\' + c); } else { out.push(c); } i++; } const flags = caseSensitive ? '' : 'i'; return new RegExp('^' + out.join('') + '$', flags); } /** * Apply pagination to search results and add pagination metadata */ function applyPagination( results: T[], offset: number, limit: number ): { paginatedResults: T[]; pagination: PaginationInfo } { const total = results.length; const paginatedResults = results.slice(offset, offset + limit); return { paginatedResults, pagination: { offset, limit, total, has_more: offset + limit < total, }, }; } function formatChunkRange(section: AceLikeSection): string { if (section.lines && section.lines.length > 0) { const start = section.lines[0]?.line; const end = section.lines[section.lines.length - 1]?.line; if (typeof start === 'number' && typeof end === 'number' && end > start) { return `${start}-${end}`; } if (typeof start === 'number') { return String(start); } } if (section.line && section.endLine && section.endLine > section.line) { return `${section.line}-${section.endLine}`; } if (section.line) { return String(section.line); } return '?'; } function renderAceSnippet(section: AceLikeSection): string[] { if (section.lines && section.lines.length > 0) { return section.lines.map((line) => { const marker = line.isMatch ? '>' : ' '; return `${marker} ${String(line.line).padStart(4, ' ')} | ${line.text}`; }); } return section.snippet.split(/\r?\n/).map((line) => ` ${line}`); } function formatAceLikeOutput( results: ExactMatch[] | SemanticMatch[] | GraphMatch[] | FileMatch[] | unknown[], ): AceLikeOutput { const sections: AceLikeSection[] = []; for (const result of results) { const candidate = result as Record; const path = typeof candidate.file === 'string' ? candidate.file : typeof candidate.path === 'string' ? candidate.path : undefined; if (!path) { continue; } const line = typeof candidate.line === 'number' && candidate.line > 0 ? candidate.line : undefined; const column = typeof candidate.column === 'number' && candidate.column > 0 ? candidate.column : undefined; const score = typeof candidate.score === 'number' ? candidate.score : undefined; const symbol = typeof candidate.symbol === 'string' ? candidate.symbol : null; const rawSnippet = typeof candidate.content === 'string' ? candidate.content : typeof candidate.name === 'string' ? candidate.name : typeof candidate.type === 'string' ? `[${candidate.type}]` : ''; sections.push({ path, line, endLine: typeof candidate.endLine === 'number' && candidate.endLine >= (line ?? 0) ? candidate.endLine : line, column, score, symbol, snippet: rawSnippet || '[no snippet available]', lines: Array.isArray(candidate.chunkLines) ? candidate.chunkLines as ChunkLine[] : undefined, }); } const groupsMap = new Map(); for (const section of sections) { if (!groupsMap.has(section.path)) { groupsMap.set(section.path, { path: section.path, sections: [], total_matches: 0, }); } const group = groupsMap.get(section.path)!; group.sections.push(section); group.total_matches += 1; } const groups = [...groupsMap.values()]; const textParts = ['The following code sections were retrieved:']; for (const group of groups) { textParts.push(''); textParts.push(`Path: ${group.path}`); group.sections.forEach((section, index) => { const chunkLabel = group.sections.length > 1 ? `Chunk ${index + 1}` : 'Chunk'; textParts.push(`${chunkLabel}: lines ${formatChunkRange(section)}${section.score !== undefined ? ` | score=${section.score.toFixed(4)}` : ''}`); if (section.symbol) { textParts.push(`Symbol: ${section.symbol}`); } for (const snippetLine of renderAceSnippet(section)) { textParts.push(snippetLine); } if (index < group.sections.length - 1) { textParts.push(''); } }); } return { format: 'ace', text: textParts.join('\n'), groups, sections, total: sections.length, }; } /** * Transform results based on output_mode */ function transformOutput( results: ExactMatch[] | SemanticMatch[] | GraphMatch[] | unknown[], outputMode: SearchOutputMode ): unknown { if (!Array.isArray(results)) { return results; } switch (outputMode) { case 'files_only': { // Extract unique file paths const files = [...new Set(results.map((r: any) => r.file))].filter(Boolean); return { files, count: files.length }; } case 'count': { // Count matches per file const counts: Record = {}; for (const r of results) { const file = (r as any).file; if (file) { counts[file] = (counts[file] || 0) + 1; } } return { files: Object.entries(counts).map(([file, count]) => ({ file, count })), total: results.length, }; } case 'ace': return formatAceLikeOutput(results); case 'full': default: return results; } } function enrichMetadataWithIndexStatus( metadata: SearchMetadata | undefined, indexStatus: IndexStatus, scope: SearchScope, ): SearchMetadata { const nextMetadata: SearchMetadata = { ...(metadata ?? {}) }; nextMetadata.embeddings_coverage_percent = indexStatus.embeddings_coverage_percent; nextMetadata.index_status = indexStatus.indexed ? (indexStatus.has_embeddings ? 'indexed' : 'partial') : 'not_indexed'; nextMetadata.reranker_enabled = indexStatus.config?.reranker_enabled; nextMetadata.reranker_backend = indexStatus.config?.reranker_backend; nextMetadata.reranker_model = indexStatus.config?.reranker_model; nextMetadata.cascade_strategy = indexStatus.config?.cascade_strategy; nextMetadata.staged_stage2_mode = indexStatus.config?.staged_stage2_mode; nextMetadata.static_graph_enabled = indexStatus.config?.static_graph_enabled; nextMetadata.warning = mergeWarnings(nextMetadata.warning, indexStatus.warning); nextMetadata.suggestions = mergeSuggestions(nextMetadata.suggestions, buildIndexSuggestions(indexStatus, scope)); return nextMetadata; } // Handler function export async function handler(params: Record): Promise> { const parsed = ParamsSchema.safeParse(params); if (!parsed.success) { return { success: false, error: `Invalid params: ${parsed.error.message}` }; } parsed.data.query = sanitizeSearchQuery(parsed.data.query); parsed.data.pattern = sanitizeSearchPath(parsed.data.pattern); parsed.data.path = sanitizeSearchPath(parsed.data.path); parsed.data.paths = parsed.data.paths.map((item) => sanitizeSearchPath(item) || item); const { action, mode, output_mode, offset = 0 } = parsed.data; // Sync limit and maxResults while preserving explicit small values. // If both are provided, use the larger one. If only one is provided, honor it. const rawLimit = typeof params.limit === 'number' ? params.limit : undefined; const rawMaxResults = typeof params.maxResults === 'number' ? params.maxResults : undefined; const effectiveLimit = rawLimit !== undefined && rawMaxResults !== undefined ? Math.max(rawLimit, rawMaxResults) : rawMaxResults ?? rawLimit ?? parsed.data.maxResults ?? parsed.data.limit ?? 5; parsed.data.maxResults = effectiveLimit; parsed.data.limit = effectiveLimit; // Track if search_files was used (deprecated) let deprecationWarning: string | undefined; try { let result: SearchResult; // Handle actions — all routed through codexlens-search v2 bridge switch (action) { case 'init': result = await executeInitActionV2(parsed.data); break; case 'status': result = await executeStatusActionV2(parsed.data); break; case 'find_files': result = await executeFindFilesAction(parsed.data); break; case 'update': result = await executeUpdateActionV2(parsed.data); break; case 'watch': result = await executeWatchActionV2(parsed.data); break; case 'search_files': // DEPRECATED: Redirect to search with files_only output deprecationWarning = 'action="search_files" is deprecated. Use action="search" with output_mode="files_only" for content-to-files search, or action="find_files" for path pattern matching.'; parsed.data.output_mode = 'files_only'; // Fall through to search case 'search': default: { // v2 bridge for semantic search const scope = resolveSearchScope(parsed.data.path ?? '.'); const dbPath = join(scope.workingDirectory, '.codexlens'); const topK = (parsed.data.maxResults || 5) + (parsed.data.extraFilesCount || 10); const v2Result = await executeCodexLensV2Bridge(parsed.data.query || '', topK, dbPath); if (v2Result.success) { result = v2Result; break; } // v2 failed — fall back to ripgrep-only search console.warn(`[CodexLens-v2] Bridge failed, falling back to ripgrep: ${v2Result.error}`); result = await executeRipgrepMode(parsed.data); break; } } let backgroundNote: string | undefined; // Transform output based on output_mode (for search actions only) if (action === 'search' || action === 'search_files') { // Add pagination metadata for search results if not already present if (result.success && result.results && Array.isArray(result.results)) { const totalResults = (result.results as any[]).length; if (!result.metadata) { result.metadata = {}; } if (!result.metadata.pagination) { result.metadata.pagination = { offset: 0, limit: effectiveLimit, total: totalResults, has_more: false, // Already limited by backend }; } } if (result.success && result.results && output_mode !== 'full') { result.results = transformOutput(result.results as any[], output_mode); if ( output_mode === 'ace' && result.results && typeof result.results === 'object' && 'format' in result.results && result.results.format === 'ace' ) { const advisoryLines: string[] = []; if (result.metadata?.warning) { advisoryLines.push('', 'Warnings:', `- ${result.metadata.warning}`); } if (backgroundNote) { advisoryLines.push('', 'Notes:', `- ${backgroundNote}`); } if (result.metadata?.suggestions && result.metadata.suggestions.length > 0) { advisoryLines.push('', 'Suggestions:'); for (const suggestion of result.metadata.suggestions) { advisoryLines.push(`- ${suggestion.title}: ${suggestion.command}`); advisoryLines.push(` ${suggestion.reason}`); } } const aceResults = result.results as AceLikeOutput; if (advisoryLines.length > 0) { aceResults.text += `\n${advisoryLines.join('\n')}`; } } } } // Add deprecation warning if applicable if (deprecationWarning && result.metadata) { result.metadata.warning = deprecationWarning; } return result.success ? { success: true, result } : { success: false, error: result.error }; } catch (error) { return { success: false, error: (error as Error).message }; } } /** * Execute init action with external progress callback * Used by MCP server for streaming progress * @param params - Search parameters (path, languages, force) * @param onProgress - Optional callback for progress updates */ export const __testables = { isCodexLensCliCompatibilityError, shouldSurfaceCodexLensFtsCompatibilityWarning, buildSmartSearchSpawnOptions, shouldDetachBackgroundSmartSearchProcess, checkToolAvailability, parseCodexLensJsonOutput, parsePlainTextFileMatches, hasCentralizedVectorArtifacts, extractEmbeddingsStatusSummary, selectEmbeddingsStatusPayload, resolveRipgrepQueryMode, queryTargetsGeneratedFiles, prefersLexicalPriorityQuery, classifyIntent, resolveEmbeddingSelection, parseOptionalBooleanEnv, isAutoInitMissingEnabled, isAutoEmbedMissingEnabled, getAutoInitMissingDisabledReason, getAutoEmbedMissingDisabledReason, buildIndexSuggestions, maybeStartBackgroundAutoInit, maybeStartBackgroundAutoEmbed, __setRuntimeOverrides(overrides: Partial) { Object.assign(runtimeOverrides, overrides); }, __resetRuntimeOverrides() { for (const key of Object.keys(runtimeOverrides) as Array) { delete runtimeOverrides[key]; } }, __resetBackgroundJobs() { autoInitJobs.clear(); autoEmbedJobs.clear(); }, }; export async function executeInitWithProgress( params: Record, onProgress?: (progress: ProgressInfo) => void ): Promise { const path = (params.path as string) || '.'; const scope = resolveSearchScope(path); const dbPath = join(scope.workingDirectory, '.codexlens'); // Notify progress start if (onProgress) { onProgress({ stage: 'init', message: 'Initializing v2 index...', percent: 0 } as ProgressInfo); } // Step 1: init empty index const initResult = await executeV2BridgeCommand('init', [], { dbPath }); if (!initResult.success) return initResult; if (onProgress) { onProgress({ stage: 'sync', message: 'Syncing files...', percent: 10 } as ProgressInfo); } // Step 2: sync all files const syncResult = await executeV2BridgeCommand('sync', [ '--root', scope.workingDirectory, ], { timeout: 1800000, dbPath }); if (onProgress) { onProgress({ stage: 'complete', message: 'Index build complete', percent: 100 } as ProgressInfo); } return { success: syncResult.success, error: syncResult.error, message: syncResult.success ? `v2 index created and synced for ${scope.workingDirectory}` : undefined, metadata: { action: 'init', path: scope.workingDirectory }, status: syncResult.status, }; }