From 1cd96b90e8ca9493e8ef151e3431cee78c550a32 Mon Sep 17 00:00:00 2001 From: catlog22 Date: Wed, 11 Mar 2026 16:48:16 +0800 Subject: [PATCH] chore: bump version to 7.2.7 - Enhance smart-search with advanced MCP integration - Add GEMINI_API_KEY configuration support in codexlens - Update MCP server with new tool handlers - Add tests for smart-search MCP usage - Update documentation --- .../components/codexlens/SettingsTab.test.tsx | 2 + .../src/components/codexlens/envVarSchema.ts | 7 + .../src/components/mcp/McpServerDialog.tsx | 20 +- ccw/frontend/src/locales/en/codexlens.json | 1 + ccw/frontend/src/locales/zh/codexlens.json | 1 + .../core/routes/codexlens/config-handlers.ts | 4 + ccw/src/mcp-server/index.ts | 107 +- ccw/src/tools/smart-search.ts | 946 ++++++++++++++++-- ccw/tests/mcp-server.test.js | 77 +- ccw/tests/smart-search-mcp-usage.test.js | 121 +++ codex-lens/src/codexlens/cli/commands.py | 13 +- codex-lens/src/codexlens/config.py | 12 + codex-lens/src/codexlens/env_config.py | 1 + docs/mcp/tools.md | 8 +- package-lock.json | 4 +- package.json | 2 +- 16 files changed, 1215 insertions(+), 111 deletions(-) diff --git a/ccw/frontend/src/components/codexlens/SettingsTab.test.tsx b/ccw/frontend/src/components/codexlens/SettingsTab.test.tsx index 39f854b8..45e2f77c 100644 --- a/ccw/frontend/src/components/codexlens/SettingsTab.test.tsx +++ b/ccw/frontend/src/components/codexlens/SettingsTab.test.tsx @@ -61,6 +61,7 @@ const mockConfig: CodexLensConfig = { const mockEnv: Record = { CODEXLENS_EMBEDDING_BACKEND: 'local', CODEXLENS_EMBEDDING_MODEL: 'fast', + CODEXLENS_AUTO_EMBED_MISSING: 'true', CODEXLENS_USE_GPU: 'true', CODEXLENS_RERANKER_ENABLED: 'true', CODEXLENS_RERANKER_BACKEND: 'onnx', @@ -141,6 +142,7 @@ describe('SettingsTab', () => { expect(screen.getByText(/Concurrency/i)).toBeInTheDocument(); expect(screen.getByText(/Cascade/i)).toBeInTheDocument(); expect(screen.getByText(/Chunking/i)).toBeInTheDocument(); + expect(screen.getByText(/Auto Build Missing Vectors/i)).toBeInTheDocument(); }); it('should initialize index dir from config', () => { diff --git a/ccw/frontend/src/components/codexlens/envVarSchema.ts b/ccw/frontend/src/components/codexlens/envVarSchema.ts index 3da88cdd..73814690 100644 --- a/ccw/frontend/src/components/codexlens/envVarSchema.ts +++ b/ccw/frontend/src/components/codexlens/envVarSchema.ts @@ -56,6 +56,13 @@ export const envVarGroupsSchema: EnvVarGroupsSchema = { }, ], }, + CODEXLENS_AUTO_EMBED_MISSING: { + key: 'CODEXLENS_AUTO_EMBED_MISSING', + labelKey: 'codexlens.envField.autoEmbedMissing', + type: 'checkbox', + default: 'true', + settingsPath: 'embedding.auto_embed_missing', + }, CODEXLENS_USE_GPU: { key: 'CODEXLENS_USE_GPU', labelKey: 'codexlens.envField.useGpu', diff --git a/ccw/frontend/src/components/mcp/McpServerDialog.tsx b/ccw/frontend/src/components/mcp/McpServerDialog.tsx index f2888812..79f05484 100644 --- a/ccw/frontend/src/components/mcp/McpServerDialog.tsx +++ b/ccw/frontend/src/components/mcp/McpServerDialog.tsx @@ -256,8 +256,22 @@ export function McpServerDialog({ // Parse JSON config and populate form const parseJsonConfig = useCallback(() => { try { - const config = JSON.parse(jsonInput); - + let config = JSON.parse(jsonInput); + let extractedServerName = ''; + + // Auto-detect mcpServers wrapper format (Claude Code config format) + // Supports both: { "mcpServers": { "name": {...} } } and direct { "command": ... } + if (config.mcpServers && typeof config.mcpServers === 'object' && !Array.isArray(config.mcpServers)) { + const serverNames = Object.keys(config.mcpServers); + if (serverNames.length > 0) { + extractedServerName = serverNames[0]; + const serverConfig = config.mcpServers[extractedServerName]; + if (serverConfig && typeof serverConfig === 'object') { + config = serverConfig; + } + } + } + // Detect transport type based on config structure if (config.url) { // HTTP transport @@ -278,6 +292,7 @@ export function McpServerDialog({ setFormData(prev => ({ ...prev, + name: extractedServerName || prev.name, url: config.url || '', headers, bearerTokenEnvVar: config.bearer_token_env_var || config.bearerTokenEnvVar || '', @@ -291,6 +306,7 @@ export function McpServerDialog({ setFormData(prev => ({ ...prev, + name: extractedServerName || prev.name, command: config.command || '', args, env, diff --git a/ccw/frontend/src/locales/en/codexlens.json b/ccw/frontend/src/locales/en/codexlens.json index 3baf93fe..3ecd74d3 100644 --- a/ccw/frontend/src/locales/en/codexlens.json +++ b/ccw/frontend/src/locales/en/codexlens.json @@ -298,6 +298,7 @@ "envField": { "backend": "Backend", "model": "Model", + "autoEmbedMissing": "Auto Build Missing Vectors", "useGpu": "Use GPU", "highAvailability": "High Availability", "loadBalanceStrategy": "Load Balance Strategy", diff --git a/ccw/frontend/src/locales/zh/codexlens.json b/ccw/frontend/src/locales/zh/codexlens.json index 295fd835..b9757d5c 100644 --- a/ccw/frontend/src/locales/zh/codexlens.json +++ b/ccw/frontend/src/locales/zh/codexlens.json @@ -298,6 +298,7 @@ "envField": { "backend": "后端", "model": "模型", + "autoEmbedMissing": "缺失向量时自动构建", "useGpu": "使用 GPU", "highAvailability": "高可用", "loadBalanceStrategy": "负载均衡策略", diff --git a/ccw/src/core/routes/codexlens/config-handlers.ts b/ccw/src/core/routes/codexlens/config-handlers.ts index e7ad1fc7..f0621f7c 100644 --- a/ccw/src/core/routes/codexlens/config-handlers.ts +++ b/ccw/src/core/routes/codexlens/config-handlers.ts @@ -955,6 +955,9 @@ export async function handleCodexLensConfigRoutes(ctx: RouteContext): Promise v === 'true' }, + 'CODEXLENS_AUTO_EMBED_MISSING': { path: ['embedding', 'auto_embed_missing'], transform: v => v === 'true' }, 'CODEXLENS_EMBEDDING_STRATEGY': { path: ['embedding', 'strategy'] }, 'CODEXLENS_EMBEDDING_COOLDOWN': { path: ['embedding', 'cooldown'], transform: v => parseFloat(v) }, 'CODEXLENS_RERANKER_BACKEND': { path: ['reranker', 'backend'] }, diff --git a/ccw/src/mcp-server/index.ts b/ccw/src/mcp-server/index.ts index 947a4ca6..499596fd 100644 --- a/ccw/src/mcp-server/index.ts +++ b/ccw/src/mcp-server/index.ts @@ -20,6 +20,7 @@ const SERVER_VERSION = '6.2.0'; // Environment variable names for documentation const ENV_PROJECT_ROOT = 'CCW_PROJECT_ROOT'; const ENV_ALLOWED_DIRS = 'CCW_ALLOWED_DIRS'; +const STDIO_DISCONNECT_ERROR_CODES = new Set(['EPIPE', 'ERR_STREAM_DESTROYED']); // Default enabled tools (core set - file operations, core memory, and smart search) const DEFAULT_TOOLS: string[] = ['write_file', 'edit_file', 'read_file', 'read_many_files', 'read_outline', 'core_memory', 'smart_search']; @@ -67,6 +68,47 @@ function formatToolResult(result: unknown): string { return String(result); } +/** + * Detect broken stdio pipes so orphaned MCP processes can terminate cleanly. + */ +function isStdioDisconnectError(error: unknown): error is NodeJS.ErrnoException { + if (error && typeof error === 'object') { + const maybeErrnoError = error as NodeJS.ErrnoException; + if (typeof maybeErrnoError.code === 'string' && STDIO_DISCONNECT_ERROR_CODES.has(maybeErrnoError.code)) { + return true; + } + } + + return error instanceof Error && /broken pipe/i.test(error.message); +} + +/** + * Best-effort logging for teardown paths where stderr may already be gone. + */ +function safeStderrWrite(message: string): void { + try { + if (process.stderr.destroyed || !process.stderr.writable) { + return; + } + + process.stderr.write(`${message}\n`); + } catch { + // Ignore logging failures while stdio is tearing down. + } +} + +function safeLogError(prefix: string, error: unknown): void { + if (error instanceof Error) { + safeStderrWrite(`${prefix}: ${error.message}`); + if (error.stack) { + safeStderrWrite(error.stack); + } + return; + } + + safeStderrWrite(`${prefix}: ${String(error)}`); +} + /** * Create and configure the MCP server */ @@ -151,28 +193,77 @@ function createServer(): Server { async function main(): Promise { const server = createServer(); const transport = new StdioServerTransport(); + let shutdownPromise: Promise | null = null; + + const shutdown = (reason: string, exitCode = 0, error?: unknown): Promise => { + if (shutdownPromise) { + return shutdownPromise; + } + + if (error && !isStdioDisconnectError(error)) { + safeLogError(`[${SERVER_NAME}] ${reason}`, error); + } + + shutdownPromise = (async () => { + try { + await server.close(); + } catch (closeError) { + if (!isStdioDisconnectError(closeError)) { + safeLogError(`[${SERVER_NAME}] Failed to close server`, closeError); + } + } + + process.exit(exitCode); + })(); + + return shutdownPromise; + }; + + const handleStreamClose = (streamName: string) => () => { + void shutdown(`${streamName} disconnected`); + }; + + const handleStreamError = (streamName: string) => (error: unknown) => { + const exitCode = isStdioDisconnectError(error) ? 0 : 1; + void shutdown(`${streamName} stream error`, exitCode, error); + }; // Connect server to transport await server.connect(transport); - // Error handling - prevent process crashes from closing transport + process.stdin.once('end', handleStreamClose('stdin')); + process.stdin.once('close', handleStreamClose('stdin')); + process.stdin.once('error', handleStreamError('stdin')); + process.stdout.once('close', handleStreamClose('stdout')); + process.stdout.once('error', handleStreamError('stdout')); + process.stderr.once('close', handleStreamClose('stderr')); + process.stderr.once('error', handleStreamError('stderr')); + + // Error handling - stdio disconnects should terminate, other errors stay logged. process.on('uncaughtException', (error) => { - console.error(`[${SERVER_NAME}] Uncaught exception:`, error.message); - console.error(error.stack); + if (isStdioDisconnectError(error)) { + void shutdown('Uncaught stdio disconnect', 0, error); + return; + } + + safeLogError(`[${SERVER_NAME}] Uncaught exception`, error); }); process.on('unhandledRejection', (reason) => { - console.error(`[${SERVER_NAME}] Unhandled rejection:`, reason); + if (isStdioDisconnectError(reason)) { + void shutdown('Unhandled stdio disconnect', 0, reason); + return; + } + + safeLogError(`[${SERVER_NAME}] Unhandled rejection`, reason); }); process.on('SIGINT', async () => { - await server.close(); - process.exit(0); + await shutdown('Received SIGINT'); }); process.on('SIGTERM', async () => { - await server.close(); - process.exit(0); + await shutdown('Received SIGTERM'); }); // Log server start (to stderr to not interfere with stdio protocol) diff --git a/ccw/src/tools/smart-search.ts b/ccw/src/tools/smart-search.ts index ab948765..18b2e26c 100644 --- a/ccw/src/tools/smart-search.ts +++ b/ccw/src/tools/smart-search.ts @@ -25,6 +25,7 @@ import { existsSync, readFileSync, statSync } from 'fs'; import { dirname, join, resolve } from 'path'; import { ensureReady as ensureCodexLensReady, + checkSemanticStatus, ensureLiteLLMEmbedderReady, executeCodexLens, getVenvPythonPath, @@ -38,6 +39,8 @@ import type { RotationEndpointConfig } from '../config/litellm-api-config-manage // Timing utilities for performance analysis const TIMING_ENABLED = process.env.SMART_SEARCH_TIMING === '1' || process.env.DEBUG?.includes('timing'); +const SEARCH_OUTPUT_MODES = ['full', 'files_only', 'count', 'ace'] as const; +type SearchOutputMode = typeof SEARCH_OUTPUT_MODES[number]; interface TimingData { [key: string]: number; @@ -78,7 +81,7 @@ const ParamsSchema = z.object({ query: z.string().optional().describe('Content search query (for action="search")'), pattern: z.string().optional().describe('Glob pattern for path matching (for action="find_files")'), mode: z.enum(['fuzzy', 'semantic']).default('fuzzy'), - output_mode: z.enum(['full', 'files_only', 'count']).default('full'), + output_mode: z.enum(SEARCH_OUTPUT_MODES).default('ace'), path: z.string().optional(), paths: z.array(z.string()).default([]), contextLines: z.number().default(0), @@ -242,11 +245,19 @@ interface Classification { reasoning: string; } +interface ChunkLine { + line: number; + text: string; + isMatch: boolean; +} + interface ExactMatch { file: string; line: number; column: number; content: string; + endLine?: number; + chunkLines?: ChunkLine[]; matchScore?: number; // Token match ratio (0-1) for multi-word queries matchCount?: number; // Number of tokens matched } @@ -262,6 +273,8 @@ interface RelationshipInfo { interface SemanticMatch { file: string; + line?: number; + column?: number; score: number; content: string; symbol: string | null; @@ -289,6 +302,12 @@ interface PaginationInfo { has_more: boolean; // True if more results are available } +interface SearchSuggestion { + title: string; + command: string; + reason: string; +} + interface SearchMetadata { mode?: string; backend?: string; @@ -308,6 +327,7 @@ interface SearchMetadata { // Tokenization metadata (ripgrep mode) tokens?: string[]; // Query tokens used for multi-word search tokenized?: boolean; // Whether tokenization was applied + suggestions?: SearchSuggestion[]; // Pagination metadata pagination?: PaginationInfo; // Performance timing data (when SMART_SEARCH_TIMING=1 or DEBUG includes 'timing') @@ -333,7 +353,7 @@ interface SearchMetadata { interface SearchResult { success: boolean; - results?: ExactMatch[] | SemanticMatch[] | GraphMatch[] | FileMatch[] | unknown; + results?: ExactMatch[] | SemanticMatch[] | GraphMatch[] | FileMatch[] | AceLikeOutput | unknown; extra_files?: string[]; // Additional file paths without content output?: string; metadata?: SearchMetadata; @@ -342,6 +362,31 @@ interface SearchResult { message?: string; } +interface AceLikeSection { + path: string; + line?: number; + endLine?: number; + column?: number; + score?: number; + symbol?: string | null; + snippet: string; + lines?: ChunkLine[]; +} + +interface AceLikeGroup { + path: string; + sections: AceLikeSection[]; + total_matches: number; +} + +interface AceLikeOutput { + format: 'ace'; + text: string; + groups: AceLikeGroup[]; + sections: AceLikeSection[]; + total: number; +} + interface ModelInfo { model_profile?: string; model_name?: string; @@ -356,6 +401,7 @@ interface CodexLensConfig { index_dir?: string; embedding_backend?: string; // 'fastembed' (local) or 'litellm' (api) embedding_model?: string; + embedding_auto_embed_missing?: boolean; reranker_enabled?: boolean; reranker_backend?: string; // 'onnx' (local) or 'api' reranker_model?: string; @@ -396,6 +442,7 @@ function readCodexLensSettingsSnapshot(): Partial { return { embedding_backend: normalizeEmbeddingBackend(typeof embedding.backend === 'string' ? embedding.backend : undefined), embedding_model: typeof embedding.model === 'string' ? embedding.model : undefined, + embedding_auto_embed_missing: typeof embedding.auto_embed_missing === 'boolean' ? embedding.auto_embed_missing : undefined, reranker_enabled: typeof reranker.enabled === 'boolean' ? reranker.enabled : undefined, reranker_backend: typeof reranker.backend === 'string' ? reranker.backend : undefined, reranker_model: typeof reranker.model === 'string' ? reranker.model : undefined, @@ -420,6 +467,14 @@ function stripAnsi(str: string): string { /** Default maximum content length to return (avoid excessive output) */ const DEFAULT_MAX_CONTENT_LENGTH = 200; +const CODEX_LENS_FTS_COMPATIBILITY_PATTERNS = [ + /UsageError:\s*Got unexpected extra arguments?/i, + /Option ['"]--method['"] does not take a value/i, + /TyperArgument\.make_metavar\(\) takes 1 positional argument but 2 were given/i, +]; + +let codexLensFtsBackendBroken = false; +const autoEmbedJobs = new Map(); /** * Truncate content to specified length with ellipsis @@ -460,6 +515,14 @@ interface SearchScope { targetFile?: string; } +interface RipgrepQueryModeResolution { + regex: boolean; + tokenize: boolean; + tokens: string[]; + literalFallback: boolean; + warning?: string; +} + function sanitizeSearchQuery(query: string | undefined): string | undefined { if (!query) { return query; @@ -552,12 +615,103 @@ function parseCodexLensJsonOutput(output: string | undefined): any | null { return null; } +function isValidRegexPattern(pattern: string): boolean { + try { + new RegExp(pattern); + return true; + } catch { + return false; + } +} + +function resolveRipgrepQueryMode(query: string, regex: boolean = true, tokenize: boolean = true): RipgrepQueryModeResolution { + const tokens = tokenize ? tokenizeQuery(query) : [query]; + + if (!regex) { + return { + regex: false, + tokenize, + tokens, + literalFallback: false, + }; + } + + const invalidTokens = tokens.filter((token) => token.length > 0 && !isValidRegexPattern(token)); + if (invalidTokens.length === 0) { + return { + regex: true, + tokenize, + tokens, + literalFallback: false, + }; + } + + const preview = truncateContent(invalidTokens[0], 40); + return { + regex: false, + tokenize, + tokens, + literalFallback: true, + warning: invalidTokens.length === 1 + ? `Query token "${preview}" is not a valid regular expression. Falling back to literal ripgrep matching.` + : 'Query contains invalid regular expression tokens. Falling back to literal ripgrep matching.', + }; +} + +function isCodexLensCliCompatibilityError(error: string | undefined): boolean { + if (!error) { + return false; + } + + const cleanError = stripAnsi(error); + return CODEX_LENS_FTS_COMPATIBILITY_PATTERNS.some((pattern) => pattern.test(cleanError)); +} + +function noteCodexLensFtsCompatibility(error: string | undefined): boolean { + if (!isCodexLensCliCompatibilityError(error)) { + return false; + } + + codexLensFtsBackendBroken = true; + return true; +} + +function summarizeBackendError(error: string | undefined): string { + const cleanError = stripAnsi(error || '').trim(); + if (!cleanError) { + return 'unknown error'; + } + + if (isCodexLensCliCompatibilityError(cleanError)) { + return 'CodexLens exact search CLI is incompatible with the current Typer/Click runtime'; + } + + const regexSummary = cleanError.match(/error:\s*([^\r\n]+)/i); + if (/regex parse error/i.test(cleanError) && regexSummary?.[1]) { + return `invalid regular expression (${regexSummary[1].trim()})`; + } + + const usageSummary = cleanError.match(/UsageError:\s*([^\r\n]+)/i); + if (usageSummary?.[1]) { + return usageSummary[1].trim(); + } + + const firstMeaningfulLine = cleanError + .split(/\r?\n/) + .map((line) => line.trim()) + .find((line) => line && !line.startsWith('│') && !line.startsWith('┌') && !line.startsWith('└')); + + return truncateContent(firstMeaningfulLine || cleanError, 180); +} + function mapCodexLensSemanticMatches(data: any[], scope: SearchScope, maxContentLength: number): SemanticMatch[] { return filterResultsToTargetFile(data.map((item: any) => { const rawScore = item.score || 0; const similarityScore = rawScore > 0 ? 1 / (1 + rawScore) : 1; return { file: item.path || item.file, + line: typeof item.line === 'number' ? item.line : undefined, + column: typeof item.column === 'number' ? item.column : undefined, score: similarityScore, content: truncateContent(item.content || item.excerpt, maxContentLength), symbol: item.symbol || null, @@ -617,12 +771,12 @@ function collectBackendError( backendResult: PromiseSettledResult, ): void { if (backendResult.status === 'rejected') { - errors.push(`${backendName}: ${String(backendResult.reason)}`); + errors.push(`${backendName}: ${summarizeBackendError(String(backendResult.reason))}`); return; } if (!backendResult.value.success) { - errors.push(`${backendName}: ${backendResult.value.error || 'unknown error'}`); + errors.push(`${backendName}: ${summarizeBackendError(backendResult.value.error)}`); } } @@ -635,6 +789,88 @@ function mergeWarnings(...warnings: Array): string | undefin return merged.length > 0 ? merged.join(' | ') : undefined; } +function mergeNotes(...notes: Array): string | undefined { + const merged = [...new Set( + notes + .filter((note): note is string => typeof note === 'string' && note.trim().length > 0) + .map((note) => note.trim()) + )]; + return merged.length > 0 ? merged.join(' | ') : undefined; +} + +function mergeSuggestions(...groups: Array): SearchSuggestion[] | undefined { + const merged = new Map(); + for (const group of groups) { + for (const suggestion of group ?? []) { + if (!merged.has(suggestion.command)) { + merged.set(suggestion.command, suggestion); + } + } + } + + return merged.size > 0 ? [...merged.values()] : undefined; +} + +function formatSmartSearchCommand(action: string, pathValue: string, extraParams: Record = {}): string { + const normalizedPath = pathValue.replace(/\\/g, '/'); + const args = [`action=${JSON.stringify(action)}`, `path=${JSON.stringify(normalizedPath)}`]; + + for (const [key, value] of Object.entries(extraParams)) { + if (value === undefined) { + continue; + } + args.push(`${key}=${JSON.stringify(value)}`); + } + + return `smart_search(${args.join(', ')})`; +} + +function isAutoEmbedMissingEnabled(config: CodexLensConfig | null | undefined): boolean { + return config?.embedding_auto_embed_missing !== false; +} + +function buildIndexSuggestions(indexStatus: IndexStatus, scope: SearchScope): SearchSuggestion[] | undefined { + const suggestions: SearchSuggestion[] = []; + + if (!indexStatus.indexed) { + suggestions.push({ + title: 'Initialize index', + command: formatSmartSearchCommand('init', scope.workingDirectory), + reason: 'No CodexLens index exists for this path yet.', + }); + suggestions.push({ + title: 'Check index status', + command: formatSmartSearchCommand('status', scope.workingDirectory), + reason: 'Verify whether the target path is mapped to the expected CodexLens project root.', + }); + return suggestions; + } + + if (!indexStatus.has_embeddings) { + suggestions.push({ + title: 'Generate embeddings', + command: formatSmartSearchCommand('embed', scope.workingDirectory), + reason: 'The index exists, but semantic/vector retrieval is unavailable until embeddings are generated.', + }); + } else if ((indexStatus.embeddings_coverage_percent ?? 0) < 50) { + suggestions.push({ + title: 'Rebuild embeddings', + command: formatSmartSearchCommand('embed', scope.workingDirectory, { force: true }), + reason: `Embedding coverage is only ${(indexStatus.embeddings_coverage_percent ?? 0).toFixed(1)}%, so semantic search quality is degraded.`, + }); + } + + if (indexStatus.warning?.includes('Failed to parse index status')) { + suggestions.push({ + title: 'Re-check status', + command: formatSmartSearchCommand('status', scope.workingDirectory), + reason: 'The index health payload could not be parsed cleanly.', + }); + } + + return suggestions.length > 0 ? suggestions : undefined; +} + /** * Check if CodexLens index exists for current directory * @param path - Directory path to check @@ -661,12 +897,15 @@ async function checkIndexStatus(path: string = '.'): Promise { ...settingsConfig, config_file: configData.config_file, index_dir: configData.index_dir, - embedding_backend: configData.embedding_backend, - embedding_model: configData.embedding_model, - reranker_enabled: configData.reranker_enabled, - reranker_backend: configData.reranker_backend, - reranker_model: configData.reranker_model, - reranker_top_k: configData.reranker_top_k, + embedding_backend: normalizeEmbeddingBackend(configData.embedding_backend) ?? settingsConfig.embedding_backend, + embedding_model: typeof configData.embedding_model === 'string' ? configData.embedding_model : settingsConfig.embedding_model, + embedding_auto_embed_missing: typeof configData.embedding_auto_embed_missing === 'boolean' + ? configData.embedding_auto_embed_missing + : settingsConfig.embedding_auto_embed_missing, + reranker_enabled: typeof configData.reranker_enabled === 'boolean' ? configData.reranker_enabled : settingsConfig.reranker_enabled, + reranker_backend: typeof configData.reranker_backend === 'string' ? configData.reranker_backend : settingsConfig.reranker_backend, + reranker_model: typeof configData.reranker_model === 'string' ? configData.reranker_model : settingsConfig.reranker_model, + reranker_top_k: typeof configData.reranker_top_k === 'number' ? configData.reranker_top_k : settingsConfig.reranker_top_k, }; } catch { // Config parse failed, continue without it @@ -874,8 +1113,9 @@ function buildRipgrepCommand(params: { regex?: boolean; caseSensitive?: boolean; tokenize?: boolean; -}): { command: string; args: string[]; tokens: string[] } { +}): { command: string; args: string[]; tokens: string[]; warning?: string; literalFallback: boolean; regex: boolean } { const { query, paths = ['.'], contextLines = 0, maxResults = 10, includeHidden = false, regex = false, caseSensitive = true, tokenize = true } = params; + const queryMode = resolveRipgrepQueryMode(query, regex, tokenize); const args = [ '-n', @@ -905,14 +1145,13 @@ function buildRipgrepCommand(params: { args.push('--hidden'); } - // Tokenize query for multi-word OR matching - const tokens = tokenize ? tokenizeQuery(query) : [query]; + const { tokens } = queryMode; if (tokens.length > 1) { // Multi-token: use multiple -e patterns (OR matching) // Each token is escaped for regex safety unless regex mode is enabled for (const token of tokens) { - if (regex) { + if (queryMode.regex) { args.push('-e', token); } else { // Escape regex special chars for literal matching @@ -922,7 +1161,7 @@ function buildRipgrepCommand(params: { } } else { // Single token or no tokenization: use original behavior - if (regex) { + if (queryMode.regex) { args.push('-e', query); } else { args.push('-F', query); @@ -931,7 +1170,146 @@ function buildRipgrepCommand(params: { args.push(...paths); - return { command: 'rg', args, tokens }; + return { + command: 'rg', + args, + tokens, + warning: queryMode.warning, + literalFallback: queryMode.literalFallback, + regex: queryMode.regex, + }; +} + +interface RipgrepChunkAccumulator { + file: string; + chunkLines: ChunkLine[]; + firstMatchLine?: number; + firstMatchColumn?: number; + lastLine?: number; + matchCount: number; +} + +function finalizeRipgrepChunk(accumulator: RipgrepChunkAccumulator | undefined): ExactMatch | null { + if (!accumulator || accumulator.matchCount === 0 || accumulator.chunkLines.length === 0) { + return null; + } + + const firstLine = accumulator.chunkLines[0]?.line ?? accumulator.firstMatchLine ?? 1; + const lastLine = accumulator.chunkLines[accumulator.chunkLines.length - 1]?.line ?? accumulator.firstMatchLine ?? firstLine; + + return { + file: accumulator.file, + line: accumulator.firstMatchLine ?? firstLine, + endLine: lastLine, + column: accumulator.firstMatchColumn ?? 1, + content: accumulator.chunkLines.map((line) => line.text).join('\n').trim(), + chunkLines: [...accumulator.chunkLines], + }; +} + +function parseRipgrepJsonResults(stdout: string, effectiveLimit: number): { results: ExactMatch[]; resultLimitReached: boolean } { + const allResults: ExactMatch[] = []; + const activeChunks = new Map(); + const lines = stdout.split('\n').filter((line) => line.trim()); + let resultLimitReached = false; + + const flushChunk = (file: string) => { + const finalized = finalizeRipgrepChunk(activeChunks.get(file)); + activeChunks.delete(file); + if (!finalized) { + return; + } + allResults.push(finalized); + if (allResults.length >= effectiveLimit) { + resultLimitReached = true; + } + }; + + for (const line of lines) { + if (resultLimitReached) { + break; + } + + try { + const item = JSON.parse(line); + if (item.type !== 'match' && item.type !== 'context' && item.type !== 'end') { + continue; + } + + const file = item.data?.path?.text as string | undefined; + if (!file) { + continue; + } + + if (item.type === 'end') { + flushChunk(file); + continue; + } + + const lineNumber = typeof item.data?.line_number === 'number' ? item.data.line_number : undefined; + const rawText = typeof item.data?.lines?.text === 'string' + ? item.data.lines.text.replace(/\r?\n$/, '') + : ''; + + if (lineNumber === undefined) { + continue; + } + + let current = activeChunks.get(file); + const isContiguous = current && current.lastLine !== undefined && lineNumber <= current.lastLine + 1; + if (!current || !isContiguous) { + if (current) { + flushChunk(file); + if (resultLimitReached) { + break; + } + } + current = { + file, + chunkLines: [], + matchCount: 0, + }; + activeChunks.set(file, current); + } + + const previousLine = current.chunkLines[current.chunkLines.length - 1]; + const duplicateLine = previousLine && previousLine.line === lineNumber && previousLine.text === rawText; + if (!duplicateLine) { + current.chunkLines.push({ + line: lineNumber, + text: rawText, + isMatch: item.type === 'match', + }); + } else if (item.type === 'match') { + previousLine.isMatch = true; + } + + if (item.type === 'match') { + current.matchCount += 1; + if (current.firstMatchLine === undefined) { + current.firstMatchLine = lineNumber; + current.firstMatchColumn = + item.data.submatches && item.data.submatches[0] + ? item.data.submatches[0].start + 1 + : 1; + } + } + current.lastLine = lineNumber; + } catch { + continue; + } + } + + if (!resultLimitReached) { + for (const file of [...activeChunks.keys()]) { + flushChunk(file); + if (resultLimitReached) { + break; + } + } + } + + return { results: allResults.slice(0, effectiveLimit), resultLimitReached }; } function normalizeEmbeddingBackend(backend?: string): string | undefined { @@ -1041,19 +1419,20 @@ function extractEmbedJsonLine(stdout: string): string | undefined { return [...lines].reverse().find((line) => line.startsWith('{') && line.endsWith('}')); } -async function executeEmbeddingsViaPython(params: { +function buildEmbeddingPythonCode(params: { projectPath: string; backend?: string; model?: string; force: boolean; maxWorkers?: number; endpoints?: RotationEndpointConfig[]; -}): Promise<{ success: boolean; error?: string; progressMessages?: string[] }> { +}): string { const { projectPath, backend, model, force, maxWorkers, endpoints = [] } = params; - const pythonCode = ` + return ` import json import sys from pathlib import Path +from codexlens.storage.path_mapper import PathMapper from codexlens.storage.registry import RegistryStore from codexlens.cli.embedding_manager import generate_dense_embeddings_centralized @@ -1071,11 +1450,23 @@ registry = RegistryStore() registry.initialize() try: project = registry.get_project(target_path) - if project is None: + index_root = None + if project is not None: + index_root = Path(project.index_root) + else: + mapper = PathMapper() + index_db = mapper.source_to_index_db(target_path) + if index_db.exists(): + index_root = index_db.parent + else: + nearest = registry.find_nearest_index(target_path) + if nearest is not None: + index_root = Path(nearest.index_path).parent + + if index_root is None: print(json.dumps({"success": False, "error": f"No index found for: {target_path}"}), flush=True) sys.exit(1) - index_root = Path(project.index_root) result = generate_dense_embeddings_centralized( index_root, embedding_backend=backend, @@ -1099,6 +1490,118 @@ finally: .replace('__FORCE__', force ? 'True' : 'False') .replace('__MAX_WORKERS__', typeof maxWorkers === 'number' ? String(Math.max(1, Math.floor(maxWorkers))) : 'None') .replace('__ENDPOINTS_JSON__', JSON.stringify(endpoints).replace(/\\/g, '\\\\').replace(/'''/g, "\\'\\'\\'")); +} + +function spawnBackgroundEmbeddingsViaPython(params: { + projectPath: string; + backend?: string; + model?: string; + force: boolean; + maxWorkers?: number; + endpoints?: RotationEndpointConfig[]; +}): { success: boolean; error?: string } { + const { projectPath, backend, model } = params; + try { + const child = spawn(getVenvPythonPath(), ['-c', buildEmbeddingPythonCode(params)], { + cwd: projectPath, + shell: false, + detached: true, + stdio: 'ignore', + windowsHide: true, + env: { ...process.env, PYTHONIOENCODING: 'utf-8' }, + }); + + autoEmbedJobs.set(projectPath, { + startedAt: Date.now(), + backend, + model, + }); + + const cleanup = () => { + autoEmbedJobs.delete(projectPath); + }; + child.on('error', cleanup); + child.on('close', cleanup); + child.unref(); + return { success: true }; + } catch (error) { + return { + success: false, + error: error instanceof Error ? error.message : String(error), + }; + } +} + +async function maybeStartBackgroundAutoEmbed( + scope: SearchScope, + indexStatus: IndexStatus, +): Promise<{ note?: string; warning?: string }> { + if (!indexStatus.indexed || indexStatus.has_embeddings) { + return {}; + } + + if (!isAutoEmbedMissingEnabled(indexStatus.config)) { + return { + note: 'Automatic embedding warmup is disabled by CODEXLENS_AUTO_EMBED_MISSING=false.', + }; + } + + if (autoEmbedJobs.has(scope.workingDirectory)) { + return { + note: 'Background embedding build is already running for this path.', + }; + } + + const backend = normalizeEmbeddingBackend(indexStatus.config?.embedding_backend) ?? 'fastembed'; + const model = indexStatus.config?.embedding_model?.trim() || undefined; + const semanticStatus = await checkSemanticStatus(); + if (!semanticStatus.available) { + return { + warning: 'Automatic embedding warmup skipped because semantic dependencies are not ready.', + }; + } + + if (backend === 'litellm' && !semanticStatus.litellmAvailable) { + return { + warning: 'Automatic embedding warmup skipped because the LiteLLM embedder is not ready.', + }; + } + + const endpoints = resolveEmbeddingEndpoints(backend); + const configuredApiMaxWorkers = indexStatus.config?.api_max_workers; + const effectiveApiMaxWorkers = typeof configuredApiMaxWorkers === 'number' + ? Math.max(1, Math.floor(configuredApiMaxWorkers)) + : resolveApiWorkerCount(undefined, backend, endpoints); + const spawned = spawnBackgroundEmbeddingsViaPython({ + projectPath: scope.workingDirectory, + backend, + model, + force: false, + maxWorkers: effectiveApiMaxWorkers, + endpoints, + }); + + if (!spawned.success) { + return { + warning: `Automatic embedding warmup could not start: ${spawned.error}`, + }; + } + + return { + note: 'Background embedding build started for this path. Re-run semantic search shortly for vector results.', + }; +} + +async function executeEmbeddingsViaPython(params: { + projectPath: string; + backend?: string; + model?: string; + force: boolean; + maxWorkers?: number; + endpoints?: RotationEndpointConfig[]; +}): Promise<{ success: boolean; error?: string; progressMessages?: string[] }> { + const { projectPath } = params; + const pythonCode = buildEmbeddingPythonCode(params); return await new Promise((resolve) => { const child = spawn(getVenvPythonPath(), ['-c', pythonCode], { @@ -1343,6 +1846,7 @@ async function executeStatusAction(params: Params): Promise { // Embedding backend info const embeddingType = cfg.embedding_backend === 'litellm' ? 'API' : 'Local'; statusParts.push(`Embedding: ${embeddingType} (${cfg.embedding_model || 'default'})`); + statusParts.push(`Auto Embed Missing: ${isAutoEmbedMissingEnabled(cfg) ? 'on' : 'off'}`); if (typeof cfg.api_max_workers === 'number') { statusParts.push(`API Workers: ${cfg.api_max_workers}`); } @@ -1366,6 +1870,12 @@ async function executeStatusAction(params: Params): Promise { success: true, status: indexStatus, message: indexStatus.warning || statusParts.join(' | '), + metadata: { + action: 'status', + path: scope.workingDirectory, + warning: indexStatus.warning, + suggestions: buildIndexSuggestions(indexStatus, scope), + }, }; } @@ -1501,7 +2011,7 @@ async function executeWatchAction(params: Params): Promise { * Runs both exact (FTS) and ripgrep searches in parallel, merges and ranks results */ async function executeFuzzyMode(params: Params): Promise { - const { query, path = '.', maxResults = 5, extraFilesCount = 10, codeOnly = true, withDoc = false, excludeExtensions } = params; + const { query, path = '.', maxResults = 5, extraFilesCount = 10, codeOnly = true, withDoc = false, excludeExtensions, regex = true, tokenize = true } = params; // withDoc overrides codeOnly const effectiveCodeOnly = withDoc ? false : codeOnly; @@ -1513,14 +2023,40 @@ async function executeFuzzyMode(params: Params): Promise { } const timer = createTimer(); + const ftsWasBroken = codexLensFtsBackendBroken; + const ripgrepQueryMode = resolveRipgrepQueryMode(query, regex, tokenize); + const fuzzyWarnings: string[] = []; + + let skipExactReason: string | undefined; + if (ripgrepQueryMode.literalFallback) { + skipExactReason = 'Skipped CodexLens FTS backend for a literal code-pattern query; using ripgrep literal matching.'; + } else if (codexLensFtsBackendBroken) { + skipExactReason = 'CodexLens FTS backend disabled for this process due to CLI compatibility errors.'; + } // Run both searches in parallel const [ftsResult, ripgrepResult] = await Promise.allSettled([ - executeCodexLensExactMode(params), + skipExactReason + ? Promise.resolve({ success: false, error: skipExactReason }) + : executeCodexLensExactMode(params), executeRipgrepMode(params), ]); timer.mark('parallel_search'); + if (!skipExactReason && !ftsWasBroken && codexLensFtsBackendBroken) { + fuzzyWarnings.push('CodexLens FTS backend is incompatible with the current CLI runtime. Falling back to ripgrep results.'); + } + if (skipExactReason) { + fuzzyWarnings.push(skipExactReason); + } + if (ripgrepResult.status === 'fulfilled' && ripgrepResult.value.metadata?.warning) { + fuzzyWarnings.push(String(ripgrepResult.value.metadata.warning)); + } + const mergedSuggestions = mergeSuggestions( + ftsResult.status === 'fulfilled' ? ftsResult.value.metadata?.suggestions : undefined, + ripgrepResult.status === 'fulfilled' ? ripgrepResult.value.metadata?.suggestions : undefined, + ); + // Collect results from both sources const resultsMap = new Map(); @@ -1559,8 +2095,10 @@ async function executeFuzzyMode(params: Params): Promise { const normalizedResults = filteredFusedResults.map((item: any) => ({ file: item.file || item.path, line: item.line || 0, + endLine: item.endLine || item.line || 0, column: item.column || 0, content: item.content || '', + chunkLines: Array.isArray(item.chunkLines) ? item.chunkLines : undefined, score: item.fusion_score || 0, matchCount: item.matchCount, matchScore: item.matchScore, @@ -1572,6 +2110,7 @@ async function executeFuzzyMode(params: Params): Promise { // Log timing timer.log(); const timings = timer.getTimings(); + const usingExactResults = resultsMap.has('exact'); return { success: true, @@ -1579,10 +2118,14 @@ async function executeFuzzyMode(params: Params): Promise { extra_files: extra_files.length > 0 ? extra_files : undefined, metadata: { mode: 'fuzzy', - backend: 'fts+ripgrep', + backend: usingExactResults ? 'fts+ripgrep' : 'ripgrep', count: results.length, query, - note: `Fuzzy search using RRF fusion of FTS and ripgrep (weights: exact=${fusionWeights.exact}, ripgrep=${fusionWeights.ripgrep})`, + note: usingExactResults + ? `Fuzzy search using RRF fusion of FTS and ripgrep (weights: exact=${fusionWeights.exact}, ripgrep=${fusionWeights.ripgrep})` + : 'Fuzzy search resolved using ripgrep only.', + warning: mergeWarnings(...fuzzyWarnings), + suggestions: mergedSuggestions, timing: TIMING_ENABLED ? timings : undefined, }, }; @@ -1636,20 +2179,10 @@ async function executeAutoMode(params: Params): Promise { } // Add classification metadata - if (result.metadata) { - result.metadata.classified_as = classification.mode; - result.metadata.confidence = classification.confidence; - result.metadata.reasoning = classification.reasoning; - result.metadata.embeddings_coverage_percent = indexStatus.embeddings_coverage_percent; - result.metadata.index_status = indexStatus.indexed - ? (indexStatus.has_embeddings ? 'indexed' : 'partial') - : 'not_indexed'; - - // Add warning if needed - if (indexStatus.warning) { - result.metadata.warning = indexStatus.warning; - } - } + result.metadata = enrichMetadataWithIndexStatus(result.metadata, indexStatus, scope); + result.metadata.classified_as = classification.mode; + result.metadata.confidence = classification.confidence; + result.metadata.reasoning = classification.reasoning; return result; } @@ -1693,9 +2226,10 @@ async function executeRipgrepMode(params: Params): Promise { const result = await executeCodexLens(args, { cwd: scope.workingDirectory }); if (!result.success) { + noteCodexLensFtsCompatibility(result.error); return { success: false, - error: result.error, + error: summarizeBackendError(result.error), metadata: { mode: 'ripgrep', backend: 'codexlens-fallback', @@ -1740,7 +2274,7 @@ async function executeRipgrepMode(params: Params): Promise { } // Use ripgrep - request more results to support split - const { command, args, tokens } = buildRipgrepCommand({ + const { command, args, tokens, warning: queryModeWarning } = buildRipgrepCommand({ query, paths: scope.searchPaths, contextLines, @@ -1770,37 +2304,11 @@ async function executeRipgrepMode(params: Params): Promise { }); child.on('close', (code) => { - const allResults: ExactMatch[] = []; - const lines = stdout.split('\n').filter((line) => line.trim()); // Limit total results to prevent memory overflow (--max-count only limits per-file) const effectiveLimit = totalToFetch > 0 ? totalToFetch : 500; - - for (const line of lines) { - // Stop collecting if we've reached the limit - if (allResults.length >= effectiveLimit) { - resultLimitReached = true; - break; - } - - try { - const item = JSON.parse(line); - - if (item.type === 'match') { - const match: ExactMatch = { - file: item.data.path.text, - line: item.data.line_number, - column: - item.data.submatches && item.data.submatches[0] - ? item.data.submatches[0].start + 1 - : 1, - content: item.data.lines.text.trim(), - }; - allResults.push(match); - } - } catch { - continue; - } - } + const parsedResults = parseRipgrepJsonResults(stdout, effectiveLimit); + const allResults = parsedResults.results; + resultLimitReached = parsedResults.resultLimitReached; // Handle Windows device file errors gracefully (os error 1) // If we have results despite the error, return them as partial success @@ -1819,6 +2327,9 @@ async function executeRipgrepMode(params: Params): Promise { // Build warning message for various conditions const warnings: string[] = []; + if (queryModeWarning) { + warnings.push(queryModeWarning); + } if (resultLimitReached) { warnings.push(`Result limit reached (${effectiveLimit}). Use a more specific query or increase limit.`); } @@ -1918,15 +2429,17 @@ async function executeCodexLensExactMode(params: Params): Promise const result = await executeCodexLens(args, { cwd: scope.workingDirectory }); if (!result.success) { + noteCodexLensFtsCompatibility(result.error); return { success: false, - error: result.error, + error: summarizeBackendError(result.error), metadata: { mode: 'exact', backend: 'codexlens', count: 0, query, warning: mergeWarnings(indexStatus.warning, result.warning), + suggestions: buildIndexSuggestions(indexStatus, scope), }, }; } @@ -1993,6 +2506,7 @@ async function executeCodexLensExactMode(params: Params): Promise warning: mergeWarnings(indexStatus.warning, fuzzyResult.warning), note: 'No exact matches found, showing fuzzy results', fallback: 'fuzzy', + suggestions: buildIndexSuggestions(indexStatus, scope), }, }; } @@ -2012,6 +2526,7 @@ async function executeCodexLensExactMode(params: Params): Promise count: results.length, query, warning: mergeWarnings(indexStatus.warning, result.warning), + suggestions: buildIndexSuggestions(indexStatus, scope), }, }; } @@ -2080,6 +2595,7 @@ async function executeHybridMode(params: Params): Promise { count: 0, query, warning: mergeWarnings(indexStatus.warning, result.warning), + suggestions: buildIndexSuggestions(indexStatus, scope), }, }; } @@ -2126,6 +2642,7 @@ async function executeHybridMode(params: Params): Promise { count: 0, query, warning: mergeWarnings(indexStatus.warning, result.warning, 'Failed to parse JSON output'), + suggestions: buildIndexSuggestions(indexStatus, scope), }, }; } @@ -2158,6 +2675,7 @@ async function executeHybridMode(params: Params): Promise { query, note, warning: mergeWarnings(indexStatus.warning, result.warning), + suggestions: buildIndexSuggestions(indexStatus, scope), suggested_weights: getRRFWeights(query), timing: TIMING_ENABLED ? timings : undefined, }, @@ -2441,6 +2959,83 @@ function filterDominantBaselineScores( * Reference: codex-lens/src/codexlens/search/ranking.py * Formula: score(d) = Σ weight_source / (k + rank_source(d)) */ +function normalizeFusionSnippet(value: unknown): string | undefined { + if (typeof value !== 'string') { + return undefined; + } + + const normalized = value.replace(/\s+/g, ' ').trim(); + return normalized ? normalized.slice(0, 240) : undefined; +} + +function buildFusionIdentity(result: any): string | null { + const path = typeof result?.file === 'string' + ? result.file + : typeof result?.path === 'string' + ? result.path + : undefined; + + if (!path) { + return null; + } + + const line = typeof result?.line === 'number' && Number.isFinite(result.line) + ? result.line + : undefined; + const endLine = typeof result?.endLine === 'number' && Number.isFinite(result.endLine) + ? result.endLine + : line; + const column = typeof result?.column === 'number' && Number.isFinite(result.column) + ? result.column + : undefined; + + if (line !== undefined) { + return `${path}#L${line}-${endLine ?? line}:C${column ?? 0}`; + } + + const symbol = typeof result?.symbol === 'string' && result.symbol.trim() + ? result.symbol.trim() + : undefined; + const snippet = normalizeFusionSnippet(result?.content); + + if (symbol && snippet) { + return `${path}::${symbol}::${snippet}`; + } + if (snippet) { + return `${path}::${snippet}`; + } + if (symbol) { + return `${path}::${symbol}`; + } + + return path; +} + +function scoreFusionRepresentative(result: any): number { + let score = 0; + + if (typeof result?.line === 'number' && Number.isFinite(result.line)) { + score += 1000; + } + if (typeof result?.endLine === 'number' && Number.isFinite(result.endLine)) { + score += 250; + } + if (typeof result?.column === 'number' && Number.isFinite(result.column)) { + score += 50; + } + if (Array.isArray(result?.chunkLines) && result.chunkLines.length > 0) { + score += 500 + result.chunkLines.length; + } + if (typeof result?.symbol === 'string' && result.symbol.trim()) { + score += 50; + } + if (typeof result?.content === 'string') { + score += Math.min(result.content.length, 200); + } + + return score; +} + function applyRRFFusion( resultsMap: Map, weightsOrQuery: Record | string, @@ -2448,23 +3043,28 @@ function applyRRFFusion( k: number = 60, ): any[] { const weights = typeof weightsOrQuery === 'string' ? getRRFWeights(weightsOrQuery) : weightsOrQuery; - const pathScores = new Map(); + const fusedScores = new Map(); resultsMap.forEach((results, source) => { const weight = weights[source] || 0; if (weight === 0 || !results) return; results.forEach((result, rank) => { - const path = result.file || result.path; - if (!path) return; + const identity = buildFusionIdentity(result); + if (!identity) return; const rrfContribution = weight / (k + rank + 1); + const representativeScore = scoreFusionRepresentative(result); - if (!pathScores.has(path)) { - pathScores.set(path, { score: 0, result, sources: [] }); + if (!fusedScores.has(identity)) { + fusedScores.set(identity, { score: 0, result, sources: [], representativeScore }); } - const entry = pathScores.get(path)!; + const entry = fusedScores.get(identity)!; entry.score += rrfContribution; + if (representativeScore > entry.representativeScore) { + entry.result = result; + entry.representativeScore = representativeScore; + } if (!entry.sources.includes(source)) { entry.sources.push(source); } @@ -2472,7 +3072,7 @@ function applyRRFFusion( }); // Sort by fusion score descending - return Array.from(pathScores.values()) + return Array.from(fusedScores.values()) .sort((a, b) => b.score - a.score) .slice(0, limit) .map(item => ({ @@ -2674,9 +3274,9 @@ Recommended MCP flow: use **action=\"search\"** for lookups, **action=\"init\"** }, output_mode: { type: 'string', - enum: ['full', 'files_only', 'count'], - description: 'Output format: full (default), files_only (paths only), count (per-file counts)', - default: 'full', + enum: [...SEARCH_OUTPUT_MODES], + description: 'Output format: ace (default, ACE-style grouped code sections + rendered text), full (raw matches), files_only (paths only), count (per-file counts)', + default: 'ace', }, path: { type: 'string', @@ -3036,12 +3636,127 @@ function applyPagination( }; } +function formatChunkRange(section: AceLikeSection): string { + if (section.lines && section.lines.length > 0) { + const start = section.lines[0]?.line; + const end = section.lines[section.lines.length - 1]?.line; + if (typeof start === 'number' && typeof end === 'number' && end > start) { + return `${start}-${end}`; + } + if (typeof start === 'number') { + return String(start); + } + } + if (section.line && section.endLine && section.endLine > section.line) { + return `${section.line}-${section.endLine}`; + } + if (section.line) { + return String(section.line); + } + return '?'; +} + +function renderAceSnippet(section: AceLikeSection): string[] { + if (section.lines && section.lines.length > 0) { + return section.lines.map((line) => { + const marker = line.isMatch ? '>' : ' '; + return `${marker} ${String(line.line).padStart(4, ' ')} | ${line.text}`; + }); + } + + return section.snippet.split(/\r?\n/).map((line) => ` ${line}`); +} + +function formatAceLikeOutput( + results: ExactMatch[] | SemanticMatch[] | GraphMatch[] | FileMatch[] | unknown[], +): AceLikeOutput { + const sections: AceLikeSection[] = []; + + for (const result of results) { + const candidate = result as Record; + const path = typeof candidate.file === 'string' + ? candidate.file + : typeof candidate.path === 'string' + ? candidate.path + : undefined; + + if (!path) { + continue; + } + + const line = typeof candidate.line === 'number' && candidate.line > 0 ? candidate.line : undefined; + const column = typeof candidate.column === 'number' && candidate.column > 0 ? candidate.column : undefined; + const score = typeof candidate.score === 'number' ? candidate.score : undefined; + const symbol = typeof candidate.symbol === 'string' ? candidate.symbol : null; + const rawSnippet = typeof candidate.content === 'string' + ? candidate.content + : typeof candidate.name === 'string' + ? candidate.name + : typeof candidate.type === 'string' + ? `[${candidate.type}]` + : ''; + + sections.push({ + path, + line, + endLine: typeof candidate.endLine === 'number' && candidate.endLine >= (line ?? 0) ? candidate.endLine : line, + column, + score, + symbol, + snippet: rawSnippet || '[no snippet available]', + lines: Array.isArray(candidate.chunkLines) ? candidate.chunkLines as ChunkLine[] : undefined, + }); + } + + const groupsMap = new Map(); + for (const section of sections) { + if (!groupsMap.has(section.path)) { + groupsMap.set(section.path, { + path: section.path, + sections: [], + total_matches: 0, + }); + } + const group = groupsMap.get(section.path)!; + group.sections.push(section); + group.total_matches += 1; + } + const groups = [...groupsMap.values()]; + + const textParts = ['The following code sections were retrieved:']; + for (const group of groups) { + textParts.push(''); + textParts.push(`Path: ${group.path}`); + group.sections.forEach((section, index) => { + const chunkLabel = group.sections.length > 1 ? `Chunk ${index + 1}` : 'Chunk'; + textParts.push(`${chunkLabel}: lines ${formatChunkRange(section)}${section.score !== undefined ? ` | score=${section.score.toFixed(4)}` : ''}`); + if (section.symbol) { + textParts.push(`Symbol: ${section.symbol}`); + } + for (const snippetLine of renderAceSnippet(section)) { + textParts.push(snippetLine); + } + if (index < group.sections.length - 1) { + textParts.push(''); + } + }); + } + + return { + format: 'ace', + text: textParts.join('\n'), + groups, + sections, + total: sections.length, + }; +} + /** * Transform results based on output_mode */ function transformOutput( results: ExactMatch[] | SemanticMatch[] | GraphMatch[] | unknown[], - outputMode: 'full' | 'files_only' | 'count' + outputMode: SearchOutputMode ): unknown { if (!Array.isArray(results)) { return results; @@ -3067,12 +3782,29 @@ function transformOutput( total: results.length, }; } + case 'ace': + return formatAceLikeOutput(results); case 'full': default: return results; } } +function enrichMetadataWithIndexStatus( + metadata: SearchMetadata | undefined, + indexStatus: IndexStatus, + scope: SearchScope, +): SearchMetadata { + const nextMetadata: SearchMetadata = { ...(metadata ?? {}) }; + nextMetadata.embeddings_coverage_percent = indexStatus.embeddings_coverage_percent; + nextMetadata.index_status = indexStatus.indexed + ? (indexStatus.has_embeddings ? 'indexed' : 'partial') + : 'not_indexed'; + nextMetadata.warning = mergeWarnings(nextMetadata.warning, indexStatus.warning); + nextMetadata.suggestions = mergeSuggestions(nextMetadata.suggestions, buildIndexSuggestions(indexStatus, scope)); + return nextMetadata; +} + // Handler function export async function handler(params: Record): Promise> { const parsed = ParamsSchema.safeParse(params); @@ -3158,11 +3890,21 @@ export async function handler(params: Record): Promise): Promise 0) { + advisoryLines.push('', 'Suggestions:'); + for (const suggestion of result.metadata.suggestions) { + advisoryLines.push(`- ${suggestion.title}: ${suggestion.command}`); + advisoryLines.push(` ${suggestion.reason}`); + } + } + const aceResults = result.results as AceLikeOutput; + if (advisoryLines.length > 0) { + aceResults.text += `\n${advisoryLines.join('\n')}`; + } + } + } } // Add deprecation warning if applicable @@ -3199,10 +3971,14 @@ export async function handler(params: Record): Promise { + const timeout = setTimeout(() => { + reject(new Error('Server start timeout')); + }, 5000); + + const onData = (data) => { + const message = data.toString(); + if (message.includes('started')) { + clearTimeout(timeout); + child.stderr.off('data', onData); + child.off('exit', onExit); + resolve(); + } + }; + + const onExit = (code, signal) => { + clearTimeout(timeout); + child.stderr.off('data', onData); + reject(new Error(`Server exited before start (code=${code}, signal=${signal})`)); + }; + + child.stderr.on('data', onData); + child.once('exit', onExit); + }); +} describe('MCP Server', () => { let serverProcess; before(async () => { // Start the MCP server - const serverPath = join(__dirname, '../bin/ccw-mcp.js'); serverProcess = spawn('node', [serverPath], { stdio: ['pipe', 'pipe', 'pipe'] }); // Wait for server to start - await new Promise((resolve) => { - serverProcess.stderr.once('data', (data) => { - const message = data.toString(); - if (message.includes('started')) { - resolve(); - } - }); - }); + await waitForServerStart(serverProcess); }); after(() => { @@ -157,4 +177,43 @@ describe('MCP Server', () => { // Error could be "not enabled" (filtered by default tools) or "not found" (all tools enabled) assert(response.result.content[0].text.includes('not enabled') || response.result.content[0].text.includes('not found')); }); + it('should exit when stdout disconnects during a request', async () => { + const disconnectedProcess = spawn('node', [serverPath], { + stdio: ['pipe', 'pipe', 'pipe'] + }); + + try { + await waitForServerStart(disconnectedProcess); + + const exitPromise = new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + disconnectedProcess.kill('SIGKILL'); + reject(new Error('Server did not exit after stdout disconnect')); + }, 1500); + + disconnectedProcess.once('exit', (code, signal) => { + clearTimeout(timeout); + resolve({ code, signal }); + }); + }); + + // Simulate the MCP client disappearing before the server sends its response. + disconnectedProcess.stdout.destroy(); + disconnectedProcess.stdin.write(JSON.stringify({ + jsonrpc: '2.0', + id: 4, + method: 'tools/list', + params: {} + }) + '\n'); + + const exitResult = await exitPromise; + + assert.equal(exitResult.code, 0); + assert.equal(exitResult.signal, null); + } finally { + if (disconnectedProcess.exitCode === null) { + disconnectedProcess.kill('SIGKILL'); + } + } + }); }); diff --git a/ccw/tests/smart-search-mcp-usage.test.js b/ccw/tests/smart-search-mcp-usage.test.js index df071c3a..2097d5e7 100644 --- a/ccw/tests/smart-search-mcp-usage.test.js +++ b/ccw/tests/smart-search-mcp-usage.test.js @@ -45,6 +45,19 @@ describe('Smart Search MCP usage defaults and path handling', async () => { assert.match(props.apiMaxWorkers.description, /endpoint pool/i); assert.match(schema.description, /apiMaxWorkers=8/i); assert.match(props.path.description, /single file path/i); + assert.ok(props.output_mode.enum.includes('ace')); + assert.match(props.output_mode.description, /ACE-style/i); + assert.equal(props.output_mode.default, 'ace'); + }); + + it('defaults auto embedding warmup to enabled unless explicitly disabled', () => { + if (!smartSearchModule) return; + + const { __testables } = smartSearchModule; + assert.equal(__testables.isAutoEmbedMissingEnabled(undefined), true); + assert.equal(__testables.isAutoEmbedMissingEnabled({}), true); + assert.equal(__testables.isAutoEmbedMissingEnabled({ embedding_auto_embed_missing: true }), true); + assert.equal(__testables.isAutoEmbedMissingEnabled({ embedding_auto_embed_missing: false }), false); }); it('honors explicit small limit values', async () => { @@ -58,6 +71,7 @@ describe('Smart Search MCP usage defaults and path handling', async () => { action: 'search', query: 'hit', path: dir, + output_mode: 'full', limit: 1, regex: false, tokenize: false, @@ -82,6 +96,7 @@ describe('Smart Search MCP usage defaults and path handling', async () => { action: 'search', query: 'TARGET_TOKEN', path: target, + output_mode: 'full', regex: false, tokenize: false, }); @@ -112,6 +127,7 @@ describe('Smart Search MCP usage defaults and path handling', async () => { action: 'search', query: wrappedQuery, path: wrappedPath, + output_mode: 'full', regex: false, caseSensitive: false, }); @@ -121,6 +137,66 @@ describe('Smart Search MCP usage defaults and path handling', async () => { assert.ok(toolResult.result.results.length >= 1); }); + it('falls back to literal ripgrep matching for invalid regex-like code queries', async () => { + if (!smartSearchModule) return; + + const dir = createWorkspace(); + const target = join(dir, 'component.ts'); + writeFileSync(target, 'defineExpose({ handleResize });\n'); + + const toolResult = await smartSearchModule.handler({ + action: 'search', + query: 'defineExpose({ handleResize', + path: dir, + output_mode: 'full', + limit: 5, + }); + + assert.equal(toolResult.success, true, toolResult.error); + assert.equal(toolResult.result.success, true); + assert.ok(toolResult.result.results.length >= 1); + assert.match(toolResult.result.metadata.warning, /literal ripgrep matching/i); + }); + + it('renders grouped ace-style output by default with multi-line chunks', async () => { + if (!smartSearchModule) return; + + const dir = createWorkspace(); + const target = join(dir, 'ace-target.ts'); + writeFileSync(target, [ + 'const before = 1;', + 'const TARGET_TOKEN = 1;', + 'const after = 2;', + '', + 'function useToken() {', + ' return TARGET_TOKEN;', + '}', + ].join('\n')); + + const toolResult = await smartSearchModule.handler({ + action: 'search', + query: 'TARGET_TOKEN', + path: dir, + contextLines: 1, + regex: false, + tokenize: false, + }); + + assert.equal(toolResult.success, true, toolResult.error); + assert.equal(toolResult.result.success, true); + assert.equal(toolResult.result.results.format, 'ace'); + assert.equal(Array.isArray(toolResult.result.results.groups), true); + assert.equal(Array.isArray(toolResult.result.results.sections), true); + assert.equal(toolResult.result.results.groups.length, 1); + assert.equal(toolResult.result.results.groups[0].sections.length, 2); + assert.match(toolResult.result.results.text, /The following code sections were retrieved:/); + assert.match(toolResult.result.results.text, /Path: .*ace-target\.ts/); + assert.match(toolResult.result.results.text, /Chunk 1: lines 1-3/); + assert.match(toolResult.result.results.text, />\s+2 \| const TARGET_TOKEN = 1;/); + assert.match(toolResult.result.results.text, /Chunk 2: lines 5-7/); + assert.equal(toolResult.result.metadata.pagination.total >= 1, true); + }); + it('defaults embed selection to local-fast for bulk indexing', () => { if (!smartSearchModule) return; @@ -181,6 +257,50 @@ describe('Smart Search MCP usage defaults and path handling', async () => { assert.equal(smartSearchModule.__testables.hasCentralizedVectorArtifacts(dir), true); }); + it('recognizes CodexLens CLI compatibility failures and invalid regex fallback', () => { + if (!smartSearchModule) return; + + const compatibilityError = [ + 'UsageError: Got unexpected extra arguments (20 0 fts)', + 'TypeError: TyperArgument.make_metavar() takes 1 positional argument but 2 were given', + ].join('\n'); + + assert.equal( + smartSearchModule.__testables.isCodexLensCliCompatibilityError(compatibilityError), + true, + ); + + const resolution = smartSearchModule.__testables.resolveRipgrepQueryMode( + 'defineExpose({ handleResize', + true, + true, + ); + + assert.equal(resolution.regex, false); + assert.equal(resolution.literalFallback, true); + assert.match(resolution.warning, /literal ripgrep matching/i); + }); + + it('builds actionable index suggestions for unhealthy index states', () => { + if (!smartSearchModule) return; + + const suggestions = smartSearchModule.__testables.buildIndexSuggestions( + { + indexed: true, + has_embeddings: false, + embeddings_coverage_percent: 0, + warning: 'Index exists but no embeddings generated. Run smart_search(action="embed") to build the vector index.', + }, + { + workingDirectory: 'D:/tmp/demo', + searchPaths: ['.'], + }, + ); + + assert.equal(Array.isArray(suggestions), true); + assert.match(suggestions[0].command, /smart_search\(action="embed"/); + }); + it('surfaces backend failure details when fuzzy search fully fails', async () => { if (!smartSearchModule) return; @@ -189,6 +309,7 @@ describe('Smart Search MCP usage defaults and path handling', async () => { action: 'search', query: 'TARGET_TOKEN', path: missingPath, + output_mode: 'full', regex: false, tokenize: false, }); diff --git a/codex-lens/src/codexlens/cli/commands.py b/codex-lens/src/codexlens/cli/commands.py index 9ecce30b..f6a7a3ae 100644 --- a/codex-lens/src/codexlens/cli/commands.py +++ b/codex-lens/src/codexlens/cli/commands.py @@ -1566,6 +1566,8 @@ def config( result["embedding_backend"] = embedding["backend"] if embedding.get("model"): result["embedding_model"] = embedding["model"] + if embedding.get("auto_embed_missing") is not None: + result["embedding_auto_embed_missing"] = embedding["auto_embed_missing"] except (json.JSONDecodeError, OSError): pass # Settings file not readable, continue with defaults @@ -1584,6 +1586,10 @@ def config( if env_overrides.get("EMBEDDING_BACKEND"): result["embedding_backend"] = env_overrides["EMBEDDING_BACKEND"] result["embedding_backend_source"] = ".env" + auto_embed_missing_override = env_overrides.get("CODEXLENS_AUTO_EMBED_MISSING") or env_overrides.get("AUTO_EMBED_MISSING") + if auto_embed_missing_override: + result["embedding_auto_embed_missing"] = auto_embed_missing_override.lower() in ("true", "1", "yes", "on") + result["embedding_auto_embed_missing_source"] = ".env" if env_overrides.get("RERANKER_MODEL"): result["reranker_model"] = env_overrides["RERANKER_MODEL"] result["reranker_model_source"] = ".env" @@ -1613,6 +1619,9 @@ def config( model = result.get('embedding_model', 'code') model_source = result.get('embedding_model_source', 'settings.json') console.print(f" Model: {model} [dim]({model_source})[/dim]") + auto_embed_missing = result.get("embedding_auto_embed_missing", True) + auto_embed_missing_source = result.get("embedding_auto_embed_missing_source", "settings.json") + console.print(f" Auto Embed Missing: {auto_embed_missing} [dim]({auto_embed_missing_source})[/dim]") # Show reranker settings console.print(f"\n[bold]Reranker[/bold]") @@ -1647,7 +1656,7 @@ def config( # Handle reranker and embedding settings (stored in settings.json) elif key in ("reranker_backend", "reranker_model", "reranker_enabled", "reranker_top_k", - "embedding_backend", "embedding_model", "reranker_api_provider"): + "embedding_backend", "embedding_model", "embedding_auto_embed_missing", "reranker_api_provider"): settings_file = Path.home() / ".codexlens" / "settings.json" settings_file.parent.mkdir(parents=True, exist_ok=True) @@ -1680,6 +1689,8 @@ def config( settings["embedding"]["backend"] = value elif key == "embedding_model": settings["embedding"]["model"] = value + elif key == "embedding_auto_embed_missing": + settings["embedding"]["auto_embed_missing"] = value.lower() in ("true", "1", "yes", "on") # Save settings settings_file.write_text(json.dumps(settings, indent=2), encoding="utf-8") diff --git a/codex-lens/src/codexlens/config.py b/codex-lens/src/codexlens/config.py index d6d59a34..c76841b2 100644 --- a/codex-lens/src/codexlens/config.py +++ b/codex-lens/src/codexlens/config.py @@ -110,6 +110,7 @@ class Config: embedding_model: str = "code" # For fastembed: profile (fast/code/multilingual/balanced) # For litellm: model name from config (e.g., "qwen3-embedding") embedding_use_gpu: bool = True # For fastembed: whether to use GPU acceleration + embedding_auto_embed_missing: bool = True # Auto-build embeddings in background when indexed projects are searched without vectors # Indexing/search optimizations global_symbol_index_enabled: bool = True # Enable project-wide symbol index fast path @@ -281,6 +282,7 @@ class Config: "backend": self.embedding_backend, "model": self.embedding_model, "use_gpu": self.embedding_use_gpu, + "auto_embed_missing": self.embedding_auto_embed_missing, "pool_enabled": self.embedding_pool_enabled, "strategy": self.embedding_strategy, "cooldown": self.embedding_cooldown, @@ -376,6 +378,8 @@ class Config: self.embedding_model = embedding["model"] if "use_gpu" in embedding: self.embedding_use_gpu = embedding["use_gpu"] + if "auto_embed_missing" in embedding: + self.embedding_auto_embed_missing = embedding["auto_embed_missing"] # Load multi-endpoint configuration if "endpoints" in embedding: @@ -782,6 +786,14 @@ class Config: else: log.warning("Invalid EMBEDDING_BACKEND in .env: %r", embedding_backend) + auto_embed_missing = get_env("AUTO_EMBED_MISSING") + if auto_embed_missing: + self.embedding_auto_embed_missing = _parse_bool(auto_embed_missing) + log.debug( + "Overriding embedding_auto_embed_missing from .env: %s", + self.embedding_auto_embed_missing, + ) + embedding_pool = get_env("EMBEDDING_POOL_ENABLED") if embedding_pool: value = embedding_pool.lower() diff --git a/codex-lens/src/codexlens/env_config.py b/codex-lens/src/codexlens/env_config.py index 0c502db1..87dac45c 100644 --- a/codex-lens/src/codexlens/env_config.py +++ b/codex-lens/src/codexlens/env_config.py @@ -33,6 +33,7 @@ ENV_VARS = { # Embedding configuration (overrides settings.json) "EMBEDDING_MODEL": "Embedding model/profile name (overrides settings.json)", "EMBEDDING_BACKEND": "Embedding backend: fastembed, litellm", + "AUTO_EMBED_MISSING": "Auto-build embeddings in background when indexed projects are searched without vectors: true/false", "EMBEDDING_API_KEY": "API key for embedding service", "EMBEDDING_API_BASE": "Base URL for embedding API", "EMBEDDING_POOL_ENABLED": "Enable embedding high availability pool: true/false", diff --git a/docs/mcp/tools.md b/docs/mcp/tools.md index 8939d21d..8b43d5f5 100644 --- a/docs/mcp/tools.md +++ b/docs/mcp/tools.md @@ -98,16 +98,18 @@ Unified search with content search, file discovery, and semantic search. { "name": "smart_search", "parameters": { - "action": "search | find_files | init | status", + "action": "search | find_files | init | init_force | embed | status | update | watch", "query": "string (for search)", "pattern": "glob pattern (for find_files)", "mode": "fuzzy | semantic (default: fuzzy)", - "output_mode": "full | files_only | count", - "maxResults": "number (default: 20)" + "output_mode": "ace | full | files_only | count (default: ace)", + "maxResults": "number (default: 5)" } } ``` +`search` now defaults to `output_mode: "ace"`, which groups results by file and renders multi-line code chunks. When the index is missing, embeddings are incomplete, or status parsing looks unhealthy, the response metadata includes actionable `smart_search(...)` suggestions such as `init`, `status`, or `embed`. If an index exists but vectors are missing, `smart_search` can also start a background embedding build automatically when `CODEXLENS_AUTO_EMBED_MISSING` is enabled (default: `true`). + **Usage:** ```javascript // Fuzzy search (default) diff --git a/package-lock.json b/package-lock.json index e23cf820..eb055911 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "claude-code-workflow", - "version": "7.2.4", + "version": "7.2.7", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "claude-code-workflow", - "version": "7.2.4", + "version": "7.2.7", "hasInstallScript": true, "license": "MIT", "workspaces": [ diff --git a/package.json b/package.json index d0002177..06aca2bb 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "claude-code-workflow", - "version": "7.2.6", + "version": "7.2.7", "description": "JSON-driven multi-agent development framework with intelligent CLI orchestration (Gemini/Qwen/Codex), context-first architecture, and automated workflow execution", "type": "module", "main": "ccw/dist/index.js",