From df69f997e4898d4f0c25259a60a3359dfd242dda Mon Sep 17 00:00:00 2001 From: catlog22 Date: Wed, 18 Mar 2026 11:35:51 +0800 Subject: [PATCH] Remove outdated tests for CodexLens and LiteLLM client, refactor Smart Search MCP usage tests to use new command structure, and clean up unified vector index tests. --- .../dashboard/widgets/WorkflowTaskWidget.tsx | 32 - .../src/components/layout/Sidebar.tsx | 1 - .../components/mcp/CcwToolsMcpCard.test.tsx | 4 +- .../src/components/mcp/CcwToolsMcpCard.tsx | 4 - .../src/components/shared/IndexManager.tsx | 227 - ccw/frontend/src/components/shared/index.ts | 3 - ccw/frontend/src/hooks/index.ts | 10 - ccw/frontend/src/hooks/useIndex.ts | 142 - ccw/frontend/src/hooks/useV2SearchManager.ts | 159 - ccw/frontend/src/lib/api.ts | 54 +- ccw/frontend/src/locales/en/mcp-manager.json | 4 - ccw/frontend/src/locales/zh/mcp-manager.json | 4 - .../src/pages/CodexLensManagerPage.test.tsx | 196 - .../src/pages/CodexLensManagerPage.tsx | 277 -- ccw/frontend/src/pages/index.ts | 1 - ccw/frontend/src/router.tsx | 6 - ccw/frontend/src/test/i18n.tsx | 4 - ccw/frontend/src/types/index.ts | 3 - ccw/scripts/IMPLEMENTATION-SUMMARY.md | 226 - ccw/scripts/QUICK-REFERENCE.md | 135 - ccw/scripts/README-memory-embedder.md | 157 - ccw/scripts/memory-embedder-example.ts | 184 - ccw/scripts/memory_embedder.py | 428 -- ccw/scripts/test_memory_embedder.py | 245 -- ccw/scripts/unified_memory_embedder.py | 473 --- ccw/src/core/routes/core-memory-routes.ts | 8 +- ccw/src/core/routes/mcp-routes.ts | 32 +- ccw/src/mcp-server/index.ts | 20 +- ccw/src/tools/codex-lens.ts | 213 - ccw/src/tools/index.ts | 65 +- ccw/src/tools/smart-context.ts | 4 +- ccw/src/tools/smart-search.ts | 3686 ----------------- ccw/src/tools/smart-search.ts.backup | 1233 ------ .../codex-lens-bootstrap-fallback.test.js | 93 - ccw/tests/codex-lens-cli-compat.test.js | 139 - ccw/tests/codex-lens-integration.test.js | 485 --- ccw/tests/codex-lens.test.js | 521 --- ccw/tests/e2e/mcp-tools.e2e.test.ts | 83 +- ccw/tests/litellm-client.test.ts | 403 -- ccw/tests/mcp-server.test.js | 2 +- ccw/tests/smart-search-enrich.test.js | 256 -- ccw/tests/smart-search-intent.test.js | 141 - ccw/tests/smart-search-mcp-usage.test.js | 703 ---- ccw/tests/smart-search.test.ts | 71 - ccw/tests/unified-vector-index.test.ts | 97 - 45 files changed, 64 insertions(+), 11170 deletions(-) delete mode 100644 ccw/frontend/src/components/shared/IndexManager.tsx delete mode 100644 ccw/frontend/src/hooks/useIndex.ts delete mode 100644 ccw/frontend/src/hooks/useV2SearchManager.ts delete mode 100644 ccw/frontend/src/pages/CodexLensManagerPage.test.tsx delete mode 100644 ccw/frontend/src/pages/CodexLensManagerPage.tsx delete mode 100644 ccw/scripts/IMPLEMENTATION-SUMMARY.md delete mode 100644 ccw/scripts/QUICK-REFERENCE.md delete mode 100644 ccw/scripts/README-memory-embedder.md delete mode 100644 ccw/scripts/memory-embedder-example.ts delete mode 100644 ccw/scripts/memory_embedder.py delete mode 100644 ccw/scripts/test_memory_embedder.py delete mode 100644 ccw/scripts/unified_memory_embedder.py delete mode 100644 ccw/src/tools/codex-lens.ts delete mode 100644 ccw/src/tools/smart-search.ts delete mode 100644 ccw/src/tools/smart-search.ts.backup delete mode 100644 ccw/tests/codex-lens-bootstrap-fallback.test.js delete mode 100644 ccw/tests/codex-lens-cli-compat.test.js delete mode 100644 ccw/tests/codex-lens-integration.test.js delete mode 100644 ccw/tests/codex-lens.test.js delete mode 100644 ccw/tests/litellm-client.test.ts delete mode 100644 ccw/tests/smart-search-enrich.test.js delete mode 100644 ccw/tests/smart-search-intent.test.js delete mode 100644 ccw/tests/smart-search-mcp-usage.test.js delete mode 100644 ccw/tests/smart-search.test.ts delete mode 100644 ccw/tests/unified-vector-index.test.ts diff --git a/ccw/frontend/src/components/dashboard/widgets/WorkflowTaskWidget.tsx b/ccw/frontend/src/components/dashboard/widgets/WorkflowTaskWidget.tsx index e15687ad..0d224128 100644 --- a/ccw/frontend/src/components/dashboard/widgets/WorkflowTaskWidget.tsx +++ b/ccw/frontend/src/components/dashboard/widgets/WorkflowTaskWidget.tsx @@ -14,7 +14,6 @@ import { Sparkline } from '@/components/charts/Sparkline'; import { useWorkflowStatusCounts } from '@/hooks/useWorkflowStatusCounts'; import { useDashboardStats } from '@/hooks/useDashboardStats'; import { useProjectOverview } from '@/hooks/useProjectOverview'; -import { useIndexStatus } from '@/hooks/useIndex'; import { useSessions } from '@/hooks/useSessions'; import { cn } from '@/lib/utils'; import type { TaskData } from '@/types/store'; @@ -40,7 +39,6 @@ import { Sparkles, BarChart3, PieChart as PieChartIcon, - Database, } from 'lucide-react'; export interface WorkflowTaskWidgetProps { @@ -187,8 +185,6 @@ function WorkflowTaskWidgetComponent({ className }: WorkflowTaskWidgetProps) { const { data, isLoading } = useWorkflowStatusCounts(); const { stats, isLoading: statsLoading } = useDashboardStats({ refetchInterval: 60000 }); const { projectOverview, isLoading: projectLoading } = useProjectOverview(); - const { status: indexStatus } = useIndexStatus({ refetchInterval: 30000 }); - // Fetch real sessions data const { activeSessions, isLoading: sessionsLoading } = useSessions({ filter: { location: 'active' }, @@ -328,34 +324,6 @@ function WorkflowTaskWidgetComponent({ className }: WorkflowTaskWidgetProps) { {formatMessage({ id: 'projectOverview.devIndex.category.enhancements' })} - {/* Index Status Indicator */} -
-
- - {indexStatus?.status === 'building' && ( - - - - - )} -
- - {indexStatus?.totalFiles || 0} - - {formatMessage({ id: 'home.indexStatus.label' })} -
{/* Date + Expand Button */} diff --git a/ccw/frontend/src/components/layout/Sidebar.tsx b/ccw/frontend/src/components/layout/Sidebar.tsx index d0c1f61f..4b3b5914 100644 --- a/ccw/frontend/src/components/layout/Sidebar.tsx +++ b/ccw/frontend/src/components/layout/Sidebar.tsx @@ -114,7 +114,6 @@ const navGroupDefinitions: NavGroupDef[] = [ titleKey: 'navigation.groups.configuration', icon: Cog, items: [ - { path: '/settings/codexlens', labelKey: 'navigation.main.codexlens', icon: Sparkles }, { path: '/api-settings', labelKey: 'navigation.main.apiSettings', icon: Server }, { path: '/settings', labelKey: 'navigation.main.settings', icon: Settings, end: true }, ], diff --git a/ccw/frontend/src/components/mcp/CcwToolsMcpCard.test.tsx b/ccw/frontend/src/components/mcp/CcwToolsMcpCard.test.tsx index 455b8d5e..ff3d5155 100644 --- a/ccw/frontend/src/components/mcp/CcwToolsMcpCard.test.tsx +++ b/ccw/frontend/src/components/mcp/CcwToolsMcpCard.test.tsx @@ -139,7 +139,7 @@ describe('CcwToolsMcpCard', () => { render( { const [payload] = updateClaudeMock.mock.calls[0] ?? []; expect(payload).toEqual( expect.objectContaining({ - enabledTools: ['write_file', 'smart_search'], + enabledTools: ['write_file', 'edit_file'], }) ); }); diff --git a/ccw/frontend/src/components/mcp/CcwToolsMcpCard.tsx b/ccw/frontend/src/components/mcp/CcwToolsMcpCard.tsx index 421418b0..20a93ec2 100644 --- a/ccw/frontend/src/components/mcp/CcwToolsMcpCard.tsx +++ b/ccw/frontend/src/components/mcp/CcwToolsMcpCard.tsx @@ -18,7 +18,6 @@ import { HardDrive, MessageCircleQuestion, MessagesSquare, - SearchCode, ChevronDown, ChevronRight, Globe, @@ -110,7 +109,6 @@ export const CCW_MCP_TOOLS: CcwTool[] = [ { name: 'read_many_files', desc: 'Read multiple files/dirs', core: true }, { name: 'core_memory', desc: 'Core memory management', core: true }, { name: 'ask_question', desc: 'Interactive questions (A2UI)', core: false }, - { name: 'smart_search', desc: 'Intelligent code search', core: true }, { name: 'team_msg', desc: 'Agent team message bus', core: false }, ]; @@ -572,8 +570,6 @@ function getToolIcon(toolName: string): React.ReactElement { return ; case 'ask_question': return ; - case 'smart_search': - return ; case 'team_msg': return ; default: diff --git a/ccw/frontend/src/components/shared/IndexManager.tsx b/ccw/frontend/src/components/shared/IndexManager.tsx deleted file mode 100644 index 605b0c52..00000000 --- a/ccw/frontend/src/components/shared/IndexManager.tsx +++ /dev/null @@ -1,227 +0,0 @@ -// ======================================== -// IndexManager Component -// ======================================== -// Component for managing code index with status display and rebuild functionality - -import * as React from 'react'; -import { useIntl } from 'react-intl'; -import { Database, RefreshCw, AlertCircle, CheckCircle2, Clock } from 'lucide-react'; -import { Card } from '@/components/ui/Card'; -import { Button } from '@/components/ui/Button'; -import { StatCard } from '@/components/shared/StatCard'; -import { Badge } from '@/components/ui/Badge'; -import { useIndex } from '@/hooks/useIndex'; -import { cn } from '@/lib/utils'; - -// ========== Types ========== - -export interface IndexManagerProps { - className?: string; -} - -// ========== Helper Components ========== - -/** - * Progress bar for index rebuild - */ -function IndexProgressBar({ progress, status }: { progress?: number; status: string }) { - const { formatMessage } = useIntl(); - - if (status !== 'building' || progress === undefined) return null; - - return ( -
-
- - {formatMessage({ id: 'index.status.building' })} - - {progress}% -
-
-
-
-
- ); -} - -/** - * Status badge component - */ -function IndexStatusBadge({ status }: { status: string }) { - const { formatMessage } = useIntl(); - - const config: Record = { - idle: { variant: 'secondary', label: formatMessage({ id: 'index.status.idle' }) }, - building: { variant: 'default', label: formatMessage({ id: 'index.status.building' }) }, - completed: { variant: 'outline', label: formatMessage({ id: 'index.status.completed' }) }, - failed: { variant: 'destructive', label: formatMessage({ id: 'index.status.failed' }) }, - }; - - const { variant, label } = config[status] ?? config.idle; - - return ( - - {label} - - ); -} - -// ========== Main Component ========== - -/** - * IndexManager component for displaying index status and managing rebuild operations - * - * @example - * ```tsx - * - * ``` - */ -export function IndexManager({ className }: IndexManagerProps) { - const { formatMessage } = useIntl(); - const { status, isLoading, rebuildIndex, isRebuilding, rebuildError, refetch } = useIndex(); - - // Auto-refresh during rebuild - const refetchInterval = status?.status === 'building' ? 2000 : 0; - React.useEffect(() => { - if (status?.status === 'building') { - const interval = setInterval(() => { - refetch(); - }, refetchInterval); - return () => clearInterval(interval); - } - }, [status?.status, refetchInterval, refetch]); - - // Handle rebuild button click - const handleRebuild = async () => { - try { - await rebuildIndex({ force: false }); - } catch (error) { - console.error('[IndexManager] Rebuild failed:', error); - } - }; - - // Format build time (ms to human readable) - const formatBuildTime = (ms: number): string => { - if (ms < 1000) return `${ms}ms`; - if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`; - return `${Math.floor(ms / 60000)}m ${Math.floor((ms % 60000) / 1000)}s`; - }; - - // Format last updated time - const formatLastUpdated = (isoString: string): string => { - const date = new Date(isoString); - const now = new Date(); - const diffMs = now.getTime() - date.getTime(); - const diffMins = Math.floor(diffMs / 60000); - const diffHours = Math.floor(diffMs / 3600000); - const diffDays = Math.floor(diffMs / 86400000); - - if (diffMins < 1) return formatMessage({ id: 'index.time.justNow' }); - if (diffMins < 60) return formatMessage({ id: 'index.time.minutesAgo' }, { value: diffMins }); - if (diffHours < 24) return formatMessage({ id: 'index.time.hoursAgo' }, { value: diffHours }); - return formatMessage({ id: 'index.time.daysAgo' }, { value: diffDays }); - }; - - return ( - - {/* Header */} -
-
- -

- {formatMessage({ id: 'index.title' })} -

- {status && } -
- -
- - {/* Description */} -

- {formatMessage({ id: 'index.description' })} -

- - {/* Error message */} - {rebuildError && ( -
- -
-

- {formatMessage({ id: 'index.errors.rebuildFailed' })} -

-

{rebuildError.message}

-
-
- )} - - {/* Status error */} - {status?.error && ( -
- -

{status.error}

-
- )} - - {/* Progress Bar */} - {status && } - - {/* Current file being indexed */} - {status?.currentFile && status.status === 'building' && ( -
- - {status.currentFile} -
- )} - - {/* Stat Cards */} -
- {/* Total Files */} - - - {/* Last Updated */} - - - {/* Build Time */} - -
-
- ); -} - -export default IndexManager; diff --git a/ccw/frontend/src/components/shared/index.ts b/ccw/frontend/src/components/shared/index.ts index 11f61936..d3ccddbc 100644 --- a/ccw/frontend/src/components/shared/index.ts +++ b/ccw/frontend/src/components/shared/index.ts @@ -146,9 +146,6 @@ export type { RuleDialogProps } from './RuleDialog'; // Tools and utility components export { ThemeSelector } from './ThemeSelector'; -export { IndexManager } from './IndexManager'; -export type { IndexManagerProps } from './IndexManager'; - export { ExplorerToolbar } from './ExplorerToolbar'; export type { ExplorerToolbarProps } from './ExplorerToolbar'; diff --git a/ccw/frontend/src/hooks/index.ts b/ccw/frontend/src/hooks/index.ts index d280caa1..9f245ba0 100644 --- a/ccw/frontend/src/hooks/index.ts +++ b/ccw/frontend/src/hooks/index.ts @@ -290,16 +290,6 @@ export type { WorkspaceQueryKeys, } from './useWorkspaceQueryKeys'; -// ========== CodexLens (v2) ========== -export { - useV2SearchManager, -} from './useV2SearchManager'; -export type { - V2IndexStatus, - V2SearchTestResult, - UseV2SearchManagerReturn, -} from './useV2SearchManager'; - // ========== Skill Hub ========== export { useRemoteSkills, diff --git a/ccw/frontend/src/hooks/useIndex.ts b/ccw/frontend/src/hooks/useIndex.ts deleted file mode 100644 index 65fd415c..00000000 --- a/ccw/frontend/src/hooks/useIndex.ts +++ /dev/null @@ -1,142 +0,0 @@ -// ======================================== -// useIndex Hook -// ======================================== -// TanStack Query hooks for index management with real-time updates - -import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'; -import { - fetchIndexStatus, - rebuildIndex, - type IndexStatus, - type IndexRebuildRequest, -} from '../lib/api'; -import { useWorkflowStore, selectProjectPath } from '@/stores/workflowStore'; -import { workspaceQueryKeys } from '@/lib/queryKeys'; - -// ========== Stale Time ========== - -// Default stale time: 30 seconds (index status updates less frequently) -const STALE_TIME = 30 * 1000; - -// ========== Query Hook ========== - -export interface UseIndexStatusOptions { - enabled?: boolean; - staleTime?: number; - refetchInterval?: number; -} - -export interface UseIndexStatusReturn { - status: IndexStatus | null; - isLoading: boolean; - isFetching: boolean; - error: Error | null; - refetch: () => Promise; - invalidate: () => Promise; -} - -/** - * Hook for fetching index status - * - * @example - * ```tsx - * const { status, isLoading, refetch } = useIndexStatus(); - * ``` - */ -export function useIndexStatus(options: UseIndexStatusOptions = {}): UseIndexStatusReturn { - const { staleTime = STALE_TIME, enabled = true, refetchInterval = 0 } = options; - const queryClient = useQueryClient(); - - const projectPath = useWorkflowStore(selectProjectPath); - const queryEnabled = enabled && !!projectPath; - - const query = useQuery({ - queryKey: workspaceQueryKeys.indexStatus(projectPath), - queryFn: () => fetchIndexStatus(projectPath), - staleTime, - enabled: queryEnabled, - refetchInterval: refetchInterval > 0 ? refetchInterval : false, - retry: 2, - }); - - const refetch = async () => { - await query.refetch(); - }; - - const invalidate = async () => { - await queryClient.invalidateQueries({ queryKey: workspaceQueryKeys.index(projectPath) }); - }; - - return { - status: query.data ?? null, - isLoading: query.isLoading, - isFetching: query.isFetching, - error: query.error, - refetch, - invalidate, - }; -} - -// ========== Mutation Hooks ========== - -export interface UseRebuildIndexReturn { - rebuildIndex: (request?: IndexRebuildRequest) => Promise; - isRebuilding: boolean; - error: Error | null; -} - -/** - * Hook for rebuilding index - * - * @example - * ```tsx - * const { rebuildIndex, isRebuilding } = useRebuildIndex(); - * - * const handleRebuild = async () => { - * await rebuildIndex({ force: true }); - * }; - * ``` - */ -export function useRebuildIndex(): UseRebuildIndexReturn { - const queryClient = useQueryClient(); - const projectPath = useWorkflowStore(selectProjectPath); - - const mutation = useMutation({ - mutationFn: rebuildIndex, - onSuccess: (updatedStatus) => { - // Update the status query cache - queryClient.setQueryData(workspaceQueryKeys.indexStatus(projectPath), updatedStatus); - }, - }); - - return { - rebuildIndex: mutation.mutateAsync, - isRebuilding: mutation.isPending, - error: mutation.error, - }; -} - -/** - * Combined hook for all index operations - * - * @example - * ```tsx - * const { - * status, - * isLoading, - * rebuildIndex, - * isRebuilding, - * } = useIndex(); - * ``` - */ -export function useIndex() { - const status = useIndexStatus(); - const rebuild = useRebuildIndex(); - - return { - ...status, - rebuildIndex: rebuild.rebuildIndex, - isRebuilding: rebuild.isRebuilding, - rebuildError: rebuild.error, - }; -} diff --git a/ccw/frontend/src/hooks/useV2SearchManager.ts b/ccw/frontend/src/hooks/useV2SearchManager.ts deleted file mode 100644 index c0161cae..00000000 --- a/ccw/frontend/src/hooks/useV2SearchManager.ts +++ /dev/null @@ -1,159 +0,0 @@ -// ======================================== -// useV2SearchManager Hook -// ======================================== -// React hook for v2 search management via smart_search tool - -import { useState, useCallback } from 'react'; -import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'; - -// ========== Types ========== - -export interface V2IndexStatus { - indexed: boolean; - totalFiles: number; - totalChunks: number; - lastIndexedAt: string | null; - dbSizeBytes: number; - vectorDimension: number | null; - ftsEnabled: boolean; -} - -export interface V2SearchTestResult { - query: string; - results: Array<{ - file: string; - score: number; - snippet: string; - }>; - timingMs: number; - totalResults: number; -} - -export interface UseV2SearchManagerReturn { - status: V2IndexStatus | null; - isLoadingStatus: boolean; - statusError: Error | null; - refetchStatus: () => void; - search: (query: string) => Promise; - isSearching: boolean; - searchResult: V2SearchTestResult | null; - reindex: () => Promise; - isReindexing: boolean; -} - -// ========== API helpers ========== - -async function fetchWithJson(url: string, body?: Record): Promise { - const response = await fetch(url, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - credentials: 'same-origin', - body: JSON.stringify(body), - }); - if (!response.ok) { - throw new Error(`Request failed: ${response.status}`); - } - return response.json(); -} - -async function fetchV2Status(): Promise { - const data = await fetchWithJson<{ result?: V2IndexStatus; error?: string }>('/api/tools', { - tool_name: 'smart_search', - action: 'status', - }); - if (data.error) { - throw new Error(data.error); - } - // Provide defaults for fields that may be missing - return { - indexed: false, - totalFiles: 0, - totalChunks: 0, - lastIndexedAt: null, - dbSizeBytes: 0, - vectorDimension: null, - ftsEnabled: false, - ...data.result, - }; -} - -async function fetchV2Search(query: string): Promise { - const data = await fetchWithJson<{ result?: V2SearchTestResult; error?: string }>('/api/tools', { - tool_name: 'smart_search', - action: 'search', - params: { query, limit: 10 }, - }); - if (data.error) { - throw new Error(data.error); - } - return data.result ?? { query, results: [], timingMs: 0, totalResults: 0 }; -} - -async function fetchV2Reindex(): Promise { - const data = await fetchWithJson<{ error?: string }>('/api/tools', { - tool_name: 'smart_search', - action: 'reindex', - }); - if (data.error) { - throw new Error(data.error); - } -} - -// ========== Query Keys ========== - -export const v2SearchKeys = { - all: ['v2-search'] as const, - status: () => [...v2SearchKeys.all, 'status'] as const, -}; - -// ========== Hook ========== - -export function useV2SearchManager(): UseV2SearchManagerReturn { - const queryClient = useQueryClient(); - const [searchResult, setSearchResult] = useState(null); - - // Status query - const statusQuery = useQuery({ - queryKey: v2SearchKeys.status(), - queryFn: fetchV2Status, - staleTime: 30_000, - retry: 1, - }); - - // Search mutation - const searchMutation = useMutation({ - mutationFn: (query: string) => fetchV2Search(query), - onSuccess: (data) => { - setSearchResult(data); - }, - }); - - // Reindex mutation - const reindexMutation = useMutation({ - mutationFn: fetchV2Reindex, - onSuccess: () => { - queryClient.invalidateQueries({ queryKey: v2SearchKeys.status() }); - }, - }); - - const search = useCallback(async (query: string) => { - const result = await searchMutation.mutateAsync(query); - return result; - }, [searchMutation]); - - const reindex = useCallback(async () => { - await reindexMutation.mutateAsync(); - }, [reindexMutation]); - - return { - status: statusQuery.data ?? null, - isLoadingStatus: statusQuery.isLoading, - statusError: statusQuery.error as Error | null, - refetchStatus: () => statusQuery.refetch(), - search, - isSearching: searchMutation.isPending, - searchResult, - reindex, - isReindexing: reindexMutation.isPending, - }; -} diff --git a/ccw/frontend/src/lib/api.ts b/ccw/frontend/src/lib/api.ts index 06ba090a..8efe2b46 100644 --- a/ccw/frontend/src/lib/api.ts +++ b/ccw/frontend/src/lib/api.ts @@ -3,11 +3,11 @@ // ======================================== // Typed fetch functions for API communication with CSRF token handling -import type { SessionMetadata, TaskData, IndexStatus, IndexRebuildRequest, Rule, RuleCreateInput, RulesResponse, Prompt, PromptInsight, Pattern, Suggestion, McpTemplate, McpTemplateInstallRequest, AllProjectsResponse, OtherProjectsServersResponse, CrossCliCopyRequest, CrossCliCopyResponse } from '../types/store'; +import type { SessionMetadata, TaskData, Rule, RuleCreateInput, RulesResponse, Prompt, PromptInsight, Pattern, Suggestion, McpTemplate, McpTemplateInstallRequest, AllProjectsResponse, OtherProjectsServersResponse, CrossCliCopyRequest, CrossCliCopyResponse } from '../types/store'; import type { TeamArtifactsResponse } from '../types/team'; // Re-export types for backward compatibility -export type { IndexStatus, IndexRebuildRequest, Rule, RuleCreateInput, RulesResponse, Prompt, PromptInsight, Pattern, Suggestion, McpTemplate, McpTemplateInstallRequest, AllProjectsResponse, OtherProjectsServersResponse, CrossCliCopyRequest, CrossCliCopyResponse }; +export type { Rule, RuleCreateInput, RulesResponse, Prompt, PromptInsight, Pattern, Suggestion, McpTemplate, McpTemplateInstallRequest, AllProjectsResponse, OtherProjectsServersResponse, CrossCliCopyRequest, CrossCliCopyResponse }; /** @@ -4648,10 +4648,10 @@ export async function fetchCcwMcpConfig(currentProjectPath?: string): Promise t.trim()).filter(Boolean); @@ -4710,7 +4710,7 @@ export async function installCcwMcp( scope, projectPath: path, env: { - enabledTools: ['write_file', 'edit_file', 'read_file', 'core_memory', 'ask_question', 'smart_search'], + enabledTools: ['write_file', 'edit_file', 'read_file', 'core_memory', 'ask_question'], }, }), }); @@ -4793,10 +4793,10 @@ export async function fetchCcwMcpConfigForCodex(): Promise { let enabledTools: string[]; if (enabledToolsStr === undefined || enabledToolsStr === null) { // No setting = use default tools - enabledTools = ['write_file', 'edit_file', 'read_file', 'core_memory', 'ask_question', 'smart_search']; + enabledTools = ['write_file', 'edit_file', 'read_file', 'core_memory', 'ask_question']; } else if (enabledToolsStr === '' || enabledToolsStr === 'all') { // Empty string = all tools disabled, 'all' = default set (for backward compatibility) - enabledTools = enabledToolsStr === '' ? [] : ['write_file', 'edit_file', 'read_file', 'core_memory', 'ask_question', 'smart_search']; + enabledTools = enabledToolsStr === '' ? [] : ['write_file', 'edit_file', 'read_file', 'core_memory', 'ask_question']; } else { // Comma-separated list enabledTools = enabledToolsStr.split(',').map((t: string) => t.trim()).filter(Boolean); @@ -4831,7 +4831,7 @@ function buildCcwMcpServerConfigForCodex(config: { if (config.enabledTools !== undefined) { env.CCW_ENABLED_TOOLS = config.enabledTools.join(','); } else { - env.CCW_ENABLED_TOOLS = 'write_file,edit_file,read_file,core_memory,ask_question,smart_search'; + env.CCW_ENABLED_TOOLS = 'write_file,edit_file,read_file,core_memory,ask_question'; } if (config.projectRoot) { @@ -4852,7 +4852,7 @@ function buildCcwMcpServerConfigForCodex(config: { */ export async function installCcwMcpToCodex(): Promise { const serverConfig = buildCcwMcpServerConfigForCodex({ - enabledTools: ['write_file', 'edit_file', 'read_file', 'core_memory', 'ask_question', 'smart_search'], + enabledTools: ['write_file', 'edit_file', 'read_file', 'core_memory', 'ask_question'], }); const result = await addCodexMcpServer('ccw-tools', serverConfig); @@ -4892,42 +4892,6 @@ export async function updateCcwConfigForCodex(config: { return fetchCcwMcpConfigForCodex(); } -// ========== Index Management API ========== - -/** - * Fetch current index status for a specific workspace - * @param projectPath - Optional project path to filter data by workspace - */ -export async function fetchIndexStatus(_projectPath?: string): Promise { - const resp = await fetchApi<{ result?: { indexed?: boolean; totalFiles?: number } }>('/api/tools', { - method: 'POST', - body: JSON.stringify({ tool_name: 'smart_search', action: 'status' }), - }); - const result = resp.result ?? {}; - return { - totalFiles: result.totalFiles ?? 0, - lastUpdated: new Date().toISOString(), - buildTime: 0, - status: result.indexed ? 'completed' : 'idle', - }; -} - -/** - * Rebuild index - */ -export async function rebuildIndex(_request: IndexRebuildRequest = {}): Promise { - await fetchApi<{ error?: string }>('/api/tools', { - method: 'POST', - body: JSON.stringify({ tool_name: 'smart_search', action: 'reindex' }), - }); - return { - totalFiles: 0, - lastUpdated: new Date().toISOString(), - buildTime: 0, - status: 'building', - }; -} - // ========== Prompt History API ========== /** diff --git a/ccw/frontend/src/locales/en/mcp-manager.json b/ccw/frontend/src/locales/en/mcp-manager.json index 629546c1..88646d6c 100644 --- a/ccw/frontend/src/locales/en/mcp-manager.json +++ b/ccw/frontend/src/locales/en/mcp-manager.json @@ -183,10 +183,6 @@ "name": "ask_question", "desc": "Ask interactive questions through A2UI interface" }, - "smart_search": { - "name": "smart_search", - "desc": "Intelligent code search with fuzzy and semantic modes" - }, "team_msg": { "name": "team_msg", "desc": "Persistent JSONL message bus for Agent Team communication" diff --git a/ccw/frontend/src/locales/zh/mcp-manager.json b/ccw/frontend/src/locales/zh/mcp-manager.json index d5dba646..311ab20d 100644 --- a/ccw/frontend/src/locales/zh/mcp-manager.json +++ b/ccw/frontend/src/locales/zh/mcp-manager.json @@ -172,10 +172,6 @@ "name": "ask_question", "desc": "通过 A2UI 界面发起交互式问答" }, - "smart_search": { - "name": "smart_search", - "desc": "智能代码搜索,支持模糊和语义搜索模式" - }, "team_msg": { "name": "team_msg", "desc": "Agent Team 持久化消息总线,用于团队协作通信" diff --git a/ccw/frontend/src/pages/CodexLensManagerPage.test.tsx b/ccw/frontend/src/pages/CodexLensManagerPage.test.tsx deleted file mode 100644 index a744ddcb..00000000 --- a/ccw/frontend/src/pages/CodexLensManagerPage.test.tsx +++ /dev/null @@ -1,196 +0,0 @@ -// ======================================== -// CodexLens Manager Page Tests (v2) -// ======================================== -// Tests for v2 search management page - -import { describe, it, expect, beforeEach, vi } from 'vitest'; -import { render, screen } from '@/test/i18n'; -import userEvent from '@testing-library/user-event'; -import { CodexLensManagerPage } from './CodexLensManagerPage'; - -// Mock the v2 search manager hook -vi.mock('@/hooks/useV2SearchManager', () => ({ - useV2SearchManager: vi.fn(), -})); - -import { useV2SearchManager } from '@/hooks/useV2SearchManager'; - -const mockStatus = { - indexed: true, - totalFiles: 150, - totalChunks: 1200, - lastIndexedAt: '2026-03-17T10:00:00Z', - dbSizeBytes: 5242880, - vectorDimension: 384, - ftsEnabled: true, -}; - -const defaultHookReturn = { - status: mockStatus, - isLoadingStatus: false, - statusError: null, - refetchStatus: vi.fn(), - search: vi.fn().mockResolvedValue({ - query: 'test', - results: [], - timingMs: 12.5, - totalResults: 0, - }), - isSearching: false, - searchResult: null, - reindex: vi.fn().mockResolvedValue(undefined), - isReindexing: false, -}; - -describe('CodexLensManagerPage (v2)', () => { - beforeEach(() => { - vi.clearAllMocks(); - (vi.mocked(useV2SearchManager) as any).mockReturnValue(defaultHookReturn); - }); - - it('should render page title', () => { - render(); - // The title comes from i18n codexlens.title - expect(screen.getByRole('heading', { level: 1 })).toBeInTheDocument(); - }); - - it('should render index status section', () => { - render(); - // Check for file count display - expect(screen.getByText('150')).toBeInTheDocument(); - }); - - it('should render search input', () => { - render(); - const input = screen.getByPlaceholderText(/search query/i); - expect(input).toBeInTheDocument(); - }); - - it('should call refetchStatus on refresh click', async () => { - const refetchStatus = vi.fn(); - (vi.mocked(useV2SearchManager) as any).mockReturnValue({ - ...defaultHookReturn, - refetchStatus, - }); - - const user = userEvent.setup(); - render(); - - const refreshButton = screen.getByText(/Refresh/i); - await user.click(refreshButton); - - expect(refetchStatus).toHaveBeenCalledOnce(); - }); - - it('should call search when clicking search button', async () => { - const searchFn = vi.fn().mockResolvedValue({ - query: 'test query', - results: [], - timingMs: 5, - totalResults: 0, - }); - (vi.mocked(useV2SearchManager) as any).mockReturnValue({ - ...defaultHookReturn, - search: searchFn, - }); - - const user = userEvent.setup(); - render(); - - const input = screen.getByPlaceholderText(/search query/i); - await user.type(input, 'test query'); - - const searchButton = screen.getByText(/Search/i); - await user.click(searchButton); - - expect(searchFn).toHaveBeenCalledWith('test query'); - }); - - it('should display search results', () => { - (vi.mocked(useV2SearchManager) as any).mockReturnValue({ - ...defaultHookReturn, - searchResult: { - query: 'auth', - results: [ - { file: 'src/auth.ts', score: 0.95, snippet: 'export function authenticate()' }, - ], - timingMs: 8.2, - totalResults: 1, - }, - }); - - render(); - - expect(screen.getByText('src/auth.ts')).toBeInTheDocument(); - expect(screen.getByText('95.0%')).toBeInTheDocument(); - expect(screen.getByText('export function authenticate()')).toBeInTheDocument(); - }); - - it('should call reindex on button click', async () => { - const reindexFn = vi.fn().mockResolvedValue(undefined); - (vi.mocked(useV2SearchManager) as any).mockReturnValue({ - ...defaultHookReturn, - reindex: reindexFn, - }); - - const user = userEvent.setup(); - render(); - - const reindexButton = screen.getByText(/Reindex/i); - await user.click(reindexButton); - - expect(reindexFn).toHaveBeenCalledOnce(); - }); - - it('should show loading skeleton when status is loading', () => { - (vi.mocked(useV2SearchManager) as any).mockReturnValue({ - ...defaultHookReturn, - status: null, - isLoadingStatus: true, - }); - - render(); - - // Should have pulse animation elements - const pulseElements = document.querySelectorAll('.animate-pulse'); - expect(pulseElements.length).toBeGreaterThan(0); - }); - - it('should show error alert when status fetch fails', () => { - (vi.mocked(useV2SearchManager) as any).mockReturnValue({ - ...defaultHookReturn, - status: null, - statusError: new Error('Network error'), - }); - - render(); - - // Error message should be visible - expect(screen.getByText(/Failed to load/i)).toBeInTheDocument(); - }); - - it('should show not indexed state', () => { - (vi.mocked(useV2SearchManager) as any).mockReturnValue({ - ...defaultHookReturn, - status: { - ...mockStatus, - indexed: false, - totalFiles: 0, - totalChunks: 0, - }, - }); - - render(); - - expect(screen.getByText(/Not Indexed/i)).toBeInTheDocument(); - }); - - describe('i18n - Chinese locale', () => { - it('should display translated text in Chinese', () => { - render(, { locale: 'zh' }); - - // Page title from zh codexlens.json - expect(screen.getByRole('heading', { level: 1 })).toBeInTheDocument(); - }); - }); -}); diff --git a/ccw/frontend/src/pages/CodexLensManagerPage.tsx b/ccw/frontend/src/pages/CodexLensManagerPage.tsx deleted file mode 100644 index ce67d799..00000000 --- a/ccw/frontend/src/pages/CodexLensManagerPage.tsx +++ /dev/null @@ -1,277 +0,0 @@ -// ======================================== -// CodexLens Manager Page (v2) -// ======================================== -// V2 search management interface with index status, search test, and configuration - -import { useState } from 'react'; -import { useIntl } from 'react-intl'; -import { - Search, - RefreshCw, - Database, - Zap, - AlertCircle, - CheckCircle2, - Clock, - FileText, - HardDrive, -} from 'lucide-react'; -import { Card } from '@/components/ui/Card'; -import { Button } from '@/components/ui/Button'; -import { useV2SearchManager } from '@/hooks'; -import { cn } from '@/lib/utils'; - -function formatBytes(bytes: number): string { - if (bytes === 0) return '0 B'; - const units = ['B', 'KB', 'MB', 'GB']; - const i = Math.floor(Math.log(bytes) / Math.log(1024)); - return `${(bytes / Math.pow(1024, i)).toFixed(1)} ${units[i]}`; -} - -function formatDate(dateStr: string | null): string { - if (!dateStr) return '-'; - try { - return new Date(dateStr).toLocaleString(); - } catch { - return dateStr; - } -} - -export function CodexLensManagerPage() { - const { formatMessage } = useIntl(); - const [searchQuery, setSearchQuery] = useState(''); - - const { - status, - isLoadingStatus, - statusError, - refetchStatus, - search, - isSearching, - searchResult, - reindex, - isReindexing, - } = useV2SearchManager(); - - const handleSearch = async () => { - if (!searchQuery.trim()) return; - await search(searchQuery.trim()); - }; - - const handleKeyDown = (e: React.KeyboardEvent) => { - if (e.key === 'Enter') { - handleSearch(); - } - }; - - return ( -
- {/* Page Header */} -
-
-

- - {formatMessage({ id: 'codexlens.title' })} -

-

- {formatMessage({ id: 'codexlens.description' })} -

-
-
- - -
-
- - {/* Error Alert */} - {statusError && ( - -
- -

- {formatMessage({ id: 'codexlens.statusError' })} -

-
-
- )} - - {/* Index Status Section */} - -

- - {formatMessage({ id: 'codexlens.indexStatus.title' })} -

- - {isLoadingStatus ? ( -
- {[1, 2, 3, 4].map((i) => ( -
- ))} -
- ) : status ? ( -
-
- {status.indexed ? ( - - ) : ( - - )} -
-

- {formatMessage({ id: 'codexlens.indexStatus.status' })} -

-

- {status.indexed - ? formatMessage({ id: 'codexlens.indexStatus.ready' }) - : formatMessage({ id: 'codexlens.indexStatus.notIndexed' }) - } -

-
-
- -
- -
-

- {formatMessage({ id: 'codexlens.indexStatus.files' })} -

-

{status.totalFiles.toLocaleString()}

-
-
- -
- -
-

- {formatMessage({ id: 'codexlens.indexStatus.dbSize' })} -

-

{formatBytes(status.dbSizeBytes)}

-
-
- -
- -
-

- {formatMessage({ id: 'codexlens.indexStatus.lastIndexed' })} -

-

{formatDate(status.lastIndexedAt)}

-
-
-
- ) : ( -

- {formatMessage({ id: 'codexlens.indexStatus.unavailable' })} -

- )} - - {status && ( -
- - {formatMessage({ id: 'codexlens.indexStatus.chunks' })}: {status.totalChunks.toLocaleString()} - - {status.vectorDimension && ( - - {formatMessage({ id: 'codexlens.indexStatus.vectorDim' })}: {status.vectorDimension} - - )} - - FTS: {status.ftsEnabled - ? formatMessage({ id: 'codexlens.indexStatus.enabled' }) - : formatMessage({ id: 'codexlens.indexStatus.disabled' }) - } - -
- )} - - - {/* Search Test Section */} - -

- - {formatMessage({ id: 'codexlens.searchTest.title' })} -

- -
- setSearchQuery(e.target.value)} - onKeyDown={handleKeyDown} - placeholder={formatMessage({ id: 'codexlens.searchTest.placeholder' })} - className="flex-1 px-3 py-2 border border-input rounded-md bg-background text-sm focus:outline-none focus:ring-2 focus:ring-ring" - /> - -
- - {searchResult && ( -
-
-

- {searchResult.totalResults} {formatMessage({ id: 'codexlens.searchTest.results' })} -

-

- {searchResult.timingMs.toFixed(1)}ms -

-
- - {searchResult.results.length > 0 ? ( -
- {searchResult.results.map((result, idx) => ( -
-
- - {result.file} - - - {(result.score * 100).toFixed(1)}% - -
-
-                      {result.snippet}
-                    
-
- ))} -
- ) : ( -

- {formatMessage({ id: 'codexlens.searchTest.noResults' })} -

- )} -
- )} -
-
- ); -} - -export default CodexLensManagerPage; diff --git a/ccw/frontend/src/pages/index.ts b/ccw/frontend/src/pages/index.ts index 7d777ece..e11daf44 100644 --- a/ccw/frontend/src/pages/index.ts +++ b/ccw/frontend/src/pages/index.ts @@ -29,7 +29,6 @@ export { RulesManagerPage } from './RulesManagerPage'; export { PromptHistoryPage } from './PromptHistoryPage'; export { ExplorerPage } from './ExplorerPage'; export { GraphExplorerPage } from './GraphExplorerPage'; -export { CodexLensManagerPage } from './CodexLensManagerPage'; export { ApiSettingsPage } from './ApiSettingsPage'; export { CliViewerPage } from './CliViewerPage'; export { CliSessionSharePage } from './CliSessionSharePage'; diff --git a/ccw/frontend/src/router.tsx b/ccw/frontend/src/router.tsx index 8078277e..aa7e49aa 100644 --- a/ccw/frontend/src/router.tsx +++ b/ccw/frontend/src/router.tsx @@ -35,7 +35,6 @@ const RulesManagerPage = lazy(() => import('@/pages/RulesManagerPage').then(m => const PromptHistoryPage = lazy(() => import('@/pages/PromptHistoryPage').then(m => ({ default: m.PromptHistoryPage }))); const ExplorerPage = lazy(() => import('@/pages/ExplorerPage').then(m => ({ default: m.ExplorerPage }))); const GraphExplorerPage = lazy(() => import('@/pages/GraphExplorerPage').then(m => ({ default: m.GraphExplorerPage }))); -const CodexLensManagerPage = lazy(() => import('@/pages/CodexLensManagerPage').then(m => ({ default: m.CodexLensManagerPage }))); const ApiSettingsPage = lazy(() => import('@/pages/ApiSettingsPage').then(m => ({ default: m.ApiSettingsPage }))); const CliViewerPage = lazy(() => import('@/pages/CliViewerPage').then(m => ({ default: m.CliViewerPage }))); const CliSessionSharePage = lazy(() => import('@/pages/CliSessionSharePage').then(m => ({ default: m.CliSessionSharePage }))); @@ -170,10 +169,6 @@ const routes: RouteObject[] = [ path: 'settings/specs', element: withErrorHandling(), }, - { - path: 'settings/codexlens', - element: withErrorHandling(), - }, { path: 'api-settings', element: withErrorHandling(), @@ -260,7 +255,6 @@ export const ROUTES = { ENDPOINTS: '/settings/endpoints', INSTALLATIONS: '/settings/installations', SETTINGS_RULES: '/settings/rules', - CODEXLENS_MANAGER: '/settings/codexlens', API_SETTINGS: '/api-settings', EXPLORER: '/explorer', GRAPH: '/graph', diff --git a/ccw/frontend/src/test/i18n.tsx b/ccw/frontend/src/test/i18n.tsx index 9ff9b018..1f2334d7 100644 --- a/ccw/frontend/src/test/i18n.tsx +++ b/ccw/frontend/src/test/i18n.tsx @@ -172,8 +172,6 @@ const mockMessages: Record> = { 'mcp.ccw.tools.core_memory.desc': 'Core memory management', 'mcp.ccw.tools.ask_question.name': 'Ask Question', 'mcp.ccw.tools.ask_question.desc': 'Interactive questions (A2UI)', - 'mcp.ccw.tools.smart_search.name': 'Smart Search', - 'mcp.ccw.tools.smart_search.desc': 'Intelligent code search', 'mcp.ccw.tools.team_msg.name': 'Team Message', 'mcp.ccw.tools.team_msg.desc': 'Agent team message bus', 'mcp.ccw.paths.label': 'Paths', @@ -348,8 +346,6 @@ const mockMessages: Record> = { 'mcp.ccw.tools.core_memory.desc': '核心记忆管理', 'mcp.ccw.tools.ask_question.name': '提问', 'mcp.ccw.tools.ask_question.desc': '交互式问题(A2UI)', - 'mcp.ccw.tools.smart_search.name': '智能搜索', - 'mcp.ccw.tools.smart_search.desc': '智能代码搜索', 'mcp.ccw.tools.team_msg.name': '团队消息', 'mcp.ccw.tools.team_msg.desc': '代理团队消息总线', 'mcp.ccw.paths.label': '路径', diff --git a/ccw/frontend/src/types/index.ts b/ccw/frontend/src/types/index.ts index 732eab8c..d004973a 100644 --- a/ccw/frontend/src/types/index.ts +++ b/ccw/frontend/src/types/index.ts @@ -40,9 +40,6 @@ export type { NotificationState, NotificationActions, NotificationStore, - // Index Manager - IndexStatus, - IndexRebuildRequest, // Rules Rule, RuleCreateInput, diff --git a/ccw/scripts/IMPLEMENTATION-SUMMARY.md b/ccw/scripts/IMPLEMENTATION-SUMMARY.md deleted file mode 100644 index 8dbfc99b..00000000 --- a/ccw/scripts/IMPLEMENTATION-SUMMARY.md +++ /dev/null @@ -1,226 +0,0 @@ -# Memory Embedder Implementation Summary - -## Overview - -Created a Python script (`memory_embedder.py`) that bridges CCW to CodexLens semantic search by generating and searching embeddings for memory chunks stored in CCW's SQLite database. - -## Files Created - -### 1. `memory_embedder.py` (Main Script) -**Location**: `D:\Claude_dms3\ccw\scripts\memory_embedder.py` - -**Features**: -- Reuses CodexLens embedder: `from codexlens.semantic.embedder import get_embedder` -- Uses jina-embeddings-v2-base-code (768 dimensions) -- Three commands: `embed`, `search`, `status` -- JSON output for easy integration -- Batch processing for efficiency -- Graceful error handling - -**Commands**: - -1. **embed** - Generate embeddings - ```bash - python memory_embedder.py embed [options] - Options: - --source-id ID # Only process specific source - --batch-size N # Batch size (default: 8) - --force # Re-embed existing chunks - ``` - -2. **search** - Semantic search - ```bash - python memory_embedder.py search [options] - Options: - --top-k N # Number of results (default: 10) - --min-score F # Minimum score (default: 0.3) - --type TYPE # Filter by source type - ``` - -3. **status** - Get statistics - ```bash - python memory_embedder.py status - ``` - -### 2. `README-memory-embedder.md` (Documentation) -**Location**: `D:\Claude_dms3\ccw\scripts\README-memory-embedder.md` - -**Contents**: -- Feature overview -- Requirements and installation -- Detailed usage examples -- Database path reference -- TypeScript integration guide -- Performance metrics -- Source type descriptions - -### 3. `memory-embedder-example.ts` (Integration Example) -**Location**: `D:\Claude_dms3\ccw\scripts\memory-embedder-example.ts` - -**Exported Functions**: -- `embedChunks(dbPath, options)` - Generate embeddings -- `searchMemory(dbPath, query, options)` - Semantic search -- `getEmbeddingStatus(dbPath)` - Get status - -**Example Usage**: -```typescript -import { searchMemory, embedChunks, getEmbeddingStatus } from './memory-embedder-example'; - -// Check status -const status = getEmbeddingStatus(dbPath); - -// Generate embeddings -const result = embedChunks(dbPath, { batchSize: 16 }); - -// Search -const matches = searchMemory(dbPath, 'authentication', { - topK: 5, - minScore: 0.5, - sourceType: 'workflow' -}); -``` - -## Technical Implementation - -### Database Schema -Uses existing `memory_chunks` table: -```sql -CREATE TABLE memory_chunks ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - source_id TEXT NOT NULL, - source_type TEXT NOT NULL, - chunk_index INTEGER NOT NULL, - content TEXT NOT NULL, - embedding BLOB, - metadata TEXT, - created_at TEXT NOT NULL, - UNIQUE(source_id, chunk_index) -); -``` - -### Embedding Storage -- Format: `float32` bytes (numpy array) -- Dimension: 768 (jina-embeddings-v2-base-code) -- Storage: `np.array(emb, dtype=np.float32).tobytes()` -- Loading: `np.frombuffer(blob, dtype=np.float32)` - -### Similarity Search -- Algorithm: Cosine similarity -- Formula: `np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))` -- Default threshold: 0.3 -- Sorting: Descending by score - -### Source Types -- `core_memory`: Strategic architectural context -- `workflow`: Session-based development history -- `cli_history`: Command execution logs - -### Restore Commands -Generated automatically for each match: -- core_memory/cli_history: `ccw memory export ` -- workflow: `ccw session resume ` - -## Dependencies - -### Required -- `numpy`: Array operations and cosine similarity -- `codex-lens[semantic]`: Embedding generation - -### Installation -```bash -pip install numpy codex-lens[semantic] -``` - -## Testing - -### Script Validation -```bash -# Syntax check -python -m py_compile scripts/memory_embedder.py # OK - -# Help output -python scripts/memory_embedder.py --help # Works -python scripts/memory_embedder.py embed --help # Works -python scripts/memory_embedder.py search --help # Works -python scripts/memory_embedder.py status --help # Works - -# Status test -python scripts/memory_embedder.py status # Works -``` - -### Error Handling -- Missing database: FileNotFoundError with clear message -- Missing CodexLens: ImportError with installation instructions -- Missing numpy: ImportError with installation instructions -- Database errors: JSON error response with success=false -- Missing table: Graceful error with JSON output - -## Performance - -- **Embedding speed**: ~8 chunks/second (batch size 8) -- **Search speed**: ~0.1-0.5 seconds for 1000 chunks -- **Model loading**: ~0.8 seconds (cached after first use via CodexLens singleton) -- **Batch processing**: Configurable batch size (default: 8) - -## Output Format - -All commands output JSON for easy parsing: - -### Embed Result -```json -{ - "success": true, - "chunks_processed": 50, - "chunks_failed": 0, - "elapsed_time": 12.34 -} -``` - -### Search Result -```json -{ - "success": true, - "matches": [ - { - "source_id": "WFS-20250101-auth", - "source_type": "workflow", - "chunk_index": 2, - "content": "Implemented JWT...", - "score": 0.8542, - "restore_command": "ccw session resume WFS-20250101-auth" - } - ] -} -``` - -### Status Result -```json -{ - "total_chunks": 150, - "embedded_chunks": 100, - "pending_chunks": 50, - "by_type": { - "core_memory": {"total": 80, "embedded": 60, "pending": 20} - } -} -``` - -## Next Steps - -1. **TypeScript Integration**: Add to CCW's core memory routes -2. **CLI Command**: Create `ccw memory search` command -3. **Automatic Embedding**: Trigger embedding on memory creation -4. **Index Management**: Add rebuild/optimize commands -5. **Cluster Search**: Integrate with session clusters - -## Code Quality - -- ✅ Single responsibility per function -- ✅ Clear, descriptive naming -- ✅ Explicit error handling -- ✅ No premature abstractions -- ✅ Minimal debug output (essential logging only) -- ✅ ASCII-only characters (no emojis) -- ✅ GBK encoding compatible -- ✅ Type hints for all functions -- ✅ Comprehensive docstrings diff --git a/ccw/scripts/QUICK-REFERENCE.md b/ccw/scripts/QUICK-REFERENCE.md deleted file mode 100644 index 0ef204ea..00000000 --- a/ccw/scripts/QUICK-REFERENCE.md +++ /dev/null @@ -1,135 +0,0 @@ -# Memory Embedder - Quick Reference - -## Installation - -```bash -pip install numpy codex-lens[semantic] -``` - -## Commands - -### Status -```bash -python scripts/memory_embedder.py status -``` - -### Embed All -```bash -python scripts/memory_embedder.py embed -``` - -### Embed Specific Source -```bash -python scripts/memory_embedder.py embed --source-id CMEM-20250101-120000 -``` - -### Re-embed (Force) -```bash -python scripts/memory_embedder.py embed --force -``` - -### Search -```bash -python scripts/memory_embedder.py search "authentication flow" -``` - -### Advanced Search -```bash -python scripts/memory_embedder.py search "rate limiting" \ - --top-k 5 \ - --min-score 0.5 \ - --type workflow -``` - -## Database Path - -Find your database: -```bash -# Linux/Mac -~/.ccw/projects//core-memory/core_memory.db - -# Windows -%USERPROFILE%\.ccw\projects\\core-memory\core_memory.db -``` - -## TypeScript Integration - -```typescript -import { execSync } from 'child_process'; - -// Status -const status = JSON.parse( - execSync(`python scripts/memory_embedder.py status "${dbPath}"`, { - encoding: 'utf-8' - }) -); - -// Embed -const result = JSON.parse( - execSync(`python scripts/memory_embedder.py embed "${dbPath}"`, { - encoding: 'utf-8' - }) -); - -// Search -const matches = JSON.parse( - execSync( - `python scripts/memory_embedder.py search "${dbPath}" "query"`, - { encoding: 'utf-8' } - ) -); -``` - -## Output Examples - -### Status -```json -{ - "total_chunks": 150, - "embedded_chunks": 100, - "pending_chunks": 50, - "by_type": { - "core_memory": {"total": 80, "embedded": 60, "pending": 20} - } -} -``` - -### Embed -```json -{ - "success": true, - "chunks_processed": 50, - "chunks_failed": 0, - "elapsed_time": 12.34 -} -``` - -### Search -```json -{ - "success": true, - "matches": [ - { - "source_id": "WFS-20250101-auth", - "source_type": "workflow", - "chunk_index": 2, - "content": "Implemented JWT authentication...", - "score": 0.8542, - "restore_command": "ccw session resume WFS-20250101-auth" - } - ] -} -``` - -## Source Types - -- `core_memory` - Strategic architectural context -- `workflow` - Session-based development history -- `cli_history` - Command execution logs - -## Performance - -- Embedding: ~8 chunks/second -- Search: ~0.1-0.5s for 1000 chunks -- Model load: ~0.8s (cached) -- Batch size: 8 (default, configurable) diff --git a/ccw/scripts/README-memory-embedder.md b/ccw/scripts/README-memory-embedder.md deleted file mode 100644 index 0954aeb0..00000000 --- a/ccw/scripts/README-memory-embedder.md +++ /dev/null @@ -1,157 +0,0 @@ -# Memory Embedder - -Bridge CCW to CodexLens semantic search by generating and searching embeddings for memory chunks. - -## Features - -- **Generate embeddings** for memory chunks using CodexLens's jina-embeddings-v2-base-code (768 dim) -- **Semantic search** across all memory types (core_memory, workflow, cli_history) -- **Status tracking** to monitor embedding progress -- **Batch processing** for efficient embedding generation -- **Restore commands** included in search results - -## Requirements - -```bash -pip install numpy codex-lens[semantic] -``` - -## Usage - -### 1. Check Status - -```bash -python scripts/memory_embedder.py status -``` - -Example output: -```json -{ - "total_chunks": 150, - "embedded_chunks": 100, - "pending_chunks": 50, - "by_type": { - "core_memory": {"total": 80, "embedded": 60, "pending": 20}, - "workflow": {"total": 50, "embedded": 30, "pending": 20}, - "cli_history": {"total": 20, "embedded": 10, "pending": 10} - } -} -``` - -### 2. Generate Embeddings - -Embed all unembedded chunks: -```bash -python scripts/memory_embedder.py embed -``` - -Embed specific source: -```bash -python scripts/memory_embedder.py embed --source-id CMEM-20250101-120000 -``` - -Re-embed all chunks (force): -```bash -python scripts/memory_embedder.py embed --force -``` - -Adjust batch size (default 8): -```bash -python scripts/memory_embedder.py embed --batch-size 16 -``` - -Example output: -```json -{ - "success": true, - "chunks_processed": 50, - "chunks_failed": 0, - "elapsed_time": 12.34 -} -``` - -### 3. Semantic Search - -Basic search: -```bash -python scripts/memory_embedder.py search "authentication flow" -``` - -Advanced search: -```bash -python scripts/memory_embedder.py search "rate limiting" \ - --top-k 5 \ - --min-score 0.5 \ - --type workflow -``` - -Example output: -```json -{ - "success": true, - "matches": [ - { - "source_id": "WFS-20250101-auth", - "source_type": "workflow", - "chunk_index": 2, - "content": "Implemented JWT-based authentication...", - "score": 0.8542, - "restore_command": "ccw session resume WFS-20250101-auth" - } - ] -} -``` - -## Database Path - -The database is located in CCW's storage directory: - -- **Windows**: `%USERPROFILE%\.ccw\projects\\core-memory\core_memory.db` -- **Linux/Mac**: `~/.ccw/projects//core-memory/core_memory.db` - -Find your project's database: -```bash -ccw memory list # Shows project path -# Then look in: ~/.ccw/projects//core-memory/core_memory.db -``` - -## Integration with CCW - -This script is designed to be called from CCW's TypeScript code: - -```typescript -import { execSync } from 'child_process'; - -// Embed chunks -const result = execSync( - `python scripts/memory_embedder.py embed ${dbPath}`, - { encoding: 'utf-8' } -); -const { success, chunks_processed } = JSON.parse(result); - -// Search -const searchResult = execSync( - `python scripts/memory_embedder.py search ${dbPath} "${query}" --top-k 10`, - { encoding: 'utf-8' } -); -const { matches } = JSON.parse(searchResult); -``` - -## Performance - -- **Embedding speed**: ~8 chunks/second (batch size 8) -- **Search speed**: ~0.1-0.5 seconds for 1000 chunks -- **Model loading**: ~0.8 seconds (cached after first use) - -## Source Types - -- `core_memory`: Strategic architectural context -- `workflow`: Session-based development history -- `cli_history`: Command execution logs - -## Restore Commands - -Search results include restore commands: - -- **core_memory/cli_history**: `ccw memory export ` -- **workflow**: `ccw session resume ` diff --git a/ccw/scripts/memory-embedder-example.ts b/ccw/scripts/memory-embedder-example.ts deleted file mode 100644 index 32998fa0..00000000 --- a/ccw/scripts/memory-embedder-example.ts +++ /dev/null @@ -1,184 +0,0 @@ -/** - * Example: Using Memory Embedder from TypeScript - * - * This shows how to integrate the Python memory embedder script - * into CCW's TypeScript codebase. - */ - -import { execSync } from 'child_process'; -import { join } from 'path'; - -interface EmbedResult { - success: boolean; - chunks_processed: number; - chunks_failed: number; - elapsed_time: number; -} - -interface SearchMatch { - source_id: string; - source_type: 'core_memory' | 'workflow' | 'cli_history'; - chunk_index: number; - content: string; - score: number; - restore_command: string; -} - -interface SearchResult { - success: boolean; - matches: SearchMatch[]; - error?: string; -} - -interface StatusResult { - total_chunks: number; - embedded_chunks: number; - pending_chunks: number; - by_type: Record; -} - -/** - * Get path to memory embedder script - */ -function getEmbedderScript(): string { - return join(__dirname, 'memory_embedder.py'); -} - -/** - * Execute memory embedder command - */ -function execEmbedder(args: string[]): string { - const script = getEmbedderScript(); - const command = `python "${script}" ${args.join(' ')}`; - - try { - return execSync(command, { - encoding: 'utf-8', - maxBuffer: 10 * 1024 * 1024 // 10MB buffer - }); - } catch (error: any) { - // Try to parse error output as JSON - if (error.stdout) { - return error.stdout; - } - throw new Error(`Embedder failed: ${error.message}`); - } -} - -/** - * Generate embeddings for memory chunks - */ -export function embedChunks( - dbPath: string, - options: { - sourceId?: string; - batchSize?: number; - force?: boolean; - } = {} -): EmbedResult { - const args = ['embed', `"${dbPath}"`]; - - if (options.sourceId) { - args.push('--source-id', options.sourceId); - } - if (options.batchSize) { - args.push('--batch-size', String(options.batchSize)); - } - if (options.force) { - args.push('--force'); - } - - const output = execEmbedder(args); - return JSON.parse(output); -} - -/** - * Search memory chunks semantically - */ -export function searchMemory( - dbPath: string, - query: string, - options: { - topK?: number; - minScore?: number; - sourceType?: 'core_memory' | 'workflow' | 'cli_history'; - } = {} -): SearchResult { - const args = ['search', `"${dbPath}"`, `"${query}"`]; - - if (options.topK) { - args.push('--top-k', String(options.topK)); - } - if (options.minScore !== undefined) { - args.push('--min-score', String(options.minScore)); - } - if (options.sourceType) { - args.push('--type', options.sourceType); - } - - const output = execEmbedder(args); - return JSON.parse(output); -} - -/** - * Get embedding status - */ -export function getEmbeddingStatus(dbPath: string): StatusResult { - const args = ['status', `"${dbPath}"`]; - const output = execEmbedder(args); - return JSON.parse(output); -} - -// ============================================================================ -// Example Usage -// ============================================================================ - -async function exampleUsage() { - const dbPath = join(process.env.HOME || '', '.ccw/projects/myproject/core-memory/core_memory.db'); - - // 1. Check status - console.log('Checking embedding status...'); - const status = getEmbeddingStatus(dbPath); - console.log(`Total chunks: ${status.total_chunks}`); - console.log(`Embedded: ${status.embedded_chunks}`); - console.log(`Pending: ${status.pending_chunks}`); - - // 2. Generate embeddings if needed - if (status.pending_chunks > 0) { - console.log('\nGenerating embeddings...'); - const embedResult = embedChunks(dbPath, { batchSize: 16 }); - console.log(`Processed: ${embedResult.chunks_processed}`); - console.log(`Time: ${embedResult.elapsed_time}s`); - } - - // 3. Search for relevant memories - console.log('\nSearching for authentication-related memories...'); - const searchResult = searchMemory(dbPath, 'authentication flow', { - topK: 5, - minScore: 0.5 - }); - - if (searchResult.success) { - console.log(`Found ${searchResult.matches.length} matches:`); - for (const match of searchResult.matches) { - console.log(`\n- ${match.source_id} (score: ${match.score})`); - console.log(` Type: ${match.source_type}`); - console.log(` Restore: ${match.restore_command}`); - console.log(` Content: ${match.content.substring(0, 100)}...`); - } - } - - // 4. Search specific source type - console.log('\nSearching workflows only...'); - const workflowSearch = searchMemory(dbPath, 'API implementation', { - sourceType: 'workflow', - topK: 3 - }); - - console.log(`Found ${workflowSearch.matches.length} workflow matches`); -} - -// Run example if executed directly -if (require.main === module) { - exampleUsage().catch(console.error); -} diff --git a/ccw/scripts/memory_embedder.py b/ccw/scripts/memory_embedder.py deleted file mode 100644 index e026998f..00000000 --- a/ccw/scripts/memory_embedder.py +++ /dev/null @@ -1,428 +0,0 @@ -#!/usr/bin/env python3 -""" -Memory Embedder - Bridge CCW to CodexLens semantic search - -This script generates and searches embeddings for memory chunks stored in CCW's -SQLite database using CodexLens's embedder. - -Usage: - python memory_embedder.py embed [--source-id ID] [--batch-size N] [--force] - python memory_embedder.py search [--top-k N] [--min-score F] [--type TYPE] - python memory_embedder.py status -""" - -import argparse -import json -import sqlite3 -import sys -import time -from pathlib import Path -from typing import List, Dict, Any, Optional, Tuple - -try: - import numpy as np -except ImportError: - print("Error: numpy is required. Install with: pip install numpy", file=sys.stderr) - sys.exit(1) - -try: - from codexlens.semantic.factory import get_embedder as get_embedder_factory - from codexlens.semantic.factory import clear_embedder_cache - from codexlens.config import Config as CodexLensConfig -except ImportError: - print("Error: CodexLens not found. Install with: pip install codex-lens[semantic]", file=sys.stderr) - sys.exit(1) - - -class MemoryEmbedder: - """Generate and search embeddings for memory chunks.""" - - def __init__(self, db_path: str): - """Initialize embedder with database path.""" - self.db_path = Path(db_path) - if not self.db_path.exists(): - raise FileNotFoundError(f"Database not found: {db_path}") - - self.conn = sqlite3.connect(str(self.db_path)) - self.conn.row_factory = sqlite3.Row - - # Load CodexLens configuration for embedding settings - try: - self._config = CodexLensConfig.load() - except Exception as e: - print(f"Warning: Could not load CodexLens config, using defaults. Error: {e}", file=sys.stderr) - self._config = CodexLensConfig() # Use default config - - # Lazy-load embedder to avoid ~0.8s model loading for status command - self._embedder = None - self._embedding_dim = None - - @property - def embedding_dim(self) -> int: - """Get embedding dimension from the embedder.""" - if self._embedding_dim is None: - # Access embedder to get its dimension - self._embedding_dim = self.embedder.embedding_dim - return self._embedding_dim - - @property - def embedder(self): - """Lazy-load the embedder on first access using CodexLens config.""" - if self._embedder is None: - # Use CodexLens configuration settings - backend = self._config.embedding_backend - model = self._config.embedding_model - use_gpu = self._config.embedding_use_gpu - - # Use factory to create embedder based on backend type - if backend == "fastembed": - self._embedder = get_embedder_factory( - backend="fastembed", - profile=model, - use_gpu=use_gpu - ) - elif backend == "litellm": - # For litellm backend, also pass endpoints if configured - endpoints = self._config.embedding_endpoints - strategy = self._config.embedding_strategy - cooldown = self._config.embedding_cooldown - - self._embedder = get_embedder_factory( - backend="litellm", - model=model, - endpoints=endpoints if endpoints else None, - strategy=strategy, - cooldown=cooldown, - ) - else: - # Fallback to fastembed with code profile - self._embedder = get_embedder_factory( - backend="fastembed", - profile="code", - use_gpu=True - ) - return self._embedder - - def close(self): - """Close database connection.""" - if self.conn: - self.conn.close() - - def embed_chunks( - self, - source_id: Optional[str] = None, - batch_size: int = 8, - force: bool = False - ) -> Dict[str, Any]: - """ - Generate embeddings for unembedded chunks. - - Args: - source_id: Only process chunks from this source - batch_size: Number of chunks to process in each batch - force: Re-embed chunks that already have embeddings - - Returns: - Result dict with success, chunks_processed, chunks_failed, elapsed_time - """ - start_time = time.time() - - # Build query - query = "SELECT id, source_id, source_type, chunk_index, content FROM memory_chunks" - params = [] - - if force: - # Process all chunks (with optional source filter) - if source_id: - query += " WHERE source_id = ?" - params.append(source_id) - else: - # Only process chunks without embeddings - query += " WHERE embedding IS NULL" - if source_id: - query += " AND source_id = ?" - params.append(source_id) - - query += " ORDER BY id" - - cursor = self.conn.cursor() - cursor.execute(query, params) - - chunks_processed = 0 - chunks_failed = 0 - batch = [] - batch_ids = [] - - for row in cursor: - batch.append(row["content"]) - batch_ids.append(row["id"]) - - # Process batch when full - if len(batch) >= batch_size: - processed, failed = self._process_batch(batch, batch_ids) - chunks_processed += processed - chunks_failed += failed - batch = [] - batch_ids = [] - - # Process remaining chunks - if batch: - processed, failed = self._process_batch(batch, batch_ids) - chunks_processed += processed - chunks_failed += failed - - elapsed_time = time.time() - start_time - - return { - "success": chunks_failed == 0, - "chunks_processed": chunks_processed, - "chunks_failed": chunks_failed, - "elapsed_time": round(elapsed_time, 2) - } - - def _process_batch(self, texts: List[str], ids: List[int]) -> Tuple[int, int]: - """Process a batch of texts and update embeddings.""" - try: - # Generate embeddings for batch - embeddings = self.embedder.embed(texts) - - processed = 0 - failed = 0 - - # Update database - cursor = self.conn.cursor() - for chunk_id, embedding in zip(ids, embeddings): - try: - # Convert to numpy array and store as bytes - emb_array = np.array(embedding, dtype=np.float32) - emb_bytes = emb_array.tobytes() - - cursor.execute( - "UPDATE memory_chunks SET embedding = ? WHERE id = ?", - (emb_bytes, chunk_id) - ) - processed += 1 - except Exception as e: - print(f"Error updating chunk {chunk_id}: {e}", file=sys.stderr) - failed += 1 - - self.conn.commit() - return processed, failed - - except Exception as e: - print(f"Error processing batch: {e}", file=sys.stderr) - return 0, len(ids) - - def search( - self, - query: str, - top_k: int = 10, - min_score: float = 0.3, - source_type: Optional[str] = None - ) -> Dict[str, Any]: - """ - Perform semantic search on memory chunks. - - Args: - query: Search query text - top_k: Number of results to return - min_score: Minimum similarity score (0-1) - source_type: Filter by source type (core_memory, workflow, cli_history) - - Returns: - Result dict with success and matches list - """ - try: - # Generate query embedding - query_embedding = self.embedder.embed_single(query) - query_array = np.array(query_embedding, dtype=np.float32) - - # Build database query - sql = """ - SELECT id, source_id, source_type, chunk_index, content, embedding - FROM memory_chunks - WHERE embedding IS NOT NULL - """ - params = [] - - if source_type: - sql += " AND source_type = ?" - params.append(source_type) - - cursor = self.conn.cursor() - cursor.execute(sql, params) - - # Calculate similarities - matches = [] - for row in cursor: - # Load embedding from bytes - emb_bytes = row["embedding"] - emb_array = np.frombuffer(emb_bytes, dtype=np.float32) - - # Cosine similarity - score = float( - np.dot(query_array, emb_array) / - (np.linalg.norm(query_array) * np.linalg.norm(emb_array)) - ) - - if score >= min_score: - # Generate restore command - restore_command = self._get_restore_command( - row["source_id"], - row["source_type"] - ) - - matches.append({ - "source_id": row["source_id"], - "source_type": row["source_type"], - "chunk_index": row["chunk_index"], - "content": row["content"], - "score": round(score, 4), - "restore_command": restore_command - }) - - # Sort by score and limit - matches.sort(key=lambda x: x["score"], reverse=True) - matches = matches[:top_k] - - return { - "success": True, - "matches": matches - } - - except Exception as e: - return { - "success": False, - "error": str(e), - "matches": [] - } - - def _get_restore_command(self, source_id: str, source_type: str) -> str: - """Generate restore command for a source.""" - if source_type in ("core_memory", "cli_history"): - return f"ccw memory export {source_id}" - elif source_type == "workflow": - return f"ccw session resume {source_id}" - else: - return f"# Unknown source type: {source_type}" - - def get_status(self) -> Dict[str, Any]: - """Get embedding status statistics.""" - cursor = self.conn.cursor() - - # Total chunks - cursor.execute("SELECT COUNT(*) as count FROM memory_chunks") - total_chunks = cursor.fetchone()["count"] - - # Embedded chunks - cursor.execute("SELECT COUNT(*) as count FROM memory_chunks WHERE embedding IS NOT NULL") - embedded_chunks = cursor.fetchone()["count"] - - # By type - cursor.execute(""" - SELECT - source_type, - COUNT(*) as total, - SUM(CASE WHEN embedding IS NOT NULL THEN 1 ELSE 0 END) as embedded - FROM memory_chunks - GROUP BY source_type - """) - - by_type = {} - for row in cursor: - by_type[row["source_type"]] = { - "total": row["total"], - "embedded": row["embedded"], - "pending": row["total"] - row["embedded"] - } - - return { - "total_chunks": total_chunks, - "embedded_chunks": embedded_chunks, - "pending_chunks": total_chunks - embedded_chunks, - "by_type": by_type - } - - -def main(): - """Main entry point.""" - parser = argparse.ArgumentParser( - description="Memory Embedder - Bridge CCW to CodexLens semantic search" - ) - - subparsers = parser.add_subparsers(dest="command", help="Command to execute") - subparsers.required = True - - # Embed command - embed_parser = subparsers.add_parser("embed", help="Generate embeddings for chunks") - embed_parser.add_argument("db_path", help="Path to SQLite database") - embed_parser.add_argument("--source-id", help="Only process chunks from this source") - embed_parser.add_argument("--batch-size", type=int, default=8, help="Batch size (default: 8)") - embed_parser.add_argument("--force", action="store_true", help="Re-embed existing chunks") - - # Search command - search_parser = subparsers.add_parser("search", help="Semantic search") - search_parser.add_argument("db_path", help="Path to SQLite database") - search_parser.add_argument("query", help="Search query") - search_parser.add_argument("--top-k", type=int, default=10, help="Number of results (default: 10)") - search_parser.add_argument("--min-score", type=float, default=0.3, help="Minimum score (default: 0.3)") - search_parser.add_argument("--type", dest="source_type", help="Filter by source type") - - # Status command - status_parser = subparsers.add_parser("status", help="Get embedding status") - status_parser.add_argument("db_path", help="Path to SQLite database") - - args = parser.parse_args() - - try: - embedder = MemoryEmbedder(args.db_path) - - if args.command == "embed": - result = embedder.embed_chunks( - source_id=args.source_id, - batch_size=args.batch_size, - force=args.force - ) - print(json.dumps(result, indent=2)) - - elif args.command == "search": - result = embedder.search( - query=args.query, - top_k=args.top_k, - min_score=args.min_score, - source_type=args.source_type - ) - print(json.dumps(result, indent=2)) - - elif args.command == "status": - result = embedder.get_status() - print(json.dumps(result, indent=2)) - - embedder.close() - - # Exit with error code if operation failed - if "success" in result and not result["success"]: - # Clean up ONNX resources before exit - clear_embedder_cache() - sys.exit(1) - - # Clean up ONNX resources to ensure process can exit cleanly - # This releases fastembed/ONNX Runtime threads that would otherwise - # prevent the Python interpreter from shutting down - clear_embedder_cache() - - except Exception as e: - # Clean up ONNX resources even on error - try: - clear_embedder_cache() - except Exception: - pass - print(json.dumps({ - "success": False, - "error": str(e) - }, indent=2), file=sys.stderr) - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/ccw/scripts/test_memory_embedder.py b/ccw/scripts/test_memory_embedder.py deleted file mode 100644 index b78a0c40..00000000 --- a/ccw/scripts/test_memory_embedder.py +++ /dev/null @@ -1,245 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script for memory_embedder.py - -Creates a temporary database with test data and verifies all commands work. -""" - -import json -import sqlite3 -import tempfile -import subprocess -from pathlib import Path -from datetime import datetime - - -def create_test_database(): - """Create a temporary database with test chunks.""" - # Create temp file - temp_db = tempfile.NamedTemporaryFile(suffix='.db', delete=False) - temp_db.close() - - conn = sqlite3.connect(temp_db.name) - cursor = conn.cursor() - - # Create schema - cursor.execute(""" - CREATE TABLE memory_chunks ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - source_id TEXT NOT NULL, - source_type TEXT NOT NULL, - chunk_index INTEGER NOT NULL, - content TEXT NOT NULL, - embedding BLOB, - metadata TEXT, - created_at TEXT NOT NULL, - UNIQUE(source_id, chunk_index) - ) - """) - - # Insert test data - test_chunks = [ - ("CMEM-20250101-001", "core_memory", 0, "Implemented authentication using JWT tokens with refresh mechanism"), - ("CMEM-20250101-001", "core_memory", 1, "Added rate limiting to API endpoints using Redis"), - ("WFS-20250101-auth", "workflow", 0, "Created login endpoint with password hashing"), - ("WFS-20250101-auth", "workflow", 1, "Implemented session management with token rotation"), - ("CLI-20250101-001", "cli_history", 0, "Executed database migration for user table"), - ] - - now = datetime.now().isoformat() - for source_id, source_type, chunk_index, content in test_chunks: - cursor.execute( - """ - INSERT INTO memory_chunks (source_id, source_type, chunk_index, content, created_at) - VALUES (?, ?, ?, ?, ?) - """, - (source_id, source_type, chunk_index, content, now) - ) - - conn.commit() - conn.close() - - return temp_db.name - - -def run_command(args): - """Run memory_embedder.py with given arguments.""" - script = Path(__file__).parent / "memory_embedder.py" - cmd = ["python", str(script)] + args - - result = subprocess.run( - cmd, - capture_output=True, - text=True - ) - - return result.returncode, result.stdout, result.stderr - - -def test_status(db_path): - """Test status command.""" - print("Testing status command...") - returncode, stdout, stderr = run_command(["status", db_path]) - - if returncode != 0: - print(f"[FAIL] Status failed: {stderr}") - return False - - result = json.loads(stdout) - expected_total = 5 - - if result["total_chunks"] != expected_total: - print(f"[FAIL] Expected {expected_total} chunks, got {result['total_chunks']}") - return False - - if result["embedded_chunks"] != 0: - print(f"[FAIL] Expected 0 embedded chunks, got {result['embedded_chunks']}") - return False - - print(f"[PASS] Status OK: {result['total_chunks']} total, {result['embedded_chunks']} embedded") - return True - - -def test_embed(db_path): - """Test embed command.""" - print("\nTesting embed command...") - returncode, stdout, stderr = run_command(["embed", db_path, "--batch-size", "2"]) - - if returncode != 0: - print(f"[FAIL] Embed failed: {stderr}") - return False - - result = json.loads(stdout) - - if not result["success"]: - print(f"[FAIL] Embed unsuccessful") - return False - - if result["chunks_processed"] != 5: - print(f"[FAIL] Expected 5 processed, got {result['chunks_processed']}") - return False - - if result["chunks_failed"] != 0: - print(f"[FAIL] Expected 0 failed, got {result['chunks_failed']}") - return False - - print(f"[PASS] Embed OK: {result['chunks_processed']} processed in {result['elapsed_time']}s") - return True - - -def test_search(db_path): - """Test search command.""" - print("\nTesting search command...") - returncode, stdout, stderr = run_command([ - "search", db_path, "authentication JWT", - "--top-k", "3", - "--min-score", "0.3" - ]) - - if returncode != 0: - print(f"[FAIL] Search failed: {stderr}") - return False - - result = json.loads(stdout) - - if not result["success"]: - print(f"[FAIL] Search unsuccessful: {result.get('error', 'Unknown error')}") - return False - - if len(result["matches"]) == 0: - print(f"[FAIL] Expected at least 1 match, got 0") - return False - - print(f"[PASS] Search OK: {len(result['matches'])} matches found") - - # Show top match - top_match = result["matches"][0] - print(f" Top match: {top_match['source_id']} (score: {top_match['score']})") - print(f" Content: {top_match['content'][:60]}...") - - return True - - -def test_source_filter(db_path): - """Test search with source type filter.""" - print("\nTesting source type filter...") - returncode, stdout, stderr = run_command([ - "search", db_path, "authentication", - "--type", "workflow" - ]) - - if returncode != 0: - print(f"[FAIL] Filtered search failed: {stderr}") - return False - - result = json.loads(stdout) - - if not result["success"]: - print(f"[FAIL] Filtered search unsuccessful") - return False - - # Verify all matches are workflow type - for match in result["matches"]: - if match["source_type"] != "workflow": - print(f"[FAIL] Expected workflow type, got {match['source_type']}") - return False - - print(f"[PASS] Filter OK: {len(result['matches'])} workflow matches") - return True - - -def main(): - """Run all tests.""" - print("Memory Embedder Test Suite") - print("=" * 60) - - # Create test database - print("\nCreating test database...") - db_path = create_test_database() - print(f"[PASS] Database created: {db_path}") - - try: - # Run tests - tests = [ - ("Status", test_status), - ("Embed", test_embed), - ("Search", test_search), - ("Source Filter", test_source_filter), - ] - - passed = 0 - failed = 0 - - for name, test_func in tests: - try: - if test_func(db_path): - passed += 1 - else: - failed += 1 - except Exception as e: - print(f"[FAIL] {name} crashed: {e}") - failed += 1 - - # Summary - print("\n" + "=" * 60) - print(f"Results: {passed} passed, {failed} failed") - - if failed == 0: - print("[PASS] All tests passed!") - return 0 - else: - print("[FAIL] Some tests failed") - return 1 - - finally: - # Cleanup - import os - try: - os.unlink(db_path) - print(f"\n[PASS] Cleaned up test database") - except: - pass - - -if __name__ == "__main__": - exit(main()) diff --git a/ccw/scripts/unified_memory_embedder.py b/ccw/scripts/unified_memory_embedder.py deleted file mode 100644 index 9bbe0f47..00000000 --- a/ccw/scripts/unified_memory_embedder.py +++ /dev/null @@ -1,473 +0,0 @@ -#!/usr/bin/env python3 -""" -Unified Memory Embedder - Bridge CCW to CodexLens VectorStore (HNSW) - -Uses CodexLens VectorStore for HNSW-indexed vector storage and search, -replacing full-table-scan cosine similarity with sub-10ms approximate -nearest neighbor lookups. - -Protocol: JSON via stdin/stdout -Operations: embed, search, search_by_vector, status, reindex - -Usage: - echo '{"operation":"embed","store_path":"...","chunks":[...]}' | python unified_memory_embedder.py - echo '{"operation":"search","store_path":"...","query":"..."}' | python unified_memory_embedder.py - echo '{"operation":"status","store_path":"..."}' | python unified_memory_embedder.py - echo '{"operation":"reindex","store_path":"..."}' | python unified_memory_embedder.py -""" - -import json -import sys -import time -from pathlib import Path -from typing import List, Dict, Any, Optional - -try: - import numpy as np -except ImportError: - print(json.dumps({ - "success": False, - "error": "numpy is required. Install with: pip install numpy" - })) - sys.exit(1) - -try: - from codexlens.semantic.factory import get_embedder, clear_embedder_cache - from codexlens.semantic.vector_store import VectorStore - from codexlens.entities import SemanticChunk -except ImportError: - print(json.dumps({ - "success": False, - "error": "CodexLens not found. Install with: pip install codex-lens[semantic]" - })) - sys.exit(1) - - -# Valid category values for filtering -VALID_CATEGORIES = {"core_memory", "cli_history", "workflow", "entity", "pattern"} - - -class UnifiedMemoryEmbedder: - """Unified embedder backed by CodexLens VectorStore (HNSW).""" - - def __init__(self, store_path: str): - """ - Initialize with path to VectorStore database directory. - - Args: - store_path: Directory containing vectors.db and vectors.hnsw - """ - self.store_path = Path(store_path) - self.store_path.mkdir(parents=True, exist_ok=True) - - db_path = str(self.store_path / "vectors.db") - self.store = VectorStore(db_path) - - # Lazy-load embedder to avoid ~0.8s model loading for status command - self._embedder = None - - @property - def embedder(self): - """Lazy-load the embedder on first access.""" - if self._embedder is None: - self._embedder = get_embedder( - backend="fastembed", - profile="code", - use_gpu=True - ) - return self._embedder - - def embed(self, chunks: List[Dict[str, Any]], batch_size: int = 8) -> Dict[str, Any]: - """ - Embed chunks and insert into VectorStore. - - Each chunk dict must contain: - - content: str - - source_id: str - - source_type: str (e.g. "core_memory", "workflow", "cli_history") - - category: str (e.g. "core_memory", "cli_history", "workflow", "entity", "pattern") - - Optional fields: - - chunk_index: int (default 0) - - metadata: dict (additional metadata) - - Args: - chunks: List of chunk dicts to embed - batch_size: Number of chunks to embed per batch - - Returns: - Result dict with success, chunks_processed, chunks_failed, elapsed_time - """ - start_time = time.time() - chunks_processed = 0 - chunks_failed = 0 - - if not chunks: - return { - "success": True, - "chunks_processed": 0, - "chunks_failed": 0, - "elapsed_time": 0.0 - } - - # Process in batches - for i in range(0, len(chunks), batch_size): - batch = chunks[i:i + batch_size] - texts = [c["content"] for c in batch] - - try: - # Batch embed - embeddings = self.embedder.embed_to_numpy(texts) - - # Build SemanticChunks and insert - semantic_chunks = [] - for j, chunk_data in enumerate(batch): - category = chunk_data.get("category", chunk_data.get("source_type", "core_memory")) - source_id = chunk_data.get("source_id", "") - chunk_index = chunk_data.get("chunk_index", 0) - extra_meta = chunk_data.get("metadata", {}) - - # Build metadata dict for VectorStore - metadata = { - "source_id": source_id, - "source_type": chunk_data.get("source_type", ""), - "chunk_index": chunk_index, - **extra_meta - } - - sc = SemanticChunk( - content=chunk_data["content"], - embedding=embeddings[j].tolist(), - metadata=metadata - ) - semantic_chunks.append((sc, source_id, category)) - - # Insert into VectorStore - for sc, file_path, category in semantic_chunks: - try: - self.store.add_chunk(sc, file_path=file_path, category=category) - chunks_processed += 1 - except Exception as e: - print(f"Error inserting chunk: {e}", file=sys.stderr) - chunks_failed += 1 - - except Exception as e: - print(f"Error embedding batch starting at {i}: {e}", file=sys.stderr) - chunks_failed += len(batch) - - elapsed_time = time.time() - start_time - - return { - "success": chunks_failed == 0, - "chunks_processed": chunks_processed, - "chunks_failed": chunks_failed, - "elapsed_time": round(elapsed_time, 3) - } - - def search( - self, - query: str, - top_k: int = 10, - min_score: float = 0.3, - category: Optional[str] = None - ) -> Dict[str, Any]: - """ - Search VectorStore using HNSW index. - - Args: - query: Search query text - top_k: Number of results - min_score: Minimum similarity threshold - category: Optional category filter - - Returns: - Result dict with success and matches list - """ - try: - start_time = time.time() - - # Generate query embedding (embed_to_numpy accepts single string) - query_emb = self.embedder.embed_to_numpy(query)[0].tolist() - - # Search via VectorStore HNSW - results = self.store.search_similar( - query_emb, - top_k=top_k, - min_score=min_score, - category=category - ) - - elapsed_time = time.time() - start_time - - matches = [] - for result in results: - meta = result.metadata if result.metadata else {} - if isinstance(meta, str): - try: - meta = json.loads(meta) - except (json.JSONDecodeError, TypeError): - meta = {} - - matches.append({ - "content": result.content or result.excerpt or "", - "score": round(float(result.score), 4), - "source_id": meta.get("source_id", result.path or ""), - "source_type": meta.get("source_type", ""), - "chunk_index": meta.get("chunk_index", 0), - "category": meta.get("category", ""), - "metadata": meta - }) - - return { - "success": True, - "matches": matches, - "elapsed_time": round(elapsed_time, 3), - "total_searched": len(results) - } - - except Exception as e: - return { - "success": False, - "matches": [], - "error": str(e) - } - - def search_by_vector( - self, - vector: List[float], - top_k: int = 10, - min_score: float = 0.3, - category: Optional[str] = None - ) -> Dict[str, Any]: - """ - Search VectorStore using a pre-computed embedding vector (no re-embedding). - - Args: - vector: Pre-computed embedding vector (list of floats) - top_k: Number of results - min_score: Minimum similarity threshold - category: Optional category filter - - Returns: - Result dict with success and matches list - """ - try: - start_time = time.time() - - # Search via VectorStore HNSW directly with provided vector - results = self.store.search_similar( - vector, - top_k=top_k, - min_score=min_score, - category=category - ) - - elapsed_time = time.time() - start_time - - matches = [] - for result in results: - meta = result.metadata if result.metadata else {} - if isinstance(meta, str): - try: - meta = json.loads(meta) - except (json.JSONDecodeError, TypeError): - meta = {} - - matches.append({ - "content": result.content or result.excerpt or "", - "score": round(float(result.score), 4), - "source_id": meta.get("source_id", result.path or ""), - "source_type": meta.get("source_type", ""), - "chunk_index": meta.get("chunk_index", 0), - "category": meta.get("category", ""), - "metadata": meta - }) - - return { - "success": True, - "matches": matches, - "elapsed_time": round(elapsed_time, 3), - "total_searched": len(results) - } - - except Exception as e: - return { - "success": False, - "matches": [], - "error": str(e) - } - - def status(self) -> Dict[str, Any]: - """ - Get VectorStore index status. - - Returns: - Status dict with total_chunks, hnsw_available, dimension, etc. - """ - try: - total_chunks = self.store.count_chunks() - hnsw_available = self.store.ann_available - hnsw_count = self.store.ann_count - dimension = self.store.dimension or 768 - - # Count per category from SQLite - categories = {} - try: - import sqlite3 - db_path = str(self.store_path / "vectors.db") - with sqlite3.connect(db_path) as conn: - rows = conn.execute( - "SELECT category, COUNT(*) FROM semantic_chunks GROUP BY category" - ).fetchall() - for row in rows: - categories[row[0] or "unknown"] = row[1] - except Exception: - pass - - return { - "success": True, - "total_chunks": total_chunks, - "hnsw_available": hnsw_available, - "hnsw_count": hnsw_count, - "dimension": dimension, - "categories": categories, - "model_config": { - "backend": "fastembed", - "profile": "code", - "dimension": 768, - "max_tokens": 8192 - } - } - - except Exception as e: - return { - "success": False, - "total_chunks": 0, - "hnsw_available": False, - "hnsw_count": 0, - "dimension": 0, - "error": str(e) - } - - def reindex(self) -> Dict[str, Any]: - """ - Rebuild HNSW index from scratch. - - Returns: - Result dict with success and timing - """ - try: - start_time = time.time() - - self.store.rebuild_ann_index() - - elapsed_time = time.time() - start_time - - return { - "success": True, - "hnsw_count": self.store.ann_count, - "elapsed_time": round(elapsed_time, 3) - } - - except Exception as e: - return { - "success": False, - "error": str(e) - } - - -def main(): - """Main entry point. Reads JSON from stdin, writes JSON to stdout.""" - try: - raw_input = sys.stdin.read() - if not raw_input.strip(): - print(json.dumps({ - "success": False, - "error": "No input provided. Send JSON via stdin." - })) - sys.exit(1) - - request = json.loads(raw_input) - except json.JSONDecodeError as e: - print(json.dumps({ - "success": False, - "error": f"Invalid JSON input: {e}" - })) - sys.exit(1) - - operation = request.get("operation") - store_path = request.get("store_path") - - if not operation: - print(json.dumps({ - "success": False, - "error": "Missing required field: operation" - })) - sys.exit(1) - - if not store_path: - print(json.dumps({ - "success": False, - "error": "Missing required field: store_path" - })) - sys.exit(1) - - try: - embedder = UnifiedMemoryEmbedder(store_path) - - if operation == "embed": - chunks = request.get("chunks", []) - batch_size = request.get("batch_size", 8) - result = embedder.embed(chunks, batch_size=batch_size) - - elif operation == "search": - query = request.get("query", "") - if not query: - result = {"success": False, "error": "Missing required field: query", "matches": []} - else: - top_k = request.get("top_k", 10) - min_score = request.get("min_score", 0.3) - category = request.get("category") - result = embedder.search(query, top_k=top_k, min_score=min_score, category=category) - - elif operation == "search_by_vector": - vector = request.get("vector", []) - if not vector: - result = {"success": False, "error": "Missing required field: vector", "matches": []} - else: - top_k = request.get("top_k", 10) - min_score = request.get("min_score", 0.3) - category = request.get("category") - result = embedder.search_by_vector(vector, top_k=top_k, min_score=min_score, category=category) - - elif operation == "status": - result = embedder.status() - - elif operation == "reindex": - result = embedder.reindex() - - else: - result = { - "success": False, - "error": f"Unknown operation: {operation}. Valid: embed, search, search_by_vector, status, reindex" - } - - print(json.dumps(result)) - - # Clean up ONNX resources to ensure process can exit cleanly - clear_embedder_cache() - - except Exception as e: - try: - clear_embedder_cache() - except Exception: - pass - print(json.dumps({ - "success": False, - "error": str(e) - })) - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/ccw/src/core/routes/core-memory-routes.ts b/ccw/src/core/routes/core-memory-routes.ts index fdf75fc0..a05b5d00 100644 --- a/ccw/src/core/routes/core-memory-routes.ts +++ b/ccw/src/core/routes/core-memory-routes.ts @@ -3,7 +3,6 @@ import { URL } from 'url'; import { getCoreMemoryStore } from '../core-memory-store.js'; import type { CoreMemory, SessionCluster, ClusterMember, ClusterRelation } from '../core-memory-store.js'; import { getEmbeddingStatus, generateEmbeddings } from '../memory-embedder-bridge.js'; -import { checkSemanticStatus } from '../../tools/codex-lens.js'; import { MemoryJobScheduler } from '../memory-job-scheduler.js'; import type { JobStatus } from '../memory-job-scheduler.js'; import { StoragePaths } from '../../config/storage-paths.js'; @@ -781,8 +780,8 @@ export async function handleCoreMemoryRoutes(ctx: RouteContext): Promise CodexLens > Semantic page.', status: 503 }; } diff --git a/ccw/src/core/routes/mcp-routes.ts b/ccw/src/core/routes/mcp-routes.ts index 447af46d..82784a54 100644 --- a/ccw/src/core/routes/mcp-routes.ts +++ b/ccw/src/core/routes/mcp-routes.ts @@ -1084,7 +1084,35 @@ function isRecord(value: unknown): value is Record { * Handle MCP routes * @returns true if route was handled, false otherwise */ +// Seed built-in MCP templates once +let _templateSeeded = false; +function seedBuiltinTemplates(): void { + if (_templateSeeded) return; + _templateSeeded = true; + try { + McpTemplatesDb.saveTemplate({ + name: 'codexlens', + description: 'CodexLens semantic code search (vector + FTS + reranking)', + serverConfig: { + command: 'uvx', + args: ['--from', 'codexlens-search[mcp]', 'codexlens-mcp'], + env: { + CODEXLENS_EMBED_API_URL: '', + CODEXLENS_EMBED_API_KEY: '', + CODEXLENS_EMBED_API_MODEL: 'text-embedding-3-small', + CODEXLENS_EMBED_DIM: '1536', + }, + }, + category: 'code-search', + tags: ['search', 'semantic', 'code-intelligence'], + }); + } catch { + // Template may already exist — ignore upsert errors + } +} + export async function handleMcpRoutes(ctx: RouteContext): Promise { + seedBuiltinTemplates(); const { pathname, url, req, res, initialPath, handlePostRequest, broadcastToClients } = ctx; // API: Get MCP configuration (includes both Claude and Codex) @@ -1230,13 +1258,13 @@ export async function handleMcpRoutes(ctx: RouteContext): Promise { const enabledToolsRaw = envInput.enabledTools; let enabledToolsEnv: string; if (enabledToolsRaw === undefined || enabledToolsRaw === null) { - enabledToolsEnv = 'write_file,edit_file,read_file,core_memory,ask_question,smart_search'; + enabledToolsEnv = 'write_file,edit_file,read_file,core_memory,ask_question'; } else if (Array.isArray(enabledToolsRaw)) { enabledToolsEnv = enabledToolsRaw.filter((t): t is string => typeof t === 'string').join(','); } else if (typeof enabledToolsRaw === 'string') { enabledToolsEnv = enabledToolsRaw; } else { - enabledToolsEnv = 'write_file,edit_file,read_file,core_memory,ask_question,smart_search'; + enabledToolsEnv = 'write_file,edit_file,read_file,core_memory,ask_question'; } const projectRoot = typeof envInput.projectRoot === 'string' ? envInput.projectRoot : undefined; diff --git a/ccw/src/mcp-server/index.ts b/ccw/src/mcp-server/index.ts index 499596fd..396b14dd 100644 --- a/ccw/src/mcp-server/index.ts +++ b/ccw/src/mcp-server/index.ts @@ -10,8 +10,8 @@ import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js'; -import { getAllToolSchemas, executeTool, executeToolWithProgress } from '../tools/index.js'; -import type { ToolSchema, ToolResult } from '../types/tool.js'; +import { getAllToolSchemas, executeTool } from '../tools/index.js'; +import type { ToolSchema } from '../types/tool.js'; import { getProjectRoot, getAllowedDirectories, isSandboxEnabled } from '../utils/path-validator.js'; const SERVER_NAME = 'ccw-tools'; @@ -23,7 +23,7 @@ const ENV_ALLOWED_DIRS = 'CCW_ALLOWED_DIRS'; const STDIO_DISCONNECT_ERROR_CODES = new Set(['EPIPE', 'ERR_STREAM_DESTROYED']); // Default enabled tools (core set - file operations, core memory, and smart search) -const DEFAULT_TOOLS: string[] = ['write_file', 'edit_file', 'read_file', 'read_many_files', 'read_outline', 'core_memory', 'smart_search']; +const DEFAULT_TOOLS: string[] = ['write_file', 'edit_file', 'read_file', 'read_many_files', 'read_outline', 'core_memory']; /** * Get list of enabled tools from environment or defaults @@ -151,19 +151,7 @@ function createServer(): Server { } try { - // For smart_search init action, use progress-aware execution - const isInitAction = name === 'smart_search' && args?.action === 'init'; - - let result: ToolResult; - if (isInitAction) { - // Execute with progress callback that writes to stderr - result = await executeToolWithProgress(name, args || {}, (progress) => { - // Output progress to stderr (visible in terminal, doesn't interfere with JSON-RPC) - console.error(`[Progress] ${progress.percent}% - ${progress.message}`); - }); - } else { - result = await executeTool(name, args || {}); - } + const result = await executeTool(name, args || {}); if (!result.success) { return { diff --git a/ccw/src/tools/codex-lens.ts b/ccw/src/tools/codex-lens.ts deleted file mode 100644 index ffff12f1..00000000 --- a/ccw/src/tools/codex-lens.ts +++ /dev/null @@ -1,213 +0,0 @@ -/** - * CodexLens Tool - STUB (v1 removed) - * - * The v1 Python bridge has been removed. This module provides no-op stubs - * so that existing consumers compile without errors. - * Semantic search is now handled entirely by codexlens-search v2. - */ - -import type { ToolSchema, ToolResult } from '../types/tool.js'; - -// --------------------------------------------------------------------------- -// Types (kept for backward compatibility) -// --------------------------------------------------------------------------- - -interface ReadyStatus { - ready: boolean; - installed: boolean; - error?: string; - version?: string; - pythonVersion?: string; - venvPath?: string; -} - -interface SemanticStatus { - available: boolean; - backend?: string; - accelerator?: string; - providers?: string[]; - litellmAvailable?: boolean; - error?: string; -} - -interface BootstrapResult { - success: boolean; - message?: string; - error?: string; - details?: { - pythonVersion?: string; - venvPath?: string; - packagePath?: string; - installer?: 'uv' | 'pip'; - editable?: boolean; - }; -} - -interface ExecuteResult { - success: boolean; - output?: string; - error?: string; - message?: string; - warning?: string; - results?: unknown; - files?: unknown; - symbols?: unknown; -} - -interface ExecuteOptions { - timeout?: number; - cwd?: string; - onProgress?: (progress: ProgressInfo) => void; -} - -interface ProgressInfo { - stage: string; - message: string; - percent: number; - filesProcessed?: number; - totalFiles?: number; -} - -type GpuMode = 'cpu' | 'cuda' | 'directml'; - -interface PythonEnvInfo { - version: string; - majorMinor: string; - architecture: number; - compatible: boolean; - error?: string; -} - -// --------------------------------------------------------------------------- -// No-op implementations -// --------------------------------------------------------------------------- - -const V1_REMOVED = 'CodexLens v1 has been removed. Use codexlens-search v2.'; - -async function ensureReady(): Promise { - return { ready: false, installed: false, error: V1_REMOVED }; -} - -async function executeCodexLens(_args: string[], _options: ExecuteOptions = {}): Promise { - return { success: false, error: V1_REMOVED }; -} - -async function checkVenvStatus(_force?: boolean): Promise { - return { ready: false, installed: false, error: V1_REMOVED }; -} - -async function bootstrapVenv(): Promise { - return { success: false, error: V1_REMOVED }; -} - -async function checkSemanticStatus(_force?: boolean): Promise { - return { available: false, error: V1_REMOVED }; -} - -async function ensureLiteLLMEmbedderReady(): Promise { - return { success: false, error: V1_REMOVED }; -} - -async function installSemantic(_gpuMode: GpuMode = 'cpu'): Promise { - return { success: false, error: V1_REMOVED }; -} - -async function detectGpuSupport(): Promise<{ mode: GpuMode; available: GpuMode[]; info: string; pythonEnv?: PythonEnvInfo }> { - return { mode: 'cpu', available: ['cpu'], info: V1_REMOVED }; -} - -async function uninstallCodexLens(): Promise { - return { success: false, error: V1_REMOVED }; -} - -function cancelIndexing(): { success: boolean; message?: string; error?: string } { - return { success: false, error: V1_REMOVED }; -} - -function isIndexingInProgress(): boolean { - return false; -} - -async function bootstrapWithUv(_gpuMode: GpuMode = 'cpu'): Promise { - return { success: false, error: V1_REMOVED }; -} - -async function installSemanticWithUv(_gpuMode: GpuMode = 'cpu'): Promise { - return { success: false, error: V1_REMOVED }; -} - -function useCodexLensV2(): boolean { - return true; // v2 is now the only option -} - -function isCodexLensV2Installed(): boolean { - return false; -} - -async function bootstrapV2WithUv(): Promise { - return { success: false, error: V1_REMOVED }; -} - -function getVenvPythonPath(): string { - return 'python'; -} - -// --------------------------------------------------------------------------- -// Tool schema / handler (no-op) -// --------------------------------------------------------------------------- - -export const schema: ToolSchema = { - name: 'codex_lens', - description: '[REMOVED] CodexLens v1 tool has been removed. Use smart_search instead.', - inputSchema: { - type: 'object', - properties: { - action: { type: 'string', description: 'Action (v1 removed)' }, - }, - }, -}; - -export async function handler(_params: Record): Promise> { - return { - success: false, - error: V1_REMOVED, - result: { success: false, error: V1_REMOVED }, - }; -} - -// --------------------------------------------------------------------------- -// Exports -// --------------------------------------------------------------------------- - -export type { ProgressInfo, ExecuteOptions, GpuMode, PythonEnvInfo }; - -export { - ensureReady, - executeCodexLens, - checkVenvStatus, - bootstrapVenv, - checkSemanticStatus, - ensureLiteLLMEmbedderReady, - installSemantic, - detectGpuSupport, - uninstallCodexLens, - cancelIndexing, - isIndexingInProgress, - bootstrapWithUv, - installSemanticWithUv, - useCodexLensV2, - isCodexLensV2Installed, - bootstrapV2WithUv, - getVenvPythonPath, -}; - -export const __testables = {}; - -export const codexLensTool = { - name: schema.name, - description: schema.description, - parameters: schema.inputSchema, - execute: async (_params: Record) => { - return { success: false, error: V1_REMOVED }; - }, -}; diff --git a/ccw/src/tools/index.ts b/ccw/src/tools/index.ts index d009c6c0..b0c4c4d1 100644 --- a/ccw/src/tools/index.ts +++ b/ccw/src/tools/index.ts @@ -18,10 +18,7 @@ import * as generateDddDocsMod from './generate-ddd-docs.js'; import * as convertTokensToCssMod from './convert-tokens-to-css.js'; import * as sessionManagerMod from './session-manager.js'; import * as cliExecutorMod from './cli-executor.js'; -import * as smartSearchMod from './smart-search.js'; -import { executeInitWithProgress } from './smart-search.js'; -// codex_lens removed - functionality integrated into smart_search -// codex_lens_lsp removed - v1 LSP bridge removed +// codex_lens / smart_search removed - use codexlens MCP server instead import * as readFileMod from './read-file.js'; import * as readManyFilesMod from './read-many-files.js'; import * as readOutlineMod from './read-outline.js'; @@ -30,7 +27,7 @@ import * as contextCacheMod from './context-cache.js'; import * as skillContextLoaderMod from './skill-context-loader.js'; import * as askQuestionMod from './ask-question.js'; import * as teamMsgMod from './team-msg.js'; -import type { ProgressInfo } from './codex-lens.js'; + // Import legacy JS tools import { uiGeneratePreviewTool } from './ui-generate-preview.js'; @@ -272,60 +269,6 @@ function sanitizeResult(result: unknown): unknown { return result; } -/** - * Execute a tool with progress callback (for init actions) - */ -export async function executeToolWithProgress( - name: string, - params: Record = {}, - onProgress?: (progress: ProgressInfo) => void -): Promise<{ - success: boolean; - result?: unknown; - error?: string; -}> { - // For smart_search init, use special progress-aware execution - if (name === 'smart_search' && params.action === 'init') { - try { - // Notify dashboard - execution started - notifyDashboard({ - toolName: name, - status: 'started', - params: sanitizeParams(params) - }); - - const result = await executeInitWithProgress(params, onProgress); - - // Notify dashboard - execution completed - notifyDashboard({ - toolName: name, - status: 'completed', - result: sanitizeResult(result) - }); - - return { - success: result.success, - result, - error: result.error - }; - } catch (error) { - notifyDashboard({ - toolName: name, - status: 'failed', - error: (error as Error).message || 'Tool execution failed' - }); - - return { - success: false, - error: (error as Error).message || 'Tool execution failed' - }; - } - } - - // Fall back to regular execution for other tools - return executeTool(name, params); -} - /** * Get tool schema in MCP-compatible format */ @@ -363,9 +306,7 @@ registerTool(toLegacyTool(generateDddDocsMod)); registerTool(toLegacyTool(convertTokensToCssMod)); registerTool(toLegacyTool(sessionManagerMod)); registerTool(toLegacyTool(cliExecutorMod)); -registerTool(toLegacyTool(smartSearchMod)); -// codex_lens removed - functionality integrated into smart_search -// codex_lens_lsp removed - v1 LSP bridge removed +// codex_lens / smart_search removed - use codexlens MCP server instead registerTool(toLegacyTool(readFileMod)); registerTool(toLegacyTool(readManyFilesMod)); registerTool(toLegacyTool(readOutlineMod)); diff --git a/ccw/src/tools/smart-context.ts b/ccw/src/tools/smart-context.ts index ad6b20bb..56f2cf7b 100644 --- a/ccw/src/tools/smart-context.ts +++ b/ccw/src/tools/smart-context.ts @@ -4,7 +4,9 @@ * Auto-generates contextual file references for CLI execution */ -import { executeCodexLens, ensureReady as ensureCodexLensReady } from './codex-lens.js'; +// codex-lens v1 removed — no-op stubs for backward compatibility +async function ensureCodexLensReady(): Promise<{ ready: boolean }> { return { ready: false }; } +async function executeCodexLens(_args: string[], _opts?: { cwd?: string }): Promise<{ success: boolean; output?: string }> { return { success: false }; } // Options for smart context generation export interface SmartContextOptions { diff --git a/ccw/src/tools/smart-search.ts b/ccw/src/tools/smart-search.ts deleted file mode 100644 index 51d6f6ae..00000000 --- a/ccw/src/tools/smart-search.ts +++ /dev/null @@ -1,3686 +0,0 @@ -/** - * Smart Search Tool - Unified intelligent search powered by codexlens-search v2 - * - * Features: - * - Semantic search: 2-stage vector (binary coarse + ANN fine) + FTS5 + RRF fusion + reranking - * - Ripgrep fallback for fast exact/regex matching - * - File discovery via glob patterns - * - Incremental indexing with Mark-and-Filter strategy - * - File watcher for automatic index updates - * - * Actions: - * - search: Semantic search via v2 bridge with ripgrep fallback - * - init: Initialize v2 index and sync files - * - status: Check v2 index statistics - * - update: Incremental sync for changed files - * - watch: Start file watcher for automatic updates - * - find_files: Glob-based file path matching - */ - -import { z } from 'zod'; -import type { ToolSchema, ToolResult } from '../types/tool.js'; -import { spawn, spawnSync, type SpawnOptions } from 'child_process'; -import { existsSync, readFileSync, statSync } from 'fs'; -import { dirname, join, resolve } from 'path'; -import { - ensureReady as ensureCodexLensReady, - checkSemanticStatus, - ensureLiteLLMEmbedderReady, - executeCodexLens, - getVenvPythonPath, -} from './codex-lens.js'; -import { execFile } from 'child_process'; -import type { ProgressInfo } from './codex-lens.js'; -import { getProjectRoot } from '../utils/path-validator.js'; -import { getCodexLensDataDir } from '../utils/codexlens-path.js'; -import { EXEC_TIMEOUTS } from '../utils/exec-constants.js'; -import { generateRotationEndpoints } from '../config/litellm-api-config-manager.js'; -import type { RotationEndpointConfig } from '../config/litellm-api-config-manager.js'; - -// Timing utilities for performance analysis -const TIMING_ENABLED = process.env.SMART_SEARCH_TIMING === '1' || process.env.DEBUG?.includes('timing'); -const SEARCH_OUTPUT_MODES = ['full', 'files_only', 'count', 'ace'] as const; -type SearchOutputMode = typeof SEARCH_OUTPUT_MODES[number]; - -interface TimingData { - [key: string]: number; -} - -function createTimer(): { mark: (name: string) => void; getTimings: () => TimingData; log: () => void } { - const startTime = performance.now(); - const marks: { name: string; time: number }[] = []; - let lastMark = startTime; - - return { - mark(name: string) { - const now = performance.now(); - marks.push({ name, time: now - lastMark }); - lastMark = now; - }, - getTimings(): TimingData { - const timings: TimingData = {}; - marks.forEach(m => { timings[m.name] = Math.round(m.time * 100) / 100; }); - timings['_total'] = Math.round((performance.now() - startTime) * 100) / 100; - return timings; - }, - log() { - if (TIMING_ENABLED) { - const timings = this.getTimings(); - console.error(`[TIMING] smart-search: ${JSON.stringify(timings)}`); - } - } - }; -} - -// Define Zod schema for validation -const ParamsSchema = z.object({ - // Action: search (content), find_files (path/name pattern), init, status, update (incremental sync), watch - // Note: search_files is deprecated, use search with output_mode='files_only' - action: z.enum(['init', 'search', 'search_files', 'find_files', 'status', 'update', 'watch']).default('search'), - query: z.string().optional().describe('Content search query (for action="search")'), - pattern: z.string().optional().describe('Glob pattern for path matching (for action="find_files")'), - mode: z.enum(['fuzzy', 'semantic']).default('fuzzy'), - output_mode: z.enum(SEARCH_OUTPUT_MODES).default('ace'), - path: z.string().optional(), - paths: z.array(z.string()).default([]), - contextLines: z.number().default(0), - maxResults: z.number().default(5), // Default 5 with full content - includeHidden: z.boolean().default(false), - force: z.boolean().default(false).describe('Force full rebuild for action="init".'), - limit: z.number().default(5), // Default 5 with full content - extraFilesCount: z.number().default(10), // Additional file-only results - maxContentLength: z.number().default(200), // Max content length for truncation (50-2000) - offset: z.number().default(0), // NEW: Pagination offset (start_index) - // Search modifiers for ripgrep mode - regex: z.boolean().default(true), // Use regex pattern matching (default: enabled) - caseSensitive: z.boolean().default(true), // Case sensitivity (default: case-sensitive) - tokenize: z.boolean().default(true), // Tokenize multi-word queries for OR matching (default: enabled) - // File type filtering (default: code only) - excludeExtensions: z.array(z.string()).optional().describe('File extensions to exclude from results (e.g., ["md", "txt"])'), - codeOnly: z.boolean().default(true).describe('Only return code files (excludes md, txt, json, yaml, xml, etc.). Default: true'), - withDoc: z.boolean().default(false).describe('Include documentation files (md, txt, rst, etc.). Overrides codeOnly when true'), - // Watcher options - debounce: z.number().default(1000).describe('Debounce interval in ms for watch action'), - // Fuzzy matching is implicit in hybrid mode (RRF fusion) -}); - -type Params = z.infer; - -// Search mode constants -const SEARCH_MODES = ['fuzzy', 'semantic'] as const; - -// Classification confidence threshold -const CONFIDENCE_THRESHOLD = 0.7; - -// File filtering configuration (ported from code-index) -const FILTER_CONFIG = { - exclude_directories: new Set([ - '.git', '.svn', '.hg', '.bzr', - 'node_modules', '__pycache__', '.venv', 'venv', 'vendor', 'bower_components', - 'dist', 'build', 'target', 'out', 'bin', 'obj', - '.idea', '.vscode', '.vs', '.sublime-workspace', - '.pytest_cache', '.coverage', '.tox', '.nyc_output', 'coverage', 'htmlcov', - '.next', '.nuxt', '.cache', '.parcel-cache', - '.DS_Store', 'Thumbs.db', - ]), - exclude_files: new Set([ - '*.tmp', '*.temp', '*.swp', '*.swo', '*.bak', '*~', '*.orig', '*.log', - 'package-lock.json', 'yarn.lock', 'pnpm-lock.yaml', 'Pipfile.lock', - ]), - // Windows device files - must use **/ pattern to match in any directory - // These cause "os error 1" on Windows when accessed - windows_device_files: new Set([ - 'nul', 'con', 'aux', 'prn', - 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9', - 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9', - ]), -}; - -function buildExcludeArgs(): string[] { - const args: string[] = []; - for (const dir of FILTER_CONFIG.exclude_directories) { - args.push('--glob', `!**/${dir}/**`); - } - for (const pattern of FILTER_CONFIG.exclude_files) { - args.push('--glob', `!${pattern}`); - } - // Windows device files need case-insensitive matching in any directory - for (const device of FILTER_CONFIG.windows_device_files) { - args.push('--glob', `!**/${device}`); - args.push('--glob', `!**/${device.toUpperCase()}`); - } - return args; -} - -/** - * Tokenize query for multi-word OR matching - * Splits on whitespace and common delimiters, filters stop words and short tokens - * @param query - The search query - * @returns Array of tokens - */ -function tokenizeQuery(query: string): string[] { - // Stop words for filtering (common English + programming keywords) - const stopWords = new Set([ - 'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being', - 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', - 'should', 'may', 'might', 'must', 'can', 'to', 'of', 'in', 'for', 'on', - 'with', 'at', 'by', 'from', 'as', 'into', 'through', 'and', 'but', 'if', - 'or', 'not', 'this', 'that', 'these', 'those', 'it', 'its', 'how', 'what', - 'where', 'when', 'why', 'which', 'who', 'whom', - ]); - - // Split on whitespace and common delimiters, keep meaningful tokens - const tokens = query - .split(/[\s,;:]+/) - .map(token => token.trim()) - .filter(token => { - // Keep tokens that are: - // - At least 2 characters long - // - Not a stop word (case-insensitive) - // - Or look like identifiers (contain underscore/camelCase) - if (token.length < 2) return false; - if (stopWords.has(token.toLowerCase()) && !token.includes('_') && !/[A-Z]/.test(token)) { - return false; - } - return true; - }); - - return tokens; -} - -/** - * Score results based on token match count for ranking - * @param results - Search results - * @param tokens - Query tokens - * @returns Results with match scores - */ -function scoreByTokenMatch(results: ExactMatch[], tokens: string[]): ExactMatch[] { - if (tokens.length <= 1) return results; - - // Create case-insensitive patterns for each token - const tokenPatterns = tokens.map(t => { - const escaped = t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); - return new RegExp(escaped, 'i'); - }); - - return results.map(r => { - const content = r.content || ''; - const file = r.file || ''; - const searchText = `${file} ${content}`; - - // Count how many tokens match - let matchCount = 0; - for (const pattern of tokenPatterns) { - if (pattern.test(searchText)) { - matchCount++; - } - } - - // Calculate match ratio (0 to 1) - const matchRatio = matchCount / tokens.length; - - return { - ...r, - matchScore: matchRatio, - matchCount, - }; - }).sort((a, b) => { - // Sort by match ratio (descending), then by line number - if (b.matchScore !== a.matchScore) { - return b.matchScore - a.matchScore; - } - return (a.line || 0) - (b.line || 0); - }); -} - -interface Classification { - mode: string; - confidence: number; - reasoning: string; -} - -interface ChunkLine { - line: number; - text: string; - isMatch: boolean; -} - -interface ExactMatch { - file: string; - line: number; - column: number; - content: string; - endLine?: number; - chunkLines?: ChunkLine[]; - matchScore?: number; // Token match ratio (0-1) for multi-word queries - matchCount?: number; // Number of tokens matched -} - -interface RelationshipInfo { - type: string; // 'calls', 'imports', 'called_by', 'imported_by' - direction: 'outgoing' | 'incoming'; - target?: string; // Target symbol name (for outgoing) - source?: string; // Source symbol name (for incoming) - file: string; // File path - line?: number; // Line number -} - -interface SemanticMatch { - file: string; - line?: number; - column?: number; - score: number; - content: string; - symbol: string | null; - relationships?: RelationshipInfo[]; -} - -interface GraphMatch { - file: string; - symbols: unknown; - relationships: unknown[]; -} - -// File match for find_files action (path-based search) -interface FileMatch { - path: string; - type: 'file' | 'directory'; - name: string; // Filename only - extension?: string; // File extension (without dot) -} - -interface PaginationInfo { - offset: number; // Starting index of returned results - limit: number; // Number of results requested - total: number; // Total number of results found - has_more: boolean; // True if more results are available -} - -interface SearchSuggestion { - title: string; - command: string; - reason: string; -} - -interface SearchMetadata { - mode?: string; - backend?: string; - count?: number; - query?: string; - pattern?: string; // For find_files action - classified_as?: string; - confidence?: number; - reasoning?: string; - embeddings_coverage_percent?: number; - warning?: string; - note?: string; - index_status?: 'indexed' | 'not_indexed' | 'partial'; - fallback?: string; // Fallback mode used (e.g., 'fuzzy') - fallback_history?: string[]; - suggested_weights?: Record; - // Tokenization metadata (ripgrep mode) - tokens?: string[]; // Query tokens used for multi-word search - tokenized?: boolean; // Whether tokenization was applied - suggestions?: SearchSuggestion[]; - // Pagination metadata - pagination?: PaginationInfo; - // Performance timing data (when SMART_SEARCH_TIMING=1 or DEBUG includes 'timing') - timing?: TimingData; - // Init action specific - action?: string; - path?: string; - progress?: { - stage: string; - message: string; - percent: number; - filesProcessed?: number; - totalFiles?: number; - }; - progressHistory?: ProgressInfo[]; - api_max_workers?: number; - endpoint_count?: number; - use_gpu?: boolean; - reranker_enabled?: boolean; - reranker_backend?: string; - reranker_model?: string; - cascade_strategy?: string; - staged_stage2_mode?: string; - static_graph_enabled?: boolean; - preset?: string; -} - -interface SearchResult { - success: boolean; - results?: ExactMatch[] | SemanticMatch[] | GraphMatch[] | FileMatch[] | AceLikeOutput | unknown; - extra_files?: string[]; // Additional file paths without content - output?: string; - metadata?: SearchMetadata; - error?: string; - status?: unknown; - message?: string; -} - -interface AceLikeSection { - path: string; - line?: number; - endLine?: number; - column?: number; - score?: number; - symbol?: string | null; - snippet: string; - lines?: ChunkLine[]; -} - -interface AceLikeGroup { - path: string; - sections: AceLikeSection[]; - total_matches: number; -} - -interface AceLikeOutput { - format: 'ace'; - text: string; - groups: AceLikeGroup[]; - sections: AceLikeSection[]; - total: number; -} - -interface ModelInfo { - model_profile?: string; - model_name?: string; - embedding_dim?: number; - backend?: string; - created_at?: string; - updated_at?: string; -} - -interface CodexLensConfig { - config_file?: string; - index_dir?: string; - embedding_backend?: string; // 'fastembed' (local) or 'litellm' (api) - embedding_model?: string; - embedding_auto_embed_missing?: boolean; - reranker_enabled?: boolean; - reranker_backend?: string; // 'onnx' (local) or 'api' - reranker_model?: string; - reranker_top_k?: number; - api_max_workers?: number; - api_batch_size?: number; - cascade_strategy?: string; - staged_stage2_mode?: string; - static_graph_enabled?: boolean; -} - -interface IndexStatus { - indexed: boolean; - has_embeddings: boolean; - file_count?: number; - embeddings_coverage_percent?: number; - total_chunks?: number; - model_info?: ModelInfo | null; - config?: CodexLensConfig | null; - warning?: string; -} - -function readCodexLensSettingsSnapshot(): Partial { - const settingsPath = join(getCodexLensDataDir(), 'settings.json'); - if (!existsSync(settingsPath)) { - return {}; - } - - try { - const parsed = JSON.parse(readFileSync(settingsPath, 'utf-8')) as Record; - const embedding = (parsed.embedding ?? {}) as Record; - const reranker = (parsed.reranker ?? {}) as Record; - const api = (parsed.api ?? {}) as Record; - const cascade = (parsed.cascade ?? {}) as Record; - const staged = (parsed.staged ?? {}) as Record; - const indexing = (parsed.indexing ?? {}) as Record; - - return { - embedding_backend: normalizeEmbeddingBackend(typeof embedding.backend === 'string' ? embedding.backend : undefined), - embedding_model: typeof embedding.model === 'string' ? embedding.model : undefined, - embedding_auto_embed_missing: typeof embedding.auto_embed_missing === 'boolean' ? embedding.auto_embed_missing : undefined, - reranker_enabled: typeof reranker.enabled === 'boolean' ? reranker.enabled : undefined, - reranker_backend: typeof reranker.backend === 'string' ? reranker.backend : undefined, - reranker_model: typeof reranker.model === 'string' ? reranker.model : undefined, - reranker_top_k: typeof reranker.top_k === 'number' ? reranker.top_k : undefined, - api_max_workers: typeof api.max_workers === 'number' ? api.max_workers : undefined, - api_batch_size: typeof api.batch_size === 'number' ? api.batch_size : undefined, - cascade_strategy: typeof cascade.strategy === 'string' ? cascade.strategy : undefined, - staged_stage2_mode: typeof staged.stage2_mode === 'string' ? staged.stage2_mode : undefined, - static_graph_enabled: typeof indexing.static_graph_enabled === 'boolean' ? indexing.static_graph_enabled : undefined, - }; - } catch { - return {}; - } -} - -/** - * Strip ANSI color codes from string (for JSON parsing) - */ -function stripAnsi(str: string): string { - return str.replace(/\x1b\[[0-9;]*m/g, ''); -} - -/** Default maximum content length to return (avoid excessive output) */ -const DEFAULT_MAX_CONTENT_LENGTH = 200; -const CODEX_LENS_FTS_COMPATIBILITY_PATTERNS = [ - /UsageError:\s*Got unexpected extra arguments?/i, - /Option ['"]--method['"] does not take a value/i, - /TyperArgument\.make_metavar\(\) takes 1 positional argument but 2 were given/i, -]; - -let codexLensFtsBackendBroken = false; -const autoInitJobs = new Map(); -const autoEmbedJobs = new Map(); - -type SmartSearchRuntimeOverrides = { - checkSemanticStatus?: typeof checkSemanticStatus; - getVenvPythonPath?: typeof getVenvPythonPath; - spawnProcess?: typeof spawn; - now?: () => number; -}; - -const runtimeOverrides: SmartSearchRuntimeOverrides = {}; - -function getSemanticStatusRuntime(): typeof checkSemanticStatus { - return runtimeOverrides.checkSemanticStatus ?? checkSemanticStatus; -} - -function getVenvPythonPathRuntime(): typeof getVenvPythonPath { - return runtimeOverrides.getVenvPythonPath ?? getVenvPythonPath; -} - -function getSpawnRuntime(): typeof spawn { - return runtimeOverrides.spawnProcess ?? spawn; -} - -function getNowRuntime(): number { - return (runtimeOverrides.now ?? Date.now)(); -} - -function buildSmartSearchSpawnOptions(cwd: string, overrides: SpawnOptions = {}): SpawnOptions { - const { env, ...rest } = overrides; - return { - cwd, - shell: false, - windowsHide: true, - env: { ...process.env, PYTHONIOENCODING: 'utf-8', ...env }, - ...rest, - }; -} - -function shouldDetachBackgroundSmartSearchProcess(): boolean { - // On Windows, detached Python children can still create a transient console - // window even when windowsHide is set. Background warmup only needs to outlive - // the current request, not the MCP server process. - return process.platform !== 'win32'; -} - -/** - * Truncate content to specified length with ellipsis - * @param content - The content to truncate - * @param maxLength - Maximum length (default: 200) - */ -function truncateContent(content: string | null | undefined, maxLength: number = DEFAULT_MAX_CONTENT_LENGTH): string { - if (!content) return ''; - if (content.length <= maxLength) return content; - return content.slice(0, maxLength) + '...'; -} - -/** - * Split results into full content results and extra file-only results - * Generic function supporting both SemanticMatch and ExactMatch types - * @param allResults - All search results (must have 'file' property) - * @param fullContentLimit - Number of results with full content (default: 5) - * @param extraFilesCount - Number of additional file-only results (default: 10) - */ -function splitResultsWithExtraFiles( - allResults: T[], - fullContentLimit: number = 5, - extraFilesCount: number = 10 -): { results: T[]; extra_files: string[] } { - // First N results with full content - const results = allResults.slice(0, fullContentLimit); - - // Next M results as file paths only (deduplicated) - const extraResults = allResults.slice(fullContentLimit, fullContentLimit + extraFilesCount); - const extra_files = [...new Set(extraResults.map(r => r.file))]; - - return { results, extra_files }; -} - -interface SearchScope { - workingDirectory: string; - searchPaths: string[]; - targetFile?: string; -} - -interface RipgrepQueryModeResolution { - regex: boolean; - tokenize: boolean; - tokens: string[]; - literalFallback: boolean; - warning?: string; -} - -const GENERATED_QUERY_RE = /(? sanitizeSearchPath(item) || item); - const fallbackPath = normalizedPath || getProjectRoot(); - - try { - const resolvedPath = resolve(fallbackPath); - const stats = statSync(resolvedPath); - - if (stats.isFile()) { - return { - workingDirectory: dirname(resolvedPath), - searchPaths: normalizedPaths.length > 0 ? normalizedPaths : [resolvedPath], - targetFile: resolvedPath, - }; - } - - return { - workingDirectory: resolvedPath, - searchPaths: normalizedPaths.length > 0 ? normalizedPaths : ['.'], - }; - } catch { - return { - workingDirectory: fallbackPath, - searchPaths: normalizedPaths.length > 0 ? normalizedPaths : [normalizedPath || '.'], - }; - } -} - -function normalizeResultFilePath(filePath: string, workingDirectory: string): string { - return resolve(workingDirectory, filePath).replace(/\\/g, '/'); -} - -function filterResultsToTargetFile(results: T[], scope: SearchScope): T[] { - if (!scope.targetFile) { - return results; - } - - const normalizedTarget = scope.targetFile.replace(/\\/g, '/'); - return results.filter((result) => normalizeResultFilePath(result.file, scope.workingDirectory) === normalizedTarget); -} - -function parseCodexLensJsonOutput(output: string | undefined): any | null { - const cleanOutput = stripAnsi(output || '').trim(); - if (!cleanOutput) { - return null; - } - - const candidates = [ - cleanOutput, - ...cleanOutput.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.startsWith('{') || line.startsWith('[')), - ]; - - const firstBrace = cleanOutput.indexOf('{'); - const lastBrace = cleanOutput.lastIndexOf('}'); - if (firstBrace !== -1 && lastBrace > firstBrace) { - candidates.push(cleanOutput.slice(firstBrace, lastBrace + 1)); - } - - const firstBracket = cleanOutput.indexOf('['); - const lastBracket = cleanOutput.lastIndexOf(']'); - if (firstBracket !== -1 && lastBracket > firstBracket) { - candidates.push(cleanOutput.slice(firstBracket, lastBracket + 1)); - } - - for (const candidate of candidates) { - try { - return JSON.parse(candidate); - } catch { - continue; - } - } - - return null; -} - -function isValidRegexPattern(pattern: string): boolean { - try { - new RegExp(pattern); - return true; - } catch { - return false; - } -} - -function resolveRipgrepQueryMode(query: string, regex: boolean = true, tokenize: boolean = true): RipgrepQueryModeResolution { - const tokens = tokenize ? tokenizeQuery(query) : [query]; - - if (!regex) { - return { - regex: false, - tokenize, - tokens, - literalFallback: false, - }; - } - - const invalidTokens = tokens.filter((token) => token.length > 0 && !isValidRegexPattern(token)); - if (invalidTokens.length === 0) { - return { - regex: true, - tokenize, - tokens, - literalFallback: false, - }; - } - - const preview = truncateContent(invalidTokens[0], 40); - return { - regex: false, - tokenize, - tokens, - literalFallback: true, - warning: invalidTokens.length === 1 - ? `Query token "${preview}" is not a valid regular expression. Falling back to literal ripgrep matching.` - : 'Query contains invalid regular expression tokens. Falling back to literal ripgrep matching.', - }; -} - -function isCodexLensCliCompatibilityError(error: string | undefined): boolean { - if (!error) { - return false; - } - - const cleanError = stripAnsi(error); - return CODEX_LENS_FTS_COMPATIBILITY_PATTERNS.some((pattern) => pattern.test(cleanError)); -} - -function noteCodexLensFtsCompatibility(error: string | undefined): boolean { - if (!isCodexLensCliCompatibilityError(error)) { - return false; - } - - codexLensFtsBackendBroken = true; - return true; -} - -function shouldSurfaceCodexLensFtsCompatibilityWarning(options: { - compatibilityTriggeredThisQuery: boolean; - skipExactDueToCompatibility: boolean; - ripgrepResultCount: number; -}): boolean { - if (options.ripgrepResultCount > 0) { - return false; - } - - return options.compatibilityTriggeredThisQuery || options.skipExactDueToCompatibility; -} - -function summarizeBackendError(error: string | undefined): string { - const cleanError = stripAnsi(error || '').trim(); - if (!cleanError) { - return 'unknown error'; - } - - if (isCodexLensCliCompatibilityError(cleanError)) { - return 'CodexLens exact search CLI is incompatible with the current Typer/Click runtime'; - } - - const regexSummary = cleanError.match(/error:\s*([^\r\n]+)/i); - if (/regex parse error/i.test(cleanError) && regexSummary?.[1]) { - return `invalid regular expression (${regexSummary[1].trim()})`; - } - - const usageSummary = cleanError.match(/UsageError:\s*([^\r\n]+)/i); - if (usageSummary?.[1]) { - return usageSummary[1].trim(); - } - - const firstMeaningfulLine = cleanError - .split(/\r?\n/) - .map((line) => line.trim()) - .find((line) => line && !line.startsWith('│') && !line.startsWith('┌') && !line.startsWith('└')); - - return truncateContent(firstMeaningfulLine || cleanError, 180); -} - -function mapCodexLensSemanticMatches(data: any[], scope: SearchScope, maxContentLength: number): SemanticMatch[] { - return filterResultsToTargetFile(data.map((item: any) => { - const rawScore = item.score || 0; - const similarityScore = rawScore > 0 ? 1 / (1 + rawScore) : 1; - return { - file: item.path || item.file, - line: typeof item.line === 'number' ? item.line : undefined, - column: typeof item.column === 'number' ? item.column : undefined, - score: similarityScore, - content: truncateContent(item.content || item.excerpt, maxContentLength), - symbol: item.symbol || null, - }; - }), scope); -} - -function parsePlainTextFileMatches(output: string | undefined, scope: SearchScope): SemanticMatch[] { - const lines = stripAnsi(output || '') - .split(/\r?\n/) - .map((line) => line.trim()) - .filter(Boolean); - - const fileLines = lines.filter((line) => { - if (line.includes('RuntimeWarning:') || line.startsWith('warn(') || line.startsWith('Warning:')) { - return false; - } - - const resolvedPath = /^[a-zA-Z]:[\\/]|^\//.test(line) - ? line - : resolve(scope.workingDirectory, line); - - try { - return statSync(resolvedPath).isFile(); - } catch { - return false; - } - }); - - return filterResultsToTargetFile( - [...new Set(fileLines)].map((file, index) => ({ - file, - score: Math.max(0.1, 1 - index * 0.05), - content: '', - symbol: null, - })), - scope, - ); -} - -function hasCentralizedVectorArtifacts(indexRoot: unknown): boolean { - if (typeof indexRoot !== 'string' || !indexRoot.trim()) { - return false; - } - - const resolvedRoot = resolve(indexRoot); - return [ - join(resolvedRoot, '_vectors.hnsw'), - join(resolvedRoot, '_vectors_meta.db'), - join(resolvedRoot, '_binary_vectors.mmap'), - ].every((artifactPath) => existsSync(artifactPath)); -} - -function asObjectRecord(value: unknown): Record | undefined { - if (!value || typeof value !== 'object' || Array.isArray(value)) { - return undefined; - } - return value as Record; -} - -function asFiniteNumber(value: unknown): number | undefined { - if (typeof value !== 'number' || !Number.isFinite(value)) { - return undefined; - } - return value; -} - -function asBoolean(value: unknown): boolean | undefined { - return typeof value === 'boolean' ? value : undefined; -} - -function extractEmbeddingsStatusSummary(embeddingsData: unknown): { - coveragePercent: number; - totalChunks: number; - hasEmbeddings: boolean; -} { - const embeddings = asObjectRecord(embeddingsData) ?? {}; - const root = asObjectRecord(embeddings.root) ?? embeddings; - const centralized = asObjectRecord(embeddings.centralized); - - const totalIndexes = asFiniteNumber(root.total_indexes) - ?? asFiniteNumber(embeddings.total_indexes) - ?? 0; - const indexesWithEmbeddings = asFiniteNumber(root.indexes_with_embeddings) - ?? asFiniteNumber(embeddings.indexes_with_embeddings) - ?? 0; - const totalChunks = asFiniteNumber(root.total_chunks) - ?? asFiniteNumber(embeddings.total_chunks) - ?? 0; - const coveragePercent = asFiniteNumber(root.coverage_percent) - ?? asFiniteNumber(embeddings.coverage_percent) - ?? (totalIndexes > 0 ? (indexesWithEmbeddings / totalIndexes) * 100 : 0); - const hasEmbeddings = asBoolean(root.has_embeddings) - ?? asBoolean(centralized?.usable) - ?? (totalChunks > 0 || indexesWithEmbeddings > 0 || coveragePercent > 0); - - return { - coveragePercent, - totalChunks, - hasEmbeddings, - }; -} - -function selectEmbeddingsStatusPayload(statusData: unknown): Record { - const status = asObjectRecord(statusData) ?? {}; - return asObjectRecord(status.embeddings_status) ?? asObjectRecord(status.embeddings) ?? {}; -} - -function collectBackendError( - errors: string[], - backendName: string, - backendResult: PromiseSettledResult, -): void { - if (backendResult.status === 'rejected') { - errors.push(`${backendName}: ${summarizeBackendError(String(backendResult.reason))}`); - return; - } - - if (!backendResult.value.success) { - errors.push(`${backendName}: ${summarizeBackendError(backendResult.value.error)}`); - } -} - -function mergeWarnings(...warnings: Array): string | undefined { - const merged = [...new Set( - warnings - .filter((warning): warning is string => typeof warning === 'string' && warning.trim().length > 0) - .map((warning) => warning.trim()) - )]; - return merged.length > 0 ? merged.join(' | ') : undefined; -} - -function mergeNotes(...notes: Array): string | undefined { - const merged = [...new Set( - notes - .filter((note): note is string => typeof note === 'string' && note.trim().length > 0) - .map((note) => note.trim()) - )]; - return merged.length > 0 ? merged.join(' | ') : undefined; -} - -function mergeSuggestions(...groups: Array): SearchSuggestion[] | undefined { - const merged = new Map(); - for (const group of groups) { - for (const suggestion of group ?? []) { - if (!merged.has(suggestion.command)) { - merged.set(suggestion.command, suggestion); - } - } - } - - return merged.size > 0 ? [...merged.values()] : undefined; -} - -function formatSmartSearchCommand(action: string, pathValue: string, extraParams: Record = {}): string { - const normalizedPath = pathValue.replace(/\\/g, '/'); - const args = [`action=${JSON.stringify(action)}`, `path=${JSON.stringify(normalizedPath)}`]; - - for (const [key, value] of Object.entries(extraParams)) { - if (value === undefined) { - continue; - } - args.push(`${key}=${JSON.stringify(value)}`); - } - - return `smart_search(${args.join(', ')})`; -} - -function parseOptionalBooleanEnv(raw: string | undefined): boolean | undefined { - const normalized = raw?.trim().toLowerCase(); - if (!normalized) { - return undefined; - } - - if (['1', 'true', 'on', 'yes'].includes(normalized)) { - return true; - } - - if (['0', 'false', 'off', 'no'].includes(normalized)) { - return false; - } - - return undefined; -} - -function isAutoEmbedMissingEnabled(config: CodexLensConfig | null | undefined): boolean { - const envOverride = parseOptionalBooleanEnv(process.env.CODEXLENS_AUTO_EMBED_MISSING); - if (envOverride !== undefined) { - return envOverride; - } - - if (process.platform === 'win32') { - return false; - } - - if (typeof config?.embedding_auto_embed_missing === 'boolean') { - return config.embedding_auto_embed_missing; - } - - return true; -} - -function isAutoInitMissingEnabled(): boolean { - const envOverride = parseOptionalBooleanEnv(process.env.CODEXLENS_AUTO_INIT_MISSING); - if (envOverride !== undefined) { - return envOverride; - } - - return process.platform !== 'win32'; -} - -function getAutoEmbedMissingDisabledReason(config: CodexLensConfig | null | undefined): string { - const envOverride = parseOptionalBooleanEnv(process.env.CODEXLENS_AUTO_EMBED_MISSING); - if (envOverride === false) { - return 'Automatic embedding warmup is disabled by CODEXLENS_AUTO_EMBED_MISSING=false.'; - } - - if (config?.embedding_auto_embed_missing === false) { - return 'Automatic embedding warmup is disabled by embedding.auto_embed_missing=false.'; - } - - if (process.platform === 'win32') { - return 'Automatic embedding warmup is disabled by default on Windows even if CodexLens config resolves auto_embed_missing=true. Set CODEXLENS_AUTO_EMBED_MISSING=true to opt in.'; - } - - return 'Automatic embedding warmup is disabled.'; -} - -function getAutoInitMissingDisabledReason(): string { - const envOverride = parseOptionalBooleanEnv(process.env.CODEXLENS_AUTO_INIT_MISSING); - if (envOverride === false) { - return 'Automatic static index warmup is disabled by CODEXLENS_AUTO_INIT_MISSING=false.'; - } - - if (process.platform === 'win32') { - return 'Automatic static index warmup is disabled by default on Windows. Set CODEXLENS_AUTO_INIT_MISSING=true to opt in.'; - } - - return 'Automatic static index warmup is disabled.'; -} - -function buildIndexSuggestions(indexStatus: IndexStatus, scope: SearchScope): SearchSuggestion[] | undefined { - const suggestions: SearchSuggestion[] = []; - - if (!indexStatus.indexed) { - suggestions.push({ - title: 'Initialize index', - command: formatSmartSearchCommand('init', scope.workingDirectory), - reason: 'No CodexLens index exists for this path yet.', - }); - suggestions.push({ - title: 'Check index status', - command: formatSmartSearchCommand('status', scope.workingDirectory), - reason: 'Verify whether the target path is mapped to the expected CodexLens project root.', - }); - return suggestions; - } - - if (!indexStatus.has_embeddings) { - suggestions.push({ - title: 'Generate embeddings', - command: formatSmartSearchCommand('embed', scope.workingDirectory), - reason: 'The index exists, but semantic/vector retrieval is unavailable until embeddings are generated.', - }); - } else if ((indexStatus.embeddings_coverage_percent ?? 0) < 50) { - suggestions.push({ - title: 'Rebuild embeddings', - command: formatSmartSearchCommand('embed', scope.workingDirectory, { force: true }), - reason: `Embedding coverage is only ${(indexStatus.embeddings_coverage_percent ?? 0).toFixed(1)}%, so semantic search quality is degraded.`, - }); - } - - if (indexStatus.warning?.includes('Failed to parse index status')) { - suggestions.push({ - title: 'Re-check status', - command: formatSmartSearchCommand('status', scope.workingDirectory), - reason: 'The index health payload could not be parsed cleanly.', - }); - } - - return suggestions.length > 0 ? suggestions : undefined; -} - -/** - * Check if CodexLens index exists for current directory - * @param path - Directory path to check - * @returns Index status - */ -async function checkIndexStatus(path: string = '.'): Promise { - const scope = resolveSearchScope(path); - try { - // Fetch both status and config in parallel - const [statusResult, configResult] = await Promise.all([ - executeCodexLens(['index', 'status', scope.workingDirectory], { cwd: scope.workingDirectory }), - executeCodexLens(['config', '--json'], { cwd: scope.workingDirectory }), - ]); - - // Parse config - const settingsConfig = readCodexLensSettingsSnapshot(); - let config: CodexLensConfig | null = Object.keys(settingsConfig).length > 0 ? { ...settingsConfig } : null; - if (configResult.success && configResult.output) { - try { - const cleanConfigOutput = stripAnsi(configResult.output); - const parsedConfig = JSON.parse(cleanConfigOutput); - const configData = parsedConfig.result || parsedConfig; - config = { - ...settingsConfig, - config_file: configData.config_file, - index_dir: configData.index_dir, - embedding_backend: normalizeEmbeddingBackend(configData.embedding_backend) ?? settingsConfig.embedding_backend, - embedding_model: typeof configData.embedding_model === 'string' ? configData.embedding_model : settingsConfig.embedding_model, - embedding_auto_embed_missing: typeof configData.embedding_auto_embed_missing === 'boolean' - ? configData.embedding_auto_embed_missing - : settingsConfig.embedding_auto_embed_missing, - reranker_enabled: typeof configData.reranker_enabled === 'boolean' ? configData.reranker_enabled : settingsConfig.reranker_enabled, - reranker_backend: typeof configData.reranker_backend === 'string' ? configData.reranker_backend : settingsConfig.reranker_backend, - reranker_model: typeof configData.reranker_model === 'string' ? configData.reranker_model : settingsConfig.reranker_model, - reranker_top_k: typeof configData.reranker_top_k === 'number' ? configData.reranker_top_k : settingsConfig.reranker_top_k, - }; - } catch { - // Config parse failed, continue without it - } - } - - if (!statusResult.success) { - return { - indexed: false, - has_embeddings: false, - config, - warning: 'No CodexLens index found. Run smart_search(action="init") to create index for better search results.', - }; - } - - // Parse status output - try { - // Strip ANSI color codes from JSON output - const cleanOutput = stripAnsi(statusResult.output || '{}'); - const parsed = JSON.parse(cleanOutput); - // Handle both direct and nested response formats (status returns {success, result: {...}}) - const status = parsed.result || parsed; - - // Get embeddings coverage from comprehensive status - const embeddingsData = selectEmbeddingsStatusPayload(status); - const legacyEmbeddingsData = asObjectRecord(status.embeddings) ?? {}; - const embeddingsSummary = extractEmbeddingsStatusSummary(embeddingsData); - const totalIndexes = Number(legacyEmbeddingsData.total_indexes || asObjectRecord(embeddingsData)?.total_indexes || 0); - const embeddingsCoverage = embeddingsSummary.coveragePercent; - const totalChunks = embeddingsSummary.totalChunks; - const indexed = Boolean(status.projects_count > 0 || status.total_files > 0 || status.index_root || totalIndexes > 0 || totalChunks > 0); - const has_embeddings = embeddingsSummary.hasEmbeddings; - - // Extract model info if available - const modelInfoData = asObjectRecord(embeddingsData.model_info); - const modelInfo: ModelInfo | undefined = modelInfoData ? { - model_profile: typeof modelInfoData.model_profile === 'string' ? modelInfoData.model_profile : undefined, - model_name: typeof modelInfoData.model_name === 'string' ? modelInfoData.model_name : undefined, - embedding_dim: typeof modelInfoData.embedding_dim === 'number' ? modelInfoData.embedding_dim : undefined, - backend: typeof modelInfoData.backend === 'string' ? modelInfoData.backend : undefined, - created_at: typeof modelInfoData.created_at === 'string' ? modelInfoData.created_at : undefined, - updated_at: typeof modelInfoData.updated_at === 'string' ? modelInfoData.updated_at : undefined, - } : undefined; - - let warning: string | undefined; - if (!indexed) { - warning = 'No CodexLens index found. Run smart_search(action="init") to create index for better search results.'; - } else if (embeddingsCoverage === 0) { - warning = 'Index exists but no embeddings generated. Run smart_search(action="embed") to build the vector index.'; - } else if (embeddingsCoverage < 50) { - warning = `Embeddings coverage is ${embeddingsCoverage.toFixed(1)}% (below 50%). Hybrid search will degrade. Run smart_search(action="embed") to improve vector coverage.`; - } - - return { - indexed, - has_embeddings, - file_count: status.total_files, - embeddings_coverage_percent: embeddingsCoverage, - total_chunks: totalChunks, - // Ensure model_info is null instead of undefined so it's included in JSON - model_info: modelInfo ?? null, - config, - warning, - }; - } catch { - return { - indexed: false, - has_embeddings: false, - config, - warning: 'Failed to parse index status', - }; - } - } catch { - return { - indexed: false, - has_embeddings: false, - warning: 'CodexLens not available', - }; - } -} - -/** - * Detection heuristics for intent classification - */ - -/** - * Detect literal string query (simple alphanumeric or quoted strings) - */ -function detectLiteral(query: string): boolean { - return /^[a-zA-Z0-9_-]+$/.test(query) || /^["'].*["']$/.test(query); -} - -/** - * Detect regex pattern (contains regex metacharacters) - */ -function detectRegex(query: string): boolean { - return /[.*+?^${}()|[\]\\]/.test(query); -} - -/** - * Detect natural language query (sentence structure, questions, multi-word phrases) - */ -function detectNaturalLanguage(query: string): boolean { - return query.split(/\s+/).length >= 3 || /\?$/.test(query); -} - -/** - * Detect file path query (path separators, file extensions) - */ -function detectFilePath(query: string): boolean { - return /[/\\]/.test(query) || /\.[a-z]{2,4}$/i.test(query); -} - -/** - * Detect relationship query (import, export, dependency keywords) - */ -function detectRelationship(query: string): boolean { - return /(import|export|uses?|depends?|calls?|extends?)\s/i.test(query); -} - -function looksLikeCodeQuery(query: string): boolean { - if (/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(query)) return true; - if (/[:.<>\-=(){}[\]]/.test(query) && query.split(/\s+/).length <= 2) return true; - if (/\.\*|\\\(|\\\[|\\s/.test(query)) return true; - if (/^[a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z_][a-zA-Z0-9_]*$/.test(query)) return true; - return false; -} - -function queryTargetsGeneratedFiles(query: string): boolean { - return GENERATED_QUERY_RE.test(query.trim()); -} - -function prefersLexicalPriorityQuery(query: string): boolean { - const trimmed = query.trim(); - if (!trimmed) return false; - if (ENV_STYLE_QUERY_RE.test(trimmed)) return true; - - const tokens = new Set((trimmed.match(TOPIC_TOKEN_RE) ?? []).map((token) => token.toLowerCase())); - if (tokens.size === 0) return false; - if (tokens.has('factory') || tokens.has('factories')) return true; - if ((tokens.has('environment') || tokens.has('env')) && (tokens.has('variable') || tokens.has('variables'))) { - return true; - } - if ( - tokens.has('backend') && - ['embedding', 'embeddings', 'reranker', 'rerankers', 'onnx', 'api', 'litellm', 'fastembed', 'local', 'legacy'] - .some((token) => tokens.has(token)) - ) { - return true; - } - - let surfaceHit = false; - let focusHit = false; - for (const token of tokens) { - if (LEXICAL_PRIORITY_SURFACE_TOKENS.has(token)) surfaceHit = true; - if (LEXICAL_PRIORITY_FOCUS_TOKENS.has(token)) focusHit = true; - if (surfaceHit && focusHit) return true; - } - return false; -} - -/** - * Classify query intent and recommend search mode - * Simple mapping: hybrid (NL + index + embeddings) | exact (index or insufficient embeddings) | ripgrep (no index) - * @param query - Search query string - * @param hasIndex - Whether CodexLens index exists - * @param hasSufficientEmbeddings - Whether embeddings coverage >= 50% - * @returns Classification result - */ -function classifyIntent(query: string, hasIndex: boolean = false, hasSufficientEmbeddings: boolean = false): Classification { - const isNaturalLanguage = detectNaturalLanguage(query); - const isCodeQuery = looksLikeCodeQuery(query); - const isRegexPattern = detectRegex(query); - const targetsGeneratedFiles = queryTargetsGeneratedFiles(query); - const prefersLexicalPriority = prefersLexicalPriorityQuery(query); - - let mode: string; - let confidence: number; - - if (!hasIndex) { - mode = 'ripgrep'; - confidence = 1.0; - } else if (targetsGeneratedFiles || prefersLexicalPriority || isCodeQuery || isRegexPattern) { - mode = 'exact'; - confidence = targetsGeneratedFiles ? 0.97 : prefersLexicalPriority ? 0.93 : 0.95; - } else if (isNaturalLanguage && hasSufficientEmbeddings) { - mode = 'hybrid'; - confidence = 0.9; - } else { - mode = 'exact'; - confidence = 0.8; - } - - const detectedPatterns: string[] = []; - if (detectLiteral(query)) detectedPatterns.push('literal'); - if (detectRegex(query)) detectedPatterns.push('regex'); - if (detectNaturalLanguage(query)) detectedPatterns.push('natural language'); - if (detectFilePath(query)) detectedPatterns.push('file path'); - if (detectRelationship(query)) detectedPatterns.push('relationship'); - if (targetsGeneratedFiles) detectedPatterns.push('generated artifact'); - if (prefersLexicalPriority) detectedPatterns.push('lexical priority'); - if (isCodeQuery) detectedPatterns.push('code identifier'); - - const reasoning = `Query classified as ${mode} (confidence: ${confidence.toFixed(2)}, detected: ${detectedPatterns.join(', ')}, index: ${hasIndex ? 'available' : 'not available'}, embeddings: ${hasSufficientEmbeddings ? 'sufficient' : 'insufficient'})`; - - return { mode, confidence, reasoning }; -} - -/** - * Check if a tool is available in PATH - * @param toolName - Tool executable name - * @returns True if available - */ -function checkToolAvailability( - toolName: string, - lookupRuntime: typeof spawnSync = spawnSync, -): boolean { - try { - const isWindows = process.platform === 'win32'; - const command = isWindows ? 'where' : 'which'; - const result = lookupRuntime(command, [toolName], { - shell: false, - windowsHide: true, - stdio: 'ignore', - timeout: EXEC_TIMEOUTS.SYSTEM_INFO, - env: { ...process.env, PYTHONIOENCODING: 'utf-8' }, - }); - return !result.error && result.status === 0; - } catch { - return false; - } -} - -/** - * Build ripgrep command arguments - * Supports tokenized multi-word queries with OR matching - * @param params - Search parameters - * @returns Command, arguments, and tokens used - */ -function buildRipgrepCommand(params: { - query: string; - paths: string[]; - contextLines: number; - maxResults: number; - includeHidden: boolean; - regex?: boolean; - caseSensitive?: boolean; - tokenize?: boolean; -}): { command: string; args: string[]; tokens: string[]; warning?: string; literalFallback: boolean; regex: boolean } { - const { query, paths = ['.'], contextLines = 0, maxResults = 10, includeHidden = false, regex = false, caseSensitive = true, tokenize = true } = params; - const queryMode = resolveRipgrepQueryMode(query, regex, tokenize); - - const args = [ - '-n', - '--color=never', - '--json', - ]; - - // Add file filtering (unless includeHidden is true) - if (!includeHidden) { - args.push(...buildExcludeArgs()); - } - - // Case sensitivity - if (!caseSensitive) { - args.push('--ignore-case'); - } - - if (contextLines > 0) { - args.push('-C', contextLines.toString()); - } - - if (maxResults > 0) { - args.push('--max-count', maxResults.toString()); - } - - if (includeHidden) { - args.push('--hidden'); - } - - const { tokens } = queryMode; - - if (tokens.length > 1) { - // Multi-token: use multiple -e patterns (OR matching) - // Each token is escaped for regex safety unless regex mode is enabled - for (const token of tokens) { - if (queryMode.regex) { - args.push('-e', token); - } else { - // Escape regex special chars for literal matching - const escaped = token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); - args.push('-e', escaped); - } - } - } else { - // Single token or no tokenization: use original behavior - if (queryMode.regex) { - args.push('-e', query); - } else { - args.push('-F', query); - } - } - - args.push(...paths); - - return { - command: 'rg', - args, - tokens, - warning: queryMode.warning, - literalFallback: queryMode.literalFallback, - regex: queryMode.regex, - }; -} - -interface RipgrepChunkAccumulator { - file: string; - chunkLines: ChunkLine[]; - firstMatchLine?: number; - firstMatchColumn?: number; - lastLine?: number; - matchCount: number; -} - -function finalizeRipgrepChunk(accumulator: RipgrepChunkAccumulator | undefined): ExactMatch | null { - if (!accumulator || accumulator.matchCount === 0 || accumulator.chunkLines.length === 0) { - return null; - } - - const firstLine = accumulator.chunkLines[0]?.line ?? accumulator.firstMatchLine ?? 1; - const lastLine = accumulator.chunkLines[accumulator.chunkLines.length - 1]?.line ?? accumulator.firstMatchLine ?? firstLine; - - return { - file: accumulator.file, - line: accumulator.firstMatchLine ?? firstLine, - endLine: lastLine, - column: accumulator.firstMatchColumn ?? 1, - content: accumulator.chunkLines.map((line) => line.text).join('\n').trim(), - chunkLines: [...accumulator.chunkLines], - }; -} - -function parseRipgrepJsonResults(stdout: string, effectiveLimit: number): { results: ExactMatch[]; resultLimitReached: boolean } { - const allResults: ExactMatch[] = []; - const activeChunks = new Map(); - const lines = stdout.split('\n').filter((line) => line.trim()); - let resultLimitReached = false; - - const flushChunk = (file: string) => { - const finalized = finalizeRipgrepChunk(activeChunks.get(file)); - activeChunks.delete(file); - if (!finalized) { - return; - } - allResults.push(finalized); - if (allResults.length >= effectiveLimit) { - resultLimitReached = true; - } - }; - - for (const line of lines) { - if (resultLimitReached) { - break; - } - - try { - const item = JSON.parse(line); - if (item.type !== 'match' && item.type !== 'context' && item.type !== 'end') { - continue; - } - - const file = item.data?.path?.text as string | undefined; - if (!file) { - continue; - } - - if (item.type === 'end') { - flushChunk(file); - continue; - } - - const lineNumber = typeof item.data?.line_number === 'number' ? item.data.line_number : undefined; - const rawText = typeof item.data?.lines?.text === 'string' - ? item.data.lines.text.replace(/\r?\n$/, '') - : ''; - - if (lineNumber === undefined) { - continue; - } - - let current = activeChunks.get(file); - const isContiguous = current && current.lastLine !== undefined && lineNumber <= current.lastLine + 1; - if (!current || !isContiguous) { - if (current) { - flushChunk(file); - if (resultLimitReached) { - break; - } - } - current = { - file, - chunkLines: [], - matchCount: 0, - }; - activeChunks.set(file, current); - } - - const previousLine = current.chunkLines[current.chunkLines.length - 1]; - const duplicateLine = previousLine && previousLine.line === lineNumber && previousLine.text === rawText; - if (!duplicateLine) { - current.chunkLines.push({ - line: lineNumber, - text: rawText, - isMatch: item.type === 'match', - }); - } else if (item.type === 'match') { - previousLine.isMatch = true; - } - - if (item.type === 'match') { - current.matchCount += 1; - if (current.firstMatchLine === undefined) { - current.firstMatchLine = lineNumber; - current.firstMatchColumn = - item.data.submatches && item.data.submatches[0] - ? item.data.submatches[0].start + 1 - : 1; - } - } - current.lastLine = lineNumber; - } catch { - continue; - } - } - - if (!resultLimitReached) { - for (const file of [...activeChunks.keys()]) { - flushChunk(file); - if (resultLimitReached) { - break; - } - } - } - - return { results: allResults.slice(0, effectiveLimit), resultLimitReached }; -} - -function normalizeEmbeddingBackend(backend?: string): string | undefined { - if (!backend) { - return undefined; - } - - const normalized = backend.trim().toLowerCase(); - if (!normalized) { - return undefined; - } - if (normalized === 'api') { - return 'litellm'; - } - if (normalized === 'local') { - return 'fastembed'; - } - return normalized; -} - -function buildIndexInitArgs(projectPath: string, options: { force?: boolean; languages?: string[]; noEmbeddings?: boolean } = {}): string[] { - const { force = false, languages, noEmbeddings = true } = options; - const args = ['index', 'init', projectPath]; - - if (noEmbeddings) { - args.push('--no-embeddings'); - } - if (force) { - args.push('--force'); - } - if (languages && languages.length > 0) { - args.push(...languages.flatMap((language) => ['--language', language])); - } - - return args; -} - -function resolveEmbeddingSelection( - requestedBackend: string | undefined, - requestedModel: string | undefined, - config: CodexLensConfig | null | undefined, -): { backend?: string; model?: string; preset: 'explicit' | 'config' | 'bulk-local-fast'; note?: string } { - const normalizedRequestedBackend = normalizeEmbeddingBackend(requestedBackend); - const normalizedRequestedModel = requestedModel?.trim() || undefined; - - if (normalizedRequestedBackend) { - return { - backend: normalizedRequestedBackend, - model: normalizedRequestedModel || config?.embedding_model, - preset: 'explicit', - }; - } - - if (normalizedRequestedModel) { - const inferredBackend = config?.embedding_backend - || (['fast', 'code'].includes(normalizedRequestedModel) ? 'fastembed' : undefined); - return { - backend: inferredBackend, - model: normalizedRequestedModel, - preset: inferredBackend ? 'config' : 'explicit', - }; - } - - return { - backend: 'fastembed', - model: 'fast', - preset: 'bulk-local-fast', - note: config?.embedding_backend && config.embedding_backend !== 'fastembed' - ? `Using recommended bulk indexing preset: local-fast instead of configured ${config.embedding_backend}. Pass embeddingBackend="api" to force remote API embeddings.` - : 'Using recommended bulk indexing preset: local-fast. Pass embeddingBackend="api" to force remote API embeddings.', - }; -} - -const EMBED_PROGRESS_PREFIX = '__CCW_EMBED_PROGRESS__'; - -function resolveEmbeddingEndpoints(backend?: string): RotationEndpointConfig[] { - if (backend !== 'litellm') { - return []; - } - - try { - return generateRotationEndpoints(getProjectRoot()).filter((endpoint) => { - const apiKey = endpoint.api_key?.trim() ?? ''; - return Boolean( - apiKey && - apiKey.length > 8 && - !/^\*+$/.test(apiKey) && - endpoint.api_base?.trim() && - endpoint.model?.trim() - ); - }); - } catch { - return []; - } -} - -function resolveApiWorkerCount( - requestedWorkers: number | undefined, - backend: string | undefined, - endpoints: RotationEndpointConfig[] -): number | undefined { - if (backend !== 'litellm') { - return undefined; - } - - if (typeof requestedWorkers === 'number' && Number.isFinite(requestedWorkers)) { - return Math.max(1, Math.floor(requestedWorkers)); - } - - if (endpoints.length <= 1) { - return 4; - } - - return Math.min(16, Math.max(4, endpoints.length * 2)); -} - -function extractEmbedJsonLine(stdout: string): string | undefined { - const lines = stdout - .split(/\r?\n/) - .map((line) => line.trim()) - .filter(Boolean) - .filter((line) => !line.startsWith(EMBED_PROGRESS_PREFIX)); - - return [...lines].reverse().find((line) => line.startsWith('{') && line.endsWith('}')); -} - -function buildEmbeddingPythonCode(params: { - projectPath: string; - backend?: string; - model?: string; - force: boolean; - maxWorkers?: number; - endpoints?: RotationEndpointConfig[]; -}): string { - const { projectPath, backend, model, force, maxWorkers, endpoints = [] } = params; - return ` -import json -import sys -from pathlib import Path -from codexlens.storage.path_mapper import PathMapper -from codexlens.storage.registry import RegistryStore -from codexlens.cli.embedding_manager import generate_dense_embeddings_centralized - -target_path = Path(r"__PROJECT_PATH__").expanduser().resolve() -backend = __BACKEND__ -model = __MODEL__ -force = __FORCE__ -max_workers = __MAX_WORKERS__ -endpoints = json.loads(r'''__ENDPOINTS_JSON__''') - -def progress_update(message: str): - print("__CCW_EMBED_PROGRESS__" + str(message), flush=True) - -registry = RegistryStore() -registry.initialize() -try: - project = registry.get_project(target_path) - index_root = None - if project is not None: - index_root = Path(project.index_root) - else: - mapper = PathMapper() - index_db = mapper.source_to_index_db(target_path) - if index_db.exists(): - index_root = index_db.parent - else: - nearest = registry.find_nearest_index(target_path) - if nearest is not None: - index_root = Path(nearest.index_path).parent - - if index_root is None: - print(json.dumps({"success": False, "error": f"No index found for: {target_path}"}), flush=True) - sys.exit(1) - - result = generate_dense_embeddings_centralized( - index_root, - embedding_backend=backend, - model_profile=model, - force=force, - use_gpu=True, - max_workers=max_workers, - endpoints=endpoints if endpoints else None, - progress_callback=progress_update, - ) - - print(json.dumps(result), flush=True) - if not result.get("success"): - sys.exit(1) -finally: - registry.close() -` - .replace('__PROJECT_PATH__', projectPath.replace(/\\/g, '\\\\')) - .replace('__BACKEND__', backend ? JSON.stringify(backend) : 'None') - .replace('__MODEL__', model ? JSON.stringify(model) : 'None') - .replace('__FORCE__', force ? 'True' : 'False') - .replace('__MAX_WORKERS__', typeof maxWorkers === 'number' ? String(Math.max(1, Math.floor(maxWorkers))) : 'None') - .replace('__ENDPOINTS_JSON__', JSON.stringify(endpoints).replace(/\\/g, '\\\\').replace(/'''/g, "\\'\\'\\'")); -} - -function spawnBackgroundEmbeddingsViaPython(params: { - projectPath: string; - backend?: string; - model?: string; - force: boolean; - maxWorkers?: number; - endpoints?: RotationEndpointConfig[]; -}): { success: boolean; error?: string } { - const { projectPath, backend, model } = params; - try { - const child = getSpawnRuntime()( - getVenvPythonPathRuntime()(), - ['-c', buildEmbeddingPythonCode(params)], - buildSmartSearchSpawnOptions(projectPath, { - detached: shouldDetachBackgroundSmartSearchProcess(), - stdio: 'ignore', - }), - ); - - autoEmbedJobs.set(projectPath, { - startedAt: getNowRuntime(), - backend, - model, - }); - - const cleanup = () => { - autoEmbedJobs.delete(projectPath); - }; - child.on('error', cleanup); - child.on('close', cleanup); - child.unref(); - return { success: true }; - } catch (error) { - return { - success: false, - error: error instanceof Error ? error.message : String(error), - }; - } -} - -function spawnBackgroundIndexInit(params: { - projectPath: string; - languages?: string[]; -}): { success: boolean; error?: string } { - const { projectPath, languages } = params; - try { - const pythonPath = getVenvPythonPathRuntime()(); - if (!existsSync(pythonPath)) { - return { - success: false, - error: 'CodexLens Python environment is not ready yet.', - }; - } - - const child = getSpawnRuntime()( - pythonPath, - ['-m', 'codexlens', ...buildIndexInitArgs(projectPath, { languages })], - buildSmartSearchSpawnOptions(projectPath, { - detached: shouldDetachBackgroundSmartSearchProcess(), - stdio: 'ignore', - }), - ); - - autoInitJobs.set(projectPath, { - startedAt: getNowRuntime(), - languages, - }); - - const cleanup = () => { - autoInitJobs.delete(projectPath); - }; - child.on('error', cleanup); - child.on('close', cleanup); - child.unref(); - return { success: true }; - } catch (error) { - return { - success: false, - error: error instanceof Error ? error.message : String(error), - }; - } -} - -async function maybeStartBackgroundAutoInit( - scope: SearchScope, - indexStatus: IndexStatus, -): Promise<{ note?: string; warning?: string }> { - if (indexStatus.indexed) { - return {}; - } - - if (!isAutoInitMissingEnabled()) { - return { - note: getAutoInitMissingDisabledReason(), - }; - } - - if (autoInitJobs.has(scope.workingDirectory)) { - return { - note: 'Background static index build is already running for this path.', - }; - } - - const spawned = spawnBackgroundIndexInit({ - projectPath: scope.workingDirectory, - }); - - if (!spawned.success) { - return { - warning: `Automatic static index warmup could not start: ${spawned.error}`, - }; - } - - return { - note: 'Background static index build started for this path. Re-run search shortly for indexed FTS results.', - }; -} - -async function maybeStartBackgroundAutoEmbed( - scope: SearchScope, - indexStatus: IndexStatus, -): Promise<{ note?: string; warning?: string }> { - if (!indexStatus.indexed || indexStatus.has_embeddings) { - return {}; - } - - if (!isAutoEmbedMissingEnabled(indexStatus.config)) { - return { - note: getAutoEmbedMissingDisabledReason(indexStatus.config), - }; - } - - if (autoEmbedJobs.has(scope.workingDirectory)) { - return { - note: 'Background embedding build is already running for this path.', - }; - } - - const backend = normalizeEmbeddingBackend(indexStatus.config?.embedding_backend) ?? 'fastembed'; - const model = indexStatus.config?.embedding_model?.trim() || undefined; - const semanticStatus = await getSemanticStatusRuntime()(); - if (!semanticStatus.available) { - return { - warning: 'Automatic embedding warmup skipped because semantic dependencies are not ready.', - }; - } - - if (backend === 'litellm' && !semanticStatus.litellmAvailable) { - return { - warning: 'Automatic embedding warmup skipped because the LiteLLM embedder is not ready.', - }; - } - - const endpoints = resolveEmbeddingEndpoints(backend); - const configuredApiMaxWorkers = indexStatus.config?.api_max_workers; - const effectiveApiMaxWorkers = typeof configuredApiMaxWorkers === 'number' - ? Math.max(1, Math.floor(configuredApiMaxWorkers)) - : resolveApiWorkerCount(undefined, backend, endpoints); - const spawned = spawnBackgroundEmbeddingsViaPython({ - projectPath: scope.workingDirectory, - backend, - model, - force: false, - maxWorkers: effectiveApiMaxWorkers, - endpoints, - }); - - if (!spawned.success) { - return { - warning: `Automatic embedding warmup could not start: ${spawned.error}`, - }; - } - - return { - note: 'Background embedding build started for this path. Re-run semantic search shortly for vector results.', - }; -} - -// v1 executeEmbeddingsViaPython removed — v2 uses built-in fastembed models - -// v1 executeInitAction removed — replaced by executeInitActionV2 - -// v1 executeEmbedAction removed — v2 auto-embeds during sync - -// v1 executeStatusAction removed — replaced by executeStatusActionV2 - -// v1 executeUpdateAction and executeWatchAction removed — replaced by V2 versions - -// v1 executeFuzzyMode and executeAutoMode removed — v2 bridge handles all search - -/** - * Mode: ripgrep - Fast literal string matching using ripgrep - * No index required, fallback to CodexLens if ripgrep unavailable - * Supports tokenized multi-word queries with OR matching and result ranking - */ -async function executeRipgrepMode(params: Params): Promise { - const { query, paths = [], contextLines = 0, maxResults = 5, extraFilesCount = 10, maxContentLength = 200, includeHidden = false, path = '.', regex = true, caseSensitive = true, tokenize = true, codeOnly = true, withDoc = false, excludeExtensions } = params; - const scope = resolveSearchScope(path, paths); - // withDoc overrides codeOnly - const effectiveCodeOnly = withDoc ? false : codeOnly; - - if (!query) { - return { - success: false, - error: 'Query is required for search', - }; - } - - // Check if ripgrep is available - const hasRipgrep = checkToolAvailability('rg'); - - // Calculate total to fetch for split (full content + extra files) - const totalToFetch = maxResults + extraFilesCount; - - // If ripgrep not available, fall back to CodexLens exact mode - if (!hasRipgrep) { - const readyStatus = await ensureCodexLensReady(); - if (!readyStatus.ready) { - return { - success: false, - error: 'Neither ripgrep nor CodexLens available. Install ripgrep (rg) or CodexLens for search functionality.', - }; - } - - // Use CodexLens fts mode as fallback - const args = ['search', query, '--limit', totalToFetch.toString(), '--method', 'fts', '--json']; - const result = await executeCodexLens(args, { cwd: scope.workingDirectory }); - - if (!result.success) { - noteCodexLensFtsCompatibility(result.error); - return { - success: false, - error: summarizeBackendError(result.error), - metadata: { - mode: 'ripgrep', - backend: 'codexlens-fallback', - count: 0, - query, - }, - }; - } - - // Parse results - let allResults: SemanticMatch[] = []; - try { - const parsed = JSON.parse(stripAnsi(result.output || '{}')); - const data = parsed.result?.results || parsed.results || parsed; - allResults = (Array.isArray(data) ? data : []).map((item: any) => ({ - file: item.path || item.file, - score: item.score || 0, - content: truncateContent(item.content || item.excerpt, maxContentLength), - symbol: item.symbol || null, - })); - } catch { - // Keep empty results - } - - const scopedResults = filterResultsToTargetFile(allResults, scope); - - // Split results: first N with full content, rest as file paths only - const { results, extra_files } = splitResultsWithExtraFiles(scopedResults, maxResults, extraFilesCount); - - return { - success: true, - results, - extra_files: extra_files.length > 0 ? extra_files : undefined, - metadata: { - mode: 'ripgrep', - backend: 'codexlens-fallback', - count: results.length, - query, - note: 'Using CodexLens exact mode (ripgrep not available)', - }, - }; - } - - // Use ripgrep - request more results to support split - const { command, args, tokens, warning: queryModeWarning } = buildRipgrepCommand({ - query, - paths: scope.searchPaths, - contextLines, - maxResults: totalToFetch, // Fetch more to support split - includeHidden, - regex, - caseSensitive, - tokenize, - }); - - return new Promise((resolve) => { - const child = getSpawnRuntime()( - command, - args, - buildSmartSearchSpawnOptions(scope.workingDirectory || getProjectRoot(), { - stdio: ['ignore', 'pipe', 'pipe'], - }), - ); - - let stdout = ''; - let stderr = ''; - let resultLimitReached = false; - - child.stdout?.on('data', (data) => { - stdout += data.toString(); - }); - - child.stderr?.on('data', (data) => { - stderr += data.toString(); - }); - - child.on('close', (code) => { - // Limit total results to prevent memory overflow (--max-count only limits per-file) - const effectiveLimit = totalToFetch > 0 ? totalToFetch : 500; - const parsedResults = parseRipgrepJsonResults(stdout, effectiveLimit); - const allResults = parsedResults.results; - resultLimitReached = parsedResults.resultLimitReached; - - // Handle Windows device file errors gracefully (os error 1) - // If we have results despite the error, return them as partial success - const isWindowsDeviceError = stderr.includes('os error 1') || stderr.includes('函数不正确'); - - // Apply token-based scoring and sorting for multi-word queries - // Results matching more tokens are ranked higher (exact matches first) - const scoredResults = tokens.length > 1 ? scoreByTokenMatch(allResults, tokens) : allResults; - - // Apply code-only and extension filtering - const filteredResults = filterNoisyFiles(scoredResults as any[], { codeOnly: effectiveCodeOnly, excludeExtensions }); - - if (code === 0 || code === 1 || (isWindowsDeviceError && filteredResults.length > 0)) { - // Split results: first N with full content, rest as file paths only - const { results, extra_files } = splitResultsWithExtraFiles(filteredResults, maxResults, extraFilesCount); - - // Build warning message for various conditions - const warnings: string[] = []; - if (queryModeWarning) { - warnings.push(queryModeWarning); - } - if (resultLimitReached) { - warnings.push(`Result limit reached (${effectiveLimit}). Use a more specific query or increase limit.`); - } - if (isWindowsDeviceError) { - warnings.push('Some Windows device files were skipped'); - } - - resolve({ - success: true, - results, - extra_files: extra_files.length > 0 ? extra_files : undefined, - metadata: { - mode: 'ripgrep', - backend: 'ripgrep', - count: results.length, - query, - tokens: tokens.length > 1 ? tokens : undefined, // Include tokens in metadata for debugging - tokenized: tokens.length > 1, - ...(warnings.length > 0 && { warning: warnings.join('; ') }), - }, - }); - } else if (isWindowsDeviceError && allResults.length === 0) { - // Windows device error but no results - might be the only issue - resolve({ - success: true, - results: [], - metadata: { - mode: 'ripgrep', - backend: 'ripgrep', - count: 0, - query, - warning: 'No matches found (some Windows device files were skipped)', - }, - }); - } else { - resolve({ - success: false, - error: `ripgrep execution failed with code ${code}: ${stderr}`, - results: [], - }); - } - }); - - child.on('error', (error) => { - resolve({ - success: false, - error: `Failed to spawn ripgrep: ${error.message}`, - results: [], - }); - }); - }); -} - -// ======================================== -// codexlens-search v2 bridge integration -// ======================================== - -/** - * Execute search via codexlens-search (v2) bridge CLI. - * Spawns 'codexlens-search search --query X --top-k Y --db-path Z' and parses JSON output. - * - * @param query - Search query string - * @param topK - Number of results to return - * @param dbPath - Path to the v2 index database directory - * @returns Parsed search results as SemanticMatch array - */ -async function executeCodexLensV2Bridge( - query: string, - topK: number, - dbPath: string, -): Promise { - return new Promise((resolve) => { - const args = [ - '--db-path', dbPath, - 'search', - '--query', query, - '--top-k', String(topK), - ]; - - execFile('codexlens-search', args, { - encoding: 'utf-8', - timeout: EXEC_TIMEOUTS.PROCESS_SPAWN, - windowsHide: true, - env: { ...process.env, PYTHONIOENCODING: 'utf-8' }, - }, (error, stdout, stderr) => { - if (error) { - console.warn(`[CodexLens-v2] Bridge search failed: ${error.message}`); - resolve({ - success: false, - error: `codexlens-search v2 bridge failed: ${error.message}`, - }); - return; - } - - try { - const parsed = JSON.parse(stdout.trim()); - - // Bridge outputs {"error": string} on failure - if (parsed && typeof parsed === 'object' && 'error' in parsed) { - resolve({ - success: false, - error: `codexlens-search v2: ${parsed.error}`, - }); - return; - } - - // Bridge outputs array of {path, score, line, end_line, snippet, content} - const raw: Array<{ - path?: string; score?: number; line?: number; - end_line?: number; snippet?: string; content?: string; - }> = Array.isArray(parsed) ? parsed : []; - - // Build AceLike sections and group by file - const sections: AceLikeSection[] = raw.map(r => ({ - path: r.path || '', - line: r.line || undefined, - endLine: r.end_line || undefined, - score: r.score || 0, - symbol: null, - snippet: r.content || r.snippet || '', - })); - - const groupMap = new Map(); - for (const s of sections) { - const arr = groupMap.get(s.path) || []; - arr.push(s); - groupMap.set(s.path, arr); - } - const groups: AceLikeGroup[] = Array.from(groupMap.entries()).map( - ([path, secs]) => ({ path, sections: secs, total_matches: secs.length }) - ); - - // Render text view with line numbers - const textParts: string[] = []; - for (const s of sections) { - const lineInfo = s.line ? `:${s.line}${s.endLine ? `-${s.endLine}` : ''}` : ''; - textParts.push(`Path: ${s.path}${lineInfo}\n${s.snippet}\n`); - } - - const aceLikeOutput: AceLikeOutput = { - format: 'ace', - text: textParts.join('\n'), - groups, - sections, - total: sections.length, - }; - - resolve({ - success: true, - results: aceLikeOutput, - metadata: { - mode: 'semantic' as any, - backend: 'codexlens-v2', - count: sections.length, - query, - note: 'Using codexlens-search v2 bridge (2-stage vector + reranking)', - }, - }); - } catch (parseErr) { - console.warn(`[CodexLens-v2] Failed to parse bridge output: ${(parseErr as Error).message}`); - resolve({ - success: false, - error: `Failed to parse codexlens-search v2 output: ${(parseErr as Error).message}`, - output: stdout, - }); - } - }); - }); -} - -/** - * Load env vars from ~/.codexlens/.env file so they're passed to bridge subprocess. - */ -function loadCodexLensEnvFile(): Record { - const envVars: Record = {}; - try { - const envPath = join(getCodexLensDataDir(), '.env'); - const content = readFileSync(envPath, 'utf-8'); - for (const line of content.split('\n')) { - const trimmed = line.trim(); - if (!trimmed || trimmed.startsWith('#')) continue; - const eqIdx = trimmed.indexOf('='); - if (eqIdx <= 0) continue; - const key = trimmed.substring(0, eqIdx).trim(); - let value = trimmed.substring(eqIdx + 1).trim(); - // Strip surrounding quotes - if ((value.startsWith('"') && value.endsWith('"')) || - (value.startsWith("'") && value.endsWith("'"))) { - value = value.slice(1, -1); - } - envVars[key] = value; - } - } catch { - // File doesn't exist — no env overrides - } - return envVars; -} - -/** - * Execute a generic codexlens-search v2 bridge subcommand (init, status, sync, watch, etc.). - * Returns parsed JSON output from the bridge CLI. - */ -async function executeV2BridgeCommand( - subcommand: string, - args: string[], - options?: { timeout?: number; dbPath?: string }, -): Promise { - return new Promise((resolve) => { - // --db-path is a global arg and must come BEFORE the subcommand - const globalArgs = options?.dbPath ? ['--db-path', options.dbPath] : []; - const fullArgs = [...globalArgs, subcommand, ...args]; - // Merge process.env with .env file settings (file values override process.env) - const codexlensEnv = loadCodexLensEnvFile(); - execFile('codexlens-search', fullArgs, { - encoding: 'utf-8', - timeout: options?.timeout ?? EXEC_TIMEOUTS.PROCESS_SPAWN, - windowsHide: true, - env: { ...process.env, ...codexlensEnv, PYTHONIOENCODING: 'utf-8' }, - }, (error, stdout, stderr) => { - if (error) { - resolve({ - success: false, - error: `codexlens-search ${subcommand} failed: ${error.message}`, - }); - return; - } - try { - const parsed = JSON.parse(stdout.trim()); - if (parsed && typeof parsed === 'object' && 'error' in parsed) { - resolve({ success: false, error: `codexlens-search: ${parsed.error}` }); - return; - } - resolve({ success: true, status: parsed, message: parsed.status || `${subcommand} completed`, metadata: { action: subcommand } }); - } catch { - resolve({ success: false, error: `Failed to parse codexlens-search ${subcommand} output`, output: stdout }); - } - }); - }); -} - -/** - * List known models via v2 bridge (list-models subcommand). - * Returns JSON array of {name, type, installed, cache_path}. - */ -export async function executeV2ListModels(): Promise { - return executeV2BridgeCommand('list-models', []); -} - -/** - * Download a single model by name via v2 bridge (download-model subcommand). - */ -export async function executeV2DownloadModel(modelName: string): Promise { - return executeV2BridgeCommand('download-model', [modelName], { timeout: 600000 }); -} - -/** - * Delete a model from cache via v2 bridge (delete-model subcommand). - */ -export async function executeV2DeleteModel(modelName: string): Promise { - return executeV2BridgeCommand('delete-model', [modelName]); -} - -/** - * Action: init (v2) - Initialize index and sync files. - */ -async function executeInitActionV2(params: Params): Promise { - const { path = '.' } = params; - const scope = resolveSearchScope(path); - const dbPath = join(scope.workingDirectory, '.codexlens'); - - // Step 1: init empty index - const initResult = await executeV2BridgeCommand('init', [], { dbPath }); - if (!initResult.success) return initResult; - - // Step 2: sync all files - const syncResult = await executeV2BridgeCommand('sync', [ - '--root', scope.workingDirectory, - ], { timeout: 1800000, dbPath }); // 30 min for large codebases - - return { - success: syncResult.success, - error: syncResult.error, - message: syncResult.success - ? `Index initialized and synced for ${scope.workingDirectory}` - : undefined, - metadata: { action: 'init', path: scope.workingDirectory }, - status: syncResult.status, - }; -} - -/** - * Action: status (v2) - Report index statistics. - */ -async function executeStatusActionV2(params: Params): Promise { - const { path = '.' } = params; - const scope = resolveSearchScope(path); - const dbPath = join(scope.workingDirectory, '.codexlens'); - - return executeV2BridgeCommand('status', [], { dbPath }); -} - -/** - * Action: update (v2) - Incremental sync (re-sync changed files). - */ -async function executeUpdateActionV2(params: Params): Promise { - const { path = '.' } = params; - const scope = resolveSearchScope(path); - const dbPath = join(scope.workingDirectory, '.codexlens'); - - return executeV2BridgeCommand('sync', [ - '--root', scope.workingDirectory, - ], { timeout: 600000, dbPath }); // 10 min -} - -/** - * Action: watch (v2) - Start file watcher for auto-updates. - */ -async function executeWatchActionV2(params: Params): Promise { - const { path = '.', debounce = 1000 } = params; - const scope = resolveSearchScope(path); - const dbPath = join(scope.workingDirectory, '.codexlens'); - - // Watch runs indefinitely — start it with a short initial timeout to confirm startup - const result = await executeV2BridgeCommand('watch', [ - '--root', scope.workingDirectory, - '--debounce-ms', debounce.toString(), - ], { timeout: 5000, dbPath }); - - return { - success: true, - message: `File watcher started for ${scope.workingDirectory}. Changes are indexed automatically.`, - metadata: { action: 'watch', path: scope.workingDirectory }, - status: result.status, - }; -} - -// v1 executeCodexLensExactMode removed — v2 bridge handles search - -// v1 executeHybridMode removed — v2 bridge handles semantic search -// v1 executeHybridMode removed — v2 bridge handles semantic search - -/** - * Query intent used to adapt RRF weights (Python parity). - * - * Keep this logic aligned with CodexLens Python hybrid search: - * `codex-lens/src/codexlens/search/hybrid_search.py` - */ -export type QueryIntent = 'keyword' | 'semantic' | 'mixed'; - -// Python default: vector 60%, exact 30%, fuzzy 10% -const DEFAULT_RRF_WEIGHTS = { - exact: 0.3, - fuzzy: 0.1, - vector: 0.6, -} as const; - -function normalizeWeights(weights: Record): Record { - const sum = Object.values(weights).reduce((acc, v) => acc + v, 0); - if (!Number.isFinite(sum) || sum <= 0) return { ...weights }; - return Object.fromEntries(Object.entries(weights).map(([k, v]) => [k, v / sum])); -} - -/** - * Detect query intent using the same heuristic signals as Python: - * - Code patterns: `.`, `::`, `->`, CamelCase, snake_case, common code keywords - * - Natural language patterns: >5 words, question marks, interrogatives, common verbs - */ -export function detectQueryIntent(query: string): QueryIntent { - const trimmed = query.trim(); - if (!trimmed) return 'mixed'; - - const lower = trimmed.toLowerCase(); - const wordCount = trimmed.split(/\s+/).filter(Boolean).length; - - const hasCodeSignals = - /(::|->|\.)/.test(trimmed) || - /[A-Z][a-z]+[A-Z]/.test(trimmed) || - /\b\w+_\w+\b/.test(trimmed) || - /\b(def|class|function|const|let|var|import|from|return|async|await|interface|type)\b/i.test(lower); - - const hasNaturalSignals = - wordCount > 5 || - /\?/.test(trimmed) || - /\b(how|what|why|when|where)\b/i.test(trimmed) || - /\b(handle|explain|fix|implement|create|build|use|find|search|convert|parse|generate|support)\b/i.test(trimmed); - - if (hasCodeSignals && hasNaturalSignals) return 'mixed'; - if (hasCodeSignals) return 'keyword'; - if (hasNaturalSignals) return 'semantic'; - return 'mixed'; -} - -/** - * Intent → weights mapping (Python parity). - * - keyword: exact-heavy - * - semantic: vector-heavy - * - mixed: keep defaults - */ -export function adjustWeightsByIntent( - intent: QueryIntent, - baseWeights: Record, -): Record { - if (intent === 'keyword') return normalizeWeights({ exact: 0.5, fuzzy: 0.1, vector: 0.4 }); - if (intent === 'semantic') return normalizeWeights({ exact: 0.2, fuzzy: 0.1, vector: 0.7 }); - return normalizeWeights({ ...baseWeights }); -} - -export function getRRFWeights( - query: string, - baseWeights: Record = DEFAULT_RRF_WEIGHTS, -): Record { - return adjustWeightsByIntent(detectQueryIntent(query), baseWeights); -} - -/** - * Post-processing: Filter noisy files from semantic search results - * Uses FILTER_CONFIG patterns to remove irrelevant files. - * Optimized: pre-compiled regexes, accurate path segment matching. - */ -// Pre-compile file exclusion regexes once (avoid recompilation in loop) -const FILE_EXCLUDE_REGEXES = [...FILTER_CONFIG.exclude_files].map(pattern => - new RegExp('^' + pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&').replace(/\\\*/g, '.*') + '$') -); - -// Non-code file extensions (for codeOnly filter) -const NON_CODE_EXTENSIONS = new Set([ - 'md', 'txt', 'json', 'yaml', 'yml', 'xml', 'csv', 'log', - 'ini', 'cfg', 'conf', 'toml', 'env', 'properties', - 'html', 'htm', 'svg', 'png', 'jpg', 'jpeg', 'gif', 'ico', 'webp', - 'pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx', - 'lock', 'sum', 'mod', -]); - -interface FilterOptions { - excludeExtensions?: string[]; - codeOnly?: boolean; -} - -function filterNoisyFiles(results: SemanticMatch[], options: FilterOptions = {}): SemanticMatch[] { - const { excludeExtensions = [], codeOnly = false } = options; - - // Build extension filter set - const excludedExtSet = new Set(excludeExtensions.map(ext => ext.toLowerCase().replace(/^\./, ''))); - if (codeOnly) { - NON_CODE_EXTENSIONS.forEach(ext => excludedExtSet.add(ext)); - } - - return results.filter(r => { - // Support both 'file' and 'path' field names (different backends use different names) - const filePath = r.file || (r as any).path || ''; - if (!filePath) return true; - - const segments: string[] = filePath.split(/[/\\]/); - - // Accurate directory check: segment must exactly match excluded directory - if (segments.some((segment: string) => FILTER_CONFIG.exclude_directories.has(segment))) { - return false; - } - - // Accurate file check: pattern matches filename only (not full path) - const filename = segments.pop() || ''; - if (FILE_EXCLUDE_REGEXES.some(regex => regex.test(filename))) { - return false; - } - - // Extension filter check - if (excludedExtSet.size > 0) { - const ext = filename.split('.').pop()?.toLowerCase() || ''; - if (excludedExtSet.has(ext)) { - return false; - } - } - - return true; - }); -} - -/** - * Post-processing: Boost results containing query keywords - * Extracts keywords from query and boosts matching results. - * Optimized: uses whole-word matching with regex for accuracy. - */ -// Helper to escape regex special characters -function escapeRegExp(str: string): string { - return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); -} - -function applyKeywordBoosting(results: SemanticMatch[], query: string): SemanticMatch[] { - // Extract meaningful keywords (ignore common words) - const stopWords = new Set(['the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'shall', 'can', 'need', 'dare', 'ought', 'used', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by', 'from', 'as', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'between', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 'just', 'and', 'but', 'if', 'or', 'because', 'until', 'while', 'although', 'though', 'after', 'before', 'when', 'whenever', 'where', 'wherever', 'whether', 'which', 'who', 'whom', 'whose', 'what', 'whatever', 'whichever', 'whoever', 'whomever', 'this', 'that', 'these', 'those', 'it', 'its']); - - const keywords = query - .toLowerCase() - .split(/[\s,.;:()"{}[\]-]+/) // More robust splitting on punctuation - .filter(word => word.length > 2 && !stopWords.has(word)); - - if (keywords.length === 0) return results; - - // Create case-insensitive regexes for whole-word matching - const keywordRegexes = keywords.map(kw => new RegExp(`\\b${escapeRegExp(kw)}\\b`, 'i')); - - return results.map(r => { - const content = r.content || ''; - const file = r.file || ''; - - // Count keyword matches using whole-word regex - let matchCount = 0; - for (const regex of keywordRegexes) { - if (regex.test(content) || regex.test(file)) { - matchCount++; - } - } - - // Apply boost only if there are matches - if (matchCount > 0) { - const matchRatio = matchCount / keywords.length; - const boost = 1 + (matchRatio * 0.3); // Up to 30% boost for full match - return { - ...r, - score: r.score * boost, - }; - } - - return r; - }); -} - -/** - * Post-processing: Enforce score diversity - * Penalizes results with identical scores (indicates undifferentiated matching) - */ -function enforceScoreDiversity(results: SemanticMatch[]): SemanticMatch[] { - if (results.length < 2) return results; - - // Count occurrences of each score (rounded to 3 decimal places for comparison) - const scoreCounts = new Map(); - for (const r of results) { - const roundedScore = Math.round(r.score * 1000) / 1000; - scoreCounts.set(roundedScore, (scoreCounts.get(roundedScore) || 0) + 1); - } - - // Apply penalty to scores that appear more than twice - return results.map(r => { - const roundedScore = Math.round(r.score * 1000) / 1000; - const count = scoreCounts.get(roundedScore) || 1; - - if (count > 2) { - // Progressive penalty: more duplicates = bigger penalty - const penalty = Math.max(0.7, 1 - (count * 0.05)); - return { ...r, score: r.score * penalty }; - } - return r; - }); -} - -/** - * Post-processing: Filter results with dominant baseline score (hot spot detection) - * When backend returns default "hot spot" files with identical high scores, - * this function detects and removes them. - * - * Detection criteria: - * - A single score appears in >50% of results - * - That score is suspiciously high (>0.9) - * - This indicates fallback mechanism returned placeholder results - */ -function filterDominantBaselineScores( - results: SemanticMatch[] -): { filteredResults: SemanticMatch[]; baselineInfo: { score: number; count: number } | null } { - if (results.length < 4) { - return { filteredResults: results, baselineInfo: null }; - } - - // Count occurrences of each score (rounded to 4 decimal places) - const scoreCounts = new Map(); - results.forEach(r => { - const rounded = Math.round(r.score * 10000) / 10000; - scoreCounts.set(rounded, (scoreCounts.get(rounded) || 0) + 1); - }); - - // Find the most dominant score - let dominantScore: number | null = null; - let dominantCount = 0; - scoreCounts.forEach((count, score) => { - if (count > dominantCount) { - dominantCount = count; - dominantScore = score; - } - }); - - // If a single score is present in >50% of results and is high (>0.9), - // treat it as a suspicious baseline score and filter it out - const BASELINE_THRESHOLD = 0.5; // >50% of results have same score - const HIGH_SCORE_THRESHOLD = 0.9; // Score above 0.9 is suspiciously high - - if ( - dominantScore !== null && - dominantCount > results.length * BASELINE_THRESHOLD && - dominantScore > HIGH_SCORE_THRESHOLD - ) { - const filteredResults = results.filter(r => { - const rounded = Math.round(r.score * 10000) / 10000; - return rounded !== dominantScore; - }); - - return { - filteredResults, - baselineInfo: { score: dominantScore, count: dominantCount }, - }; - } - - return { filteredResults: results, baselineInfo: null }; -} - -/** - * TypeScript implementation of Reciprocal Rank Fusion - * Reference: codex-lens/src/codexlens/search/ranking.py - * Formula: score(d) = Σ weight_source / (k + rank_source(d)) - */ -function normalizeFusionSnippet(value: unknown): string | undefined { - if (typeof value !== 'string') { - return undefined; - } - - const normalized = value.replace(/\s+/g, ' ').trim(); - return normalized ? normalized.slice(0, 240) : undefined; -} - -function buildFusionIdentity(result: any): string | null { - const path = typeof result?.file === 'string' - ? result.file - : typeof result?.path === 'string' - ? result.path - : undefined; - - if (!path) { - return null; - } - - const line = typeof result?.line === 'number' && Number.isFinite(result.line) - ? result.line - : undefined; - const endLine = typeof result?.endLine === 'number' && Number.isFinite(result.endLine) - ? result.endLine - : line; - const column = typeof result?.column === 'number' && Number.isFinite(result.column) - ? result.column - : undefined; - - if (line !== undefined) { - return `${path}#L${line}-${endLine ?? line}:C${column ?? 0}`; - } - - const symbol = typeof result?.symbol === 'string' && result.symbol.trim() - ? result.symbol.trim() - : undefined; - const snippet = normalizeFusionSnippet(result?.content); - - if (symbol && snippet) { - return `${path}::${symbol}::${snippet}`; - } - if (snippet) { - return `${path}::${snippet}`; - } - if (symbol) { - return `${path}::${symbol}`; - } - - return path; -} - -function scoreFusionRepresentative(result: any): number { - let score = 0; - - if (typeof result?.line === 'number' && Number.isFinite(result.line)) { - score += 1000; - } - if (typeof result?.endLine === 'number' && Number.isFinite(result.endLine)) { - score += 250; - } - if (typeof result?.column === 'number' && Number.isFinite(result.column)) { - score += 50; - } - if (Array.isArray(result?.chunkLines) && result.chunkLines.length > 0) { - score += 500 + result.chunkLines.length; - } - if (typeof result?.symbol === 'string' && result.symbol.trim()) { - score += 50; - } - if (typeof result?.content === 'string') { - score += Math.min(result.content.length, 200); - } - - return score; -} - -function applyRRFFusion( - resultsMap: Map, - weightsOrQuery: Record | string, - limit: number, - k: number = 60, -): any[] { - const weights = typeof weightsOrQuery === 'string' ? getRRFWeights(weightsOrQuery) : weightsOrQuery; - const fusedScores = new Map(); - - resultsMap.forEach((results, source) => { - const weight = weights[source] || 0; - if (weight === 0 || !results) return; - - results.forEach((result, rank) => { - const identity = buildFusionIdentity(result); - if (!identity) return; - - const rrfContribution = weight / (k + rank + 1); - const representativeScore = scoreFusionRepresentative(result); - - if (!fusedScores.has(identity)) { - fusedScores.set(identity, { score: 0, result, sources: [], representativeScore }); - } - const entry = fusedScores.get(identity)!; - entry.score += rrfContribution; - if (representativeScore > entry.representativeScore) { - entry.result = result; - entry.representativeScore = representativeScore; - } - if (!entry.sources.includes(source)) { - entry.sources.push(source); - } - }); - }); - - // Sort by fusion score descending - return Array.from(fusedScores.values()) - .sort((a, b) => b.score - a.score) - .slice(0, limit) - .map(item => ({ - ...item.result, - fusion_score: item.score, - matched_backends: item.sources, - })); -} - -/** - * Promise wrapper with timeout support - * @param promise - The promise to wrap - * @param ms - Timeout in milliseconds - * @param modeName - Name of the mode for error message - * @returns A new promise that rejects on timeout - */ -function withTimeout(promise: Promise, ms: number, modeName: string): Promise { - return new Promise((resolve, reject) => { - const timer = setTimeout(() => { - reject(new Error(`'${modeName}' search timed out after ${ms}ms`)); - }, ms); - - promise - .then(resolve) - .catch(reject) - .finally(() => clearTimeout(timer)); - }); -} - -// v1 executePriorityFallbackMode removed — v2 bridge + ripgrep fallback handles all search - -// Tool schema for MCP -export const schema: ToolSchema = { - name: 'smart_search', - description: `Unified code search tool powered by codexlens-search v2 (2-stage vector + FTS5 + reranking). - -Recommended flow: use **action=\"search\"** for lookups, **action=\"init\"** to build the semantic index, and **action=\"update\"** when files change. - -**Actions & Required Parameters:** - -* **search** (default): Semantic code search with ripgrep fallback. - * **query** (string, **REQUIRED**): Content to search for. - * *limit* (number): Max results (default: 5). - * *path* (string): Directory or single file to search (default: current directory). - * *contextLines* (number): Context lines around matches (default: 0). - * *regex* (boolean): Use regex matching in ripgrep fallback (default: true). - * *caseSensitive* (boolean): Case-sensitive search (default: true). - -* **find_files**: Find files by path/name pattern. - * **pattern** (string, **REQUIRED**): Glob pattern (e.g., "*.ts", "src/**/*.js"). - * *limit* (number): Max results (default: 20). - * *offset* (number): Pagination offset (default: 0). - * *includeHidden* (boolean): Include hidden files (default: false). - -* **init**: Initialize v2 semantic index and sync all files. - * *path* (string): Directory to index (default: current). - -* **status**: Check v2 index statistics. (No required params) - -* **update**: Incremental sync for changed files. - * *path* (string): Directory to update (default: current). - -* **watch**: Start file watcher for auto-updates. - * *path* (string): Directory to watch (default: current). - -**Examples:** - smart_search(query="authentication logic") # Semantic search (default) - smart_search(action="init", path="/project") # Build v2 index - smart_search(action="update", path="/project") # Sync changed files - smart_search(query="auth", limit=10, offset=0) # Paginated search`, - inputSchema: { - type: 'object', - properties: { - action: { - type: 'string', - enum: ['init', 'search', 'find_files', 'status', 'update', 'watch', 'search_files'], - description: 'Action: search (semantic search, default), find_files (path pattern matching), init (build v2 index), status (check index), update (incremental sync), watch (auto-update watcher). Note: search_files is deprecated.', - default: 'search', - }, - query: { - type: 'string', - description: 'Content search query (for action="search").', - }, - pattern: { - type: 'string', - description: 'Glob pattern for file discovery (for action="find_files"). Examples: "*.ts", "src/**/*.js", "test_*.py"', - }, - mode: { - type: 'string', - enum: SEARCH_MODES, - description: 'Search mode: fuzzy (v2 semantic + ripgrep fallback, default) or semantic (v2 semantic search only).', - default: 'fuzzy', - }, - output_mode: { - type: 'string', - enum: [...SEARCH_OUTPUT_MODES], - description: 'Output format: ace (default, ACE-style grouped code sections + rendered text), full (raw matches), files_only (paths only), count (per-file counts)', - default: 'ace', - }, - path: { - type: 'string', - description: 'Directory path for init/search actions (default: current directory). For action=search, a single file path is also accepted and results are automatically scoped back to that file.', - }, - paths: { - type: 'array', - description: 'Multiple paths to search within (for search action)', - items: { - type: 'string', - }, - default: [], - }, - contextLines: { - type: 'number', - description: 'Number of context lines around matches (exact mode only)', - default: 0, - }, - maxResults: { - type: 'number', - description: 'Maximum number of full-content results (default: 5)', - default: 5, - }, - limit: { - type: 'number', - description: 'Alias for maxResults (default: 5)', - default: 5, - }, - extraFilesCount: { - type: 'number', - description: 'Number of additional file-only results (paths without content)', - default: 10, - }, - maxContentLength: { - type: 'number', - description: 'Maximum content length for truncation (50-2000)', - default: 200, - }, - offset: { - type: 'number', - description: 'Pagination offset - skip first N results (default: 0)', - default: 0, - }, - includeHidden: { - type: 'boolean', - description: 'Include hidden files/directories', - default: false, - }, - force: { - type: 'boolean', - description: 'Force full rebuild for action="init".', - default: false, - }, - regex: { - type: 'boolean', - description: 'Use regex pattern matching instead of literal string (ripgrep mode only). Default: enabled. Example: smart_search(query="class.*Builder")', - default: true, - }, - caseSensitive: { - type: 'boolean', - description: 'Case-sensitive search (default: true). Set to false for case-insensitive matching.', - default: true, - }, - tokenize: { - type: 'boolean', - description: 'Tokenize multi-word queries for OR matching (ripgrep mode). Default: true. Results are ranked by token match count (exact matches first).', - default: true, - }, - }, - required: [], - }, -}; - -/** - * Action: find_files - Find files by path/name pattern (glob matching) - * Unlike search which looks inside file content, find_files matches file paths - */ -async function executeFindFilesAction(params: Params): Promise { - const { pattern, path = '.', limit = 20, offset = 0, includeHidden = false, caseSensitive = true } = params; - const scope = resolveSearchScope(path); - - if (!pattern) { - return { - success: false, - error: 'Pattern is required for find_files action. Use glob patterns like "*.ts", "src/**/*.js", or "test_*.py"', - }; - } - - // Use ripgrep with --files flag for fast file listing with glob pattern - const hasRipgrep = checkToolAvailability('rg'); - - if (!hasRipgrep) { - // Fallback to CodexLens file listing if available - const readyStatus = await ensureCodexLensReady(); - if (!readyStatus.ready) { - return { - success: false, - error: 'Neither ripgrep nor CodexLens available for file discovery.', - }; - } - - // Try CodexLens file list command - const args = ['list-files', '--json']; - const result = await executeCodexLens(args, { cwd: scope.workingDirectory }); - - if (!result.success) { - return { - success: false, - error: `Failed to list files: ${result.error}`, - }; - } - - // Parse and filter results by pattern - let files: string[] = []; - try { - const parsed = JSON.parse(stripAnsi(result.output || '[]')); - files = Array.isArray(parsed) ? parsed : (parsed.files || []); - } catch { - return { - success: false, - error: 'Failed to parse file list from CodexLens', - }; - } - - // Apply glob pattern matching using minimatch-style regex - const globRegex = globToRegex(pattern, caseSensitive); - const matchedFiles = files.filter(f => globRegex.test(f)); - - // Apply pagination - const total = matchedFiles.length; - const paginatedFiles = matchedFiles.slice(offset, offset + limit); - - const results: FileMatch[] = paginatedFiles.map(filePath => { - const parts = filePath.split(/[/\\]/); - const name = parts[parts.length - 1] || ''; - const ext = name.includes('.') ? name.split('.').pop() : undefined; - return { - path: filePath, - type: 'file' as const, - name, - extension: ext, - }; - }); - - return { - success: true, - results, - metadata: { - pattern, - backend: 'codexlens', - count: results.length, - pagination: { - offset, - limit, - total, - has_more: offset + limit < total, - }, - }, - }; - } - - // Use ripgrep --files with glob pattern for fast file discovery - return new Promise((resolve) => { - const args = ['--files']; - - // Add exclude patterns - if (!includeHidden) { - args.push(...buildExcludeArgs()); - } else { - args.push('--hidden'); - } - - // Add glob pattern - args.push('--glob', pattern); - - // Case sensitivity for glob matching - if (!caseSensitive) { - args.push('--iglob', pattern); - // Remove the case-sensitive glob and use iglob instead - const globIndex = args.indexOf('--glob'); - if (globIndex !== -1) { - args.splice(globIndex, 2); - } - } - - const child = getSpawnRuntime()( - 'rg', - args, - buildSmartSearchSpawnOptions(scope.workingDirectory || getProjectRoot(), { - stdio: ['ignore', 'pipe', 'pipe'], - }), - ); - - let stdout = ''; - let stderr = ''; - - child.stdout?.on('data', (data) => { - stdout += data.toString(); - }); - - child.stderr?.on('data', (data) => { - stderr += data.toString(); - }); - - child.on('close', (code) => { - // ripgrep returns 1 when no matches found, which is not an error - if (code !== 0 && code !== 1 && !stderr.includes('os error 1')) { - resolve({ - success: false, - error: `ripgrep file search failed: ${stderr}`, - }); - return; - } - - const allFiles = stdout.split('\n').filter(line => line.trim()); - const total = allFiles.length; - - // Apply pagination - const paginatedFiles = allFiles.slice(offset, offset + limit); - - const results: FileMatch[] = paginatedFiles.map(filePath => { - const normalizedPath = filePath.replace(/\\/g, '/'); - const parts = normalizedPath.split('/'); - const name = parts[parts.length - 1] || ''; - const ext = name.includes('.') ? name.split('.').pop() : undefined; - return { - path: normalizedPath, - type: 'file' as const, - name, - extension: ext, - }; - }); - - resolve({ - success: true, - results, - metadata: { - pattern, - backend: 'ripgrep', - count: results.length, - pagination: { - offset, - limit, - total, - has_more: offset + limit < total, - }, - }, - }); - }); - - child.on('error', (error) => { - resolve({ - success: false, - error: `Failed to spawn ripgrep: ${error.message}`, - }); - }); - }); -} - -/** - * Convert glob pattern to regex for file matching - * Supports: *, **, ?, [abc], [!abc] - */ -function globToRegex(pattern: string, caseSensitive: boolean = true): RegExp { - let i = 0; - const out: string[] = []; - const special = '.^$+{}|()'; - - while (i < pattern.length) { - const c = pattern[i]; - - if (c === '*') { - if (i + 1 < pattern.length && pattern[i + 1] === '*') { - // ** matches any path including / - out.push('.*'); - i += 2; - // Skip following / if present - if (pattern[i] === '/') { - i++; - } - continue; - } else { - // * matches any character except / - out.push('[^/]*'); - } - } else if (c === '?') { - out.push('[^/]'); - } else if (c === '[') { - // Character class - let j = i + 1; - let negated = false; - if (pattern[j] === '!' || pattern[j] === '^') { - negated = true; - j++; - } - let classContent = ''; - while (j < pattern.length && pattern[j] !== ']') { - classContent += pattern[j]; - j++; - } - if (negated) { - out.push(`[^${classContent}]`); - } else { - out.push(`[${classContent}]`); - } - i = j; - } else if (special.includes(c)) { - out.push('\\' + c); - } else { - out.push(c); - } - i++; - } - - const flags = caseSensitive ? '' : 'i'; - return new RegExp('^' + out.join('') + '$', flags); -} - -/** - * Apply pagination to search results and add pagination metadata - */ -function applyPagination( - results: T[], - offset: number, - limit: number -): { paginatedResults: T[]; pagination: PaginationInfo } { - const total = results.length; - const paginatedResults = results.slice(offset, offset + limit); - - return { - paginatedResults, - pagination: { - offset, - limit, - total, - has_more: offset + limit < total, - }, - }; -} - -function formatChunkRange(section: AceLikeSection): string { - if (section.lines && section.lines.length > 0) { - const start = section.lines[0]?.line; - const end = section.lines[section.lines.length - 1]?.line; - if (typeof start === 'number' && typeof end === 'number' && end > start) { - return `${start}-${end}`; - } - if (typeof start === 'number') { - return String(start); - } - } - if (section.line && section.endLine && section.endLine > section.line) { - return `${section.line}-${section.endLine}`; - } - if (section.line) { - return String(section.line); - } - return '?'; -} - -function renderAceSnippet(section: AceLikeSection): string[] { - if (section.lines && section.lines.length > 0) { - return section.lines.map((line) => { - const marker = line.isMatch ? '>' : ' '; - return `${marker} ${String(line.line).padStart(4, ' ')} | ${line.text}`; - }); - } - - return section.snippet.split(/\r?\n/).map((line) => ` ${line}`); -} - -function formatAceLikeOutput( - results: ExactMatch[] | SemanticMatch[] | GraphMatch[] | FileMatch[] | unknown[], -): AceLikeOutput { - const sections: AceLikeSection[] = []; - - for (const result of results) { - const candidate = result as Record; - const path = typeof candidate.file === 'string' - ? candidate.file - : typeof candidate.path === 'string' - ? candidate.path - : undefined; - - if (!path) { - continue; - } - - const line = typeof candidate.line === 'number' && candidate.line > 0 ? candidate.line : undefined; - const column = typeof candidate.column === 'number' && candidate.column > 0 ? candidate.column : undefined; - const score = typeof candidate.score === 'number' ? candidate.score : undefined; - const symbol = typeof candidate.symbol === 'string' ? candidate.symbol : null; - const rawSnippet = typeof candidate.content === 'string' - ? candidate.content - : typeof candidate.name === 'string' - ? candidate.name - : typeof candidate.type === 'string' - ? `[${candidate.type}]` - : ''; - - sections.push({ - path, - line, - endLine: typeof candidate.endLine === 'number' && candidate.endLine >= (line ?? 0) ? candidate.endLine : line, - column, - score, - symbol, - snippet: rawSnippet || '[no snippet available]', - lines: Array.isArray(candidate.chunkLines) ? candidate.chunkLines as ChunkLine[] : undefined, - }); - } - - const groupsMap = new Map(); - for (const section of sections) { - if (!groupsMap.has(section.path)) { - groupsMap.set(section.path, { - path: section.path, - sections: [], - total_matches: 0, - }); - } - const group = groupsMap.get(section.path)!; - group.sections.push(section); - group.total_matches += 1; - } - const groups = [...groupsMap.values()]; - - const textParts = ['The following code sections were retrieved:']; - for (const group of groups) { - textParts.push(''); - textParts.push(`Path: ${group.path}`); - group.sections.forEach((section, index) => { - const chunkLabel = group.sections.length > 1 ? `Chunk ${index + 1}` : 'Chunk'; - textParts.push(`${chunkLabel}: lines ${formatChunkRange(section)}${section.score !== undefined ? ` | score=${section.score.toFixed(4)}` : ''}`); - if (section.symbol) { - textParts.push(`Symbol: ${section.symbol}`); - } - for (const snippetLine of renderAceSnippet(section)) { - textParts.push(snippetLine); - } - if (index < group.sections.length - 1) { - textParts.push(''); - } - }); - } - - return { - format: 'ace', - text: textParts.join('\n'), - groups, - sections, - total: sections.length, - }; -} - -/** - * Transform results based on output_mode - */ -function transformOutput( - results: ExactMatch[] | SemanticMatch[] | GraphMatch[] | unknown[], - outputMode: SearchOutputMode -): unknown { - if (!Array.isArray(results)) { - return results; - } - - switch (outputMode) { - case 'files_only': { - // Extract unique file paths - const files = [...new Set(results.map((r: any) => r.file))].filter(Boolean); - return { files, count: files.length }; - } - case 'count': { - // Count matches per file - const counts: Record = {}; - for (const r of results) { - const file = (r as any).file; - if (file) { - counts[file] = (counts[file] || 0) + 1; - } - } - return { - files: Object.entries(counts).map(([file, count]) => ({ file, count })), - total: results.length, - }; - } - case 'ace': - return formatAceLikeOutput(results); - case 'full': - default: - return results; - } -} - -function enrichMetadataWithIndexStatus( - metadata: SearchMetadata | undefined, - indexStatus: IndexStatus, - scope: SearchScope, -): SearchMetadata { - const nextMetadata: SearchMetadata = { ...(metadata ?? {}) }; - nextMetadata.embeddings_coverage_percent = indexStatus.embeddings_coverage_percent; - nextMetadata.index_status = indexStatus.indexed - ? (indexStatus.has_embeddings ? 'indexed' : 'partial') - : 'not_indexed'; - nextMetadata.reranker_enabled = indexStatus.config?.reranker_enabled; - nextMetadata.reranker_backend = indexStatus.config?.reranker_backend; - nextMetadata.reranker_model = indexStatus.config?.reranker_model; - nextMetadata.cascade_strategy = indexStatus.config?.cascade_strategy; - nextMetadata.staged_stage2_mode = indexStatus.config?.staged_stage2_mode; - nextMetadata.static_graph_enabled = indexStatus.config?.static_graph_enabled; - nextMetadata.warning = mergeWarnings(nextMetadata.warning, indexStatus.warning); - nextMetadata.suggestions = mergeSuggestions(nextMetadata.suggestions, buildIndexSuggestions(indexStatus, scope)); - return nextMetadata; -} - -// Handler function -export async function handler(params: Record): Promise> { - const parsed = ParamsSchema.safeParse(params); - if (!parsed.success) { - return { success: false, error: `Invalid params: ${parsed.error.message}` }; - } - - parsed.data.query = sanitizeSearchQuery(parsed.data.query); - parsed.data.pattern = sanitizeSearchPath(parsed.data.pattern); - parsed.data.path = sanitizeSearchPath(parsed.data.path); - parsed.data.paths = parsed.data.paths.map((item) => sanitizeSearchPath(item) || item); - - const { action, mode, output_mode, offset = 0 } = parsed.data; - - // Sync limit and maxResults while preserving explicit small values. - // If both are provided, use the larger one. If only one is provided, honor it. - const rawLimit = typeof params.limit === 'number' ? params.limit : undefined; - const rawMaxResults = typeof params.maxResults === 'number' ? params.maxResults : undefined; - const effectiveLimit = rawLimit !== undefined && rawMaxResults !== undefined - ? Math.max(rawLimit, rawMaxResults) - : rawMaxResults ?? rawLimit ?? parsed.data.maxResults ?? parsed.data.limit ?? 5; - parsed.data.maxResults = effectiveLimit; - parsed.data.limit = effectiveLimit; - - // Track if search_files was used (deprecated) - let deprecationWarning: string | undefined; - - try { - let result: SearchResult; - - // Handle actions — all routed through codexlens-search v2 bridge - switch (action) { - case 'init': - result = await executeInitActionV2(parsed.data); - break; - - case 'status': - result = await executeStatusActionV2(parsed.data); - break; - - case 'find_files': - result = await executeFindFilesAction(parsed.data); - break; - - case 'update': - result = await executeUpdateActionV2(parsed.data); - break; - - case 'watch': - result = await executeWatchActionV2(parsed.data); - break; - - case 'search_files': - // DEPRECATED: Redirect to search with files_only output - deprecationWarning = 'action="search_files" is deprecated. Use action="search" with output_mode="files_only" for content-to-files search, or action="find_files" for path pattern matching.'; - parsed.data.output_mode = 'files_only'; - // Fall through to search - - case 'search': - default: { - // v2 bridge for semantic search - const scope = resolveSearchScope(parsed.data.path ?? '.'); - const dbPath = join(scope.workingDirectory, '.codexlens'); - const topK = (parsed.data.maxResults || 5) + (parsed.data.extraFilesCount || 10); - const v2Result = await executeCodexLensV2Bridge(parsed.data.query || '', topK, dbPath); - if (v2Result.success) { - result = v2Result; - break; - } - // v2 failed — fall back to ripgrep-only search - console.warn(`[CodexLens-v2] Bridge failed, falling back to ripgrep: ${v2Result.error}`); - result = await executeRipgrepMode(parsed.data); - break; - } - } - - let backgroundNote: string | undefined; - - // Transform output based on output_mode (for search actions only) - if (action === 'search' || action === 'search_files') { - - // Add pagination metadata for search results if not already present - if (result.success && result.results && Array.isArray(result.results)) { - const totalResults = (result.results as any[]).length; - if (!result.metadata) { - result.metadata = {}; - } - if (!result.metadata.pagination) { - result.metadata.pagination = { - offset: 0, - limit: effectiveLimit, - total: totalResults, - has_more: false, // Already limited by backend - }; - } - } - - if (result.success && result.results && output_mode !== 'full') { - result.results = transformOutput(result.results as any[], output_mode); - if ( - output_mode === 'ace' - && result.results - && typeof result.results === 'object' - && 'format' in result.results - && result.results.format === 'ace' - ) { - const advisoryLines: string[] = []; - if (result.metadata?.warning) { - advisoryLines.push('', 'Warnings:', `- ${result.metadata.warning}`); - } - if (backgroundNote) { - advisoryLines.push('', 'Notes:', `- ${backgroundNote}`); - } - if (result.metadata?.suggestions && result.metadata.suggestions.length > 0) { - advisoryLines.push('', 'Suggestions:'); - for (const suggestion of result.metadata.suggestions) { - advisoryLines.push(`- ${suggestion.title}: ${suggestion.command}`); - advisoryLines.push(` ${suggestion.reason}`); - } - } - const aceResults = result.results as AceLikeOutput; - if (advisoryLines.length > 0) { - aceResults.text += `\n${advisoryLines.join('\n')}`; - } - } - } - } - - // Add deprecation warning if applicable - if (deprecationWarning && result.metadata) { - result.metadata.warning = deprecationWarning; - } - - return result.success ? { success: true, result } : { success: false, error: result.error }; - } catch (error) { - return { success: false, error: (error as Error).message }; - } -} - -/** - * Execute init action with external progress callback - * Used by MCP server for streaming progress - * @param params - Search parameters (path, languages, force) - * @param onProgress - Optional callback for progress updates - */ -export const __testables = { - isCodexLensCliCompatibilityError, - shouldSurfaceCodexLensFtsCompatibilityWarning, - buildSmartSearchSpawnOptions, - shouldDetachBackgroundSmartSearchProcess, - checkToolAvailability, - parseCodexLensJsonOutput, - parsePlainTextFileMatches, - hasCentralizedVectorArtifacts, - extractEmbeddingsStatusSummary, - selectEmbeddingsStatusPayload, - resolveRipgrepQueryMode, - queryTargetsGeneratedFiles, - prefersLexicalPriorityQuery, - classifyIntent, - resolveEmbeddingSelection, - parseOptionalBooleanEnv, - isAutoInitMissingEnabled, - isAutoEmbedMissingEnabled, - getAutoInitMissingDisabledReason, - getAutoEmbedMissingDisabledReason, - buildIndexSuggestions, - maybeStartBackgroundAutoInit, - maybeStartBackgroundAutoEmbed, - __setRuntimeOverrides(overrides: Partial) { - Object.assign(runtimeOverrides, overrides); - }, - __resetRuntimeOverrides() { - for (const key of Object.keys(runtimeOverrides) as Array) { - delete runtimeOverrides[key]; - } - }, - __resetBackgroundJobs() { - autoInitJobs.clear(); - autoEmbedJobs.clear(); - }, -}; - -export async function executeInitWithProgress( - params: Record, - onProgress?: (progress: ProgressInfo) => void -): Promise { - const path = (params.path as string) || '.'; - const scope = resolveSearchScope(path); - const dbPath = join(scope.workingDirectory, '.codexlens'); - - // Notify progress start - if (onProgress) { - onProgress({ stage: 'init', message: 'Initializing v2 index...', percent: 0 } as ProgressInfo); - } - - // Step 1: init empty index - const initResult = await executeV2BridgeCommand('init', [], { dbPath }); - if (!initResult.success) return initResult; - - if (onProgress) { - onProgress({ stage: 'sync', message: 'Syncing files...', percent: 10 } as ProgressInfo); - } - - // Step 2: sync all files - const syncResult = await executeV2BridgeCommand('sync', [ - '--root', scope.workingDirectory, - ], { timeout: 1800000, dbPath }); - - if (onProgress) { - onProgress({ stage: 'complete', message: 'Index build complete', percent: 100 } as ProgressInfo); - } - - return { - success: syncResult.success, - error: syncResult.error, - message: syncResult.success - ? `v2 index created and synced for ${scope.workingDirectory}` - : undefined, - metadata: { action: 'init', path: scope.workingDirectory }, - status: syncResult.status, - }; -} diff --git a/ccw/src/tools/smart-search.ts.backup b/ccw/src/tools/smart-search.ts.backup deleted file mode 100644 index 173ce0fa..00000000 --- a/ccw/src/tools/smart-search.ts.backup +++ /dev/null @@ -1,1233 +0,0 @@ -/** - * Smart Search Tool - Unified intelligent search with CodexLens integration - * - * Features: - * - Intent classification with automatic mode selection - * - CodexLens integration (init, hybrid, vector, semantic) - * - Ripgrep fallback for exact mode - * - Index status checking and warnings - * - Multi-backend search routing with RRF ranking - * - * Actions: - * - init: Initialize CodexLens index - * - search: Intelligent search with auto mode selection - * - status: Check index status - */ - -import { z } from 'zod'; -import type { ToolSchema, ToolResult } from '../types/tool.js'; -import { spawn, execSync } from 'child_process'; -import { - ensureReady as ensureCodexLensReady, - executeCodexLens, -} from './codex-lens.js'; -import type { ProgressInfo } from './codex-lens.js'; - -// Define Zod schema for validation -const ParamsSchema = z.object({ - action: z.enum(['init', 'search', 'search_files', 'status']).default('search'), - query: z.string().optional(), - mode: z.enum(['auto', 'hybrid', 'exact', 'ripgrep', 'priority']).default('auto'), - output_mode: z.enum(['full', 'files_only', 'count']).default('full'), - path: z.string().optional(), - paths: z.array(z.string()).default([]), - contextLines: z.number().default(0), - maxResults: z.number().default(10), - includeHidden: z.boolean().default(false), - languages: z.array(z.string()).optional(), - limit: z.number().default(10), - enrich: z.boolean().default(false), -}); - -type Params = z.infer; - -// Search mode constants -const SEARCH_MODES = ['auto', 'hybrid', 'exact', 'ripgrep', 'priority'] as const; - -// Classification confidence threshold -const CONFIDENCE_THRESHOLD = 0.7; - -interface Classification { - mode: string; - confidence: number; - reasoning: string; -} - -interface ExactMatch { - file: string; - line: number; - column: number; - content: string; -} - -interface RelationshipInfo { - type: string; // 'calls', 'imports', 'called_by', 'imported_by' - direction: 'outgoing' | 'incoming'; - target?: string; // Target symbol name (for outgoing) - source?: string; // Source symbol name (for incoming) - file: string; // File path - line?: number; // Line number -} - -interface SemanticMatch { - file: string; - score: number; - content: string; - symbol: string | null; - relationships?: RelationshipInfo[]; -} - -interface GraphMatch { - file: string; - symbols: unknown; - relationships: unknown[]; -} - -interface SearchMetadata { - mode?: string; - backend?: string; - count?: number; - query?: string; - classified_as?: string; - confidence?: number; - reasoning?: string; - embeddings_coverage_percent?: number; - warning?: string; - note?: string; - index_status?: 'indexed' | 'not_indexed' | 'partial'; - fallback_history?: string[]; - // Init action specific - action?: string; - path?: string; - progress?: { - stage: string; - message: string; - percent: number; - filesProcessed?: number; - totalFiles?: number; - }; - progressHistory?: ProgressInfo[]; -} - -interface SearchResult { - success: boolean; - results?: ExactMatch[] | SemanticMatch[] | GraphMatch[] | unknown; - output?: string; - metadata?: SearchMetadata; - error?: string; - status?: unknown; - message?: string; -} - -interface IndexStatus { - indexed: boolean; - has_embeddings: boolean; - file_count?: number; - embeddings_coverage_percent?: number; - warning?: string; -} - -/** - * Strip ANSI color codes from string (for JSON parsing) - */ -function stripAnsi(str: string): string { - return str.replace(/\x1b\[[0-9;]*m/g, ''); -} - -/** - * Check if CodexLens index exists for current directory - * @param path - Directory path to check - * @returns Index status - */ -async function checkIndexStatus(path: string = '.'): Promise { - try { - const result = await executeCodexLens(['status', '--json'], { cwd: path }); - - if (!result.success) { - return { - indexed: false, - has_embeddings: false, - warning: 'No CodexLens index found. Run smart_search(action="init") to create index for better search results.', - }; - } - - // Parse status output - try { - // Strip ANSI color codes from JSON output - const cleanOutput = stripAnsi(result.output || '{}'); - const parsed = JSON.parse(cleanOutput); - // Handle both direct and nested response formats (status returns {success, result: {...}}) - const status = parsed.result || parsed; - const indexed = status.projects_count > 0 || status.total_files > 0; - - // Get embeddings coverage from comprehensive status - const embeddingsData = status.embeddings || {}; - const embeddingsCoverage = embeddingsData.coverage_percent || 0; - const has_embeddings = embeddingsCoverage >= 50; // Threshold: 50% - - let warning: string | undefined; - if (!indexed) { - warning = 'No CodexLens index found. Run smart_search(action="init") to create index for better search results.'; - } else if (embeddingsCoverage === 0) { - warning = 'Index exists but no embeddings generated. Run: codexlens embeddings-generate --recursive'; - } else if (embeddingsCoverage < 50) { - warning = `Embeddings coverage is ${embeddingsCoverage.toFixed(1)}% (below 50%). Hybrid search will use exact mode. Run: codexlens embeddings-generate --recursive`; - } - - return { - indexed, - has_embeddings, - file_count: status.total_files, - embeddings_coverage_percent: embeddingsCoverage, - warning, - }; - } catch { - return { - indexed: false, - has_embeddings: false, - warning: 'Failed to parse index status', - }; - } - } catch { - return { - indexed: false, - has_embeddings: false, - warning: 'CodexLens not available', - }; - } -} - -/** - * Detection heuristics for intent classification - */ - -/** - * Detect literal string query (simple alphanumeric or quoted strings) - */ -function detectLiteral(query: string): boolean { - return /^[a-zA-Z0-9_-]+$/.test(query) || /^["'].*["']$/.test(query); -} - -/** - * Detect regex pattern (contains regex metacharacters) - */ -function detectRegex(query: string): boolean { - return /[.*+?^${}()|[\]\\]/.test(query); -} - -/** - * Detect natural language query (sentence structure, questions, multi-word phrases) - */ -function detectNaturalLanguage(query: string): boolean { - return query.split(/\s+/).length >= 3 || /\?$/.test(query); -} - -/** - * Detect file path query (path separators, file extensions) - */ -function detectFilePath(query: string): boolean { - return /[/\\]/.test(query) || /\.[a-z]{2,4}$/i.test(query); -} - -/** - * Detect relationship query (import, export, dependency keywords) - */ -function detectRelationship(query: string): boolean { - return /(import|export|uses?|depends?|calls?|extends?)\s/i.test(query); -} - -/** - * Classify query intent and recommend search mode - * Simple mapping: hybrid (NL + index + embeddings) | exact (index or insufficient embeddings) | ripgrep (no index) - * @param query - Search query string - * @param hasIndex - Whether CodexLens index exists - * @param hasSufficientEmbeddings - Whether embeddings coverage >= 50% - * @returns Classification result - */ -function classifyIntent(query: string, hasIndex: boolean = false, hasSufficientEmbeddings: boolean = false): Classification { - // Detect query patterns - const isNaturalLanguage = detectNaturalLanguage(query); - - // Simple decision tree - let mode: string; - let confidence: number; - - if (!hasIndex) { - // No index: use ripgrep - mode = 'ripgrep'; - confidence = 1.0; - } else if (isNaturalLanguage && hasSufficientEmbeddings) { - // Natural language + sufficient embeddings: use hybrid - mode = 'hybrid'; - confidence = 0.9; - } else { - // Simple query OR insufficient embeddings: use exact - mode = 'exact'; - confidence = 0.8; - } - - // Build reasoning string - const detectedPatterns: string[] = []; - if (detectLiteral(query)) detectedPatterns.push('literal'); - if (detectRegex(query)) detectedPatterns.push('regex'); - if (detectNaturalLanguage(query)) detectedPatterns.push('natural language'); - if (detectFilePath(query)) detectedPatterns.push('file path'); - if (detectRelationship(query)) detectedPatterns.push('relationship'); - - const reasoning = `Query classified as ${mode} (confidence: ${confidence.toFixed(2)}, detected: ${detectedPatterns.join(', ')}, index: ${hasIndex ? 'available' : 'not available'}, embeddings: ${hasSufficientEmbeddings ? 'sufficient' : 'insufficient'})`; - - return { mode, confidence, reasoning }; -} - -/** - * Check if a tool is available in PATH - * @param toolName - Tool executable name - * @returns True if available - */ -function checkToolAvailability(toolName: string): boolean { - try { - const isWindows = process.platform === 'win32'; - const command = isWindows ? 'where' : 'which'; - execSync(`${command} ${toolName}`, { stdio: 'ignore' }); - return true; - } catch { - return false; - } -} - -/** - * Build ripgrep command arguments - * @param params - Search parameters - * @returns Command and arguments - */ -function buildRipgrepCommand(params: { - query: string; - paths: string[]; - contextLines: number; - maxResults: number; - includeHidden: boolean; -}): { command: string; args: string[] } { - const { query, paths = ['.'], contextLines = 0, maxResults = 10, includeHidden = false } = params; - - const args = [ - '-n', // Show line numbers - '--color=never', // Disable color output - '--json', // Output in JSON format - ]; - - // Add context lines if specified - if (contextLines > 0) { - args.push('-C', contextLines.toString()); - } - - // Add max results limit - if (maxResults > 0) { - args.push('--max-count', maxResults.toString()); - } - - // Include hidden files if specified - if (includeHidden) { - args.push('--hidden'); - } - - // Use literal/fixed string matching for exact mode - args.push('-F', query); - - // Add search paths - args.push(...paths); - - return { command: 'rg', args }; -} - -/** - * Action: init - Initialize CodexLens index (FTS only, no embeddings) - * For semantic/vector search, use ccw view dashboard or codexlens CLI directly - */ -async function executeInitAction(params: Params): Promise { - const { path = '.', languages } = params; - - // Check CodexLens availability - const readyStatus = await ensureCodexLensReady(); - if (!readyStatus.ready) { - return { - success: false, - error: `CodexLens not available: ${readyStatus.error}. CodexLens will be auto-installed on first use.`, - }; - } - - // Build args with --no-embeddings for FTS-only index (faster) - const args = ['init', path, '--no-embeddings']; - if (languages && languages.length > 0) { - args.push('--languages', languages.join(',')); - } - - // Track progress updates - const progressUpdates: ProgressInfo[] = []; - let lastProgress: ProgressInfo | null = null; - - const result = await executeCodexLens(args, { - cwd: path, - timeout: 1800000, // 30 minutes for large codebases - onProgress: (progress: ProgressInfo) => { - progressUpdates.push(progress); - lastProgress = progress; - }, - }); - - // Build metadata with progress info - const metadata: SearchMetadata = { - action: 'init', - path, - }; - - if (lastProgress !== null) { - const p = lastProgress as ProgressInfo; - metadata.progress = { - stage: p.stage, - message: p.message, - percent: p.percent, - filesProcessed: p.filesProcessed, - totalFiles: p.totalFiles, - }; - } - - if (progressUpdates.length > 0) { - metadata.progressHistory = progressUpdates.slice(-5); // Keep last 5 progress updates - } - - const successMessage = result.success - ? `FTS index created for ${path}. Note: For semantic/vector search, create vector index via "ccw view" dashboard or run "codexlens init ${path}" (without --no-embeddings).` - : undefined; - - return { - success: result.success, - error: result.error, - message: successMessage, - metadata, - }; -} - -/** - * Action: status - Check CodexLens index status - */ -async function executeStatusAction(params: Params): Promise { - const { path = '.' } = params; - - const indexStatus = await checkIndexStatus(path); - - return { - success: true, - status: indexStatus, - message: indexStatus.warning || `Index status: ${indexStatus.indexed ? 'indexed' : 'not indexed'}, embeddings: ${indexStatus.has_embeddings ? 'available' : 'not available'}`, - }; -} - -/** - * Mode: auto - Intent classification and mode selection - * Routes to: hybrid (NL + index) | exact (index) | ripgrep (no index) - */ -async function executeAutoMode(params: Params): Promise { - const { query, path = '.' } = params; - - if (!query) { - return { - success: false, - error: 'Query is required for search action', - }; - } - - // Check index status - const indexStatus = await checkIndexStatus(path); - - // Classify intent with index and embeddings awareness - const classification = classifyIntent( - query, - indexStatus.indexed, - indexStatus.has_embeddings // This now considers 50% threshold - ); - - // Route to appropriate mode based on classification - let result: SearchResult; - - switch (classification.mode) { - case 'hybrid': - result = await executeHybridMode(params); - break; - - case 'exact': - result = await executeCodexLensExactMode(params); - break; - - case 'ripgrep': - result = await executeRipgrepMode(params); - break; - - default: - // Fallback to ripgrep - result = await executeRipgrepMode(params); - break; - } - - // Add classification metadata - if (result.metadata) { - result.metadata.classified_as = classification.mode; - result.metadata.confidence = classification.confidence; - result.metadata.reasoning = classification.reasoning; - result.metadata.embeddings_coverage_percent = indexStatus.embeddings_coverage_percent; - result.metadata.index_status = indexStatus.indexed - ? (indexStatus.has_embeddings ? 'indexed' : 'partial') - : 'not_indexed'; - - // Add warning if needed - if (indexStatus.warning) { - result.metadata.warning = indexStatus.warning; - } - } - - return result; -} - -/** - * Mode: ripgrep - Fast literal string matching using ripgrep - * No index required, fallback to CodexLens if ripgrep unavailable - */ -async function executeRipgrepMode(params: Params): Promise { - const { query, paths = [], contextLines = 0, maxResults = 10, includeHidden = false, path = '.' } = params; - - if (!query) { - return { - success: false, - error: 'Query is required for search', - }; - } - - // Check if ripgrep is available - const hasRipgrep = checkToolAvailability('rg'); - - // If ripgrep not available, fall back to CodexLens exact mode - if (!hasRipgrep) { - const readyStatus = await ensureCodexLensReady(); - if (!readyStatus.ready) { - return { - success: false, - error: 'Neither ripgrep nor CodexLens available. Install ripgrep (rg) or CodexLens for search functionality.', - }; - } - - // Use CodexLens exact mode as fallback - const args = ['search', query, '--limit', maxResults.toString(), '--mode', 'exact', '--json']; - const result = await executeCodexLens(args, { cwd: path }); - - if (!result.success) { - return { - success: false, - error: result.error, - metadata: { - mode: 'ripgrep', - backend: 'codexlens-fallback', - count: 0, - query, - }, - }; - } - - // Parse results - let results: SemanticMatch[] = []; - try { - const parsed = JSON.parse(stripAnsi(result.output || '{}')); - const data = parsed.result?.results || parsed.results || parsed; - results = (Array.isArray(data) ? data : []).map((item: any) => ({ - file: item.path || item.file, - score: item.score || 0, - content: item.excerpt || item.content || '', - symbol: item.symbol || null, - })); - } catch { - // Keep empty results - } - - return { - success: true, - results, - metadata: { - mode: 'ripgrep', - backend: 'codexlens-fallback', - count: results.length, - query, - note: 'Using CodexLens exact mode (ripgrep not available)', - }, - }; - } - - // Use ripgrep - const { command, args } = buildRipgrepCommand({ - query, - paths: paths.length > 0 ? paths : [path], - contextLines, - maxResults, - includeHidden, - }); - - return new Promise((resolve) => { - const child = spawn(command, args, { - cwd: path || process.cwd(), - stdio: ['ignore', 'pipe', 'pipe'], - }); - - let stdout = ''; - let stderr = ''; - - child.stdout.on('data', (data) => { - stdout += data.toString(); - }); - - child.stderr.on('data', (data) => { - stderr += data.toString(); - }); - - child.on('close', (code) => { - const results: ExactMatch[] = []; - - if (code === 0 || (code === 1 && stdout.trim())) { - const lines = stdout.split('\n').filter((line) => line.trim()); - - for (const line of lines) { - try { - const item = JSON.parse(line); - - if (item.type === 'match') { - const match: ExactMatch = { - file: item.data.path.text, - line: item.data.line_number, - column: - item.data.submatches && item.data.submatches[0] - ? item.data.submatches[0].start + 1 - : 1, - content: item.data.lines.text.trim(), - }; - results.push(match); - } - } catch { - continue; - } - } - - resolve({ - success: true, - results, - metadata: { - mode: 'ripgrep', - backend: 'ripgrep', - count: results.length, - query, - }, - }); - } else { - resolve({ - success: false, - error: `ripgrep execution failed with code ${code}: ${stderr}`, - results: [], - }); - } - }); - - child.on('error', (error) => { - resolve({ - success: false, - error: `Failed to spawn ripgrep: ${error.message}`, - results: [], - }); - }); - }); -} - -/** - * Mode: exact - CodexLens exact/FTS search - * Requires index - */ -async function executeCodexLensExactMode(params: Params): Promise { - const { query, path = '.', maxResults = 10, enrich = false } = params; - - if (!query) { - return { - success: false, - error: 'Query is required for search', - }; - } - - // Check CodexLens availability - const readyStatus = await ensureCodexLensReady(); - if (!readyStatus.ready) { - return { - success: false, - error: `CodexLens not available: ${readyStatus.error}`, - }; - } - - // Check index status - const indexStatus = await checkIndexStatus(path); - - const args = ['search', query, '--limit', maxResults.toString(), '--mode', 'exact', '--json']; - if (enrich) { - args.push('--enrich'); - } - const result = await executeCodexLens(args, { cwd: path }); - - if (!result.success) { - return { - success: false, - error: result.error, - metadata: { - mode: 'exact', - backend: 'codexlens', - count: 0, - query, - warning: indexStatus.warning, - }, - }; - } - - // Parse results - let results: SemanticMatch[] = []; - try { - const parsed = JSON.parse(stripAnsi(result.output || '{}')); - const data = parsed.result?.results || parsed.results || parsed; - results = (Array.isArray(data) ? data : []).map((item: any) => ({ - file: item.path || item.file, - score: item.score || 0, - content: item.excerpt || item.content || '', - symbol: item.symbol || null, - })); - } catch { - // Keep empty results - } - - return { - success: true, - results, - metadata: { - mode: 'exact', - backend: 'codexlens', - count: results.length, - query, - warning: indexStatus.warning, - }, - }; -} - -/** - * Mode: hybrid - Best quality search with RRF fusion - * Uses CodexLens hybrid mode (exact + fuzzy + vector) - * Requires index with embeddings - */ -async function executeHybridMode(params: Params): Promise { - const { query, path = '.', maxResults = 10, enrich = false } = params; - - if (!query) { - return { - success: false, - error: 'Query is required for search', - }; - } - - // Check CodexLens availability - const readyStatus = await ensureCodexLensReady(); - if (!readyStatus.ready) { - return { - success: false, - error: `CodexLens not available: ${readyStatus.error}`, - }; - } - - // Check index status - const indexStatus = await checkIndexStatus(path); - - const args = ['search', query, '--limit', maxResults.toString(), '--mode', 'hybrid', '--json']; - if (enrich) { - args.push('--enrich'); - } - const result = await executeCodexLens(args, { cwd: path }); - - if (!result.success) { - return { - success: false, - error: result.error, - metadata: { - mode: 'hybrid', - backend: 'codexlens', - count: 0, - query, - warning: indexStatus.warning, - }, - }; - } - - // Parse results - let results: SemanticMatch[] = []; - try { - const parsed = JSON.parse(stripAnsi(result.output || '{}')); - const data = parsed.result?.results || parsed.results || parsed; - results = (Array.isArray(data) ? data : []).map((item: any) => ({ - file: item.path || item.file, - score: item.score || 0, - content: item.excerpt || item.content || '', - symbol: item.symbol || null, - })); - } catch { - return { - success: true, - results: [], - output: result.output, - metadata: { - mode: 'hybrid', - backend: 'codexlens', - count: 0, - query, - warning: indexStatus.warning || 'Failed to parse JSON output', - }, - }; - } - - return { - success: true, - results, - metadata: { - mode: 'hybrid', - backend: 'codexlens', - count: results.length, - query, - note: 'Hybrid mode uses RRF fusion (exact + fuzzy + vector) for best results', - warning: indexStatus.warning, - }, - }; -} - -/** - * TypeScript implementation of Reciprocal Rank Fusion - * Reference: codex-lens/src/codexlens/search/ranking.py - * Formula: score(d) = Σ weight_source / (k + rank_source(d)) - */ -function applyRRFFusion( - resultsMap: Map, - weights: Record, - limit: number, - k: number = 60, -): any[] { - const pathScores = new Map(); - - resultsMap.forEach((results, source) => { - const weight = weights[source] || 0; - if (weight === 0 || !results) return; - - results.forEach((result, rank) => { - const path = result.file || result.path; - if (!path) return; - - const rrfContribution = weight / (k + rank + 1); - - if (!pathScores.has(path)) { - pathScores.set(path, { score: 0, result, sources: [] }); - } - const entry = pathScores.get(path)!; - entry.score += rrfContribution; - if (!entry.sources.includes(source)) { - entry.sources.push(source); - } - }); - }); - - // Sort by fusion score descending - return Array.from(pathScores.values()) - .sort((a, b) => b.score - a.score) - .slice(0, limit) - .map(item => ({ - ...item.result, - fusion_score: item.score, - matched_backends: item.sources, - })); -} - -/** - * Promise wrapper with timeout support - * @param promise - The promise to wrap - * @param ms - Timeout in milliseconds - * @param modeName - Name of the mode for error message - * @returns A new promise that rejects on timeout - */ -function withTimeout(promise: Promise, ms: number, modeName: string): Promise { - return new Promise((resolve, reject) => { - const timer = setTimeout(() => { - reject(new Error(`'${modeName}' search timed out after ${ms}ms`)); - }, ms); - - promise - .then(resolve) - .catch(reject) - .finally(() => clearTimeout(timer)); - }); -} - -/** - * Mode: priority - Fallback search strategy: hybrid -> exact -> ripgrep - * Returns results from the first backend that succeeds and provides results. - * More efficient than parallel mode - stops as soon as valid results are found. - */ -async function executePriorityFallbackMode(params: Params): Promise { - const { query, path = '.' } = params; - const fallbackHistory: string[] = []; - - if (!query) { - return { success: false, error: 'Query is required for search' }; - } - - // Check index status first - const indexStatus = await checkIndexStatus(path); - - // 1. Try Hybrid search (highest priority) - 90s timeout for large indexes - if (indexStatus.indexed && indexStatus.has_embeddings) { - try { - const hybridResult = await withTimeout(executeHybridMode(params), 90000, 'hybrid'); - if (hybridResult.success && hybridResult.results && (hybridResult.results as any[]).length > 0) { - fallbackHistory.push('hybrid: success'); - return { - ...hybridResult, - metadata: { - ...hybridResult.metadata, - mode: 'priority', - note: 'Result from hybrid search (semantic + vector).', - fallback_history: fallbackHistory, - }, - }; - } - fallbackHistory.push('hybrid: no results'); - } catch (error) { - fallbackHistory.push(`hybrid: ${(error as Error).message}`); - } - } else { - fallbackHistory.push(`hybrid: skipped (${!indexStatus.indexed ? 'no index' : 'no embeddings'})`); - } - - // 2. Fallback to Exact search - 10s timeout - if (indexStatus.indexed) { - try { - const exactResult = await withTimeout(executeCodexLensExactMode(params), 10000, 'exact'); - if (exactResult.success && exactResult.results && (exactResult.results as any[]).length > 0) { - fallbackHistory.push('exact: success'); - return { - ...exactResult, - metadata: { - ...exactResult.metadata, - mode: 'priority', - note: 'Result from exact/FTS search (fallback from hybrid).', - fallback_history: fallbackHistory, - }, - }; - } - fallbackHistory.push('exact: no results'); - } catch (error) { - fallbackHistory.push(`exact: ${(error as Error).message}`); - } - } else { - fallbackHistory.push('exact: skipped (no index)'); - } - - // 3. Final fallback to Ripgrep - 5s timeout - try { - const ripgrepResult = await withTimeout(executeRipgrepMode(params), 5000, 'ripgrep'); - fallbackHistory.push(ripgrepResult.success ? 'ripgrep: success' : 'ripgrep: failed'); - return { - ...ripgrepResult, - metadata: { - ...ripgrepResult.metadata, - mode: 'priority', - note: 'Result from ripgrep search (final fallback).', - fallback_history: fallbackHistory, - }, - }; - } catch (error) { - fallbackHistory.push(`ripgrep: ${(error as Error).message}`); - } - - // All modes failed - return { - success: false, - error: 'All search backends in priority mode failed or returned no results.', - metadata: { - mode: 'priority', - query, - fallback_history: fallbackHistory, - } as any, - }; -} - -// Tool schema for MCP -export const schema: ToolSchema = { - name: 'smart_search', - description: `Intelligent code search with five modes. Use "auto" mode (default) for intelligent routing. - -**Usage:** - smart_search(query="authentication logic") # auto mode - routes to best backend - smart_search(query="MyClass", mode="exact") # exact mode - precise FTS matching - smart_search(query="auth", mode="ripgrep") # ripgrep mode - fast literal search (no index) - smart_search(query="how to auth", mode="hybrid") # hybrid mode - semantic search (requires index) - -**Index Management:** - smart_search(action="init") # Create FTS index for current directory - smart_search(action="status") # Check index and embedding status - -**Graph Enrichment:** - smart_search(query="func", enrich=true) # Enrich results with code relationships (calls, imports, called_by, imported_by) - -**Modes:** auto (intelligent routing), hybrid (semantic, needs index), exact (FTS), ripgrep (fast, no index), priority (fallback: hybrid→exact→ripgrep)`, - inputSchema: { - type: 'object', - properties: { - action: { - type: 'string', - enum: ['init', 'search', 'search_files', 'status'], - description: 'Action to perform: init (create FTS index, no embeddings), search (default), search_files (paths only), status (check index)', - default: 'search', - }, - query: { - type: 'string', - description: 'Search query (required for search/search_files actions)', - }, - mode: { - type: 'string', - enum: SEARCH_MODES, - description: 'Search mode: auto (default), hybrid (best quality), exact (CodexLens FTS), ripgrep (fast, no index), priority (fallback: hybrid->exact->ripgrep)', - default: 'auto', - }, - output_mode: { - type: 'string', - enum: ['full', 'files_only', 'count'], - description: 'Output format: full (default), files_only (paths only), count (per-file counts)', - default: 'full', - }, - path: { - type: 'string', - description: 'Directory path for init/search actions (default: current directory)', - }, - paths: { - type: 'array', - description: 'Multiple paths to search within (for search action)', - items: { - type: 'string', - }, - default: [], - }, - contextLines: { - type: 'number', - description: 'Number of context lines around matches (exact mode only)', - default: 0, - }, - maxResults: { - type: 'number', - description: 'Maximum number of results (default: 10)', - default: 10, - }, - limit: { - type: 'number', - description: 'Alias for maxResults', - default: 10, - }, - includeHidden: { - type: 'boolean', - description: 'Include hidden files/directories', - default: false, - }, - languages: { - type: 'array', - items: { type: 'string' }, - description: 'Languages to index (for init action). Example: ["javascript", "typescript"]', - }, - enrich: { - type: 'boolean', - description: 'Enrich search results with code graph relationships (calls, imports, called_by, imported_by).', - default: false, - }, - }, - required: [], - }, -}; - -/** - * Transform results based on output_mode - */ -function transformOutput( - results: ExactMatch[] | SemanticMatch[] | GraphMatch[] | unknown[], - outputMode: 'full' | 'files_only' | 'count' -): unknown { - if (!Array.isArray(results)) { - return results; - } - - switch (outputMode) { - case 'files_only': { - // Extract unique file paths - const files = [...new Set(results.map((r: any) => r.file))].filter(Boolean); - return { files, count: files.length }; - } - case 'count': { - // Count matches per file - const counts: Record = {}; - for (const r of results) { - const file = (r as any).file; - if (file) { - counts[file] = (counts[file] || 0) + 1; - } - } - return { - files: Object.entries(counts).map(([file, count]) => ({ file, count })), - total: results.length, - }; - } - case 'full': - default: - return results; - } -} - -// Handler function -export async function handler(params: Record): Promise> { - const parsed = ParamsSchema.safeParse(params); - if (!parsed.success) { - return { success: false, error: `Invalid params: ${parsed.error.message}` }; - } - - const { action, mode, output_mode } = parsed.data; - - // Sync limit and maxResults - use the larger of the two if both provided - // This ensures user-provided values take precedence over defaults - const effectiveLimit = Math.max(parsed.data.limit || 10, parsed.data.maxResults || 10); - parsed.data.maxResults = effectiveLimit; - parsed.data.limit = effectiveLimit; - - try { - let result: SearchResult; - - // Handle actions - switch (action) { - case 'init': - result = await executeInitAction(parsed.data); - break; - - case 'status': - result = await executeStatusAction(parsed.data); - break; - - case 'search_files': - // For search_files, use search mode but force files_only output - parsed.data.output_mode = 'files_only'; - // Fall through to search - - case 'search': - default: - // Handle search modes: auto | hybrid | exact | ripgrep | priority - switch (mode) { - case 'auto': - result = await executeAutoMode(parsed.data); - break; - case 'hybrid': - result = await executeHybridMode(parsed.data); - break; - case 'exact': - result = await executeCodexLensExactMode(parsed.data); - break; - case 'ripgrep': - result = await executeRipgrepMode(parsed.data); - break; - case 'priority': - result = await executePriorityFallbackMode(parsed.data); - break; - default: - throw new Error(`Unsupported mode: ${mode}. Use: auto, hybrid, exact, ripgrep, or priority`); - } - break; - } - - // Transform output based on output_mode (for search actions only) - if (action === 'search' || action === 'search_files') { - if (result.success && result.results && output_mode !== 'full') { - result.results = transformOutput(result.results as any[], output_mode); - } - } - - return result.success ? { success: true, result } : { success: false, error: result.error }; - } catch (error) { - return { success: false, error: (error as Error).message }; - } -} - -/** - * Execute init action with external progress callback - * Used by MCP server for streaming progress - */ -export async function executeInitWithProgress( - params: Record, - onProgress?: (progress: ProgressInfo) => void -): Promise { - const path = (params.path as string) || '.'; - const languages = params.languages as string[] | undefined; - - // Check CodexLens availability - const readyStatus = await ensureCodexLensReady(); - if (!readyStatus.ready) { - return { - success: false, - error: `CodexLens not available: ${readyStatus.error}. CodexLens will be auto-installed on first use.`, - }; - } - - const args = ['init', path]; - if (languages && languages.length > 0) { - args.push('--languages', languages.join(',')); - } - - // Track progress updates - const progressUpdates: ProgressInfo[] = []; - let lastProgress: ProgressInfo | null = null; - - const result = await executeCodexLens(args, { - cwd: path, - timeout: 1800000, // 30 minutes for large codebases - onProgress: (progress: ProgressInfo) => { - progressUpdates.push(progress); - lastProgress = progress; - // Call external progress callback if provided - if (onProgress) { - onProgress(progress); - } - }, - }); - - // Build metadata with progress info - const metadata: SearchMetadata = { - action: 'init', - path, - }; - - if (lastProgress !== null) { - const p = lastProgress as ProgressInfo; - metadata.progress = { - stage: p.stage, - message: p.message, - percent: p.percent, - filesProcessed: p.filesProcessed, - totalFiles: p.totalFiles, - }; - } - - if (progressUpdates.length > 0) { - metadata.progressHistory = progressUpdates.slice(-5); - } - - return { - success: result.success, - error: result.error, - message: result.success - ? `CodexLens index created successfully for ${path}` - : undefined, - metadata, - }; -} diff --git a/ccw/tests/codex-lens-bootstrap-fallback.test.js b/ccw/tests/codex-lens-bootstrap-fallback.test.js deleted file mode 100644 index cd9c1593..00000000 --- a/ccw/tests/codex-lens-bootstrap-fallback.test.js +++ /dev/null @@ -1,93 +0,0 @@ -/** - * Regression test: CodexLens bootstrap falls back to pip when UV bootstrap fails. - * - * We simulate a "broken UV" by pointing CCW_UV_PATH to the current Node executable. - * `node --version` exits 0 so isUvAvailable() returns true, but `node venv ...` fails, - * forcing the bootstrap code to try the pip path. - * - * This test runs bootstrapVenv in a child process to avoid mutating process-wide - * environment variables that could affect other tests. - */ - -import { describe, it } from 'node:test'; -import assert from 'node:assert/strict'; -import { spawn } from 'node:child_process'; -import { mkdtempSync, rmSync } from 'node:fs'; -import { dirname, join } from 'node:path'; -import { tmpdir } from 'node:os'; -import { fileURLToPath } from 'node:url'; - -const __filename = fileURLToPath(import.meta.url); -const __dirname = dirname(__filename); - -// repo root: /ccw/tests -> -const REPO_ROOT = join(__dirname, '..', '..'); - -function runNodeEvalModule(script, env) { - return new Promise((resolve, reject) => { - const child = spawn(process.execPath, ['--input-type=module', '-e', script], { - cwd: REPO_ROOT, - env, - stdio: ['ignore', 'pipe', 'pipe'], - windowsHide: true, - }); - - let stdout = ''; - let stderr = ''; - - child.stdout.on('data', (d) => { stdout += d.toString(); }); - child.stderr.on('data', (d) => { stderr += d.toString(); }); - - child.on('error', (err) => reject(err)); - child.on('close', (code) => resolve({ code, stdout, stderr })); - }); -} - -describe('CodexLens bootstrap fallback', () => { - it('falls back to pip when UV bootstrap fails', { timeout: 10 * 60 * 1000 }, async () => { - const dataDir = mkdtempSync(join(tmpdir(), 'codexlens-bootstrap-fallback-')); - - try { - const script = ` -import { bootstrapVenv } from './ccw/dist/tools/codex-lens.js'; - -(async () => { - const result = await bootstrapVenv(); - console.log('@@RESULT@@' + JSON.stringify(result)); -})().catch((e) => { - console.error(e?.stack || String(e)); - process.exit(1); -}); -`; - - const env = { - ...process.env, - // Isolate test venv + dependencies from user/global CodexLens state. - CODEXLENS_DATA_DIR: dataDir, - // Make isUvAvailable() return true, but createVenv() fail. - CCW_UV_PATH: process.execPath, - }; - - const { code, stdout, stderr } = await runNodeEvalModule(script, env); - assert.equal(code, 0, `bootstrapVenv child process failed:\nSTDOUT:\n${stdout}\nSTDERR:\n${stderr}`); - - const marker = '@@RESULT@@'; - const idx = stdout.lastIndexOf(marker); - assert.ok(idx !== -1, `Missing result marker in stdout:\n${stdout}`); - - const jsonText = stdout.slice(idx + marker.length).trim(); - const parsed = JSON.parse(jsonText); - - assert.equal(parsed?.success, true, `Expected success=true, got:\n${jsonText}`); - assert.ok(Array.isArray(parsed.warnings), 'Expected warnings array on pip fallback result'); - assert.ok(parsed.warnings.some((w) => String(w).includes('UV bootstrap failed')), `Expected UV failure warning, got: ${JSON.stringify(parsed.warnings)}`); - } finally { - try { - rmSync(dataDir, { recursive: true, force: true }); - } catch { - // Best effort cleanup; leave artifacts only if Windows locks prevent removal. - } - } - }); -}); - diff --git a/ccw/tests/codex-lens-cli-compat.test.js b/ccw/tests/codex-lens-cli-compat.test.js deleted file mode 100644 index 51573a1e..00000000 --- a/ccw/tests/codex-lens-cli-compat.test.js +++ /dev/null @@ -1,139 +0,0 @@ -import { after, describe, it } from 'node:test'; -import assert from 'node:assert/strict'; -import { mkdtempSync, rmSync, writeFileSync } from 'node:fs'; -import { join } from 'node:path'; -import { tmpdir } from 'node:os'; - -const tempDirs = []; - -after(() => { - for (const dir of tempDirs) { - rmSync(dir, { recursive: true, force: true }); - } -}); - -describe('CodexLens CLI compatibility retries', () => { - it('builds hidden Python spawn options for CLI invocations', async () => { - const moduleUrl = new URL(`../dist/tools/codex-lens.js?spawn-opts=${Date.now()}`, import.meta.url).href; - const { __testables } = await import(moduleUrl); - - const options = __testables.buildCodexLensSpawnOptions(tmpdir(), 12345); - - assert.equal(options.cwd, tmpdir()); - assert.equal(options.shell, false); - assert.equal(options.timeout, 12345); - assert.equal(options.windowsHide, true); - assert.equal(options.env.PYTHONIOENCODING, 'utf-8'); - }); - - it('probes Python version without a shell-backed console window', async () => { - const moduleUrl = new URL(`../dist/tools/codex-lens.js?python-probe=${Date.now()}`, import.meta.url).href; - const { __testables } = await import(moduleUrl); - const probeCalls = []; - - const version = __testables.probePythonVersion({ command: 'python', args: [], display: 'python' }, (command, args, options) => { - probeCalls.push({ command, args, options }); - return { status: 0, stdout: '', stderr: 'Python 3.11.9\n' }; - }); - - assert.equal(version, 'Python 3.11.9'); - assert.equal(probeCalls.length, 1); - assert.equal(probeCalls[0].command, 'python'); - assert.deepEqual(probeCalls[0].args, ['--version']); - assert.equal(probeCalls[0].options.shell, false); - assert.equal(probeCalls[0].options.windowsHide, true); - assert.equal(probeCalls[0].options.env.PYTHONIOENCODING, 'utf-8'); - }); - - it('initializes a tiny index even when CLI emits compatibility conflicts first', async () => { - const moduleUrl = new URL(`../dist/tools/codex-lens.js?compat=${Date.now()}`, import.meta.url).href; - const { checkVenvStatus, executeCodexLens } = await import(moduleUrl); - - const ready = await checkVenvStatus(true); - if (!ready.ready) { - console.log('Skipping: CodexLens not ready'); - return; - } - - const projectDir = mkdtempSync(join(tmpdir(), 'codexlens-init-')); - tempDirs.push(projectDir); - writeFileSync(join(projectDir, 'sample.ts'), 'export const sample = 1;\n'); - - const result = await executeCodexLens(['index', 'init', projectDir, '--force'], { timeout: 600000 }); - - assert.equal(result.success, true, result.error ?? 'Expected init to succeed'); - assert.ok((result.output ?? '').length > 0 || (result.warning ?? '').length > 0, 'Expected init output or compatibility warning'); - }); - - it('synthesizes a machine-readable fallback when JSON search output is empty', async () => { - const moduleUrl = new URL(`../dist/tools/codex-lens.js?compat-empty=${Date.now()}`, import.meta.url).href; - const { __testables } = await import(moduleUrl); - - const normalized = __testables.normalizeSearchCommandResult( - { success: true }, - { query: 'missing symbol', cwd: tmpdir(), limit: 5, filesOnly: false }, - ); - - assert.equal(normalized.success, true); - assert.match(normalized.warning ?? '', /empty stdout/i); - assert.deepEqual(normalized.results, { - success: true, - result: { - query: 'missing symbol', - count: 0, - results: [], - }, - }); - }); - - it('returns structured semantic search results for a local embedded workspace', async () => { - const codexLensUrl = new URL(`../dist/tools/codex-lens.js?compat-search=${Date.now()}`, import.meta.url).href; - const smartSearchUrl = new URL(`../dist/tools/smart-search.js?compat-search=${Date.now()}`, import.meta.url).href; - const codexLensModule = await import(codexLensUrl); - const smartSearchModule = await import(smartSearchUrl); - - const ready = await codexLensModule.checkVenvStatus(true); - if (!ready.ready) { - console.log('Skipping: CodexLens not ready'); - return; - } - - const semantic = await codexLensModule.checkSemanticStatus(); - if (!semantic.available) { - console.log('Skipping: semantic dependencies not ready'); - return; - } - - const projectDir = mkdtempSync(join(tmpdir(), 'codexlens-search-')); - tempDirs.push(projectDir); - writeFileSync( - join(projectDir, 'sample.ts'), - 'export function greet(name) { return `hello ${name}`; }\nexport const sum = (a, b) => a + b;\n', - ); - - const init = await smartSearchModule.handler({ action: 'init', path: projectDir }); - assert.equal(init.success, true, init.error ?? 'Expected smart-search init to succeed'); - - const embed = await smartSearchModule.handler({ - action: 'embed', - path: projectDir, - embeddingBackend: 'local', - force: true, - }); - assert.equal(embed.success, true, embed.error ?? 'Expected smart-search embed to succeed'); - - const result = await codexLensModule.codexLensTool.execute({ - action: 'search', - path: projectDir, - query: 'greet function', - mode: 'semantic', - format: 'json', - }); - - assert.equal(result.success, true, result.error ?? 'Expected semantic search compatibility fallback to succeed'); - const payload = result.results?.result ?? result.results; - assert.ok(Array.isArray(payload?.results), 'Expected structured search results payload'); - assert.ok(payload.results.length > 0, 'Expected at least one structured semantic search result'); - assert.doesNotMatch(result.error ?? '', /unexpected extra arguments/i); - }); -}); diff --git a/ccw/tests/codex-lens-integration.test.js b/ccw/tests/codex-lens-integration.test.js deleted file mode 100644 index ebcec928..00000000 --- a/ccw/tests/codex-lens-integration.test.js +++ /dev/null @@ -1,485 +0,0 @@ -/** - * Integration Tests for CodexLens with actual file operations - * - * These tests create temporary files and directories to test - * the full indexing and search workflow. - */ - -import { describe, it, before, after } from 'node:test'; -import assert from 'node:assert'; -import { dirname, join } from 'path'; -import { fileURLToPath } from 'url'; -import { existsSync, mkdirSync, rmSync, writeFileSync, readdirSync } from 'fs'; -import { tmpdir } from 'os'; - -const __filename = fileURLToPath(import.meta.url); -const __dirname = dirname(__filename); - -// Import the codex-lens module -const codexLensPath = new URL('../dist/tools/codex-lens.js', import.meta.url).href; - -describe('CodexLens Full Integration Tests', async () => { - let codexLensModule; - let testDir; - let isReady = false; - - before(async () => { - try { - codexLensModule = await import(codexLensPath); - - // Check if CodexLens is installed - const status = await codexLensModule.checkVenvStatus(); - isReady = status.ready; - - if (!isReady) { - console.log('CodexLens not installed - some integration tests will be skipped'); - return; - } - - // Create temporary test directory - testDir = join(tmpdir(), `codexlens-test-${Date.now()}`); - mkdirSync(testDir, { recursive: true }); - - // Create test Python files - writeFileSync(join(testDir, 'main.py'), ` -"""Main module for testing.""" - -def hello_world(): - """Say hello to the world.""" - print("Hello, World!") - return "hello" - -def calculate_sum(a, b): - """Calculate sum of two numbers.""" - return a + b - -class Calculator: - """A simple calculator class.""" - - def __init__(self): - self.result = 0 - - def add(self, value): - """Add value to result.""" - self.result += value - return self.result - - def subtract(self, value): - """Subtract value from result.""" - self.result -= value - return self.result -`); - - writeFileSync(join(testDir, 'utils.py'), ` -"""Utility functions.""" - -def format_string(text): - """Format a string.""" - return text.strip().lower() - -def validate_email(email): - """Validate email format.""" - return "@" in email and "." in email - -async def fetch_data(url): - """Fetch data from URL (async).""" - pass -`); - - // Create test JavaScript file - writeFileSync(join(testDir, 'app.js'), ` -/** - * Main application module - */ - -function initApp() { - console.log('App initialized'); -} - -const processData = async (data) => { - return data.map(item => item.value); -}; - -class Application { - constructor(name) { - this.name = name; - } - - start() { - console.log(\`Starting \${this.name}\`); - } -} - -export { initApp, processData, Application }; -`); - - console.log(`Test directory created at: ${testDir}`); - } catch (err) { - console.log('Setup failed:', err.message); - } - }); - - after(async () => { - // Cleanup test directory - if (testDir && existsSync(testDir)) { - try { - rmSync(testDir, { recursive: true, force: true }); - console.log('Test directory cleaned up'); - } catch (err) { - console.log('Cleanup failed:', err.message); - } - } - }); - - describe('Index Initialization', () => { - it('should initialize index for test directory', async () => { - if (!isReady || !testDir) { - console.log('Skipping: CodexLens not ready or test dir not created'); - return; - } - - const result = await codexLensModule.codexLensTool.execute({ - action: 'init', - path: testDir - }); - - assert.ok(typeof result === 'object', 'Result should be an object'); - assert.ok('success' in result, 'Result should have success property'); - - if (result.success) { - // CodexLens stores indexes in the global data directory (e.g. ~/.codexlens/indexes) - // rather than creating a per-project ".codexlens" folder. - assert.ok(true); - } - }); - - it('should create index.db file', async () => { - if (!isReady || !testDir) { - console.log('Skipping: CodexLens not ready or test dir not created'); - return; - } - - const indexDb = join(testDir, '.codexlens', 'index.db'); - - // May need to wait for previous init to complete - // Index.db should exist after successful init - if (existsSync(join(testDir, '.codexlens'))) { - // Check files in .codexlens directory - const files = readdirSync(join(testDir, '.codexlens')); - console.log('.codexlens contents:', files); - } - }); - }); - - describe('Status Query', () => { - it('should return index status for test directory', async () => { - if (!isReady || !testDir) { - console.log('Skipping: CodexLens not ready or test dir not created'); - return; - } - - const result = await codexLensModule.codexLensTool.execute({ - action: 'status', - path: testDir - }); - - assert.ok(typeof result === 'object', 'Result should be an object'); - console.log('Index status:', JSON.stringify(result, null, 2)); - - if (result.success) { - // Navigate nested structure: result.status.result or result.result - const statusData = result.status?.result || result.result || result.status || result; - const hasIndexInfo = ( - 'files' in statusData || - 'db_path' in statusData || - result.output || - (result.status && 'success' in result.status) - ); - assert.ok(hasIndexInfo, 'Status should contain index information or raw output'); - } - }); - }); - - describe('Symbol Extraction', () => { - it('should extract symbols from Python file', async () => { - if (!isReady || !testDir) { - console.log('Skipping: CodexLens not ready or test dir not created'); - return; - } - - const result = await codexLensModule.codexLensTool.execute({ - action: 'symbol', - file: join(testDir, 'main.py') - }); - - assert.ok(typeof result === 'object', 'Result should be an object'); - - if (result.success) { - console.log('Symbols found:', result.symbols || result.output); - - // Parse output if needed - let symbols = result.symbols; - if (!symbols && result.output) { - try { - const parsed = JSON.parse(result.output); - symbols = parsed.result?.file?.symbols || parsed.symbols; - } catch { - // Keep raw output - } - } - - if (symbols && Array.isArray(symbols)) { - // Check for expected symbols - const symbolNames = symbols.map(s => s.name); - assert.ok(symbolNames.includes('hello_world') || symbolNames.some(n => n.includes('hello')), - 'Should find hello_world function'); - assert.ok(symbolNames.includes('Calculator') || symbolNames.some(n => n.includes('Calc')), - 'Should find Calculator class'); - } - } - }); - - it('should extract symbols from JavaScript file', async () => { - if (!isReady || !testDir) { - console.log('Skipping: CodexLens not ready or test dir not created'); - return; - } - - const result = await codexLensModule.codexLensTool.execute({ - action: 'symbol', - file: join(testDir, 'app.js') - }); - - assert.ok(typeof result === 'object', 'Result should be an object'); - - if (result.success) { - console.log('JS Symbols found:', result.symbols || result.output); - } - }); - }); - - describe('Full-Text Search', () => { - it('should search for text in indexed files', async () => { - if (!isReady || !testDir) { - console.log('Skipping: CodexLens not ready or test dir not created'); - return; - } - - // First ensure index is initialized - await codexLensModule.codexLensTool.execute({ - action: 'init', - path: testDir - }); - - const result = await codexLensModule.codexLensTool.execute({ - action: 'search', - query: 'hello', - path: testDir, - limit: 10 - }); - - assert.ok(typeof result === 'object', 'Result should be an object'); - - if (result.success) { - console.log('Search results:', result.results || result.output); - } - }); - - it('should search for class names', async () => { - if (!isReady || !testDir) { - console.log('Skipping: CodexLens not ready or test dir not created'); - return; - } - - const result = await codexLensModule.codexLensTool.execute({ - action: 'search', - query: 'Calculator', - path: testDir, - limit: 10 - }); - - assert.ok(typeof result === 'object', 'Result should be an object'); - - if (result.success) { - console.log('Class search results:', result.results || result.output); - } - }); - }); - - describe('Incremental Update', () => { - it('should update index when file changes', async () => { - if (!isReady || !testDir) { - console.log('Skipping: CodexLens not ready or test dir not created'); - return; - } - - // Create a new file - const newFile = join(testDir, 'new_module.py'); - writeFileSync(newFile, ` -def new_function(): - """A newly added function.""" - return "new" -`); - - const result = await codexLensModule.codexLensTool.execute({ - action: 'update', - files: [newFile], - path: testDir - }); - - assert.ok(typeof result === 'object', 'Result should be an object'); - - if (result.success) { - console.log('Update result:', result.updateResult || result.output); - } - }); - - it('should handle deleted files in update', async () => { - if (!isReady || !testDir) { - console.log('Skipping: CodexLens not ready or test dir not created'); - return; - } - - // Reference a non-existent file - const deletedFile = join(testDir, 'deleted_file.py'); - - const result = await codexLensModule.codexLensTool.execute({ - action: 'update', - files: [deletedFile], - path: testDir - }); - - assert.ok(typeof result === 'object', 'Result should be an object'); - // Should handle gracefully without crashing - }); - }); -}); - -describe('CodexLens CLI Commands via executeCodexLens', async () => { - let codexLensModule; - let isReady = false; - - before(async () => { - try { - codexLensModule = await import(codexLensPath); - const status = await codexLensModule.checkVenvStatus(); - isReady = status.ready; - } catch (err) { - console.log('Setup failed:', err.message); - } - }); - - it('should execute --version command', async () => { - if (!isReady) { - console.log('Skipping: CodexLens not ready'); - return; - } - - // Note: codexlens may not have --version, use --help instead - const result = await codexLensModule.executeCodexLens(['--help']); - assert.ok(typeof result === 'object'); - - if (result.success) { - assert.ok(result.output, 'Should have output'); - } - }); - - it('should execute status --json command', async () => { - if (!isReady) { - console.log('Skipping: CodexLens not ready'); - return; - } - - const result = await codexLensModule.executeCodexLens(['status', '--json'], { - cwd: __dirname - }); - - assert.ok(typeof result === 'object'); - - if (result.success && result.output) { - // Try to parse JSON output - try { - const parsed = JSON.parse(result.output); - assert.ok(typeof parsed === 'object', 'Output should be valid JSON'); - } catch { - // Output might not be JSON if index doesn't exist - console.log('Status output (non-JSON):', result.output); - } - } - }); - - it('should handle inspect command', async () => { - if (!isReady) { - console.log('Skipping: CodexLens not ready'); - return; - } - - // Use this test file as input - const testFile = join(__dirname, 'codex-lens.test.js'); - if (!existsSync(testFile)) { - console.log('Skipping: Test file not found'); - return; - } - - const result = await codexLensModule.executeCodexLens([ - 'inspect', testFile, '--json' - ]); - - assert.ok(typeof result === 'object'); - - if (result.success) { - console.log('Inspect result received'); - } - }); -}); - -describe('CodexLens Workspace Detection', async () => { - let codexLensModule; - let isReady = false; - - before(async () => { - try { - codexLensModule = await import(codexLensPath); - const status = await codexLensModule.checkVenvStatus(); - isReady = status.ready; - } catch (err) { - console.log('Setup failed:', err.message); - } - }); - - it('should detect existing workspace', async () => { - if (!isReady) { - console.log('Skipping: CodexLens not ready'); - return; - } - - // Try to get status from project root where .codexlens might exist - const projectRoot = join(__dirname, '..', '..'); - - const result = await codexLensModule.codexLensTool.execute({ - action: 'status', - path: projectRoot - }); - - assert.ok(typeof result === 'object'); - console.log('Project root status:', result.success ? 'Found' : 'Not found'); - }); - - it('should use global database when workspace not found', async () => { - if (!isReady) { - console.log('Skipping: CodexLens not ready'); - return; - } - - // Use a path that definitely won't have .codexlens - const tempPath = tmpdir(); - - const result = await codexLensModule.codexLensTool.execute({ - action: 'status', - path: tempPath - }); - - assert.ok(typeof result === 'object'); - // Should fall back to global database - }); -}); diff --git a/ccw/tests/codex-lens.test.js b/ccw/tests/codex-lens.test.js deleted file mode 100644 index c2d46ca0..00000000 --- a/ccw/tests/codex-lens.test.js +++ /dev/null @@ -1,521 +0,0 @@ -/** - * Tests for CodexLens API endpoints and tool integration - * - * Tests the following endpoints: - * - GET /api/codexlens/status - * - POST /api/codexlens/bootstrap - * - POST /api/codexlens/init - * - GET /api/codexlens/semantic/status - * - POST /api/codexlens/semantic/install - * - * Also tests the codex-lens.js tool functions directly - */ - -import { describe, it, before, after, mock } from 'node:test'; -import assert from 'node:assert'; -import { createServer } from 'http'; -import { join, dirname } from 'path'; -import { fileURLToPath } from 'url'; -import { existsSync, mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'fs'; -import { homedir, tmpdir } from 'os'; - -const __filename = fileURLToPath(import.meta.url); -const __dirname = dirname(__filename); - -// Import the codex-lens module - use file:// URL format for Windows compatibility -const codexLensPath = new URL('../dist/tools/codex-lens.js', import.meta.url).href; - -describe('CodexLens Tool Functions', async () => { - let codexLensModule; - - before(async () => { - try { - codexLensModule = await import(codexLensPath); - } catch (err) { - console.log('Note: codex-lens module import skipped (module may not be available):', err.message); - } - }); - - describe('checkVenvStatus', () => { - it('should return an object with ready property', async () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - const status = await codexLensModule.checkVenvStatus(); - assert.ok(typeof status === 'object', 'Status should be an object'); - assert.ok('ready' in status, 'Status should have ready property'); - assert.ok(typeof status.ready === 'boolean', 'ready should be boolean'); - - if (status.ready) { - assert.ok('version' in status, 'Ready status should include version'); - } else { - assert.ok('error' in status, 'Not ready status should include error'); - } - }); - }); - - describe('checkSemanticStatus', () => { - it('should return semantic availability status', async () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - const status = await codexLensModule.checkSemanticStatus(); - assert.ok(typeof status === 'object', 'Status should be an object'); - assert.ok('available' in status, 'Status should have available property'); - assert.ok(typeof status.available === 'boolean', 'available should be boolean'); - - if (status.available) { - assert.ok('backend' in status, 'Available status should include backend'); - } - }); - }); - - describe('executeCodexLens', () => { - it('should execute codexlens command and return result', async () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - // First check if CodexLens is ready - const status = await codexLensModule.checkVenvStatus(); - if (!status.ready) { - console.log('Skipping: CodexLens not installed'); - return; - } - - // Execute a simple status command - const result = await codexLensModule.executeCodexLens(['--help']); - assert.ok(typeof result === 'object', 'Result should be an object'); - assert.ok('success' in result, 'Result should have success property'); - - // --help should succeed - if (result.success) { - assert.ok('output' in result, 'Success result should have output'); - assert.ok(result.output.includes('CodexLens') || result.output.includes('codexlens'), - 'Help output should mention CodexLens'); - } - }); - - it('should handle timeout gracefully', async () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - const status = await codexLensModule.checkVenvStatus(); - if (!status.ready) { - console.log('Skipping: CodexLens not installed'); - return; - } - - // Use a very short timeout to trigger timeout behavior - // Note: This test may not always trigger timeout depending on system speed - const result = await codexLensModule.executeCodexLens(['status', '--json'], { timeout: 1 }); - assert.ok(typeof result === 'object', 'Result should be an object'); - assert.ok('success' in result, 'Result should have success property'); - }); - }); - - describe('codexLensTool.execute', () => { - it('should handle check action', async () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - const result = await codexLensModule.codexLensTool.execute({ action: 'check' }); - assert.ok(typeof result === 'object', 'Result should be an object'); - assert.ok('ready' in result, 'Check result should have ready property'); - }); - - it('should return error for unknown action', async () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - const result = await codexLensModule.codexLensTool.execute({ action: 'unknown_action' }); - assert.strictEqual(result.success, false, 'Should return success: false'); - assert.ok(result.error, 'Should have error message'); - }); - - it('should handle status action', async () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - const checkResult = await codexLensModule.checkVenvStatus(); - if (!checkResult.ready) { - console.log('Skipping: CodexLens not installed'); - return; - } - - const result = await codexLensModule.codexLensTool.execute({ - action: 'status', - path: __dirname - }); - assert.ok(typeof result === 'object', 'Result should be an object'); - assert.ok('success' in result, 'Result should have success property'); - }); - }); -}); - -describe('CodexLens API Endpoints (Integration)', async () => { - // These tests require a running server - // They test the actual HTTP endpoints - - const TEST_PORT = 19999; - let serverModule; - let server; - let baseUrl; - - before(async () => { - // Note: We cannot easily start the ccw server in tests - // So we test the endpoint handlers directly or mock the server - baseUrl = `http://localhost:${TEST_PORT}`; - - // Try to import server module for handler testing - try { - // serverModule = await import(join(__dirname, '..', 'src', 'core', 'server.js')); - console.log('Note: Server integration tests require manual server start'); - } catch (err) { - console.log('Server module not available for direct testing'); - } - }); - - describe('GET /api/codexlens/status', () => { - it('should return JSON response with ready status', async () => { - // This test requires a running server - // Skip if server is not running - try { - const response = await fetch(`${baseUrl}/api/codexlens/status`); - - if (response.ok) { - const data = await response.json(); - assert.ok(typeof data === 'object', 'Response should be JSON object'); - assert.ok('ready' in data, 'Response should have ready property'); - } - } catch (err) { - if (err.cause?.code === 'ECONNREFUSED') { - console.log('Skipping: Server not running on port', TEST_PORT); - } else { - throw err; - } - } - }); - }); - - describe('POST /api/codexlens/init', () => { - it('should initialize index for given path', async () => { - try { - const response = await fetch(`${baseUrl}/api/codexlens/init`, { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ path: __dirname }) - }); - - if (response.ok) { - const data = await response.json(); - assert.ok(typeof data === 'object', 'Response should be JSON object'); - assert.ok('success' in data, 'Response should have success property'); - } - } catch (err) { - if (err.cause?.code === 'ECONNREFUSED') { - console.log('Skipping: Server not running on port', TEST_PORT); - } else { - throw err; - } - } - }); - }); - - describe('GET /api/codexlens/semantic/status', () => { - it('should return semantic search status', async () => { - try { - const response = await fetch(`${baseUrl}/api/codexlens/semantic/status`); - - if (response.ok) { - const data = await response.json(); - assert.ok(typeof data === 'object', 'Response should be JSON object'); - assert.ok('available' in data, 'Response should have available property'); - } - } catch (err) { - if (err.cause?.code === 'ECONNREFUSED') { - console.log('Skipping: Server not running on port', TEST_PORT); - } else { - throw err; - } - } - }); - }); -}); - -describe('CodexLens Tool Definition', async () => { - let codexLensModule; - - before(async () => { - try { - codexLensModule = await import(codexLensPath); - } catch (err) { - console.log('Note: codex-lens module not available'); - } - }); - - it('should have correct tool name', () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - assert.strictEqual(codexLensModule.codexLensTool.name, 'codex_lens'); - }); - - it('should have description', () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - assert.ok(codexLensModule.codexLensTool.description, 'Should have description'); - assert.ok(codexLensModule.codexLensTool.description.includes('CodexLens'), - 'Description should mention CodexLens'); - }); - - it('should have parameters schema', () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - const { parameters } = codexLensModule.codexLensTool; - assert.ok(parameters, 'Should have parameters'); - assert.strictEqual(parameters.type, 'object'); - assert.ok(parameters.properties, 'Should have properties'); - assert.ok(parameters.properties.action, 'Should have action property'); - assert.deepStrictEqual(parameters.required, ['action'], 'action should be required'); - }); - - it('should support all documented actions', () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - const { parameters } = codexLensModule.codexLensTool; - const supportedActions = parameters.properties.action.enum; - - const expectedActions = ['init', 'search', 'symbol', 'status', 'update', 'bootstrap', 'check']; - - for (const action of expectedActions) { - assert.ok(supportedActions.includes(action), `Should support ${action} action`); - } - }); - - it('should have execute function', () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - assert.ok(typeof codexLensModule.codexLensTool.execute === 'function', - 'Should have execute function'); - }); -}); - -describe('CodexLens Path Configuration', () => { - it('should use correct venv path based on platform', async () => { - const codexLensDataDir = join(homedir(), '.codexlens'); - const codexLensVenv = join(codexLensDataDir, 'venv'); - - const expectedPython = process.platform === 'win32' - ? join(codexLensVenv, 'Scripts', 'python.exe') - : join(codexLensVenv, 'bin', 'python'); - - // Just verify the path construction logic is correct - assert.ok(expectedPython.includes('codexlens'), 'Python path should include codexlens'); - assert.ok(expectedPython.includes('venv'), 'Python path should include venv'); - - if (process.platform === 'win32') { - assert.ok(expectedPython.includes('Scripts'), 'Windows should use Scripts directory'); - assert.ok(expectedPython.endsWith('.exe'), 'Windows should have .exe extension'); - } else { - assert.ok(expectedPython.includes('bin'), 'Unix should use bin directory'); - } - }); -}); - -describe('CodexLens Error Handling', async () => { - let codexLensModule; - const testTempDirs = []; // Track temp directories for cleanup - - after(() => { - // Clean up temp directories created during tests - for (const dir of testTempDirs) { - try { - rmSync(dir, { recursive: true, force: true }); - } catch (e) { - // Ignore cleanup errors - } - } - - // Clean up any indexes created for temp directories - const indexDir = join(homedir(), '.codexlens', 'indexes'); - const tempIndexPattern = join(indexDir, 'C', 'Users', '*', 'AppData', 'Local', 'Temp', 'ccw-codexlens-update-*'); - try { - const glob = require('glob'); - const matches = glob.sync(tempIndexPattern.replace(/\\/g, '/')); - for (const match of matches) { - rmSync(match, { recursive: true, force: true }); - } - } catch (e) { - // glob may not be available, try direct cleanup - try { - const tempPath = join(indexDir, 'C', 'Users'); - if (existsSync(tempPath)) { - console.log('Note: Temp indexes may need manual cleanup at:', indexDir); - } - } catch (e2) { - // Ignore - } - } - }); - - before(async () => { - try { - codexLensModule = await import(codexLensPath); - } catch (err) { - console.log('Note: codex-lens module not available'); - } - }); - - it('should handle missing file parameter for symbol action', async () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - const checkResult = await codexLensModule.checkVenvStatus(); - if (!checkResult.ready) { - console.log('Skipping: CodexLens not installed'); - return; - } - - const result = await codexLensModule.codexLensTool.execute({ - action: 'symbol' - // file is missing - }); - - // Should either error or return success: false - assert.ok(typeof result === 'object', 'Result should be an object'); - }); - - it('should support update action without files parameter', async () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - const checkResult = await codexLensModule.checkVenvStatus(); - if (!checkResult.ready) { - console.log('Skipping: CodexLens not installed'); - return; - } - - const updateRoot = mkdtempSync(join(tmpdir(), 'ccw-codexlens-update-')); - testTempDirs.push(updateRoot); // Track for cleanup - writeFileSync(join(updateRoot, 'main.py'), 'def hello():\n return 1\n', 'utf8'); - - const result = await codexLensModule.codexLensTool.execute({ - action: 'update', - path: updateRoot, - }); - - assert.ok(typeof result === 'object', 'Result should be an object'); - assert.ok('success' in result, 'Result should have success property'); - }); - - it('should ignore extraneous files parameter for update action', async () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - const checkResult = await codexLensModule.checkVenvStatus(); - if (!checkResult.ready) { - console.log('Skipping: CodexLens not installed'); - return; - } - - const updateRoot = mkdtempSync(join(tmpdir(), 'ccw-codexlens-update-')); - testTempDirs.push(updateRoot); // Track for cleanup - writeFileSync(join(updateRoot, 'main.py'), 'def hello():\n return 1\n', 'utf8'); - - const result = await codexLensModule.codexLensTool.execute({ - action: 'update', - path: updateRoot, - files: [] - }); - - assert.ok(typeof result === 'object', 'Result should be an object'); - assert.ok('success' in result, 'Result should have success property'); - }); -}); - -describe('CodexLens Search Parameters', async () => { - let codexLensModule; - - before(async () => { - try { - codexLensModule = await import(codexLensPath); - } catch (err) { - console.log('Note: codex-lens module not available'); - } - }); - - it('should support text and semantic search modes', () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - const { parameters } = codexLensModule.codexLensTool; - const modeEnum = parameters.properties.mode?.enum; - - assert.ok(modeEnum, 'Should have mode enum'); - assert.ok(modeEnum.includes('text'), 'Should support text mode'); - assert.ok(modeEnum.includes('semantic'), 'Should support semantic mode'); - }); - - it('should have limit parameter with default', () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - const { parameters } = codexLensModule.codexLensTool; - const limitProp = parameters.properties.limit; - - assert.ok(limitProp, 'Should have limit property'); - assert.strictEqual(limitProp.type, 'number', 'limit should be number'); - assert.strictEqual(limitProp.default, 20, 'Default limit should be 20'); - }); - - it('should support output format options', () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - const { parameters } = codexLensModule.codexLensTool; - const formatEnum = parameters.properties.format?.enum; - - assert.ok(formatEnum, 'Should have format enum'); - assert.ok(formatEnum.includes('json'), 'Should support json format'); - }); -}); diff --git a/ccw/tests/e2e/mcp-tools.e2e.test.ts b/ccw/tests/e2e/mcp-tools.e2e.test.ts index 30011972..fecb9d2f 100644 --- a/ccw/tests/e2e/mcp-tools.e2e.test.ts +++ b/ccw/tests/e2e/mcp-tools.e2e.test.ts @@ -161,54 +161,16 @@ describe('E2E: MCP Tool Execution', async () => { // Verify essential tools are present const toolNames = response.result.tools.map((t: any) => t.name); - assert.ok(toolNames.includes('smart_search')); assert.ok(toolNames.includes('edit_file')); assert.ok(toolNames.includes('write_file')); assert.ok(toolNames.includes('session_manager')); // Verify tool schema structure - const smartSearch = response.result.tools.find((t: any) => t.name === 'smart_search'); - assert.ok(smartSearch.description); - assert.ok(smartSearch.inputSchema); - assert.equal(smartSearch.inputSchema.type, 'object'); - assert.ok(smartSearch.inputSchema.properties); - }); - - it('executes smart_search tool with valid parameters', async () => { - const response = await mcpClient.call('tools/call', { - name: 'smart_search', - arguments: { - action: 'status', - path: process.cwd() - } - }); - - assert.equal(response.jsonrpc, '2.0'); - assert.ok(response.result); - assert.ok(Array.isArray(response.result.content)); - assert.equal(response.result.content[0].type, 'text'); - assert.ok(response.result.content[0].text.length > 0); - }); - - it('validates required parameters and returns error for missing params', async () => { - const response = await mcpClient.call('tools/call', { - name: 'smart_search', - arguments: { - action: 'search' - // Missing required 'query' parameter - } - }); - - assert.equal(response.jsonrpc, '2.0'); - assert.ok(response.result); - assert.equal(response.result.isError, true); - // Error message should mention query is required - assert.ok( - response.result.content[0].text.includes('Query is required') || - response.result.content[0].text.includes('query') || - response.result.content[0].text.includes('required'), - `Expected error about missing query, got: ${response.result.content[0].text}` - ); + const editFile = response.result.tools.find((t: any) => t.name === 'edit_file'); + assert.ok(editFile.description); + assert.ok(editFile.inputSchema); + assert.equal(editFile.inputSchema.type, 'object'); + assert.ok(editFile.inputSchema.properties); }); it('returns error for non-existent tool', async () => { @@ -374,10 +336,6 @@ describe('E2E: MCP Tool Execution', async () => { it('handles concurrent tool calls without interference', async () => { const calls = await Promise.all([ mcpClient.call('tools/list', {}), - mcpClient.call('tools/call', { - name: 'smart_search', - arguments: { action: 'status', path: process.cwd() } - }), mcpClient.call('tools/call', { name: 'session_manager', arguments: { operation: 'list', location: 'active' } @@ -392,8 +350,7 @@ describe('E2E: MCP Tool Execution', async () => { // Verify different results assert.ok(Array.isArray(calls[0].result.tools)); // tools/list - assert.ok(calls[1].result.content); // smart_search - assert.ok(calls[2].result.content); // session_manager + assert.ok(calls[1].result.content); // session_manager }); it('validates path parameters for security (path traversal prevention)', async () => { @@ -415,24 +372,6 @@ describe('E2E: MCP Tool Execution', async () => { assert.ok(hasError); }); - it('supports progress reporting for long-running operations', async () => { - // smart_search init action supports progress reporting - const response = await mcpClient.call('tools/call', { - name: 'smart_search', - arguments: { - action: 'status', - path: process.cwd() - } - }); - - assert.equal(response.jsonrpc, '2.0'); - assert.ok(response.result); - assert.ok(response.result.content); - - // For status action, should return immediately - // Progress is logged to stderr but doesn't affect result structure - }); - it('handles tool execution timeout gracefully', async () => { // Create a tool call that should complete quickly // If it times out, the client will throw @@ -495,14 +434,10 @@ describe('E2E: MCP Tool Execution', async () => { it('preserves parameter types in tool execution', async () => { const response = await mcpClient.call('tools/call', { - name: 'smart_search', + name: 'session_manager', arguments: { - action: 'find_files', - pattern: '*.json', - path: process.cwd(), - limit: 10, // Number - offset: 0, // Number - caseSensitive: true // Boolean + operation: 'list', + location: 'active' } }); diff --git a/ccw/tests/litellm-client.test.ts b/ccw/tests/litellm-client.test.ts deleted file mode 100644 index 5400ebd3..00000000 --- a/ccw/tests/litellm-client.test.ts +++ /dev/null @@ -1,403 +0,0 @@ -/** - * Unit tests for LiteLLM client bridge (ccw/dist/tools/litellm-client.js). - * - * Notes: - * - Uses Node's built-in test runner (node:test) (no Jest in this repo). - * - Stubs `child_process.spawn` to avoid depending on local Python/ccw_litellm installation. - */ - -import { after, beforeEach, describe, it } from 'node:test'; -import assert from 'node:assert/strict'; -import { EventEmitter } from 'node:events'; -import { createRequire } from 'node:module'; - -const require = createRequire(import.meta.url); -// eslint-disable-next-line @typescript-eslint/no-var-requires -const childProcess = require('child_process') as typeof import('child_process'); - -type SpawnBehavior = - | { type: 'close'; code?: number; stdout?: string; stderr?: string } - | { type: 'error'; error: Error } - | { type: 'hang' }; - -class FakeChildProcess extends EventEmitter { - stdout = new EventEmitter(); - stderr = new EventEmitter(); - killCalls: string[] = []; - - kill(signal?: NodeJS.Signals | number | string): boolean { - this.killCalls.push(signal === undefined ? 'undefined' : String(signal)); - return true; - } -} - -type SpawnCall = { - command: string; - args: string[]; - // eslint-disable-next-line @typescript-eslint/no-explicit-any - options: any; - proc: FakeChildProcess; -}; - -const spawnCalls: SpawnCall[] = []; -const spawnPlan: SpawnBehavior[] = []; - -const originalSpawn = childProcess.spawn; - -childProcess.spawn = ((command: string, args: string[] = [], options: any = {}) => { - const normalizedArgs = (args ?? []).map(String); - const shouldIntercept = normalizedArgs[0] === '-m' && normalizedArgs[1] === 'ccw_litellm.cli'; - if (!shouldIntercept) { - return originalSpawn(command as any, args as any, options as any); - } - - const proc = new FakeChildProcess(); - spawnCalls.push({ command: String(command), args: normalizedArgs, options, proc }); - - const next = spawnPlan.shift() ?? { type: 'close', code: 0, stdout: '' }; - - queueMicrotask(() => { - if (next.type === 'error') { - proc.emit('error', next.error); - return; - } - - if (next.type === 'close') { - if (next.stdout !== undefined) proc.stdout.emit('data', next.stdout); - if (next.stderr !== undefined) proc.stderr.emit('data', next.stderr); - proc.emit('close', next.code ?? 0); - return; - } - - // hang: intentionally do nothing - }); - - return proc as any; -}) as any; - -function getClientModuleUrl(): URL { - const url = new URL('../dist/tools/litellm-client.js', import.meta.url); - url.searchParams.set('t', `${Date.now()}-${Math.random()}`); - return url; -} - -// eslint-disable-next-line @typescript-eslint/no-explicit-any -let mod: any; - -beforeEach(async () => { - spawnCalls.length = 0; - spawnPlan.length = 0; - mod = await import(getClientModuleUrl().href); -}); - -after(() => { - childProcess.spawn = originalSpawn; -}); - -describe('LiteLLM client bridge', () => { - it('uses default pythonPath and version check arguments', async () => { - spawnPlan.push({ type: 'close', code: 0, stdout: '1.2.3\n' }); - - const client = new mod.LiteLLMClient(); - const available = await client.isAvailable(); - - assert.equal(available, true); - assert.equal(spawnCalls.length, 1); - assert.equal(spawnCalls[0].command, mod.getCodexLensVenvPython()); - assert.deepEqual(spawnCalls[0].args, ['-m', 'ccw_litellm.cli', 'version']); - }); - - it('uses custom pythonPath when provided', async () => { - spawnPlan.push({ type: 'close', code: 0, stdout: 'ok' }); - - const client = new mod.LiteLLMClient({ pythonPath: 'python3', timeout: 10 }); - await client.chat('hello', 'default'); - - assert.equal(spawnCalls.length, 1); - assert.equal(spawnCalls[0].command, 'python3'); - }); - - it('spawns LiteLLM Python with hidden window options', async () => { - spawnPlan.push({ type: 'close', code: 0, stdout: '1.2.3\n' }); - - const client = new mod.LiteLLMClient({ timeout: 10 }); - const available = await client.isAvailable(); - - assert.equal(available, true); - assert.equal(spawnCalls.length, 1); - assert.equal(spawnCalls[0].options.shell, false); - assert.equal(spawnCalls[0].options.windowsHide, true); - assert.equal(spawnCalls[0].options.env.PYTHONIOENCODING, 'utf-8'); - }); - - it('isAvailable returns false on spawn error', async () => { - spawnPlan.push({ type: 'error', error: new Error('ENOENT') }); - - const client = new mod.LiteLLMClient({ timeout: 10 }); - const available = await client.isAvailable(); - - assert.equal(available, false); - }); - - it('getStatus returns version on success', async () => { - spawnPlan.push({ type: 'close', code: 0, stdout: 'v9.9.9\n' }); - - const client = new mod.LiteLLMClient({ timeout: 10 }); - const status = await client.getStatus(); - - assert.equal(status.available, true); - assert.equal(status.version, 'v9.9.9'); - }); - - it('getStatus returns error details on non-zero exit', async () => { - spawnPlan.push({ type: 'close', code: 1, stderr: 'HTTP 500 Internal Server Error' }); - - const client = new mod.LiteLLMClient({ timeout: 10 }); - const status = await client.getStatus(); - - assert.equal(status.available, false); - assert.ok(String(status.error).includes('HTTP 500')); - }); - - it('getConfig parses JSON output', async () => { - spawnPlan.push({ type: 'close', code: 0, stdout: JSON.stringify({ ok: true }) }); - - const client = new mod.LiteLLMClient({ timeout: 10 }); - const cfg = await client.getConfig(); - - assert.deepEqual(cfg, { ok: true }); - assert.equal(spawnCalls.length, 1); - assert.deepEqual(spawnCalls[0].args, ['-m', 'ccw_litellm.cli', 'config']); - }); - - it('getConfig throws on malformed JSON', async () => { - spawnPlan.push({ type: 'close', code: 0, stdout: '{not-json' }); - - const client = new mod.LiteLLMClient({ timeout: 10 }); - await assert.rejects(() => client.getConfig()); - }); - - it('embed rejects empty texts input and does not spawn', async () => { - const client = new mod.LiteLLMClient({ timeout: 10 }); - await assert.rejects(() => client.embed([]), /texts array cannot be empty/); - assert.equal(spawnCalls.length, 0); - }); - - it('embed rejects null/undefined input', async () => { - const client = new mod.LiteLLMClient({ timeout: 10 }); - await assert.rejects(() => client.embed(null as any), /texts array cannot be empty/); - await assert.rejects(() => client.embed(undefined as any), /texts array cannot be empty/); - assert.equal(spawnCalls.length, 0); - }); - - it('embed returns vectors with derived dimensions', async () => { - spawnPlan.push({ type: 'close', code: 0, stdout: JSON.stringify([[1, 2, 3], [4, 5, 6]]) }); - - const client = new mod.LiteLLMClient({ timeout: 10 }); - const res = await client.embed(['a', 'b'], 'embed-model'); - - assert.equal(res.model, 'embed-model'); - assert.equal(res.dimensions, 3); - assert.deepEqual(res.vectors, [ - [1, 2, 3], - [4, 5, 6], - ]); - - assert.equal(spawnCalls.length, 1); - assert.deepEqual(spawnCalls[0].args, [ - '-m', - 'ccw_litellm.cli', - 'embed', - '--model', - 'embed-model', - '--output', - 'json', - 'a', - 'b', - ]); - }); - - it('embed throws on malformed JSON output', async () => { - spawnPlan.push({ type: 'close', code: 0, stdout: 'not-json' }); - - const client = new mod.LiteLLMClient({ timeout: 10 }); - await assert.rejects(() => client.embed(['a'], 'embed-model')); - }); - - it('chat rejects empty message and does not spawn', async () => { - const client = new mod.LiteLLMClient({ timeout: 10 }); - await assert.rejects(() => client.chat(''), /message cannot be empty/); - assert.equal(spawnCalls.length, 0); - }); - - it('chat returns trimmed stdout on success', async () => { - spawnPlan.push({ type: 'close', code: 0, stdout: 'Hello\n' }); - - const client = new mod.LiteLLMClient({ timeout: 10 }); - const out = await client.chat('hi', 'chat-model'); - - assert.equal(out, 'Hello'); - assert.equal(spawnCalls.length, 1); - assert.deepEqual(spawnCalls[0].args, ['-m', 'ccw_litellm.cli', 'chat', '--model', 'chat-model', 'hi']); - }); - - it('chat propagates auth errors (401)', async () => { - spawnPlan.push({ type: 'close', code: 1, stderr: 'HTTP 401 Unauthorized' }); - - const client = new mod.LiteLLMClient({ timeout: 10 }); - await assert.rejects(() => client.chat('hi', 'chat-model'), /401/); - }); - - it('chat propagates auth errors (403)', async () => { - spawnPlan.push({ type: 'close', code: 1, stderr: 'HTTP 403 Forbidden' }); - - const client = new mod.LiteLLMClient({ timeout: 10 }); - await assert.rejects(() => client.chat('hi', 'chat-model'), /403/); - }); - - it('chat propagates rate limit errors (429)', async () => { - spawnPlan.push({ type: 'close', code: 1, stderr: 'HTTP 429 Too Many Requests' }); - - const client = new mod.LiteLLMClient({ timeout: 10 }); - await assert.rejects(() => client.chat('hi', 'chat-model'), /429/); - }); - - it('chat propagates server errors (500)', async () => { - spawnPlan.push({ type: 'close', code: 1, stderr: 'HTTP 500 Internal Server Error' }); - - const client = new mod.LiteLLMClient({ timeout: 10 }); - await assert.rejects(() => client.chat('hi', 'chat-model'), /500/); - }); - - it('chat propagates server errors (503)', async () => { - spawnPlan.push({ type: 'close', code: 1, stderr: 'HTTP 503 Service Unavailable' }); - - const client = new mod.LiteLLMClient({ timeout: 10 }); - await assert.rejects(() => client.chat('hi', 'chat-model'), /503/); - }); - - it('chat falls back to exit code when stderr is empty', async () => { - spawnPlan.push({ type: 'close', code: 2, stdout: '' }); - - const client = new mod.LiteLLMClient({ timeout: 10 }); - await assert.rejects(() => client.chat('hi', 'chat-model'), /Process exited with code 2/); - }); - - it('chat surfaces spawn failures with descriptive message', async () => { - spawnPlan.push({ type: 'error', error: new Error('spawn ENOENT') }); - - const client = new mod.LiteLLMClient({ timeout: 10 }); - await assert.rejects(() => client.chat('hi', 'chat-model'), /Failed to spawn Python process: spawn ENOENT/); - }); - - it('chat enforces timeout and terminates process', async () => { - const originalSetTimeout = global.setTimeout; - let observedDelay: number | null = null; - - (global as any).setTimeout = ((fn: any, delay: number, ...args: any[]) => { - observedDelay = delay; - return originalSetTimeout(fn, 0, ...args); - }) as any; - - try { - spawnPlan.push({ type: 'hang' }); - - const client = new mod.LiteLLMClient({ timeout: 11 }); - await assert.rejects(() => client.chat('hi', 'chat-model'), /Command timed out after 22ms/); - - assert.equal(observedDelay, 22); - assert.equal(spawnCalls.length, 1); - assert.ok(spawnCalls[0].proc.killCalls.includes('SIGTERM')); - } finally { - (global as any).setTimeout = originalSetTimeout; - } - }); - - it('chatMessages rejects empty inputs', async () => { - const client = new mod.LiteLLMClient({ timeout: 10 }); - await assert.rejects(() => client.chatMessages([]), /messages array cannot be empty/); - await assert.rejects(() => client.chatMessages(null as any), /messages array cannot be empty/); - assert.equal(spawnCalls.length, 0); - }); - - it('chatMessages uses the last message content', async () => { - spawnPlan.push({ type: 'close', code: 0, stdout: 'OK' }); - - const client = new mod.LiteLLMClient({ timeout: 10 }); - const res = await client.chatMessages( - [ - { role: 'user', content: 'first' }, - { role: 'user', content: 'last' }, - ], - 'chat-model', - ); - - assert.equal(res.content, 'OK'); - assert.equal(res.model, 'chat-model'); - assert.equal(spawnCalls.length, 1); - assert.equal(spawnCalls[0].args.at(-1), 'last'); - }); - - it('getLiteLLMClient returns a singleton instance', () => { - const c1 = mod.getLiteLLMClient(); - const c2 = mod.getLiteLLMClient(); - assert.equal(c1, c2); - }); - - it('checkLiteLLMAvailable returns false when version check fails', async () => { - spawnPlan.push({ type: 'close', code: 1, stderr: 'ccw_litellm not installed' }); - - const available = await mod.checkLiteLLMAvailable(); - assert.equal(available, false); - }); - - it('getLiteLLMStatus includes error message when unavailable', async () => { - spawnPlan.push({ type: 'close', code: 1, stderr: 'ccw_litellm not installed' }); - - const status = await mod.getLiteLLMStatus(); - assert.equal(status.available, false); - assert.ok(String(status.error).includes('ccw_litellm not installed')); - }); -}); - -describe('getCodexLensVenvPython (Issue #68 fix)', () => { - it('should be exported from the module', async () => { - assert.ok(typeof mod.getCodexLensVenvPython === 'function'); - }); - - it('should return a string path', async () => { - const pythonPath = mod.getCodexLensVenvPython(); - assert.equal(typeof pythonPath, 'string'); - assert.ok(pythonPath.length > 0); - }); - - it('should return correct path structure for CodexLens venv', async () => { - const pythonPath = mod.getCodexLensVenvPython(); - - // On Windows: should contain Scripts/python.exe - // On Unix: should contain bin/python - const isWindows = process.platform === 'win32'; - - if (isWindows) { - // Either it's the venv path with Scripts, or fallback to 'python' - const isVenvPath = pythonPath.includes('Scripts') && pythonPath.includes('python'); - const isFallback = pythonPath === 'python'; - assert.ok(isVenvPath || isFallback, `Expected venv path or 'python' fallback, got: ${pythonPath}`); - } else { - // On Unix: either venv path with bin/python, or fallback - const isVenvPath = pythonPath.includes('bin') && pythonPath.includes('python'); - const isFallback = pythonPath === 'python'; - assert.ok(isVenvPath || isFallback, `Expected venv path or 'python' fallback, got: ${pythonPath}`); - } - }); - - it('should include .codexlens/venv in path when venv exists', async () => { - const pythonPath = mod.getCodexLensVenvPython(); - - // If not falling back to 'python', should contain .codexlens/venv - if (pythonPath !== 'python') { - assert.ok(pythonPath.includes('.codexlens'), `Expected .codexlens in path, got: ${pythonPath}`); - assert.ok(pythonPath.includes('venv'), `Expected venv in path, got: ${pythonPath}`); - } - }); -}); diff --git a/ccw/tests/mcp-server.test.js b/ccw/tests/mcp-server.test.js index 6d603b2a..86582170 100644 --- a/ccw/tests/mcp-server.test.js +++ b/ccw/tests/mcp-server.test.js @@ -97,7 +97,7 @@ describe('MCP Server', () => { const toolNames = response.result.tools.map(t => t.name); assert(toolNames.includes('edit_file')); assert(toolNames.includes('write_file')); - assert(toolNames.includes('smart_search')); + // smart_search removed - use codexlens MCP server instead }); it('should respond to tools/call request', async () => { diff --git a/ccw/tests/smart-search-enrich.test.js b/ccw/tests/smart-search-enrich.test.js deleted file mode 100644 index 2950c17d..00000000 --- a/ccw/tests/smart-search-enrich.test.js +++ /dev/null @@ -1,256 +0,0 @@ -/** - * Tests for smart_search with enrich parameter - * - * Tests the following: - * - enrich parameter is passed to codex-lens - * - relationship data is parsed from response - * - SemanticMatch interface with relationships field - */ - -import { describe, it, before, mock } from 'node:test'; -import assert from 'node:assert'; -import { dirname, join } from 'path'; -import { fileURLToPath } from 'url'; - -const __filename = fileURLToPath(import.meta.url); -const __dirname = dirname(__filename); - -// Import the smart-search module (exports schema, not smartSearchTool) -const smartSearchPath = new URL('../dist/tools/smart-search.js', import.meta.url).href; - -describe('Smart Search Enrich Parameter', async () => { - let smartSearchModule; - - before(async () => { - try { - smartSearchModule = await import(smartSearchPath); - } catch (err) { - console.log('Note: smart-search module import skipped:', err.message); - } - }); - - describe('Parameter Schema', () => { - it('should have enrich parameter in schema', async () => { - if (!smartSearchModule) { - console.log('Skipping: smart-search module not available'); - return; - } - - const { schema } = smartSearchModule; - assert.ok(schema, 'Should export schema'); - // Schema uses inputSchema (MCP standard), not parameters - const params = schema.inputSchema || schema.parameters; - assert.ok(params, 'Should have inputSchema or parameters'); - - const props = params.properties; - assert.ok(props.enrich, 'Should have enrich parameter'); - assert.strictEqual(props.enrich.type, 'boolean', 'enrich should be boolean'); - assert.strictEqual(props.enrich.default, false, 'enrich should default to false'); - }); - - it('should describe enrich parameter purpose', async () => { - if (!smartSearchModule) { - console.log('Skipping: smart-search module not available'); - return; - } - - const { schema } = smartSearchModule; - const params = schema.inputSchema || schema.parameters; - const enrichDesc = params.properties.enrich?.description || ''; - - // Description should mention relationships or graph - const mentionsRelationships = enrichDesc.toLowerCase().includes('relationship') || - enrichDesc.toLowerCase().includes('graph') || - enrichDesc.toLowerCase().includes('enrich'); - assert.ok(mentionsRelationships, 'enrich description should mention relationships/graph'); - }); - }); - - describe('SemanticMatch Interface', () => { - it('should handle results with relationships field', async () => { - if (!smartSearchModule) { - console.log('Skipping: smart-search module not available'); - return; - } - - // Create a mock result with relationships - const mockResult = { - file: 'test.py', - score: 0.95, - content: 'def main(): pass', - symbol: 'main', - relationships: [ - { - type: 'calls', - direction: 'outgoing', - target: 'helper', - file: 'test.py', - line: 5 - }, - { - type: 'called_by', - direction: 'incoming', - source: 'entrypoint', - file: 'app.py', - line: 10 - } - ] - }; - - // Verify structure - assert.ok(Array.isArray(mockResult.relationships), 'relationships should be array'); - assert.strictEqual(mockResult.relationships.length, 2, 'should have 2 relationships'); - - const outgoing = mockResult.relationships[0]; - assert.strictEqual(outgoing.type, 'calls'); - assert.strictEqual(outgoing.direction, 'outgoing'); - assert.ok(outgoing.target, 'outgoing should have target'); - - const incoming = mockResult.relationships[1]; - assert.strictEqual(incoming.type, 'called_by'); - assert.strictEqual(incoming.direction, 'incoming'); - assert.ok(incoming.source, 'incoming should have source'); - }); - }); - - describe('RelationshipInfo Structure', () => { - it('should validate relationship info structure', () => { - // Test the expected structure of RelationshipInfo - const validRelationship = { - type: 'calls', - direction: 'outgoing', - target: 'some_function', - file: 'module.py', - line: 42 - }; - - assert.ok(['calls', 'imports', 'extends', 'called_by', 'imported_by', 'extended_by'] - .includes(validRelationship.type), 'type should be valid relationship type'); - assert.ok(['outgoing', 'incoming'].includes(validRelationship.direction), - 'direction should be outgoing or incoming'); - assert.ok(typeof validRelationship.file === 'string', 'file should be string'); - }); - - it('should allow optional line number', () => { - const withLine = { - type: 'calls', - direction: 'outgoing', - target: 'func', - file: 'test.py', - line: 10 - }; - - const withoutLine = { - type: 'imports', - direction: 'outgoing', - target: 'os', - file: 'test.py' - // line is optional - }; - - assert.strictEqual(withLine.line, 10); - assert.strictEqual(withoutLine.line, undefined); - }); - }); -}); - -describe('Smart Search Tool Definition', async () => { - let smartSearchModule; - - before(async () => { - try { - smartSearchModule = await import(smartSearchPath); - } catch (err) { - console.log('Note: smart-search module not available'); - } - }); - - it('should have correct tool name', () => { - if (!smartSearchModule) { - console.log('Skipping: smart-search module not available'); - return; - } - - assert.strictEqual(smartSearchModule.schema.name, 'smart_search'); - }); - - it('should have all required parameters', () => { - if (!smartSearchModule) { - console.log('Skipping: smart-search module not available'); - return; - } - - const params = smartSearchModule.schema.inputSchema || smartSearchModule.schema.parameters; - const props = params.properties; - - // Core parameters - assert.ok(props.action, 'Should have action parameter'); - assert.ok(props.query, 'Should have query parameter'); - assert.ok(props.path, 'Should have path parameter'); - - // Search parameters - assert.ok(props.mode, 'Should have mode parameter'); - assert.ok(props.maxResults || props.limit, 'Should have maxResults/limit parameter'); - - // New enrich parameter - assert.ok(props.enrich, 'Should have enrich parameter'); - }); - - it('should support search modes', () => { - if (!smartSearchModule) { - console.log('Skipping: smart-search module not available'); - return; - } - - const params = smartSearchModule.schema.inputSchema || smartSearchModule.schema.parameters; - const modeEnum = params.properties.mode?.enum; - - assert.ok(modeEnum, 'Should have mode enum'); - assert.ok(modeEnum.includes('fuzzy'), 'Should support fuzzy mode'); - assert.ok(modeEnum.includes('semantic'), 'Should support semantic mode'); - }); -}); - -describe('Enrich Flag Integration', async () => { - let codexLensModule; - - before(async () => { - try { - const codexLensPath = new URL('../dist/tools/codex-lens.js', import.meta.url).href; - codexLensModule = await import(codexLensPath); - } catch (err) { - console.log('Note: codex-lens module not available'); - } - }); - - it('codex-lens should support enrich parameter', () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - // Use schema export (primary) or codexLensTool (backward-compatible) - const toolDef = codexLensModule.schema || codexLensModule.codexLensTool; - assert.ok(toolDef, 'Should have schema or codexLensTool export'); - - // Schema uses inputSchema (MCP standard), codexLensTool uses parameters - const params = toolDef.inputSchema || toolDef.parameters; - const props = params.properties; - assert.ok(props.enrich, 'should have enrich parameter'); - assert.strictEqual(props.enrich.type, 'boolean', 'enrich should be boolean'); - }); - - it('should pass enrich flag to command line', async () => { - if (!codexLensModule) { - console.log('Skipping: codex-lens module not available'); - return; - } - - // Check if executeCodexLens function is exported - const { executeCodexLens } = codexLensModule; - if (executeCodexLens) { - // The function should be available for passing enrich parameter - assert.ok(typeof executeCodexLens === 'function', 'executeCodexLens should be a function'); - } - }); -}); diff --git a/ccw/tests/smart-search-intent.test.js b/ccw/tests/smart-search-intent.test.js deleted file mode 100644 index 0c51f6bf..00000000 --- a/ccw/tests/smart-search-intent.test.js +++ /dev/null @@ -1,141 +0,0 @@ -/** - * Tests for query intent detection + adaptive RRF weights (TypeScript/Python parity). - * - * References: - * - `ccw/src/tools/smart-search.ts` (detectQueryIntent, adjustWeightsByIntent, getRRFWeights) - * - `codex-lens/src/codexlens/search/hybrid_search.py` (weight intent concept + defaults) - */ - -import { describe, it, before } from 'node:test'; -import assert from 'node:assert'; - -const smartSearchPath = new URL('../dist/tools/smart-search.js', import.meta.url).href; - -describe('Smart Search - Query Intent + RRF Weights', async () => { - /** @type {any} */ - let smartSearchModule; - - before(async () => { - try { - smartSearchModule = await import(smartSearchPath); - } catch (err) { - // Keep tests non-blocking for environments that haven't built `ccw/dist` yet. - console.log('Note: smart-search module import skipped:', err.message); - } - }); - - describe('detectQueryIntent', () => { - it('classifies "def authenticate" as keyword', () => { - if (!smartSearchModule) return; - assert.strictEqual(smartSearchModule.detectQueryIntent('def authenticate'), 'keyword'); - }); - - it('classifies CamelCase identifiers as keyword', () => { - if (!smartSearchModule) return; - assert.strictEqual(smartSearchModule.detectQueryIntent('MyClass'), 'keyword'); - }); - - it('classifies snake_case identifiers as keyword', () => { - if (!smartSearchModule) return; - assert.strictEqual(smartSearchModule.detectQueryIntent('user_id'), 'keyword'); - }); - - it('classifies namespace separators "::" as keyword', () => { - if (!smartSearchModule) return; - assert.strictEqual(smartSearchModule.detectQueryIntent('UserService::authenticate'), 'keyword'); - }); - - it('classifies pointer arrows "->" as keyword', () => { - if (!smartSearchModule) return; - assert.strictEqual(smartSearchModule.detectQueryIntent('ptr->next'), 'keyword'); - }); - - it('classifies dotted member access as keyword', () => { - if (!smartSearchModule) return; - assert.strictEqual(smartSearchModule.detectQueryIntent('foo.bar'), 'keyword'); - }); - - it('classifies natural language questions as semantic', () => { - if (!smartSearchModule) return; - assert.strictEqual(smartSearchModule.detectQueryIntent('how to handle user login'), 'semantic'); - }); - - it('classifies interrogatives with question marks as semantic', () => { - if (!smartSearchModule) return; - assert.strictEqual(smartSearchModule.detectQueryIntent('what is authentication?'), 'semantic'); - }); - - it('classifies queries with both code + NL signals as mixed', () => { - if (!smartSearchModule) return; - assert.strictEqual(smartSearchModule.detectQueryIntent('why does FooBar crash?'), 'mixed'); - }); - - it('classifies long NL queries containing identifiers as mixed', () => { - if (!smartSearchModule) return; - assert.strictEqual(smartSearchModule.detectQueryIntent('how to use user_id in query'), 'mixed'); - }); - }); - - describe('classifyIntent lexical routing', () => { - it('routes config/backend queries to exact when index and embeddings are available', () => { - if (!smartSearchModule) return; - const classification = smartSearchModule.__testables.classifyIntent( - 'embedding backend fastembed local litellm api config', - true, - true, - ); - assert.strictEqual(classification.mode, 'exact'); - assert.match(classification.reasoning, /lexical priority/i); - }); - - it('routes generated artifact queries to exact when index and embeddings are available', () => { - if (!smartSearchModule) return; - const classification = smartSearchModule.__testables.classifyIntent('dist bundle output', true, true); - assert.strictEqual(classification.mode, 'exact'); - assert.match(classification.reasoning, /generated artifact/i); - }); - }); - - describe('adjustWeightsByIntent', () => { - it('maps keyword intent to exact-heavy weights', () => { - if (!smartSearchModule) return; - const weights = smartSearchModule.adjustWeightsByIntent('keyword', { exact: 0.3, fuzzy: 0.1, vector: 0.6 }); - assert.deepStrictEqual(weights, { exact: 0.5, fuzzy: 0.1, vector: 0.4 }); - }); - }); - - describe('getRRFWeights parity set', () => { - it('produces stable weights for 20 representative queries', () => { - if (!smartSearchModule) return; - - const base = { exact: 0.3, fuzzy: 0.1, vector: 0.6 }; - const expected = [ - ['def authenticate', { exact: 0.5, fuzzy: 0.1, vector: 0.4 }], - ['class UserService', { exact: 0.5, fuzzy: 0.1, vector: 0.4 }], - ['user_id', { exact: 0.5, fuzzy: 0.1, vector: 0.4 }], - ['MyClass', { exact: 0.5, fuzzy: 0.1, vector: 0.4 }], - ['Foo::Bar', { exact: 0.5, fuzzy: 0.1, vector: 0.4 }], - ['ptr->next', { exact: 0.5, fuzzy: 0.1, vector: 0.4 }], - ['foo.bar', { exact: 0.5, fuzzy: 0.1, vector: 0.4 }], - ['import os', { exact: 0.5, fuzzy: 0.1, vector: 0.4 }], - ['how to handle user login', { exact: 0.2, fuzzy: 0.1, vector: 0.7 }], - ['what is the best way to search?', { exact: 0.2, fuzzy: 0.1, vector: 0.7 }], - ['explain the authentication flow', { exact: 0.2, fuzzy: 0.1, vector: 0.7 }], - ['generate embeddings for this repo', { exact: 0.2, fuzzy: 0.1, vector: 0.7 }], - ['how does FooBar work', base], - ['user_id how to handle', base], - ['Find UserService::authenticate method', base], - ['where is foo.bar used', base], - ['parse_json function', { exact: 0.5, fuzzy: 0.1, vector: 0.4 }], - ['How to parse_json output?', base], - ['', base], - ['authentication', base], - ]; - - for (const [query, expectedWeights] of expected) { - const actual = smartSearchModule.getRRFWeights(query, base); - assert.deepStrictEqual(actual, expectedWeights, `unexpected weights for query: ${JSON.stringify(query)}`); - } - }); - }); -}); diff --git a/ccw/tests/smart-search-mcp-usage.test.js b/ccw/tests/smart-search-mcp-usage.test.js deleted file mode 100644 index 889af31d..00000000 --- a/ccw/tests/smart-search-mcp-usage.test.js +++ /dev/null @@ -1,703 +0,0 @@ -import { after, afterEach, before, describe, it } from 'node:test'; -import assert from 'node:assert/strict'; -import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs'; -import { tmpdir } from 'node:os'; -import { join } from 'node:path'; - -const smartSearchPath = new URL('../dist/tools/smart-search.js', import.meta.url).href; -const originalAutoInitMissing = process.env.CODEXLENS_AUTO_INIT_MISSING; -const originalAutoEmbedMissing = process.env.CODEXLENS_AUTO_EMBED_MISSING; - -describe('Smart Search MCP usage defaults and path handling', async () => { - let smartSearchModule; - const tempDirs = []; - - before(async () => { - process.env.CODEXLENS_AUTO_INIT_MISSING = 'false'; - try { - smartSearchModule = await import(smartSearchPath); - } catch (err) { - console.log('Note: smart-search module import skipped:', err?.message ?? String(err)); - } - }); - - after(() => { - if (originalAutoInitMissing === undefined) { - delete process.env.CODEXLENS_AUTO_INIT_MISSING; - } else { - process.env.CODEXLENS_AUTO_INIT_MISSING = originalAutoInitMissing; - } - - if (originalAutoEmbedMissing === undefined) { - delete process.env.CODEXLENS_AUTO_EMBED_MISSING; - return; - } - process.env.CODEXLENS_AUTO_EMBED_MISSING = originalAutoEmbedMissing; - }); - - afterEach(() => { - while (tempDirs.length > 0) { - rmSync(tempDirs.pop(), { recursive: true, force: true }); - } - if (smartSearchModule?.__testables) { - smartSearchModule.__testables.__resetRuntimeOverrides(); - smartSearchModule.__testables.__resetBackgroundJobs(); - } - process.env.CODEXLENS_AUTO_INIT_MISSING = 'false'; - delete process.env.CODEXLENS_AUTO_EMBED_MISSING; - }); - - function createWorkspace() { - const dir = mkdtempSync(join(tmpdir(), 'ccw-smart-search-')); - tempDirs.push(dir); - return dir; - } - - function createDetachedChild() { - return { - on() { - return this; - }, - unref() {}, - }; - } - - it('keeps schema defaults aligned with runtime docs', () => { - if (!smartSearchModule) return; - - const { schema } = smartSearchModule; - const props = schema.inputSchema.properties; - - assert.equal(props.maxResults.default, 5); - assert.equal(props.limit.default, 5); - assert.match(schema.description, /static FTS index/i); - assert.match(schema.description, /semantic\/vector embeddings/i); - assert.ok(props.action.enum.includes('embed')); - assert.match(props.embeddingBackend.description, /litellm\/api/i); - assert.match(props.apiMaxWorkers.description, /endpoint pool/i); - assert.match(schema.description, /apiMaxWorkers=8/i); - assert.match(props.path.description, /single file path/i); - assert.ok(props.output_mode.enum.includes('ace')); - assert.match(props.output_mode.description, /ACE-style/i); - assert.equal(props.output_mode.default, 'ace'); - }); - - it('defaults auto embedding warmup off on Windows unless explicitly enabled', () => { - if (!smartSearchModule) return; - - const { __testables } = smartSearchModule; - delete process.env.CODEXLENS_AUTO_EMBED_MISSING; - assert.equal(__testables.isAutoEmbedMissingEnabled(undefined), process.platform !== 'win32'); - assert.equal(__testables.isAutoEmbedMissingEnabled({}), process.platform !== 'win32'); - assert.equal( - __testables.isAutoEmbedMissingEnabled({ embedding_auto_embed_missing: true }), - process.platform === 'win32' ? false : true, - ); - assert.equal(__testables.isAutoEmbedMissingEnabled({ embedding_auto_embed_missing: false }), false); - process.env.CODEXLENS_AUTO_EMBED_MISSING = 'true'; - assert.equal(__testables.isAutoEmbedMissingEnabled({ embedding_auto_embed_missing: false }), true); - process.env.CODEXLENS_AUTO_EMBED_MISSING = 'off'; - assert.equal(__testables.isAutoEmbedMissingEnabled({ embedding_auto_embed_missing: true }), false); - }); - - it('defaults auto index warmup off on Windows unless explicitly enabled', () => { - if (!smartSearchModule) return; - - const { __testables } = smartSearchModule; - delete process.env.CODEXLENS_AUTO_INIT_MISSING; - assert.equal(__testables.isAutoInitMissingEnabled(), process.platform !== 'win32'); - process.env.CODEXLENS_AUTO_INIT_MISSING = 'off'; - assert.equal(__testables.isAutoInitMissingEnabled(), false); - process.env.CODEXLENS_AUTO_INIT_MISSING = '1'; - assert.equal(__testables.isAutoInitMissingEnabled(), true); - }); - - it('explains when Windows disables background warmup by default', () => { - if (!smartSearchModule) return; - - const { __testables } = smartSearchModule; - delete process.env.CODEXLENS_AUTO_INIT_MISSING; - delete process.env.CODEXLENS_AUTO_EMBED_MISSING; - - const initReason = __testables.getAutoInitMissingDisabledReason(); - const embedReason = __testables.getAutoEmbedMissingDisabledReason({}); - - if (process.platform === 'win32') { - assert.match(initReason, /disabled by default on Windows/i); - assert.match(embedReason, /disabled by default on Windows/i); - assert.match(embedReason, /auto_embed_missing=true/i); - } else { - assert.match(initReason, /disabled/i); - assert.match(embedReason, /disabled/i); - } - }); - - it('builds hidden subprocess options for Smart Search child processes', () => { - if (!smartSearchModule) return; - - const options = smartSearchModule.__testables.buildSmartSearchSpawnOptions(tmpdir(), { - detached: true, - stdio: 'ignore', - timeout: 12345, - }); - - assert.equal(options.cwd, tmpdir()); - assert.equal(options.shell, false); - assert.equal(options.windowsHide, true); - assert.equal(options.detached, true); - assert.equal(options.timeout, 12345); - assert.equal(options.env.PYTHONIOENCODING, 'utf-8'); - }); - - it('avoids detached background warmup children on Windows consoles', () => { - if (!smartSearchModule) return; - - assert.equal( - smartSearchModule.__testables.shouldDetachBackgroundSmartSearchProcess(), - process.platform !== 'win32', - ); - }); - - it('checks tool availability without shell-based lookup popups', () => { - if (!smartSearchModule) return; - - const lookupCalls = []; - const available = smartSearchModule.__testables.checkToolAvailability( - 'rg', - (command, args, options) => { - lookupCalls.push({ command, args, options }); - return { status: 0, stdout: '', stderr: '' }; - }, - ); - - assert.equal(available, true); - assert.equal(lookupCalls.length, 1); - assert.equal(lookupCalls[0].command, process.platform === 'win32' ? 'where' : 'which'); - assert.deepEqual(lookupCalls[0].args, ['rg']); - assert.equal(lookupCalls[0].options.shell, false); - assert.equal(lookupCalls[0].options.windowsHide, true); - assert.equal(lookupCalls[0].options.stdio, 'ignore'); - assert.equal(lookupCalls[0].options.env.PYTHONIOENCODING, 'utf-8'); - }); - - it('starts background static index build once for unindexed paths', async () => { - if (!smartSearchModule) return; - - const { __testables } = smartSearchModule; - const dir = createWorkspace(); - const fakePython = join(dir, 'python.exe'); - writeFileSync(fakePython, ''); - process.env.CODEXLENS_AUTO_INIT_MISSING = 'true'; - - const spawnCalls = []; - __testables.__setRuntimeOverrides({ - getVenvPythonPath: () => fakePython, - now: () => 1234567890, - spawnProcess: (command, args, options) => { - spawnCalls.push({ command, args, options }); - return createDetachedChild(); - }, - }); - - const scope = { workingDirectory: dir, searchPaths: ['.'] }; - const indexStatus = { indexed: false, has_embeddings: false }; - - const first = await __testables.maybeStartBackgroundAutoInit(scope, indexStatus); - const second = await __testables.maybeStartBackgroundAutoInit(scope, indexStatus); - - assert.match(first.note, /started/i); - assert.match(second.note, /already running/i); - assert.equal(spawnCalls.length, 1); - assert.equal(spawnCalls[0].command, fakePython); - assert.deepEqual(spawnCalls[0].args, ['-m', 'codexlens', 'index', 'init', dir, '--no-embeddings']); - assert.equal(spawnCalls[0].options.cwd, dir); - assert.equal( - spawnCalls[0].options.detached, - smartSearchModule.__testables.shouldDetachBackgroundSmartSearchProcess(), - ); - assert.equal(spawnCalls[0].options.windowsHide, true); - }); - - it('starts background embedding build without detached Windows consoles', async () => { - if (!smartSearchModule) return; - - const { __testables } = smartSearchModule; - const dir = createWorkspace(); - const fakePython = join(dir, 'python.exe'); - writeFileSync(fakePython, ''); - process.env.CODEXLENS_AUTO_EMBED_MISSING = 'true'; - - const spawnCalls = []; - __testables.__setRuntimeOverrides({ - getVenvPythonPath: () => fakePython, - checkSemanticStatus: async () => ({ available: true, litellmAvailable: true }), - now: () => 1234567890, - spawnProcess: (command, args, options) => { - spawnCalls.push({ command, args, options }); - return createDetachedChild(); - }, - }); - - const status = await __testables.maybeStartBackgroundAutoEmbed( - { workingDirectory: dir, searchPaths: ['.'] }, - { - indexed: true, - has_embeddings: false, - config: { embedding_backend: 'fastembed' }, - }, - ); - - assert.match(status.note, /started/i); - assert.equal(spawnCalls.length, 1); - assert.equal(spawnCalls[0].command, fakePython); - assert.deepEqual(spawnCalls[0].args.slice(0, 1), ['-c']); - assert.equal(spawnCalls[0].options.cwd, dir); - assert.equal( - spawnCalls[0].options.detached, - smartSearchModule.__testables.shouldDetachBackgroundSmartSearchProcess(), - ); - assert.equal(spawnCalls[0].options.windowsHide, true); - assert.equal(spawnCalls[0].options.stdio, 'ignore'); - }); - - it('surfaces warnings when background static index warmup cannot start', async () => { - if (!smartSearchModule) return; - - const { __testables } = smartSearchModule; - const dir = createWorkspace(); - process.env.CODEXLENS_AUTO_INIT_MISSING = 'true'; - - __testables.__setRuntimeOverrides({ - getVenvPythonPath: () => join(dir, 'missing-python.exe'), - }); - - const status = await __testables.maybeStartBackgroundAutoInit( - { workingDirectory: dir, searchPaths: ['.'] }, - { indexed: false, has_embeddings: false }, - ); - - assert.match(status.warning, /Automatic static index warmup could not start/i); - assert.match(status.warning, /not ready yet/i); - }); - - it('honors explicit small limit values', async () => { - if (!smartSearchModule) return; - - const dir = createWorkspace(); - const file = join(dir, 'many.ts'); - writeFileSync(file, ['const hit = 1;', 'const hit = 2;', 'const hit = 3;'].join('\n')); - - const toolResult = await smartSearchModule.handler({ - action: 'search', - query: 'hit', - path: dir, - output_mode: 'full', - limit: 1, - regex: false, - tokenize: false, - }); - - assert.equal(toolResult.success, true, toolResult.error); - assert.equal(toolResult.result.success, true); - assert.equal(toolResult.result.results.length, 1); - assert.equal(toolResult.result.metadata.pagination.limit, 1); - }); - - it('scopes search results to a single file path', async () => { - if (!smartSearchModule) return; - - const dir = createWorkspace(); - const target = join(dir, 'target.ts'); - const other = join(dir, 'other.ts'); - writeFileSync(target, 'const TARGET_TOKEN = 1;\n'); - writeFileSync(other, 'const TARGET_TOKEN = 2;\n'); - - const toolResult = await smartSearchModule.handler({ - action: 'search', - query: 'TARGET_TOKEN', - path: target, - output_mode: 'full', - regex: false, - tokenize: false, - }); - - assert.equal(toolResult.success, true, toolResult.error); - assert.equal(toolResult.result.success, true); - assert.ok(Array.isArray(toolResult.result.results)); - assert.ok(toolResult.result.results.length >= 1); - - const normalizedFiles = toolResult.result.results.map((item) => String(item.file).replace(/\\/g, '/')); - assert.ok(normalizedFiles.every((file) => file.endsWith('/target.ts') || file === 'target.ts')); - assert.ok(normalizedFiles.every((file) => !file.endsWith('/other.ts'))); - }); - - it('normalizes wrapped multiline query and file path inputs', async () => { - if (!smartSearchModule) return; - - const dir = createWorkspace(); - const nestedDir = join(dir, 'hydro_generator_module', 'builders'); - mkdirSync(nestedDir, { recursive: true }); - const target = join(nestedDir, 'full_machine_builders.py'); - writeFileSync(target, 'def _resolve_rotor_inner():\n return rotor_main_seg\n'); - - const wrappedPath = target.replace(/([\\/])builders([\\/])/, '$1\n builders$2'); - const wrappedQuery = '_resolve_rotor_inner OR\n rotor_main_seg'; - - const toolResult = await smartSearchModule.handler({ - action: 'search', - query: wrappedQuery, - path: wrappedPath, - output_mode: 'full', - regex: false, - caseSensitive: false, - }); - - assert.equal(toolResult.success, true, toolResult.error); - assert.equal(toolResult.result.success, true); - assert.ok(toolResult.result.results.length >= 1); - }); - - it('falls back to literal ripgrep matching for invalid regex-like code queries', async () => { - if (!smartSearchModule) return; - - const dir = createWorkspace(); - const target = join(dir, 'component.ts'); - writeFileSync(target, 'defineExpose({ handleResize });\n'); - - const toolResult = await smartSearchModule.handler({ - action: 'search', - query: 'defineExpose({ handleResize', - path: dir, - output_mode: 'full', - limit: 5, - }); - - assert.equal(toolResult.success, true, toolResult.error); - assert.equal(toolResult.result.success, true); - assert.ok(toolResult.result.results.length >= 1); - assert.match(toolResult.result.metadata.warning, /literal ripgrep matching/i); - }); - - it('renders grouped ace-style output by default with multi-line chunks', async () => { - if (!smartSearchModule) return; - - const dir = createWorkspace(); - const target = join(dir, 'ace-target.ts'); - writeFileSync(target, [ - 'const before = 1;', - 'const TARGET_TOKEN = 1;', - 'const after = 2;', - '', - 'function useToken() {', - ' return TARGET_TOKEN;', - '}', - ].join('\n')); - - const toolResult = await smartSearchModule.handler({ - action: 'search', - query: 'TARGET_TOKEN', - path: dir, - contextLines: 1, - regex: false, - tokenize: false, - }); - - assert.equal(toolResult.success, true, toolResult.error); - assert.equal(toolResult.result.success, true); - assert.equal(toolResult.result.results.format, 'ace'); - assert.equal(Array.isArray(toolResult.result.results.groups), true); - assert.equal(Array.isArray(toolResult.result.results.sections), true); - assert.equal(toolResult.result.results.groups.length, 1); - assert.equal(toolResult.result.results.groups[0].sections.length, 2); - assert.match(toolResult.result.results.text, /The following code sections were retrieved:/); - assert.match(toolResult.result.results.text, /Path: .*ace-target\.ts/); - assert.match(toolResult.result.results.text, /Chunk 1: lines 1-3/); - assert.match(toolResult.result.results.text, />\s+2 \| const TARGET_TOKEN = 1;/); - assert.match(toolResult.result.results.text, /Chunk 2: lines 5-7/); - assert.equal(toolResult.result.metadata.pagination.total >= 1, true); - }); - - it('defaults embed selection to local-fast for bulk indexing', () => { - if (!smartSearchModule) return; - - const selection = smartSearchModule.__testables.resolveEmbeddingSelection(undefined, undefined, { - embedding_backend: 'litellm', - embedding_model: 'qwen3-embedding-sf', - }); - - assert.equal(selection.backend, 'fastembed'); - assert.equal(selection.model, 'fast'); - assert.equal(selection.preset, 'bulk-local-fast'); - assert.match(selection.note, /local-fast/i); - }); - - it('keeps explicit api embedding selection when requested', () => { - if (!smartSearchModule) return; - - const selection = smartSearchModule.__testables.resolveEmbeddingSelection('api', 'qwen3-embedding-sf', { - embedding_backend: 'fastembed', - embedding_model: 'fast', - }); - - assert.equal(selection.backend, 'litellm'); - assert.equal(selection.model, 'qwen3-embedding-sf'); - assert.equal(selection.preset, 'explicit'); - }); - - it('parses warning-prefixed JSON and plain-text file lists for semantic fallback', () => { - if (!smartSearchModule) return; - - const dir = createWorkspace(); - const target = join(dir, 'target.ts'); - writeFileSync(target, 'export const target = 1;\n'); - - const parsed = smartSearchModule.__testables.parseCodexLensJsonOutput([ - 'RuntimeWarning: compatibility shim', - JSON.stringify({ results: [{ file: 'target.ts', score: 0.25, excerpt: 'target' }] }), - ].join('\n')); - assert.equal(Array.isArray(parsed.results), true); - assert.equal(parsed.results[0].file, 'target.ts'); - - const matches = smartSearchModule.__testables.parsePlainTextFileMatches(target, { - workingDirectory: dir, - searchPaths: ['.'], - }); - assert.equal(matches.length, 1); - assert.match(String(matches[0].file).replace(/\\/g, '/'), /target\.ts$/); - }); - - it('uses root-scoped embedding status instead of subtree artifacts', () => { - if (!smartSearchModule) return; - - const summary = smartSearchModule.__testables.extractEmbeddingsStatusSummary({ - total_indexes: 3, - indexes_with_embeddings: 2, - total_chunks: 24, - coverage_percent: 66.7, - root: { - total_files: 4, - files_with_embeddings: 0, - total_chunks: 0, - coverage_percent: 0, - has_embeddings: false, - }, - subtree: { - total_indexes: 3, - indexes_with_embeddings: 2, - total_files: 12, - files_with_embeddings: 8, - total_chunks: 24, - coverage_percent: 66.7, - }, - centralized: { - dense_index_exists: true, - binary_index_exists: true, - meta_db_exists: true, - usable: false, - }, - }); - - assert.equal(summary.coveragePercent, 0); - assert.equal(summary.totalChunks, 0); - assert.equal(summary.hasEmbeddings, false); - }); - - it('accepts validated root centralized readiness from CLI status payloads', () => { - if (!smartSearchModule) return; - - const summary = smartSearchModule.__testables.extractEmbeddingsStatusSummary({ - total_indexes: 2, - indexes_with_embeddings: 1, - total_chunks: 10, - coverage_percent: 25, - root: { - total_files: 2, - files_with_embeddings: 1, - total_chunks: 3, - coverage_percent: 50, - has_embeddings: true, - }, - centralized: { - usable: true, - dense_ready: true, - chunk_metadata_rows: 3, - }, - }); - - assert.equal(summary.coveragePercent, 50); - assert.equal(summary.totalChunks, 3); - assert.equal(summary.hasEmbeddings, true); - }); - - it('prefers embeddings_status over legacy embeddings summary payloads', () => { - if (!smartSearchModule) return; - - const payload = smartSearchModule.__testables.selectEmbeddingsStatusPayload({ - embeddings: { - total_indexes: 7, - indexes_with_embeddings: 4, - total_chunks: 99, - }, - embeddings_status: { - total_indexes: 7, - total_chunks: 3, - root: { - total_files: 2, - files_with_embeddings: 1, - total_chunks: 3, - coverage_percent: 50, - has_embeddings: true, - }, - centralized: { - usable: true, - dense_ready: true, - chunk_metadata_rows: 3, - }, - }, - }); - - assert.equal(payload.root.total_chunks, 3); - assert.equal(payload.centralized.usable, true); - }); - - it('recognizes CodexLens CLI compatibility failures and invalid regex fallback', () => { - if (!smartSearchModule) return; - - const compatibilityError = [ - 'UsageError: Got unexpected extra arguments (20 0 fts)', - 'TypeError: TyperArgument.make_metavar() takes 1 positional argument but 2 were given', - ].join('\n'); - - assert.equal( - smartSearchModule.__testables.isCodexLensCliCompatibilityError(compatibilityError), - true, - ); - - const resolution = smartSearchModule.__testables.resolveRipgrepQueryMode( - 'defineExpose({ handleResize', - true, - true, - ); - - assert.equal(resolution.regex, false); - assert.equal(resolution.literalFallback, true); - assert.match(resolution.warning, /literal ripgrep matching/i); - }); - - it('suppresses compatibility-only fuzzy warnings when ripgrep already produced hits', () => { - if (!smartSearchModule) return; - - assert.equal( - smartSearchModule.__testables.shouldSurfaceCodexLensFtsCompatibilityWarning({ - compatibilityTriggeredThisQuery: true, - skipExactDueToCompatibility: false, - ripgrepResultCount: 2, - }), - false, - ); - - assert.equal( - smartSearchModule.__testables.shouldSurfaceCodexLensFtsCompatibilityWarning({ - compatibilityTriggeredThisQuery: true, - skipExactDueToCompatibility: false, - ripgrepResultCount: 0, - }), - true, - ); - - assert.equal( - smartSearchModule.__testables.shouldSurfaceCodexLensFtsCompatibilityWarning({ - compatibilityTriggeredThisQuery: false, - skipExactDueToCompatibility: true, - ripgrepResultCount: 0, - }), - true, - ); - }); - - it('builds actionable index suggestions for unhealthy index states', () => { - if (!smartSearchModule) return; - - const suggestions = smartSearchModule.__testables.buildIndexSuggestions( - { - indexed: true, - has_embeddings: false, - embeddings_coverage_percent: 0, - warning: 'Index exists but no embeddings generated. Run smart_search(action="embed") to build the vector index.', - }, - { - workingDirectory: 'D:/tmp/demo', - searchPaths: ['.'], - }, - ); - - assert.equal(Array.isArray(suggestions), true); - assert.match(suggestions[0].command, /smart_search\(action="embed"/); - }); - - it('surfaces backend failure details when fuzzy search fully fails', async () => { - if (!smartSearchModule) return; - - const missingPath = join(createWorkspace(), 'missing-folder', 'missing.ts'); - const toolResult = await smartSearchModule.handler({ - action: 'search', - query: 'TARGET_TOKEN', - path: missingPath, - output_mode: 'full', - regex: false, - tokenize: false, - }); - - assert.equal(toolResult.success, false); - assert.match(toolResult.error, /Both search backends failed:/); - assert.match(toolResult.error, /(FTS|Ripgrep)/); - }); - - it('returns structured semantic results after local init and embed without JSON parse warnings', async () => { - if (!smartSearchModule) return; - - const codexLensModule = await import(new URL(`../dist/tools/codex-lens.js?smart-semantic=${Date.now()}`, import.meta.url).href); - const ready = await codexLensModule.checkVenvStatus(true); - if (!ready.ready) { - console.log('Skipping: CodexLens not ready'); - return; - } - - const semantic = await codexLensModule.checkSemanticStatus(); - if (!semantic.available) { - console.log('Skipping: semantic dependencies not ready'); - return; - } - - const dir = createWorkspace(); - writeFileSync( - join(dir, 'sample.ts'), - 'export function parseCodexLensOutput() { return stripAnsiOutput(); }\nexport const sum = (a, b) => a + b;\n', - ); - - const init = await smartSearchModule.handler({ action: 'init', path: dir }); - assert.equal(init.success, true, init.error ?? 'Expected init to succeed'); - - const embed = await smartSearchModule.handler({ - action: 'embed', - path: dir, - embeddingBackend: 'local', - force: true, - }); - assert.equal(embed.success, true, embed.error ?? 'Expected local embed to succeed'); - - const search = await smartSearchModule.handler({ - action: 'search', - mode: 'semantic', - path: dir, - query: 'parse CodexLens output strip ANSI', - limit: 5, - }); - - assert.equal(search.success, true, search.error ?? 'Expected semantic search to succeed'); - assert.equal(search.result.success, true); - assert.equal(search.result.results.format, 'ace'); - assert.ok(search.result.results.total >= 1, 'Expected at least one structured semantic match'); - assert.doesNotMatch(search.result.metadata?.warning ?? '', /Failed to parse JSON output/i); - }); -}); diff --git a/ccw/tests/smart-search.test.ts b/ccw/tests/smart-search.test.ts deleted file mode 100644 index 959fe037..00000000 --- a/ccw/tests/smart-search.test.ts +++ /dev/null @@ -1,71 +0,0 @@ -/** - * TypeScript parity tests for query intent detection + adaptive RRF weights. - * - * Notes: - * - These tests target the runtime implementation shipped in `ccw/dist`. - * - Keep logic aligned with Python: `codex-lens/src/codexlens/search/ranking.py`. - */ - -import { before, describe, it } from 'node:test'; -import assert from 'node:assert'; - -const smartSearchPath = new URL('../dist/tools/smart-search.js', import.meta.url).href; - -describe('Smart Search (TS) - Query Intent + RRF Weights', async () => { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - let smartSearchModule: any; - - before(async () => { - try { - smartSearchModule = await import(smartSearchPath); - } catch (err: any) { - // Keep tests non-blocking for environments that haven't built `ccw/dist` yet. - console.log('Note: smart-search module import skipped:', err?.message ?? String(err)); - } - }); - - describe('detectQueryIntent parity (10 cases)', () => { - const cases: Array<[string, 'keyword' | 'semantic' | 'mixed']> = [ - ['def authenticate', 'keyword'], - ['MyClass', 'keyword'], - ['user_id', 'keyword'], - ['UserService::authenticate', 'keyword'], - ['ptr->next', 'keyword'], - ['how to handle user login', 'semantic'], - ['what is authentication?', 'semantic'], - ['where is this used?', 'semantic'], - ['why does FooBar crash?', 'mixed'], - ['how to use user_id in query', 'mixed'], - ]; - - for (const [query, expected] of cases) { - it(`classifies ${JSON.stringify(query)} as ${expected}`, () => { - if (!smartSearchModule) return; - assert.strictEqual(smartSearchModule.detectQueryIntent(query), expected); - }); - } - }); - - describe('adaptive weights (Python parity thresholds)', () => { - it('uses exact-heavy weights for code-like queries (exact > 0.4)', () => { - if (!smartSearchModule) return; - const weights = smartSearchModule.getRRFWeights('def authenticate', { - exact: 0.3, - fuzzy: 0.1, - vector: 0.6, - }); - assert.ok(weights.exact > 0.4); - }); - - it('uses vector-heavy weights for NL queries (vector > 0.6)', () => { - if (!smartSearchModule) return; - const weights = smartSearchModule.getRRFWeights('how to handle user login', { - exact: 0.3, - fuzzy: 0.1, - vector: 0.6, - }); - assert.ok(weights.vector > 0.6); - }); - }); -}); - diff --git a/ccw/tests/unified-vector-index.test.ts b/ccw/tests/unified-vector-index.test.ts deleted file mode 100644 index f14acc02..00000000 --- a/ccw/tests/unified-vector-index.test.ts +++ /dev/null @@ -1,97 +0,0 @@ -import { after, beforeEach, describe, it } from 'node:test'; -import assert from 'node:assert/strict'; -import { EventEmitter } from 'node:events'; -import { createRequire } from 'node:module'; -import { mkdtempSync, rmSync } from 'node:fs'; -import { tmpdir } from 'node:os'; -import { join } from 'node:path'; - -const require = createRequire(import.meta.url); -// eslint-disable-next-line @typescript-eslint/no-var-requires -const fs = require('node:fs') as typeof import('node:fs'); -// eslint-disable-next-line @typescript-eslint/no-var-requires -const childProcess = require('node:child_process') as typeof import('node:child_process'); - -class FakeChildProcess extends EventEmitter { - stdout = new EventEmitter(); - stderr = new EventEmitter(); - stdinChunks: string[] = []; - stdin = { - write: (chunk: string | Buffer) => { - this.stdinChunks.push(String(chunk)); - return true; - }, - end: () => undefined, - }; -} - -type SpawnCall = { - command: string; - args: string[]; - // eslint-disable-next-line @typescript-eslint/no-explicit-any - options: any; - child: FakeChildProcess; -}; - -const spawnCalls: SpawnCall[] = []; -const tempDirs: string[] = []; -let embedderAvailable = true; - -const originalExistsSync = fs.existsSync; -const originalSpawn = childProcess.spawn; - -fs.existsSync = ((..._args: unknown[]) => embedderAvailable) as typeof fs.existsSync; - -childProcess.spawn = ((command: string, args: string[] = [], options: unknown = {}) => { - const child = new FakeChildProcess(); - spawnCalls.push({ command: String(command), args: args.map(String), options, child }); - - queueMicrotask(() => { - child.stdout.emit('data', JSON.stringify({ - success: true, - total_chunks: 4, - hnsw_available: true, - hnsw_count: 4, - dimension: 384, - })); - child.emit('close', 0); - }); - - return child as unknown as ReturnType; -}) as typeof childProcess.spawn; - -after(() => { - fs.existsSync = originalExistsSync; - childProcess.spawn = originalSpawn; - while (tempDirs.length > 0) { - rmSync(tempDirs.pop() as string, { recursive: true, force: true }); - } -}); - -describe('unified-vector-index', () => { - beforeEach(() => { - embedderAvailable = true; - spawnCalls.length = 0; - }); - - it('spawns CodexLens venv python with hidden window options', async () => { - const projectDir = mkdtempSync(join(tmpdir(), 'ccw-unified-vector-index-')); - tempDirs.push(projectDir); - - const moduleUrl = new URL('../dist/core/unified-vector-index.js', import.meta.url); - moduleUrl.searchParams.set('t', String(Date.now())); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const mod: any = await import(moduleUrl.href); - - const index = new mod.UnifiedVectorIndex(projectDir); - const status = await index.getStatus(); - - assert.equal(status.success, true); - assert.equal(spawnCalls.length, 1); - assert.equal(spawnCalls[0].options.shell, false); - assert.equal(spawnCalls[0].options.windowsHide, true); - assert.equal(spawnCalls[0].options.env.PYTHONIOENCODING, 'utf-8'); - assert.deepEqual(spawnCalls[0].options.stdio, ['pipe', 'pipe', 'pipe']); - assert.match(spawnCalls[0].child.stdinChunks.join(''), /"operation":"status"/); - }); -});