diff --git a/ccw/frontend/src/components/dashboard/widgets/WorkflowTaskWidget.tsx b/ccw/frontend/src/components/dashboard/widgets/WorkflowTaskWidget.tsx
index e15687ad..0d224128 100644
--- a/ccw/frontend/src/components/dashboard/widgets/WorkflowTaskWidget.tsx
+++ b/ccw/frontend/src/components/dashboard/widgets/WorkflowTaskWidget.tsx
@@ -14,7 +14,6 @@ import { Sparkline } from '@/components/charts/Sparkline';
import { useWorkflowStatusCounts } from '@/hooks/useWorkflowStatusCounts';
import { useDashboardStats } from '@/hooks/useDashboardStats';
import { useProjectOverview } from '@/hooks/useProjectOverview';
-import { useIndexStatus } from '@/hooks/useIndex';
import { useSessions } from '@/hooks/useSessions';
import { cn } from '@/lib/utils';
import type { TaskData } from '@/types/store';
@@ -40,7 +39,6 @@ import {
Sparkles,
BarChart3,
PieChart as PieChartIcon,
- Database,
} from 'lucide-react';
export interface WorkflowTaskWidgetProps {
@@ -187,8 +185,6 @@ function WorkflowTaskWidgetComponent({ className }: WorkflowTaskWidgetProps) {
const { data, isLoading } = useWorkflowStatusCounts();
const { stats, isLoading: statsLoading } = useDashboardStats({ refetchInterval: 60000 });
const { projectOverview, isLoading: projectLoading } = useProjectOverview();
- const { status: indexStatus } = useIndexStatus({ refetchInterval: 30000 });
-
// Fetch real sessions data
const { activeSessions, isLoading: sessionsLoading } = useSessions({
filter: { location: 'active' },
@@ -328,34 +324,6 @@ function WorkflowTaskWidgetComponent({ className }: WorkflowTaskWidgetProps) {
{formatMessage({ id: 'projectOverview.devIndex.category.enhancements' })}
- {/* Index Status Indicator */}
-
-
-
- {indexStatus?.status === 'building' && (
-
-
-
-
- )}
-
-
- {indexStatus?.totalFiles || 0}
-
-
{formatMessage({ id: 'home.indexStatus.label' })}
-
{/* Date + Expand Button */}
diff --git a/ccw/frontend/src/components/layout/Sidebar.tsx b/ccw/frontend/src/components/layout/Sidebar.tsx
index d0c1f61f..4b3b5914 100644
--- a/ccw/frontend/src/components/layout/Sidebar.tsx
+++ b/ccw/frontend/src/components/layout/Sidebar.tsx
@@ -114,7 +114,6 @@ const navGroupDefinitions: NavGroupDef[] = [
titleKey: 'navigation.groups.configuration',
icon: Cog,
items: [
- { path: '/settings/codexlens', labelKey: 'navigation.main.codexlens', icon: Sparkles },
{ path: '/api-settings', labelKey: 'navigation.main.apiSettings', icon: Server },
{ path: '/settings', labelKey: 'navigation.main.settings', icon: Settings, end: true },
],
diff --git a/ccw/frontend/src/components/mcp/CcwToolsMcpCard.test.tsx b/ccw/frontend/src/components/mcp/CcwToolsMcpCard.test.tsx
index 455b8d5e..ff3d5155 100644
--- a/ccw/frontend/src/components/mcp/CcwToolsMcpCard.test.tsx
+++ b/ccw/frontend/src/components/mcp/CcwToolsMcpCard.test.tsx
@@ -139,7 +139,7 @@ describe('CcwToolsMcpCard', () => {
render(
{
const [payload] = updateClaudeMock.mock.calls[0] ?? [];
expect(payload).toEqual(
expect.objectContaining({
- enabledTools: ['write_file', 'smart_search'],
+ enabledTools: ['write_file', 'edit_file'],
})
);
});
diff --git a/ccw/frontend/src/components/mcp/CcwToolsMcpCard.tsx b/ccw/frontend/src/components/mcp/CcwToolsMcpCard.tsx
index 421418b0..20a93ec2 100644
--- a/ccw/frontend/src/components/mcp/CcwToolsMcpCard.tsx
+++ b/ccw/frontend/src/components/mcp/CcwToolsMcpCard.tsx
@@ -18,7 +18,6 @@ import {
HardDrive,
MessageCircleQuestion,
MessagesSquare,
- SearchCode,
ChevronDown,
ChevronRight,
Globe,
@@ -110,7 +109,6 @@ export const CCW_MCP_TOOLS: CcwTool[] = [
{ name: 'read_many_files', desc: 'Read multiple files/dirs', core: true },
{ name: 'core_memory', desc: 'Core memory management', core: true },
{ name: 'ask_question', desc: 'Interactive questions (A2UI)', core: false },
- { name: 'smart_search', desc: 'Intelligent code search', core: true },
{ name: 'team_msg', desc: 'Agent team message bus', core: false },
];
@@ -572,8 +570,6 @@ function getToolIcon(toolName: string): React.ReactElement {
return ;
case 'ask_question':
return ;
- case 'smart_search':
- return ;
case 'team_msg':
return ;
default:
diff --git a/ccw/frontend/src/components/shared/IndexManager.tsx b/ccw/frontend/src/components/shared/IndexManager.tsx
deleted file mode 100644
index 605b0c52..00000000
--- a/ccw/frontend/src/components/shared/IndexManager.tsx
+++ /dev/null
@@ -1,227 +0,0 @@
-// ========================================
-// IndexManager Component
-// ========================================
-// Component for managing code index with status display and rebuild functionality
-
-import * as React from 'react';
-import { useIntl } from 'react-intl';
-import { Database, RefreshCw, AlertCircle, CheckCircle2, Clock } from 'lucide-react';
-import { Card } from '@/components/ui/Card';
-import { Button } from '@/components/ui/Button';
-import { StatCard } from '@/components/shared/StatCard';
-import { Badge } from '@/components/ui/Badge';
-import { useIndex } from '@/hooks/useIndex';
-import { cn } from '@/lib/utils';
-
-// ========== Types ==========
-
-export interface IndexManagerProps {
- className?: string;
-}
-
-// ========== Helper Components ==========
-
-/**
- * Progress bar for index rebuild
- */
-function IndexProgressBar({ progress, status }: { progress?: number; status: string }) {
- const { formatMessage } = useIntl();
-
- if (status !== 'building' || progress === undefined) return null;
-
- return (
-
-
-
- {formatMessage({ id: 'index.status.building' })}
-
- {progress}%
-
-
-
- );
-}
-
-/**
- * Status badge component
- */
-function IndexStatusBadge({ status }: { status: string }) {
- const { formatMessage } = useIntl();
-
- const config: Record = {
- idle: { variant: 'secondary', label: formatMessage({ id: 'index.status.idle' }) },
- building: { variant: 'default', label: formatMessage({ id: 'index.status.building' }) },
- completed: { variant: 'outline', label: formatMessage({ id: 'index.status.completed' }) },
- failed: { variant: 'destructive', label: formatMessage({ id: 'index.status.failed' }) },
- };
-
- const { variant, label } = config[status] ?? config.idle;
-
- return (
-
- {label}
-
- );
-}
-
-// ========== Main Component ==========
-
-/**
- * IndexManager component for displaying index status and managing rebuild operations
- *
- * @example
- * ```tsx
- *
- * ```
- */
-export function IndexManager({ className }: IndexManagerProps) {
- const { formatMessage } = useIntl();
- const { status, isLoading, rebuildIndex, isRebuilding, rebuildError, refetch } = useIndex();
-
- // Auto-refresh during rebuild
- const refetchInterval = status?.status === 'building' ? 2000 : 0;
- React.useEffect(() => {
- if (status?.status === 'building') {
- const interval = setInterval(() => {
- refetch();
- }, refetchInterval);
- return () => clearInterval(interval);
- }
- }, [status?.status, refetchInterval, refetch]);
-
- // Handle rebuild button click
- const handleRebuild = async () => {
- try {
- await rebuildIndex({ force: false });
- } catch (error) {
- console.error('[IndexManager] Rebuild failed:', error);
- }
- };
-
- // Format build time (ms to human readable)
- const formatBuildTime = (ms: number): string => {
- if (ms < 1000) return `${ms}ms`;
- if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`;
- return `${Math.floor(ms / 60000)}m ${Math.floor((ms % 60000) / 1000)}s`;
- };
-
- // Format last updated time
- const formatLastUpdated = (isoString: string): string => {
- const date = new Date(isoString);
- const now = new Date();
- const diffMs = now.getTime() - date.getTime();
- const diffMins = Math.floor(diffMs / 60000);
- const diffHours = Math.floor(diffMs / 3600000);
- const diffDays = Math.floor(diffMs / 86400000);
-
- if (diffMins < 1) return formatMessage({ id: 'index.time.justNow' });
- if (diffMins < 60) return formatMessage({ id: 'index.time.minutesAgo' }, { value: diffMins });
- if (diffHours < 24) return formatMessage({ id: 'index.time.hoursAgo' }, { value: diffHours });
- return formatMessage({ id: 'index.time.daysAgo' }, { value: diffDays });
- };
-
- return (
-
- {/* Header */}
-
-
-
-
- {formatMessage({ id: 'index.title' })}
-
- {status && }
-
-
-
-
- {/* Description */}
-
- {formatMessage({ id: 'index.description' })}
-
-
- {/* Error message */}
- {rebuildError && (
-
-
-
-
- {formatMessage({ id: 'index.errors.rebuildFailed' })}
-
-
{rebuildError.message}
-
-
- )}
-
- {/* Status error */}
- {status?.error && (
-
- )}
-
- {/* Progress Bar */}
- {status && }
-
- {/* Current file being indexed */}
- {status?.currentFile && status.status === 'building' && (
-
-
- {status.currentFile}
-
- )}
-
- {/* Stat Cards */}
-
- {/* Total Files */}
-
-
- {/* Last Updated */}
-
-
- {/* Build Time */}
-
-
-
- );
-}
-
-export default IndexManager;
diff --git a/ccw/frontend/src/components/shared/index.ts b/ccw/frontend/src/components/shared/index.ts
index 11f61936..d3ccddbc 100644
--- a/ccw/frontend/src/components/shared/index.ts
+++ b/ccw/frontend/src/components/shared/index.ts
@@ -146,9 +146,6 @@ export type { RuleDialogProps } from './RuleDialog';
// Tools and utility components
export { ThemeSelector } from './ThemeSelector';
-export { IndexManager } from './IndexManager';
-export type { IndexManagerProps } from './IndexManager';
-
export { ExplorerToolbar } from './ExplorerToolbar';
export type { ExplorerToolbarProps } from './ExplorerToolbar';
diff --git a/ccw/frontend/src/hooks/index.ts b/ccw/frontend/src/hooks/index.ts
index d280caa1..9f245ba0 100644
--- a/ccw/frontend/src/hooks/index.ts
+++ b/ccw/frontend/src/hooks/index.ts
@@ -290,16 +290,6 @@ export type {
WorkspaceQueryKeys,
} from './useWorkspaceQueryKeys';
-// ========== CodexLens (v2) ==========
-export {
- useV2SearchManager,
-} from './useV2SearchManager';
-export type {
- V2IndexStatus,
- V2SearchTestResult,
- UseV2SearchManagerReturn,
-} from './useV2SearchManager';
-
// ========== Skill Hub ==========
export {
useRemoteSkills,
diff --git a/ccw/frontend/src/hooks/useIndex.ts b/ccw/frontend/src/hooks/useIndex.ts
deleted file mode 100644
index 65fd415c..00000000
--- a/ccw/frontend/src/hooks/useIndex.ts
+++ /dev/null
@@ -1,142 +0,0 @@
-// ========================================
-// useIndex Hook
-// ========================================
-// TanStack Query hooks for index management with real-time updates
-
-import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
-import {
- fetchIndexStatus,
- rebuildIndex,
- type IndexStatus,
- type IndexRebuildRequest,
-} from '../lib/api';
-import { useWorkflowStore, selectProjectPath } from '@/stores/workflowStore';
-import { workspaceQueryKeys } from '@/lib/queryKeys';
-
-// ========== Stale Time ==========
-
-// Default stale time: 30 seconds (index status updates less frequently)
-const STALE_TIME = 30 * 1000;
-
-// ========== Query Hook ==========
-
-export interface UseIndexStatusOptions {
- enabled?: boolean;
- staleTime?: number;
- refetchInterval?: number;
-}
-
-export interface UseIndexStatusReturn {
- status: IndexStatus | null;
- isLoading: boolean;
- isFetching: boolean;
- error: Error | null;
- refetch: () => Promise;
- invalidate: () => Promise;
-}
-
-/**
- * Hook for fetching index status
- *
- * @example
- * ```tsx
- * const { status, isLoading, refetch } = useIndexStatus();
- * ```
- */
-export function useIndexStatus(options: UseIndexStatusOptions = {}): UseIndexStatusReturn {
- const { staleTime = STALE_TIME, enabled = true, refetchInterval = 0 } = options;
- const queryClient = useQueryClient();
-
- const projectPath = useWorkflowStore(selectProjectPath);
- const queryEnabled = enabled && !!projectPath;
-
- const query = useQuery({
- queryKey: workspaceQueryKeys.indexStatus(projectPath),
- queryFn: () => fetchIndexStatus(projectPath),
- staleTime,
- enabled: queryEnabled,
- refetchInterval: refetchInterval > 0 ? refetchInterval : false,
- retry: 2,
- });
-
- const refetch = async () => {
- await query.refetch();
- };
-
- const invalidate = async () => {
- await queryClient.invalidateQueries({ queryKey: workspaceQueryKeys.index(projectPath) });
- };
-
- return {
- status: query.data ?? null,
- isLoading: query.isLoading,
- isFetching: query.isFetching,
- error: query.error,
- refetch,
- invalidate,
- };
-}
-
-// ========== Mutation Hooks ==========
-
-export interface UseRebuildIndexReturn {
- rebuildIndex: (request?: IndexRebuildRequest) => Promise;
- isRebuilding: boolean;
- error: Error | null;
-}
-
-/**
- * Hook for rebuilding index
- *
- * @example
- * ```tsx
- * const { rebuildIndex, isRebuilding } = useRebuildIndex();
- *
- * const handleRebuild = async () => {
- * await rebuildIndex({ force: true });
- * };
- * ```
- */
-export function useRebuildIndex(): UseRebuildIndexReturn {
- const queryClient = useQueryClient();
- const projectPath = useWorkflowStore(selectProjectPath);
-
- const mutation = useMutation({
- mutationFn: rebuildIndex,
- onSuccess: (updatedStatus) => {
- // Update the status query cache
- queryClient.setQueryData(workspaceQueryKeys.indexStatus(projectPath), updatedStatus);
- },
- });
-
- return {
- rebuildIndex: mutation.mutateAsync,
- isRebuilding: mutation.isPending,
- error: mutation.error,
- };
-}
-
-/**
- * Combined hook for all index operations
- *
- * @example
- * ```tsx
- * const {
- * status,
- * isLoading,
- * rebuildIndex,
- * isRebuilding,
- * } = useIndex();
- * ```
- */
-export function useIndex() {
- const status = useIndexStatus();
- const rebuild = useRebuildIndex();
-
- return {
- ...status,
- rebuildIndex: rebuild.rebuildIndex,
- isRebuilding: rebuild.isRebuilding,
- rebuildError: rebuild.error,
- };
-}
diff --git a/ccw/frontend/src/hooks/useV2SearchManager.ts b/ccw/frontend/src/hooks/useV2SearchManager.ts
deleted file mode 100644
index c0161cae..00000000
--- a/ccw/frontend/src/hooks/useV2SearchManager.ts
+++ /dev/null
@@ -1,159 +0,0 @@
-// ========================================
-// useV2SearchManager Hook
-// ========================================
-// React hook for v2 search management via smart_search tool
-
-import { useState, useCallback } from 'react';
-import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
-
-// ========== Types ==========
-
-export interface V2IndexStatus {
- indexed: boolean;
- totalFiles: number;
- totalChunks: number;
- lastIndexedAt: string | null;
- dbSizeBytes: number;
- vectorDimension: number | null;
- ftsEnabled: boolean;
-}
-
-export interface V2SearchTestResult {
- query: string;
- results: Array<{
- file: string;
- score: number;
- snippet: string;
- }>;
- timingMs: number;
- totalResults: number;
-}
-
-export interface UseV2SearchManagerReturn {
- status: V2IndexStatus | null;
- isLoadingStatus: boolean;
- statusError: Error | null;
- refetchStatus: () => void;
- search: (query: string) => Promise;
- isSearching: boolean;
- searchResult: V2SearchTestResult | null;
- reindex: () => Promise;
- isReindexing: boolean;
-}
-
-// ========== API helpers ==========
-
-async function fetchWithJson(url: string, body?: Record): Promise {
- const response = await fetch(url, {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- credentials: 'same-origin',
- body: JSON.stringify(body),
- });
- if (!response.ok) {
- throw new Error(`Request failed: ${response.status}`);
- }
- return response.json();
-}
-
-async function fetchV2Status(): Promise {
- const data = await fetchWithJson<{ result?: V2IndexStatus; error?: string }>('/api/tools', {
- tool_name: 'smart_search',
- action: 'status',
- });
- if (data.error) {
- throw new Error(data.error);
- }
- // Provide defaults for fields that may be missing
- return {
- indexed: false,
- totalFiles: 0,
- totalChunks: 0,
- lastIndexedAt: null,
- dbSizeBytes: 0,
- vectorDimension: null,
- ftsEnabled: false,
- ...data.result,
- };
-}
-
-async function fetchV2Search(query: string): Promise {
- const data = await fetchWithJson<{ result?: V2SearchTestResult; error?: string }>('/api/tools', {
- tool_name: 'smart_search',
- action: 'search',
- params: { query, limit: 10 },
- });
- if (data.error) {
- throw new Error(data.error);
- }
- return data.result ?? { query, results: [], timingMs: 0, totalResults: 0 };
-}
-
-async function fetchV2Reindex(): Promise {
- const data = await fetchWithJson<{ error?: string }>('/api/tools', {
- tool_name: 'smart_search',
- action: 'reindex',
- });
- if (data.error) {
- throw new Error(data.error);
- }
-}
-
-// ========== Query Keys ==========
-
-export const v2SearchKeys = {
- all: ['v2-search'] as const,
- status: () => [...v2SearchKeys.all, 'status'] as const,
-};
-
-// ========== Hook ==========
-
-export function useV2SearchManager(): UseV2SearchManagerReturn {
- const queryClient = useQueryClient();
- const [searchResult, setSearchResult] = useState(null);
-
- // Status query
- const statusQuery = useQuery({
- queryKey: v2SearchKeys.status(),
- queryFn: fetchV2Status,
- staleTime: 30_000,
- retry: 1,
- });
-
- // Search mutation
- const searchMutation = useMutation({
- mutationFn: (query: string) => fetchV2Search(query),
- onSuccess: (data) => {
- setSearchResult(data);
- },
- });
-
- // Reindex mutation
- const reindexMutation = useMutation({
- mutationFn: fetchV2Reindex,
- onSuccess: () => {
- queryClient.invalidateQueries({ queryKey: v2SearchKeys.status() });
- },
- });
-
- const search = useCallback(async (query: string) => {
- const result = await searchMutation.mutateAsync(query);
- return result;
- }, [searchMutation]);
-
- const reindex = useCallback(async () => {
- await reindexMutation.mutateAsync();
- }, [reindexMutation]);
-
- return {
- status: statusQuery.data ?? null,
- isLoadingStatus: statusQuery.isLoading,
- statusError: statusQuery.error as Error | null,
- refetchStatus: () => statusQuery.refetch(),
- search,
- isSearching: searchMutation.isPending,
- searchResult,
- reindex,
- isReindexing: reindexMutation.isPending,
- };
-}
diff --git a/ccw/frontend/src/lib/api.ts b/ccw/frontend/src/lib/api.ts
index 06ba090a..8efe2b46 100644
--- a/ccw/frontend/src/lib/api.ts
+++ b/ccw/frontend/src/lib/api.ts
@@ -3,11 +3,11 @@
// ========================================
// Typed fetch functions for API communication with CSRF token handling
-import type { SessionMetadata, TaskData, IndexStatus, IndexRebuildRequest, Rule, RuleCreateInput, RulesResponse, Prompt, PromptInsight, Pattern, Suggestion, McpTemplate, McpTemplateInstallRequest, AllProjectsResponse, OtherProjectsServersResponse, CrossCliCopyRequest, CrossCliCopyResponse } from '../types/store';
+import type { SessionMetadata, TaskData, Rule, RuleCreateInput, RulesResponse, Prompt, PromptInsight, Pattern, Suggestion, McpTemplate, McpTemplateInstallRequest, AllProjectsResponse, OtherProjectsServersResponse, CrossCliCopyRequest, CrossCliCopyResponse } from '../types/store';
import type { TeamArtifactsResponse } from '../types/team';
// Re-export types for backward compatibility
-export type { IndexStatus, IndexRebuildRequest, Rule, RuleCreateInput, RulesResponse, Prompt, PromptInsight, Pattern, Suggestion, McpTemplate, McpTemplateInstallRequest, AllProjectsResponse, OtherProjectsServersResponse, CrossCliCopyRequest, CrossCliCopyResponse };
+export type { Rule, RuleCreateInput, RulesResponse, Prompt, PromptInsight, Pattern, Suggestion, McpTemplate, McpTemplateInstallRequest, AllProjectsResponse, OtherProjectsServersResponse, CrossCliCopyRequest, CrossCliCopyResponse };
/**
@@ -4648,10 +4648,10 @@ export async function fetchCcwMcpConfig(currentProjectPath?: string): Promise t.trim()).filter(Boolean);
@@ -4710,7 +4710,7 @@ export async function installCcwMcp(
scope,
projectPath: path,
env: {
- enabledTools: ['write_file', 'edit_file', 'read_file', 'core_memory', 'ask_question', 'smart_search'],
+ enabledTools: ['write_file', 'edit_file', 'read_file', 'core_memory', 'ask_question'],
},
}),
});
@@ -4793,10 +4793,10 @@ export async function fetchCcwMcpConfigForCodex(): Promise {
let enabledTools: string[];
if (enabledToolsStr === undefined || enabledToolsStr === null) {
// No setting = use default tools
- enabledTools = ['write_file', 'edit_file', 'read_file', 'core_memory', 'ask_question', 'smart_search'];
+ enabledTools = ['write_file', 'edit_file', 'read_file', 'core_memory', 'ask_question'];
} else if (enabledToolsStr === '' || enabledToolsStr === 'all') {
// Empty string = all tools disabled, 'all' = default set (for backward compatibility)
- enabledTools = enabledToolsStr === '' ? [] : ['write_file', 'edit_file', 'read_file', 'core_memory', 'ask_question', 'smart_search'];
+ enabledTools = enabledToolsStr === '' ? [] : ['write_file', 'edit_file', 'read_file', 'core_memory', 'ask_question'];
} else {
// Comma-separated list
enabledTools = enabledToolsStr.split(',').map((t: string) => t.trim()).filter(Boolean);
@@ -4831,7 +4831,7 @@ function buildCcwMcpServerConfigForCodex(config: {
if (config.enabledTools !== undefined) {
env.CCW_ENABLED_TOOLS = config.enabledTools.join(',');
} else {
- env.CCW_ENABLED_TOOLS = 'write_file,edit_file,read_file,core_memory,ask_question,smart_search';
+ env.CCW_ENABLED_TOOLS = 'write_file,edit_file,read_file,core_memory,ask_question';
}
if (config.projectRoot) {
@@ -4852,7 +4852,7 @@ function buildCcwMcpServerConfigForCodex(config: {
*/
export async function installCcwMcpToCodex(): Promise {
const serverConfig = buildCcwMcpServerConfigForCodex({
- enabledTools: ['write_file', 'edit_file', 'read_file', 'core_memory', 'ask_question', 'smart_search'],
+ enabledTools: ['write_file', 'edit_file', 'read_file', 'core_memory', 'ask_question'],
});
const result = await addCodexMcpServer('ccw-tools', serverConfig);
@@ -4892,42 +4892,6 @@ export async function updateCcwConfigForCodex(config: {
return fetchCcwMcpConfigForCodex();
}
-// ========== Index Management API ==========
-
-/**
- * Fetch current index status for a specific workspace
- * @param projectPath - Optional project path to filter data by workspace
- */
-export async function fetchIndexStatus(_projectPath?: string): Promise {
- const resp = await fetchApi<{ result?: { indexed?: boolean; totalFiles?: number } }>('/api/tools', {
- method: 'POST',
- body: JSON.stringify({ tool_name: 'smart_search', action: 'status' }),
- });
- const result = resp.result ?? {};
- return {
- totalFiles: result.totalFiles ?? 0,
- lastUpdated: new Date().toISOString(),
- buildTime: 0,
- status: result.indexed ? 'completed' : 'idle',
- };
-}
-
-/**
- * Rebuild index
- */
-export async function rebuildIndex(_request: IndexRebuildRequest = {}): Promise {
- await fetchApi<{ error?: string }>('/api/tools', {
- method: 'POST',
- body: JSON.stringify({ tool_name: 'smart_search', action: 'reindex' }),
- });
- return {
- totalFiles: 0,
- lastUpdated: new Date().toISOString(),
- buildTime: 0,
- status: 'building',
- };
-}
-
// ========== Prompt History API ==========
/**
diff --git a/ccw/frontend/src/locales/en/mcp-manager.json b/ccw/frontend/src/locales/en/mcp-manager.json
index 629546c1..88646d6c 100644
--- a/ccw/frontend/src/locales/en/mcp-manager.json
+++ b/ccw/frontend/src/locales/en/mcp-manager.json
@@ -183,10 +183,6 @@
"name": "ask_question",
"desc": "Ask interactive questions through A2UI interface"
},
- "smart_search": {
- "name": "smart_search",
- "desc": "Intelligent code search with fuzzy and semantic modes"
- },
"team_msg": {
"name": "team_msg",
"desc": "Persistent JSONL message bus for Agent Team communication"
diff --git a/ccw/frontend/src/locales/zh/mcp-manager.json b/ccw/frontend/src/locales/zh/mcp-manager.json
index d5dba646..311ab20d 100644
--- a/ccw/frontend/src/locales/zh/mcp-manager.json
+++ b/ccw/frontend/src/locales/zh/mcp-manager.json
@@ -172,10 +172,6 @@
"name": "ask_question",
"desc": "通过 A2UI 界面发起交互式问答"
},
- "smart_search": {
- "name": "smart_search",
- "desc": "智能代码搜索,支持模糊和语义搜索模式"
- },
"team_msg": {
"name": "team_msg",
"desc": "Agent Team 持久化消息总线,用于团队协作通信"
diff --git a/ccw/frontend/src/pages/CodexLensManagerPage.test.tsx b/ccw/frontend/src/pages/CodexLensManagerPage.test.tsx
deleted file mode 100644
index a744ddcb..00000000
--- a/ccw/frontend/src/pages/CodexLensManagerPage.test.tsx
+++ /dev/null
@@ -1,196 +0,0 @@
-// ========================================
-// CodexLens Manager Page Tests (v2)
-// ========================================
-// Tests for v2 search management page
-
-import { describe, it, expect, beforeEach, vi } from 'vitest';
-import { render, screen } from '@/test/i18n';
-import userEvent from '@testing-library/user-event';
-import { CodexLensManagerPage } from './CodexLensManagerPage';
-
-// Mock the v2 search manager hook
-vi.mock('@/hooks/useV2SearchManager', () => ({
- useV2SearchManager: vi.fn(),
-}));
-
-import { useV2SearchManager } from '@/hooks/useV2SearchManager';
-
-const mockStatus = {
- indexed: true,
- totalFiles: 150,
- totalChunks: 1200,
- lastIndexedAt: '2026-03-17T10:00:00Z',
- dbSizeBytes: 5242880,
- vectorDimension: 384,
- ftsEnabled: true,
-};
-
-const defaultHookReturn = {
- status: mockStatus,
- isLoadingStatus: false,
- statusError: null,
- refetchStatus: vi.fn(),
- search: vi.fn().mockResolvedValue({
- query: 'test',
- results: [],
- timingMs: 12.5,
- totalResults: 0,
- }),
- isSearching: false,
- searchResult: null,
- reindex: vi.fn().mockResolvedValue(undefined),
- isReindexing: false,
-};
-
-describe('CodexLensManagerPage (v2)', () => {
- beforeEach(() => {
- vi.clearAllMocks();
- (vi.mocked(useV2SearchManager) as any).mockReturnValue(defaultHookReturn);
- });
-
- it('should render page title', () => {
- render();
- // The title comes from i18n codexlens.title
- expect(screen.getByRole('heading', { level: 1 })).toBeInTheDocument();
- });
-
- it('should render index status section', () => {
- render();
- // Check for file count display
- expect(screen.getByText('150')).toBeInTheDocument();
- });
-
- it('should render search input', () => {
- render();
- const input = screen.getByPlaceholderText(/search query/i);
- expect(input).toBeInTheDocument();
- });
-
- it('should call refetchStatus on refresh click', async () => {
- const refetchStatus = vi.fn();
- (vi.mocked(useV2SearchManager) as any).mockReturnValue({
- ...defaultHookReturn,
- refetchStatus,
- });
-
- const user = userEvent.setup();
- render();
-
- const refreshButton = screen.getByText(/Refresh/i);
- await user.click(refreshButton);
-
- expect(refetchStatus).toHaveBeenCalledOnce();
- });
-
- it('should call search when clicking search button', async () => {
- const searchFn = vi.fn().mockResolvedValue({
- query: 'test query',
- results: [],
- timingMs: 5,
- totalResults: 0,
- });
- (vi.mocked(useV2SearchManager) as any).mockReturnValue({
- ...defaultHookReturn,
- search: searchFn,
- });
-
- const user = userEvent.setup();
- render();
-
- const input = screen.getByPlaceholderText(/search query/i);
- await user.type(input, 'test query');
-
- const searchButton = screen.getByText(/Search/i);
- await user.click(searchButton);
-
- expect(searchFn).toHaveBeenCalledWith('test query');
- });
-
- it('should display search results', () => {
- (vi.mocked(useV2SearchManager) as any).mockReturnValue({
- ...defaultHookReturn,
- searchResult: {
- query: 'auth',
- results: [
- { file: 'src/auth.ts', score: 0.95, snippet: 'export function authenticate()' },
- ],
- timingMs: 8.2,
- totalResults: 1,
- },
- });
-
- render();
-
- expect(screen.getByText('src/auth.ts')).toBeInTheDocument();
- expect(screen.getByText('95.0%')).toBeInTheDocument();
- expect(screen.getByText('export function authenticate()')).toBeInTheDocument();
- });
-
- it('should call reindex on button click', async () => {
- const reindexFn = vi.fn().mockResolvedValue(undefined);
- (vi.mocked(useV2SearchManager) as any).mockReturnValue({
- ...defaultHookReturn,
- reindex: reindexFn,
- });
-
- const user = userEvent.setup();
- render();
-
- const reindexButton = screen.getByText(/Reindex/i);
- await user.click(reindexButton);
-
- expect(reindexFn).toHaveBeenCalledOnce();
- });
-
- it('should show loading skeleton when status is loading', () => {
- (vi.mocked(useV2SearchManager) as any).mockReturnValue({
- ...defaultHookReturn,
- status: null,
- isLoadingStatus: true,
- });
-
- render();
-
- // Should have pulse animation elements
- const pulseElements = document.querySelectorAll('.animate-pulse');
- expect(pulseElements.length).toBeGreaterThan(0);
- });
-
- it('should show error alert when status fetch fails', () => {
- (vi.mocked(useV2SearchManager) as any).mockReturnValue({
- ...defaultHookReturn,
- status: null,
- statusError: new Error('Network error'),
- });
-
- render();
-
- // Error message should be visible
- expect(screen.getByText(/Failed to load/i)).toBeInTheDocument();
- });
-
- it('should show not indexed state', () => {
- (vi.mocked(useV2SearchManager) as any).mockReturnValue({
- ...defaultHookReturn,
- status: {
- ...mockStatus,
- indexed: false,
- totalFiles: 0,
- totalChunks: 0,
- },
- });
-
- render();
-
- expect(screen.getByText(/Not Indexed/i)).toBeInTheDocument();
- });
-
- describe('i18n - Chinese locale', () => {
- it('should display translated text in Chinese', () => {
- render(, { locale: 'zh' });
-
- // Page title from zh codexlens.json
- expect(screen.getByRole('heading', { level: 1 })).toBeInTheDocument();
- });
- });
-});
diff --git a/ccw/frontend/src/pages/CodexLensManagerPage.tsx b/ccw/frontend/src/pages/CodexLensManagerPage.tsx
deleted file mode 100644
index ce67d799..00000000
--- a/ccw/frontend/src/pages/CodexLensManagerPage.tsx
+++ /dev/null
@@ -1,277 +0,0 @@
-// ========================================
-// CodexLens Manager Page (v2)
-// ========================================
-// V2 search management interface with index status, search test, and configuration
-
-import { useState } from 'react';
-import { useIntl } from 'react-intl';
-import {
- Search,
- RefreshCw,
- Database,
- Zap,
- AlertCircle,
- CheckCircle2,
- Clock,
- FileText,
- HardDrive,
-} from 'lucide-react';
-import { Card } from '@/components/ui/Card';
-import { Button } from '@/components/ui/Button';
-import { useV2SearchManager } from '@/hooks';
-import { cn } from '@/lib/utils';
-
-function formatBytes(bytes: number): string {
- if (bytes === 0) return '0 B';
- const units = ['B', 'KB', 'MB', 'GB'];
- const i = Math.floor(Math.log(bytes) / Math.log(1024));
- return `${(bytes / Math.pow(1024, i)).toFixed(1)} ${units[i]}`;
-}
-
-function formatDate(dateStr: string | null): string {
- if (!dateStr) return '-';
- try {
- return new Date(dateStr).toLocaleString();
- } catch {
- return dateStr;
- }
-}
-
-export function CodexLensManagerPage() {
- const { formatMessage } = useIntl();
- const [searchQuery, setSearchQuery] = useState('');
-
- const {
- status,
- isLoadingStatus,
- statusError,
- refetchStatus,
- search,
- isSearching,
- searchResult,
- reindex,
- isReindexing,
- } = useV2SearchManager();
-
- const handleSearch = async () => {
- if (!searchQuery.trim()) return;
- await search(searchQuery.trim());
- };
-
- const handleKeyDown = (e: React.KeyboardEvent) => {
- if (e.key === 'Enter') {
- handleSearch();
- }
- };
-
- return (
-
- {/* Page Header */}
-
-
-
-
- {formatMessage({ id: 'codexlens.title' })}
-
-
- {formatMessage({ id: 'codexlens.description' })}
-
-
-
-
-
-
-
-
- {/* Error Alert */}
- {statusError && (
-
-
-
-
- {formatMessage({ id: 'codexlens.statusError' })}
-
-
-
- )}
-
- {/* Index Status Section */}
-
-
-
- {formatMessage({ id: 'codexlens.indexStatus.title' })}
-
-
- {isLoadingStatus ? (
-
- {[1, 2, 3, 4].map((i) => (
-
- ))}
-
- ) : status ? (
-
-
- {status.indexed ? (
-
- ) : (
-
- )}
-
-
- {formatMessage({ id: 'codexlens.indexStatus.status' })}
-
-
- {status.indexed
- ? formatMessage({ id: 'codexlens.indexStatus.ready' })
- : formatMessage({ id: 'codexlens.indexStatus.notIndexed' })
- }
-
-
-
-
-
-
-
-
- {formatMessage({ id: 'codexlens.indexStatus.files' })}
-
-
{status.totalFiles.toLocaleString()}
-
-
-
-
-
-
-
- {formatMessage({ id: 'codexlens.indexStatus.dbSize' })}
-
-
{formatBytes(status.dbSizeBytes)}
-
-
-
-
-
-
-
- {formatMessage({ id: 'codexlens.indexStatus.lastIndexed' })}
-
-
{formatDate(status.lastIndexedAt)}
-
-
-
- ) : (
-
- {formatMessage({ id: 'codexlens.indexStatus.unavailable' })}
-
- )}
-
- {status && (
-
-
- {formatMessage({ id: 'codexlens.indexStatus.chunks' })}: {status.totalChunks.toLocaleString()}
-
- {status.vectorDimension && (
-
- {formatMessage({ id: 'codexlens.indexStatus.vectorDim' })}: {status.vectorDimension}
-
- )}
-
- FTS: {status.ftsEnabled
- ? formatMessage({ id: 'codexlens.indexStatus.enabled' })
- : formatMessage({ id: 'codexlens.indexStatus.disabled' })
- }
-
-
- )}
-
-
- {/* Search Test Section */}
-
-
-
- {formatMessage({ id: 'codexlens.searchTest.title' })}
-
-
-
- setSearchQuery(e.target.value)}
- onKeyDown={handleKeyDown}
- placeholder={formatMessage({ id: 'codexlens.searchTest.placeholder' })}
- className="flex-1 px-3 py-2 border border-input rounded-md bg-background text-sm focus:outline-none focus:ring-2 focus:ring-ring"
- />
-
-
-
- {searchResult && (
-
-
-
- {searchResult.totalResults} {formatMessage({ id: 'codexlens.searchTest.results' })}
-
-
- {searchResult.timingMs.toFixed(1)}ms
-
-
-
- {searchResult.results.length > 0 ? (
-
- {searchResult.results.map((result, idx) => (
-
-
-
- {result.file}
-
-
- {(result.score * 100).toFixed(1)}%
-
-
-
- {result.snippet}
-
-
- ))}
-
- ) : (
-
- {formatMessage({ id: 'codexlens.searchTest.noResults' })}
-
- )}
-
- )}
-
-
- );
-}
-
-export default CodexLensManagerPage;
diff --git a/ccw/frontend/src/pages/index.ts b/ccw/frontend/src/pages/index.ts
index 7d777ece..e11daf44 100644
--- a/ccw/frontend/src/pages/index.ts
+++ b/ccw/frontend/src/pages/index.ts
@@ -29,7 +29,6 @@ export { RulesManagerPage } from './RulesManagerPage';
export { PromptHistoryPage } from './PromptHistoryPage';
export { ExplorerPage } from './ExplorerPage';
export { GraphExplorerPage } from './GraphExplorerPage';
-export { CodexLensManagerPage } from './CodexLensManagerPage';
export { ApiSettingsPage } from './ApiSettingsPage';
export { CliViewerPage } from './CliViewerPage';
export { CliSessionSharePage } from './CliSessionSharePage';
diff --git a/ccw/frontend/src/router.tsx b/ccw/frontend/src/router.tsx
index 8078277e..aa7e49aa 100644
--- a/ccw/frontend/src/router.tsx
+++ b/ccw/frontend/src/router.tsx
@@ -35,7 +35,6 @@ const RulesManagerPage = lazy(() => import('@/pages/RulesManagerPage').then(m =>
const PromptHistoryPage = lazy(() => import('@/pages/PromptHistoryPage').then(m => ({ default: m.PromptHistoryPage })));
const ExplorerPage = lazy(() => import('@/pages/ExplorerPage').then(m => ({ default: m.ExplorerPage })));
const GraphExplorerPage = lazy(() => import('@/pages/GraphExplorerPage').then(m => ({ default: m.GraphExplorerPage })));
-const CodexLensManagerPage = lazy(() => import('@/pages/CodexLensManagerPage').then(m => ({ default: m.CodexLensManagerPage })));
const ApiSettingsPage = lazy(() => import('@/pages/ApiSettingsPage').then(m => ({ default: m.ApiSettingsPage })));
const CliViewerPage = lazy(() => import('@/pages/CliViewerPage').then(m => ({ default: m.CliViewerPage })));
const CliSessionSharePage = lazy(() => import('@/pages/CliSessionSharePage').then(m => ({ default: m.CliSessionSharePage })));
@@ -170,10 +169,6 @@ const routes: RouteObject[] = [
path: 'settings/specs',
element: withErrorHandling(),
},
- {
- path: 'settings/codexlens',
- element: withErrorHandling(),
- },
{
path: 'api-settings',
element: withErrorHandling(),
@@ -260,7 +255,6 @@ export const ROUTES = {
ENDPOINTS: '/settings/endpoints',
INSTALLATIONS: '/settings/installations',
SETTINGS_RULES: '/settings/rules',
- CODEXLENS_MANAGER: '/settings/codexlens',
API_SETTINGS: '/api-settings',
EXPLORER: '/explorer',
GRAPH: '/graph',
diff --git a/ccw/frontend/src/test/i18n.tsx b/ccw/frontend/src/test/i18n.tsx
index 9ff9b018..1f2334d7 100644
--- a/ccw/frontend/src/test/i18n.tsx
+++ b/ccw/frontend/src/test/i18n.tsx
@@ -172,8 +172,6 @@ const mockMessages: Record> = {
'mcp.ccw.tools.core_memory.desc': 'Core memory management',
'mcp.ccw.tools.ask_question.name': 'Ask Question',
'mcp.ccw.tools.ask_question.desc': 'Interactive questions (A2UI)',
- 'mcp.ccw.tools.smart_search.name': 'Smart Search',
- 'mcp.ccw.tools.smart_search.desc': 'Intelligent code search',
'mcp.ccw.tools.team_msg.name': 'Team Message',
'mcp.ccw.tools.team_msg.desc': 'Agent team message bus',
'mcp.ccw.paths.label': 'Paths',
@@ -348,8 +346,6 @@ const mockMessages: Record> = {
'mcp.ccw.tools.core_memory.desc': '核心记忆管理',
'mcp.ccw.tools.ask_question.name': '提问',
'mcp.ccw.tools.ask_question.desc': '交互式问题(A2UI)',
- 'mcp.ccw.tools.smart_search.name': '智能搜索',
- 'mcp.ccw.tools.smart_search.desc': '智能代码搜索',
'mcp.ccw.tools.team_msg.name': '团队消息',
'mcp.ccw.tools.team_msg.desc': '代理团队消息总线',
'mcp.ccw.paths.label': '路径',
diff --git a/ccw/frontend/src/types/index.ts b/ccw/frontend/src/types/index.ts
index 732eab8c..d004973a 100644
--- a/ccw/frontend/src/types/index.ts
+++ b/ccw/frontend/src/types/index.ts
@@ -40,9 +40,6 @@ export type {
NotificationState,
NotificationActions,
NotificationStore,
- // Index Manager
- IndexStatus,
- IndexRebuildRequest,
// Rules
Rule,
RuleCreateInput,
diff --git a/ccw/scripts/IMPLEMENTATION-SUMMARY.md b/ccw/scripts/IMPLEMENTATION-SUMMARY.md
deleted file mode 100644
index 8dbfc99b..00000000
--- a/ccw/scripts/IMPLEMENTATION-SUMMARY.md
+++ /dev/null
@@ -1,226 +0,0 @@
-# Memory Embedder Implementation Summary
-
-## Overview
-
-Created a Python script (`memory_embedder.py`) that bridges CCW to CodexLens semantic search by generating and searching embeddings for memory chunks stored in CCW's SQLite database.
-
-## Files Created
-
-### 1. `memory_embedder.py` (Main Script)
-**Location**: `D:\Claude_dms3\ccw\scripts\memory_embedder.py`
-
-**Features**:
-- Reuses CodexLens embedder: `from codexlens.semantic.embedder import get_embedder`
-- Uses jina-embeddings-v2-base-code (768 dimensions)
-- Three commands: `embed`, `search`, `status`
-- JSON output for easy integration
-- Batch processing for efficiency
-- Graceful error handling
-
-**Commands**:
-
-1. **embed** - Generate embeddings
- ```bash
- python memory_embedder.py embed [options]
- Options:
- --source-id ID # Only process specific source
- --batch-size N # Batch size (default: 8)
- --force # Re-embed existing chunks
- ```
-
-2. **search** - Semantic search
- ```bash
- python memory_embedder.py search [options]
- Options:
- --top-k N # Number of results (default: 10)
- --min-score F # Minimum score (default: 0.3)
- --type TYPE # Filter by source type
- ```
-
-3. **status** - Get statistics
- ```bash
- python memory_embedder.py status
- ```
-
-### 2. `README-memory-embedder.md` (Documentation)
-**Location**: `D:\Claude_dms3\ccw\scripts\README-memory-embedder.md`
-
-**Contents**:
-- Feature overview
-- Requirements and installation
-- Detailed usage examples
-- Database path reference
-- TypeScript integration guide
-- Performance metrics
-- Source type descriptions
-
-### 3. `memory-embedder-example.ts` (Integration Example)
-**Location**: `D:\Claude_dms3\ccw\scripts\memory-embedder-example.ts`
-
-**Exported Functions**:
-- `embedChunks(dbPath, options)` - Generate embeddings
-- `searchMemory(dbPath, query, options)` - Semantic search
-- `getEmbeddingStatus(dbPath)` - Get status
-
-**Example Usage**:
-```typescript
-import { searchMemory, embedChunks, getEmbeddingStatus } from './memory-embedder-example';
-
-// Check status
-const status = getEmbeddingStatus(dbPath);
-
-// Generate embeddings
-const result = embedChunks(dbPath, { batchSize: 16 });
-
-// Search
-const matches = searchMemory(dbPath, 'authentication', {
- topK: 5,
- minScore: 0.5,
- sourceType: 'workflow'
-});
-```
-
-## Technical Implementation
-
-### Database Schema
-Uses existing `memory_chunks` table:
-```sql
-CREATE TABLE memory_chunks (
- id INTEGER PRIMARY KEY AUTOINCREMENT,
- source_id TEXT NOT NULL,
- source_type TEXT NOT NULL,
- chunk_index INTEGER NOT NULL,
- content TEXT NOT NULL,
- embedding BLOB,
- metadata TEXT,
- created_at TEXT NOT NULL,
- UNIQUE(source_id, chunk_index)
-);
-```
-
-### Embedding Storage
-- Format: `float32` bytes (numpy array)
-- Dimension: 768 (jina-embeddings-v2-base-code)
-- Storage: `np.array(emb, dtype=np.float32).tobytes()`
-- Loading: `np.frombuffer(blob, dtype=np.float32)`
-
-### Similarity Search
-- Algorithm: Cosine similarity
-- Formula: `np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))`
-- Default threshold: 0.3
-- Sorting: Descending by score
-
-### Source Types
-- `core_memory`: Strategic architectural context
-- `workflow`: Session-based development history
-- `cli_history`: Command execution logs
-
-### Restore Commands
-Generated automatically for each match:
-- core_memory/cli_history: `ccw memory export `
-- workflow: `ccw session resume `
-
-## Dependencies
-
-### Required
-- `numpy`: Array operations and cosine similarity
-- `codex-lens[semantic]`: Embedding generation
-
-### Installation
-```bash
-pip install numpy codex-lens[semantic]
-```
-
-## Testing
-
-### Script Validation
-```bash
-# Syntax check
-python -m py_compile scripts/memory_embedder.py # OK
-
-# Help output
-python scripts/memory_embedder.py --help # Works
-python scripts/memory_embedder.py embed --help # Works
-python scripts/memory_embedder.py search --help # Works
-python scripts/memory_embedder.py status --help # Works
-
-# Status test
-python scripts/memory_embedder.py status # Works
-```
-
-### Error Handling
-- Missing database: FileNotFoundError with clear message
-- Missing CodexLens: ImportError with installation instructions
-- Missing numpy: ImportError with installation instructions
-- Database errors: JSON error response with success=false
-- Missing table: Graceful error with JSON output
-
-## Performance
-
-- **Embedding speed**: ~8 chunks/second (batch size 8)
-- **Search speed**: ~0.1-0.5 seconds for 1000 chunks
-- **Model loading**: ~0.8 seconds (cached after first use via CodexLens singleton)
-- **Batch processing**: Configurable batch size (default: 8)
-
-## Output Format
-
-All commands output JSON for easy parsing:
-
-### Embed Result
-```json
-{
- "success": true,
- "chunks_processed": 50,
- "chunks_failed": 0,
- "elapsed_time": 12.34
-}
-```
-
-### Search Result
-```json
-{
- "success": true,
- "matches": [
- {
- "source_id": "WFS-20250101-auth",
- "source_type": "workflow",
- "chunk_index": 2,
- "content": "Implemented JWT...",
- "score": 0.8542,
- "restore_command": "ccw session resume WFS-20250101-auth"
- }
- ]
-}
-```
-
-### Status Result
-```json
-{
- "total_chunks": 150,
- "embedded_chunks": 100,
- "pending_chunks": 50,
- "by_type": {
- "core_memory": {"total": 80, "embedded": 60, "pending": 20}
- }
-}
-```
-
-## Next Steps
-
-1. **TypeScript Integration**: Add to CCW's core memory routes
-2. **CLI Command**: Create `ccw memory search` command
-3. **Automatic Embedding**: Trigger embedding on memory creation
-4. **Index Management**: Add rebuild/optimize commands
-5. **Cluster Search**: Integrate with session clusters
-
-## Code Quality
-
-- ✅ Single responsibility per function
-- ✅ Clear, descriptive naming
-- ✅ Explicit error handling
-- ✅ No premature abstractions
-- ✅ Minimal debug output (essential logging only)
-- ✅ ASCII-only characters (no emojis)
-- ✅ GBK encoding compatible
-- ✅ Type hints for all functions
-- ✅ Comprehensive docstrings
diff --git a/ccw/scripts/QUICK-REFERENCE.md b/ccw/scripts/QUICK-REFERENCE.md
deleted file mode 100644
index 0ef204ea..00000000
--- a/ccw/scripts/QUICK-REFERENCE.md
+++ /dev/null
@@ -1,135 +0,0 @@
-# Memory Embedder - Quick Reference
-
-## Installation
-
-```bash
-pip install numpy codex-lens[semantic]
-```
-
-## Commands
-
-### Status
-```bash
-python scripts/memory_embedder.py status
-```
-
-### Embed All
-```bash
-python scripts/memory_embedder.py embed
-```
-
-### Embed Specific Source
-```bash
-python scripts/memory_embedder.py embed --source-id CMEM-20250101-120000
-```
-
-### Re-embed (Force)
-```bash
-python scripts/memory_embedder.py embed --force
-```
-
-### Search
-```bash
-python scripts/memory_embedder.py search "authentication flow"
-```
-
-### Advanced Search
-```bash
-python scripts/memory_embedder.py search "rate limiting" \
- --top-k 5 \
- --min-score 0.5 \
- --type workflow
-```
-
-## Database Path
-
-Find your database:
-```bash
-# Linux/Mac
-~/.ccw/projects//core-memory/core_memory.db
-
-# Windows
-%USERPROFILE%\.ccw\projects\\core-memory\core_memory.db
-```
-
-## TypeScript Integration
-
-```typescript
-import { execSync } from 'child_process';
-
-// Status
-const status = JSON.parse(
- execSync(`python scripts/memory_embedder.py status "${dbPath}"`, {
- encoding: 'utf-8'
- })
-);
-
-// Embed
-const result = JSON.parse(
- execSync(`python scripts/memory_embedder.py embed "${dbPath}"`, {
- encoding: 'utf-8'
- })
-);
-
-// Search
-const matches = JSON.parse(
- execSync(
- `python scripts/memory_embedder.py search "${dbPath}" "query"`,
- { encoding: 'utf-8' }
- )
-);
-```
-
-## Output Examples
-
-### Status
-```json
-{
- "total_chunks": 150,
- "embedded_chunks": 100,
- "pending_chunks": 50,
- "by_type": {
- "core_memory": {"total": 80, "embedded": 60, "pending": 20}
- }
-}
-```
-
-### Embed
-```json
-{
- "success": true,
- "chunks_processed": 50,
- "chunks_failed": 0,
- "elapsed_time": 12.34
-}
-```
-
-### Search
-```json
-{
- "success": true,
- "matches": [
- {
- "source_id": "WFS-20250101-auth",
- "source_type": "workflow",
- "chunk_index": 2,
- "content": "Implemented JWT authentication...",
- "score": 0.8542,
- "restore_command": "ccw session resume WFS-20250101-auth"
- }
- ]
-}
-```
-
-## Source Types
-
-- `core_memory` - Strategic architectural context
-- `workflow` - Session-based development history
-- `cli_history` - Command execution logs
-
-## Performance
-
-- Embedding: ~8 chunks/second
-- Search: ~0.1-0.5s for 1000 chunks
-- Model load: ~0.8s (cached)
-- Batch size: 8 (default, configurable)
diff --git a/ccw/scripts/README-memory-embedder.md b/ccw/scripts/README-memory-embedder.md
deleted file mode 100644
index 0954aeb0..00000000
--- a/ccw/scripts/README-memory-embedder.md
+++ /dev/null
@@ -1,157 +0,0 @@
-# Memory Embedder
-
-Bridge CCW to CodexLens semantic search by generating and searching embeddings for memory chunks.
-
-## Features
-
-- **Generate embeddings** for memory chunks using CodexLens's jina-embeddings-v2-base-code (768 dim)
-- **Semantic search** across all memory types (core_memory, workflow, cli_history)
-- **Status tracking** to monitor embedding progress
-- **Batch processing** for efficient embedding generation
-- **Restore commands** included in search results
-
-## Requirements
-
-```bash
-pip install numpy codex-lens[semantic]
-```
-
-## Usage
-
-### 1. Check Status
-
-```bash
-python scripts/memory_embedder.py status
-```
-
-Example output:
-```json
-{
- "total_chunks": 150,
- "embedded_chunks": 100,
- "pending_chunks": 50,
- "by_type": {
- "core_memory": {"total": 80, "embedded": 60, "pending": 20},
- "workflow": {"total": 50, "embedded": 30, "pending": 20},
- "cli_history": {"total": 20, "embedded": 10, "pending": 10}
- }
-}
-```
-
-### 2. Generate Embeddings
-
-Embed all unembedded chunks:
-```bash
-python scripts/memory_embedder.py embed
-```
-
-Embed specific source:
-```bash
-python scripts/memory_embedder.py embed --source-id CMEM-20250101-120000
-```
-
-Re-embed all chunks (force):
-```bash
-python scripts/memory_embedder.py embed --force
-```
-
-Adjust batch size (default 8):
-```bash
-python scripts/memory_embedder.py embed --batch-size 16
-```
-
-Example output:
-```json
-{
- "success": true,
- "chunks_processed": 50,
- "chunks_failed": 0,
- "elapsed_time": 12.34
-}
-```
-
-### 3. Semantic Search
-
-Basic search:
-```bash
-python scripts/memory_embedder.py search "authentication flow"
-```
-
-Advanced search:
-```bash
-python scripts/memory_embedder.py search "rate limiting" \
- --top-k 5 \
- --min-score 0.5 \
- --type workflow
-```
-
-Example output:
-```json
-{
- "success": true,
- "matches": [
- {
- "source_id": "WFS-20250101-auth",
- "source_type": "workflow",
- "chunk_index": 2,
- "content": "Implemented JWT-based authentication...",
- "score": 0.8542,
- "restore_command": "ccw session resume WFS-20250101-auth"
- }
- ]
-}
-```
-
-## Database Path
-
-The database is located in CCW's storage directory:
-
-- **Windows**: `%USERPROFILE%\.ccw\projects\\core-memory\core_memory.db`
-- **Linux/Mac**: `~/.ccw/projects//core-memory/core_memory.db`
-
-Find your project's database:
-```bash
-ccw memory list # Shows project path
-# Then look in: ~/.ccw/projects//core-memory/core_memory.db
-```
-
-## Integration with CCW
-
-This script is designed to be called from CCW's TypeScript code:
-
-```typescript
-import { execSync } from 'child_process';
-
-// Embed chunks
-const result = execSync(
- `python scripts/memory_embedder.py embed ${dbPath}`,
- { encoding: 'utf-8' }
-);
-const { success, chunks_processed } = JSON.parse(result);
-
-// Search
-const searchResult = execSync(
- `python scripts/memory_embedder.py search ${dbPath} "${query}" --top-k 10`,
- { encoding: 'utf-8' }
-);
-const { matches } = JSON.parse(searchResult);
-```
-
-## Performance
-
-- **Embedding speed**: ~8 chunks/second (batch size 8)
-- **Search speed**: ~0.1-0.5 seconds for 1000 chunks
-- **Model loading**: ~0.8 seconds (cached after first use)
-
-## Source Types
-
-- `core_memory`: Strategic architectural context
-- `workflow`: Session-based development history
-- `cli_history`: Command execution logs
-
-## Restore Commands
-
-Search results include restore commands:
-
-- **core_memory/cli_history**: `ccw memory export `
-- **workflow**: `ccw session resume `
diff --git a/ccw/scripts/memory-embedder-example.ts b/ccw/scripts/memory-embedder-example.ts
deleted file mode 100644
index 32998fa0..00000000
--- a/ccw/scripts/memory-embedder-example.ts
+++ /dev/null
@@ -1,184 +0,0 @@
-/**
- * Example: Using Memory Embedder from TypeScript
- *
- * This shows how to integrate the Python memory embedder script
- * into CCW's TypeScript codebase.
- */
-
-import { execSync } from 'child_process';
-import { join } from 'path';
-
-interface EmbedResult {
- success: boolean;
- chunks_processed: number;
- chunks_failed: number;
- elapsed_time: number;
-}
-
-interface SearchMatch {
- source_id: string;
- source_type: 'core_memory' | 'workflow' | 'cli_history';
- chunk_index: number;
- content: string;
- score: number;
- restore_command: string;
-}
-
-interface SearchResult {
- success: boolean;
- matches: SearchMatch[];
- error?: string;
-}
-
-interface StatusResult {
- total_chunks: number;
- embedded_chunks: number;
- pending_chunks: number;
- by_type: Record;
-}
-
-/**
- * Get path to memory embedder script
- */
-function getEmbedderScript(): string {
- return join(__dirname, 'memory_embedder.py');
-}
-
-/**
- * Execute memory embedder command
- */
-function execEmbedder(args: string[]): string {
- const script = getEmbedderScript();
- const command = `python "${script}" ${args.join(' ')}`;
-
- try {
- return execSync(command, {
- encoding: 'utf-8',
- maxBuffer: 10 * 1024 * 1024 // 10MB buffer
- });
- } catch (error: any) {
- // Try to parse error output as JSON
- if (error.stdout) {
- return error.stdout;
- }
- throw new Error(`Embedder failed: ${error.message}`);
- }
-}
-
-/**
- * Generate embeddings for memory chunks
- */
-export function embedChunks(
- dbPath: string,
- options: {
- sourceId?: string;
- batchSize?: number;
- force?: boolean;
- } = {}
-): EmbedResult {
- const args = ['embed', `"${dbPath}"`];
-
- if (options.sourceId) {
- args.push('--source-id', options.sourceId);
- }
- if (options.batchSize) {
- args.push('--batch-size', String(options.batchSize));
- }
- if (options.force) {
- args.push('--force');
- }
-
- const output = execEmbedder(args);
- return JSON.parse(output);
-}
-
-/**
- * Search memory chunks semantically
- */
-export function searchMemory(
- dbPath: string,
- query: string,
- options: {
- topK?: number;
- minScore?: number;
- sourceType?: 'core_memory' | 'workflow' | 'cli_history';
- } = {}
-): SearchResult {
- const args = ['search', `"${dbPath}"`, `"${query}"`];
-
- if (options.topK) {
- args.push('--top-k', String(options.topK));
- }
- if (options.minScore !== undefined) {
- args.push('--min-score', String(options.minScore));
- }
- if (options.sourceType) {
- args.push('--type', options.sourceType);
- }
-
- const output = execEmbedder(args);
- return JSON.parse(output);
-}
-
-/**
- * Get embedding status
- */
-export function getEmbeddingStatus(dbPath: string): StatusResult {
- const args = ['status', `"${dbPath}"`];
- const output = execEmbedder(args);
- return JSON.parse(output);
-}
-
-// ============================================================================
-// Example Usage
-// ============================================================================
-
-async function exampleUsage() {
- const dbPath = join(process.env.HOME || '', '.ccw/projects/myproject/core-memory/core_memory.db');
-
- // 1. Check status
- console.log('Checking embedding status...');
- const status = getEmbeddingStatus(dbPath);
- console.log(`Total chunks: ${status.total_chunks}`);
- console.log(`Embedded: ${status.embedded_chunks}`);
- console.log(`Pending: ${status.pending_chunks}`);
-
- // 2. Generate embeddings if needed
- if (status.pending_chunks > 0) {
- console.log('\nGenerating embeddings...');
- const embedResult = embedChunks(dbPath, { batchSize: 16 });
- console.log(`Processed: ${embedResult.chunks_processed}`);
- console.log(`Time: ${embedResult.elapsed_time}s`);
- }
-
- // 3. Search for relevant memories
- console.log('\nSearching for authentication-related memories...');
- const searchResult = searchMemory(dbPath, 'authentication flow', {
- topK: 5,
- minScore: 0.5
- });
-
- if (searchResult.success) {
- console.log(`Found ${searchResult.matches.length} matches:`);
- for (const match of searchResult.matches) {
- console.log(`\n- ${match.source_id} (score: ${match.score})`);
- console.log(` Type: ${match.source_type}`);
- console.log(` Restore: ${match.restore_command}`);
- console.log(` Content: ${match.content.substring(0, 100)}...`);
- }
- }
-
- // 4. Search specific source type
- console.log('\nSearching workflows only...');
- const workflowSearch = searchMemory(dbPath, 'API implementation', {
- sourceType: 'workflow',
- topK: 3
- });
-
- console.log(`Found ${workflowSearch.matches.length} workflow matches`);
-}
-
-// Run example if executed directly
-if (require.main === module) {
- exampleUsage().catch(console.error);
-}
diff --git a/ccw/scripts/memory_embedder.py b/ccw/scripts/memory_embedder.py
deleted file mode 100644
index e026998f..00000000
--- a/ccw/scripts/memory_embedder.py
+++ /dev/null
@@ -1,428 +0,0 @@
-#!/usr/bin/env python3
-"""
-Memory Embedder - Bridge CCW to CodexLens semantic search
-
-This script generates and searches embeddings for memory chunks stored in CCW's
-SQLite database using CodexLens's embedder.
-
-Usage:
- python memory_embedder.py embed [--source-id ID] [--batch-size N] [--force]
- python memory_embedder.py search [--top-k N] [--min-score F] [--type TYPE]
- python memory_embedder.py status
-"""
-
-import argparse
-import json
-import sqlite3
-import sys
-import time
-from pathlib import Path
-from typing import List, Dict, Any, Optional, Tuple
-
-try:
- import numpy as np
-except ImportError:
- print("Error: numpy is required. Install with: pip install numpy", file=sys.stderr)
- sys.exit(1)
-
-try:
- from codexlens.semantic.factory import get_embedder as get_embedder_factory
- from codexlens.semantic.factory import clear_embedder_cache
- from codexlens.config import Config as CodexLensConfig
-except ImportError:
- print("Error: CodexLens not found. Install with: pip install codex-lens[semantic]", file=sys.stderr)
- sys.exit(1)
-
-
-class MemoryEmbedder:
- """Generate and search embeddings for memory chunks."""
-
- def __init__(self, db_path: str):
- """Initialize embedder with database path."""
- self.db_path = Path(db_path)
- if not self.db_path.exists():
- raise FileNotFoundError(f"Database not found: {db_path}")
-
- self.conn = sqlite3.connect(str(self.db_path))
- self.conn.row_factory = sqlite3.Row
-
- # Load CodexLens configuration for embedding settings
- try:
- self._config = CodexLensConfig.load()
- except Exception as e:
- print(f"Warning: Could not load CodexLens config, using defaults. Error: {e}", file=sys.stderr)
- self._config = CodexLensConfig() # Use default config
-
- # Lazy-load embedder to avoid ~0.8s model loading for status command
- self._embedder = None
- self._embedding_dim = None
-
- @property
- def embedding_dim(self) -> int:
- """Get embedding dimension from the embedder."""
- if self._embedding_dim is None:
- # Access embedder to get its dimension
- self._embedding_dim = self.embedder.embedding_dim
- return self._embedding_dim
-
- @property
- def embedder(self):
- """Lazy-load the embedder on first access using CodexLens config."""
- if self._embedder is None:
- # Use CodexLens configuration settings
- backend = self._config.embedding_backend
- model = self._config.embedding_model
- use_gpu = self._config.embedding_use_gpu
-
- # Use factory to create embedder based on backend type
- if backend == "fastembed":
- self._embedder = get_embedder_factory(
- backend="fastembed",
- profile=model,
- use_gpu=use_gpu
- )
- elif backend == "litellm":
- # For litellm backend, also pass endpoints if configured
- endpoints = self._config.embedding_endpoints
- strategy = self._config.embedding_strategy
- cooldown = self._config.embedding_cooldown
-
- self._embedder = get_embedder_factory(
- backend="litellm",
- model=model,
- endpoints=endpoints if endpoints else None,
- strategy=strategy,
- cooldown=cooldown,
- )
- else:
- # Fallback to fastembed with code profile
- self._embedder = get_embedder_factory(
- backend="fastembed",
- profile="code",
- use_gpu=True
- )
- return self._embedder
-
- def close(self):
- """Close database connection."""
- if self.conn:
- self.conn.close()
-
- def embed_chunks(
- self,
- source_id: Optional[str] = None,
- batch_size: int = 8,
- force: bool = False
- ) -> Dict[str, Any]:
- """
- Generate embeddings for unembedded chunks.
-
- Args:
- source_id: Only process chunks from this source
- batch_size: Number of chunks to process in each batch
- force: Re-embed chunks that already have embeddings
-
- Returns:
- Result dict with success, chunks_processed, chunks_failed, elapsed_time
- """
- start_time = time.time()
-
- # Build query
- query = "SELECT id, source_id, source_type, chunk_index, content FROM memory_chunks"
- params = []
-
- if force:
- # Process all chunks (with optional source filter)
- if source_id:
- query += " WHERE source_id = ?"
- params.append(source_id)
- else:
- # Only process chunks without embeddings
- query += " WHERE embedding IS NULL"
- if source_id:
- query += " AND source_id = ?"
- params.append(source_id)
-
- query += " ORDER BY id"
-
- cursor = self.conn.cursor()
- cursor.execute(query, params)
-
- chunks_processed = 0
- chunks_failed = 0
- batch = []
- batch_ids = []
-
- for row in cursor:
- batch.append(row["content"])
- batch_ids.append(row["id"])
-
- # Process batch when full
- if len(batch) >= batch_size:
- processed, failed = self._process_batch(batch, batch_ids)
- chunks_processed += processed
- chunks_failed += failed
- batch = []
- batch_ids = []
-
- # Process remaining chunks
- if batch:
- processed, failed = self._process_batch(batch, batch_ids)
- chunks_processed += processed
- chunks_failed += failed
-
- elapsed_time = time.time() - start_time
-
- return {
- "success": chunks_failed == 0,
- "chunks_processed": chunks_processed,
- "chunks_failed": chunks_failed,
- "elapsed_time": round(elapsed_time, 2)
- }
-
- def _process_batch(self, texts: List[str], ids: List[int]) -> Tuple[int, int]:
- """Process a batch of texts and update embeddings."""
- try:
- # Generate embeddings for batch
- embeddings = self.embedder.embed(texts)
-
- processed = 0
- failed = 0
-
- # Update database
- cursor = self.conn.cursor()
- for chunk_id, embedding in zip(ids, embeddings):
- try:
- # Convert to numpy array and store as bytes
- emb_array = np.array(embedding, dtype=np.float32)
- emb_bytes = emb_array.tobytes()
-
- cursor.execute(
- "UPDATE memory_chunks SET embedding = ? WHERE id = ?",
- (emb_bytes, chunk_id)
- )
- processed += 1
- except Exception as e:
- print(f"Error updating chunk {chunk_id}: {e}", file=sys.stderr)
- failed += 1
-
- self.conn.commit()
- return processed, failed
-
- except Exception as e:
- print(f"Error processing batch: {e}", file=sys.stderr)
- return 0, len(ids)
-
- def search(
- self,
- query: str,
- top_k: int = 10,
- min_score: float = 0.3,
- source_type: Optional[str] = None
- ) -> Dict[str, Any]:
- """
- Perform semantic search on memory chunks.
-
- Args:
- query: Search query text
- top_k: Number of results to return
- min_score: Minimum similarity score (0-1)
- source_type: Filter by source type (core_memory, workflow, cli_history)
-
- Returns:
- Result dict with success and matches list
- """
- try:
- # Generate query embedding
- query_embedding = self.embedder.embed_single(query)
- query_array = np.array(query_embedding, dtype=np.float32)
-
- # Build database query
- sql = """
- SELECT id, source_id, source_type, chunk_index, content, embedding
- FROM memory_chunks
- WHERE embedding IS NOT NULL
- """
- params = []
-
- if source_type:
- sql += " AND source_type = ?"
- params.append(source_type)
-
- cursor = self.conn.cursor()
- cursor.execute(sql, params)
-
- # Calculate similarities
- matches = []
- for row in cursor:
- # Load embedding from bytes
- emb_bytes = row["embedding"]
- emb_array = np.frombuffer(emb_bytes, dtype=np.float32)
-
- # Cosine similarity
- score = float(
- np.dot(query_array, emb_array) /
- (np.linalg.norm(query_array) * np.linalg.norm(emb_array))
- )
-
- if score >= min_score:
- # Generate restore command
- restore_command = self._get_restore_command(
- row["source_id"],
- row["source_type"]
- )
-
- matches.append({
- "source_id": row["source_id"],
- "source_type": row["source_type"],
- "chunk_index": row["chunk_index"],
- "content": row["content"],
- "score": round(score, 4),
- "restore_command": restore_command
- })
-
- # Sort by score and limit
- matches.sort(key=lambda x: x["score"], reverse=True)
- matches = matches[:top_k]
-
- return {
- "success": True,
- "matches": matches
- }
-
- except Exception as e:
- return {
- "success": False,
- "error": str(e),
- "matches": []
- }
-
- def _get_restore_command(self, source_id: str, source_type: str) -> str:
- """Generate restore command for a source."""
- if source_type in ("core_memory", "cli_history"):
- return f"ccw memory export {source_id}"
- elif source_type == "workflow":
- return f"ccw session resume {source_id}"
- else:
- return f"# Unknown source type: {source_type}"
-
- def get_status(self) -> Dict[str, Any]:
- """Get embedding status statistics."""
- cursor = self.conn.cursor()
-
- # Total chunks
- cursor.execute("SELECT COUNT(*) as count FROM memory_chunks")
- total_chunks = cursor.fetchone()["count"]
-
- # Embedded chunks
- cursor.execute("SELECT COUNT(*) as count FROM memory_chunks WHERE embedding IS NOT NULL")
- embedded_chunks = cursor.fetchone()["count"]
-
- # By type
- cursor.execute("""
- SELECT
- source_type,
- COUNT(*) as total,
- SUM(CASE WHEN embedding IS NOT NULL THEN 1 ELSE 0 END) as embedded
- FROM memory_chunks
- GROUP BY source_type
- """)
-
- by_type = {}
- for row in cursor:
- by_type[row["source_type"]] = {
- "total": row["total"],
- "embedded": row["embedded"],
- "pending": row["total"] - row["embedded"]
- }
-
- return {
- "total_chunks": total_chunks,
- "embedded_chunks": embedded_chunks,
- "pending_chunks": total_chunks - embedded_chunks,
- "by_type": by_type
- }
-
-
-def main():
- """Main entry point."""
- parser = argparse.ArgumentParser(
- description="Memory Embedder - Bridge CCW to CodexLens semantic search"
- )
-
- subparsers = parser.add_subparsers(dest="command", help="Command to execute")
- subparsers.required = True
-
- # Embed command
- embed_parser = subparsers.add_parser("embed", help="Generate embeddings for chunks")
- embed_parser.add_argument("db_path", help="Path to SQLite database")
- embed_parser.add_argument("--source-id", help="Only process chunks from this source")
- embed_parser.add_argument("--batch-size", type=int, default=8, help="Batch size (default: 8)")
- embed_parser.add_argument("--force", action="store_true", help="Re-embed existing chunks")
-
- # Search command
- search_parser = subparsers.add_parser("search", help="Semantic search")
- search_parser.add_argument("db_path", help="Path to SQLite database")
- search_parser.add_argument("query", help="Search query")
- search_parser.add_argument("--top-k", type=int, default=10, help="Number of results (default: 10)")
- search_parser.add_argument("--min-score", type=float, default=0.3, help="Minimum score (default: 0.3)")
- search_parser.add_argument("--type", dest="source_type", help="Filter by source type")
-
- # Status command
- status_parser = subparsers.add_parser("status", help="Get embedding status")
- status_parser.add_argument("db_path", help="Path to SQLite database")
-
- args = parser.parse_args()
-
- try:
- embedder = MemoryEmbedder(args.db_path)
-
- if args.command == "embed":
- result = embedder.embed_chunks(
- source_id=args.source_id,
- batch_size=args.batch_size,
- force=args.force
- )
- print(json.dumps(result, indent=2))
-
- elif args.command == "search":
- result = embedder.search(
- query=args.query,
- top_k=args.top_k,
- min_score=args.min_score,
- source_type=args.source_type
- )
- print(json.dumps(result, indent=2))
-
- elif args.command == "status":
- result = embedder.get_status()
- print(json.dumps(result, indent=2))
-
- embedder.close()
-
- # Exit with error code if operation failed
- if "success" in result and not result["success"]:
- # Clean up ONNX resources before exit
- clear_embedder_cache()
- sys.exit(1)
-
- # Clean up ONNX resources to ensure process can exit cleanly
- # This releases fastembed/ONNX Runtime threads that would otherwise
- # prevent the Python interpreter from shutting down
- clear_embedder_cache()
-
- except Exception as e:
- # Clean up ONNX resources even on error
- try:
- clear_embedder_cache()
- except Exception:
- pass
- print(json.dumps({
- "success": False,
- "error": str(e)
- }, indent=2), file=sys.stderr)
- sys.exit(1)
-
-
-if __name__ == "__main__":
- main()
diff --git a/ccw/scripts/test_memory_embedder.py b/ccw/scripts/test_memory_embedder.py
deleted file mode 100644
index b78a0c40..00000000
--- a/ccw/scripts/test_memory_embedder.py
+++ /dev/null
@@ -1,245 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test script for memory_embedder.py
-
-Creates a temporary database with test data and verifies all commands work.
-"""
-
-import json
-import sqlite3
-import tempfile
-import subprocess
-from pathlib import Path
-from datetime import datetime
-
-
-def create_test_database():
- """Create a temporary database with test chunks."""
- # Create temp file
- temp_db = tempfile.NamedTemporaryFile(suffix='.db', delete=False)
- temp_db.close()
-
- conn = sqlite3.connect(temp_db.name)
- cursor = conn.cursor()
-
- # Create schema
- cursor.execute("""
- CREATE TABLE memory_chunks (
- id INTEGER PRIMARY KEY AUTOINCREMENT,
- source_id TEXT NOT NULL,
- source_type TEXT NOT NULL,
- chunk_index INTEGER NOT NULL,
- content TEXT NOT NULL,
- embedding BLOB,
- metadata TEXT,
- created_at TEXT NOT NULL,
- UNIQUE(source_id, chunk_index)
- )
- """)
-
- # Insert test data
- test_chunks = [
- ("CMEM-20250101-001", "core_memory", 0, "Implemented authentication using JWT tokens with refresh mechanism"),
- ("CMEM-20250101-001", "core_memory", 1, "Added rate limiting to API endpoints using Redis"),
- ("WFS-20250101-auth", "workflow", 0, "Created login endpoint with password hashing"),
- ("WFS-20250101-auth", "workflow", 1, "Implemented session management with token rotation"),
- ("CLI-20250101-001", "cli_history", 0, "Executed database migration for user table"),
- ]
-
- now = datetime.now().isoformat()
- for source_id, source_type, chunk_index, content in test_chunks:
- cursor.execute(
- """
- INSERT INTO memory_chunks (source_id, source_type, chunk_index, content, created_at)
- VALUES (?, ?, ?, ?, ?)
- """,
- (source_id, source_type, chunk_index, content, now)
- )
-
- conn.commit()
- conn.close()
-
- return temp_db.name
-
-
-def run_command(args):
- """Run memory_embedder.py with given arguments."""
- script = Path(__file__).parent / "memory_embedder.py"
- cmd = ["python", str(script)] + args
-
- result = subprocess.run(
- cmd,
- capture_output=True,
- text=True
- )
-
- return result.returncode, result.stdout, result.stderr
-
-
-def test_status(db_path):
- """Test status command."""
- print("Testing status command...")
- returncode, stdout, stderr = run_command(["status", db_path])
-
- if returncode != 0:
- print(f"[FAIL] Status failed: {stderr}")
- return False
-
- result = json.loads(stdout)
- expected_total = 5
-
- if result["total_chunks"] != expected_total:
- print(f"[FAIL] Expected {expected_total} chunks, got {result['total_chunks']}")
- return False
-
- if result["embedded_chunks"] != 0:
- print(f"[FAIL] Expected 0 embedded chunks, got {result['embedded_chunks']}")
- return False
-
- print(f"[PASS] Status OK: {result['total_chunks']} total, {result['embedded_chunks']} embedded")
- return True
-
-
-def test_embed(db_path):
- """Test embed command."""
- print("\nTesting embed command...")
- returncode, stdout, stderr = run_command(["embed", db_path, "--batch-size", "2"])
-
- if returncode != 0:
- print(f"[FAIL] Embed failed: {stderr}")
- return False
-
- result = json.loads(stdout)
-
- if not result["success"]:
- print(f"[FAIL] Embed unsuccessful")
- return False
-
- if result["chunks_processed"] != 5:
- print(f"[FAIL] Expected 5 processed, got {result['chunks_processed']}")
- return False
-
- if result["chunks_failed"] != 0:
- print(f"[FAIL] Expected 0 failed, got {result['chunks_failed']}")
- return False
-
- print(f"[PASS] Embed OK: {result['chunks_processed']} processed in {result['elapsed_time']}s")
- return True
-
-
-def test_search(db_path):
- """Test search command."""
- print("\nTesting search command...")
- returncode, stdout, stderr = run_command([
- "search", db_path, "authentication JWT",
- "--top-k", "3",
- "--min-score", "0.3"
- ])
-
- if returncode != 0:
- print(f"[FAIL] Search failed: {stderr}")
- return False
-
- result = json.loads(stdout)
-
- if not result["success"]:
- print(f"[FAIL] Search unsuccessful: {result.get('error', 'Unknown error')}")
- return False
-
- if len(result["matches"]) == 0:
- print(f"[FAIL] Expected at least 1 match, got 0")
- return False
-
- print(f"[PASS] Search OK: {len(result['matches'])} matches found")
-
- # Show top match
- top_match = result["matches"][0]
- print(f" Top match: {top_match['source_id']} (score: {top_match['score']})")
- print(f" Content: {top_match['content'][:60]}...")
-
- return True
-
-
-def test_source_filter(db_path):
- """Test search with source type filter."""
- print("\nTesting source type filter...")
- returncode, stdout, stderr = run_command([
- "search", db_path, "authentication",
- "--type", "workflow"
- ])
-
- if returncode != 0:
- print(f"[FAIL] Filtered search failed: {stderr}")
- return False
-
- result = json.loads(stdout)
-
- if not result["success"]:
- print(f"[FAIL] Filtered search unsuccessful")
- return False
-
- # Verify all matches are workflow type
- for match in result["matches"]:
- if match["source_type"] != "workflow":
- print(f"[FAIL] Expected workflow type, got {match['source_type']}")
- return False
-
- print(f"[PASS] Filter OK: {len(result['matches'])} workflow matches")
- return True
-
-
-def main():
- """Run all tests."""
- print("Memory Embedder Test Suite")
- print("=" * 60)
-
- # Create test database
- print("\nCreating test database...")
- db_path = create_test_database()
- print(f"[PASS] Database created: {db_path}")
-
- try:
- # Run tests
- tests = [
- ("Status", test_status),
- ("Embed", test_embed),
- ("Search", test_search),
- ("Source Filter", test_source_filter),
- ]
-
- passed = 0
- failed = 0
-
- for name, test_func in tests:
- try:
- if test_func(db_path):
- passed += 1
- else:
- failed += 1
- except Exception as e:
- print(f"[FAIL] {name} crashed: {e}")
- failed += 1
-
- # Summary
- print("\n" + "=" * 60)
- print(f"Results: {passed} passed, {failed} failed")
-
- if failed == 0:
- print("[PASS] All tests passed!")
- return 0
- else:
- print("[FAIL] Some tests failed")
- return 1
-
- finally:
- # Cleanup
- import os
- try:
- os.unlink(db_path)
- print(f"\n[PASS] Cleaned up test database")
- except:
- pass
-
-
-if __name__ == "__main__":
- exit(main())
diff --git a/ccw/scripts/unified_memory_embedder.py b/ccw/scripts/unified_memory_embedder.py
deleted file mode 100644
index 9bbe0f47..00000000
--- a/ccw/scripts/unified_memory_embedder.py
+++ /dev/null
@@ -1,473 +0,0 @@
-#!/usr/bin/env python3
-"""
-Unified Memory Embedder - Bridge CCW to CodexLens VectorStore (HNSW)
-
-Uses CodexLens VectorStore for HNSW-indexed vector storage and search,
-replacing full-table-scan cosine similarity with sub-10ms approximate
-nearest neighbor lookups.
-
-Protocol: JSON via stdin/stdout
-Operations: embed, search, search_by_vector, status, reindex
-
-Usage:
- echo '{"operation":"embed","store_path":"...","chunks":[...]}' | python unified_memory_embedder.py
- echo '{"operation":"search","store_path":"...","query":"..."}' | python unified_memory_embedder.py
- echo '{"operation":"status","store_path":"..."}' | python unified_memory_embedder.py
- echo '{"operation":"reindex","store_path":"..."}' | python unified_memory_embedder.py
-"""
-
-import json
-import sys
-import time
-from pathlib import Path
-from typing import List, Dict, Any, Optional
-
-try:
- import numpy as np
-except ImportError:
- print(json.dumps({
- "success": False,
- "error": "numpy is required. Install with: pip install numpy"
- }))
- sys.exit(1)
-
-try:
- from codexlens.semantic.factory import get_embedder, clear_embedder_cache
- from codexlens.semantic.vector_store import VectorStore
- from codexlens.entities import SemanticChunk
-except ImportError:
- print(json.dumps({
- "success": False,
- "error": "CodexLens not found. Install with: pip install codex-lens[semantic]"
- }))
- sys.exit(1)
-
-
-# Valid category values for filtering
-VALID_CATEGORIES = {"core_memory", "cli_history", "workflow", "entity", "pattern"}
-
-
-class UnifiedMemoryEmbedder:
- """Unified embedder backed by CodexLens VectorStore (HNSW)."""
-
- def __init__(self, store_path: str):
- """
- Initialize with path to VectorStore database directory.
-
- Args:
- store_path: Directory containing vectors.db and vectors.hnsw
- """
- self.store_path = Path(store_path)
- self.store_path.mkdir(parents=True, exist_ok=True)
-
- db_path = str(self.store_path / "vectors.db")
- self.store = VectorStore(db_path)
-
- # Lazy-load embedder to avoid ~0.8s model loading for status command
- self._embedder = None
-
- @property
- def embedder(self):
- """Lazy-load the embedder on first access."""
- if self._embedder is None:
- self._embedder = get_embedder(
- backend="fastembed",
- profile="code",
- use_gpu=True
- )
- return self._embedder
-
- def embed(self, chunks: List[Dict[str, Any]], batch_size: int = 8) -> Dict[str, Any]:
- """
- Embed chunks and insert into VectorStore.
-
- Each chunk dict must contain:
- - content: str
- - source_id: str
- - source_type: str (e.g. "core_memory", "workflow", "cli_history")
- - category: str (e.g. "core_memory", "cli_history", "workflow", "entity", "pattern")
-
- Optional fields:
- - chunk_index: int (default 0)
- - metadata: dict (additional metadata)
-
- Args:
- chunks: List of chunk dicts to embed
- batch_size: Number of chunks to embed per batch
-
- Returns:
- Result dict with success, chunks_processed, chunks_failed, elapsed_time
- """
- start_time = time.time()
- chunks_processed = 0
- chunks_failed = 0
-
- if not chunks:
- return {
- "success": True,
- "chunks_processed": 0,
- "chunks_failed": 0,
- "elapsed_time": 0.0
- }
-
- # Process in batches
- for i in range(0, len(chunks), batch_size):
- batch = chunks[i:i + batch_size]
- texts = [c["content"] for c in batch]
-
- try:
- # Batch embed
- embeddings = self.embedder.embed_to_numpy(texts)
-
- # Build SemanticChunks and insert
- semantic_chunks = []
- for j, chunk_data in enumerate(batch):
- category = chunk_data.get("category", chunk_data.get("source_type", "core_memory"))
- source_id = chunk_data.get("source_id", "")
- chunk_index = chunk_data.get("chunk_index", 0)
- extra_meta = chunk_data.get("metadata", {})
-
- # Build metadata dict for VectorStore
- metadata = {
- "source_id": source_id,
- "source_type": chunk_data.get("source_type", ""),
- "chunk_index": chunk_index,
- **extra_meta
- }
-
- sc = SemanticChunk(
- content=chunk_data["content"],
- embedding=embeddings[j].tolist(),
- metadata=metadata
- )
- semantic_chunks.append((sc, source_id, category))
-
- # Insert into VectorStore
- for sc, file_path, category in semantic_chunks:
- try:
- self.store.add_chunk(sc, file_path=file_path, category=category)
- chunks_processed += 1
- except Exception as e:
- print(f"Error inserting chunk: {e}", file=sys.stderr)
- chunks_failed += 1
-
- except Exception as e:
- print(f"Error embedding batch starting at {i}: {e}", file=sys.stderr)
- chunks_failed += len(batch)
-
- elapsed_time = time.time() - start_time
-
- return {
- "success": chunks_failed == 0,
- "chunks_processed": chunks_processed,
- "chunks_failed": chunks_failed,
- "elapsed_time": round(elapsed_time, 3)
- }
-
- def search(
- self,
- query: str,
- top_k: int = 10,
- min_score: float = 0.3,
- category: Optional[str] = None
- ) -> Dict[str, Any]:
- """
- Search VectorStore using HNSW index.
-
- Args:
- query: Search query text
- top_k: Number of results
- min_score: Minimum similarity threshold
- category: Optional category filter
-
- Returns:
- Result dict with success and matches list
- """
- try:
- start_time = time.time()
-
- # Generate query embedding (embed_to_numpy accepts single string)
- query_emb = self.embedder.embed_to_numpy(query)[0].tolist()
-
- # Search via VectorStore HNSW
- results = self.store.search_similar(
- query_emb,
- top_k=top_k,
- min_score=min_score,
- category=category
- )
-
- elapsed_time = time.time() - start_time
-
- matches = []
- for result in results:
- meta = result.metadata if result.metadata else {}
- if isinstance(meta, str):
- try:
- meta = json.loads(meta)
- except (json.JSONDecodeError, TypeError):
- meta = {}
-
- matches.append({
- "content": result.content or result.excerpt or "",
- "score": round(float(result.score), 4),
- "source_id": meta.get("source_id", result.path or ""),
- "source_type": meta.get("source_type", ""),
- "chunk_index": meta.get("chunk_index", 0),
- "category": meta.get("category", ""),
- "metadata": meta
- })
-
- return {
- "success": True,
- "matches": matches,
- "elapsed_time": round(elapsed_time, 3),
- "total_searched": len(results)
- }
-
- except Exception as e:
- return {
- "success": False,
- "matches": [],
- "error": str(e)
- }
-
- def search_by_vector(
- self,
- vector: List[float],
- top_k: int = 10,
- min_score: float = 0.3,
- category: Optional[str] = None
- ) -> Dict[str, Any]:
- """
- Search VectorStore using a pre-computed embedding vector (no re-embedding).
-
- Args:
- vector: Pre-computed embedding vector (list of floats)
- top_k: Number of results
- min_score: Minimum similarity threshold
- category: Optional category filter
-
- Returns:
- Result dict with success and matches list
- """
- try:
- start_time = time.time()
-
- # Search via VectorStore HNSW directly with provided vector
- results = self.store.search_similar(
- vector,
- top_k=top_k,
- min_score=min_score,
- category=category
- )
-
- elapsed_time = time.time() - start_time
-
- matches = []
- for result in results:
- meta = result.metadata if result.metadata else {}
- if isinstance(meta, str):
- try:
- meta = json.loads(meta)
- except (json.JSONDecodeError, TypeError):
- meta = {}
-
- matches.append({
- "content": result.content or result.excerpt or "",
- "score": round(float(result.score), 4),
- "source_id": meta.get("source_id", result.path or ""),
- "source_type": meta.get("source_type", ""),
- "chunk_index": meta.get("chunk_index", 0),
- "category": meta.get("category", ""),
- "metadata": meta
- })
-
- return {
- "success": True,
- "matches": matches,
- "elapsed_time": round(elapsed_time, 3),
- "total_searched": len(results)
- }
-
- except Exception as e:
- return {
- "success": False,
- "matches": [],
- "error": str(e)
- }
-
- def status(self) -> Dict[str, Any]:
- """
- Get VectorStore index status.
-
- Returns:
- Status dict with total_chunks, hnsw_available, dimension, etc.
- """
- try:
- total_chunks = self.store.count_chunks()
- hnsw_available = self.store.ann_available
- hnsw_count = self.store.ann_count
- dimension = self.store.dimension or 768
-
- # Count per category from SQLite
- categories = {}
- try:
- import sqlite3
- db_path = str(self.store_path / "vectors.db")
- with sqlite3.connect(db_path) as conn:
- rows = conn.execute(
- "SELECT category, COUNT(*) FROM semantic_chunks GROUP BY category"
- ).fetchall()
- for row in rows:
- categories[row[0] or "unknown"] = row[1]
- except Exception:
- pass
-
- return {
- "success": True,
- "total_chunks": total_chunks,
- "hnsw_available": hnsw_available,
- "hnsw_count": hnsw_count,
- "dimension": dimension,
- "categories": categories,
- "model_config": {
- "backend": "fastembed",
- "profile": "code",
- "dimension": 768,
- "max_tokens": 8192
- }
- }
-
- except Exception as e:
- return {
- "success": False,
- "total_chunks": 0,
- "hnsw_available": False,
- "hnsw_count": 0,
- "dimension": 0,
- "error": str(e)
- }
-
- def reindex(self) -> Dict[str, Any]:
- """
- Rebuild HNSW index from scratch.
-
- Returns:
- Result dict with success and timing
- """
- try:
- start_time = time.time()
-
- self.store.rebuild_ann_index()
-
- elapsed_time = time.time() - start_time
-
- return {
- "success": True,
- "hnsw_count": self.store.ann_count,
- "elapsed_time": round(elapsed_time, 3)
- }
-
- except Exception as e:
- return {
- "success": False,
- "error": str(e)
- }
-
-
-def main():
- """Main entry point. Reads JSON from stdin, writes JSON to stdout."""
- try:
- raw_input = sys.stdin.read()
- if not raw_input.strip():
- print(json.dumps({
- "success": False,
- "error": "No input provided. Send JSON via stdin."
- }))
- sys.exit(1)
-
- request = json.loads(raw_input)
- except json.JSONDecodeError as e:
- print(json.dumps({
- "success": False,
- "error": f"Invalid JSON input: {e}"
- }))
- sys.exit(1)
-
- operation = request.get("operation")
- store_path = request.get("store_path")
-
- if not operation:
- print(json.dumps({
- "success": False,
- "error": "Missing required field: operation"
- }))
- sys.exit(1)
-
- if not store_path:
- print(json.dumps({
- "success": False,
- "error": "Missing required field: store_path"
- }))
- sys.exit(1)
-
- try:
- embedder = UnifiedMemoryEmbedder(store_path)
-
- if operation == "embed":
- chunks = request.get("chunks", [])
- batch_size = request.get("batch_size", 8)
- result = embedder.embed(chunks, batch_size=batch_size)
-
- elif operation == "search":
- query = request.get("query", "")
- if not query:
- result = {"success": False, "error": "Missing required field: query", "matches": []}
- else:
- top_k = request.get("top_k", 10)
- min_score = request.get("min_score", 0.3)
- category = request.get("category")
- result = embedder.search(query, top_k=top_k, min_score=min_score, category=category)
-
- elif operation == "search_by_vector":
- vector = request.get("vector", [])
- if not vector:
- result = {"success": False, "error": "Missing required field: vector", "matches": []}
- else:
- top_k = request.get("top_k", 10)
- min_score = request.get("min_score", 0.3)
- category = request.get("category")
- result = embedder.search_by_vector(vector, top_k=top_k, min_score=min_score, category=category)
-
- elif operation == "status":
- result = embedder.status()
-
- elif operation == "reindex":
- result = embedder.reindex()
-
- else:
- result = {
- "success": False,
- "error": f"Unknown operation: {operation}. Valid: embed, search, search_by_vector, status, reindex"
- }
-
- print(json.dumps(result))
-
- # Clean up ONNX resources to ensure process can exit cleanly
- clear_embedder_cache()
-
- except Exception as e:
- try:
- clear_embedder_cache()
- except Exception:
- pass
- print(json.dumps({
- "success": False,
- "error": str(e)
- }))
- sys.exit(1)
-
-
-if __name__ == "__main__":
- main()
diff --git a/ccw/src/core/routes/core-memory-routes.ts b/ccw/src/core/routes/core-memory-routes.ts
index fdf75fc0..a05b5d00 100644
--- a/ccw/src/core/routes/core-memory-routes.ts
+++ b/ccw/src/core/routes/core-memory-routes.ts
@@ -3,7 +3,6 @@ import { URL } from 'url';
import { getCoreMemoryStore } from '../core-memory-store.js';
import type { CoreMemory, SessionCluster, ClusterMember, ClusterRelation } from '../core-memory-store.js';
import { getEmbeddingStatus, generateEmbeddings } from '../memory-embedder-bridge.js';
-import { checkSemanticStatus } from '../../tools/codex-lens.js';
import { MemoryJobScheduler } from '../memory-job-scheduler.js';
import type { JobStatus } from '../memory-job-scheduler.js';
import { StoragePaths } from '../../config/storage-paths.js';
@@ -781,8 +780,8 @@ export async function handleCoreMemoryRoutes(ctx: RouteContext): Promise CodexLens > Semantic page.', status: 503 };
}
diff --git a/ccw/src/core/routes/mcp-routes.ts b/ccw/src/core/routes/mcp-routes.ts
index 447af46d..82784a54 100644
--- a/ccw/src/core/routes/mcp-routes.ts
+++ b/ccw/src/core/routes/mcp-routes.ts
@@ -1084,7 +1084,35 @@ function isRecord(value: unknown): value is Record {
* Handle MCP routes
* @returns true if route was handled, false otherwise
*/
+// Seed built-in MCP templates once
+let _templateSeeded = false;
+function seedBuiltinTemplates(): void {
+ if (_templateSeeded) return;
+ _templateSeeded = true;
+ try {
+ McpTemplatesDb.saveTemplate({
+ name: 'codexlens',
+ description: 'CodexLens semantic code search (vector + FTS + reranking)',
+ serverConfig: {
+ command: 'uvx',
+ args: ['--from', 'codexlens-search[mcp]', 'codexlens-mcp'],
+ env: {
+ CODEXLENS_EMBED_API_URL: '',
+ CODEXLENS_EMBED_API_KEY: '',
+ CODEXLENS_EMBED_API_MODEL: 'text-embedding-3-small',
+ CODEXLENS_EMBED_DIM: '1536',
+ },
+ },
+ category: 'code-search',
+ tags: ['search', 'semantic', 'code-intelligence'],
+ });
+ } catch {
+ // Template may already exist — ignore upsert errors
+ }
+}
+
export async function handleMcpRoutes(ctx: RouteContext): Promise {
+ seedBuiltinTemplates();
const { pathname, url, req, res, initialPath, handlePostRequest, broadcastToClients } = ctx;
// API: Get MCP configuration (includes both Claude and Codex)
@@ -1230,13 +1258,13 @@ export async function handleMcpRoutes(ctx: RouteContext): Promise {
const enabledToolsRaw = envInput.enabledTools;
let enabledToolsEnv: string;
if (enabledToolsRaw === undefined || enabledToolsRaw === null) {
- enabledToolsEnv = 'write_file,edit_file,read_file,core_memory,ask_question,smart_search';
+ enabledToolsEnv = 'write_file,edit_file,read_file,core_memory,ask_question';
} else if (Array.isArray(enabledToolsRaw)) {
enabledToolsEnv = enabledToolsRaw.filter((t): t is string => typeof t === 'string').join(',');
} else if (typeof enabledToolsRaw === 'string') {
enabledToolsEnv = enabledToolsRaw;
} else {
- enabledToolsEnv = 'write_file,edit_file,read_file,core_memory,ask_question,smart_search';
+ enabledToolsEnv = 'write_file,edit_file,read_file,core_memory,ask_question';
}
const projectRoot = typeof envInput.projectRoot === 'string' ? envInput.projectRoot : undefined;
diff --git a/ccw/src/mcp-server/index.ts b/ccw/src/mcp-server/index.ts
index 499596fd..396b14dd 100644
--- a/ccw/src/mcp-server/index.ts
+++ b/ccw/src/mcp-server/index.ts
@@ -10,8 +10,8 @@ import {
CallToolRequestSchema,
ListToolsRequestSchema,
} from '@modelcontextprotocol/sdk/types.js';
-import { getAllToolSchemas, executeTool, executeToolWithProgress } from '../tools/index.js';
-import type { ToolSchema, ToolResult } from '../types/tool.js';
+import { getAllToolSchemas, executeTool } from '../tools/index.js';
+import type { ToolSchema } from '../types/tool.js';
import { getProjectRoot, getAllowedDirectories, isSandboxEnabled } from '../utils/path-validator.js';
const SERVER_NAME = 'ccw-tools';
@@ -23,7 +23,7 @@ const ENV_ALLOWED_DIRS = 'CCW_ALLOWED_DIRS';
const STDIO_DISCONNECT_ERROR_CODES = new Set(['EPIPE', 'ERR_STREAM_DESTROYED']);
// Default enabled tools (core set - file operations, core memory, and smart search)
-const DEFAULT_TOOLS: string[] = ['write_file', 'edit_file', 'read_file', 'read_many_files', 'read_outline', 'core_memory', 'smart_search'];
+const DEFAULT_TOOLS: string[] = ['write_file', 'edit_file', 'read_file', 'read_many_files', 'read_outline', 'core_memory'];
/**
* Get list of enabled tools from environment or defaults
@@ -151,19 +151,7 @@ function createServer(): Server {
}
try {
- // For smart_search init action, use progress-aware execution
- const isInitAction = name === 'smart_search' && args?.action === 'init';
-
- let result: ToolResult;
- if (isInitAction) {
- // Execute with progress callback that writes to stderr
- result = await executeToolWithProgress(name, args || {}, (progress) => {
- // Output progress to stderr (visible in terminal, doesn't interfere with JSON-RPC)
- console.error(`[Progress] ${progress.percent}% - ${progress.message}`);
- });
- } else {
- result = await executeTool(name, args || {});
- }
+ const result = await executeTool(name, args || {});
if (!result.success) {
return {
diff --git a/ccw/src/tools/codex-lens.ts b/ccw/src/tools/codex-lens.ts
deleted file mode 100644
index ffff12f1..00000000
--- a/ccw/src/tools/codex-lens.ts
+++ /dev/null
@@ -1,213 +0,0 @@
-/**
- * CodexLens Tool - STUB (v1 removed)
- *
- * The v1 Python bridge has been removed. This module provides no-op stubs
- * so that existing consumers compile without errors.
- * Semantic search is now handled entirely by codexlens-search v2.
- */
-
-import type { ToolSchema, ToolResult } from '../types/tool.js';
-
-// ---------------------------------------------------------------------------
-// Types (kept for backward compatibility)
-// ---------------------------------------------------------------------------
-
-interface ReadyStatus {
- ready: boolean;
- installed: boolean;
- error?: string;
- version?: string;
- pythonVersion?: string;
- venvPath?: string;
-}
-
-interface SemanticStatus {
- available: boolean;
- backend?: string;
- accelerator?: string;
- providers?: string[];
- litellmAvailable?: boolean;
- error?: string;
-}
-
-interface BootstrapResult {
- success: boolean;
- message?: string;
- error?: string;
- details?: {
- pythonVersion?: string;
- venvPath?: string;
- packagePath?: string;
- installer?: 'uv' | 'pip';
- editable?: boolean;
- };
-}
-
-interface ExecuteResult {
- success: boolean;
- output?: string;
- error?: string;
- message?: string;
- warning?: string;
- results?: unknown;
- files?: unknown;
- symbols?: unknown;
-}
-
-interface ExecuteOptions {
- timeout?: number;
- cwd?: string;
- onProgress?: (progress: ProgressInfo) => void;
-}
-
-interface ProgressInfo {
- stage: string;
- message: string;
- percent: number;
- filesProcessed?: number;
- totalFiles?: number;
-}
-
-type GpuMode = 'cpu' | 'cuda' | 'directml';
-
-interface PythonEnvInfo {
- version: string;
- majorMinor: string;
- architecture: number;
- compatible: boolean;
- error?: string;
-}
-
-// ---------------------------------------------------------------------------
-// No-op implementations
-// ---------------------------------------------------------------------------
-
-const V1_REMOVED = 'CodexLens v1 has been removed. Use codexlens-search v2.';
-
-async function ensureReady(): Promise {
- return { ready: false, installed: false, error: V1_REMOVED };
-}
-
-async function executeCodexLens(_args: string[], _options: ExecuteOptions = {}): Promise {
- return { success: false, error: V1_REMOVED };
-}
-
-async function checkVenvStatus(_force?: boolean): Promise {
- return { ready: false, installed: false, error: V1_REMOVED };
-}
-
-async function bootstrapVenv(): Promise {
- return { success: false, error: V1_REMOVED };
-}
-
-async function checkSemanticStatus(_force?: boolean): Promise {
- return { available: false, error: V1_REMOVED };
-}
-
-async function ensureLiteLLMEmbedderReady(): Promise {
- return { success: false, error: V1_REMOVED };
-}
-
-async function installSemantic(_gpuMode: GpuMode = 'cpu'): Promise {
- return { success: false, error: V1_REMOVED };
-}
-
-async function detectGpuSupport(): Promise<{ mode: GpuMode; available: GpuMode[]; info: string; pythonEnv?: PythonEnvInfo }> {
- return { mode: 'cpu', available: ['cpu'], info: V1_REMOVED };
-}
-
-async function uninstallCodexLens(): Promise {
- return { success: false, error: V1_REMOVED };
-}
-
-function cancelIndexing(): { success: boolean; message?: string; error?: string } {
- return { success: false, error: V1_REMOVED };
-}
-
-function isIndexingInProgress(): boolean {
- return false;
-}
-
-async function bootstrapWithUv(_gpuMode: GpuMode = 'cpu'): Promise {
- return { success: false, error: V1_REMOVED };
-}
-
-async function installSemanticWithUv(_gpuMode: GpuMode = 'cpu'): Promise {
- return { success: false, error: V1_REMOVED };
-}
-
-function useCodexLensV2(): boolean {
- return true; // v2 is now the only option
-}
-
-function isCodexLensV2Installed(): boolean {
- return false;
-}
-
-async function bootstrapV2WithUv(): Promise {
- return { success: false, error: V1_REMOVED };
-}
-
-function getVenvPythonPath(): string {
- return 'python';
-}
-
-// ---------------------------------------------------------------------------
-// Tool schema / handler (no-op)
-// ---------------------------------------------------------------------------
-
-export const schema: ToolSchema = {
- name: 'codex_lens',
- description: '[REMOVED] CodexLens v1 tool has been removed. Use smart_search instead.',
- inputSchema: {
- type: 'object',
- properties: {
- action: { type: 'string', description: 'Action (v1 removed)' },
- },
- },
-};
-
-export async function handler(_params: Record): Promise> {
- return {
- success: false,
- error: V1_REMOVED,
- result: { success: false, error: V1_REMOVED },
- };
-}
-
-// ---------------------------------------------------------------------------
-// Exports
-// ---------------------------------------------------------------------------
-
-export type { ProgressInfo, ExecuteOptions, GpuMode, PythonEnvInfo };
-
-export {
- ensureReady,
- executeCodexLens,
- checkVenvStatus,
- bootstrapVenv,
- checkSemanticStatus,
- ensureLiteLLMEmbedderReady,
- installSemantic,
- detectGpuSupport,
- uninstallCodexLens,
- cancelIndexing,
- isIndexingInProgress,
- bootstrapWithUv,
- installSemanticWithUv,
- useCodexLensV2,
- isCodexLensV2Installed,
- bootstrapV2WithUv,
- getVenvPythonPath,
-};
-
-export const __testables = {};
-
-export const codexLensTool = {
- name: schema.name,
- description: schema.description,
- parameters: schema.inputSchema,
- execute: async (_params: Record) => {
- return { success: false, error: V1_REMOVED };
- },
-};
diff --git a/ccw/src/tools/index.ts b/ccw/src/tools/index.ts
index d009c6c0..b0c4c4d1 100644
--- a/ccw/src/tools/index.ts
+++ b/ccw/src/tools/index.ts
@@ -18,10 +18,7 @@ import * as generateDddDocsMod from './generate-ddd-docs.js';
import * as convertTokensToCssMod from './convert-tokens-to-css.js';
import * as sessionManagerMod from './session-manager.js';
import * as cliExecutorMod from './cli-executor.js';
-import * as smartSearchMod from './smart-search.js';
-import { executeInitWithProgress } from './smart-search.js';
-// codex_lens removed - functionality integrated into smart_search
-// codex_lens_lsp removed - v1 LSP bridge removed
+// codex_lens / smart_search removed - use codexlens MCP server instead
import * as readFileMod from './read-file.js';
import * as readManyFilesMod from './read-many-files.js';
import * as readOutlineMod from './read-outline.js';
@@ -30,7 +27,7 @@ import * as contextCacheMod from './context-cache.js';
import * as skillContextLoaderMod from './skill-context-loader.js';
import * as askQuestionMod from './ask-question.js';
import * as teamMsgMod from './team-msg.js';
-import type { ProgressInfo } from './codex-lens.js';
+
// Import legacy JS tools
import { uiGeneratePreviewTool } from './ui-generate-preview.js';
@@ -272,60 +269,6 @@ function sanitizeResult(result: unknown): unknown {
return result;
}
-/**
- * Execute a tool with progress callback (for init actions)
- */
-export async function executeToolWithProgress(
- name: string,
- params: Record = {},
- onProgress?: (progress: ProgressInfo) => void
-): Promise<{
- success: boolean;
- result?: unknown;
- error?: string;
-}> {
- // For smart_search init, use special progress-aware execution
- if (name === 'smart_search' && params.action === 'init') {
- try {
- // Notify dashboard - execution started
- notifyDashboard({
- toolName: name,
- status: 'started',
- params: sanitizeParams(params)
- });
-
- const result = await executeInitWithProgress(params, onProgress);
-
- // Notify dashboard - execution completed
- notifyDashboard({
- toolName: name,
- status: 'completed',
- result: sanitizeResult(result)
- });
-
- return {
- success: result.success,
- result,
- error: result.error
- };
- } catch (error) {
- notifyDashboard({
- toolName: name,
- status: 'failed',
- error: (error as Error).message || 'Tool execution failed'
- });
-
- return {
- success: false,
- error: (error as Error).message || 'Tool execution failed'
- };
- }
- }
-
- // Fall back to regular execution for other tools
- return executeTool(name, params);
-}
-
/**
* Get tool schema in MCP-compatible format
*/
@@ -363,9 +306,7 @@ registerTool(toLegacyTool(generateDddDocsMod));
registerTool(toLegacyTool(convertTokensToCssMod));
registerTool(toLegacyTool(sessionManagerMod));
registerTool(toLegacyTool(cliExecutorMod));
-registerTool(toLegacyTool(smartSearchMod));
-// codex_lens removed - functionality integrated into smart_search
-// codex_lens_lsp removed - v1 LSP bridge removed
+// codex_lens / smart_search removed - use codexlens MCP server instead
registerTool(toLegacyTool(readFileMod));
registerTool(toLegacyTool(readManyFilesMod));
registerTool(toLegacyTool(readOutlineMod));
diff --git a/ccw/src/tools/smart-context.ts b/ccw/src/tools/smart-context.ts
index ad6b20bb..56f2cf7b 100644
--- a/ccw/src/tools/smart-context.ts
+++ b/ccw/src/tools/smart-context.ts
@@ -4,7 +4,9 @@
* Auto-generates contextual file references for CLI execution
*/
-import { executeCodexLens, ensureReady as ensureCodexLensReady } from './codex-lens.js';
+// codex-lens v1 removed — no-op stubs for backward compatibility
+async function ensureCodexLensReady(): Promise<{ ready: boolean }> { return { ready: false }; }
+async function executeCodexLens(_args: string[], _opts?: { cwd?: string }): Promise<{ success: boolean; output?: string }> { return { success: false }; }
// Options for smart context generation
export interface SmartContextOptions {
diff --git a/ccw/src/tools/smart-search.ts b/ccw/src/tools/smart-search.ts
deleted file mode 100644
index 51d6f6ae..00000000
--- a/ccw/src/tools/smart-search.ts
+++ /dev/null
@@ -1,3686 +0,0 @@
-/**
- * Smart Search Tool - Unified intelligent search powered by codexlens-search v2
- *
- * Features:
- * - Semantic search: 2-stage vector (binary coarse + ANN fine) + FTS5 + RRF fusion + reranking
- * - Ripgrep fallback for fast exact/regex matching
- * - File discovery via glob patterns
- * - Incremental indexing with Mark-and-Filter strategy
- * - File watcher for automatic index updates
- *
- * Actions:
- * - search: Semantic search via v2 bridge with ripgrep fallback
- * - init: Initialize v2 index and sync files
- * - status: Check v2 index statistics
- * - update: Incremental sync for changed files
- * - watch: Start file watcher for automatic updates
- * - find_files: Glob-based file path matching
- */
-
-import { z } from 'zod';
-import type { ToolSchema, ToolResult } from '../types/tool.js';
-import { spawn, spawnSync, type SpawnOptions } from 'child_process';
-import { existsSync, readFileSync, statSync } from 'fs';
-import { dirname, join, resolve } from 'path';
-import {
- ensureReady as ensureCodexLensReady,
- checkSemanticStatus,
- ensureLiteLLMEmbedderReady,
- executeCodexLens,
- getVenvPythonPath,
-} from './codex-lens.js';
-import { execFile } from 'child_process';
-import type { ProgressInfo } from './codex-lens.js';
-import { getProjectRoot } from '../utils/path-validator.js';
-import { getCodexLensDataDir } from '../utils/codexlens-path.js';
-import { EXEC_TIMEOUTS } from '../utils/exec-constants.js';
-import { generateRotationEndpoints } from '../config/litellm-api-config-manager.js';
-import type { RotationEndpointConfig } from '../config/litellm-api-config-manager.js';
-
-// Timing utilities for performance analysis
-const TIMING_ENABLED = process.env.SMART_SEARCH_TIMING === '1' || process.env.DEBUG?.includes('timing');
-const SEARCH_OUTPUT_MODES = ['full', 'files_only', 'count', 'ace'] as const;
-type SearchOutputMode = typeof SEARCH_OUTPUT_MODES[number];
-
-interface TimingData {
- [key: string]: number;
-}
-
-function createTimer(): { mark: (name: string) => void; getTimings: () => TimingData; log: () => void } {
- const startTime = performance.now();
- const marks: { name: string; time: number }[] = [];
- let lastMark = startTime;
-
- return {
- mark(name: string) {
- const now = performance.now();
- marks.push({ name, time: now - lastMark });
- lastMark = now;
- },
- getTimings(): TimingData {
- const timings: TimingData = {};
- marks.forEach(m => { timings[m.name] = Math.round(m.time * 100) / 100; });
- timings['_total'] = Math.round((performance.now() - startTime) * 100) / 100;
- return timings;
- },
- log() {
- if (TIMING_ENABLED) {
- const timings = this.getTimings();
- console.error(`[TIMING] smart-search: ${JSON.stringify(timings)}`);
- }
- }
- };
-}
-
-// Define Zod schema for validation
-const ParamsSchema = z.object({
- // Action: search (content), find_files (path/name pattern), init, status, update (incremental sync), watch
- // Note: search_files is deprecated, use search with output_mode='files_only'
- action: z.enum(['init', 'search', 'search_files', 'find_files', 'status', 'update', 'watch']).default('search'),
- query: z.string().optional().describe('Content search query (for action="search")'),
- pattern: z.string().optional().describe('Glob pattern for path matching (for action="find_files")'),
- mode: z.enum(['fuzzy', 'semantic']).default('fuzzy'),
- output_mode: z.enum(SEARCH_OUTPUT_MODES).default('ace'),
- path: z.string().optional(),
- paths: z.array(z.string()).default([]),
- contextLines: z.number().default(0),
- maxResults: z.number().default(5), // Default 5 with full content
- includeHidden: z.boolean().default(false),
- force: z.boolean().default(false).describe('Force full rebuild for action="init".'),
- limit: z.number().default(5), // Default 5 with full content
- extraFilesCount: z.number().default(10), // Additional file-only results
- maxContentLength: z.number().default(200), // Max content length for truncation (50-2000)
- offset: z.number().default(0), // NEW: Pagination offset (start_index)
- // Search modifiers for ripgrep mode
- regex: z.boolean().default(true), // Use regex pattern matching (default: enabled)
- caseSensitive: z.boolean().default(true), // Case sensitivity (default: case-sensitive)
- tokenize: z.boolean().default(true), // Tokenize multi-word queries for OR matching (default: enabled)
- // File type filtering (default: code only)
- excludeExtensions: z.array(z.string()).optional().describe('File extensions to exclude from results (e.g., ["md", "txt"])'),
- codeOnly: z.boolean().default(true).describe('Only return code files (excludes md, txt, json, yaml, xml, etc.). Default: true'),
- withDoc: z.boolean().default(false).describe('Include documentation files (md, txt, rst, etc.). Overrides codeOnly when true'),
- // Watcher options
- debounce: z.number().default(1000).describe('Debounce interval in ms for watch action'),
- // Fuzzy matching is implicit in hybrid mode (RRF fusion)
-});
-
-type Params = z.infer;
-
-// Search mode constants
-const SEARCH_MODES = ['fuzzy', 'semantic'] as const;
-
-// Classification confidence threshold
-const CONFIDENCE_THRESHOLD = 0.7;
-
-// File filtering configuration (ported from code-index)
-const FILTER_CONFIG = {
- exclude_directories: new Set([
- '.git', '.svn', '.hg', '.bzr',
- 'node_modules', '__pycache__', '.venv', 'venv', 'vendor', 'bower_components',
- 'dist', 'build', 'target', 'out', 'bin', 'obj',
- '.idea', '.vscode', '.vs', '.sublime-workspace',
- '.pytest_cache', '.coverage', '.tox', '.nyc_output', 'coverage', 'htmlcov',
- '.next', '.nuxt', '.cache', '.parcel-cache',
- '.DS_Store', 'Thumbs.db',
- ]),
- exclude_files: new Set([
- '*.tmp', '*.temp', '*.swp', '*.swo', '*.bak', '*~', '*.orig', '*.log',
- 'package-lock.json', 'yarn.lock', 'pnpm-lock.yaml', 'Pipfile.lock',
- ]),
- // Windows device files - must use **/ pattern to match in any directory
- // These cause "os error 1" on Windows when accessed
- windows_device_files: new Set([
- 'nul', 'con', 'aux', 'prn',
- 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
- 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
- ]),
-};
-
-function buildExcludeArgs(): string[] {
- const args: string[] = [];
- for (const dir of FILTER_CONFIG.exclude_directories) {
- args.push('--glob', `!**/${dir}/**`);
- }
- for (const pattern of FILTER_CONFIG.exclude_files) {
- args.push('--glob', `!${pattern}`);
- }
- // Windows device files need case-insensitive matching in any directory
- for (const device of FILTER_CONFIG.windows_device_files) {
- args.push('--glob', `!**/${device}`);
- args.push('--glob', `!**/${device.toUpperCase()}`);
- }
- return args;
-}
-
-/**
- * Tokenize query for multi-word OR matching
- * Splits on whitespace and common delimiters, filters stop words and short tokens
- * @param query - The search query
- * @returns Array of tokens
- */
-function tokenizeQuery(query: string): string[] {
- // Stop words for filtering (common English + programming keywords)
- const stopWords = new Set([
- 'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
- 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
- 'should', 'may', 'might', 'must', 'can', 'to', 'of', 'in', 'for', 'on',
- 'with', 'at', 'by', 'from', 'as', 'into', 'through', 'and', 'but', 'if',
- 'or', 'not', 'this', 'that', 'these', 'those', 'it', 'its', 'how', 'what',
- 'where', 'when', 'why', 'which', 'who', 'whom',
- ]);
-
- // Split on whitespace and common delimiters, keep meaningful tokens
- const tokens = query
- .split(/[\s,;:]+/)
- .map(token => token.trim())
- .filter(token => {
- // Keep tokens that are:
- // - At least 2 characters long
- // - Not a stop word (case-insensitive)
- // - Or look like identifiers (contain underscore/camelCase)
- if (token.length < 2) return false;
- if (stopWords.has(token.toLowerCase()) && !token.includes('_') && !/[A-Z]/.test(token)) {
- return false;
- }
- return true;
- });
-
- return tokens;
-}
-
-/**
- * Score results based on token match count for ranking
- * @param results - Search results
- * @param tokens - Query tokens
- * @returns Results with match scores
- */
-function scoreByTokenMatch(results: ExactMatch[], tokens: string[]): ExactMatch[] {
- if (tokens.length <= 1) return results;
-
- // Create case-insensitive patterns for each token
- const tokenPatterns = tokens.map(t => {
- const escaped = t.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
- return new RegExp(escaped, 'i');
- });
-
- return results.map(r => {
- const content = r.content || '';
- const file = r.file || '';
- const searchText = `${file} ${content}`;
-
- // Count how many tokens match
- let matchCount = 0;
- for (const pattern of tokenPatterns) {
- if (pattern.test(searchText)) {
- matchCount++;
- }
- }
-
- // Calculate match ratio (0 to 1)
- const matchRatio = matchCount / tokens.length;
-
- return {
- ...r,
- matchScore: matchRatio,
- matchCount,
- };
- }).sort((a, b) => {
- // Sort by match ratio (descending), then by line number
- if (b.matchScore !== a.matchScore) {
- return b.matchScore - a.matchScore;
- }
- return (a.line || 0) - (b.line || 0);
- });
-}
-
-interface Classification {
- mode: string;
- confidence: number;
- reasoning: string;
-}
-
-interface ChunkLine {
- line: number;
- text: string;
- isMatch: boolean;
-}
-
-interface ExactMatch {
- file: string;
- line: number;
- column: number;
- content: string;
- endLine?: number;
- chunkLines?: ChunkLine[];
- matchScore?: number; // Token match ratio (0-1) for multi-word queries
- matchCount?: number; // Number of tokens matched
-}
-
-interface RelationshipInfo {
- type: string; // 'calls', 'imports', 'called_by', 'imported_by'
- direction: 'outgoing' | 'incoming';
- target?: string; // Target symbol name (for outgoing)
- source?: string; // Source symbol name (for incoming)
- file: string; // File path
- line?: number; // Line number
-}
-
-interface SemanticMatch {
- file: string;
- line?: number;
- column?: number;
- score: number;
- content: string;
- symbol: string | null;
- relationships?: RelationshipInfo[];
-}
-
-interface GraphMatch {
- file: string;
- symbols: unknown;
- relationships: unknown[];
-}
-
-// File match for find_files action (path-based search)
-interface FileMatch {
- path: string;
- type: 'file' | 'directory';
- name: string; // Filename only
- extension?: string; // File extension (without dot)
-}
-
-interface PaginationInfo {
- offset: number; // Starting index of returned results
- limit: number; // Number of results requested
- total: number; // Total number of results found
- has_more: boolean; // True if more results are available
-}
-
-interface SearchSuggestion {
- title: string;
- command: string;
- reason: string;
-}
-
-interface SearchMetadata {
- mode?: string;
- backend?: string;
- count?: number;
- query?: string;
- pattern?: string; // For find_files action
- classified_as?: string;
- confidence?: number;
- reasoning?: string;
- embeddings_coverage_percent?: number;
- warning?: string;
- note?: string;
- index_status?: 'indexed' | 'not_indexed' | 'partial';
- fallback?: string; // Fallback mode used (e.g., 'fuzzy')
- fallback_history?: string[];
- suggested_weights?: Record;
- // Tokenization metadata (ripgrep mode)
- tokens?: string[]; // Query tokens used for multi-word search
- tokenized?: boolean; // Whether tokenization was applied
- suggestions?: SearchSuggestion[];
- // Pagination metadata
- pagination?: PaginationInfo;
- // Performance timing data (when SMART_SEARCH_TIMING=1 or DEBUG includes 'timing')
- timing?: TimingData;
- // Init action specific
- action?: string;
- path?: string;
- progress?: {
- stage: string;
- message: string;
- percent: number;
- filesProcessed?: number;
- totalFiles?: number;
- };
- progressHistory?: ProgressInfo[];
- api_max_workers?: number;
- endpoint_count?: number;
- use_gpu?: boolean;
- reranker_enabled?: boolean;
- reranker_backend?: string;
- reranker_model?: string;
- cascade_strategy?: string;
- staged_stage2_mode?: string;
- static_graph_enabled?: boolean;
- preset?: string;
-}
-
-interface SearchResult {
- success: boolean;
- results?: ExactMatch[] | SemanticMatch[] | GraphMatch[] | FileMatch[] | AceLikeOutput | unknown;
- extra_files?: string[]; // Additional file paths without content
- output?: string;
- metadata?: SearchMetadata;
- error?: string;
- status?: unknown;
- message?: string;
-}
-
-interface AceLikeSection {
- path: string;
- line?: number;
- endLine?: number;
- column?: number;
- score?: number;
- symbol?: string | null;
- snippet: string;
- lines?: ChunkLine[];
-}
-
-interface AceLikeGroup {
- path: string;
- sections: AceLikeSection[];
- total_matches: number;
-}
-
-interface AceLikeOutput {
- format: 'ace';
- text: string;
- groups: AceLikeGroup[];
- sections: AceLikeSection[];
- total: number;
-}
-
-interface ModelInfo {
- model_profile?: string;
- model_name?: string;
- embedding_dim?: number;
- backend?: string;
- created_at?: string;
- updated_at?: string;
-}
-
-interface CodexLensConfig {
- config_file?: string;
- index_dir?: string;
- embedding_backend?: string; // 'fastembed' (local) or 'litellm' (api)
- embedding_model?: string;
- embedding_auto_embed_missing?: boolean;
- reranker_enabled?: boolean;
- reranker_backend?: string; // 'onnx' (local) or 'api'
- reranker_model?: string;
- reranker_top_k?: number;
- api_max_workers?: number;
- api_batch_size?: number;
- cascade_strategy?: string;
- staged_stage2_mode?: string;
- static_graph_enabled?: boolean;
-}
-
-interface IndexStatus {
- indexed: boolean;
- has_embeddings: boolean;
- file_count?: number;
- embeddings_coverage_percent?: number;
- total_chunks?: number;
- model_info?: ModelInfo | null;
- config?: CodexLensConfig | null;
- warning?: string;
-}
-
-function readCodexLensSettingsSnapshot(): Partial {
- const settingsPath = join(getCodexLensDataDir(), 'settings.json');
- if (!existsSync(settingsPath)) {
- return {};
- }
-
- try {
- const parsed = JSON.parse(readFileSync(settingsPath, 'utf-8')) as Record;
- const embedding = (parsed.embedding ?? {}) as Record;
- const reranker = (parsed.reranker ?? {}) as Record;
- const api = (parsed.api ?? {}) as Record;
- const cascade = (parsed.cascade ?? {}) as Record;
- const staged = (parsed.staged ?? {}) as Record;
- const indexing = (parsed.indexing ?? {}) as Record;
-
- return {
- embedding_backend: normalizeEmbeddingBackend(typeof embedding.backend === 'string' ? embedding.backend : undefined),
- embedding_model: typeof embedding.model === 'string' ? embedding.model : undefined,
- embedding_auto_embed_missing: typeof embedding.auto_embed_missing === 'boolean' ? embedding.auto_embed_missing : undefined,
- reranker_enabled: typeof reranker.enabled === 'boolean' ? reranker.enabled : undefined,
- reranker_backend: typeof reranker.backend === 'string' ? reranker.backend : undefined,
- reranker_model: typeof reranker.model === 'string' ? reranker.model : undefined,
- reranker_top_k: typeof reranker.top_k === 'number' ? reranker.top_k : undefined,
- api_max_workers: typeof api.max_workers === 'number' ? api.max_workers : undefined,
- api_batch_size: typeof api.batch_size === 'number' ? api.batch_size : undefined,
- cascade_strategy: typeof cascade.strategy === 'string' ? cascade.strategy : undefined,
- staged_stage2_mode: typeof staged.stage2_mode === 'string' ? staged.stage2_mode : undefined,
- static_graph_enabled: typeof indexing.static_graph_enabled === 'boolean' ? indexing.static_graph_enabled : undefined,
- };
- } catch {
- return {};
- }
-}
-
-/**
- * Strip ANSI color codes from string (for JSON parsing)
- */
-function stripAnsi(str: string): string {
- return str.replace(/\x1b\[[0-9;]*m/g, '');
-}
-
-/** Default maximum content length to return (avoid excessive output) */
-const DEFAULT_MAX_CONTENT_LENGTH = 200;
-const CODEX_LENS_FTS_COMPATIBILITY_PATTERNS = [
- /UsageError:\s*Got unexpected extra arguments?/i,
- /Option ['"]--method['"] does not take a value/i,
- /TyperArgument\.make_metavar\(\) takes 1 positional argument but 2 were given/i,
-];
-
-let codexLensFtsBackendBroken = false;
-const autoInitJobs = new Map();
-const autoEmbedJobs = new Map();
-
-type SmartSearchRuntimeOverrides = {
- checkSemanticStatus?: typeof checkSemanticStatus;
- getVenvPythonPath?: typeof getVenvPythonPath;
- spawnProcess?: typeof spawn;
- now?: () => number;
-};
-
-const runtimeOverrides: SmartSearchRuntimeOverrides = {};
-
-function getSemanticStatusRuntime(): typeof checkSemanticStatus {
- return runtimeOverrides.checkSemanticStatus ?? checkSemanticStatus;
-}
-
-function getVenvPythonPathRuntime(): typeof getVenvPythonPath {
- return runtimeOverrides.getVenvPythonPath ?? getVenvPythonPath;
-}
-
-function getSpawnRuntime(): typeof spawn {
- return runtimeOverrides.spawnProcess ?? spawn;
-}
-
-function getNowRuntime(): number {
- return (runtimeOverrides.now ?? Date.now)();
-}
-
-function buildSmartSearchSpawnOptions(cwd: string, overrides: SpawnOptions = {}): SpawnOptions {
- const { env, ...rest } = overrides;
- return {
- cwd,
- shell: false,
- windowsHide: true,
- env: { ...process.env, PYTHONIOENCODING: 'utf-8', ...env },
- ...rest,
- };
-}
-
-function shouldDetachBackgroundSmartSearchProcess(): boolean {
- // On Windows, detached Python children can still create a transient console
- // window even when windowsHide is set. Background warmup only needs to outlive
- // the current request, not the MCP server process.
- return process.platform !== 'win32';
-}
-
-/**
- * Truncate content to specified length with ellipsis
- * @param content - The content to truncate
- * @param maxLength - Maximum length (default: 200)
- */
-function truncateContent(content: string | null | undefined, maxLength: number = DEFAULT_MAX_CONTENT_LENGTH): string {
- if (!content) return '';
- if (content.length <= maxLength) return content;
- return content.slice(0, maxLength) + '...';
-}
-
-/**
- * Split results into full content results and extra file-only results
- * Generic function supporting both SemanticMatch and ExactMatch types
- * @param allResults - All search results (must have 'file' property)
- * @param fullContentLimit - Number of results with full content (default: 5)
- * @param extraFilesCount - Number of additional file-only results (default: 10)
- */
-function splitResultsWithExtraFiles(
- allResults: T[],
- fullContentLimit: number = 5,
- extraFilesCount: number = 10
-): { results: T[]; extra_files: string[] } {
- // First N results with full content
- const results = allResults.slice(0, fullContentLimit);
-
- // Next M results as file paths only (deduplicated)
- const extraResults = allResults.slice(fullContentLimit, fullContentLimit + extraFilesCount);
- const extra_files = [...new Set(extraResults.map(r => r.file))];
-
- return { results, extra_files };
-}
-
-interface SearchScope {
- workingDirectory: string;
- searchPaths: string[];
- targetFile?: string;
-}
-
-interface RipgrepQueryModeResolution {
- regex: boolean;
- tokenize: boolean;
- tokens: string[];
- literalFallback: boolean;
- warning?: string;
-}
-
-const GENERATED_QUERY_RE = /(? sanitizeSearchPath(item) || item);
- const fallbackPath = normalizedPath || getProjectRoot();
-
- try {
- const resolvedPath = resolve(fallbackPath);
- const stats = statSync(resolvedPath);
-
- if (stats.isFile()) {
- return {
- workingDirectory: dirname(resolvedPath),
- searchPaths: normalizedPaths.length > 0 ? normalizedPaths : [resolvedPath],
- targetFile: resolvedPath,
- };
- }
-
- return {
- workingDirectory: resolvedPath,
- searchPaths: normalizedPaths.length > 0 ? normalizedPaths : ['.'],
- };
- } catch {
- return {
- workingDirectory: fallbackPath,
- searchPaths: normalizedPaths.length > 0 ? normalizedPaths : [normalizedPath || '.'],
- };
- }
-}
-
-function normalizeResultFilePath(filePath: string, workingDirectory: string): string {
- return resolve(workingDirectory, filePath).replace(/\\/g, '/');
-}
-
-function filterResultsToTargetFile(results: T[], scope: SearchScope): T[] {
- if (!scope.targetFile) {
- return results;
- }
-
- const normalizedTarget = scope.targetFile.replace(/\\/g, '/');
- return results.filter((result) => normalizeResultFilePath(result.file, scope.workingDirectory) === normalizedTarget);
-}
-
-function parseCodexLensJsonOutput(output: string | undefined): any | null {
- const cleanOutput = stripAnsi(output || '').trim();
- if (!cleanOutput) {
- return null;
- }
-
- const candidates = [
- cleanOutput,
- ...cleanOutput.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.startsWith('{') || line.startsWith('[')),
- ];
-
- const firstBrace = cleanOutput.indexOf('{');
- const lastBrace = cleanOutput.lastIndexOf('}');
- if (firstBrace !== -1 && lastBrace > firstBrace) {
- candidates.push(cleanOutput.slice(firstBrace, lastBrace + 1));
- }
-
- const firstBracket = cleanOutput.indexOf('[');
- const lastBracket = cleanOutput.lastIndexOf(']');
- if (firstBracket !== -1 && lastBracket > firstBracket) {
- candidates.push(cleanOutput.slice(firstBracket, lastBracket + 1));
- }
-
- for (const candidate of candidates) {
- try {
- return JSON.parse(candidate);
- } catch {
- continue;
- }
- }
-
- return null;
-}
-
-function isValidRegexPattern(pattern: string): boolean {
- try {
- new RegExp(pattern);
- return true;
- } catch {
- return false;
- }
-}
-
-function resolveRipgrepQueryMode(query: string, regex: boolean = true, tokenize: boolean = true): RipgrepQueryModeResolution {
- const tokens = tokenize ? tokenizeQuery(query) : [query];
-
- if (!regex) {
- return {
- regex: false,
- tokenize,
- tokens,
- literalFallback: false,
- };
- }
-
- const invalidTokens = tokens.filter((token) => token.length > 0 && !isValidRegexPattern(token));
- if (invalidTokens.length === 0) {
- return {
- regex: true,
- tokenize,
- tokens,
- literalFallback: false,
- };
- }
-
- const preview = truncateContent(invalidTokens[0], 40);
- return {
- regex: false,
- tokenize,
- tokens,
- literalFallback: true,
- warning: invalidTokens.length === 1
- ? `Query token "${preview}" is not a valid regular expression. Falling back to literal ripgrep matching.`
- : 'Query contains invalid regular expression tokens. Falling back to literal ripgrep matching.',
- };
-}
-
-function isCodexLensCliCompatibilityError(error: string | undefined): boolean {
- if (!error) {
- return false;
- }
-
- const cleanError = stripAnsi(error);
- return CODEX_LENS_FTS_COMPATIBILITY_PATTERNS.some((pattern) => pattern.test(cleanError));
-}
-
-function noteCodexLensFtsCompatibility(error: string | undefined): boolean {
- if (!isCodexLensCliCompatibilityError(error)) {
- return false;
- }
-
- codexLensFtsBackendBroken = true;
- return true;
-}
-
-function shouldSurfaceCodexLensFtsCompatibilityWarning(options: {
- compatibilityTriggeredThisQuery: boolean;
- skipExactDueToCompatibility: boolean;
- ripgrepResultCount: number;
-}): boolean {
- if (options.ripgrepResultCount > 0) {
- return false;
- }
-
- return options.compatibilityTriggeredThisQuery || options.skipExactDueToCompatibility;
-}
-
-function summarizeBackendError(error: string | undefined): string {
- const cleanError = stripAnsi(error || '').trim();
- if (!cleanError) {
- return 'unknown error';
- }
-
- if (isCodexLensCliCompatibilityError(cleanError)) {
- return 'CodexLens exact search CLI is incompatible with the current Typer/Click runtime';
- }
-
- const regexSummary = cleanError.match(/error:\s*([^\r\n]+)/i);
- if (/regex parse error/i.test(cleanError) && regexSummary?.[1]) {
- return `invalid regular expression (${regexSummary[1].trim()})`;
- }
-
- const usageSummary = cleanError.match(/UsageError:\s*([^\r\n]+)/i);
- if (usageSummary?.[1]) {
- return usageSummary[1].trim();
- }
-
- const firstMeaningfulLine = cleanError
- .split(/\r?\n/)
- .map((line) => line.trim())
- .find((line) => line && !line.startsWith('│') && !line.startsWith('┌') && !line.startsWith('└'));
-
- return truncateContent(firstMeaningfulLine || cleanError, 180);
-}
-
-function mapCodexLensSemanticMatches(data: any[], scope: SearchScope, maxContentLength: number): SemanticMatch[] {
- return filterResultsToTargetFile(data.map((item: any) => {
- const rawScore = item.score || 0;
- const similarityScore = rawScore > 0 ? 1 / (1 + rawScore) : 1;
- return {
- file: item.path || item.file,
- line: typeof item.line === 'number' ? item.line : undefined,
- column: typeof item.column === 'number' ? item.column : undefined,
- score: similarityScore,
- content: truncateContent(item.content || item.excerpt, maxContentLength),
- symbol: item.symbol || null,
- };
- }), scope);
-}
-
-function parsePlainTextFileMatches(output: string | undefined, scope: SearchScope): SemanticMatch[] {
- const lines = stripAnsi(output || '')
- .split(/\r?\n/)
- .map((line) => line.trim())
- .filter(Boolean);
-
- const fileLines = lines.filter((line) => {
- if (line.includes('RuntimeWarning:') || line.startsWith('warn(') || line.startsWith('Warning:')) {
- return false;
- }
-
- const resolvedPath = /^[a-zA-Z]:[\\/]|^\//.test(line)
- ? line
- : resolve(scope.workingDirectory, line);
-
- try {
- return statSync(resolvedPath).isFile();
- } catch {
- return false;
- }
- });
-
- return filterResultsToTargetFile(
- [...new Set(fileLines)].map((file, index) => ({
- file,
- score: Math.max(0.1, 1 - index * 0.05),
- content: '',
- symbol: null,
- })),
- scope,
- );
-}
-
-function hasCentralizedVectorArtifacts(indexRoot: unknown): boolean {
- if (typeof indexRoot !== 'string' || !indexRoot.trim()) {
- return false;
- }
-
- const resolvedRoot = resolve(indexRoot);
- return [
- join(resolvedRoot, '_vectors.hnsw'),
- join(resolvedRoot, '_vectors_meta.db'),
- join(resolvedRoot, '_binary_vectors.mmap'),
- ].every((artifactPath) => existsSync(artifactPath));
-}
-
-function asObjectRecord(value: unknown): Record | undefined {
- if (!value || typeof value !== 'object' || Array.isArray(value)) {
- return undefined;
- }
- return value as Record;
-}
-
-function asFiniteNumber(value: unknown): number | undefined {
- if (typeof value !== 'number' || !Number.isFinite(value)) {
- return undefined;
- }
- return value;
-}
-
-function asBoolean(value: unknown): boolean | undefined {
- return typeof value === 'boolean' ? value : undefined;
-}
-
-function extractEmbeddingsStatusSummary(embeddingsData: unknown): {
- coveragePercent: number;
- totalChunks: number;
- hasEmbeddings: boolean;
-} {
- const embeddings = asObjectRecord(embeddingsData) ?? {};
- const root = asObjectRecord(embeddings.root) ?? embeddings;
- const centralized = asObjectRecord(embeddings.centralized);
-
- const totalIndexes = asFiniteNumber(root.total_indexes)
- ?? asFiniteNumber(embeddings.total_indexes)
- ?? 0;
- const indexesWithEmbeddings = asFiniteNumber(root.indexes_with_embeddings)
- ?? asFiniteNumber(embeddings.indexes_with_embeddings)
- ?? 0;
- const totalChunks = asFiniteNumber(root.total_chunks)
- ?? asFiniteNumber(embeddings.total_chunks)
- ?? 0;
- const coveragePercent = asFiniteNumber(root.coverage_percent)
- ?? asFiniteNumber(embeddings.coverage_percent)
- ?? (totalIndexes > 0 ? (indexesWithEmbeddings / totalIndexes) * 100 : 0);
- const hasEmbeddings = asBoolean(root.has_embeddings)
- ?? asBoolean(centralized?.usable)
- ?? (totalChunks > 0 || indexesWithEmbeddings > 0 || coveragePercent > 0);
-
- return {
- coveragePercent,
- totalChunks,
- hasEmbeddings,
- };
-}
-
-function selectEmbeddingsStatusPayload(statusData: unknown): Record {
- const status = asObjectRecord(statusData) ?? {};
- return asObjectRecord(status.embeddings_status) ?? asObjectRecord(status.embeddings) ?? {};
-}
-
-function collectBackendError(
- errors: string[],
- backendName: string,
- backendResult: PromiseSettledResult,
-): void {
- if (backendResult.status === 'rejected') {
- errors.push(`${backendName}: ${summarizeBackendError(String(backendResult.reason))}`);
- return;
- }
-
- if (!backendResult.value.success) {
- errors.push(`${backendName}: ${summarizeBackendError(backendResult.value.error)}`);
- }
-}
-
-function mergeWarnings(...warnings: Array): string | undefined {
- const merged = [...new Set(
- warnings
- .filter((warning): warning is string => typeof warning === 'string' && warning.trim().length > 0)
- .map((warning) => warning.trim())
- )];
- return merged.length > 0 ? merged.join(' | ') : undefined;
-}
-
-function mergeNotes(...notes: Array): string | undefined {
- const merged = [...new Set(
- notes
- .filter((note): note is string => typeof note === 'string' && note.trim().length > 0)
- .map((note) => note.trim())
- )];
- return merged.length > 0 ? merged.join(' | ') : undefined;
-}
-
-function mergeSuggestions(...groups: Array): SearchSuggestion[] | undefined {
- const merged = new Map();
- for (const group of groups) {
- for (const suggestion of group ?? []) {
- if (!merged.has(suggestion.command)) {
- merged.set(suggestion.command, suggestion);
- }
- }
- }
-
- return merged.size > 0 ? [...merged.values()] : undefined;
-}
-
-function formatSmartSearchCommand(action: string, pathValue: string, extraParams: Record = {}): string {
- const normalizedPath = pathValue.replace(/\\/g, '/');
- const args = [`action=${JSON.stringify(action)}`, `path=${JSON.stringify(normalizedPath)}`];
-
- for (const [key, value] of Object.entries(extraParams)) {
- if (value === undefined) {
- continue;
- }
- args.push(`${key}=${JSON.stringify(value)}`);
- }
-
- return `smart_search(${args.join(', ')})`;
-}
-
-function parseOptionalBooleanEnv(raw: string | undefined): boolean | undefined {
- const normalized = raw?.trim().toLowerCase();
- if (!normalized) {
- return undefined;
- }
-
- if (['1', 'true', 'on', 'yes'].includes(normalized)) {
- return true;
- }
-
- if (['0', 'false', 'off', 'no'].includes(normalized)) {
- return false;
- }
-
- return undefined;
-}
-
-function isAutoEmbedMissingEnabled(config: CodexLensConfig | null | undefined): boolean {
- const envOverride = parseOptionalBooleanEnv(process.env.CODEXLENS_AUTO_EMBED_MISSING);
- if (envOverride !== undefined) {
- return envOverride;
- }
-
- if (process.platform === 'win32') {
- return false;
- }
-
- if (typeof config?.embedding_auto_embed_missing === 'boolean') {
- return config.embedding_auto_embed_missing;
- }
-
- return true;
-}
-
-function isAutoInitMissingEnabled(): boolean {
- const envOverride = parseOptionalBooleanEnv(process.env.CODEXLENS_AUTO_INIT_MISSING);
- if (envOverride !== undefined) {
- return envOverride;
- }
-
- return process.platform !== 'win32';
-}
-
-function getAutoEmbedMissingDisabledReason(config: CodexLensConfig | null | undefined): string {
- const envOverride = parseOptionalBooleanEnv(process.env.CODEXLENS_AUTO_EMBED_MISSING);
- if (envOverride === false) {
- return 'Automatic embedding warmup is disabled by CODEXLENS_AUTO_EMBED_MISSING=false.';
- }
-
- if (config?.embedding_auto_embed_missing === false) {
- return 'Automatic embedding warmup is disabled by embedding.auto_embed_missing=false.';
- }
-
- if (process.platform === 'win32') {
- return 'Automatic embedding warmup is disabled by default on Windows even if CodexLens config resolves auto_embed_missing=true. Set CODEXLENS_AUTO_EMBED_MISSING=true to opt in.';
- }
-
- return 'Automatic embedding warmup is disabled.';
-}
-
-function getAutoInitMissingDisabledReason(): string {
- const envOverride = parseOptionalBooleanEnv(process.env.CODEXLENS_AUTO_INIT_MISSING);
- if (envOverride === false) {
- return 'Automatic static index warmup is disabled by CODEXLENS_AUTO_INIT_MISSING=false.';
- }
-
- if (process.platform === 'win32') {
- return 'Automatic static index warmup is disabled by default on Windows. Set CODEXLENS_AUTO_INIT_MISSING=true to opt in.';
- }
-
- return 'Automatic static index warmup is disabled.';
-}
-
-function buildIndexSuggestions(indexStatus: IndexStatus, scope: SearchScope): SearchSuggestion[] | undefined {
- const suggestions: SearchSuggestion[] = [];
-
- if (!indexStatus.indexed) {
- suggestions.push({
- title: 'Initialize index',
- command: formatSmartSearchCommand('init', scope.workingDirectory),
- reason: 'No CodexLens index exists for this path yet.',
- });
- suggestions.push({
- title: 'Check index status',
- command: formatSmartSearchCommand('status', scope.workingDirectory),
- reason: 'Verify whether the target path is mapped to the expected CodexLens project root.',
- });
- return suggestions;
- }
-
- if (!indexStatus.has_embeddings) {
- suggestions.push({
- title: 'Generate embeddings',
- command: formatSmartSearchCommand('embed', scope.workingDirectory),
- reason: 'The index exists, but semantic/vector retrieval is unavailable until embeddings are generated.',
- });
- } else if ((indexStatus.embeddings_coverage_percent ?? 0) < 50) {
- suggestions.push({
- title: 'Rebuild embeddings',
- command: formatSmartSearchCommand('embed', scope.workingDirectory, { force: true }),
- reason: `Embedding coverage is only ${(indexStatus.embeddings_coverage_percent ?? 0).toFixed(1)}%, so semantic search quality is degraded.`,
- });
- }
-
- if (indexStatus.warning?.includes('Failed to parse index status')) {
- suggestions.push({
- title: 'Re-check status',
- command: formatSmartSearchCommand('status', scope.workingDirectory),
- reason: 'The index health payload could not be parsed cleanly.',
- });
- }
-
- return suggestions.length > 0 ? suggestions : undefined;
-}
-
-/**
- * Check if CodexLens index exists for current directory
- * @param path - Directory path to check
- * @returns Index status
- */
-async function checkIndexStatus(path: string = '.'): Promise {
- const scope = resolveSearchScope(path);
- try {
- // Fetch both status and config in parallel
- const [statusResult, configResult] = await Promise.all([
- executeCodexLens(['index', 'status', scope.workingDirectory], { cwd: scope.workingDirectory }),
- executeCodexLens(['config', '--json'], { cwd: scope.workingDirectory }),
- ]);
-
- // Parse config
- const settingsConfig = readCodexLensSettingsSnapshot();
- let config: CodexLensConfig | null = Object.keys(settingsConfig).length > 0 ? { ...settingsConfig } : null;
- if (configResult.success && configResult.output) {
- try {
- const cleanConfigOutput = stripAnsi(configResult.output);
- const parsedConfig = JSON.parse(cleanConfigOutput);
- const configData = parsedConfig.result || parsedConfig;
- config = {
- ...settingsConfig,
- config_file: configData.config_file,
- index_dir: configData.index_dir,
- embedding_backend: normalizeEmbeddingBackend(configData.embedding_backend) ?? settingsConfig.embedding_backend,
- embedding_model: typeof configData.embedding_model === 'string' ? configData.embedding_model : settingsConfig.embedding_model,
- embedding_auto_embed_missing: typeof configData.embedding_auto_embed_missing === 'boolean'
- ? configData.embedding_auto_embed_missing
- : settingsConfig.embedding_auto_embed_missing,
- reranker_enabled: typeof configData.reranker_enabled === 'boolean' ? configData.reranker_enabled : settingsConfig.reranker_enabled,
- reranker_backend: typeof configData.reranker_backend === 'string' ? configData.reranker_backend : settingsConfig.reranker_backend,
- reranker_model: typeof configData.reranker_model === 'string' ? configData.reranker_model : settingsConfig.reranker_model,
- reranker_top_k: typeof configData.reranker_top_k === 'number' ? configData.reranker_top_k : settingsConfig.reranker_top_k,
- };
- } catch {
- // Config parse failed, continue without it
- }
- }
-
- if (!statusResult.success) {
- return {
- indexed: false,
- has_embeddings: false,
- config,
- warning: 'No CodexLens index found. Run smart_search(action="init") to create index for better search results.',
- };
- }
-
- // Parse status output
- try {
- // Strip ANSI color codes from JSON output
- const cleanOutput = stripAnsi(statusResult.output || '{}');
- const parsed = JSON.parse(cleanOutput);
- // Handle both direct and nested response formats (status returns {success, result: {...}})
- const status = parsed.result || parsed;
-
- // Get embeddings coverage from comprehensive status
- const embeddingsData = selectEmbeddingsStatusPayload(status);
- const legacyEmbeddingsData = asObjectRecord(status.embeddings) ?? {};
- const embeddingsSummary = extractEmbeddingsStatusSummary(embeddingsData);
- const totalIndexes = Number(legacyEmbeddingsData.total_indexes || asObjectRecord(embeddingsData)?.total_indexes || 0);
- const embeddingsCoverage = embeddingsSummary.coveragePercent;
- const totalChunks = embeddingsSummary.totalChunks;
- const indexed = Boolean(status.projects_count > 0 || status.total_files > 0 || status.index_root || totalIndexes > 0 || totalChunks > 0);
- const has_embeddings = embeddingsSummary.hasEmbeddings;
-
- // Extract model info if available
- const modelInfoData = asObjectRecord(embeddingsData.model_info);
- const modelInfo: ModelInfo | undefined = modelInfoData ? {
- model_profile: typeof modelInfoData.model_profile === 'string' ? modelInfoData.model_profile : undefined,
- model_name: typeof modelInfoData.model_name === 'string' ? modelInfoData.model_name : undefined,
- embedding_dim: typeof modelInfoData.embedding_dim === 'number' ? modelInfoData.embedding_dim : undefined,
- backend: typeof modelInfoData.backend === 'string' ? modelInfoData.backend : undefined,
- created_at: typeof modelInfoData.created_at === 'string' ? modelInfoData.created_at : undefined,
- updated_at: typeof modelInfoData.updated_at === 'string' ? modelInfoData.updated_at : undefined,
- } : undefined;
-
- let warning: string | undefined;
- if (!indexed) {
- warning = 'No CodexLens index found. Run smart_search(action="init") to create index for better search results.';
- } else if (embeddingsCoverage === 0) {
- warning = 'Index exists but no embeddings generated. Run smart_search(action="embed") to build the vector index.';
- } else if (embeddingsCoverage < 50) {
- warning = `Embeddings coverage is ${embeddingsCoverage.toFixed(1)}% (below 50%). Hybrid search will degrade. Run smart_search(action="embed") to improve vector coverage.`;
- }
-
- return {
- indexed,
- has_embeddings,
- file_count: status.total_files,
- embeddings_coverage_percent: embeddingsCoverage,
- total_chunks: totalChunks,
- // Ensure model_info is null instead of undefined so it's included in JSON
- model_info: modelInfo ?? null,
- config,
- warning,
- };
- } catch {
- return {
- indexed: false,
- has_embeddings: false,
- config,
- warning: 'Failed to parse index status',
- };
- }
- } catch {
- return {
- indexed: false,
- has_embeddings: false,
- warning: 'CodexLens not available',
- };
- }
-}
-
-/**
- * Detection heuristics for intent classification
- */
-
-/**
- * Detect literal string query (simple alphanumeric or quoted strings)
- */
-function detectLiteral(query: string): boolean {
- return /^[a-zA-Z0-9_-]+$/.test(query) || /^["'].*["']$/.test(query);
-}
-
-/**
- * Detect regex pattern (contains regex metacharacters)
- */
-function detectRegex(query: string): boolean {
- return /[.*+?^${}()|[\]\\]/.test(query);
-}
-
-/**
- * Detect natural language query (sentence structure, questions, multi-word phrases)
- */
-function detectNaturalLanguage(query: string): boolean {
- return query.split(/\s+/).length >= 3 || /\?$/.test(query);
-}
-
-/**
- * Detect file path query (path separators, file extensions)
- */
-function detectFilePath(query: string): boolean {
- return /[/\\]/.test(query) || /\.[a-z]{2,4}$/i.test(query);
-}
-
-/**
- * Detect relationship query (import, export, dependency keywords)
- */
-function detectRelationship(query: string): boolean {
- return /(import|export|uses?|depends?|calls?|extends?)\s/i.test(query);
-}
-
-function looksLikeCodeQuery(query: string): boolean {
- if (/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(query)) return true;
- if (/[:.<>\-=(){}[\]]/.test(query) && query.split(/\s+/).length <= 2) return true;
- if (/\.\*|\\\(|\\\[|\\s/.test(query)) return true;
- if (/^[a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z_][a-zA-Z0-9_]*$/.test(query)) return true;
- return false;
-}
-
-function queryTargetsGeneratedFiles(query: string): boolean {
- return GENERATED_QUERY_RE.test(query.trim());
-}
-
-function prefersLexicalPriorityQuery(query: string): boolean {
- const trimmed = query.trim();
- if (!trimmed) return false;
- if (ENV_STYLE_QUERY_RE.test(trimmed)) return true;
-
- const tokens = new Set((trimmed.match(TOPIC_TOKEN_RE) ?? []).map((token) => token.toLowerCase()));
- if (tokens.size === 0) return false;
- if (tokens.has('factory') || tokens.has('factories')) return true;
- if ((tokens.has('environment') || tokens.has('env')) && (tokens.has('variable') || tokens.has('variables'))) {
- return true;
- }
- if (
- tokens.has('backend') &&
- ['embedding', 'embeddings', 'reranker', 'rerankers', 'onnx', 'api', 'litellm', 'fastembed', 'local', 'legacy']
- .some((token) => tokens.has(token))
- ) {
- return true;
- }
-
- let surfaceHit = false;
- let focusHit = false;
- for (const token of tokens) {
- if (LEXICAL_PRIORITY_SURFACE_TOKENS.has(token)) surfaceHit = true;
- if (LEXICAL_PRIORITY_FOCUS_TOKENS.has(token)) focusHit = true;
- if (surfaceHit && focusHit) return true;
- }
- return false;
-}
-
-/**
- * Classify query intent and recommend search mode
- * Simple mapping: hybrid (NL + index + embeddings) | exact (index or insufficient embeddings) | ripgrep (no index)
- * @param query - Search query string
- * @param hasIndex - Whether CodexLens index exists
- * @param hasSufficientEmbeddings - Whether embeddings coverage >= 50%
- * @returns Classification result
- */
-function classifyIntent(query: string, hasIndex: boolean = false, hasSufficientEmbeddings: boolean = false): Classification {
- const isNaturalLanguage = detectNaturalLanguage(query);
- const isCodeQuery = looksLikeCodeQuery(query);
- const isRegexPattern = detectRegex(query);
- const targetsGeneratedFiles = queryTargetsGeneratedFiles(query);
- const prefersLexicalPriority = prefersLexicalPriorityQuery(query);
-
- let mode: string;
- let confidence: number;
-
- if (!hasIndex) {
- mode = 'ripgrep';
- confidence = 1.0;
- } else if (targetsGeneratedFiles || prefersLexicalPriority || isCodeQuery || isRegexPattern) {
- mode = 'exact';
- confidence = targetsGeneratedFiles ? 0.97 : prefersLexicalPriority ? 0.93 : 0.95;
- } else if (isNaturalLanguage && hasSufficientEmbeddings) {
- mode = 'hybrid';
- confidence = 0.9;
- } else {
- mode = 'exact';
- confidence = 0.8;
- }
-
- const detectedPatterns: string[] = [];
- if (detectLiteral(query)) detectedPatterns.push('literal');
- if (detectRegex(query)) detectedPatterns.push('regex');
- if (detectNaturalLanguage(query)) detectedPatterns.push('natural language');
- if (detectFilePath(query)) detectedPatterns.push('file path');
- if (detectRelationship(query)) detectedPatterns.push('relationship');
- if (targetsGeneratedFiles) detectedPatterns.push('generated artifact');
- if (prefersLexicalPriority) detectedPatterns.push('lexical priority');
- if (isCodeQuery) detectedPatterns.push('code identifier');
-
- const reasoning = `Query classified as ${mode} (confidence: ${confidence.toFixed(2)}, detected: ${detectedPatterns.join(', ')}, index: ${hasIndex ? 'available' : 'not available'}, embeddings: ${hasSufficientEmbeddings ? 'sufficient' : 'insufficient'})`;
-
- return { mode, confidence, reasoning };
-}
-
-/**
- * Check if a tool is available in PATH
- * @param toolName - Tool executable name
- * @returns True if available
- */
-function checkToolAvailability(
- toolName: string,
- lookupRuntime: typeof spawnSync = spawnSync,
-): boolean {
- try {
- const isWindows = process.platform === 'win32';
- const command = isWindows ? 'where' : 'which';
- const result = lookupRuntime(command, [toolName], {
- shell: false,
- windowsHide: true,
- stdio: 'ignore',
- timeout: EXEC_TIMEOUTS.SYSTEM_INFO,
- env: { ...process.env, PYTHONIOENCODING: 'utf-8' },
- });
- return !result.error && result.status === 0;
- } catch {
- return false;
- }
-}
-
-/**
- * Build ripgrep command arguments
- * Supports tokenized multi-word queries with OR matching
- * @param params - Search parameters
- * @returns Command, arguments, and tokens used
- */
-function buildRipgrepCommand(params: {
- query: string;
- paths: string[];
- contextLines: number;
- maxResults: number;
- includeHidden: boolean;
- regex?: boolean;
- caseSensitive?: boolean;
- tokenize?: boolean;
-}): { command: string; args: string[]; tokens: string[]; warning?: string; literalFallback: boolean; regex: boolean } {
- const { query, paths = ['.'], contextLines = 0, maxResults = 10, includeHidden = false, regex = false, caseSensitive = true, tokenize = true } = params;
- const queryMode = resolveRipgrepQueryMode(query, regex, tokenize);
-
- const args = [
- '-n',
- '--color=never',
- '--json',
- ];
-
- // Add file filtering (unless includeHidden is true)
- if (!includeHidden) {
- args.push(...buildExcludeArgs());
- }
-
- // Case sensitivity
- if (!caseSensitive) {
- args.push('--ignore-case');
- }
-
- if (contextLines > 0) {
- args.push('-C', contextLines.toString());
- }
-
- if (maxResults > 0) {
- args.push('--max-count', maxResults.toString());
- }
-
- if (includeHidden) {
- args.push('--hidden');
- }
-
- const { tokens } = queryMode;
-
- if (tokens.length > 1) {
- // Multi-token: use multiple -e patterns (OR matching)
- // Each token is escaped for regex safety unless regex mode is enabled
- for (const token of tokens) {
- if (queryMode.regex) {
- args.push('-e', token);
- } else {
- // Escape regex special chars for literal matching
- const escaped = token.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
- args.push('-e', escaped);
- }
- }
- } else {
- // Single token or no tokenization: use original behavior
- if (queryMode.regex) {
- args.push('-e', query);
- } else {
- args.push('-F', query);
- }
- }
-
- args.push(...paths);
-
- return {
- command: 'rg',
- args,
- tokens,
- warning: queryMode.warning,
- literalFallback: queryMode.literalFallback,
- regex: queryMode.regex,
- };
-}
-
-interface RipgrepChunkAccumulator {
- file: string;
- chunkLines: ChunkLine[];
- firstMatchLine?: number;
- firstMatchColumn?: number;
- lastLine?: number;
- matchCount: number;
-}
-
-function finalizeRipgrepChunk(accumulator: RipgrepChunkAccumulator | undefined): ExactMatch | null {
- if (!accumulator || accumulator.matchCount === 0 || accumulator.chunkLines.length === 0) {
- return null;
- }
-
- const firstLine = accumulator.chunkLines[0]?.line ?? accumulator.firstMatchLine ?? 1;
- const lastLine = accumulator.chunkLines[accumulator.chunkLines.length - 1]?.line ?? accumulator.firstMatchLine ?? firstLine;
-
- return {
- file: accumulator.file,
- line: accumulator.firstMatchLine ?? firstLine,
- endLine: lastLine,
- column: accumulator.firstMatchColumn ?? 1,
- content: accumulator.chunkLines.map((line) => line.text).join('\n').trim(),
- chunkLines: [...accumulator.chunkLines],
- };
-}
-
-function parseRipgrepJsonResults(stdout: string, effectiveLimit: number): { results: ExactMatch[]; resultLimitReached: boolean } {
- const allResults: ExactMatch[] = [];
- const activeChunks = new Map();
- const lines = stdout.split('\n').filter((line) => line.trim());
- let resultLimitReached = false;
-
- const flushChunk = (file: string) => {
- const finalized = finalizeRipgrepChunk(activeChunks.get(file));
- activeChunks.delete(file);
- if (!finalized) {
- return;
- }
- allResults.push(finalized);
- if (allResults.length >= effectiveLimit) {
- resultLimitReached = true;
- }
- };
-
- for (const line of lines) {
- if (resultLimitReached) {
- break;
- }
-
- try {
- const item = JSON.parse(line);
- if (item.type !== 'match' && item.type !== 'context' && item.type !== 'end') {
- continue;
- }
-
- const file = item.data?.path?.text as string | undefined;
- if (!file) {
- continue;
- }
-
- if (item.type === 'end') {
- flushChunk(file);
- continue;
- }
-
- const lineNumber = typeof item.data?.line_number === 'number' ? item.data.line_number : undefined;
- const rawText = typeof item.data?.lines?.text === 'string'
- ? item.data.lines.text.replace(/\r?\n$/, '')
- : '';
-
- if (lineNumber === undefined) {
- continue;
- }
-
- let current = activeChunks.get(file);
- const isContiguous = current && current.lastLine !== undefined && lineNumber <= current.lastLine + 1;
- if (!current || !isContiguous) {
- if (current) {
- flushChunk(file);
- if (resultLimitReached) {
- break;
- }
- }
- current = {
- file,
- chunkLines: [],
- matchCount: 0,
- };
- activeChunks.set(file, current);
- }
-
- const previousLine = current.chunkLines[current.chunkLines.length - 1];
- const duplicateLine = previousLine && previousLine.line === lineNumber && previousLine.text === rawText;
- if (!duplicateLine) {
- current.chunkLines.push({
- line: lineNumber,
- text: rawText,
- isMatch: item.type === 'match',
- });
- } else if (item.type === 'match') {
- previousLine.isMatch = true;
- }
-
- if (item.type === 'match') {
- current.matchCount += 1;
- if (current.firstMatchLine === undefined) {
- current.firstMatchLine = lineNumber;
- current.firstMatchColumn =
- item.data.submatches && item.data.submatches[0]
- ? item.data.submatches[0].start + 1
- : 1;
- }
- }
- current.lastLine = lineNumber;
- } catch {
- continue;
- }
- }
-
- if (!resultLimitReached) {
- for (const file of [...activeChunks.keys()]) {
- flushChunk(file);
- if (resultLimitReached) {
- break;
- }
- }
- }
-
- return { results: allResults.slice(0, effectiveLimit), resultLimitReached };
-}
-
-function normalizeEmbeddingBackend(backend?: string): string | undefined {
- if (!backend) {
- return undefined;
- }
-
- const normalized = backend.trim().toLowerCase();
- if (!normalized) {
- return undefined;
- }
- if (normalized === 'api') {
- return 'litellm';
- }
- if (normalized === 'local') {
- return 'fastembed';
- }
- return normalized;
-}
-
-function buildIndexInitArgs(projectPath: string, options: { force?: boolean; languages?: string[]; noEmbeddings?: boolean } = {}): string[] {
- const { force = false, languages, noEmbeddings = true } = options;
- const args = ['index', 'init', projectPath];
-
- if (noEmbeddings) {
- args.push('--no-embeddings');
- }
- if (force) {
- args.push('--force');
- }
- if (languages && languages.length > 0) {
- args.push(...languages.flatMap((language) => ['--language', language]));
- }
-
- return args;
-}
-
-function resolveEmbeddingSelection(
- requestedBackend: string | undefined,
- requestedModel: string | undefined,
- config: CodexLensConfig | null | undefined,
-): { backend?: string; model?: string; preset: 'explicit' | 'config' | 'bulk-local-fast'; note?: string } {
- const normalizedRequestedBackend = normalizeEmbeddingBackend(requestedBackend);
- const normalizedRequestedModel = requestedModel?.trim() || undefined;
-
- if (normalizedRequestedBackend) {
- return {
- backend: normalizedRequestedBackend,
- model: normalizedRequestedModel || config?.embedding_model,
- preset: 'explicit',
- };
- }
-
- if (normalizedRequestedModel) {
- const inferredBackend = config?.embedding_backend
- || (['fast', 'code'].includes(normalizedRequestedModel) ? 'fastembed' : undefined);
- return {
- backend: inferredBackend,
- model: normalizedRequestedModel,
- preset: inferredBackend ? 'config' : 'explicit',
- };
- }
-
- return {
- backend: 'fastembed',
- model: 'fast',
- preset: 'bulk-local-fast',
- note: config?.embedding_backend && config.embedding_backend !== 'fastembed'
- ? `Using recommended bulk indexing preset: local-fast instead of configured ${config.embedding_backend}. Pass embeddingBackend="api" to force remote API embeddings.`
- : 'Using recommended bulk indexing preset: local-fast. Pass embeddingBackend="api" to force remote API embeddings.',
- };
-}
-
-const EMBED_PROGRESS_PREFIX = '__CCW_EMBED_PROGRESS__';
-
-function resolveEmbeddingEndpoints(backend?: string): RotationEndpointConfig[] {
- if (backend !== 'litellm') {
- return [];
- }
-
- try {
- return generateRotationEndpoints(getProjectRoot()).filter((endpoint) => {
- const apiKey = endpoint.api_key?.trim() ?? '';
- return Boolean(
- apiKey &&
- apiKey.length > 8 &&
- !/^\*+$/.test(apiKey) &&
- endpoint.api_base?.trim() &&
- endpoint.model?.trim()
- );
- });
- } catch {
- return [];
- }
-}
-
-function resolveApiWorkerCount(
- requestedWorkers: number | undefined,
- backend: string | undefined,
- endpoints: RotationEndpointConfig[]
-): number | undefined {
- if (backend !== 'litellm') {
- return undefined;
- }
-
- if (typeof requestedWorkers === 'number' && Number.isFinite(requestedWorkers)) {
- return Math.max(1, Math.floor(requestedWorkers));
- }
-
- if (endpoints.length <= 1) {
- return 4;
- }
-
- return Math.min(16, Math.max(4, endpoints.length * 2));
-}
-
-function extractEmbedJsonLine(stdout: string): string | undefined {
- const lines = stdout
- .split(/\r?\n/)
- .map((line) => line.trim())
- .filter(Boolean)
- .filter((line) => !line.startsWith(EMBED_PROGRESS_PREFIX));
-
- return [...lines].reverse().find((line) => line.startsWith('{') && line.endsWith('}'));
-}
-
-function buildEmbeddingPythonCode(params: {
- projectPath: string;
- backend?: string;
- model?: string;
- force: boolean;
- maxWorkers?: number;
- endpoints?: RotationEndpointConfig[];
-}): string {
- const { projectPath, backend, model, force, maxWorkers, endpoints = [] } = params;
- return `
-import json
-import sys
-from pathlib import Path
-from codexlens.storage.path_mapper import PathMapper
-from codexlens.storage.registry import RegistryStore
-from codexlens.cli.embedding_manager import generate_dense_embeddings_centralized
-
-target_path = Path(r"__PROJECT_PATH__").expanduser().resolve()
-backend = __BACKEND__
-model = __MODEL__
-force = __FORCE__
-max_workers = __MAX_WORKERS__
-endpoints = json.loads(r'''__ENDPOINTS_JSON__''')
-
-def progress_update(message: str):
- print("__CCW_EMBED_PROGRESS__" + str(message), flush=True)
-
-registry = RegistryStore()
-registry.initialize()
-try:
- project = registry.get_project(target_path)
- index_root = None
- if project is not None:
- index_root = Path(project.index_root)
- else:
- mapper = PathMapper()
- index_db = mapper.source_to_index_db(target_path)
- if index_db.exists():
- index_root = index_db.parent
- else:
- nearest = registry.find_nearest_index(target_path)
- if nearest is not None:
- index_root = Path(nearest.index_path).parent
-
- if index_root is None:
- print(json.dumps({"success": False, "error": f"No index found for: {target_path}"}), flush=True)
- sys.exit(1)
-
- result = generate_dense_embeddings_centralized(
- index_root,
- embedding_backend=backend,
- model_profile=model,
- force=force,
- use_gpu=True,
- max_workers=max_workers,
- endpoints=endpoints if endpoints else None,
- progress_callback=progress_update,
- )
-
- print(json.dumps(result), flush=True)
- if not result.get("success"):
- sys.exit(1)
-finally:
- registry.close()
-`
- .replace('__PROJECT_PATH__', projectPath.replace(/\\/g, '\\\\'))
- .replace('__BACKEND__', backend ? JSON.stringify(backend) : 'None')
- .replace('__MODEL__', model ? JSON.stringify(model) : 'None')
- .replace('__FORCE__', force ? 'True' : 'False')
- .replace('__MAX_WORKERS__', typeof maxWorkers === 'number' ? String(Math.max(1, Math.floor(maxWorkers))) : 'None')
- .replace('__ENDPOINTS_JSON__', JSON.stringify(endpoints).replace(/\\/g, '\\\\').replace(/'''/g, "\\'\\'\\'"));
-}
-
-function spawnBackgroundEmbeddingsViaPython(params: {
- projectPath: string;
- backend?: string;
- model?: string;
- force: boolean;
- maxWorkers?: number;
- endpoints?: RotationEndpointConfig[];
-}): { success: boolean; error?: string } {
- const { projectPath, backend, model } = params;
- try {
- const child = getSpawnRuntime()(
- getVenvPythonPathRuntime()(),
- ['-c', buildEmbeddingPythonCode(params)],
- buildSmartSearchSpawnOptions(projectPath, {
- detached: shouldDetachBackgroundSmartSearchProcess(),
- stdio: 'ignore',
- }),
- );
-
- autoEmbedJobs.set(projectPath, {
- startedAt: getNowRuntime(),
- backend,
- model,
- });
-
- const cleanup = () => {
- autoEmbedJobs.delete(projectPath);
- };
- child.on('error', cleanup);
- child.on('close', cleanup);
- child.unref();
- return { success: true };
- } catch (error) {
- return {
- success: false,
- error: error instanceof Error ? error.message : String(error),
- };
- }
-}
-
-function spawnBackgroundIndexInit(params: {
- projectPath: string;
- languages?: string[];
-}): { success: boolean; error?: string } {
- const { projectPath, languages } = params;
- try {
- const pythonPath = getVenvPythonPathRuntime()();
- if (!existsSync(pythonPath)) {
- return {
- success: false,
- error: 'CodexLens Python environment is not ready yet.',
- };
- }
-
- const child = getSpawnRuntime()(
- pythonPath,
- ['-m', 'codexlens', ...buildIndexInitArgs(projectPath, { languages })],
- buildSmartSearchSpawnOptions(projectPath, {
- detached: shouldDetachBackgroundSmartSearchProcess(),
- stdio: 'ignore',
- }),
- );
-
- autoInitJobs.set(projectPath, {
- startedAt: getNowRuntime(),
- languages,
- });
-
- const cleanup = () => {
- autoInitJobs.delete(projectPath);
- };
- child.on('error', cleanup);
- child.on('close', cleanup);
- child.unref();
- return { success: true };
- } catch (error) {
- return {
- success: false,
- error: error instanceof Error ? error.message : String(error),
- };
- }
-}
-
-async function maybeStartBackgroundAutoInit(
- scope: SearchScope,
- indexStatus: IndexStatus,
-): Promise<{ note?: string; warning?: string }> {
- if (indexStatus.indexed) {
- return {};
- }
-
- if (!isAutoInitMissingEnabled()) {
- return {
- note: getAutoInitMissingDisabledReason(),
- };
- }
-
- if (autoInitJobs.has(scope.workingDirectory)) {
- return {
- note: 'Background static index build is already running for this path.',
- };
- }
-
- const spawned = spawnBackgroundIndexInit({
- projectPath: scope.workingDirectory,
- });
-
- if (!spawned.success) {
- return {
- warning: `Automatic static index warmup could not start: ${spawned.error}`,
- };
- }
-
- return {
- note: 'Background static index build started for this path. Re-run search shortly for indexed FTS results.',
- };
-}
-
-async function maybeStartBackgroundAutoEmbed(
- scope: SearchScope,
- indexStatus: IndexStatus,
-): Promise<{ note?: string; warning?: string }> {
- if (!indexStatus.indexed || indexStatus.has_embeddings) {
- return {};
- }
-
- if (!isAutoEmbedMissingEnabled(indexStatus.config)) {
- return {
- note: getAutoEmbedMissingDisabledReason(indexStatus.config),
- };
- }
-
- if (autoEmbedJobs.has(scope.workingDirectory)) {
- return {
- note: 'Background embedding build is already running for this path.',
- };
- }
-
- const backend = normalizeEmbeddingBackend(indexStatus.config?.embedding_backend) ?? 'fastembed';
- const model = indexStatus.config?.embedding_model?.trim() || undefined;
- const semanticStatus = await getSemanticStatusRuntime()();
- if (!semanticStatus.available) {
- return {
- warning: 'Automatic embedding warmup skipped because semantic dependencies are not ready.',
- };
- }
-
- if (backend === 'litellm' && !semanticStatus.litellmAvailable) {
- return {
- warning: 'Automatic embedding warmup skipped because the LiteLLM embedder is not ready.',
- };
- }
-
- const endpoints = resolveEmbeddingEndpoints(backend);
- const configuredApiMaxWorkers = indexStatus.config?.api_max_workers;
- const effectiveApiMaxWorkers = typeof configuredApiMaxWorkers === 'number'
- ? Math.max(1, Math.floor(configuredApiMaxWorkers))
- : resolveApiWorkerCount(undefined, backend, endpoints);
- const spawned = spawnBackgroundEmbeddingsViaPython({
- projectPath: scope.workingDirectory,
- backend,
- model,
- force: false,
- maxWorkers: effectiveApiMaxWorkers,
- endpoints,
- });
-
- if (!spawned.success) {
- return {
- warning: `Automatic embedding warmup could not start: ${spawned.error}`,
- };
- }
-
- return {
- note: 'Background embedding build started for this path. Re-run semantic search shortly for vector results.',
- };
-}
-
-// v1 executeEmbeddingsViaPython removed — v2 uses built-in fastembed models
-
-// v1 executeInitAction removed — replaced by executeInitActionV2
-
-// v1 executeEmbedAction removed — v2 auto-embeds during sync
-
-// v1 executeStatusAction removed — replaced by executeStatusActionV2
-
-// v1 executeUpdateAction and executeWatchAction removed — replaced by V2 versions
-
-// v1 executeFuzzyMode and executeAutoMode removed — v2 bridge handles all search
-
-/**
- * Mode: ripgrep - Fast literal string matching using ripgrep
- * No index required, fallback to CodexLens if ripgrep unavailable
- * Supports tokenized multi-word queries with OR matching and result ranking
- */
-async function executeRipgrepMode(params: Params): Promise {
- const { query, paths = [], contextLines = 0, maxResults = 5, extraFilesCount = 10, maxContentLength = 200, includeHidden = false, path = '.', regex = true, caseSensitive = true, tokenize = true, codeOnly = true, withDoc = false, excludeExtensions } = params;
- const scope = resolveSearchScope(path, paths);
- // withDoc overrides codeOnly
- const effectiveCodeOnly = withDoc ? false : codeOnly;
-
- if (!query) {
- return {
- success: false,
- error: 'Query is required for search',
- };
- }
-
- // Check if ripgrep is available
- const hasRipgrep = checkToolAvailability('rg');
-
- // Calculate total to fetch for split (full content + extra files)
- const totalToFetch = maxResults + extraFilesCount;
-
- // If ripgrep not available, fall back to CodexLens exact mode
- if (!hasRipgrep) {
- const readyStatus = await ensureCodexLensReady();
- if (!readyStatus.ready) {
- return {
- success: false,
- error: 'Neither ripgrep nor CodexLens available. Install ripgrep (rg) or CodexLens for search functionality.',
- };
- }
-
- // Use CodexLens fts mode as fallback
- const args = ['search', query, '--limit', totalToFetch.toString(), '--method', 'fts', '--json'];
- const result = await executeCodexLens(args, { cwd: scope.workingDirectory });
-
- if (!result.success) {
- noteCodexLensFtsCompatibility(result.error);
- return {
- success: false,
- error: summarizeBackendError(result.error),
- metadata: {
- mode: 'ripgrep',
- backend: 'codexlens-fallback',
- count: 0,
- query,
- },
- };
- }
-
- // Parse results
- let allResults: SemanticMatch[] = [];
- try {
- const parsed = JSON.parse(stripAnsi(result.output || '{}'));
- const data = parsed.result?.results || parsed.results || parsed;
- allResults = (Array.isArray(data) ? data : []).map((item: any) => ({
- file: item.path || item.file,
- score: item.score || 0,
- content: truncateContent(item.content || item.excerpt, maxContentLength),
- symbol: item.symbol || null,
- }));
- } catch {
- // Keep empty results
- }
-
- const scopedResults = filterResultsToTargetFile(allResults, scope);
-
- // Split results: first N with full content, rest as file paths only
- const { results, extra_files } = splitResultsWithExtraFiles(scopedResults, maxResults, extraFilesCount);
-
- return {
- success: true,
- results,
- extra_files: extra_files.length > 0 ? extra_files : undefined,
- metadata: {
- mode: 'ripgrep',
- backend: 'codexlens-fallback',
- count: results.length,
- query,
- note: 'Using CodexLens exact mode (ripgrep not available)',
- },
- };
- }
-
- // Use ripgrep - request more results to support split
- const { command, args, tokens, warning: queryModeWarning } = buildRipgrepCommand({
- query,
- paths: scope.searchPaths,
- contextLines,
- maxResults: totalToFetch, // Fetch more to support split
- includeHidden,
- regex,
- caseSensitive,
- tokenize,
- });
-
- return new Promise((resolve) => {
- const child = getSpawnRuntime()(
- command,
- args,
- buildSmartSearchSpawnOptions(scope.workingDirectory || getProjectRoot(), {
- stdio: ['ignore', 'pipe', 'pipe'],
- }),
- );
-
- let stdout = '';
- let stderr = '';
- let resultLimitReached = false;
-
- child.stdout?.on('data', (data) => {
- stdout += data.toString();
- });
-
- child.stderr?.on('data', (data) => {
- stderr += data.toString();
- });
-
- child.on('close', (code) => {
- // Limit total results to prevent memory overflow (--max-count only limits per-file)
- const effectiveLimit = totalToFetch > 0 ? totalToFetch : 500;
- const parsedResults = parseRipgrepJsonResults(stdout, effectiveLimit);
- const allResults = parsedResults.results;
- resultLimitReached = parsedResults.resultLimitReached;
-
- // Handle Windows device file errors gracefully (os error 1)
- // If we have results despite the error, return them as partial success
- const isWindowsDeviceError = stderr.includes('os error 1') || stderr.includes('函数不正确');
-
- // Apply token-based scoring and sorting for multi-word queries
- // Results matching more tokens are ranked higher (exact matches first)
- const scoredResults = tokens.length > 1 ? scoreByTokenMatch(allResults, tokens) : allResults;
-
- // Apply code-only and extension filtering
- const filteredResults = filterNoisyFiles(scoredResults as any[], { codeOnly: effectiveCodeOnly, excludeExtensions });
-
- if (code === 0 || code === 1 || (isWindowsDeviceError && filteredResults.length > 0)) {
- // Split results: first N with full content, rest as file paths only
- const { results, extra_files } = splitResultsWithExtraFiles(filteredResults, maxResults, extraFilesCount);
-
- // Build warning message for various conditions
- const warnings: string[] = [];
- if (queryModeWarning) {
- warnings.push(queryModeWarning);
- }
- if (resultLimitReached) {
- warnings.push(`Result limit reached (${effectiveLimit}). Use a more specific query or increase limit.`);
- }
- if (isWindowsDeviceError) {
- warnings.push('Some Windows device files were skipped');
- }
-
- resolve({
- success: true,
- results,
- extra_files: extra_files.length > 0 ? extra_files : undefined,
- metadata: {
- mode: 'ripgrep',
- backend: 'ripgrep',
- count: results.length,
- query,
- tokens: tokens.length > 1 ? tokens : undefined, // Include tokens in metadata for debugging
- tokenized: tokens.length > 1,
- ...(warnings.length > 0 && { warning: warnings.join('; ') }),
- },
- });
- } else if (isWindowsDeviceError && allResults.length === 0) {
- // Windows device error but no results - might be the only issue
- resolve({
- success: true,
- results: [],
- metadata: {
- mode: 'ripgrep',
- backend: 'ripgrep',
- count: 0,
- query,
- warning: 'No matches found (some Windows device files were skipped)',
- },
- });
- } else {
- resolve({
- success: false,
- error: `ripgrep execution failed with code ${code}: ${stderr}`,
- results: [],
- });
- }
- });
-
- child.on('error', (error) => {
- resolve({
- success: false,
- error: `Failed to spawn ripgrep: ${error.message}`,
- results: [],
- });
- });
- });
-}
-
-// ========================================
-// codexlens-search v2 bridge integration
-// ========================================
-
-/**
- * Execute search via codexlens-search (v2) bridge CLI.
- * Spawns 'codexlens-search search --query X --top-k Y --db-path Z' and parses JSON output.
- *
- * @param query - Search query string
- * @param topK - Number of results to return
- * @param dbPath - Path to the v2 index database directory
- * @returns Parsed search results as SemanticMatch array
- */
-async function executeCodexLensV2Bridge(
- query: string,
- topK: number,
- dbPath: string,
-): Promise {
- return new Promise((resolve) => {
- const args = [
- '--db-path', dbPath,
- 'search',
- '--query', query,
- '--top-k', String(topK),
- ];
-
- execFile('codexlens-search', args, {
- encoding: 'utf-8',
- timeout: EXEC_TIMEOUTS.PROCESS_SPAWN,
- windowsHide: true,
- env: { ...process.env, PYTHONIOENCODING: 'utf-8' },
- }, (error, stdout, stderr) => {
- if (error) {
- console.warn(`[CodexLens-v2] Bridge search failed: ${error.message}`);
- resolve({
- success: false,
- error: `codexlens-search v2 bridge failed: ${error.message}`,
- });
- return;
- }
-
- try {
- const parsed = JSON.parse(stdout.trim());
-
- // Bridge outputs {"error": string} on failure
- if (parsed && typeof parsed === 'object' && 'error' in parsed) {
- resolve({
- success: false,
- error: `codexlens-search v2: ${parsed.error}`,
- });
- return;
- }
-
- // Bridge outputs array of {path, score, line, end_line, snippet, content}
- const raw: Array<{
- path?: string; score?: number; line?: number;
- end_line?: number; snippet?: string; content?: string;
- }> = Array.isArray(parsed) ? parsed : [];
-
- // Build AceLike sections and group by file
- const sections: AceLikeSection[] = raw.map(r => ({
- path: r.path || '',
- line: r.line || undefined,
- endLine: r.end_line || undefined,
- score: r.score || 0,
- symbol: null,
- snippet: r.content || r.snippet || '',
- }));
-
- const groupMap = new Map();
- for (const s of sections) {
- const arr = groupMap.get(s.path) || [];
- arr.push(s);
- groupMap.set(s.path, arr);
- }
- const groups: AceLikeGroup[] = Array.from(groupMap.entries()).map(
- ([path, secs]) => ({ path, sections: secs, total_matches: secs.length })
- );
-
- // Render text view with line numbers
- const textParts: string[] = [];
- for (const s of sections) {
- const lineInfo = s.line ? `:${s.line}${s.endLine ? `-${s.endLine}` : ''}` : '';
- textParts.push(`Path: ${s.path}${lineInfo}\n${s.snippet}\n`);
- }
-
- const aceLikeOutput: AceLikeOutput = {
- format: 'ace',
- text: textParts.join('\n'),
- groups,
- sections,
- total: sections.length,
- };
-
- resolve({
- success: true,
- results: aceLikeOutput,
- metadata: {
- mode: 'semantic' as any,
- backend: 'codexlens-v2',
- count: sections.length,
- query,
- note: 'Using codexlens-search v2 bridge (2-stage vector + reranking)',
- },
- });
- } catch (parseErr) {
- console.warn(`[CodexLens-v2] Failed to parse bridge output: ${(parseErr as Error).message}`);
- resolve({
- success: false,
- error: `Failed to parse codexlens-search v2 output: ${(parseErr as Error).message}`,
- output: stdout,
- });
- }
- });
- });
-}
-
-/**
- * Load env vars from ~/.codexlens/.env file so they're passed to bridge subprocess.
- */
-function loadCodexLensEnvFile(): Record {
- const envVars: Record = {};
- try {
- const envPath = join(getCodexLensDataDir(), '.env');
- const content = readFileSync(envPath, 'utf-8');
- for (const line of content.split('\n')) {
- const trimmed = line.trim();
- if (!trimmed || trimmed.startsWith('#')) continue;
- const eqIdx = trimmed.indexOf('=');
- if (eqIdx <= 0) continue;
- const key = trimmed.substring(0, eqIdx).trim();
- let value = trimmed.substring(eqIdx + 1).trim();
- // Strip surrounding quotes
- if ((value.startsWith('"') && value.endsWith('"')) ||
- (value.startsWith("'") && value.endsWith("'"))) {
- value = value.slice(1, -1);
- }
- envVars[key] = value;
- }
- } catch {
- // File doesn't exist — no env overrides
- }
- return envVars;
-}
-
-/**
- * Execute a generic codexlens-search v2 bridge subcommand (init, status, sync, watch, etc.).
- * Returns parsed JSON output from the bridge CLI.
- */
-async function executeV2BridgeCommand(
- subcommand: string,
- args: string[],
- options?: { timeout?: number; dbPath?: string },
-): Promise {
- return new Promise((resolve) => {
- // --db-path is a global arg and must come BEFORE the subcommand
- const globalArgs = options?.dbPath ? ['--db-path', options.dbPath] : [];
- const fullArgs = [...globalArgs, subcommand, ...args];
- // Merge process.env with .env file settings (file values override process.env)
- const codexlensEnv = loadCodexLensEnvFile();
- execFile('codexlens-search', fullArgs, {
- encoding: 'utf-8',
- timeout: options?.timeout ?? EXEC_TIMEOUTS.PROCESS_SPAWN,
- windowsHide: true,
- env: { ...process.env, ...codexlensEnv, PYTHONIOENCODING: 'utf-8' },
- }, (error, stdout, stderr) => {
- if (error) {
- resolve({
- success: false,
- error: `codexlens-search ${subcommand} failed: ${error.message}`,
- });
- return;
- }
- try {
- const parsed = JSON.parse(stdout.trim());
- if (parsed && typeof parsed === 'object' && 'error' in parsed) {
- resolve({ success: false, error: `codexlens-search: ${parsed.error}` });
- return;
- }
- resolve({ success: true, status: parsed, message: parsed.status || `${subcommand} completed`, metadata: { action: subcommand } });
- } catch {
- resolve({ success: false, error: `Failed to parse codexlens-search ${subcommand} output`, output: stdout });
- }
- });
- });
-}
-
-/**
- * List known models via v2 bridge (list-models subcommand).
- * Returns JSON array of {name, type, installed, cache_path}.
- */
-export async function executeV2ListModels(): Promise {
- return executeV2BridgeCommand('list-models', []);
-}
-
-/**
- * Download a single model by name via v2 bridge (download-model subcommand).
- */
-export async function executeV2DownloadModel(modelName: string): Promise {
- return executeV2BridgeCommand('download-model', [modelName], { timeout: 600000 });
-}
-
-/**
- * Delete a model from cache via v2 bridge (delete-model subcommand).
- */
-export async function executeV2DeleteModel(modelName: string): Promise {
- return executeV2BridgeCommand('delete-model', [modelName]);
-}
-
-/**
- * Action: init (v2) - Initialize index and sync files.
- */
-async function executeInitActionV2(params: Params): Promise {
- const { path = '.' } = params;
- const scope = resolveSearchScope(path);
- const dbPath = join(scope.workingDirectory, '.codexlens');
-
- // Step 1: init empty index
- const initResult = await executeV2BridgeCommand('init', [], { dbPath });
- if (!initResult.success) return initResult;
-
- // Step 2: sync all files
- const syncResult = await executeV2BridgeCommand('sync', [
- '--root', scope.workingDirectory,
- ], { timeout: 1800000, dbPath }); // 30 min for large codebases
-
- return {
- success: syncResult.success,
- error: syncResult.error,
- message: syncResult.success
- ? `Index initialized and synced for ${scope.workingDirectory}`
- : undefined,
- metadata: { action: 'init', path: scope.workingDirectory },
- status: syncResult.status,
- };
-}
-
-/**
- * Action: status (v2) - Report index statistics.
- */
-async function executeStatusActionV2(params: Params): Promise {
- const { path = '.' } = params;
- const scope = resolveSearchScope(path);
- const dbPath = join(scope.workingDirectory, '.codexlens');
-
- return executeV2BridgeCommand('status', [], { dbPath });
-}
-
-/**
- * Action: update (v2) - Incremental sync (re-sync changed files).
- */
-async function executeUpdateActionV2(params: Params): Promise {
- const { path = '.' } = params;
- const scope = resolveSearchScope(path);
- const dbPath = join(scope.workingDirectory, '.codexlens');
-
- return executeV2BridgeCommand('sync', [
- '--root', scope.workingDirectory,
- ], { timeout: 600000, dbPath }); // 10 min
-}
-
-/**
- * Action: watch (v2) - Start file watcher for auto-updates.
- */
-async function executeWatchActionV2(params: Params): Promise {
- const { path = '.', debounce = 1000 } = params;
- const scope = resolveSearchScope(path);
- const dbPath = join(scope.workingDirectory, '.codexlens');
-
- // Watch runs indefinitely — start it with a short initial timeout to confirm startup
- const result = await executeV2BridgeCommand('watch', [
- '--root', scope.workingDirectory,
- '--debounce-ms', debounce.toString(),
- ], { timeout: 5000, dbPath });
-
- return {
- success: true,
- message: `File watcher started for ${scope.workingDirectory}. Changes are indexed automatically.`,
- metadata: { action: 'watch', path: scope.workingDirectory },
- status: result.status,
- };
-}
-
-// v1 executeCodexLensExactMode removed — v2 bridge handles search
-
-// v1 executeHybridMode removed — v2 bridge handles semantic search
-// v1 executeHybridMode removed — v2 bridge handles semantic search
-
-/**
- * Query intent used to adapt RRF weights (Python parity).
- *
- * Keep this logic aligned with CodexLens Python hybrid search:
- * `codex-lens/src/codexlens/search/hybrid_search.py`
- */
-export type QueryIntent = 'keyword' | 'semantic' | 'mixed';
-
-// Python default: vector 60%, exact 30%, fuzzy 10%
-const DEFAULT_RRF_WEIGHTS = {
- exact: 0.3,
- fuzzy: 0.1,
- vector: 0.6,
-} as const;
-
-function normalizeWeights(weights: Record): Record {
- const sum = Object.values(weights).reduce((acc, v) => acc + v, 0);
- if (!Number.isFinite(sum) || sum <= 0) return { ...weights };
- return Object.fromEntries(Object.entries(weights).map(([k, v]) => [k, v / sum]));
-}
-
-/**
- * Detect query intent using the same heuristic signals as Python:
- * - Code patterns: `.`, `::`, `->`, CamelCase, snake_case, common code keywords
- * - Natural language patterns: >5 words, question marks, interrogatives, common verbs
- */
-export function detectQueryIntent(query: string): QueryIntent {
- const trimmed = query.trim();
- if (!trimmed) return 'mixed';
-
- const lower = trimmed.toLowerCase();
- const wordCount = trimmed.split(/\s+/).filter(Boolean).length;
-
- const hasCodeSignals =
- /(::|->|\.)/.test(trimmed) ||
- /[A-Z][a-z]+[A-Z]/.test(trimmed) ||
- /\b\w+_\w+\b/.test(trimmed) ||
- /\b(def|class|function|const|let|var|import|from|return|async|await|interface|type)\b/i.test(lower);
-
- const hasNaturalSignals =
- wordCount > 5 ||
- /\?/.test(trimmed) ||
- /\b(how|what|why|when|where)\b/i.test(trimmed) ||
- /\b(handle|explain|fix|implement|create|build|use|find|search|convert|parse|generate|support)\b/i.test(trimmed);
-
- if (hasCodeSignals && hasNaturalSignals) return 'mixed';
- if (hasCodeSignals) return 'keyword';
- if (hasNaturalSignals) return 'semantic';
- return 'mixed';
-}
-
-/**
- * Intent → weights mapping (Python parity).
- * - keyword: exact-heavy
- * - semantic: vector-heavy
- * - mixed: keep defaults
- */
-export function adjustWeightsByIntent(
- intent: QueryIntent,
- baseWeights: Record,
-): Record {
- if (intent === 'keyword') return normalizeWeights({ exact: 0.5, fuzzy: 0.1, vector: 0.4 });
- if (intent === 'semantic') return normalizeWeights({ exact: 0.2, fuzzy: 0.1, vector: 0.7 });
- return normalizeWeights({ ...baseWeights });
-}
-
-export function getRRFWeights(
- query: string,
- baseWeights: Record = DEFAULT_RRF_WEIGHTS,
-): Record {
- return adjustWeightsByIntent(detectQueryIntent(query), baseWeights);
-}
-
-/**
- * Post-processing: Filter noisy files from semantic search results
- * Uses FILTER_CONFIG patterns to remove irrelevant files.
- * Optimized: pre-compiled regexes, accurate path segment matching.
- */
-// Pre-compile file exclusion regexes once (avoid recompilation in loop)
-const FILE_EXCLUDE_REGEXES = [...FILTER_CONFIG.exclude_files].map(pattern =>
- new RegExp('^' + pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&').replace(/\\\*/g, '.*') + '$')
-);
-
-// Non-code file extensions (for codeOnly filter)
-const NON_CODE_EXTENSIONS = new Set([
- 'md', 'txt', 'json', 'yaml', 'yml', 'xml', 'csv', 'log',
- 'ini', 'cfg', 'conf', 'toml', 'env', 'properties',
- 'html', 'htm', 'svg', 'png', 'jpg', 'jpeg', 'gif', 'ico', 'webp',
- 'pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx',
- 'lock', 'sum', 'mod',
-]);
-
-interface FilterOptions {
- excludeExtensions?: string[];
- codeOnly?: boolean;
-}
-
-function filterNoisyFiles(results: SemanticMatch[], options: FilterOptions = {}): SemanticMatch[] {
- const { excludeExtensions = [], codeOnly = false } = options;
-
- // Build extension filter set
- const excludedExtSet = new Set(excludeExtensions.map(ext => ext.toLowerCase().replace(/^\./, '')));
- if (codeOnly) {
- NON_CODE_EXTENSIONS.forEach(ext => excludedExtSet.add(ext));
- }
-
- return results.filter(r => {
- // Support both 'file' and 'path' field names (different backends use different names)
- const filePath = r.file || (r as any).path || '';
- if (!filePath) return true;
-
- const segments: string[] = filePath.split(/[/\\]/);
-
- // Accurate directory check: segment must exactly match excluded directory
- if (segments.some((segment: string) => FILTER_CONFIG.exclude_directories.has(segment))) {
- return false;
- }
-
- // Accurate file check: pattern matches filename only (not full path)
- const filename = segments.pop() || '';
- if (FILE_EXCLUDE_REGEXES.some(regex => regex.test(filename))) {
- return false;
- }
-
- // Extension filter check
- if (excludedExtSet.size > 0) {
- const ext = filename.split('.').pop()?.toLowerCase() || '';
- if (excludedExtSet.has(ext)) {
- return false;
- }
- }
-
- return true;
- });
-}
-
-/**
- * Post-processing: Boost results containing query keywords
- * Extracts keywords from query and boosts matching results.
- * Optimized: uses whole-word matching with regex for accuracy.
- */
-// Helper to escape regex special characters
-function escapeRegExp(str: string): string {
- return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
-}
-
-function applyKeywordBoosting(results: SemanticMatch[], query: string): SemanticMatch[] {
- // Extract meaningful keywords (ignore common words)
- const stopWords = new Set(['the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'shall', 'can', 'need', 'dare', 'ought', 'used', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by', 'from', 'as', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'between', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 'just', 'and', 'but', 'if', 'or', 'because', 'until', 'while', 'although', 'though', 'after', 'before', 'when', 'whenever', 'where', 'wherever', 'whether', 'which', 'who', 'whom', 'whose', 'what', 'whatever', 'whichever', 'whoever', 'whomever', 'this', 'that', 'these', 'those', 'it', 'its']);
-
- const keywords = query
- .toLowerCase()
- .split(/[\s,.;:()"{}[\]-]+/) // More robust splitting on punctuation
- .filter(word => word.length > 2 && !stopWords.has(word));
-
- if (keywords.length === 0) return results;
-
- // Create case-insensitive regexes for whole-word matching
- const keywordRegexes = keywords.map(kw => new RegExp(`\\b${escapeRegExp(kw)}\\b`, 'i'));
-
- return results.map(r => {
- const content = r.content || '';
- const file = r.file || '';
-
- // Count keyword matches using whole-word regex
- let matchCount = 0;
- for (const regex of keywordRegexes) {
- if (regex.test(content) || regex.test(file)) {
- matchCount++;
- }
- }
-
- // Apply boost only if there are matches
- if (matchCount > 0) {
- const matchRatio = matchCount / keywords.length;
- const boost = 1 + (matchRatio * 0.3); // Up to 30% boost for full match
- return {
- ...r,
- score: r.score * boost,
- };
- }
-
- return r;
- });
-}
-
-/**
- * Post-processing: Enforce score diversity
- * Penalizes results with identical scores (indicates undifferentiated matching)
- */
-function enforceScoreDiversity(results: SemanticMatch[]): SemanticMatch[] {
- if (results.length < 2) return results;
-
- // Count occurrences of each score (rounded to 3 decimal places for comparison)
- const scoreCounts = new Map();
- for (const r of results) {
- const roundedScore = Math.round(r.score * 1000) / 1000;
- scoreCounts.set(roundedScore, (scoreCounts.get(roundedScore) || 0) + 1);
- }
-
- // Apply penalty to scores that appear more than twice
- return results.map(r => {
- const roundedScore = Math.round(r.score * 1000) / 1000;
- const count = scoreCounts.get(roundedScore) || 1;
-
- if (count > 2) {
- // Progressive penalty: more duplicates = bigger penalty
- const penalty = Math.max(0.7, 1 - (count * 0.05));
- return { ...r, score: r.score * penalty };
- }
- return r;
- });
-}
-
-/**
- * Post-processing: Filter results with dominant baseline score (hot spot detection)
- * When backend returns default "hot spot" files with identical high scores,
- * this function detects and removes them.
- *
- * Detection criteria:
- * - A single score appears in >50% of results
- * - That score is suspiciously high (>0.9)
- * - This indicates fallback mechanism returned placeholder results
- */
-function filterDominantBaselineScores(
- results: SemanticMatch[]
-): { filteredResults: SemanticMatch[]; baselineInfo: { score: number; count: number } | null } {
- if (results.length < 4) {
- return { filteredResults: results, baselineInfo: null };
- }
-
- // Count occurrences of each score (rounded to 4 decimal places)
- const scoreCounts = new Map();
- results.forEach(r => {
- const rounded = Math.round(r.score * 10000) / 10000;
- scoreCounts.set(rounded, (scoreCounts.get(rounded) || 0) + 1);
- });
-
- // Find the most dominant score
- let dominantScore: number | null = null;
- let dominantCount = 0;
- scoreCounts.forEach((count, score) => {
- if (count > dominantCount) {
- dominantCount = count;
- dominantScore = score;
- }
- });
-
- // If a single score is present in >50% of results and is high (>0.9),
- // treat it as a suspicious baseline score and filter it out
- const BASELINE_THRESHOLD = 0.5; // >50% of results have same score
- const HIGH_SCORE_THRESHOLD = 0.9; // Score above 0.9 is suspiciously high
-
- if (
- dominantScore !== null &&
- dominantCount > results.length * BASELINE_THRESHOLD &&
- dominantScore > HIGH_SCORE_THRESHOLD
- ) {
- const filteredResults = results.filter(r => {
- const rounded = Math.round(r.score * 10000) / 10000;
- return rounded !== dominantScore;
- });
-
- return {
- filteredResults,
- baselineInfo: { score: dominantScore, count: dominantCount },
- };
- }
-
- return { filteredResults: results, baselineInfo: null };
-}
-
-/**
- * TypeScript implementation of Reciprocal Rank Fusion
- * Reference: codex-lens/src/codexlens/search/ranking.py
- * Formula: score(d) = Σ weight_source / (k + rank_source(d))
- */
-function normalizeFusionSnippet(value: unknown): string | undefined {
- if (typeof value !== 'string') {
- return undefined;
- }
-
- const normalized = value.replace(/\s+/g, ' ').trim();
- return normalized ? normalized.slice(0, 240) : undefined;
-}
-
-function buildFusionIdentity(result: any): string | null {
- const path = typeof result?.file === 'string'
- ? result.file
- : typeof result?.path === 'string'
- ? result.path
- : undefined;
-
- if (!path) {
- return null;
- }
-
- const line = typeof result?.line === 'number' && Number.isFinite(result.line)
- ? result.line
- : undefined;
- const endLine = typeof result?.endLine === 'number' && Number.isFinite(result.endLine)
- ? result.endLine
- : line;
- const column = typeof result?.column === 'number' && Number.isFinite(result.column)
- ? result.column
- : undefined;
-
- if (line !== undefined) {
- return `${path}#L${line}-${endLine ?? line}:C${column ?? 0}`;
- }
-
- const symbol = typeof result?.symbol === 'string' && result.symbol.trim()
- ? result.symbol.trim()
- : undefined;
- const snippet = normalizeFusionSnippet(result?.content);
-
- if (symbol && snippet) {
- return `${path}::${symbol}::${snippet}`;
- }
- if (snippet) {
- return `${path}::${snippet}`;
- }
- if (symbol) {
- return `${path}::${symbol}`;
- }
-
- return path;
-}
-
-function scoreFusionRepresentative(result: any): number {
- let score = 0;
-
- if (typeof result?.line === 'number' && Number.isFinite(result.line)) {
- score += 1000;
- }
- if (typeof result?.endLine === 'number' && Number.isFinite(result.endLine)) {
- score += 250;
- }
- if (typeof result?.column === 'number' && Number.isFinite(result.column)) {
- score += 50;
- }
- if (Array.isArray(result?.chunkLines) && result.chunkLines.length > 0) {
- score += 500 + result.chunkLines.length;
- }
- if (typeof result?.symbol === 'string' && result.symbol.trim()) {
- score += 50;
- }
- if (typeof result?.content === 'string') {
- score += Math.min(result.content.length, 200);
- }
-
- return score;
-}
-
-function applyRRFFusion(
- resultsMap: Map,
- weightsOrQuery: Record | string,
- limit: number,
- k: number = 60,
-): any[] {
- const weights = typeof weightsOrQuery === 'string' ? getRRFWeights(weightsOrQuery) : weightsOrQuery;
- const fusedScores = new Map();
-
- resultsMap.forEach((results, source) => {
- const weight = weights[source] || 0;
- if (weight === 0 || !results) return;
-
- results.forEach((result, rank) => {
- const identity = buildFusionIdentity(result);
- if (!identity) return;
-
- const rrfContribution = weight / (k + rank + 1);
- const representativeScore = scoreFusionRepresentative(result);
-
- if (!fusedScores.has(identity)) {
- fusedScores.set(identity, { score: 0, result, sources: [], representativeScore });
- }
- const entry = fusedScores.get(identity)!;
- entry.score += rrfContribution;
- if (representativeScore > entry.representativeScore) {
- entry.result = result;
- entry.representativeScore = representativeScore;
- }
- if (!entry.sources.includes(source)) {
- entry.sources.push(source);
- }
- });
- });
-
- // Sort by fusion score descending
- return Array.from(fusedScores.values())
- .sort((a, b) => b.score - a.score)
- .slice(0, limit)
- .map(item => ({
- ...item.result,
- fusion_score: item.score,
- matched_backends: item.sources,
- }));
-}
-
-/**
- * Promise wrapper with timeout support
- * @param promise - The promise to wrap
- * @param ms - Timeout in milliseconds
- * @param modeName - Name of the mode for error message
- * @returns A new promise that rejects on timeout
- */
-function withTimeout(promise: Promise, ms: number, modeName: string): Promise {
- return new Promise((resolve, reject) => {
- const timer = setTimeout(() => {
- reject(new Error(`'${modeName}' search timed out after ${ms}ms`));
- }, ms);
-
- promise
- .then(resolve)
- .catch(reject)
- .finally(() => clearTimeout(timer));
- });
-}
-
-// v1 executePriorityFallbackMode removed — v2 bridge + ripgrep fallback handles all search
-
-// Tool schema for MCP
-export const schema: ToolSchema = {
- name: 'smart_search',
- description: `Unified code search tool powered by codexlens-search v2 (2-stage vector + FTS5 + reranking).
-
-Recommended flow: use **action=\"search\"** for lookups, **action=\"init\"** to build the semantic index, and **action=\"update\"** when files change.
-
-**Actions & Required Parameters:**
-
-* **search** (default): Semantic code search with ripgrep fallback.
- * **query** (string, **REQUIRED**): Content to search for.
- * *limit* (number): Max results (default: 5).
- * *path* (string): Directory or single file to search (default: current directory).
- * *contextLines* (number): Context lines around matches (default: 0).
- * *regex* (boolean): Use regex matching in ripgrep fallback (default: true).
- * *caseSensitive* (boolean): Case-sensitive search (default: true).
-
-* **find_files**: Find files by path/name pattern.
- * **pattern** (string, **REQUIRED**): Glob pattern (e.g., "*.ts", "src/**/*.js").
- * *limit* (number): Max results (default: 20).
- * *offset* (number): Pagination offset (default: 0).
- * *includeHidden* (boolean): Include hidden files (default: false).
-
-* **init**: Initialize v2 semantic index and sync all files.
- * *path* (string): Directory to index (default: current).
-
-* **status**: Check v2 index statistics. (No required params)
-
-* **update**: Incremental sync for changed files.
- * *path* (string): Directory to update (default: current).
-
-* **watch**: Start file watcher for auto-updates.
- * *path* (string): Directory to watch (default: current).
-
-**Examples:**
- smart_search(query="authentication logic") # Semantic search (default)
- smart_search(action="init", path="/project") # Build v2 index
- smart_search(action="update", path="/project") # Sync changed files
- smart_search(query="auth", limit=10, offset=0) # Paginated search`,
- inputSchema: {
- type: 'object',
- properties: {
- action: {
- type: 'string',
- enum: ['init', 'search', 'find_files', 'status', 'update', 'watch', 'search_files'],
- description: 'Action: search (semantic search, default), find_files (path pattern matching), init (build v2 index), status (check index), update (incremental sync), watch (auto-update watcher). Note: search_files is deprecated.',
- default: 'search',
- },
- query: {
- type: 'string',
- description: 'Content search query (for action="search").',
- },
- pattern: {
- type: 'string',
- description: 'Glob pattern for file discovery (for action="find_files"). Examples: "*.ts", "src/**/*.js", "test_*.py"',
- },
- mode: {
- type: 'string',
- enum: SEARCH_MODES,
- description: 'Search mode: fuzzy (v2 semantic + ripgrep fallback, default) or semantic (v2 semantic search only).',
- default: 'fuzzy',
- },
- output_mode: {
- type: 'string',
- enum: [...SEARCH_OUTPUT_MODES],
- description: 'Output format: ace (default, ACE-style grouped code sections + rendered text), full (raw matches), files_only (paths only), count (per-file counts)',
- default: 'ace',
- },
- path: {
- type: 'string',
- description: 'Directory path for init/search actions (default: current directory). For action=search, a single file path is also accepted and results are automatically scoped back to that file.',
- },
- paths: {
- type: 'array',
- description: 'Multiple paths to search within (for search action)',
- items: {
- type: 'string',
- },
- default: [],
- },
- contextLines: {
- type: 'number',
- description: 'Number of context lines around matches (exact mode only)',
- default: 0,
- },
- maxResults: {
- type: 'number',
- description: 'Maximum number of full-content results (default: 5)',
- default: 5,
- },
- limit: {
- type: 'number',
- description: 'Alias for maxResults (default: 5)',
- default: 5,
- },
- extraFilesCount: {
- type: 'number',
- description: 'Number of additional file-only results (paths without content)',
- default: 10,
- },
- maxContentLength: {
- type: 'number',
- description: 'Maximum content length for truncation (50-2000)',
- default: 200,
- },
- offset: {
- type: 'number',
- description: 'Pagination offset - skip first N results (default: 0)',
- default: 0,
- },
- includeHidden: {
- type: 'boolean',
- description: 'Include hidden files/directories',
- default: false,
- },
- force: {
- type: 'boolean',
- description: 'Force full rebuild for action="init".',
- default: false,
- },
- regex: {
- type: 'boolean',
- description: 'Use regex pattern matching instead of literal string (ripgrep mode only). Default: enabled. Example: smart_search(query="class.*Builder")',
- default: true,
- },
- caseSensitive: {
- type: 'boolean',
- description: 'Case-sensitive search (default: true). Set to false for case-insensitive matching.',
- default: true,
- },
- tokenize: {
- type: 'boolean',
- description: 'Tokenize multi-word queries for OR matching (ripgrep mode). Default: true. Results are ranked by token match count (exact matches first).',
- default: true,
- },
- },
- required: [],
- },
-};
-
-/**
- * Action: find_files - Find files by path/name pattern (glob matching)
- * Unlike search which looks inside file content, find_files matches file paths
- */
-async function executeFindFilesAction(params: Params): Promise {
- const { pattern, path = '.', limit = 20, offset = 0, includeHidden = false, caseSensitive = true } = params;
- const scope = resolveSearchScope(path);
-
- if (!pattern) {
- return {
- success: false,
- error: 'Pattern is required for find_files action. Use glob patterns like "*.ts", "src/**/*.js", or "test_*.py"',
- };
- }
-
- // Use ripgrep with --files flag for fast file listing with glob pattern
- const hasRipgrep = checkToolAvailability('rg');
-
- if (!hasRipgrep) {
- // Fallback to CodexLens file listing if available
- const readyStatus = await ensureCodexLensReady();
- if (!readyStatus.ready) {
- return {
- success: false,
- error: 'Neither ripgrep nor CodexLens available for file discovery.',
- };
- }
-
- // Try CodexLens file list command
- const args = ['list-files', '--json'];
- const result = await executeCodexLens(args, { cwd: scope.workingDirectory });
-
- if (!result.success) {
- return {
- success: false,
- error: `Failed to list files: ${result.error}`,
- };
- }
-
- // Parse and filter results by pattern
- let files: string[] = [];
- try {
- const parsed = JSON.parse(stripAnsi(result.output || '[]'));
- files = Array.isArray(parsed) ? parsed : (parsed.files || []);
- } catch {
- return {
- success: false,
- error: 'Failed to parse file list from CodexLens',
- };
- }
-
- // Apply glob pattern matching using minimatch-style regex
- const globRegex = globToRegex(pattern, caseSensitive);
- const matchedFiles = files.filter(f => globRegex.test(f));
-
- // Apply pagination
- const total = matchedFiles.length;
- const paginatedFiles = matchedFiles.slice(offset, offset + limit);
-
- const results: FileMatch[] = paginatedFiles.map(filePath => {
- const parts = filePath.split(/[/\\]/);
- const name = parts[parts.length - 1] || '';
- const ext = name.includes('.') ? name.split('.').pop() : undefined;
- return {
- path: filePath,
- type: 'file' as const,
- name,
- extension: ext,
- };
- });
-
- return {
- success: true,
- results,
- metadata: {
- pattern,
- backend: 'codexlens',
- count: results.length,
- pagination: {
- offset,
- limit,
- total,
- has_more: offset + limit < total,
- },
- },
- };
- }
-
- // Use ripgrep --files with glob pattern for fast file discovery
- return new Promise((resolve) => {
- const args = ['--files'];
-
- // Add exclude patterns
- if (!includeHidden) {
- args.push(...buildExcludeArgs());
- } else {
- args.push('--hidden');
- }
-
- // Add glob pattern
- args.push('--glob', pattern);
-
- // Case sensitivity for glob matching
- if (!caseSensitive) {
- args.push('--iglob', pattern);
- // Remove the case-sensitive glob and use iglob instead
- const globIndex = args.indexOf('--glob');
- if (globIndex !== -1) {
- args.splice(globIndex, 2);
- }
- }
-
- const child = getSpawnRuntime()(
- 'rg',
- args,
- buildSmartSearchSpawnOptions(scope.workingDirectory || getProjectRoot(), {
- stdio: ['ignore', 'pipe', 'pipe'],
- }),
- );
-
- let stdout = '';
- let stderr = '';
-
- child.stdout?.on('data', (data) => {
- stdout += data.toString();
- });
-
- child.stderr?.on('data', (data) => {
- stderr += data.toString();
- });
-
- child.on('close', (code) => {
- // ripgrep returns 1 when no matches found, which is not an error
- if (code !== 0 && code !== 1 && !stderr.includes('os error 1')) {
- resolve({
- success: false,
- error: `ripgrep file search failed: ${stderr}`,
- });
- return;
- }
-
- const allFiles = stdout.split('\n').filter(line => line.trim());
- const total = allFiles.length;
-
- // Apply pagination
- const paginatedFiles = allFiles.slice(offset, offset + limit);
-
- const results: FileMatch[] = paginatedFiles.map(filePath => {
- const normalizedPath = filePath.replace(/\\/g, '/');
- const parts = normalizedPath.split('/');
- const name = parts[parts.length - 1] || '';
- const ext = name.includes('.') ? name.split('.').pop() : undefined;
- return {
- path: normalizedPath,
- type: 'file' as const,
- name,
- extension: ext,
- };
- });
-
- resolve({
- success: true,
- results,
- metadata: {
- pattern,
- backend: 'ripgrep',
- count: results.length,
- pagination: {
- offset,
- limit,
- total,
- has_more: offset + limit < total,
- },
- },
- });
- });
-
- child.on('error', (error) => {
- resolve({
- success: false,
- error: `Failed to spawn ripgrep: ${error.message}`,
- });
- });
- });
-}
-
-/**
- * Convert glob pattern to regex for file matching
- * Supports: *, **, ?, [abc], [!abc]
- */
-function globToRegex(pattern: string, caseSensitive: boolean = true): RegExp {
- let i = 0;
- const out: string[] = [];
- const special = '.^$+{}|()';
-
- while (i < pattern.length) {
- const c = pattern[i];
-
- if (c === '*') {
- if (i + 1 < pattern.length && pattern[i + 1] === '*') {
- // ** matches any path including /
- out.push('.*');
- i += 2;
- // Skip following / if present
- if (pattern[i] === '/') {
- i++;
- }
- continue;
- } else {
- // * matches any character except /
- out.push('[^/]*');
- }
- } else if (c === '?') {
- out.push('[^/]');
- } else if (c === '[') {
- // Character class
- let j = i + 1;
- let negated = false;
- if (pattern[j] === '!' || pattern[j] === '^') {
- negated = true;
- j++;
- }
- let classContent = '';
- while (j < pattern.length && pattern[j] !== ']') {
- classContent += pattern[j];
- j++;
- }
- if (negated) {
- out.push(`[^${classContent}]`);
- } else {
- out.push(`[${classContent}]`);
- }
- i = j;
- } else if (special.includes(c)) {
- out.push('\\' + c);
- } else {
- out.push(c);
- }
- i++;
- }
-
- const flags = caseSensitive ? '' : 'i';
- return new RegExp('^' + out.join('') + '$', flags);
-}
-
-/**
- * Apply pagination to search results and add pagination metadata
- */
-function applyPagination(
- results: T[],
- offset: number,
- limit: number
-): { paginatedResults: T[]; pagination: PaginationInfo } {
- const total = results.length;
- const paginatedResults = results.slice(offset, offset + limit);
-
- return {
- paginatedResults,
- pagination: {
- offset,
- limit,
- total,
- has_more: offset + limit < total,
- },
- };
-}
-
-function formatChunkRange(section: AceLikeSection): string {
- if (section.lines && section.lines.length > 0) {
- const start = section.lines[0]?.line;
- const end = section.lines[section.lines.length - 1]?.line;
- if (typeof start === 'number' && typeof end === 'number' && end > start) {
- return `${start}-${end}`;
- }
- if (typeof start === 'number') {
- return String(start);
- }
- }
- if (section.line && section.endLine && section.endLine > section.line) {
- return `${section.line}-${section.endLine}`;
- }
- if (section.line) {
- return String(section.line);
- }
- return '?';
-}
-
-function renderAceSnippet(section: AceLikeSection): string[] {
- if (section.lines && section.lines.length > 0) {
- return section.lines.map((line) => {
- const marker = line.isMatch ? '>' : ' ';
- return `${marker} ${String(line.line).padStart(4, ' ')} | ${line.text}`;
- });
- }
-
- return section.snippet.split(/\r?\n/).map((line) => ` ${line}`);
-}
-
-function formatAceLikeOutput(
- results: ExactMatch[] | SemanticMatch[] | GraphMatch[] | FileMatch[] | unknown[],
-): AceLikeOutput {
- const sections: AceLikeSection[] = [];
-
- for (const result of results) {
- const candidate = result as Record;
- const path = typeof candidate.file === 'string'
- ? candidate.file
- : typeof candidate.path === 'string'
- ? candidate.path
- : undefined;
-
- if (!path) {
- continue;
- }
-
- const line = typeof candidate.line === 'number' && candidate.line > 0 ? candidate.line : undefined;
- const column = typeof candidate.column === 'number' && candidate.column > 0 ? candidate.column : undefined;
- const score = typeof candidate.score === 'number' ? candidate.score : undefined;
- const symbol = typeof candidate.symbol === 'string' ? candidate.symbol : null;
- const rawSnippet = typeof candidate.content === 'string'
- ? candidate.content
- : typeof candidate.name === 'string'
- ? candidate.name
- : typeof candidate.type === 'string'
- ? `[${candidate.type}]`
- : '';
-
- sections.push({
- path,
- line,
- endLine: typeof candidate.endLine === 'number' && candidate.endLine >= (line ?? 0) ? candidate.endLine : line,
- column,
- score,
- symbol,
- snippet: rawSnippet || '[no snippet available]',
- lines: Array.isArray(candidate.chunkLines) ? candidate.chunkLines as ChunkLine[] : undefined,
- });
- }
-
- const groupsMap = new Map();
- for (const section of sections) {
- if (!groupsMap.has(section.path)) {
- groupsMap.set(section.path, {
- path: section.path,
- sections: [],
- total_matches: 0,
- });
- }
- const group = groupsMap.get(section.path)!;
- group.sections.push(section);
- group.total_matches += 1;
- }
- const groups = [...groupsMap.values()];
-
- const textParts = ['The following code sections were retrieved:'];
- for (const group of groups) {
- textParts.push('');
- textParts.push(`Path: ${group.path}`);
- group.sections.forEach((section, index) => {
- const chunkLabel = group.sections.length > 1 ? `Chunk ${index + 1}` : 'Chunk';
- textParts.push(`${chunkLabel}: lines ${formatChunkRange(section)}${section.score !== undefined ? ` | score=${section.score.toFixed(4)}` : ''}`);
- if (section.symbol) {
- textParts.push(`Symbol: ${section.symbol}`);
- }
- for (const snippetLine of renderAceSnippet(section)) {
- textParts.push(snippetLine);
- }
- if (index < group.sections.length - 1) {
- textParts.push('');
- }
- });
- }
-
- return {
- format: 'ace',
- text: textParts.join('\n'),
- groups,
- sections,
- total: sections.length,
- };
-}
-
-/**
- * Transform results based on output_mode
- */
-function transformOutput(
- results: ExactMatch[] | SemanticMatch[] | GraphMatch[] | unknown[],
- outputMode: SearchOutputMode
-): unknown {
- if (!Array.isArray(results)) {
- return results;
- }
-
- switch (outputMode) {
- case 'files_only': {
- // Extract unique file paths
- const files = [...new Set(results.map((r: any) => r.file))].filter(Boolean);
- return { files, count: files.length };
- }
- case 'count': {
- // Count matches per file
- const counts: Record = {};
- for (const r of results) {
- const file = (r as any).file;
- if (file) {
- counts[file] = (counts[file] || 0) + 1;
- }
- }
- return {
- files: Object.entries(counts).map(([file, count]) => ({ file, count })),
- total: results.length,
- };
- }
- case 'ace':
- return formatAceLikeOutput(results);
- case 'full':
- default:
- return results;
- }
-}
-
-function enrichMetadataWithIndexStatus(
- metadata: SearchMetadata | undefined,
- indexStatus: IndexStatus,
- scope: SearchScope,
-): SearchMetadata {
- const nextMetadata: SearchMetadata = { ...(metadata ?? {}) };
- nextMetadata.embeddings_coverage_percent = indexStatus.embeddings_coverage_percent;
- nextMetadata.index_status = indexStatus.indexed
- ? (indexStatus.has_embeddings ? 'indexed' : 'partial')
- : 'not_indexed';
- nextMetadata.reranker_enabled = indexStatus.config?.reranker_enabled;
- nextMetadata.reranker_backend = indexStatus.config?.reranker_backend;
- nextMetadata.reranker_model = indexStatus.config?.reranker_model;
- nextMetadata.cascade_strategy = indexStatus.config?.cascade_strategy;
- nextMetadata.staged_stage2_mode = indexStatus.config?.staged_stage2_mode;
- nextMetadata.static_graph_enabled = indexStatus.config?.static_graph_enabled;
- nextMetadata.warning = mergeWarnings(nextMetadata.warning, indexStatus.warning);
- nextMetadata.suggestions = mergeSuggestions(nextMetadata.suggestions, buildIndexSuggestions(indexStatus, scope));
- return nextMetadata;
-}
-
-// Handler function
-export async function handler(params: Record): Promise> {
- const parsed = ParamsSchema.safeParse(params);
- if (!parsed.success) {
- return { success: false, error: `Invalid params: ${parsed.error.message}` };
- }
-
- parsed.data.query = sanitizeSearchQuery(parsed.data.query);
- parsed.data.pattern = sanitizeSearchPath(parsed.data.pattern);
- parsed.data.path = sanitizeSearchPath(parsed.data.path);
- parsed.data.paths = parsed.data.paths.map((item) => sanitizeSearchPath(item) || item);
-
- const { action, mode, output_mode, offset = 0 } = parsed.data;
-
- // Sync limit and maxResults while preserving explicit small values.
- // If both are provided, use the larger one. If only one is provided, honor it.
- const rawLimit = typeof params.limit === 'number' ? params.limit : undefined;
- const rawMaxResults = typeof params.maxResults === 'number' ? params.maxResults : undefined;
- const effectiveLimit = rawLimit !== undefined && rawMaxResults !== undefined
- ? Math.max(rawLimit, rawMaxResults)
- : rawMaxResults ?? rawLimit ?? parsed.data.maxResults ?? parsed.data.limit ?? 5;
- parsed.data.maxResults = effectiveLimit;
- parsed.data.limit = effectiveLimit;
-
- // Track if search_files was used (deprecated)
- let deprecationWarning: string | undefined;
-
- try {
- let result: SearchResult;
-
- // Handle actions — all routed through codexlens-search v2 bridge
- switch (action) {
- case 'init':
- result = await executeInitActionV2(parsed.data);
- break;
-
- case 'status':
- result = await executeStatusActionV2(parsed.data);
- break;
-
- case 'find_files':
- result = await executeFindFilesAction(parsed.data);
- break;
-
- case 'update':
- result = await executeUpdateActionV2(parsed.data);
- break;
-
- case 'watch':
- result = await executeWatchActionV2(parsed.data);
- break;
-
- case 'search_files':
- // DEPRECATED: Redirect to search with files_only output
- deprecationWarning = 'action="search_files" is deprecated. Use action="search" with output_mode="files_only" for content-to-files search, or action="find_files" for path pattern matching.';
- parsed.data.output_mode = 'files_only';
- // Fall through to search
-
- case 'search':
- default: {
- // v2 bridge for semantic search
- const scope = resolveSearchScope(parsed.data.path ?? '.');
- const dbPath = join(scope.workingDirectory, '.codexlens');
- const topK = (parsed.data.maxResults || 5) + (parsed.data.extraFilesCount || 10);
- const v2Result = await executeCodexLensV2Bridge(parsed.data.query || '', topK, dbPath);
- if (v2Result.success) {
- result = v2Result;
- break;
- }
- // v2 failed — fall back to ripgrep-only search
- console.warn(`[CodexLens-v2] Bridge failed, falling back to ripgrep: ${v2Result.error}`);
- result = await executeRipgrepMode(parsed.data);
- break;
- }
- }
-
- let backgroundNote: string | undefined;
-
- // Transform output based on output_mode (for search actions only)
- if (action === 'search' || action === 'search_files') {
-
- // Add pagination metadata for search results if not already present
- if (result.success && result.results && Array.isArray(result.results)) {
- const totalResults = (result.results as any[]).length;
- if (!result.metadata) {
- result.metadata = {};
- }
- if (!result.metadata.pagination) {
- result.metadata.pagination = {
- offset: 0,
- limit: effectiveLimit,
- total: totalResults,
- has_more: false, // Already limited by backend
- };
- }
- }
-
- if (result.success && result.results && output_mode !== 'full') {
- result.results = transformOutput(result.results as any[], output_mode);
- if (
- output_mode === 'ace'
- && result.results
- && typeof result.results === 'object'
- && 'format' in result.results
- && result.results.format === 'ace'
- ) {
- const advisoryLines: string[] = [];
- if (result.metadata?.warning) {
- advisoryLines.push('', 'Warnings:', `- ${result.metadata.warning}`);
- }
- if (backgroundNote) {
- advisoryLines.push('', 'Notes:', `- ${backgroundNote}`);
- }
- if (result.metadata?.suggestions && result.metadata.suggestions.length > 0) {
- advisoryLines.push('', 'Suggestions:');
- for (const suggestion of result.metadata.suggestions) {
- advisoryLines.push(`- ${suggestion.title}: ${suggestion.command}`);
- advisoryLines.push(` ${suggestion.reason}`);
- }
- }
- const aceResults = result.results as AceLikeOutput;
- if (advisoryLines.length > 0) {
- aceResults.text += `\n${advisoryLines.join('\n')}`;
- }
- }
- }
- }
-
- // Add deprecation warning if applicable
- if (deprecationWarning && result.metadata) {
- result.metadata.warning = deprecationWarning;
- }
-
- return result.success ? { success: true, result } : { success: false, error: result.error };
- } catch (error) {
- return { success: false, error: (error as Error).message };
- }
-}
-
-/**
- * Execute init action with external progress callback
- * Used by MCP server for streaming progress
- * @param params - Search parameters (path, languages, force)
- * @param onProgress - Optional callback for progress updates
- */
-export const __testables = {
- isCodexLensCliCompatibilityError,
- shouldSurfaceCodexLensFtsCompatibilityWarning,
- buildSmartSearchSpawnOptions,
- shouldDetachBackgroundSmartSearchProcess,
- checkToolAvailability,
- parseCodexLensJsonOutput,
- parsePlainTextFileMatches,
- hasCentralizedVectorArtifacts,
- extractEmbeddingsStatusSummary,
- selectEmbeddingsStatusPayload,
- resolveRipgrepQueryMode,
- queryTargetsGeneratedFiles,
- prefersLexicalPriorityQuery,
- classifyIntent,
- resolveEmbeddingSelection,
- parseOptionalBooleanEnv,
- isAutoInitMissingEnabled,
- isAutoEmbedMissingEnabled,
- getAutoInitMissingDisabledReason,
- getAutoEmbedMissingDisabledReason,
- buildIndexSuggestions,
- maybeStartBackgroundAutoInit,
- maybeStartBackgroundAutoEmbed,
- __setRuntimeOverrides(overrides: Partial) {
- Object.assign(runtimeOverrides, overrides);
- },
- __resetRuntimeOverrides() {
- for (const key of Object.keys(runtimeOverrides) as Array) {
- delete runtimeOverrides[key];
- }
- },
- __resetBackgroundJobs() {
- autoInitJobs.clear();
- autoEmbedJobs.clear();
- },
-};
-
-export async function executeInitWithProgress(
- params: Record,
- onProgress?: (progress: ProgressInfo) => void
-): Promise {
- const path = (params.path as string) || '.';
- const scope = resolveSearchScope(path);
- const dbPath = join(scope.workingDirectory, '.codexlens');
-
- // Notify progress start
- if (onProgress) {
- onProgress({ stage: 'init', message: 'Initializing v2 index...', percent: 0 } as ProgressInfo);
- }
-
- // Step 1: init empty index
- const initResult = await executeV2BridgeCommand('init', [], { dbPath });
- if (!initResult.success) return initResult;
-
- if (onProgress) {
- onProgress({ stage: 'sync', message: 'Syncing files...', percent: 10 } as ProgressInfo);
- }
-
- // Step 2: sync all files
- const syncResult = await executeV2BridgeCommand('sync', [
- '--root', scope.workingDirectory,
- ], { timeout: 1800000, dbPath });
-
- if (onProgress) {
- onProgress({ stage: 'complete', message: 'Index build complete', percent: 100 } as ProgressInfo);
- }
-
- return {
- success: syncResult.success,
- error: syncResult.error,
- message: syncResult.success
- ? `v2 index created and synced for ${scope.workingDirectory}`
- : undefined,
- metadata: { action: 'init', path: scope.workingDirectory },
- status: syncResult.status,
- };
-}
diff --git a/ccw/src/tools/smart-search.ts.backup b/ccw/src/tools/smart-search.ts.backup
deleted file mode 100644
index 173ce0fa..00000000
--- a/ccw/src/tools/smart-search.ts.backup
+++ /dev/null
@@ -1,1233 +0,0 @@
-/**
- * Smart Search Tool - Unified intelligent search with CodexLens integration
- *
- * Features:
- * - Intent classification with automatic mode selection
- * - CodexLens integration (init, hybrid, vector, semantic)
- * - Ripgrep fallback for exact mode
- * - Index status checking and warnings
- * - Multi-backend search routing with RRF ranking
- *
- * Actions:
- * - init: Initialize CodexLens index
- * - search: Intelligent search with auto mode selection
- * - status: Check index status
- */
-
-import { z } from 'zod';
-import type { ToolSchema, ToolResult } from '../types/tool.js';
-import { spawn, execSync } from 'child_process';
-import {
- ensureReady as ensureCodexLensReady,
- executeCodexLens,
-} from './codex-lens.js';
-import type { ProgressInfo } from './codex-lens.js';
-
-// Define Zod schema for validation
-const ParamsSchema = z.object({
- action: z.enum(['init', 'search', 'search_files', 'status']).default('search'),
- query: z.string().optional(),
- mode: z.enum(['auto', 'hybrid', 'exact', 'ripgrep', 'priority']).default('auto'),
- output_mode: z.enum(['full', 'files_only', 'count']).default('full'),
- path: z.string().optional(),
- paths: z.array(z.string()).default([]),
- contextLines: z.number().default(0),
- maxResults: z.number().default(10),
- includeHidden: z.boolean().default(false),
- languages: z.array(z.string()).optional(),
- limit: z.number().default(10),
- enrich: z.boolean().default(false),
-});
-
-type Params = z.infer;
-
-// Search mode constants
-const SEARCH_MODES = ['auto', 'hybrid', 'exact', 'ripgrep', 'priority'] as const;
-
-// Classification confidence threshold
-const CONFIDENCE_THRESHOLD = 0.7;
-
-interface Classification {
- mode: string;
- confidence: number;
- reasoning: string;
-}
-
-interface ExactMatch {
- file: string;
- line: number;
- column: number;
- content: string;
-}
-
-interface RelationshipInfo {
- type: string; // 'calls', 'imports', 'called_by', 'imported_by'
- direction: 'outgoing' | 'incoming';
- target?: string; // Target symbol name (for outgoing)
- source?: string; // Source symbol name (for incoming)
- file: string; // File path
- line?: number; // Line number
-}
-
-interface SemanticMatch {
- file: string;
- score: number;
- content: string;
- symbol: string | null;
- relationships?: RelationshipInfo[];
-}
-
-interface GraphMatch {
- file: string;
- symbols: unknown;
- relationships: unknown[];
-}
-
-interface SearchMetadata {
- mode?: string;
- backend?: string;
- count?: number;
- query?: string;
- classified_as?: string;
- confidence?: number;
- reasoning?: string;
- embeddings_coverage_percent?: number;
- warning?: string;
- note?: string;
- index_status?: 'indexed' | 'not_indexed' | 'partial';
- fallback_history?: string[];
- // Init action specific
- action?: string;
- path?: string;
- progress?: {
- stage: string;
- message: string;
- percent: number;
- filesProcessed?: number;
- totalFiles?: number;
- };
- progressHistory?: ProgressInfo[];
-}
-
-interface SearchResult {
- success: boolean;
- results?: ExactMatch[] | SemanticMatch[] | GraphMatch[] | unknown;
- output?: string;
- metadata?: SearchMetadata;
- error?: string;
- status?: unknown;
- message?: string;
-}
-
-interface IndexStatus {
- indexed: boolean;
- has_embeddings: boolean;
- file_count?: number;
- embeddings_coverage_percent?: number;
- warning?: string;
-}
-
-/**
- * Strip ANSI color codes from string (for JSON parsing)
- */
-function stripAnsi(str: string): string {
- return str.replace(/\x1b\[[0-9;]*m/g, '');
-}
-
-/**
- * Check if CodexLens index exists for current directory
- * @param path - Directory path to check
- * @returns Index status
- */
-async function checkIndexStatus(path: string = '.'): Promise {
- try {
- const result = await executeCodexLens(['status', '--json'], { cwd: path });
-
- if (!result.success) {
- return {
- indexed: false,
- has_embeddings: false,
- warning: 'No CodexLens index found. Run smart_search(action="init") to create index for better search results.',
- };
- }
-
- // Parse status output
- try {
- // Strip ANSI color codes from JSON output
- const cleanOutput = stripAnsi(result.output || '{}');
- const parsed = JSON.parse(cleanOutput);
- // Handle both direct and nested response formats (status returns {success, result: {...}})
- const status = parsed.result || parsed;
- const indexed = status.projects_count > 0 || status.total_files > 0;
-
- // Get embeddings coverage from comprehensive status
- const embeddingsData = status.embeddings || {};
- const embeddingsCoverage = embeddingsData.coverage_percent || 0;
- const has_embeddings = embeddingsCoverage >= 50; // Threshold: 50%
-
- let warning: string | undefined;
- if (!indexed) {
- warning = 'No CodexLens index found. Run smart_search(action="init") to create index for better search results.';
- } else if (embeddingsCoverage === 0) {
- warning = 'Index exists but no embeddings generated. Run: codexlens embeddings-generate --recursive';
- } else if (embeddingsCoverage < 50) {
- warning = `Embeddings coverage is ${embeddingsCoverage.toFixed(1)}% (below 50%). Hybrid search will use exact mode. Run: codexlens embeddings-generate --recursive`;
- }
-
- return {
- indexed,
- has_embeddings,
- file_count: status.total_files,
- embeddings_coverage_percent: embeddingsCoverage,
- warning,
- };
- } catch {
- return {
- indexed: false,
- has_embeddings: false,
- warning: 'Failed to parse index status',
- };
- }
- } catch {
- return {
- indexed: false,
- has_embeddings: false,
- warning: 'CodexLens not available',
- };
- }
-}
-
-/**
- * Detection heuristics for intent classification
- */
-
-/**
- * Detect literal string query (simple alphanumeric or quoted strings)
- */
-function detectLiteral(query: string): boolean {
- return /^[a-zA-Z0-9_-]+$/.test(query) || /^["'].*["']$/.test(query);
-}
-
-/**
- * Detect regex pattern (contains regex metacharacters)
- */
-function detectRegex(query: string): boolean {
- return /[.*+?^${}()|[\]\\]/.test(query);
-}
-
-/**
- * Detect natural language query (sentence structure, questions, multi-word phrases)
- */
-function detectNaturalLanguage(query: string): boolean {
- return query.split(/\s+/).length >= 3 || /\?$/.test(query);
-}
-
-/**
- * Detect file path query (path separators, file extensions)
- */
-function detectFilePath(query: string): boolean {
- return /[/\\]/.test(query) || /\.[a-z]{2,4}$/i.test(query);
-}
-
-/**
- * Detect relationship query (import, export, dependency keywords)
- */
-function detectRelationship(query: string): boolean {
- return /(import|export|uses?|depends?|calls?|extends?)\s/i.test(query);
-}
-
-/**
- * Classify query intent and recommend search mode
- * Simple mapping: hybrid (NL + index + embeddings) | exact (index or insufficient embeddings) | ripgrep (no index)
- * @param query - Search query string
- * @param hasIndex - Whether CodexLens index exists
- * @param hasSufficientEmbeddings - Whether embeddings coverage >= 50%
- * @returns Classification result
- */
-function classifyIntent(query: string, hasIndex: boolean = false, hasSufficientEmbeddings: boolean = false): Classification {
- // Detect query patterns
- const isNaturalLanguage = detectNaturalLanguage(query);
-
- // Simple decision tree
- let mode: string;
- let confidence: number;
-
- if (!hasIndex) {
- // No index: use ripgrep
- mode = 'ripgrep';
- confidence = 1.0;
- } else if (isNaturalLanguage && hasSufficientEmbeddings) {
- // Natural language + sufficient embeddings: use hybrid
- mode = 'hybrid';
- confidence = 0.9;
- } else {
- // Simple query OR insufficient embeddings: use exact
- mode = 'exact';
- confidence = 0.8;
- }
-
- // Build reasoning string
- const detectedPatterns: string[] = [];
- if (detectLiteral(query)) detectedPatterns.push('literal');
- if (detectRegex(query)) detectedPatterns.push('regex');
- if (detectNaturalLanguage(query)) detectedPatterns.push('natural language');
- if (detectFilePath(query)) detectedPatterns.push('file path');
- if (detectRelationship(query)) detectedPatterns.push('relationship');
-
- const reasoning = `Query classified as ${mode} (confidence: ${confidence.toFixed(2)}, detected: ${detectedPatterns.join(', ')}, index: ${hasIndex ? 'available' : 'not available'}, embeddings: ${hasSufficientEmbeddings ? 'sufficient' : 'insufficient'})`;
-
- return { mode, confidence, reasoning };
-}
-
-/**
- * Check if a tool is available in PATH
- * @param toolName - Tool executable name
- * @returns True if available
- */
-function checkToolAvailability(toolName: string): boolean {
- try {
- const isWindows = process.platform === 'win32';
- const command = isWindows ? 'where' : 'which';
- execSync(`${command} ${toolName}`, { stdio: 'ignore' });
- return true;
- } catch {
- return false;
- }
-}
-
-/**
- * Build ripgrep command arguments
- * @param params - Search parameters
- * @returns Command and arguments
- */
-function buildRipgrepCommand(params: {
- query: string;
- paths: string[];
- contextLines: number;
- maxResults: number;
- includeHidden: boolean;
-}): { command: string; args: string[] } {
- const { query, paths = ['.'], contextLines = 0, maxResults = 10, includeHidden = false } = params;
-
- const args = [
- '-n', // Show line numbers
- '--color=never', // Disable color output
- '--json', // Output in JSON format
- ];
-
- // Add context lines if specified
- if (contextLines > 0) {
- args.push('-C', contextLines.toString());
- }
-
- // Add max results limit
- if (maxResults > 0) {
- args.push('--max-count', maxResults.toString());
- }
-
- // Include hidden files if specified
- if (includeHidden) {
- args.push('--hidden');
- }
-
- // Use literal/fixed string matching for exact mode
- args.push('-F', query);
-
- // Add search paths
- args.push(...paths);
-
- return { command: 'rg', args };
-}
-
-/**
- * Action: init - Initialize CodexLens index (FTS only, no embeddings)
- * For semantic/vector search, use ccw view dashboard or codexlens CLI directly
- */
-async function executeInitAction(params: Params): Promise {
- const { path = '.', languages } = params;
-
- // Check CodexLens availability
- const readyStatus = await ensureCodexLensReady();
- if (!readyStatus.ready) {
- return {
- success: false,
- error: `CodexLens not available: ${readyStatus.error}. CodexLens will be auto-installed on first use.`,
- };
- }
-
- // Build args with --no-embeddings for FTS-only index (faster)
- const args = ['init', path, '--no-embeddings'];
- if (languages && languages.length > 0) {
- args.push('--languages', languages.join(','));
- }
-
- // Track progress updates
- const progressUpdates: ProgressInfo[] = [];
- let lastProgress: ProgressInfo | null = null;
-
- const result = await executeCodexLens(args, {
- cwd: path,
- timeout: 1800000, // 30 minutes for large codebases
- onProgress: (progress: ProgressInfo) => {
- progressUpdates.push(progress);
- lastProgress = progress;
- },
- });
-
- // Build metadata with progress info
- const metadata: SearchMetadata = {
- action: 'init',
- path,
- };
-
- if (lastProgress !== null) {
- const p = lastProgress as ProgressInfo;
- metadata.progress = {
- stage: p.stage,
- message: p.message,
- percent: p.percent,
- filesProcessed: p.filesProcessed,
- totalFiles: p.totalFiles,
- };
- }
-
- if (progressUpdates.length > 0) {
- metadata.progressHistory = progressUpdates.slice(-5); // Keep last 5 progress updates
- }
-
- const successMessage = result.success
- ? `FTS index created for ${path}. Note: For semantic/vector search, create vector index via "ccw view" dashboard or run "codexlens init ${path}" (without --no-embeddings).`
- : undefined;
-
- return {
- success: result.success,
- error: result.error,
- message: successMessage,
- metadata,
- };
-}
-
-/**
- * Action: status - Check CodexLens index status
- */
-async function executeStatusAction(params: Params): Promise {
- const { path = '.' } = params;
-
- const indexStatus = await checkIndexStatus(path);
-
- return {
- success: true,
- status: indexStatus,
- message: indexStatus.warning || `Index status: ${indexStatus.indexed ? 'indexed' : 'not indexed'}, embeddings: ${indexStatus.has_embeddings ? 'available' : 'not available'}`,
- };
-}
-
-/**
- * Mode: auto - Intent classification and mode selection
- * Routes to: hybrid (NL + index) | exact (index) | ripgrep (no index)
- */
-async function executeAutoMode(params: Params): Promise {
- const { query, path = '.' } = params;
-
- if (!query) {
- return {
- success: false,
- error: 'Query is required for search action',
- };
- }
-
- // Check index status
- const indexStatus = await checkIndexStatus(path);
-
- // Classify intent with index and embeddings awareness
- const classification = classifyIntent(
- query,
- indexStatus.indexed,
- indexStatus.has_embeddings // This now considers 50% threshold
- );
-
- // Route to appropriate mode based on classification
- let result: SearchResult;
-
- switch (classification.mode) {
- case 'hybrid':
- result = await executeHybridMode(params);
- break;
-
- case 'exact':
- result = await executeCodexLensExactMode(params);
- break;
-
- case 'ripgrep':
- result = await executeRipgrepMode(params);
- break;
-
- default:
- // Fallback to ripgrep
- result = await executeRipgrepMode(params);
- break;
- }
-
- // Add classification metadata
- if (result.metadata) {
- result.metadata.classified_as = classification.mode;
- result.metadata.confidence = classification.confidence;
- result.metadata.reasoning = classification.reasoning;
- result.metadata.embeddings_coverage_percent = indexStatus.embeddings_coverage_percent;
- result.metadata.index_status = indexStatus.indexed
- ? (indexStatus.has_embeddings ? 'indexed' : 'partial')
- : 'not_indexed';
-
- // Add warning if needed
- if (indexStatus.warning) {
- result.metadata.warning = indexStatus.warning;
- }
- }
-
- return result;
-}
-
-/**
- * Mode: ripgrep - Fast literal string matching using ripgrep
- * No index required, fallback to CodexLens if ripgrep unavailable
- */
-async function executeRipgrepMode(params: Params): Promise {
- const { query, paths = [], contextLines = 0, maxResults = 10, includeHidden = false, path = '.' } = params;
-
- if (!query) {
- return {
- success: false,
- error: 'Query is required for search',
- };
- }
-
- // Check if ripgrep is available
- const hasRipgrep = checkToolAvailability('rg');
-
- // If ripgrep not available, fall back to CodexLens exact mode
- if (!hasRipgrep) {
- const readyStatus = await ensureCodexLensReady();
- if (!readyStatus.ready) {
- return {
- success: false,
- error: 'Neither ripgrep nor CodexLens available. Install ripgrep (rg) or CodexLens for search functionality.',
- };
- }
-
- // Use CodexLens exact mode as fallback
- const args = ['search', query, '--limit', maxResults.toString(), '--mode', 'exact', '--json'];
- const result = await executeCodexLens(args, { cwd: path });
-
- if (!result.success) {
- return {
- success: false,
- error: result.error,
- metadata: {
- mode: 'ripgrep',
- backend: 'codexlens-fallback',
- count: 0,
- query,
- },
- };
- }
-
- // Parse results
- let results: SemanticMatch[] = [];
- try {
- const parsed = JSON.parse(stripAnsi(result.output || '{}'));
- const data = parsed.result?.results || parsed.results || parsed;
- results = (Array.isArray(data) ? data : []).map((item: any) => ({
- file: item.path || item.file,
- score: item.score || 0,
- content: item.excerpt || item.content || '',
- symbol: item.symbol || null,
- }));
- } catch {
- // Keep empty results
- }
-
- return {
- success: true,
- results,
- metadata: {
- mode: 'ripgrep',
- backend: 'codexlens-fallback',
- count: results.length,
- query,
- note: 'Using CodexLens exact mode (ripgrep not available)',
- },
- };
- }
-
- // Use ripgrep
- const { command, args } = buildRipgrepCommand({
- query,
- paths: paths.length > 0 ? paths : [path],
- contextLines,
- maxResults,
- includeHidden,
- });
-
- return new Promise((resolve) => {
- const child = spawn(command, args, {
- cwd: path || process.cwd(),
- stdio: ['ignore', 'pipe', 'pipe'],
- });
-
- let stdout = '';
- let stderr = '';
-
- child.stdout.on('data', (data) => {
- stdout += data.toString();
- });
-
- child.stderr.on('data', (data) => {
- stderr += data.toString();
- });
-
- child.on('close', (code) => {
- const results: ExactMatch[] = [];
-
- if (code === 0 || (code === 1 && stdout.trim())) {
- const lines = stdout.split('\n').filter((line) => line.trim());
-
- for (const line of lines) {
- try {
- const item = JSON.parse(line);
-
- if (item.type === 'match') {
- const match: ExactMatch = {
- file: item.data.path.text,
- line: item.data.line_number,
- column:
- item.data.submatches && item.data.submatches[0]
- ? item.data.submatches[0].start + 1
- : 1,
- content: item.data.lines.text.trim(),
- };
- results.push(match);
- }
- } catch {
- continue;
- }
- }
-
- resolve({
- success: true,
- results,
- metadata: {
- mode: 'ripgrep',
- backend: 'ripgrep',
- count: results.length,
- query,
- },
- });
- } else {
- resolve({
- success: false,
- error: `ripgrep execution failed with code ${code}: ${stderr}`,
- results: [],
- });
- }
- });
-
- child.on('error', (error) => {
- resolve({
- success: false,
- error: `Failed to spawn ripgrep: ${error.message}`,
- results: [],
- });
- });
- });
-}
-
-/**
- * Mode: exact - CodexLens exact/FTS search
- * Requires index
- */
-async function executeCodexLensExactMode(params: Params): Promise {
- const { query, path = '.', maxResults = 10, enrich = false } = params;
-
- if (!query) {
- return {
- success: false,
- error: 'Query is required for search',
- };
- }
-
- // Check CodexLens availability
- const readyStatus = await ensureCodexLensReady();
- if (!readyStatus.ready) {
- return {
- success: false,
- error: `CodexLens not available: ${readyStatus.error}`,
- };
- }
-
- // Check index status
- const indexStatus = await checkIndexStatus(path);
-
- const args = ['search', query, '--limit', maxResults.toString(), '--mode', 'exact', '--json'];
- if (enrich) {
- args.push('--enrich');
- }
- const result = await executeCodexLens(args, { cwd: path });
-
- if (!result.success) {
- return {
- success: false,
- error: result.error,
- metadata: {
- mode: 'exact',
- backend: 'codexlens',
- count: 0,
- query,
- warning: indexStatus.warning,
- },
- };
- }
-
- // Parse results
- let results: SemanticMatch[] = [];
- try {
- const parsed = JSON.parse(stripAnsi(result.output || '{}'));
- const data = parsed.result?.results || parsed.results || parsed;
- results = (Array.isArray(data) ? data : []).map((item: any) => ({
- file: item.path || item.file,
- score: item.score || 0,
- content: item.excerpt || item.content || '',
- symbol: item.symbol || null,
- }));
- } catch {
- // Keep empty results
- }
-
- return {
- success: true,
- results,
- metadata: {
- mode: 'exact',
- backend: 'codexlens',
- count: results.length,
- query,
- warning: indexStatus.warning,
- },
- };
-}
-
-/**
- * Mode: hybrid - Best quality search with RRF fusion
- * Uses CodexLens hybrid mode (exact + fuzzy + vector)
- * Requires index with embeddings
- */
-async function executeHybridMode(params: Params): Promise {
- const { query, path = '.', maxResults = 10, enrich = false } = params;
-
- if (!query) {
- return {
- success: false,
- error: 'Query is required for search',
- };
- }
-
- // Check CodexLens availability
- const readyStatus = await ensureCodexLensReady();
- if (!readyStatus.ready) {
- return {
- success: false,
- error: `CodexLens not available: ${readyStatus.error}`,
- };
- }
-
- // Check index status
- const indexStatus = await checkIndexStatus(path);
-
- const args = ['search', query, '--limit', maxResults.toString(), '--mode', 'hybrid', '--json'];
- if (enrich) {
- args.push('--enrich');
- }
- const result = await executeCodexLens(args, { cwd: path });
-
- if (!result.success) {
- return {
- success: false,
- error: result.error,
- metadata: {
- mode: 'hybrid',
- backend: 'codexlens',
- count: 0,
- query,
- warning: indexStatus.warning,
- },
- };
- }
-
- // Parse results
- let results: SemanticMatch[] = [];
- try {
- const parsed = JSON.parse(stripAnsi(result.output || '{}'));
- const data = parsed.result?.results || parsed.results || parsed;
- results = (Array.isArray(data) ? data : []).map((item: any) => ({
- file: item.path || item.file,
- score: item.score || 0,
- content: item.excerpt || item.content || '',
- symbol: item.symbol || null,
- }));
- } catch {
- return {
- success: true,
- results: [],
- output: result.output,
- metadata: {
- mode: 'hybrid',
- backend: 'codexlens',
- count: 0,
- query,
- warning: indexStatus.warning || 'Failed to parse JSON output',
- },
- };
- }
-
- return {
- success: true,
- results,
- metadata: {
- mode: 'hybrid',
- backend: 'codexlens',
- count: results.length,
- query,
- note: 'Hybrid mode uses RRF fusion (exact + fuzzy + vector) for best results',
- warning: indexStatus.warning,
- },
- };
-}
-
-/**
- * TypeScript implementation of Reciprocal Rank Fusion
- * Reference: codex-lens/src/codexlens/search/ranking.py
- * Formula: score(d) = Σ weight_source / (k + rank_source(d))
- */
-function applyRRFFusion(
- resultsMap: Map,
- weights: Record,
- limit: number,
- k: number = 60,
-): any[] {
- const pathScores = new Map();
-
- resultsMap.forEach((results, source) => {
- const weight = weights[source] || 0;
- if (weight === 0 || !results) return;
-
- results.forEach((result, rank) => {
- const path = result.file || result.path;
- if (!path) return;
-
- const rrfContribution = weight / (k + rank + 1);
-
- if (!pathScores.has(path)) {
- pathScores.set(path, { score: 0, result, sources: [] });
- }
- const entry = pathScores.get(path)!;
- entry.score += rrfContribution;
- if (!entry.sources.includes(source)) {
- entry.sources.push(source);
- }
- });
- });
-
- // Sort by fusion score descending
- return Array.from(pathScores.values())
- .sort((a, b) => b.score - a.score)
- .slice(0, limit)
- .map(item => ({
- ...item.result,
- fusion_score: item.score,
- matched_backends: item.sources,
- }));
-}
-
-/**
- * Promise wrapper with timeout support
- * @param promise - The promise to wrap
- * @param ms - Timeout in milliseconds
- * @param modeName - Name of the mode for error message
- * @returns A new promise that rejects on timeout
- */
-function withTimeout(promise: Promise, ms: number, modeName: string): Promise {
- return new Promise((resolve, reject) => {
- const timer = setTimeout(() => {
- reject(new Error(`'${modeName}' search timed out after ${ms}ms`));
- }, ms);
-
- promise
- .then(resolve)
- .catch(reject)
- .finally(() => clearTimeout(timer));
- });
-}
-
-/**
- * Mode: priority - Fallback search strategy: hybrid -> exact -> ripgrep
- * Returns results from the first backend that succeeds and provides results.
- * More efficient than parallel mode - stops as soon as valid results are found.
- */
-async function executePriorityFallbackMode(params: Params): Promise {
- const { query, path = '.' } = params;
- const fallbackHistory: string[] = [];
-
- if (!query) {
- return { success: false, error: 'Query is required for search' };
- }
-
- // Check index status first
- const indexStatus = await checkIndexStatus(path);
-
- // 1. Try Hybrid search (highest priority) - 90s timeout for large indexes
- if (indexStatus.indexed && indexStatus.has_embeddings) {
- try {
- const hybridResult = await withTimeout(executeHybridMode(params), 90000, 'hybrid');
- if (hybridResult.success && hybridResult.results && (hybridResult.results as any[]).length > 0) {
- fallbackHistory.push('hybrid: success');
- return {
- ...hybridResult,
- metadata: {
- ...hybridResult.metadata,
- mode: 'priority',
- note: 'Result from hybrid search (semantic + vector).',
- fallback_history: fallbackHistory,
- },
- };
- }
- fallbackHistory.push('hybrid: no results');
- } catch (error) {
- fallbackHistory.push(`hybrid: ${(error as Error).message}`);
- }
- } else {
- fallbackHistory.push(`hybrid: skipped (${!indexStatus.indexed ? 'no index' : 'no embeddings'})`);
- }
-
- // 2. Fallback to Exact search - 10s timeout
- if (indexStatus.indexed) {
- try {
- const exactResult = await withTimeout(executeCodexLensExactMode(params), 10000, 'exact');
- if (exactResult.success && exactResult.results && (exactResult.results as any[]).length > 0) {
- fallbackHistory.push('exact: success');
- return {
- ...exactResult,
- metadata: {
- ...exactResult.metadata,
- mode: 'priority',
- note: 'Result from exact/FTS search (fallback from hybrid).',
- fallback_history: fallbackHistory,
- },
- };
- }
- fallbackHistory.push('exact: no results');
- } catch (error) {
- fallbackHistory.push(`exact: ${(error as Error).message}`);
- }
- } else {
- fallbackHistory.push('exact: skipped (no index)');
- }
-
- // 3. Final fallback to Ripgrep - 5s timeout
- try {
- const ripgrepResult = await withTimeout(executeRipgrepMode(params), 5000, 'ripgrep');
- fallbackHistory.push(ripgrepResult.success ? 'ripgrep: success' : 'ripgrep: failed');
- return {
- ...ripgrepResult,
- metadata: {
- ...ripgrepResult.metadata,
- mode: 'priority',
- note: 'Result from ripgrep search (final fallback).',
- fallback_history: fallbackHistory,
- },
- };
- } catch (error) {
- fallbackHistory.push(`ripgrep: ${(error as Error).message}`);
- }
-
- // All modes failed
- return {
- success: false,
- error: 'All search backends in priority mode failed or returned no results.',
- metadata: {
- mode: 'priority',
- query,
- fallback_history: fallbackHistory,
- } as any,
- };
-}
-
-// Tool schema for MCP
-export const schema: ToolSchema = {
- name: 'smart_search',
- description: `Intelligent code search with five modes. Use "auto" mode (default) for intelligent routing.
-
-**Usage:**
- smart_search(query="authentication logic") # auto mode - routes to best backend
- smart_search(query="MyClass", mode="exact") # exact mode - precise FTS matching
- smart_search(query="auth", mode="ripgrep") # ripgrep mode - fast literal search (no index)
- smart_search(query="how to auth", mode="hybrid") # hybrid mode - semantic search (requires index)
-
-**Index Management:**
- smart_search(action="init") # Create FTS index for current directory
- smart_search(action="status") # Check index and embedding status
-
-**Graph Enrichment:**
- smart_search(query="func", enrich=true) # Enrich results with code relationships (calls, imports, called_by, imported_by)
-
-**Modes:** auto (intelligent routing), hybrid (semantic, needs index), exact (FTS), ripgrep (fast, no index), priority (fallback: hybrid→exact→ripgrep)`,
- inputSchema: {
- type: 'object',
- properties: {
- action: {
- type: 'string',
- enum: ['init', 'search', 'search_files', 'status'],
- description: 'Action to perform: init (create FTS index, no embeddings), search (default), search_files (paths only), status (check index)',
- default: 'search',
- },
- query: {
- type: 'string',
- description: 'Search query (required for search/search_files actions)',
- },
- mode: {
- type: 'string',
- enum: SEARCH_MODES,
- description: 'Search mode: auto (default), hybrid (best quality), exact (CodexLens FTS), ripgrep (fast, no index), priority (fallback: hybrid->exact->ripgrep)',
- default: 'auto',
- },
- output_mode: {
- type: 'string',
- enum: ['full', 'files_only', 'count'],
- description: 'Output format: full (default), files_only (paths only), count (per-file counts)',
- default: 'full',
- },
- path: {
- type: 'string',
- description: 'Directory path for init/search actions (default: current directory)',
- },
- paths: {
- type: 'array',
- description: 'Multiple paths to search within (for search action)',
- items: {
- type: 'string',
- },
- default: [],
- },
- contextLines: {
- type: 'number',
- description: 'Number of context lines around matches (exact mode only)',
- default: 0,
- },
- maxResults: {
- type: 'number',
- description: 'Maximum number of results (default: 10)',
- default: 10,
- },
- limit: {
- type: 'number',
- description: 'Alias for maxResults',
- default: 10,
- },
- includeHidden: {
- type: 'boolean',
- description: 'Include hidden files/directories',
- default: false,
- },
- languages: {
- type: 'array',
- items: { type: 'string' },
- description: 'Languages to index (for init action). Example: ["javascript", "typescript"]',
- },
- enrich: {
- type: 'boolean',
- description: 'Enrich search results with code graph relationships (calls, imports, called_by, imported_by).',
- default: false,
- },
- },
- required: [],
- },
-};
-
-/**
- * Transform results based on output_mode
- */
-function transformOutput(
- results: ExactMatch[] | SemanticMatch[] | GraphMatch[] | unknown[],
- outputMode: 'full' | 'files_only' | 'count'
-): unknown {
- if (!Array.isArray(results)) {
- return results;
- }
-
- switch (outputMode) {
- case 'files_only': {
- // Extract unique file paths
- const files = [...new Set(results.map((r: any) => r.file))].filter(Boolean);
- return { files, count: files.length };
- }
- case 'count': {
- // Count matches per file
- const counts: Record = {};
- for (const r of results) {
- const file = (r as any).file;
- if (file) {
- counts[file] = (counts[file] || 0) + 1;
- }
- }
- return {
- files: Object.entries(counts).map(([file, count]) => ({ file, count })),
- total: results.length,
- };
- }
- case 'full':
- default:
- return results;
- }
-}
-
-// Handler function
-export async function handler(params: Record): Promise> {
- const parsed = ParamsSchema.safeParse(params);
- if (!parsed.success) {
- return { success: false, error: `Invalid params: ${parsed.error.message}` };
- }
-
- const { action, mode, output_mode } = parsed.data;
-
- // Sync limit and maxResults - use the larger of the two if both provided
- // This ensures user-provided values take precedence over defaults
- const effectiveLimit = Math.max(parsed.data.limit || 10, parsed.data.maxResults || 10);
- parsed.data.maxResults = effectiveLimit;
- parsed.data.limit = effectiveLimit;
-
- try {
- let result: SearchResult;
-
- // Handle actions
- switch (action) {
- case 'init':
- result = await executeInitAction(parsed.data);
- break;
-
- case 'status':
- result = await executeStatusAction(parsed.data);
- break;
-
- case 'search_files':
- // For search_files, use search mode but force files_only output
- parsed.data.output_mode = 'files_only';
- // Fall through to search
-
- case 'search':
- default:
- // Handle search modes: auto | hybrid | exact | ripgrep | priority
- switch (mode) {
- case 'auto':
- result = await executeAutoMode(parsed.data);
- break;
- case 'hybrid':
- result = await executeHybridMode(parsed.data);
- break;
- case 'exact':
- result = await executeCodexLensExactMode(parsed.data);
- break;
- case 'ripgrep':
- result = await executeRipgrepMode(parsed.data);
- break;
- case 'priority':
- result = await executePriorityFallbackMode(parsed.data);
- break;
- default:
- throw new Error(`Unsupported mode: ${mode}. Use: auto, hybrid, exact, ripgrep, or priority`);
- }
- break;
- }
-
- // Transform output based on output_mode (for search actions only)
- if (action === 'search' || action === 'search_files') {
- if (result.success && result.results && output_mode !== 'full') {
- result.results = transformOutput(result.results as any[], output_mode);
- }
- }
-
- return result.success ? { success: true, result } : { success: false, error: result.error };
- } catch (error) {
- return { success: false, error: (error as Error).message };
- }
-}
-
-/**
- * Execute init action with external progress callback
- * Used by MCP server for streaming progress
- */
-export async function executeInitWithProgress(
- params: Record,
- onProgress?: (progress: ProgressInfo) => void
-): Promise {
- const path = (params.path as string) || '.';
- const languages = params.languages as string[] | undefined;
-
- // Check CodexLens availability
- const readyStatus = await ensureCodexLensReady();
- if (!readyStatus.ready) {
- return {
- success: false,
- error: `CodexLens not available: ${readyStatus.error}. CodexLens will be auto-installed on first use.`,
- };
- }
-
- const args = ['init', path];
- if (languages && languages.length > 0) {
- args.push('--languages', languages.join(','));
- }
-
- // Track progress updates
- const progressUpdates: ProgressInfo[] = [];
- let lastProgress: ProgressInfo | null = null;
-
- const result = await executeCodexLens(args, {
- cwd: path,
- timeout: 1800000, // 30 minutes for large codebases
- onProgress: (progress: ProgressInfo) => {
- progressUpdates.push(progress);
- lastProgress = progress;
- // Call external progress callback if provided
- if (onProgress) {
- onProgress(progress);
- }
- },
- });
-
- // Build metadata with progress info
- const metadata: SearchMetadata = {
- action: 'init',
- path,
- };
-
- if (lastProgress !== null) {
- const p = lastProgress as ProgressInfo;
- metadata.progress = {
- stage: p.stage,
- message: p.message,
- percent: p.percent,
- filesProcessed: p.filesProcessed,
- totalFiles: p.totalFiles,
- };
- }
-
- if (progressUpdates.length > 0) {
- metadata.progressHistory = progressUpdates.slice(-5);
- }
-
- return {
- success: result.success,
- error: result.error,
- message: result.success
- ? `CodexLens index created successfully for ${path}`
- : undefined,
- metadata,
- };
-}
diff --git a/ccw/tests/codex-lens-bootstrap-fallback.test.js b/ccw/tests/codex-lens-bootstrap-fallback.test.js
deleted file mode 100644
index cd9c1593..00000000
--- a/ccw/tests/codex-lens-bootstrap-fallback.test.js
+++ /dev/null
@@ -1,93 +0,0 @@
-/**
- * Regression test: CodexLens bootstrap falls back to pip when UV bootstrap fails.
- *
- * We simulate a "broken UV" by pointing CCW_UV_PATH to the current Node executable.
- * `node --version` exits 0 so isUvAvailable() returns true, but `node venv ...` fails,
- * forcing the bootstrap code to try the pip path.
- *
- * This test runs bootstrapVenv in a child process to avoid mutating process-wide
- * environment variables that could affect other tests.
- */
-
-import { describe, it } from 'node:test';
-import assert from 'node:assert/strict';
-import { spawn } from 'node:child_process';
-import { mkdtempSync, rmSync } from 'node:fs';
-import { dirname, join } from 'node:path';
-import { tmpdir } from 'node:os';
-import { fileURLToPath } from 'node:url';
-
-const __filename = fileURLToPath(import.meta.url);
-const __dirname = dirname(__filename);
-
-// repo root: /ccw/tests ->
-const REPO_ROOT = join(__dirname, '..', '..');
-
-function runNodeEvalModule(script, env) {
- return new Promise((resolve, reject) => {
- const child = spawn(process.execPath, ['--input-type=module', '-e', script], {
- cwd: REPO_ROOT,
- env,
- stdio: ['ignore', 'pipe', 'pipe'],
- windowsHide: true,
- });
-
- let stdout = '';
- let stderr = '';
-
- child.stdout.on('data', (d) => { stdout += d.toString(); });
- child.stderr.on('data', (d) => { stderr += d.toString(); });
-
- child.on('error', (err) => reject(err));
- child.on('close', (code) => resolve({ code, stdout, stderr }));
- });
-}
-
-describe('CodexLens bootstrap fallback', () => {
- it('falls back to pip when UV bootstrap fails', { timeout: 10 * 60 * 1000 }, async () => {
- const dataDir = mkdtempSync(join(tmpdir(), 'codexlens-bootstrap-fallback-'));
-
- try {
- const script = `
-import { bootstrapVenv } from './ccw/dist/tools/codex-lens.js';
-
-(async () => {
- const result = await bootstrapVenv();
- console.log('@@RESULT@@' + JSON.stringify(result));
-})().catch((e) => {
- console.error(e?.stack || String(e));
- process.exit(1);
-});
-`;
-
- const env = {
- ...process.env,
- // Isolate test venv + dependencies from user/global CodexLens state.
- CODEXLENS_DATA_DIR: dataDir,
- // Make isUvAvailable() return true, but createVenv() fail.
- CCW_UV_PATH: process.execPath,
- };
-
- const { code, stdout, stderr } = await runNodeEvalModule(script, env);
- assert.equal(code, 0, `bootstrapVenv child process failed:\nSTDOUT:\n${stdout}\nSTDERR:\n${stderr}`);
-
- const marker = '@@RESULT@@';
- const idx = stdout.lastIndexOf(marker);
- assert.ok(idx !== -1, `Missing result marker in stdout:\n${stdout}`);
-
- const jsonText = stdout.slice(idx + marker.length).trim();
- const parsed = JSON.parse(jsonText);
-
- assert.equal(parsed?.success, true, `Expected success=true, got:\n${jsonText}`);
- assert.ok(Array.isArray(parsed.warnings), 'Expected warnings array on pip fallback result');
- assert.ok(parsed.warnings.some((w) => String(w).includes('UV bootstrap failed')), `Expected UV failure warning, got: ${JSON.stringify(parsed.warnings)}`);
- } finally {
- try {
- rmSync(dataDir, { recursive: true, force: true });
- } catch {
- // Best effort cleanup; leave artifacts only if Windows locks prevent removal.
- }
- }
- });
-});
-
diff --git a/ccw/tests/codex-lens-cli-compat.test.js b/ccw/tests/codex-lens-cli-compat.test.js
deleted file mode 100644
index 51573a1e..00000000
--- a/ccw/tests/codex-lens-cli-compat.test.js
+++ /dev/null
@@ -1,139 +0,0 @@
-import { after, describe, it } from 'node:test';
-import assert from 'node:assert/strict';
-import { mkdtempSync, rmSync, writeFileSync } from 'node:fs';
-import { join } from 'node:path';
-import { tmpdir } from 'node:os';
-
-const tempDirs = [];
-
-after(() => {
- for (const dir of tempDirs) {
- rmSync(dir, { recursive: true, force: true });
- }
-});
-
-describe('CodexLens CLI compatibility retries', () => {
- it('builds hidden Python spawn options for CLI invocations', async () => {
- const moduleUrl = new URL(`../dist/tools/codex-lens.js?spawn-opts=${Date.now()}`, import.meta.url).href;
- const { __testables } = await import(moduleUrl);
-
- const options = __testables.buildCodexLensSpawnOptions(tmpdir(), 12345);
-
- assert.equal(options.cwd, tmpdir());
- assert.equal(options.shell, false);
- assert.equal(options.timeout, 12345);
- assert.equal(options.windowsHide, true);
- assert.equal(options.env.PYTHONIOENCODING, 'utf-8');
- });
-
- it('probes Python version without a shell-backed console window', async () => {
- const moduleUrl = new URL(`../dist/tools/codex-lens.js?python-probe=${Date.now()}`, import.meta.url).href;
- const { __testables } = await import(moduleUrl);
- const probeCalls = [];
-
- const version = __testables.probePythonVersion({ command: 'python', args: [], display: 'python' }, (command, args, options) => {
- probeCalls.push({ command, args, options });
- return { status: 0, stdout: '', stderr: 'Python 3.11.9\n' };
- });
-
- assert.equal(version, 'Python 3.11.9');
- assert.equal(probeCalls.length, 1);
- assert.equal(probeCalls[0].command, 'python');
- assert.deepEqual(probeCalls[0].args, ['--version']);
- assert.equal(probeCalls[0].options.shell, false);
- assert.equal(probeCalls[0].options.windowsHide, true);
- assert.equal(probeCalls[0].options.env.PYTHONIOENCODING, 'utf-8');
- });
-
- it('initializes a tiny index even when CLI emits compatibility conflicts first', async () => {
- const moduleUrl = new URL(`../dist/tools/codex-lens.js?compat=${Date.now()}`, import.meta.url).href;
- const { checkVenvStatus, executeCodexLens } = await import(moduleUrl);
-
- const ready = await checkVenvStatus(true);
- if (!ready.ready) {
- console.log('Skipping: CodexLens not ready');
- return;
- }
-
- const projectDir = mkdtempSync(join(tmpdir(), 'codexlens-init-'));
- tempDirs.push(projectDir);
- writeFileSync(join(projectDir, 'sample.ts'), 'export const sample = 1;\n');
-
- const result = await executeCodexLens(['index', 'init', projectDir, '--force'], { timeout: 600000 });
-
- assert.equal(result.success, true, result.error ?? 'Expected init to succeed');
- assert.ok((result.output ?? '').length > 0 || (result.warning ?? '').length > 0, 'Expected init output or compatibility warning');
- });
-
- it('synthesizes a machine-readable fallback when JSON search output is empty', async () => {
- const moduleUrl = new URL(`../dist/tools/codex-lens.js?compat-empty=${Date.now()}`, import.meta.url).href;
- const { __testables } = await import(moduleUrl);
-
- const normalized = __testables.normalizeSearchCommandResult(
- { success: true },
- { query: 'missing symbol', cwd: tmpdir(), limit: 5, filesOnly: false },
- );
-
- assert.equal(normalized.success, true);
- assert.match(normalized.warning ?? '', /empty stdout/i);
- assert.deepEqual(normalized.results, {
- success: true,
- result: {
- query: 'missing symbol',
- count: 0,
- results: [],
- },
- });
- });
-
- it('returns structured semantic search results for a local embedded workspace', async () => {
- const codexLensUrl = new URL(`../dist/tools/codex-lens.js?compat-search=${Date.now()}`, import.meta.url).href;
- const smartSearchUrl = new URL(`../dist/tools/smart-search.js?compat-search=${Date.now()}`, import.meta.url).href;
- const codexLensModule = await import(codexLensUrl);
- const smartSearchModule = await import(smartSearchUrl);
-
- const ready = await codexLensModule.checkVenvStatus(true);
- if (!ready.ready) {
- console.log('Skipping: CodexLens not ready');
- return;
- }
-
- const semantic = await codexLensModule.checkSemanticStatus();
- if (!semantic.available) {
- console.log('Skipping: semantic dependencies not ready');
- return;
- }
-
- const projectDir = mkdtempSync(join(tmpdir(), 'codexlens-search-'));
- tempDirs.push(projectDir);
- writeFileSync(
- join(projectDir, 'sample.ts'),
- 'export function greet(name) { return `hello ${name}`; }\nexport const sum = (a, b) => a + b;\n',
- );
-
- const init = await smartSearchModule.handler({ action: 'init', path: projectDir });
- assert.equal(init.success, true, init.error ?? 'Expected smart-search init to succeed');
-
- const embed = await smartSearchModule.handler({
- action: 'embed',
- path: projectDir,
- embeddingBackend: 'local',
- force: true,
- });
- assert.equal(embed.success, true, embed.error ?? 'Expected smart-search embed to succeed');
-
- const result = await codexLensModule.codexLensTool.execute({
- action: 'search',
- path: projectDir,
- query: 'greet function',
- mode: 'semantic',
- format: 'json',
- });
-
- assert.equal(result.success, true, result.error ?? 'Expected semantic search compatibility fallback to succeed');
- const payload = result.results?.result ?? result.results;
- assert.ok(Array.isArray(payload?.results), 'Expected structured search results payload');
- assert.ok(payload.results.length > 0, 'Expected at least one structured semantic search result');
- assert.doesNotMatch(result.error ?? '', /unexpected extra arguments/i);
- });
-});
diff --git a/ccw/tests/codex-lens-integration.test.js b/ccw/tests/codex-lens-integration.test.js
deleted file mode 100644
index ebcec928..00000000
--- a/ccw/tests/codex-lens-integration.test.js
+++ /dev/null
@@ -1,485 +0,0 @@
-/**
- * Integration Tests for CodexLens with actual file operations
- *
- * These tests create temporary files and directories to test
- * the full indexing and search workflow.
- */
-
-import { describe, it, before, after } from 'node:test';
-import assert from 'node:assert';
-import { dirname, join } from 'path';
-import { fileURLToPath } from 'url';
-import { existsSync, mkdirSync, rmSync, writeFileSync, readdirSync } from 'fs';
-import { tmpdir } from 'os';
-
-const __filename = fileURLToPath(import.meta.url);
-const __dirname = dirname(__filename);
-
-// Import the codex-lens module
-const codexLensPath = new URL('../dist/tools/codex-lens.js', import.meta.url).href;
-
-describe('CodexLens Full Integration Tests', async () => {
- let codexLensModule;
- let testDir;
- let isReady = false;
-
- before(async () => {
- try {
- codexLensModule = await import(codexLensPath);
-
- // Check if CodexLens is installed
- const status = await codexLensModule.checkVenvStatus();
- isReady = status.ready;
-
- if (!isReady) {
- console.log('CodexLens not installed - some integration tests will be skipped');
- return;
- }
-
- // Create temporary test directory
- testDir = join(tmpdir(), `codexlens-test-${Date.now()}`);
- mkdirSync(testDir, { recursive: true });
-
- // Create test Python files
- writeFileSync(join(testDir, 'main.py'), `
-"""Main module for testing."""
-
-def hello_world():
- """Say hello to the world."""
- print("Hello, World!")
- return "hello"
-
-def calculate_sum(a, b):
- """Calculate sum of two numbers."""
- return a + b
-
-class Calculator:
- """A simple calculator class."""
-
- def __init__(self):
- self.result = 0
-
- def add(self, value):
- """Add value to result."""
- self.result += value
- return self.result
-
- def subtract(self, value):
- """Subtract value from result."""
- self.result -= value
- return self.result
-`);
-
- writeFileSync(join(testDir, 'utils.py'), `
-"""Utility functions."""
-
-def format_string(text):
- """Format a string."""
- return text.strip().lower()
-
-def validate_email(email):
- """Validate email format."""
- return "@" in email and "." in email
-
-async def fetch_data(url):
- """Fetch data from URL (async)."""
- pass
-`);
-
- // Create test JavaScript file
- writeFileSync(join(testDir, 'app.js'), `
-/**
- * Main application module
- */
-
-function initApp() {
- console.log('App initialized');
-}
-
-const processData = async (data) => {
- return data.map(item => item.value);
-};
-
-class Application {
- constructor(name) {
- this.name = name;
- }
-
- start() {
- console.log(\`Starting \${this.name}\`);
- }
-}
-
-export { initApp, processData, Application };
-`);
-
- console.log(`Test directory created at: ${testDir}`);
- } catch (err) {
- console.log('Setup failed:', err.message);
- }
- });
-
- after(async () => {
- // Cleanup test directory
- if (testDir && existsSync(testDir)) {
- try {
- rmSync(testDir, { recursive: true, force: true });
- console.log('Test directory cleaned up');
- } catch (err) {
- console.log('Cleanup failed:', err.message);
- }
- }
- });
-
- describe('Index Initialization', () => {
- it('should initialize index for test directory', async () => {
- if (!isReady || !testDir) {
- console.log('Skipping: CodexLens not ready or test dir not created');
- return;
- }
-
- const result = await codexLensModule.codexLensTool.execute({
- action: 'init',
- path: testDir
- });
-
- assert.ok(typeof result === 'object', 'Result should be an object');
- assert.ok('success' in result, 'Result should have success property');
-
- if (result.success) {
- // CodexLens stores indexes in the global data directory (e.g. ~/.codexlens/indexes)
- // rather than creating a per-project ".codexlens" folder.
- assert.ok(true);
- }
- });
-
- it('should create index.db file', async () => {
- if (!isReady || !testDir) {
- console.log('Skipping: CodexLens not ready or test dir not created');
- return;
- }
-
- const indexDb = join(testDir, '.codexlens', 'index.db');
-
- // May need to wait for previous init to complete
- // Index.db should exist after successful init
- if (existsSync(join(testDir, '.codexlens'))) {
- // Check files in .codexlens directory
- const files = readdirSync(join(testDir, '.codexlens'));
- console.log('.codexlens contents:', files);
- }
- });
- });
-
- describe('Status Query', () => {
- it('should return index status for test directory', async () => {
- if (!isReady || !testDir) {
- console.log('Skipping: CodexLens not ready or test dir not created');
- return;
- }
-
- const result = await codexLensModule.codexLensTool.execute({
- action: 'status',
- path: testDir
- });
-
- assert.ok(typeof result === 'object', 'Result should be an object');
- console.log('Index status:', JSON.stringify(result, null, 2));
-
- if (result.success) {
- // Navigate nested structure: result.status.result or result.result
- const statusData = result.status?.result || result.result || result.status || result;
- const hasIndexInfo = (
- 'files' in statusData ||
- 'db_path' in statusData ||
- result.output ||
- (result.status && 'success' in result.status)
- );
- assert.ok(hasIndexInfo, 'Status should contain index information or raw output');
- }
- });
- });
-
- describe('Symbol Extraction', () => {
- it('should extract symbols from Python file', async () => {
- if (!isReady || !testDir) {
- console.log('Skipping: CodexLens not ready or test dir not created');
- return;
- }
-
- const result = await codexLensModule.codexLensTool.execute({
- action: 'symbol',
- file: join(testDir, 'main.py')
- });
-
- assert.ok(typeof result === 'object', 'Result should be an object');
-
- if (result.success) {
- console.log('Symbols found:', result.symbols || result.output);
-
- // Parse output if needed
- let symbols = result.symbols;
- if (!symbols && result.output) {
- try {
- const parsed = JSON.parse(result.output);
- symbols = parsed.result?.file?.symbols || parsed.symbols;
- } catch {
- // Keep raw output
- }
- }
-
- if (symbols && Array.isArray(symbols)) {
- // Check for expected symbols
- const symbolNames = symbols.map(s => s.name);
- assert.ok(symbolNames.includes('hello_world') || symbolNames.some(n => n.includes('hello')),
- 'Should find hello_world function');
- assert.ok(symbolNames.includes('Calculator') || symbolNames.some(n => n.includes('Calc')),
- 'Should find Calculator class');
- }
- }
- });
-
- it('should extract symbols from JavaScript file', async () => {
- if (!isReady || !testDir) {
- console.log('Skipping: CodexLens not ready or test dir not created');
- return;
- }
-
- const result = await codexLensModule.codexLensTool.execute({
- action: 'symbol',
- file: join(testDir, 'app.js')
- });
-
- assert.ok(typeof result === 'object', 'Result should be an object');
-
- if (result.success) {
- console.log('JS Symbols found:', result.symbols || result.output);
- }
- });
- });
-
- describe('Full-Text Search', () => {
- it('should search for text in indexed files', async () => {
- if (!isReady || !testDir) {
- console.log('Skipping: CodexLens not ready or test dir not created');
- return;
- }
-
- // First ensure index is initialized
- await codexLensModule.codexLensTool.execute({
- action: 'init',
- path: testDir
- });
-
- const result = await codexLensModule.codexLensTool.execute({
- action: 'search',
- query: 'hello',
- path: testDir,
- limit: 10
- });
-
- assert.ok(typeof result === 'object', 'Result should be an object');
-
- if (result.success) {
- console.log('Search results:', result.results || result.output);
- }
- });
-
- it('should search for class names', async () => {
- if (!isReady || !testDir) {
- console.log('Skipping: CodexLens not ready or test dir not created');
- return;
- }
-
- const result = await codexLensModule.codexLensTool.execute({
- action: 'search',
- query: 'Calculator',
- path: testDir,
- limit: 10
- });
-
- assert.ok(typeof result === 'object', 'Result should be an object');
-
- if (result.success) {
- console.log('Class search results:', result.results || result.output);
- }
- });
- });
-
- describe('Incremental Update', () => {
- it('should update index when file changes', async () => {
- if (!isReady || !testDir) {
- console.log('Skipping: CodexLens not ready or test dir not created');
- return;
- }
-
- // Create a new file
- const newFile = join(testDir, 'new_module.py');
- writeFileSync(newFile, `
-def new_function():
- """A newly added function."""
- return "new"
-`);
-
- const result = await codexLensModule.codexLensTool.execute({
- action: 'update',
- files: [newFile],
- path: testDir
- });
-
- assert.ok(typeof result === 'object', 'Result should be an object');
-
- if (result.success) {
- console.log('Update result:', result.updateResult || result.output);
- }
- });
-
- it('should handle deleted files in update', async () => {
- if (!isReady || !testDir) {
- console.log('Skipping: CodexLens not ready or test dir not created');
- return;
- }
-
- // Reference a non-existent file
- const deletedFile = join(testDir, 'deleted_file.py');
-
- const result = await codexLensModule.codexLensTool.execute({
- action: 'update',
- files: [deletedFile],
- path: testDir
- });
-
- assert.ok(typeof result === 'object', 'Result should be an object');
- // Should handle gracefully without crashing
- });
- });
-});
-
-describe('CodexLens CLI Commands via executeCodexLens', async () => {
- let codexLensModule;
- let isReady = false;
-
- before(async () => {
- try {
- codexLensModule = await import(codexLensPath);
- const status = await codexLensModule.checkVenvStatus();
- isReady = status.ready;
- } catch (err) {
- console.log('Setup failed:', err.message);
- }
- });
-
- it('should execute --version command', async () => {
- if (!isReady) {
- console.log('Skipping: CodexLens not ready');
- return;
- }
-
- // Note: codexlens may not have --version, use --help instead
- const result = await codexLensModule.executeCodexLens(['--help']);
- assert.ok(typeof result === 'object');
-
- if (result.success) {
- assert.ok(result.output, 'Should have output');
- }
- });
-
- it('should execute status --json command', async () => {
- if (!isReady) {
- console.log('Skipping: CodexLens not ready');
- return;
- }
-
- const result = await codexLensModule.executeCodexLens(['status', '--json'], {
- cwd: __dirname
- });
-
- assert.ok(typeof result === 'object');
-
- if (result.success && result.output) {
- // Try to parse JSON output
- try {
- const parsed = JSON.parse(result.output);
- assert.ok(typeof parsed === 'object', 'Output should be valid JSON');
- } catch {
- // Output might not be JSON if index doesn't exist
- console.log('Status output (non-JSON):', result.output);
- }
- }
- });
-
- it('should handle inspect command', async () => {
- if (!isReady) {
- console.log('Skipping: CodexLens not ready');
- return;
- }
-
- // Use this test file as input
- const testFile = join(__dirname, 'codex-lens.test.js');
- if (!existsSync(testFile)) {
- console.log('Skipping: Test file not found');
- return;
- }
-
- const result = await codexLensModule.executeCodexLens([
- 'inspect', testFile, '--json'
- ]);
-
- assert.ok(typeof result === 'object');
-
- if (result.success) {
- console.log('Inspect result received');
- }
- });
-});
-
-describe('CodexLens Workspace Detection', async () => {
- let codexLensModule;
- let isReady = false;
-
- before(async () => {
- try {
- codexLensModule = await import(codexLensPath);
- const status = await codexLensModule.checkVenvStatus();
- isReady = status.ready;
- } catch (err) {
- console.log('Setup failed:', err.message);
- }
- });
-
- it('should detect existing workspace', async () => {
- if (!isReady) {
- console.log('Skipping: CodexLens not ready');
- return;
- }
-
- // Try to get status from project root where .codexlens might exist
- const projectRoot = join(__dirname, '..', '..');
-
- const result = await codexLensModule.codexLensTool.execute({
- action: 'status',
- path: projectRoot
- });
-
- assert.ok(typeof result === 'object');
- console.log('Project root status:', result.success ? 'Found' : 'Not found');
- });
-
- it('should use global database when workspace not found', async () => {
- if (!isReady) {
- console.log('Skipping: CodexLens not ready');
- return;
- }
-
- // Use a path that definitely won't have .codexlens
- const tempPath = tmpdir();
-
- const result = await codexLensModule.codexLensTool.execute({
- action: 'status',
- path: tempPath
- });
-
- assert.ok(typeof result === 'object');
- // Should fall back to global database
- });
-});
diff --git a/ccw/tests/codex-lens.test.js b/ccw/tests/codex-lens.test.js
deleted file mode 100644
index c2d46ca0..00000000
--- a/ccw/tests/codex-lens.test.js
+++ /dev/null
@@ -1,521 +0,0 @@
-/**
- * Tests for CodexLens API endpoints and tool integration
- *
- * Tests the following endpoints:
- * - GET /api/codexlens/status
- * - POST /api/codexlens/bootstrap
- * - POST /api/codexlens/init
- * - GET /api/codexlens/semantic/status
- * - POST /api/codexlens/semantic/install
- *
- * Also tests the codex-lens.js tool functions directly
- */
-
-import { describe, it, before, after, mock } from 'node:test';
-import assert from 'node:assert';
-import { createServer } from 'http';
-import { join, dirname } from 'path';
-import { fileURLToPath } from 'url';
-import { existsSync, mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'fs';
-import { homedir, tmpdir } from 'os';
-
-const __filename = fileURLToPath(import.meta.url);
-const __dirname = dirname(__filename);
-
-// Import the codex-lens module - use file:// URL format for Windows compatibility
-const codexLensPath = new URL('../dist/tools/codex-lens.js', import.meta.url).href;
-
-describe('CodexLens Tool Functions', async () => {
- let codexLensModule;
-
- before(async () => {
- try {
- codexLensModule = await import(codexLensPath);
- } catch (err) {
- console.log('Note: codex-lens module import skipped (module may not be available):', err.message);
- }
- });
-
- describe('checkVenvStatus', () => {
- it('should return an object with ready property', async () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- const status = await codexLensModule.checkVenvStatus();
- assert.ok(typeof status === 'object', 'Status should be an object');
- assert.ok('ready' in status, 'Status should have ready property');
- assert.ok(typeof status.ready === 'boolean', 'ready should be boolean');
-
- if (status.ready) {
- assert.ok('version' in status, 'Ready status should include version');
- } else {
- assert.ok('error' in status, 'Not ready status should include error');
- }
- });
- });
-
- describe('checkSemanticStatus', () => {
- it('should return semantic availability status', async () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- const status = await codexLensModule.checkSemanticStatus();
- assert.ok(typeof status === 'object', 'Status should be an object');
- assert.ok('available' in status, 'Status should have available property');
- assert.ok(typeof status.available === 'boolean', 'available should be boolean');
-
- if (status.available) {
- assert.ok('backend' in status, 'Available status should include backend');
- }
- });
- });
-
- describe('executeCodexLens', () => {
- it('should execute codexlens command and return result', async () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- // First check if CodexLens is ready
- const status = await codexLensModule.checkVenvStatus();
- if (!status.ready) {
- console.log('Skipping: CodexLens not installed');
- return;
- }
-
- // Execute a simple status command
- const result = await codexLensModule.executeCodexLens(['--help']);
- assert.ok(typeof result === 'object', 'Result should be an object');
- assert.ok('success' in result, 'Result should have success property');
-
- // --help should succeed
- if (result.success) {
- assert.ok('output' in result, 'Success result should have output');
- assert.ok(result.output.includes('CodexLens') || result.output.includes('codexlens'),
- 'Help output should mention CodexLens');
- }
- });
-
- it('should handle timeout gracefully', async () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- const status = await codexLensModule.checkVenvStatus();
- if (!status.ready) {
- console.log('Skipping: CodexLens not installed');
- return;
- }
-
- // Use a very short timeout to trigger timeout behavior
- // Note: This test may not always trigger timeout depending on system speed
- const result = await codexLensModule.executeCodexLens(['status', '--json'], { timeout: 1 });
- assert.ok(typeof result === 'object', 'Result should be an object');
- assert.ok('success' in result, 'Result should have success property');
- });
- });
-
- describe('codexLensTool.execute', () => {
- it('should handle check action', async () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- const result = await codexLensModule.codexLensTool.execute({ action: 'check' });
- assert.ok(typeof result === 'object', 'Result should be an object');
- assert.ok('ready' in result, 'Check result should have ready property');
- });
-
- it('should return error for unknown action', async () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- const result = await codexLensModule.codexLensTool.execute({ action: 'unknown_action' });
- assert.strictEqual(result.success, false, 'Should return success: false');
- assert.ok(result.error, 'Should have error message');
- });
-
- it('should handle status action', async () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- const checkResult = await codexLensModule.checkVenvStatus();
- if (!checkResult.ready) {
- console.log('Skipping: CodexLens not installed');
- return;
- }
-
- const result = await codexLensModule.codexLensTool.execute({
- action: 'status',
- path: __dirname
- });
- assert.ok(typeof result === 'object', 'Result should be an object');
- assert.ok('success' in result, 'Result should have success property');
- });
- });
-});
-
-describe('CodexLens API Endpoints (Integration)', async () => {
- // These tests require a running server
- // They test the actual HTTP endpoints
-
- const TEST_PORT = 19999;
- let serverModule;
- let server;
- let baseUrl;
-
- before(async () => {
- // Note: We cannot easily start the ccw server in tests
- // So we test the endpoint handlers directly or mock the server
- baseUrl = `http://localhost:${TEST_PORT}`;
-
- // Try to import server module for handler testing
- try {
- // serverModule = await import(join(__dirname, '..', 'src', 'core', 'server.js'));
- console.log('Note: Server integration tests require manual server start');
- } catch (err) {
- console.log('Server module not available for direct testing');
- }
- });
-
- describe('GET /api/codexlens/status', () => {
- it('should return JSON response with ready status', async () => {
- // This test requires a running server
- // Skip if server is not running
- try {
- const response = await fetch(`${baseUrl}/api/codexlens/status`);
-
- if (response.ok) {
- const data = await response.json();
- assert.ok(typeof data === 'object', 'Response should be JSON object');
- assert.ok('ready' in data, 'Response should have ready property');
- }
- } catch (err) {
- if (err.cause?.code === 'ECONNREFUSED') {
- console.log('Skipping: Server not running on port', TEST_PORT);
- } else {
- throw err;
- }
- }
- });
- });
-
- describe('POST /api/codexlens/init', () => {
- it('should initialize index for given path', async () => {
- try {
- const response = await fetch(`${baseUrl}/api/codexlens/init`, {
- method: 'POST',
- headers: { 'Content-Type': 'application/json' },
- body: JSON.stringify({ path: __dirname })
- });
-
- if (response.ok) {
- const data = await response.json();
- assert.ok(typeof data === 'object', 'Response should be JSON object');
- assert.ok('success' in data, 'Response should have success property');
- }
- } catch (err) {
- if (err.cause?.code === 'ECONNREFUSED') {
- console.log('Skipping: Server not running on port', TEST_PORT);
- } else {
- throw err;
- }
- }
- });
- });
-
- describe('GET /api/codexlens/semantic/status', () => {
- it('should return semantic search status', async () => {
- try {
- const response = await fetch(`${baseUrl}/api/codexlens/semantic/status`);
-
- if (response.ok) {
- const data = await response.json();
- assert.ok(typeof data === 'object', 'Response should be JSON object');
- assert.ok('available' in data, 'Response should have available property');
- }
- } catch (err) {
- if (err.cause?.code === 'ECONNREFUSED') {
- console.log('Skipping: Server not running on port', TEST_PORT);
- } else {
- throw err;
- }
- }
- });
- });
-});
-
-describe('CodexLens Tool Definition', async () => {
- let codexLensModule;
-
- before(async () => {
- try {
- codexLensModule = await import(codexLensPath);
- } catch (err) {
- console.log('Note: codex-lens module not available');
- }
- });
-
- it('should have correct tool name', () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- assert.strictEqual(codexLensModule.codexLensTool.name, 'codex_lens');
- });
-
- it('should have description', () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- assert.ok(codexLensModule.codexLensTool.description, 'Should have description');
- assert.ok(codexLensModule.codexLensTool.description.includes('CodexLens'),
- 'Description should mention CodexLens');
- });
-
- it('should have parameters schema', () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- const { parameters } = codexLensModule.codexLensTool;
- assert.ok(parameters, 'Should have parameters');
- assert.strictEqual(parameters.type, 'object');
- assert.ok(parameters.properties, 'Should have properties');
- assert.ok(parameters.properties.action, 'Should have action property');
- assert.deepStrictEqual(parameters.required, ['action'], 'action should be required');
- });
-
- it('should support all documented actions', () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- const { parameters } = codexLensModule.codexLensTool;
- const supportedActions = parameters.properties.action.enum;
-
- const expectedActions = ['init', 'search', 'symbol', 'status', 'update', 'bootstrap', 'check'];
-
- for (const action of expectedActions) {
- assert.ok(supportedActions.includes(action), `Should support ${action} action`);
- }
- });
-
- it('should have execute function', () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- assert.ok(typeof codexLensModule.codexLensTool.execute === 'function',
- 'Should have execute function');
- });
-});
-
-describe('CodexLens Path Configuration', () => {
- it('should use correct venv path based on platform', async () => {
- const codexLensDataDir = join(homedir(), '.codexlens');
- const codexLensVenv = join(codexLensDataDir, 'venv');
-
- const expectedPython = process.platform === 'win32'
- ? join(codexLensVenv, 'Scripts', 'python.exe')
- : join(codexLensVenv, 'bin', 'python');
-
- // Just verify the path construction logic is correct
- assert.ok(expectedPython.includes('codexlens'), 'Python path should include codexlens');
- assert.ok(expectedPython.includes('venv'), 'Python path should include venv');
-
- if (process.platform === 'win32') {
- assert.ok(expectedPython.includes('Scripts'), 'Windows should use Scripts directory');
- assert.ok(expectedPython.endsWith('.exe'), 'Windows should have .exe extension');
- } else {
- assert.ok(expectedPython.includes('bin'), 'Unix should use bin directory');
- }
- });
-});
-
-describe('CodexLens Error Handling', async () => {
- let codexLensModule;
- const testTempDirs = []; // Track temp directories for cleanup
-
- after(() => {
- // Clean up temp directories created during tests
- for (const dir of testTempDirs) {
- try {
- rmSync(dir, { recursive: true, force: true });
- } catch (e) {
- // Ignore cleanup errors
- }
- }
-
- // Clean up any indexes created for temp directories
- const indexDir = join(homedir(), '.codexlens', 'indexes');
- const tempIndexPattern = join(indexDir, 'C', 'Users', '*', 'AppData', 'Local', 'Temp', 'ccw-codexlens-update-*');
- try {
- const glob = require('glob');
- const matches = glob.sync(tempIndexPattern.replace(/\\/g, '/'));
- for (const match of matches) {
- rmSync(match, { recursive: true, force: true });
- }
- } catch (e) {
- // glob may not be available, try direct cleanup
- try {
- const tempPath = join(indexDir, 'C', 'Users');
- if (existsSync(tempPath)) {
- console.log('Note: Temp indexes may need manual cleanup at:', indexDir);
- }
- } catch (e2) {
- // Ignore
- }
- }
- });
-
- before(async () => {
- try {
- codexLensModule = await import(codexLensPath);
- } catch (err) {
- console.log('Note: codex-lens module not available');
- }
- });
-
- it('should handle missing file parameter for symbol action', async () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- const checkResult = await codexLensModule.checkVenvStatus();
- if (!checkResult.ready) {
- console.log('Skipping: CodexLens not installed');
- return;
- }
-
- const result = await codexLensModule.codexLensTool.execute({
- action: 'symbol'
- // file is missing
- });
-
- // Should either error or return success: false
- assert.ok(typeof result === 'object', 'Result should be an object');
- });
-
- it('should support update action without files parameter', async () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- const checkResult = await codexLensModule.checkVenvStatus();
- if (!checkResult.ready) {
- console.log('Skipping: CodexLens not installed');
- return;
- }
-
- const updateRoot = mkdtempSync(join(tmpdir(), 'ccw-codexlens-update-'));
- testTempDirs.push(updateRoot); // Track for cleanup
- writeFileSync(join(updateRoot, 'main.py'), 'def hello():\n return 1\n', 'utf8');
-
- const result = await codexLensModule.codexLensTool.execute({
- action: 'update',
- path: updateRoot,
- });
-
- assert.ok(typeof result === 'object', 'Result should be an object');
- assert.ok('success' in result, 'Result should have success property');
- });
-
- it('should ignore extraneous files parameter for update action', async () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- const checkResult = await codexLensModule.checkVenvStatus();
- if (!checkResult.ready) {
- console.log('Skipping: CodexLens not installed');
- return;
- }
-
- const updateRoot = mkdtempSync(join(tmpdir(), 'ccw-codexlens-update-'));
- testTempDirs.push(updateRoot); // Track for cleanup
- writeFileSync(join(updateRoot, 'main.py'), 'def hello():\n return 1\n', 'utf8');
-
- const result = await codexLensModule.codexLensTool.execute({
- action: 'update',
- path: updateRoot,
- files: []
- });
-
- assert.ok(typeof result === 'object', 'Result should be an object');
- assert.ok('success' in result, 'Result should have success property');
- });
-});
-
-describe('CodexLens Search Parameters', async () => {
- let codexLensModule;
-
- before(async () => {
- try {
- codexLensModule = await import(codexLensPath);
- } catch (err) {
- console.log('Note: codex-lens module not available');
- }
- });
-
- it('should support text and semantic search modes', () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- const { parameters } = codexLensModule.codexLensTool;
- const modeEnum = parameters.properties.mode?.enum;
-
- assert.ok(modeEnum, 'Should have mode enum');
- assert.ok(modeEnum.includes('text'), 'Should support text mode');
- assert.ok(modeEnum.includes('semantic'), 'Should support semantic mode');
- });
-
- it('should have limit parameter with default', () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- const { parameters } = codexLensModule.codexLensTool;
- const limitProp = parameters.properties.limit;
-
- assert.ok(limitProp, 'Should have limit property');
- assert.strictEqual(limitProp.type, 'number', 'limit should be number');
- assert.strictEqual(limitProp.default, 20, 'Default limit should be 20');
- });
-
- it('should support output format options', () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- const { parameters } = codexLensModule.codexLensTool;
- const formatEnum = parameters.properties.format?.enum;
-
- assert.ok(formatEnum, 'Should have format enum');
- assert.ok(formatEnum.includes('json'), 'Should support json format');
- });
-});
diff --git a/ccw/tests/e2e/mcp-tools.e2e.test.ts b/ccw/tests/e2e/mcp-tools.e2e.test.ts
index 30011972..fecb9d2f 100644
--- a/ccw/tests/e2e/mcp-tools.e2e.test.ts
+++ b/ccw/tests/e2e/mcp-tools.e2e.test.ts
@@ -161,54 +161,16 @@ describe('E2E: MCP Tool Execution', async () => {
// Verify essential tools are present
const toolNames = response.result.tools.map((t: any) => t.name);
- assert.ok(toolNames.includes('smart_search'));
assert.ok(toolNames.includes('edit_file'));
assert.ok(toolNames.includes('write_file'));
assert.ok(toolNames.includes('session_manager'));
// Verify tool schema structure
- const smartSearch = response.result.tools.find((t: any) => t.name === 'smart_search');
- assert.ok(smartSearch.description);
- assert.ok(smartSearch.inputSchema);
- assert.equal(smartSearch.inputSchema.type, 'object');
- assert.ok(smartSearch.inputSchema.properties);
- });
-
- it('executes smart_search tool with valid parameters', async () => {
- const response = await mcpClient.call('tools/call', {
- name: 'smart_search',
- arguments: {
- action: 'status',
- path: process.cwd()
- }
- });
-
- assert.equal(response.jsonrpc, '2.0');
- assert.ok(response.result);
- assert.ok(Array.isArray(response.result.content));
- assert.equal(response.result.content[0].type, 'text');
- assert.ok(response.result.content[0].text.length > 0);
- });
-
- it('validates required parameters and returns error for missing params', async () => {
- const response = await mcpClient.call('tools/call', {
- name: 'smart_search',
- arguments: {
- action: 'search'
- // Missing required 'query' parameter
- }
- });
-
- assert.equal(response.jsonrpc, '2.0');
- assert.ok(response.result);
- assert.equal(response.result.isError, true);
- // Error message should mention query is required
- assert.ok(
- response.result.content[0].text.includes('Query is required') ||
- response.result.content[0].text.includes('query') ||
- response.result.content[0].text.includes('required'),
- `Expected error about missing query, got: ${response.result.content[0].text}`
- );
+ const editFile = response.result.tools.find((t: any) => t.name === 'edit_file');
+ assert.ok(editFile.description);
+ assert.ok(editFile.inputSchema);
+ assert.equal(editFile.inputSchema.type, 'object');
+ assert.ok(editFile.inputSchema.properties);
});
it('returns error for non-existent tool', async () => {
@@ -374,10 +336,6 @@ describe('E2E: MCP Tool Execution', async () => {
it('handles concurrent tool calls without interference', async () => {
const calls = await Promise.all([
mcpClient.call('tools/list', {}),
- mcpClient.call('tools/call', {
- name: 'smart_search',
- arguments: { action: 'status', path: process.cwd() }
- }),
mcpClient.call('tools/call', {
name: 'session_manager',
arguments: { operation: 'list', location: 'active' }
@@ -392,8 +350,7 @@ describe('E2E: MCP Tool Execution', async () => {
// Verify different results
assert.ok(Array.isArray(calls[0].result.tools)); // tools/list
- assert.ok(calls[1].result.content); // smart_search
- assert.ok(calls[2].result.content); // session_manager
+ assert.ok(calls[1].result.content); // session_manager
});
it('validates path parameters for security (path traversal prevention)', async () => {
@@ -415,24 +372,6 @@ describe('E2E: MCP Tool Execution', async () => {
assert.ok(hasError);
});
- it('supports progress reporting for long-running operations', async () => {
- // smart_search init action supports progress reporting
- const response = await mcpClient.call('tools/call', {
- name: 'smart_search',
- arguments: {
- action: 'status',
- path: process.cwd()
- }
- });
-
- assert.equal(response.jsonrpc, '2.0');
- assert.ok(response.result);
- assert.ok(response.result.content);
-
- // For status action, should return immediately
- // Progress is logged to stderr but doesn't affect result structure
- });
-
it('handles tool execution timeout gracefully', async () => {
// Create a tool call that should complete quickly
// If it times out, the client will throw
@@ -495,14 +434,10 @@ describe('E2E: MCP Tool Execution', async () => {
it('preserves parameter types in tool execution', async () => {
const response = await mcpClient.call('tools/call', {
- name: 'smart_search',
+ name: 'session_manager',
arguments: {
- action: 'find_files',
- pattern: '*.json',
- path: process.cwd(),
- limit: 10, // Number
- offset: 0, // Number
- caseSensitive: true // Boolean
+ operation: 'list',
+ location: 'active'
}
});
diff --git a/ccw/tests/litellm-client.test.ts b/ccw/tests/litellm-client.test.ts
deleted file mode 100644
index 5400ebd3..00000000
--- a/ccw/tests/litellm-client.test.ts
+++ /dev/null
@@ -1,403 +0,0 @@
-/**
- * Unit tests for LiteLLM client bridge (ccw/dist/tools/litellm-client.js).
- *
- * Notes:
- * - Uses Node's built-in test runner (node:test) (no Jest in this repo).
- * - Stubs `child_process.spawn` to avoid depending on local Python/ccw_litellm installation.
- */
-
-import { after, beforeEach, describe, it } from 'node:test';
-import assert from 'node:assert/strict';
-import { EventEmitter } from 'node:events';
-import { createRequire } from 'node:module';
-
-const require = createRequire(import.meta.url);
-// eslint-disable-next-line @typescript-eslint/no-var-requires
-const childProcess = require('child_process') as typeof import('child_process');
-
-type SpawnBehavior =
- | { type: 'close'; code?: number; stdout?: string; stderr?: string }
- | { type: 'error'; error: Error }
- | { type: 'hang' };
-
-class FakeChildProcess extends EventEmitter {
- stdout = new EventEmitter();
- stderr = new EventEmitter();
- killCalls: string[] = [];
-
- kill(signal?: NodeJS.Signals | number | string): boolean {
- this.killCalls.push(signal === undefined ? 'undefined' : String(signal));
- return true;
- }
-}
-
-type SpawnCall = {
- command: string;
- args: string[];
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
- options: any;
- proc: FakeChildProcess;
-};
-
-const spawnCalls: SpawnCall[] = [];
-const spawnPlan: SpawnBehavior[] = [];
-
-const originalSpawn = childProcess.spawn;
-
-childProcess.spawn = ((command: string, args: string[] = [], options: any = {}) => {
- const normalizedArgs = (args ?? []).map(String);
- const shouldIntercept = normalizedArgs[0] === '-m' && normalizedArgs[1] === 'ccw_litellm.cli';
- if (!shouldIntercept) {
- return originalSpawn(command as any, args as any, options as any);
- }
-
- const proc = new FakeChildProcess();
- spawnCalls.push({ command: String(command), args: normalizedArgs, options, proc });
-
- const next = spawnPlan.shift() ?? { type: 'close', code: 0, stdout: '' };
-
- queueMicrotask(() => {
- if (next.type === 'error') {
- proc.emit('error', next.error);
- return;
- }
-
- if (next.type === 'close') {
- if (next.stdout !== undefined) proc.stdout.emit('data', next.stdout);
- if (next.stderr !== undefined) proc.stderr.emit('data', next.stderr);
- proc.emit('close', next.code ?? 0);
- return;
- }
-
- // hang: intentionally do nothing
- });
-
- return proc as any;
-}) as any;
-
-function getClientModuleUrl(): URL {
- const url = new URL('../dist/tools/litellm-client.js', import.meta.url);
- url.searchParams.set('t', `${Date.now()}-${Math.random()}`);
- return url;
-}
-
-// eslint-disable-next-line @typescript-eslint/no-explicit-any
-let mod: any;
-
-beforeEach(async () => {
- spawnCalls.length = 0;
- spawnPlan.length = 0;
- mod = await import(getClientModuleUrl().href);
-});
-
-after(() => {
- childProcess.spawn = originalSpawn;
-});
-
-describe('LiteLLM client bridge', () => {
- it('uses default pythonPath and version check arguments', async () => {
- spawnPlan.push({ type: 'close', code: 0, stdout: '1.2.3\n' });
-
- const client = new mod.LiteLLMClient();
- const available = await client.isAvailable();
-
- assert.equal(available, true);
- assert.equal(spawnCalls.length, 1);
- assert.equal(spawnCalls[0].command, mod.getCodexLensVenvPython());
- assert.deepEqual(spawnCalls[0].args, ['-m', 'ccw_litellm.cli', 'version']);
- });
-
- it('uses custom pythonPath when provided', async () => {
- spawnPlan.push({ type: 'close', code: 0, stdout: 'ok' });
-
- const client = new mod.LiteLLMClient({ pythonPath: 'python3', timeout: 10 });
- await client.chat('hello', 'default');
-
- assert.equal(spawnCalls.length, 1);
- assert.equal(spawnCalls[0].command, 'python3');
- });
-
- it('spawns LiteLLM Python with hidden window options', async () => {
- spawnPlan.push({ type: 'close', code: 0, stdout: '1.2.3\n' });
-
- const client = new mod.LiteLLMClient({ timeout: 10 });
- const available = await client.isAvailable();
-
- assert.equal(available, true);
- assert.equal(spawnCalls.length, 1);
- assert.equal(spawnCalls[0].options.shell, false);
- assert.equal(spawnCalls[0].options.windowsHide, true);
- assert.equal(spawnCalls[0].options.env.PYTHONIOENCODING, 'utf-8');
- });
-
- it('isAvailable returns false on spawn error', async () => {
- spawnPlan.push({ type: 'error', error: new Error('ENOENT') });
-
- const client = new mod.LiteLLMClient({ timeout: 10 });
- const available = await client.isAvailable();
-
- assert.equal(available, false);
- });
-
- it('getStatus returns version on success', async () => {
- spawnPlan.push({ type: 'close', code: 0, stdout: 'v9.9.9\n' });
-
- const client = new mod.LiteLLMClient({ timeout: 10 });
- const status = await client.getStatus();
-
- assert.equal(status.available, true);
- assert.equal(status.version, 'v9.9.9');
- });
-
- it('getStatus returns error details on non-zero exit', async () => {
- spawnPlan.push({ type: 'close', code: 1, stderr: 'HTTP 500 Internal Server Error' });
-
- const client = new mod.LiteLLMClient({ timeout: 10 });
- const status = await client.getStatus();
-
- assert.equal(status.available, false);
- assert.ok(String(status.error).includes('HTTP 500'));
- });
-
- it('getConfig parses JSON output', async () => {
- spawnPlan.push({ type: 'close', code: 0, stdout: JSON.stringify({ ok: true }) });
-
- const client = new mod.LiteLLMClient({ timeout: 10 });
- const cfg = await client.getConfig();
-
- assert.deepEqual(cfg, { ok: true });
- assert.equal(spawnCalls.length, 1);
- assert.deepEqual(spawnCalls[0].args, ['-m', 'ccw_litellm.cli', 'config']);
- });
-
- it('getConfig throws on malformed JSON', async () => {
- spawnPlan.push({ type: 'close', code: 0, stdout: '{not-json' });
-
- const client = new mod.LiteLLMClient({ timeout: 10 });
- await assert.rejects(() => client.getConfig());
- });
-
- it('embed rejects empty texts input and does not spawn', async () => {
- const client = new mod.LiteLLMClient({ timeout: 10 });
- await assert.rejects(() => client.embed([]), /texts array cannot be empty/);
- assert.equal(spawnCalls.length, 0);
- });
-
- it('embed rejects null/undefined input', async () => {
- const client = new mod.LiteLLMClient({ timeout: 10 });
- await assert.rejects(() => client.embed(null as any), /texts array cannot be empty/);
- await assert.rejects(() => client.embed(undefined as any), /texts array cannot be empty/);
- assert.equal(spawnCalls.length, 0);
- });
-
- it('embed returns vectors with derived dimensions', async () => {
- spawnPlan.push({ type: 'close', code: 0, stdout: JSON.stringify([[1, 2, 3], [4, 5, 6]]) });
-
- const client = new mod.LiteLLMClient({ timeout: 10 });
- const res = await client.embed(['a', 'b'], 'embed-model');
-
- assert.equal(res.model, 'embed-model');
- assert.equal(res.dimensions, 3);
- assert.deepEqual(res.vectors, [
- [1, 2, 3],
- [4, 5, 6],
- ]);
-
- assert.equal(spawnCalls.length, 1);
- assert.deepEqual(spawnCalls[0].args, [
- '-m',
- 'ccw_litellm.cli',
- 'embed',
- '--model',
- 'embed-model',
- '--output',
- 'json',
- 'a',
- 'b',
- ]);
- });
-
- it('embed throws on malformed JSON output', async () => {
- spawnPlan.push({ type: 'close', code: 0, stdout: 'not-json' });
-
- const client = new mod.LiteLLMClient({ timeout: 10 });
- await assert.rejects(() => client.embed(['a'], 'embed-model'));
- });
-
- it('chat rejects empty message and does not spawn', async () => {
- const client = new mod.LiteLLMClient({ timeout: 10 });
- await assert.rejects(() => client.chat(''), /message cannot be empty/);
- assert.equal(spawnCalls.length, 0);
- });
-
- it('chat returns trimmed stdout on success', async () => {
- spawnPlan.push({ type: 'close', code: 0, stdout: 'Hello\n' });
-
- const client = new mod.LiteLLMClient({ timeout: 10 });
- const out = await client.chat('hi', 'chat-model');
-
- assert.equal(out, 'Hello');
- assert.equal(spawnCalls.length, 1);
- assert.deepEqual(spawnCalls[0].args, ['-m', 'ccw_litellm.cli', 'chat', '--model', 'chat-model', 'hi']);
- });
-
- it('chat propagates auth errors (401)', async () => {
- spawnPlan.push({ type: 'close', code: 1, stderr: 'HTTP 401 Unauthorized' });
-
- const client = new mod.LiteLLMClient({ timeout: 10 });
- await assert.rejects(() => client.chat('hi', 'chat-model'), /401/);
- });
-
- it('chat propagates auth errors (403)', async () => {
- spawnPlan.push({ type: 'close', code: 1, stderr: 'HTTP 403 Forbidden' });
-
- const client = new mod.LiteLLMClient({ timeout: 10 });
- await assert.rejects(() => client.chat('hi', 'chat-model'), /403/);
- });
-
- it('chat propagates rate limit errors (429)', async () => {
- spawnPlan.push({ type: 'close', code: 1, stderr: 'HTTP 429 Too Many Requests' });
-
- const client = new mod.LiteLLMClient({ timeout: 10 });
- await assert.rejects(() => client.chat('hi', 'chat-model'), /429/);
- });
-
- it('chat propagates server errors (500)', async () => {
- spawnPlan.push({ type: 'close', code: 1, stderr: 'HTTP 500 Internal Server Error' });
-
- const client = new mod.LiteLLMClient({ timeout: 10 });
- await assert.rejects(() => client.chat('hi', 'chat-model'), /500/);
- });
-
- it('chat propagates server errors (503)', async () => {
- spawnPlan.push({ type: 'close', code: 1, stderr: 'HTTP 503 Service Unavailable' });
-
- const client = new mod.LiteLLMClient({ timeout: 10 });
- await assert.rejects(() => client.chat('hi', 'chat-model'), /503/);
- });
-
- it('chat falls back to exit code when stderr is empty', async () => {
- spawnPlan.push({ type: 'close', code: 2, stdout: '' });
-
- const client = new mod.LiteLLMClient({ timeout: 10 });
- await assert.rejects(() => client.chat('hi', 'chat-model'), /Process exited with code 2/);
- });
-
- it('chat surfaces spawn failures with descriptive message', async () => {
- spawnPlan.push({ type: 'error', error: new Error('spawn ENOENT') });
-
- const client = new mod.LiteLLMClient({ timeout: 10 });
- await assert.rejects(() => client.chat('hi', 'chat-model'), /Failed to spawn Python process: spawn ENOENT/);
- });
-
- it('chat enforces timeout and terminates process', async () => {
- const originalSetTimeout = global.setTimeout;
- let observedDelay: number | null = null;
-
- (global as any).setTimeout = ((fn: any, delay: number, ...args: any[]) => {
- observedDelay = delay;
- return originalSetTimeout(fn, 0, ...args);
- }) as any;
-
- try {
- spawnPlan.push({ type: 'hang' });
-
- const client = new mod.LiteLLMClient({ timeout: 11 });
- await assert.rejects(() => client.chat('hi', 'chat-model'), /Command timed out after 22ms/);
-
- assert.equal(observedDelay, 22);
- assert.equal(spawnCalls.length, 1);
- assert.ok(spawnCalls[0].proc.killCalls.includes('SIGTERM'));
- } finally {
- (global as any).setTimeout = originalSetTimeout;
- }
- });
-
- it('chatMessages rejects empty inputs', async () => {
- const client = new mod.LiteLLMClient({ timeout: 10 });
- await assert.rejects(() => client.chatMessages([]), /messages array cannot be empty/);
- await assert.rejects(() => client.chatMessages(null as any), /messages array cannot be empty/);
- assert.equal(spawnCalls.length, 0);
- });
-
- it('chatMessages uses the last message content', async () => {
- spawnPlan.push({ type: 'close', code: 0, stdout: 'OK' });
-
- const client = new mod.LiteLLMClient({ timeout: 10 });
- const res = await client.chatMessages(
- [
- { role: 'user', content: 'first' },
- { role: 'user', content: 'last' },
- ],
- 'chat-model',
- );
-
- assert.equal(res.content, 'OK');
- assert.equal(res.model, 'chat-model');
- assert.equal(spawnCalls.length, 1);
- assert.equal(spawnCalls[0].args.at(-1), 'last');
- });
-
- it('getLiteLLMClient returns a singleton instance', () => {
- const c1 = mod.getLiteLLMClient();
- const c2 = mod.getLiteLLMClient();
- assert.equal(c1, c2);
- });
-
- it('checkLiteLLMAvailable returns false when version check fails', async () => {
- spawnPlan.push({ type: 'close', code: 1, stderr: 'ccw_litellm not installed' });
-
- const available = await mod.checkLiteLLMAvailable();
- assert.equal(available, false);
- });
-
- it('getLiteLLMStatus includes error message when unavailable', async () => {
- spawnPlan.push({ type: 'close', code: 1, stderr: 'ccw_litellm not installed' });
-
- const status = await mod.getLiteLLMStatus();
- assert.equal(status.available, false);
- assert.ok(String(status.error).includes('ccw_litellm not installed'));
- });
-});
-
-describe('getCodexLensVenvPython (Issue #68 fix)', () => {
- it('should be exported from the module', async () => {
- assert.ok(typeof mod.getCodexLensVenvPython === 'function');
- });
-
- it('should return a string path', async () => {
- const pythonPath = mod.getCodexLensVenvPython();
- assert.equal(typeof pythonPath, 'string');
- assert.ok(pythonPath.length > 0);
- });
-
- it('should return correct path structure for CodexLens venv', async () => {
- const pythonPath = mod.getCodexLensVenvPython();
-
- // On Windows: should contain Scripts/python.exe
- // On Unix: should contain bin/python
- const isWindows = process.platform === 'win32';
-
- if (isWindows) {
- // Either it's the venv path with Scripts, or fallback to 'python'
- const isVenvPath = pythonPath.includes('Scripts') && pythonPath.includes('python');
- const isFallback = pythonPath === 'python';
- assert.ok(isVenvPath || isFallback, `Expected venv path or 'python' fallback, got: ${pythonPath}`);
- } else {
- // On Unix: either venv path with bin/python, or fallback
- const isVenvPath = pythonPath.includes('bin') && pythonPath.includes('python');
- const isFallback = pythonPath === 'python';
- assert.ok(isVenvPath || isFallback, `Expected venv path or 'python' fallback, got: ${pythonPath}`);
- }
- });
-
- it('should include .codexlens/venv in path when venv exists', async () => {
- const pythonPath = mod.getCodexLensVenvPython();
-
- // If not falling back to 'python', should contain .codexlens/venv
- if (pythonPath !== 'python') {
- assert.ok(pythonPath.includes('.codexlens'), `Expected .codexlens in path, got: ${pythonPath}`);
- assert.ok(pythonPath.includes('venv'), `Expected venv in path, got: ${pythonPath}`);
- }
- });
-});
diff --git a/ccw/tests/mcp-server.test.js b/ccw/tests/mcp-server.test.js
index 6d603b2a..86582170 100644
--- a/ccw/tests/mcp-server.test.js
+++ b/ccw/tests/mcp-server.test.js
@@ -97,7 +97,7 @@ describe('MCP Server', () => {
const toolNames = response.result.tools.map(t => t.name);
assert(toolNames.includes('edit_file'));
assert(toolNames.includes('write_file'));
- assert(toolNames.includes('smart_search'));
+ // smart_search removed - use codexlens MCP server instead
});
it('should respond to tools/call request', async () => {
diff --git a/ccw/tests/smart-search-enrich.test.js b/ccw/tests/smart-search-enrich.test.js
deleted file mode 100644
index 2950c17d..00000000
--- a/ccw/tests/smart-search-enrich.test.js
+++ /dev/null
@@ -1,256 +0,0 @@
-/**
- * Tests for smart_search with enrich parameter
- *
- * Tests the following:
- * - enrich parameter is passed to codex-lens
- * - relationship data is parsed from response
- * - SemanticMatch interface with relationships field
- */
-
-import { describe, it, before, mock } from 'node:test';
-import assert from 'node:assert';
-import { dirname, join } from 'path';
-import { fileURLToPath } from 'url';
-
-const __filename = fileURLToPath(import.meta.url);
-const __dirname = dirname(__filename);
-
-// Import the smart-search module (exports schema, not smartSearchTool)
-const smartSearchPath = new URL('../dist/tools/smart-search.js', import.meta.url).href;
-
-describe('Smart Search Enrich Parameter', async () => {
- let smartSearchModule;
-
- before(async () => {
- try {
- smartSearchModule = await import(smartSearchPath);
- } catch (err) {
- console.log('Note: smart-search module import skipped:', err.message);
- }
- });
-
- describe('Parameter Schema', () => {
- it('should have enrich parameter in schema', async () => {
- if (!smartSearchModule) {
- console.log('Skipping: smart-search module not available');
- return;
- }
-
- const { schema } = smartSearchModule;
- assert.ok(schema, 'Should export schema');
- // Schema uses inputSchema (MCP standard), not parameters
- const params = schema.inputSchema || schema.parameters;
- assert.ok(params, 'Should have inputSchema or parameters');
-
- const props = params.properties;
- assert.ok(props.enrich, 'Should have enrich parameter');
- assert.strictEqual(props.enrich.type, 'boolean', 'enrich should be boolean');
- assert.strictEqual(props.enrich.default, false, 'enrich should default to false');
- });
-
- it('should describe enrich parameter purpose', async () => {
- if (!smartSearchModule) {
- console.log('Skipping: smart-search module not available');
- return;
- }
-
- const { schema } = smartSearchModule;
- const params = schema.inputSchema || schema.parameters;
- const enrichDesc = params.properties.enrich?.description || '';
-
- // Description should mention relationships or graph
- const mentionsRelationships = enrichDesc.toLowerCase().includes('relationship') ||
- enrichDesc.toLowerCase().includes('graph') ||
- enrichDesc.toLowerCase().includes('enrich');
- assert.ok(mentionsRelationships, 'enrich description should mention relationships/graph');
- });
- });
-
- describe('SemanticMatch Interface', () => {
- it('should handle results with relationships field', async () => {
- if (!smartSearchModule) {
- console.log('Skipping: smart-search module not available');
- return;
- }
-
- // Create a mock result with relationships
- const mockResult = {
- file: 'test.py',
- score: 0.95,
- content: 'def main(): pass',
- symbol: 'main',
- relationships: [
- {
- type: 'calls',
- direction: 'outgoing',
- target: 'helper',
- file: 'test.py',
- line: 5
- },
- {
- type: 'called_by',
- direction: 'incoming',
- source: 'entrypoint',
- file: 'app.py',
- line: 10
- }
- ]
- };
-
- // Verify structure
- assert.ok(Array.isArray(mockResult.relationships), 'relationships should be array');
- assert.strictEqual(mockResult.relationships.length, 2, 'should have 2 relationships');
-
- const outgoing = mockResult.relationships[0];
- assert.strictEqual(outgoing.type, 'calls');
- assert.strictEqual(outgoing.direction, 'outgoing');
- assert.ok(outgoing.target, 'outgoing should have target');
-
- const incoming = mockResult.relationships[1];
- assert.strictEqual(incoming.type, 'called_by');
- assert.strictEqual(incoming.direction, 'incoming');
- assert.ok(incoming.source, 'incoming should have source');
- });
- });
-
- describe('RelationshipInfo Structure', () => {
- it('should validate relationship info structure', () => {
- // Test the expected structure of RelationshipInfo
- const validRelationship = {
- type: 'calls',
- direction: 'outgoing',
- target: 'some_function',
- file: 'module.py',
- line: 42
- };
-
- assert.ok(['calls', 'imports', 'extends', 'called_by', 'imported_by', 'extended_by']
- .includes(validRelationship.type), 'type should be valid relationship type');
- assert.ok(['outgoing', 'incoming'].includes(validRelationship.direction),
- 'direction should be outgoing or incoming');
- assert.ok(typeof validRelationship.file === 'string', 'file should be string');
- });
-
- it('should allow optional line number', () => {
- const withLine = {
- type: 'calls',
- direction: 'outgoing',
- target: 'func',
- file: 'test.py',
- line: 10
- };
-
- const withoutLine = {
- type: 'imports',
- direction: 'outgoing',
- target: 'os',
- file: 'test.py'
- // line is optional
- };
-
- assert.strictEqual(withLine.line, 10);
- assert.strictEqual(withoutLine.line, undefined);
- });
- });
-});
-
-describe('Smart Search Tool Definition', async () => {
- let smartSearchModule;
-
- before(async () => {
- try {
- smartSearchModule = await import(smartSearchPath);
- } catch (err) {
- console.log('Note: smart-search module not available');
- }
- });
-
- it('should have correct tool name', () => {
- if (!smartSearchModule) {
- console.log('Skipping: smart-search module not available');
- return;
- }
-
- assert.strictEqual(smartSearchModule.schema.name, 'smart_search');
- });
-
- it('should have all required parameters', () => {
- if (!smartSearchModule) {
- console.log('Skipping: smart-search module not available');
- return;
- }
-
- const params = smartSearchModule.schema.inputSchema || smartSearchModule.schema.parameters;
- const props = params.properties;
-
- // Core parameters
- assert.ok(props.action, 'Should have action parameter');
- assert.ok(props.query, 'Should have query parameter');
- assert.ok(props.path, 'Should have path parameter');
-
- // Search parameters
- assert.ok(props.mode, 'Should have mode parameter');
- assert.ok(props.maxResults || props.limit, 'Should have maxResults/limit parameter');
-
- // New enrich parameter
- assert.ok(props.enrich, 'Should have enrich parameter');
- });
-
- it('should support search modes', () => {
- if (!smartSearchModule) {
- console.log('Skipping: smart-search module not available');
- return;
- }
-
- const params = smartSearchModule.schema.inputSchema || smartSearchModule.schema.parameters;
- const modeEnum = params.properties.mode?.enum;
-
- assert.ok(modeEnum, 'Should have mode enum');
- assert.ok(modeEnum.includes('fuzzy'), 'Should support fuzzy mode');
- assert.ok(modeEnum.includes('semantic'), 'Should support semantic mode');
- });
-});
-
-describe('Enrich Flag Integration', async () => {
- let codexLensModule;
-
- before(async () => {
- try {
- const codexLensPath = new URL('../dist/tools/codex-lens.js', import.meta.url).href;
- codexLensModule = await import(codexLensPath);
- } catch (err) {
- console.log('Note: codex-lens module not available');
- }
- });
-
- it('codex-lens should support enrich parameter', () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- // Use schema export (primary) or codexLensTool (backward-compatible)
- const toolDef = codexLensModule.schema || codexLensModule.codexLensTool;
- assert.ok(toolDef, 'Should have schema or codexLensTool export');
-
- // Schema uses inputSchema (MCP standard), codexLensTool uses parameters
- const params = toolDef.inputSchema || toolDef.parameters;
- const props = params.properties;
- assert.ok(props.enrich, 'should have enrich parameter');
- assert.strictEqual(props.enrich.type, 'boolean', 'enrich should be boolean');
- });
-
- it('should pass enrich flag to command line', async () => {
- if (!codexLensModule) {
- console.log('Skipping: codex-lens module not available');
- return;
- }
-
- // Check if executeCodexLens function is exported
- const { executeCodexLens } = codexLensModule;
- if (executeCodexLens) {
- // The function should be available for passing enrich parameter
- assert.ok(typeof executeCodexLens === 'function', 'executeCodexLens should be a function');
- }
- });
-});
diff --git a/ccw/tests/smart-search-intent.test.js b/ccw/tests/smart-search-intent.test.js
deleted file mode 100644
index 0c51f6bf..00000000
--- a/ccw/tests/smart-search-intent.test.js
+++ /dev/null
@@ -1,141 +0,0 @@
-/**
- * Tests for query intent detection + adaptive RRF weights (TypeScript/Python parity).
- *
- * References:
- * - `ccw/src/tools/smart-search.ts` (detectQueryIntent, adjustWeightsByIntent, getRRFWeights)
- * - `codex-lens/src/codexlens/search/hybrid_search.py` (weight intent concept + defaults)
- */
-
-import { describe, it, before } from 'node:test';
-import assert from 'node:assert';
-
-const smartSearchPath = new URL('../dist/tools/smart-search.js', import.meta.url).href;
-
-describe('Smart Search - Query Intent + RRF Weights', async () => {
- /** @type {any} */
- let smartSearchModule;
-
- before(async () => {
- try {
- smartSearchModule = await import(smartSearchPath);
- } catch (err) {
- // Keep tests non-blocking for environments that haven't built `ccw/dist` yet.
- console.log('Note: smart-search module import skipped:', err.message);
- }
- });
-
- describe('detectQueryIntent', () => {
- it('classifies "def authenticate" as keyword', () => {
- if (!smartSearchModule) return;
- assert.strictEqual(smartSearchModule.detectQueryIntent('def authenticate'), 'keyword');
- });
-
- it('classifies CamelCase identifiers as keyword', () => {
- if (!smartSearchModule) return;
- assert.strictEqual(smartSearchModule.detectQueryIntent('MyClass'), 'keyword');
- });
-
- it('classifies snake_case identifiers as keyword', () => {
- if (!smartSearchModule) return;
- assert.strictEqual(smartSearchModule.detectQueryIntent('user_id'), 'keyword');
- });
-
- it('classifies namespace separators "::" as keyword', () => {
- if (!smartSearchModule) return;
- assert.strictEqual(smartSearchModule.detectQueryIntent('UserService::authenticate'), 'keyword');
- });
-
- it('classifies pointer arrows "->" as keyword', () => {
- if (!smartSearchModule) return;
- assert.strictEqual(smartSearchModule.detectQueryIntent('ptr->next'), 'keyword');
- });
-
- it('classifies dotted member access as keyword', () => {
- if (!smartSearchModule) return;
- assert.strictEqual(smartSearchModule.detectQueryIntent('foo.bar'), 'keyword');
- });
-
- it('classifies natural language questions as semantic', () => {
- if (!smartSearchModule) return;
- assert.strictEqual(smartSearchModule.detectQueryIntent('how to handle user login'), 'semantic');
- });
-
- it('classifies interrogatives with question marks as semantic', () => {
- if (!smartSearchModule) return;
- assert.strictEqual(smartSearchModule.detectQueryIntent('what is authentication?'), 'semantic');
- });
-
- it('classifies queries with both code + NL signals as mixed', () => {
- if (!smartSearchModule) return;
- assert.strictEqual(smartSearchModule.detectQueryIntent('why does FooBar crash?'), 'mixed');
- });
-
- it('classifies long NL queries containing identifiers as mixed', () => {
- if (!smartSearchModule) return;
- assert.strictEqual(smartSearchModule.detectQueryIntent('how to use user_id in query'), 'mixed');
- });
- });
-
- describe('classifyIntent lexical routing', () => {
- it('routes config/backend queries to exact when index and embeddings are available', () => {
- if (!smartSearchModule) return;
- const classification = smartSearchModule.__testables.classifyIntent(
- 'embedding backend fastembed local litellm api config',
- true,
- true,
- );
- assert.strictEqual(classification.mode, 'exact');
- assert.match(classification.reasoning, /lexical priority/i);
- });
-
- it('routes generated artifact queries to exact when index and embeddings are available', () => {
- if (!smartSearchModule) return;
- const classification = smartSearchModule.__testables.classifyIntent('dist bundle output', true, true);
- assert.strictEqual(classification.mode, 'exact');
- assert.match(classification.reasoning, /generated artifact/i);
- });
- });
-
- describe('adjustWeightsByIntent', () => {
- it('maps keyword intent to exact-heavy weights', () => {
- if (!smartSearchModule) return;
- const weights = smartSearchModule.adjustWeightsByIntent('keyword', { exact: 0.3, fuzzy: 0.1, vector: 0.6 });
- assert.deepStrictEqual(weights, { exact: 0.5, fuzzy: 0.1, vector: 0.4 });
- });
- });
-
- describe('getRRFWeights parity set', () => {
- it('produces stable weights for 20 representative queries', () => {
- if (!smartSearchModule) return;
-
- const base = { exact: 0.3, fuzzy: 0.1, vector: 0.6 };
- const expected = [
- ['def authenticate', { exact: 0.5, fuzzy: 0.1, vector: 0.4 }],
- ['class UserService', { exact: 0.5, fuzzy: 0.1, vector: 0.4 }],
- ['user_id', { exact: 0.5, fuzzy: 0.1, vector: 0.4 }],
- ['MyClass', { exact: 0.5, fuzzy: 0.1, vector: 0.4 }],
- ['Foo::Bar', { exact: 0.5, fuzzy: 0.1, vector: 0.4 }],
- ['ptr->next', { exact: 0.5, fuzzy: 0.1, vector: 0.4 }],
- ['foo.bar', { exact: 0.5, fuzzy: 0.1, vector: 0.4 }],
- ['import os', { exact: 0.5, fuzzy: 0.1, vector: 0.4 }],
- ['how to handle user login', { exact: 0.2, fuzzy: 0.1, vector: 0.7 }],
- ['what is the best way to search?', { exact: 0.2, fuzzy: 0.1, vector: 0.7 }],
- ['explain the authentication flow', { exact: 0.2, fuzzy: 0.1, vector: 0.7 }],
- ['generate embeddings for this repo', { exact: 0.2, fuzzy: 0.1, vector: 0.7 }],
- ['how does FooBar work', base],
- ['user_id how to handle', base],
- ['Find UserService::authenticate method', base],
- ['where is foo.bar used', base],
- ['parse_json function', { exact: 0.5, fuzzy: 0.1, vector: 0.4 }],
- ['How to parse_json output?', base],
- ['', base],
- ['authentication', base],
- ];
-
- for (const [query, expectedWeights] of expected) {
- const actual = smartSearchModule.getRRFWeights(query, base);
- assert.deepStrictEqual(actual, expectedWeights, `unexpected weights for query: ${JSON.stringify(query)}`);
- }
- });
- });
-});
diff --git a/ccw/tests/smart-search-mcp-usage.test.js b/ccw/tests/smart-search-mcp-usage.test.js
deleted file mode 100644
index 889af31d..00000000
--- a/ccw/tests/smart-search-mcp-usage.test.js
+++ /dev/null
@@ -1,703 +0,0 @@
-import { after, afterEach, before, describe, it } from 'node:test';
-import assert from 'node:assert/strict';
-import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs';
-import { tmpdir } from 'node:os';
-import { join } from 'node:path';
-
-const smartSearchPath = new URL('../dist/tools/smart-search.js', import.meta.url).href;
-const originalAutoInitMissing = process.env.CODEXLENS_AUTO_INIT_MISSING;
-const originalAutoEmbedMissing = process.env.CODEXLENS_AUTO_EMBED_MISSING;
-
-describe('Smart Search MCP usage defaults and path handling', async () => {
- let smartSearchModule;
- const tempDirs = [];
-
- before(async () => {
- process.env.CODEXLENS_AUTO_INIT_MISSING = 'false';
- try {
- smartSearchModule = await import(smartSearchPath);
- } catch (err) {
- console.log('Note: smart-search module import skipped:', err?.message ?? String(err));
- }
- });
-
- after(() => {
- if (originalAutoInitMissing === undefined) {
- delete process.env.CODEXLENS_AUTO_INIT_MISSING;
- } else {
- process.env.CODEXLENS_AUTO_INIT_MISSING = originalAutoInitMissing;
- }
-
- if (originalAutoEmbedMissing === undefined) {
- delete process.env.CODEXLENS_AUTO_EMBED_MISSING;
- return;
- }
- process.env.CODEXLENS_AUTO_EMBED_MISSING = originalAutoEmbedMissing;
- });
-
- afterEach(() => {
- while (tempDirs.length > 0) {
- rmSync(tempDirs.pop(), { recursive: true, force: true });
- }
- if (smartSearchModule?.__testables) {
- smartSearchModule.__testables.__resetRuntimeOverrides();
- smartSearchModule.__testables.__resetBackgroundJobs();
- }
- process.env.CODEXLENS_AUTO_INIT_MISSING = 'false';
- delete process.env.CODEXLENS_AUTO_EMBED_MISSING;
- });
-
- function createWorkspace() {
- const dir = mkdtempSync(join(tmpdir(), 'ccw-smart-search-'));
- tempDirs.push(dir);
- return dir;
- }
-
- function createDetachedChild() {
- return {
- on() {
- return this;
- },
- unref() {},
- };
- }
-
- it('keeps schema defaults aligned with runtime docs', () => {
- if (!smartSearchModule) return;
-
- const { schema } = smartSearchModule;
- const props = schema.inputSchema.properties;
-
- assert.equal(props.maxResults.default, 5);
- assert.equal(props.limit.default, 5);
- assert.match(schema.description, /static FTS index/i);
- assert.match(schema.description, /semantic\/vector embeddings/i);
- assert.ok(props.action.enum.includes('embed'));
- assert.match(props.embeddingBackend.description, /litellm\/api/i);
- assert.match(props.apiMaxWorkers.description, /endpoint pool/i);
- assert.match(schema.description, /apiMaxWorkers=8/i);
- assert.match(props.path.description, /single file path/i);
- assert.ok(props.output_mode.enum.includes('ace'));
- assert.match(props.output_mode.description, /ACE-style/i);
- assert.equal(props.output_mode.default, 'ace');
- });
-
- it('defaults auto embedding warmup off on Windows unless explicitly enabled', () => {
- if (!smartSearchModule) return;
-
- const { __testables } = smartSearchModule;
- delete process.env.CODEXLENS_AUTO_EMBED_MISSING;
- assert.equal(__testables.isAutoEmbedMissingEnabled(undefined), process.platform !== 'win32');
- assert.equal(__testables.isAutoEmbedMissingEnabled({}), process.platform !== 'win32');
- assert.equal(
- __testables.isAutoEmbedMissingEnabled({ embedding_auto_embed_missing: true }),
- process.platform === 'win32' ? false : true,
- );
- assert.equal(__testables.isAutoEmbedMissingEnabled({ embedding_auto_embed_missing: false }), false);
- process.env.CODEXLENS_AUTO_EMBED_MISSING = 'true';
- assert.equal(__testables.isAutoEmbedMissingEnabled({ embedding_auto_embed_missing: false }), true);
- process.env.CODEXLENS_AUTO_EMBED_MISSING = 'off';
- assert.equal(__testables.isAutoEmbedMissingEnabled({ embedding_auto_embed_missing: true }), false);
- });
-
- it('defaults auto index warmup off on Windows unless explicitly enabled', () => {
- if (!smartSearchModule) return;
-
- const { __testables } = smartSearchModule;
- delete process.env.CODEXLENS_AUTO_INIT_MISSING;
- assert.equal(__testables.isAutoInitMissingEnabled(), process.platform !== 'win32');
- process.env.CODEXLENS_AUTO_INIT_MISSING = 'off';
- assert.equal(__testables.isAutoInitMissingEnabled(), false);
- process.env.CODEXLENS_AUTO_INIT_MISSING = '1';
- assert.equal(__testables.isAutoInitMissingEnabled(), true);
- });
-
- it('explains when Windows disables background warmup by default', () => {
- if (!smartSearchModule) return;
-
- const { __testables } = smartSearchModule;
- delete process.env.CODEXLENS_AUTO_INIT_MISSING;
- delete process.env.CODEXLENS_AUTO_EMBED_MISSING;
-
- const initReason = __testables.getAutoInitMissingDisabledReason();
- const embedReason = __testables.getAutoEmbedMissingDisabledReason({});
-
- if (process.platform === 'win32') {
- assert.match(initReason, /disabled by default on Windows/i);
- assert.match(embedReason, /disabled by default on Windows/i);
- assert.match(embedReason, /auto_embed_missing=true/i);
- } else {
- assert.match(initReason, /disabled/i);
- assert.match(embedReason, /disabled/i);
- }
- });
-
- it('builds hidden subprocess options for Smart Search child processes', () => {
- if (!smartSearchModule) return;
-
- const options = smartSearchModule.__testables.buildSmartSearchSpawnOptions(tmpdir(), {
- detached: true,
- stdio: 'ignore',
- timeout: 12345,
- });
-
- assert.equal(options.cwd, tmpdir());
- assert.equal(options.shell, false);
- assert.equal(options.windowsHide, true);
- assert.equal(options.detached, true);
- assert.equal(options.timeout, 12345);
- assert.equal(options.env.PYTHONIOENCODING, 'utf-8');
- });
-
- it('avoids detached background warmup children on Windows consoles', () => {
- if (!smartSearchModule) return;
-
- assert.equal(
- smartSearchModule.__testables.shouldDetachBackgroundSmartSearchProcess(),
- process.platform !== 'win32',
- );
- });
-
- it('checks tool availability without shell-based lookup popups', () => {
- if (!smartSearchModule) return;
-
- const lookupCalls = [];
- const available = smartSearchModule.__testables.checkToolAvailability(
- 'rg',
- (command, args, options) => {
- lookupCalls.push({ command, args, options });
- return { status: 0, stdout: '', stderr: '' };
- },
- );
-
- assert.equal(available, true);
- assert.equal(lookupCalls.length, 1);
- assert.equal(lookupCalls[0].command, process.platform === 'win32' ? 'where' : 'which');
- assert.deepEqual(lookupCalls[0].args, ['rg']);
- assert.equal(lookupCalls[0].options.shell, false);
- assert.equal(lookupCalls[0].options.windowsHide, true);
- assert.equal(lookupCalls[0].options.stdio, 'ignore');
- assert.equal(lookupCalls[0].options.env.PYTHONIOENCODING, 'utf-8');
- });
-
- it('starts background static index build once for unindexed paths', async () => {
- if (!smartSearchModule) return;
-
- const { __testables } = smartSearchModule;
- const dir = createWorkspace();
- const fakePython = join(dir, 'python.exe');
- writeFileSync(fakePython, '');
- process.env.CODEXLENS_AUTO_INIT_MISSING = 'true';
-
- const spawnCalls = [];
- __testables.__setRuntimeOverrides({
- getVenvPythonPath: () => fakePython,
- now: () => 1234567890,
- spawnProcess: (command, args, options) => {
- spawnCalls.push({ command, args, options });
- return createDetachedChild();
- },
- });
-
- const scope = { workingDirectory: dir, searchPaths: ['.'] };
- const indexStatus = { indexed: false, has_embeddings: false };
-
- const first = await __testables.maybeStartBackgroundAutoInit(scope, indexStatus);
- const second = await __testables.maybeStartBackgroundAutoInit(scope, indexStatus);
-
- assert.match(first.note, /started/i);
- assert.match(second.note, /already running/i);
- assert.equal(spawnCalls.length, 1);
- assert.equal(spawnCalls[0].command, fakePython);
- assert.deepEqual(spawnCalls[0].args, ['-m', 'codexlens', 'index', 'init', dir, '--no-embeddings']);
- assert.equal(spawnCalls[0].options.cwd, dir);
- assert.equal(
- spawnCalls[0].options.detached,
- smartSearchModule.__testables.shouldDetachBackgroundSmartSearchProcess(),
- );
- assert.equal(spawnCalls[0].options.windowsHide, true);
- });
-
- it('starts background embedding build without detached Windows consoles', async () => {
- if (!smartSearchModule) return;
-
- const { __testables } = smartSearchModule;
- const dir = createWorkspace();
- const fakePython = join(dir, 'python.exe');
- writeFileSync(fakePython, '');
- process.env.CODEXLENS_AUTO_EMBED_MISSING = 'true';
-
- const spawnCalls = [];
- __testables.__setRuntimeOverrides({
- getVenvPythonPath: () => fakePython,
- checkSemanticStatus: async () => ({ available: true, litellmAvailable: true }),
- now: () => 1234567890,
- spawnProcess: (command, args, options) => {
- spawnCalls.push({ command, args, options });
- return createDetachedChild();
- },
- });
-
- const status = await __testables.maybeStartBackgroundAutoEmbed(
- { workingDirectory: dir, searchPaths: ['.'] },
- {
- indexed: true,
- has_embeddings: false,
- config: { embedding_backend: 'fastembed' },
- },
- );
-
- assert.match(status.note, /started/i);
- assert.equal(spawnCalls.length, 1);
- assert.equal(spawnCalls[0].command, fakePython);
- assert.deepEqual(spawnCalls[0].args.slice(0, 1), ['-c']);
- assert.equal(spawnCalls[0].options.cwd, dir);
- assert.equal(
- spawnCalls[0].options.detached,
- smartSearchModule.__testables.shouldDetachBackgroundSmartSearchProcess(),
- );
- assert.equal(spawnCalls[0].options.windowsHide, true);
- assert.equal(spawnCalls[0].options.stdio, 'ignore');
- });
-
- it('surfaces warnings when background static index warmup cannot start', async () => {
- if (!smartSearchModule) return;
-
- const { __testables } = smartSearchModule;
- const dir = createWorkspace();
- process.env.CODEXLENS_AUTO_INIT_MISSING = 'true';
-
- __testables.__setRuntimeOverrides({
- getVenvPythonPath: () => join(dir, 'missing-python.exe'),
- });
-
- const status = await __testables.maybeStartBackgroundAutoInit(
- { workingDirectory: dir, searchPaths: ['.'] },
- { indexed: false, has_embeddings: false },
- );
-
- assert.match(status.warning, /Automatic static index warmup could not start/i);
- assert.match(status.warning, /not ready yet/i);
- });
-
- it('honors explicit small limit values', async () => {
- if (!smartSearchModule) return;
-
- const dir = createWorkspace();
- const file = join(dir, 'many.ts');
- writeFileSync(file, ['const hit = 1;', 'const hit = 2;', 'const hit = 3;'].join('\n'));
-
- const toolResult = await smartSearchModule.handler({
- action: 'search',
- query: 'hit',
- path: dir,
- output_mode: 'full',
- limit: 1,
- regex: false,
- tokenize: false,
- });
-
- assert.equal(toolResult.success, true, toolResult.error);
- assert.equal(toolResult.result.success, true);
- assert.equal(toolResult.result.results.length, 1);
- assert.equal(toolResult.result.metadata.pagination.limit, 1);
- });
-
- it('scopes search results to a single file path', async () => {
- if (!smartSearchModule) return;
-
- const dir = createWorkspace();
- const target = join(dir, 'target.ts');
- const other = join(dir, 'other.ts');
- writeFileSync(target, 'const TARGET_TOKEN = 1;\n');
- writeFileSync(other, 'const TARGET_TOKEN = 2;\n');
-
- const toolResult = await smartSearchModule.handler({
- action: 'search',
- query: 'TARGET_TOKEN',
- path: target,
- output_mode: 'full',
- regex: false,
- tokenize: false,
- });
-
- assert.equal(toolResult.success, true, toolResult.error);
- assert.equal(toolResult.result.success, true);
- assert.ok(Array.isArray(toolResult.result.results));
- assert.ok(toolResult.result.results.length >= 1);
-
- const normalizedFiles = toolResult.result.results.map((item) => String(item.file).replace(/\\/g, '/'));
- assert.ok(normalizedFiles.every((file) => file.endsWith('/target.ts') || file === 'target.ts'));
- assert.ok(normalizedFiles.every((file) => !file.endsWith('/other.ts')));
- });
-
- it('normalizes wrapped multiline query and file path inputs', async () => {
- if (!smartSearchModule) return;
-
- const dir = createWorkspace();
- const nestedDir = join(dir, 'hydro_generator_module', 'builders');
- mkdirSync(nestedDir, { recursive: true });
- const target = join(nestedDir, 'full_machine_builders.py');
- writeFileSync(target, 'def _resolve_rotor_inner():\n return rotor_main_seg\n');
-
- const wrappedPath = target.replace(/([\\/])builders([\\/])/, '$1\n builders$2');
- const wrappedQuery = '_resolve_rotor_inner OR\n rotor_main_seg';
-
- const toolResult = await smartSearchModule.handler({
- action: 'search',
- query: wrappedQuery,
- path: wrappedPath,
- output_mode: 'full',
- regex: false,
- caseSensitive: false,
- });
-
- assert.equal(toolResult.success, true, toolResult.error);
- assert.equal(toolResult.result.success, true);
- assert.ok(toolResult.result.results.length >= 1);
- });
-
- it('falls back to literal ripgrep matching for invalid regex-like code queries', async () => {
- if (!smartSearchModule) return;
-
- const dir = createWorkspace();
- const target = join(dir, 'component.ts');
- writeFileSync(target, 'defineExpose({ handleResize });\n');
-
- const toolResult = await smartSearchModule.handler({
- action: 'search',
- query: 'defineExpose({ handleResize',
- path: dir,
- output_mode: 'full',
- limit: 5,
- });
-
- assert.equal(toolResult.success, true, toolResult.error);
- assert.equal(toolResult.result.success, true);
- assert.ok(toolResult.result.results.length >= 1);
- assert.match(toolResult.result.metadata.warning, /literal ripgrep matching/i);
- });
-
- it('renders grouped ace-style output by default with multi-line chunks', async () => {
- if (!smartSearchModule) return;
-
- const dir = createWorkspace();
- const target = join(dir, 'ace-target.ts');
- writeFileSync(target, [
- 'const before = 1;',
- 'const TARGET_TOKEN = 1;',
- 'const after = 2;',
- '',
- 'function useToken() {',
- ' return TARGET_TOKEN;',
- '}',
- ].join('\n'));
-
- const toolResult = await smartSearchModule.handler({
- action: 'search',
- query: 'TARGET_TOKEN',
- path: dir,
- contextLines: 1,
- regex: false,
- tokenize: false,
- });
-
- assert.equal(toolResult.success, true, toolResult.error);
- assert.equal(toolResult.result.success, true);
- assert.equal(toolResult.result.results.format, 'ace');
- assert.equal(Array.isArray(toolResult.result.results.groups), true);
- assert.equal(Array.isArray(toolResult.result.results.sections), true);
- assert.equal(toolResult.result.results.groups.length, 1);
- assert.equal(toolResult.result.results.groups[0].sections.length, 2);
- assert.match(toolResult.result.results.text, /The following code sections were retrieved:/);
- assert.match(toolResult.result.results.text, /Path: .*ace-target\.ts/);
- assert.match(toolResult.result.results.text, /Chunk 1: lines 1-3/);
- assert.match(toolResult.result.results.text, />\s+2 \| const TARGET_TOKEN = 1;/);
- assert.match(toolResult.result.results.text, /Chunk 2: lines 5-7/);
- assert.equal(toolResult.result.metadata.pagination.total >= 1, true);
- });
-
- it('defaults embed selection to local-fast for bulk indexing', () => {
- if (!smartSearchModule) return;
-
- const selection = smartSearchModule.__testables.resolveEmbeddingSelection(undefined, undefined, {
- embedding_backend: 'litellm',
- embedding_model: 'qwen3-embedding-sf',
- });
-
- assert.equal(selection.backend, 'fastembed');
- assert.equal(selection.model, 'fast');
- assert.equal(selection.preset, 'bulk-local-fast');
- assert.match(selection.note, /local-fast/i);
- });
-
- it('keeps explicit api embedding selection when requested', () => {
- if (!smartSearchModule) return;
-
- const selection = smartSearchModule.__testables.resolveEmbeddingSelection('api', 'qwen3-embedding-sf', {
- embedding_backend: 'fastembed',
- embedding_model: 'fast',
- });
-
- assert.equal(selection.backend, 'litellm');
- assert.equal(selection.model, 'qwen3-embedding-sf');
- assert.equal(selection.preset, 'explicit');
- });
-
- it('parses warning-prefixed JSON and plain-text file lists for semantic fallback', () => {
- if (!smartSearchModule) return;
-
- const dir = createWorkspace();
- const target = join(dir, 'target.ts');
- writeFileSync(target, 'export const target = 1;\n');
-
- const parsed = smartSearchModule.__testables.parseCodexLensJsonOutput([
- 'RuntimeWarning: compatibility shim',
- JSON.stringify({ results: [{ file: 'target.ts', score: 0.25, excerpt: 'target' }] }),
- ].join('\n'));
- assert.equal(Array.isArray(parsed.results), true);
- assert.equal(parsed.results[0].file, 'target.ts');
-
- const matches = smartSearchModule.__testables.parsePlainTextFileMatches(target, {
- workingDirectory: dir,
- searchPaths: ['.'],
- });
- assert.equal(matches.length, 1);
- assert.match(String(matches[0].file).replace(/\\/g, '/'), /target\.ts$/);
- });
-
- it('uses root-scoped embedding status instead of subtree artifacts', () => {
- if (!smartSearchModule) return;
-
- const summary = smartSearchModule.__testables.extractEmbeddingsStatusSummary({
- total_indexes: 3,
- indexes_with_embeddings: 2,
- total_chunks: 24,
- coverage_percent: 66.7,
- root: {
- total_files: 4,
- files_with_embeddings: 0,
- total_chunks: 0,
- coverage_percent: 0,
- has_embeddings: false,
- },
- subtree: {
- total_indexes: 3,
- indexes_with_embeddings: 2,
- total_files: 12,
- files_with_embeddings: 8,
- total_chunks: 24,
- coverage_percent: 66.7,
- },
- centralized: {
- dense_index_exists: true,
- binary_index_exists: true,
- meta_db_exists: true,
- usable: false,
- },
- });
-
- assert.equal(summary.coveragePercent, 0);
- assert.equal(summary.totalChunks, 0);
- assert.equal(summary.hasEmbeddings, false);
- });
-
- it('accepts validated root centralized readiness from CLI status payloads', () => {
- if (!smartSearchModule) return;
-
- const summary = smartSearchModule.__testables.extractEmbeddingsStatusSummary({
- total_indexes: 2,
- indexes_with_embeddings: 1,
- total_chunks: 10,
- coverage_percent: 25,
- root: {
- total_files: 2,
- files_with_embeddings: 1,
- total_chunks: 3,
- coverage_percent: 50,
- has_embeddings: true,
- },
- centralized: {
- usable: true,
- dense_ready: true,
- chunk_metadata_rows: 3,
- },
- });
-
- assert.equal(summary.coveragePercent, 50);
- assert.equal(summary.totalChunks, 3);
- assert.equal(summary.hasEmbeddings, true);
- });
-
- it('prefers embeddings_status over legacy embeddings summary payloads', () => {
- if (!smartSearchModule) return;
-
- const payload = smartSearchModule.__testables.selectEmbeddingsStatusPayload({
- embeddings: {
- total_indexes: 7,
- indexes_with_embeddings: 4,
- total_chunks: 99,
- },
- embeddings_status: {
- total_indexes: 7,
- total_chunks: 3,
- root: {
- total_files: 2,
- files_with_embeddings: 1,
- total_chunks: 3,
- coverage_percent: 50,
- has_embeddings: true,
- },
- centralized: {
- usable: true,
- dense_ready: true,
- chunk_metadata_rows: 3,
- },
- },
- });
-
- assert.equal(payload.root.total_chunks, 3);
- assert.equal(payload.centralized.usable, true);
- });
-
- it('recognizes CodexLens CLI compatibility failures and invalid regex fallback', () => {
- if (!smartSearchModule) return;
-
- const compatibilityError = [
- 'UsageError: Got unexpected extra arguments (20 0 fts)',
- 'TypeError: TyperArgument.make_metavar() takes 1 positional argument but 2 were given',
- ].join('\n');
-
- assert.equal(
- smartSearchModule.__testables.isCodexLensCliCompatibilityError(compatibilityError),
- true,
- );
-
- const resolution = smartSearchModule.__testables.resolveRipgrepQueryMode(
- 'defineExpose({ handleResize',
- true,
- true,
- );
-
- assert.equal(resolution.regex, false);
- assert.equal(resolution.literalFallback, true);
- assert.match(resolution.warning, /literal ripgrep matching/i);
- });
-
- it('suppresses compatibility-only fuzzy warnings when ripgrep already produced hits', () => {
- if (!smartSearchModule) return;
-
- assert.equal(
- smartSearchModule.__testables.shouldSurfaceCodexLensFtsCompatibilityWarning({
- compatibilityTriggeredThisQuery: true,
- skipExactDueToCompatibility: false,
- ripgrepResultCount: 2,
- }),
- false,
- );
-
- assert.equal(
- smartSearchModule.__testables.shouldSurfaceCodexLensFtsCompatibilityWarning({
- compatibilityTriggeredThisQuery: true,
- skipExactDueToCompatibility: false,
- ripgrepResultCount: 0,
- }),
- true,
- );
-
- assert.equal(
- smartSearchModule.__testables.shouldSurfaceCodexLensFtsCompatibilityWarning({
- compatibilityTriggeredThisQuery: false,
- skipExactDueToCompatibility: true,
- ripgrepResultCount: 0,
- }),
- true,
- );
- });
-
- it('builds actionable index suggestions for unhealthy index states', () => {
- if (!smartSearchModule) return;
-
- const suggestions = smartSearchModule.__testables.buildIndexSuggestions(
- {
- indexed: true,
- has_embeddings: false,
- embeddings_coverage_percent: 0,
- warning: 'Index exists but no embeddings generated. Run smart_search(action="embed") to build the vector index.',
- },
- {
- workingDirectory: 'D:/tmp/demo',
- searchPaths: ['.'],
- },
- );
-
- assert.equal(Array.isArray(suggestions), true);
- assert.match(suggestions[0].command, /smart_search\(action="embed"/);
- });
-
- it('surfaces backend failure details when fuzzy search fully fails', async () => {
- if (!smartSearchModule) return;
-
- const missingPath = join(createWorkspace(), 'missing-folder', 'missing.ts');
- const toolResult = await smartSearchModule.handler({
- action: 'search',
- query: 'TARGET_TOKEN',
- path: missingPath,
- output_mode: 'full',
- regex: false,
- tokenize: false,
- });
-
- assert.equal(toolResult.success, false);
- assert.match(toolResult.error, /Both search backends failed:/);
- assert.match(toolResult.error, /(FTS|Ripgrep)/);
- });
-
- it('returns structured semantic results after local init and embed without JSON parse warnings', async () => {
- if (!smartSearchModule) return;
-
- const codexLensModule = await import(new URL(`../dist/tools/codex-lens.js?smart-semantic=${Date.now()}`, import.meta.url).href);
- const ready = await codexLensModule.checkVenvStatus(true);
- if (!ready.ready) {
- console.log('Skipping: CodexLens not ready');
- return;
- }
-
- const semantic = await codexLensModule.checkSemanticStatus();
- if (!semantic.available) {
- console.log('Skipping: semantic dependencies not ready');
- return;
- }
-
- const dir = createWorkspace();
- writeFileSync(
- join(dir, 'sample.ts'),
- 'export function parseCodexLensOutput() { return stripAnsiOutput(); }\nexport const sum = (a, b) => a + b;\n',
- );
-
- const init = await smartSearchModule.handler({ action: 'init', path: dir });
- assert.equal(init.success, true, init.error ?? 'Expected init to succeed');
-
- const embed = await smartSearchModule.handler({
- action: 'embed',
- path: dir,
- embeddingBackend: 'local',
- force: true,
- });
- assert.equal(embed.success, true, embed.error ?? 'Expected local embed to succeed');
-
- const search = await smartSearchModule.handler({
- action: 'search',
- mode: 'semantic',
- path: dir,
- query: 'parse CodexLens output strip ANSI',
- limit: 5,
- });
-
- assert.equal(search.success, true, search.error ?? 'Expected semantic search to succeed');
- assert.equal(search.result.success, true);
- assert.equal(search.result.results.format, 'ace');
- assert.ok(search.result.results.total >= 1, 'Expected at least one structured semantic match');
- assert.doesNotMatch(search.result.metadata?.warning ?? '', /Failed to parse JSON output/i);
- });
-});
diff --git a/ccw/tests/smart-search.test.ts b/ccw/tests/smart-search.test.ts
deleted file mode 100644
index 959fe037..00000000
--- a/ccw/tests/smart-search.test.ts
+++ /dev/null
@@ -1,71 +0,0 @@
-/**
- * TypeScript parity tests for query intent detection + adaptive RRF weights.
- *
- * Notes:
- * - These tests target the runtime implementation shipped in `ccw/dist`.
- * - Keep logic aligned with Python: `codex-lens/src/codexlens/search/ranking.py`.
- */
-
-import { before, describe, it } from 'node:test';
-import assert from 'node:assert';
-
-const smartSearchPath = new URL('../dist/tools/smart-search.js', import.meta.url).href;
-
-describe('Smart Search (TS) - Query Intent + RRF Weights', async () => {
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
- let smartSearchModule: any;
-
- before(async () => {
- try {
- smartSearchModule = await import(smartSearchPath);
- } catch (err: any) {
- // Keep tests non-blocking for environments that haven't built `ccw/dist` yet.
- console.log('Note: smart-search module import skipped:', err?.message ?? String(err));
- }
- });
-
- describe('detectQueryIntent parity (10 cases)', () => {
- const cases: Array<[string, 'keyword' | 'semantic' | 'mixed']> = [
- ['def authenticate', 'keyword'],
- ['MyClass', 'keyword'],
- ['user_id', 'keyword'],
- ['UserService::authenticate', 'keyword'],
- ['ptr->next', 'keyword'],
- ['how to handle user login', 'semantic'],
- ['what is authentication?', 'semantic'],
- ['where is this used?', 'semantic'],
- ['why does FooBar crash?', 'mixed'],
- ['how to use user_id in query', 'mixed'],
- ];
-
- for (const [query, expected] of cases) {
- it(`classifies ${JSON.stringify(query)} as ${expected}`, () => {
- if (!smartSearchModule) return;
- assert.strictEqual(smartSearchModule.detectQueryIntent(query), expected);
- });
- }
- });
-
- describe('adaptive weights (Python parity thresholds)', () => {
- it('uses exact-heavy weights for code-like queries (exact > 0.4)', () => {
- if (!smartSearchModule) return;
- const weights = smartSearchModule.getRRFWeights('def authenticate', {
- exact: 0.3,
- fuzzy: 0.1,
- vector: 0.6,
- });
- assert.ok(weights.exact > 0.4);
- });
-
- it('uses vector-heavy weights for NL queries (vector > 0.6)', () => {
- if (!smartSearchModule) return;
- const weights = smartSearchModule.getRRFWeights('how to handle user login', {
- exact: 0.3,
- fuzzy: 0.1,
- vector: 0.6,
- });
- assert.ok(weights.vector > 0.6);
- });
- });
-});
-
diff --git a/ccw/tests/unified-vector-index.test.ts b/ccw/tests/unified-vector-index.test.ts
deleted file mode 100644
index f14acc02..00000000
--- a/ccw/tests/unified-vector-index.test.ts
+++ /dev/null
@@ -1,97 +0,0 @@
-import { after, beforeEach, describe, it } from 'node:test';
-import assert from 'node:assert/strict';
-import { EventEmitter } from 'node:events';
-import { createRequire } from 'node:module';
-import { mkdtempSync, rmSync } from 'node:fs';
-import { tmpdir } from 'node:os';
-import { join } from 'node:path';
-
-const require = createRequire(import.meta.url);
-// eslint-disable-next-line @typescript-eslint/no-var-requires
-const fs = require('node:fs') as typeof import('node:fs');
-// eslint-disable-next-line @typescript-eslint/no-var-requires
-const childProcess = require('node:child_process') as typeof import('node:child_process');
-
-class FakeChildProcess extends EventEmitter {
- stdout = new EventEmitter();
- stderr = new EventEmitter();
- stdinChunks: string[] = [];
- stdin = {
- write: (chunk: string | Buffer) => {
- this.stdinChunks.push(String(chunk));
- return true;
- },
- end: () => undefined,
- };
-}
-
-type SpawnCall = {
- command: string;
- args: string[];
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
- options: any;
- child: FakeChildProcess;
-};
-
-const spawnCalls: SpawnCall[] = [];
-const tempDirs: string[] = [];
-let embedderAvailable = true;
-
-const originalExistsSync = fs.existsSync;
-const originalSpawn = childProcess.spawn;
-
-fs.existsSync = ((..._args: unknown[]) => embedderAvailable) as typeof fs.existsSync;
-
-childProcess.spawn = ((command: string, args: string[] = [], options: unknown = {}) => {
- const child = new FakeChildProcess();
- spawnCalls.push({ command: String(command), args: args.map(String), options, child });
-
- queueMicrotask(() => {
- child.stdout.emit('data', JSON.stringify({
- success: true,
- total_chunks: 4,
- hnsw_available: true,
- hnsw_count: 4,
- dimension: 384,
- }));
- child.emit('close', 0);
- });
-
- return child as unknown as ReturnType;
-}) as typeof childProcess.spawn;
-
-after(() => {
- fs.existsSync = originalExistsSync;
- childProcess.spawn = originalSpawn;
- while (tempDirs.length > 0) {
- rmSync(tempDirs.pop() as string, { recursive: true, force: true });
- }
-});
-
-describe('unified-vector-index', () => {
- beforeEach(() => {
- embedderAvailable = true;
- spawnCalls.length = 0;
- });
-
- it('spawns CodexLens venv python with hidden window options', async () => {
- const projectDir = mkdtempSync(join(tmpdir(), 'ccw-unified-vector-index-'));
- tempDirs.push(projectDir);
-
- const moduleUrl = new URL('../dist/core/unified-vector-index.js', import.meta.url);
- moduleUrl.searchParams.set('t', String(Date.now()));
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
- const mod: any = await import(moduleUrl.href);
-
- const index = new mod.UnifiedVectorIndex(projectDir);
- const status = await index.getStatus();
-
- assert.equal(status.success, true);
- assert.equal(spawnCalls.length, 1);
- assert.equal(spawnCalls[0].options.shell, false);
- assert.equal(spawnCalls[0].options.windowsHide, true);
- assert.equal(spawnCalls[0].options.env.PYTHONIOENCODING, 'utf-8');
- assert.deepEqual(spawnCalls[0].options.stdio, ['pipe', 'pipe', 'pipe']);
- assert.match(spawnCalls[0].child.stdinChunks.join(''), /"operation":"status"/);
- });
-});