From 48a6a1f2aabe137e27c2a1cace4bc222c076ee17 Mon Sep 17 00:00:00 2001 From: catlog22 Date: Sun, 15 Feb 2026 21:14:14 +0800 Subject: [PATCH] Add comprehensive tests for ast-grep and tree-sitter relationship extraction - Introduced test suite for AstGrepPythonProcessor covering pattern definitions, parsing, and relationship extraction. - Added comparison tests between tree-sitter and ast-grep for consistency in relationship extraction. - Implemented tests for ast-grep binding module to verify functionality and availability. - Ensured tests cover various scenarios including inheritance, function calls, and imports. --- .../phases/02-lite-execute.md | 151 ++- .../phases/02-lite-execute.md | 151 ++- .../dashboard/widgets/WorkflowTaskWidget.tsx | 2 +- .../settings/PlatformConfigCards.tsx | 396 ++++++++ .../settings/RemoteNotificationSection.tsx | 347 +++++++ .../terminal-dashboard/CliConfigModal.tsx | 270 +++++ .../terminal-dashboard/DashboardToolbar.tsx | 297 ++++-- ccw/frontend/src/hooks/index.ts | 16 + ccw/frontend/src/hooks/useMemory.ts | 10 +- ccw/frontend/src/hooks/useUnifiedSearch.ts | 199 ++++ ccw/frontend/src/lib/api.ts | 146 +++ ccw/frontend/src/lib/queryKeys.ts | 9 + ccw/frontend/src/lib/utils.ts | 16 + ccw/frontend/src/locales/en/memory.json | 33 +- ccw/frontend/src/locales/en/settings.json | 39 + ccw/frontend/src/locales/zh/memory.json | 33 +- ccw/frontend/src/locales/zh/settings.json | 39 + ccw/frontend/src/pages/MemoryPage.tsx | 663 ++++++++++--- ccw/frontend/src/pages/SettingsPage.tsx | 4 + ccw/frontend/src/types/remote-notification.ts | 193 ++++ ccw/scripts/unified_memory_embedder.py | 473 +++++++++ ccw/src/cli.ts | 4 + ccw/src/commands/core-memory.ts | 124 ++- ccw/src/commands/hook.ts | 164 ++- ccw/src/config/remote-notification-config.ts | 154 +++ ccw/src/config/storage-paths.ts | 19 + ccw/src/core/core-memory-store.ts | 41 + ccw/src/core/memory-extraction-pipeline.ts | 103 ++ ccw/src/core/pattern-detector.ts | 485 +++++++++ 
ccw/src/core/routes/notification-routes.ts | 357 +++++++ ccw/src/core/routes/unified-memory-routes.ts | 151 +++ ccw/src/core/server.ts | 12 + .../services/remote-notification-service.ts | 592 +++++++++++ ccw/src/core/session-clustering-service.ts | 252 ++++- ccw/src/core/unified-context-builder.ts | 410 ++++++++ ccw/src/core/unified-memory-service.ts | 488 +++++++++ ccw/src/core/unified-vector-index.ts | 474 +++++++++ ccw/src/tools/ask-question.ts | 18 + ccw/src/types/remote-notification.ts | 227 +++++ ccw/src/types/util.ts | 75 ++ codex-lens/pyproject.toml | 3 + codex-lens/src/codexlens/config.py | 3 + codex-lens/src/codexlens/parsers/__init__.py | 8 +- .../src/codexlens/parsers/astgrep_binding.py | 320 ++++++ .../codexlens/parsers/astgrep_processor.py | 931 ++++++++++++++++++ .../codexlens/parsers/patterns/__init__.py | 5 + .../parsers/patterns/python/__init__.py | 204 ++++ .../parsers/patterns/python/call.yaml | 87 ++ .../parsers/patterns/python/imports.yaml | 82 ++ .../parsers/patterns/python/inherits.yaml | 42 + .../codexlens/parsers/treesitter_parser.py | 110 ++- codex-lens/tests/parsers/__init__.py | 1 + .../tests/parsers/test_astgrep_extraction.py | 444 +++++++++ .../tests/parsers/test_astgrep_processor.py | 402 ++++++++ codex-lens/tests/parsers/test_comparison.py | 526 ++++++++++ codex-lens/tests/test_astgrep_binding.py | 191 ++++ 56 files changed, 10622 insertions(+), 374 deletions(-) create mode 100644 ccw/frontend/src/components/settings/PlatformConfigCards.tsx create mode 100644 ccw/frontend/src/components/settings/RemoteNotificationSection.tsx create mode 100644 ccw/frontend/src/components/terminal-dashboard/CliConfigModal.tsx create mode 100644 ccw/frontend/src/hooks/useUnifiedSearch.ts create mode 100644 ccw/frontend/src/types/remote-notification.ts create mode 100644 ccw/scripts/unified_memory_embedder.py create mode 100644 ccw/src/config/remote-notification-config.ts create mode 100644 ccw/src/core/pattern-detector.ts create mode 100644 
ccw/src/core/routes/notification-routes.ts create mode 100644 ccw/src/core/routes/unified-memory-routes.ts create mode 100644 ccw/src/core/services/remote-notification-service.ts create mode 100644 ccw/src/core/unified-context-builder.ts create mode 100644 ccw/src/core/unified-memory-service.ts create mode 100644 ccw/src/core/unified-vector-index.ts create mode 100644 ccw/src/types/remote-notification.ts create mode 100644 ccw/src/types/util.ts create mode 100644 codex-lens/src/codexlens/parsers/astgrep_binding.py create mode 100644 codex-lens/src/codexlens/parsers/astgrep_processor.py create mode 100644 codex-lens/src/codexlens/parsers/patterns/__init__.py create mode 100644 codex-lens/src/codexlens/parsers/patterns/python/__init__.py create mode 100644 codex-lens/src/codexlens/parsers/patterns/python/call.yaml create mode 100644 codex-lens/src/codexlens/parsers/patterns/python/imports.yaml create mode 100644 codex-lens/src/codexlens/parsers/patterns/python/inherits.yaml create mode 100644 codex-lens/tests/parsers/__init__.py create mode 100644 codex-lens/tests/parsers/test_astgrep_extraction.py create mode 100644 codex-lens/tests/parsers/test_astgrep_processor.py create mode 100644 codex-lens/tests/parsers/test_comparison.py create mode 100644 codex-lens/tests/test_astgrep_binding.py diff --git a/.claude/skills/workflow-lite-plan/phases/02-lite-execute.md b/.claude/skills/workflow-lite-plan/phases/02-lite-execute.md index 647d4444..42eabe3f 100644 --- a/.claude/skills/workflow-lite-plan/phases/02-lite-execute.md +++ b/.claude/skills/workflow-lite-plan/phases/02-lite-execute.md @@ -184,11 +184,11 @@ Execution: ├─ Step 1: Initialize result tracking (previousExecutionResults = []) ├─ Step 2: Task grouping & batch creation │ ├─ Extract explicit depends_on (no file/keyword inference) - │ ├─ Group: independent tasks → single parallel batch (maximize utilization) + │ ├─ Group: independent tasks → per-executor parallel batches (one CLI per batch) │ ├─ Group: dependent 
tasks → sequential phases (respect dependencies) │ └─ Create TodoWrite list for batches ├─ Step 3: Launch execution - │ ├─ Phase 1: All independent tasks (⚡ single batch, concurrent) + │ ├─ Phase 1: Independent tasks (⚡ per-executor batches, multi-CLI concurrent) │ └─ Phase 2+: Dependent tasks by dependency order ├─ Step 4: Track progress (TodoWrite updates per batch) └─ Step 5: Code review (if codeReviewTool ≠ "Skip") @@ -241,26 +241,58 @@ function extractDependencies(tasks) { }) } -// Group into batches: maximize parallel execution +// Executor Resolution (used by task grouping below) +// 获取任务的 executor(优先使用 executorAssignments,fallback 到全局 executionMethod) +function getTaskExecutor(task) { + const assignments = executionContext?.executorAssignments || {} + if (assignments[task.id]) { + return assignments[task.id].executor // 'gemini' | 'codex' | 'agent' + } + // Fallback: 全局 executionMethod 映射 + const method = executionContext?.executionMethod || 'Auto' + if (method === 'Agent') return 'agent' + if (method === 'Codex') return 'codex' + // Auto: 根据复杂度 + return planObject.complexity === 'Low' ? 
'agent' : 'codex' +} + +// 按 executor 分组任务(核心分组组件) +function groupTasksByExecutor(tasks) { + const groups = { gemini: [], codex: [], agent: [] } + tasks.forEach(task => { + const executor = getTaskExecutor(task) + groups[executor].push(task) + }) + return groups +} + +// Group into batches: per-executor parallel batches (one CLI per batch) function createExecutionCalls(tasks, executionMethod) { const tasksWithDeps = extractDependencies(tasks) const processed = new Set() const calls = [] - // Phase 1: All independent tasks → single parallel batch (maximize utilization) + // Phase 1: Independent tasks → per-executor batches (multi-CLI concurrent) const independentTasks = tasksWithDeps.filter(t => t.dependencies.length === 0) if (independentTasks.length > 0) { - independentTasks.forEach(t => processed.add(t.taskIndex)) - calls.push({ - method: executionMethod, - executionType: "parallel", - groupId: "P1", - taskSummary: independentTasks.map(t => t.title).join(' | '), - tasks: independentTasks - }) + const executorGroups = groupTasksByExecutor(independentTasks) + let parallelIndex = 1 + + for (const [executor, tasks] of Object.entries(executorGroups)) { + if (tasks.length === 0) continue + tasks.forEach(t => processed.add(t.taskIndex)) + calls.push({ + method: executionMethod, + executor: executor, // 明确指定 executor + executionType: "parallel", + groupId: `P${parallelIndex++}`, + taskSummary: tasks.map(t => t.title).join(' | '), + tasks: tasks + }) + } } - // Phase 2: Dependent tasks → sequential batches (respect dependencies) + // Phase 2: Dependent tasks → sequential/parallel batches (respect dependencies) let sequentialIndex = 1 let remaining = tasksWithDeps.filter(t => !processed.has(t.taskIndex)) @@ -275,15 +307,33 @@ function createExecutionCalls(tasks, executionMethod) { ready.push(...remaining) } - // Group ready tasks (can run in parallel within this phase) - ready.forEach(t => processed.add(t.taskIndex)) - calls.push({ - method: executionMethod, - 
executionType: ready.length > 1 ? "parallel" : "sequential", - groupId: ready.length > 1 ? `P${calls.length + 1}` : `S${sequentialIndex++}`, - taskSummary: ready.map(t => t.title).join(ready.length > 1 ? ' | ' : ' → '), - tasks: ready - }) + if (ready.length > 1) { + // Multiple ready tasks → per-executor batches (parallel within this phase) + const executorGroups = groupTasksByExecutor(ready) + for (const [executor, tasks] of Object.entries(executorGroups)) { + if (tasks.length === 0) continue + tasks.forEach(t => processed.add(t.taskIndex)) + calls.push({ + method: executionMethod, + executor: executor, + executionType: "parallel", + groupId: `P${calls.length + 1}`, + taskSummary: tasks.map(t => t.title).join(' | '), + tasks: tasks + }) + } + } else { + // Single ready task → sequential batch + ready.forEach(t => processed.add(t.taskIndex)) + calls.push({ + method: executionMethod, + executor: getTaskExecutor(ready[0]), + executionType: "sequential", + groupId: `S${sequentialIndex++}`, + taskSummary: ready[0].title, + tasks: ready + }) + } remaining = remaining.filter(t => !processed.has(t.taskIndex)) } @@ -304,33 +354,40 @@ TodoWrite({ ### Step 3: Launch Execution -**Executor Resolution** (任务级 executor 优先于全局设置): -```javascript -// 获取任务的 executor(优先使用 executorAssignments,fallback 到全局 executionMethod) -function getTaskExecutor(task) { - const assignments = executionContext?.executorAssignments || {} - if (assignments[task.id]) { - return assignments[task.id].executor // 'gemini' | 'codex' | 'agent' - } - // Fallback: 全局 executionMethod 映射 - const method = executionContext?.executionMethod || 'Auto' - if (method === 'Agent') return 'agent' - if (method === 'Codex') return 'codex' - // Auto: 根据复杂度 - return planObject.complexity === 'Low' ? 'agent' : 'codex' -} +**Executor Resolution**: `getTaskExecutor()` and `groupTasksByExecutor()` defined in Step 2 (Task Grouping). 
-// 按 executor 分组任务 -function groupTasksByExecutor(tasks) { - const groups = { gemini: [], codex: [], agent: [] } - tasks.forEach(task => { - const executor = getTaskExecutor(task) - groups[executor].push(task) - }) - return groups +**Batch Execution Routing** (根据 batch.executor 字段路由): +```javascript +// executeBatch 根据 batch 自身的 executor 字段决定调用哪个 CLI +function executeBatch(batch) { + const executor = batch.executor || getTaskExecutor(batch.tasks[0]) + const sessionId = executionContext?.session?.id || 'standalone' + const fixedId = `${sessionId}-${batch.groupId}` + + if (executor === 'agent') { + // Agent execution (synchronous) + return Task({ + subagent_type: "code-developer", + run_in_background: false, + description: batch.taskSummary, + prompt: buildExecutionPrompt(batch) + }) + } else if (executor === 'codex') { + // Codex CLI (background) + return Bash(`ccw cli -p "${buildExecutionPrompt(batch)}" --tool codex --mode write --id ${fixedId}`, { run_in_background: true }) + } else if (executor === 'gemini') { + // Gemini CLI (background) + return Bash(`ccw cli -p "${buildExecutionPrompt(batch)}" --tool gemini --mode write --id ${fixedId}`, { run_in_background: true }) + } } ``` +**并行执行原则**: +- 每个 batch 对应一个独立的 CLI 实例或 Agent 调用 +- 并行 = 多个 Bash(run_in_background=true) 或多个 Task() 同时发出 +- 绝不将多个独立任务合并到同一个 CLI prompt 中 +- Agent 任务不可后台执行(run_in_background=false),但多个 Agent 任务可通过单条消息中的多个 Task() 调用并发 + **Execution Flow**: Parallel batches concurrently → Sequential batches in order ```javascript const parallel = executionCalls.filter(c => c.executionType === "parallel") @@ -659,8 +716,8 @@ console.log(`✓ Development index: [${category}] ${entry.title}`) ## Best Practices **Input Modes**: In-memory (lite-plan), prompt (standalone), file (JSON/text) -**Task Grouping**: Based on explicit depends_on only; independent tasks run in single parallel batch -**Execution**: All independent tasks launch concurrently via single Claude message with multiple tool calls +**Task 
Grouping**: Based on explicit depends_on only; independent tasks split by executor, each batch runs as separate CLI instance +**Execution**: Independent task batches launch concurrently via single Claude message with multiple tool calls (one tool call per batch) ## Error Handling diff --git a/.claude/skills/workflow-multi-cli-plan/phases/02-lite-execute.md b/.claude/skills/workflow-multi-cli-plan/phases/02-lite-execute.md index cd998b3d..e2456f9f 100644 --- a/.claude/skills/workflow-multi-cli-plan/phases/02-lite-execute.md +++ b/.claude/skills/workflow-multi-cli-plan/phases/02-lite-execute.md @@ -191,11 +191,11 @@ Execution: ├─ Step 1: Initialize result tracking (previousExecutionResults = []) ├─ Step 2: Task grouping & batch creation │ ├─ Extract explicit depends_on (no file/keyword inference) - │ ├─ Group: independent tasks → single parallel batch (maximize utilization) + │ ├─ Group: independent tasks → per-executor parallel batches (one CLI per batch) │ ├─ Group: dependent tasks → sequential phases (respect dependencies) │ └─ Create TodoWrite list for batches ├─ Step 3: Launch execution - │ ├─ Phase 1: All independent tasks (⚡ single batch, concurrent) + │ ├─ Phase 1: Independent tasks (⚡ per-executor batches, multi-CLI concurrent) │ └─ Phase 2+: Dependent tasks by dependency order ├─ Step 4: Track progress (TodoWrite updates per batch) └─ Step 5: Code review (if codeReviewTool ≠ "Skip") @@ -248,26 +248,58 @@ function extractDependencies(tasks) { }) } -// Group into batches: maximize parallel execution +// Executor Resolution (used by task grouping below) +// 获取任务的 executor(优先使用 executorAssignments,fallback 到全局 executionMethod) +function getTaskExecutor(task) { + const assignments = executionContext?.executorAssignments || {} + if (assignments[task.id]) { + return assignments[task.id].executor // 'gemini' | 'codex' | 'agent' + } + // Fallback: 全局 executionMethod 映射 + const method = executionContext?.executionMethod || 'Auto' + if (method === 'Agent') return 
'agent' + if (method === 'Codex') return 'codex' + // Auto: 根据复杂度 + return planObject.complexity === 'Low' ? 'agent' : 'codex' +} + +// 按 executor 分组任务(核心分组组件) +function groupTasksByExecutor(tasks) { + const groups = { gemini: [], codex: [], agent: [] } + tasks.forEach(task => { + const executor = getTaskExecutor(task) + groups[executor].push(task) + }) + return groups +} + +// Group into batches: per-executor parallel batches (one CLI per batch) function createExecutionCalls(tasks, executionMethod) { const tasksWithDeps = extractDependencies(tasks) const processed = new Set() const calls = [] - // Phase 1: All independent tasks → single parallel batch (maximize utilization) + // Phase 1: Independent tasks → per-executor batches (multi-CLI concurrent) const independentTasks = tasksWithDeps.filter(t => t.dependencies.length === 0) if (independentTasks.length > 0) { - independentTasks.forEach(t => processed.add(t.taskIndex)) - calls.push({ - method: executionMethod, - executionType: "parallel", - groupId: "P1", - taskSummary: independentTasks.map(t => t.title).join(' | '), - tasks: independentTasks - }) + const executorGroups = groupTasksByExecutor(independentTasks) + let parallelIndex = 1 + + for (const [executor, tasks] of Object.entries(executorGroups)) { + if (tasks.length === 0) continue + tasks.forEach(t => processed.add(t.taskIndex)) + calls.push({ + method: executionMethod, + executor: executor, // 明确指定 executor + executionType: "parallel", + groupId: `P${parallelIndex++}`, + taskSummary: tasks.map(t => t.title).join(' | '), + tasks: tasks + }) + } } - // Phase 2: Dependent tasks → sequential batches (respect dependencies) + // Phase 2: Dependent tasks → sequential/parallel batches (respect dependencies) let sequentialIndex = 1 let remaining = tasksWithDeps.filter(t => !processed.has(t.taskIndex)) @@ -282,15 +314,33 @@ function createExecutionCalls(tasks, executionMethod) { ready.push(...remaining) } - // Group ready tasks (can run in parallel within this 
phase) - ready.forEach(t => processed.add(t.taskIndex)) - calls.push({ - method: executionMethod, - executionType: ready.length > 1 ? "parallel" : "sequential", - groupId: ready.length > 1 ? `P${calls.length + 1}` : `S${sequentialIndex++}`, - taskSummary: ready.map(t => t.title).join(ready.length > 1 ? ' | ' : ' → '), - tasks: ready - }) + if (ready.length > 1) { + // Multiple ready tasks → per-executor batches (parallel within this phase) + const executorGroups = groupTasksByExecutor(ready) + for (const [executor, tasks] of Object.entries(executorGroups)) { + if (tasks.length === 0) continue + tasks.forEach(t => processed.add(t.taskIndex)) + calls.push({ + method: executionMethod, + executor: executor, + executionType: "parallel", + groupId: `P${calls.length + 1}`, + taskSummary: tasks.map(t => t.title).join(' | '), + tasks: tasks + }) + } + } else { + // Single ready task → sequential batch + ready.forEach(t => processed.add(t.taskIndex)) + calls.push({ + method: executionMethod, + executor: getTaskExecutor(ready[0]), + executionType: "sequential", + groupId: `S${sequentialIndex++}`, + taskSummary: ready[0].title, + tasks: ready + }) + } remaining = remaining.filter(t => !processed.has(t.taskIndex)) } @@ -311,33 +361,40 @@ TodoWrite({ ### Step 3: Launch Execution -**Executor Resolution** (任务级 executor 优先于全局设置): -```javascript -// 获取任务的 executor(优先使用 executorAssignments,fallback 到全局 executionMethod) -function getTaskExecutor(task) { - const assignments = executionContext?.executorAssignments || {} - if (assignments[task.id]) { - return assignments[task.id].executor // 'gemini' | 'codex' | 'agent' - } - // Fallback: 全局 executionMethod 映射 - const method = executionContext?.executionMethod || 'Auto' - if (method === 'Agent') return 'agent' - if (method === 'Codex') return 'codex' - // Auto: 根据复杂度 - return planObject.complexity === 'Low' ? 'agent' : 'codex' -} +**Executor Resolution**: `getTaskExecutor()` and `groupTasksByExecutor()` defined in Step 2 (Task Grouping). 
-// 按 executor 分组任务 -function groupTasksByExecutor(tasks) { - const groups = { gemini: [], codex: [], agent: [] } - tasks.forEach(task => { - const executor = getTaskExecutor(task) - groups[executor].push(task) - }) - return groups +**Batch Execution Routing** (根据 batch.executor 字段路由): +```javascript +// executeBatch 根据 batch 自身的 executor 字段决定调用哪个 CLI +function executeBatch(batch) { + const executor = batch.executor || getTaskExecutor(batch.tasks[0]) + const sessionId = executionContext?.session?.id || 'standalone' + const fixedId = `${sessionId}-${batch.groupId}` + + if (executor === 'agent') { + // Agent execution (synchronous) + return Task({ + subagent_type: "code-developer", + run_in_background: false, + description: batch.taskSummary, + prompt: buildExecutionPrompt(batch) + }) + } else if (executor === 'codex') { + // Codex CLI (background) + return Bash(`ccw cli -p "${buildExecutionPrompt(batch)}" --tool codex --mode write --id ${fixedId}`, { run_in_background: true }) + } else if (executor === 'gemini') { + // Gemini CLI (background) + return Bash(`ccw cli -p "${buildExecutionPrompt(batch)}" --tool gemini --mode write --id ${fixedId}`, { run_in_background: true }) + } } ``` +**并行执行原则**: +- 每个 batch 对应一个独立的 CLI 实例或 Agent 调用 +- 并行 = 多个 Bash(run_in_background=true) 或多个 Task() 同时发出 +- 绝不将多个独立任务合并到同一个 CLI prompt 中 +- Agent 任务不可后台执行(run_in_background=false),但多个 Agent 任务可通过单条消息中的多个 Task() 调用并发 + **Execution Flow**: Parallel batches concurrently → Sequential batches in order ```javascript const parallel = executionCalls.filter(c => c.executionType === "parallel") @@ -666,8 +723,8 @@ console.log(`✓ Development index: [${category}] ${entry.title}`) ## Best Practices **Input Modes**: In-memory (lite-plan), prompt (standalone), file (JSON/text) -**Task Grouping**: Based on explicit depends_on only; independent tasks run in single parallel batch -**Execution**: All independent tasks launch concurrently via single Claude message with multiple tool calls +**Task 
Grouping**: Based on explicit depends_on only; independent tasks split by executor, each batch runs as separate CLI instance +**Execution**: Independent task batches launch concurrently via single Claude message with multiple tool calls (one tool call per batch) ## Error Handling diff --git a/ccw/frontend/src/components/dashboard/widgets/WorkflowTaskWidget.tsx b/ccw/frontend/src/components/dashboard/widgets/WorkflowTaskWidget.tsx index 877ccfca..b3131a45 100644 --- a/ccw/frontend/src/components/dashboard/widgets/WorkflowTaskWidget.tsx +++ b/ccw/frontend/src/components/dashboard/widgets/WorkflowTaskWidget.tsx @@ -704,7 +704,7 @@ function WorkflowTaskWidgetComponent({ className }: WorkflowTaskWidgetProps) { const isLastOdd = currentSession.tasks!.length % 2 === 1 && index === currentSession.tasks!.length - 1; return (
void; + onUpdateConfig: ( + platform: NotificationPlatform, + updates: Partial + ) => void; + onTest: ( + platform: NotificationPlatform, + config: DiscordConfig | TelegramConfig | WebhookConfig + ) => void; + onSave: () => void; + saving: boolean; +} + +export function PlatformConfigCards({ + config, + expandedPlatform, + testing, + onToggleExpand, + onUpdateConfig, + onTest, + onSave, + saving, +}: PlatformConfigCardsProps) { + const { formatMessage } = useIntl(); + + const platforms: NotificationPlatform[] = ['discord', 'telegram', 'webhook']; + + const getPlatformIcon = (platform: NotificationPlatform) => { + switch (platform) { + case 'discord': + return ; + case 'telegram': + return ; + case 'webhook': + return ; + } + }; + + const getPlatformConfig = ( + platform: NotificationPlatform + ): DiscordConfig | TelegramConfig | WebhookConfig => { + switch (platform) { + case 'discord': + return config.platforms.discord || { enabled: false, webhookUrl: '' }; + case 'telegram': + return config.platforms.telegram || { enabled: false, botToken: '', chatId: '' }; + case 'webhook': + return config.platforms.webhook || { enabled: false, url: '', method: 'POST' }; + } + }; + + const isConfigured = (platform: NotificationPlatform): boolean => { + const platformConfig = getPlatformConfig(platform); + switch (platform) { + case 'discord': + return !!(platformConfig as DiscordConfig).webhookUrl; + case 'telegram': + return !!(platformConfig as TelegramConfig).botToken && !!(platformConfig as TelegramConfig).chatId; + case 'webhook': + return !!(platformConfig as WebhookConfig).url; + } + }; + + return ( +
+ {platforms.map((platform) => { + const info = PLATFORM_INFO[platform]; + const platformConfig = getPlatformConfig(platform); + const configured = isConfigured(platform); + const expanded = expandedPlatform === platform; + + return ( + + {/* Header */} +
onToggleExpand(expanded ? null : platform)} + > +
+
+
+ {getPlatformIcon(platform)} +
+
+
+ {info.name} + {configured && ( + + + {formatMessage({ id: 'settings.remoteNotifications.configured' })} + + )} +
+

{info.description}

+
+
+
+ + {expanded ? ( + + ) : ( + + )} +
+
+
+ + {/* Expanded Content */} + {expanded && ( +
+ {platform === 'discord' && ( + onUpdateConfig('discord', updates)} + /> + )} + {platform === 'telegram' && ( + onUpdateConfig('telegram', updates)} + /> + )} + {platform === 'webhook' && ( + onUpdateConfig('webhook', updates)} + /> + )} + + {/* Action Buttons */} +
+ + +
+
+ )} +
+ ); + })} +
+ ); +} + +// ========== Discord Config Form ========== + +function DiscordConfigForm({ + config, + onUpdate, +}: { + config: DiscordConfig; + onUpdate: (updates: Partial) => void; +}) { + const { formatMessage } = useIntl(); + const [showUrl, setShowUrl] = useState(false); + + return ( +
+
+ +
+ onUpdate({ webhookUrl: e.target.value })} + placeholder="https://discord.com/api/webhooks/..." + className="flex-1" + /> + +
+

+ {formatMessage({ id: 'settings.remoteNotifications.discord.webhookUrlHint' })} +

+
+
+ + onUpdate({ username: e.target.value })} + placeholder="CCW Notification" + className="mt-1" + /> +
+
+ ); +} + +// ========== Telegram Config Form ========== + +function TelegramConfigForm({ + config, + onUpdate, +}: { + config: TelegramConfig; + onUpdate: (updates: Partial) => void; +}) { + const { formatMessage } = useIntl(); + const [showToken, setShowToken] = useState(false); + + return ( +
+
+ +
+ onUpdate({ botToken: e.target.value })} + placeholder="1234567890:ABCdefGHIjklMNOpqrsTUVwxyz" + className="flex-1" + /> + +
+

+ {formatMessage({ id: 'settings.remoteNotifications.telegram.botTokenHint' })} +

+
+
+ + onUpdate({ chatId: e.target.value })} + placeholder="-1001234567890" + className="mt-1" + /> +

+ {formatMessage({ id: 'settings.remoteNotifications.telegram.chatIdHint' })} +

+
+
+ ); +} + +// ========== Webhook Config Form ========== + +function WebhookConfigForm({ + config, + onUpdate, +}: { + config: WebhookConfig; + onUpdate: (updates: Partial) => void; +}) { + const { formatMessage } = useIntl(); + + return ( +
+
+ + onUpdate({ url: e.target.value })} + placeholder="https://your-server.com/webhook" + className="mt-1" + /> +
+
+ +
+ + +
+
+
+ + { + try { + const headers = e.target.value ? JSON.parse(e.target.value) : undefined; + onUpdate({ headers }); + } catch { + // Invalid JSON, ignore + } + }} + placeholder='{"Authorization": "Bearer token"}' + className="mt-1 font-mono text-xs" + /> +

+ {formatMessage({ id: 'settings.remoteNotifications.webhook.headersHint' })} +

+
+
+ ); +} + +export default PlatformConfigCards; diff --git a/ccw/frontend/src/components/settings/RemoteNotificationSection.tsx b/ccw/frontend/src/components/settings/RemoteNotificationSection.tsx new file mode 100644 index 00000000..ce966afa --- /dev/null +++ b/ccw/frontend/src/components/settings/RemoteNotificationSection.tsx @@ -0,0 +1,347 @@ +// ======================================== +// Remote Notification Settings Section +// ======================================== +// Configuration UI for remote notification platforms + +import { useState, useEffect, useCallback } from 'react'; +import { useIntl } from 'react-intl'; +import { + Bell, + BellOff, + RefreshCw, + Check, + X, + ChevronDown, + ChevronUp, + TestTube, + Save, + AlertTriangle, +} from 'lucide-react'; +import { Card } from '@/components/ui/Card'; +import { Button } from '@/components/ui/Button'; +import { Input } from '@/components/ui/Input'; +import { Badge } from '@/components/ui/Badge'; +import { cn } from '@/lib/utils'; +import { toast } from 'sonner'; +import type { + RemoteNotificationConfig, + NotificationPlatform, + EventConfig, + DiscordConfig, + TelegramConfig, + WebhookConfig, +} from '@/types/remote-notification'; +import { PLATFORM_INFO, EVENT_INFO, getDefaultConfig } from '@/types/remote-notification'; +import { PlatformConfigCards } from './PlatformConfigCards'; + +interface RemoteNotificationSectionProps { + className?: string; +} + +export function RemoteNotificationSection({ className }: RemoteNotificationSectionProps) { + const { formatMessage } = useIntl(); + const [config, setConfig] = useState(null); + const [loading, setLoading] = useState(true); + const [saving, setSaving] = useState(false); + const [testing, setTesting] = useState(null); + const [expandedPlatform, setExpandedPlatform] = useState(null); + + // Load configuration + const loadConfig = useCallback(async () => { + setLoading(true); + try { + const response = await fetch('/api/notifications/remote/config'); + if 
(response.ok) { + const data = await response.json(); + setConfig(data); + } else { + // Use default config if not found + setConfig(getDefaultConfig()); + } + } catch (error) { + console.error('Failed to load remote notification config:', error); + setConfig(getDefaultConfig()); + } finally { + setLoading(false); + } + }, []); + + useEffect(() => { + loadConfig(); + }, [loadConfig]); + + // Save configuration + const saveConfig = useCallback(async (newConfig: RemoteNotificationConfig) => { + setSaving(true); + try { + const response = await fetch('/api/notifications/remote/config', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(newConfig), + }); + + if (response.ok) { + const data = await response.json(); + setConfig(data.config); + toast.success(formatMessage({ id: 'settings.remoteNotifications.saved' })); + } else { + throw new Error(`HTTP ${response.status}`); + } + } catch (error) { + toast.error(formatMessage({ id: 'settings.remoteNotifications.saveError' })); + } finally { + setSaving(false); + } + }, [formatMessage]); + + // Test platform + const testPlatform = useCallback(async ( + platform: NotificationPlatform, + platformConfig: DiscordConfig | TelegramConfig | WebhookConfig + ) => { + setTesting(platform); + try { + const response = await fetch('/api/notifications/remote/test', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ platform, config: platformConfig }), + }); + + const result = await response.json(); + + if (result.success) { + toast.success( + formatMessage({ id: 'settings.remoteNotifications.testSuccess' }), + { description: `${result.responseTime}ms` } + ); + } else { + toast.error( + formatMessage({ id: 'settings.remoteNotifications.testFailed' }), + { description: result.error } + ); + } + } catch (error) { + toast.error(formatMessage({ id: 'settings.remoteNotifications.testError' })); + } finally { + setTesting(null); + } + }, [formatMessage]); + 
+ // Toggle master switch + const toggleEnabled = () => { + if (!config) return; + saveConfig({ ...config, enabled: !config.enabled }); + }; + + // Update platform config + const updatePlatformConfig = ( + platform: NotificationPlatform, + updates: Partial + ) => { + if (!config) return; + const newConfig = { + ...config, + platforms: { + ...config.platforms, + [platform]: { + ...config.platforms[platform as keyof typeof config.platforms], + ...updates, + }, + }, + }; + setConfig(newConfig); + }; + + // Update event config + const updateEventConfig = (eventIndex: number, updates: Partial) => { + if (!config) return; + const newEvents = [...config.events]; + newEvents[eventIndex] = { ...newEvents[eventIndex], ...updates }; + setConfig({ ...config, events: newEvents }); + }; + + // Reset to defaults + const resetConfig = async () => { + if (!confirm(formatMessage({ id: 'settings.remoteNotifications.resetConfirm' }))) { + return; + } + try { + const response = await fetch('/api/notifications/remote/reset', { + method: 'POST', + }); + if (response.ok) { + const data = await response.json(); + setConfig(data.config); + toast.success(formatMessage({ id: 'settings.remoteNotifications.resetSuccess' })); + } + } catch { + toast.error(formatMessage({ id: 'settings.remoteNotifications.resetError' })); + } + }; + + if (loading) { + return ( + +
+ +
+
+ ); + } + + if (!config) { + return null; + } + + return ( + + {/* Header */} +
+

+ {config.enabled ? ( + + ) : ( + + )} + {formatMessage({ id: 'settings.remoteNotifications.title' })} +

+
+ + +
+
+ + {/* Description */} +

+ {formatMessage({ id: 'settings.remoteNotifications.description' })} +

+ + {config.enabled && ( + <> + {/* Platform Configuration */} +
+

+ {formatMessage({ id: 'settings.remoteNotifications.platforms' })} +

+ saveConfig(config)} + saving={saving} + /> +
+ + {/* Event Configuration */} +
+

+ {formatMessage({ id: 'settings.remoteNotifications.events' })} +

+
+ {config.events.map((eventConfig, index) => { + const info = EVENT_INFO[eventConfig.event]; + return ( +
+
+
+ {info.icon} +
+
+

{info.name}

+

{info.description}

+
+
+
+ {/* Platform badges */} +
+ {eventConfig.platforms.map((platform) => ( + + {PLATFORM_INFO[platform].name} + + ))} + {eventConfig.platforms.length === 0 && ( + + {formatMessage({ id: 'settings.remoteNotifications.noPlatforms' })} + + )} +
+ {/* Toggle */} + +
+
+ ); + })} +
+
+ + {/* Action Buttons */} +
+ + +
+ + )} +
+ ); +} + +export default RemoteNotificationSection; diff --git a/ccw/frontend/src/components/terminal-dashboard/CliConfigModal.tsx b/ccw/frontend/src/components/terminal-dashboard/CliConfigModal.tsx new file mode 100644 index 00000000..34de149a --- /dev/null +++ b/ccw/frontend/src/components/terminal-dashboard/CliConfigModal.tsx @@ -0,0 +1,270 @@ +// ======================================== +// CliConfigModal Component +// ======================================== +// Config modal for creating a new CLI session in Terminal Dashboard. + +import * as React from 'react'; +import { useIntl } from 'react-intl'; +import { FolderOpen } from 'lucide-react'; +import { cn } from '@/lib/utils'; +import { Button } from '@/components/ui/Button'; +import { Input } from '@/components/ui/Input'; +import { Label } from '@/components/ui/Label'; +import { + Dialog, + DialogContent, + DialogHeader, + DialogTitle, + DialogDescription, + DialogFooter, +} from '@/components/ui/Dialog'; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/Select'; +import { RadioGroup, RadioGroupItem } from '@/components/ui/RadioGroup'; + +export type CliTool = 'claude' | 'gemini' | 'qwen' | 'codex' | 'opencode'; +export type LaunchMode = 'default' | 'yolo'; +export type ShellKind = 'bash' | 'pwsh'; + +export interface CliSessionConfig { + tool: CliTool; + model?: string; + launchMode: LaunchMode; + preferredShell: ShellKind; + workingDir: string; +} + +export interface CliConfigModalProps { + isOpen: boolean; + onClose: () => void; + defaultWorkingDir?: string | null; + onCreateSession: (config: CliSessionConfig) => Promise; +} + +const CLI_TOOLS: CliTool[] = ['claude', 'gemini', 'qwen', 'codex', 'opencode']; + +const MODEL_OPTIONS: Record = { + claude: ['sonnet', 'haiku'], + gemini: ['gemini-2.5-pro', 'gemini-2.5-flash'], + qwen: ['coder-model'], + codex: ['gpt-5.2'], + opencode: ['opencode/glm-4.7-free'], +}; + +const AUTO_MODEL_VALUE = '__auto__'; 
+ +export function CliConfigModal({ + isOpen, + onClose, + defaultWorkingDir, + onCreateSession, +}: CliConfigModalProps) { + const { formatMessage } = useIntl(); + + const [tool, setTool] = React.useState('gemini'); + const [model, setModel] = React.useState(MODEL_OPTIONS.gemini[0]); + const [launchMode, setLaunchMode] = React.useState('yolo'); + const [preferredShell, setPreferredShell] = React.useState('bash'); + const [workingDir, setWorkingDir] = React.useState(defaultWorkingDir ?? ''); + + const [isSubmitting, setIsSubmitting] = React.useState(false); + const [error, setError] = React.useState(null); + + const modelOptions = React.useMemo(() => MODEL_OPTIONS[tool] ?? [], [tool]); + + React.useEffect(() => { + if (!isOpen) return; + // Reset to a safe default each time the modal is opened. + const nextWorkingDir = defaultWorkingDir ?? ''; + setWorkingDir(nextWorkingDir); + setError(null); + }, [isOpen, defaultWorkingDir]); + + const handleToolChange = (nextTool: string) => { + const next = nextTool as CliTool; + setTool(next); + const nextModels = MODEL_OPTIONS[next] ?? 
[]; + if (!model || !nextModels.includes(model)) { + setModel(nextModels[0]); + } + }; + + const handleBrowse = () => { + // Reserved for future file-picker integration + console.log('[CliConfigModal] browse working directory - not implemented'); + }; + + const handleCreate = async () => { + const dir = workingDir.trim(); + if (!dir) { + setError(formatMessage({ id: 'terminalDashboard.cliConfig.errors.workingDirRequired' })); + return; + } + + setIsSubmitting(true); + setError(null); + try { + await onCreateSession({ + tool, + model, + launchMode, + preferredShell, + workingDir: dir, + }); + onClose(); + } catch (err) { + console.error('[CliConfigModal] create session failed:', err); + setError(formatMessage({ id: 'terminalDashboard.cliConfig.errors.createFailed' })); + } finally { + setIsSubmitting(false); + } + }; + + return ( + !open && onClose()}> + + + {formatMessage({ id: 'terminalDashboard.cliConfig.title' })} + + {formatMessage({ id: 'terminalDashboard.cliConfig.description' })} + + + +
+
+ {/* Tool */} +
+ + +
+ + {/* Model */} +
+ + +
+
+ + {/* Mode */} +
+ + setLaunchMode(v as LaunchMode)} + className="flex items-center gap-4" + > + + + +
+ + {/* Shell */} +
+ + +
+ + {/* Working Directory */} +
+ +
+ { + setWorkingDir(e.target.value); + if (error) setError(null); + }} + placeholder={formatMessage({ id: 'terminalDashboard.cliConfig.workingDirPlaceholder' })} + disabled={isSubmitting} + className={cn(error && 'border-destructive')} + /> + +
+ {error &&

{error}

} +
+
+ + + + + +
+
+ ); +} + +export default CliConfigModal; + diff --git a/ccw/frontend/src/components/terminal-dashboard/DashboardToolbar.tsx b/ccw/frontend/src/components/terminal-dashboard/DashboardToolbar.tsx index 7d2dda68..147da134 100644 --- a/ccw/frontend/src/components/terminal-dashboard/DashboardToolbar.tsx +++ b/ccw/frontend/src/components/terminal-dashboard/DashboardToolbar.tsx @@ -27,6 +27,11 @@ import { DropdownMenu, DropdownMenuContent, DropdownMenuItem, + DropdownMenuRadioGroup, + DropdownMenuRadioItem, + DropdownMenuSub, + DropdownMenuSubContent, + DropdownMenuSubTrigger, DropdownMenuTrigger, DropdownMenuSeparator, } from '@/components/ui/Dropdown'; @@ -37,6 +42,8 @@ import { import { useIssues, useIssueQueue } from '@/hooks/useIssues'; import { useTerminalGridStore, selectTerminalGridFocusedPaneId } from '@/stores/terminalGridStore'; import { useWorkflowStore, selectProjectPath } from '@/stores/workflowStore'; +import { sendCliSessionText } from '@/lib/api'; +import { CliConfigModal, type CliSessionConfig } from './CliConfigModal'; // ========== Types ========== @@ -56,6 +63,19 @@ const LAYOUT_PRESETS = [ { id: 'grid-2x2' as const, icon: LayoutGrid, labelId: 'terminalDashboard.toolbar.layoutGrid' }, ]; +type LaunchMode = 'default' | 'yolo'; + +const CLI_TOOLS = ['claude', 'gemini', 'qwen', 'codex', 'opencode'] as const; +type CliTool = (typeof CLI_TOOLS)[number]; + +const LAUNCH_COMMANDS: Record> = { + claude: { default: 'claude', yolo: 'claude --permission-mode bypassPermissions' }, + gemini: { default: 'gemini', yolo: 'gemini --approval-mode yolo' }, + qwen: { default: 'qwen', yolo: 'qwen --approval-mode yolo' }, + codex: { default: 'codex', yolo: 'codex --full-auto' }, + opencode: { default: 'opencode', yolo: 'opencode' }, +}; + // ========== Component ========== export function DashboardToolbar({ activePanel, onTogglePanel }: DashboardToolbarProps) { @@ -94,117 +114,216 @@ export function DashboardToolbar({ activePanel, onTogglePanel }: DashboardToolba const 
focusedPaneId = useTerminalGridStore(selectTerminalGridFocusedPaneId); const createSessionAndAssign = useTerminalGridStore((s) => s.createSessionAndAssign); const [isCreating, setIsCreating] = useState(false); + const [selectedTool, setSelectedTool] = useState('gemini'); + const [launchMode, setLaunchMode] = useState('yolo'); + const [isConfigOpen, setIsConfigOpen] = useState(false); const handleQuickCreate = useCallback(async () => { if (!focusedPaneId || !projectPath) return; setIsCreating(true); try { - await createSessionAndAssign(focusedPaneId, { + const created = await createSessionAndAssign(focusedPaneId, { workingDir: projectPath, preferredShell: 'bash', + tool: selectedTool, }, projectPath); + + if (created?.session?.sessionKey) { + const command = LAUNCH_COMMANDS[selectedTool]?.[launchMode] ?? selectedTool; + setTimeout(() => { + sendCliSessionText( + created.session.sessionKey, + { text: command, appendNewline: true }, + projectPath + ).catch((err) => console.error('[DashboardToolbar] auto-launch failed:', err)); + }, 300); + } + } finally { + setIsCreating(false); + } + }, [focusedPaneId, projectPath, createSessionAndAssign, selectedTool, launchMode]); + + const handleConfigure = useCallback(() => { + setIsConfigOpen(true); + }, []); + + const handleCreateConfiguredSession = useCallback(async (config: CliSessionConfig) => { + if (!focusedPaneId || !projectPath) throw new Error('No focused pane or project path'); + setIsCreating(true); + try { + const created = await createSessionAndAssign( + focusedPaneId, + { + workingDir: config.workingDir || projectPath, + preferredShell: config.preferredShell, + tool: config.tool, + model: config.model, + }, + projectPath + ); + + if (!created?.session?.sessionKey) throw new Error('createSessionAndAssign failed'); + + const tool = config.tool as CliTool; + const mode = config.launchMode as LaunchMode; + const command = LAUNCH_COMMANDS[tool]?.[mode] ?? 
tool; + setTimeout(() => { + sendCliSessionText( + created.session.sessionKey, + { text: command, appendNewline: true }, + projectPath + ).catch((err) => console.error('[DashboardToolbar] auto-launch failed:', err)); + }, 300); } finally { setIsCreating(false); } }, [focusedPaneId, projectPath, createSessionAndAssign]); - const handleConfigure = useCallback(() => { - // TODO: Open configuration modal (future implementation) - console.log('Configure CLI session - modal to be implemented'); - }, []); - return ( -
- {/* Launch CLI dropdown */} - - + <> +
+ {/* Launch CLI dropdown */} + + + + + + + + {formatMessage({ id: 'terminalDashboard.toolbar.tool' })} + ({selectedTool}) + + + setSelectedTool(v as CliTool)} + > + {CLI_TOOLS.map((tool) => ( + + {tool} + + ))} + + + + + + + {formatMessage({ id: 'terminalDashboard.toolbar.mode' })} + + {launchMode === 'default' + ? formatMessage({ id: 'terminalDashboard.toolbar.modeDefault' }) + : formatMessage({ id: 'terminalDashboard.toolbar.modeYolo' })} + + + + setLaunchMode(v as LaunchMode)} + > + + {formatMessage({ id: 'terminalDashboard.toolbar.modeDefault' })} + + + {formatMessage({ id: 'terminalDashboard.toolbar.modeYolo' })} + + + + + + + + + {formatMessage({ id: 'terminalDashboard.toolbar.quickCreate' })} + + + + + {formatMessage({ id: 'terminalDashboard.toolbar.configure' })} + + + + + {/* Separator */} +
+ + {/* Panel toggle buttons */} + onTogglePanel('issues')} + badge={openCount > 0 ? openCount : undefined} + /> + onTogglePanel('queue')} + badge={queueCount > 0 ? queueCount : undefined} + /> + onTogglePanel('inspector')} + dot={hasChain} + /> + + {/* Separator */} +
+ + {/* Layout presets */} + {LAYOUT_PRESETS.map((preset) => ( - - - - - {formatMessage({ id: 'terminalDashboard.toolbar.quickCreate' })} - - - - - {formatMessage({ id: 'terminalDashboard.toolbar.configure' })} - - - + ))} - {/* Separator */} -
+ {/* Right-aligned title */} + + {formatMessage({ id: 'terminalDashboard.page.title' })} + +
- {/* Panel toggle buttons */} - onTogglePanel('issues')} - badge={openCount > 0 ? openCount : undefined} + setIsConfigOpen(false)} + defaultWorkingDir={projectPath} + onCreateSession={handleCreateConfiguredSession} /> - onTogglePanel('queue')} - badge={queueCount > 0 ? queueCount : undefined} - /> - onTogglePanel('inspector')} - dot={hasChain} - /> - - {/* Separator */} -
- - {/* Layout presets */} - {LAYOUT_PRESETS.map((preset) => ( - - ))} - - {/* Right-aligned title */} - - {formatMessage({ id: 'terminalDashboard.page.title' })} - -
+ ); } diff --git a/ccw/frontend/src/hooks/index.ts b/ccw/frontend/src/hooks/index.ts index afbed68d..038b5116 100644 --- a/ccw/frontend/src/hooks/index.ts +++ b/ccw/frontend/src/hooks/index.ts @@ -145,6 +145,22 @@ export type { UseDeleteMemoryReturn, } from './useMemory'; +// ========== Unified Memory ========== +export { + useUnifiedSearch, + useUnifiedStats, + useRecommendations, + useReindex, +} from './useUnifiedSearch'; +export type { + UseUnifiedSearchOptions, + UseUnifiedSearchReturn, + UseUnifiedStatsReturn, + UseRecommendationsOptions, + UseRecommendationsReturn, + UseReindexReturn, +} from './useUnifiedSearch'; + // ========== MCP Servers ========== export { useMcpServers, diff --git a/ccw/frontend/src/hooks/useMemory.ts b/ccw/frontend/src/hooks/useMemory.ts index 3c4a7a07..464a4dc4 100644 --- a/ccw/frontend/src/hooks/useMemory.ts +++ b/ccw/frontend/src/hooks/useMemory.ts @@ -15,6 +15,7 @@ import { } from '../lib/api'; import { useWorkflowStore, selectProjectPath } from '@/stores/workflowStore'; import { workspaceQueryKeys } from '@/lib/queryKeys'; +import { parseMemoryMetadata } from '@/lib/utils'; // Query key factory export const memoryKeys = { @@ -99,13 +100,8 @@ export function useMemory(options: UseMemoryOptions = {}): UseMemoryReturn { // Filter by favorite status (from metadata) if (filter?.favorite === true) { memories = memories.filter((m) => { - if (!m.metadata) return false; - try { - const metadata = typeof m.metadata === 'string' ? 
JSON.parse(m.metadata) : m.metadata; - return metadata.favorite === true; - } catch { - return false; - } + const metadata = parseMemoryMetadata(m.metadata); + return metadata.favorite === true; }); } diff --git a/ccw/frontend/src/hooks/useUnifiedSearch.ts b/ccw/frontend/src/hooks/useUnifiedSearch.ts new file mode 100644 index 00000000..fd75b32c --- /dev/null +++ b/ccw/frontend/src/hooks/useUnifiedSearch.ts @@ -0,0 +1,199 @@ +// ======================================== +// useUnifiedSearch Hook +// ======================================== +// TanStack Query hooks for unified memory search, stats, and recommendations + +import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query'; +import { + fetchUnifiedSearch, + fetchUnifiedStats, + fetchRecommendations, + triggerReindex, + type UnifiedSearchResult, + type UnifiedMemoryStats, + type RecommendationResult, + type ReindexResponse, +} from '../lib/api'; +import { useWorkflowStore, selectProjectPath } from '@/stores/workflowStore'; +import { workspaceQueryKeys } from '@/lib/queryKeys'; + +// Default stale time: 1 minute +const STALE_TIME = 60 * 1000; + +// ========== Unified Search ========== + +export interface UseUnifiedSearchOptions { + query: string; + categories?: string; + topK?: number; + minScore?: number; + enabled?: boolean; + staleTime?: number; +} + +export interface UseUnifiedSearchReturn { + results: UnifiedSearchResult[]; + total: number; + isLoading: boolean; + isFetching: boolean; + error: Error | null; + refetch: () => Promise; +} + +/** + * Hook for unified vector + FTS5 search across all memory categories + */ +export function useUnifiedSearch(options: UseUnifiedSearchOptions): UseUnifiedSearchReturn { + const { + query, + categories, + topK, + minScore, + enabled = true, + staleTime = STALE_TIME, + } = options; + const projectPath = useWorkflowStore(selectProjectPath); + + // Only enable query when projectPath exists and query is non-empty + const queryEnabled = enabled && 
!!projectPath && query.trim().length > 0; + + const result = useQuery({ + queryKey: workspaceQueryKeys.unifiedSearch(projectPath || '', query, categories), + queryFn: () => + fetchUnifiedSearch( + query, + { topK, minScore, category: categories }, + projectPath || undefined + ), + staleTime, + enabled: queryEnabled, + retry: 1, + }); + + const refetch = async () => { + await result.refetch(); + }; + + return { + results: result.data?.results ?? [], + total: result.data?.total ?? 0, + isLoading: result.isLoading, + isFetching: result.isFetching, + error: result.error, + refetch, + }; +} + +// ========== Unified Stats ========== + +export interface UseUnifiedStatsReturn { + stats: UnifiedMemoryStats | null; + isLoading: boolean; + isFetching: boolean; + error: Error | null; + refetch: () => Promise; +} + +/** + * Hook for fetching unified memory statistics + */ +export function useUnifiedStats(): UseUnifiedStatsReturn { + const projectPath = useWorkflowStore(selectProjectPath); + const queryEnabled = !!projectPath; + + const result = useQuery({ + queryKey: workspaceQueryKeys.unifiedStats(projectPath || ''), + queryFn: () => fetchUnifiedStats(projectPath || undefined), + staleTime: STALE_TIME, + enabled: queryEnabled, + retry: 2, + }); + + const refetch = async () => { + await result.refetch(); + }; + + return { + stats: result.data?.stats ?? 
null, + isLoading: result.isLoading, + isFetching: result.isFetching, + error: result.error, + refetch, + }; +} + +// ========== Recommendations ========== + +export interface UseRecommendationsOptions { + memoryId: string; + limit?: number; + enabled?: boolean; +} + +export interface UseRecommendationsReturn { + recommendations: RecommendationResult[]; + total: number; + isLoading: boolean; + isFetching: boolean; + error: Error | null; +} + +/** + * Hook for KNN-based memory recommendations + */ +export function useRecommendations(options: UseRecommendationsOptions): UseRecommendationsReturn { + const { memoryId, limit = 5, enabled = true } = options; + const projectPath = useWorkflowStore(selectProjectPath); + + const queryEnabled = enabled && !!projectPath && !!memoryId; + + const result = useQuery({ + queryKey: workspaceQueryKeys.unifiedRecommendations(projectPath || '', memoryId), + queryFn: () => fetchRecommendations(memoryId, limit, projectPath || undefined), + staleTime: STALE_TIME, + enabled: queryEnabled, + retry: 1, + }); + + return { + recommendations: result.data?.recommendations ?? [], + total: result.data?.total ?? 
0, + isLoading: result.isLoading, + isFetching: result.isFetching, + error: result.error, + }; +} + +// ========== Reindex Mutation ========== + +export interface UseReindexReturn { + reindex: () => Promise; + isReindexing: boolean; + error: Error | null; +} + +/** + * Hook for triggering vector index rebuild + */ +export function useReindex(): UseReindexReturn { + const queryClient = useQueryClient(); + const projectPath = useWorkflowStore(selectProjectPath); + + const mutation = useMutation({ + mutationFn: () => triggerReindex(projectPath || undefined), + onSuccess: () => { + // Invalidate unified memory cache after reindex + if (projectPath) { + queryClient.invalidateQueries({ + queryKey: workspaceQueryKeys.unifiedMemory(projectPath), + }); + } + }, + }); + + return { + reindex: mutation.mutateAsync, + isReindexing: mutation.isPending, + error: mutation.error, + }; +} diff --git a/ccw/frontend/src/lib/api.ts b/ccw/frontend/src/lib/api.ts index 67bd18b6..98d4cda4 100644 --- a/ccw/frontend/src/lib/api.ts +++ b/ccw/frontend/src/lib/api.ts @@ -6360,3 +6360,149 @@ export async function fetchCliSessionAudit( withPath(`/api/audit/cli-sessions${queryString ? 
`?${queryString}` : ''}`, options?.projectPath) ); } + +// ========== Unified Memory API ========== + +export interface UnifiedSearchResult { + source_id: string; + source_type: string; + score: number; + content: string; + category: string; + rank_sources: { + vector_rank?: number; + vector_score?: number; + fts_rank?: number; + heat_score?: number; + }; +} + +export interface UnifiedSearchResponse { + success: boolean; + query: string; + total: number; + results: UnifiedSearchResult[]; +} + +export interface UnifiedMemoryStats { + core_memories: { + total: number; + archived: number; + }; + stage1_outputs: number; + entities: number; + prompts: number; + conversations: number; + vector_index: { + available: boolean; + total_chunks: number; + hnsw_available: boolean; + hnsw_count: number; + dimension: number; + categories?: Record; + }; +} + +export interface RecommendationResult { + source_id: string; + source_type: string; + score: number; + content: string; + category: string; +} + +export interface ReindexResponse { + success: boolean; + hnsw_count?: number; + elapsed_time?: number; + error?: string; +} + +/** + * Search unified memory using vector + FTS5 fusion (RRF) + * @param query - Search query text + * @param options - Search options (topK, minScore, category) + * @param projectPath - Optional project path for workspace isolation + */ +export async function fetchUnifiedSearch( + query: string, + options?: { + topK?: number; + minScore?: number; + category?: string; + }, + projectPath?: string +): Promise { + const params = new URLSearchParams(); + params.set('q', query); + if (options?.topK) params.set('topK', String(options.topK)); + if (options?.minScore) params.set('minScore', String(options.minScore)); + if (options?.category) params.set('category', options.category); + + const data = await fetchApi( + withPath(`/api/unified-memory/search?${params.toString()}`, projectPath) + ); + if (data.success === false) { + throw new Error(data.error || 'Search 
failed'); + } + return data; +} + +/** + * Fetch unified memory statistics (core memories, entities, vectors, etc.) + * @param projectPath - Optional project path for workspace isolation + */ +export async function fetchUnifiedStats( + projectPath?: string +): Promise<{ success: boolean; stats: UnifiedMemoryStats }> { + const data = await fetchApi<{ success: boolean; stats: UnifiedMemoryStats; error?: string }>( + withPath('/api/unified-memory/stats', projectPath) + ); + if (data.success === false) { + throw new Error(data.error || 'Failed to load unified stats'); + } + return data; +} + +/** + * Get KNN-based recommendations for a specific memory + * @param memoryId - Core memory ID (CMEM-*) + * @param limit - Number of recommendations (default: 5) + * @param projectPath - Optional project path for workspace isolation + */ +export async function fetchRecommendations( + memoryId: string, + limit?: number, + projectPath?: string +): Promise<{ success: boolean; memory_id: string; total: number; recommendations: RecommendationResult[] }> { + const params = new URLSearchParams(); + if (limit) params.set('limit', String(limit)); + const queryString = params.toString(); + + const data = await fetchApi<{ success: boolean; memory_id: string; total: number; recommendations: RecommendationResult[]; error?: string }>( + withPath( + `/api/unified-memory/recommendations/${encodeURIComponent(memoryId)}${queryString ? 
`?${queryString}` : ''}`, + projectPath + ) + ); + if (data.success === false) { + throw new Error(data.error || 'Failed to load recommendations'); + } + return data; +} + +/** + * Trigger vector index rebuild + * @param projectPath - Optional project path for workspace isolation + */ +export async function triggerReindex( + projectPath?: string +): Promise { + return fetchApi( + '/api/unified-memory/reindex', + { + method: 'POST', + body: JSON.stringify({ path: projectPath }), + } + ); +} diff --git a/ccw/frontend/src/lib/queryKeys.ts b/ccw/frontend/src/lib/queryKeys.ts index 228c09f9..a5f3cbbb 100644 --- a/ccw/frontend/src/lib/queryKeys.ts +++ b/ccw/frontend/src/lib/queryKeys.ts @@ -130,6 +130,15 @@ export const workspaceQueryKeys = { offset?: number; } ) => [...workspaceQueryKeys.audit(projectPath), 'cliSessions', options] as const, + + // ========== Unified Memory ========== + unifiedMemory: (projectPath: string) => [...workspaceQueryKeys.all(projectPath), 'unifiedMemory'] as const, + unifiedSearch: (projectPath: string, query: string, categories?: string) => + [...workspaceQueryKeys.unifiedMemory(projectPath), 'search', query, categories] as const, + unifiedStats: (projectPath: string) => + [...workspaceQueryKeys.unifiedMemory(projectPath), 'stats'] as const, + unifiedRecommendations: (projectPath: string, memoryId: string) => + [...workspaceQueryKeys.unifiedMemory(projectPath), 'recommendations', memoryId] as const, }; // ========== API Settings Keys ========== diff --git a/ccw/frontend/src/lib/utils.ts b/ccw/frontend/src/lib/utils.ts index e569adb9..33d41320 100644 --- a/ccw/frontend/src/lib/utils.ts +++ b/ccw/frontend/src/lib/utils.ts @@ -14,4 +14,20 @@ export function cn(...inputs: ClassValue[]): string { return twMerge(clsx(inputs)); } +/** + * Safely parse memory metadata from string, object, or undefined. + * Returns an empty object on parse failure or missing input. 
+ */ +export function parseMemoryMetadata( + metadata: string | Record | undefined | null +): Record { + if (!metadata) return {}; + if (typeof metadata === 'object') return metadata; + try { + return JSON.parse(metadata); + } catch { + return {}; + } +} + export type { ClassValue }; diff --git a/ccw/frontend/src/locales/en/memory.json b/ccw/frontend/src/locales/en/memory.json index 6a39a05d..4f6f80bf 100644 --- a/ccw/frontend/src/locales/en/memory.json +++ b/ccw/frontend/src/locales/en/memory.json @@ -22,19 +22,31 @@ "tabs": { "memories": "Memories", "favorites": "Favorites", - "archived": "Archived" + "archived": "Archived", + "unifiedSearch": "Unified Search" }, "stats": { "totalSize": "Total Size", "count": "Count", "claudeMdCount": "CLAUDE.md Files", - "totalEntries": "Total Entries" + "totalEntries": "Total Entries", + "vectorChunks": "Vector Chunks", + "hnswStatus": "HNSW Index", + "entities": "Entities", + "prompts": "Prompts" }, "filters": { "search": "Search memories...", + "searchUnified": "Semantic search across all memory types...", "tags": "Tags", "clear": "Clear", - "all": "All" + "all": "All", + "categoryAll": "All Categories", + "categoryCoreMemory": "Core Memory", + "categoryCliHistory": "CLI History", + "categoryWorkflow": "Workflow", + "categoryEntity": "Entity", + "categoryPattern": "Pattern" }, "card": { "id": "ID", @@ -82,5 +94,20 @@ "coreMemory": "Core Memory", "workflow": "Workflow", "cliHistory": "CLI History" + }, + "unified": { + "score": "Score", + "noResults": "No results found. 
Try a different search query.", + "searching": "Searching...", + "resultCount": "{count} results", + "recommendations": "Related", + "noRecommendations": "No recommendations available", + "reindex": "Rebuild Index", + "reindexing": "Rebuilding...", + "reindexSuccess": "Index rebuilt successfully", + "reindexError": "Failed to rebuild index", + "vectorRank": "Vector #{rank}", + "ftsRank": "FTS #{rank}", + "heatScore": "Heat: {score}" } } diff --git a/ccw/frontend/src/locales/en/settings.json b/ccw/frontend/src/locales/en/settings.json index bd586299..1eb4fafe 100644 --- a/ccw/frontend/src/locales/en/settings.json +++ b/ccw/frontend/src/locales/en/settings.json @@ -114,6 +114,45 @@ "on": "On", "off": "Off" }, + "remoteNotifications": { + "title": "Remote Notifications", + "description": "Send notifications to external platforms like Discord, Telegram, or custom webhooks when events occur.", + "enabled": "Enabled", + "disabled": "Disabled", + "platforms": "Platform Configuration", + "events": "Event Triggers", + "noPlatforms": "No platforms", + "configured": "Configured", + "save": "Save", + "saving": "Saving...", + "saved": "Configuration saved", + "saveError": "Failed to save configuration", + "reset": "Reset to Defaults", + "resetConfirm": "Reset all remote notification settings to defaults?", + "resetSuccess": "Settings reset to defaults", + "resetError": "Failed to reset settings", + "testConnection": "Test Connection", + "testSuccess": "Test notification sent successfully", + "testFailed": "Test notification failed", + "testError": "Failed to send test notification", + "discord": { + "webhookUrl": "Webhook URL", + "webhookUrlHint": "Create a webhook in your Discord channel settings", + "username": "Custom Username (optional)" + }, + "telegram": { + "botToken": "Bot Token", + "botTokenHint": "Get from @BotFather on Telegram", + "chatId": "Chat ID", + "chatIdHint": "User or group chat ID (use @userinfobot to find it)" + }, + "webhook": { + "url": "Webhook URL", + 
"method": "HTTP Method", + "headers": "Custom Headers (JSON)", + "headersHint": "Optional JSON object with custom headers" + } + }, "versionCheck": { "title": "Version Update", "currentVersion": "Current Version", diff --git a/ccw/frontend/src/locales/zh/memory.json b/ccw/frontend/src/locales/zh/memory.json index 238aa421..738adaf9 100644 --- a/ccw/frontend/src/locales/zh/memory.json +++ b/ccw/frontend/src/locales/zh/memory.json @@ -22,19 +22,31 @@ "tabs": { "memories": "记忆", "favorites": "收藏", - "archived": "归档" + "archived": "归档", + "unifiedSearch": "统一搜索" }, "stats": { "totalSize": "总大小", "count": "数量", "claudeMdCount": "CLAUDE.md 文件", - "totalEntries": "总条目" + "totalEntries": "总条目", + "vectorChunks": "向量块", + "hnswStatus": "HNSW 索引", + "entities": "实体", + "prompts": "提示" }, "filters": { "search": "搜索记忆...", + "searchUnified": "跨所有记忆类型语义搜索...", "tags": "标签", "clear": "清除", - "all": "全部" + "all": "全部", + "categoryAll": "所有类别", + "categoryCoreMemory": "核心记忆", + "categoryCliHistory": "CLI 历史", + "categoryWorkflow": "工作流", + "categoryEntity": "实体", + "categoryPattern": "模式" }, "card": { "id": "ID", @@ -82,5 +94,20 @@ "coreMemory": "核心记忆", "workflow": "工作流", "cliHistory": "CLI 历史" + }, + "unified": { + "score": "分数", + "noResults": "未找到结果。请尝试不同的搜索查询。", + "searching": "搜索中...", + "resultCount": "{count} 条结果", + "recommendations": "相关", + "noRecommendations": "暂无推荐", + "reindex": "重建索引", + "reindexing": "重建中...", + "reindexSuccess": "索引重建成功", + "reindexError": "索引重建失败", + "vectorRank": "向量 #{rank}", + "ftsRank": "全文 #{rank}", + "heatScore": "热度: {score}" } } diff --git a/ccw/frontend/src/locales/zh/settings.json b/ccw/frontend/src/locales/zh/settings.json index 7fbfcaf6..89cee92d 100644 --- a/ccw/frontend/src/locales/zh/settings.json +++ b/ccw/frontend/src/locales/zh/settings.json @@ -114,6 +114,45 @@ "on": "开启", "off": "关闭" }, + "remoteNotifications": { + "title": "远程通知", + "description": "当事件发生时,发送通知到 Discord、Telegram 或自定义 Webhook 等外部平台。", + "enabled": "已启用", + 
"disabled": "已禁用", + "platforms": "平台配置", + "events": "事件触发器", + "noPlatforms": "无平台", + "configured": "已配置", + "save": "保存", + "saving": "保存中...", + "saved": "配置已保存", + "saveError": "保存配置失败", + "reset": "重置为默认值", + "resetConfirm": "确定要将所有远程通知设置重置为默认值吗?", + "resetSuccess": "设置已重置为默认值", + "resetError": "重置设置失败", + "testConnection": "测试连接", + "testSuccess": "测试通知发送成功", + "testFailed": "测试通知发送失败", + "testError": "发送测试通知失败", + "discord": { + "webhookUrl": "Webhook URL", + "webhookUrlHint": "在 Discord 频道设置中创建 Webhook", + "username": "自定义用户名(可选)" + }, + "telegram": { + "botToken": "Bot Token", + "botTokenHint": "从 Telegram 的 @BotFather 获取", + "chatId": "Chat ID", + "chatIdHint": "用户或群组 Chat ID(使用 @userinfobot 查找)" + }, + "webhook": { + "url": "Webhook URL", + "method": "HTTP 方法", + "headers": "自定义请求头(JSON)", + "headersHint": "可选的 JSON 对象,包含自定义请求头" + } + }, "versionCheck": { "title": "版本更新", "currentVersion": "当前版本", diff --git a/ccw/frontend/src/pages/MemoryPage.tsx b/ccw/frontend/src/pages/MemoryPage.tsx index 5d7b56a1..c44de8e3 100644 --- a/ccw/frontend/src/pages/MemoryPage.tsx +++ b/ccw/frontend/src/pages/MemoryPage.tsx @@ -2,6 +2,7 @@ // Memory Page // ======================================== // View and manage core memory and context with CRUD operations +// Includes unified vector search across all memory categories import { useState, useEffect } from 'react'; import { useIntl } from 'react-intl'; @@ -22,6 +23,11 @@ import { Archive, ArchiveRestore, AlertCircle, + Layers, + Zap, + Terminal, + GitBranch, + Hash, } from 'lucide-react'; import { Card } from '@/components/ui/Card'; import { Button } from '@/components/ui/Button'; @@ -30,9 +36,39 @@ import { Badge } from '@/components/ui/Badge'; import { TabsNavigation } from '@/components/ui/TabsNavigation'; import { Dialog, DialogContent, DialogHeader, DialogTitle } from '@/components/ui/Dialog'; import { Checkbox } from '@/components/ui/Checkbox'; -import { useMemory, useMemoryMutations } from '@/hooks'; -import type 
{ CoreMemory } from '@/lib/api'; -import { cn } from '@/lib/utils'; +import { useMemory, useMemoryMutations, useUnifiedSearch, useUnifiedStats, useRecommendations, useReindex } from '@/hooks'; +import type { CoreMemory, UnifiedSearchResult } from '@/lib/api'; +import { cn, parseMemoryMetadata } from '@/lib/utils'; + +// ========== Source Type Helpers ========== + +const SOURCE_TYPE_COLORS: Record = { + core_memory: 'bg-blue-100 text-blue-800 dark:bg-blue-900/30 dark:text-blue-300', + cli_history: 'bg-green-100 text-green-800 dark:bg-green-900/30 dark:text-green-300', + workflow: 'bg-purple-100 text-purple-800 dark:bg-purple-900/30 dark:text-purple-300', + entity: 'bg-orange-100 text-orange-800 dark:bg-orange-900/30 dark:text-orange-300', + pattern: 'bg-pink-100 text-pink-800 dark:bg-pink-900/30 dark:text-pink-300', +}; + +const SOURCE_TYPE_ICONS: Record = { + core_memory: , + cli_history: , + workflow: , + entity: , + pattern: , +}; + +function SourceTypeBadge({ sourceType }: { sourceType: string }) { + const colorClass = SOURCE_TYPE_COLORS[sourceType] || 'bg-gray-100 text-gray-800 dark:bg-gray-900/30 dark:text-gray-300'; + const icon = SOURCE_TYPE_ICONS[sourceType] || ; + + return ( + + {icon} + {sourceType} + + ); +} // ========== Memory Card Component ========== @@ -51,7 +87,7 @@ function MemoryCard({ memory, onView, onEdit, onDelete, onCopy, onToggleFavorite const formattedDate = new Date(memory.createdAt).toLocaleDateString(); // Parse metadata from memory - const metadata = memory.metadata ? (typeof memory.metadata === 'string' ? 
JSON.parse(memory.metadata) : memory.metadata) : {}; + const metadata = parseMemoryMetadata(memory.metadata); const isFavorite = metadata.favorite === true; const priority = metadata.priority || 'medium'; const isArchived = memory.archived || false; @@ -197,6 +233,138 @@ function MemoryCard({ memory, onView, onEdit, onDelete, onCopy, onToggleFavorite ); } +// ========== Unified Search Result Card ========== + +interface UnifiedResultCardProps { + result: UnifiedSearchResult; + onCopy: (content: string) => void; +} + +function UnifiedResultCard({ result, onCopy }: UnifiedResultCardProps) { + const { formatMessage } = useIntl(); + const scorePercent = (result.score * 100).toFixed(1); + + return ( + +
+
+
+ +
+
+ + {result.source_id} + + + {formatMessage({ id: 'memory.unified.score' })}: {scorePercent}% + +
+ {/* Rank sources */} +
+ {result.rank_sources.vector_rank != null && ( + + {formatMessage({ id: 'memory.unified.vectorRank' }, { rank: result.rank_sources.vector_rank })} + + )} + {result.rank_sources.fts_rank != null && ( + + {formatMessage({ id: 'memory.unified.ftsRank' }, { rank: result.rank_sources.fts_rank })} + + )} + {result.rank_sources.heat_score != null && ( + + {formatMessage({ id: 'memory.unified.heatScore' }, { score: result.rank_sources.heat_score.toFixed(2) })} + + )} +
+
+
+ +
+ + {/* Content preview */} +

+ {result.content} +

+
+
+ ); +} + +// ========== Recommendations Panel ========== + +interface RecommendationsPanelProps { + memoryId: string; + onCopy: (content: string) => void; +} + +function RecommendationsPanel({ memoryId, onCopy }: RecommendationsPanelProps) { + const { formatMessage } = useIntl(); + const { recommendations, isLoading } = useRecommendations({ + memoryId, + limit: 5, + enabled: !!memoryId, + }); + + if (isLoading) { + return ( +
+ + {formatMessage({ id: 'memory.unified.searching' })} +
+ ); + } + + if (recommendations.length === 0) { + return ( +

+ {formatMessage({ id: 'memory.unified.noRecommendations' })} +

+ ); + } + + return ( +
+ {recommendations.map((rec) => ( +
+ +
+
+ + {rec.source_id} + + + {(rec.score * 100).toFixed(0)}% + +
+

+ {rec.content} +

+
+ +
+ ))} +
+ ); +} + // ========== View Memory Dialog ========== interface ViewMemoryDialogProps { @@ -211,7 +379,7 @@ function ViewMemoryDialog({ memory, open, onOpenChange, onEdit, onCopy }: ViewMe const { formatMessage } = useIntl(); if (!memory) return null; - const metadata = memory.metadata ? (typeof memory.metadata === 'string' ? JSON.parse(memory.metadata) : memory.metadata) : {}; + const metadata = parseMemoryMetadata(memory.metadata); const priority = metadata.priority || 'medium'; const formattedDate = new Date(memory.createdAt).toLocaleDateString(); const formattedSize = memory.size @@ -264,6 +432,15 @@ function ViewMemoryDialog({ memory, open, onOpenChange, onEdit, onCopy }: ViewMe
+ {/* Recommendations */} +
+

+ + {formatMessage({ id: 'memory.unified.recommendations' })} +

+ +
+ {/* Actions */}
+ )} + - + {!isUnifiedTab && ( + + )}
- {/* Tab Navigation - styled like LiteTasksPage */} + {/* Tab Navigation */} setCurrentTab(v as 'memories' | 'favorites' | 'archived')} + onValueChange={(v) => setCurrentTab(v as typeof currentTab)} tabs={[ { value: 'memories', @@ -601,141 +844,285 @@ export function MemoryPage() { label: formatMessage({ id: 'memory.tabs.archived' }), icon: , }, + { + value: 'unifiedSearch', + label: formatMessage({ id: 'memory.tabs.unifiedSearch' }), + icon: , + }, ]} /> {/* Error alert */} - {error && ( + {activeError && (

{formatMessage({ id: 'common.errors.loadFailed' })}

-

{error.message}

+

{activeError.message}

-
)} {/* Stats Cards */} -
- -
-
- + {isUnifiedTab ? ( + /* Unified Stats Cards */ +
+ +
+
+ +
+
+
+ {statsLoading ? '-' : (unifiedStats?.core_memories.total ?? 0)} +
+

{formatMessage({ id: 'memory.stats.count' })}

+
-
-
{memories.length}
-

{formatMessage({ id: 'memory.stats.count' })}

+ + +
+
+ +
+
+
+ {statsLoading ? '-' : (unifiedStats?.entities ?? 0)} +
+

{formatMessage({ id: 'memory.stats.entities' })}

+
-
-
- -
-
- + + +
+
+ +
+
+
+ {statsLoading ? '-' : (unifiedStats?.vector_index.total_chunks ?? 0)} +
+

{formatMessage({ id: 'memory.stats.vectorChunks' })}

+
-
-
{claudeMdCount}
-

{formatMessage({ id: 'memory.stats.claudeMdCount' })}

+ + +
+
+ +
+
+
+ {statsLoading ? '-' : (unifiedStats?.vector_index.hnsw_available ? unifiedStats.vector_index.hnsw_count : 'N/A')} +
+

{formatMessage({ id: 'memory.stats.hnswStatus' })}

+
-
-
- -
-
- + +
+ ) : ( + /* Standard Stats Cards */ +
+ +
+
+ +
+
+
{memories.length}
+

{formatMessage({ id: 'memory.stats.count' })}

+
-
-
{formattedTotalSize}
-

{formatMessage({ id: 'memory.stats.totalSize' })}

+ + +
+
+ +
+
+
{claudeMdCount}
+

{formatMessage({ id: 'memory.stats.claudeMdCount' })}

+
-
-
-
+ + +
+
+ +
+
+
{formattedTotalSize}
+

{formatMessage({ id: 'memory.stats.totalSize' })}

+
+
+
+
+ )} {/* Search and Filters */} -
-
- - setSearchQuery(e.target.value)} - className="pl-9" - /> -
- - {/* Tags Filter */} - {allTags.length > 0 && ( -
- {formatMessage({ id: 'memory.card.tags' })}: - {allTags.map((tag) => ( - - ))} - {selectedTags.length > 0 && ( - - )} + {isUnifiedTab ? ( + /* Unified Search Input + Category Filter */ +
+
+
+ + setUnifiedQuery(e.target.value)} + className="pl-9" + /> +
+
- )} -
- - {/* Memory List */} - {isLoading ? ( -
- {[1, 2, 3, 4, 5, 6].map((i) => ( -
- ))} + {unifiedQuery.trim().length > 0 && !unifiedLoading && ( +

+ {formatMessage({ id: 'memory.unified.resultCount' }, { count: unifiedTotal })} +

+ )}
- ) : memories.length === 0 ? ( - - -

- {formatMessage({ id: 'memory.emptyState.title' })} -

-

- {formatMessage({ id: 'memory.emptyState.message' })} -

- -
) : ( -
- {memories.map((memory) => ( - +
+ + setSearchQuery(e.target.value)} + className="pl-9" /> - ))} +
+ + {/* Tags Filter */} + {allTags.length > 0 && ( +
+ {formatMessage({ id: 'memory.card.tags' })}: + {allTags.map((tag) => ( + + ))} + {selectedTags.length > 0 && ( + + )} +
+ )}
)} + {/* Content Area */} + {isUnifiedTab ? ( + /* Unified Search Results */ + unifiedLoading ? ( +
+ + + {formatMessage({ id: 'memory.unified.searching' })} + +
+ ) : unifiedQuery.trim().length === 0 ? ( + + +

+ {formatMessage({ id: 'memory.tabs.unifiedSearch' })} +

+

+ {formatMessage({ id: 'memory.filters.searchUnified' })} +

+
+ ) : unifiedResults.length === 0 ? ( + + +

+ {formatMessage({ id: 'memory.unified.noResults' })} +

+
+ ) : ( +
+ {unifiedResults.map((result) => ( + + ))} +
+ ) + ) : ( + /* Standard Memory List */ + isLoading ? ( +
+ {[1, 2, 3, 4, 5, 6].map((i) => ( +
+ ))} +
+ ) : memories.length === 0 ? ( + + +

+ {formatMessage({ id: 'memory.emptyState.title' })} +

+

+ {formatMessage({ id: 'memory.emptyState.message' })} +

+ +
+ ) : ( +
+ {memories.map((memory) => ( + + ))} +
+ ) + )} + {/* View Memory Dialog */} + {/* Remote Notifications */} + + {/* Reset Settings */}

diff --git a/ccw/frontend/src/types/remote-notification.ts b/ccw/frontend/src/types/remote-notification.ts new file mode 100644 index 00000000..2bb66d18 --- /dev/null +++ b/ccw/frontend/src/types/remote-notification.ts @@ -0,0 +1,193 @@ +// ======================================== +// Remote Notification Types (Frontend) +// ======================================== +// Type definitions for remote notification system UI +// Mirrors backend types with UI-specific additions + +/** + * Supported notification platforms + */ +export type NotificationPlatform = 'discord' | 'telegram' | 'webhook'; + +/** + * Event types that can trigger notifications + */ +export type NotificationEventType = + | 'ask-user-question' + | 'session-start' + | 'session-end' + | 'task-completed' + | 'task-failed'; + +/** + * Discord platform configuration + */ +export interface DiscordConfig { + enabled: boolean; + webhookUrl: string; + username?: string; + avatarUrl?: string; +} + +/** + * Telegram platform configuration + */ +export interface TelegramConfig { + enabled: boolean; + botToken: string; + chatId: string; + parseMode?: 'HTML' | 'Markdown' | 'MarkdownV2'; +} + +/** + * Generic Webhook platform configuration + */ +export interface WebhookConfig { + enabled: boolean; + url: string; + method: 'POST' | 'PUT'; + headers?: Record; + timeout?: number; +} + +/** + * Event configuration + */ +export interface EventConfig { + event: NotificationEventType; + platforms: NotificationPlatform[]; + enabled: boolean; +} + +/** + * Full remote notification configuration + */ +export interface RemoteNotificationConfig { + enabled: boolean; + platforms: { + discord?: DiscordConfig; + telegram?: TelegramConfig; + webhook?: WebhookConfig; + }; + events: EventConfig[]; + timeout: number; +} + +/** + * Test notification request + */ +export interface TestNotificationRequest { + platform: NotificationPlatform; + config: DiscordConfig | TelegramConfig | WebhookConfig; +} + +/** + * Test notification result + 
*/ +export interface TestNotificationResult { + success: boolean; + error?: string; + responseTime?: number; +} + +/** + * Platform display info + */ +export interface PlatformInfo { + id: NotificationPlatform; + name: string; + icon: string; + description: string; + requiredFields: string[]; +} + +/** + * Event display info + */ +export interface EventInfo { + id: NotificationEventType; + name: string; + description: string; + icon: string; +} + +/** + * Predefined platform information + */ +export const PLATFORM_INFO: Record = { + discord: { + id: 'discord', + name: 'Discord', + icon: 'message-circle', + description: 'Send notifications to Discord channels via webhook', + requiredFields: ['webhookUrl'], + }, + telegram: { + id: 'telegram', + name: 'Telegram', + icon: 'send', + description: 'Send notifications to Telegram chats via bot', + requiredFields: ['botToken', 'chatId'], + }, + webhook: { + id: 'webhook', + name: 'Custom Webhook', + icon: 'link', + description: 'Send notifications to a custom HTTP endpoint', + requiredFields: ['url'], + }, +}; + +/** + * Predefined event information + */ +export const EVENT_INFO: Record = { + 'ask-user-question': { + id: 'ask-user-question', + name: 'Ask User Question', + description: 'Notification when Claude asks a question via AskUserQuestion', + icon: 'help-circle', + }, + 'session-start': { + id: 'session-start', + name: 'Session Start', + description: 'Notification when a CLI session starts', + icon: 'play', + }, + 'session-end': { + id: 'session-end', + name: 'Session End', + description: 'Notification when a CLI session ends', + icon: 'square', + }, + 'task-completed': { + id: 'task-completed', + name: 'Task Completed', + description: 'Notification when a task completes successfully', + icon: 'check-circle', + }, + 'task-failed': { + id: 'task-failed', + name: 'Task Failed', + description: 'Notification when a task fails', + icon: 'alert-circle', + }, +}; + +/** + * Default configuration for UI initialization + */ 
+export function getDefaultConfig(): RemoteNotificationConfig { + return { + enabled: false, + platforms: {}, + events: [ + { event: 'ask-user-question', platforms: ['discord', 'telegram'], enabled: true }, + { event: 'session-start', platforms: [], enabled: false }, + { event: 'session-end', platforms: [], enabled: false }, + { event: 'task-completed', platforms: [], enabled: false }, + { event: 'task-failed', platforms: ['discord', 'telegram'], enabled: true }, + ], + timeout: 10000, + }; +} diff --git a/ccw/scripts/unified_memory_embedder.py b/ccw/scripts/unified_memory_embedder.py new file mode 100644 index 00000000..9bbe0f47 --- /dev/null +++ b/ccw/scripts/unified_memory_embedder.py @@ -0,0 +1,473 @@ +#!/usr/bin/env python3 +""" +Unified Memory Embedder - Bridge CCW to CodexLens VectorStore (HNSW) + +Uses CodexLens VectorStore for HNSW-indexed vector storage and search, +replacing full-table-scan cosine similarity with sub-10ms approximate +nearest neighbor lookups. + +Protocol: JSON via stdin/stdout +Operations: embed, search, search_by_vector, status, reindex + +Usage: + echo '{"operation":"embed","store_path":"...","chunks":[...]}' | python unified_memory_embedder.py + echo '{"operation":"search","store_path":"...","query":"..."}' | python unified_memory_embedder.py + echo '{"operation":"status","store_path":"..."}' | python unified_memory_embedder.py + echo '{"operation":"reindex","store_path":"..."}' | python unified_memory_embedder.py +""" + +import json +import sys +import time +from pathlib import Path +from typing import List, Dict, Any, Optional + +try: + import numpy as np +except ImportError: + print(json.dumps({ + "success": False, + "error": "numpy is required. 
Install with: pip install numpy" + })) + sys.exit(1) + +try: + from codexlens.semantic.factory import get_embedder, clear_embedder_cache + from codexlens.semantic.vector_store import VectorStore + from codexlens.entities import SemanticChunk +except ImportError: + print(json.dumps({ + "success": False, + "error": "CodexLens not found. Install with: pip install codex-lens[semantic]" + })) + sys.exit(1) + + +# Valid category values for filtering +VALID_CATEGORIES = {"core_memory", "cli_history", "workflow", "entity", "pattern"} + + +class UnifiedMemoryEmbedder: + """Unified embedder backed by CodexLens VectorStore (HNSW).""" + + def __init__(self, store_path: str): + """ + Initialize with path to VectorStore database directory. + + Args: + store_path: Directory containing vectors.db and vectors.hnsw + """ + self.store_path = Path(store_path) + self.store_path.mkdir(parents=True, exist_ok=True) + + db_path = str(self.store_path / "vectors.db") + self.store = VectorStore(db_path) + + # Lazy-load embedder to avoid ~0.8s model loading for status command + self._embedder = None + + @property + def embedder(self): + """Lazy-load the embedder on first access.""" + if self._embedder is None: + self._embedder = get_embedder( + backend="fastembed", + profile="code", + use_gpu=True + ) + return self._embedder + + def embed(self, chunks: List[Dict[str, Any]], batch_size: int = 8) -> Dict[str, Any]: + """ + Embed chunks and insert into VectorStore. + + Each chunk dict must contain: + - content: str + - source_id: str + - source_type: str (e.g. "core_memory", "workflow", "cli_history") + - category: str (e.g. 
"core_memory", "cli_history", "workflow", "entity", "pattern") + + Optional fields: + - chunk_index: int (default 0) + - metadata: dict (additional metadata) + + Args: + chunks: List of chunk dicts to embed + batch_size: Number of chunks to embed per batch + + Returns: + Result dict with success, chunks_processed, chunks_failed, elapsed_time + """ + start_time = time.time() + chunks_processed = 0 + chunks_failed = 0 + + if not chunks: + return { + "success": True, + "chunks_processed": 0, + "chunks_failed": 0, + "elapsed_time": 0.0 + } + + # Process in batches + for i in range(0, len(chunks), batch_size): + batch = chunks[i:i + batch_size] + texts = [c["content"] for c in batch] + + try: + # Batch embed + embeddings = self.embedder.embed_to_numpy(texts) + + # Build SemanticChunks and insert + semantic_chunks = [] + for j, chunk_data in enumerate(batch): + category = chunk_data.get("category", chunk_data.get("source_type", "core_memory")) + source_id = chunk_data.get("source_id", "") + chunk_index = chunk_data.get("chunk_index", 0) + extra_meta = chunk_data.get("metadata", {}) + + # Build metadata dict for VectorStore + metadata = { + "source_id": source_id, + "source_type": chunk_data.get("source_type", ""), + "chunk_index": chunk_index, + **extra_meta + } + + sc = SemanticChunk( + content=chunk_data["content"], + embedding=embeddings[j].tolist(), + metadata=metadata + ) + semantic_chunks.append((sc, source_id, category)) + + # Insert into VectorStore + for sc, file_path, category in semantic_chunks: + try: + self.store.add_chunk(sc, file_path=file_path, category=category) + chunks_processed += 1 + except Exception as e: + print(f"Error inserting chunk: {e}", file=sys.stderr) + chunks_failed += 1 + + except Exception as e: + print(f"Error embedding batch starting at {i}: {e}", file=sys.stderr) + chunks_failed += len(batch) + + elapsed_time = time.time() - start_time + + return { + "success": chunks_failed == 0, + "chunks_processed": chunks_processed, + 
"chunks_failed": chunks_failed, + "elapsed_time": round(elapsed_time, 3) + } + + def search( + self, + query: str, + top_k: int = 10, + min_score: float = 0.3, + category: Optional[str] = None + ) -> Dict[str, Any]: + """ + Search VectorStore using HNSW index. + + Args: + query: Search query text + top_k: Number of results + min_score: Minimum similarity threshold + category: Optional category filter + + Returns: + Result dict with success and matches list + """ + try: + start_time = time.time() + + # Generate query embedding (embed_to_numpy accepts single string) + query_emb = self.embedder.embed_to_numpy(query)[0].tolist() + + # Search via VectorStore HNSW + results = self.store.search_similar( + query_emb, + top_k=top_k, + min_score=min_score, + category=category + ) + + elapsed_time = time.time() - start_time + + matches = [] + for result in results: + meta = result.metadata if result.metadata else {} + if isinstance(meta, str): + try: + meta = json.loads(meta) + except (json.JSONDecodeError, TypeError): + meta = {} + + matches.append({ + "content": result.content or result.excerpt or "", + "score": round(float(result.score), 4), + "source_id": meta.get("source_id", result.path or ""), + "source_type": meta.get("source_type", ""), + "chunk_index": meta.get("chunk_index", 0), + "category": meta.get("category", ""), + "metadata": meta + }) + + return { + "success": True, + "matches": matches, + "elapsed_time": round(elapsed_time, 3), + "total_searched": len(results) + } + + except Exception as e: + return { + "success": False, + "matches": [], + "error": str(e) + } + + def search_by_vector( + self, + vector: List[float], + top_k: int = 10, + min_score: float = 0.3, + category: Optional[str] = None + ) -> Dict[str, Any]: + """ + Search VectorStore using a pre-computed embedding vector (no re-embedding). 
+ + Args: + vector: Pre-computed embedding vector (list of floats) + top_k: Number of results + min_score: Minimum similarity threshold + category: Optional category filter + + Returns: + Result dict with success and matches list + """ + try: + start_time = time.time() + + # Search via VectorStore HNSW directly with provided vector + results = self.store.search_similar( + vector, + top_k=top_k, + min_score=min_score, + category=category + ) + + elapsed_time = time.time() - start_time + + matches = [] + for result in results: + meta = result.metadata if result.metadata else {} + if isinstance(meta, str): + try: + meta = json.loads(meta) + except (json.JSONDecodeError, TypeError): + meta = {} + + matches.append({ + "content": result.content or result.excerpt or "", + "score": round(float(result.score), 4), + "source_id": meta.get("source_id", result.path or ""), + "source_type": meta.get("source_type", ""), + "chunk_index": meta.get("chunk_index", 0), + "category": meta.get("category", ""), + "metadata": meta + }) + + return { + "success": True, + "matches": matches, + "elapsed_time": round(elapsed_time, 3), + "total_searched": len(results) + } + + except Exception as e: + return { + "success": False, + "matches": [], + "error": str(e) + } + + def status(self) -> Dict[str, Any]: + """ + Get VectorStore index status. + + Returns: + Status dict with total_chunks, hnsw_available, dimension, etc. 
+ """ + try: + total_chunks = self.store.count_chunks() + hnsw_available = self.store.ann_available + hnsw_count = self.store.ann_count + dimension = self.store.dimension or 768 + + # Count per category from SQLite + categories = {} + try: + import sqlite3 + db_path = str(self.store_path / "vectors.db") + with sqlite3.connect(db_path) as conn: + rows = conn.execute( + "SELECT category, COUNT(*) FROM semantic_chunks GROUP BY category" + ).fetchall() + for row in rows: + categories[row[0] or "unknown"] = row[1] + except Exception: + pass + + return { + "success": True, + "total_chunks": total_chunks, + "hnsw_available": hnsw_available, + "hnsw_count": hnsw_count, + "dimension": dimension, + "categories": categories, + "model_config": { + "backend": "fastembed", + "profile": "code", + "dimension": 768, + "max_tokens": 8192 + } + } + + except Exception as e: + return { + "success": False, + "total_chunks": 0, + "hnsw_available": False, + "hnsw_count": 0, + "dimension": 0, + "error": str(e) + } + + def reindex(self) -> Dict[str, Any]: + """ + Rebuild HNSW index from scratch. + + Returns: + Result dict with success and timing + """ + try: + start_time = time.time() + + self.store.rebuild_ann_index() + + elapsed_time = time.time() - start_time + + return { + "success": True, + "hnsw_count": self.store.ann_count, + "elapsed_time": round(elapsed_time, 3) + } + + except Exception as e: + return { + "success": False, + "error": str(e) + } + + +def main(): + """Main entry point. Reads JSON from stdin, writes JSON to stdout.""" + try: + raw_input = sys.stdin.read() + if not raw_input.strip(): + print(json.dumps({ + "success": False, + "error": "No input provided. Send JSON via stdin." 
+ })) + sys.exit(1) + + request = json.loads(raw_input) + except json.JSONDecodeError as e: + print(json.dumps({ + "success": False, + "error": f"Invalid JSON input: {e}" + })) + sys.exit(1) + + operation = request.get("operation") + store_path = request.get("store_path") + + if not operation: + print(json.dumps({ + "success": False, + "error": "Missing required field: operation" + })) + sys.exit(1) + + if not store_path: + print(json.dumps({ + "success": False, + "error": "Missing required field: store_path" + })) + sys.exit(1) + + try: + embedder = UnifiedMemoryEmbedder(store_path) + + if operation == "embed": + chunks = request.get("chunks", []) + batch_size = request.get("batch_size", 8) + result = embedder.embed(chunks, batch_size=batch_size) + + elif operation == "search": + query = request.get("query", "") + if not query: + result = {"success": False, "error": "Missing required field: query", "matches": []} + else: + top_k = request.get("top_k", 10) + min_score = request.get("min_score", 0.3) + category = request.get("category") + result = embedder.search(query, top_k=top_k, min_score=min_score, category=category) + + elif operation == "search_by_vector": + vector = request.get("vector", []) + if not vector: + result = {"success": False, "error": "Missing required field: vector", "matches": []} + else: + top_k = request.get("top_k", 10) + min_score = request.get("min_score", 0.3) + category = request.get("category") + result = embedder.search_by_vector(vector, top_k=top_k, min_score=min_score, category=category) + + elif operation == "status": + result = embedder.status() + + elif operation == "reindex": + result = embedder.reindex() + + else: + result = { + "success": False, + "error": f"Unknown operation: {operation}. 
Valid: embed, search, search_by_vector, status, reindex" + } + + print(json.dumps(result)) + + # Clean up ONNX resources to ensure process can exit cleanly + clear_embedder_cache() + + except Exception as e: + try: + clear_embedder_cache() + except Exception: + pass + print(json.dumps({ + "success": False, + "error": str(e) + })) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/ccw/src/cli.ts b/ccw/src/cli.ts index fa646dc6..424b050d 100644 --- a/ccw/src/cli.ts +++ b/ccw/src/cli.ts @@ -263,6 +263,10 @@ export function run(argv: string[]): void { .option('--output ', 'Output file path for export') .option('--overwrite', 'Overwrite existing memories when importing') .option('--prefix ', 'Add prefix to imported memory IDs') + .option('--unified', 'Use unified vector+FTS search (for search subcommand)') + .option('--topK ', 'Max results for unified search', '20') + .option('--minScore ', 'Min relevance score for unified search', '0') + .option('--category ', 'Filter by category for unified search') .action((subcommand, args, options) => coreMemoryCommand(subcommand, args, options)); // Hook command - CLI endpoint for Claude Code hooks diff --git a/ccw/src/commands/core-memory.ts b/ccw/src/commands/core-memory.ts index 596642fc..0ecd7cd5 100644 --- a/ccw/src/commands/core-memory.ts +++ b/ccw/src/commands/core-memory.ts @@ -35,6 +35,10 @@ interface CommandOptions { delete?: boolean; merge?: string; dedup?: boolean; + unified?: boolean; + topK?: string; + minScore?: string; + category?: string; } /** @@ -844,6 +848,114 @@ async function jobsAction(options: CommandOptions): Promise { } } +/** + * Unified vector+FTS search across all memory stores + */ +async function unifiedSearchAction(keyword: string, options: CommandOptions): Promise { + if (!keyword || keyword.trim() === '') { + console.error(chalk.red('Error: Query is required')); + console.error(chalk.gray('Usage: ccw core-memory search --unified [--topK 20] [--minScore 0] [--category ]')); + 
process.exit(1); + } + + try { + const { UnifiedMemoryService } = await import('../core/unified-memory-service.js'); + const service = new UnifiedMemoryService(getProjectPath()); + + const topK = parseInt(options.topK || '20', 10); + const minScore = parseFloat(options.minScore || '0'); + const category = options.category || undefined; + + console.log(chalk.cyan(`\n Unified search: "${keyword}" (topK=${topK}, minScore=${minScore})\n`)); + + const results = await service.search(keyword, { + limit: topK, + minScore, + category: category as any, + }); + + if (results.length === 0) { + console.log(chalk.yellow(' No results found.\n')); + return; + } + + if (options.json) { + console.log(JSON.stringify({ query: keyword, total: results.length, results }, null, 2)); + return; + } + + console.log(chalk.gray(' -----------------------------------------------------------------------')); + + for (const result of results) { + const sources: string[] = []; + if (result.rank_sources.vector_rank) sources.push(`vec:#${result.rank_sources.vector_rank}`); + if (result.rank_sources.fts_rank) sources.push(`fts:#${result.rank_sources.fts_rank}`); + if (result.rank_sources.heat_score) sources.push(`heat:${result.rank_sources.heat_score.toFixed(1)}`); + + const snippet = result.content.substring(0, 120).replace(/\n/g, ' '); + + console.log( + chalk.cyan(` ${result.source_id}`) + + chalk.gray(` [${result.source_type}/${result.category}]`) + + chalk.white(` score=${result.score.toFixed(4)}`) + ); + console.log(chalk.gray(` Sources: ${sources.join(' | ')}`)); + console.log(chalk.white(` ${snippet}${result.content.length > 120 ? '...' 
: ''}`)); + console.log(chalk.gray(' -----------------------------------------------------------------------')); + } + + console.log(chalk.gray(`\n Total: ${results.length}\n`)); + + } catch (error) { + console.error(chalk.red(`Error: ${(error as Error).message}`)); + process.exit(1); + } +} + +/** + * Rebuild the unified HNSW vector index from scratch + */ +async function reindexAction(options: CommandOptions): Promise { + try { + const { UnifiedVectorIndex, isUnifiedEmbedderAvailable } = await import('../core/unified-vector-index.js'); + + if (!isUnifiedEmbedderAvailable()) { + console.error(chalk.red('Error: Unified embedder is not available.')); + console.error(chalk.gray('Ensure Python venv and embedder script are set up.')); + process.exit(1); + } + + const index = new UnifiedVectorIndex(getProjectPath()); + + console.log(chalk.cyan('\n Rebuilding unified vector index...\n')); + + const result = await index.reindexAll(); + + if (!result.success) { + console.error(chalk.red(` Reindex failed: ${result.error}\n`)); + process.exit(1); + } + + if (options.json) { + console.log(JSON.stringify(result, null, 2)); + return; + } + + console.log(chalk.green(' Reindex complete.')); + if (result.hnsw_count !== undefined) { + console.log(chalk.white(` HNSW vectors: ${result.hnsw_count}`)); + } + if (result.elapsed_time !== undefined) { + console.log(chalk.white(` Elapsed: ${result.elapsed_time.toFixed(2)}s`)); + } + console.log(); + + } catch (error) { + console.error(chalk.red(`Error: ${(error as Error).message}`)); + process.exit(1); + } +} + /** * Core Memory command entry point */ @@ -889,7 +1001,11 @@ export async function coreMemoryCommand( break; case 'search': - await searchAction(textArg, options); + if (options.unified) { + await unifiedSearchAction(textArg, options); + } else { + await searchAction(textArg, options); + } break; case 'projects': @@ -921,6 +1037,10 @@ export async function coreMemoryCommand( await jobsAction(options); break; + case 'reindex': + 
await reindexAction(options); + break; + default: console.log(chalk.bold.cyan('\n CCW Core Memory\n')); console.log(' Manage core memory entries and session clusters.\n'); @@ -945,12 +1065,14 @@ export async function coreMemoryCommand( console.log(chalk.white(' context ') + chalk.gray('Get progressive index')); console.log(chalk.white(' load-cluster ') + chalk.gray('Load cluster context')); console.log(chalk.white(' search ') + chalk.gray('Search sessions')); + console.log(chalk.white(' search --unified ') + chalk.gray('Unified vector+FTS search')); console.log(); console.log(chalk.bold(' Memory V2 Pipeline:')); console.log(chalk.white(' extract ') + chalk.gray('Run batch memory extraction')); console.log(chalk.white(' extract-status ') + chalk.gray('Show extraction pipeline status')); console.log(chalk.white(' consolidate ') + chalk.gray('Run memory consolidation')); console.log(chalk.white(' jobs ') + chalk.gray('List all pipeline jobs')); + console.log(chalk.white(' reindex ') + chalk.gray('Rebuild unified vector index')); console.log(); console.log(chalk.bold(' Options:')); console.log(chalk.gray(' --id Memory ID (for export/summary)')); diff --git a/ccw/src/commands/hook.ts b/ccw/src/commands/hook.ts index fac4ea10..43906d7d 100644 --- a/ccw/src/commands/hook.ts +++ b/ccw/src/commands/hook.ts @@ -12,7 +12,7 @@ interface HookOptions { stdin?: boolean; sessionId?: string; prompt?: string; - type?: 'session-start' | 'context'; + type?: 'session-start' | 'context' | 'session-end'; path?: string; } @@ -95,10 +95,32 @@ function getProjectPath(hookCwd?: string): string { return hookCwd || process.cwd(); } +/** + * Check if UnifiedContextBuilder is available (embedder dependencies present). + * Returns the builder instance or null if not available. 
+ */ +async function tryCreateContextBuilder(projectPath: string): Promise { + try { + const { isUnifiedEmbedderAvailable } = await import('../core/unified-vector-index.js'); + if (!isUnifiedEmbedderAvailable()) { + return null; + } + const { UnifiedContextBuilder } = await import('../core/unified-context-builder.js'); + return new UnifiedContextBuilder(projectPath); + } catch { + return null; + } +} + /** * Session context action - provides progressive context loading - * First prompt: returns session overview with clusters - * Subsequent prompts: returns intent-matched sessions + * + * Uses UnifiedContextBuilder when available (embedder present): + * - session-start: MEMORY.md summary + clusters + hot entities + patterns + * - per-prompt: vector search across all memory categories + * + * Falls back to SessionClusteringService.getProgressiveIndex() when + * the embedder is unavailable, preserving backward compatibility. */ async function sessionContextAction(options: HookOptions): Promise { let { stdin, sessionId, prompt } = options; @@ -154,29 +176,43 @@ async function sessionContextAction(options: HookOptions): Promise { let contextType: 'session-start' | 'context'; let content = ''; - // Dynamic import to avoid circular dependencies - const { SessionClusteringService } = await import('../core/session-clustering-service.js'); - const clusteringService = new SessionClusteringService(projectPath); + // Try UnifiedContextBuilder first; fall back to getProgressiveIndex + const contextBuilder = await tryCreateContextBuilder(projectPath); - if (isFirstPrompt) { - // First prompt: return session overview with clusters - contextType = 'session-start'; - content = await clusteringService.getProgressiveIndex({ - type: 'session-start', - sessionId - }); - } else if (prompt && prompt.trim().length > 0) { - // Subsequent prompts with content: return intent-matched sessions - contextType = 'context'; - content = await clusteringService.getProgressiveIndex({ - type: 
'context', - sessionId, - prompt - }); + if (contextBuilder) { + // Use UnifiedContextBuilder + if (isFirstPrompt) { + contextType = 'session-start'; + content = await contextBuilder.buildSessionStartContext(); + } else if (prompt && prompt.trim().length > 0) { + contextType = 'context'; + content = await contextBuilder.buildPromptContext(prompt); + } else { + contextType = 'context'; + content = ''; + } } else { - // Subsequent prompts without content: return minimal context - contextType = 'context'; - content = ''; // No context needed for empty prompts + // Fallback: use legacy SessionClusteringService.getProgressiveIndex() + const { SessionClusteringService } = await import('../core/session-clustering-service.js'); + const clusteringService = new SessionClusteringService(projectPath); + + if (isFirstPrompt) { + contextType = 'session-start'; + content = await clusteringService.getProgressiveIndex({ + type: 'session-start', + sessionId + }); + } else if (prompt && prompt.trim().length > 0) { + contextType = 'context'; + content = await clusteringService.getProgressiveIndex({ + type: 'context', + sessionId, + prompt + }); + } else { + contextType = 'context'; + content = ''; + } } if (stdin) { @@ -194,6 +230,7 @@ async function sessionContextAction(options: HookOptions): Promise { console.log(chalk.cyan('Type:'), contextType); console.log(chalk.cyan('First Prompt:'), isFirstPrompt ? 'Yes' : 'No'); console.log(chalk.cyan('Load Count:'), newState.loadCount); + console.log(chalk.cyan('Builder:'), contextBuilder ? 'UnifiedContextBuilder' : 'Legacy (getProgressiveIndex)'); console.log(chalk.gray('─'.repeat(40))); if (content) { console.log(content); @@ -210,6 +247,81 @@ async function sessionContextAction(options: HookOptions): Promise { } } +/** + * Session end action - triggers async background tasks for memory maintenance. + * + * Tasks executed: + * 1. Incremental vector embedding (index new/updated content) + * 2. 
Incremental clustering (cluster unclustered sessions) + * 3. Heat score updates (recalculate entity heat scores) + * + * All tasks run best-effort; failures are logged but do not affect exit code. + */ +async function sessionEndAction(options: HookOptions): Promise { + let { stdin, sessionId } = options; + let hookCwd: string | undefined; + + if (stdin) { + try { + const stdinData = await readStdin(); + if (stdinData) { + const hookData = JSON.parse(stdinData) as HookData; + sessionId = hookData.session_id || sessionId; + hookCwd = hookData.cwd; + } + } catch { + // Silently continue if stdin parsing fails + } + } + + if (!sessionId) { + if (!stdin) { + console.error(chalk.red('Error: --session-id is required')); + } + process.exit(stdin ? 0 : 1); + } + + try { + const projectPath = getProjectPath(hookCwd); + const contextBuilder = await tryCreateContextBuilder(projectPath); + + if (!contextBuilder) { + // UnifiedContextBuilder not available - skip session-end tasks + if (!stdin) { + console.log(chalk.gray('(UnifiedContextBuilder not available, skipping session-end tasks)')); + } + process.exit(0); + } + + const tasks: Array<{ name: string; execute: () => Promise }> = contextBuilder.buildSessionEndTasks(sessionId); + + if (!stdin) { + console.log(chalk.green(`Session End: executing ${tasks.length} background tasks...`)); + } + + // Execute all tasks concurrently (best-effort) + const results = await Promise.allSettled( + tasks.map((task: { name: string; execute: () => Promise }) => task.execute()) + ); + + if (!stdin) { + for (let i = 0; i < tasks.length; i++) { + const status = results[i].status === 'fulfilled' ? 'OK' : 'FAIL'; + const color = status === 'OK' ? 
chalk.green : chalk.yellow; + console.log(color(` [${status}] ${tasks[i].name}`)); + } + } + + process.exit(0); + } catch (error) { + if (stdin) { + process.exit(0); + } + console.error(chalk.red(`Error: ${(error as Error).message}`)); + process.exit(1); + } +} + /** * Parse CCW status.json and output formatted status */ @@ -311,6 +423,7 @@ ${chalk.bold('USAGE')} ${chalk.bold('SUBCOMMANDS')} parse-status Parse CCW status.json and display current/next command session-context Progressive session context loading (replaces curl/bash hook) + session-end Trigger background memory maintenance tasks notify Send notification to ccw view dashboard ${chalk.bold('OPTIONS')} @@ -363,6 +476,9 @@ export async function hookCommand( case 'context': await sessionContextAction(options); break; + case 'session-end': + await sessionEndAction(options); + break; case 'notify': await notifyAction(options); break; diff --git a/ccw/src/config/remote-notification-config.ts b/ccw/src/config/remote-notification-config.ts new file mode 100644 index 00000000..be37ab15 --- /dev/null +++ b/ccw/src/config/remote-notification-config.ts @@ -0,0 +1,154 @@ +// ======================================== +// Remote Notification Configuration Manager +// ======================================== +// Manages persistent storage of remote notification settings +// Storage: ~/.ccw/config/remote-notification.json + +import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs'; +import { join } from 'path'; +import { getCCWHome, ensureStorageDir } from './storage-paths.js'; +import type { + RemoteNotificationConfig, + DEFAULT_REMOTE_NOTIFICATION_CONFIG, +} from '../types/remote-notification.js'; +import { DeepPartial, deepMerge } from '../types/util.js'; + +/** + * Configuration file path + */ +function getConfigFilePath(): string { + return join(getCCWHome(), 'config', 'remote-notification.json'); +} + +/** + * Ensure configuration directory exists + */ +function ensureConfigDir(): void { + const 
configDir = join(getCCWHome(), 'config'); + ensureStorageDir(configDir); +} + +/** + * Default configuration factory + */ +export function getDefaultConfig(): RemoteNotificationConfig { + return { + enabled: false, + platforms: {}, + events: [ + { event: 'ask-user-question', platforms: ['discord', 'telegram'], enabled: true }, + { event: 'session-start', platforms: [], enabled: false }, + { event: 'session-end', platforms: [], enabled: false }, + { event: 'task-completed', platforms: [], enabled: false }, + { event: 'task-failed', platforms: ['discord', 'telegram'], enabled: true }, + ], + timeout: 10000, + }; +} + +/** + * Load remote notification configuration + * Returns default config if file doesn't exist + */ +export function loadConfig(): RemoteNotificationConfig { + const configPath = getConfigFilePath(); + + if (!existsSync(configPath)) { + return getDefaultConfig(); + } + + try { + const data = readFileSync(configPath, 'utf-8'); + const parsed = JSON.parse(data); + + // Merge with defaults to ensure all fields exist + return deepMerge(getDefaultConfig(), parsed); + } catch (error) { + console.error('[RemoteNotificationConfig] Failed to load config:', error); + return getDefaultConfig(); + } +} + +/** + * Save remote notification configuration + */ +export function saveConfig(config: RemoteNotificationConfig): void { + ensureConfigDir(); + const configPath = getConfigFilePath(); + + try { + writeFileSync(configPath, JSON.stringify(config, null, 2), 'utf-8'); + } catch (error) { + console.error('[RemoteNotificationConfig] Failed to save config:', error); + throw error; + } +} + +/** + * Update configuration with partial changes + */ +export function updateConfig( + updates: DeepPartial +): RemoteNotificationConfig { + const current = loadConfig(); + const updated = deepMerge(current, updates); + saveConfig(updated); + return updated; +} + +/** + * Reset configuration to defaults + */ +export function resetConfig(): RemoteNotificationConfig { + const 
defaultConfig = getDefaultConfig(); + saveConfig(defaultConfig); + return defaultConfig; +} + +/** + * Check if any platform is configured and enabled + */ +export function hasEnabledPlatform(config: RemoteNotificationConfig): boolean { + if (!config.enabled) return false; + + const { discord, telegram, webhook } = config.platforms; + + return ( + (discord?.enabled && !!discord.webhookUrl) || + (telegram?.enabled && !!telegram.botToken && !!telegram.chatId) || + (webhook?.enabled && !!webhook.url) + ); +} + +/** + * Get enabled platforms for a specific event + */ +export function getEnabledPlatformsForEvent( + config: RemoteNotificationConfig, + eventType: string +): string[] { + if (!config.enabled) return []; + + const eventConfig = config.events.find((e) => e.event === eventType); + if (!eventConfig || !eventConfig.enabled) return []; + + return eventConfig.platforms.filter((platform) => { + const platformConfig = config.platforms[platform as keyof typeof config.platforms]; + if (!platformConfig) return false; + + switch (platform) { + case 'discord': + return (platformConfig as { enabled: boolean; webhookUrl?: string }).enabled && + !!(platformConfig as { webhookUrl?: string }).webhookUrl; + case 'telegram': + return (platformConfig as { enabled: boolean; botToken?: string; chatId?: string }).enabled && + !!(platformConfig as { botToken?: string }).botToken && + !!(platformConfig as { chatId?: string }).chatId; + case 'webhook': + return (platformConfig as { enabled: boolean; url?: string }).enabled && + !!(platformConfig as { url?: string }).url; + default: + return false; + } + }); +} diff --git a/ccw/src/config/storage-paths.ts b/ccw/src/config/storage-paths.ts index 7d0dcef3..ee6e9d86 100644 --- a/ccw/src/config/storage-paths.ts +++ b/ccw/src/config/storage-paths.ts @@ -388,6 +388,15 @@ export interface ProjectPaths { /** Skills directory */ skills: string; }; + /** Unified vector index paths (HNSW-backed) */ + unifiedVectors: { + /** Root: 
/unified-vectors/ */ + root: string; + /** SQLite database for vector metadata */ + vectorsDb: string; + /** HNSW index file */ + hnswIndex: string; + }; } /** @@ -454,6 +463,11 @@ export function getProjectPaths(projectPath: string): ProjectPaths { memoryMd: join(projectDir, 'core-memory', 'v2', 'MEMORY.md'), skills: join(projectDir, 'core-memory', 'v2', 'skills'), }, + unifiedVectors: { + root: join(projectDir, 'unified-vectors'), + vectorsDb: join(projectDir, 'unified-vectors', 'vectors.db'), + hnswIndex: join(projectDir, 'unified-vectors', 'vectors.hnsw'), + }, }; } @@ -483,6 +497,11 @@ export function getProjectPathsById(projectId: string): ProjectPaths { memoryMd: join(projectDir, 'core-memory', 'v2', 'MEMORY.md'), skills: join(projectDir, 'core-memory', 'v2', 'skills'), }, + unifiedVectors: { + root: join(projectDir, 'unified-vectors'), + vectorsDb: join(projectDir, 'unified-vectors', 'vectors.db'), + hnswIndex: join(projectDir, 'unified-vectors', 'vectors.hnsw'), + }, }; } diff --git a/ccw/src/core/core-memory-store.ts b/ccw/src/core/core-memory-store.ts index fbb80bd1..c7518bfc 100644 --- a/ccw/src/core/core-memory-store.ts +++ b/ccw/src/core/core-memory-store.ts @@ -7,6 +7,8 @@ import Database from 'better-sqlite3'; import { existsSync, mkdirSync } from 'fs'; import { join } from 'path'; import { StoragePaths, ensureStorageDir } from '../config/storage-paths.js'; +import { UnifiedVectorIndex, isUnifiedEmbedderAvailable } from './unified-vector-index.js'; +import type { ChunkMetadata } from './unified-vector-index.js'; // Types export interface CoreMemory { @@ -101,6 +103,7 @@ export class CoreMemoryStore { private db: Database.Database; private dbPath: string; private projectPath: string; + private vectorIndex: UnifiedVectorIndex | null = null; constructor(projectPath: string) { this.projectPath = projectPath; @@ -328,6 +331,38 @@ export class CoreMemoryStore { return this.db; } + /** + * Get or create the UnifiedVectorIndex instance (lazy 
initialization). + * Returns null if the embedder is not available. + */ + private getVectorIndex(): UnifiedVectorIndex | null { + if (this.vectorIndex) return this.vectorIndex; + if (!isUnifiedEmbedderAvailable()) return null; + this.vectorIndex = new UnifiedVectorIndex(this.projectPath); + return this.vectorIndex; + } + + /** + * Fire-and-forget: sync content to the vector index. + * Logs errors but never throws, to avoid disrupting the synchronous write path. + */ + private syncToVectorIndex(content: string, sourceId: string): void { + const idx = this.getVectorIndex(); + if (!idx) return; + + const metadata: ChunkMetadata = { + source_id: sourceId, + source_type: 'core_memory', + category: 'core_memory', + }; + + idx.indexContent(content, metadata).catch((err) => { + if (process.env.DEBUG) { + console.error(`[CoreMemoryStore] Vector index sync failed for ${sourceId}:`, (err as Error).message); + } + }); + } + /** * Generate timestamp-based ID for core memory */ @@ -387,6 +422,9 @@ export class CoreMemoryStore { id ); + // Sync updated content to vector index + this.syncToVectorIndex(memory.content, id); + return this.getMemory(id)!; } else { // Insert new memory @@ -406,6 +444,9 @@ export class CoreMemoryStore { memory.metadata || null ); + // Sync new content to vector index + this.syncToVectorIndex(memory.content, id); + return this.getMemory(id)!; } } diff --git a/ccw/src/core/memory-extraction-pipeline.ts b/ccw/src/core/memory-extraction-pipeline.ts index 65c4a435..a1d2b8c0 100644 --- a/ccw/src/core/memory-extraction-pipeline.ts +++ b/ccw/src/core/memory-extraction-pipeline.ts @@ -13,6 +13,10 @@ import type { ConversationRecord } from '../tools/cli-history-store.js'; import { getHistoryStore } from '../tools/cli-history-store.js'; import { getCoreMemoryStore, type Stage1Output } from './core-memory-store.js'; import { MemoryJobScheduler } from './memory-job-scheduler.js'; +import { UnifiedVectorIndex, isUnifiedEmbedderAvailable } from 
'./unified-vector-index.js'; +import type { ChunkMetadata } from './unified-vector-index.js'; +import { SessionClusteringService } from './session-clustering-service.js'; +import { PatternDetector } from './pattern-detector.js'; import { MAX_SESSION_AGE_DAYS, MIN_IDLE_HOURS, @@ -384,9 +388,38 @@ export class MemoryExtractionPipeline { const store = getCoreMemoryStore(this.projectPath); store.upsertStage1Output(output); + // Sync extracted content to vector index (fire-and-forget) + this.syncExtractionToVectorIndex(output); + return output; } + /** + * Sync extraction output to the vector index. + * Indexes both raw_memory and rollout_summary with category='cli_history'. + * Fire-and-forget: errors are logged but never thrown. + */ + private syncExtractionToVectorIndex(output: Stage1Output): void { + if (!isUnifiedEmbedderAvailable()) return; + + const vectorIndex = new UnifiedVectorIndex(this.projectPath); + const combinedContent = `${output.raw_memory}\n\n---\n\n${output.rollout_summary}`; + const metadata: ChunkMetadata = { + source_id: output.thread_id, + source_type: 'cli_history', + category: 'cli_history', + }; + + vectorIndex.indexContent(combinedContent, metadata).catch((err) => { + if (process.env.DEBUG) { + console.error( + `[MemoryExtractionPipeline] Vector index sync failed for ${output.thread_id}:`, + (err as Error).message + ); + } + }); + } + // ======================================================================== // Batch orchestration // ======================================================================== @@ -461,6 +494,76 @@ export class MemoryExtractionPipeline { await Promise.all(promises); } + // Post-extraction: trigger incremental clustering and pattern detection + // These are fire-and-forget to avoid blocking the main extraction flow. 
+ if (result.succeeded > 0) { + this.triggerPostExtractionHooks( + eligibleSessions.filter((_, i) => i < result.processed).map(s => s.id) + ); + } + return result; } + + /** + * Fire-and-forget: trigger incremental clustering and pattern detection + * after Phase 1 extraction completes. + * + * - incrementalCluster: processes each newly extracted session + * - detectPatterns: runs pattern detection across all chunks + * + * Errors are logged but never thrown, to avoid disrupting the caller. + */ + private triggerPostExtractionHooks(extractedSessionIds: string[]): void { + const clusteringService = new SessionClusteringService(this.projectPath); + const patternDetector = new PatternDetector(this.projectPath); + + // Incremental clustering for each extracted session (fire-and-forget) + (async () => { + try { + // Check frequency control before running clustering + const shouldCluster = await clusteringService.shouldRunClustering(); + if (!shouldCluster) { + if (process.env.DEBUG) { + console.log('[PostExtraction] Clustering skipped: frequency control not met'); + } + return; + } + + for (const sessionId of extractedSessionIds) { + try { + await clusteringService.incrementalCluster(sessionId); + } catch (err) { + if (process.env.DEBUG) { + console.warn( + `[PostExtraction] Incremental clustering failed for ${sessionId}:`, + (err as Error).message + ); + } + } + } + } catch (err) { + if (process.env.DEBUG) { + console.warn('[PostExtraction] Clustering hook failed:', (err as Error).message); + } + } + })(); + + // Pattern detection (fire-and-forget) + (async () => { + try { + const result = await patternDetector.detectPatterns(); + if (result.patterns.length > 0) { + console.log( + `[PostExtraction] Pattern detection: ${result.patterns.length} patterns found, ` + + `${result.solidified.length} solidified (${result.elapsedMs}ms)` + ); + } + } catch (err) { + if (process.env.DEBUG) { + console.warn('[PostExtraction] Pattern detection failed:', (err as Error).message); + } 
+ } + })(); + } } diff --git a/ccw/src/core/pattern-detector.ts b/ccw/src/core/pattern-detector.ts new file mode 100644 index 00000000..b0353aa6 --- /dev/null +++ b/ccw/src/core/pattern-detector.ts @@ -0,0 +1,485 @@ +/** + * Pattern Detector - Detects recurring content patterns across sessions + * + * Uses vector clustering (cosine similarity > 0.85) to group semantically similar + * chunks into patterns. Patterns appearing in N>=3 distinct sessions are flagged + * as candidates. High-confidence patterns (>=0.8) are solidified into CoreMemory + * and skills/*.md files. + */ + +import { CoreMemoryStore, getCoreMemoryStore } from './core-memory-store.js'; +import { UnifiedVectorIndex, isUnifiedEmbedderAvailable } from './unified-vector-index.js'; +import type { VectorSearchMatch } from './unified-vector-index.js'; +import { existsSync, mkdirSync, writeFileSync } from 'fs'; +import { join } from 'path'; + +// -- Constants -- + +/** Minimum cosine similarity to group chunks into the same pattern */ +const PATTERN_SIMILARITY_THRESHOLD = 0.85; + +/** Minimum number of distinct sessions a pattern must appear in */ +const MIN_SESSION_FREQUENCY = 3; + +/** Confidence threshold for auto-solidification */ +const SOLIDIFY_CONFIDENCE_THRESHOLD = 0.8; + +/** Maximum number of chunks to analyze per detection run */ +const MAX_CHUNKS_TO_ANALYZE = 200; + +/** Top-K neighbors to search per chunk during clustering */ +const NEIGHBOR_TOP_K = 15; + +// -- Types -- + +export interface DetectedPattern { + /** Unique pattern identifier */ + id: string; + /** Human-readable pattern name derived from content */ + name: string; + /** Representative content snippet */ + representative: string; + /** Source IDs (sessions) where this pattern appears */ + sourceIds: string[]; + /** Number of distinct sessions */ + sessionCount: number; + /** Average similarity score within the pattern group */ + avgSimilarity: number; + /** Confidence score (0-1), based on frequency and similarity */ + 
confidence: number; + /** Category of the chunks in this pattern */ + category: string; +} + +export interface PatternDetectionResult { + /** All detected patterns */ + patterns: DetectedPattern[]; + /** Number of chunks analyzed */ + chunksAnalyzed: number; + /** Patterns that were solidified (written to CoreMemory + skills) */ + solidified: string[]; + /** Elapsed time in ms */ + elapsedMs: number; +} + +export interface SolidifyResult { + memoryId: string; + skillPath: string | null; +} + +// -- PatternDetector -- + +export class PatternDetector { + private projectPath: string; + private coreMemoryStore: CoreMemoryStore; + private vectorIndex: UnifiedVectorIndex | null = null; + + constructor(projectPath: string) { + this.projectPath = projectPath; + this.coreMemoryStore = getCoreMemoryStore(projectPath); + + if (isUnifiedEmbedderAvailable()) { + this.vectorIndex = new UnifiedVectorIndex(projectPath); + } + } + + /** + * Detect recurring patterns across sessions by vector clustering. + * + * Algorithm: + * 1. Get representative chunks from VectorStore (via search with broad queries) + * 2. For each chunk, search HNSW for nearest neighbors (cosine > PATTERN_SIMILARITY_THRESHOLD) + * 3. Group chunks with high mutual similarity into pattern clusters + * 4. Count distinct source_ids per cluster (session frequency) + * 5. 
Patterns with sessionCount >= MIN_SESSION_FREQUENCY become candidates + * + * @returns Detection result with candidate patterns + */ + async detectPatterns(): Promise { + const startTime = Date.now(); + const result: PatternDetectionResult = { + patterns: [], + chunksAnalyzed: 0, + solidified: [], + elapsedMs: 0, + }; + + if (!this.vectorIndex) { + result.elapsedMs = Date.now() - startTime; + return result; + } + + // Step 1: Gather chunks from the vector store via broad category searches + const allChunks = await this.gatherChunksForAnalysis(); + result.chunksAnalyzed = allChunks.length; + + if (allChunks.length < MIN_SESSION_FREQUENCY) { + result.elapsedMs = Date.now() - startTime; + return result; + } + + // Step 2: Cluster chunks by vector similarity + const patternGroups = await this.clusterChunksByVector(allChunks); + + // Step 3: Filter by session frequency and build DetectedPattern objects + for (const group of patternGroups) { + const uniqueSources = new Set(group.map(c => c.source_id)); + if (uniqueSources.size < MIN_SESSION_FREQUENCY) continue; + + const avgSim = group.reduce((sum, c) => sum + c.score, 0) / group.length; + + // Confidence: combines frequency (normalized) and avg similarity + const frequencyScore = Math.min(uniqueSources.size / 10, 1.0); + const confidence = avgSim * 0.6 + frequencyScore * 0.4; + + const representative = group[0]; // Highest scoring chunk + const patternName = this.derivePatternName(group); + const patternId = `PAT-${Date.now()}-${Math.random().toString(36).substring(2, 6)}`; + + result.patterns.push({ + id: patternId, + name: patternName, + representative: representative.content.substring(0, 500), + sourceIds: Array.from(uniqueSources), + sessionCount: uniqueSources.size, + avgSimilarity: Math.round(avgSim * 1000) / 1000, + confidence: Math.round(confidence * 1000) / 1000, + category: representative.category || 'unknown', + }); + } + + // Sort by confidence descending + result.patterns.sort((a, b) => b.confidence - 
a.confidence); + + // Step 4: Auto-solidify high-confidence patterns (fire-and-forget) + for (const pattern of result.patterns) { + if (pattern.confidence >= SOLIDIFY_CONFIDENCE_THRESHOLD) { + try { + await this.solidifyPattern(pattern); + result.solidified.push(pattern.id); + } catch (err) { + console.warn( + `[PatternDetector] Failed to solidify pattern ${pattern.id}:`, + (err as Error).message + ); + } + } + } + + result.elapsedMs = Date.now() - startTime; + return result; + } + + /** + * Gather a representative set of chunks for pattern analysis. + * Uses broad search queries across categories to collect diverse chunks. + */ + private async gatherChunksForAnalysis(): Promise { + if (!this.vectorIndex) return []; + + const allChunks: VectorSearchMatch[] = []; + const seenContent = new Set(); + + // Search across common categories with broad queries + const broadQueries = [ + 'implementation pattern', + 'configuration setup', + 'error handling', + 'testing approach', + 'workflow process', + ]; + + const categories = ['core_memory', 'cli_history', 'workflow'] as const; + + for (const category of categories) { + for (const query of broadQueries) { + if (allChunks.length >= MAX_CHUNKS_TO_ANALYZE) break; + + try { + const result = await this.vectorIndex.search(query, { + topK: Math.ceil(MAX_CHUNKS_TO_ANALYZE / (broadQueries.length * categories.length)), + minScore: 0.1, + category, + }); + + if (result.success) { + for (const match of result.matches) { + // Deduplicate by content hash (first 100 chars) + const contentKey = match.content.substring(0, 100); + if (!seenContent.has(contentKey)) { + seenContent.add(contentKey); + allChunks.push(match); + } + if (allChunks.length >= MAX_CHUNKS_TO_ANALYZE) break; + } + } + } catch { + // Search failed for this query/category, continue + } + } + } + + return allChunks; + } + + /** + * Cluster chunks by vector similarity using HNSW neighbor search. + * + * For each unprocessed chunk, search for its nearest neighbors. 
+ * Chunks with cosine similarity > PATTERN_SIMILARITY_THRESHOLD are grouped together. + * Uses a union-find-like approach via visited tracking. + */ + private async clusterChunksByVector( + chunks: VectorSearchMatch[] + ): Promise { + if (!this.vectorIndex) return []; + + const groups: VectorSearchMatch[][] = []; + const processed = new Set(); + + for (let i = 0; i < chunks.length; i++) { + if (processed.has(i)) continue; + + const seedChunk = chunks[i]; + const group: VectorSearchMatch[] = [seedChunk]; + processed.add(i); + + // Search for neighbors of this chunk's content + try { + const neighbors = await this.vectorIndex.search(seedChunk.content, { + topK: NEIGHBOR_TOP_K, + minScore: PATTERN_SIMILARITY_THRESHOLD, + }); + + if (neighbors.success) { + for (const neighbor of neighbors.matches) { + // Skip self-matches + if (neighbor.content === seedChunk.content) continue; + + // Find this neighbor in our chunk list + for (let j = 0; j < chunks.length; j++) { + if (processed.has(j)) continue; + if ( + chunks[j].source_id === neighbor.source_id && + chunks[j].chunk_index === neighbor.chunk_index + ) { + group.push({ ...chunks[j], score: neighbor.score }); + processed.add(j); + break; + } + } + + // Also include neighbors not in our original list + if (neighbor.source_id && neighbor.source_id !== seedChunk.source_id) { + // Check if already in group by source_id + const alreadyInGroup = group.some( + g => g.source_id === neighbor.source_id && g.chunk_index === neighbor.chunk_index + ); + if (!alreadyInGroup) { + group.push(neighbor); + } + } + } + } + } catch { + // HNSW search failed, skip this chunk's neighborhood + } + + // Only keep groups with chunks from multiple sources + const uniqueSources = new Set(group.map(c => c.source_id)); + if (uniqueSources.size >= 2) { + groups.push(group); + } + } + + return groups; + } + + /** + * Derive a human-readable pattern name from a group of similar chunks. 
+ * Extracts common keywords/phrases from the representative content. + */ + private derivePatternName(group: VectorSearchMatch[]): string { + // Extended stopwords including generic tech terms + const stopwords = new Set([ + 'the', 'and', 'for', 'that', 'this', 'with', 'from', 'have', 'will', + 'are', 'was', 'were', 'been', 'what', 'when', 'where', 'which', + 'there', 'their', 'they', 'them', 'then', 'than', 'into', 'some', + 'code', 'file', 'function', 'class', 'import', 'export', 'const', + 'async', 'await', 'return', 'type', 'interface', 'string', 'number', + 'true', 'false', 'null', 'undefined', 'object', 'array', 'value', + 'data', 'result', 'error', 'name', 'path', 'index', 'item', 'list', + 'should', 'would', 'could', 'does', 'make', 'like', 'just', 'also', + 'used', 'using', 'each', 'other', 'more', 'only', 'need', 'very', + ]); + + const isSignificant = (w: string) => w.length >= 4 && !stopwords.has(w); + + // Count word and bigram frequency across all chunks + const wordFreq = new Map(); + const bigramFreq = new Map(); + + for (const chunk of group) { + const words = chunk.content.toLowerCase().split(/[\s\W]+/).filter(isSignificant); + const uniqueWords = new Set(words); + for (const word of uniqueWords) { + wordFreq.set(word, (wordFreq.get(word) || 0) + 1); + } + + // Extract bigrams from consecutive significant words + for (let i = 0; i < words.length - 1; i++) { + const bigram = `${words[i]}-${words[i + 1]}`; + bigramFreq.set(bigram, (bigramFreq.get(bigram) || 0) + 1); + } + } + + // Prefer bigrams that appear in multiple chunks + const topBigrams = Array.from(bigramFreq.entries()) + .filter(([, count]) => count >= 2) + .sort((a, b) => b[1] - a[1]); + + if (topBigrams.length > 0) { + // Use top bigram, optionally append a distinguishing single word + const name = topBigrams[0][0]; + const bigramWords = new Set(name.split('-')); + const extra = Array.from(wordFreq.entries()) + .filter(([w, count]) => count >= 2 && !bigramWords.has(w)) + .sort((a, b) => 
b[1] - a[1]); + if (extra.length > 0) { + const candidate = `${name}-${extra[0][0]}`; + return candidate.length <= 50 ? candidate : name; + } + return name; + } + + // Fallback to top single words + const topWords = Array.from(wordFreq.entries()) + .sort((a, b) => b[1] - a[1]) + .slice(0, 3) + .map(([w]) => w); + + if (topWords.length >= 2) { + const name = topWords.join('-'); + return name.length <= 50 ? name : topWords.slice(0, 2).join('-'); + } else if (topWords.length === 1) { + return topWords[0]; + } + + return 'unnamed-pattern'; + } + + /** + * Solidify a detected pattern by writing it to CoreMemory and skills/*.md. + * + * Creates: + * 1. A CoreMemory entry with the pattern content and metadata + * 2. A skills/{pattern_slug}.md file with the pattern documentation + * + * This method is fire-and-forget - errors are logged but not propagated. + * + * @param pattern - The detected pattern to solidify + * @returns Result with memory ID and skill file path + */ + async solidifyPattern(pattern: DetectedPattern): Promise { + // 1. Create CoreMemory entry + const memoryContent = this.buildPatternMemoryContent(pattern); + const memory = this.coreMemoryStore.upsertMemory({ + content: memoryContent, + summary: `Detected pattern: ${pattern.name} (${pattern.sessionCount} sessions, confidence: ${pattern.confidence})`, + metadata: JSON.stringify({ + type: 'detected_pattern', + pattern_id: pattern.id, + pattern_name: pattern.name, + session_count: pattern.sessionCount, + confidence: pattern.confidence, + source_ids: pattern.sourceIds, + detected_at: new Date().toISOString(), + }), + }); + + // 2. 
Write skills file + let skillPath: string | null = null; + try { + const slug = pattern.name + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-|-$/g, '') + .substring(0, 50); + + const skillsDir = join(this.projectPath, '.claude', 'skills'); + if (!existsSync(skillsDir)) { + mkdirSync(skillsDir, { recursive: true }); + } + + skillPath = join(skillsDir, `${slug}.md`); + const skillContent = this.buildSkillContent(pattern); + writeFileSync(skillPath, skillContent, 'utf-8'); + } catch (err) { + console.warn( + `[PatternDetector] Failed to write skill file for ${pattern.name}:`, + (err as Error).message + ); + skillPath = null; + } + + console.log( + `[PatternDetector] Solidified pattern '${pattern.name}' -> memory=${memory.id}, skill=${skillPath || 'none'}` + ); + + return { memoryId: memory.id, skillPath }; + } + + /** + * Build CoreMemory content for a detected pattern. + */ + private buildPatternMemoryContent(pattern: DetectedPattern): string { + const lines: string[] = [ + `# Detected Pattern: ${pattern.name}`, + '', + `**Confidence**: ${pattern.confidence}`, + `**Sessions**: ${pattern.sessionCount} (${pattern.sourceIds.join(', ')})`, + `**Category**: ${pattern.category}`, + `**Avg Similarity**: ${pattern.avgSimilarity}`, + '', + '## Representative Content', + '', + pattern.representative, + '', + '## Usage', + '', + 'This pattern was automatically detected across multiple sessions.', + 'It represents a recurring approach or concept in this project.', + ]; + + return lines.join('\n'); + } + + /** + * Build skill file content for a detected pattern. 
+ */ + private buildSkillContent(pattern: DetectedPattern): string { + const lines: string[] = [ + `# ${pattern.name}`, + '', + `> Auto-detected pattern (confidence: ${pattern.confidence}, sessions: ${pattern.sessionCount})`, + '', + '## Description', + '', + pattern.representative, + '', + '## Context', + '', + `This pattern was detected across ${pattern.sessionCount} sessions:`, + ...pattern.sourceIds.map(id => `- ${id}`), + '', + '## When to Apply', + '', + 'Apply this pattern when working on similar tasks or encountering related concepts.', + '', + `---`, + `*Auto-generated by PatternDetector on ${new Date().toISOString()}*`, + ]; + + return lines.join('\n'); + } +} diff --git a/ccw/src/core/routes/notification-routes.ts b/ccw/src/core/routes/notification-routes.ts new file mode 100644 index 00000000..00d16b8e --- /dev/null +++ b/ccw/src/core/routes/notification-routes.ts @@ -0,0 +1,357 @@ +// ======================================== +// Remote Notification Routes +// ======================================== +// API endpoints for remote notification configuration + +import type { IncomingMessage, ServerResponse } from 'http'; +import { URL } from 'url'; +import { + loadConfig, + saveConfig, + resetConfig, +} from '../../config/remote-notification-config.js'; +import { + remoteNotificationService, +} from '../../services/remote-notification-service.js'; +import { + maskSensitiveConfig, + type RemoteNotificationConfig, + type TestNotificationRequest, + type NotificationPlatform, + type DiscordConfig, + type TelegramConfig, + type WebhookConfig, +} from '../../types/remote-notification.js'; +import { deepMerge } from '../../types/util.js'; + +// ========== Input Validation ========== + +/** + * Validate URL format (must be http or https) + */ +function isValidUrl(url: string): boolean { + try { + const parsed = new URL(url); + return ['http:', 'https:'].includes(parsed.protocol); + } catch { + return false; + } +} + +/** + * Validate Discord webhook URL format + 
*/ +function isValidDiscordWebhookUrl(url: string): boolean { + if (!isValidUrl(url)) return false; + try { + const parsed = new URL(url); + // Discord webhooks are typically: discord.com/api/webhooks/{id}/{token} + return ( + (parsed.hostname === 'discord.com' || parsed.hostname === 'discordapp.com') && + parsed.pathname.startsWith('/api/webhooks/') + ); + } catch { + return false; + } +} + +/** + * Validate Telegram bot token format (typically: 123456789:ABCdef...) + */ +function isValidTelegramBotToken(token: string): boolean { + // Telegram bot tokens are in format: {bot_id}:{token} + // Bot ID is a number, token is alphanumeric with underscores and hyphens + return /^\d{8,15}:[A-Za-z0-9_-]{30,50}$/.test(token); +} + +/** + * Validate Telegram chat ID format + */ +function isValidTelegramChatId(chatId: string): boolean { + // Chat IDs are numeric, optionally negative (for groups) + return /^-?\d{1,20}$/.test(chatId); +} + +/** + * Validate webhook headers (must be valid JSON object) + */ +function isValidHeaders(headers: unknown): { valid: boolean; error?: string } { + if (headers === undefined || headers === null) { + return { valid: true }; // Optional field + } + + if (typeof headers !== 'object' || Array.isArray(headers)) { + return { valid: false, error: 'Headers must be an object' }; + } + + const headerObj = headers as Record; + + // Check for reasonable size limit (10KB) + const serialized = JSON.stringify(headers); + if (serialized.length > 10240) { + return { valid: false, error: 'Headers too large (max 10KB)' }; + } + + // Validate each header key and value + for (const [key, value] of Object.entries(headerObj)) { + if (typeof key !== 'string' || key.length === 0) { + return { valid: false, error: 'Header keys must be non-empty strings' }; + } + if (typeof value !== 'string') { + return { valid: false, error: `Header '${key}' value must be a string` }; + } + // Block potentially dangerous headers + const lowerKey = key.toLowerCase(); + if (['host', 
'content-length', 'connection'].includes(lowerKey)) { + return { valid: false, error: `Header '${key}' is not allowed` }; + } + } + + return { valid: true }; +} + +/** + * Validate configuration updates + */ +function validateConfigUpdates(updates: Partial): { valid: boolean; error?: string } { + // Validate platforms if present + if (updates.platforms) { + const { discord, telegram, webhook } = updates.platforms; + + // Validate Discord config + if (discord) { + if (discord.webhookUrl !== undefined && discord.webhookUrl !== '') { + if (!isValidUrl(discord.webhookUrl)) { + return { valid: false, error: 'Invalid Discord webhook URL format' }; + } + // Warning: we allow non-Discord URLs for flexibility, but log it + if (!isValidDiscordWebhookUrl(discord.webhookUrl)) { + console.warn('[RemoteNotification] Webhook URL does not match Discord format'); + } + } + if (discord.username !== undefined && discord.username.length > 80) { + return { valid: false, error: 'Discord username too long (max 80 chars)' }; + } + } + + // Validate Telegram config + if (telegram) { + if (telegram.botToken !== undefined && telegram.botToken !== '') { + if (!isValidTelegramBotToken(telegram.botToken)) { + return { valid: false, error: 'Invalid Telegram bot token format' }; + } + } + if (telegram.chatId !== undefined && telegram.chatId !== '') { + if (!isValidTelegramChatId(telegram.chatId)) { + return { valid: false, error: 'Invalid Telegram chat ID format' }; + } + } + } + + // Validate Webhook config + if (webhook) { + if (webhook.url !== undefined && webhook.url !== '') { + if (!isValidUrl(webhook.url)) { + return { valid: false, error: 'Invalid webhook URL format' }; + } + } + if (webhook.headers !== undefined) { + const headerValidation = isValidHeaders(webhook.headers); + if (!headerValidation.valid) { + return { valid: false, error: headerValidation.error }; + } + } + if (webhook.timeout !== undefined && (webhook.timeout < 1000 || webhook.timeout > 60000)) { + return { valid: false, 
error: 'Webhook timeout must be between 1000ms and 60000ms' }; + } + } + } + + // Validate timeout + if (updates.timeout !== undefined && (updates.timeout < 1000 || updates.timeout > 60000)) { + return { valid: false, error: 'Timeout must be between 1000ms and 60000ms' }; + } + + return { valid: true }; +} + +/** + * Validate test notification request + */ +function validateTestRequest(request: TestNotificationRequest): { valid: boolean; error?: string } { + if (!request.platform) { + return { valid: false, error: 'Missing platform' }; + } + + const validPlatforms: NotificationPlatform[] = ['discord', 'telegram', 'webhook']; + if (!validPlatforms.includes(request.platform as NotificationPlatform)) { + return { valid: false, error: `Invalid platform: ${request.platform}` }; + } + + if (!request.config) { + return { valid: false, error: 'Missing config' }; + } + + // Platform-specific validation + switch (request.platform) { + case 'discord': { + const config = request.config as Partial; + if (!config.webhookUrl) { + return { valid: false, error: 'Discord webhook URL is required' }; + } + if (!isValidUrl(config.webhookUrl)) { + return { valid: false, error: 'Invalid Discord webhook URL format' }; + } + break; + } + case 'telegram': { + const config = request.config as Partial; + if (!config.botToken) { + return { valid: false, error: 'Telegram bot token is required' }; + } + if (!config.chatId) { + return { valid: false, error: 'Telegram chat ID is required' }; + } + if (!isValidTelegramBotToken(config.botToken)) { + return { valid: false, error: 'Invalid Telegram bot token format' }; + } + if (!isValidTelegramChatId(config.chatId)) { + return { valid: false, error: 'Invalid Telegram chat ID format' }; + } + break; + } + case 'webhook': { + const config = request.config as Partial; + if (!config.url) { + return { valid: false, error: 'Webhook URL is required' }; + } + if (!isValidUrl(config.url)) { + return { valid: false, error: 'Invalid webhook URL format' }; + } + 
if (config.headers) { + const headerValidation = isValidHeaders(config.headers); + if (!headerValidation.valid) { + return { valid: false, error: headerValidation.error }; + } + } + break; + } + } + + return { valid: true }; +} + +/** + * Handle remote notification routes + * GET /api/notifications/remote/config - Get current config + * POST /api/notifications/remote/config - Update config + * POST /api/notifications/remote/test - Test notification + * POST /api/notifications/remote/reset - Reset to defaults + */ +export async function handleNotificationRoutes( + req: IncomingMessage, + res: ServerResponse, + pathname: string +): Promise { + // GET /api/notifications/remote/config + if (pathname === '/api/notifications/remote/config' && req.method === 'GET') { + const config = loadConfig(); + const masked = maskSensitiveConfig(config); + + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify(masked)); + return true; + } + + // POST /api/notifications/remote/config + if (pathname === '/api/notifications/remote/config' && req.method === 'POST') { + const body = await readBody(req); + + try { + const updates = JSON.parse(body) as Partial; + + // Validate input + const validation = validateConfigUpdates(updates); + if (!validation.valid) { + res.writeHead(400, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: validation.error })); + return true; + } + + const current = loadConfig(); + const updated = deepMerge(current, updates); + + saveConfig(updated); + + // Reload service config + remoteNotificationService.reloadConfig(); + + const masked = maskSensitiveConfig(updated); + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ success: true, config: masked })); + } catch (error) { + res.writeHead(400, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ + error: error instanceof Error ? 
error.message : 'Invalid configuration', + })); + } + return true; + } + + // POST /api/notifications/remote/test + if (pathname === '/api/notifications/remote/test' && req.method === 'POST') { + const body = await readBody(req); + + try { + const request = JSON.parse(body) as TestNotificationRequest; + + // Validate input + const validation = validateTestRequest(request); + if (!validation.valid) { + res.writeHead(400, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ success: false, error: validation.error })); + return true; + } + + const result = await remoteNotificationService.testPlatform( + request.platform as NotificationPlatform, + request.config + ); + + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify(result)); + } catch (error) { + res.writeHead(400, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ + success: false, + error: error instanceof Error ? error.message : 'Invalid request', + })); + } + return true; + } + + // POST /api/notifications/remote/reset + if (pathname === '/api/notifications/remote/reset' && req.method === 'POST') { + const config = resetConfig(); + remoteNotificationService.reloadConfig(); + + const masked = maskSensitiveConfig(config); + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ success: true, config: masked })); + return true; + } + + return false; +} + +/** + * Read request body as string + */ +async function readBody(req: IncomingMessage): Promise { + return new Promise((resolve, reject) => { + let body = ''; + req.on('data', (chunk) => { body += chunk; }); + req.on('end', () => resolve(body)); + req.on('error', reject); + }); +} diff --git a/ccw/src/core/routes/unified-memory-routes.ts b/ccw/src/core/routes/unified-memory-routes.ts new file mode 100644 index 00000000..9eacfbe5 --- /dev/null +++ b/ccw/src/core/routes/unified-memory-routes.ts @@ -0,0 +1,151 @@ +/** + * Unified Memory API Routes + * + * Provides 
HTTP endpoints for the unified memory system: + * - GET /api/unified-memory/search - RRF fusion search (vector + FTS5) + * - GET /api/unified-memory/stats - Aggregated statistics + * - POST /api/unified-memory/reindex - Rebuild HNSW vector index + * - GET /api/unified-memory/recommendations/:id - KNN recommendations + */ + +import type { RouteContext } from './types.js'; + +/** + * Handle Unified Memory API routes. + * @returns true if route was handled, false otherwise + */ +export async function handleUnifiedMemoryRoutes(ctx: RouteContext): Promise { + const { pathname, url, req, res, initialPath, handlePostRequest } = ctx; + + // ========================================================================= + // GET /api/unified-memory/search + // Query params: q (required), categories, topK, minScore + // ========================================================================= + if (pathname === '/api/unified-memory/search' && req.method === 'GET') { + const query = url.searchParams.get('q'); + const projectPath = url.searchParams.get('path') || initialPath; + const topK = parseInt(url.searchParams.get('topK') || '20', 10); + const minScore = parseFloat(url.searchParams.get('minScore') || '0'); + const category = url.searchParams.get('category') || undefined; + + if (!query) { + res.writeHead(400, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: 'Query parameter q is required' })); + return true; + } + + try { + const { UnifiedMemoryService } = await import('../unified-memory-service.js'); + const service = new UnifiedMemoryService(projectPath); + + const results = await service.search(query, { + limit: topK, + minScore, + category: category as any, + }); + + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ + success: true, + query, + total: results.length, + results, + })); + } catch (error: unknown) { + res.writeHead(500, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: 
(error as Error).message })); + } + return true; + } + + // ========================================================================= + // GET /api/unified-memory/stats + // ========================================================================= + if (pathname === '/api/unified-memory/stats' && req.method === 'GET') { + const projectPath = url.searchParams.get('path') || initialPath; + + try { + const { UnifiedMemoryService } = await import('../unified-memory-service.js'); + const service = new UnifiedMemoryService(projectPath); + const stats = await service.getStats(); + + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ success: true, stats })); + } catch (error: unknown) { + res.writeHead(500, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: (error as Error).message })); + } + return true; + } + + // ========================================================================= + // POST /api/unified-memory/reindex + // Body (optional): { path: string } + // ========================================================================= + if (pathname === '/api/unified-memory/reindex' && req.method === 'POST') { + handlePostRequest(req, res, async (body: any) => { + const { path: projectPath } = body || {}; + const basePath = projectPath || initialPath; + + try { + const { UnifiedVectorIndex, isUnifiedEmbedderAvailable } = await import('../unified-vector-index.js'); + + if (!isUnifiedEmbedderAvailable()) { + return { + error: 'Unified embedder is not available. 
Ensure Python venv and embedder script are set up.', + status: 503, + }; + } + + const index = new UnifiedVectorIndex(basePath); + const result = await index.reindexAll(); + + return { + success: result.success, + hnsw_count: result.hnsw_count, + elapsed_time: result.elapsed_time, + error: result.error, + }; + } catch (error: unknown) { + return { error: (error as Error).message, status: 500 }; + } + }); + return true; + } + + // ========================================================================= + // GET /api/unified-memory/recommendations/:id + // Query params: limit (optional, default 5) + // ========================================================================= + if (pathname.startsWith('/api/unified-memory/recommendations/') && req.method === 'GET') { + const memoryId = pathname.replace('/api/unified-memory/recommendations/', ''); + const projectPath = url.searchParams.get('path') || initialPath; + const limit = parseInt(url.searchParams.get('limit') || '5', 10); + + if (!memoryId) { + res.writeHead(400, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: 'Memory ID is required' })); + return true; + } + + try { + const { UnifiedMemoryService } = await import('../unified-memory-service.js'); + const service = new UnifiedMemoryService(projectPath); + const recommendations = await service.getRecommendations(memoryId, limit); + + res.writeHead(200, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ + success: true, + memory_id: memoryId, + total: recommendations.length, + recommendations, + })); + } catch (error: unknown) { + res.writeHead(500, { 'Content-Type': 'application/json' }); + res.end(JSON.stringify({ error: (error as Error).message })); + } + return true; + } + + return false; +} diff --git a/ccw/src/core/server.ts b/ccw/src/core/server.ts index f5107a3d..16b69e36 100644 --- a/ccw/src/core/server.ts +++ b/ccw/src/core/server.ts @@ -9,6 +9,7 @@ import { handleAuditRoutes } from './routes/audit-routes.js'; 
import { handleProviderRoutes } from './routes/provider-routes.js'; import { handleMemoryRoutes } from './routes/memory-routes.js'; import { handleCoreMemoryRoutes } from './routes/core-memory-routes.js'; +import { handleUnifiedMemoryRoutes } from './routes/unified-memory-routes.js'; import { handleMcpRoutes } from './routes/mcp-routes.js'; import { handleHooksRoutes } from './routes/hooks-routes.js'; import { handleUnsplashRoutes, handleBackgroundRoutes } from './routes/unsplash-routes.js'; @@ -37,6 +38,7 @@ import { handleDashboardRoutes } from './routes/dashboard-routes.js'; import { handleOrchestratorRoutes } from './routes/orchestrator-routes.js'; import { handleConfigRoutes } from './routes/config-routes.js'; import { handleTeamRoutes } from './routes/team-routes.js'; +import { handleNotificationRoutes } from './routes/notification-routes.js'; // Import WebSocket handling import { handleWebSocketUpgrade, broadcastToClients, extractSessionIdFromPath } from './websocket.js'; @@ -462,6 +464,11 @@ export async function startServer(options: ServerOptions = {}): Promise this.CONFIG_TTL) { + this.config = loadConfig(); + this.configLoadedAt = now; + } + return this.config; + } + + /** + * Force reload configuration + */ + reloadConfig(): void { + this.config = loadConfig(); + this.configLoadedAt = Date.now(); + } + + /** + * Check if notifications are enabled for a given event + */ + shouldNotify(eventType: string): boolean { + const config = this.getConfig(); + if (!config.enabled) return false; + + const enabledPlatforms = getEnabledPlatformsForEvent(config, eventType); + return enabledPlatforms.length > 0; + } + + /** + * Send notification to all configured platforms for an event + * Non-blocking: returns immediately, actual dispatch is async + */ + sendNotification( + eventType: string, + context: Omit + ): void { + const config = this.getConfig(); + + // Quick check before async dispatch + if (!config.enabled) return; + + const enabledPlatforms = 
getEnabledPlatformsForEvent(config, eventType); + if (enabledPlatforms.length === 0) return; + + const fullContext: NotificationContext = { + ...context, + eventType: eventType as NotificationContext['eventType'], + timestamp: new Date().toISOString(), + }; + + // Fire-and-forget dispatch + this.dispatchToPlatforms(enabledPlatforms, fullContext, config).catch((error) => { + // Silent failure - log only + console.error('[RemoteNotification] Dispatch failed:', error); + }); + } + + /** + * Send notification and wait for results (for testing) + */ + async sendNotificationAsync( + eventType: string, + context: Omit + ): Promise { + const config = this.getConfig(); + const startTime = Date.now(); + + if (!config.enabled) { + return { success: false, results: [], totalTime: 0 }; + } + + const enabledPlatforms = getEnabledPlatformsForEvent(config, eventType); + if (enabledPlatforms.length === 0) { + return { success: false, results: [], totalTime: Date.now() - startTime }; + } + + const fullContext: NotificationContext = { + ...context, + eventType: eventType as NotificationContext['eventType'], + timestamp: new Date().toISOString(), + }; + + const results = await this.dispatchToPlatforms(enabledPlatforms, fullContext, config); + + return { + success: results.some((r) => r.success), + results, + totalTime: Date.now() - startTime, + }; + } + + /** + * Dispatch to multiple platforms in parallel + */ + private async dispatchToPlatforms( + platforms: string[], + context: NotificationContext, + config: RemoteNotificationConfig + ): Promise { + const promises = platforms.map((platform) => + this.dispatchToPlatform(platform as NotificationPlatform, context, config) + ); + + const results = await Promise.allSettled(promises); + + return results.map((result, index) => { + if (result.status === 'fulfilled') { + return result.value; + } + return { + platform: platforms[index] as NotificationPlatform, + success: false, + error: result.reason?.message || 'Unknown error', + }; + }); + 
} + + /** + * Dispatch to a single platform + */ + private async dispatchToPlatform( + platform: NotificationPlatform, + context: NotificationContext, + config: RemoteNotificationConfig + ): Promise { + const startTime = Date.now(); + + try { + switch (platform) { + case 'discord': + return await this.sendDiscord(context, config.platforms.discord!, config.timeout); + case 'telegram': + return await this.sendTelegram(context, config.platforms.telegram!, config.timeout); + case 'webhook': + return await this.sendWebhook(context, config.platforms.webhook!, config.timeout); + default: + return { + platform, + success: false, + error: `Unknown platform: ${platform}`, + }; + } + } catch (error) { + return { + platform, + success: false, + error: error instanceof Error ? error.message : String(error), + responseTime: Date.now() - startTime, + }; + } + } + + /** + * Send Discord notification via webhook + */ + private async sendDiscord( + context: NotificationContext, + config: DiscordConfig, + timeout: number + ): Promise { + const startTime = Date.now(); + + if (!config.webhookUrl) { + return { platform: 'discord', success: false, error: 'Webhook URL not configured' }; + } + + const embed = this.buildDiscordEmbed(context); + const body = { + username: config.username || 'CCW Notification', + avatar_url: config.avatarUrl, + embeds: [embed], + }; + + try { + await this.httpRequest(config.webhookUrl, body, timeout); + return { + platform: 'discord', + success: true, + responseTime: Date.now() - startTime, + }; + } catch (error) { + return { + platform: 'discord', + success: false, + error: error instanceof Error ? 
error.message : String(error), + responseTime: Date.now() - startTime, + }; + } + } + + /** + * Build Discord embed from context + */ + private buildDiscordEmbed(context: NotificationContext): Record { + const eventEmoji: Record = { + 'ask-user-question': '❓', + 'session-start': '▶️', + 'session-end': '⏹️', + 'task-completed': '✅', + 'task-failed': '❌', + }; + + const eventColors: Record = { + 'ask-user-question': 0x3498db, // Blue + 'session-start': 0x2ecc71, // Green + 'session-end': 0x95a5a6, // Gray + 'task-completed': 0x27ae60, // Dark Green + 'task-failed': 0xe74c3c, // Red + }; + + const fields: Array<{ name: string; value: string; inline?: boolean }> = []; + + if (context.sessionId) { + fields.push({ name: 'Session', value: context.sessionId.slice(0, 16) + '...', inline: true }); + } + + if (context.questionText) { + const truncated = context.questionText.length > 200 + ? context.questionText.slice(0, 200) + '...' + : context.questionText; + fields.push({ name: 'Question', value: truncated, inline: false }); + } + + if (context.taskDescription) { + const truncated = context.taskDescription.length > 200 + ? context.taskDescription.slice(0, 200) + '...' + : context.taskDescription; + fields.push({ name: 'Task', value: truncated, inline: false }); + } + + if (context.errorMessage) { + const truncated = context.errorMessage.length > 200 + ? context.errorMessage.slice(0, 200) + '...' 
+ : context.errorMessage; + fields.push({ name: 'Error', value: truncated, inline: false }); + } + + return { + title: `${eventEmoji[context.eventType] || '📢'} ${this.formatEventName(context.eventType)}`, + color: eventColors[context.eventType] || 0x9b59b6, + fields, + timestamp: context.timestamp, + footer: { text: 'CCW Remote Notification' }, + }; + } + + /** + * Send Telegram notification via Bot API + */ + private async sendTelegram( + context: NotificationContext, + config: TelegramConfig, + timeout: number + ): Promise { + const startTime = Date.now(); + + if (!config.botToken || !config.chatId) { + return { platform: 'telegram', success: false, error: 'Bot token or chat ID not configured' }; + } + + const text = this.buildTelegramMessage(context); + const url = `https://api.telegram.org/bot${config.botToken}/sendMessage`; + const body = { + chat_id: config.chatId, + text, + parse_mode: config.parseMode || 'HTML', + }; + + try { + await this.httpRequest(url, body, timeout); + return { + platform: 'telegram', + success: true, + responseTime: Date.now() - startTime, + }; + } catch (error) { + return { + platform: 'telegram', + success: false, + error: error instanceof Error ? error.message : String(error), + responseTime: Date.now() - startTime, + }; + } + } + + /** + * Build Telegram message from context + */ + private buildTelegramMessage(context: NotificationContext): string { + const eventEmoji: Record = { + 'ask-user-question': '❓', + 'session-start': '▶️', + 'session-end': '⏹️', + 'task-completed': '✅', + 'task-failed': '❌', + }; + + const lines: string[] = []; + lines.push(`${eventEmoji[context.eventType] || '📢'} ${this.formatEventName(context.eventType)}`); + lines.push(''); + + if (context.sessionId) { + lines.push(`Session: ${context.sessionId.slice(0, 16)}...`); + } + + if (context.questionText) { + const truncated = context.questionText.length > 300 + ? context.questionText.slice(0, 300) + '...' 
+ : context.questionText; + lines.push(`Question: ${this.escapeHtml(truncated)}`); + } + + if (context.taskDescription) { + const truncated = context.taskDescription.length > 300 + ? context.taskDescription.slice(0, 300) + '...' + : context.taskDescription; + lines.push(`Task: ${this.escapeHtml(truncated)}`); + } + + if (context.errorMessage) { + const truncated = context.errorMessage.length > 300 + ? context.errorMessage.slice(0, 300) + '...' + : context.errorMessage; + lines.push(`Error: ${this.escapeHtml(truncated)}`); + } + + lines.push(''); + lines.push(`📅 ${new Date(context.timestamp).toLocaleString()}`); + + return lines.join('\n'); + } + + /** + * Send generic webhook notification + */ + private async sendWebhook( + context: NotificationContext, + config: WebhookConfig, + timeout: number + ): Promise { + const startTime = Date.now(); + + if (!config.url) { + return { platform: 'webhook', success: false, error: 'Webhook URL not configured' }; + } + + const body = { + event: context.eventType, + timestamp: context.timestamp, + sessionId: context.sessionId, + questionText: context.questionText, + taskDescription: context.taskDescription, + errorMessage: context.errorMessage, + metadata: context.metadata, + }; + + try { + await this.httpRequest(config.url, body, config.timeout || timeout, config.method, config.headers); + return { + platform: 'webhook', + success: true, + responseTime: Date.now() - startTime, + }; + } catch (error) { + return { + platform: 'webhook', + success: false, + error: error instanceof Error ? 
error.message : String(error), + responseTime: Date.now() - startTime, + }; + } + } + + /** + * Check if a URL is safe from SSRF attacks + * Blocks private IP ranges, loopback, and link-local addresses + */ + private isUrlSafe(urlString: string): { safe: boolean; error?: string } { + try { + const parsedUrl = new URL(urlString); + + // Only allow http and https protocols + if (!['http:', 'https:'].includes(parsedUrl.protocol)) { + return { safe: false, error: 'Only http and https protocols are allowed' }; + } + + const hostname = parsedUrl.hostname.toLowerCase(); + + // Block localhost variants + if (hostname === 'localhost' || hostname === 'localhost.localdomain' || hostname === '0.0.0.0') { + return { safe: false, error: 'Localhost addresses are not allowed' }; + } + + // Block IPv4 loopback (127.0.0.0/8) + if (/^127\.\d{1,3}\.\d{1,3}\.\d{1,3}$/.test(hostname)) { + return { safe: false, error: 'Loopback addresses are not allowed' }; + } + + // Block IPv4 private ranges + // 10.0.0.0/8 + if (/^10\.\d{1,3}\.\d{1,3}\.\d{1,3}$/.test(hostname)) { + return { safe: false, error: 'Private IP addresses are not allowed' }; + } + // 172.16.0.0/12 + if (/^172\.(1[6-9]|2\d|3[01])\.\d{1,3}\.\d{1,3}$/.test(hostname)) { + return { safe: false, error: 'Private IP addresses are not allowed' }; + } + // 192.168.0.0/16 + if (/^192\.168\.\d{1,3}\.\d{1,3}$/.test(hostname)) { + return { safe: false, error: 'Private IP addresses are not allowed' }; + } + + // Block link-local addresses (169.254.0.0/16) + if (/^169\.254\.\d{1,3}\.\d{1,3}$/.test(hostname)) { + return { safe: false, error: 'Link-local addresses are not allowed' }; + } + + // Block IPv6 loopback and private + if (hostname === '::1' || hostname.startsWith('fc') || hostname.startsWith('fd') || hostname === '::') { + return { safe: false, error: 'IPv6 private/loopback addresses are not allowed' }; + } + + // Block hostnames that look like IP addresses in various formats + // (e.g., 0x7f.0.0.1, 2130706433, etc.) 
+ if (/^0x[0-9a-f]+/i.test(hostname) || /^\d{8,}$/.test(hostname)) { + return { safe: false, error: 'Suspicious hostname format' }; + } + + // Block cloud metadata endpoints + if (hostname === '169.254.169.254' || hostname === 'metadata.google.internal' || hostname === 'metadata.azure.internal') { + return { safe: false, error: 'Cloud metadata endpoints are not allowed' }; + } + + return { safe: true }; + } catch (error) { + return { safe: false, error: 'Invalid URL format' }; + } + } + + /** + * Generic HTTP request helper + */ + private httpRequest( + url: string, + body: unknown, + timeout: number, + method: 'POST' | 'PUT' = 'POST', + headers: Record = {} + ): Promise { + return new Promise((resolve, reject) => { + // SSRF protection: validate URL before making request + const urlSafety = this.isUrlSafe(url); + if (!urlSafety.safe) { + reject(new Error(`URL validation failed: ${urlSafety.error}`)); + return; + } + + const parsedUrl = new URL(url); + const isHttps = parsedUrl.protocol === 'https:'; + const client = isHttps ? https : http; + + const requestOptions: http.RequestOptions = { + hostname: parsedUrl.hostname, + port: parsedUrl.port || (isHttps ? 
443 : 80), + path: parsedUrl.pathname + parsedUrl.search, + method, + headers: { + 'Content-Type': 'application/json', + ...headers, + }, + timeout, + }; + + const req = client.request(requestOptions, (res) => { + let data = ''; + res.on('data', (chunk) => { data += chunk; }); + res.on('end', () => { + if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) { + resolve(); + } else { + reject(new Error(`HTTP ${res.statusCode}: ${data.slice(0, 200)}`)); + } + }); + }); + + req.on('error', reject); + req.on('timeout', () => { + req.destroy(); + reject(new Error('Request timeout')); + }); + + req.write(JSON.stringify(body)); + req.end(); + }); + } + + /** + * Format event name for display + */ + private formatEventName(eventType: string): string { + return eventType + .split('-') + .map((word) => word.charAt(0).toUpperCase() + word.slice(1)) + .join(' '); + } + + /** + * Escape HTML for Telegram messages + */ + private escapeHtml(text: string): string { + return text + .replace(/&/g, '&') + .replace(//g, '>'); + } + + /** + * Test a platform configuration + */ + async testPlatform( + platform: NotificationPlatform, + config: DiscordConfig | TelegramConfig | WebhookConfig + ): Promise<{ success: boolean; error?: string; responseTime?: number }> { + const testContext: NotificationContext = { + eventType: 'task-completed', + sessionId: 'test-session', + taskDescription: 'This is a test notification from CCW', + timestamp: new Date().toISOString(), + }; + + const startTime = Date.now(); + + try { + switch (platform) { + case 'discord': + return await this.sendDiscord(testContext, config as DiscordConfig, 10000); + case 'telegram': + return await this.sendTelegram(testContext, config as TelegramConfig, 10000); + case 'webhook': + return await this.sendWebhook(testContext, config as WebhookConfig, 10000); + default: + return { success: false, error: `Unknown platform: ${platform}` }; + } + } catch (error) { + return { + success: false, + error: error instanceof 
Error ? error.message : String(error), + responseTime: Date.now() - startTime, + }; + } + } +} + +// Singleton instance +export const remoteNotificationService = new RemoteNotificationService(); diff --git a/ccw/src/core/session-clustering-service.ts b/ccw/src/core/session-clustering-service.ts index 16ff6a75..869329dd 100644 --- a/ccw/src/core/session-clustering-service.ts +++ b/ccw/src/core/session-clustering-service.ts @@ -5,6 +5,7 @@ import { CoreMemoryStore, SessionCluster, ClusterMember, SessionMetadataCache } from './core-memory-store.js'; import { CliHistoryStore } from '../tools/cli-history-store.js'; +import { UnifiedVectorIndex, isUnifiedEmbedderAvailable } from './unified-vector-index.js'; import { StoragePaths } from '../config/storage-paths.js'; import { readdirSync, readFileSync, statSync, existsSync } from 'fs'; import { join } from 'path'; @@ -21,6 +22,10 @@ const WEIGHTS = { // Clustering threshold (0.4 = moderate similarity required) const CLUSTER_THRESHOLD = 0.4; +// Incremental clustering frequency control +const MIN_CLUSTER_INTERVAL_HOURS = 6; +const MIN_NEW_SESSIONS_FOR_CLUSTER = 5; + export interface ClusteringOptions { scope?: 'all' | 'recent' | 'unclustered'; timeRange?: { start: string; end: string }; @@ -33,15 +38,29 @@ export interface ClusteringResult { sessionsClustered: number; } +export interface IncrementalClusterResult { + sessionId: string; + clusterId: string | null; + action: 'joined_existing' | 'created_new' | 'skipped'; +} + export class SessionClusteringService { private coreMemoryStore: CoreMemoryStore; private cliHistoryStore: CliHistoryStore; private projectPath: string; + private vectorIndex: UnifiedVectorIndex | null = null; + /** Cache: sessionId -> list of nearby session source_ids from HNSW search */ + private vectorNeighborCache: Map> = new Map(); constructor(projectPath: string) { this.projectPath = projectPath; this.coreMemoryStore = new CoreMemoryStore(projectPath); this.cliHistoryStore = new 
CliHistoryStore(projectPath); + + // Initialize vector index if available + if (isUnifiedEmbedderAvailable()) { + this.vectorIndex = new UnifiedVectorIndex(projectPath); + } } /** @@ -331,14 +350,36 @@ export class SessionClusteringService { } /** - * Calculate vector similarity using pre-computed embeddings from memory_chunks - * Returns average cosine similarity of chunk embeddings + * Calculate vector similarity using HNSW index when available. + * Falls back to direct cosine similarity on pre-computed embeddings from memory_chunks. + * + * HNSW path: Uses cached neighbor lookup from vectorNeighborCache (populated by + * preloadVectorNeighbors). This replaces the O(N) full-table scan with O(1) cache lookup. + * + * Fallback path: Averages chunk embeddings from SQLite and computes cosine similarity directly. */ private calculateVectorSimilarity(s1: SessionMetadataCache, s2: SessionMetadataCache): number { + // HNSW path: check if we have pre-loaded neighbor scores + const neighbors1 = this.vectorNeighborCache.get(s1.session_id); + if (neighbors1) { + const score = neighbors1.get(s2.session_id); + if (score !== undefined) return score; + // s2 is not a neighbor of s1 via HNSW - low similarity + return 0; + } + + // Also check reverse direction + const neighbors2 = this.vectorNeighborCache.get(s2.session_id); + if (neighbors2) { + const score = neighbors2.get(s1.session_id); + if (score !== undefined) return score; + return 0; + } + + // Fallback: direct cosine similarity on chunk embeddings const embedding1 = this.getSessionEmbedding(s1.session_id); const embedding2 = this.getSessionEmbedding(s2.session_id); - // Graceful fallback if no embeddings available if (!embedding1 || !embedding2) { return 0; } @@ -346,6 +387,55 @@ export class SessionClusteringService { return this.cosineSimilarity(embedding1, embedding2); } + /** + * Preload vector neighbors for a set of sessions using HNSW search. 
+ * For each session, gets its average embedding and searches for nearby chunks, + * then aggregates scores by source_id to get session-level similarity scores. + * + * This replaces the O(N^2) full-table scan with O(N * topK) HNSW lookups. + */ + async preloadVectorNeighbors(sessionIds: string[], topK: number = 20): Promise { + if (!this.vectorIndex) return; + + this.vectorNeighborCache.clear(); + + for (const sessionId of sessionIds) { + const avgEmbedding = this.getSessionEmbedding(sessionId); + if (!avgEmbedding) continue; + + try { + const result = await this.vectorIndex.searchByVector(avgEmbedding, { + topK, + minScore: 0.1, + }); + + if (!result.success || !result.matches.length) continue; + + // Aggregate scores by source_id (session-level similarity) + const neighborScores = new Map(); + for (const match of result.matches) { + const sourceId = match.source_id; + if (sourceId === sessionId) continue; // skip self + if (!neighborScores.has(sourceId)) { + neighborScores.set(sourceId, []); + } + neighborScores.get(sourceId)!.push(match.score); + } + + // Average scores per neighbor session + const avgScores = new Map(); + for (const [neighborId, scores] of neighborScores) { + const avg = scores.reduce((sum, s) => sum + s, 0) / scores.length; + avgScores.set(neighborId, avg); + } + + this.vectorNeighborCache.set(sessionId, avgScores); + } catch { + // HNSW search failed for this session, skip + } + } + } + /** * Get session embedding by averaging all chunk embeddings */ @@ -494,11 +584,16 @@ export class SessionClusteringService { this.coreMemoryStore.upsertSessionMetadata(session); } - // 4. Calculate relevance matrix - const n = sessions.length; - const relevanceMatrix: number[][] = Array(n).fill(0).map(() => Array(n).fill(0)); + // 4. 
Preload HNSW vector neighbors for efficient similarity calculation + if (this.vectorIndex) { + const sessionIds = sessions.map(s => s.session_id); + await this.preloadVectorNeighbors(sessionIds); + console.log(`[Clustering] Preloaded HNSW vector neighbors for ${sessionIds.length} sessions`); + } - let maxScore = 0; + // 5. Calculate relevance matrix + const n = sessions.length; + const relevanceMatrix: number[][] = Array(n).fill(0).map(() => Array(n).fill(0)); let maxScore = 0; let avgScore = 0; let pairCount = 0; @@ -519,7 +614,7 @@ export class SessionClusteringService { console.log(`[Clustering] Relevance stats: max=${maxScore.toFixed(3)}, avg=${avgScore.toFixed(3)}, pairs=${pairCount}, threshold=${CLUSTER_THRESHOLD}`); } - // 5. Agglomerative clustering + // 6. Agglomerative clustering const minClusterSize = options?.minClusterSize || 2; // Early return if not enough sessions @@ -531,7 +626,7 @@ export class SessionClusteringService { const newPotentialClusters = this.agglomerativeClustering(sessions, relevanceMatrix, CLUSTER_THRESHOLD); console.log(`[Clustering] Generated ${newPotentialClusters.length} potential clusters`); - // 6. Process clusters: create new or merge with existing + // 7. Process clusters: create new or merge with existing let clustersCreated = 0; let clustersMerged = 0; let sessionsClustered = 0; @@ -716,6 +811,145 @@ export class SessionClusteringService { return { merged, deleted, remaining }; } + /** + * Check whether clustering should run based on frequency control. + * Conditions: last clustering > MIN_CLUSTER_INTERVAL_HOURS ago AND + * new unclustered sessions >= MIN_NEW_SESSIONS_FOR_CLUSTER. + * + * Stores last_cluster_time in session_clusters metadata. 
+ */ + async shouldRunClustering(): Promise { + // Check last cluster time from cluster metadata + const clusters = this.coreMemoryStore.listClusters('active'); + let lastClusterTime = 0; + + for (const cluster of clusters) { + const createdMs = new Date(cluster.created_at).getTime(); + if (createdMs > lastClusterTime) { + lastClusterTime = createdMs; + } + const updatedMs = new Date(cluster.updated_at).getTime(); + if (updatedMs > lastClusterTime) { + lastClusterTime = updatedMs; + } + } + + // Check time interval + const now = Date.now(); + const hoursSinceLastCluster = (now - lastClusterTime) / (1000 * 60 * 60); + if (lastClusterTime > 0 && hoursSinceLastCluster < MIN_CLUSTER_INTERVAL_HOURS) { + return false; + } + + // Check number of unclustered sessions + const allSessions = await this.collectSessions({ scope: 'recent' }); + const unclusteredCount = allSessions.filter(s => { + const sessionClusters = this.coreMemoryStore.getSessionClusters(s.session_id); + return sessionClusters.length === 0; + }).length; + + return unclusteredCount >= MIN_NEW_SESSIONS_FOR_CLUSTER; + } + + /** + * Incremental clustering: process only a single new session. + * + * Computes the new session's similarity against existing cluster centroids + * using HNSW search. If similarity >= CLUSTER_THRESHOLD, joins the best + * matching cluster. Otherwise, remains unclustered until enough sessions + * accumulate for a new cluster. 
+ * + * @param sessionId - The session to incrementally cluster + * @returns Result indicating what action was taken + */ + async incrementalCluster(sessionId: string): Promise { + // Get or create session metadata + let sessionMeta = this.coreMemoryStore.getSessionMetadata(sessionId); + if (!sessionMeta) { + // Try to build metadata from available sources + const allSessions = await this.collectSessions({ scope: 'all' }); + sessionMeta = allSessions.find(s => s.session_id === sessionId) || null; + + if (!sessionMeta) { + return { sessionId, clusterId: null, action: 'skipped' }; + } + this.coreMemoryStore.upsertSessionMetadata(sessionMeta); + } + + // Check if already clustered + const existingClusters = this.coreMemoryStore.getSessionClusters(sessionId); + if (existingClusters.length > 0) { + return { sessionId, clusterId: existingClusters[0].id, action: 'skipped' }; + } + + // Get all active clusters and their representative sessions + const activeClusters = this.coreMemoryStore.listClusters('active'); + + if (activeClusters.length === 0) { + return { sessionId, clusterId: null, action: 'skipped' }; + } + + // Use HNSW to find nearest neighbors for the new session + if (this.vectorIndex) { + await this.preloadVectorNeighbors([sessionId]); + } + + // Calculate similarity against each cluster's member sessions + let bestCluster: SessionCluster | null = null; + let bestScore = 0; + + for (const cluster of activeClusters) { + const members = this.coreMemoryStore.getClusterMembers(cluster.id); + if (members.length === 0) continue; + + // Calculate average relevance against cluster members (sample up to 5) + const sampleMembers = members.slice(0, 5); + let totalScore = 0; + let validCount = 0; + + for (const member of sampleMembers) { + const memberMeta = this.coreMemoryStore.getSessionMetadata(member.session_id); + if (!memberMeta) continue; + + const score = this.calculateRelevance(sessionMeta, memberMeta); + totalScore += score; + validCount++; + } + + if 
(validCount === 0) continue; + + const avgScore = totalScore / validCount; + if (avgScore > bestScore) { + bestScore = avgScore; + bestCluster = cluster; + } + } + + // Join best cluster if above threshold + if (bestCluster && bestScore >= CLUSTER_THRESHOLD) { + const existingMembers = this.coreMemoryStore.getClusterMembers(bestCluster.id); + + this.coreMemoryStore.addClusterMember({ + cluster_id: bestCluster.id, + session_id: sessionId, + session_type: sessionMeta.session_type as 'core_memory' | 'workflow' | 'cli_history' | 'native', + sequence_order: existingMembers.length + 1, + relevance_score: bestScore, + }); + + // Update cluster description + this.coreMemoryStore.updateCluster(bestCluster.id, { + description: `Auto-generated cluster with ${existingMembers.length + 1} sessions` + }); + + console.log(`[Clustering] Session ${sessionId} joined cluster '${bestCluster.name}' (score: ${bestScore.toFixed(3)})`); + return { sessionId, clusterId: bestCluster.id, action: 'joined_existing' }; + } + + // Not similar enough to any existing cluster + return { sessionId, clusterId: null, action: 'skipped' }; + } + /** * Agglomerative clustering algorithm * Returns array of clusters (each cluster is array of sessions) diff --git a/ccw/src/core/unified-context-builder.ts b/ccw/src/core/unified-context-builder.ts new file mode 100644 index 00000000..ea111d07 --- /dev/null +++ b/ccw/src/core/unified-context-builder.ts @@ -0,0 +1,410 @@ +/** + * UnifiedContextBuilder - Assembles context for Claude Code hooks + * + * Provides componentized context assembly for: + * - session-start: MEMORY.md summary + cluster overview + hot entities + solidified patterns + * - per-prompt: vector search + intent matching across all categories + * - session-end: incremental embedding + clustering + heat score update tasks + * + * Character limits: + * - session-start: <= 1000 chars + * - per-prompt: <= 500 chars + */ + +import { existsSync, readdirSync } from 'fs'; +import { join, basename } from 
'path'; +import { getProjectPaths } from '../config/storage-paths.js'; +import { getMemoryMdContent } from './memory-consolidation-pipeline.js'; +import { getMemoryStore } from './memory-store.js'; +import type { HotEntity } from './memory-store.js'; +import { + UnifiedVectorIndex, + isUnifiedEmbedderAvailable, +} from './unified-vector-index.js'; +import type { VectorSearchMatch } from './unified-vector-index.js'; +import { SessionClusteringService } from './session-clustering-service.js'; + +// ============================================================================= +// Constants +// ============================================================================= + +/** Maximum character count for session-start context */ +const SESSION_START_LIMIT = 1000; + +/** Maximum character count for per-prompt context */ +const PER_PROMPT_LIMIT = 500; + +/** Maximum characters for the MEMORY.md summary component */ +const MEMORY_SUMMARY_LIMIT = 500; + +/** Number of top clusters to show in overview */ +const TOP_CLUSTERS = 3; + +/** Number of top hot entities to show */ +const TOP_HOT_ENTITIES = 5; + +/** Days to look back for hot entities */ +const HOT_ENTITY_DAYS = 7; + +/** Number of vector search results for per-prompt */ +const VECTOR_TOP_K = 8; + +/** Minimum vector similarity score */ +const VECTOR_MIN_SCORE = 0.3; + +// ============================================================================= +// Types +// ============================================================================= + +/** A task to be executed asynchronously at session-end */ +export interface SessionEndTask { + /** Descriptive name of the task */ + name: string; + /** Async function to execute */ + execute: () => Promise; +} + +// ============================================================================= +// UnifiedContextBuilder +// ============================================================================= + +export class UnifiedContextBuilder { + private projectPath: string; + 
private paths: ReturnType; + + constructor(projectPath: string) { + this.projectPath = projectPath; + this.paths = getProjectPaths(projectPath); + } + + // --------------------------------------------------------------------------- + // Public: session-start context + // --------------------------------------------------------------------------- + + /** + * Build context for session-start hook injection. + * + * Components (assembled in order, truncated to <= 1000 chars total): + * 1. MEMORY.md summary (up to 500 chars) + * 2. Cluster overview (top 3 active clusters) + * 3. Hot entities (top 5 within last 7 days) + * 4. Solidified patterns (skills/*.md file list) + */ + async buildSessionStartContext(): Promise { + const sections: string[] = []; + + // Component 1: MEMORY.md summary + const memorySummary = this.buildMemorySummary(); + if (memorySummary) { + sections.push(memorySummary); + } + + // Component 2: Cluster overview + const clusterOverview = await this.buildClusterOverview(); + if (clusterOverview) { + sections.push(clusterOverview); + } + + // Component 3: Hot entities + const hotEntities = this.buildHotEntities(); + if (hotEntities) { + sections.push(hotEntities); + } + + // Component 4: Solidified patterns + const patterns = this.buildSolidifiedPatterns(); + if (patterns) { + sections.push(patterns); + } + + if (sections.length === 0) { + return ''; + } + + // Assemble and truncate + let content = '\n' + sections.join('\n') + '\n'; + + if (content.length > SESSION_START_LIMIT) { + content = content.substring(0, SESSION_START_LIMIT - 20) + '\n'; + } + + return content; + } + + // --------------------------------------------------------------------------- + // Public: per-prompt context + // --------------------------------------------------------------------------- + + /** + * Build context for per-prompt hook injection. + * + * Uses vector search across all categories to find relevant memories + * matching the current prompt. 
Results are ranked by similarity score. + * + * @param prompt - Current user prompt text + * @returns Context string (<= 500 chars) or empty string + */ + async buildPromptContext(prompt: string): Promise { + if (!prompt || !prompt.trim()) { + return ''; + } + + if (!isUnifiedEmbedderAvailable()) { + return ''; + } + + try { + const vectorIndex = new UnifiedVectorIndex(this.projectPath); + const result = await vectorIndex.search(prompt, { + topK: VECTOR_TOP_K, + minScore: VECTOR_MIN_SCORE, + }); + + if (!result.success || result.matches.length === 0) { + return ''; + } + + return this.formatPromptMatches(result.matches); + } catch { + return ''; + } + } + + // --------------------------------------------------------------------------- + // Public: session-end tasks + // --------------------------------------------------------------------------- + + /** + * Build a list of async tasks to run at session-end. + * + * Tasks: + * 1. Incremental vector embedding (index new/updated content) + * 2. Incremental clustering (cluster unclustered sessions) + * 3. 
Heat score updates (recalculate entity heat scores) + * + * @param sessionId - Current session ID for context + * @returns Array of tasks with name and execute function + */ + buildSessionEndTasks(sessionId: string): SessionEndTask[] { + const tasks: SessionEndTask[] = []; + + // Task 1: Incremental vector embedding + if (isUnifiedEmbedderAvailable()) { + tasks.push({ + name: 'incremental-embedding', + execute: async () => { + try { + const vectorIndex = new UnifiedVectorIndex(this.projectPath); + // Re-index the MEMORY.md content if available + const memoryContent = getMemoryMdContent(this.projectPath); + if (memoryContent) { + await vectorIndex.indexContent(memoryContent, { + source_id: 'MEMORY_MD', + source_type: 'core_memory', + category: 'core_memory', + }); + } + } catch (err) { + // Log but don't throw - session-end tasks are best-effort + if (process.env.DEBUG) { + console.error('[UnifiedContextBuilder] Embedding task failed:', (err as Error).message); + } + } + }, + }); + } + + // Task 2: Incremental clustering + tasks.push({ + name: 'incremental-clustering', + execute: async () => { + try { + const clusteringService = new SessionClusteringService(this.projectPath); + await clusteringService.autocluster({ scope: 'unclustered' }); + } catch (err) { + if (process.env.DEBUG) { + console.error('[UnifiedContextBuilder] Clustering task failed:', (err as Error).message); + } + } + }, + }); + + // Task 3: Heat score updates + tasks.push({ + name: 'heat-score-update', + execute: async () => { + try { + const memoryStore = getMemoryStore(this.projectPath); + const hotEntities = memoryStore.getHotEntities(50); + for (const entity of hotEntities) { + if (entity.id != null) { + memoryStore.calculateHeatScore(entity.id); + } + } + } catch (err) { + if (process.env.DEBUG) { + console.error('[UnifiedContextBuilder] Heat score update failed:', (err as Error).message); + } + } + }, + }); + + return tasks; + } + + // 
--------------------------------------------------------------------------- + // Private: Component builders + // --------------------------------------------------------------------------- + + /** + * Build MEMORY.md summary component. + * Reads MEMORY.md and returns first MEMORY_SUMMARY_LIMIT characters. + */ + private buildMemorySummary(): string { + const content = getMemoryMdContent(this.projectPath); + if (!content) { + return ''; + } + + let summary = content.trim(); + if (summary.length > MEMORY_SUMMARY_LIMIT) { + // Truncate at a newline boundary if possible + const truncated = summary.substring(0, MEMORY_SUMMARY_LIMIT); + const lastNewline = truncated.lastIndexOf('\n'); + summary = lastNewline > MEMORY_SUMMARY_LIMIT * 0.6 + ? truncated.substring(0, lastNewline) + '...' + : truncated + '...'; + } + + return `## Memory Summary\n${summary}\n`; + } + + /** + * Build cluster overview component. + * Shows top N active clusters from the clustering service. + */ + private async buildClusterOverview(): Promise { + try { + const { CoreMemoryStore } = await import('./core-memory-store.js'); + const store = new CoreMemoryStore(this.projectPath); + const clusters = store.listClusters('active'); + + if (clusters.length === 0) { + return ''; + } + + // Sort by most recent activity + const sorted = clusters + .map(c => { + const members = store.getClusterMembers(c.id); + return { cluster: c, memberCount: members.length }; + }) + .sort((a, b) => b.memberCount - a.memberCount) + .slice(0, TOP_CLUSTERS); + + let output = '## Active Clusters\n'; + for (const { cluster, memberCount } of sorted) { + const intent = cluster.intent ? ` - ${cluster.intent}` : ''; + output += `- **${cluster.name}** (${memberCount})${intent}\n`; + } + + return output; + } catch { + return ''; + } + } + + /** + * Build hot entities component. + * Shows top N entities by heat_score that were active within last 7 days. 
+ */ + private buildHotEntities(): string { + try { + const memoryStore = getMemoryStore(this.projectPath); + const allHot = memoryStore.getHotEntities(TOP_HOT_ENTITIES * 3); + + if (allHot.length === 0) { + return ''; + } + + // Filter to entities seen within the last HOT_ENTITY_DAYS days + const cutoff = new Date(); + cutoff.setDate(cutoff.getDate() - HOT_ENTITY_DAYS); + const cutoffStr = cutoff.toISOString(); + + const recentHot = allHot + .filter(e => (e.last_seen_at || '') >= cutoffStr) + .slice(0, TOP_HOT_ENTITIES); + + if (recentHot.length === 0) { + return ''; + } + + let output = '## Hot Entities (7d)\n'; + for (const entity of recentHot) { + const heat = Math.round(entity.stats.heat_score); + output += `- ${entity.type}:${entity.value} (heat:${heat})\n`; + } + + return output; + } catch { + return ''; + } + } + + /** + * Build solidified patterns component. + * Scans skills/*.md files and lists their names. + */ + private buildSolidifiedPatterns(): string { + try { + const skillsDir = this.paths.memoryV2.skills; + if (!existsSync(skillsDir)) { + return ''; + } + + const files = readdirSync(skillsDir).filter(f => f.endsWith('.md')); + if (files.length === 0) { + return ''; + } + + let output = '## Patterns\n'; + for (const file of files.slice(0, 5)) { + const name = basename(file, '.md'); + output += `- ${name}\n`; + } + + return output; + } catch { + return ''; + } + } + + // --------------------------------------------------------------------------- + // Private: Formatting helpers + // --------------------------------------------------------------------------- + + /** + * Format vector search matches for per-prompt context. + * Builds a compact Markdown snippet within PER_PROMPT_LIMIT chars. 
+ */ + private formatPromptMatches(matches: VectorSearchMatch[]): string { + let output = '\n'; + + for (const match of matches) { + const score = Math.round(match.score * 100); + const snippet = match.content.substring(0, 80).replace(/\n/g, ' ').trim(); + const line = `- [${match.category}] ${snippet} (${score}%)\n`; + + // Check if adding this line would exceed limit + if (output.length + line.length + 25 > PER_PROMPT_LIMIT) { + break; + } + output += line; + } + + output += ''; + + return output; + } +} diff --git a/ccw/src/core/unified-memory-service.ts b/ccw/src/core/unified-memory-service.ts new file mode 100644 index 00000000..415188d1 --- /dev/null +++ b/ccw/src/core/unified-memory-service.ts @@ -0,0 +1,488 @@ +/** + * Unified Memory Service - Cross-store search with RRF fusion + * + * Provides a single search() interface that combines: + * - Vector search (HNSW via UnifiedVectorIndex) + * - Full-text search (FTS5 via MemoryStore.searchPrompts) + * - Heat-based scoring (entity heat from MemoryStore) + * + * Fusion: Reciprocal Rank Fusion (RRF) + * score = sum(1 / (k + rank_i) * weight_i) + * k = 60, weights = { vector: 0.6, fts: 0.3, heat: 0.1 } + */ + +import { UnifiedVectorIndex, isUnifiedEmbedderAvailable } from './unified-vector-index.js'; +import type { + VectorCategory, + VectorSearchMatch, + VectorIndexStatus, +} from './unified-vector-index.js'; +import { CoreMemoryStore, getCoreMemoryStore } from './core-memory-store.js'; +import type { CoreMemory } from './core-memory-store.js'; +import { MemoryStore, getMemoryStore } from './memory-store.js'; +import type { PromptHistory, HotEntity } from './memory-store.js'; + +// ============================================================================= +// Types +// ============================================================================= + +/** Options for unified search */ +export interface UnifiedSearchOptions { + /** Maximum number of results to return (default: 20) */ + limit?: number; + /** 
Minimum relevance score threshold (default: 0.0) */ + minScore?: number; + /** Filter by category */ + category?: VectorCategory; + /** Vector search top-k (default: 30, fetched internally for fusion) */ + vectorTopK?: number; + /** FTS search limit (default: 30, fetched internally for fusion) */ + ftsLimit?: number; +} + +/** A unified search result item */ +export interface UnifiedSearchResult { + /** Unique identifier for the source item */ + source_id: string; + /** Source type: core_memory, cli_history, workflow, entity, pattern */ + source_type: string; + /** Fused relevance score (0..1 range, higher is better) */ + score: number; + /** Text content (snippet or full) */ + content: string; + /** Category of the result */ + category: string; + /** Which ranking sources contributed to this result */ + rank_sources: { + vector_rank?: number; + vector_score?: number; + fts_rank?: number; + heat_score?: number; + }; +} + +/** Aggregated statistics from all stores + vector index */ +export interface UnifiedMemoryStats { + core_memories: { + total: number; + archived: number; + }; + stage1_outputs: number; + entities: number; + prompts: number; + conversations: number; + vector_index: { + available: boolean; + total_chunks: number; + hnsw_available: boolean; + hnsw_count: number; + dimension: number; + categories?: Record; + }; +} + +/** KNN recommendation result */ +export interface RecommendationResult { + source_id: string; + source_type: string; + score: number; + content: string; + category: string; +} + +// ============================================================================= +// RRF Constants +// ============================================================================= + +/** RRF smoothing constant (standard value from the original RRF paper) */ +const RRF_K = 60; + +/** Fusion weights */ +const WEIGHT_VECTOR = 0.6; +const WEIGHT_FTS = 0.3; +const WEIGHT_HEAT = 0.1; + +// ============================================================================= 
+// UnifiedMemoryService +// ============================================================================= + +/** + * Unified Memory Service providing cross-store search and recommendations. + * + * Combines vector similarity, full-text search, and entity heat scores + * using Reciprocal Rank Fusion (RRF) for result ranking. + */ +export class UnifiedMemoryService { + private projectPath: string; + private vectorIndex: UnifiedVectorIndex | null = null; + private coreMemoryStore: CoreMemoryStore; + private memoryStore: MemoryStore; + + constructor(projectPath: string) { + this.projectPath = projectPath; + this.coreMemoryStore = getCoreMemoryStore(projectPath); + this.memoryStore = getMemoryStore(projectPath); + + if (isUnifiedEmbedderAvailable()) { + this.vectorIndex = new UnifiedVectorIndex(projectPath); + } + } + + // ========================================================================== + // Search + // ========================================================================== + + /** + * Unified search across all memory stores. + * + * Pipeline: + * 1. Vector search via UnifiedVectorIndex (semantic similarity) + * 2. FTS5 search via MemoryStore.searchPrompts (keyword matching) + * 3. Heat boost via entity heat scores + * 4. 
RRF fusion to combine ranked lists + * + * @param query - Natural language search query + * @param options - Search options + * @returns Fused search results sorted by relevance + */ + async search( + query: string, + options: UnifiedSearchOptions = {} + ): Promise { + const { + limit = 20, + minScore = 0.0, + category, + vectorTopK = 30, + ftsLimit = 30, + } = options; + + // Run vector search and FTS search in parallel + const [vectorResults, ftsResults, hotEntities] = await Promise.all([ + this.runVectorSearch(query, vectorTopK, category), + this.runFtsSearch(query, ftsLimit), + this.getHeatScores(), + ]); + + // Build heat score lookup + const heatMap = new Map(); + for (const entity of hotEntities) { + // Use normalized_value as key for heat lookup + heatMap.set(entity.normalized_value, entity.stats.heat_score); + } + + // Collect all unique source_ids from both result sets + const allSourceIds = new Set(); + const vectorRankMap = new Map(); + const ftsRankMap = new Map(); + + // Build vector rank map + for (let i = 0; i < vectorResults.length; i++) { + const match = vectorResults[i]; + const id = match.source_id; + allSourceIds.add(id); + vectorRankMap.set(id, { rank: i + 1, score: match.score, match }); + } + + // Build FTS rank map + for (let i = 0; i < ftsResults.length; i++) { + const item = ftsResults[i]; + const id = item.session_id; + allSourceIds.add(id); + ftsRankMap.set(id, { rank: i + 1, item }); + } + + // Calculate RRF score for each unique source_id + const results: UnifiedSearchResult[] = []; + + for (const sourceId of allSourceIds) { + const vectorEntry = vectorRankMap.get(sourceId); + const ftsEntry = ftsRankMap.get(sourceId); + + // RRF: score = sum(weight_i / (k + rank_i)) + let rrfScore = 0; + const rankSources: UnifiedSearchResult['rank_sources'] = {}; + + // Vector component + if (vectorEntry) { + rrfScore += WEIGHT_VECTOR / (RRF_K + vectorEntry.rank); + rankSources.vector_rank = vectorEntry.rank; + rankSources.vector_score = 
vectorEntry.score; + } + + // FTS component + if (ftsEntry) { + rrfScore += WEIGHT_FTS / (RRF_K + ftsEntry.rank); + rankSources.fts_rank = ftsEntry.rank; + } + + // Heat component (boost based on entity heat) + const heatScore = this.lookupHeatScore(sourceId, heatMap); + if (heatScore > 0) { + // Normalize heat score to a rank-like value (1 = hottest) + // Use inverse: higher heat = lower rank number = higher contribution + const heatRank = Math.max(1, Math.ceil(100 / (1 + heatScore))); + rrfScore += WEIGHT_HEAT / (RRF_K + heatRank); + rankSources.heat_score = heatScore; + } + + if (rrfScore < minScore) continue; + + // Build result entry + let content = ''; + let sourceType = ''; + let resultCategory = ''; + + if (vectorEntry) { + content = vectorEntry.match.content; + sourceType = vectorEntry.match.source_type; + resultCategory = vectorEntry.match.category; + } else if (ftsEntry) { + content = ftsEntry.item.prompt_text || ftsEntry.item.context_summary || ''; + sourceType = 'cli_history'; + resultCategory = 'cli_history'; + } + + results.push({ + source_id: sourceId, + source_type: sourceType, + score: rrfScore, + content, + category: resultCategory, + rank_sources: rankSources, + }); + } + + // Sort by RRF score descending, take top `limit` + results.sort((a, b) => b.score - a.score); + return results.slice(0, limit); + } + + // ========================================================================== + // Recommendations + // ========================================================================== + + /** + * Get recommendations based on a memory's vector neighbors (KNN). + * + * Fetches the content of the given memory, then runs a vector search + * to find similar content across all stores. 
+ * + * @param memoryId - Core memory ID (CMEM-*) + * @param limit - Number of recommendations (default: 5) + * @returns Recommended items sorted by similarity + */ + async getRecommendations( + memoryId: string, + limit: number = 5 + ): Promise { + // Get the memory content + const memory = this.coreMemoryStore.getMemory(memoryId); + if (!memory) { + return []; + } + + if (!this.vectorIndex) { + return []; + } + + // Use memory content as query for KNN search + // Request extra results so we can filter out self + const searchResult = await this.vectorIndex.search(memory.content, { + topK: limit + 5, + minScore: 0.3, + }); + + if (!searchResult.success) { + return []; + } + + // Filter out self and map to recommendations + const recommendations: RecommendationResult[] = []; + for (const match of searchResult.matches) { + // Skip the source memory itself + if (match.source_id === memoryId) continue; + + recommendations.push({ + source_id: match.source_id, + source_type: match.source_type, + score: match.score, + content: match.content, + category: match.category, + }); + + if (recommendations.length >= limit) break; + } + + return recommendations; + } + + // ========================================================================== + // Statistics + // ========================================================================== + + /** + * Get aggregated statistics from all stores and the vector index. 
+ * + * @returns Unified stats across core memories, V2 outputs, entities, prompts, and vectors + */ + async getStats(): Promise { + // Get core memory stats + const allMemories = this.coreMemoryStore.getMemories({ limit: 100000 }); + const archivedMemories = allMemories.filter(m => m.archived); + const stage1Count = this.coreMemoryStore.countStage1Outputs(); + + // Get memory store stats (entities, prompts, conversations) + const db = (this.memoryStore as any).db; + let entityCount = 0; + let promptCount = 0; + let conversationCount = 0; + + try { + entityCount = (db.prepare('SELECT COUNT(*) as count FROM entities').get() as { count: number }).count; + } catch { /* table may not exist */ } + + try { + promptCount = (db.prepare('SELECT COUNT(*) as count FROM prompt_history').get() as { count: number }).count; + } catch { /* table may not exist */ } + + try { + conversationCount = (db.prepare('SELECT COUNT(*) as count FROM conversations').get() as { count: number }).count; + } catch { /* table may not exist */ } + + // Get vector index status + let vectorStatus: VectorIndexStatus = { + success: false, + total_chunks: 0, + hnsw_available: false, + hnsw_count: 0, + dimension: 0, + }; + + if (this.vectorIndex) { + try { + vectorStatus = await this.vectorIndex.getStatus(); + } catch { + // Vector index not available + } + } + + return { + core_memories: { + total: allMemories.length, + archived: archivedMemories.length, + }, + stage1_outputs: stage1Count, + entities: entityCount, + prompts: promptCount, + conversations: conversationCount, + vector_index: { + available: vectorStatus.success, + total_chunks: vectorStatus.total_chunks, + hnsw_available: vectorStatus.hnsw_available, + hnsw_count: vectorStatus.hnsw_count, + dimension: vectorStatus.dimension, + categories: vectorStatus.categories, + }, + }; + } + + // ========================================================================== + // Internal helpers + // 
========================================================================== + + /** + * Run vector search via UnifiedVectorIndex. + * Returns empty array if vector index is not available. + */ + private async runVectorSearch( + query: string, + topK: number, + category?: VectorCategory + ): Promise { + if (!this.vectorIndex) { + return []; + } + + try { + const result = await this.vectorIndex.search(query, { + topK, + minScore: 0.1, + category, + }); + + if (!result.success) { + return []; + } + + return result.matches; + } catch { + return []; + } + } + + /** + * Run FTS5 full-text search via MemoryStore.searchPrompts. + * Returns empty array on error. + */ + private async runFtsSearch( + query: string, + limit: number + ): Promise { + try { + // FTS5 requires sanitized query (no special characters) + const sanitized = this.sanitizeFtsQuery(query); + if (!sanitized) return []; + + return this.memoryStore.searchPrompts(sanitized, limit); + } catch { + return []; + } + } + + /** + * Get hot entities for heat-based scoring. + */ + private async getHeatScores(): Promise { + try { + return this.memoryStore.getHotEntities(50); + } catch { + return []; + } + } + + /** + * Look up heat score for a source ID. + * Checks if any entity's normalized_value matches the source_id. + */ + private lookupHeatScore( + sourceId: string, + heatMap: Map + ): number { + // Direct match + if (heatMap.has(sourceId)) { + return heatMap.get(sourceId)!; + } + + // Check if source_id is a substring of any entity value (file paths) + for (const [key, score] of heatMap) { + if (sourceId.includes(key) || key.includes(sourceId)) { + return score; + } + } + + return 0; + } + + /** + * Sanitize a query string for FTS5 MATCH syntax. + * Removes special characters that would cause FTS5 parse errors. 
+ */ + private sanitizeFtsQuery(query: string): string { + // Remove FTS5 special operators and punctuation + return query + .replace(/[*":(){}[\]^~\\/<>!@#$%&=+|;,.'`]/g, ' ') + .replace(/\s+/g, ' ') + .trim(); + } +} diff --git a/ccw/src/core/unified-vector-index.ts b/ccw/src/core/unified-vector-index.ts new file mode 100644 index 00000000..1b9571a9 --- /dev/null +++ b/ccw/src/core/unified-vector-index.ts @@ -0,0 +1,474 @@ +/** + * Unified Vector Index - TypeScript bridge to unified_memory_embedder.py + * + * Provides HNSW-backed vector indexing and search for all memory content + * (core_memory, cli_history, workflow, entity, pattern) via CodexLens VectorStore. + * + * Features: + * - JSON stdin/stdout protocol to Python embedder + * - Content chunking (paragraph -> sentence splitting, CHUNK_SIZE=1500, OVERLAP=200) + * - Batch embedding via CodexLens EmbedderFactory + * - HNSW approximate nearest neighbor search (sub-10ms for 1000 chunks) + * - Category-based filtering + */ + +import { spawn } from 'child_process'; +import { join, dirname } from 'path'; +import { existsSync } from 'fs'; +import { fileURLToPath } from 'url'; +import { getCodexLensPython } from '../utils/codexlens-path.js'; +import { StoragePaths, ensureStorageDir } from '../config/storage-paths.js'; + +// Get directory of this module +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +// Venv python path (reuse CodexLens venv) +const VENV_PYTHON = getCodexLensPython(); + +// Script path +const EMBEDDER_SCRIPT = join(__dirname, '..', '..', 'scripts', 'unified_memory_embedder.py'); + +// Chunking constants (match existing core-memory-store.ts) +const CHUNK_SIZE = 1500; +const OVERLAP = 200; + +// ============================================================================= +// Types +// ============================================================================= + +/** Valid source types for vector content */ +export type SourceType = 'core_memory' | 
'workflow' | 'cli_history'; + +/** Valid category values for vector filtering */ +export type VectorCategory = 'core_memory' | 'cli_history' | 'workflow' | 'entity' | 'pattern'; + +/** Metadata attached to each chunk in the vector store */ +export interface ChunkMetadata { + /** Source identifier (e.g., memory ID, session ID) */ + source_id: string; + /** Source type */ + source_type: SourceType; + /** Category for filtering */ + category: VectorCategory; + /** Chunk index within the source */ + chunk_index?: number; + /** Additional metadata */ + [key: string]: unknown; +} + +/** A chunk to be embedded and indexed */ +export interface VectorChunk { + /** Text content */ + content: string; + /** Source identifier */ + source_id: string; + /** Source type */ + source_type: SourceType; + /** Category for filtering */ + category: VectorCategory; + /** Chunk index */ + chunk_index: number; + /** Additional metadata */ + metadata?: Record; +} + +/** Result of an embed operation */ +export interface EmbedResult { + success: boolean; + chunks_processed: number; + chunks_failed: number; + elapsed_time: number; + error?: string; +} + +/** A single search match */ +export interface VectorSearchMatch { + content: string; + score: number; + source_id: string; + source_type: string; + chunk_index: number; + category: string; + metadata: Record; +} + +/** Result of a search operation */ +export interface VectorSearchResult { + success: boolean; + matches: VectorSearchMatch[]; + elapsed_time?: number; + total_searched?: number; + error?: string; +} + +/** Search options */ +export interface VectorSearchOptions { + topK?: number; + minScore?: number; + category?: VectorCategory; +} + +/** Index status information */ +export interface VectorIndexStatus { + success: boolean; + total_chunks: number; + hnsw_available: boolean; + hnsw_count: number; + dimension: number; + categories?: Record; + model_config?: { + backend: string; + profile: string; + dimension: number; + max_tokens: 
number; + }; + error?: string; +} + +/** Reindex result */ +export interface ReindexResult { + success: boolean; + hnsw_count?: number; + elapsed_time?: number; + error?: string; +} + +// ============================================================================= +// Python Bridge +// ============================================================================= + +/** + * Check if the unified embedder is available (venv and script exist) + */ +export function isUnifiedEmbedderAvailable(): boolean { + if (!existsSync(VENV_PYTHON)) { + return false; + } + if (!existsSync(EMBEDDER_SCRIPT)) { + return false; + } + return true; +} + +/** + * Run Python script with JSON stdin/stdout protocol. + * + * @param request - JSON request object to send via stdin + * @param timeout - Timeout in milliseconds (default: 5 minutes) + * @returns Parsed JSON response + */ +function runPython(request: Record, timeout: number = 300000): Promise { + return new Promise((resolve, reject) => { + if (!isUnifiedEmbedderAvailable()) { + reject( + new Error( + 'Unified embedder not available. 
Ensure CodexLens venv exists at ~/.codexlens/venv' + ) + ); + return; + } + + const child = spawn(VENV_PYTHON, [EMBEDDER_SCRIPT], { + stdio: ['pipe', 'pipe', 'pipe'], + timeout, + }); + + let stdout = ''; + let stderr = ''; + + child.stdout.on('data', (data) => { + stdout += data.toString(); + }); + + child.stderr.on('data', (data) => { + stderr += data.toString(); + }); + + child.on('close', (code) => { + if (code === 0 && stdout.trim()) { + try { + resolve(JSON.parse(stdout.trim()) as T); + } catch { + reject(new Error(`Failed to parse Python output: ${stdout.substring(0, 500)}`)); + } + } else { + reject(new Error(`Python script failed (exit code ${code}): ${stderr || stdout}`)); + } + }); + + child.on('error', (err) => { + if ((err as NodeJS.ErrnoException).code === 'ETIMEDOUT') { + reject(new Error('Python script timed out')); + } else { + reject(new Error(`Failed to spawn Python: ${err.message}`)); + } + }); + + // Write JSON request to stdin and close + const jsonInput = JSON.stringify(request); + child.stdin.write(jsonInput); + child.stdin.end(); + }); +} + +// ============================================================================= +// Content Chunking +// ============================================================================= + +/** + * Chunk content into smaller pieces for embedding. + * Uses paragraph-first, sentence-fallback strategy with overlap. + * + * Matches the chunking logic in core-memory-store.ts: + * - CHUNK_SIZE = 1500 characters + * - OVERLAP = 200 characters + * - Split by paragraph boundaries (\n\n) first + * - Fall back to sentence boundaries (. 
) for oversized paragraphs + * + * @param content - Text content to chunk + * @returns Array of chunk strings + */ +export function chunkContent(content: string): string[] { + const chunks: string[] = []; + + // Split by paragraph boundaries first + const paragraphs = content.split(/\n\n+/); + let currentChunk = ''; + + for (const paragraph of paragraphs) { + // If adding this paragraph would exceed chunk size + if (currentChunk.length + paragraph.length > CHUNK_SIZE && currentChunk.length > 0) { + chunks.push(currentChunk.trim()); + + // Start new chunk with overlap + const overlapText = currentChunk.slice(-OVERLAP); + currentChunk = overlapText + '\n\n' + paragraph; + } else { + currentChunk += (currentChunk ? '\n\n' : '') + paragraph; + } + } + + // Add remaining chunk + if (currentChunk.trim()) { + chunks.push(currentChunk.trim()); + } + + // If chunks are still too large, split by sentences + const finalChunks: string[] = []; + for (const chunk of chunks) { + if (chunk.length <= CHUNK_SIZE) { + finalChunks.push(chunk); + } else { + // Split by sentence boundaries + const sentences = chunk.split(/\. +/); + let sentenceChunk = ''; + + for (const sentence of sentences) { + const sentenceWithPeriod = sentence + '. '; + if ( + sentenceChunk.length + sentenceWithPeriod.length > CHUNK_SIZE && + sentenceChunk.length > 0 + ) { + finalChunks.push(sentenceChunk.trim()); + const overlapText = sentenceChunk.slice(-OVERLAP); + sentenceChunk = overlapText + sentenceWithPeriod; + } else { + sentenceChunk += sentenceWithPeriod; + } + } + + if (sentenceChunk.trim()) { + finalChunks.push(sentenceChunk.trim()); + } + } + } + + return finalChunks.length > 0 ? finalChunks : [content]; +} + +// ============================================================================= +// UnifiedVectorIndex Class +// ============================================================================= + +/** + * Unified vector index backed by CodexLens VectorStore (HNSW). 
+ * + * Provides content chunking, embedding, storage, and search for all + * memory content types through a single interface. + */ +export class UnifiedVectorIndex { + private storePath: string; + + /** + * Create a UnifiedVectorIndex for a project. + * + * @param projectPath - Project root path (used to resolve storage location) + */ + constructor(projectPath: string) { + const paths = StoragePaths.project(projectPath); + this.storePath = paths.unifiedVectors.root; + ensureStorageDir(this.storePath); + } + + /** + * Index content by chunking, embedding, and storing in VectorStore. + * + * @param content - Text content to index + * @param metadata - Metadata for all chunks (source_id, source_type, category) + * @returns Embed result + */ + async indexContent( + content: string, + metadata: ChunkMetadata + ): Promise { + if (!content.trim()) { + return { + success: true, + chunks_processed: 0, + chunks_failed: 0, + elapsed_time: 0, + }; + } + + // Chunk content + const textChunks = chunkContent(content); + + // Build chunk objects for Python + const chunks: VectorChunk[] = textChunks.map((text, index) => ({ + content: text, + source_id: metadata.source_id, + source_type: metadata.source_type, + category: metadata.category, + chunk_index: metadata.chunk_index != null ? metadata.chunk_index + index : index, + metadata: { ...metadata }, + })); + + try { + const result = await runPython({ + operation: 'embed', + store_path: this.storePath, + chunks, + batch_size: 8, + }); + return result; + } catch (err) { + return { + success: false, + chunks_processed: 0, + chunks_failed: textChunks.length, + elapsed_time: 0, + error: (err as Error).message, + }; + } + } + + /** + * Search the vector index using semantic similarity. 
+ * + * @param query - Natural language search query + * @param options - Search options (topK, minScore, category) + * @returns Search results sorted by relevance + */ + async search( + query: string, + options: VectorSearchOptions = {} + ): Promise { + const { topK = 10, minScore = 0.3, category } = options; + + try { + const result = await runPython({ + operation: 'search', + store_path: this.storePath, + query, + top_k: topK, + min_score: minScore, + category: category || null, + }); + return result; + } catch (err) { + return { + success: false, + matches: [], + error: (err as Error).message, + }; + } + } + + /** + * Search the vector index using a pre-computed embedding vector. + * Bypasses text embedding, directly querying HNSW with a raw vector. + * + * @param vector - Pre-computed embedding vector (array of floats) + * @param options - Search options (topK, minScore, category) + * @returns Search results sorted by relevance + */ + async searchByVector( + vector: number[], + options: VectorSearchOptions = {} + ): Promise { + const { topK = 10, minScore = 0.3, category } = options; + + try { + const result = await runPython({ + operation: 'search_by_vector', + store_path: this.storePath, + vector, + top_k: topK, + min_score: minScore, + category: category || null, + }); + return result; + } catch (err) { + return { + success: false, + matches: [], + error: (err as Error).message, + }; + } + } + + /** + * Rebuild the HNSW index from scratch. + * + * @returns Reindex result + */ + async reindexAll(): Promise { + try { + const result = await runPython({ + operation: 'reindex', + store_path: this.storePath, + }); + return result; + } catch (err) { + return { + success: false, + error: (err as Error).message, + }; + } + } + + /** + * Get the current status of the vector index. 
+ * + * @returns Index status including chunk counts, HNSW availability, dimension + */ + async getStatus(): Promise { + try { + const result = await runPython({ + operation: 'status', + store_path: this.storePath, + }); + return result; + } catch (err) { + return { + success: false, + total_chunks: 0, + hnsw_available: false, + hnsw_count: 0, + dimension: 0, + error: (err as Error).message, + }; + } + } +} diff --git a/ccw/src/tools/ask-question.ts b/ccw/src/tools/ask-question.ts index 6eccfd42..86430cf4 100644 --- a/ccw/src/tools/ask-question.ts +++ b/ccw/src/tools/ask-question.ts @@ -17,6 +17,7 @@ import type { } from '../core/a2ui/A2UITypes.js'; import http from 'http'; import { a2uiWebSocketHandler } from '../core/a2ui/A2UIWebSocketHandler.js'; +import { remoteNotificationService } from '../core/services/remote-notification-service.js'; const DASHBOARD_PORT = Number(process.env.CCW_PORT || 3456); const POLL_INTERVAL_MS = 1000; @@ -466,6 +467,14 @@ export async function execute(params: AskQuestionParams): Promise q.question).join('\n'); + remoteNotificationService.sendNotification('ask-user-question', { + sessionId: compositeId, + questionText: questionTexts, + }); + } + // If no local WS clients, start HTTP polling for answer from Dashboard if (sentCount === 0) { startAnswerPolling(compositeId, true); diff --git a/ccw/src/types/remote-notification.ts b/ccw/src/types/remote-notification.ts new file mode 100644 index 00000000..c7968131 --- /dev/null +++ b/ccw/src/types/remote-notification.ts @@ -0,0 +1,227 @@ +// ======================================== +// Remote Notification Types +// ======================================== +// Type definitions for remote notification system +// Supports Discord, Telegram, and Generic Webhook platforms + +/** + * Supported notification platforms + */ +export type NotificationPlatform = 'discord' | 'telegram' | 'webhook'; + +/** + * Event types that can trigger notifications + */ +export type NotificationEventType = + | 
'ask-user-question' // AskUserQuestion triggered + | 'session-start' // CLI session started + | 'session-end' // CLI session ended + | 'task-completed' // Task completed successfully + | 'task-failed'; // Task failed + +/** + * Discord platform configuration + */ +export interface DiscordConfig { + /** Whether Discord notifications are enabled */ + enabled: boolean; + /** Discord webhook URL */ + webhookUrl: string; + /** Optional custom username for the webhook */ + username?: string; + /** Optional avatar URL for the webhook */ + avatarUrl?: string; +} + +/** + * Telegram platform configuration + */ +export interface TelegramConfig { + /** Whether Telegram notifications are enabled */ + enabled: boolean; + /** Telegram bot token */ + botToken: string; + /** Telegram chat ID (user or group) */ + chatId: string; + /** Optional parse mode (HTML, Markdown, MarkdownV2) */ + parseMode?: 'HTML' | 'Markdown' | 'MarkdownV2'; +} + +/** + * Generic Webhook platform configuration + */ +export interface WebhookConfig { + /** Whether webhook notifications are enabled */ + enabled: boolean; + /** Webhook URL */ + url: string; + /** HTTP method (POST or PUT) */ + method: 'POST' | 'PUT'; + /** Custom headers */ + headers?: Record; + /** Request timeout in milliseconds */ + timeout?: number; +} + +/** + * Event configuration - maps events to platforms + */ +export interface EventConfig { + /** Event type */ + event: NotificationEventType; + /** Platforms to notify for this event */ + platforms: NotificationPlatform[]; + /** Whether this event's notifications are enabled */ + enabled: boolean; +} + +/** + * Full remote notification configuration + */ +export interface RemoteNotificationConfig { + /** Master switch for all remote notifications */ + enabled: boolean; + /** Platform-specific configurations */ + platforms: { + discord?: DiscordConfig; + telegram?: TelegramConfig; + webhook?: WebhookConfig; + }; + /** Event-to-platform mappings */ + events: EventConfig[]; + /** Global 
timeout for all notification requests (ms) */ + timeout: number; +} + +/** + * Context passed when sending a notification + */ +export interface NotificationContext { + /** Event type that triggered the notification */ + eventType: NotificationEventType; + /** Session ID if applicable */ + sessionId?: string; + /** Question text for ask-user-question events */ + questionText?: string; + /** Task description for task events */ + taskDescription?: string; + /** Error message for task-failed events */ + errorMessage?: string; + /** Timestamp of the event */ + timestamp: string; + /** Additional metadata */ + metadata?: Record; +} + +/** + * Result of a single platform notification attempt + */ +export interface PlatformNotificationResult { + /** Platform that was notified */ + platform: NotificationPlatform; + /** Whether the notification succeeded */ + success: boolean; + /** Error message if failed */ + error?: string; + /** Response time in milliseconds */ + responseTime?: number; +} + +/** + * Result of sending notifications to all configured platforms + */ +export interface NotificationDispatchResult { + /** Whether at least one notification succeeded */ + success: boolean; + /** Results for each platform */ + results: PlatformNotificationResult[]; + /** Total dispatch time in milliseconds */ + totalTime: number; +} + +/** + * Test notification request + */ +export interface TestNotificationRequest { + /** Platform to test */ + platform: NotificationPlatform; + /** Platform configuration to test (temporary, not saved) */ + config: DiscordConfig | TelegramConfig | WebhookConfig; +} + +/** + * Test notification result + */ +export interface TestNotificationResult { + /** Whether the test succeeded */ + success: boolean; + /** Error message if failed */ + error?: string; + /** Response time in milliseconds */ + responseTime?: number; +} + +/** + * Default configuration values + */ +export const DEFAULT_REMOTE_NOTIFICATION_CONFIG: RemoteNotificationConfig = { + 
enabled: false, + platforms: {}, + events: [ + { event: 'ask-user-question', platforms: ['discord', 'telegram'], enabled: true }, + { event: 'session-start', platforms: [], enabled: false }, + { event: 'session-end', platforms: [], enabled: false }, + { event: 'task-completed', platforms: [], enabled: false }, + { event: 'task-failed', platforms: ['discord', 'telegram'], enabled: true }, + ], + timeout: 10000, // 10 seconds +}; + +/** + * Mask sensitive fields in config for API responses + */ +export function maskSensitiveConfig(config: RemoteNotificationConfig): RemoteNotificationConfig { + return { + ...config, + platforms: { + discord: config.platforms.discord ? { + ...config.platforms.discord, + webhookUrl: maskWebhookUrl(config.platforms.discord.webhookUrl), + } : undefined, + telegram: config.platforms.telegram ? { + ...config.platforms.telegram, + botToken: maskToken(config.platforms.telegram.botToken), + } : undefined, + webhook: config.platforms.webhook ? { + ...config.platforms.webhook, + // Don't mask webhook URL as it's needed for display + } : undefined, + }, + }; +} + +/** + * Mask webhook URL for display (show only domain and last part) + */ +function maskWebhookUrl(url: string): string { + if (!url) return ''; + try { + const parsed = new URL(url); + const pathParts = parsed.pathname.split('/'); + const lastPart = pathParts[pathParts.length - 1]; + if (lastPart && lastPart.length > 8) { + return `${parsed.origin}/.../${lastPart.slice(0, 4)}****`; + } + return `${parsed.origin}/****`; + } catch { + return '****'; + } +} + +/** + * Mask bot token for display + */ +function maskToken(token: string): string { + if (!token || token.length < 10) return '****'; + return `${token.slice(0, 6)}****${token.slice(-4)}`; +} diff --git a/ccw/src/types/util.ts b/ccw/src/types/util.ts new file mode 100644 index 00000000..4dd6299a --- /dev/null +++ b/ccw/src/types/util.ts @@ -0,0 +1,75 @@ +// ======================================== +// Utility Types +// 
======================================== +// Common utility type definitions + +/** + * Deep partial type - makes all nested properties optional + */ +export type DeepPartial = T extends object + ? { + [P in keyof T]?: DeepPartial; + } + : T; + +/** + * Make specific keys optional + */ +export type PartialBy = Omit & Partial>; + +/** + * Make specific keys required + */ +export type RequiredBy = Omit & Required>; + +/** + * Extract function parameter types + */ +export type Parameters = T extends (...args: infer P) => unknown ? P : never; + +/** + * Extract function return type + */ +export type ReturnType = T extends (...args: unknown[]) => infer R ? R : never; + +// ======================================== +// Utility Functions +// ======================================== + +/** + * Deep merge utility for configuration updates + * Recursively merges source into target, preserving nested objects + */ +export function deepMerge>( + target: T, + source: DeepPartial +): T { + const result = { ...target } as T; + + for (const key in source) { + if (Object.prototype.hasOwnProperty.call(source, key)) { + const sourceValue = source[key]; + const targetValue = target[key]; + + if ( + sourceValue !== undefined && + sourceValue !== null && + typeof sourceValue === 'object' && + !Array.isArray(sourceValue) && + targetValue !== undefined && + targetValue !== null && + typeof targetValue === 'object' && + !Array.isArray(targetValue) + ) { + (result as Record)[key] = deepMerge( + targetValue as Record, + sourceValue as DeepPartial> + ); + } else if (sourceValue !== undefined) { + (result as Record)[key] = sourceValue; + } + } + } + + return result; +} diff --git a/codex-lens/pyproject.toml b/codex-lens/pyproject.toml index 9819bfe6..2349f43e 100644 --- a/codex-lens/pyproject.toml +++ b/codex-lens/pyproject.toml @@ -22,6 +22,9 @@ dependencies = [ "tree-sitter-typescript>=0.23", "pathspec>=0.11", "watchdog>=3.0", + # ast-grep for pattern-based AST matching (PyO3 bindings) + # 
Note: May have compatibility issues with Python 3.13 + "ast-grep-py>=0.3.0; python_version < '3.13'", ] [project.optional-dependencies] diff --git a/codex-lens/src/codexlens/config.py b/codex-lens/src/codexlens/config.py index 8d5cc3ef..84577184 100644 --- a/codex-lens/src/codexlens/config.py +++ b/codex-lens/src/codexlens/config.py @@ -189,6 +189,9 @@ class Config: api_batch_size_max: int = 2048 # Absolute upper limit for batch size chars_per_token_estimate: int = 4 # Characters per token estimation ratio + # Parser configuration + use_astgrep: bool = False # Use ast-grep for Python relationship extraction (tree-sitter is default) + def __post_init__(self) -> None: try: self.data_dir = self.data_dir.expanduser().resolve() diff --git a/codex-lens/src/codexlens/parsers/__init__.py b/codex-lens/src/codexlens/parsers/__init__.py index f2ecfe78..a96ed9a7 100644 --- a/codex-lens/src/codexlens/parsers/__init__.py +++ b/codex-lens/src/codexlens/parsers/__init__.py @@ -3,6 +3,12 @@ from __future__ import annotations from .factory import ParserFactory +from .astgrep_binding import AstGrepBinding, is_astgrep_available, get_supported_languages -__all__ = ["ParserFactory"] +__all__ = [ + "ParserFactory", + "AstGrepBinding", + "is_astgrep_available", + "get_supported_languages", +] diff --git a/codex-lens/src/codexlens/parsers/astgrep_binding.py b/codex-lens/src/codexlens/parsers/astgrep_binding.py new file mode 100644 index 00000000..16985961 --- /dev/null +++ b/codex-lens/src/codexlens/parsers/astgrep_binding.py @@ -0,0 +1,320 @@ +"""ast-grep based parser binding for CodexLens. + +Provides AST-level pattern matching via ast-grep-py (PyO3 bindings). + +Note: This module wraps the official ast-grep Python bindings for pattern-based +code analysis. If ast-grep-py is unavailable, the parser returns None gracefully. +Callers should use tree-sitter or regex-based fallbacks. 
+""" + +from __future__ import annotations + +import logging +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +# Import patterns from centralized definition (avoid duplication) +from codexlens.parsers.patterns.python import get_pattern, PYTHON_PATTERNS + +# Graceful import pattern following treesitter_parser.py convention +try: + from ast_grep_py import SgNode, SgRoot + ASTGREP_AVAILABLE = True +except ImportError: + SgNode = None # type: ignore[assignment,misc] + SgRoot = None # type: ignore[assignment,misc] + ASTGREP_AVAILABLE = False + +log = logging.getLogger(__name__) + + +class AstGrepBinding: + """Wrapper for ast-grep-py bindings with CodexLens integration. + + Provides pattern-based AST matching for code relationship extraction. + Uses declarative patterns with metavariables ($A, $$ARGS) for matching. + """ + + # Language ID mapping to ast-grep language names + LANGUAGE_MAP = { + "python": "python", + "javascript": "javascript", + "typescript": "typescript", + "tsx": "tsx", + } + + def __init__(self, language_id: str, path: Optional[Path] = None) -> None: + """Initialize ast-grep binding for a language. + + Args: + language_id: Language identifier (python, javascript, typescript, tsx) + path: Optional file path for language variant detection + """ + self.language_id = language_id + self.path = path + self._language: Optional[str] = None + self._root: Optional[SgRoot] = None # type: ignore[valid-type] + + if ASTGREP_AVAILABLE: + self._initialize_language() + + def _initialize_language(self) -> None: + """Initialize ast-grep language setting.""" + # Detect TSX from file extension + if self.language_id == "typescript" and self.path is not None: + if self.path.suffix.lower() == ".tsx": + self._language = "tsx" + return + + self._language = self.LANGUAGE_MAP.get(self.language_id) + + def is_available(self) -> bool: + """Check if ast-grep binding is available and ready. 
+ + Returns: + True if ast-grep-py is installed and language is supported + """ + return ASTGREP_AVAILABLE and self._language is not None + + def parse(self, source_code: str) -> bool: + """Parse source code into ast-grep syntax tree. + + Args: + source_code: Source code text to parse + + Returns: + True if parsing succeeds, False otherwise + """ + if not self.is_available() or SgRoot is None: + return False + + try: + self._root = SgRoot(source_code, self._language) # type: ignore[misc] + return True + except (ValueError, TypeError, RuntimeError) as e: + log.debug(f"ast-grep parse error: {e}") + self._root = None + return False + + def find_all(self, pattern: str) -> List[SgNode]: # type: ignore[valid-type] + """Find all matches for a pattern in the parsed source. + + Args: + pattern: ast-grep pattern string (e.g., "class $NAME($$$BASES) $$$BODY") + + Returns: + List of matching SgNode objects, empty if no matches or not parsed + """ + if not self.is_available() or self._root is None: + return [] + + try: + root_node = self._root.root() + # ast-grep-py 0.40+ requires dict config format + config = {"rule": {"pattern": pattern}} + return list(root_node.find_all(config)) + except (ValueError, TypeError, AttributeError) as e: + log.debug(f"ast-grep find_all error: {e}") + return [] + + def find_inheritance(self) -> List[Dict[str, str]]: + """Find all class inheritance declarations. + + Returns: + List of dicts with 'class_name' and 'bases' keys + """ + if self.language_id != "python": + return [] + + matches = self.find_all(get_pattern("class_with_bases")) + results: List[Dict[str, str]] = [] + + for node in matches: + class_name = self._get_match(node, "NAME") + if class_name: + results.append({ + "class_name": class_name, + "bases": self._get_match(node, "BASES"), # Base classes text + }) + + return results + + def find_calls(self) -> List[Dict[str, str]]: + """Find all function/method calls. 
+ + Returns: + List of dicts with 'function' and 'line' keys + """ + if self.language_id != "python": + return [] + + matches = self.find_all(get_pattern("call")) + results: List[Dict[str, str]] = [] + + for node in matches: + func_name = self._get_match(node, "FUNC") + if func_name: + # Skip self. and cls. prefixed calls + base = func_name.split(".", 1)[0] + if base not in {"self", "cls"}: + results.append({ + "function": func_name, + "line": str(self._get_line_number(node)), + }) + + return results + + def find_imports(self) -> List[Dict[str, str]]: + """Find all import statements. + + Returns: + List of dicts with 'module' and 'type' keys + """ + if self.language_id != "python": + return [] + + results: List[Dict[str, str]] = [] + + # Find 'import X' statements + import_matches = self.find_all(get_pattern("import_stmt")) + for node in import_matches: + module = self._get_match(node, "MODULE") + if module: + results.append({ + "module": module, + "type": "import", + "line": str(self._get_line_number(node)), + }) + + # Find 'from X import Y' statements + from_matches = self.find_all(get_pattern("import_from")) + for node in from_matches: + module = self._get_match(node, "MODULE") + names = self._get_match(node, "NAMES") + if module: + results.append({ + "module": module, + "names": names or "", + "type": "from_import", + "line": str(self._get_line_number(node)), + }) + + return results + + def _get_match(self, node: SgNode, metavar: str) -> str: # type: ignore[valid-type] + """Extract matched metavariable value from node. 
+ + Args: + node: SgNode with match + metavar: Metavariable name (without $ prefix) + + Returns: + Matched text or empty string + """ + if node is None: + return "" + try: + match = node.get_match(metavar) + if match is not None: + return match.text() + except (ValueError, AttributeError, KeyError) as e: + log.debug(f"ast-grep get_match error for {metavar}: {e}") + return "" + + def _get_node_text(self, node: SgNode) -> str: # type: ignore[valid-type] + """Get full text of a node. + + Args: + node: SgNode to extract text from + + Returns: + Node's text content + """ + if node is None: + return "" + try: + return node.text() + except (ValueError, AttributeError) as e: + log.debug(f"ast-grep get_node_text error: {e}") + return "" + + def _get_line_number(self, node: SgNode) -> int: # type: ignore[valid-type] + """Get starting line number of a node. + + Args: + node: SgNode to get line number for + + Returns: + 1-based line number + """ + if node is None: + return 0 + try: + range_info = node.range() + # ast-grep-py 0.40+ returns Range object with .start.line attribute + if hasattr(range_info, 'start') and hasattr(range_info.start, 'line'): + return range_info.start.line + 1 # Convert to 1-based + # Fallback for string format "(0,0)-(1,8)" + if isinstance(range_info, str) and range_info: + start_part = range_info.split('-')[0].strip('()') + start_line = int(start_part.split(',')[0]) + return start_line + 1 + except (ValueError, AttributeError, TypeError, IndexError) as e: + log.debug(f"ast-grep get_line_number error: {e}") + return 0 + + def _get_line_range(self, node: SgNode) -> Tuple[int, int]: # type: ignore[valid-type] + """Get line range (start, end) of a node. 
+ + Args: + node: SgNode to get line range for + + Returns: + Tuple of (start_line, end_line), both 1-based inclusive + """ + if node is None: + return (0, 0) + try: + range_info = node.range() + # ast-grep-py 0.40+ returns Range object with .start.line and .end.line + if hasattr(range_info, 'start') and hasattr(range_info, 'end'): + start_line = getattr(range_info.start, 'line', 0) + end_line = getattr(range_info.end, 'line', 0) + return (start_line + 1, end_line + 1) # Convert to 1-based + # Fallback for string format "(0,0)-(1,8)" + if isinstance(range_info, str) and range_info: + parts = range_info.split('-') + start_part = parts[0].strip('()') + end_part = parts[1].strip('()') + start_line = int(start_part.split(',')[0]) + end_line = int(end_part.split(',')[0]) + return (start_line + 1, end_line + 1) + except (ValueError, AttributeError, TypeError, IndexError) as e: + log.debug(f"ast-grep get_line_range error: {e}") + return (0, 0) + + def get_language(self) -> Optional[str]: + """Get the configured ast-grep language. + + Returns: + Language string or None if not configured + """ + return self._language + + +def is_astgrep_available() -> bool: + """Check if ast-grep-py is installed and available. + + Returns: + True if ast-grep bindings can be imported + """ + return ASTGREP_AVAILABLE + + +def get_supported_languages() -> List[str]: + """Get list of supported languages for ast-grep. + + Returns: + List of language identifiers + """ + return list(AstGrepBinding.LANGUAGE_MAP.keys()) diff --git a/codex-lens/src/codexlens/parsers/astgrep_processor.py b/codex-lens/src/codexlens/parsers/astgrep_processor.py new file mode 100644 index 00000000..2fac1cbb --- /dev/null +++ b/codex-lens/src/codexlens/parsers/astgrep_processor.py @@ -0,0 +1,931 @@ +"""Ast-grep based processor for Python relationship extraction. + +Provides pattern-based AST matching for extracting code relationships +(inheritance, calls, imports) from Python source code. 
+ +This processor wraps the ast-grep-py bindings and provides a higher-level +interface for relationship extraction, similar to TreeSitterSymbolParser. + +Design Pattern: + - Follows TreeSitterSymbolParser class structure for consistency + - Uses declarative patterns defined in patterns/python/__init__.py + - Provides scope-aware relationship extraction with alias resolution +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +from codexlens.entities import CodeRelationship, IndexedFile, RelationshipType, Symbol + +# Import patterns module +from codexlens.parsers.patterns.python import ( + PYTHON_PATTERNS, + get_pattern, + get_metavar, +) + +# Graceful import pattern following existing convention +try: + from ast_grep_py import SgNode, SgRoot + from codexlens.parsers.astgrep_binding import AstGrepBinding, ASTGREP_AVAILABLE +except ImportError: + SgNode = None # type: ignore[assignment,misc] + SgRoot = None # type: ignore[assignment,misc] + AstGrepBinding = None # type: ignore[assignment,misc] + ASTGREP_AVAILABLE = False + + +class BaseAstGrepProcessor(ABC): + """Abstract base class for ast-grep based processors. + + Provides common infrastructure for pattern-based AST processing. + Subclasses implement language-specific pattern processing logic. + """ + + def __init__(self, language_id: str, path: Optional[Path] = None) -> None: + """Initialize processor for a language. + + Args: + language_id: Language identifier (python, javascript, typescript) + path: Optional file path for language variant detection + """ + self.language_id = language_id + self.path = path + self._binding: Optional[AstGrepBinding] = None + + if ASTGREP_AVAILABLE and AstGrepBinding is not None: + self._binding = AstGrepBinding(language_id, path) + + def is_available(self) -> bool: + """Check if ast-grep processor is available. 
+ + Returns: + True if ast-grep binding is ready + """ + return self._binding is not None and self._binding.is_available() + + def run_ast_grep(self, source_code: str, pattern: str) -> List[SgNode]: # type: ignore[valid-type] + """Execute ast-grep pattern matching on source code. + + Args: + source_code: Source code text to analyze + pattern: ast-grep pattern string + + Returns: + List of matching SgNode objects, empty if no matches or unavailable + """ + if not self.is_available() or self._binding is None: + return [] + + if not self._binding.parse(source_code): + return [] + + return self._binding.find_all(pattern) + + @abstractmethod + def process_matches( + self, + matches: List[SgNode], # type: ignore[valid-type] + source_code: str, + path: Path, + ) -> List[CodeRelationship]: + """Process ast-grep matches into code relationships. + + Args: + matches: List of matched SgNode objects + source_code: Original source code + path: File path being processed + + Returns: + List of extracted code relationships + """ + pass + + @abstractmethod + def parse(self, text: str, path: Path) -> Optional[IndexedFile]: + """Parse source code and extract relationships. + + Args: + text: Source code text + path: File path + + Returns: + IndexedFile with symbols and relationships, None if unavailable + """ + pass + + +class AstGrepPythonProcessor(BaseAstGrepProcessor): + """Python-specific ast-grep processor for relationship extraction. + + Extracts INHERITS, CALLS, and IMPORTS relationships from Python code + using declarative ast-grep patterns with scope-aware processing. + """ + + def __init__(self, path: Optional[Path] = None) -> None: + """Initialize Python processor. + + Args: + path: Optional file path (for consistency with base class) + """ + super().__init__("python", path) + + def parse(self, text: str, path: Path) -> Optional[IndexedFile]: + """Parse Python source code and extract relationships. 
+ + Args: + text: Python source code text + path: File path + + Returns: + IndexedFile with symbols and relationships, None if unavailable + """ + if not self.is_available(): + return None + + try: + symbols = self._extract_symbols(text) + relationships = self._extract_relationships(text, path) + + return IndexedFile( + path=str(path.resolve()), + language="python", + symbols=symbols, + chunks=[], + relationships=relationships, + ) + except (ValueError, TypeError, AttributeError) as e: + # Log specific parsing errors for debugging + import logging + logging.getLogger(__name__).debug(f"ast-grep parsing error: {e}") + return None + + def _extract_symbols(self, source_code: str) -> List[Symbol]: + """Extract Python symbols (classes, functions, methods). + + Args: + source_code: Python source code + + Returns: + List of Symbol objects + """ + symbols: List[Symbol] = [] + + # Collect all scope definitions with line ranges for proper method detection + # Format: (start_line, end_line, kind, name) + scope_defs: List[Tuple[int, int, str, str]] = [] + + # Track async function positions to avoid duplicates + async_positions: set = set() + + # Extract class definitions + class_matches = self.run_ast_grep(source_code, get_pattern("class_def")) + for node in class_matches: + name = self._get_match(node, "NAME") + if name: + start_line, end_line = self._get_line_range(node) + scope_defs.append((start_line, end_line, "class", name)) + + # Extract async function definitions FIRST (before regular functions) + async_matches = self.run_ast_grep(source_code, get_pattern("async_func_def")) + for node in async_matches: + name = self._get_match(node, "NAME") + if name: + start_line, end_line = self._get_line_range(node) + scope_defs.append((start_line, end_line, "function", name)) + async_positions.add(start_line) # Mark this position as async + + # Extract function definitions (skip those already captured as async) + func_matches = self.run_ast_grep(source_code, get_pattern("func_def")) 
+ for node in func_matches: + name = self._get_match(node, "NAME") + if name: + start_line, end_line = self._get_line_range(node) + # Skip if already captured as async function (same position) + if start_line not in async_positions: + scope_defs.append((start_line, end_line, "function", name)) + + # Sort by start line for scope-aware processing + scope_defs.sort(key=lambda x: x[0]) + + # Process with scope tracking to determine method vs function + scope_stack: List[Tuple[str, int, str]] = [] # (name, end_line, kind) + + for start_line, end_line, kind, name in scope_defs: + # Pop scopes that have ended + while scope_stack and scope_stack[-1][1] < start_line: + scope_stack.pop() + + if kind == "class": + symbols.append(Symbol( + name=name, + kind="class", + range=(start_line, end_line), + )) + scope_stack.append((name, end_line, "class")) + else: # function + # Determine if it's a method (inside a class) or function + is_method = bool(scope_stack) and scope_stack[-1][2] == "class" + symbols.append(Symbol( + name=name, + kind="method" if is_method else "function", + range=(start_line, end_line), + )) + scope_stack.append((name, end_line, "function")) + + return symbols + + def _extract_relationships(self, source_code: str, path: Path) -> List[CodeRelationship]: + """Extract code relationships with scope and alias resolution. 
+ + Args: + source_code: Python source code + path: File path + + Returns: + List of CodeRelationship objects + """ + if not self.is_available() or self._binding is None: + return [] + + source_file = str(path.resolve()) + + # Collect all matches with line numbers and end lines for scope processing + # Format: (start_line, end_line, match_type, symbol, node) + all_matches: List[Tuple[int, int, str, str, Any]] = [] + + # Get class definitions (with and without bases) for scope tracking + class_with_bases = self.run_ast_grep(source_code, get_pattern("class_with_bases")) + for node in class_with_bases: + class_name = self._get_match(node, "NAME") + start_line, end_line = self._get_line_range(node) + if class_name: + # Record class scope and inheritance + all_matches.append((start_line, end_line, "class_def", class_name, node)) + # Extract bases from node text (ast-grep-py 0.40+ doesn't capture $$$) + node_text = self._binding._get_node_text(node) if self._binding else "" + bases_text = self._extract_bases_from_class_text(node_text) + if bases_text: + # Also record inheritance relationship + all_matches.append((start_line, end_line, "inherits", bases_text, node)) + + # Get classes without bases for scope tracking + class_no_bases = self.run_ast_grep(source_code, get_pattern("class_def")) + for node in class_no_bases: + class_name = self._get_match(node, "NAME") + start_line, end_line = self._get_line_range(node) + if class_name: + # Check if not already recorded (avoid duplicates from class_with_bases) + existing = [m for m in all_matches if m[2] == "class_def" and m[3] == class_name and m[0] == start_line] + if not existing: + all_matches.append((start_line, end_line, "class_def", class_name, node)) + + # Get function definitions for scope tracking + func_matches = self.run_ast_grep(source_code, get_pattern("func_def")) + for node in func_matches: + func_name = self._get_match(node, "NAME") + start_line, end_line = self._get_line_range(node) + if func_name: + 
all_matches.append((start_line, end_line, "func_def", func_name, node)) + + # Get async function definitions for scope tracking + async_func_matches = self.run_ast_grep(source_code, get_pattern("async_func_def")) + for node in async_func_matches: + func_name = self._get_match(node, "NAME") + start_line, end_line = self._get_line_range(node) + if func_name: + all_matches.append((start_line, end_line, "func_def", func_name, node)) + + # Get import matches + import_matches = self.run_ast_grep(source_code, get_pattern("import_stmt")) + for node in import_matches: + module = self._get_match(node, "MODULE") + start_line, end_line = self._get_line_range(node) + if module: + all_matches.append((start_line, end_line, "import", module, node)) + + from_matches = self.run_ast_grep(source_code, get_pattern("import_from")) + for node in from_matches: + module = self._get_match(node, "MODULE") + names = self._get_match(node, "NAMES") + start_line, end_line = self._get_line_range(node) + if module: + all_matches.append((start_line, end_line, "from_import", f"{module}:{names}", node)) + + # Get call matches + call_matches = self.run_ast_grep(source_code, get_pattern("call")) + for node in call_matches: + func = self._get_match(node, "FUNC") + start_line, end_line = self._get_line_range(node) + if func: + # Skip self. and cls. prefixed calls + base = func.split(".", 1)[0] + if base not in {"self", "cls"}: + all_matches.append((start_line, end_line, "call", func, node)) + + # Sort by start line number for scope processing + all_matches.sort(key=lambda x: (x[0], x[2] == "call")) # Process scope defs before calls on same line + + # Process with scope tracking + relationships = self._process_scope_and_aliases(all_matches, source_file) + + return relationships + + def _process_scope_and_aliases( + self, + matches: List[Tuple[int, int, str, str, Any]], + source_file: str, + ) -> List[CodeRelationship]: + """Process matches with scope and alias resolution. 
+ + Implements proper scope tracking similar to treesitter_parser.py: + - Maintains scope_stack for tracking current scope (class/function names) + - Maintains alias_stack with per-scope alias mappings (inherited from parent) + - Pops scopes when current line passes their end line + - Resolves call targets using current scope's alias map + + Args: + matches: Sorted list of (start_line, end_line, type, symbol, node) tuples + source_file: Source file path + + Returns: + List of resolved CodeRelationship objects + """ + relationships: List[CodeRelationship] = [] + + # Scope stack: list of (name, end_line) tuples + scope_stack: List[Tuple[str, int]] = [("", float("inf"))] + + # Alias stack: list of alias dicts, one per scope level + # Each new scope inherits parent's aliases (copy on write) + alias_stack: List[Dict[str, str]] = [{}] + + def get_current_scope() -> str: + """Get the name of the current (innermost) scope.""" + return scope_stack[-1][0] + + def pop_scopes_before(line: int) -> None: + """Pop all scopes that have ended before the given line.""" + while len(scope_stack) > 1 and scope_stack[-1][1] < line: + scope_stack.pop() + alias_stack.pop() + + def push_scope(name: str, end_line: int) -> None: + """Push a new scope onto the stack.""" + scope_stack.append((name, end_line)) + # Copy parent scope's aliases for inheritance + alias_stack.append(dict(alias_stack[-1])) + + def update_aliases(updates: Dict[str, str]) -> None: + """Update current scope's alias map.""" + alias_stack[-1].update(updates) + + def resolve_alias(symbol: str) -> str: + """Resolve a symbol using current scope's alias map.""" + if "." 
not in symbol: + # Simple name - check if it's an alias + return alias_stack[-1].get(symbol, symbol) + + # Dotted name - resolve the base + parts = symbol.split(".", 1) + base = parts[0] + rest = parts[1] + + if base in alias_stack[-1]: + return f"{alias_stack[-1][base]}.{rest}" + return symbol + + for start_line, end_line, match_type, symbol, node in matches: + # Pop any scopes that have ended + pop_scopes_before(start_line) + + if match_type == "class_def": + # Push class scope + push_scope(symbol, end_line) + + elif match_type == "func_def": + # Push function scope + push_scope(symbol, end_line) + + elif match_type == "inherits": + # Record inheritance relationship + # Parse base classes from the bases text + base_classes = self._parse_base_classes(symbol) + for base_class in base_classes: + base_class = base_class.strip() + if base_class: + # Resolve alias for base class + resolved_base = resolve_alias(base_class) + relationships.append(CodeRelationship( + source_symbol=get_current_scope(), + target_symbol=resolved_base, + relationship_type=RelationshipType.INHERITS, + source_file=source_file, + target_file=None, + source_line=start_line, + )) + + elif match_type == "import": + # Process import statement + module = symbol + # Simple import: add base name to alias map + base_name = module.split(".", 1)[0] + update_aliases({base_name: module}) + relationships.append(CodeRelationship( + source_symbol=get_current_scope(), + target_symbol=module, + relationship_type=RelationshipType.IMPORTS, + source_file=source_file, + target_file=None, + source_line=start_line, + )) + + elif match_type == "from_import": + # Process from-import statement + parts = symbol.split(":", 1) + module = parts[0] + names = parts[1] if len(parts) > 1 else "" + + # Record the import relationship + relationships.append(CodeRelationship( + source_symbol=get_current_scope(), + target_symbol=module, + relationship_type=RelationshipType.IMPORTS, + source_file=source_file, + target_file=None, + 
source_line=start_line, + )) + + # Add aliases for imported names + if names and names != "*": + for name in names.split(","): + name = name.strip() + # Handle "name as alias" syntax + if " as " in name: + as_parts = name.split(" as ") + original = as_parts[0].strip() + alias = as_parts[1].strip() + if alias: + update_aliases({alias: f"{module}.{original}"}) + elif name: + update_aliases({name: f"{module}.{name}"}) + + elif match_type == "call": + # Resolve alias for call target + resolved = resolve_alias(symbol) + relationships.append(CodeRelationship( + source_symbol=get_current_scope(), + target_symbol=resolved, + relationship_type=RelationshipType.CALL, + source_file=source_file, + target_file=None, + source_line=start_line, + )) + + return relationships + + def process_matches( + self, + matches: List[SgNode], # type: ignore[valid-type] + source_code: str, + path: Path, + ) -> List[CodeRelationship]: + """Process ast-grep matches into code relationships. + + This is a simplified interface for direct match processing. + For full relationship extraction with scope tracking, use parse(). 
+ + Args: + matches: List of matched SgNode objects + source_code: Original source code + path: File path being processed + + Returns: + List of extracted code relationships + """ + if not self.is_available() or self._binding is None: + return [] + + source_file = str(path.resolve()) + relationships: List[CodeRelationship] = [] + + for node in matches: + # Default to call relationship for generic matches + func = self._get_match(node, "FUNC") + line = self._get_line_number(node) + if func: + base = func.split(".", 1)[0] + if base not in {"self", "cls"}: + relationships.append(CodeRelationship( + source_symbol="", + target_symbol=func, + relationship_type=RelationshipType.CALL, + source_file=source_file, + target_file=None, + source_line=line, + )) + + return relationships + + def _get_match(self, node: SgNode, metavar: str) -> str: # type: ignore[valid-type] + """Extract matched metavariable value from node. + + Args: + node: SgNode with match + metavar: Metavariable name (without $ prefix) + + Returns: + Matched text or empty string + """ + if self._binding is None or node is None: + return "" + return self._binding._get_match(node, metavar) + + def _get_line_number(self, node: SgNode) -> int: # type: ignore[valid-type] + """Get starting line number of a node. + + Args: + node: SgNode to get line number for + + Returns: + 1-based line number + """ + if self._binding is None or node is None: + return 0 + return self._binding._get_line_number(node) + + def _get_line_range(self, node: SgNode) -> Tuple[int, int]: # type: ignore[valid-type] + """Get line range for a node. 
+ + Args: + node: SgNode to get range for + + Returns: + (start_line, end_line) tuple, 1-based inclusive + """ + if self._binding is None or node is None: + return (0, 0) + return self._binding._get_line_range(node) + + + # ========================================================================= + # Dedicated extraction methods for INHERITS, CALL, IMPORTS relationships + # ========================================================================= + + def extract_inherits( + self, + source_code: str, + source_file: str, + source_symbol: str = "", + ) -> List[CodeRelationship]: + """Extract INHERITS relationships from Python code. + + Identifies class inheritance patterns including: + - Single inheritance: class Child(Parent): + - Multiple inheritance: class Child(A, B, C): + + Args: + source_code: Python source code to analyze + source_file: Path to the source file + source_symbol: The containing scope (class or module) + + Returns: + List of CodeRelationship objects with INHERITS type + """ + if not self.is_available(): + return [] + + relationships: List[CodeRelationship] = [] + + # Use class_with_bases pattern to find classes with inheritance + matches = self.run_ast_grep(source_code, get_pattern("class_with_bases")) + + for node in matches: + class_name = self._get_match(node, "NAME") + line = self._get_line_number(node) + + if class_name: + # Extract bases from the node text (first line: "class ClassName(Base1, Base2):") + # ast-grep-py 0.40+ doesn't capture $$$ multi-matches, so parse from text + node_text = self._binding._get_node_text(node) if self._binding else "" + bases_text = self._extract_bases_from_class_text(node_text) + + if bases_text: + # Parse individual base classes from the bases text + base_classes = self._parse_base_classes(bases_text) + + for base_class in base_classes: + base_class = base_class.strip() + if base_class: + relationships.append(CodeRelationship( + source_symbol=class_name, + target_symbol=base_class, + 
+                                relationship_type=RelationshipType.INHERITS,
+                                source_file=source_file,
+                                target_file=None,
+                                source_line=line,
+                            ))
+
+        return relationships
+
+    def _extract_bases_from_class_text(self, class_text: str) -> str:
+        """Extract base classes text from class definition.
+
+        Uses bracket counting instead of a single regex so that nested
+        parentheses inside the base list (e.g. a metaclass call such as
+        ``class C(Base, metaclass=make_meta())``) do not truncate the
+        extracted text, which a ``[^)]*`` regex would do.
+
+        Args:
+            class_text: Full text of class definition (e.g., "class Dog(Animal):\\n    pass")
+
+        Returns:
+            Text inside parentheses (e.g., "Animal") or empty string
+        """
+        import re
+        # Locate the opening parenthesis of the base list after the class name
+        header = re.search(r'class\s+\w+\s*\(', class_text)
+        if not header:
+            return ""
+        depth = 1
+        collected: List[str] = []
+        for char in class_text[header.end():]:
+            if char == "(":
+                depth += 1
+            elif char == ")":
+                depth -= 1
+                if depth == 0:
+                    break
+            collected.append(char)
+        # If the text is truncated and depth never reaches 0, best-effort
+        # return whatever was collected.
+        return "".join(collected).strip()
+
+    def extract_calls(
+        self,
+        source_code: str,
+        source_file: str,
+        source_symbol: str = "",
+        alias_map: Optional[Dict[str, str]] = None,
+    ) -> List[CodeRelationship]:
+        """Extract CALL relationships from Python code.
+
+        Identifies function and method call patterns including:
+        - Simple calls: func()
+        - Calls with arguments: func(arg1, arg2)
+        - Method calls: obj.method()
+        - Chained calls: obj.method1().method2()
+
+        Args:
+            source_code: Python source code to analyze
+            source_file: Path to the source file
+            source_symbol: The containing scope (class or module)
+            alias_map: Optional alias map for resolving imported names
+
+        Returns:
+            List of CodeRelationship objects with CALL type
+        """
+        if not self.is_available():
+            return []
+
+        relationships: List[CodeRelationship] = []
+        alias_map = alias_map or {}
+
+        # Use the generic call pattern
+        matches = self.run_ast_grep(source_code, get_pattern("call"))
+
+        for node in matches:
+            func = self._get_match(node, "FUNC")
+            line = self._get_line_number(node)
+
+            if func:
+                # Skip self. and cls. prefixed calls (internal method calls)
+                base = func.split(".", 1)[0]
+                if base in {"self", "cls", "super"}:
+                    continue
+
+                # Resolve alias if available
+                resolved = self._resolve_call_alias(func, alias_map)
+
+                relationships.append(CodeRelationship(
+                    source_symbol=source_symbol,
+                    target_symbol=resolved,
+                    relationship_type=RelationshipType.CALL,
+                    source_file=source_file,
+                    target_file=None,
+                    source_line=line,
+                ))
+
+        return relationships
+
+    def extract_imports(
+        self,
+        source_code: str,
+        source_file: str,
+        source_symbol: str = "",
+    ) -> Tuple[List[CodeRelationship], Dict[str, str]]:
+        """Extract IMPORTS relationships from Python code.
+
+        Identifies import patterns including:
+        - Simple import: import os
+        - Import with alias: import numpy as np
+        - From import: from typing import List
+        - From import with alias: from collections import defaultdict as dd
+        - Relative import: from .module import func
+        - Star import: from module import *
+
+        Args:
+            source_code: Python source code to analyze
+            source_file: Path to the source file
+            source_symbol: The containing scope (class or module)
+
+        Returns:
+            Tuple of:
+            - List of CodeRelationship objects with IMPORTS type
+            - Dict mapping local names to fully qualified module names (alias map)
+        """
+        if not self.is_available():
+            return [], {}
+
+        relationships: List[CodeRelationship] = []
+        alias_map: Dict[str, str] = {}
+
+        # Process simple imports: import X
+        # NOTE(review): the "import $MODULE" pattern may also match aliased
+        # imports depending on ast-grep matching semantics; if so, aliased
+        # imports are double-reported here and below — verify with ast-grep-py.
+        import_matches = self.run_ast_grep(source_code, get_pattern("import_stmt"))
+        for node in import_matches:
+            module = self._get_match(node, "MODULE")
+            line = self._get_line_number(node)
+
+            if module:
+                # Add to alias map: first part of module
+                base_name = module.split(".", 1)[0]
+                alias_map[base_name] = module
+
+                relationships.append(CodeRelationship(
+                    source_symbol=source_symbol,
+                    target_symbol=module,
+                    relationship_type=RelationshipType.IMPORTS,
+                    source_file=source_file,
+                    target_file=None,
+                    source_line=line,
+                ))
+
+        # Process import with alias: import X as Y
+        alias_matches = self.run_ast_grep(source_code, get_pattern("import_with_alias"))
+        for node in alias_matches:
+            module = self._get_match(node, "MODULE")
+            alias = self._get_match(node, "ALIAS")
+            line = self._get_line_number(node)
+
+            if module and alias:
+                alias_map[alias] = module
+
+                relationships.append(CodeRelationship(
+                    source_symbol=source_symbol,
+                    target_symbol=module,
+                    relationship_type=RelationshipType.IMPORTS,
+                    source_file=source_file,
+                    target_file=None,
+                    source_line=line,
+                ))
+
+        # Process from imports: from X import Y
+        from_matches = self.run_ast_grep(source_code, get_pattern("import_from"))
+        for node in from_matches:
+            module = self._get_match(node, "MODULE")
+            names = self._get_match(node, "NAMES")
+            line = self._get_line_number(node)
+
+            if module:
+                # Add relationship for the module
+                relationships.append(CodeRelationship(
+                    source_symbol=source_symbol,
+                    target_symbol=module,
+                    relationship_type=RelationshipType.IMPORTS,
+                    source_file=source_file,
+                    target_file=None,
+                    source_line=line,
+                ))
+
+                # Add aliases for imported names
+                if names and names != "*":
+                    for name in names.split(","):
+                        name = name.strip()
+                        # Handle "name as alias" syntax
+                        if " as " in name:
+                            parts = name.split(" as ")
+                            original = parts[0].strip()
+                            alias = parts[1].strip()
+                            alias_map[alias] = f"{module}.{original}"
+                        elif name:
+                            alias_map[name] = f"{module}.{name}"
+
+        # Process star imports: from X import *
+        star_matches = self.run_ast_grep(source_code, get_pattern("from_import_star"))
+        for node in star_matches:
+            module = self._get_match(node, "MODULE")
+            line = self._get_line_number(node)
+
+            if module:
+                relationships.append(CodeRelationship(
+                    source_symbol=source_symbol,
+                    target_symbol=f"{module}.*",
+                    relationship_type=RelationshipType.IMPORTS,
+                    source_file=source_file,
+                    target_file=None,
+                    source_line=line,
+                ))
+
+        # Process relative imports: from .X import Y
+        relative_matches = self.run_ast_grep(source_code,
+            get_pattern("relative_import"))
+        for node in relative_matches:
+            module = self._get_match(node, "MODULE")
+            names = self._get_match(node, "NAMES")
+            line = self._get_line_number(node)
+
+            # Prepend dot for relative module path
+            rel_module = f".{module}" if module else "."
+
+            relationships.append(CodeRelationship(
+                source_symbol=source_symbol,
+                target_symbol=rel_module,
+                relationship_type=RelationshipType.IMPORTS,
+                source_file=source_file,
+                target_file=None,
+                source_line=line,
+            ))
+
+        return relationships, alias_map
+
+    # =========================================================================
+    # Helper methods for pattern processing
+    # =========================================================================
+
+    def _parse_base_classes(self, bases_text: str) -> List[str]:
+        """Parse base class names from inheritance text.
+
+        Handles single and multiple inheritance with proper comma splitting.
+        Tracks (), [] and {} nesting so subscripted generics such as
+        ``Generic[T, U]`` are kept as a single base instead of being split
+        at the comma inside the subscript.
+
+        Args:
+            bases_text: Text inside the parentheses of class definition
+
+        Returns:
+            List of base class names
+        """
+        if not bases_text:
+            return []
+
+        bases: List[str] = []
+        depth = 0
+        current: List[str] = []
+        # All bracket kinds guard the comma split, not just parentheses
+        openers = {"(", "[", "{"}
+        closers = {")", "]", "}"}
+
+        for char in bases_text:
+            if char in openers:
+                depth += 1
+                current.append(char)
+            elif char in closers:
+                depth -= 1
+                current.append(char)
+            elif char == "," and depth == 0:
+                base = "".join(current).strip()
+                if base:
+                    bases.append(base)
+                current = []
+            else:
+                current.append(char)
+
+        # Add the last base class
+        if current:
+            base = "".join(current).strip()
+            if base:
+                bases.append(base)
+
+        return bases
+
+    def _resolve_call_alias(self, func_name: str, alias_map: Dict[str, str]) -> str:
+        """Resolve a function call name using import aliases.
+
+        Args:
+            func_name: The function/method name as it appears in code
+            alias_map: Mapping of local names to fully qualified names
+
+        Returns:
+            Resolved function name (fully qualified if possible)
+        """
+        if "." not in func_name:
+            # Simple function call - check if it's an alias
+            return alias_map.get(func_name, func_name)
+
+        # Method call or qualified name - resolve the base
+        parts = func_name.split(".", 1)
+        base = parts[0]
+        rest = parts[1]
+
+        if base in alias_map:
+            return f"{alias_map[base]}.{rest}"
+
+        return func_name
+
+
+def is_astgrep_processor_available() -> bool:
+    """Check if ast-grep processor is available.
+
+    Returns:
+        True if ast-grep-py is installed and processor can be used
+    """
+    return ASTGREP_AVAILABLE
+
+
+__all__ = [
+    "BaseAstGrepProcessor",
+    "AstGrepPythonProcessor",
+    "is_astgrep_processor_available",
+]
diff --git a/codex-lens/src/codexlens/parsers/patterns/__init__.py b/codex-lens/src/codexlens/parsers/patterns/__init__.py
new file mode 100644
index 00000000..10717360
--- /dev/null
+++ b/codex-lens/src/codexlens/parsers/patterns/__init__.py
@@ -0,0 +1,5 @@
+"""ast-grep pattern definitions for various languages.
+
+This package contains language-specific pattern definitions for
+extracting code relationships using ast-grep declarative patterns.
+"""
diff --git a/codex-lens/src/codexlens/parsers/patterns/python/__init__.py b/codex-lens/src/codexlens/parsers/patterns/python/__init__.py
new file mode 100644
index 00000000..c8d6526e
--- /dev/null
+++ b/codex-lens/src/codexlens/parsers/patterns/python/__init__.py
@@ -0,0 +1,204 @@
+"""Python ast-grep patterns for relationship extraction.
+
+This module defines declarative patterns for extracting code relationships
+(inheritance, calls, imports) from Python source code using ast-grep.
+ +Pattern Syntax (ast-grep-py 0.40+): + $VAR - Single metavariable (matches one AST node) + $$$VAR - Multiple metavariable (matches zero or more nodes) + +Example: + "class $CLASS_NAME($$$BASES) $$$BODY" matches: + class MyClass(BaseClass): + pass + with $CLASS_NAME = "MyClass", $$$BASES = "BaseClass", $$$BODY = "pass" + +YAML Pattern Files: + inherits.yaml - INHERITS relationship patterns (single/multiple inheritance) + imports.yaml - IMPORTS relationship patterns (import, from...import, as) + call.yaml - CALL relationship patterns (function/method calls) +""" + +from __future__ import annotations + +from pathlib import Path +from typing import Dict, List, Optional + +# Directory containing YAML pattern files +PATTERNS_DIR = Path(__file__).parent + +# Python ast-grep patterns organized by relationship type +# Note: ast-grep-py 0.40+ uses $$$ for zero-or-more multi-match +PYTHON_PATTERNS: Dict[str, str] = { + # Class definitions with inheritance + "class_def": "class $NAME $$$BODY", + "class_with_bases": "class $NAME($$$BASES) $$$BODY", + + # Single inheritance: class Child(Parent): + "single_inheritance": "class $CLASS_NAME($BASE) $$$BODY", + + # Multiple inheritance: class Child(A, B, C): + "multiple_inheritance": "class $CLASS_NAME($BASE, $$$MORE_BASES) $$$BODY", + + # Function definitions (use $$$ for zero-or-more params) + "func_def": "def $NAME($$$PARAMS): $$$BODY", + "async_func_def": "async def $NAME($$$PARAMS): $$$BODY", + + # Import statements - basic forms + "import_stmt": "import $MODULE", + "import_from": "from $MODULE import $NAMES", + + # Import statements - extended forms + "import_with_alias": "import $MODULE as $ALIAS", + "import_multiple": "import $FIRST, $$$REST", + "from_import_single": "from $MODULE import $NAME", + "from_import_with_alias": "from $MODULE import $NAME as $ALIAS", + "from_import_multiple": "from $MODULE import $FIRST, $$$REST", + "from_import_star": "from $MODULE import *", + "relative_import": "from .$$$MODULE import $NAMES", 
+ + # Function/method calls - basic form (use $$$ for zero-or-more args) + "call": "$FUNC($$$ARGS)", + "method_call": "$OBJ.$METHOD($$$ARGS)", + + # Function/method calls - specific forms + "simple_call": "$FUNC()", + "call_with_args": "$FUNC($$$ARGS)", + "chained_call": "$OBJ.$METHOD($$$ARGS).$$$CHAIN", + "constructor_call": "$CLASS($$$ARGS)", +} + +# Metavariable names for extracting match data +METAVARS = { + # Class patterns + "class_name": "NAME", + "class_bases": "BASES", + "class_body": "BODY", + "inherit_class": "CLASS_NAME", + "inherit_base": "BASE", + "inherit_more_bases": "MORE_BASES", + + # Function patterns + "func_name": "NAME", + "func_params": "PARAMS", + "func_body": "BODY", + + # Import patterns + "import_module": "MODULE", + "import_names": "NAMES", + "import_alias": "ALIAS", + "import_first": "FIRST", + "import_rest": "REST", + + # Call patterns + "call_func": "FUNC", + "call_obj": "OBJ", + "call_method": "METHOD", + "call_args": "ARGS", + "call_class": "CLASS", + "call_chain": "CHAIN", +} + +# Relationship pattern mapping - expanded for new patterns +RELATIONSHIP_PATTERNS: Dict[str, List[str]] = { + "inheritance": ["class_with_bases", "single_inheritance", "multiple_inheritance"], + "imports": [ + "import_stmt", "import_from", + "import_with_alias", "import_multiple", + "from_import_single", "from_import_with_alias", + "from_import_multiple", "from_import_star", + "relative_import", + ], + "calls": ["call", "method_call", "simple_call", "call_with_args", "constructor_call"], +} + +# YAML pattern file mapping +YAML_PATTERN_FILES = { + "inheritance": "inherits.yaml", + "imports": "imports.yaml", + "calls": "call.yaml", +} + + +def get_pattern(pattern_name: str) -> str: + """Get an ast-grep pattern by name. + + Args: + pattern_name: Key from PYTHON_PATTERNS dict + + Returns: + Pattern string + + Raises: + KeyError: If pattern name not found + """ + if pattern_name not in PYTHON_PATTERNS: + raise KeyError(f"Unknown pattern: {pattern_name}. 
Available: {list(PYTHON_PATTERNS.keys())}") + return PYTHON_PATTERNS[pattern_name] + + +def get_patterns_for_relationship(rel_type: str) -> List[str]: + """Get all patterns that can extract a given relationship type. + + Args: + rel_type: Relationship type (inheritance, imports, calls) + + Returns: + List of pattern names + """ + return RELATIONSHIP_PATTERNS.get(rel_type, []) + + +def get_metavar(name: str) -> str: + """Get metavariable name without $ prefix. + + Args: + name: Key from METAVARS dict + + Returns: + Metavariable name (e.g., "NAME" not "$NAME") + """ + return METAVARS.get(name, name.upper()) + + +def get_yaml_pattern_path(rel_type: str) -> Optional[Path]: + """Get the path to a YAML pattern file for a relationship type. + + Args: + rel_type: Relationship type (inheritance, imports, calls) + + Returns: + Path to YAML file or None if not found + """ + filename = YAML_PATTERN_FILES.get(rel_type) + if filename: + return PATTERNS_DIR / filename + return None + + +def list_yaml_pattern_files() -> Dict[str, Path]: + """List all available YAML pattern files. 
+ + Returns: + Dict mapping relationship type to YAML file path + """ + result = {} + for rel_type, filename in YAML_PATTERN_FILES.items(): + path = PATTERNS_DIR / filename + if path.exists(): + result[rel_type] = path + return result + + +__all__ = [ + "PYTHON_PATTERNS", + "METAVARS", + "RELATIONSHIP_PATTERNS", + "YAML_PATTERN_FILES", + "PATTERNS_DIR", + "get_pattern", + "get_patterns_for_relationship", + "get_metavar", + "get_yaml_pattern_path", + "list_yaml_pattern_files", +] diff --git a/codex-lens/src/codexlens/parsers/patterns/python/call.yaml b/codex-lens/src/codexlens/parsers/patterns/python/call.yaml new file mode 100644 index 00000000..1b1bd828 --- /dev/null +++ b/codex-lens/src/codexlens/parsers/patterns/python/call.yaml @@ -0,0 +1,87 @@ +# Python CALL patterns for ast-grep +# Extracts function and method call expressions + +# Pattern metadata +id: python-call +language: python +description: Extract function and method calls from Python code + +patterns: + # Simple function call + # Matches: func() + - id: simple_call + pattern: "$FUNC()" + message: "Found simple function call" + severity: hint + + # Function call with arguments + # Matches: func(arg1, arg2) + - id: call_with_args + pattern: "$FUNC($$$ARGS)" + message: "Found function call with arguments" + severity: hint + + # Method call + # Matches: obj.method() + - id: method_call + pattern: "$OBJ.$METHOD($$$ARGS)" + message: "Found method call" + severity: hint + + # Chained method call + # Matches: obj.method1().method2() + - id: chained_call + pattern: "$OBJ.$METHOD($$$ARGS).$$$CHAIN" + message: "Found chained method call" + severity: hint + + # Call with keyword arguments + # Matches: func(arg=value) + - id: call_with_kwargs + pattern: "$FUNC($$$ARGS, $KWARG=$VALUE$$$MORE)" + message: "Found call with keyword argument" + severity: hint + + # Constructor call + # Matches: ClassName() + - id: constructor_call + pattern: "$CLASS($$$ARGS)" + message: "Found constructor call" + severity: hint + + # 
Subscript call (not a real call, but often confused) + # This pattern helps exclude indexing from calls + - id: subscript_access + pattern: "$OBJ[$INDEX]" + message: "Found subscript access" + severity: hint + +# Metavariables used: +# $FUNC - Function name being called +# $OBJ - Object receiving the method call +# $METHOD - Method name being called +# $ARGS - Positional arguments +# $KWARG - Keyword argument name +# $VALUE - Keyword argument value +# $CLASS - Class name for constructor calls +# $INDEX - Index for subscript access +# $$$MORE - Additional arguments +# $$$CHAIN - Additional method chains + +# Note: The generic call pattern "$FUNC($$$ARGS)" will match all function calls +# including method calls and constructor calls. More specific patterns help +# categorize the type of call. + +# Examples matched: +# print("hello") -> call_with_args +# len(items) -> call_with_args +# obj.process() -> method_call +# obj.get().save() -> chained_call +# func(name=value) -> call_with_kwargs +# MyClass() -> constructor_call +# items[0] -> subscript_access (not a call) + +# Filtering notes: +# - self.method() calls are typically filtered during processing +# - cls.method() calls are typically filtered during processing +# - super().method() calls may be handled specially diff --git a/codex-lens/src/codexlens/parsers/patterns/python/imports.yaml b/codex-lens/src/codexlens/parsers/patterns/python/imports.yaml new file mode 100644 index 00000000..a1248790 --- /dev/null +++ b/codex-lens/src/codexlens/parsers/patterns/python/imports.yaml @@ -0,0 +1,82 @@ +# Python IMPORTS patterns for ast-grep +# Extracts import statements (import, from...import, as aliases) + +# Pattern metadata +id: python-imports +language: python +description: Extract import statements from Python code + +patterns: + # Simple import + # Matches: import os + - id: simple_import + pattern: "import $MODULE" + message: "Found simple import" + severity: hint + + # Import with alias + # Matches: import numpy as 
np + - id: import_with_alias + pattern: "import $MODULE as $ALIAS" + message: "Found import with alias" + severity: hint + + # Multiple imports + # Matches: import os, sys + - id: multiple_imports + pattern: "import $FIRST, $$$REST" + message: "Found multiple imports" + severity: hint + + # From import (single name) + # Matches: from os import path + - id: from_import_single + pattern: "from $MODULE import $NAME" + message: "Found from-import single" + severity: hint + + # From import with alias + # Matches: from collections import defaultdict as dd + - id: from_import_with_alias + pattern: "from $MODULE import $NAME as $ALIAS" + message: "Found from-import with alias" + severity: hint + + # From import multiple names + # Matches: from typing import List, Dict, Optional + - id: from_import_multiple + pattern: "from $MODULE import $FIRST, $$$REST" + message: "Found from-import multiple" + severity: hint + + # From import star + # Matches: from module import * + - id: from_import_star + pattern: "from $MODULE import *" + message: "Found star import" + severity: warning + + # Relative import + # Matches: from .module import func + - id: relative_import + pattern: "from .$$$MODULE import $NAMES" + message: "Found relative import" + severity: hint + +# Metavariables used: +# $MODULE - The module being imported +# $ALIAS - The alias for the import +# $NAME - The specific name being imported +# $FIRST - First item in a multi-item import +# $$$REST - Remaining items in a multi-item import +# $NAMES - Names being imported in from-import + +# Examples matched: +# import os -> simple_import +# import numpy as np -> import_with_alias +# import os, sys, pathlib -> multiple_imports +# from os import path -> from_import_single +# from typing import List, Dict, Set -> from_import_multiple +# from collections import defaultdict -> from_import_single +# from .helpers import utils -> relative_import +# from module import * -> from_import_star diff --git 
a/codex-lens/src/codexlens/parsers/patterns/python/inherits.yaml b/codex-lens/src/codexlens/parsers/patterns/python/inherits.yaml new file mode 100644 index 00000000..d818ab25 --- /dev/null +++ b/codex-lens/src/codexlens/parsers/patterns/python/inherits.yaml @@ -0,0 +1,42 @@ +# Python INHERITS patterns for ast-grep +# Extracts class inheritance relationships (single and multiple inheritance) + +# Pattern metadata +id: python-inherits +language: python +description: Extract class inheritance relationships from Python code + +# Single inheritance pattern +# Matches: class Child(Parent): +patterns: + - id: single_inheritance + pattern: "class $CLASS_NAME($BASE) $$$BODY" + message: "Found single inheritance" + severity: hint + + # Multiple inheritance pattern + # Matches: class Child(Parent1, Parent2, Parent3): + - id: multiple_inheritance + pattern: "class $CLASS_NAME($BASE, $$$MORE_BASES) $$$BODY" + message: "Found multiple inheritance" + severity: hint + + # Generic inheritance with any number of bases + # Matches: class Child(...): with any number of parent classes + - id: class_with_bases + pattern: "class $NAME($$$BASES) $$$BODY" + message: "Found class with base classes" + severity: hint + +# Metavariables used: +# $CLASS_NAME - The name of the child class +# $BASE - First base class (for single inheritance) +# $BASES - All base classes combined +# $MORE_BASES - Additional base classes after the first (for multiple inheritance) +# $$$BODY - Class body (statements, can be multiple) + +# Examples matched: +# class Dog(Animal): -> single_inheritance +# class C(A, B): -> multiple_inheritance +# class D(BaseMixin, logging.Log) -> class_with_bases +# class E(A, B, C, D): -> multiple_inheritance diff --git a/codex-lens/src/codexlens/parsers/treesitter_parser.py b/codex-lens/src/codexlens/parsers/treesitter_parser.py index 4ae44cae..34ef180f 100644 --- a/codex-lens/src/codexlens/parsers/treesitter_parser.py +++ b/codex-lens/src/codexlens/parsers/treesitter_parser.py @@ 
-11,7 +11,7 @@ return `None`; callers should use a regex-based fallback such as from __future__ import annotations from pathlib import Path -from typing import Dict, List, Optional +from typing import Dict, List, Optional, TYPE_CHECKING try: from tree_sitter import Language as TreeSitterLanguage @@ -27,26 +27,45 @@ except ImportError: from codexlens.entities import CodeRelationship, IndexedFile, RelationshipType, Symbol from codexlens.parsers.tokenizer import get_default_tokenizer +if TYPE_CHECKING: + from codexlens.config import Config + class TreeSitterSymbolParser: - """Parser using tree-sitter for AST-level symbol extraction.""" + """Parser using tree-sitter for AST-level symbol extraction. - def __init__(self, language_id: str, path: Optional[Path] = None) -> None: + Supports optional ast-grep integration for Python relationship extraction + when config.use_astgrep is True and ast-grep-py is available. + """ + + def __init__( + self, + language_id: str, + path: Optional[Path] = None, + config: Optional["Config"] = None, + ) -> None: """Initialize tree-sitter parser for a language. Args: language_id: Language identifier (python, javascript, typescript, etc.) 
path: Optional file path for language variant detection (e.g., .tsx) + config: Optional Config instance for parser feature toggles """ self.language_id = language_id self.path = path + self._config = config self._parser: Optional[object] = None self._language: Optional[TreeSitterLanguage] = None self._tokenizer = get_default_tokenizer() + self._astgrep_processor = None if TREE_SITTER_AVAILABLE: self._initialize_parser() + # Initialize ast-grep processor for Python if config enables it + if self._should_use_astgrep(): + self._initialize_astgrep_processor() + def _initialize_parser(self) -> None: """Initialize tree-sitter parser and language.""" if TreeSitterParser is None or TreeSitterLanguage is None: @@ -82,6 +101,31 @@ class TreeSitterSymbolParser: self._parser = None self._language = None + def _should_use_astgrep(self) -> bool: + """Check if ast-grep should be used for relationship extraction. + + Returns: + True if config.use_astgrep is True and language is Python + """ + if self._config is None: + return False + if not getattr(self._config, "use_astgrep", False): + return False + return self.language_id == "python" + + def _initialize_astgrep_processor(self) -> None: + """Initialize ast-grep processor for Python relationship extraction.""" + try: + from codexlens.parsers.astgrep_processor import ( + AstGrepPythonProcessor, + is_astgrep_processor_available, + ) + + if is_astgrep_processor_available(): + self._astgrep_processor = AstGrepPythonProcessor(self.path) + except ImportError: + self._astgrep_processor = None + def is_available(self) -> bool: """Check if tree-sitter parser is available. 
@@ -138,7 +182,10 @@ class TreeSitterSymbolParser: source_bytes, root = parsed try: symbols = self._extract_symbols(source_bytes, root) - relationships = self._extract_relationships(source_bytes, root, path) + # Pass source_code for ast-grep integration + relationships = self._extract_relationships( + source_bytes, root, path, source_code=text + ) return IndexedFile( path=str(path.resolve()), @@ -173,13 +220,68 @@ class TreeSitterSymbolParser: source_bytes: bytes, root: TreeSitterNode, path: Path, + source_code: Optional[str] = None, ) -> List[CodeRelationship]: + """Extract relationships, optionally using ast-grep for Python. + + When config.use_astgrep is True and ast-grep is available for Python, + uses ast-grep for relationship extraction. Otherwise, uses tree-sitter. + + Args: + source_bytes: Source code as bytes + root: Root AST node from tree-sitter + path: File path + source_code: Optional source code string (required for ast-grep) + + Returns: + List of extracted relationships + """ if self.language_id == "python": + # Try ast-grep first if configured and available + if self._astgrep_processor is not None and source_code is not None: + try: + astgrep_rels = self._extract_python_relationships_astgrep( + source_code, path + ) + if astgrep_rels is not None: + return astgrep_rels + except Exception: + # Fall back to tree-sitter on ast-grep failure + pass return self._extract_python_relationships(source_bytes, root, path) if self.language_id in {"javascript", "typescript"}: return self._extract_js_ts_relationships(source_bytes, root, path) return [] + def _extract_python_relationships_astgrep( + self, + source_code: str, + path: Path, + ) -> Optional[List[CodeRelationship]]: + """Extract Python relationships using ast-grep processor. 
+ + Args: + source_code: Python source code text + path: File path + + Returns: + List of relationships, or None if ast-grep unavailable + """ + if self._astgrep_processor is None: + return None + + if not self._astgrep_processor.is_available(): + return None + + try: + indexed = self._astgrep_processor.parse(source_code, path) + if indexed is not None: + return indexed.relationships + except Exception: + pass + + return None + def _extract_python_relationships( self, source_bytes: bytes, diff --git a/codex-lens/tests/parsers/__init__.py b/codex-lens/tests/parsers/__init__.py new file mode 100644 index 00000000..0e066f0e --- /dev/null +++ b/codex-lens/tests/parsers/__init__.py @@ -0,0 +1 @@ +"""Tests for codexlens.parsers modules.""" diff --git a/codex-lens/tests/parsers/test_astgrep_extraction.py b/codex-lens/tests/parsers/test_astgrep_extraction.py new file mode 100644 index 00000000..41f0d2ea --- /dev/null +++ b/codex-lens/tests/parsers/test_astgrep_extraction.py @@ -0,0 +1,444 @@ +"""Tests for dedicated extraction methods: extract_inherits, extract_calls, extract_imports. + +Tests pattern-based relationship extraction from Python source code +using ast-grep-py bindings for INHERITS, CALL, and IMPORTS relationships. 
+""" + +from pathlib import Path + +import pytest + +from codexlens.parsers.astgrep_processor import ( + AstGrepPythonProcessor, + is_astgrep_processor_available, +) +from codexlens.entities import RelationshipType + + +# Check if ast-grep is available for conditional test skipping +ASTGREP_AVAILABLE = is_astgrep_processor_available() + + +@pytest.mark.skipif(not ASTGREP_AVAILABLE, reason="ast-grep-py not installed") +class TestExtractInherits: + """Tests for extract_inherits method - INHERITS relationship extraction.""" + + def test_single_inheritance(self): + """Test extraction of single inheritance relationship.""" + processor = AstGrepPythonProcessor() + code = """ +class Animal: + pass + +class Dog(Animal): + pass +""" + relationships = processor.extract_inherits(code, "test.py") + + assert len(relationships) == 1 + rel = relationships[0] + assert rel.source_symbol == "Dog" + assert rel.target_symbol == "Animal" + assert rel.relationship_type == RelationshipType.INHERITS + + def test_multiple_inheritance(self): + """Test extraction of multiple inheritance relationships.""" + processor = AstGrepPythonProcessor() + code = """ +class A: + pass + +class B: + pass + +class C(A, B): + pass +""" + relationships = processor.extract_inherits(code, "test.py") + + # Should have 2 relationships: C->A and C->B + assert len(relationships) == 2 + targets = {r.target_symbol for r in relationships} + assert "A" in targets + assert "B" in targets + for rel in relationships: + assert rel.source_symbol == "C" + + def test_no_inheritance(self): + """Test that classes without inheritance return empty list.""" + processor = AstGrepPythonProcessor() + code = """ +class Standalone: + pass +""" + relationships = processor.extract_inherits(code, "test.py") + + assert len(relationships) == 0 + + def test_nested_class_inheritance(self): + """Test extraction of inheritance in nested classes.""" + processor = AstGrepPythonProcessor() + code = """ +class Outer: + class Inner(Base): + pass 
+""" + relationships = processor.extract_inherits(code, "test.py") + + assert len(relationships) == 1 + assert relationships[0].source_symbol == "Inner" + assert relationships[0].target_symbol == "Base" + + def test_inheritance_with_complex_bases(self): + """Test extraction with generic or complex base classes.""" + processor = AstGrepPythonProcessor() + code = """ +class Service(BaseService, mixins.Loggable): + pass +""" + relationships = processor.extract_inherits(code, "test.py") + + assert len(relationships) == 2 + targets = {r.target_symbol for r in relationships} + assert "BaseService" in targets + assert "mixins.Loggable" in targets + + +@pytest.mark.skipif(not ASTGREP_AVAILABLE, reason="ast-grep-py not installed") +class TestExtractCalls: + """Tests for extract_calls method - CALL relationship extraction.""" + + def test_simple_function_call(self): + """Test extraction of simple function calls.""" + processor = AstGrepPythonProcessor() + code = """ +def main(): + print("hello") + len([1, 2, 3]) +""" + relationships = processor.extract_calls(code, "test.py", "main") + + targets = {r.target_symbol for r in relationships} + assert "print" in targets + assert "len" in targets + + def test_method_call(self): + """Test extraction of method calls.""" + processor = AstGrepPythonProcessor() + code = """ +def process(): + obj.method() + items.append(1) +""" + relationships = processor.extract_calls(code, "test.py", "process") + + targets = {r.target_symbol for r in relationships} + assert "obj.method" in targets + assert "items.append" in targets + + def test_skips_self_calls(self): + """Test that self.method() calls are filtered.""" + processor = AstGrepPythonProcessor() + code = """ +class Service: + def process(self): + self.internal() + external_func() +""" + relationships = processor.extract_calls(code, "test.py", "Service") + + targets = {r.target_symbol for r in relationships} + # self.internal should be filtered + assert "self.internal" not in targets + 
assert "internal" not in targets + assert "external_func" in targets + + def test_skips_cls_calls(self): + """Test that cls.method() calls are filtered.""" + processor = AstGrepPythonProcessor() + code = """ +class Factory: + @classmethod + def create(cls): + cls.helper() + other_func() +""" + relationships = processor.extract_calls(code, "test.py", "Factory") + + targets = {r.target_symbol for r in relationships} + assert "cls.helper" not in targets + assert "other_func" in targets + + def test_alias_resolution(self): + """Test call alias resolution using import map.""" + processor = AstGrepPythonProcessor() + code = """ +def main(): + np.array([1, 2, 3]) +""" + alias_map = {"np": "numpy"} + relationships = processor.extract_calls(code, "test.py", "main", alias_map) + + assert len(relationships) >= 1 + # Should resolve np.array to numpy.array + assert any("numpy.array" in r.target_symbol for r in relationships) + + def test_no_calls(self): + """Test that code without calls returns empty list.""" + processor = AstGrepPythonProcessor() + code = """ +x = 1 +y = x + 2 +""" + relationships = processor.extract_calls(code, "test.py") + + assert len(relationships) == 0 + + +@pytest.mark.skipif(not ASTGREP_AVAILABLE, reason="ast-grep-py not installed") +class TestExtractImports: + """Tests for extract_imports method - IMPORTS relationship extraction.""" + + def test_simple_import(self): + """Test extraction of simple import statements.""" + processor = AstGrepPythonProcessor() + code = "import os" + + relationships, alias_map = processor.extract_imports(code, "test.py") + + assert len(relationships) == 1 + assert relationships[0].target_symbol == "os" + assert relationships[0].relationship_type == RelationshipType.IMPORTS + assert alias_map.get("os") == "os" + + def test_import_with_alias(self): + """Test extraction of import with alias.""" + processor = AstGrepPythonProcessor() + code = "import numpy as np" + + relationships, alias_map = processor.extract_imports(code, 
"test.py") + + assert len(relationships) == 1 + assert relationships[0].target_symbol == "numpy" + assert alias_map.get("np") == "numpy" + + def test_from_import(self): + """Test extraction of from-import statements.""" + processor = AstGrepPythonProcessor() + code = "from typing import List, Dict" + + relationships, alias_map = processor.extract_imports(code, "test.py") + + assert len(relationships) == 1 + assert relationships[0].target_symbol == "typing" + assert alias_map.get("List") == "typing.List" + assert alias_map.get("Dict") == "typing.Dict" + + def test_from_import_with_alias(self): + """Test extraction of from-import with alias.""" + processor = AstGrepPythonProcessor() + code = "from collections import defaultdict as dd" + + relationships, alias_map = processor.extract_imports(code, "test.py") + + assert len(relationships) == 1 + # The alias map should map dd to collections.defaultcount + assert "dd" in alias_map + assert "defaultdict" in alias_map.get("dd", "") + + def test_star_import(self): + """Test extraction of star imports.""" + processor = AstGrepPythonProcessor() + code = "from module import *" + + relationships, alias_map = processor.extract_imports(code, "test.py") + + assert len(relationships) >= 1 + # Star import should be recorded + star_imports = [r for r in relationships if "*" in r.target_symbol] + assert len(star_imports) >= 1 + + def test_relative_import(self): + """Test extraction of relative imports.""" + processor = AstGrepPythonProcessor() + code = "from .utils import helper" + + relationships, alias_map = processor.extract_imports(code, "test.py") + + # Should capture the relative import + assert len(relationships) >= 1 + rel_imports = [r for r in relationships if r.target_symbol.startswith(".")] + assert len(rel_imports) >= 1 + + def test_multiple_imports(self): + """Test extraction of multiple import types.""" + processor = AstGrepPythonProcessor() + code = """ +import os +import sys +from typing import List +from collections 
import defaultdict as dd +""" + + relationships, alias_map = processor.extract_imports(code, "test.py") + + assert len(relationships) >= 4 + targets = {r.target_symbol for r in relationships} + assert "os" in targets + assert "sys" in targets + assert "typing" in targets + assert "collections" in targets + + def test_no_imports(self): + """Test that code without imports returns empty list.""" + processor = AstGrepPythonProcessor() + code = """ +x = 1 +def foo(): + pass +""" + relationships, alias_map = processor.extract_imports(code, "test.py") + + assert len(relationships) == 0 + assert len(alias_map) == 0 + + +@pytest.mark.skipif(not ASTGREP_AVAILABLE, reason="ast-grep-py not installed") +class TestExtractMethodsIntegration: + """Integration tests combining multiple extraction methods.""" + + def test_full_file_extraction(self): + """Test extracting all relationships from a complete file.""" + processor = AstGrepPythonProcessor() + code = """ +import os +from typing import List, Optional + +class Base: + pass + +class Service(Base): + def __init__(self): + self.data = [] + + def process(self): + result = os.path.join("a", "b") + items = List([1, 2, 3]) + return result + +def main(): + svc = Service() + svc.process() +""" + source_file = "test.py" + + # Extract all relationship types + imports, alias_map = processor.extract_imports(code, source_file) + inherits = processor.extract_inherits(code, source_file) + calls = processor.extract_calls(code, source_file, alias_map=alias_map) + + # Verify we got all expected relationships + assert len(imports) >= 2 # os and typing + assert len(inherits) == 1 # Service -> Base + assert len(calls) >= 2 # os.path.join and others + + # Verify inheritance + assert any(r.source_symbol == "Service" and r.target_symbol == "Base" + for r in inherits) + + def test_alias_propagation(self): + """Test that import aliases propagate to call resolution.""" + processor = AstGrepPythonProcessor() + code = """ +import numpy as np + +def 
compute(): + arr = np.array([1, 2, 3]) + return np.sum(arr) +""" + source_file = "test.py" + + imports, alias_map = processor.extract_imports(code, source_file) + calls = processor.extract_calls(code, source_file, alias_map=alias_map) + + # Alias map should have np -> numpy + assert alias_map.get("np") == "numpy" + + # Calls should resolve np.array and np.sum + resolved_targets = {r.target_symbol for r in calls} + # At minimum, np.array and np.sum should be captured + np_calls = [t for t in resolved_targets if "np" in t or "numpy" in t] + assert len(np_calls) >= 2 + + +class TestExtractMethodFallback: + """Tests for fallback behavior when ast-grep unavailable.""" + + def test_extract_inherits_empty_when_unavailable(self): + """Test extract_inherits returns empty list when unavailable.""" + processor = AstGrepPythonProcessor() + if not processor.is_available(): + code = "class Dog(Animal): pass" + relationships = processor.extract_inherits(code, "test.py") + assert relationships == [] + + def test_extract_calls_empty_when_unavailable(self): + """Test extract_calls returns empty list when unavailable.""" + processor = AstGrepPythonProcessor() + if not processor.is_available(): + code = "print('hello')" + relationships = processor.extract_calls(code, "test.py") + assert relationships == [] + + def test_extract_imports_empty_when_unavailable(self): + """Test extract_imports returns empty tuple when unavailable.""" + processor = AstGrepPythonProcessor() + if not processor.is_available(): + code = "import os" + relationships, alias_map = processor.extract_imports(code, "test.py") + assert relationships == [] + assert alias_map == {} + + +class TestHelperMethods: + """Tests for internal helper methods.""" + + def test_parse_base_classes_single(self): + """Test _parse_base_classes with single base.""" + processor = AstGrepPythonProcessor() + result = processor._parse_base_classes("BaseClass") + assert result == ["BaseClass"] + + def test_parse_base_classes_multiple(self): 
+ """Test _parse_base_classes with multiple bases.""" + processor = AstGrepPythonProcessor() + result = processor._parse_base_classes("A, B, C") + assert result == ["A", "B", "C"] + + def test_parse_base_classes_with_generics(self): + """Test _parse_base_classes with generic types.""" + processor = AstGrepPythonProcessor() + result = processor._parse_base_classes("Generic[T], Mixin") + assert "Generic[T]" in result + assert "Mixin" in result + + def test_resolve_call_alias_simple(self): + """Test _resolve_call_alias with simple name.""" + processor = AstGrepPythonProcessor() + alias_map = {"np": "numpy"} + result = processor._resolve_call_alias("np", alias_map) + assert result == "numpy" + + def test_resolve_call_alias_qualified(self): + """Test _resolve_call_alias with qualified name.""" + processor = AstGrepPythonProcessor() + alias_map = {"np": "numpy"} + result = processor._resolve_call_alias("np.array", alias_map) + assert result == "numpy.array" + + def test_resolve_call_alias_no_match(self): + """Test _resolve_call_alias when no alias exists.""" + processor = AstGrepPythonProcessor() + alias_map = {} + result = processor._resolve_call_alias("myfunc", alias_map) + assert result == "myfunc" diff --git a/codex-lens/tests/parsers/test_astgrep_processor.py b/codex-lens/tests/parsers/test_astgrep_processor.py new file mode 100644 index 00000000..ba10b83e --- /dev/null +++ b/codex-lens/tests/parsers/test_astgrep_processor.py @@ -0,0 +1,402 @@ +"""Tests for AstGrepPythonProcessor. + +Tests pattern-based relationship extraction from Python source code +using ast-grep-py bindings. 
+""" + +from pathlib import Path + +import pytest + +from codexlens.parsers.astgrep_processor import ( + AstGrepPythonProcessor, + BaseAstGrepProcessor, + is_astgrep_processor_available, +) +from codexlens.parsers.patterns.python import ( + PYTHON_PATTERNS, + METAVARS, + RELATIONSHIP_PATTERNS, + get_pattern, + get_patterns_for_relationship, + get_metavar, +) + + +# Check if ast-grep is available for conditional test skipping +ASTGREP_AVAILABLE = is_astgrep_processor_available() + + +class TestPatternDefinitions: + """Tests for Python pattern definitions.""" + + def test_python_patterns_exist(self): + """Verify all expected patterns are defined.""" + expected_patterns = [ + "class_def", + "class_with_bases", + "func_def", + "async_func_def", + "import_stmt", + "import_from", + "call", + "method_call", + ] + for pattern_name in expected_patterns: + assert pattern_name in PYTHON_PATTERNS, f"Missing pattern: {pattern_name}" + + def test_get_pattern_returns_correct_pattern(self): + """Test get_pattern returns expected pattern strings.""" + # Note: ast-grep-py 0.40+ uses $$$ for zero-or-more multi-match + assert get_pattern("class_def") == "class $NAME $$$BODY" + assert get_pattern("func_def") == "def $NAME($$$PARAMS): $$$BODY" + assert get_pattern("import_stmt") == "import $MODULE" + + def test_get_pattern_raises_for_unknown(self): + """Test get_pattern raises KeyError for unknown patterns.""" + with pytest.raises(KeyError): + get_pattern("nonexistent_pattern") + + def test_metavars_defined(self): + """Verify metavariable mappings are defined.""" + expected_metavars = [ + "class_name", + "func_name", + "import_module", + "call_func", + ] + for var in expected_metavars: + assert var in METAVARS, f"Missing metavar: {var}" + + def test_get_metavar(self): + """Test get_metavar returns correct values.""" + assert get_metavar("class_name") == "NAME" + assert get_metavar("func_name") == "NAME" + assert get_metavar("import_module") == "MODULE" + + def 
test_relationship_patterns_mapping(self): + """Test relationship type to pattern mapping.""" + assert "class_with_bases" in get_patterns_for_relationship("inheritance") + assert "import_stmt" in get_patterns_for_relationship("imports") + assert "import_from" in get_patterns_for_relationship("imports") + assert "call" in get_patterns_for_relationship("calls") + + +class TestAstGrepPythonProcessorAvailability: + """Tests for processor availability.""" + + def test_is_available_returns_bool(self): + """Test is_available returns a boolean.""" + processor = AstGrepPythonProcessor() + assert isinstance(processor.is_available(), bool) + + def test_is_available_matches_global_check(self): + """Test is_available matches is_astgrep_processor_available.""" + processor = AstGrepPythonProcessor() + assert processor.is_available() == is_astgrep_processor_available() + + def test_module_level_check(self): + """Test module-level availability function.""" + assert isinstance(is_astgrep_processor_available(), bool) + + +@pytest.mark.skipif(not ASTGREP_AVAILABLE, reason="ast-grep-py not installed") +class TestAstGrepPythonProcessorParsing: + """Tests for Python parsing with ast-grep.""" + + def test_parse_simple_function(self): + """Test parsing a simple function definition.""" + processor = AstGrepPythonProcessor() + code = "def hello():\n pass" + result = processor.parse(code, Path("test.py")) + + assert result is not None + assert result.language == "python" + assert len(result.symbols) == 1 + assert result.symbols[0].name == "hello" + assert result.symbols[0].kind == "function" + + def test_parse_class(self): + """Test parsing a class definition.""" + processor = AstGrepPythonProcessor() + code = "class MyClass:\n pass" + result = processor.parse(code, Path("test.py")) + + assert result is not None + assert len(result.symbols) == 1 + assert result.symbols[0].name == "MyClass" + assert result.symbols[0].kind == "class" + + def test_parse_async_function(self): + """Test parsing an 
async function definition.""" + processor = AstGrepPythonProcessor() + code = "async def fetch_data():\n pass" + result = processor.parse(code, Path("test.py")) + + assert result is not None + assert len(result.symbols) == 1 + assert result.symbols[0].name == "fetch_data" + + def test_parse_class_with_inheritance(self): + """Test parsing class with inheritance.""" + processor = AstGrepPythonProcessor() + code = """ +class Base: + pass + +class Child(Base): + pass +""" + result = processor.parse(code, Path("test.py")) + + assert result is not None + names = [s.name for s in result.symbols] + assert "Base" in names + assert "Child" in names + + # Check inheritance relationship + inherits = [ + r for r in result.relationships + if r.relationship_type.value == "inherits" + ] + assert any(r.source_symbol == "Child" for r in inherits) + + def test_parse_imports(self): + """Test parsing import statements.""" + processor = AstGrepPythonProcessor() + code = """ +import os +from sys import path +""" + result = processor.parse(code, Path("test.py")) + + assert result is not None + imports = [ + r for r in result.relationships + if r.relationship_type.value == "imports" + ] + assert len(imports) >= 1 + targets = {r.target_symbol for r in imports} + assert "os" in targets + + def test_parse_function_calls(self): + """Test parsing function calls.""" + processor = AstGrepPythonProcessor() + code = """ +def main(): + print("hello") + len([1, 2, 3]) +""" + result = processor.parse(code, Path("test.py")) + + assert result is not None + calls = [ + r for r in result.relationships + if r.relationship_type.value == "calls" + ] + targets = {r.target_symbol for r in calls} + assert "print" in targets + assert "len" in targets + + def test_parse_empty_file(self): + """Test parsing an empty file.""" + processor = AstGrepPythonProcessor() + result = processor.parse("", Path("test.py")) + + assert result is not None + assert len(result.symbols) == 0 + + def 
test_parse_returns_indexed_file(self): + """Test that parse returns proper IndexedFile structure.""" + processor = AstGrepPythonProcessor() + code = "def test():\n pass" + result = processor.parse(code, Path("test.py")) + + assert result is not None + assert result.path.endswith("test.py") + assert result.language == "python" + assert isinstance(result.symbols, list) + assert isinstance(result.chunks, list) + assert isinstance(result.relationships, list) + + +@pytest.mark.skipif(not ASTGREP_AVAILABLE, reason="ast-grep-py not installed") +class TestAstGrepPythonProcessorRelationships: + """Tests for relationship extraction.""" + + def test_inheritance_extraction(self): + """Test extraction of inheritance relationships.""" + processor = AstGrepPythonProcessor() + code = """ +class Animal: + pass + +class Dog(Animal): + pass + +class Cat(Animal): + pass +""" + result = processor.parse(code, Path("test.py")) + + assert result is not None + inherits = [ + r for r in result.relationships + if r.relationship_type.value == "inherits" + ] + # Should have 2 inheritance relationships + assert len(inherits) >= 2 + sources = {r.source_symbol for r in inherits} + assert "Dog" in sources + assert "Cat" in sources + + def test_call_extraction_skips_self(self): + """Test that self.method() calls are filtered.""" + processor = AstGrepPythonProcessor() + code = """ +class Service: + def process(self): + self.internal() + external_call() + +def external_call(): + pass +""" + result = processor.parse(code, Path("test.py")) + + assert result is not None + calls = [ + r for r in result.relationships + if r.relationship_type.value == "calls" + ] + targets = {r.target_symbol for r in calls} + # self.internal should be filtered + assert "self.internal" not in targets + assert "external_call" in targets + + def test_import_with_alias_resolution(self): + """Test import alias resolution in calls.""" + processor = AstGrepPythonProcessor() + code = """ +import os.path as osp + +def main(): + 
osp.join("a", "b") +""" + result = processor.parse(code, Path("test.py")) + + assert result is not None + calls = [ + r for r in result.relationships + if r.relationship_type.value == "calls" + ] + targets = {r.target_symbol for r in calls} + # Should resolve osp to os.path + assert any("os.path" in t for t in targets) + + +@pytest.mark.skipif(not ASTGREP_AVAILABLE, reason="ast-grep-py not installed") +class TestAstGrepPythonProcessorRunAstGrep: + """Tests for run_ast_grep method.""" + + def test_run_ast_grep_returns_list(self): + """Test run_ast_grep returns a list.""" + processor = AstGrepPythonProcessor() + code = "def hello():\n pass" + processor._binding.parse(code) if processor._binding else None + + matches = processor.run_ast_grep(code, "def $NAME($$$PARAMS) $$$BODY") + assert isinstance(matches, list) + + def test_run_ast_grep_finds_matches(self): + """Test run_ast_grep finds expected matches.""" + processor = AstGrepPythonProcessor() + code = "def hello():\n pass" + + matches = processor.run_ast_grep(code, "def $NAME($$$PARAMS) $$$BODY") + assert len(matches) >= 1 + + def test_run_ast_grep_empty_code(self): + """Test run_ast_grep with empty code.""" + processor = AstGrepPythonProcessor() + matches = processor.run_ast_grep("", "def $NAME($$$PARAMS) $$$BODY") + assert matches == [] + + def test_run_ast_grep_no_matches(self): + """Test run_ast_grep when pattern doesn't match.""" + processor = AstGrepPythonProcessor() + code = "x = 1" + matches = processor.run_ast_grep(code, "class $NAME $$$BODY") + assert matches == [] + + +class TestAstGrepPythonProcessorFallback: + """Tests for fallback behavior when ast-grep unavailable.""" + + def test_parse_returns_none_when_unavailable(self): + """Test parse returns None when ast-grep unavailable.""" + # This test runs regardless of availability + # When unavailable, should gracefully return None + processor = AstGrepPythonProcessor() + if not processor.is_available(): + code = "def test():\n pass" + result = 
processor.parse(code, Path("test.py")) + assert result is None + + def test_run_ast_grep_empty_when_unavailable(self): + """Test run_ast_grep returns empty list when unavailable.""" + processor = AstGrepPythonProcessor() + if not processor.is_available(): + matches = processor.run_ast_grep("code", "pattern") + assert matches == [] + + +class TestBaseAstGrepProcessor: + """Tests for abstract base class.""" + + def test_cannot_instantiate_base_class(self): + """Test that BaseAstGrepProcessor cannot be instantiated directly.""" + with pytest.raises(TypeError): + BaseAstGrepProcessor("python") # type: ignore[abstract] + + def test_subclass_implements_abstract_methods(self): + """Test that AstGrepPythonProcessor implements all abstract methods.""" + processor = AstGrepPythonProcessor() + # Should have process_matches method + assert hasattr(processor, "process_matches") + # Should have parse method + assert hasattr(processor, "parse") + # Check methods are callable + assert callable(processor.process_matches) + assert callable(processor.parse) + + +class TestPatternIntegration: + """Tests for pattern module integration with processor.""" + + def test_processor_uses_pattern_module(self): + """Verify processor uses patterns from pattern module.""" + # The processor should import and use patterns from patterns/python/ + from codexlens.parsers.astgrep_processor import get_pattern + + # Verify pattern access works + assert get_pattern("class_def") is not None + assert get_pattern("func_def") is not None + + def test_pattern_consistency(self): + """Test pattern definitions are consistent.""" + # Patterns used by processor should exist in pattern module + patterns_needed = [ + "class_def", + "class_with_bases", + "func_def", + "async_func_def", + "import_stmt", + "import_from", + "call", + ] + for pattern_name in patterns_needed: + # Should not raise KeyError + pattern = get_pattern(pattern_name) + assert pattern is not None + assert len(pattern) > 0 diff --git 
a/codex-lens/tests/parsers/test_comparison.py b/codex-lens/tests/parsers/test_comparison.py new file mode 100644 index 00000000..06fa2b47 --- /dev/null +++ b/codex-lens/tests/parsers/test_comparison.py @@ -0,0 +1,526 @@ +"""Comparison tests for tree-sitter vs ast-grep Python relationship extraction. + +Validates that both parsers produce consistent output for Python relationship +extraction (INHERITS, CALL, IMPORTS). +""" + +from __future__ import annotations + +from pathlib import Path +from typing import List, Set, Tuple + +import pytest + +from codexlens.config import Config +from codexlens.entities import CodeRelationship, RelationshipType +from codexlens.parsers.treesitter_parser import TreeSitterSymbolParser + + +# Sample Python code for testing relationship extraction +SAMPLE_PYTHON_CODE = ''' +"""Module docstring.""" +import os +import sys +from typing import List, Dict, Optional +from collections import defaultdict as dd +from pathlib import Path as PPath + +class BaseClass: + """Base class.""" + + def base_method(self): + pass + + def another_method(self): + return self.base_method() + + +class Mixin: + """Mixin class.""" + + def mixin_func(self): + return "mixin" + + +class ChildClass(BaseClass, Mixin): + """Child class with multiple inheritance.""" + + def __init__(self): + super().__init__() + self.data = dd(list) + + def process(self, items: List[str]) -> Dict[str, int]: + result = {} + for item in items: + result[item] = len(item) + return result + + def call_external(self, path: str) -> Optional[str]: + p = PPath(path) + if p.exists(): + return str(p.read_text()) + return None + + +def standalone_function(): + """Standalone function.""" + data = [1, 2, 3] + return sum(data) + + +async def async_function(): + """Async function.""" + import asyncio + await asyncio.sleep(1) +''' + + +def relationship_to_tuple(rel: CodeRelationship) -> Tuple[str, str, str, int]: + """Convert relationship to a comparable tuple. 
+ + Returns: + (source_symbol, target_symbol, relationship_type, source_line) + """ + return ( + rel.source_symbol, + rel.target_symbol, + rel.relationship_type.value, + rel.source_line, + ) + + +def extract_relationship_tuples( + relationships: List[CodeRelationship], +) -> Set[Tuple[str, str, str]]: + """Extract relationship tuples without line numbers for comparison. + + Returns: + Set of (source_symbol, target_symbol, relationship_type) tuples + """ + return { + (rel.source_symbol, rel.target_symbol, rel.relationship_type.value) + for rel in relationships + } + + +def filter_by_type( + relationships: List[CodeRelationship], + rel_type: RelationshipType, +) -> List[CodeRelationship]: + """Filter relationships by type.""" + return [r for r in relationships if r.relationship_type == rel_type] + + +class TestTreeSitterVsAstGrepComparison: + """Compare tree-sitter and ast-grep Python relationship extraction.""" + + @pytest.fixture + def sample_path(self, tmp_path: Path) -> Path: + """Create a temporary Python file with sample code.""" + py_file = tmp_path / "sample.py" + py_file.write_text(SAMPLE_PYTHON_CODE) + return py_file + + @pytest.fixture + def ts_parser_default(self) -> TreeSitterSymbolParser: + """Create tree-sitter parser with default config (use_astgrep=False).""" + config = Config() + assert config.use_astgrep is False + return TreeSitterSymbolParser("python", config=config) + + @pytest.fixture + def ts_parser_astgrep(self) -> TreeSitterSymbolParser: + """Create tree-sitter parser with ast-grep enabled.""" + config = Config() + config.use_astgrep = True + return TreeSitterSymbolParser("python", config=config) + + def test_parser_availability(self, ts_parser_default: TreeSitterSymbolParser) -> None: + """Test that tree-sitter parser is available.""" + assert ts_parser_default.is_available() + + def test_astgrep_processor_initialization( + self, ts_parser_astgrep: TreeSitterSymbolParser + ) -> None: + """Test that ast-grep processor is initialized when 
config enables it.""" + # The processor should be initialized (may be None if ast-grep-py not installed) + # This test just verifies the initialization path works + assert ts_parser_astgrep._config is not None + assert ts_parser_astgrep._config.use_astgrep is True + + def _skip_if_astgrep_unavailable( + self, ts_parser_astgrep: TreeSitterSymbolParser + ) -> None: + """Skip test if ast-grep is not available.""" + if ts_parser_astgrep._astgrep_processor is None: + pytest.skip("ast-grep-py not installed") + + def test_parse_returns_valid_result( + self, + ts_parser_default: TreeSitterSymbolParser, + sample_path: Path, + ) -> None: + """Test that parsing returns a valid IndexedFile.""" + source_code = sample_path.read_text() + result = ts_parser_default.parse(source_code, sample_path) + + assert result is not None + assert result.language == "python" + assert len(result.symbols) > 0 + assert len(result.relationships) > 0 + + def test_extracted_symbols_match( + self, + ts_parser_default: TreeSitterSymbolParser, + ts_parser_astgrep: TreeSitterSymbolParser, + sample_path: Path, + ) -> None: + """Test that both parsers extract similar symbols.""" + self._skip_if_astgrep_unavailable(ts_parser_astgrep) + + source_code = sample_path.read_text() + + result_ts = ts_parser_default.parse(source_code, sample_path) + result_astgrep = ts_parser_astgrep.parse(source_code, sample_path) + + assert result_ts is not None + assert result_astgrep is not None + + # Compare symbol names + ts_symbols = {s.name for s in result_ts.symbols} + astgrep_symbols = {s.name for s in result_astgrep.symbols} + + # Should have the same symbols (classes, functions, methods) + assert ts_symbols == astgrep_symbols + + def test_inheritance_relationships( + self, + ts_parser_default: TreeSitterSymbolParser, + ts_parser_astgrep: TreeSitterSymbolParser, + sample_path: Path, + ) -> None: + """Test INHERITS relationship extraction consistency.""" + self._skip_if_astgrep_unavailable(ts_parser_astgrep) + + 
source_code = sample_path.read_text() + + result_ts = ts_parser_default.parse(source_code, sample_path) + result_astgrep = ts_parser_astgrep.parse(source_code, sample_path) + + assert result_ts is not None + assert result_astgrep is not None + + # Extract inheritance relationships + ts_inherits = filter_by_type(result_ts.relationships, RelationshipType.INHERITS) + astgrep_inherits = filter_by_type( + result_astgrep.relationships, RelationshipType.INHERITS + ) + + ts_tuples = extract_relationship_tuples(ts_inherits) + astgrep_tuples = extract_relationship_tuples(astgrep_inherits) + + # Both should detect ChildClass(BaseClass, Mixin) + assert ts_tuples == astgrep_tuples + + # Verify specific inheritance relationships + expected_inherits = { + ("ChildClass", "BaseClass", "inherits"), + ("ChildClass", "Mixin", "inherits"), + } + assert ts_tuples == expected_inherits + + def test_import_relationships( + self, + ts_parser_default: TreeSitterSymbolParser, + ts_parser_astgrep: TreeSitterSymbolParser, + sample_path: Path, + ) -> None: + """Test IMPORTS relationship extraction consistency.""" + self._skip_if_astgrep_unavailable(ts_parser_astgrep) + + source_code = sample_path.read_text() + + result_ts = ts_parser_default.parse(source_code, sample_path) + result_astgrep = ts_parser_astgrep.parse(source_code, sample_path) + + assert result_ts is not None + assert result_astgrep is not None + + # Extract import relationships + ts_imports = filter_by_type(result_ts.relationships, RelationshipType.IMPORTS) + astgrep_imports = filter_by_type( + result_astgrep.relationships, RelationshipType.IMPORTS + ) + + ts_tuples = extract_relationship_tuples(ts_imports) + astgrep_tuples = extract_relationship_tuples(astgrep_imports) + + # Compare - should be similar (may differ in exact module representation) + # At minimum, both should detect the top-level imports + ts_modules = {t[1].split(".")[0] for t in ts_tuples} + astgrep_modules = {t[1].split(".")[0] for t in astgrep_tuples} + + # 
Should have imports from: os, sys, typing, collections, pathlib + expected_modules = {"os", "sys", "typing", "collections", "pathlib", "asyncio"} + assert ts_modules >= expected_modules or astgrep_modules >= expected_modules + + def test_call_relationships( + self, + ts_parser_default: TreeSitterSymbolParser, + ts_parser_astgrep: TreeSitterSymbolParser, + sample_path: Path, + ) -> None: + """Test CALL relationship extraction consistency.""" + self._skip_if_astgrep_unavailable(ts_parser_astgrep) + + source_code = sample_path.read_text() + + result_ts = ts_parser_default.parse(source_code, sample_path) + result_astgrep = ts_parser_astgrep.parse(source_code, sample_path) + + assert result_ts is not None + assert result_astgrep is not None + + # Extract call relationships + ts_calls = filter_by_type(result_ts.relationships, RelationshipType.CALL) + astgrep_calls = filter_by_type( + result_astgrep.relationships, RelationshipType.CALL + ) + + # Calls may differ due to scope tracking differences + # Just verify both parsers find call relationships + assert len(ts_calls) > 0 + assert len(astgrep_calls) > 0 + + # Verify specific calls that should be detected + ts_call_targets = {r.target_symbol for r in ts_calls} + astgrep_call_targets = {r.target_symbol for r in astgrep_calls} + + # Both should detect at least some common calls + # (exact match not required due to scope tracking differences) + common_targets = ts_call_targets & astgrep_call_targets + assert len(common_targets) > 0 + + def test_relationship_count_similarity( + self, + ts_parser_default: TreeSitterSymbolParser, + ts_parser_astgrep: TreeSitterSymbolParser, + sample_path: Path, + ) -> None: + """Test that relationship counts are similar (>95% consistency).""" + self._skip_if_astgrep_unavailable(ts_parser_astgrep) + + source_code = sample_path.read_text() + + result_ts = ts_parser_default.parse(source_code, sample_path) + result_astgrep = ts_parser_astgrep.parse(source_code, sample_path) + + assert result_ts is 
not None + assert result_astgrep is not None + + ts_count = len(result_ts.relationships) + astgrep_count = len(result_astgrep.relationships) + + # Calculate consistency percentage + if max(ts_count, astgrep_count) == 0: + consistency = 100.0 + else: + consistency = ( + min(ts_count, astgrep_count) / max(ts_count, astgrep_count) * 100 + ) + + # Require >95% consistency + assert consistency >= 95.0, ( + f"Relationship consistency {consistency:.1f}% below 95% threshold " + f"(tree-sitter: {ts_count}, ast-grep: {astgrep_count})" + ) + + def test_config_switch_affects_parser( + self, sample_path: Path + ) -> None: + """Test that config.use_astgrep affects which parser is used.""" + config_default = Config() + config_astgrep = Config() + config_astgrep.use_astgrep = True + + parser_default = TreeSitterSymbolParser("python", config=config_default) + parser_astgrep = TreeSitterSymbolParser("python", config=config_astgrep) + + # Default parser should not have ast-grep processor + assert parser_default._astgrep_processor is None + + # Ast-grep parser may have processor if ast-grep-py is installed + # (could be None if not installed, which is fine) + if parser_astgrep._astgrep_processor is not None: + # If available, verify it's the right type + from codexlens.parsers.astgrep_processor import AstGrepPythonProcessor + + assert isinstance( + parser_astgrep._astgrep_processor, AstGrepPythonProcessor + ) + + def test_fallback_to_treesitter_on_astgrep_failure( + self, + ts_parser_astgrep: TreeSitterSymbolParser, + sample_path: Path, + ) -> None: + """Test that parser falls back to tree-sitter if ast-grep fails.""" + source_code = sample_path.read_text() + + # Even with use_astgrep=True, should get valid results + result = ts_parser_astgrep.parse(source_code, sample_path) + + # Should always return a valid result (either from ast-grep or tree-sitter fallback) + assert result is not None + assert result.language == "python" + assert len(result.relationships) > 0 + + +class 
TestSimpleCodeSamples: + """Test with simple code samples for precise comparison.""" + + def test_simple_inheritance(self) -> None: + """Test simple single inheritance.""" + code = """ +class Parent: + pass + +class Child(Parent): + pass +""" + self._compare_parsers(code, expected_inherits={("Child", "Parent")}) + + def test_multiple_inheritance(self) -> None: + """Test multiple inheritance.""" + code = """ +class A: + pass + +class B: + pass + +class C(A, B): + pass +""" + self._compare_parsers( + code, expected_inherits={("C", "A"), ("C", "B")} + ) + + def test_simple_imports(self) -> None: + """Test simple import statements.""" + code = """ +import os +import sys +""" + config_ts = Config() + config_ag = Config() + config_ag.use_astgrep = True + + parser_ts = TreeSitterSymbolParser("python", config=config_ts) + parser_ag = TreeSitterSymbolParser("python", config=config_ag) + + tmp_path = Path("test.py") + result_ts = parser_ts.parse(code, tmp_path) + result_ag = parser_ag.parse(code, tmp_path) + + assert result_ts is not None + # ast-grep result may be None if not installed + + if result_ag is not None: + ts_imports = { + r.target_symbol + for r in result_ts.relationships + if r.relationship_type == RelationshipType.IMPORTS + } + ag_imports = { + r.target_symbol + for r in result_ag.relationships + if r.relationship_type == RelationshipType.IMPORTS + } + assert ts_imports == ag_imports + + def test_imports_inside_function(self) -> None: + """Test simple import inside a function scope is recorded. + + Note: tree-sitter parser requires a scope to record imports. + Module-level imports without any function/class are not recorded + because scope_stack is empty at module level. 
+ """ + code = """ +def my_function(): + import collections + return collections +""" + config_ts = Config() + config_ag = Config() + config_ag.use_astgrep = True + + parser_ts = TreeSitterSymbolParser("python", config=config_ts) + parser_ag = TreeSitterSymbolParser("python", config=config_ag) + + tmp_path = Path("test.py") + result_ts = parser_ts.parse(code, tmp_path) + result_ag = parser_ag.parse(code, tmp_path) + + assert result_ts is not None + + # Get import relationship targets + ts_imports = [ + r.target_symbol + for r in result_ts.relationships + if r.relationship_type == RelationshipType.IMPORTS + ] + + # Should have collections + ts_has_collections = any("collections" in t for t in ts_imports) + assert ts_has_collections, f"Expected collections import, got: {ts_imports}" + + # If ast-grep is available, verify it also finds the imports + if result_ag is not None: + ag_imports = [ + r.target_symbol + for r in result_ag.relationships + if r.relationship_type == RelationshipType.IMPORTS + ] + ag_has_collections = any("collections" in t for t in ag_imports) + assert ag_has_collections, f"Expected collections import in ast-grep, got: {ag_imports}" + + def _compare_parsers( + self, + code: str, + expected_inherits: Set[Tuple[str, str]], + ) -> None: + """Helper to compare parser outputs for inheritance.""" + config_ts = Config() + config_ag = Config() + config_ag.use_astgrep = True + + parser_ts = TreeSitterSymbolParser("python", config=config_ts) + parser_ag = TreeSitterSymbolParser("python", config=config_ag) + + tmp_path = Path("test.py") + result_ts = parser_ts.parse(code, tmp_path) + + assert result_ts is not None + + # Verify tree-sitter finds expected inheritance + ts_inherits = { + (r.source_symbol, r.target_symbol) + for r in result_ts.relationships + if r.relationship_type == RelationshipType.INHERITS + } + assert ts_inherits == expected_inherits + + # If ast-grep is available, verify it matches + result_ag = parser_ag.parse(code, tmp_path) + if 
result_ag is not None: + ag_inherits = { + (r.source_symbol, r.target_symbol) + for r in result_ag.relationships + if r.relationship_type == RelationshipType.INHERITS + } + assert ag_inherits == expected_inherits + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/codex-lens/tests/test_astgrep_binding.py b/codex-lens/tests/test_astgrep_binding.py new file mode 100644 index 00000000..7a154845 --- /dev/null +++ b/codex-lens/tests/test_astgrep_binding.py @@ -0,0 +1,191 @@ +"""Tests for ast-grep binding module. + +Verifies basic import and functionality of AstGrepBinding. +Run with: python -m pytest tests/test_astgrep_binding.py -v +""" + +from __future__ import annotations + +import pytest +from pathlib import Path + + +class TestAstGrepBindingAvailability: + """Test availability checks.""" + + def test_is_astgrep_available_function(self): + """Test is_astgrep_available function returns boolean.""" + from codexlens.parsers.astgrep_binding import is_astgrep_available + result = is_astgrep_available() + assert isinstance(result, bool) + + def test_get_supported_languages(self): + """Test get_supported_languages returns expected languages.""" + from codexlens.parsers.astgrep_binding import get_supported_languages + languages = get_supported_languages() + assert isinstance(languages, list) + assert "python" in languages + assert "javascript" in languages + assert "typescript" in languages + + +class TestAstGrepBindingInit: + """Test AstGrepBinding initialization.""" + + def test_init_python(self): + """Test initialization with Python language.""" + from codexlens.parsers.astgrep_binding import AstGrepBinding + binding = AstGrepBinding("python") + assert binding.language_id == "python" + + def test_init_typescript_with_tsx(self): + """Test TSX detection from file extension.""" + from codexlens.parsers.astgrep_binding import AstGrepBinding + binding = AstGrepBinding("typescript", Path("component.tsx")) + assert binding.language_id == "typescript" + 
+ def test_is_available_returns_boolean(self): + """Test is_available returns boolean.""" + from codexlens.parsers.astgrep_binding import AstGrepBinding + binding = AstGrepBinding("python") + result = binding.is_available() + assert isinstance(result, bool) + + +def _is_astgrep_installed(): + """Check if ast-grep-py is installed.""" + try: + import ast_grep_py # noqa: F401 + return True + except ImportError: + return False + + +@pytest.mark.skipif( + not _is_astgrep_installed(), + reason="ast-grep-py not installed" +) +class TestAstGrepBindingWithAstGrep: + """Tests that require ast-grep-py to be installed.""" + + def test_parse_simple_python(self): + """Test parsing simple Python code.""" + from codexlens.parsers.astgrep_binding import AstGrepBinding + binding = AstGrepBinding("python") + + if not binding.is_available(): + pytest.skip("ast-grep not available") + + source = "x = 1" + result = binding.parse(source) + assert result is True + + def test_find_inheritance(self): + """Test finding class inheritance.""" + from codexlens.parsers.astgrep_binding import AstGrepBinding + binding = AstGrepBinding("python") + + if not binding.is_available(): + pytest.skip("ast-grep not available") + + source = """ +class MyClass(BaseClass): + pass +""" + binding.parse(source) + results = binding.find_inheritance() + assert len(results) >= 0 # May or may not find depending on pattern match + + def test_find_calls(self): + """Test finding function calls.""" + from codexlens.parsers.astgrep_binding import AstGrepBinding + binding = AstGrepBinding("python") + + if not binding.is_available(): + pytest.skip("ast-grep not available") + + source = """ +def foo(): + bar() + baz.qux() +""" + binding.parse(source) + results = binding.find_calls() + assert isinstance(results, list) + + def test_find_imports(self): + """Test finding import statements.""" + from codexlens.parsers.astgrep_binding import AstGrepBinding + binding = AstGrepBinding("python") + + if not binding.is_available(): + 
pytest.skip("ast-grep not available") + + source = """ +import os +from typing import List +""" + binding.parse(source) + results = binding.find_imports() + assert isinstance(results, list) + + +def test_basic_import(): + """Test that the module can be imported.""" + try: + from codexlens.parsers.astgrep_binding import ( + AstGrepBinding, + is_astgrep_available, + get_supported_languages, + ASTGREP_AVAILABLE, + ) + assert True + except ImportError as e: + pytest.fail(f"Failed to import astgrep_binding: {e}") + + +def test_availability_flag(): + """Test ASTGREP_AVAILABLE flag is defined.""" + from codexlens.parsers.astgrep_binding import ASTGREP_AVAILABLE + assert isinstance(ASTGREP_AVAILABLE, bool) + + +if __name__ == "__main__": + # Run basic verification + print("Testing astgrep_binding module...") + + from codexlens.parsers.astgrep_binding import ( + AstGrepBinding, + is_astgrep_available, + get_supported_languages, + ) + + print(f"ast-grep available: {is_astgrep_available()}") + print(f"Supported languages: {get_supported_languages()}") + + binding = AstGrepBinding("python") + print(f"Python binding available: {binding.is_available()}") + + if binding.is_available(): + test_code = """ +import os +from typing import List + +class MyClass(BaseClass): + def method(self): + self.helper() + external_func() + +def helper(): + pass +""" + binding.parse(test_code) + print(f"Inheritance found: {binding.find_inheritance()}") + print(f"Calls found: {binding.find_calls()}") + print(f"Imports found: {binding.find_imports()}") + else: + print("Note: ast-grep-py not installed. To install:") + print(" pip install ast-grep-py") + print(" Note: May have compatibility issues with Python 3.13") + + print("Basic verification complete!")