Add comprehensive tests for ast-grep and tree-sitter relationship extraction

- Introduced a test suite for AstGrepPythonProcessor covering pattern definitions, parsing, and relationship extraction.
- Added comparison tests between tree-sitter and ast-grep for consistency in relationship extraction.
- Implemented tests for the ast-grep binding module to verify functionality and availability.
- Ensured tests cover various scenarios including inheritance, function calls, and imports.
2026-02-28 09:23:08 +08:00 · 2026-02-15 21:14:14 +08:00
parent 126a357aa2
commit 48a6a1f2aa
56 changed files with 10622 additions and 374 deletions
--- a/ccw/src/commands/core-memory.ts
+++ b/ccw/src/commands/core-memory.ts
@@ -35,6 +35,10 @@ interface CommandOptions {
  delete?: boolean;
  merge?: string;
  dedup?: boolean;
+  unified?: boolean;
+  topK?: string;
+  minScore?: string;
+  category?: string;
 }

 /**
@@ -844,6 +848,114 @@ async function jobsAction(options: CommandOptions): Promise<void> {
  }
 }

+/**
+ * Unified vector+FTS search across all memory stores.
+ * Queries UnifiedMemoryService.search() and prints either a JSON document (--json)
+ * or a human-readable hit list. Exits with code 1 on a blank query, an invalid
+ * --topK / --minScore value, or any error thrown while searching.
+ * @param keyword - Search query; must be non-blank.
+ * @param options - CLI options; reads topK, minScore, category and json.
+ */
+async function unifiedSearchAction(keyword: string, options: CommandOptions): Promise<void> {
+  if (!keyword || keyword.trim() === '') {
+    console.error(chalk.red('Error: Query is required'));
+    console.error(chalk.gray('Usage: ccw core-memory search --unified <query> [--topK 20] [--minScore 0] [--category <cat>]'));
+    process.exit(1);
+  }
+
+  // Reject non-numeric values up front instead of handing NaN to the search service.
+  const topK = parseInt(options.topK || '20', 10);
+  const minScore = parseFloat(options.minScore || '0');
+  if (!Number.isInteger(topK) || topK <= 0 || !Number.isFinite(minScore)) {
+    console.error(chalk.red('Error: --topK must be a positive integer and --minScore must be a number'));
+    process.exit(1);
+  }
+
+  try {
+    const { UnifiedMemoryService } = await import('../core/unified-memory-service.js');
+    const service = new UnifiedMemoryService(getProjectPath());
+    // Keep stdout pure JSON in --json mode: no banner, and empty results are still JSON.
+    if (!options.json) {
+      console.log(chalk.cyan(`\n  Unified search: "${keyword}" (topK=${topK}, minScore=${minScore})\n`));
+    }
+    const results = await service.search(keyword, {
+      limit: topK,
+      minScore,
+      category: (options.category || undefined) as any, // NOTE(review): accepted categories are defined by the service - confirm and validate
+    });
+
+    if (options.json) {
+      console.log(JSON.stringify({ query: keyword, total: results.length, results }, null, 2));
+      return;
+    }
+    if (results.length === 0) {
+      console.log(chalk.yellow('  No results found.\n'));
+      return;
+    }
+    const divider = chalk.gray('  -----------------------------------------------------------------------');
+    console.log(divider);
+    for (const result of results) {
+      const sources: string[] = [];
+      if (result.rank_sources.vector_rank) sources.push(`vec:#${result.rank_sources.vector_rank}`);
+      if (result.rank_sources.fts_rank) sources.push(`fts:#${result.rank_sources.fts_rank}`);
+      if (result.rank_sources.heat_score) sources.push(`heat:${result.rank_sources.heat_score.toFixed(1)}`);
+      const snippet = result.content.substring(0, 120).replace(/\n/g, ' ');
+      console.log(chalk.cyan(`  ${result.source_id}`) + chalk.gray(` [${result.source_type}/${result.category}]`) + chalk.white(` score=${result.score.toFixed(4)}`));
+      console.log(chalk.gray(`    Sources: ${sources.join(' | ')}`));
+      console.log(chalk.white(`    ${snippet}${result.content.length > 120 ? '...' : ''}`));
+      console.log(divider);
+    }
+    console.log(chalk.gray(`\n  Total: ${results.length}\n`));
+  } catch (error) {
+    console.error(chalk.red(`Error: ${(error as Error).message}`));
+    process.exit(1);
+  }
+}
+
+/**
+ * Rebuild the unified HNSW vector index from scratch.
+ *
+ * Exits with code 1 when the embedder is unavailable, when reindexAll()
+ * reports failure, or when an error is thrown. With --json only the result
+ * object is written to stdout.
+ *
+ * @param options - CLI options; only `json` is read here.
+ */
+async function reindexAction(options: CommandOptions): Promise<void> {
+  try {
+    const { UnifiedVectorIndex, isUnifiedEmbedderAvailable } = await import('../core/unified-vector-index.js');
+    if (!isUnifiedEmbedderAvailable()) {
+      console.error(chalk.red('Error: Unified embedder is not available.'));
+      console.error(chalk.gray('Ensure Python venv and embedder script are set up.'));
+      process.exit(1);
+    }
+
+    const index = new UnifiedVectorIndex(getProjectPath());
+    // The progress banner is for humans only; printing it in --json mode would corrupt stdout.
+    if (!options.json) {
+      console.log(chalk.cyan('\n  Rebuilding unified vector index...\n'));
+    }
+
+    const result = await index.reindexAll();
+    if (!result.success) {
+      console.error(chalk.red(`  Reindex failed: ${result.error}\n`));
+      process.exit(1);
+    }
+    if (options.json) {
+      console.log(JSON.stringify(result, null, 2));
+      return;
+    }
+
+    console.log(chalk.green('  Reindex complete.'));
+    if (result.hnsw_count !== undefined) console.log(chalk.white(`  HNSW vectors: ${result.hnsw_count}`));
+    if (result.elapsed_time !== undefined) console.log(chalk.white(`  Elapsed: ${result.elapsed_time.toFixed(2)}s`));
+    console.log();
+  } catch (error) {
+    console.error(chalk.red(`Error: ${(error as Error).message}`));
+    process.exit(1);
+  }
+}
+
 /**
 * Core Memory command entry point
 */
@@ -889,7 +1001,11 @@ export async function coreMemoryCommand(
      break;

    case 'search':
-      await searchAction(textArg, options);
+      if (options.unified) {
+        await unifiedSearchAction(textArg, options);
+      } else {
+        await searchAction(textArg, options);
+      }
      break;

    case 'projects':
@@ -921,6 +1037,10 @@ export async function coreMemoryCommand(
      await jobsAction(options);
      break;

+    case 'reindex':
+      await reindexAction(options);
+      break;
+
    default:
      console.log(chalk.bold.cyan('\n  CCW Core Memory\n'));
      console.log('  Manage core memory entries and session clusters.\n');
@@ -945,12 +1065,14 @@ export async function coreMemoryCommand(
      console.log(chalk.white('    context                     ') + chalk.gray('Get progressive index'));
      console.log(chalk.white('    load-cluster <id>           ') + chalk.gray('Load cluster context'));
      console.log(chalk.white('    search <keyword>            ') + chalk.gray('Search sessions'));
+      console.log(chalk.white('    search --unified <query>    ') + chalk.gray('Unified vector+FTS search'));
      console.log();
      console.log(chalk.bold('  Memory V2 Pipeline:'));
      console.log(chalk.white('    extract                     ') + chalk.gray('Run batch memory extraction'));
      console.log(chalk.white('    extract-status              ') + chalk.gray('Show extraction pipeline status'));
      console.log(chalk.white('    consolidate                 ') + chalk.gray('Run memory consolidation'));
      console.log(chalk.white('    jobs                        ') + chalk.gray('List all pipeline jobs'));
+      console.log(chalk.white('    reindex                     ') + chalk.gray('Rebuild unified vector index'));
      console.log();
      console.log(chalk.bold('  Options:'));
      console.log(chalk.gray('    --id <id>                   Memory ID (for export/summary)'));
--- a/ccw/src/commands/hook.ts
+++ b/ccw/src/commands/hook.ts
@@ -12,7 +12,7 @@ interface HookOptions {
  stdin?: boolean;
  sessionId?: string;
  prompt?: string;
-  type?: 'session-start' | 'context';
+  type?: 'session-start' | 'context' | 'session-end';
  path?: string;
 }

@@ -95,10 +95,32 @@ function getProjectPath(hookCwd?: string): string {
  return hookCwd || process.cwd();
 }

+/**
+ * Attempt to construct a UnifiedContextBuilder for the given project path.
+ * Resolves to null when the embedder reports itself unavailable or when an import or the constructor throws.
+ */
+async function tryCreateContextBuilder(projectPath: string): Promise<any | null> {
+  try {
+    const { isUnifiedEmbedderAvailable: embedderReady } = await import('../core/unified-vector-index.js');
+    if (embedderReady()) {
+      const { UnifiedContextBuilder: Builder } = await import('../core/unified-context-builder.js');
+      return new Builder(projectPath);
+    }
+    return null;
+  } catch {
+    return null;
+  }
+}
+
 /**
 * Session context action - provides progressive context loading
- * First prompt: returns session overview with clusters
- * Subsequent prompts: returns intent-matched sessions
+ *
+ * Uses UnifiedContextBuilder when available (embedder present):
+ *   - session-start: MEMORY.md summary + clusters + hot entities + patterns
+ *   - per-prompt: vector search across all memory categories
+ *
+ * Falls back to SessionClusteringService.getProgressiveIndex() when
+ * the embedder is unavailable, preserving backward compatibility.
 */
 async function sessionContextAction(options: HookOptions): Promise<void> {
  let { stdin, sessionId, prompt } = options;
@@ -154,29 +176,43 @@ async function sessionContextAction(options: HookOptions): Promise<void> {
    let contextType: 'session-start' | 'context';
    let content = '';

-    // Dynamic import to avoid circular dependencies
-    const { SessionClusteringService } = await import('../core/session-clustering-service.js');
-    const clusteringService = new SessionClusteringService(projectPath);
+    // Try UnifiedContextBuilder first; fall back to getProgressiveIndex
+    const contextBuilder = await tryCreateContextBuilder(projectPath);

-    if (isFirstPrompt) {
-      // First prompt: return session overview with clusters
-      contextType = 'session-start';
-      content = await clusteringService.getProgressiveIndex({
-        type: 'session-start',
-        sessionId
-      });
-    } else if (prompt && prompt.trim().length > 0) {
-      // Subsequent prompts with content: return intent-matched sessions
-      contextType = 'context';
-      content = await clusteringService.getProgressiveIndex({
-        type: 'context',
-        sessionId,
-        prompt
-      });
+    if (contextBuilder) {
+      // Use UnifiedContextBuilder
+      if (isFirstPrompt) {
+        contextType = 'session-start';
+        content = await contextBuilder.buildSessionStartContext();
+      } else if (prompt && prompt.trim().length > 0) {
+        contextType = 'context';
+        content = await contextBuilder.buildPromptContext(prompt);
+      } else {
+        contextType = 'context';
+        content = '';
+      }
    } else {
-      // Subsequent prompts without content: return minimal context
-      contextType = 'context';
-      content = ''; // No context needed for empty prompts
+      // Fallback: use legacy SessionClusteringService.getProgressiveIndex()
+      const { SessionClusteringService } = await import('../core/session-clustering-service.js');
+      const clusteringService = new SessionClusteringService(projectPath);
+
+      if (isFirstPrompt) {
+        contextType = 'session-start';
+        content = await clusteringService.getProgressiveIndex({
+          type: 'session-start',
+          sessionId
+        });
+      } else if (prompt && prompt.trim().length > 0) {
+        contextType = 'context';
+        content = await clusteringService.getProgressiveIndex({
+          type: 'context',
+          sessionId,
+          prompt
+        });
+      } else {
+        contextType = 'context';
+        content = '';
+      }
    }

    if (stdin) {
@@ -194,6 +230,7 @@ async function sessionContextAction(options: HookOptions): Promise<void> {
    console.log(chalk.cyan('Type:'), contextType);
    console.log(chalk.cyan('First Prompt:'), isFirstPrompt ? 'Yes' : 'No');
    console.log(chalk.cyan('Load Count:'), newState.loadCount);
+    console.log(chalk.cyan('Builder:'), contextBuilder ? 'UnifiedContextBuilder' : 'Legacy (getProgressiveIndex)');
    console.log(chalk.gray('─'.repeat(40)));
    if (content) {
      console.log(content);
@@ -210,6 +247,81 @@ async function sessionContextAction(options: HookOptions): Promise<void> {
  }
 }

+/**
+ * Session end action - runs the background memory-maintenance tasks returned by
+ * UnifiedContextBuilder.buildSessionEndTasks(), concurrently and best-effort.
+ * Documented tasks (defined in the builder, not verifiable here): incremental vector
+ * embedding, incremental clustering, and heat score updates.
+ * A failing task is reported as [FAIL] with its reason (CLI mode only) and never
+ * changes the exit code. In stdin (hook) mode this function prints nothing and
+ * always exits 0; in CLI mode it exits 1 only when --session-id is missing or an
+ * unexpected error occurs outside the tasks themselves.
+ */
+async function sessionEndAction(options: HookOptions): Promise<void> {
+  let { stdin, sessionId } = options;
+  let hookCwd: string | undefined;
+
+  if (stdin) {
+    try {
+      const stdinData = await readStdin();
+      if (stdinData) {
+        const hookData = JSON.parse(stdinData) as HookData;
+        sessionId = hookData.session_id || sessionId;
+        hookCwd = hookData.cwd;
+      }
+    } catch {
+      // Best-effort: unreadable or malformed hook input falls back to the --session-id option
+    }
+  }
+
+  if (!sessionId) {
+    if (!stdin) {
+      console.error(chalk.red('Error: --session-id is required'));
+    }
+    process.exit(stdin ? 0 : 1);
+  }
+
+  try {
+    const projectPath = getProjectPath(hookCwd);
+    const contextBuilder = await tryCreateContextBuilder(projectPath);
+
+    if (!contextBuilder) {
+      // UnifiedContextBuilder not available - skip session-end tasks
+      if (!stdin) {
+        console.log(chalk.gray('(UnifiedContextBuilder not available, skipping session-end tasks)'));
+      }
+      process.exit(0);
+    }
+
+    const tasks: Array<{ name: string; execute: () => Promise<void> }> = contextBuilder.buildSessionEndTasks(sessionId);
+    if (!stdin) {
+      console.log(chalk.green(`Session End: executing ${tasks.length} background tasks...`));
+    }
+
+    // The async wrapper turns a synchronous throw in execute() into a rejection, so one broken task cannot abort the rest.
+    const results = await Promise.allSettled(tasks.map(async (task) => task.execute()));
+
+    if (!stdin) {
+      results.forEach((outcome, i) => {
+        if (outcome.status === 'fulfilled') {
+          console.log(chalk.green(`  [OK] ${tasks[i].name}`));
+        } else {
+          const reason = outcome.reason instanceof Error ? outcome.reason.message : String(outcome.reason);
+          console.log(chalk.yellow(`  [FAIL] ${tasks[i].name}: ${reason}`));
+        }
+      });
+    }
+
+    process.exit(0);
+  } catch (error) {
+    if (stdin) {
+      process.exit(0);
+    }
+    console.error(chalk.red(`Error: ${(error as Error).message}`));
+    process.exit(1);
+  }
+}
+
 /**
 * Parse CCW status.json and output formatted status
 */
@@ -311,6 +423,7 @@ ${chalk.bold('USAGE')}
 ${chalk.bold('SUBCOMMANDS')}
  parse-status      Parse CCW status.json and display current/next command
  session-context   Progressive session context loading (replaces curl/bash hook)
+  session-end       Trigger background memory maintenance tasks
  notify            Send notification to ccw view dashboard

 ${chalk.bold('OPTIONS')}
@@ -363,6 +476,9 @@ export async function hookCommand(
    case 'context':
      await sessionContextAction(options);
      break;
+    case 'session-end':
+      await sessionEndAction(options);
+      break;
    case 'notify':
      await notifyAction(options);
      break;