feat: 添加钩子命令，简化 Claude Code 钩子操作接口，支持会话上下文加载和通知功能

2026-02-13 02:41:50 +08:00 · 2025-12-21 23:28:19 +08:00
parent 6d3f10d1d7
commit 210f0f1012
7 changed files with 432 additions and 11 deletions
--- a/ccw/scripts/memory_embedder.py
+++ b/ccw/scripts/memory_embedder.py
@@ -26,7 +26,7 @@ except ImportError:
    sys.exit(1)
 try:
-    from codexlens.semantic.embedder import get_embedder
+    from codexlens.semantic.embedder import get_embedder, clear_embedder_cache
 except ImportError:
    print("Error: CodexLens not found. Install with: pip install codexlens[semantic]", file=sys.stderr)
    sys.exit(1)
@@ -46,8 +46,15 @@ class MemoryEmbedder:
        self.conn = sqlite3.connect(str(self.db_path))
        self.conn.row_factory = sqlite3.Row
-        # Initialize embedder (uses cached singleton)
+        # Lazy-load embedder to avoid ~0.8s model loading for status command
-        self.embedder = get_embedder(profile="code")
+        self._embedder = None
    @property
    def embedder(self):
        """Return the embedder, creating it with get_embedder(profile="code") on first access.

        The instance is stored in self._embedder and reused on later accesses.
        """
        if self._embedder is not None:
            return self._embedder
        self._embedder = get_embedder(profile="code")
        return self._embedder
    def close(self):
        """Close database connection."""
@@ -348,9 +355,21 @@ def main():
        # Exit with error code if operation failed
        if "success" in result and not result["success"]:
            # Clean up ONNX resources before exit
            clear_embedder_cache()
            sys.exit(1)
        # Clean up ONNX resources to ensure process can exit cleanly
        # This releases fastembed/ONNX Runtime threads that would otherwise
        # prevent the Python interpreter from shutting down
        clear_embedder_cache()
    except Exception as e:
        # Clean up ONNX resources even on error
        try:
            clear_embedder_cache()
        except Exception:
            pass
        print(json.dumps({
            "success": False,
            "error": str(e)
--- a/ccw/src/cli.ts
+++ b/ccw/src/cli.ts
@@ -11,6 +11,7 @@ import { sessionCommand } from './commands/session.js';
 import { cliCommand } from './commands/cli.js';
 import { memoryCommand } from './commands/memory.js';
 import { coreMemoryCommand } from './commands/core-memory.js';
 import { hookCommand } from './commands/hook.js';
 import { readFileSync, existsSync } from 'fs';
 import { fileURLToPath } from 'url';
 import { dirname, join } from 'path';
@@ -229,5 +230,15 @@ export function run(argv: string[]): void {
    .option('--prefix <prefix>', 'Add prefix to imported memory IDs')
    .action((subcommand, args, options) => coreMemoryCommand(subcommand, args, options));
  // Hook command - CLI endpoint for Claude Code hooks
  program
    .command('hook [subcommand] [args...]')
    .description('CLI endpoint for Claude Code hooks (session-context, notify)')
    .option('--stdin', 'Read input from stdin (for Claude Code hooks)')
    .option('--session-id <id>', 'Session ID')
    .option('--prompt <text>', 'Prompt text')
    .option('--type <type>', 'Context type: session-start, context')
    .action((subcommand, args, options) => hookCommand(subcommand, args, options));
  program.parse(argv);
 }
--- a/ccw/src/commands/hook.ts
+++ b/ccw/src/commands/hook.ts
@@ -0,0 +1,315 @@
 /**
 * Hook Command - CLI endpoint for Claude Code hooks
 * Provides simplified interface for hook operations, replacing complex bash/curl commands
 */
 import chalk from 'chalk';
 import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
 import { join, dirname } from 'path';
 import { tmpdir } from 'os';
 /** Options passed by the CLI layer to the `ccw hook` subcommands. */
 interface HookOptions {
  /** When true, the action reads its input from stdin and stays silent on failure. */
  stdin?: boolean;
  /** Session identifier; a `session_id` read from stdin takes precedence when present. */
  sessionId?: string;
  /** Prompt text; a `prompt` read from stdin takes precedence when present. */
  prompt?: string;
  /** Context type from the `--type` flag. NOTE(review): not read by any function visible in this section. */
  type?: 'session-start' | 'context';
 }
 /** Shape the stdin JSON payload is cast to when `--stdin` is used. */
 interface HookData {
  /** Session identifier supplied by the hook caller. */
  session_id?: string;
  /** Prompt text supplied by the hook caller. */
  prompt?: string;
  /** Working directory supplied by the hook caller; used as the project path when set. */
  cwd?: string;
  /** NOTE(review): declared but not read by any function visible in this section. */
  tool_input?: Record<string, unknown>;
 }
 /** Per-session state persisted as JSON between hook invocations. */
 interface SessionState {
  /** ISO-8601 timestamp written when the session state is first created. */
  firstLoad: string;
  /** Number of times session context has been requested; starts at 1 and is incremented on each call. */
  loadCount: number;
  /** Prompt passed on the most recent call, if any. */
  lastPrompt?: string;
 }
 /**
 * Read everything available on stdin as a UTF-8 string (for Claude Code hooks).
 *
 * @returns The accumulated input. Resolves with an empty string when stdin is
 *   an interactive terminal, and with whatever was read so far if the stream
 *   errors. The returned promise never rejects.
 */
 async function readStdin(): Promise<string> {
  // Nothing is piped when stdin is a terminal. Return before attaching any
  // listener so the stream is not switched into reading mode.
  if (process.stdin.isTTY) {
    return '';
  }
  return new Promise((resolve) => {
    let data = '';
    process.stdin.setEncoding('utf8');
    process.stdin.on('readable', () => {
      let chunk: string | null;
      while ((chunk = process.stdin.read()) !== null) {
        data += chunk;
      }
    });
    process.stdin.on('end', () => {
      resolve(data);
    });
    // Without this handler a failing stdin would raise an unhandled 'error'
    // event; hooks are best-effort, so settle with what has been read.
    process.stdin.on('error', () => {
      resolve(data);
    });
  });
 }
 /**
 * Resolve the path of the JSON file that stores state for a session, creating
 * the `.ccw-sessions` directory under the OS temp directory when needed.
 *
 * The session ID can come from hook input, so every character outside
 * `[A-Za-z0-9._-]` is replaced with `_`. This keeps the resulting file inside
 * the state directory (no path separators survive). IDs that differ only in
 * replaced characters map to the same file.
 *
 * @param sessionId Session identifier used to derive the file name.
 * @returns Absolute path of the session state file.
 */
 function getSessionStateFile(sessionId: string): string {
  const stateDir = join(tmpdir(), '.ccw-sessions');
  // With recursive: true this does not throw when the directory already exists.
  mkdirSync(stateDir, { recursive: true });
  const safeId = String(sessionId).replace(/[^A-Za-z0-9._-]/g, '_');
  return join(stateDir, `session-${safeId}.json`);
 }
 /**
 * Load the persisted state for a session.
 *
 * @param sessionId Session identifier.
 * @returns The stored state, or `null` when the file is missing, unreadable,
 *   not valid JSON, or does not contain a string `firstLoad` and a finite
 *   numeric `loadCount`.
 */
 function loadSessionState(sessionId: string): SessionState | null {
  const stateFile = getSessionStateFile(sessionId);
  if (!existsSync(stateFile)) {
    return null;
  }
  try {
    const parsed: unknown = JSON.parse(readFileSync(stateFile, 'utf-8'));
    if (typeof parsed !== 'object' || parsed === null) {
      return null;
    }
    const { firstLoad, loadCount, lastPrompt } = parsed as Record<string, unknown>;
    // Reject malformed files so callers never compute on a missing loadCount.
    if (
      typeof firstLoad !== 'string' ||
      typeof loadCount !== 'number' ||
      !Number.isFinite(loadCount)
    ) {
      return null;
    }
    return {
      firstLoad,
      loadCount,
      lastPrompt: typeof lastPrompt === 'string' ? lastPrompt : undefined
    };
  } catch {
    return null;
  }
 }
 /**
 * Persist a session's state as pretty-printed JSON (2-space indent).
 *
 * @param sessionId Session identifier used to locate the state file.
 * @param state State object to write.
 */
 function saveSessionState(sessionId: string, state: SessionState): void {
  const target = getSessionStateFile(sessionId);
  const serialized = JSON.stringify(state, null, 2);
  writeFileSync(target, serialized);
 }
 /**
 * Pick the project path: the hook-provided working directory when it is a
 * non-empty value, otherwise the current working directory of this process.
 */
 function getProjectPath(hookCwd?: string): string {
  if (hookCwd) {
    return hookCwd;
  }
  return process.cwd();
 }
 /**
 * Session context action - provides progressive context loading.
 *
 * First call for a session (no stored state): requests a `session-start` index.
 * Later calls with a non-blank prompt: request a `context` index for that prompt.
 * Later calls with no prompt: produce no content.
 *
 * With `--stdin` (hook mode), the session ID, prompt and cwd are taken from the
 * JSON payload on stdin when present, content is written raw to stdout, and the
 * process always exits with code 0, even on failure. Without `--stdin`, a
 * summary is printed and failures exit with code 1.
 *
 * @param options Parsed CLI options for the hook command.
 */
 async function sessionContextAction(options: HookOptions): Promise<void> {
  const { stdin } = options;
  let { sessionId, prompt } = options;
  let hookCwd: string | undefined;
  // If --stdin flag is set, read from stdin (Claude Code hook format)
  if (stdin) {
    try {
      const stdinData = await readStdin();
      if (stdinData) {
        const hookData: unknown = JSON.parse(stdinData);
        if (typeof hookData === 'object' && hookData !== null) {
          const fields = hookData as Record<string, unknown>;
          // Accept only string fields; anything else falls back to the CLI options.
          if (typeof fields.session_id === 'string' && fields.session_id) {
            sessionId = fields.session_id;
          }
          if (typeof fields.cwd === 'string') {
            hookCwd = fields.cwd;
          }
          if (typeof fields.prompt === 'string' && fields.prompt) {
            prompt = fields.prompt;
          }
        }
      }
    } catch {
      // Silently continue if stdin parsing fails
    }
  }
  if (!sessionId) {
    if (!stdin) {
      console.error(chalk.red('Error: --session-id is required'));
      console.error(chalk.gray('Usage: ccw hook session-context --session-id <id>'));
      console.error(chalk.gray('       ccw hook session-context --stdin'));
    }
    // Hooks must not fail the caller, so hook mode exits 0 even without an ID.
    process.exit(stdin ? 0 : 1);
  }
  try {
    const projectPath = getProjectPath(hookCwd);
    // Load existing session state
    const existingState = loadSessionState(sessionId);
    const isFirstPrompt = !existingState;
    // Update session state
    const newState: SessionState = isFirstPrompt
      ? {
          firstLoad: new Date().toISOString(),
          loadCount: 1,
          lastPrompt: prompt
        }
      : {
          ...existingState,
          loadCount: existingState.loadCount + 1,
          lastPrompt: prompt
        };
    saveSessionState(sessionId, newState);
    // Determine context type and generate content
    let contextType: 'session-start' | 'context';
    let content = '';
    // Dynamic import to avoid circular dependencies
    const { SessionClusteringService } = await import('../core/session-clustering-service.js');
    const clusteringService = new SessionClusteringService(projectPath);
    if (isFirstPrompt) {
      // First prompt: return session overview with clusters
      contextType = 'session-start';
      content = await clusteringService.getProgressiveIndex({
        type: 'session-start',
        sessionId
      });
    } else if (prompt && prompt.trim().length > 0) {
      // Subsequent prompts with content: return intent-matched sessions
      contextType = 'context';
      content = await clusteringService.getProgressiveIndex({
        type: 'context',
        sessionId,
        prompt
      });
    } else {
      // Subsequent prompts without content: return minimal context
      contextType = 'context';
      content = ''; // No context needed for empty prompts
    }
    if (stdin) {
      // For hooks: output content directly to stdout
      if (content) {
        // process.exit() does not wait for pending stdout writes, so wait for
        // the write callback before exiting to avoid truncated output.
        await new Promise<void>((resolve) => {
          process.stdout.write(content, () => resolve());
        });
      }
      process.exit(0);
    }
    // Interactive mode: show detailed output
    console.log(chalk.green('Session Context'));
    console.log(chalk.gray('─'.repeat(40)));
    console.log(chalk.cyan('Session ID:'), sessionId);
    console.log(chalk.cyan('Type:'), contextType);
    console.log(chalk.cyan('First Prompt:'), isFirstPrompt ? 'Yes' : 'No');
    console.log(chalk.cyan('Load Count:'), newState.loadCount);
    console.log(chalk.gray('─'.repeat(40)));
    if (content) {
      console.log(content);
    } else {
      console.log(chalk.gray('(No context generated)'));
    }
  } catch (error) {
    if (stdin) {
      // Silent failure for hooks
      process.exit(0);
    }
    const message = error instanceof Error ? error.message : String(error);
    console.error(chalk.red(`Error: ${message}`));
    process.exit(1);
  }
 }
 /**
 * Notify dashboard action - calls `notifyRefreshRequired()` from the notifier
 * module, then exits the process.
 *
 * With `--stdin` (hook mode), stdin is consumed, nothing is printed, and the
 * exit code is always 0. Without it, a confirmation is printed on success and
 * failures exit with code 1.
 *
 * @param options Parsed CLI options for the hook command.
 */
 async function notifyAction(options: HookOptions): Promise<void> {
  const { stdin } = options;
  if (stdin) {
    try {
      // The payload is not used by this action; it is still read so piped
      // input is consumed exactly as before.
      await readStdin();
    } catch {
      // Silently continue if stdin cannot be read
    }
  }
  try {
    const { notifyRefreshRequired } = await import('../tools/notifier.js');
    await notifyRefreshRequired();
    if (!stdin) {
      console.log(chalk.green('Notification sent to dashboard'));
    }
    process.exit(0);
  } catch (error) {
    if (stdin) {
      // Silent failure for hooks
      process.exit(0);
    }
    const message = error instanceof Error ? error.message : String(error);
    console.error(chalk.red(`Error: ${message}`));
    process.exit(1);
  }
 }
 /**
 * Print usage help for `ccw hook` to stdout: subcommands, options, examples,
 * and a sample `.claude/settings.json` hook configuration.
 *
 * The text is a single template literal, so its whitespace is part of the output.
 * NOTE(review): the help lists `--stdin`, `--session-id` and `--prompt` only;
 * the `context` alias and the `--type` option are not mentioned.
 */
 function showHelp(): void {
  console.log(`
 ${chalk.bold('ccw hook')} - CLI endpoint for Claude Code hooks
 ${chalk.bold('USAGE')}
  ccw hook <subcommand> [options]
 ${chalk.bold('SUBCOMMANDS')}
  session-context   Progressive session context loading (replaces curl/bash hook)
  notify            Send notification to ccw view dashboard
 ${chalk.bold('OPTIONS')}
  --stdin           Read input from stdin (for Claude Code hooks)
  --session-id      Session ID (alternative to stdin)
  --prompt          Current prompt text (alternative to stdin)
 ${chalk.bold('EXAMPLES')}
  ${chalk.gray('# Use in Claude Code hook (settings.json):')}
  ccw hook session-context --stdin
  ${chalk.gray('# Interactive usage:')}
  ccw hook session-context --session-id abc123
  ${chalk.gray('# Notify dashboard:')}
  ccw hook notify --stdin
 ${chalk.bold('HOOK CONFIGURATION')}
  ${chalk.gray('Add to .claude/settings.json:')}
  {
    "hooks": {
      "UserPromptSubmit": [{
        "hooks": [{
          "type": "command",
          "command": "ccw hook session-context --stdin"
        }]
      }]
    }
  }
 `);
 }
 /**
 * Main hook command handler: dispatches `ccw hook <subcommand>` to its action.
 *
 * - `session-context` / `context`: progressive session context loading
 * - `notify`: dashboard notification
 * - `help` or no subcommand: print usage
 * - anything else: print an error and exit with code 1
 *
 * @param subcommand Subcommand name; `undefined` when none was given on the command line.
 * @param args Extra positional arguments (currently unused).
 * @param options Parsed CLI options, forwarded to the selected action.
 */
 export async function hookCommand(
  subcommand: string | undefined,
  args: string | string[],
  options: HookOptions
 ): Promise<void> {
  switch (subcommand) {
    case 'session-context':
    case 'context':
      await sessionContextAction(options);
      break;
    case 'notify':
      await notifyAction(options);
      break;
    case 'help':
    case undefined:
      showHelp();
      break;
    default:
      console.error(chalk.red(`Unknown subcommand: ${subcommand}`));
      console.error(chalk.gray('Run "ccw hook help" for usage information'));
      process.exit(1);
  }
 }
--- a/ccw/src/core/session-clustering-service.ts
+++ b/ccw/src/core/session-clustering-service.ts
@@ -165,14 +165,23 @@ export class SessionClusteringService {
      keywords.add(match[1]);
    }
-    // 3. Technical terms (common frameworks/libraries)
+    // 3. Technical terms (common frameworks/libraries/concepts)
    const techTerms = [
      // Frameworks
      'react', 'vue', 'angular', 'typescript', 'javascript', 'node', 'express',
      // Auth
      'auth', 'authentication', 'jwt', 'oauth', 'session', 'token',
      // Data
      'api', 'rest', 'graphql', 'database', 'sql', 'mongodb', 'redis',
      // Testing
      'test', 'testing', 'jest', 'mocha', 'vitest',
      // Development
      'refactor', 'refactoring', 'optimization', 'performance',
-      'bug', 'fix', 'error', 'issue', 'debug'
+      'bug', 'fix', 'error', 'issue', 'debug',
      // CCW-specific terms
      'cluster', 'clustering', 'memory', 'hook', 'service', 'context',
      'workflow', 'skill', 'prompt', 'embedding', 'vector', 'semantic',
      'dashboard', 'view', 'route', 'command', 'cli', 'mcp'
    ];
    const lowerContent = content.toLowerCase();
@@ -182,6 +191,23 @@ export class SessionClusteringService {
      }
    }
    // 4. Generic word extraction (words >= 4 chars, not stopwords)
    const stopwords = new Set([
      'the', 'and', 'for', 'that', 'this', 'with', 'from', 'have', 'will',
      'are', 'was', 'were', 'been', 'being', 'what', 'when', 'where', 'which',
      'there', 'their', 'they', 'them', 'then', 'than', 'into', 'some', 'such',
      'only', 'also', 'just', 'more', 'most', 'other', 'after', 'before'
    ]);
    const wordRegex = /\b([a-z]{4,})\b/g;
    let wordMatch;
    while ((wordMatch = wordRegex.exec(lowerContent)) !== null) {
      const word = wordMatch[1];
      if (!stopwords.has(word)) {
        keywords.add(word);
      }
    }
    // Return top 20 keywords
    return Array.from(keywords).slice(0, 20);
  }
--- a/ccw/src/tools/codex-lens.ts
+++ b/ccw/src/tools/codex-lens.ts
@@ -450,6 +450,16 @@ function parseProgressLine(line: string): ProgressInfo | null {
    return { stage: 'finalizing', message: 'Finalizing vector index...', percent: 90 };
  }
  // Parse embeddings complete message
  const embedCompleteMatch = line.match(/Embeddings complete:\s*(\d+)\s*chunks/i);
  if (embedCompleteMatch) {
    return {
      stage: 'embeddings_complete',
      message: `Embeddings complete: ${embedCompleteMatch[1]} chunks`,
      percent: 95,
    };
  }
  return null;
 }
--- a/codex-lens/src/codexlens/cli/commands.py
+++ b/codex-lens/src/codexlens/cli/commands.py
@@ -151,10 +151,21 @@ def init(
                    if not json_mode:
                        console.print("\n[bold]Generating embeddings...[/bold]")
                        console.print(f"Model: [cyan]{embedding_model}[/cyan]")
                    else:
                        # Output progress message for JSON mode (parsed by Node.js)
                        print("Generating embeddings...", flush=True)
-                    # Progress callback for non-json mode
+                    # Progress callback - outputs progress for both json and non-json modes
                    # Node.js parseProgressLine() expects formats like:
                    # - "Batch X: N files, M chunks"
                    # - "Processing N files"
                    # - "Finalizing index"
                    def progress_update(msg: str):
-                        if not json_mode and verbose:
+                        if json_mode:
                            # Output without prefix so Node.js can parse it
                            # Strip leading spaces that embedding_manager adds
                            print(msg.strip(), flush=True)
                        elif verbose:
                            console.print(f"  {msg}")
                    embed_result = generate_embeddings_recursive(
@@ -162,12 +173,16 @@ def init(
                        model_profile=embedding_model,
                        force=False,  # Don't force regenerate during init
                        chunk_size=2000,
-                        progress_callback=progress_update if not json_mode else None,
+                        progress_callback=progress_update,  # Always use callback
                    )
                    if embed_result["success"]:
                        embed_data = embed_result["result"]
-                        
+
                        # Output completion message for Node.js to parse
                        if json_mode:
                            print(f"Embeddings complete: {embed_data['total_chunks_created']} chunks", flush=True)
                        # Get comprehensive coverage statistics
                        status_result = get_embeddings_status(index_root)
                        if status_result["success"]:
--- a/codex-lens/src/codexlens/cli/embedding_manager.py
+++ b/codex-lens/src/codexlens/cli/embedding_manager.py
@@ -235,7 +235,8 @@ def generate_embeddings(
                        return {"success": False, "error": "No files found in index"}
                    if progress_callback:
-                        progress_callback(f"Processing {total_files} files in batches of {FILE_BATCH_SIZE}...")
+                        # Format must match Node.js parseProgressLine: "Processing N files" with 'embed' keyword
                        progress_callback(f"Processing {total_files} files for embeddings in batches of {FILE_BATCH_SIZE}...")
                    cursor = conn.execute(f"SELECT {path_column}, content, language FROM files")
                    batch_number = 0
@@ -325,10 +326,24 @@ def generate_embeddings(
                    progress_callback(f"Finalizing index... Building ANN index for {total_chunks_created} chunks")
    except Exception as e:
        # Cleanup on error to prevent process hanging
        try:
            clear_embedder_cache()
            gc.collect()
        except Exception:
            pass
        return {"success": False, "error": f"Failed to read or process files: {str(e)}"}
    elapsed_time = time.time() - start_time
    # Final cleanup: release ONNX resources to allow process exit
    # This is critical - without it, ONNX Runtime threads prevent Python from exiting
    try:
        clear_embedder_cache()
        gc.collect()
    except Exception:
        pass
    return {
        "success": True,
        "result": {
@@ -418,7 +433,8 @@ def generate_embeddings_recursive(
                rel_path = index_path.relative_to(index_root)
            except ValueError:
                rel_path = index_path
-            progress_callback(f"[{idx}/{len(index_files)}] Processing {rel_path}")
+            # Format: "Processing file X/Y: path" to match Node.js parseProgressLine
            progress_callback(f"Processing file {idx}/{len(index_files)}: {rel_path}")
        result = generate_embeddings(
            index_path,
@@ -443,6 +459,15 @@ def generate_embeddings_recursive(
    successful = sum(1 for r in all_results if r["success"])
    # Final cleanup after processing all indexes
    # Each generate_embeddings() call does its own cleanup, but do a final one to be safe
    try:
        if SEMANTIC_AVAILABLE:
            clear_embedder_cache()
            gc.collect()
    except Exception:
        pass
    return {
        "success": successful > 0,
        "result": {