From 210f0f1012f4689086f5298e743dd49cfb3af506 Mon Sep 17 00:00:00 2001
From: catlog22 <catlog22@github.com>
Date: Sun, 21 Dec 2025 23:28:19 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E9=92=A9=E5=AD=90?=
 =?UTF-8?q?=E5=91=BD=E4=BB=A4=EF=BC=8C=E7=AE=80=E5=8C=96=20Claude=20Code?=
 =?UTF-8?q?=20=E9=92=A9=E5=AD=90=E6=93=8D=E4=BD=9C=E6=8E=A5=E5=8F=A3?=
 =?UTF-8?q?=EF=BC=8C=E6=94=AF=E6=8C=81=E4=BC=9A=E8=AF=9D=E4=B8=8A=E4=B8=8B?=
 =?UTF-8?q?=E6=96=87=E5=8A=A0=E8=BD=BD=E5=92=8C=E9=80=9A=E7=9F=A5=E5=8A=9F?=
 =?UTF-8?q?=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ccw/scripts/memory_embedder.py                |  25 +-
 ccw/src/cli.ts                                |  11 +
 ccw/src/commands/hook.ts                      | 315 ++++++++++++++++++
 ccw/src/core/session-clustering-service.ts    |  30 +-
 ccw/src/tools/codex-lens.ts                   |  10 +
 codex-lens/src/codexlens/cli/commands.py      |  23 +-
 .../src/codexlens/cli/embedding_manager.py    |  29 +-
 7 files changed, 432 insertions(+), 11 deletions(-)
 create mode 100644 ccw/src/commands/hook.ts
diff --git a/ccw/scripts/memory_embedder.py b/ccw/scripts/memory_embedder.py
index 0c875d24..095db2a3 100644
--- a/ccw/scripts/memory_embedder.py
+++ b/ccw/scripts/memory_embedder.py
@@ -26,7 +26,7 @@ except ImportError:
     sys.exit(1)
 
 try:
-    from codexlens.semantic.embedder import get_embedder
+    from codexlens.semantic.embedder import get_embedder, clear_embedder_cache
 except ImportError:
     print("Error: CodexLens not found. Install with: pip install codexlens[semantic]", file=sys.stderr)
     sys.exit(1)
@@ -46,8 +46,15 @@ class MemoryEmbedder:
         self.conn = sqlite3.connect(str(self.db_path))
         self.conn.row_factory = sqlite3.Row
 
-        # Initialize embedder (uses cached singleton)
-        self.embedder = get_embedder(profile="code")
+        # Lazy-load embedder to avoid ~0.8s model loading for status command
+        self._embedder = None
+
+    @property
+    def embedder(self):
+        """Return the embedder, creating it with get_embedder(profile="code") on first access."""
+        if self._embedder is None:
+            self._embedder = get_embedder(profile="code")
+        return self._embedder
 
     def close(self):
         """Close database connection."""
@@ -348,9 +355,21 @@ def main():
 
         # Exit with error code if operation failed
         if "success" in result and not result["success"]:
+            # Clean up ONNX resources before exit
+            clear_embedder_cache()
             sys.exit(1)
 
+        # Clean up ONNX resources to ensure process can exit cleanly
+        # This releases fastembed/ONNX Runtime threads that would otherwise
+        # prevent the Python interpreter from shutting down
+        clear_embedder_cache()
+
     except Exception as e:
+        # Clean up ONNX resources even on error
+        try:
+            clear_embedder_cache()
+        except Exception:
+            pass
         print(json.dumps({
             "success": False,
             "error": str(e)
diff --git a/ccw/src/cli.ts b/ccw/src/cli.ts
index 4e300b36..9aacf5c0 100644
--- a/ccw/src/cli.ts
+++ b/ccw/src/cli.ts
@@ -11,6 +11,7 @@ import { sessionCommand } from './commands/session.js';
 import { cliCommand } from './commands/cli.js';
 import { memoryCommand } from './commands/memory.js';
 import { coreMemoryCommand } from './commands/core-memory.js';
+import { hookCommand } from './commands/hook.js';
 import { readFileSync, existsSync } from 'fs';
 import { fileURLToPath } from 'url';
 import { dirname, join } from 'path';
@@ -229,5 +230,15 @@ export function run(argv: string[]): void {
     .option('--prefix <prefix>', 'Add prefix to imported memory IDs')
     .action((subcommand, args, options) => coreMemoryCommand(subcommand, args, options));
 
+  // Hook command - CLI endpoint for Claude Code hooks
+  program
+    .command('hook [subcommand] [args...]')
+    .description('CLI endpoint for Claude Code hooks (session-context, notify)')
+    .option('--stdin', 'Read input from stdin (for Claude Code hooks)')
+    .option('--session-id <id>', 'Session ID')
+    .option('--prompt <text>', 'Prompt text')
+    .option('--type <type>', 'Context type: session-start, context')
+    .action((subcommand, args, options) => hookCommand(subcommand, args, options));
+
   program.parse(argv);
 }
diff --git a/ccw/src/commands/hook.ts b/ccw/src/commands/hook.ts
new file mode 100644
index 00000000..1b710cf6
--- /dev/null
+++ b/ccw/src/commands/hook.ts
@@ -0,0 +1,315 @@
+/**
+ * Hook Command - CLI endpoint for Claude Code hooks
+ * Provides simplified interface for hook operations, replacing complex bash/curl commands
+ */
+
+import chalk from 'chalk';
+import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
+import { join, dirname } from 'path';
+import { tmpdir } from 'os';
+
+interface HookOptions {
+  stdin?: boolean; // --stdin: read the hook payload (JSON) from stdin
+  sessionId?: string; // --session-id: used when stdin supplies no session_id
+  prompt?: string; // --prompt: used when stdin supplies no prompt
+  type?: 'session-start' | 'context'; // --type: accepted by the CLI, not read in this file
+}
+
+interface HookData {
+  session_id?: string; // preferred over --session-id when non-empty
+  prompt?: string; // preferred over --prompt when non-empty
+  cwd?: string; // used as the project path; process.cwd() is the fallback
+  tool_input?: Record<string, unknown>; // declared but not read in this file
+}
+
+interface SessionState {
+  firstLoad: string; // ISO-8601 timestamp written on the session's first load
+  loadCount: number; // starts at 1 and is incremented on every later load
+  lastPrompt?: string; // prompt passed to the most recent load, if any
+}
+
+/**
+ * Read all of stdin as UTF-8 text (Claude Code hooks pipe a JSON payload).
+ * Resolves with '' on a TTY and with the data read so far on a stream error.
+ */
+async function readStdin(): Promise<string> {
+  return new Promise((resolve) => {
+    // Nothing is piped on an interactive terminal: resolve right away and
+    // attach no listeners, so stdin is never put into reading mode.
+    if (process.stdin.isTTY) {
+      resolve('');
+      return;
+    }
+    let data = '';
+    process.stdin.setEncoding('utf8');
+    process.stdin.on('data', (chunk: string) => {
+      data += chunk;
+    });
+    process.stdin.on('end', () => resolve(data));
+    // Without this handler a stream error would leave the promise pending
+    process.stdin.on('error', () => resolve(data));
+  });
+}
+
+/**
+ * Get session state file path (creates the state directory on first use).
+ * The id can come from hook input, so it is reduced to a safe file name.
+ */
+function getSessionStateFile(sessionId: string): string {
+  const stateDir = join(tmpdir(), '.ccw-sessions');
+  if (!existsSync(stateDir)) mkdirSync(stateDir, { recursive: true });
+  // Replace everything outside [A-Za-z0-9_-] so the id cannot escape stateDir
+  return join(stateDir, `session-${sessionId.replace(/[^\w-]/g, '_')}.json`);
+}
+
+/**
+ * Load session state from file.
+ * Returns null when the file is missing, unreadable, or not a valid state.
+ */
+function loadSessionState(sessionId: string): SessionState | null {
+  try {
+    const raw = readFileSync(getSessionStateFile(sessionId), 'utf-8');
+    const parsed = JSON.parse(raw) as Partial<SessionState> | null;
+    // Reject corrupt state: a bad loadCount would turn into NaN on increment
+    const valid = typeof parsed?.firstLoad === 'string' && Number.isFinite(parsed?.loadCount);
+    return valid ? (parsed as SessionState) : null;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Persist the session state as pretty-printed JSON in its state file.
+ */
+function saveSessionState(sessionId: string, state: SessionState): void {
+  const serialized = JSON.stringify(state, null, 2);
+  writeFileSync(getSessionStateFile(sessionId), serialized);
+}
+
+/**
+ * Resolve the project path: the hook-supplied cwd if non-empty, else process.cwd().
+ */
+function getProjectPath(hookCwd?: string): string {
+  return hookCwd ? hookCwd : process.cwd();
+}
+
+/**
+ * Session context action - provides progressive context loading.
+ * First prompt: session overview; later prompts: intent-matched sessions
+ * (nothing for an empty prompt). Hook mode (--stdin) always exits with 0.
+ */
+async function sessionContextAction(options: HookOptions): Promise<void> {
+  let { stdin, sessionId, prompt } = options;
+  let hookCwd: string | undefined;
+
+  // If --stdin flag is set, read from stdin (Claude Code hook format)
+  if (stdin) {
+    try {
+      const stdinData = await readStdin();
+      if (stdinData) {
+        const hookData = JSON.parse(stdinData) as HookData;
+        sessionId = hookData.session_id || sessionId;
+        hookCwd = hookData.cwd;
+        prompt = hookData.prompt || prompt;
+      }
+    } catch {
+      // Best-effort: on malformed stdin JSON keep the values from the CLI options
+    }
+  }
+
+  if (!sessionId) {
+    if (!stdin) {
+      console.error(chalk.red('Error: --session-id is required'));
+      console.error(chalk.gray('Usage: ccw hook session-context --session-id <id>'));
+      console.error(chalk.gray('       ccw hook session-context --stdin'));
+    }
+    process.exit(stdin ? 0 : 1);
+  }
+
+  try {
+    const projectPath = getProjectPath(hookCwd);
+
+    // No stored state means this is the first prompt of the session
+    const existingState = loadSessionState(sessionId);
+    const isFirstPrompt = !existingState;
+
+    // Create the state on first load, otherwise bump the load counter
+    const newState: SessionState = isFirstPrompt
+      ? {
+          firstLoad: new Date().toISOString(),
+          loadCount: 1,
+          lastPrompt: prompt
+        }
+      : {
+          ...existingState,
+          loadCount: existingState.loadCount + 1,
+          lastPrompt: prompt
+        };
+
+    saveSessionState(sessionId, newState);
+
+    // Determine context type and generate content
+    let contextType: 'session-start' | 'context';
+    let content = '';
+
+    // Dynamic import to avoid circular dependencies
+    const { SessionClusteringService } = await import('../core/session-clustering-service.js');
+    const clusteringService = new SessionClusteringService(projectPath);
+
+    if (isFirstPrompt) {
+      // First prompt: return session overview with clusters
+      contextType = 'session-start';
+      content = await clusteringService.getProgressiveIndex({
+        type: 'session-start',
+        sessionId
+      });
+    } else if (prompt && prompt.trim().length > 0) {
+      // Subsequent prompts with content: return intent-matched sessions
+      contextType = 'context';
+      content = await clusteringService.getProgressiveIndex({
+        type: 'context',
+        sessionId,
+        prompt
+      });
+    } else {
+      // Subsequent prompts without content: return minimal context
+      contextType = 'context';
+      content = ''; // No context needed for empty prompts
+    }
+
+    if (stdin) {
+      // For hooks: emit the raw content only. Exit from the write callback, as
+      // stdout pipes can be asynchronous and process.exit() drops pending output
+      if (!content) process.exit(0);
+      process.stdout.write(content, () => process.exit(0));
+      return;
+    }
+
+    // Interactive mode: show detailed output
+    console.log(chalk.green('Session Context'));
+    console.log(chalk.gray('─'.repeat(40)));
+    console.log(chalk.cyan('Session ID:'), sessionId);
+    console.log(chalk.cyan('Type:'), contextType);
+    console.log(chalk.cyan('First Prompt:'), isFirstPrompt ? 'Yes' : 'No');
+    console.log(chalk.cyan('Load Count:'), newState.loadCount);
+    console.log(chalk.gray('─'.repeat(40)));
+    if (content) {
+      console.log(content);
+    } else {
+      console.log(chalk.gray('(No context generated)'));
+    }
+  } catch (error) {
+    if (stdin) {
+      // Silent failure for hooks
+      process.exit(0);
+    }
+    console.error(chalk.red(`Error: ${error instanceof Error ? error.message : String(error)}`));
+    process.exit(1);
+  }
+}
+
+/**
+ * Notify dashboard action - send notification to running ccw view server.
+ * Hook mode (--stdin) drains the piped payload, prints nothing and always
+ * exits 0; otherwise a failure is printed and the exit code is 1.
+ */
+async function notifyAction(options: HookOptions): Promise<void> {
+  const { stdin } = options;
+
+  if (stdin) {
+    try {
+      // The payload is not needed here; it is only read to the end
+      await readStdin();
+    } catch {
+      // Best-effort: a stdin failure must not block the notification
+    }
+  }
+
+  try {
+    const { notifyRefreshRequired } = await import('../tools/notifier.js');
+    await notifyRefreshRequired();
+
+    if (!stdin) {
+      console.log(chalk.green('Notification sent to dashboard'));
+    }
+    process.exit(0);
+  } catch (error) {
+    if (stdin) {
+      process.exit(0); // Silent failure for hooks
+    }
+    const message = error instanceof Error ? error.message : String(error);
+    console.error(chalk.red(`Error: ${message}`));
+    process.exit(1);
+  }
+}
+
+/** Print usage, options, examples and hook configuration for `ccw hook`. */
+function showHelp(): void {
+  const usage = `
+${chalk.bold('ccw hook')} - CLI endpoint for Claude Code hooks
+
+${chalk.bold('USAGE')}
+  ccw hook <subcommand> [options]
+
+${chalk.bold('SUBCOMMANDS')}
+  session-context   Progressive session context loading (replaces curl/bash hook)
+  notify            Send notification to ccw view dashboard
+
+${chalk.bold('OPTIONS')}
+  --stdin           Read input from stdin (for Claude Code hooks)
+  --session-id      Session ID (alternative to stdin)
+  --prompt          Current prompt text (alternative to stdin)
+
+${chalk.bold('EXAMPLES')}
+  ${chalk.gray('# Use in Claude Code hook (settings.json):')}
+  ccw hook session-context --stdin
+
+  ${chalk.gray('# Interactive usage:')}
+  ccw hook session-context --session-id abc123
+
+  ${chalk.gray('# Notify dashboard:')}
+  ccw hook notify --stdin
+
+${chalk.bold('HOOK CONFIGURATION')}
+  ${chalk.gray('Add to .claude/settings.json:')}
+  {
+    "hooks": {
+      "UserPromptSubmit": [{
+        "hooks": [{
+          "type": "command",
+          "command": "ccw hook session-context --stdin"
+        }]
+      }]
+    }
+  }
+`;
+
+  console.log(usage);
+}
+
+/**
+ * Main hook command handler: routes to a subcommand; `args` is unused here.
+ */
+export async function hookCommand(
+  subcommand: string | undefined,
+  args: string | string[],
+  options: HookOptions
+): Promise<void> {
+  switch (subcommand) {
+    case 'session-context':
+    case 'context':
+      await sessionContextAction(options);
+      break;
+    case 'notify':
+      await notifyAction(options);
+      break;
+    case 'help':
+    case undefined: // no subcommand given: show usage
+      showHelp();
+      break;
+    default:
+      console.error(chalk.red(`Unknown subcommand: ${subcommand}`));
+      console.error(chalk.gray('Run "ccw hook help" for usage information'));
+      process.exit(1);
+  }
+}
diff --git a/ccw/src/core/session-clustering-service.ts b/ccw/src/core/session-clustering-service.ts
index 5fef9074..16ff6a75 100644
--- a/ccw/src/core/session-clustering-service.ts
+++ b/ccw/src/core/session-clustering-service.ts
@@ -165,14 +165,23 @@ export class SessionClusteringService {
       keywords.add(match[1]);
     }
 
-    // 3. Technical terms (common frameworks/libraries)
+    // 3. Technical terms (common frameworks/libraries/concepts)
     const techTerms = [
+      // Frameworks
       'react', 'vue', 'angular', 'typescript', 'javascript', 'node', 'express',
+      // Auth
       'auth', 'authentication', 'jwt', 'oauth', 'session', 'token',
+      // Data
       'api', 'rest', 'graphql', 'database', 'sql', 'mongodb', 'redis',
+      // Testing
       'test', 'testing', 'jest', 'mocha', 'vitest',
+      // Development
       'refactor', 'refactoring', 'optimization', 'performance',
-      'bug', 'fix', 'error', 'issue', 'debug'
+      'bug', 'fix', 'error', 'issue', 'debug',
+      // CCW-specific terms
+      'cluster', 'clustering', 'memory', 'hook', 'service', 'context',
+      'workflow', 'skill', 'prompt', 'embedding', 'vector', 'semantic',
+      'dashboard', 'view', 'route', 'command', 'cli', 'mcp'
     ];
 
     const lowerContent = content.toLowerCase();
@@ -182,6 +191,23 @@ export class SessionClusteringService {
       }
     }
 
+    // 4. Generic word extraction (words >= 4 chars, not stopwords)
+    const stopwords = new Set([
+      'the', 'and', 'for', 'that', 'this', 'with', 'from', 'have', 'will',
+      'are', 'was', 'were', 'been', 'being', 'what', 'when', 'where', 'which',
+      'there', 'their', 'they', 'them', 'then', 'than', 'into', 'some', 'such',
+      'only', 'also', 'just', 'more', 'most', 'other', 'after', 'before'
+    ]);
+
+    const wordRegex = /\b([a-z]{4,})\b/g;
+    let wordMatch;
+    while ((wordMatch = wordRegex.exec(lowerContent)) !== null) {
+      const word = wordMatch[1];
+      if (!stopwords.has(word)) {
+        keywords.add(word);
+      }
+    }
+
     // Return top 20 keywords
     return Array.from(keywords).slice(0, 20);
   }
diff --git a/ccw/src/tools/codex-lens.ts b/ccw/src/tools/codex-lens.ts
index 4890c9f3..1c7ba261 100644
--- a/ccw/src/tools/codex-lens.ts
+++ b/ccw/src/tools/codex-lens.ts
@@ -450,6 +450,16 @@ function parseProgressLine(line: string): ProgressInfo | null {
     return { stage: 'finalizing', message: 'Finalizing vector index...', percent: 90 };
   }
 
+  // Parse embeddings complete message
+  const embedCompleteMatch = line.match(/Embeddings complete:\s*(\d+)\s*chunks/i);
+  if (embedCompleteMatch) {
+    return {
+      stage: 'embeddings_complete',
+      message: `Embeddings complete: ${embedCompleteMatch[1]} chunks`,
+      percent: 95,
+    };
+  }
+
   return null;
 }
 
diff --git a/codex-lens/src/codexlens/cli/commands.py b/codex-lens/src/codexlens/cli/commands.py
index 5b96c0e1..5aa6860f 100644
--- a/codex-lens/src/codexlens/cli/commands.py
+++ b/codex-lens/src/codexlens/cli/commands.py
@@ -151,10 +151,21 @@ def init(
                     if not json_mode:
                         console.print("\n[bold]Generating embeddings...[/bold]")
                         console.print(f"Model: [cyan]{embedding_model}[/cyan]")
+                    else:
+                        # Output progress message for JSON mode (parsed by Node.js)
+                        print("Generating embeddings...", flush=True)
 
-                    # Progress callback for non-json mode
+                    # Progress callback - outputs progress for both json and non-json modes
+                    # Node.js parseProgressLine() expects formats like:
+                    # - "Batch X: N files, M chunks"
+                    # - "Processing N files"
+                    # - "Finalizing index"
                     def progress_update(msg: str):
-                        if not json_mode and verbose:
+                        if json_mode:
+                            # Output without prefix so Node.js can parse it
+                            # Strip leading spaces that embedding_manager adds
+                            print(msg.strip(), flush=True)
+                        elif verbose:
                             console.print(f"  {msg}")
 
                     embed_result = generate_embeddings_recursive(
@@ -162,12 +173,16 @@ def init(
                         model_profile=embedding_model,
                         force=False,  # Don't force regenerate during init
                         chunk_size=2000,
-                        progress_callback=progress_update if not json_mode else None,
+                        progress_callback=progress_update,  # Always use callback
                     )
 
                     if embed_result["success"]:
                         embed_data = embed_result["result"]
-                        
+
+                        # Output completion message for Node.js to parse
+                        if json_mode:
+                            print(f"Embeddings complete: {embed_data['total_chunks_created']} chunks", flush=True)
+
                         # Get comprehensive coverage statistics
                         status_result = get_embeddings_status(index_root)
                         if status_result["success"]:
diff --git a/codex-lens/src/codexlens/cli/embedding_manager.py b/codex-lens/src/codexlens/cli/embedding_manager.py
index 72eb29c4..ba4ec45a 100644
--- a/codex-lens/src/codexlens/cli/embedding_manager.py
+++ b/codex-lens/src/codexlens/cli/embedding_manager.py
@@ -235,7 +235,8 @@ def generate_embeddings(
                         return {"success": False, "error": "No files found in index"}
 
                     if progress_callback:
-                        progress_callback(f"Processing {total_files} files in batches of {FILE_BATCH_SIZE}...")
+                        # Format must match Node.js parseProgressLine: "Processing N files" with 'embed' keyword
+                        progress_callback(f"Processing {total_files} files for embeddings in batches of {FILE_BATCH_SIZE}...")
 
                     cursor = conn.execute(f"SELECT {path_column}, content, language FROM files")
                     batch_number = 0
@@ -325,10 +326,24 @@ def generate_embeddings(
                     progress_callback(f"Finalizing index... Building ANN index for {total_chunks_created} chunks")
 
     except Exception as e:
+        # Cleanup on error to prevent process hanging
+        try:
+            clear_embedder_cache()
+            gc.collect()
+        except Exception:
+            pass
         return {"success": False, "error": f"Failed to read or process files: {str(e)}"}
 
     elapsed_time = time.time() - start_time
 
+    # Final cleanup: release ONNX resources to allow process exit
+    # This is critical - without it, ONNX Runtime threads prevent Python from exiting
+    try:
+        clear_embedder_cache()
+        gc.collect()
+    except Exception:
+        pass
+
     return {
         "success": True,
         "result": {
@@ -418,7 +433,8 @@ def generate_embeddings_recursive(
                 rel_path = index_path.relative_to(index_root)
             except ValueError:
                 rel_path = index_path
-            progress_callback(f"[{idx}/{len(index_files)}] Processing {rel_path}")
+            # Format: "Processing file X/Y: path" to match Node.js parseProgressLine
+            progress_callback(f"Processing file {idx}/{len(index_files)}: {rel_path}")
 
         result = generate_embeddings(
             index_path,
@@ -443,6 +459,15 @@ def generate_embeddings_recursive(
 
     successful = sum(1 for r in all_results if r["success"])
 
+    # Final cleanup after processing all indexes
+    # Each generate_embeddings() call does its own cleanup, but do a final one to be safe
+    try:
+        if SEMANTIC_AVAILABLE:
+            clear_embedder_cache()
+            gc.collect()
+    except Exception:
+        pass
+
     return {
         "success": successful > 0,
         "result": {