From 210f0f1012f4689086f5298e743dd49cfb3af506 Mon Sep 17 00:00:00 2001 From: catlog22 Date: Sun, 21 Dec 2025 23:28:19 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E9=92=A9=E5=AD=90?= =?UTF-8?q?=E5=91=BD=E4=BB=A4=EF=BC=8C=E7=AE=80=E5=8C=96=20Claude=20Code?= =?UTF-8?q?=20=E9=92=A9=E5=AD=90=E6=93=8D=E4=BD=9C=E6=8E=A5=E5=8F=A3?= =?UTF-8?q?=EF=BC=8C=E6=94=AF=E6=8C=81=E4=BC=9A=E8=AF=9D=E4=B8=8A=E4=B8=8B?= =?UTF-8?q?=E6=96=87=E5=8A=A0=E8=BD=BD=E5=92=8C=E9=80=9A=E7=9F=A5=E5=8A=9F?= =?UTF-8?q?=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ccw/scripts/memory_embedder.py | 25 +- ccw/src/cli.ts | 11 + ccw/src/commands/hook.ts | 315 ++++++++++++++++++ ccw/src/core/session-clustering-service.ts | 30 +- ccw/src/tools/codex-lens.ts | 10 + codex-lens/src/codexlens/cli/commands.py | 23 +- .../src/codexlens/cli/embedding_manager.py | 29 +- 7 files changed, 432 insertions(+), 11 deletions(-) create mode 100644 ccw/src/commands/hook.ts diff --git a/ccw/scripts/memory_embedder.py b/ccw/scripts/memory_embedder.py index 0c875d24..095db2a3 100644 --- a/ccw/scripts/memory_embedder.py +++ b/ccw/scripts/memory_embedder.py @@ -26,7 +26,7 @@ except ImportError: sys.exit(1) try: - from codexlens.semantic.embedder import get_embedder + from codexlens.semantic.embedder import get_embedder, clear_embedder_cache except ImportError: print("Error: CodexLens not found. Install with: pip install codexlens[semantic]", file=sys.stderr) sys.exit(1) @@ -46,8 +46,15 @@ class MemoryEmbedder: self.conn = sqlite3.connect(str(self.db_path)) self.conn.row_factory = sqlite3.Row - # Initialize embedder (uses cached singleton) - self.embedder = get_embedder(profile="code") + # Lazy-load embedder to avoid ~0.8s model loading for status command + self._embedder = None + + @property + def embedder(self): + """Lazy-load the embedder on first access.""" + if self._embedder is None: + self._embedder = get_embedder(profile="code") + return self._embedder def close(self): """Close database connection.""" @@ -348,9 +355,21 @@ def main(): # Exit with error code if operation failed if "success" in result and not result["success"]: + # Clean up ONNX resources before exit + clear_embedder_cache() sys.exit(1) + # Clean up ONNX resources to ensure process can exit cleanly + # This releases fastembed/ONNX Runtime threads that would otherwise + # prevent the Python interpreter from shutting down + clear_embedder_cache() + except Exception as e: + # Clean up ONNX resources even on error + try: + clear_embedder_cache() + except Exception: + pass print(json.dumps({ "success": False, "error": str(e) diff --git a/ccw/src/cli.ts b/ccw/src/cli.ts index 4e300b36..9aacf5c0 100644 --- a/ccw/src/cli.ts +++ b/ccw/src/cli.ts @@ -11,6 +11,7 @@ import { sessionCommand } from './commands/session.js'; import { cliCommand } from './commands/cli.js'; import { memoryCommand } from './commands/memory.js'; import { coreMemoryCommand } from './commands/core-memory.js'; +import { hookCommand } from './commands/hook.js'; import { readFileSync, existsSync } from 'fs'; import { fileURLToPath } from 'url'; import { dirname, join } from 'path'; @@ -229,5 +230,15 @@ export function run(argv: string[]): void { .option('--prefix ', 'Add prefix to imported memory IDs') .action((subcommand, args, options) => coreMemoryCommand(subcommand, args, options)); + // Hook command - CLI endpoint for Claude Code hooks + program + .command('hook [subcommand] [args...]') + .description('CLI endpoint for Claude Code hooks (session-context, notify)') + .option('--stdin', 'Read input from stdin (for Claude Code hooks)') + .option('--session-id ', 'Session ID') + .option('--prompt ', 'Prompt text') + .option('--type ', 'Context type: session-start, context') + .action((subcommand, args, options) => hookCommand(subcommand, args, options)); + program.parse(argv); } diff --git a/ccw/src/commands/hook.ts b/ccw/src/commands/hook.ts new file mode 100644 index 00000000..1b710cf6 --- /dev/null +++ b/ccw/src/commands/hook.ts @@ -0,0 +1,315 @@ +/** + * Hook Command - CLI endpoint for Claude Code hooks + * Provides simplified interface for hook operations, replacing complex bash/curl commands + */ + +import chalk from 'chalk'; +import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs'; +import { join, dirname } from 'path'; +import { tmpdir } from 'os'; + +interface HookOptions { + stdin?: boolean; + sessionId?: string; + prompt?: string; + type?: 'session-start' | 'context'; +} + +interface HookData { + session_id?: string; + prompt?: string; + cwd?: string; + tool_input?: Record; +} + +interface SessionState { + firstLoad: string; + loadCount: number; + lastPrompt?: string; +} + +/** + * Read JSON data from stdin (for Claude Code hooks) + */ +async function readStdin(): Promise { + return new Promise((resolve) => { + let data = ''; + process.stdin.setEncoding('utf8'); + process.stdin.on('readable', () => { + let chunk; + while ((chunk = process.stdin.read()) !== null) { + data += chunk; + } + }); + process.stdin.on('end', () => { + resolve(data); + }); + // Handle case where stdin is empty or not piped + if (process.stdin.isTTY) { + resolve(''); + } + }); +} + +/** + * Get session state file path + */ +function getSessionStateFile(sessionId: string): string { + const stateDir = join(tmpdir(), '.ccw-sessions'); + if (!existsSync(stateDir)) { + mkdirSync(stateDir, { recursive: true }); + } + return join(stateDir, `session-${sessionId}.json`); +} + +/** + * Load session state from file + */ +function loadSessionState(sessionId: string): SessionState | null { + const stateFile = getSessionStateFile(sessionId); + if (!existsSync(stateFile)) { + return null; + } + try { + const content = readFileSync(stateFile, 'utf-8'); + return JSON.parse(content) as SessionState; + } catch { + return null; + } +} + +/** + * Save session state to file + */ +function saveSessionState(sessionId: string, state: SessionState): void { + const stateFile = getSessionStateFile(sessionId); + writeFileSync(stateFile, JSON.stringify(state, null, 2)); +} + +/** + * Get project path from hook data or current working directory + */ +function getProjectPath(hookCwd?: string): string { + return hookCwd || process.cwd(); +} + +/** + * Session context action - provides progressive context loading + * First prompt: returns session overview with clusters + * Subsequent prompts: returns intent-matched sessions + */ +async function sessionContextAction(options: HookOptions): Promise { + let { stdin, sessionId, prompt } = options; + let hookCwd: string | undefined; + + // If --stdin flag is set, read from stdin (Claude Code hook format) + if (stdin) { + try { + const stdinData = await readStdin(); + if (stdinData) { + const hookData = JSON.parse(stdinData) as HookData; + sessionId = hookData.session_id || sessionId; + hookCwd = hookData.cwd; + prompt = hookData.prompt || prompt; + } + } catch { + // Silently continue if stdin parsing fails + } + } + + if (!sessionId) { + if (!stdin) { + console.error(chalk.red('Error: --session-id is required')); + console.error(chalk.gray('Usage: ccw hook session-context --session-id ')); + console.error(chalk.gray(' ccw hook session-context --stdin')); + } + process.exit(stdin ? 0 : 1); + } + + try { + const projectPath = getProjectPath(hookCwd); + + // Load existing session state + const existingState = loadSessionState(sessionId); + const isFirstPrompt = !existingState; + + // Update session state + const newState: SessionState = isFirstPrompt + ? { + firstLoad: new Date().toISOString(), + loadCount: 1, + lastPrompt: prompt + } + : { + ...existingState, + loadCount: existingState.loadCount + 1, + lastPrompt: prompt + }; + + saveSessionState(sessionId, newState); + + // Determine context type and generate content + let contextType: 'session-start' | 'context'; + let content = ''; + + // Dynamic import to avoid circular dependencies + const { SessionClusteringService } = await import('../core/session-clustering-service.js'); + const clusteringService = new SessionClusteringService(projectPath); + + if (isFirstPrompt) { + // First prompt: return session overview with clusters + contextType = 'session-start'; + content = await clusteringService.getProgressiveIndex({ + type: 'session-start', + sessionId + }); + } else if (prompt && prompt.trim().length > 0) { + // Subsequent prompts with content: return intent-matched sessions + contextType = 'context'; + content = await clusteringService.getProgressiveIndex({ + type: 'context', + sessionId, + prompt + }); + } else { + // Subsequent prompts without content: return minimal context + contextType = 'context'; + content = ''; // No context needed for empty prompts + } + + if (stdin) { + // For hooks: output content directly to stdout + if (content) { + process.stdout.write(content); + } + process.exit(0); + } + + // Interactive mode: show detailed output + console.log(chalk.green('Session Context')); + console.log(chalk.gray('─'.repeat(40))); + console.log(chalk.cyan('Session ID:'), sessionId); + console.log(chalk.cyan('Type:'), contextType); + console.log(chalk.cyan('First Prompt:'), isFirstPrompt ? 'Yes' : 'No'); + console.log(chalk.cyan('Load Count:'), newState.loadCount); + console.log(chalk.gray('─'.repeat(40))); + if (content) { + console.log(content); + } else { + console.log(chalk.gray('(No context generated)')); + } + } catch (error) { + if (stdin) { + // Silent failure for hooks + process.exit(0); + } + console.error(chalk.red(`Error: ${(error as Error).message}`)); + process.exit(1); + } +} + +/** + * Notify dashboard action - send notification to running ccw view server + */ +async function notifyAction(options: HookOptions): Promise { + const { stdin } = options; + let hookData: HookData = {}; + + if (stdin) { + try { + const stdinData = await readStdin(); + if (stdinData) { + hookData = JSON.parse(stdinData) as HookData; + } + } catch { + // Silently continue if stdin parsing fails + } + } + + try { + const { notifyRefreshRequired } = await import('../tools/notifier.js'); + await notifyRefreshRequired(); + + if (!stdin) { + console.log(chalk.green('Notification sent to dashboard')); + } + process.exit(0); + } catch (error) { + if (stdin) { + process.exit(0); + } + console.error(chalk.red(`Error: ${(error as Error).message}`)); + process.exit(1); + } +} + +/** + * Show help for hook command + */ +function showHelp(): void { + console.log(` +${chalk.bold('ccw hook')} - CLI endpoint for Claude Code hooks + +${chalk.bold('USAGE')} + ccw hook [options] + +${chalk.bold('SUBCOMMANDS')} + session-context Progressive session context loading (replaces curl/bash hook) + notify Send notification to ccw view dashboard + +${chalk.bold('OPTIONS')} + --stdin Read input from stdin (for Claude Code hooks) + --session-id Session ID (alternative to stdin) + --prompt Current prompt text (alternative to stdin) + +${chalk.bold('EXAMPLES')} + ${chalk.gray('# Use in Claude Code hook (settings.json):')} + ccw hook session-context --stdin + + ${chalk.gray('# Interactive usage:')} + ccw hook session-context --session-id abc123 + + ${chalk.gray('# Notify dashboard:')} + ccw hook notify --stdin + +${chalk.bold('HOOK CONFIGURATION')} + ${chalk.gray('Add to .claude/settings.json:')} + { + "hooks": { + "UserPromptSubmit": [{ + "hooks": [{ + "type": "command", + "command": "ccw hook session-context --stdin" + }] + }] + } + } +`); +} + +/** + * Main hook command handler + */ +export async function hookCommand( + subcommand: string, + args: string | string[], + options: HookOptions +): Promise { + switch (subcommand) { + case 'session-context': + case 'context': + await sessionContextAction(options); + break; + case 'notify': + await notifyAction(options); + break; + case 'help': + case undefined: + showHelp(); + break; + default: + console.error(chalk.red(`Unknown subcommand: ${subcommand}`)); + console.error(chalk.gray('Run "ccw hook help" for usage information')); + process.exit(1); + } +} diff --git a/ccw/src/core/session-clustering-service.ts b/ccw/src/core/session-clustering-service.ts index 5fef9074..16ff6a75 100644 --- a/ccw/src/core/session-clustering-service.ts +++ b/ccw/src/core/session-clustering-service.ts @@ -165,14 +165,23 @@ export class SessionClusteringService { keywords.add(match[1]); } - // 3. Technical terms (common frameworks/libraries) + // 3. Technical terms (common frameworks/libraries/concepts) const techTerms = [ + // Frameworks 'react', 'vue', 'angular', 'typescript', 'javascript', 'node', 'express', + // Auth 'auth', 'authentication', 'jwt', 'oauth', 'session', 'token', + // Data 'api', 'rest', 'graphql', 'database', 'sql', 'mongodb', 'redis', + // Testing 'test', 'testing', 'jest', 'mocha', 'vitest', + // Development 'refactor', 'refactoring', 'optimization', 'performance', - 'bug', 'fix', 'error', 'issue', 'debug' + 'bug', 'fix', 'error', 'issue', 'debug', + // CCW-specific terms + 'cluster', 'clustering', 'memory', 'hook', 'service', 'context', + 'workflow', 'skill', 'prompt', 'embedding', 'vector', 'semantic', + 'dashboard', 'view', 'route', 'command', 'cli', 'mcp' ]; const lowerContent = content.toLowerCase(); @@ -182,6 +191,23 @@ export class SessionClusteringService { } } + // 4. Generic word extraction (words >= 4 chars, not stopwords) + const stopwords = new Set([ + 'the', 'and', 'for', 'that', 'this', 'with', 'from', 'have', 'will', + 'are', 'was', 'were', 'been', 'being', 'what', 'when', 'where', 'which', + 'there', 'their', 'they', 'them', 'then', 'than', 'into', 'some', 'such', + 'only', 'also', 'just', 'more', 'most', 'other', 'after', 'before' + ]); + + const wordRegex = /\b([a-z]{4,})\b/g; + let wordMatch; + while ((wordMatch = wordRegex.exec(lowerContent)) !== null) { + const word = wordMatch[1]; + if (!stopwords.has(word)) { + keywords.add(word); + } + } + // Return top 20 keywords return Array.from(keywords).slice(0, 20); } diff --git a/ccw/src/tools/codex-lens.ts b/ccw/src/tools/codex-lens.ts index 4890c9f3..1c7ba261 100644 --- a/ccw/src/tools/codex-lens.ts +++ b/ccw/src/tools/codex-lens.ts @@ -450,6 +450,16 @@ function parseProgressLine(line: string): ProgressInfo | null { return { stage: 'finalizing', message: 'Finalizing vector index...', percent: 90 }; } + // Parse embeddings complete message + const embedCompleteMatch = line.match(/Embeddings complete:\s*(\d+)\s*chunks/i); + if (embedCompleteMatch) { + return { + stage: 'embeddings_complete', + message: `Embeddings complete: ${embedCompleteMatch[1]} chunks`, + percent: 95, + }; + } + return null; } diff --git a/codex-lens/src/codexlens/cli/commands.py b/codex-lens/src/codexlens/cli/commands.py index 5b96c0e1..5aa6860f 100644 --- a/codex-lens/src/codexlens/cli/commands.py +++ b/codex-lens/src/codexlens/cli/commands.py @@ -151,10 +151,21 @@ def init( if not json_mode: console.print("\n[bold]Generating embeddings...[/bold]") console.print(f"Model: [cyan]{embedding_model}[/cyan]") + else: + # Output progress message for JSON mode (parsed by Node.js) + print("Generating embeddings...", flush=True) - # Progress callback for non-json mode + # Progress callback - outputs progress for both json and non-json modes + # Node.js parseProgressLine() expects formats like: + # - "Batch X: N files, M chunks" + # - "Processing N files" + # - "Finalizing index" def progress_update(msg: str): - if not json_mode and verbose: + if json_mode: + # Output without prefix so Node.js can parse it + # Strip leading spaces that embedding_manager adds + print(msg.strip(), flush=True) + elif verbose: console.print(f" {msg}") embed_result = generate_embeddings_recursive( @@ -162,12 +173,16 @@ def init( model_profile=embedding_model, force=False, # Don't force regenerate during init chunk_size=2000, - progress_callback=progress_update if not json_mode else None, + progress_callback=progress_update, # Always use callback ) if embed_result["success"]: embed_data = embed_result["result"] - + + # Output completion message for Node.js to parse + if json_mode: + print(f"Embeddings complete: {embed_data['total_chunks_created']} chunks", flush=True) + # Get comprehensive coverage statistics status_result = get_embeddings_status(index_root) if status_result["success"]: diff --git a/codex-lens/src/codexlens/cli/embedding_manager.py b/codex-lens/src/codexlens/cli/embedding_manager.py index 72eb29c4..ba4ec45a 100644 --- a/codex-lens/src/codexlens/cli/embedding_manager.py +++ b/codex-lens/src/codexlens/cli/embedding_manager.py @@ -235,7 +235,8 @@ def generate_embeddings( return {"success": False, "error": "No files found in index"} if progress_callback: - progress_callback(f"Processing {total_files} files in batches of {FILE_BATCH_SIZE}...") + # Format must match Node.js parseProgressLine: "Processing N files" with 'embed' keyword + progress_callback(f"Processing {total_files} files for embeddings in batches of {FILE_BATCH_SIZE}...") cursor = conn.execute(f"SELECT {path_column}, content, language FROM files") batch_number = 0 @@ -325,10 +326,24 @@ def generate_embeddings( progress_callback(f"Finalizing index... Building ANN index for {total_chunks_created} chunks") except Exception as e: + # Cleanup on error to prevent process hanging + try: + clear_embedder_cache() + gc.collect() + except Exception: + pass return {"success": False, "error": f"Failed to read or process files: {str(e)}"} elapsed_time = time.time() - start_time + # Final cleanup: release ONNX resources to allow process exit + # This is critical - without it, ONNX Runtime threads prevent Python from exiting + try: + clear_embedder_cache() + gc.collect() + except Exception: + pass + return { "success": True, "result": { @@ -418,7 +433,8 @@ def generate_embeddings_recursive( rel_path = index_path.relative_to(index_root) except ValueError: rel_path = index_path - progress_callback(f"[{idx}/{len(index_files)}] Processing {rel_path}") + # Format: "Processing file X/Y: path" to match Node.js parseProgressLine + progress_callback(f"Processing file {idx}/{len(index_files)}: {rel_path}") result = generate_embeddings( index_path, @@ -443,6 +459,15 @@ def generate_embeddings_recursive( successful = sum(1 for r in all_results if r["success"]) + # Final cleanup after processing all indexes + # Each generate_embeddings() call does its own cleanup, but do a final one to be safe + try: + if SEMANTIC_AVAILABLE: + clear_embedder_cache() + gc.collect() + except Exception: + pass + return { "success": successful > 0, "result": {