mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-13 02:41:50 +08:00
feat: 添加钩子命令,简化 Claude Code 钩子操作接口,支持会话上下文加载和通知功能
This commit is contained in:
@@ -26,7 +26,7 @@ except ImportError:
|
|||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from codexlens.semantic.embedder import get_embedder
|
from codexlens.semantic.embedder import get_embedder, clear_embedder_cache
|
||||||
except ImportError:
|
except ImportError:
|
||||||
print("Error: CodexLens not found. Install with: pip install codexlens[semantic]", file=sys.stderr)
|
print("Error: CodexLens not found. Install with: pip install codexlens[semantic]", file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
@@ -46,8 +46,15 @@ class MemoryEmbedder:
|
|||||||
self.conn = sqlite3.connect(str(self.db_path))
|
self.conn = sqlite3.connect(str(self.db_path))
|
||||||
self.conn.row_factory = sqlite3.Row
|
self.conn.row_factory = sqlite3.Row
|
||||||
|
|
||||||
# Initialize embedder (uses cached singleton)
|
# Lazy-load embedder to avoid ~0.8s model loading for status command
|
||||||
self.embedder = get_embedder(profile="code")
|
self._embedder = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def embedder(self):
|
||||||
|
"""Lazy-load the embedder on first access."""
|
||||||
|
if self._embedder is None:
|
||||||
|
self._embedder = get_embedder(profile="code")
|
||||||
|
return self._embedder
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
"""Close database connection."""
|
"""Close database connection."""
|
||||||
@@ -348,9 +355,21 @@ def main():
|
|||||||
|
|
||||||
# Exit with error code if operation failed
|
# Exit with error code if operation failed
|
||||||
if "success" in result and not result["success"]:
|
if "success" in result and not result["success"]:
|
||||||
|
# Clean up ONNX resources before exit
|
||||||
|
clear_embedder_cache()
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
# Clean up ONNX resources to ensure process can exit cleanly
|
||||||
|
# This releases fastembed/ONNX Runtime threads that would otherwise
|
||||||
|
# prevent the Python interpreter from shutting down
|
||||||
|
clear_embedder_cache()
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
# Clean up ONNX resources even on error
|
||||||
|
try:
|
||||||
|
clear_embedder_cache()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
print(json.dumps({
|
print(json.dumps({
|
||||||
"success": False,
|
"success": False,
|
||||||
"error": str(e)
|
"error": str(e)
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import { sessionCommand } from './commands/session.js';
|
|||||||
import { cliCommand } from './commands/cli.js';
|
import { cliCommand } from './commands/cli.js';
|
||||||
import { memoryCommand } from './commands/memory.js';
|
import { memoryCommand } from './commands/memory.js';
|
||||||
import { coreMemoryCommand } from './commands/core-memory.js';
|
import { coreMemoryCommand } from './commands/core-memory.js';
|
||||||
|
import { hookCommand } from './commands/hook.js';
|
||||||
import { readFileSync, existsSync } from 'fs';
|
import { readFileSync, existsSync } from 'fs';
|
||||||
import { fileURLToPath } from 'url';
|
import { fileURLToPath } from 'url';
|
||||||
import { dirname, join } from 'path';
|
import { dirname, join } from 'path';
|
||||||
@@ -229,5 +230,15 @@ export function run(argv: string[]): void {
|
|||||||
.option('--prefix <prefix>', 'Add prefix to imported memory IDs')
|
.option('--prefix <prefix>', 'Add prefix to imported memory IDs')
|
||||||
.action((subcommand, args, options) => coreMemoryCommand(subcommand, args, options));
|
.action((subcommand, args, options) => coreMemoryCommand(subcommand, args, options));
|
||||||
|
|
||||||
|
// Hook command - CLI endpoint for Claude Code hooks
|
||||||
|
program
|
||||||
|
.command('hook [subcommand] [args...]')
|
||||||
|
.description('CLI endpoint for Claude Code hooks (session-context, notify)')
|
||||||
|
.option('--stdin', 'Read input from stdin (for Claude Code hooks)')
|
||||||
|
.option('--session-id <id>', 'Session ID')
|
||||||
|
.option('--prompt <text>', 'Prompt text')
|
||||||
|
.option('--type <type>', 'Context type: session-start, context')
|
||||||
|
.action((subcommand, args, options) => hookCommand(subcommand, args, options));
|
||||||
|
|
||||||
program.parse(argv);
|
program.parse(argv);
|
||||||
}
|
}
|
||||||
|
|||||||
315
ccw/src/commands/hook.ts
Normal file
315
ccw/src/commands/hook.ts
Normal file
@@ -0,0 +1,315 @@
|
|||||||
|
/**
|
||||||
|
* Hook Command - CLI endpoint for Claude Code hooks
|
||||||
|
* Provides simplified interface for hook operations, replacing complex bash/curl commands
|
||||||
|
*/
|
||||||
|
|
||||||
|
import chalk from 'chalk';
|
||||||
|
import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
|
||||||
|
import { join, dirname } from 'path';
|
||||||
|
import { tmpdir } from 'os';
|
||||||
|
|
||||||
|
/**
 * Command-line options accepted by the `hook` command.
 * Each field mirrors one flag of the CLI (`--stdin`, `--session-id`,
 * `--prompt`, `--type`).
 */
interface HookOptions {
  /** When set, the hook payload is read from stdin as JSON. */
  stdin?: boolean;
  /** Session identifier; a `session_id` in the stdin payload takes precedence when present. */
  sessionId?: string;
  /** Prompt text; a `prompt` in the stdin payload takes precedence when present. */
  prompt?: string;
  /** Requested context type. NOTE(review): no action in this file appears to read it — confirm intent. */
  type?: 'session-start' | 'context';
}
|
||||||
|
|
||||||
|
/**
 * Shape the stdin JSON payload is cast to after `JSON.parse`.
 * All fields are optional; the payload is not validated at runtime.
 */
interface HookData {
  /** Session identifier supplied by the hook caller. */
  session_id?: string;
  /** Prompt text supplied by the hook caller. */
  prompt?: string;
  /** Working directory of the caller; used as the project path when present. */
  cwd?: string;
  /** Tool input payload. NOTE(review): not read by any action in this file — confirm whether it is needed. */
  tool_input?: Record<string, unknown>;
}
|
||||||
|
|
||||||
|
/**
 * Per-session bookkeeping persisted as JSON in the temp-directory state file.
 */
interface SessionState {
  /** ISO-8601 timestamp recorded when the session's first prompt was handled. */
  firstLoad: string;
  /** Number of times context has been requested for this session (starts at 1). */
  loadCount: number;
  /** Prompt text of the most recent request, if one was provided. */
  lastPrompt?: string;
}
|
||||||
|
|
||||||
|
/**
 * Read the complete stdin stream as UTF-8 text (for Claude Code hooks).
 *
 * Never rejects: hooks are meant to fail silently, so both "nothing was
 * piped in" and "the stream errored" are reported as an empty string.
 *
 * @returns The full stdin payload, or `''` when stdin is an interactive
 *          terminal or the stream reports an error.
 */
async function readStdin(): Promise<string> {
  // Decide before attaching any listener: on a TTY there is no piped payload,
  // and subscribing would only start reading from the terminal for nothing.
  if (process.stdin.isTTY) {
    return '';
  }

  return new Promise<string>((resolve) => {
    const chunks: string[] = [];
    process.stdin.setEncoding('utf8');
    process.stdin.on('data', (chunk: string) => {
      chunks.push(chunk);
    });
    process.stdin.once('end', () => {
      resolve(chunks.join(''));
    });
    // Without an 'error' listener a failing stream raises an uncaught
    // exception; treat it as "no usable input" instead.
    process.stdin.once('error', () => {
      resolve('');
    });
  });
}
|
||||||
|
|
||||||
|
/**
 * Build the path of the JSON state file for a session, creating the
 * state directory (`<tmpdir>/.ccw-sessions`) when it does not exist yet.
 *
 * The session id can come from an external hook payload, so path separators
 * and NUL bytes are replaced with `_`. This keeps the resulting file inside
 * the state directory even for ids such as `../../x`. Ids without those
 * characters map to the same file name as before.
 *
 * @param sessionId Identifier of the session.
 * @returns Absolute path of the session's state file.
 */
function getSessionStateFile(sessionId: string): string {
  const stateDir = join(tmpdir(), '.ccw-sessions');
  // `recursive: true` makes this a no-op when the directory already exists,
  // so no separate existence check (and no check-then-create race) is needed.
  mkdirSync(stateDir, { recursive: true });

  const safeId = sessionId.replace(/[\\/\0]/g, '_');
  return join(stateDir, `session-${safeId}.json`);
}
|
||||||
|
|
||||||
|
/**
 * Load the persisted state of a session.
 *
 * @param sessionId Identifier of the session.
 * @returns The stored state, or `null` when no state file exists, the file
 *          cannot be read or parsed, or its content does not have the
 *          required `firstLoad` / `loadCount` fields.
 */
function loadSessionState(sessionId: string): SessionState | null {
  const stateFile = getSessionStateFile(sessionId);

  try {
    // A missing file makes readFileSync throw, which lands in the catch
    // below and yields null, the same result as an explicit existence check.
    const parsed: unknown = JSON.parse(readFileSync(stateFile, 'utf-8'));
    if (typeof parsed !== 'object' || parsed === null) {
      return null;
    }

    // Validate the fields callers rely on; a corrupt file would otherwise
    // propagate e.g. `undefined + 1` (NaN) into the next saved state.
    const candidate = parsed as Partial<SessionState>;
    const hasValidShape =
      typeof candidate.firstLoad === 'string' &&
      typeof candidate.loadCount === 'number' &&
      Number.isFinite(candidate.loadCount);

    return hasValidShape ? (candidate as SessionState) : null;
  } catch {
    return null;
  }
}
|
||||||
|
|
||||||
|
/**
 * Persist a session's state as pretty-printed JSON (2-space indent)
 * in that session's state file, overwriting any previous content.
 *
 * @param sessionId Identifier of the session.
 * @param state State object to write.
 */
function saveSessionState(sessionId: string, state: SessionState): void {
  const target = getSessionStateFile(sessionId);
  const serialized = JSON.stringify(state, null, 2);
  writeFileSync(target, serialized);
}
|
||||||
|
|
||||||
|
/**
 * Resolve the project path for a hook invocation.
 *
 * @param hookCwd Working directory reported by the hook payload, if any.
 * @returns `hookCwd` when it is a non-empty string, otherwise the current
 *          working directory of this process.
 */
function getProjectPath(hookCwd?: string): string {
  if (hookCwd) {
    return hookCwd;
  }
  return process.cwd();
}
|
||||||
|
|
||||||
|
/**
 * Session context action - provides progressive context loading.
 *
 * - First prompt of a session (no state file yet): requests the
 *   `session-start` index from the clustering service.
 * - Later prompts with non-blank text: requests the `context` index for
 *   that prompt.
 * - Later prompts without text: produces no content.
 *
 * In `--stdin` (hook) mode the session id, prompt and cwd are taken from the
 * JSON payload on stdin when present, the generated content is written raw
 * to stdout, and every failure exits with code 0 so the hook never blocks
 * the caller. In interactive mode a formatted summary is printed and
 * failures exit with code 1.
 *
 * @param options Parsed CLI options of the `hook` command.
 */
async function sessionContextAction(options: HookOptions): Promise<void> {
  const { stdin } = options;
  let { sessionId, prompt } = options;
  let hookCwd: string | undefined;

  // If --stdin flag is set, read from stdin (Claude Code hook format)
  if (stdin) {
    try {
      const stdinData = await readStdin();
      if (stdinData) {
        const hookData = JSON.parse(stdinData) as HookData;
        sessionId = hookData.session_id || sessionId;
        hookCwd = hookData.cwd;
        prompt = hookData.prompt || prompt;
      }
    } catch {
      // Deliberate best-effort: fall back to the CLI options if the payload is unusable
    }
  }

  if (!sessionId) {
    if (!stdin) {
      console.error(chalk.red('Error: --session-id is required'));
      console.error(chalk.gray('Usage: ccw hook session-context --session-id <id>'));
      console.error(chalk.gray(' ccw hook session-context --stdin'));
    }
    // Hooks must not fail the caller, so a missing id is a silent no-op there
    process.exit(stdin ? 0 : 1);
  }

  try {
    const projectPath = getProjectPath(hookCwd);

    // A session without a stored state file is on its first prompt
    const existingState = loadSessionState(sessionId);
    const isFirstPrompt = !existingState;

    const newState: SessionState = isFirstPrompt
      ? {
          firstLoad: new Date().toISOString(),
          loadCount: 1,
          lastPrompt: prompt
        }
      : {
          ...existingState,
          loadCount: existingState.loadCount + 1,
          lastPrompt: prompt
        };

    saveSessionState(sessionId, newState);

    // Determine context type and generate content
    let contextType: 'session-start' | 'context';
    let content = '';

    // Dynamic import to avoid circular dependencies
    const { SessionClusteringService } = await import('../core/session-clustering-service.js');
    const clusteringService = new SessionClusteringService(projectPath);

    if (isFirstPrompt) {
      // First prompt: return session overview with clusters
      contextType = 'session-start';
      content = await clusteringService.getProgressiveIndex({
        type: 'session-start',
        sessionId
      });
    } else if (prompt && prompt.trim().length > 0) {
      // Subsequent prompts with content: return intent-matched sessions
      contextType = 'context';
      content = await clusteringService.getProgressiveIndex({
        type: 'context',
        sessionId,
        prompt
      });
    } else {
      // Subsequent prompts without content: no context needed
      contextType = 'context';
      content = '';
    }

    if (stdin) {
      // For hooks: output content directly to stdout
      if (content) {
        // Wait for the write callback before exiting: stdout writes can be
        // asynchronous (e.g. pipes on Windows) and process.exit() would
        // otherwise drop output that has not been flushed yet.
        const payload = content;
        await new Promise<void>((resolve) => {
          process.stdout.write(payload, () => resolve());
        });
      }
      process.exit(0);
    }

    // Interactive mode: show detailed output
    console.log(chalk.green('Session Context'));
    console.log(chalk.gray('─'.repeat(40)));
    console.log(chalk.cyan('Session ID:'), sessionId);
    console.log(chalk.cyan('Type:'), contextType);
    console.log(chalk.cyan('First Prompt:'), isFirstPrompt ? 'Yes' : 'No');
    console.log(chalk.cyan('Load Count:'), newState.loadCount);
    console.log(chalk.gray('─'.repeat(40)));
    if (content) {
      console.log(content);
    } else {
      console.log(chalk.gray('(No context generated)'));
    }
  } catch (error) {
    if (stdin) {
      // Silent failure for hooks
      process.exit(0);
    }
    // Thrown values are not guaranteed to be Error instances
    const message = error instanceof Error ? error.message : String(error);
    console.error(chalk.red(`Error: ${message}`));
    process.exit(1);
  }
}
|
||||||
|
|
||||||
|
/**
 * Notify dashboard action - asks the running ccw view server to refresh
 * by calling `notifyRefreshRequired()` from the notifier module.
 *
 * In `--stdin` (hook) mode the stdin payload is consumed but not used, no
 * message is printed, and failures exit with code 0 so the hook never
 * blocks the caller. In interactive mode a confirmation is printed on
 * success and failures exit with code 1.
 *
 * @param options Parsed CLI options of the `hook` command.
 */
async function notifyAction(options: HookOptions): Promise<void> {
  const { stdin } = options;

  if (stdin) {
    try {
      // Drain the hook payload so the caller's write completes. Its content
      // is not needed for a refresh notification, so it is not parsed.
      await readStdin();
    } catch {
      // Deliberate best-effort: an unreadable payload must not stop the notification
    }
  }

  try {
    const { notifyRefreshRequired } = await import('../tools/notifier.js');
    await notifyRefreshRequired();

    if (!stdin) {
      console.log(chalk.green('Notification sent to dashboard'));
    }
    process.exit(0);
  } catch (error) {
    if (stdin) {
      // Silent failure for hooks
      process.exit(0);
    }
    // Thrown values are not guaranteed to be Error instances
    const message = error instanceof Error ? error.message : String(error);
    console.error(chalk.red(`Error: ${message}`));
    process.exit(1);
  }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Show help for hook command
|
||||||
|
*/
|
||||||
|
function showHelp(): void {
|
||||||
|
console.log(`
|
||||||
|
${chalk.bold('ccw hook')} - CLI endpoint for Claude Code hooks
|
||||||
|
|
||||||
|
${chalk.bold('USAGE')}
|
||||||
|
ccw hook <subcommand> [options]
|
||||||
|
|
||||||
|
${chalk.bold('SUBCOMMANDS')}
|
||||||
|
session-context Progressive session context loading (replaces curl/bash hook)
|
||||||
|
notify Send notification to ccw view dashboard
|
||||||
|
|
||||||
|
${chalk.bold('OPTIONS')}
|
||||||
|
--stdin Read input from stdin (for Claude Code hooks)
|
||||||
|
--session-id Session ID (alternative to stdin)
|
||||||
|
--prompt Current prompt text (alternative to stdin)
|
||||||
|
|
||||||
|
${chalk.bold('EXAMPLES')}
|
||||||
|
${chalk.gray('# Use in Claude Code hook (settings.json):')}
|
||||||
|
ccw hook session-context --stdin
|
||||||
|
|
||||||
|
${chalk.gray('# Interactive usage:')}
|
||||||
|
ccw hook session-context --session-id abc123
|
||||||
|
|
||||||
|
${chalk.gray('# Notify dashboard:')}
|
||||||
|
ccw hook notify --stdin
|
||||||
|
|
||||||
|
${chalk.bold('HOOK CONFIGURATION')}
|
||||||
|
${chalk.gray('Add to .claude/settings.json:')}
|
||||||
|
{
|
||||||
|
"hooks": {
|
||||||
|
"UserPromptSubmit": [{
|
||||||
|
"hooks": [{
|
||||||
|
"type": "command",
|
||||||
|
"command": "ccw hook session-context --stdin"
|
||||||
|
}]
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
`);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Entry point of the `hook` CLI command: routes a subcommand to its action.
 *
 * - `session-context` / `context`: progressive session context loading
 * - `notify`: dashboard refresh notification
 * - `help` or no subcommand: usage text
 * - anything else: error message on stderr and exit code 1
 *
 * @param subcommand Name of the requested subcommand (may be absent at runtime).
 * @param args Extra positional arguments; currently unused.
 * @param options Parsed CLI options forwarded to the action.
 */
export async function hookCommand(
  subcommand: string,
  args: string | string[],
  options: HookOptions
): Promise<void> {
  if (subcommand === 'session-context' || subcommand === 'context') {
    await sessionContextAction(options);
    return;
  }

  if (subcommand === 'notify') {
    await notifyAction(options);
    return;
  }

  if (subcommand === 'help' || subcommand === undefined) {
    showHelp();
    return;
  }

  console.error(chalk.red(`Unknown subcommand: ${subcommand}`));
  console.error(chalk.gray('Run "ccw hook help" for usage information'));
  process.exit(1);
}
|
||||||
@@ -165,14 +165,23 @@ export class SessionClusteringService {
|
|||||||
keywords.add(match[1]);
|
keywords.add(match[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. Technical terms (common frameworks/libraries)
|
// 3. Technical terms (common frameworks/libraries/concepts)
|
||||||
const techTerms = [
|
const techTerms = [
|
||||||
|
// Frameworks
|
||||||
'react', 'vue', 'angular', 'typescript', 'javascript', 'node', 'express',
|
'react', 'vue', 'angular', 'typescript', 'javascript', 'node', 'express',
|
||||||
|
// Auth
|
||||||
'auth', 'authentication', 'jwt', 'oauth', 'session', 'token',
|
'auth', 'authentication', 'jwt', 'oauth', 'session', 'token',
|
||||||
|
// Data
|
||||||
'api', 'rest', 'graphql', 'database', 'sql', 'mongodb', 'redis',
|
'api', 'rest', 'graphql', 'database', 'sql', 'mongodb', 'redis',
|
||||||
|
// Testing
|
||||||
'test', 'testing', 'jest', 'mocha', 'vitest',
|
'test', 'testing', 'jest', 'mocha', 'vitest',
|
||||||
|
// Development
|
||||||
'refactor', 'refactoring', 'optimization', 'performance',
|
'refactor', 'refactoring', 'optimization', 'performance',
|
||||||
'bug', 'fix', 'error', 'issue', 'debug'
|
'bug', 'fix', 'error', 'issue', 'debug',
|
||||||
|
// CCW-specific terms
|
||||||
|
'cluster', 'clustering', 'memory', 'hook', 'service', 'context',
|
||||||
|
'workflow', 'skill', 'prompt', 'embedding', 'vector', 'semantic',
|
||||||
|
'dashboard', 'view', 'route', 'command', 'cli', 'mcp'
|
||||||
];
|
];
|
||||||
|
|
||||||
const lowerContent = content.toLowerCase();
|
const lowerContent = content.toLowerCase();
|
||||||
@@ -182,6 +191,23 @@ export class SessionClusteringService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 4. Generic word extraction (words >= 4 chars, not stopwords)
|
||||||
|
const stopwords = new Set([
|
||||||
|
'the', 'and', 'for', 'that', 'this', 'with', 'from', 'have', 'will',
|
||||||
|
'are', 'was', 'were', 'been', 'being', 'what', 'when', 'where', 'which',
|
||||||
|
'there', 'their', 'they', 'them', 'then', 'than', 'into', 'some', 'such',
|
||||||
|
'only', 'also', 'just', 'more', 'most', 'other', 'after', 'before'
|
||||||
|
]);
|
||||||
|
|
||||||
|
const wordRegex = /\b([a-z]{4,})\b/g;
|
||||||
|
let wordMatch;
|
||||||
|
while ((wordMatch = wordRegex.exec(lowerContent)) !== null) {
|
||||||
|
const word = wordMatch[1];
|
||||||
|
if (!stopwords.has(word)) {
|
||||||
|
keywords.add(word);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Return top 20 keywords
|
// Return top 20 keywords
|
||||||
return Array.from(keywords).slice(0, 20);
|
return Array.from(keywords).slice(0, 20);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -450,6 +450,16 @@ function parseProgressLine(line: string): ProgressInfo | null {
|
|||||||
return { stage: 'finalizing', message: 'Finalizing vector index...', percent: 90 };
|
return { stage: 'finalizing', message: 'Finalizing vector index...', percent: 90 };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parse embeddings complete message
|
||||||
|
const embedCompleteMatch = line.match(/Embeddings complete:\s*(\d+)\s*chunks/i);
|
||||||
|
if (embedCompleteMatch) {
|
||||||
|
return {
|
||||||
|
stage: 'embeddings_complete',
|
||||||
|
message: `Embeddings complete: ${embedCompleteMatch[1]} chunks`,
|
||||||
|
percent: 95,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -151,10 +151,21 @@ def init(
|
|||||||
if not json_mode:
|
if not json_mode:
|
||||||
console.print("\n[bold]Generating embeddings...[/bold]")
|
console.print("\n[bold]Generating embeddings...[/bold]")
|
||||||
console.print(f"Model: [cyan]{embedding_model}[/cyan]")
|
console.print(f"Model: [cyan]{embedding_model}[/cyan]")
|
||||||
|
else:
|
||||||
|
# Output progress message for JSON mode (parsed by Node.js)
|
||||||
|
print("Generating embeddings...", flush=True)
|
||||||
|
|
||||||
# Progress callback for non-json mode
|
# Progress callback - outputs progress for both json and non-json modes
|
||||||
|
# Node.js parseProgressLine() expects formats like:
|
||||||
|
# - "Batch X: N files, M chunks"
|
||||||
|
# - "Processing N files"
|
||||||
|
# - "Finalizing index"
|
||||||
def progress_update(msg: str):
|
def progress_update(msg: str):
|
||||||
if not json_mode and verbose:
|
if json_mode:
|
||||||
|
# Output without prefix so Node.js can parse it
|
||||||
|
# Strip leading spaces that embedding_manager adds
|
||||||
|
print(msg.strip(), flush=True)
|
||||||
|
elif verbose:
|
||||||
console.print(f" {msg}")
|
console.print(f" {msg}")
|
||||||
|
|
||||||
embed_result = generate_embeddings_recursive(
|
embed_result = generate_embeddings_recursive(
|
||||||
@@ -162,12 +173,16 @@ def init(
|
|||||||
model_profile=embedding_model,
|
model_profile=embedding_model,
|
||||||
force=False, # Don't force regenerate during init
|
force=False, # Don't force regenerate during init
|
||||||
chunk_size=2000,
|
chunk_size=2000,
|
||||||
progress_callback=progress_update if not json_mode else None,
|
progress_callback=progress_update, # Always use callback
|
||||||
)
|
)
|
||||||
|
|
||||||
if embed_result["success"]:
|
if embed_result["success"]:
|
||||||
embed_data = embed_result["result"]
|
embed_data = embed_result["result"]
|
||||||
|
|
||||||
|
# Output completion message for Node.js to parse
|
||||||
|
if json_mode:
|
||||||
|
print(f"Embeddings complete: {embed_data['total_chunks_created']} chunks", flush=True)
|
||||||
|
|
||||||
# Get comprehensive coverage statistics
|
# Get comprehensive coverage statistics
|
||||||
status_result = get_embeddings_status(index_root)
|
status_result = get_embeddings_status(index_root)
|
||||||
if status_result["success"]:
|
if status_result["success"]:
|
||||||
|
|||||||
@@ -235,7 +235,8 @@ def generate_embeddings(
|
|||||||
return {"success": False, "error": "No files found in index"}
|
return {"success": False, "error": "No files found in index"}
|
||||||
|
|
||||||
if progress_callback:
|
if progress_callback:
|
||||||
progress_callback(f"Processing {total_files} files in batches of {FILE_BATCH_SIZE}...")
|
# Format must match Node.js parseProgressLine: "Processing N files" with 'embed' keyword
|
||||||
|
progress_callback(f"Processing {total_files} files for embeddings in batches of {FILE_BATCH_SIZE}...")
|
||||||
|
|
||||||
cursor = conn.execute(f"SELECT {path_column}, content, language FROM files")
|
cursor = conn.execute(f"SELECT {path_column}, content, language FROM files")
|
||||||
batch_number = 0
|
batch_number = 0
|
||||||
@@ -325,10 +326,24 @@ def generate_embeddings(
|
|||||||
progress_callback(f"Finalizing index... Building ANN index for {total_chunks_created} chunks")
|
progress_callback(f"Finalizing index... Building ANN index for {total_chunks_created} chunks")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
# Cleanup on error to prevent process hanging
|
||||||
|
try:
|
||||||
|
clear_embedder_cache()
|
||||||
|
gc.collect()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
return {"success": False, "error": f"Failed to read or process files: {str(e)}"}
|
return {"success": False, "error": f"Failed to read or process files: {str(e)}"}
|
||||||
|
|
||||||
elapsed_time = time.time() - start_time
|
elapsed_time = time.time() - start_time
|
||||||
|
|
||||||
|
# Final cleanup: release ONNX resources to allow process exit
|
||||||
|
# This is critical - without it, ONNX Runtime threads prevent Python from exiting
|
||||||
|
try:
|
||||||
|
clear_embedder_cache()
|
||||||
|
gc.collect()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"success": True,
|
"success": True,
|
||||||
"result": {
|
"result": {
|
||||||
@@ -418,7 +433,8 @@ def generate_embeddings_recursive(
|
|||||||
rel_path = index_path.relative_to(index_root)
|
rel_path = index_path.relative_to(index_root)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
rel_path = index_path
|
rel_path = index_path
|
||||||
progress_callback(f"[{idx}/{len(index_files)}] Processing {rel_path}")
|
# Format: "Processing file X/Y: path" to match Node.js parseProgressLine
|
||||||
|
progress_callback(f"Processing file {idx}/{len(index_files)}: {rel_path}")
|
||||||
|
|
||||||
result = generate_embeddings(
|
result = generate_embeddings(
|
||||||
index_path,
|
index_path,
|
||||||
@@ -443,6 +459,15 @@ def generate_embeddings_recursive(
|
|||||||
|
|
||||||
successful = sum(1 for r in all_results if r["success"])
|
successful = sum(1 for r in all_results if r["success"])
|
||||||
|
|
||||||
|
# Final cleanup after processing all indexes
|
||||||
|
# Each generate_embeddings() call does its own cleanup, but do a final one to be safe
|
||||||
|
try:
|
||||||
|
if SEMANTIC_AVAILABLE:
|
||||||
|
clear_embedder_cache()
|
||||||
|
gc.collect()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"success": successful > 0,
|
"success": successful > 0,
|
||||||
"result": {
|
"result": {
|
||||||
|
|||||||
Reference in New Issue
Block a user