mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-03-22 19:18:47 +08:00
feat: add MCP server for semantic code search with FastMCP integration
This commit is contained in:
@@ -1,37 +1,13 @@
|
||||
/**
|
||||
* Memory Embedder Bridge - TypeScript interface to Python memory embedder
|
||||
* Memory Embedder Bridge - STUB (v1 Python bridge removed)
|
||||
*
|
||||
* This module provides a TypeScript bridge to the Python memory_embedder.py script,
|
||||
* which generates and searches embeddings for memory chunks using CodexLens's embedder.
|
||||
*
|
||||
* Features:
|
||||
* - Reuses CodexLens venv at ~/.codexlens/venv
|
||||
* - JSON protocol communication
|
||||
* - Three commands: embed, search, status
|
||||
* - Automatic availability checking
|
||||
* - Stage1 output embedding for V2 pipeline
|
||||
* The Python memory_embedder.py bridge has been removed. This module provides
|
||||
* no-op stubs so that existing consumers compile without errors.
|
||||
*/
|
||||
|
||||
import { spawn } from 'child_process';
|
||||
import { join, dirname } from 'path';
|
||||
import { existsSync } from 'fs';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { getCodexLensHiddenPython } from '../utils/codexlens-path.js';
|
||||
import { getCoreMemoryStore } from './core-memory-store.js';
|
||||
import type { Stage1Output } from './core-memory-store.js';
|
||||
import { StoragePaths } from '../config/storage-paths.js';
|
||||
const V1_REMOVED = 'Memory embedder Python bridge has been removed (v1 cleanup).';
|
||||
|
||||
// Get directory of this module
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
|
||||
// Venv paths (reuse CodexLens venv)
|
||||
const VENV_PYTHON = getCodexLensHiddenPython();
|
||||
|
||||
// Script path
|
||||
const EMBEDDER_SCRIPT = join(__dirname, '..', '..', 'scripts', 'memory_embedder.py');
|
||||
|
||||
// Types
|
||||
// Types (kept for backward compatibility)
|
||||
export interface EmbedResult {
|
||||
success: boolean;
|
||||
chunks_processed: number;
|
||||
@@ -78,197 +54,6 @@ export interface SearchOptions {
|
||||
sourceType?: 'core_memory' | 'workflow' | 'cli_history';
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if embedder is available (venv and script exist)
|
||||
* @returns True if embedder is available
|
||||
*/
|
||||
export function isEmbedderAvailable(): boolean {
|
||||
// Check venv python exists
|
||||
if (!existsSync(VENV_PYTHON)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check script exists
|
||||
if (!existsSync(EMBEDDER_SCRIPT)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Run Python script with arguments
|
||||
* @param args - Command line arguments
|
||||
* @param timeout - Timeout in milliseconds
|
||||
* @returns JSON output from script
|
||||
*/
|
||||
function runPython(args: string[], timeout: number = 300000): Promise<string> {
|
||||
return new Promise((resolve, reject) => {
|
||||
// Check availability
|
||||
if (!isEmbedderAvailable()) {
|
||||
reject(
|
||||
new Error(
|
||||
'Memory embedder not available. Ensure CodexLens venv exists at ~/.codexlens/venv'
|
||||
)
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Spawn Python process
|
||||
const child = spawn(VENV_PYTHON, [EMBEDDER_SCRIPT, ...args], {
|
||||
shell: false,
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
timeout,
|
||||
windowsHide: true,
|
||||
env: { ...process.env, PYTHONIOENCODING: 'utf-8' },
|
||||
});
|
||||
|
||||
let stdout = '';
|
||||
let stderr = '';
|
||||
|
||||
child.stdout.on('data', (data) => {
|
||||
stdout += data.toString();
|
||||
});
|
||||
|
||||
child.stderr.on('data', (data) => {
|
||||
stderr += data.toString();
|
||||
});
|
||||
|
||||
child.on('close', (code) => {
|
||||
if (code === 0) {
|
||||
resolve(stdout.trim());
|
||||
} else {
|
||||
reject(new Error(`Python script failed (exit code ${code}): ${stderr || stdout}`));
|
||||
}
|
||||
});
|
||||
|
||||
child.on('error', (err) => {
|
||||
if ((err as NodeJS.ErrnoException).code === 'ETIMEDOUT') {
|
||||
reject(new Error('Python script timed out'));
|
||||
} else {
|
||||
reject(new Error(`Failed to spawn Python: ${err.message}`));
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate embeddings for memory chunks
|
||||
* @param dbPath - Path to SQLite database
|
||||
* @param options - Embedding options
|
||||
* @returns Embedding result
|
||||
*/
|
||||
export async function generateEmbeddings(
|
||||
dbPath: string,
|
||||
options: EmbedOptions = {}
|
||||
): Promise<EmbedResult> {
|
||||
const { sourceId, batchSize = 8, force = false } = options;
|
||||
|
||||
// Build arguments
|
||||
const args = ['embed', dbPath];
|
||||
|
||||
if (sourceId) {
|
||||
args.push('--source-id', sourceId);
|
||||
}
|
||||
|
||||
if (batchSize !== 8) {
|
||||
args.push('--batch-size', batchSize.toString());
|
||||
}
|
||||
|
||||
if (force) {
|
||||
args.push('--force');
|
||||
}
|
||||
|
||||
try {
|
||||
// Default timeout: 5 minutes
|
||||
const output = await runPython(args, 300000);
|
||||
const result = JSON.parse(output) as EmbedResult;
|
||||
return result;
|
||||
} catch (err) {
|
||||
return {
|
||||
success: false,
|
||||
chunks_processed: 0,
|
||||
chunks_failed: 0,
|
||||
elapsed_time: 0,
|
||||
error: (err as Error).message,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Search memory chunks using semantic search
|
||||
* @param dbPath - Path to SQLite database
|
||||
* @param query - Search query text
|
||||
* @param options - Search options
|
||||
* @returns Search results
|
||||
*/
|
||||
export async function searchMemories(
|
||||
dbPath: string,
|
||||
query: string,
|
||||
options: SearchOptions = {}
|
||||
): Promise<SearchResult> {
|
||||
const { topK = 10, minScore = 0.3, sourceType } = options;
|
||||
|
||||
// Build arguments
|
||||
const args = ['search', dbPath, query];
|
||||
|
||||
if (topK !== 10) {
|
||||
args.push('--top-k', topK.toString());
|
||||
}
|
||||
|
||||
if (minScore !== 0.3) {
|
||||
args.push('--min-score', minScore.toString());
|
||||
}
|
||||
|
||||
if (sourceType) {
|
||||
args.push('--type', sourceType);
|
||||
}
|
||||
|
||||
try {
|
||||
// Default timeout: 30 seconds
|
||||
const output = await runPython(args, 30000);
|
||||
const result = JSON.parse(output) as SearchResult;
|
||||
return result;
|
||||
} catch (err) {
|
||||
return {
|
||||
success: false,
|
||||
matches: [],
|
||||
error: (err as Error).message,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get embedding status statistics
|
||||
* @param dbPath - Path to SQLite database
|
||||
* @returns Embedding status
|
||||
*/
|
||||
export async function getEmbeddingStatus(dbPath: string): Promise<EmbeddingStatus> {
|
||||
// Build arguments
|
||||
const args = ['status', dbPath];
|
||||
|
||||
try {
|
||||
// Default timeout: 30 seconds
|
||||
const output = await runPython(args, 30000);
|
||||
const result = JSON.parse(output) as EmbeddingStatus;
|
||||
return { ...result, success: true };
|
||||
} catch (err) {
|
||||
return {
|
||||
success: false,
|
||||
total_chunks: 0,
|
||||
embedded_chunks: 0,
|
||||
pending_chunks: 0,
|
||||
by_type: {},
|
||||
error: (err as Error).message,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Memory V2: Stage1 Output Embedding
|
||||
// ============================================================================
|
||||
|
||||
/** Result of stage1 embedding operation */
|
||||
export interface Stage1EmbedResult {
|
||||
success: boolean;
|
||||
chunksCreated: number;
|
||||
@@ -276,98 +61,54 @@ export interface Stage1EmbedResult {
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Chunk and embed stage1_outputs (raw_memory + rollout_summary) for semantic search.
|
||||
*
|
||||
* Reads all stage1_outputs from the DB, chunks their raw_memory and rollout_summary
|
||||
* content, inserts chunks into memory_chunks with source_type='cli_history' and
|
||||
* metadata indicating the V2 origin, then triggers embedding generation.
|
||||
*
|
||||
* Uses source_id format: "s1:{thread_id}" to differentiate from regular cli_history chunks.
|
||||
*
|
||||
* @param projectPath - Project root path
|
||||
* @param force - Force re-chunking even if chunks exist
|
||||
* @returns Embedding result
|
||||
*/
|
||||
export async function embedStage1Outputs(
|
||||
projectPath: string,
|
||||
force: boolean = false
|
||||
): Promise<Stage1EmbedResult> {
|
||||
try {
|
||||
const store = getCoreMemoryStore(projectPath);
|
||||
const stage1Outputs = store.listStage1Outputs();
|
||||
|
||||
if (stage1Outputs.length === 0) {
|
||||
return { success: true, chunksCreated: 0, chunksEmbedded: 0 };
|
||||
}
|
||||
|
||||
let totalChunksCreated = 0;
|
||||
|
||||
for (const output of stage1Outputs) {
|
||||
const sourceId = `s1:${output.thread_id}`;
|
||||
|
||||
// Check if already chunked
|
||||
const existingChunks = store.getChunks(sourceId);
|
||||
if (existingChunks.length > 0 && !force) continue;
|
||||
|
||||
// Delete old chunks if force
|
||||
if (force && existingChunks.length > 0) {
|
||||
store.deleteChunks(sourceId);
|
||||
}
|
||||
|
||||
// Combine raw_memory and rollout_summary for richer semantic content
|
||||
const combinedContent = [
|
||||
output.rollout_summary ? `## Summary\n${output.rollout_summary}` : '',
|
||||
output.raw_memory ? `## Raw Memory\n${output.raw_memory}` : '',
|
||||
].filter(Boolean).join('\n\n');
|
||||
|
||||
if (!combinedContent.trim()) continue;
|
||||
|
||||
// Chunk using the store's built-in chunking
|
||||
const chunks = store.chunkContent(combinedContent, sourceId, 'cli_history');
|
||||
|
||||
// Insert chunks with V2 metadata
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
store.insertChunk({
|
||||
source_id: sourceId,
|
||||
source_type: 'cli_history',
|
||||
chunk_index: i,
|
||||
content: chunks[i],
|
||||
metadata: JSON.stringify({
|
||||
v2_source: 'stage1_output',
|
||||
thread_id: output.thread_id,
|
||||
generated_at: output.generated_at,
|
||||
}),
|
||||
created_at: new Date().toISOString(),
|
||||
});
|
||||
totalChunksCreated++;
|
||||
}
|
||||
}
|
||||
|
||||
// If we created chunks, generate embeddings
|
||||
let chunksEmbedded = 0;
|
||||
if (totalChunksCreated > 0) {
|
||||
const paths = StoragePaths.project(projectPath);
|
||||
const dbPath = join(paths.root, 'core-memory', 'core_memory.db');
|
||||
|
||||
const embedResult = await generateEmbeddings(dbPath, { force: false });
|
||||
if (embedResult.success) {
|
||||
chunksEmbedded = embedResult.chunks_processed;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
chunksCreated: totalChunksCreated,
|
||||
chunksEmbedded,
|
||||
};
|
||||
} catch (err) {
|
||||
return {
|
||||
success: false,
|
||||
chunksCreated: 0,
|
||||
chunksEmbedded: 0,
|
||||
error: (err as Error).message,
|
||||
};
|
||||
}
|
||||
export function isEmbedderAvailable(): boolean {
|
||||
return false;
|
||||
}
|
||||
|
||||
export async function generateEmbeddings(
|
||||
_dbPath: string,
|
||||
_options: EmbedOptions = {}
|
||||
): Promise<EmbedResult> {
|
||||
return {
|
||||
success: false,
|
||||
chunks_processed: 0,
|
||||
chunks_failed: 0,
|
||||
elapsed_time: 0,
|
||||
error: V1_REMOVED,
|
||||
};
|
||||
}
|
||||
|
||||
export async function searchMemories(
|
||||
_dbPath: string,
|
||||
_query: string,
|
||||
_options: SearchOptions = {}
|
||||
): Promise<SearchResult> {
|
||||
return {
|
||||
success: false,
|
||||
matches: [],
|
||||
error: V1_REMOVED,
|
||||
};
|
||||
}
|
||||
|
||||
export async function getEmbeddingStatus(_dbPath: string): Promise<EmbeddingStatus> {
|
||||
return {
|
||||
success: false,
|
||||
total_chunks: 0,
|
||||
embedded_chunks: 0,
|
||||
pending_chunks: 0,
|
||||
by_type: {},
|
||||
error: V1_REMOVED,
|
||||
};
|
||||
}
|
||||
|
||||
export async function embedStage1Outputs(
|
||||
_projectPath: string,
|
||||
_force: boolean = false
|
||||
): Promise<Stage1EmbedResult> {
|
||||
return {
|
||||
success: false,
|
||||
chunksCreated: 0,
|
||||
chunksEmbedded: 0,
|
||||
error: V1_REMOVED,
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user