feat: add MCP server for semantic code search with FastMCP integration

2026-03-22 19:18:47 +08:00 · 2026-03-17 23:03:20 +08:00
parent ef2c5a58e1
commit ad9d3f94e0
80 changed files with 3427 additions and 21329 deletions
--- a/ccw/src/core/memory-embedder-bridge.ts
+++ b/ccw/src/core/memory-embedder-bridge.ts
@@ -1,37 +1,13 @@
 /**
- * Memory Embedder Bridge - TypeScript interface to Python memory embedder
+ * Memory Embedder Bridge - STUB (v1 Python bridge removed)
 *
- * This module provides a TypeScript bridge to the Python memory_embedder.py script,
- * which generates and searches embeddings for memory chunks using CodexLens's embedder.
- *
- * Features:
- * - Reuses CodexLens venv at ~/.codexlens/venv
- * - JSON protocol communication
- * - Three commands: embed, search, status
- * - Automatic availability checking
- * - Stage1 output embedding for V2 pipeline
+ * The Python memory_embedder.py bridge has been removed. This module provides
+ * no-op stubs so that existing consumers compile without errors.
 */

-import { spawn } from 'child_process';
-import { join, dirname } from 'path';
-import { existsSync } from 'fs';
-import { fileURLToPath } from 'url';
-import { getCodexLensHiddenPython } from '../utils/codexlens-path.js';
-import { getCoreMemoryStore } from './core-memory-store.js';
-import type { Stage1Output } from './core-memory-store.js';
-import { StoragePaths } from '../config/storage-paths.js';
+const V1_REMOVED = 'Memory embedder Python bridge has been removed (v1 cleanup).';

-// Get directory of this module
-const __filename = fileURLToPath(import.meta.url);
-const __dirname = dirname(__filename);
-
-// Venv paths (reuse CodexLens venv)
-const VENV_PYTHON = getCodexLensHiddenPython();
-
-// Script path
-const EMBEDDER_SCRIPT = join(__dirname, '..', '..', 'scripts', 'memory_embedder.py');
-
-// Types
+// Types (kept for backward compatibility)
 export interface EmbedResult {
  success: boolean;
  chunks_processed: number;
@@ -78,197 +54,6 @@ export interface SearchOptions {
  sourceType?: 'core_memory' | 'workflow' | 'cli_history';
 }

-/**
- * Check if embedder is available (venv and script exist)
- * @returns True if embedder is available
- */
-export function isEmbedderAvailable(): boolean {
-  // Check venv python exists
-  if (!existsSync(VENV_PYTHON)) {
-    return false;
-  }
-
-  // Check script exists
-  if (!existsSync(EMBEDDER_SCRIPT)) {
-    return false;
-  }
-
-  return true;
-}
-
-/**
- * Run Python script with arguments
- * @param args - Command line arguments
- * @param timeout - Timeout in milliseconds
- * @returns JSON output from script
- */
-function runPython(args: string[], timeout: number = 300000): Promise<string> {
-  return new Promise((resolve, reject) => {
-    // Check availability
-    if (!isEmbedderAvailable()) {
-      reject(
-        new Error(
-          'Memory embedder not available. Ensure CodexLens venv exists at ~/.codexlens/venv'
-        )
-      );
-      return;
-    }
-
-    // Spawn Python process
-    const child = spawn(VENV_PYTHON, [EMBEDDER_SCRIPT, ...args], {
-      shell: false,
-      stdio: ['ignore', 'pipe', 'pipe'],
-      timeout,
-      windowsHide: true,
-      env: { ...process.env, PYTHONIOENCODING: 'utf-8' },
-    });
-
-    let stdout = '';
-    let stderr = '';
-
-    child.stdout.on('data', (data) => {
-      stdout += data.toString();
-    });
-
-    child.stderr.on('data', (data) => {
-      stderr += data.toString();
-    });
-
-    child.on('close', (code) => {
-      if (code === 0) {
-        resolve(stdout.trim());
-      } else {
-        reject(new Error(`Python script failed (exit code ${code}): ${stderr || stdout}`));
-      }
-    });
-
-    child.on('error', (err) => {
-      if ((err as NodeJS.ErrnoException).code === 'ETIMEDOUT') {
-        reject(new Error('Python script timed out'));
-      } else {
-        reject(new Error(`Failed to spawn Python: ${err.message}`));
-      }
-    });
-  });
-}
-
-/**
- * Generate embeddings for memory chunks
- * @param dbPath - Path to SQLite database
- * @param options - Embedding options
- * @returns Embedding result
- */
-export async function generateEmbeddings(
-  dbPath: string,
-  options: EmbedOptions = {}
-): Promise<EmbedResult> {
-  const { sourceId, batchSize = 8, force = false } = options;
-
-  // Build arguments
-  const args = ['embed', dbPath];
-
-  if (sourceId) {
-    args.push('--source-id', sourceId);
-  }
-
-  if (batchSize !== 8) {
-    args.push('--batch-size', batchSize.toString());
-  }
-
-  if (force) {
-    args.push('--force');
-  }
-
-  try {
-    // Default timeout: 5 minutes
-    const output = await runPython(args, 300000);
-    const result = JSON.parse(output) as EmbedResult;
-    return result;
-  } catch (err) {
-    return {
-      success: false,
-      chunks_processed: 0,
-      chunks_failed: 0,
-      elapsed_time: 0,
-      error: (err as Error).message,
-    };
-  }
-}
-
-/**
- * Search memory chunks using semantic search
- * @param dbPath - Path to SQLite database
- * @param query - Search query text
- * @param options - Search options
- * @returns Search results
- */
-export async function searchMemories(
-  dbPath: string,
-  query: string,
-  options: SearchOptions = {}
-): Promise<SearchResult> {
-  const { topK = 10, minScore = 0.3, sourceType } = options;
-
-  // Build arguments
-  const args = ['search', dbPath, query];
-
-  if (topK !== 10) {
-    args.push('--top-k', topK.toString());
-  }
-
-  if (minScore !== 0.3) {
-    args.push('--min-score', minScore.toString());
-  }
-
-  if (sourceType) {
-    args.push('--type', sourceType);
-  }
-
-  try {
-    // Default timeout: 30 seconds
-    const output = await runPython(args, 30000);
-    const result = JSON.parse(output) as SearchResult;
-    return result;
-  } catch (err) {
-    return {
-      success: false,
-      matches: [],
-      error: (err as Error).message,
-    };
-  }
-}
-
-/**
- * Get embedding status statistics
- * @param dbPath - Path to SQLite database
- * @returns Embedding status
- */
-export async function getEmbeddingStatus(dbPath: string): Promise<EmbeddingStatus> {
-  // Build arguments
-  const args = ['status', dbPath];
-
-  try {
-    // Default timeout: 30 seconds
-    const output = await runPython(args, 30000);
-    const result = JSON.parse(output) as EmbeddingStatus;
-    return { ...result, success: true };
-  } catch (err) {
-    return {
-      success: false,
-      total_chunks: 0,
-      embedded_chunks: 0,
-      pending_chunks: 0,
-      by_type: {},
-      error: (err as Error).message,
-    };
-  }
-}
-
-// ============================================================================
-// Memory V2: Stage1 Output Embedding
-// ============================================================================
-
-/** Result of stage1 embedding operation */
 export interface Stage1EmbedResult {
  success: boolean;
  chunksCreated: number;
@@ -276,98 +61,54 @@ export interface Stage1EmbedResult {
  error?: string;
 }

-/**
- * Chunk and embed stage1_outputs (raw_memory + rollout_summary) for semantic search.
- *
- * Reads all stage1_outputs from the DB, chunks their raw_memory and rollout_summary
- * content, inserts chunks into memory_chunks with source_type='cli_history' and
- * metadata indicating the V2 origin, then triggers embedding generation.
- *
- * Uses source_id format: "s1:{thread_id}" to differentiate from regular cli_history chunks.
- *
- * @param projectPath - Project root path
- * @param force - Force re-chunking even if chunks exist
- * @returns Embedding result
- */
-export async function embedStage1Outputs(
-  projectPath: string,
-  force: boolean = false
-): Promise<Stage1EmbedResult> {
-  try {
-    const store = getCoreMemoryStore(projectPath);
-    const stage1Outputs = store.listStage1Outputs();
-
-    if (stage1Outputs.length === 0) {
-      return { success: true, chunksCreated: 0, chunksEmbedded: 0 };
-    }
-
-    let totalChunksCreated = 0;
-
-    for (const output of stage1Outputs) {
-      const sourceId = `s1:${output.thread_id}`;
-
-      // Check if already chunked
-      const existingChunks = store.getChunks(sourceId);
-      if (existingChunks.length > 0 && !force) continue;
-
-      // Delete old chunks if force
-      if (force && existingChunks.length > 0) {
-        store.deleteChunks(sourceId);
-      }
-
-      // Combine raw_memory and rollout_summary for richer semantic content
-      const combinedContent = [
-        output.rollout_summary ? `## Summary\n${output.rollout_summary}` : '',
-        output.raw_memory ? `## Raw Memory\n${output.raw_memory}` : '',
-      ].filter(Boolean).join('\n\n');
-
-      if (!combinedContent.trim()) continue;
-
-      // Chunk using the store's built-in chunking
-      const chunks = store.chunkContent(combinedContent, sourceId, 'cli_history');
-
-      // Insert chunks with V2 metadata
-      for (let i = 0; i < chunks.length; i++) {
-        store.insertChunk({
-          source_id: sourceId,
-          source_type: 'cli_history',
-          chunk_index: i,
-          content: chunks[i],
-          metadata: JSON.stringify({
-            v2_source: 'stage1_output',
-            thread_id: output.thread_id,
-            generated_at: output.generated_at,
-          }),
-          created_at: new Date().toISOString(),
-        });
-        totalChunksCreated++;
-      }
-    }
-
-    // If we created chunks, generate embeddings
-    let chunksEmbedded = 0;
-    if (totalChunksCreated > 0) {
-      const paths = StoragePaths.project(projectPath);
-      const dbPath = join(paths.root, 'core-memory', 'core_memory.db');
-
-      const embedResult = await generateEmbeddings(dbPath, { force: false });
-      if (embedResult.success) {
-        chunksEmbedded = embedResult.chunks_processed;
-      }
-    }
-
-    return {
-      success: true,
-      chunksCreated: totalChunksCreated,
-      chunksEmbedded,
-    };
-  } catch (err) {
-    return {
-      success: false,
-      chunksCreated: 0,
-      chunksEmbedded: 0,
-      error: (err as Error).message,
-    };
-  }
+export function isEmbedderAvailable(): boolean {
+  return false;
 }

+export async function generateEmbeddings(
+  _dbPath: string,
+  _options: EmbedOptions = {}
+): Promise<EmbedResult> {
+  return {
+    success: false,
+    chunks_processed: 0,
+    chunks_failed: 0,
+    elapsed_time: 0,
+    error: V1_REMOVED,
+  };
+}
+
+export async function searchMemories(
+  _dbPath: string,
+  _query: string,
+  _options: SearchOptions = {}
+): Promise<SearchResult> {
+  return {
+    success: false,
+    matches: [],
+    error: V1_REMOVED,
+  };
+}
+
+export async function getEmbeddingStatus(_dbPath: string): Promise<EmbeddingStatus> {
+  return {
+    success: false,
+    total_chunks: 0,
+    embedded_chunks: 0,
+    pending_chunks: 0,
+    by_type: {},
+    error: V1_REMOVED,
+  };
+}
+
+export async function embedStage1Outputs(
+  _projectPath: string,
+  _force: boolean = false
+): Promise<Stage1EmbedResult> {
+  return {
+    success: false,
+    chunksCreated: 0,
+    chunksEmbedded: 0,
+    error: V1_REMOVED,
+  };
+}