feat: Add vector embeddings for core memory semantic search

- Add memory_chunks table for storing chunked content with embeddings
- Create Python embedder script (memory_embedder.py) using CodexLens fastembed
- Add TypeScript bridge (memory-embedder-bridge.ts) for Python interop
- Implement content chunking with paragraph/sentence-aware splitting (see sketch below)
- Add vectorSimilarity dimension to clustering (weight 0.3)
- New CLI commands: ccw memory embed, search, embed-status
- Extend core-memory MCP tool with embed/search/embed_status operations

Clustering improvement: max relevance 0.388 → 0.809 (+109%)
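
For illustration only, a minimal TypeScript sketch of what the paragraph/sentence-aware splitting could look like; `chunkContent` and `maxChars` are hypothetical names and limits, not the code introduced by this commit:

```typescript
// Hypothetical sketch of paragraph/sentence-aware chunking. Names and the
// character budget are illustrative, not the actual memory_embedder logic.
export function chunkContent(text: string, maxChars = 1200): string[] {
  const chunks: string[] = [];
  let current = '';

  const flush = () => {
    if (current.trim().length > 0) chunks.push(current.trim());
    current = '';
  };

  // Prefer paragraph boundaries (blank lines) so chunks stay coherent.
  for (const paragraph of text.split(/\n\s*\n/)) {
    if (paragraph.length <= maxChars) {
      if (current.length + paragraph.length + 2 > maxChars) flush();
      current += (current ? '\n\n' : '') + paragraph;
      continue;
    }
    // Oversized paragraph: fall back to sentence boundaries.
    flush();
    for (const sentence of paragraph.split(/(?<=[.!?])\s+/)) {
      if (current.length + sentence.length + 1 > maxChars) flush();
      current += (current ? ' ' : '') + sentence;
    }
    flush();
  }
  flush();
  return chunks;
}
```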

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Author: catlog22
Date: 2025-12-20 13:09:43 +08:00
Parent: ea284d739a
Commit: 31cc060837
7 changed files with 1543 additions and 18 deletions

@@ -181,19 +181,24 @@ export function run(argv: string[]): void {
program
.command('memory [subcommand] [args...]')
.description('Memory module for context tracking and prompt optimization')
-  .option('--type <type>', 'Entity type: file, module, topic')
+  .option('--type <type>', 'Entity type: file, module, topic (track) OR source type: core_memory, workflow, cli_history (search)')
.option('--action <action>', 'Action: read, write, mention')
.option('--value <value>', 'Entity value (file path, etc.)')
.option('--session <session>', 'Session ID')
.option('--stdin', 'Read input from stdin (for Claude Code hooks)')
.option('--source <source>', 'Import source: history, sessions, all', 'all')
.option('--project <project>', 'Project name filter')
-  .option('--limit <n>', 'Number of results', '20')
+  .option('--limit <n>', 'Number of results (prompt search)', '20')
.option('--sort <field>', 'Sort by: heat, reads, writes', 'heat')
.option('--json', 'Output as JSON')
.option('--context <text>', 'Current task context')
.option('--older-than <age>', 'Age threshold for pruning', '30d')
.option('--dry-run', 'Preview without deleting')
+  .option('--id <id>', 'Memory/session ID (for embed command)')
+  .option('--force', 'Force re-embed all chunks')
+  .option('--batch-size <n>', 'Batch size for embedding', '8')
+  .option('--top-k <n>', 'Number of semantic search results', '10')
+  .option('--min-score <f>', 'Minimum similarity score for semantic search', '0.5')
.action((subcommand, args, options) => memoryCommand(subcommand, args, options));
// Core Memory command
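
For context on the new --top-k and --min-score options, here is a hypothetical TypeScript sketch of how cosine-similarity ranking could apply them during semantic search; `rankChunks`, `ScoredChunk`, and `cosineSimilarity` are illustrative names, while the defaults (10 and 0.5) mirror the option defaults registered above:

```typescript
// Hypothetical ranking helper: score chunks against a query embedding,
// drop anything below minScore, and keep the topK best matches.
interface ScoredChunk {
  id: string;
  score: number; // cosine similarity in [-1, 1]
}

function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  return dot / (Math.sqrt(normA) * Math.sqrt(normB) || 1);
}

function rankChunks(
  query: number[],
  chunks: { id: string; embedding: number[] }[],
  topK = 10,
  minScore = 0.5,
): ScoredChunk[] {
  return chunks
    .map((c) => ({ id: c.id, score: cosineSimilarity(query, c.embedding) }))
    .filter((c) => c.score >= minScore)
    .sort((a, b) => b.score - a.score)
    .slice(0, topK);
}
```

Under this reading, ccw memory search would return at most top-k chunks whose similarity to the query embedding meets min-score.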