feat: Add cluster management commands for deletion, merging, and deduplication

This commit is contained in:
catlog22
2025-12-20 12:23:08 +08:00
parent ab06ed0083
commit ea284d739a
4 changed files with 625 additions and 22 deletions

View File

@@ -216,6 +216,12 @@ export function run(argv: string[]): void {
.option('--members <ids>', 'Cluster member IDs (comma-separated)')
.option('--status <status>', 'Cluster status filter')
.option('--level <level>', 'Context level: metadata, keyFiles, full')
.option('--delete', 'Delete a cluster')
.option('--merge <ids>', 'Merge clusters into target (comma-separated source IDs)')
.option('--dedup', 'Deduplicate clusters by merging similar ones')
.option('--output <file>', 'Output file path for export')
.option('--overwrite', 'Overwrite existing memories when importing')
.option('--prefix <prefix>', 'Add prefix to imported memory IDs')
.action((subcommand, args, options) => coreMemoryCommand(subcommand, args, options));
program.parse(argv);

View File

@@ -1,10 +1,16 @@
/**
* Core Memory Command - Simplified CLI for core memory management
* Four commands: list, import, export, summary
* Commands: list, import, export, summary, projects, cluster
*/
import chalk from 'chalk';
import { getCoreMemoryStore } from '../core/core-memory-store.js';
import {
getCoreMemoryStore,
listAllProjects,
getMemoriesFromProject,
exportMemories,
importMemories
} from '../core/core-memory-store.js';
import { notifyRefreshRequired } from '../tools/notifier.js';
interface CommandOptions {
@@ -13,6 +19,11 @@ interface CommandOptions {
status?: string;
json?: boolean;
auto?: boolean;
output?: string;
from?: string;
overwrite?: boolean;
prefix?: string;
all?: boolean;
scope?: string;
create?: boolean;
name?: string;
@@ -20,6 +31,9 @@ interface CommandOptions {
format?: string;
level?: string;
type?: string;
delete?: boolean;
merge?: string;
dedup?: boolean;
}
/**
@@ -91,7 +105,7 @@ async function importAction(text: string): Promise<void> {
}
/**
* Export a memory as plain text
* Export a memory as plain text (searches all projects if not found locally)
*/
async function exportAction(options: CommandOptions): Promise<void> {
const { id } = options;
@@ -103,11 +117,30 @@ async function exportAction(options: CommandOptions): Promise<void> {
}
try {
// First try current project
const store = getCoreMemoryStore(getProjectPath());
const memory = store.getMemory(id);
let memory = store.getMemory(id);
// If not found, search all projects
if (!memory) {
const projects = listAllProjects();
for (const project of projects) {
try {
const memories = getMemoriesFromProject(project.id);
const found = memories.find(m => m.id === id);
if (found) {
memory = found;
console.error(chalk.gray(`Found in project: ${project.id}`));
break;
}
} catch {
// Skip projects that can't be read
}
}
}
if (!memory) {
console.error(chalk.red(`Error: Memory "${id}" not found`));
console.error(chalk.red(`Error: Memory "${id}" not found in any project`));
process.exit(1);
}
@@ -120,6 +153,125 @@ async function exportAction(options: CommandOptions): Promise<void> {
}
}
/**
 * Print every known CCW project with its memory and cluster counts.
 *
 * With `options.json` set, the raw project list is written as pretty-printed
 * JSON and nothing else is rendered. Otherwise a human-readable listing is
 * printed, one entry per project. Any error is reported on stderr and the
 * process exits with code 1.
 */
async function projectsAction(options: CommandOptions): Promise<void> {
  const divider = ' ─────────────────────────────────────────────────────────────────';

  try {
    const allProjects = listAllProjects();

    if (options.json) {
      console.log(JSON.stringify(allProjects, null, 2));
      return;
    }

    console.log(chalk.bold.cyan('\n All CCW Projects\n'));

    if (allProjects.length === 0) {
      console.log(chalk.yellow(' No projects found\n'));
      return;
    }

    console.log(chalk.gray(divider));

    allProjects.forEach((entry) => {
      // A project counts as populated when it holds at least one memory or cluster.
      const populated = [entry.memoriesCount, entry.clustersCount].some((count) => count > 0);
      const bullet = populated ? '●' : '○';
      const paint = populated ? chalk.cyan : chalk.gray;

      console.log(paint(` ${bullet} ${entry.id}`));
      console.log(chalk.white(` Path: ${entry.path}`));
      console.log(chalk.white(` Memories: ${entry.memoriesCount} | Clusters: ${entry.clustersCount}`));

      if (entry.lastUpdated) {
        const stamp = new Date(entry.lastUpdated).toLocaleString();
        console.log(chalk.gray(` Last updated: ${stamp}`));
      }

      console.log(chalk.gray(divider));
    });

    console.log(chalk.gray(`\n Total: ${allProjects.length} projects\n`));
  } catch (error) {
    console.error(chalk.red(`Error: ${(error as Error).message}`));
    process.exit(1);
  }
}
/**
 * Import memories into the current project from a JSON file or another project.
 *
 * @param source File path (`*.json`) or project ID to import from; when empty,
 *               usage help is printed and the process exits with code 1.
 * @param options Uses `overwrite` and `prefix`, forwarded to `importMemories`.
 */
async function importFromAction(source: string, options: CommandOptions): Promise<void> {
  if (!source) {
    console.error(chalk.red('Error: Source is required'));
    for (const hint of [
      'Usage: ccw core-memory import-from <source>',
      ' source: file.json or project-id'
    ]) {
      console.error(chalk.gray(hint));
    }
    process.exit(1);
  }

  try {
    const targetPath = getProjectPath();
    const { overwrite, prefix } = options;
    const { imported, skipped } = importMemories(targetPath, source, { overwrite, prefix });

    console.log(chalk.green(`✓ Import complete`));
    console.log(chalk.white(` Imported: ${imported}`));
    console.log(chalk.white(` Skipped: ${skipped} (already exist)`));

    // Only ask the dashboard to refresh when something actually changed.
    if (imported > 0) {
      notifyRefreshRequired('memory').catch(() => { /* ignore */ });
    }
  } catch (error) {
    console.error(chalk.red(`Error: ${(error as Error).message}`));
    process.exit(1);
  }
}
/**
 * List memories stored in another project.
 *
 * With `options.json` set, the memories are written as pretty-printed JSON.
 * Otherwise each memory is rendered with its ID, a one-line preview and its
 * last-update time. The preview is the memory's summary when one exists;
 * otherwise it is the first 80 characters of the content, followed by "..."
 * only when the content was actually truncated.
 *
 * @param projectId ID of the project to read from; when empty, usage help is
 *                  printed and the process exits with code 1.
 * @param options Uses `json` to select the output format.
 */
async function listFromAction(projectId: string, options: CommandOptions): Promise<void> {
  if (!projectId) {
    console.error(chalk.red('Error: Project ID is required'));
    console.error(chalk.gray('Usage: ccw core-memory list-from <project-id>'));
    console.error(chalk.gray(' Use "ccw core-memory projects" to see available projects'));
    process.exit(1);
  }

  const PREVIEW_LENGTH = 80;
  const divider = ' ─────────────────────────────────────────────────────────────────';

  try {
    const memories = getMemoriesFromProject(projectId);

    if (options.json) {
      console.log(JSON.stringify(memories, null, 2));
      return;
    }

    console.log(chalk.bold.cyan(`\n Memories from ${projectId}\n`));

    if (memories.length === 0) {
      console.log(chalk.yellow(' No memories found\n'));
      return;
    }

    console.log(chalk.gray(divider));

    for (const memory of memories) {
      const date = new Date(memory.updated_at).toLocaleString();
      const archived = memory.archived ? chalk.gray(' [archived]') : '';

      // The ellipsis marks truncated content only; a summary is shown in full
      // and must not get a trailing "..." just because the content is long.
      let preview: string;
      if (memory.summary) {
        preview = memory.summary;
      } else if (memory.content.length > PREVIEW_LENGTH) {
        preview = `${memory.content.substring(0, PREVIEW_LENGTH)}...`;
      } else {
        preview = memory.content;
      }

      console.log(chalk.cyan(` ${memory.id}`) + archived);
      console.log(chalk.white(` ${preview}`));
      console.log(chalk.gray(` Updated: ${date}`));
      console.log(chalk.gray(divider));
    }

    console.log(chalk.gray(`\n Total: ${memories.length}\n`));
  } catch (error) {
    console.error(chalk.red(`Error: ${(error as Error).message}`));
    process.exit(1);
  }
}
/**
* Generate summary for a memory
*/
@@ -223,6 +375,67 @@ async function clusterAction(clusterId: string | undefined, options: CommandOpti
return;
}
// Deduplicate clusters
if (options.dedup) {
const { SessionClusteringService } = await import('../core/session-clustering-service.js');
const service = new SessionClusteringService(getProjectPath());
console.log(chalk.cyan('🔄 Deduplicating clusters...'));
const result = await service.deduplicateClusters();
console.log(chalk.green(`✓ Deduplication complete`));
console.log(chalk.white(` Merged: ${result.merged} clusters`));
console.log(chalk.white(` Deleted: ${result.deleted} empty clusters`));
console.log(chalk.white(` Remaining: ${result.remaining} clusters`));
// Notify dashboard
notifyRefreshRequired('memory').catch(() => { /* ignore */ });
return;
}
// Delete cluster
if (options.delete && clusterId) {
const cluster = store.getCluster(clusterId);
if (!cluster) {
console.error(chalk.red(`Cluster not found: ${clusterId}`));
process.exit(1);
}
const deleted = store.deleteCluster(clusterId);
if (deleted) {
console.log(chalk.green(`✓ Deleted cluster: ${clusterId}`));
notifyRefreshRequired('memory').catch(() => { /* ignore */ });
} else {
console.error(chalk.red(`Failed to delete cluster: ${clusterId}`));
process.exit(1);
}
return;
}
// Merge clusters
if (options.merge && clusterId) {
const targetCluster = store.getCluster(clusterId);
if (!targetCluster) {
console.error(chalk.red(`Target cluster not found: ${clusterId}`));
process.exit(1);
}
const sourceIds = options.merge.split(',').map(s => s.trim());
console.log(chalk.cyan(`🔄 Merging ${sourceIds.length} clusters into ${clusterId}...`));
try {
const membersMoved = store.mergeClusters(clusterId, sourceIds);
console.log(chalk.green(`✓ Merged successfully`));
console.log(chalk.white(` Members moved: ${membersMoved}`));
console.log(chalk.white(` Clusters deleted: ${sourceIds.length}`));
notifyRefreshRequired('memory').catch(() => { /* ignore */ });
} catch (error) {
console.error(chalk.red(`Failed to merge: ${(error as Error).message}`));
process.exit(1);
}
return;
}
// Create new cluster
if (options.create) {
if (!options.name) {
@@ -499,19 +712,38 @@ export async function coreMemoryCommand(
await searchAction(textArg, options);
break;
case 'projects':
await projectsAction(options);
break;
case 'import-from':
await importFromAction(textArg, options);
break;
case 'list-from':
await listFromAction(textArg, options);
break;
default:
console.log(chalk.bold.cyan('\n CCW Core Memory\n'));
console.log(' Manage core memory entries and session clusters.\n');
console.log(chalk.bold(' Basic Commands:'));
console.log(chalk.white(' list ') + chalk.gray('List all memories'));
console.log(chalk.white(' import "<text>" ') + chalk.gray('Import text as new memory'));
console.log(chalk.white(' export --id <id> ') + chalk.gray('Export memory as plain text'));
console.log(chalk.white(' export --id <id> ') + chalk.gray('Export memory (searches all projects)'));
console.log(chalk.white(' summary --id <id> ') + chalk.gray('Generate AI summary'));
console.log();
console.log(chalk.bold(' Cross-Workspace Commands:'));
console.log(chalk.white(' projects ') + chalk.gray('List all CCW projects'));
console.log(chalk.white(' list-from <project-id> ') + chalk.gray('List memories from another project'));
console.log();
console.log(chalk.bold(' Clustering Commands:'));
console.log(chalk.white(' clusters [--status] ') + chalk.gray('List all clusters'));
console.log(chalk.white(' cluster [id] ') + chalk.gray('View cluster details'));
console.log(chalk.white(' cluster --auto ') + chalk.gray('Run auto-clustering'));
console.log(chalk.white(' cluster --dedup ') + chalk.gray('Deduplicate similar clusters'));
console.log(chalk.white(' cluster <id> --delete ') + chalk.gray('Delete a cluster'));
console.log(chalk.white(' cluster <id> --merge <ids> ') + chalk.gray('Merge clusters into target'));
console.log(chalk.white(' cluster --create --name ') + chalk.gray('Create new cluster'));
console.log(chalk.white(' context ') + chalk.gray('Get progressive index'));
console.log(chalk.white(' load-cluster <id> ') + chalk.gray('Load cluster context'));
@@ -520,28 +752,19 @@ export async function coreMemoryCommand(
console.log(chalk.bold(' Options:'));
console.log(chalk.gray(' --id <id> Memory ID (for export/summary)'));
console.log(chalk.gray(' --tool gemini|qwen AI tool for summary (default: gemini)'));
console.log(chalk.gray(' --status <status> Filter by status (active/archived/merged)'));
console.log(chalk.gray(' --json Output as JSON'));
console.log(chalk.gray(' --scope <scope> Auto-cluster scope (all/recent/unclustered)'));
console.log(chalk.gray(' --name <name> Cluster name (for --create)'));
console.log(chalk.gray(' --members <ids> Comma-separated session IDs (for --create)'));
console.log(chalk.gray(' --format <format> Output format (markdown/json)'));
console.log(chalk.gray(' --level <level> Detail level (metadata/keyFiles/full)'));
console.log(chalk.gray(' --type <type> Filter by type (core/workflow/cli/all)'));
console.log(chalk.gray(' --dedup Deduplicate similar clusters'));
console.log(chalk.gray(' --delete Delete a cluster'));
console.log(chalk.gray(' --merge <ids> Merge source clusters into target'));
console.log();
console.log(chalk.bold(' Examples:'));
console.log(chalk.gray(' # Basic commands'));
console.log(chalk.gray(' ccw core-memory list'));
console.log(chalk.gray(' ccw core-memory import "Important context"'));
console.log(chalk.gray(' ccw core-memory export --id CMEM-20251217-143022'));
console.log();
console.log(chalk.gray(' # Clustering commands'));
console.log(chalk.gray(' ccw core-memory clusters'));
console.log(chalk.gray(' ccw core-memory export --id CMEM-xxx # Searches all projects'));
console.log(chalk.gray(' ccw core-memory projects # List all projects'));
console.log(chalk.gray(' ccw core-memory list-from d--other-project'));
console.log(chalk.gray(' ccw core-memory cluster --auto'));
console.log(chalk.gray(' ccw core-memory cluster CLU-001'));
console.log(chalk.gray(' ccw core-memory cluster --create --name "Auth Module"'));
console.log(chalk.gray(' ccw core-memory load-cluster CLU-001 --level full'));
console.log(chalk.gray(' ccw core-memory search authentication --type workflow'));
console.log(chalk.gray(' ccw core-memory cluster --dedup'));
console.log();
}
}

View File

@@ -522,6 +522,60 @@ ${memory.content}
return result.changes > 0;
}
/**
 * Merge one or more source clusters into a target cluster.
 *
 * Every member of each source cluster whose `session_id` is not already in the
 * target is added to the target, appended after the target's current members.
 * Each source cluster is then deleted. Finally the target's description is
 * replaced with a "Merged cluster with N sessions" summary.
 *
 * Source IDs that are blank or equal to the target ID are ignored.
 *
 * @param targetClusterId The cluster to keep
 * @param sourceClusterIds The clusters to merge into target (will be deleted)
 * @returns Number of members moved into the target
 * @throws Error if the target cluster does not exist
 */
mergeClusters(targetClusterId: string, sourceClusterIds: string[]): number {
  const targetCluster = this.getCluster(targetClusterId);
  if (!targetCluster) {
    throw new Error(`Target cluster not found: ${targetClusterId}`);
  }

  const targetMembers = this.getClusterMembers(targetClusterId);
  const existingMembers = new Set(targetMembers.map(m => m.session_id));

  // One running counter for the whole merge. Re-reading the target size for
  // each source AND adding the cumulative moved count would count the
  // already-moved members twice and leave gaps in sequence_order.
  let nextOrder = targetMembers.length + 1;
  let membersMoved = 0;

  for (const sourceId of sourceClusterIds) {
    if (!sourceId || sourceId === targetClusterId) continue;

    const sourceMembers = this.getClusterMembers(sourceId);

    for (const member of sourceMembers) {
      // Skip if already exists in target
      if (existingMembers.has(member.session_id)) continue;

      // Move member to target cluster
      this.addClusterMember({
        cluster_id: targetClusterId,
        session_id: member.session_id,
        session_type: member.session_type,
        sequence_order: nextOrder,
        relevance_score: member.relevance_score
      });

      existingMembers.add(member.session_id);
      nextOrder++;
      membersMoved++;
    }

    // Delete source cluster
    this.deleteCluster(sourceId);
  }

  // Update target cluster description
  const finalMembers = this.getClusterMembers(targetClusterId);
  this.updateCluster(targetClusterId, {
    description: `Merged cluster with ${finalMembers.length} sessions`
  });

  return membersMoved;
}
/**
* Add member to cluster
*/
@@ -784,6 +838,219 @@ export function getCoreMemoryStore(projectPath: string): CoreMemoryStore {
return storeCache.get(normalizedPath)!;
}
// ============================================================================
// Cross-workspace management functions
// ============================================================================
import { readdirSync, writeFileSync, readFileSync } from 'fs';
import { homedir } from 'os';
/** Summary of one project directory found under the CCW projects folder, as produced by listAllProjects. */
export interface ProjectInfo {
/** Project ID; this is the name of the project's directory. */
id: string;
/** Approximate filesystem path reconstructed from the ID (best-effort, may not match the real path). */
path: string;
/** Row count of the project's `memories` table; 0 when the database is missing or unreadable. */
memoriesCount: number;
/** Row count of the project's `session_clusters` table; 0 when the table or database is missing. */
clustersCount: number;
/** Most recent `updated_at` value among the project's memories, when available. */
lastUpdated?: string;
}
/** Shape of the JSON document written by exportMemories and read back by importMemories. */
export interface ExportedMemory {
/** Export format version; exportMemories writes '1.0'. */
version: string;
/** ISO-8601 timestamp of when the export was produced. */
exportedAt: string;
/** Project path the memories were exported from. */
sourceProject: string;
/** The exported memory records. */
memories: CoreMemory[];
}
/**
 * Resolve the CCW data directory.
 *
 * A non-empty `CCW_DATA_DIR` environment variable takes precedence; otherwise
 * the directory is `.ccw` inside the current user's home directory.
 */
function getCCWHome(): string {
  const override = process.env.CCW_DATA_DIR;
  if (override) {
    return override;
  }
  return join(homedir(), '.ccw');
}
/**
 * List all projects with their memory and cluster counts.
 *
 * Scans the `projects` directory under the CCW home directory. For every
 * sub-directory it opens `core-memory/core_memory.db` read-only (when present)
 * and reads the number of memories, the number of session clusters and the
 * latest memory `updated_at`. A database that cannot be opened or queried is
 * not an error: the project is still listed, with whatever was read before
 * the failure.
 *
 * The `path` of each entry is reconstructed from the project ID and is only an
 * approximation (every remaining `-` becomes a space).
 *
 * @returns Projects sorted by `lastUpdated`, most recent first; projects with
 *          no `lastUpdated` come last. Empty array when the projects directory
 *          does not exist.
 */
export function listAllProjects(): ProjectInfo[] {
  const projectsDir = join(getCCWHome(), 'projects');

  if (!existsSync(projectsDir)) {
    return [];
  }

  const projects: ProjectInfo[] = [];
  const entries = readdirSync(projectsDir, { withFileTypes: true });

  for (const entry of entries) {
    if (!entry.isDirectory()) continue;

    const projectId = entry.name;
    const coreMemoryDb = join(projectsDir, projectId, 'core-memory', 'core_memory.db');

    let memoriesCount = 0;
    let clustersCount = 0;
    let lastUpdated: string | undefined;

    if (existsSync(coreMemoryDb)) {
      try {
        const db = new Database(coreMemoryDb, { readonly: true });
        try {
          // Count memories
          const memResult = db.prepare('SELECT COUNT(*) as count FROM memories').get() as
            | { count: number }
            | undefined;
          memoriesCount = memResult?.count || 0;

          // Count clusters
          try {
            const clusterResult = db.prepare('SELECT COUNT(*) as count FROM session_clusters').get() as
              | { count: number }
              | undefined;
            clustersCount = clusterResult?.count || 0;
          } catch {
            // Table might not exist
          }

          // Get last update time. MAX() yields NULL on an empty table, so
          // normalize it to undefined to match the declared field type.
          const lastMemory = db.prepare('SELECT MAX(updated_at) as last FROM memories').get() as
            | { last: string | null }
            | undefined;
          lastUpdated = lastMemory?.last ?? undefined;
        } finally {
          // Always release the handle, including when a query above throws.
          db.close();
        }
      } catch {
        // Database might be locked or corrupted
      }
    }

    // Convert project ID back to approximate path
    const approximatePath = projectId
      .replace(/^([a-z])--/, '$1:/') // d-- -> d:/
      .replace(/--/g, '/')
      .replace(/-/g, ' ');

    projects.push({
      id: projectId,
      path: approximatePath,
      memoriesCount,
      clustersCount,
      lastUpdated
    });
  }

  // Sort by last updated (most recent first); entries without a timestamp go
  // last and compare equal to each other so the comparator stays consistent.
  return projects.sort((a, b) => {
    if (!a.lastUpdated && !b.lastUpdated) return 0;
    if (!a.lastUpdated) return 1;
    if (!b.lastUpdated) return -1;
    return b.lastUpdated.localeCompare(a.lastUpdated);
  });
}
/**
 * Read all memories of another project, addressed by project ID.
 *
 * Opens `<CCW home>/projects/<projectId>/core-memory/core_memory.db` read-only
 * and returns every row of its `memories` table, newest `updated_at` first.
 * The database handle is closed even when the query fails.
 *
 * @param projectId Directory name of the project under the CCW projects folder
 * @returns The project's memories; `summary` defaults to '' and `archived` is
 *          coerced to a boolean
 * @throws Error if the project has no core-memory database
 */
export function getMemoriesFromProject(projectId: string): CoreMemory[] {
  const projectsDir = join(getCCWHome(), 'projects');
  const coreMemoryDb = join(projectsDir, projectId, 'core-memory', 'core_memory.db');

  if (!existsSync(coreMemoryDb)) {
    throw new Error(`Project not found: ${projectId}`);
  }

  const db = new Database(coreMemoryDb, { readonly: true });
  let rows: any[];
  try {
    const stmt = db.prepare('SELECT * FROM memories ORDER BY updated_at DESC');
    rows = stmt.all() as any[];
  } finally {
    // Release the handle on both the success and the failure path.
    db.close();
  }

  return rows.map(row => ({
    id: row.id,
    content: row.content,
    summary: row.summary || '',
    raw_output: row.raw_output,
    created_at: row.created_at,
    updated_at: row.updated_at,
    archived: Boolean(row.archived),
    metadata: row.metadata
  }));
}
/**
 * Write a project's memories to a JSON file in the ExportedMemory format.
 *
 * Up to 10000 memories are fetched from the project's store. When `options.ids`
 * is non-empty, only memories with one of those IDs are written.
 *
 * NOTE(review): `includeArchived` is passed straight through as the store's
 * `archived` filter; whether `true` means "archived too" or "archived only"
 * depends on `getMemories` — confirm.
 *
 * @param projectPath Project whose memories are exported
 * @param outputPath Destination file; written as UTF-8, pretty-printed JSON
 * @param options Optional ID filter and archived flag
 * @returns Number of memories written
 */
export function exportMemories(
  projectPath: string,
  outputPath: string,
  options?: { ids?: string[]; includeArchived?: boolean }
): number {
  const store = getCoreMemoryStore(projectPath);
  const archivedFlag = options?.includeArchived || false;
  const fetched = store.getMemories({ archived: archivedFlag, limit: 10000 });

  // Narrow to the requested IDs, if any were given.
  const wantedIds = options?.ids;
  const hasIdFilter = Boolean(wantedIds && wantedIds.length > 0);
  const lookup = new Set(hasIdFilter ? wantedIds : []);
  const memories = hasIdFilter ? fetched.filter(entry => lookup.has(entry.id)) : fetched;

  const payload: ExportedMemory = {
    version: '1.0',
    exportedAt: new Date().toISOString(),
    sourceProject: projectPath,
    memories
  };
  const serialized = JSON.stringify(payload, null, 2);

  writeFileSync(outputPath, serialized, 'utf-8');

  return memories.length;
}
/**
 * Import memories from a JSON export file or from another project.
 *
 * `source` is treated as a file when it exists on disk and ends with `.json`;
 * otherwise it is treated as a project ID and read via getMemoriesFromProject.
 * Each memory is stored under its original ID, or `<prefix>-<id>` when a
 * prefix is given. A memory whose target ID already exists is skipped unless
 * `overwrite` is set.
 *
 * @param targetProjectPath Project that receives the memories
 * @param source File path or project ID
 * @param options `overwrite` replaces existing memories; `prefix` is prepended to imported IDs
 * @returns Counts of imported and skipped memories
 * @throws Error if the file is not valid JSON, has no `memories` array, or the
 *         source project cannot be found
 */
export function importMemories(
  targetProjectPath: string,
  source: string, // File path or project ID
  options?: { overwrite?: boolean; prefix?: string }
): { imported: number; skipped: number } {
  const store = getCoreMemoryStore(targetProjectPath);
  let memories: CoreMemory[];

  // Check if source is a file or project ID
  if (existsSync(source) && source.endsWith('.json')) {
    // Import from file
    const content = readFileSync(source, 'utf-8');
    const data = JSON.parse(content) as Partial<ExportedMemory> | null;

    // The file is external input: fail with a clear message instead of an
    // opaque TypeError when it is not an export document.
    if (!data || !Array.isArray(data.memories)) {
      throw new Error(`Invalid export file (missing "memories" array): ${source}`);
    }
    memories = data.memories;
  } else {
    // Import from project ID
    memories = getMemoriesFromProject(source);
  }

  let imported = 0;
  let skipped = 0;

  for (const memory of memories) {
    // Generate new ID with optional prefix
    let newId = memory.id;
    if (options?.prefix) {
      newId = `${options.prefix}-${memory.id}`;
    }

    // Check if already exists
    const existing = store.getMemory(newId);
    if (existing && !options?.overwrite) {
      skipped++;
      continue;
    }

    // Import memory
    store.upsertMemory({
      id: newId,
      content: memory.content,
      summary: memory.summary,
      raw_output: memory.raw_output,
      metadata: memory.metadata
    });
    imported++;
  }

  return { imported, skipped };
}
/**
* Close all store instances
*/

View File

@@ -488,6 +488,113 @@ export class SessionClusteringService {
};
}
/**
 * Deduplicate active clusters by merging similar ones.
 *
 * Runs three passes over the active clusters:
 * 1. Clusters whose names match (case-insensitive, trimmed) are merged into
 *    the one with the earliest `created_at`.
 * 2. The remaining clusters are compared pairwise using a membership snapshot
 *    taken at the start of the pass. A later cluster is merged into an earlier
 *    one when the sessions they share make up at least `overlapThreshold` of
 *    the smaller cluster.
 * 3. Clusters left without members are deleted.
 *
 * A failed merge is logged as a warning and does not stop the run.
 *
 * @param overlapThreshold Minimum shared fraction of the smaller cluster
 *                         (inclusive) that triggers a merge; must be in (0, 1].
 *                         Defaults to 0.5.
 * @returns Number of clusters merged away, number of empty clusters deleted,
 *          and number of active clusters remaining
 * @throws RangeError if `overlapThreshold` is outside (0, 1]
 */
async deduplicateClusters(overlapThreshold = 0.5): Promise<{ merged: number; deleted: number; remaining: number }> {
  if (!(overlapThreshold > 0 && overlapThreshold <= 1)) {
    throw new RangeError(`overlapThreshold must be in (0, 1], got ${overlapThreshold}`);
  }

  const clusters = this.coreMemoryStore.listClusters('active');
  console.log(`[Dedup] Analyzing ${clusters.length} active clusters`);

  if (clusters.length < 2) {
    return { merged: 0, deleted: 0, remaining: clusters.length };
  }

  // Group clusters by name (case-insensitive)
  const byName = new Map<string, typeof clusters>();
  for (const cluster of clusters) {
    const key = cluster.name.toLowerCase().trim();
    const group = byName.get(key);
    if (group) {
      group.push(cluster);
    } else {
      byName.set(key, [cluster]);
    }
  }

  let merged = 0;
  let deleted = 0;

  // Merge clusters with same name
  for (const [name, group] of byName) {
    if (group.length < 2) continue;

    // Sort by created_at (oldest first) to keep the original
    group.sort((a, b) => a.created_at.localeCompare(b.created_at));
    const target = group[0];
    const sources = group.slice(1).map(c => c.id);

    console.log(`[Dedup] Merging ${sources.length} duplicate clusters named '${name}' into ${target.id}`);

    try {
      const membersMoved = this.coreMemoryStore.mergeClusters(target.id, sources);
      merged += sources.length;
      console.log(`[Dedup] Moved ${membersMoved} members, deleted ${sources.length} clusters`);
    } catch (error) {
      console.warn(`[Dedup] Failed to merge: ${(error as Error).message}`);
    }
  }

  // Check for clusters with high member overlap
  const remainingClusters = this.coreMemoryStore.listClusters('active');
  const clusterMembers = new Map<string, Set<string>>();

  for (const cluster of remainingClusters) {
    const members = this.coreMemoryStore.getClusterMembers(cluster.id);
    clusterMembers.set(cluster.id, new Set(members.map(m => m.session_id)));
  }

  // Find and merge overlapping clusters
  const processed = new Set<string>();

  for (let i = 0; i < remainingClusters.length; i++) {
    const clusterA = remainingClusters[i];
    if (processed.has(clusterA.id)) continue;

    const membersA = clusterMembers.get(clusterA.id)!;
    const toMerge: string[] = [];

    for (let j = i + 1; j < remainingClusters.length; j++) {
      const clusterB = remainingClusters[j];
      if (processed.has(clusterB.id)) continue;

      const membersB = clusterMembers.get(clusterB.id)!;
      const sharedCount = [...membersA].filter(m => membersB.has(m)).length;

      // Calculate overlap ratio (based on smaller cluster); empty clusters
      // are never merged here and are handled by the cleanup pass below.
      const minSize = Math.min(membersA.size, membersB.size);
      if (minSize > 0 && sharedCount / minSize >= overlapThreshold) {
        toMerge.push(clusterB.id);
        processed.add(clusterB.id);
      }
    }

    if (toMerge.length > 0) {
      console.log(`[Dedup] Merging ${toMerge.length} overlapping clusters into ${clusterA.id}`);
      try {
        this.coreMemoryStore.mergeClusters(clusterA.id, toMerge);
        merged += toMerge.length;
      } catch (error) {
        console.warn(`[Dedup] Failed to merge overlapping: ${(error as Error).message}`);
      }
    }
  }

  // Delete empty clusters
  const finalClusters = this.coreMemoryStore.listClusters('active');
  for (const cluster of finalClusters) {
    const members = this.coreMemoryStore.getClusterMembers(cluster.id);
    if (members.length === 0) {
      this.coreMemoryStore.deleteCluster(cluster.id);
      deleted++;
      console.log(`[Dedup] Deleted empty cluster: ${cluster.id}`);
    }
  }

  const remaining = this.coreMemoryStore.listClusters('active').length;
  console.log(`[Dedup] Complete: ${merged} merged, ${deleted} deleted, ${remaining} remaining`);

  return { merged, deleted, remaining };
}
/**
* Agglomerative clustering algorithm
* Returns array of clusters (each cluster is array of sessions)