feat: Add cluster management commands for deletion, merging, and deduplication

This commit is contained in:
catlog22
2025-12-20 12:23:08 +08:00
parent ab06ed0083
commit ea284d739a
4 changed files with 625 additions and 22 deletions

View File

@@ -522,6 +522,60 @@ ${memory.content}
return result.changes > 0;
}
/**
 * Merge multiple clusters into one.
 *
 * The target cluster is kept. Every member of each source cluster whose
 * session is not already in the target is appended to the target, after which
 * the source cluster is deleted. Finally the target's description is replaced
 * with a summary of the merged size.
 *
 * @param targetClusterId The cluster to keep
 * @param sourceClusterIds The clusters to merge into target (will be deleted);
 *   an entry equal to `targetClusterId` is ignored
 * @returns Number of members moved
 * @throws Error if the target cluster does not exist
 */
mergeClusters(targetClusterId: string, sourceClusterIds: string[]): number {
  const targetCluster = this.getCluster(targetClusterId);
  if (!targetCluster) {
    throw new Error(`Target cluster not found: ${targetClusterId}`);
  }

  const targetMembers = this.getClusterMembers(targetClusterId);
  const existingMembers = new Set(targetMembers.map(m => m.session_id));

  // Base position for appended members. It is taken ONCE, before anything is
  // moved: re-reading the target size per source while also adding the running
  // `membersMoved` counter would count earlier moves twice and leave gaps in
  // sequence_order from the second source onward.
  const baseOrder = targetMembers.length;
  let membersMoved = 0;

  for (const sourceId of sourceClusterIds) {
    if (sourceId === targetClusterId) continue;

    for (const member of this.getClusterMembers(sourceId)) {
      // Skip if already exists in target
      if (existingMembers.has(member.session_id)) continue;

      // Move member to target cluster
      this.addClusterMember({
        cluster_id: targetClusterId,
        session_id: member.session_id,
        session_type: member.session_type,
        sequence_order: baseOrder + membersMoved + 1,
        relevance_score: member.relevance_score
      });
      existingMembers.add(member.session_id);
      membersMoved++;
    }

    // Delete source cluster
    this.deleteCluster(sourceId);
  }

  // Update target cluster description
  const finalMembers = this.getClusterMembers(targetClusterId);
  this.updateCluster(targetClusterId, {
    description: `Merged cluster with ${finalMembers.length} sessions`
  });

  return membersMoved;
}
/**
* Add member to cluster
*/
@@ -784,6 +838,219 @@ export function getCoreMemoryStore(projectPath: string): CoreMemoryStore {
return storeCache.get(normalizedPath)!;
}
// ============================================================================
// Cross-workspace management functions
// ============================================================================
import { readdirSync, writeFileSync, readFileSync } from 'fs';
import { homedir } from 'os';
/**
 * Summary of one project's core-memory data, as produced by listAllProjects().
 */
export interface ProjectInfo {
/** Name of the project's directory under `<CCW home>/projects`. */
id: string;
/** Approximate filesystem path reconstructed from `id`; not guaranteed to be exact. */
path: string;
/** Row count of the `memories` table (0 when the database is missing or unreadable). */
memoriesCount: number;
/** Row count of the `session_clusters` table (0 when the table or database is unavailable). */
clustersCount: number;
/** Largest `updated_at` value found in the `memories` table, if any. */
lastUpdated?: string;
}
/**
 * Shape of the JSON document written by exportMemories() and read back by
 * importMemories().
 */
export interface ExportedMemory {
/** Export format version; exportMemories() writes '1.0'. */
version: string;
/** ISO-8601 timestamp of when the export was created. */
exportedAt: string;
/** The project path the memories were exported from. */
sourceProject: string;
/** The exported memory records. */
memories: CoreMemory[];
}
/**
 * Resolve the root directory where CCW keeps its data.
 *
 * A non-empty CCW_DATA_DIR environment variable takes precedence; otherwise
 * the `.ccw` folder inside the current user's home directory is used.
 */
function getCCWHome(): string {
  const override = process.env.CCW_DATA_DIR;
  if (override) {
    return override;
  }
  return join(homedir(), '.ccw');
}
/**
 * List all projects with their memory counts.
 *
 * Scans every directory under `<CCW home>/projects` and, where a core-memory
 * database exists, opens it read-only to count memories and clusters and to
 * read the most recent `updated_at`. Databases that cannot be opened or
 * queried are reported with zero counts rather than failing the whole listing.
 *
 * @returns One entry per project directory, most recently updated first;
 *   projects without a known update time come last. Empty when the projects
 *   directory does not exist.
 */
export function listAllProjects(): ProjectInfo[] {
  const projectsDir = join(getCCWHome(), 'projects');
  if (!existsSync(projectsDir)) {
    return [];
  }

  const projects: ProjectInfo[] = [];
  const entries = readdirSync(projectsDir, { withFileTypes: true });

  for (const entry of entries) {
    if (!entry.isDirectory()) continue;

    const projectId = entry.name;
    const coreMemoryDb = join(projectsDir, projectId, 'core-memory', 'core_memory.db');

    let memoriesCount = 0;
    let clustersCount = 0;
    let lastUpdated: string | undefined;

    if (existsSync(coreMemoryDb)) {
      try {
        const db = new Database(coreMemoryDb, { readonly: true });
        try {
          // Count memories
          const memResult = db.prepare('SELECT COUNT(*) as count FROM memories').get() as
            | { count: number }
            | undefined;
          memoriesCount = memResult?.count ?? 0;

          // Count clusters
          try {
            const clusterResult = db.prepare('SELECT COUNT(*) as count FROM session_clusters').get() as
              | { count: number }
              | undefined;
            clustersCount = clusterResult?.count ?? 0;
          } catch {
            // Table might not exist
          }

          // Get last update time. MAX() yields NULL on an empty table, so
          // normalise that to undefined to honour the declared field type.
          const lastMemory = db.prepare('SELECT MAX(updated_at) as last FROM memories').get() as
            | { last: string | null }
            | undefined;
          lastUpdated = lastMemory?.last ?? undefined;
        } finally {
          // Always release the handle, even when one of the queries throws.
          db.close();
        }
      } catch {
        // Database might be locked or corrupted
      }
    }

    // Convert project ID back to approximate path
    const approximatePath = projectId
      .replace(/^([a-z])--/, '$1:/') // d-- -> d:/
      .replace(/--/g, '/')
      .replace(/-/g, ' ');

    projects.push({
      id: projectId,
      path: approximatePath,
      memoriesCount,
      clustersCount,
      lastUpdated
    });
  }

  // Sort by last updated (most recent first); entries without a timestamp go
  // last. Two timestamp-less entries compare equal so the comparator stays
  // consistent.
  return projects.sort((a, b) => {
    if (!a.lastUpdated && !b.lastUpdated) return 0;
    if (!a.lastUpdated) return 1;
    if (!b.lastUpdated) return -1;
    return b.lastUpdated.localeCompare(a.lastUpdated);
  });
}
/**
 * Get memories from another project by ID.
 *
 * Opens the project's core-memory database read-only and returns every row of
 * its `memories` table, most recently updated first.
 *
 * @param projectId Name of the project's directory under `<CCW home>/projects`
 * @returns The project's memories mapped to CoreMemory objects
 * @throws Error if the project has no core-memory database; errors raised
 *   while opening or querying the database propagate to the caller
 */
export function getMemoriesFromProject(projectId: string): CoreMemory[] {
  const projectsDir = join(getCCWHome(), 'projects');
  const coreMemoryDb = join(projectsDir, projectId, 'core-memory', 'core_memory.db');

  if (!existsSync(coreMemoryDb)) {
    throw new Error(`Project not found: ${projectId}`);
  }

  const db = new Database(coreMemoryDb, { readonly: true });
  let rows: any[];
  try {
    const stmt = db.prepare('SELECT * FROM memories ORDER BY updated_at DESC');
    rows = stmt.all() as any[];
  } finally {
    // Release the handle even if the query fails (e.g. missing table).
    db.close();
  }

  return rows.map(row => ({
    id: row.id,
    content: row.content,
    summary: row.summary || '',
    raw_output: row.raw_output,
    created_at: row.created_at,
    updated_at: row.updated_at,
    archived: Boolean(row.archived),
    metadata: row.metadata
  }));
}
/**
 * Write a project's memories to a JSON file.
 *
 * At most 10000 memories are fetched from the project's store; when
 * `options.ids` is non-empty, only the fetched memories whose id is listed are
 * kept. The result is written to `outputPath` as a pretty-printed
 * ExportedMemory document.
 *
 * @param projectPath Project whose store is exported
 * @param outputPath Destination file (overwritten)
 * @param options Optional id filter and archived flag
 * @returns Number of memories written
 */
export function exportMemories(
  projectPath: string,
  outputPath: string,
  options?: { ids?: string[]; includeArchived?: boolean }
): number {
  const store = getCoreMemoryStore(projectPath);

  // NOTE(review): `includeArchived` is forwarded as the store's `archived`
  // filter; whether `true` means "also archived" or "only archived" depends on
  // getMemories — confirm.
  const fetched = store.getMemories({
    archived: options?.includeArchived || false,
    limit: 10000
  });

  // Narrow to the requested ids, if any were given
  const requestedIds = options?.ids;
  let selected = fetched;
  if (requestedIds && requestedIds.length > 0) {
    const wanted = new Set(requestedIds);
    selected = fetched.filter(entry => wanted.has(entry.id));
  }

  const payload: ExportedMemory = {
    version: '1.0',
    exportedAt: new Date().toISOString(),
    sourceProject: projectPath,
    memories: selected
  };
  const serialized = JSON.stringify(payload, null, 2);
  writeFileSync(outputPath, serialized, 'utf-8');

  return selected.length;
}
/**
 * Import memories from a JSON file or another project.
 *
 * `source` is treated as an export file when it ends in `.json` and exists on
 * disk; otherwise it is treated as a project ID and read through
 * getMemoriesFromProject().
 *
 * @param targetProjectPath Project whose store receives the memories
 * @param source File path or project ID
 * @param options `prefix` is prepended (with a dash) to every imported id;
 *   `overwrite` replaces memories whose id already exists instead of skipping
 * @returns Counts of memories imported and skipped
 * @throws Error if the export file does not contain a `memories` array;
 *   JSON syntax errors and getMemoriesFromProject() errors propagate
 */
export function importMemories(
  targetProjectPath: string,
  source: string, // File path or project ID
  options?: { overwrite?: boolean; prefix?: string }
): { imported: number; skipped: number } {
  const store = getCoreMemoryStore(targetProjectPath);
  let memories: CoreMemory[];

  // Check if source is a file or project ID
  if (existsSync(source) && source.endsWith('.json')) {
    // Import from file
    const content = readFileSync(source, 'utf-8');
    const data: unknown = JSON.parse(content);

    // The file is external input: make sure it has the expected shape before
    // iterating, instead of failing later with an opaque "not iterable" error.
    const parsedMemories = (data as Partial<ExportedMemory> | null)?.memories;
    if (!Array.isArray(parsedMemories)) {
      throw new Error(`Invalid memory export file (missing "memories" array): ${source}`);
    }
    memories = parsedMemories;
  } else {
    // Import from project ID
    memories = getMemoriesFromProject(source);
  }

  let imported = 0;
  let skipped = 0;

  for (const memory of memories) {
    // Generate new ID with optional prefix
    let newId = memory.id;
    if (options?.prefix) {
      newId = `${options.prefix}-${memory.id}`;
    }

    // Check if already exists
    const existing = store.getMemory(newId);
    if (existing && !options?.overwrite) {
      skipped++;
      continue;
    }

    // Import memory
    store.upsertMemory({
      id: newId,
      content: memory.content,
      summary: memory.summary,
      raw_output: memory.raw_output,
      metadata: memory.metadata
    });
    imported++;
  }

  return { imported, skipped };
}
/**
* Close all store instances
*/

View File

@@ -488,6 +488,113 @@ export class SessionClusteringService {
};
}
/**
 * Deduplicate clusters by merging similar ones.
 *
 * Runs three passes over the active clusters:
 *  1. clusters sharing a name (case-insensitive, trimmed) are merged into the
 *     oldest one of the group;
 *  2. clusters whose shared sessions make up at least 50% of the smaller
 *     cluster are merged into the one listed first;
 *  3. active clusters left without members are deleted.
 *
 * A failed merge is logged and skipped; it does not abort the run.
 *
 * @returns Statistics about deduplication: `merged` source clusters folded
 *   into another cluster, `deleted` empty clusters removed in pass 3, and the
 *   number of active clusters `remaining`
 */
async deduplicateClusters(): Promise<{ merged: number; deleted: number; remaining: number }> {
  const clusters = this.coreMemoryStore.listClusters('active');
  console.log(`[Dedup] Analyzing ${clusters.length} active clusters`);

  if (clusters.length < 2) {
    return { merged: 0, deleted: 0, remaining: clusters.length };
  }

  // A thrown value is not guaranteed to be an Error; avoid logging "undefined".
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  // Group clusters by name (case-insensitive)
  const byName = new Map<string, typeof clusters>();
  for (const cluster of clusters) {
    const key = cluster.name.toLowerCase().trim();
    const group = byName.get(key);
    if (group) {
      group.push(cluster);
    } else {
      byName.set(key, [cluster]);
    }
  }

  let merged = 0;
  let deleted = 0;

  // Merge clusters with same name
  for (const [name, group] of byName) {
    if (group.length < 2) continue;

    // Sort by created_at (oldest first) to keep the original
    group.sort((a, b) => a.created_at.localeCompare(b.created_at));
    const target = group[0];
    const sources = group.slice(1).map(c => c.id);

    console.log(`[Dedup] Merging ${sources.length} duplicate clusters named '${name}' into ${target.id}`);
    try {
      const membersMoved = this.coreMemoryStore.mergeClusters(target.id, sources);
      merged += sources.length;
      console.log(`[Dedup] Moved ${membersMoved} members, deleted ${sources.length} clusters`);
    } catch (error) {
      console.warn(`[Dedup] Failed to merge: ${describeError(error)}`);
    }
  }

  // Check for clusters with high member overlap (snapshot taken after pass 1)
  const remainingClusters = this.coreMemoryStore.listClusters('active');
  const clusterMembers = new Map<string, Set<string>>();
  for (const cluster of remainingClusters) {
    const members = this.coreMemoryStore.getClusterMembers(cluster.id);
    clusterMembers.set(cluster.id, new Set(members.map(m => m.session_id)));
  }

  // Find and merge overlapping clusters
  const processed = new Set<string>();
  for (let i = 0; i < remainingClusters.length; i++) {
    const clusterA = remainingClusters[i];
    if (processed.has(clusterA.id)) continue;

    const membersA = clusterMembers.get(clusterA.id) ?? new Set<string>();
    const toMerge: string[] = [];

    for (let j = i + 1; j < remainingClusters.length; j++) {
      const clusterB = remainingClusters[j];
      if (processed.has(clusterB.id)) continue;

      const membersB = clusterMembers.get(clusterB.id) ?? new Set<string>();

      // Count shared sessions directly; no intermediate array/Set needed.
      let sharedCount = 0;
      for (const sessionId of membersA) {
        if (membersB.has(sessionId)) sharedCount++;
      }

      // Calculate overlap ratio (based on smaller cluster)
      const minSize = Math.min(membersA.size, membersB.size);
      if (minSize > 0 && sharedCount / minSize >= 0.5) {
        toMerge.push(clusterB.id);
        processed.add(clusterB.id);
      }
    }

    if (toMerge.length > 0) {
      console.log(`[Dedup] Merging ${toMerge.length} overlapping clusters into ${clusterA.id}`);
      try {
        this.coreMemoryStore.mergeClusters(clusterA.id, toMerge);
        merged += toMerge.length;
      } catch (error) {
        console.warn(`[Dedup] Failed to merge overlapping: ${describeError(error)}`);
      }
    }
  }

  // Delete empty clusters
  const finalClusters = this.coreMemoryStore.listClusters('active');
  for (const cluster of finalClusters) {
    const members = this.coreMemoryStore.getClusterMembers(cluster.id);
    if (members.length === 0) {
      this.coreMemoryStore.deleteCluster(cluster.id);
      deleted++;
      console.log(`[Dedup] Deleted empty cluster: ${cluster.id}`);
    }
  }

  const remaining = this.coreMemoryStore.listClusters('active').length;
  console.log(`[Dedup] Complete: ${merged} merged, ${deleted} deleted, ${remaining} remaining`);

  return { merged, deleted, remaining };
}
/**
* Agglomerative clustering algorithm
* Returns array of clusters (each cluster is array of sessions)