feat: Implement executor assignment and clustering optimizations for session management

2026-02-05 01:50:27 +08:00 · 2025-12-20 11:29:16 +08:00
parent e1cac5dd50
commit 4de4db3c69
3 changed files with 194 additions and 33 deletions
--- a/.claude/commands/workflow/lite-execute.md
+++ b/.claude/commands/workflow/lite-execute.md
@@ -258,6 +258,33 @@ TodoWrite({

 ### Step 3: Launch Execution

+**Executor Resolution** (a task-level executor takes precedence over the global setting):
+```javascript
+// Resolve a task's executor: a per-task entry in executorAssignments wins, otherwise fall back to the global executionMethod
+function getTaskExecutor(task) {
+  const assignments = executionContext?.executorAssignments || {}
+  if (assignments[task.id]) {
+    return assignments[task.id].executor  // 'gemini' | 'codex' | 'agent'
+  }
+  // Fallback: map the global executionMethod ('Auto' when unset) to an executor
+  const method = executionContext?.executionMethod || 'Auto'
+  if (method === 'Agent') return 'agent'
+  if (method === 'Codex') return 'codex'
+  // Auto (and any other value): choose by plan complexity - 'Low' -> agent, anything else -> codex
+  return planObject.complexity === 'Low' ? 'agent' : 'codex'
+}
+
+// Bucket tasks by resolved executor; returns { gemini, codex, agent } arrays, preserving input order within each bucket
+function groupTasksByExecutor(tasks) {
+  const groups = { gemini: [], codex: [], agent: [] }
+  tasks.forEach(task => {
+    const executor = getTaskExecutor(task)
+    groups[executor].push(task)  // NOTE(review): throws a TypeError if executor is not one of the three keys above
+  })
+  return groups
+}
+```
+
 **Execution Flow**: Parallel batches concurrently → Sequential batches in order
 ```javascript
 const parallel = executionCalls.filter(c => c.executionType === "parallel")
@@ -283,8 +310,9 @@ for (const call of sequential) {
 **Option A: Agent Execution**

 When to use:
- `executionMethod = "Agent"`
- `executionMethod = "Auto" AND complexity = "Low"`
+- `getTaskExecutor(task) === "agent"`
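+- or `executionMethod = "Agent"` (global fallback, used when the task has no entry in `executorAssignments`)
+- or `executionMethod = "Auto" AND complexity = "Low"` (global fallback, used when the task has no entry in `executorAssignments`)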

 **Task Formatting Principle**: Each task is a self-contained checklist. The agent only needs to know what THIS task requires, not its position or relation to other tasks.

@@ -400,8 +428,9 @@ function extractRelatedFiles(tasks) {
 **Option B: CLI Execution (Codex)**

 When to use:
- `executionMethod = "Codex"`
- `executionMethod = "Auto" AND complexity = "Medium" or "High"`
+- `getTaskExecutor(task) === "codex"`
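+- or `executionMethod = "Codex"` (global fallback, used when the task has no entry in `executorAssignments`)
+- or `executionMethod = "Auto" AND complexity = "Medium/High"` (global fallback, used when the task has no entry in `executorAssignments`)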

 **Task Formatting Principle**: Same as Agent - each task is a self-contained checklist. No task numbering or position awareness.

@@ -530,6 +559,15 @@ if (bash_result.status === 'failed' || bash_result.status === 'timeout') {

 **Result Collection**: After completion, analyze output and collect result following `executionResult` structure (include `cliExecutionId` for resume capability)

+**Option C: CLI Execution (Gemini)**
+
+When to use: `getTaskExecutor(task) === "gemini"` (analysis-type tasks)
+
+```bash
+# Reuses the same formatBatchPrompt as Option B, switching the tool and mode
+ccw cli -p "${formatBatchPrompt(batch)}" --tool gemini --mode analysis --id ${sessionId}-${batch.groupId}
+```
+
 ### Step 4: Progress Tracking

 Progress tracked at batch level (not individual task level). Icons: ⚡ (parallel, concurrent), → (sequential, one-by-one)
@@ -697,10 +735,15 @@ Passed from lite-plan via global variable:
  explorationAngles: string[],             // List of exploration angles
  explorationManifest: {...} | null,       // Exploration manifest
  clarificationContext: {...} | null,
-  executionMethod: "Agent" | "Codex" | "Auto",
+  executionMethod: "Agent" | "Codex" | "Auto",  // 全局默认
  codeReviewTool: "Skip" | "Gemini Review" | "Agent Review" | string,
  originalUserInput: string,

+  // 任务级 executor 分配（优先于 executionMethod）
+  executorAssignments: {
+    [taskId]: { executor: "gemini" | "codex" | "agent", reason: string }
+  },
+
  // Session artifacts location (saved by lite-plan)
  session: {
    id: string,                        // Session identifier: {taskSlug}-{shortTimestamp}
--- a/.claude/commands/workflow/lite-plan.md
+++ b/.claude/commands/workflow/lite-plan.md
@@ -353,6 +353,23 @@ if (dedupedClarifications.length > 0) {

 **IMPORTANT**: Phase 3 is **planning only** - NO code execution. All execution happens in Phase 5 via lite-execute.

+**Executor Assignment** (assigned by Claude via semantic analysis; runs after the plan is generated):
+
+```javascript
+// Assignment rules (highest priority first):
+// 1. Explicit user instruction: "用 gemini 分析..." (analyze with gemini) → gemini, "codex 实现..." (implement with codex) → codex
+// 2. Inferred from task type:
+//    - analyze|review|evaluate|explore (分析|审查|评估|探索) → gemini
+//    - implement|create|modify|fix (实现|创建|修改|修复) → codex (complex) or agent (simple)
+// 3. Default → agent
+
+const executorAssignments = {}  // { taskId: { executor: 'gemini'|'codex'|'agent', reason: string } }
+plan.tasks.forEach(task => {
+  // Claude applies the rules above semantically and assigns an executor to each task
+  executorAssignments[task.id] = { executor: '...', reason: '...' }  // '...' are placeholders filled in per task
+})
+```
+
 **Low Complexity** - Direct planning by Claude:
 ```javascript
 // Step 1: Read schema
@@ -532,9 +549,13 @@ executionContext = {
  explorationAngles: manifest.explorations.map(e => e.angle),
  explorationManifest: manifest,
  clarificationContext: clarificationContext || null,
-  executionMethod: userSelection.execution_method,
+  executionMethod: userSelection.execution_method,  // 全局默认，可被 executorAssignments 覆盖
  codeReviewTool: userSelection.code_review_tool,
  originalUserInput: task_description,
+
+  // 任务级 executor 分配（优先于全局 executionMethod）
+  executorAssignments: executorAssignments,  // { taskId: { executor, reason } }
+
  session: {
    id: sessionId,
    folder: sessionFolder,
--- a/ccw/src/core/session-clustering-service.ts
+++ b/ccw/src/core/session-clustering-service.ts
@@ -301,13 +301,65 @@ export class SessionClusteringService {
    return intersection.size / union.size;
  }

+  /**
+   * Find the active cluster that contains the most of the given session IDs (null for empty input or when no active cluster matches).
+   * Ties go to the cluster that reached the top count first. NOTE(review): autocluster collects sessions with scope 'unclustered' - confirm this lookup can ever match.
+   */
+  private findExistingClusterForSessions(sessionIds: string[]): SessionCluster | null {
+    if (sessionIds.length === 0) return null;
+
+    const clusterCounts = new Map<string, number>();
+    let maxCount = 0;
+    let bestClusterId: string | null = null;
+
+    for (const sessionId of sessionIds) {
+      const clusters = this.coreMemoryStore.getSessionClusters(sessionId);
+      for (const cluster of clusters) {
+        if (cluster.status !== 'active') continue; // only active clusters are merge candidates
+
+        const count = (clusterCounts.get(cluster.id) || 0) + 1; // input sessions seen in this cluster so far
+        clusterCounts.set(cluster.id, count);
+
+        if (count > maxCount) { // strict '>' keeps the first cluster to reach a given count
+          maxCount = count;
+          bestClusterId = cluster.id;
+        }
+      }
+    }
+
+    if (bestClusterId) {
+      return this.coreMemoryStore.getCluster(bestClusterId);
+    }
+    return null;
+  }
+
+  /**
+   * Decide whether a newly formed group of sessions should be folded into an existing cluster.
+   * True when strictly more than 70% of the new sessions are already members of that cluster; false for an empty group.
+   */
+  private shouldMergeWithExisting(newClusterSessions: SessionMetadataCache[], existingCluster: SessionCluster): boolean {
+    const MERGE_THRESHOLD = 0.7;
+
+    const existingMembers = this.coreMemoryStore.getClusterMembers(existingCluster.id);
+    const newSessionIds = new Set(newClusterSessions.map(s => s.session_id));
+    const existingSessionIds = new Set(existingMembers.map(m => m.session_id));
+
+    if (newSessionIds.size === 0) return false; // also avoids dividing by zero below
+
+    const intersection = new Set([...newSessionIds].filter(id => existingSessionIds.has(id)));
+    const overlapRatio = intersection.size / newSessionIds.size; // share of the NEW sessions already in the existing cluster
+
+    return overlapRatio > MERGE_THRESHOLD; // an overlap of exactly 0.7 does not merge
+  }
+
  /**
   * Run auto-clustering algorithm
+   * Optimized to prevent duplicate clusters by checking existing clusters first
   */
  async autocluster(options?: ClusteringOptions): Promise<ClusteringResult> {
-    // 1. Collect sessions
-    const sessions = await this.collectSessions(options);
-    console.log(`[Clustering] Collected ${sessions.length} sessions`);
+    // 1. Collect only unclustered sessions to prevent re-clustering
+    const sessions = await this.collectSessions({ ...options, scope: 'unclustered' });
+    console.log(`[Clustering] Collected ${sessions.length} unclustered sessions`);

    // 2. Update metadata cache
    for (const session of sessions) {
@@ -327,43 +379,88 @@ export class SessionClusteringService {
    }

    // 4. Agglomerative clustering
-    const clusters = this.agglomerativeClustering(sessions, relevanceMatrix, CLUSTER_THRESHOLD);
-    console.log(`[Clustering] Generated ${clusters.length} clusters`);
+    const minClusterSize = options?.minClusterSize || 2;

-    // 5. Create session_clusters
+    // Early return if not enough sessions
+    if (sessions.length < minClusterSize) {
+      console.log('[Clustering] Not enough unclustered sessions to form new clusters');
+      return { clustersCreated: 0, sessionsProcessed: sessions.length, sessionsClustered: 0 };
+    }
+
+    const newPotentialClusters = this.agglomerativeClustering(sessions, relevanceMatrix, CLUSTER_THRESHOLD);
+    console.log(`[Clustering] Generated ${newPotentialClusters.length} potential clusters`);
+
+    // 5. Process clusters: create new or merge with existing
    let clustersCreated = 0;
+    let clustersMerged = 0;
    let sessionsClustered = 0;

-    for (const cluster of clusters) {
-      if (cluster.length < (options?.minClusterSize || 2)) {
+    for (const clusterSessions of newPotentialClusters) {
+      if (clusterSessions.length < minClusterSize) {
        continue; // Skip small clusters
      }

-      const clusterName = this.generateClusterName(cluster);
-      const clusterIntent = this.generateClusterIntent(cluster);
+      const sessionIds = clusterSessions.map(s => s.session_id);
+      const existingCluster = this.findExistingClusterForSessions(sessionIds);

-      const clusterRecord = this.coreMemoryStore.createCluster({
-        name: clusterName,
-        description: `Auto-generated cluster with ${cluster.length} sessions`,
-        intent: clusterIntent,
-        status: 'active'
-      });
+      // Check if we should merge with an existing cluster
+      if (existingCluster && this.shouldMergeWithExisting(clusterSessions, existingCluster)) {
+        const existingMembers = this.coreMemoryStore.getClusterMembers(existingCluster.id);
+        const existingSessionIds = new Set(existingMembers.map(m => m.session_id));

-      // Add members
-      cluster.forEach((session, index) => {
-        this.coreMemoryStore.addClusterMember({
-          cluster_id: clusterRecord.id,
-          session_id: session.session_id,
-          session_type: session.session_type as 'core_memory' | 'workflow' | 'cli_history' | 'native',
-          sequence_order: index + 1,
-          relevance_score: 1.0 // TODO: Calculate based on centrality
+        // Only add sessions not already in the cluster
+        const newSessions = clusterSessions.filter(s => !existingSessionIds.has(s.session_id));
+
+        if (newSessions.length > 0) {
+          newSessions.forEach((session, index) => {
+            this.coreMemoryStore.addClusterMember({
+              cluster_id: existingCluster.id,
+              session_id: session.session_id,
+              session_type: session.session_type as 'core_memory' | 'workflow' | 'cli_history' | 'native',
+              sequence_order: existingMembers.length + index + 1,
+              relevance_score: 1.0
+            });
+          });
+
+          // Update cluster description
+          this.coreMemoryStore.updateCluster(existingCluster.id, {
+            description: `Auto-generated cluster with ${existingMembers.length + newSessions.length} sessions`
+          });
+
+          clustersMerged++;
+          sessionsClustered += newSessions.length;
+          console.log(`[Clustering] Merged ${newSessions.length} sessions into existing cluster '${existingCluster.name}'`);
+        }
+      } else {
+        // Create new cluster
+        const clusterName = this.generateClusterName(clusterSessions);
+        const clusterIntent = this.generateClusterIntent(clusterSessions);
+
+        const clusterRecord = this.coreMemoryStore.createCluster({
+          name: clusterName,
+          description: `Auto-generated cluster with ${clusterSessions.length} sessions`,
+          intent: clusterIntent,
+          status: 'active'
        });
-      });

-      clustersCreated++;
-      sessionsClustered += cluster.length;
+        // Add members
+        clusterSessions.forEach((session, index) => {
+          this.coreMemoryStore.addClusterMember({
+            cluster_id: clusterRecord.id,
+            session_id: session.session_id,
+            session_type: session.session_type as 'core_memory' | 'workflow' | 'cli_history' | 'native',
+            sequence_order: index + 1,
+            relevance_score: 1.0
+          });
+        });
+
+        clustersCreated++;
+        sessionsClustered += clusterSessions.length;
+      }
    }

+    console.log(`[Clustering] Summary: ${clustersCreated} created, ${clustersMerged} merged`);
+
    return {
      clustersCreated,
      sessionsProcessed: sessions.length,