From 24dad8cefd6913da7e79402f0b32644c068036f5 Mon Sep 17 00:00:00 2001
From: catlog22
Date: Wed, 28 Jan 2026 21:08:49 +0800
Subject: [PATCH] Refactor orchestrator logic and enhance problem taxonomy

- Updated orchestrator decision logic to improve state management and action selection.
- Introduced structured termination checks and action selection criteria.
- Enhanced state update mechanism with sliding window for action history and error tracking.
- Revised problem taxonomy for skill execution issues, consolidating categories and refining detection patterns.
- Improved severity calculation method for issue prioritization.
- Streamlined fix mapping strategies for better clarity and usability.
---
 .../phases/02-structure-generation.md         |  76 ++-
 .../phases/03-phase-generation.md             | 249 +++++++--
 .claude/skills/skill-tuning/SKILL.md          | 473 +++++-------------
 .../skill-tuning/phases/orchestrator.md       | 376 ++++----------
 .../skill-tuning/specs/problem-taxonomy.md    | 284 ++++-------
 5 files changed, 598 insertions(+), 860 deletions(-)

diff --git a/.claude/skills/skill-generator/phases/02-structure-generation.md b/.claude/skills/skill-generator/phases/02-structure-generation.md
index 0fdcad2a..65d6007b 100644
--- a/.claude/skills/skill-generator/phases/02-structure-generation.md
+++ b/.claude/skills/skill-generator/phases/02-structure-generation.md
@@ -21,19 +21,79 @@ const skillDir = `.claude/skills/${config.skill_name}`;
 
 ### Step 2: 创建目录结构
 
-```javascript
-// 基础目录
-Bash(`mkdir -p "${skillDir}/phases"`);
-Bash(`mkdir -p "${skillDir}/specs"`);
-Bash(`mkdir -p "${skillDir}/templates"`);
+#### 基础目录(所有模式)
 
-// Autonomous 模式额外目录
+```javascript
+// 基础架构(花括号展开需置于引号外,否则会创建字面量目录)
+Bash(`mkdir -p "${skillDir}"/{phases,specs,templates,scripts}`);
+```
+
+#### 执行模式特定目录
+
+```
+config.execution_mode
+    ↓
+    ├─ "sequential"
+    │   ↓ Creates:
+    │   └─ phases/ (基础目录已包含)
+    │       ├─ _orchestrator.md
+    │       └─ workflow.json
+    │
+    └─ "autonomous" | "hybrid"
+        ↓ Creates:
+        └─ phases/actions/
+            ├─ state-schema.md
+            └─ *.md (动作文件)
+```
+
+```javascript
+// Autonomous/Hybrid 模式额外目录
 if (config.execution_mode === 'autonomous' || config.execution_mode === 'hybrid') {
   Bash(`mkdir -p "${skillDir}/phases/actions"`);
 }
+```
 
-// scripts 目录(默认创建,用于存放确定性脚本)
-Bash(`mkdir -p "${skillDir}/scripts"`);
+#### Context Strategy 特定目录 (P0 增强)
+
+```javascript
+// ========== P0: 根据上下文策略创建目录 ==========
+const contextStrategy = config.context_strategy || 'file';
+
+if (contextStrategy === 'file') {
+  // 文件策略:创建上下文持久化目录
+  Bash(`mkdir -p "${skillDir}/.scratchpad-template/context"`);
+
+  // 创建上下文模板文件
+  Write(
+    `${skillDir}/.scratchpad-template/context/.gitkeep`,
+    "# Runtime context storage for file-based strategy"
+  );
+}
+// 内存策略无需创建目录 (in-memory only)
+```
+
+**目录树视图**:
+
+```
+Sequential + File Strategy:
+  .claude/skills/{skill-name}/
+  ├── phases/
+  │   ├── _orchestrator.md
+  │   ├── workflow.json
+  │   ├── 01-*.md
+  │   └── 02-*.md
+  ├── .scratchpad-template/
+  │   └── context/          ← File strategy persistent storage
+  └── specs/
+
+Autonomous + Memory Strategy:
+  .claude/skills/{skill-name}/
+  ├── phases/
+  │   ├── orchestrator.md
+  │   ├── state-schema.md
+  │   └── actions/
+  │       └── *.md
+  └── specs/
 ```
 
 ### Step 3: 生成 SKILL.md
diff --git a/.claude/skills/skill-generator/phases/03-phase-generation.md b/.claude/skills/skill-generator/phases/03-phase-generation.md
index 11570b5e..e8f2c5ca 100644
--- a/.claude/skills/skill-generator/phases/03-phase-generation.md
+++ b/.claude/skills/skill-generator/phases/03-phase-generation.md
@@ -52,66 +52,93 @@ const skillRoot = '.claude/skills/skill-generator';
```javascript if (config.execution_mode === 'sequential') { const phases = config.sequential_config.phases; - + // ========== P0 增强: 生成声明式编排器 ========== const workflowOrchestrator = generateSequentialOrchestrator(config, phases); Write(`${skillDir}/phases/_orchestrator.md`, workflowOrchestrator); - + // ========== P0 增强: 生成工作流定义 ========== const workflowDef = generateWorkflowDefinition(config, phases); Write(`${skillDir}/workflow.json`, JSON.stringify(workflowDef, null, 2)); - - // 生成各阶段文件 + + // ========== P0 增强: 生成 Phase 0 (强制规范研读) ========== + const phase0Content = generatePhase0Spec(config); + Write(`${skillDir}/phases/00-spec-study.md`, phase0Content); + + // ========== 生成用户定义的各阶段文件 ========== for (let i = 0; i < phases.length; i++) { const phase = phases[i]; const prevPhase = i > 0 ? phases[i-1] : null; const nextPhase = i < phases.length - 1 ? phases[i+1] : null; - + const content = generateSequentialPhase({ phaseNumber: i + 1, phaseId: phase.id, phaseName: phase.name, phaseDescription: phase.description || `Execute ${phase.name}`, - input: prevPhase ? prevPhase.output : "user input", + input: prevPhase ? prevPhase.output : "phase 0 output", // Phase 0 为首个输入源 output: phase.output, nextPhase: nextPhase ? nextPhase.id : null, config: config, contextStrategy: contextStrategy }); - + Write(`${skillDir}/phases/${phase.id}.md`, content); } } // ========== P0 增强: 声明式工作流定义 ========== function generateWorkflowDefinition(config, phases) { + // ========== P0: 添加强制 Phase 0 ========== + const phase0 = { + id: '00-spec-study', + name: 'Specification Study', + order: 0, + input: null, + output: 'spec-study-complete.flag', + description: '⚠️ MANDATORY: Read all specification documents before execution', + parallel: false, + condition: null, + agent: { + type: 'universal-executor', + run_in_background: false + } + }; + return { skill_name: config.skill_name, version: "1.0.0", execution_mode: "sequential", context_strategy: config.context_strategy || "file", - - // 声明式阶段列表 (类似 software-manual 的 agents_to_run) - phases_to_run: phases.map(p => p.id), - - // 阶段配置 - phases: phases.map((p, i) => ({ - id: p.id, - name: p.name, - order: i + 1, - input: i > 0 ? phases[i-1].output : null, - output: p.output, - // 可选的并行配置 - parallel: p.parallel || false, - // 可选的条件执行 - condition: p.condition || null, - // Agent 配置 - agent: p.agent || { - type: "universal-executor", - run_in_background: false - } - })), - + + // ========== P0: Phase 0 置于首位 ========== + phases_to_run: ['00-spec-study', ...phases.map(p => p.id)], + + // ========== P0: Phase 0 + 用户定义阶段 ========== + phases: [ + phase0, + ...phases.map((p, i) => ({ + id: p.id, + name: p.name, + order: i + 1, + input: i === 0 ? phase0.output : phases[i-1].output, // 第一个阶段依赖 Phase 0 + output: p.output, + parallel: p.parallel || false, + condition: p.condition || null, + // Agent 配置 (支持 LLM 集成) + agent: p.agent || (config.llm_integration?.enabled ? 
{ + type: "llm", + tool: config.llm_integration.default_tool, + mode: config.llm_integration.mode || "analysis", + fallback_chain: config.llm_integration.fallback_chain || [], + run_in_background: false + } : { + type: "universal-executor", + run_in_background: false + }) + })) + ], + // 终止条件 termination: { on_success: "all_phases_completed", @@ -233,10 +260,30 @@ async function executePhase(phaseId, phaseConfig, workDir) { ## 阶段执行计划 +**执行流程**: + +\`\`\` +START + ↓ +Phase 0: Specification Study + ↓ Output: spec-study-complete.flag + ↓ +Phase 1: ${phases[0]?.name || 'First Phase'} + ↓ Output: ${phases[0]?.output || 'phase-1.json'} +${phases.slice(1).map((p, i) => ` ↓ +Phase ${i+2}: ${p.name} + ↓ Output: ${p.output}`).join('\n')} + ↓ +COMPLETE +\`\`\` + +**阶段列表**: + | Order | Phase | Input | Output | Agent | |-------|-------|-------|--------|-------| -${phases.map((p, i) => - `| ${i+1} | ${p.id} | ${i > 0 ? phases[i-1].output : '-'} | ${p.output} | ${p.agent?.type || 'universal-executor'} |` +| 0 | 00-spec-study | - | spec-study-complete.flag | universal-executor | +${phases.map((p, i) => + `| ${i+1} | ${p.id} | ${i === 0 ? 'spec-study-complete.flag' : phases[i-1].output} | ${p.output} | ${p.agent?.type || 'universal-executor'} |` ).join('\n')} ## 错误恢复 @@ -751,6 +798,146 @@ ${actions.sort((a, b) => (b.priority || 0) - (a.priority || 0)).map(a => ### Step 4: 辅助函数 ```javascript +// ========== P0: Phase 0 生成函数 ========== +function generatePhase0Spec(config) { + const skillRoot = '.claude/skills/skill-generator'; + const specsToRead = [ + '../_shared/SKILL-DESIGN-SPEC.md', + `${skillRoot}/templates/*.md` + ]; + + return `# Phase 0: Specification Study + +⚠️ **MANDATORY PREREQUISITE** - 此阶段不可跳过 + +## Objective + +在生成任何文件前,完整阅读所有规范文档,理解 Skill 设计标准。 + +## Why This Matters + +**不研读规范 (❌)**: +\`\`\` +跳过规范 + ├─ ✗ 不符合标准 + ├─ ✗ 结构混乱 + └─ ✗ 质量问题 +\`\`\` + +**研读规范 (✅)**: +\`\`\` +完整研读 + ├─ ✓ 标准化输出 + ├─ ✓ 高质量代码 + └─ ✓ 易于维护 +\`\`\` + +## Required Reading + +### P0 - 核心设计规范 + +\`\`\`javascript +// 通用设计标准 (MUST READ) +const designSpec = Read('.claude/skills/_shared/SKILL-DESIGN-SPEC.md'); + +// 关键内容检查点: +const checkpoints = { + structure: '目录结构约定', + naming: '命名规范', + quality: '质量标准', + output: '输出格式要求' +}; +\`\`\` + +### P1 - 模板文件 (生成前必读) + +\`\`\`javascript +// 根据执行模式加载对应模板 +const templates = { + all: [ + 'templates/skill-md.md' // SKILL.md 入口文件模板 + ], + sequential: [ + 'templates/sequential-phase.md' + ], + autonomous: [ + 'templates/autonomous-orchestrator.md', + 'templates/autonomous-action.md' + ] +}; + +const mode = '${config.execution_mode}'; +const requiredTemplates = [...templates.all, ...templates[mode]]; + +requiredTemplates.forEach(template => { + const content = Read(\`.claude/skills/skill-generator/\${template}\`); + // 理解模板结构、变量位置、生成规则 +}); +\`\`\` + +## Execution + +\`\`\`javascript +// ========== 加载规范 ========== +const specs = []; + +// 1. 设计规范 (P0) +specs.push({ + file: '../_shared/SKILL-DESIGN-SPEC.md', + content: Read('.claude/skills/_shared/SKILL-DESIGN-SPEC.md'), + priority: 'P0' +}); + +// 2. 
模板文件 (P1) +const templateFiles = Glob('.claude/skills/skill-generator/templates/*.md'); +templateFiles.forEach(file => { + specs.push({ + file: file, + content: Read(file), + priority: 'P1' + }); +}); + +// ========== 内化规范 ========== +console.log('📖 Reading specifications...'); +specs.forEach(spec => { + console.log(\` [\${spec.priority}] \${spec.file}\`); + // 理解内容(无需生成文件,仅内存处理) +}); + +// ========== 生成完成标记 ========== +const result = { + status: 'completed', + specs_loaded: specs.length, + timestamp: new Date().toISOString() +}; + +Write(\`\${workDir}/spec-study-complete.flag\`, JSON.stringify(result, null, 2)); +\`\`\` + +## Output + +- **标记文件**: \`spec-study-complete.flag\` (证明已完成阅读) +- **副作用**: 内化规范知识,后续阶段遵循标准 + +## Success Criteria + +✅ **通过标准**: +- [ ] 已阅读 SKILL-DESIGN-SPEC.md +- [ ] 已阅读执行模式对应的模板文件 +- [ ] 理解目录结构约定 +- [ ] 理解命名规范 +- [ ] 理解质量标准 + +## Next Phase + +→ [Phase 1: Requirements Discovery](01-requirements-discovery.md) + +**关键**: 只有完成规范研读后,Phase 1 才能正确收集需求并生成符合标准的配置。 +`; +} + +// ========== 其他辅助函数 ========== function toPascalCase(str) { return str.split('-').map(s => s.charAt(0).toUpperCase() + s.slice(1)).join(''); } diff --git a/.claude/skills/skill-tuning/SKILL.md b/.claude/skills/skill-tuning/SKILL.md index 1732eaba..a4ed984d 100644 --- a/.claude/skills/skill-tuning/SKILL.md +++ b/.claude/skills/skill-tuning/SKILL.md @@ -6,375 +6,162 @@ allowed-tools: Task, AskUserQuestion, Read, Write, Bash, Glob, Grep, mcp__ace-to # Skill Tuning -Universal skill diagnosis and optimization tool that identifies and resolves skill execution problems through iterative multi-agent analysis. +Autonomous diagnosis and optimization for skill execution issues. -## Architecture Overview +## Architecture ``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ Skill Tuning Architecture (Autonomous Mode + Gemini CLI) │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ │ -│ ⚠️ Phase 0: Specification → 阅读规范 + 理解目标 skill 结构 (强制前置) │ -│ Study │ -│ ↓ │ -│ ┌───────────────────────────────────────────────────────────────────────┐ │ -│ │ Orchestrator (状态驱动决策) │ │ -│ │ 读取诊断状态 → 选择下一步动作 → 执行 → 更新状态 → 循环直到完成 │ │ -│ └───────────────────────────────────────────────────────────────────────┘ │ -│ │ │ -│ ┌────────────┬───────────┼───────────┬────────────┬────────────┐ │ -│ ↓ ↓ ↓ ↓ ↓ ↓ │ -│ ┌──────┐ ┌──────────┐ ┌─────────┐ ┌────────┐ ┌────────┐ ┌─────────┐ │ -│ │ Init │→ │ Analyze │→ │Diagnose │ │Diagnose│ │Diagnose│ │ Gemini │ │ -│ │ │ │Requiremts│ │ Context │ │ Memory │ │DataFlow│ │Analysis │ │ -│ └──────┘ └──────────┘ └─────────┘ └────────┘ └────────┘ └─────────┘ │ -│ │ │ │ │ │ │ -│ │ └───────────┴───────────┴────────────┘ │ -│ ↓ │ -│ ┌───────────────────────────────────────────────────────────────────────┐ │ -│ │ Requirement Analysis (NEW) │ │ -│ │ • Phase 1: 维度拆解 (Gemini CLI) - 单一描述 → 多个关注维度 │ │ -│ │ • Phase 2: Spec 匹配 - 每个维度 → taxonomy + strategy │ │ -│ │ • Phase 3: 覆盖度评估 - 以"有修复策略"为满足标准 │ │ -│ │ • Phase 4: 歧义检测 - 识别多义性描述,必要时请求澄清 │ │ -│ └───────────────────────────────────────────────────────────────────────┘ │ -│ ↓ │ -│ ┌──────────────────┐ │ -│ │ Apply Fixes + │ │ -│ │ Verify Results │ │ -│ └──────────────────┘ │ -│ │ -│ ┌───────────────────────────────────────────────────────────────────────┐ │ -│ │ Gemini CLI Integration │ │ -│ │ 根据用户需求动态调用 gemini cli 进行深度分析: │ │ -│ │ • 需求维度拆解 (requirement decomposition) │ │ -│ │ • 复杂问题分析 (prompt engineering, architecture review) │ │ -│ │ • 代码模式识别 (pattern matching, anti-pattern detection) │ │ -│ │ • 修复策略生成 
(fix generation, refactoring suggestions) │ │ -│ └───────────────────────────────────────────────────────────────────────┘ │ -│ │ -└─────────────────────────────────────────────────────────────────────────────┘ +┌─────────────────────────────────────────────────────┐ +│ Phase 0: Read Specs (mandatory) │ +│ → problem-taxonomy.md, tuning-strategies.md │ +└─────────────────────────────────────────────────────┘ + ↓ +┌─────────────────────────────────────────────────────┐ +│ Orchestrator (state-driven) │ +│ Read state → Select action → Execute → Update → ✓ │ +└─────────────────────────────────────────────────────┘ + ↓ ↓ +┌──────────────────────┐ ┌──────────────────┐ +│ Diagnosis Phase │ │ Gemini CLI │ +│ • Context │ │ Deep analysis │ +│ • Memory │ │ (on-demand) │ +│ • DataFlow │ │ │ +│ • Agent │ │ Complex issues │ +│ • Docs │ │ Architecture │ +│ • Token Usage │ │ Performance │ +└──────────────────────┘ └──────────────────┘ + ↓ + ┌───────────────────┐ + │ Fix & Verify │ + │ Apply → Re-test │ + └───────────────────┘ ``` -## Problem Domain +## Core Issues Detected -Based on comprehensive analysis, skill-tuning addresses **core skill issues** and **general optimization areas**: - -### Core Skill Issues (自动检测) - -| Priority | Problem | Root Cause | Solution Strategy | -|----------|---------|------------|-------------------| -| **P0** | Authoring Principles Violation | 中间文件存储, State膨胀, 文件中转 | eliminate_intermediate_files, minimize_state, context_passing | +| Priority | Problem | Root Cause | Fix Strategy | +|----------|---------|-----------|--------------| +| **P0** | Authoring Violation | Intermediate files, state bloat, file relay | eliminate_intermediate, minimize_state | | **P1** | Data Flow Disruption | Scattered state, inconsistent formats | state_centralization, schema_enforcement | -| **P2** | Agent Coordination | Fragile call chains, merge complexity | error_wrapping, result_validation | -| **P3** | Context Explosion | Token accumulation, multi-turn bloat | sliding_window, context_summarization | +| **P2** | Agent Coordination | Fragile chains, no error handling | error_wrapping, result_validation | +| **P3** | Context Explosion | Unbounded history, full content passing | sliding_window, path_reference | | **P4** | Long-tail Forgetting | Early constraint loss | constraint_injection, checkpoint_restore | -| **P5** | Token Consumption | Verbose prompts, excessive state, redundant I/O | prompt_compression, lazy_loading, output_minimization | +| **P5** | Token Consumption | Verbose prompts, state bloat | prompt_compression, lazy_loading | -### General Optimization Areas (按需分析 via Gemini CLI) +## Problem Categories (Detailed Specs) -| Category | Issues | Gemini Analysis Scope | -|----------|--------|----------------------| -| **Prompt Engineering** | 模糊指令, 输出格式不一致, 幻觉风险 | 提示词优化, 结构化输出设计 | -| **Architecture** | 阶段划分不合理, 依赖混乱, 扩展性差 | 架构审查, 模块化建议 | -| **Performance** | 执行慢, Token消耗高, 重复计算 | 性能分析, 缓存策略 | -| **Error Handling** | 错误恢复不当, 无降级策略, 日志不足 | 容错设计, 可观测性增强 | -| **Output Quality** | 输出不稳定, 格式漂移, 质量波动 | 质量门控, 验证机制 | -| **User Experience** | 交互不流畅, 反馈不清晰, 进度不可见 | UX优化, 进度追踪 | +See [specs/problem-taxonomy.md](specs/problem-taxonomy.md) for: +- Detection patterns (regex/checks) +- Severity calculations +- Impact assessments -## Key Design Principles +## Tuning Strategies (Detailed Specs) -1. **Problem-First Diagnosis**: Systematic identification before any fix attempt -2. **Data-Driven Analysis**: Record execution traces, token counts, state snapshots -3. 
**Iterative Refinement**: Multiple tuning rounds until quality gates pass -4. **Non-Destructive**: All changes are reversible with backup checkpoints -5. **Agent Coordination**: Use specialized sub-agents for each diagnosis type -6. **Gemini CLI On-Demand**: Deep analysis via CLI for complex/custom issues +See [specs/tuning-strategies.md](specs/tuning-strategies.md) for: +- 10+ strategies per category +- Implementation patterns +- Verification methods ---- +## Workflow -## Gemini CLI Integration +| Step | Action | Orchestrator Decision | Output | +|------|--------|----------------------|--------| +| 1 | `action-init` | status='pending' | Backup, session created | +| 2 | `action-analyze-requirements` | After init | Required dimensions + coverage | +| 3 | Diagnosis (6 types) | Focus areas | state.diagnosis.{type} | +| 4 | `action-gemini-analysis` | Critical issues OR user request | Deep findings | +| 5 | `action-generate-report` | All diagnosis complete | state.final_report | +| 6 | `action-propose-fixes` | Issues found | state.proposed_fixes[] | +| 7 | `action-apply-fix` | Pending fixes | Applied + verified | +| 8 | `action-complete` | Quality gates pass | session.status='completed' | -根据用户需求动态调用 Gemini CLI 进行深度分析。 +## Action Reference -### Trigger Conditions +| Category | Actions | Purpose | +|----------|---------|---------| +| **Setup** | action-init | Initialize backup, session state | +| **Analysis** | action-analyze-requirements | Decompose user request via Gemini CLI | +| **Diagnosis** | action-diagnose-{context,memory,dataflow,agent,docs,token_consumption} | Detect category-specific issues | +| **Deep Analysis** | action-gemini-analysis | Gemini CLI: complex/critical issues | +| **Reporting** | action-generate-report | Consolidate findings → final_report | +| **Fixing** | action-propose-fixes, action-apply-fix | Generate + apply fixes | +| **Verify** | action-verify | Re-run diagnosis, check gates | +| **Exit** | action-complete, action-abort | Finalize or rollback | -| Condition | Action | CLI Mode | -|-----------|--------|----------| -| 用户描述复杂问题 | 调用 Gemini 分析问题根因 | `analysis` | -| 自动诊断发现 critical 问题 | 请求深度分析确认 | `analysis` | -| 用户请求架构审查 | 执行架构分析 | `analysis` | -| 需要生成修复代码 | 生成修复提案 | `write` | -| 标准策略不适用 | 请求定制化策略 | `analysis` | +Full action details: [phases/actions/](phases/actions/) -### CLI Command Template +## State Management + +**Single source of truth**: `.workflow/.scratchpad/skill-tuning-{ts}/state.json` + +```json +{ + "status": "pending|running|completed|failed", + "target_skill": { "name": "...", "path": "..." }, + "diagnosis": { + "context": {...}, + "memory": {...}, + "dataflow": {...}, + "agent": {...}, + "docs": {...}, + "token_consumption": {...} + }, + "issues": [{"id":"...", "severity":"...", "category":"...", "strategy":"..."}], + "proposed_fixes": [...], + "applied_fixes": [...], + "quality_gate": "pass|fail", + "final_report": "..." +} +``` + +See [phases/state-schema.md](phases/state-schema.md) for complete schema. + +## Orchestrator Logic + +See [phases/orchestrator.md](phases/orchestrator.md) for: +- Decision logic (termination checks → action selection) +- State transitions +- Error recovery + +## Key Principles + +1. **Problem-First**: Diagnosis before any fix +2. **Data-Driven**: Record traces, token counts, snapshots +3. **Iterative**: Multiple rounds until quality gates pass +4. **Reversible**: All changes with backup checkpoints +5. 
**Non-Invasive**: Minimal changes, maximum clarity + +## Usage Examples ```bash -ccw cli -p " -PURPOSE: ${purpose} -TASK: ${task_steps} -MODE: ${mode} -CONTEXT: @${skill_path}/**/* -EXPECTED: ${expected_output} -RULES: $(cat ~/.claude/workflows/cli-templates/protocols/${mode}-protocol.md) | ${constraints} -" --tool gemini --mode ${mode} --cd ${skill_path} +# Basic skill diagnosis +/skill-tuning "Fix memory leaks in my skill" + +# Deep analysis with Gemini +/skill-tuning "Architecture issues in async workflow" + +# Focus on specific areas +/skill-tuning "Optimize token consumption and fix agent coordination" + +# Custom issue +/skill-tuning "My skill produces inconsistent outputs" ``` -### Analysis Types +## Output -#### 1. Problem Root Cause Analysis - -```bash -ccw cli -p " -PURPOSE: Identify root cause of skill execution issue: ${user_issue_description} -TASK: • Analyze skill structure and phase flow • Identify anti-patterns • Trace data flow issues -MODE: analysis -CONTEXT: @**/*.md -EXPECTED: JSON with { root_causes: [], patterns_found: [], recommendations: [] } -RULES: $(cat ~/.claude/workflows/cli-templates/protocols/analysis-protocol.md) | Focus on execution flow -" --tool gemini --mode analysis -``` - -#### 2. Architecture Review - -```bash -ccw cli -p " -PURPOSE: Review skill architecture for scalability and maintainability -TASK: • Evaluate phase decomposition • Check state management patterns • Assess agent coordination -MODE: analysis -CONTEXT: @**/*.md -EXPECTED: Architecture assessment with improvement recommendations -RULES: $(cat ~/.claude/workflows/cli-templates/protocols/analysis-protocol.md) | Focus on modularity -" --tool gemini --mode analysis -``` - -#### 3. Fix Strategy Generation - -```bash -ccw cli -p " -PURPOSE: Generate fix strategy for issue: ${issue_id} - ${issue_description} -TASK: • Analyze issue context • Design fix approach • Generate implementation plan -MODE: analysis -CONTEXT: @**/*.md -EXPECTED: JSON with { strategy: string, changes: [], verification_steps: [] } -RULES: $(cat ~/.claude/workflows/cli-templates/protocols/analysis-protocol.md) | Minimal invasive changes -" --tool gemini --mode analysis -``` - ---- - -## Mandatory Prerequisites - -> **CRITICAL**: Read these documents before executing any action. 
- -### Core Specs (Required) - -| Document | Purpose | Priority | -|----------|---------|----------| -| [specs/skill-authoring-principles.md](specs/skill-authoring-principles.md) | **首要准则:简洁高效、去除存储、上下文流转** | **P0** | -| [specs/problem-taxonomy.md](specs/problem-taxonomy.md) | Problem classification and detection patterns | **P0** | -| [specs/tuning-strategies.md](specs/tuning-strategies.md) | Fix strategies for each problem type | **P0** | -| [specs/dimension-mapping.md](specs/dimension-mapping.md) | Dimension to Spec mapping rules | **P0** | -| [specs/quality-gates.md](specs/quality-gates.md) | Quality thresholds and verification criteria | P1 | - -### Templates (Reference) - -| Document | Purpose | -|----------|---------| -| [templates/diagnosis-report.md](templates/diagnosis-report.md) | Diagnosis report structure | -| [templates/fix-proposal.md](templates/fix-proposal.md) | Fix proposal format | - ---- - -## Execution Flow - -``` -┌─────────────────────────────────────────────────────────────────────────────┐ -│ Phase 0: Specification Study (强制前置 - 禁止跳过) │ -│ → Read: specs/problem-taxonomy.md (问题分类) │ -│ → Read: specs/tuning-strategies.md (调优策略) │ -│ → Read: specs/dimension-mapping.md (维度映射规则) │ -│ → Read: Target skill's SKILL.md and phases/*.md │ -│ → Output: 内化规范,理解目标 skill 结构 │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ action-init: Initialize Tuning Session │ -│ → Create work directory: .workflow/.scratchpad/skill-tuning-{timestamp} │ -│ → Initialize state.json with target skill info │ -│ → Create backup of target skill files │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ action-analyze-requirements: Requirement Analysis │ -│ → Phase 1: 维度拆解 (Gemini CLI) - 单一描述 → 多个关注维度 │ -│ → Phase 2: Spec 匹配 - 每个维度 → taxonomy + strategy │ -│ → Phase 3: 覆盖度评估 - 以"有修复策略"为满足标准 │ -│ → Phase 4: 歧义检测 - 识别多义性描述,必要时请求澄清 │ -│ → Output: state.json (requirement_analysis field) │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ action-diagnose-*: Diagnosis Actions (context/memory/dataflow/agent/docs/ │ -│ token_consumption) │ -│ → Execute pattern-based detection for each category │ -│ → Output: state.json (diagnosis.{category} field) │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ action-generate-report: Consolidated Report │ -│ → Generate markdown summary from state.diagnosis │ -│ → Prioritize issues by severity │ -│ → Output: state.json (final_report field) │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ action-propose-fixes: Fix Proposal Generation │ -│ → Generate fix strategies for each issue │ -│ → Create implementation plan │ -│ → Output: state.json (proposed_fixes field) │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ action-apply-fix: Apply Selected Fix │ -│ → User selects fix to apply │ -│ → Execute fix with backup │ -│ → Update state with fix result │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ action-verify: Verification │ -│ → Re-run affected diagnosis │ -│ → Check quality gates │ -│ → Update iteration count │ -├─────────────────────────────────────────────────────────────────────────────┤ -│ action-complete: Finalization │ -│ → Set status='completed' │ -│ → Final report already in state.json (final_report field) │ -│ → Output: state.json (final) │ -└─────────────────────────────────────────────────────────────────────────────┘ -``` 
- -## Directory Setup - -```javascript -const timestamp = new Date().toISOString().slice(0,19).replace(/[-:T]/g, ''); -const workDir = `.workflow/.scratchpad/skill-tuning-${timestamp}`; - -// Simplified: Only backups dir needed, diagnosis results go into state.json -Bash(`mkdir -p "${workDir}/backups"`); -``` - -## Output Structure - -``` -.workflow/.scratchpad/skill-tuning-{timestamp}/ -├── state.json # Single source of truth (all results consolidated) -│ ├── diagnosis.* # All diagnosis results embedded -│ ├── issues[] # Found issues -│ ├── proposed_fixes[] # Fix proposals -│ └── final_report # Markdown summary (on completion) -└── backups/ - └── {skill-name}-backup/ # Original skill files backup -``` - -> **Token Optimization**: All outputs consolidated into state.json. No separate diagnosis files or report files. - -## State Schema - -详细状态结构定义请参阅 [phases/state-schema.md](phases/state-schema.md)。 - -核心状态字段: -- `status`: 工作流状态 (pending/running/completed/failed) -- `target_skill`: 目标 skill 信息 -- `diagnosis`: 各维度诊断结果 -- `issues`: 发现的问题列表 -- `proposed_fixes`: 建议的修复方案 - ---- - -## Action Reference Guide - -Navigation and entry points for each action in the autonomous workflow: - -### Core Orchestration - -**Document**: 🔗 [phases/orchestrator.md](phases/orchestrator.md) - -| Attribute | Value | -|-----------|-------| -| **Purpose** | Drive tuning workflow via state-driven action selection | -| **Decision Logic** | Termination checks → Action preconditions → Selection | -| **Related** | [phases/state-schema.md](phases/state-schema.md) | - ---- - -### Initialization & Requirements - -| Action | Document | Purpose | Preconditions | -|--------|----------|---------|---------------| -| **action-init** | [action-init.md](phases/actions/action-init.md) | Initialize session, backup target skill | `state.status === 'pending'` | -| **action-analyze-requirements** | [action-analyze-requirements.md](phases/actions/action-analyze-requirements.md) | Decompose user request into dimensions via Gemini CLI | After init, before diagnosis | - ---- - -### Diagnosis Actions - -| Action | Document | Purpose | Detects | -|--------|----------|---------|---------| -| **action-diagnose-context** | [action-diagnose-context.md](phases/actions/action-diagnose-context.md) | Context explosion analysis | Token accumulation, multi-turn bloat | -| **action-diagnose-memory** | [action-diagnose-memory.md](phases/actions/action-diagnose-memory.md) | Long-tail forgetting analysis | Early constraint loss | -| **action-diagnose-dataflow** | [action-diagnose-dataflow.md](phases/actions/action-diagnose-dataflow.md) | Data flow analysis | State inconsistency, format drift | -| **action-diagnose-agent** | [action-diagnose-agent.md](phases/actions/action-diagnose-agent.md) | Agent coordination analysis | Call chain failures, merge issues | -| **action-diagnose-docs** | [action-diagnose-docs.md](phases/actions/action-diagnose-docs.md) | Documentation structure analysis | Missing specs, unclear flow | -| **action-diagnose-token-consumption** | [action-diagnose-token-consumption.md](phases/actions/action-diagnose-token-consumption.md) | Token consumption analysis | Verbose prompts, redundant I/O | - ---- - -### Analysis & Reporting - -| Action | Document | Purpose | Output | -|--------|----------|---------|--------| -| **action-gemini-analysis** | [action-gemini-analysis.md](phases/actions/action-gemini-analysis.md) | Deep analysis via Gemini CLI | Custom issue diagnosis | -| **action-generate-report** | 
[action-generate-report.md](phases/actions/action-generate-report.md) | Consolidate diagnosis results | `state.final_report` | -| **action-propose-fixes** | [action-propose-fixes.md](phases/actions/action-propose-fixes.md) | Generate fix strategies | `state.proposed_fixes[]` | - ---- - -### Fix & Verification - -| Action | Document | Purpose | Preconditions | -|--------|----------|---------|---------------| -| **action-apply-fix** | [action-apply-fix.md](phases/actions/action-apply-fix.md) | Apply selected fix with backup | User selected fix | -| **action-verify** | [action-verify.md](phases/actions/action-verify.md) | Re-run diagnosis, check quality gates | After fix applied | - ---- - -### Termination - -| Action | Document | Purpose | Trigger | -|--------|----------|---------|---------| -| **action-complete** | [action-complete.md](phases/actions/action-complete.md) | Finalize session with report | All quality gates pass | -| **action-abort** | [action-abort.md](phases/actions/action-abort.md) | Abort session, restore backup | Error limit exceeded | - ---- - -## Template Reference - -| Template | Purpose | When Used | -|----------|---------|-----------| -| [templates/diagnosis-report.md](templates/diagnosis-report.md) | Diagnosis report structure | action-generate-report | -| [templates/fix-proposal.md](templates/fix-proposal.md) | Fix proposal format | action-propose-fixes | - ---- +After completion, review: +- `.workflow/.scratchpad/skill-tuning-{ts}/state.json` - Full state with final_report +- `state.final_report` - Markdown summary (in state.json) +- `state.applied_fixes` - List of applied fixes with verification results ## Reference Documents | Document | Purpose | |----------|---------| -| [phases/orchestrator.md](phases/orchestrator.md) | Orchestrator decision logic | +| [specs/problem-taxonomy.md](specs/problem-taxonomy.md) | Classification + detection patterns | +| [specs/tuning-strategies.md](specs/tuning-strategies.md) | Fix implementation guide | +| [specs/dimension-mapping.md](specs/dimension-mapping.md) | Dimension ↔ Spec mapping | +| [specs/quality-gates.md](specs/quality-gates.md) | Quality verification criteria | +| [phases/orchestrator.md](phases/orchestrator.md) | Workflow orchestration | | [phases/state-schema.md](phases/state-schema.md) | State structure definition | -| [phases/actions/action-init.md](phases/actions/action-init.md) | Initialize tuning session | -| [phases/actions/action-analyze-requirements.md](phases/actions/action-analyze-requirements.md) | Requirement analysis (NEW) | -| [phases/actions/action-diagnose-context.md](phases/actions/action-diagnose-context.md) | Context explosion diagnosis | -| [phases/actions/action-diagnose-memory.md](phases/actions/action-diagnose-memory.md) | Long-tail forgetting diagnosis | -| [phases/actions/action-diagnose-dataflow.md](phases/actions/action-diagnose-dataflow.md) | Data flow diagnosis | -| [phases/actions/action-diagnose-agent.md](phases/actions/action-diagnose-agent.md) | Agent coordination diagnosis | -| [phases/actions/action-diagnose-docs.md](phases/actions/action-diagnose-docs.md) | Documentation structure diagnosis | -| [phases/actions/action-diagnose-token-consumption.md](phases/actions/action-diagnose-token-consumption.md) | Token consumption diagnosis | -| [phases/actions/action-generate-report.md](phases/actions/action-generate-report.md) | Report generation | -| [phases/actions/action-propose-fixes.md](phases/actions/action-propose-fixes.md) | Fix proposal | -| 
[phases/actions/action-apply-fix.md](phases/actions/action-apply-fix.md) | Fix application | -| [phases/actions/action-verify.md](phases/actions/action-verify.md) | Verification | -| [phases/actions/action-complete.md](phases/actions/action-complete.md) | Finalization | -| [specs/problem-taxonomy.md](specs/problem-taxonomy.md) | Problem classification | -| [specs/tuning-strategies.md](specs/tuning-strategies.md) | Fix strategies | -| [specs/dimension-mapping.md](specs/dimension-mapping.md) | Dimension to Spec mapping (NEW) | -| [specs/quality-gates.md](specs/quality-gates.md) | Quality criteria | +| [phases/actions/](phases/actions/) | Individual action implementations | diff --git a/.claude/skills/skill-tuning/phases/orchestrator.md b/.claude/skills/skill-tuning/phases/orchestrator.md index a3d6516b..fc0a1d9b 100644 --- a/.claude/skills/skill-tuning/phases/orchestrator.md +++ b/.claude/skills/skill-tuning/phases/orchestrator.md @@ -1,28 +1,57 @@ # Orchestrator -Autonomous orchestrator for skill-tuning workflow. Reads current state and selects the next action based on diagnosis progress and quality gates. +State-driven orchestrator for autonomous skill-tuning workflow. ## Role -Drive the tuning workflow by: -1. Reading current session state -2. Selecting the appropriate next action -3. Executing the action via sub-agent -4. Updating state with results -5. Repeating until termination conditions met +Read state → Select action → Execute → Update → Repeat until termination. + +## Decision Logic + +### Termination Checks (priority order) + +| Condition | Action | +|-----------|--------| +| `status === 'user_exit'` | null (exit) | +| `status === 'completed'` | null (exit) | +| `error_count >= max_errors` | action-abort | +| `iteration_count >= max_iterations` | action-complete | +| `quality_gate === 'pass'` | action-complete | + +### Action Selection + +| Priority | Condition | Action | +|----------|-----------|--------| +| 1 | `status === 'pending'` | action-init | +| 2 | Init done, req analysis missing | action-analyze-requirements | +| 3 | Req needs clarification | null (wait) | +| 4 | Req coverage unsatisfied | action-gemini-analysis | +| 5 | Gemini requested/critical issues | action-gemini-analysis | +| 6 | Gemini running | null (wait) | +| 7 | Diagnosis pending (in order) | action-diagnose-{type} | +| 8 | All diagnosis done, no report | action-generate-report | +| 9 | Report done, issues exist | action-propose-fixes | +| 10 | Pending fixes exist | action-apply-fix | +| 11 | Fixes need verification | action-verify | +| 12 | New iteration needed | action-diagnose-context (restart) | +| 13 | Default | action-complete | + +**Diagnosis Order**: context → memory → dataflow → agent → docs → token_consumption + +**Gemini Triggers**: +- `gemini_analysis_requested === true` +- Critical issues detected +- Focus areas include: architecture, prompt, performance, custom +- Second iteration with unresolved issues ## State Management -### Read State - ```javascript +// Read const state = JSON.parse(Read(`${workDir}/state.json`)); -``` -### Update State - -```javascript -function updateState(updates) { +// Update (with sliding window for history) +function updateState(workDir, updates) { const state = JSON.parse(Read(`${workDir}/state.json`)); const newState = { ...state, @@ -34,344 +63,127 @@ function updateState(updates) { } ``` -## Decision Logic - -```javascript -function selectNextAction(state) { - // === Termination Checks === - - // User exit - if (state.status === 'user_exit') return null; - - // 
Completed - if (state.status === 'completed') return null; - - // Error limit exceeded - if (state.error_count >= state.max_errors) { - return 'action-abort'; - } - - // Max iterations exceeded - if (state.iteration_count >= state.max_iterations) { - return 'action-complete'; - } - - // === Action Selection === - - // 1. Not initialized yet - if (state.status === 'pending') { - return 'action-init'; - } - - // 1.5. Requirement analysis (在 init 后,diagnosis 前) - if (state.status === 'running' && - state.completed_actions.includes('action-init') && - !state.completed_actions.includes('action-analyze-requirements')) { - return 'action-analyze-requirements'; - } - - // 1.6. 如果需求分析发现歧义需要澄清,暂停等待用户 - if (state.requirement_analysis?.status === 'needs_clarification') { - return null; // 等待用户澄清后继续 - } - - // 1.7. 如果需求分析覆盖度不足,优先触发 Gemini 深度分析 - if (state.requirement_analysis?.coverage?.status === 'unsatisfied' && - !state.completed_actions.includes('action-gemini-analysis')) { - return 'action-gemini-analysis'; - } - - // 2. Check if Gemini analysis is requested or needed - if (shouldTriggerGeminiAnalysis(state)) { - return 'action-gemini-analysis'; - } - - // 3. Check if Gemini analysis is running - if (state.gemini_analysis?.status === 'running') { - // Wait for Gemini analysis to complete - return null; // Orchestrator will be re-triggered when CLI completes - } - - // 4. Run diagnosis in order (only if not completed) - const diagnosisOrder = ['context', 'memory', 'dataflow', 'agent', 'docs', 'token_consumption']; - - for (const diagType of diagnosisOrder) { - if (state.diagnosis[diagType] === null) { - // Check if user wants to skip this diagnosis - if (!state.focus_areas.length || state.focus_areas.includes(diagType)) { - return `action-diagnose-${diagType}`; - } - // For docs diagnosis, also check 'all' focus_area - if (diagType === 'docs' && state.focus_areas.includes('all')) { - return 'action-diagnose-docs'; - } - } - } - - // 5. All diagnosis complete, generate report if not done - const allDiagnosisComplete = diagnosisOrder.every( - d => state.diagnosis[d] !== null || !state.focus_areas.includes(d) - ); - - if (allDiagnosisComplete && !state.completed_actions.includes('action-generate-report')) { - return 'action-generate-report'; - } - - // 6. Report generated, propose fixes if not done - if (state.completed_actions.includes('action-generate-report') && - state.proposed_fixes.length === 0 && - state.issues.length > 0) { - return 'action-propose-fixes'; - } - - // 7. Fixes proposed, check if user wants to apply - if (state.proposed_fixes.length > 0 && state.pending_fixes.length > 0) { - return 'action-apply-fix'; - } - - // 8. Fixes applied, verify - if (state.applied_fixes.length > 0 && - state.applied_fixes.some(f => f.verification_result === 'pending')) { - return 'action-verify'; - } - - // 9. Quality gate check - if (state.quality_gate === 'pass') { - return 'action-complete'; - } - - // 10. More iterations needed - if (state.iteration_count < state.max_iterations && - state.quality_gate !== 'pass' && - state.issues.some(i => i.severity === 'critical' || i.severity === 'high')) { - // Reset diagnosis for re-evaluation - return 'action-diagnose-context'; // Start new iteration - } - - // 11. 
Default: complete - return 'action-complete'; -} - -/** - * 判断是否需要触发 Gemini CLI 分析 - */ -function shouldTriggerGeminiAnalysis(state) { - // 已完成 Gemini 分析,不再触发 - if (state.gemini_analysis?.status === 'completed') { - return false; - } - - // 用户显式请求 - if (state.gemini_analysis_requested === true) { - return true; - } - - // 发现 critical 问题且未进行深度分析 - if (state.issues.some(i => i.severity === 'critical') && - !state.completed_actions.includes('action-gemini-analysis')) { - return true; - } - - // 用户指定了需要 Gemini 分析的 focus_areas - const geminiAreas = ['architecture', 'prompt', 'performance', 'custom']; - if (state.focus_areas.some(area => geminiAreas.includes(area))) { - return true; - } - - // 标准诊断完成但问题未得到解决,需要深度分析 - const diagnosisComplete = ['context', 'memory', 'dataflow', 'agent', 'docs'].every( - d => state.diagnosis[d] !== null - ); - if (diagnosisComplete && - state.issues.length > 0 && - state.iteration_count > 0 && - !state.completed_actions.includes('action-gemini-analysis')) { - // 第二轮迭代如果问题仍存在,触发 Gemini 分析 - return true; - } - - return false; -} -``` - ## Execution Loop ```javascript async function runOrchestrator(workDir) { - console.log('=== Skill Tuning Orchestrator Started ==='); - let iteration = 0; - const MAX_LOOP_ITERATIONS = 50; // Safety limit + const MAX_LOOP = 50; - while (iteration < MAX_LOOP_ITERATIONS) { - iteration++; - - // 1. Read current state + while (iteration++ < MAX_LOOP) { + // 1. Read state const state = JSON.parse(Read(`${workDir}/state.json`)); - console.log(`[Loop ${iteration}] Status: ${state.status}, Action: ${state.current_action}`); - // 2. Select next action + // 2. Select action const actionId = selectNextAction(state); + if (!actionId) break; - if (!actionId) { - console.log('No action selected, terminating orchestrator.'); - break; - } - - console.log(`[Loop ${iteration}] Executing: ${actionId}`); - - // 3. Update state: current action - // FIX CTX-001: sliding window for action_history (keep last 10) - updateState({ + // 3. Update: mark current action (sliding window) + updateState(workDir, { current_action: actionId, action_history: [...state.action_history, { action: actionId, - started_at: new Date().toISOString(), - completed_at: null, - result: null, - output_files: [] - }].slice(-10) // Sliding window: prevent unbounded growth + started_at: new Date().toISOString() + }].slice(-10) // Keep last 10 }); // 4. Execute action try { const actionPrompt = Read(`phases/actions/${actionId}.md`); - // FIX CTX-003: Pass state path + key fields only instead of full state + + // Pass state path + key fields (not full state) const stateKeyInfo = { status: state.status, iteration_count: state.iteration_count, - issues_by_severity: state.issues_by_severity, quality_gate: state.quality_gate, - current_action: state.current_action, - completed_actions: state.completed_actions, - user_issue_description: state.user_issue_description, target_skill: { name: state.target_skill.name, path: state.target_skill.path } }; - const stateKeyJson = JSON.stringify(stateKeyInfo, null, 2); const result = await Task({ subagent_type: 'universal-executor', run_in_background: false, prompt: ` [CONTEXT] -You are executing action "${actionId}" for skill-tuning workflow. 
+Action: ${actionId} Work directory: ${workDir} [STATE KEY INFO] -${stateKeyJson} +${JSON.stringify(stateKeyInfo, null, 2)} [FULL STATE PATH] ${workDir}/state.json -(Read full state from this file if you need additional fields) +(Read full state from this file if needed) [ACTION INSTRUCTIONS] ${actionPrompt} -[OUTPUT REQUIREMENT] -After completing the action: -1. Write any output files to the work directory -2. Return a JSON object with: - - stateUpdates: object with state fields to update - - outputFiles: array of files created - - summary: brief description of what was done +[OUTPUT] +Return JSON: { stateUpdates: {}, outputFiles: [], summary: "..." } ` }); - // 5. Parse result and update state - let actionResult; - try { - actionResult = JSON.parse(result); - } catch (e) { - actionResult = { - stateUpdates: {}, - outputFiles: [], - summary: result - }; - } + // 5. Parse result + let actionResult = result; + try { actionResult = JSON.parse(result); } catch {} - // 6. Update state: action complete - const updatedHistory = [...state.action_history]; - updatedHistory[updatedHistory.length - 1] = { - ...updatedHistory[updatedHistory.length - 1], - completed_at: new Date().toISOString(), - result: 'success', - output_files: actionResult.outputFiles || [] - }; - - updateState({ + // 6. Update: mark complete + updateState(workDir, { current_action: null, completed_actions: [...state.completed_actions, actionId], - action_history: updatedHistory, ...actionResult.stateUpdates }); - console.log(`[Loop ${iteration}] Completed: ${actionId}`); - } catch (error) { - console.log(`[Loop ${iteration}] Error in ${actionId}: ${error.message}`); - - // Error handling - // FIX CTX-002: sliding window for errors (keep last 5) - updateState({ + // Error handling (sliding window for errors) + updateState(workDir, { current_action: null, errors: [...state.errors, { action: actionId, message: error.message, - timestamp: new Date().toISOString(), - recoverable: true - }].slice(-5), // Sliding window: prevent unbounded growth + timestamp: new Date().toISOString() + }].slice(-5), // Keep last 5 error_count: state.error_count + 1 }); } } - - console.log('=== Skill Tuning Orchestrator Finished ==='); } ``` -## Action Catalog +## Action Preconditions -| Action | Purpose | Preconditions | Effects | -|--------|---------|---------------|---------| -| [action-init](actions/action-init.md) | Initialize tuning session | status === 'pending' | Creates work dirs, backup, sets status='running' | -| [action-analyze-requirements](actions/action-analyze-requirements.md) | Analyze user requirements | init completed | Sets requirement_analysis, optimizes focus_areas | -| [action-diagnose-context](actions/action-diagnose-context.md) | Analyze context explosion | status === 'running' | Sets diagnosis.context | -| [action-diagnose-memory](actions/action-diagnose-memory.md) | Analyze long-tail forgetting | status === 'running' | Sets diagnosis.memory | -| [action-diagnose-dataflow](actions/action-diagnose-dataflow.md) | Analyze data flow issues | status === 'running' | Sets diagnosis.dataflow | -| [action-diagnose-agent](actions/action-diagnose-agent.md) | Analyze agent coordination | status === 'running' | Sets diagnosis.agent | -| [action-diagnose-docs](actions/action-diagnose-docs.md) | Analyze documentation structure | status === 'running', focus includes 'docs' | Sets diagnosis.docs | -| [action-gemini-analysis](actions/action-gemini-analysis.md) | Deep analysis via Gemini CLI | User request OR critical issues | Sets 
gemini_analysis, adds issues | -| [action-generate-report](actions/action-generate-report.md) | Generate consolidated report | All diagnoses complete | Creates tuning-report.md | -| [action-propose-fixes](actions/action-propose-fixes.md) | Generate fix proposals | Report generated, issues > 0 | Sets proposed_fixes | -| [action-apply-fix](actions/action-apply-fix.md) | Apply selected fix | pending_fixes > 0 | Updates applied_fixes | -| [action-verify](actions/action-verify.md) | Verify applied fixes | applied_fixes with pending verification | Updates verification_result | -| [action-complete](actions/action-complete.md) | Finalize session | quality_gate='pass' OR max_iterations | Sets status='completed' | -| [action-abort](actions/action-abort.md) | Abort on errors | error_count >= max_errors | Sets status='failed' | +| Action | Precondition | +|--------|-------------| +| action-init | status='pending' | +| action-analyze-requirements | Init complete, not done | +| action-diagnose-* | status='running', focus area includes type | +| action-gemini-analysis | Requested OR critical issues OR high complexity | +| action-generate-report | All diagnosis complete | +| action-propose-fixes | Report generated, issues > 0 | +| action-apply-fix | pending_fixes > 0 | +| action-verify | applied_fixes with pending verification | +| action-complete | Quality gates pass OR max iterations | +| action-abort | error_count >= max_errors | -## Termination Conditions +## User Interaction Points -- `status === 'completed'`: Normal completion -- `status === 'user_exit'`: User requested exit -- `status === 'failed'`: Unrecoverable error -- `requirement_analysis.status === 'needs_clarification'`: Waiting for user clarification (暂停,非终止) -- `error_count >= max_errors`: Too many errors (default: 3) -- `iteration_count >= max_iterations`: Max iterations reached (default: 5) -- `quality_gate === 'pass'`: All quality criteria met +1. **action-init**: Confirm target skill, describe issue +2. **action-propose-fixes**: Select which fixes to apply +3. **action-verify**: Review verification, decide to continue or stop +4. **action-complete**: Review final summary ## Error Recovery -| Error Type | Recovery Strategy | -|------------|-------------------| +| Error Type | Strategy | +|------------|----------| | Action execution failed | Retry up to 3 times, then skip | | State parse error | Restore from backup | | File write error | Retry with alternative path | | User abort | Save state and exit gracefully | -## User Interaction Points +## Termination Conditions -The orchestrator pauses for user input at these points: - -1. **action-init**: Confirm target skill and describe issue -2. **action-propose-fixes**: Select which fixes to apply -3. **action-verify**: Review verification results, decide to continue or stop -4. 
**action-complete**: Review final summary +- Normal: `status === 'completed'`, `quality_gate === 'pass'` +- User: `status === 'user_exit'` +- Error: `status === 'failed'`, `error_count >= max_errors` +- Iteration limit: `iteration_count >= max_iterations` +- Clarification wait: `requirement_analysis.status === 'needs_clarification'` (pause, not terminate) diff --git a/.claude/skills/skill-tuning/specs/problem-taxonomy.md b/.claude/skills/skill-tuning/specs/problem-taxonomy.md index 01cc7088..7aefad94 100644 --- a/.claude/skills/skill-tuning/specs/problem-taxonomy.md +++ b/.claude/skills/skill-tuning/specs/problem-taxonomy.md @@ -2,276 +2,174 @@ Classification of skill execution issues with detection patterns and severity criteria. -## When to Use +## Quick Reference -| Phase | Usage | Section | -|-------|-------|---------| -| All Diagnosis Actions | Issue classification | All sections | -| action-propose-fixes | Strategy selection | Fix Mapping | -| action-generate-report | Severity assessment | Severity Criteria | +| Category | Priority | Detection | Fix Strategy | +|----------|----------|-----------|--------------| +| Authoring Violation | P0 | Intermediate files, state bloat, file relay | eliminate_intermediate, minimize_state | +| Data Flow Disruption | P1 | Scattered state, inconsistent formats | state_centralization, schema_enforcement | +| Agent Coordination | P2 | Fragile chains, no error handling | error_wrapping, result_validation | +| Context Explosion | P3 | Unbounded history, full content passing | sliding_window, path_reference | +| Long-tail Forgetting | P4 | Early constraint loss | constraint_injection, checkpoint_restore | +| Token Consumption | P5 | Verbose prompts, redundant I/O | prompt_compression, lazy_loading | +| Doc Redundancy | P6 | Repeated definitions | consolidate_to_ssot | +| Doc Conflict | P7 | Inconsistent definitions | reconcile_definitions | --- -## Problem Categories +## 0. Authoring Principles Violation (P0) -### 0. Authoring Principles Violation (P0) - -**Definition**: 违反 skill 撰写首要准则(简洁高效、去除存储、上下文流转)。 - -**Root Causes**: -- 不必要的中间文件存储 -- State schema 过度膨胀 -- 文件中转代替上下文传递 -- 重复数据存储 +**Definition**: Violates skill authoring principles (simplicity, no intermediate files, context passing). **Detection Patterns**: -| Pattern ID | Regex/Check | Description | -|------------|-------------|-------------| -| APV-001 | `/Write\([^)]*temp-|intermediate-/` | 中间文件写入 | -| APV-002 | `/Write\([^)]+\)[\s\S]{0,50}Read\([^)]+\)/` | 写后立即读(文件中转) | -| APV-003 | State schema > 15 fields | State 字段过多 | -| APV-004 | `/_history\s*[.=].*push|concat/` | 无限增长数组 | -| APV-005 | `/debug_|_cache|_temp/` in state | 调试/缓存字段残留 | -| APV-006 | Same data in multiple state fields | 重复存储 | +| Pattern ID | Check | Description | +|------------|-------|-------------| +| APV-001 | `/Write\([^)]*temp-\|intermediate-/` | Intermediate file writes | +| APV-002 | `/Write\([^)]+\)[\s\S]{0,50}Read\([^)]+\)/` | Write-then-read relay | +| APV-003 | State schema > 15 fields | Excessive state fields | +| APV-004 | `/_history\s*[.=].*push\|concat/` | Unbounded array growth | +| APV-005 | `/debug_\|_cache\|_temp/` in state | Debug/cache field residue | +| APV-006 | Same data in multiple fields | Duplicate storage | -**Impact Levels**: -- **Critical**: 中间文件 > 5 个,严重违反原则 -- **High**: State 字段 > 20 个,或存在文件中转 -- **Medium**: 存在调试字段或轻微冗余 -- **Low**: 轻微的命名不规范 +**Impact**: Critical (>5 intermediate files), High (>20 state fields), Medium (debug fields), Low (naming issues) --- -### 1. Context Explosion (P2) +## 1. 
Context Explosion (P3) -**Definition**: Excessive token accumulation causing prompt size to grow unbounded. - -**Root Causes**: -- Unbounded conversation history -- Full content passing instead of references -- Missing summarization mechanisms -- Agent returning full output instead of path+summary +**Definition**: Unbounded token accumulation causing prompt size growth. **Detection Patterns**: -| Pattern ID | Regex/Check | Description | -|------------|-------------|-------------| +| Pattern ID | Check | Description | +|------------|-------|-------------| | CTX-001 | `/history\s*[.=].*push\|concat/` | History array growth | | CTX-002 | `/JSON\.stringify\s*\(\s*state\s*\)/` | Full state serialization | | CTX-003 | `/Read\([^)]+\)\s*[\+,]/` | Multiple file content concatenation | | CTX-004 | `/return\s*\{[^}]*content:/` | Agent returning full content | -| CTX-005 | File length > 5000 chars without summarize | Long prompt without compression | +| CTX-005 | File > 5000 chars without summarization | Long prompts | -**Impact Levels**: -- **Critical**: Context exceeds model limit (128K tokens) -- **High**: Context > 50K tokens per iteration -- **Medium**: Context grows 10%+ per iteration -- **Low**: Potential for growth but currently manageable +**Impact**: Critical (>128K tokens), High (>50K per iteration), Medium (10%+ growth), Low (manageable) --- -### 2. Long-tail Forgetting (P3) +## 2. Long-tail Forgetting (P4) -**Definition**: Loss of early instructions, constraints, or goals in long execution chains. - -**Root Causes**: -- No explicit constraint propagation -- Reliance on implicit context -- Missing checkpoint/restore mechanisms -- State schema without requirements field +**Definition**: Loss of early instructions/constraints in long chains. **Detection Patterns**: -| Pattern ID | Regex/Check | Description | -|------------|-------------|-------------| -| MEM-001 | Later phases missing constraint reference | Constraint not carried forward | +| Pattern ID | Check | Description | +|------------|-------|-------------| +| MEM-001 | Later phases missing constraint reference | Constraint not forwarded | | MEM-002 | `/\[TASK\][^[]*(?!\[CONSTRAINTS\])/` | Task without constraints section | | MEM-003 | Key phases without checkpoint | Missing state preservation | -| MEM-004 | State schema lacks `original_requirements` | No constraint persistence | +| MEM-004 | State lacks `original_requirements` | No constraint persistence | | MEM-005 | No verification phase | Output not checked against intent | -**Impact Levels**: -- **Critical**: Original goal completely lost -- **High**: Key constraints ignored in output -- **Medium**: Some requirements missing -- **Low**: Minor goal drift +**Impact**: Critical (goal lost), High (constraints ignored), Medium (some missing), Low (minor drift) --- -### 3. Data Flow Disruption (P0) +## 3. Data Flow Disruption (P1) -**Definition**: Inconsistent state management causing data loss or corruption. - -**Root Causes**: -- Multiple state storage locations -- Inconsistent field naming -- Missing schema validation -- Format transformation without normalization +**Definition**: Inconsistent state management causing data loss/corruption. 
**Detection Patterns**: -| Pattern ID | Regex/Check | Description | -|------------|-------------|-------------| +| Pattern ID | Check | Description | +|------------|-------|-------------| | DF-001 | Multiple state file writes | Scattered state storage | | DF-002 | Same concept, different names | Field naming inconsistency | | DF-003 | JSON.parse without validation | Missing schema validation | | DF-004 | Files written but never read | Orphaned outputs | | DF-005 | Autonomous skill without state-schema | Undefined state structure | -**Impact Levels**: -- **Critical**: Data loss or corruption -- **High**: State inconsistency between phases -- **Medium**: Potential for inconsistency -- **Low**: Minor naming inconsistencies +**Impact**: Critical (data loss), High (state inconsistency), Medium (potential inconsistency), Low (naming) --- -### 4. Agent Coordination Failure (P1) +## 4. Agent Coordination Failure (P2) **Definition**: Fragile agent call patterns causing cascading failures. -**Root Causes**: -- Missing error handling in Task calls -- No result validation -- Inconsistent agent configurations -- Deeply nested agent calls - **Detection Patterns**: -| Pattern ID | Regex/Check | Description | -|------------|-------------|-------------| +| Pattern ID | Check | Description | +|------------|-------|-------------| | AGT-001 | Task without try-catch | Missing error handling | | AGT-002 | Result used without validation | No return value check | -| AGT-003 | > 3 different agent types | Agent type proliferation | +| AGT-003 | >3 different agent types | Agent type proliferation | | AGT-004 | Nested Task in prompt | Agent calling agent | | AGT-005 | Task used but not in allowed-tools | Tool declaration mismatch | | AGT-006 | Multiple return formats | Inconsistent agent output | -**Impact Levels**: -- **Critical**: Workflow crash on agent failure -- **High**: Unpredictable agent behavior -- **Medium**: Occasional coordination issues -- **Low**: Minor inconsistencies +**Impact**: Critical (crash on failure), High (unpredictable behavior), Medium (occasional issues), Low (minor) --- -### 5. Documentation Redundancy (P5) +## 5. Documentation Redundancy (P6) -**Definition**: 同一定义(如 State Schema、映射表、类型定义)在多个文件中重复出现,导致维护困难和不一致风险。 - -**Root Causes**: -- 缺乏单一真相来源 (SSOT) -- 复制粘贴代替引用 -- 硬编码配置代替集中管理 +**Definition**: Same definition (State Schema, mappings, types) repeated across files. **Detection Patterns**: -| Pattern ID | Regex/Check | Description | -|------------|-------------|-------------| -| DOC-RED-001 | 跨文件语义比较 | 找到 State Schema 等核心概念的重复定义 | -| DOC-RED-002 | 代码块 vs 规范表对比 | action 文件中硬编码与 spec 文档的重复 | -| DOC-RED-003 | `/interface\s+(\w+)/` 同名扫描 | 多处定义的 interface/type | +| Pattern ID | Check | Description | +|------------|-------|-------------| +| DOC-RED-001 | Cross-file semantic comparison | State Schema duplication | +| DOC-RED-002 | Code block vs spec comparison | Hardcoded config duplication | +| DOC-RED-003 | `/interface\s+(\w+)/` same-name scan | Interface/type duplication | -**Impact Levels**: -- **High**: 核心定义(State Schema, 映射表)重复 -- **Medium**: 类型定义重复 -- **Low**: 示例代码重复 +**Impact**: High (core definitions), Medium (type definitions), Low (example code) --- -### 6. Token Consumption (P6) +## 6. Token Consumption (P5) -**Definition**: Excessive token usage from verbose prompts, large state objects, or inefficient I/O patterns. 
- -**Root Causes**: -- Long static prompts without compression -- State schema with too many fields -- Full content embedding instead of path references -- Arrays growing unbounded without sliding windows -- Write-then-read file relay patterns +**Definition**: Excessive token usage from verbose prompts, large state, inefficient I/O. **Detection Patterns**: -| Pattern ID | Regex/Check | Description | -|------------|-------------|-------------| +| Pattern ID | Check | Description | +|------------|-------|-------------| | TKN-001 | File size > 4KB | Verbose prompt files | | TKN-002 | State fields > 15 | Excessive state schema | | TKN-003 | `/Read\([^)]+\)\s*[\+,]/` | Full content passing | | TKN-004 | `/.push\|concat(?!.*\.slice)/` | Unbounded array growth | | TKN-005 | `/Write\([^)]+\)[\s\S]{0,100}Read\([^)]+\)/` | Write-then-read pattern | -**Impact Levels**: -- **High**: Multiple TKN-003/TKN-004 issues causing significant token waste -- **Medium**: Several verbose files or state bloat -- **Low**: Minor optimization opportunities +**Impact**: High (multiple TKN-003/004), Medium (verbose files), Low (minor optimization) --- -### 7. Documentation Conflict (P7) +## 7. Documentation Conflict (P7) -**Definition**: 同一概念在不同文件中定义不一致,导致行为不可预测和文档误导。 - -**Root Causes**: -- 定义更新后未同步其他位置 -- 实现与文档漂移 -- 缺乏一致性校验 +**Definition**: Same concept defined inconsistently across files. **Detection Patterns**: -| Pattern ID | Regex/Check | Description | -|------------|-------------|-------------| -| DOC-CON-001 | 键值一致性校验 | 同一键(如优先级)在不同文件中值不同 | -| DOC-CON-002 | 实现 vs 文档对比 | 硬编码配置与文档对应项不一致 | +| Pattern ID | Check | Description | +|------------|-------|-------------| +| DOC-CON-001 | Key-value consistency check | Same key, different values | +| DOC-CON-002 | Implementation vs docs comparison | Hardcoded vs documented mismatch | -**Impact Levels**: -- **Critical**: 优先级/类别定义冲突 -- **High**: 策略映射不一致 -- **Medium**: 示例与实际不符 +**Impact**: Critical (priority/category conflicts), High (strategy mapping inconsistency), Medium (example mismatch) --- -## Severity Criteria - -### Global Severity Matrix - -| Severity | Definition | Action Required | -|----------|------------|-----------------| -| **Critical** | Blocks execution or causes data loss | Immediate fix required | -| **High** | Significantly impacts reliability | Should fix before deployment | -| **Medium** | Affects quality or maintainability | Fix in next iteration | -| **Low** | Minor improvement opportunity | Optional fix | - -### Severity Calculation +## Severity Calculation ```javascript -function calculateIssueSeverity(issue) { - const weights = { - impact_on_execution: 40, // Does it block workflow? - data_integrity_risk: 30, // Can it cause data loss? - frequency: 20, // How often does it occur? - complexity_to_fix: 10 // How hard to fix? 
- }; - +function calculateSeverity(issue) { + const weights = { execution: 40, data_integrity: 30, frequency: 20, complexity: 10 }; let score = 0; - // Impact on execution - if (issue.blocks_execution) score += weights.impact_on_execution; - else if (issue.degrades_execution) score += weights.impact_on_execution * 0.5; - - // Data integrity - if (issue.causes_data_loss) score += weights.data_integrity_risk; - else if (issue.causes_inconsistency) score += weights.data_integrity_risk * 0.5; - - // Frequency + if (issue.blocks_execution) score += weights.execution; + if (issue.causes_data_loss) score += weights.data_integrity; if (issue.occurs_every_run) score += weights.frequency; - else if (issue.occurs_sometimes) score += weights.frequency * 0.5; + if (issue.fix_complexity === 'low') score += weights.complexity; - // Complexity (inverse - easier to fix = higher priority) - if (issue.fix_complexity === 'low') score += weights.complexity_to_fix; - else if (issue.fix_complexity === 'medium') score += weights.complexity_to_fix * 0.5; - - // Map score to severity if (score >= 70) return 'critical'; if (score >= 50) return 'high'; if (score >= 30) return 'medium'; @@ -283,36 +181,30 @@ function calculateIssueSeverity(issue) { ## Fix Mapping -| Problem Type | Recommended Strategies | Priority Order | -|--------------|----------------------|----------------| -| **Authoring Principles Violation** | eliminate_intermediate_files, minimize_state, context_passing | 1, 2, 3 | -| Context Explosion | sliding_window, path_reference, context_summarization | 1, 2, 3 | -| Long-tail Forgetting | constraint_injection, state_constraints_field, checkpoint | 1, 2, 3 | -| Data Flow Disruption | state_centralization, schema_enforcement, field_normalization | 1, 2, 3 | -| Agent Coordination | error_wrapping, result_validation, flatten_nesting | 1, 2, 3 | -| **Token Consumption** | prompt_compression, lazy_loading, output_minimization, state_field_reduction | 1, 2, 3, 4 | -| **Documentation Redundancy** | consolidate_to_ssot, centralize_mapping_config | 1, 2 | -| **Documentation Conflict** | reconcile_conflicting_definitions | 1 | +| Problem | Strategies (priority order) | +|---------|---------------------------| +| Authoring Violation | eliminate_intermediate_files, minimize_state, context_passing | +| Context Explosion | sliding_window, path_reference, context_summarization | +| Long-tail Forgetting | constraint_injection, state_constraints_field, checkpoint | +| Data Flow Disruption | state_centralization, schema_enforcement, field_normalization | +| Agent Coordination | error_wrapping, result_validation, flatten_nesting | +| Token Consumption | prompt_compression, lazy_loading, output_minimization, state_field_reduction | +| Doc Redundancy | consolidate_to_ssot, centralize_mapping_config | +| Doc Conflict | reconcile_conflicting_definitions | --- ## Cross-Category Dependencies -Some issues may trigger others: - ``` -Context Explosion ──→ Long-tail Forgetting - (Large context causes important info to be pushed out) +Context Explosion → Long-tail Forgetting + (Large context pushes important info out) -Data Flow Disruption ──→ Agent Coordination Failure - (Inconsistent data causes agents to fail) +Data Flow Disruption → Agent Coordination Failure + (Inconsistent data causes agent failures) -Agent Coordination Failure ──→ Context Explosion - (Failed retries add to context) +Agent Coordination Failure → Context Explosion + (Failed retries add to context) ``` -When fixing, address in this order: -1. 
**P0 Data Flow** - Foundation for other fixes
-2. **P1 Agent Coordination** - Stability
-3. **P2 Context Explosion** - Efficiency
-4. **P3 Long-tail Forgetting** - Quality
+**Fix Order**: P1 Data Flow → P2 Agent Coordination → P3 Context Explosion → P4 Long-tail Forgetting
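+
+A small sketch of how the fix order and `calculateSeverity` above might be combined when queuing detected issues for repair — the helper name and category keys are hypothetical, not part of the taxonomy itself:
+
+```javascript
+// P1–P7 priorities from the taxonomy, dependency-aware categories first.
+const FIX_ORDER = [
+  'data_flow',            // P1
+  'agent_coordination',   // P2
+  'context_explosion',    // P3
+  'long_tail_forgetting', // P4
+  'token_consumption',    // P5
+  'doc_redundancy',       // P6
+  'doc_conflict'          // P7
+];
+const SEVERITY_RANK = { critical: 0, high: 1, medium: 2, low: 3 };
+
+function orderForFixing(issues) {
+  // Sort by category priority first, then by computed severity within a category.
+  return [...issues].sort((a, b) =>
+    (FIX_ORDER.indexOf(a.category) - FIX_ORDER.indexOf(b.category)) ||
+    (SEVERITY_RANK[calculateSeverity(a)] - SEVERITY_RANK[calculateSeverity(b)])
+  );
+}
+```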