mirror of
https://github.com/catlog22/Claude-Code-Workflow.git
synced 2026-02-05 01:50:27 +08:00
Refactor orchestrator logic and enhance problem taxonomy
- Updated orchestrator decision logic to improve state management and action selection. - Introduced structured termination checks and action selection criteria. - Enhanced state update mechanism with sliding window for action history and error tracking. - Revised problem taxonomy for skill execution issues, consolidating categories and refining detection patterns. - Improved severity calculation method for issue prioritization. - Streamlined fix mapping strategies for better clarity and usability.
This commit is contained in:
@@ -21,19 +21,79 @@ const skillDir = `.claude/skills/${config.skill_name}`;
|
||||
|
||||
### Step 2: 创建目录结构
|
||||
|
||||
```javascript
|
||||
// 基础目录
|
||||
Bash(`mkdir -p "${skillDir}/phases"`);
|
||||
Bash(`mkdir -p "${skillDir}/specs"`);
|
||||
Bash(`mkdir -p "${skillDir}/templates"`);
|
||||
#### 基础目录(所有模式)
|
||||
|
||||
// Autonomous 模式额外目录
|
||||
```javascript
|
||||
// 基础架构
|
||||
Bash(`mkdir -p "${skillDir}/{phases,specs,templates,scripts}"`);
|
||||
```
|
||||
|
||||
#### 执行模式特定目录
|
||||
|
||||
```
|
||||
config.execution_mode
|
||||
↓
|
||||
├─ "sequential"
|
||||
│ ↓ Creates:
|
||||
│ └─ phases/ (基础目录已包含)
|
||||
│ ├─ _orchestrator.md
|
||||
│ └─ workflow.json
|
||||
│
|
||||
└─ "autonomous" | "hybrid"
|
||||
↓ Creates:
|
||||
└─ phases/actions/
|
||||
├─ state-schema.md
|
||||
└─ *.md (动作文件)
|
||||
```
|
||||
|
||||
```javascript
|
||||
// Autonomous/Hybrid 模式额外目录
|
||||
if (config.execution_mode === 'autonomous' || config.execution_mode === 'hybrid') {
|
||||
Bash(`mkdir -p "${skillDir}/phases/actions"`);
|
||||
}
|
||||
```
|
||||
|
||||
// scripts 目录(默认创建,用于存放确定性脚本)
|
||||
Bash(`mkdir -p "${skillDir}/scripts"`);
|
||||
#### Context Strategy 特定目录 (P0 增强)
|
||||
|
||||
```javascript
|
||||
// ========== P0: 根据上下文策略创建目录 ==========
|
||||
const contextStrategy = config.context_strategy || 'file';
|
||||
|
||||
if (contextStrategy === 'file') {
|
||||
// 文件策略:创建上下文持久化目录
|
||||
Bash(`mkdir -p "${skillDir}/.scratchpad-template/context"`);
|
||||
|
||||
// 创建上下文模板文件
|
||||
Write(
|
||||
`${skillDir}/.scratchpad-template/context/.gitkeep`,
|
||||
"# Runtime context storage for file-based strategy"
|
||||
);
|
||||
}
|
||||
// 内存策略无需创建目录 (in-memory only)
|
||||
```
|
||||
|
||||
**目录树视图**:
|
||||
|
||||
```
|
||||
Sequential + File Strategy:
|
||||
.claude/skills/{skill-name}/
|
||||
├── phases/
|
||||
│ ├── _orchestrator.md
|
||||
│ ├── workflow.json
|
||||
│ ├── 01-*.md
|
||||
│ └── 02-*.md
|
||||
├── .scratchpad-template/
|
||||
│ └── context/ ← File strategy persistent storage
|
||||
└── specs/
|
||||
|
||||
Autonomous + Memory Strategy:
|
||||
.claude/skills/{skill-name}/
|
||||
├── phases/
|
||||
│ ├── orchestrator.md
|
||||
│ ├── state-schema.md
|
||||
│ └── actions/
|
||||
│ └── *.md
|
||||
└── specs/
|
||||
```
|
||||
|
||||
### Step 3: 生成 SKILL.md
|
||||
|
||||
@@ -52,66 +52,93 @@ const skillRoot = '.claude/skills/skill-generator';
|
||||
```javascript
|
||||
if (config.execution_mode === 'sequential') {
|
||||
const phases = config.sequential_config.phases;
|
||||
|
||||
|
||||
// ========== P0 增强: 生成声明式编排器 ==========
|
||||
const workflowOrchestrator = generateSequentialOrchestrator(config, phases);
|
||||
Write(`${skillDir}/phases/_orchestrator.md`, workflowOrchestrator);
|
||||
|
||||
|
||||
// ========== P0 增强: 生成工作流定义 ==========
|
||||
const workflowDef = generateWorkflowDefinition(config, phases);
|
||||
Write(`${skillDir}/workflow.json`, JSON.stringify(workflowDef, null, 2));
|
||||
|
||||
// 生成各阶段文件
|
||||
|
||||
// ========== P0 增强: 生成 Phase 0 (强制规范研读) ==========
|
||||
const phase0Content = generatePhase0Spec(config);
|
||||
Write(`${skillDir}/phases/00-spec-study.md`, phase0Content);
|
||||
|
||||
// ========== 生成用户定义的各阶段文件 ==========
|
||||
for (let i = 0; i < phases.length; i++) {
|
||||
const phase = phases[i];
|
||||
const prevPhase = i > 0 ? phases[i-1] : null;
|
||||
const nextPhase = i < phases.length - 1 ? phases[i+1] : null;
|
||||
|
||||
|
||||
const content = generateSequentialPhase({
|
||||
phaseNumber: i + 1,
|
||||
phaseId: phase.id,
|
||||
phaseName: phase.name,
|
||||
phaseDescription: phase.description || `Execute ${phase.name}`,
|
||||
input: prevPhase ? prevPhase.output : "user input",
|
||||
input: prevPhase ? prevPhase.output : "phase 0 output", // Phase 0 为首个输入源
|
||||
output: phase.output,
|
||||
nextPhase: nextPhase ? nextPhase.id : null,
|
||||
config: config,
|
||||
contextStrategy: contextStrategy
|
||||
});
|
||||
|
||||
|
||||
Write(`${skillDir}/phases/${phase.id}.md`, content);
|
||||
}
|
||||
}
|
||||
|
||||
// ========== P0 增强: 声明式工作流定义 ==========
|
||||
function generateWorkflowDefinition(config, phases) {
|
||||
// ========== P0: 添加强制 Phase 0 ==========
|
||||
const phase0 = {
|
||||
id: '00-spec-study',
|
||||
name: 'Specification Study',
|
||||
order: 0,
|
||||
input: null,
|
||||
output: 'spec-study-complete.flag',
|
||||
description: '⚠️ MANDATORY: Read all specification documents before execution',
|
||||
parallel: false,
|
||||
condition: null,
|
||||
agent: {
|
||||
type: 'universal-executor',
|
||||
run_in_background: false
|
||||
}
|
||||
};
|
||||
|
||||
return {
|
||||
skill_name: config.skill_name,
|
||||
version: "1.0.0",
|
||||
execution_mode: "sequential",
|
||||
context_strategy: config.context_strategy || "file",
|
||||
|
||||
// 声明式阶段列表 (类似 software-manual 的 agents_to_run)
|
||||
phases_to_run: phases.map(p => p.id),
|
||||
|
||||
// 阶段配置
|
||||
phases: phases.map((p, i) => ({
|
||||
id: p.id,
|
||||
name: p.name,
|
||||
order: i + 1,
|
||||
input: i > 0 ? phases[i-1].output : null,
|
||||
output: p.output,
|
||||
// 可选的并行配置
|
||||
parallel: p.parallel || false,
|
||||
// 可选的条件执行
|
||||
condition: p.condition || null,
|
||||
// Agent 配置
|
||||
agent: p.agent || {
|
||||
type: "universal-executor",
|
||||
run_in_background: false
|
||||
}
|
||||
})),
|
||||
|
||||
|
||||
// ========== P0: Phase 0 置于首位 ==========
|
||||
phases_to_run: ['00-spec-study', ...phases.map(p => p.id)],
|
||||
|
||||
// ========== P0: Phase 0 + 用户定义阶段 ==========
|
||||
phases: [
|
||||
phase0,
|
||||
...phases.map((p, i) => ({
|
||||
id: p.id,
|
||||
name: p.name,
|
||||
order: i + 1,
|
||||
input: i === 0 ? phase0.output : phases[i-1].output, // 第一个阶段依赖 Phase 0
|
||||
output: p.output,
|
||||
parallel: p.parallel || false,
|
||||
condition: p.condition || null,
|
||||
// Agent 配置 (支持 LLM 集成)
|
||||
agent: p.agent || (config.llm_integration?.enabled ? {
|
||||
type: "llm",
|
||||
tool: config.llm_integration.default_tool,
|
||||
mode: config.llm_integration.mode || "analysis",
|
||||
fallback_chain: config.llm_integration.fallback_chain || [],
|
||||
run_in_background: false
|
||||
} : {
|
||||
type: "universal-executor",
|
||||
run_in_background: false
|
||||
})
|
||||
}))
|
||||
],
|
||||
|
||||
// 终止条件
|
||||
termination: {
|
||||
on_success: "all_phases_completed",
|
||||
@@ -233,10 +260,30 @@ async function executePhase(phaseId, phaseConfig, workDir) {
|
||||
|
||||
## 阶段执行计划
|
||||
|
||||
**执行流程**:
|
||||
|
||||
\`\`\`
|
||||
START
|
||||
↓
|
||||
Phase 0: Specification Study
|
||||
↓ Output: spec-study-complete.flag
|
||||
↓
|
||||
Phase 1: ${phases[0]?.name || 'First Phase'}
|
||||
↓ Output: ${phases[0]?.output || 'phase-1.json'}
|
||||
${phases.slice(1).map((p, i) => ` ↓
|
||||
Phase ${i+2}: ${p.name}
|
||||
↓ Output: ${p.output}`).join('\n')}
|
||||
↓
|
||||
COMPLETE
|
||||
\`\`\`
|
||||
|
||||
**阶段列表**:
|
||||
|
||||
| Order | Phase | Input | Output | Agent |
|
||||
|-------|-------|-------|--------|-------|
|
||||
${phases.map((p, i) =>
|
||||
`| ${i+1} | ${p.id} | ${i > 0 ? phases[i-1].output : '-'} | ${p.output} | ${p.agent?.type || 'universal-executor'} |`
|
||||
| 0 | 00-spec-study | - | spec-study-complete.flag | universal-executor |
|
||||
${phases.map((p, i) =>
|
||||
`| ${i+1} | ${p.id} | ${i === 0 ? 'spec-study-complete.flag' : phases[i-1].output} | ${p.output} | ${p.agent?.type || 'universal-executor'} |`
|
||||
).join('\n')}
|
||||
|
||||
## 错误恢复
|
||||
@@ -751,6 +798,146 @@ ${actions.sort((a, b) => (b.priority || 0) - (a.priority || 0)).map(a =>
|
||||
### Step 4: 辅助函数
|
||||
|
||||
```javascript
|
||||
// ========== P0: Phase 0 生成函数 ==========
|
||||
function generatePhase0Spec(config) {
|
||||
const skillRoot = '.claude/skills/skill-generator';
|
||||
const specsToRead = [
|
||||
'../_shared/SKILL-DESIGN-SPEC.md',
|
||||
`${skillRoot}/templates/*.md`
|
||||
];
|
||||
|
||||
return `# Phase 0: Specification Study
|
||||
|
||||
⚠️ **MANDATORY PREREQUISITE** - 此阶段不可跳过
|
||||
|
||||
## Objective
|
||||
|
||||
在生成任何文件前,完整阅读所有规范文档,理解 Skill 设计标准。
|
||||
|
||||
## Why This Matters
|
||||
|
||||
**不研读规范 (❌)**:
|
||||
\`\`\`
|
||||
跳过规范
|
||||
├─ ✗ 不符合标准
|
||||
├─ ✗ 结构混乱
|
||||
└─ ✗ 质量问题
|
||||
\`\`\`
|
||||
|
||||
**研读规范 (✅)**:
|
||||
\`\`\`
|
||||
完整研读
|
||||
├─ ✓ 标准化输出
|
||||
├─ ✓ 高质量代码
|
||||
└─ ✓ 易于维护
|
||||
\`\`\`
|
||||
|
||||
## Required Reading
|
||||
|
||||
### P0 - 核心设计规范
|
||||
|
||||
\`\`\`javascript
|
||||
// 通用设计标准 (MUST READ)
|
||||
const designSpec = Read('.claude/skills/_shared/SKILL-DESIGN-SPEC.md');
|
||||
|
||||
// 关键内容检查点:
|
||||
const checkpoints = {
|
||||
structure: '目录结构约定',
|
||||
naming: '命名规范',
|
||||
quality: '质量标准',
|
||||
output: '输出格式要求'
|
||||
};
|
||||
\`\`\`
|
||||
|
||||
### P1 - 模板文件 (生成前必读)
|
||||
|
||||
\`\`\`javascript
|
||||
// 根据执行模式加载对应模板
|
||||
const templates = {
|
||||
all: [
|
||||
'templates/skill-md.md' // SKILL.md 入口文件模板
|
||||
],
|
||||
sequential: [
|
||||
'templates/sequential-phase.md'
|
||||
],
|
||||
autonomous: [
|
||||
'templates/autonomous-orchestrator.md',
|
||||
'templates/autonomous-action.md'
|
||||
]
|
||||
};
|
||||
|
||||
const mode = '${config.execution_mode}';
|
||||
const requiredTemplates = [...templates.all, ...templates[mode]];
|
||||
|
||||
requiredTemplates.forEach(template => {
|
||||
const content = Read(\`.claude/skills/skill-generator/\${template}\`);
|
||||
// 理解模板结构、变量位置、生成规则
|
||||
});
|
||||
\`\`\`
|
||||
|
||||
## Execution
|
||||
|
||||
\`\`\`javascript
|
||||
// ========== 加载规范 ==========
|
||||
const specs = [];
|
||||
|
||||
// 1. 设计规范 (P0)
|
||||
specs.push({
|
||||
file: '../_shared/SKILL-DESIGN-SPEC.md',
|
||||
content: Read('.claude/skills/_shared/SKILL-DESIGN-SPEC.md'),
|
||||
priority: 'P0'
|
||||
});
|
||||
|
||||
// 2. 模板文件 (P1)
|
||||
const templateFiles = Glob('.claude/skills/skill-generator/templates/*.md');
|
||||
templateFiles.forEach(file => {
|
||||
specs.push({
|
||||
file: file,
|
||||
content: Read(file),
|
||||
priority: 'P1'
|
||||
});
|
||||
});
|
||||
|
||||
// ========== 内化规范 ==========
|
||||
console.log('📖 Reading specifications...');
|
||||
specs.forEach(spec => {
|
||||
console.log(\` [\${spec.priority}] \${spec.file}\`);
|
||||
// 理解内容(无需生成文件,仅内存处理)
|
||||
});
|
||||
|
||||
// ========== 生成完成标记 ==========
|
||||
const result = {
|
||||
status: 'completed',
|
||||
specs_loaded: specs.length,
|
||||
timestamp: new Date().toISOString()
|
||||
};
|
||||
|
||||
Write(\`\${workDir}/spec-study-complete.flag\`, JSON.stringify(result, null, 2));
|
||||
\`\`\`
|
||||
|
||||
## Output
|
||||
|
||||
- **标记文件**: \`spec-study-complete.flag\` (证明已完成阅读)
|
||||
- **副作用**: 内化规范知识,后续阶段遵循标准
|
||||
|
||||
## Success Criteria
|
||||
|
||||
✅ **通过标准**:
|
||||
- [ ] 已阅读 SKILL-DESIGN-SPEC.md
|
||||
- [ ] 已阅读执行模式对应的模板文件
|
||||
- [ ] 理解目录结构约定
|
||||
- [ ] 理解命名规范
|
||||
- [ ] 理解质量标准
|
||||
|
||||
## Next Phase
|
||||
|
||||
→ [Phase 1: Requirements Discovery](01-requirements-discovery.md)
|
||||
|
||||
**关键**: 只有完成规范研读后,Phase 1 才能正确收集需求并生成符合标准的配置。
|
||||
`;
|
||||
}
|
||||
|
||||
// ========== 其他辅助函数 ==========
|
||||
function toPascalCase(str) {
|
||||
return str.split('-').map(s => s.charAt(0).toUpperCase() + s.slice(1)).join('');
|
||||
}
|
||||
|
||||
@@ -6,375 +6,162 @@ allowed-tools: Task, AskUserQuestion, Read, Write, Bash, Glob, Grep, mcp__ace-to
|
||||
|
||||
# Skill Tuning
|
||||
|
||||
Universal skill diagnosis and optimization tool that identifies and resolves skill execution problems through iterative multi-agent analysis.
|
||||
Autonomous diagnosis and optimization for skill execution issues.
|
||||
|
||||
## Architecture Overview
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ Skill Tuning Architecture (Autonomous Mode + Gemini CLI) │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ ⚠️ Phase 0: Specification → 阅读规范 + 理解目标 skill 结构 (强制前置) │
|
||||
│ Study │
|
||||
│ ↓ │
|
||||
│ ┌───────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ Orchestrator (状态驱动决策) │ │
|
||||
│ │ 读取诊断状态 → 选择下一步动作 → 执行 → 更新状态 → 循环直到完成 │ │
|
||||
│ └───────────────────────────────────────────────────────────────────────┘ │
|
||||
│ │ │
|
||||
│ ┌────────────┬───────────┼───────────┬────────────┬────────────┐ │
|
||||
│ ↓ ↓ ↓ ↓ ↓ ↓ │
|
||||
│ ┌──────┐ ┌──────────┐ ┌─────────┐ ┌────────┐ ┌────────┐ ┌─────────┐ │
|
||||
│ │ Init │→ │ Analyze │→ │Diagnose │ │Diagnose│ │Diagnose│ │ Gemini │ │
|
||||
│ │ │ │Requiremts│ │ Context │ │ Memory │ │DataFlow│ │Analysis │ │
|
||||
│ └──────┘ └──────────┘ └─────────┘ └────────┘ └────────┘ └─────────┘ │
|
||||
│ │ │ │ │ │ │
|
||||
│ │ └───────────┴───────────┴────────────┘ │
|
||||
│ ↓ │
|
||||
│ ┌───────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ Requirement Analysis (NEW) │ │
|
||||
│ │ • Phase 1: 维度拆解 (Gemini CLI) - 单一描述 → 多个关注维度 │ │
|
||||
│ │ • Phase 2: Spec 匹配 - 每个维度 → taxonomy + strategy │ │
|
||||
│ │ • Phase 3: 覆盖度评估 - 以"有修复策略"为满足标准 │ │
|
||||
│ │ • Phase 4: 歧义检测 - 识别多义性描述,必要时请求澄清 │ │
|
||||
│ └───────────────────────────────────────────────────────────────────────┘ │
|
||||
│ ↓ │
|
||||
│ ┌──────────────────┐ │
|
||||
│ │ Apply Fixes + │ │
|
||||
│ │ Verify Results │ │
|
||||
│ └──────────────────┘ │
|
||||
│ │
|
||||
│ ┌───────────────────────────────────────────────────────────────────────┐ │
|
||||
│ │ Gemini CLI Integration │ │
|
||||
│ │ 根据用户需求动态调用 gemini cli 进行深度分析: │ │
|
||||
│ │ • 需求维度拆解 (requirement decomposition) │ │
|
||||
│ │ • 复杂问题分析 (prompt engineering, architecture review) │ │
|
||||
│ │ • 代码模式识别 (pattern matching, anti-pattern detection) │ │
|
||||
│ │ • 修复策略生成 (fix generation, refactoring suggestions) │ │
|
||||
│ └───────────────────────────────────────────────────────────────────────┘ │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
┌─────────────────────────────────────────────────────┐
|
||||
│ Phase 0: Read Specs (mandatory) │
|
||||
│ → problem-taxonomy.md, tuning-strategies.md │
|
||||
└─────────────────────────────────────────────────────┘
|
||||
↓
|
||||
┌─────────────────────────────────────────────────────┐
|
||||
│ Orchestrator (state-driven) │
|
||||
│ Read state → Select action → Execute → Update → ✓ │
|
||||
└─────────────────────────────────────────────────────┘
|
||||
↓ ↓
|
||||
┌──────────────────────┐ ┌──────────────────┐
|
||||
│ Diagnosis Phase │ │ Gemini CLI │
|
||||
│ • Context │ │ Deep analysis │
|
||||
│ • Memory │ │ (on-demand) │
|
||||
│ • DataFlow │ │ │
|
||||
│ • Agent │ │ Complex issues │
|
||||
│ • Docs │ │ Architecture │
|
||||
│ • Token Usage │ │ Performance │
|
||||
└──────────────────────┘ └──────────────────┘
|
||||
↓
|
||||
┌───────────────────┐
|
||||
│ Fix & Verify │
|
||||
│ Apply → Re-test │
|
||||
└───────────────────┘
|
||||
```
|
||||
|
||||
## Problem Domain
|
||||
## Core Issues Detected
|
||||
|
||||
Based on comprehensive analysis, skill-tuning addresses **core skill issues** and **general optimization areas**:
|
||||
|
||||
### Core Skill Issues (自动检测)
|
||||
|
||||
| Priority | Problem | Root Cause | Solution Strategy |
|
||||
|----------|---------|------------|-------------------|
|
||||
| **P0** | Authoring Principles Violation | 中间文件存储, State膨胀, 文件中转 | eliminate_intermediate_files, minimize_state, context_passing |
|
||||
| Priority | Problem | Root Cause | Fix Strategy |
|
||||
|----------|---------|-----------|--------------|
|
||||
| **P0** | Authoring Violation | Intermediate files, state bloat, file relay | eliminate_intermediate, minimize_state |
|
||||
| **P1** | Data Flow Disruption | Scattered state, inconsistent formats | state_centralization, schema_enforcement |
|
||||
| **P2** | Agent Coordination | Fragile call chains, merge complexity | error_wrapping, result_validation |
|
||||
| **P3** | Context Explosion | Token accumulation, multi-turn bloat | sliding_window, context_summarization |
|
||||
| **P2** | Agent Coordination | Fragile chains, no error handling | error_wrapping, result_validation |
|
||||
| **P3** | Context Explosion | Unbounded history, full content passing | sliding_window, path_reference |
|
||||
| **P4** | Long-tail Forgetting | Early constraint loss | constraint_injection, checkpoint_restore |
|
||||
| **P5** | Token Consumption | Verbose prompts, excessive state, redundant I/O | prompt_compression, lazy_loading, output_minimization |
|
||||
| **P5** | Token Consumption | Verbose prompts, state bloat | prompt_compression, lazy_loading |
|
||||
|
||||
### General Optimization Areas (按需分析 via Gemini CLI)
|
||||
## Problem Categories (Detailed Specs)
|
||||
|
||||
| Category | Issues | Gemini Analysis Scope |
|
||||
|----------|--------|----------------------|
|
||||
| **Prompt Engineering** | 模糊指令, 输出格式不一致, 幻觉风险 | 提示词优化, 结构化输出设计 |
|
||||
| **Architecture** | 阶段划分不合理, 依赖混乱, 扩展性差 | 架构审查, 模块化建议 |
|
||||
| **Performance** | 执行慢, Token消耗高, 重复计算 | 性能分析, 缓存策略 |
|
||||
| **Error Handling** | 错误恢复不当, 无降级策略, 日志不足 | 容错设计, 可观测性增强 |
|
||||
| **Output Quality** | 输出不稳定, 格式漂移, 质量波动 | 质量门控, 验证机制 |
|
||||
| **User Experience** | 交互不流畅, 反馈不清晰, 进度不可见 | UX优化, 进度追踪 |
|
||||
See [specs/problem-taxonomy.md](specs/problem-taxonomy.md) for:
|
||||
- Detection patterns (regex/checks)
|
||||
- Severity calculations
|
||||
- Impact assessments
|
||||
|
||||
## Key Design Principles
|
||||
## Tuning Strategies (Detailed Specs)
|
||||
|
||||
1. **Problem-First Diagnosis**: Systematic identification before any fix attempt
|
||||
2. **Data-Driven Analysis**: Record execution traces, token counts, state snapshots
|
||||
3. **Iterative Refinement**: Multiple tuning rounds until quality gates pass
|
||||
4. **Non-Destructive**: All changes are reversible with backup checkpoints
|
||||
5. **Agent Coordination**: Use specialized sub-agents for each diagnosis type
|
||||
6. **Gemini CLI On-Demand**: Deep analysis via CLI for complex/custom issues
|
||||
See [specs/tuning-strategies.md](specs/tuning-strategies.md) for:
|
||||
- 10+ strategies per category
|
||||
- Implementation patterns
|
||||
- Verification methods
|
||||
|
||||
---
|
||||
## Workflow
|
||||
|
||||
## Gemini CLI Integration
|
||||
| Step | Action | Orchestrator Decision | Output |
|
||||
|------|--------|----------------------|--------|
|
||||
| 1 | `action-init` | status='pending' | Backup, session created |
|
||||
| 2 | `action-analyze-requirements` | After init | Required dimensions + coverage |
|
||||
| 3 | Diagnosis (6 types) | Focus areas | state.diagnosis.{type} |
|
||||
| 4 | `action-gemini-analysis` | Critical issues OR user request | Deep findings |
|
||||
| 5 | `action-generate-report` | All diagnosis complete | state.final_report |
|
||||
| 6 | `action-propose-fixes` | Issues found | state.proposed_fixes[] |
|
||||
| 7 | `action-apply-fix` | Pending fixes | Applied + verified |
|
||||
| 8 | `action-complete` | Quality gates pass | session.status='completed' |
|
||||
|
||||
根据用户需求动态调用 Gemini CLI 进行深度分析。
|
||||
## Action Reference
|
||||
|
||||
### Trigger Conditions
|
||||
| Category | Actions | Purpose |
|
||||
|----------|---------|---------|
|
||||
| **Setup** | action-init | Initialize backup, session state |
|
||||
| **Analysis** | action-analyze-requirements | Decompose user request via Gemini CLI |
|
||||
| **Diagnosis** | action-diagnose-{context,memory,dataflow,agent,docs,token_consumption} | Detect category-specific issues |
|
||||
| **Deep Analysis** | action-gemini-analysis | Gemini CLI: complex/critical issues |
|
||||
| **Reporting** | action-generate-report | Consolidate findings → final_report |
|
||||
| **Fixing** | action-propose-fixes, action-apply-fix | Generate + apply fixes |
|
||||
| **Verify** | action-verify | Re-run diagnosis, check gates |
|
||||
| **Exit** | action-complete, action-abort | Finalize or rollback |
|
||||
|
||||
| Condition | Action | CLI Mode |
|
||||
|-----------|--------|----------|
|
||||
| 用户描述复杂问题 | 调用 Gemini 分析问题根因 | `analysis` |
|
||||
| 自动诊断发现 critical 问题 | 请求深度分析确认 | `analysis` |
|
||||
| 用户请求架构审查 | 执行架构分析 | `analysis` |
|
||||
| 需要生成修复代码 | 生成修复提案 | `write` |
|
||||
| 标准策略不适用 | 请求定制化策略 | `analysis` |
|
||||
Full action details: [phases/actions/](phases/actions/)
|
||||
|
||||
### CLI Command Template
|
||||
## State Management
|
||||
|
||||
**Single source of truth**: `.workflow/.scratchpad/skill-tuning-{ts}/state.json`
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "pending|running|completed|failed",
|
||||
"target_skill": { "name": "...", "path": "..." },
|
||||
"diagnosis": {
|
||||
"context": {...},
|
||||
"memory": {...},
|
||||
"dataflow": {...},
|
||||
"agent": {...},
|
||||
"docs": {...},
|
||||
"token_consumption": {...}
|
||||
},
|
||||
"issues": [{"id":"...", "severity":"...", "category":"...", "strategy":"..."}],
|
||||
"proposed_fixes": [...],
|
||||
"applied_fixes": [...],
|
||||
"quality_gate": "pass|fail",
|
||||
"final_report": "..."
|
||||
}
|
||||
```
|
||||
|
||||
See [phases/state-schema.md](phases/state-schema.md) for complete schema.
|
||||
|
||||
## Orchestrator Logic
|
||||
|
||||
See [phases/orchestrator.md](phases/orchestrator.md) for:
|
||||
- Decision logic (termination checks → action selection)
|
||||
- State transitions
|
||||
- Error recovery
|
||||
|
||||
## Key Principles
|
||||
|
||||
1. **Problem-First**: Diagnosis before any fix
|
||||
2. **Data-Driven**: Record traces, token counts, snapshots
|
||||
3. **Iterative**: Multiple rounds until quality gates pass
|
||||
4. **Reversible**: All changes with backup checkpoints
|
||||
5. **Non-Invasive**: Minimal changes, maximum clarity
|
||||
|
||||
## Usage Examples
|
||||
|
||||
```bash
|
||||
ccw cli -p "
|
||||
PURPOSE: ${purpose}
|
||||
TASK: ${task_steps}
|
||||
MODE: ${mode}
|
||||
CONTEXT: @${skill_path}/**/*
|
||||
EXPECTED: ${expected_output}
|
||||
RULES: $(cat ~/.claude/workflows/cli-templates/protocols/${mode}-protocol.md) | ${constraints}
|
||||
" --tool gemini --mode ${mode} --cd ${skill_path}
|
||||
# Basic skill diagnosis
|
||||
/skill-tuning "Fix memory leaks in my skill"
|
||||
|
||||
# Deep analysis with Gemini
|
||||
/skill-tuning "Architecture issues in async workflow"
|
||||
|
||||
# Focus on specific areas
|
||||
/skill-tuning "Optimize token consumption and fix agent coordination"
|
||||
|
||||
# Custom issue
|
||||
/skill-tuning "My skill produces inconsistent outputs"
|
||||
```
|
||||
|
||||
### Analysis Types
|
||||
## Output
|
||||
|
||||
#### 1. Problem Root Cause Analysis
|
||||
|
||||
```bash
|
||||
ccw cli -p "
|
||||
PURPOSE: Identify root cause of skill execution issue: ${user_issue_description}
|
||||
TASK: • Analyze skill structure and phase flow • Identify anti-patterns • Trace data flow issues
|
||||
MODE: analysis
|
||||
CONTEXT: @**/*.md
|
||||
EXPECTED: JSON with { root_causes: [], patterns_found: [], recommendations: [] }
|
||||
RULES: $(cat ~/.claude/workflows/cli-templates/protocols/analysis-protocol.md) | Focus on execution flow
|
||||
" --tool gemini --mode analysis
|
||||
```
|
||||
|
||||
#### 2. Architecture Review
|
||||
|
||||
```bash
|
||||
ccw cli -p "
|
||||
PURPOSE: Review skill architecture for scalability and maintainability
|
||||
TASK: • Evaluate phase decomposition • Check state management patterns • Assess agent coordination
|
||||
MODE: analysis
|
||||
CONTEXT: @**/*.md
|
||||
EXPECTED: Architecture assessment with improvement recommendations
|
||||
RULES: $(cat ~/.claude/workflows/cli-templates/protocols/analysis-protocol.md) | Focus on modularity
|
||||
" --tool gemini --mode analysis
|
||||
```
|
||||
|
||||
#### 3. Fix Strategy Generation
|
||||
|
||||
```bash
|
||||
ccw cli -p "
|
||||
PURPOSE: Generate fix strategy for issue: ${issue_id} - ${issue_description}
|
||||
TASK: • Analyze issue context • Design fix approach • Generate implementation plan
|
||||
MODE: analysis
|
||||
CONTEXT: @**/*.md
|
||||
EXPECTED: JSON with { strategy: string, changes: [], verification_steps: [] }
|
||||
RULES: $(cat ~/.claude/workflows/cli-templates/protocols/analysis-protocol.md) | Minimal invasive changes
|
||||
" --tool gemini --mode analysis
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Mandatory Prerequisites
|
||||
|
||||
> **CRITICAL**: Read these documents before executing any action.
|
||||
|
||||
### Core Specs (Required)
|
||||
|
||||
| Document | Purpose | Priority |
|
||||
|----------|---------|----------|
|
||||
| [specs/skill-authoring-principles.md](specs/skill-authoring-principles.md) | **首要准则:简洁高效、去除存储、上下文流转** | **P0** |
|
||||
| [specs/problem-taxonomy.md](specs/problem-taxonomy.md) | Problem classification and detection patterns | **P0** |
|
||||
| [specs/tuning-strategies.md](specs/tuning-strategies.md) | Fix strategies for each problem type | **P0** |
|
||||
| [specs/dimension-mapping.md](specs/dimension-mapping.md) | Dimension to Spec mapping rules | **P0** |
|
||||
| [specs/quality-gates.md](specs/quality-gates.md) | Quality thresholds and verification criteria | P1 |
|
||||
|
||||
### Templates (Reference)
|
||||
|
||||
| Document | Purpose |
|
||||
|----------|---------|
|
||||
| [templates/diagnosis-report.md](templates/diagnosis-report.md) | Diagnosis report structure |
|
||||
| [templates/fix-proposal.md](templates/fix-proposal.md) | Fix proposal format |
|
||||
|
||||
---
|
||||
|
||||
## Execution Flow
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ Phase 0: Specification Study (强制前置 - 禁止跳过) │
|
||||
│ → Read: specs/problem-taxonomy.md (问题分类) │
|
||||
│ → Read: specs/tuning-strategies.md (调优策略) │
|
||||
│ → Read: specs/dimension-mapping.md (维度映射规则) │
|
||||
│ → Read: Target skill's SKILL.md and phases/*.md │
|
||||
│ → Output: 内化规范,理解目标 skill 结构 │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ action-init: Initialize Tuning Session │
|
||||
│ → Create work directory: .workflow/.scratchpad/skill-tuning-{timestamp} │
|
||||
│ → Initialize state.json with target skill info │
|
||||
│ → Create backup of target skill files │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ action-analyze-requirements: Requirement Analysis │
|
||||
│ → Phase 1: 维度拆解 (Gemini CLI) - 单一描述 → 多个关注维度 │
|
||||
│ → Phase 2: Spec 匹配 - 每个维度 → taxonomy + strategy │
|
||||
│ → Phase 3: 覆盖度评估 - 以"有修复策略"为满足标准 │
|
||||
│ → Phase 4: 歧义检测 - 识别多义性描述,必要时请求澄清 │
|
||||
│ → Output: state.json (requirement_analysis field) │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ action-diagnose-*: Diagnosis Actions (context/memory/dataflow/agent/docs/ │
|
||||
│ token_consumption) │
|
||||
│ → Execute pattern-based detection for each category │
|
||||
│ → Output: state.json (diagnosis.{category} field) │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ action-generate-report: Consolidated Report │
|
||||
│ → Generate markdown summary from state.diagnosis │
|
||||
│ → Prioritize issues by severity │
|
||||
│ → Output: state.json (final_report field) │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ action-propose-fixes: Fix Proposal Generation │
|
||||
│ → Generate fix strategies for each issue │
|
||||
│ → Create implementation plan │
|
||||
│ → Output: state.json (proposed_fixes field) │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ action-apply-fix: Apply Selected Fix │
|
||||
│ → User selects fix to apply │
|
||||
│ → Execute fix with backup │
|
||||
│ → Update state with fix result │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ action-verify: Verification │
|
||||
│ → Re-run affected diagnosis │
|
||||
│ → Check quality gates │
|
||||
│ → Update iteration count │
|
||||
├─────────────────────────────────────────────────────────────────────────────┤
|
||||
│ action-complete: Finalization │
|
||||
│ → Set status='completed' │
|
||||
│ → Final report already in state.json (final_report field) │
|
||||
│ → Output: state.json (final) │
|
||||
└─────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Directory Setup
|
||||
|
||||
```javascript
|
||||
const timestamp = new Date().toISOString().slice(0,19).replace(/[-:T]/g, '');
|
||||
const workDir = `.workflow/.scratchpad/skill-tuning-${timestamp}`;
|
||||
|
||||
// Simplified: Only backups dir needed, diagnosis results go into state.json
|
||||
Bash(`mkdir -p "${workDir}/backups"`);
|
||||
```
|
||||
|
||||
## Output Structure
|
||||
|
||||
```
|
||||
.workflow/.scratchpad/skill-tuning-{timestamp}/
|
||||
├── state.json # Single source of truth (all results consolidated)
|
||||
│ ├── diagnosis.* # All diagnosis results embedded
|
||||
│ ├── issues[] # Found issues
|
||||
│ ├── proposed_fixes[] # Fix proposals
|
||||
│ └── final_report # Markdown summary (on completion)
|
||||
└── backups/
|
||||
└── {skill-name}-backup/ # Original skill files backup
|
||||
```
|
||||
|
||||
> **Token Optimization**: All outputs consolidated into state.json. No separate diagnosis files or report files.
|
||||
|
||||
## State Schema
|
||||
|
||||
详细状态结构定义请参阅 [phases/state-schema.md](phases/state-schema.md)。
|
||||
|
||||
核心状态字段:
|
||||
- `status`: 工作流状态 (pending/running/completed/failed)
|
||||
- `target_skill`: 目标 skill 信息
|
||||
- `diagnosis`: 各维度诊断结果
|
||||
- `issues`: 发现的问题列表
|
||||
- `proposed_fixes`: 建议的修复方案
|
||||
|
||||
---
|
||||
|
||||
## Action Reference Guide
|
||||
|
||||
Navigation and entry points for each action in the autonomous workflow:
|
||||
|
||||
### Core Orchestration
|
||||
|
||||
**Document**: 🔗 [phases/orchestrator.md](phases/orchestrator.md)
|
||||
|
||||
| Attribute | Value |
|
||||
|-----------|-------|
|
||||
| **Purpose** | Drive tuning workflow via state-driven action selection |
|
||||
| **Decision Logic** | Termination checks → Action preconditions → Selection |
|
||||
| **Related** | [phases/state-schema.md](phases/state-schema.md) |
|
||||
|
||||
---
|
||||
|
||||
### Initialization & Requirements
|
||||
|
||||
| Action | Document | Purpose | Preconditions |
|
||||
|--------|----------|---------|---------------|
|
||||
| **action-init** | [action-init.md](phases/actions/action-init.md) | Initialize session, backup target skill | `state.status === 'pending'` |
|
||||
| **action-analyze-requirements** | [action-analyze-requirements.md](phases/actions/action-analyze-requirements.md) | Decompose user request into dimensions via Gemini CLI | After init, before diagnosis |
|
||||
|
||||
---
|
||||
|
||||
### Diagnosis Actions
|
||||
|
||||
| Action | Document | Purpose | Detects |
|
||||
|--------|----------|---------|---------|
|
||||
| **action-diagnose-context** | [action-diagnose-context.md](phases/actions/action-diagnose-context.md) | Context explosion analysis | Token accumulation, multi-turn bloat |
|
||||
| **action-diagnose-memory** | [action-diagnose-memory.md](phases/actions/action-diagnose-memory.md) | Long-tail forgetting analysis | Early constraint loss |
|
||||
| **action-diagnose-dataflow** | [action-diagnose-dataflow.md](phases/actions/action-diagnose-dataflow.md) | Data flow analysis | State inconsistency, format drift |
|
||||
| **action-diagnose-agent** | [action-diagnose-agent.md](phases/actions/action-diagnose-agent.md) | Agent coordination analysis | Call chain failures, merge issues |
|
||||
| **action-diagnose-docs** | [action-diagnose-docs.md](phases/actions/action-diagnose-docs.md) | Documentation structure analysis | Missing specs, unclear flow |
|
||||
| **action-diagnose-token-consumption** | [action-diagnose-token-consumption.md](phases/actions/action-diagnose-token-consumption.md) | Token consumption analysis | Verbose prompts, redundant I/O |
|
||||
|
||||
---
|
||||
|
||||
### Analysis & Reporting
|
||||
|
||||
| Action | Document | Purpose | Output |
|
||||
|--------|----------|---------|--------|
|
||||
| **action-gemini-analysis** | [action-gemini-analysis.md](phases/actions/action-gemini-analysis.md) | Deep analysis via Gemini CLI | Custom issue diagnosis |
|
||||
| **action-generate-report** | [action-generate-report.md](phases/actions/action-generate-report.md) | Consolidate diagnosis results | `state.final_report` |
|
||||
| **action-propose-fixes** | [action-propose-fixes.md](phases/actions/action-propose-fixes.md) | Generate fix strategies | `state.proposed_fixes[]` |
|
||||
|
||||
---
|
||||
|
||||
### Fix & Verification
|
||||
|
||||
| Action | Document | Purpose | Preconditions |
|
||||
|--------|----------|---------|---------------|
|
||||
| **action-apply-fix** | [action-apply-fix.md](phases/actions/action-apply-fix.md) | Apply selected fix with backup | User selected fix |
|
||||
| **action-verify** | [action-verify.md](phases/actions/action-verify.md) | Re-run diagnosis, check quality gates | After fix applied |
|
||||
|
||||
---
|
||||
|
||||
### Termination
|
||||
|
||||
| Action | Document | Purpose | Trigger |
|
||||
|--------|----------|---------|---------|
|
||||
| **action-complete** | [action-complete.md](phases/actions/action-complete.md) | Finalize session with report | All quality gates pass |
|
||||
| **action-abort** | [action-abort.md](phases/actions/action-abort.md) | Abort session, restore backup | Error limit exceeded |
|
||||
|
||||
---
|
||||
|
||||
## Template Reference
|
||||
|
||||
| Template | Purpose | When Used |
|
||||
|----------|---------|-----------|
|
||||
| [templates/diagnosis-report.md](templates/diagnosis-report.md) | Diagnosis report structure | action-generate-report |
|
||||
| [templates/fix-proposal.md](templates/fix-proposal.md) | Fix proposal format | action-propose-fixes |
|
||||
|
||||
---
|
||||
After completion, review:
|
||||
- `.workflow/.scratchpad/skill-tuning-{ts}/state.json` - Full state with final_report
|
||||
- `state.final_report` - Markdown summary (in state.json)
|
||||
- `state.applied_fixes` - List of applied fixes with verification results
|
||||
|
||||
## Reference Documents
|
||||
|
||||
| Document | Purpose |
|
||||
|----------|---------|
|
||||
| [phases/orchestrator.md](phases/orchestrator.md) | Orchestrator decision logic |
|
||||
| [specs/problem-taxonomy.md](specs/problem-taxonomy.md) | Classification + detection patterns |
|
||||
| [specs/tuning-strategies.md](specs/tuning-strategies.md) | Fix implementation guide |
|
||||
| [specs/dimension-mapping.md](specs/dimension-mapping.md) | Dimension ↔ Spec mapping |
|
||||
| [specs/quality-gates.md](specs/quality-gates.md) | Quality verification criteria |
|
||||
| [phases/orchestrator.md](phases/orchestrator.md) | Workflow orchestration |
|
||||
| [phases/state-schema.md](phases/state-schema.md) | State structure definition |
|
||||
| [phases/actions/action-init.md](phases/actions/action-init.md) | Initialize tuning session |
|
||||
| [phases/actions/action-analyze-requirements.md](phases/actions/action-analyze-requirements.md) | Requirement analysis (NEW) |
|
||||
| [phases/actions/action-diagnose-context.md](phases/actions/action-diagnose-context.md) | Context explosion diagnosis |
|
||||
| [phases/actions/action-diagnose-memory.md](phases/actions/action-diagnose-memory.md) | Long-tail forgetting diagnosis |
|
||||
| [phases/actions/action-diagnose-dataflow.md](phases/actions/action-diagnose-dataflow.md) | Data flow diagnosis |
|
||||
| [phases/actions/action-diagnose-agent.md](phases/actions/action-diagnose-agent.md) | Agent coordination diagnosis |
|
||||
| [phases/actions/action-diagnose-docs.md](phases/actions/action-diagnose-docs.md) | Documentation structure diagnosis |
|
||||
| [phases/actions/action-diagnose-token-consumption.md](phases/actions/action-diagnose-token-consumption.md) | Token consumption diagnosis |
|
||||
| [phases/actions/action-generate-report.md](phases/actions/action-generate-report.md) | Report generation |
|
||||
| [phases/actions/action-propose-fixes.md](phases/actions/action-propose-fixes.md) | Fix proposal |
|
||||
| [phases/actions/action-apply-fix.md](phases/actions/action-apply-fix.md) | Fix application |
|
||||
| [phases/actions/action-verify.md](phases/actions/action-verify.md) | Verification |
|
||||
| [phases/actions/action-complete.md](phases/actions/action-complete.md) | Finalization |
|
||||
| [specs/problem-taxonomy.md](specs/problem-taxonomy.md) | Problem classification |
|
||||
| [specs/tuning-strategies.md](specs/tuning-strategies.md) | Fix strategies |
|
||||
| [specs/dimension-mapping.md](specs/dimension-mapping.md) | Dimension to Spec mapping (NEW) |
|
||||
| [specs/quality-gates.md](specs/quality-gates.md) | Quality criteria |
|
||||
| [phases/actions/](phases/actions/) | Individual action implementations |
|
||||
|
||||
@@ -1,28 +1,57 @@
|
||||
# Orchestrator
|
||||
|
||||
Autonomous orchestrator for skill-tuning workflow. Reads current state and selects the next action based on diagnosis progress and quality gates.
|
||||
State-driven orchestrator for autonomous skill-tuning workflow.
|
||||
|
||||
## Role
|
||||
|
||||
Drive the tuning workflow by:
|
||||
1. Reading current session state
|
||||
2. Selecting the appropriate next action
|
||||
3. Executing the action via sub-agent
|
||||
4. Updating state with results
|
||||
5. Repeating until termination conditions met
|
||||
Read state → Select action → Execute → Update → Repeat until termination.
|
||||
|
||||
## Decision Logic
|
||||
|
||||
### Termination Checks (priority order)
|
||||
|
||||
| Condition | Action |
|
||||
|-----------|--------|
|
||||
| `status === 'user_exit'` | null (exit) |
|
||||
| `status === 'completed'` | null (exit) |
|
||||
| `error_count >= max_errors` | action-abort |
|
||||
| `iteration_count >= max_iterations` | action-complete |
|
||||
| `quality_gate === 'pass'` | action-complete |
|
||||
|
||||
### Action Selection
|
||||
|
||||
| Priority | Condition | Action |
|
||||
|----------|-----------|--------|
|
||||
| 1 | `status === 'pending'` | action-init |
|
||||
| 2 | Init done, req analysis missing | action-analyze-requirements |
|
||||
| 3 | Req needs clarification | null (wait) |
|
||||
| 4 | Req coverage unsatisfied | action-gemini-analysis |
|
||||
| 5 | Gemini requested/critical issues | action-gemini-analysis |
|
||||
| 6 | Gemini running | null (wait) |
|
||||
| 7 | Diagnosis pending (in order) | action-diagnose-{type} |
|
||||
| 8 | All diagnosis done, no report | action-generate-report |
|
||||
| 9 | Report done, issues exist | action-propose-fixes |
|
||||
| 10 | Pending fixes exist | action-apply-fix |
|
||||
| 11 | Fixes need verification | action-verify |
|
||||
| 12 | New iteration needed | action-diagnose-context (restart) |
|
||||
| 13 | Default | action-complete |
|
||||
|
||||
**Diagnosis Order**: context → memory → dataflow → agent → docs → token_consumption
|
||||
|
||||
**Gemini Triggers**:
|
||||
- `gemini_analysis_requested === true`
|
||||
- Critical issues detected
|
||||
- Focus areas include: architecture, prompt, performance, custom
|
||||
- Second iteration with unresolved issues
|
||||
|
||||
## State Management
|
||||
|
||||
### Read State
|
||||
|
||||
```javascript
|
||||
// Read
|
||||
const state = JSON.parse(Read(`${workDir}/state.json`));
|
||||
```
|
||||
|
||||
### Update State
|
||||
|
||||
```javascript
|
||||
function updateState(updates) {
|
||||
// Update (with sliding window for history)
|
||||
function updateState(workDir, updates) {
|
||||
const state = JSON.parse(Read(`${workDir}/state.json`));
|
||||
const newState = {
|
||||
...state,
|
||||
@@ -34,344 +63,127 @@ function updateState(updates) {
|
||||
}
|
||||
```
|
||||
|
||||
## Decision Logic
|
||||
|
||||
```javascript
|
||||
/**
 * Select the next action for the skill-tuning workflow based on current state.
 *
 * Decision order: termination checks first (user exit, completion, error
 * limit, iteration limit), then action selection by priority
 * (init → requirement analysis → Gemini deep analysis → diagnosis →
 * report → propose fixes → apply fix → verify → complete).
 *
 * @param {object} state - Current session state (parsed from state.json).
 * @returns {string|null} Next action id, or null to stop/wait.
 */
function selectNextAction(state) {
  // === Termination Checks (highest priority) ===

  // User requested exit.
  if (state.status === 'user_exit') return null;

  // Workflow already completed.
  if (state.status === 'completed') return null;

  // Error limit exceeded: abort (restores backup).
  if (state.error_count >= state.max_errors) {
    return 'action-abort';
  }

  // Iteration budget exhausted: finalize with current results.
  if (state.iteration_count >= state.max_iterations) {
    return 'action-complete';
  }

  // === Action Selection ===

  // 1. Not initialized yet.
  if (state.status === 'pending') {
    return 'action-init';
  }

  // 1.5. Requirement analysis (after init, before diagnosis).
  if (state.status === 'running' &&
      state.completed_actions.includes('action-init') &&
      !state.completed_actions.includes('action-analyze-requirements')) {
    return 'action-analyze-requirements';
  }

  // 1.6. Requirement analysis found ambiguity: pause for user clarification.
  if (state.requirement_analysis?.status === 'needs_clarification') {
    return null; // Resume after the user clarifies.
  }

  // 1.7. Requirement coverage unsatisfied: prioritize Gemini deep analysis.
  if (state.requirement_analysis?.coverage?.status === 'unsatisfied' &&
      !state.completed_actions.includes('action-gemini-analysis')) {
    return 'action-gemini-analysis';
  }

  // 2. Gemini analysis requested or otherwise needed.
  if (shouldTriggerGeminiAnalysis(state)) {
    return 'action-gemini-analysis';
  }

  // 3. Gemini analysis in flight: wait.
  if (state.gemini_analysis?.status === 'running') {
    return null; // Orchestrator will be re-triggered when CLI completes.
  }

  // 4. Run pending diagnoses in fixed order (only if not completed).
  const diagnosisOrder = ['context', 'memory', 'dataflow', 'agent', 'docs', 'token_consumption'];

  for (const diagType of diagnosisOrder) {
    if (state.diagnosis[diagType] === null) {
      // Empty focus_areas means "diagnose everything".
      if (!state.focus_areas.length || state.focus_areas.includes(diagType)) {
        return `action-diagnose-${diagType}`;
      }
      // NOTE(review): 'all' is only honored for the docs diagnosis here —
      // confirm whether other diagnosis types should honor 'all' as well.
      if (diagType === 'docs' && state.focus_areas.includes('all')) {
        return 'action-diagnose-docs';
      }
    }
  }

  // 5. All requested diagnoses complete: generate report once.
  const allDiagnosisComplete = diagnosisOrder.every(
    d => state.diagnosis[d] !== null || !state.focus_areas.includes(d)
  );

  if (allDiagnosisComplete && !state.completed_actions.includes('action-generate-report')) {
    return 'action-generate-report';
  }

  // 6. Report generated and issues found: propose fixes once.
  if (state.completed_actions.includes('action-generate-report') &&
      state.proposed_fixes.length === 0 &&
      state.issues.length > 0) {
    return 'action-propose-fixes';
  }

  // 7. User-selected fixes waiting to be applied.
  if (state.proposed_fixes.length > 0 && state.pending_fixes.length > 0) {
    return 'action-apply-fix';
  }

  // 8. Applied fixes awaiting verification.
  if (state.applied_fixes.length > 0 &&
      state.applied_fixes.some(f => f.verification_result === 'pending')) {
    return 'action-verify';
  }

  // 9. Quality gate passed: finalize.
  if (state.quality_gate === 'pass') {
    return 'action-complete';
  }

  // 10. Serious unresolved issues and budget left: start a new iteration.
  if (state.iteration_count < state.max_iterations &&
      state.quality_gate !== 'pass' &&
      state.issues.some(i => i.severity === 'critical' || i.severity === 'high')) {
    // Reset diagnosis for re-evaluation.
    return 'action-diagnose-context'; // Start new iteration.
  }

  // 11. Default: complete.
  return 'action-complete';
}

/**
 * Decide whether a Gemini CLI deep analysis should be triggered.
 *
 * Triggers: explicit user request, unanalyzed critical issues, focus areas
 * that require deep analysis, or unresolved issues on a later iteration.
 *
 * @param {object} state - Current session state.
 * @returns {boolean} True if action-gemini-analysis should run next.
 */
function shouldTriggerGeminiAnalysis(state) {
  // Gemini analysis already completed: never trigger again.
  if (state.gemini_analysis?.status === 'completed') {
    return false;
  }

  // Explicit user request.
  if (state.gemini_analysis_requested === true) {
    return true;
  }

  // Critical issues found and no deep analysis performed yet.
  if (state.issues.some(i => i.severity === 'critical') &&
      !state.completed_actions.includes('action-gemini-analysis')) {
    return true;
  }

  // User selected focus areas that require Gemini analysis.
  const geminiAreas = ['architecture', 'prompt', 'performance', 'custom'];
  if (state.focus_areas.some(area => geminiAreas.includes(area))) {
    return true;
  }

  // Standard diagnosis complete but issues remain unresolved after the
  // first iteration: escalate to deep analysis.
  const diagnosisComplete = ['context', 'memory', 'dataflow', 'agent', 'docs'].every(
    d => state.diagnosis[d] !== null
  );
  if (diagnosisComplete &&
      state.issues.length > 0 &&
      state.iteration_count > 0 &&
      !state.completed_actions.includes('action-gemini-analysis')) {
    return true;
  }

  return false;
}
|
||||
```
|
||||
|
||||
## Execution Loop
|
||||
|
||||
```javascript
|
||||
/**
 * Main orchestration loop for the skill-tuning workflow.
 *
 * Repeatedly reads state.json, selects the next action via
 * selectNextAction(), executes it through a sub-agent Task, and merges the
 * result back into state. Stops when no action is selected (terminal or
 * waiting state) or the loop safety limit is reached.
 *
 * Fixes in this revision:
 * - Removed merged-diff residue (duplicate loop headers, duplicate
 *   updateState/parse/error variants) that made the block invalid.
 * - The completion update previously indexed the stale pre-loop
 *   action_history, mutating the wrong entry; the started entry is now
 *   carried forward explicitly and completed in place.
 *
 * @param {string} workDir - Session working directory containing state.json.
 * @returns {Promise<void>}
 */
async function runOrchestrator(workDir) {
  console.log('=== Skill Tuning Orchestrator Started ===');

  let iteration = 0;
  const MAX_LOOP = 50; // Safety limit against infinite loops.

  while (iteration++ < MAX_LOOP) {
    // 1. Read state (single source of truth).
    const state = JSON.parse(Read(`${workDir}/state.json`));
    console.log(`[Loop ${iteration}] Status: ${state.status}, Action: ${state.current_action}`);

    // 2. Select action; null means terminate or wait for external input.
    const actionId = selectNextAction(state);
    if (!actionId) break;

    console.log(`[Loop ${iteration}] Executing: ${actionId}`);

    // 3. Mark current action; sliding window keeps the last 10 history
    //    entries to prevent unbounded state growth.
    const historyEntry = {
      action: actionId,
      started_at: new Date().toISOString()
    };
    updateState(workDir, {
      current_action: actionId,
      action_history: [...state.action_history, historyEntry].slice(-10)
    });

    // 4. Execute action via sub-agent.
    try {
      const actionPrompt = Read(`phases/actions/${actionId}.md`);

      // Pass state path + key fields only (not the full state) to keep the
      // sub-agent prompt small.
      const stateKeyInfo = {
        status: state.status,
        iteration_count: state.iteration_count,
        issues_by_severity: state.issues_by_severity,
        quality_gate: state.quality_gate,
        current_action: state.current_action,
        completed_actions: state.completed_actions,
        user_issue_description: state.user_issue_description,
        target_skill: { name: state.target_skill.name, path: state.target_skill.path }
      };

      const result = await Task({
        subagent_type: 'universal-executor',
        run_in_background: false,
        prompt: `
[CONTEXT]
You are executing action "${actionId}" for skill-tuning workflow.
Action: ${actionId}
Work directory: ${workDir}

[STATE KEY INFO]
${JSON.stringify(stateKeyInfo, null, 2)}

[FULL STATE PATH]
${workDir}/state.json
(Read full state from this file if needed)

[ACTION INSTRUCTIONS]
${actionPrompt}

[OUTPUT]
Return JSON: { stateUpdates: {}, outputFiles: [], summary: "..." }
`
      });

      // 5. Parse result; fall back to the raw string if it is not JSON.
      let actionResult = result;
      try { actionResult = JSON.parse(result); } catch {}

      // 6. Mark action complete: finish the entry started in step 3 and
      //    merge any state updates returned by the sub-agent.
      updateState(workDir, {
        current_action: null,
        completed_actions: [...state.completed_actions, actionId],
        action_history: [...state.action_history, {
          ...historyEntry,
          completed_at: new Date().toISOString(),
          result: 'success',
          output_files: actionResult.outputFiles || []
        }].slice(-10), // Keep last 10.
        ...actionResult.stateUpdates
      });

      console.log(`[Loop ${iteration}] Completed: ${actionId}`);

    } catch (error) {
      console.log(`[Loop ${iteration}] Error in ${actionId}: ${error.message}`);

      // Error handling: sliding window keeps the last 5 errors.
      updateState(workDir, {
        current_action: null,
        errors: [...state.errors, {
          action: actionId,
          message: error.message,
          timestamp: new Date().toISOString()
        }].slice(-5), // Keep last 5.
        error_count: state.error_count + 1
      });
    }
  }

  console.log('=== Skill Tuning Orchestrator Finished ===');
}
|
||||
```
|
||||
|
||||
## Action Catalog
|
||||
## Action Preconditions
|
||||
|
||||
| Action | Purpose | Preconditions | Effects |
|
||||
|--------|---------|---------------|---------|
|
||||
| [action-init](actions/action-init.md) | Initialize tuning session | status === 'pending' | Creates work dirs, backup, sets status='running' |
|
||||
| [action-analyze-requirements](actions/action-analyze-requirements.md) | Analyze user requirements | init completed | Sets requirement_analysis, optimizes focus_areas |
|
||||
| [action-diagnose-context](actions/action-diagnose-context.md) | Analyze context explosion | status === 'running' | Sets diagnosis.context |
|
||||
| [action-diagnose-memory](actions/action-diagnose-memory.md) | Analyze long-tail forgetting | status === 'running' | Sets diagnosis.memory |
|
||||
| [action-diagnose-dataflow](actions/action-diagnose-dataflow.md) | Analyze data flow issues | status === 'running' | Sets diagnosis.dataflow |
|
||||
| [action-diagnose-agent](actions/action-diagnose-agent.md) | Analyze agent coordination | status === 'running' | Sets diagnosis.agent |
|
||||
| [action-diagnose-docs](actions/action-diagnose-docs.md) | Analyze documentation structure | status === 'running', focus includes 'docs' | Sets diagnosis.docs |
|
||||
| [action-gemini-analysis](actions/action-gemini-analysis.md) | Deep analysis via Gemini CLI | User request OR critical issues | Sets gemini_analysis, adds issues |
|
||||
| [action-generate-report](actions/action-generate-report.md) | Generate consolidated report | All diagnoses complete | Creates tuning-report.md |
|
||||
| [action-propose-fixes](actions/action-propose-fixes.md) | Generate fix proposals | Report generated, issues > 0 | Sets proposed_fixes |
|
||||
| [action-apply-fix](actions/action-apply-fix.md) | Apply selected fix | pending_fixes > 0 | Updates applied_fixes |
|
||||
| [action-verify](actions/action-verify.md) | Verify applied fixes | applied_fixes with pending verification | Updates verification_result |
|
||||
| [action-complete](actions/action-complete.md) | Finalize session | quality_gate='pass' OR max_iterations | Sets status='completed' |
|
||||
| [action-abort](actions/action-abort.md) | Abort on errors | error_count >= max_errors | Sets status='failed' |
|
||||
| Action | Precondition |
|
||||
|--------|-------------|
|
||||
| action-init | status='pending' |
|
||||
| action-analyze-requirements | Init complete, not done |
|
||||
| action-diagnose-* | status='running', focus area includes type |
|
||||
| action-gemini-analysis | Requested OR critical issues OR high complexity |
|
||||
| action-generate-report | All diagnosis complete |
|
||||
| action-propose-fixes | Report generated, issues > 0 |
|
||||
| action-apply-fix | pending_fixes > 0 |
|
||||
| action-verify | applied_fixes with pending verification |
|
||||
| action-complete | Quality gates pass OR max iterations |
|
||||
| action-abort | error_count >= max_errors |
|
||||
|
||||
## Termination Conditions
|
||||
## User Interaction Points
|
||||
|
||||
- `status === 'completed'`: Normal completion
|
||||
- `status === 'user_exit'`: User requested exit
|
||||
- `status === 'failed'`: Unrecoverable error
|
||||
- `requirement_analysis.status === 'needs_clarification'`: Waiting for user clarification (暂停,非终止)
|
||||
- `error_count >= max_errors`: Too many errors (default: 3)
|
||||
- `iteration_count >= max_iterations`: Max iterations reached (default: 5)
|
||||
- `quality_gate === 'pass'`: All quality criteria met
|
||||
1. **action-init**: Confirm target skill, describe issue
|
||||
2. **action-propose-fixes**: Select which fixes to apply
|
||||
3. **action-verify**: Review verification, decide to continue or stop
|
||||
4. **action-complete**: Review final summary
|
||||
|
||||
## Error Recovery
|
||||
|
||||
| Error Type | Strategy |
|------------|----------|
|
||||
| Action execution failed | Retry up to 3 times, then skip |
|
||||
| State parse error | Restore from backup |
|
||||
| File write error | Retry with alternative path |
|
||||
| User abort | Save state and exit gracefully |
|
||||
|
||||
## User Interaction Points
|
||||
## Termination Conditions
|
||||
|
||||
The orchestrator pauses for user input at these points:
|
||||
|
||||
1. **action-init**: Confirm target skill and describe issue
|
||||
2. **action-propose-fixes**: Select which fixes to apply
|
||||
3. **action-verify**: Review verification results, decide to continue or stop
|
||||
4. **action-complete**: Review final summary
|
||||
- Normal: `status === 'completed'`, `quality_gate === 'pass'`
|
||||
- User: `status === 'user_exit'`
|
||||
- Error: `status === 'failed'`, `error_count >= max_errors`
|
||||
- Iteration limit: `iteration_count >= max_iterations`
|
||||
- Clarification wait: `requirement_analysis.status === 'needs_clarification'` (pause, not terminate)
|
||||
|
||||
@@ -2,276 +2,174 @@
|
||||
|
||||
Classification of skill execution issues with detection patterns and severity criteria.
|
||||
|
||||
## When to Use
|
||||
## Quick Reference
|
||||
|
||||
| Phase | Usage | Section |
|
||||
|-------|-------|---------|
|
||||
| All Diagnosis Actions | Issue classification | All sections |
|
||||
| action-propose-fixes | Strategy selection | Fix Mapping |
|
||||
| action-generate-report | Severity assessment | Severity Criteria |
|
||||
| Category | Priority | Detection | Fix Strategy |
|
||||
|----------|----------|-----------|--------------|
|
||||
| Authoring Violation | P0 | Intermediate files, state bloat, file relay | eliminate_intermediate, minimize_state |
|
||||
| Data Flow Disruption | P1 | Scattered state, inconsistent formats | state_centralization, schema_enforcement |
|
||||
| Agent Coordination | P2 | Fragile chains, no error handling | error_wrapping, result_validation |
|
||||
| Context Explosion | P3 | Unbounded history, full content passing | sliding_window, path_reference |
|
||||
| Long-tail Forgetting | P4 | Early constraint loss | constraint_injection, checkpoint_restore |
|
||||
| Token Consumption | P5 | Verbose prompts, redundant I/O | prompt_compression, lazy_loading |
|
||||
| Doc Redundancy | P6 | Repeated definitions | consolidate_to_ssot |
|
||||
| Doc Conflict | P7 | Inconsistent definitions | reconcile_definitions |
|
||||
|
||||
---
|
||||
|
||||
## Problem Categories
|
||||
## 0. Authoring Principles Violation (P0)
|
||||
|
||||
### 0. Authoring Principles Violation (P0)
|
||||
|
||||
**Definition**: 违反 skill 撰写首要准则(简洁高效、去除存储、上下文流转)。
|
||||
|
||||
**Root Causes**:
|
||||
- 不必要的中间文件存储
|
||||
- State schema 过度膨胀
|
||||
- 文件中转代替上下文传递
|
||||
- 重复数据存储
|
||||
**Definition**: Violates skill authoring principles (simplicity, no intermediate files, context passing).
|
||||
|
||||
**Detection Patterns**:
|
||||
|
||||
| Pattern ID | Regex/Check | Description |
|
||||
|------------|-------------|-------------|
|
||||
| APV-001 | `/Write\([^)]*temp-|intermediate-/` | 中间文件写入 |
|
||||
| APV-002 | `/Write\([^)]+\)[\s\S]{0,50}Read\([^)]+\)/` | 写后立即读(文件中转) |
|
||||
| APV-003 | State schema > 15 fields | State 字段过多 |
|
||||
| APV-004 | `/_history\s*[.=].*push|concat/` | 无限增长数组 |
|
||||
| APV-005 | `/debug_|_cache|_temp/` in state | 调试/缓存字段残留 |
|
||||
| APV-006 | Same data in multiple state fields | 重复存储 |
|
||||
| Pattern ID | Check | Description |
|
||||
|------------|-------|-------------|
|
||||
| APV-001 | `/Write\([^)]*temp-\|intermediate-/` | Intermediate file writes |
|
||||
| APV-002 | `/Write\([^)]+\)[\s\S]{0,50}Read\([^)]+\)/` | Write-then-read relay |
|
||||
| APV-003 | State schema > 15 fields | Excessive state fields |
|
||||
| APV-004 | `/_history\s*[.=].*push\|concat/` | Unbounded array growth |
|
||||
| APV-005 | `/debug_\|_cache\|_temp/` in state | Debug/cache field residue |
|
||||
| APV-006 | Same data in multiple fields | Duplicate storage |
|
||||
|
||||
**Impact Levels**:
|
||||
- **Critical**: 中间文件 > 5 个,严重违反原则
|
||||
- **High**: State 字段 > 20 个,或存在文件中转
|
||||
- **Medium**: 存在调试字段或轻微冗余
|
||||
- **Low**: 轻微的命名不规范
|
||||
**Impact**: Critical (>5 intermediate files), High (>20 state fields), Medium (debug fields), Low (naming issues)
|
||||
|
||||
---
|
||||
|
||||
### 1. Context Explosion (P2)
|
||||
## 1. Context Explosion (P3)
|
||||
|
||||
**Definition**: Excessive token accumulation causing prompt size to grow unbounded.
|
||||
|
||||
**Root Causes**:
|
||||
- Unbounded conversation history
|
||||
- Full content passing instead of references
|
||||
- Missing summarization mechanisms
|
||||
- Agent returning full output instead of path+summary
|
||||
**Definition**: Unbounded token accumulation causing prompt size growth.
|
||||
|
||||
**Detection Patterns**:
|
||||
|
||||
| Pattern ID | Regex/Check | Description |
|
||||
|------------|-------------|-------------|
|
||||
| Pattern ID | Check | Description |
|
||||
|------------|-------|-------------|
|
||||
| CTX-001 | `/history\s*[.=].*push\|concat/` | History array growth |
|
||||
| CTX-002 | `/JSON\.stringify\s*\(\s*state\s*\)/` | Full state serialization |
|
||||
| CTX-003 | `/Read\([^)]+\)\s*[\+,]/` | Multiple file content concatenation |
|
||||
| CTX-004 | `/return\s*\{[^}]*content:/` | Agent returning full content |
|
||||
| CTX-005 | File length > 5000 chars without summarize | Long prompt without compression |
|
||||
| CTX-005 | File > 5000 chars without summarization | Long prompts |
|
||||
|
||||
**Impact Levels**:
|
||||
- **Critical**: Context exceeds model limit (128K tokens)
|
||||
- **High**: Context > 50K tokens per iteration
|
||||
- **Medium**: Context grows 10%+ per iteration
|
||||
- **Low**: Potential for growth but currently manageable
|
||||
**Impact**: Critical (>128K tokens), High (>50K per iteration), Medium (10%+ growth), Low (manageable)
|
||||
|
||||
---
|
||||
|
||||
### 2. Long-tail Forgetting (P3)
|
||||
## 2. Long-tail Forgetting (P4)
|
||||
|
||||
**Definition**: Loss of early instructions, constraints, or goals in long execution chains.
|
||||
|
||||
**Root Causes**:
|
||||
- No explicit constraint propagation
|
||||
- Reliance on implicit context
|
||||
- Missing checkpoint/restore mechanisms
|
||||
- State schema without requirements field
|
||||
**Definition**: Loss of early instructions/constraints in long chains.
|
||||
|
||||
**Detection Patterns**:
|
||||
|
||||
| Pattern ID | Regex/Check | Description |
|
||||
|------------|-------------|-------------|
|
||||
| MEM-001 | Later phases missing constraint reference | Constraint not carried forward |
|
||||
| Pattern ID | Check | Description |
|
||||
|------------|-------|-------------|
|
||||
| MEM-001 | Later phases missing constraint reference | Constraint not forwarded |
|
||||
| MEM-002 | `/\[TASK\][^[]*(?!\[CONSTRAINTS\])/` | Task without constraints section |
|
||||
| MEM-003 | Key phases without checkpoint | Missing state preservation |
|
||||
| MEM-004 | State schema lacks `original_requirements` | No constraint persistence |
|
||||
| MEM-004 | State lacks `original_requirements` | No constraint persistence |
|
||||
| MEM-005 | No verification phase | Output not checked against intent |
|
||||
|
||||
**Impact Levels**:
|
||||
- **Critical**: Original goal completely lost
|
||||
- **High**: Key constraints ignored in output
|
||||
- **Medium**: Some requirements missing
|
||||
- **Low**: Minor goal drift
|
||||
**Impact**: Critical (goal lost), High (constraints ignored), Medium (some missing), Low (minor drift)
|
||||
|
||||
---
|
||||
|
||||
### 3. Data Flow Disruption (P0)
|
||||
## 3. Data Flow Disruption (P1)
|
||||
|
||||
**Definition**: Inconsistent state management causing data loss or corruption.
|
||||
|
||||
**Root Causes**:
|
||||
- Multiple state storage locations
|
||||
- Inconsistent field naming
|
||||
- Missing schema validation
|
||||
- Format transformation without normalization
|
||||
**Definition**: Inconsistent state management causing data loss/corruption.
|
||||
|
||||
**Detection Patterns**:
|
||||
|
||||
| Pattern ID | Regex/Check | Description |
|
||||
|------------|-------------|-------------|
|
||||
| Pattern ID | Check | Description |
|
||||
|------------|-------|-------------|
|
||||
| DF-001 | Multiple state file writes | Scattered state storage |
|
||||
| DF-002 | Same concept, different names | Field naming inconsistency |
|
||||
| DF-003 | JSON.parse without validation | Missing schema validation |
|
||||
| DF-004 | Files written but never read | Orphaned outputs |
|
||||
| DF-005 | Autonomous skill without state-schema | Undefined state structure |
|
||||
|
||||
**Impact Levels**:
|
||||
- **Critical**: Data loss or corruption
|
||||
- **High**: State inconsistency between phases
|
||||
- **Medium**: Potential for inconsistency
|
||||
- **Low**: Minor naming inconsistencies
|
||||
**Impact**: Critical (data loss), High (state inconsistency), Medium (potential inconsistency), Low (naming)
|
||||
|
||||
---
|
||||
|
||||
### 4. Agent Coordination Failure (P1)
|
||||
## 4. Agent Coordination Failure (P2)
|
||||
|
||||
**Definition**: Fragile agent call patterns causing cascading failures.
|
||||
|
||||
**Root Causes**:
|
||||
- Missing error handling in Task calls
|
||||
- No result validation
|
||||
- Inconsistent agent configurations
|
||||
- Deeply nested agent calls
|
||||
|
||||
**Detection Patterns**:
|
||||
|
||||
| Pattern ID | Regex/Check | Description |
|
||||
|------------|-------------|-------------|
|
||||
| Pattern ID | Check | Description |
|
||||
|------------|-------|-------------|
|
||||
| AGT-001 | Task without try-catch | Missing error handling |
|
||||
| AGT-002 | Result used without validation | No return value check |
|
||||
| AGT-003 | > 3 different agent types | Agent type proliferation |
|
||||
| AGT-003 | >3 different agent types | Agent type proliferation |
|
||||
| AGT-004 | Nested Task in prompt | Agent calling agent |
|
||||
| AGT-005 | Task used but not in allowed-tools | Tool declaration mismatch |
|
||||
| AGT-006 | Multiple return formats | Inconsistent agent output |
|
||||
|
||||
**Impact Levels**:
|
||||
- **Critical**: Workflow crash on agent failure
|
||||
- **High**: Unpredictable agent behavior
|
||||
- **Medium**: Occasional coordination issues
|
||||
- **Low**: Minor inconsistencies
|
||||
**Impact**: Critical (crash on failure), High (unpredictable behavior), Medium (occasional issues), Low (minor)
|
||||
|
||||
---
|
||||
|
||||
### 5. Documentation Redundancy (P5)
|
||||
## 5. Documentation Redundancy (P6)
|
||||
|
||||
**Definition**: 同一定义(如 State Schema、映射表、类型定义)在多个文件中重复出现,导致维护困难和不一致风险。
|
||||
|
||||
**Root Causes**:
|
||||
- 缺乏单一真相来源 (SSOT)
|
||||
- 复制粘贴代替引用
|
||||
- 硬编码配置代替集中管理
|
||||
**Definition**: Same definition (State Schema, mappings, types) repeated across files.
|
||||
|
||||
**Detection Patterns**:
|
||||
|
||||
| Pattern ID | Regex/Check | Description |
|
||||
|------------|-------------|-------------|
|
||||
| DOC-RED-001 | 跨文件语义比较 | 找到 State Schema 等核心概念的重复定义 |
|
||||
| DOC-RED-002 | 代码块 vs 规范表对比 | action 文件中硬编码与 spec 文档的重复 |
|
||||
| DOC-RED-003 | `/interface\s+(\w+)/` 同名扫描 | 多处定义的 interface/type |
|
||||
| Pattern ID | Check | Description |
|
||||
|------------|-------|-------------|
|
||||
| DOC-RED-001 | Cross-file semantic comparison | State Schema duplication |
|
||||
| DOC-RED-002 | Code block vs spec comparison | Hardcoded config duplication |
|
||||
| DOC-RED-003 | `/interface\s+(\w+)/` same-name scan | Interface/type duplication |
|
||||
|
||||
**Impact Levels**:
|
||||
- **High**: 核心定义(State Schema, 映射表)重复
|
||||
- **Medium**: 类型定义重复
|
||||
- **Low**: 示例代码重复
|
||||
**Impact**: High (core definitions), Medium (type definitions), Low (example code)
|
||||
|
||||
---
|
||||
|
||||
### 6. Token Consumption (P6)
|
||||
## 6. Token Consumption (P5)
|
||||
|
||||
**Definition**: Excessive token usage from verbose prompts, large state objects, or inefficient I/O patterns.
|
||||
|
||||
**Root Causes**:
|
||||
- Long static prompts without compression
|
||||
- State schema with too many fields
|
||||
- Full content embedding instead of path references
|
||||
- Arrays growing unbounded without sliding windows
|
||||
- Write-then-read file relay patterns
|
||||
**Definition**: Excessive token usage from verbose prompts, large state, inefficient I/O.
|
||||
|
||||
**Detection Patterns**:
|
||||
|
||||
| Pattern ID | Regex/Check | Description |
|
||||
|------------|-------------|-------------|
|
||||
| Pattern ID | Check | Description |
|
||||
|------------|-------|-------------|
|
||||
| TKN-001 | File size > 4KB | Verbose prompt files |
|
||||
| TKN-002 | State fields > 15 | Excessive state schema |
|
||||
| TKN-003 | `/Read\([^)]+\)\s*[\+,]/` | Full content passing |
|
||||
| TKN-004 | `/.push\|concat(?!.*\.slice)/` | Unbounded array growth |
|
||||
| TKN-005 | `/Write\([^)]+\)[\s\S]{0,100}Read\([^)]+\)/` | Write-then-read pattern |
|
||||
|
||||
**Impact Levels**:
|
||||
- **High**: Multiple TKN-003/TKN-004 issues causing significant token waste
|
||||
- **Medium**: Several verbose files or state bloat
|
||||
- **Low**: Minor optimization opportunities
|
||||
**Impact**: High (multiple TKN-003/004), Medium (verbose files), Low (minor optimization)
|
||||
|
||||
---
|
||||
|
||||
### 7. Documentation Conflict (P7)
|
||||
## 7. Documentation Conflict (P7)
|
||||
|
||||
**Definition**: 同一概念在不同文件中定义不一致,导致行为不可预测和文档误导。
|
||||
|
||||
**Root Causes**:
|
||||
- 定义更新后未同步其他位置
|
||||
- 实现与文档漂移
|
||||
- 缺乏一致性校验
|
||||
**Definition**: Same concept defined inconsistently across files.
|
||||
|
||||
**Detection Patterns**:
|
||||
|
||||
| Pattern ID | Regex/Check | Description |
|
||||
|------------|-------------|-------------|
|
||||
| DOC-CON-001 | 键值一致性校验 | 同一键(如优先级)在不同文件中值不同 |
|
||||
| DOC-CON-002 | 实现 vs 文档对比 | 硬编码配置与文档对应项不一致 |
|
||||
| Pattern ID | Check | Description |
|
||||
|------------|-------|-------------|
|
||||
| DOC-CON-001 | Key-value consistency check | Same key, different values |
|
||||
| DOC-CON-002 | Implementation vs docs comparison | Hardcoded vs documented mismatch |
|
||||
|
||||
**Impact Levels**:
|
||||
- **Critical**: 优先级/类别定义冲突
|
||||
- **High**: 策略映射不一致
|
||||
- **Medium**: 示例与实际不符
|
||||
**Impact**: Critical (priority/category conflicts), High (strategy mapping inconsistency), Medium (example mismatch)
|
||||
|
||||
---
|
||||
|
||||
## Severity Criteria
|
||||
|
||||
### Global Severity Matrix
|
||||
|
||||
| Severity | Definition | Action Required |
|
||||
|----------|------------|-----------------|
|
||||
| **Critical** | Blocks execution or causes data loss | Immediate fix required |
|
||||
| **High** | Significantly impacts reliability | Should fix before deployment |
|
||||
| **Medium** | Affects quality or maintainability | Fix in next iteration |
|
||||
| **Low** | Minor improvement opportunity | Optional fix |
|
||||
|
||||
### Severity Calculation
|
||||
## Severity Calculation
|
||||
|
||||
```javascript
|
||||
function calculateIssueSeverity(issue) {
|
||||
const weights = {
|
||||
impact_on_execution: 40, // Does it block workflow?
|
||||
data_integrity_risk: 30, // Can it cause data loss?
|
||||
frequency: 20, // How often does it occur?
|
||||
complexity_to_fix: 10 // How hard to fix?
|
||||
};
|
||||
|
||||
function calculateSeverity(issue) {
|
||||
const weights = { execution: 40, data_integrity: 30, frequency: 20, complexity: 10 };
|
||||
let score = 0;
|
||||
|
||||
// Impact on execution
|
||||
if (issue.blocks_execution) score += weights.impact_on_execution;
|
||||
else if (issue.degrades_execution) score += weights.impact_on_execution * 0.5;
|
||||
|
||||
// Data integrity
|
||||
if (issue.causes_data_loss) score += weights.data_integrity_risk;
|
||||
else if (issue.causes_inconsistency) score += weights.data_integrity_risk * 0.5;
|
||||
|
||||
// Frequency
|
||||
if (issue.blocks_execution) score += weights.execution;
|
||||
if (issue.causes_data_loss) score += weights.data_integrity;
|
||||
if (issue.occurs_every_run) score += weights.frequency;
|
||||
else if (issue.occurs_sometimes) score += weights.frequency * 0.5;
|
||||
if (issue.fix_complexity === 'low') score += weights.complexity;
|
||||
|
||||
// Complexity (inverse - easier to fix = higher priority)
|
||||
if (issue.fix_complexity === 'low') score += weights.complexity_to_fix;
|
||||
else if (issue.fix_complexity === 'medium') score += weights.complexity_to_fix * 0.5;
|
||||
|
||||
// Map score to severity
|
||||
if (score >= 70) return 'critical';
|
||||
if (score >= 50) return 'high';
|
||||
if (score >= 30) return 'medium';
|
||||
@@ -283,36 +181,30 @@ function calculateIssueSeverity(issue) {
|
||||
|
||||
## Fix Mapping
|
||||
|
||||
| Problem Type | Recommended Strategies | Priority Order |
|
||||
|--------------|----------------------|----------------|
|
||||
| **Authoring Principles Violation** | eliminate_intermediate_files, minimize_state, context_passing | 1, 2, 3 |
|
||||
| Context Explosion | sliding_window, path_reference, context_summarization | 1, 2, 3 |
|
||||
| Long-tail Forgetting | constraint_injection, state_constraints_field, checkpoint | 1, 2, 3 |
|
||||
| Data Flow Disruption | state_centralization, schema_enforcement, field_normalization | 1, 2, 3 |
|
||||
| Agent Coordination | error_wrapping, result_validation, flatten_nesting | 1, 2, 3 |
|
||||
| **Token Consumption** | prompt_compression, lazy_loading, output_minimization, state_field_reduction | 1, 2, 3, 4 |
|
||||
| **Documentation Redundancy** | consolidate_to_ssot, centralize_mapping_config | 1, 2 |
|
||||
| **Documentation Conflict** | reconcile_conflicting_definitions | 1 |
|
||||
| Problem | Strategies (priority order) |
|
||||
|---------|---------------------------|
|
||||
| Authoring Violation | eliminate_intermediate_files, minimize_state, context_passing |
|
||||
| Context Explosion | sliding_window, path_reference, context_summarization |
|
||||
| Long-tail Forgetting | constraint_injection, state_constraints_field, checkpoint |
|
||||
| Data Flow Disruption | state_centralization, schema_enforcement, field_normalization |
|
||||
| Agent Coordination | error_wrapping, result_validation, flatten_nesting |
|
||||
| Token Consumption | prompt_compression, lazy_loading, output_minimization, state_field_reduction |
|
||||
| Doc Redundancy | consolidate_to_ssot, centralize_mapping_config |
|
||||
| Doc Conflict | reconcile_conflicting_definitions |
|
||||
|
||||
---
|
||||
|
||||
## Cross-Category Dependencies
|
||||
|
||||
Some issues may trigger others:
|
||||
|
||||
```
|
||||
Context Explosion ──→ Long-tail Forgetting
|
||||
(Large context causes important info to be pushed out)
|
||||
Context Explosion → Long-tail Forgetting
|
||||
(Large context pushes important info out)
|
||||
|
||||
Data Flow Disruption ──→ Agent Coordination Failure
|
||||
(Inconsistent data causes agents to fail)
|
||||
Data Flow Disruption → Agent Coordination Failure
|
||||
(Inconsistent data causes agent failures)
|
||||
|
||||
Agent Coordination Failure ──→ Context Explosion
|
||||
(Failed retries add to context)
|
||||
Agent Coordination Failure → Context Explosion
|
||||
(Failed retries add to context)
|
||||
```
|
||||
|
||||
When fixing, address in this order:
|
||||
1. **P0 Data Flow** - Foundation for other fixes
|
||||
2. **P1 Agent Coordination** - Stability
|
||||
3. **P2 Context Explosion** - Efficiency
|
||||
4. **P3 Long-tail Forgetting** - Quality
|
||||
**Fix Order**: P1 Data Flow → P2 Agent → P3 Context → P4 Memory
|
||||
|
||||
Reference in New Issue
Block a user