From 9e55f51501f645b1e7fba140121c1d63130395ec Mon Sep 17 00:00:00 2001 From: catlog22 Date: Wed, 14 Jan 2026 13:42:57 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E9=9C=80=E6=B1=82?= =?UTF-8?q?=E5=88=86=E6=9E=90=E5=8A=9F=E8=83=BD=EF=BC=8C=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E7=BB=B4=E5=BA=A6=E6=8B=86=E8=A7=A3=E3=80=81=E8=A6=86=E7=9B=96?= =?UTF-8?q?=E5=BA=A6=E8=AF=84=E4=BC=B0=E5=92=8C=E6=AD=A7=E4=B9=89=E6=A3=80?= =?UTF-8?q?=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .claude/skills/skill-tuning/SKILL.md | 33 +- .../actions/action-analyze-requirements.md | 436 ++++++++++++++++++ .../skill-tuning/phases/orchestrator.md | 20 + .../skill-tuning/phases/state-schema.md | 59 ++- .../skill-tuning/specs/dimension-mapping.md | 206 +++++++++ .../skill-tuning/specs/problem-taxonomy.md | 30 ++ .../specs/skill-authoring-principles.md | 189 ++++++++ .../skill-tuning/specs/tuning-strategies.md | 149 ++++++ 8 files changed, 1115 insertions(+), 7 deletions(-) create mode 100644 .claude/skills/skill-tuning/phases/actions/action-analyze-requirements.md create mode 100644 .claude/skills/skill-tuning/specs/dimension-mapping.md create mode 100644 .claude/skills/skill-tuning/specs/skill-authoring-principles.md diff --git a/.claude/skills/skill-tuning/SKILL.md b/.claude/skills/skill-tuning/SKILL.md index ca64efec..70b312ee 100644 --- a/.claude/skills/skill-tuning/SKILL.md +++ b/.claude/skills/skill-tuning/SKILL.md @@ -25,12 +25,20 @@ Universal skill diagnosis and optimization tool that identifies and resolves ski │ │ │ │ ┌────────────┬───────────┼───────────┬────────────┬────────────┐ │ │ ↓ ↓ ↓ ↓ ↓ ↓ │ -│ ┌──────┐ ┌─────────┐ ┌────────┐ ┌────────┐ ┌────────┐ ┌─────────┐ │ -│ │ Init │ │Diagnose │ │Diagnose│ │Diagnose│ │Diagnose│ │ Gemini │ │ -│ │ │ │ Context │ │ Memory │ │DataFlow│ │ Agent │ │Analysis │ │ -│ └──────┘ └─────────┘ └────────┘ └────────┘ └────────┘ └─────────┘ │ -│ │ │ │ │ │ │ │ -│ 
└───────────┴───────────┴───────────┴────────────┴────────────┘ │ +│ ┌──────┐ ┌──────────┐ ┌─────────┐ ┌────────┐ ┌────────┐ ┌─────────┐ │ +│ │ Init │→ │ Analyze │→ │Diagnose │ │Diagnose│ │Diagnose│ │ Gemini │ │ +│ │ │ │Requiremts│ │ Context │ │ Memory │ │DataFlow│ │Analysis │ │ +│ └──────┘ └──────────┘ └─────────┘ └────────┘ └────────┘ └─────────┘ │ +│ │ │ │ │ │ │ +│ │ └───────────┴───────────┴────────────┘ │ +│ ↓ │ +│ ┌───────────────────────────────────────────────────────────────────────┐ │ +│ │ Requirement Analysis (NEW) │ │ +│ │ • Phase 1: 维度拆解 (Gemini CLI) - 单一描述 → 多个关注维度 │ │ +│ │ • Phase 2: Spec 匹配 - 每个维度 → taxonomy + strategy │ │ +│ │ • Phase 3: 覆盖度评估 - 以"有修复策略"为满足标准 │ │ +│ │ • Phase 4: 歧义检测 - 识别多义性描述,必要时请求澄清 │ │ +│ └───────────────────────────────────────────────────────────────────────┘ │ │ ↓ │ │ ┌──────────────────┐ │ │ │ Apply Fixes + │ │ @@ -40,6 +48,7 @@ Universal skill diagnosis and optimization tool that identifies and resolves ski │ ┌───────────────────────────────────────────────────────────────────────┐ │ │ │ Gemini CLI Integration │ │ │ │ 根据用户需求动态调用 gemini cli 进行深度分析: │ │ +│ │ • 需求维度拆解 (requirement decomposition) │ │ │ │ • 复杂问题分析 (prompt engineering, architecture review) │ │ │ │ • 代码模式识别 (pattern matching, anti-pattern detection) │ │ │ │ • 修复策略生成 (fix generation, refactoring suggestions) │ │ @@ -161,8 +170,10 @@ RULES: $(cat ~/.claude/workflows/cli-templates/protocols/analysis-protocol.md) | | Document | Purpose | Priority | |----------|---------|----------| +| [specs/skill-authoring-principles.md](specs/skill-authoring-principles.md) | **首要准则:简洁高效、去除存储、上下文流转** | **P0** | | [specs/problem-taxonomy.md](specs/problem-taxonomy.md) | Problem classification and detection patterns | **P0** | | [specs/tuning-strategies.md](specs/tuning-strategies.md) | Fix strategies for each problem type | **P0** | +| [specs/dimension-mapping.md](specs/dimension-mapping.md) | Dimension to Spec mapping rules | **P0** | | 
[specs/quality-gates.md](specs/quality-gates.md) | Quality thresholds and verification criteria | P1 | ### Templates (Reference) @@ -181,6 +192,7 @@ RULES: $(cat ~/.claude/workflows/cli-templates/protocols/analysis-protocol.md) | │ Phase 0: Specification Study (强制前置 - 禁止跳过) │ │ → Read: specs/problem-taxonomy.md (问题分类) │ │ → Read: specs/tuning-strategies.md (调优策略) │ +│ → Read: specs/dimension-mapping.md (维度映射规则) │ │ → Read: Target skill's SKILL.md and phases/*.md │ │ → Output: 内化规范,理解目标 skill 结构 │ ├─────────────────────────────────────────────────────────────────────────────┤ @@ -189,6 +201,13 @@ RULES: $(cat ~/.claude/workflows/cli-templates/protocols/analysis-protocol.md) | │ → Initialize state.json with target skill info │ │ → Create backup of target skill files │ ├─────────────────────────────────────────────────────────────────────────────┤ +│ action-analyze-requirements: Requirement Analysis (NEW) │ +│ → Phase 1: 维度拆解 (Gemini CLI) - 单一描述 → 多个关注维度 │ +│ → Phase 2: Spec 匹配 - 每个维度 → taxonomy + strategy │ +│ → Phase 3: 覆盖度评估 - 以"有修复策略"为满足标准 │ +│ → Phase 4: 歧义检测 - 识别多义性描述,必要时请求澄清 │ +│ → Output: requirement-analysis.json, 自动优化 focus_areas │ +├─────────────────────────────────────────────────────────────────────────────┤ │ action-diagnose-context: Context Explosion Analysis │ │ → Scan for token accumulation patterns │ │ → Detect multi-turn dialogue growth │ @@ -328,6 +347,7 @@ interface Fix { | [phases/orchestrator.md](phases/orchestrator.md) | Orchestrator decision logic | | [phases/state-schema.md](phases/state-schema.md) | State structure definition | | [phases/actions/action-init.md](phases/actions/action-init.md) | Initialize tuning session | +| [phases/actions/action-analyze-requirements.md](phases/actions/action-analyze-requirements.md) | Requirement analysis (NEW) | | [phases/actions/action-diagnose-context.md](phases/actions/action-diagnose-context.md) | Context explosion diagnosis | | 
[phases/actions/action-diagnose-memory.md](phases/actions/action-diagnose-memory.md) | Long-tail forgetting diagnosis | | [phases/actions/action-diagnose-dataflow.md](phases/actions/action-diagnose-dataflow.md) | Data flow diagnosis | @@ -339,4 +359,5 @@ interface Fix { | [phases/actions/action-complete.md](phases/actions/action-complete.md) | Finalization | | [specs/problem-taxonomy.md](specs/problem-taxonomy.md) | Problem classification | | [specs/tuning-strategies.md](specs/tuning-strategies.md) | Fix strategies | +| [specs/dimension-mapping.md](specs/dimension-mapping.md) | Dimension to Spec mapping (NEW) | | [specs/quality-gates.md](specs/quality-gates.md) | Quality criteria | diff --git a/.claude/skills/skill-tuning/phases/actions/action-analyze-requirements.md b/.claude/skills/skill-tuning/phases/actions/action-analyze-requirements.md new file mode 100644 index 00000000..ea36a35c --- /dev/null +++ b/.claude/skills/skill-tuning/phases/actions/action-analyze-requirements.md @@ -0,0 +1,436 @@ +# Action: Analyze Requirements + +将用户问题描述拆解为多个分析维度,匹配 Spec,评估覆盖度,检测歧义。 + +## Purpose + +- 将单一用户描述拆解为多个独立关注维度 +- 为每个维度匹配 problem-taxonomy(检测)+ tuning-strategies(修复) +- 以"有修复策略"为标准判断是否满足需求 +- 检测歧义并在必要时请求用户澄清 + +## Preconditions + +- [ ] `state.status === 'running'` +- [ ] `state.target_skill !== null` +- [ ] `state.completed_actions.includes('action-init')` +- [ ] `!state.completed_actions.includes('action-analyze-requirements')` + +## Execution + +### Phase 1: 维度拆解 (Gemini CLI) + +调用 Gemini 对用户描述进行语义分析,拆解为独立维度: + +```javascript +async function analyzeDimensions(state, workDir) { + const prompt = ` +PURPOSE: 分析用户问题描述,拆解为独立的关注维度 +TASK: +• 识别用户描述中的多个关注点(每个关注点应该是独立的、可单独分析的) +• 为每个关注点提取关键词(中英文均可) +• 推断可能的问题类别: + - context_explosion: 上下文/Token 相关 + - memory_loss: 遗忘/约束丢失相关 + - dataflow_break: 状态/数据流相关 + - agent_failure: Agent/子任务相关 + - prompt_quality: 提示词/输出质量相关 + - architecture: 架构/结构相关 + - performance: 性能/效率相关 + - error_handling: 错误/异常处理相关 + - output_quality: 输出质量/验证相关 + - 
user_experience: 交互/体验相关 +• 评估推断置信度 (0-1) + +INPUT: +User description: ${state.user_issue_description} +Target skill: ${state.target_skill.name} +Skill structure: ${JSON.stringify(state.target_skill.phases)} + +MODE: analysis +CONTEXT: @specs/problem-taxonomy.md @specs/dimension-mapping.md +EXPECTED: JSON (不要包含 markdown 代码块标记) +{ + "dimensions": [ + { + "id": "DIM-001", + "description": "关注点的简短描述", + "keywords": ["关键词1", "关键词2"], + "inferred_category": "问题类别", + "confidence": 0.85, + "reasoning": "推断理由" + } + ], + "analysis_notes": "整体分析说明" +} +RULES: +- 每个维度必须独立,不重叠 +- 低于 0.5 置信度的推断应标注需要澄清 +- 如果用户描述非常模糊,至少提取一个 "general" 维度 +`; + + const cliCommand = `ccw cli -p "${escapeForShell(prompt)}" --tool gemini --mode analysis --cd "${state.target_skill.path}"`; + + console.log('Phase 1: 执行 Gemini 维度拆解分析...'); + + const result = Bash({ + command: cliCommand, + run_in_background: true, + timeout: 300000 + }); + + return result; +} +``` + +### Phase 2: Spec 匹配 + +基于 `specs/dimension-mapping.md` 规则为每个维度匹配检测模式和修复策略: + +```javascript +function matchSpecs(dimensions) { + // 加载映射规则 + const mappingRules = loadMappingRules(); + + return dimensions.map(dim => { + // 匹配 taxonomy pattern + const taxonomyMatch = findTaxonomyMatch(dim.inferred_category, mappingRules); + + // 匹配 strategy + const strategyMatch = findStrategyMatch(dim.inferred_category, mappingRules); + + // 判断是否满足(核心标准:有修复策略) + const hasFix = strategyMatch !== null && strategyMatch.strategies.length > 0; + + return { + dimension_id: dim.id, + taxonomy_match: taxonomyMatch, + strategy_match: strategyMatch, + has_fix: hasFix, + needs_gemini_analysis: taxonomyMatch === null // 无内置检测时需要 Gemini 深度分析 + }; + }); +} + +function findTaxonomyMatch(category, rules) { + const patternMapping = { + 'context_explosion': { category: 'context_explosion', pattern_ids: ['CTX-001', 'CTX-002', 'CTX-003', 'CTX-004', 'CTX-005'], severity_hint: 'high' }, + 'memory_loss': { category: 'memory_loss', pattern_ids: ['MEM-001', 'MEM-002', 'MEM-003', 
'MEM-004', 'MEM-005'], severity_hint: 'high' }, + 'dataflow_break': { category: 'dataflow_break', pattern_ids: ['DF-001', 'DF-002', 'DF-003', 'DF-004', 'DF-005'], severity_hint: 'critical' }, + 'agent_failure': { category: 'agent_failure', pattern_ids: ['AGT-001', 'AGT-002', 'AGT-003', 'AGT-004', 'AGT-005', 'AGT-006'], severity_hint: 'high' }, + 'performance': { category: 'performance', pattern_ids: ['CTX-001', 'CTX-003'], severity_hint: 'medium' }, + 'error_handling': { category: 'error_handling', pattern_ids: ['AGT-001', 'AGT-002'], severity_hint: 'medium' } + }; + + return patternMapping[category] || null; +} + +function findStrategyMatch(category, rules) { + const strategyMapping = { + 'context_explosion': { strategies: ['sliding_window', 'path_reference', 'context_summarization', 'structured_state'], risk_levels: ['low', 'low', 'low', 'medium'] }, + 'memory_loss': { strategies: ['constraint_injection', 'state_constraints_field', 'checkpoint_restore', 'goal_embedding'], risk_levels: ['low', 'low', 'low', 'medium'] }, + 'dataflow_break': { strategies: ['state_centralization', 'schema_enforcement', 'field_normalization'], risk_levels: ['medium', 'low', 'low'] }, + 'agent_failure': { strategies: ['error_wrapping', 'result_validation', 'flatten_nesting'], risk_levels: ['low', 'low', 'medium'] }, + 'prompt_quality': { strategies: ['structured_prompt', 'output_schema', 'grounding_context', 'format_enforcement'], risk_levels: ['low', 'low', 'medium', 'low'] }, + 'architecture': { strategies: ['phase_decomposition', 'interface_contracts', 'plugin_architecture'], risk_levels: ['medium', 'medium', 'high'] }, + 'performance': { strategies: ['token_budgeting', 'parallel_execution', 'result_caching', 'lazy_loading'], risk_levels: ['low', 'low', 'low', 'low'] }, + 'error_handling': { strategies: ['graceful_degradation', 'error_propagation', 'structured_logging'], risk_levels: ['low', 'low', 'low'] }, + 'output_quality': { strategies: ['quality_gates', 'output_validation', 
'template_enforcement'], risk_levels: ['low', 'low', 'low'] }, + 'user_experience': { strategies: ['progress_tracking', 'status_communication', 'interactive_checkpoints'], risk_levels: ['low', 'low', 'low'] } + }; + + // Fallback to custom + return strategyMapping[category] || { strategies: ['custom'], risk_levels: ['medium'] }; +} +``` + +### Phase 3: 覆盖度评估 + +评估所有维度的 Spec 覆盖情况: + +```javascript +function evaluateCoverage(specMatches) { + const total = specMatches.length; + const withDetection = specMatches.filter(m => m.taxonomy_match !== null).length; + const withFix = specMatches.filter(m => m.has_fix).length; + + const rate = total > 0 ? Math.round((withFix / total) * 100) : 0; + + let status; + if (rate >= 80) { + status = 'satisfied'; + } else if (rate >= 50) { + status = 'partial'; + } else { + status = 'unsatisfied'; + } + + return { + total_dimensions: total, + with_detection: withDetection, + with_fix_strategy: withFix, + coverage_rate: rate, + status: status + }; +} +``` + +### Phase 4: 歧义检测 + +识别需要用户澄清的歧义点: + +```javascript +function detectAmbiguities(dimensions, specMatches) { + const ambiguities = []; + + for (const dim of dimensions) { + const match = specMatches.find(m => m.dimension_id === dim.id); + + // 检测1: 低置信度 (< 0.5) + if (dim.confidence < 0.5) { + ambiguities.push({ + dimension_id: dim.id, + type: 'vague_description', + description: `维度 "${dim.description}" 描述模糊,推断置信度低 (${dim.confidence})`, + possible_interpretations: suggestInterpretations(dim), + needs_clarification: true + }); + } + + // 检测2: 无匹配类别 + if (!match || (!match.taxonomy_match && !match.strategy_match)) { + ambiguities.push({ + dimension_id: dim.id, + type: 'no_category_match', + description: `维度 "${dim.description}" 无法匹配到已知问题类别`, + possible_interpretations: ['custom'], + needs_clarification: true + }); + } + + // 检测3: 关键词冲突(可能属于多个类别) + if (dim.keywords.length > 3 && hasConflictingKeywords(dim.keywords)) { + ambiguities.push({ + dimension_id: dim.id, + type: 
'conflicting_keywords', + description: `维度 "${dim.description}" 的关键词可能指向多个不同问题`, + possible_interpretations: inferMultipleCategories(dim.keywords), + needs_clarification: true + }); + } + } + + return ambiguities; +} + +function suggestInterpretations(dim) { + // 基于关键词推荐可能的解释 + const categories = [ + 'context_explosion', 'memory_loss', 'dataflow_break', 'agent_failure', + 'prompt_quality', 'architecture', 'performance', 'error_handling' + ]; + return categories.slice(0, 4); // 返回最常见的 4 个作为选项 +} + +function hasConflictingKeywords(keywords) { + // 检查关键词是否指向不同方向 + const categoryHints = keywords.map(k => getKeywordCategoryHint(k)); + const uniqueCategories = [...new Set(categoryHints.filter(c => c))]; + return uniqueCategories.length > 1; +} + +function getKeywordCategoryHint(keyword) { + const keywordMap = { + '慢': 'performance', 'slow': 'performance', + '遗忘': 'memory_loss', 'forget': 'memory_loss', + '状态': 'dataflow_break', 'state': 'dataflow_break', + 'agent': 'agent_failure', '失败': 'agent_failure', + 'token': 'context_explosion', '上下文': 'context_explosion' + }; + return keywordMap[keyword.toLowerCase()]; +} +``` + +## User Interaction + +如果检测到需要澄清的歧义,暂停并询问用户: + +```javascript +async function handleAmbiguities(ambiguities, dimensions) { + const needsClarification = ambiguities.filter(a => a.needs_clarification); + + if (needsClarification.length === 0) { + return null; // 无需澄清 + } + + const questions = needsClarification.slice(0, 4).map(a => { + const dim = dimensions.find(d => d.id === a.dimension_id); + + return { + question: `关于 "${dim.description}",您具体指的是?`, + header: a.dimension_id, + options: a.possible_interpretations.map(interp => ({ + label: getCategoryLabel(interp), + description: getCategoryDescription(interp) + })), + multiSelect: false + }; + }); + + return await AskUserQuestion({ questions }); +} + +function getCategoryLabel(category) { + const labels = { + 'context_explosion': '上下文膨胀', + 'memory_loss': '指令遗忘', + 'dataflow_break': '数据流问题', + 
'agent_failure': 'Agent 协调问题', + 'prompt_quality': '提示词质量', + 'architecture': '架构问题', + 'performance': '性能问题', + 'error_handling': '错误处理', + 'custom': '其他问题' + }; + return labels[category] || category; +} + +function getCategoryDescription(category) { + const descriptions = { + 'context_explosion': 'Token 累积导致上下文过大', + 'memory_loss': '早期指令或约束在后期丢失', + 'dataflow_break': '状态数据在阶段间不一致', + 'agent_failure': '子 Agent 调用失败或结果异常', + 'prompt_quality': '提示词模糊导致输出不稳定', + 'architecture': '阶段划分或模块结构不合理', + 'performance': '执行慢或 Token 消耗高', + 'error_handling': '错误恢复机制不完善', + 'custom': '需要自定义分析的问题' + }; + return descriptions[category] || '需要进一步分析'; +} +``` + +## Output + +### State Updates + +```javascript +return { + stateUpdates: { + requirement_analysis: { + status: ambiguities.some(a => a.needs_clarification) ? 'needs_clarification' : 'completed', + analyzed_at: new Date().toISOString(), + dimensions: dimensions, + spec_matches: specMatches, + coverage: coverageResult, + ambiguities: ambiguities + }, + // 根据分析结果自动优化 focus_areas + focus_areas: deriveOptimalFocusAreas(specMatches) + }, + outputFiles: [ + `${workDir}/requirement-analysis.json`, + `${workDir}/requirement-analysis.md` + ], + summary: generateSummary(dimensions, coverageResult, ambiguities) +}; + +function deriveOptimalFocusAreas(specMatches) { + const coreCategories = ['context', 'memory', 'dataflow', 'agent']; + const matched = specMatches + .filter(m => m.taxonomy_match !== null) + .map(m => { + // 映射到诊断 focus_area + const category = m.taxonomy_match.category; + if (category === 'context_explosion' || category === 'performance') return 'context'; + if (category === 'memory_loss') return 'memory'; + if (category === 'dataflow_break') return 'dataflow'; + if (category === 'agent_failure' || category === 'error_handling') return 'agent'; + return null; + }) + .filter(f => f && coreCategories.includes(f)); + + // 去重 + return [...new Set(matched)]; +} + +function generateSummary(dimensions, coverage, ambiguities) { + 
const dimCount = dimensions.length; + const coverageStatus = coverage.status; + const ambiguityCount = ambiguities.filter(a => a.needs_clarification).length; + + let summary = `分析完成:${dimCount} 个维度`; + summary += `,覆盖度 ${coverage.coverage_rate}% (${coverageStatus})`; + + if (ambiguityCount > 0) { + summary += `,${ambiguityCount} 个歧义点待澄清`; + } + + return summary; +} +``` + +### Output Files + +#### requirement-analysis.json + +```json +{ + "timestamp": "2024-01-01T00:00:00Z", + "target_skill": "skill-name", + "user_description": "原始用户描述", + "dimensions": [...], + "spec_matches": [...], + "coverage": {...}, + "ambiguities": [...], + "derived_focus_areas": [...] +} +``` + +#### requirement-analysis.md + +```markdown +# 需求分析报告 + +## 用户描述 +> ${user_issue_description} + +## 维度拆解 + +| ID | 描述 | 类别 | 置信度 | +|----|------|------|--------| +| DIM-001 | ... | ... | 0.85 | + +## Spec 匹配 + +| 维度 | 检测模式 | 修复策略 | 是否满足 | +|------|----------|----------|----------| +| DIM-001 | CTX-001,002 | sliding_window | ✓ | + +## 覆盖度评估 + +- 总维度数: N +- 有检测手段: M +- 有修复策略: K (满足标准) +- 覆盖率: X% +- 状态: satisfied/partial/unsatisfied + +## 歧义点 + +(如有) +``` + +## Error Handling + +| Error | Recovery | +|-------|----------| +| Gemini CLI 超时 | 重试一次,仍失败则使用简化分析 | +| JSON 解析失败 | 尝试修复 JSON 或使用默认维度 | +| 无法匹配任何类别 | 全部归类为 custom,触发 Gemini 深度分析 | + +## Next Actions + +- 如果 `requirement_analysis.status === 'completed'`: 继续到 `action-diagnose-*` +- 如果 `requirement_analysis.status === 'needs_clarification'`: 等待用户澄清后重新执行 +- 如果 `coverage.status === 'unsatisfied'`: 自动触发 `action-gemini-analysis` 进行深度分析 diff --git a/.claude/skills/skill-tuning/phases/orchestrator.md b/.claude/skills/skill-tuning/phases/orchestrator.md index d723cc10..1b8ed36c 100644 --- a/.claude/skills/skill-tuning/phases/orchestrator.md +++ b/.claude/skills/skill-tuning/phases/orchestrator.md @@ -63,6 +63,24 @@ function selectNextAction(state) { return 'action-init'; } + // 1.5. 
Requirement analysis (在 init 后,diagnosis 前) + if (state.status === 'running' && + state.completed_actions.includes('action-init') && + !state.completed_actions.includes('action-analyze-requirements')) { + return 'action-analyze-requirements'; + } + + // 1.6. 如果需求分析发现歧义需要澄清,暂停等待用户 + if (state.requirement_analysis?.status === 'needs_clarification') { + return null; // 等待用户澄清后继续 + } + + // 1.7. 如果需求分析覆盖度不足,优先触发 Gemini 深度分析 + if (state.requirement_analysis?.coverage?.status === 'unsatisfied' && + !state.completed_actions.includes('action-gemini-analysis')) { + return 'action-gemini-analysis'; + } + // 2. Check if Gemini analysis is requested or needed if (shouldTriggerGeminiAnalysis(state)) { return 'action-gemini-analysis'; @@ -295,6 +313,7 @@ After completing the action: | Action | Purpose | Preconditions | Effects | |--------|---------|---------------|---------| | [action-init](actions/action-init.md) | Initialize tuning session | status === 'pending' | Creates work dirs, backup, sets status='running' | +| [action-analyze-requirements](actions/action-analyze-requirements.md) | Analyze user requirements | init completed | Sets requirement_analysis, optimizes focus_areas | | [action-diagnose-context](actions/action-diagnose-context.md) | Analyze context explosion | status === 'running' | Sets diagnosis.context | | [action-diagnose-memory](actions/action-diagnose-memory.md) | Analyze long-tail forgetting | status === 'running' | Sets diagnosis.memory | | [action-diagnose-dataflow](actions/action-diagnose-dataflow.md) | Analyze data flow issues | status === 'running' | Sets diagnosis.dataflow | @@ -312,6 +331,7 @@ After completing the action: - `status === 'completed'`: Normal completion - `status === 'user_exit'`: User requested exit - `status === 'failed'`: Unrecoverable error +- `requirement_analysis.status === 'needs_clarification'`: Waiting for user clarification (暂停,非终止) - `error_count >= max_errors`: Too many errors (default: 3) - `iteration_count >= 
max_iterations`: Max iterations reached (default: 5) - `quality_gate === 'pass'`: All quality criteria met diff --git a/.claude/skills/skill-tuning/phases/state-schema.md b/.claude/skills/skill-tuning/phases/state-schema.md index 05fbf87c..bf344a9d 100644 --- a/.claude/skills/skill-tuning/phases/state-schema.md +++ b/.claude/skills/skill-tuning/phases/state-schema.md @@ -67,6 +67,62 @@ interface TuningState { // === Output Paths === work_dir: string; backup_dir: string; + + // === Requirement Analysis (新增) === + requirement_analysis: RequirementAnalysis | null; +} + +interface RequirementAnalysis { + status: 'pending' | 'completed' | 'needs_clarification'; + analyzed_at: string; + + // Phase 1: 维度拆解 + dimensions: Dimension[]; + + // Phase 2: Spec 匹配 + spec_matches: SpecMatch[]; + + // Phase 3: 覆盖度 + coverage: { + total_dimensions: number; + with_detection: number; // 有 taxonomy pattern + with_fix_strategy: number; // 有 tuning strategy (满足判断标准) + coverage_rate: number; // 0-100% + status: 'satisfied' | 'partial' | 'unsatisfied'; + }; + + // Phase 4: 歧义 + ambiguities: Ambiguity[]; +} + +interface Dimension { + id: string; // e.g., "DIM-001" + description: string; // 关注点描述 + keywords: string[]; // 关键词 + inferred_category: string; // 推断的问题类别 + confidence: number; // 置信度 0-1 +} + +interface SpecMatch { + dimension_id: string; + taxonomy_match: { + category: string; // e.g., "context_explosion" + pattern_ids: string[]; // e.g., ["CTX-001", "CTX-003"] + severity_hint: string; + } | null; + strategy_match: { + strategies: string[]; // e.g., ["sliding_window", "path_reference"] + risk_levels: string[]; + } | null; + has_fix: boolean; // 满足性判断核心 +} + +interface Ambiguity { + dimension_id: string; + type: 'multi_category' | 'vague_description' | 'no_category_match' | 'conflicting_keywords'; + description: string; + possible_interpretations: string[]; + needs_clarification: boolean; } interface DiagnosisResult { @@ -208,7 +264,8 @@ interface ErrorEntry { "error_count": 0, "max_errors": 3, 
"work_dir": null, - "backup_dir": null + "backup_dir": null, + "requirement_analysis": null } ``` diff --git a/.claude/skills/skill-tuning/specs/dimension-mapping.md b/.claude/skills/skill-tuning/specs/dimension-mapping.md new file mode 100644 index 00000000..e7ebd7eb --- /dev/null +++ b/.claude/skills/skill-tuning/specs/dimension-mapping.md @@ -0,0 +1,206 @@ +# Dimension to Spec Mapping + +维度关键词到 Spec 的映射规则,用于 action-analyze-requirements 阶段的自动匹配。 + +## When to Use + +| Phase | Usage | +|-------|-------| +| action-analyze-requirements | 维度→类别→Spec 自动匹配 | +| action-propose-fixes | 策略选择参考 | + +--- + +## Keyword → Category Mapping + +基于关键词将用户描述的维度映射到问题类别。 + +### 中英文关键词表 + +| Keywords (中文) | Keywords (英文) | Primary Category | Secondary | +|----------------|-----------------|------------------|-----------| +| token, 上下文, 爆炸, 太长, 超限, 膨胀 | token, context, explosion, overflow, bloat | context_explosion | - | +| 遗忘, 忘记, 指令丢失, 约束消失, 目标漂移 | forget, lost, drift, constraint, goal | memory_loss | - | +| 状态, 数据, 格式, 不一致, 丢失, 损坏 | state, data, format, inconsistent, corrupt | dataflow_break | - | +| agent, 子任务, 失败, 嵌套, 调用, 协调 | agent, subtask, fail, nested, call, coordinate | agent_failure | - | +| 慢, 性能, 效率, token 消耗, 延迟 | slow, performance, efficiency, latency | performance | context_explosion | +| 提示词, prompt, 输出不稳定, 幻觉 | prompt, unstable, hallucination | prompt_quality | - | +| 架构, 结构, 模块, 耦合, 扩展 | architecture, structure, module, coupling | architecture | - | +| 错误, 异常, 恢复, 降级, 崩溃 | error, exception, recovery, crash | error_handling | agent_failure | +| 输出, 质量, 格式, 验证, 不完整 | output, quality, validation, incomplete | output_quality | - | +| 交互, 体验, 进度, 反馈, 不清晰 | interaction, ux, progress, feedback | user_experience | - | + +### Matching Algorithm + +```javascript +function matchCategory(keywords) { + const categoryScores = {}; + + for (const keyword of keywords) { + const normalizedKeyword = keyword.toLowerCase(); + + for (const [category, categoryKeywords] of 
Object.entries(KEYWORD_MAP)) { + if (categoryKeywords.some(k => normalizedKeyword.includes(k) || k.includes(normalizedKeyword))) { + categoryScores[category] = (categoryScores[category] || 0) + 1; + } + } + } + + // 返回得分最高的类别 + const sorted = Object.entries(categoryScores).sort((a, b) => b[1] - a[1]); + + if (sorted.length === 0) return null; + + // 如果前两名得分相同,返回多类别(需澄清) + if (sorted.length > 1 && sorted[0][1] === sorted[1][1]) { + return { + primary: sorted[0][0], + secondary: sorted[1][0], + ambiguous: true + }; + } + + return { + primary: sorted[0][0], + secondary: sorted[1]?.[0] || null, + ambiguous: false + }; +} +``` + +--- + +## Category → Taxonomy Pattern Mapping + +将问题类别映射到 problem-taxonomy.md 中的检测模式。 + +| Category | Pattern IDs | Detection Focus | +|----------|-------------|-----------------| +| context_explosion | CTX-001, CTX-002, CTX-003, CTX-004, CTX-005 | Token 累积、内容传递模式 | +| memory_loss | MEM-001, MEM-002, MEM-003, MEM-004, MEM-005 | 约束传播、检查点机制 | +| dataflow_break | DF-001, DF-002, DF-003, DF-004, DF-005 | 状态存储、Schema 验证 | +| agent_failure | AGT-001, AGT-002, AGT-003, AGT-004, AGT-005, AGT-006 | 错误处理、结果验证 | +| prompt_quality | - | (无内置检测,需 Gemini 分析) | +| architecture | - | (无内置检测,需 Gemini 分析) | +| performance | CTX-001, CTX-003 | (复用 context 检测) | +| error_handling | AGT-001, AGT-002 | (复用 agent 检测) | +| output_quality | - | (无内置检测,需 Gemini 分析) | +| user_experience | - | (无内置检测,需 Gemini 分析) | + +--- + +## Category → Strategy Mapping + +将问题类别映射到 tuning-strategies.md 中的修复策略。 + +### Core Categories (有完整策略) + +| Category | Available Strategies | Risk Level | +|----------|---------------------|------------| +| context_explosion | sliding_window, path_reference, context_summarization, structured_state | Low-Medium | +| memory_loss | constraint_injection, state_constraints_field, checkpoint_restore, goal_embedding | Low-Medium | +| dataflow_break | state_centralization, schema_enforcement, field_normalization | Low-Medium | +| agent_failure | 
error_wrapping, result_validation, flatten_nesting | Low-Medium | + +### Extended Categories (需 Gemini 生成策略) + +| Category | Available Strategies | Risk Level | +|----------|---------------------|------------| +| prompt_quality | structured_prompt, output_schema, grounding_context, format_enforcement | Low | +| architecture | phase_decomposition, interface_contracts, plugin_architecture, state_machine | Medium-High | +| performance | token_budgeting, parallel_execution, result_caching, lazy_loading | Low-Medium | +| error_handling | graceful_degradation, error_propagation, structured_logging, error_context | Low | +| output_quality | quality_gates, output_validation, template_enforcement, completeness_check | Low | +| user_experience | progress_tracking, status_communication, interactive_checkpoints, guided_workflow | Low | + +--- + +## Coverage Rules + +### Satisfaction Criteria + +判断"是否满足需求"的标准: + +```javascript +function evaluateSatisfaction(specMatch) { + // 核心标准:有可用的修复策略 + const hasFix = specMatch.strategy_match !== null && + specMatch.strategy_match.strategies.length > 0; + + // 辅助标准:有检测手段 + const hasDetection = specMatch.taxonomy_match !== null; + + return { + satisfied: hasFix, + detection_available: hasDetection, + needs_gemini: !hasDetection // 无内置检测时需要 Gemini 分析 + }; +} +``` + +### Coverage Status Thresholds + +| Status | Condition | +|--------|-----------| +| satisfied | coverage_rate >= 80% | +| partial | 50% <= coverage_rate < 80% | +| unsatisfied | coverage_rate < 50% | + +--- + +## Fallback Rules + +当无法匹配到具体类别时的处理: + +```javascript +function handleUnmatchedDimension(dimension) { + return { + dimension_id: dimension.id, + taxonomy_match: null, + strategy_match: { + strategies: ['custom'], // Fallback to custom strategy + risk_levels: ['medium'] + }, + has_fix: true, // custom 策略视为"可满足" + needs_gemini_analysis: true, + fallback_reason: 'no_keyword_match' + }; +} +``` + +--- + +## Usage Example + +```javascript +// 输入:用户描述 "skill 执行太慢,而且有时候会忘记最初的指令" + 
+// Step 1: Gemini 拆解为维度 +const dimensions = [ + { id: 'DIM-001', description: '执行太慢', keywords: ['慢', '执行'], confidence: 0.9 }, + { id: 'DIM-002', description: '忘记最初指令', keywords: ['忘记', '指令'], confidence: 0.85 } +]; + +// Step 2: 匹配类别 +// DIM-001 → performance (慢) +// DIM-002 → memory_loss (忘记, 指令) + +// Step 3: 匹配 Spec +const specMatches = [ + { + dimension_id: 'DIM-001', + taxonomy_match: { category: 'performance', pattern_ids: ['CTX-001', 'CTX-003'], severity_hint: 'medium' }, + strategy_match: { strategies: ['token_budgeting', 'parallel_execution'], risk_levels: ['low', 'low'] }, + has_fix: true + }, + { + dimension_id: 'DIM-002', + taxonomy_match: { category: 'memory_loss', pattern_ids: ['MEM-001', 'MEM-002'], severity_hint: 'high' }, + strategy_match: { strategies: ['constraint_injection', 'checkpoint_restore'], risk_levels: ['low', 'low'] }, + has_fix: true + } +]; + +// Step 4: 评估覆盖度 +// 2/2 = 100% → satisfied +``` diff --git a/.claude/skills/skill-tuning/specs/problem-taxonomy.md b/.claude/skills/skill-tuning/specs/problem-taxonomy.md index 3e5238f5..d599e207 100644 --- a/.claude/skills/skill-tuning/specs/problem-taxonomy.md +++ b/.claude/skills/skill-tuning/specs/problem-taxonomy.md @@ -14,6 +14,35 @@ Classification of skill execution issues with detection patterns and severity cr ## Problem Categories +### 0. 
Authoring Principles Violation (P0) + +**Definition**: 违反 skill 撰写首要准则(简洁高效、去除存储、上下文流转)。 + +**Root Causes**: +- 不必要的中间文件存储 +- State schema 过度膨胀 +- 文件中转代替上下文传递 +- 重复数据存储 + +**Detection Patterns**: + +| Pattern ID | Regex/Check | Description | +|------------|-------------|-------------| +| APV-001 | `/Write\([^)]*(?:temp-|intermediate-)/` | 中间文件写入 | +| APV-002 | `/Write\([^)]+\)[\s\S]{0,50}Read\([^)]+\)/` | 写后立即读(文件中转) | +| APV-003 | State schema > 15 fields | State 字段过多 | +| APV-004 | `/_history\s*[.=].*(?:push|concat)/` | 无限增长数组 | +| APV-005 | `/debug_|_cache|_temp/` in state | 调试/缓存字段残留 | +| APV-006 | Same data in multiple state fields | 重复存储 | + +**Impact Levels**: +- **Critical**: 中间文件 > 5 个,严重违反原则 +- **High**: State 字段 > 20 个,或存在文件中转 +- **Medium**: 存在调试字段或轻微冗余 +- **Low**: 轻微的命名不规范 + +--- + ### 1. Context Explosion (P2) **Definition**: Excessive token accumulation causing prompt size to grow unbounded. @@ -181,6 +210,7 @@ function calculateIssueSeverity(issue) { | Problem Type | Recommended Strategies | Priority Order | |--------------|----------------------|----------------| +| **Authoring Principles Violation** | eliminate_intermediate_files, minimize_state, context_passing | 1, 2, 3 | | Context Explosion | sliding_window, path_reference, context_summarization | 1, 2, 3 | | Long-tail Forgetting | constraint_injection, state_constraints_field, checkpoint | 1, 2, 3 | | Data Flow Disruption | state_centralization, schema_enforcement, field_normalization | 1, 2, 3 | diff --git a/.claude/skills/skill-tuning/specs/skill-authoring-principles.md b/.claude/skills/skill-tuning/specs/skill-authoring-principles.md new file mode 100644 index 00000000..b73136dc --- /dev/null +++ b/.claude/skills/skill-tuning/specs/skill-authoring-principles.md @@ -0,0 +1,189 @@ +# Skill Authoring Principles + +Skill 撰写首要准则。所有诊断和优化以此为纲。 + +--- + +## 核心原则 + +``` +简洁高效 → 去除无关存储 → 去除中间存储 → 上下文流转 +``` + +--- + +## 1. 
简洁高效 + +**原则**:最小化实现,只做必要的事 + +| DO | DON'T | +|----|-------| +| 单一职责阶段 | 臃肿的多功能阶段 | +| 直接的数据路径 | 迂回的处理流程 | +| 必要的字段 | 冗余的 schema 定义 | +| 精准的 prompt | 过度详细的指令 | + +**检测模式**: +- Phase 文件 > 200 行 → 需拆分 +- State schema 字段 > 20 个 → 需精简 +- 同一数据多处定义 → 需去重 + +--- + +## 2. 去除无关存储 + +**原则**:不存储不需要的数据 + +| DO | DON'T | +|----|-------| +| 只存最终结果 | 存储调试信息 | +| 存路径引用 | 存完整内容副本 | +| 存必要索引 | 存全量历史 | + +**检测模式**: +```javascript +// BAD: 存储完整内容 +state.full_analysis_result = longAnalysisOutput; + +// GOOD: 存路径 + 摘要 +state.analysis = { + path: `${workDir}/analysis.json`, + summary: extractSummary(output), + key_findings: extractFindings(output) +}; +``` + +**反模式清单**: +- `state.debug_*` → 删除 +- `state.*_history` (无限增长) → 限制或删除 +- `state.*_cache` (会话内) → 改用内存变量 +- 重复字段 → 合并 + +--- + +## 3. 去除中间存储 + +**原则**:避免临时文件和中间状态文件 + +| DO | DON'T | +|----|-------| +| 直接传递结果 | 写文件再读文件 | +| 函数返回值 | 中间 JSON 文件 | +| 管道处理 | 阶段性存储 | + +**检测模式**: +```javascript +// BAD: 中间文件 +Write(`${workDir}/temp-step1.json`, step1Result); +const step1 = Read(`${workDir}/temp-step1.json`); +const step2Result = process(step1); +Write(`${workDir}/temp-step2.json`, step2Result); + +// GOOD: 直接流转 +const step1Result = await executeStep1(); +const step2Result = process(step1Result); +const finalResult = finalize(step2Result); +Write(`${workDir}/final-output.json`, finalResult); // 只存最终结果 +``` + +**允许的存储**: +- 最终输出(用户需要的结果) +- 检查点(长流程恢复用,可选) +- 备份(修改前的原始文件) + +**禁止的存储**: +- `temp-*.json` +- `intermediate-*.json` +- `step[N]-output.json` +- `*-draft.md` + +--- + +## 4. 上下文流转 + +**原则**:通过上下文传递而非文件 + +| DO | DON'T | +|----|-------| +| 函数参数传递 | 全局状态读写 | +| 返回值链式处理 | 文件中转 | +| prompt 内嵌数据 | 指向外部文件 | + +**模式**: +```javascript +// 上下文流转模式 +async function executePhase(context) { + const { previousResult, constraints, config } = context; + + const result = await Task({ + prompt: ` + [CONTEXT] + Previous: ${JSON.stringify(previousResult)} + Constraints: ${constraints.join(', ')} + + [TASK] + Process and return result directly. 
+ ` + }); + + return { + ...context, + currentResult: result, + completed: ['phase-name'] + }; +} + +// 链式执行 +let ctx = initialContext; +ctx = await executePhase1(ctx); +ctx = await executePhase2(ctx); +ctx = await executePhase3(ctx); +// ctx 包含完整上下文,无中间文件 +``` + +**State 最小化**: +```typescript +// 只存必要状态 +interface MinimalState { + status: 'pending' | 'running' | 'completed'; + target: { name: string; path: string }; + result_path: string; // 最终结果路径 + error?: string; +} +``` + +--- + +## 应用场景 + +### 诊断时检查 + +| 检查项 | 违反时标记 | +|--------|-----------| +| Phase 内写入 temp 文件 | `unnecessary_storage` | +| State 包含 *_history 无限数组 | `unbounded_state` | +| 文件写入后立即读取 | `redundant_io` | +| 多阶段传递完整内容 | `context_bloat` | + +### 优化策略 + +| 问题 | 策略 | +|------|------| +| 中间文件过多 | `eliminate_intermediate_files` | +| State 膨胀 | `minimize_state` | +| 重复存储 | `deduplicate_storage` | +| 文件中转 | `context_passing` | + +--- + +## 合规检查清单 + +``` +□ 无 temp/intermediate 文件写入 +□ State schema ≤ 15 个字段 +□ 无重复数据存储 +□ Phase 间通过上下文/返回值传递 +□ 只存最终结果文件 +□ 无无限增长的数组 +□ 无调试字段残留 +``` diff --git a/.claude/skills/skill-tuning/specs/tuning-strategies.md b/.claude/skills/skill-tuning/specs/tuning-strategies.md index fcbd6c91..0130c5b8 100644 --- a/.claude/skills/skill-tuning/specs/tuning-strategies.md +++ b/.claude/skills/skill-tuning/specs/tuning-strategies.md @@ -12,6 +12,155 @@ Detailed fix strategies for each problem category with implementation guidance. 
--- +## Authoring Principles Strategies (P0 - 首要准则) + +> **核心原则**:简洁高效 → 去除无关存储 → 去除中间存储 → 上下文流转 + +### Strategy: eliminate_intermediate_files + +**Purpose**: 删除所有中间文件,改用上下文流转。 + +**Implementation**: +```javascript +// Before: 中间文件 +async function process() { + const step1 = await analyze(); + Write(`${workDir}/step1.json`, JSON.stringify(step1)); + + const step1Data = JSON.parse(Read(`${workDir}/step1.json`)); + const step2 = await transform(step1Data); + Write(`${workDir}/step2.json`, JSON.stringify(step2)); + + const step2Data = JSON.parse(Read(`${workDir}/step2.json`)); + return finalize(step2Data); +} + +// After: 上下文流转 +async function process() { + const step1 = await analyze(); + const step2 = await transform(step1); // 直接传递 + return finalize(step2); // 只返回最终结果 +} +``` + +**Risk**: Low +**Verification**: `ls ${workDir}` 无 temp/intermediate 文件 + +--- + +### Strategy: minimize_state + +**Purpose**: 精简 State schema 至必要字段。 + +**Implementation**: +```typescript +// Before: 膨胀的 State +interface State { + status: string; + target: TargetInfo; + user_input: string; + parsed_input: ParsedInput; // 删除 - 只在处理时用 + intermediate_result: any; // 删除 - 中间结果 + debug_info: DebugInfo; // 删除 - 调试信息 + analysis_cache: any; // 删除 - 缓存 + full_history: HistoryEntry[]; // 删除 - 无限增长 + step1_output: any; // 删除 - 中间输出 + step2_output: any; // 删除 - 中间输出 + final_result: FinalResult; +} + +// After: 精简的 State +interface State { + status: 'pending' | 'running' | 'completed' | 'failed'; + target: { name: string; path: string }; + result_path: string; // 最终结果路径 + error?: string; // 仅失败时有 +} +``` + +**Rules**: +- State 字段 ≤ 15 个 +- 删除所有 `debug_*`, `*_cache`, `*_temp` 字段 +- `*_history` 数组设置上限或改用滚动窗口 + +**Risk**: Medium (需确保不丢失必要数据) +**Verification**: Count state fields ≤ 15 + +--- + +### Strategy: context_passing + +**Purpose**: 用函数参数/返回值代替文件中转。 + +**Implementation**: +```javascript +// 上下文流转模式 +async function executeWorkflow(initialContext) { + let ctx = initialContext; + + // Phase 1: 直接传递上下文 + 
ctx = await executePhase1(ctx); + + // Phase 2: 继续传递 + ctx = await executePhase2(ctx); + + // Phase 3: 最终处理 + const result = await executePhase3(ctx); + + // 只存最终结果 + Write(`${ctx.workDir}/result.json`, JSON.stringify(result)); + + return result; +} + +// Phase 函数模板 +async function executePhaseN(ctx) { + const { previousResult, constraints } = ctx; + + const result = await Task({ + prompt: ` + [CONTEXT] + ${JSON.stringify(previousResult)} + + [TASK] + Process and return result. + ` + }); + + // 返回更新后的上下文,不写文件 + return { + ...ctx, + previousResult: result, + completed: [...ctx.completed, 'phase-n'] + }; +} +``` + +**Risk**: Low +**Verification**: 无 Write→Read 紧邻模式 + +--- + +### Strategy: deduplicate_storage + +**Purpose**: 消除重复数据存储。 + +**Implementation**: +```javascript +// Before: 重复存储 +state.user_request = userInput; +state.original_request = userInput; +state.input_text = userInput; + +// After: 单一来源 +state.input = userInput; // 唯一存储点 +``` + +**Risk**: Low +**Verification**: 无相同数据多字段存储 + +--- + ## Context Explosion Strategies ### Strategy: sliding_window