diff --git a/.codex/skills/spec-generator/README.md b/.codex/skills/spec-generator/README.md new file mode 100644 index 00000000..486d1f56 --- /dev/null +++ b/.codex/skills/spec-generator/README.md @@ -0,0 +1,110 @@ +# Spec Generator + +Structured specification document generator producing a complete document chain (Product Brief -> PRD -> Architecture -> Epics). + +## Usage + +```bash +# Via workflow command +/workflow:spec "Build a task management system" +/workflow:spec -y "User auth with OAuth2" # Auto mode +/workflow:spec -c "task management" # Resume session +``` + +## Architecture + +``` +spec-generator/ +|- SKILL.md # Entry point: metadata + architecture + flow +|- phases/ +| |- 01-discovery.md # Seed analysis + codebase exploration + spec type selection +| |- 01-5-requirement-clarification.md # Interactive requirement expansion +| |- 02-product-brief.md # Multi-CLI product brief + glossary generation +| |- 03-requirements.md # PRD with MoSCoW priorities + RFC 2119 constraints +| |- 04-architecture.md # Architecture + state machine + config model + observability +| |- 05-epics-stories.md # Epic/Story decomposition +| |- 06-readiness-check.md # Quality validation + handoff + iterate option +| |- 06-5-auto-fix.md # Auto-fix loop for readiness issues (max 2 iterations) +| |- 07-issue-export.md # Issue creation from Epics + export report +|- specs/ +| |- document-standards.md # Format, frontmatter, naming rules +| |- quality-gates.md # Per-phase quality criteria + iteration tracking +| |- glossary-template.json # Terminology glossary schema +|- templates/ +| |- product-brief.md # Product brief template (+ Concepts & Non-Goals) +| |- requirements-prd.md # PRD template +| |- architecture-doc.md # Architecture template (+ state machine, config, observability) +| |- epics-template.md # Epic/Story template (+ versioning) +| |- profiles/ # Spec type specialization profiles +| |- service-profile.md # Service spec: lifecycle, observability, trust +| |- api-profile.md # 
API spec: endpoints, auth, rate limiting +| |- library-profile.md # Library spec: public API, examples, compatibility +|- README.md # This file +``` + +## 7-Phase Pipeline + +| Phase | Name | Output | CLI Tools | Key Features | +|-------|------|--------|-----------|-------------| +| 1 | Discovery | spec-config.json | Gemini (analysis) | Spec type selection | +| 1.5 | Req Expansion | refined-requirements.json | Gemini (analysis) | Multi-round interactive | +| 2 | Product Brief *(Agent)* | product-brief.md, glossary.json | Gemini + Codex + Claude (parallel) | Terminology glossary | +| 3 | Requirements *(Agent)* | requirements/ | Gemini + **Codex review** | RFC 2119, data model | +| 4 | Architecture *(Agent)* | architecture/ | Gemini + Codex (sequential) | State machine, config, observability | +| 5 | Epics & Stories *(Agent)* | epics/ | Gemini + **Codex review** | Glossary consistency | +| 6 | Readiness Check | readiness-report.md, spec-summary.md | Gemini + **Codex** (parallel) | Per-requirement verification | +| 6.5 | Auto-Fix *(Agent)* | Updated phase docs | Gemini (analysis) | Max 2 iterations | +| 7 | Issue Export | issue-export-report.md | ccw issue create | Epic→Issue mapping, wave assignment | + +## Runtime Output + +``` +.workflow/.spec/SPEC-{slug}-{YYYY-MM-DD}/ +|- spec-config.json # Session state +|- discovery-context.json # Codebase context (optional) +|- refined-requirements.json # Phase 1.5 (requirement expansion) +|- glossary.json # Phase 2 (terminology) +|- product-brief.md # Phase 2 +|- requirements/ # Phase 3 (directory) +| |- _index.md +| |- REQ-*.md +| └── NFR-*.md +|- architecture/ # Phase 4 (directory) +| |- _index.md +| └── ADR-*.md +|- epics/ # Phase 5 (directory) +| |- _index.md +| └── EPIC-*.md +|- readiness-report.md # Phase 6 +|- spec-summary.md # Phase 6 +└── issue-export-report.md # Phase 7 (issue export) +``` + +## Flags + +- `-y|--yes`: Auto mode - skip all interactive confirmations +- `-c|--continue`: Resume from last completed phase 
+ +Spec type is selected interactively in Phase 1 (defaults to `service` in auto mode) +Available types: `service`, `api`, `library`, `platform` + +## Handoff + +After Phase 6, choose execution path: +- `Export Issues (Phase 7)` - Create issues per Epic with spec links → team-planex +- `workflow-lite-plan` - Execute per Epic +- `workflow:req-plan-with-file` - Roadmap decomposition +- `workflow-plan` - Full planning +- `Iterate & improve` - Re-run failed phases (max 2 iterations) + +## Design Principles + +- **Document chain**: Each phase builds on previous outputs +- **Multi-perspective**: Gemini/Codex/Claude provide different viewpoints +- **Template-driven**: Consistent format via templates + frontmatter +- **Resumable**: spec-config.json tracks completed phases +- **Pure documentation**: No code generation - clean handoff to execution workflows +- **Type-specialized**: Profiles adapt templates to service/api/library/platform requirements +- **Iterative quality**: Phase 6.5 auto-fix repairs issues, max 2 iterations before handoff +- **Terminology-first**: glossary.json ensures consistent terminology across all documents +- **Agent-delegated**: Heavy document phases (2-5, 6.5) run in doc-generator agents to minimize main context usage diff --git a/.codex/skills/spec-generator/SKILL.md b/.codex/skills/spec-generator/SKILL.md new file mode 100644 index 00000000..7c0d5754 --- /dev/null +++ b/.codex/skills/spec-generator/SKILL.md @@ -0,0 +1,425 @@ +--- +name: spec-generator +description: Specification generator - 7 phase document chain producing product brief, PRD, architecture, epics, and issues. Agent-delegated heavy phases (2-5, 6.5) with Codex review gates. Triggers on "generate spec", "create specification", "spec generator", "workflow:spec". 
+allowed-tools: Agent, AskUserQuestion, TaskCreate, TaskUpdate, TaskList, Read, Write, Edit, Bash, Glob, Grep, Skill +--- + +# Spec Generator + +Structured specification document generator producing a complete specification package (Product Brief, PRD, Architecture, Epics, Issues) through 7 sequential phases with multi-CLI analysis, Codex review gates, and interactive refinement. Heavy document phases are delegated to `doc-generator` agents to minimize main context usage. **Document generation only** - execution handoff via issue export to team-planex or existing workflows. + +## Architecture Overview + +``` +Phase 0: Specification Study (Read specs/ + templates/ - mandatory prerequisite) [Inline] + | +Phase 1: Discovery -> spec-config.json + discovery-context.json [Inline] + | (includes spec_type selection) +Phase 1.5: Req Expansion -> refined-requirements.json [Inline] + | (interactive discussion + CLI gap analysis) +Phase 2: Product Brief -> product-brief.md + glossary.json [Agent] + | (3-CLI parallel + synthesis) +Phase 3: Requirements (PRD) -> requirements/ (_index.md + REQ-*.md + NFR-*.md) [Agent] + | (Gemini + Codex review) +Phase 4: Architecture -> architecture/ (_index.md + ADR-*.md) [Agent] + | (Gemini + Codex review) +Phase 5: Epics & Stories -> epics/ (_index.md + EPIC-*.md) [Agent] + | (Gemini + Codex review) +Phase 6: Readiness Check -> readiness-report.md + spec-summary.md [Inline] + | (Gemini + Codex dual validation + per-req verification) + ├── Pass (>=80%): Handoff or Phase 7 + ├── Review (60-79%): Handoff with caveats or Phase 7 + └── Fail (<60%): Phase 6.5 Auto-Fix (max 2 iterations) + | +Phase 6.5: Auto-Fix -> Updated Phase 2-5 documents [Agent] + | + └── Re-run Phase 6 validation + | +Phase 7: Issue Export -> issue-export-report.md [Inline] + (Epic→Issue mapping, ccw issue create, wave assignment) +``` + +## Key Design Principles + +1. 
**Document Chain**: Each phase builds on previous outputs, creating a traceable specification chain from idea to executable issues +2. **Agent-Delegated**: Heavy document phases (2-5, 6.5) run in `doc-generator` agents, keeping main context lean (summaries only) +3. **Multi-Perspective Analysis**: CLI tools (Gemini/Codex/Claude) provide product, technical, and user perspectives in parallel +4. **Codex Review Gates**: Phases 3, 5, 6 include Codex CLI review for quality validation before output +5. **Interactive by Default**: Each phase offers user confirmation points; `-y` flag enables full auto mode +6. **Resumable Sessions**: `spec-config.json` tracks completed phases; `-c` flag resumes from last checkpoint +7. **Template-Driven**: All documents generated from standardized templates with YAML frontmatter +8. **Pure Documentation**: No code generation or execution - clean handoff via issue export to execution workflows +9. **Spec Type Specialization**: Templates adapt to spec type (service/api/library/platform) via profiles for domain-specific depth +10. **Iterative Quality**: Phase 6.5 auto-fix loop repairs issues found in readiness check (max 2 iterations) +11. **Terminology Consistency**: glossary.json generated in Phase 2, injected into all subsequent phases + +--- + +## Mandatory Prerequisites + +> **Do NOT skip**: Before performing any operations, you **must** completely read the following documents. Proceeding without reading the specifications will result in outputs that do not meet quality standards. 
+ +### Specification Documents (Required Reading) + +| Document | Purpose | Priority | +|----------|---------|----------| +| [specs/document-standards.md](specs/document-standards.md) | Document format, frontmatter, naming conventions | **P0 - Must read before execution** | +| [specs/quality-gates.md](specs/quality-gates.md) | Per-phase quality gate criteria and scoring | **P0 - Must read before execution** | + +### Template Files (Must read before generation) + +| Document | Purpose | +|----------|---------| +| [templates/product-brief.md](templates/product-brief.md) | Product brief document template | +| [templates/requirements-prd.md](templates/requirements-prd.md) | PRD document template | +| [templates/architecture-doc.md](templates/architecture-doc.md) | Architecture document template | +| [templates/epics-template.md](templates/epics-template.md) | Epic/Story document template | + +--- + +## Execution Flow + +``` +Input Parsing: + |- Parse $ARGUMENTS: extract idea/topic, flags (-y, -c, -m) + |- Detect mode: new | continue + |- If continue: read spec-config.json, resume from first incomplete phase + |- If new: proceed to Phase 1 + +Phase 1: Discovery & Seed Analysis + |- Ref: phases/01-discovery.md + |- Generate session ID: SPEC-{slug}-{YYYY-MM-DD} + |- Parse input (text or file reference) + |- Gemini CLI seed analysis (problem, users, domain, dimensions) + |- Codebase exploration (conditional, if project detected) + |- Spec type selection: service|api|library|platform (interactive, -y defaults to service) + |- User confirmation (interactive, -y skips) + |- Output: spec-config.json, discovery-context.json (optional) + +Phase 1.5: Requirement Expansion & Clarification + |- Ref: phases/01-5-requirement-clarification.md + |- CLI gap analysis: completeness scoring, missing dimensions detection + |- Multi-round interactive discussion (max 5 rounds) + | |- Round 1: present gap analysis + expansion suggestions + | |- Round N: follow-up refinement based on user 
responses + |- User final confirmation of requirements + |- Auto mode (-y): CLI auto-expansion without interaction + |- Output: refined-requirements.json + +Phase 2: Product Brief [AGENT: doc-generator] + |- Delegate to Task(subagent_type="doc-generator") + |- Agent reads: phases/02-product-brief.md + |- Agent executes: 3 parallel CLI analyses + synthesis + glossary generation + |- Agent writes: product-brief.md, glossary.json + |- Agent returns: JSON summary {files_created, quality_notes, key_decisions} + |- Orchestrator validates: files exist, spec-config.json updated + +Phase 3: Requirements / PRD [AGENT: doc-generator] + |- Delegate to Task(subagent_type="doc-generator") + |- Agent reads: phases/03-requirements.md + |- Agent executes: Gemini expansion + Codex review (Step 2.5) + priority sorting + |- Agent writes: requirements/ directory (_index.md + REQ-*.md + NFR-*.md) + |- Agent returns: JSON summary {files_created, codex_review_integrated, key_decisions} + |- Orchestrator validates: directory exists, file count matches + +Phase 4: Architecture [AGENT: doc-generator] + |- Delegate to Task(subagent_type="doc-generator") + |- Agent reads: phases/04-architecture.md + |- Agent executes: Gemini analysis + Codex review + codebase mapping + |- Agent writes: architecture/ directory (_index.md + ADR-*.md) + |- Agent returns: JSON summary {files_created, codex_review_rating, key_decisions} + |- Orchestrator validates: directory exists, ADR files present + +Phase 5: Epics & Stories [AGENT: doc-generator] + |- Delegate to Task(subagent_type="doc-generator") + |- Agent reads: phases/05-epics-stories.md + |- Agent executes: Gemini decomposition + Codex review (Step 2.5) + validation + |- Agent writes: epics/ directory (_index.md + EPIC-*.md) + |- Agent returns: JSON summary {files_created, codex_review_integrated, mvp_epic_count} + |- Orchestrator validates: directory exists, MVP epics present + +Phase 6: Readiness Check [INLINE + ENHANCED] + |- Ref: 
phases/06-readiness-check.md + |- Gemini CLI: cross-document validation (completeness, consistency, traceability) + |- Codex CLI: technical depth review (ADR quality, data model, security, observability) + |- Per-requirement verification: iterate all REQ-*.md / NFR-*.md + | |- Check: AC exists + testable, Brief trace, Story coverage, Arch coverage + | |- Generate: Per-Requirement Verification table + |- Merge dual CLI scores into quality report + |- Output: readiness-report.md, spec-summary.md + |- Handoff options: Phase 7 (issue export), lite-plan, req-plan, plan, iterate + +Phase 6.5: Auto-Fix (conditional) [AGENT: doc-generator] + |- Delegate to Task(subagent_type="doc-generator") + |- Agent reads: phases/06-5-auto-fix.md + readiness-report.md + |- Agent executes: fix affected Phase 2-5 documents + |- Agent returns: JSON summary {files_modified, issues_fixed, phases_touched} + |- Re-run Phase 6 validation + |- Max 2 iterations, then force handoff + +Phase 7: Issue Export [INLINE] + |- Ref: phases/07-issue-export.md + |- Read EPIC-*.md files, assign waves (MVP→wave-1, others→wave-2) + |- Create issues via ccw issue create (one per Epic) + |- Map Epic dependencies to issue dependencies + |- Generate issue-export-report.md + |- Update spec-config.json with issue_ids + |- Handoff: team-planex, wave-1 only, view issues, done + +Complete: Full specification package with issues ready for execution + +Phase 6/7 → Handoff Bridge (conditional, based on user selection): + ├─ team-planex: Execute issues via coordinated team workflow + ├─ lite-plan: Extract first MVP Epic description → direct text input + ├─ plan / req-plan: Create WFS session + .brainstorming/ bridge files + │ ├─ guidance-specification.md (synthesized from spec outputs) + │ ├─ feature-specs/feature-index.json (Epic → Feature mapping) + │ └─ feature-specs/F-{num}-{slug}.md (one per Epic) + └─ context-search-agent auto-discovers .brainstorming/ + → context-package.json.brainstorm_artifacts populated + → 
action-planning-agent consumes: guidance_spec (P1) → feature_index (P2) +``` + +## Directory Setup + +```javascript +// Session ID generation +const slug = topic.toLowerCase().replace(/[^a-z0-9\u4e00-\u9fff]+/g, '-').slice(0, 40); +const date = new Date().toISOString().slice(0, 10); +const sessionId = `SPEC-${slug}-${date}`; +const workDir = `.workflow/.spec/${sessionId}`; + +Bash(`mkdir -p "${workDir}"`); +``` + +## Output Structure + +``` +.workflow/.spec/SPEC-{slug}-{YYYY-MM-DD}/ +├── spec-config.json # Session configuration + phase state +├── discovery-context.json # Codebase exploration results (optional) +├── refined-requirements.json # Phase 1.5: Confirmed requirements after discussion +├── glossary.json # Phase 2: Terminology glossary for cross-doc consistency +├── product-brief.md # Phase 2: Product brief +├── requirements/ # Phase 3: Detailed PRD (directory) +│ ├── _index.md # Summary, MoSCoW table, traceability, links +│ ├── REQ-NNN-{slug}.md # Individual functional requirement +│ └── NFR-{type}-NNN-{slug}.md # Individual non-functional requirement +├── architecture/ # Phase 4: Architecture decisions (directory) +│ ├── _index.md # Overview, components, tech stack, links +│ └── ADR-NNN-{slug}.md # Individual Architecture Decision Record +├── epics/ # Phase 5: Epic/Story breakdown (directory) +│ ├── _index.md # Epic table, dependency map, MVP scope +│ └── EPIC-NNN-{slug}.md # Individual Epic with Stories +├── readiness-report.md # Phase 6: Quality report (+ per-req verification table) +├── spec-summary.md # Phase 6: One-page executive summary +└── issue-export-report.md # Phase 7: Issue mapping table + spec links +``` + +## State Management + +**spec-config.json** serves as core state file: +```json +{ + "session_id": "SPEC-xxx-2026-02-11", + "seed_input": "User input text", + "input_type": "text", + "timestamp": "ISO8601", + "mode": "interactive", + "complexity": "moderate", + "depth": "standard", + "focus_areas": [], + "spec_type": "service", + 
"iteration_count": 0, + "iteration_history": [], + "seed_analysis": { + "problem_statement": "...", + "target_users": [], + "domain": "...", + "constraints": [], + "dimensions": [] + }, + "has_codebase": false, + "refined_requirements_file": "refined-requirements.json", + "issue_ids": [], + "issues_created": 0, + "phasesCompleted": [ + { "phase": 1, "name": "discovery", "output_file": "spec-config.json", "completed_at": "ISO8601" }, + { "phase": 1.5, "name": "requirement-clarification", "output_file": "refined-requirements.json", "discussion_rounds": 2, "completed_at": "ISO8601" }, + { "phase": 3, "name": "requirements", "output_dir": "requirements/", "output_index": "requirements/_index.md", "file_count": 8, "completed_at": "ISO8601" } + ] +} +``` + +**Resume mechanism**: `-c|--continue` flag reads `spec-config.json.phasesCompleted`, resumes from first incomplete phase. + +## Core Rules + +1. **Start Immediately**: First action is TaskCreate initialization, then Phase 0 (spec study), then Phase 1 +2. **Progressive Phase Loading**: Read phase docs ONLY when that phase is about to execute +3. **Auto-Continue**: All phases run autonomously; check TaskList to execute next pending phase +4. **Parse Every Output**: Extract required data from each phase for next phase context +5. **DO NOT STOP**: Continuous 7-phase pipeline until all phases complete or user exits +6. **Respect -y Flag**: When auto mode, skip all AskUserQuestion calls, use recommended defaults +7. **Respect -c Flag**: When continue mode, load spec-config.json and resume from checkpoint +8. **Inject Glossary**: From Phase 3 onward, inject glossary.json terms into every CLI prompt +9. **Load Profile**: Read templates/profiles/{spec_type}-profile.md and inject requirements into Phase 2-5 prompts +10. **Iterate on Failure**: When Phase 6 score < 60%, auto-trigger Phase 6.5 (max 2 iterations) +11. 
**Agent Delegation**: Phase 2-5 and 6.5 MUST be delegated to `doc-generator` agents via Task tool — never execute inline +12. **Lean Context**: Orchestrator only sees agent return summaries (JSON), never the full document content +13. **Validate Agent Output**: After each agent returns, verify files exist on disk and spec-config.json was updated + +## Agent Delegation Protocol + +For Phase 2-5 and 6.5, the orchestrator delegates to a `doc-generator` agent via the Task tool. The orchestrator builds a lean context envelope — passing only paths, never file content. + +### Context Envelope Template + +```javascript +Task({ + subagent_type: "doc-generator", + run_in_background: false, + description: `Spec Phase ${N}: ${phaseName}`, + prompt: ` +## Spec Generator - Phase ${N}: ${phaseName} + +### Session +- ID: ${sessionId} +- Work Dir: ${workDir} +- Auto Mode: ${autoMode} +- Spec Type: ${specType} + +### Input (read from disk) +${inputFilesList} // Only file paths — agent reads content itself + +### Instructions +Read: ${skillDir}/phases/${phaseFile} // Agent reads the phase doc for full instructions +Apply template: ${skillDir}/templates/${templateFile} + +### Glossary (Phase 3+ only) +Read: ${workDir}/glossary.json + +### Output +Write files to: ${workDir}/${outputPath} +Update: ${workDir}/spec-config.json (phasesCompleted) +Return: JSON summary { files_created, quality_notes, key_decisions } +` +}); +``` + +### Orchestrator Post-Agent Validation + +After each agent returns: + +```javascript +// 1. Parse agent return summary +const summary = JSON.parse(agentResult); + +// 2. Validate files exist +summary.files_created.forEach(file => { + const exists = Glob(`${workDir}/${file}`); + if (!exists.length) throw new Error(`Agent claimed to create ${file} but file not found`); +}); + +// 3. 
Verify spec-config.json updated +const config = JSON.parse(Read(`${workDir}/spec-config.json`)); +const phaseComplete = config.phasesCompleted.some(p => p.phase === N); +if (!phaseComplete) throw new Error(`Agent did not update phasesCompleted for Phase ${N}`); + +// 4. Store summary for downstream context (do NOT read full documents) +phasesSummaries[N] = summary; +``` + +--- + +## Reference Documents by Phase + +### Phase 1: Discovery +| Document | Purpose | When to Use | +|----------|---------|-------------| +| [phases/01-discovery.md](phases/01-discovery.md) | Seed analysis and session setup | Phase start | +| [templates/profiles/](templates/profiles/) | Spec type profiles | Spec type selection | +| [specs/document-standards.md](specs/document-standards.md) | Frontmatter format for spec-config.json | Config generation | + +### Phase 1.5: Requirement Expansion & Clarification +| Document | Purpose | When to Use | +|----------|---------|-------------| +| [phases/01-5-requirement-clarification.md](phases/01-5-requirement-clarification.md) | Interactive requirement discussion workflow | Phase start | +| [specs/quality-gates.md](specs/quality-gates.md) | Quality criteria for refined requirements | Validation | + +### Phase 2: Product Brief +| Document | Purpose | When to Use | +|----------|---------|-------------| +| [phases/02-product-brief.md](phases/02-product-brief.md) | Multi-CLI analysis orchestration | Phase start | +| [templates/product-brief.md](templates/product-brief.md) | Document template | Document generation | +| [specs/glossary-template.json](specs/glossary-template.json) | Glossary schema | Glossary generation | + +### Phase 3: Requirements +| Document | Purpose | When to Use | +|----------|---------|-------------| +| [phases/03-requirements.md](phases/03-requirements.md) | PRD generation workflow | Phase start | +| [templates/requirements-prd.md](templates/requirements-prd.md) | Document template | Document generation | + +### Phase 4: Architecture 
+| Document | Purpose | When to Use | +|----------|---------|-------------| +| [phases/04-architecture.md](phases/04-architecture.md) | Architecture decision workflow | Phase start | +| [templates/architecture-doc.md](templates/architecture-doc.md) | Document template | Document generation | + +### Phase 5: Epics & Stories +| Document | Purpose | When to Use | +|----------|---------|-------------| +| [phases/05-epics-stories.md](phases/05-epics-stories.md) | Epic/Story decomposition | Phase start | +| [templates/epics-template.md](templates/epics-template.md) | Document template | Document generation | + +### Phase 6: Readiness Check +| Document | Purpose | When to Use | +|----------|---------|-------------| +| [phases/06-readiness-check.md](phases/06-readiness-check.md) | Cross-document validation | Phase start | +| [specs/quality-gates.md](specs/quality-gates.md) | Quality scoring criteria | Validation | + +### Phase 6.5: Auto-Fix +| Document | Purpose | When to Use | +|----------|---------|-------------| +| [phases/06-5-auto-fix.md](phases/06-5-auto-fix.md) | Auto-fix workflow for readiness issues | When Phase 6 score < 60% | +| [specs/quality-gates.md](specs/quality-gates.md) | Iteration exit criteria | Validation | + +### Phase 7: Issue Export +| Document | Purpose | When to Use | +|----------|---------|-------------| +| [phases/07-issue-export.md](phases/07-issue-export.md) | Epic→Issue mapping and export | Phase start | +| [specs/quality-gates.md](specs/quality-gates.md) | Issue export quality criteria | Validation | + +### Debugging & Troubleshooting +| Issue | Solution Document | +|-------|-------------------| +| Phase execution failed | Refer to the relevant Phase documentation | +| Output does not meet expectations | [specs/quality-gates.md](specs/quality-gates.md) | +| Document format issues | [specs/document-standards.md](specs/document-standards.md) | + +## Error Handling + +| Phase | Error | Blocking? 
| Action | +|-------|-------|-----------|--------| +| Phase 1 | Empty input | Yes | Error and exit | +| Phase 1 | CLI seed analysis fails | No | Use basic parsing fallback | +| Phase 1.5 | Gap analysis CLI fails | No | Skip to user questions with basic prompts | +| Phase 1.5 | User skips discussion | No | Proceed with seed_analysis as-is | +| Phase 1.5 | Max rounds reached (5) | No | Force confirmation with current state | +| Phase 2 | Single CLI perspective fails | No | Continue with available perspectives | +| Phase 2 | All CLI calls fail | No | Generate basic brief from seed analysis | +| Phase 3 | Gemini CLI fails | No | Use codex fallback | +| Phase 4 | Architecture review fails | No | Skip review, proceed with initial analysis | +| Phase 5 | Story generation fails | No | Generate epics without detailed stories | +| Phase 6 | Validation CLI fails | No | Generate partial report with available data | +| Phase 6.5 | Auto-fix CLI fails | No | Log failure, proceed to handoff with Review status | +| Phase 6.5 | Max iterations reached | No | Force handoff, report remaining issues | +| Phase 7 | ccw issue create fails for one Epic | No | Log error, continue with remaining Epics | +| Phase 7 | No EPIC files found | Yes | Error and return to Phase 5 | +| Phase 7 | All issue creations fail | Yes | Error with CLI diagnostic, suggest manual creation | +| Phase 2-5 | Agent fails to return | Yes | Retry once, then fall back to inline execution | +| Phase 2-5 | Agent returns incomplete files | No | Log gaps, attempt inline completion for missing files | + +### CLI Fallback Chain + +Gemini -> Codex -> Claude -> degraded mode (local analysis only) diff --git a/.codex/skills/spec-generator/phases/01-5-requirement-clarification.md b/.codex/skills/spec-generator/phases/01-5-requirement-clarification.md new file mode 100644 index 00000000..d185febc --- /dev/null +++ b/.codex/skills/spec-generator/phases/01-5-requirement-clarification.md @@ -0,0 +1,404 @@ +# Phase 1.5: Requirement 
Expansion & Clarification + +在进入正式文档生成前,通过多轮交互讨论对原始需求进行深度挖掘、扩展和确认。 + +## Objective + +- 识别原始需求中的模糊点、遗漏和潜在风险 +- 通过 CLI 辅助分析需求完整性,生成深度探测问题 +- 支持多轮交互讨论,逐步细化需求 +- 生成经用户确认的 `refined-requirements.json` 作为后续阶段的高质量输入 + +## Input + +- Dependency: `{workDir}/spec-config.json` (Phase 1 output) +- Optional: `{workDir}/discovery-context.json` (codebase context) + +## Execution Steps + +### Step 1: Load Phase 1 Context + +```javascript +const specConfig = JSON.parse(Read(`${workDir}/spec-config.json`)); +const { seed_analysis, seed_input, focus_areas, has_codebase, depth } = specConfig; + +let discoveryContext = null; +if (has_codebase) { + try { + discoveryContext = JSON.parse(Read(`${workDir}/discovery-context.json`)); + } catch (e) { /* proceed without */ } +} +``` + +### Step 2: CLI Gap Analysis & Question Generation + +调用 Gemini CLI 分析原始需求的完整性,识别模糊点并生成探测问题。 + +```javascript +Bash({ + command: `ccw cli -p "PURPOSE: 深度分析用户的初始需求,识别模糊点、遗漏和需要澄清的领域。 +Success: 生成 3-5 个高质量的探测问题,覆盖功能范围、边界条件、非功能性需求、用户场景等维度。 + +ORIGINAL SEED INPUT: +${seed_input} + +SEED ANALYSIS: +${JSON.stringify(seed_analysis, null, 2)} + +FOCUS AREAS: ${focus_areas.join(', ')} +${discoveryContext ? ` +CODEBASE CONTEXT: +- Existing patterns: ${discoveryContext.existing_patterns?.slice(0,5).join(', ') || 'none'} +- Tech stack: ${JSON.stringify(discoveryContext.tech_stack || {})} +` : ''} + +TASK: +1. 评估当前需求描述的完整性(1-10 分,列出缺失维度) +2. 识别 3-5 个关键模糊区域,每个区域包含: + - 模糊点描述(为什么不清楚) + - 1-2 个开放式探测问题 + - 1-2 个扩展建议(基于领域最佳实践) +3. 检查以下维度是否有遗漏: + - 功能范围边界(什么在范围内/外?) + - 核心用户场景和流程 + - 非功能性需求(性能、安全、可用性、可扩展性) + - 集成点和外部依赖 + - 数据模型和存储需求 + - 错误处理和异常场景 +4. 
基于领域经验提供需求扩展建议 + +MODE: analysis +EXPECTED: JSON output: +{ + \"completeness_score\": 7, + \"missing_dimensions\": [\"Performance requirements\", \"Error handling\"], + \"clarification_areas\": [ + { + \"area\": \"Scope boundary\", + \"rationale\": \"Input does not clarify...\", + \"questions\": [\"Question 1?\", \"Question 2?\"], + \"suggestions\": [\"Suggestion 1\", \"Suggestion 2\"] + } + ], + \"expansion_recommendations\": [ + { + \"category\": \"Non-functional\", + \"recommendation\": \"Consider adding...\", + \"priority\": \"high|medium|low\" + } + ] +} +CONSTRAINTS: 问题必须是开放式的,建议必须具体可执行,使用用户输入的语言 +" --tool gemini --mode analysis`, + run_in_background: true +}); +// Wait for CLI result before continuing +``` + +解析 CLI 输出为结构化数据: +```javascript +const gapAnalysis = { + completeness_score: 0, + missing_dimensions: [], + clarification_areas: [], + expansion_recommendations: [] +}; +// Parse from CLI output +``` + +### Step 3: Interactive Discussion Loop + +核心多轮交互循环。每轮:展示分析结果 → 用户回应 → 更新需求状态 → 判断是否继续。 + +```javascript +// Initialize requirement state +let requirementState = { + problem_statement: seed_analysis.problem_statement, + target_users: seed_analysis.target_users, + domain: seed_analysis.domain, + constraints: seed_analysis.constraints, + confirmed_features: [], + non_functional_requirements: [], + boundary_conditions: [], + integration_points: [], + key_assumptions: [], + discussion_rounds: 0 +}; + +let discussionLog = []; +let userSatisfied = false; + +// === Round 1: Present gap analysis results === +// Display completeness_score, clarification_areas, expansion_recommendations +// Then ask user to respond + +while (!userSatisfied && requirementState.discussion_rounds < 5) { + requirementState.discussion_rounds++; + + if (requirementState.discussion_rounds === 1) { + // --- First round: present initial gap analysis --- + // Format questions and suggestions from gapAnalysis for display + // Present as a structured summary to the user + + AskUserQuestion({ 
+ questions: [ + { + question: buildDiscussionPrompt(gapAnalysis, requirementState), + header: "Req Expand", + multiSelect: false, + options: [ + { label: "I'll answer", description: "I have answers/feedback to provide (type in 'Other')" }, + { label: "Accept all suggestions", description: "Accept all expansion recommendations as-is" }, + { label: "Skip to generation", description: "Requirements are clear enough, proceed directly" } + ] + } + ] + }); + } else { + // --- Subsequent rounds: refine based on user feedback --- + // Call CLI with accumulated context for follow-up analysis + Bash({ + command: `ccw cli -p "PURPOSE: 基于用户最新回应,更新需求理解,识别剩余模糊点。 + +CURRENT REQUIREMENT STATE: +${JSON.stringify(requirementState, null, 2)} + +DISCUSSION HISTORY: +${JSON.stringify(discussionLog, null, 2)} + +USER'S LATEST RESPONSE: +${lastUserResponse} + +TASK: +1. 将用户回应整合到需求状态中 +2. 识别 1-3 个仍需澄清或可扩展的领域 +3. 生成后续问题(如有必要) +4. 如果需求已充分,输出最终需求摘要 + +MODE: analysis +EXPECTED: JSON output: +{ + \"updated_fields\": { /* fields to merge into requirementState */ }, + \"status\": \"need_more_discussion\" | \"ready_for_confirmation\", + \"follow_up\": { + \"remaining_areas\": [{\"area\": \"...\", \"questions\": [\"...\"]}], + \"summary\": \"...\" + } +} +CONSTRAINTS: 避免重复已回答的问题,聚焦未覆盖的领域 +" --tool gemini --mode analysis`, + run_in_background: true + }); + // Wait for CLI result, parse and continue + + // If status === "ready_for_confirmation", break to confirmation step + // If status === "need_more_discussion", present follow-up questions + + AskUserQuestion({ + questions: [ + { + question: buildFollowUpPrompt(followUpAnalysis, requirementState), + header: "Follow-up", + multiSelect: false, + options: [ + { label: "I'll answer", description: "I have more feedback (type in 'Other')" }, + { label: "Looks good", description: "Requirements are sufficiently clear now" }, + { label: "Accept suggestions", description: "Accept remaining suggestions" } + ] + } + ] + }); + } + + // Process user response + 
// - "Skip to generation" / "Looks good" → userSatisfied = true + // - "Accept all suggestions" → merge suggestions into requirementState, userSatisfied = true + // - "I'll answer" (with Other text) → record in discussionLog, continue loop + // - User selects Other with custom text → parse and record + + discussionLog.push({ + round: requirementState.discussion_rounds, + agent_prompt: currentPrompt, + user_response: userResponse, + timestamp: new Date().toISOString() + }); +} +``` + +#### Helper: Build Discussion Prompt + +```javascript +function buildDiscussionPrompt(gapAnalysis, state) { + let prompt = `## Requirement Analysis Results\n\n`; + prompt += `**Completeness Score**: ${gapAnalysis.completeness_score}/10\n`; + + if (gapAnalysis.missing_dimensions.length > 0) { + prompt += `**Missing Dimensions**: ${gapAnalysis.missing_dimensions.join(', ')}\n\n`; + } + + prompt += `### Key Questions\n\n`; + gapAnalysis.clarification_areas.forEach((area, i) => { + prompt += `**${i+1}. ${area.area}**\n`; + prompt += ` ${area.rationale}\n`; + area.questions.forEach(q => { prompt += ` - ${q}\n`; }); + if (area.suggestions.length > 0) { + prompt += ` Suggestions: ${area.suggestions.join('; ')}\n`; + } + prompt += `\n`; + }); + + if (gapAnalysis.expansion_recommendations.length > 0) { + prompt += `### Expansion Recommendations\n\n`; + gapAnalysis.expansion_recommendations.forEach(rec => { + prompt += `- [${rec.priority}] **${rec.category}**: ${rec.recommendation}\n`; + }); + } + + prompt += `\nPlease answer the questions above, or choose an option below.`; + return prompt; +} +``` + +### Step 4: Auto Mode Handling + +```javascript +if (autoMode) { + // Skip interactive discussion + // CLI generates default requirement expansion based on seed_analysis + Bash({ + command: `ccw cli -p "PURPOSE: 基于种子分析自动生成需求扩展,无需用户交互。 + +SEED ANALYSIS: +${JSON.stringify(seed_analysis, null, 2)} + +SEED INPUT: ${seed_input} +DEPTH: ${depth} +${discoveryContext ? 
`CODEBASE: ${JSON.stringify(discoveryContext.tech_stack || {})}` : ''} + +TASK: +1. 基于领域最佳实践,自动扩展功能需求清单 +2. 推断合理的非功能性需求 +3. 识别明显的边界条件 +4. 列出关键假设 + +MODE: analysis +EXPECTED: JSON output matching refined-requirements.json schema +CONSTRAINTS: 保守推断,只添加高置信度的扩展 +" --tool gemini --mode analysis`, + run_in_background: true + }); + // Parse output directly into refined-requirements.json +} +``` + +### Step 5: Generate Requirement Confirmation Summary + +在写入文件前,向用户展示最终的需求确认摘要(非 auto mode)。 + +```javascript +if (!autoMode) { + // Build confirmation summary from requirementState + const summary = buildConfirmationSummary(requirementState); + + AskUserQuestion({ + questions: [ + { + question: `## Requirement Confirmation\n\n${summary}\n\nConfirm and proceed to specification generation?`, + header: "Confirm", + multiSelect: false, + options: [ + { label: "Confirm & proceed", description: "Requirements confirmed, start spec generation" }, + { label: "Need adjustments", description: "Go back and refine further" } + ] + } + ] + }); + + // If "Need adjustments" → loop back to Step 3 + // If "Confirm & proceed" → continue to Step 6 +} +``` + +### Step 6: Write refined-requirements.json + +```javascript +const refinedRequirements = { + session_id: specConfig.session_id, + phase: "1.5", + generated_at: new Date().toISOString(), + source: autoMode ? "auto-expansion" : "interactive-discussion", + discussion_rounds: requirementState.discussion_rounds, + + // Core requirement content + clarified_problem_statement: requirementState.problem_statement, + confirmed_target_users: requirementState.target_users.map(u => + typeof u === 'string' ? 
{ name: u, needs: [], pain_points: [] } : u + ), + confirmed_domain: requirementState.domain, + + confirmed_features: requirementState.confirmed_features.map(f => ({ + name: f.name, + description: f.description, + acceptance_criteria: f.acceptance_criteria || [], + edge_cases: f.edge_cases || [], + priority: f.priority || "unset" + })), + + non_functional_requirements: requirementState.non_functional_requirements.map(nfr => ({ + type: nfr.type, // Performance, Security, Usability, Scalability, etc. + details: nfr.details, + measurable_criteria: nfr.measurable_criteria || "" + })), + + boundary_conditions: { + in_scope: requirementState.boundary_conditions.filter(b => b.scope === 'in'), + out_of_scope: requirementState.boundary_conditions.filter(b => b.scope === 'out'), + constraints: requirementState.constraints + }, + + integration_points: requirementState.integration_points, + key_assumptions: requirementState.key_assumptions, + + // Traceability + discussion_log: autoMode ? [] : discussionLog +}; + +Write(`${workDir}/refined-requirements.json`, JSON.stringify(refinedRequirements, null, 2)); +``` + +### Step 7: Update spec-config.json + +```javascript +specConfig.refined_requirements_file = "refined-requirements.json"; +specConfig.phasesCompleted.push({ + phase: 1.5, + name: "requirement-clarification", + output_file: "refined-requirements.json", + discussion_rounds: requirementState.discussion_rounds, + completed_at: new Date().toISOString() +}); + +Write(`${workDir}/spec-config.json`, JSON.stringify(specConfig, null, 2)); +``` + +## Output + +- **File**: `refined-requirements.json` +- **Format**: JSON +- **Updated**: `spec-config.json` (added `refined_requirements_file` field and phase 1.5 to `phasesCompleted`) + +## Quality Checklist + +- [ ] Problem statement refined (>= 30 characters, more specific than seed) +- [ ] At least 2 confirmed features with descriptions +- [ ] At least 1 non-functional requirement identified +- [ ] Boundary conditions defined 
(in-scope + out-of-scope) +- [ ] Key assumptions listed (>= 1) +- [ ] Discussion rounds recorded (>= 1 in interactive mode) +- [ ] User explicitly confirmed requirements (non-auto mode) +- [ ] `refined-requirements.json` written with valid JSON +- [ ] `spec-config.json` updated with phase 1.5 completion + +## Next Phase + +Proceed to [Phase 2: Product Brief](02-product-brief.md). Phase 2 should load `refined-requirements.json` as primary input instead of relying solely on `spec-config.json.seed_analysis`. diff --git a/.codex/skills/spec-generator/phases/01-discovery.md b/.codex/skills/spec-generator/phases/01-discovery.md new file mode 100644 index 00000000..b7b5cf76 --- /dev/null +++ b/.codex/skills/spec-generator/phases/01-discovery.md @@ -0,0 +1,257 @@ +# Phase 1: Discovery + +Parse input, analyze the seed idea, optionally explore codebase, establish session configuration. + +## Objective + +- Generate session ID and create output directory +- Parse user input (text description or file reference) +- Analyze seed via Gemini CLI to extract problem space dimensions +- Conditionally explore codebase for existing patterns and constraints +- Gather user preferences (depth, focus areas) via interactive confirmation +- Write `spec-config.json` as the session state file + +## Input + +- Dependency: `$ARGUMENTS` (user input from command) +- Flags: `-y` (auto mode), `-c` (continue mode) + +## Execution Steps + +### Step 1: Session Initialization + +```javascript +// Parse arguments +const args = $ARGUMENTS; +const autoMode = args.includes('-y') || args.includes('--yes'); +const continueMode = args.includes('-c') || args.includes('--continue'); + +// Extract the idea/topic (remove flags) +const idea = args.replace(/(-y|--yes|-c|--continue)\s*/g, '').trim(); + +// Generate session ID +const slug = idea.toLowerCase() + .replace(/[^a-z0-9\u4e00-\u9fff]+/g, '-') + .replace(/^-|-$/g, '') + .slice(0, 40); +const date = new Date().toISOString().slice(0, 10); +const sessionId = 
`SPEC-${slug}-${date}`; +const workDir = `.workflow/.spec/${sessionId}`; + +// Check for continue mode +if (continueMode) { + // Find existing session + const existingSessions = Glob('.workflow/.spec/SPEC-*/spec-config.json'); + // If slug matches an existing session, load it and resume + // Read spec-config.json, find first incomplete phase, jump to that phase + return; // Resume logic handled by orchestrator +} + +// Create output directory +Bash(`mkdir -p "${workDir}"`); +``` + +### Step 2: Input Parsing + +```javascript +// Determine input type +if (idea.startsWith('@') || idea.endsWith('.md') || idea.endsWith('.txt')) { + // File reference - read and extract content + const filePath = idea.replace(/^@/, ''); + const fileContent = Read(filePath); + // Use file content as the seed + inputType = 'file'; + seedInput = fileContent; +} else { + // Direct text description + inputType = 'text'; + seedInput = idea; +} +``` + +### Step 3: Seed Analysis via Gemini CLI + +```javascript +Bash({ + command: `ccw cli -p "PURPOSE: Analyze this seed idea/requirement to extract structured problem space dimensions. +Success: Clear problem statement, target users, domain identification, 3-5 exploration dimensions. + +SEED INPUT: +${seedInput} + +TASK: +- Extract a clear problem statement (what problem does this solve?) +- Identify target users (who benefits?) +- Determine the domain (technical, business, consumer, etc.) 
+- List constraints (budget, time, technical, regulatory) +- Generate 3-5 exploration dimensions (key areas to investigate) +- Assess complexity: simple (1-2 components), moderate (3-5 components), complex (6+ components) + +MODE: analysis +EXPECTED: JSON output with fields: problem_statement, target_users[], domain, constraints[], dimensions[], complexity +CONSTRAINTS: Be specific and actionable, not vague +" --tool gemini --mode analysis`, + run_in_background: true +}); +// Wait for CLI result before continuing +``` + +Parse the CLI output into structured `seedAnalysis`: +```javascript +const seedAnalysis = { + problem_statement: "...", + target_users: ["..."], + domain: "...", + constraints: ["..."], + dimensions: ["..."] +}; +const complexity = "moderate"; // from CLI output +``` + +### Step 4: Codebase Exploration (Conditional) + +```javascript +// Detect if running inside a project with code +const hasCodebase = Glob('**/*.{ts,js,py,java,go,rs}').length > 0 + || Glob('package.json').length > 0 + || Glob('Cargo.toml').length > 0; + +if (hasCodebase) { + Agent({ + subagent_type: "cli-explore-agent", + run_in_background: false, + description: `Explore codebase for spec: ${slug}`, + prompt: ` +## Spec Generator Context +Topic: ${seedInput} +Dimensions: ${seedAnalysis.dimensions.join(', ')} +Session: ${workDir} + +## MANDATORY FIRST STEPS +1. Search for code related to topic keywords +2. Read project config files (package.json, pyproject.toml, etc.) 
if they exist + +## Exploration Focus +- Identify existing implementations related to the topic +- Find patterns that could inform architecture decisions +- Map current architecture constraints +- Locate integration points and dependencies + +## Output +Write findings to: ${workDir}/discovery-context.json + +Schema: +{ + "relevant_files": [{"path": "...", "relevance": "high|medium|low", "rationale": "..."}], + "existing_patterns": ["pattern descriptions"], + "architecture_constraints": ["constraint descriptions"], + "integration_points": ["integration point descriptions"], + "tech_stack": {"languages": [], "frameworks": [], "databases": []}, + "_metadata": { "exploration_type": "spec-discovery", "timestamp": "ISO8601" } +} +` + }); +} +``` + +### Step 5: User Confirmation (Interactive) + +```javascript +if (!autoMode) { + // Confirm problem statement and select depth + AskUserQuestion({ + questions: [ + { + question: `Problem statement: "${seedAnalysis.problem_statement}" - Is this accurate?`, + header: "Problem", + multiSelect: false, + options: [ + { label: "Accurate", description: "Proceed with this problem statement" }, + { label: "Needs adjustment", description: "I'll refine the problem statement" } + ] + }, + { + question: "What specification depth do you need?", + header: "Depth", + multiSelect: false, + options: [ + { label: "Light", description: "Quick overview - key decisions only" }, + { label: "Standard (Recommended)", description: "Balanced detail for most projects" }, + { label: "Comprehensive", description: "Maximum detail for complex/critical projects" } + ] + }, + { + question: "Which areas should we focus on?", + header: "Focus", + multiSelect: true, + options: seedAnalysis.dimensions.map(d => ({ label: d, description: `Explore ${d} in depth` })) + }, + { + question: "What type of specification is this?", + header: "Spec Type", + multiSelect: false, + options: [ + { label: "Service (Recommended)", description: "Long-running service with lifecycle, 
state machine, observability" }, + { label: "API", description: "REST/GraphQL API with endpoints, auth, rate limiting" }, + { label: "Library/SDK", description: "Reusable package with public API surface, examples" }, + { label: "Platform", description: "Multi-component system, uses Service profile" } + ] + } + ] + }); +} else { + // Auto mode defaults + depth = "standard"; + focusAreas = seedAnalysis.dimensions; + specType = "service"; // default for auto mode +} +``` + +### Step 6: Write spec-config.json + +```javascript +const specConfig = { + session_id: sessionId, + seed_input: seedInput, + input_type: inputType, + timestamp: new Date().toISOString(), + mode: autoMode ? "auto" : "interactive", + complexity: complexity, + depth: depth, + focus_areas: focusAreas, + seed_analysis: seedAnalysis, + has_codebase: hasCodebase, + spec_type: specType, // "service" | "api" | "library" | "platform" + iteration_count: 0, + iteration_history: [], + phasesCompleted: [ + { + phase: 1, + name: "discovery", + output_file: "spec-config.json", + completed_at: new Date().toISOString() + } + ] +}; + +Write(`${workDir}/spec-config.json`, JSON.stringify(specConfig, null, 2)); +``` + +## Output + +- **File**: `spec-config.json` +- **File**: `discovery-context.json` (optional, if codebase detected) +- **Format**: JSON + +## Quality Checklist + +- [ ] Session ID matches `SPEC-{slug}-{date}` format +- [ ] Problem statement exists and is >= 20 characters +- [ ] Target users identified (>= 1) +- [ ] 3-5 exploration dimensions generated +- [ ] spec-config.json written with all required fields +- [ ] Output directory created + +## Next Phase + +Proceed to [Phase 2: Product Brief](02-product-brief.md) with the generated spec-config.json. 
diff --git a/.codex/skills/spec-generator/phases/02-product-brief.md b/.codex/skills/spec-generator/phases/02-product-brief.md new file mode 100644 index 00000000..8a758867 --- /dev/null +++ b/.codex/skills/spec-generator/phases/02-product-brief.md @@ -0,0 +1,298 @@ +# Phase 2: Product Brief + +> **Execution Mode: Agent Delegated** +> This phase is executed by a `doc-generator` agent. The orchestrator (SKILL.md) passes session context via the Task tool. The agent reads this file for instructions, executes all steps, writes output files, and returns a JSON summary. + +Generate a product brief through multi-perspective CLI analysis, establishing "what" and "why". + +## Objective + +- Read Phase 1 outputs (spec-config.json, discovery-context.json) +- Launch 3 parallel CLI analyses from product, technical, and user perspectives +- Synthesize convergent themes and conflicting views +- Optionally refine with user input +- Generate product-brief.md using template + +## Input + +- Dependency: `{workDir}/spec-config.json` +- Primary: `{workDir}/refined-requirements.json` (Phase 1.5 output, preferred over raw seed_analysis) +- Optional: `{workDir}/discovery-context.json` +- Config: `{workDir}/spec-config.json` +- Template: `templates/product-brief.md` + +## Execution Steps + +### Step 1: Load Phase 1 Context + +```javascript +const specConfig = JSON.parse(Read(`${workDir}/spec-config.json`)); +const { seed_analysis, seed_input, has_codebase, depth, focus_areas } = specConfig; + +// Load refined requirements (Phase 1.5 output) - preferred over raw seed_analysis +let refinedReqs = null; +try { + refinedReqs = JSON.parse(Read(`${workDir}/refined-requirements.json`)); +} catch (e) { + // No refined requirements, fall back to seed_analysis +} + +let discoveryContext = null; +if (has_codebase) { + try { + discoveryContext = JSON.parse(Read(`${workDir}/discovery-context.json`)); + } catch (e) { + // No discovery context available, proceed without + } +} + +// Build shared context 
string for CLI prompts +// Prefer refined requirements over raw seed_analysis +const problem = refinedReqs?.clarified_problem_statement || seed_analysis.problem_statement; +const users = refinedReqs?.confirmed_target_users?.map(u => u.name || u).join(', ') + || seed_analysis.target_users.join(', '); +const domain = refinedReqs?.confirmed_domain || seed_analysis.domain; +const constraints = refinedReqs?.boundary_conditions?.constraints?.join(', ') + || seed_analysis.constraints.join(', '); +const features = refinedReqs?.confirmed_features?.map(f => f.name).join(', ') || ''; +const nfrs = refinedReqs?.non_functional_requirements?.map(n => `${n.type}: ${n.details}`).join('; ') || ''; + +const sharedContext = ` +SEED: ${seed_input} +PROBLEM: ${problem} +TARGET USERS: ${users} +DOMAIN: ${domain} +CONSTRAINTS: ${constraints} +FOCUS AREAS: ${focus_areas.join(', ')} +${features ? `CONFIRMED FEATURES: ${features}` : ''} +${nfrs ? `NON-FUNCTIONAL REQUIREMENTS: ${nfrs}` : ''} +${discoveryContext ? ` +CODEBASE CONTEXT: +- Existing patterns: ${discoveryContext.existing_patterns?.slice(0,5).join(', ') || 'none'} +- Architecture constraints: ${discoveryContext.architecture_constraints?.slice(0,3).join(', ') || 'none'} +- Tech stack: ${JSON.stringify(discoveryContext.tech_stack || {})} +` : ''}`; +``` + +### Step 2: Multi-CLI Parallel Analysis (3 perspectives) + +Launch 3 CLI calls in parallel: + +**Product Perspective (Gemini)**: +```javascript +Bash({ + command: `ccw cli -p "PURPOSE: Product analysis for specification - identify market fit, user value, and success criteria. +Success: Clear vision, measurable goals, competitive positioning. 
+ +${sharedContext} + +TASK: +- Define product vision (1-3 sentences, aspirational) +- Analyze market/competitive landscape +- Define 3-5 measurable success metrics +- Identify scope boundaries (in-scope vs out-of-scope) +- Assess user value proposition +- List assumptions that need validation + +MODE: analysis +EXPECTED: Structured product analysis with: vision, goals with metrics, scope, competitive positioning, assumptions +CONSTRAINTS: Focus on 'what' and 'why', not 'how' +" --tool gemini --mode analysis`, + run_in_background: true +}); +``` + +**Technical Perspective (Codex)**: +```javascript +Bash({ + command: `ccw cli -p "PURPOSE: Technical feasibility analysis for specification - assess implementation viability and constraints. +Success: Clear technical constraints, integration complexity, technology recommendations. + +${sharedContext} + +TASK: +- Assess technical feasibility of the core concept +- Identify technical constraints and blockers +- Evaluate integration complexity with existing systems +- Recommend technology approach (high-level) +- Identify technical risks and dependencies +- Estimate complexity: simple/moderate/complex + +MODE: analysis +EXPECTED: Technical analysis with: feasibility assessment, constraints, integration complexity, tech recommendations, risks +CONSTRAINTS: Focus on feasibility and constraints, not detailed architecture +" --tool codex --mode analysis`, + run_in_background: true +}); +``` + +**User Perspective (Claude)**: +```javascript +Bash({ + command: `ccw cli -p "PURPOSE: User experience analysis for specification - understand user journeys, pain points, and UX considerations. +Success: Clear user personas, journey maps, UX requirements. 
+ +${sharedContext} + +TASK: +- Elaborate user personas with goals and frustrations +- Map primary user journey (happy path) +- Identify key pain points in current experience +- Define UX success criteria +- List accessibility and usability considerations +- Suggest interaction patterns + +MODE: analysis +EXPECTED: User analysis with: personas, journey map, pain points, UX criteria, interaction recommendations +CONSTRAINTS: Focus on user needs and experience, not implementation +" --tool claude --mode analysis`, + run_in_background: true +}); + +// STOP: Wait for all 3 CLI results before continuing +``` + +### Step 3: Synthesize Perspectives + +```javascript +// After receiving all 3 CLI results: +// Extract convergent themes (all agree) +// Identify conflicting views (need resolution) +// Note unique contributions from each perspective + +const synthesis = { + convergent_themes: [], // themes all 3 perspectives agree on + conflicts: [], // areas where perspectives differ + product_insights: [], // unique from product perspective + technical_insights: [], // unique from technical perspective + user_insights: [] // unique from user perspective +}; +``` + +### Step 4: Interactive Refinement (Optional) + +```javascript +if (!autoMode) { + // Present synthesis summary to user + // AskUserQuestion with: + // - Confirm vision statement + // - Resolve any conflicts between perspectives + // - Adjust scope if needed + AskUserQuestion({ + questions: [ + { + question: "Review the synthesized product brief. 
Any adjustments needed?", + header: "Review", + multiSelect: false, + options: [ + { label: "Looks good", description: "Proceed to PRD generation" }, + { label: "Adjust scope", description: "Narrow or expand the scope" }, + { label: "Revise vision", description: "Refine the vision statement" } + ] + } + ] + }); +} +``` + +### Step 5: Generate product-brief.md + +```javascript +// Read template +const template = Read('templates/product-brief.md'); + +// Fill template with synthesized content +// Apply document-standards.md formatting rules +// Write with YAML frontmatter + +const frontmatter = `--- +session_id: ${specConfig.session_id} +phase: 2 +document_type: product-brief +status: ${autoMode ? 'complete' : 'draft'} +generated_at: ${new Date().toISOString()} +stepsCompleted: ["load-context", "multi-cli-analysis", "synthesis", "generation"] +version: 1 +dependencies: + - spec-config.json +---`; + +// Combine frontmatter + filled template content +Write(`${workDir}/product-brief.md`, `${frontmatter}\n\n${filledContent}`); + +// Update spec-config.json +specConfig.phasesCompleted.push({ + phase: 2, + name: "product-brief", + output_file: "product-brief.md", + completed_at: new Date().toISOString() +}); +Write(`${workDir}/spec-config.json`, JSON.stringify(specConfig, null, 2)); +``` + +### Step 5.5: Generate glossary.json + +```javascript +// Extract terminology from product brief and CLI analysis +// Generate structured glossary for cross-document consistency + +const glossary = { + session_id: specConfig.session_id, + terms: [ + // Extract from product brief content: + // - Key domain nouns from problem statement + // - User persona names + // - Technical terms from multi-perspective synthesis + // Each term should have: + // { term: "...", definition: "...", aliases: [], first_defined_in: "product-brief.md", category: "core|technical|business" } + ] +}; + +Write(`${workDir}/glossary.json`, JSON.stringify(glossary, null, 2)); +``` + +**Glossary Injection**: In all 
subsequent phase prompts, inject the following into the CONTEXT section:
+```
+TERMINOLOGY GLOSSARY (use these terms consistently):
+${JSON.stringify(glossary.terms, null, 2)}
+```
+
+## Output
+
+- **Files**: `product-brief.md`, `glossary.json`
+- **Format**: Markdown with YAML frontmatter
+
+## Quality Checklist
+
+- [ ] Vision statement: clear, 1-3 sentences
+- [ ] Problem statement: specific and measurable
+- [ ] Target users: >= 1 persona with needs
+- [ ] Goals: >= 2 with measurable metrics
+- [ ] Scope: in-scope and out-of-scope defined
+- [ ] Multi-perspective synthesis included
+- [ ] YAML frontmatter valid
+
+## Next Phase
+
+Proceed to [Phase 3: Requirements](03-requirements.md) with the generated product-brief.md.
+
+---
+
+## Agent Return Summary
+
+When executed as a delegated agent, return the following JSON summary to the orchestrator:
+
+```json
+{
+  "phase": 2,
+  "status": "complete",
+  "files_created": ["product-brief.md", "glossary.json"],
+  "quality_notes": ["list of any quality concerns or deviations"],
+  "key_decisions": ["list of significant synthesis decisions made"]
+}
+```
+
+The orchestrator will:
+1. Validate that listed files exist on disk
+2. Read `spec-config.json` to confirm `phasesCompleted` was updated
+3. Store the summary for downstream phase context
diff --git a/.codex/skills/spec-generator/phases/03-requirements.md b/.codex/skills/spec-generator/phases/03-requirements.md
new file mode 100644
index 00000000..1a3c53b1
--- /dev/null
+++ b/.codex/skills/spec-generator/phases/03-requirements.md
@@ -0,0 +1,248 @@
+# Phase 3: Requirements (PRD)
+
+> **Execution Mode: Agent Delegated**
+> This phase is executed by a `doc-generator` agent. The orchestrator (SKILL.md) passes session context via the Task tool. The agent reads this file for instructions, executes all steps, writes output files, and returns a JSON summary.
+ +Generate a detailed Product Requirements Document with functional/non-functional requirements, acceptance criteria, and MoSCoW prioritization. + +## Objective + +- Read product-brief.md and extract goals, scope, constraints +- Expand each goal into functional requirements with acceptance criteria +- Generate non-functional requirements +- Apply MoSCoW priority labels (user input or auto) +- Generate requirements.md using template + +## Input + +- Dependency: `{workDir}/product-brief.md` +- Config: `{workDir}/spec-config.json` +- Template: `templates/requirements-prd.md` (directory structure: `_index.md` + `REQ-*.md` + `NFR-*.md`) + +## Execution Steps + +### Step 1: Load Phase 2 Context + +```javascript +const specConfig = JSON.parse(Read(`${workDir}/spec-config.json`)); +const productBrief = Read(`${workDir}/product-brief.md`); + +// Extract key sections from product brief +// - Goals & Success Metrics table +// - Scope (in-scope items) +// - Target Users (personas) +// - Constraints +// - Technical perspective insights +``` + +### Step 2: Requirements Expansion via Gemini CLI + +```javascript +Bash({ + command: `ccw cli -p "PURPOSE: Generate detailed functional and non-functional requirements from product brief. +Success: Complete PRD with testable acceptance criteria for every requirement. 
+ +PRODUCT BRIEF CONTEXT: +${productBrief} + +TASK: +- For each goal in the product brief, generate 3-7 functional requirements +- Each requirement must have: + - Unique ID: REQ-NNN (zero-padded) + - Clear title + - Detailed description + - User story: As a [persona], I want [action] so that [benefit] + - 2-4 specific, testable acceptance criteria +- Generate non-functional requirements: + - Performance (response times, throughput) + - Security (authentication, authorization, data protection) + - Scalability (user load, data volume) + - Usability (accessibility, learnability) +- Assign initial MoSCoW priority based on: + - Must: Core functionality, cannot launch without + - Should: Important but has workaround + - Could: Nice-to-have, enhances experience + - Won't: Explicitly deferred +- Use RFC 2119 keywords (MUST, SHOULD, MAY, MUST NOT, SHOULD NOT) to define behavioral constraints for each requirement. Example: 'The system MUST return a 401 response within 100ms for invalid tokens.' +- For each core domain entity referenced in requirements, define its data model: fields, types, constraints, and relationships to other entities +- Maintain terminology consistency with the glossary below: + TERMINOLOGY GLOSSARY: + \${glossary ? JSON.stringify(glossary.terms, null, 2) : 'N/A - generate terms inline'} + +MODE: analysis +EXPECTED: Structured requirements with: ID, title, description, user story, acceptance criteria, priority, traceability to goals +CONSTRAINTS: Every requirement must be specific enough to estimate and test. No vague requirements like 'system should be fast'. +" --tool gemini --mode analysis`, + run_in_background: true +}); + +// Wait for CLI result +``` + +### Step 2.5: Codex Requirements Review + +After receiving Gemini expansion results, validate requirements quality via Codex CLI before proceeding: + +```javascript +Bash({ + command: `ccw cli -p "PURPOSE: Critical review of generated requirements - validate quality, testability, and scope alignment. 
+Success: Actionable feedback on requirement quality with specific issues identified. + +GENERATED REQUIREMENTS: +${geminiRequirementsOutput.slice(0, 5000)} + +PRODUCT BRIEF SCOPE: +${productBrief.slice(0, 2000)} + +TASK: +- Verify every acceptance criterion is specific, measurable, and testable (not vague like 'should be fast') +- Validate RFC 2119 keyword usage: MUST/SHOULD/MAY used correctly per RFC 2119 semantics +- Check scope containment: no requirement exceeds the product brief's defined scope boundaries +- Assess data model completeness: all referenced entities have field-level definitions +- Identify duplicate or overlapping requirements +- Rate overall requirements quality: 1-5 with justification + +MODE: analysis +EXPECTED: Requirements review with: per-requirement feedback, testability assessment, scope violations, data model gaps, quality rating +CONSTRAINTS: Be genuinely critical. Focus on requirements that would block implementation if left vague. +" --tool codex --mode analysis`, + run_in_background: true +}); + +// Wait for Codex review result +// Integrate feedback into requirements before writing files: +// - Fix vague acceptance criteria flagged by Codex +// - Correct RFC 2119 keyword misuse +// - Remove or flag requirements that exceed brief scope +// - Fill data model gaps identified by Codex +``` + +### Step 3: User Priority Sorting (Interactive) + +```javascript +if (!autoMode) { + // Present requirements grouped by initial priority + // Allow user to adjust MoSCoW labels + AskUserQuestion({ + questions: [ + { + question: "Review the Must-Have requirements. 
Any that should be reprioritized?", + header: "Must-Have", + multiSelect: false, + options: [ + { label: "All correct", description: "Must-have requirements are accurate" }, + { label: "Too many", description: "Some should be Should/Could" }, + { label: "Missing items", description: "Some Should requirements should be Must" } + ] + }, + { + question: "What is the target MVP scope?", + header: "MVP Scope", + multiSelect: false, + options: [ + { label: "Must-Have only (Recommended)", description: "MVP includes only Must requirements" }, + { label: "Must + key Should", description: "Include critical Should items in MVP" }, + { label: "Comprehensive", description: "Include all Must and Should" } + ] + } + ] + }); + // Apply user adjustments to priorities +} else { + // Auto mode: accept CLI-suggested priorities as-is +} +``` + +### Step 4: Generate requirements/ directory + +```javascript +// Read template +const template = Read('templates/requirements-prd.md'); + +// Create requirements directory +Bash(`mkdir -p "${workDir}/requirements"`); + +const status = autoMode ? 
'complete' : 'draft'; +const timestamp = new Date().toISOString(); + +// Parse CLI output into structured requirements +const funcReqs = parseFunctionalRequirements(cliOutput); // [{id, slug, title, priority, ...}] +const nfReqs = parseNonFunctionalRequirements(cliOutput); // [{id, type, slug, title, ...}] + +// Step 4a: Write individual REQ-*.md files (one per functional requirement) +funcReqs.forEach(req => { + // Use REQ-NNN-{slug}.md template from templates/requirements-prd.md + // Fill: id, title, priority, description, user_story, acceptance_criteria, traces + Write(`${workDir}/requirements/REQ-${req.id}-${req.slug}.md`, reqContent); +}); + +// Step 4b: Write individual NFR-*.md files (one per non-functional requirement) +nfReqs.forEach(nfr => { + // Use NFR-{type}-NNN-{slug}.md template from templates/requirements-prd.md + // Fill: id, type, category, title, requirement, metric, target, traces + Write(`${workDir}/requirements/NFR-${nfr.type}-${nfr.id}-${nfr.slug}.md`, nfrContent); +}); + +// Step 4c: Write _index.md (summary + links to all individual files) +// Use _index.md template from templates/requirements-prd.md +// Fill: summary table, functional req links table, NFR links tables, +// data requirements, integration requirements, traceability matrix +Write(`${workDir}/requirements/_index.md`, indexContent); + +// Update spec-config.json +specConfig.phasesCompleted.push({ + phase: 3, + name: "requirements", + output_dir: "requirements/", + output_index: "requirements/_index.md", + file_count: funcReqs.length + nfReqs.length + 1, + completed_at: timestamp +}); +Write(`${workDir}/spec-config.json`, JSON.stringify(specConfig, null, 2)); +``` + +## Output + +- **Directory**: `requirements/` + - `_index.md` — Summary, MoSCoW table, traceability matrix, links + - `REQ-NNN-{slug}.md` — Individual functional requirement (per requirement) + - `NFR-{type}-NNN-{slug}.md` — Individual non-functional requirement (per NFR) +- **Format**: Markdown with YAML 
frontmatter, cross-linked via relative paths + +## Quality Checklist + +- [ ] Functional requirements: >= 3 with REQ-NNN IDs, each in own file +- [ ] Every requirement file has >= 1 acceptance criterion +- [ ] Every requirement has MoSCoW priority tag in frontmatter +- [ ] Non-functional requirements: >= 1, each in own file +- [ ] User stories present for Must-have requirements +- [ ] `_index.md` links to all individual requirement files +- [ ] Traceability links to product-brief.md goals +- [ ] All files have valid YAML frontmatter + +## Next Phase + +Proceed to [Phase 4: Architecture](04-architecture.md) with the generated `requirements/` directory (entry point: `requirements/_index.md`). + +--- + +## Agent Return Summary + +When executed as a delegated agent, return the following JSON summary to the orchestrator: + +```json +{ + "phase": 3, + "status": "complete", + "files_created": ["requirements/_index.md", "requirements/REQ-001-*.md", "..."], + "file_count": 0, + "codex_review_integrated": true, + "quality_notes": ["list of quality concerns or Codex feedback items addressed"], + "key_decisions": ["MoSCoW priority rationale", "scope adjustments from Codex review"] +} +``` + +The orchestrator will: +1. Validate that `requirements/` directory exists with `_index.md` and individual files +2. Read `spec-config.json` to confirm `phasesCompleted` was updated +3. Store the summary for downstream phase context diff --git a/.codex/skills/spec-generator/phases/04-architecture.md b/.codex/skills/spec-generator/phases/04-architecture.md new file mode 100644 index 00000000..60f2acd2 --- /dev/null +++ b/.codex/skills/spec-generator/phases/04-architecture.md @@ -0,0 +1,274 @@ +# Phase 4: Architecture + +> **Execution Mode: Agent Delegated** +> This phase is executed by a `doc-generator` agent. The orchestrator (SKILL.md) passes session context via the Task tool. The agent reads this file for instructions, executes all steps, writes output files, and returns a JSON summary.
+ +Generate technical architecture decisions, component design, and technology selections based on requirements. + +## Objective + +- Analyze requirements to identify core components and system architecture +- Generate Architecture Decision Records (ADRs) with alternatives +- Map architecture to existing codebase (if applicable) +- Challenge architecture via Codex CLI review +- Generate the architecture/ directory using template + +## Input + +- Dependency: `{workDir}/requirements/_index.md` (and individual `REQ-*.md` files) +- Reference: `{workDir}/product-brief.md` +- Optional: `{workDir}/discovery-context.json` +- Config: `{workDir}/spec-config.json` +- Template: `templates/architecture-doc.md` + +## Execution Steps + +### Step 1: Load Phase 2-3 Context + +```javascript +const specConfig = JSON.parse(Read(`${workDir}/spec-config.json`)); +const productBrief = Read(`${workDir}/product-brief.md`); +const requirements = Read(`${workDir}/requirements/_index.md`); + +let discoveryContext = null; +if (specConfig.has_codebase) { + try { + discoveryContext = JSON.parse(Read(`${workDir}/discovery-context.json`)); + } catch (e) { /* no context */ } +} + +// Load glossary for terminology consistency +let glossary = null; +try { + glossary = JSON.parse(Read(`${workDir}/glossary.json`)); +} catch (e) { /* proceed without */ } + +// Load spec type profile for specialized sections +const specType = specConfig.spec_type || 'service'; +let profile = null; +try { + profile = Read(`templates/profiles/${specType}-profile.md`); +} catch (e) { /* use base template only */ } +``` + +### Step 2: Architecture Analysis via Gemini CLI + +```javascript +Bash({ + command: `ccw cli -p "PURPOSE: Generate technical architecture for the specified requirements. +Success: Complete component architecture, tech stack, and ADRs with justified decisions. + +PRODUCT BRIEF (summary): +${productBrief.slice(0, 3000)} + +REQUIREMENTS: +${requirements.slice(0, 5000)} + +${discoveryContext ?
`EXISTING CODEBASE: +- Tech stack: ${JSON.stringify(discoveryContext.tech_stack || {})} +- Existing patterns: ${discoveryContext.existing_patterns?.slice(0,5).join('; ') || 'none'} +- Architecture constraints: ${discoveryContext.architecture_constraints?.slice(0,3).join('; ') || 'none'} +` : ''} + +TASK: +- Define system architecture style (monolith, microservices, serverless, etc.) with justification +- Identify core components and their responsibilities +- Create component interaction diagram (Mermaid graph TD format) +- Specify technology stack: languages, frameworks, databases, infrastructure +- Generate 2-4 Architecture Decision Records (ADRs): + - Each ADR: context, decision, 2-3 alternatives with pros/cons, consequences + - Focus on: data storage, API design, authentication, key technical choices +- Define data model: key entities and relationships (Mermaid erDiagram format) +- Identify security architecture: auth, authorization, data protection +- List API endpoints (high-level) +${discoveryContext ? '- Map new components to existing codebase modules' : ''} +- For each core entity with a lifecycle, create an ASCII state machine diagram showing: + - All states and transitions + - Trigger events for each transition + - Side effects of transitions + - Error states and recovery paths +- Define a Configuration Model: list all configurable fields with name, type, default value, constraint, and description +- Define Error Handling strategy: + - Classify errors (transient/permanent/degraded) + - Per-component error behavior using RFC 2119 keywords + - Recovery mechanisms +- Define Observability requirements: + - Key metrics (name, type: counter/gauge/histogram, labels) + - Structured log format and key log events + - Health check endpoints +\${profile ? \` +SPEC TYPE PROFILE REQUIREMENTS (\${specType}): +\${profile} +\` : ''} +\${glossary ? 
\` +TERMINOLOGY GLOSSARY (use consistently): +\${JSON.stringify(glossary.terms, null, 2)} +\` : ''} + +MODE: analysis +EXPECTED: Complete architecture with: style justification, component diagram, tech stack table, ADRs, data model, security controls, API overview +CONSTRAINTS: Architecture must support all Must-have requirements. Prefer proven technologies over cutting-edge. +" --tool gemini --mode analysis`, + run_in_background: true +}); + +// Wait for CLI result +``` + +### Step 3: Architecture Review via Codex CLI + +```javascript +// After receiving Gemini analysis, challenge it with Codex +Bash({ + command: `ccw cli -p "PURPOSE: Critical review of proposed architecture - identify weaknesses and risks. +Success: Actionable feedback with specific concerns and improvement suggestions. + +PROPOSED ARCHITECTURE: +${geminiArchitectureOutput.slice(0, 5000)} + +REQUIREMENTS CONTEXT: +${requirements.slice(0, 2000)} + +TASK: +- Challenge each ADR: are the alternatives truly the best options? +- Identify scalability bottlenecks in the component design +- Assess security gaps: authentication, authorization, data protection +- Evaluate technology choices: maturity, community support, fit +- Check for over-engineering or under-engineering +- Verify architecture covers all Must-have requirements +- Rate overall architecture quality: 1-5 with justification + +MODE: analysis +EXPECTED: Architecture review with: per-ADR feedback, scalability concerns, security gaps, technology risks, quality rating +CONSTRAINTS: Be genuinely critical, not just validating. Focus on actionable improvements. +" --tool codex --mode analysis`, + run_in_background: true +}); + +// Wait for CLI result +``` + +### Step 4: Interactive ADR Decisions (Optional) + +```javascript +if (!autoMode) { + // Present ADRs with review feedback to user + // For each ADR where review raised concerns: + AskUserQuestion({ + questions: [ + { + question: "Architecture review raised concerns. 
How should we proceed?", + header: "ADR Review", + multiSelect: false, + options: [ + { label: "Accept as-is", description: "Architecture is sound, proceed" }, + { label: "Incorporate feedback", description: "Adjust ADRs based on review" }, + { label: "Simplify", description: "Reduce complexity, fewer components" } + ] + } + ] + }); + // Apply user decisions to architecture +} +``` + +### Step 5: Codebase Integration Mapping (Conditional) + +```javascript +if (specConfig.has_codebase && discoveryContext) { + // Map new architecture components to existing code + const integrationMapping = discoveryContext.relevant_files.map(f => ({ + new_component: "...", // matched from architecture + existing_module: f.path, + integration_type: "Extend|Replace|New", + notes: f.rationale + })); + // Include in architecture document +} +``` + +### Step 6: Generate architecture/ directory + +```javascript +const template = Read('templates/architecture-doc.md'); + +// Create architecture directory +Bash(`mkdir -p "${workDir}/architecture"`); + +const status = autoMode ? 
'complete' : 'draft'; +const timestamp = new Date().toISOString(); + +// Parse CLI outputs into structured ADRs +const adrs = parseADRs(geminiArchitectureOutput, codexReviewOutput); // [{id, slug, title, ...}] + +// Step 6a: Write individual ADR-*.md files (one per decision) +adrs.forEach(adr => { + // Use ADR-NNN-{slug}.md template from templates/architecture-doc.md + // Fill: id, title, status, context, decision, alternatives, consequences, traces + Write(`${workDir}/architecture/ADR-${adr.id}-${adr.slug}.md`, adrContent); +}); + +// Step 6b: Write _index.md (overview + components + tech stack + links to ADRs) +// Use _index.md template from templates/architecture-doc.md +// Fill: system overview, component diagram, tech stack, ADR links table, +// data model, API design, security controls, infrastructure, codebase integration +Write(`${workDir}/architecture/_index.md`, indexContent); + +// Update spec-config.json +specConfig.phasesCompleted.push({ + phase: 4, + name: "architecture", + output_dir: "architecture/", + output_index: "architecture/_index.md", + file_count: adrs.length + 1, + completed_at: timestamp +}); +Write(`${workDir}/spec-config.json`, JSON.stringify(specConfig, null, 2)); +``` + +## Output + +- **Directory**: `architecture/` + - `_index.md` — Overview, component diagram, tech stack, data model, security, links + - `ADR-NNN-{slug}.md` — Individual Architecture Decision Record (per ADR) +- **Format**: Markdown with YAML frontmatter, cross-linked to requirements via relative paths + +## Quality Checklist + +- [ ] Component diagram present in `_index.md` (Mermaid or ASCII) +- [ ] Tech stack specified (languages, frameworks, key libraries) +- [ ] >= 1 ADR file with alternatives considered +- [ ] Each ADR file lists >= 2 options +- [ ] `_index.md` ADR table links to all individual ADR files +- [ ] Integration points identified +- [ ] Data model described +- [ ] Codebase mapping present (if has_codebase) +- [ ] All files have valid YAML frontmatter +- 
[ ] ADR files link back to requirement files + +## Next Phase + +Proceed to [Phase 5: Epics & Stories](05-epics-stories.md) with the generated `architecture/` directory (entry point: `architecture/_index.md`). + +--- + +## Agent Return Summary + +When executed as a delegated agent, return the following JSON summary to the orchestrator: + +```json +{ + "phase": 4, + "status": "complete", + "files_created": ["architecture/_index.md", "architecture/ADR-001-*.md", "..."], + "file_count": 0, + "codex_review_rating": 0, + "quality_notes": ["list of quality concerns or review feedback addressed"], + "key_decisions": ["architecture style choice", "key ADR decisions"] +} +``` + +The orchestrator will: +1. Validate that `architecture/` directory exists with `_index.md` and ADR files +2. Read `spec-config.json` to confirm `phasesCompleted` was updated +3. Store the summary for downstream phase context diff --git a/.codex/skills/spec-generator/phases/05-epics-stories.md b/.codex/skills/spec-generator/phases/05-epics-stories.md new file mode 100644 index 00000000..fb9c8d45 --- /dev/null +++ b/.codex/skills/spec-generator/phases/05-epics-stories.md @@ -0,0 +1,241 @@ +# Phase 5: Epics & Stories + +> **Execution Mode: Agent Delegated** +> This phase is executed by a `doc-generator` agent. The orchestrator (SKILL.md) passes session context via the Task tool. The agent reads this file for instructions, executes all steps, writes output files, and returns a JSON summary. + +Decompose the specification into executable Epics and Stories with dependency mapping.
+ +## Objective + +- Group requirements into 3-7 logical Epics +- Tag MVP subset of Epics +- Generate 2-5 Stories per Epic in standard user story format +- Map cross-Epic dependencies (Mermaid diagram) +- Generate the epics/ directory using template + +## Input + +- Dependency: `{workDir}/requirements/_index.md`, `{workDir}/architecture/_index.md` (and individual files) +- Reference: `{workDir}/product-brief.md` +- Config: `{workDir}/spec-config.json` +- Template: `templates/epics-template.md` (directory structure: `_index.md` + `EPIC-*.md`) + +## Execution Steps + +### Step 1: Load Phase 2-4 Context + +```javascript +const specConfig = JSON.parse(Read(`${workDir}/spec-config.json`)); +const productBrief = Read(`${workDir}/product-brief.md`); +const requirements = Read(`${workDir}/requirements/_index.md`); +const architecture = Read(`${workDir}/architecture/_index.md`); + +let glossary = null; +try { + glossary = JSON.parse(Read(`${workDir}/glossary.json`)); +} catch (e) { /* proceed without */ } +``` + +### Step 2: Epic Decomposition via Gemini CLI + +```javascript +Bash({ + command: `ccw cli -p "PURPOSE: Decompose requirements into executable Epics and Stories for implementation planning. +Success: 3-7 Epics with prioritized Stories, dependency map, and MVP subset clearly defined.
+ +PRODUCT BRIEF (summary): +${productBrief.slice(0, 2000)} + +REQUIREMENTS: +${requirements.slice(0, 5000)} + +ARCHITECTURE (summary): +${architecture.slice(0, 3000)} + +TASK: +- Group requirements into 3-7 logical Epics: + - Each Epic: EPIC-NNN ID, title, description, priority (Must/Should/Could) + - Group by functional domain or user journey stage + - Tag MVP Epics (minimum set for initial release) + +- For each Epic, generate 2-5 Stories: + - Each Story: STORY-{EPIC}-NNN ID, title + - User story format: As a [persona], I want [action] so that [benefit] + - 2-4 acceptance criteria per story (testable) + - Relative size estimate: S/M/L/XL + - Trace to source requirement(s): REQ-NNN + +- Create dependency map: + - Cross-Epic dependencies (which Epics block others) + - Mermaid graph LR format + - Recommended execution order with rationale + +- Define MVP: + - Which Epics are in MVP + - MVP definition of done (3-5 criteria) + - What is explicitly deferred post-MVP + +MODE: analysis +EXPECTED: Structured output with: Epic list (ID, title, priority, MVP flag), Stories per Epic (ID, user story, AC, size, trace), dependency Mermaid diagram, execution order, MVP definition +CONSTRAINTS: +- Every Must-have requirement must appear in at least one Story +- Stories must be small enough to implement independently (no XL stories in MVP) +- Dependencies should be minimized across Epics +\${glossary ? \`- Maintain terminology consistency with glossary: \${glossary.terms.map(t => t.term).join(', ')}\` : ''} +" --tool gemini --mode analysis`, + run_in_background: true +}); + +// Wait for CLI result +``` + +### Step 2.5: Codex Epics Review + +After receiving Gemini decomposition results, validate epic/story quality via Codex CLI: + +```javascript +Bash({ + command: `ccw cli -p "PURPOSE: Critical review of epic/story decomposition - validate coverage, sizing, and dependency structure. +Success: Actionable feedback on epic quality with specific issues identified. 
+ +GENERATED EPICS AND STORIES: +${geminiEpicsOutput.slice(0, 5000)} + +REQUIREMENTS (Must-Have): +${mustHaveRequirements.slice(0, 2000)} + +TASK: +- Verify Must-Have requirement coverage: every Must requirement appears in at least one Story +- Check MVP story sizing: no XL stories in MVP epics (too large to implement independently) +- Validate dependency graph: no circular dependencies between Epics +- Assess acceptance criteria: every Story AC is specific and testable +- Verify traceability: Stories trace back to specific REQ-NNN IDs +- Check Epic granularity: 3-7 epics (not too few/many), 2-5 stories each +- Rate overall decomposition quality: 1-5 with justification + +MODE: analysis +EXPECTED: Epic review with: coverage gaps, oversized stories, dependency issues, traceability gaps, quality rating +CONSTRAINTS: Focus on issues that would block execution planning. Be specific about which Story/Epic has problems. +" --tool codex --mode analysis`, + run_in_background: true +}); + +// Wait for Codex review result +// Integrate feedback into epics before writing files: +// - Add missing Stories for uncovered Must requirements +// - Split XL stories in MVP epics into smaller units +// - Fix dependency cycles identified by Codex +// - Improve vague acceptance criteria +``` + +### Step 3: Interactive Validation (Optional) + +```javascript +if (!autoMode) { + // Present Epic overview table and dependency diagram + AskUserQuestion({ + questions: [ + { + question: "Review the Epic breakdown. 
Any adjustments needed?", + header: "Epics", + multiSelect: false, + options: [ + { label: "Looks good", description: "Epic structure is appropriate" }, + { label: "Merge epics", description: "Some epics should be combined" }, + { label: "Split epic", description: "An epic is too large, needs splitting" }, + { label: "Adjust MVP", description: "Change which epics are in MVP" } + ] + } + ] + }); + // Apply user adjustments +} +``` + +### Step 4: Generate epics/ directory + +```javascript +const template = Read('templates/epics-template.md'); + +// Create epics directory +Bash(`mkdir -p "${workDir}/epics"`); + +const status = autoMode ? 'complete' : 'draft'; +const timestamp = new Date().toISOString(); + +// Parse CLI output into structured Epics +const epicsList = parseEpics(cliOutput); // [{id, slug, title, priority, mvp, size, stories[], reqs[], adrs[], deps[]}] + +// Step 4a: Write individual EPIC-*.md files (one per Epic, stories included) +epicsList.forEach(epic => { + // Use EPIC-NNN-{slug}.md template from templates/epics-template.md + // Fill: id, title, priority, mvp, size, description, requirements links, + // architecture links, dependency links, stories with user stories + AC + Write(`${workDir}/epics/EPIC-${epic.id}-${epic.slug}.md`, epicContent); +}); + +// Step 4b: Write _index.md (overview + dependency map + MVP scope + traceability) +// Use _index.md template from templates/epics-template.md +// Fill: epic overview table (with links), dependency Mermaid diagram, +// execution order, MVP scope, traceability matrix, estimation summary +Write(`${workDir}/epics/_index.md`, indexContent); + +// Update spec-config.json +specConfig.phasesCompleted.push({ + phase: 5, + name: "epics-stories", + output_dir: "epics/", + output_index: "epics/_index.md", + file_count: epicsList.length + 1, + completed_at: timestamp +}); +Write(`${workDir}/spec-config.json`, JSON.stringify(specConfig, null, 2)); +``` + +## Output + +- **Directory**: `epics/` + - `_index.md` — 
Overview table, dependency map, MVP scope, traceability matrix, links + - `EPIC-NNN-{slug}.md` — Individual Epic with Stories (per Epic) +- **Format**: Markdown with YAML frontmatter, cross-linked to requirements and architecture via relative paths + +## Quality Checklist + +- [ ] 3-7 Epic files with EPIC-NNN IDs +- [ ] >= 1 Epic tagged as MVP in frontmatter +- [ ] 2-5 Stories per Epic file +- [ ] Stories use "As a...I want...So that..." format +- [ ] `_index.md` has cross-Epic dependency map (Mermaid) +- [ ] `_index.md` links to all individual Epic files +- [ ] Relative sizing (S/M/L/XL) per Story +- [ ] Epic files link to requirement files and ADR files +- [ ] All files have valid YAML frontmatter + +## Next Phase + +Proceed to [Phase 6: Readiness Check](06-readiness-check.md) to validate the complete specification package. + +--- + +## Agent Return Summary + +When executed as a delegated agent, return the following JSON summary to the orchestrator: + +```json +{ + "phase": 5, + "status": "complete", + "files_created": ["epics/_index.md", "epics/EPIC-001-*.md", "..."], + "file_count": 0, + "codex_review_integrated": true, + "mvp_epic_count": 0, + "total_story_count": 0, + "quality_notes": ["list of quality concerns or Codex feedback items addressed"], + "key_decisions": ["MVP scope decisions", "dependency resolution choices"] +} +``` + +The orchestrator will: +1. Validate that `epics/` directory exists with `_index.md` and EPIC files +2. Read `spec-config.json` to confirm `phasesCompleted` was updated +3. 
Store the summary for downstream phase context diff --git a/.codex/skills/spec-generator/phases/06-5-auto-fix.md b/.codex/skills/spec-generator/phases/06-5-auto-fix.md new file mode 100644 index 00000000..37508fcc --- /dev/null +++ b/.codex/skills/spec-generator/phases/06-5-auto-fix.md @@ -0,0 +1,172 @@ +# Phase 6.5: Auto-Fix + +> **Execution Mode: Agent Delegated** +> This phase is executed by a `doc-generator` agent when triggered by the orchestrator after Phase 6 identifies issues. The agent reads this file for instructions, applies fixes to affected documents, and returns a JSON summary. + +Automatically repair specification issues identified in Phase 6 Readiness Check. + +## Objective + +- Parse readiness-report.md to extract Error and Warning items +- Group issues by originating Phase (2-5) +- Re-generate affected sections with error context injected into CLI prompts +- Re-run Phase 6 validation after fixes + +## Input + +- Dependency: `{workDir}/readiness-report.md` (Phase 6 output) +- Config: `{workDir}/spec-config.json` (with iteration_count) +- All Phase 2-5 outputs + +## Execution Steps + +### Step 1: Parse Readiness Report + +```javascript +const readinessReport = Read(`${workDir}/readiness-report.md`); +const specConfig = JSON.parse(Read(`${workDir}/spec-config.json`)); + +// Load glossary for terminology consistency during fixes +let glossary = null; +try { + glossary = JSON.parse(Read(`${workDir}/glossary.json`)); +} catch (e) { /* proceed without */ } + +// Extract issues from readiness report +// Parse Error and Warning severity items +// Group by originating phase: +// Phase 2 issues: vision, problem statement, scope, personas +// Phase 3 issues: requirements, acceptance criteria, priority, traceability +// Phase 4 issues: architecture, ADRs, tech stack, data model, state machine +// Phase 5 issues: epics, stories, dependencies, MVP scope + +const issuesByPhase = { + 2: [], // product brief issues + 3: [], // requirements issues + 4: [], // 
architecture issues + 5: [] // epics issues +}; + +// Parse structured issues from report +// Each issue: { severity: "Error"|"Warning", description: "...", location: "file:section" } + +// Map phase numbers to output files +const phaseOutputFile = { + 2: 'product-brief.md', + 3: 'requirements/_index.md', + 4: 'architecture/_index.md', + 5: 'epics/_index.md' +}; +``` + +### Step 2: Fix Affected Phases (Sequential) + +For each phase with issues (in order 2 -> 3 -> 4 -> 5): + +```javascript +for (const [phase, issues] of Object.entries(issuesByPhase)) { + if (issues.length === 0) continue; + + const errorContext = issues.map(i => `[${i.severity}] ${i.description} (at ${i.location})`).join('\n'); + + // Read current phase output + const currentOutput = Read(`${workDir}/${phaseOutputFile[phase]}`); + + Bash({ + command: `ccw cli -p "PURPOSE: Fix specification issues identified in readiness check for Phase ${phase}. +Success: All listed issues resolved while maintaining consistency with other documents. + +CURRENT DOCUMENT: +${currentOutput.slice(0, 5000)} + +ISSUES TO FIX: +${errorContext} + +${glossary ? 
`GLOSSARY (maintain consistency): +${JSON.stringify(glossary.terms, null, 2)}` : ''} + +TASK: +- Address each listed issue specifically +- Maintain all existing content that is not flagged +- Ensure terminology consistency with glossary +- Preserve YAML frontmatter and cross-references +- Use RFC 2119 keywords for behavioral requirements +- Increment document version number + +MODE: analysis +EXPECTED: Corrected document content addressing all listed issues +CONSTRAINTS: Minimal changes - only fix flagged issues, do not restructure unflagged sections +" --tool gemini --mode analysis`, + run_in_background: true + }); + + // Wait for result, apply fixes to document + // Update document version in frontmatter +} +``` + +### Step 3: Update State + +```javascript +specConfig.phasesCompleted.push({ + phase: 6.5, + name: "auto-fix", + iteration: specConfig.iteration_count, + phases_fixed: Object.keys(issuesByPhase).filter(p => issuesByPhase[p].length > 0), + completed_at: new Date().toISOString() +}); +Write(`${workDir}/spec-config.json`, JSON.stringify(specConfig, null, 2)); +``` + +### Step 4: Re-run Phase 6 Validation + +```javascript +// Re-execute Phase 6: Readiness Check +// This creates a new readiness-report.md +// If still Fail and iteration_count < 2: loop back to Step 1 +// If Pass or iteration_count >= 2: proceed to handoff +``` + +## Output + +- **Updated**: Phase 2-5 documents (only affected ones) +- **Updated**: `spec-config.json` (iteration tracking) +- **Triggers**: Phase 6 re-validation + +## Quality Checklist + +- [ ] All Error-severity issues addressed +- [ ] Warning-severity issues attempted (best effort) +- [ ] Document versions incremented for modified files +- [ ] Terminology consistency maintained +- [ ] Cross-references still valid after fixes +- [ ] Iteration count not exceeded (max 2) + +## Next Phase + +Re-run [Phase 6: Readiness Check](06-readiness-check.md) to validate fixes. 
+ +--- + +## Agent Return Summary + +When executed as a delegated agent, return the following JSON summary to the orchestrator: + +```json +{ + "phase": 6.5, + "status": "complete", + "files_modified": ["list of files that were updated"], + "issues_fixed": { + "errors": 0, + "warnings": 0 + }, + "quality_notes": ["list of fix decisions and remaining concerns"], + "phases_touched": [2, 3, 4, 5] +} +``` + +The orchestrator will: +1. Validate that listed files were actually modified (check version increment) +2. Update `spec-config.json` iteration tracking +3. Re-trigger Phase 6 validation diff --git a/.codex/skills/spec-generator/phases/06-readiness-check.md b/.codex/skills/spec-generator/phases/06-readiness-check.md new file mode 100644 index 00000000..ffa59e42 --- /dev/null +++ b/.codex/skills/spec-generator/phases/06-readiness-check.md @@ -0,0 +1,581 @@ +# Phase 6: Readiness Check + +Validate the complete specification package, generate quality report and executive summary, provide execution handoff options. 
+ +## Objective + +- Cross-document validation: completeness, consistency, traceability, depth +- Generate quality scores per dimension +- Produce readiness-report.md with issue list and traceability matrix +- Produce spec-summary.md as one-page executive summary +- Update all document frontmatter to `status: complete` +- Present handoff options to execution workflows + +## Input + +- All Phase 2-5 outputs: `product-brief.md`, `requirements/_index.md` (+ `REQ-*.md`, `NFR-*.md`), `architecture/_index.md` (+ `ADR-*.md`), `epics/_index.md` (+ `EPIC-*.md`) +- Config: `{workDir}/spec-config.json` +- Reference: `specs/quality-gates.md` + +## Execution Steps + +### Step 1: Load All Documents + +```javascript +const specConfig = JSON.parse(Read(`${workDir}/spec-config.json`)); +const productBrief = Read(`${workDir}/product-brief.md`); +const requirementsIndex = Read(`${workDir}/requirements/_index.md`); +const architectureIndex = Read(`${workDir}/architecture/_index.md`); +const epicsIndex = Read(`${workDir}/epics/_index.md`); +const qualityGates = Read('specs/quality-gates.md'); + +// Load individual files for deep validation +const reqFiles = Glob(`${workDir}/requirements/REQ-*.md`); +const nfrFiles = Glob(`${workDir}/requirements/NFR-*.md`); +const adrFiles = Glob(`${workDir}/architecture/ADR-*.md`); +const epicFiles = Glob(`${workDir}/epics/EPIC-*.md`); +``` + +### Step 2: Cross-Document Validation via Gemini CLI + +```javascript +Bash({ + command: `ccw cli -p "PURPOSE: Validate specification package for completeness, consistency, traceability, and depth. +Success: Comprehensive quality report with scores, issues, and traceability matrix. 
+ +DOCUMENTS TO VALIDATE: + +=== PRODUCT BRIEF === +${productBrief.slice(0, 3000)} + +=== REQUIREMENTS INDEX (${reqFiles.length} REQ + ${nfrFiles.length} NFR files) === +${requirementsIndex.slice(0, 3000)} + +=== ARCHITECTURE INDEX (${adrFiles.length} ADR files) === +${architectureIndex.slice(0, 2500)} + +=== EPICS INDEX (${epicFiles.length} EPIC files) === +${epicsIndex.slice(0, 2500)} + +QUALITY CRITERIA (from quality-gates.md): +${qualityGates.slice(0, 2000)} + +TASK: +Perform 4-dimension validation: + +1. COMPLETENESS (25%): + - All required sections present in each document? + - All template fields filled with substantive content? + - Score 0-100 with specific gaps listed + +2. CONSISTENCY (25%): + - Terminology uniform across documents? + - Terminology glossary compliance: all core terms used consistently per glossary.json definitions? + - No synonym drift (e.g., "user" vs "client" vs "consumer" for same concept)? + - User personas consistent? + - Scope consistent (PRD does not exceed brief)? + - Scope containment: PRD requirements do not exceed product brief's defined scope? + - Non-Goals respected: no requirement or story contradicts explicit Non-Goals? + - Tech stack references match between architecture and epics? + - Score 0-100 with inconsistencies listed + +3. TRACEABILITY (25%): + - Every goal has >= 1 requirement? + - Every Must requirement has architecture coverage? + - Every Must requirement appears in >= 1 story? + - ADR choices reflected in epics? + - Build traceability matrix: Goal -> Requirement -> Architecture -> Epic/Story + - Score 0-100 with orphan items listed + +4. DEPTH (25%): + - Acceptance criteria specific and testable? + - Architecture decisions justified with alternatives? + - Stories estimable by dev team? 
+ - Score 0-100 with vague areas listed + +ALSO: +- List all issues found, classified as Error/Warning/Info +- Generate overall weighted score +- Determine gate: Pass (>=80) / Review (60-79) / Fail (<60) + +MODE: analysis +EXPECTED: JSON-compatible output with: dimension scores, overall score, gate, issues list (severity + description + location), traceability matrix +CONSTRAINTS: Be thorough but fair. Focus on actionable issues. +" --tool gemini --mode analysis`, + run_in_background: true +}); + +// Wait for CLI result +``` + +### Step 2b: Codex Technical Depth Review + +Launch Codex review in parallel with Gemini validation for deeper technical assessment: + +```javascript +Bash({ + command: `ccw cli -p "PURPOSE: Deep technical quality review of specification package - assess architectural rigor and implementation readiness. +Success: Technical quality assessment with specific actionable feedback on ADR quality, data model, security, and observability. + +ARCHITECTURE INDEX: +${architectureIndex.slice(0, 3000)} + +ADR FILES (summaries): +${adrFiles.map(f => Read(f).slice(0, 500)).join('\n---\n')} + +REQUIREMENTS INDEX: +${requirementsIndex.slice(0, 2000)} + +TASK: +- ADR Alternative Quality: Each ADR has >= 2 genuine alternatives with substantive pros/cons (not strawman options) +- Data Model Completeness: All entities referenced in requirements have field-level definitions with types and constraints +- Security Coverage: Authentication, authorization, data protection, and input validation addressed for all external interfaces +- Observability Specification: Metrics, logging, and health checks defined for service/platform types +- Error Handling: Error classification and recovery strategies defined per component +- Configuration Model: All configurable parameters documented with types, defaults, and constraints +- Rate each dimension 1-5 with specific gaps identified + +MODE: analysis +EXPECTED: Technical depth review with: per-dimension scores (1-5), specific 
gaps, improvement recommendations, overall technical readiness assessment +CONSTRAINTS: Focus on gaps that would cause implementation ambiguity. Ignore cosmetic issues. +" --tool codex --mode analysis`, + run_in_background: true +}); + +// Codex result merged with Gemini result in Step 3 +``` + +### Step 2c: Per-Requirement Verification + +Iterate through all individual requirement files for fine-grained verification: + +```javascript +// Load all requirement files +const reqFiles = Glob(`${workDir}/requirements/REQ-*.md`); +const nfrFiles = Glob(`${workDir}/requirements/NFR-*.md`); +const allReqFiles = [...reqFiles, ...nfrFiles]; + +// Load reference documents for cross-checking +const productBrief = Read(`${workDir}/product-brief.md`); +const epicFiles = Glob(`${workDir}/epics/EPIC-*.md`); +const adrFiles = Glob(`${workDir}/architecture/ADR-*.md`); + +// Read all epic content for coverage check +const epicContents = epicFiles.map(f => ({ path: f, content: Read(f) })); +const adrContents = adrFiles.map(f => ({ path: f, content: Read(f) })); + +// Per-requirement verification +const verificationResults = allReqFiles.map(reqFile => { + const content = Read(reqFile); + const reqId = extractReqId(content); // e.g., REQ-001 or NFR-PERF-001 + const priority = extractPriority(content); // Must/Should/Could/Won't + + // Check 1: AC exists and is testable + const hasAC = content.includes('- [ ]') || content.includes('Acceptance Criteria'); + const acTestable = !content.match(/should be (fast|good|reliable|secure)/i); // No vague AC + + // Check 2: Traces back to Brief goal + const tracesLinks = content.match(/product-brief\.md/); + + // Check 3: Must requirements have Story coverage (search EPIC files) + let storyCoverage = priority !== 'Must' ? 'N/A' : + epicContents.some(e => e.content.includes(reqId)) ? 'Covered' : 'MISSING'; + + // Check 4: Must requirements have architecture coverage (search ADR files) + let archCoverage = priority !== 'Must' ? 
'N/A' : + adrContents.some(a => a.content.includes(reqId)) || + Read(`${workDir}/architecture/_index.md`).includes(reqId) ? 'Covered' : 'MISSING'; + + return { + req_id: reqId, + priority, + ac_exists: hasAC ? 'Yes' : 'MISSING', + ac_testable: acTestable ? 'Yes' : 'VAGUE', + brief_trace: tracesLinks ? 'Yes' : 'MISSING', + story_coverage: storyCoverage, + arch_coverage: archCoverage, + pass: hasAC && acTestable && tracesLinks && + (priority !== 'Must' || (storyCoverage === 'Covered' && archCoverage === 'Covered')) + }; +}); + +// Generate Per-Requirement Verification table for readiness-report.md +const verificationTable = ` +## Per-Requirement Verification + +| Req ID | Priority | AC Exists | AC Testable | Brief Trace | Story Coverage | Arch Coverage | Status | +|--------|----------|-----------|-------------|-------------|----------------|---------------|--------| +${verificationResults.map(r => + `| ${r.req_id} | ${r.priority} | ${r.ac_exists} | ${r.ac_testable} | ${r.brief_trace} | ${r.story_coverage} | ${r.arch_coverage} | ${r.pass ? 'PASS' : 'FAIL'} |` +).join('\n')} + +**Summary**: ${verificationResults.filter(r => r.pass).length}/${verificationResults.length} requirements pass all checks. 
+`; +``` + +### Step 3: Generate readiness-report.md + +```javascript +const frontmatterReport = `--- +session_id: ${specConfig.session_id} +phase: 6 +document_type: readiness-report +status: complete +generated_at: ${new Date().toISOString()} +stepsCompleted: ["load-all", "cross-validation", "codex-technical-review", "per-req-verification", "scoring", "report-generation"] +version: 1 +dependencies: + - product-brief.md + - requirements/_index.md + - architecture/_index.md + - epics/_index.md +---`; + +// Report content from CLI validation output: +// - Quality Score Summary (4 dimensions + overall) +// - Gate Decision (Pass/Review/Fail) +// - Issue List (grouped by severity: Error, Warning, Info) +// - Traceability Matrix (Goal -> Req -> Arch -> Epic/Story) +// - Codex Technical Depth Review (per-dimension scores from Step 2b) +// - Per-Requirement Verification Table (from Step 2c) +// - Recommendations for improvement + +Write(`${workDir}/readiness-report.md`, `${frontmatterReport}\n\n${reportContent}`); +``` + +### Step 4: Generate spec-summary.md + +```javascript +const frontmatterSummary = `--- +session_id: ${specConfig.session_id} +phase: 6 +document_type: spec-summary +status: complete +generated_at: ${new Date().toISOString()} +stepsCompleted: ["synthesis"] +version: 1 +dependencies: + - product-brief.md + - requirements/_index.md + - architecture/_index.md + - epics/_index.md + - readiness-report.md +---`; + +// One-page executive summary: +// - Product Name & Vision (from product-brief.md) +// - Problem & Target Users (from product-brief.md) +// - Key Requirements count (Must/Should/Could from requirements.md) +// - Architecture Style & Tech Stack (from architecture.md) +// - Epic Overview (count, MVP scope from epics.md) +// - Quality Score (from readiness-report.md) +// - Recommended Next Step +// - File manifest with links + +Write(`${workDir}/spec-summary.md`, `${frontmatterSummary}\n\n${summaryContent}`); +``` + +### Step 5: Update All Document 
Status + +```javascript +// Update frontmatter status to 'complete' in all documents (directories + single files) +// product-brief.md is a single file +const singleFiles = ['product-brief.md']; +singleFiles.forEach(doc => { + const content = Read(`${workDir}/${doc}`); + Write(`${workDir}/${doc}`, content.replace(/status: draft/, 'status: complete')); +}); + +// Update all files in directories (index + individual files) +const dirFiles = [ + ...Glob(`${workDir}/requirements/*.md`), + ...Glob(`${workDir}/architecture/*.md`), + ...Glob(`${workDir}/epics/*.md`) +]; +dirFiles.forEach(filePath => { + const content = Read(filePath); + if (content.includes('status: draft')) { + Write(filePath, content.replace(/status: draft/, 'status: complete')); + } +}); + +// Update spec-config.json +specConfig.phasesCompleted.push({ + phase: 6, + name: "readiness-check", + output_file: "readiness-report.md", + completed_at: new Date().toISOString() +}); +Write(`${workDir}/spec-config.json`, JSON.stringify(specConfig, null, 2)); +``` + +### Step 6: Handoff Options + +```javascript +AskUserQuestion({ + questions: [ + { + question: "Specification package is complete. 
What would you like to do next?", + header: "Next Step", + multiSelect: false, + options: [ + { + label: "Execute via lite-plan", + description: "Start implementing with /workflow-lite-plan, one Epic at a time" + }, + { + label: "Create roadmap", + description: "Generate execution roadmap with /workflow:req-plan-with-file" + }, + { + label: "Full planning", + description: "Detailed planning with /workflow-plan for the full scope" + }, + { + label: "Export Issues (Phase 7)", + description: "Create issues per Epic with spec links and wave assignment" + }, + { + label: "Iterate & improve", + description: "Re-run failed phases based on readiness report issues (max 2 iterations)" + } + ] + } + ] +}); + +// Based on user selection, execute the corresponding handoff: + +if (selection === "Execute via lite-plan") { + // lite-plan accepts a text description directly + // Read first MVP Epic from individual EPIC-*.md files + const epicFiles = Glob(`${workDir}/epics/EPIC-*.md`); + const firstMvpFile = epicFiles.find(f => { + const content = Read(f); + return content.includes('mvp: true'); + }); + const epicContent = Read(firstMvpFile); + const title = extractTitle(epicContent); // First # heading + const description = extractSection(epicContent, "Description"); + Skill(skill="workflow-lite-plan", args=`"${title}: ${description}"`) +} + +if (selection === "Full planning" || selection === "Create roadmap") { + // === Bridge: Build brainstorm_artifacts compatible structure === + // Reads from directory-based outputs (individual files), maps to .brainstorming/ format + // for context-search-agent auto-discovery → action-planning-agent consumption. 
+ + // Step A: Read spec documents from directories + const specSummary = Read(`${workDir}/spec-summary.md`); + const productBrief = Read(`${workDir}/product-brief.md`); + const requirementsIndex = Read(`${workDir}/requirements/_index.md`); + const architectureIndex = Read(`${workDir}/architecture/_index.md`); + const epicsIndex = Read(`${workDir}/epics/_index.md`); + + // Read individual EPIC files (already split — direct mapping to feature-specs) + const epicFiles = Glob(`${workDir}/epics/EPIC-*.md`); + + // Step B: Build structured description from spec-summary + const structuredDesc = `GOAL: ${extractGoal(specSummary)} +SCOPE: ${extractScope(specSummary)} +CONTEXT: Generated from spec session ${specConfig.session_id}. Source: ${workDir}/`; + + // Step C: Create WFS session (provides session directory + .brainstorming/) + Skill(skill="workflow:session:start", args=`--auto "${structuredDesc}"`) + // → Produces sessionId (WFS-xxx) and session directory at .workflow/active/{sessionId}/ + + // Step D: Create .brainstorming/ bridge files + const brainstormDir = `.workflow/active/${sessionId}/.brainstorming`; + Bash(`mkdir -p "${brainstormDir}/feature-specs"`); + + // D.1: guidance-specification.md (highest priority — action-planning-agent reads first) + // Synthesized from spec-summary + product-brief + architecture/requirements indexes + Write(`${brainstormDir}/guidance-specification.md`, ` +# ${specConfig.seed_analysis.problem_statement} - Confirmed Guidance Specification + +**Source**: spec-generator session ${specConfig.session_id} +**Generated**: ${new Date().toISOString()} +**Spec Directory**: ${workDir} + +## 1. Project Positioning & Goals +${extractSection(productBrief, "Vision")} +${extractSection(productBrief, "Goals")} + +## 2. Requirements Summary +${extractSection(requirementsIndex, "Functional Requirements")} + +## 3. 
Architecture Decisions +${extractSection(architectureIndex, "Architecture Decision Records")} +${extractSection(architectureIndex, "Technology Stack")} + +## 4. Implementation Scope +${extractSection(epicsIndex, "Epic Overview")} +${extractSection(epicsIndex, "MVP Scope")} + +## Feature Decomposition +${extractSection(epicsIndex, "Traceability Matrix")} + +## Appendix: Source Documents +| Document | Path | Description | +|----------|------|-------------| +| Product Brief | ${workDir}/product-brief.md | Vision, goals, scope | +| Requirements | ${workDir}/requirements/ | _index.md + REQ-*.md + NFR-*.md | +| Architecture | ${workDir}/architecture/ | _index.md + ADR-*.md | +| Epics | ${workDir}/epics/ | _index.md + EPIC-*.md | +| Readiness Report | ${workDir}/readiness-report.md | Quality validation | +`); + + // D.2: feature-index.json (each EPIC file mapped to a Feature) + // Path: feature-specs/feature-index.json (matches context-search-agent discovery) + // Directly read from individual EPIC-*.md files (no monolithic parsing needed) + const features = epicFiles.map(epicFile => { + const content = Read(epicFile); + const fm = parseFrontmatter(content); // Extract YAML frontmatter + const basename = path.basename(epicFile, '.md'); // EPIC-001-slug + const epicNum = fm.id.replace('EPIC-', ''); // 001 + const slug = basename.replace(/^EPIC-\d+-/, ''); // slug + return { + id: `F-${epicNum}`, + slug: slug, + name: extractTitle(content), + description: extractSection(content, "Description"), + priority: fm.mvp ? 
"High" : "Medium", + spec_path: `${brainstormDir}/feature-specs/F-${epicNum}-${slug}.md`, + source_epic: fm.id, + source_file: epicFile + }; + }); + Write(`${brainstormDir}/feature-specs/feature-index.json`, JSON.stringify({ + version: "1.0", + source: "spec-generator", + spec_session: specConfig.session_id, + features, + cross_cutting_specs: [] + }, null, 2)); + + // D.3: Feature-spec files — directly adapt from individual EPIC-*.md files + // Since Epics are already individual documents, transform format directly + // Filename pattern: F-{num}-{slug}.md (matches context-search-agent glob F-*-*.md) + features.forEach(feature => { + const epicContent = Read(feature.source_file); + Write(feature.spec_path, ` +# Feature Spec: ${feature.source_epic} - ${feature.name} + +**Source**: ${feature.source_file} +**Priority**: ${feature.priority === "High" ? "MVP" : "Post-MVP"} + +## Description +${extractSection(epicContent, "Description")} + +## Stories +${extractSection(epicContent, "Stories")} + +## Requirements +${extractSection(epicContent, "Requirements")} + +## Architecture +${extractSection(epicContent, "Architecture")} +`); + }); + + // Step E: Invoke downstream workflow + // context-search-agent will auto-discover .brainstorming/ files + // → context-package.json.brainstorm_artifacts populated + // → action-planning-agent loads guidance_specification (P1) + feature_index (P2) + if (selection === "Full planning") { + Skill(skill="workflow-plan", args=`"${structuredDesc}"`) + } else { + Skill(skill="workflow:req-plan-with-file", args=`"${extractGoal(specSummary)}"`) + } +} + +if (selection === "Export Issues (Phase 7)") { + // Proceed to Phase 7: Issue Export + // Read phases/07-issue-export.md and execute +} + +// If user selects "Other": Export only or return to specific phase + +if (selection === "Iterate & improve") { + // Check iteration count + if (specConfig.iteration_count >= 2) { + // Max iterations reached, force handoff + // Present handoff options again 
without iterate + return; + } + + // Update iteration tracking + specConfig.iteration_count = (specConfig.iteration_count || 0) + 1; + specConfig.iteration_history.push({ + iteration: specConfig.iteration_count, + timestamp: new Date().toISOString(), + readiness_score: overallScore, + errors_found: errorCount, + phases_to_fix: affectedPhases + }); + Write(`${workDir}/spec-config.json`, JSON.stringify(specConfig, null, 2)); + + // Proceed to Phase 6.5: Auto-Fix + // Read phases/06-5-auto-fix.md and execute +} +``` + +#### Helper Functions Reference (pseudocode) + +The following helper functions are used in the handoff bridge. They operate on markdown content from individual spec files: + +```javascript +// Extract title from a markdown document (first # heading) +function extractTitle(markdown) { + // Return the text after the first # heading (e.g., "# EPIC-001: Title" → "Title") +} + +// Parse YAML frontmatter from markdown (between --- markers) +function parseFrontmatter(markdown) { + // Return object with: id, priority, mvp, size, requirements, architecture, dependencies +} + +// Extract GOAL/SCOPE from spec-summary frontmatter or ## sections +function extractGoal(specSummary) { /* Return the Vision/Goal line */ } +function extractScope(specSummary) { /* Return the Scope/MVP boundary */ } + +// Extract a named ## section from a markdown document +function extractSection(markdown, sectionName) { + // Return content between ## {sectionName} and next ## heading +} +``` + +## Output + +- **File**: `readiness-report.md` - Quality validation report +- **File**: `spec-summary.md` - One-page executive summary +- **Format**: Markdown with YAML frontmatter + +## Quality Checklist + +- [ ] All document directories validated (product-brief, requirements/, architecture/, epics/) +- [ ] All frontmatter parseable and valid (index + individual files) +- [ ] Cross-references checked (relative links between directories) +- [ ] Overall quality score calculated +- [ ] No unresolved 
Error-severity issues +- [ ] Traceability matrix generated +- [ ] spec-summary.md created +- [ ] All document statuses updated to 'complete' (all files in all directories) +- [ ] Handoff options presented + +## Completion + +This is the final phase. The specification package is ready for execution handoff. + +### Output Files Manifest + +| Path | Phase | Description | +|------|-------|-------------| +| `spec-config.json` | 1 | Session configuration and state | +| `discovery-context.json` | 1 | Codebase exploration (optional) | +| `product-brief.md` | 2 | Product brief with multi-perspective synthesis | +| `requirements/` | 3 | Directory: `_index.md` + `REQ-*.md` + `NFR-*.md` | +| `architecture/` | 4 | Directory: `_index.md` + `ADR-*.md` | +| `epics/` | 5 | Directory: `_index.md` + `EPIC-*.md` | +| `readiness-report.md` | 6 | Quality validation report | +| `spec-summary.md` | 6 | One-page executive summary | diff --git a/.codex/skills/spec-generator/phases/07-issue-export.md b/.codex/skills/spec-generator/phases/07-issue-export.md new file mode 100644 index 00000000..b93bc187 --- /dev/null +++ b/.codex/skills/spec-generator/phases/07-issue-export.md @@ -0,0 +1,329 @@ +# Phase 7: Issue Export + +Map specification Epics to issues, create them via `ccw issue create`, and generate an export report with spec document links. + +> **Execution Mode: Inline** +> This phase runs in the main orchestrator context (not delegated to agent) for direct access to `ccw issue create` CLI and interactive handoff options. 
+ +## Objective + +- Read all EPIC-*.md files from Phase 5 output +- Assign waves: MVP epics → wave-1, non-MVP → wave-2 +- Create one issue per Epic via `ccw issue create` +- Map Epic dependencies to issue dependencies +- Generate issue-export-report.md with mapping table and spec links +- Present handoff options for execution + +## Input + +- Dependency: `{workDir}/epics/_index.md` (and individual `EPIC-*.md` files) +- Reference: `{workDir}/readiness-report.md`, `{workDir}/spec-config.json` +- Reference: `{workDir}/product-brief.md`, `{workDir}/requirements/_index.md`, `{workDir}/architecture/_index.md` + +## Execution Steps + +### Step 1: Load Epic Files + +```javascript +const specConfig = JSON.parse(Read(`${workDir}/spec-config.json`)); +const epicFiles = Glob(`${workDir}/epics/EPIC-*.md`); +const epicsIndex = Read(`${workDir}/epics/_index.md`); + +// Parse each Epic file +const epics = epicFiles.map(epicFile => { + const content = Read(epicFile); + const fm = parseFrontmatter(content); + const title = extractTitle(content); + const description = extractSection(content, "Description"); + const stories = extractSection(content, "Stories"); + const reqRefs = extractSection(content, "Requirements"); + const adrRefs = extractSection(content, "Architecture"); + const deps = fm.dependencies || []; + + return { + file: epicFile, + id: fm.id, // e.g., EPIC-001 + title, + description, + stories, + reqRefs, + adrRefs, + priority: fm.priority, + mvp: fm.mvp || false, + dependencies: deps, // other EPIC IDs this depends on + size: fm.size + }; +}); +``` + +### Step 2: Wave Assignment + +```javascript +const epicWaves = epics.map(epic => ({ + ...epic, + wave: epic.mvp ? 
1 : 2 +})); + +// Log wave assignment +const wave1 = epicWaves.filter(e => e.wave === 1); +const wave2 = epicWaves.filter(e => e.wave === 2); +// wave-1: MVP epics (must-have, core functionality) +// wave-2: Post-MVP epics (should-have, enhancements) +``` + +### Step 3: Issue Creation Loop + +```javascript +const createdIssues = []; +const epicToIssue = {}; // EPIC-ID -> Issue ID mapping + +for (const epic of epicWaves) { + // Build issue JSON matching roadmap-with-file schema + const issueData = { + title: `[${specConfig.session_id}] ${epic.title}`, + status: "pending", + priority: epic.wave === 1 ? 2 : 3, // wave-1 = higher priority + context: `## ${epic.title} + +${epic.description} + +## Stories +${epic.stories} + +## Spec References +- Epic: ${epic.file} +- Requirements: ${epic.reqRefs} +- Architecture: ${epic.adrRefs} +- Product Brief: ${workDir}/product-brief.md +- Full Spec: ${workDir}/`, + source: "text", + tags: [ + "spec-generated", + `spec:${specConfig.session_id}`, + `wave-${epic.wave}`, + epic.mvp ? 
"mvp" : "post-mvp", + `epic:${epic.id}` + ], + extended_context: { + notes: { + session: specConfig.session_id, + spec_dir: workDir, + source_epic: epic.id, + wave: epic.wave, + depends_on_issues: [], // Filled in Step 4 + spec_documents: { + product_brief: `${workDir}/product-brief.md`, + requirements: `${workDir}/requirements/_index.md`, + architecture: `${workDir}/architecture/_index.md`, + epic: epic.file + } + } + }, + lifecycle_requirements: { + test_strategy: "acceptance", + regression_scope: "affected", + acceptance_type: "manual", + commit_strategy: "per-epic" + } + }; + + // Create issue via ccw issue create (pipe JSON to avoid shell escaping) + const result = Bash(`echo '${JSON.stringify(issueData)}' | ccw issue create`); + + // Parse returned issue ID + const issueId = JSON.parse(result).id; // e.g., ISS-20260308-001 + epicToIssue[epic.id] = issueId; + + createdIssues.push({ + epic_id: epic.id, + epic_title: epic.title, + issue_id: issueId, + wave: epic.wave, + priority: issueData.priority, + mvp: epic.mvp + }); +} +``` + +### Step 4: Epic Dependency → Issue Dependency Mapping + +```javascript +// Map EPIC dependencies to Issue dependencies +for (const epic of epicWaves) { + if (epic.dependencies.length === 0) continue; + + const issueId = epicToIssue[epic.id]; + const depIssueIds = epic.dependencies + .map(depEpicId => epicToIssue[depEpicId]) + .filter(Boolean); + + if (depIssueIds.length > 0) { + // Update issue's extended_context.notes.depends_on_issues + // This is informational — actual dependency enforcement is in execution phase + // Note: ccw issue create already created the issue; dependency info is in the context + } +} +``` + +### Step 5: Generate issue-export-report.md + +```javascript +const timestamp = new Date().toISOString(); + +const reportContent = `--- +session_id: ${specConfig.session_id} +phase: 7 +document_type: issue-export-report +status: complete +generated_at: ${timestamp} +stepsCompleted: ["load-epics", "wave-assignment", 
"issue-creation", "dependency-mapping", "report-generation"] +version: 1 +dependencies: + - epics/_index.md + - readiness-report.md +--- + +# Issue Export Report + +## Summary + +- **Session**: ${specConfig.session_id} +- **Issues Created**: ${createdIssues.length} +- **Wave 1 (MVP)**: ${wave1.length} issues +- **Wave 2 (Post-MVP)**: ${wave2.length} issues +- **Export Date**: ${timestamp} + +## Issue Mapping + +| Epic ID | Epic Title | Issue ID | Wave | Priority | MVP | +|---------|-----------|----------|------|----------|-----| +${createdIssues.map(i => + `| ${i.epic_id} | ${i.epic_title} | ${i.issue_id} | ${i.wave} | ${i.priority} | ${i.mvp ? 'Yes' : 'No'} |` +).join('\n')} + +## Spec Document Links + +| Document | Path | Description | +|----------|------|-------------| +| Product Brief | ${workDir}/product-brief.md | Vision, goals, scope | +| Requirements | ${workDir}/requirements/_index.md | Functional + non-functional requirements | +| Architecture | ${workDir}/architecture/_index.md | Components, ADRs, tech stack | +| Epics | ${workDir}/epics/_index.md | Epic/Story breakdown | +| Readiness Report | ${workDir}/readiness-report.md | Quality validation | +| Spec Summary | ${workDir}/spec-summary.md | Executive summary | + +## Dependency Map + +| Issue ID | Depends On | +|----------|-----------| +${createdIssues.map(i => { + const epic = epicWaves.find(e => e.id === i.epic_id); + const deps = (epic.dependencies || []).map(d => epicToIssue[d]).filter(Boolean); + return `| ${i.issue_id} | ${deps.length > 0 ? deps.join(', ') : 'None'} |`; +}).join('\n')} + +## Next Steps + +1. **team-planex**: Execute all issues via coordinated team workflow +2. **Wave 1 only**: Execute MVP issues first (${wave1.length} issues) +3. **View issues**: Browse created issues via \`ccw issue list --tag spec:${specConfig.session_id}\` +4. 
**Manual review**: Review individual issues before execution +`; + +Write(`${workDir}/issue-export-report.md`, reportContent); +``` + +### Step 6: Update spec-config.json + +```javascript +specConfig.issue_ids = createdIssues.map(i => i.issue_id); +specConfig.issues_created = createdIssues.length; +specConfig.phasesCompleted.push({ + phase: 7, + name: "issue-export", + output_file: "issue-export-report.md", + issues_created: createdIssues.length, + wave_1_count: wave1.length, + wave_2_count: wave2.length, + completed_at: timestamp +}); +Write(`${workDir}/spec-config.json`, JSON.stringify(specConfig, null, 2)); +``` + +### Step 7: Handoff Options + +```javascript +AskUserQuestion({ + questions: [ + { + question: `${createdIssues.length} issues created from ${epicWaves.length} Epics. What would you like to do next?`, + header: "Next Step", + multiSelect: false, + options: [ + { + label: "Execute via team-planex", + description: `Execute all ${createdIssues.length} issues with coordinated team workflow` + }, + { + label: "Wave 1 only", + description: `Execute ${wave1.length} MVP issues first` + }, + { + label: "View issues", + description: "Browse created issues before deciding" + }, + { + label: "Done", + description: "Export complete, handle manually" + } + ] + } + ] +}); + +// Based on user selection: +if (selection === "Execute via team-planex") { + const issueIds = createdIssues.map(i => i.issue_id).join(','); + Skill({ skill: "team-planex", args: `--issues ${issueIds}` }); +} + +if (selection === "Wave 1 only") { + const wave1Ids = createdIssues.filter(i => i.wave === 1).map(i => i.issue_id).join(','); + Skill({ skill: "team-planex", args: `--issues ${wave1Ids}` }); +} + +if (selection === "View issues") { + Bash(`ccw issue list --tag spec:${specConfig.session_id}`); +} +``` + +## Output + +- **File**: `issue-export-report.md` — Issue mapping table + spec links + next steps +- **Updated**: `.workflow/issues/issues.jsonl` — New issue entries appended +- 
**Updated**: `spec-config.json` — Phase 7 completion + issue IDs + +## Quality Checklist + +- [ ] All MVP Epics have corresponding issues created +- [ ] All non-MVP Epics have corresponding issues created +- [ ] Issue tags include `spec-generated` and `spec:{session_id}` +- [ ] Issue `extended_context.notes.spec_documents` paths are correct +- [ ] Wave assignment matches MVP status (MVP → wave-1, non-MVP → wave-2) +- [ ] Epic dependencies mapped to issue dependency references +- [ ] `issue-export-report.md` generated with mapping table +- [ ] `spec-config.json` updated with `issue_ids` and `issues_created` +- [ ] Handoff options presented + +## Error Handling + +| Error | Blocking? | Action | +|-------|-----------|--------| +| `ccw issue create` fails for one Epic | No | Log error, continue with remaining Epics, report partial creation | +| No EPIC files found | Yes | Error and return to Phase 5 | +| All issue creations fail | Yes | Error with CLI diagnostic, suggest manual creation | +| Dependency EPIC not found in mapping | No | Skip dependency link, log warning | + +## Completion + +Phase 7 is the final phase. The specification package has been fully converted to executable issues ready for team-planex or manual execution. diff --git a/.codex/skills/spec-generator/specs/document-standards.md b/.codex/skills/spec-generator/specs/document-standards.md new file mode 100644 index 00000000..7fc66223 --- /dev/null +++ b/.codex/skills/spec-generator/specs/document-standards.md @@ -0,0 +1,295 @@ +# Document Standards + +Defines format conventions, YAML frontmatter schema, naming rules, and content structure for all spec-generator outputs. 
+
+## When to Use
+
+| Phase | Usage | Section |
+|-------|-------|---------|
+| All Phases | Frontmatter format | YAML Frontmatter Schema |
+| All Phases | File naming | Naming Conventions |
+| Phase 2-5 | Document structure | Content Structure |
+| Phase 6 | Validation reference | All sections |
+
+---
+
+## YAML Frontmatter Schema
+
+Every generated document MUST begin with YAML frontmatter:
+
+```yaml
+---
+session_id: SPEC-{slug}-{YYYY-MM-DD}
+phase: {1-7}
+document_type: {product-brief|requirements|architecture|epics|readiness-report|spec-summary|issue-export-report}
+status: draft|review|complete
+generated_at: {ISO8601 timestamp}
+stepsCompleted: []
+version: 1
+dependencies:
+  - {list of input documents used}
+---
+```
+
+### Field Definitions
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `session_id` | string | Yes | Session identifier matching spec-config.json |
+| `phase` | number | Yes | Phase number that generated this document (1-7; issue-export-report is Phase 7) |
+| `document_type` | string | Yes | One of: product-brief, requirements, architecture, epics, readiness-report, spec-summary, issue-export-report |
+| `status` | enum | Yes | draft (initial), review (user reviewed), complete (finalized) |
+| `generated_at` | string | Yes | ISO8601 timestamp of generation |
+| `stepsCompleted` | array | Yes | List of step IDs completed during generation |
+| `version` | number | Yes | Document version, incremented on re-generation |
+| `dependencies` | array | No | List of input files this document depends on |
+
+### Status Transitions
+
+```
+draft -> review -> complete
+  |                  ^
+  +------------------+ (direct promotion in auto mode)
+```
+
+- **draft**: Initial generation, not yet user-reviewed
+- **review**: User has reviewed and provided feedback
+- **complete**: Finalized, ready for downstream consumption
+
+In auto mode (`-y`), documents are promoted directly from `draft` to `complete`. 
+
+---
+
+## Naming Conventions
+
+### Session ID Format
+
+```
+SPEC-{slug}-{YYYY-MM-DD}
+```
+
+- **slug**: Lowercase, alphanumeric + Chinese characters, hyphens as separators, max 40 chars
+- **date**: UTC+8 date in YYYY-MM-DD format
+
+Examples:
+- `SPEC-task-management-system-2026-02-11`
+- `SPEC-user-auth-oauth-2026-02-11`
+
+### Output Files
+
+| File | Phase | Description |
+|------|-------|-------------|
+| `spec-config.json` | 1 | Session configuration and state |
+| `discovery-context.json` | 1 | Codebase exploration results (optional) |
+| `refined-requirements.json` | 1.5 | Confirmed requirements after discussion |
+| `glossary.json` | 2 | Terminology glossary for cross-document consistency |
+| `product-brief.md` | 2 | Product brief document |
+| `requirements/` | 3 | PRD directory: `_index.md` + `REQ-*.md` + `NFR-*.md` |
+| `architecture/` | 4 | Architecture directory: `_index.md` + `ADR-*.md` |
+| `epics/` | 5 | Epic/Story directory: `_index.md` + `EPIC-*.md` |
+| `readiness-report.md` | 6 | Quality validation report |
+| `spec-summary.md` | 6 | One-page executive summary |
+| `issue-export-report.md` | 7 | Issue export report with Epic→Issue mapping |
+
+### Output Directory
+
+```
+.workflow/.spec/{session-id}/
+```
+
+---
+
+## Content Structure
+
+### Heading Hierarchy
+
+- `#` (H1): Document title only (one per document)
+- `##` (H2): Major sections
+- `###` (H3): Subsections
+- `####` (H4): Detail items (use sparingly)
+
+Maximum depth: 4 levels. Prefer flat structures.
+
+### Section Ordering
+
+Every document follows this general pattern:
+
+1. **YAML Frontmatter** (mandatory)
+2. **Title** (H1)
+3. **Executive Summary** (2-3 sentences)
+4. **Core Content Sections** (H2, document-specific)
+5. **Open Questions / Risks** (if applicable)
+6. 
**References / Traceability** (links to upstream/downstream docs) + +### Formatting Rules + +| Element | Format | Example | +|---------|--------|---------| +| Requirements | `REQ-{NNN}` prefix | REQ-001: User login | +| Acceptance criteria | Checkbox list | `- [ ] User can log in with email` | +| Architecture decisions | `ADR-{NNN}` prefix | ADR-001: Use PostgreSQL | +| Epics | `EPIC-{NNN}` prefix | EPIC-001: Authentication | +| Stories | `STORY-{EPIC}-{NNN}` prefix | STORY-001-001: Login form | +| Priority tags | MoSCoW labels | `[Must]`, `[Should]`, `[Could]`, `[Won't]` | +| Mermaid diagrams | Fenced code blocks | ````mermaid ... ``` `` | +| Code examples | Language-tagged blocks | ````typescript ... ``` `` | + +### Cross-Reference Format + +Use relative references between documents: + +```markdown +See [Product Brief](product-brief.md#section-name) for details. +Derived from [REQ-001](requirements.md#req-001). +``` + +### Language + +- Document body: Follow user's input language (Chinese or English) +- Technical identifiers: Always English (REQ-001, ADR-001, EPIC-001) +- YAML frontmatter keys: Always English + +--- + +## spec-config.json Schema + +```json +{ + "session_id": "string (required)", + "seed_input": "string (required) - original user input", + "input_type": "text|file (required)", + "timestamp": "ISO8601 (required)", + "mode": "interactive|auto (required)", + "complexity": "simple|moderate|complex (required)", + "depth": "light|standard|comprehensive (required)", + "focus_areas": ["string array"], + "seed_analysis": { + "problem_statement": "string", + "target_users": ["string array"], + "domain": "string", + "constraints": ["string array"], + "dimensions": ["string array - 3-5 exploration dimensions"] + }, + "has_codebase": "boolean", + "spec_type": "service|api|library|platform (required) - type of specification", + "iteration_count": "number (required, default 0) - number of auto-fix iterations completed", + "iteration_history": [ + { + 
"iteration": "number", + "timestamp": "ISO8601", + "readiness_score": "number (0-100)", + "errors_found": "number", + "phases_fixed": ["number array - phase numbers that were re-generated"] + } + ], + "refined_requirements_file": "string (optional) - path to refined-requirements.json", + "phasesCompleted": [ + { + "phase": "number (1-6)", + "name": "string (phase name)", + "output_file": "string (primary output file)", + "completed_at": "ISO8601" + } + ], + "issue_ids": ["string array (optional) - IDs of issues created in Phase 7"], + "issues_created": "number (optional, default 0) - count of issues created in Phase 7" +} +``` + +--- + +## refined-requirements.json Schema + +```json +{ + "session_id": "string (required) - matches spec-config.json", + "phase": "1.5", + "generated_at": "ISO8601 (required)", + "source": "interactive-discussion|auto-expansion (required)", + "discussion_rounds": "number (required) - 0 for auto mode", + "clarified_problem_statement": "string (required) - refined problem statement", + "confirmed_target_users": [ + { + "name": "string", + "needs": ["string array"], + "pain_points": ["string array"] + } + ], + "confirmed_domain": "string", + "confirmed_features": [ + { + "name": "string", + "description": "string", + "acceptance_criteria": ["string array"], + "edge_cases": ["string array"], + "priority": "must|should|could|unset" + } + ], + "non_functional_requirements": [ + { + "type": "Performance|Security|Usability|Scalability|Reliability|...", + "details": "string", + "measurable_criteria": "string (optional)" + } + ], + "boundary_conditions": { + "in_scope": ["string array"], + "out_of_scope": ["string array"], + "constraints": ["string array"] + }, + "integration_points": ["string array"], + "key_assumptions": ["string array"], + "discussion_log": [ + { + "round": "number", + "agent_prompt": "string", + "user_response": "string", + "timestamp": "ISO8601" + } + ] +} +``` + +--- + +## glossary.json Schema + +```json +{ + "session_id": 
"string (required) - matches spec-config.json", + "generated_at": "ISO8601 (required)", + "version": "number (required, default 1) - incremented on updates", + "terms": [ + { + "term": "string (required) - the canonical term", + "definition": "string (required) - concise definition", + "aliases": ["string array - acceptable alternative names"], + "first_defined_in": "string (required) - source document path", + "category": "core|technical|business (required)" + } + ] +} +``` + +### Glossary Usage Rules + +- Terms MUST be defined before first use in any document +- All documents MUST use the canonical term from glossary; aliases are for reference only +- Glossary is generated in Phase 2 and injected into all subsequent phase prompts +- Phase 6 validates glossary compliance across all documents + +--- + +## Validation Checklist + +- [ ] Every document starts with valid YAML frontmatter +- [ ] `session_id` matches across all documents in a session +- [ ] `status` field reflects current document state +- [ ] All cross-references resolve to valid targets +- [ ] Heading hierarchy is correct (no skipped levels) +- [ ] Technical identifiers use correct prefixes +- [ ] Output files are in the correct directory +- [ ] `glossary.json` created with >= 5 terms +- [ ] `spec_type` field set in spec-config.json +- [ ] All documents use glossary terms consistently +- [ ] Non-Goals section present in product brief (if applicable) diff --git a/.codex/skills/spec-generator/specs/glossary-template.json b/.codex/skills/spec-generator/specs/glossary-template.json new file mode 100644 index 00000000..4a2fd194 --- /dev/null +++ b/.codex/skills/spec-generator/specs/glossary-template.json @@ -0,0 +1,29 @@ +{ + "$schema": "glossary-v1", + "description": "Template for terminology glossary used across spec-generator documents", + "session_id": "", + "generated_at": "", + "version": 1, + "terms": [ + { + "term": "", + "definition": "", + "aliases": [], + "first_defined_in": "product-brief.md", + 
"category": "core" + } + ], + "_usage_notes": { + "category_values": { + "core": "Domain-specific terms central to the product (e.g., 'Workspace', 'Session')", + "technical": "Technical terms specific to the architecture (e.g., 'gRPC', 'event bus')", + "business": "Business/process terms (e.g., 'Sprint', 'SLA', 'stakeholder')" + }, + "rules": [ + "Terms MUST be defined before first use in any document", + "All documents MUST use the canonical 'term' field consistently", + "Aliases are for reference only - prefer canonical term in all documents", + "Phase 6 validates glossary compliance across all documents" + ] + } +} diff --git a/.codex/skills/spec-generator/specs/quality-gates.md b/.codex/skills/spec-generator/specs/quality-gates.md new file mode 100644 index 00000000..1c6c1299 --- /dev/null +++ b/.codex/skills/spec-generator/specs/quality-gates.md @@ -0,0 +1,270 @@ +# Quality Gates + +Per-phase quality gate criteria and scoring dimensions for spec-generator outputs. + +## When to Use + +| Phase | Usage | Section | +|-------|-------|---------| +| Phase 2-5 | Post-generation self-check | Per-Phase Gates | +| Phase 6 | Cross-document validation | Cross-Document Validation | +| Phase 6 | Final scoring | Scoring Dimensions | + +--- + +## Quality Thresholds + +| Gate | Score | Action | +|------|-------|--------| +| **Pass** | >= 80% | Continue to next phase | +| **Review** | 60-79% | Log warnings, continue with caveats | +| **Fail** | < 60% | Must address issues before continuing | + +In auto mode (`-y`), Review-level issues are logged but do not block progress. + +--- + +## Scoring Dimensions + +### 1. Completeness (25%) + +All required sections present with substantive content. 
+ +| Score | Criteria | +|-------|----------| +| 100% | All template sections filled with detailed content | +| 75% | All sections present, some lack detail | +| 50% | Major sections present but minor sections missing | +| 25% | Multiple major sections missing or empty | +| 0% | Document is a skeleton only | + +### 2. Consistency (25%) + +Terminology, formatting, and references are uniform across documents. + +| Score | Criteria | +|-------|----------| +| 100% | All terms consistent, all references valid, formatting uniform | +| 75% | Minor terminology variations, all references valid | +| 50% | Some inconsistent terms, 1-2 broken references | +| 25% | Frequent inconsistencies, multiple broken references | +| 0% | Documents contradict each other | + +### 3. Traceability (25%) + +Requirements, architecture decisions, and stories trace back to goals. + +| Score | Criteria | +|-------|----------| +| 100% | Every story traces to a requirement, every requirement traces to a goal | +| 75% | Most items traceable, few orphans | +| 50% | Partial traceability, some disconnected items | +| 25% | Weak traceability, many orphan items | +| 0% | No traceability between documents | + +### 4. Depth (25%) + +Content provides sufficient detail for execution teams. 
+
+| Score | Criteria |
+|-------|----------|
+| 100% | Acceptance criteria specific and testable, architecture decisions justified, stories estimable |
+| 75% | Most items detailed enough, few vague areas |
+| 50% | Mix of detailed and vague content |
+| 25% | Mostly high-level, lacking actionable detail |
+| 0% | Too abstract for execution |
+
+---
+
+## Per-Phase Quality Gates
+
+### Phase 1: Discovery
+
+| Check | Criteria | Severity |
+|-------|----------|----------|
+| Session ID valid | Matches `SPEC-{slug}-{date}` format | Error |
+| Problem statement exists | Non-empty, >= 20 characters | Error |
+| Target users identified | >= 1 user group | Error |
+| Dimensions generated | 3-5 exploration dimensions | Warning |
+| Constraints listed | Documented if known; may be empty with justification | Info |
+
+### Phase 1.5: Requirement Expansion & Clarification
+
+| Check | Criteria | Severity |
+|-------|----------|----------|
+| Problem statement refined | More specific than seed, >= 30 characters | Error |
+| Confirmed features | >= 2 features with descriptions | Error |
+| Non-functional requirements | >= 1 identified (performance, security, etc.) 
| Warning | +| Boundary conditions | In-scope and out-of-scope defined | Warning | +| Key assumptions | >= 1 assumption listed | Warning | +| User confirmation | Explicit user confirmation recorded (non-auto mode) | Info | +| Discussion rounds | >= 1 round of interaction (non-auto mode) | Info | + +### Phase 2: Product Brief + +| Check | Criteria | Severity | +|-------|----------|----------| +| Vision statement | Clear, 1-3 sentences | Error | +| Problem statement | Specific and measurable | Error | +| Target users | >= 1 persona with needs described | Error | +| Goals defined | >= 2 measurable goals | Error | +| Success metrics | >= 2 quantifiable metrics | Warning | +| Scope boundaries | In-scope and out-of-scope listed | Warning | +| Multi-perspective | >= 2 CLI perspectives synthesized | Info | +| Terminology glossary generated | glossary.json created with >= 5 terms | Warning | +| Non-Goals section present | At least 1 non-goal with rationale | Warning | +| Concepts section present | Terminology table in product brief | Warning | + +### Phase 3: Requirements (PRD) + +| Check | Criteria | Severity | +|-------|----------|----------| +| Functional requirements | >= 3 with REQ-NNN IDs | Error | +| Acceptance criteria | Every requirement has >= 1 criterion | Error | +| MoSCoW priority | Every requirement tagged | Error | +| Non-functional requirements | >= 1 (performance, security, etc.) 
| Warning | +| User stories | >= 1 per Must-have requirement | Warning | +| Traceability | Requirements trace to product brief goals | Warning | +| RFC 2119 keywords used | Behavioral requirements use MUST/SHOULD/MAY | Warning | +| Data model defined | Core entities have field-level definitions | Warning | + +### Phase 4: Architecture + +| Check | Criteria | Severity | +|-------|----------|----------| +| Component diagram | Present (Mermaid or ASCII) | Error | +| Tech stack specified | Languages, frameworks, key libraries | Error | +| ADR present | >= 1 Architecture Decision Record | Error | +| ADR has alternatives | Each ADR lists >= 2 options considered | Warning | +| Integration points | External systems/APIs identified | Warning | +| Data model | Key entities and relationships described | Warning | +| Codebase mapping | Mapped to existing code (if has_codebase) | Info | +| State machine defined | >= 1 lifecycle state diagram (if service/platform type) | Warning | +| Configuration model defined | All config fields with type/default/constraint (if service type) | Warning | +| Error handling strategy | Per-component error classification and recovery | Warning | +| Observability metrics | >= 3 metrics defined (if service/platform type) | Warning | +| Trust model defined | Trust levels documented (if service type) | Info | +| Implementation guidance | Key decisions for implementers listed | Info | + +### Phase 5: Epics & Stories + +| Check | Criteria | Severity | +|-------|----------|----------| +| Epics defined | 3-7 epics with EPIC-NNN IDs | Error | +| MVP subset | >= 1 epic tagged as MVP | Error | +| Stories per epic | 2-5 stories per epic | Error | +| Story format | "As a...I want...So that..." 
pattern | Warning | +| Dependency map | Cross-epic dependencies documented | Warning | +| Estimation hints | Relative sizing (S/M/L/XL) per story | Info | +| Traceability | Stories trace to requirements | Warning | + +### Phase 6: Readiness Check + +| Check | Criteria | Severity | +|-------|----------|----------| +| All documents exist | product-brief, requirements, architecture, epics | Error | +| Frontmatter valid | All YAML frontmatter parseable and correct | Error | +| Cross-references valid | All document links resolve | Error | +| Overall score >= 60% | Weighted average across 4 dimensions | Error | +| No unresolved Errors | All Error-severity issues addressed | Error | +| Summary generated | spec-summary.md created | Warning | +| Per-requirement verified | All Must requirements pass 4-check verification | Error | +| Codex technical review | Technical depth assessment completed | Warning | +| Dual-source validation | Both Gemini and Codex scores recorded | Warning | + +### Phase 7: Issue Export + +| Check | Criteria | Severity | +|-------|----------|----------| +| All MVP epics have issues | Every MVP-tagged Epic has a corresponding issue created | Error | +| Issue tags correct | Each issue has `spec-generated` and `spec:{session_id}` tags | Error | +| Export report generated | `issue-export-report.md` exists with mapping table | Error | +| Wave assignment correct | MVP epics → wave-1, non-MVP epics → wave-2 | Warning | +| Spec document links valid | `extended_context.notes.spec_documents` paths resolve | Warning | +| Epic dependencies mapped | Cross-epic dependencies reflected in issue dependency references | Warning | +| All epics covered | Non-MVP epics also have corresponding issues | Info | + +--- + +## Cross-Document Validation + +Checks performed during Phase 6 across all documents: + +### Completeness Matrix + +``` +Product Brief goals -> Requirements (each goal has >= 1 requirement) +Requirements -> Architecture (each Must requirement has design 
coverage) +Requirements -> Epics (each Must requirement appears in >= 1 story) +Architecture ADRs -> Epics (tech choices reflected in implementation stories) +Glossary terms -> All Documents (core terms used consistently) +Non-Goals (Brief) -> Requirements + Epics (no contradictions) +``` + +### Consistency Checks + +| Check | Documents | Rule | +|-------|-----------|------| +| Terminology | All | Same term used consistently (no synonyms for same concept) | +| User personas | Brief + PRD + Epics | Same user names/roles throughout | +| Scope | Brief + PRD | PRD scope does not exceed brief scope | +| Tech stack | Architecture + Epics | Stories reference correct technologies | +| Glossary compliance | All | Core terms match glossary.json definitions, no synonym drift | +| Scope containment | Brief + PRD | PRD requirements do not introduce scope beyond brief boundaries | +| Non-Goals respected | Brief + PRD + Epics | No requirement/story contradicts explicit Non-Goals | + +### Traceability Matrix Format + +```markdown +| Goal | Requirements | Architecture | Epics | +|------|-------------|--------------|-------| +| G-001: ... | REQ-001, REQ-002 | ADR-001 | EPIC-001 | +| G-002: ... 
| REQ-003 | ADR-002 | EPIC-002, EPIC-003 | +``` + +--- + +## Issue Classification + +### Error (Must Fix) + +- Missing required document or section +- Broken cross-references +- Contradictory information between documents +- Empty acceptance criteria on Must-have requirements +- No MVP subset defined in epics + +### Warning (Should Fix) + +- Vague acceptance criteria +- Missing non-functional requirements +- No success metrics defined +- Incomplete traceability +- Missing architecture review notes + +### Info (Nice to Have) + +- Could add more detailed personas +- Consider additional ADR alternatives +- Story estimation hints missing +- Mermaid diagrams could be more detailed + +--- + +## Iteration Quality Tracking + +When Phase 6.5 (Auto-Fix) is triggered: + +| Iteration | Expected Improvement | Max Iterations | +|-----------|---------------------|----------------| +| 1st | Fix all Error-severity issues | - | +| 2nd | Fix remaining Warnings, improve scores | Max reached | + +### Iteration Exit Criteria + +| Condition | Action | +|-----------|--------| +| Overall score >= 80% after fix | Pass, proceed to handoff | +| Overall score 60-79% after 2 iterations | Review, proceed with caveats | +| Overall score < 60% after 2 iterations | Fail, manual intervention required | +| No Error-severity issues remaining | Eligible for handoff regardless of score | diff --git a/.codex/skills/spec-generator/templates/architecture-doc.md b/.codex/skills/spec-generator/templates/architecture-doc.md new file mode 100644 index 00000000..6a325536 --- /dev/null +++ b/.codex/skills/spec-generator/templates/architecture-doc.md @@ -0,0 +1,373 @@ +# Architecture Document Template (Directory Structure) + +Template for generating architecture decision documents as a directory of individual ADR files in Phase 4. 
+ +## Usage Context + +| Phase | Usage | +|-------|-------| +| Phase 4 (Architecture) | Generate `architecture/` directory from requirements analysis | +| Output Location | `{workDir}/architecture/` | + +## Output Structure + +``` +{workDir}/architecture/ +├── _index.md # Overview, components, tech stack, data model, security +├── ADR-001-{slug}.md # Individual Architecture Decision Record +├── ADR-002-{slug}.md +└── ... +``` + +--- + +## Template: _index.md + +```markdown +--- +session_id: {session_id} +phase: 4 +document_type: architecture-index +status: draft +generated_at: {timestamp} +version: 1 +dependencies: + - ../spec-config.json + - ../product-brief.md + - ../requirements/_index.md +--- + +# Architecture: {product_name} + +{executive_summary - high-level architecture approach and key decisions} + +## System Overview + +### Architecture Style +{description of chosen architecture style: microservices, monolith, serverless, etc.} + +### System Context Diagram + +```mermaid +C4Context + title System Context Diagram + Person(user, "User", "Primary user") + System(system, "{product_name}", "Core system") + System_Ext(ext1, "{external_system}", "{description}") + Rel(user, system, "Uses") + Rel(system, ext1, "Integrates with") +``` + +## Component Architecture + +### Component Diagram + +```mermaid +graph TD + subgraph "{product_name}" + A[Component A] --> B[Component B] + B --> C[Component C] + A --> D[Component D] + end + B --> E[External Service] +``` + +### Component Descriptions + +| Component | Responsibility | Technology | Dependencies | +|-----------|---------------|------------|--------------| +| {component_name} | {what it does} | {tech stack} | {depends on} | + +## Technology Stack + +### Core Technologies + +| Layer | Technology | Version | Rationale | +|-------|-----------|---------|-----------| +| Frontend | {technology} | {version} | {why chosen} | +| Backend | {technology} | {version} | {why chosen} | +| Database | {technology} | {version} | {why 
chosen} | +| Infrastructure | {technology} | {version} | {why chosen} | + +### Key Libraries & Frameworks + +| Library | Purpose | License | +|---------|---------|---------| +| {library_name} | {purpose} | {license} | + +## Architecture Decision Records + +| ADR | Title | Status | Key Choice | +|-----|-------|--------|------------| +| [ADR-001](ADR-001-{slug}.md) | {title} | Accepted | {one-line summary} | +| [ADR-002](ADR-002-{slug}.md) | {title} | Accepted | {one-line summary} | +| [ADR-003](ADR-003-{slug}.md) | {title} | Proposed | {one-line summary} | + +## Data Architecture + +### Data Model + +```mermaid +erDiagram + ENTITY_A ||--o{ ENTITY_B : "has many" + ENTITY_A { + string id PK + string name + datetime created_at + } + ENTITY_B { + string id PK + string entity_a_id FK + string value + } +``` + +### Data Storage Strategy + +| Data Type | Storage | Retention | Backup | +|-----------|---------|-----------|--------| +| {type} | {storage solution} | {retention policy} | {backup strategy} | + +## API Design + +### API Overview + +| Endpoint | Method | Purpose | Auth | +|----------|--------|---------|------| +| {/api/resource} | {GET/POST/etc} | {purpose} | {auth type} | + +## Security Architecture + +### Security Controls + +| Control | Implementation | Requirement | +|---------|---------------|-------------| +| Authentication | {approach} | [NFR-S-{NNN}](../requirements/NFR-S-{NNN}-{slug}.md) | +| Authorization | {approach} | [NFR-S-{NNN}](../requirements/NFR-S-{NNN}-{slug}.md) | +| Data Protection | {approach} | [NFR-S-{NNN}](../requirements/NFR-S-{NNN}-{slug}.md) | + +## Infrastructure & Deployment + +### Deployment Architecture + +{description of deployment model: containers, serverless, VMs, etc.} + +### Environment Strategy + +| Environment | Purpose | Configuration | +|-------------|---------|---------------| +| Development | Local development | {config} | +| Staging | Pre-production testing | {config} | +| Production | Live system | {config} | + +## 
Codebase Integration + +{if has_codebase is true:} + +### Existing Code Mapping + +| New Component | Existing Module | Integration Type | Notes | +|--------------|----------------|------------------|-------| +| {component} | {existing module path} | Extend/Replace/New | {notes} | + +### Migration Notes +{any migration considerations for existing code} + +## Quality Attributes + +| Attribute | Target | Measurement | ADR Reference | +|-----------|--------|-------------|---------------| +| Performance | {target} | {how measured} | [ADR-{NNN}](ADR-{NNN}-{slug}.md) | +| Scalability | {target} | {how measured} | [ADR-{NNN}](ADR-{NNN}-{slug}.md) | +| Reliability | {target} | {how measured} | [ADR-{NNN}](ADR-{NNN}-{slug}.md) | + +## State Machine + +{For each core entity with a lifecycle (e.g., Order, Session, Task):} + +### {Entity} Lifecycle + +``` +{ASCII state diagram showing all states, transitions, triggers, and error paths} + + ┌──────────┐ + │ Created │ + └─────┬────┘ + │ start() + ▼ + ┌──────────┐ error ┌──────────┐ + │ Running │ ──────────▶ │ Failed │ + └─────┬────┘ └──────────┘ + │ complete() + ▼ + ┌──────────┐ + │ Completed │ + └──────────┘ +``` + +| From State | Event | To State | Side Effects | Error Handling | +|-----------|-------|----------|-------------|----------------| +| {from} | {event} | {to} | {side_effects} | {error_behavior} | + +## Configuration Model + +### Required Configuration + +| Field | Type | Default | Constraint | Description | +|-------|------|---------|------------|-------------| +| {field_name} | {string/number/boolean/enum} | {default_value} | {validation rule} | {description} | + +### Optional Configuration + +| Field | Type | Default | Constraint | Description | +|-------|------|---------|------------|-------------| +| {field_name} | {type} | {default} | {constraint} | {description} | + +### Environment Variables + +| Variable | Maps To | Required | +|----------|---------|----------| +| {ENV_VAR} | {config_field} | {yes/no} | + +## 
Error Handling + +### Error Classification + +| Category | Severity | Retry | Example | +|----------|----------|-------|---------| +| Transient | Low | Yes, with backoff | Network timeout, rate limit | +| Permanent | High | No | Invalid configuration, auth failure | +| Degraded | Medium | Partial | Dependency unavailable, fallback active | + +### Per-Component Error Strategy + +| Component | Error Scenario | Behavior | Recovery | +|-----------|---------------|----------|----------| +| {component} | {scenario} | {MUST/SHOULD behavior} | {recovery strategy} | + +## Observability + +### Metrics + +| Metric Name | Type | Labels | Description | +|-------------|------|--------|-------------| +| {metric_name} | {counter/gauge/histogram} | {label1, label2} | {what it measures} | + +### Logging + +| Event | Level | Fields | Description | +|-------|-------|--------|-------------| +| {event_name} | {INFO/WARN/ERROR} | {structured fields} | {when logged} | + +### Health Checks + +| Check | Endpoint | Interval | Failure Action | +|-------|----------|----------|----------------| +| {check_name} | {/health/xxx} | {duration} | {action on failure} | + +## Trust & Safety + +### Trust Levels + +| Level | Description | Approval Required | Allowed Operations | +|-------|-------------|-------------------|-------------------| +| High Trust | {description} | None | {operations} | +| Standard | {description} | {approval type} | {operations} | +| Low Trust | {description} | {approval type} | {operations} | + +### Security Controls + +{Detailed security controls beyond the basic auth covered in Security Architecture} + +## Implementation Guidance + +### Key Decisions for Implementers + +| Decision | Options | Recommendation | Rationale | +|----------|---------|---------------|-----------| +| {decision_area} | {option_1, option_2} | {recommended} | {why} | + +### Implementation Order + +1. {component/module 1}: {why first} +2. 
{component/module 2}: {depends on #1} + +### Testing Strategy + +| Layer | Scope | Tools | Coverage Target | +|-------|-------|-------|-----------------| +| Unit | {scope} | {tools} | {target} | +| Integration | {scope} | {tools} | {target} | +| E2E | {scope} | {tools} | {target} | + +## Risks & Mitigations + +| Risk | Impact | Probability | Mitigation | +|------|--------|-------------|------------| +| {risk} | High/Medium/Low | High/Medium/Low | {mitigation approach} | + +## Open Questions + +- [ ] {architectural question 1} +- [ ] {architectural question 2} + +## References + +- Derived from: [Requirements](../requirements/_index.md), [Product Brief](../product-brief.md) +- Next: [Epics & Stories](../epics/_index.md) +``` + +--- + +## Template: ADR-NNN-{slug}.md (Individual Architecture Decision Record) + +```markdown +--- +id: ADR-{NNN} +status: Accepted +traces_to: [{REQ-NNN}, {NFR-X-NNN}] +date: {timestamp} +--- + +# ADR-{NNN}: {decision_title} + +## Context + +{what is the situation that motivates this decision} + +## Decision + +{what is the chosen approach} + +## Alternatives Considered + +| Option | Pros | Cons | +|--------|------|------| +| {option_1 - chosen} | {pros} | {cons} | +| {option_2} | {pros} | {cons} | +| {option_3} | {pros} | {cons} | + +## Consequences + +- **Positive**: {positive outcomes} +- **Negative**: {tradeoffs accepted} +- **Risks**: {risks to monitor} + +## Traces + +- **Requirements**: [REQ-{NNN}](../requirements/REQ-{NNN}-{slug}.md), [NFR-X-{NNN}](../requirements/NFR-X-{NNN}-{slug}.md) +- **Implemented by**: [EPIC-{NNN}](../epics/EPIC-{NNN}-{slug}.md) (added in Phase 5) +``` + +--- + +## Variable Descriptions + +| Variable | Source | Description | +|----------|--------|-------------| +| `{session_id}` | spec-config.json | Session identifier | +| `{timestamp}` | Runtime | ISO8601 generation timestamp | +| `{product_name}` | product-brief.md | Product/feature name | +| `{NNN}` | Auto-increment | ADR/requirement number | +| `{slug}` | 
Auto-generated | Kebab-case from decision title | +| `{has_codebase}` | spec-config.json | Whether existing codebase exists | diff --git a/.codex/skills/spec-generator/templates/epics-template.md b/.codex/skills/spec-generator/templates/epics-template.md new file mode 100644 index 00000000..a05e67fc --- /dev/null +++ b/.codex/skills/spec-generator/templates/epics-template.md @@ -0,0 +1,209 @@ +# Epics & Stories Template (Directory Structure) + +Template for generating epic/story breakdown as a directory of individual Epic files in Phase 5. + +## Usage Context + +| Phase | Usage | +|-------|-------| +| Phase 5 (Epics & Stories) | Generate `epics/` directory from requirements decomposition | +| Output Location | `{workDir}/epics/` | + +## Output Structure + +``` +{workDir}/epics/ +├── _index.md # Overview table + dependency map + MVP scope + execution order +├── EPIC-001-{slug}.md # Individual Epic with its Stories +├── EPIC-002-{slug}.md +└── ... +``` + +--- + +## Template: _index.md + +```markdown +--- +session_id: {session_id} +phase: 5 +document_type: epics-index +status: draft +generated_at: {timestamp} +version: 1 +dependencies: + - ../spec-config.json + - ../product-brief.md + - ../requirements/_index.md + - ../architecture/_index.md +--- + +# Epics & Stories: {product_name} + +{executive_summary - overview of epic structure and MVP scope} + +## Epic Overview + +| Epic ID | Title | Priority | MVP | Stories | Est. 
Size | +|---------|-------|----------|-----|---------|-----------| +| [EPIC-001](EPIC-001-{slug}.md) | {title} | Must | Yes | {n} | {S/M/L/XL} | +| [EPIC-002](EPIC-002-{slug}.md) | {title} | Must | Yes | {n} | {S/M/L/XL} | +| [EPIC-003](EPIC-003-{slug}.md) | {title} | Should | No | {n} | {S/M/L/XL} | + +## Dependency Map + +```mermaid +graph LR + EPIC-001 --> EPIC-002 + EPIC-001 --> EPIC-003 + EPIC-002 --> EPIC-004 + EPIC-003 --> EPIC-005 +``` + +### Dependency Notes +{explanation of why these dependencies exist and suggested execution order} + +### Recommended Execution Order +1. [EPIC-{NNN}](EPIC-{NNN}-{slug}.md): {reason - foundational} +2. [EPIC-{NNN}](EPIC-{NNN}-{slug}.md): {reason - depends on #1} +3. ... + +## MVP Scope + +### MVP Epics +{list of epics included in MVP with justification, linking to each} + +### MVP Definition of Done +- [ ] {MVP completion criterion 1} +- [ ] {MVP completion criterion 2} +- [ ] {MVP completion criterion 3} + +## Traceability Matrix + +| Requirement | Epic | Stories | Architecture | +|-------------|------|---------|--------------| +| [REQ-001](../requirements/REQ-001-{slug}.md) | [EPIC-001](EPIC-001-{slug}.md) | STORY-001-001, STORY-001-002 | [ADR-001](../architecture/ADR-001-{slug}.md) | +| [REQ-002](../requirements/REQ-002-{slug}.md) | [EPIC-001](EPIC-001-{slug}.md) | STORY-001-003 | Component B | +| [REQ-003](../requirements/REQ-003-{slug}.md) | [EPIC-002](EPIC-002-{slug}.md) | STORY-002-001 | [ADR-002](../architecture/ADR-002-{slug}.md) | + +## Estimation Summary + +| Size | Meaning | Count | +|------|---------|-------| +| S | Small - well-understood, minimal risk | {n} | +| M | Medium - some complexity, moderate risk | {n} | +| L | Large - significant complexity, should consider splitting | {n} | +| XL | Extra Large - high complexity, must split before implementation | {n} | + +## Risks & Considerations + +| Risk | Affected Epics | Mitigation | +|------|---------------|------------| +| {risk description} | 
[EPIC-{NNN}](EPIC-{NNN}-{slug}.md) | {mitigation} | + +## Versioning & Changelog + +### Version Strategy +- **Versioning Scheme**: {semver/calver/custom} +- **Breaking Change Definition**: {what constitutes a breaking change} +- **Deprecation Policy**: {how deprecated features are handled} + +### Changelog + +| Version | Date | Type | Description | +|---------|------|------|-------------| +| {version} | {date} | {Added/Changed/Fixed/Removed} | {description} | + +## Open Questions + +- [ ] {question about scope or implementation 1} +- [ ] {question about scope or implementation 2} + +## References + +- Derived from: [Requirements](../requirements/_index.md), [Architecture](../architecture/_index.md) +- Handoff to: execution workflows (lite-plan, plan, req-plan) +``` + +--- + +## Template: EPIC-NNN-{slug}.md (Individual Epic) + +```markdown +--- +id: EPIC-{NNN} +priority: {Must|Should|Could} +mvp: {true|false} +size: {S|M|L|XL} +requirements: [REQ-{NNN}] +architecture: [ADR-{NNN}] +dependencies: [EPIC-{NNN}] +status: draft +--- + +# EPIC-{NNN}: {epic_title} + +**Priority**: {Must|Should|Could} +**MVP**: {Yes|No} +**Estimated Size**: {S|M|L|XL} + +## Description + +{detailed epic description} + +## Requirements + +- [REQ-{NNN}](../requirements/REQ-{NNN}-{slug}.md): {title} +- [REQ-{NNN}](../requirements/REQ-{NNN}-{slug}.md): {title} + +## Architecture + +- [ADR-{NNN}](../architecture/ADR-{NNN}-{slug}.md): {title} +- Component: {component_name} + +## Dependencies + +- [EPIC-{NNN}](EPIC-{NNN}-{slug}.md) (blocking): {reason} +- [EPIC-{NNN}](EPIC-{NNN}-{slug}.md) (soft): {reason} + +## Stories + +### STORY-{EPIC}-001: {story_title} + +**User Story**: As a {persona}, I want to {action} so that {benefit}. 
+ +**Acceptance Criteria**: +- [ ] {criterion 1} +- [ ] {criterion 2} +- [ ] {criterion 3} + +**Size**: {S|M|L|XL} +**Traces to**: [REQ-{NNN}](../requirements/REQ-{NNN}-{slug}.md) + +--- + +### STORY-{EPIC}-002: {story_title} + +**User Story**: As a {persona}, I want to {action} so that {benefit}. + +**Acceptance Criteria**: +- [ ] {criterion 1} +- [ ] {criterion 2} + +**Size**: {S|M|L|XL} +**Traces to**: [REQ-{NNN}](../requirements/REQ-{NNN}-{slug}.md) +``` + +--- + +## Variable Descriptions + +| Variable | Source | Description | +|----------|--------|-------------| +| `{session_id}` | spec-config.json | Session identifier | +| `{timestamp}` | Runtime | ISO8601 generation timestamp | +| `{product_name}` | product-brief.md | Product/feature name | +| `{EPIC}` | Auto-increment | Epic number (3 digits) | +| `{NNN}` | Auto-increment | Story/requirement number | +| `{slug}` | Auto-generated | Kebab-case from epic/story title | +| `{S\|M\|L\|XL}` | CLI analysis | Relative size estimate | diff --git a/.codex/skills/spec-generator/templates/product-brief.md b/.codex/skills/spec-generator/templates/product-brief.md new file mode 100644 index 00000000..05b50440 --- /dev/null +++ b/.codex/skills/spec-generator/templates/product-brief.md @@ -0,0 +1,153 @@ +# Product Brief Template + +Template for generating product brief documents in Phase 2. 
+ +## Usage Context + +| Phase | Usage | +|-------|-------| +| Phase 2 (Product Brief) | Generate product-brief.md from multi-CLI analysis | +| Output Location | `{workDir}/product-brief.md` | + +--- + +## Template + +```markdown +--- +session_id: {session_id} +phase: 2 +document_type: product-brief +status: draft +generated_at: {timestamp} +stepsCompleted: [] +version: 1 +dependencies: + - spec-config.json +--- + +# Product Brief: {product_name} + +{executive_summary - 2-3 sentences capturing the essence of the product/feature} + +## Concepts & Terminology + +| Term | Definition | Aliases | +|------|-----------|---------| +| {term_1} | {definition} | {comma-separated aliases if any} | +| {term_2} | {definition} | | + +{Note: All documents in this specification MUST use these terms consistently.} + +## Vision + +{vision_statement - clear, aspirational 1-3 sentence statement of what success looks like} + +## Problem Statement + +### Current Situation +{description of the current state and pain points} + +### Impact +{quantified impact of the problem - who is affected, how much, how often} + +## Target Users + +{for each user persona:} + +### {Persona Name} +- **Role**: {user's role/context} +- **Needs**: {primary needs related to this product} +- **Pain Points**: {current frustrations} +- **Success Criteria**: {what success looks like for this user} + +## Goals & Success Metrics + +| Goal ID | Goal | Success Metric | Target | +|---------|------|----------------|--------| +| G-001 | {goal description} | {measurable metric} | {specific target} | +| G-002 | {goal description} | {measurable metric} | {specific target} | + +## Scope + +### In Scope +- {feature/capability 1} +- {feature/capability 2} +- {feature/capability 3} + +### Out of Scope +- {explicitly excluded item 1} +- {explicitly excluded item 2} + +### Non-Goals + +{Explicit list of things this project will NOT do, with rationale for each:} + +| Non-Goal | Rationale | +|----------|-----------| +| {non_goal_1} 
| {why this is explicitly excluded} | +| {non_goal_2} | {why this is explicitly excluded} | + +### Assumptions +- {key assumption 1} +- {key assumption 2} + +## Competitive Landscape + +| Aspect | Current State | Proposed Solution | Advantage | +|--------|--------------|-------------------|-----------| +| {aspect} | {how it's done now} | {our approach} | {differentiator} | + +## Constraints & Dependencies + +### Technical Constraints +- {constraint 1} +- {constraint 2} + +### Business Constraints +- {constraint 1} + +### Dependencies +- {external dependency 1} +- {external dependency 2} + +## Multi-Perspective Synthesis + +### Product Perspective +{summary of product/market analysis findings} + +### Technical Perspective +{summary of technical feasibility and constraints} + +### User Perspective +{summary of user journey and UX considerations} + +### Convergent Themes +{themes where all perspectives agree} + +### Conflicting Views +{areas where perspectives differ, with notes on resolution approach} + +## Open Questions + +- [ ] {unresolved question 1} +- [ ] {unresolved question 2} + +## References + +- Derived from: [spec-config.json](spec-config.json) +- Next: [Requirements PRD](requirements.md) +``` + +## Variable Descriptions + +| Variable | Source | Description | +|----------|--------|-------------| +| `{session_id}` | spec-config.json | Session identifier | +| `{timestamp}` | Runtime | ISO8601 generation timestamp | +| `{product_name}` | Seed analysis | Product/feature name | +| `{executive_summary}` | CLI synthesis | 2-3 sentence summary | +| `{vision_statement}` | CLI product perspective | Aspirational vision | +| `{term_1}`, `{term_2}` | CLI synthesis | Domain terms with definitions and optional aliases | +| `{non_goal_1}`, `{non_goal_2}` | CLI synthesis | Explicit exclusions with rationale | +| All `{...}` fields | CLI analysis outputs | Filled from multi-perspective analysis | diff --git a/.codex/skills/spec-generator/templates/profiles/api-profile.md 
b/.codex/skills/spec-generator/templates/profiles/api-profile.md new file mode 100644 index 00000000..fe274449 --- /dev/null +++ b/.codex/skills/spec-generator/templates/profiles/api-profile.md @@ -0,0 +1,27 @@ +# API Spec Profile + +Defines additional required sections for API-type specifications. + +## Required Sections (in addition to base template) + +### In Architecture Document +- **Endpoint Definition**: MUST list all endpoints with method, path, auth, request/response schema +- **Authentication Model**: MUST define auth mechanism (OAuth2/JWT/API Key), token lifecycle +- **Rate Limiting**: MUST define rate limits per tier/endpoint, throttling behavior +- **Error Codes**: MUST define error response format, standard error codes with descriptions +- **API Versioning**: MUST define versioning strategy (URL/header/query), deprecation policy +- **Pagination**: SHOULD define pagination strategy for list endpoints +- **Idempotency**: SHOULD define idempotency requirements for write operations + +### In Requirements Document +- **Endpoint Acceptance Criteria**: Each requirement SHOULD map to specific endpoints +- **SLA Definitions**: MUST define response time, availability targets per endpoint tier + +### Quality Gate Additions +| Check | Criteria | Severity | +|-------|----------|----------| +| Endpoints documented | All endpoints with method + path | Error | +| Auth model defined | Authentication mechanism specified | Error | +| Error codes defined | Standard error format + codes | Warning | +| Rate limits defined | Per-endpoint or per-tier limits | Warning | +| API versioning strategy | Versioning approach specified | Warning | diff --git a/.codex/skills/spec-generator/templates/profiles/library-profile.md b/.codex/skills/spec-generator/templates/profiles/library-profile.md new file mode 100644 index 00000000..78189362 --- /dev/null +++ b/.codex/skills/spec-generator/templates/profiles/library-profile.md @@ -0,0 +1,25 @@ +# Library Spec Profile + +Defines 
additional required sections for library/SDK-type specifications. + +## Required Sections (in addition to base template) + +### In Architecture Document +- **Public API Surface**: MUST define all public interfaces with signatures, parameters, return types +- **Usage Examples**: MUST provide >= 3 code examples showing common usage patterns +- **Compatibility Matrix**: MUST define supported language versions, runtime environments +- **Dependency Policy**: MUST define transitive dependency policy, version constraints +- **Extension Points**: SHOULD define plugin/extension mechanisms if applicable +- **Bundle Size**: SHOULD define target bundle size and tree-shaking strategy + +### In Requirements Document +- **API Ergonomics**: Requirements SHOULD address developer experience and API consistency +- **Error Reporting**: MUST define error types, messages, and recovery hints for consumers + +### Quality Gate Additions +| Check | Criteria | Severity | +|-------|----------|----------| +| Public API documented | All public interfaces with types | Error | +| Usage examples | >= 3 working examples | Warning | +| Compatibility matrix | Supported environments listed | Warning | +| Dependency policy | Transitive deps strategy defined | Info | diff --git a/.codex/skills/spec-generator/templates/profiles/service-profile.md b/.codex/skills/spec-generator/templates/profiles/service-profile.md new file mode 100644 index 00000000..9fe915a1 --- /dev/null +++ b/.codex/skills/spec-generator/templates/profiles/service-profile.md @@ -0,0 +1,28 @@ +# Service Spec Profile + +Defines additional required sections for service-type specifications. 
+ +## Required Sections (in addition to base template) + +### In Architecture Document +- **Concepts & Terminology**: MUST define all domain terms with consistent aliases +- **State Machine**: MUST include ASCII state diagram for each entity with a lifecycle +- **Configuration Model**: MUST define all configurable fields with types, defaults, constraints +- **Error Handling**: MUST define per-component error classification and recovery strategies +- **Observability**: MUST define >= 3 metrics, structured log format, health check endpoints +- **Trust & Safety**: SHOULD define trust levels and approval matrix +- **Graceful Shutdown**: MUST describe shutdown sequence and cleanup procedures +- **Implementation Guidance**: SHOULD provide implementation order and key decisions + +### In Requirements Document +- **Behavioral Constraints**: MUST use RFC 2119 keywords (MUST/SHOULD/MAY) for all requirements +- **Data Model**: MUST define core entities with field-level detail (type, constraint, relation) + +### Quality Gate Additions +| Check | Criteria | Severity | +|-------|----------|----------| +| State machine present | >= 1 lifecycle state diagram | Error | +| Configuration model | All config fields documented | Warning | +| Observability metrics | >= 3 metrics defined | Warning | +| Error handling defined | Per-component strategy | Warning | +| RFC keywords used | Behavioral requirements use MUST/SHOULD/MAY | Warning | diff --git a/.codex/skills/spec-generator/templates/requirements-prd.md b/.codex/skills/spec-generator/templates/requirements-prd.md new file mode 100644 index 00000000..0b1dbf28 --- /dev/null +++ b/.codex/skills/spec-generator/templates/requirements-prd.md @@ -0,0 +1,224 @@ +# Requirements PRD Template (Directory Structure) + +Template for generating Product Requirements Document as a directory of individual requirement files in Phase 3. 
+ +## Usage Context + +| Phase | Usage | +|-------|-------| +| Phase 3 (Requirements) | Generate `requirements/` directory from product brief expansion | +| Output Location | `{workDir}/requirements/` | + +## Output Structure + +``` +{workDir}/requirements/ +├── _index.md # Summary + MoSCoW table + traceability matrix + links +├── REQ-001-{slug}.md # Individual functional requirement +├── REQ-002-{slug}.md +├── NFR-P-001-{slug}.md # Non-functional: Performance +├── NFR-S-001-{slug}.md # Non-functional: Security +├── NFR-SC-001-{slug}.md # Non-functional: Scalability +├── NFR-U-001-{slug}.md # Non-functional: Usability +└── ... +``` + +--- + +## Template: _index.md + +```markdown +--- +session_id: {session_id} +phase: 3 +document_type: requirements-index +status: draft +generated_at: {timestamp} +version: 1 +dependencies: + - ../spec-config.json + - ../product-brief.md +--- + +# Requirements: {product_name} + +{executive_summary - brief overview of what this PRD covers and key decisions} + +## Requirement Summary + +| Priority | Count | Coverage | +|----------|-------|----------| +| Must Have | {n} | {description of must-have scope} | +| Should Have | {n} | {description of should-have scope} | +| Could Have | {n} | {description of could-have scope} | +| Won't Have | {n} | {description of explicitly excluded} | + +## Functional Requirements + +| ID | Title | Priority | Traces To | +|----|-------|----------|-----------| +| [REQ-001](REQ-001-{slug}.md) | {title} | Must | [G-001](../product-brief.md#goals--success-metrics) | +| [REQ-002](REQ-002-{slug}.md) | {title} | Must | [G-001](../product-brief.md#goals--success-metrics) | +| [REQ-003](REQ-003-{slug}.md) | {title} | Should | [G-002](../product-brief.md#goals--success-metrics) | + +## Non-Functional Requirements + +### Performance + +| ID | Title | Target | +|----|-------|--------| +| [NFR-P-001](NFR-P-001-{slug}.md) | {title} | {target value} | + +### Security + +| ID | Title | Standard | +|----|-------|----------| 
+| [NFR-S-001](NFR-S-001-{slug}.md) | {title} | {standard/framework} | + +### Scalability + +| ID | Title | Target | +|----|-------|--------| +| [NFR-SC-001](NFR-SC-001-{slug}.md) | {title} | {target value} | + +### Usability + +| ID | Title | Target | +|----|-------|--------| +| [NFR-U-001](NFR-U-001-{slug}.md) | {title} | {target value} | + +## Data Requirements + +### Data Entities + +| Entity | Description | Key Attributes | +|--------|-------------|----------------| +| {entity_name} | {description} | {attr1, attr2, attr3} | + +### Data Flows + +{description of key data flows, optionally with Mermaid diagram} + +## Integration Requirements + +| System | Direction | Protocol | Data Format | Notes | +|--------|-----------|----------|-------------|-------| +| {system_name} | Inbound/Outbound/Both | {REST/gRPC/etc} | {JSON/XML/etc} | {notes} | + +## Constraints & Assumptions + +### Constraints +- {technical or business constraint 1} +- {technical or business constraint 2} + +### Assumptions +- {assumption 1 - must be validated} +- {assumption 2 - must be validated} + +## Priority Rationale + +{explanation of MoSCoW prioritization decisions, especially for Should/Could boundaries} + +## Traceability Matrix + +| Goal | Requirements | +|------|-------------| +| G-001 | [REQ-001](REQ-001-{slug}.md), [REQ-002](REQ-002-{slug}.md), [NFR-P-001](NFR-P-001-{slug}.md) | +| G-002 | [REQ-003](REQ-003-{slug}.md), [NFR-S-001](NFR-S-001-{slug}.md) | + +## Open Questions + +- [ ] {unresolved question 1} +- [ ] {unresolved question 2} + +## References + +- Derived from: [Product Brief](../product-brief.md) +- Next: [Architecture](../architecture/_index.md) +``` + +--- + +## Template: REQ-NNN-{slug}.md (Individual Functional Requirement) + +```markdown +--- +id: REQ-{NNN} +type: functional +priority: {Must|Should|Could|Won't} +traces_to: [G-{NNN}] +status: draft +--- + +# REQ-{NNN}: {requirement_title} + +**Priority**: {Must|Should|Could|Won't} + +## Description + +{detailed 
requirement description} + +## User Story + +As a {persona}, I want to {action} so that {benefit}. + +## Acceptance Criteria + +- [ ] {specific, testable criterion 1} +- [ ] {specific, testable criterion 2} +- [ ] {specific, testable criterion 3} + +## Traces + +- **Goal**: [G-{NNN}](../product-brief.md#goals--success-metrics) +- **Architecture**: [ADR-{NNN}](../architecture/ADR-{NNN}-{slug}.md) (if applicable) +- **Implemented by**: [EPIC-{NNN}](../epics/EPIC-{NNN}-{slug}.md) (added in Phase 5) +``` + +--- + +## Template: NFR-{type}-NNN-{slug}.md (Individual Non-Functional Requirement) + +```markdown +--- +id: NFR-{type}-{NNN} +type: non-functional +category: {Performance|Security|Scalability|Usability} +priority: {Must|Should|Could} +status: draft +--- + +# NFR-{type}-{NNN}: {requirement_title} + +**Category**: {Performance|Security|Scalability|Usability} +**Priority**: {Must|Should|Could} + +## Requirement + +{detailed requirement description} + +## Metric & Target + +| Metric | Target | Measurement Method | +|--------|--------|--------------------| +| {metric} | {target value} | {how measured} | + +## Traces + +- **Goal**: [G-{NNN}](../product-brief.md#goals--success-metrics) +- **Architecture**: [ADR-{NNN}](../architecture/ADR-{NNN}-{slug}.md) (if applicable) +``` + +--- + +## Variable Descriptions + +| Variable | Source | Description | +|----------|--------|-------------| +| `{session_id}` | spec-config.json | Session identifier | +| `{timestamp}` | Runtime | ISO8601 generation timestamp | +| `{product_name}` | product-brief.md | Product/feature name | +| `{NNN}` | Auto-increment | Requirement number (zero-padded 3 digits) | +| `{slug}` | Auto-generated | Kebab-case from requirement title | +| `{type}` | Category | P (Performance), S (Security), SC (Scalability), U (Usability) | +| `{Must\|Should\|Could\|Won't}` | User input / auto | MoSCoW priority tag | diff --git a/.codex/skills/team-arch-opt/SKILL.md b/.codex/skills/team-arch-opt/SKILL.md new file mode 
100644 index 00000000..ed6aaefb --- /dev/null +++ b/.codex/skills/team-arch-opt/SKILL.md @@ -0,0 +1,660 @@ +--- +name: team-arch-opt +description: Architecture optimization team skill. Analyzes codebase architecture, designs refactoring plans, implements changes, validates improvements, and reviews code quality via CSV wave pipeline with interactive review-fix cycles. +argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"architecture optimization task description\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults. + +# Team Architecture Optimization + +## Usage + +```bash +$team-arch-opt "Refactor the auth module to reduce coupling and eliminate circular dependencies" +$team-arch-opt -c 4 "Analyze and fix God Classes across the service layer" +$team-arch-opt -y "Remove dead code and clean up barrel exports in src/utils" +$team-arch-opt --continue "tao-refactor-auth-20260308" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 3) +- `--continue`: Resume existing session + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Orchestrate multi-agent architecture optimization: analyze codebase structure, design refactoring plan, implement changes, validate improvements, review code quality. The pipeline has five domain roles (analyzer, designer, refactorer, validator, reviewer) mapped to CSV wave stages with an interactive review-fix cycle. 
+ +**Execution Model**: Hybrid -- CSV wave pipeline (primary) + individual agent spawn (secondary) + +``` ++-------------------------------------------------------------------+ +| TEAM ARCHITECTURE OPTIMIZATION WORKFLOW | ++-------------------------------------------------------------------+ +| | +| Phase 0: Pre-Wave Interactive (Requirement Clarification) | +| +- Parse user task description | +| +- Detect scope: targeted module vs full architecture | +| +- Clarify ambiguous requirements (AskUserQuestion) | +| +- Output: refined requirements for decomposition | +| | +| Phase 1: Requirement -> CSV + Classification | +| +- Identify architecture issues to target | +| +- Build 5-stage pipeline (analyze->design->refactor->validate | +| | +review) | +| +- Classify tasks: csv-wave | interactive (exec_mode) | +| +- Compute dependency waves (topological sort) | +| +- Generate tasks.csv with wave + exec_mode columns | +| +- User validates task breakdown (skip if -y) | +| | +| Phase 2: Wave Execution Engine (Extended) | +| +- For each wave (1..N): | +| | +- Execute pre-wave interactive tasks (if any) | +| | +- Build wave CSV (filter csv-wave tasks for this wave) | +| | +- Inject previous findings into prev_context column | +| | +- spawn_agents_on_csv(wave CSV) | +| | +- Execute post-wave interactive tasks (if any) | +| | +- Merge all results into master tasks.csv | +| | +- Check: any failed? 
-> skip dependents | +| +- discoveries.ndjson shared across all modes (append-only) | +| +- Review-fix cycle: max 3 iterations per branch | +| | +| Phase 3: Post-Wave Interactive (Completion Action) | +| +- Pipeline completion report with improvement metrics | +| +- Interactive completion choice (Archive/Keep/Export) | +| +- Final aggregation / report | +| | +| Phase 4: Results Aggregation | +| +- Export final results.csv | +| +- Generate context.md with all findings | +| +- Display summary: completed/failed/skipped per wave | +| +- Offer: view results | retry failed | done | +| | ++-------------------------------------------------------------------+ +``` + +--- + +## Pipeline Definition + +``` +Stage 1 Stage 2 Stage 3 Stage 4 +ANALYZE-001 --> DESIGN-001 --> REFACTOR-001 --> VALIDATE-001 +[analyzer] [designer] [refactorer] [validator] + ^ | + +<-- FIX-001 ----+ + | REVIEW-001 + +<--------> [reviewer] + (max 3 iterations) +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, revision cycles, user checkpoints | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Architecture analysis (single-pass scan) | `csv-wave` | +| Refactoring plan design (single-pass) | `csv-wave` | +| Code refactoring implementation | `csv-wave` | +| Validation (build, test, metrics) | `csv-wave` | +| Code review (single-pass) | `csv-wave` | +| Review-fix cycle (iterative revision) | `interactive` | +| User checkpoint (plan approval) | `interactive` | +| Discussion round (DISCUSS-REFACTOR, DISCUSS-REVIEW) | `interactive` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv 
+id,title,description,role,issue_type,priority,target_files,deps,context_from,exec_mode,wave,status,findings,verdict,artifacts_produced,error +"ANALYZE-001","Analyze architecture","Analyze codebase architecture to identify structural issues: cycles, coupling, cohesion, God Classes, dead code, API bloat. Produce baseline metrics and ranked report.","analyzer","","","","","","csv-wave","1","pending","","","","" +"DESIGN-001","Design refactoring plan","Analyze architecture report to design prioritized refactoring plan with strategies, expected improvements, and risk assessments.","designer","","","","ANALYZE-001","ANALYZE-001","csv-wave","2","pending","","","","" +"REFACTOR-001","Implement refactorings","Implement architecture refactoring changes following design plan in priority order (P0 first).","refactorer","","","","DESIGN-001","DESIGN-001","csv-wave","3","pending","","","","" +"VALIDATE-001","Validate changes","Validate refactoring: build checks, test suite, dependency metrics, API compatibility.","validator","","","","REFACTOR-001","REFACTOR-001","csv-wave","4","pending","","","","" +"REVIEW-001","Review refactoring code","Review refactoring changes for correctness, patterns, completeness, migration safety, best practices.","reviewer","","","","REFACTOR-001","REFACTOR-001","csv-wave","4","pending","","","","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier (PREFIX-NNN format) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description (self-contained) | +| `role` | Input | Worker role: analyzer, designer, refactorer, validator, reviewer | +| `issue_type` | Input | Architecture issue category: CYCLE, COUPLING, COHESION, GOD_CLASS, DUPLICATION, LAYER_VIOLATION, DEAD_CODE, API_BLOAT | +| `priority` | Input | P0 (Critical), P1 (High), P2 (Medium), P3 (Low) | +| `target_files` | Input | Semicolon-separated file paths to focus on | +| 
`deps` | Input | Semicolon-separated dependency task IDs | +| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `wave` | Computed | Wave number (computed by topological sort, 1-based) | +| `status` | Output | `pending` -> `completed` / `failed` / `skipped` | +| `findings` | Output | Key discoveries or implementation notes (max 500 chars) | +| `verdict` | Output | Validation/review verdict: PASS, WARN, FAIL, APPROVE, REVISE, REJECT | +| `artifacts_produced` | Output | Semicolon-separated paths of produced artifacts | +| `error` | Output | Error message if failed (empty if success) | + +### Per-Wave CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). + +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| Plan Reviewer | agents/plan-reviewer.md | 2.3 (send_input cycle) | Review architecture report or refactoring plan at user checkpoint | pre-wave | +| Fix Cycle Handler | agents/fix-cycle-handler.md | 2.3 (send_input cycle) | Manage review-fix iteration cycle (max 3 rounds) | post-wave | +| Completion Handler | agents/completion-handler.md | 2.3 (send_input cycle) | Handle pipeline completion action (Archive/Keep/Export) | standalone | + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. 
+ +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state -- all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 4 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 4 | +| `task-analysis.json` | Phase 1 output: scope, issues, pipeline config | Created in Phase 1 | +| `artifacts/architecture-baseline.json` | Analyzer: pre-refactoring metrics | Created by analyzer | +| `artifacts/architecture-report.md` | Analyzer: ranked structural issue findings | Created by analyzer | +| `artifacts/refactoring-plan.md` | Designer: prioritized refactoring plan | Created by designer | +| `artifacts/validation-results.json` | Validator: post-refactoring validation | Created by validator | +| `artifacts/review-report.md` | Reviewer: code review findings | Created by reviewer | +| `interactive/{id}-result.json` | Results from interactive tasks | Created per interactive task | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ ++-- tasks.csv # Master state (all tasks, both modes) ++-- results.csv # Final results export ++-- discoveries.ndjson # Shared discovery board (all agents) ++-- context.md # Human-readable report ++-- task-analysis.json # Phase 1 analysis output ++-- wave-{N}.csv # Temporary per-wave input (csv-wave only) ++-- artifacts/ +| +-- architecture-baseline.json # Analyzer output +| +-- architecture-report.md # Analyzer output +| +-- refactoring-plan.md # Designer output +| +-- validation-results.json # Validator output +| +-- review-report.md # Reviewer output ++-- interactive/ # Interactive task artifacts +| +-- {id}-result.json ++-- wisdom/ + +-- patterns.md # 
Discovered patterns and conventions +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +const getUtc8ISOString = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString() + +const AUTO_YES = $ARGUMENTS.includes('--yes') || $ARGUMENTS.includes('-y') +const continueMode = $ARGUMENTS.includes('--continue') +const concurrencyMatch = $ARGUMENTS.match(/(?:--concurrency|-c)\s+(\d+)/) +const maxConcurrency = concurrencyMatch ? parseInt(concurrencyMatch[1]) : 3 + +const requirement = $ARGUMENTS + .replace(/--yes|-y|--continue|--concurrency\s+\d+|-c\s+\d+/g, '') + .trim() + +const slug = requirement.toLowerCase() + .replace(/[^a-z0-9\u4e00-\u9fa5]+/g, '-') + .substring(0, 40) +const dateStr = getUtc8ISOString().substring(0, 10).replace(/-/g, '') +const sessionId = `tao-${slug}-${dateStr}` +const sessionFolder = `.workflow/.csv-wave/${sessionId}` + +Bash(`mkdir -p ${sessionFolder}/artifacts ${sessionFolder}/interactive ${sessionFolder}/wisdom`) + +// Initialize discoveries.ndjson +Write(`${sessionFolder}/discoveries.ndjson`, '') + +// Initialize wisdom +Write(`${sessionFolder}/wisdom/patterns.md`, '# Patterns & Conventions\n') +``` + +--- + +### Phase 0: Pre-Wave Interactive (Requirement Clarification) + +**Objective**: Parse user task, detect architecture scope, clarify ambiguities, prepare for decomposition. + +**Workflow**: + +1. **Parse user task description** from $ARGUMENTS + +2. **Check for existing sessions** (continue mode): + - Scan `.workflow/.csv-wave/tao-*/tasks.csv` for sessions with pending tasks + - If `--continue`: resume the specified or most recent session, skip to Phase 2 + - If active session found: ask user whether to resume or start new + +3. 
**Identify architecture optimization target**: + +| Signal | Target | +|--------|--------| +| Specific file/module mentioned | Scoped refactoring | +| "coupling", "dependency", "structure", generic | Full architecture analysis | +| Specific issue (cycles, God Class, duplication) | Targeted issue resolution | + +4. **Clarify if ambiguous** (skip if AUTO_YES): + ```javascript + AskUserQuestion({ + questions: [{ + question: "Please confirm the architecture optimization scope:", + header: "Architecture Scope", + multiSelect: false, + options: [ + { label: "Proceed as described", description: "Scope is clear" }, + { label: "Narrow scope", description: "Specify modules/files to focus on" }, + { label: "Add constraints", description: "Exclude areas, set priorities" } + ] + }] + }) + ``` + +5. **Output**: Refined requirement string for Phase 1 + +**Success Criteria**: +- Refined requirements available for Phase 1 decomposition +- Existing session detected and handled if applicable + +--- + +### Phase 1: Requirement -> CSV + Classification + +**Objective**: Decompose architecture optimization task into the 5-stage pipeline tasks, assign waves, generate tasks.csv. + +**Decomposition Rules**: + +1. **Stage mapping** -- architecture optimization always follows this pipeline: + +| Stage | Role | Task Prefix | Wave | Description | +|-------|------|-------------|------|-------------| +| 1 | analyzer | ANALYZE | 1 | Scan codebase, identify structural issues, produce baseline metrics | +| 2 | designer | DESIGN | 2 | Design refactoring plan from architecture report | +| 3 | refactorer | REFACTOR | 3 | Implement refactorings per plan priority | +| 4a | validator | VALIDATE | 4 | Validate build, tests, metrics, API compatibility | +| 4b | reviewer | REVIEW | 4 | Review refactoring code for correctness and patterns | + +2. 
**Single-pipeline decomposition**: Generate one task per stage with sequential dependencies: + - ANALYZE-001 (wave 1, no deps) + - DESIGN-001 (wave 2, deps: ANALYZE-001) + - REFACTOR-001 (wave 3, deps: DESIGN-001) + - VALIDATE-001 (wave 4, deps: REFACTOR-001) + - REVIEW-001 (wave 4, deps: REFACTOR-001) + +3. **Description enrichment**: Each task description must be self-contained with: + - Clear goal statement + - Input artifacts to read + - Output artifacts to produce + - Success criteria + - Session folder path + +**Classification Rules**: + +| Task Property | exec_mode | +|---------------|-----------| +| ANALYZE, DESIGN, REFACTOR, VALIDATE, REVIEW (initial pass) | `csv-wave` | +| FIX tasks (review-fix cycle) | `interactive` (handled by fix-cycle-handler agent) | + +**Wave Computation**: Kahn's BFS topological sort with depth tracking (csv-wave tasks only). + +**User Validation**: Display task breakdown with wave + exec_mode assignment (skip if AUTO_YES). + +**Success Criteria**: +- tasks.csv created with valid schema, wave, and exec_mode assignments +- task-analysis.json written with scope and pipeline config +- No circular dependencies +- User approved (or AUTO_YES) + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with hybrid mechanism support and cross-wave context propagation. + +```javascript +const masterCsv = Read(`${sessionFolder}/tasks.csv`) +let tasks = parseCsv(masterCsv) + +// Recompute the upper bound on every pass: step 11 may append FIX tasks at wave + 1, +// so a bound fixed before the loop would silently drop the review-fix cycle waves. +for (let wave = 1; wave <= Math.max(...tasks.map(t => t.wave)); wave++) { + const maxWave = Math.max(...tasks.map(t => t.wave)) + console.log(`\nWave ${wave}/${maxWave}`) + + // 1. Separate tasks by exec_mode + const waveTasks = tasks.filter(t => t.wave === wave && t.status === 'pending') + const csvTasks = waveTasks.filter(t => t.exec_mode === 'csv-wave') + const interactiveTasks = waveTasks.filter(t => t.exec_mode === 'interactive') + + // 2. 
Check dependencies -- skip tasks whose deps failed + for (const task of waveTasks) { + const depIds = (task.deps || '').split(';').filter(Boolean) + const depStatuses = depIds.map(id => tasks.find(t => t.id === id)?.status) + if (depStatuses.some(s => s === 'failed' || s === 'skipped')) { + task.status = 'skipped' + task.error = `Dependency failed: ${depIds.filter((id, i) => + ['failed','skipped'].includes(depStatuses[i])).join(', ')}` + } + } + + // 3. Execute pre-wave interactive tasks (if any) + for (const task of interactiveTasks.filter(t => t.status === 'pending')) { + // Determine agent file based on task type + const agentFile = task.id.startsWith('FIX') ? 'agents/fix-cycle-handler.md' : 'agents/plan-reviewer.md' + Read(agentFile) + + const agent = spawn_agent({ + message: `## TASK ASSIGNMENT\n\n### MANDATORY FIRST STEPS\n1. Read: ${agentFile}\n2. Read: ${sessionFolder}/discoveries.ndjson\n3. Read: .workflow/project-tech.json (if exists)\n\n---\n\nGoal: ${task.description}\nScope: ${task.title}\nSession: ${sessionFolder}\n\n### Previous Context\n${buildPrevContext(task, tasks)}` + }) + // 'let' so the post-timeout wait can replace the stale timed-out result; + // otherwise parseFindings below would read the pre-finalization state. + let result = wait({ ids: [agent], timeout_ms: 600000 }) + if (result.timed_out) { + send_input({ id: agent, message: "Please finalize and output current findings." }) + result = wait({ ids: [agent], timeout_ms: 120000 }) + } + Write(`${sessionFolder}/interactive/${task.id}-result.json`, JSON.stringify({ + task_id: task.id, status: "completed", findings: parseFindings(result), + timestamp: getUtc8ISOString() + })) + close_agent({ id: agent }) + task.status = 'completed' + task.findings = parseFindings(result) + } + + // 4. Build prev_context for csv-wave tasks + const pendingCsvTasks = csvTasks.filter(t => t.status === 'pending') + for (const task of pendingCsvTasks) { + task.prev_context = buildPrevContext(task, tasks) + } + + if (pendingCsvTasks.length > 0) { + // 5. Write wave CSV + Write(`${sessionFolder}/wave-${wave}.csv`, toCsv(pendingCsvTasks)) + + // 6. 
Determine instruction -- read from instructions/agent-instruction.md
+    const archOptInstruction = Read('instructions/agent-instruction.md')
+
+    // 7. Execute wave via spawn_agents_on_csv
+    spawn_agents_on_csv({
+      csv_path: `${sessionFolder}/wave-${wave}.csv`,
+      id_column: "id",
+      instruction: archOptInstruction, // from instructions/agent-instruction.md
+      max_concurrency: maxConcurrency,
+      max_runtime_seconds: 900,
+      output_csv_path: `${sessionFolder}/wave-${wave}-results.csv`,
+      output_schema: {
+        type: "object",
+        properties: {
+          id: { type: "string" },
+          status: { type: "string", enum: ["completed", "failed"] },
+          findings: { type: "string" },
+          verdict: { type: "string" },
+          artifacts_produced: { type: "string" },
+          error: { type: "string" }
+        }
+      }
+    })
+
+    // 8. Merge results into master CSV
+    const results = parseCsv(Read(`${sessionFolder}/wave-${wave}-results.csv`))
+    for (const r of results) {
+      const t = tasks.find(t => t.id === r.id)
+      if (t) Object.assign(t, r)
+    }
+  }
+
+  // 9. Update master CSV
+  Write(`${sessionFolder}/tasks.csv`, toCsv(tasks))
+
+  // 10. Cleanup temp files
+  Bash(`rm -f ${sessionFolder}/wave-${wave}.csv ${sessionFolder}/wave-${wave}-results.csv`)
+
+  // 11. 
Post-wave: check for review-fix cycle + const validateTask = tasks.find(t => t.id.startsWith('VALIDATE') && t.wave === wave) + const reviewTask = tasks.find(t => t.id.startsWith('REVIEW') && t.wave === wave) + + if ((validateTask?.verdict === 'FAIL' || reviewTask?.verdict === 'REVISE' || reviewTask?.verdict === 'REJECT')) { + const fixCycleCount = tasks.filter(t => t.id.startsWith('FIX')).length + if (fixCycleCount < 3) { + // Create FIX task, add to tasks, re-run refactor -> validate+review cycle + const fixId = `FIX-${String(fixCycleCount + 1).padStart(3, '0')}` + const feedback = [validateTask?.error, reviewTask?.findings].filter(Boolean).join('\n') + tasks.push({ + id: fixId, title: `Fix issues from review/validation cycle ${fixCycleCount + 1}`, + description: `Fix issues found:\n${feedback}`, + role: 'refactorer', issue_type: '', priority: 'P0', target_files: '', + deps: '', context_from: '', exec_mode: 'interactive', + wave: wave + 1, status: 'pending', findings: '', verdict: '', + artifacts_produced: '', error: '' + }) + } + } + + // 12. Display wave summary + const completed = waveTasks.filter(t => t.status === 'completed').length + const failed = waveTasks.filter(t => t.status === 'failed').length + const skipped = waveTasks.filter(t => t.status === 'skipped').length + console.log(`Wave ${wave} Complete: ${completed} completed, ${failed} failed, ${skipped} skipped`) +} +``` + +**Success Criteria**: +- All waves executed in order +- Both csv-wave and interactive tasks handled per wave +- Each wave's results merged into master CSV before next wave starts +- Dependent tasks skipped when predecessor failed +- Review-fix cycle handled with max 3 iterations +- discoveries.ndjson accumulated across all waves and mechanisms + +--- + +### Phase 3: Post-Wave Interactive (Completion Action) + +**Objective**: Pipeline completion report with architecture improvement metrics and interactive completion choice. + +```javascript +// 1. 
Generate pipeline summary +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +const completed = tasks.filter(t => t.status === 'completed') +const failed = tasks.filter(t => t.status === 'failed') + +// 2. Load improvement metrics from validation results +let improvements = '' +try { + const validation = JSON.parse(Read(`${sessionFolder}/artifacts/validation-results.json`)) + improvements = `Architecture Improvements:\n${validation.dimensions.map(d => + ` ${d.name}: ${d.baseline} -> ${d.current} (${d.improvement})`).join('\n')}` +} catch {} + +console.log(` +============================================ +ARCHITECTURE OPTIMIZATION COMPLETE + +Deliverables: + - Architecture Baseline: artifacts/architecture-baseline.json + - Architecture Report: artifacts/architecture-report.md + - Refactoring Plan: artifacts/refactoring-plan.md + - Validation Results: artifacts/validation-results.json + - Review Report: artifacts/review-report.md + +${improvements} + +Pipeline: ${completed.length}/${tasks.length} tasks +Session: ${sessionFolder} +============================================ +`) + +// 3. Completion action +if (!AUTO_YES) { + AskUserQuestion({ + questions: [{ + question: "Architecture optimization complete. What would you like to do?", + header: "Completion", + multiSelect: false, + options: [ + { label: "Archive & Clean (Recommended)", description: "Archive session, output final summary" }, + { label: "Keep Active", description: "Keep session for follow-up work" }, + { label: "Retry Failed", description: "Re-run failed tasks" } + ] + }] + }) +} +``` + +**Success Criteria**: +- Post-wave interactive processing complete +- User informed of results and improvement metrics + +--- + +### Phase 4: Results Aggregation + +**Objective**: Generate final results and human-readable report. + +```javascript +// 1. Export results.csv +Bash(`cp ${sessionFolder}/tasks.csv ${sessionFolder}/results.csv`) + +// 2. 
Generate context.md +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +let contextMd = `# Architecture Optimization Report\n\n` +contextMd += `**Session**: ${sessionId}\n` +contextMd += `**Date**: ${getUtc8ISOString().substring(0, 10)}\n\n` + +contextMd += `## Summary\n` +contextMd += `| Status | Count |\n|--------|-------|\n` +contextMd += `| Completed | ${tasks.filter(t => t.status === 'completed').length} |\n` +contextMd += `| Failed | ${tasks.filter(t => t.status === 'failed').length} |\n` +contextMd += `| Skipped | ${tasks.filter(t => t.status === 'skipped').length} |\n\n` + +contextMd += `## Deliverables\n\n` +contextMd += `| Artifact | Path |\n|----------|------|\n` +contextMd += `| Architecture Baseline | artifacts/architecture-baseline.json |\n` +contextMd += `| Architecture Report | artifacts/architecture-report.md |\n` +contextMd += `| Refactoring Plan | artifacts/refactoring-plan.md |\n` +contextMd += `| Validation Results | artifacts/validation-results.json |\n` +contextMd += `| Review Report | artifacts/review-report.md |\n\n` + +const maxWave = Math.max(...tasks.map(t => t.wave)) +contextMd += `## Wave Execution\n\n` +for (let w = 1; w <= maxWave; w++) { + const waveTasks = tasks.filter(t => t.wave === w) + contextMd += `### Wave ${w}\n\n` + for (const t of waveTasks) { + const icon = t.status === 'completed' ? '[DONE]' : t.status === 'failed' ? '[FAIL]' : '[SKIP]' + contextMd += `${icon} **${t.title}** [${t.role}] ${t.verdict ? 
`(${t.verdict})` : ''} ${t.findings || ''}\n\n` + } +} + +Write(`${sessionFolder}/context.md`, contextMd) + +console.log(`Results exported to: ${sessionFolder}/results.csv`) +console.log(`Report generated at: ${sessionFolder}/context.md`) +``` + +**Success Criteria**: +- results.csv exported (all tasks, both modes) +- context.md generated with deliverables list +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +All agents (csv-wave and interactive) share a single `discoveries.ndjson` file for cross-task knowledge exchange. + +**Format**: One JSON object per line (NDJSON): + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"ANALYZE-001","type":"cycle_found","data":{"modules":["auth","user"],"depth":2,"description":"Circular dependency between auth and user modules"}} +{"ts":"2026-03-08T10:05:00Z","worker":"REFACTOR-001","type":"file_modified","data":{"file":"src/auth/index.ts","change":"Extracted interface to break cycle","lines_added":15}} +``` + +**Discovery Types**: + +| Type | Data Schema | Description | +|------|-------------|-------------| +| `cycle_found` | `{modules, depth, description}` | Circular dependency detected | +| `god_class_found` | `{file, loc, methods, description}` | God Class/Module identified | +| `coupling_issue` | `{module, fan_in, fan_out, description}` | High coupling detected | +| `dead_code_found` | `{file, type, description}` | Dead code or dead export found | +| `file_modified` | `{file, change, lines_added}` | File change recorded | +| `pattern_found` | `{pattern_name, location, description}` | Code pattern identified | +| `metric_measured` | `{metric, value, unit, module}` | Architecture metric measured | +| `artifact_produced` | `{name, path, producer, type}` | Deliverable created | + +**Protocol**: +1. Agents MUST read discoveries.ndjson at start of execution +2. Agents MUST append relevant discoveries during execution +3. Agents MUST NOT modify or delete existing entries +4. 
Deduplication by `{type, data.file}` or `{type, data.modules}` key + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Circular dependency in tasks | Detect in wave computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent tasks in later waves | +| Interactive agent timeout | Urge convergence via send_input, then close if still timed out | +| Interactive agent failed | Mark as failed, skip dependents | +| All agents in wave failed | Log error, offer retry or abort | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| Review-fix cycle exceeds 3 iterations | Escalate to user with summary of remaining issues | +| Validation fails on build | Create FIX task with compilation error details | +| Architecture baseline unavailable | Fall back to static analysis estimates | +| Continue mode: no session found | List available sessions, prompt user to select | + +--- + +## Core Rules + +1. **Start Immediately**: First action is session initialization, then Phase 0/1 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. **CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive) +4. **CSV First**: Default to csv-wave for tasks; only use interactive when interaction pattern requires it +5. **Context Propagation**: prev_context built from master CSV, not from memory +6. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson -- both mechanisms share it +7. **Skip on Failure**: If a dependency failed, skip the dependent task (regardless of mechanism) +8. **Max 3 Fix Cycles**: Review-fix cycle capped at 3 iterations; escalate to user after +9. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +10. 
**DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-arch-opt/agents/completion-handler.md b/.codex/skills/team-arch-opt/agents/completion-handler.md new file mode 100644 index 00000000..48625512 --- /dev/null +++ b/.codex/skills/team-arch-opt/agents/completion-handler.md @@ -0,0 +1,138 @@ +# Completion Handler Agent + +Handle pipeline completion action for architecture optimization: present results summary, offer Archive/Keep/Export options, execute chosen action. + +## Identity + +- **Type**: `interactive` +- **Responsibility**: Pipeline completion and session lifecycle management + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Present complete pipeline summary with improvement metrics +- Offer completion action choices +- Execute chosen action (archive, keep, export) +- Produce structured output + +### MUST NOT + +- Skip presenting results summary +- Execute destructive actions without confirmation +- Modify source code + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | builtin | Load result artifacts | +| `Write` | builtin | Write export files | +| `Bash` | builtin | Archive/cleanup operations | +| `AskUserQuestion` | builtin | Present completion choices | + +--- + +## Execution + +### Phase 1: Results Collection + +**Objective**: Gather all pipeline results for summary. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| tasks.csv | Yes | Master task state | +| Architecture baseline | Yes | Pre-refactoring metrics | +| Validation results | Yes | Post-refactoring metrics | +| Review report | Yes | Code review findings | + +**Steps**: + +1. Read tasks.csv -- count completed/failed/skipped +2. Read architecture-baseline.json -- extract before metrics +3. Read validation-results.json -- extract after metrics, compute improvements +4. 
Read review-report.md -- extract final verdict + +**Output**: Compiled results summary + +--- + +### Phase 2: Present and Choose + +**Objective**: Display results and get user's completion choice. + +**Steps**: + +1. Display pipeline summary with improvement metrics +2. Present completion action: + +```javascript +AskUserQuestion({ + questions: [{ + question: "Architecture optimization complete. What would you like to do?", + header: "Completion", + multiSelect: false, + options: [ + { label: "Archive & Clean (Recommended)", description: "Archive session, output final summary" }, + { label: "Keep Active", description: "Keep session for follow-up work or inspection" }, + { label: "Export Results", description: "Export deliverables to a specified location" } + ] + }] +}) +``` + +**Output**: User's choice + +--- + +### Phase 3: Execute Action + +**Objective**: Execute the chosen completion action. + +| Choice | Action | +|--------|--------| +| Archive & Clean | Copy results.csv and context.md to archive, mark session completed | +| Keep Active | Mark session as paused, leave all artifacts in place | +| Export Results | Copy key deliverables to user-specified location | + +--- + +## Structured Output Template + +``` +## Pipeline Summary +- Tasks: X completed, Y failed, Z skipped +- Duration: estimated from timestamps + +## Architecture Improvements +- Metric 1: before -> after (improvement %) +- Metric 2: before -> after (improvement %) + +## Deliverables +- Architecture Report: path +- Refactoring Plan: path +- Validation Results: path +- Review Report: path + +## Action Taken +- Choice: Archive & Clean / Keep Active / Export Results +- Status: completed +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Result artifacts missing | Report partial summary with available data | +| Archive operation fails | Default to Keep Active | +| Export path invalid | Ask user for valid path | +| Timeout approaching | Default to Keep Active | 
diff --git a/.codex/skills/team-arch-opt/agents/fix-cycle-handler.md b/.codex/skills/team-arch-opt/agents/fix-cycle-handler.md new file mode 100644 index 00000000..fb8e865a --- /dev/null +++ b/.codex/skills/team-arch-opt/agents/fix-cycle-handler.md @@ -0,0 +1,146 @@ +# Fix Cycle Handler Agent + +Manage the review-fix iteration cycle for architecture refactoring. Reads validation/review feedback, applies targeted fixes, re-validates, up to 3 iterations. + +## Identity + +- **Type**: `interactive` +- **Responsibility**: Iterative fix-verify cycle for refactoring issues + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Read validation results and review report to understand failures +- Apply targeted fixes addressing specific feedback items +- Re-validate after each fix attempt +- Track iteration count (max 3) +- Produce structured output with fix summary + +### MUST NOT + +- Skip reading feedback before attempting fixes +- Apply broad changes unrelated to feedback +- Exceed 3 fix iterations +- Modify code outside the scope of reported issues + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | builtin | Load feedback artifacts and source files | +| `Edit` | builtin | Apply targeted code fixes | +| `Write` | builtin | Write updated artifacts | +| `Bash` | builtin | Run build/test validation | +| `Grep` | builtin | Search for patterns | +| `Glob` | builtin | Find files | + +--- + +## Execution + +### Phase 1: Feedback Loading + +**Objective**: Load and parse validation/review feedback. 
+ +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Validation results | Yes (if validation failed) | From artifacts/validation-results.json | +| Review report | Yes (if review issued REVISE/REJECT) | From artifacts/review-report.md | +| Refactoring plan | Yes | Original plan for reference | +| Discoveries | No | Shared findings | + +**Steps**: + +1. Read validation-results.json -- identify failed dimensions (build, test, metrics, API) +2. Read review-report.md -- identify Critical/High findings with file:line references +3. Categorize issues by type and priority + +**Output**: Prioritized list of issues to fix + +--- + +### Phase 2: Fix Implementation (Iterative) + +**Objective**: Apply fixes and re-validate, up to 3 rounds. + +**Steps**: + +For each iteration (1..3): + +1. **Apply fixes**: + - Address highest-severity issues first + - Make minimal, targeted changes at reported file:line locations + - Update imports if structural changes are needed + - Preserve existing behavior + +2. **Self-validate**: + - Run build check (no new compilation errors) + - Run test suite (no new test failures) + - Verify fix addresses the specific concern raised + +3. **Check convergence**: + +| Validation Result | Action | +|-------------------|--------| +| All checks pass | Exit loop, report success | +| Some checks still fail, iteration < 3 | Continue to next iteration | +| Still failing at iteration 3 | Report remaining issues for escalation | + +**Output**: Fix results per iteration + +--- + +### Phase 3: Result Reporting + +**Objective**: Produce final fix cycle summary. + +**Steps**: + +1. Update validation-results.json with post-fix metrics +2. Append fix discoveries to discoveries.ndjson +3. 
Report final status + +--- + +## Structured Output Template + +``` +## Summary +- Fix cycle completed: N iterations, M issues resolved, K remaining + +## Iterations +### Iteration 1 +- Fixed: [list of fixes applied with file:line] +- Validation: [pass/fail per dimension] + +### Iteration 2 (if needed) +- Fixed: [list of fixes] +- Validation: [pass/fail] + +## Final Status +- verdict: PASS | PARTIAL | ESCALATE +- Remaining issues (if any): [list] + +## Artifacts Updated +- artifacts/validation-results.json (updated metrics) +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Fix introduces new errors | Revert fix, try alternative approach | +| Cannot reproduce reported issue | Log as resolved-by-environment, continue | +| Fix scope exceeds current files | Report scope expansion needed, escalate | +| Timeout approaching | Output partial results with iteration count | +| 3 iterations exhausted | Report remaining issues for user escalation | diff --git a/.codex/skills/team-arch-opt/agents/plan-reviewer.md b/.codex/skills/team-arch-opt/agents/plan-reviewer.md new file mode 100644 index 00000000..1b610919 --- /dev/null +++ b/.codex/skills/team-arch-opt/agents/plan-reviewer.md @@ -0,0 +1,150 @@ +# Plan Reviewer Agent + +Review architecture report or refactoring plan at user checkpoints, providing interactive approval or revision requests. 
+ +## Identity + +- **Type**: `interactive` +- **Responsibility**: Review and approve/revise plans before execution proceeds + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Read the architecture report or refactoring plan being reviewed +- Produce structured output with clear APPROVE/REVISE verdict +- Include specific file:line references in findings + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Modify source code directly +- Produce unstructured output +- Approve without actually reading the plan + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | builtin | Load plan artifacts and project files | +| `Grep` | builtin | Search for patterns in codebase | +| `Glob` | builtin | Find files by pattern | +| `Bash` | builtin | Run build/test commands | + +### Tool Usage Patterns + +**Read Pattern**: Load context files before review +``` +Read("{session_folder}/artifacts/architecture-report.md") +Read("{session_folder}/artifacts/refactoring-plan.md") +Read("{session_folder}/discoveries.ndjson") +``` + +--- + +## Execution + +### Phase 1: Context Loading + +**Objective**: Load the plan or report to review. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Architecture report | Yes (if reviewing analysis) | Ranked issue list from analyzer | +| Refactoring plan | Yes (if reviewing design) | Prioritized plan from designer | +| Discoveries | No | Shared findings from prior stages | + +**Steps**: + +1. Read the artifact being reviewed from session artifacts folder +2. Read discoveries.ndjson for additional context +3. Identify which checkpoint this review corresponds to (CP-1 for analysis, CP-2 for design) + +**Output**: Loaded plan context for review + +--- + +### Phase 2: Plan Review + +**Objective**: Evaluate plan quality, completeness, and feasibility. + +**Steps**: + +1. 
**For architecture report review (CP-1)**: + - Verify all issue categories are covered (cycles, coupling, cohesion, God Classes, dead code, API bloat) + - Check that severity rankings are justified with evidence + - Validate baseline metrics are quantified and reproducible + - Check scope coverage matches original requirement + +2. **For refactoring plan review (CP-2)**: + - Verify each refactoring has unique REFACTOR-ID and self-contained detail + - Check priority assignments follow impact/effort matrix + - Validate target files are non-overlapping between refactorings + - Verify success criteria are measurable + - Check that implementation guidance is actionable + - Assess risk levels and mitigation strategies + +3. **Issue classification**: + +| Finding Severity | Condition | Impact | +|------------------|-----------|--------| +| Critical | Missing key analysis area or infeasible plan | REVISE required | +| High | Unclear criteria or overlapping targets | REVISE recommended | +| Medium | Minor gaps in coverage or detail | Note for improvement | +| Low | Style or formatting issues | Informational | + +**Output**: Review findings with severity classifications + +--- + +### Phase 3: Verdict + +**Objective**: Issue APPROVE or REVISE verdict. + +| Verdict | Condition | Action | +|---------|-----------|--------| +| APPROVE | No Critical or High findings | Plan is ready for next stage | +| REVISE | Has Critical or High findings | Return specific feedback for revision | + +**Output**: Verdict with detailed feedback + +--- + +## Structured Output Template + +``` +## Summary +- One-sentence verdict: APPROVE or REVISE with rationale + +## Findings +- Finding 1: [severity] description with artifact reference +- Finding 2: [severity] description with specific section reference + +## Verdict +- APPROVE: Plan is ready for execution + OR +- REVISE: Specific items requiring revision + 1. Issue description + suggested fix + 2. 
Issue description + suggested fix + +## Recommendations +- Optional improvement suggestions (non-blocking) +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Artifact file not found | Report in findings, request re-generation | +| Plan structure invalid | Report as Critical finding, REVISE verdict | +| Scope mismatch | Report in findings, note for coordinator | +| Timeout approaching | Output current findings with "PARTIAL" status | diff --git a/.codex/skills/team-arch-opt/instructions/agent-instruction.md b/.codex/skills/team-arch-opt/instructions/agent-instruction.md new file mode 100644 index 00000000..04acee19 --- /dev/null +++ b/.codex/skills/team-arch-opt/instructions/agent-instruction.md @@ -0,0 +1,114 @@ +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS +1. Read shared discoveries: {session_folder}/discoveries.ndjson (if exists, skip if not) +2. Read project context: .workflow/project-tech.json (if exists) +3. Read task schema: .codex/skills/team-arch-opt/schemas/tasks-schema.md + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Description**: {description} +**Role**: {role} +**Issue Type**: {issue_type} +**Priority**: {priority} +**Target Files**: {target_files} + +### Previous Tasks' Findings (Context) +{prev_context} + +--- + +## Execution Protocol + +1. **Read discoveries**: Load {session_folder}/discoveries.ndjson for shared exploration findings +2. **Use context**: Apply previous tasks' findings from prev_context above +3. 
**Execute by role**: + + **If role = analyzer**: + - Scan codebase for architecture issues within target scope + - Build import/require graph, detect circular dependencies + - Identify God Classes (>500 LOC, >10 public methods) + - Calculate coupling (fan-in/fan-out) and cohesion metrics + - Detect dead code, dead exports, layering violations + - Collect quantified baseline metrics + - Rank top 3-7 issues by severity (Critical/High/Medium) + - Write `{session_folder}/artifacts/architecture-baseline.json` (metrics) + - Write `{session_folder}/artifacts/architecture-report.md` (ranked issues) + + **If role = designer**: + - Read architecture report and baseline from {session_folder}/artifacts/ + - For each issue, select refactoring strategy by type: + - CYCLE: interface extraction, dependency inversion, mediator + - GOD_CLASS: SRP decomposition, extract class/module + - COUPLING: introduce interface/abstraction, DI, events + - DUPLICATION: extract shared utility/base class + - LAYER_VIOLATION: move to correct layer, add facade + - DEAD_CODE: safe removal with reference verification + - API_BLOAT: privatize internals, barrel file cleanup + - Prioritize by impact/effort: P0 (high impact+low effort) to P3 (low impact or high effort) + - Assign unique REFACTOR-IDs (REFACTOR-001, 002, ...) 
with non-overlapping file targets + - Write `{session_folder}/artifacts/refactoring-plan.md` + + **If role = refactorer**: + - Read refactoring plan from {session_folder}/artifacts/refactoring-plan.md + - Apply refactorings in priority order (P0 first) + - Preserve existing behavior -- refactoring must not change functionality + - Update ALL import references when moving/renaming modules + - Update ALL test files referencing moved/renamed symbols + - Verify no dangling imports after module moves + + **If role = validator**: + - Read baseline from {session_folder}/artifacts/architecture-baseline.json + - Read plan from {session_folder}/artifacts/refactoring-plan.md + - Build validation: compile/type-check, zero new errors + - Test validation: run test suite, no new failures + - Metric validation: coupling improved or neutral, no new cycles + - API validation: public signatures preserved, no dangling references + - Write `{session_folder}/artifacts/validation-results.json` + - Set verdict: PASS / WARN / FAIL + + **If role = reviewer**: + - Read plan from {session_folder}/artifacts/refactoring-plan.md + - Review changed files across 5 dimensions: + - Correctness: no behavior changes, all references updated + - Pattern consistency: follows existing conventions + - Completeness: imports, tests, configs all updated + - Migration safety: no dangling refs, backward compatible + - Best practices: SOLID, appropriate abstraction + - Write `{session_folder}/artifacts/review-report.md` + - Set verdict: APPROVE / REVISE / REJECT + +4. **Share discoveries**: Append exploration findings to shared board: + ```bash + echo '{"ts":"","worker":"{id}","type":"","data":{...}}' >> {session_folder}/discoveries.ndjson + ``` +5. 
**Report result**: Return JSON via report_agent_job_result + +### Discovery Types to Share +- `cycle_found`: `{modules, depth, description}` -- Circular dependency detected +- `god_class_found`: `{file, loc, methods, description}` -- God Class identified +- `coupling_issue`: `{module, fan_in, fan_out, description}` -- High coupling +- `dead_code_found`: `{file, type, description}` -- Dead code found +- `layer_violation`: `{from, to, description}` -- Layering violation +- `file_modified`: `{file, change, lines_added}` -- File change recorded +- `pattern_found`: `{pattern_name, location, description}` -- Pattern identified +- `metric_measured`: `{metric, value, unit, module}` -- Metric measured +- `artifact_produced`: `{name, path, producer, type}` -- Deliverable created + +--- + +## Output (report_agent_job_result) + +Return JSON: +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Key discoveries and implementation notes (max 500 chars)", + "verdict": "PASS|WARN|FAIL|APPROVE|REVISE|REJECT or empty", + "artifacts_produced": "semicolon-separated artifact paths", + "error": "" +} diff --git a/.codex/skills/team-arch-opt/schemas/tasks-schema.md b/.codex/skills/team-arch-opt/schemas/tasks-schema.md new file mode 100644 index 00000000..14eedd7b --- /dev/null +++ b/.codex/skills/team-arch-opt/schemas/tasks-schema.md @@ -0,0 +1,174 @@ +# Team Architecture Optimization -- CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### Input Columns (Set by Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | string | Yes | Unique task identifier (PREFIX-NNN) | `"ANALYZE-001"` | +| `title` | string | Yes | Short task title | `"Analyze architecture"` | +| `description` | string | Yes | Detailed task description (self-contained) with goal, inputs, outputs, success criteria | `"Analyze codebase architecture..."` | +| `role` | enum | Yes | Worker role: `analyzer`, 
`designer`, `refactorer`, `validator`, `reviewer` | `"analyzer"` | +| `issue_type` | string | No | Architecture issue category: CYCLE, COUPLING, COHESION, GOD_CLASS, DUPLICATION, LAYER_VIOLATION, DEAD_CODE, API_BLOAT | `"CYCLE"` | +| `priority` | enum | No | P0 (Critical), P1 (High), P2 (Medium), P3 (Low) | `"P0"` | +| `target_files` | string | No | Semicolon-separated file paths to focus on | `"src/auth/index.ts;src/user/index.ts"` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"ANALYZE-001"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"ANALYZE-001"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[ANALYZE-001] Found 5 architecture issues..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` -> `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Found 3 circular deps, 2 God Classes..."` | +| `verdict` | string | Validation/review verdict: PASS, WARN, FAIL, APPROVE, REVISE, REJECT | `"PASS"` | +| `artifacts_produced` | string | Semicolon-separated paths of produced artifacts | `"artifacts/architecture-report.md"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution | + +Interactive tasks appear in 
master CSV for dependency tracking but are NOT included in wave-{N}.csv files. + +--- + +### Role Prefix Mapping + +| Role | Prefix | Stage | Responsibility | +|------|--------|-------|----------------| +| analyzer | ANALYZE | 1 | Architecture analysis, baseline metrics, issue identification | +| designer | DESIGN | 2 | Refactoring plan design, strategy selection, prioritization | +| refactorer | REFACTOR / FIX | 3 | Code implementation, refactoring application, targeted fixes | +| validator | VALIDATE | 4 | Build checks, test suite, metric validation, API compatibility | +| reviewer | REVIEW | 4 | Code review for correctness, patterns, completeness, safety | + +--- + +### Example Data + +```csv +id,title,description,role,issue_type,priority,target_files,deps,context_from,exec_mode,wave,status,findings,verdict,artifacts_produced,error +"ANALYZE-001","Analyze architecture","PURPOSE: Analyze codebase architecture to identify structural issues\nTASK:\n- Build import graph, detect circular deps\n- Identify God Classes (>500 LOC, >10 methods)\n- Calculate coupling/cohesion metrics\n- Detect dead code and dead exports\nINPUT: Codebase under target scope\nOUTPUT: artifacts/architecture-baseline.json + artifacts/architecture-report.md\nSUCCESS: Ranked issue list with severity, baseline metrics collected\nSESSION: .workflow/.csv-wave/tao-example-20260308","analyzer","","","","","","csv-wave","1","pending","","","","" +"DESIGN-001","Design refactoring plan","PURPOSE: Design prioritized refactoring plan from architecture report\nTASK:\n- For each issue, select refactoring strategy\n- Prioritize by impact/effort ratio (P0-P3)\n- Define measurable success criteria per refactoring\n- Assign unique REFACTOR-IDs with non-overlapping file targets\nINPUT: artifacts/architecture-report.md + artifacts/architecture-baseline.json\nOUTPUT: artifacts/refactoring-plan.md\nSUCCESS: Prioritized plan with self-contained REFACTOR blocks\nSESSION: 
.workflow/.csv-wave/tao-example-20260308","designer","","","","ANALYZE-001","ANALYZE-001","csv-wave","2","pending","","","","" +"REFACTOR-001","Implement refactorings","PURPOSE: Implement architecture refactoring changes per plan\nTASK:\n- Apply refactorings in priority order (P0 first)\n- Update all import references when moving/renaming\n- Update all test files referencing moved symbols\n- Preserve existing behavior\nINPUT: artifacts/refactoring-plan.md\nOUTPUT: Modified source files\nSUCCESS: All planned structural changes applied, no dangling imports\nSESSION: .workflow/.csv-wave/tao-example-20260308","refactorer","","","","DESIGN-001","DESIGN-001","csv-wave","3","pending","","","","" +"VALIDATE-001","Validate refactoring","PURPOSE: Validate refactoring improves architecture without breaking functionality\nTASK:\n- Build check: zero new compilation errors\n- Test suite: all previously passing tests still pass\n- Metrics: coupling improved or neutral, no new cycles\n- API: public signatures preserved\nINPUT: artifacts/architecture-baseline.json + artifacts/refactoring-plan.md\nOUTPUT: artifacts/validation-results.json\nSUCCESS: All dimensions PASS\nSESSION: .workflow/.csv-wave/tao-example-20260308","validator","","","","REFACTOR-001","REFACTOR-001","csv-wave","4","pending","","","","" +"REVIEW-001","Review refactoring code","PURPOSE: Review refactoring changes for correctness and quality\nTASK:\n- Correctness: no behavior changes, all references updated\n- Pattern consistency: follows existing conventions\n- Completeness: imports, tests, configs all updated\n- Migration safety: no dangling refs, backward compatible\n- Best practices: SOLID principles, appropriate abstraction\nINPUT: artifacts/refactoring-plan.md + changed files\nOUTPUT: artifacts/review-report.md\nSUCCESS: APPROVE verdict (no Critical/High findings)\nSESSION: .workflow/.csv-wave/tao-example-20260308","reviewer","","","","REFACTOR-001","REFACTOR-001","csv-wave","4","pending","","","","" +``` + 
+--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +--------------------- -------------------- ----------------- +id ----------> id ----------> id +title ----------> title ----------> (reads) +description ----------> description ----------> (reads) +role ----------> role ----------> (reads) +issue_type ----------> issue_type ----------> (reads) +priority ----------> priority ----------> (reads) +target_files----------> target_files----------> (reads) +deps ----------> deps ----------> (reads) +context_from----------> context_from----------> (reads) +exec_mode ----------> exec_mode ----------> (reads) + wave ----------> (reads) + prev_context ----------> (reads) + status + findings + verdict + artifacts_produced + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "ANALYZE-001", + "status": "completed", + "findings": "Found 5 architecture issues: 2 circular deps (auth<->user, service<->repo), 1 God Class (UserManager 850 LOC), 1 dead code cluster (src/legacy/), 1 API bloat (utils/ exports 45 symbols, 12 unused).", + "verdict": "", + "artifacts_produced": "artifacts/architecture-baseline.json;artifacts/architecture-report.md", + "error": "" +} +``` + +Interactive tasks output via structured text or JSON written to `interactive/{id}-result.json`. 
+ +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `cycle_found` | `data.modules` (sorted) | `{modules, depth, description}` | Circular dependency detected | +| `god_class_found` | `data.file` | `{file, loc, methods, description}` | God Class/Module identified | +| `coupling_issue` | `data.module` | `{module, fan_in, fan_out, description}` | High coupling detected | +| `dead_code_found` | `data.file+data.type` | `{file, type, description}` | Dead code or dead export | +| `layer_violation` | `data.from+data.to` | `{from, to, description}` | Layering violation detected | +| `file_modified` | `data.file` | `{file, change, lines_added}` | File change recorded | +| `pattern_found` | `data.pattern_name+data.location` | `{pattern_name, location, description}` | Code pattern identified | +| `metric_measured` | `data.metric+data.module` | `{metric, value, unit, module}` | Architecture metric measured | +| `artifact_produced` | `data.path` | `{name, path, producer, type}` | Deliverable created | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"ANALYZE-001","type":"cycle_found","data":{"modules":["auth","user"],"depth":2,"description":"Circular dependency: auth imports user, user imports auth"}} +{"ts":"2026-03-08T10:01:00Z","worker":"ANALYZE-001","type":"god_class_found","data":{"file":"src/services/UserManager.ts","loc":850,"methods":15,"description":"UserManager handles auth, profile, permissions, notifications"}} +{"ts":"2026-03-08T10:05:00Z","worker":"ANALYZE-001","type":"metric_measured","data":{"metric":"coupling_score","value":0.72,"unit":"normalized","module":"src/auth/"}} +{"ts":"2026-03-08T10:20:00Z","worker":"REFACTOR-001","type":"file_modified","data":{"file":"src/auth/index.ts","change":"Extracted IAuthService interface to break cycle","lines_added":25}} 
+{"ts":"2026-03-08T10:25:00Z","worker":"REFACTOR-001","type":"artifact_produced","data":{"name":"refactoring-summary","path":"artifacts/refactoring-plan.md","producer":"designer","type":"markdown"}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. + +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| CSV task findings | Interactive task | Injected via spawn message or send_input | +| Interactive task result | CSV task prev_context | Read from interactive/{id}-result.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Description non-empty | Every task has description | "Empty description for task: {id}" | +| Status enum | status in {pending, completed, failed, skipped} | "Invalid status: {status}" | +| Role valid | role in {analyzer, designer, refactorer, validator, reviewer} | "Invalid role: {role}" | +| Verdict enum | verdict in {PASS, WARN, FAIL, APPROVE, REVISE, REJECT, ""} | "Invalid verdict: {verdict}" | +| Cross-mechanism deps | Interactive to CSV deps resolve correctly | "Cross-mechanism dependency unresolvable: {id}" | diff --git a/.codex/skills/team-brainstorm/SKILL.md b/.codex/skills/team-brainstorm/SKILL.md new file mode 100644 index 00000000..878f4333 --- /dev/null +++ b/.codex/skills/team-brainstorm/SKILL.md @@ -0,0 +1,687 @@ +--- 
+name: team-brainstorm +description: Multi-agent brainstorming pipeline with Generator-Critic loop. Generates ideas, challenges assumptions, synthesizes themes, and evaluates proposals. Supports Quick, Deep, and Full pipeline modes. +argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"topic description\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults. + +# Team Brainstorm + +## Usage + +```bash +$team-brainstorm "How should we approach microservices migration?" +$team-brainstorm -c 4 "Innovation strategies for AI-powered developer tools" +$team-brainstorm -y "Quick brainstorm on naming conventions" +$team-brainstorm --continue "brs-microservices-20260308" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 3) +- `--continue`: Resume existing session + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Multi-agent brainstorming with Generator-Critic loop: generate ideas across multiple angles, challenge assumptions, synthesize themes, and evaluate proposals. Supports three pipeline modes (Quick/Deep/Full) with configurable depth and parallel ideation. 
+ +**Execution Model**: Hybrid — CSV wave pipeline (primary) + individual agent spawn (secondary for Generator-Critic control) + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ TEAM BRAINSTORM WORKFLOW │ +├─────────────────────────────────────────────────────────────────────────┤ +│ │ +│ Phase 0: Pre-Wave Interactive │ +│ ├─ Topic clarification + complexity scoring │ +│ ├─ Pipeline mode selection (quick/deep/full) │ +│ └─ Output: refined requirements for decomposition │ +│ │ +│ Phase 1: Requirement → CSV + Classification │ +│ ├─ Parse topic into brainstorm tasks per selected pipeline │ +│ ├─ Assign roles: ideator, challenger, synthesizer, evaluator │ +│ ├─ Classify tasks: csv-wave | interactive (exec_mode) │ +│ ├─ Compute dependency waves (topological sort → depth grouping) │ +│ ├─ Generate tasks.csv with wave + exec_mode columns │ +│ └─ User validates task breakdown (skip if -y) │ +│ │ +│ Phase 2: Wave Execution Engine (Extended) │ +│ ├─ For each wave (1..N): │ +│ │ ├─ Execute pre-wave interactive tasks (if any) │ +│ │ ├─ Build wave CSV (filter csv-wave tasks for this wave) │ +│ │ ├─ Inject previous findings into prev_context column │ +│ │ ├─ spawn_agents_on_csv(wave CSV) │ +│ │ ├─ Execute post-wave interactive tasks (if any) │ +│ │ ├─ Merge all results into master tasks.csv │ +│ │ └─ Check: any failed? 
→ skip dependents │ +│ └─ discoveries.ndjson shared across all modes (append-only) │ +│ │ +│ Phase 3: Post-Wave Interactive │ +│ ├─ Generator-Critic (GC) loop control │ +│ ├─ If critique severity >= HIGH: trigger revision wave │ +│ └─ Max 2 GC rounds, then force convergence │ +│ │ +│ Phase 4: Results Aggregation │ +│ ├─ Export final results.csv │ +│ ├─ Generate context.md with all findings │ +│ ├─ Display summary: completed/failed/skipped per wave │ +│ └─ Offer: view results | retry failed | done │ +│ │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, clarification, inline utility | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Idea generation (single angle) | `csv-wave` | +| Parallel ideation (Full pipeline, multiple angles) | `csv-wave` (parallel in same wave) | +| Idea revision (GC loop) | `csv-wave` | +| Critique / challenge | `csv-wave` | +| Synthesis (theme extraction) | `csv-wave` | +| Evaluation (scoring / ranking) | `csv-wave` | +| GC loop control (severity check → decide revision or convergence) | `interactive` | +| Topic clarification (Phase 0) | `interactive` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,role,angle,gc_round,deps,context_from,exec_mode,wave,status,findings,gc_signal,severity_summary,error +"IDEA-001","Multi-angle idea generation","Generate 3+ ideas per angle with title, description, assumption, impact","ideator","Technical;Product;Innovation","0","","","csv-wave","1","pending","","","","" +"CHALLENGE-001","Critique generated ideas","Challenge each idea across assumption, 
feasibility, risk, competition dimensions","challenger","","0","IDEA-001","IDEA-001","csv-wave","2","pending","","","",""
+"GC-CHECK-001","GC loop decision","Evaluate critique severity and decide: revision or convergence","gc-controller","","1","CHALLENGE-001","CHALLENGE-001","interactive","3","pending","","","",""
+```
+
+**Columns**:
+
+| Column | Phase | Description |
+|--------|-------|-------------|
+| `id` | Input | Unique task identifier (string) |
+| `title` | Input | Short task title |
+| `description` | Input | Detailed task description |
+| `role` | Input | Worker role: ideator, challenger, synthesizer, evaluator; plus gc-controller for interactive GC control tasks |
+| `angle` | Input | Brainstorming angle(s) for ideator tasks (semicolon-separated) |
+| `gc_round` | Input | Generator-Critic round number (0 = initial, 1+ = revision) |
+| `deps` | Input | Semicolon-separated dependency task IDs |
+| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs |
+| `exec_mode` | Input | `csv-wave` or `interactive` |
+| `wave` | Computed | Wave number (computed by topological sort, 1-based) |
+| `status` | Output | `pending` → `completed` / `failed` / `skipped` |
+| `findings` | Output | Key discoveries or implementation notes (max 500 chars) |
+| `gc_signal` | Output | Generator-Critic signal: `REVISION_NEEDED` or `CONVERGED` (challenger only) |
+| `severity_summary` | Output | Severity count: e.g. "CRITICAL:1 HIGH:2 MEDIUM:3 LOW:1" |
+| `error` | Output | Error message if failed (empty if success) |
+
+### Per-Wave CSV (Temporary)
+
+Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). 
+ +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| gc-controller | agents/gc-controller.md | 2.3 (wait-respond) | Evaluate critique severity, decide revision vs convergence | post-wave (after challenger wave) | +| topic-clarifier | agents/topic-clarifier.md | 2.3 (wait-respond) | Clarify topic, assess complexity, select pipeline mode | standalone (Phase 0) | + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. + +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state — all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 4 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 4 | +| `interactive/{id}-result.json` | Results from interactive tasks | Created per interactive task | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ +├── tasks.csv # Master state (all tasks, both modes) +├── results.csv # Final results export +├── discoveries.ndjson # Shared discovery board (all agents) +├── context.md # Human-readable report +├── wave-{N}.csv # Temporary per-wave input (csv-wave only) +└── interactive/ # Interactive task artifacts + └── {id}-result.json # Per-task results +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +const getUtc8ISOString = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString() + +// Parse flags +const AUTO_YES = $ARGUMENTS.includes('--yes') || $ARGUMENTS.includes('-y') +const 
continueMode = $ARGUMENTS.includes('--continue') +const concurrencyMatch = $ARGUMENTS.match(/(?:--concurrency|-c)\s+(\d+)/) +const maxConcurrency = concurrencyMatch ? parseInt(concurrencyMatch[1]) : 3 + +// Clean requirement text (remove flags) +const topic = $ARGUMENTS + .replace(/--yes|-y|--continue|--concurrency\s+\d+|-c\s+\d+/g, '') + .trim() + +const slug = topic.toLowerCase() + .replace(/[^a-z0-9\u4e00-\u9fa5]+/g, '-') + .substring(0, 40) +const dateStr = getUtc8ISOString().substring(0, 10).replace(/-/g, '') +let sessionId = `brs-${slug}-${dateStr}` +let sessionFolder = `.workflow/.csv-wave/${sessionId}` + +// Continue mode: find existing session +if (continueMode) { + const existing = Bash(`ls -t .workflow/.csv-wave/brs-* 2>/dev/null | head -1`).trim() + if (existing) { + sessionId = existing.split('/').pop() + sessionFolder = existing + // Read existing tasks.csv, find incomplete waves, resume from Phase 2 + } +} + +Bash(`mkdir -p ${sessionFolder}/interactive`) +``` + +--- + +### Phase 0: Pre-Wave Interactive + +**Objective**: Clarify topic, assess complexity, and select pipeline mode. + +**Execution**: + +```javascript +const clarifier = spawn_agent({ + message: ` +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS (Agent Execute) +1. **Read role definition**: .codex/skills/team-brainstorm/agents/topic-clarifier.md (MUST read first) +2. Read: .workflow/project-tech.json (if exists) + +--- + +Goal: Clarify brainstorming topic and select pipeline mode +Topic: ${topic} + +### Task +1. Assess topic complexity using signal detection: + - Strategic/systemic keywords (+3): strategy, architecture, system, framework, paradigm + - Multi-dimensional keywords (+2): multiple, compare, tradeoff, versus, alternative + - Innovation-focused keywords (+2): innovative, creative, novel, breakthrough + - Simple/basic keywords (-2): simple, quick, straightforward, basic +2. Score >= 4 → full, 2-3 → deep, 0-1 → quick +3. 
Suggest divergence angles (e.g., Technical, Product, Innovation, Risk) +4. Return structured result +` +}) + +const clarifierResult = wait({ ids: [clarifier], timeout_ms: 120000 }) + +if (clarifierResult.timed_out) { + send_input({ id: clarifier, message: "Please finalize and output current findings." }) + const retry = wait({ ids: [clarifier], timeout_ms: 60000 }) +} + +// Parse result for pipeline_mode, angles +close_agent({ id: clarifier }) + +// Store result +Write(`${sessionFolder}/interactive/topic-clarifier-result.json`, JSON.stringify({ + task_id: "topic-clarification", + status: "completed", + pipeline_mode: parsedMode, // "quick" | "deep" | "full" + angles: parsedAngles, // ["Technical", "Product", "Innovation", "Risk"] + complexity_score: parsedScore, + timestamp: getUtc8ISOString() +})) +``` + +If not AUTO_YES, present user with pipeline mode selection for confirmation: + +```javascript +if (!AUTO_YES) { + const answer = AskUserQuestion({ + questions: [{ + question: `Topic: "${topic}"\nRecommended pipeline: ${pipeline_mode} (complexity: ${complexity_score})\nAngles: ${angles.join(', ')}\n\nApprove?`, + header: "Pipeline Selection", + multiSelect: false, + options: [ + { label: "Approve", description: `Use ${pipeline_mode} pipeline` }, + { label: "Quick", description: "3 tasks: generate → challenge → synthesize" }, + { label: "Deep", description: "6 tasks: generate → challenge → revise → re-challenge → synthesize → evaluate" }, + { label: "Full", description: "7 tasks: 3x parallel generation → challenge → revise → synthesize → evaluate" } + ] + }] + }) + // Update pipeline_mode based on user choice +} +``` + +**Success Criteria**: +- Refined requirements available for Phase 1 decomposition +- Interactive agents closed, results stored + +--- + +### Phase 1: Requirement → CSV + Classification + +**Objective**: Build tasks.csv from selected pipeline mode with proper wave assignments. 
+
+**Decomposition Rules**:
+
+| Pipeline | Tasks | Wave Structure |
+|----------|-------|---------------|
+| quick | IDEA-001 → CHALLENGE-001 → SYNTH-001 | 3 waves, serial |
+| deep | IDEA-001 → CHALLENGE-001 → IDEA-002 → CHALLENGE-002 → SYNTH-001 → EVAL-001 | 7 waves (wave 3 is the interactive GC check), serial with GC loop |
+| full | IDEA-001,002,003 (parallel) → CHALLENGE-001 → IDEA-004 → SYNTH-001 → EVAL-001 | 5 waves, fan-out + GC |
+
+**Classification Rules**:
+
+All brainstorm work tasks (ideation, challenging, synthesis, evaluation) are `csv-wave`. The GC loop controller between challenger and next ideation revision is `interactive` (post-wave, spawned by orchestrator to decide the GC outcome).
+
+**Wave Computation**: Kahn's BFS topological sort with depth tracking (csv-wave tasks only).
+
+**User Validation**: Display task breakdown with wave + exec_mode assignment (skip if AUTO_YES).
+
+**Pipeline Task Definitions**:
+
+#### Quick Pipeline (3 csv-wave tasks)
+
+| Task ID | Role | Wave | Deps | Description |
+|---------|------|------|------|-------------|
+| IDEA-001 | ideator | 1 | (none) | Generate multi-angle ideas: 3+ ideas per angle with title, description, assumption, impact |
+| CHALLENGE-001 | challenger | 2 | IDEA-001 | Challenge each idea across 4 dimensions (assumption, feasibility, risk, competition). Assign severity per idea. Output GC signal |
+| SYNTH-001 | synthesizer | 3 | CHALLENGE-001 | Synthesize ideas and critiques into 1-3 integrated proposals with feasibility and innovation scores |
+
+#### Deep Pipeline (6 csv-wave tasks + 1 interactive GC check)
+
+Same as Quick, except SYNTH-001 shifts to wave 6, plus:
+
+| Task ID | Role | Wave | Deps | Description |
+|---------|------|------|------|-------------|
+| IDEA-002 | ideator | 4 | CHALLENGE-001 | Revise ideas based on critique feedback (GC Round 1). Address HIGH/CRITICAL challenges |
+| CHALLENGE-002 | challenger | 5 | IDEA-002 | Validate revised ideas (GC Round 2). 
Re-evaluate previously challenged ideas | +| SYNTH-001 | synthesizer | 6 | CHALLENGE-002 | Synthesize all ideas and critiques | +| EVAL-001 | evaluator | 7 | SYNTH-001 | Score and rank proposals: Feasibility 30%, Innovation 25%, Impact 25%, Cost 20% | + +GC-CHECK-001 (interactive) runs post-wave after CHALLENGE-001 to decide whether to proceed with revision or skip to synthesis. + +#### Full Pipeline (7 csv-wave tasks + GC control) + +| Task ID | Role | Wave | Deps | Description | +|---------|------|------|------|-------------| +| IDEA-001 | ideator | 1 | (none) | Generate ideas from angle 1 | +| IDEA-002 | ideator | 1 | (none) | Generate ideas from angle 2 | +| IDEA-003 | ideator | 1 | (none) | Generate ideas from angle 3 | +| CHALLENGE-001 | challenger | 2 | IDEA-001;IDEA-002;IDEA-003 | Critique all generated ideas | +| IDEA-004 | ideator | 3 | CHALLENGE-001 | Revise ideas based on critique | +| SYNTH-001 | synthesizer | 4 | IDEA-004 | Synthesize all ideas and critiques | +| EVAL-001 | evaluator | 5 | SYNTH-001 | Score and rank proposals | + +**Success Criteria**: +- tasks.csv created with valid schema, wave, and exec_mode assignments +- No circular dependencies +- User approved (or AUTO_YES) + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with hybrid mechanism support and cross-wave context propagation. + +```javascript +const failedIds = new Set() +const skippedIds = new Set() +const MAX_GC_ROUNDS = 2 +let gcRound = 0 + +for (let wave = 1; wave <= maxWave; wave++) { + console.log(`\n## Wave ${wave}/${maxWave}\n`) + + // 1. Read current master CSV + const masterCsv = parseCsv(Read(`${sessionFolder}/tasks.csv`)) + + // 2. Separate csv-wave and interactive tasks for this wave + const waveTasks = masterCsv.filter(row => parseInt(row.wave) === wave) + const csvTasks = waveTasks.filter(t => t.exec_mode === 'csv-wave') + const interactiveTasks = waveTasks.filter(t => t.exec_mode === 'interactive') + + // 3. 
Skip tasks whose deps failed + const executableCsvTasks = [] + for (const task of csvTasks) { + const deps = task.deps.split(';').filter(Boolean) + if (deps.some(d => failedIds.has(d) || skippedIds.has(d))) { + skippedIds.add(task.id) + updateMasterCsvRow(sessionFolder, task.id, { + status: 'skipped', + error: 'Dependency failed or skipped' + }) + continue + } + executableCsvTasks.push(task) + } + + // 4. Build prev_context for each csv-wave task + for (const task of executableCsvTasks) { + const contextIds = task.context_from.split(';').filter(Boolean) + const prevFindings = contextIds + .map(id => { + const prevRow = masterCsv.find(r => r.id === id) + if (prevRow && prevRow.status === 'completed' && prevRow.findings) { + return `[Task ${id}: ${prevRow.title}] ${prevRow.findings}` + } + return null + }) + .filter(Boolean) + .join('\n') + task.prev_context = prevFindings || 'No previous context available' + } + + // 5. Write wave CSV and execute csv-wave tasks + if (executableCsvTasks.length > 0) { + const waveHeader = 'id,title,description,role,angle,gc_round,deps,context_from,exec_mode,wave,prev_context' + const waveRows = executableCsvTasks.map(t => + [t.id, t.title, t.description, t.role, t.angle, t.gc_round, t.deps, t.context_from, t.exec_mode, t.wave, t.prev_context] + .map(cell => `"${String(cell).replace(/"/g, '""')}"`) + .join(',') + ) + Write(`${sessionFolder}/wave-${wave}.csv`, [waveHeader, ...waveRows].join('\n')) + + const waveResult = spawn_agents_on_csv({ + csv_path: `${sessionFolder}/wave-${wave}.csv`, + id_column: "id", + instruction: buildBrainstormInstruction(sessionFolder, wave), + max_concurrency: maxConcurrency, + max_runtime_seconds: 600, + output_csv_path: `${sessionFolder}/wave-${wave}-results.csv`, + output_schema: { + type: "object", + properties: { + id: { type: "string" }, + status: { type: "string", enum: ["completed", "failed"] }, + findings: { type: "string" }, + gc_signal: { type: "string" }, + severity_summary: { type: "string" }, 
+ error: { type: "string" } + }, + required: ["id", "status", "findings"] + } + }) + // Blocks until wave completes + + // Merge results into master CSV + const waveResults = parseCsv(Read(`${sessionFolder}/wave-${wave}-results.csv`)) + for (const result of waveResults) { + updateMasterCsvRow(sessionFolder, result.id, { + status: result.status, + findings: result.findings || '', + gc_signal: result.gc_signal || '', + severity_summary: result.severity_summary || '', + error: result.error || '' + }) + if (result.status === 'failed') failedIds.add(result.id) + } + + Bash(`rm -f "${sessionFolder}/wave-${wave}.csv"`) + } + + // 6. Execute post-wave interactive tasks (GC controller) + for (const task of interactiveTasks) { + if (task.status !== 'pending') continue + const deps = task.deps.split(';').filter(Boolean) + if (deps.some(d => failedIds.has(d) || skippedIds.has(d))) { + skippedIds.add(task.id) + continue + } + + // Spawn GC controller agent + const gcAgent = spawn_agent({ + message: ` +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS (Agent Execute) +1. **Read role definition**: .codex/skills/team-brainstorm/agents/gc-controller.md (MUST read first) +2. Read: ${sessionFolder}/discoveries.ndjson (shared discoveries) + +--- + +Goal: Evaluate critique severity and decide revision vs convergence +Session: ${sessionFolder} +GC Round: ${gcRound} +Max GC Rounds: ${MAX_GC_ROUNDS} + +### Context +Read the latest critique file and determine the GC signal. +If REVISION_NEEDED and gcRound < maxRounds: output "REVISION" +If CONVERGED or gcRound >= maxRounds: output "CONVERGE" +` + }) + + const gcResult = wait({ ids: [gcAgent], timeout_ms: 120000 }) + if (gcResult.timed_out) { + send_input({ id: gcAgent, message: "Please finalize your decision now." 
}) + wait({ ids: [gcAgent], timeout_ms: 60000 }) + } + close_agent({ id: gcAgent }) + + // Parse GC decision and potentially create/skip revision tasks + Write(`${sessionFolder}/interactive/${task.id}-result.json`, JSON.stringify({ + task_id: task.id, status: "completed", + gc_decision: gcDecision, gc_round: gcRound, + timestamp: getUtc8ISOString() + })) + + if (gcDecision === "CONVERGE") { + // Skip remaining GC tasks, mark revision tasks as skipped + // Unblock SYNTH directly + } else { + gcRound++ + // Let the revision wave proceed naturally + } + + updateMasterCsvRow(sessionFolder, task.id, { status: 'completed', findings: `GC decision: ${gcDecision}` }) + } +} +``` + +**Success Criteria**: +- All waves executed in order +- Both csv-wave and interactive tasks handled per wave +- Each wave's results merged into master CSV before next wave starts +- Dependent tasks skipped when predecessor failed +- discoveries.ndjson accumulated across all waves and mechanisms +- GC loop controlled with max 2 rounds + +--- + +### Phase 3: Post-Wave Interactive + +**Objective**: Handle any final GC loop convergence and prepare for synthesis. + +If the pipeline used GC loops and the final GC decision was CONVERGE or max rounds reached, ensure SYNTH-001 is unblocked and all remaining GC-related tasks are properly marked. + +**Success Criteria**: +- Post-wave interactive processing complete +- Interactive agents closed, results stored + +--- + +### Phase 4: Results Aggregation + +**Objective**: Generate final results and human-readable report. 
+ +```javascript +const masterCsv = Read(`${sessionFolder}/tasks.csv`) +Write(`${sessionFolder}/results.csv`, masterCsv) + +const tasks = parseCsv(masterCsv) +const completed = tasks.filter(t => t.status === 'completed') +const failed = tasks.filter(t => t.status === 'failed') +const skipped = tasks.filter(t => t.status === 'skipped') + +const contextContent = `# Team Brainstorm Report + +**Session**: ${sessionId} +**Topic**: ${topic} +**Pipeline**: ${pipeline_mode} +**Completed**: ${getUtc8ISOString()} + +--- + +## Summary + +| Metric | Count | +|--------|-------| +| Total Tasks | ${tasks.length} | +| Completed | ${completed.length} | +| Failed | ${failed.length} | +| Skipped | ${skipped.length} | +| GC Rounds | ${gcRound} | + +--- + +## Wave Execution + +${waveDetails} + +--- + +## Task Details + +${taskDetails} + +--- + +## Brainstorm Artifacts + +- Ideas: discoveries with type "idea" in discoveries.ndjson +- Critiques: discoveries with type "critique" in discoveries.ndjson +- Synthesis: discoveries with type "synthesis" in discoveries.ndjson +- Evaluation: discoveries with type "evaluation" in discoveries.ndjson +` + +Write(`${sessionFolder}/context.md`, contextContent) +``` + +If not AUTO_YES and there are failed tasks, offer retry or view report. + +**Success Criteria**: +- results.csv exported (all tasks, both modes) +- context.md generated +- All interactive agents closed +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +All agents across all waves share `discoveries.ndjson`. This enables cross-role knowledge sharing. 
+ +**Discovery Types**: + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `idea` | `data.title` | `{title, angle, description, assumption, impact}` | Generated idea | +| `critique` | `data.idea_title` | `{idea_title, dimension, severity, challenge, rationale}` | Critique of an idea | +| `theme` | `data.name` | `{name, strength, supporting_ideas[]}` | Extracted theme from synthesis | +| `proposal` | `data.title` | `{title, source_ideas[], feasibility, innovation, description}` | Integrated proposal | +| `evaluation` | `data.proposal_title` | `{proposal_title, weighted_score, rank, recommendation}` | Proposal evaluation | +| `gc_decision` | `data.round` | `{round, signal, severity_counts}` | GC loop decision | + +**Format**: NDJSON, each line is self-contained JSON: + +```jsonl +{"ts":"2026-03-08T10:00:00+08:00","worker":"IDEA-001","type":"idea","data":{"title":"API Gateway Pattern","angle":"Technical","description":"Centralized API gateway for microservice routing","assumption":"Services need unified entry point","impact":"Simplifies client integration"}} +{"ts":"2026-03-08T10:05:00+08:00","worker":"CHALLENGE-001","type":"critique","data":{"idea_title":"API Gateway Pattern","dimension":"feasibility","severity":"MEDIUM","challenge":"Single point of failure","rationale":"Requires high availability design"}} +``` + +**Protocol Rules**: +1. Read board before own work → leverage existing context +2. Write discoveries immediately via `echo >>` → don't batch +3. Deduplicate — check existing entries by type + dedup key +4. 
Append-only — never modify or delete existing lines
+
+---
+
+## Consensus Severity Routing
+
+When the challenger returns critique results with severity-graded verdicts:
+
+| Severity | Action |
+|----------|--------|
+| CRITICAL / HIGH | Trigger revision round (GC loop), max 2 rounds total |
+| MEDIUM | Log warning, continue pipeline |
+| LOW | Treat as consensus reached |
+
+**Constraints**: Max 2 GC rounds (revision cycles). If still CRITICAL/HIGH after 2 rounds, force convergence to synthesizer.
+
+---
+
+## Error Handling
+
+| Error | Resolution |
+|-------|------------|
+| Circular dependency | Detect in wave computation, abort with error message |
+| CSV agent timeout | Mark as failed in results, continue with wave |
+| CSV agent failed | Mark as failed, skip dependent tasks in later waves |
+| Interactive agent timeout | Urge convergence via send_input, then close if still timed out |
+| Interactive agent failed | Mark as failed, skip dependents |
+| All agents in wave failed | Log error, offer retry or abort |
+| CSV parse error | Validate CSV format before execution, show line number |
+| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries |
+| GC loop exceeds 2 rounds | Force convergence to synthesizer |
+| No ideas generated | Report failure, suggest refining topic |
+| Continue mode: no session found | List available sessions, prompt user to select |
+
+---
+
+## Core Rules
+
+1. **Start Immediately**: First action is session initialization, then Phase 0/1
+2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged
+3. **CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive)
+4. **CSV First**: Default to csv-wave for tasks; only use interactive when interaction pattern requires it
+5. **Context Propagation**: prev_context built from master CSV, not from memory
+6. 
**Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson — both mechanisms share it +7. **Skip on Failure**: If a dependency failed, skip the dependent task (regardless of mechanism) +8. **Lifecycle Balance**: Every spawn_agent MUST have a matching close_agent +9. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +10. **DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-brainstorm/agents/gc-controller.md b/.codex/skills/team-brainstorm/agents/gc-controller.md new file mode 100644 index 00000000..4621544a --- /dev/null +++ b/.codex/skills/team-brainstorm/agents/gc-controller.md @@ -0,0 +1,122 @@ +# GC Controller Agent + +Evaluate Generator-Critic loop severity and decide whether to trigger revision or converge to synthesis. + +## Identity + +- **Type**: `interactive` +- **Responsibility**: GC loop decision making + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Read the latest critique file to assess severity +- Make a binary decision: REVISION or CONVERGE +- Respect max GC round limits +- Produce structured output following template + +### MUST NOT + +- Generate ideas or perform critique (delegate to csv-wave agents) +- Exceed 1 decision per invocation +- Ignore the max round constraint + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | builtin | Load critique artifacts and session state | +| `Glob` | builtin | Find critique files in session directory | + +--- + +## Execution + +### Phase 1: Context Loading + +**Objective**: Load critique results and GC round state + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Session folder | Yes | Path to session directory | +| GC Round | Yes | Current GC round number | +| Max GC Rounds | Yes | Maximum allowed rounds (default: 2) | + +**Steps**: + +1. 
Read the session's discoveries.ndjson for critique entries
+2. Parse prev_context for the challenger's findings
+3. Extract severity counts from the challenger's severity_summary
+4. Load current gc_round from spawn message
+
+**Output**: Severity counts and round state loaded
+
+---
+
+### Phase 2: Decision Making
+
+**Objective**: Determine whether to trigger revision or converge
+
+**Input**:
+
+| Source | Required | Description |
+|--------|----------|-------------|
+| Severity counts | Yes | CRITICAL, HIGH, MEDIUM, LOW counts |
+| GC round | Yes | Current round number |
+| Max rounds | Yes | Maximum allowed rounds |
+
+**Steps**:
+
+1. Check severity threshold:
+
+| Condition | Decision |
+|-----------|----------|
+| gc_round >= max_rounds | CONVERGE (force, regardless of severity) |
+| CRITICAL count > 0 | REVISION (if rounds remain) |
+| HIGH count > 0 | REVISION (if rounds remain) |
+| All MEDIUM or lower | CONVERGE |
+
+2. Log the decision rationale
+
+**Output**: Decision string "REVISION" or "CONVERGE"
+
+---
+
+## Structured Output Template
+
+```
+## Summary
+- GC Round: <current>/<max>
+- Decision: REVISION | CONVERGE
+
+## Severity Assessment
+- CRITICAL: <count>
+- HIGH: <count>
+- MEDIUM: <count>
+- LOW: <count>
+
+## Rationale
+- <1-2 sentence explanation of decision>
+
+## Next Action
+- REVISION: Ideator should address HIGH/CRITICAL challenges in next round
+- CONVERGE: Proceed to synthesis phase, skip remaining revision tasks
+```
+
+---
+
+## Error Handling
+
+| Scenario | Resolution |
+|----------|------------|
+| No critique data found | Default to CONVERGE (no evidence for revision) |
+| Severity parsing fails | Default to CONVERGE with warning |
+| Timeout approaching | Output current decision immediately |
diff --git a/.codex/skills/team-brainstorm/agents/topic-clarifier.md b/.codex/skills/team-brainstorm/agents/topic-clarifier.md
new file mode 100644
index 00000000..3ef3b0ca
--- /dev/null
+++ b/.codex/skills/team-brainstorm/agents/topic-clarifier.md
@@ -0,0 +1,126 @@
+# Topic 
Clarifier Agent + +Assess brainstorming topic complexity, recommend pipeline mode, and suggest divergence angles. + +## Identity + +- **Type**: `interactive` +- **Responsibility**: Topic analysis and pipeline selection + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Perform text-level analysis only (no source code reading) +- Produce structured output with pipeline recommendation +- Suggest meaningful divergence angles for ideation + +### MUST NOT + +- Read source code or explore codebase +- Generate ideas (that is the ideator's job) +- Make final pipeline decisions (orchestrator confirms with user) + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | builtin | Load project context if available | + +--- + +## Execution + +### Phase 1: Signal Detection + +**Objective**: Analyze topic keywords for complexity signals + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Topic text | Yes | The brainstorming topic from user | + +**Steps**: + +1. Scan topic for complexity signals: + +| Signal | Weight | Keywords | +|--------|--------|----------| +| Strategic/systemic | +3 | strategy, architecture, system, framework, paradigm | +| Multi-dimensional | +2 | multiple, compare, tradeoff, versus, alternative | +| Innovation-focused | +2 | innovative, creative, novel, breakthrough | +| Simple/basic | -2 | simple, quick, straightforward, basic | + +2. Calculate complexity score + +**Output**: Complexity score and matched signals + +--- + +### Phase 2: Pipeline Recommendation + +**Objective**: Map complexity to pipeline mode and suggest angles + +**Steps**: + +1. 
Map score to pipeline:
+
+| Score | Complexity | Pipeline |
+|-------|------------|----------|
+| >= 4 | High | full (3x parallel ideation + GC + evaluation) |
+| 2-3 | Medium | deep (serial with GC loop + evaluation) |
+| <= 1 | Low | quick (generate → challenge → synthesize) |
+
+2. Identify divergence angles from topic context:
+   - **Technical**: Implementation approaches, architecture patterns
+   - **Product**: User experience, market fit, value proposition
+   - **Innovation**: Novel approaches, emerging tech, disruption potential
+   - **Risk**: Failure modes, mitigation strategies, worst cases
+   - **Business**: Cost, ROI, competitive advantage
+   - **Organizational**: Team structure, process, culture
+
+3. Select 3-4 most relevant angles based on topic keywords
+
+**Output**: Pipeline mode, angles, complexity rationale
+
+---
+
+## Structured Output Template
+
+```
+## Summary
+- Topic: <topic>
+- Complexity Score: <score> (<High|Medium|Low>)
+- Recommended Pipeline: <full|deep|quick>
+
+## Signal Detection
+- Matched signals: <signal list with weights>
+
+## Suggested Angles
+1. <angle>: <why relevant to this topic>
+2. <angle>: <why relevant to this topic>
+3. <angle>: <why relevant to this topic>
+
+## Pipeline Details
+- <pipeline>: <one-line description>
+- Expected tasks: <count>
+- Parallel ideation: <yes|no>
+- GC rounds: <0/1/2>
+- Evaluation: <yes|no>
+```
+
+---
+
+## Error Handling
+
+| Scenario | Resolution |
+|----------|------------|
+| Topic too vague | Suggest clarifying questions in output |
+| No signal matches | Default to "deep" pipeline with general angles |
+| Timeout approaching | Output current analysis with "PARTIAL" status |
diff --git a/.codex/skills/team-brainstorm/instructions/agent-instruction.md b/.codex/skills/team-brainstorm/instructions/agent-instruction.md
new file mode 100644
index 00000000..81c5decb
--- /dev/null
+++ b/.codex/skills/team-brainstorm/instructions/agent-instruction.md
@@ -0,0 +1,105 @@
+## TASK ASSIGNMENT
+
+### MANDATORY FIRST STEPS
+1. Read shared discoveries: .workflow/.csv-wave/{session-id}/discoveries.ndjson (if exists, skip if not)
+2. 
Read project context: .workflow/project-tech.json (if exists) + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Role**: {role} +**Description**: {description} +**Angle(s)**: {angle} +**GC Round**: {gc_round} + +### Previous Tasks' Findings (Context) +{prev_context} + +--- + +## Execution Protocol + +1. **Read discoveries**: Load shared discoveries from the session's discoveries.ndjson for cross-task context +2. **Use context**: Apply previous tasks' findings from prev_context above +3. **Execute by role**: + +### Role: ideator (IDEA-* tasks) +- **Initial Generation** (gc_round = 0): + - For each angle listed in the Angle(s) field, generate 3+ ideas + - Each idea must include: title, description (2-3 sentences), key assumption, potential impact, implementation hint + - Self-review: ensure >= 6 ideas total, no duplicates, all angles covered +- **GC Revision** (gc_round > 0): + - Read critique findings from prev_context + - Focus on HIGH/CRITICAL severity challenges + - Retain unchallenged ideas intact + - Revise challenged ideas with revision rationale + - Replace unsalvageable ideas with new alternatives + +### Role: challenger (CHALLENGE-* tasks) +- Read all idea findings from prev_context +- Challenge each idea across 4 dimensions: + - **Assumption Validity**: Does the core assumption hold? Counter-examples? + - **Feasibility**: Technical/resource/time feasibility? + - **Risk Assessment**: Worst case scenario? Hidden risks? + - **Competitive Analysis**: Better alternatives already exist? +- Assign severity per idea: CRITICAL / HIGH / MEDIUM / LOW +- Determine GC signal: + - Any CRITICAL or HIGH severity → `REVISION_NEEDED` + - All MEDIUM or lower → `CONVERGED` + +### Role: synthesizer (SYNTH-* tasks) +- Read all idea and critique findings from prev_context +- Execute synthesis steps: + 1. **Theme Extraction**: Identify common themes, rate strength (1-10), list supporting ideas + 2. 
**Conflict Resolution**: Identify contradictions, determine resolution approach
+  3. **Complementary Grouping**: Group complementary ideas together
+  4. **Gap Identification**: Discover uncovered perspectives
+  5. **Integrated Proposals**: Generate 1-3 consolidated proposals with feasibility score (1-10) and innovation score (1-10)
+
+### Role: evaluator (EVAL-* tasks)
+- Read synthesis findings from prev_context
+- Score each proposal across 4 weighted dimensions:
+  - Feasibility (30%): Technical feasibility, resource needs, timeline
+  - Innovation (25%): Novelty, differentiation, breakthrough potential
+  - Impact (25%): Scope of impact, value creation, problem resolution
+  - Cost Efficiency (20%): Implementation cost, risk cost, opportunity cost
+- Weighted score = (Feasibility * 0.30) + (Innovation * 0.25) + (Impact * 0.25) + (Cost * 0.20)
+- Provide recommendation per proposal: Strong Recommend / Recommend / Consider / Pass
+- Generate final ranking
+
+4. **Share discoveries**: Append exploration findings to shared board:
+   ```bash
+   echo '{"ts":"<utc8-timestamp>","worker":"{id}","type":"<type>","data":{...}}' >> .workflow/.csv-wave/{session-id}/discoveries.ndjson
+   ```
+
+   Discovery types to share:
+   - `idea`: {title, angle, description, assumption, impact} — generated idea
+   - `critique`: {idea_title, dimension, severity, challenge, rationale} — critique finding
+   - `theme`: {name, strength, supporting_ideas[]} — extracted theme
+   - `proposal`: {title, source_ideas[], feasibility, innovation, description} — integrated proposal
+   - `evaluation`: {proposal_title, weighted_score, rank, recommendation} — scored proposal
+
+5. 
**Report result**: Return JSON via report_agent_job_result + +--- + +## Output (report_agent_job_result) + +Return JSON: +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Key discoveries and implementation notes (max 500 chars)", + "gc_signal": "REVISION_NEEDED | CONVERGED | (empty for non-challenger roles)", + "severity_summary": "CRITICAL:N HIGH:N MEDIUM:N LOW:N (challenger only, empty for others)", + "error": "" +} + +**Role-specific findings guidance**: +- **ideator**: List idea count, angles covered, key themes. Example: "Generated 8 ideas across Technical, Product, Innovation. Top ideas: API Gateway, Event Sourcing, DevEx Platform." +- **challenger**: Summarize severity counts and GC signal. Example: "Challenged 8 ideas. 2 HIGH (require revision), 3 MEDIUM, 3 LOW. GC signal: REVISION_NEEDED." +- **synthesizer**: List proposal count and key themes. Example: "Synthesized 3 proposals from 5 themes. Top: Infrastructure Modernization (feasibility:8, innovation:7)." +- **evaluator**: List ranking and top recommendation. Example: "Ranked 3 proposals. #1: Infrastructure Modernization (7.85) - Strong Recommend." 
diff --git a/.codex/skills/team-brainstorm/schemas/tasks-schema.md b/.codex/skills/team-brainstorm/schemas/tasks-schema.md new file mode 100644 index 00000000..dd50b143 --- /dev/null +++ b/.codex/skills/team-brainstorm/schemas/tasks-schema.md @@ -0,0 +1,171 @@ +# Team Brainstorm — CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### Input Columns (Set by Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | string | Yes | Unique task identifier | `"IDEA-001"` | +| `title` | string | Yes | Short task title | `"Multi-angle idea generation"` | +| `description` | string | Yes | Detailed task description (self-contained) | `"Generate 3+ ideas per angle..."` | +| `role` | string | Yes | Worker role: ideator, challenger, synthesizer, evaluator | `"ideator"` | +| `angle` | string | No | Brainstorming angle(s) for ideator tasks (semicolon-separated) | `"Technical;Product;Innovation"` | +| `gc_round` | integer | Yes | Generator-Critic round number (0 = initial, 1+ = revision) | `"0"` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"IDEA-001"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"IDEA-001"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[Task IDEA-001] Generated 8 ideas..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` → `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Generated 8 ideas 
across 3 angles..."` | +| `gc_signal` | string | Generator-Critic signal (challenger only): `REVISION_NEEDED` or `CONVERGED` | `"REVISION_NEEDED"` | +| `severity_summary` | string | Severity count summary (challenger only) | `"CRITICAL:0 HIGH:2 MEDIUM:3 LOW:1"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution | + +Interactive tasks appear in master CSV for dependency tracking but are NOT included in wave-{N}.csv files. + +--- + +### Example Data + +```csv +id,title,description,role,angle,gc_round,deps,context_from,exec_mode,wave,status,findings,gc_signal,severity_summary,error +"IDEA-001","Multi-angle idea generation","Generate 3+ ideas per angle with title, description, assumption, and potential impact. Cover all assigned angles comprehensively.","ideator","Technical;Product;Innovation","0","","","csv-wave","1","pending","","","","" +"IDEA-002","Parallel angle generation (Risk)","Generate 3+ ideas focused on Risk angle with title, description, assumption, and potential impact.","ideator","Risk","0","","","csv-wave","1","pending","","","","" +"CHALLENGE-001","Critique generated ideas","Read all idea artifacts. Challenge each idea across assumption validity, feasibility, risk, and competition dimensions. Assign severity (CRITICAL/HIGH/MEDIUM/LOW) per idea. Output GC signal.","challenger","","0","IDEA-001;IDEA-002","IDEA-001;IDEA-002","csv-wave","2","pending","","","","" +"GC-CHECK-001","GC loop decision","Evaluate critique severity counts. If any HIGH/CRITICAL: REVISION_NEEDED. Else: CONVERGED.","gc-controller","","1","CHALLENGE-001","CHALLENGE-001","interactive","3","pending","","","","" +"IDEA-003","Revise ideas (GC Round 1)","Address HIGH/CRITICAL challenges from critique. 
Retain unchallenged ideas intact. Replace unsalvageable ideas.","ideator","","1","GC-CHECK-001","CHALLENGE-001","csv-wave","4","pending","","","","" +"SYNTH-001","Synthesize proposals","Extract themes from ideas and critiques. Resolve conflicts. Generate 1-3 integrated proposals with feasibility and innovation scores.","synthesizer","","0","IDEA-003","IDEA-001;IDEA-002;IDEA-003;CHALLENGE-001","csv-wave","5","pending","","","","" +"EVAL-001","Score and rank proposals","Score each proposal: Feasibility 30%, Innovation 25%, Impact 25%, Cost 20%. Generate final ranking and recommendation.","evaluator","","0","SYNTH-001","SYNTH-001","csv-wave","6","pending","","","","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +───────────────────── ──────────────────── ───────────────── +id ───────────► id ──────────► id +title ───────────► title ──────────► (reads) +description ───────────► description ──────────► (reads) +role ───────────► role ──────────► (reads) +angle ───────────► angle ──────────► (reads) +gc_round ───────────► gc_round ──────────► (reads) +deps ───────────► deps ──────────► (reads) +context_from───────────► context_from──────────► (reads) +exec_mode ───────────► exec_mode ──────────► (reads) + wave ──────────► (reads) + prev_context ──────────► (reads) + status + findings + gc_signal + severity_summary + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "IDEA-001", + "status": "completed", + "findings": "Generated 8 ideas across Technical, Product, Innovation angles. Key themes: API gateway pattern, event-driven architecture, developer experience tools.", + "gc_signal": "", + "severity_summary": "", + "error": "" +} +``` + +Challenger-specific output: + +```json +{ + "id": "CHALLENGE-001", + "status": "completed", + "findings": "Challenged 8 ideas. 
2 HIGH severity (require revision), 3 MEDIUM, 3 LOW.", + "gc_signal": "REVISION_NEEDED", + "severity_summary": "CRITICAL:0 HIGH:2 MEDIUM:3 LOW:3", + "error": "" +} +``` + +Interactive tasks output via structured text or JSON written to `interactive/{id}-result.json`. + +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `idea` | `data.title` | `{title, angle, description, assumption, impact}` | Generated brainstorm idea | +| `critique` | `data.idea_title` | `{idea_title, dimension, severity, challenge, rationale}` | Critique of an idea | +| `theme` | `data.name` | `{name, strength, supporting_ideas[]}` | Extracted theme from synthesis | +| `proposal` | `data.title` | `{title, source_ideas[], feasibility, innovation, description}` | Integrated proposal | +| `evaluation` | `data.proposal_title` | `{proposal_title, weighted_score, rank, recommendation}` | Scored proposal | +| `gc_decision` | `data.round` | `{round, signal, severity_counts}` | GC loop decision record | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T10:00:00+08:00","worker":"IDEA-001","type":"idea","data":{"title":"API Gateway Pattern","angle":"Technical","description":"Centralized API gateway for microservice routing","assumption":"Services need unified entry point","impact":"Simplifies client integration"}} +{"ts":"2026-03-08T10:01:00+08:00","worker":"IDEA-001","type":"idea","data":{"title":"Event Sourcing Migration","angle":"Technical","description":"Adopt event sourcing for service state management","assumption":"Current state is hard to trace across services","impact":"Full audit trail and temporal queries"}} +{"ts":"2026-03-08T10:05:00+08:00","worker":"CHALLENGE-001","type":"critique","data":{"idea_title":"API Gateway Pattern","dimension":"feasibility","severity":"MEDIUM","challenge":"Single point of failure risk","rationale":"Requires HA design with circuit breakers"}} 
+{"ts":"2026-03-08T10:10:00+08:00","worker":"SYNTH-001","type":"theme","data":{"name":"Infrastructure Modernization","strength":8,"supporting_ideas":["API Gateway Pattern","Event Sourcing Migration"]}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. + +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| CSV task findings | Interactive task | Injected via spawn message or send_input | +| Interactive task result | CSV task prev_context | Read from interactive/{id}-result.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Description non-empty | Every task has description | "Empty description for task: {id}" | +| Status enum | status in {pending, completed, failed, skipped} | "Invalid status: {status}" | +| Valid role | role in {ideator, challenger, synthesizer, evaluator, gc-controller} | "Invalid role: {role}" | +| GC round non-negative | gc_round >= 0 | "Invalid gc_round: {value}" | +| Cross-mechanism deps | Interactive→CSV deps resolve correctly | "Cross-mechanism dependency unresolvable: {id}" | diff --git a/.codex/skills/team-coordinate/SKILL.md b/.codex/skills/team-coordinate/SKILL.md new file mode 100644 index 00000000..07cffded --- /dev/null +++ b/.codex/skills/team-coordinate/SKILL.md @@ -0,0 +1,629 @@ +--- +name: team-coordinate 
+description: Universal team coordination skill with dynamic role generation. Analyzes task, generates worker roles at runtime, decomposes into CSV tasks with dependency waves, dispatches parallel CSV agents per wave. Coordinator is orchestrator; all workers are CSV or interactive agents with dynamically generated instructions. +argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"task description\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults. + +# Team Coordinate + +## Usage + +```bash +$team-coordinate "Implement user authentication with JWT tokens" +$team-coordinate -c 4 "Refactor payment module and write API documentation" +$team-coordinate -y "Analyze codebase security and fix vulnerabilities" +$team-coordinate --continue "tc-auth-jwt-20260308" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 3) +- `--continue`: Resume existing session + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Universal team coordination: analyze task -> detect capabilities -> generate dynamic role instructions -> decompose into dependency-ordered CSV tasks -> execute wave-by-wave -> deliver results. Only the **coordinator** (this orchestrator) is built-in. All worker roles are **dynamically generated** as CSV agent instructions at runtime. 
+ +**Execution Model**: Hybrid -- CSV wave pipeline (primary) + individual agent spawn (secondary) + +``` ++-------------------------------------------------------------------+ +| TEAM COORDINATE WORKFLOW | ++-------------------------------------------------------------------+ +| | +| Phase 0: Pre-Wave Interactive (Requirement Clarification) | +| +- Parse user task description | +| +- Clarify ambiguous requirements (AskUserQuestion) | +| +- Output: refined requirements for decomposition | +| | +| Phase 1: Requirement -> CSV + Classification | +| +- Signal detection: keyword scan -> capability inference | +| +- Dependency graph construction (DAG) | +| +- Role minimization (cap at 5 roles) | +| +- Classify tasks: csv-wave | interactive (exec_mode) | +| +- Compute dependency waves (topological sort) | +| +- Generate tasks.csv with wave + exec_mode columns | +| +- Generate per-role agent instructions dynamically | +| +- User validates task breakdown (skip if -y) | +| | +| Phase 2: Wave Execution Engine (Extended) | +| +- For each wave (1..N): | +| | +- Execute pre-wave interactive tasks (if any) | +| | +- Build wave CSV (filter csv-wave tasks for this wave) | +| | +- Inject previous findings into prev_context column | +| | +- spawn_agents_on_csv(wave CSV) | +| | +- Execute post-wave interactive tasks (if any) | +| | +- Merge all results into master tasks.csv | +| | +- Check: any failed? 
-> skip dependents | +| +- discoveries.ndjson shared across all modes (append-only) | +| | +| Phase 3: Post-Wave Interactive (Completion Action) | +| +- Pipeline completion report | +| +- Interactive completion choice (Archive/Keep/Export) | +| +- Final aggregation / report | +| | +| Phase 4: Results Aggregation | +| +- Export final results.csv | +| +- Generate context.md with all findings | +| +- Display summary: completed/failed/skipped per wave | +| +- Offer: view results | retry failed | done | +| | ++-------------------------------------------------------------------+ +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, needs clarification, revision cycles | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Single-pass code implementation | `csv-wave` | +| Single-pass analysis or documentation | `csv-wave` | +| Research with defined scope | `csv-wave` | +| Testing with known targets | `csv-wave` | +| Design requiring iterative refinement | `interactive` | +| Plan requiring user approval checkpoint | `interactive` | +| Revision cycle (fix-verify loop) | `interactive` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,role,responsibility_type,output_type,deps,context_from,exec_mode,wave,status,findings,artifacts_produced,error +"RESEARCH-001","Investigate auth patterns","Research JWT authentication patterns and best practices","researcher","orchestration","artifact","","","csv-wave","1","pending","","","" +"IMPL-001","Implement auth module","Build JWT authentication middleware","developer","code-gen","codebase","RESEARCH-001","RESEARCH-001","csv-wave","2","pending","","","" 
+"TEST-001","Validate auth implementation","Write and run tests for auth module","tester","validation","artifact","IMPL-001","IMPL-001","csv-wave","3","pending","","","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier (PREFIX-NNN format) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description with goal, steps, success criteria | +| `role` | Input | Dynamic role name (researcher, developer, analyst, etc.) | +| `responsibility_type` | Input | `orchestration`, `read-only`, `code-gen`, `code-gen-docs`, `validation` | +| `output_type` | Input | `artifact` (session files), `codebase` (project files), `mixed` | +| `deps` | Input | Semicolon-separated dependency task IDs | +| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `wave` | Computed | Wave number (computed by topological sort, 1-based) | +| `status` | Output | `pending` -> `completed` / `failed` / `skipped` | +| `findings` | Output | Key discoveries or implementation notes (max 500 chars) | +| `artifacts_produced` | Output | Semicolon-separated paths of produced artifacts | +| `error` | Output | Error message if failed (empty if success) | + +### Per-Wave CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). + +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| Plan Reviewer | agents/plan-reviewer.md | 2.3 (send_input cycle) | Review and approve plans before execution waves | pre-wave | +| Completion Handler | agents/completion-handler.md | 2.3 (send_input cycle) | Handle pipeline completion action (Archive/Keep/Export) | standalone | + +> **COMPACT PROTECTION**: Agent files are execution documents. 
When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. + +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state -- all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 4 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 4 | +| `task-analysis.json` | Phase 0/1 output: capabilities, dependency graph, roles | Created in Phase 1 | +| `role-instructions/` | Dynamically generated per-role instruction templates | Created in Phase 1 | +| `interactive/{id}-result.json` | Results from interactive tasks | Created per interactive task | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ ++-- tasks.csv # Master state (all tasks, both modes) ++-- results.csv # Final results export ++-- discoveries.ndjson # Shared discovery board (all agents) ++-- context.md # Human-readable report ++-- task-analysis.json # Phase 1 analysis output ++-- wave-{N}.csv # Temporary per-wave input (csv-wave only) ++-- role-instructions/ # Dynamically generated instruction templates +| +-- researcher.md +| +-- developer.md +| +-- ... ++-- artifacts/ # All deliverables from workers +| +-- research-findings.md +| +-- implementation-summary.md +| +-- ... 
++-- interactive/ # Interactive task artifacts +| +-- {id}-result.json ++-- wisdom/ # Cross-task knowledge + +-- learnings.md + +-- decisions.md +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +const getUtc8ISOString = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString() + +const AUTO_YES = $ARGUMENTS.includes('--yes') || $ARGUMENTS.includes('-y') +const continueMode = $ARGUMENTS.includes('--continue') +const concurrencyMatch = $ARGUMENTS.match(/(?:--concurrency|-c)\s+(\d+)/) +const maxConcurrency = concurrencyMatch ? parseInt(concurrencyMatch[1]) : 3 + +const requirement = $ARGUMENTS + .replace(/--yes|-y|--continue|--concurrency\s+\d+|-c\s+\d+/g, '') + .trim() + +const slug = requirement.toLowerCase() + .replace(/[^a-z0-9\u4e00-\u9fa5]+/g, '-') + .substring(0, 40) +const dateStr = getUtc8ISOString().substring(0, 10).replace(/-/g, '') +const sessionId = `tc-${slug}-${dateStr}` +const sessionFolder = `.workflow/.csv-wave/${sessionId}` + +Bash(`mkdir -p ${sessionFolder}/artifacts ${sessionFolder}/role-instructions ${sessionFolder}/interactive ${sessionFolder}/wisdom`) + +// Initialize discoveries.ndjson +Write(`${sessionFolder}/discoveries.ndjson`, '') + +// Initialize wisdom files +Write(`${sessionFolder}/wisdom/learnings.md`, '# Learnings\n') +Write(`${sessionFolder}/wisdom/decisions.md`, '# Decisions\n') +``` + +--- + +### Phase 0: Pre-Wave Interactive (Requirement Clarification) + +**Objective**: Parse user task, clarify ambiguities, prepare for decomposition. + +**Workflow**: + +1. **Parse user task description** from $ARGUMENTS + +2. **Check for existing sessions** (continue mode): + - Scan `.workflow/.csv-wave/tc-*/tasks.csv` for sessions with pending tasks + - If `--continue`: resume the specified or most recent session, skip to Phase 2 + - If active session found: ask user whether to resume or start new + +3. 
**Clarify if ambiguous** (skip if AUTO_YES): + ```javascript + AskUserQuestion({ + questions: [{ + question: "Please confirm the task scope and deliverables:", + header: "Task Clarification", + multiSelect: false, + options: [ + { label: "Proceed as described", description: "Task is clear enough" }, + { label: "Narrow scope", description: "Specify files/modules/areas" }, + { label: "Add constraints", description: "Timeline, tech stack, style" } + ] + }] + }) + ``` + +4. **Output**: Refined requirement string for Phase 1 + +**Success Criteria**: +- Refined requirements available for Phase 1 decomposition +- Existing session detected and handled if applicable + +--- + +### Phase 1: Requirement -> CSV + Classification + +**Objective**: Analyze task, detect capabilities, build dependency graph, generate tasks.csv and role instructions. + +**Decomposition Rules**: + +1. **Signal Detection** -- scan task description for capability keywords: + +| Signal | Keywords | Capability | Prefix | Responsibility Type | +|--------|----------|------------|--------|---------------------| +| Research | investigate, explore, compare, survey, find, research, discover | researcher | RESEARCH | orchestration | +| Writing | write, draft, document, article, report, summarize | writer | DRAFT | code-gen-docs | +| Coding | implement, build, code, fix, refactor, develop, create, migrate | developer | IMPL | code-gen | +| Design | design, architect, plan, structure, blueprint, schema | designer | DESIGN | orchestration | +| Analysis | analyze, review, audit, assess, evaluate, inspect, diagnose | analyst | ANALYSIS | read-only | +| Testing | test, verify, validate, QA, quality, check, coverage | tester | TEST | validation | +| Planning | plan, breakdown, organize, schedule, decompose, roadmap | planner | PLAN | orchestration | + +2. 
**Dependency Graph** -- build DAG using natural ordering tiers: + +| Tier | Capabilities | Description | +|------|-------------|-------------| +| 0 | researcher, planner | Knowledge gathering / planning | +| 1 | designer | Design (requires tier 0 if present) | +| 2 | writer, developer | Creation (requires design/plan if present) | +| 3 | analyst, tester | Validation (requires artifacts to validate) | + +3. **Role Minimization** -- merge overlapping capabilities, cap at 5 roles + +4. **Key File Inference** -- extract nouns from task description, map to likely file paths + +5. **output_type derivation**: + +| Task Signal | output_type | +|-------------|-------------| +| "write report", "analyze", "research" | `artifact` | +| "update code", "modify", "fix bug" | `codebase` | +| "implement feature + write summary" | `mixed` | + +**Classification Rules**: + +| Task Property | exec_mode | +|---------------|-----------| +| Single-pass implementation/analysis/documentation | `csv-wave` | +| Needs iterative user approval | `interactive` | +| Fix-verify revision cycle | `interactive` | +| Standard research, coding, testing | `csv-wave` | + +**Wave Computation**: Kahn's BFS topological sort with depth tracking. + +```javascript +// After task analysis, generate dynamic role instruction templates +for (const role of analysisResult.roles) { + const instruction = generateRoleInstruction(role, sessionFolder) + Write(`${sessionFolder}/role-instructions/${role.name}.md`, instruction) +} + +// Generate tasks.csv from dependency graph +const tasks = buildTasksCsv(analysisResult) +Write(`${sessionFolder}/tasks.csv`, toCsv(tasks)) +Write(`${sessionFolder}/task-analysis.json`, JSON.stringify(analysisResult, null, 2)) +``` + +**User Validation**: Display task breakdown with wave + exec_mode assignment (skip if AUTO_YES). 
+ +**Success Criteria**: +- tasks.csv created with valid schema, wave, and exec_mode assignments +- Role instruction templates generated in role-instructions/ +- task-analysis.json written +- No circular dependencies +- User approved (or AUTO_YES) + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with hybrid mechanism support and cross-wave context propagation. + +```javascript +const masterCsv = Read(`${sessionFolder}/tasks.csv`) +let tasks = parseCsv(masterCsv) +const maxWave = Math.max(...tasks.map(t => t.wave)) + +for (let wave = 1; wave <= maxWave; wave++) { + console.log(`\nWave ${wave}/${maxWave}`) + + // 1. Separate tasks by exec_mode + const waveTasks = tasks.filter(t => t.wave === wave && t.status === 'pending') + const csvTasks = waveTasks.filter(t => t.exec_mode === 'csv-wave') + const interactiveTasks = waveTasks.filter(t => t.exec_mode === 'interactive') + + // 2. Check dependencies -- skip tasks whose deps failed + for (const task of waveTasks) { + const depIds = (task.deps || '').split(';').filter(Boolean) + const depStatuses = depIds.map(id => tasks.find(t => t.id === id)?.status) + if (depStatuses.some(s => s === 'failed' || s === 'skipped')) { + task.status = 'skipped' + task.error = `Dependency failed: ${depIds.filter((id, i) => + ['failed','skipped'].includes(depStatuses[i])).join(', ')}` + } + } + + // 3. Execute pre-wave interactive tasks (e.g., plan approval) + const preWaveInteractive = interactiveTasks.filter(t => t.status === 'pending') + for (const task of preWaveInteractive) { + // Read agent definition + Read(`agents/plan-reviewer.md`) + + const agent = spawn_agent({ + message: `## TASK ASSIGNMENT\n\n### MANDATORY FIRST STEPS\n1. 
Read: ${sessionFolder}/discoveries.ndjson\n\nGoal: ${task.description}\nScope: ${task.title}\nSession: ${sessionFolder}\n\n### Previous Context\n${buildPrevContext(task, tasks)}` + }) + const result = wait({ ids: [agent], timeout_ms: 600000 }) + if (result.timed_out) { + send_input({ id: agent, message: "Please finalize and output current findings." }) + wait({ ids: [agent], timeout_ms: 120000 }) + } + Write(`${sessionFolder}/interactive/${task.id}-result.json`, JSON.stringify({ + task_id: task.id, status: "completed", findings: parseFindings(result), + timestamp: getUtc8ISOString() + })) + close_agent({ id: agent }) + task.status = 'completed' + task.findings = parseFindings(result) + } + + // 4. Build prev_context for csv-wave tasks + const pendingCsvTasks = csvTasks.filter(t => t.status === 'pending') + for (const task of pendingCsvTasks) { + task.prev_context = buildPrevContext(task, tasks) + } + + if (pendingCsvTasks.length > 0) { + // 5. Write wave CSV + Write(`${sessionFolder}/wave-${wave}.csv`, toCsv(pendingCsvTasks)) + + // 6. Determine instruction for this wave (use role-specific instruction) + // Group tasks by role, build combined instruction + const waveInstruction = buildWaveInstruction(pendingCsvTasks, sessionFolder, wave) + + // 7. Execute wave via spawn_agents_on_csv + spawn_agents_on_csv({ + csv_path: `${sessionFolder}/wave-${wave}.csv`, + id_column: "id", + instruction: waveInstruction, + max_concurrency: maxConcurrency, + max_runtime_seconds: 900, + output_csv_path: `${sessionFolder}/wave-${wave}-results.csv`, + output_schema: { + type: "object", + properties: { + id: { type: "string" }, + status: { type: "string", enum: ["completed", "failed"] }, + findings: { type: "string" }, + artifacts_produced: { type: "string" }, + error: { type: "string" } + } + } + }) + + // 8. 
Merge results into master CSV + const results = parseCsv(Read(`${sessionFolder}/wave-${wave}-results.csv`)) + for (const r of results) { + const t = tasks.find(t => t.id === r.id) + if (t) Object.assign(t, r) + } + } + + // 9. Update master CSV + Write(`${sessionFolder}/tasks.csv`, toCsv(tasks)) + + // 10. Cleanup temp files + Bash(`rm -f ${sessionFolder}/wave-${wave}.csv ${sessionFolder}/wave-${wave}-results.csv`) + + // 11. Display wave summary + const completed = waveTasks.filter(t => t.status === 'completed').length + const failed = waveTasks.filter(t => t.status === 'failed').length + const skipped = waveTasks.filter(t => t.status === 'skipped').length + console.log(`Wave ${wave} Complete: ${completed} completed, ${failed} failed, ${skipped} skipped`) +} +``` + +**Success Criteria**: +- All waves executed in order +- Both csv-wave and interactive tasks handled per wave +- Each wave's results merged into master CSV before next wave starts +- Dependent tasks skipped when predecessor failed +- discoveries.ndjson accumulated across all waves and mechanisms + +--- + +### Phase 3: Post-Wave Interactive (Completion Action) + +**Objective**: Pipeline completion report and interactive completion choice. + +```javascript +// 1. Generate pipeline summary +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +const completed = tasks.filter(t => t.status === 'completed') +const failed = tasks.filter(t => t.status === 'failed') + +console.log(` +============================================ +TASK COMPLETE + +Deliverables: +${completed.map(t => ` - ${t.id}: ${t.title} (${t.role})`).join('\n')} + +Pipeline: ${completed.length}/${tasks.length} tasks +Duration: +Session: ${sessionFolder} +============================================ +`) + +// 2. Completion action +if (!AUTO_YES) { + const choice = AskUserQuestion({ + questions: [{ + question: "Team pipeline complete. 
What would you like to do?", + header: "Completion", + multiSelect: false, + options: [ + { label: "Archive & Clean (Recommended)", description: "Archive session, output final summary" }, + { label: "Keep Active", description: "Keep session for follow-up work" }, + { label: "Retry Failed", description: "Re-run failed tasks" } + ] + }] + }) + // Handle choice accordingly +} +``` + +**Success Criteria**: +- Post-wave interactive processing complete +- User informed of results + +--- + +### Phase 4: Results Aggregation + +**Objective**: Generate final results and human-readable report. + +```javascript +// 1. Export results.csv +Bash(`cp ${sessionFolder}/tasks.csv ${sessionFolder}/results.csv`) + +// 2. Generate context.md +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +let contextMd = `# Team Coordinate Report\n\n` +contextMd += `**Session**: ${sessionId}\n` +contextMd += `**Date**: ${getUtc8ISOString().substring(0, 10)}\n\n` + +contextMd += `## Summary\n` +contextMd += `| Status | Count |\n|--------|-------|\n` +contextMd += `| Completed | ${tasks.filter(t => t.status === 'completed').length} |\n` +contextMd += `| Failed | ${tasks.filter(t => t.status === 'failed').length} |\n` +contextMd += `| Skipped | ${tasks.filter(t => t.status === 'skipped').length} |\n\n` + +const maxWave = Math.max(...tasks.map(t => t.wave)) +contextMd += `## Wave Execution\n\n` +for (let w = 1; w <= maxWave; w++) { + const waveTasks = tasks.filter(t => t.wave === w) + contextMd += `### Wave ${w}\n\n` + for (const t of waveTasks) { + const icon = t.status === 'completed' ? '[DONE]' : t.status === 'failed' ? '[FAIL]' : '[SKIP]' + contextMd += `${icon} **${t.title}** [${t.role}] ${t.findings || ''}\n\n` + } +} + +Write(`${sessionFolder}/context.md`, contextMd) + +// 3. 
Display final summary +console.log(`Results exported to: ${sessionFolder}/results.csv`) +console.log(`Report generated at: ${sessionFolder}/context.md`) +``` + +**Success Criteria**: +- results.csv exported (all tasks, both modes) +- context.md generated +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +All agents (csv-wave and interactive) share a single `discoveries.ndjson` file for cross-task knowledge exchange. + +**Format**: One JSON object per line (NDJSON): + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"RESEARCH-001","type":"pattern_found","data":{"pattern_name":"Repository Pattern","location":"src/repos/","description":"Data access layer uses repository pattern"}} +{"ts":"2026-03-08T10:05:00Z","worker":"IMPL-001","type":"file_modified","data":{"file":"src/auth/jwt.ts","change":"Added JWT middleware","lines_added":45}} +``` + +**Discovery Types**: + +| Type | Data Schema | Description | +|------|-------------|-------------| +| `pattern_found` | `{pattern_name, location, description}` | Design pattern identified | +| `file_modified` | `{file, change, lines_added}` | File change recorded | +| `dependency_found` | `{from, to, type}` | Dependency relationship discovered | +| `issue_found` | `{file, line, severity, description}` | Issue or bug discovered | +| `decision_made` | `{decision, rationale, impact}` | Design decision recorded | +| `artifact_produced` | `{name, path, producer, type}` | Deliverable created | + +**Protocol**: +1. Agents MUST read discoveries.ndjson at start of execution +2. Agents MUST append relevant discoveries during execution +3. Agents MUST NOT modify or delete existing entries +4. Deduplication by `{type, data.file, data.pattern_name}` key + +--- + +## Dynamic Role Instruction Generation + +The coordinator generates role-specific instruction templates during Phase 1. Each template is written to `role-instructions/{role-name}.md` and used as the `instruction` parameter for `spawn_agents_on_csv`. 
+ +**Generation Rules**: +1. Each instruction must be self-contained (agent has no access to master CSV) +2. Use `{column_name}` placeholders for CSV column substitution +3. Include session folder path as literal (not placeholder) +4. Include mandatory discovery board read/write steps +5. Include role-specific execution guidance based on responsibility_type +6. Include output schema matching tasks.csv output columns + +See `instructions/agent-instruction.md` for the base instruction template that is customized per role. + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Circular dependency | Detect in wave computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent tasks in later waves | +| Interactive agent timeout | Urge convergence via send_input, then close if still timed out | +| Interactive agent failed | Mark as failed, skip dependents | +| All agents in wave failed | Log error, offer retry or abort | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| No capabilities detected | Default to single `general` role with TASK prefix | +| All capabilities merge to one | Valid: single-role execution, reduced overhead | +| Task description too vague | AskUserQuestion for clarification in Phase 0 | +| Continue mode: no session found | List available sessions, prompt user to select | +| Role instruction generation fails | Fall back to generic instruction template | + +--- + +## Core Rules + +1. **Start Immediately**: First action is session initialization, then Phase 0/1 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. **CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive) +4. 
**CSV First**: Default to csv-wave for tasks; only use interactive when interaction pattern requires it +5. **Context Propagation**: prev_context built from master CSV, not from memory +6. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson +7. **Skip on Failure**: If a dependency failed, skip the dependent task +8. **Dynamic Roles**: All worker roles are generated at runtime from task analysis -- no static role registry +9. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +10. **DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-coordinate/agents/completion-handler.md b/.codex/skills/team-coordinate/agents/completion-handler.md new file mode 100644 index 00000000..d9de62ba --- /dev/null +++ b/.codex/skills/team-coordinate/agents/completion-handler.md @@ -0,0 +1,127 @@ +# Completion Handler Agent + +Interactive agent for handling pipeline completion actions. Presents results summary and manages Archive/Keep/Export choices. 
+
+## Identity
+
+- **Type**: `interactive`
+- **Role File**: `agents/completion-handler.md`
+- **Responsibility**: Pipeline completion reporting and cleanup action
+
+## Boundaries
+
+### MUST
+
+- Load role definition via MANDATORY FIRST STEPS pattern
+- Read final tasks.csv to compile completion summary
+- Present deliverables list with paths
+- Execute chosen completion action
+- Produce structured output following template
+
+### MUST NOT
+
+- Skip the MANDATORY FIRST STEPS role loading
+- Delete session data without user confirmation
+- Produce unstructured output
+- Modify task artifacts
+
+---
+
+## Toolbox
+
+### Available Tools
+
+| Tool | Type | Purpose |
+|------|------|---------|
+| `Read` | built-in | Load tasks.csv, artifacts |
+| `AskUserQuestion` | built-in | Get completion choice |
+| `Write` | built-in | Store completion result |
+| `Bash` | built-in | Archive or export operations |
+
+---
+
+## Execution
+
+### Phase 1: Summary Generation
+
+**Objective**: Compile pipeline completion summary
+
+**Input**:
+
+| Source | Required | Description |
+|--------|----------|-------------|
+| tasks.csv | Yes | Master state with all results |
+| artifacts/ | No | Deliverable files |
+| discoveries.ndjson | No | Shared discoveries |
+
+**Steps**:
+
+1. Read tasks.csv, count completed/failed/skipped
+2. List all produced artifacts with paths
+3. Summarize discoveries
+4. Calculate pipeline duration if timestamps available
+
+**Output**: Completion summary
+
+---
+
+### Phase 2: Completion Choice
+
+**Objective**: Execute user's chosen completion action
+
+**Steps**:
+
+1. Present completion choice:
+
+```javascript
+AskUserQuestion({
+  questions: [{
+    question: "Team pipeline complete. What would you like to do?",
+    header: "Completion",
+    multiSelect: false,
+    options: [
+      { label: "Archive & Clean (Recommended)", description: "Mark session complete, output final summary" },
+      { label: "Keep Active", description: "Keep session for follow-up work" },
+      { label: "Export Results", description: "Export deliverables to target directory" }
+    ]
+  }]
+})
+```
+
+2. Handle choice:
+
+| Choice | Steps |
+|--------|-------|
+| Archive & Clean | Write completion status, output artifact paths |
+| Keep Active | Keep session files, output resume instructions |
+| Export Results | Ask target path, copy artifacts, then archive |
+
+**Output**: Completion action result
+
+---
+
+## Structured Output Template
+
+```
+## Summary
+- Pipeline status: completed
+- Tasks: <completed>/<total>
+
+## Deliverables
+- <artifact-path> (produced by <task-id>)
+- <artifact-path> (produced by <task-id>)
+
+## Action Taken
+- Choice: <archive | keep | export>
+- Details: <action-details>
+```
+
+---
+
+## Error Handling
+
+| Scenario | Resolution |
+|----------|------------|
+| tasks.csv not found | Report error, suggest manual review |
+| Export target path invalid | Ask user for valid path |
+| Processing failure | Default to Keep Active, log warning |
diff --git a/.codex/skills/team-coordinate/agents/plan-reviewer.md b/.codex/skills/team-coordinate/agents/plan-reviewer.md
new file mode 100644
index 00000000..3fae044f
--- /dev/null
+++ b/.codex/skills/team-coordinate/agents/plan-reviewer.md
@@ -0,0 +1,145 @@
+# Plan Reviewer Agent
+
+Interactive agent for reviewing and approving plans before execution waves. Used when a task requires user confirmation checkpoint before proceeding.
+
+## Identity
+
+- **Type**: `interactive`
+- **Role File**: `agents/plan-reviewer.md`
+- **Responsibility**: Review generated plans, seek user approval, handle revision requests
+
+## Boundaries
+
+### MUST
+
+- Load role definition via MANDATORY FIRST STEPS pattern
+- Read the plan artifact being reviewed
+- Present a clear summary to the user
+- Wait for user approval before reporting complete
+- Produce structured output following template
+- Include file:line references in findings
+
+### MUST NOT
+
+- Skip the MANDATORY FIRST STEPS role loading
+- Approve plans without user confirmation
+- Modify the plan artifact directly
+- Produce unstructured output
+- Exceed defined scope boundaries
+
+---
+
+## Toolbox
+
+### Available Tools
+
+| Tool | Type | Purpose |
+|------|------|---------|
+| `Read` | built-in | Load plan artifacts and context |
+| `AskUserQuestion` | built-in | Get user approval or revision feedback |
+| `Write` | built-in | Store review result |
+
+### Tool Usage Patterns
+
+**Read Pattern**: Load context files before review
+```
+Read("<session_folder>/artifacts/<plan-artifact>.md")
+Read("<session_folder>/discoveries.ndjson")
+```
+
+**Write Pattern**: Store review result
+```
+Write("<session_folder>/interactive/<task-id>-result.json", <result-json>)
+```
+
+---
+
+## Execution
+
+### Phase 1: Context Loading
+
+**Objective**: Load the plan artifact and supporting context
+
+**Input**:
+
+| Source | Required | Description |
+|--------|----------|-------------|
+| Plan artifact | Yes | The plan document to review |
+| discoveries.ndjson | No | Shared discoveries for context |
+| Previous task findings | No | Upstream task results |
+
+**Steps**:
+
+1. Extract session path from task assignment
+2. Read the plan artifact referenced in the task description
+3. Read discoveries.ndjson for additional context
+4. Summarize key aspects of the plan
+
+**Output**: Plan summary ready for user review
+
+---
+
+### Phase 2: User Review
+
+**Objective**: Present plan to user and get approval
+
+**Steps**:
+
+1. Display plan summary with key decisions and trade-offs
+2. Present approval choice:
+
+```javascript
+AskUserQuestion({
+  questions: [{
+    question: "Review the plan and decide:",
+    header: "Plan Review",
+    multiSelect: false,
+    options: [
+      { label: "Approve", description: "Proceed with execution" },
+      { label: "Revise", description: "Request changes to the plan" },
+      { label: "Abort", description: "Cancel the pipeline" }
+    ]
+  }]
+})
+```
+
+3. Handle response:
+
+| Response | Action |
+|----------|--------|
+| Approve | Report approved status |
+| Revise | Collect revision feedback, report revision needed |
+| Abort | Report abort status |
+
+**Output**: Review decision with details
+
+---
+
+## Structured Output Template
+
+```
+## Summary
+- Plan reviewed: <plan-artifact-path>
+- Decision: <approve | revise | abort>
+
+## Findings
+- Key strength 1: description
+- Key concern 1: description
+
+## Decision Details
+- User choice: <choice>
+- Feedback: <revision-feedback-if-any>
+
+## Open Questions
+1. Any unresolved items from review
+```
+
+---
+
+## Error Handling
+
+| Scenario | Resolution |
+|----------|------------|
+| Plan artifact not found | Report in Open Questions, ask user for path |
+| User does not respond | Timeout, report partial with "awaiting-review" status |
+| Processing failure | Output partial results with clear status indicator |
diff --git a/.codex/skills/team-coordinate/instructions/agent-instruction.md b/.codex/skills/team-coordinate/instructions/agent-instruction.md
new file mode 100644
index 00000000..697a8a06
--- /dev/null
+++ b/.codex/skills/team-coordinate/instructions/agent-instruction.md
@@ -0,0 +1,184 @@
+# Agent Instruction Template -- Team Coordinate
+
+Base instruction template for CSV wave agents. The orchestrator dynamically customizes this per role during Phase 1, writing role-specific versions to `role-instructions/{role-name}.md`.
+
+## Purpose
+
+| Phase | Usage |
+|-------|-------|
+| Phase 1 | Coordinator generates per-role instruction from this template |
+| Phase 2 | Injected as `instruction` parameter to `spawn_agents_on_csv` |
+
+---
+
+## Base Instruction Template
+
+```markdown
+## TASK ASSIGNMENT -- Team Coordinate
+
+### MANDATORY FIRST STEPS
+1. Read shared discoveries: <SESSION_FOLDER>/discoveries.ndjson (if exists, skip if not)
+2. Read project context: .workflow/project-tech.json (if exists)
+
+---
+
+## Your Task
+
+**Task ID**: {id}
+**Title**: {title}
+**Role**: {role}
+**Responsibility**: {responsibility_type}
+**Output Type**: {output_type}
+
+### Task Description
+{description}
+
+### Previous Tasks' Findings (Context)
+{prev_context}
+
+---
+
+## Execution Protocol
+
+1. **Read discoveries**: Load <SESSION_FOLDER>/discoveries.ndjson for shared exploration findings
+2. **Use context**: Apply previous tasks' findings from prev_context above
+3. **Execute task**:
+   - Read target files referenced in description
+   - Follow the execution steps outlined in the TASK section of description
+   - Produce deliverables matching the EXPECTED section of description
+   - Verify output matches success criteria
+4. **Share discoveries**: Append exploration findings to shared board:
+   ```bash
+   echo '{"ts":"<UTC8-ISO-timestamp>","worker":"{id}","type":"<discovery-type>","data":{...}}' >> <SESSION_FOLDER>/discoveries.ndjson
+   ```
+5. **Report result**: Return JSON via report_agent_job_result
+
+### Discovery Types to Share
+- `pattern_found`: {pattern_name, location, description} -- Design pattern identified in codebase
+- `file_modified`: {file, change, lines_added} -- File change performed by this agent
+- `dependency_found`: {from, to, type} -- Dependency relationship between components
+- `issue_found`: {file, line, severity, description} -- Issue or bug discovered
+- `decision_made`: {decision, rationale, impact} -- Design decision made during execution
+- `artifact_produced`: {name, path, producer, type} -- Deliverable file created
+
+---
+
+## Output (report_agent_job_result)
+
+Return JSON:
+{
+  "id": "{id}",
+  "status": "completed" | "failed",
+  "findings": "Key discoveries and implementation notes (max 500 chars)",
+  "artifacts_produced": "semicolon-separated paths of produced files",
+  "error": ""
+}
+```
+
+---
+
+## Role-Specific Customization
+
+The coordinator generates per-role instruction variants during Phase 1. Each variant adds role-specific execution guidance to Step 3.
+
+### For Research / Exploration Roles
+
+Add to execution protocol step 3:
+```
+3. **Execute**:
+   - Define exploration scope from description
+   - Use code search tools to find relevant patterns and implementations
+   - Survey approaches, compare alternatives
+   - Document findings with file:line references
+   - Write research artifact to <SESSION_FOLDER>/artifacts/
+```
+
+### For Code Implementation Roles
+
+Add to execution protocol step 3:
+```
+3. **Execute**:
+   - Read upstream design/spec artifacts referenced in description
+   - Read target files listed in description
+   - Apply code changes following project conventions
+   - Validate changes compile/lint correctly
+   - Run relevant tests if available
+   - Write implementation summary to <SESSION_FOLDER>/artifacts/
+```
+
+### For Analysis / Audit Roles
+
+Add to execution protocol step 3:
+```
+3. 
**Execute**:
+   - Read target files/modules for analysis
+   - Apply analysis criteria systematically
+   - Classify findings by severity (critical, high, medium, low)
+   - Include file:line references in findings
+   - Write analysis report to <SESSION_FOLDER>/artifacts/
+```
+
+### For Test / Validation Roles
+
+Add to execution protocol step 3:
+```
+3. **Execute**:
+   - Read source files to understand implementation
+   - Identify test cases from description
+   - Generate test files following project test conventions
+   - Run tests and capture results
+   - Write test report to <SESSION_FOLDER>/artifacts/
+```
+
+### For Documentation / Writing Roles
+
+Add to execution protocol step 3:
+```
+3. **Execute**:
+   - Read source code and existing documentation
+   - Generate documentation following template in description
+   - Ensure accuracy against current implementation
+   - Include code examples where appropriate
+   - Write document to <SESSION_FOLDER>/artifacts/
+```
+
+### For Design / Architecture Roles
+
+Add to execution protocol step 3:
+```
+3. **Execute**:
+   - Read upstream research findings
+   - Analyze existing codebase structure
+   - Design component interactions and data flow
+   - Document architecture decisions with rationale
+   - Write design document to <SESSION_FOLDER>/artifacts/
+```
+
+---
+
+## Quality Requirements
+
+All agents must verify before reporting complete:
+
+| Requirement | Criteria |
+|-------------|----------|
+| Files produced | Verify all claimed artifacts exist via Read |
+| Files modified | Verify content actually changed |
+| Findings accuracy | Findings reflect actual work done |
+| Discovery sharing | At least 1 discovery shared to board |
+| Error reporting | Non-empty error field if status is failed |
+
+---
+
+## Placeholder Reference
+
+| Placeholder | Resolved By | When |
+|-------------|------------|------|
+| `<SESSION_FOLDER>` | Skill designer (Phase 1) | Literal path baked into instruction |
+| `{id}` | spawn_agents_on_csv | Runtime from CSV row |
+| `{title}` | spawn_agents_on_csv | Runtime from CSV row |
+| `{description}` | spawn_agents_on_csv | Runtime from CSV row |
+| `{role}` | spawn_agents_on_csv | Runtime from CSV row |
+| `{responsibility_type}` | spawn_agents_on_csv | Runtime from CSV row |
+| `{output_type}` | spawn_agents_on_csv | Runtime from CSV row |
+| `{prev_context}` | spawn_agents_on_csv | Runtime from CSV row |
diff --git a/.codex/skills/team-coordinate/schemas/tasks-schema.md b/.codex/skills/team-coordinate/schemas/tasks-schema.md
new file mode 100644
index 00000000..b0833892
--- /dev/null
+++ b/.codex/skills/team-coordinate/schemas/tasks-schema.md
@@ -0,0 +1,165 @@
+# Team Coordinate -- CSV Schema
+
+## Master CSV: tasks.csv
+
+### Column Definitions
+
+#### Input Columns (Set by Decomposer)
+
+| Column | Type | Required | Description | Example |
+|--------|------|----------|-------------|---------|
+| `id` | string | Yes | Unique task identifier (PREFIX-NNN) | `"RESEARCH-001"` |
+| `title` | string | Yes | Short task title | `"Investigate auth patterns"` |
+| `description` | 
string | Yes | Detailed task description (self-contained) with goal, steps, success criteria, key files | `"PURPOSE: Research JWT auth patterns..."` | +| `role` | string | Yes | Dynamic role name | `"researcher"` | +| `responsibility_type` | enum | Yes | `orchestration`, `read-only`, `code-gen`, `code-gen-docs`, `validation` | `"orchestration"` | +| `output_type` | enum | Yes | `artifact` (session files), `codebase` (project files), `mixed` | `"artifact"` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"RESEARCH-001;DESIGN-001"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"RESEARCH-001"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[RESEARCH-001] Found 3 auth patterns..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` -> `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Implemented JWT middleware with refresh token support..."` | +| `artifacts_produced` | string | Semicolon-separated paths of produced artifacts | `"artifacts/research-findings.md;src/auth/jwt.ts"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution | + +Interactive tasks appear in master CSV for dependency tracking but are NOT 
included in wave-{N}.csv files. + +--- + +### Dynamic Role Prefixes + +| Capability | Prefix | Responsibility Type | +|------------|--------|---------------------| +| researcher | RESEARCH | orchestration | +| writer | DRAFT | code-gen-docs | +| developer | IMPL | code-gen | +| designer | DESIGN | orchestration | +| analyst | ANALYSIS | read-only | +| tester | TEST | validation | +| planner | PLAN | orchestration | +| (default) | TASK | orchestration | + +--- + +### Example Data + +```csv +id,title,description,role,responsibility_type,output_type,deps,context_from,exec_mode,wave,status,findings,artifacts_produced,error +"RESEARCH-001","Research auth patterns","PURPOSE: Investigate JWT authentication patterns and industry best practices | Success: Comprehensive findings document with pattern comparison\nTASK:\n- Survey JWT vs session-based auth\n- Compare token refresh strategies\n- Document security considerations\nCONTEXT:\n- Key files: src/auth/*, src/middleware/*\nEXPECTED: artifacts/research-findings.md","researcher","orchestration","artifact","","","csv-wave","1","pending","","","" +"DESIGN-001","Design auth architecture","PURPOSE: Design authentication module architecture based on research | Success: Architecture document with component diagram\nTASK:\n- Define auth module structure\n- Design token lifecycle\n- Plan middleware integration\nCONTEXT:\n- Upstream: RESEARCH-001 findings\nEXPECTED: artifacts/auth-design.md","designer","orchestration","artifact","RESEARCH-001","RESEARCH-001","csv-wave","2","pending","","","" +"IMPL-001","Implement auth module","PURPOSE: Build JWT authentication middleware | Success: Working auth module with tests passing\nTASK:\n- Create JWT utility functions\n- Implement auth middleware\n- Add route guards\nCONTEXT:\n- Upstream: DESIGN-001 architecture\n- Key files: src/auth/*, src/middleware/*\nEXPECTED: Source files + 
artifacts/implementation-summary.md","developer","code-gen","mixed","DESIGN-001","DESIGN-001","csv-wave","3","pending","","","" +"TEST-001","Test auth implementation","PURPOSE: Validate auth module correctness | Success: All tests pass, coverage >= 80%\nTASK:\n- Write unit tests for JWT utilities\n- Write integration tests for middleware\n- Run test suite\nCONTEXT:\n- Upstream: IMPL-001 implementation\nEXPECTED: artifacts/test-report.md","tester","validation","artifact","IMPL-001","IMPL-001","csv-wave","4","pending","","","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +--------------------- -------------------- ----------------- +id ----------> id ----------> id +title ----------> title ----------> (reads) +description ----------> description ----------> (reads) +role ----------> role ----------> (reads) +responsibility_type ---> responsibility_type ---> (reads) +output_type ----------> output_type ----------> (reads) +deps ----------> deps ----------> (reads) +context_from----------> context_from----------> (reads) +exec_mode ----------> exec_mode ----------> (reads) + wave ----------> (reads) + prev_context ----------> (reads) + status + findings + artifacts_produced + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "IMPL-001", + "status": "completed", + "findings": "Implemented JWT auth middleware with access/refresh token support. Created 3 files: jwt.ts, auth-middleware.ts, route-guard.ts. All syntax checks pass.", + "artifacts_produced": "artifacts/implementation-summary.md;src/auth/jwt.ts;src/auth/auth-middleware.ts", + "error": "" +} +``` + +Interactive tasks output via structured text or JSON written to `interactive/{id}-result.json`. 
+ +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `pattern_found` | `data.pattern_name+data.location` | `{pattern_name, location, description}` | Design pattern identified | +| `file_modified` | `data.file` | `{file, change, lines_added}` | File change recorded | +| `dependency_found` | `data.from+data.to` | `{from, to, type}` | Dependency relationship | +| `issue_found` | `data.file+data.line` | `{file, line, severity, description}` | Issue discovered | +| `decision_made` | `data.decision` | `{decision, rationale, impact}` | Design decision | +| `artifact_produced` | `data.path` | `{name, path, producer, type}` | Deliverable created | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"RESEARCH-001","type":"pattern_found","data":{"pattern_name":"Repository Pattern","location":"src/repos/","description":"Data access layer uses repository pattern"}} +{"ts":"2026-03-08T10:05:00Z","worker":"IMPL-001","type":"file_modified","data":{"file":"src/auth/jwt.ts","change":"Added JWT middleware","lines_added":45}} +{"ts":"2026-03-08T10:10:00Z","worker":"IMPL-001","type":"artifact_produced","data":{"name":"implementation-summary","path":"artifacts/implementation-summary.md","producer":"developer","type":"markdown"}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. 
+ +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| CSV task findings | Interactive task | Injected via spawn message or send_input | +| Interactive task result | CSV task prev_context | Read from interactive/{id}-result.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Description non-empty | Every task has description | "Empty description for task: {id}" | +| Status enum | status in {pending, completed, failed, skipped} | "Invalid status: {status}" | +| Role valid | role matches a generated role-instruction | "No instruction for role: {role}" | +| Cross-mechanism deps | Interactive to CSV deps resolve correctly | "Cross-mechanism dependency unresolvable: {id}" | diff --git a/.codex/skills/team-designer/SKILL.md b/.codex/skills/team-designer/SKILL.md new file mode 100644 index 00000000..3cc984db --- /dev/null +++ b/.codex/skills/team-designer/SKILL.md @@ -0,0 +1,653 @@ +--- +name: team-designer +description: Meta-skill for generating team skills. Analyzes requirements, scaffolds directory structure, generates role definitions and specs, validates completeness. Produces complete Codex team skill packages with SKILL.md orchestrator, CSV schemas, agent instructions, and interactive agents. 
+argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"skill description with roles and domain\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults. + +# Team Skill Designer + +## Usage + +```bash +$team-designer "Design a code review team with analyst, reviewer, security-expert roles" +$team-designer -c 4 "Create a documentation team with researcher, writer, editor" +$team-designer -y "Generate a test automation team with planner, executor, tester" +$team-designer --continue "td-code-review-20260308" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 3) +- `--continue`: Resume existing session + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Meta-skill for generating complete team skill packages. Takes a skill description with roles and domain, then: analyzes requirements -> scaffolds directory structure -> generates all role files, specs, templates -> validates the package. The generated skill follows the Codex hybrid team architecture (CSV wave primary + interactive secondary). 
+ +**Execution Model**: Hybrid -- CSV wave pipeline (primary) + individual agent spawn (secondary) + +``` ++-------------------------------------------------------------------+ +| TEAM SKILL DESIGNER WORKFLOW | ++-------------------------------------------------------------------+ +| | +| Phase 0: Pre-Wave Interactive (Requirement Clarification) | +| +- Parse user skill description | +| +- Detect input source (reference, structured, natural) | +| +- Gather core identity (skill name, prefix, domain) | +| +- Output: refined requirements for decomposition | +| | +| Phase 1: Requirement -> CSV + Classification | +| +- Discover roles from domain keywords | +| +- Define pipelines from role combinations | +| +- Determine commands distribution (inline vs commands/) | +| +- Build teamConfig data structure | +| +- Classify tasks: csv-wave | interactive (exec_mode) | +| +- Compute dependency waves (topological sort) | +| +- Generate tasks.csv with wave + exec_mode columns | +| +- User validates task breakdown (skip if -y) | +| | +| Phase 2: Wave Execution Engine (Extended) | +| +- For each wave (1..N): | +| | +- Execute pre-wave interactive tasks (if any) | +| | +- Build wave CSV (filter csv-wave tasks for this wave) | +| | +- Inject previous findings into prev_context column | +| | +- spawn_agents_on_csv(wave CSV) | +| | +- Execute post-wave interactive tasks (if any) | +| | +- Merge all results into master tasks.csv | +| | +- Check: any failed? 
-> skip dependents | +| +- discoveries.ndjson shared across all modes (append-only) | +| | +| Phase 3: Post-Wave Interactive (Validation) | +| +- Structural validation (files exist, sections present) | +| +- Reference integrity (role registry matches files) | +| +- Pipeline consistency (no circular deps, roles exist) | +| +- Final aggregation / report | +| | +| Phase 4: Results Aggregation | +| +- Export final results.csv | +| +- Generate context.md with all findings | +| +- Display summary: completed/failed/skipped per wave | +| +- Offer: view results | retry failed | done | +| | ++-------------------------------------------------------------------+ +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, needs clarification, revision cycles | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Single-pass file generation (role.md, spec.md) | `csv-wave` | +| Directory scaffold creation | `csv-wave` | +| SKILL.md generation (complex, multi-section) | `csv-wave` | +| Coordinator role generation (multi-file) | `csv-wave` | +| Worker role generation (single file) | `csv-wave` | +| Pipeline spec generation | `csv-wave` | +| Template generation | `csv-wave` | +| User requirement clarification | `interactive` | +| Validation requiring user approval | `interactive` | +| Error recovery (auto-fix vs regenerate choice) | `interactive` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,role,file_target,gen_type,deps,context_from,exec_mode,wave,status,findings,files_produced,error +"SCAFFOLD-001","Create directory structure","Create the complete directory structure for the team skill including roles/, 
specs/, templates/ subdirectories","scaffolder","skill-dir","directory","","","csv-wave","1","pending","","","" +"SPEC-001","Generate pipelines spec","Generate specs/pipelines.md with pipeline definitions, task registry, conditional routing","spec-writer","specs/pipelines.md","spec","SCAFFOLD-001","","csv-wave","2","pending","","","" +"ROLE-001","Generate coordinator role","Generate roles/coordinator/role.md with entry router, command execution protocol, phase logic","role-writer","roles/coordinator/","role-bundle","SCAFFOLD-001;SPEC-001","SPEC-001","csv-wave","2","pending","","","" +"ROLE-002","Generate analyst worker role","Generate roles/analyst/role.md with domain-specific Phase 2-4 logic","role-writer","roles/analyst/role.md","role-inline","SCAFFOLD-001;SPEC-001","SPEC-001","csv-wave","2","pending","","","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier (string) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description with generation instructions | +| `role` | Input | Generator role: `scaffolder`, `spec-writer`, `role-writer`, `router-writer`, `validator` | +| `file_target` | Input | Target file or directory path relative to skill root | +| `gen_type` | Input | Generation type: `directory`, `router`, `role-bundle`, `role-inline`, `spec`, `template` | +| `deps` | Input | Semicolon-separated dependency task IDs | +| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `wave` | Computed | Wave number (computed by topological sort, 1-based) | +| `status` | Output | `pending` -> `completed` / `failed` / `skipped` | +| `findings` | Output | Key discoveries or implementation notes (max 500 chars) | +| `files_produced` | Output | Semicolon-separated paths of produced files | +| `error` | Output | Error message if failed (empty if success) | + +### Per-Wave 
CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). + +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| Requirement Clarifier | agents/requirement-clarifier.md | 2.3 (send_input cycle) | Gather and refine skill requirements interactively | standalone (Phase 0) | +| Validation Reporter | agents/validation-reporter.md | 2.3 (send_input cycle) | Validate generated skill package and report results | standalone (Phase 3) | + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. + +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state -- all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 4 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 4 | +| `teamConfig.json` | Phase 0/1 output: skill config, roles, pipelines | Created in Phase 1 | +| `interactive/{id}-result.json` | Results from interactive tasks | Created per interactive task | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ ++-- tasks.csv # Master state (all tasks, both modes) ++-- results.csv # Final results export ++-- discoveries.ndjson # Shared discovery board (all agents) ++-- context.md # Human-readable report ++-- teamConfig.json # Skill configuration from Phase 1 ++-- wave-{N}.csv # Temporary per-wave input (csv-wave only) ++-- artifacts/ # Generated skill files (intermediate) ++-- interactive/ # 
Interactive task artifacts +| +-- {id}-result.json ++-- validation/ # Validation reports + +-- structural.json + +-- references.json +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +const getUtc8ISOString = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString() + +const AUTO_YES = $ARGUMENTS.includes('--yes') || $ARGUMENTS.includes('-y') +const continueMode = $ARGUMENTS.includes('--continue') +const concurrencyMatch = $ARGUMENTS.match(/(?:--concurrency|-c)\s+(\d+)/) +const maxConcurrency = concurrencyMatch ? parseInt(concurrencyMatch[1]) : 3 + +const requirement = $ARGUMENTS + .replace(/--yes|-y|--continue|--concurrency\s+\d+|-c\s+\d+/g, '') + .trim() + +const slug = requirement.toLowerCase() + .replace(/[^a-z0-9\u4e00-\u9fa5]+/g, '-') + .substring(0, 40) +const dateStr = getUtc8ISOString().substring(0, 10).replace(/-/g, '') +const sessionId = `td-${slug}-${dateStr}` +const sessionFolder = `.workflow/.csv-wave/${sessionId}` + +Bash(`mkdir -p ${sessionFolder}/artifacts ${sessionFolder}/interactive ${sessionFolder}/validation`) + +// Initialize discoveries.ndjson +Write(`${sessionFolder}/discoveries.ndjson`, '') +``` + +--- + +### Phase 0: Pre-Wave Interactive (Requirement Clarification) + +**Objective**: Parse user skill description, clarify ambiguities, build teamConfig. + +**Workflow**: + +1. **Parse user skill description** from $ARGUMENTS + +2. **Detect input source**: + +| Source Type | Detection | Action | +|-------------|-----------|--------| +| Reference | Contains "based on", "like", or existing skill path | Read referenced skill, extract structure | +| Structured | Contains ROLES:, PIPELINES:, or DOMAIN: | Parse structured input directly | +| Natural language | Default | Analyze keywords, discover roles | + +3. 
**Check for existing sessions** (continue mode): + - Scan `.workflow/.csv-wave/td-*/tasks.csv` for sessions with pending tasks + - If `--continue`: resume the specified or most recent session, skip to Phase 2 + +4. **Gather core identity** (skip if AUTO_YES or already clear): + +Read `agents/requirement-clarifier.md`, then: + +```javascript +const clarifier = spawn_agent({ + message: `## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS +1. Read: agents/requirement-clarifier.md +2. Read: ${sessionFolder}/discoveries.ndjson (if exists) + +--- + +Goal: Gather team skill requirements from the user +Input: "${requirement}" +Session: ${sessionFolder} + +Determine: skill name (kebab-case), session prefix (3-4 chars), domain description, roles, pipelines, commands distribution.` +}) +const clarifyResult = wait({ ids: [clarifier], timeout_ms: 600000 }) +if (clarifyResult.timed_out) { + send_input({ id: clarifier, message: "Please finalize requirements with current information." }) + wait({ ids: [clarifier], timeout_ms: 120000 }) +} +Write(`${sessionFolder}/interactive/clarify-result.json`, JSON.stringify({ + task_id: "CLARIFY-001", status: "completed", findings: parseFindings(clarifyResult), + timestamp: getUtc8ISOString() +})) +close_agent({ id: clarifier }) +``` + +5. **Build teamConfig** from gathered requirements: + +```javascript +const teamConfig = { + skillName: "<skill-name>", + sessionPrefix: "<3-4 char prefix>", + domain: "<domain>", + title: "<title>", + roles: [ + { name: "coordinator", prefix: "—", inner_loop: false, hasCommands: true, commands: ["analyze", "dispatch", "monitor"], path: "roles/coordinator/role.md" }, + // ... discovered worker roles + ], + pipelines: [{ name: "<pipeline-name>", tasks: [/* task definitions */] }], + specs: ["pipelines"], + templates: [], + conditionalRouting: false, + targetDir: `.codex/skills/<skill-name>` +} + +Write(`${sessionFolder}/teamConfig.json`, JSON.stringify(teamConfig, null, 2)) +``` + +6. 
**Decompose into tasks** -- generate tasks.csv from teamConfig: + +| Task Pattern | gen_type | Wave | Description | +|--------------|----------|------|-------------| +| Directory scaffold | `directory` | 1 | Create skill directory structure | +| SKILL.md router | `router` | 2 | Generate main SKILL.md orchestrator | +| Pipeline spec | `spec` | 2 | Generate specs/pipelines.md | +| Domain specs | `spec` | 2 | Generate additional specs files | +| Coordinator role | `role-bundle` | 3 | Generate coordinator role.md + commands/ | +| Worker roles (each) | `role-inline` or `role-bundle` | 3 | Generate each worker role.md | +| Templates (each) | `template` | 3 | Generate template files | +| Validation | `validation` | 4 | Validate the complete package | + +**Success Criteria**: +- teamConfig.json written with complete configuration +- Refined requirements available for Phase 1 decomposition +- Interactive agents closed, results stored + +--- + +### Phase 1: Requirement -> CSV + Classification + +**Objective**: Generate tasks.csv from teamConfig with dependency-ordered waves. + +**Decomposition Rules**: + +1. **Role Discovery** -- scan domain description for keywords: + +| Signal | Keywords | Role Name | Prefix | +|--------|----------|-----------|--------| +| Analysis | analyze, research, investigate, explore | analyst | RESEARCH | +| Planning | plan, design, architect, decompose | planner | PLAN | +| Writing | write, document, draft, spec, report | writer | DRAFT | +| Implementation | implement, build, code, develop | executor | IMPL | +| Testing | test, verify, validate, qa | tester | TEST | +| Review | review, audit, check, inspect | reviewer | REVIEW | +| Security | security, vulnerability, penetration | security-expert | SECURITY | + +2. 
**Commands Distribution** -- determine inline vs commands/: + +| Condition | Commands Structure | +|-----------|-------------------| +| 1 distinct action for role | Inline in role.md | +| 2+ distinct actions | commands/ folder | +| Coordinator (always) | commands/: analyze, dispatch, monitor | + +3. **Pipeline Construction** -- build from role ordering: + +| Role Combination | Pipeline Type | +|------------------|---------------| +| analyst + writer + executor | full-lifecycle | +| analyst + writer (no executor) | spec-only | +| planner + executor (no analyst) | impl-only | +| Other | custom | + +**Classification Rules**: + +| Task Property | exec_mode | +|---------------|-----------| +| Directory creation | `csv-wave` | +| Single file generation (role.md, spec.md) | `csv-wave` | +| Multi-file bundle generation (coordinator) | `csv-wave` | +| SKILL.md router generation | `csv-wave` | +| User requirement clarification | `interactive` | +| Validation with error recovery | `interactive` | + +**Wave Computation**: Kahn's BFS topological sort with depth tracking (csv-wave tasks only). + +**User Validation**: Display task breakdown with wave + exec_mode assignment (skip if AUTO_YES). + +**Success Criteria**: +- tasks.csv created with valid schema, wave, and exec_mode assignments +- No circular dependencies +- User approved (or AUTO_YES) + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with hybrid mechanism support and cross-wave context propagation. + +```javascript +const masterCsv = Read(`${sessionFolder}/tasks.csv`) +let tasks = parseCsv(masterCsv) +const maxWave = Math.max(...tasks.map(t => t.wave)) + +for (let wave = 1; wave <= maxWave; wave++) { + console.log(`\nWave ${wave}/${maxWave}`) + + // 1. 
Separate tasks by exec_mode + const waveTasks = tasks.filter(t => t.wave === wave && t.status === 'pending') + const csvTasks = waveTasks.filter(t => t.exec_mode === 'csv-wave') + const interactiveTasks = waveTasks.filter(t => t.exec_mode === 'interactive') + + // 2. Check dependencies -- skip tasks whose deps failed + for (const task of waveTasks) { + const depIds = (task.deps || '').split(';').filter(Boolean) + const depStatuses = depIds.map(id => tasks.find(t => t.id === id)?.status) + if (depStatuses.some(s => s === 'failed' || s === 'skipped')) { + task.status = 'skipped' + task.error = `Dependency failed: ${depIds.filter((id, i) => + ['failed','skipped'].includes(depStatuses[i])).join(', ')}` + } + } + + // 3. Execute pre-wave interactive tasks + const preWaveInteractive = interactiveTasks.filter(t => t.status === 'pending') + for (const task of preWaveInteractive) { + // Use appropriate interactive agent + const agentFile = task.gen_type === 'validation' + ? 'agents/validation-reporter.md' + : 'agents/requirement-clarifier.md' + Read(agentFile) + + const agent = spawn_agent({ + message: `## TASK ASSIGNMENT\n\n### MANDATORY FIRST STEPS\n1. Read: ${agentFile}\n2. Read: ${sessionFolder}/discoveries.ndjson\n\nGoal: ${task.description}\nScope: ${task.title}\nSession: ${sessionFolder}\nteamConfig: ${sessionFolder}/teamConfig.json\n\n### Previous Context\n${buildPrevContext(task, tasks)}` + }) + const result = wait({ ids: [agent], timeout_ms: 600000 }) + if (result.timed_out) { + send_input({ id: agent, message: "Please finalize and output current findings." }) + wait({ ids: [agent], timeout_ms: 120000 }) + } + Write(`${sessionFolder}/interactive/${task.id}-result.json`, JSON.stringify({ + task_id: task.id, status: "completed", findings: parseFindings(result), + timestamp: getUtc8ISOString() + })) + close_agent({ id: agent }) + task.status = 'completed' + task.findings = parseFindings(result) + } + + // 4. 
Build prev_context for csv-wave tasks + const pendingCsvTasks = csvTasks.filter(t => t.status === 'pending') + for (const task of pendingCsvTasks) { + task.prev_context = buildPrevContext(task, tasks) + } + + if (pendingCsvTasks.length > 0) { + // 5. Write wave CSV + Write(`${sessionFolder}/wave-${wave}.csv`, toCsv(pendingCsvTasks)) + + // 6. Execute wave via spawn_agents_on_csv + spawn_agents_on_csv({ + csv_path: `${sessionFolder}/wave-${wave}.csv`, + id_column: "id", + instruction: Read(`instructions/agent-instruction.md`) + .replace(/<session_folder>/g, sessionFolder), + max_concurrency: maxConcurrency, + max_runtime_seconds: 900, + output_csv_path: `${sessionFolder}/wave-${wave}-results.csv`, + output_schema: { + type: "object", + properties: { + id: { type: "string" }, + status: { type: "string", enum: ["completed", "failed"] }, + findings: { type: "string" }, + files_produced: { type: "string" }, + error: { type: "string" } + } + } + }) + + // 7. Merge results into master CSV + const results = parseCsv(Read(`${sessionFolder}/wave-${wave}-results.csv`)) + for (const r of results) { + const t = tasks.find(t => t.id === r.id) + if (t) Object.assign(t, r) + } + } + + // 8. Update master CSV + Write(`${sessionFolder}/tasks.csv`, toCsv(tasks)) + + // 9. Cleanup temp files + Bash(`rm -f ${sessionFolder}/wave-${wave}.csv ${sessionFolder}/wave-${wave}-results.csv`) + + // 10. 
Display wave summary + const completed = waveTasks.filter(t => t.status === 'completed').length + const failed = waveTasks.filter(t => t.status === 'failed').length + const skipped = waveTasks.filter(t => t.status === 'skipped').length + console.log(`Wave ${wave} Complete: ${completed} completed, ${failed} failed, ${skipped} skipped`) +} +``` + +**Success Criteria**: +- All waves executed in order +- Both csv-wave and interactive tasks handled per wave +- Each wave's results merged into master CSV before next wave starts +- Dependent tasks skipped when predecessor failed +- discoveries.ndjson accumulated across all waves and mechanisms + +--- + +### Phase 3: Post-Wave Interactive (Validation) + +**Objective**: Validate the generated team skill package and present results. + +Read `agents/validation-reporter.md`, then: + +```javascript +const validator = spawn_agent({ + message: `## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS +1. Read: agents/validation-reporter.md +2. Read: ${sessionFolder}/discoveries.ndjson +3. Read: ${sessionFolder}/teamConfig.json + +--- + +Goal: Validate the generated team skill package at ${teamConfig.targetDir} +Session: ${sessionFolder} + +### Validation Checks +1. Structural: All files exist per teamConfig +2. SKILL.md: Required sections present, role registry correct +3. Role frontmatter: YAML frontmatter valid for each worker role +4. Pipeline consistency: No circular deps, roles referenced exist +5. Commands distribution: commands/ matches hasCommands flag + +### Previous Context +${buildCompletePrevContext(tasks)}` +}) +const validResult = wait({ ids: [validator], timeout_ms: 600000 }) +if (validResult.timed_out) { + send_input({ id: validator, message: "Please finalize validation with current findings." 
}) + wait({ ids: [validator], timeout_ms: 120000 }) +} +Write(`${sessionFolder}/interactive/validation-result.json`, JSON.stringify({ + task_id: "VALIDATE-001", status: "completed", findings: parseFindings(validResult), + timestamp: getUtc8ISOString() +})) +close_agent({ id: validator }) +``` + +**Success Criteria**: +- Post-wave interactive processing complete +- Validation report generated +- Interactive agents closed, results stored + +--- + +### Phase 4: Results Aggregation + +**Objective**: Generate final results and human-readable report. + +```javascript +// 1. Export results.csv +Bash(`cp ${sessionFolder}/tasks.csv ${sessionFolder}/results.csv`) + +// 2. Generate context.md +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +let contextMd = `# Team Skill Designer Report\n\n` +contextMd += `**Session**: ${sessionId}\n` +contextMd += `**Skill**: ${teamConfig.skillName}\n` +contextMd += `**Target**: ${teamConfig.targetDir}\n\n` + +contextMd += `## Summary\n` +contextMd += `| Status | Count |\n|--------|-------|\n` +contextMd += `| Completed | ${tasks.filter(t => t.status === 'completed').length} |\n` +contextMd += `| Failed | ${tasks.filter(t => t.status === 'failed').length} |\n` +contextMd += `| Skipped | ${tasks.filter(t => t.status === 'skipped').length} |\n\n` + +contextMd += `## Generated Skill Structure\n\n` +contextMd += `\`\`\`\n${teamConfig.targetDir}/\n` +contextMd += `+-- SKILL.md\n+-- schemas/\n| +-- tasks-schema.md\n+-- instructions/\n| +-- agent-instruction.md\n` +// ... roles, specs, templates +contextMd += `\`\`\`\n\n` + +contextMd += `## Validation\n` +// ... validation results + +Write(`${sessionFolder}/context.md`, contextMd) + +// 3. 
Display final summary +console.log(`\nTeam Skill Designer Complete`) +console.log(`Generated skill: ${teamConfig.targetDir}`) +console.log(`Results: ${sessionFolder}/results.csv`) +console.log(`Report: ${sessionFolder}/context.md`) +console.log(`\nUsage: $${teamConfig.skillName} "task description"`) +``` + +**Success Criteria**: +- results.csv exported (all tasks, both modes) +- context.md generated +- All interactive agents closed +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +All agents (csv-wave and interactive) share a single `discoveries.ndjson` file for cross-task knowledge exchange. + +**Format**: One JSON object per line (NDJSON): + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"SCAFFOLD-001","type":"dir_created","data":{"path":".codex/skills/team-code-review/","description":"Created skill directory structure"}} +{"ts":"2026-03-08T10:05:00Z","worker":"ROLE-001","type":"file_generated","data":{"file":"roles/coordinator/role.md","gen_type":"role-bundle","sections":["entry-router","commands"]}} +{"ts":"2026-03-08T10:10:00Z","worker":"SPEC-001","type":"pattern_found","data":{"pattern_name":"full-lifecycle","description":"Pipeline with analyst -> writer -> executor -> tester"}} +``` + +**Discovery Types**: + +| Type | Data Schema | Description | +|------|-------------|-------------| +| `dir_created` | `{path, description}` | Directory structure created | +| `file_generated` | `{file, gen_type, sections}` | File generated with specific sections | +| `pattern_found` | `{pattern_name, description}` | Design pattern identified in golden sample | +| `config_decision` | `{decision, rationale, impact}` | Configuration decision made | +| `validation_result` | `{check, passed, message}` | Validation check result | +| `reference_found` | `{source, target, type}` | Cross-reference between generated files | + +**Protocol**: +1. Agents MUST read discoveries.ndjson at start of execution +2. 
Agents MUST append relevant discoveries during execution +3. Agents MUST NOT modify or delete existing entries +4. Deduplication by `{type, data.file, data.path}` key + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Circular dependency | Detect in wave computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent tasks in later waves | +| Interactive agent timeout | Urge convergence via send_input, then close if still timed out | +| Interactive agent failed | Mark as failed, skip dependents | +| All agents in wave failed | Log error, offer retry or abort | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| Invalid role name | Must be lowercase alphanumeric with hyphens, max 20 chars | +| Directory conflict | Warn if skill directory already exists, ask user to confirm overwrite | +| Golden sample not found | Fall back to embedded templates in instructions | +| Validation FAIL | Offer auto-fix, regenerate, or accept as-is | +| Continue mode: no session found | List available sessions, prompt user to select | + +--- + +## Core Rules + +1. **Start Immediately**: First action is session initialization, then Phase 0/1 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. **CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive) +4. **CSV First**: Default to csv-wave for tasks; only use interactive when interaction pattern requires it +5. **Context Propagation**: prev_context built from master CSV, not from memory +6. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson +7. **Skip on Failure**: If a dependency failed, skip the dependent task +8. 
**Golden Sample Fidelity**: Generated files must match existing team skill patterns +9. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +10. **DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-designer/agents/requirement-clarifier.md b/.codex/skills/team-designer/agents/requirement-clarifier.md new file mode 100644 index 00000000..ac366ed7 --- /dev/null +++ b/.codex/skills/team-designer/agents/requirement-clarifier.md @@ -0,0 +1,248 @@ +# Requirement Clarifier Agent + +Interactive agent for gathering and refining team skill requirements from user input. Used in Phase 0 when the skill description needs clarification or missing details. + +## Identity + +- **Type**: `interactive` +- **Role File**: `agents/requirement-clarifier.md` +- **Responsibility**: Gather skill name, roles, pipelines, specs, and build teamConfig + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Parse user input to detect input source (reference, structured, natural) +- Gather all required teamConfig fields +- Confirm configuration with user before reporting complete +- Produce structured output following template +- Write teamConfig.json to session folder + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Generate skill files (that is Phase 2 work) +- Approve incomplete configurations +- Produce unstructured output +- Exceed defined scope boundaries + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | built-in | Load reference skills, existing patterns | +| `AskUserQuestion` | built-in | Gather missing details from user | +| `Write` | built-in | Store teamConfig.json | +| `Glob` | built-in | Find reference skill files | + +### Tool Usage Patterns + +**Read Pattern**: Load reference skill for pattern extraction +``` +Read(".codex/skills//SKILL.md") 
+Read(".codex/skills/<reference-skill>/schemas/tasks-schema.md") +``` + +**Write Pattern**: Store configuration +``` +Write("<session-folder>/teamConfig.json", <teamConfig JSON>) +``` + +--- + +## Execution + +### Phase 1: Input Detection + +**Objective**: Detect input source type and extract initial information + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| User requirement | Yes | Skill description from $ARGUMENTS | +| Reference skill | No | Existing skill if "based on" detected | + +**Steps**: + +1. Parse user input to detect source type: + +| Source Type | Detection | Action | +|-------------|-----------|--------| +| Reference | Contains "based on", "like", skill path | Read referenced skill, extract roles/pipelines | +| Structured | Contains ROLES:, PIPELINES:, DOMAIN: | Parse structured fields directly | +| Natural language | Default | Analyze keywords for role discovery | + +2. Extract initial information from detected source +3. Identify missing required fields + +**Output**: Initial teamConfig draft with gaps identified + +--- + +### Phase 2: Requirement Gathering + +**Objective**: Fill in all required teamConfig fields + +**Steps**: + +1. **Core Identity** -- gather if not clear from input: + +```javascript +AskUserQuestion({ + questions: [ + { + question: "Team skill name? (kebab-case, e.g., team-code-review)", + header: "Skill Name", + multiSelect: false, + options: [ + { label: "<suggested-name>", description: "Auto-suggested from description" }, + { label: "Custom", description: "Enter custom name" } + ] + }, + { + question: "Session prefix? (3-4 chars for task IDs, e.g., TCR)", + header: "Prefix", + multiSelect: false, + options: [ + { label: "<suggested-prefix>", description: "Auto-suggested" }, + { label: "Custom", description: "Enter custom prefix" } + ] + } + ] +}) +``` + +2. 
**Role Discovery** -- identify roles from domain keywords: + +| Signal | Keywords | Default Role | +|--------|----------|-------------| +| Analysis | analyze, research, investigate | analyst | +| Planning | plan, design, architect | planner | +| Writing | write, document, draft | writer | +| Implementation | implement, build, code | executor | +| Testing | test, verify, validate | tester | +| Review | review, audit, check | reviewer | + +3. **Commands Distribution** -- determine per role: + +| Rule | Condition | Result | +|------|-----------|--------| +| Coordinator | Always | commands/: analyze, dispatch, monitor | +| Multi-action role | 2+ distinct actions | commands/ folder | +| Single-action role | 1 action | Inline in role.md | + +4. **Pipeline Construction** -- determine from role combination: + +| Roles Present | Pipeline Type | +|---------------|---------------| +| analyst + writer + executor | full-lifecycle | +| analyst + writer (no executor) | spec-only | +| planner + executor (no analyst) | impl-only | +| Other combinations | custom | + +5. **Specs and Templates** -- determine required specs: + - Always: pipelines.md + - If quality gates needed: quality-gates.md + - If writer role: domain-appropriate templates + +**Output**: Complete teamConfig ready for confirmation + +--- + +### Phase 3: Confirmation + +**Objective**: Present configuration summary and get user approval + +**Steps**: + +1. Display configuration summary: + +``` +Team Skill Configuration Summary + +Skill Name: +Session Prefix: +Domain: +Target: .codex/skills// + +Roles: + +- coordinator (commands: analyze, dispatch, monitor) + +- [PREFIX-*] (inline) + +- [PREFIX-*] (commands: cmd1, cmd2) + +Pipelines: + +- : TASK-001 -> TASK-002 -> TASK-003 + +Specs: pipelines, +Templates: +``` + +2. 
Present confirmation: + +```javascript +AskUserQuestion({ + questions: [{ + question: "Confirm this team skill configuration?", + header: "Configuration Review", + multiSelect: false, + options: [ + { label: "Confirm", description: "Proceed with generation" }, + { label: "Modify Roles", description: "Add, remove, or change roles" }, + { label: "Modify Pipelines", description: "Change pipeline structure" }, + { label: "Cancel", description: "Abort skill generation" } + ] + }] +}) +``` + +3. Handle response: + +| Response | Action | +|----------|--------| +| Confirm | Write teamConfig.json, report complete | +| Modify Roles | Loop back to role gathering | +| Modify Pipelines | Loop back to pipeline construction | +| Cancel | Report cancelled status | + +**Output**: Confirmed teamConfig.json written to session folder + +--- + +## Structured Output Template + +``` +## Summary +- Configuration: +- Skill: + +## Configuration +- Roles: roles defined +- Pipelines: pipelines +- Target: + +## Details +- Role list with prefix and commands structure +- Pipeline definitions with task flow +- Specs and templates list + +## Open Questions +1. 
Any unresolved items from clarification +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Reference skill not found | Report error, ask for correct path | +| Invalid role name | Suggest valid kebab-case alternative | +| Conflicting pipeline structure | Ask user to resolve | +| User does not respond | Timeout, report partial with current config | +| Processing failure | Output partial results with clear status indicator | diff --git a/.codex/skills/team-designer/instructions/agent-instruction.md b/.codex/skills/team-designer/instructions/agent-instruction.md new file mode 100644 index 00000000..c2a6a123 --- /dev/null +++ b/.codex/skills/team-designer/instructions/agent-instruction.md @@ -0,0 +1,163 @@ +# Agent Instruction Template -- Team Skill Designer + +Base instruction template for CSV wave agents. Each agent receives this template with its row's column values substituted at runtime via `spawn_agents_on_csv`. + +## Purpose + +| Phase | Usage | +|-------|-------| +| Phase 1 | Baked into instruction parameter with session folder path | +| Phase 2 | Injected as `instruction` parameter to `spawn_agents_on_csv` | ++--- + +## Base Instruction Template + +```markdown +## TASK ASSIGNMENT -- Team Skill Designer + +### MANDATORY FIRST STEPS +1. Read shared discoveries: <session-folder>/discoveries.ndjson (if exists, skip if not) +2. Read project context: .workflow/project-tech.json (if exists) +3. Read teamConfig: <session-folder>/teamConfig.json (REQUIRED -- contains complete skill configuration) + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Role**: {role} +**File Target**: {file_target} +**Generation Type**: {gen_type} + +### Task Description +{description} + +### Previous Tasks' Findings (Context) +{prev_context} + +--- + +## Execution Protocol + +1. **Read discoveries**: Load <session-folder>/discoveries.ndjson for shared exploration findings +2. 
**Read teamConfig**: Load <session-folder>/teamConfig.json for complete skill configuration (roles, pipelines, specs, templates) +3. **Use context**: Apply previous tasks' findings from prev_context above +4. **Execute by gen_type**: + +### For gen_type = directory + - Parse teamConfig to determine required directories + - Create directory structure at teamConfig.targetDir + - Create subdirectories: roles/, specs/, templates/ (if needed) + - Create per-role subdirectories: roles/<role-name>/ (+ commands/ if hasCommands) + - Verify all directories exist + +### For gen_type = router + - Read existing Codex team skill SKILL.md as reference pattern + - Generate SKILL.md with these sections in order: + 1. YAML frontmatter (name, description, argument-hint, allowed-tools) + 2. Auto Mode section + 3. Title + Usage examples + 4. Overview with workflow diagram + 5. Task Classification Rules + 6. CSV Schema (header + column definitions) + 7. Agent Registry (if interactive agents exist) + 8. Output Artifacts table + 9. Session Structure diagram + 10. Implementation (session init, phases 0-4) + 11. Discovery Board Protocol + 12. Error Handling table + 13. Core Rules list + - Use teamConfig.roles for role registry + - Use teamConfig.pipelines for pipeline definitions + +### For gen_type = role-bundle + - Generate role.md with: + 1. YAML frontmatter (role, prefix, inner_loop, message_types) + 2. Identity section + 3. Boundaries (MUST/MUST NOT) + 4. Entry Router (for coordinator) + 5. Phase references (Phase 0-5 for coordinator) + - Generate commands/*.md for each command in teamConfig.roles[].commands + - Each command file: Purpose, Constants, Phase 2-4 execution logic + - Coordinator always gets: analyze.md, dispatch.md, monitor.md + +### For gen_type = role-inline + - Generate single role.md with: + 1. YAML frontmatter (role, prefix, inner_loop, message_types) + 2. Identity section + 3. Boundaries (MUST/MUST NOT) + 4. Phase 2: Context Loading + 5. Phase 3: Domain Execution (role-specific logic) + 6. 
Phase 4: Output & Report + +### For gen_type = spec + - For pipelines.md: Generate from teamConfig.pipelines + - Pipeline name, task table (ID, Role, Name, Depends On, Checkpoint) + - Task metadata registry + - Conditional routing rules + - Dynamic specialist injection + - For other specs: Generate domain-appropriate content + +### For gen_type = template + - Check for reference templates in existing skills + - Generate domain-appropriate template structure + - Include placeholder sections and formatting guidelines + +5. **Share discoveries**: Append exploration findings to shared board: + ```bash + echo '{"ts":"","worker":"{id}","type":"","data":{...}}' >> /discoveries.ndjson + ``` +6. **Report result**: Return JSON via report_agent_job_result + +### Discovery Types to Share +- `dir_created`: {path, description} -- Directory structure created +- `file_generated`: {file, gen_type, sections} -- File generated with specific sections +- `pattern_found`: {pattern_name, description} -- Design pattern identified +- `config_decision`: {decision, rationale, impact} -- Configuration decision made +- `reference_found`: {source, target, type} -- Cross-reference between generated files + +--- + +## Output (report_agent_job_result) + +Return JSON: +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Key discoveries and generation notes (max 500 chars)", + "files_produced": "semicolon-separated paths of produced files relative to skill root", + "error": "" +} +``` + +--- + +## Quality Requirements + +All agents must verify before reporting complete: + +| Requirement | Criteria | +|-------------|----------| +| Files produced | Verify all claimed files exist via Read | +| teamConfig adherence | Generated content matches teamConfig specifications | +| Pattern fidelity | Generated files follow existing Codex skill patterns | +| Discovery sharing | At least 1 discovery shared to board | +| Error reporting | Non-empty error field if status is failed | +| YAML 
frontmatter | Role files must have valid frontmatter for agent parsing | + +--- + +## Placeholder Reference + +| Placeholder | Resolved By | When | +|-------------|------------|------| +| `` | Skill designer (Phase 1) | Literal path baked into instruction | +| `{id}` | spawn_agents_on_csv | Runtime from CSV row | +| `{title}` | spawn_agents_on_csv | Runtime from CSV row | +| `{description}` | spawn_agents_on_csv | Runtime from CSV row | +| `{role}` | spawn_agents_on_csv | Runtime from CSV row | +| `{file_target}` | spawn_agents_on_csv | Runtime from CSV row | +| `{gen_type}` | spawn_agents_on_csv | Runtime from CSV row | +| `{prev_context}` | spawn_agents_on_csv | Runtime from CSV row | diff --git a/.codex/skills/team-designer/schemas/tasks-schema.md b/.codex/skills/team-designer/schemas/tasks-schema.md new file mode 100644 index 00000000..c8e3cee3 --- /dev/null +++ b/.codex/skills/team-designer/schemas/tasks-schema.md @@ -0,0 +1,180 @@ +# Team Skill Designer -- CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### Input Columns (Set by Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | string | Yes | Unique task identifier | `"SCAFFOLD-001"` | +| `title` | string | Yes | Short task title | `"Create directory structure"` | +| `description` | string | Yes | Detailed generation instructions (self-contained) | `"Create roles/, specs/, templates/ directories..."` | +| `role` | string | Yes | Generator role name | `"scaffolder"` | +| `file_target` | string | Yes | Target file/directory path relative to skill root | `"roles/coordinator/role.md"` | +| `gen_type` | enum | Yes | `directory`, `router`, `role-bundle`, `role-inline`, `spec`, `template`, `validation` | `"role-inline"` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"SCAFFOLD-001;SPEC-001"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"SPEC-001"` | +| 
`exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[SCAFFOLD-001] Created directory structure at .codex/skills/..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` -> `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Generated coordinator with 3 commands: analyze, dispatch, monitor"` | +| `files_produced` | string | Semicolon-separated paths of produced files | `"roles/coordinator/role.md;roles/coordinator/commands/analyze.md"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution | + +Interactive tasks appear in master CSV for dependency tracking but are NOT included in wave-{N}.csv files. 
+ +--- + +### Generator Roles + +| Role | gen_type Values | Description | +|------|-----------------|-------------| +| `scaffolder` | `directory` | Creates directory structures | +| `router-writer` | `router` | Generates SKILL.md orchestrator files | +| `role-writer` | `role-bundle`, `role-inline` | Generates role.md files (+ optional commands/) | +| `spec-writer` | `spec` | Generates specs/*.md files | +| `template-writer` | `template` | Generates templates/*.md files | +| `validator` | `validation` | Validates generated skill package | + +--- + +### gen_type Values + +| gen_type | Target | Description | +|----------|--------|-------------| +| `directory` | Directory path | Create directory structure with subdirectories | +| `router` | SKILL.md | Generate main orchestrator SKILL.md with frontmatter, role registry, router | +| `role-bundle` | Directory path | Generate role.md + commands/ folder with multiple command files | +| `role-inline` | Single .md file | Generate single role.md with inline Phase 2-4 logic | +| `spec` | Single .md file | Generate spec file (pipelines, quality-gates, etc.) 
| +| `template` | Single .md file | Generate document template file | +| `validation` | Report | Validate complete skill package structure and references | + +--- + +### Example Data + +```csv +id,title,description,role,file_target,gen_type,deps,context_from,exec_mode,wave,status,findings,files_produced,error +"SCAFFOLD-001","Create directory structure","Create complete directory structure for team-code-review skill:\n- .codex/skills/team-code-review/\n- roles/coordinator/ + commands/\n- roles/analyst/\n- roles/reviewer/\n- specs/\n- templates/","scaffolder","skill-dir","directory","","","csv-wave","1","pending","","","" +"ROUTER-001","Generate SKILL.md","Generate .codex/skills/team-code-review/SKILL.md with:\n- Frontmatter (name, description, allowed-tools)\n- Architecture diagram\n- Role registry table\n- CSV schema reference\n- Session structure\n- Wave execution engine\nUse teamConfig.json for role list and pipeline definitions","router-writer","SKILL.md","router","SCAFFOLD-001","SCAFFOLD-001","csv-wave","2","pending","","","" +"SPEC-001","Generate pipelines spec","Generate specs/pipelines.md with:\n- Pipeline definitions from teamConfig\n- Task registry with PREFIX-NNN format\n- Conditional routing rules\n- Dynamic specialist injection\nRoles: analyst(ANALYSIS-*), reviewer(REVIEW-*)","spec-writer","specs/pipelines.md","spec","SCAFFOLD-001","SCAFFOLD-001","csv-wave","2","pending","","","" +"ROLE-001","Generate coordinator","Generate roles/coordinator/role.md with entry router and commands/analyze.md, commands/dispatch.md, commands/monitor.md. Coordinator orchestrates the analysis pipeline","role-writer","roles/coordinator/","role-bundle","SCAFFOLD-001;SPEC-001","SPEC-001","csv-wave","3","pending","","","" +"ROLE-002","Generate analyst role","Generate roles/analyst/role.md with Phase 2 (context loading), Phase 3 (analysis execution), Phase 4 (output). 
Prefix: ANALYSIS, inner_loop: false","role-writer","roles/analyst/role.md","role-inline","SCAFFOLD-001;SPEC-001","SPEC-001","csv-wave","3","pending","","","" +"ROLE-003","Generate reviewer role","Generate roles/reviewer/role.md with Phase 2 (load artifacts), Phase 3 (review execution), Phase 4 (report). Prefix: REVIEW, inner_loop: false","role-writer","roles/reviewer/role.md","role-inline","SCAFFOLD-001;SPEC-001","SPEC-001","csv-wave","3","pending","","","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +--------------------- -------------------- ----------------- +id ----------> id ----------> id +title ----------> title ----------> (reads) +description ----------> description ----------> (reads) +role ----------> role ----------> (reads) +file_target ----------> file_target ----------> (reads) +gen_type ----------> gen_type ----------> (reads) +deps ----------> deps ----------> (reads) +context_from----------> context_from----------> (reads) +exec_mode ----------> exec_mode ----------> (reads) + wave ----------> (reads) + prev_context ----------> (reads) + status + findings + files_produced + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "ROLE-001", + "status": "completed", + "findings": "Generated coordinator role with entry router, 3 commands (analyze, dispatch, monitor), beat model in monitor.md only", + "files_produced": "roles/coordinator/role.md;roles/coordinator/commands/analyze.md;roles/coordinator/commands/dispatch.md;roles/coordinator/commands/monitor.md", + "error": "" +} +``` + +Interactive tasks output via structured text or JSON written to `interactive/{id}-result.json`. 
+ +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `dir_created` | `data.path` | `{path, description}` | Directory structure created | +| `file_generated` | `data.file` | `{file, gen_type, sections}` | File generated with sections | +| `pattern_found` | `data.pattern_name` | `{pattern_name, description}` | Design pattern from golden sample | +| `config_decision` | `data.decision` | `{decision, rationale, impact}` | Config decision made | +| `validation_result` | `data.check` | `{check, passed, message}` | Validation check result | +| `reference_found` | `data.source+data.target` | `{source, target, type}` | Cross-reference between files | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"SCAFFOLD-001","type":"dir_created","data":{"path":".codex/skills/team-code-review/roles/","description":"Created roles directory with coordinator, analyst, reviewer subdirs"}} +{"ts":"2026-03-08T10:05:00Z","worker":"ROLE-001","type":"file_generated","data":{"file":"roles/coordinator/role.md","gen_type":"role-bundle","sections":["entry-router","phase-0","phase-1","phase-2","phase-3"]}} +{"ts":"2026-03-08T10:10:00Z","worker":"SPEC-001","type":"config_decision","data":{"decision":"full-lifecycle pipeline","rationale":"Both analyst and reviewer roles present","impact":"4-tier dependency graph"}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. 
+ +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| CSV task findings | Interactive task | Injected via spawn message or send_input | +| Interactive task result | CSV task prev_context | Read from interactive/{id}-result.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Description non-empty | Every task has description | "Empty description for task: {id}" | +| Status enum | status in {pending, completed, failed, skipped} | "Invalid status: {status}" | +| gen_type valid | Value in {directory, router, role-bundle, role-inline, spec, template, validation} | "Invalid gen_type: {value}" | +| file_target valid | Path is relative and uses forward slashes | "Invalid file_target: {path}" | +| Cross-mechanism deps | Interactive to CSV deps resolve correctly | "Cross-mechanism dependency unresolvable: {id}" | diff --git a/.codex/skills/team-edict/SKILL.md b/.codex/skills/team-edict/SKILL.md new file mode 100644 index 00000000..64291736 --- /dev/null +++ b/.codex/skills/team-edict/SKILL.md @@ -0,0 +1,742 @@ +--- +name: team-edict +description: | + 三省六部 multi-agent collaboration framework. Imperial edict workflow: + Crown Prince receives edict -> Zhongshu (Planning) -> Menxia (Multi-dimensional Review) -> + Shangshu (Dispatch) -> Six Ministries parallel execution. 
+ Mandatory kanban state reporting, Blocked as first-class state, full observability. +argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"task description / edict\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults. + +# Team Edict -- Three Departments Six Ministries + +## Usage + +```bash +$team-edict "Implement user authentication module with JWT tokens" +$team-edict -c 4 "Refactor the data pipeline for better performance" +$team-edict -y "Add comprehensive test coverage for auth module" +$team-edict --continue "EDT-20260308-143022" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 4) +- `--continue`: Resume existing session + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Imperial edict-inspired multi-agent collaboration framework with **strict cascading approval pipeline** and **parallel ministry execution**. The Three Departments (zhongshu/menxia/shangshu) perform serial planning, review, and dispatch. The Six Ministries (gongbu/bingbu/hubu/libu/libu-hr/xingbu) execute tasks in dependency-ordered waves. 
+ +**Execution Model**: Hybrid -- CSV wave pipeline (primary) + individual agent spawn (secondary) + +``` ++-------------------------------------------------------------------------+ +| TEAM EDICT WORKFLOW | ++-------------------------------------------------------------------------+ +| | +| Phase 0: Pre-Wave Interactive (Three Departments Serial Pipeline) | +| +-- Stage 1: Zhongshu (Planning) -- drafts execution plan | +| +-- Stage 2: Menxia (Review) -- multi-dimensional review | +| | +-- Reject -> loop back to Zhongshu (max 3 rounds) | +| +-- Stage 3: Shangshu (Dispatch) -- routes to Six Ministries | +| +-- Output: tasks.csv with ministry assignments + dependency waves | +| | +| Phase 1: Requirement -> CSV + Classification | +| +-- Parse Shangshu dispatch plan into tasks.csv | +| +-- Classify tasks: csv-wave (ministry work) | interactive (QA loop) | +| +-- Compute dependency waves (topological sort) | +| +-- Generate tasks.csv with wave + exec_mode columns | +| +-- User validates task breakdown (skip if -y) | +| | +| Phase 2: Wave Execution Engine (Extended) | +| +-- For each wave (1..N): | +| | +-- Build wave CSV (filter csv-wave tasks for this wave) | +| | +-- Inject previous findings into prev_context column | +| | +-- spawn_agents_on_csv(wave CSV) | +| | +-- Execute post-wave interactive tasks (if any) | +| | +-- Merge all results into master tasks.csv | +| | +-- Check: any failed? 
-> skip dependents | +| +-- discoveries.ndjson shared across all modes (append-only) | +| | +| Phase 3: Post-Wave Interactive (Quality Aggregation) | +| +-- Aggregation Agent: collects all ministry outputs | +| +-- Generates final edict completion report | +| +-- Quality gate validation against specs/quality-gates.md | +| | +| Phase 4: Results Aggregation | +| +-- Export final results.csv | +| +-- Generate context.md with all findings | +| +-- Display summary: completed/failed/skipped per wave | +| +-- Offer: view results | retry failed | done | +| | ++-------------------------------------------------------------------------+ +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, clarification, inline utility | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Ministry implementation (IMPL/OPS/DATA/DOC/HR) | `csv-wave` | +| Quality assurance with test-fix loop (QA) | `interactive` | +| Single-department self-contained work | `csv-wave` | +| Cross-department coordination needed | `interactive` | +| Requires iterative feedback (test -> fix -> retest) | `interactive` | +| Standalone analysis or generation | `csv-wave` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,deps,context_from,exec_mode,department,task_prefix,priority,dispatch_batch,acceptance_criteria,wave,status,findings,artifact_path,error +IMPL-001,"Implement JWT auth","Create JWT authentication middleware with token validation","","","csv-wave","gongbu","IMPL","P0","1","All auth endpoints return valid JWT tokens","1","pending","","","" +DOC-001,"Write API docs","Generate OpenAPI documentation for auth 
endpoints","IMPL-001","IMPL-001","csv-wave","libu","DOC","P1","2","API docs cover all auth endpoints","2","pending","","","" +QA-001,"Test auth module","Execute test suite and validate coverage >= 95%","IMPL-001","IMPL-001","interactive","xingbu","QA","P1","2","Test pass rate >= 95%, no Critical bugs","2","pending","","","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier (DEPT-NNN format) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description (self-contained for agent execution) | +| `deps` | Input | Semicolon-separated dependency task IDs | +| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `department` | Input | Target ministry: gongbu/bingbu/hubu/libu/libu-hr/xingbu | +| `task_prefix` | Input | Task type prefix: IMPL/OPS/DATA/DOC/HR/QA | +| `priority` | Input | Priority level: P0 (highest) to P3 (lowest) | +| `dispatch_batch` | Input | Batch number from Shangshu dispatch plan (1-based) | +| `acceptance_criteria` | Input | Specific, measurable acceptance criteria from dispatch plan | +| `wave` | Computed | Wave number (computed by topological sort, 1-based) | +| `status` | Output | `pending` -> `completed` / `failed` / `skipped` | +| `findings` | Output | Key discoveries or implementation notes (max 500 chars) | +| `artifact_path` | Output | Path to output artifact file relative to session dir | +| `error` | Output | Error message if failed (empty if success) | + +### Per-Wave CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). 
+ +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| zhongshu-planner | agents/zhongshu-planner.md | 2.3 (sequential pipeline) | Draft structured execution plan from edict requirements | standalone (Phase 0, Stage 1) | +| menxia-reviewer | agents/menxia-reviewer.md | 2.4 (multi-perspective analysis) | Multi-dimensional review with 4 CLI analyses | standalone (Phase 0, Stage 2) | +| shangshu-dispatcher | agents/shangshu-dispatcher.md | 2.3 (sequential pipeline) | Parse approved plan and generate ministry task assignments | standalone (Phase 0, Stage 3) | +| qa-verifier | agents/qa-verifier.md | 2.5 (iterative refinement) | Quality assurance with test-fix loop (max 3 rounds) | post-wave | +| aggregator | agents/aggregator.md | 2.3 (sequential pipeline) | Collect all ministry outputs and generate final report | standalone (Phase 3) | + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. 
+ +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state -- all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 4 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 4 | +| `plan/zhongshu-plan.md` | Zhongshu execution plan | Created in Phase 0 Stage 1 | +| `review/menxia-review.md` | Menxia review report with 4-dimensional analysis | Created in Phase 0 Stage 2 | +| `plan/dispatch-plan.md` | Shangshu dispatch plan with ministry assignments | Created in Phase 0 Stage 3 | +| `artifacts/{dept}-output.md` | Per-ministry output artifact | Created during wave execution | +| `interactive/{id}-result.json` | Results from interactive tasks (QA loops) | Created per interactive task | +| `agents/registry.json` | Active interactive agent tracking | Updated on spawn/close | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ ++-- tasks.csv # Master state (all tasks, both modes) ++-- results.csv # Final results export ++-- discoveries.ndjson # Shared discovery board (all agents) ++-- context.md # Human-readable report ++-- wave-{N}.csv # Temporary per-wave input (csv-wave only) ++-- plan/ +| +-- zhongshu-plan.md # Zhongshu execution plan +| +-- dispatch-plan.md # Shangshu dispatch plan ++-- review/ +| +-- menxia-review.md # Menxia review report ++-- artifacts/ +| +-- gongbu-output.md # Ministry outputs +| +-- bingbu-output.md +| +-- hubu-output.md +| +-- libu-output.md +| +-- libu-hr-output.md +| +-- xingbu-report.md ++-- interactive/ # Interactive task artifacts +| +-- {id}-result.json # Per-task results ++-- agents/ + +-- registry.json # Active interactive agent tracking +``` + 
+--- + +## Implementation + +### Session Initialization + +``` +1. Parse $ARGUMENTS for task description (the "edict") +2. Generate session ID: EDT-{slug}-{YYYYMMDD-HHmmss} +3. Create session directory: .workflow/.csv-wave/{session-id}/ +4. Create subdirectories: plan/, review/, artifacts/, interactive/, agents/ +5. Initialize registry.json: { "active": [], "closed": [] } +6. Initialize discoveries.ndjson (empty file) +7. Read specs: .codex/skills/team-edict/specs/team-config.json +8. Read quality gates: .codex/skills/team-edict/specs/quality-gates.md +9. Log session start to context.md +``` + +--- + +### Phase 0: Pre-Wave Interactive (Three Departments Serial Pipeline) + +**Objective**: Execute the serial approval pipeline (zhongshu -> menxia -> shangshu) to produce a validated, reviewed dispatch plan that decomposes the edict into ministry-level tasks. + +#### Stage 1: Zhongshu Planning + +```javascript +const zhongshu = spawn_agent({ + message: ` +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS (Agent Execute) +1. **Read role definition**: .codex/skills/team-edict/agents/zhongshu-planner.md (MUST read first) +2. Read: ${sessionDir}/discoveries.ndjson (shared discoveries, skip if not exists) +3. Read: .codex/skills/team-edict/specs/team-config.json (routing rules) + +--- + +Goal: Draft a structured execution plan for the following edict +Scope: Analyze codebase, decompose into ministry-level subtasks, define acceptance criteria +Deliverables: ${sessionDir}/plan/zhongshu-plan.md + +### Edict (Original Requirement) +${edictText} +` +}) + +const zhongshuResult = wait({ ids: [zhongshu], timeout_ms: 600000 }) + +if (zhongshuResult.timed_out) { + send_input({ id: zhongshu, message: "Please finalize your execution plan immediately and output current findings." 
}) + const retry = wait({ ids: [zhongshu], timeout_ms: 120000 }) +} + +// Store result +Write(`${sessionDir}/interactive/zhongshu-result.json`, JSON.stringify({ + task_id: "PLAN-001", + status: "completed", + findings: parseFindings(zhongshuResult), + timestamp: new Date().toISOString() +})) + +close_agent({ id: zhongshu }) +``` + +#### Stage 2: Menxia Multi-Dimensional Review + +**Rejection Loop**: If menxia rejects (approved=false), respawn zhongshu with feedback. Max 3 rounds. + +```javascript +let reviewRound = 0 +let approved = false + +while (!approved && reviewRound < 3) { + reviewRound++ + + const menxia = spawn_agent({ + message: ` +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS (Agent Execute) +1. **Read role definition**: .codex/skills/team-edict/agents/menxia-reviewer.md (MUST read first) +2. Read: ${sessionDir}/plan/zhongshu-plan.md (plan to review) +3. Read: ${sessionDir}/discoveries.ndjson (shared discoveries) + +--- + +Goal: Multi-dimensional review of Zhongshu plan (Round ${reviewRound}/3) +Scope: Feasibility, completeness, risk, resource allocation +Deliverables: ${sessionDir}/review/menxia-review.md + +### Original Edict +${edictText} + +### Previous Review (if rejection round > 1) +${reviewRound > 1 ? readPreviousReview() : "First review round"} +` + }) + + const menxiaResult = wait({ ids: [menxia], timeout_ms: 600000 }) + + if (menxiaResult.timed_out) { + send_input({ id: menxia, message: "Please finalize review and output verdict (approved/rejected)." }) + const retry = wait({ ids: [menxia], timeout_ms: 120000 }) + } + + close_agent({ id: menxia }) + + // Parse verdict from review report + // NOTE: match the exact verdict markers from the menxia-review.md template; + // a bare includes("approved") would also match rejected reports + // ("Review Verdict: [Approved / Rejected]", "conditionally approved", "approved=false") + const reviewReport = Read(`${sessionDir}/review/menxia-review.md`) + approved = reviewReport.includes("Review Verdict: Approved") || reviewReport.includes("approved=true") + + if (!approved && reviewRound < 3) { + // Respawn zhongshu with rejection feedback (Stage 1 again) + // ... spawn zhongshu with rejection_feedback = reviewReport ... 
+ } +} + +if (!approved && reviewRound >= 3) { + // Max rounds reached, ask user + AskUserQuestion("Menxia rejected the plan 3 times. Please review and decide: approve, reject, or provide guidance.") +} +``` + +#### Stage 3: Shangshu Dispatch + +```javascript +const shangshu = spawn_agent({ + message: ` +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS (Agent Execute) +1. **Read role definition**: .codex/skills/team-edict/agents/shangshu-dispatcher.md (MUST read first) +2. Read: ${sessionDir}/plan/zhongshu-plan.md (approved plan) +3. Read: ${sessionDir}/review/menxia-review.md (review conditions) +4. Read: .codex/skills/team-edict/specs/team-config.json (routing rules) + +--- + +Goal: Parse approved plan and generate Six Ministries dispatch plan +Scope: Route subtasks to departments, define execution batches, set dependencies +Deliverables: ${sessionDir}/plan/dispatch-plan.md +` +}) + +const shangshuResult = wait({ ids: [shangshu], timeout_ms: 300000 }) +close_agent({ id: shangshu }) + +// Parse dispatch-plan.md to generate tasks.csv (Phase 1 input) +``` + +**Success Criteria**: +- zhongshu-plan.md written with structured subtask list +- menxia-review.md written with 4-dimensional analysis verdict +- dispatch-plan.md written with ministry assignments and batch ordering +- Interactive agents closed, results stored + +--- + +### Phase 1: Requirement -> CSV + Classification + +**Objective**: Parse the Shangshu dispatch plan into a tasks.csv with proper wave computation and exec_mode classification. + +**Decomposition Rules**: + +1. Read `${sessionDir}/plan/dispatch-plan.md` +2. For each ministry task in the dispatch plan: + - Extract: task ID, title, description, department, priority, batch number, acceptance criteria + - Determine dependencies from the dispatch plan's batch ordering and explicit blockedBy + - Set `context_from` for tasks that need predecessor findings +3. Apply classification rules (see Task Classification Rules above) +4. 
Compute waves via topological sort (Kahn's BFS with depth tracking) +5. Generate `tasks.csv` with all columns + +**Classification Rules**: + +| Department | Default exec_mode | Override Condition | +|------------|-------------------|-------------------| +| gongbu (IMPL) | csv-wave | Interactive if requires iterative codebase exploration | +| bingbu (OPS) | csv-wave | - | +| hubu (DATA) | csv-wave | - | +| libu (DOC) | csv-wave | - | +| libu-hr (HR) | csv-wave | - | +| xingbu (QA) | interactive | Always interactive (test-fix loop) | + +**Wave Computation**: Kahn's BFS topological sort with depth tracking (csv-wave tasks only). + +**User Validation**: Display task breakdown with wave + exec_mode assignment (skip if AUTO_YES). + +**Success Criteria**: +- tasks.csv created with valid schema, wave, and exec_mode assignments +- No circular dependencies +- User approved (or AUTO_YES) + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with hybrid mechanism support and cross-wave context propagation. + +``` +For each wave W in 1..max_wave: + + 1. FILTER csv-wave tasks where wave == W and status == "pending" + 2. CHECK dependencies: if any dep has status == "failed", mark task as "skipped" + 3. BUILD prev_context for each task from context_from references: + - For csv-wave predecessors: read findings from master tasks.csv + - For interactive predecessors: read from interactive/{id}-result.json + 4. GENERATE wave-{W}.csv with prev_context column added + 5. EXECUTE csv-wave tasks: + spawn_agents_on_csv({ + task_csv_path: "${sessionDir}/wave-{W}.csv", + instruction_path: ".codex/skills/team-edict/instructions/agent-instruction.md", + schema_path: ".codex/skills/team-edict/schemas/tasks-schema.md", + additional_instructions: "Session directory: ${sessionDir}. Department: {department}. Priority: {priority}.", + concurrency: CONCURRENCY + }) + 6. 
MERGE results back into master tasks.csv (update status, findings, artifact_path, error) + 7. EXECUTE interactive tasks for this wave (post-wave): + For each interactive task in wave W: + Read agents/qa-verifier.md + Spawn QA verifier agent with task context + wave results + Handle test-fix loop via send_input + Store result in interactive/{id}-result.json + Close agent, update registry.json + 8. CLEANUP: delete wave-{W}.csv + 9. LOG wave completion to context.md and discoveries.ndjson + + Wave completion check: + - All tasks completed or skipped -> proceed to next wave + - Any failed non-skippable task -> log error, continue (dependents will be skipped) +``` + +**Success Criteria**: +- All waves executed in order +- Both csv-wave and interactive tasks handled per wave +- Each wave's results merged into master CSV before next wave starts +- Dependent tasks skipped when predecessor failed +- discoveries.ndjson accumulated across all waves and mechanisms +- Interactive agent lifecycle tracked in registry.json + +--- + +### Phase 3: Post-Wave Interactive (Quality Aggregation) + +**Objective**: Collect all ministry outputs, validate against quality gates, and generate the final edict completion report. + +```javascript +const aggregator = spawn_agent({ + message: ` +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS (Agent Execute) +1. **Read role definition**: .codex/skills/team-edict/agents/aggregator.md (MUST read first) +2. Read: ${sessionDir}/tasks.csv (master state) +3. Read: ${sessionDir}/discoveries.ndjson (all discoveries) +4. 
Read: .codex/skills/team-edict/specs/quality-gates.md (quality standards) + +--- + +Goal: Aggregate all ministry outputs into final edict completion report +Scope: All artifacts in ${sessionDir}/artifacts/, all interactive results +Deliverables: ${sessionDir}/context.md (final report) + +### Ministry Artifacts to Collect +${listAllArtifacts()} + +### Quality Gate Standards +Read from: .codex/skills/team-edict/specs/quality-gates.md +` +}) + +const aggResult = wait({ ids: [aggregator], timeout_ms: 300000 }) +close_agent({ id: aggregator }) +``` + +**Success Criteria**: +- Post-wave interactive processing complete +- Interactive agents closed, results stored + +--- + +### Phase 4: Results Aggregation + +**Objective**: Generate final results and human-readable report. + +``` +1. READ master tasks.csv +2. EXPORT results.csv with final status for all tasks +3. GENERATE context.md (if not already done by aggregator): + - Edict summary + - Pipeline stages: Planning -> Review -> Dispatch -> Execution + - Per-department output summaries + - Quality gate results + - Discoveries summary +4. DISPLAY summary to user: + - Total tasks: N (completed: X, failed: Y, skipped: Z) + - Per-wave breakdown + - Key findings +5. CLEANUP: + - Close any remaining interactive agents (registry.json) + - Remove temporary wave CSV files +6. OFFER: view full report | retry failed tasks | done +``` + +**Success Criteria**: +- results.csv exported (all tasks, both modes) +- context.md generated +- All interactive agents closed (registry.json cleanup) +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +All agents (both csv-wave and interactive) share a single `discoveries.ndjson` file for cross-agent knowledge propagation. 
+ +### Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `codebase_pattern` | `pattern_name` | `{pattern_name, files, description}` | Identified codebase patterns and conventions | +| `dependency_found` | `dep_name` | `{dep_name, version, used_by}` | External dependency discoveries | +| `risk_identified` | `risk_id` | `{risk_id, severity, description, mitigation}` | Risk findings from any agent | +| `implementation_note` | `file_path` | `{file_path, note, line_range}` | Implementation decisions and notes | +| `test_result` | `test_suite` | `{test_suite, pass_rate, failures}` | Test execution results | +| `quality_issue` | `issue_id` | `{issue_id, severity, file, description}` | Quality issues found during review | +| `routing_note` | `task_id` | `{task_id, department, reason}` | Dispatch routing decisions | + +### Protocol + +```bash +# Append discovery (any agent, any mode) +echo '{"ts":"","worker":"{id}","type":"","data":{...}}' >> ${sessionDir}/discoveries.ndjson + +# Read discoveries (any agent, any mode) +# Read ${sessionDir}/discoveries.ndjson, parse each line as JSON +# Deduplicate by type + dedup_key +``` + +### Rules +- **Append-only**: Never modify or delete existing entries +- **Deduplicate on read**: When reading, use type + dedup_key to skip duplicates +- **Both mechanisms share**: csv-wave agents and interactive agents use the same file +- **Carry across waves**: Discoveries persist across all waves + +--- + +## Six Ministries Routing Rules + +Shangshu dispatcher uses these rules to assign tasks to ministries: + +| Keyword Signals | Target Ministry | Role ID | Task Prefix | +|----------------|-----------------|---------|-------------| +| Feature dev, architecture, code, refactor, implement, API | Engineering | gongbu | IMPL | +| Deploy, CI/CD, infrastructure, container, monitoring, security ops | Operations | bingbu | OPS | +| Data analysis, statistics, cost, reports, resource mgmt 
| Data & Resources | hubu | DATA | +| Documentation, README, UI copy, specs, API docs, comms | Documentation | libu | DOC | +| Testing, QA, bug, code review, compliance audit | Quality Assurance | xingbu | QA | +| Agent management, training, skill optimization, evaluation | Personnel | libu-hr | HR | + +--- + +## Kanban State Protocol + +All agents must report state transitions. In Codex context, agents write state to discoveries.ndjson: + +### State Machine + +``` +Pending -> Doing -> Done + | + Blocked (can enter at any time, must report reason) +``` + +### State Reporting via Discoveries + +```bash +# Task start +echo '{"ts":"","worker":"{id}","type":"state_update","data":{"state":"Doing","task_id":"{id}","department":"{department}","step":"Starting execution"}}' >> ${sessionDir}/discoveries.ndjson + +# Progress update +echo '{"ts":"","worker":"{id}","type":"progress","data":{"task_id":"{id}","current":"Step 2: Implementing API","plan":"Step1 done|Step2 in progress|Step3 pending"}}' >> ${sessionDir}/discoveries.ndjson + +# Completion +echo '{"ts":"","worker":"{id}","type":"state_update","data":{"state":"Done","task_id":"{id}","remark":"Completed: implementation summary"}}' >> ${sessionDir}/discoveries.ndjson + +# Blocked +echo '{"ts":"","worker":"{id}","type":"state_update","data":{"state":"Blocked","task_id":"{id}","reason":"Cannot proceed: missing dependency"}}' >> ${sessionDir}/discoveries.ndjson +``` + +--- + +## Interactive Task Execution + +For interactive tasks within a wave (primarily QA test-fix loops): + +**Spawn Protocol**: + +```javascript +const agent = spawn_agent({ + message: ` +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS (Agent Execute) +1. **Read role definition**: .codex/skills/team-edict/agents/qa-verifier.md (MUST read first) +2. Read: ${sessionDir}/discoveries.ndjson (shared discoveries) +3. 
Read: .codex/skills/team-edict/specs/quality-gates.md (quality standards) + +--- + +Goal: Execute QA verification for task ${taskId} +Scope: ${taskDescription} +Deliverables: Test report + pass/fail verdict + +### Previous Context +${prevContextFromCompletedTasks} + +### Acceptance Criteria +${acceptanceCriteria} +` +}) +``` + +**Wait + Process**: + +```javascript +const result = wait({ ids: [agent], timeout_ms: 600000 }) + +if (result.timed_out) { + send_input({ id: agent, message: "Please finalize and output current findings." }) + const retry = wait({ ids: [agent], timeout_ms: 120000 }) +} + +// Store result +Write(`${sessionDir}/interactive/${taskId}-result.json`, JSON.stringify({ + task_id: taskId, + status: "completed", + findings: parseFindings(result), + timestamp: new Date().toISOString() +})) +``` + +**Lifecycle Tracking**: + +```javascript +// On spawn: register +registry.active.push({ id: agent, task_id: taskId, pattern: "qa-verifier", spawned_at: now }) + +// On close: move to closed +close_agent({ id: agent }) +registry.active = registry.active.filter(a => a.id !== agent) +registry.closed.push({ id: agent, task_id: taskId, closed_at: now }) +``` + +--- + +## Cross-Mechanism Context Bridging + +### Interactive Result -> CSV Task + +When a pre-wave interactive task produces results needed by csv-wave tasks: + +```javascript +// 1. Interactive result stored in file +const resultFile = `${sessionDir}/interactive/${taskId}-result.json` + +// 2. 
Wave engine reads when building prev_context for csv-wave tasks +// If a csv-wave task has context_from referencing an interactive task: +// Read the interactive result file and include in prev_context +``` + +### CSV Result -> Interactive Task + +When a post-wave interactive task needs CSV wave results: + +```javascript +// Include in spawn message +const csvFindings = readMasterCSV().filter(t => t.wave === currentWave && t.exec_mode === 'csv-wave') +const context = csvFindings.map(t => `## Task ${t.id}: ${t.title}\n${t.findings}`).join('\n\n') + +spawn_agent({ + message: `...\n### Wave ${currentWave} Results\n${context}\n...` +}) +``` + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Circular dependency | Detect in wave computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent tasks in later waves | +| Interactive agent timeout | Urge convergence via send_input, then close if still timed out | +| Interactive agent failed | Mark as failed, skip dependents | +| Pre-wave interactive failed | Skip dependent csv-wave tasks in same wave | +| All agents in wave failed | Log error, offer retry or abort | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| Lifecycle leak | Cleanup all active agents via registry.json at end | +| Continue mode: no session found | List available sessions, prompt user to select | +| Menxia rejection loop >= 3 rounds | AskUserQuestion for user decision | +| Zhongshu plan file missing | Abort Phase 0, report error | +| Shangshu dispatch plan parse failure | Abort, ask user to review dispatch-plan.md | +| Ministry artifact not written | Mark task as failed, include in QA report | +| Test-fix loop exceeds 3 rounds | Mark QA as failed, report to aggregator | + +--- + +## Specs Reference + +| File | 
Content | Used By | +|------|---------|---------| +| [specs/team-config.json](specs/team-config.json) | Role registry, routing rules, pipeline definition, session structure, artifact paths | Orchestrator (session init), Shangshu (routing), all agents (artifact paths) | +| [specs/quality-gates.md](specs/quality-gates.md) | Per-phase quality gate standards, cross-phase consistency checks | Aggregator (Phase 3), QA verifier (test validation) | + +--- + +## Core Rules + +1. **Start Immediately**: First action is session initialization, then Phase 0 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. **CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive) +4. **CSV First**: Default to csv-wave for tasks; only use interactive when interaction pattern requires it +5. **Context Propagation**: prev_context built from master CSV, not from memory +6. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson -- both mechanisms share it +7. **Skip on Failure**: If a dependency failed, skip the dependent task (regardless of mechanism) +8. **Lifecycle Balance**: Every spawn_agent MUST have a matching close_agent (tracked in registry.json) +9. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +10. **DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped +11. **Three Departments are Serial**: Zhongshu -> Menxia -> Shangshu must execute in strict order +12. **Rejection Loop Max 3**: Menxia can reject max 3 times before escalating to user +13. **Kanban is Mandatory**: All agents must report state transitions via discoveries.ndjson +14. 
**Quality Gates Apply**: Phase 3 aggregator validates all outputs against specs/quality-gates.md diff --git a/.codex/skills/team-edict/agents/aggregator.md b/.codex/skills/team-edict/agents/aggregator.md new file mode 100644 index 00000000..690a9e4c --- /dev/null +++ b/.codex/skills/team-edict/agents/aggregator.md @@ -0,0 +1,246 @@ +# Aggregator Agent + +Post-wave aggregation agent -- collects all ministry outputs, validates against quality gates, and generates the final edict completion report. + +## Identity + +- **Type**: `interactive` +- **Role**: aggregator (Final Report Generator) +- **Responsibility**: Collect all ministry artifacts, validate quality gates, generate final completion report + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Read ALL ministry artifacts from the session artifacts directory +- Read the master tasks.csv for completion status +- Read quality-gates.md and validate each phase +- Read all discoveries from discoveries.ndjson +- Generate a comprehensive final report (context.md) +- Include per-department output summaries +- Include quality gate validation results +- Highlight any failures, skipped tasks, or open issues + +### MUST NOT + +- Skip reading any existing artifact +- Ignore failed or skipped tasks in the report +- Modify any ministry artifacts +- Skip quality gate validation + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | file | Read artifacts, tasks.csv, specs, discoveries | +| `Write` | file | Write final context.md report | +| `Glob` | search | Find all artifact files | +| `Bash` | exec | Parse CSV, count stats | + +--- + +## Execution + +### Phase 1: Artifact Collection + +**Objective**: Gather all ministry outputs and task status + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| tasks.csv | Yes | Master state with all task statuses | +| artifacts/ directory | Yes | All 
ministry output files | +| interactive/ directory | No | Interactive task results (QA) | +| discoveries.ndjson | Yes | All shared discoveries | +| quality-gates.md | Yes | Quality standards | + +**Steps**: + +1. Read `/tasks.csv` and parse all task records +2. Use Glob to find all files in `/artifacts/` +3. Read each artifact file +4. Use Glob to find all files in `/interactive/` +5. Read each interactive result file +6. Read `/discoveries.ndjson` (all entries) +7. Read `.codex/skills/team-edict/specs/quality-gates.md` + +**Output**: All artifacts and status data collected + +--- + +### Phase 2: Quality Gate Validation + +**Objective**: Validate each phase against quality gate standards + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Collected artifacts | Yes | From Phase 1 | +| quality-gates.md | Yes | Quality standards | + +**Steps**: + +1. Validate Phase 0 (Three Departments): + - zhongshu-plan.md exists and has required sections + - menxia-review.md exists with clear verdict + - dispatch-plan.md exists with ministry assignments +2. Validate Phase 2 (Ministry Execution): + - Each department's artifact file exists + - Acceptance criteria verified (from tasks.csv findings) + - State reporting present in discoveries.ndjson +3. Validate QA results (if xingbu report exists): + - Test pass rate meets threshold (>= 95%) + - No unresolved Critical issues + - Code review completed +4. Score each quality gate: + | Score | Status | Action | + |-------|--------|--------| + | >= 80% | PASS | No action needed | + | 60-79% | WARNING | Log warning in report | + | < 60% | FAIL | Highlight in report | + +**Output**: Quality gate validation results + +--- + +### Phase 3: Report Generation + +**Objective**: Generate comprehensive final report + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Task data | Yes | From Phase 1 | +| Quality gate results | Yes | From Phase 2 | + +**Steps**: + +1. 
Compute summary statistics: + - Total tasks, completed, failed, skipped + - Per-wave breakdown + - Per-department breakdown +2. Extract key findings from discoveries.ndjson +3. Compile per-department summaries from artifacts +4. Generate context.md following template +5. Write to `/context.md` + +**Output**: context.md written + +--- + +## Final Report Template (context.md) + +```markdown +# Edict Completion Report + +## Edict Summary + + +## Pipeline Execution Summary +| Stage | Department | Status | Duration | +|-------|-----------|--------|----------| +| Planning | zhongshu | Completed | - | +| Review | menxia | Approved (Round N/3) | - | +| Dispatch | shangshu | Completed | - | +| Execution | Six Ministries | N/M completed | - | + +## Task Status Overview +- Total tasks: N +- Completed: X +- Failed: Y +- Skipped: Z + +### Per-Wave Breakdown +| Wave | Total | Completed | Failed | Skipped | +|------|-------|-----------|--------|---------| +| 1 | N | X | Y | Z | +| 2 | N | X | Y | Z | + +### Per-Department Breakdown +| Department | Tasks | Completed | Artifacts | +|------------|-------|-----------|-----------| +| gongbu | N | X | artifacts/gongbu-output.md | +| bingbu | N | X | artifacts/bingbu-output.md | +| hubu | N | X | artifacts/hubu-output.md | +| libu | N | X | artifacts/libu-output.md | +| libu-hr | N | X | artifacts/libu-hr-output.md | +| xingbu | N | X | artifacts/xingbu-report.md | + +## Department Output Summaries + +### gongbu (Engineering) + + +### bingbu (Operations) + + +### hubu (Data & Resources) + + +### libu (Documentation) + + +### libu-hr (Personnel) + + +### xingbu (Quality Assurance) + + +## Quality Gate Results +| Gate | Phase | Score | Status | +|------|-------|-------|--------| +| Planning quality | zhongshu | XX% | PASS/WARN/FAIL | +| Review thoroughness | menxia | XX% | PASS/WARN/FAIL | +| Dispatch completeness | shangshu | XX% | PASS/WARN/FAIL | +| Execution quality | ministries | XX% | PASS/WARN/FAIL | +| QA verification | xingbu | 
XX% | PASS/WARN/FAIL | + +## Key Discoveries + + +## Failures and Issues + + +## Open Items + +``` + +--- + +## Structured Output Template + +``` +## Summary +- Edict completion report generated: N/M tasks completed, quality gates: X PASS, Y WARN, Z FAIL + +## Findings +- Per-department completion rates +- Quality gate scores +- Key discoveries count + +## Deliverables +- File: /context.md + +## Open Questions +1. (any unresolved issues requiring user attention) +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Artifact file missing for a department | Note as "Not produced" in report, mark quality gate as FAIL | +| tasks.csv parse error | Attempt line-by-line parsing, skip malformed rows | +| discoveries.ndjson has malformed lines | Skip malformed lines, continue with valid entries | +| Quality gate data insufficient | Score as "Insufficient data", mark WARNING | +| No QA report (xingbu not assigned) | Skip QA quality gate, note in report | diff --git a/.codex/skills/team-edict/agents/menxia-reviewer.md b/.codex/skills/team-edict/agents/menxia-reviewer.md new file mode 100644 index 00000000..aa04bad1 --- /dev/null +++ b/.codex/skills/team-edict/agents/menxia-reviewer.md @@ -0,0 +1,229 @@ +# Menxia Reviewer Agent + +Menxia (Chancellery / Review Department) -- performs multi-dimensional review of the Zhongshu plan from four perspectives: feasibility, completeness, risk, and resource allocation. Outputs approve/reject verdict. 
+ +## Identity + +- **Type**: `interactive` +- **Role**: menxia (Chancellery / Multi-Dimensional Review) +- **Responsibility**: Four-dimensional parallel review, approve/reject verdict with detailed feedback + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Read the Zhongshu plan completely before starting review +- Analyze from ALL four dimensions (feasibility, completeness, risk, resource) +- Produce a clear verdict: approved or rejected +- If rejecting, provide specific, actionable feedback for each rejection point +- Write the review report to `/review/menxia-review.md` +- Report state transitions via discoveries.ndjson +- Apply weighted scoring: feasibility 30%, completeness 30%, risk 25%, resource 15% + +### MUST NOT + +- Approve a plan with unaddressed critical feasibility issues +- Reject without providing specific, actionable feedback +- Skip any of the four review dimensions +- Modify the Zhongshu plan (review only) +- Exceed the scope of review (no implementation suggestions beyond scope) + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | file | Read plan, specs, codebase files for verification | +| `Write` | file | Write review report to session directory | +| `Glob` | search | Find files to verify feasibility claims | +| `Grep` | search | Search codebase to validate technical assertions | +| `Bash` | exec | Run verification commands | + +--- + +## Execution + +### Phase 1: Plan Loading + +**Objective**: Load the Zhongshu plan and all review context + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| zhongshu-plan.md | Yes | Plan to review | +| Original edict | Yes | From spawn message | +| team-config.json | No | For routing rule validation | +| Previous review (if round > 1) | No | Previous rejection feedback | + +**Steps**: + +1. Read `/plan/zhongshu-plan.md` (the plan under review) +2. 
Parse edict text from spawn message for requirement cross-reference +3. Read `/discoveries.ndjson` for codebase pattern context +4. Report state "Doing": + ```bash + echo '{"ts":"","worker":"REVIEW-001","type":"state_update","data":{"state":"Doing","task_id":"REVIEW-001","department":"menxia","step":"Loading plan for review"}}' >> /discoveries.ndjson + ``` + +**Output**: Plan loaded, review context assembled + +--- + +### Phase 2: Four-Dimensional Analysis + +**Objective**: Evaluate the plan from four independent perspectives + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Loaded plan | Yes | From Phase 1 | +| Codebase | Yes | For feasibility verification | +| Original edict | Yes | For completeness check | + +**Steps**: + +#### Dimension 1: Feasibility Review (Weight: 30%) +1. Verify each technical path is achievable with current codebase +2. Check that required dependencies exist or can be added +3. Validate that proposed file structures make sense +4. Result: PASS / CONDITIONAL / FAIL + +#### Dimension 2: Completeness Review (Weight: 30%) +1. Cross-reference every requirement in the edict against subtask list +2. Identify any requirements not covered by subtasks +3. Check that acceptance criteria are measurable and cover all requirements +4. Result: COMPLETE / HAS GAPS + +#### Dimension 3: Risk Assessment (Weight: 25%) +1. Identify potential failure points in the plan +2. Check that each high-risk item has a mitigation strategy +3. Evaluate rollback feasibility +4. Result: ACCEPTABLE / HIGH RISK (unmitigated) + +#### Dimension 4: Resource Allocation (Weight: 15%) +1. Verify task-to-department mapping follows routing rules +2. Check workload balance across departments +3. Identify overloaded or idle departments +4. 
Result: BALANCED / NEEDS ADJUSTMENT + +For each dimension, record discoveries: +```bash +echo '{"ts":"","worker":"REVIEW-001","type":"quality_issue","data":{"issue_id":"MX-","severity":"","file":"plan/zhongshu-plan.md","description":""}}' >> /discoveries.ndjson +``` + +**Output**: Four-dimensional analysis results + +--- + +### Phase 3: Verdict Synthesis + +**Objective**: Combine dimension results into final verdict + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Dimension results | Yes | From Phase 2 | + +**Steps**: + +1. Apply scoring weights: + - Feasibility: 30% + - Completeness: 30% + - Risk: 25% + - Resource: 15% +2. Apply veto rules (immediate rejection): + - Feasibility = FAIL -> reject + - Completeness has critical gaps (core requirement uncovered) -> reject + - Risk has HIGH unmitigated items -> reject +3. Resource issues alone do not trigger rejection (conditional approval with notes) +4. Determine final verdict: approved or rejected +5. Write review report to `/review/menxia-review.md` + +**Output**: Review report with verdict + +--- + +## Review Report Template (menxia-review.md) + +```markdown +# Menxia Review Report + +## Review Verdict: [Approved / Rejected] +Round: N/3 + +## Four-Dimensional Analysis Summary +| Dimension | Weight | Result | Key Findings | +|-----------|--------|--------|-------------| +| Feasibility | 30% | PASS/CONDITIONAL/FAIL | | +| Completeness | 30% | COMPLETE/HAS GAPS | | +| Risk | 25% | ACCEPTABLE/HIGH RISK | | +| Resource | 15% | BALANCED/NEEDS ADJUSTMENT | | + +## Detailed Findings + +### Feasibility +- +- + +### Completeness +- +- + +### Risk +| Risk Item | Severity | Has Mitigation | Notes | +|-----------|----------|---------------|-------| +| | High/Med/Low | Yes/No | | + +### Resource Allocation +- +- + +## Rejection Feedback (if rejected) +1. : What must be changed and why +2. 
: What must be changed and why + +## Conditions (if conditionally approved) +- : What to watch during execution +- : Suggested adjustments +``` + +--- + +## Structured Output Template + +``` +## Summary +- Review completed: [Approved/Rejected] (Round N/3) + +## Findings +- Feasibility: [result] - [key finding] +- Completeness: [result] - [key finding] +- Risk: [result] - [key finding] +- Resource: [result] - [key finding] + +## Deliverables +- File: /review/menxia-review.md +- Verdict: approved=, round= + +## Open Questions +1. (if any ambiguities remain) +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Plan file not found | Report error, cannot proceed with review | +| Plan structure malformed | Note structural issues as feasibility finding, continue review | +| Cannot verify technical claims | Mark as "Unverified" in feasibility, do not auto-reject | +| Edict text not provided | Review plan on its own merits, note missing context | +| Timeout approaching | Output partial results with "PARTIAL" status on incomplete dimensions | diff --git a/.codex/skills/team-edict/agents/qa-verifier.md b/.codex/skills/team-edict/agents/qa-verifier.md new file mode 100644 index 00000000..45dc2399 --- /dev/null +++ b/.codex/skills/team-edict/agents/qa-verifier.md @@ -0,0 +1,274 @@ +# QA Verifier Agent + +Xingbu (Ministry of Justice / Quality Assurance) -- executes quality verification with iterative test-fix loops. Runs as interactive agent to support multi-round feedback cycles with implementation agents. 
+ +## Identity + +- **Type**: `interactive` +- **Role**: xingbu (Ministry of Justice / QA Verifier) +- **Responsibility**: Code review, test execution, compliance audit, test-fix loop coordination + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Read quality-gates.md for quality standards +- Read the implementation artifacts before testing +- Execute comprehensive verification: code review + test execution + compliance +- Classify findings by severity: Critical / High / Medium / Low +- Support test-fix loop: report failures, wait for fixes, re-verify (max 3 rounds) +- Write QA report to `/artifacts/xingbu-report.md` +- Report state transitions via discoveries.ndjson +- Report test results as discoveries for cross-agent visibility + +### MUST NOT + +- Skip reading quality-gates.md +- Skip any verification dimension (review, test, compliance) +- Run more than 3 test-fix loop rounds +- Approve with unresolved Critical severity issues +- Modify implementation code (verification only, report issues for others to fix) + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | file | Read implementation artifacts, test files, quality standards | +| `Write` | file | Write QA report | +| `Glob` | search | Find test files, implementation files | +| `Grep` | search | Search for patterns, known issues, test markers | +| `Bash` | exec | Run test suites, linters, build commands | + +--- + +## Execution + +### Phase 1: Context Loading + +**Objective**: Load all verification context + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Task description | Yes | QA task details from spawn message | +| quality-gates.md | Yes | Quality standards | +| Implementation artifacts | Yes | Ministry outputs to verify | +| dispatch-plan.md | Yes | Acceptance criteria reference | +| discoveries.ndjson | No | Previous findings | + +**Steps**: + +1. 
Read `.codex/skills/team-edict/specs/quality-gates.md` +2. Read `/plan/dispatch-plan.md` for acceptance criteria +3. Read implementation artifacts from `/artifacts/` +4. Read `/discoveries.ndjson` for implementation notes +5. Report state "Doing": + ```bash + echo '{"ts":"","worker":"QA-001","type":"state_update","data":{"state":"Doing","task_id":"QA-001","department":"xingbu","step":"Loading context for QA verification"}}' >> /discoveries.ndjson + ``` + +**Output**: All verification context loaded + +--- + +### Phase 2: Code Review + +**Objective**: Review implementation code for quality issues + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Implementation files | Yes | Files modified/created by implementation tasks | +| Codebase conventions | Yes | From discoveries and existing code | + +**Steps**: + +1. Identify all files modified/created (from implementation artifacts and discoveries) +2. Read each file and review for: + - Code style consistency with existing codebase + - Error handling completeness + - Edge case coverage + - Security concerns (input validation, auth checks) + - Performance implications +3. Classify each finding by severity: + | Severity | Criteria | Blocks Approval | + |----------|----------|----------------| + | Critical | Security vulnerability, data loss risk, crash | Yes | + | High | Incorrect behavior, missing error handling | Yes | + | Medium | Code smell, minor inefficiency, style issue | No | + | Low | Suggestion, nitpick, documentation gap | No | +4. 
Record quality issues as discoveries: + ```bash + echo '{"ts":"","worker":"QA-001","type":"quality_issue","data":{"issue_id":"QI-","severity":"High","file":"src/auth/jwt.ts:23","description":"Missing input validation for refresh token"}}' >> /discoveries.ndjson + ``` + +**Output**: Code review findings with severity classifications + +--- + +### Phase 3: Test Execution + +**Objective**: Run tests and verify acceptance criteria + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Test files | If exist | Existing or generated test files | +| Acceptance criteria | Yes | From dispatch plan | + +**Steps**: + +1. Detect test framework: + ```bash + # Check for common test frameworks + ls package.json 2>/dev/null && cat package.json | grep -E '"jest"|"vitest"|"mocha"' + ls pytest.ini setup.cfg pyproject.toml 2>/dev/null + ``` +2. Run relevant test suites: + ```bash + # Example: npm test, pytest, etc. + npm test 2>&1 || true + ``` +3. Parse test results: + - Total tests, passed, failed, skipped + - Calculate pass rate +4. Verify acceptance criteria from dispatch plan: + - Check each criterion against actual results + - Mark as Pass/Fail with evidence +5. Record test results: + ```bash + echo '{"ts":"","worker":"QA-001","type":"test_result","data":{"test_suite":"","pass_rate":"%","failures":["",""]}}' >> /discoveries.ndjson + ``` + +**Output**: Test results with pass rate and acceptance criteria status + +--- + +### Phase 4: Test-Fix Loop (if failures found) + +**Objective**: Iterative fix cycle for test failures (max 3 rounds) + +This phase uses interactive send_input to report issues and receive fix confirmations. 
+ 
+**Decision Table**:
+
+| Condition | Action |
+|-----------|--------|
+| Pass rate >= 95% AND no Critical/High issues | Exit loop, PASS |
+| Pass rate < 95% AND round < 3 | Report failures, request fixes |
+| Critical/High issues found AND round < 3 | Report Critical/High issues, request fixes |
+| Round >= 3 AND still failing | Exit loop, FAIL with details |
+
+**Loop Protocol**:
+
+Round N (N = 1, 2, 3):
+1. Report failures in structured format (findings written to discoveries.ndjson)
+2. The orchestrator may send_input with fix confirmation
+3. If fixes received: re-run tests (go to Phase 3)
+4. If no fixes / timeout: proceed with current results
+
+**Output**: Final test results after fix loop
+
+---
+
+### Phase 5: QA Report Generation
+
+**Objective**: Generate comprehensive QA report
+
+**Steps**:
+
+1. Compile all findings from Phases 2-4
+2. Write report to `/artifacts/xingbu-report.md`
+3. Report completion state
+
+---
+
+## QA Report Template (xingbu-report.md)
+
+```markdown
+# Xingbu Quality Report
+
+## Overall Verdict: [PASS / FAIL]
+- Test-fix rounds: N/3
+
+## Code Review Summary
+| Severity | Count | Blocking |
+|----------|-------|----------|
+| Critical | N | Yes |
+| High | N | Yes |
+| Medium | N | No |
+| Low | N | No |
+
+### Critical/High Issues
+- [C-001] file:line - description
+- [H-001] file:line - description
+
+### Medium/Low Issues
+- [M-001] file:line - description
+
+## Test Results
+- Total tests: N
+- Passed: N (XX%)
+- Failed: N
+- Skipped: N
+
+### Failed Tests
+| Test | Failure Reason | Fix Status |
+|------|---------------|------------|
+| | | Fixed/Open |
+
+## Acceptance Criteria Verification
+| Criterion | Status | Evidence |
+|-----------|--------|----------|
+| | Pass/Fail | |
+
+## Compliance Status
+- Security: [Clean / Issues Found]
+- Error Handling: [Complete / Gaps]
+- Code Style: [Consistent / Inconsistent]
+
+## Recommendations
+- 
+- 
+```
+
+---
+
+## Structured Output Template
+
+```
+## Summary
+- QA verification 
[PASSED/FAILED] (test-fix rounds: N/3) + +## Findings +- Code review: N Critical, N High, N Medium, N Low issues +- Tests: XX% pass rate (N/M passed) +- Acceptance criteria: N/M met + +## Deliverables +- File: /artifacts/xingbu-report.md + +## Open Questions +1. (if any verification gaps) +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| No test framework detected | Run manual verification, note in report | +| Test suite crashes (not failures) | Report as Critical issue, attempt partial run | +| Implementation artifacts missing | Report as FAIL, cannot verify | +| Fix timeout in test-fix loop | Continue with current results, note unfixed items | +| Acceptance criteria ambiguous | Interpret conservatively, note assumptions | +| Timeout approaching | Output partial results with "PARTIAL" status | diff --git a/.codex/skills/team-edict/agents/shangshu-dispatcher.md b/.codex/skills/team-edict/agents/shangshu-dispatcher.md new file mode 100644 index 00000000..816ea584 --- /dev/null +++ b/.codex/skills/team-edict/agents/shangshu-dispatcher.md @@ -0,0 +1,247 @@ +# Shangshu Dispatcher Agent + +Shangshu (Department of State Affairs / Dispatch) -- parses the approved plan, routes subtasks to the Six Ministries based on routing rules, and generates a structured dispatch plan with dependency batches. 
+ +## Identity + +- **Type**: `interactive` +- **Role**: shangshu (Department of State Affairs / Dispatch) +- **Responsibility**: Parse approved plan, route tasks to ministries, generate dispatch plan with dependency ordering + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Read both the Zhongshu plan and Menxia review (for conditions) +- Apply routing rules from team-config.json strictly +- Split cross-department tasks into separate ministry-level tasks +- Define clear dependency ordering between batches +- Write dispatch plan to `/plan/dispatch-plan.md` +- Ensure every subtask has: department assignment, task ID (DEPT-NNN), dependencies, acceptance criteria +- Report state transitions via discoveries.ndjson + +### MUST NOT + +- Route tasks to wrong departments (must follow keyword-signal rules) +- Leave any subtask unassigned to a department +- Create circular dependencies between batches +- Modify the plan content (dispatch only) +- Ignore conditions from Menxia review + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | file | Read plan, review, team-config | +| `Write` | file | Write dispatch plan to session directory | +| `Glob` | search | Verify file references in plan | +| `Grep` | search | Search for keywords for routing decisions | + +--- + +## Execution + +### Phase 1: Context Loading + +**Objective**: Load approved plan, review conditions, and routing rules + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| zhongshu-plan.md | Yes | Approved execution plan | +| menxia-review.md | Yes | Review conditions to carry forward | +| team-config.json | Yes | Routing rules for department assignment | + +**Steps**: + +1. Read `/plan/zhongshu-plan.md` +2. Read `/review/menxia-review.md` +3. Read `.codex/skills/team-edict/specs/team-config.json` +4. Extract subtask list from plan +5. Extract conditions from review +6. 
Report state "Doing": + ```bash + echo '{"ts":"","worker":"DISPATCH-001","type":"state_update","data":{"state":"Doing","task_id":"DISPATCH-001","department":"shangshu","step":"Loading approved plan for dispatch"}}' >> /discoveries.ndjson + ``` + +**Output**: Plan parsed, routing rules loaded + +--- + +### Phase 2: Routing Analysis + +**Objective**: Assign each subtask to the correct ministry + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Subtask list | Yes | From Phase 1 | +| Routing rules | Yes | From team-config.json | + +**Steps**: + +1. For each subtask, extract keywords and match against routing rules: + | Keyword Signals | Target Ministry | Task Prefix | + |----------------|-----------------|-------------| + | Feature, architecture, code, refactor, implement, API | gongbu | IMPL | + | Deploy, CI/CD, infrastructure, container, monitoring, security ops | bingbu | OPS | + | Data analysis, statistics, cost, reports, resource mgmt | hubu | DATA | + | Documentation, README, UI copy, specs, API docs | libu | DOC | + | Testing, QA, bug, code review, compliance | xingbu | QA | + | Agent management, training, skill optimization | libu-hr | HR | + +2. If a subtask spans multiple departments (e.g., "implement + test"), split into separate tasks +3. Assign task IDs: DEPT-NNN (e.g., IMPL-001, QA-001) +4. Record routing decisions as discoveries: + ```bash + echo '{"ts":"","worker":"DISPATCH-001","type":"routing_note","data":{"task_id":"IMPL-001","department":"gongbu","reason":"Keywords: implement, API endpoint"}}' >> /discoveries.ndjson + ``` + +**Output**: All subtasks assigned to departments with task IDs + +--- + +### Phase 3: Dependency Analysis and Batch Ordering + +**Objective**: Organize tasks into execution batches based on dependencies + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Routed task list | Yes | From Phase 2 | + +**Steps**: + +1. 
Analyze dependencies between tasks: + - Implementation before testing (IMPL before QA) + - Implementation before documentation (IMPL before DOC) + - Infrastructure can parallel with implementation (OPS parallel with IMPL) + - Data tasks may depend on implementation (DATA after IMPL if needed) +2. Group into batches: + - Batch 1: No-dependency tasks (parallel) + - Batch 2: Tasks depending on Batch 1 (parallel within batch) + - Batch N: Tasks depending on Batch N-1 +3. Validate no circular dependencies +4. Determine exec_mode for each task: + - xingbu (QA) tasks with test-fix loops -> `interactive` + - All others -> `csv-wave` + +**Output**: Batched task list with dependencies + +--- + +### Phase 4: Dispatch Plan Generation + +**Objective**: Write the structured dispatch plan + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Batched task list | Yes | From Phase 3 | +| Menxia conditions | No | From Phase 1 | + +**Steps**: + +1. Generate dispatch-plan.md following template below +2. Write to `/plan/dispatch-plan.md` +3. 
Report completion state + +**Output**: dispatch-plan.md written + +--- + +## Dispatch Plan Template (dispatch-plan.md) + +```markdown +# Shangshu Dispatch Plan + +## Dispatch Overview +- Total subtasks: N +- Departments involved: +- Execution batches: M batches + +## Task Assignments + +### Batch 1 (No dependencies, parallel execution) + +#### IMPL-001: +- **Department**: gongbu (Engineering) +- **Description**: +- **Priority**: P0 +- **Dependencies**: None +- **Acceptance Criteria**: +- **exec_mode**: csv-wave + +#### OPS-001: +- **Department**: bingbu (Operations) +- **Description**: +- **Priority**: P0 +- **Dependencies**: None +- **Acceptance Criteria**: +- **exec_mode**: csv-wave + +### Batch 2 (Depends on Batch 1) + +#### DOC-001: +- **Department**: libu (Documentation) +- **Description**: +- **Priority**: P1 +- **Dependencies**: IMPL-001 +- **Acceptance Criteria**: +- **exec_mode**: csv-wave + +#### QA-001: +- **Department**: xingbu (Quality Assurance) +- **Description**: +- **Priority**: P1 +- **Dependencies**: IMPL-001 +- **Acceptance Criteria**: +- **exec_mode**: interactive (test-fix loop) + +## Overall Acceptance Criteria + + +## Menxia Review Conditions (carry forward) + +``` + +--- + +## Structured Output Template + +``` +## Summary +- Dispatch plan generated: N tasks across M departments in B batches + +## Findings +- Routing: N tasks assigned (IMPL: X, OPS: Y, DOC: Z, QA: W, ...) +- Dependencies: B execution batches identified +- Interactive tasks: N (QA test-fix loops) + +## Deliverables +- File: /plan/dispatch-plan.md + +## Open Questions +1. 
(if any routing ambiguities) +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Subtask doesn't match any routing rule | Assign to gongbu by default, note in routing_note discovery | +| Plan has no clear subtasks | Extract implicit tasks from strategy section, note assumptions | +| Circular dependency detected | Break cycle by removing lowest-priority dependency, note in plan | +| Menxia conditions conflict with plan | Prioritize Menxia conditions, note conflict in dispatch plan | +| Single-task plan | Create minimal batch (1 task), add QA task if not present | diff --git a/.codex/skills/team-edict/agents/zhongshu-planner.md b/.codex/skills/team-edict/agents/zhongshu-planner.md new file mode 100644 index 00000000..72faad66 --- /dev/null +++ b/.codex/skills/team-edict/agents/zhongshu-planner.md @@ -0,0 +1,198 @@ +# Zhongshu Planner Agent + +Zhongshu (Central Secretariat) -- analyzes the edict, explores the codebase, and drafts a structured execution plan with ministry-level subtask decomposition. 
+ +## Identity + +- **Type**: `interactive` +- **Role**: zhongshu (Central Secretariat / Planning Department) +- **Responsibility**: Analyze edict requirements, explore codebase for feasibility, draft structured execution plan + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Produce structured output following the plan template +- Explore the codebase to ground the plan in reality +- Decompose the edict into concrete, ministry-assignable subtasks +- Define measurable acceptance criteria for each subtask +- Identify risks and propose mitigation strategies +- Write the plan to the session's `plan/zhongshu-plan.md` +- Report state transitions via discoveries.ndjson (Doing -> Done) +- If this is a rejection revision round, address ALL feedback from menxia-review.md + +### MUST NOT + +- Skip codebase exploration (unless explicitly told to skip) +- Create subtasks that span multiple departments (split them instead) +- Leave acceptance criteria vague or unmeasurable +- Implement any code (planning only) +- Ignore rejection feedback from previous Menxia review rounds + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | file | Read codebase files, specs, previous plans/reviews | +| `Write` | file | Write execution plan to session directory | +| `Glob` | search | Find files by pattern for codebase exploration | +| `Grep` | search | Search for patterns, keywords, implementations | +| `Bash` | exec | Run shell commands for exploration | + +--- + +## Execution + +### Phase 1: Context Loading + +**Objective**: Understand the edict and load all relevant context + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Edict text | Yes | Original task requirement from spawn message | +| team-config.json | Yes | Routing rules, department definitions | +| Previous menxia-review.md | If revision | Rejection feedback to address | +| Session 
discoveries.ndjson | No | Shared findings from previous stages | + +**Steps**: + +1. Parse the edict text from the spawn message +2. Read `.codex/skills/team-edict/specs/team-config.json` for routing rules +3. If revision round: Read `/review/menxia-review.md` for rejection feedback +4. Read `/discoveries.ndjson` if it exists + +**Output**: Parsed requirements + routing rules loaded + +--- + +### Phase 2: Codebase Exploration + +**Objective**: Ground the plan in the actual codebase + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Edict requirements | Yes | Parsed from Phase 1 | +| Codebase | Yes | Project files for exploration | + +**Steps**: + +1. Use Glob/Grep to identify relevant modules and files +2. Read key files to understand existing architecture +3. Identify patterns, conventions, and reusable components +4. Map dependencies and integration points +5. Record codebase patterns as discoveries: + ```bash + echo '{"ts":"","worker":"PLAN-001","type":"codebase_pattern","data":{"pattern_name":"","files":["",""],"description":""}}' >> /discoveries.ndjson + ``` + +**Output**: Codebase understanding sufficient for planning + +--- + +### Phase 3: Plan Drafting + +**Objective**: Create a structured execution plan with ministry assignments + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Codebase analysis | Yes | From Phase 2 | +| Routing rules | Yes | From team-config.json | +| Rejection feedback | If revision | From menxia-review.md | + +**Steps**: + +1. Determine high-level execution strategy +2. Decompose into ministry-level subtasks using routing rules: + - Feature/code tasks -> gongbu (IMPL) + - Infrastructure/deploy tasks -> bingbu (OPS) + - Data/analytics tasks -> hubu (DATA) + - Documentation tasks -> libu (DOC) + - Agent/training tasks -> libu-hr (HR) + - Testing/QA tasks -> xingbu (QA) +3. 
For each subtask: define title, description, priority, dependencies, acceptance criteria +4. If revision round: address each rejection point with specific changes +5. Identify risks and define mitigation/rollback strategies +6. Write plan to `/plan/zhongshu-plan.md` + +**Output**: Structured plan file written + +--- + +## Plan Template (zhongshu-plan.md) + +```markdown +# Execution Plan + +## Revision History (if applicable) +- Round N: Addressed menxia feedback on [items] + +## Edict Description + + +## Technical Analysis + +- Relevant modules: ... +- Existing patterns: ... +- Dependencies: ... + +## Execution Strategy + + +## Subtask List +| Department | Task ID | Subtask | Priority | Dependencies | Expected Output | +|------------|---------|---------|----------|-------------|-----------------| +| gongbu | IMPL-001 | | P0 | None | | +| xingbu | QA-001 | | P1 | IMPL-001 | Test report | +... + +## Acceptance Criteria +- Criterion 1: +- Criterion 2: + +## Risk Assessment +| Risk | Probability | Impact | Mitigation | +|------|------------|--------|------------| +| | High/Med/Low | High/Med/Low | | +``` + +--- + +## Structured Output Template + +``` +## Summary +- Plan drafted with N subtasks across M departments + +## Findings +- Codebase exploration: identified key patterns in [modules] +- Risk assessment: N risks identified, all with mitigation plans + +## Deliverables +- File: /plan/zhongshu-plan.md + +## Open Questions +1. 
Any ambiguities in the edict (if any) +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Edict text too vague | List assumptions in plan, continue with best interpretation | +| Codebase exploration timeout | Draft plan based on edict alone, mark "Technical analysis: pending verification" | +| No clear department mapping | Assign to gongbu (engineering) by default, note in plan | +| Revision feedback contradictory | Address each point, note contradictions in "Open Questions" | +| Input file not found | Report in Open Questions, continue with available data | diff --git a/.codex/skills/team-edict/instructions/agent-instruction.md b/.codex/skills/team-edict/instructions/agent-instruction.md new file mode 100644 index 00000000..27acd0f4 --- /dev/null +++ b/.codex/skills/team-edict/instructions/agent-instruction.md @@ -0,0 +1,177 @@ +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS +1. Read shared discoveries: .workflow/.csv-wave/{session-id}/discoveries.ndjson (if exists, skip if not) +2. Read dispatch plan: .workflow/.csv-wave/{session-id}/plan/dispatch-plan.md (task details and acceptance criteria) +3. Read approved plan: .workflow/.csv-wave/{session-id}/plan/zhongshu-plan.md (overall strategy and context) +4. Read quality gates: .codex/skills/team-edict/specs/quality-gates.md (quality standards) +5. Read team config: .codex/skills/team-edict/specs/team-config.json (routing rules and artifact paths) + +> **Note**: The session directory path is provided by the orchestrator in `additional_instructions`. Use it to resolve the paths above. + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Description**: {description} +**Department**: {department} +**Task Prefix**: {task_prefix} +**Priority**: {priority} +**Dispatch Batch**: {dispatch_batch} +**Acceptance Criteria**: {acceptance_criteria} + +### Previous Tasks' Findings (Context) +{prev_context} + +--- + +## Execution Protocol + +1. 
**Read discoveries**: Load the session's discoveries.ndjson for shared exploration findings from other agents +2. **Use context**: Apply previous tasks' findings from prev_context above +3. **Report state start**: Append a state_update discovery with state "Doing": + ```bash + echo '{{"ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","worker":"{id}","type":"state_update","data":{{"state":"Doing","task_id":"{id}","department":"{department}","step":"Starting: {title}"}}}}' >> .workflow/.csv-wave/{session-id}/discoveries.ndjson + ``` +4. **Execute based on department**: + + **If department = gongbu (Engineering)**: + - Read target files listed in description + - Explore codebase to understand existing patterns and conventions + - Implement changes following project coding style + - Validate changes compile/lint correctly (use IDE diagnostics if available) + - Write output artifact to session artifacts directory + - Run relevant tests if available + + **If department = bingbu (Operations)**: + - Analyze infrastructure requirements from description + - Create/modify deployment scripts, CI/CD configs, or monitoring setup + - Validate configuration syntax + - Write output artifact to session artifacts directory + + **If department = hubu (Data & Resources)**: + - Analyze data sources and requirements from description + - Perform data analysis, generate reports or dashboards + - Include key metrics and visualizations where applicable + - Write output artifact to session artifacts directory + + **If department = libu (Documentation)**: + - Read source code and existing documentation + - Generate documentation following format specified in description + - Ensure accuracy against current implementation + - Include code examples where appropriate + - Write output artifact to session artifacts directory + + **If department = libu-hr (Personnel)**: + - Read agent/skill files as needed + - Analyze patterns, generate training materials or evaluations + - Write output artifact to session 
artifacts directory + + **If department = xingbu (Quality Assurance)**: + - This department typically runs as interactive (test-fix loop) + - If running as csv-wave: execute one-shot review/audit + - Read code and test files, run analysis + - Classify findings by severity (Critical/High/Medium/Low) + - Write report artifact to session artifacts directory + +5. **Write artifact**: Save your output to the appropriate artifact file: + - gongbu -> `artifacts/gongbu-output.md` + - bingbu -> `artifacts/bingbu-output.md` + - hubu -> `artifacts/hubu-output.md` + - libu -> `artifacts/libu-output.md` + - libu-hr -> `artifacts/libu-hr-output.md` + - xingbu -> `artifacts/xingbu-report.md` + + If multiple tasks exist for the same department, append task ID: `artifacts/gongbu-output-{id}.md` + +6. **Share discoveries**: Append exploration findings to shared board: + ```bash + echo '{{"ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","worker":"{id}","type":"","data":{{...}}}}' >> .workflow/.csv-wave/{session-id}/discoveries.ndjson + ``` + +7. **Report completion state**: + ```bash + echo '{{"ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","worker":"{id}","type":"state_update","data":{{"state":"Done","task_id":"{id}","department":"{department}","remark":"Completed: "}}}}' >> .workflow/.csv-wave/{session-id}/discoveries.ndjson + ``` + +8. 
**Report result**: Return JSON via report_agent_job_result
+
+### Discovery Types to Share
+- `codebase_pattern`: `{pattern_name, files, description}` -- Identified codebase patterns and conventions
+- `dependency_found`: `{dep_name, version, used_by}` -- External dependency discoveries
+- `risk_identified`: `{risk_id, severity, description, mitigation}` -- Risk findings
+- `implementation_note`: `{file_path, note, line_range}` -- Implementation decisions
+- `test_result`: `{test_suite, pass_rate, failures}` -- Test execution results
+- `quality_issue`: `{issue_id, severity, file, description}` -- Quality issues found
+
+---
+
+## Artifact Output Format
+
+Write your artifact file in this structure:
+
+```markdown
+# {department} Output Report -- {id}
+
+## Task
+{title}
+
+## Implementation Summary
+
+
+## Files Modified/Created
+- `path/to/file1` -- description of change
+- `path/to/file2` -- description of change
+
+## Acceptance Criteria Verification
+| Criterion | Status | Evidence |
+|-----------|--------|----------|
+| | Pass/Fail | |
+
+## Key Findings
+- Finding 1 with file:line reference
+- Finding 2 with file:line reference
+
+## Risks / Open Issues
+- Any remaining risks or issues (if none, state "None identified")
+```
+
+---
+
+## Output (report_agent_job_result)
+
+Return JSON:
+```json
+{
+  "id": "{id}",
+  "status": "completed",
+  "findings": "Key discoveries and implementation notes (max 500 chars)",
+  "artifact_path": "artifacts/{department}-output.md",
+  "error": ""
+}
+```
+
+If the task fails:
+```json
+{
+  "id": "{id}",
+  "status": "failed",
+  "findings": "Partial progress description",
+  "artifact_path": "",
+  "error": "Specific error description"
+}
+```
+
+---
+
+## Error Handling
+
+| Scenario | Resolution |
+|----------|------------|
+| Target files not found | Report in findings, attempt with available context |
+| Acceptance criteria ambiguous | Interpret conservatively, note assumption in findings |
+| Blocked by missing dependency output | Report 
"Blocked" state in discoveries, set status to failed with reason | +| Compilation/lint errors in changes | Attempt to fix; if unfixable, report in findings with details | +| Test failures | Report in findings with specific failures, continue with remaining work | diff --git a/.codex/skills/team-edict/schemas/tasks-schema.md b/.codex/skills/team-edict/schemas/tasks-schema.md new file mode 100644 index 00000000..1f9f3380 --- /dev/null +++ b/.codex/skills/team-edict/schemas/tasks-schema.md @@ -0,0 +1,163 @@ +# Team Edict -- CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### Input Columns (Set by Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | string | Yes | Unique task identifier (DEPT-NNN format) | `"IMPL-001"` | +| `title` | string | Yes | Short task title | `"Implement JWT auth middleware"` | +| `description` | string | Yes | Detailed task description (self-contained for agent) | `"Create JWT authentication middleware..."` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"IMPL-001;IMPL-002"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"IMPL-001"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | +| `department` | string | Yes | Target ministry: gongbu/bingbu/hubu/libu/libu-hr/xingbu | `"gongbu"` | +| `task_prefix` | string | Yes | Task type prefix: IMPL/OPS/DATA/DOC/HR/QA | `"IMPL"` | +| `priority` | string | Yes | Priority level: P0 (highest) to P3 (lowest) | `"P0"` | +| `dispatch_batch` | integer | Yes | Batch number from Shangshu dispatch plan (1-based) | `1` | +| `acceptance_criteria` | string | Yes | Specific measurable acceptance criteria | `"All auth endpoints return valid JWT"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number 
(1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[IMPL-001] Created auth middleware..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` -> `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Created 3 files, JWT validation working"` | +| `artifact_path` | string | Path to output artifact relative to session dir | `"artifacts/gongbu-output.md"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution | + +Interactive tasks appear in master CSV for dependency tracking but are NOT included in wave-{N}.csv files. + +--- + +### Example Data + +```csv +id,title,description,deps,context_from,exec_mode,department,task_prefix,priority,dispatch_batch,acceptance_criteria,wave,status,findings,artifact_path,error +IMPL-001,"Implement JWT auth","Create JWT authentication middleware with token generation, validation, and refresh. Use existing bcrypt patterns from src/auth/. 
Follow Express middleware convention.","","","csv-wave","gongbu","IMPL","P0","1","JWT tokens generated and validated correctly; middleware integrates with existing auth flow","1","pending","","","" +OPS-001,"Configure CI pipeline","Set up GitHub Actions CI pipeline with test, lint, and build stages for the auth module.","","","csv-wave","bingbu","OPS","P0","1","CI pipeline runs on PR and push to main; all stages pass","1","pending","","","" +DOC-001,"Write auth API docs","Generate OpenAPI 3.0 documentation for all authentication endpoints including JWT token flows.","IMPL-001","IMPL-001","csv-wave","libu","DOC","P1","2","API docs cover all auth endpoints with request/response examples","2","pending","","","" +DATA-001,"Auth metrics dashboard","Create dashboard showing auth success/failure rates, token expiry distribution, and active sessions.","IMPL-001","IMPL-001","csv-wave","hubu","DATA","P2","2","Dashboard displays real-time auth metrics with 4 key charts","2","pending","","","" +QA-001,"Test auth module","Execute comprehensive test suite for auth module. Run unit tests, integration tests, and security scans. 
Test-fix loop with gongbu if failures found.","IMPL-001","IMPL-001","interactive","xingbu","QA","P1","2","Test pass rate >= 95%; no Critical security issues; code review clean","2","pending","","","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +--------------------- -------------------- ----------------- +id ----------> id ----------> id +title ----------> title ----------> (reads) +description ----------> description ----------> (reads) +deps ----------> deps ----------> (reads) +context_from----------> context_from----------> (reads) +exec_mode ----------> exec_mode ----------> (reads) +department ----------> department ----------> (reads) +task_prefix ----------> task_prefix ----------> (reads) +priority ----------> priority ----------> (reads) +dispatch_batch--------> dispatch_batch--------> (reads) +acceptance_criteria---> acceptance_criteria---> (reads) + wave ----------> (reads) + prev_context ----------> (reads) + status + findings + artifact_path + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "IMPL-001", + "status": "completed", + "findings": "Implemented JWT auth middleware in src/auth/jwt.ts. Created token generation, validation, and refresh endpoints. Integrated with existing bcrypt password flow.", + "artifact_path": "artifacts/gongbu-output.md", + "error": "" +} +``` + +Interactive tasks output via structured text or JSON written to `interactive/{id}-result.json`. 
+ +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `codebase_pattern` | `pattern_name` | `{pattern_name, files, description}` | Identified codebase patterns and conventions | +| `dependency_found` | `dep_name` | `{dep_name, version, used_by}` | External dependency discoveries | +| `risk_identified` | `risk_id` | `{risk_id, severity, description, mitigation}` | Risk findings from any agent | +| `implementation_note` | `file_path` | `{file_path, note, line_range}` | Implementation decisions and notes | +| `test_result` | `test_suite` | `{test_suite, pass_rate, failures}` | Test execution results | +| `quality_issue` | `issue_id` | `{issue_id, severity, file, description}` | Quality issues found during review | +| `routing_note` | `task_id` | `{task_id, department, reason}` | Dispatch routing decisions | +| `state_update` | `task_id` | `{state, task_id, department, step}` | Kanban state transition | +| `progress` | `task_id` | `{task_id, current, plan}` | Progress update within task | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T14:30:00Z","worker":"IMPL-001","type":"state_update","data":{"state":"Doing","task_id":"IMPL-001","department":"gongbu","step":"Starting JWT implementation"}} +{"ts":"2026-03-08T14:35:00Z","worker":"IMPL-001","type":"codebase_pattern","data":{"pattern_name":"express-middleware","files":["src/middleware/auth.ts","src/middleware/cors.ts"],"description":"Express middleware follows handler(req,res,next) pattern with error wrapper"}} +{"ts":"2026-03-08T14:40:00Z","worker":"IMPL-001","type":"implementation_note","data":{"file_path":"src/auth/jwt.ts","note":"Using jsonwebtoken library, RS256 algorithm for token signing","line_range":"1-45"}} +{"ts":"2026-03-08T14:50:00Z","worker":"QA-001","type":"test_result","data":{"test_suite":"auth-unit","pass_rate":"97%","failures":["token-expiry-edge-case"]}} 
+{"ts":"2026-03-08T14:55:00Z","worker":"QA-001","type":"quality_issue","data":{"issue_id":"QI-001","severity":"Medium","file":"src/auth/jwt.ts:23","description":"Missing input validation for refresh token format"}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. + +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| CSV task findings | Interactive task | Injected via spawn message or send_input | +| Interactive task result | CSV task prev_context | Read from interactive/{id}-result.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | +| Phase 0 plan/review | CSV tasks | Plan and review files in session dir | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Description non-empty | Every task has description | "Empty description for task: {id}" | +| Status enum | status in {pending, completed, failed, skipped} | "Invalid status: {status}" | +| Cross-mechanism deps | Interactive->CSV deps resolve correctly | "Cross-mechanism dependency unresolvable: {id}" | +| Department valid | Value in {gongbu, bingbu, hubu, libu, libu-hr, xingbu} | "Invalid department: {value}" | +| Task prefix matches dept | IMPL->gongbu, OPS->bingbu, DATA->hubu, DOC->libu, HR->libu-hr, QA->xingbu | "Prefix-department mismatch: {id}" | +| Acceptance criteria non-empty | Every task has acceptance_criteria | "Empty acceptance 
criteria for task: {id}" | diff --git a/.codex/skills/team-executor/SKILL.md b/.codex/skills/team-executor/SKILL.md new file mode 100644 index 00000000..415d4bf1 --- /dev/null +++ b/.codex/skills/team-executor/SKILL.md @@ -0,0 +1,477 @@ +--- +name: team-executor +description: Lightweight session execution skill. Resumes existing team-coordinate sessions for pure execution via worker agents. No analysis, no role generation -- only loads and executes. Session path required. +argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"--session=<path>\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults. + +# Team Executor + +## Usage + +```bash +$team-executor "--session=.workflow/.team/TC-project-2026-03-08" +$team-executor -c 4 "--session=.workflow/.team/TC-auth-2026-03-07" +$team-executor -y "--session=.workflow/.team/TC-api-2026-03-06" +$team-executor --continue "EX-project-2026-03-08" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 3) +- `--continue`: Resume existing executor session +- `--session=<path>`: Path to team-coordinate session folder (REQUIRED) + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Lightweight session execution skill: load team-coordinate session → reconcile state → spawn worker agents → execute → deliver. No analysis, no role generation -- only executes existing sessions. 
+ +**Execution Model**: Hybrid — CSV wave pipeline (primary) + individual agent spawn (secondary) + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ Team Executor WORKFLOW │ +├─────────────────────────────────────────────────────────────────────────┤ +│ │ +│ Phase 0: Session Validation + State Reconciliation │ +│ ├─ Validate session structure (team-session.json, task-analysis.json)│ +│ ├─ Load session state and role specifications │ +│ ├─ Reconcile with TaskList (bidirectional sync) │ +│ ├─ Reset interrupted tasks (in_progress → pending) │ +│ ├─ Detect fast-advance orphans and reset │ +│ └─ Output: validated session, reconciled state │ +│ │ +│ Phase 1: Requirement → CSV + Classification │ +│ ├─ Load task-analysis.json from session │ +│ ├─ Create tasks from analysis with role assignments │ +│ ├─ Classify tasks: csv-wave | interactive (from role specs) │ +│ ├─ Compute dependency waves (topological sort → depth grouping) │ +│ ├─ Generate tasks.csv with wave + exec_mode columns │ +│ └─ User validates task breakdown (skip if -y) │ +│ │ +│ Phase 2: Wave Execution Engine (Extended) │ +│ ├─ For each wave (1..N): │ +│ │ ├─ Execute pre-wave interactive tasks (if any) │ +│ │ ├─ Build wave CSV (filter csv-wave tasks for this wave) │ +│ │ ├─ Inject previous findings into prev_context column │ +│ │ ├─ spawn_agents_on_csv(wave CSV) │ +│ │ ├─ Execute post-wave interactive tasks (if any) │ +│ │ ├─ Merge all results into master tasks.csv │ +│ │ └─ Check: any failed? 
→ skip dependents │ +│ └─ discoveries.ndjson shared across all modes (append-only) │ +│ │ +│ Phase 3: Results Aggregation │ +│ ├─ Export final results.csv │ +│ ├─ Generate context.md with all findings │ +│ ├─ Display summary: completed/failed/skipped per wave │ +│ └─ Offer: view results | retry failed | done │ +│ │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode` based on role specification: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | Role has inner_loop=false | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Role has inner_loop=true | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Role inner_loop=false | `csv-wave` | +| Role inner_loop=true | `interactive` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,deps,context_from,exec_mode,role,wave,status,findings,error +1,Implement auth module,Create authentication module with JWT,,,"csv-wave","implementer",1,pending,"","" +2,Write tests,Write unit tests for auth module,1,1,"csv-wave","tester",2,pending,"","" +3,Review code,Review implementation and tests,2,2,"interactive","reviewer",3,pending,"","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier (string) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description | +| `deps` | Input | Semicolon-separated dependency task IDs | +| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `role` | Input | Role name from session role-specs | +| `wave` | Computed | Wave number (computed by topological sort, 1-based) | +| `status` | Output | `pending` → `completed` / 
`failed` / `skipped` | +| `findings` | Output | Key discoveries or implementation notes (max 500 chars) | +| `error` | Output | Error message if failed (empty if success) | + +### Per-Wave CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). + +--- + +## Agent Registry (Interactive Agents) + +Interactive agents are loaded dynamically from session role-specs where `inner_loop=true`. + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding role-spec.md** to reload. + +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state — all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 3 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 3 | +| `interactive/{id}-result.json` | Results from interactive tasks | Created per interactive task | +| `agents/registry.json` | Active interactive agent tracking | Updated on spawn/close | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ +├── tasks.csv # Master state (all tasks, both modes) +├── results.csv # Final results export +├── discoveries.ndjson # Shared discovery board (all agents) +├── context.md # Human-readable report +├── wave-{N}.csv # Temporary per-wave input (csv-wave only) +├── interactive/ # Interactive task artifacts +│ ├── {id}-result.json # Per-task results +│ └── cache-index.json # Shared exploration cache +└── agents/ + └── registry.json # Active interactive agent tracking +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +// 
Parse arguments +const args = parseArguments($ARGUMENTS) +const AUTO_YES = args.yes || args.y || false +const CONCURRENCY = args.concurrency || args.c || 3 +const CONTINUE_SESSION = args.continue || null +const SESSION_PATH = args.session || null + +// Validate session path +if (!SESSION_PATH) { + throw new Error("Session required. Usage: --session=<path>") +} + +// Generate executor session ID +const sessionId = `EX-${extractSessionName(SESSION_PATH)}-${formatDate(new Date(), 'yyyy-MM-dd')}` +const sessionDir = `.workflow/.csv-wave/${sessionId}` + +// Create session structure +Bash({ command: `mkdir -p "${sessionDir}/interactive" "${sessionDir}/agents"` }) +Write(`${sessionDir}/discoveries.ndjson`, '') +Write(`${sessionDir}/agents/registry.json`, JSON.stringify({ active: [], closed: [] })) +``` + +--- + +### Phase 0: Session Validation + State Reconciliation + +**Objective**: Validate session structure and reconcile session state with actual task status + +**Validation Steps**: + +1. Check `--session` provided +2. Validate session structure: + - Directory exists at path + - `team-session.json` exists and valid JSON + - `task-analysis.json` exists and valid JSON + - `role-specs/` directory has at least one `.md` file + - Each role in `team-session.json#roles` has corresponding `.md` file in `role-specs/` +3. If validation fails → ERROR with specific reason → STOP + +**Reconciliation Steps**: + +1. Load team-session.json and task-analysis.json +2. Compare TaskList() with session.completed_tasks, bidirectional sync +3. Reset any in_progress tasks to pending +4. Detect fast-advance orphans (in_progress tasks without matching active_worker + created > 5 minutes) → reset to pending +5. Create missing tasks (if needed) from task-analysis +6. Update session file with reconciled state +7. 
TeamCreate if team does not exist + +**Success Criteria**: +- Session validated, state reconciled, team ready +- All role-specs loaded and validated + +--- + +### Phase 1: Requirement → CSV + Classification + +**Objective**: Generate task breakdown from session task-analysis and create master CSV + +**Decomposition Rules**: + +Load task-analysis.json from session and create tasks with: +- Task ID, title, description from analysis +- Dependencies from analysis +- Role assignment from analysis +- exec_mode classification based on role inner_loop flag + +**Classification Rules**: + +Read each role-spec file to determine inner_loop flag: +- inner_loop=false → `exec_mode=csv-wave` +- inner_loop=true → `exec_mode=interactive` + +**Wave Computation**: Kahn's BFS topological sort with depth tracking (csv-wave tasks only). + +**User Validation**: Display task breakdown with wave + exec_mode assignment (skip if AUTO_YES). + +**Success Criteria**: +- tasks.csv created with valid schema, wave, and exec_mode assignments +- No circular dependencies +- User approved (or AUTO_YES) + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with hybrid mechanism support and cross-wave context propagation. 
+ +```javascript +// Load master CSV +const masterCSV = readCSV(`${sessionDir}/tasks.csv`) +const maxWave = Math.max(...masterCSV.map(t => t.wave)) + +for (let wave = 1; wave <= maxWave; wave++) { + // Execute pre-wave interactive tasks + const preWaveTasks = masterCSV.filter(t => + t.wave === wave && t.exec_mode === 'interactive' && t.position === 'pre-wave' + ) + for (const task of preWaveTasks) { + const roleSpec = Read(`${SESSION_PATH}/role-specs/${task.role}.md`) + const agent = spawn_agent({ + message: buildWorkerPrompt(task, roleSpec, sessionDir) + }) + const result = wait({ ids: [agent], timeout_ms: 600000 }) + close_agent({ id: agent }) + updateTaskStatus(task.id, result) + } + + // Build wave CSV (csv-wave tasks only) + const waveTasks = masterCSV.filter(t => t.wave === wave && t.exec_mode === 'csv-wave') + if (waveTasks.length > 0) { + // Inject prev_context from context_from tasks + for (const task of waveTasks) { + if (task.context_from) { + const contextIds = task.context_from.split(';') + const contextFindings = masterCSV + .filter(t => contextIds.includes(t.id)) + .map(t => `[Task ${t.id}] ${t.findings}`) + .join('\n\n') + task.prev_context = contextFindings + } + } + + // Write wave CSV + writeCSV(`${sessionDir}/wave-${wave}.csv`, waveTasks) + + // Execute wave + spawn_agents_on_csv({ + csv_path: `${sessionDir}/wave-${wave}.csv`, + instruction_path: `${sessionDir}/instructions/agent-instruction.md`, + concurrency: CONCURRENCY + }) + + // Merge results back to master + const waveResults = readCSV(`${sessionDir}/wave-${wave}.csv`) + for (const result of waveResults) { + const masterTask = masterCSV.find(t => t.id === result.id) + Object.assign(masterTask, result) + } + writeCSV(`${sessionDir}/tasks.csv`, masterCSV) + + // Cleanup wave CSV + Bash({ command: `rm "${sessionDir}/wave-${wave}.csv"` }) + } + + // Execute post-wave interactive tasks + const postWaveTasks = masterCSV.filter(t => + t.wave === wave && t.exec_mode === 'interactive' && 
t.position === 'post-wave' + ) + for (const task of postWaveTasks) { + const roleSpec = Read(`${SESSION_PATH}/role-specs/${task.role}.md`) + const agent = spawn_agent({ + message: buildWorkerPrompt(task, roleSpec, sessionDir) + }) + const result = wait({ ids: [agent], timeout_ms: 600000 }) + close_agent({ id: agent }) + updateTaskStatus(task.id, result) + } + + // Check for failures and skip dependents + const failedTasks = masterCSV.filter(t => t.wave === wave && t.status === 'failed') + if (failedTasks.length > 0) { + skipDependents(masterCSV, failedTasks) + } +} +``` + +**Success Criteria**: +- All waves executed in order +- Both csv-wave and interactive tasks handled per wave +- Each wave's results merged into master CSV before next wave starts +- Dependent tasks skipped when predecessor failed +- discoveries.ndjson accumulated across all waves and mechanisms +- Interactive agent lifecycle tracked in registry.json + +--- + +### Phase 3: Results Aggregation + +**Objective**: Generate final results and human-readable report. 
+ +```javascript +// Export results.csv +const masterCSV = readCSV(`${sessionDir}/tasks.csv`) +writeCSV(`${sessionDir}/results.csv`, masterCSV) + +// Generate context.md +const contextMd = generateContextReport(masterCSV, sessionDir) +Write(`${sessionDir}/context.md`, contextMd) + +// Cleanup interactive agents +const registry = JSON.parse(Read(`${sessionDir}/agents/registry.json`)) +for (const agent of registry.active) { + close_agent({ id: agent.id }) +} +Write(`${sessionDir}/agents/registry.json`, JSON.stringify({ active: [], closed: registry.closed })) + +// Display summary +const summary = { + total: masterCSV.length, + completed: masterCSV.filter(t => t.status === 'completed').length, + failed: masterCSV.filter(t => t.status === 'failed').length, + skipped: masterCSV.filter(t => t.status === 'skipped').length +} +console.log(`Pipeline complete: ${summary.completed}/${summary.total} tasks completed`) + +// Completion action +const action = await AskUserQuestion({ + questions: [{ + question: "Team pipeline complete. 
What would you like to do?", + header: "Completion", + multiSelect: false, + options: [ + { label: "Archive & Clean (Recommended)", description: "Archive session, clean up team" }, + { label: "Keep Active", description: "Keep session for follow-up work" }, + { label: "Export Results", description: "Export deliverables to target directory, then clean" } + ] + }] +}) + +// Handle completion action +if (action === "Archive & Clean") { + // Update session status, cleanup team +} else if (action === "Keep Active") { + // Update session status to paused +} else if (action === "Export Results") { + // Ask for target path, copy artifacts, then archive +} +``` + +**Success Criteria**: +- results.csv exported (all tasks, both modes) +- context.md generated +- All interactive agents closed (registry.json cleanup) +- Summary displayed to user +- Completion action executed + +--- + +## Shared Discovery Board Protocol + +**Discovery Types**: + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `implementation` | `file+function` | `{file, function, approach, notes}` | Implementation approach taken | +| `test_result` | `test_name` | `{test_name, status, duration}` | Test execution result | +| `review_comment` | `file+line` | `{file, line, severity, comment}` | Code review comment | +| `pattern` | `pattern_name` | `{pattern, files[], occurrences}` | Code pattern identified | + +**Discovery NDJSON Format**: + +```jsonl +{"ts":"2026-03-08T14:30:22Z","worker":"1","type":"implementation","data":{"file":"src/auth.ts","function":"login","approach":"JWT-based","notes":"Used bcrypt for password hashing"}} +{"ts":"2026-03-08T14:35:10Z","worker":"2","type":"test_result","data":{"test_name":"auth.login.success","status":"pass","duration":125}} +{"ts":"2026-03-08T14:40:05Z","worker":"3","type":"review_comment","data":{"file":"src/auth.ts","line":42,"severity":"medium","comment":"Consider adding rate limiting"}} +``` + +> Both csv-wave and 
interactive agents read/write the same discoveries.ndjson file. + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| No --session provided | ERROR immediately with usage message | +| Session directory not found | ERROR with path, suggest checking path | +| team-session.json missing | ERROR, session incomplete, suggest re-run team-coordinate | +| task-analysis.json missing | ERROR, session incomplete, suggest re-run team-coordinate | +| No role-specs in session | ERROR, session incomplete, suggest re-run team-coordinate | +| Role-spec file not found | ERROR with expected path | +| capability_gap reported | Warn only, cannot generate new role-specs | +| Circular dependency | Detect in wave computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent tasks in later waves | +| Interactive agent timeout | Urge convergence via send_input, then close if still timed out | +| Interactive agent failed | Mark as failed, skip dependents | +| All agents in wave failed | Log error, offer retry or abort | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| Lifecycle leak | Cleanup all active agents via registry.json at end | +| Continue mode: no session found | List available sessions, prompt user to select | +| Completion action fails | Default to Keep Active, log warning | + +--- + +## Core Rules + +1. **Start Immediately**: First action is session validation, then Phase 0/1 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. **CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive) +4. **CSV First**: Default to csv-wave for tasks; only use interactive when role inner_loop=true +5. 
**Context Propagation**: prev_context built from master CSV, not from memory +6. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson — both mechanisms share it +7. **Skip on Failure**: If a dependency failed, skip the dependent task (regardless of mechanism) +8. **Lifecycle Balance**: Every spawn_agent MUST have a matching close_agent (tracked in registry.json) +9. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +10. **DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-executor/instructions/agent-instruction.md b/.codex/skills/team-executor/instructions/agent-instruction.md new file mode 100644 index 00000000..51cff2ef --- /dev/null +++ b/.codex/skills/team-executor/instructions/agent-instruction.md @@ -0,0 +1,62 @@ +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS +1. Read role definition: {role_spec_path} (MUST read first) +2. Read shared discoveries: {session_folder}/discoveries.ndjson (if exists, skip if not) +3. Read project context: .workflow/project-tech.json (if exists) + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Description**: {description} +**Role**: {role} + +### Previous Tasks' Findings (Context) +{prev_context} + +--- + +## Execution Protocol + +1. **Read role definition**: Load {role_spec_path} for Phase 2-4 domain instructions (MANDATORY) +2. **Read discoveries**: Load {session_folder}/discoveries.ndjson for shared exploration findings +3. **Use context**: Apply previous tasks' findings from prev_context above +4. **Execute**: Follow the role-specific instructions from your role definition file +5. **Share discoveries**: Append exploration findings to shared board: + ```bash + echo '{"ts":"","worker":"{id}","type":"","data":{...}}' >> {session_folder}/discoveries.ndjson + ``` +6. 
**Report result**: Return JSON via report_agent_job_result + +### Role Definition Structure + +Your role definition file contains: +- **Phase 2**: Context & scope resolution +- **Phase 3**: Execution steps +- **Phase 4**: Output generation + +Follow the phases in order as defined in your role file. + +### Discovery Types to Share + +- `implementation`: {file, function, approach, notes} — Implementation approach taken +- `test_result`: {test_name, status, duration} — Test execution result +- `review_comment`: {file, line, severity, comment} — Code review comment +- `pattern`: {pattern, files[], occurrences} — Code pattern identified + +--- + +## Output (report_agent_job_result) + +Return JSON: +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Key discoveries and implementation notes (max 500 chars)", + "error": "" +} + +**Findings format**: Concise summary of what was accomplished, key decisions made, and any important notes for downstream tasks. diff --git a/.codex/skills/team-executor/schemas/tasks-schema.md b/.codex/skills/team-executor/schemas/tasks-schema.md new file mode 100644 index 00000000..55deece7 --- /dev/null +++ b/.codex/skills/team-executor/schemas/tasks-schema.md @@ -0,0 +1,141 @@ +# Team Executor — CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### Input Columns (Set by Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | string | Yes | Unique task identifier | `"1"` | +| `title` | string | Yes | Short task title | `"Implement auth module"` | +| `description` | string | Yes | Detailed task description (self-contained) | `"Create authentication module with JWT support"` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"1;2"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"1"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | +| 
`role` | string | Yes | Role name from session role-specs | `"implementer"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[Task 1] Created auth module..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` → `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Implemented JWT auth with bcrypt password hashing"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution | + +Interactive tasks appear in master CSV for dependency tracking but are NOT included in wave-{N}.csv files. 
+ +--- + +### Example Data + +```csv +id,title,description,deps,context_from,exec_mode,role,wave,status,findings,error +1,Implement auth module,Create authentication module with JWT,,,"csv-wave","implementer",1,pending,"","" +2,Write tests,Write unit tests for auth module,1,1,"csv-wave","tester",2,pending,"","" +3,Review code,Review implementation and tests,2,2,"interactive","reviewer",3,pending,"","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +───────────────────── ──────────────────── ───────────────── +id ───────────► id ──────────► id +title ───────────► title ──────────► (reads) +description ───────────► description ──────────► (reads) +deps ───────────► deps ──────────► (reads) +context_from───────────► context_from──────────► (reads) +exec_mode ───────────► exec_mode ──────────► (reads) +role ───────────► role ──────────► (reads) + wave ──────────► (reads) + prev_context ──────────► (reads) + status + findings + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "1", + "status": "completed", + "findings": "Implemented JWT authentication with bcrypt password hashing. Created login, logout, and token refresh endpoints. Added middleware for protected routes.", + "error": "" +} +``` + +Interactive tasks output via structured text or JSON written to `interactive/{id}-result.json`. 
+ +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `implementation` | `file+function` | `{file, function, approach, notes}` | Implementation approach taken | +| `test_result` | `test_name` | `{test_name, status, duration}` | Test execution result | +| `review_comment` | `file+line` | `{file, line, severity, comment}` | Code review comment | +| `pattern` | `pattern_name` | `{pattern, files[], occurrences}` | Code pattern identified | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T14:30:22Z","worker":"1","type":"implementation","data":{"file":"src/auth.ts","function":"login","approach":"JWT-based","notes":"Used bcrypt for password hashing"}} +{"ts":"2026-03-08T14:35:10Z","worker":"2","type":"test_result","data":{"test_name":"auth.login.success","status":"pass","duration":125}} +{"ts":"2026-03-08T14:40:05Z","worker":"3","type":"review_comment","data":{"file":"src/auth.ts","line":42,"severity":"medium","comment":"Consider adding rate limiting"}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. 
+ +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| CSV task findings | Interactive task | Injected via spawn message or send_input | +| Interactive task result | CSV task prev_context | Read from interactive/{id}-result.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Description non-empty | Every task has description | "Empty description for task: {id}" | +| Status enum | status ∈ {pending, completed, failed, skipped} | "Invalid status: {status}" | +| Cross-mechanism deps | Interactive→CSV deps resolve correctly | "Cross-mechanism dependency unresolvable: {id}" | +| Role non-empty | Every task has role | "Empty role for task: {id}" | +| Role exists | Role has corresponding role-spec file | "Role not found: {role}" | diff --git a/.codex/skills/team-frontend-debug/SKILL.md b/.codex/skills/team-frontend-debug/SKILL.md new file mode 100644 index 00000000..9b5dde75 --- /dev/null +++ b/.codex/skills/team-frontend-debug/SKILL.md @@ -0,0 +1,783 @@ +--- +name: team-frontend-debug +description: Frontend debugging team using Chrome DevTools MCP. Dual-mode -- feature-list testing or bug-report debugging. Covers reproduction, root cause analysis, code fixes, and verification. CSV wave pipeline with conditional skip and iteration loops. 
+argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"feature list or bug description\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults. + +# Frontend Debug Team + +## Usage + +```bash +$team-frontend-debug "Test features: login, dashboard, user profile at localhost:3000" +$team-frontend-debug "Bug: clicking save button on /settings causes white screen" +$team-frontend-debug -y "Test: 1. User registration 2. Email verification 3. Password reset" +$team-frontend-debug --continue "tfd-login-bug-20260308" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 2) +- `--continue`: Resume existing session + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Dual-mode frontend debugging: feature-list testing or bug-report debugging, powered by Chrome DevTools MCP. Roles: tester (test-pipeline), reproducer (debug-pipeline), analyzer, fixer, verifier. Supports conditional skip (all tests pass -> no downstream tasks), iteration loops (analyzer requesting more evidence, verifier triggering re-fix), and Chrome DevTools-based browser interaction. 
+ +**Execution Model**: Hybrid -- CSV wave pipeline (primary) + individual agent spawn (secondary) + +``` ++-------------------------------------------------------------------+ +| FRONTEND DEBUG WORKFLOW | ++-------------------------------------------------------------------+ +| | +| Phase 0: Pre-Wave Interactive (Input Analysis) | +| +- Parse user input (feature list or bug report) | +| +- Detect mode: test-pipeline or debug-pipeline | +| +- Extract: base URL, features/steps, evidence plan | +| +- Output: refined requirements for decomposition | +| | +| Phase 1: Requirement -> CSV + Classification | +| +- Select pipeline (test or debug) | +| +- Build dependency graph from pipeline definition | +| +- Classify tasks: csv-wave | interactive (exec_mode) | +| +- Compute dependency waves (topological sort) | +| +- Generate tasks.csv with wave + exec_mode columns | +| +- User validates task breakdown (skip if -y) | +| | +| Phase 2: Wave Execution Engine (Extended) | +| +- For each wave (1..N): | +| | +- Execute pre-wave interactive tasks (if any) | +| | +- Build wave CSV (filter csv-wave tasks for this wave) | +| | +- Inject previous findings into prev_context column | +| | +- spawn_agents_on_csv(wave CSV) | +| | +- Execute post-wave interactive tasks (if any) | +| | +- Merge all results into master tasks.csv | +| | +- Conditional skip: TEST-001 with 0 issues -> done | +| | +- Iteration: ANALYZE needs more evidence -> REPRODUCE-002 | +| | +- Re-fix: VERIFY fails -> FIX-002 -> VERIFY-002 | +| +- discoveries.ndjson shared across all modes (append-only) | +| | +| Phase 3: Post-Wave Interactive (Completion Action) | +| +- Pipeline completion report with debug summary | +| +- Interactive completion choice (Archive/Keep/Export) | +| +- Final aggregation / report | +| | +| Phase 4: Results Aggregation | +| +- Export final results.csv | +| +- Generate context.md with all findings | +| +- Display summary: completed/failed/skipped per wave | +| +- Offer: view results | retry 
failed | done | +| | ++-------------------------------------------------------------------+ +``` + +--- + +## Pipeline Modes + +| Input Pattern | Pipeline | Flow | +|---------------|----------|------| +| Feature list / function checklist / test items | `test-pipeline` | TEST -> ANALYZE -> FIX -> VERIFY | +| Bug report / error description / crash report | `debug-pipeline` | REPRODUCE -> ANALYZE -> FIX -> VERIFY | + +### Pipeline Selection Keywords + +| Keywords | Pipeline | +|----------|----------| +| feature, test, list, check, verify functions, validate | `test-pipeline` | +| bug, error, crash, broken, white screen, not working | `debug-pipeline` | +| performance, slow, latency, memory leak | `debug-pipeline` (perf dimension) | +| Ambiguous / unclear | AskUserQuestion to clarify | + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, progress updates, inner loop | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Feature testing with inner loop (tester iterates over features) | `csv-wave` | +| Bug reproduction (single pass) | `csv-wave` | +| Root cause analysis (single pass) | `csv-wave` | +| Code fix implementation | `csv-wave` | +| Fix verification (single pass) | `csv-wave` | +| Conditional skip gate (evaluating TEST results) | `interactive` | +| Pipeline completion action | `interactive` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,role,pipeline_mode,base_url,evidence_dimensions,deps,context_from,exec_mode,wave,status,findings,artifacts_produced,issues_count,verdict,error +"TEST-001","Feature testing","PURPOSE: Test all features from list | Success: All features tested with 
evidence","tester","test-pipeline","http://localhost:3000","screenshot;console;network","","","csv-wave","1","pending","","","","","" +"ANALYZE-001","Root cause analysis","PURPOSE: Analyze discovered issues | Success: RCA for each issue","analyzer","test-pipeline","","console;network","TEST-001","TEST-001","csv-wave","2","pending","","","","","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier (PREFIX-NNN: TEST, REPRODUCE, ANALYZE, FIX, VERIFY) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description with PURPOSE/TASK/CONTEXT/EXPECTED/CONSTRAINTS | +| `role` | Input | Role name: `tester`, `reproducer`, `analyzer`, `fixer`, `verifier` | +| `pipeline_mode` | Input | Pipeline: `test-pipeline` or `debug-pipeline` | +| `base_url` | Input | Target URL for browser-based tasks (empty for non-browser tasks) | +| `evidence_dimensions` | Input | Semicolon-separated evidence types: `screenshot`, `console`, `network`, `snapshot`, `performance` | +| `deps` | Input | Semicolon-separated dependency task IDs | +| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `wave` | Computed | Wave number (computed by topological sort, 1-based) | +| `status` | Output | `pending` -> `completed` / `failed` / `skipped` | +| `findings` | Output | Key discoveries or implementation notes (max 500 chars) | +| `artifacts_produced` | Output | Semicolon-separated paths of produced artifacts | +| `issues_count` | Output | Number of issues found (tester/analyzer), empty for others | +| `verdict` | Output | Verification verdict: `pass`, `pass_with_warnings`, `fail` (verifier only) | +| `error` | Output | Error message if failed (empty if success) | + +### Per-Wave CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). 
+ +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| Conditional Skip Gate | agents/conditional-skip-gate.md | 2.3 (send_input cycle) | Evaluate TEST results and skip downstream if no issues | post-wave | +| Iteration Handler | agents/iteration-handler.md | 2.3 (send_input cycle) | Handle analyzer's need_more_evidence request | post-wave | +| Completion Handler | agents/completion-handler.md | 2.3 (send_input cycle) | Handle pipeline completion action (Archive/Keep/Export) | standalone | + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. + +--- + +## Chrome DevTools MCP Tools + +All browser inspection operations use Chrome DevTools MCP. Tester, reproducer, and verifier are primary consumers. These tools are available to CSV wave agents. + +| Tool | Purpose | +|------|---------| +| `mcp__chrome-devtools__navigate_page` | Navigate to target URL | +| `mcp__chrome-devtools__take_screenshot` | Capture visual state | +| `mcp__chrome-devtools__take_snapshot` | Capture DOM/a11y tree | +| `mcp__chrome-devtools__list_console_messages` | Read console logs | +| `mcp__chrome-devtools__get_console_message` | Get specific console message | +| `mcp__chrome-devtools__list_network_requests` | Monitor network activity | +| `mcp__chrome-devtools__get_network_request` | Inspect request/response detail | +| `mcp__chrome-devtools__performance_start_trace` | Start performance recording | +| `mcp__chrome-devtools__performance_stop_trace` | Stop and analyze trace | +| `mcp__chrome-devtools__click` | Simulate user click | +| `mcp__chrome-devtools__fill` | Fill form inputs | +| `mcp__chrome-devtools__hover` | Hover over elements | +| `mcp__chrome-devtools__evaluate_script` | Execute JavaScript in page | +| `mcp__chrome-devtools__wait_for` | Wait for 
element/text | +| `mcp__chrome-devtools__list_pages` | List open browser tabs | +| `mcp__chrome-devtools__select_page` | Switch active tab | +| `mcp__chrome-devtools__press_key` | Press keyboard keys | + +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state -- all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 4 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 4 | +| `task-analysis.json` | Phase 0/1 output: mode, features/steps, dimensions | Created in Phase 1 | +| `role-instructions/` | Per-role instruction templates for CSV agents | Created in Phase 1 | +| `artifacts/` | All deliverables: test reports, RCA reports, fix changes, verification reports | Created by agents | +| `evidence/` | Screenshots, snapshots, network logs, performance traces | Created by tester/reproducer/verifier | +| `interactive/{id}-result.json` | Results from interactive tasks | Created per interactive task | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ ++-- tasks.csv # Master state (all tasks, both modes) ++-- results.csv # Final results export ++-- discoveries.ndjson # Shared discovery board (all agents) ++-- context.md # Human-readable report ++-- task-analysis.json # Phase 1 analysis output ++-- wave-{N}.csv # Temporary per-wave input (csv-wave only) ++-- role-instructions/ # Per-role instruction templates +| +-- tester.md # (test-pipeline) +| +-- reproducer.md # (debug-pipeline) +| +-- analyzer.md +| +-- fixer.md +| +-- verifier.md ++-- artifacts/ # All deliverables +| +-- TEST-001-report.md +| +-- TEST-001-issues.json +| +-- ANALYZE-001-rca.md +| +-- FIX-001-changes.md +| 
+-- VERIFY-001-report.md ++-- evidence/ # Browser evidence +| +-- F-001-login-before.png +| +-- F-001-login-after.png +| +-- before-screenshot.png +| +-- after-screenshot.png +| +-- before-snapshot.txt +| +-- after-snapshot.txt +| +-- evidence-summary.json ++-- interactive/ # Interactive task artifacts +| +-- {id}-result.json ++-- wisdom/ # Cross-task knowledge + +-- learnings.md +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +const getUtc8ISOString = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString() + +const AUTO_YES = $ARGUMENTS.includes('--yes') || $ARGUMENTS.includes('-y') +const continueMode = $ARGUMENTS.includes('--continue') +const concurrencyMatch = $ARGUMENTS.match(/(?:--concurrency|-c)\s+(\d+)/) +const maxConcurrency = concurrencyMatch ? parseInt(concurrencyMatch[1]) : 2 + +const requirement = $ARGUMENTS + .replace(/--yes|-y|--continue|--concurrency\s+\d+|-c\s+\d+/g, '') + .trim() + +const slug = requirement.toLowerCase() + .replace(/[^a-z0-9\u4e00-\u9fa5]+/g, '-') + .substring(0, 40) +const dateStr = getUtc8ISOString().substring(0, 10).replace(/-/g, '') +const sessionId = `tfd-${slug}-${dateStr}` +const sessionFolder = `.workflow/.csv-wave/${sessionId}` + +Bash(`mkdir -p ${sessionFolder}/artifacts ${sessionFolder}/evidence ${sessionFolder}/role-instructions ${sessionFolder}/interactive ${sessionFolder}/wisdom`) + +Write(`${sessionFolder}/discoveries.ndjson`, '') +Write(`${sessionFolder}/wisdom/learnings.md`, '# Debug Learnings\n') +``` + +--- + +### Phase 0: Pre-Wave Interactive (Input Analysis) + +**Objective**: Parse user input, detect mode (test vs debug), extract parameters. + +**Workflow**: + +1. **Parse user input** from $ARGUMENTS + +2. **Check for existing sessions** (continue mode): + - Scan `.workflow/.csv-wave/tfd-*/tasks.csv` for sessions with pending tasks + - If `--continue`: resume the specified or most recent session, skip to Phase 2 + +3. 
**Detect mode**: + + | Input Pattern | Mode | + |---------------|------| + | Contains: feature, test, list, check, verify | `test-pipeline` | + | Contains: bug, error, crash, broken, not working | `debug-pipeline` | + | Ambiguous | AskUserQuestion to clarify | + +4. **Extract parameters by mode**: + + **Test Mode**: + - `base_url`: URL in text or AskUserQuestion + - `features`: Parse feature list (bullet points, numbered list, free text) + - Generate structured feature items with id, name, url + + **Debug Mode**: + - `bug_description`: Bug description text + - `target_url`: URL in text or AskUserQuestion + - `reproduction_steps`: Steps in text or AskUserQuestion + - `evidence_plan`: Detect dimensions from keywords (UI, network, console, performance) + +5. **Dimension Detection** (debug mode): + + | Keywords | Dimension | + |----------|-----------| + | render, style, display, layout, CSS | screenshot, snapshot | + | request, API, network, timeout | network | + | error, crash, exception | console | + | slow, performance, lag, memory | performance | + | interaction, click, input, form | screenshot, console | + +**Success Criteria**: +- Mode determined (test-pipeline or debug-pipeline) +- Base URL and features/steps extracted +- Evidence dimensions identified + +--- + +### Phase 1: Requirement -> CSV + Classification + +**Objective**: Build task dependency graph, generate tasks.csv and per-role instruction templates. + +**Decomposition Rules**: + +1. **Pipeline Definition**: + + **Test Pipeline** (4 tasks, conditional): + ``` + TEST-001 -> [issues?] -> ANALYZE-001 -> FIX-001 -> VERIFY-001 + | + +-- no issues -> Pipeline Complete (skip downstream) + ``` + + **Debug Pipeline** (4 tasks, linear with iteration): + ``` + REPRODUCE-001 -> ANALYZE-001 -> FIX-001 -> VERIFY-001 + ^ | + | (if fail) | + +--- REPRODUCE-002 <-----+ + ``` + +2. 
**Task Description Template**: Every task uses PURPOSE/TASK/CONTEXT/EXPECTED/CONSTRAINTS format with session path, base URL, and upstream artifact references + +3. **Role Instruction Generation**: Write per-role instruction templates to `role-instructions/{role}.md` using the base instruction template customized for each role + +**Classification Rules**: + +| Task Property | exec_mode | +|---------------|-----------| +| Feature testing (tester with inner loop) | `csv-wave` | +| Bug reproduction (single pass) | `csv-wave` | +| Root cause analysis (single pass) | `csv-wave` | +| Code fix (may need multiple passes) | `csv-wave` | +| Fix verification (single pass) | `csv-wave` | +| All standard pipeline tasks | `csv-wave` | + +**Wave Computation**: Kahn's BFS topological sort with depth tracking. + +```javascript +// Generate per-role instruction templates +const roles = pipelineMode === 'test-pipeline' + ? ['tester', 'analyzer', 'fixer', 'verifier'] + : ['reproducer', 'analyzer', 'fixer', 'verifier'] + +for (const role of roles) { + const instruction = generateRoleInstruction(role, sessionFolder, pipelineMode) + Write(`${sessionFolder}/role-instructions/${role}.md`, instruction) +} + +const tasks = buildTasksCsv(pipelineMode, requirement, sessionFolder, baseUrl, evidencePlan) +Write(`${sessionFolder}/tasks.csv`, toCsv(tasks)) +Write(`${sessionFolder}/task-analysis.json`, JSON.stringify(analysisResult, null, 2)) +``` + +**User Validation**: Display task breakdown (skip if AUTO_YES). + +**Success Criteria**: +- tasks.csv created with valid schema and wave assignments +- Role instruction templates generated +- task-analysis.json written +- No circular dependencies + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with conditional skip, iteration loops, and re-fix cycles. 
+ +```javascript +const masterCsv = Read(`${sessionFolder}/tasks.csv`) +let tasks = parseCsv(masterCsv) +let maxWave = Math.max(...tasks.map(t => t.wave)) +let fixRound = 0 +const MAX_FIX_ROUNDS = 3 +const MAX_REPRODUCE_ROUNDS = 2 + +for (let wave = 1; wave <= maxWave; wave++) { + console.log(`\nWave ${wave}/${maxWave}`) + + const waveTasks = tasks.filter(t => t.wave === wave && t.status === 'pending') + const csvTasks = waveTasks.filter(t => t.exec_mode === 'csv-wave') + const interactiveTasks = waveTasks.filter(t => t.exec_mode === 'interactive') + + // Check dependencies -- skip tasks whose deps failed + for (const task of waveTasks) { + const depIds = (task.deps || '').split(';').filter(Boolean) + const depStatuses = depIds.map(id => tasks.find(t => t.id === id)?.status) + if (depStatuses.some(s => s === 'failed' || s === 'skipped')) { + task.status = 'skipped' + task.error = `Dependency failed: ${depIds.filter((id, i) => + ['failed','skipped'].includes(depStatuses[i])).join(', ')}` + } + } + + // Execute pre-wave interactive tasks (if any) + for (const task of interactiveTasks.filter(t => t.status === 'pending')) { + // Determine agent file based on task type + const agentFile = task.id.includes('skip') ? 'agents/conditional-skip-gate.md' + : task.id.includes('iter') ? 'agents/iteration-handler.md' + : 'agents/completion-handler.md' + + Read(agentFile) + const agent = spawn_agent({ + message: `## TASK ASSIGNMENT\n\n### MANDATORY FIRST STEPS\n1. Read: ${agentFile}\n2. Read: ${sessionFolder}/discoveries.ndjson\n\nGoal: ${task.description}\nSession: ${sessionFolder}\n\n### Previous Context\n${buildPrevContext(task, tasks)}` + }) + const result = wait({ ids: [agent], timeout_ms: 600000 }) + if (result.timed_out) { + send_input({ id: agent, message: "Please finalize and output current findings." 
}) + wait({ ids: [agent], timeout_ms: 120000 }) + } + Write(`${sessionFolder}/interactive/${task.id}-result.json`, JSON.stringify({ + task_id: task.id, status: "completed", findings: parseFindings(result), + timestamp: getUtc8ISOString() + })) + close_agent({ id: agent }) + task.status = 'completed' + task.findings = parseFindings(result) + } + + // Build prev_context for csv-wave tasks + const pendingCsvTasks = csvTasks.filter(t => t.status === 'pending') + for (const task of pendingCsvTasks) { + task.prev_context = buildPrevContext(task, tasks) + } + + if (pendingCsvTasks.length > 0) { + Write(`${sessionFolder}/wave-${wave}.csv`, toCsv(pendingCsvTasks)) + + const waveInstruction = buildWaveInstruction(pendingCsvTasks, sessionFolder, wave) + + spawn_agents_on_csv({ + csv_path: `${sessionFolder}/wave-${wave}.csv`, + id_column: "id", + instruction: waveInstruction, + max_concurrency: maxConcurrency, + max_runtime_seconds: 1200, + output_csv_path: `${sessionFolder}/wave-${wave}-results.csv`, + output_schema: { + type: "object", + properties: { + id: { type: "string" }, + status: { type: "string", enum: ["completed", "failed"] }, + findings: { type: "string" }, + artifacts_produced: { type: "string" }, + issues_count: { type: "string" }, + verdict: { type: "string" }, + error: { type: "string" } + } + } + }) + + // Merge results into master CSV + const results = parseCsv(Read(`${sessionFolder}/wave-${wave}-results.csv`)) + for (const r of results) { + const t = tasks.find(t => t.id === r.id) + if (t) Object.assign(t, r) + } + + // Conditional Skip: TEST-001 with 0 issues + const testResult = results.find(r => r.id === 'TEST-001') + if (testResult && parseInt(testResult.issues_count || '0') === 0) { + // Skip all downstream tasks + tasks.filter(t => t.wave > wave && t.status === 'pending').forEach(t => { + t.status = 'skipped' + t.error = 'No issues found in testing -- skipped' + }) + console.log('All features passed. No issues found. 
Pipeline complete.') + } + + // Iteration: Analyzer needs more evidence + const analyzerResult = results.find(r => r.id.startsWith('ANALYZE') && r.findings?.includes('need_more_evidence')) + if (analyzerResult) { + const reproduceRound = tasks.filter(t => t.id.startsWith('REPRODUCE')).length + if (reproduceRound < MAX_REPRODUCE_ROUNDS) { + const newRepId = `REPRODUCE-${String(reproduceRound + 1).padStart(3, '0')}` + const newAnalyzeId = `ANALYZE-${String(tasks.filter(t => t.id.startsWith('ANALYZE')).length + 1).padStart(3, '0')}` + tasks.push({ + id: newRepId, title: 'Supplemental evidence collection', + description: `PURPOSE: Collect additional evidence per Analyzer request | Success: Targeted evidence collected`, + role: 'reproducer', pipeline_mode: tasks[0].pipeline_mode, + base_url: tasks[0].base_url, evidence_dimensions: tasks[0].evidence_dimensions, + deps: '', context_from: analyzerResult.id, + exec_mode: 'csv-wave', wave: wave + 1, status: 'pending', + findings: '', artifacts_produced: '', issues_count: '', verdict: '', error: '' + }) + tasks.push({ + id: newAnalyzeId, title: 'Re-analysis with supplemental evidence', + description: `PURPOSE: Re-analyze with additional evidence | Success: Higher-confidence RCA`, + role: 'analyzer', pipeline_mode: tasks[0].pipeline_mode, + base_url: '', evidence_dimensions: '', + deps: newRepId, context_from: `${analyzerResult.id};${newRepId}`, + exec_mode: 'csv-wave', wave: wave + 2, status: 'pending', + findings: '', artifacts_produced: '', issues_count: '', verdict: '', error: '' + }) + // Update FIX task deps + const fixTask = tasks.find(t => t.id === 'FIX-001' && t.status === 'pending') + if (fixTask) fixTask.deps = newAnalyzeId + } + } + + // Re-fix: Verifier verdict = fail + const verifyResult = results.find(r => r.id.startsWith('VERIFY') && r.verdict === 'fail') + if (verifyResult && fixRound < MAX_FIX_ROUNDS) { + fixRound++ + const newFixId = `FIX-${String(fixRound + 1).padStart(3, '0')}` + const newVerifyId = 
`VERIFY-${String(fixRound + 1).padStart(3, '0')}` + tasks.push({ + id: newFixId, title: `Re-fix (round ${fixRound + 1})`, + description: `PURPOSE: Re-fix based on verification failure | Success: Issue resolved`, + role: 'fixer', pipeline_mode: tasks[0].pipeline_mode, + base_url: '', evidence_dimensions: '', + deps: verifyResult.id, context_from: verifyResult.id, + exec_mode: 'csv-wave', wave: wave + 1, status: 'pending', + findings: '', artifacts_produced: '', issues_count: '', verdict: '', error: '' + }) + tasks.push({ + id: newVerifyId, title: `Re-verify (round ${fixRound + 1})`, + description: `PURPOSE: Re-verify after fix | Success: Bug resolved`, + role: 'verifier', pipeline_mode: tasks[0].pipeline_mode, + base_url: tasks[0].base_url, evidence_dimensions: tasks[0].evidence_dimensions, + deps: newFixId, context_from: newFixId, + exec_mode: 'csv-wave', wave: wave + 2, status: 'pending', + findings: '', artifacts_produced: '', issues_count: '', verdict: '', error: '' + }) + } + } + + // Update master CSV + Write(`${sessionFolder}/tasks.csv`, toCsv(tasks)) + + // Cleanup temp files + Bash(`rm -f ${sessionFolder}/wave-${wave}.csv ${sessionFolder}/wave-${wave}-results.csv`) + + // Recalculate maxWave (may have grown from iteration/re-fix) + maxWave = Math.max(maxWave, ...tasks.map(t => t.wave)) + + // Display wave summary + const completed = waveTasks.filter(t => t.status === 'completed').length + const failed = waveTasks.filter(t => t.status === 'failed').length + const skipped = waveTasks.filter(t => t.status === 'skipped').length + console.log(`Wave ${wave} Complete: ${completed} completed, ${failed} failed, ${skipped} skipped`) +} +``` + +**Success Criteria**: +- All waves executed in order +- Conditional skip handled (TEST with 0 issues) +- Iteration loops handled (analyzer need_more_evidence) +- Re-fix cycles handled (verifier fail verdict) +- discoveries.ndjson accumulated across all waves +- Max iteration/fix bounds respected + +--- + +### Phase 3: Post-Wave 
Interactive (Completion Action) + +**Objective**: Pipeline completion report with debug summary. + +```javascript +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +const completed = tasks.filter(t => t.status === 'completed') +const pipelineMode = tasks[0]?.pipeline_mode + +console.log(` +============================================ +FRONTEND DEBUG COMPLETE + +Pipeline: ${pipelineMode} | ${completed.length}/${tasks.length} tasks +Fix Rounds: ${fixRound}/${MAX_FIX_ROUNDS} +Session: ${sessionFolder} + +Results: +${completed.map(t => ` [DONE] ${t.id} (${t.role}): ${t.findings?.substring(0, 80) || 'completed'}`).join('\n')} +============================================ +`) + +if (!AUTO_YES) { + AskUserQuestion({ + questions: [{ + question: "Debug pipeline complete. What would you like to do?", + header: "Completion", + multiSelect: false, + options: [ + { label: "Archive & Clean (Recommended)", description: "Archive session, output final summary" }, + { label: "Keep Active", description: "Keep session for follow-up debugging" }, + { label: "Export Results", description: "Export debug report and patches" } + ] + }] + }) +} +``` + +**Success Criteria**: +- User informed of debug pipeline results +- Completion action taken + +--- + +### Phase 4: Results Aggregation + +**Objective**: Generate final results and human-readable report. 
+ +```javascript +Bash(`cp ${sessionFolder}/tasks.csv ${sessionFolder}/results.csv`) + +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +let contextMd = `# Frontend Debug Report\n\n` +contextMd += `**Session**: ${sessionId}\n` +contextMd += `**Pipeline**: ${tasks[0]?.pipeline_mode}\n` +contextMd += `**Date**: ${getUtc8ISOString().substring(0, 10)}\n\n` + +contextMd += `## Summary\n` +contextMd += `| Status | Count |\n|--------|-------|\n` +contextMd += `| Completed | ${tasks.filter(t => t.status === 'completed').length} |\n` +contextMd += `| Failed | ${tasks.filter(t => t.status === 'failed').length} |\n` +contextMd += `| Skipped | ${tasks.filter(t => t.status === 'skipped').length} |\n\n` + +const maxWave = Math.max(...tasks.map(t => t.wave)) +contextMd += `## Wave Execution\n\n` +for (let w = 1; w <= maxWave; w++) { + const waveTasks = tasks.filter(t => t.wave === w) + contextMd += `### Wave ${w}\n\n` + for (const t of waveTasks) { + const icon = t.status === 'completed' ? '[DONE]' : t.status === 'failed' ? '[FAIL]' : '[SKIP]' + contextMd += `${icon} **${t.title}** [${t.role}]` + if (t.verdict) contextMd += ` Verdict: ${t.verdict}` + if (t.issues_count) contextMd += ` Issues: ${t.issues_count}` + contextMd += ` ${t.findings || ''}\n\n` + } +} + +// Debug-specific sections +const verifyTasks = tasks.filter(t => t.role === 'verifier' && t.verdict) +if (verifyTasks.length > 0) { + contextMd += `## Verification Results\n\n` + for (const v of verifyTasks) { + contextMd += `- **${v.id}**: ${v.verdict}\n` + } +} + +Write(`${sessionFolder}/context.md`, contextMd) +console.log(`Results exported to: ${sessionFolder}/results.csv`) +console.log(`Report generated at: ${sessionFolder}/context.md`) +``` + +**Success Criteria**: +- results.csv exported +- context.md generated with debug summary +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +All agents share a single `discoveries.ndjson` file. 
+ +**Format**: One JSON object per line (NDJSON): + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"TEST-001","type":"feature_tested","data":{"feature":"F-001","name":"Login","result":"fail","issues":2}} +{"ts":"2026-03-08T10:05:00Z","worker":"REPRODUCE-001","type":"bug_reproduced","data":{"url":"/settings","steps":3,"console_errors":2,"network_failures":1}} +{"ts":"2026-03-08T10:10:00Z","worker":"ANALYZE-001","type":"root_cause_found","data":{"category":"TypeError","file":"src/components/Settings.tsx","line":142,"confidence":"high"}} +{"ts":"2026-03-08T10:15:00Z","worker":"FIX-001","type":"file_modified","data":{"file":"src/components/Settings.tsx","change":"Added null check","lines_added":3}} +{"ts":"2026-03-08T10:20:00Z","worker":"VERIFY-001","type":"verification_result","data":{"verdict":"pass","original_error_resolved":true,"new_errors":0}} +``` + +**Discovery Types**: + +| Type | Data Schema | Description | +|------|-------------|-------------| +| `feature_tested` | `{feature, name, result, issues}` | Feature test result | +| `bug_reproduced` | `{url, steps, console_errors, network_failures}` | Bug reproduction result | +| `evidence_collected` | `{dimension, file, description}` | Evidence artifact saved | +| `root_cause_found` | `{category, file, line, confidence}` | Root cause identified | +| `file_modified` | `{file, change, lines_added}` | Code fix applied | +| `verification_result` | `{verdict, original_error_resolved, new_errors}` | Fix verification result | +| `issue_found` | `{file, line, severity, description}` | Issue discovered | + +**Protocol**: +1. Agents MUST read discoveries.ndjson at start of execution +2. Agents MUST append relevant discoveries during execution +3. Agents MUST NOT modify or delete existing entries +4. Deduplication by `{type, data.file}` key + +--- + +## Conditional Skip Logic + +After TEST-001 completes, evaluate issues: + +| Condition | Action | +|-----------|--------| +| `issues_count === 0` | Skip ANALYZE/FIX/VERIFY. 
Pipeline complete with all-pass. | +| Only low-severity warnings | AskUserQuestion: fix warnings or complete | +| High/medium severity issues | Proceed with ANALYZE -> FIX -> VERIFY | + +--- + +## Iteration Rules + +| Trigger | Condition | Action | Max | +|---------|-----------|--------|-----| +| Analyzer -> Reproducer | Confidence < 50% | Create REPRODUCE-002 -> ANALYZE-002 | 2 reproduction rounds | +| Verifier -> Fixer | Verdict = fail | Create FIX-002 -> VERIFY-002 | 3 fix rounds | +| Max iterations reached | Round >= max | Report to user for manual intervention | -- | + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Circular dependency | Detect in wave computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent tasks in later waves | +| Interactive agent timeout | Urge convergence via send_input, then close if still timed out | +| All agents in wave failed | Log error, offer retry or abort | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| All features pass test | Skip downstream tasks, report success | +| Bug not reproducible | Report failure, ask user for more details | +| Browser not available | Report error, suggest manual reproduction steps | +| Analysis inconclusive | Request more evidence via iteration loop | +| Fix introduces regression | Verifier reports fail, dispatch re-fix | +| Max iterations reached | Escalate to user for manual intervention | +| Continue mode: no session found | List available sessions, prompt user to select | + +--- + +## Core Rules + +1. **Start Immediately**: First action is session initialization, then Phase 0/1 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. 
**CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive) +4. **CSV First**: Default to csv-wave for tasks; only use interactive when interaction pattern requires it +5. **Context Propagation**: prev_context built from master CSV, not from memory +6. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson +7. **Skip on Failure**: If a dependency failed, skip the dependent task +8. **Conditional Skip**: If TEST finds 0 issues, skip all downstream tasks +9. **Iteration Bounds**: Max 2 reproduction rounds, max 3 fix rounds +10. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +11. **DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-frontend-debug/agents/completion-handler.md b/.codex/skills/team-frontend-debug/agents/completion-handler.md new file mode 100644 index 00000000..13a342b0 --- /dev/null +++ b/.codex/skills/team-frontend-debug/agents/completion-handler.md @@ -0,0 +1,142 @@ +# Completion Handler Agent + +Interactive agent for handling pipeline completion action. Presents debug summary and offers Archive/Keep/Export choices. 
+ +## Identity + +- **Type**: `interactive` +- **Role File**: `agents/completion-handler.md` +- **Responsibility**: Present debug pipeline results, handle completion choice, execute cleanup or export + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Read all task results from master CSV +- Present debug summary (reproduction, RCA, fix, verification) +- Wait for user choice before acting +- Produce structured output following template + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Delete session files without user approval +- Modify task artifacts +- Produce unstructured output + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | built-in | Load task results and artifacts | +| `AskUserQuestion` | built-in | Get user completion choice | +| `Write` | built-in | Store completion result | +| `Bash` | built-in | Execute archive/export operations | + +--- + +## Execution + +### Phase 1: Results Loading + +**Objective**: Load all task results and build debug summary + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| tasks.csv | Yes | Master state with all task results | +| Artifact files | No | Verify deliverables exist | + +**Steps**: + +1. Read master tasks.csv +2. Parse all completed tasks and their artifacts +3. Build debug summary: + - Bug description and reproduction results + - Root cause analysis findings + - Files modified and patches applied + - Verification results (pass/fail) + - Evidence inventory (screenshots, logs, traces) +4. Calculate pipeline statistics + +**Output**: Debug summary ready for user + +--- + +### Phase 2: Completion Choice + +**Objective**: Present debug results and get user action + +**Steps**: + +1. Display pipeline summary with debug details +2. Present completion choice: + +```javascript +AskUserQuestion({ + questions: [{ + question: "Debug pipeline complete. 
What would you like to do?", + header: "Completion", + multiSelect: false, + options: [ + { label: "Archive & Clean (Recommended)", description: "Archive session, output final summary" }, + { label: "Keep Active", description: "Keep session for follow-up debugging" }, + { label: "Export Results", description: "Export debug report and patches" } + ] + }] +}) +``` + +3. Handle response: + +| Response | Action | +|----------|--------| +| Archive & Clean | Mark session completed, output final summary | +| Keep Active | Mark session paused, keep all evidence/artifacts | +| Export Results | Copy RCA report, fix changes, verification report to project directory | + +**Output**: Completion action result + +--- + +## Structured Output Template + +``` +## Summary +- Pipeline mode: +- Tasks completed: / +- Fix rounds: / +- Final verdict: + +## Debug Summary +- Bug: +- Root cause: +- Fix: +- Verification: + +## Evidence Inventory +- Screenshots: +- Console logs: +- Network logs: +- Performance trace: + +## Action Taken +- Choice: +- Session status: +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| tasks.csv not found | Report error, cannot complete | +| Artifacts missing | Report partial completion with gaps noted | +| User does not respond | Timeout, default to keep active | diff --git a/.codex/skills/team-frontend-debug/agents/conditional-skip-gate.md b/.codex/skills/team-frontend-debug/agents/conditional-skip-gate.md new file mode 100644 index 00000000..6a942347 --- /dev/null +++ b/.codex/skills/team-frontend-debug/agents/conditional-skip-gate.md @@ -0,0 +1,130 @@ +# Conditional Skip Gate Agent + +Interactive agent for evaluating TEST-001 results and determining whether to skip downstream tasks (ANALYZE, FIX, VERIFY) when no issues are found. 
+ +## Identity + +- **Type**: `interactive` +- **Role File**: `agents/conditional-skip-gate.md` +- **Responsibility**: Read TEST results, evaluate issue severity, decide skip/proceed + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Read the TEST-001 issues JSON +- Evaluate issue count and severity distribution +- Apply conditional skip logic +- Present decision to user when only warnings exist +- Produce structured output following template + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Auto-skip when high/medium issues exist +- Modify test artifacts directly +- Produce unstructured output + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | built-in | Load test results and issues | +| `AskUserQuestion` | built-in | Get user decision on warnings | +| `Write` | built-in | Store gate decision result | + +--- + +## Execution + +### Phase 1: Load Test Results + +**Objective**: Load TEST-001 issues and evaluate severity + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| TEST-001-issues.json | Yes | Discovered issues with severity | +| TEST-001-report.md | No | Full test report | + +**Steps**: + +1. Extract session path from task assignment +2. Read TEST-001-issues.json +3. Parse issues array +4. Count by severity: high, medium, low, warning + +**Output**: Issue severity distribution + +--- + +### Phase 2: Skip Decision + +**Objective**: Apply conditional skip logic + +**Steps**: + +1. Evaluate issues: + +| Condition | Action | +|-----------|--------| +| `issues.length === 0` | Skip all downstream. Report "all_pass". | +| Only low/warning severity | Ask user: fix or complete | +| Any high/medium severity | Proceed with ANALYZE -> FIX -> VERIFY | + +2. If only warnings, present choice: + +```javascript +AskUserQuestion({ + questions: [{ + question: "Testing found only low-severity warnings. 
How would you like to proceed?", + header: "Test Results", + multiSelect: false, + options: [ + { label: "Fix warnings", description: "Proceed with analysis and fixes for warnings" }, + { label: "Complete", description: "Accept current state, skip remaining tasks" } + ] + }] +}) +``` + +3. Handle response and record decision + +**Output**: Skip/proceed directive + +--- + +## Structured Output Template + +``` +## Summary +- Test report evaluated: TEST-001 +- Issues found: +- High: , Medium: , Low: , Warning: +- Decision: + +## Findings +- All features tested: +- Pass rate: + +## Decision Details +- Action: +- Downstream tasks affected: ANALYZE-001, FIX-001, VERIFY-001 +- User choice: +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| TEST-001-issues.json not found | Report error, cannot evaluate | +| Issues JSON malformed | Report parse error, default to proceed | +| User does not respond | Timeout, default to proceed with fixes | diff --git a/.codex/skills/team-frontend-debug/agents/iteration-handler.md b/.codex/skills/team-frontend-debug/agents/iteration-handler.md new file mode 100644 index 00000000..6052df15 --- /dev/null +++ b/.codex/skills/team-frontend-debug/agents/iteration-handler.md @@ -0,0 +1,120 @@ +# Iteration Handler Agent + +Interactive agent for handling the analyzer's request for more evidence. Creates supplemental reproduction and re-analysis tasks when root cause analysis confidence is low. 
+ +## Identity + +- **Type**: `interactive` +- **Role File**: `agents/iteration-handler.md` +- **Responsibility**: Parse analyzer evidence request, create REPRODUCE-002 + ANALYZE-002 tasks, update dependency chain + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Read the analyzer's need_more_evidence request +- Parse specific evidence dimensions and actions requested +- Create supplemental reproduction task description +- Create re-analysis task description +- Update FIX dependency to point to new ANALYZE task +- Produce structured output following template + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Ignore the analyzer's specific requests +- Create tasks beyond iteration bounds (max 2 reproduction rounds) +- Modify existing task artifacts + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | built-in | Load analyzer output and session state | +| `Write` | built-in | Store iteration handler result | + +--- + +## Execution + +### Phase 1: Parse Evidence Request + +**Objective**: Understand what additional evidence the analyzer needs + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Analyzer findings | Yes | Contains need_more_evidence with specifics | +| Session state | No | Current iteration count | + +**Steps**: + +1. Extract session path from task assignment +2. Read analyzer's findings or RCA report (partial) +3. Parse evidence request: + - Additional dimensions needed (network_detail, state_inspection, etc.) + - Specific actions (capture request body, evaluate React state, etc.) +4. Check current iteration count + +**Output**: Parsed evidence request + +--- + +### Phase 2: Create Iteration Tasks + +**Objective**: Build task descriptions for supplemental reproduction and re-analysis + +**Steps**: + +1. 
Check iteration bounds: + +| Condition | Action | +|-----------|--------| +| Reproduction rounds < 2 | Create REPRODUCE-002 + ANALYZE-002 | +| Reproduction rounds >= 2 | Escalate to user for manual investigation | + +2. Build REPRODUCE-002 description with specific evidence requests from analyzer + +3. Build ANALYZE-002 description that loads both original and supplemental evidence + +4. Record new tasks and dependency updates + +**Output**: Task descriptions for dynamic wave extension + +--- + +## Structured Output Template + +``` +## Summary +- Analyzer evidence request processed +- Iteration round: / +- Action: + +## Evidence Request +- Dimensions needed: +- Specific actions: + +## Tasks Created +- REPRODUCE-002: +- ANALYZE-002: + +## Dependency Updates +- FIX-001 deps updated: ANALYZE-001 -> ANALYZE-002 +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Evidence request unclear | Use all default dimensions | +| Max iterations reached | Escalate to user | +| Session state missing | Default to iteration round 1 | diff --git a/.codex/skills/team-frontend-debug/instructions/agent-instruction.md b/.codex/skills/team-frontend-debug/instructions/agent-instruction.md new file mode 100644 index 00000000..806e0695 --- /dev/null +++ b/.codex/skills/team-frontend-debug/instructions/agent-instruction.md @@ -0,0 +1,272 @@ +# Agent Instruction Template -- Team Frontend Debug + +Base instruction template for CSV wave agents. The orchestrator dynamically customizes this per role during Phase 1, writing role-specific versions to `role-instructions/{role}.md`. + +## Purpose + +| Phase | Usage | +|-------|-------| +| Phase 1 | Coordinator generates per-role instruction from this template | +| Phase 2 | Injected as `instruction` parameter to `spawn_agents_on_csv` | + +--- + +## Base Instruction Template + +```markdown +## TASK ASSIGNMENT -- Team Frontend Debug + +### MANDATORY FIRST STEPS +1. 
Read shared discoveries: /discoveries.ndjson (if exists, skip if not) +2. Read project context: .workflow/project-tech.json (if exists) + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Role**: {role} +**Pipeline Mode**: {pipeline_mode} +**Base URL**: {base_url} +**Evidence Dimensions**: {evidence_dimensions} + +### Task Description +{description} + +### Previous Tasks' Findings (Context) +{prev_context} + +--- + +## Execution Protocol + +1. **Read discoveries**: Load /discoveries.ndjson for shared exploration findings +2. **Use context**: Apply previous tasks' findings from prev_context above +3. **Execute task**: Follow role-specific instructions below +4. **Share discoveries**: Append exploration findings to shared board: + ```bash + echo '{"ts":"","worker":"{id}","type":"","data":{...}}' >> /discoveries.ndjson + ``` +5. **Report result**: Return JSON via report_agent_job_result + +### Discovery Types to Share +- `feature_tested`: {feature, name, result, issues} -- Feature test result +- `bug_reproduced`: {url, steps, console_errors, network_failures} -- Bug reproduction outcome +- `evidence_collected`: {dimension, file, description} -- Evidence artifact saved +- `root_cause_found`: {category, file, line, confidence} -- Root cause identified +- `file_modified`: {file, change, lines_added} -- Code fix applied +- `verification_result`: {verdict, original_error_resolved, new_errors} -- Verification outcome +- `issue_found`: {file, line, severity, description} -- Issue discovered + +--- + +## Output (report_agent_job_result) + +Return JSON: +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Key discoveries and implementation notes (max 500 chars)", + "artifacts_produced": "semicolon-separated paths of produced files", + "issues_count": "", + "verdict": "", + "error": "" +} +``` + +--- + +## Role-Specific Customization + +The coordinator generates per-role instruction variants during Phase 1. 
+ +### For Tester Role (test-pipeline) + +``` +3. **Execute**: + - Parse feature list from task description + - For each feature: + a. Navigate to feature URL: mcp__chrome-devtools__navigate_page({ type: "url", url: "" }) + b. Wait for page load: mcp__chrome-devtools__wait_for({ text: [""], timeout: 10000 }) + c. Explore page structure: mcp__chrome-devtools__take_snapshot() + d. Generate test scenarios from UI elements if not predefined + e. Capture baseline: take_screenshot (before), list_console_messages + f. Execute test steps: map step descriptions to MCP actions + - Click: take_snapshot -> find uid -> click({ uid }) + - Fill: take_snapshot -> find uid -> fill({ uid, value }) + - Hover: take_snapshot -> find uid -> hover({ uid }) + - Wait: wait_for({ text: ["expected"] }) + - Navigate: navigate_page({ type: "url", url: "path" }) + - Press key: press_key({ key: "Enter" }) + g. Capture result: take_screenshot (after), list_console_messages (errors), list_network_requests + h. Evaluate: console errors? network failures? expected text present? visual issues? + i. Classify: pass / fail / warning + - Compile test report: /artifacts/TEST-001-report.md + - Compile issues list: /artifacts/TEST-001-issues.json + - Set issues_count in output +``` + +### For Reproducer Role (debug-pipeline) + +``` +3. 
**Execute**: + - Verify browser accessible: mcp__chrome-devtools__list_pages() + - Navigate to target URL: mcp__chrome-devtools__navigate_page({ type: "url", url: "" }) + - Wait for load: mcp__chrome-devtools__wait_for({ text: [""], timeout: 10000 }) + - Capture baseline evidence: + - Screenshot (before): take_screenshot({ filePath: "/evidence/before-screenshot.png" }) + - DOM snapshot (before): take_snapshot({ filePath: "/evidence/before-snapshot.txt" }) + - Console baseline: list_console_messages() + - Execute reproduction steps: + - For each step, parse action and execute via MCP tools + - Track DOM changes via snapshots after key steps + - Capture post-action evidence: + - Screenshot (after): take_screenshot({ filePath: "/evidence/after-screenshot.png" }) + - DOM snapshot (after): take_snapshot({ filePath: "/evidence/after-snapshot.txt" }) + - Console errors: list_console_messages({ types: ["error", "warn"] }) + - Network requests: list_network_requests({ resourceTypes: ["xhr", "fetch"] }) + - Request details for failures: get_network_request({ reqid: }) + - Performance trace (if dimension): performance_start_trace() + reproduce + performance_stop_trace() + - Write evidence-summary.json to /evidence/ +``` + +### For Analyzer Role + +``` +3. **Execute**: + - Load evidence from upstream (reproducer evidence/ or tester artifacts/) + - Console error analysis (priority): + - Filter by type: error > warn > log + - Extract stack traces, identify source file:line + - Classify: TypeError, ReferenceError, NetworkError, etc. 
+ - Network analysis (if dimension): + - Identify failed requests (4xx, 5xx, timeout, CORS) + - Check auth tokens, API endpoints, payload issues + - DOM structure analysis (if snapshots): + - Compare before/after snapshots + - Identify missing/extra elements, attribute anomalies + - Performance analysis (if trace): + - Identify long tasks (>50ms), layout thrashing, memory leaks + - Cross-correlation: build timeline, identify trigger point + - Source code mapping: + - Use mcp__ace-tool__search_context or Grep to locate root cause + - Read identified source files + - Confidence assessment: + - High (>80%): clear stack trace + specific line + - Medium (50-80%): likely cause, needs confirmation + - Low (<50%): request more evidence (set findings to include "need_more_evidence") + - Write RCA report to /artifacts/ANALYZE-001-rca.md + - Set issues_count in output +``` + +### For Fixer Role + +``` +3. **Execute**: + - Load RCA report from analyzer output + - Extract root cause: category, file, line, recommended fix + - Read identified source files + - Search for similar patterns: mcp__ace-tool__search_context + - Plan fix: minimal change addressing root cause + - Apply fix strategy by category: + - TypeError/null: add null check, default value + - API error: fix URL, add error handling + - Missing import: add import statement + - CSS/rendering: fix styles, layout + - State bug: fix state update logic + - Race condition: add async handling + - Implement fix using Edit tool (fallback: mcp__ccw-tools__edit_file) + - Validate: run syntax/type checks + - Document changes in /artifacts/FIX-001-changes.md +``` + +### For Verifier Role + +``` +3. 
**Execute**: + - Load original evidence (reproducer) and fix changes (fixer) + - Pre-verification: check modified files contain expected changes + - Navigate to same URL: mcp__chrome-devtools__navigate_page + - Execute EXACT same reproduction/test steps + - Capture post-fix evidence: + - Screenshot: take_screenshot({ filePath: "/evidence/verify-screenshot.png" }) + - DOM snapshot: take_snapshot({ filePath: "/evidence/verify-snapshot.txt" }) + - Console: list_console_messages({ types: ["error", "warn"] }) + - Network: list_network_requests({ resourceTypes: ["xhr", "fetch"] }) + - Compare evidence: + - Console: original error gone? + - Network: failed request now succeeds? + - Visual: expected rendering achieved? + - New errors: any regression? + - Determine verdict: + - pass: original resolved AND no new errors + - pass_with_warnings: original resolved BUT new issues + - fail: original still present + - Write verification report to /artifacts/VERIFY-001-report.md + - Set verdict in output +``` + +--- + +## Chrome DevTools MCP Reference + +### Common Patterns + +**Navigate and Wait**: +``` +mcp__chrome-devtools__navigate_page({ type: "url", url: "" }) +mcp__chrome-devtools__wait_for({ text: [""], timeout: 10000 }) +``` + +**Find Element and Interact**: +``` +mcp__chrome-devtools__take_snapshot() // Get uids +mcp__chrome-devtools__click({ uid: "" }) +mcp__chrome-devtools__fill({ uid: "", value: "" }) +``` + +**Capture Evidence**: +``` +mcp__chrome-devtools__take_screenshot({ filePath: "" }) +mcp__chrome-devtools__list_console_messages({ types: ["error", "warn"] }) +mcp__chrome-devtools__list_network_requests({ resourceTypes: ["xhr", "fetch"] }) +``` + +**Debug API Error**: +``` +mcp__chrome-devtools__list_network_requests() // Find request +mcp__chrome-devtools__get_network_request({ reqid: }) // Inspect details +``` + +--- + +## Quality Requirements + +All agents must verify before reporting complete: + +| Requirement | Criteria | +|-------------|----------| +| Files 
produced | Verify all claimed artifacts exist via Read |
| Evidence captured | All planned dimensions have evidence files |
| Findings accuracy | Findings reflect actual observations |
| Discovery sharing | At least 1 discovery shared to board |
| Error reporting | Non-empty error field if status is failed |
| Verdict set | verifier role sets verdict field |
| Issues count set | tester/analyzer roles set issues_count field |

---

## Placeholder Reference

| Placeholder | Resolved By | When |
|-------------|------------|------|
| `<session_folder>` | Skill designer (Phase 1) | Literal path baked into instruction |
| `{id}` | spawn_agents_on_csv | Runtime from CSV row |
| `{title}` | spawn_agents_on_csv | Runtime from CSV row |
| `{description}` | spawn_agents_on_csv | Runtime from CSV row |
| `{role}` | spawn_agents_on_csv | Runtime from CSV row |
| `{pipeline_mode}` | spawn_agents_on_csv | Runtime from CSV row |
| `{base_url}` | spawn_agents_on_csv | Runtime from CSV row |
| `{evidence_dimensions}` | spawn_agents_on_csv | Runtime from CSV row |
| `{prev_context}` | spawn_agents_on_csv | Runtime from CSV row | diff --git a/.codex/skills/team-frontend-debug/schemas/tasks-schema.md b/.codex/skills/team-frontend-debug/schemas/tasks-schema.md new file mode 100644 index 00000000..b4230d47 --- /dev/null +++ b/.codex/skills/team-frontend-debug/schemas/tasks-schema.md @@ -0,0 +1,198 @@ +# Team Frontend Debug -- CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### Input Columns (Set by Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | string | Yes | Unique task identifier (PREFIX-NNN) | `"TEST-001"` | +| `title` | string | Yes | Short task title | `"Feature testing"` | +| `description` | string | Yes | Detailed task description (self-contained) with PURPOSE/TASK/CONTEXT/EXPECTED/CONSTRAINTS | `"PURPOSE: Test all features from list..."` | +| `role` | enum | Yes | 
Worker role: `tester`, `reproducer`, `analyzer`, `fixer`, `verifier` | `"tester"` | +| `pipeline_mode` | enum | Yes | Pipeline mode: `test-pipeline` or `debug-pipeline` | `"test-pipeline"` | +| `base_url` | string | No | Target URL for browser-based tasks | `"http://localhost:3000"` | +| `evidence_dimensions` | string | No | Semicolon-separated evidence types to collect | `"screenshot;console;network"` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"TEST-001"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"TEST-001"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[TEST-001] Found 3 issues: 2 high, 1 medium..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` -> `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Tested 5 features: 3 pass, 2 fail. BUG-001: TypeError on login. 
BUG-002: API 500 on save."` | +| `artifacts_produced` | string | Semicolon-separated paths of produced artifacts | `"artifacts/TEST-001-report.md;artifacts/TEST-001-issues.json"` | +| `issues_count` | string | Number of issues found (tester/analyzer only, empty for others) | `"2"` | +| `verdict` | string | Verification verdict: `pass`, `pass_with_warnings`, `fail` (verifier only) | `"pass"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution | + +Interactive tasks appear in master CSV for dependency tracking but are NOT included in wave-{N}.csv files. + +--- + +### Role Prefixes + +| Role | Prefix | Pipeline | Inner Loop | +|------|--------|----------|------------| +| tester | TEST | test-pipeline | Yes (iterates over features) | +| reproducer | REPRODUCE | debug-pipeline | No | +| analyzer | ANALYZE | both | No | +| fixer | FIX | both | Yes (may need multiple fix passes) | +| verifier | VERIFY | both | No | + +--- + +### Example Data (Test Pipeline) + +```csv +id,title,description,role,pipeline_mode,base_url,evidence_dimensions,deps,context_from,exec_mode,wave,status,findings,artifacts_produced,issues_count,verdict,error +"TEST-001","Feature testing","PURPOSE: Test all features from feature list and discover issues | Success: All features tested with pass/fail results\nTASK:\n- Parse feature list\n- Navigate to each feature URL using Chrome DevTools\n- Execute test scenarios (click, fill, hover)\n- Capture evidence: screenshots, console logs, network requests\n- Classify results: pass/fail/warning\nCONTEXT:\n- Session: .workflow/.csv-wave/tfd-login-test-20260308\n- Base URL: http://localhost:3000\n- Features: Login, Dashboard, Profile\nEXPECTED: artifacts/TEST-001-report.md + 
artifacts/TEST-001-issues.json\nCONSTRAINTS: Chrome DevTools MCP only | No code modifications","tester","test-pipeline","http://localhost:3000","screenshot;console;network","","","csv-wave","1","pending","","","","","" +"ANALYZE-001","Root cause analysis","PURPOSE: Analyze discovered issues to identify root causes | Success: RCA for each high/medium issue\nTASK:\n- Load test report and issues list\n- Analyze console errors, network failures, DOM anomalies\n- Map to source code locations\nCONTEXT:\n- Session: .workflow/.csv-wave/tfd-login-test-20260308\n- Upstream: artifacts/TEST-001-issues.json\nEXPECTED: artifacts/ANALYZE-001-rca.md","analyzer","test-pipeline","","console;network","TEST-001","TEST-001","csv-wave","2","pending","","","","","" +"FIX-001","Fix all issues","PURPOSE: Fix identified issues | Success: All high/medium issues resolved\nTASK:\n- Load RCA report\n- Locate and fix each root cause\n- Run syntax/type checks\nCONTEXT:\n- Session: .workflow/.csv-wave/tfd-login-test-20260308\n- Upstream: artifacts/ANALYZE-001-rca.md\nEXPECTED: Modified source files + artifacts/FIX-001-changes.md","fixer","test-pipeline","","","ANALYZE-001","ANALYZE-001","csv-wave","3","pending","","","","","" +"VERIFY-001","Verify fixes","PURPOSE: Re-test failed scenarios to verify fixes | Success: Previously failed scenarios now pass\nTASK:\n- Re-execute failed test scenarios\n- Capture evidence and compare\n- Report pass/fail per scenario\nCONTEXT:\n- Session: .workflow/.csv-wave/tfd-login-test-20260308\n- Original: artifacts/TEST-001-report.md\n- Fix: artifacts/FIX-001-changes.md\nEXPECTED: artifacts/VERIFY-001-report.md","verifier","test-pipeline","http://localhost:3000","screenshot;console;network","FIX-001","FIX-001;TEST-001","csv-wave","4","pending","","","","","" +``` + +### Example Data (Debug Pipeline) + +```csv 
+id,title,description,role,pipeline_mode,base_url,evidence_dimensions,deps,context_from,exec_mode,wave,status,findings,artifacts_produced,issues_count,verdict,error +"REPRODUCE-001","Bug reproduction","PURPOSE: Reproduce bug and collect evidence | Success: Bug reproduced with artifacts\nTASK:\n- Navigate to target URL\n- Execute reproduction steps\n- Capture screenshots, snapshots, console logs, network\nCONTEXT:\n- Session: .workflow/.csv-wave/tfd-save-crash-20260308\n- Bug URL: http://localhost:3000/settings\n- Steps: 1. Click save 2. Observe white screen\nEXPECTED: evidence/ directory with all captures","reproducer","debug-pipeline","http://localhost:3000/settings","screenshot;console;network;snapshot","","","csv-wave","1","pending","","","","","" +"ANALYZE-001","Root cause analysis","PURPOSE: Analyze evidence to find root cause | Success: RCA with file:line location\nTASK:\n- Load evidence from reproducer\n- Analyze console errors and stack traces\n- Map to source code\nCONTEXT:\n- Session: .workflow/.csv-wave/tfd-save-crash-20260308\n- Upstream: evidence/\nEXPECTED: artifacts/ANALYZE-001-rca.md","analyzer","debug-pipeline","","","REPRODUCE-001","REPRODUCE-001","csv-wave","2","pending","","","","","" +"FIX-001","Code fix","PURPOSE: Fix the identified bug | Success: Root cause resolved\nTASK:\n- Load RCA report\n- Implement fix\n- Validate syntax\nCONTEXT:\n- Session: .workflow/.csv-wave/tfd-save-crash-20260308\n- Upstream: artifacts/ANALYZE-001-rca.md\nEXPECTED: Modified files + artifacts/FIX-001-changes.md","fixer","debug-pipeline","","","ANALYZE-001","ANALYZE-001","csv-wave","3","pending","","","","","" +"VERIFY-001","Fix verification","PURPOSE: Verify bug is fixed | Success: Original bug no longer reproduces\nTASK:\n- Same reproduction steps as REPRODUCE-001\n- Capture evidence and compare\n- Confirm resolution\nCONTEXT:\n- Session: .workflow/.csv-wave/tfd-save-crash-20260308\n- Original: evidence/\n- Fix: artifacts/FIX-001-changes.md\nEXPECTED: 
artifacts/VERIFY-001-report.md","verifier","debug-pipeline","http://localhost:3000/settings","screenshot;console;network;snapshot","FIX-001","FIX-001;REPRODUCE-001","csv-wave","4","pending","","","","","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +--------------------- -------------------- ----------------- +id ----------> id ----------> id +title ----------> title ----------> (reads) +description ----------> description ----------> (reads) +role ----------> role ----------> (reads) +pipeline_mode ---------> pipeline_mode ---------> (reads) +base_url ----------> base_url ----------> (reads) +evidence_dimensions ---> evidence_dimensions ---> (reads) +deps ----------> deps ----------> (reads) +context_from----------> context_from----------> (reads) +exec_mode ----------> exec_mode ----------> (reads) + wave ----------> (reads) + prev_context ----------> (reads) + status + findings + artifacts_produced + issues_count + verdict + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +Tester output: +```json +{ + "id": "TEST-001", + "status": "completed", + "findings": "Tested 5 features: 3 pass, 2 fail. BUG-001: TypeError on login submit. BUG-002: API 500 on profile save.", + "artifacts_produced": "artifacts/TEST-001-report.md;artifacts/TEST-001-issues.json", + "issues_count": "2", + "verdict": "", + "error": "" +} +``` + +Verifier output: +```json +{ + "id": "VERIFY-001", + "status": "completed", + "findings": "Original bug resolved. Login error no longer appears. No new console errors. No new network failures.", + "artifacts_produced": "artifacts/VERIFY-001-report.md", + "issues_count": "", + "verdict": "pass", + "error": "" +} +``` + +Interactive tasks output via structured text or JSON written to `interactive/{id}-result.json`. 
+ +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `feature_tested` | `data.feature` | `{feature, name, result, issues}` | Feature test result | +| `bug_reproduced` | `data.url` | `{url, steps, console_errors, network_failures}` | Bug reproduction result | +| `evidence_collected` | `data.dimension+data.file` | `{dimension, file, description}` | Evidence artifact saved | +| `root_cause_found` | `data.file+data.line` | `{category, file, line, confidence}` | Root cause identified | +| `file_modified` | `data.file` | `{file, change, lines_added}` | Code fix applied | +| `verification_result` | `data.verdict` | `{verdict, original_error_resolved, new_errors}` | Fix verification | +| `issue_found` | `data.file+data.line` | `{file, line, severity, description}` | Issue discovered | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"TEST-001","type":"feature_tested","data":{"feature":"F-001","name":"Login","result":"fail","issues":1}} +{"ts":"2026-03-08T10:05:00Z","worker":"REPRODUCE-001","type":"bug_reproduced","data":{"url":"/settings","steps":3,"console_errors":2,"network_failures":0}} +{"ts":"2026-03-08T10:10:00Z","worker":"ANALYZE-001","type":"root_cause_found","data":{"category":"TypeError","file":"src/components/Settings.tsx","line":142,"confidence":"high"}} +{"ts":"2026-03-08T10:15:00Z","worker":"FIX-001","type":"file_modified","data":{"file":"src/components/Settings.tsx","change":"Added null check for user object","lines_added":3}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. 
+ +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| CSV task findings | Interactive task | Injected via spawn message or send_input | +| Interactive task result | CSV task prev_context | Read from interactive/{id}-result.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Description non-empty | Every task has description | "Empty description for task: {id}" | +| Status enum | status in {pending, completed, failed, skipped} | "Invalid status: {status}" | +| Role valid | role in {tester, reproducer, analyzer, fixer, verifier} | "Invalid role: {role}" | +| Pipeline mode valid | pipeline_mode in {test-pipeline, debug-pipeline} | "Invalid pipeline_mode: {mode}" | +| Verdict valid | verdict in {pass, pass_with_warnings, fail, ""} | "Invalid verdict: {verdict}" | +| Base URL for browser tasks | tester/reproducer/verifier have non-empty base_url | "Missing base_url for browser task: {id}" | diff --git a/.codex/skills/team-frontend/SKILL.md b/.codex/skills/team-frontend/SKILL.md new file mode 100644 index 00000000..d234c11d --- /dev/null +++ b/.codex/skills/team-frontend/SKILL.md @@ -0,0 +1,712 @@ +--- +name: team-frontend +description: Frontend development team with built-in ui-ux-pro-max design intelligence. 
Covers requirement analysis, design system generation, frontend implementation, and quality assurance. CSV wave pipeline with interactive QA gates. +argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"frontend task description\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults for scope/industry/constraints. + +# Team Frontend Development + +## Usage + +```bash +$team-frontend "Build a SaaS dashboard with user management and analytics" +$team-frontend -c 3 "Create a healthcare patient portal with WCAG AA compliance" +$team-frontend -y "Implement e-commerce product listing page with dark mode" +$team-frontend --continue "fe-saas-dashboard-20260308" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 3) +- `--continue`: Resume existing session + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Unified frontend development team: analyze requirements, retrieve design intelligence (ui-ux-pro-max), generate design token system, architect components, implement code, and run 5-dimension quality audit. Roles: analyst, architect, developer, qa -- dynamically assigned as CSV wave tasks with dependency ordering. Supports page (4-task), feature (5-task), and system (7-task) pipeline modes. 
+ +**Execution Model**: Hybrid -- CSV wave pipeline (primary) + individual agent spawn (secondary) + +``` ++-------------------------------------------------------------------+ +| TEAM FRONTEND WORKFLOW | ++-------------------------------------------------------------------+ +| | +| Phase 0: Pre-Wave Interactive (Requirement Clarification) | +| +- Parse frontend task description | +| +- Select scope (page/feature/system), industry, constraints | +| +- Output: refined requirements for decomposition | +| | +| Phase 1: Requirement -> CSV + Classification | +| +- Signal detection: keyword scan -> capability inference | +| +- Pipeline selection (page: 4-task, feature: 5-task, system) | +| +- Dependency graph from pipeline definition | +| +- Classify tasks: csv-wave | interactive (exec_mode) | +| +- Compute dependency waves (topological sort) | +| +- Generate tasks.csv with wave + exec_mode columns | +| +- User validates task breakdown (skip if -y) | +| | +| Phase 2: Wave Execution Engine (Extended) | +| +- For each wave (1..N): | +| | +- Execute pre-wave interactive tasks (if any) | +| | +- Build wave CSV (filter csv-wave tasks for this wave) | +| | +- Inject previous findings into prev_context column | +| | +- spawn_agents_on_csv(wave CSV) | +| | +- Execute post-wave interactive tasks (if any) | +| | +- Merge all results into master tasks.csv | +| | +- Check: any failed? 
-> skip dependents | +| | +- GC Loop: if QA fails, create DEV-fix + QA-recheck | +| +- discoveries.ndjson shared across all modes (append-only) | +| | +| Phase 3: Post-Wave Interactive (Completion Action) | +| +- Pipeline completion report with deliverables listing | +| +- Interactive completion choice (Archive/Keep/Export) | +| +- Final aggregation / report | +| | +| Phase 4: Results Aggregation | +| +- Export final results.csv | +| +- Generate context.md with all findings | +| +- Display summary: completed/failed/skipped per wave | +| +- Offer: view results | retry failed | done | +| | ++-------------------------------------------------------------------+ +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, revision cycles, user approval | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Requirement analysis (analyst) | `csv-wave` | +| Architecture design (architect) | `csv-wave` | +| Code implementation (developer) | `csv-wave` | +| QA audit (qa) | `csv-wave` | +| Architecture review gate (qa approving architecture before dev starts) | `interactive` | +| GC loop revision (developer fixing QA issues) | `csv-wave` | +| Pipeline completion action | `interactive` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,role,pipeline_mode,scope,review_type,deps,context_from,exec_mode,wave,status,findings,artifacts_produced,qa_score,qa_verdict,error +"ANALYZE-001","Requirement analysis + design intelligence","PURPOSE: Analyze frontend requirements and retrieve design intelligence | Success: design-intelligence.json 
produced","analyst","feature","full","","","","csv-wave","1","pending","","","","","" +"ARCH-001","Design token system + component architecture","PURPOSE: Define design token system and component specs | Success: design-tokens.json + component specs produced","architect","feature","full","","ANALYZE-001","ANALYZE-001","csv-wave","2","pending","","","","","" +"QA-001","Architecture review","PURPOSE: Review architecture artifacts before development | Success: Architecture approved","qa","feature","full","architecture-review","ARCH-001","ARCH-001","csv-wave","3","pending","","","","","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier (PREFIX-NNN format: ANALYZE, ARCH, DEV, QA) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description with PURPOSE/TASK/CONTEXT/EXPECTED/CONSTRAINTS | +| `role` | Input | Role name: `analyst`, `architect`, `developer`, `qa` | +| `pipeline_mode` | Input | Pipeline: `page`, `feature`, `system` | +| `scope` | Input | Task scope: `full`, `tokens`, `components` | +| `review_type` | Input | QA review type: `architecture-review`, `code-review`, `final` (empty for non-QA) | +| `deps` | Input | Semicolon-separated dependency task IDs | +| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `wave` | Computed | Wave number (computed by topological sort, 1-based) | +| `status` | Output | `pending` -> `completed` / `failed` / `skipped` | +| `findings` | Output | Key discoveries or implementation notes (max 500 chars) | +| `artifacts_produced` | Output | Semicolon-separated paths of produced artifacts | +| `qa_score` | Output | QA weighted score (0-10, empty for non-QA tasks) | +| `qa_verdict` | Output | QA verdict: `PASSED`, `PASSED_WITH_WARNINGS`, `FIX_REQUIRED` (empty for non-QA) | +| `error` | Output | Error message if failed (empty if 
success) | + +### Per-Wave CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). + +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| QA Gate Reviewer | agents/qa-gate-reviewer.md | 2.3 (send_input cycle) | Review QA verdict and handle GC loop decisions | post-wave | +| Completion Handler | agents/completion-handler.md | 2.3 (send_input cycle) | Handle pipeline completion action (Archive/Keep/Export) | standalone | + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. + +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state -- all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 4 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 4 | +| `task-analysis.json` | Phase 0/1 output: capabilities, pipeline, roles | Created in Phase 1 | +| `role-instructions/` | Per-role instruction templates for CSV agents | Created in Phase 1 | +| `artifacts/` | All deliverables: design-intelligence.json, design-tokens.json, component-specs/, QA audits | Created by agents | +| `interactive/{id}-result.json` | Results from interactive tasks | Created per interactive task | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ ++-- tasks.csv # Master state (all tasks, both modes) ++-- results.csv # Final results export ++-- discoveries.ndjson # Shared discovery board (all agents) ++-- 
context.md # Human-readable report ++-- task-analysis.json # Phase 1 analysis output ++-- wave-{N}.csv # Temporary per-wave input (csv-wave only) ++-- role-instructions/ # Per-role instruction templates +| +-- analyst.md +| +-- architect.md +| +-- developer.md +| +-- qa.md ++-- artifacts/ # All deliverables +| +-- analysis/ +| | +-- design-intelligence.json +| | +-- requirements.md +| +-- architecture/ +| | +-- design-tokens.json +| | +-- component-specs/ +| | +-- project-structure.md +| +-- qa/ +| | +-- audit-001.md +| +-- build/ ++-- interactive/ # Interactive task artifacts +| +-- {id}-result.json ++-- wisdom/ # Cross-task knowledge + +-- learnings.md + +-- decisions.md +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +const getUtc8ISOString = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString() + +const AUTO_YES = $ARGUMENTS.includes('--yes') || $ARGUMENTS.includes('-y') +const continueMode = $ARGUMENTS.includes('--continue') +const concurrencyMatch = $ARGUMENTS.match(/(?:--concurrency|-c)\s+(\d+)/) +const maxConcurrency = concurrencyMatch ? 
parseInt(concurrencyMatch[1]) : 3 + +const requirement = $ARGUMENTS + .replace(/--yes|-y|--continue|--concurrency\s+\d+|-c\s+\d+/g, '') + .trim() + +const slug = requirement.toLowerCase() + .replace(/[^a-z0-9\u4e00-\u9fa5]+/g, '-') + .substring(0, 40) +const dateStr = getUtc8ISOString().substring(0, 10).replace(/-/g, '') +const sessionId = `fe-${slug}-${dateStr}` +const sessionFolder = `.workflow/.csv-wave/${sessionId}` + +Bash(`mkdir -p ${sessionFolder}/artifacts/analysis ${sessionFolder}/artifacts/architecture/component-specs ${sessionFolder}/artifacts/qa ${sessionFolder}/artifacts/build ${sessionFolder}/role-instructions ${sessionFolder}/interactive ${sessionFolder}/wisdom`) + +Write(`${sessionFolder}/discoveries.ndjson`, '') +Write(`${sessionFolder}/wisdom/learnings.md`, '# Learnings\n') +Write(`${sessionFolder}/wisdom/decisions.md`, '# Decisions\n') +``` + +--- + +### Phase 0: Pre-Wave Interactive (Requirement Clarification) + +**Objective**: Parse frontend task, clarify scope/industry/constraints, prepare for decomposition. + +**Workflow**: + +1. **Parse user task description** from $ARGUMENTS + +2. **Check for existing sessions** (continue mode): + - Scan `.workflow/.csv-wave/fe-*/tasks.csv` for sessions with pending tasks + - If `--continue`: resume the specified or most recent session, skip to Phase 2 + - If active session found: ask user whether to resume or start new + +3. 
**Clarify scope and industry** (skip if AUTO_YES): + + **Scope Selection**: + + | Option | Pipeline | Task Count | + |--------|----------|------------| + | Single page | page | 4 tasks (linear) | + | Multi-component feature | feature | 5 tasks (with arch review gate) | + | Full frontend system | system | 7 tasks (dual-track parallel) | + + **Industry Selection**: + + | Option | Strictness | + |--------|------------| + | SaaS/Tech | standard | + | E-commerce/Retail | standard | + | Healthcare/Finance | strict (extra accessibility) | + | Other | standard | + + **Design Constraints** (multi-select): Existing design system, WCAG AA, Responsive, Dark mode + +4. **Record requirements**: mode, scope, industry, constraints + +5. **Signal Detection** for pipeline selection: + + | Signal | Keywords | Capability | + |--------|----------|------------| + | Analysis | analyze, requirements, design intelligence | analyst | + | Architecture | design tokens, component architecture, design system | architect | + | Implementation | implement, build, code, develop, page, component | developer | + | Quality | review, audit, quality, test, accessibility | qa | + +6. **Complexity Scoring**: + + | Factor | Points | + |--------|--------| + | ui-ux-pro-max integration needed | +1 | + | Existing design system detected | +1 | + | Accessibility strict mode (healthcare/finance) | +2 | + | Multiple tech stacks | +2 | + | Dark mode required | +1 | + + Results: 1-2 page, 3-4 feature, 5+ system. Default: feature. + +**Success Criteria**: +- Scope, industry, constraints determined +- Pipeline mode selected (page/feature/system) + +--- + +### Phase 1: Requirement -> CSV + Classification + +**Objective**: Build task dependency graph, generate tasks.csv and per-role instruction templates. + +**Decomposition Rules**: + +1. 
**Pipeline Selection** based on scope: + + | Scope | Pipeline | Tasks | + |-------|----------|-------| + | page | ANALYZE-001 -> ARCH-001 -> DEV-001 -> QA-001 | 4 | + | feature | ANALYZE-001 -> ARCH-001 -> QA-001(arch) -> DEV-001 -> QA-002(code) | 5 | + | system | ANALYZE-001 -> ARCH-001 -> QA-001(arch) -> [ARCH-002 + DEV-001] -> QA-002 -> DEV-002 -> QA-003(final) | 7 | + +2. **Task Description Template**: Every task description uses PURPOSE/TASK/CONTEXT/EXPECTED/CONSTRAINTS format (see dispatch.md for full templates) + +3. **Role Instruction Generation**: Write per-role instruction templates to `role-instructions/{role}.md` using the base instruction template customized for each role (analyst, architect, developer, qa) + +**Classification Rules**: + +| Task Property | exec_mode | +|---------------|-----------| +| Analyst analysis pass | `csv-wave` | +| Architect design pass | `csv-wave` | +| Developer implementation pass | `csv-wave` | +| QA audit pass | `csv-wave` | +| All standard pipeline tasks | `csv-wave` | + +**Wave Computation**: Kahn's BFS topological sort with depth tracking. + +```javascript +// Generate per-role instruction templates +for (const role of ['analyst', 'architect', 'developer', 'qa']) { + const instruction = generateRoleInstruction(role, sessionFolder) + Write(`${sessionFolder}/role-instructions/${role}.md`, instruction) +} + +// Generate tasks.csv from pipeline definition +const tasks = buildTasksCsv(pipelineMode, requirement, sessionFolder, industry, constraints) +Write(`${sessionFolder}/tasks.csv`, toCsv(tasks)) +Write(`${sessionFolder}/task-analysis.json`, JSON.stringify(analysisResult, null, 2)) +``` + +**User Validation**: Display task breakdown with wave + exec_mode assignment (skip if AUTO_YES). 
+ +**Success Criteria**: +- tasks.csv created with valid schema, wave, and exec_mode assignments +- Role instruction templates generated in role-instructions/ +- task-analysis.json written +- No circular dependencies +- User approved (or AUTO_YES) + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with hybrid mechanism support and cross-wave context propagation. + +```javascript +const masterCsv = Read(`${sessionFolder}/tasks.csv`) +let tasks = parseCsv(masterCsv) +const maxWave = Math.max(...tasks.map(t => t.wave)) +let gcRound = 0 +const MAX_GC_ROUNDS = 2 + +for (let wave = 1; wave <= maxWave; wave++) { + console.log(`\nWave ${wave}/${maxWave}`) + + // 1. Separate tasks by exec_mode + const waveTasks = tasks.filter(t => t.wave === wave && t.status === 'pending') + const csvTasks = waveTasks.filter(t => t.exec_mode === 'csv-wave') + const interactiveTasks = waveTasks.filter(t => t.exec_mode === 'interactive') + + // 2. Check dependencies -- skip tasks whose deps failed + for (const task of waveTasks) { + const depIds = (task.deps || '').split(';').filter(Boolean) + const depStatuses = depIds.map(id => tasks.find(t => t.id === id)?.status) + if (depStatuses.some(s => s === 'failed' || s === 'skipped')) { + task.status = 'skipped' + task.error = `Dependency failed: ${depIds.filter((id, i) => + ['failed','skipped'].includes(depStatuses[i])).join(', ')}` + } + } + + // 3. Execute pre-wave interactive tasks (if any) + const preWaveInteractive = interactiveTasks.filter(t => t.status === 'pending') + for (const task of preWaveInteractive) { + Read(`agents/qa-gate-reviewer.md`) + const agent = spawn_agent({ + message: `## TASK ASSIGNMENT\n\n### MANDATORY FIRST STEPS\n1. Read: agents/qa-gate-reviewer.md\n2. 
Read: ${sessionFolder}/discoveries.ndjson\n\nGoal: ${task.description}\nScope: ${task.title}\nSession: ${sessionFolder}\n\n### Previous Context\n${buildPrevContext(task, tasks)}` + }) + let result = wait({ ids: [agent], timeout_ms: 600000 }) + if (result.timed_out) { + send_input({ id: agent, message: "Please finalize and output current findings." }) + result = wait({ ids: [agent], timeout_ms: 120000 }) + } + Write(`${sessionFolder}/interactive/${task.id}-result.json`, JSON.stringify({ + task_id: task.id, status: "completed", findings: parseFindings(result), + timestamp: getUtc8ISOString() + })) + close_agent({ id: agent }) + task.status = 'completed' + task.findings = parseFindings(result) + } + + // 4. Build prev_context for csv-wave tasks + const pendingCsvTasks = csvTasks.filter(t => t.status === 'pending') + for (const task of pendingCsvTasks) { + task.prev_context = buildPrevContext(task, tasks) + } + + if (pendingCsvTasks.length > 0) { + // 5. Write wave CSV + Write(`${sessionFolder}/wave-${wave}.csv`, toCsv(pendingCsvTasks)) + + // 6. Build instruction per role group + const waveInstruction = buildWaveInstruction(pendingCsvTasks, sessionFolder, wave) + + // 7. Execute wave via spawn_agents_on_csv + spawn_agents_on_csv({ + csv_path: `${sessionFolder}/wave-${wave}.csv`, + id_column: "id", + instruction: waveInstruction, + max_concurrency: maxConcurrency, + max_runtime_seconds: 900, + output_csv_path: `${sessionFolder}/wave-${wave}-results.csv`, + output_schema: { + type: "object", + properties: { + id: { type: "string" }, + status: { type: "string", enum: ["completed", "failed"] }, + findings: { type: "string" }, + artifacts_produced: { type: "string" }, + qa_score: { type: "string" }, + qa_verdict: { type: "string" }, + error: { type: "string" } + } + } + }) + + // 8.
Merge results into master CSV + const results = parseCsv(Read(`${sessionFolder}/wave-${wave}-results.csv`)) + for (const r of results) { + const t = tasks.find(t => t.id === r.id) + if (t) Object.assign(t, r) + } + + // 9. GC Loop Check: if a QA task returned FIX_REQUIRED + const qaResults = results.filter(r => r.id.startsWith('QA') && r.qa_verdict === 'FIX_REQUIRED') + for (const qr of qaResults) { + if (gcRound < MAX_GC_ROUNDS) { + gcRound++ + // Create DEV-fix and QA-recheck tasks, append to tasks array + const fixId = `DEV-fix-${gcRound}` + const recheckId = `QA-recheck-${gcRound}` + tasks.push({ + id: fixId, title: `Fix QA issues (round ${gcRound})`, + description: `PURPOSE: Fix issues from ${qr.id} audit | Success: All critical/high resolved\nTASK:\n- Load QA audit report\n- Fix critical/high issues\n- Re-validate\nCONTEXT:\n- Session: ${sessionFolder}\n- Upstream: ${sessionFolder}/artifacts/qa/\nEXPECTED: Fixed source files\nCONSTRAINTS: Targeted fixes only`, + role: 'developer', pipeline_mode: tasks[0].pipeline_mode, scope: 'full', + review_type: '', deps: qr.id, context_from: qr.id, + exec_mode: 'csv-wave', wave: wave + 1, status: 'pending', + findings: '', artifacts_produced: '', qa_score: '', qa_verdict: '', error: '' + }) + tasks.push({ + id: recheckId, title: `QA recheck (round ${gcRound})`, + description: `PURPOSE: Re-audit after fixes | Success: Score >= 8, critical == 0\nTASK:\n- Execute 5-dimension audit on fixed code\n- Focus on previously flagged issues\nCONTEXT:\n- Session: ${sessionFolder}\n- Review type: code-review\nEXPECTED: ${sessionFolder}/artifacts/qa/audit-recheck-${gcRound}.md`, + role: 'qa', pipeline_mode: tasks[0].pipeline_mode, scope: 'full', + review_type: 'code-review', deps: fixId, context_from: fixId, + exec_mode: 'csv-wave', wave: wave + 2, status: 'pending', + findings: '', artifacts_produced: '', qa_score: '', qa_verdict: '', error: '' + }) + // NOTE(review): maxWave must actually be extended here or the appended wave+1/wave+2 GC tasks will never execute -- declare maxWave with `let` (it is currently `const`) and set: maxWave = Math.max(maxWave, wave + 2) + } + } + } + + // 10.
Update master CSV + Write(`${sessionFolder}/tasks.csv`, toCsv(tasks)) + + // 11. Cleanup temp files + Bash(`rm -f ${sessionFolder}/wave-${wave}.csv ${sessionFolder}/wave-${wave}-results.csv`) + + // 12. Display wave summary + const completed = waveTasks.filter(t => t.status === 'completed').length + const failed = waveTasks.filter(t => t.status === 'failed').length + const skipped = waveTasks.filter(t => t.status === 'skipped').length + console.log(`Wave ${wave} Complete: ${completed} completed, ${failed} failed, ${skipped} skipped`) +} +``` + +**Success Criteria**: +- All waves executed in order +- Both csv-wave and interactive tasks handled per wave +- Each wave's results merged into master CSV before next wave starts +- Dependent tasks skipped when predecessor failed +- GC loop (developer <-> qa) handled with max 2 rounds +- discoveries.ndjson accumulated across all waves and mechanisms + +--- + +### Phase 3: Post-Wave Interactive (Completion Action) + +**Objective**: Pipeline completion report with deliverables listing and interactive completion choice. 
+ +```javascript +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +const completed = tasks.filter(t => t.status === 'completed') +const failed = tasks.filter(t => t.status === 'failed') + +// List deliverables +const deliverables = [ + { name: "Design Intelligence", path: `${sessionFolder}/artifacts/analysis/design-intelligence.json` }, + { name: "Requirements", path: `${sessionFolder}/artifacts/analysis/requirements.md` }, + { name: "Design Tokens", path: `${sessionFolder}/artifacts/architecture/design-tokens.json` }, + { name: "Component Specs", path: `${sessionFolder}/artifacts/architecture/component-specs/` }, + { name: "Project Structure", path: `${sessionFolder}/artifacts/architecture/project-structure.md` }, + { name: "QA Audits", path: `${sessionFolder}/artifacts/qa/` } +] + +console.log(` +============================================ +FRONTEND TEAM COMPLETE + +Pipeline: ${completed.length}/${tasks.length} tasks (${tasks[0]?.pipeline_mode} mode) +GC Rounds: ${gcRound}/${MAX_GC_ROUNDS} +Session: ${sessionFolder} + +Deliverables: +${deliverables.map(d => ` - ${d.name}: ${d.path}`).join('\n')} +============================================ +`) + +if (!AUTO_YES) { + AskUserQuestion({ + questions: [{ + question: "Frontend pipeline complete. What would you like to do?", + header: "Completion", + multiSelect: false, + options: [ + { label: "Archive & Clean (Recommended)", description: "Archive session, output final summary" }, + { label: "Keep Active", description: "Keep session for follow-up work" }, + { label: "Export Results", description: "Export design tokens and component specs" } + ] + }] + }) +} +``` + +**Success Criteria**: +- Post-wave interactive processing complete +- User informed of deliverables and pipeline status + +--- + +### Phase 4: Results Aggregation + +**Objective**: Generate final results and human-readable report. 
+
+```javascript
+Bash(`cp ${sessionFolder}/tasks.csv ${sessionFolder}/results.csv`)
+
+const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`))
+let contextMd = `# Frontend Development Report\n\n`
+contextMd += `**Session**: ${sessionId}\n`
+contextMd += `**Pipeline**: ${tasks[0]?.pipeline_mode} mode\n`
+contextMd += `**Date**: ${getUtc8ISOString().substring(0, 10)}\n\n`
+
+contextMd += `## Summary\n`
+contextMd += `| Status | Count |\n|--------|-------|\n`
+contextMd += `| Completed | ${tasks.filter(t => t.status === 'completed').length} |\n`
+contextMd += `| Failed | ${tasks.filter(t => t.status === 'failed').length} |\n`
+contextMd += `| Skipped | ${tasks.filter(t => t.status === 'skipped').length} |\n\n`
+
+// CSV-parsed values are strings -- coerce wave to a number before numeric comparison
+const maxWave = Math.max(...tasks.map(t => Number(t.wave)))
+contextMd += `## Wave Execution\n\n`
+for (let w = 1; w <= maxWave; w++) {
+  const waveTasks = tasks.filter(t => Number(t.wave) === w)
+  contextMd += `### Wave ${w}\n\n`
+  for (const t of waveTasks) {
+    const icon = t.status === 'completed' ? '[DONE]' : t.status === 'failed' ? '[FAIL]' : '[SKIP]'
+    contextMd += `${icon} **${t.title}** [${t.role}]`
+    if (t.qa_score) contextMd += ` Score: ${t.qa_score}/10 (${t.qa_verdict})`
+    contextMd += ` ${t.findings || ''}\n\n`
+  }
+}
+
+contextMd += `## QA Summary\n\n`
+const qaResults = tasks.filter(t => t.role === 'qa' && t.qa_score)
+for (const q of qaResults) {
+  contextMd += `- **${q.id}**: Score ${q.qa_score}/10 - ${q.qa_verdict}\n`
+}
+
+Write(`${sessionFolder}/context.md`, contextMd)
+console.log(`Results exported to: ${sessionFolder}/results.csv`)
+console.log(`Report generated at: ${sessionFolder}/context.md`)
+```
+
+**Success Criteria**:
+- results.csv exported (all tasks, both modes)
+- context.md generated with QA summary
+- Summary displayed to user
+
+---
+
+## Shared Discovery Board Protocol
+
+All agents (csv-wave and interactive) share a single `discoveries.ndjson` file for cross-task knowledge exchange.
+ +**Format**: One JSON object per line (NDJSON): + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"ANALYZE-001","type":"tech_stack_detected","data":{"stack":"react","framework":"nextjs","ui_lib":"shadcn"}} +{"ts":"2026-03-08T10:05:00Z","worker":"ARCH-001","type":"token_generated","data":{"category":"color","count":24,"supports_dark_mode":true}} +{"ts":"2026-03-08T10:10:00Z","worker":"DEV-001","type":"file_modified","data":{"file":"src/styles/tokens.css","change":"Generated CSS custom properties","lines_added":85}} +{"ts":"2026-03-08T10:15:00Z","worker":"QA-001","type":"issue_found","data":{"file":"src/components/Button.tsx","line":42,"severity":"high","description":"Missing cursor-pointer on button"}} +``` + +**Discovery Types**: + +| Type | Data Schema | Description | +|------|-------------|-------------| +| `tech_stack_detected` | `{stack, framework, ui_lib}` | Tech stack identified by analyst | +| `design_pattern_found` | `{pattern_name, location, description}` | Existing design pattern in codebase | +| `token_generated` | `{category, count, supports_dark_mode}` | Design token category created | +| `file_modified` | `{file, change, lines_added}` | File change recorded | +| `issue_found` | `{file, line, severity, description}` | QA issue discovered | +| `anti_pattern_violation` | `{pattern, file, line, description}` | Industry anti-pattern detected | +| `artifact_produced` | `{name, path, producer, type}` | Deliverable created | + +**Protocol**: +1. Agents MUST read discoveries.ndjson at start of execution +2. Agents MUST append relevant discoveries during execution +3. Agents MUST NOT modify or delete existing entries +4. 
Deduplication by `{type, data.file}` key + +--- + +## Pipeline Definitions + +### Page Mode (4 tasks, linear) + +``` +ANALYZE-001 --> ARCH-001 --> DEV-001 --> QA-001 +[analyst] [architect] [developer] [qa:code-review] + wave 1 wave 2 wave 3 wave 4 +``` + +### Feature Mode (5 tasks, with architecture review gate) + +``` +ANALYZE-001 --> ARCH-001 --> QA-001 --> DEV-001 --> QA-002 +[analyst] [architect] [qa:arch] [developer] [qa:code-review] + wave 1 wave 2 wave 3 wave 4 wave 5 +``` + +### System Mode (7 tasks, dual-track parallel) + +``` +ANALYZE-001 --> ARCH-001 --> QA-001 --> ARCH-002 --+ +[analyst] [architect] [qa:arch] [architect] | + DEV-001 --+--> QA-002 --> DEV-002 --> QA-003 + [dev:tokens] [qa] [dev:comp] [qa:final] + wave 1 wave 2 wave 3 wave 4 wave 5 wave 6 wave 7 +``` + +### Generator-Critic Loop (developer <-> qa) + +``` +developer (Generator) -> QA artifact -> qa (Critic) + <- QA feedback <- + (max 2 rounds) + +Convergence: qa.score >= 8 && qa.critical_count === 0 +``` + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Circular dependency | Detect in wave computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent tasks in later waves | +| Interactive agent timeout | Urge convergence via send_input, then close if still timed out | +| Interactive agent failed | Mark as failed, skip dependents | +| All agents in wave failed | Log error, offer retry or abort | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| QA score < 6 over 2 GC rounds | Escalate to user for manual intervention | +| ui-ux-pro-max unavailable | Degrade to LLM general design knowledge | +| Task description too vague | AskUserQuestion for clarification in Phase 0 | +| Continue mode: no session found | List available sessions, prompt user to 
select | + +--- + +## Core Rules + +1. **Start Immediately**: First action is session initialization, then Phase 0/1 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. **CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive) +4. **CSV First**: Default to csv-wave for tasks; only use interactive when interaction pattern requires it +5. **Context Propagation**: prev_context built from master CSV, not from memory +6. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson +7. **Skip on Failure**: If a dependency failed, skip the dependent task +8. **GC Loop Cap**: Max 2 generator-critic rounds between developer and qa +9. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +10. **DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-frontend/agents/completion-handler.md b/.codex/skills/team-frontend/agents/completion-handler.md new file mode 100644 index 00000000..542d1c05 --- /dev/null +++ b/.codex/skills/team-frontend/agents/completion-handler.md @@ -0,0 +1,131 @@ +# Completion Handler Agent + +Interactive agent for handling pipeline completion action. Presents deliverables summary and offers Archive/Keep/Export choices. 
+ +## Identity + +- **Type**: `interactive` +- **Role File**: `agents/completion-handler.md` +- **Responsibility**: Present pipeline results, handle completion choice, execute cleanup or export + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Read all task results from master CSV +- Present complete deliverables listing +- Wait for user choice before acting +- Produce structured output following template + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Delete session files without user approval +- Modify task artifacts +- Produce unstructured output + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | built-in | Load task results and artifacts | +| `AskUserQuestion` | built-in | Get user completion choice | +| `Write` | built-in | Store completion result | +| `Bash` | built-in | Execute archive/export operations | + +--- + +## Execution + +### Phase 1: Results Loading + +**Objective**: Load all task results and build deliverables inventory + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| tasks.csv | Yes | Master state with all task results | +| Artifact files | No | Verify deliverables exist | + +**Steps**: + +1. Read master tasks.csv +2. Parse all completed tasks and their artifacts +3. Build deliverables inventory +4. Calculate pipeline statistics + +**Output**: Deliverables summary ready for user + +--- + +### Phase 2: Completion Choice + +**Objective**: Present results and get user action + +**Steps**: + +1. Display pipeline summary with deliverables +2. Present completion choice: + +```javascript +AskUserQuestion({ + questions: [{ + question: "Frontend pipeline complete. 
What would you like to do?", + header: "Completion", + multiSelect: false, + options: [ + { label: "Archive & Clean (Recommended)", description: "Archive session, output final summary" }, + { label: "Keep Active", description: "Keep session for follow-up work" }, + { label: "Export Results", description: "Export design tokens, component specs, and QA audits" } + ] + }] +}) +``` + +3. Handle response: + +| Response | Action | +|----------|--------| +| Archive & Clean | Mark session as completed, output final summary | +| Keep Active | Mark session as paused, keep all artifacts | +| Export Results | Copy key artifacts to project directory | + +**Output**: Completion action result + +--- + +## Structured Output Template + +``` +## Summary +- Pipeline completed: tasks +- Status: +- QA final score: /10 + +## Deliverables +- Design Intelligence: +- Design Tokens: +- Component Specs: +- QA Audits: +- Implementation: files + +## Action Taken +- Choice: +- Session status: +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| tasks.csv not found | Report error, cannot complete | +| Artifacts missing | Report partial completion with gaps noted | +| User does not respond | Timeout, default to keep active | diff --git a/.codex/skills/team-frontend/agents/qa-gate-reviewer.md b/.codex/skills/team-frontend/agents/qa-gate-reviewer.md new file mode 100644 index 00000000..86ce7d40 --- /dev/null +++ b/.codex/skills/team-frontend/agents/qa-gate-reviewer.md @@ -0,0 +1,153 @@ +# QA Gate Reviewer Agent + +Interactive agent for reviewing QA audit verdicts and handling the Generator-Critic (GC) loop decision. Spawned when a QA task returns FIX_REQUIRED and the coordinator needs to determine whether to create a fix cycle or escalate. 
+
+## Identity
+
+- **Type**: `interactive`
+- **Role File**: `agents/qa-gate-reviewer.md`
+- **Responsibility**: Review QA audit verdicts, handle architecture review gates, manage GC loop decisions
+
+## Boundaries
+
+### MUST
+
+- Load role definition via MANDATORY FIRST STEPS pattern
+- Read the QA audit report being reviewed
+- Present a clear summary of QA findings to the user
+- Wait for user decision before proceeding (if not AUTO_YES)
+- Produce structured output following template
+- Include file:line references in findings
+
+### MUST NOT
+
+- Skip the MANDATORY FIRST STEPS role loading
+- Auto-approve FIX_REQUIRED verdicts without checking GC round count
+- Modify QA audit artifacts directly
+- Produce unstructured output
+- Exceed defined scope boundaries
+
+---
+
+## Toolbox
+
+### Available Tools
+
+| Tool | Type | Purpose |
+|------|------|---------|
+| `Read` | built-in | Load QA audit reports and context |
+| `AskUserQuestion` | built-in | Get user decision on QA gate |
+| `Write` | built-in | Store review result |
+
+### Tool Usage Patterns
+
+**Read Pattern**: Load context files before review
+```
+Read("<session>/artifacts/qa/audit-*.md")
+Read("<session>/discoveries.ndjson")
+```
+
+**Write Pattern**: Store review result
+```
+Write("<session>/interactive/<task-id>-result.json", <review-result-json>)
+```
+
+---
+
+## Execution
+
+### Phase 1: Context Loading
+
+**Objective**: Load QA audit report and GC loop state
+
+**Input**:
+
+| Source | Required | Description |
+|--------|----------|-------------|
+| QA audit report | Yes | The audit document to review |
+| discoveries.ndjson | No | Shared discoveries for context |
+| Master CSV (tasks.csv) | No | For GC round tracking |
+
+**Steps**:
+
+1. Extract session path from task assignment
+2. Read the QA audit report referenced in the task description
+3. Read discoveries.ndjson for additional context
+4.
Check current GC round count from session state + +**Output**: QA verdict summary ready for review + +--- + +### Phase 2: Gate Decision + +**Objective**: Determine next action based on QA verdict + +**Steps**: + +1. Parse QA verdict from audit report: + +| Verdict | GC Round | Action | +|---------|----------|--------| +| PASSED | any | Report approved, no fix cycle needed | +| PASSED_WITH_WARNINGS | any | Report approved with warnings noted | +| FIX_REQUIRED | < 2 | Create DEV-fix + QA-recheck tasks | +| FIX_REQUIRED | >= 2 | Escalate to user for manual intervention | + +2. If escalation needed, present choice: + +```javascript +AskUserQuestion({ + questions: [{ + question: "QA has flagged issues after 2 fix rounds. How would you like to proceed?", + header: "QA Gate", + multiSelect: false, + options: [ + { label: "Accept current state", description: "Proceed despite remaining issues" }, + { label: "Manual fix", description: "You will fix the issues manually" }, + { label: "Abort pipeline", description: "Stop the pipeline" } + ] + }] +}) +``` + +3. Handle response accordingly + +**Output**: Gate decision with action directive + +--- + +## Structured Output Template + +``` +## Summary +- QA audit reviewed: +- Verdict: +- Score: /10 +- Decision: + +## Findings +- Critical issues: +- High issues: +- Medium issues: +- Low issues: + +## Decision Details +- GC round: / +- Action: +- User feedback: + +## Open Questions +1. 
Any unresolved items from review +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| QA audit report not found | Report in Open Questions, ask for path | +| GC round state missing | Default to round 0 | +| User does not respond | Timeout, report partial with "awaiting-review" status | +| Processing failure | Output partial results with clear status indicator | diff --git a/.codex/skills/team-frontend/instructions/agent-instruction.md b/.codex/skills/team-frontend/instructions/agent-instruction.md new file mode 100644 index 00000000..1e0fd72d --- /dev/null +++ b/.codex/skills/team-frontend/instructions/agent-instruction.md @@ -0,0 +1,197 @@ +# Agent Instruction Template -- Team Frontend + +Base instruction template for CSV wave agents. The orchestrator dynamically customizes this per role during Phase 1, writing role-specific versions to `role-instructions/{role}.md`. + +## Purpose + +| Phase | Usage | +|-------|-------| +| Phase 1 | Coordinator generates per-role instruction from this template | +| Phase 2 | Injected as `instruction` parameter to `spawn_agents_on_csv` | + +--- + +## Base Instruction Template + +```markdown +## TASK ASSIGNMENT -- Team Frontend + +### MANDATORY FIRST STEPS +1. Read shared discoveries: /discoveries.ndjson (if exists, skip if not) +2. Read project context: .workflow/project-tech.json (if exists) + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Role**: {role} +**Pipeline Mode**: {pipeline_mode} +**Scope**: {scope} +**Review Type**: {review_type} + +### Task Description +{description} + +### Previous Tasks' Findings (Context) +{prev_context} + +--- + +## Execution Protocol + +1. **Read discoveries**: Load /discoveries.ndjson for shared exploration findings +2. **Use context**: Apply previous tasks' findings from prev_context above +3. **Execute task**: Follow role-specific instructions below +4. 
**Share discoveries**: Append exploration findings to shared board:
+   ```bash
+   echo '{"ts":"<utc-timestamp>","worker":"{id}","type":"<discovery-type>","data":{...}}' >> <session>/discoveries.ndjson
+   ```
+5. **Report result**: Return JSON via report_agent_job_result
+
+### Discovery Types to Share
+- `tech_stack_detected`: {stack, framework, ui_lib} -- Tech stack identification
+- `design_pattern_found`: {pattern_name, location, description} -- Existing design pattern
+- `token_generated`: {category, count, supports_dark_mode} -- Design token category created
+- `file_modified`: {file, change, lines_added} -- File change performed
+- `issue_found`: {file, line, severity, description} -- Issue discovered
+- `anti_pattern_violation`: {pattern, file, line, description} -- Anti-pattern detected
+- `artifact_produced`: {name, path, producer, type} -- Deliverable created
+
+---
+
+## Output (report_agent_job_result)
+
+Return JSON:
+{
+  "id": "{id}",
+  "status": "completed" | "failed",
+  "findings": "Key discoveries and implementation notes (max 500 chars)",
+  "artifacts_produced": "semicolon-separated paths of produced files",
+  "qa_score": "<0-10 weighted score, QA role only; empty otherwise>",
+  "qa_verdict": "<PASSED | PASSED_WITH_WARNINGS | FIX_REQUIRED, QA role only; empty otherwise>",
+  "error": "<error message when status is failed; empty otherwise>"
+}
+```
+
+---
+
+## Role-Specific Customization
+
+The coordinator generates per-role instruction variants during Phase 1. Each variant adds role-specific execution guidance to Step 3.
+
+### For Analyst Role
+
+```
+3.
**Execute**:
+   - Detect tech stack from package.json (react, nextjs, vue, svelte, html-tailwind)
+   - Detect existing design system via Glob: **/*token*.*, **/*.css
+   - Retrieve design intelligence via ui-ux-pro-max skill:
+     - Full design system: Skill(skill="ui-ux-pro-max", args="<industry> --design-system")
+     - UX guidelines: Skill(skill="ui-ux-pro-max", args="accessibility animation responsive --domain ux")
+     - Tech stack guide: Skill(skill="ui-ux-pro-max", args="<keywords> --stack <stack>")
+     - Fallback if skill unavailable: generate from LLM general design knowledge
+   - Analyze existing codebase patterns (color palette, typography, spacing, components)
+   - Optional: WebSearch for "<industry> web design trends best practices"
+   - Compile design-intelligence.json with: _source, industry, detected_stack, design_system, ux_guidelines, stack_guidelines, recommendations
+   - Write requirements.md summarizing all requirements
+   - Output to <session>/artifacts/analysis/
+```
+
+### For Architect Role
+
+```
+3. **Execute**:
+   - Load design-intelligence.json from analyst output
+   - Generate design token system (design-tokens.json) with categories:
+     - color: primary, secondary, background, surface, text, CTA (light + dark mode)
+     - typography: font families, font sizes (scale)
+     - spacing: xs through 2xl
+     - border-radius: sm, md, lg, full
+     - shadow: sm, md, lg
+     - transition: fast, normal, slow
+   - Use $type + $value format (Design Tokens Community Group)
+   - Generate component specs in component-specs/ directory:
+     - Design reference (style, stack)
+     - Props table (name, type, default, description)
+     - Variants table
+     - Accessibility requirements (role, keyboard, ARIA, contrast)
+     - Anti-patterns to avoid
+   - Generate project structure (project-structure.md) using stack-specific layout
+   - Output to <session>/artifacts/architecture/
+```
+
+### For Developer Role
+
+```
+3.
**Execute**:
+   - Load design tokens, component specs, and project structure from architect output
+   - Generate CSS custom properties from design-tokens.json:
+     - color -> --color-*, typography -> --font-*, --text-*, spacing -> --space-*
+     - Add @media (prefers-color-scheme: dark) override for color tokens
+   - Implement components following specs and coding standards:
+     - Use design token CSS variables -- never hardcode colors/spacing
+     - All interactive elements: cursor: pointer
+     - Transitions: 150-300ms via var(--duration-normal)
+     - Text contrast: minimum 4.5:1 ratio
+     - Include focus-visible styles for keyboard navigation
+     - Support prefers-reduced-motion
+     - Responsive: mobile-first with md/lg breakpoints
+     - No emoji as functional icons
+   - Self-validate: scan for hardcoded colors, missing cursor-pointer, missing focus styles
+   - Auto-fix where possible
+   - Output to src/ directory (codebase files) + implementation summary
+```
+
+### For QA Role
+
+```
+3. **Execute**:
+   - Load design intelligence and design tokens for compliance checks
+   - Collect files to review based on review_type:
+     - architecture-review: <session>/artifacts/architecture/**/*
+     - code-review: src/**/*.{tsx,jsx,vue,svelte,html,css}
+     - final: src/**/*.{tsx,jsx,vue,svelte,html,css}
+   - Execute 5-dimension audit:
+     - Dim 1 Code Quality (0.20): file length, console.log, empty catch, unused imports
+     - Dim 2 Accessibility (0.25): alt text, labels, headings, focus styles, ARIA
+     - Dim 3 Design Compliance (0.20): hardcoded colors, spacing, anti-patterns
+     - Dim 4 UX Best Practices (0.20): cursor-pointer, transitions, responsive, states
+     - Dim 5 Pre-Delivery (0.15): final checklist (code-review/final types only)
+   - Calculate weighted score: sum(dimension_score * weight)
+   - Determine verdict: score >= 8 AND critical == 0 -> PASSED; score >= 6 AND critical == 0 -> PASSED_WITH_WARNINGS; else -> FIX_REQUIRED
+   - Write audit report to <session>/artifacts/qa/
+   - Set qa_score and qa_verdict in output
+```
+
+---
+
+
+## Quality Requirements
+
+All agents must verify before reporting complete:
+
+| Requirement | Criteria |
+|-------------|----------|
+| Files produced | Verify all claimed artifacts exist via Read |
+| Files modified | Verify content actually changed |
+| Findings accuracy | Findings reflect actual work done |
+| Discovery sharing | At least 1 discovery shared to board |
+| Error reporting | Non-empty error field if status is failed |
+| QA fields | qa_score and qa_verdict set for QA role tasks |
+
+---
+
+## Placeholder Reference
+
+| Placeholder | Resolved By | When |
+|-------------|------------|------|
+| `<session>` | Skill designer (Phase 1) | Literal path baked into instruction |
+| `{id}` | spawn_agents_on_csv | Runtime from CSV row |
+| `{title}` | spawn_agents_on_csv | Runtime from CSV row |
+| `{description}` | spawn_agents_on_csv | Runtime from CSV row |
+| `{role}` | spawn_agents_on_csv | Runtime from CSV row |
+| `{pipeline_mode}` | spawn_agents_on_csv | Runtime from CSV row |
+| `{scope}` | spawn_agents_on_csv | Runtime from CSV row |
+| `{review_type}` | spawn_agents_on_csv | Runtime from CSV row |
+| `{prev_context}` | spawn_agents_on_csv | Runtime from CSV row |
diff --git a/.codex/skills/team-frontend/schemas/tasks-schema.md b/.codex/skills/team-frontend/schemas/tasks-schema.md
new file mode 100644
index 00000000..5467d069
--- /dev/null
+++ b/.codex/skills/team-frontend/schemas/tasks-schema.md
+# Team Frontend -- CSV Schema
+
+## Master CSV: tasks.csv
+
+### Column Definitions
+
+#### Input Columns (Set by Decomposer)
+
+| Column | Type | Required | Description | Example |
+|--------|------|----------|-------------|---------|
+| `id` | string | Yes | Unique task identifier (PREFIX-NNN) | `"ANALYZE-001"` |
+| `title` | string | Yes | Short task title | `"Requirement analysis + design intelligence"` |
+| `description` | string | Yes | Detailed task description (self-contained) with PURPOSE/TASK/CONTEXT/EXPECTED/CONSTRAINTS | `"PURPOSE:
Analyze frontend requirements..."` | +| `role` | enum | Yes | Worker role: `analyst`, `architect`, `developer`, `qa` | `"analyst"` | +| `pipeline_mode` | enum | Yes | Pipeline mode: `page`, `feature`, `system` | `"feature"` | +| `scope` | enum | Yes | Task scope: `full`, `tokens`, `components` | `"full"` | +| `review_type` | string | No | QA review type: `architecture-review`, `code-review`, `final` (empty for non-QA tasks) | `"code-review"` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"ANALYZE-001"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"ANALYZE-001"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[ANALYZE-001] Detected React + shadcn stack..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` -> `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Generated 24 color tokens with dark mode..."` | +| `artifacts_produced` | string | Semicolon-separated paths of produced artifacts | `"artifacts/analysis/design-intelligence.json;artifacts/analysis/requirements.md"` | +| `qa_score` | string | QA weighted score 0-10 (empty for non-QA tasks) | `"8.5"` | +| `qa_verdict` | enum | `PASSED`, `PASSED_WITH_WARNINGS`, `FIX_REQUIRED` (empty for non-QA) | `"PASSED"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution 
within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution | + +Interactive tasks appear in master CSV for dependency tracking but are NOT included in wave-{N}.csv files. + +--- + +### Role Prefixes + +| Role | Prefix | Responsibility Type | +|------|--------|---------------------| +| analyst | ANALYZE | read-only (analysis + design intelligence retrieval) | +| architect | ARCH | orchestration (design token + component spec generation) | +| developer | DEV | code-gen (implementation from specs) | +| qa | QA | validation (5-dimension quality audit) | + +--- + +### Example Data + +```csv +id,title,description,role,pipeline_mode,scope,review_type,deps,context_from,exec_mode,wave,status,findings,artifacts_produced,qa_score,qa_verdict,error +"ANALYZE-001","Requirement analysis","PURPOSE: Analyze frontend requirements and retrieve design intelligence via ui-ux-pro-max | Success: design-intelligence.json produced with industry-specific recommendations\nTASK:\n- Detect tech stack and existing design system\n- Retrieve design intelligence\n- Compile design-intelligence.json and requirements.md\nCONTEXT:\n- Session: .workflow/.csv-wave/fe-saas-dashboard-20260308\n- Industry: saas\nEXPECTED: artifacts/analysis/design-intelligence.json + requirements.md","analyst","feature","full","","","","csv-wave","1","pending","","","","","" +"ARCH-001","Design token system + architecture","PURPOSE: Define design token system and component architecture | Success: design-tokens.json + component specs\nTASK:\n- Load design intelligence from analyst\n- Generate design tokens (colors, typography, spacing, shadows)\n- Define component specs\n- Generate project structure\nCONTEXT:\n- Session: .workflow/.csv-wave/fe-saas-dashboard-20260308\n- Upstream: design-intelligence.json\nEXPECTED: artifacts/architecture/design-tokens.json + component-specs/ + 
project-structure.md","architect","feature","full","","ANALYZE-001","ANALYZE-001","csv-wave","2","pending","","","","","" +"QA-001","Architecture review","PURPOSE: Review architecture before development | Success: Architecture approved with score >= 8\nTASK:\n- Load design intelligence and tokens\n- Execute 5-dimension audit on architecture\n- Calculate score and verdict\nCONTEXT:\n- Session: .workflow/.csv-wave/fe-saas-dashboard-20260308\n- Review type: architecture-review\nEXPECTED: artifacts/qa/audit-001.md","qa","feature","full","architecture-review","ARCH-001","ARCH-001","csv-wave","3","pending","","","","","" +"DEV-001","Frontend implementation","PURPOSE: Implement frontend from architecture artifacts | Success: All planned files implemented\nTASK:\n- Generate CSS custom properties from design tokens\n- Implement components following specs\n- Self-validate: no hardcoded colors, cursor-pointer, focus styles\nCONTEXT:\n- Session: .workflow/.csv-wave/fe-saas-dashboard-20260308\n- Upstream: design-tokens.json, component-specs/\nEXPECTED: src/styles/tokens.css + component files","developer","feature","full","","QA-001","QA-001;ARCH-001","csv-wave","4","pending","","","","","" +"QA-002","Code review","PURPOSE: Execute 5-dimension quality audit | Success: Score >= 8 with 0 critical issues\nTASK:\n- Execute full audit (code quality, accessibility, design compliance, UX, pre-delivery)\n- Calculate weighted score\nCONTEXT:\n- Session: .workflow/.csv-wave/fe-saas-dashboard-20260308\n- Review type: code-review\nEXPECTED: artifacts/qa/audit-002.md","qa","feature","full","code-review","DEV-001","DEV-001","csv-wave","5","pending","","","","","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +--------------------- -------------------- ----------------- +id ----------> id ----------> id +title ----------> title ----------> (reads) +description ----------> description ----------> (reads) +role ----------> role 
----------> (reads) +pipeline_mode ---------> pipeline_mode ---------> (reads) +scope ----------> scope ----------> (reads) +review_type ----------> review_type ----------> (reads) +deps ----------> deps ----------> (reads) +context_from----------> context_from----------> (reads) +exec_mode ----------> exec_mode ----------> (reads) + wave ----------> (reads) + prev_context ----------> (reads) + status + findings + artifacts_produced + qa_score + qa_verdict + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "ARCH-001", + "status": "completed", + "findings": "Generated design token system with 24 color tokens (light+dark), 7 typography scales, 6 spacing values. Created 3 component specs: Button, Card, Header. Project structure follows Next.js app router convention.", + "artifacts_produced": "artifacts/architecture/design-tokens.json;artifacts/architecture/component-specs/button.md;artifacts/architecture/project-structure.md", + "qa_score": "", + "qa_verdict": "", + "error": "" +} +``` + +QA agent output example: + +```json +{ + "id": "QA-001", + "status": "completed", + "findings": "Architecture review: 8.2/10. Color tokens complete, typography scale follows best practices. Minor: missing border-radius for 'pill' variant.", + "artifacts_produced": "artifacts/qa/audit-001.md", + "qa_score": "8.2", + "qa_verdict": "PASSED", + "error": "" +} +``` + +Interactive tasks output via structured text or JSON written to `interactive/{id}-result.json`. 
+ +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `tech_stack_detected` | `data.stack` | `{stack, framework, ui_lib}` | Tech stack identified | +| `design_pattern_found` | `data.pattern_name+data.location` | `{pattern_name, location, description}` | Existing design pattern | +| `token_generated` | `data.category` | `{category, count, supports_dark_mode}` | Design token category created | +| `file_modified` | `data.file` | `{file, change, lines_added}` | File change recorded | +| `issue_found` | `data.file+data.line` | `{file, line, severity, description}` | QA issue discovered | +| `anti_pattern_violation` | `data.pattern+data.file` | `{pattern, file, line, description}` | Industry anti-pattern detected | +| `artifact_produced` | `data.path` | `{name, path, producer, type}` | Deliverable created | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"ANALYZE-001","type":"tech_stack_detected","data":{"stack":"react","framework":"nextjs","ui_lib":"shadcn"}} +{"ts":"2026-03-08T10:05:00Z","worker":"ARCH-001","type":"token_generated","data":{"category":"color","count":24,"supports_dark_mode":true}} +{"ts":"2026-03-08T10:10:00Z","worker":"DEV-001","type":"file_modified","data":{"file":"src/styles/tokens.css","change":"Generated CSS custom properties from design tokens","lines_added":85}} +{"ts":"2026-03-08T10:15:00Z","worker":"QA-001","type":"issue_found","data":{"file":"src/components/Button.tsx","line":42,"severity":"high","description":"Missing cursor-pointer on interactive button element"}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. 
+ +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| CSV task findings | Interactive task | Injected via spawn message or send_input | +| Interactive task result | CSV task prev_context | Read from interactive/{id}-result.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Description non-empty | Every task has description | "Empty description for task: {id}" | +| Status enum | status in {pending, completed, failed, skipped} | "Invalid status: {status}" | +| Role valid | role in {analyst, architect, developer, qa} | "Invalid role: {role}" | +| Pipeline mode valid | pipeline_mode in {page, feature, system} | "Invalid pipeline_mode: {mode}" | +| QA verdict valid | qa_verdict in {PASSED, PASSED_WITH_WARNINGS, FIX_REQUIRED, ""} | "Invalid qa_verdict: {verdict}" | +| Cross-mechanism deps | Interactive to CSV deps resolve correctly | "Cross-mechanism dependency unresolvable: {id}" | diff --git a/.codex/skills/team-issue/SKILL.md b/.codex/skills/team-issue/SKILL.md new file mode 100644 index 00000000..231261f3 --- /dev/null +++ b/.codex/skills/team-issue/SKILL.md @@ -0,0 +1,702 @@ +--- +name: team-issue +description: Hybrid team skill for issue resolution. CSV wave primary for exploration, planning, integration, and implementation. Interactive agents for review gates with fix cycles. 
Supports Quick, Full, and Batch pipelines. +argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] [--mode=quick|full|batch] \"issue-ids or --all-pending\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults. + +# Team Issue Resolution + +## Usage + +```bash +$team-issue "ISS-20260308-120000 ISS-20260308-120001" +$team-issue -c 4 "ISS-20260308-120000 --mode=full" +$team-issue -y "--all-pending" +$team-issue --continue "issue-auth-fix-20260308" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 3) +- `--continue`: Resume existing session +- `--mode=quick|full|batch`: Force pipeline mode (default: auto-detect) + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Orchestrate issue resolution pipeline: explore context, plan solution, review (optional), marshal queue, implement. Supports Quick, Full, and Batch pipelines with review-fix cycle. 
+ +**Execution Model**: Hybrid -- CSV wave pipeline (primary) + individual agent spawn (secondary) + +``` ++---------------------------------------------------------------------------+ +| TEAM ISSUE RESOLUTION WORKFLOW | ++---------------------------------------------------------------------------+ +| | +| Phase 1: Requirement Parsing + Pipeline Selection | +| +-- Parse issue IDs (GH-\d+, ISS-\d{8}-\d{6}, --all-pending) | +| +-- Auto-detect pipeline mode (quick/full/batch) | +| +-- Determine execution method (codex/gemini/auto) | +| +-- Generate tasks.csv with wave + exec_mode columns | +| +-- User validates task breakdown (skip if -y) | +| | +| Phase 2: Wave Execution Engine (Extended) | +| +-- For each wave (1..N): | +| | +-- Execute pre-wave interactive tasks (if any) | +| | +-- Build wave CSV (filter csv-wave tasks for this wave) | +| | +-- Inject previous findings into prev_context column | +| | +-- spawn_agents_on_csv(wave CSV) | +| | +-- Execute post-wave interactive tasks (if any) | +| | +-- Merge all results into master tasks.csv | +| | +-- Check: any failed? 
-> skip dependents | +| +-- discoveries.ndjson shared across all modes (append-only) | +| | +| Phase 3: Post-Wave Interactive (Review Gate) | +| +-- Reviewer agent: multi-dimensional review with verdict | +| +-- Fix cycle: rejected -> revise solution -> re-review (max 2) | +| +-- Final aggregation / report | +| | +| Phase 4: Results Aggregation | +| +-- Export final results.csv | +| +-- Generate context.md with all findings | +| +-- Display summary: completed/failed/skipped per wave | +| +-- Offer: view results | retry failed | done | +| | ++---------------------------------------------------------------------------+ +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, clarification, review gates | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Codebase exploration (EXPLORE-*) | `csv-wave` | +| Solution planning (SOLVE-*) | `csv-wave` | +| Queue formation / integration (MARSHAL-*) | `csv-wave` | +| Code implementation (BUILD-*) | `csv-wave` | +| Technical review with verdict (AUDIT-*) | `interactive` | +| Solution revision after rejection (SOLVE-fix-*) | `csv-wave` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,role,issue_ids,exec_mode,execution_method,deps,context_from,wave,status,findings,artifact_path,error +"EXPLORE-001","Context analysis","Analyze issue context and map codebase impact for ISS-20260308-120000","explorer","ISS-20260308-120000","csv-wave","","","","1","pending","","","","" +"SOLVE-001","Solution design","Design solution and decompose into implementation tasks","planner","ISS-20260308-120000","csv-wave","","EXPLORE-001","EXPLORE-001","2","pending","","","","" 
+"AUDIT-001","Technical review","Review solution for feasibility, risk, and completeness","reviewer","ISS-20260308-120000","interactive","","SOLVE-001","SOLVE-001","3","pending","","",""
+"MARSHAL-001","Queue formation","Form execution queue with conflict detection","integrator","ISS-20260308-120000","csv-wave","","AUDIT-001","SOLVE-001","4","pending","","",""
+"BUILD-001","Implementation","Implement solution plan and verify with tests","implementer","ISS-20260308-120000","csv-wave","gemini","MARSHAL-001","EXPLORE-001;SOLVE-001","5","pending","","",""
+```
+
+**Columns**:
+
+| Column | Phase | Description |
+|--------|-------|-------------|
+| `id` | Input | Unique task identifier (EXPLORE-NNN, SOLVE-NNN, AUDIT-NNN, MARSHAL-NNN, BUILD-NNN) |
+| `title` | Input | Short task title |
+| `description` | Input | Detailed task description |
+| `role` | Input | Worker role: explorer, planner, reviewer, integrator, implementer |
+| `issue_ids` | Input | Semicolon-separated issue IDs this task covers |
+| `exec_mode` | Input | `csv-wave` or `interactive` |
+| `execution_method` | Input | codex, gemini, qwen, or empty (for non-BUILD tasks) |
+| `deps` | Input | Semicolon-separated dependency task IDs |
+| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs |
+| `wave` | Computed | Wave number (computed by topological sort, 1-based) |
+| `status` | Output | `pending` -> `completed` / `failed` / `skipped` |
+| `findings` | Output | Key discoveries or implementation notes (max 500 chars) |
+| `artifact_path` | Output | Path to generated artifact (context report, solution, queue, etc.) |
+| `error` | Output | Error message if failed (empty if success) |
+
+### Per-Wave CSV (Temporary)
+
+Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). 
+ +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| reviewer | agents/reviewer.md | 2.3 (structured review) | Multi-dimensional solution review with verdict | post-wave (after SOLVE wave) | + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. + +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state -- all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 4 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 4 | +| `explorations/context-{issueId}.json` | Explorer context reports | Created by explorer agents | +| `solutions/solution-{issueId}.json` | Planner solution plans | Created by planner agents | +| `audits/audit-report.json` | Reviewer audit report | Created by reviewer agent | +| `queue/execution-queue.json` | Integrator execution queue | Created by integrator agent | +| `builds/build-{issueId}.json` | Implementer build results | Created by implementer agents | +| `interactive/{id}-result.json` | Results from interactive tasks | Created per interactive task | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ ++-- tasks.csv # Master state (all tasks, both modes) ++-- results.csv # Final results export ++-- discoveries.ndjson # Shared discovery board (all agents) ++-- context.md # Human-readable report ++-- wave-{N}.csv # Temporary per-wave input (csv-wave only) ++-- explorations/ # Explorer output +| +-- 
context-{issueId}.json ++-- solutions/ # Planner output +| +-- solution-{issueId}.json ++-- audits/ # Reviewer output +| +-- audit-report.json ++-- queue/ # Integrator output +| +-- execution-queue.json ++-- builds/ # Implementer output +| +-- build-{issueId}.json ++-- interactive/ # Interactive task artifacts +| +-- {id}-result.json ++-- wisdom/ # Cross-task knowledge + +-- learnings.md + +-- decisions.md + +-- conventions.md + +-- issues.md +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +const getUtc8ISOString = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString() + +const AUTO_YES = $ARGUMENTS.includes('--yes') || $ARGUMENTS.includes('-y') +const continueMode = $ARGUMENTS.includes('--continue') +const concurrencyMatch = $ARGUMENTS.match(/(?:--concurrency|-c)\s+(\d+)/) +const maxConcurrency = concurrencyMatch ? parseInt(concurrencyMatch[1]) : 3 + +const requirement = $ARGUMENTS + .replace(/--yes|-y|--continue|--concurrency\s+\d+|-c\s+\d+/g, '') + .trim() + +// Parse issue IDs +const issueIdPattern = /(?:GH-\d+|ISS-\d{8}-\d{6})/g +let issueIds = requirement.match(issueIdPattern) || [] + +// Parse mode override +const modeMatch = requirement.match(/--mode=(\w+)/) +let pipelineMode = modeMatch ? modeMatch[1] : null + +// Handle --all-pending +if (requirement.includes('--all-pending')) { + const result = Bash("ccw issue list --status registered,pending --json") + issueIds = JSON.parse(result).map(i => i.id) +} + +// If no issue IDs, ask user +if (issueIds.length === 0) { + const answer = AskUserQuestion("No issue IDs found. 
Please provide issue IDs (e.g., ISS-20260308-120000):") + issueIds = answer.match(issueIdPattern) || [] + if (issueIds.length === 0) return // abort +} + +// Auto-detect pipeline mode +if (!pipelineMode) { + // Load issue priorities + const priorities = [] + for (const id of issueIds) { + const info = JSON.parse(Bash(`ccw issue status ${id} --json`)) + priorities.push(info.priority || 0) + } + const hasHighPriority = priorities.some(p => p >= 4) + + if (issueIds.length <= 2 && !hasHighPriority) pipelineMode = 'quick' + else if (issueIds.length <= 4) pipelineMode = 'full' + else pipelineMode = 'batch' +} + +// Execution method selection +let executionMethod = 'gemini' // default +const execMatch = requirement.match(/--exec=(\w+)/) +if (execMatch) executionMethod = execMatch[1] + +const slug = issueIds[0].toLowerCase().replace(/[^a-z0-9]+/g, '-').substring(0, 30) +const dateStr = getUtc8ISOString().substring(0, 10).replace(/-/g, '') +const sessionId = `issue-${slug}-${dateStr}` +const sessionFolder = `.workflow/.csv-wave/${sessionId}` + +Bash(`mkdir -p ${sessionFolder}/{explorations,solutions,audits,queue,builds,interactive,wisdom}`) + +Write(`${sessionFolder}/discoveries.ndjson`, `# Discovery Board - ${sessionId}\n# Format: NDJSON\n`) + +// Initialize wisdom files +Write(`${sessionFolder}/wisdom/learnings.md`, `# Learnings\n\nAccumulated during ${sessionId}\n`) +Write(`${sessionFolder}/wisdom/decisions.md`, `# Decisions\n\n`) +Write(`${sessionFolder}/wisdom/conventions.md`, `# Conventions\n\n`) +Write(`${sessionFolder}/wisdom/issues.md`, `# Issues\n\n`) + +// Store session metadata +Write(`${sessionFolder}/session.json`, JSON.stringify({ + session_id: sessionId, + pipeline_mode: pipelineMode, + issue_ids: issueIds, + execution_method: executionMethod, + fix_cycles: 0, + max_fix_cycles: 2, + created_at: getUtc8ISOString() +}, null, 2)) +``` + +--- + +### Phase 1: Requirement -> CSV + Classification + +**Objective**: Parse issue IDs, determine pipeline mode, generate 
tasks.csv with wave and exec_mode assignments. + +**Decomposition Rules**: + +| Pipeline | Tasks Generated | +|----------|----------------| +| quick | EXPLORE-001, SOLVE-001, MARSHAL-001, BUILD-001 (4 tasks, waves 1-4) | +| full | EXPLORE-001, SOLVE-001, AUDIT-001, MARSHAL-001, BUILD-001 (5 tasks, waves 1-5) | +| batch | EXPLORE-001..N, SOLVE-001..N, AUDIT-001, MARSHAL-001, BUILD-001..M (N+N+1+1+M tasks) | + +**Classification Rules**: + +| Task Prefix | Role | exec_mode | Rationale | +|-------------|------|-----------|-----------| +| EXPLORE-* | explorer | csv-wave | One-shot codebase analysis | +| SOLVE-* | planner | csv-wave | One-shot solution design via CLI | +| SOLVE-fix-* | planner | csv-wave | One-shot revision addressing feedback | +| AUDIT-* | reviewer | interactive | Multi-round review with verdict routing | +| MARSHAL-* | integrator | csv-wave | One-shot queue formation | +| BUILD-* | implementer | csv-wave | One-shot implementation via CLI | + +**Wave Computation**: Kahn's BFS topological sort with depth tracking (csv-wave tasks only). + +**User Validation**: Display task breakdown with wave + exec_mode assignment (skip if AUTO_YES). 
+ +**Task Generation by Pipeline Mode**: + +Quick pipeline: +```csv +id,title,description,role,issue_ids,exec_mode,execution_method,deps,context_from,wave,status,findings,artifact_path,error +"EXPLORE-001","Context analysis","Analyze issue context and map codebase impact","explorer","","csv-wave","","","","1","pending","","","" +"SOLVE-001","Solution design","Design solution and decompose into implementation tasks","planner","","csv-wave","","EXPLORE-001","EXPLORE-001","2","pending","","","" +"MARSHAL-001","Queue formation","Form execution queue with conflict detection and ordering","integrator","","csv-wave","","SOLVE-001","SOLVE-001","3","pending","","","" +"BUILD-001","Implementation","Implement solution plan and verify with tests","implementer","","csv-wave","","MARSHAL-001","EXPLORE-001;SOLVE-001","4","pending","","","" +``` + +Full pipeline (adds AUDIT-001 as interactive between SOLVE and MARSHAL): +```csv +"AUDIT-001","Technical review","Review solution for feasibility, risk, and completeness","reviewer","","interactive","","SOLVE-001","SOLVE-001","3","pending","","","" +"MARSHAL-001","Queue formation","...","integrator","","csv-wave","","AUDIT-001","SOLVE-001","4","pending","","","" +"BUILD-001","Implementation","...","implementer","","csv-wave","","MARSHAL-001","EXPLORE-001;SOLVE-001","5","pending","","","" +``` + +Batch pipeline (parallel EXPLORE, sequential SOLVE, then AUDIT, MARSHAL, deferred BUILD): +- EXPLORE-001..N with wave=1, no deps +- SOLVE-001..N with wave=2, deps on all EXPLORE-* +- AUDIT-001 with wave=3, deps on all SOLVE-*, interactive +- MARSHAL-001 with wave=4, deps on AUDIT-001 +- BUILD-001..M created after MARSHAL completes (deferred) + +**Success Criteria**: +- tasks.csv created with valid schema, wave, and exec_mode assignments +- No circular dependencies +- User approved (or AUTO_YES) + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with hybrid mechanism support and cross-wave context 
propagation. + +```javascript +const masterCsv = Read(`${sessionFolder}/tasks.csv`) +let tasks = parseCsv(masterCsv) +const maxWave = Math.max(...tasks.map(t => parseInt(t.wave))) +let fixCycles = 0 + +for (let wave = 1; wave <= maxWave; wave++) { + console.log(`\nWave ${wave}/${maxWave}`) + + // 1. Separate tasks by exec_mode + const waveTasks = tasks.filter(t => parseInt(t.wave) === wave) + const csvTasks = waveTasks.filter(t => t.exec_mode === 'csv-wave' && t.status === 'pending') + const interactiveTasks = waveTasks.filter(t => t.exec_mode === 'interactive' && t.status === 'pending') + + // 2. Check dependencies - skip if upstream failed + for (const task of waveTasks) { + const depIds = (task.deps || '').split(';').filter(Boolean) + const depStatuses = depIds.map(id => tasks.find(t => t.id === id)?.status) + if (depStatuses.some(s => s === 'failed' || s === 'skipped')) { + task.status = 'skipped' + task.error = `Dependency failed: ${depIds.filter((id, i) => + ['failed','skipped'].includes(depStatuses[i])).join(', ')}` + } + } + + // 3. 
Execute csv-wave tasks + const pendingCsv = csvTasks.filter(t => t.status === 'pending') + if (pendingCsv.length > 0) { + // Build prev_context for each task + for (const task of pendingCsv) { + const contextIds = (task.context_from || '').split(';').filter(Boolean) + const prevFindings = contextIds.map(id => { + const src = tasks.find(t => t.id === id) + if (!src?.findings) return '' + return `## [${src.id}] ${src.title}\n${src.findings}` + }).filter(Boolean).join('\n\n') + task.prev_context = prevFindings + } + + // Write wave CSV + Write(`${sessionFolder}/wave-${wave}.csv`, toCsv(pendingCsv)) + + // Execute + spawn_agents_on_csv({ + csv_path: `${sessionFolder}/wave-${wave}.csv`, + id_column: "id", + instruction: Read(".codex/skills/team-issue/instructions/agent-instruction.md"), + max_concurrency: maxConcurrency, + max_runtime_seconds: 1200, + output_csv_path: `${sessionFolder}/wave-${wave}-results.csv`, + output_schema: { + type: "object", + properties: { + id: { type: "string" }, + status: { type: "string", enum: ["completed", "failed"] }, + findings: { type: "string" }, + artifact_path: { type: "string" }, + error: { type: "string" } + } + } + }) + + // Merge results + const results = parseCsv(Read(`${sessionFolder}/wave-${wave}-results.csv`)) + for (const r of results) { + const t = tasks.find(t => t.id === r.id) + if (t) Object.assign(t, r) + } + + // Cleanup temp files + Bash(`rm -f ${sessionFolder}/wave-${wave}.csv ${sessionFolder}/wave-${wave}-results.csv`) + } + + // 4. 
Execute interactive tasks (post-wave) + const pendingInteractive = interactiveTasks.filter(t => t.status === 'pending') + for (const task of pendingInteractive) { + // Read agent definition + const agentDef = Read(`.codex/skills/team-issue/agents/reviewer.md`) + + // Build context from upstream tasks + const contextIds = (task.context_from || '').split(';').filter(Boolean) + const prevContext = contextIds.map(id => { + const src = tasks.find(t => t.id === id) + if (!src?.findings) return '' + return `## [${src.id}] ${src.title}\n${src.findings}\nArtifact: ${src.artifact_path || 'N/A'}` + }).filter(Boolean).join('\n\n') + + const agent = spawn_agent({ + message: `## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS (Agent Execute) +1. **Read role definition**: .codex/skills/team-issue/agents/reviewer.md (MUST read first) +2. Read: ${sessionFolder}/discoveries.ndjson (shared discoveries) +3. Read: .workflow/project-tech.json (if exists) + +--- + +Goal: ${task.description} +Issue IDs: ${task.issue_ids} +Session: ${sessionFolder} +Scope: Review all solutions in ${sessionFolder}/solutions/ for technical feasibility, risk, and completeness + +Deliverables: +- Audit report at ${sessionFolder}/audits/audit-report.json +- Per-issue verdict: approved (>=80), concerns (60-79), rejected (<60) +- Overall verdict + +### Previous Context +${prevContext}` + }) + + const result = wait({ ids: [agent], timeout_ms: 600000 }) + + if (result.timed_out) { + send_input({ id: agent, message: "Please finalize and output current findings immediately." 
}) + const retry = wait({ ids: [agent], timeout_ms: 120000 }) + } + + // Store interactive result + Write(`${sessionFolder}/interactive/${task.id}-result.json`, JSON.stringify({ + task_id: task.id, + status: "completed", + findings: "Review completed", + timestamp: getUtc8ISOString() + })) + + close_agent({ id: agent }) + + // Parse review verdict from audit report + let verdict = 'approved' + try { + const auditReport = JSON.parse(Read(`${sessionFolder}/audits/audit-report.json`)) + verdict = auditReport.overall_verdict || 'approved' + } catch (e) { /* default to approved */ } + + task.status = 'completed' + task.findings = `Review verdict: ${verdict}` + + // Handle review-fix cycle + if (verdict === 'rejected' && fixCycles < 2) { + fixCycles++ + // Create SOLVE-fix and AUDIT re-review tasks + const fixTask = { + id: `SOLVE-fix-${String(fixCycles).padStart(3, '0')}`, + title: `Revise solution (fix cycle ${fixCycles})`, + description: `Revise solution addressing reviewer feedback. Read audit report for rejection reasons.`, + role: 'planner', + issue_ids: task.issue_ids, + exec_mode: 'csv-wave', + execution_method: '', + deps: task.id, + context_from: task.id, + wave: String(parseInt(task.wave) + 1), + status: 'pending', + findings: '', artifact_path: '', error: '' + } + const reReviewTask = { + id: `AUDIT-${String(fixCycles + 1).padStart(3, '0')}`, + title: `Re-review revised solution (cycle ${fixCycles})`, + description: `Re-review revised solution focusing on previously rejected dimensions.`, + role: 'reviewer', + issue_ids: task.issue_ids, + exec_mode: 'interactive', + execution_method: '', + deps: fixTask.id, + context_from: fixTask.id, + wave: String(parseInt(task.wave) + 2), + status: 'pending', + findings: '', artifact_path: '', error: '' + } + tasks.push(fixTask, reReviewTask) + // Adjust MARSHAL and BUILD waves + for (const t of tasks) { + if (t.id.startsWith('MARSHAL') || t.id.startsWith('BUILD')) { + t.wave = String(parseInt(reReviewTask.wave) + 
(t.id.startsWith('MARSHAL') ? 1 : 2)) + if (t.id.startsWith('MARSHAL')) t.deps = reReviewTask.id + } + } + } else if (verdict === 'rejected' && fixCycles >= 2) { + // Force proceed with warning + console.log(`WARNING: Fix cycle limit (${fixCycles}) reached. Forcing proceed to MARSHAL.`) + } + } + + // 5. Merge all results into master CSV + Write(`${sessionFolder}/tasks.csv`, toCsv(tasks)) + + // 6. Handle deferred BUILD task creation (batch mode after MARSHAL) + const completedMarshal = tasks.find(t => t.id === 'MARSHAL-001' && t.status === 'completed') + if (completedMarshal && pipelineMode === 'batch') { + try { + const queue = JSON.parse(Read(`${sessionFolder}/queue/execution-queue.json`)) + const buildCount = queue.parallel_groups?.length || 1 + for (let b = 1; b <= Math.min(buildCount, 3); b++) { + const buildIssues = queue.parallel_groups[b-1]?.issues || issueIds + tasks.push({ + id: `BUILD-${String(b).padStart(3, '0')}`, + title: `Implementation group ${b}`, + description: `Implement solutions for issues in parallel group ${b}`, + role: 'implementer', + issue_ids: buildIssues.join(';'), + exec_mode: 'csv-wave', + execution_method: executionMethod, + deps: 'MARSHAL-001', + context_from: 'EXPLORE-001;SOLVE-001', + wave: String(parseInt(completedMarshal.wave) + 1), + status: 'pending', + findings: '', artifact_path: '', error: '' + }) + } + Write(`${sessionFolder}/tasks.csv`, toCsv(tasks)) + } catch (e) { /* single BUILD fallback */ } + } +} +``` + +**Success Criteria**: +- All waves executed in order +- Both csv-wave and interactive tasks handled per wave +- Each wave's results merged into master CSV before next wave starts +- Dependent tasks skipped when predecessor failed +- discoveries.ndjson accumulated across all waves and mechanisms +- Review-fix cycles handled (max 2) +- Deferred BUILD tasks created after MARSHAL (batch mode) + +--- + +### Phase 3: Post-Wave Interactive + +**Objective**: Handle any remaining interactive tasks after all waves complete. 
In most cases, the review gate is handled inline during Phase 2 wave execution. + +If any interactive tasks remain unprocessed (e.g., from dynamically added fix cycles), execute them using the same spawn_agent protocol as Phase 2. + +**Success Criteria**: +- All interactive tasks completed or skipped +- Fix cycle limit respected + +--- + +### Phase 4: Results Aggregation + +**Objective**: Generate final results and human-readable report. + +```javascript +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +const completed = tasks.filter(t => t.status === 'completed') +const failed = tasks.filter(t => t.status === 'failed') +const skipped = tasks.filter(t => t.status === 'skipped') + +// Export results.csv +Bash(`cp ${sessionFolder}/tasks.csv ${sessionFolder}/results.csv`) + +// Generate context.md +let contextMd = `# Issue Resolution Report\n\n` +contextMd += `**Session**: ${sessionId}\n` +contextMd += `**Pipeline**: ${pipelineMode}\n` +contextMd += `**Issues**: ${issueIds.join(', ')}\n` +contextMd += `**Fix Cycles**: ${fixCycles}/${2}\n\n` + +contextMd += `## Summary\n\n` +contextMd += `| Status | Count |\n|--------|-------|\n` +contextMd += `| Completed | ${completed.length} |\n` +contextMd += `| Failed | ${failed.length} |\n` +contextMd += `| Skipped | ${skipped.length} |\n\n` + +contextMd += `## Task Details\n\n` +for (const t of tasks) { + const icon = t.status === 'completed' ? '[OK]' : t.status === 'failed' ? 
'[FAIL]' : '[SKIP]' + contextMd += `${icon} **${t.id}**: ${t.title} (${t.role})\n` + if (t.findings) contextMd += ` Findings: ${t.findings.substring(0, 200)}\n` + if (t.artifact_path) contextMd += ` Artifact: ${t.artifact_path}\n` + if (t.error) contextMd += ` Error: ${t.error}\n` + contextMd += `\n` +} + +contextMd += `## Deliverables\n\n` +contextMd += `| Artifact | Path |\n|----------|------|\n` +contextMd += `| Context Reports | ${sessionFolder}/explorations/ |\n` +contextMd += `| Solution Plans | ${sessionFolder}/solutions/ |\n` +contextMd += `| Audit Report | ${sessionFolder}/audits/audit-report.json |\n` +contextMd += `| Execution Queue | ${sessionFolder}/queue/execution-queue.json |\n` +contextMd += `| Build Results | ${sessionFolder}/builds/ |\n` + +Write(`${sessionFolder}/context.md`, contextMd) + +// Display summary +console.log(` +Issue Resolution Complete +Pipeline: ${pipelineMode} +Completed: ${completed.length} | Failed: ${failed.length} | Skipped: ${skipped.length} +Fix Cycles Used: ${fixCycles}/2 +Output: ${sessionFolder} +`) +``` + +**Success Criteria**: +- results.csv exported (all tasks, both modes) +- context.md generated +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +Both csv-wave and interactive agents share the same discoveries.ndjson file: + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"EXPLORE-001","type":"file_found","data":{"path":"src/auth/handler.ts","relevance":"high","purpose":"Main auth handler"}} +{"ts":"2026-03-08T10:01:00Z","worker":"EXPLORE-001","type":"pattern_found","data":{"pattern":"middleware-chain","location":"src/middleware/","description":"Express middleware chain pattern"}} +{"ts":"2026-03-08T10:05:00Z","worker":"SOLVE-001","type":"solution_approach","data":{"issue_id":"ISS-20260308-120000","approach":"refactor","estimated_files":5}} +{"ts":"2026-03-08T10:10:00Z","worker":"BUILD-001","type":"impl_result","data":{"issue_id":"ISS-20260308-120000","files_changed":3,"tests_pass":true}} +``` 
+ +**Discovery Types**: + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `file_found` | `path` | `{path, relevance, purpose}` | Relevant file discovered | +| `pattern_found` | `pattern+location` | `{pattern, location, description}` | Code pattern identified | +| `dependency_found` | `from+to` | `{from, to, type}` | Dependency relationship | +| `solution_approach` | `issue_id` | `{issue_id, approach, estimated_files}` | Solution strategy | +| `conflict_found` | `files` | `{issues, files, resolution}` | File conflict between issues | +| `impl_result` | `issue_id` | `{issue_id, files_changed, tests_pass}` | Implementation outcome | + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Circular dependency | Detect in wave computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent tasks in later waves | +| Interactive agent timeout | Urge convergence via send_input, then close if still timed out | +| Interactive agent failed | Mark as failed, skip dependents | +| All agents in wave failed | Log error, offer retry or abort | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| Review rejection exceeds 2 rounds | Force convergence to MARSHAL with warning | +| No issues found for given IDs | Report error, ask user for valid IDs | +| Deferred BUILD count unknown | Read execution-queue.json after MARSHAL completes | +| Continue mode: no session found | List available sessions, prompt user to select | + +--- + +## Core Rules + +1. **Start Immediately**: First action is session initialization, then Phase 1 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. 
**CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive) +4. **CSV First**: Default to csv-wave for tasks; only use interactive when interaction pattern requires it +5. **Context Propagation**: prev_context built from master CSV, not from memory +6. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson -- both mechanisms share it +7. **Skip on Failure**: If a dependency failed, skip the dependent task (regardless of mechanism) +8. **Lifecycle Balance**: Every spawn_agent MUST have a matching close_agent +9. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +10. **DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-issue/agents/reviewer.md b/.codex/skills/team-issue/agents/reviewer.md new file mode 100644 index 00000000..08c63ea9 --- /dev/null +++ b/.codex/skills/team-issue/agents/reviewer.md @@ -0,0 +1,204 @@ +# Reviewer Agent + +Technical review agent for issue solutions. Performs multi-dimensional review with scored verdict. Used as interactive agent within the team-issue pipeline when review gates are required (full/batch modes). 
+
+## Identity
+
+- **Type**: `interactive`
+- **Responsibility**: Multi-dimensional solution review with verdict routing
+
+## Boundaries
+
+### MUST
+
+- Load role definition via MANDATORY FIRST STEPS pattern
+- Read all solution artifacts and explorer context before reviewing
+- Score across three weighted dimensions: Technical Feasibility (40%), Risk (30%), Completeness (30%)
+- Produce structured output with per-issue and overall verdicts
+- Include file:line references in findings
+- Write audit report to session audits folder
+
+### MUST NOT
+
+- Skip the MANDATORY FIRST STEPS role loading
+- Modify solution artifacts or code
+- Produce unstructured output
+- Review without reading explorer context (when available)
+- Skip any scoring dimension
+
+---
+
+## Toolbox
+
+### Available Tools
+
+| Tool | Type | Purpose |
+|------|------|---------|
+| `Read` | file | Load solution artifacts and context files |
+| `Bash` | shell | Run `ccw issue solutions --json` to load bound solutions |
+| `Grep` | search | Search codebase for pattern conformance checks |
+| `Glob` | search | Find relevant files for coverage validation |
+| `Write` | file | Write audit report |
+
+---
+
+## Execution
+
+### Phase 1: Context Loading
+
+**Objective**: Load all inputs needed for review.
+
+**Input**:
+
+| Source | Required | Description |
+|--------|----------|-------------|
+| Solution artifacts | Yes | `{sessionFolder}/solutions/solution-{issueId}.json` |
+| Explorer context | No | `{sessionFolder}/explorations/context-{issueId}.json` |
+| Bound solutions | Yes | `ccw issue solutions --json` |
+| Discoveries | No | `{sessionFolder}/discoveries.ndjson` |
+| Wisdom files | No | `{sessionFolder}/wisdom/` |
+
+**Steps**:
+
+1. Read session folder path from spawn message
+2. Extract issue IDs from spawn message
+3. Load explorer context reports for each issue
+4. Load bound solutions for each issue via CLI
+5. 
Load discoveries for cross-reference + +--- + +### Phase 2: Multi-Dimensional Review + +**Objective**: Score each solution across three weighted dimensions. + +**Technical Feasibility (40%)**: + +| Criterion | Check | Score Impact | +|-----------|-------|-------------| +| File Coverage | Solution covers all affected files from explorer context | High | +| Dependency Awareness | Considers dependency cascade effects | Medium | +| API Compatibility | Maintains backward compatibility | High | +| Pattern Conformance | Follows existing code patterns | Medium | + +**Risk Assessment (30%)**: + +| Criterion | Check | Score Impact | +|-----------|-------|-------------| +| Scope Creep | Solution stays within issue boundary (task_count <= 10) | High | +| Breaking Changes | No destructive modifications | High | +| Side Effects | No unforeseen side effects | Medium | +| Rollback Path | Can rollback if issues occur | Low | + +**Completeness (30%)**: + +| Criterion | Check | Score Impact | +|-----------|-------|-------------| +| All Tasks Defined | Task decomposition is complete (count > 0) | High | +| Test Coverage | Includes test plan | Medium | +| Edge Cases | Considers boundary conditions | Low | + +**Score Calculation**: + +``` +total_score = round( + technical_feasibility.score * 0.4 + + risk_assessment.score * 0.3 + + completeness.score * 0.3 +) +``` + +**Verdict Rules**: + +| Score | Verdict | Description | +|-------|---------|-------------| +| >= 80 | approved | Solution is ready for implementation | +| 60-79 | concerns | Minor issues noted, proceed with warnings | +| < 60 | rejected | Solution needs revision before proceeding | + +--- + +### Phase 3: Compile Audit Report + +**Objective**: Write structured audit report. + +**Steps**: + +1. Compute per-issue scores and verdicts +2. Compute overall verdict (any rejected -> overall rejected) +3. 
Write audit report to `<session_folder>/audits/audit-report.json`: + +```json +{ + "session_id": "<session_id>", + "review_timestamp": "<ISO-8601 timestamp>", + "issues_reviewed": [ + { + "issue_id": "<issue_id>", + "solution_id": "<solution_id>", + "total_score": 85, + "verdict": "approved", + "technical_feasibility": { + "score": 90, + "findings": ["Good file coverage", "API compatible"] + }, + "risk_assessment": { + "score": 80, + "findings": ["No breaking changes", "Rollback via git revert"] + }, + "completeness": { + "score": 82, + "findings": ["5 tasks defined", "Test plan included"] + } + } + ], + "overall_verdict": "approved", + "overall_score": 85, + "review_count": 1, + "rejection_reasons": [], + "actionable_feedback": [] +} +``` + +4. For rejected solutions: include specific rejection reasons and actionable feedback for SOLVE-fix task + +--- + +## Structured Output Template + +``` +## Summary +- Review of solutions: <issue_ids> + +## Findings +- Finding 1: specific description with file:line reference +- Finding 2: specific description with file:line reference + +## Per-Issue Verdicts +- <issue_id>: <total_score>/100 (<verdict>) + - Technical: <score>/100 + - Risk: <score>/100 + - Completeness: <score>/100 + +## Overall Verdict +<verdict> (score: <overall_score>/100) + +## Rejection Feedback (if rejected) +1. Specific concern with remediation suggestion +2. Specific concern with remediation suggestion + +## Open Questions +1. 
Question needing clarification (if any) +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Solution file not found | Report in Open Questions, score as 0 for completeness | +| Explorer context missing | Proceed with reduced confidence, note in findings | +| Bound solution not found via CLI | Attempt file-based fallback, report if still missing | +| Processing failure | Output partial results with clear status indicator | +| Timeout approaching | Output current findings with "PARTIAL" status | diff --git a/.codex/skills/team-issue/instructions/agent-instruction.md b/.codex/skills/team-issue/instructions/agent-instruction.md new file mode 100644 index 00000000..79a951bd --- /dev/null +++ b/.codex/skills/team-issue/instructions/agent-instruction.md @@ -0,0 +1,198 @@ +# Agent Instruction -- Team Issue Resolution + +CSV agent instruction template for `spawn_agents_on_csv`. Each agent receives this template with its row's column values substituted via `{column_name}` placeholders. + +--- + +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS +1. Read shared discoveries: `.workflow/.csv-wave/{session_id}/discoveries.ndjson` (if exists, skip if not) +2. Read project context: `.workflow/project-tech.json` (if exists) +3. Read wisdom files: `.workflow/.csv-wave/{session_id}/wisdom/` (conventions, learnings) + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Description**: {description} +**Role**: {role} +**Issue IDs**: {issue_ids} +**Execution Method**: {execution_method} + +### Previous Tasks' Findings (Context) +{prev_context} + +--- + +## Execution Protocol + +### Role Router + +Determine your execution steps based on `{role}`: + +| Role | Execution Steps | +|------|----------------| +| explorer | Step A: Codebase Exploration | +| planner | Step B: Solution Design | +| integrator | Step C: Queue Formation | +| implementer | Step D: Implementation | + +--- + +### Step A: Codebase Exploration (explorer role) + +1. 
Extract issue ID from `{issue_ids}` (pattern: `GH-\d+` or `ISS-\d{8}-\d{6}`) +2. Load issue details: `Bash("ccw issue status --json")` +3. Assess complexity from issue keywords: + +| Signal | Weight | +|--------|--------| +| Structural change (refactor, architect) | +2 | +| Cross-cutting (multiple, across) | +2 | +| Integration (api, database) | +1 | +| High priority (>= 4) | +1 | + +4. Explore codebase: + - Use `mcp__ace-tool__search_context` for semantic search based on issue keywords + - Read relevant files to understand context + - Map dependencies and integration points + - Check git log for related changes + +5. Write context report: + ```bash + # Write to session explorations folder + Write("/explorations/context-.json", JSON.stringify({ + issue_id: "", + issue: { id, title, priority, status, labels, feedback }, + relevant_files: [{ path, relevance }], + dependencies: [], + impact_scope: "low|medium|high", + existing_patterns: [], + related_changes: [], + key_findings: [], + complexity_assessment: "Low|Medium|High" + })) + ``` + +6. Share discoveries to board + +--- + +### Step B: Solution Design (planner role) + +1. Extract issue ID from `{issue_ids}` +2. Load explorer context (if available): Read upstream artifact from prev_context +3. Check if this is a revision task (SOLVE-fix-*): If yes, read audit report for rejection feedback +4. Generate solution via CLI: + ```bash + ccw cli -p "PURPOSE: Design solution for issue and decompose into implementation tasks; success = solution with task breakdown + TASK: * Load issue details * Analyze explorer context * Design solution approach * Break into tasks * Generate solution JSON + MODE: analysis + CONTEXT: @**/* | Memory: Issue , Explorer findings from prev_context + EXPECTED: Solution JSON with: issue_id, solution_id, approach, tasks[], estimated_files, dependencies + CONSTRAINTS: Follow existing patterns | Minimal changes + " --tool gemini --mode analysis --rule planning-breakdown-task-steps + ``` +5. 
Write solution artifact: + ```bash + Write("/solutions/solution-.json", solutionJson) + ``` +6. Bind solution to issue: `Bash("ccw issue bind ")` + +--- + +### Step C: Queue Formation (integrator role) + +1. Extract issue IDs from `{issue_ids}` +2. Verify all issues have bound solutions: `Bash("ccw issue solutions --json")` +3. Analyze file conflicts between solutions +4. Build dependency graph for execution ordering +5. Determine parallel execution groups +6. Write execution queue: + ```bash + Write("/queue/execution-queue.json", JSON.stringify({ + queue: [{ issue_id, solution_id, order, depends_on: [], estimated_files: [] }], + conflicts: [{ issues: [], files: [], resolution: "" }], + parallel_groups: [{ group: 0, issues: [] }] + })) + ``` + +--- + +### Step D: Implementation (implementer role) + +1. Extract issue ID from `{issue_ids}` +2. Load bound solution: `Bash("ccw issue solutions --json")` +3. Load explorer context (from prev_context or file) +4. Determine execution backend from `{execution_method}`: + +| Method | CLI Command | +|--------|-------------| +| codex | `ccw cli --tool codex --mode write --id issue-` | +| gemini | `ccw cli --tool gemini --mode write --id issue-` | +| qwen | `ccw cli --tool qwen --mode write --id issue-` | + +5. Execute implementation: + ```bash + ccw cli -p "PURPOSE: Implement solution for issue ; success = all tasks completed, tests pass + TASK: + MODE: write + CONTEXT: @**/* | Memory: Solution plan, explorer context + EXPECTED: Working implementation with code changes, test updates + CONSTRAINTS: Follow existing patterns | Maintain backward compatibility + " --tool --mode write --rule development-implement-feature + ``` + +6. Verify: Run tests, check for errors +7. 
Update issue status: `Bash("ccw issue update --status resolved")` + +--- + +## Share Discoveries (ALL ROLES) + +After completing your work, append findings to the shared discovery board: + +```bash +echo '{"ts":"","worker":"{id}","type":"","data":{...}}' >> /discoveries.ndjson +``` + +**Discovery Types to Share**: + +| Type | Data Schema | When to Use | +|------|-------------|-------------| +| `file_found` | `{path, relevance, purpose}` | Explorer: relevant file discovered | +| `pattern_found` | `{pattern, location, description}` | Explorer: code pattern identified | +| `dependency_found` | `{from, to, type}` | Explorer: module dependency found | +| `solution_approach` | `{issue_id, approach, estimated_files}` | Planner: solution strategy | +| `conflict_found` | `{issues, files, resolution}` | Integrator: file conflict | +| `impl_result` | `{issue_id, files_changed, tests_pass}` | Implementer: build outcome | + +--- + +## Output (report_agent_job_result) + +Return JSON: +```json +{ + "id": "{id}", + "status": "completed | failed", + "findings": "Key discoveries and implementation notes (max 500 chars)", + "artifact_path": "relative path to main artifact file (e.g., explorations/context-ISS-xxx.json)", + "error": "" +} +``` + +--- + +## Quality Checklist + +Before reporting complete: +- [ ] Mandatory first steps completed (discoveries, project context, wisdom) +- [ ] Role-specific execution steps followed +- [ ] At least 1 discovery shared to board +- [ ] Artifact file written to session folder +- [ ] Findings include file:line references where applicable +- [ ] prev_context findings were incorporated diff --git a/.codex/skills/team-issue/schemas/tasks-schema.md b/.codex/skills/team-issue/schemas/tasks-schema.md new file mode 100644 index 00000000..4ebaa3fb --- /dev/null +++ b/.codex/skills/team-issue/schemas/tasks-schema.md @@ -0,0 +1,198 @@ +# Team Issue Resolution -- CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### Input Columns (Set by 
Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | string | Yes | Unique task identifier | `"EXPLORE-001"` | +| `title` | string | Yes | Short task title | `"Context analysis"` | +| `description` | string | Yes | Detailed task description (self-contained) | `"Analyze issue context and map codebase impact for ISS-20260308-120000"` | +| `role` | enum | Yes | Worker role: explorer, planner, reviewer, integrator, implementer | `"explorer"` | +| `issue_ids` | string | Yes | Semicolon-separated issue IDs | `"ISS-20260308-120000;ISS-20260308-120001"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | +| `execution_method` | string | No | CLI tool for BUILD tasks: codex, gemini, qwen, or empty | `"gemini"` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"EXPLORE-001"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"EXPLORE-001"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[EXPLORE-001] Found 5 relevant files..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` -> `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Identified 3 affected modules..."` | +| `artifact_path` | string | Path to generated artifact file | `"explorations/context-ISS-20260308-120000.json"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | 
One-shot batch execution within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution (review gates) | + +Interactive tasks appear in master CSV for dependency tracking but are NOT included in wave-{N}.csv files. + +--- + +### Role Values + +| Role | Task Prefixes | Responsibility | +|------|---------------|----------------| +| `explorer` | EXPLORE-* | Codebase exploration, context analysis, impact assessment | +| `planner` | SOLVE-*, SOLVE-fix-* | Solution design, task decomposition, revision | +| `reviewer` | AUDIT-* | Technical review with multi-dimensional scoring | +| `integrator` | MARSHAL-* | Queue formation, conflict detection, execution ordering | +| `implementer` | BUILD-* | Code implementation, testing, verification | + +--- + +### Example Data + +```csv +id,title,description,role,issue_ids,exec_mode,execution_method,deps,context_from,wave,status,findings,artifact_path,error +"EXPLORE-001","Context analysis","Analyze issue context and map codebase impact for ISS-20260308-120000","explorer","ISS-20260308-120000","csv-wave","","","","1","pending","","","" +"SOLVE-001","Solution design","Design solution and decompose into implementation tasks for ISS-20260308-120000","planner","ISS-20260308-120000","csv-wave","","EXPLORE-001","EXPLORE-001","2","pending","","","" +"AUDIT-001","Technical review","Review solution for feasibility risk and completeness","reviewer","ISS-20260308-120000","interactive","","SOLVE-001","SOLVE-001","3","pending","","","" +"MARSHAL-001","Queue formation","Form execution queue with conflict detection and optimal ordering","integrator","ISS-20260308-120000","csv-wave","","AUDIT-001","SOLVE-001","4","pending","","","" +"BUILD-001","Implementation","Implement solution plan and verify with tests","implementer","ISS-20260308-120000","csv-wave","gemini","MARSHAL-001","EXPLORE-001;SOLVE-001","5","pending","","","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine 
(Phase 2) Agent (Execution) +--------------------- -------------------- ----------------- +id ----------> id ----------> id +title ----------> title ----------> (reads) +description ----------> description ----------> (reads) +role ----------> role ----------> (reads) +issue_ids ----------> issue_ids ----------> (reads) +exec_mode ----------> exec_mode ----------> (reads) +execution_method ------> execution_method -----> (reads) +deps ----------> deps ----------> (reads) +context_from----------> context_from----------> (reads) + wave ----------> (reads) + prev_context ----------> (reads) + status + findings + artifact_path + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "EXPLORE-001", + "status": "completed", + "findings": "Identified 5 relevant files in src/auth/. Impact scope: medium. Key dependency: shared/utils/token.ts. Existing pattern: middleware-chain in src/middleware/.", + "artifact_path": "explorations/context-ISS-20260308-120000.json", + "error": "" +} +``` + +Interactive tasks output via structured text or JSON written to `interactive/{id}-result.json`. 
+ +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `file_found` | `path` | `{path, relevance, purpose}` | Relevant file discovered during exploration | +| `pattern_found` | `pattern+location` | `{pattern, location, description}` | Code pattern identified | +| `dependency_found` | `from+to` | `{from, to, type}` | Dependency relationship between modules | +| `solution_approach` | `issue_id` | `{issue_id, approach, estimated_files}` | Solution strategy chosen | +| `conflict_found` | `files` | `{issues, files, resolution}` | File conflict between issue solutions | +| `impl_result` | `issue_id` | `{issue_id, files_changed, tests_pass}` | Implementation outcome | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"EXPLORE-001","type":"file_found","data":{"path":"src/auth/handler.ts","relevance":"high","purpose":"Main auth request handler"}} +{"ts":"2026-03-08T10:01:00Z","worker":"EXPLORE-001","type":"pattern_found","data":{"pattern":"middleware-chain","location":"src/middleware/","description":"Express middleware chain pattern used across all route handlers"}} +{"ts":"2026-03-08T10:05:00Z","worker":"SOLVE-001","type":"solution_approach","data":{"issue_id":"ISS-20260308-120000","approach":"refactor-extract","estimated_files":5}} +{"ts":"2026-03-08T10:15:00Z","worker":"MARSHAL-001","type":"conflict_found","data":{"issues":["ISS-20260308-120000","ISS-20260308-120001"],"files":["src/auth/handler.ts"],"resolution":"sequential"}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. 
+ +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| CSV task findings | Interactive task | Injected via spawn message (prev_context) | +| Interactive task result | CSV task prev_context | Read from interactive/{id}-result.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | + +--- + +## Pipeline-Specific Schemas + +### Quick Pipeline (4 tasks, 4 waves) + +| Wave | Tasks | exec_mode | +|------|-------|-----------| +| 1 | EXPLORE-001 | csv-wave | +| 2 | SOLVE-001 | csv-wave | +| 3 | MARSHAL-001 | csv-wave | +| 4 | BUILD-001 | csv-wave | + +### Full Pipeline (5 tasks, 5 waves) + +| Wave | Tasks | exec_mode | +|------|-------|-----------| +| 1 | EXPLORE-001 | csv-wave | +| 2 | SOLVE-001 | csv-wave | +| 3 | AUDIT-001 | interactive | +| 4 | MARSHAL-001 | csv-wave | +| 5 | BUILD-001 | csv-wave | + +### Batch Pipeline (N+N+1+1+M tasks) + +| Wave | Tasks | exec_mode | Parallelism | +|------|-------|-----------|-------------| +| 1 | EXPLORE-001..N | csv-wave | max 5 concurrent | +| 2 | SOLVE-001..N | csv-wave | sequential | +| 3 | AUDIT-001 | interactive | 1 | +| 4 | MARSHAL-001 | csv-wave | 1 | +| 5 | BUILD-001..M (deferred) | csv-wave | max 3 concurrent | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Role valid | Value in {explorer, planner, reviewer, integrator, implementer} | "Invalid role: {role}" | +| Description non-empty | Every task has 
description | "Empty description for task: {id}" | +| Status enum | status in {pending, completed, failed, skipped} | "Invalid status: {status}" | +| Cross-mechanism deps | Interactive->CSV deps resolve correctly | "Cross-mechanism dependency unresolvable: {id}" | +| Issue IDs non-empty | Every task has at least one issue_id | "No issue_ids for task: {id}" | diff --git a/.codex/skills/team-iterdev/SKILL.md b/.codex/skills/team-iterdev/SKILL.md new file mode 100644 index 00000000..fa4b9cb6 --- /dev/null +++ b/.codex/skills/team-iterdev/SKILL.md @@ -0,0 +1,788 @@ +--- +name: team-iterdev +description: Iterative development team with Generator-Critic loop, dynamic pipeline selection (patch/sprint/multi-sprint), task ledger for progress tracking, and shared wisdom for cross-sprint learning. +argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"task description\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults. + +# Team IterDev + +## Usage + +```bash +$team-iterdev "Implement user authentication with JWT" +$team-iterdev -c 4 "Refactor payment module to support multiple gateways" +$team-iterdev -y "Fix login button not responding on mobile" +$team-iterdev --continue "ids-auth-jwt-20260308" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 3) +- `--continue`: Resume existing session + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Iterative development team skill with Generator-Critic (GC) loops between developer and reviewer roles (max 3 rounds). 
Automatically selects pipeline complexity (patch/sprint/multi-sprint) based on task signals. Tracks progress via task ledger. Accumulates cross-sprint wisdom in shared discovery board. + +**Execution Model**: Hybrid -- CSV wave pipeline (primary) + individual agent spawn (secondary for GC loop control and requirement analysis) + +``` ++-------------------------------------------------------------------------+ +| TEAM ITERDEV WORKFLOW | ++-------------------------------------------------------------------------+ +| | +| Phase 0: Pre-Wave Interactive | +| +-- Analyze task complexity and select pipeline mode | +| +-- Explore codebase for patterns and dependencies | +| +-- Output: pipeline mode, task analysis, session artifacts | +| | +| Phase 1: Requirement -> CSV + Classification | +| +-- Parse task into pipeline-specific task chain | +| +-- Assign roles: architect, developer, tester, reviewer | +| +-- Classify tasks: csv-wave | interactive (exec_mode) | +| +-- Compute dependency waves (topological sort -> depth grouping) | +| +-- Generate tasks.csv with wave + exec_mode columns | +| +-- User validates task breakdown (skip if -y) | +| | +| Phase 2: Wave Execution Engine (Extended) | +| +-- For each wave (1..N): | +| | +-- Execute pre-wave interactive tasks (if any) | +| | +-- Build wave CSV (filter csv-wave tasks for this wave) | +| | +-- Inject previous findings into prev_context column | +| | +-- spawn_agents_on_csv(wave CSV) | +| | +-- Execute post-wave interactive tasks (if any) | +| | +-- Merge all results into master tasks.csv | +| | +-- Check: any failed? 
-> skip dependents | +| +-- discoveries.ndjson shared across all modes (append-only) | +| | +| Phase 3: Post-Wave Interactive | +| +-- Generator-Critic (GC) loop control | +| +-- If review has critical issues: trigger DEV-fix -> re-REVIEW | +| +-- Max 3 GC rounds, then force convergence | +| | +| Phase 4: Results Aggregation | +| +-- Export final results.csv | +| +-- Generate context.md with all findings | +| +-- Display summary: completed/failed/skipped per wave | +| +-- Offer: view results | retry failed | done | +| | ++-------------------------------------------------------------------------+ +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, clarification, inline utility | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Architecture design (DESIGN-*) | `csv-wave` | +| Code implementation (DEV-*) | `csv-wave` | +| Test execution and fix cycle (VERIFY-*) | `csv-wave` | +| Code review (REVIEW-*) | `csv-wave` | +| Fix task from review feedback (DEV-fix-*) | `csv-wave` | +| GC loop control (decide revision vs convergence) | `interactive` | +| Task analysis and pipeline selection (Phase 0) | `interactive` | + +--- + +## Pipeline Selection Logic + +| Signal | Score | +|--------|-------| +| Changed files > 10 | +3 | +| Changed files 3-10 | +2 | +| Structural change (refactor, architect, restructure) | +3 | +| Cross-cutting concern (multiple, across, cross) | +2 | +| Simple fix (fix, bug, typo, patch) | -2 | + +| Score | Pipeline | +|-------|----------| +| >= 5 | multi-sprint | +| 2-4 | sprint | +| 0-1 | patch | + +### Pipeline Definitions + +**Patch** (2 tasks, serial): +``` +DEV-001 -> VERIFY-001 +``` + +**Sprint** (4 
tasks, with parallel window): +``` +DESIGN-001 -> DEV-001 -> [VERIFY-001 + REVIEW-001] (parallel) +``` + +**Multi-Sprint** (5+ tasks, iterative with GC loop): +``` +Sprint 1: DESIGN-001 -> DEV-001 -> [VERIFY-001 + REVIEW-001] -> DEV-fix (if needed) -> REVIEW-002 +Sprint 2+ created dynamically +``` + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,role,pipeline,sprint_num,gc_round,deps,context_from,exec_mode,wave,status,findings,review_score,gc_signal,error +"DESIGN-001","Technical design and task breakdown","Explore codebase, create component design, break into implementable tasks with acceptance criteria","architect","sprint","1","0","","","csv-wave","1","pending","","","","" +"DEV-001","Implement design","Load design and task breakdown, implement tasks in execution order, validate syntax","developer","sprint","1","0","DESIGN-001","DESIGN-001","csv-wave","2","pending","","","","" +"VERIFY-001","Verify implementation","Detect test framework, run targeted tests, run regression suite","tester","sprint","1","0","DEV-001","DEV-001","csv-wave","3","pending","","","","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier (string) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description | +| `role` | Input | Worker role: architect, developer, tester, reviewer | +| `pipeline` | Input | Pipeline mode: patch, sprint, multi-sprint | +| `sprint_num` | Input | Sprint number (1-based, for multi-sprint) | +| `gc_round` | Input | Generator-Critic round number (0 = initial, 1+ = fix round) | +| `deps` | Input | Semicolon-separated dependency task IDs | +| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `wave` | Computed | Wave number (computed by topological sort, 1-based) | +| `status` | Output | `pending` -> `completed` / `failed` 
/ `skipped` | +| `findings` | Output | Key discoveries or implementation notes (max 500 chars) | +| `review_score` | Output | Quality score 1-10 (reviewer only) | +| `gc_signal` | Output | `REVISION_NEEDED` or `CONVERGED` (reviewer only) | +| `error` | Output | Error message if failed (empty if success) | + +### Per-Wave CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). + +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| task-analyzer | agents/task-analyzer.md | 2.3 (wait-respond) | Analyze task complexity, select pipeline mode, detect capabilities | standalone (Phase 0) | +| gc-controller | agents/gc-controller.md | 2.3 (wait-respond) | Evaluate review severity, decide DEV-fix vs convergence | post-wave (after REVIEW wave) | + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. 
+ +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state -- all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 4 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 4 | +| `interactive/{id}-result.json` | Results from interactive tasks | Created per interactive task | +| `wisdom/` | Cross-sprint knowledge accumulation | Updated by agents via discoveries | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ ++-- tasks.csv # Master state (all tasks, both modes) ++-- results.csv # Final results export ++-- discoveries.ndjson # Shared discovery board (all agents) ++-- context.md # Human-readable report ++-- wave-{N}.csv # Temporary per-wave input (csv-wave only) ++-- interactive/ # Interactive task artifacts +| +-- {id}-result.json # Per-task results ++-- wisdom/ # Cross-sprint knowledge +| +-- learnings.md +| +-- decisions.md +| +-- conventions.md +| +-- issues.md ++-- design/ # Architect output +| +-- design-001.md +| +-- task-breakdown.json ++-- code/ # Developer tracking +| +-- dev-log.md ++-- verify/ # Tester output +| +-- verify-001.json ++-- review/ # Reviewer output + +-- review-001.md +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +const getUtc8ISOString = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString() + +// Parse flags +const AUTO_YES = $ARGUMENTS.includes('--yes') || $ARGUMENTS.includes('-y') +const continueMode = $ARGUMENTS.includes('--continue') +const concurrencyMatch = $ARGUMENTS.match(/(?:--concurrency|-c)\s+(\d+)/) +const maxConcurrency = concurrencyMatch ? 
parseInt(concurrencyMatch[1]) : 3 + +// Clean requirement text (remove flags) +const requirement = $ARGUMENTS + .replace(/--yes|-y|--continue|--concurrency\s+\d+|-c\s+\d+/g, '') + .trim() + +const slug = requirement.toLowerCase() + .replace(/[^a-z0-9\u4e00-\u9fa5]+/g, '-') + .substring(0, 40) +const dateStr = getUtc8ISOString().substring(0, 10).replace(/-/g, '') +let sessionId = `ids-${slug}-${dateStr}` +let sessionFolder = `.workflow/.csv-wave/${sessionId}` + +// Continue mode: find existing session +if (continueMode) { + const existing = Bash(`ls -t .workflow/.csv-wave/ids-* 2>/dev/null | head -1`).trim() + if (existing) { + sessionId = existing.split('/').pop() + sessionFolder = existing + // Read existing tasks.csv, find incomplete waves, resume from Phase 2 + } +} + +Bash(`mkdir -p ${sessionFolder}/{interactive,wisdom,design,code,verify,review}`) + +// Initialize wisdom files +Write(`${sessionFolder}/wisdom/learnings.md`, `# Learnings\n\n`) +Write(`${sessionFolder}/wisdom/decisions.md`, `# Decisions\n\n`) +Write(`${sessionFolder}/wisdom/conventions.md`, `# Conventions\n\n`) +Write(`${sessionFolder}/wisdom/issues.md`, `# Issues\n\n`) +``` + +--- + +### Phase 0: Pre-Wave Interactive + +**Objective**: Analyze task complexity, explore codebase, and select pipeline mode. + +**Execution**: + +```javascript +const analyzer = spawn_agent({ + message: ` +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS (Agent Execute) +1. **Read role definition**: .codex/skills/team-iterdev/agents/task-analyzer.md (MUST read first) +2. Read: .workflow/project-tech.json (if exists) + +--- + +Goal: Analyze iterative development task and select pipeline mode +Requirement: ${requirement} + +### Task +1. Detect capabilities from keywords: + - design/architect/restructure -> architect role needed + - implement/build/code/fix -> developer role needed + - test/verify/validate -> tester role needed + - review/audit/quality -> reviewer role needed +2. 
Score complexity for pipeline selection: + - Changed files > 10: +3, 3-10: +2 + - Structural change: +3 + - Cross-cutting: +2 + - Simple fix: -2 +3. Score >= 5 -> multi-sprint, 2-4 -> sprint, 0-1 -> patch +4. Return structured analysis result +` +}) + +const analyzerResult = wait({ ids: [analyzer], timeout_ms: 120000 }) + +if (analyzerResult.timed_out) { + send_input({ id: analyzer, message: "Please finalize and output current findings." }) + const retry = wait({ ids: [analyzer], timeout_ms: 60000 }) +} + +close_agent({ id: analyzer }) + +// Store analysis result +Write(`${sessionFolder}/interactive/task-analyzer-result.json`, JSON.stringify({ + task_id: "task-analysis", + status: "completed", + pipeline_mode: parsedMode, // "patch" | "sprint" | "multi-sprint" + capabilities: parsedCapabilities, + complexity_score: parsedScore, + roles_needed: parsedRoles, + timestamp: getUtc8ISOString() +})) +``` + +If not AUTO_YES, present pipeline mode selection for confirmation: + +```javascript +if (!AUTO_YES) { + const answer = AskUserQuestion({ + questions: [{ + question: `Task: "${requirement}"\nRecommended pipeline: ${pipeline_mode} (complexity: ${complexity_score})\nRoles: ${roles_needed.join(', ')}\n\nApprove?`, + header: "Pipeline Selection", + multiSelect: false, + options: [ + { label: "Approve", description: `Use ${pipeline_mode} pipeline` }, + { label: "Patch", description: "Simple fix: DEV -> VERIFY (2 tasks)" }, + { label: "Sprint", description: "Standard: DESIGN -> DEV -> VERIFY + REVIEW (4 tasks)" }, + { label: "Multi-Sprint", description: "Complex: Multiple sprint cycles with incremental delivery" } + ] + }] + }) +} +``` + +**Success Criteria**: +- Pipeline mode selected and confirmed +- Task analysis stored in session +- Interactive agents closed, results stored + +--- + +### Phase 1: Requirement -> CSV + Classification + +**Objective**: Build tasks.csv from selected pipeline mode with proper wave assignments. 
+ +**Decomposition Rules**: + +| Pipeline | Tasks | Wave Structure | +|----------|-------|---------------| +| patch | DEV-001 -> VERIFY-001 | 2 waves, serial | +| sprint | DESIGN-001 -> DEV-001 -> VERIFY-001 + REVIEW-001 | 3 waves (VERIFY and REVIEW parallel in wave 3) | +| multi-sprint | DESIGN-001 -> DEV-001 -> VERIFY-001 + REVIEW-001 -> DEV-fix + REVIEW-002 | 4+ waves, with GC loop | + +**Pipeline Task Definitions**: + +#### Patch Pipeline (2 csv-wave tasks) + +| Task ID | Role | Wave | Deps | Description | +|---------|------|------|------|-------------| +| DEV-001 | developer | 1 | (none) | Implement fix: load target files, apply changes, validate syntax | +| VERIFY-001 | tester | 2 | DEV-001 | Verify fix: detect test framework, run targeted tests, check for regressions | + +#### Sprint Pipeline (4 csv-wave tasks) + +| Task ID | Role | Wave | Deps | Description | +|---------|------|------|------|-------------| +| DESIGN-001 | architect | 1 | (none) | Technical design: explore codebase, create component design, task breakdown | +| DEV-001 | developer | 2 | DESIGN-001 | Implement design: load design and task breakdown, implement in order, validate syntax | +| VERIFY-001 | tester | 3 | DEV-001 | Verify implementation: detect framework, run targeted tests, run regression suite | +| REVIEW-001 | reviewer | 3 | DEV-001 | Code review: load changes and design, review across correctness/completeness/maintainability/security, score quality | + +#### Multi-Sprint Pipeline (4+ csv-wave tasks + interactive GC control) + +| Task ID | Role | Wave | Deps | Description | +|---------|------|------|------|-------------| +| DESIGN-001 | architect | 1 | (none) | Technical design and task breakdown for sprint 1 | +| DEV-001 | developer | 2 | DESIGN-001 | First implementation batch | +| VERIFY-001 | tester | 3 | DEV-001 | Test execution and fix cycle | +| REVIEW-001 | reviewer | 3 | DEV-001 | Code review with GC signal | +| GC-CHECK-001 | gc-controller | 4 | REVIEW-001 | GC decision: revision or 
convergence | + +Additional DEV-fix and REVIEW tasks created dynamically when GC controller decides REVISION. + +**Classification Rules**: + +All work tasks (design, development, testing, review) are `csv-wave`. GC loop control between reviewer and next dev-fix is `interactive` (post-wave, spawned by orchestrator to decide the GC outcome). + +**Wave Computation**: Kahn's BFS topological sort with depth tracking (csv-wave tasks only). + +**User Validation**: Display task breakdown with wave + exec_mode assignment (skip if AUTO_YES). + +**Success Criteria**: +- tasks.csv created with valid schema, wave, and exec_mode assignments +- No circular dependencies +- User approved (or AUTO_YES) + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with hybrid mechanism support and cross-wave context propagation. + +```javascript +const failedIds = new Set() +const skippedIds = new Set() +const MAX_GC_ROUNDS = 3 +let gcRound = 0 + +for (let wave = 1; wave <= maxWave; wave++) { + console.log(`\n## Wave ${wave}/${maxWave}\n`) + + // 1. Read current master CSV + const masterCsv = parseCsv(Read(`${sessionFolder}/tasks.csv`)) + + // 2. Separate csv-wave and interactive tasks for this wave + const waveTasks = masterCsv.filter(row => parseInt(row.wave) === wave) + const csvTasks = waveTasks.filter(t => t.exec_mode === 'csv-wave') + const interactiveTasks = waveTasks.filter(t => t.exec_mode === 'interactive') + + // 3. Skip tasks whose deps failed + const executableCsvTasks = [] + for (const task of csvTasks) { + const deps = task.deps.split(';').filter(Boolean) + if (deps.some(d => failedIds.has(d) || skippedIds.has(d))) { + skippedIds.add(task.id) + updateMasterCsvRow(sessionFolder, task.id, { + status: 'skipped', + error: 'Dependency failed or skipped' + }) + continue + } + executableCsvTasks.push(task) + } + + // 4. 
Build prev_context for each csv-wave task + for (const task of executableCsvTasks) { + const contextIds = task.context_from.split(';').filter(Boolean) + const prevFindings = contextIds + .map(id => { + const prevRow = masterCsv.find(r => r.id === id) + if (prevRow && prevRow.status === 'completed' && prevRow.findings) { + return `[Task ${id}: ${prevRow.title}] ${prevRow.findings}` + } + return null + }) + .filter(Boolean) + .join('\n') + task.prev_context = prevFindings || 'No previous context available' + } + + // 5. Write wave CSV and execute csv-wave tasks + if (executableCsvTasks.length > 0) { + const waveHeader = 'id,title,description,role,pipeline,sprint_num,gc_round,deps,context_from,exec_mode,wave,prev_context' + const waveRows = executableCsvTasks.map(t => + [t.id, t.title, t.description, t.role, t.pipeline, t.sprint_num, t.gc_round, t.deps, t.context_from, t.exec_mode, t.wave, t.prev_context] + .map(cell => `"${String(cell).replace(/"/g, '""')}"`) + .join(',') + ) + Write(`${sessionFolder}/wave-${wave}.csv`, [waveHeader, ...waveRows].join('\n')) + + const waveResult = spawn_agents_on_csv({ + csv_path: `${sessionFolder}/wave-${wave}.csv`, + id_column: "id", + instruction: Read(`.codex/skills/team-iterdev/instructions/agent-instruction.md`), + max_concurrency: maxConcurrency, + max_runtime_seconds: 900, + output_csv_path: `${sessionFolder}/wave-${wave}-results.csv`, + output_schema: { + type: "object", + properties: { + id: { type: "string" }, + status: { type: "string", enum: ["completed", "failed"] }, + findings: { type: "string" }, + review_score: { type: "string" }, + gc_signal: { type: "string" }, + error: { type: "string" } + }, + required: ["id", "status", "findings"] + } + }) + + // Merge results into master CSV + const waveResults = parseCsv(Read(`${sessionFolder}/wave-${wave}-results.csv`)) + for (const result of waveResults) { + updateMasterCsvRow(sessionFolder, result.id, { + status: result.status, + findings: result.findings || '', + 
review_score: result.review_score || '', + gc_signal: result.gc_signal || '', + error: result.error || '' + }) + if (result.status === 'failed') failedIds.add(result.id) + } + + Bash(`rm -f "${sessionFolder}/wave-${wave}.csv"`) + } + + // 6. Execute post-wave interactive tasks (GC controller) + for (const task of interactiveTasks) { + if (task.status !== 'pending') continue + const deps = task.deps.split(';').filter(Boolean) + if (deps.some(d => failedIds.has(d) || skippedIds.has(d))) { + skippedIds.add(task.id) + continue + } + + // Spawn GC controller agent + const gcAgent = spawn_agent({ + message: ` +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS (Agent Execute) +1. **Read role definition**: .codex/skills/team-iterdev/agents/gc-controller.md (MUST read first) +2. Read: ${sessionFolder}/discoveries.ndjson (shared discoveries) + +--- + +Goal: Evaluate review severity and decide DEV-fix vs convergence +Session: ${sessionFolder} +GC Round: ${gcRound} +Max GC Rounds: ${MAX_GC_ROUNDS} + +### Context +Read the latest review file in ${sessionFolder}/review/ and check: +- review.critical_count > 0 OR review.score < 7 -> REVISION +- review.critical_count == 0 AND review.score >= 7 -> CONVERGE +If gcRound >= maxRounds -> CONVERGE (force convergence) +` + }) + + const gcResult = wait({ ids: [gcAgent], timeout_ms: 120000 }) + if (gcResult.timed_out) { + send_input({ id: gcAgent, message: "Please finalize your decision now." 
}) + wait({ ids: [gcAgent], timeout_ms: 60000 }) + } + close_agent({ id: gcAgent }) + + Write(`${sessionFolder}/interactive/${task.id}-result.json`, JSON.stringify({ + task_id: task.id, status: "completed", + gc_decision: gcDecision, gc_round: gcRound, + timestamp: getUtc8ISOString() + })) + + if (gcDecision === "CONVERGE") { + // Skip remaining GC tasks, mark fix tasks as skipped + } else { + gcRound++ + // Dynamically add DEV-fix and REVIEW tasks to master CSV for next waves + const fixWave = wave + 1 + const reviewWave = wave + 2 + appendMasterCsvRow(sessionFolder, { + id: `DEV-fix-${gcRound}`, title: `Fix review issues (round ${gcRound})`, + description: `Fix critical/high issues from REVIEW. Focus on review feedback only.`, + role: 'developer', pipeline: pipeline_mode, sprint_num: '1', + gc_round: String(gcRound), deps: task.id, context_from: `REVIEW-${String(gcRound).padStart(3, '0')}`, + exec_mode: 'csv-wave', wave: String(fixWave), + status: 'pending', findings: '', review_score: '', gc_signal: '', error: '' + }) + appendMasterCsvRow(sessionFolder, { + id: `REVIEW-${String(gcRound + 1).padStart(3, '0')}`, title: `Re-review (round ${gcRound})`, + description: `Review fixes from DEV-fix-${gcRound}. 
Re-evaluate quality.`, + role: 'reviewer', pipeline: pipeline_mode, sprint_num: '1', + gc_round: String(gcRound), deps: `DEV-fix-${gcRound}`, context_from: `DEV-fix-${gcRound}`, + exec_mode: 'csv-wave', wave: String(reviewWave), + status: 'pending', findings: '', review_score: '', gc_signal: '', error: '' + }) + maxWave = Math.max(maxWave, reviewWave) + } + + updateMasterCsvRow(sessionFolder, task.id, { status: 'completed', findings: `GC decision: ${gcDecision}` }) + } +} +``` + +**Success Criteria**: +- All waves executed in order +- Both csv-wave and interactive tasks handled per wave +- Each wave's results merged into master CSV before next wave starts +- Dependent tasks skipped when predecessor failed +- discoveries.ndjson accumulated across all waves and mechanisms +- GC loop controlled with max 3 rounds + +--- + +### Phase 3: Post-Wave Interactive + +**Objective**: Handle any final GC loop convergence and multi-sprint transitions. + +If the pipeline is multi-sprint and the current sprint completed successfully: +1. Evaluate sprint metrics (velocity, review scores) +2. If more sprints needed, dynamically create next sprint tasks in master CSV +3. If sprint metrics are strong (review avg >= 8), consider downgrading next sprint to simpler pipeline + +If max GC rounds reached and issues remain, log to wisdom/issues.md and proceed. + +**Success Criteria**: +- Post-wave interactive processing complete +- Interactive agents closed, results stored + +--- + +### Phase 4: Results Aggregation + +**Objective**: Generate final results and human-readable report. 
+ +```javascript +const masterCsv = Read(`${sessionFolder}/tasks.csv`) +Write(`${sessionFolder}/results.csv`, masterCsv) + +const tasks = parseCsv(masterCsv) +const completed = tasks.filter(t => t.status === 'completed') +const failed = tasks.filter(t => t.status === 'failed') +const skipped = tasks.filter(t => t.status === 'skipped') + +const contextContent = `# Team IterDev Report + +**Session**: ${sessionId} +**Requirement**: ${requirement} +**Pipeline**: ${pipeline_mode} +**Completed**: ${getUtc8ISOString()} + +--- + +## Summary + +| Metric | Count | +|--------|-------| +| Total Tasks | ${tasks.length} | +| Completed | ${completed.length} | +| Failed | ${failed.length} | +| Skipped | ${skipped.length} | +| GC Rounds | ${gcRound} | + +--- + +## Pipeline Execution + +${waveDetails} + +--- + +## Task Details + +${taskDetails} + +--- + +## Deliverables + +| Artifact | Path | +|----------|------| +| Design Document | ${sessionFolder}/design/design-001.md | +| Task Breakdown | ${sessionFolder}/design/task-breakdown.json | +| Dev Log | ${sessionFolder}/code/dev-log.md | +| Verification | ${sessionFolder}/verify/verify-001.json | +| Review Report | ${sessionFolder}/review/review-001.md | +| Wisdom | ${sessionFolder}/wisdom/ | +` + +Write(`${sessionFolder}/context.md`, contextContent) +``` + +If not AUTO_YES, offer completion actions: + +```javascript +if (!AUTO_YES) { + AskUserQuestion({ + questions: [{ + question: "IterDev pipeline complete. 
What would you like to do?", + header: "Completion", + multiSelect: false, + options: [ + { label: "Archive & Clean (Recommended)", description: "Archive session, generate final report" }, + { label: "Keep Active", description: "Keep session for follow-up or inspection" }, + { label: "Retry Failed", description: "Re-run failed tasks" } + ] + }] + }) +} +``` + +**Success Criteria**: +- results.csv exported (all tasks, both modes) +- context.md generated +- All interactive agents closed +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +All agents across all waves share `discoveries.ndjson`. This enables cross-role knowledge sharing. + +**Discovery Types**: + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `design_decision` | `data.component` | `{component, approach, rationale, alternatives}` | Architecture decision | +| `implementation` | `data.file` | `{file, changes, pattern_used, notes}` | Code implementation detail | +| `test_result` | `data.test_suite` | `{test_suite, pass_rate, failures[], regressions}` | Test execution result | +| `review_finding` | `data.file_line` | `{file_line, severity, dimension, description, suggestion}` | Review finding | +| `convention` | `data.name` | `{name, description, example}` | Discovered project convention | +| `gc_decision` | `data.round` | `{round, signal, critical_count, score}` | GC loop decision | + +**Format**: NDJSON, each line is self-contained JSON: + +```jsonl +{"ts":"2026-03-08T10:00:00+08:00","worker":"DESIGN-001","type":"design_decision","data":{"component":"AuthModule","approach":"JWT with refresh tokens","rationale":"Stateless auth for microservices","alternatives":"Session-based, OAuth2"}} +{"ts":"2026-03-08T10:05:00+08:00","worker":"DEV-001","type":"implementation","data":{"file":"src/auth/jwt.ts","changes":"Added JWT middleware","pattern_used":"Express middleware pattern","notes":"Uses existing bcrypt dependency"}} 
+{"ts":"2026-03-08T10:10:00+08:00","worker":"REVIEW-001","type":"review_finding","data":{"file_line":"src/auth/jwt.ts:42","severity":"HIGH","dimension":"security","description":"Token expiry not validated","suggestion":"Add exp claim check"}} +``` + +**Protocol Rules**: +1. Read board before own work -- leverage existing context +2. Write discoveries immediately via `echo >>` -- don't batch +3. Deduplicate -- check existing entries by type + dedup key +4. Append-only -- never modify or delete existing lines + +--- + +## Consensus Severity Routing + +When the reviewer returns review results with severity-graded verdicts: + +| Severity | Action | +|----------|--------| +| HIGH | Trigger DEV-fix round (GC loop), max 3 rounds total | +| MEDIUM | Log warning, continue pipeline | +| LOW | Treat as review passed | + +**Constraints**: Max 3 GC rounds (fix cycles). If still HIGH after 3 rounds, force convergence and record in wisdom/issues.md. + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Circular dependency | Detect in wave computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent tasks in later waves | +| Interactive agent timeout | Urge convergence via send_input, then close if still timed out | +| Interactive agent failed | Mark as failed, skip dependents | +| All agents in wave failed | Log error, offer retry or abort | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| GC loop exceeds 3 rounds | Force convergence, record in wisdom/issues.md | +| Sprint velocity drops below 50% | Report to user, suggest scope reduction | +| Task ledger corrupted | Rebuild from tasks.csv state | +| Continue mode: no session found | List available sessions, prompt user to select | + +--- + +## Core Rules + +1. 
**Start Immediately**: First action is session initialization, then Phase 0/1 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. **CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive) +4. **CSV First**: Default to csv-wave for tasks; only use interactive when interaction pattern requires it +5. **Context Propagation**: prev_context built from master CSV, not from memory +6. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson -- both mechanisms share it +7. **Skip on Failure**: If a dependency failed, skip the dependent task (regardless of mechanism) +8. **Lifecycle Balance**: Every spawn_agent MUST have a matching close_agent +9. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +10. **DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-iterdev/instructions/agent-instruction.md b/.codex/skills/team-iterdev/instructions/agent-instruction.md new file mode 100644 index 00000000..c3ff1ec7 --- /dev/null +++ b/.codex/skills/team-iterdev/instructions/agent-instruction.md @@ -0,0 +1,118 @@ +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS +1. Read shared discoveries: .workflow/.csv-wave/{session-id}/discoveries.ndjson (if exists, skip if not) +2. Read project context: .workflow/project-tech.json (if exists) + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Role**: {role} +**Description**: {description} +**Pipeline**: {pipeline} +**Sprint**: {sprint_num} +**GC Round**: {gc_round} + +### Previous Tasks' Findings (Context) +{prev_context} + +--- + +## Execution Protocol + +1. **Read discoveries**: Load shared discoveries from the session's discoveries.ndjson for cross-task context +2. **Use context**: Apply previous tasks' findings from prev_context above +3. 
**Execute by role**: + +### Role: architect (DESIGN-* tasks) +- Explore codebase for existing patterns, module structure, and dependencies +- Use mcp__ace-tool__search_context for semantic discovery when available +- Create design document covering: + - Architecture decision: approach, rationale, alternatives considered + - Component design: responsibility, dependencies, files to modify, complexity + - Task breakdown: file changes, estimated complexity, dependencies, acceptance criteria + - Integration points and risks with mitigations +- Write design document to session design/ directory +- Write task breakdown JSON (array of tasks with id, title, files, complexity, dependencies, acceptance_criteria) +- Record architecture decisions in wisdom/decisions.md via discovery board + +### Role: developer (DEV-* tasks) +- **Normal task** (gc_round = 0): + - Read design document and task breakdown from context + - Implement tasks following the execution order from breakdown + - Use Edit or Write for file modifications + - Validate syntax after each major change (tsc --noEmit or equivalent) + - Auto-fix if validation fails (max 2 attempts) +- **Fix task** (gc_round > 0): + - Read review feedback from prev_context + - Focus on critical/high severity issues ONLY + - Do NOT change code that was not flagged in review + - Fix critical issues first, then high, then medium + - Maintain existing code style and patterns +- Write dev log to session code/ directory +- Record implementation details via discovery board + +### Role: tester (VERIFY-* tasks) +- Detect test framework from project files (package.json, pytest.ini, etc.) 
+- Get list of changed files from dev log in prev_context +- Run targeted tests for changed files +- Run regression test suite +- If tests fail: attempt fix (max 3 iterations using available tools) +- Write verification results JSON to session verify/ directory +- Record test results via discovery board +- Report pass rate in findings + +### Role: reviewer (REVIEW-* tasks) +- Read changed files from dev log in prev_context +- Read design document for requirements alignment +- Review across 4 weighted dimensions: + - Correctness (30%): Logic correctness, boundary handling, edge cases + - Completeness (25%): Coverage of design requirements + - Maintainability (25%): Readability, code style, DRY, naming + - Security (20%): Vulnerabilities, input validation, auth issues +- Assign severity per finding: CRITICAL / HIGH / MEDIUM / LOW +- Include file:line references for each finding +- Calculate weighted quality score (1-10) +- Determine GC signal: + - critical_count > 0 OR score < 7 -> `REVISION_NEEDED` + - critical_count == 0 AND score >= 7 -> `CONVERGED` +- Write review report to session review/ directory +- Record review findings via discovery board + +4. **Share discoveries**: Append exploration findings to shared board: + ```bash + echo '{"ts":"","worker":"{id}","type":"","data":{...}}' >> .workflow/.csv-wave/{session-id}/discoveries.ndjson + ``` + + Discovery types to share: + - `design_decision`: {component, approach, rationale, alternatives} -- architecture decision + - `implementation`: {file, changes, pattern_used, notes} -- code implementation detail + - `test_result`: {test_suite, pass_rate, failures[], regressions} -- test execution result + - `review_finding`: {file_line, severity, dimension, description, suggestion} -- review finding + - `convention`: {name, description, example} -- discovered project convention + +5. 
**Report result**: Return JSON via report_agent_job_result + +--- + +## Output (report_agent_job_result) + +Return JSON: +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Key discoveries and implementation notes (max 500 chars)", + "review_score": "Quality score 1-10 (reviewer only, empty for others)", + "gc_signal": "REVISION_NEEDED | CONVERGED (reviewer only, empty for others)", + "error": "" +} + +**Role-specific findings guidance**: +- **architect**: List component count, task count, key decisions. Example: "Designed 3 components (AuthModule, TokenService, Middleware). Created 5 implementation tasks. Key decision: JWT with refresh token rotation." +- **developer**: List changed file count, syntax status, key changes. Example: "Modified 5 files. All syntax clean. Key changes: JWT middleware, token validation, auth routes." +- **developer (fix)**: List fixed issue count, remaining issues. Example: "Fixed 2 HIGH issues (token expiry, input validation). 0 remaining critical/high issues." +- **tester**: List pass rate, test count, regression status. Example: "Pass rate: 96% (24/25 tests). 1 edge case failure (token-expiry). No regressions detected." +- **reviewer**: List score, issue counts, verdict. Example: "Score: 7.5/10. Findings: 0 CRITICAL, 1 HIGH, 3 MEDIUM, 2 LOW. GC signal: REVISION_NEEDED." 
diff --git a/.codex/skills/team-iterdev/schemas/tasks-schema.md b/.codex/skills/team-iterdev/schemas/tasks-schema.md new file mode 100644 index 00000000..7f799de9 --- /dev/null +++ b/.codex/skills/team-iterdev/schemas/tasks-schema.md @@ -0,0 +1,174 @@ +# Team IterDev -- CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### Input Columns (Set by Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | string | Yes | Unique task identifier | `"DEV-001"` | +| `title` | string | Yes | Short task title | `"Implement design"` | +| `description` | string | Yes | Detailed task description (self-contained) | `"Load design document, implement tasks in execution order..."` | +| `role` | string | Yes | Worker role: architect, developer, tester, reviewer | `"developer"` | +| `pipeline` | string | Yes | Pipeline mode: patch, sprint, multi-sprint | `"sprint"` | +| `sprint_num` | integer | Yes | Sprint number (1-based, for multi-sprint tracking) | `"1"` | +| `gc_round` | integer | Yes | Generator-Critic round number (0 = initial, 1+ = fix round) | `"0"` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"DESIGN-001"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"DESIGN-001"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[Task DESIGN-001] Created design with 3 components..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` -> `completed` / `failed` / `skipped` | `"completed"` 
| +| `findings` | string | Key discoveries (max 500 chars) | `"Implemented 5 files, all syntax clean..."` | +| `review_score` | string | Quality score 1-10 (reviewer only, empty for others) | `"8"` | +| `gc_signal` | string | `REVISION_NEEDED` or `CONVERGED` (reviewer only) | `"CONVERGED"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution | + +Interactive tasks appear in master CSV for dependency tracking but are NOT included in wave-{N}.csv files. + +--- + +### Example Data + +```csv +id,title,description,role,pipeline,sprint_num,gc_round,deps,context_from,exec_mode,wave,status,findings,review_score,gc_signal,error +"DESIGN-001","Technical design and task breakdown","Explore codebase for patterns and dependencies. Create component design with integration points. Break into implementable tasks with acceptance criteria.","architect","sprint","1","0","","","csv-wave","1","pending","","","","" +"DEV-001","Implement design","Load design document and task breakdown. Implement tasks in execution order. Validate syntax after each change. Write dev log.","developer","sprint","1","0","DESIGN-001","DESIGN-001","csv-wave","2","pending","","","","" +"VERIFY-001","Verify implementation","Detect test framework. Run targeted tests for changed files. Run regression test suite. Report pass rate.","tester","sprint","1","0","DEV-001","DEV-001","csv-wave","3","pending","","","","" +"REVIEW-001","Code review","Load changed files and design. Review across correctness, completeness, maintainability, security. Score quality 1-10. Issue verdict.","reviewer","sprint","1","0","DEV-001","DEV-001","csv-wave","3","pending","","","","" +"GC-CHECK-001","GC loop decision","Evaluate review severity. 
If critical_count > 0 or score < 7: REVISION. Else: CONVERGE.","gc-controller","sprint","1","1","REVIEW-001","REVIEW-001","interactive","4","pending","","","","" +"DEV-fix-1","Fix review issues (round 1)","Fix critical and high issues from REVIEW-001. Focus on review feedback only. Do NOT change unflagged code.","developer","sprint","1","1","GC-CHECK-001","REVIEW-001","csv-wave","5","pending","","","","" +"REVIEW-002","Re-review (round 1)","Review fixes from DEV-fix-1. Re-evaluate quality. Check if critical issues are resolved.","reviewer","sprint","1","1","DEV-fix-1","DEV-fix-1","csv-wave","6","pending","","","","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +--------------------- -------------------- ----------------- +id -----------> id ----------> id +title -----------> title ----------> (reads) +description -----------> description ----------> (reads) +role -----------> role ----------> (reads) +pipeline -----------> pipeline ----------> (reads) +sprint_num -----------> sprint_num ----------> (reads) +gc_round -----------> gc_round ----------> (reads) +deps -----------> deps ----------> (reads) +context_from-----------> context_from----------> (reads) +exec_mode -----------> exec_mode ----------> (reads) + wave ----------> (reads) + prev_context ----------> (reads) + status + findings + review_score + gc_signal + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "DEV-001", + "status": "completed", + "findings": "Implemented 5 files following design. All syntax checks pass. Key changes: src/auth/jwt.ts, src/middleware/auth.ts.", + "review_score": "", + "gc_signal": "", + "error": "" +} +``` + +Reviewer-specific output: + +```json +{ + "id": "REVIEW-001", + "status": "completed", + "findings": "Reviewed 5 files. Correctness: 8/10, Completeness: 9/10, Maintainability: 7/10, Security: 6/10. 
1 HIGH issue (missing token expiry check).", + "review_score": "7.5", + "gc_signal": "REVISION_NEEDED", + "error": "" +} +``` + +Interactive tasks output via structured text or JSON written to `interactive/{id}-result.json`. + +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `design_decision` | `data.component` | `{component, approach, rationale, alternatives}` | Architecture decision | +| `implementation` | `data.file` | `{file, changes, pattern_used, notes}` | Code implementation detail | +| `test_result` | `data.test_suite` | `{test_suite, pass_rate, failures[], regressions}` | Test execution result | +| `review_finding` | `data.file_line` | `{file_line, severity, dimension, description, suggestion}` | Review finding | +| `convention` | `data.name` | `{name, description, example}` | Discovered project convention | +| `gc_decision` | `data.round` | `{round, signal, critical_count, score}` | GC loop decision record | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T10:00:00+08:00","worker":"DESIGN-001","type":"design_decision","data":{"component":"AuthModule","approach":"JWT with refresh tokens","rationale":"Stateless auth","alternatives":"Session-based, OAuth2"}} +{"ts":"2026-03-08T10:05:00+08:00","worker":"DEV-001","type":"implementation","data":{"file":"src/auth/jwt.ts","changes":"Added JWT middleware with token validation","pattern_used":"Express middleware","notes":"Reuses existing bcrypt"}} +{"ts":"2026-03-08T10:10:00+08:00","worker":"VERIFY-001","type":"test_result","data":{"test_suite":"auth","pass_rate":0.96,"failures":["token-expiry-edge-case"],"regressions":false}} +{"ts":"2026-03-08T10:15:00+08:00","worker":"REVIEW-001","type":"review_finding","data":{"file_line":"src/auth/jwt.ts:42","severity":"HIGH","dimension":"security","description":"Token expiry not validated","suggestion":"Add exp claim check in validateToken()"}} +``` + +> Both csv-wave and 
interactive agents read/write the same discoveries.ndjson file. + +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| CSV task findings | Interactive task | Injected via spawn message or send_input | +| Interactive task result | CSV task prev_context | Read from interactive/{id}-result.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Description non-empty | Every task has description | "Empty description for task: {id}" | +| Status enum | status in {pending, completed, failed, skipped} | "Invalid status: {status}" | +| Valid role | role in {architect, developer, tester, reviewer, gc-controller} | "Invalid role: {role}" | +| GC round non-negative | gc_round >= 0 | "Invalid gc_round: {value}" | +| Valid pipeline | pipeline in {patch, sprint, multi-sprint} | "Invalid pipeline: {value}" | +| Cross-mechanism deps | Interactive<->CSV deps resolve correctly | "Cross-mechanism dependency unresolvable: {id}" | diff --git a/.codex/skills/team-lifecycle-v4/SKILL.md b/.codex/skills/team-lifecycle-v4/SKILL.md new file mode 100644 index 00000000..85dedf90 --- /dev/null +++ b/.codex/skills/team-lifecycle-v4/SKILL.md @@ -0,0 +1,737 @@ +--- +name: team-lifecycle-v4 +description: Full lifecycle team skill — specification, planning, implementation, testing, and review. 
Supports spec-only, impl-only, full-lifecycle, and frontend pipelines with optional supervisor checkpoints. +argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"task description\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults. + +# Team Lifecycle v4 + +## Usage + +```bash +$team-lifecycle-v4 "Design and implement a user authentication system" +$team-lifecycle-v4 -c 4 "Full lifecycle: build a REST API for order management" +$team-lifecycle-v4 -y "Implement dark mode toggle in settings page" +$team-lifecycle-v4 --continue "tlv4-auth-system-20260308" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 3) +- `--continue`: Resume existing session +- `--no-supervision`: Skip CHECKPOINT tasks (supervisor opt-out) + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Full lifecycle software development orchestration: requirement analysis, specification writing (product brief, requirements, architecture, epics), quality gating, implementation planning, code implementation, testing, and code review. Supports multiple pipeline modes with optional supervisor checkpoints at phase transition points. 
+ +**Execution Model**: Hybrid -- CSV wave pipeline (primary) + individual agent spawn (secondary for supervisor checkpoints and requirement clarification) + +``` ++-------------------------------------------------------------------------+ +| TEAM LIFECYCLE v4 WORKFLOW | ++--------------------------------------------------------------------------+ +| | +| Phase 0: Pre-Wave Interactive | +| +-- Requirement clarification + pipeline selection | +| +-- Complexity scoring + signal detection | +| +-- Output: refined requirements for decomposition | +| | +| Phase 1: Requirement -> CSV + Classification | +| +-- Parse task into lifecycle tasks per selected pipeline | +| +-- Assign roles: analyst, writer, planner, executor, tester, | +| | reviewer, supervisor | +| +-- Classify tasks: csv-wave | interactive (exec_mode) | +| +-- Compute dependency waves (topological sort -> depth grouping) | +| +-- Generate tasks.csv with wave + exec_mode columns | +| +-- User validates task breakdown (skip if -y) | +| | +| Phase 2: Wave Execution Engine (Extended) | +| +-- For each wave (1..N): | +| | +-- Execute pre-wave interactive tasks (if any) | +| | +-- Build wave CSV (filter csv-wave tasks for this wave) | +| | +-- Inject previous findings into prev_context column | +| | +-- spawn_agents_on_csv(wave CSV) | +| | +-- Execute post-wave interactive tasks (if any) | +| | +-- Handle CHECKPOINT tasks via interactive supervisor | +| | +-- Merge all results into master tasks.csv | +| | +-- Check: any failed? 
-> skip dependents | +| +-- discoveries.ndjson shared across all modes (append-only) | +| | +| Phase 3: Post-Wave Interactive | +| +-- Quality gate evaluation (QUALITY-001) | +| +-- User approval checkpoint before implementation | +| +-- Complexity-based implementation routing | +| | +| Phase 4: Results Aggregation | +| +-- Export final results.csv | +| +-- Generate context.md with all findings | +| +-- Display summary: completed/failed/skipped per wave | +| +-- Offer: view results | retry failed | done | +| | ++--------------------------------------------------------------------------+ +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, clarification, checkpoint evaluation | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Research / analysis (RESEARCH-*) | `csv-wave` | +| Document generation (DRAFT-*) | `csv-wave` | +| Implementation planning (PLAN-*) | `csv-wave` | +| Code implementation (IMPL-*) | `csv-wave` | +| Test execution (TEST-*) | `csv-wave` | +| Code review (REVIEW-*) | `csv-wave` | +| Quality gate scoring (QUALITY-*) | `csv-wave` | +| Supervisor checkpoints (CHECKPOINT-*) | `interactive` | +| Requirement clarification (Phase 0) | `interactive` | +| Quality gate user approval | `interactive` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,role,pipeline_phase,deps,context_from,exec_mode,wave,status,findings,quality_score,supervision_verdict,error +"RESEARCH-001","Domain research","Explore domain, extract structured context, identify constraints","analyst","research","","","csv-wave","1","pending","","","","" +"DRAFT-001","Product brief","Generate product brief from 
research context","writer","product-brief","RESEARCH-001","RESEARCH-001","csv-wave","2","pending","","","","" +"CHECKPOINT-001","Brief-PRD consistency","Verify terminology alignment and scope consistency between brief and PRD","supervisor","checkpoint","DRAFT-002","DRAFT-001;DRAFT-002","interactive","4","pending","","","","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier (string) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description | +| `role` | Input | Worker role: analyst, writer, planner, executor, tester, reviewer, supervisor | +| `pipeline_phase` | Input | Lifecycle phase: research, product-brief, requirements, architecture, epics, checkpoint, readiness, planning, implementation, validation, review | +| `deps` | Input | Semicolon-separated dependency task IDs | +| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `wave` | Computed | Wave number (computed by topological sort, 1-based) | +| `status` | Output | `pending` -> `completed` / `failed` / `skipped` | +| `findings` | Output | Key discoveries or implementation notes (max 500 chars) | +| `quality_score` | Output | Quality gate score (0-100) for QUALITY-* tasks | +| `supervision_verdict` | Output | `pass` / `warn` / `block` for CHECKPOINT-* tasks | +| `error` | Output | Error message if failed (empty if success) | + +### Per-Wave CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). 
+ +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| requirement-clarifier | agents/requirement-clarifier.md | 2.3 (wait-respond) | Parse task, detect signals, select pipeline mode | standalone (Phase 0) | +| supervisor | agents/supervisor.md | 2.3 (wait-respond) | Verify cross-artifact consistency at phase transitions | post-wave (after checkpoint dependencies complete) | +| quality-gate | agents/quality-gate.md | 2.3 (wait-respond) | Evaluate quality and present user approval | post-wave (after QUALITY-001 completes) | + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. + +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state -- all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 4 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 4 | +| `interactive/{id}-result.json` | Results from interactive tasks | Created per interactive task | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ ++-- tasks.csv # Master state (all tasks, both modes) ++-- results.csv # Final results export ++-- discoveries.ndjson # Shared discovery board (all agents) ++-- context.md # Human-readable report ++-- wave-{N}.csv # Temporary per-wave input (csv-wave only) ++-- spec/ # Specification artifacts +| +-- spec-config.json +| +-- discovery-context.json +| +-- product-brief.md +| +-- requirements/ +| +-- architecture.md +| +-- epics.md ++-- plan/ # 
Implementation plan
+| +-- plan.json
+| +-- .task/TASK-*.json
++-- artifacts/ # Review and checkpoint reports
+| +-- CHECKPOINT-*-report.md
+| +-- review-report.md
++-- wisdom/ # Cross-task knowledge
++-- explorations/ # Shared exploration cache
++-- interactive/ # Interactive task artifacts
+ +-- {id}-result.json
+```
+
+---
+
+## Implementation
+
+### Session Initialization
+
+```javascript
+// UTC+8 timestamp with explicit offset (matches discoveries.ndjson "+08:00" format);
+// note toISOString() alone would mislabel the shifted time as "Z" (UTC)
+const getUtc8ISOString = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString().replace('Z', '+08:00')
+
+// Parse flags
+const AUTO_YES = $ARGUMENTS.includes('--yes') || $ARGUMENTS.includes('-y')
+const continueMode = $ARGUMENTS.includes('--continue')
+const noSupervision = $ARGUMENTS.includes('--no-supervision')
+const concurrencyMatch = $ARGUMENTS.match(/(?:--concurrency|-c)\s+(\d+)/)
+const maxConcurrency = concurrencyMatch ? parseInt(concurrencyMatch[1]) : 3
+
+// Clean requirement text (remove flags)
+const requirement = $ARGUMENTS
+  .replace(/--yes|-y|--continue|--no-supervision|--concurrency\s+\d+|-c\s+\d+/g, '')
+  .trim()
+
+const slug = requirement.toLowerCase()
+  .replace(/[^a-z0-9\u4e00-\u9fa5]+/g, '-')
+  .substring(0, 40)
+const dateStr = getUtc8ISOString().substring(0, 10).replace(/-/g, '')
+let sessionId = `tlv4-${slug}-${dateStr}`
+let sessionFolder = `.workflow/.csv-wave/${sessionId}`
+
+// Continue mode: find existing session
+if (continueMode) {
+  const existing = Bash(`ls -dt .workflow/.csv-wave/tlv4-* 2>/dev/null | head -1`).trim()
+  if (existing) {
+    sessionId = existing.split('/').pop()
+    sessionFolder = existing
+    // Read existing tasks.csv, find incomplete waves, resume from Phase 2
+  }
+}
+
+Bash(`mkdir -p ${sessionFolder}/{spec,plan,plan/.task,artifacts,wisdom,explorations,interactive}`)
+```
+
+---
+
+### Phase 0: Pre-Wave Interactive
+
+**Objective**: Clarify requirement, detect capabilities, select pipeline mode.
+
+**Execution**:
+
+```javascript
+const clarifier = spawn_agent({
+  message: `
+## TASK ASSIGNMENT
+
+### MANDATORY FIRST STEPS (Agent Execute)
+1. 
**Read role definition**: .codex/skills/team-lifecycle-v4/agents/requirement-clarifier.md (MUST read first) +2. Read: .workflow/project-tech.json (if exists) + +--- + +Goal: Analyze task requirement and select appropriate pipeline +Requirement: ${requirement} + +### Task +1. Parse task description for capability signals: + - spec/design/document/requirements -> spec-only + - implement/build/fix/code -> impl-only + - full/lifecycle/end-to-end -> full-lifecycle + - frontend/UI/react/vue -> fe-only or fullstack +2. Score complexity (per capability +1, cross-domain +2, parallel tracks +1, serial depth >3 +1) +3. Return structured result with pipeline_type, capabilities, complexity +` +}) + +const clarifierResult = wait({ ids: [clarifier], timeout_ms: 120000 }) +if (clarifierResult.timed_out) { + send_input({ id: clarifier, message: "Please finalize and output current findings." }) + wait({ ids: [clarifier], timeout_ms: 60000 }) +} +close_agent({ id: clarifier }) + +Write(`${sessionFolder}/interactive/requirement-clarifier-result.json`, JSON.stringify({ + task_id: "requirement-clarification", + status: "completed", + pipeline_type: parsedPipelineType, + capabilities: parsedCapabilities, + complexity: parsedComplexity, + timestamp: getUtc8ISOString() +})) +``` + +If not AUTO_YES, confirm pipeline selection: + +```javascript +if (!AUTO_YES) { + const answer = AskUserQuestion({ + questions: [{ + question: `Requirement: "${requirement}"\nDetected pipeline: ${pipeline_type} (complexity: ${complexity.level})\nRoles: ${capabilities.map(c => c.name).join(', ')}\n\nApprove?`, + header: "Pipeline Selection", + multiSelect: false, + options: [ + { label: "Approve", description: `Use ${pipeline_type} pipeline` }, + { label: "Spec Only", description: "Research -> draft specs -> quality gate" }, + { label: "Impl Only", description: "Plan -> implement -> test + review" }, + { label: "Full Lifecycle", description: "Spec pipeline + implementation pipeline" } + ] + }] + }) +} +``` + 
+**Success Criteria**: +- Refined requirements available for Phase 1 decomposition +- Interactive agents closed, results stored + +--- + +### Phase 1: Requirement -> CSV + Classification + +**Objective**: Build tasks.csv from selected pipeline mode with proper wave assignments. + +**Decomposition Rules**: + +| Pipeline | Tasks | Wave Structure | +|----------|-------|---------------| +| spec-only | RESEARCH-001 -> DRAFT-001 -> DRAFT-002 -> [CHECKPOINT-001] -> DRAFT-003 -> DRAFT-004 -> [CHECKPOINT-002] -> QUALITY-001 | 8 waves (6 csv + 2 interactive checkpoints) | +| impl-only | PLAN-001 -> [CHECKPOINT-003] -> IMPL-001 -> TEST-001 + REVIEW-001 | 4 waves (3 csv + 1 interactive) | +| full-lifecycle | spec-only pipeline + impl-only pipeline (PLAN blocked by QUALITY-001) | 12 waves | + +**Pipeline Task Definitions**: + +#### Spec-Only Pipeline + +| Task ID | Role | Wave | Deps | exec_mode | Description | +|---------|------|------|------|-----------|-------------| +| RESEARCH-001 | analyst | 1 | (none) | csv-wave | Research domain, extract structured context | +| DRAFT-001 | writer | 2 | RESEARCH-001 | csv-wave | Generate product brief | +| DRAFT-002 | writer | 3 | DRAFT-001 | csv-wave | Generate requirements PRD | +| CHECKPOINT-001 | supervisor | 4 | DRAFT-002 | interactive | Brief-PRD consistency check | +| DRAFT-003 | writer | 5 | CHECKPOINT-001 | csv-wave | Generate architecture design | +| DRAFT-004 | writer | 6 | DRAFT-003 | csv-wave | Generate epics and stories | +| CHECKPOINT-002 | supervisor | 7 | DRAFT-004 | interactive | Full spec consistency check | +| QUALITY-001 | reviewer | 8 | CHECKPOINT-002 | csv-wave | Quality gate scoring | + +#### Impl-Only Pipeline + +| Task ID | Role | Wave | Deps | exec_mode | Description | +|---------|------|------|------|-----------|-------------| +| PLAN-001 | planner | 1 | (none) | csv-wave | Break down into implementation steps | +| CHECKPOINT-003 | supervisor | 2 | PLAN-001 | interactive | Plan-input alignment check | +| 
IMPL-001 | executor | 3 | CHECKPOINT-003 | csv-wave | Execute implementation plan | +| TEST-001 | tester | 4 | IMPL-001 | csv-wave | Run tests, fix failures | +| REVIEW-001 | reviewer | 4 | IMPL-001 | csv-wave | Code review | + +When `--no-supervision` is set, skip all CHECKPOINT-* tasks entirely, adjust wave numbers and dependencies accordingly (e.g., DRAFT-003 depends directly on DRAFT-002). + +**Classification Rules**: + +All lifecycle work tasks (research, drafting, planning, implementation, testing, review, quality) are `csv-wave`. Supervisor checkpoints are `interactive` (post-wave, spawned by orchestrator to verify cross-artifact consistency). Quality gate user approval is `interactive`. + +**Wave Computation**: Kahn's BFS topological sort with depth tracking (csv-wave tasks only). + +**User Validation**: Display task breakdown with wave + exec_mode assignment (skip if AUTO_YES). + +**Success Criteria**: +- tasks.csv created with valid schema, wave, and exec_mode assignments +- No circular dependencies +- User approved (or AUTO_YES) + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with hybrid mechanism support and cross-wave context propagation. + +```javascript +const failedIds = new Set() +const skippedIds = new Set() + +for (let wave = 1; wave <= maxWave; wave++) { + console.log(`\n## Wave ${wave}/${maxWave}\n`) + + // 1. Read current master CSV + const masterCsv = parseCsv(Read(`${sessionFolder}/tasks.csv`)) + + // 2. Separate csv-wave and interactive tasks for this wave + const waveTasks = masterCsv.filter(row => parseInt(row.wave) === wave) + const csvTasks = waveTasks.filter(t => t.exec_mode === 'csv-wave') + const interactiveTasks = waveTasks.filter(t => t.exec_mode === 'interactive') + + // 3. 
Skip tasks whose deps failed + const executableCsvTasks = [] + for (const task of csvTasks) { + const deps = task.deps.split(';').filter(Boolean) + if (deps.some(d => failedIds.has(d) || skippedIds.has(d))) { + skippedIds.add(task.id) + updateMasterCsvRow(sessionFolder, task.id, { + status: 'skipped', error: 'Dependency failed or skipped' + }) + continue + } + executableCsvTasks.push(task) + } + + // 4. Build prev_context for each csv-wave task + for (const task of executableCsvTasks) { + const contextIds = task.context_from.split(';').filter(Boolean) + const prevFindings = contextIds + .map(id => { + const prevRow = masterCsv.find(r => r.id === id) + if (prevRow && prevRow.status === 'completed' && prevRow.findings) { + return `[Task ${id}: ${prevRow.title}] ${prevRow.findings}` + } + // Check interactive results + try { + const interactiveResult = JSON.parse(Read(`${sessionFolder}/interactive/${id}-result.json`)) + return `[Task ${id}] ${JSON.stringify(interactiveResult.key_findings || interactiveResult.findings || '')}` + } catch { return null } + }) + .filter(Boolean) + .join('\n') + task.prev_context = prevFindings || 'No previous context available' + } + + // 5. 
Write wave CSV and execute csv-wave tasks + if (executableCsvTasks.length > 0) { + const waveHeader = 'id,title,description,role,pipeline_phase,deps,context_from,exec_mode,wave,prev_context' + const waveRows = executableCsvTasks.map(t => + [t.id, t.title, t.description, t.role, t.pipeline_phase, t.deps, t.context_from, t.exec_mode, t.wave, t.prev_context] + .map(cell => `"${String(cell).replace(/"/g, '""')}"`) + .join(',') + ) + Write(`${sessionFolder}/wave-${wave}.csv`, [waveHeader, ...waveRows].join('\n')) + + const waveResult = spawn_agents_on_csv({ + csv_path: `${sessionFolder}/wave-${wave}.csv`, + id_column: "id", + instruction: Read(`.codex/skills/team-lifecycle-v4/instructions/agent-instruction.md`) + .replace(/{session-id}/g, sessionId), + max_concurrency: maxConcurrency, + max_runtime_seconds: 900, + output_csv_path: `${sessionFolder}/wave-${wave}-results.csv`, + output_schema: { + type: "object", + properties: { + id: { type: "string" }, + status: { type: "string", enum: ["completed", "failed"] }, + findings: { type: "string" }, + quality_score: { type: "string" }, + supervision_verdict: { type: "string" }, + error: { type: "string" } + }, + required: ["id", "status", "findings"] + } + }) + + // Merge results into master CSV + const waveResults = parseCsv(Read(`${sessionFolder}/wave-${wave}-results.csv`)) + for (const result of waveResults) { + updateMasterCsvRow(sessionFolder, result.id, { + status: result.status, + findings: result.findings || '', + quality_score: result.quality_score || '', + supervision_verdict: result.supervision_verdict || '', + error: result.error || '' + }) + if (result.status === 'failed') failedIds.add(result.id) + } + + Bash(`rm -f "${sessionFolder}/wave-${wave}.csv"`) + } + + // 6. 
Execute post-wave interactive tasks (supervisor checkpoints) + for (const task of interactiveTasks) { + if (task.status !== 'pending') continue + const deps = task.deps.split(';').filter(Boolean) + if (deps.some(d => failedIds.has(d) || skippedIds.has(d))) { + skippedIds.add(task.id) + continue + } + + // Spawn supervisor agent for CHECKPOINT tasks + const supervisorAgent = spawn_agent({ + message: ` +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS (Agent Execute) +1. **Read role definition**: .codex/skills/team-lifecycle-v4/agents/supervisor.md (MUST read first) +2. Read: ${sessionFolder}/discoveries.ndjson (shared discoveries) + +--- + +Goal: Execute checkpoint verification +Session: ${sessionFolder} +Task ID: ${task.id} +Description: ${task.description} +Scope: ${task.deps} + +### Context +Read upstream artifacts and verify cross-artifact consistency. +Produce verdict: pass (score >= 0.8), warn (0.5-0.79), block (< 0.5). +Write report to ${sessionFolder}/artifacts/${task.id}-report.md. +` + }) + + const checkpointResult = wait({ ids: [supervisorAgent], timeout_ms: 300000 }) + if (checkpointResult.timed_out) { + send_input({ id: supervisorAgent, message: "Please finalize your checkpoint evaluation now." }) + wait({ ids: [supervisorAgent], timeout_ms: 120000 }) + } + close_agent({ id: supervisorAgent }) + + // Parse checkpoint verdict + Write(`${sessionFolder}/interactive/${task.id}-result.json`, JSON.stringify({ + task_id: task.id, status: "completed", + supervision_verdict: parsedVerdict, + supervision_score: parsedScore, + timestamp: getUtc8ISOString() + })) + + // Handle verdict + if (parsedVerdict === 'block') { + if (!AUTO_YES) { + const answer = AskUserQuestion({ + questions: [{ + question: `Checkpoint ${task.id} BLOCKED (score: ${parsedScore}). 
What to do?`, + header: "Checkpoint Blocked", + options: [ + { label: "Override", description: "Proceed despite block" }, + { label: "Revise upstream", description: "Go back and fix issues" }, + { label: "Abort", description: "Stop pipeline" } + ] + }] + }) + // Handle user choice + } + } + + updateMasterCsvRow(sessionFolder, task.id, { + status: 'completed', + findings: `Checkpoint verdict: ${parsedVerdict} (score: ${parsedScore})`, + supervision_verdict: parsedVerdict + }) + } + + // 7. Handle special post-wave logic + // After QUALITY-001: pause for user approval before implementation + // After PLAN-001: read complexity for conditional routing +} +``` + +**Success Criteria**: +- All waves executed in order +- Both csv-wave and interactive tasks handled per wave +- Each wave's results merged into master CSV before next wave starts +- Dependent tasks skipped when predecessor failed +- discoveries.ndjson accumulated across all waves and mechanisms +- Supervisor checkpoints evaluated with proper verdict routing + +--- + +### Phase 3: Post-Wave Interactive + +**Objective**: Handle quality gate user approval and complexity-based implementation routing. + +After QUALITY-001 completes (spec pipelines): +1. Read quality score from QUALITY-001 findings +2. If score >= 80%: present user approval for implementation (if full-lifecycle) +3. If score 60-79%: suggest revisions, offer retry +4. If score < 60%: return to writer for rework + +After PLAN-001 completes (impl pipelines): +1. Read plan.json complexity assessment +2. Route by complexity: + - Low (1-2 modules): direct IMPL-001 + - Medium (3-4 modules): parallel IMPL-{1..N} + - High (5+ modules): detailed architecture first, then parallel IMPL + +**Success Criteria**: +- Post-wave interactive processing complete +- Interactive agents closed, results stored + +--- + +### Phase 4: Results Aggregation + +**Objective**: Generate final results and human-readable report. 
+ +```javascript +const masterCsv = Read(`${sessionFolder}/tasks.csv`) +Write(`${sessionFolder}/results.csv`, masterCsv) + +const tasks = parseCsv(masterCsv) +const completed = tasks.filter(t => t.status === 'completed') +const failed = tasks.filter(t => t.status === 'failed') +const skipped = tasks.filter(t => t.status === 'skipped') + +const contextContent = `# Team Lifecycle v4 Report + +**Session**: ${sessionId} +**Requirement**: ${requirement} +**Pipeline**: ${pipeline_type} +**Completed**: ${getUtc8ISOString()} + +--- + +## Summary + +| Metric | Count | +|--------|-------| +| Total Tasks | ${tasks.length} | +| Completed | ${completed.length} | +| Failed | ${failed.length} | +| Skipped | ${skipped.length} | +| Supervision | ${noSupervision ? 'Disabled' : 'Enabled'} | + +--- + +## Pipeline Execution + +${waveDetails} + +--- + +## Deliverables + +${deliverablesList} + +--- + +## Quality Gates + +${qualityGateResults} + +--- + +## Checkpoint Reports + +${checkpointResults} +` + +Write(`${sessionFolder}/context.md`, contextContent) +``` + +If not AUTO_YES, offer completion action: + +```javascript +if (!AUTO_YES) { + AskUserQuestion({ + questions: [{ + question: "Pipeline complete. What would you like to do?", + header: "Completion", + multiSelect: false, + options: [ + { label: "Archive & Clean (Recommended)", description: "Archive session" }, + { label: "Keep Active", description: "Keep session for follow-up work" }, + { label: "Export Results", description: "Export deliverables to target directory" } + ] + }] + }) +} +``` + +**Success Criteria**: +- results.csv exported (all tasks, both modes) +- context.md generated +- All interactive agents closed +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +All agents across all waves share `discoveries.ndjson`. This enables cross-role knowledge sharing. 
+ +**Discovery Types**: + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `research` | `data.dimension` | `{dimension, findings[], constraints[], integration_points[]}` | Research findings | +| `spec_artifact` | `data.doc_type` | `{doc_type, path, sections[], key_decisions[]}` | Specification document artifact | +| `exploration` | `data.angle` | `{angle, relevant_files[], patterns[], recommendations[]}` | Codebase exploration finding | +| `plan_task` | `data.task_id` | `{task_id, title, files[], complexity, convergence_criteria[]}` | Implementation task definition | +| `implementation` | `data.task_id` | `{task_id, files_modified[], approach, changes_summary}` | Implementation result | +| `test_result` | `data.framework` | `{framework, pass_rate, failures[], fix_iterations}` | Test execution result | +| `review_finding` | `data.file` | `{file, line, severity, dimension, description, suggested_fix}` | Code review finding | +| `checkpoint` | `data.checkpoint_id` | `{checkpoint_id, verdict, score, risks[], blocks[]}` | Supervisor checkpoint result | +| `quality_gate` | `data.gate_id` | `{gate_id, score, dimensions{}, verdict}` | Quality gate assessment | + +**Format**: NDJSON, each line is self-contained JSON: + +```jsonl +{"ts":"2026-03-08T10:00:00+08:00","worker":"RESEARCH-001","type":"research","data":{"dimension":"domain","findings":["Auth system needs OAuth2 + RBAC"],"constraints":["Must support SSO"],"integration_points":["User service API"]}} +{"ts":"2026-03-08T10:15:00+08:00","worker":"DRAFT-001","type":"spec_artifact","data":{"doc_type":"product-brief","path":"spec/product-brief.md","sections":["Vision","Problem","Users","Goals"],"key_decisions":["OAuth2 over custom auth"]}} +{"ts":"2026-03-08T11:00:00+08:00","worker":"CHECKPOINT-001","type":"checkpoint","data":{"checkpoint_id":"CHECKPOINT-001","verdict":"pass","score":0.90,"risks":[],"blocks":[]}} +``` + +**Protocol Rules**: +1. 
Read board before own work -> leverage existing context +2. Write discoveries immediately via `echo >>` -> don't batch +3. Deduplicate -- check existing entries by type + dedup key +4. Append-only -- never modify or delete existing lines + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Circular dependency | Detect in wave computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent tasks in later waves | +| Interactive agent timeout | Urge convergence via send_input, then close if still timed out | +| Interactive agent failed | Mark as failed, skip dependents | +| Supervisor checkpoint blocked | AskUserQuestion: Override / Revise / Abort | +| Quality gate failed (< 60%) | Return to writer for rework | +| All agents in wave failed | Log error, offer retry or abort | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| CLI tool fails | Agent fallback to direct implementation | +| Continue mode: no session found | List available sessions, prompt user to select | + +--- + +## Core Rules + +1. **Start Immediately**: First action is session initialization, then Phase 0/1 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. **CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive) +4. **CSV First**: Default to csv-wave for tasks; only use interactive when interaction pattern requires it +5. **Context Propagation**: prev_context built from master CSV, not from memory +6. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson -- both mechanisms share it +7. **Skip on Failure**: If a dependency failed, skip the dependent task (regardless of mechanism) +8. 
**Lifecycle Balance**: Every spawn_agent MUST have a matching close_agent +9. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +10. **DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-lifecycle-v4/instructions/agent-instruction.md b/.codex/skills/team-lifecycle-v4/instructions/agent-instruction.md new file mode 100644 index 00000000..4b65cfd4 --- /dev/null +++ b/.codex/skills/team-lifecycle-v4/instructions/agent-instruction.md @@ -0,0 +1,725 @@ +# Team Lifecycle v4 — Agent Instruction + +This instruction is loaded by team-worker agents when spawned with roles: `analyst`, `writer`, `planner`, `executor`, `tester`, `reviewer`. + +--- + +## Role-Based Execution + +### Analyst Role + +**Responsibility**: Research domain, extract structured context, identify constraints. + +**Input**: +- `id`: Task ID (e.g., `RESEARCH-001`) +- `title`: Task title +- `description`: Detailed task description with PURPOSE/TASK/CONTEXT/EXPECTED/CONSTRAINTS +- `role`: `analyst` +- `pipeline_phase`: `research` +- `prev_context`: Previous tasks' findings (empty for wave 1) + +**Execution Protocol**: + +1. **Read shared discoveries**: + ```javascript + const discoveries = Read(`{session}/discoveries.ndjson`) + ``` + +2. **Explore domain** (use CLI analysis tools): + ```bash + ccw cli -p "PURPOSE: Research domain for {requirement} + TASK: • Identify problem statement • Define target users • Extract constraints • Map integration points + CONTEXT: @**/* | Memory: {requirement} + EXPECTED: Structured research context with problem/users/domain/constraints + CONSTRAINTS: Read-only analysis" --tool gemini --mode analysis --rule analysis-trace-code-execution + ``` + +3. **Extract structured context**: + - Problem statement: What problem are we solving? + - Target users: Who will use this? + - Domain: What domain/industry? 
+ - Constraints: Technical, business, regulatory constraints + - Integration points: External systems, APIs, services + +4. **Write discovery context**: + ```javascript + Write(`{session}/spec/discovery-context.json`, JSON.stringify({ + problem_statement: "Users need OAuth2 authentication with SSO support", + target_users: ["Enterprise customers", "Internal teams"], + domain: "Authentication & Authorization", + constraints: ["Must support SAML", "GDPR compliance", "99.9% uptime"], + integration_points: ["User service API", "Session store", "Audit log"], + exploration_dimensions: ["Security", "Scalability", "User experience"] + }, null, 2)) + ``` + +5. **Share discoveries**: + ```bash + echo '{"ts":"2026-03-08T10:00:00+08:00","worker":"{id}","type":"research","data":{"dimension":"domain","findings":["Auth system needs OAuth2 + RBAC"],"constraints":["Must support SSO"],"integration_points":["User service API"]}}' >> {session}/discoveries.ndjson + ``` + +6. **Report result**: + ```javascript + report_agent_job_result({ + id: "{id}", + status: "completed", + findings: "Explored domain: identified OAuth2+RBAC auth pattern, 5 integration points, TypeScript/React stack. Key constraint: must support SSO.", + quality_score: "", + supervision_verdict: "", + error: "" + }) + ``` + +**Success Criteria**: +- Discovery context written with all required fields +- Problem statement clear and actionable +- Constraints identified +- Integration points mapped + +--- + +### Writer Role + +**Responsibility**: Generate specification documents (product brief, requirements, architecture, epics). + +**Input**: +- `id`: Task ID (e.g., `DRAFT-001`) +- `title`: Task title +- `description`: Detailed task description +- `role`: `writer` +- `pipeline_phase`: `product-brief`, `requirements`, `architecture`, or `epics` +- `context_from`: Upstream task IDs +- `prev_context`: Previous tasks' findings +- `inner_loop`: `true` (writer uses inner loop for revision) + +**Execution Protocol**: + +1. 
**Read upstream artifacts**: + ```javascript + const discoveryContext = JSON.parse(Read(`{session}/spec/discovery-context.json`)) + const productBrief = Read(`{session}/spec/product-brief.md`) // if exists + ``` + +2. **Generate document based on pipeline_phase**: + + **Product Brief** (DRAFT-001): + ```markdown + # Product Brief: OAuth2 Authentication System + + ## Vision + Enable enterprise customers to authenticate users via OAuth2 with SSO support. + + ## Problem + Current authentication system lacks OAuth2 support, blocking enterprise adoption. + + ## Target Users + - Enterprise customers requiring SSO + - Internal teams needing centralized auth + + ## Success Goals + - 99.9% uptime + - <200ms auth latency + - GDPR compliant + - Support 10k concurrent users + + ## Key Decisions + - Use OAuth2 over custom auth + - Support SAML for SSO + - Implement RBAC for authorization + ``` + + **Requirements PRD** (DRAFT-002): + ```markdown + # Requirements: OAuth2 Authentication + + ## Functional Requirements + + ### FR-001: OAuth2 Authorization Flow + **Priority**: Must Have + **Description**: Implement OAuth2 authorization code flow + **Acceptance Criteria**: + - User redirected to OAuth provider + - Authorization code exchanged for access token + - Token stored securely in session + + ### FR-002: SSO Integration + **Priority**: Must Have + **Description**: Support SAML-based SSO + **Acceptance Criteria**: + - SAML assertion validated + - User attributes mapped to internal user model + - Session created with SSO context + + ## User Stories + + ### US-001: Enterprise User Login + **As an** enterprise user + **I want to** log in via my company's SSO + **So that** I don't need separate credentials + + **Acceptance Criteria**: + - Given I'm on the login page + - When I click "Login with SSO" + - Then I'm redirected to my company's SSO provider + - And I'm logged in after successful authentication + ``` + + **Architecture Design** (DRAFT-003): + ```markdown + # Architecture: 
OAuth2 Authentication + + ## Component Diagram + [User] -> [Auth Gateway] -> [OAuth Provider] + | + v + [Session Store] + | + v + [User Service] + + ## Tech Stack + - **Backend**: Node.js + Express + - **OAuth Library**: Passport.js + - **Session Store**: Redis + - **Database**: PostgreSQL + + ## Architecture Decision Records + + ### ADR-001: Use Passport.js for OAuth + **Status**: Accepted + **Context**: Need OAuth2 + SAML support + **Decision**: Use Passport.js with passport-oauth2 and passport-saml strategies + **Consequences**: Mature library, good community support, but adds dependency + + ## Data Model + ```sql + CREATE TABLE users ( + id UUID PRIMARY KEY, + email VARCHAR(255) UNIQUE, + oauth_provider VARCHAR(50), + oauth_id VARCHAR(255) + ); + ``` + + ## Integration Points + - User Service API: GET /users/:id, POST /users + - Session Store: Redis SET/GET with TTL + - Audit Log: POST /audit/events + ``` + + **Epics and Stories** (DRAFT-004): + ```markdown + # Epics: OAuth2 Authentication + + ## Epic 1: OAuth2 Core Flow + **Priority**: Must Have (MVP) + **Estimate**: 13 story points + + ### Stories + 1. **STORY-001**: Implement authorization endpoint (3 pts) + 2. **STORY-002**: Implement token exchange (5 pts) + 3. **STORY-003**: Implement token refresh (3 pts) + 4. **STORY-004**: Add session management (2 pts) + + ## Epic 2: SSO Integration + **Priority**: Must Have (MVP) + **Estimate**: 8 story points + + ### Stories + 1. **STORY-005**: Integrate SAML provider (5 pts) + 2. **STORY-006**: Map SAML attributes (3 pts) + + ## Epic 3: RBAC Authorization + **Priority**: Should Have + **Estimate**: 8 story points + + ### Stories + 1. **STORY-007**: Define role model (2 pts) + 2. **STORY-008**: Implement permission checks (3 pts) + 3. **STORY-009**: Add role assignment UI (3 pts) + ``` + +3. **Write document to spec/ directory**: + ```javascript + Write(`{session}/spec/{doc-type}.md`, documentContent) + ``` + +4. 
**Share discoveries**: + ```bash + echo '{"ts":"2026-03-08T10:15:00+08:00","worker":"{id}","type":"spec_artifact","data":{"doc_type":"product-brief","path":"spec/product-brief.md","sections":["Vision","Problem","Users","Goals"],"key_decisions":["OAuth2 over custom auth"]}}' >> {session}/discoveries.ndjson + ``` + +5. **Report result**: + ```javascript + report_agent_job_result({ + id: "{id}", + status: "completed", + findings: "Generated product brief with vision, problem statement, target users, success goals. Key decision: OAuth2 over custom auth.", + quality_score: "", + supervision_verdict: "", + error: "" + }) + ``` + +**Success Criteria**: +- Document follows template structure +- All required sections present +- Terminology consistent with upstream docs +- Key decisions documented + +--- + +### Planner Role + +**Responsibility**: Break down requirements into implementation tasks. + +**Input**: +- `id`: Task ID (e.g., `PLAN-001`) +- `title`: Task title +- `description`: Detailed task description +- `role`: `planner` +- `pipeline_phase`: `planning` +- `context_from`: Upstream task IDs (e.g., `QUALITY-001`) +- `prev_context`: Previous tasks' findings +- `inner_loop`: `true` (planner uses inner loop for refinement) + +**Execution Protocol**: + +1. **Read spec artifacts**: + ```javascript + const requirements = Read(`{session}/spec/requirements.md`) + const architecture = Read(`{session}/spec/architecture.md`) + const epics = Read(`{session}/spec/epics.md`) + ``` + +2. **Explore codebase** (use CLI analysis tools): + ```bash + ccw cli -p "PURPOSE: Explore codebase for {requirement} + TASK: • Identify relevant files • Find existing patterns • Locate integration points + CONTEXT: @**/* | Memory: {requirement} + EXPECTED: Exploration findings with file paths and patterns + CONSTRAINTS: Read-only analysis" --tool gemini --mode analysis --rule analysis-trace-code-execution + ``` + +3. 
**Generate implementation plan**: + ```javascript + const plan = { + requirement: "{requirement}", + complexity: "Medium", // Low (1-2 modules), Medium (3-4), High (5+) + approach: "Strategy pattern for OAuth providers", + tasks: [ + { + task_id: "TASK-001", + title: "Create OAuth provider interface", + description: "Define provider interface with authorize/token/refresh methods", + files: ["src/auth/providers/oauth-provider.ts"], + depends_on: [], + convergence_criteria: [ + "Interface compiles without errors", + "Type definitions exported" + ] + }, + { + task_id: "TASK-002", + title: "Implement Google OAuth provider", + description: "Concrete implementation for Google OAuth2", + files: ["src/auth/providers/google-oauth.ts"], + depends_on: ["TASK-001"], + convergence_criteria: [ + "Tests pass", + "Handles token refresh", + "Error handling complete" + ] + } + ], + exploration_findings: { + existing_patterns: ["Strategy pattern in payment module"], + tech_stack: ["TypeScript", "Express", "Passport.js"], + integration_points: ["User service", "Session store"] + } + } + Write(`{session}/plan/plan.json`, JSON.stringify(plan, null, 2)) + ``` + +4. **Write per-task files**: + ```javascript + for (const task of plan.tasks) { + Write(`{session}/plan/.task/${task.task_id}.json`, JSON.stringify(task, null, 2)) + } + ``` + +5. **Share discoveries**: + ```bash + echo '{"ts":"2026-03-08T11:00:00+08:00","worker":"{id}","type":"plan_task","data":{"task_id":"TASK-001","title":"Create OAuth provider interface","files":["src/auth/providers/oauth-provider.ts"],"complexity":"Low","convergence_criteria":["Interface compiles"]}}' >> {session}/discoveries.ndjson + ``` + +6. **Report result**: + ```javascript + report_agent_job_result({ + id: "{id}", + status: "completed", + findings: "Generated implementation plan with 2 tasks. Complexity: Medium. Approach: Strategy pattern for OAuth providers. 
Identified existing strategy pattern in payment module.", + quality_score: "", + supervision_verdict: "", + error: "" + }) + ``` + +**Success Criteria**: +- plan.json written with valid structure +- 2-7 tasks defined +- Task dependencies form DAG (no cycles) +- Convergence criteria defined per task +- Complexity assessed + +--- + +### Executor Role + +**Responsibility**: Execute implementation plan tasks. + +**Input**: +- `id`: Task ID (e.g., `IMPL-001`) +- `title`: Task title +- `description`: Detailed task description +- `role`: `executor` +- `pipeline_phase`: `implementation` +- `context_from`: Upstream task IDs (e.g., `PLAN-001`) +- `prev_context`: Previous tasks' findings +- `inner_loop`: `true` (executor uses inner loop for self-repair) + +**Execution Protocol**: + +1. **Read implementation plan**: + ```javascript + const plan = JSON.parse(Read(`{session}/plan/plan.json`)) + ``` + +2. **For each task in plan.tasks** (ordered by depends_on): + + a. **Read context files**: + ```javascript + for (const file of task.files) { + if (fileExists(file)) Read(file) + } + ``` + + b. **Identify patterns**: + - Note imports, naming conventions, existing structure + - Follow project patterns from exploration_findings + + c. **Apply changes**: + - Use Edit for existing files (prefer) + - Use Write for new files + - Follow convergence criteria from task + + d. **Build check** (if build command exists): + ```bash + npm run build 2>&1 || echo BUILD_FAILED + ``` + - If build fails: analyze error → fix → rebuild (max 3 retries) + + e. **Verify convergence**: + - Check each criterion in task.convergence_criteria + - If not met: self-repair loop (max 3 iterations) + +3. 
**Share discoveries**: + ```bash + echo '{"ts":"2026-03-08T11:00:00+08:00","worker":"{id}","type":"implementation","data":{"task_id":"IMPL-001","files_modified":["src/auth/oauth.ts","src/auth/rbac.ts"],"approach":"Strategy pattern for auth providers","changes_summary":"Created OAuth2 provider, RBAC middleware, session management"}}' >> {session}/discoveries.ndjson + ``` + +4. **Report result**: + ```javascript + report_agent_job_result({ + id: "{id}", + status: "completed", + findings: "Implemented 2 tasks: OAuth provider interface + Google OAuth implementation. Modified 2 files. All convergence criteria met.", + quality_score: "", + supervision_verdict: "", + error: "" + }) + ``` + +**Success Criteria**: +- All tasks completed in dependency order +- Build passes (if build command exists) +- All convergence criteria met +- Code follows project patterns + +--- + +### Tester Role + +**Responsibility**: Run tests, fix failures, achieve 95% pass rate. + +**Input**: +- `id`: Task ID (e.g., `TEST-001`) +- `title`: Task title +- `description`: Detailed task description +- `role`: `tester` +- `pipeline_phase`: `validation` +- `context_from`: Upstream task IDs (e.g., `IMPL-001`) +- `prev_context`: Previous tasks' findings + +**Execution Protocol**: + +1. **Detect test framework**: + ```javascript + const packageJson = JSON.parse(Read('package.json')) + const testCommand = packageJson.scripts?.test || packageJson.scripts?.['test:unit'] + ``` + +2. **Run affected tests first** (if possible): + ```bash + npm test -- --changed + ``` + +3. **Run full test suite**: + ```bash + npm test 2>&1 + ``` + +4. **Parse test results**: + - Total tests + - Passed tests + - Failed tests + - Pass rate = passed / total + +5. **Self-repair loop** (if pass rate < 95%): + - Analyze test output + - Diagnose failure cause + - Fix source code + - Re-run tests + - Max 10 iterations + +6. 
**Share discoveries**: + ```bash + echo '{"ts":"2026-03-08T11:30:00+08:00","worker":"{id}","type":"test_result","data":{"framework":"vitest","pass_rate":98,"failures":["timeout in SSO integration test"],"fix_iterations":2}}' >> {session}/discoveries.ndjson + ``` + +7. **Report result**: + ```javascript + report_agent_job_result({ + id: "{id}", + status: "completed", + findings: "Ran 50 tests. Pass rate: 98% (49/50). Fixed 2 failures in 2 iterations. Remaining failure: timeout in SSO integration test (non-blocking).", + quality_score: "", + supervision_verdict: "", + error: "" + }) + ``` + +**Success Criteria**: +- Test suite executed +- Pass rate >= 95% +- Failures fixed (max 10 iterations) +- Test results documented + +--- + +### Reviewer Role + +**Responsibility**: Multi-dimensional code review or quality gate scoring. + +**Input**: +- `id`: Task ID (e.g., `REVIEW-001` or `QUALITY-001`) +- `title`: Task title +- `description`: Detailed task description +- `role`: `reviewer` +- `pipeline_phase`: `review` or `readiness` +- `context_from`: Upstream task IDs +- `prev_context`: Previous tasks' findings + +**Execution Protocol**: + +**For Code Review** (REVIEW-*): + +1. **Read implementation files**: + ```javascript + const plan = JSON.parse(Read(`{session}/plan/plan.json`)) + const modifiedFiles = plan.tasks.flatMap(t => t.files) + ``` + +2. **Multi-dimensional review**: + - **Quality**: Code style, naming, structure + - **Security**: Input validation, auth checks, SQL injection + - **Architecture**: Follows design, proper abstractions + - **Requirements**: Covers all FRs, acceptance criteria met + +3. **Determine verdict**: + - `BLOCK`: Critical issues, cannot merge + - `CONDITIONAL`: Minor issues, can merge with fixes + - `APPROVE`: No issues, ready to merge + +4. 
**Write review report**: + ```markdown + # Code Review: {id} + + ## Verdict: APPROVE + + ## Quality (8/10) + - Code style consistent + - Naming clear and semantic + - Minor: some functions could be extracted + + ## Security (9/10) + - Input validation present + - Auth checks correct + - SQL injection prevented + + ## Architecture (8/10) + - Follows strategy pattern + - Proper abstractions + - Minor: could use dependency injection + + ## Requirements Coverage (10/10) + - All FRs implemented + - Acceptance criteria met + - Edge cases handled + + ## Issues + (none) + + ## Recommendations + 1. Extract validation logic to separate module + 2. Add dependency injection for testability + ``` + +**For Quality Gate** (QUALITY-*): + +1. **Read all spec artifacts**: + ```javascript + const productBrief = Read(`{session}/spec/product-brief.md`) + const requirements = Read(`{session}/spec/requirements.md`) + const architecture = Read(`{session}/spec/architecture.md`) + const epics = Read(`{session}/spec/epics.md`) + ``` + +2. **Score 4 dimensions** (25% each): + - **Completeness**: All sections present, no gaps + - **Consistency**: Terminology aligned, decisions traced + - **Traceability**: Vision → requirements → architecture → epics + - **Depth**: Sufficient detail for implementation + +3. **Calculate overall score**: + ```javascript + const score = (completeness + consistency + traceability + depth) / 4 + ``` + +4. **Determine gate verdict**: + - `>= 80%`: PASS (proceed to implementation) + - `60-79%`: REVIEW (revisions recommended) + - `< 60%`: FAIL (return to writer for rework) + +5. 
**Write quality report**: + ```markdown + # Quality Gate: {id} + + ## Overall Score: 82.5% + + ## Dimension Scores + - Completeness: 90% (22.5/25) + - Consistency: 85% (21.25/25) + - Traceability: 80% (20/25) + - Depth: 75% (18.75/25) + + ## Verdict: PASS + + ## Findings + - All spec documents present and complete + - Terminology consistent across docs + - Clear trace from vision to epics + - Sufficient detail for implementation + - Minor: architecture could include more error handling details + ``` + +6. **Share discoveries**: + ```bash + echo '{"ts":"2026-03-08T12:00:00+08:00","worker":"{id}","type":"quality_gate","data":{"gate_id":"QUALITY-001","score":82,"dimensions":{"completeness":90,"consistency":85,"traceability":80,"depth":75},"verdict":"pass"}}' >> {session}/discoveries.ndjson + ``` + +7. **Report result**: + ```javascript + report_agent_job_result({ + id: "{id}", + status: "completed", + findings: "Quality gate: Completeness 90%, Consistency 85%, Traceability 80%, Depth 75%. Overall: 82.5% PASS.", + quality_score: "82", + supervision_verdict: "", + error: "" + }) + ``` + +**Success Criteria**: +- All dimensions scored +- Report written with findings +- Verdict determined +- Score >= 80% for quality gate pass + +--- + +## Inner Loop Protocol + +Roles with `inner_loop: true` support self-repair: + +| Scenario | Max Iterations | Action | +|----------|---------------|--------| +| Build failure | 3 | Analyze error → fix source → rebuild | +| Test failure | 10 | Analyze failure → fix source → re-run tests | +| Convergence not met | 3 | Check criteria → adjust implementation → re-verify | +| Document incomplete | 2 | Review template → add missing sections → re-validate | + +After max iterations: report error, mark task as failed. 
+ +--- + +## Shared Discovery Board + +All roles read/write `{session}/discoveries.ndjson`: + +**Discovery Types**: +- `research`: Research findings +- `spec_artifact`: Specification document +- `exploration`: Codebase exploration +- `plan_task`: Implementation task definition +- `implementation`: Implementation result +- `test_result`: Test execution result +- `review_finding`: Code review finding +- `checkpoint`: Supervisor checkpoint result +- `quality_gate`: Quality gate assessment + +**Protocol**: +1. Read discoveries at start +2. Append discoveries during execution (never modify existing) +3. Deduplicate by type + dedup key + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Upstream artifact not found | Report error, mark failed | +| Spec document invalid format | Report error, mark failed | +| Plan JSON corrupt | Report error, mark failed | +| Build fails after 3 retries | Mark task failed, report error | +| Tests fail after 10 retries | Mark task failed, report error | +| CLI tool timeout | Fallback to direct implementation | +| Dependency task failed | Skip dependent tasks, report error | + +--- + +## Output Format + +All roles use `report_agent_job_result` with this schema: + +```json +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Key discoveries (max 500 chars)", + "quality_score": "0-100 (reviewer only)", + "supervision_verdict": "pass|warn|block (supervisor only)", + "error": "" +} +``` diff --git a/.codex/skills/team-lifecycle-v4/schemas/tasks-schema.md b/.codex/skills/team-lifecycle-v4/schemas/tasks-schema.md new file mode 100644 index 00000000..f4f48c50 --- /dev/null +++ b/.codex/skills/team-lifecycle-v4/schemas/tasks-schema.md @@ -0,0 +1,178 @@ +# Team Lifecycle v4 -- CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### Input Columns (Set by Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | 
string | Yes | Unique task identifier | `"RESEARCH-001"` | +| `title` | string | Yes | Short task title | `"Domain research"` | +| `description` | string | Yes | Detailed task description (self-contained) | `"Explore domain, extract structured context..."` | +| `role` | string | Yes | Worker role: analyst, writer, planner, executor, tester, reviewer, supervisor | `"analyst"` | +| `pipeline_phase` | string | Yes | Lifecycle phase: research, product-brief, requirements, architecture, epics, checkpoint, readiness, planning, implementation, validation, review | `"research"` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"RESEARCH-001"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"RESEARCH-001"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[Task RESEARCH-001] Explored domain..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` -> `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Identified 5 integration points..."` | +| `quality_score` | string | Quality gate score (0-100) for reviewer tasks | `"85"` | +| `supervision_verdict` | string | Checkpoint verdict: `pass` / `warn` / `block` | `"pass"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` | 
`spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution | + +Interactive tasks appear in master CSV for dependency tracking but are NOT included in wave-{N}.csv files. + +--- + +### Example Data + +```csv +id,title,description,role,pipeline_phase,deps,context_from,exec_mode,wave,status,findings,quality_score,supervision_verdict,error +"RESEARCH-001","Domain research","Explore domain and competitors. Extract structured context: problem statement, target users, domain, constraints, exploration dimensions. Use CLI analysis tools.","analyst","research","","","csv-wave","1","pending","","","","" +"DRAFT-001","Product brief","Generate product brief from research context. Include vision statement, problem definition, target users, success goals. Use templates/product-brief.md template.","writer","product-brief","RESEARCH-001","RESEARCH-001","csv-wave","2","pending","","","","" +"DRAFT-002","Requirements PRD","Generate requirements PRD with functional requirements (FR-NNN), acceptance criteria, MoSCoW prioritization, user stories.","writer","requirements","DRAFT-001","DRAFT-001","csv-wave","3","pending","","","","" +"CHECKPOINT-001","Brief-PRD consistency","Verify: vision->requirements trace, terminology alignment, scope consistency, decision continuity, artifact existence.","supervisor","checkpoint","DRAFT-002","DRAFT-001;DRAFT-002","interactive","4","pending","","","","" +"DRAFT-003","Architecture design","Generate architecture with component diagram, tech stack justification, ADRs, data model, integration points.","writer","architecture","CHECKPOINT-001","DRAFT-002;CHECKPOINT-001","csv-wave","5","pending","","","","" +"DRAFT-004","Epics and stories","Generate 2-8 epics with 3-12 stories each. 
Include MVP subset, story format with ACs and estimates.","writer","epics","DRAFT-003","DRAFT-003","csv-wave","6","pending","","","","" +"CHECKPOINT-002","Full spec consistency","Verify: 4-doc terminology, decision chain, architecture-epics alignment, quality trend, open questions.","supervisor","checkpoint","DRAFT-004","DRAFT-001;DRAFT-002;DRAFT-003;DRAFT-004","interactive","7","pending","","","","" +"QUALITY-001","Readiness gate","Score spec quality across Completeness, Consistency, Traceability, Depth (25% each). Gate: >=80% pass, 60-79% review, <60% fail.","reviewer","readiness","CHECKPOINT-002","DRAFT-001;DRAFT-002;DRAFT-003;DRAFT-004","csv-wave","8","pending","","","","" +"PLAN-001","Implementation planning","Explore codebase, generate plan.json + TASK-*.json (2-7 tasks), assess complexity (Low/Medium/High).","planner","planning","QUALITY-001","QUALITY-001","csv-wave","9","pending","","","","" +"CHECKPOINT-003","Plan-input alignment","Verify: plan covers requirements, complexity sanity, dependency chain, execution method, upstream context.","supervisor","checkpoint","PLAN-001","PLAN-001","interactive","10","pending","","","","" +"IMPL-001","Code implementation","Execute implementation plan tasks. Follow existing code patterns. Run convergence checks.","executor","implementation","CHECKPOINT-003","PLAN-001","csv-wave","11","pending","","","","" +"TEST-001","Test execution","Detect test framework. Run affected tests first, then full suite. Fix failures (max 10 iterations, 95% target).","tester","validation","IMPL-001","IMPL-001","csv-wave","12","pending","","","","" +"REVIEW-001","Code review","Multi-dimensional code review: quality, security, architecture, requirements coverage. 
Verdict: BLOCK/CONDITIONAL/APPROVE.","reviewer","review","IMPL-001","IMPL-001","csv-wave","12","pending","","","","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +--------------------- -------------------- ----------------- +id ----------> id ----------> id +title ----------> title ----------> (reads) +description ----------> description ----------> (reads) +role ----------> role ----------> (reads) +pipeline_phase --------> pipeline_phase --------> (reads) +deps ----------> deps ----------> (reads) +context_from----------> context_from----------> (reads) +exec_mode ----------> exec_mode ----------> (reads) + wave ----------> (reads) + prev_context ----------> (reads) + status + findings + quality_score + supervision_verdict + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "RESEARCH-001", + "status": "completed", + "findings": "Explored domain: identified OAuth2+RBAC auth pattern, 5 integration points, TypeScript/React stack. Key constraint: must support SSO.", + "quality_score": "", + "supervision_verdict": "", + "error": "" +} +``` + +Quality gate output: + +```json +{ + "id": "QUALITY-001", + "status": "completed", + "findings": "Quality gate: Completeness 90%, Consistency 85%, Traceability 80%, Depth 75%. Overall: 82.5% PASS.", + "quality_score": "82", + "supervision_verdict": "", + "error": "" +} +``` + +Interactive tasks (CHECKPOINT-*) output via JSON written to `interactive/{id}-result.json`. 
+ +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `research` | `data.dimension` | `{dimension, findings[], constraints[], integration_points[]}` | Research context | +| `spec_artifact` | `data.doc_type` | `{doc_type, path, sections[], key_decisions[]}` | Specification document | +| `exploration` | `data.angle` | `{angle, relevant_files[], patterns[], recommendations[]}` | Codebase exploration | +| `plan_task` | `data.task_id` | `{task_id, title, files[], complexity, convergence_criteria[]}` | Plan task definition | +| `implementation` | `data.task_id` | `{task_id, files_modified[], approach, changes_summary}` | Implementation result | +| `test_result` | `data.framework` | `{framework, pass_rate, failures[], fix_iterations}` | Test result | +| `review_finding` | `data.file` | `{file, line, severity, dimension, description, suggested_fix}` | Review finding | +| `checkpoint` | `data.checkpoint_id` | `{checkpoint_id, verdict, score, risks[], blocks[]}` | Checkpoint result | +| `quality_gate` | `data.gate_id` | `{gate_id, score, dimensions{}, verdict}` | Quality assessment | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T10:00:00+08:00","worker":"RESEARCH-001","type":"research","data":{"dimension":"domain","findings":["Auth system needs OAuth2 + RBAC"],"constraints":["Must support SSO"],"integration_points":["User service API"]}} +{"ts":"2026-03-08T10:15:00+08:00","worker":"DRAFT-001","type":"spec_artifact","data":{"doc_type":"product-brief","path":"spec/product-brief.md","sections":["Vision","Problem","Users","Goals"],"key_decisions":["OAuth2 over custom auth"]}} +{"ts":"2026-03-08T11:00:00+08:00","worker":"IMPL-001","type":"implementation","data":{"task_id":"IMPL-001","files_modified":["src/auth/oauth.ts","src/auth/rbac.ts"],"approach":"Strategy pattern for auth providers","changes_summary":"Created OAuth2 provider, RBAC middleware, session management"}} 
+{"ts":"2026-03-08T11:30:00+08:00","worker":"TEST-001","type":"test_result","data":{"framework":"vitest","pass_rate":98,"failures":["timeout in SSO integration test"],"fix_iterations":2}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. + +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| CSV task findings | Interactive task | Injected via spawn message or send_input | +| Interactive task result | CSV task prev_context | Read from interactive/{id}-result.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Description non-empty | Every task has description | "Empty description for task: {id}" | +| Status enum | status in {pending, completed, failed, skipped} | "Invalid status: {status}" | +| Valid role | role in {analyst, writer, planner, executor, tester, reviewer, supervisor} | "Invalid role: {role}" | +| Valid pipeline_phase | pipeline_phase in {research, product-brief, requirements, architecture, epics, checkpoint, readiness, planning, implementation, validation, review} | "Invalid pipeline_phase: {value}" | +| Cross-mechanism deps | Interactive->CSV deps resolve correctly | "Cross-mechanism dependency unresolvable: {id}" | diff --git a/.codex/skills/team-perf-opt/SKILL.md b/.codex/skills/team-perf-opt/SKILL.md new file mode 100644 index 
00000000..72908fb5 --- /dev/null +++ b/.codex/skills/team-perf-opt/SKILL.md @@ -0,0 +1,659 @@ +--- +name: team-perf-opt +description: Performance optimization team skill. Profiles application performance, identifies bottlenecks, designs optimization strategies, implements changes, benchmarks improvements, and reviews code quality via CSV wave pipeline with interactive review-fix cycles. +argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"performance optimization task description\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults. + +# Team Performance Optimization + +## Usage + +```bash +$team-perf-opt "Optimize API response times for the user dashboard endpoints" +$team-perf-opt -c 4 "Profile and reduce memory usage in the data processing pipeline" +$team-perf-opt -y "Optimize bundle size and rendering performance for the frontend" +$team-perf-opt --continue "perf-optimize-api-20260308" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 3) +- `--continue`: Resume existing session + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Orchestrate multi-agent performance optimization: profile application, identify bottlenecks, design optimization strategies, implement changes, benchmark improvements, review code quality. The pipeline has five domain roles (profiler, strategist, optimizer, benchmarker, reviewer) mapped to CSV wave stages with an interactive review-fix cycle. 
+ +**Execution Model**: Hybrid -- CSV wave pipeline (primary) + individual agent spawn (secondary) + +``` ++-------------------------------------------------------------------+ +| TEAM PERFORMANCE OPTIMIZATION WORKFLOW | ++-------------------------------------------------------------------+ +| | +| Phase 0: Pre-Wave Interactive (Requirement Clarification) | +| +- Parse user task description | +| +- Detect scope: specific endpoint vs full app profiling | +| +- Clarify ambiguous requirements (AskUserQuestion) | +| +- Output: refined requirements for decomposition | +| | +| Phase 1: Requirement -> CSV + Classification | +| +- Identify performance targets and metrics | +| +- Build 5-stage pipeline (profile->strategize->optimize-> | +| | benchmark+review) | +| +- Classify tasks: csv-wave | interactive (exec_mode) | +| +- Compute dependency waves (topological sort) | +| +- Generate tasks.csv with wave + exec_mode columns | +| +- User validates task breakdown (skip if -y) | +| | +| Phase 2: Wave Execution Engine (Extended) | +| +- For each wave (1..N): | +| | +- Execute pre-wave interactive tasks (if any) | +| | +- Build wave CSV (filter csv-wave tasks for this wave) | +| | +- Inject previous findings into prev_context column | +| | +- spawn_agents_on_csv(wave CSV) | +| | +- Execute post-wave interactive tasks (if any) | +| | +- Merge all results into master tasks.csv | +| | +- Check: any failed? 
-> skip dependents | +| +- discoveries.ndjson shared across all modes (append-only) | +| +- Review-fix cycle: max 3 iterations per branch | +| | +| Phase 3: Post-Wave Interactive (Completion Action) | +| +- Pipeline completion report with benchmark comparisons | +| +- Interactive completion choice (Archive/Keep/Export) | +| +- Final aggregation / report | +| | +| Phase 4: Results Aggregation | +| +- Export final results.csv | +| +- Generate context.md with all findings | +| +- Display summary: completed/failed/skipped per wave | +| +- Offer: view results | retry failed | done | +| | ++-------------------------------------------------------------------+ +``` + +--- + +## Pipeline Definition + +``` +Stage 1 Stage 2 Stage 3 Stage 4 +PROFILE-001 --> STRATEGY-001 --> IMPL-001 --> BENCH-001 +[profiler] [strategist] [optimizer] [benchmarker] + ^ | + +<-- FIX-001 ---+ + | REVIEW-001 + +<--------> [reviewer] + (max 3 iterations) +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, revision cycles, user checkpoints | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Performance profiling (single-pass) | `csv-wave` | +| Optimization strategy design (single-pass) | `csv-wave` | +| Code optimization implementation | `csv-wave` | +| Benchmark execution (single-pass) | `csv-wave` | +| Code review (single-pass) | `csv-wave` | +| Review-fix cycle (iterative revision) | `interactive` | +| User checkpoint (plan approval) | `interactive` | +| Discussion round (DISCUSS-OPT, DISCUSS-REVIEW) | `interactive` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv 
+id,title,description,role,bottleneck_type,priority,target_files,deps,context_from,exec_mode,wave,status,findings,verdict,artifacts_produced,error +"PROFILE-001","Profile performance","Profile application performance to identify CPU, memory, I/O, network, and rendering bottlenecks. Produce baseline metrics and ranked report.","profiler","","","","","","csv-wave","1","pending","","","","" +"STRATEGY-001","Design optimization plan","Analyze bottleneck report to design prioritized optimization plan with strategies and expected improvements.","strategist","","","","PROFILE-001","PROFILE-001","csv-wave","2","pending","","","","" +"IMPL-001","Implement optimizations","Implement performance optimization changes following strategy plan in priority order.","optimizer","","","","STRATEGY-001","STRATEGY-001","csv-wave","3","pending","","","","" +"BENCH-001","Benchmark improvements","Run benchmarks comparing before/after optimization metrics. Validate improvements meet plan criteria.","benchmarker","","","","IMPL-001","IMPL-001","csv-wave","4","pending","","","","" +"REVIEW-001","Review optimization code","Review optimization changes for correctness, side effects, regression risks, and best practices.","reviewer","","","","IMPL-001","IMPL-001","csv-wave","4","pending","","","","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier (PREFIX-NNN format) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description (self-contained) | +| `role` | Input | Worker role: profiler, strategist, optimizer, benchmarker, reviewer | +| `bottleneck_type` | Input | Performance bottleneck category: CPU, MEMORY, IO, NETWORK, RENDERING, DATABASE | +| `priority` | Input | P0 (Critical), P1 (High), P2 (Medium), P3 (Low) | +| `target_files` | Input | Semicolon-separated file paths to focus on | +| `deps` | Input | Semicolon-separated dependency task IDs | +| `context_from` | 
Input | Semicolon-separated task IDs whose findings this task needs | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `wave` | Computed | Wave number (computed by topological sort, 1-based) | +| `status` | Output | `pending` -> `completed` / `failed` / `skipped` | +| `findings` | Output | Key discoveries or implementation notes (max 500 chars) | +| `verdict` | Output | Benchmark/review verdict: PASS, WARN, FAIL, APPROVE, REVISE, REJECT | +| `artifacts_produced` | Output | Semicolon-separated paths of produced artifacts | +| `error` | Output | Error message if failed (empty if success) | + +### Per-Wave CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). + +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| Plan Reviewer | agents/plan-reviewer.md | 2.3 (send_input cycle) | Review bottleneck report or optimization plan at user checkpoint | pre-wave | +| Fix Cycle Handler | agents/fix-cycle-handler.md | 2.3 (send_input cycle) | Manage review-fix iteration cycle (max 3 rounds) | post-wave | +| Completion Handler | agents/completion-handler.md | 2.3 (send_input cycle) | Handle pipeline completion action (Archive/Keep/Export) | standalone | + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. 
+ +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state -- all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 4 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 4 | +| `task-analysis.json` | Phase 1 output: scope, bottleneck targets, pipeline config | Created in Phase 1 | +| `artifacts/baseline-metrics.json` | Profiler: before-optimization metrics | Created by profiler | +| `artifacts/bottleneck-report.md` | Profiler: ranked bottleneck findings | Created by profiler | +| `artifacts/optimization-plan.md` | Strategist: prioritized optimization plan | Created by strategist | +| `artifacts/benchmark-results.json` | Benchmarker: after-optimization metrics | Created by benchmarker | +| `artifacts/review-report.md` | Reviewer: code review findings | Created by reviewer | +| `interactive/{id}-result.json` | Results from interactive tasks | Created per interactive task | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ ++-- tasks.csv # Master state (all tasks, both modes) ++-- results.csv # Final results export ++-- discoveries.ndjson # Shared discovery board (all agents) ++-- context.md # Human-readable report ++-- task-analysis.json # Phase 1 analysis output ++-- wave-{N}.csv # Temporary per-wave input (csv-wave only) ++-- artifacts/ +| +-- baseline-metrics.json # Profiler output +| +-- bottleneck-report.md # Profiler output +| +-- optimization-plan.md # Strategist output +| +-- benchmark-results.json # Benchmarker output +| +-- review-report.md # Reviewer output ++-- interactive/ # Interactive task artifacts +| +-- {id}-result.json ++-- wisdom/ + +-- patterns.md 
# Discovered patterns and conventions +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +const getUtc8ISOString = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString() + +const AUTO_YES = $ARGUMENTS.includes('--yes') || $ARGUMENTS.includes('-y') +const continueMode = $ARGUMENTS.includes('--continue') +const concurrencyMatch = $ARGUMENTS.match(/(?:--concurrency|-c)\s+(\d+)/) +const maxConcurrency = concurrencyMatch ? parseInt(concurrencyMatch[1]) : 3 + +const requirement = $ARGUMENTS + .replace(/--yes|-y|--continue|--concurrency\s+\d+|-c\s+\d+/g, '') + .trim() + +const slug = requirement.toLowerCase() + .replace(/[^a-z0-9\u4e00-\u9fa5]+/g, '-') + .substring(0, 40) +const dateStr = getUtc8ISOString().substring(0, 10).replace(/-/g, '') +const sessionId = `perf-${slug}-${dateStr}` +const sessionFolder = `.workflow/.csv-wave/${sessionId}` + +Bash(`mkdir -p ${sessionFolder}/artifacts ${sessionFolder}/interactive ${sessionFolder}/wisdom`) + +// Initialize discoveries.ndjson +Write(`${sessionFolder}/discoveries.ndjson`, '') + +// Initialize wisdom +Write(`${sessionFolder}/wisdom/patterns.md`, '# Patterns & Conventions\n') +``` + +--- + +### Phase 0: Pre-Wave Interactive (Requirement Clarification) + +**Objective**: Parse user task, detect performance scope, clarify ambiguities, prepare for decomposition. + +**Workflow**: + +1. **Parse user task description** from $ARGUMENTS + +2. **Check for existing sessions** (continue mode): + - Scan `.workflow/.csv-wave/perf-*/tasks.csv` for sessions with pending tasks + - If `--continue`: resume the specified or most recent session, skip to Phase 2 + - If active session found: ask user whether to resume or start new + +3. 
**Identify performance optimization target**: + +| Signal | Target | +|--------|--------| +| Specific endpoint/file mentioned | Scoped optimization | +| "slow", "performance", "speed", generic | Full application profiling | +| Specific metric (response time, memory, bundle size) | Targeted metric optimization | +| "frontend", "backend", "CLI" | Platform-specific profiling | + +4. **Clarify if ambiguous** (skip if AUTO_YES): + ```javascript + AskUserQuestion({ + questions: [{ + question: "Please confirm the performance optimization scope:", + header: "Performance Scope", + multiSelect: false, + options: [ + { label: "Proceed as described", description: "Scope is clear" }, + { label: "Narrow scope", description: "Specify endpoints/modules to focus on" }, + { label: "Add constraints", description: "Target metrics, acceptable trade-offs" } + ] + }] + }) + ``` + +5. **Output**: Refined requirement string for Phase 1 + +**Success Criteria**: +- Refined requirements available for Phase 1 decomposition +- Existing session detected and handled if applicable + +--- + +### Phase 1: Requirement -> CSV + Classification + +**Objective**: Decompose performance optimization task into the 5-stage pipeline tasks, assign waves, generate tasks.csv. + +**Decomposition Rules**: + +1. **Stage mapping** -- performance optimization always follows this pipeline: + +| Stage | Role | Task Prefix | Wave | Description | +|-------|------|-------------|------|-------------| +| 1 | profiler | PROFILE | 1 | Profile app, identify bottlenecks, produce baseline metrics | +| 2 | strategist | STRATEGY | 2 | Design optimization plan from bottleneck report | +| 3 | optimizer | IMPL | 3 | Implement optimizations per plan priority | +| 4a | benchmarker | BENCH | 4 | Benchmark before/after, validate improvements | +| 4b | reviewer | REVIEW | 4 | Review optimization code for correctness | + +2. 
**Single-pipeline decomposition**: Generate one task per stage with sequential dependencies: + - PROFILE-001 (wave 1, no deps) + - STRATEGY-001 (wave 2, deps: PROFILE-001) + - IMPL-001 (wave 3, deps: STRATEGY-001) + - BENCH-001 (wave 4, deps: IMPL-001) + - REVIEW-001 (wave 4, deps: IMPL-001) + +3. **Description enrichment**: Each task description must be self-contained with: + - Clear goal statement + - Input artifacts to read + - Output artifacts to produce + - Success criteria + - Session folder path + +**Classification Rules**: + +| Task Property | exec_mode | +|---------------|-----------| +| PROFILE, STRATEGY, IMPL, BENCH, REVIEW (initial pass) | `csv-wave` | +| FIX tasks (review-fix cycle) | `interactive` (handled by fix-cycle-handler agent) | + +**Wave Computation**: Kahn's BFS topological sort with depth tracking (csv-wave tasks only). + +**User Validation**: Display task breakdown with wave + exec_mode assignment (skip if AUTO_YES). + +**Success Criteria**: +- tasks.csv created with valid schema, wave, and exec_mode assignments +- task-analysis.json written with scope and pipeline config +- No circular dependencies +- User approved (or AUTO_YES) + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with hybrid mechanism support and cross-wave context propagation. + +```javascript +const masterCsv = Read(`${sessionFolder}/tasks.csv`) +let tasks = parseCsv(masterCsv) +const maxWave = Math.max(...tasks.map(t => t.wave)) + +for (let wave = 1; wave <= maxWave; wave++) { + console.log(`\nWave ${wave}/${maxWave}`) + + // 1. Separate tasks by exec_mode + const waveTasks = tasks.filter(t => t.wave === wave && t.status === 'pending') + const csvTasks = waveTasks.filter(t => t.exec_mode === 'csv-wave') + const interactiveTasks = waveTasks.filter(t => t.exec_mode === 'interactive') + + // 2. 
Check dependencies -- skip tasks whose deps failed + for (const task of waveTasks) { + const depIds = (task.deps || '').split(';').filter(Boolean) + const depStatuses = depIds.map(id => tasks.find(t => t.id === id)?.status) + if (depStatuses.some(s => s === 'failed' || s === 'skipped')) { + task.status = 'skipped' + task.error = `Dependency failed: ${depIds.filter((id, i) => + ['failed','skipped'].includes(depStatuses[i])).join(', ')}` + } + } + + // 3. Execute pre-wave interactive tasks (if any) + for (const task of interactiveTasks.filter(t => t.status === 'pending')) { + const agentFile = task.id.startsWith('FIX') ? 'agents/fix-cycle-handler.md' : 'agents/plan-reviewer.md' + Read(agentFile) + + const agent = spawn_agent({ + message: `## TASK ASSIGNMENT\n\n### MANDATORY FIRST STEPS\n1. Read: ${agentFile}\n2. Read: ${sessionFolder}/discoveries.ndjson\n3. Read: .workflow/project-tech.json (if exists)\n\n---\n\nGoal: ${task.description}\nScope: ${task.title}\nSession: ${sessionFolder}\n\n### Previous Context\n${buildPrevContext(task, tasks)}` + }) + const result = wait({ ids: [agent], timeout_ms: 600000 }) + if (result.timed_out) { + send_input({ id: agent, message: "Please finalize and output current findings." }) + wait({ ids: [agent], timeout_ms: 120000 }) + } + Write(`${sessionFolder}/interactive/${task.id}-result.json`, JSON.stringify({ + task_id: task.id, status: "completed", findings: parseFindings(result), + timestamp: getUtc8ISOString() + })) + close_agent({ id: agent }) + task.status = 'completed' + task.findings = parseFindings(result) + } + + // 4. Build prev_context for csv-wave tasks + const pendingCsvTasks = csvTasks.filter(t => t.status === 'pending') + for (const task of pendingCsvTasks) { + task.prev_context = buildPrevContext(task, tasks) + } + + if (pendingCsvTasks.length > 0) { + // 5. Write wave CSV + Write(`${sessionFolder}/wave-${wave}.csv`, toCsv(pendingCsvTasks)) + + // 6. 
Determine instruction -- read from instructions/agent-instruction.md + Read('instructions/agent-instruction.md') + + // 7. Execute wave via spawn_agents_on_csv + spawn_agents_on_csv({ + csv_path: `${sessionFolder}/wave-${wave}.csv`, + id_column: "id", + instruction: perfOptInstruction, // from instructions/agent-instruction.md + max_concurrency: maxConcurrency, + max_runtime_seconds: 900, + output_csv_path: `${sessionFolder}/wave-${wave}-results.csv`, + output_schema: { + type: "object", + properties: { + id: { type: "string" }, + status: { type: "string", enum: ["completed", "failed"] }, + findings: { type: "string" }, + verdict: { type: "string" }, + artifacts_produced: { type: "string" }, + error: { type: "string" } + } + } + }) + + // 8. Merge results into master CSV + const results = parseCsv(Read(`${sessionFolder}/wave-${wave}-results.csv`)) + for (const r of results) { + const t = tasks.find(t => t.id === r.id) + if (t) Object.assign(t, r) + } + } + + // 9. Update master CSV + Write(`${sessionFolder}/tasks.csv`, toCsv(tasks)) + + // 10. Cleanup temp files + Bash(`rm -f ${sessionFolder}/wave-${wave}.csv ${sessionFolder}/wave-${wave}-results.csv`) + + // 11. 
Post-wave: check for review-fix cycle + const benchTask = tasks.find(t => t.id.startsWith('BENCH') && t.wave === wave) + const reviewTask = tasks.find(t => t.id.startsWith('REVIEW') && t.wave === wave) + + if ((benchTask?.verdict === 'FAIL' || reviewTask?.verdict === 'REVISE' || reviewTask?.verdict === 'REJECT')) { + const fixCycleCount = tasks.filter(t => t.id.startsWith('FIX')).length + if (fixCycleCount < 3) { + const fixId = `FIX-${String(fixCycleCount + 1).padStart(3, '0')}` + const feedback = [benchTask?.error, reviewTask?.findings].filter(Boolean).join('\n') + tasks.push({ + id: fixId, title: `Fix issues from review/benchmark cycle ${fixCycleCount + 1}`, + description: `Fix issues found:\n${feedback}`, + role: 'optimizer', bottleneck_type: '', priority: 'P0', target_files: '', + deps: '', context_from: '', exec_mode: 'interactive', + wave: wave + 1, status: 'pending', findings: '', verdict: '', + artifacts_produced: '', error: '' + }) + } + } + + // 12. Display wave summary + const completed = waveTasks.filter(t => t.status === 'completed').length + const failed = waveTasks.filter(t => t.status === 'failed').length + const skipped = waveTasks.filter(t => t.status === 'skipped').length + console.log(`Wave ${wave} Complete: ${completed} completed, ${failed} failed, ${skipped} skipped`) +} +``` + +**Success Criteria**: +- All waves executed in order +- Both csv-wave and interactive tasks handled per wave +- Each wave's results merged into master CSV before next wave starts +- Dependent tasks skipped when predecessor failed +- Review-fix cycle handled with max 3 iterations +- discoveries.ndjson accumulated across all waves and mechanisms + +--- + +### Phase 3: Post-Wave Interactive (Completion Action) + +**Objective**: Pipeline completion report with performance improvement metrics and interactive completion choice. + +```javascript +// 1. 
Generate pipeline summary +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +const completed = tasks.filter(t => t.status === 'completed') +const failed = tasks.filter(t => t.status === 'failed') + +// 2. Load improvement metrics from benchmark results +let improvements = '' +try { + const benchmark = JSON.parse(Read(`${sessionFolder}/artifacts/benchmark-results.json`)) + improvements = `Performance Improvements:\n${benchmark.metrics.map(m => + ` ${m.name}: ${m.baseline} -> ${m.current} (${m.improvement})`).join('\n')}` +} catch {} + +console.log(` +============================================ +PERFORMANCE OPTIMIZATION COMPLETE + +Deliverables: + - Baseline Metrics: artifacts/baseline-metrics.json + - Bottleneck Report: artifacts/bottleneck-report.md + - Optimization Plan: artifacts/optimization-plan.md + - Benchmark Results: artifacts/benchmark-results.json + - Review Report: artifacts/review-report.md + +${improvements} + +Pipeline: ${completed.length}/${tasks.length} tasks +Session: ${sessionFolder} +============================================ +`) + +// 3. Completion action +if (!AUTO_YES) { + AskUserQuestion({ + questions: [{ + question: "Performance optimization complete. What would you like to do?", + header: "Completion", + multiSelect: false, + options: [ + { label: "Archive & Clean (Recommended)", description: "Archive session, output final summary" }, + { label: "Keep Active", description: "Keep session for follow-up work" }, + { label: "Retry Failed", description: "Re-run failed tasks" } + ] + }] + }) +} +``` + +**Success Criteria**: +- Post-wave interactive processing complete +- User informed of results and improvement metrics + +--- + +### Phase 4: Results Aggregation + +**Objective**: Generate final results and human-readable report. + +```javascript +// 1. Export results.csv +Bash(`cp ${sessionFolder}/tasks.csv ${sessionFolder}/results.csv`) + +// 2. 
Generate context.md +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +let contextMd = `# Performance Optimization Report\n\n` +contextMd += `**Session**: ${sessionId}\n` +contextMd += `**Date**: ${getUtc8ISOString().substring(0, 10)}\n\n` + +contextMd += `## Summary\n` +contextMd += `| Status | Count |\n|--------|-------|\n` +contextMd += `| Completed | ${tasks.filter(t => t.status === 'completed').length} |\n` +contextMd += `| Failed | ${tasks.filter(t => t.status === 'failed').length} |\n` +contextMd += `| Skipped | ${tasks.filter(t => t.status === 'skipped').length} |\n\n` + +contextMd += `## Deliverables\n\n` +contextMd += `| Artifact | Path |\n|----------|------|\n` +contextMd += `| Baseline Metrics | artifacts/baseline-metrics.json |\n` +contextMd += `| Bottleneck Report | artifacts/bottleneck-report.md |\n` +contextMd += `| Optimization Plan | artifacts/optimization-plan.md |\n` +contextMd += `| Benchmark Results | artifacts/benchmark-results.json |\n` +contextMd += `| Review Report | artifacts/review-report.md |\n\n` + +const maxWave = Math.max(...tasks.map(t => t.wave)) +contextMd += `## Wave Execution\n\n` +for (let w = 1; w <= maxWave; w++) { + const waveTasks = tasks.filter(t => t.wave === w) + contextMd += `### Wave ${w}\n\n` + for (const t of waveTasks) { + const icon = t.status === 'completed' ? '[DONE]' : t.status === 'failed' ? '[FAIL]' : '[SKIP]' + contextMd += `${icon} **${t.title}** [${t.role}] ${t.verdict ? 
`(${t.verdict})` : ''} ${t.findings || ''}\n\n` + } +} + +Write(`${sessionFolder}/context.md`, contextMd) + +console.log(`Results exported to: ${sessionFolder}/results.csv`) +console.log(`Report generated at: ${sessionFolder}/context.md`) +``` + +**Success Criteria**: +- results.csv exported (all tasks, both modes) +- context.md generated with deliverables list +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +All agents (csv-wave and interactive) share a single `discoveries.ndjson` file for cross-task knowledge exchange. + +**Format**: One JSON object per line (NDJSON): + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"PROFILE-001","type":"bottleneck_found","data":{"type":"CPU","location":"src/services/DataProcessor.ts:145","severity":"Critical","description":"O(n^2) nested loop in processRecords"}} +{"ts":"2026-03-08T10:05:00Z","worker":"IMPL-001","type":"file_modified","data":{"file":"src/services/DataProcessor.ts","change":"Replaced nested loop with Map lookup","lines_added":8}} +``` + +**Discovery Types**: + +| Type | Data Schema | Description | +|------|-------------|-------------| +| `bottleneck_found` | `{type, location, severity, description}` | Performance bottleneck identified | +| `hotspot_found` | `{file, function, cpu_pct, description}` | CPU hotspot detected | +| `memory_issue` | `{file, type, size_mb, description}` | Memory leak or bloat found | +| `io_issue` | `{operation, latency_ms, description}` | I/O performance issue | +| `file_modified` | `{file, change, lines_added}` | File change recorded | +| `metric_measured` | `{metric, value, unit, context}` | Performance metric measured | +| `pattern_found` | `{pattern_name, location, description}` | Code pattern identified | +| `artifact_produced` | `{name, path, producer, type}` | Deliverable created | + +**Protocol**: +1. Agents MUST read discoveries.ndjson at start of execution +2. Agents MUST append relevant discoveries during execution +3. 
Agents MUST NOT modify or delete existing entries +4. Deduplication by `{type, data.location}` or `{type, data.file}` key + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Circular dependency in tasks | Detect in wave computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent tasks in later waves | +| Interactive agent timeout | Urge convergence via send_input, then close if still timed out | +| Interactive agent failed | Mark as failed, skip dependents | +| All agents in wave failed | Log error, offer retry or abort | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| Review-fix cycle exceeds 3 iterations | Escalate to user with summary of remaining issues | +| Benchmark regression detected | Create FIX task with regression details | +| Profiling tool not available | Fall back to static analysis methods | +| Continue mode: no session found | List available sessions, prompt user to select | + +--- + +## Core Rules + +1. **Start Immediately**: First action is session initialization, then Phase 0/1 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. **CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive) +4. **CSV First**: Default to csv-wave for tasks; only use interactive when interaction pattern requires it +5. **Context Propagation**: prev_context built from master CSV, not from memory +6. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson -- both mechanisms share it +7. **Skip on Failure**: If a dependency failed, skip the dependent task (regardless of mechanism) +8. **Max 3 Fix Cycles**: Review-fix cycle capped at 3 iterations; escalate to user after +9. 
**Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +10. **DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-perf-opt/agents/completion-handler.md b/.codex/skills/team-perf-opt/agents/completion-handler.md new file mode 100644 index 00000000..04c96f12 --- /dev/null +++ b/.codex/skills/team-perf-opt/agents/completion-handler.md @@ -0,0 +1,141 @@ +# Completion Handler Agent + +Handle pipeline completion action for performance optimization: present results summary with before/after metrics, offer Archive/Keep/Export options, execute chosen action. + +## Identity + +- **Type**: `interactive` +- **Responsibility**: Pipeline completion and session lifecycle management + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Present complete pipeline summary with before/after performance metrics +- Offer completion action choices +- Execute chosen action (archive, keep, export) +- Produce structured output + +### MUST NOT + +- Skip presenting results summary +- Execute destructive actions without confirmation +- Modify source code + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | builtin | Load result artifacts | +| `Write` | builtin | Write export files | +| `Bash` | builtin | Archive/cleanup operations | +| `AskUserQuestion` | builtin | Present completion choices | + +--- + +## Execution + +### Phase 1: Results Collection + +**Objective**: Gather all pipeline results for summary. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| tasks.csv | Yes | Master task state | +| Baseline metrics | Yes | Pre-optimization metrics | +| Benchmark results | Yes | Post-optimization metrics | +| Review report | Yes | Code review findings | + +**Steps**: + +1. Read tasks.csv -- count completed/failed/skipped +2. 
Read baseline-metrics.json -- extract before metrics +3. Read benchmark-results.json -- extract after metrics, compute improvements +4. Read review-report.md -- extract final verdict + +**Output**: Compiled results summary with before/after comparison + +--- + +### Phase 2: Present and Choose + +**Objective**: Display results and get user's completion choice. + +**Steps**: + +1. Display pipeline summary with before/after metrics comparison table +2. Present completion action: + +```javascript +AskUserQuestion({ + questions: [{ + question: "Performance optimization complete. What would you like to do?", + header: "Completion", + multiSelect: false, + options: [ + { label: "Archive & Clean (Recommended)", description: "Archive session, output final summary" }, + { label: "Keep Active", description: "Keep session for follow-up work or inspection" }, + { label: "Export Results", description: "Export deliverables to a specified location" } + ] + }] +}) +``` + +**Output**: User's choice + +--- + +### Phase 3: Execute Action + +**Objective**: Execute the chosen completion action. 
+ +| Choice | Action | +|--------|--------| +| Archive & Clean | Copy results.csv and context.md to archive, mark session completed | +| Keep Active | Mark session as paused, leave all artifacts in place | +| Export Results | Copy key deliverables to user-specified location | + +--- + +## Structured Output Template + +``` +## Pipeline Summary +- Tasks: X completed, Y failed, Z skipped +- Duration: estimated from timestamps + +## Performance Improvements +| Metric | Before | After | Improvement | +|--------|--------|-------|-------------| +| metric_1 | value | value | +X% | +| metric_2 | value | value | +X% | + +## Deliverables +- Baseline Metrics: path +- Bottleneck Report: path +- Optimization Plan: path +- Benchmark Results: path +- Review Report: path + +## Action Taken +- Choice: Archive & Clean / Keep Active / Export Results +- Status: completed +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Result artifacts missing | Report partial summary with available data | +| Archive operation fails | Default to Keep Active | +| Export path invalid | Ask user for valid path | +| Timeout approaching | Default to Keep Active | diff --git a/.codex/skills/team-perf-opt/agents/fix-cycle-handler.md b/.codex/skills/team-perf-opt/agents/fix-cycle-handler.md new file mode 100644 index 00000000..da1ab5a0 --- /dev/null +++ b/.codex/skills/team-perf-opt/agents/fix-cycle-handler.md @@ -0,0 +1,156 @@ +# Fix Cycle Handler Agent + +Manage the review-fix iteration cycle for performance optimization. Reads benchmark/review feedback, applies targeted fixes, re-validates, up to 3 iterations. 
+ +## Identity + +- **Type**: `interactive` +- **Responsibility**: Iterative fix-verify cycle for optimization issues + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Read benchmark results and review report to understand failures +- Apply targeted fixes addressing specific feedback items +- Re-validate after each fix attempt +- Track iteration count (max 3) +- Produce structured output with fix summary + +### MUST NOT + +- Skip reading feedback before attempting fixes +- Apply broad changes unrelated to feedback +- Exceed 3 fix iterations +- Modify code outside the scope of reported issues + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | builtin | Load feedback artifacts and source files | +| `Edit` | builtin | Apply targeted code fixes | +| `Write` | builtin | Write updated artifacts | +| `Bash` | builtin | Run build/test/benchmark validation | +| `Grep` | builtin | Search for patterns | +| `Glob` | builtin | Find files | + +--- + +## Execution + +### Phase 1: Feedback Loading + +**Objective**: Load and parse benchmark/review feedback. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Benchmark results | Yes (if benchmark failed) | From artifacts/benchmark-results.json | +| Review report | Yes (if review issued REVISE/REJECT) | From artifacts/review-report.md | +| Optimization plan | Yes | Original plan for reference | +| Baseline metrics | Yes | For regression comparison | +| Discoveries | No | Shared findings | + +**Steps**: + +1. Read benchmark-results.json -- identify metrics that failed targets or regressed +2. Read review-report.md -- identify Critical/High findings with file:line references +3. 
Categorize issues by type and priority: + - Performance regression (benchmark target not met) + - Correctness issue (logic error, race condition) + - Side effect (unintended behavior change) + - Maintainability concern (excessive complexity) + +**Output**: Prioritized list of issues to fix + +--- + +### Phase 2: Fix Implementation (Iterative) + +**Objective**: Apply fixes and re-validate, up to 3 rounds. + +**Steps**: + +For each iteration (1..3): + +1. **Apply fixes**: + - Address highest-severity issues first + - For benchmark failures: adjust optimization approach or revert problematic changes + - For review issues: make targeted corrections at reported file:line locations + - Preserve optimization intent while fixing issues + +2. **Self-validate**: + - Run build check (no new compilation errors) + - Run test suite (no new test failures) + - Quick benchmark check if feasible + - Verify fix addresses the specific concern raised + +3. **Check convergence**: + +| Validation Result | Action | +|-------------------|--------| +| All checks pass | Exit loop, report success | +| Some checks still fail, iteration < 3 | Continue to next iteration | +| Still failing at iteration 3 | Report remaining issues for escalation | + +**Output**: Fix results per iteration + +--- + +### Phase 3: Result Reporting + +**Objective**: Produce final fix cycle summary. + +**Steps**: + +1. Update benchmark-results.json with post-fix metrics if applicable +2. Append fix discoveries to discoveries.ndjson +3. 
Report final status + +--- + +## Structured Output Template + +``` +## Summary +- Fix cycle completed: N iterations, M issues resolved, K remaining + +## Iterations +### Iteration 1 +- Fixed: [list of fixes applied with file:line] +- Validation: [pass/fail per dimension] + +### Iteration 2 (if needed) +- Fixed: [list of fixes] +- Validation: [pass/fail] + +## Final Status +- verdict: PASS | PARTIAL | ESCALATE +- Remaining issues (if any): [list] + +## Performance Impact +- Metric changes from fixes (if measured) + +## Artifacts Updated +- artifacts/benchmark-results.json (updated metrics, if re-benchmarked) +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Fix introduces new regression | Revert fix, try alternative approach | +| Cannot reproduce reported issue | Log as resolved-by-environment, continue | +| Fix scope exceeds current files | Report scope expansion needed, escalate | +| Optimization approach fundamentally flawed | Report for strategist escalation | +| Timeout approaching | Output partial results with iteration count | +| 3 iterations exhausted | Report remaining issues for user escalation | diff --git a/.codex/skills/team-perf-opt/agents/plan-reviewer.md b/.codex/skills/team-perf-opt/agents/plan-reviewer.md new file mode 100644 index 00000000..2c40d517 --- /dev/null +++ b/.codex/skills/team-perf-opt/agents/plan-reviewer.md @@ -0,0 +1,150 @@ +# Plan Reviewer Agent + +Review bottleneck report or optimization plan at user checkpoints, providing interactive approval or revision requests. 
+ +## Identity + +- **Type**: `interactive` +- **Responsibility**: Review and approve/revise plans before execution proceeds + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Read the bottleneck report or optimization plan being reviewed +- Produce structured output with clear APPROVE/REVISE verdict +- Include specific file:line references in findings + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Modify source code directly +- Produce unstructured output +- Approve without actually reading the plan + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | builtin | Load plan artifacts and project files | +| `Grep` | builtin | Search for patterns in codebase | +| `Glob` | builtin | Find files by pattern | +| `Bash` | builtin | Run build/test commands | + +### Tool Usage Patterns + +**Read Pattern**: Load context files before review +``` +Read("{session_folder}/artifacts/bottleneck-report.md") +Read("{session_folder}/artifacts/optimization-plan.md") +Read("{session_folder}/discoveries.ndjson") +``` + +--- + +## Execution + +### Phase 1: Context Loading + +**Objective**: Load the plan or report to review. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Bottleneck report | Yes (if reviewing profiling) | Ranked bottleneck list from profiler | +| Optimization plan | Yes (if reviewing strategy) | Prioritized plan from strategist | +| Discoveries | No | Shared findings from prior stages | + +**Steps**: + +1. Read the artifact being reviewed from session artifacts folder +2. Read discoveries.ndjson for additional context +3. Identify which checkpoint this review corresponds to (CP-1 for profiling, CP-2 for strategy) + +**Output**: Loaded plan context for review + +--- + +### Phase 2: Plan Review + +**Objective**: Evaluate plan quality, completeness, and feasibility. + +**Steps**: + +1. 
**For bottleneck report review (CP-1)**: + - Verify all performance dimensions are covered (CPU, memory, I/O, network, rendering) + - Check that severity rankings are justified with measured evidence + - Validate baseline metrics are quantified with units and measurement method + - Check scope coverage matches original requirement + +2. **For optimization plan review (CP-2)**: + - Verify each optimization has unique OPT-ID and self-contained detail + - Check priority assignments follow impact/effort matrix + - Validate target files are non-overlapping between optimizations + - Verify success criteria are measurable with specific thresholds + - Check that implementation guidance is actionable + - Assess risk levels and potential side effects + +3. **Issue classification**: + +| Finding Severity | Condition | Impact | +|------------------|-----------|--------| +| Critical | Missing key profiling dimension or infeasible plan | REVISE required | +| High | Unclear criteria or unrealistic targets | REVISE recommended | +| Medium | Minor gaps in coverage or detail | Note for improvement | +| Low | Style or formatting issues | Informational | + +**Output**: Review findings with severity classifications + +--- + +### Phase 3: Verdict + +**Objective**: Issue APPROVE or REVISE verdict. + +| Verdict | Condition | Action | +|---------|-----------|--------| +| APPROVE | No Critical or High findings | Plan is ready for next stage | +| REVISE | Has Critical or High findings | Return specific feedback for revision | + +**Output**: Verdict with detailed feedback + +--- + +## Structured Output Template + +``` +## Summary +- One-sentence verdict: APPROVE or REVISE with rationale + +## Findings +- Finding 1: [severity] description with artifact reference +- Finding 2: [severity] description with specific section reference + +## Verdict +- APPROVE: Plan is ready for execution + OR +- REVISE: Specific items requiring revision + 1. Issue description + suggested fix + 2. 
Issue description + suggested fix + +## Recommendations +- Optional improvement suggestions (non-blocking) +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Artifact file not found | Report in findings, request re-generation | +| Plan structure invalid | Report as Critical finding, REVISE verdict | +| Scope mismatch | Report in findings, note for coordinator | +| Timeout approaching | Output current findings with "PARTIAL" status | diff --git a/.codex/skills/team-perf-opt/instructions/agent-instruction.md b/.codex/skills/team-perf-opt/instructions/agent-instruction.md new file mode 100644 index 00000000..af6c5bcf --- /dev/null +++ b/.codex/skills/team-perf-opt/instructions/agent-instruction.md @@ -0,0 +1,122 @@ +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS +1. Read shared discoveries: {session_folder}/discoveries.ndjson (if exists, skip if not) +2. Read project context: .workflow/project-tech.json (if exists) +3. Read task schema: .codex/skills/team-perf-opt/schemas/tasks-schema.md + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Description**: {description} +**Role**: {role} +**Bottleneck Type**: {bottleneck_type} +**Priority**: {priority} +**Target Files**: {target_files} + +### Previous Tasks' Findings (Context) +{prev_context} + +--- + +## Execution Protocol + +1. **Read discoveries**: Load {session_folder}/discoveries.ndjson for shared exploration findings +2. **Use context**: Apply previous tasks' findings from prev_context above +3. 
**Execute by role**:
+
+   **If role = profiler**:
+   - Detect project type by scanning for framework markers:
+     - Frontend (React/Vue/Angular): render time, bundle size, FCP/LCP/CLS
+     - Backend Node (Express/Fastify/NestJS): CPU hotspots, memory, DB queries
+     - Native/JVM Backend (Cargo/Go/Java): CPU, memory, GC tuning (where applicable; Rust has no GC)
+     - CLI Tool: startup time, throughput, memory peak
+   - Trace hot code paths and CPU hotspots within target scope
+   - Identify memory allocation patterns and potential leaks
+   - Measure I/O and network latency where applicable
+   - Collect quantified baseline metrics (timing, memory, throughput)
+   - Rank top 3-5 bottlenecks by severity (Critical/High/Medium)
+   - Record evidence: file paths, line numbers, measured values
+   - Write `{session_folder}/artifacts/baseline-metrics.json` (metrics)
+   - Write `{session_folder}/artifacts/bottleneck-report.md` (ranked bottlenecks)
+
+   **If role = strategist**:
+   - Read bottleneck report and baseline from {session_folder}/artifacts/
+   - For each bottleneck, select optimization strategy by type:
+     - CPU: algorithm optimization, memoization, caching, worker threads
+     - MEMORY: pool reuse, lazy init, WeakRef, scope cleanup
+     - IO: batching, async pipelines, streaming, connection pooling
+     - NETWORK: request coalescing, compression, CDN, prefetching
+     - RENDERING: virtualization, memoization, CSS containment, code splitting
+     - DATABASE: index optimization, query rewriting, caching layer
+   - Prioritize by impact/effort: P0 (high impact+low effort) to P3
+   - Assign unique OPT-IDs (OPT-001, 002, ...) 
with non-overlapping file targets + - Define measurable success criteria (target metric value or improvement %) + - Write `{session_folder}/artifacts/optimization-plan.md` + + **If role = optimizer**: + - Read optimization plan from {session_folder}/artifacts/optimization-plan.md + - Apply optimizations in priority order (P0 first) + - Preserve existing behavior -- optimization must not break functionality + - Make minimal, focused changes per optimization + - Add comments only where optimization logic is non-obvious + - Preserve existing code style and conventions + + **If role = benchmarker**: + - Read baseline from {session_folder}/artifacts/baseline-metrics.json + - Read plan from {session_folder}/artifacts/optimization-plan.md + - Run benchmarks matching detected project type: + - Frontend: bundle size, render performance + - Backend: endpoint response times, memory under load, DB query times + - CLI: execution time, memory peak, throughput + - Run test suite to verify no regressions + - Collect post-optimization metrics matching baseline format + - Calculate improvement percentages per metric + - Compare against plan success criteria + - Write `{session_folder}/artifacts/benchmark-results.json` + - Set verdict: PASS (meets criteria) / WARN (partial) / FAIL (regression or criteria not met) + + **If role = reviewer**: + - Read plan from {session_folder}/artifacts/optimization-plan.md + - Review changed files across 5 dimensions: + - Correctness: logic errors, race conditions, null safety + - Side effects: unintended behavior changes, API contract breaks + - Maintainability: code clarity, complexity increase, naming + - Regression risk: impact on unrelated code paths + - Best practices: idiomatic patterns, no optimization anti-patterns + - Write `{session_folder}/artifacts/review-report.md` + - Set verdict: APPROVE / REVISE / REJECT + +4. 
**Share discoveries**: Append exploration findings to shared board: + ```bash + echo '{"ts":"","worker":"{id}","type":"","data":{...}}' >> {session_folder}/discoveries.ndjson + ``` +5. **Report result**: Return JSON via report_agent_job_result + +### Discovery Types to Share +- `bottleneck_found`: `{type, location, severity, description}` -- Bottleneck identified +- `hotspot_found`: `{file, function, cpu_pct, description}` -- CPU hotspot +- `memory_issue`: `{file, type, size_mb, description}` -- Memory problem +- `io_issue`: `{operation, latency_ms, description}` -- I/O issue +- `db_issue`: `{query, latency_ms, description}` -- Database issue +- `file_modified`: `{file, change, lines_added}` -- File change recorded +- `metric_measured`: `{metric, value, unit, context}` -- Metric measured +- `pattern_found`: `{pattern_name, location, description}` -- Pattern identified +- `artifact_produced`: `{name, path, producer, type}` -- Deliverable created + +--- + +## Output (report_agent_job_result) + +Return JSON: +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Key discoveries and implementation notes (max 500 chars)", + "verdict": "PASS|WARN|FAIL|APPROVE|REVISE|REJECT or empty", + "artifacts_produced": "semicolon-separated artifact paths", + "error": "" +} diff --git a/.codex/skills/team-perf-opt/schemas/tasks-schema.md b/.codex/skills/team-perf-opt/schemas/tasks-schema.md new file mode 100644 index 00000000..072c5d84 --- /dev/null +++ b/.codex/skills/team-perf-opt/schemas/tasks-schema.md @@ -0,0 +1,174 @@ +# Team Performance Optimization -- CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### Input Columns (Set by Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | string | Yes | Unique task identifier (PREFIX-NNN) | `"PROFILE-001"` | +| `title` | string | Yes | Short task title | `"Profile performance"` | +| `description` | string | Yes | Detailed 
task description (self-contained) with goal, inputs, outputs, success criteria | `"Profile application performance..."` | +| `role` | enum | Yes | Worker role: `profiler`, `strategist`, `optimizer`, `benchmarker`, `reviewer` | `"profiler"` | +| `bottleneck_type` | string | No | Performance bottleneck category: CPU, MEMORY, IO, NETWORK, RENDERING, DATABASE | `"CPU"` | +| `priority` | enum | No | P0 (Critical), P1 (High), P2 (Medium), P3 (Low) | `"P0"` | +| `target_files` | string | No | Semicolon-separated file paths to focus on | `"src/services/DataProcessor.ts"` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"PROFILE-001"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"PROFILE-001"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[PROFILE-001] Found 3 CPU hotspots..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` -> `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Found 3 CPU hotspots, 1 memory leak..."` | +| `verdict` | string | Benchmark/review verdict: PASS, WARN, FAIL, APPROVE, REVISE, REJECT | `"PASS"` | +| `artifacts_produced` | string | Semicolon-separated paths of produced artifacts | `"artifacts/bottleneck-report.md"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` 
| `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution | + +Interactive tasks appear in master CSV for dependency tracking but are NOT included in wave-{N}.csv files. + +--- + +### Role Prefix Mapping + +| Role | Prefix | Stage | Responsibility | +|------|--------|-------|----------------| +| profiler | PROFILE | 1 | Performance profiling, baseline metrics, bottleneck identification | +| strategist | STRATEGY | 2 | Optimization plan design, strategy selection, prioritization | +| optimizer | IMPL / FIX | 3 | Code implementation, optimization application, targeted fixes | +| benchmarker | BENCH | 4 | Benchmark execution, before/after comparison, regression detection | +| reviewer | REVIEW | 4 | Code review for correctness, side effects, regression risks | + +--- + +### Example Data + +```csv +id,title,description,role,bottleneck_type,priority,target_files,deps,context_from,exec_mode,wave,status,findings,verdict,artifacts_produced,error +"PROFILE-001","Profile performance","PURPOSE: Profile application performance to identify bottlenecks\nTASK:\n- Detect project type (frontend/backend/CLI)\n- Trace hot code paths and CPU hotspots\n- Identify memory allocation patterns and leaks\n- Measure I/O and network latency\n- Collect quantified baseline metrics\nINPUT: Codebase under target scope\nOUTPUT: artifacts/baseline-metrics.json + artifacts/bottleneck-report.md\nSUCCESS: Ranked bottleneck list with severity, baseline metrics collected\nSESSION: .workflow/.csv-wave/perf-example-20260308","profiler","","","","","","csv-wave","1","pending","","","","" +"STRATEGY-001","Design optimization plan","PURPOSE: Design prioritized optimization plan from bottleneck report\nTASK:\n- For each bottleneck, select optimization strategy\n- Prioritize by impact/effort ratio (P0-P3)\n- Define measurable success criteria per optimization\n- Assign unique OPT-IDs with non-overlapping file targets\nINPUT: artifacts/bottleneck-report.md + 
artifacts/baseline-metrics.json\nOUTPUT: artifacts/optimization-plan.md\nSUCCESS: Prioritized plan with self-contained OPT blocks\nSESSION: .workflow/.csv-wave/perf-example-20260308","strategist","","","","PROFILE-001","PROFILE-001","csv-wave","2","pending","","","","" +"IMPL-001","Implement optimizations","PURPOSE: Implement performance optimizations per plan\nTASK:\n- Apply optimizations in priority order (P0 first)\n- Preserve existing behavior\n- Make minimal, focused changes\nINPUT: artifacts/optimization-plan.md\nOUTPUT: Modified source files\nSUCCESS: All planned optimizations applied, no functionality regressions\nSESSION: .workflow/.csv-wave/perf-example-20260308","optimizer","","","","STRATEGY-001","STRATEGY-001","csv-wave","3","pending","","","","" +"BENCH-001","Benchmark improvements","PURPOSE: Benchmark before/after optimization metrics\nTASK:\n- Run benchmarks matching detected project type\n- Compare post-optimization metrics vs baseline\n- Calculate improvement percentages\n- Detect any regressions\nINPUT: artifacts/baseline-metrics.json + artifacts/optimization-plan.md\nOUTPUT: artifacts/benchmark-results.json\nSUCCESS: All target improvements met, no regressions\nSESSION: .workflow/.csv-wave/perf-example-20260308","benchmarker","","","","IMPL-001","IMPL-001","csv-wave","4","pending","","","","" +"REVIEW-001","Review optimization code","PURPOSE: Review optimization changes for correctness and quality\nTASK:\n- Correctness: logic errors, race conditions, null safety\n- Side effects: unintended behavior changes, API breaks\n- Maintainability: code clarity, complexity, naming\n- Regression risk: impact on unrelated code paths\n- Best practices: idiomatic patterns, no anti-patterns\nINPUT: artifacts/optimization-plan.md + changed files\nOUTPUT: artifacts/review-report.md\nSUCCESS: APPROVE verdict (no Critical/High findings)\nSESSION: 
.workflow/.csv-wave/perf-example-20260308","reviewer","","","","IMPL-001","IMPL-001","csv-wave","4","pending","","","","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +--------------------- -------------------- ----------------- +id ----------> id ----------> id +title ----------> title ----------> (reads) +description ----------> description ----------> (reads) +role ----------> role ----------> (reads) +bottleneck_type--------> bottleneck_type--------> (reads) +priority ----------> priority ----------> (reads) +target_files----------> target_files----------> (reads) +deps ----------> deps ----------> (reads) +context_from----------> context_from----------> (reads) +exec_mode ----------> exec_mode ----------> (reads) + wave ----------> (reads) + prev_context ----------> (reads) + status + findings + verdict + artifacts_produced + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "PROFILE-001", + "status": "completed", + "findings": "Found 3 CPU hotspots: O(n^2) in DataProcessor.processRecords (Critical), unoptimized regex in Validator.check (High), synchronous file reads in ConfigLoader (Medium). Memory baseline: 145MB peak, 2 potential leak sites.", + "verdict": "", + "artifacts_produced": "artifacts/baseline-metrics.json;artifacts/bottleneck-report.md", + "error": "" +} +``` + +Interactive tasks output via structured text or JSON written to `interactive/{id}-result.json`. 
+ +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `bottleneck_found` | `data.location` | `{type, location, severity, description}` | Performance bottleneck identified | +| `hotspot_found` | `data.file+data.function` | `{file, function, cpu_pct, description}` | CPU hotspot detected | +| `memory_issue` | `data.file+data.type` | `{file, type, size_mb, description}` | Memory leak or bloat | +| `io_issue` | `data.operation` | `{operation, latency_ms, description}` | I/O performance issue | +| `db_issue` | `data.query` | `{query, latency_ms, description}` | Database performance issue | +| `file_modified` | `data.file` | `{file, change, lines_added}` | File change recorded | +| `metric_measured` | `data.metric+data.context` | `{metric, value, unit, context}` | Performance metric measured | +| `pattern_found` | `data.pattern_name+data.location` | `{pattern_name, location, description}` | Code pattern identified | +| `artifact_produced` | `data.path` | `{name, path, producer, type}` | Deliverable created | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"PROFILE-001","type":"bottleneck_found","data":{"type":"CPU","location":"src/services/DataProcessor.ts:145","severity":"Critical","description":"O(n^2) nested loop in processRecords, 850ms for 10k records"}} +{"ts":"2026-03-08T10:01:00Z","worker":"PROFILE-001","type":"hotspot_found","data":{"file":"src/services/DataProcessor.ts","function":"processRecords","cpu_pct":42,"description":"Accounts for 42% of CPU time in profiling run"}} +{"ts":"2026-03-08T10:02:00Z","worker":"PROFILE-001","type":"metric_measured","data":{"metric":"response_time_p95","value":1250,"unit":"ms","context":"GET /api/dashboard"}} +{"ts":"2026-03-08T10:15:00Z","worker":"IMPL-001","type":"file_modified","data":{"file":"src/services/DataProcessor.ts","change":"Replaced O(n^2) with Map lookup O(n)","lines_added":12}} 
+{"ts":"2026-03-08T10:25:00Z","worker":"BENCH-001","type":"metric_measured","data":{"metric":"response_time_p95","value":380,"unit":"ms","context":"GET /api/dashboard (after optimization)"}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. + +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| CSV task findings | Interactive task | Injected via spawn message or send_input | +| Interactive task result | CSV task prev_context | Read from interactive/{id}-result.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Description non-empty | Every task has description | "Empty description for task: {id}" | +| Status enum | status in {pending, completed, failed, skipped} | "Invalid status: {status}" | +| Role valid | role in {profiler, strategist, optimizer, benchmarker, reviewer} | "Invalid role: {role}" | +| Verdict enum | verdict in {PASS, WARN, FAIL, APPROVE, REVISE, REJECT, ""} | "Invalid verdict: {verdict}" | +| Cross-mechanism deps | Interactive to CSV deps resolve correctly | "Cross-mechanism dependency unresolvable: {id}" | diff --git a/.codex/skills/team-planex-v2/SKILL.md b/.codex/skills/team-planex-v2/SKILL.md new file mode 100644 index 00000000..77692668 --- /dev/null +++ b/.codex/skills/team-planex-v2/SKILL.md @@ -0,0 +1,599 @@ +--- +name: 
team-planex-v2
+description: Hybrid team skill for plan-and-execute pipeline. CSV wave primary for planning and execution. Planner decomposes requirements into issues and solutions, then executor implements each via CLI tools. Supports issue IDs, text input, and plan file input.
+argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] [--exec=codex|gemini|qwen] \"issue IDs or --text 'description' or --plan path\""
+allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion
+---
+
+## Auto Mode
+
+When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults.
+
+# Team PlanEx
+
+## Usage
+
+```bash
+$team-planex-v2 "ISS-20260308-120000 ISS-20260308-120001"
+$team-planex-v2 -c 3 "--text 'Add rate limiting to all API endpoints'"
+$team-planex-v2 -y "--plan .workflow/specs/roadmap.md --exec=codex"
+$team-planex-v2 --continue "planex-rate-limit-20260308"
+```
+
+**Flags**:
+- `-y, --yes`: Skip all confirmations (auto mode)
+- `-c, --concurrency N`: Max concurrent agents within each wave (default: 3)
+- `--continue`: Resume existing session
+- `--exec=codex|gemini|qwen`: Force execution method for implementation
+
+**Output Directory**: `.workflow/.csv-wave/{session-id}/`
+**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report)
+
+---
+
+## Overview
+
+Plan-and-execute pipeline for issue-based development. Planner decomposes requirements into individual issues with solution plans, then executors implement each issue independently. 
+ +**Execution Model**: Hybrid -- CSV wave pipeline (primary) + individual agent spawn (secondary) + +``` ++---------------------------------------------------------------------------+ +| TEAM PLANEX WORKFLOW | ++---------------------------------------------------------------------------+ +| | +| Phase 0: Pre-Wave Interactive (Input Analysis) | +| +-- Parse input type (issue IDs / --text / --plan) | +| +-- Determine execution method (codex/gemini/auto) | +| +-- Create issues from text/plan if needed | +| +-- Output: refined issue list for decomposition | +| | +| Phase 1: Requirement -> CSV + Classification | +| +-- Planning wave: generate solutions for each issue | +| +-- Execution wave: implement each issue independently | +| +-- Classify tasks: csv-wave (default) | interactive | +| +-- Compute dependency waves (topological sort) | +| +-- Generate tasks.csv with wave + exec_mode columns | +| +-- User validates task breakdown (skip if -y) | +| | +| Phase 2: Wave Execution Engine (Extended) | +| +-- For each wave (1..N): | +| | +-- Build wave CSV (filter csv-wave tasks for this wave) | +| | +-- Inject previous findings into prev_context column | +| | +-- spawn_agents_on_csv(wave CSV) | +| | +-- Merge all results into master tasks.csv | +| | +-- Check: any failed? 
-> skip dependents | +| +-- discoveries.ndjson shared across all modes (append-only) | +| | +| Phase 3: Results Aggregation | +| +-- Export final results.csv | +| +-- Generate context.md with all findings | +| +-- Display summary: completed/failed/skipped per wave | +| +-- Offer: view results | retry failed | done | +| | ++---------------------------------------------------------------------------+ +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, clarification needed | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Solution planning per issue (PLAN-*) | `csv-wave` | +| Code implementation per issue (EXEC-*) | `csv-wave` | +| Complex multi-issue coordination (rare) | `interactive` | + +> In the standard PlanEx pipeline, all tasks default to `csv-wave`. Interactive mode is reserved for edge cases requiring multi-round coordination. 
+ +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,role,issue_ids,input_type,raw_input,exec_mode,execution_method,deps,context_from,wave,status,findings,artifact_path,error +"PLAN-001","Plan issue-1","Generate solution for ISS-20260308-120000","planner","ISS-20260308-120000","issues","ISS-20260308-120000","csv-wave","","","","1","pending","","","" +"PLAN-002","Plan issue-2","Generate solution for ISS-20260308-120001","planner","ISS-20260308-120001","issues","ISS-20260308-120001","csv-wave","","","","1","pending","","","" +"EXEC-001","Implement issue-1","Implement solution for ISS-20260308-120000","executor","ISS-20260308-120000","","","csv-wave","gemini","PLAN-001","PLAN-001","2","pending","","","" +"EXEC-002","Implement issue-2","Implement solution for ISS-20260308-120001","executor","ISS-20260308-120001","","","csv-wave","gemini","PLAN-002","PLAN-002","2","pending","","","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier (PLAN-NNN, EXEC-NNN) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description | +| `role` | Input | Worker role: planner or executor | +| `issue_ids` | Input | Semicolon-separated issue IDs this task covers | +| `input_type` | Input | Input type: issues, text, or plan (planner tasks only) | +| `raw_input` | Input | Raw input text (planner tasks only) | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `execution_method` | Input | codex, gemini, qwen, or empty (executor tasks only) | +| `deps` | Input | Semicolon-separated dependency task IDs | +| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs | +| `wave` | Computed | Wave number (computed by topological sort, 1-based) | +| `status` | Output | `pending` -> `completed` / `failed` / `skipped` | +| `findings` | Output | Key discoveries or implementation notes (max 500 chars) | +| `artifact_path` | 
Output | Path to generated artifact (solution file, build result) | +| `error` | Output | Error message if failed (empty if success) | + +### Per-Wave CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). + +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state -- all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 3 | +| `discoveries.ndjson` | Shared exploration board (all agents) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 3 | +| `artifacts/solutions/{issueId}.json` | Planner solution artifacts | Created by planner agents | +| `builds/{issueId}.json` | Executor build results | Created by executor agents | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ ++-- tasks.csv # Master state (all tasks) ++-- results.csv # Final results export ++-- discoveries.ndjson # Shared discovery board ++-- context.md # Human-readable report ++-- wave-{N}.csv # Temporary per-wave input ++-- artifacts/ +| +-- solutions/ # Planner output +| +-- {issueId}.json ++-- builds/ # Executor output +| +-- {issueId}.json ++-- wisdom/ # Cross-task knowledge + +-- learnings.md + +-- decisions.md + +-- conventions.md + +-- issues.md +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +const getUtc8ISOString = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString() + +const AUTO_YES = $ARGUMENTS.includes('--yes') || $ARGUMENTS.includes('-y') +const continueMode = $ARGUMENTS.includes('--continue') +const concurrencyMatch = $ARGUMENTS.match(/(?:--concurrency|-c)\s+(\d+)/) +const maxConcurrency = concurrencyMatch ? 
parseInt(concurrencyMatch[1]) : 3 + +const requirement = $ARGUMENTS + .replace(/--yes|-y|--continue|--concurrency\s+\d+|-c\s+\d+/g, '') + .trim() + +// Parse execution method +let executionMethod = 'gemini' // default +const execMatch = requirement.match(/--exec=(\w+)/) +if (execMatch) executionMethod = execMatch[1] + +// Detect input type +const issueIdPattern = /ISS-\d{8}-\d{6}/g +const textMatch = requirement.match(/--text\s+'([^']+)'/) +const planMatch = requirement.match(/--plan\s+(\S+)/) + +let inputType = 'issues' +let rawInput = requirement +let issueIds = requirement.match(issueIdPattern) || [] + +if (textMatch) { + inputType = 'text' + rawInput = textMatch[1] + issueIds = [] // will be created by planner +} else if (planMatch) { + inputType = 'plan' + rawInput = planMatch[1] + issueIds = [] // will be parsed from plan file +} + +// If no input detected, ask user +if (issueIds.length === 0 && inputType === 'issues') { + const answer = AskUserQuestion("No input detected. Provide issue IDs, or use --text 'description' or --plan :") + issueIds = answer.match(issueIdPattern) || [] + if (issueIds.length === 0 && !answer.includes('--text') && !answer.includes('--plan')) { + inputType = 'text' + rawInput = answer + } +} + +// Execution method selection (interactive if no flag) +if (!execMatch && !AUTO_YES) { + const methodChoice = AskUserQuestion({ + questions: [{ question: "Select execution method for implementation:", + options: [ + { label: "Gemini", description: "gemini-2.5-pro (recommended for <= 3 tasks)" }, + { label: "Codex", description: "gpt-5.2 (recommended for > 3 tasks)" }, + { label: "Auto", description: "Auto-select based on task count" } + ] + }] + }) + if (methodChoice === 'Codex') executionMethod = 'codex' + else if (methodChoice === 'Auto') executionMethod = 'auto' +} + +const slug = (issueIds[0] || rawInput).toLowerCase() + .replace(/[^a-z0-9\u4e00-\u9fa5]+/g, '-') + .substring(0, 30) +const dateStr = getUtc8ISOString().substring(0, 
10).replace(/-/g, '')
+const sessionId = `planex-${slug}-${dateStr}`
+const sessionFolder = `.workflow/.csv-wave/${sessionId}`
+
+Bash(`mkdir -p ${sessionFolder}/{artifacts/solutions,builds,wisdom}`)
+
+Write(`${sessionFolder}/discoveries.ndjson`, `# Discovery Board - ${sessionId}\n# Format: NDJSON\n`)
+
+// Initialize wisdom files
+Write(`${sessionFolder}/wisdom/learnings.md`, `# Learnings\n\nAccumulated during ${sessionId}\n`)
+Write(`${sessionFolder}/wisdom/decisions.md`, `# Decisions\n\n`)
+Write(`${sessionFolder}/wisdom/conventions.md`, `# Conventions\n\n`)
+Write(`${sessionFolder}/wisdom/issues.md`, `# Issues\n\n`)
+
+// Store session metadata
+Write(`${sessionFolder}/session.json`, JSON.stringify({
+  session_id: sessionId,
+  pipeline_type: 'plan-execute',
+  input_type: inputType,
+  raw_input: rawInput,
+  issue_ids: issueIds,
+  execution_method: executionMethod,
+  created_at: getUtc8ISOString()
+}, null, 2))
+```
+
+---
+
+### Phase 0: Pre-Wave Interactive (Input Analysis)
+
+**Objective**: Parse and normalize input into a list of issue IDs ready for the planning wave.
+
+**Input Type Handling**:
+
+| Input Type | Processing |
+|------------|-----------|
+| `issues` (ISS-* IDs) | Use directly, verify exist via `ccw issue status` |
+| `text` (--text flag) | Create issues via `ccw issue create --title ... --context ...` |
+| `plan` (--plan flag) | Read plan file, parse phases/tasks, batch create issues |
+
+For `text` input:
+```bash
+# Create issue from text description
+ccw issue create --title "<generated title>" --context "<text description>"
+# Parse output for new issue ID
+```
+
+For `plan` input:
+```bash
+# Read plan file
+planContent = Read("<plan file path>")
+# Parse phases/sections into individual issues
+# Create each as a separate issue via ccw issue create
+```
+
+After processing, update session.json with resolved issue_ids. 
+ +**Success Criteria**: +- All inputs resolved to valid issue IDs +- Session metadata updated with final issue list + +--- + +### Phase 1: Requirement -> CSV + Classification + +**Objective**: Generate tasks.csv with PLAN-* tasks (wave 1) and EXEC-* tasks (wave 2). + +**Two-Wave Structure**: + +Wave 1 (Planning): One PLAN-NNN task per issue, all independent (no deps), concurrent execution. +Wave 2 (Execution): One EXEC-NNN task per issue, each depends on its corresponding PLAN-NNN. + +**Task Generation**: + +```javascript +const tasks = [] + +// Wave 1: Planning tasks (one per issue) +for (let i = 0; i < issueIds.length; i++) { + const n = String(i + 1).padStart(3, '0') + tasks.push({ + id: `PLAN-${n}`, + title: `Plan ${issueIds[i]}`, + description: `Generate implementation solution for issue ${issueIds[i]}. Analyze requirements, design solution approach, break down into implementation tasks, identify files to modify/create.`, + role: 'planner', + issue_ids: issueIds[i], + input_type: inputType, + raw_input: inputType === 'issues' ? issueIds[i] : rawInput, + exec_mode: 'csv-wave', + execution_method: '', + deps: '', + context_from: '', + wave: '1', + status: 'pending', + findings: '', artifact_path: '', error: '' + }) +} + +// Wave 2: Execution tasks (one per issue, depends on corresponding PLAN) +for (let i = 0; i < issueIds.length; i++) { + const n = String(i + 1).padStart(3, '0') + // Resolve execution method + let method = executionMethod + if (method === 'auto') { + method = issueIds.length <= 3 ? 'gemini' : 'codex' + } + tasks.push({ + id: `EXEC-${n}`, + title: `Implement ${issueIds[i]}`, + description: `Implement solution for issue ${issueIds[i]}. 
Load solution artifact, execute implementation via CLI, run tests, commit.`, + role: 'executor', + issue_ids: issueIds[i], + input_type: '', + raw_input: '', + exec_mode: 'csv-wave', + execution_method: method, + deps: `PLAN-${n}`, + context_from: `PLAN-${n}`, + wave: '2', + status: 'pending', + findings: '', artifact_path: '', error: '' + }) +} + +Write(`${sessionFolder}/tasks.csv`, toCsv(tasks)) +``` + +**User Validation**: Display task breakdown with wave assignment (skip if AUTO_YES). + +**Success Criteria**: +- tasks.csv created with valid schema and wave assignments +- PLAN-* tasks in wave 1, EXEC-* tasks in wave 2 +- Each EXEC-* depends on its corresponding PLAN-* +- No circular dependencies +- User approved (or AUTO_YES) + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with context propagation between planning and execution waves. + +```javascript +const masterCsv = Read(`${sessionFolder}/tasks.csv`) +let tasks = parseCsv(masterCsv) +const maxWave = Math.max(...tasks.map(t => parseInt(t.wave))) + +for (let wave = 1; wave <= maxWave; wave++) { + console.log(`\nWave ${wave}/${maxWave} (${wave === 1 ? 'Planning' : 'Execution'})`) + + // 1. Filter tasks for this wave + const waveTasks = tasks.filter(t => parseInt(t.wave) === wave && t.status === 'pending') + + // 2. Check dependencies - skip if upstream failed + for (const task of waveTasks) { + const depIds = (task.deps || '').split(';').filter(Boolean) + const depStatuses = depIds.map(id => tasks.find(t => t.id === id)?.status) + if (depStatuses.some(s => s === 'failed' || s === 'skipped')) { + task.status = 'skipped' + task.error = `Dependency failed: ${depIds.filter((id, i) => + ['failed','skipped'].includes(depStatuses[i])).join(', ')}` + } + } + + const pendingTasks = waveTasks.filter(t => t.status === 'pending') + if (pendingTasks.length === 0) { + console.log(`Wave ${wave}: No pending tasks, skipping...`) + continue + } + + // 3. 
Build prev_context from completed upstream tasks + for (const task of pendingTasks) { + const contextIds = (task.context_from || '').split(';').filter(Boolean) + const prevFindings = contextIds.map(id => { + const src = tasks.find(t => t.id === id) + if (!src?.findings) return '' + return `## [${src.id}] ${src.title}\n${src.findings}\nArtifact: ${src.artifact_path || 'N/A'}` + }).filter(Boolean).join('\n\n') + task.prev_context = prevFindings + } + + // 4. Write wave CSV + Write(`${sessionFolder}/wave-${wave}.csv`, toCsv(pendingTasks)) + + // 5. Execute wave + spawn_agents_on_csv({ + csv_path: `${sessionFolder}/wave-${wave}.csv`, + id_column: "id", + instruction: Read(".codex/skills/team-planex/instructions/agent-instruction.md"), + max_concurrency: maxConcurrency, + max_runtime_seconds: 1200, + output_csv_path: `${sessionFolder}/wave-${wave}-results.csv`, + output_schema: { + type: "object", + properties: { + id: { type: "string" }, + status: { type: "string", enum: ["completed", "failed"] }, + findings: { type: "string" }, + artifact_path: { type: "string" }, + error: { type: "string" } + } + } + }) + + // 6. Merge results into master CSV + const results = parseCsv(Read(`${sessionFolder}/wave-${wave}-results.csv`)) + for (const r of results) { + const t = tasks.find(t => t.id === r.id) + if (t) Object.assign(t, r) + } + Write(`${sessionFolder}/tasks.csv`, toCsv(tasks)) + + // 7. Cleanup temp files + Bash(`rm -f ${sessionFolder}/wave-${wave}.csv ${sessionFolder}/wave-${wave}-results.csv`) + + // 8. 
Display wave summary + const completed = results.filter(r => r.status === 'completed').length + const failed = results.filter(r => r.status === 'failed').length + console.log(`Wave ${wave} Complete: ${completed} completed, ${failed} failed`) +} +``` + +**Success Criteria**: +- All waves executed in order +- Each wave's results merged into master CSV before next wave starts +- Dependent tasks skipped when predecessor failed +- discoveries.ndjson accumulated across all waves +- Planning wave completes before execution wave starts + +--- + +### Phase 3: Results Aggregation + +**Objective**: Generate final results and human-readable report. + +```javascript +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +const completed = tasks.filter(t => t.status === 'completed') +const failed = tasks.filter(t => t.status === 'failed') +const skipped = tasks.filter(t => t.status === 'skipped') + +const planTasks = tasks.filter(t => t.role === 'planner') +const execTasks = tasks.filter(t => t.role === 'executor') + +// Export results.csv +Bash(`cp ${sessionFolder}/tasks.csv ${sessionFolder}/results.csv`) + +// Generate context.md +let contextMd = `# PlanEx Pipeline Report\n\n` +contextMd += `**Session**: ${sessionId}\n` +contextMd += `**Input Type**: ${inputType}\n` +contextMd += `**Execution Method**: ${executionMethod}\n` +contextMd += `**Issues**: ${issueIds.join(', ')}\n\n` + +contextMd += `## Summary\n\n` +contextMd += `| Status | Count |\n|--------|-------|\n` +contextMd += `| Completed | ${completed.length} |\n` +contextMd += `| Failed | ${failed.length} |\n` +contextMd += `| Skipped | ${skipped.length} |\n\n` + +contextMd += `## Planning Wave\n\n` +for (const t of planTasks) { + const icon = t.status === 'completed' ? '[OK]' : t.status === 'failed' ? 
'[FAIL]' : '[SKIP]' + contextMd += `${icon} **${t.id}**: ${t.title}\n` + if (t.findings) contextMd += ` ${t.findings.substring(0, 200)}\n` + if (t.artifact_path) contextMd += ` Solution: ${t.artifact_path}\n` + contextMd += `\n` +} + +contextMd += `## Execution Wave\n\n` +for (const t of execTasks) { + const icon = t.status === 'completed' ? '[OK]' : t.status === 'failed' ? '[FAIL]' : '[SKIP]' + contextMd += `${icon} **${t.id}**: ${t.title}\n` + if (t.findings) contextMd += ` ${t.findings.substring(0, 200)}\n` + if (t.error) contextMd += ` Error: ${t.error}\n` + contextMd += `\n` +} + +contextMd += `## Deliverables\n\n` +contextMd += `| Artifact | Path |\n|----------|------|\n` +contextMd += `| Solution Plans | ${sessionFolder}/artifacts/solutions/ |\n` +contextMd += `| Build Results | ${sessionFolder}/builds/ |\n` +contextMd += `| Discovery Board | ${sessionFolder}/discoveries.ndjson |\n` + +Write(`${sessionFolder}/context.md`, contextMd) + +// Display summary +console.log(` +PlanEx Pipeline Complete +Input: ${inputType} (${issueIds.length} issues) +Planning: ${planTasks.filter(t => t.status === 'completed').length}/${planTasks.length} completed +Execution: ${execTasks.filter(t => t.status === 'completed').length}/${execTasks.length} completed +Failed: ${failed.length} | Skipped: ${skipped.length} +Output: ${sessionFolder} +`) +``` + +**Success Criteria**: +- results.csv exported (all tasks) +- context.md generated +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +Both planner and executor agents share the same discoveries.ndjson file: + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"PLAN-001","type":"solution_designed","data":{"issue_id":"ISS-20260308-120000","approach":"refactor","task_count":4,"estimated_files":6}} +{"ts":"2026-03-08T10:05:00Z","worker":"PLAN-002","type":"conflict_warning","data":{"issue_ids":["ISS-20260308-120000","ISS-20260308-120001"],"overlapping_files":["src/auth/handler.ts"]}} 
+{"ts":"2026-03-08T10:10:00Z","worker":"EXEC-001","type":"impl_result","data":{"issue_id":"ISS-20260308-120000","files_changed":3,"tests_pass":true,"commit":"abc123"}} +``` + +**Discovery Types**: + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `solution_designed` | `issue_id` | `{issue_id, approach, task_count, estimated_files}` | Planner: solution plan completed | +| `conflict_warning` | `issue_ids` | `{issue_ids, overlapping_files}` | Planner: file overlap detected between issues | +| `pattern_found` | `pattern+location` | `{pattern, location, description}` | Any: code pattern identified | +| `impl_result` | `issue_id` | `{issue_id, files_changed, tests_pass, commit}` | Executor: implementation outcome | +| `test_failure` | `issue_id` | `{issue_id, test_file, error_msg}` | Executor: test failure details | + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Circular dependency | Detect in wave computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent EXEC tasks | +| Planner fails to create solution | Mark PLAN task failed, skip corresponding EXEC task | +| Executor fails implementation | Mark as failed, report in context.md | +| All agents in wave failed | Log error, offer retry or abort | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| No input provided | Ask user for input via AskUserQuestion | +| Issue creation fails (text/plan input) | Report error, suggest manual issue creation | +| Continue mode: no session found | List available sessions, prompt user to select | + +--- + +## Core Rules + +1. **Start Immediately**: First action is session initialization, then input parsing +2. 
**Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. **CSV is Source of Truth**: Master tasks.csv holds all state +4. **CSV First**: Default to csv-wave for all tasks; interactive only for edge cases +5. **Context Propagation**: prev_context built from master CSV, not from memory +6. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson +7. **Skip on Failure**: If PLAN-N failed, skip EXEC-N automatically +8. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +9. **Two-Wave Pipeline**: Wave 1 = Planning (PLAN-*), Wave 2 = Execution (EXEC-*) +10. **DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-planex-v2/instructions/agent-instruction.md b/.codex/skills/team-planex-v2/instructions/agent-instruction.md new file mode 100644 index 00000000..f4cf6de9 --- /dev/null +++ b/.codex/skills/team-planex-v2/instructions/agent-instruction.md @@ -0,0 +1,193 @@ +# Agent Instruction -- Team PlanEx + +CSV agent instruction template for `spawn_agents_on_csv`. Each agent receives this template with its row's column values substituted via `{column_name}` placeholders. + +--- + +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS +1. Read shared discoveries: `.workflow/.csv-wave/{session_id}/discoveries.ndjson` (if exists, skip if not) +2. Read project context: `.workflow/project-tech.json` (if exists) +3. 
Read wisdom files: `.workflow/.csv-wave/{session_id}/wisdom/` (conventions, learnings)
+
+---
+
+## Your Task
+
+**Task ID**: {id}
+**Title**: {title}
+**Description**: {description}
+**Role**: {role}
+**Issue IDs**: {issue_ids}
+**Input Type**: {input_type}
+**Raw Input**: {raw_input}
+**Execution Method**: {execution_method}
+
+### Previous Tasks' Findings (Context)
+{prev_context}
+
+---
+
+## Execution Protocol
+
+### Role Router
+
+Determine your execution steps based on `{role}`:
+
+| Role | Execution Steps |
+|------|----------------|
+| planner | Step A: Solution Planning |
+| executor | Step B: Implementation |
+
+---
+
+### Step A: Solution Planning (planner role)
+
+1. Parse issue ID from `{issue_ids}`
+2. Determine input source from `{input_type}`:
+
+| Input Type | Action |
+|------------|--------|
+| `issues` | Load issue details: `Bash("ccw issue status {issue_ids} --json")` |
+| `text` | Create issue from text: `Bash("ccw issue create --title '<generated title>' --context '{raw_input}'")` |
+| `plan` | Read plan file: `Read("{raw_input}")`, parse into issue requirements |
+
+3. Generate solution via CLI:
+   ```bash
+   ccw cli -p "PURPOSE: Generate implementation solution for issue <issue_id>; success = actionable task breakdown with file paths
+   TASK: * Load issue details * Analyze requirements * Design solution approach * Break down into implementation tasks * Identify files to modify/create
+   MODE: analysis
+   CONTEXT: @**/* | Memory: Session wisdom
+   EXPECTED: JSON solution with: title, description, tasks array (each with description, files_touched), estimated_complexity
+   CONSTRAINTS: Follow project patterns | Reference existing implementations
+   " --tool gemini --mode analysis --rule planning-breakdown-task-steps
+   ```
+
+4. Parse CLI output to extract solution JSON
+
+5. 
Write solution artifact:
+   ```javascript
+   Write("<session_folder>/artifacts/solutions/<issue_id>.json", JSON.stringify({
+     session_id: "<session_id>",
+     issue_id: "<issue_id>",
+     solution: solutionFromCli,
+     planned_at: new Date().toISOString()
+   }))
+   ```
+
+6. Check for file conflicts with other solutions in session:
+   - Read other solution files in `<session_folder>/artifacts/solutions/`
+   - Compare `files_touched` lists
+   - If overlapping files found, log warning to discoveries.ndjson
+
+7. Share discoveries to board:
+   ```bash
+   echo '{"ts":"<utc-timestamp>","worker":"{id}","type":"solution_designed","data":{"issue_id":"<issue_id>","approach":"<approach>","task_count":<n>,"estimated_files":<n>}}' >> <session_folder>/discoveries.ndjson
+   ```
+
+---
+
+### Step B: Implementation (executor role)
+
+1. Parse issue ID from `{issue_ids}`
+
+2. Load solution artifact:
+   - Primary: Read file from prev_context artifact_path
+   - Fallback: `Read("<session_folder>/artifacts/solutions/<issue_id>.json")`
+   - Last resort: `Bash("ccw issue solutions <issue_id> --json")`
+
+3. Load wisdom files for conventions and patterns
+
+4. Determine execution backend from `{execution_method}`:
+
+| Method | CLI Command |
+|--------|-------------|
+| codex | `ccw cli --tool codex --mode write --id exec-<issue_id>` |
+| gemini | `ccw cli --tool gemini --mode write --id exec-<issue_id>` |
+| qwen | `ccw cli --tool qwen --mode write --id exec-<issue_id>` |
+
+5. Execute implementation via CLI:
+   ```bash
+   ccw cli -p "PURPOSE: Implement solution for issue <issue_id>; success = all tasks completed, tests pass
+   TASK: <solution task descriptions>
+   MODE: write
+   CONTEXT: @**/* | Memory: Solution plan, session wisdom
+   EXPECTED: Working implementation with code changes, test updates, no syntax errors
+   CONSTRAINTS: Follow existing patterns | Maintain backward compatibility
+   Issue: <issue_id>
+   Title: <issue_title>
+   Solution: <solution JSON>" --tool <execution_method> --mode write --rule development-implement-feature
+   ```
+
+6. 
Verify implementation:
+
+| Check | Method | Pass Criteria |
+|-------|--------|---------------|
+| Tests | Detect and run project test command | All pass |
+| Syntax | IDE diagnostics or `tsc --noEmit` | No errors |
+
+   If tests fail: retry implementation once, then report as failed.
+
+7. Commit changes:
+   ```bash
+   git add -A
+   git commit -m "feat(<issue_id>): <summary of changes>"
+   ```
+
+8. Update issue status:
+   ```bash
+   ccw issue update <issue_id> --status completed
+   ```
+
+9. Share discoveries to board:
+   ```bash
+   echo '{"ts":"<utc-timestamp>","worker":"{id}","type":"impl_result","data":{"issue_id":"<issue_id>","files_changed":<n>,"tests_pass":<true|false>,"commit":"<commit hash>"}}' >> <session_folder>/discoveries.ndjson
+   ```
+
+---
+
+## Share Discoveries (ALL ROLES)
+
+After completing your work, append findings to the shared discovery board:
+
+```bash
+echo '{"ts":"<utc-timestamp>","worker":"{id}","type":"<discovery type>","data":{...}}' >> <session_folder>/discoveries.ndjson
+```
+
+**Discovery Types to Share**:
+
+| Type | Data Schema | When to Use |
+|------|-------------|-------------|
+| `solution_designed` | `{issue_id, approach, task_count, estimated_files}` | Planner: solution plan completed |
+| `conflict_warning` | `{issue_ids, overlapping_files}` | Planner: file overlap between issues |
+| `pattern_found` | `{pattern, location, description}` | Any: code pattern identified |
+| `impl_result` | `{issue_id, files_changed, tests_pass, commit}` | Executor: implementation outcome |
+| `test_failure` | `{issue_id, test_file, error_msg}` | Executor: test failure |
+
+---
+
+## Output (report_agent_job_result)
+
+Return JSON:
+```json
+{
+  "id": "{id}",
+  "status": "completed | failed",
+  "findings": "Key discoveries and implementation notes (max 500 chars)",
+  "artifact_path": "relative path to main artifact (e.g., artifacts/solutions/ISS-xxx.json or builds/ISS-xxx.json)",
+  "error": ""
+}
+```
+
+---
+
+## Quality Checklist
+
+Before reporting complete:
+- [ ] Mandatory first steps completed (discoveries, project context, wisdom)
+- [ ] Role-specific execution steps followed
+- [ ] At least 1 
discovery shared to board +- [ ] Artifact file written to session folder +- [ ] Findings include actionable details (file paths, task counts, etc.) +- [ ] prev_context findings were incorporated where available diff --git a/.codex/skills/team-planex-v2/schemas/tasks-schema.md b/.codex/skills/team-planex-v2/schemas/tasks-schema.md new file mode 100644 index 00000000..65e30258 --- /dev/null +++ b/.codex/skills/team-planex-v2/schemas/tasks-schema.md @@ -0,0 +1,206 @@ +# Team PlanEx -- CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### Input Columns (Set by Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | string | Yes | Unique task identifier | `"PLAN-001"` | +| `title` | string | Yes | Short task title | `"Plan ISS-20260308-120000"` | +| `description` | string | Yes | Detailed task description (self-contained) | `"Generate implementation solution for issue ISS-20260308-120000"` | +| `role` | enum | Yes | Worker role: `planner` or `executor` | `"planner"` | +| `issue_ids` | string | Yes | Semicolon-separated issue IDs | `"ISS-20260308-120000"` | +| `input_type` | string | No | Input source type (planner only): `issues`, `text`, or `plan` | `"issues"` | +| `raw_input` | string | No | Raw input text (planner only) | `"ISS-20260308-120000"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | +| `execution_method` | string | No | CLI tool for EXEC tasks: codex, gemini, qwen, or empty | `"gemini"` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"PLAN-001"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"PLAN-001"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings 
from context_from tasks (per-wave CSV only) | `"[PLAN-001] Designed 4-task solution..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` -> `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Solution designed with 4 implementation tasks..."` | +| `artifact_path` | string | Path to generated artifact file | `"artifacts/solutions/ISS-20260308-120000.json"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution (edge cases) | + +> In standard PlanEx, all tasks use `csv-wave`. Interactive mode is reserved for rare multi-round coordination scenarios. 
+ +--- + +### Role Values + +| Role | Task Prefixes | Responsibility | +|------|---------------|----------------| +| `planner` | PLAN-* | Requirement decomposition, solution design, issue creation | +| `executor` | EXEC-* | Solution implementation, testing, verification, commit | + +--- + +### Example Data + +```csv +id,title,description,role,issue_ids,input_type,raw_input,exec_mode,execution_method,deps,context_from,wave,status,findings,artifact_path,error +"PLAN-001","Plan issue-1","Generate solution for ISS-20260308-120000","planner","ISS-20260308-120000","issues","ISS-20260308-120000","csv-wave","","","","1","pending","","","" +"PLAN-002","Plan issue-2","Generate solution for ISS-20260308-120001","planner","ISS-20260308-120001","issues","ISS-20260308-120001","csv-wave","","","","1","pending","","","" +"EXEC-001","Implement issue-1","Implement solution for ISS-20260308-120000","executor","ISS-20260308-120000","","","csv-wave","gemini","PLAN-001","PLAN-001","2","pending","","","" +"EXEC-002","Implement issue-2","Implement solution for ISS-20260308-120001","executor","ISS-20260308-120001","","","csv-wave","gemini","PLAN-002","PLAN-002","2","pending","","","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +--------------------- -------------------- ----------------- +id ----------> id ----------> id +title ----------> title ----------> (reads) +description ----------> description ----------> (reads) +role ----------> role ----------> (reads) +issue_ids ----------> issue_ids ----------> (reads) +input_type ----------> input_type ----------> (reads, planner) +raw_input ----------> raw_input ----------> (reads, planner) +exec_mode ----------> exec_mode ----------> (reads) +execution_method ------> execution_method -----> (reads, executor) +deps ----------> deps ----------> (reads) +context_from----------> context_from----------> (reads) + wave ----------> (reads) + prev_context ----------> (reads) + status + 
findings + artifact_path + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "PLAN-001", + "status": "completed", + "findings": "Designed solution for ISS-20260308-120000: 4 implementation tasks, 6 files affected. Approach: refactor authentication handler to support token refresh.", + "artifact_path": "artifacts/solutions/ISS-20260308-120000.json", + "error": "" +} +``` + +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `solution_designed` | `issue_id` | `{issue_id, approach, task_count, estimated_files}` | Planner: solution plan completed | +| `conflict_warning` | `issue_ids` | `{issue_ids, overlapping_files}` | Planner: file overlap between issues | +| `pattern_found` | `pattern+location` | `{pattern, location, description}` | Any: code pattern identified | +| `impl_result` | `issue_id` | `{issue_id, files_changed, tests_pass, commit}` | Executor: implementation outcome | +| `test_failure` | `issue_id` | `{issue_id, test_file, error_msg}` | Executor: test failure details | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"PLAN-001","type":"solution_designed","data":{"issue_id":"ISS-20260308-120000","approach":"refactor","task_count":4,"estimated_files":6}} +{"ts":"2026-03-08T10:05:00Z","worker":"PLAN-002","type":"conflict_warning","data":{"issue_ids":["ISS-20260308-120000","ISS-20260308-120001"],"overlapping_files":["src/auth/handler.ts"]}} +{"ts":"2026-03-08T10:10:00Z","worker":"EXEC-001","type":"impl_result","data":{"issue_id":"ISS-20260308-120000","files_changed":3,"tests_pass":true,"commit":"abc123"}} +``` + +> All agents (planner and executor) read/write the same discoveries.ndjson file. 
+ +--- + +## Cross-Wave Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| PLAN-N findings | EXEC-N prev_context | Injected via prev_context column in wave-2.csv | +| PLAN-N artifact_path | EXEC-N | Executor reads solution file from artifact_path | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | + +--- + +## Pipeline Structure + +### Standard Two-Wave Pipeline + +| Wave | Tasks | Role | Parallelism | +|------|-------|------|-------------| +| 1 | PLAN-001..N | planner | All concurrent (up to max_concurrency) | +| 2 | EXEC-001..N | executor | All concurrent (up to max_concurrency) | + +Each EXEC-NNN depends on its corresponding PLAN-NNN. If PLAN-NNN fails, EXEC-NNN is automatically skipped. + +--- + +## Solution Artifact Schema + +Written by planner agents to `artifacts/solutions/{issueId}.json`: + +```json +{ + "session_id": "planex-xxx-20260308", + "issue_id": "ISS-20260308-120000", + "solution": { + "title": "Add rate limiting middleware", + "approach": "Create express middleware with sliding window", + "tasks": [ + { + "order": 1, + "description": "Create rate limiter middleware in src/middleware/rate-limit.ts", + "files_touched": ["src/middleware/rate-limit.ts"] + }, + { + "order": 2, + "description": "Add per-route configuration in src/config/routes.ts", + "files_touched": ["src/config/routes.ts"] + } + ], + "estimated_complexity": "Medium", + "estimated_files": 4 + }, + "planned_at": "2026-03-08T10:00:00Z" +} +``` + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: 
{id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Role valid | Value in {planner, executor} | "Invalid role: {role}" | +| Description non-empty | Every task has description | "Empty description for task: {id}" | +| Status enum | status in {pending, completed, failed, skipped} | "Invalid status" | +| EXEC deps on PLAN | Every EXEC-N must depend on PLAN-N | "EXEC task without PLAN dependency: {id}" | +| Issue IDs non-empty | Every task has at least one issue_id | "No issue_ids for task: {id}" | diff --git a/.codex/skills/team-planex/instructions/agent-instruction.md b/.codex/skills/team-planex/instructions/agent-instruction.md new file mode 100644 index 00000000..45b7143d --- /dev/null +++ b/.codex/skills/team-planex/instructions/agent-instruction.md @@ -0,0 +1,301 @@ +# Team PlanEx — Agent Instruction + +This instruction is loaded by team-worker agents when spawned with `role: planner` or `role: executor`. + +--- + +## Role-Based Execution + +### Planner Role + +**Responsibility**: Explore codebase, generate implementation solution for issue. + +**Input**: +- `issue_ids`: Array of issue IDs to plan (from spawn message or send_input) +- `session`: Session directory path +- `session_id`: Session identifier + +**Execution Protocol**: + +1. **Read issue details**: + ```bash + ccw issue status {issue_id} --json + ``` + +2. **Explore codebase** (use CLI analysis tools): + ```bash + ccw cli -p "PURPOSE: Explore codebase for {issue_title} + TASK: • Identify relevant files • Find existing patterns • Locate integration points + CONTEXT: @**/* | Memory: Issue {issue_id} + EXPECTED: Exploration findings with file paths and patterns + CONSTRAINTS: Read-only analysis" --tool gemini --mode analysis --rule analysis-trace-code-execution + ``` + +3. 
**Generate solution**: + - Break down into 2-7 implementation tasks + - Define task dependencies (topological order) + - Specify files to modify per task + - Define convergence criteria per task + - Assess complexity: Low (1-2 files), Medium (3-5 files), High (6+ files) + +4. **Write solution file**: + ```javascript + Write(`{session}/artifacts/solutions/{issue_id}.json`, JSON.stringify({ + issue_id: "{issue_id}", + title: "{issue_title}", + approach: "Strategy pattern with...", + complexity: "Medium", + tasks: [ + { + task_id: "EXEC-001", + title: "Create interface", + description: "Define provider interface...", + files: ["src/auth/providers/oauth-provider.ts"], + depends_on: [], + convergence_criteria: ["Interface compiles", "Types exported"] + } + ], + exploration_findings: { + existing_patterns: ["Strategy pattern in payment module"], + tech_stack: ["TypeScript", "Express"], + integration_points: ["User service"] + } + }, null, 2)) + ``` + +5. **Write ready marker**: + ```javascript + Write(`{session}/artifacts/solutions/{issue_id}.ready`, JSON.stringify({ + issue_id: "{issue_id}", + task_count: tasks.length, + file_count: uniqueFiles.length + })) + ``` + +6. **Report to coordinator** (via team_msg): + ```javascript + mcp__ccw-tools__team_msg({ + operation: "log", + session_id: "{session_id}", + from: "planner", + to: "coordinator", + type: "plan_ready", + summary: "Planning complete for {issue_id}", + data: { + issue_id: "{issue_id}", + solution_path: "artifacts/solutions/{issue_id}.json", + task_count: tasks.length + } + }) + ``` + +7. 
**Wait for next issue** (multi-issue mode): + - After completing one issue, output results and wait + - Coordinator will send next issue via send_input + - Repeat steps 1-6 for each issue + +**Success Criteria**: +- Solution file written with valid JSON +- Ready marker created +- Message sent to coordinator +- All tasks have valid dependencies (no cycles) + +--- + +### Executor Role + +**Responsibility**: Execute implementation tasks from planner solution. + +**Input**: +- `issue_id`: Issue to implement +- `session`: Session directory path +- `session_id`: Session identifier +- `execution_method`: `codex` or `gemini` (from coordinator) +- `inner_loop`: `true` (executor uses inner loop for self-repair) + +**Execution Protocol**: + +1. **Read solution file**: + ```javascript + const solution = JSON.parse(Read(`{session}/artifacts/solutions/{issue_id}.json`)) + ``` + +2. **For each task in solution.tasks** (ordered by depends_on): + + a. **Report start**: + ```javascript + mcp__ccw-tools__team_msg({ + operation: "log", + session_id: "{session_id}", + from: "executor", + to: "coordinator", + type: "impl_start", + summary: "Starting {task_id}", + data: { task_id: "{task_id}", issue_id: "{issue_id}" } + }) + ``` + + b. **Read context files**: + ```javascript + for (const file of task.files) { + Read(file) // Load existing code + } + ``` + + c. **Identify patterns**: + - Note imports, naming conventions, existing structure + - Follow project patterns from exploration_findings + + d. **Apply changes**: + - Use Edit for existing files (prefer) + - Use Write for new files + - Follow convergence criteria from task + + e. **Build check** (if build command exists): + ```bash + npm run build 2>&1 || echo BUILD_FAILED + ``` + - If build fails: analyze error → fix → rebuild (max 3 retries) + + f. **Verify convergence**: + - Check each criterion in task.convergence_criteria + - If not met: self-repair loop (max 3 iterations) + + g. 
**Report progress**: + ```javascript + mcp__ccw-tools__team_msg({ + operation: "log", + session_id: "{session_id}", + from: "executor", + to: "coordinator", + type: "impl_progress", + summary: "Completed {task_id}", + data: { task_id: "{task_id}", progress_pct: (taskIndex / totalTasks) * 100 } + }) + ``` + +3. **Run tests** (after all tasks complete): + ```bash + npm test 2>&1 + ``` + - If tests fail: self-repair loop (max 3 retries) + - Target: 95% pass rate + +4. **Git commit**: + ```bash + git add -A + git commit -m "feat({issue_id}): {solution.title}" + ``` + +5. **Report completion**: + ```javascript + mcp__ccw-tools__team_msg({ + operation: "log", + session_id: "{session_id}", + from: "executor", + to: "coordinator", + type: "impl_complete", + summary: "Completed {issue_id}", + data: { + task_id: "{task_id}", + issue_id: "{issue_id}", + files_modified: modifiedFiles, + commit_hash: commitHash + } + }) + ``` + +6. **Update issue status**: + ```bash + ccw issue update {issue_id} --status completed + ``` + +**Success Criteria**: +- All tasks completed in dependency order +- Build passes (if build command exists) +- Tests pass (95% target) +- Git commit created +- Issue status updated to completed + +--- + +## Inner Loop Protocol + +Both roles support inner loop for self-repair: + +| Scenario | Max Iterations | Action | +|----------|---------------|--------| +| Build failure | 3 | Analyze error → fix source → rebuild | +| Test failure | 3 | Analyze failure → fix source → re-run tests | +| Convergence not met | 3 | Check criteria → adjust implementation → re-verify | + +After 3 failed iterations: report error to coordinator, mark task as failed. 
+ +--- + +## CLI Tool Usage + +### Analysis (Planner) + +```bash +ccw cli -p "PURPOSE: {goal} +TASK: • {step1} • {step2} +CONTEXT: @**/* | Memory: {context} +EXPECTED: {deliverable} +CONSTRAINTS: Read-only" --tool gemini --mode analysis --rule {template} +``` + +### Implementation (Executor, optional) + +```bash +ccw cli -p "PURPOSE: {goal} +TASK: • {step1} • {step2} +CONTEXT: @{files} | Memory: {context} +EXPECTED: {deliverable} +CONSTRAINTS: {constraints}" --tool {execution_method} --mode write --rule development-implement-feature +``` + +Use CLI tools when: +- Planner: Always use for codebase exploration +- Executor: Use for complex tasks (High complexity), direct implementation for Low/Medium + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Solution file not found | Report error to coordinator, skip issue | +| Solution JSON corrupt | Report error, skip issue | +| Build fails after 3 retries | Mark task failed, report to coordinator | +| Tests fail after 3 retries | Mark task failed, report to coordinator | +| Git commit fails | Warn, mark completed anyway | +| CLI tool timeout | Fallback to direct implementation | +| Dependency task failed | Skip dependent tasks, report to coordinator | + +--- + +## Wisdom Directory + +Record learnings in `{session}/wisdom/`: + +| File | Content | +|------|---------| +| `learnings.md` | Patterns discovered, gotchas, best practices | +| `decisions.md` | Architecture decisions, trade-offs | +| `conventions.md` | Code style, naming conventions | +| `issues.md` | Issue-specific notes, blockers resolved | + +Append to these files during execution to share knowledge across issues. + +--- + +## Output Format + +No structured output required. Workers communicate via: +- Solution files (planner) +- Message bus (both roles) +- Git commits (executor) +- Wisdom files (both roles) + +Coordinator monitors message bus and meta.json for state tracking. 
diff --git a/.codex/skills/team-planex/schemas/tasks-schema.md b/.codex/skills/team-planex/schemas/tasks-schema.md new file mode 100644 index 00000000..a37c3bc0 --- /dev/null +++ b/.codex/skills/team-planex/schemas/tasks-schema.md @@ -0,0 +1,198 @@ +# Team PlanEx — Tasks Schema + +## Task Metadata Registry + +Team PlanEx uses a **message bus + state file** architecture instead of CSV. Tasks are tracked in `.msg/meta.json` with state updates via `team_msg`. + +### Task State Fields + +| Field | Type | Description | Example | +|-------|------|-------------|---------| +| `task_id` | string | Unique task identifier | `"PLAN-001"` | +| `issue_id` | string | Source issue identifier | `"ISS-20260308-001"` | +| `title` | string | Task title from solution | `"Implement OAuth2 provider"` | +| `role` | enum | Worker role: `planner`, `executor` | `"executor"` | +| `status` | enum | Task status: `pending`, `in_progress`, `completed`, `failed` | `"completed"` | +| `assigned_to` | string | Worker agent name | `"executor"` | +| `depends_on` | array | Dependency task IDs | `["PLAN-001"]` | +| `files` | array | Files to modify | `["src/auth/oauth.ts"]` | +| `convergence_criteria` | array | Success criteria | `["Tests pass", "No lint errors"]` | +| `started_at` | string | ISO timestamp | `"2026-03-08T10:00:00+08:00"` | +| `completed_at` | string | ISO timestamp | `"2026-03-08T10:15:00+08:00"` | +| `error` | string | Error message if failed | `""` | + +--- + +## Message Bus Schema + +### Message Types + +| Type | From | To | Data Schema | Description | +|------|------|----|-----------|----| +| `plan_ready` | planner | coordinator | `{issue_id, solution_path, task_count}` | Planning complete | +| `impl_start` | executor | coordinator | `{task_id, issue_id}` | Implementation started | +| `impl_complete` | executor | coordinator | `{task_id, issue_id, files_modified[], commit_hash}` | Implementation complete | +| `impl_progress` | executor | coordinator | `{task_id, progress_pct, 
current_step}` | Progress update | +| `error` | any | coordinator | `{task_id, error_type, message}` | Error occurred | +| `state_update` | any | coordinator | `{role, state: {}}` | Role state update (auto-synced to meta.json) | + +### Message Format (NDJSON) + +```jsonl +{"id":"MSG-001","ts":"2026-03-08T10:00:00+08:00","from":"planner","to":"coordinator","type":"plan_ready","summary":"Planning complete for ISS-001","data":{"issue_id":"ISS-20260308-001","solution_path":"artifacts/solutions/ISS-20260308-001.json","task_count":3}} +{"id":"MSG-002","ts":"2026-03-08T10:05:00+08:00","from":"executor","to":"coordinator","type":"impl_start","summary":"Starting EXEC-001","data":{"task_id":"EXEC-001","issue_id":"ISS-20260308-001"}} +{"id":"MSG-003","ts":"2026-03-08T10:15:00+08:00","from":"executor","to":"coordinator","type":"impl_complete","summary":"Completed EXEC-001","data":{"task_id":"EXEC-001","issue_id":"ISS-20260308-001","files_modified":["src/auth/oauth.ts"],"commit_hash":"abc123"}} +``` + +--- + +## State File Schema (meta.json) + +### Structure + +```json +{ + "session_id": "PEX-auth-system-20260308", + "pipeline_mode": "plan-execute", + "execution_method": "codex", + "status": "running", + "started_at": "2026-03-08T10:00:00+08:00", + "issues": { + "ISS-20260308-001": { + "status": "completed", + "solution_path": "artifacts/solutions/ISS-20260308-001.json", + "tasks": ["EXEC-001", "EXEC-002"], + "completed_at": "2026-03-08T10:30:00+08:00" + } + }, + "tasks": { + "EXEC-001": { + "task_id": "EXEC-001", + "issue_id": "ISS-20260308-001", + "title": "Implement OAuth2 provider", + "role": "executor", + "status": "completed", + "assigned_to": "executor", + "depends_on": [], + "files": ["src/auth/oauth.ts"], + "convergence_criteria": ["Tests pass", "No lint errors"], + "started_at": "2026-03-08T10:05:00+08:00", + "completed_at": "2026-03-08T10:15:00+08:00", + "error": "" + } + }, + "roles": { + "coordinator": { + "status": "active", + "current_phase": "execution", + 
"last_update": "2026-03-08T10:15:00+08:00" + }, + "planner": { + "status": "idle", + "issues_planned": 5, + "last_update": "2026-03-08T10:10:00+08:00" + }, + "executor": { + "status": "active", + "current_task": "EXEC-002", + "tasks_completed": 1, + "last_update": "2026-03-08T10:15:00+08:00" + } + } +} +``` + +--- + +## Solution File Schema + +Planner generates solution files in `artifacts/solutions/.json`: + +```json +{ + "issue_id": "ISS-20260308-001", + "title": "Implement OAuth2 authentication", + "approach": "Strategy pattern with provider abstraction", + "complexity": "Medium", + "tasks": [ + { + "task_id": "EXEC-001", + "title": "Create OAuth2 provider interface", + "description": "Define provider interface with authorize/token/refresh methods", + "files": ["src/auth/providers/oauth-provider.ts"], + "depends_on": [], + "convergence_criteria": [ + "Interface compiles without errors", + "Type definitions exported" + ] + }, + { + "task_id": "EXEC-002", + "title": "Implement Google OAuth2 provider", + "description": "Concrete implementation for Google OAuth2", + "files": ["src/auth/providers/google-oauth.ts"], + "depends_on": ["EXEC-001"], + "convergence_criteria": [ + "Tests pass", + "Handles token refresh", + "Error handling complete" + ] + } + ], + "exploration_findings": { + "existing_patterns": ["Strategy pattern in payment module"], + "tech_stack": ["TypeScript", "Express", "Passport.js"], + "integration_points": ["User service", "Session store"] + } +} +``` + +--- + +## Execution Method Selection + +Coordinator selects execution method based on issue complexity: + +| Complexity | Method | Criteria | +|------------|--------|----------| +| Low | `gemini` | 1-2 files, simple logic, no architecture changes | +| Medium | `codex` | 3-5 files, moderate complexity, existing patterns | +| High | `codex` | 6+ files, complex logic, architecture changes | + +Stored in `meta.json` → `execution_method` field. 
+ +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique task IDs | No duplicate `task_id` in meta.json | "Duplicate task ID: {task_id}" | +| Valid deps | All `depends_on` task IDs exist | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {task_id}" | +| No circular deps | Dependency graph is acyclic | "Circular dependency detected" | +| Valid status | status ∈ {pending, in_progress, completed, failed} | "Invalid status: {status}" | +| Valid role | role ∈ {planner, executor} | "Invalid role: {role}" | +| Issue exists | issue_id exists in issues registry | "Unknown issue: {issue_id}" | +| Solution file exists | solution_path points to valid file | "Solution file not found: {path}" | + +--- + +## Cross-Role Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| Planner solution | Executor | Read solution JSON from artifacts/solutions/ | +| Executor progress | Coordinator | Message bus (impl_progress, impl_complete) | +| Coordinator state | All workers | Read meta.json state field | +| Any role state update | meta.json | Auto-sync via team_msg type="state_update" | + +--- + +## Discovery Types + +Team PlanEx does not use discoveries.ndjson. All context is stored in: +- Solution files (planner output) +- Message bus (real-time communication) +- meta.json (persistent state) +- wisdom/ directory (cross-task knowledge) diff --git a/.codex/skills/team-quality-assurance/SKILL.md b/.codex/skills/team-quality-assurance/SKILL.md new file mode 100644 index 00000000..9be579d4 --- /dev/null +++ b/.codex/skills/team-quality-assurance/SKILL.md @@ -0,0 +1,776 @@ +--- +name: team-quality-assurance +description: Full closed-loop QA combining issue discovery and software testing. Scout -> Strategist -> Generator -> Executor -> Analyst with multi-perspective scanning, progressive test layers, GC loops, and quality scoring. Supports discovery, testing, and full QA modes. 
+argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] [--mode=discovery|testing|full] \"task description\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults. + +# Team Quality Assurance + +## Usage + +```bash +$team-quality-assurance "Full QA for the authentication module" +$team-quality-assurance --mode=discovery "Scan codebase for security and bug issues" +$team-quality-assurance --mode=testing "Test recent changes with progressive coverage" +$team-quality-assurance -c 4 --mode=full "Complete QA cycle with regression scanning" +$team-quality-assurance -y "QA all changed files since last commit" +$team-quality-assurance --continue "qa-auth-module-20260308" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 3) +- `--continue`: Resume existing session +- `--mode=discovery|testing|full`: Force QA mode (default: auto-detect or full) + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Orchestrate multi-agent QA pipeline: scout -> strategist -> generator -> executor -> analyst. Supports three modes: **discovery** (issue scanning), **testing** (progressive test coverage), and **full** (closed-loop QA with regression). Multi-perspective scanning from bug, security, test-coverage, code-quality, and UX viewpoints. Progressive layer coverage (L1/L2/L3) with Generator-Critic loops for coverage convergence. 
+ +**Execution Model**: Hybrid -- CSV wave pipeline (primary) + individual agent spawn (secondary) + +``` ++-------------------------------------------------------------------+ +| TEAM QUALITY ASSURANCE WORKFLOW | ++-------------------------------------------------------------------+ +| | +| Phase 0: Pre-Wave Interactive (Requirement Clarification) | +| +- Parse task description, detect QA mode | +| +- Mode selection (discovery/testing/full) | +| +- Output: refined requirements for decomposition | +| | +| Phase 1: Requirement -> CSV + Classification | +| +- Select pipeline based on QA mode | +| +- Build dependency chain with appropriate roles | +| +- Classify tasks: csv-wave | interactive (exec_mode) | +| +- Compute dependency waves (topological sort) | +| +- Generate tasks.csv with wave + exec_mode columns | +| +- User validates task breakdown (skip if -y) | +| | +| Phase 2: Wave Execution Engine (Extended) | +| +- For each wave (1..N): | +| | +- Execute pre-wave interactive tasks (if any) | +| | +- Build wave CSV (filter csv-wave tasks for this wave) | +| | +- Inject previous findings into prev_context column | +| | +- spawn_agents_on_csv(wave CSV) | +| | +- Execute post-wave interactive tasks (if any) | +| | +- Merge all results into master tasks.csv | +| | +- GC Loop Check: coverage < target? -> spawn fix tasks | +| | +- Check: any failed? 
-> skip dependents | +| +- discoveries.ndjson shared across all modes (append-only) | +| | +| Phase 3: Post-Wave Interactive (Completion Action) | +| +- Pipeline completion report with quality score | +| +- Interactive completion choice (Archive/Keep/Export) | +| +- Final aggregation / report | +| | +| Phase 4: Results Aggregation | +| +- Export final results.csv | +| +- Generate context.md with all findings | +| +- Display summary: completed/failed/skipped per wave | +| +- Offer: view results | retry failed | done | +| | ++-------------------------------------------------------------------+ +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, needs iterative fix-verify cycles | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Multi-perspective code scanning (scout) | `csv-wave` | +| Strategy formulation (single-pass analysis) | `csv-wave` | +| Test generation (single-pass code creation) | `csv-wave` | +| Test execution with auto-fix cycle | `interactive` | +| Quality analysis (single-pass report) | `csv-wave` | +| GC loop fix-verify iteration | `interactive` | +| Regression scanning (post-fix) | `csv-wave` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,role,perspective,layer,coverage_target,deps,context_from,exec_mode,wave,status,findings,issues_found,pass_rate,coverage_achieved,test_files,quality_score,error +"SCOUT-001","Multi-perspective code scan","Scan codebase from bug, security, test-coverage, code-quality perspectives. 
Produce severity-ranked findings with file:line references.","scout","bug;security;test-coverage;code-quality","","","","","csv-wave","1","pending","","","","","","","" +"QASTRAT-001","Test strategy formulation","Analyze scout findings and code changes. Determine test layers, define coverage targets, generate test strategy document.","strategist","","","","SCOUT-001","SCOUT-001","csv-wave","2","pending","","","","","","","" +"QAGEN-L1-001","Generate L1 unit tests","Generate L1 unit tests based on strategy. Cover priority files, include happy path, edge cases, error handling.","generator","","L1","80","QASTRAT-001","QASTRAT-001","csv-wave","3","pending","","","","","","","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier (PREFIX-NNN format) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description (self-contained) | +| `role` | Input | Worker role: `scout`, `strategist`, `generator`, `executor`, `analyst` | +| `perspective` | Input | Scan perspectives (semicolon-separated, scout only) | +| `layer` | Input | Test layer: `L1`, `L2`, `L3`, or empty for non-layer tasks | +| `coverage_target` | Input | Target coverage percentage for this layer (empty if N/A) | +| `deps` | Input | Semicolon-separated dependency task IDs | +| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `wave` | Computed | Wave number (computed by topological sort, 1-based) | +| `status` | Output | `pending` -> `completed` / `failed` / `skipped` | +| `findings` | Output | Key discoveries or implementation notes (max 500 chars) | +| `issues_found` | Output | Count of issues discovered (scout/analyst) | +| `pass_rate` | Output | Test pass rate as decimal (executor only) | +| `coverage_achieved` | Output | Actual coverage percentage achieved (executor only) | +| `test_files` | Output | 
Semicolon-separated paths of test files (generator only) | +| `quality_score` | Output | Quality score 0-100 (analyst only) | +| `error` | Output | Error message if failed (empty if success) | + +### Per-Wave CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). + +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| Test Executor | agents/executor.md | 2.3 (send_input cycle) | Execute tests with iterative fix cycle, report pass rate and coverage | per-wave | +| GC Loop Handler | agents/gc-loop-handler.md | 2.3 (send_input cycle) | Manage Generator-Critic loop: evaluate coverage, trigger fix rounds | post-wave | + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. + +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state -- all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 4 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 4 | +| `scan/scan-results.json` | Scout output: multi-perspective scan results | Created in scout wave | +| `strategy/test-strategy.md` | Strategist output: test strategy document | Created in strategy wave | +| `tests/L1-unit/` | Generator output: L1 unit test files | Created in L1 wave | +| `tests/L2-integration/` | Generator output: L2 integration test files | Created in L2 wave | +| `tests/L3-e2e/` | Generator output: L3 E2E test files | Created in L3 wave | +| 
`results/run-{layer}.json` | Executor output: per-layer test results | Created per execution | +| `analysis/quality-report.md` | Analyst output: quality analysis report | Created in final wave | +| `interactive/{id}-result.json` | Results from interactive tasks | Created per interactive task | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ ++-- tasks.csv # Master state (all tasks, both modes) ++-- results.csv # Final results export ++-- discoveries.ndjson # Shared discovery board (all agents) ++-- context.md # Human-readable report ++-- wave-{N}.csv # Temporary per-wave input (csv-wave only) ++-- scan/ # Scout output +| +-- scan-results.json ++-- strategy/ # Strategist output +| +-- test-strategy.md ++-- tests/ # Generator output +| +-- L1-unit/ +| +-- L2-integration/ +| +-- L3-e2e/ ++-- results/ # Executor output +| +-- run-L1.json +| +-- run-L2.json ++-- analysis/ # Analyst output +| +-- quality-report.md ++-- wisdom/ # Cross-task knowledge +| +-- learnings.md +| +-- conventions.md +| +-- decisions.md +| +-- issues.md ++-- interactive/ # Interactive task artifacts +| +-- {id}-result.json ++-- gc-state.json # GC loop tracking state +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +const getUtc8ISOString = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString() + +const AUTO_YES = $ARGUMENTS.includes('--yes') || $ARGUMENTS.includes('-y') +const continueMode = $ARGUMENTS.includes('--continue') +const concurrencyMatch = $ARGUMENTS.match(/(?:--concurrency|-c)\s+(\d+)/) +const maxConcurrency = concurrencyMatch ? parseInt(concurrencyMatch[1]) : 3 + +// Parse QA mode flag +const modeMatch = $ARGUMENTS.match(/--mode=(\w+)/) +const explicitMode = modeMatch ? 
modeMatch[1] : null + +const requirement = $ARGUMENTS + .replace(/--yes|-y|--continue|--concurrency\s+\d+|-c\s+\d+|--mode=\w+/g, '') + .trim() + +const slug = requirement.toLowerCase() + .replace(/[^a-z0-9\u4e00-\u9fa5]+/g, '-') + .substring(0, 40) +const dateStr = getUtc8ISOString().substring(0, 10).replace(/-/g, '') +const sessionId = `qa-${slug}-${dateStr}` +const sessionFolder = `.workflow/.csv-wave/${sessionId}` + +Bash(`mkdir -p ${sessionFolder}/scan ${sessionFolder}/strategy ${sessionFolder}/tests/L1-unit ${sessionFolder}/tests/L2-integration ${sessionFolder}/tests/L3-e2e ${sessionFolder}/results ${sessionFolder}/analysis ${sessionFolder}/wisdom ${sessionFolder}/interactive`) + +// Initialize discoveries.ndjson +Write(`${sessionFolder}/discoveries.ndjson`, '') + +// Initialize wisdom files +Write(`${sessionFolder}/wisdom/learnings.md`, '# Learnings\n') +Write(`${sessionFolder}/wisdom/conventions.md`, '# Conventions\n') +Write(`${sessionFolder}/wisdom/decisions.md`, '# Decisions\n') +Write(`${sessionFolder}/wisdom/issues.md`, '# Issues\n') + +// Initialize GC state +Write(`${sessionFolder}/gc-state.json`, JSON.stringify({ + rounds: {}, coverage_history: [], max_rounds_per_layer: 3 +}, null, 2)) +``` + +--- + +### Phase 0: Pre-Wave Interactive (Requirement Clarification) + +**Objective**: Parse task description, detect QA mode, prepare for decomposition. + +**Workflow**: + +1. **Parse user task description** from $ARGUMENTS + +2. **Check for existing sessions** (continue mode): + - Scan `.workflow/.csv-wave/qa-*/tasks.csv` for sessions with pending tasks + - If `--continue`: resume the specified or most recent session, skip to Phase 2 + - If active session found: ask user whether to resume or start new + +3. 
**QA Mode Selection**: + + | Condition | Mode | Description | + |-----------|------|-------------| + | Explicit `--mode=discovery` | discovery | Scout-first: issue discovery then testing | + | Explicit `--mode=testing` | testing | Skip scout, direct test pipeline | + | Explicit `--mode=full` | full | Complete QA closed loop + regression scan | + | Keywords: discovery, scan, issue, audit | discovery | Auto-detected discovery mode | + | Keywords: test, coverage, TDD, verify | testing | Auto-detected testing mode | + | No explicit flag and no keyword match | full | Default to full QA | + +4. **Clarify if ambiguous** (skip if AUTO_YES): + ```javascript + AskUserQuestion({ + questions: [{ + question: "Detected QA mode: '" + qaMode + "'. Confirm?", + header: "QA Mode Selection", + multiSelect: false, + options: [ + { label: "Proceed with " + qaMode, description: "Detected mode is appropriate" }, + { label: "Use discovery", description: "Scout-first: scan for issues, then test" }, + { label: "Use testing", description: "Direct testing pipeline (skip scout)" }, + { label: "Use full", description: "Complete QA closed loop with regression" } + ] + }] + }) + ``` + +5. **Output**: Refined requirement, QA mode, scope + +**Success Criteria**: +- QA mode selected +- Refined requirements available for Phase 1 decomposition + +--- + +### Phase 1: Requirement -> CSV + Classification + +**Objective**: Decompose QA task into dependency-ordered CSV tasks based on selected mode. + +**Decomposition Rules**: + +1. **Select pipeline based on QA mode**: + + | Mode | Pipeline | + |------|----------| + | discovery | SCOUT-001 -> QASTRAT-001 -> QAGEN-001 -> QARUN-001 -> QAANA-001 | + | testing | QASTRAT-001 -> QAGEN-L1-001 -> QARUN-L1-001 -> QAGEN-L2-001 -> QARUN-L2-001 -> QAANA-001 | + | full | SCOUT-001 -> QASTRAT-001 -> [QAGEN-L1-001, QAGEN-L2-001] -> [QARUN-L1-001, QARUN-L2-001] -> QAANA-001 -> SCOUT-002 | + +2. **Assign roles, layers, perspectives, and coverage targets** per task + +3. 
**Assign exec_mode**: + - Scout, Strategist, Generator, Analyst tasks: `csv-wave` (single-pass) + - Executor tasks: `interactive` (iterative fix cycle) + +**Classification Rules**: + +| Task Property | exec_mode | +|---------------|-----------| +| Multi-perspective scanning (single-pass) | `csv-wave` | +| Strategy analysis (single-pass read + write) | `csv-wave` | +| Test code generation (single-pass write) | `csv-wave` | +| Test execution with fix loop (multi-round) | `interactive` | +| Quality analysis (single-pass read + write) | `csv-wave` | +| Regression scanning (single-pass) | `csv-wave` | + +**Wave Computation**: Kahn's BFS topological sort with depth tracking. + +**User Validation**: Display task breakdown with wave + exec_mode + role assignment (skip if AUTO_YES). + +**Success Criteria**: +- tasks.csv created with valid schema, wave, and exec_mode assignments +- No circular dependencies +- User approved (or AUTO_YES) + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with hybrid mechanism support, GC loop handling, and cross-wave context propagation. + +```javascript +const masterCsv = Read(`${sessionFolder}/tasks.csv`) +let tasks = parseCsv(masterCsv) +const maxWave = Math.max(...tasks.map(t => t.wave)) + +for (let wave = 1; wave <= maxWave; wave++) { + console.log(`\nWave ${wave}/${maxWave}`) + + // 1. Separate tasks by exec_mode + const waveTasks = tasks.filter(t => t.wave === wave && t.status === 'pending') + const csvTasks = waveTasks.filter(t => t.exec_mode === 'csv-wave') + const interactiveTasks = waveTasks.filter(t => t.exec_mode === 'interactive') + + // 2. 
Check dependencies -- skip tasks whose deps failed + for (const task of waveTasks) { + const depIds = (task.deps || '').split(';').filter(Boolean) + const depStatuses = depIds.map(id => tasks.find(t => t.id === id)?.status) + if (depStatuses.some(s => s === 'failed' || s === 'skipped')) { + task.status = 'skipped' + task.error = `Dependency failed: ${depIds.filter((id, i) => + ['failed','skipped'].includes(depStatuses[i])).join(', ')}` + } + } + + // 3. Execute csv-wave tasks + const pendingCsvTasks = csvTasks.filter(t => t.status === 'pending') + if (pendingCsvTasks.length > 0) { + for (const task of pendingCsvTasks) { + task.prev_context = buildPrevContext(task, tasks) + } + + Write(`${sessionFolder}/wave-${wave}.csv`, toCsv(pendingCsvTasks)) + + // Read instruction template + Read(`instructions/agent-instruction.md`) + + // Build instruction with session folder baked in + const instruction = buildQAInstruction(sessionFolder, wave) + + spawn_agents_on_csv({ + csv_path: `${sessionFolder}/wave-${wave}.csv`, + id_column: "id", + instruction: instruction, + max_concurrency: maxConcurrency, + max_runtime_seconds: 900, + output_csv_path: `${sessionFolder}/wave-${wave}-results.csv`, + output_schema: { + type: "object", + properties: { + id: { type: "string" }, + status: { type: "string", enum: ["completed", "failed"] }, + findings: { type: "string" }, + issues_found: { type: "string" }, + pass_rate: { type: "string" }, + coverage_achieved: { type: "string" }, + test_files: { type: "string" }, + quality_score: { type: "string" }, + error: { type: "string" } + } + } + }) + + // Merge results + const results = parseCsv(Read(`${sessionFolder}/wave-${wave}-results.csv`)) + for (const r of results) { + const t = tasks.find(t => t.id === r.id) + if (t) Object.assign(t, r) + } + } + + // 4. 
Execute interactive tasks (executor with fix cycle) + const pendingInteractive = interactiveTasks.filter(t => t.status === 'pending') + for (const task of pendingInteractive) { + Read(`agents/executor.md`) + + const prevContext = buildPrevContext(task, tasks) + const agent = spawn_agent({ + message: `## TASK ASSIGNMENT\n\n### MANDATORY FIRST STEPS\n1. Read: agents/executor.md\n2. Read: ${sessionFolder}/discoveries.ndjson\n3. Read: .workflow/project-tech.json (if exists)\n\n---\n\nGoal: ${task.description}\nLayer: ${task.layer}\nCoverage Target: ${task.coverage_target}%\nSession: ${sessionFolder}\n\n### Previous Context\n${prevContext}` + }) + const result = wait({ ids: [agent], timeout_ms: 900000 }) + if (result.timed_out) { + send_input({ id: agent, message: "Please finalize current test results and report." }) + wait({ ids: [agent], timeout_ms: 120000 }) + } + Write(`${sessionFolder}/interactive/${task.id}-result.json`, JSON.stringify({ + task_id: task.id, status: "completed", findings: parseFindings(result), + timestamp: getUtc8ISOString() + })) + close_agent({ id: agent }) + task.status = result.success ? 'completed' : 'failed' + task.findings = parseFindings(result) + } + + // 5. GC Loop Check (after executor completes) + for (const task of pendingInteractive.filter(t => t.role === 'executor')) { + const gcState = JSON.parse(Read(`${sessionFolder}/gc-state.json`)) + const layer = task.layer + const rounds = gcState.rounds[layer] || 0 + const coverageAchieved = parseFloat(task.coverage_achieved || '0') + const coverageTarget = parseFloat(task.coverage_target || '80') + const passRate = parseFloat(task.pass_rate || '0') + + if (coverageAchieved < coverageTarget && passRate < 0.95 && rounds < 3) { + gcState.rounds[layer] = rounds + 1 + Write(`${sessionFolder}/gc-state.json`, JSON.stringify(gcState, null, 2)) + + Read(`agents/gc-loop-handler.md`) + const gcAgent = spawn_agent({ + message: `## GC LOOP ROUND ${rounds + 1}\n\n### MANDATORY FIRST STEPS\n1. 
Read: agents/gc-loop-handler.md\n2. Read: ${sessionFolder}/discoveries.ndjson\n\nLayer: ${layer}\nRound: ${rounds + 1}/3\nCurrent Coverage: ${coverageAchieved}%\nTarget: ${coverageTarget}%\nPass Rate: ${passRate}\nSession: ${sessionFolder}\nPrevious Results: ${sessionFolder}/results/run-${layer}.json\nTest Directory: ${sessionFolder}/tests/${layer === 'L1' ? 'L1-unit' : layer === 'L2' ? 'L2-integration' : 'L3-e2e'}/` + }) + const gcResult = wait({ ids: [gcAgent], timeout_ms: 900000 }) + close_agent({ id: gcAgent }) + } + } + + // 6. Update master CSV + Write(`${sessionFolder}/tasks.csv`, toCsv(tasks)) + + // 7. Cleanup temp files + Bash(`rm -f ${sessionFolder}/wave-${wave}.csv ${sessionFolder}/wave-${wave}-results.csv`) + + // 8. Display wave summary + const completed = waveTasks.filter(t => t.status === 'completed').length + const failed = waveTasks.filter(t => t.status === 'failed').length + const skipped = waveTasks.filter(t => t.status === 'skipped').length + console.log(`Wave ${wave} Complete: ${completed} completed, ${failed} failed, ${skipped} skipped`) +} +``` + +**Success Criteria**: +- All waves executed in order +- Both csv-wave and interactive tasks handled per wave +- Each wave's results merged into master CSV before next wave starts +- GC loops triggered when coverage below target (max 3 rounds per layer) +- Dependent tasks skipped when predecessor failed +- discoveries.ndjson accumulated across all waves and mechanisms + +--- + +### Phase 3: Post-Wave Interactive (Completion Action) + +**Objective**: Pipeline completion report with quality score and interactive completion choice. 
+ +```javascript +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +const completed = tasks.filter(t => t.status === 'completed') +const failed = tasks.filter(t => t.status === 'failed') + +// Quality score from analyst +const analystTask = tasks.find(t => t.role === 'analyst' && t.status === 'completed') +const qualityScore = analystTask?.quality_score || 'N/A' + +// Scout issues count +const scoutTasks = tasks.filter(t => t.role === 'scout' && t.status === 'completed') +const totalIssues = scoutTasks.reduce((sum, t) => sum + parseInt(t.issues_found || '0'), 0) + +// Coverage summary per layer +const layerSummary = ['L1', 'L2', 'L3'].map(layer => { + const execTask = tasks.find(t => t.role === 'executor' && t.layer === layer && t.status === 'completed') + return execTask ? ` ${layer}: ${execTask.coverage_achieved}% coverage, ${execTask.pass_rate} pass rate` : null +}).filter(Boolean).join('\n') + +console.log(` +============================================ +QA PIPELINE COMPLETE + +Quality Score: ${qualityScore}/100 +Issues Discovered: ${totalIssues} + +Deliverables: +${completed.map(t => ` - ${t.id}: ${t.title} (${t.role})`).join('\n')} + +Coverage: +${layerSummary} + +Pipeline: ${completed.length}/${tasks.length} tasks +Session: ${sessionFolder} +============================================ +`) + +if (!AUTO_YES) { + AskUserQuestion({ + questions: [{ + question: "Quality Assurance pipeline complete. 
What would you like to do?", + header: "Completion", + multiSelect: false, + options: [ + { label: "Archive & Clean (Recommended)", description: "Archive session, output final summary" }, + { label: "Keep Active", description: "Keep session for follow-up work" }, + { label: "Export Results", description: "Export deliverables to target directory" } + ] + }] + }) +} +``` + +**Success Criteria**: +- Post-wave interactive processing complete +- Quality score and coverage metrics displayed +- User informed of results + +--- + +### Phase 4: Results Aggregation + +**Objective**: Generate final results and human-readable report. + +```javascript +// 1. Export results.csv +Bash(`cp ${sessionFolder}/tasks.csv ${sessionFolder}/results.csv`) + +// 2. Generate context.md +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +const gcState = JSON.parse(Read(`${sessionFolder}/gc-state.json`)) +const analystTask = tasks.find(t => t.role === 'analyst' && t.status === 'completed') + +let contextMd = `# Team Quality Assurance Report\n\n` +contextMd += `**Session**: ${sessionId}\n` +contextMd += `**Date**: ${getUtc8ISOString().substring(0, 10)}\n` +contextMd += `**QA Mode**: ${explicitMode || 'full'}\n` +contextMd += `**Quality Score**: ${analystTask?.quality_score || 'N/A'}/100\n\n` + +contextMd += `## Summary\n` +contextMd += `| Status | Count |\n|--------|-------|\n` +contextMd += `| Completed | ${tasks.filter(t => t.status === 'completed').length} |\n` +contextMd += `| Failed | ${tasks.filter(t => t.status === 'failed').length} |\n` +contextMd += `| Skipped | ${tasks.filter(t => t.status === 'skipped').length} |\n\n` + +// Scout findings +const scoutTasks = tasks.filter(t => t.role === 'scout' && t.status === 'completed') +if (scoutTasks.length > 0) { + contextMd += `## Scout Findings\n\n` + for (const t of scoutTasks) { + contextMd += `**${t.title}**: ${t.issues_found || 0} issues found\n${t.findings || ''}\n\n` + } +} + +// Coverage results +contextMd += `## Coverage 
Results\n\n` +contextMd += `| Layer | Coverage | Target | Pass Rate | GC Rounds |\n` +contextMd += `|-------|----------|--------|-----------|----------|\n` +for (const layer of ['L1', 'L2', 'L3']) { + const execTask = tasks.find(t => t.role === 'executor' && t.layer === layer) + if (execTask) { + contextMd += `| ${layer} | ${execTask.coverage_achieved || 'N/A'}% | ${execTask.coverage_target}% | ${execTask.pass_rate || 'N/A'} | ${gcState.rounds[layer] || 0} |\n` + } +} +contextMd += '\n' + +// Wave execution details +const maxWave = Math.max(...tasks.map(t => t.wave)) +contextMd += `## Wave Execution\n\n` +for (let w = 1; w <= maxWave; w++) { + const waveTasks = tasks.filter(t => t.wave === w) + contextMd += `### Wave ${w}\n\n` + for (const t of waveTasks) { + const icon = t.status === 'completed' ? '[DONE]' : t.status === 'failed' ? '[FAIL]' : '[SKIP]' + contextMd += `${icon} **${t.title}** [${t.role}/${t.layer || '-'}] ${t.findings || ''}\n\n` + } +} + +Write(`${sessionFolder}/context.md`, contextMd) + +console.log(`Results exported to: ${sessionFolder}/results.csv`) +console.log(`Report generated at: ${sessionFolder}/context.md`) +``` + +**Success Criteria**: +- results.csv exported (all tasks, both modes) +- context.md generated with quality score, scout findings, and coverage breakdown +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +All agents (csv-wave and interactive) share a single `discoveries.ndjson` file for cross-task knowledge exchange. 
+ +**Format**: One JSON object per line (NDJSON): + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"SCOUT-001","type":"issue_found","data":{"file":"src/auth.ts","line":42,"severity":"high","perspective":"security","description":"Hardcoded secret key in auth module"}} +{"ts":"2026-03-08T10:05:00Z","worker":"QASTRAT-001","type":"framework_detected","data":{"framework":"vitest","config_file":"vitest.config.ts","test_pattern":"**/*.test.ts"}} +{"ts":"2026-03-08T10:10:00Z","worker":"QAGEN-L1-001","type":"test_generated","data":{"file":"tests/L1-unit/auth.test.ts","source_file":"src/auth.ts","test_count":8}} +{"ts":"2026-03-08T10:15:00Z","worker":"QARUN-L1-001","type":"defect_found","data":{"file":"src/auth.ts","line":42,"pattern":"null_reference","description":"Missing null check on token payload"}} +``` + +**Discovery Types**: + +| Type | Data Schema | Description | +|------|-------------|-------------| +| `issue_found` | `{file, line, severity, perspective, description}` | Issue discovered by scout | +| `framework_detected` | `{framework, config_file, test_pattern}` | Test framework identified | +| `test_generated` | `{file, source_file, test_count}` | Test file created | +| `defect_found` | `{file, line, pattern, description}` | Defect pattern discovered during testing | +| `coverage_gap` | `{file, current, target, gap}` | Coverage gap identified | +| `convention_found` | `{pattern, example_file, description}` | Test convention detected | +| `fix_applied` | `{test_file, fix_type, description}` | Test fix during GC loop | +| `quality_metric` | `{dimension, score, details}` | Quality dimension score | + +**Protocol**: +1. Agents MUST read discoveries.ndjson at start of execution +2. Agents MUST append relevant discoveries during execution +3. Agents MUST NOT modify or delete existing entries +4. 
Deduplication by `{type, data.file, data.line}` key (where applicable) + +--- + +## Pipeline Definitions + +### Discovery Mode (5 tasks, serial) + +``` +SCOUT-001 -> QASTRAT-001 -> QAGEN-001 -> QARUN-001 -> QAANA-001 +``` + +| Task ID | Role | Layer | Wave | exec_mode | +|---------|------|-------|------|-----------| +| SCOUT-001 | scout | - | 1 | csv-wave | +| QASTRAT-001 | strategist | - | 2 | csv-wave | +| QAGEN-001 | generator | L1 | 3 | csv-wave | +| QARUN-001 | executor | L1 | 4 | interactive | +| QAANA-001 | analyst | - | 5 | csv-wave | + +### Testing Mode (6 tasks, progressive layers) + +``` +QASTRAT-001 -> QAGEN-L1-001 -> QARUN-L1-001 -> QAGEN-L2-001 -> QARUN-L2-001 -> QAANA-001 +``` + +| Task ID | Role | Layer | Wave | exec_mode | +|---------|------|-------|------|-----------| +| QASTRAT-001 | strategist | - | 1 | csv-wave | +| QAGEN-L1-001 | generator | L1 | 2 | csv-wave | +| QARUN-L1-001 | executor | L1 | 3 | interactive | +| QAGEN-L2-001 | generator | L2 | 4 | csv-wave | +| QARUN-L2-001 | executor | L2 | 5 | interactive | +| QAANA-001 | analyst | - | 6 | csv-wave | + +### Full Mode (8 tasks, parallel windows + regression) + +``` +SCOUT-001 -> QASTRAT-001 -> [QAGEN-L1-001 // QAGEN-L2-001] -> [QARUN-L1-001 // QARUN-L2-001] -> QAANA-001 -> SCOUT-002 +``` + +| Task ID | Role | Layer | Wave | exec_mode | +|---------|------|-------|------|-----------| +| SCOUT-001 | scout | - | 1 | csv-wave | +| QASTRAT-001 | strategist | - | 2 | csv-wave | +| QAGEN-L1-001 | generator | L1 | 3 | csv-wave | +| QAGEN-L2-001 | generator | L2 | 3 | csv-wave | +| QARUN-L1-001 | executor | L1 | 4 | interactive | +| QARUN-L2-001 | executor | L2 | 4 | interactive | +| QAANA-001 | analyst | - | 5 | csv-wave | +| SCOUT-002 | scout | - | 6 | csv-wave | + +--- + +## GC Loop (Generator-Critic) + +Generator and executor iterate per test layer until coverage converges: + +``` +QAGEN -> QARUN -> (if coverage < target) -> GC Loop Handler + (if coverage >= target) -> next wave +``` + +- Max 
iterations: 3 per layer +- After 3 iterations: accept current coverage with warning +- GC loop runs as interactive agent (gc-loop-handler.md) which internally generates fixes and re-runs tests + +--- + +## Scan Perspectives (Scout) + +| Perspective | Focus | +|-------------|-------| +| bug | Logic errors, crash paths, null references | +| security | Vulnerabilities, auth bypass, data exposure | +| test-coverage | Untested code paths, missing assertions | +| code-quality | Anti-patterns, complexity, maintainability | +| ux | User-facing issues, accessibility (optional, when task mentions UX/UI) | + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Circular dependency | Detect in wave computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent tasks in later waves | +| Interactive agent timeout | Urge convergence via send_input, then close if still timed out | +| Interactive agent failed | Mark as failed, skip dependents | +| All agents in wave failed | Log error, offer retry or abort | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| Scout finds no issues | Report clean scan, proceed to testing (skip discovery-specific tasks) | +| GC loop exceeded (3 rounds) | Accept current coverage with warning, proceed to next layer | +| Test framework not detected | Default to Jest patterns | +| Coverage tool unavailable | Degrade to pass rate judgment | +| quality_score < 60 | Report with WARNING, suggest re-run with deeper coverage | +| Continue mode: no session found | List available sessions, prompt user to select | + +--- + +## Core Rules + +1. **Start Immediately**: First action is session initialization, then Phase 0/1 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. 
**CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive) +4. **CSV First**: Default to csv-wave for tasks; only use interactive when multi-round interaction is required +5. **Context Propagation**: prev_context built from master CSV, not from memory +6. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson +7. **Skip on Failure**: If a dependency failed, skip the dependent task +8. **GC Loop Discipline**: Max 3 rounds per layer; never infinite-loop on coverage +9. **Scout Feeds Strategy**: Scout findings flow into strategist via prev_context and discoveries.ndjson +10. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +11. **DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-quality-assurance/agents/executor.md b/.codex/skills/team-quality-assurance/agents/executor.md new file mode 100644 index 00000000..153f0abe --- /dev/null +++ b/.codex/skills/team-quality-assurance/agents/executor.md @@ -0,0 +1,192 @@ +# Test Executor Agent + +Interactive agent that executes test suites, collects coverage, and performs iterative auto-fix cycles. Acts as the Critic in the Generator-Critic loop within the QA pipeline. 
+ +## Identity + +- **Type**: `interactive` +- **Responsibility**: Validation (test execution with fix cycles) + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Run test suites using the correct framework command +- Collect coverage data from test output or coverage reports +- Attempt auto-fix for failing tests (max 5 iterations per invocation) +- Only modify test files, NEVER modify source code +- Save results to session results directory +- Share defect discoveries to discoveries.ndjson +- Report pass rate and coverage in structured output + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Modify source code (only test files may be changed) +- Use `@ts-ignore`, `as any`, or skip/ignore test annotations +- Exceed 5 fix iterations without reporting current state +- Delete or disable existing passing tests + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | file-read | Load test files, source files, strategy, results | +| `Write` | file-write | Save test results, update test files | +| `Edit` | file-edit | Fix test assertions, imports, mocks | +| `Bash` | shell | Run test commands, collect coverage | +| `Glob` | search | Find test files in session directory | +| `Grep` | search | Find patterns in test output | + +--- + +## Execution + +### Phase 1: Context Loading + +**Objective**: Detect test framework and locate test files. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Session folder | Yes | Path to session directory | +| Layer | Yes | Target test layer (L1/L2/L3) | +| Coverage target | Yes | Minimum coverage percentage | +| Previous context | No | Findings from generator and scout | + +**Steps**: + +1. Read discoveries.ndjson for framework detection info +2. Determine layer directory: + - L1 -> tests/L1-unit/ + - L2 -> tests/L2-integration/ + - L3 -> tests/L3-e2e/ +3. 
Find test files in the layer directory +4. Determine test framework command: + +| Framework | Command Template | +|-----------|-----------------| +| vitest | `npx vitest run --coverage --reporter=json <test-files>` | +| jest | `npx jest --coverage --json --outputFile=<output-file> <test-files>` | +| pytest | `python -m pytest --cov --cov-report=json -v <test-files>` | +| mocha | `npx mocha --reporter json > test-results.json` | +| default | `npm test -- --coverage` | + +**Output**: Framework, test command, test file list + +--- + +### Phase 2: Iterative Test-Fix Cycle + +**Objective**: Run tests and fix failures up to 5 iterations. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Test command | Yes | From Phase 1 | +| Test files | Yes | From Phase 1 | +| Coverage target | Yes | From spawn message | + +**Steps**: + +For each iteration (1..5): + +1. Run test command, capture stdout/stderr +2. Parse results: extract passed/failed counts, parse coverage +3. Evaluate exit condition: + +| Condition | Action | +|-----------|--------| +| All tests pass (0 failures) | Exit loop: SUCCESS | +| pass_rate >= 0.95 AND iteration >= 2 | Exit loop: GOOD ENOUGH | +| iteration >= 5 | Exit loop: MAX ITERATIONS | + +4. If not exiting, extract failure details: + - Error messages and stack traces + - Failing test file:line references + - Assertion mismatches + +5. Apply targeted fixes: + - Fix incorrect assertions (expected vs actual) + - Fix missing imports or broken module paths + - Fix mock setup issues + - Fix async/await handling + - Do NOT skip tests, do NOT add type suppressions + +6. Share defect discoveries: + ```bash + echo '{"ts":"<utc8-timestamp>","worker":"<worker-id>","type":"defect_found","data":{"file":"<file>","line":<line>,"pattern":"<pattern>","description":"<description>"}}' >> <session-folder>/discoveries.ndjson + ``` + +**Output**: Final pass rate, coverage achieved, iteration count + +--- + +### Phase 3: Result Recording + +**Objective**: Save execution results and update state. + +**Steps**: + +1. 
Build result data: + ```json + { + "layer": "<layer>", + "framework": "<framework>", + "iterations": <count>, + "pass_rate": <decimal>, + "coverage": <percent>, + "tests_passed": <count>, + "tests_failed": <count>, + "all_passed": <boolean>, + "defect_patterns": [...] + } + ``` + +2. Save results to `<session-folder>/results/run-<layer>.json` +3. Save last test output to `<session-folder>/results/output-<layer>.txt` + +--- + +## Structured Output Template + +``` +## Summary +- Test execution for <layer>: <pass_rate> pass rate, <coverage>% coverage after <iterations> iterations + +## Findings +- Finding 1: specific test result with file:line reference +- Finding 2: defect pattern discovered + +## Defect Patterns +- Pattern: <type>, <frequency>, <severity> +- Pattern: <type>, <frequency>, <severity> + +## Coverage +- Overall: <coverage>% +- Target: <target>% +- Gap files: file1 (<percent>%), file2 (<percent>%) + +## Open Questions +1. Any unresolvable test failures (if any) +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Test command not found | Try alternative commands (npx, npm test), report if all fail | +| No test files found | Report in findings, status = failed | +| Coverage tool unavailable | Degrade to pass rate only, report in findings | +| All tests timeout | Report with partial results, status = failed | +| Import resolution fails after fix | Report remaining failures, continue with other tests | +| Timeout approaching | Output current findings with "PARTIAL" status | diff --git a/.codex/skills/team-quality-assurance/agents/gc-loop-handler.md b/.codex/skills/team-quality-assurance/agents/gc-loop-handler.md new file mode 100644 index 00000000..bf37c188 --- /dev/null +++ b/.codex/skills/team-quality-assurance/agents/gc-loop-handler.md @@ -0,0 +1,163 @@ +# GC Loop Handler Agent + +Interactive agent that manages Generator-Critic loop iterations within the QA pipeline. When coverage is below target after executor completes, this agent generates test fixes and re-runs tests. 
+ +## Identity + +- **Type**: `interactive` +- **Responsibility**: Orchestration (fix-verify cycle within GC loop) + +## Boundaries + +### MUST + +- Read previous execution results to understand failures +- Generate targeted test fixes based on failure details +- Re-run tests after fixes to verify improvement +- Track coverage improvement across iterations +- Only modify test files, NEVER modify source code +- Report final coverage and pass rate +- Share fix discoveries to discoveries.ndjson +- Consider scout findings when generating fixes (available in discoveries.ndjson) + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Modify source code (only test files) +- Use `@ts-ignore`, `as any`, or test skip annotations +- Run more than 1 fix-verify cycle per invocation (coordinator manages round count) +- Delete or disable passing tests + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | file-read | Load test results, test files, source files, scan results | +| `Write` | file-write | Write fixed test files | +| `Edit` | file-edit | Apply targeted test fixes | +| `Bash` | shell | Run test commands | +| `Glob` | search | Find test files | +| `Grep` | search | Search test output for patterns | + +--- + +## Execution + +### Phase 1: Failure Analysis + +**Objective**: Understand why tests failed or coverage was insufficient. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Session folder | Yes | Path to session directory | +| Layer | Yes | Target test layer (L1/L2/L3) | +| Round number | Yes | Current GC round (1-3) | +| Previous results | Yes | Path to run-{layer}.json | + +**Steps**: + +1. Read previous execution results from results/run-{layer}.json +2. Read test output from results/output-{layer}.txt +3. Read discoveries.ndjson for scout-found issues (may inform additional test cases) +4. 
Categorize failures: + +| Failure Type | Detection | Fix Strategy | +|--------------|-----------|--------------| +| Assertion mismatch | "expected X, received Y" | Correct expected values | +| Missing import | "Cannot find module" | Fix import paths | +| Null reference | "Cannot read property of null" | Add null guards in tests | +| Async issue | "timeout", "not resolved" | Fix async/await patterns | +| Mock issue | "mock not called" | Fix mock setup/teardown | +| Type error | "Type X is not assignable" | Fix type annotations | + +5. Identify uncovered files from coverage report +6. Cross-reference with scout findings for targeted coverage improvement + +**Output**: Failure categories, fix targets, uncovered areas + +--- + +### Phase 2: Fix Generation + Re-execution + +**Objective**: Apply fixes and verify improvement. + +**Steps**: + +1. For each failing test file: + - Read the test file content + - Apply targeted fixes based on failure category + - Verify fix does not break other tests conceptually + +2. For coverage gaps: + - Read uncovered source files + - Cross-reference with scout-discovered issues for high-value test targets + - Generate additional test cases targeting uncovered paths + - Append to existing test files or create new ones + +3. Re-run test suite with coverage: + ```bash + <test-command> 2>&1 || true + ``` + +4. Parse new results: pass rate, coverage +5. Calculate improvement delta + +6. Share discoveries: + ```bash + echo '{"ts":"<utc8-timestamp>","worker":"gc-loop-<layer>-R<round>","type":"fix_applied","data":{"test_file":"<file>","fix_type":"<type>","description":"<description>"}}' >> <session-folder>/discoveries.ndjson + ``` + +**Output**: Updated pass rate, coverage, improvement delta + +--- + +### Phase 3: Result Update + +**Objective**: Save updated results for coordinator evaluation. + +**Steps**: + +1. Overwrite results/run-{layer}.json with new data +2. Save test output to results/output-{layer}.txt +3. 
Report improvement delta in findings + +--- + +## Structured Output Template + +``` +## Summary +- GC Loop Round <round> for <layer>: coverage <before>% -> <after>% (delta: +<delta>%) + +## Fixes Applied +- Fix 1: <file> - <fix_type> - <description> +- Fix 2: <file> - <fix_type> - <description> + +## Coverage Update +- Before: <before>%, After: <after>%, Target: <target>% +- Pass Rate: <before> -> <after> + +## Scout-Informed Additions +- Added test for scout issue #<id>: <description> (if applicable) + +## Remaining Issues +- Issue 1: <description> (if any) +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| No previous results found | Report error, cannot proceed without baseline | +| All fixes cause new failures | Revert fixes, report inability to improve | +| Coverage tool unavailable | Use pass rate as proxy metric | +| Scout findings not available | Proceed without scout context | +| Timeout approaching | Output partial results with current state | diff --git a/.codex/skills/team-quality-assurance/instructions/agent-instruction.md b/.codex/skills/team-quality-assurance/instructions/agent-instruction.md new file mode 100644 index 00000000..e4af054b --- /dev/null +++ b/.codex/skills/team-quality-assurance/instructions/agent-instruction.md @@ -0,0 +1,185 @@ +# Agent Instruction Template -- Team Quality Assurance + +Base instruction template for CSV wave agents in the QA pipeline. Used by scout, strategist, generator, and analyst roles (csv-wave tasks). + +## Purpose + +| Phase | Usage | +|-------|-------| +| Phase 1 | Coordinator builds instruction from this template with session folder baked in | +| Phase 2 | Injected as `instruction` parameter to `spawn_agents_on_csv` | + +--- + +## Base Instruction Template + +```markdown +## TASK ASSIGNMENT -- Team Quality Assurance + +### MANDATORY FIRST STEPS +1. Read shared discoveries: <session-folder>/discoveries.ndjson (if exists, skip if not) +2. Read project context: .workflow/project-tech.json (if exists) +3. Read scan results: <session-folder>/scan/scan-results.json (if exists, for non-scout roles) +4. 
Read test strategy: /strategy/test-strategy.md (if exists, for generator/analyst) + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Role**: {role} +**Perspectives**: {perspective} +**Layer**: {layer} +**Coverage Target**: {coverage_target}% + +### Task Description +{description} + +### Previous Tasks' Findings (Context) +{prev_context} + +--- + +## Execution Protocol + +### If Role = scout + +1. **Determine scan scope**: Use git diff and task description to identify target files + ```bash + git diff --name-only HEAD~5 2>/dev/null || echo "" + ``` +2. **Load historical patterns**: Read discoveries.ndjson for known defect patterns +3. **Execute multi-perspective scan**: For each perspective in {perspective} (semicolon-separated): + - **bug**: Scan for logic errors, crash paths, null references, unhandled exceptions + - **security**: Scan for vulnerabilities, hardcoded secrets, auth bypass, data exposure + - **test-coverage**: Identify untested code paths, missing assertions, uncovered branches + - **code-quality**: Detect anti-patterns, high complexity, duplicated logic, maintainability issues + - **ux** (if present): Check for user-facing issues, accessibility problems +4. **Aggregate and rank**: Deduplicate by file:line, rank by severity (critical > high > medium > low) +5. **Write scan results**: Save to /scan/scan-results.json: + ```json + { + "scan_date": "", + "perspectives": ["bug", "security", ...], + "total_findings": , + "by_severity": { "critical": , "high": , "medium": , "low": }, + "findings": [{ "id": "", "severity": "", "perspective": "", "file": "", "line": , "description": "" }] + } + ``` +6. **Share discoveries**: For each critical/high finding: + ```bash + echo '{"ts":"","worker":"{id}","type":"issue_found","data":{"file":"","line":,"severity":"","perspective":"","description":""}}' >> /discoveries.ndjson + ``` + +### If Role = strategist + +1. **Read scout results**: Load /scan/scan-results.json (if discovery or full mode) +2. 
**Analyze change scope**: Run `git diff --name-only HEAD~5` to identify changed files +3. **Detect test framework**: Check for vitest.config.ts, jest.config.js, pytest.ini, pyproject.toml +4. **Categorize files**: Source, Test, Config patterns +5. **Select test layers**: + + | Condition | Layer | Target | + |-----------|-------|--------| + | Has source file changes | L1: Unit Tests | 80% | + | >= 3 source files OR critical issues | L2: Integration Tests | 60% | + | >= 3 critical/high severity issues | L3: E2E Tests | 40% | + +6. **Generate strategy**: Write to /strategy/test-strategy.md with scope analysis, layer configs, priority issues, risk assessment +7. **Share discoveries**: Append framework detection to board: + ```bash + echo '{"ts":"","worker":"{id}","type":"framework_detected","data":{"framework":"","config_file":"","test_pattern":""}}' >> /discoveries.ndjson + ``` + +### If Role = generator + +1. **Read strategy**: Load /strategy/test-strategy.md for layer config and priority files +2. **Read source files**: Load files listed in strategy for the target layer (limit 20 files) +3. **Learn test patterns**: Find 3 existing test files to understand conventions (imports, structure, naming) +4. **Detect if GC fix mode**: If task description contains "fix" -> read failure info from results/run-{layer}.json, fix failing tests only +5. **Generate tests**: For each priority source file: + - Determine test file path following project conventions + - Generate test cases: happy path, edge cases, error handling + - Use proper test framework API + - Include proper imports and mocks +6. **Write test files**: Save to /tests// + - L1 -> tests/L1-unit/ + - L2 -> tests/L2-integration/ + - L3 -> tests/L3-e2e/ +7. **Syntax check**: Run `tsc --noEmit` or equivalent to verify syntax +8. **Share discoveries**: Append test generation info to discoveries board + +### If Role = analyst + +1. **Read all results**: Load /results/run-*.json for execution data +2. 
**Read scan results**: Load /scan/scan-results.json (if exists) +3. **Read strategy**: Load /strategy/test-strategy.md +4. **Read discoveries**: Parse /discoveries.ndjson for all findings +5. **Analyze five dimensions**: + - **Defect patterns**: Group issues by type, identify patterns with >= 2 occurrences + - **Coverage gaps**: Compare achieved vs target per layer, identify per-file gaps + - **Test effectiveness**: Per layer -- pass rate, iterations, coverage achieved + - **Quality trend**: Compare against coverage_history if available + - **Quality score** (0-100): Start from 100, deduct for issues, gaps, failures; bonus for effective layers +6. **Score-based recommendations**: + + | Score | Recommendation | + |-------|----------------| + | >= 80 | Quality is GOOD. Maintain current practices. | + | 60-79 | Quality needs IMPROVEMENT. Focus on gaps and patterns. | + | < 60 | Quality is CONCERNING. Recommend comprehensive review. | + +7. **Generate report**: Write to /analysis/quality-report.md +8. 
**Share discoveries**: Append quality metrics to board + +--- + +## Output (report_agent_job_result) + +Return JSON: +```json +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Key discoveries and implementation notes (max 500 chars)", + "issues_found": "count of issues discovered (scout/analyst, empty for others)", + "pass_rate": "test pass rate as decimal (empty for non-executor tasks)", + "coverage_achieved": "actual coverage percentage (empty for non-executor tasks)", + "test_files": "semicolon-separated paths of test files (empty for non-generator tasks)", + "quality_score": "quality score 0-100 (analyst only, empty for others)", + "error": "" +} +``` + +--- + +## Quality Requirements + +All agents must verify before reporting complete: + +| Requirement | Criteria | +|-------------|----------| +| Scan results written | Verify scan-results.json exists (scout) | +| Strategy written | Verify test-strategy.md exists (strategist) | +| Tests generated | Verify test files exist in correct layer dir (generator) | +| Syntax clean | No compilation errors in generated tests (generator) | +| Report written | Verify quality-report.md exists (analyst) | +| Findings accuracy | Findings reflect actual work done | +| Discovery sharing | At least 1 discovery shared to board | +| Error reporting | Non-empty error field if status is failed | + +--- + +## Placeholder Reference + +| Placeholder | Resolved By | When | +|-------------|------------|------| +| `<session_dir>` | Skill designer (Phase 1) | Literal path baked into instruction | +| `{id}` | spawn_agents_on_csv | Runtime from CSV row | +| `{title}` | spawn_agents_on_csv | Runtime from CSV row | +| `{description}` | spawn_agents_on_csv | Runtime from CSV row | +| `{role}` | spawn_agents_on_csv | Runtime from CSV row | +| `{perspective}` | spawn_agents_on_csv | Runtime from CSV row | +| `{layer}` | spawn_agents_on_csv | Runtime from CSV row | +| `{coverage_target}` | spawn_agents_on_csv | Runtime from CSV row | +| 
`{prev_context}` | spawn_agents_on_csv | Runtime from CSV row | diff --git a/.codex/skills/team-quality-assurance/schemas/tasks-schema.md b/.codex/skills/team-quality-assurance/schemas/tasks-schema.md new file mode 100644 index 00000000..7a8b8349 --- /dev/null +++ b/.codex/skills/team-quality-assurance/schemas/tasks-schema.md @@ -0,0 +1,190 @@ +# Team Quality Assurance -- CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### Input Columns (Set by Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | string | Yes | Unique task identifier (PREFIX-NNN) | `"SCOUT-001"` | +| `title` | string | Yes | Short task title | `"Multi-perspective code scan"` | +| `description` | string | Yes | Detailed task description (self-contained) | `"Scan codebase from multiple perspectives..."` | +| `role` | enum | Yes | Worker role: `scout`, `strategist`, `generator`, `executor`, `analyst` | `"scout"` | +| `perspective` | string | No | Scan perspectives (semicolon-separated, scout only) | `"bug;security;test-coverage;code-quality"` | +| `layer` | string | No | Test layer: `L1`, `L2`, `L3`, or empty | `"L1"` | +| `coverage_target` | string | No | Target coverage percentage for this layer | `"80"` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"SCOUT-001"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"SCOUT-001"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[SCOUT-001] Found 5 security issues..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | 
+|--------|------|-------------|---------| +| `status` | enum | `pending` -> `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Found 3 critical security issues..."` | +| `issues_found` | string | Count of issues discovered (scout/analyst) | `"5"` | +| `pass_rate` | string | Test pass rate as decimal (executor only) | `"0.95"` | +| `coverage_achieved` | string | Actual coverage percentage (executor only) | `"82"` | +| `test_files` | string | Semicolon-separated test file paths (generator only) | `"tests/L1-unit/auth.test.ts"` | +| `quality_score` | string | Quality score 0-100 (analyst only) | `"78"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution (executor fix cycles) | + +Interactive tasks appear in master CSV for dependency tracking but are NOT included in wave-{N}.csv files. + +--- + +### Role Prefixes + +| Role | Prefix | Responsibility Type | +|------|--------|---------------------| +| scout | SCOUT | read-only analysis (multi-perspective scan) | +| strategist | QASTRAT | read-only analysis (strategy formulation) | +| generator | QAGEN | code-gen (test file generation) | +| executor | QARUN | validation (test execution + fix cycles) | +| analyst | QAANA | read-only analysis (quality reporting) | + +--- + +### Example Data + +```csv +id,title,description,role,perspective,layer,coverage_target,deps,context_from,exec_mode,wave,status,findings,issues_found,pass_rate,coverage_achieved,test_files,quality_score,error +"SCOUT-001","Multi-perspective code scan","Scan codebase from bug, security, test-coverage, code-quality perspectives. 
Identify issues with severity ranking (critical/high/medium/low) and file:line references. Write scan results to /scan/scan-results.json","scout","bug;security;test-coverage;code-quality","","","","","csv-wave","1","pending","","","","","","","" +"QASTRAT-001","Test strategy formulation","Analyze scout findings and code changes. Determine test layers (L1/L2/L3), define coverage targets, detect test framework, identify priority files. Write strategy to /strategy/test-strategy.md","strategist","","","","SCOUT-001","SCOUT-001","csv-wave","2","pending","","","","","","","" +"QAGEN-L1-001","Generate L1 unit tests","Generate L1 unit tests based on strategy. Read source files, identify exports, generate test cases for happy path, edge cases, error handling. Follow project test conventions. Write tests to /tests/L1-unit/","generator","","L1","80","QASTRAT-001","QASTRAT-001","csv-wave","3","pending","","","","","","","" +"QAGEN-L2-001","Generate L2 integration tests","Generate L2 integration tests based on strategy. Focus on module interaction points and integration boundaries. Write tests to /tests/L2-integration/","generator","","L2","60","QASTRAT-001","QASTRAT-001","csv-wave","3","pending","","","","","","","" +"QARUN-L1-001","Execute L1 tests and collect coverage","Run L1 test suite with coverage collection. Parse results for pass rate and coverage. If pass_rate < 0.95 or coverage < 80%, attempt auto-fix (max 3 iterations). Save results to /results/run-L1.json","executor","","L1","80","QAGEN-L1-001","QAGEN-L1-001","interactive","4","pending","","","","","","","" +"QARUN-L2-001","Execute L2 tests and collect coverage","Run L2 integration test suite with coverage. Auto-fix up to 3 iterations. Save results to /results/run-L2.json","executor","","L2","60","QAGEN-L2-001","QAGEN-L2-001","interactive","4","pending","","","","","","","" +"QAANA-001","Quality analysis report","Analyze defect patterns, coverage gaps, test effectiveness. Calculate quality score (0-100). 
Generate comprehensive report with recommendations. Write to /analysis/quality-report.md","analyst","","","","QARUN-L1-001;QARUN-L2-001","QARUN-L1-001;QARUN-L2-001","csv-wave","5","pending","","","","","","","" +"SCOUT-002","Regression scan","Post-fix regression scan. Verify no new issues introduced by test fixes. Focus on areas modified during GC loops.","scout","bug;security;code-quality","","","QAANA-001","QAANA-001","csv-wave","6","pending","","","","","","","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +--------------------- -------------------- ----------------- +id ----------> id ----------> id +title ----------> title ----------> (reads) +description ----------> description ----------> (reads) +role ----------> role ----------> (reads) +perspective ----------> perspective ----------> (reads) +layer ----------> layer ----------> (reads) +coverage_target -------> coverage_target -------> (reads) +deps ----------> deps ----------> (reads) +context_from----------> context_from----------> (reads) +exec_mode ----------> exec_mode ----------> (reads) + wave ----------> (reads) + prev_context ----------> (reads) + status + findings + issues_found + pass_rate + coverage_achieved + test_files + quality_score + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "SCOUT-001", + "status": "completed", + "findings": "Multi-perspective scan found 5 issues: 2 security (hardcoded keys, missing auth), 1 bug (null reference), 2 code-quality (duplicated logic, high complexity). All issues logged to discoveries.ndjson.", + "issues_found": "5", + "pass_rate": "", + "coverage_achieved": "", + "test_files": "", + "quality_score": "", + "error": "" +} +``` + +Interactive tasks output via structured text or JSON written to `interactive/{id}-result.json`. 
+ +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `issue_found` | `data.file+data.line` | `{file, line, severity, perspective, description}` | Issue discovered by scout | +| `framework_detected` | `data.framework` | `{framework, config_file, test_pattern}` | Test framework identified | +| `test_generated` | `data.file` | `{file, source_file, test_count}` | Test file created | +| `defect_found` | `data.file+data.line` | `{file, line, pattern, description}` | Defect found during testing | +| `coverage_gap` | `data.file` | `{file, current, target, gap}` | Coverage gap identified | +| `convention_found` | `data.pattern` | `{pattern, example_file, description}` | Test convention detected | +| `fix_applied` | `data.test_file+data.fix_type` | `{test_file, fix_type, description}` | Test fix during GC loop | +| `quality_metric` | `data.dimension` | `{dimension, score, details}` | Quality dimension score | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"SCOUT-001","type":"issue_found","data":{"file":"src/auth.ts","line":42,"severity":"high","perspective":"security","description":"Hardcoded secret key in auth module"}} +{"ts":"2026-03-08T10:02:00Z","worker":"SCOUT-001","type":"issue_found","data":{"file":"src/user.ts","line":15,"severity":"medium","perspective":"bug","description":"Missing null check on user object"}} +{"ts":"2026-03-08T10:05:00Z","worker":"QASTRAT-001","type":"framework_detected","data":{"framework":"vitest","config_file":"vitest.config.ts","test_pattern":"**/*.test.ts"}} +{"ts":"2026-03-08T10:10:00Z","worker":"QAGEN-L1-001","type":"test_generated","data":{"file":"tests/L1-unit/auth.test.ts","source_file":"src/auth.ts","test_count":8}} +{"ts":"2026-03-08T10:15:00Z","worker":"QARUN-L1-001","type":"defect_found","data":{"file":"src/auth.ts","line":42,"pattern":"null_reference","description":"Missing null check on token payload"}} 
+{"ts":"2026-03-08T10:20:00Z","worker":"QAANA-001","type":"quality_metric","data":{"dimension":"coverage_achievement","score":85,"details":"L1: 82%, L2: 68%"}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. + +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| Scout findings | Strategist prev_context | CSV context_from column | +| CSV task findings | Interactive task | Injected via spawn message | +| Interactive task result | CSV task prev_context | Read from interactive/{id}-result.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | +| Executor coverage data | GC loop handler | Read from results/run-{layer}.json | +| Analyst quality score | Regression scout | Injected via prev_context | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Description non-empty | Every task has description | "Empty description for task: {id}" | +| Status enum | status in {pending, completed, failed, skipped} | "Invalid status: {status}" | +| Role valid | role in {scout, strategist, generator, executor, analyst} | "Invalid role: {role}" | +| Layer valid | layer in {L1, L2, L3, ""} | "Invalid layer: {layer}" | +| Perspective valid | If scout, perspective contains valid values | "Invalid perspective: {value}" | +| Coverage target valid | If layer present, coverage_target is numeric | "Invalid coverage target: {value}" | 
diff --git a/.codex/skills/team-review/SKILL.md b/.codex/skills/team-review/SKILL.md new file mode 100644 index 00000000..a7e7e4be --- /dev/null +++ b/.codex/skills/team-review/SKILL.md @@ -0,0 +1,495 @@ +--- +name: team-review +description: Multi-agent code review pipeline with scanner, reviewer, and fixer roles. Executes toolchain + LLM scan, deep analysis with root cause enrichment, and automated fixes with rollback-on-failure. +argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] [--full|--fix|-q] [--dimensions=sec,cor,prf,mnt] \"target path or pattern\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults. + +# Team Review + +## Usage + +```bash +$team-review "src/auth/**/*.ts" +$team-review -c 2 --full "src/components" +$team-review -y --dimensions=sec,cor "src/api" +$team-review --continue "RV-auth-review-2026-03-08" +$team-review -q "src/utils" +$team-review --fix "src/auth/login.ts" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 3) +- `--continue`: Resume existing session +- `--full`: Enable scan + review + fix pipeline +- `--fix`: Fix-only mode (skip scan/review) +- `-q, --quick`: Quick scan only +- `--dimensions=sec,cor,prf,mnt`: Custom dimensions (security, correctness, performance, maintainability) + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Orchestrate multi-agent code review with three specialized roles: scanner (toolchain + LLM semantic scan), reviewer (deep analysis with root cause enrichment), and fixer (automated fixes with rollback-on-failure). 
Supports 4-dimension analysis: security (SEC), correctness (COR), performance (PRF), maintainability (MNT). + +**Execution Model**: Hybrid — CSV wave pipeline (primary) + individual agent spawn (secondary) + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ Team Review WORKFLOW │ +├─────────────────────────────────────────────────────────────────────────┤ +│ │ +│ Phase 0: Pre-Wave Interactive │ +│ ├─ Parse arguments and detect pipeline mode │ +│ ├─ Validate target path and resolve file patterns │ +│ └─ Output: refined requirements for decomposition │ +│ │ +│ Phase 1: Requirement → CSV + Classification │ +│ ├─ Generate task breakdown based on pipeline mode │ +│ ├─ Create scan/review/fix tasks with dependencies │ +│ ├─ Classify tasks: csv-wave (scanner, reviewer) | interactive (fixer)│ +│ ├─ Compute dependency waves (topological sort → depth grouping) │ +│ ├─ Generate tasks.csv with wave + exec_mode columns │ +│ └─ User validates task breakdown (skip if -y) │ +│ │ +│ Phase 2: Wave Execution Engine (Extended) │ +│ ├─ For each wave (1..N): │ +│ │ ├─ Execute pre-wave interactive tasks (if any) │ +│ │ ├─ Build wave CSV (filter csv-wave tasks for this wave) │ +│ │ ├─ Inject previous findings into prev_context column │ +│ │ ├─ spawn_agents_on_csv(wave CSV) │ +│ │ ├─ Execute post-wave interactive tasks (if any) │ +│ │ ├─ Merge all results into master tasks.csv │ +│ │ └─ Check: any failed? 
→ skip dependents │ +│ └─ discoveries.ndjson shared across all modes (append-only) │ +│ │ +│ Phase 3: Post-Wave Interactive │ +│ ├─ Generate final review report and fix summary │ +│ └─ Final aggregation / report │ +│ │ +│ Phase 4: Results Aggregation │ +│ ├─ Export final results.csv │ +│ ├─ Generate context.md with all findings │ +│ ├─ Display summary: completed/failed/skipped per wave │ +│ └─ Offer: view results | retry failed | done │ +│ │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, clarification, inline utility | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Scanner task (toolchain + LLM scan) | `csv-wave` | +| Reviewer task (deep analysis) | `csv-wave` | +| Fixer task (code modification with rollback) | `interactive` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,deps,context_from,exec_mode,dimension,target,wave,status,findings,error +1,Scan codebase,Run toolchain + LLM scan on target files,,,"csv-wave","sec,cor,prf,mnt","src/**/*.ts",1,pending,"","" +2,Review findings,Deep analysis with root cause enrichment,1,1,"csv-wave","sec,cor,prf,mnt","scan-results.json",2,pending,"","" +3,Fix issues,Apply fixes with rollback-on-failure,2,2,"interactive","","review-report.json",3,pending,"","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier (string) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description | +| `deps` | Input | Semicolon-separated dependency task IDs | +| 
`context_from` | Input | Semicolon-separated task IDs whose findings this task needs | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `dimension` | Input | Review dimensions (sec,cor,prf,mnt) | +| `target` | Input | Target path or pattern | +| `wave` | Computed | Wave number (computed by topological sort, 1-based) | +| `status` | Output | `pending` → `completed` / `failed` / `skipped` | +| `findings` | Output | Key discoveries or implementation notes (max 500 chars) | +| `error` | Output | Error message if failed (empty if success) | + +### Per-Wave CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). + +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| fixer | agents/fixer.md | 2.3 | Apply fixes with rollback-on-failure | post-wave | + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. 
+ +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state — all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 4 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 4 | +| `interactive/fixer-result.json` | Results from fixer task | Created per interactive task | +| `agents/registry.json` | Active interactive agent tracking | Updated on spawn/close | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ +├── tasks.csv # Master state (all tasks, both modes) +├── results.csv # Final results export +├── discoveries.ndjson # Shared discovery board (all agents) +├── context.md # Human-readable report +├── wave-{N}.csv # Temporary per-wave input (csv-wave only) +├── interactive/ # Interactive task artifacts +│ ├── fixer-result.json # Per-task results +│ └── cache-index.json # Shared exploration cache +└── agents/ + └── registry.json # Active interactive agent tracking +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +// Parse arguments +const args = parseArguments($ARGUMENTS) +const AUTO_YES = args.yes || args.y || false +const CONCURRENCY = args.concurrency || args.c || 3 +const CONTINUE_SESSION = args.continue || null +const MODE = args.full ? 'full' : args.fix ? 'fix-only' : args.quick || args.q ? 
'quick' : 'default' +const DIMENSIONS = args.dimensions || 'sec,cor,prf,mnt' +const TARGET = args._[0] || null + +// Generate session ID +const sessionId = `RV-${slugify(TARGET || 'review')}-${formatDate(new Date(), 'yyyy-MM-dd')}` +const sessionDir = `.workflow/.csv-wave/${sessionId}` + +// Create session structure +Bash({ command: `mkdir -p "${sessionDir}/interactive" "${sessionDir}/agents"` }) +Write(`${sessionDir}/discoveries.ndjson`, '') +Write(`${sessionDir}/agents/registry.json`, JSON.stringify({ active: [], closed: [] })) +``` + +--- + +### Phase 0: Pre-Wave Interactive + +**Objective**: Parse arguments, validate target, detect pipeline mode + +**Execution**: + +1. Parse command-line arguments for mode flags (--full, --fix, -q) +2. Extract target path/pattern from arguments +3. Validate target exists and resolve to file list +4. Detect pipeline mode based on flags +5. Store configuration in session metadata + +**Success Criteria**: +- Refined requirements available for Phase 1 decomposition +- Interactive agents closed, results stored + +--- + +### Phase 1: Requirement → CSV + Classification + +**Objective**: Generate task breakdown based on pipeline mode and create master CSV + +**Decomposition Rules**: + +| Mode | Tasks Generated | +|------|----------------| +| quick | SCAN-001 (quick scan only) | +| default | SCAN-001 → REV-001 | +| full | SCAN-001 → REV-001 → FIX-001 | +| fix-only | FIX-001 (requires existing review report) | + +**Classification Rules**: + +- Scanner tasks: `exec_mode=csv-wave` (one-shot toolchain + LLM scan) +- Reviewer tasks: `exec_mode=csv-wave` (one-shot deep analysis) +- Fixer tasks: `exec_mode=interactive` (multi-round with rollback) + +**Wave Computation**: Kahn's BFS topological sort with depth tracking (csv-wave tasks only). + +**User Validation**: Display task breakdown with wave + exec_mode assignment (skip if AUTO_YES). 
+ +**Success Criteria**: +- tasks.csv created with valid schema, wave, and exec_mode assignments +- No circular dependencies +- User approved (or AUTO_YES) + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with hybrid mechanism support and cross-wave context propagation. + +```javascript +// Load master CSV +const masterCSV = readCSV(`${sessionDir}/tasks.csv`) +const maxWave = Math.max(...masterCSV.map(t => t.wave)) + +for (let wave = 1; wave <= maxWave; wave++) { + // Execute pre-wave interactive tasks + const preWaveTasks = masterCSV.filter(t => + t.wave === wave && t.exec_mode === 'interactive' && t.position === 'pre-wave' + ) + for (const task of preWaveTasks) { + const agent = spawn_agent({ + message: buildInteractivePrompt(task, sessionDir) + }) + const result = wait({ ids: [agent], timeout_ms: 600000 }) + close_agent({ id: agent }) + updateTaskStatus(task.id, result) + } + + // Build wave CSV (csv-wave tasks only) + const waveTasks = masterCSV.filter(t => t.wave === wave && t.exec_mode === 'csv-wave') + if (waveTasks.length > 0) { + // Inject prev_context from context_from tasks + for (const task of waveTasks) { + if (task.context_from) { + const contextIds = task.context_from.split(';') + const contextFindings = masterCSV + .filter(t => contextIds.includes(t.id)) + .map(t => `[Task ${t.id}] ${t.findings}`) + .join('\n\n') + task.prev_context = contextFindings + } + } + + // Write wave CSV + writeCSV(`${sessionDir}/wave-${wave}.csv`, waveTasks) + + // Execute wave + spawn_agents_on_csv({ + csv_path: `${sessionDir}/wave-${wave}.csv`, + instruction_path: `${sessionDir}/instructions/agent-instruction.md`, + concurrency: CONCURRENCY + }) + + // Merge results back to master + const waveResults = readCSV(`${sessionDir}/wave-${wave}.csv`) + for (const result of waveResults) { + const masterTask = masterCSV.find(t => t.id === result.id) + Object.assign(masterTask, result) + } + writeCSV(`${sessionDir}/tasks.csv`, 
masterCSV) + + // Cleanup wave CSV + Bash({ command: `rm "${sessionDir}/wave-${wave}.csv"` }) + } + + // Execute post-wave interactive tasks + const postWaveTasks = masterCSV.filter(t => + t.wave === wave && t.exec_mode === 'interactive' && t.position === 'post-wave' + ) + for (const task of postWaveTasks) { + const agent = spawn_agent({ + message: buildInteractivePrompt(task, sessionDir) + }) + const result = wait({ ids: [agent], timeout_ms: 600000 }) + close_agent({ id: agent }) + updateTaskStatus(task.id, result) + } + + // Check for failures and skip dependents + const failedTasks = masterCSV.filter(t => t.wave === wave && t.status === 'failed') + if (failedTasks.length > 0) { + skipDependents(masterCSV, failedTasks) + } +} +``` + +**Success Criteria**: +- All waves executed in order +- Both csv-wave and interactive tasks handled per wave +- Each wave's results merged into master CSV before next wave starts +- Dependent tasks skipped when predecessor failed +- discoveries.ndjson accumulated across all waves and mechanisms +- Interactive agent lifecycle tracked in registry.json + +--- + +### Phase 3: Post-Wave Interactive + +**Objective**: Generate final review report and fix summary + +**Execution**: + +1. Aggregate all findings from scan and review tasks +2. Generate comprehensive review report with metrics +3. If fixer ran, generate fix summary with success/failure rates +4. Write final reports to session directory + +**Success Criteria**: +- Post-wave interactive processing complete +- Interactive agents closed, results stored + +--- + +### Phase 4: Results Aggregation + +**Objective**: Generate final results and human-readable report. 
+ +```javascript +// Export results.csv +const masterCSV = readCSV(`${sessionDir}/tasks.csv`) +writeCSV(`${sessionDir}/results.csv`, masterCSV) + +// Generate context.md +const contextMd = generateContextReport(masterCSV, sessionDir) +Write(`${sessionDir}/context.md`, contextMd) + +// Cleanup interactive agents +const registry = JSON.parse(Read(`${sessionDir}/agents/registry.json`)) +for (const agent of registry.active) { + close_agent({ id: agent.id }) +} +Write(`${sessionDir}/agents/registry.json`, JSON.stringify({ active: [], closed: registry.closed })) + +// Display summary +const summary = { + total: masterCSV.length, + completed: masterCSV.filter(t => t.status === 'completed').length, + failed: masterCSV.filter(t => t.status === 'failed').length, + skipped: masterCSV.filter(t => t.status === 'skipped').length +} +console.log(`Pipeline complete: ${summary.completed}/${summary.total} tasks completed`) +``` + +**Success Criteria**: +- results.csv exported (all tasks, both modes) +- context.md generated +- All interactive agents closed (registry.json cleanup) +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +**Discovery Types**: + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `finding` | `file+line+dimension` | `{dimension, file, line, severity, title}` | Code issue discovered by scanner | +| `root_cause` | `finding_id` | `{finding_id, description, related_findings[]}` | Root cause analysis from reviewer | +| `fix_applied` | `file+line` | `{file, line, fix_strategy, status}` | Fix application result from fixer | +| `pattern` | `pattern_name` | `{pattern, files[], occurrences}` | Code pattern identified across files | + +**Discovery NDJSON Format**: + +```jsonl +{"ts":"2026-03-08T14:30:22Z","worker":"1","type":"finding","data":{"dimension":"sec","file":"src/auth.ts","line":42,"severity":"high","title":"SQL injection vulnerability"}} 
+{"ts":"2026-03-08T14:35:10Z","worker":"2","type":"root_cause","data":{"finding_id":"SEC-001","description":"Unsanitized user input in query","related_findings":["SEC-002"]}} +{"ts":"2026-03-08T14:40:05Z","worker":"3","type":"fix_applied","data":{"file":"src/auth.ts","line":42,"fix_strategy":"minimal","status":"fixed"}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. + +--- + +## Cross-Mechanism Context Bridging + +### Interactive Result → CSV Task + +When a pre-wave interactive task produces results needed by csv-wave tasks: + +```javascript +// 1. Interactive result stored in file +const resultFile = `${sessionDir}/interactive/${taskId}-result.json` + +// 2. Wave engine reads when building prev_context for csv-wave tasks +// If a csv-wave task has context_from referencing an interactive task: +// Read the interactive result file and include in prev_context +``` + +### CSV Result → Interactive Task + +When a post-wave interactive task needs CSV wave results: + +```javascript +// Option A: Include in spawn message +const csvFindings = readMasterCSV().filter(t => t.wave === currentWave && t.exec_mode === 'csv-wave') +const context = csvFindings.map(t => `## Task ${t.id}: ${t.title}\n${t.findings}`).join('\n\n') + +spawn_agent({ + message: `...\n### Wave ${currentWave} Results\n${context}\n...` +}) + +// Option B: Inject via send_input (if agent already running) +send_input({ + id: activeAgent, + message: `## Wave ${currentWave} Results\n${context}\n\nProceed with analysis.` +}) +``` + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Circular dependency | Detect in wave computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent tasks in later waves | +| Interactive agent timeout | Urge convergence via send_input, then close if still timed out | +| Interactive agent failed | Mark as failed, 
skip dependents | +| Pre-wave interactive failed | Skip dependent csv-wave tasks in same wave | +| All agents in wave failed | Log error, offer retry or abort | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| Lifecycle leak | Cleanup all active agents via registry.json at end | +| Continue mode: no session found | List available sessions, prompt user to select | +| Target path invalid | AskUserQuestion for corrected path | +| Scanner finds 0 findings | Report clean, skip review + fix stages | + +--- + +## Core Rules + +1. **Start Immediately**: First action is session initialization, then Phase 0/1 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. **CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive) +4. **CSV First**: Default to csv-wave for tasks; only use interactive when interaction pattern requires it +5. **Context Propagation**: prev_context built from master CSV, not from memory +6. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson — both mechanisms share it +7. **Skip on Failure**: If a dependency failed, skip the dependent task (regardless of mechanism) +8. **Lifecycle Balance**: Every spawn_agent MUST have a matching close_agent (tracked in registry.json) +9. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +10. **DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-review/agents/fixer.md b/.codex/skills/team-review/agents/fixer.md new file mode 100644 index 00000000..dd743031 --- /dev/null +++ b/.codex/skills/team-review/agents/fixer.md @@ -0,0 +1,360 @@ +# Fixer Agent + +Fix code based on reviewed findings. Load manifest, plan fix groups, apply with rollback-on-failure, verify. 
+ +## Identity + +- **Type**: `code-generation` +- **Role File**: `~/.codex/agents/fixer.md` +- **Responsibility**: Code modification with rollback-on-failure + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Produce structured output following template +- Include file:line references in findings +- Apply fixes using Edit tool in dependency order +- Run tests after each fix +- Rollback on test failure (no retry) +- Mark dependent fixes as skipped if prerequisite failed + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Produce unstructured output +- Exceed defined scope boundaries +- Retry failed fixes (rollback and move on) +- Apply fixes without running tests +- Modify files outside fix scope + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | File I/O | Load fix manifest, review report, source files | +| `Write` | File I/O | Write fix plan, execution results, summary | +| `Edit` | File modification | Apply code fixes | +| `Bash` | Shell execution | Run tests, verification tools, git operations | +| `Glob` | File discovery | Find test files, source files | +| `Grep` | Content search | Search for patterns in code | + +### Tool Usage Patterns + +**Read Pattern**: Load context files before fixing +``` +Read(".workflow/project-tech.json") +Read("/fix/fix-manifest.json") +Read("/review/review-report.json") +Read("") +``` + +**Write Pattern**: Generate artifacts after processing +``` +Write("/fix/fix-plan.json", ) +Write("/fix/execution-results.json", ) +Write("/fix/fix-summary.json", ) +``` + +--- + +## Execution + +### Phase 1: Context & Scope Resolution + +**Objective**: Load fix manifest, review report, and determine fixable findings + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Task description | Yes | Contains session path and input path | +| Fix manifest | Yes | /fix/fix-manifest.json | +| Review 
report | Yes | /review/review-report.json | +| Project tech | No | .workflow/project-tech.json | + +**Steps**: + +1. Extract session path and input path from task description +2. Load fix manifest (scope, source report path) +3. Load review report (findings with enrichment) +4. Filter fixable findings: severity in scope AND fix_strategy !== 'skip' +5. If 0 fixable → report complete immediately +6. Detect quick path: findings <= 5 AND no cross-file dependencies +7. Detect verification tools: + - tsc: tsconfig.json exists + - eslint: package.json contains eslint + - jest: package.json contains jest + - pytest: pyproject.toml exists + - semgrep: semgrep available +8. Load wisdom files from `/wisdom/` + +**Output**: Fixable findings list, quick_path flag, available verification tools + +--- + +### Phase 2: Plan Fixes + +**Objective**: Group findings, resolve dependencies, determine execution order + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Fixable findings | Yes | From Phase 1 | +| Fix dependencies | Yes | From review report enrichment | + +**Steps**: + +1. Group findings by primary file +2. Merge groups with cross-file dependencies (union-find algorithm) +3. Topological sort within each group (respect fix_dependencies, append cycles at end) +4. Sort groups by max severity (critical first) +5. Determine execution path: + - quick_path: <=5 findings AND <=1 group → single agent + - standard: one agent per group, in execution_order +6. 
Write fix plan to `/fix/fix-plan.json`: + ```json + { + "plan_id": "", + "quick_path": true|false, + "groups": [ + { + "id": "group-1", + "files": ["src/auth.ts"], + "findings": ["SEC-001", "SEC-002"], + "max_severity": "critical" + } + ], + "execution_order": ["group-1", "group-2"], + "total_findings": 10, + "total_groups": 2 + } + ``` + +**Output**: Fix plan with grouped findings and execution order + +--- + +### Phase 3: Execute Fixes + +**Objective**: Apply fixes with rollback-on-failure + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Fix plan | Yes | From Phase 2 | +| Source files | Yes | Files to modify | + +**Steps**: + +**Quick path**: Single code-developer agent for all findings +**Standard path**: One code-developer agent per group, in execution_order + +Agent prompt includes: +- Finding list (dependency-sorted) +- File contents (truncated 8K) +- Critical rules: + 1. Apply each fix using Edit tool in order + 2. After each fix, run related tests + 3. Tests PASS → finding is "fixed" + 4. Tests FAIL → `git checkout -- {file}` → mark "failed" → continue + 5. No retry on failure. Rollback and move on + 6. If finding depends on previously failed finding → mark "skipped" + +Agent execution: +```javascript +const agent = spawn_agent({ + message: `## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS +1. Read role definition: ~/.codex/agents/code-developer.md + +--- + +## Fix Group: {group.id} + +**Files**: {group.files.join(', ')} +**Findings**: {group.findings.length} + +### Findings (dependency-sorted): +{group.findings.map(f => ` +- ID: ${f.id} +- Severity: ${f.severity} +- Location: ${f.location.file}:${f.location.line} +- Description: ${f.description} +- Fix Strategy: ${f.fix_strategy} +- Dependencies: ${f.fix_dependencies.join(', ')} +`).join('\n')} + +### Critical Rules: +1. Apply each fix using Edit tool in order +2. After each fix, run related tests +3. Tests PASS → finding is "fixed" +4. 
Tests FAIL → git checkout -- {file} → mark "failed" → continue +5. No retry on failure. Rollback and move on +6. If finding depends on previously failed finding → mark "skipped" + +### Output Format: +Return JSON: +{ + "results": [ + {"id": "SEC-001", "status": "fixed|failed|skipped", "file": "src/auth.ts", "error": ""} + ] +} +` +}) + +const result = wait({ ids: [agent], timeout_ms: 600000 }) +close_agent({ id: agent }) +``` + +Parse agent response for structured JSON. Fallback: check git diff per file if no structured output. + +Write execution results to `/fix/execution-results.json`: +```json +{ + "fixed": ["SEC-001", "COR-003"], + "failed": ["SEC-002"], + "skipped": ["SEC-004"] +} +``` + +**Output**: Execution results with fixed/failed/skipped findings + +--- + +### Phase 4: Post-Fix Verification + +**Objective**: Run verification tools on modified files + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Execution results | Yes | From Phase 3 | +| Modified files | Yes | Files that were changed | +| Verification tools | Yes | From Phase 1 detection | + +**Steps**: + +1. Run available verification tools on modified files: + +| Tool | Command | Pass Criteria | +|------|---------|---------------| +| tsc | `npx tsc --noEmit` | 0 errors | +| eslint | `npx eslint ` | 0 errors | +| jest | `npx jest --passWithNoTests` | Tests pass | +| pytest | `pytest --tb=short` | Tests pass | +| semgrep | `semgrep --config auto --json` | 0 results | + +2. If verification fails critically → rollback last batch +3. Write verification results to `/fix/verify-results.json` +4. Generate fix summary: + ```json + { + "fix_id": "", + "fix_date": "", + "scope": "critical,high", + "total": 10, + "fixed": 7, + "failed": 2, + "skipped": 1, + "fix_rate": 0.7, + "verification": { + "tsc": "pass", + "eslint": "pass", + "jest": "pass" + } + } + ``` +5. Generate human-readable summary in `/fix/fix-summary.md` +6. Update `/.msg/meta.json` with fix results +7. 
Contribute discoveries to `/wisdom/` files + +**Output**: Fix summary with verification results + +--- + +## Inline Subagent Calls + +This agent may spawn utility subagents during its execution: + +### code-developer + +**When**: After fix plan is ready +**Agent File**: ~/.codex/agents/code-developer.md + +```javascript +const utility = spawn_agent({ + message: `### MANDATORY FIRST STEPS +1. Read: ~/.codex/agents/code-developer.md + +## Fix Group: {group.id} +[See Phase 3 prompt template above] +` +}) +const result = wait({ ids: [utility], timeout_ms: 600000 }) +close_agent({ id: utility }) +// Parse result and update execution results +``` + +### Result Handling + +| Result | Severity | Action | +|--------|----------|--------| +| Success | - | Integrate findings, continue | +| consensus_blocked | HIGH | Include in output with severity flag for orchestrator | +| consensus_blocked | MEDIUM | Include warning, continue | +| Timeout/Error | - | Continue without utility result, log warning | + +--- + +## Structured Output Template + +``` +## Summary +- Fixed X/Y findings (Z% success rate) +- Failed: A findings (rolled back) +- Skipped: B findings (dependency failures) + +## Findings +- SEC-001: Fixed SQL injection in src/auth.ts:42 +- SEC-002: Failed to fix XSS (tests failed, rolled back) +- SEC-004: Skipped (depends on SEC-002) + +## Verification Results +- tsc: PASS (0 errors) +- eslint: PASS (0 errors) +- jest: PASS (all tests passed) + +## Modified Files +- src/auth.ts: 2 fixes applied +- src/utils/sanitize.ts: 1 fix applied + +## Open Questions +1. SEC-002 fix caused test failures - manual review needed +2. 
Consider refactoring auth module for better security +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Input file not found | Report in Open Questions, continue with available data | +| Scope ambiguity | Report in Open Questions, proceed with reasonable assumption | +| Processing failure | Output partial results with clear status indicator | +| Timeout approaching | Output current findings with "PARTIAL" status | +| Fix manifest missing | ERROR, cannot proceed without manifest | +| Review report missing | ERROR, cannot proceed without review | +| All fixes failed | Report failure, include rollback details | +| Verification tool unavailable | Skip verification, warn in output | +| Git operations fail | Report error, manual intervention needed | diff --git a/.codex/skills/team-review/instructions/agent-instruction.md b/.codex/skills/team-review/instructions/agent-instruction.md new file mode 100644 index 00000000..396d4413 --- /dev/null +++ b/.codex/skills/team-review/instructions/agent-instruction.md @@ -0,0 +1,102 @@ +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS +1. Read shared discoveries: {session_folder}/discoveries.ndjson (if exists, skip if not) +2. Read project context: .workflow/project-tech.json (if exists) + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Description**: {description} +**Dimension**: {dimension} +**Target**: {target} + +### Previous Tasks' Findings (Context) +{prev_context} + +--- + +## Execution Protocol + +1. **Read discoveries**: Load {session_folder}/discoveries.ndjson for shared exploration findings +2. **Use context**: Apply previous tasks' findings from prev_context above +3. **Execute**: Perform your assigned role (scanner or reviewer) following the role-specific instructions below +4. **Share discoveries**: Append exploration findings to shared board: + ```bash + echo '{"ts":"","worker":"{id}","type":"","data":{...}}' >> {session_folder}/discoveries.ndjson + ``` +5. 
**Report result**: Return JSON via report_agent_job_result + +### Role-Specific Instructions + +**If you are a Scanner (SCAN-* task)**: +1. Extract session path and target from description +2. Resolve target files (glob pattern or directory → `**/*.{ts,tsx,js,jsx,py,go,java,rs}`) +3. If no source files found → report empty, complete task cleanly +4. Detect toolchain availability: + - tsc: `tsconfig.json` exists → COR dimension + - eslint: `.eslintrc*` or `eslint` in package.json → COR/MNT + - semgrep: `.semgrep.yml` exists → SEC dimension + - ruff: `pyproject.toml` + ruff available → SEC/COR/MNT + - mypy: mypy available + `pyproject.toml` → COR + - npmAudit: `package-lock.json` exists → SEC +5. Run detected tools in parallel via Bash backgrounding +6. Parse tool outputs into normalized findings with dimension, severity, file:line +7. Execute semantic scan via CLI: `ccw cli --tool gemini --mode analysis --rule analysis-review-code-quality` +8. Focus areas per dimension: + - SEC: Business logic vulnerabilities, privilege escalation, sensitive data flow, auth bypass + - COR: Logic errors, unhandled exception paths, state management bugs, race conditions + - PRF: Algorithm complexity, N+1 queries, unnecessary sync, memory leaks, missing caching + - MNT: Architectural coupling, abstraction leaks, convention violations, dead code +9. Merge toolchain + semantic findings, deduplicate (same file + line + dimension) +10. Assign dimension-prefixed IDs: SEC-001, COR-001, PRF-001, MNT-001 +11. Write scan results to session directory + +**If you are a Reviewer (REV-* task)**: +1. Extract session path and input path from description +2. Load scan results from previous task (via prev_context or session directory) +3. If scan results empty → report clean, complete immediately +4. Triage findings into deep_analysis (critical/high/medium, max 15) and pass_through (remaining) +5. 
Split deep_analysis into domain groups: + - Group A: Security + Correctness → Root cause tracing, fix dependencies, blast radius + - Group B: Performance + Maintainability → Optimization approaches, refactor tradeoffs +6. Execute parallel CLI agents for enrichment: `ccw cli --tool gemini --mode analysis --rule analysis-diagnose-bug-root-cause` +7. Request 6 enrichment fields per finding: + - root_cause: {description, related_findings[], is_symptom} + - impact: {scope: low/medium/high, affected_files[], blast_radius} + - optimization: {approach, alternative, tradeoff} + - fix_strategy: minimal / refactor / skip + - fix_complexity: low / medium / high + - fix_dependencies: finding IDs that must be fixed first +8. Merge enriched + pass_through findings +9. Cross-correlate: + - Critical files: file appears in >=2 dimensions + - Root cause groups: cluster findings sharing related_findings + - Optimization suggestions: from root cause groups + standalone enriched findings +10. Compute metrics: by_dimension, by_severity, dimension_severity_matrix, fixable_count +11. Write review report to session directory + +### Discovery Types to Share + +- `finding`: {dimension, file, line, severity, title} — Code issue discovered +- `root_cause`: {finding_id, description, related_findings[]} — Root cause analysis +- `pattern`: {pattern, files[], occurrences} — Code pattern identified + +--- + +## Output (report_agent_job_result) + +Return JSON: +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Key discoveries and implementation notes (max 500 chars)", + "error": "" +} + +**Scanner findings format**: "Found X security issues (Y critical, Z high), A correctness bugs, B performance issues, C maintainability concerns. Toolchain: [tool results]. LLM scan: [semantic issues]." + +**Reviewer findings format**: "Analyzed X findings. Critical files: [files]. Root cause groups: [count]. Fixable: Y/X. Recommended fix scope: [scope]." 
diff --git a/.codex/skills/team-review/schemas/tasks-schema.md b/.codex/skills/team-review/schemas/tasks-schema.md new file mode 100644 index 00000000..b32db940 --- /dev/null +++ b/.codex/skills/team-review/schemas/tasks-schema.md @@ -0,0 +1,143 @@ +# Team Review — CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### Input Columns (Set by Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | string | Yes | Unique task identifier | `"1"` | +| `title` | string | Yes | Short task title | `"Scan codebase"` | +| `description` | string | Yes | Detailed task description (self-contained) | `"Run toolchain + LLM scan on src/**/*.ts"` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"1;2"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"1"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | +| `dimension` | string | Yes | Review dimensions (comma-separated: sec,cor,prf,mnt) | `"sec,cor,prf,mnt"` | +| `target` | string | Yes | Target path or pattern for analysis | `"src/**/*.ts"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[Task 1] Scan found 15 issues..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` → `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Found 15 security issues, 8 correctness bugs"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | 
+|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution | + +Interactive tasks appear in master CSV for dependency tracking but are NOT included in wave-{N}.csv files. + +--- + +### Example Data + +```csv +id,title,description,deps,context_from,exec_mode,dimension,target,wave,status,findings,error +1,Scan codebase,Run toolchain + LLM scan on target files,,,"csv-wave","sec,cor,prf,mnt","src/**/*.ts",1,pending,"","" +2,Review findings,Deep analysis with root cause enrichment,1,1,"csv-wave","sec,cor,prf,mnt","scan-results.json",2,pending,"","" +3,Fix issues,Apply fixes with rollback-on-failure,2,2,"interactive","","review-report.json",3,pending,"","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +───────────────────── ──────────────────── ───────────────── +id ───────────► id ──────────► id +title ───────────► title ──────────► (reads) +description ───────────► description ──────────► (reads) +deps ───────────► deps ──────────► (reads) +context_from───────────► context_from──────────► (reads) +exec_mode ───────────► exec_mode ──────────► (reads) +dimension ───────────► dimension ──────────► (reads) +target ───────────► target ──────────► (reads) + wave ──────────► (reads) + prev_context ──────────► (reads) + status + findings + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "1", + "status": "completed", + "findings": "Found 15 security issues (3 critical, 5 high, 7 medium), 8 correctness bugs, 4 performance issues, 12 maintainability concerns. Toolchain: tsc (5 errors), eslint (8 warnings), semgrep (3 vulnerabilities). LLM scan: 26 semantic issues.", + "error": "" +} +``` + +Interactive tasks output via structured text or JSON written to `interactive/{id}-result.json`. 
+ +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `finding` | `file+line+dimension` | `{dimension, file, line, severity, title}` | Code issue discovered by scanner | +| `root_cause` | `finding_id` | `{finding_id, description, related_findings[]}` | Root cause analysis from reviewer | +| `fix_applied` | `file+line` | `{file, line, fix_strategy, status}` | Fix application result from fixer | +| `pattern` | `pattern_name` | `{pattern, files[], occurrences}` | Code pattern identified across files | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T14:30:22Z","worker":"1","type":"finding","data":{"dimension":"sec","file":"src/auth.ts","line":42,"severity":"high","title":"SQL injection vulnerability"}} +{"ts":"2026-03-08T14:35:10Z","worker":"2","type":"root_cause","data":{"finding_id":"SEC-001","description":"Unsanitized user input in query","related_findings":["SEC-002"]}} +{"ts":"2026-03-08T14:40:05Z","worker":"3","type":"fix_applied","data":{"file":"src/auth.ts","line":42,"fix_strategy":"minimal","status":"fixed"}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. 
+ +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| CSV task findings | Interactive task | Injected via spawn message or send_input | +| Interactive task result | CSV task prev_context | Read from interactive/{id}-result.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Description non-empty | Every task has description | "Empty description for task: {id}" | +| Status enum | status ∈ {pending, completed, failed, skipped} | "Invalid status: {status}" | +| Cross-mechanism deps | Interactive→CSV deps resolve correctly | "Cross-mechanism dependency unresolvable: {id}" | +| Dimension valid | dimension ∈ {sec, cor, prf, mnt} or combinations | "Invalid dimension: {dimension}" | +| Target non-empty | Every task has target | "Empty target for task: {id}" | diff --git a/.codex/skills/team-roadmap-dev/SKILL.md b/.codex/skills/team-roadmap-dev/SKILL.md new file mode 100644 index 00000000..2e1cbd02 --- /dev/null +++ b/.codex/skills/team-roadmap-dev/SKILL.md @@ -0,0 +1,645 @@ +--- +name: team-roadmap-dev +description: Roadmap-driven development with phased execution pipeline. Coordinator discusses roadmap with user, then executes plan->execute->verify cycles per phase using CSV wave execution. 
+argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"<task description>\""
+allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion
+---
+
+## Auto Mode
+
+When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults.
+
+# Roadmap-Driven Development
+
+## Usage
+
+```bash
+$team-roadmap-dev "Build authentication module with JWT tokens"
+$team-roadmap-dev -c 4 "Refactor payment processing to support multiple gateways"
+$team-roadmap-dev -y "Add real-time notifications feature"
+$team-roadmap-dev --continue "RD-auth-module-2026-03-08"
+```
+
+**Flags**:
+- `-y, --yes`: Skip all confirmations (auto mode)
+- `-c, --concurrency N`: Max concurrent agents within each wave (default: 3)
+- `--continue`: Resume existing session
+
+**Output Directory**: `.workflow/.csv-wave/{session-id}/`
+**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report)
+
+---
+
+## Overview
+
+Roadmap-driven development workflow that breaks down complex development tasks into phases, with each phase following a plan->execute->verify cycle. The coordinator discusses the roadmap with the user to establish phases and requirements, then executes each phase systematically using CSV wave execution for parallel task processing. 
+ +**Execution Model**: Hybrid — CSV wave pipeline (primary) + individual agent spawn (secondary) + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ ROADMAP-DRIVEN DEVELOPMENT WORKFLOW │ +├─────────────────────────────────────────────────────────────────────────┤ +│ │ +│ Phase 0: Roadmap Discussion (Interactive) │ +│ ├─ Discuss requirements and scope with user │ +│ ├─ Break down into logical phases │ +│ ├─ Define success criteria per phase │ +│ └─ Output: roadmap.md with phase definitions │ +│ │ +│ Phase 1: Requirement → CSV + Classification │ +│ ├─ For each roadmap phase: generate plan->execute->verify tasks │ +│ ├─ Classify tasks: csv-wave (execution) | interactive (planning) │ +│ ├─ Compute dependency waves (topological sort → depth grouping) │ +│ ├─ Generate tasks.csv with wave + exec_mode columns │ +│ └─ User validates task breakdown (skip if -y) │ +│ │ +│ Phase 2: Wave Execution Engine (Extended) │ +│ ├─ For each wave (1..N): │ +│ │ ├─ Execute pre-wave interactive tasks (planning) │ +│ │ ├─ Build wave CSV (filter csv-wave tasks for this wave) │ +│ │ ├─ Inject previous findings into prev_context column │ +│ │ ├─ spawn_agents_on_csv(wave CSV) │ +│ │ ├─ Execute post-wave interactive tasks (verification) │ +│ │ ├─ Merge all results into master tasks.csv │ +│ │ └─ Check: any failed? 
→ skip dependents │ +│ └─ discoveries.ndjson shared across all modes (append-only) │ +│ │ +│ Phase 3: Results Aggregation │ +│ ├─ Export final results.csv │ +│ ├─ Generate context.md with all findings │ +│ ├─ Display summary: completed/failed/skipped per wave │ +│ └─ Offer: view results | retry failed | done │ +│ │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, clarification, inline utility | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Planning tasks (research, exploration, plan generation) | `interactive` | +| Execution tasks (code implementation, file modifications) | `csv-wave` | +| Verification tasks (testing, validation, gap detection) | `interactive` | +| Gap closure tasks (re-planning based on verification) | `interactive` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,deps,context_from,exec_mode,phase,role,wave,status,findings,error +PLAN-101,Phase 1 Planning,Research and plan for authentication module,,,"interactive",1,planner,1,pending,"","" +EXEC-101,Implement auth routes,Create Express routes for login/logout/register,PLAN-101,PLAN-101,"csv-wave",1,executor,2,pending,"","" +EXEC-102,Implement JWT middleware,Create JWT token generation and validation,PLAN-101,PLAN-101,"csv-wave",1,executor,2,pending,"","" +VERIFY-101,Verify Phase 1,Test and validate phase 1 implementation,"EXEC-101;EXEC-102","EXEC-101;EXEC-102","interactive",1,verifier,3,pending,"","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier 
(string) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description | +| `deps` | Input | Semicolon-separated dependency task IDs | +| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `phase` | Input | Phase number (1-based) | +| `role` | Input | Role name: planner, executor, verifier | +| `wave` | Computed | Wave number (computed by topological sort, 1-based) | +| `status` | Output | `pending` → `completed` / `failed` / `skipped` | +| `findings` | Output | Key discoveries or implementation notes (max 500 chars) | +| `error` | Output | Error message if failed (empty if success) | + +### Per-Wave CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). + +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| roadmap-discusser | ~/.codex/agents/roadmap-discusser.md | 2.3 | Discuss roadmap with user, generate phase plan | pre-wave (Phase 0) | +| planner | ~/.codex/agents/roadmap-planner.md | 2.4 | Research and plan creation per phase | pre-wave (per phase) | +| verifier | ~/.codex/agents/roadmap-verifier.md | 2.4 | Test and validate phase implementation | post-wave (per phase) | + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. 
+ +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state — all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 3 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 3 | +| `interactive/{id}-result.json` | Results from interactive tasks | Created per interactive task | +| `agents/registry.json` | Active interactive agent tracking | Updated on spawn/close | +| `roadmap.md` | Phase definitions and requirements | Created in Phase 0 | +| `phase-{N}/IMPL_PLAN.md` | Implementation plan per phase | Created by planner | +| `phase-{N}/verification.md` | Verification results per phase | Created by verifier | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ +├── tasks.csv # Master state (all tasks, both modes) +├── results.csv # Final results export +├── discoveries.ndjson # Shared discovery board (all agents) +├── context.md # Human-readable report +├── roadmap.md # Phase definitions +├── wave-{N}.csv # Temporary per-wave input (csv-wave only) +├── interactive/ # Interactive task artifacts +│ ├── {id}-result.json # Per-task results +│ └── cache-index.json # Shared exploration cache +├── agents/ +│ └── registry.json # Active interactive agent tracking +└── phase-{N}/ # Per-phase artifacts + ├── IMPL_PLAN.md + ├── TODO_LIST.md + ├── .task/IMPL-*.json + └── verification.md +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +// Parse arguments +const args = parseArguments($ARGUMENTS) +const autoYes = args.yes || args.y +const concurrency = args.concurrency || args.c || 3 +const continueMode = args.continue +const taskDescription = args._[0] + +// 
Generate session ID
+const slug = taskDescription.toLowerCase().replace(/[^a-z0-9]+/g, '-').substring(0, 30)
+const date = new Date().toISOString().split('T')[0]
+const sessionId = `RD-${slug}-${date}`
+const sessionDir = `.workflow/.csv-wave/${sessionId}`
+
+// Create session structure
+Bash(`mkdir -p "${sessionDir}/interactive" "${sessionDir}/agents" "${sessionDir}/phase-1"`)
+
+// Initialize registry
+Write(`${sessionDir}/agents/registry.json`, JSON.stringify({
+  active: [],
+  closed: [],
+  created_at: new Date().toISOString()
+}, null, 2))
+
+// Initialize discoveries
+Write(`${sessionDir}/discoveries.ndjson`, '')
+```
+
+---
+
+### Phase 0: Roadmap Discussion (Interactive)
+
+**Objective**: Discuss roadmap with user and generate phase plan with requirements and success criteria.
+
+```javascript
+// Spawn roadmap discusser
+const discusser = spawn_agent({
+  message: `### MANDATORY FIRST STEPS
+1. Read: ~/.codex/agents/roadmap-discusser.md
+
+---
+
+## Task Assignment
+
+**Goal**: Discuss roadmap with user and generate phase plan
+
+**Task Description**: ${taskDescription}
+
+**Session Directory**: ${sessionDir}
+
+**Deliverables**:
+- roadmap.md with phase definitions, requirements, and success criteria
+- Each phase should have: phase number, goal, requirements (REQ-IDs), success criteria
+
+**Instructions**:
+1. Analyze task description to understand scope
+2. Propose phase breakdown to user via AskUserQuestion
+3. For each phase, clarify requirements and success criteria
+4. Generate roadmap.md with structured phase definitions
+5. Output result as JSON with roadmap_path and phase_count`
+})
+
+// Wait for completion (mutable: a timed-out wait is replaced by the retry result below)
+let result = wait({ ids: [discusser], timeout_ms: 600000 })
+
+if (result.timed_out) {
+  send_input({ id: discusser, message: "Please finalize roadmap and output current plan." })
+  result = wait({ ids: [discusser], timeout_ms: 120000 })
+}
+
+// Store result
+const discusserOutput = JSON.parse(result.output)
+Write(`${sessionDir}/interactive/DISCUSS-001-result.json`, JSON.stringify({
+  task_id: "DISCUSS-001",
+  status: "completed",
+  findings: discusserOutput.summary,
+  roadmap_path: discusserOutput.roadmap_path,
+  phase_count: discusserOutput.phase_count,
+  timestamp: new Date().toISOString()
+}, null, 2))
+
+close_agent({ id: discusser })
+
+// Load roadmap
+const roadmap = Read(discusserOutput.roadmap_path)
+const phases = parseRoadmapPhases(roadmap)
+```
+
+**Success Criteria**:
+- roadmap.md created with phase definitions
+- Each phase has clear requirements and success criteria
+- User approved phase breakdown
+
+---
+
+### Phase 1: Requirement → CSV + Classification
+
+**Objective**: Generate task breakdown from roadmap phases, classify by exec_mode, compute waves.
+
+```javascript
+// Read roadmap
+const roadmapContent = Read(`${sessionDir}/roadmap.md`)
+const phases = parseRoadmapPhases(roadmapContent)
+
+// Generate tasks for all phases
+const allTasks = []
+let taskCounter = 1
+
+for (const phase of phases) {
+  const phaseNum = phase.number
+
+  // Planning task (interactive, pre-wave)
+  allTasks.push({
+    id: `PLAN-${phaseNum}01`,
+    title: `Phase ${phaseNum} Planning`,
+    description: `Research and plan for: ${phase.goal}\n\nRequirements:\n${phase.requirements.join('\n')}\n\nSuccess Criteria:\n${phase.success_criteria.join('\n')}`,
+    deps: phaseNum > 1 ? `VERIFY-${phaseNum-1}01` : "",
+    context_from: phaseNum > 1 ? `VERIFY-${phaseNum-1}01` : "",
+    exec_mode: "interactive",
+    phase: phaseNum,
+    role: "planner",
+    wave: 0, // Computed later
+    status: "pending",
+    findings: "",
+    error: ""
+  })
+
+  // Execution tasks (csv-wave) - will be generated by planner
+  // Placeholder: planner will create EXEC-{phaseNum}01, EXEC-{phaseNum}02, etc.
+ + // Verification task (interactive, post-wave) + allTasks.push({ + id: `VERIFY-${phaseNum}01`, + title: `Phase ${phaseNum} Verification`, + description: `Test and validate phase ${phaseNum} implementation against success criteria:\n${phase.success_criteria.join('\n')}`, + deps: `PLAN-${phaseNum}01`, // Will be updated after execution tasks created + context_from: `PLAN-${phaseNum}01`, // Will be updated + exec_mode: "interactive", + phase: phaseNum, + role: "verifier", + wave: 0, // Computed later + status: "pending", + findings: "", + error: "" + }) +} + +// Compute waves via topological sort +const tasksWithWaves = computeWaves(allTasks) + +// Write master CSV +writeMasterCSV(`${sessionDir}/tasks.csv`, tasksWithWaves) + +// User validation (skip if autoYes) +if (!autoYes) { + const approval = AskUserQuestion({ + questions: [{ + question: `Generated ${tasksWithWaves.length} tasks across ${phases.length} phases. Proceed?`, + header: "Task Breakdown Validation", + multiSelect: false, + options: [ + { label: "Proceed", description: "Start execution" }, + { label: "Cancel", description: "Abort workflow" } + ] + }] + }) + + if (approval.answers[0] !== "Proceed") { + throw new Error("User cancelled workflow") + } +} +``` + +**Success Criteria**: +- tasks.csv created with valid schema, wave, and exec_mode assignments +- No circular dependencies +- User approved (or AUTO_YES) + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with hybrid mechanism support and cross-wave context propagation. 
+
+```javascript
+// Load master CSV
+const masterCSV = readMasterCSV(`${sessionDir}/tasks.csv`)
+const maxWave = Math.max(...masterCSV.map(t => t.wave))
+
+for (let waveNum = 1; waveNum <= maxWave; waveNum++) {
+  console.log(`\n=== Executing Wave ${waveNum} ===\n`)
+
+  // Get tasks for this wave
+  const waveTasks = masterCSV.filter(t => t.wave === waveNum && t.status === 'pending')
+
+  // Separate by exec_mode
+  const interactiveTasks = waveTasks.filter(t => t.exec_mode === 'interactive')
+  const csvTasks = waveTasks.filter(t => t.exec_mode === 'csv-wave')
+
+  // Execute pre-wave interactive tasks (planners)
+  for (const task of interactiveTasks.filter(t => t.role === 'planner')) {
+    const agent = spawn_agent({
+      message: buildPlannerPrompt(task, sessionDir)
+    })
+
+    let result = wait({ ids: [agent], timeout_ms: 600000 })
+
+    if (result.timed_out) {
+      send_input({ id: agent, message: "Please finalize plan and output current results." })
+      // Replace the timed-out result with the post-nudge output
+      result = wait({ ids: [agent], timeout_ms: 120000 })
+    }
+
+    // Store result
+    Write(`${sessionDir}/interactive/${task.id}-result.json`, JSON.stringify({
+      task_id: task.id,
+      status: "completed",
+      findings: parseFindings(result),
+      timestamp: new Date().toISOString()
+    }, null, 2))
+
+    close_agent({ id: agent })
+
+    // Update master CSV
+    updateTaskStatus(masterCSV, task.id, "completed", parseFindings(result))
+
+    // Planner generates execution tasks - read and add to master CSV
+    const planTasks = readPlanTasks(`${sessionDir}/phase-${task.phase}/.task/`)
+    for (const planTask of planTasks) {
+      masterCSV.push({
+        id: planTask.id,
+        title: planTask.title,
+        description: planTask.description,
+        deps: task.id,
+        context_from: task.id,
+        exec_mode: "csv-wave",
+        phase: task.phase,
+        role: "executor",
+        wave: waveNum + 1, // Next wave
+        status: "pending",
+        findings: "",
+        error: ""
+      })
+    }
+  }
+
+  // Build wave CSV for csv-wave tasks
+  if (csvTasks.length > 0) {
+    const waveCSV = buildWaveCSV(csvTasks, masterCSV, sessionDir)
+
+    const waveCSVPath = `${sessionDir}/wave-${waveNum}.csv`
+    writeWaveCSV(waveCSVPath, waveCSV)
+
+    // Execute CSV wave
+    spawn_agents_on_csv({
+      csv_file_path: waveCSVPath,
+      instruction_file_path: `${sessionDir}/../instructions/executor-instruction.md`,
+      concurrency: concurrency
+    })
+
+    // Merge results back to master CSV
+    const waveResults = readWaveCSV(waveCSVPath)
+    for (const result of waveResults) {
+      updateTaskStatus(masterCSV, result.id, result.status, result.findings, result.error)
+    }
+
+    // Cleanup temp wave CSV
+    Bash(`rm "${waveCSVPath}"`)
+  }
+
+  // Execute post-wave interactive tasks (verifiers)
+  for (const task of interactiveTasks.filter(t => t.role === 'verifier')) {
+    const agent = spawn_agent({
+      message: buildVerifierPrompt(task, sessionDir, masterCSV)
+    })
+
+    let result = wait({ ids: [agent], timeout_ms: 600000 })
+
+    if (result.timed_out) {
+      send_input({ id: agent, message: "Please finalize verification and output current results." })
+      // Replace the timed-out result with the post-nudge output
+      result = wait({ ids: [agent], timeout_ms: 120000 })
+    }
+
+    // Store result
+    const verificationResult = JSON.parse(result.output)
+    Write(`${sessionDir}/interactive/${task.id}-result.json`, JSON.stringify({
+      task_id: task.id,
+      status: "completed",
+      findings: verificationResult.summary,
+      gaps_found: verificationResult.gaps || [],
+      timestamp: new Date().toISOString()
+    }, null, 2))
+
+    close_agent({ id: agent })
+
+    // Update master CSV
+    updateTaskStatus(masterCSV, task.id, "completed", verificationResult.summary)
+
+    // Handle gaps (max 3 iterations)
+    if (verificationResult.gaps && verificationResult.gaps.length > 0) {
+      const gapIteration = countGapIterations(masterCSV, task.phase)
+      if (gapIteration < 3) {
+        // Create gap closure tasks
+        const gapTasks = createGapClosureTasks(verificationResult.gaps, task.phase, gapIteration)
+        masterCSV.push(...gapTasks)
+      } else {
+        console.log(`[WARNING] Max gap iterations (3) reached for phase ${task.phase}`)
+      }
+    }
+  }
+
+  // Write updated master CSV
+ writeMasterCSV(`${sessionDir}/tasks.csv`, masterCSV) + + // Check for failures and skip dependents + const failedTasks = waveTasks.filter(t => t.status === 'failed') + if (failedTasks.length > 0) { + skipDependentTasks(masterCSV, failedTasks.map(t => t.id)) + } +} +``` + +**Success Criteria**: +- All waves executed in order +- Both csv-wave and interactive tasks handled per wave +- Each wave's results merged into master CSV before next wave starts +- Dependent tasks skipped when predecessor failed +- discoveries.ndjson accumulated across all waves and mechanisms +- Interactive agent lifecycle tracked in registry.json + +--- + +### Phase 3: Results Aggregation + +**Objective**: Generate final results and human-readable report. + +```javascript +// Load final master CSV +const finalCSV = readMasterCSV(`${sessionDir}/tasks.csv`) + +// Export results.csv +writeFinalResults(`${sessionDir}/results.csv`, finalCSV) + +// Generate context.md +const contextMd = generateContextReport(finalCSV, sessionDir) +Write(`${sessionDir}/context.md`, contextMd) + +// Cleanup active agents +const registry = JSON.parse(Read(`${sessionDir}/agents/registry.json`)) +for (const agent of registry.active) { + close_agent({ id: agent.id }) +} +registry.active = [] +Write(`${sessionDir}/agents/registry.json`, JSON.stringify(registry, null, 2)) + +// Display summary +const completed = finalCSV.filter(t => t.status === 'completed').length +const failed = finalCSV.filter(t => t.status === 'failed').length +const skipped = finalCSV.filter(t => t.status === 'skipped').length + +console.log(`\n=== Roadmap Development Complete ===`) +console.log(`Completed: ${completed}`) +console.log(`Failed: ${failed}`) +console.log(`Skipped: ${skipped}`) +console.log(`\nResults: ${sessionDir}/results.csv`) +console.log(`Report: ${sessionDir}/context.md`) + +// Offer next steps +const nextStep = AskUserQuestion({ + questions: [{ + question: "Roadmap Dev pipeline complete. 
What would you like to do?", + header: "Completion", + multiSelect: false, + options: [ + { label: "Archive & Clean (Recommended)", description: "Archive session, clean up tasks and team resources" }, + { label: "Keep Active", description: "Keep session active for follow-up work or inspection" }, + { label: "Export Results", description: "Export deliverables to a specified location, then clean" } + ] + }] +}) + +if (nextStep.answers[0] === "Archive & Clean (Recommended)") { + Bash(`tar -czf "${sessionDir}.tar.gz" "${sessionDir}" && rm -rf "${sessionDir}"`) + console.log(`Session archived to ${sessionDir}.tar.gz`) +} +``` + +**Success Criteria**: +- results.csv exported (all tasks, both modes) +- context.md generated +- All interactive agents closed (registry.json cleanup) +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +All agents (both csv-wave and interactive) share a single `discoveries.ndjson` file for exploration findings. + +**Discovery Types**: + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `file_pattern` | `pattern` | `{pattern, files[], description}` | Code patterns discovered | +| `dependency` | `from+to` | `{from, to, type}` | Module dependencies | +| `risk` | `description` | `{description, severity, mitigation}` | Implementation risks | +| `test_gap` | `area` | `{area, description, priority}` | Testing gaps | + +**Write Protocol**: + +```bash +echo '{"ts":"2026-03-08T14:30:22Z","worker":"EXEC-101","type":"file_pattern","data":{"pattern":"auth middleware","files":["src/middleware/auth.ts"],"description":"JWT validation pattern"}}' >> ${sessionDir}/discoveries.ndjson +``` + +**Read Protocol**: + +```javascript +const discoveries = Read(`${sessionDir}/discoveries.ndjson`) + .split('\n') + .filter(line => line.trim()) + .map(line => JSON.parse(line)) +``` + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Circular dependency | Detect in wave 
computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent tasks in later waves | +| Interactive agent timeout | Urge convergence via send_input, then close if still timed out | +| Interactive agent failed | Mark as failed, skip dependents | +| Pre-wave interactive failed | Skip dependent csv-wave tasks in same wave | +| All agents in wave failed | Log error, offer retry or abort | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| Lifecycle leak | Cleanup all active agents via registry.json at end | +| Continue mode: no session found | List available sessions, prompt user to select | +| project-tech.json missing | Invoke workflow:init skill | +| Verifier gaps persist (>3 iterations) | Report to user, ask for manual intervention | + +--- + +## Core Rules + +1. **Start Immediately**: First action is session initialization, then Phase 0 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. **CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive) +4. **CSV First**: Default to csv-wave for tasks; only use interactive when interaction pattern requires it +5. **Context Propagation**: prev_context built from master CSV, not from memory +6. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson — both mechanisms share it +7. **Skip on Failure**: If a dependency failed, skip the dependent task (regardless of mechanism) +8. **Lifecycle Balance**: Every spawn_agent MUST have a matching close_agent (tracked in registry.json) +9. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +10. 
**DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-roadmap-dev/agents/roadmap-discusser.md b/.codex/skills/team-roadmap-dev/agents/roadmap-discusser.md new file mode 100644 index 00000000..aff02c3b --- /dev/null +++ b/.codex/skills/team-roadmap-dev/agents/roadmap-discusser.md @@ -0,0 +1,176 @@ +# Roadmap Discusser Agent + +Interactive agent for discussing roadmap with user and generating phase plan with requirements and success criteria. + +## Identity + +- **Type**: `interactive` +- **Role File**: `~/.codex/agents/roadmap-discusser.md` +- **Responsibility**: Roadmap discussion and phase planning + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Produce structured output following template +- Interact with user via AskUserQuestion +- Generate roadmap.md with phase definitions +- Include requirements (REQ-IDs) and success criteria per phase + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Produce unstructured output +- Execute implementation tasks +- Skip user interaction + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `AskUserQuestion` | Human interaction | Clarify requirements, propose phase breakdown | +| `Read` | File I/O | Load project context | +| `Write` | File I/O | Generate roadmap.md | + +--- + +## Execution + +### Phase 1: Requirement Analysis + +**Objective**: Analyze task description and understand scope. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Task description | Yes | User's task description from arguments | +| .workflow/project-tech.json | No | Project context if available | + +**Steps**: + +1. Read task description from spawn message +2. Load project context if available +3. Identify key requirements and scope +4. 
Detect complexity signals (multi-module, cross-cutting, integration) + +**Output**: Requirement analysis summary + +--- + +### Phase 2: Phase Breakdown Proposal + +**Objective**: Propose logical phase breakdown to user. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Requirement analysis | Yes | From Phase 1 | + +**Steps**: + +1. Analyze requirements to identify logical phases +2. Propose phase breakdown (typically 2-5 phases) +3. For each phase, draft: + - Phase goal (one sentence) + - Key requirements (REQ-IDs) + - Success criteria (measurable) +4. Present to user via AskUserQuestion: + ```javascript + AskUserQuestion({ + questions: [{ + question: "Proposed phase breakdown:\n\nPhase 1: [goal]\n- REQ-001: [requirement]\n- Success: [criteria]\n\nPhase 2: [goal]\n...\n\nApprove or request changes?", + header: "Roadmap Discussion", + multiSelect: false, + options: [ + { label: "Approve", description: "Proceed with this breakdown" }, + { label: "Modify", description: "Request changes to phases" }, + { label: "Cancel", description: "Abort workflow" } + ] + }] + }) + ``` +5. If user requests modifications, iterate on phase breakdown + +**Output**: User-approved phase breakdown + +--- + +### Phase 3: Roadmap Generation + +**Objective**: Generate roadmap.md with structured phase definitions. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Approved phase breakdown | Yes | From Phase 2 | + +**Steps**: + +1. Format roadmap.md with structure: + ```markdown + # Roadmap: [Task Title] + + ## Overview + [Task description and scope] + + ## Phase 1: [Phase Goal] + + ### Requirements + - REQ-101: [Requirement description] + - REQ-102: [Requirement description] + + ### Success Criteria + - [Measurable criterion 1] + - [Measurable criterion 2] + + ## Phase 2: [Phase Goal] + ... + ``` +2. Write roadmap.md to session directory +3. 
Prepare output JSON with roadmap path and phase count + +**Output**: roadmap.md file + JSON result + +--- + +## Structured Output Template + +``` +## Summary +- Generated roadmap with [N] phases for [task description] + +## Findings +- Phase breakdown approved by user +- [N] phases defined with requirements and success criteria +- Roadmap written to: [path] + +## Deliverables +- File: [session]/roadmap.md + Content: Phase definitions with REQ-IDs and success criteria + +## Output JSON +{ + "roadmap_path": "[session]/roadmap.md", + "phase_count": [N], + "summary": "Generated roadmap with [N] phases" +} +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| User cancels | Output partial roadmap, mark as cancelled | +| Project context not found | Continue without project context, note in findings | +| User requests too many phases (>10) | Warn about complexity, suggest consolidation | +| Ambiguous requirements | Ask clarifying questions via AskUserQuestion | diff --git a/.codex/skills/team-roadmap-dev/agents/roadmap-planner.md b/.codex/skills/team-roadmap-dev/agents/roadmap-planner.md new file mode 100644 index 00000000..b5ea890d --- /dev/null +++ b/.codex/skills/team-roadmap-dev/agents/roadmap-planner.md @@ -0,0 +1,194 @@ +# Roadmap Planner Agent + +Interactive agent for research and plan creation per roadmap phase. Gathers codebase context via CLI exploration, then generates wave-based execution plans. 
+ +## Identity + +- **Type**: `interactive` +- **Role File**: `~/.codex/agents/roadmap-planner.md` +- **Responsibility**: Phase planning and task decomposition + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Produce structured output following template +- Use CLI tools for codebase exploration +- Generate IMPL_PLAN.md and task JSON files +- Define convergence criteria per task + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Execute implementation tasks +- Skip CLI exploration step +- Generate tasks without convergence criteria + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Bash` | CLI execution | Run ccw cli for exploration and planning | +| `Read` | File I/O | Load roadmap, context, prior summaries | +| `Write` | File I/O | Generate plan artifacts | +| `Glob` | File search | Find relevant files | + +--- + +## Execution + +### Phase 1: Context Loading + +**Objective**: Load phase requirements and prior context. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| roadmap.md | Yes | Phase definitions from session | +| config.json | Yes | Session configuration | +| Prior summaries | No | Previous phase results | +| discoveries.ndjson | No | Shared exploration findings | + +**Steps**: + +1. Read roadmap.md, extract phase goal, requirements (REQ-IDs), success criteria +2. Read config.json for depth setting (quick/standard/comprehensive) +3. Load prior phase summaries for dependency context +4. Detect gap closure mode (task description contains "Gap closure") + +**Output**: Phase context loaded + +--- + +### Phase 2: Codebase Exploration + +**Objective**: Explore codebase to understand implementation context. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Phase requirements | Yes | From Phase 1 | + +**Steps**: + +1. 
Launch CLI exploration with phase requirements: + ```bash + ccw cli -p "PURPOSE: Explore codebase for phase requirements + TASK: • Identify files needing modification • Map patterns and dependencies • Assess test infrastructure • Identify risks + MODE: analysis + CONTEXT: @**/* | Memory: Phase goal: ${phaseGoal} + EXPECTED: Structured exploration results with file lists, patterns, risks + CONSTRAINTS: Read-only analysis" --tool gemini --mode analysis + ``` +2. Wait for CLI completion (run_in_background: false) +3. Parse exploration results +4. Write context.md combining roadmap requirements + exploration results + +**Output**: context.md with exploration findings + +--- + +### Phase 3: Plan Generation + +**Objective**: Generate wave-based execution plan with task breakdown. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| context.md | Yes | From Phase 2 | + +**Steps**: + +1. Load context.md +2. Create output directory: phase-{N}/.task/ +3. Delegate to CLI planning tool: + ```bash + ccw cli -p "PURPOSE: Generate wave-based execution plan for phase ${phaseNum} + TASK: • Break down requirements into tasks • Define convergence criteria • Build dependency graph • Assign waves + MODE: write + CONTEXT: @${contextMd} | Memory: ${priorSummaries} + EXPECTED: IMPL_PLAN.md + IMPL-*.json files + TODO_LIST.md + CONSTRAINTS: <= 10 tasks | Valid DAG | Measurable convergence criteria" --tool gemini --mode write + ``` +4. Wait for CLI completion +5. CLI tool produces: IMPL_PLAN.md, .task/IMPL-*.json, TODO_LIST.md +6. If gap closure: only create tasks for gaps, starting from next available ID + +**Output**: IMPL_PLAN.md + task JSON files + +--- + +### Phase 4: Self-Validation + +**Objective**: Validate generated plan for completeness and correctness. 
+ +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| IMPL_PLAN.md | Yes | From Phase 3 | +| .task/IMPL-*.json | Yes | Task definitions | + +**Steps**: + +1. Check task JSON files exist (>= 1 IMPL-*.json found) +2. Validate required fields: id, title, description, files, implementation, convergence +3. Check convergence criteria (each task has >= 1 criterion) +4. Validate no self-dependency (task.id not in task.depends_on) +5. Validate all deps valid (every depends_on ID exists) +6. Check IMPL_PLAN.md exists (generate minimal version if missing) +7. Compute wave structure from dependency graph for reporting + +**Output**: Validation report + wave structure + +--- + +## Structured Output Template + +``` +## Summary +- Generated implementation plan for phase {phase} with {N} tasks across {M} waves + +## Findings +- Exploration identified {X} files needing modification +- Key patterns: [pattern list] +- Risks: [risk list] +- Task breakdown validated with no circular dependencies + +## Deliverables +- File: phase-{N}/IMPL_PLAN.md + Content: Wave-based execution plan +- File: phase-{N}/.task/IMPL-*.json + Content: Task definitions with convergence criteria +- File: phase-{N}/TODO_LIST.md + Content: Task checklist + +## Output JSON +{ + "plan_path": "phase-{N}/IMPL_PLAN.md", + "task_count": {N}, + "wave_count": {M}, + "files_affected": [file list], + "summary": "Generated plan with {N} tasks" +} +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| CLI exploration fails | Use fallback file search, note limitation | +| CLI planning fails | Generate minimal plan manually, warn user | +| Circular dependency detected | Remove cycle, log warning | +| No convergence criteria | Add default criteria, log warning | +| Task count exceeds 10 | Consolidate tasks, warn about complexity | diff --git a/.codex/skills/team-roadmap-dev/agents/roadmap-verifier.md 
b/.codex/skills/team-roadmap-dev/agents/roadmap-verifier.md new file mode 100644 index 00000000..7b901dc2 --- /dev/null +++ b/.codex/skills/team-roadmap-dev/agents/roadmap-verifier.md @@ -0,0 +1,221 @@ +# Roadmap Verifier Agent + +Interactive agent for testing and validating phase implementation against success criteria. Identifies gaps and triggers gap closure if needed. + +## Identity + +- **Type**: `interactive` +- **Role File**: `~/.codex/agents/roadmap-verifier.md` +- **Responsibility**: Phase verification and gap detection + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Produce structured output following template +- Test implementation against success criteria +- Identify gaps with specific remediation steps +- Limit gap closure iterations to 3 per phase + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Execute implementation tasks +- Skip testing step +- Approve phase with unmet success criteria without documenting gaps + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Bash` | CLI execution | Run tests, linters, build commands | +| `Read` | File I/O | Load implementation, success criteria | +| `Write` | File I/O | Generate verification report | +| `Glob` | File search | Find test files | + +--- + +## Execution + +### Phase 1: Context Loading + +**Objective**: Load phase implementation and success criteria. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| roadmap.md | Yes | Phase success criteria | +| Execution task findings | Yes | From prev_context | +| discoveries.ndjson | No | Shared exploration findings | + +**Steps**: + +1. Read roadmap.md, extract phase success criteria +2. Load execution task findings from prev_context +3. Read discoveries.ndjson for implementation notes +4. 
Identify files modified during execution + +**Output**: Verification context loaded + +--- + +### Phase 2: Testing Execution + +**Objective**: Run tests and validation checks. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Modified files | Yes | From Phase 1 | +| Test files | No | Discovered via Glob | + +**Steps**: + +1. Identify test files related to modified code +2. Run relevant tests: + ```bash + npm test -- [test-pattern] + ``` +3. Run linter/type checker: + ```bash + npm run lint + npm run type-check + ``` +4. Check build succeeds: + ```bash + npm run build + ``` +5. Collect test results, errors, warnings + +**Output**: Test execution results + +--- + +### Phase 3: Gap Analysis + +**Objective**: Compare implementation against success criteria and identify gaps. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Success criteria | Yes | From roadmap.md | +| Test results | Yes | From Phase 2 | +| Implementation findings | Yes | From execution tasks | + +**Steps**: + +1. For each success criterion: + - Check if met by implementation + - Check if validated by tests + - Document status: met / partial / unmet +2. Identify gaps: + - Missing functionality + - Failing tests + - Unmet success criteria +3. For each gap, define: + - Gap description + - Severity (critical / high / medium / low) + - Remediation steps +4. Check gap closure iteration count (max 3) + +**Output**: Gap analysis with remediation steps + +--- + +### Phase 4: Verification Report + +**Objective**: Generate verification report and output results. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Gap analysis | Yes | From Phase 3 | + +**Steps**: + +1. 
Generate verification.md: + ```markdown + # Phase {N} Verification + + ## Success Criteria Status + - [✓] Criterion 1: Met + - [✗] Criterion 2: Unmet - [gap description] + + ## Test Results + - Tests passed: {X}/{Y} + - Build status: [success/failed] + - Linter warnings: {Z} + + ## Gaps Identified + ### Gap 1: [Description] + - Severity: [critical/high/medium/low] + - Remediation: [steps] + + ## Recommendation + [Approve / Gap Closure Required] + ``` +2. Write verification.md to phase directory +3. Prepare output JSON with gap list + +**Output**: verification.md + JSON result + +--- + +## Structured Output Template + +``` +## Summary +- Phase {phase} verification complete: {X}/{Y} success criteria met + +## Findings +- Tests passed: {X}/{Y} +- Build status: [success/failed] +- Gaps identified: {N} ([critical/high/medium/low] breakdown) + +## Gaps +- Gap 1: [description] (severity: [level]) + Remediation: [steps] +- Gap 2: [description] (severity: [level]) + Remediation: [steps] + +## Deliverables +- File: phase-{N}/verification.md + Content: Verification report with gap analysis + +## Output JSON +{ + "verification_path": "phase-{N}/verification.md", + "criteria_met": {X}, + "criteria_total": {Y}, + "gaps": [ + { + "description": "[gap description]", + "severity": "[critical/high/medium/low]", + "remediation": "[steps]" + } + ], + "recommendation": "approve" | "gap_closure_required", + "summary": "Phase {phase} verification: {X}/{Y} criteria met" +} +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Tests fail to run | Document as gap, continue verification | +| Build fails | Mark as critical gap, recommend gap closure | +| No test files found | Note in findings, continue with manual verification | +| Gap closure iterations exceed 3 | Report to user, recommend manual intervention | +| Success criteria ambiguous | Document interpretation, ask for clarification | diff --git 
a/.codex/skills/team-roadmap-dev/instructions/executor-instruction.md b/.codex/skills/team-roadmap-dev/instructions/executor-instruction.md new file mode 100644 index 00000000..ce9e1f2e --- /dev/null +++ b/.codex/skills/team-roadmap-dev/instructions/executor-instruction.md @@ -0,0 +1,55 @@ +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS +1. Read shared discoveries: {session_folder}/discoveries.ndjson (if exists, skip if not) +2. Read project context: .workflow/project-tech.json (if exists) +3. Read implementation plan: {session_folder}/phase-{phase}/IMPL_PLAN.md +4. Read task details: {session_folder}/phase-{phase}/.task/{id}.json (if exists) + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Description**: {description} +**Phase**: {phase} +**Role**: {role} + +### Previous Tasks' Findings (Context) +{prev_context} + +--- + +## Execution Protocol + +1. **Read discoveries**: Load {session_folder}/discoveries.ndjson for shared exploration findings +2. **Use context**: Apply previous tasks' findings from prev_context above +3. **Execute**: Implement the task following the implementation plan and task details + - Read target files listed in description + - Apply changes following project conventions + - Validate changes compile/lint correctly + - Run relevant tests if available +4. **Share discoveries**: Append exploration findings to shared board: + ```bash + echo '{"ts":"","worker":"{id}","type":"","data":{...}}' >> {session_folder}/discoveries.ndjson + ``` +5. 
**Report result**: Return JSON via report_agent_job_result + +### Discovery Types to Share +- `file_pattern`: `{pattern, files[], description}` — Code patterns discovered +- `dependency`: `{from, to, type}` — Module dependencies identified +- `risk`: `{description, severity, mitigation}` — Implementation risks +- `test_gap`: `{area, description, priority}` — Testing gaps identified + +--- + +## Output (report_agent_job_result) + +Return JSON: +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Key discoveries and implementation notes (max 500 chars)", + "error": "" +} diff --git a/.codex/skills/team-roadmap-dev/schemas/tasks-schema.md b/.codex/skills/team-roadmap-dev/schemas/tasks-schema.md new file mode 100644 index 00000000..d654196b --- /dev/null +++ b/.codex/skills/team-roadmap-dev/schemas/tasks-schema.md @@ -0,0 +1,144 @@ +# Roadmap-Driven Development — CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### Input Columns (Set by Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | string | Yes | Unique task identifier | `"PLAN-101"` | +| `title` | string | Yes | Short task title | `"Phase 1 Planning"` | +| `description` | string | Yes | Detailed task description (self-contained) | `"Research and plan for authentication module..."` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"PLAN-101"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"PLAN-101"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | +| `phase` | integer | Yes | Phase number (1-based) | `1` | +| `role` | enum | Yes | Role name: `planner`, `executor`, `verifier` | `"executor"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from topological sort) | `2` | 
+| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[PLAN-101] Created implementation plan..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` → `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Implemented JWT middleware in src/middleware/auth.ts..."` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution | + +Interactive tasks appear in master CSV for dependency tracking but are NOT included in wave-{N}.csv files. + +--- + +### Example Data + +```csv +id,title,description,deps,context_from,exec_mode,phase,role,wave,status,findings,error +PLAN-101,Phase 1 Planning,Research and plan for authentication module,,,"interactive",1,planner,1,pending,"","" +EXEC-101,Implement auth routes,Create Express routes for login/logout/register,PLAN-101,PLAN-101,"csv-wave",1,executor,2,pending,"","" +EXEC-102,Implement JWT middleware,Create JWT token generation and validation,PLAN-101,PLAN-101,"csv-wave",1,executor,2,pending,"","" +VERIFY-101,Verify Phase 1,Test and validate phase 1 implementation,"EXEC-101;EXEC-102","EXEC-101;EXEC-102","interactive",1,verifier,3,pending,"","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +───────────────────── ──────────────────── ───────────────── +id ───────────► id ──────────► id +title ───────────► title ──────────► (reads) +description ───────────► description ──────────► (reads) +deps ───────────► deps ──────────► (reads) +context_from───────────► context_from──────────► (reads) 
+exec_mode ───────────► exec_mode ──────────► (reads) +phase ───────────► phase ──────────► (reads) +role ───────────► role ──────────► (reads) + wave ──────────► (reads) + prev_context ──────────► (reads) + status + findings + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "EXEC-101", + "status": "completed", + "findings": "Implemented authentication routes in src/routes/auth.ts with login, logout, and register endpoints. Added input validation and error handling.", + "error": "" +} +``` + +Interactive tasks output via structured text or JSON written to `interactive/{id}-result.json`. + +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `file_pattern` | `pattern` | `{pattern, files[], description}` | Code patterns discovered during exploration | +| `dependency` | `from+to` | `{from, to, type}` | Module dependencies identified | +| `risk` | `description` | `{description, severity, mitigation}` | Implementation risks and concerns | +| `test_gap` | `area` | `{area, description, priority}` | Testing gaps identified during verification | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T14:30:22Z","worker":"EXEC-101","type":"file_pattern","data":{"pattern":"auth middleware","files":["src/middleware/auth.ts"],"description":"JWT validation pattern"}} +{"ts":"2026-03-08T14:35:10Z","worker":"EXEC-102","type":"dependency","data":{"from":"auth.ts","to":"jwt.ts","type":"import"}} +{"ts":"2026-03-08T15:20:45Z","worker":"VERIFY-101","type":"test_gap","data":{"area":"token refresh","description":"No tests for token refresh flow","priority":"high"}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. 
+ +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| CSV task findings | Interactive task | Injected via spawn message or send_input | +| Interactive task result | CSV task prev_context | Read from interactive/{id}-result.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Description non-empty | Every task has description | "Empty description for task: {id}" | +| Status enum | status ∈ {pending, completed, failed, skipped} | "Invalid status: {status}" | +| Cross-mechanism deps | Interactive→CSV deps resolve correctly | "Cross-mechanism dependency unresolvable: {id}" | +| Role valid | role ∈ {planner, executor, verifier} | "Invalid role: {role}" | +| Phase valid | phase >= 1 | "Invalid phase: {phase}" | diff --git a/.codex/skills/team-tech-debt/SKILL.md b/.codex/skills/team-tech-debt/SKILL.md new file mode 100644 index 00000000..48678f38 --- /dev/null +++ b/.codex/skills/team-tech-debt/SKILL.md @@ -0,0 +1,670 @@ +--- +name: team-tech-debt +description: Systematic tech debt governance with CSV wave pipeline. Scans codebase for tech debt across 5 dimensions, assesses severity with priority matrix, plans phased remediation, executes fixes in worktree, validates with 4-layer checks. Supports scan/remediate/targeted pipeline modes with fix-verify GC loop. 
+argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] [--mode=scan|remediate|targeted] \"scope or description\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults. + +# Team Tech Debt + +## Usage + +```bash +$team-tech-debt "Scan and fix tech debt in src/ module" +$team-tech-debt --mode=scan "Audit codebase for tech debt" +$team-tech-debt --mode=targeted "Fix known TODO/FIXME items in auth module" +$team-tech-debt -c 4 -y "Full remediation pipeline for entire project" +$team-tech-debt --continue "td-auth-debt-20260308" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 3) +- `--continue`: Resume existing session +- `--mode=scan`: Scan and assess only, no fixes +- `--mode=targeted`: Skip scan/assess, direct fix path for known debt +- `--mode=remediate`: Full pipeline (default) + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Systematic tech debt governance: scan -> assess -> plan -> fix -> validate. Five specialized worker roles execute as CSV wave agents, with interactive agents for plan approval checkpoints and fix-verify GC loops. 
+ +**Execution Model**: Hybrid -- CSV wave pipeline (primary) + individual agent spawn (secondary) + +``` ++-------------------------------------------------------------------+ +| TEAM TECH DEBT WORKFLOW | ++-------------------------------------------------------------------+ +| | +| Phase 0: Pre-Wave Interactive (Requirement Clarification) | +| +- Parse mode (scan/remediate/targeted) | +| +- Clarify scope and focus areas | +| +- Output: pipeline mode + scope for decomposition | +| | +| Phase 1: Requirement -> CSV + Classification | +| +- Select pipeline mode (scan/remediate/targeted) | +| +- Build task chain with fixed role assignments | +| +- Classify tasks: csv-wave | interactive (exec_mode) | +| +- Compute dependency waves (linear chain) | +| +- Generate tasks.csv with wave + exec_mode columns | +| +- User validates task breakdown (skip if -y) | +| | +| Phase 2: Wave Execution Engine (Extended) | +| +- For each wave (1..N): | +| | +- Execute pre-wave interactive tasks (plan approval) | +| | +- Build wave CSV (filter csv-wave tasks for this wave) | +| | +- Inject previous findings into prev_context column | +| | +- spawn_agents_on_csv(wave CSV) | +| | +- Execute post-wave interactive tasks (if any) | +| | +- Merge all results into master tasks.csv | +| | +- Check: any failed? 
-> skip dependents | +| | +- TDVAL checkpoint: GC loop check | +| +- discoveries.ndjson shared across all modes (append-only) | +| | +| Phase 3: Post-Wave Interactive (Completion + PR) | +| +- PR creation (if worktree mode, validation passed) | +| +- Debt reduction metrics report | +| +- Interactive completion choice | +| | +| Phase 4: Results Aggregation | +| +- Export final results.csv | +| +- Generate context.md with debt metrics | +| +- Display summary: debt scores, reduction rate | +| +- Offer: new target | deep fix | close | +| | ++-------------------------------------------------------------------+ +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot scan, assessment, planning, execution, validation | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Plan approval checkpoint, fix-verify GC loop management | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Multi-dimension debt scan (TDSCAN) | `csv-wave` | +| Quantitative assessment (TDEVAL) | `csv-wave` | +| Remediation planning (TDPLAN) | `csv-wave` | +| Plan approval gate | `interactive` | +| Debt cleanup execution (TDFIX) | `csv-wave` | +| Cleanup validation (TDVAL) | `csv-wave` | +| Fix-verify GC loop management | `interactive` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,role,debt_dimension,pipeline_mode,deps,context_from,exec_mode,wave,status,findings,debt_items_count,artifacts_produced,error +"TDSCAN-001","Multi-dimension debt scan","Scan codebase across 5 dimensions for tech debt items","scanner","all","remediate","","","csv-wave","1","pending","","0","","" +"TDEVAL-001","Severity assessment","Quantify impact and fix cost for each debt 
item","assessor","all","remediate","TDSCAN-001","TDSCAN-001","csv-wave","2","pending","","0","","" +"TDPLAN-001","Remediation planning","Create phased remediation plan from priority matrix","planner","all","remediate","TDEVAL-001","TDEVAL-001","csv-wave","3","pending","","0","","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier (TDPREFIX-NNN) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description with scope and context | +| `role` | Input | Worker role: scanner, assessor, planner, executor, validator | +| `debt_dimension` | Input | `all`, `code`, `architecture`, `testing`, `dependency`, `documentation` | +| `pipeline_mode` | Input | `scan`, `remediate`, `targeted` | +| `deps` | Input | Semicolon-separated dependency task IDs | +| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `wave` | Computed | Wave number (1-based) | +| `status` | Output | `pending` -> `completed` / `failed` / `skipped` | +| `findings` | Output | Key discoveries or execution notes (max 500 chars) | +| `debt_items_count` | Output | Number of debt items found/fixed/validated | +| `artifacts_produced` | Output | Semicolon-separated paths of produced artifacts | +| `error` | Output | Error message if failed | + +### Per-Wave CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). 
+ +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| Plan Approver | agents/plan-approver.md | 2.3 (send_input cycle) | Review remediation plan, approve/revise/abort | pre-wave (before TDFIX) | +| GC Loop Manager | agents/gc-loop-manager.md | 2.3 (send_input cycle) | Manage fix-verify loop, create retry tasks | post-wave (after TDVAL) | + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. + +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state -- all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 4 | +| `discoveries.ndjson` | Shared exploration board (all agents) | Append-only, carries across waves | +| `context.md` | Human-readable report with debt metrics | Created in Phase 4 | +| `scan/debt-inventory.json` | Scanner output: structured debt inventory | Created by TDSCAN | +| `assessment/priority-matrix.json` | Assessor output: prioritized debt items | Created by TDEVAL | +| `plan/remediation-plan.md` | Planner output: phased fix plan | Created by TDPLAN | +| `plan/remediation-plan.json` | Planner output: machine-readable plan | Created by TDPLAN | +| `fixes/fix-log.json` | Executor output: fix results | Created by TDFIX | +| `validation/validation-report.json` | Validator output: validation results | Created by TDVAL | +| `interactive/{id}-result.json` | Results from interactive tasks | Created per interactive task | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ ++-- tasks.csv # Master state ++-- results.csv # Final results ++-- discoveries.ndjson # 
Shared discovery board ++-- context.md # Human-readable report ++-- wave-{N}.csv # Temporary per-wave input ++-- scan/ +| +-- debt-inventory.json # Scanner output ++-- assessment/ +| +-- priority-matrix.json # Assessor output ++-- plan/ +| +-- remediation-plan.md # Planner output (human) +| +-- remediation-plan.json # Planner output (machine) ++-- fixes/ +| +-- fix-log.json # Executor output ++-- validation/ +| +-- validation-report.json # Validator output ++-- interactive/ +| +-- {id}-result.json # Interactive task results ++-- wisdom/ + +-- learnings.md + +-- decisions.md +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +const getUtc8ISOString = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString() + +const AUTO_YES = $ARGUMENTS.includes('--yes') || $ARGUMENTS.includes('-y') +const continueMode = $ARGUMENTS.includes('--continue') +const concurrencyMatch = $ARGUMENTS.match(/(?:--concurrency|-c)\s+(\d+)/) +const maxConcurrency = concurrencyMatch ? parseInt(concurrencyMatch[1]) : 3 + +// Detect pipeline mode +let pipelineMode = 'remediate' +if ($ARGUMENTS.includes('--mode=scan')) pipelineMode = 'scan' +else if ($ARGUMENTS.includes('--mode=targeted')) pipelineMode = 'targeted' + +const requirement = $ARGUMENTS + .replace(/--yes|-y|--continue|--concurrency\s+\d+|-c\s+\d+|--mode=\w+/g, '') + .trim() + +const slug = requirement.toLowerCase() + .replace(/[^a-z0-9\u4e00-\u9fa5]+/g, '-') + .substring(0, 40) +const dateStr = getUtc8ISOString().substring(0, 10).replace(/-/g, '') +const sessionId = `td-${slug}-${dateStr}` +const sessionFolder = `.workflow/.csv-wave/${sessionId}` + +Bash(`mkdir -p ${sessionFolder}/scan ${sessionFolder}/assessment ${sessionFolder}/plan ${sessionFolder}/fixes ${sessionFolder}/validation ${sessionFolder}/interactive ${sessionFolder}/wisdom`) + +// Initialize discoveries.ndjson +Write(`${sessionFolder}/discoveries.ndjson`, '') +Write(`${sessionFolder}/wisdom/learnings.md`, '# Learnings\n') 
+Write(`${sessionFolder}/wisdom/decisions.md`, '# Decisions\n') +``` + +--- + +### Phase 0: Pre-Wave Interactive (Requirement Clarification) + +**Objective**: Parse mode, clarify scope, prepare pipeline configuration. + +**Workflow**: + +1. **Detect mode from arguments** (--mode=scan/remediate/targeted) or from keywords: + +| Keywords | Mode | +|----------|------| +| scan, audit, assess | scan | +| targeted, specific, fix known | targeted | +| Default | remediate | + +2. **Clarify scope** (skip if AUTO_YES): + ```javascript + AskUserQuestion({ + questions: [{ + question: "Tech debt governance scope:", + header: "Scope Selection", + multiSelect: false, + options: [ + { label: "Full project scan", description: "Scan entire codebase" }, + { label: "Specific module", description: "Target specific directory" }, + { label: "Custom scope", description: "Specify file patterns" } + ] + }] + }) + ``` + +3. **Detect debt dimensions** from task description: + +| Keywords | Dimension | +|----------|-----------| +| code quality, complexity, smell | code | +| architecture, coupling, structure | architecture | +| test, coverage, quality | testing | +| dependency, outdated, vulnerable | dependency | +| documentation, api doc, comments | documentation | +| Default | all | + +4. **Output**: pipeline mode, scope, focus dimensions + +**Success Criteria**: +- Pipeline mode determined +- Scope and dimensions clarified + +--- + +### Phase 1: Requirement -> CSV + Classification + +**Objective**: Build task chain based on pipeline mode, generate tasks.csv. 
+
+**Pipeline Definitions**:
+
+| Mode | Task Chain |
+|------|------------|
+| scan | TDSCAN-001 -> TDEVAL-001 |
+| remediate | TDSCAN-001 -> TDEVAL-001 -> TDPLAN-001 -> (plan-approval) -> TDFIX-001 -> TDVAL-001 |
+| targeted | TDPLAN-001 -> (plan-approval) -> TDFIX-001 -> TDVAL-001 |
+
+**Task Registry**:
+
+| Task ID | Role | Prefix | exec_mode | Wave | Description |
+|---------|------|--------|-----------|------|-------------|
+| TDSCAN-001 | scanner | TDSCAN | csv-wave | 1 | Multi-dimension codebase scan |
+| TDEVAL-001 | assessor | TDEVAL | csv-wave | 2 | Severity assessment with priority matrix |
+| PLAN-APPROVE | - | - | interactive | 3 (pre-wave) | Plan approval checkpoint |
+| TDPLAN-001 | planner | TDPLAN | csv-wave | 3 | Phased remediation plan |
+| TDFIX-001 | executor | TDFIX | csv-wave | 4 | Worktree-based incremental fixes |
+| TDVAL-001 | validator | TDVAL | csv-wave | 5 | 4-layer validation |
+
+**Worktree Creation** (before TDFIX, remediate mode):
+```bash
+git worktree add .worktrees/td-{slug}-{date} -b tech-debt/td-{slug}-{date}
+```
+
+**Wave Computation**: Linear chain, waves assigned by position in pipeline.
+
+**User Validation**: Display pipeline with mode and task chain (skip if AUTO_YES).
+
+**Success Criteria**:
+- tasks.csv created with correct pipeline chain
+- No circular dependencies
+- User approved (or AUTO_YES)
+
+---
+
+### Phase 2: Wave Execution Engine (Extended)
+
+**Objective**: Execute tasks wave-by-wave with checkpoints and GC loop support.
+ +```javascript +const masterCsv = Read(`${sessionFolder}/tasks.csv`) +let tasks = parseCsv(masterCsv) +const maxWave = Math.max(...tasks.map(t => t.wave)) +let gcRounds = 0 +const MAX_GC_ROUNDS = 3 + +for (let wave = 1; wave <= maxWave; wave++) { + const waveTasks = tasks.filter(t => t.wave === wave && t.status === 'pending') + const csvTasks = waveTasks.filter(t => t.exec_mode === 'csv-wave') + const interactiveTasks = waveTasks.filter(t => t.exec_mode === 'interactive') + + // Check dependencies + for (const task of waveTasks) { + const depIds = (task.deps || '').split(';').filter(Boolean) + const depStatuses = depIds.map(id => tasks.find(t => t.id === id)?.status) + if (depStatuses.some(s => s === 'failed' || s === 'skipped')) { + task.status = 'skipped' + task.error = `Dependency failed` + } + } + + // Pre-wave interactive: Plan Approval Gate (after TDPLAN completes) + if (interactiveTasks.some(t => t.id === 'PLAN-APPROVE' && t.status === 'pending')) { + Read('agents/plan-approver.md') + const planTask = interactiveTasks.find(t => t.id === 'PLAN-APPROVE') + + const agent = spawn_agent({ + message: `## PLAN REVIEW\n\n### MANDATORY FIRST STEPS\n1. Read: ${sessionFolder}/plan/remediation-plan.md\n2. Read: ${sessionFolder}/discoveries.ndjson\n\nReview the remediation plan and decide: Approve / Revise / Abort\n\nSession: ${sessionFolder}` + }) + const result = wait({ ids: [agent], timeout_ms: 600000 }) + + // Parse decision + if (result includes "Abort") { + // Skip remaining pipeline + for (const t of tasks.filter(t => t.status === 'pending')) t.status = 'skipped' + } else if (result includes "Revise") { + // Create revision task, re-run planner + // ... 
create TDPLAN-revised task + } + // Approve: continue normally + + close_agent({ id: agent }) + planTask.status = 'completed' + + // Create worktree for fix execution + if (pipelineMode === 'remediate' || pipelineMode === 'targeted') { + Bash(`git worktree add .worktrees/${sessionId} -b tech-debt/${sessionId}`) + } + } + + // Execute csv-wave tasks + const pendingCsvTasks = csvTasks.filter(t => t.status === 'pending') + for (const task of pendingCsvTasks) { + task.prev_context = buildPrevContext(task, tasks) + } + + if (pendingCsvTasks.length > 0) { + Write(`${sessionFolder}/wave-${wave}.csv`, toCsv(pendingCsvTasks)) + + // Select instruction based on role + const role = pendingCsvTasks[0].role + const instruction = Read(`instructions/agent-instruction.md`) + // Customize instruction for role (scanner/assessor/planner/executor/validator) + + spawn_agents_on_csv({ + csv_path: `${sessionFolder}/wave-${wave}.csv`, + id_column: "id", + instruction: buildRoleInstruction(role, sessionFolder, wave), + max_concurrency: maxConcurrency, + max_runtime_seconds: 900, + output_csv_path: `${sessionFolder}/wave-${wave}-results.csv`, + output_schema: { + type: "object", + properties: { + id: { type: "string" }, + status: { type: "string", enum: ["completed", "failed"] }, + findings: { type: "string" }, + debt_items_count: { type: "string" }, + artifacts_produced: { type: "string" }, + error: { type: "string" } + } + } + }) + + // Merge results + const results = parseCsv(Read(`${sessionFolder}/wave-${wave}-results.csv`)) + for (const r of results) { + const t = tasks.find(t => t.id === r.id) + if (t) Object.assign(t, r) + } + } + + // Post-wave: TDVAL GC Loop Check + const completedVal = tasks.find(t => t.id.startsWith('TDVAL') && t.status === 'completed' && t.wave === wave) + if (completedVal) { + // Read validation results + const valReport = JSON.parse(Read(`${sessionFolder}/validation/validation-report.json`)) + + if (!valReport.passed && gcRounds < MAX_GC_ROUNDS) { + gcRounds++ 
+ // Create fix-verify retry tasks + const fixId = `TDFIX-fix-${gcRounds}` + const valId = `TDVAL-recheck-${gcRounds}` + tasks.push({ + id: fixId, title: `Fix regressions (GC #${gcRounds})`, role: 'executor', + description: `Fix regressions found in validation round ${gcRounds}`, + debt_dimension: 'all', pipeline_mode: pipelineMode, + deps: completedVal.id, context_from: completedVal.id, + exec_mode: 'csv-wave', wave: wave + 1, status: 'pending', + findings: '', debt_items_count: '0', artifacts_produced: '', error: '' + }) + tasks.push({ + id: valId, title: `Revalidate (GC #${gcRounds})`, role: 'validator', + description: `Revalidate after fix round ${gcRounds}`, + debt_dimension: 'all', pipeline_mode: pipelineMode, + deps: fixId, context_from: fixId, + exec_mode: 'csv-wave', wave: wave + 2, status: 'pending', + findings: '', debt_items_count: '0', artifacts_produced: '', error: '' + }) + // Extend maxWave + } else if (!valReport.passed && gcRounds >= MAX_GC_ROUNDS) { + // Accept current state + console.log(`Max GC rounds (${MAX_GC_ROUNDS}) reached. Accepting current state.`) + } + } + + // Update master CSV + Write(`${sessionFolder}/tasks.csv`, toCsv(tasks)) + Bash(`rm -f ${sessionFolder}/wave-${wave}.csv ${sessionFolder}/wave-${wave}-results.csv`) +} +``` + +**Success Criteria**: +- All waves executed in order +- Plan approval checkpoint enforced before fix execution +- GC loop properly bounded (max 3 rounds) +- Worktree created for fix execution +- discoveries.ndjson accumulated across all waves + +--- + +### Phase 3: Post-Wave Interactive (Completion + PR) + +**Objective**: Create PR from worktree if validation passed, generate debt reduction report. 
+ +```javascript +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +const allCompleted = tasks.every(t => t.status === 'completed' || t.status === 'skipped') + +// PR Creation (if worktree exists and validation passed) +const worktreePath = `.worktrees/${sessionId}` +const valReport = JSON.parse(Read(`${sessionFolder}/validation/validation-report.json`) || '{}') +if (valReport.passed && fileExists(worktreePath)) { + Bash(`cd ${worktreePath} && git add -A && git commit -m "tech-debt: remediate debt items (${sessionId})" && git push -u origin tech-debt/${sessionId}`) + Bash(`gh pr create --title "Tech Debt Remediation: ${sessionId}" --body "Automated tech debt cleanup. See ${sessionFolder}/context.md for details."`) + Bash(`git worktree remove ${worktreePath}`) +} + +// Debt reduction metrics +const scanReport = JSON.parse(Read(`${sessionFolder}/scan/debt-inventory.json`) || '{}') +const debtBefore = scanReport.total_items || 0 +const debtAfter = valReport.debt_score_after || 0 +const reductionRate = debtBefore > 0 ? Math.round(((debtBefore - debtAfter) / debtBefore) * 100) : 0 + +console.log(` +============================================ +TECH DEBT GOVERNANCE COMPLETE + +Mode: ${pipelineMode} +Debt Items Found: ${debtBefore} +Debt Items Fixed: ${debtBefore - debtAfter} +Reduction Rate: ${reductionRate}% +GC Rounds: ${gcRounds}/${MAX_GC_ROUNDS} +Validation: ${valReport.passed ? 
'PASSED' : 'FAILED'} + +Session: ${sessionFolder} +============================================ +`) + +// Completion action +if (!AUTO_YES) { + AskUserQuestion({ + questions: [{ + question: "What next?", + header: "Completion", + multiSelect: false, + options: [ + { label: "New target", description: "Run another scan/fix cycle" }, + { label: "Deep fix", description: "Continue fixing remaining items" }, + { label: "Close", description: "Archive session" } + ] + }] + }) +} +``` + +**Success Criteria**: +- PR created if applicable +- Debt metrics calculated and reported +- User informed of next steps + +--- + +### Phase 4: Results Aggregation + +**Objective**: Generate final results and human-readable report. + +```javascript +Bash(`cp ${sessionFolder}/tasks.csv ${sessionFolder}/results.csv`) + +let contextMd = `# Tech Debt Governance Report\n\n` +contextMd += `**Session**: ${sessionId}\n` +contextMd += `**Mode**: ${pipelineMode}\n` +contextMd += `**Date**: ${getUtc8ISOString().substring(0, 10)}\n\n` + +contextMd += `## Debt Metrics\n` +contextMd += `| Metric | Value |\n|--------|-------|\n` +contextMd += `| Items Found | ${debtBefore} |\n` +contextMd += `| Items Fixed | ${debtBefore - debtAfter} |\n` +contextMd += `| Reduction Rate | ${reductionRate}% |\n` +contextMd += `| GC Rounds | ${gcRounds} |\n` +contextMd += `| Validation | ${valReport.passed ? 'PASSED' : 'FAILED'} |\n\n` + +contextMd += `## Pipeline Execution\n\n` +for (const t of tasks) { + const icon = t.status === 'completed' ? '[DONE]' : t.status === 'failed' ? 
'[FAIL]' : '[SKIP]' + contextMd += `${icon} **${t.title}** [${t.role}] ${t.findings || ''}\n\n` +} + +Write(`${sessionFolder}/context.md`, contextMd) +``` + +**Success Criteria**: +- results.csv exported +- context.md generated with debt metrics +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +**Format**: NDJSON (one JSON per line) + +**Discovery Types**: + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `debt_item_found` | `data.file+data.line` | `{id, dimension, severity, file, line, description, suggestion}` | Tech debt item identified | +| `pattern_found` | `data.pattern_name+data.location` | `{pattern_name, location, description}` | Code pattern (anti-pattern) found | +| `fix_applied` | `data.file+data.change` | `{file, change, lines_modified, debt_id}` | Fix applied to debt item | +| `regression_found` | `data.file+data.test` | `{file, test, description, severity}` | Regression found during validation | +| `dependency_issue` | `data.package+data.issue` | `{package, current, latest, issue, severity}` | Dependency problem | +| `metric_recorded` | `data.metric` | `{metric, value, dimension, file}` | Quality metric recorded | + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"TDSCAN-001","type":"debt_item_found","data":{"id":"TD-001","dimension":"code","severity":"high","file":"src/auth/jwt.ts","line":42,"description":"Complexity > 15","suggestion":"Extract helper functions"}} +{"ts":"2026-03-08T10:15:00Z","worker":"TDFIX-001","type":"fix_applied","data":{"file":"src/auth/jwt.ts","change":"Extracted 3 helper functions","lines_modified":25,"debt_id":"TD-001"}} +``` + +--- + +## Checkpoints + +| Checkpoint | Trigger | Condition | Action | +|------------|---------|-----------|--------| +| Plan Approval Gate | TDPLAN-001 completes | Always (remediate/targeted mode) | Interactive: Approve / Revise / Abort | +| Worktree Creation | Plan approved | Before TDFIX | `git worktree add 
.worktrees/{session-id}` | +| Fix-Verify GC Loop | TDVAL-* completes | Regressions found | Create TDFIX-fix-N + TDVAL-recheck-N (max 3 rounds) | + +--- + +## Pipeline Mode Details + +### Scan Mode +``` +Wave 1: TDSCAN-001 (scanner) -> Scan 5 dimensions +Wave 2: TDEVAL-001 (assessor) -> Priority matrix +``` + +### Remediate Mode (Full Pipeline) +``` +Wave 1: TDSCAN-001 (scanner) -> Scan 5 dimensions +Wave 2: TDEVAL-001 (assessor) -> Priority matrix +Wave 3: TDPLAN-001 (planner) -> Remediation plan + PLAN-APPROVE (interactive) -> User approval +Wave 4: TDFIX-001 (executor) -> Apply fixes in worktree +Wave 5: TDVAL-001 (validator) -> 4-layer validation + [GC Loop: TDFIX-fix-N -> TDVAL-recheck-N, max 3] +``` + +### Targeted Mode +``` +Wave 1: TDPLAN-001 (planner) -> Targeted fix plan + PLAN-APPROVE (interactive) -> User approval +Wave 2: TDFIX-001 (executor) -> Apply fixes in worktree +Wave 3: TDVAL-001 (validator) -> 4-layer validation + [GC Loop: TDFIX-fix-N -> TDVAL-recheck-N, max 3] +``` + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Circular dependency | Detect in wave computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent tasks in later waves | +| Interactive agent timeout | Urge convergence via send_input, then close if still timed out | +| Scanner finds no debt | Report clean codebase, skip to summary | +| Plan rejected by user | Abort pipeline or create revision task | +| Fix-verify loop stuck (>3 rounds) | Accept current state, continue to completion | +| Worktree creation fails | Fall back to direct changes with user confirmation | +| Validation tools not available | Skip unavailable checks, report partial validation | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| Continue mode: no session found | 
List available sessions, prompt user to select | + +--- + +## Core Rules + +1. **Start Immediately**: First action is session initialization, then Phase 0/1 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. **CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive) +4. **CSV First**: Default to csv-wave for tasks; only use interactive for approval checkpoints +5. **Context Propagation**: prev_context built from master CSV, not from memory +6. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson +7. **Skip on Failure**: If a dependency failed, skip the dependent task +8. **GC Loop Bounded**: Maximum 3 fix-verify rounds before accepting current state +9. **Worktree Isolation**: All fix execution happens in git worktree, not main branch +10. **DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-tech-debt/agents/gc-loop-manager.md b/.codex/skills/team-tech-debt/agents/gc-loop-manager.md new file mode 100644 index 00000000..ec8d6f9c --- /dev/null +++ b/.codex/skills/team-tech-debt/agents/gc-loop-manager.md @@ -0,0 +1,130 @@ +# GC Loop Manager Agent + +Interactive agent for managing the fix-verify GC (Garbage Collection) loop. Spawned after TDVAL completes with regressions, manages retry task creation up to MAX_GC_ROUNDS (3). 
+ +## Identity + +- **Type**: `interactive` +- **Role File**: `agents/gc-loop-manager.md` +- **Responsibility**: Evaluate validation results, decide whether to retry or accept, create GC loop tasks + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Read validation report to determine regression status +- Track GC round count (max 3) +- Create fix-verify retry tasks when regressions found and rounds remain +- Accept current state when GC rounds exhausted +- Report decision to orchestrator +- Produce structured output following template + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Execute fix actions directly +- Exceed MAX_GC_ROUNDS (3) +- Skip validation report reading +- Produce unstructured output + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | built-in | Load validation report and context | +| `Write` | built-in | Store GC decision result | + +--- + +## Execution + +### Phase 1: Validation Assessment + +**Objective**: Read validation results and determine action + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| validation-report.json | Yes | Validation results | +| discoveries.ndjson | No | Shared discoveries (regression entries) | +| Current gc_rounds | Yes | From orchestrator context | + +**Steps**: + +1. Read validation-report.json +2. Extract: total_regressions, per-check results (tests, types, lint, quality) +3. Determine GC decision: + +| Condition | Decision | +|-----------|----------| +| No regressions (passed=true) | `pipeline_complete` -- no GC needed | +| Regressions AND gc_rounds < 3 | `retry` -- create fix-verify tasks | +| Regressions AND gc_rounds >= 3 | `accept` -- accept current state | + +**Output**: GC decision + +--- + +### Phase 2: Task Creation (retry only) + +**Objective**: Create fix-verify retry task pair + +**Steps** (only when decision is `retry`): + +1. 
Increment gc_rounds +2. Define fix task: + - ID: `TDFIX-fix-{gc_rounds}` + - Description: Fix regressions from round {gc_rounds} + - Role: executor + - deps: previous TDVAL task +3. Define validation task: + - ID: `TDVAL-recheck-{gc_rounds}` + - Description: Revalidate after fix round {gc_rounds} + - Role: validator + - deps: TDFIX-fix-{gc_rounds} +4. Report new tasks to orchestrator for CSV insertion + +**Output**: New task definitions for orchestrator to add to master CSV + +--- + +## Structured Output Template + +``` +## Summary +- Validation result: +- Total regressions: +- GC round: / +- Decision: + +## Regression Details (if any) +- Test failures: +- Type errors: +- Lint errors: + +## Action Taken +- Decision: +- New tasks created: + +## Metrics +- Debt score before: +- Debt score after: +- Improvement: % +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Validation report not found | Report error, suggest re-running validator | +| Report parse error | Treat as failed validation, trigger retry if rounds remain | +| GC rounds already at max | Accept current state, report to orchestrator | +| Processing failure | Output partial results with clear status | diff --git a/.codex/skills/team-tech-debt/agents/plan-approver.md b/.codex/skills/team-tech-debt/agents/plan-approver.md new file mode 100644 index 00000000..d5d16273 --- /dev/null +++ b/.codex/skills/team-tech-debt/agents/plan-approver.md @@ -0,0 +1,151 @@ +# Plan Approver Agent + +Interactive agent for reviewing the tech debt remediation plan at the plan approval gate checkpoint. Spawned after TDPLAN-001 completes, before TDFIX execution begins. 
+ +## Identity + +- **Type**: `interactive` +- **Role File**: `agents/plan-approver.md` +- **Responsibility**: Review remediation plan, present to user, handle Approve/Revise/Abort + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Read the remediation plan (both .md and .json) +- Present clear summary with phases, item counts, effort estimates +- Wait for user approval before reporting +- Handle all three outcomes (Approve, Revise, Abort) +- Produce structured output following template + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Approve plan without user confirmation +- Modify the plan artifacts directly +- Execute any fix actions +- Produce unstructured output + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | built-in | Load plan artifacts and context | +| `AskUserQuestion` | built-in | Get user approval decision | +| `Write` | built-in | Store approval result | + +### Tool Usage Patterns + +**Read Pattern**: Load plan before review +``` +Read("/plan/remediation-plan.md") +Read("/plan/remediation-plan.json") +Read("/assessment/priority-matrix.json") +``` + +--- + +## Execution + +### Phase 1: Plan Loading + +**Objective**: Load and summarize the remediation plan + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| remediation-plan.md | Yes | Human-readable plan | +| remediation-plan.json | Yes | Machine-readable plan | +| priority-matrix.json | No | Assessment context | +| discoveries.ndjson | No | Shared discoveries | + +**Steps**: + +1. Read remediation-plan.md for overview +2. Read remediation-plan.json for metrics +3. Summarize: total actions, effort distribution, phases +4. Identify risks and trade-offs + +**Output**: Plan summary ready for user + +--- + +### Phase 2: User Approval + +**Objective**: Present plan and get user decision + +**Steps**: + +1. 
Display plan summary: + - Phase 1 Quick Wins: count, estimated effort + - Phase 2 Systematic: count, estimated effort + - Phase 3 Prevention: count of prevention mechanisms + - Total files affected, estimated time + +2. Present decision: + +```javascript +AskUserQuestion({ + questions: [{ + question: "Remediation plan generated. Review and decide:", + header: "Plan Approval Gate", + multiSelect: false, + options: [ + { label: "Approve", description: "Proceed with fix execution in worktree" }, + { label: "Revise", description: "Re-run planner with specific feedback" }, + { label: "Abort", description: "Stop pipeline, keep scan/assessment results" } + ] + }] +}) +``` + +3. Handle response: + +| Response | Action | +|----------|--------| +| Approve | Report approved, trigger worktree creation | +| Revise | Collect revision feedback, report revision-needed | +| Abort | Report abort, pipeline stops | + +**Output**: Approval decision with details + +--- + +## Structured Output Template + +``` +## Summary +- Plan reviewed: remediation-plan.md +- Decision: + +## Plan Overview +- Phase 1 Quick Wins: items, effort +- Phase 2 Systematic: items, effort +- Phase 3 Prevention: mechanisms +- Files affected: + +## Decision Details +- User choice: +- Feedback: + +## Risks Identified +- Risk 1: description +- Risk 2: description +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Plan file not found | Report error, suggest re-running planner | +| Plan is empty (no actions) | Report clean codebase, suggest closing | +| User does not respond | Timeout, report awaiting-review | +| Plan JSON parse error | Fall back to .md for review, report warning | diff --git a/.codex/skills/team-tech-debt/instructions/agent-instruction.md b/.codex/skills/team-tech-debt/instructions/agent-instruction.md new file mode 100644 index 00000000..fc93f7b2 --- /dev/null +++ b/.codex/skills/team-tech-debt/instructions/agent-instruction.md @@ -0,0 +1,390 @@ +# Agent 
Instruction Template -- Team Tech Debt + +Role-specific instruction templates for CSV wave agents in the tech debt pipeline. Each role has a specialized instruction that is injected as the `instruction` parameter to `spawn_agents_on_csv`. + +## Purpose + +| Phase | Usage | +|-------|-------| +| Phase 1 | Orchestrator selects role-specific instruction based on task role | +| Phase 2 | Injected as `instruction` parameter to `spawn_agents_on_csv` | + +--- + +## Scanner Instruction + +```markdown +## TECH DEBT SCAN TASK + +### MANDATORY FIRST STEPS +1. Read shared discoveries: /discoveries.ndjson (if exists) +2. Read project context: .workflow/project-tech.json (if exists) + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Role**: scanner +**Dimension Focus**: {debt_dimension} +**Pipeline Mode**: {pipeline_mode} + +### Task Description +{description} + +### Previous Context +{prev_context} + +--- + +## Execution Protocol + +1. **Read discoveries**: Load /discoveries.ndjson +2. **Detect project type**: Check package.json, pyproject.toml, go.mod, etc. +3. **Scan 5 dimensions**: + - **Code**: Complexity > 10, TODO/FIXME, deprecated APIs, dead code, duplicated logic + - **Architecture**: Circular dependencies, god classes, layering violations, tight coupling + - **Testing**: Missing tests, low coverage, test quality issues, no integration tests + - **Dependency**: Outdated packages, known vulnerabilities, unused dependencies + - **Documentation**: Missing JSDoc/docstrings, stale API docs, no README sections +4. **Use tools**: mcp__ace-tool__search_context for semantic search, Grep for pattern matching, Bash for static analysis tools +5. **Standardize each finding**: + - id: TD-NNN (sequential) + - dimension: code|architecture|testing|dependency|documentation + - severity: critical|high|medium|low + - file: path, line: number + - description: issue description + - suggestion: fix suggestion + - estimated_effort: small|medium|large|unknown +6. 
**Share discoveries**: Append each finding to discovery board: + ```bash + echo '{"ts":"","worker":"{id}","type":"debt_item_found","data":{"id":"TD-NNN","dimension":"","severity":"","file":"","line":,"description":"","suggestion":"","estimated_effort":""}}' >> /discoveries.ndjson + ``` +7. **Write artifact**: Save structured inventory to /scan/debt-inventory.json +8. **Report result** + +--- + +## Output (report_agent_job_result) + +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Scanned N dimensions. Found M debt items: X critical, Y high... (max 500 chars)", + "debt_items_count": "", + "artifacts_produced": "scan/debt-inventory.json", + "error": "" +} +``` + +--- + +## Assessor Instruction + +```markdown +## TECH DEBT ASSESSMENT TASK + +### MANDATORY FIRST STEPS +1. Read shared discoveries: /discoveries.ndjson (if exists) +2. Read debt inventory: /scan/debt-inventory.json + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Role**: assessor + +### Task Description +{description} + +### Previous Context +{prev_context} + +--- + +## Execution Protocol + +1. **Load debt inventory** from /scan/debt-inventory.json +2. **Score each item**: + - **Impact Score** (1-5): critical=5, high=4, medium=3, low=1 + - **Cost Score** (1-5): small=1, medium=3, large=5, unknown=3 +3. **Classify into priority quadrants**: + | Impact | Cost | Quadrant | + |--------|------|----------| + | >= 4 | <= 2 | quick-win | + | >= 4 | >= 3 | strategic | + | <= 3 | <= 2 | backlog | + | <= 3 | >= 3 | defer | +4. **Sort** within each quadrant by impact_score descending +5. **Share discoveries**: Append assessment summary to discovery board +6. **Write artifact**: /assessment/priority-matrix.json +7. **Report result** + +--- + +## Output (report_agent_job_result) + +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Assessed M items. 
Quick-wins: X, Strategic: Y, Backlog: Z, Defer: W (max 500 chars)", + "debt_items_count": "", + "artifacts_produced": "assessment/priority-matrix.json", + "error": "" +} +``` + +--- + +## Planner Instruction + +```markdown +## TECH DEBT PLANNING TASK + +### MANDATORY FIRST STEPS +1. Read shared discoveries: /discoveries.ndjson (if exists) +2. Read priority matrix: /assessment/priority-matrix.json + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Role**: planner + +### Task Description +{description} + +### Previous Context +{prev_context} + +--- + +## Execution Protocol + +1. **Load priority matrix** from /assessment/priority-matrix.json +2. **Group items**: quickWins (quick-win), strategic, backlog, deferred +3. **Create 3-phase remediation plan**: + - **Phase 1: Quick Wins** -- High impact, low cost, immediate execution + - **Phase 2: Systematic** -- High impact, high cost, structured refactoring + - **Phase 3: Prevention** -- Long-term prevention mechanisms +4. **Map action types** per dimension: + | Dimension | Action Type | + |-----------|-------------| + | code | refactor | + | architecture | restructure | + | testing | add-tests | + | dependency | update-deps | + | documentation | add-docs | +5. **Generate prevention actions** for dimensions with >= 3 items: + | Dimension | Prevention | + |-----------|------------| + | code | Add linting rules for complexity thresholds | + | architecture | Introduce module boundary checks in CI | + | testing | Set minimum coverage thresholds | + | dependency | Configure automated update bot | + | documentation | Add docstring enforcement in linting | +6. **Write artifacts**: + - /plan/remediation-plan.md (human-readable with checklists) + - /plan/remediation-plan.json (machine-readable) +7. **Report result** + +--- + +## Output (report_agent_job_result) + +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Created 3-phase plan. Phase 1: X quick-wins. Phase 2: Y systematic. 
Phase 3: Z prevention. Total actions: N (max 500 chars)", + "debt_items_count": "", + "artifacts_produced": "plan/remediation-plan.md;plan/remediation-plan.json", + "error": "" +} +``` + +--- + +## Executor Instruction + +```markdown +## TECH DEBT FIX EXECUTION TASK + +### MANDATORY FIRST STEPS +1. Read shared discoveries: /discoveries.ndjson (if exists) +2. Read remediation plan: /plan/remediation-plan.json + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Role**: executor + +### Task Description +{description} + +### Previous Context +{prev_context} + +--- + +## Execution Protocol + +**CRITICAL**: ALL file operations must execute within the worktree path. + +1. **Load remediation plan** from /plan/remediation-plan.json +2. **Extract worktree path** from task description +3. **Group actions by type**: refactor -> update-deps -> add-tests -> add-docs -> restructure +4. **For each batch**: + - Read target files in worktree + - Apply changes following project conventions + - Validate changes compile/lint: `cd "" && npx tsc --noEmit` or equivalent + - Track: items_fixed, items_failed, files_modified +5. **After each batch**: Verify via `cd "" && git diff --name-only` +6. **Share discoveries**: Append fix_applied entries to discovery board: + ```bash + echo '{"ts":"","worker":"{id}","type":"fix_applied","data":{"file":"","change":"","lines_modified":,"debt_id":""}}' >> /discoveries.ndjson + ``` +7. **Self-validate**: + | Check | Command | Pass Criteria | + |-------|---------|---------------| + | Syntax | `cd "" && npx tsc --noEmit` | No new errors | + | Lint | `cd "" && npx eslint --no-error-on-unmatched-pattern` | No new errors | +8. **Write artifact**: /fixes/fix-log.json +9. **Report result** + +--- + +## Output (report_agent_job_result) + +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Fixed X/Y items. Batches: refactor(N), update-deps(N), add-tests(N). 
Files modified: Z (max 500 chars)", + "debt_items_count": "", + "artifacts_produced": "fixes/fix-log.json", + "error": "" +} +``` + +--- + +## Validator Instruction + +```markdown +## TECH DEBT VALIDATION TASK + +### MANDATORY FIRST STEPS +1. Read shared discoveries: /discoveries.ndjson (if exists) +2. Read fix log: /fixes/fix-log.json (if exists) + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Role**: validator + +### Task Description +{description} + +### Previous Context +{prev_context} + +--- + +## Execution Protocol + +**CRITICAL**: ALL validation commands must execute within the worktree path. + +1. **Extract worktree path** from task description +2. **Load fix results** from /fixes/fix-log.json +3. **Run 4-layer validation**: + + **Layer 1 -- Test Suite**: + - Command: `cd "" && npm test` or `cd "" && python -m pytest` + - PASS: No FAIL/error/failed keywords + - SKIP: No test runner available + + **Layer 2 -- Type Check**: + - Command: `cd "" && npx tsc --noEmit` + - Count: `error TS` occurrences + + **Layer 3 -- Lint Check**: + - Command: `cd "" && npx eslint --no-error-on-unmatched-pattern ` + - Count: error occurrences + + **Layer 4 -- Quality Analysis** (when > 5 modified files): + - Compare code quality before/after + - Assess complexity, duplication, naming improvements + +4. **Calculate debt score**: + - debt_score_after = debt items NOT in modified files (remaining unfixed) + - improvement_percentage = ((before - after) / before) * 100 + +5. **Auto-fix attempt** (when total_regressions <= 3): + - Fix minor regressions inline + - Re-run validation checks + +6. **Share discoveries**: Append regression_found entries if any: + ```bash + echo '{"ts":"","worker":"{id}","type":"regression_found","data":{"file":"","test":"","description":"","severity":""}}' >> /discoveries.ndjson + ``` + +7. 
**Write artifact**: /validation/validation-report.json with: + - validation_date, passed (bool), total_regressions + - checks: {tests, types, lint, quality} with per-check status + - debt_score_before, debt_score_after, improvement_percentage +8. **Report result** + +--- + +## Output (report_agent_job_result) + +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Validation: PASSED|FAILED. Tests: OK/N failures. Types: OK/N errors. Lint: OK/N errors. Debt reduction: X% (max 500 chars)", + "debt_items_count": "", + "artifacts_produced": "validation/validation-report.json", + "error": "" +} +``` + +--- + +## Placeholder Reference + +| Placeholder | Resolved By | When | +|-------------|------------|------| +| `` | Skill designer (Phase 1) | Literal path baked into instruction | +| `{id}` | spawn_agents_on_csv | Runtime from CSV row | +| `{title}` | spawn_agents_on_csv | Runtime from CSV row | +| `{description}` | spawn_agents_on_csv | Runtime from CSV row | +| `{role}` | spawn_agents_on_csv | Runtime from CSV row | +| `{debt_dimension}` | spawn_agents_on_csv | Runtime from CSV row | +| `{pipeline_mode}` | spawn_agents_on_csv | Runtime from CSV row | +| `{prev_context}` | spawn_agents_on_csv | Runtime from CSV row | + +--- + +## Instruction Selection Logic + +The orchestrator selects the appropriate instruction section based on the task's `role` column: + +| Role | Instruction Section | +|------|-------------------| +| scanner | Scanner Instruction | +| assessor | Assessor Instruction | +| planner | Planner Instruction | +| executor | Executor Instruction | +| validator | Validator Instruction | + +Since each wave typically contains tasks from a single role (linear pipeline), the orchestrator uses the role of the first task in the wave to select the instruction template. The `` placeholder is replaced with the actual session path before injection. 
diff --git a/.codex/skills/team-tech-debt/schemas/tasks-schema.md b/.codex/skills/team-tech-debt/schemas/tasks-schema.md new file mode 100644 index 00000000..4e8ad5b2 --- /dev/null +++ b/.codex/skills/team-tech-debt/schemas/tasks-schema.md @@ -0,0 +1,196 @@ +# Team Tech Debt -- CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### Input Columns (Set by Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | string | Yes | Unique task identifier (TDPREFIX-NNN) | `"TDSCAN-001"` | +| `title` | string | Yes | Short task title | `"Multi-dimension debt scan"` | +| `description` | string | Yes | Detailed task description (self-contained) | `"Scan codebase across 5 dimensions..."` | +| `role` | enum | Yes (csv-wave); empty for interactive gate tasks | Worker role: `scanner`, `assessor`, `planner`, `executor`, `validator` | `"scanner"` | +| `debt_dimension` | string | Yes | Target dimensions: `all`, or specific dimension(s) | `"all"` | +| `pipeline_mode` | enum | Yes | Pipeline mode: `scan`, `remediate`, `targeted` | `"remediate"` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"TDSCAN-001"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"TDSCAN-001"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from pipeline position) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[TDSCAN-001] Found 42 debt items..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` -> `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Found 42 debt 
items: 5 critical, 12 high..."` | +| `debt_items_count` | integer | Number of debt items processed | `42` | +| `artifacts_produced` | string | Semicolon-separated artifact paths | `"scan/debt-inventory.json"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Plan approval, GC loop management | + +Interactive tasks appear in master CSV for dependency tracking but are NOT included in wave-{N}.csv files. + +--- + +### Role Registry + +| Role | Prefix | Responsibility | inner_loop | +|------|--------|----------------|------------| +| scanner | TDSCAN | Multi-dimension debt scanning | false | +| assessor | TDEVAL | Quantitative severity assessment | false | +| planner | TDPLAN | Phased remediation planning | false | +| executor | TDFIX | Worktree-based debt cleanup | true | +| validator | TDVAL | 4-layer validation | false | + +--- + +### Debt Dimensions + +| Dimension | Description | Tools/Methods | +|-----------|-------------|---------------| +| code | Code smells, complexity, duplication | Static analysis, complexity metrics | +| architecture | Coupling, circular deps, layering violations | Dependency graph, coupling analysis | +| testing | Missing tests, low coverage, test quality | Coverage analysis, test quality | +| dependency | Outdated packages, vulnerabilities | Outdated check, vulnerability scan | +| documentation | Missing docs, stale API docs | Doc coverage, API doc check | + +--- + +### Example Data + +```csv +id,title,description,role,debt_dimension,pipeline_mode,deps,context_from,exec_mode,wave,status,findings,debt_items_count,artifacts_produced,error +"TDSCAN-001","Multi-dimension debt scan","Scan codebase across code, architecture, testing, dependency, and documentation dimensions. 
Produce structured debt inventory with severity rankings.\nSession: .workflow/.csv-wave/td-auth-20260308\nScope: src/**","scanner","all","remediate","","","csv-wave","1","pending","","0","","" +"TDEVAL-001","Severity assessment","Evaluate each debt item: impact score (1-5) x cost score (1-5). Classify into priority quadrants: quick-win, strategic, backlog, defer.\nSession: .workflow/.csv-wave/td-auth-20260308\nUpstream: TDSCAN-001 debt inventory","assessor","all","remediate","TDSCAN-001","TDSCAN-001","csv-wave","2","pending","","0","","" +"TDPLAN-001","Remediation planning","Create 3-phase remediation plan: Phase 1 quick-wins, Phase 2 systematic, Phase 3 prevention.\nSession: .workflow/.csv-wave/td-auth-20260308\nUpstream: TDEVAL-001 priority matrix","planner","all","remediate","TDEVAL-001","TDEVAL-001","csv-wave","3","pending","","0","","" +"PLAN-APPROVE","Plan approval gate","Review remediation plan and approve for execution","","all","remediate","TDPLAN-001","TDPLAN-001","interactive","3","pending","","0","","" +"TDFIX-001","Debt cleanup execution","Apply remediation plan actions in worktree: refactor, update deps, add tests, add docs.\nSession: .workflow/.csv-wave/td-auth-20260308\nWorktree: .worktrees/td-auth-20260308","executor","all","remediate","PLAN-APPROVE","TDPLAN-001","csv-wave","4","pending","","0","","" +"TDVAL-001","Cleanup validation","Run 4-layer validation: tests, type check, lint, quality analysis. 
Compare before/after debt scores.\nSession: .workflow/.csv-wave/td-auth-20260308\nWorktree: .worktrees/td-auth-20260308","validator","all","remediate","TDFIX-001","TDFIX-001","csv-wave","5","pending","","0","","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +--------------------- -------------------- ----------------- +id ----------> id ----------> id +title ----------> title ----------> (reads) +description ----------> description ----------> (reads) +role ----------> role ----------> (reads) +debt_dimension -------> debt_dimension -------> (reads) +pipeline_mode --------> pipeline_mode --------> (reads) +deps ----------> deps ----------> (reads) +context_from----------> context_from----------> (reads) +exec_mode ----------> exec_mode ----------> (reads) + wave ----------> (reads) + prev_context ----------> (reads) + status + findings + debt_items_count + artifacts_produced + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "TDSCAN-001", + "status": "completed", + "findings": "Scanned 5 dimensions. Found 42 debt items: 5 critical, 12 high, 15 medium, 10 low. Top issues: complex auth logic (code), circular deps in services (architecture), missing integration tests (testing).", + "debt_items_count": "42", + "artifacts_produced": "scan/debt-inventory.json", + "error": "" +} +``` + +Interactive tasks output via structured text or JSON written to `interactive/{id}-result.json`. 
+ +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `debt_item_found` | `data.file+data.line` | `{id, dimension, severity, file, line, description, suggestion, estimated_effort}` | Tech debt item identified | +| `pattern_found` | `data.pattern_name+data.location` | `{pattern_name, location, description}` | Anti-pattern found | +| `fix_applied` | `data.file+data.change` | `{file, change, lines_modified, debt_id}` | Fix applied | +| `regression_found` | `data.file+data.test` | `{file, test, description, severity}` | Regression in validation | +| `dependency_issue` | `data.package+data.issue` | `{package, current, latest, issue, severity}` | Dependency problem | +| `metric_recorded` | `data.metric` | `{metric, value, dimension, file}` | Quality metric | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"TDSCAN-001","type":"debt_item_found","data":{"id":"TD-001","dimension":"code","severity":"high","file":"src/auth/jwt.ts","line":42,"description":"Cyclomatic complexity 18 exceeds threshold 10","suggestion":"Extract token validation logic","estimated_effort":"medium"}} +{"ts":"2026-03-08T10:05:00Z","worker":"TDSCAN-001","type":"dependency_issue","data":{"package":"express","current":"4.17.1","latest":"4.19.2","issue":"Known security vulnerability CVE-2024-XXXX","severity":"critical"}} +{"ts":"2026-03-08T10:30:00Z","worker":"TDFIX-001","type":"fix_applied","data":{"file":"src/auth/jwt.ts","change":"Extracted validateToken helper","lines_modified":25,"debt_id":"TD-001"}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. 
+ +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| Scanner findings | Assessor | prev_context from TDSCAN + scan/debt-inventory.json | +| Assessor matrix | Planner | prev_context from TDEVAL + assessment/priority-matrix.json | +| Planner plan | Plan Approver | Interactive spawn reads plan/remediation-plan.md | +| Plan approval | Executor | Interactive result in interactive/PLAN-APPROVE-result.json | +| Executor fixes | Validator | prev_context from TDFIX + fixes/fix-log.json | +| Validator results | GC Loop | Interactive read of validation/validation-report.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | + +--- + +## GC Loop Schema + +| Field | Type | Description | +|-------|------|-------------| +| `gc_rounds` | integer | Current GC round (0-based) | +| `max_gc_rounds` | integer | Maximum rounds (3) | +| `fix_task_id` | string | Current fix task ID (TDFIX-fix-N) | +| `val_task_id` | string | Current validation task ID (TDVAL-recheck-N) | +| `regressions` | array | List of regression descriptions | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Role valid | role in {scanner, assessor, planner, executor, validator} | "Invalid role: {role}" | +| Pipeline mode valid | pipeline_mode in {scan, remediate, targeted} | "Invalid pipeline_mode: {mode}" | +| Description non-empty | Every task has description | "Empty description for task: 
{id}" | +| Status enum | status in {pending, completed, failed, skipped} | "Invalid status: {status}" | +| GC round limit | gc_rounds <= 3 | "GC round limit exceeded" | diff --git a/.codex/skills/team-testing/SKILL.md b/.codex/skills/team-testing/SKILL.md new file mode 100644 index 00000000..d1595ca7 --- /dev/null +++ b/.codex/skills/team-testing/SKILL.md @@ -0,0 +1,732 @@ +--- +name: team-testing +description: Multi-agent test pipeline with progressive layer coverage (L1/L2/L3), Generator-Critic loops for coverage convergence, and shared defect memory. Strategist -> Generator -> Executor -> Analyst with dynamic pipeline selection. +argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"task description or scope\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults. + +# Team Testing + +## Usage + +```bash +$team-testing "Generate tests for the authentication module" +$team-testing -c 4 "Progressive testing for recent changes with L1+L2 coverage" +$team-testing -y "Test all changed files since last commit" +$team-testing --continue "tst-auth-module-20260308" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 3) +- `--continue`: Resume existing session + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Orchestrate multi-agent test pipeline: strategist -> generator -> executor -> analyst. Progressive layer coverage (L1 unit / L2 integration / L3 E2E) with Generator-Critic (GC) loops for coverage convergence. 
Dynamic pipeline selection based on change scope (targeted / standard / comprehensive). + +**Execution Model**: Hybrid -- CSV wave pipeline (primary) + individual agent spawn (secondary) + +``` ++-------------------------------------------------------------------+ +| TEAM TESTING WORKFLOW | ++-------------------------------------------------------------------+ +| | +| Phase 0: Pre-Wave Interactive (Requirement Clarification) | +| +- Parse task description, detect change scope | +| +- Select pipeline (targeted/standard/comprehensive) | +| +- Output: refined requirements for decomposition | +| | +| Phase 1: Requirement -> CSV + Classification | +| +- Analyze git diff for changed files | +| +- Map files to test layers (L1/L2/L3) | +| +- Build dependency chain with GC loop tasks | +| +- Classify tasks: csv-wave | interactive (exec_mode) | +| +- Compute dependency waves (topological sort) | +| +- Generate tasks.csv with wave + exec_mode columns | +| +- User validates task breakdown (skip if -y) | +| | +| Phase 2: Wave Execution Engine (Extended) | +| +- For each wave (1..N): | +| | +- Execute pre-wave interactive tasks (if any) | +| | +- Build wave CSV (filter csv-wave tasks for this wave) | +| | +- Inject previous findings into prev_context column | +| | +- spawn_agents_on_csv(wave CSV) | +| | +- Execute post-wave interactive tasks (if any) | +| | +- Merge all results into master tasks.csv | +| | +- GC Loop Check: coverage < target? -> spawn fix tasks | +| | +- Check: any failed? 
-> skip dependents | +| +- discoveries.ndjson shared across all modes (append-only) | +| | +| Phase 3: Post-Wave Interactive (Completion Action) | +| +- Pipeline completion report with coverage metrics | +| +- Interactive completion choice (Archive/Keep/Deepen) | +| +- Final aggregation / report | +| | +| Phase 4: Results Aggregation | +| +- Export final results.csv | +| +- Generate context.md with all findings | +| +- Display summary: completed/failed/skipped per wave | +| +- Offer: view results | retry failed | done | +| | ++-------------------------------------------------------------------+ +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, needs iterative fix-verify cycles | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Strategy formulation (single-pass analysis) | `csv-wave` | +| Test generation (single-pass code creation) | `csv-wave` | +| Test execution with auto-fix cycle | `interactive` | +| Quality analysis (single-pass report) | `csv-wave` | +| GC loop fix-verify iteration | `interactive` | +| Coverage gate decision (coordinator) | `interactive` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,role,layer,coverage_target,deps,context_from,exec_mode,wave,status,findings,pass_rate,coverage_achieved,test_files,error +"STRATEGY-001","Analyze changes and define test strategy","Analyze git diff, detect test framework, determine test layers, define coverage targets, formulate prioritized test strategy","strategist","","","","","csv-wave","1","pending","","","","","" +"TESTGEN-001","Generate L1 unit tests","Generate L1 unit tests for priority files based on test strategy. 
Follow project test conventions, include happy path, edge cases, error handling","generator","L1","80","STRATEGY-001","STRATEGY-001","csv-wave","2","pending","","","","","" +"TESTRUN-001","Execute L1 tests and collect coverage","Run L1 test suite, collect coverage data, auto-fix failures up to 3 iterations. Report pass rate and coverage percentage","executor","L1","80","TESTGEN-001","TESTGEN-001","interactive","3","pending","","","","","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier (PREFIX-NNN format) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description (self-contained) | +| `role` | Input | Worker role: `strategist`, `generator`, `executor`, `analyst` | +| `layer` | Input | Test layer: `L1`, `L2`, `L3`, or empty for non-layer tasks | +| `coverage_target` | Input | Target coverage percentage for this layer (empty if N/A) | +| `deps` | Input | Semicolon-separated dependency task IDs | +| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `wave` | Computed | Wave number (computed by topological sort, 1-based) | +| `status` | Output | `pending` -> `completed` / `failed` / `skipped` | +| `findings` | Output | Key discoveries or implementation notes (max 500 chars) | +| `pass_rate` | Output | Test pass rate as decimal (e.g., "0.95") | +| `coverage_achieved` | Output | Actual coverage percentage achieved | +| `test_files` | Output | Semicolon-separated paths of test files produced | +| `error` | Output | Error message if failed (empty if success) | + +### Per-Wave CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). 
+ +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| Test Executor | agents/executor.md | 2.3 (send_input cycle) | Execute tests with iterative fix cycle, report pass rate and coverage | per-wave | +| GC Loop Handler | agents/gc-loop-handler.md | 2.3 (send_input cycle) | Manage Generator-Critic loop: evaluate coverage, trigger fix rounds | post-wave | + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. + +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state -- all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 4 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 4 | +| `strategy/test-strategy.md` | Strategist output: test strategy document | Created in wave 1 | +| `tests/L1-unit/` | Generator output: L1 unit test files | Created in L1 wave | +| `tests/L2-integration/` | Generator output: L2 integration test files | Created in L2 wave | +| `tests/L3-e2e/` | Generator output: L3 E2E test files | Created in L3 wave | +| `results/run-{layer}.json` | Executor output: per-layer test results | Created per execution | +| `analysis/quality-report.md` | Analyst output: quality analysis report | Created in final wave | +| `interactive/{id}-result.json` | Results from interactive tasks | Created per interactive task | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ ++-- tasks.csv # Master state (all tasks, both modes) ++-- results.csv 
# Final results export ++-- discoveries.ndjson # Shared discovery board (all agents) ++-- context.md # Human-readable report ++-- wave-{N}.csv # Temporary per-wave input (csv-wave only) ++-- strategy/ # Strategist output +| +-- test-strategy.md ++-- tests/ # Generator output +| +-- L1-unit/ +| +-- L2-integration/ +| +-- L3-e2e/ ++-- results/ # Executor output +| +-- run-L1.json +| +-- run-L2.json +| +-- run-L3.json ++-- analysis/ # Analyst output +| +-- quality-report.md ++-- wisdom/ # Cross-task knowledge +| +-- learnings.md +| +-- conventions.md +| +-- decisions.md ++-- interactive/ # Interactive task artifacts +| +-- {id}-result.json ++-- gc-state.json # GC loop tracking state +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +const getUtc8ISOString = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString() + +const AUTO_YES = $ARGUMENTS.includes('--yes') || $ARGUMENTS.includes('-y') +const continueMode = $ARGUMENTS.includes('--continue') +const concurrencyMatch = $ARGUMENTS.match(/(?:--concurrency|-c)\s+(\d+)/) +const maxConcurrency = concurrencyMatch ? 
parseInt(concurrencyMatch[1]) : 3 + +const requirement = $ARGUMENTS + .replace(/--yes|-y|--continue|--concurrency\s+\d+|-c\s+\d+/g, '') + .trim() + +const slug = requirement.toLowerCase() + .replace(/[^a-z0-9\u4e00-\u9fa5]+/g, '-') + .substring(0, 40) +const dateStr = getUtc8ISOString().substring(0, 10).replace(/-/g, '') +const sessionId = `tst-${slug}-${dateStr}` +const sessionFolder = `.workflow/.csv-wave/${sessionId}` + +Bash(`mkdir -p ${sessionFolder}/strategy ${sessionFolder}/tests/L1-unit ${sessionFolder}/tests/L2-integration ${sessionFolder}/tests/L3-e2e ${sessionFolder}/results ${sessionFolder}/analysis ${sessionFolder}/wisdom ${sessionFolder}/interactive`) + +// Initialize discoveries.ndjson +Write(`${sessionFolder}/discoveries.ndjson`, '') + +// Initialize wisdom files +Write(`${sessionFolder}/wisdom/learnings.md`, '# Learnings\n') +Write(`${sessionFolder}/wisdom/conventions.md`, '# Conventions\n') +Write(`${sessionFolder}/wisdom/decisions.md`, '# Decisions\n') + +// Initialize GC state +Write(`${sessionFolder}/gc-state.json`, JSON.stringify({ + rounds: {}, coverage_history: [], max_rounds_per_layer: 3 +}, null, 2)) +``` + +--- + +### Phase 0: Pre-Wave Interactive (Requirement Clarification) + +**Objective**: Parse task description, analyze change scope, select pipeline mode. + +**Workflow**: + +1. **Parse user task description** from $ARGUMENTS + +2. **Check for existing sessions** (continue mode): + - Scan `.workflow/.csv-wave/tst-*/tasks.csv` for sessions with pending tasks + - If `--continue`: resume the specified or most recent session, skip to Phase 2 + - If active session found: ask user whether to resume or start new + +3. **Analyze change scope**: + ```bash + git diff --name-only HEAD~1 2>/dev/null || git diff --name-only --cached + ``` + +4. 
**Select pipeline**: + + | Condition | Pipeline | Stages | + |-----------|----------|--------| + | fileCount <= 3 AND moduleCount <= 1 | targeted | strategy -> gen-L1 -> run-L1 | + | fileCount <= 10 AND moduleCount <= 3 | standard | strategy -> gen-L1 -> run-L1 -> gen-L2 -> run-L2 -> analysis | + | Otherwise | comprehensive | strategy -> [gen-L1 // gen-L2] -> [run-L1 // run-L2] -> gen-L3 -> run-L3 -> analysis | + +5. **Clarify if ambiguous** (skip if AUTO_YES): + ```javascript + AskUserQuestion({ + questions: [{ + question: "Detected scope suggests the '" + pipeline + "' pipeline. Confirm?", + header: "Pipeline Selection", + multiSelect: false, + options: [ + { label: "Proceed with " + pipeline, description: "Detected pipeline is appropriate" }, + { label: "Use targeted", description: "Minimal: L1 only" }, + { label: "Use standard", description: "Progressive: L1 + L2 + analysis" }, + { label: "Use comprehensive", description: "Full: L1 + L2 + L3 + analysis" } + ] + }] + }) + ``` + +6. **Output**: Refined requirement, pipeline mode, changed file list + +**Success Criteria**: +- Pipeline mode selected +- Changed files identified +- Refined requirements available for Phase 1 decomposition + +--- + +### Phase 1: Requirement -> CSV + Classification + +**Objective**: Decompose testing task into dependency-ordered CSV tasks with wave assignments. + +**Decomposition Rules**: + +1. **Detect test framework** from project files: + + | Signal File | Framework | + |-------------|-----------| + | vitest.config.ts/js | Vitest | + | jest.config.js/ts | Jest | + | pytest.ini / pyproject.toml | Pytest | + | No detection | Default to Jest | + +2. 
**Build pipeline task chain** from selected pipeline: + + | Pipeline | Task Chain | + |----------|------------| + | targeted | STRATEGY-001 -> TESTGEN-001 -> TESTRUN-001 | + | standard | STRATEGY-001 -> TESTGEN-001 -> TESTRUN-001 -> TESTGEN-002 -> TESTRUN-002 -> TESTANA-001 | + | comprehensive | STRATEGY-001 -> [TESTGEN-001, TESTGEN-002] -> [TESTRUN-001, TESTRUN-002] -> TESTGEN-003 -> TESTRUN-003 -> TESTANA-001 | + +3. **Assign roles, layers, and coverage targets** per task + +4. **Assign exec_mode**: + - Strategist, Generator, Analyst tasks: `csv-wave` (single-pass) + - Executor tasks: `interactive` (iterative fix cycle) + +**Classification Rules**: + +| Task Property | exec_mode | +|---------------|-----------| +| Strategy analysis (single-pass read + write) | `csv-wave` | +| Test code generation (single-pass write) | `csv-wave` | +| Test execution with fix loop (multi-round) | `interactive` | +| Quality analysis (single-pass read + write) | `csv-wave` | + +**Wave Computation**: Kahn's BFS topological sort with depth tracking. + +**User Validation**: Display task breakdown with wave + exec_mode + layer assignment (skip if AUTO_YES). + +**Success Criteria**: +- tasks.csv created with valid schema, wave, and exec_mode assignments +- No circular dependencies +- User approved (or AUTO_YES) + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with hybrid mechanism support, GC loop handling, and cross-wave context propagation. + +```javascript +const masterCsv = Read(`${sessionFolder}/tasks.csv`) +let tasks = parseCsv(masterCsv) +const maxWave = Math.max(...tasks.map(t => t.wave)) + +for (let wave = 1; wave <= maxWave; wave++) { + console.log(`\nWave ${wave}/${maxWave}`) + + // 1. 
Separate tasks by exec_mode + const waveTasks = tasks.filter(t => t.wave === wave && t.status === 'pending') + const csvTasks = waveTasks.filter(t => t.exec_mode === 'csv-wave') + const interactiveTasks = waveTasks.filter(t => t.exec_mode === 'interactive') + + // 2. Check dependencies -- skip tasks whose deps failed + for (const task of waveTasks) { + const depIds = (task.deps || '').split(';').filter(Boolean) + const depStatuses = depIds.map(id => tasks.find(t => t.id === id)?.status) + if (depStatuses.some(s => s === 'failed' || s === 'skipped')) { + task.status = 'skipped' + task.error = `Dependency failed: ${depIds.filter((id, i) => + ['failed','skipped'].includes(depStatuses[i])).join(', ')}` + } + } + + // 3. Execute csv-wave tasks + const pendingCsvTasks = csvTasks.filter(t => t.status === 'pending') + if (pendingCsvTasks.length > 0) { + for (const task of pendingCsvTasks) { + task.prev_context = buildPrevContext(task, tasks) + } + + Write(`${sessionFolder}/wave-${wave}.csv`, toCsv(pendingCsvTasks)) + + // Read instruction template + Read(`instructions/agent-instruction.md`) + + // Build instruction with session folder baked in + const instruction = buildTestingInstruction(sessionFolder, wave) + + spawn_agents_on_csv({ + csv_path: `${sessionFolder}/wave-${wave}.csv`, + id_column: "id", + instruction: instruction, + max_concurrency: maxConcurrency, + max_runtime_seconds: 900, + output_csv_path: `${sessionFolder}/wave-${wave}-results.csv`, + output_schema: { + type: "object", + properties: { + id: { type: "string" }, + status: { type: "string", enum: ["completed", "failed"] }, + findings: { type: "string" }, + pass_rate: { type: "string" }, + coverage_achieved: { type: "string" }, + test_files: { type: "string" }, + error: { type: "string" } + } + } + }) + + // Merge results + const results = parseCsv(Read(`${sessionFolder}/wave-${wave}-results.csv`)) + for (const r of results) { + const t = tasks.find(t => t.id === r.id) + if (t) Object.assign(t, r) + } + } 
+ + // 4. Execute interactive tasks (executor with fix cycle) + const pendingInteractive = interactiveTasks.filter(t => t.status === 'pending') + for (const task of pendingInteractive) { + Read(`agents/executor.md`) + + const prevContext = buildPrevContext(task, tasks) + const agent = spawn_agent({ + message: `## TASK ASSIGNMENT\n\n### MANDATORY FIRST STEPS\n1. Read: agents/executor.md\n2. Read: ${sessionFolder}/discoveries.ndjson\n3. Read: .workflow/project-tech.json (if exists)\n\n---\n\nGoal: ${task.description}\nLayer: ${task.layer}\nCoverage Target: ${task.coverage_target}%\nSession: ${sessionFolder}\n\n### Previous Context\n${prevContext}` + }) + const result = wait({ ids: [agent], timeout_ms: 900000 }) + if (result.timed_out) { + send_input({ id: agent, message: "Please finalize current test results and report." }) + wait({ ids: [agent], timeout_ms: 120000 }) + } + Write(`${sessionFolder}/interactive/${task.id}-result.json`, JSON.stringify({ + task_id: task.id, status: result.success ? "completed" : "failed", findings: parseFindings(result), + timestamp: getUtc8ISOString() + })) + close_agent({ id: agent }) + task.status = result.success ? 'completed' : 'failed' + task.findings = parseFindings(result) + } + + // 5. 
GC Loop Check (after executor completes) + for (const task of pendingInteractive.filter(t => t.role === 'executor')) { + const gcState = JSON.parse(Read(`${sessionFolder}/gc-state.json`)) + const layer = task.layer + const rounds = gcState.rounds[layer] || 0 + const coverageAchieved = parseFloat(task.coverage_achieved || '0') + const coverageTarget = parseFloat(task.coverage_target || '80') + const passRate = parseFloat(task.pass_rate || '0') + + if ((coverageAchieved < coverageTarget || passRate < 0.95) && rounds < 3) { + // Trigger GC fix round + gcState.rounds[layer] = rounds + 1 + Write(`${sessionFolder}/gc-state.json`, JSON.stringify(gcState, null, 2)) + + // Insert fix tasks into tasks array for a subsequent micro-wave + // TESTGEN-fix task + TESTRUN-fix task + // These are spawned inline, not added to CSV + Read(`agents/gc-loop-handler.md`) + const gcAgent = spawn_agent({ + message: `## GC LOOP ROUND ${rounds + 1}\n\n### MANDATORY FIRST STEPS\n1. Read: agents/gc-loop-handler.md\n2. Read: ${sessionFolder}/discoveries.ndjson\n\nLayer: ${layer}\nRound: ${rounds + 1}/3\nCurrent Coverage: ${coverageAchieved}%\nTarget: ${coverageTarget}%\nPass Rate: ${passRate}\nSession: ${sessionFolder}\nPrevious Results: ${sessionFolder}/results/run-${layer}.json\nTest Directory: ${sessionFolder}/tests/${layer === 'L1' ? 'L1-unit' : layer === 'L2' ? 'L2-integration' : 'L3-e2e'}/` + }) + const gcResult = wait({ ids: [gcAgent], timeout_ms: 900000 }) + close_agent({ id: gcAgent }) + } + } + + // 6. Update master CSV + Write(`${sessionFolder}/tasks.csv`, toCsv(tasks)) + + // 7. Cleanup temp files + Bash(`rm -f ${sessionFolder}/wave-${wave}.csv ${sessionFolder}/wave-${wave}-results.csv`) + + // 8. 
Display wave summary + const completed = waveTasks.filter(t => t.status === 'completed').length + const failed = waveTasks.filter(t => t.status === 'failed').length + const skipped = waveTasks.filter(t => t.status === 'skipped').length + console.log(`Wave ${wave} Complete: ${completed} completed, ${failed} failed, ${skipped} skipped`) +} +``` + +**Success Criteria**: +- All waves executed in order +- Both csv-wave and interactive tasks handled per wave +- Each wave's results merged into master CSV before next wave starts +- GC loops triggered when coverage below target (max 3 rounds per layer) +- Dependent tasks skipped when predecessor failed +- discoveries.ndjson accumulated across all waves and mechanisms + +--- + +### Phase 3: Post-Wave Interactive (Completion Action) + +**Objective**: Pipeline completion report with coverage metrics and interactive completion choice. + +```javascript +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +const completed = tasks.filter(t => t.status === 'completed') +const failed = tasks.filter(t => t.status === 'failed') +const gcState = JSON.parse(Read(`${sessionFolder}/gc-state.json`)) + +// Coverage summary per layer +const layerSummary = ['L1', 'L2', 'L3'].map(layer => { + const execTask = tasks.find(t => t.role === 'executor' && t.layer === layer && t.status === 'completed') + return execTask ? ` ${layer}: ${execTask.coverage_achieved}% coverage, ${execTask.pass_rate} pass rate` : null +}).filter(Boolean).join('\n') + +console.log(` +============================================ +TESTING PIPELINE COMPLETE + +Deliverables: +${completed.map(t => ` - ${t.id}: ${t.title} (${t.role})`).join('\n')} + +Coverage: +${layerSummary} + +GC Rounds: ${JSON.stringify(gcState.rounds)} +Pipeline: ${completed.length}/${tasks.length} tasks +Session: ${sessionFolder} +============================================ +`) + +if (!AUTO_YES) { + AskUserQuestion({ + questions: [{ + question: "Testing pipeline complete. 
What would you like to do?", + header: "Completion", + multiSelect: false, + options: [ + { label: "Archive & Clean (Recommended)", description: "Archive session, output final summary" }, + { label: "Keep Active", description: "Keep session for follow-up work" }, + { label: "Deepen Coverage", description: "Add more test layers or increase coverage targets" } + ] + }] + }) +} +``` + +**Success Criteria**: +- Post-wave interactive processing complete +- Coverage metrics displayed +- User informed of results + +--- + +### Phase 4: Results Aggregation + +**Objective**: Generate final results and human-readable report. + +```javascript +// 1. Export results.csv +Bash(`cp ${sessionFolder}/tasks.csv ${sessionFolder}/results.csv`) + +// 2. Generate context.md +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +const gcState = JSON.parse(Read(`${sessionFolder}/gc-state.json`)) + +let contextMd = `# Team Testing Report\n\n` +contextMd += `**Session**: ${sessionId}\n` +contextMd += `**Date**: ${getUtc8ISOString().substring(0, 10)}\n\n` + +contextMd += `## Summary\n` +contextMd += `| Status | Count |\n|--------|-------|\n` +contextMd += `| Completed | ${tasks.filter(t => t.status === 'completed').length} |\n` +contextMd += `| Failed | ${tasks.filter(t => t.status === 'failed').length} |\n` +contextMd += `| Skipped | ${tasks.filter(t => t.status === 'skipped').length} |\n\n` + +contextMd += `## Coverage Results\n\n` +contextMd += `| Layer | Coverage | Target | Pass Rate | GC Rounds |\n` +contextMd += `|-------|----------|--------|-----------|----------|\n` +for (const layer of ['L1', 'L2', 'L3']) { + const execTask = tasks.find(t => t.role === 'executor' && t.layer === layer) + if (execTask) { + contextMd += `| ${layer} | ${execTask.coverage_achieved || 'N/A'}% | ${execTask.coverage_target}% | ${execTask.pass_rate || 'N/A'} | ${gcState.rounds[layer] || 0} |\n` + } +} +contextMd += '\n' + +const maxWave = Math.max(...tasks.map(t => t.wave)) +contextMd += `## Wave 
Execution\n\n` +for (let w = 1; w <= maxWave; w++) { + const waveTasks = tasks.filter(t => t.wave === w) + contextMd += `### Wave ${w}\n\n` + for (const t of waveTasks) { + const icon = t.status === 'completed' ? '[DONE]' : t.status === 'failed' ? '[FAIL]' : '[SKIP]' + contextMd += `${icon} **${t.title}** [${t.role}/${t.layer || '-'}] ${t.findings || ''}\n\n` + } +} + +Write(`${sessionFolder}/context.md`, contextMd) + +console.log(`Results exported to: ${sessionFolder}/results.csv`) +console.log(`Report generated at: ${sessionFolder}/context.md`) +``` + +**Success Criteria**: +- results.csv exported (all tasks, both modes) +- context.md generated with coverage breakdown +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +All agents (csv-wave and interactive) share a single `discoveries.ndjson` file for cross-task knowledge exchange. + +**Format**: One JSON object per line (NDJSON): + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"STRATEGY-001","type":"framework_detected","data":{"framework":"vitest","config_file":"vitest.config.ts","test_pattern":"**/*.test.ts"}} +{"ts":"2026-03-08T10:05:00Z","worker":"TESTGEN-001","type":"test_generated","data":{"file":"tests/L1-unit/auth.test.ts","source_file":"src/auth.ts","test_count":8}} +{"ts":"2026-03-08T10:10:00Z","worker":"TESTRUN-001","type":"defect_found","data":{"file":"src/auth.ts","line":42,"pattern":"null_reference","description":"Missing null check on token payload"}} +``` + +**Discovery Types**: + +| Type | Data Schema | Description | +|------|-------------|-------------| +| `framework_detected` | `{framework, config_file, test_pattern}` | Test framework identified | +| `test_generated` | `{file, source_file, test_count}` | Test file created | +| `defect_found` | `{file, line, pattern, description}` | Defect pattern discovered | +| `coverage_gap` | `{file, current, target, gap}` | Coverage gap identified | +| `convention_found` | `{pattern, example_file, description}` | Test convention 
detected | +| `fix_applied` | `{test_file, fix_type, description}` | Test fix during GC loop | + +**Protocol**: +1. Agents MUST read discoveries.ndjson at start of execution +2. Agents MUST append relevant discoveries during execution +3. Agents MUST NOT modify or delete existing entries +4. Deduplication by `{type, data.file}` key + +--- + +## Pipeline Definitions + +### Targeted Pipeline (3 tasks, serial) + +``` +STRATEGY-001 -> TESTGEN-001 -> TESTRUN-001 +``` + +| Task ID | Role | Layer | Wave | exec_mode | +|---------|------|-------|------|-----------| +| STRATEGY-001 | strategist | - | 1 | csv-wave | +| TESTGEN-001 | generator | L1 | 2 | csv-wave | +| TESTRUN-001 | executor | L1 | 3 | interactive | + +### Standard Pipeline (6 tasks, progressive layers) + +``` +STRATEGY-001 -> TESTGEN-001 -> TESTRUN-001 -> TESTGEN-002 -> TESTRUN-002 -> TESTANA-001 +``` + +| Task ID | Role | Layer | Wave | exec_mode | +|---------|------|-------|------|-----------| +| STRATEGY-001 | strategist | - | 1 | csv-wave | +| TESTGEN-001 | generator | L1 | 2 | csv-wave | +| TESTRUN-001 | executor | L1 | 3 | interactive | +| TESTGEN-002 | generator | L2 | 4 | csv-wave | +| TESTRUN-002 | executor | L2 | 5 | interactive | +| TESTANA-001 | analyst | - | 6 | csv-wave | + +### Comprehensive Pipeline (8 tasks, parallel windows) + +``` +STRATEGY-001 -> [TESTGEN-001 // TESTGEN-002] -> [TESTRUN-001 // TESTRUN-002] -> TESTGEN-003 -> TESTRUN-003 -> TESTANA-001 +``` + +| Task ID | Role | Layer | Wave | exec_mode | +|---------|------|-------|------|-----------| +| STRATEGY-001 | strategist | - | 1 | csv-wave | +| TESTGEN-001 | generator | L1 | 2 | csv-wave | +| TESTGEN-002 | generator | L2 | 2 | csv-wave | +| TESTRUN-001 | executor | L1 | 3 | interactive | +| TESTRUN-002 | executor | L2 | 3 | interactive | +| TESTGEN-003 | generator | L3 | 4 | csv-wave | +| TESTRUN-003 | executor | L3 | 5 | interactive | +| TESTANA-001 | analyst | - | 6 | csv-wave | + +--- + +## GC Loop (Generator-Critic) + +Generator 
and executor iterate per test layer until coverage converges: + +``` +TESTGEN -> TESTRUN -> (if pass_rate < 0.95 OR coverage < target) -> GC Loop Handler + (if pass_rate >= 0.95 AND coverage >= target) -> next wave +``` + +- Max iterations: 3 per layer +- After 3 iterations: accept current coverage with warning +- GC loop runs as interactive agent (gc-loop-handler.md) which internally generates fixes and re-runs tests + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Circular dependency | Detect in wave computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent tasks in later waves | +| Interactive agent timeout | Urge convergence via send_input, then close if still timed out | +| Interactive agent failed | Mark as failed, skip dependents | +| All agents in wave failed | Log error, offer retry or abort | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| GC loop exceeded (3 rounds) | Accept current coverage with warning, proceed to next layer | +| Test framework not detected | Default to Jest patterns | +| No changed files found | Use full project scan with user confirmation | +| Coverage tool unavailable | Degrade to pass rate judgment | +| Continue mode: no session found | List available sessions, prompt user to select | + +--- + +## Core Rules + +1. **Start Immediately**: First action is session initialization, then Phase 0/1 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. **CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive) +4. **CSV First**: Default to csv-wave for tasks; only use interactive when multi-round interaction is required +5. **Context Propagation**: prev_context built from master CSV, not from memory +6. 
**Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson +7. **Skip on Failure**: If a dependency failed, skip the dependent task +8. **GC Loop Discipline**: Max 3 rounds per layer; never infinite-loop on coverage +9. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +10. **DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-testing/agents/executor.md b/.codex/skills/team-testing/agents/executor.md new file mode 100644 index 00000000..d9b53afb --- /dev/null +++ b/.codex/skills/team-testing/agents/executor.md @@ -0,0 +1,195 @@ +# Test Executor Agent + +Interactive agent that executes test suites, collects coverage, and performs iterative auto-fix cycles. Acts as the Critic in the Generator-Critic loop. + +## Identity + +- **Type**: `interactive` +- **Responsibility**: Validation (test execution with fix cycles) + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Run test suites using the correct framework command +- Collect coverage data from test output or coverage reports +- Attempt auto-fix for failing tests (max 3 iterations per invocation) +- Only modify test files, NEVER modify source code +- Save results to session results directory +- Share defect discoveries to discoveries.ndjson +- Report pass rate and coverage in structured output + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Modify source code (only test files may be changed) +- Use `@ts-ignore`, `as any`, or skip/ignore test annotations +- Exceed 3 fix iterations without reporting current state +- Delete or disable existing passing tests + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | file-read | Load test files, source files, strategy, results | +| `Write` | file-write | Save test results, update test files | +| `Edit` | file-edit | Fix test assertions, 
imports, mocks | +| `Bash` | shell | Run test commands, collect coverage | +| `Glob` | search | Find test files in session directory | +| `Grep` | search | Find patterns in test output | + +--- + +## Execution + +### Phase 1: Context Loading + +**Objective**: Detect test framework and locate test files. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Session folder | Yes | Path to session directory | +| Layer | Yes | Target test layer (L1/L2/L3) | +| Coverage target | Yes | Minimum coverage percentage | +| Previous context | No | Findings from generator | + +**Steps**: + +1. Read discoveries.ndjson for framework detection info +2. Determine layer directory: + - L1 -> tests/L1-unit/ + - L2 -> tests/L2-integration/ + - L3 -> tests/L3-e2e/ +3. Find test files in the layer directory +4. Determine test framework command: + +| Framework | Command Template | +|-----------|-----------------| +| vitest | `npx vitest run --coverage --reporter=json {test-dir}` | +| jest | `npx jest --coverage --json --outputFile={results-file} {test-dir}` | +| pytest | `python -m pytest --cov --cov-report=json -v {test-dir}` | +| default | `npm test -- --coverage` | + +**Output**: Framework, test command, test file list + +--- + +### Phase 2: Iterative Test-Fix Cycle + +**Objective**: Run tests and fix failures up to 3 iterations. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Test command | Yes | From Phase 1 | +| Test files | Yes | From Phase 1 | +| Coverage target | Yes | From spawn message | + +**Steps**: + +For each iteration (1..3): + +1. Run test command, capture stdout/stderr +2. Parse results: extract passed/failed counts, parse coverage +3. Evaluate exit condition: + +| Condition | Action | +|-----------|--------| +| All tests pass AND coverage >= target | Exit loop: SUCCESS | +| pass_rate >= 0.95 AND iteration >= 2 | Exit loop: GOOD ENOUGH | +| iteration >= 3 | Exit loop: MAX ITERATIONS | + +4. 
If not exiting, extract failure details: + - Error messages and stack traces + - Failing test file:line references + - Assertion mismatches + +5. Apply targeted fixes: + - Fix incorrect assertions (expected vs actual swap) + - Fix missing imports or broken module paths + - Fix mock setup issues + - Fix async/await handling + - Do NOT skip tests, do NOT add type suppressions + +6. Share defect discoveries: + ```bash + echo '{"ts":"{iso-ts}","worker":"{task-id}","type":"defect_found","data":{"file":"{file}","line":{line},"pattern":"{pattern}","description":"{description}"}}' >> {session}/discoveries.ndjson + ``` + +**Output**: Final pass rate, coverage achieved, iteration count + +--- + +### Phase 3: Result Recording + +**Objective**: Save execution results and update state. + +**Steps**: + +1. Build result data: + ```json + { + "layer": "{layer}", + "framework": "{framework}", + "iterations": {iterations}, + "pass_rate": {pass_rate}, + "coverage": {coverage}, + "tests_passed": {tests_passed}, + "tests_failed": {tests_failed}, + "all_passed": {all_passed}, + "defect_patterns": [...] + } + ``` + +2. Save results to `{session}/results/run-{layer}.json` +3. Save last test output to `{session}/results/output-{layer}.txt` +4. Record effective test patterns (if pass_rate > 0.8): + - Happy path patterns that work + - Edge case patterns that catch bugs + - Error handling patterns + +--- + +## Structured Output Template + +``` +## Summary +- Test execution for {layer}: {pass_rate} pass rate, {coverage}% coverage after {iterations} iterations + +## Findings +- Finding 1: specific test result with file:line reference +- Finding 2: defect pattern discovered + +## Defect Patterns +- Pattern: type, frequency, severity +- Pattern: type, frequency, severity + +## Coverage +- Overall: {coverage}% +- Target: {target}% +- Gap files: file1 ({pct}%), file2 ({pct}%) + +## Open Questions +1. 
Any unresolvable test failures (if any) +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Test command not found | Try alternative commands (npx, npm test), report if all fail | +| No test files found | Report in findings, status = failed | +| Coverage tool unavailable | Degrade to pass rate only, report in findings | +| All tests timeout | Report with partial results, status = failed | +| Import resolution fails after fix | Report remaining failures, continue with other tests | +| Timeout approaching | Output current findings with "PARTIAL" status | diff --git a/.codex/skills/team-testing/agents/gc-loop-handler.md b/.codex/skills/team-testing/agents/gc-loop-handler.md new file mode 100644 index 00000000..ba9ef641 --- /dev/null +++ b/.codex/skills/team-testing/agents/gc-loop-handler.md @@ -0,0 +1,155 @@ +# GC Loop Handler Agent + +Interactive agent that manages Generator-Critic loop iterations. When coverage is below target after executor completes, this agent generates test fixes and re-runs tests. 
+ +## Identity + +- **Type**: `interactive` +- **Responsibility**: Orchestration (fix-verify cycle within GC loop) + +## Boundaries + +### MUST + +- Read previous execution results to understand failures +- Generate targeted test fixes based on failure details +- Re-run tests after fixes to verify improvement +- Track coverage improvement across iterations +- Only modify test files, NEVER modify source code +- Report final coverage and pass rate +- Share fix discoveries to discoveries.ndjson + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Modify source code (only test files) +- Use `@ts-ignore`, `as any`, or test skip annotations +- Run more than 1 fix-verify cycle per invocation (coordinator manages round count) +- Delete or disable passing tests + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | file-read | Load test results, test files, source files | +| `Write` | file-write | Write fixed test files | +| `Edit` | file-edit | Apply targeted test fixes | +| `Bash` | shell | Run test commands | +| `Glob` | search | Find test files | +| `Grep` | search | Search test output for patterns | + +--- + +## Execution + +### Phase 1: Failure Analysis + +**Objective**: Understand why tests failed or coverage was insufficient. + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Session folder | Yes | Path to session directory | +| Layer | Yes | Target test layer (L1/L2/L3) | +| Round number | Yes | Current GC round (1-3) | +| Previous results | Yes | Path to run-{layer}.json | + +**Steps**: + +1. Read previous execution results from results/run-{layer}.json +2. Read test output from results/output-{layer}.txt +3. 
Categorize failures: + +| Failure Type | Detection | Fix Strategy | +|--------------|-----------|--------------| +| Assertion mismatch | "expected X, received Y" | Correct expected values | +| Missing import | "Cannot find module" | Fix import paths | +| Null reference | "Cannot read property of null" | Add null guards in tests | +| Async issue | "timeout", "not resolved" | Fix async/await patterns | +| Mock issue | "mock not called" | Fix mock setup/teardown | +| Type error | "Type X is not assignable" | Fix type annotations | + +4. Identify uncovered files from coverage report + +**Output**: Failure categories, fix targets, uncovered areas + +--- + +### Phase 2: Fix Generation + Re-execution + +**Objective**: Apply fixes and verify improvement. + +**Steps**: + +1. For each failing test file: + - Read the test file content + - Apply targeted fixes based on failure category + - Verify fix does not break other tests conceptually + +2. For coverage gaps: + - Read uncovered source files + - Generate additional test cases targeting uncovered paths + - Append to existing test files or create new ones + +3. Re-run test suite with coverage: + ```bash + <test command> 2>&1 || true + ``` + +4. Parse new results: pass rate, coverage +5. Calculate improvement delta + +6. Share discoveries: + ```bash + echo '{"ts":"<iso8601>","worker":"gc-loop-<layer>-R<round>","type":"fix_applied","data":{"test_file":"<test_file>","fix_type":"<fix_type>","description":"<description>"}}' >> <session>/discoveries.ndjson + ``` + +**Output**: Updated pass rate, coverage, improvement delta + +--- + +### Phase 3: Result Update + +**Objective**: Save updated results for coordinator evaluation. + +**Steps**: + +1. Overwrite results/run-{layer}.json with new data +2. Save test output to results/output-{layer}.txt +3. 
Report improvement delta in findings + +--- + +## Structured Output Template + +``` +## Summary +- GC Loop Round <round> for <layer>: coverage <before>% -> <after>% (delta: +<delta>%) + +## Fixes Applied +- Fix 1: <test_file> - <fix_type> - <description> +- Fix 2: <test_file> - <fix_type> - <description> + +## Coverage Update +- Before: <before>%, After: <after>%, Target: <target>% +- Pass Rate: <before> -> <after> + +## Remaining Issues +- Issue 1: <description> (if any) +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| No previous results found | Report error, cannot proceed without baseline | +| All fixes cause new failures | Revert fixes, report inability to improve | +| Coverage tool unavailable | Use pass rate as proxy metric | +| Timeout approaching | Output partial results with current state | diff --git a/.codex/skills/team-testing/instructions/agent-instruction.md b/.codex/skills/team-testing/instructions/agent-instruction.md new file mode 100644 index 00000000..31494dc4 --- /dev/null +++ b/.codex/skills/team-testing/instructions/agent-instruction.md @@ -0,0 +1,142 @@ +# Agent Instruction Template -- Team Testing + +Base instruction template for CSV wave agents in the testing pipeline. Used by strategist, generator, and analyst roles (csv-wave tasks). + +## Purpose + +| Phase | Usage | +|-------|-------| +| Phase 1 | Coordinator builds instruction from this template with session folder baked in | +| Phase 2 | Injected as `instruction` parameter to `spawn_agents_on_csv` | + +--- + +## Base Instruction Template + +```markdown +## TASK ASSIGNMENT -- Team Testing + +### MANDATORY FIRST STEPS +1. Read shared discoveries: <session>/discoveries.ndjson (if exists, skip if not) +2. Read project context: .workflow/project-tech.json (if exists) +3. 
Read test strategy: /strategy/test-strategy.md (if exists, skip for strategist) + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Role**: {role} +**Layer**: {layer} +**Coverage Target**: {coverage_target}% + +### Task Description +{description} + +### Previous Tasks' Findings (Context) +{prev_context} + +--- + +## Execution Protocol + +### If Role = strategist + +1. **Analyze git diff**: Run `git diff --name-only HEAD~1 2>/dev/null || git diff --name-only --cached` to identify changed files +2. **Detect test framework**: Check for vitest.config.ts, jest.config.js, pytest.ini, pyproject.toml +3. **Scan existing test patterns**: Glob for `**/*.test.*` and `**/*.spec.*` to understand conventions +4. **Formulate strategy**: + - Classify changed files by impact (new, modified, deleted, config) + - Determine appropriate test layers (L1/L2/L3) + - Set coverage targets per layer + - Prioritize files for testing + - Document risk assessment +5. **Write strategy**: Save to /strategy/test-strategy.md +6. **Share discoveries**: Append framework detection and conventions to discoveries board: + ```bash + echo '{"ts":"","worker":"{id}","type":"framework_detected","data":{"framework":"","config_file":"","test_pattern":""}}' >> /discoveries.ndjson + ``` + +### If Role = generator + +1. **Read strategy**: Load /strategy/test-strategy.md for layer config and priority files +2. **Read source files**: Load files listed in strategy for the target layer +3. **Learn test patterns**: Find 3 existing test files to understand conventions (imports, structure, naming) +4. **Generate tests**: For each priority source file: + - Determine test file path following project conventions + - Generate test cases: happy path, edge cases, error handling + - Use proper test framework API (describe/it/test/expect) + - Include proper imports and mocks +5. **Write test files**: Save to /tests// + - L1 -> tests/L1-unit/ + - L2 -> tests/L2-integration/ + - L3 -> tests/L3-e2e/ +6. 
**Syntax check**: Run `tsc --noEmit` or equivalent to verify syntax +7. **Share discoveries**: Append test generation info to discoveries board: + ```bash + echo '{"ts":"","worker":"{id}","type":"test_generated","data":{"file":"","source_file":"","test_count":}}' >> /discoveries.ndjson + ``` + +### If Role = analyst + +1. **Read all results**: Load /results/run-*.json for execution data +2. **Read strategy**: Load /strategy/test-strategy.md +3. **Read discoveries**: Parse /discoveries.ndjson for defect patterns +4. **Analyze coverage**: Compare achieved vs target per layer +5. **Analyze defect patterns**: Group by type/frequency, assign severity +6. **Assess GC effectiveness**: Review improvement across rounds +7. **Calculate quality score** (0-100): + - Coverage achievement: 30% weight + - Test effectiveness: 25% weight + - Defect detection: 25% weight + - GC loop efficiency: 20% weight +8. **Generate report**: Write comprehensive analysis to /analysis/quality-report.md +9. **Share discoveries**: Append analysis findings to discoveries board + +--- + +## Output (report_agent_job_result) + +Return JSON: +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Key discoveries and implementation notes (max 500 chars)", + "pass_rate": "test pass rate as decimal (empty for non-executor tasks)", + "coverage_achieved": "actual coverage percentage (empty for non-executor tasks)", + "test_files": "semicolon-separated paths of test files (empty for non-generator tasks)", + "error": "" +} +``` + +--- + +## Quality Requirements + +All agents must verify before reporting complete: + +| Requirement | Criteria | +|-------------|----------| +| Strategy written | Verify test-strategy.md exists (strategist) | +| Tests generated | Verify test files exist in correct layer dir (generator) | +| Syntax clean | No compilation errors in generated tests (generator) | +| Report written | Verify quality-report.md exists (analyst) | +| Findings accuracy | Findings reflect actual 
work done | +| Discovery sharing | At least 1 discovery shared to board | +| Error reporting | Non-empty error field if status is failed | + +--- + +## Placeholder Reference + +| Placeholder | Resolved By | When | +|-------------|------------|------| +| `` | Skill designer (Phase 1) | Literal path baked into instruction | +| `{id}` | spawn_agents_on_csv | Runtime from CSV row | +| `{title}` | spawn_agents_on_csv | Runtime from CSV row | +| `{description}` | spawn_agents_on_csv | Runtime from CSV row | +| `{role}` | spawn_agents_on_csv | Runtime from CSV row | +| `{layer}` | spawn_agents_on_csv | Runtime from CSV row | +| `{coverage_target}` | spawn_agents_on_csv | Runtime from CSV row | +| `{prev_context}` | spawn_agents_on_csv | Runtime from CSV row | diff --git a/.codex/skills/team-testing/schemas/tasks-schema.md b/.codex/skills/team-testing/schemas/tasks-schema.md new file mode 100644 index 00000000..7ac8ad60 --- /dev/null +++ b/.codex/skills/team-testing/schemas/tasks-schema.md @@ -0,0 +1,172 @@ +# Team Testing -- CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### Input Columns (Set by Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | string | Yes | Unique task identifier (PREFIX-NNN) | `"STRATEGY-001"` | +| `title` | string | Yes | Short task title | `"Analyze changes and define test strategy"` | +| `description` | string | Yes | Detailed task description (self-contained) | `"Analyze git diff, detect framework..."` | +| `role` | enum | Yes | Worker role: `strategist`, `generator`, `executor`, `analyst` | `"generator"` | +| `layer` | string | No | Test layer: `L1`, `L2`, `L3`, or empty | `"L1"` | +| `coverage_target` | string | No | Target coverage percentage for this layer | `"80"` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"STRATEGY-001"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | 
`"STRATEGY-001"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[STRATEGY-001] Detected vitest, L1 target 80%..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` -> `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Generated 5 test files covering auth module..."` | +| `pass_rate` | string | Test pass rate as decimal | `"0.95"` | +| `coverage_achieved` | string | Actual coverage percentage achieved | `"82"` | +| `test_files` | string | Semicolon-separated paths of test files | `"tests/L1-unit/auth.test.ts;tests/L1-unit/user.test.ts"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution (executor fix cycles) | + +Interactive tasks appear in master CSV for dependency tracking but are NOT included in wave-{N}.csv files. 
+ +--- + +### Role Prefixes + +| Role | Prefix | Responsibility Type | +|------|--------|---------------------| +| strategist | STRATEGY | read-only analysis | +| generator | TESTGEN | code-gen (test files) | +| executor | TESTRUN | validation (run + fix) | +| analyst | TESTANA | read-only analysis | + +--- + +### Example Data + +```csv +id,title,description,role,layer,coverage_target,deps,context_from,exec_mode,wave,status,findings,pass_rate,coverage_achieved,test_files,error +"STRATEGY-001","Analyze changes and define test strategy","Analyze git diff for changed files. Detect test framework (vitest/jest/pytest). Determine test layers needed (L1/L2/L3). Define coverage targets per layer. Generate prioritized test strategy document at /strategy/test-strategy.md","strategist","","","","","csv-wave","1","pending","","","","","" +"TESTGEN-001","Generate L1 unit tests","Generate L1 unit tests for priority files from strategy. Read source files, identify exports, generate test cases covering happy path, edge cases, error handling. Write tests to /tests/L1-unit/. Follow project test conventions.","generator","L1","80","STRATEGY-001","STRATEGY-001","csv-wave","2","pending","","","","","" +"TESTRUN-001","Execute L1 tests and collect coverage","Run L1 test suite with coverage collection. Parse results for pass rate and coverage. If pass_rate < 0.95 or coverage < 80%, attempt auto-fix (max 3 iterations). Save results to /results/run-L1.json","executor","L1","80","TESTGEN-001","TESTGEN-001","interactive","3","pending","","","","","" +"TESTGEN-002","Generate L2 integration tests","Generate L2 integration tests based on L1 results and strategy. Focus on module interaction points. Write tests to /tests/L2-integration/.","generator","L2","60","TESTRUN-001","TESTRUN-001","csv-wave","4","pending","","","","","" +"TESTRUN-002","Execute L2 tests and collect coverage","Run L2 integration test suite with coverage. Auto-fix up to 3 iterations. 
Save results to /results/run-L2.json","executor","L2","60","TESTGEN-002","TESTGEN-002","interactive","5","pending","","","","","" +"TESTANA-001","Quality analysis report","Analyze defect patterns, coverage gaps, GC loop effectiveness. Generate quality report with score and recommendations. Write to /analysis/quality-report.md","analyst","","","TESTRUN-002","TESTRUN-001;TESTRUN-002","csv-wave","6","pending","","","","","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +--------------------- -------------------- ----------------- +id ----------> id ----------> id +title ----------> title ----------> (reads) +description ----------> description ----------> (reads) +role ----------> role ----------> (reads) +layer ----------> layer ----------> (reads) +coverage_target -------> coverage_target -------> (reads) +deps ----------> deps ----------> (reads) +context_from----------> context_from----------> (reads) +exec_mode ----------> exec_mode ----------> (reads) + wave ----------> (reads) + prev_context ----------> (reads) + status + findings + pass_rate + coverage_achieved + test_files + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "TESTGEN-001", + "status": "completed", + "findings": "Generated 5 L1 unit test files covering auth, user, and session modules. Total 24 test cases: 15 happy path, 6 edge cases, 3 error handling.", + "pass_rate": "", + "coverage_achieved": "", + "test_files": "tests/L1-unit/auth.test.ts;tests/L1-unit/user.test.ts;tests/L1-unit/session.test.ts", + "error": "" +} +``` + +Interactive tasks output via structured text or JSON written to `interactive/{id}-result.json`. 
+ +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `framework_detected` | `data.framework` | `{framework, config_file, test_pattern}` | Test framework identified | +| `test_generated` | `data.file` | `{file, source_file, test_count}` | Test file created | +| `defect_found` | `data.file+data.line` | `{file, line, pattern, description}` | Defect pattern discovered | +| `coverage_gap` | `data.file` | `{file, current, target, gap}` | Coverage gap identified | +| `convention_found` | `data.pattern` | `{pattern, example_file, description}` | Test convention detected | +| `fix_applied` | `data.test_file+data.fix_type` | `{test_file, fix_type, description}` | Test fix during GC loop | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"STRATEGY-001","type":"framework_detected","data":{"framework":"vitest","config_file":"vitest.config.ts","test_pattern":"**/*.test.ts"}} +{"ts":"2026-03-08T10:05:00Z","worker":"TESTGEN-001","type":"test_generated","data":{"file":"tests/L1-unit/auth.test.ts","source_file":"src/auth.ts","test_count":8}} +{"ts":"2026-03-08T10:10:00Z","worker":"TESTRUN-001","type":"defect_found","data":{"file":"src/auth.ts","line":42,"pattern":"null_reference","description":"Missing null check on token payload"}} +{"ts":"2026-03-08T10:12:00Z","worker":"TESTRUN-001","type":"fix_applied","data":{"test_file":"tests/L1-unit/auth.test.ts","fix_type":"assertion_fix","description":"Fixed expected return type assertion"}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. 
+ +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| CSV task findings | Interactive task | Injected via spawn message | +| Interactive task result | CSV task prev_context | Read from interactive/{id}-result.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | +| Executor coverage data | GC loop handler | Read from results/run-{layer}.json | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Description non-empty | Every task has description | "Empty description for task: {id}" | +| Status enum | status in {pending, completed, failed, skipped} | "Invalid status: {status}" | +| Role valid | role in {strategist, generator, executor, analyst} | "Invalid role: {role}" | +| Layer valid | layer in {L1, L2, L3, ""} | "Invalid layer: {layer}" | +| Coverage target valid | If layer present, coverage_target is numeric | "Invalid coverage target: {value}" | diff --git a/.codex/skills/team-uidesign/SKILL.md b/.codex/skills/team-uidesign/SKILL.md new file mode 100644 index 00000000..1c74b8da --- /dev/null +++ b/.codex/skills/team-uidesign/SKILL.md @@ -0,0 +1,729 @@ +--- +name: team-uidesign +description: UI design team pipeline. Research existing design system, generate design tokens (W3C format), audit quality, and implement code. CSV wave pipeline with GC loop (designer <-> reviewer) and dual-track parallel support. 
+argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"UI design task description\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults for scope/industry/constraints. + +# Team UI Design + +## Usage + +```bash +$team-uidesign "Design a button component with tokens and accessibility" +$team-uidesign -c 3 "Create a complete design system for our SaaS dashboard" +$team-uidesign -y "Full design system redesign for healthcare portal" +$team-uidesign --continue "uds-saas-dashboard-20260308" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 3) +- `--continue`: Resume existing session + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Systematic UI design pipeline: research existing design system, generate design tokens (W3C Design Tokens Format), audit for quality/accessibility, and implement production code. Roles: researcher, designer, reviewer, implementer -- dynamically assigned as CSV wave tasks with dependency ordering. Supports component (4-task), system (7-task), and full-system (8-task) pipeline modes. Designer <-> Reviewer Generator-Critic loop with max 2 rounds. 
+ +**Execution Model**: Hybrid -- CSV wave pipeline (primary) + individual agent spawn (secondary) + +``` ++-------------------------------------------------------------------+ +| TEAM UI DESIGN WORKFLOW | ++-------------------------------------------------------------------+ +| | +| Phase 0: Pre-Wave Interactive (Requirement Clarification) | +| +- Parse UI design task description | +| +- Select scope (component/system/full-system), industry | +| +- Output: refined requirements for decomposition | +| | +| Phase 1: Requirement -> CSV + Classification | +| +- Signal detection: keyword scan -> pipeline inference | +| +- Pipeline selection (component/system/full-system) | +| +- Dependency graph from pipeline definition | +| +- Classify tasks: csv-wave | interactive (exec_mode) | +| +- Compute dependency waves (topological sort) | +| +- Generate tasks.csv with wave + exec_mode columns | +| +- User validates task breakdown (skip if -y) | +| | +| Phase 2: Wave Execution Engine (Extended) | +| +- For each wave (1..N): | +| | +- Execute pre-wave interactive tasks (if any) | +| | +- Build wave CSV (filter csv-wave tasks for this wave) | +| | +- Inject previous findings into prev_context column | +| | +- spawn_agents_on_csv(wave CSV) | +| | +- Execute post-wave interactive tasks (if any) | +| | +- Merge all results into master tasks.csv | +| | +- Check: any failed? 
-> skip dependents | +| | +- GC Loop: if audit fails, create DESIGN-fix + AUDIT-re | +| +- discoveries.ndjson shared across all modes (append-only) | +| | +| Phase 3: Post-Wave Interactive (Completion Action) | +| +- Pipeline completion report with deliverables listing | +| +- Interactive completion choice (Archive/Keep/Export) | +| +- Final aggregation / report | +| | +| Phase 4: Results Aggregation | +| +- Export final results.csv | +| +- Generate context.md with all findings | +| +- Display summary: completed/failed/skipped per wave | +| +- Offer: view results | retry failed | done | +| | ++-------------------------------------------------------------------+ +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, revision cycles, user approval | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Design system research (researcher) | `csv-wave` | +| Token system design (designer) | `csv-wave` | +| Component specification (designer) | `csv-wave` | +| 5-dimension audit (reviewer) | `csv-wave` | +| Token/component implementation (implementer) | `csv-wave` | +| GC loop fix revision (designer) | `csv-wave` | +| GC loop escalation (user decision on audit failure) | `interactive` | +| Pipeline completion action | `interactive` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,role,pipeline_mode,scope,audit_type,deps,context_from,exec_mode,wave,status,findings,artifacts_produced,audit_score,audit_signal,error +"RESEARCH-001","Design system analysis","PURPOSE: Analyze existing design system...","researcher","component","full","","","","csv-wave","1","pending","","","","","" +"DESIGN-001","Design 
tokens + component spec","PURPOSE: Define design tokens...","designer","component","tokens","","RESEARCH-001","RESEARCH-001","csv-wave","2","pending","","","","","" +"AUDIT-001","Design audit","PURPOSE: 5-dimension quality audit...","reviewer","component","full","token-audit","DESIGN-001","DESIGN-001","csv-wave","3","pending","","","","","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier (PREFIX-NNN: RESEARCH, DESIGN, AUDIT, BUILD) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description with PURPOSE/TASK/CONTEXT/EXPECTED/CONSTRAINTS | +| `role` | Input | Role name: `researcher`, `designer`, `reviewer`, `implementer` | +| `pipeline_mode` | Input | Pipeline: `component`, `system`, `full-system` | +| `scope` | Input | Task scope: `full`, `tokens`, `components` | +| `audit_type` | Input | Audit type: `token-audit`, `component-audit`, `final-audit` (empty for non-reviewer) | +| `deps` | Input | Semicolon-separated dependency task IDs | +| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `wave` | Computed | Wave number (computed by topological sort, 1-based) | +| `status` | Output | `pending` -> `completed` / `failed` / `skipped` | +| `findings` | Output | Key discoveries or implementation notes (max 500 chars) | +| `artifacts_produced` | Output | Semicolon-separated paths of produced artifacts | +| `audit_score` | Output | Audit weighted score (0-10, empty for non-reviewer tasks) | +| `audit_signal` | Output | Audit signal: `audit_passed`, `audit_result`, `fix_required` (empty for non-reviewer) | +| `error` | Output | Error message if failed (empty if success) | + +### Per-Wave CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). 
+ +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| GC Loop Handler | agents/gc-loop-handler.md | 2.3 (send_input cycle) | Handle audit GC loop escalation decisions | post-wave | +| Completion Handler | agents/completion-handler.md | 2.3 (send_input cycle) | Handle pipeline completion action (Archive/Keep/Export) | standalone | + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. + +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state -- all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 4 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 4 | +| `task-analysis.json` | Phase 0/1 output: scope, pipeline, industry | Created in Phase 1 | +| `role-instructions/` | Per-role instruction templates for CSV agents | Created in Phase 1 | +| `artifacts/` | All deliverables: research, design, audit, build artifacts | Created by agents | +| `interactive/{id}-result.json` | Results from interactive tasks | Created per interactive task | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ ++-- tasks.csv # Master state (all tasks, both modes) ++-- results.csv # Final results export ++-- discoveries.ndjson # Shared discovery board (all agents) ++-- context.md # Human-readable report ++-- task-analysis.json # Phase 1 analysis output ++-- wave-{N}.csv # Temporary per-wave input (csv-wave only) ++-- role-instructions/ # Per-role instruction 
templates +| +-- researcher.md +| +-- designer.md +| +-- reviewer.md +| +-- implementer.md ++-- artifacts/ # All deliverables +| +-- research/ +| | +-- design-system-analysis.json +| | +-- component-inventory.json +| | +-- accessibility-audit.json +| | +-- design-intelligence.json +| +-- design/ +| | +-- design-tokens.json +| | +-- component-specs/ +| | +-- layout-specs/ +| +-- audit/ +| | +-- audit-001.md +| +-- build/ +| +-- token-files/ +| +-- component-files/ ++-- interactive/ # Interactive task artifacts +| +-- {id}-result.json ++-- wisdom/ # Cross-task knowledge + +-- learnings.md + +-- decisions.md +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +const getUtc8ISOString = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString() + +const AUTO_YES = $ARGUMENTS.includes('--yes') || $ARGUMENTS.includes('-y') +const continueMode = $ARGUMENTS.includes('--continue') +const concurrencyMatch = $ARGUMENTS.match(/(?:--concurrency|-c)\s+(\d+)/) +const maxConcurrency = concurrencyMatch ? 
parseInt(concurrencyMatch[1]) : 3 + +const requirement = $ARGUMENTS + .replace(/--yes|-y|--continue|--concurrency\s+\d+|-c\s+\d+/g, '') + .trim() + +const slug = requirement.toLowerCase() + .replace(/[^a-z0-9\u4e00-\u9fa5]+/g, '-') + .substring(0, 40) +const dateStr = getUtc8ISOString().substring(0, 10).replace(/-/g, '') +const sessionId = `uds-${slug}-${dateStr}` +const sessionFolder = `.workflow/.csv-wave/${sessionId}` + +Bash(`mkdir -p ${sessionFolder}/artifacts/research ${sessionFolder}/artifacts/design/component-specs ${sessionFolder}/artifacts/design/layout-specs ${sessionFolder}/artifacts/audit ${sessionFolder}/artifacts/build/token-files ${sessionFolder}/artifacts/build/component-files ${sessionFolder}/role-instructions ${sessionFolder}/interactive ${sessionFolder}/wisdom`) + +Write(`${sessionFolder}/discoveries.ndjson`, '') +Write(`${sessionFolder}/wisdom/learnings.md`, '# Learnings\n') +Write(`${sessionFolder}/wisdom/decisions.md`, '# Decisions\n') +``` + +--- + +### Phase 0: Pre-Wave Interactive (Requirement Clarification) + +**Objective**: Parse UI design task, clarify scope/industry/constraints, prepare for decomposition. + +**Workflow**: + +1. **Parse user task description** from $ARGUMENTS + +2. **Check for existing sessions** (continue mode): + - Scan `.workflow/.csv-wave/uds-*/tasks.csv` for sessions with pending tasks + - If `--continue`: resume the specified or most recent session, skip to Phase 2 + - If active session found: ask user whether to resume or start new + +3. 
**Clarify scope and industry** (skip if AUTO_YES): + + **Scope Selection**: + + | Option | Pipeline | Task Count | + |--------|----------|------------| + | Single component | component | 4 tasks (linear) | + | Component system | system | 7 tasks (dual-track parallel) | + | Full design system | full-system | 8 tasks (dual-track + final audit) | + + **Industry Selection**: + + | Option | Strictness | + |--------|------------| + | SaaS/Tech | standard | + | E-commerce/Retail | standard | + | Healthcare/Finance | strict (extra accessibility) | + | Education/Content | standard | + | Other | standard | + +4. **Signal Detection** for pipeline selection: + + | Signal | Keywords | Pipeline Hint | + |--------|----------|---------------| + | Component | component, button, card, input, modal | component | + | System | design system, token, theme, multiple components | system | + | Full | complete, full, all components, redesign | full-system | + | Accessibility | accessibility, a11y, wcag | component or system | + +5. **Complexity Scoring**: + + | Factor | Points | + |--------|--------| + | Single component | +1 | + | Component system | +2 | + | Full design system | +3 | + | Accessibility required | +1 | + | Multiple industries/constraints | +1 | + + Results: 1-2 component, 3-4 system, 5+ full-system. + +6. **Industry Detection**: + + | Keywords | Industry | + |----------|----------| + | saas, dashboard, analytics | SaaS/Tech | + | shop, cart, checkout | E-commerce | + | medical, patient, healthcare | Healthcare | + | bank, finance, payment | Finance | + | edu, course, learning | Education/Content | + | Default | SaaS/Tech | + +7. Record: pipeline_mode, industry, complexity + +**Success Criteria**: +- Scope, industry, constraints determined +- Pipeline mode selected (component/system/full-system) + +--- + +### Phase 1: Requirement -> CSV + Classification + +**Objective**: Build task dependency graph, generate tasks.csv and per-role instruction templates. 
+ +**Decomposition Rules**: + +1. **Pipeline Selection** based on scope: + + | Scope | Pipeline | Tasks | + |-------|----------|-------| + | component | RESEARCH-001 -> DESIGN-001 -> AUDIT-001 -> BUILD-001 | 4 | + | system | RESEARCH-001 -> DESIGN-001 -> AUDIT-001 -> [DESIGN-002 + BUILD-001] -> AUDIT-002 -> BUILD-002 | 7 | + | full-system | system chain + AUDIT-003 after BUILD-002 | 8 | + +2. **Task Description Template**: Every task description uses PURPOSE/TASK/CONTEXT/EXPECTED/CONSTRAINTS format + +3. **Role Instruction Generation**: Write per-role instruction templates to `role-instructions/{role}.md` using the base instruction template customized for each role (researcher, designer, reviewer, implementer) + +**Classification Rules**: + +| Task Property | exec_mode | +|---------------|-----------| +| Research analysis pass (researcher) | `csv-wave` | +| Token design pass (designer) | `csv-wave` | +| Component spec pass (designer) | `csv-wave` | +| Audit pass (reviewer) | `csv-wave` | +| Implementation pass (implementer) | `csv-wave` | +| GC fix revision (designer) | `csv-wave` | +| All standard pipeline tasks | `csv-wave` | + +**Wave Computation**: Kahn's BFS topological sort with depth tracking. + +```javascript +// Generate per-role instruction templates +for (const role of ['researcher', 'designer', 'reviewer', 'implementer']) { + const instruction = generateRoleInstruction(role, sessionFolder) + Write(`${sessionFolder}/role-instructions/${role}.md`, instruction) +} + +// Generate tasks.csv from pipeline definition +const tasks = buildTasksCsv(pipelineMode, requirement, sessionFolder, industry, constraints) +Write(`${sessionFolder}/tasks.csv`, toCsv(tasks)) +Write(`${sessionFolder}/task-analysis.json`, JSON.stringify(analysisResult, null, 2)) +``` + +**User Validation**: Display task breakdown with wave + exec_mode assignment (skip if AUTO_YES). 
+
+**Success Criteria**:
+- tasks.csv created with valid schema, wave, and exec_mode assignments
+- Role instruction templates generated in role-instructions/
+- task-analysis.json written
+- No circular dependencies
+- User approved (or AUTO_YES)
+
+---
+
+### Phase 2: Wave Execution Engine (Extended)
+
+**Objective**: Execute tasks wave-by-wave with hybrid mechanism support and cross-wave context propagation.
+
+```javascript
+const masterCsv = Read(`${sessionFolder}/tasks.csv`)
+let tasks = parseCsv(masterCsv)
+// Normalize wave to a number once: parseCsv yields strings, and both the
+// strict comparison (t.wave === wave) below and Math.max need numbers.
+for (const t of tasks) t.wave = Number(t.wave)
+let maxWave = Math.max(...tasks.map(t => t.wave))
+let gcRound = 0
+const MAX_GC_ROUNDS = 2
+
+// maxWave is re-evaluated in the loop condition: the GC loop (step 9) may
+// append fix/recheck tasks at wave+1 / wave+2, so the schedule can grow
+// beyond the wave count computed before the loop started.
+for (let wave = 1; wave <= (maxWave = Math.max(...tasks.map(t => t.wave))); wave++) {
+  console.log(`\nWave ${wave}/${maxWave}`)
+
+  // 1. Separate tasks by exec_mode
+  const waveTasks = tasks.filter(t => t.wave === wave && t.status === 'pending')
+  const csvTasks = waveTasks.filter(t => t.exec_mode === 'csv-wave')
+  const interactiveTasks = waveTasks.filter(t => t.exec_mode === 'interactive')
+
+  // 2. Check dependencies -- skip tasks whose deps failed
+  for (const task of waveTasks) {
+    const depIds = (task.deps || '').split(';').filter(Boolean)
+    const depStatuses = depIds.map(id => tasks.find(t => t.id === id)?.status)
+    if (depStatuses.some(s => s === 'failed' || s === 'skipped')) {
+      task.status = 'skipped'
+      task.error = `Dependency failed: ${depIds.filter((id, i) =>
+        ['failed','skipped'].includes(depStatuses[i])).join(', ')}`
+    }
+  }
+
+  // 3. Execute pre-wave interactive tasks (if any)
+  const preWaveInteractive = interactiveTasks.filter(t => t.status === 'pending')
+  for (const task of preWaveInteractive) {
+    Read(`agents/gc-loop-handler.md`)
+    const agent = spawn_agent({
+      message: `## TASK ASSIGNMENT\n\n### MANDATORY FIRST STEPS\n1. Read: agents/gc-loop-handler.md\n2. 
Read: ${sessionFolder}/discoveries.ndjson\n\nGoal: ${task.description}\nScope: ${task.title}\nSession: ${sessionFolder}\n\n### Previous Context\n${buildPrevContext(task, tasks)}` + }) + const result = wait({ ids: [agent], timeout_ms: 600000 }) + if (result.timed_out) { + send_input({ id: agent, message: "Please finalize and output current findings." }) + wait({ ids: [agent], timeout_ms: 120000 }) + } + Write(`${sessionFolder}/interactive/${task.id}-result.json`, JSON.stringify({ + task_id: task.id, status: "completed", findings: parseFindings(result), + timestamp: getUtc8ISOString() + })) + close_agent({ id: agent }) + task.status = 'completed' + task.findings = parseFindings(result) + } + + // 4. Build prev_context for csv-wave tasks + const pendingCsvTasks = csvTasks.filter(t => t.status === 'pending') + for (const task of pendingCsvTasks) { + task.prev_context = buildPrevContext(task, tasks) + } + + if (pendingCsvTasks.length > 0) { + // 5. Write wave CSV + Write(`${sessionFolder}/wave-${wave}.csv`, toCsv(pendingCsvTasks)) + + // 6. Build instruction per role group + const waveInstruction = buildWaveInstruction(pendingCsvTasks, sessionFolder, wave) + + // 7. Execute wave via spawn_agents_on_csv + spawn_agents_on_csv({ + csv_path: `${sessionFolder}/wave-${wave}.csv`, + id_column: "id", + instruction: waveInstruction, + max_concurrency: maxConcurrency, + max_runtime_seconds: 900, + output_csv_path: `${sessionFolder}/wave-${wave}-results.csv`, + output_schema: { + type: "object", + properties: { + id: { type: "string" }, + status: { type: "string", enum: ["completed", "failed"] }, + findings: { type: "string" }, + artifacts_produced: { type: "string" }, + audit_score: { type: "string" }, + audit_signal: { type: "string" }, + error: { type: "string" } + } + } + }) + + // 8. 
Merge results into master CSV
+    const results = parseCsv(Read(`${sessionFolder}/wave-${wave}-results.csv`))
+    for (const r of results) {
+      const t = tasks.find(t => t.id === r.id)
+      if (t) Object.assign(t, r)
+    }
+
+    // 9. GC Loop Check: if a reviewer task returned fix_required
+    const auditResults = results.filter(r => r.id.startsWith('AUDIT') && r.audit_signal === 'fix_required')
+    for (const ar of auditResults) {
+      if (gcRound < MAX_GC_ROUNDS) {
+        gcRound++
+        // Reuse the failing audit's own audit_type for the recheck instead of
+        // hardcoding token-audit, so component/final audits re-run correctly.
+        const failedAudit = tasks.find(t => t.id === ar.id)
+        const recheckAuditType = failedAudit?.audit_type || 'token-audit'
+        const fixId = `DESIGN-fix-${gcRound}`
+        const recheckId = `AUDIT-recheck-${gcRound}`
+        tasks.push({
+          id: fixId, title: `Fix audit issues (round ${gcRound})`,
+          description: `PURPOSE: Address audit feedback from ${ar.id} | Success: All critical/high issues resolved\nTASK:\n- Parse audit feedback for specific issues\n- Apply targeted fixes to design tokens/specs\n- Re-validate affected artifacts\nCONTEXT:\n- Session: ${sessionFolder}\n- Upstream: artifacts/audit/\nEXPECTED: Fixed design artifacts\nCONSTRAINTS: Targeted fixes only`,
+          role: 'designer', pipeline_mode: tasks[0].pipeline_mode, scope: 'full',
+          audit_type: '', deps: ar.id, context_from: ar.id,
+          exec_mode: 'csv-wave', wave: wave + 1, status: 'pending',
+          findings: '', artifacts_produced: '', audit_score: '', audit_signal: '', error: ''
+        })
+        tasks.push({
+          id: recheckId, title: `Audit recheck (round ${gcRound})`,
+          description: `PURPOSE: Re-audit after fixes | Success: Score >= 8, critical == 0\nTASK:\n- Execute 5-dimension audit on fixed artifacts\n- Focus on previously flagged issues\nCONTEXT:\n- Session: ${sessionFolder}\n- Audit type: ${recheckAuditType}\nEXPECTED: artifacts/audit/audit-recheck-${gcRound}.md`,
+          role: 'reviewer', pipeline_mode: tasks[0].pipeline_mode, scope: 'full',
+          audit_type: recheckAuditType, deps: fixId, context_from: fixId,
+          exec_mode: 'csv-wave', wave: wave + 2, status: 'pending',
+          findings: '', artifacts_produced: '', audit_score: '', audit_signal: '', error: ''
+        })
+      }
+    }
+  }
+
+  // 10. 
Update master CSV + Write(`${sessionFolder}/tasks.csv`, toCsv(tasks)) + + // 11. Cleanup temp files + Bash(`rm -f ${sessionFolder}/wave-${wave}.csv ${sessionFolder}/wave-${wave}-results.csv`) + + // 12. Display wave summary + const completed = waveTasks.filter(t => t.status === 'completed').length + const failed = waveTasks.filter(t => t.status === 'failed').length + const skipped = waveTasks.filter(t => t.status === 'skipped').length + console.log(`Wave ${wave} Complete: ${completed} completed, ${failed} failed, ${skipped} skipped`) +} +``` + +**Success Criteria**: +- All waves executed in order +- Both csv-wave and interactive tasks handled per wave +- Each wave's results merged into master CSV before next wave starts +- Dependent tasks skipped when predecessor failed +- GC loop (designer <-> reviewer) handled with max 2 rounds +- discoveries.ndjson accumulated across all waves and mechanisms + +--- + +### Phase 3: Post-Wave Interactive (Completion Action) + +**Objective**: Pipeline completion report with deliverables listing and interactive completion choice. 
+ +```javascript +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +const completed = tasks.filter(t => t.status === 'completed') +const failed = tasks.filter(t => t.status === 'failed') + +const deliverables = [ + { name: "Design System Analysis", path: `${sessionFolder}/artifacts/research/design-system-analysis.json` }, + { name: "Component Inventory", path: `${sessionFolder}/artifacts/research/component-inventory.json` }, + { name: "Accessibility Audit", path: `${sessionFolder}/artifacts/research/accessibility-audit.json` }, + { name: "Design Intelligence", path: `${sessionFolder}/artifacts/research/design-intelligence.json` }, + { name: "Design Tokens", path: `${sessionFolder}/artifacts/design/design-tokens.json` }, + { name: "Component Specs", path: `${sessionFolder}/artifacts/design/component-specs/` }, + { name: "Audit Reports", path: `${sessionFolder}/artifacts/audit/` }, + { name: "Token Files", path: `${sessionFolder}/artifacts/build/token-files/` }, + { name: "Component Files", path: `${sessionFolder}/artifacts/build/component-files/` } +] + +console.log(` +============================================ +UI DESIGN TEAM COMPLETE + +Pipeline: ${completed.length}/${tasks.length} tasks (${tasks[0]?.pipeline_mode} mode) +GC Rounds: ${gcRound}/${MAX_GC_ROUNDS} +Session: ${sessionFolder} + +Deliverables: +${deliverables.map(d => ` - ${d.name}: ${d.path}`).join('\n')} +============================================ +`) + +if (!AUTO_YES) { + // Spawn completion handler interactive agent + Read(`agents/completion-handler.md`) + const agent = spawn_agent({ + message: `## TASK ASSIGNMENT\n\n### MANDATORY FIRST STEPS\n1. Read: agents/completion-handler.md\n2. 
Read: ${sessionFolder}/tasks.csv\n\nGoal: Handle pipeline completion action\nSession: ${sessionFolder}\nDeliverables: ${JSON.stringify(deliverables)}` + }) + const result = wait({ ids: [agent], timeout_ms: 300000 }) + close_agent({ id: agent }) +} +``` + +**Success Criteria**: +- Post-wave interactive processing complete +- User informed of deliverables and pipeline status + +--- + +### Phase 4: Results Aggregation + +**Objective**: Generate final results and human-readable report. + +```javascript +Bash(`cp ${sessionFolder}/tasks.csv ${sessionFolder}/results.csv`) + +const tasks = parseCsv(Read(`${sessionFolder}/tasks.csv`)) +let contextMd = `# UI Design Report\n\n` +contextMd += `**Session**: ${sessionId}\n` +contextMd += `**Pipeline**: ${tasks[0]?.pipeline_mode} mode\n` +contextMd += `**Date**: ${getUtc8ISOString().substring(0, 10)}\n\n` + +contextMd += `## Summary\n` +contextMd += `| Status | Count |\n|--------|-------|\n` +contextMd += `| Completed | ${tasks.filter(t => t.status === 'completed').length} |\n` +contextMd += `| Failed | ${tasks.filter(t => t.status === 'failed').length} |\n` +contextMd += `| Skipped | ${tasks.filter(t => t.status === 'skipped').length} |\n\n` + +const maxWave = Math.max(...tasks.map(t => t.wave)) +contextMd += `## Wave Execution\n\n` +for (let w = 1; w <= maxWave; w++) { + const waveTasks = tasks.filter(t => t.wave === w) + contextMd += `### Wave ${w}\n\n` + for (const t of waveTasks) { + const icon = t.status === 'completed' ? '[DONE]' : t.status === 'failed' ? 
'[FAIL]' : '[SKIP]' + contextMd += `${icon} **${t.title}** [${t.role}]` + if (t.audit_score) contextMd += ` Score: ${t.audit_score}/10 (${t.audit_signal})` + contextMd += ` ${t.findings || ''}\n\n` + } +} + +contextMd += `## Audit Summary\n\n` +const auditResults = tasks.filter(t => t.role === 'reviewer' && t.audit_score) +for (const a of auditResults) { + contextMd += `- **${a.id}**: Score ${a.audit_score}/10 - ${a.audit_signal}\n` +} + +contextMd += `\n## GC Loop Summary\n` +contextMd += `- Rounds used: ${gcRound}/${MAX_GC_ROUNDS}\n` + +Write(`${sessionFolder}/context.md`, contextMd) +console.log(`Results exported to: ${sessionFolder}/results.csv`) +console.log(`Report generated at: ${sessionFolder}/context.md`) +``` + +**Success Criteria**: +- results.csv exported (all tasks, both modes) +- context.md generated with audit summary +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +All agents (csv-wave and interactive) share a single `discoveries.ndjson` file for cross-task knowledge exchange. 
+ +**Format**: One JSON object per line (NDJSON): + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"RESEARCH-001","type":"tech_stack_detected","data":{"stack":"react","framework":"nextjs","ui_lib":"shadcn"}} +{"ts":"2026-03-08T10:05:00Z","worker":"DESIGN-001","type":"token_generated","data":{"category":"color","count":24,"supports_dark_mode":true}} +{"ts":"2026-03-08T10:10:00Z","worker":"BUILD-001","type":"file_modified","data":{"file":"tokens.css","change":"Generated CSS custom properties","lines_added":85}} +{"ts":"2026-03-08T10:15:00Z","worker":"AUDIT-001","type":"issue_found","data":{"file":"design-tokens.json","line":0,"severity":"high","description":"Missing dark mode variant for semantic color tokens"}} +``` + +**Discovery Types**: + +| Type | Data Schema | Description | +|------|-------------|-------------| +| `tech_stack_detected` | `{stack, framework, ui_lib}` | Tech stack identified by researcher | +| `design_pattern_found` | `{pattern_name, location, description}` | Existing design pattern in codebase | +| `token_generated` | `{category, count, supports_dark_mode}` | Design token category created | +| `file_modified` | `{file, change, lines_added}` | File change recorded | +| `issue_found` | `{file, line, severity, description}` | Audit issue discovered | +| `anti_pattern_violation` | `{pattern, file, line, description}` | Design anti-pattern detected | +| `artifact_produced` | `{name, path, producer, type}` | Deliverable created | + +**Protocol**: +1. Agents MUST read discoveries.ndjson at start of execution +2. Agents MUST append relevant discoveries during execution +3. Agents MUST NOT modify or delete existing entries +4. 
Deduplication by `{type, data.file}` key + +--- + +## Pipeline Definitions + +### Component Mode (4 tasks, linear) + +``` +RESEARCH-001 --> DESIGN-001 --> AUDIT-001 --> BUILD-001 +[researcher] [designer] [reviewer] [implementer] + wave 1 wave 2 wave 3 wave 4 +``` + +### System Mode (7 tasks, dual-track parallel) + +``` +RESEARCH-001 --> DESIGN-001 --> AUDIT-001 --> DESIGN-002 --+ +[researcher] [designer] [reviewer] [designer] | + BUILD-001 --+--> AUDIT-002 --> BUILD-002 + [implementer] [reviewer] [implementer] + wave 1 wave 2 wave 3 wave 4 wave 5 wave 6 +``` + +### Full-System Mode (8 tasks, dual-track + final audit) + +``` +Same as System + AUDIT-003 after BUILD-002 + +BUILD-002 --> AUDIT-003 + [reviewer: final-audit] + wave 6 wave 7 +``` + +### Generator-Critic Loop (designer <-> reviewer) + +``` +designer (Generator) -> design artifacts -> reviewer (Critic) + <- audit feedback <- + (max 2 rounds) + +Convergence: audit.score >= 8 AND audit.critical_count === 0 +``` + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Circular dependency | Detect in wave computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent tasks in later waves | +| Interactive agent timeout | Urge convergence via send_input, then close if still timed out | +| Interactive agent failed | Mark as failed, skip dependents | +| All agents in wave failed | Log error, offer retry or abort | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| Audit score < 6 over 2 GC rounds | Escalate to user for manual intervention | +| ui-ux-pro-max unavailable | Degrade to LLM general design knowledge | +| Task description too vague | AskUserQuestion for clarification in Phase 0 | +| Continue mode: no session found | List available sessions, prompt user to select | + 
+--- + +## Core Rules + +1. **Start Immediately**: First action is session initialization, then Phase 0/1 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. **CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive) +4. **CSV First**: Default to csv-wave for tasks; only use interactive when interaction pattern requires it +5. **Context Propagation**: prev_context built from master CSV, not from memory +6. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson +7. **Skip on Failure**: If a dependency failed, skip the dependent task +8. **GC Loop Cap**: Max 2 generator-critic rounds between designer and reviewer +9. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +10. **DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-uidesign/instructions/agent-instruction.md b/.codex/skills/team-uidesign/instructions/agent-instruction.md new file mode 100644 index 00000000..4fe038cb --- /dev/null +++ b/.codex/skills/team-uidesign/instructions/agent-instruction.md @@ -0,0 +1,509 @@ +# Team UI Design — Agent Instruction + +This instruction is loaded by team-worker agents when spawned with roles: `researcher`, `designer`, `reviewer`, `implementer`. + +--- + +## Role-Based Execution + +### Researcher Role + +**Responsibility**: Analyze existing design system, build component inventory, assess accessibility baseline, retrieve design intelligence. + +**Input**: +- `id`: Task ID (e.g., `RESEARCH-001`) +- `title`: Task title +- `description`: Detailed task description with PURPOSE/TASK/CONTEXT/EXPECTED/CONSTRAINTS +- `role`: `researcher` +- `pipeline_mode`: `component`, `system`, or `full-system` +- `scope`: `full` +- `prev_context`: Previous tasks' findings (empty for wave 1) + +**Execution Protocol**: + +1. 
**Read shared discoveries**: + ```javascript + const discoveries = Read(`{session}/discoveries.ndjson`) + ``` + +2. **Analyze existing design system**: + - Scan codebase for design tokens (CSS variables, theme files, config) + - Identify styling patterns (CSS-in-JS, Tailwind, styled-components) + - Extract color palette, typography scale, spacing system + +3. **Build component inventory**: + - List all UI components with props and states + - Document component hierarchy and composition patterns + - Note accessibility features (ARIA, keyboard nav) + +4. **Assess accessibility baseline**: + - Check contrast ratios (WCAG AA/AAA) + - Verify semantic HTML usage + - Document keyboard navigation support + - Note screen reader compatibility + +5. **Retrieve design intelligence** (if ui-ux-pro-max available): + - Query for industry best practices + - Get component design patterns + - Retrieve accessibility guidelines + +6. **Write research artifacts**: + ```javascript + Write(`{session}/artifacts/research/design-system-analysis.json`, JSON.stringify({ + tech_stack: { framework: "React", ui_lib: "shadcn", styling: "Tailwind" }, + existing_tokens: { colors: 24, typography: 7, spacing: 6 }, + patterns: ["Compound components", "Render props"], + gaps: ["Missing dark mode", "Inconsistent spacing"] + }, null, 2)) + + Write(`{session}/artifacts/research/component-inventory.json`, JSON.stringify({ + components: [ + { name: "Button", props: ["variant", "size"], states: ["default", "hover", "active", "disabled"] } + ] + }, null, 2)) + + Write(`{session}/artifacts/research/accessibility-audit.json`, JSON.stringify({ + wcag_level: "AA", + contrast_issues: 3, + keyboard_nav: "partial", + screen_reader: "good" + }, null, 2)) + + Write(`{session}/artifacts/research/design-intelligence.json`, JSON.stringify({ + industry: "SaaS/Tech", + best_practices: ["8px grid", "4-5 color shades", "Semantic naming"], + patterns: ["Button variants", "Form validation states"] + }, null, 2)) + ``` + +7. 
**Share discoveries**: + ```bash + echo '{"ts":"2026-03-08T10:00:00Z","worker":"{id}","type":"tech_stack_detected","data":{"stack":"react","framework":"nextjs","ui_lib":"shadcn"}}' >> {session}/discoveries.ndjson + ``` + +8. **Report result**: + ```javascript + report_agent_job_result({ + id: "{id}", + status: "completed", + findings: "Detected React + shadcn stack. 24 color tokens, 7 typography scales. Missing dark mode variants. WCAG AA baseline with 3 contrast issues.", + artifacts_produced: "artifacts/research/design-system-analysis.json;artifacts/research/component-inventory.json;artifacts/research/accessibility-audit.json;artifacts/research/design-intelligence.json", + audit_score: "", + audit_signal: "", + error: "" + }) + ``` + +**Success Criteria**: +- All 4 research artifacts produced with valid JSON +- Tech stack identified +- Component inventory complete +- Accessibility baseline documented + +--- + +### Designer Role + +**Responsibility**: Generate design tokens (W3C Design Tokens Format) and component specifications. + +**Input**: +- `id`: Task ID (e.g., `DESIGN-001`) +- `title`: Task title +- `description`: Detailed task description +- `role`: `designer` +- `pipeline_mode`: `component`, `system`, or `full-system` +- `scope`: `tokens`, `components`, or `full` +- `context_from`: Upstream task IDs (e.g., `RESEARCH-001`) +- `prev_context`: Previous tasks' findings + +**Execution Protocol**: + +1. **Read shared discoveries and upstream artifacts**: + ```javascript + const discoveries = Read(`{session}/discoveries.ndjson`) + const research = JSON.parse(Read(`{session}/artifacts/research/design-system-analysis.json`)) + ``` + +2. 
**Generate design tokens** (W3C Design Tokens Format): + ```javascript + const tokens = { + "color": { + "primary": { + "$type": "color", + "$value": "#3B82F6", + "$description": "Primary brand color" + }, + "primary-dark": { + "$type": "color", + "$value": "#1E40AF", + "$description": "Primary color for dark mode" + } + }, + "typography": { + "font-size-base": { + "$type": "dimension", + "$value": "16px" + } + }, + "spacing": { + "space-1": { + "$type": "dimension", + "$value": "4px" + } + } + } + Write(`{session}/artifacts/design/design-tokens.json`, JSON.stringify(tokens, null, 2)) + ``` + +3. **Create component specifications**: + ```markdown + # Button Component Specification + + ## Overview + Primary interactive element for user actions. + + ## States + 1. Default: Base appearance + 2. Hover: Elevated, color shift + 3. Active: Pressed state + 4. Disabled: Reduced opacity, no interaction + 5. Focus: Keyboard focus ring + + ## Variants + - Primary: Filled background + - Secondary: Outlined + - Ghost: Text only + + ## Accessibility + - ARIA role: button + - Keyboard: Enter/Space to activate + - Focus visible: 2px outline + - Contrast: WCAG AA minimum + + ## Token Usage + - Background: color.primary + - Text: color.on-primary + - Padding: spacing.space-3 spacing.space-4 + - Border radius: border.radius-md + ``` + +4. **Ensure light/dark mode support**: + - All color tokens have light and dark variants + - Semantic tokens reference base tokens + - Theme switching mechanism defined + +5. **Share discoveries**: + ```bash + echo '{"ts":"2026-03-08T10:05:00Z","worker":"{id}","type":"token_generated","data":{"category":"color","count":24,"supports_dark_mode":true}}' >> {session}/discoveries.ndjson + ``` + +6. **Report result**: + ```javascript + report_agent_job_result({ + id: "{id}", + status: "completed", + findings: "Generated design token system with 24 color tokens (light+dark), 7 typography scales, 6 spacing values. 
Created component spec for Button with all 5 states, ARIA roles, and responsive breakpoints.", + artifacts_produced: "artifacts/design/design-tokens.json;artifacts/design/component-specs/button.md", + audit_score: "", + audit_signal: "", + error: "" + }) + ``` + +**Success Criteria**: +- Design tokens in W3C format +- All color tokens have light/dark variants +- Component specs include all 5 states +- Accessibility requirements documented + +--- + +### Reviewer Role + +**Responsibility**: 5-dimension quality audit for design artifacts. + +**Input**: +- `id`: Task ID (e.g., `AUDIT-001`) +- `title`: Task title +- `description`: Detailed task description +- `role`: `reviewer` +- `pipeline_mode`: `component`, `system`, or `full-system` +- `scope`: `full` +- `audit_type`: `token-audit`, `component-audit`, or `final-audit` +- `context_from`: Upstream task IDs (e.g., `DESIGN-001`) +- `prev_context`: Previous tasks' findings + +**Execution Protocol**: + +1. **Read design artifacts**: + ```javascript + const tokens = JSON.parse(Read(`{session}/artifacts/design/design-tokens.json`)) + const componentSpecs = Glob(`{session}/artifacts/design/component-specs/*.md`) + ``` + +2. **5-Dimension Audit**: + + **Consistency (20%)**: + - Token naming follows convention + - Semantic tokens reference base tokens correctly + - Component specs use consistent terminology + + **Accessibility (25%)**: + - Contrast ratios meet WCAG AA (4.5:1 text, 3:1 UI) + - All interactive states have focus indicators + - ARIA roles and labels defined + - Keyboard navigation specified + + **Completeness (20%)**: + - All 5 interactive states defined + - Light and dark mode for all color tokens + - Responsive breakpoints specified + - Edge cases documented + + **Quality (15%)**: + - Token values follow design principles (8px grid, etc.) 
+ - Component specs are clear and actionable + - No hardcoded values in specs + + **Industry Compliance (20%)**: + - Follows industry best practices (from research) + - Meets domain-specific requirements (healthcare: stricter accessibility) + - Aligns with design system standards + +3. **Calculate weighted score**: + ```javascript + const score = (consistency * 0.20) + (accessibility * 0.25) + (completeness * 0.20) + (quality * 0.15) + (industry * 0.20) + ``` + +4. **Determine audit signal**: + - `audit_passed`: score >= 8.0 AND critical_count === 0 + - `audit_result`: score >= 6.0 AND critical_count === 0 + - `fix_required`: score < 6.0 OR critical_count > 0 + +5. **Write audit report**: + ```markdown + # Design Audit Report: {id} + + ## Overall Score: {score}/10 + + ## Dimension Scores + - Consistency: {consistency}/10 (20%) + - Accessibility: {accessibility}/10 (25%) + - Completeness: {completeness}/10 (20%) + - Quality: {quality}/10 (15%) + - Industry: {industry}/10 (20%) + + ## Issues Found + + ### Critical (0) + (none) + + ### High (1) + - Missing dark mode variant for semantic color tokens + + ### Medium (2) + - Border radius not defined for pill variant + - Focus ring color not specified + + ### Low (3) + - Token naming could be more semantic + - Component spec missing edge case documentation + - Responsive breakpoint values not aligned with 8px grid + + ## Recommendations + 1. Add dark mode variants for all semantic tokens + 2. Define border-radius-pill token + 3. Specify focus ring color (accessibility.focus-ring) + + ## Verdict: {audit_signal} + ``` + +6. **Share discoveries**: + ```bash + echo '{"ts":"2026-03-08T10:15:00Z","worker":"{id}","type":"issue_found","data":{"file":"design-tokens.json","line":0,"severity":"high","description":"Missing dark mode variant for semantic color tokens"}}' >> {session}/discoveries.ndjson + ``` + +7. 
**Report result**: + ```javascript + report_agent_job_result({ + id: "{id}", + status: "completed", + findings: "Design audit: 8.4/10. Token naming consistent, all color tokens have light/dark variants, contrast ratios meet WCAG AA. Minor: missing border-radius for pill variant.", + artifacts_produced: "artifacts/audit/audit-001.md", + audit_score: "8.4", + audit_signal: "audit_passed", + error: "" + }) + ``` + +**Success Criteria**: +- All 5 dimensions scored +- Audit report written with issue breakdown +- Audit signal determined (pass/result/fix_required) +- Score >= 8.0 with 0 critical issues for GC convergence + +--- + +### Implementer Role + +**Responsibility**: Implement component code from design specs with token consumption and accessibility. + +**Input**: +- `id`: Task ID (e.g., `BUILD-001`) +- `title`: Task title +- `description`: Detailed task description +- `role`: `implementer` +- `pipeline_mode`: `component`, `system`, or `full-system` +- `scope`: `full` +- `context_from`: Upstream task IDs (e.g., `AUDIT-001`) +- `prev_context`: Previous tasks' findings + +**Execution Protocol**: + +1. **Read design artifacts and audit feedback**: + ```javascript + const tokens = JSON.parse(Read(`{session}/artifacts/design/design-tokens.json`)) + const componentSpec = Read(`{session}/artifacts/design/component-specs/button.md`) + const auditReport = Read(`{session}/artifacts/audit/audit-001.md`) + ``` + +2. **Generate CSS custom properties from tokens**: + ```css + :root { + --color-primary: #3B82F6; + --color-primary-dark: #1E40AF; + --font-size-base: 16px; + --space-1: 4px; + } + + [data-theme="dark"] { + --color-primary: var(--color-primary-dark); + } + ``` + +3. 
**Implement component with all 5 states**: + ```tsx + import React from 'react' + + interface ButtonProps { + variant?: 'primary' | 'secondary' | 'ghost' + disabled?: boolean + children: React.ReactNode + } + + export const Button: React.FC = ({ variant = 'primary', disabled, children }) => { + return ( + + ) + } + ``` + +4. **Add ARIA attributes and keyboard navigation**: + - `role="button"` (if not native button) + - `aria-disabled` for disabled state + - `aria-pressed` for toggle buttons + - Focus management with `:focus-visible` + +5. **Validate no hardcoded values**: + - All colors use `var(--token-name)` + - All spacing uses token variables + - All typography uses token variables + +6. **Follow project patterns**: + - Match existing component structure + - Use same import patterns + - Follow naming conventions from research + +7. **Share discoveries**: + ```bash + echo '{"ts":"2026-03-08T10:10:00Z","worker":"{id}","type":"file_modified","data":{"file":"tokens.css","change":"Generated CSS custom properties from design tokens","lines_added":85}}' >> {session}/discoveries.ndjson + ``` + +8. **Report result**: + ```javascript + report_agent_job_result({ + id: "{id}", + status: "completed", + findings: "Implemented Button component with all 5 states, ARIA attributes, keyboard navigation. Generated CSS custom properties from design tokens. No hardcoded values.", + artifacts_produced: "artifacts/build/token-files/tokens.css;artifacts/build/component-files/Button.tsx", + audit_score: "", + audit_signal: "", + error: "" + }) + ``` + +**Success Criteria**: +- Component code implements all 5 states +- All values use token variables (no hardcoded) +- ARIA attributes present +- Keyboard navigation functional +- Follows project patterns + +--- + +## Generator-Critic Loop (Designer <-> Reviewer) + +When reviewer returns `audit_signal: "fix_required"`: + +1. Coordinator creates `DESIGN-fix-{round}` task (max 2 rounds) +2. 
Designer reads audit feedback, applies targeted fixes +3. Coordinator creates `AUDIT-recheck-{round}` task +4. Reviewer re-audits fixed artifacts +5. Convergence: score >= 8.0 AND critical_count === 0 + +--- + +## Shared Discovery Board + +All roles read/write `{session}/discoveries.ndjson`: + +**Discovery Types**: +- `tech_stack_detected`: Tech stack identified +- `design_pattern_found`: Existing design pattern +- `token_generated`: Design token category created +- `file_modified`: File change recorded +- `issue_found`: Audit issue discovered +- `anti_pattern_violation`: Design anti-pattern detected +- `artifact_produced`: Deliverable created + +**Protocol**: +1. Read discoveries at start +2. Append discoveries during execution (never modify existing) +3. Deduplicate by type + data key + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Upstream artifact not found | Report error, mark failed | +| Design tokens invalid JSON | Report error, mark failed | +| Component spec missing required sections | Report error, mark failed | +| Audit score calculation error | Default to 0, report error | +| Implementation build fails | Report error, mark failed | +| CLI tool timeout | Fallback to direct implementation | + +--- + +## Output Format + +All roles use `report_agent_job_result` with this schema: + +```json +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Key discoveries (max 500 chars)", + "artifacts_produced": "semicolon-separated paths", + "audit_score": "0-10 (reviewer only)", + "audit_signal": "audit_passed|audit_result|fix_required (reviewer only)", + "error": "" +} +``` diff --git a/.codex/skills/team-uidesign/schemas/tasks-schema.md b/.codex/skills/team-uidesign/schemas/tasks-schema.md new file mode 100644 index 00000000..1e06bf63 --- /dev/null +++ b/.codex/skills/team-uidesign/schemas/tasks-schema.md @@ -0,0 +1,187 @@ +# Team UI Design -- CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### 
Input Columns (Set by Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | string | Yes | Unique task identifier (PREFIX-NNN) | `"RESEARCH-001"` | +| `title` | string | Yes | Short task title | `"Design system analysis"` | +| `description` | string | Yes | Detailed task description (self-contained) with PURPOSE/TASK/CONTEXT/EXPECTED/CONSTRAINTS | `"PURPOSE: Analyze existing design system..."` | +| `role` | enum | Yes | Worker role: `researcher`, `designer`, `reviewer`, `implementer` | `"researcher"` | +| `pipeline_mode` | enum | Yes | Pipeline mode: `component`, `system`, `full-system` | `"component"` | +| `scope` | enum | Yes | Task scope: `full`, `tokens`, `components` | `"full"` | +| `audit_type` | string | No | Audit type: `token-audit`, `component-audit`, `final-audit` (empty for non-reviewer) | `"token-audit"` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"RESEARCH-001"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"RESEARCH-001"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[RESEARCH-001] Detected React + shadcn stack..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` -> `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Generated 24 color tokens with dark mode..."` | +| `artifacts_produced` | string | Semicolon-separated paths of produced artifacts | 
`"artifacts/research/design-system-analysis.json;artifacts/research/component-inventory.json"` | +| `audit_score` | string | Audit weighted score 0-10 (empty for non-reviewer tasks) | `"8.5"` | +| `audit_signal` | enum | `audit_passed`, `audit_result`, `fix_required` (empty for non-reviewer) | `"audit_passed"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution | + +Interactive tasks appear in master CSV for dependency tracking but are NOT included in wave-{N}.csv files. + +--- + +### Role Prefixes + +| Role | Prefix | Responsibility Type | +|------|--------|---------------------| +| researcher | RESEARCH | read-only (design system analysis + intelligence retrieval) | +| designer | DESIGN | generation (design tokens + component specs, W3C format) | +| reviewer | AUDIT | validation (5-dimension quality audit, GC critic) | +| implementer | BUILD | code-gen (CSS custom properties + components + accessibility) | + +--- + +### Example Data + +```csv +id,title,description,role,pipeline_mode,scope,audit_type,deps,context_from,exec_mode,wave,status,findings,artifacts_produced,audit_score,audit_signal,error +"RESEARCH-001","Design system analysis","PURPOSE: Analyze existing design system, build component inventory, assess accessibility baseline | Success: 4 research artifacts produced with valid data\nTASK:\n- Analyze existing design tokens and styling patterns\n- Build component inventory with props and states\n- Assess accessibility baseline\n- Retrieve design intelligence via ui-ux-pro-max\nCONTEXT:\n- Session: .workflow/.csv-wave/uds-saas-dashboard-20260308\n- Industry: SaaS/Tech\nEXPECTED: artifacts/research/*.json | All 4 research files with valid JSON\nCONSTRAINTS: Read-only 
analysis","researcher","component","full","","","","csv-wave","1","pending","","","","","" +"DESIGN-001","Design tokens + component spec","PURPOSE: Define design tokens (W3C format) and component specification | Success: Design tokens + component spec with all states defined\nTASK:\n- Define complete token system (color, typography, spacing, shadow, border, breakpoint)\n- Create component specification with all 5 interactive states\n- Ensure accessibility spec\nCONTEXT:\n- Session: .workflow/.csv-wave/uds-saas-dashboard-20260308\n- Upstream: research/*.json\nEXPECTED: artifacts/design/design-tokens.json + component-specs/*.md\nCONSTRAINTS: Follow W3C Design Tokens Format | Light/dark for all color tokens","designer","component","tokens","","RESEARCH-001","RESEARCH-001","csv-wave","2","pending","","","","","" +"AUDIT-001","Design audit","PURPOSE: 5-dimension quality audit for consistency, accessibility, completeness, quality, industry compliance | Success: Audit score >= 8 with 0 critical issues\nTASK:\n- Score 5 dimensions (consistency 20%, accessibility 25%, completeness 20%, quality 15%, industry 20%)\n- Check token naming, theme completeness, contrast ratios\n- Verify component states and ARIA spec\nCONTEXT:\n- Session: .workflow/.csv-wave/uds-saas-dashboard-20260308\n- Upstream: design/design-tokens.json, design/component-specs/*.md\nEXPECTED: artifacts/audit/audit-001.md\nCONSTRAINTS: Read-only analysis | GC convergence: score >= 8 and 0 critical","reviewer","component","full","token-audit","DESIGN-001","DESIGN-001","csv-wave","3","pending","","","","","" +"BUILD-001","Component implementation","PURPOSE: Implement component code from design specs | Success: Production code with token consumption and accessibility\nTASK:\n- Generate CSS custom properties from design tokens\n- Implement component with all 5 states\n- Add ARIA attributes and keyboard navigation\n- Validate no hardcoded values\nCONTEXT:\n- Session: 
.workflow/.csv-wave/uds-saas-dashboard-20260308\n- Upstream: design/design-tokens.json, design/component-specs/*.md, audit/audit-001.md\nEXPECTED: artifacts/build/**/*\nCONSTRAINTS: Use var(--token-name) only | Follow project patterns","implementer","component","full","","AUDIT-001","AUDIT-001","csv-wave","4","pending","","","","","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +--------------------- -------------------- ----------------- +id ----------> id ----------> id +title ----------> title ----------> (reads) +description ----------> description ----------> (reads) +role ----------> role ----------> (reads) +pipeline_mode ---------> pipeline_mode ---------> (reads) +scope ----------> scope ----------> (reads) +audit_type ----------> audit_type ----------> (reads) +deps ----------> deps ----------> (reads) +context_from----------> context_from----------> (reads) +exec_mode ----------> exec_mode ----------> (reads) + wave ----------> (reads) + prev_context ----------> (reads) + status + findings + artifacts_produced + audit_score + audit_signal + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "DESIGN-001", + "status": "completed", + "findings": "Generated design token system with 24 color tokens (light+dark), 7 typography scales, 6 spacing values. Created component spec for Button with all 5 states, ARIA roles, and responsive breakpoints.", + "artifacts_produced": "artifacts/design/design-tokens.json;artifacts/design/component-specs/button.md", + "audit_score": "", + "audit_signal": "", + "error": "" +} +``` + +Reviewer agent output example: + +```json +{ + "id": "AUDIT-001", + "status": "completed", + "findings": "Design audit: 8.4/10. Token naming consistent, all color tokens have light/dark variants, contrast ratios meet WCAG AA. 
Minor: missing border-radius for pill variant.", + "artifacts_produced": "artifacts/audit/audit-001.md", + "audit_score": "8.4", + "audit_signal": "audit_passed", + "error": "" +} +``` + +Interactive tasks output via structured text or JSON written to `interactive/{id}-result.json`. + +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `tech_stack_detected` | `data.stack` | `{stack, framework, ui_lib}` | Tech stack identified | +| `design_pattern_found` | `data.pattern_name+data.location` | `{pattern_name, location, description}` | Existing design pattern | +| `token_generated` | `data.category` | `{category, count, supports_dark_mode}` | Design token category created | +| `file_modified` | `data.file` | `{file, change, lines_added}` | File change recorded | +| `issue_found` | `data.file+data.line` | `{file, line, severity, description}` | Audit issue discovered | +| `anti_pattern_violation` | `data.pattern+data.file` | `{pattern, file, line, description}` | Design anti-pattern detected | +| `artifact_produced` | `data.path` | `{name, path, producer, type}` | Deliverable created | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T10:00:00Z","worker":"RESEARCH-001","type":"tech_stack_detected","data":{"stack":"react","framework":"nextjs","ui_lib":"shadcn"}} +{"ts":"2026-03-08T10:05:00Z","worker":"DESIGN-001","type":"token_generated","data":{"category":"color","count":24,"supports_dark_mode":true}} +{"ts":"2026-03-08T10:10:00Z","worker":"BUILD-001","type":"file_modified","data":{"file":"tokens.css","change":"Generated CSS custom properties from design tokens","lines_added":85}} +{"ts":"2026-03-08T10:15:00Z","worker":"AUDIT-001","type":"issue_found","data":{"file":"design-tokens.json","line":0,"severity":"high","description":"Missing dark mode variant for semantic color tokens"}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. 
+ +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| CSV task findings | Interactive task | Injected via spawn message or send_input | +| Interactive task result | CSV task prev_context | Read from interactive/{id}-result.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Description non-empty | Every task has description | "Empty description for task: {id}" | +| Status enum | status in {pending, completed, failed, skipped} | "Invalid status: {status}" | +| Role valid | role in {researcher, designer, reviewer, implementer} | "Invalid role: {role}" | +| Pipeline mode valid | pipeline_mode in {component, system, full-system} | "Invalid pipeline_mode: {mode}" | +| Audit signal valid | audit_signal in {audit_passed, audit_result, fix_required, ""} | "Invalid audit_signal: {signal}" | +| Cross-mechanism deps | Interactive to CSV deps resolve correctly | "Cross-mechanism dependency unresolvable: {id}" | diff --git a/.codex/skills/team-ultra-analyze/SKILL.md b/.codex/skills/team-ultra-analyze/SKILL.md new file mode 100644 index 00000000..5f6ce301 --- /dev/null +++ b/.codex/skills/team-ultra-analyze/SKILL.md @@ -0,0 +1,749 @@ +--- +name: team-ultra-analyze +description: Deep collaborative analysis pipeline. 
Multi-perspective exploration, deep analysis, user-driven discussion loops, and cross-perspective synthesis. Supports Quick, Standard, and Deep pipeline modes. +argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] [--mode quick|standard|deep] \"analysis topic\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults. + +# Team Ultra Analyze + +## Usage + +```bash +$team-ultra-analyze "Analyze authentication module architecture and security" +$team-ultra-analyze -c 4 --mode deep "Deep analysis of payment processing pipeline" +$team-ultra-analyze -y --mode quick "Quick overview of API endpoint structure" +$team-ultra-analyze --continue "uan-auth-analysis-20260308" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 3) +- `--mode`: Pipeline mode override (quick|standard|deep) +- `--continue`: Resume existing session + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Deep collaborative analysis with multi-perspective exploration, deep analysis, user-driven discussion loops, and cross-perspective synthesis. Each perspective gets its own explorer and analyst, working in parallel. Discussion rounds allow the user to steer analysis depth and direction. 
+ +**Execution Model**: Hybrid — CSV wave pipeline (primary) + individual agent spawn (secondary for discussion feedback loop) + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ TEAM ULTRA ANALYZE WORKFLOW │ +├─────────────────────────────────────────────────────────────────────────┤ +│ │ +│ Phase 0: Pre-Wave Interactive │ +│ ├─ Topic parsing + dimension detection │ +│ ├─ Pipeline mode selection (quick/standard/deep) │ +│ ├─ Perspective assignment │ +│ └─ Output: refined requirements for decomposition │ +│ │ +│ Phase 1: Requirement → CSV + Classification │ +│ ├─ Parse topic into exploration + analysis + discussion + synthesis │ +│ ├─ Assign roles: explorer, analyst, discussant, synthesizer │ +│ ├─ Classify tasks: csv-wave | interactive (exec_mode) │ +│ ├─ Compute dependency waves (topological sort → depth grouping) │ +│ ├─ Generate tasks.csv with wave + exec_mode columns │ +│ └─ User validates task breakdown (skip if -y) │ +│ │ +│ Phase 2: Wave Execution Engine (Extended) │ +│ ├─ For each wave (1..N): │ +│ │ ├─ Build wave CSV (filter csv-wave tasks for this wave) │ +│ │ ├─ Inject previous findings into prev_context column │ +│ │ ├─ spawn_agents_on_csv(wave CSV) │ +│ │ ├─ Execute post-wave interactive tasks (if any) │ +│ │ ├─ Merge all results into master tasks.csv │ +│ │ └─ Check: any failed? 
→ skip dependents │ +│ └─ discoveries.ndjson shared across all modes (append-only) │ +│ │ +│ Phase 3: Post-Wave Interactive (Discussion Loop) │ +│ ├─ After discussant completes: user feedback gate │ +│ ├─ User chooses: continue deeper | adjust direction | done │ +│ ├─ Creates dynamic tasks (DISCUSS-N, ANALYZE-fix-N) as needed │ +│ └─ Max discussion rounds: quick=0, standard=1, deep=5 │ +│ │ +│ Phase 4: Results Aggregation │ +│ ├─ Export final results.csv │ +│ ├─ Generate context.md with all findings │ +│ ├─ Display summary: completed/failed/skipped per wave │ +│ └─ Offer: view results | export | archive │ +│ │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, user feedback, direction control | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Codebase exploration (single perspective) | `csv-wave` | +| Parallel exploration (multiple perspectives) | `csv-wave` (parallel in same wave) | +| Deep analysis (single perspective) | `csv-wave` | +| Parallel analysis (multiple perspectives) | `csv-wave` (parallel in same wave) | +| Direction-fix analysis (adjusted focus) | `csv-wave` | +| Discussion processing (aggregate results) | `csv-wave` | +| Final synthesis (cross-perspective integration) | `csv-wave` | +| Discussion feedback gate (user interaction) | `interactive` | +| Topic clarification (Phase 0) | `interactive` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,role,perspective,dimensions,discussion_round,discussion_type,deps,context_from,exec_mode,wave,status,findings,error +"EXPLORE-001","Explore from 
technical perspective","Search codebase from technical perspective. Collect files, patterns, findings.","explorer","technical","architecture;implementation","0","","","","csv-wave","1","pending","","" +"ANALYZE-001","Deep analysis from technical perspective","Analyze exploration results from technical perspective. Generate insights with confidence levels.","analyst","technical","architecture;implementation","0","","EXPLORE-001","EXPLORE-001","csv-wave","2","pending","","" +"DISCUSS-001","Initial discussion round","Aggregate all analysis results. Identify convergent themes, conflicts, top discussion points.","discussant","","","1","initial","ANALYZE-001;ANALYZE-002","ANALYZE-001;ANALYZE-002","csv-wave","3","pending","","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier (string) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description | +| `role` | Input | Worker role: explorer, analyst, discussant, synthesizer | +| `perspective` | Input | Analysis perspective: technical, architectural, business, domain_expert | +| `dimensions` | Input | Analysis dimensions (semicolon-separated): architecture, implementation, performance, security, concept, comparison, decision | +| `discussion_round` | Input | Discussion round number (0 = N/A, 1+ = round number) | +| `discussion_type` | Input | Discussion type: initial, deepen, direction-adjusted, specific-questions | +| `deps` | Input | Semicolon-separated dependency task IDs | +| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `wave` | Computed | Wave number (computed by topological sort, 1-based) | +| `status` | Output | `pending` → `completed` / `failed` / `skipped` | +| `findings` | Output | Key discoveries or implementation notes (max 500 chars) | +| `error` | Output | Error message if failed (empty if success) 
| + +### Per-Wave CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). + +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| discussion-feedback | agents/discussion-feedback.md | 2.3 (wait-respond) | Collect user feedback after discussion round, create dynamic tasks | post-wave (after discussant wave) | +| topic-analyzer | agents/topic-analyzer.md | 2.3 (wait-respond) | Parse topic, detect dimensions, select pipeline mode and perspectives | standalone (Phase 0) | + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. + +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state — all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 4 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 4 | +| `interactive/{id}-result.json` | Results from interactive tasks | Created per interactive task | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ +├── tasks.csv # Master state (all tasks, both modes) +├── results.csv # Final results export +├── discoveries.ndjson # Shared discovery board (all agents) +├── context.md # Human-readable report +├── wave-{N}.csv # Temporary per-wave input (csv-wave only) +└── interactive/ # Interactive task artifacts + └── {id}-result.json # Per-task results +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +const 
getUtc8ISOString = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString() + +// Parse flags +const AUTO_YES = $ARGUMENTS.includes('--yes') || $ARGUMENTS.includes('-y') +const continueMode = $ARGUMENTS.includes('--continue') +const concurrencyMatch = $ARGUMENTS.match(/(?:--concurrency|-c)\s+(\d+)/) +const maxConcurrency = concurrencyMatch ? parseInt(concurrencyMatch[1]) : 3 +const modeMatch = $ARGUMENTS.match(/--mode\s+(quick|standard|deep)/) +const explicitMode = modeMatch ? modeMatch[1] : null + +// Clean requirement text (remove flags) +const topic = $ARGUMENTS + .replace(/--yes|-y|--continue|--concurrency\s+\d+|-c\s+\d+|--mode\s+\w+/g, '') + .trim() + +const slug = topic.toLowerCase() + .replace(/[^a-z0-9\u4e00-\u9fa5]+/g, '-') + .substring(0, 40) +const dateStr = getUtc8ISOString().substring(0, 10).replace(/-/g, '') +let sessionId = `uan-${slug}-${dateStr}` +let sessionFolder = `.workflow/.csv-wave/${sessionId}` + +// Continue mode: find existing session +if (continueMode) { + const existing = Bash(`ls -t .workflow/.csv-wave/uan-* 2>/dev/null | head -1`).trim() + if (existing) { + sessionId = existing.split('/').pop() + sessionFolder = existing + } +} + +Bash(`mkdir -p ${sessionFolder}/interactive`) +``` + +--- + +### Phase 0: Pre-Wave Interactive + +**Objective**: Parse topic, detect analysis dimensions, select pipeline mode, and assign perspectives. + +**Execution**: + +```javascript +const analyzer = spawn_agent({ + message: ` +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS (Agent Execute) +1. **Read role definition**: .codex/skills/team-ultra-analyze/agents/topic-analyzer.md (MUST read first) +2. Read: .workflow/project-tech.json (if exists) + +--- + +Goal: Analyze topic and recommend pipeline configuration +Topic: ${topic} +Explicit Mode: ${explicitMode || 'auto-detect'} + +### Task +1. Detect analysis dimensions from topic keywords: + - architecture, implementation, performance, security, concept, comparison, decision +2. 
Select perspectives based on dimensions: + - technical, architectural, business, domain_expert +3. Determine pipeline mode (if not explicitly set): + - Complexity 1-3 → quick, 4-6 → standard, 7+ → deep +4. Return structured configuration +` +}) + +const analyzerResult = wait({ ids: [analyzer], timeout_ms: 120000 }) + +if (analyzerResult.timed_out) { + send_input({ id: analyzer, message: "Please finalize and output current findings." }) + wait({ ids: [analyzer], timeout_ms: 60000 }) +} + +close_agent({ id: analyzer }) + +// Parse result: pipeline_mode, perspectives[], dimensions[], depth +Write(`${sessionFolder}/interactive/topic-analyzer-result.json`, JSON.stringify({ + task_id: "topic-analysis", + status: "completed", + pipeline_mode: parsedMode, + perspectives: parsedPerspectives, + dimensions: parsedDimensions, + depth: parsedDepth, + timestamp: getUtc8ISOString() +})) +``` + +If not AUTO_YES, present user with configuration for confirmation: + +```javascript +if (!AUTO_YES) { + const answer = AskUserQuestion({ + questions: [{ + question: `Topic: "${topic}"\nPipeline: ${pipeline_mode}\nPerspectives: ${perspectives.join(', ')}\nDimensions: ${dimensions.join(', ')}\n\nApprove?`, + header: "Analysis Configuration", + multiSelect: false, + options: [ + { label: "Approve", description: `Use ${pipeline_mode} mode with ${perspectives.length} perspectives` }, + { label: "Quick", description: "1 explorer → 1 analyst → synthesizer (fast)" }, + { label: "Standard", description: "N explorers → N analysts → discussion → synthesizer" }, + { label: "Deep", description: "N explorers → N analysts → discussion loop (up to 5 rounds) → synthesizer" } + ] + }] + }) +} +``` + +**Success Criteria**: +- Refined requirements available for Phase 1 decomposition +- Interactive agents closed, results stored + +--- + +### Phase 1: Requirement → CSV + Classification + +**Objective**: Build tasks.csv from selected pipeline mode and perspectives. 
+ +**Decomposition Rules**: + +| Pipeline | Tasks | Wave Structure | +|----------|-------|---------------| +| quick | EXPLORE-001 → ANALYZE-001 → SYNTH-001 | 3 waves, serial, depth=1 | +| standard | EXPLORE-001..N → ANALYZE-001..N → DISCUSS-001 → SYNTH-001 | 4 wave groups, parallel explore+analyze | +| deep | EXPLORE-001..N → ANALYZE-001..N → DISCUSS-001 (→ dynamic tasks) → SYNTH-001 | 3+ waves, SYNTH created after discussion loop | + +Where N = number of selected perspectives. + +**Classification Rules**: + +All work tasks (exploration, analysis, discussion processing, synthesis) are `csv-wave`. The discussion feedback gate (user interaction after discussant completes) is `interactive`. + +**Pipeline Task Definitions**: + +#### Quick Pipeline (3 csv-wave tasks) + +| Task ID | Role | Wave | Deps | Perspective | Description | +|---------|------|------|------|-------------|-------------| +| EXPLORE-001 | explorer | 1 | (none) | general | Explore codebase structure for analysis topic | +| ANALYZE-001 | analyst | 2 | EXPLORE-001 | technical | Deep analysis from technical perspective | +| SYNTH-001 | synthesizer | 3 | ANALYZE-001 | (all) | Integrate analysis into final conclusions | + +#### Standard Pipeline (2N+2 tasks, parallel windows) + +| Task ID | Role | Wave | Deps | Perspective | Description | +|---------|------|------|------|-------------|-------------| +| EXPLORE-001..N | explorer | 1 | (none) | per-perspective | Parallel codebase exploration, one per perspective | +| ANALYZE-001..N | analyst | 2 | EXPLORE-N | per-perspective | Parallel deep analysis, one per perspective | +| DISCUSS-001 | discussant | 3 | all ANALYZE-* | (all) | Aggregate analyses, identify themes and conflicts | +| FEEDBACK-001 | (interactive) | 4 | DISCUSS-001 | - | User feedback: done → create SYNTH, continue → more discussion | +| SYNTH-001 | synthesizer | 5 | FEEDBACK-001 | (all) | Cross-perspective integration and conclusions | + +#### Deep Pipeline (2N+1 initial tasks + dynamic) + 
+Same as Standard, but SYNTH-001 is omitted initially. Created dynamically after the discussion loop (up to 5 rounds) completes. Additional dynamic tasks: +- `DISCUSS-N` — subsequent discussion round +- `ANALYZE-fix-N` — supplementary analysis with adjusted focus +- `SYNTH-001` — created after final discussion round + +**Wave Computation**: Kahn's BFS topological sort with depth tracking (csv-wave tasks only). + +**User Validation**: Display task breakdown with wave + exec_mode assignment (skip if AUTO_YES). + +**Success Criteria**: +- tasks.csv created with valid schema, wave, and exec_mode assignments +- No circular dependencies +- User approved (or AUTO_YES) + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with hybrid mechanism support and cross-wave context propagation. + +```javascript +const failedIds = new Set() +const skippedIds = new Set() +let discussionRound = 0 +const MAX_DISCUSSION_ROUNDS = pipeline_mode === 'deep' ? 5 : pipeline_mode === 'standard' ? 1 : 0 + +for (let wave = 1; wave <= maxWave; wave++) { + console.log(`\n## Wave ${wave}/${maxWave}\n`) + + // 1. Read current master CSV + const masterCsv = parseCsv(Read(`${sessionFolder}/tasks.csv`)) + + // 2. Separate csv-wave and interactive tasks for this wave + const waveTasks = masterCsv.filter(row => parseInt(row.wave) === wave) + const csvTasks = waveTasks.filter(t => t.exec_mode === 'csv-wave') + const interactiveTasks = waveTasks.filter(t => t.exec_mode === 'interactive') + + // 3. Skip tasks whose deps failed + const executableCsvTasks = [] + for (const task of csvTasks) { + const deps = task.deps.split(';').filter(Boolean) + if (deps.some(d => failedIds.has(d) || skippedIds.has(d))) { + skippedIds.add(task.id) + updateMasterCsvRow(sessionFolder, task.id, { + status: 'skipped', error: 'Dependency failed or skipped' + }) + continue + } + executableCsvTasks.push(task) + } + + // 4. 
Build prev_context for each csv-wave task + for (const task of executableCsvTasks) { + const contextIds = task.context_from.split(';').filter(Boolean) + const prevFindings = contextIds + .map(id => { + const prevRow = masterCsv.find(r => r.id === id) + if (prevRow && prevRow.status === 'completed' && prevRow.findings) { + return `[Task ${id}: ${prevRow.title}] ${prevRow.findings}` + } + return null + }) + .filter(Boolean) + .join('\n') + task.prev_context = prevFindings || 'No previous context available' + } + + // 5. Write wave CSV and execute csv-wave tasks + if (executableCsvTasks.length > 0) { + const waveHeader = 'id,title,description,role,perspective,dimensions,discussion_round,discussion_type,deps,context_from,exec_mode,wave,prev_context' + const waveRows = executableCsvTasks.map(t => + [t.id, t.title, t.description, t.role, t.perspective, t.dimensions, + t.discussion_round, t.discussion_type, t.deps, t.context_from, t.exec_mode, t.wave, t.prev_context] + .map(cell => `"${String(cell).replace(/"/g, '""')}"`) + .join(',') + ) + Write(`${sessionFolder}/wave-${wave}.csv`, [waveHeader, ...waveRows].join('\n')) + + const waveResult = spawn_agents_on_csv({ + csv_path: `${sessionFolder}/wave-${wave}.csv`, + id_column: "id", + instruction: buildAnalysisInstruction(sessionFolder, wave), + max_concurrency: maxConcurrency, + max_runtime_seconds: 600, + output_csv_path: `${sessionFolder}/wave-${wave}-results.csv`, + output_schema: { + type: "object", + properties: { + id: { type: "string" }, + status: { type: "string", enum: ["completed", "failed"] }, + findings: { type: "string" }, + error: { type: "string" } + }, + required: ["id", "status", "findings"] + } + }) + + // Merge results into master CSV + const waveResults = parseCsv(Read(`${sessionFolder}/wave-${wave}-results.csv`)) + for (const result of waveResults) { + updateMasterCsvRow(sessionFolder, result.id, { + status: result.status, + findings: result.findings || '', + error: result.error || '' + }) + if 
(result.status === 'failed') failedIds.add(result.id) + } + + Bash(`rm -f "${sessionFolder}/wave-${wave}.csv"`) + } + + // 6. Execute post-wave interactive tasks (Discussion Feedback) + for (const task of interactiveTasks) { + if (task.status !== 'pending') continue + const deps = task.deps.split(';').filter(Boolean) + if (deps.some(d => failedIds.has(d) || skippedIds.has(d))) { + skippedIds.add(task.id) + continue + } + + discussionRound++ + + // Discussion Feedback Gate + if (pipeline_mode === 'quick' || discussionRound > MAX_DISCUSSION_ROUNDS) { + // No discussion or max rounds reached — proceed to synthesis + if (!masterCsv.find(t => t.id === 'SYNTH-001')) { + // Create SYNTH-001 dynamically + const lastDiscuss = masterCsv.filter(t => t.id.startsWith('DISCUSS')) + .sort((a, b) => b.id.localeCompare(a.id))[0] + addTaskToMasterCsv(sessionFolder, { + id: 'SYNTH-001', title: 'Final synthesis', + description: 'Integrate all analysis into final conclusions', + role: 'synthesizer', perspective: '', dimensions: '', + discussion_round: '0', discussion_type: '', + deps: lastDiscuss ? lastDiscuss.id : '', context_from: 'all', + exec_mode: 'csv-wave', wave: String(wave + 1), + status: 'pending', findings: '', error: '' + }) + maxWave = wave + 1 + } + updateMasterCsvRow(sessionFolder, task.id, { + status: 'completed', + findings: `Discussion round ${discussionRound}: proceeding to synthesis` + }) + continue + } + + // Spawn discussion feedback agent + const feedbackAgent = spawn_agent({ + message: ` +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS (Agent Execute) +1. **Read role definition**: .codex/skills/team-ultra-analyze/agents/discussion-feedback.md (MUST read first) +2. 
Read: ${sessionFolder}/discoveries.ndjson (shared discoveries) + +--- + +Goal: Collect user feedback on discussion round ${discussionRound} +Session: ${sessionFolder} +Discussion Round: ${discussionRound}/${MAX_DISCUSSION_ROUNDS} +Pipeline Mode: ${pipeline_mode} + +### Context +The discussant has completed round ${discussionRound}. Present the user with discussion results and collect feedback on next direction. +` + }) + + const feedbackResult = wait({ ids: [feedbackAgent], timeout_ms: 300000 }) + if (feedbackResult.timed_out) { + send_input({ id: feedbackAgent, message: "Please finalize: user did not respond, default to 'Done'." }) + wait({ ids: [feedbackAgent], timeout_ms: 60000 }) + } + close_agent({ id: feedbackAgent }) + + // Parse feedback decision: "continue_deeper" | "adjust_direction" | "done" + Write(`${sessionFolder}/interactive/${task.id}-result.json`, JSON.stringify({ + task_id: task.id, status: "completed", + discussion_round: discussionRound, + feedback: feedbackDecision, + timestamp: getUtc8ISOString() + })) + + // Handle feedback + if (feedbackDecision === 'done') { + // Create SYNTH-001 blocked by last DISCUSS task + addTaskToMasterCsv(sessionFolder, { + id: 'SYNTH-001', deps: task.id.replace('FEEDBACK', 'DISCUSS'), + role: 'synthesizer', exec_mode: 'csv-wave', wave: String(wave + 1) + }) + maxWave = wave + 1 + } else if (feedbackDecision === 'adjust_direction') { + // Create ANALYZE-fix-N and DISCUSS-N+1 + const fixId = `ANALYZE-fix-${discussionRound}` + const nextDiscussId = `DISCUSS-${String(discussionRound + 1).padStart(3, '0')}` + addTaskToMasterCsv(sessionFolder, { + id: fixId, role: 'analyst', exec_mode: 'csv-wave', wave: String(wave + 1) + }) + addTaskToMasterCsv(sessionFolder, { + id: nextDiscussId, role: 'discussant', deps: fixId, + exec_mode: 'csv-wave', wave: String(wave + 2) + }) + addTaskToMasterCsv(sessionFolder, { + id: `FEEDBACK-${String(discussionRound + 1).padStart(3, '0')}`, + exec_mode: 'interactive', deps: nextDiscussId, 
wave: String(wave + 3) + }) + maxWave = wave + 3 + } else { + // continue_deeper: Create DISCUSS-N+1 + const nextDiscussId = `DISCUSS-${String(discussionRound + 1).padStart(3, '0')}` + addTaskToMasterCsv(sessionFolder, { + id: nextDiscussId, role: 'discussant', exec_mode: 'csv-wave', wave: String(wave + 1) + }) + addTaskToMasterCsv(sessionFolder, { + id: `FEEDBACK-${String(discussionRound + 1).padStart(3, '0')}`, + exec_mode: 'interactive', deps: nextDiscussId, wave: String(wave + 2) + }) + maxWave = wave + 2 + } + + updateMasterCsvRow(sessionFolder, task.id, { + status: 'completed', + findings: `Discussion feedback: ${feedbackDecision}, round ${discussionRound}` + }) + } +} +``` + +**Success Criteria**: +- All waves executed in order +- Both csv-wave and interactive tasks handled per wave +- Each wave's results merged into master CSV before next wave starts +- Dependent tasks skipped when predecessor failed +- discoveries.ndjson accumulated across all waves and mechanisms +- Discussion loop controlled with proper round tracking +- Dynamic tasks created correctly based on user feedback + +--- + +### Phase 3: Post-Wave Interactive + +**Objective**: Handle discussion loop completion and ensure synthesis is triggered. + +After all discussion rounds are exhausted or user chooses "done": +1. Ensure SYNTH-001 exists in master CSV +2. Ensure SYNTH-001 is unblocked (blocked by last completed discussion task) +3. Execute remaining waves (synthesis) + +**Success Criteria**: +- Post-wave interactive processing complete +- Interactive agents closed, results stored + +--- + +### Phase 4: Results Aggregation + +**Objective**: Generate final results and human-readable report. 
+ +```javascript +const masterCsv = Read(`${sessionFolder}/tasks.csv`) +Write(`${sessionFolder}/results.csv`, masterCsv) + +const tasks = parseCsv(masterCsv) +const completed = tasks.filter(t => t.status === 'completed') +const failed = tasks.filter(t => t.status === 'failed') +const skipped = tasks.filter(t => t.status === 'skipped') + +const contextContent = `# Ultra Analyze Report + +**Session**: ${sessionId} +**Topic**: ${topic} +**Pipeline**: ${pipeline_mode} +**Perspectives**: ${perspectives.join(', ')} +**Discussion Rounds**: ${discussionRound} +**Completed**: ${getUtc8ISOString()} + +--- + +## Summary + +| Metric | Count | +|--------|-------| +| Total Tasks | ${tasks.length} | +| Completed | ${completed.length} | +| Failed | ${failed.length} | +| Skipped | ${skipped.length} | +| Discussion Rounds | ${discussionRound} | + +--- + +## Wave Execution + +${waveDetails} + +--- + +## Analysis Artifacts + +- Explorations: discoveries with type "exploration" in discoveries.ndjson +- Analyses: discoveries with type "analysis" in discoveries.ndjson +- Discussion: discoveries with type "discussion" in discoveries.ndjson +- Conclusions: discoveries with type "conclusion" in discoveries.ndjson + +--- + +## Conclusions + +${synthesisFindings} +` + +Write(`${sessionFolder}/context.md`, contextContent) +``` + +If not AUTO_YES, offer completion options: + +```javascript +if (!AUTO_YES) { + const answer = AskUserQuestion({ + questions: [{ + question: "Ultra-Analyze pipeline complete. 
What would you like to do?", + header: "Completion", + multiSelect: false, + options: [ + { label: "Archive & Clean (Recommended)", description: "Archive session" }, + { label: "Keep Active", description: "Keep session for follow-up" }, + { label: "Export Results", description: "Export deliverables to specified location" } + ] + }] + }) +} +``` + +**Success Criteria**: +- results.csv exported (all tasks, both modes) +- context.md generated +- All interactive agents closed +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +All agents across all waves share `discoveries.ndjson`. This enables cross-role knowledge sharing. + +**Discovery Types**: + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `exploration` | `data.perspective+data.file` | `{perspective, file, relevance, summary, patterns[]}` | Explored file/module | +| `analysis` | `data.perspective+data.insight` | `{perspective, insight, confidence, evidence, file_ref}` | Analysis insight | +| `pattern` | `data.name` | `{name, file, description, type}` | Code/architecture pattern | +| `discussion_point` | `data.topic` | `{topic, perspectives[], convergence, open_questions[]}` | Discussion point | +| `recommendation` | `data.action` | `{action, rationale, priority, confidence}` | Recommendation | +| `conclusion` | `data.point` | `{point, evidence, confidence, perspectives_supporting[]}` | Final conclusion | + +**Format**: NDJSON, each line is self-contained JSON: + +```jsonl +{"ts":"2026-03-08T10:00:00+08:00","worker":"EXPLORE-001","type":"exploration","data":{"perspective":"technical","file":"src/auth/index.ts","relevance":"high","summary":"Auth module entry point with OAuth and JWT exports","patterns":["module-pattern","strategy-pattern"]}} +{"ts":"2026-03-08T10:05:00+08:00","worker":"ANALYZE-001","type":"analysis","data":{"perspective":"technical","insight":"Auth module uses strategy pattern for provider 
switching","confidence":"high","evidence":"src/auth/strategies/*.ts","file_ref":"src/auth/index.ts:15"}} +{"ts":"2026-03-08T10:10:00+08:00","worker":"DISCUSS-001","type":"discussion_point","data":{"topic":"Authentication scalability","perspectives":["technical","architectural"],"convergence":"Both perspectives agree on stateless JWT approach","open_questions":["Token refresh strategy for long sessions"]}} +``` + +**Protocol Rules**: +1. Read board before own exploration → skip covered areas +2. Write discoveries immediately via `echo >>` → don't batch +3. Deduplicate — check existing entries by type + dedup key +4. Append-only — never modify or delete existing lines + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Circular dependency | Detect in wave computation, abort with error message | +| CSV agent timeout | Mark as failed in results, continue with wave | +| CSV agent failed | Mark as failed, skip dependent tasks in later waves | +| Interactive agent timeout | Urge convergence via send_input, then close if still timed out | +| Interactive agent failed | Mark as failed, skip dependents | +| All agents in wave failed | Log error, offer retry or abort | +| CSV parse error | Validate CSV format before execution, show line number | +| discoveries.ndjson corrupt | Ignore malformed lines, continue with valid entries | +| Discussion loop exceeds 5 rounds | Force synthesis, offer continuation | +| Explorer finds nothing | Continue with limited context, note limitation | +| CLI tool unavailable | Fallback chain: gemini → codex → direct analysis | +| User timeout in discussion | Save state, default to "done", proceed to synthesis | +| Continue mode: no session found | List available sessions, prompt user to select | + +--- + +## Core Rules + +1. **Start Immediately**: First action is session initialization, then Phase 0/1 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes and results are merged +3. 
**CSV is Source of Truth**: Master tasks.csv holds all state (both csv-wave and interactive) +4. **CSV First**: Default to csv-wave for tasks; only use interactive when user interaction is needed +5. **Context Propagation**: prev_context built from master CSV, not from memory +6. **Discovery Board is Append-Only**: Never clear, modify, or recreate discoveries.ndjson — both mechanisms share it +7. **Skip on Failure**: If a dependency failed, skip the dependent task (regardless of mechanism) +8. **Lifecycle Balance**: Every spawn_agent MUST have a matching close_agent +9. **Cleanup Temp Files**: Remove wave-{N}.csv after results are merged +10. **DO NOT STOP**: Continuous execution until all waves complete or all remaining tasks are skipped diff --git a/.codex/skills/team-ultra-analyze/agents/discussion-feedback.md b/.codex/skills/team-ultra-analyze/agents/discussion-feedback.md new file mode 100644 index 00000000..728889fb --- /dev/null +++ b/.codex/skills/team-ultra-analyze/agents/discussion-feedback.md @@ -0,0 +1,155 @@ +# Discussion Feedback Agent + +Collect user feedback after a discussion round and determine next action for the analysis pipeline. 
+ +## Identity + +- **Type**: `interactive` +- **Responsibility**: User feedback collection and discussion loop control + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Present discussion results to the user clearly +- Collect explicit user feedback via AskUserQuestion +- Return structured decision for orchestrator to act on +- Respect max discussion round limits + +### MUST NOT + +- Perform analysis or exploration (delegate to csv-wave agents) +- Create tasks directly (orchestrator handles dynamic task creation) +- Skip user interaction (this is the user-in-the-loop checkpoint) +- Exceed the configured max discussion rounds + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | builtin | Load discussion results and session state | +| `AskUserQuestion` | builtin | Collect user feedback on discussion | + +--- + +## Execution + +### Phase 1: Context Loading + +**Objective**: Load discussion results for user presentation + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Session folder | Yes | Path to session directory | +| Discussion round | Yes | Current round number | +| Max discussion rounds | Yes | Maximum allowed rounds | +| Pipeline mode | Yes | quick, standard, or deep | + +**Steps**: + +1. Read the session's discoveries.ndjson for discussion_point entries +2. Parse prev_context for the discussant's findings +3. Extract key themes, conflicts, and open questions from findings +4. Load current discussion_round from spawn message + +**Output**: Discussion summary ready for user presentation + +--- + +### Phase 2: User Feedback Collection + +**Objective**: Present results and collect next-step decision + +**Steps**: + +1. Format discussion summary for user: + - Convergent themes identified + - Conflicting views between perspectives + - Top open questions + - Round progress (current/max) + +2. 
Present options via AskUserQuestion: + +``` +AskUserQuestion({ + questions: [{ + question: "Discussion round <current>/<max> complete.\n\nThemes: <themes>\nConflicts: <conflicts>\nOpen Questions: <questions>\n\nWhat next?", + header: "Discussion Feedback", + multiSelect: false, + options: [ + { label: "Continue deeper", description: "Current direction is good, investigate open questions deeper" }, + { label: "Adjust direction", description: "Shift analysis focus to a different area" }, + { label: "Done", description: "Sufficient depth reached, proceed to final synthesis" } + ] + }] +}) +``` + +3. If user chooses "Adjust direction": + - Follow up with another AskUserQuestion asking for the new focus area + - Capture the adjusted focus text + +**Output**: User decision and optional adjusted focus + +--- + +### Phase 3: Decision Formatting + +**Objective**: Package user decision for orchestrator + +**Steps**: + +1. Map user choice to decision string: + +| User Choice | Decision | Additional Data | +|------------|----------|-----------------| +| Continue deeper | `continue_deeper` | None | +| Adjust direction | `adjust_direction` | `adjusted_focus: <user-provided focus text>` | +| Done | `done` | None | + +2. 
Format structured output for orchestrator + +**Output**: Structured decision + +--- + +## Structured Output Template + +``` +## Summary +- Discussion Round: <current>/<max> +- User Decision: continue_deeper | adjust_direction | done + +## Discussion Summary Presented +- Themes: <convergent themes> +- Conflicts: <conflicting views> +- Open Questions: <open questions> + +## Decision Details +- Decision: <decision> +- Adjusted Focus: <focus text, or N/A> +- Rationale: <user rationale, if given> + +## Next Action (for orchestrator) +- continue_deeper: Create DISCUSS-<N+1> task, then FEEDBACK-<N+1> +- adjust_direction: Create ANALYZE-fix-<N> task, then DISCUSS-<N+1>, then FEEDBACK-<N+1> +- done: Create SYNTH-001 task blocked by last DISCUSS task +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| User does not respond | After timeout, default to "done" and proceed to synthesis | +| Max rounds reached | Inform user this is the final round, only offer "Done" option | +| No discussion data found | Present what is available, note limitations | +| Timeout approaching | Output current state with default "done" decision | diff --git a/.codex/skills/team-ultra-analyze/agents/topic-analyzer.md b/.codex/skills/team-ultra-analyze/agents/topic-analyzer.md new file mode 100644 index 00000000..670e8fb8 --- /dev/null +++ b/.codex/skills/team-ultra-analyze/agents/topic-analyzer.md @@ -0,0 +1,153 @@ +# Topic Analyzer Agent + +Parse analysis topic, detect dimensions, select pipeline mode, and assign perspectives. 
+ +## Identity + +- **Type**: `interactive` +- **Responsibility**: Topic analysis and pipeline configuration + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Perform text-level analysis only (no source code reading) +- Produce structured output with pipeline configuration +- Detect dimensions from topic keywords +- Recommend appropriate perspectives for the topic + +### MUST NOT + +- Read source code or explore codebase (that is the explorer's job) +- Perform any analysis (that is the analyst's job) +- Make final pipeline decisions without providing rationale + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | builtin | Load project context if available | + +--- + +## Execution + +### Phase 1: Dimension Detection + +**Objective**: Scan topic keywords to identify analysis dimensions + +**Input**: + +| Source | Required | Description | +|--------|----------|-------------| +| Topic text | Yes | The analysis topic from user | +| Explicit mode | No | --mode override if provided | + +**Steps**: + +1. Scan topic for dimension keywords: + +| Dimension | Keywords | +|-----------|----------| +| architecture | architecture, design, structure | +| implementation | implement, code, source | +| performance | performance, optimize, speed | +| security | security, auth, vulnerability | +| concept | concept, theory, principle | +| comparison | compare, vs, difference | +| decision | decision, choice, tradeoff | + +2. Select matching dimensions (default to general if none match) + +**Output**: List of detected dimensions + +--- + +### Phase 2: Pipeline Mode Selection + +**Objective**: Determine pipeline mode and depth + +**Steps**: + +1. If explicit `--mode` provided, use it directly +2. 
Otherwise, auto-detect from complexity scoring: + +| Factor | Points | +|--------|--------| +| Per detected dimension | +1 | +| Deep-mode keywords (deep, thorough, detailed, comprehensive) | +2 | +| Cross-domain (3+ dimensions) | +1 | + +| Score | Pipeline Mode | +|-------|--------------| +| 1-3 | quick | +| 4-6 | standard | +| 7+ | deep | + +3. Determine depth = number of selected perspectives + +**Output**: Pipeline mode and depth + +--- + +### Phase 3: Perspective Assignment + +**Objective**: Select analysis perspectives based on topic and dimensions + +**Steps**: + +1. Map dimensions to perspectives: + +| Dimension Match | Perspective | Focus | +|----------------|-------------|-------| +| architecture, implementation | technical | Implementation details, code patterns | +| architecture, security | architectural | System design, scalability | +| concept, comparison, decision | business | Value, ROI, strategy | +| domain-specific keywords | domain_expert | Domain patterns, standards | + +2. Quick mode: always 1 perspective (technical by default) +3. Standard/Deep mode: 2-4 perspectives based on dimension coverage + +**Output**: List of perspectives with focus areas + +--- + +## Structured Output Template + +``` +## Summary +- Topic: +- Pipeline Mode: +- Depth: + +## Dimension Detection +- Detected dimensions: +- Complexity score: + +## Perspectives +1. : +2. 
: + +## Discussion Configuration +- Max discussion rounds: <0|1|5> + +## Pipeline Structure +- Total tasks: +- Parallel stages: +- Dynamic tasks possible: +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Topic too vague | Suggest clarifying questions, default to standard mode | +| No dimension matches | Default to "general" dimension with technical perspective | +| Timeout approaching | Output current analysis with "PARTIAL" status | diff --git a/.codex/skills/team-ultra-analyze/instructions/agent-instruction.md b/.codex/skills/team-ultra-analyze/instructions/agent-instruction.md new file mode 100644 index 00000000..32e54cba --- /dev/null +++ b/.codex/skills/team-ultra-analyze/instructions/agent-instruction.md @@ -0,0 +1,169 @@ +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS +1. Read shared discoveries: .workflow/.csv-wave/{session-id}/discoveries.ndjson (if exists, skip if not) +2. Read project context: .workflow/project-tech.json (if exists) + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Role**: {role} +**Description**: {description} +**Perspective**: {perspective} +**Dimensions**: {dimensions} +**Discussion Round**: {discussion_round} +**Discussion Type**: {discussion_type} + +### Previous Tasks' Findings (Context) +{prev_context} + +--- + +## Execution Protocol + +1. **Read discoveries**: Load shared discoveries from the session's discoveries.ndjson for cross-task context +2. **Use context**: Apply previous tasks' findings from prev_context above +3. **Execute by role**: + +### Role: explorer (EXPLORE-* tasks) +Explore codebase structure from the assigned perspective, collecting structured context for downstream analysis. 
+ +- Determine exploration strategy by perspective: + +| Perspective | Focus | Search Depth | +|-------------|-------|-------------| +| general | Overall codebase structure and patterns | broad | +| technical | Implementation details, code patterns, feasibility | medium | +| architectural | System design, module boundaries, interactions | broad | +| business | Business logic, domain models, value flows | medium | +| domain_expert | Domain patterns, standards, best practices | deep | + +- Use available tools (Read, Glob, Grep, Bash) to search the codebase +- Collect: relevant files (path, relevance, summary), code patterns, key findings, module relationships +- Generate questions for downstream analysis +- Focus exploration on the dimensions listed in the Dimensions field + +### Role: analyst (ANALYZE-* tasks) +Perform deep analysis on exploration results from the assigned perspective. + +- Load exploration results from prev_context +- Detect if this is a direction-fix task (description mentions "adjusted focus"): + - Normal: analyze from assigned perspective using corresponding exploration results + - Direction-fix: re-analyze from adjusted perspective using all available explorations + +- Select analysis approach by perspective: + +| Perspective | CLI Tool | Focus | +|-------------|----------|-------| +| technical | gemini | Implementation patterns, code quality, feasibility | +| architectural | gemini | System design, scalability, component interactions | +| business | gemini | Value, ROI, stakeholder impact | +| domain_expert | gemini | Domain-specific patterns, best practices | + +- Use `ccw cli` for deep analysis: + ```bash + ccw cli -p "PURPOSE: Deep analysis of '' from perspective + TASK: • Analyze patterns found in exploration • Generate insights with confidence levels • Identify discussion points + MODE: analysis + CONTEXT: @**/* | Memory: Exploration findings + EXPECTED: Structured insights with confidence levels and evidence" --tool gemini --mode 
analysis + ``` + +- Generate structured output: + - key_insights: [{insight, confidence (high/medium/low), evidence (file:line)}] + - key_findings: [{finding, file_ref, impact}] + - discussion_points: [questions needing cross-perspective discussion] + - open_questions: [areas needing further exploration] + - recommendations: [{action, rationale, priority}] + +### Role: discussant (DISCUSS-* tasks) +Process analysis results and generate discussion summary. Strategy depends on discussion type. + +- **initial**: Cross-perspective aggregation + - Aggregate all analysis results from prev_context + - Identify convergent themes across perspectives + - Identify conflicting views between perspectives + - Generate top 5 discussion points and open questions + - Produce structured round summary + +- **deepen**: Deep investigation of open questions + - Use CLI tool to investigate uncertain insights: + ```bash + ccw cli -p "PURPOSE: Investigate open questions and uncertain insights + TASK: • Focus on questions from previous round • Find supporting evidence • Validate uncertain insights + MODE: analysis + CONTEXT: @**/* + EXPECTED: Evidence-based findings" --tool gemini --mode analysis + ``` + +- **direction-adjusted**: Re-analysis from adjusted focus + - Use CLI to re-analyze from adjusted perspective based on user feedback + +- **specific-questions**: Targeted Q&A + - Use CLI for targeted investigation of user-specified questions + +- For all types, produce round summary: + - updated_understanding: {confirmed[], corrected[], new_insights[]} + - convergent themes, conflicting views + - remaining open questions + +### Role: synthesizer (SYNTH-* tasks) +Integrate all explorations, analyses, and discussions into final conclusions. + +- Read all available artifacts from prev_context (explorations, analyses, discussions) +- Execute synthesis in four steps: + 1. **Theme Extraction**: Identify convergent themes across perspectives, rank by cross-perspective confirmation + 2. 
**Conflict Resolution**: Identify contradictions, present trade-off analysis + 3. **Evidence Consolidation**: Deduplicate findings, aggregate by file reference, assign confidence levels + 4. **Recommendation Prioritization**: Sort by priority, deduplicate, cap at 10 + +- Confidence levels: + +| Level | Criteria | +|-------|----------| +| High | Multiple sources confirm, strong evidence | +| Medium | Single source or partial evidence | +| Low | Speculative, needs verification | + +- Produce final conclusions: + - Executive summary + - Key conclusions with evidence and confidence + - Prioritized recommendations + - Open questions + - Cross-perspective synthesis (convergent themes, conflicts resolved, unique contributions) + +4. **Share discoveries**: Append exploration findings to shared board: + ```bash + echo '{"ts":"","worker":"{id}","type":"","data":{...}}' >> .workflow/.csv-wave/{session-id}/discoveries.ndjson + ``` + + Discovery types to share: + - `exploration`: {perspective, file, relevance, summary, patterns[]} — explored file/module + - `analysis`: {perspective, insight, confidence, evidence, file_ref} — analysis insight + - `pattern`: {name, file, description, type} — code/architecture pattern + - `discussion_point`: {topic, perspectives[], convergence, open_questions[]} — discussion point + - `recommendation`: {action, rationale, priority, confidence} — recommendation + - `conclusion`: {point, evidence, confidence, perspectives_supporting[]} — final conclusion + +5. **Report result**: Return JSON via report_agent_job_result + +--- + +## Output (report_agent_job_result) + +Return JSON: +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Key discoveries and implementation notes (max 500 chars)", + "error": "" +} + +**Role-specific findings guidance**: +- **explorer**: List file count, key files, patterns found. Example: "Found 12 files related to auth. Key: src/auth/index.ts (entry), src/auth/strategies/*.ts (providers). 
Patterns: strategy, middleware chain." +- **analyst**: List insight count, top insights with confidence. Example: "3 insights: (1) Strategy pattern for providers [high], (2) Missing token rotation [medium], (3) No rate limiting [high]. 2 discussion points." +- **discussant**: List themes, conflicts, question count. Example: "Convergent: JWT security (2 perspectives). Conflict: middleware approach. 3 open questions on refresh tokens." +- **synthesizer**: List conclusion count, top recommendations. Example: "5 conclusions, 4 recommendations. Top: Implement refresh token rotation [high priority, high confidence]." diff --git a/.codex/skills/team-ultra-analyze/schemas/tasks-schema.md b/.codex/skills/team-ultra-analyze/schemas/tasks-schema.md new file mode 100644 index 00000000..9e1b686a --- /dev/null +++ b/.codex/skills/team-ultra-analyze/schemas/tasks-schema.md @@ -0,0 +1,180 @@ +# Team Ultra Analyze — CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### Input Columns (Set by Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | string | Yes | Unique task identifier | `"EXPLORE-001"` | +| `title` | string | Yes | Short task title | `"Explore from technical perspective"` | +| `description` | string | Yes | Detailed task description (self-contained) | `"Search codebase from technical perspective..."` | +| `role` | string | Yes | Worker role: explorer, analyst, discussant, synthesizer | `"explorer"` | +| `perspective` | string | No | Analysis perspective: technical, architectural, business, domain_expert | `"technical"` | +| `dimensions` | string | No | Analysis dimensions (semicolon-separated) | `"architecture;implementation"` | +| `discussion_round` | integer | No | Discussion round number (0 = N/A, 1+ = round) | `"1"` | +| `discussion_type` | string | No | Discussion type: initial, deepen, direction-adjusted, specific-questions | `"initial"` | +| `deps` | string | No | 
Semicolon-separated dependency task IDs | `"EXPLORE-001"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"EXPLORE-001"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `wave` | integer | Wave number (1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[Task EXPLORE-001] Found 12 relevant files..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` → `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Found 12 files related to auth module..."` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution | + +Interactive tasks appear in master CSV for dependency tracking but are NOT included in wave-{N}.csv files. + +--- + +### Example Data + +```csv +id,title,description,role,perspective,dimensions,discussion_round,discussion_type,deps,context_from,exec_mode,wave,status,findings,error +"EXPLORE-001","Explore from technical perspective","Search codebase from technical perspective. Collect files, patterns, and findings related to authentication module.","explorer","technical","architecture;implementation","0","","","","csv-wave","1","pending","","" +"EXPLORE-002","Explore from architectural perspective","Search codebase from architectural perspective. 
Focus on module boundaries, component interactions, and system design patterns.","explorer","architectural","architecture;security","0","","","","csv-wave","1","pending","","" +"ANALYZE-001","Deep analysis from technical perspective","Analyze exploration results from technical perspective. Generate insights with confidence levels and evidence references.","analyst","technical","architecture;implementation","0","","EXPLORE-001","EXPLORE-001","csv-wave","2","pending","","" +"ANALYZE-002","Deep analysis from architectural perspective","Analyze exploration results from architectural perspective. Focus on system design quality and scalability.","analyst","architectural","architecture;security","0","","EXPLORE-002","EXPLORE-002","csv-wave","2","pending","","" +"DISCUSS-001","Initial discussion round","Aggregate all analysis results across perspectives. Identify convergent themes, conflicting views, and top discussion points.","discussant","","","1","initial","ANALYZE-001;ANALYZE-002","ANALYZE-001;ANALYZE-002","csv-wave","3","pending","","" +"FEEDBACK-001","Discussion feedback gate","Collect user feedback on discussion results. 
Decide: continue deeper, adjust direction, or proceed to synthesis.","","","","1","","DISCUSS-001","DISCUSS-001","interactive","4","pending","","" +"SYNTH-001","Final synthesis","Integrate all explorations, analyses, and discussions into final conclusions with prioritized recommendations.","synthesizer","","","0","","FEEDBACK-001","EXPLORE-001;EXPLORE-002;ANALYZE-001;ANALYZE-002;DISCUSS-001","csv-wave","5","pending","","" +``` + +--- + +### Column Lifecycle + +``` +Decomposer (Phase 1) Wave Engine (Phase 2) Agent (Execution) +───────────────────── ──────────────────── ───────────────── +id ───────────► id ──────────► id +title ───────────► title ──────────► (reads) +description ───────────► description ──────────► (reads) +role ───────────► role ──────────► (reads) +perspective ───────────► perspective ──────────► (reads) +dimensions ───────────► dimensions ──────────► (reads) +discussion_round ──────► discussion_round ─────► (reads) +discussion_type ───────► discussion_type ──────► (reads) +deps ───────────► deps ──────────► (reads) +context_from───────────► context_from──────────► (reads) +exec_mode ───────────► exec_mode ──────────► (reads) + wave ──────────► (reads) + prev_context ──────────► (reads) + status + findings + error +``` + +--- + +## Output Schema (JSON) + +Agent output via `report_agent_job_result` (csv-wave tasks): + +```json +{ + "id": "EXPLORE-001", + "status": "completed", + "findings": "Found 12 files related to auth module. Key files: src/auth/index.ts, src/auth/strategies/*.ts. Patterns: strategy pattern for provider switching, middleware chain for request validation.", + "error": "" +} +``` + +Analyst output: + +```json +{ + "id": "ANALYZE-001", + "status": "completed", + "findings": "3 key insights: (1) Auth uses strategy pattern [high confidence], (2) JWT validation lacks refresh token rotation [medium], (3) Rate limiting missing on auth endpoints [high]. 
2 discussion points identified.", + "error": "" +} +``` + +Discussant output: + +```json +{ + "id": "DISCUSS-001", + "status": "completed", + "findings": "Convergent themes: JWT security concerns (2 perspectives agree), strategy pattern approval. Conflicts: architectural vs technical on middleware approach. Top questions: refresh token strategy, rate limit placement.", + "error": "" +} +``` + +Interactive tasks output via structured text or JSON written to `interactive/{id}-result.json`. + +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `exploration` | `data.perspective+data.file` | `{perspective, file, relevance, summary, patterns[]}` | Explored file or module | +| `analysis` | `data.perspective+data.insight` | `{perspective, insight, confidence, evidence, file_ref}` | Analysis insight | +| `pattern` | `data.name` | `{name, file, description, type}` | Code or architecture pattern | +| `discussion_point` | `data.topic` | `{topic, perspectives[], convergence, open_questions[]}` | Discussion point | +| `recommendation` | `data.action` | `{action, rationale, priority, confidence}` | Recommendation | +| `conclusion` | `data.point` | `{point, evidence, confidence, perspectives_supporting[]}` | Final conclusion | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T10:00:00+08:00","worker":"EXPLORE-001","type":"exploration","data":{"perspective":"technical","file":"src/auth/index.ts","relevance":"high","summary":"Auth module entry point with OAuth and JWT exports","patterns":["module-pattern","strategy-pattern"]}} +{"ts":"2026-03-08T10:01:00+08:00","worker":"EXPLORE-001","type":"pattern","data":{"name":"strategy-pattern","file":"src/auth/strategies/","description":"Provider switching via strategy pattern","type":"behavioral"}} +{"ts":"2026-03-08T10:05:00+08:00","worker":"ANALYZE-001","type":"analysis","data":{"perspective":"technical","insight":"JWT validation lacks refresh token 
rotation","confidence":"medium","evidence":"No rotation logic in src/auth/jwt/verify.ts","file_ref":"src/auth/jwt/verify.ts:42"}} +{"ts":"2026-03-08T10:10:00+08:00","worker":"DISCUSS-001","type":"discussion_point","data":{"topic":"JWT Security","perspectives":["technical","architectural"],"convergence":"Both agree on rotation need","open_questions":["Sliding vs fixed window?"]}} +{"ts":"2026-03-08T10:15:00+08:00","worker":"SYNTH-001","type":"conclusion","data":{"point":"Auth module needs refresh token rotation","evidence":"src/auth/jwt/verify.ts lacks rotation","confidence":"high","perspectives_supporting":["technical","architectural"]}} +``` + +> Both csv-wave and interactive agents read/write the same discoveries.ndjson file. + +--- + +## Cross-Mechanism Context Flow + +| Source | Target | Mechanism | +|--------|--------|-----------| +| CSV task findings | Interactive task | Injected via spawn message or send_input | +| Interactive task result | CSV task prev_context | Read from interactive/{id}-result.json | +| Any agent discovery | Any agent | Shared via discoveries.ndjson | + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected involving: {ids}" | +| context_from valid | All context IDs exist and in earlier waves | "Invalid context_from: {id}" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Description non-empty | Every task has description | "Empty description for task: {id}" | +| Status enum | status in {pending, completed, failed, skipped} | "Invalid status: {status}" | +| Valid role | role in {explorer, analyst, discussant, synthesizer} | "Invalid role: {role}" | +| Valid 
perspective | perspective in {technical, architectural, business, domain_expert, general, ""} | "Invalid perspective: {value}" | +| Discussion round non-negative | discussion_round >= 0 | "Invalid discussion_round: {value}" | +| Cross-mechanism deps | Interactive→CSV deps resolve correctly | "Cross-mechanism dependency unresolvable: {id}" | diff --git a/.codex/skills/team-ux-improve/SKILL.md b/.codex/skills/team-ux-improve/SKILL.md new file mode 100644 index 00000000..ca895689 --- /dev/null +++ b/.codex/skills/team-ux-improve/SKILL.md @@ -0,0 +1,601 @@ +--- +name: team-ux-improve +description: Systematic UX improvement pipeline. Discovers and fixes UI/UX interaction issues including unresponsive buttons, missing feedback, and state refresh problems using scan->diagnose->design->implement->test workflow. +argument-hint: "[-y|--yes] [-c|--concurrency N] [--continue] \"<target> [--framework react|vue]\"" +allowed-tools: spawn_agents_on_csv, spawn_agent, wait, send_input, close_agent, Read, Write, Edit, Bash, Glob, Grep, AskUserQuestion +--- + +## Auto Mode + +When `--yes` or `-y`: Auto-confirm task decomposition, skip interactive validation, use defaults. 
+ +# UX Improvement Pipeline + +## Usage + +```bash +$team-ux-improve "ccw/frontend --framework react" +$team-ux-improve -c 4 "src/components" +$team-ux-improve -y "app/ui --framework vue" +$team-ux-improve --continue "ux-improve-1709856000" +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `-c, --concurrency N`: Max concurrent agents within each wave (default: 4) +- `--continue`: Resume existing session +- `--framework react|vue`: Specify UI framework (auto-detected if omitted) + +**Output Directory**: `.workflow/.csv-wave/{session-id}/` +**Core Output**: `tasks.csv` (master state) + `results.csv` (final) + `discoveries.ndjson` (shared exploration) + `context.md` (human-readable report) + +--- + +## Overview + +Systematic UX improvement workflow that discovers UI/UX interaction issues (unresponsive buttons, missing feedback, state refresh problems) and fixes them methodically. The pipeline scans for issues, diagnoses root causes, designs solutions, implements fixes, and validates with tests. + +**Execution Model**: Hybrid — CSV wave pipeline (primary) + individual agent spawn (secondary) + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ UX IMPROVEMENT PIPELINE WORKFLOW │ +├─────────────────────────────────────────────────────────────────────────┤ +│ │ +│ Phase 0: Framework Detection & Exploration (Interactive) │ +│ ├─ Detect UI framework (React/Vue/etc.) 
│ +│ ├─ Explore component patterns and conventions │ +│ ├─ Build component inventory │ +│ └─ Output: exploration cache for downstream tasks │ +│ │ +│ Phase 1: Requirement → CSV + Classification │ +│ ├─ Generate scan->diagnose->design->implement->test task chain │ +│ ├─ Classify tasks: csv-wave (scan/implement) | interactive (design) │ +│ ├─ Compute dependency waves (topological sort → depth grouping) │ +│ ├─ Generate tasks.csv with wave + exec_mode columns │ +│ └─ User validates task breakdown (skip if -y) │ +│ │ +│ Phase 2: Wave Execution Engine (Extended) │ +│ ├─ For each wave (1..N): │ +│ │ ├─ Execute pre-wave interactive tasks (design) │ +│ │ ├─ Build wave CSV (filter csv-wave tasks for this wave) │ +│ │ ├─ Inject previous findings into prev_context column │ +│ │ ├─ spawn_agents_on_csv(wave CSV) │ +│ │ ├─ Execute post-wave interactive tasks (testing) │ +│ │ ├─ Merge all results into master tasks.csv │ +│ │ └─ Check: any failed? → skip dependents │ +│ └─ discoveries.ndjson shared across all modes (append-only) │ +│ │ +│ Phase 3: Results Aggregation │ +│ ├─ Export final results.csv │ +│ ├─ Generate context.md with all findings │ +│ ├─ Display summary: issues found/fixed, test pass rate │ +│ └─ Offer: view results | retry failed | done │ +│ │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Task Classification Rules + +Each task is classified by `exec_mode`: + +| exec_mode | Mechanism | Criteria | +|-----------|-----------|----------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot, structured I/O, no multi-round interaction | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round, clarification, inline utility | + +**Classification Decision**: + +| Task Property | Classification | +|---------------|---------------| +| Scanning tasks (pattern matching, issue detection) | `csv-wave` | +| Diagnosis tasks (root cause analysis) | `csv-wave` | +| Design tasks (solution design, user interaction) | 
`interactive` | +| Implementation tasks (code fixes) | `csv-wave` | +| Testing tasks (validation, iteration) | `interactive` | +| Exploration tasks (framework patterns, component inventory) | `interactive` | + +--- + +## CSV Schema + +### tasks.csv (Master State) + +```csv +id,title,description,deps,context_from,exec_mode,role,component,wave,status,findings,issues_found,issues_fixed,error +EXPLORE-001,Framework Exploration,Explore React component patterns and conventions,,,"interactive",explorer,,1,pending,"","","","" +SCAN-001,Scan for UX issues,Scan components for unresponsive buttons and missing feedback,EXPLORE-001,EXPLORE-001,"csv-wave",scanner,Button,2,pending,"","","","" +DIAG-001,Diagnose root causes,Analyze root causes of identified UX issues,SCAN-001,SCAN-001,"csv-wave",diagnoser,Button,3,pending,"","","","" +DESIGN-001,Design solutions,Design fix approach for UX issues,DIAG-001,DIAG-001,"interactive",designer,Button,4,pending,"","","","" +IMPL-001,Implement fixes,Apply fixes to Button component,DESIGN-001,DESIGN-001,"csv-wave",implementer,Button,5,pending,"","","","" +TEST-001,Test fixes,Validate fixes and run tests,IMPL-001,IMPL-001,"interactive",tester,Button,6,pending,"","","","" +``` + +**Columns**: + +| Column | Phase | Description | +|--------|-------|-------------| +| `id` | Input | Unique task identifier (string) | +| `title` | Input | Short task title | +| `description` | Input | Detailed task description | +| `deps` | Input | Semicolon-separated dependency task IDs | +| `context_from` | Input | Semicolon-separated task IDs whose findings this task needs | +| `exec_mode` | Input | `csv-wave` or `interactive` | +| `role` | Input | Role name: explorer, scanner, diagnoser, designer, implementer, tester | +| `component` | Input | Component name being processed (empty for exploration) | +| `wave` | Computed | Wave number (computed by topological sort, 1-based) | +| `status` | Output | `pending` → `completed` / `failed` / `skipped` | +| `findings` | 
Output | Key discoveries or implementation notes (max 500 chars) | +| `issues_found` | Output | Number of issues found (scanner/diagnoser only) | +| `issues_fixed` | Output | Number of issues fixed (implementer only) | +| `error` | Output | Error message if failed (empty if success) | + +### Per-Wave CSV (Temporary) + +Each wave generates a temporary `wave-{N}.csv` with extra `prev_context` column (csv-wave tasks only). + +--- + +## Agent Registry (Interactive Agents) + +| Agent | Role File | Pattern | Responsibility | Position | +|-------|-----------|---------|----------------|----------| +| explorer | ~/.codex/agents/ux-explorer.md | 2.3 | Explore codebase for UI component patterns | pre-wave (Phase 0) | +| designer | ~/.codex/agents/ux-designer.md | 2.4 | Design fix approach for UX issues | pre-wave (per component) | +| tester | ~/.codex/agents/ux-tester.md | 2.4 | Validate fixes and run tests | post-wave (per component) | + +> **COMPACT PROTECTION**: Agent files are execution documents. When context compression occurs, **you MUST immediately `Read` the corresponding agent.md** to reload. 
+ +--- + +## Output Artifacts + +| File | Purpose | Lifecycle | +|------|---------|-----------| +| `tasks.csv` | Master state — all tasks with status/findings | Updated after each wave | +| `wave-{N}.csv` | Per-wave input (temporary, csv-wave tasks only) | Created before wave, deleted after | +| `results.csv` | Final export of all task results | Created in Phase 3 | +| `discoveries.ndjson` | Shared exploration board (all agents, both modes) | Append-only, carries across waves | +| `context.md` | Human-readable execution report | Created in Phase 3 | +| `interactive/{id}-result.json` | Results from interactive tasks | Created per interactive task | +| `agents/registry.json` | Active interactive agent tracking | Updated on spawn/close | +| `artifacts/scan-report.md` | Scanner findings | Created by scanner | +| `artifacts/diagnosis.md` | Diagnoser analysis | Created by diagnoser | +| `artifacts/design-guide.md` | Designer solutions | Created by designer | +| `artifacts/fixes/` | Implementation files | Created by implementer | +| `artifacts/test-report.md` | Tester validation | Created by tester | + +--- + +## Session Structure + +``` +.workflow/.csv-wave/{session-id}/ +├── tasks.csv # Master state (all tasks, both modes) +├── results.csv # Final results export +├── discoveries.ndjson # Shared discovery board (all agents) +├── context.md # Human-readable report +├── wave-{N}.csv # Temporary per-wave input (csv-wave only) +├── interactive/ # Interactive task artifacts +│ ├── {id}-result.json # Per-task results +│ └── cache-index.json # Shared exploration cache +├── agents/ +│ └── registry.json # Active interactive agent tracking +└── artifacts/ # Role deliverables + ├── scan-report.md + ├── diagnosis.md + ├── design-guide.md + ├── fixes/ + └── test-report.md +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +// Parse arguments +const args = parseArguments($ARGUMENTS) +const autoYes = args.yes || args.y +const concurrency = args.concurrency 
|| args.c || 4 +const continueMode = args.continue +const projectPath = args._[0] +const framework = args.framework + +// Validate project path +if (!projectPath) { + throw new Error("Project path required") +} + +// Generate session ID +const timestamp = Math.floor(Date.now() / 1000) +const sessionId = `ux-improve-${timestamp}` +const sessionDir = `.workflow/.csv-wave/${sessionId}` + +// Create session structure +Bash(`mkdir -p "${sessionDir}/interactive" "${sessionDir}/agents" "${sessionDir}/artifacts/fixes"`) + +// Initialize registry +Write(`${sessionDir}/agents/registry.json`, JSON.stringify({ + active: [], + closed: [], + created_at: new Date().toISOString() +}, null, 2)) + +// Initialize discoveries +Write(`${sessionDir}/discoveries.ndjson`, '') + +// Store session config +Write(`${sessionDir}/config.json`, JSON.stringify({ + project_path: projectPath, + framework: framework || "auto-detect", + max_test_iterations: 5 +}, null, 2)) +``` + +--- + +### Phase 0: Framework Detection & Exploration (Interactive) + +**Objective**: Detect UI framework and explore component patterns. + +```javascript +// Spawn explorer +const explorer = spawn_agent({ + message: `### MANDATORY FIRST STEPS +1. Read: ~/.codex/agents/ux-explorer.md + +--- + +## Task Assignment + +**Goal**: Explore codebase for UI component patterns and framework conventions + +**Project Path**: ${projectPath} + +**Framework**: ${framework || "auto-detect"} + +**Session Directory**: ${sessionDir} + +**Deliverables**: +- Framework detection (if auto-detect) +- Component inventory with file paths +- Pattern analysis (state management, event handling, styling) +- Exploration cache for downstream tasks + +**Instructions**: +1. Detect framework if not specified (check package.json, file extensions) +2. Scan for UI components (*.tsx, *.vue, etc.) +3. Analyze component patterns and conventions +4. Build component inventory +5. Cache findings in explorations/cache-index.json +6. 
Output result as JSON with framework and component list` +}) + +// Wait for completion +const result = wait({ ids: [explorer], timeout_ms: 600000 }) + +if (result.timed_out) { +  send_input({ id: explorer, message: "Please finalize exploration and output current findings." }) +  const retry = wait({ ids: [explorer], timeout_ms: 120000 }) +} + +// Store result (components array is persisted because Phase 1 reads it to build the per-component task pipeline) +const explorerOutput = JSON.parse(result.output) +Write(`${sessionDir}/interactive/EXPLORE-001-result.json`, JSON.stringify({ +  task_id: "EXPLORE-001", +  status: "completed", +  findings: explorerOutput.summary, +  framework: explorerOutput.framework, +  component_count: explorerOutput.components.length, +  components: explorerOutput.components, +  timestamp: new Date().toISOString() +}, null, 2)) + +close_agent({ id: explorer }) + +// Update config with detected framework +const config = JSON.parse(Read(`${sessionDir}/config.json`)) +config.framework = explorerOutput.framework +Write(`${sessionDir}/config.json`, JSON.stringify(config, null, 2)) +``` + +**Success Criteria**: +- Framework detected or confirmed +- Component inventory created +- Exploration cache available for downstream tasks + +--- + +### Phase 1: Requirement → CSV + Classification + +**Objective**: Generate task breakdown for UX improvement pipeline per component. 
+ +```javascript +// Load exploration results +const explorationResult = JSON.parse(Read(`${sessionDir}/interactive/EXPLORE-001-result.json`)) +const components = explorationResult.components || [] + +// Generate tasks for each component +const allTasks = [] +let taskCounter = 1 + +// Add exploration task (already completed) +allTasks.push({ + id: "EXPLORE-001", + title: "Framework Exploration", + description: "Explore component patterns and conventions", + deps: "", + context_from: "", + exec_mode: "interactive", + role: "explorer", + component: "", + wave: 1, + status: "completed", + findings: explorationResult.findings, + issues_found: "", + issues_fixed: "", + error: "" +}) + +// For each component, create pipeline: scan -> diagnose -> design -> implement -> test +for (const component of components) { + const compName = component.name + const compPath = component.path + + // Scan task (csv-wave) + const scanId = `SCAN-${String(taskCounter).padStart(3, '0')}` + allTasks.push({ + id: scanId, + title: `Scan ${compName}`, + description: `Scan ${compName} component for UX issues: unresponsive buttons, missing feedback, state refresh problems\n\nFile: ${compPath}`, + deps: "EXPLORE-001", + context_from: "EXPLORE-001", + exec_mode: "csv-wave", + role: "scanner", + component: compName, + wave: 0, // Computed later + status: "pending", + findings: "", + issues_found: "", + issues_fixed: "", + error: "" + }) + + // Diagnose task (csv-wave) + const diagId = `DIAG-${String(taskCounter).padStart(3, '0')}` + allTasks.push({ + id: diagId, + title: `Diagnose ${compName}`, + description: `Analyze root causes of UX issues in ${compName}\n\nFile: ${compPath}`, + deps: scanId, + context_from: scanId, + exec_mode: "csv-wave", + role: "diagnoser", + component: compName, + wave: 0, + status: "pending", + findings: "", + issues_found: "", + issues_fixed: "", + error: "" + }) + + // Design task (interactive) + const designId = `DESIGN-${String(taskCounter).padStart(3, '0')}` + 
allTasks.push({ + id: designId, + title: `Design fixes for ${compName}`, + description: `Design fix approach for UX issues in ${compName}\n\nFile: ${compPath}`, + deps: diagId, + context_from: diagId, + exec_mode: "interactive", + role: "designer", + component: compName, + wave: 0, + status: "pending", + findings: "", + issues_found: "", + issues_fixed: "", + error: "" + }) + + // Implement task (csv-wave) + const implId = `IMPL-${String(taskCounter).padStart(3, '0')}` + allTasks.push({ + id: implId, + title: `Implement fixes for ${compName}`, + description: `Apply fixes to ${compName} component\n\nFile: ${compPath}`, + deps: designId, + context_from: designId, + exec_mode: "csv-wave", + role: "implementer", + component: compName, + wave: 0, + status: "pending", + findings: "", + issues_found: "", + issues_fixed: "", + error: "" + }) + + // Test task (interactive) + const testId = `TEST-${String(taskCounter).padStart(3, '0')}` + allTasks.push({ + id: testId, + title: `Test fixes for ${compName}`, + description: `Validate fixes and run tests for ${compName}\n\nFile: ${compPath}`, + deps: implId, + context_from: implId, + exec_mode: "interactive", + role: "tester", + component: compName, + wave: 0, + status: "pending", + findings: "", + issues_found: "", + issues_fixed: "", + error: "" + }) + + taskCounter++ +} + +// Compute waves via topological sort +const tasksWithWaves = computeWaves(allTasks) + +// Write master CSV +writeMasterCSV(`${sessionDir}/tasks.csv`, tasksWithWaves) + +// User validation (skip if autoYes) +if (!autoYes) { + const approval = AskUserQuestion({ + questions: [{ + question: `Generated ${tasksWithWaves.length} tasks for ${components.length} components. 
Proceed?`, + header: "Task Breakdown Validation", + multiSelect: false, + options: [ + { label: "Proceed", description: "Start UX improvement pipeline" }, + { label: "Cancel", description: "Abort workflow" } + ] + }] + }) + + if (approval.answers[0] !== "Proceed") { + throw new Error("User cancelled workflow") + } +} +``` + +**Success Criteria**: +- tasks.csv created with valid schema, wave, and exec_mode assignments +- No circular dependencies +- User approved (or AUTO_YES) + +--- + +### Phase 2: Wave Execution Engine (Extended) + +**Objective**: Execute tasks wave-by-wave with hybrid mechanism support. + +(Implementation follows same pattern as team-roadmap-dev Phase 2, adapted for UX improvement roles) + +--- + +### Phase 3: Results Aggregation + +**Objective**: Generate final results and human-readable report. + +```javascript +// Load final master CSV +const finalCSV = readMasterCSV(`${sessionDir}/tasks.csv`) + +// Calculate metrics +const completed = finalCSV.filter(t => t.status === 'completed').length +const failed = finalCSV.filter(t => t.status === 'failed').length +const skipped = finalCSV.filter(t => t.status === 'skipped').length +const totalIssuesFound = finalCSV.reduce((sum, t) => sum + (parseInt(t.issues_found) || 0), 0) +const totalIssuesFixed = finalCSV.reduce((sum, t) => sum + (parseInt(t.issues_fixed) || 0), 0) + +// Export results.csv +writeFinalResults(`${sessionDir}/results.csv`, finalCSV) + +// Generate context.md +const contextMd = generateUXContextReport(finalCSV, sessionDir, { + totalIssuesFound, + totalIssuesFixed +}) +Write(`${sessionDir}/context.md`, contextMd) + +// Cleanup active agents +const registry = JSON.parse(Read(`${sessionDir}/agents/registry.json`)) +for (const agent of registry.active) { + close_agent({ id: agent.id }) +} +registry.active = [] +Write(`${sessionDir}/agents/registry.json`, JSON.stringify(registry, null, 2)) + +// Display summary +console.log(`\n=== UX Improvement Pipeline Complete ===`) 
+console.log(`Completed: ${completed}`) +console.log(`Failed: ${failed}`) +console.log(`Skipped: ${skipped}`) +console.log(`Issues Found: ${totalIssuesFound}`) +console.log(`Issues Fixed: ${totalIssuesFixed}`) +console.log(`Fix Rate: ${totalIssuesFound > 0 ? Math.round(totalIssuesFixed / totalIssuesFound * 100) : 0}%`) +console.log(`\nResults: ${sessionDir}/results.csv`) +console.log(`Report: ${sessionDir}/context.md`) + +// Offer next steps +const nextStep = AskUserQuestion({ + questions: [{ + question: "UX Improvement pipeline complete. What would you like to do?", + header: "Completion", + multiSelect: false, + options: [ + { label: "Archive & Clean", description: "Archive session and clean up team resources" }, + { label: "Keep Active", description: "Keep session for follow-up work" }, + { label: "Export Results", description: "Export deliverables to specified location" } + ] + }] +}) + +if (nextStep.answers[0] === "Archive & Clean") { + Bash(`tar -czf "${sessionDir}.tar.gz" "${sessionDir}" && rm -rf "${sessionDir}"`) + console.log(`Session archived to ${sessionDir}.tar.gz`) +} +``` + +**Success Criteria**: +- results.csv exported with UX metrics +- context.md generated with issue summary +- All interactive agents closed +- Summary displayed to user + +--- + +## Shared Discovery Board Protocol + +All agents share `discoveries.ndjson` for UX findings. 
+ +**Discovery Types**: + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `ux_issue` | `component+type` | `{component, type, description, severity}` | UX issues discovered | +| `pattern` | `pattern` | `{pattern, files[], description}` | UI patterns identified | +| `fix_approach` | `component+issue` | `{component, issue, approach, rationale}` | Fix strategies | +| `test_result` | `component+test` | `{component, test, status, details}` | Test outcomes | + +--- + +## Error Handling + +| Error | Resolution | +|-------|------------| +| Framework detection fails | AskUserQuestion for framework selection | +| No components found | Complete with empty report, note in findings | +| Circular dependency | Detect in wave computation, abort with error | +| CSV agent timeout | Mark as failed, continue with wave | +| Interactive agent timeout | Urge convergence via send_input | +| Test iterations exceeded (5) | Accept current state, continue | +| All agents in wave failed | Log error, offer retry or abort | +| Project path invalid | Re-prompt user for valid path | + +--- + +## Core Rules + +1. **Start Immediately**: First action is session initialization, then Phase 0 +2. **Wave Order is Sacred**: Never execute wave N before wave N-1 completes +3. **CSV is Source of Truth**: Master tasks.csv holds all state +4. **CSV First**: Default to csv-wave; use interactive for design/testing +5. **Context Propagation**: prev_context built from master CSV +6. **Discovery Board is Append-Only**: Never clear discoveries.ndjson +7. **Skip on Failure**: If dependency failed, skip dependent task +8. **Lifecycle Balance**: Every spawn_agent has matching close_agent +9. **Cleanup Temp Files**: Remove wave-{N}.csv after merge +10. 
**DO NOT STOP**: Continuous execution until all waves complete diff --git a/.codex/skills/team-ux-improve/agents/ux-designer.md b/.codex/skills/team-ux-improve/agents/ux-designer.md new file mode 100644 index 00000000..4425c3a7 --- /dev/null +++ b/.codex/skills/team-ux-improve/agents/ux-designer.md @@ -0,0 +1,136 @@ +# UX Designer Agent + +Interactive agent for designing fix approaches for identified UX issues. Proposes solutions and may interact with user for clarification. + +## Identity + +- **Type**: `interactive` +- **Role File**: `~/.codex/agents/ux-designer.md` +- **Responsibility**: Solution design for UX issues + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Produce structured output following template +- Design fix approaches for all identified issues +- Consider framework patterns and conventions +- Generate design guide for implementer + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Execute implementation directly +- Skip issue analysis step + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | File I/O | Load diagnosis, exploration cache | +| `Write` | File I/O | Generate design guide | +| `AskUserQuestion` | Human interaction | Clarify design decisions if needed | + +--- + +## Execution + +### Phase 1: Issue Analysis + +**Objective**: Analyze diagnosed issues and understand context. + +**Steps**: + +1. Read diagnosis findings from prev_context +2. Load exploration cache for framework patterns +3. Read discoveries.ndjson for related findings +4. Categorize issues by type and severity + +**Output**: Issue analysis summary + +--- + +### Phase 2: Solution Design + +**Objective**: Design fix approaches for each issue. + +**Steps**: + +1. For each issue: + - Identify root cause from diagnosis + - Propose fix approach following framework patterns + - Consider side effects and edge cases + - Define validation criteria +2. 
Prioritize fixes by severity +3. Document rationale for each approach + +**Output**: Fix approaches per issue + +--- + +### Phase 3: Design Guide Generation + +**Objective**: Generate design guide for implementer. + +**Steps**: + +1. Format design guide: + ```markdown + # Design Guide: {Component} + + ## Issues to Fix + + ### Issue 1: {description} + - **Severity**: {high/medium/low} + - **Root Cause**: {cause} + - **Fix Approach**: {approach} + - **Rationale**: {why this approach} + - **Validation**: {how to verify} + + ## Implementation Notes + - Follow {framework} patterns + - Test cases needed: {list} + ``` +2. Write design guide to artifacts/design-guide.md +3. Share fix approaches via discoveries.ndjson + +**Output**: Design guide file + +--- + +## Structured Output Template + +``` +## Summary +- Designed fixes for {N} issues in {component} + +## Findings +- Issue 1: {description} → Fix: {approach} +- Issue 2: {description} → Fix: {approach} + +## Deliverables +- File: artifacts/design-guide.md + Content: Fix approaches with rationale and validation criteria + +## Output JSON +{ + "design_guide_path": "artifacts/design-guide.md", + "issues_addressed": {N}, + "summary": "Designed fixes for {N} issues" +} +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| No issues found | Generate empty design guide, note in findings | +| Ambiguous fix approach | Ask user for guidance via AskUserQuestion | +| Conflicting patterns | Document trade-offs, recommend approach | diff --git a/.codex/skills/team-ux-improve/agents/ux-explorer.md b/.codex/skills/team-ux-improve/agents/ux-explorer.md new file mode 100644 index 00000000..43a5b716 --- /dev/null +++ b/.codex/skills/team-ux-improve/agents/ux-explorer.md @@ -0,0 +1,158 @@ +# UX Explorer Agent + +Interactive agent for exploring codebase to identify UI component patterns and framework conventions. 
+ +## Identity + +- **Type**: `interactive` +- **Role File**: `~/.codex/agents/ux-explorer.md` +- **Responsibility**: Framework detection and component inventory + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Produce structured output following template +- Detect UI framework (React/Vue/etc.) +- Build component inventory with file paths +- Cache findings for downstream tasks + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Execute implementation or fix tasks +- Skip framework detection step + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Read` | File I/O | Load package.json, component files | +| `Write` | File I/O | Generate exploration cache | +| `Glob` | File search | Find component files | +| `Bash` | CLI execution | Run framework detection commands | + +--- + +## Execution + +### Phase 1: Framework Detection + +**Objective**: Detect UI framework if not specified. + +**Steps**: + +1. If framework specified in arguments, use it +2. Otherwise, detect from package.json: + - Check dependencies for react, vue, angular, svelte + - Check file extensions (*.tsx → React, *.vue → Vue) +3. Validate framework detection + +**Output**: Framework name (react/vue/angular/svelte) + +--- + +### Phase 2: Component Inventory + +**Objective**: Build inventory of UI components. + +**Steps**: + +1. Search for component files based on framework: + - React: `**/*.tsx`, `**/*.jsx` + - Vue: `**/*.vue` + - Angular: `**/*.component.ts` +2. For each component: + - Extract component name + - Record file path + - Identify component type (button, form, modal, etc.) +3. Build component list + +**Output**: Component inventory with paths + +--- + +### Phase 3: Pattern Analysis + +**Objective**: Analyze component patterns and conventions. + +**Steps**: + +1. Sample components to identify patterns: + - State management (useState, Vuex, etc.) 
+ - Event handling patterns + - Styling approach (CSS modules, styled-components, etc.) +2. Document conventions +3. Identify common anti-patterns + +**Output**: Pattern analysis summary + +--- + +### Phase 4: Cache Generation + +**Objective**: Generate exploration cache for downstream tasks. + +**Steps**: + +1. Create cache structure: + ```json + { + "framework": "react", + "components": [ + {"name": "Button", "path": "src/components/Button.tsx", "type": "button"}, + {"name": "Form", "path": "src/components/Form.tsx", "type": "form"} + ], + "patterns": { + "state_management": "React hooks", + "event_handling": "inline handlers", + "styling": "CSS modules" + }, + "conventions": ["PascalCase component names", "Props interface per component"] + } + ``` +2. Write cache to explorations/cache-index.json + +**Output**: Exploration cache file + +--- + +## Structured Output Template + +``` +## Summary +- Detected framework: {framework} +- Found {N} components + +## Findings +- Component inventory: {N} components identified +- Patterns: {state management}, {event handling}, {styling} +- Conventions: {list} + +## Deliverables +- File: explorations/cache-index.json + Content: Component inventory and pattern analysis + +## Output JSON +{ + "framework": "{framework}", + "components": [{component list}], + "component_count": {N}, + "summary": "Explored {N} components in {framework} project" +} +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Framework detection fails | Ask user via AskUserQuestion | +| No components found | Return empty inventory, note in findings | +| Invalid project path | Report error, request valid path | diff --git a/.codex/skills/team-ux-improve/agents/ux-tester.md b/.codex/skills/team-ux-improve/agents/ux-tester.md new file mode 100644 index 00000000..4f5b6726 --- /dev/null +++ b/.codex/skills/team-ux-improve/agents/ux-tester.md @@ -0,0 +1,174 @@ +# UX Tester Agent + +Interactive agent for validating fixes and 
running tests. Iterates up to 5 times if tests fail. + +## Identity + +- **Type**: `interactive` +- **Role File**: `~/.codex/agents/ux-tester.md` +- **Responsibility**: Fix validation and testing + +## Boundaries + +### MUST + +- Load role definition via MANDATORY FIRST STEPS pattern +- Produce structured output following template +- Run tests and validate fixes +- Iterate up to 5 times on test failures +- Generate test report + +### MUST NOT + +- Skip the MANDATORY FIRST STEPS role loading +- Exceed 5 test iterations +- Skip test execution step + +--- + +## Toolbox + +### Available Tools + +| Tool | Type | Purpose | +|------|------|---------| +| `Bash` | CLI execution | Run tests, linters, build | +| `Read` | File I/O | Load implementation findings, design guide | +| `Write` | File I/O | Generate test report | + +--- + +## Execution + +### Phase 1: Test Preparation + +**Objective**: Identify tests to run and prepare test environment. + +**Steps**: + +1. Read implementation findings from prev_context +2. Load design guide for validation criteria +3. Identify test files related to component +4. Check test framework (Jest, Vitest, etc.) + +**Output**: Test plan + +--- + +### Phase 2: Test Execution + +**Objective**: Run tests and validate fixes (max 5 iterations). + +**Steps**: + +1. Run component tests: + ```bash + npm test -- {component}.test + ``` +2. Run linter: + ```bash + npm run lint + ``` +3. Check build: + ```bash + npm run build + ``` +4. Collect results +5. If tests fail and iteration < 5: + - Analyze failures + - Apply quick fixes if possible + - Re-run tests +6. If iteration >= 5: + - Accept current state + - Document remaining issues + +**Output**: Test results with pass/fail status + +--- + +### Phase 3: Validation + +**Objective**: Validate fixes against design guide criteria. + +**Steps**: + +1. For each validation criterion in design guide: + - Check if met by implementation + - Check if validated by tests + - Document status +2. 
Calculate fix success rate +3. Identify remaining issues + +**Output**: Validation summary + +--- + +### Phase 4: Test Report Generation + +**Objective**: Generate test report with results. + +**Steps**: + +1. Format test report: + ```markdown + # Test Report: {Component} + + ## Test Results + - Tests passed: {X}/{Y} + - Build status: {success/failed} + - Linter warnings: {Z} + + ## Validation Status + - Issue 1: {fixed/partial/unfixed} + - Issue 2: {fixed/partial/unfixed} + + ## Remaining Issues + - {list if any} + + ## Recommendation + {approve/needs_work} + ``` +2. Write test report to artifacts/test-report.md +3. Share test results via discoveries.ndjson + +**Output**: Test report file + +--- + +## Structured Output Template + +``` +## Summary +- Testing complete for {component}: {X}/{Y} tests passed + +## Findings +- Tests passed: {X}/{Y} +- Build status: {success/failed} +- Issues fixed: {N} +- Remaining issues: {M} + +## Deliverables +- File: artifacts/test-report.md + Content: Test results and validation status + +## Output JSON +{ + "test_report_path": "artifacts/test-report.md", + "tests_passed": {X}, + "tests_total": {Y}, + "issues_fixed": {N}, + "recommendation": "approve" | "needs_work", + "summary": "Testing complete: {X}/{Y} tests passed" +} +``` + +--- + +## Error Handling + +| Scenario | Resolution | +|----------|------------| +| Tests fail to run | Document as issue, continue validation | +| Build fails | Mark as critical issue, recommend fix | +| Test iterations exceed 5 | Accept current state, document remaining issues | +| No test files found | Note in findings, perform manual validation | diff --git a/.codex/skills/team-ux-improve/instructions/ux-worker-instruction.md b/.codex/skills/team-ux-improve/instructions/ux-worker-instruction.md new file mode 100644 index 00000000..d577c964 --- /dev/null +++ b/.codex/skills/team-ux-improve/instructions/ux-worker-instruction.md @@ -0,0 +1,55 @@ +## TASK ASSIGNMENT + +### MANDATORY FIRST STEPS +1. 
Read shared discoveries: {session_folder}/discoveries.ndjson (if exists, skip if not) +2. Read project context: .workflow/project-tech.json (if exists) +3. Read exploration cache: {session_folder}/explorations/cache-index.json (if exists) + +--- + +## Your Task + +**Task ID**: {id} +**Title**: {title} +**Description**: {description} +**Role**: {role} +**Component**: {component} + +### Previous Tasks' Findings (Context) +{prev_context} + +--- + +## Execution Protocol + +1. **Read discoveries**: Load {session_folder}/discoveries.ndjson for shared UX findings +2. **Use context**: Apply previous tasks' findings from prev_context above +3. **Execute**: Perform role-specific task + - **Scanner**: Scan component for UX issues (unresponsive buttons, missing feedback, state refresh) + - **Diagnoser**: Analyze root causes of identified issues + - **Implementer**: Apply fixes following design guide +4. **Share discoveries**: Append findings to shared board: + ```bash + echo '{"ts":"","worker":"{id}","type":"","data":{...}}' >> {session_folder}/discoveries.ndjson + ``` +5. 
**Report result**: Return JSON via report_agent_job_result + +### Discovery Types to Share +- `ux_issue`: `{component, type, description, severity}` — UX issues discovered +- `pattern`: `{pattern, files[], description}` — UI patterns identified +- `fix_approach`: `{component, issue, approach, rationale}` — Fix strategies +- `test_result`: `{component, test, status, details}` — Test outcomes + +--- + +## Output (report_agent_job_result) + +Return JSON: +{ + "id": "{id}", + "status": "completed" | "failed", + "findings": "Key discoveries (max 500 chars)", + "issues_found": "3", + "issues_fixed": "3", + "error": "" +} diff --git a/.codex/skills/team-ux-improve/schemas/tasks-schema.md b/.codex/skills/team-ux-improve/schemas/tasks-schema.md new file mode 100644 index 00000000..5bc0cc49 --- /dev/null +++ b/.codex/skills/team-ux-improve/schemas/tasks-schema.md @@ -0,0 +1,87 @@ +# UX Improvement — CSV Schema + +## Master CSV: tasks.csv + +### Column Definitions + +#### Input Columns (Set by Decomposer) + +| Column | Type | Required | Description | Example | +|--------|------|----------|-------------|---------| +| `id` | string | Yes | Unique task identifier | `"SCAN-001"` | +| `title` | string | Yes | Short task title | `"Scan Button component"` | +| `description` | string | Yes | Detailed task description (self-contained) | `"Scan Button component for UX issues..."` | +| `deps` | string | No | Semicolon-separated dependency task IDs | `"EXPLORE-001"` | +| `context_from` | string | No | Semicolon-separated task IDs for context | `"EXPLORE-001"` | +| `exec_mode` | enum | Yes | Execution mechanism: `csv-wave` or `interactive` | `"csv-wave"` | +| `role` | enum | Yes | Role name: `explorer`, `scanner`, `diagnoser`, `designer`, `implementer`, `tester` | `"scanner"` | +| `component` | string | No | Component name being processed | `"Button"` | + +#### Computed Columns (Set by Wave Engine) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| 
`wave` | integer | Wave number (1-based, from topological sort) | `2` | +| `prev_context` | string | Aggregated findings from context_from tasks (per-wave CSV only) | `"[EXPLORE-001] Found 15 components..."` | + +#### Output Columns (Set by Agent) + +| Column | Type | Description | Example | +|--------|------|-------------|---------| +| `status` | enum | `pending` → `completed` / `failed` / `skipped` | `"completed"` | +| `findings` | string | Key discoveries (max 500 chars) | `"Found 3 UX issues: unresponsive onClick..."` | +| `issues_found` | string | Number of issues found (scanner/diagnoser) | `"3"` | +| `issues_fixed` | string | Number of issues fixed (implementer) | `"3"` | +| `error` | string | Error message if failed | `""` | + +--- + +### exec_mode Values + +| Value | Mechanism | Description | +|-------|-----------|-------------| +| `csv-wave` | `spawn_agents_on_csv` | One-shot batch execution within wave | +| `interactive` | `spawn_agent`/`wait`/`send_input`/`close_agent` | Multi-round individual execution | + +--- + +### Example Data + +```csv +id,title,description,deps,context_from,exec_mode,role,component,wave,status,findings,issues_found,issues_fixed,error +EXPLORE-001,Framework Exploration,Explore React component patterns,,,"interactive",explorer,,1,completed,"Found 15 components using React hooks","","","" +SCAN-001,Scan Button,Scan Button for UX issues,EXPLORE-001,EXPLORE-001,"csv-wave",scanner,Button,2,pending,"","","","" +DIAG-001,Diagnose Button,Analyze root causes in Button,SCAN-001,SCAN-001,"csv-wave",diagnoser,Button,3,pending,"","","","" +DESIGN-001,Design Button fixes,Design fix approach for Button,DIAG-001,DIAG-001,"interactive",designer,Button,4,pending,"","","","" +``` + +--- + +## Discovery Types + +| Type | Dedup Key | Data Schema | Description | +|------|-----------|-------------|-------------| +| `ux_issue` | `component+type` | `{component, type, description, severity}` | UX issues discovered | +| `pattern` | `pattern` | `{pattern, 
files[], description}` | UI patterns identified | +| `fix_approach` | `component+issue` | `{component, issue, approach, rationale}` | Fix strategies | +| `test_result` | `component+test` | `{component, test, status, details}` | Test outcomes | + +### Discovery NDJSON Format + +```jsonl +{"ts":"2026-03-08T14:30:22Z","worker":"SCAN-001","type":"ux_issue","data":{"component":"Button","type":"unresponsive_click","description":"onClick handler not firing","severity":"high"}} +{"ts":"2026-03-08T14:35:10Z","worker":"DIAG-001","type":"pattern","data":{"pattern":"event delegation","files":["Button.tsx"],"description":"Using event delegation pattern"}} +``` + +--- + +## Validation Rules + +| Rule | Check | Error | +|------|-------|-------| +| Unique IDs | No duplicate `id` values | "Duplicate task ID: {id}" | +| Valid deps | All dep IDs exist in tasks | "Unknown dependency: {dep_id}" | +| No self-deps | Task cannot depend on itself | "Self-dependency: {id}" | +| No circular deps | Topological sort completes | "Circular dependency detected" | +| exec_mode valid | Value is `csv-wave` or `interactive` | "Invalid exec_mode: {value}" | +| Role valid | role ∈ {explorer, scanner, diagnoser, designer, implementer, tester} | "Invalid role: {role}" | diff --git a/.codex/skills/workflow-tdd-plan/SKILL.md b/.codex/skills/workflow-tdd-plan/SKILL.md new file mode 100644 index 00000000..783f5281 --- /dev/null +++ b/.codex/skills/workflow-tdd-plan/SKILL.md @@ -0,0 +1,759 @@ +--- +name: workflow-tdd-plan +description: | + TDD planning pipeline with multi-mode routing (plan/verify). Session discovery → + context gathering (spawn_agent) → test coverage analysis (spawn_agent) → conditional + conflict resolution → TDD task generation (spawn_agent) → structure validation → + interactive verification. Produces IMPL_PLAN.md with Red-Green-Refactor cycles, + task JSONs, TODO_LIST.md. 
+argument-hint: "[-y|--yes] [--session ID] \"task description\" | verify [--session ID]" +allowed-tools: spawn_agent, wait, send_input, close_agent, AskUserQuestion, Read, Write, Edit, Bash, Glob, Grep +--- + +## Auto Mode + +When `--yes` or `-y`: Skip all confirmations, use defaults, auto-verify, auto-continue to execute if PROCEED. + +# Workflow TDD Plan + +## Usage + +```bash +# Plan mode (default) +$workflow-tdd-plan "Build authentication system with JWT and OAuth" +$workflow-tdd-plan -y "Add rate limiting to API endpoints" +$workflow-tdd-plan --session WFS-auth "Extend with 2FA support" + +# Verify mode +$workflow-tdd-plan verify --session WFS-auth +$workflow-tdd-plan verify +``` + +**Flags**: +- `-y, --yes`: Skip all confirmations (auto mode) +- `--session ID`: Use specific session + +--- + +## Overview + +Multi-mode TDD planning pipeline using subagent coordination. Plan mode runs 6 sequential phases with conditional branching; verify mode operates on existing plans with TDD compliance validation. 
+ +**Core Principle**: NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST + +``` +┌──────────────────────────────────────────────────────────────────┐ +│ WORKFLOW TDD PLAN PIPELINE │ +├──────────────────────────────────────────────────────────────────┤ +│ │ +│ Mode Detection: plan | verify │ +│ │ +│ ═══ Plan Mode (default) ═══ │ +│ │ +│ Phase 1: Session Discovery │ +│ ├─ Create or find workflow session │ +│ └─ Initialize planning-notes.md with TDD context │ +│ │ +│ Phase 2: Context Gathering (spawn_agent: context-search-agent) │ +│ ├─ Codebase analysis → context-package.json │ +│ └─ Conflict risk assessment │ +│ │ +│ Phase 3: Test Coverage Analysis (spawn_agent: cli-explore-agent)│ +│ ├─ Detect test framework and conventions │ +│ ├─ Analyze existing test coverage │ +│ └─ Output: test-context-package.json │ +│ │ +│ Phase 4: Conflict Resolution (conditional: risk ≥ medium) │ +│ ├─ CLI-driven conflict analysis │ +│ └─ User-selected resolution strategies │ +│ │ +│ Phase 5: TDD Task Generation (spawn_agent: action-planning-agent)│ +│ ├─ Generate tasks with Red-Green-Refactor cycles │ +│ └─ Output: IMPL_PLAN.md + task JSONs + TODO_LIST.md │ +│ │ +│ Phase 6: TDD Structure Validation │ +│ ├─ Validate Red-Green-Refactor structure │ +│ └─ Present Plan Confirmation Gate │ +│ │ +│ Plan Confirmation Gate │ +│ ├─ "Verify TDD Compliance" → Phase 7 │ +│ ├─ "Start Execution" → workflow-execute │ +│ └─ "Review Status" → Display inline │ +│ │ +│ ═══ Verify Mode ═══ │ +│ Phase 7: TDD Verification (spawn_agent: cli-explore-agent) │ +│ └─ 4-dimension TDD compliance → TDD_COMPLIANCE_REPORT.md │ +│ │ +└──────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Data Flow + +``` +User Input (task description) + │ + ↓ [Convert to TDD Structured Format] + │ TDD: [Feature Name] + │ GOAL: [objective] + │ SCOPE: [boundaries] + │ CONTEXT: [background] + │ TEST_FOCUS: [test scenarios] + │ +Phase 1 ──→ sessionId, planning-notes.md + │ +Phase 2 ──→ context-package.json, 
conflictRisk + │ +Phase 3 ──→ test-context-package.json + │ + ├── conflictRisk ≥ medium ──→ Phase 4 ──→ conflict-resolution.json + └── conflictRisk < medium ──→ skip Phase 4 + │ +Phase 5 ──→ IMPL_PLAN.md (with Red-Green-Refactor), task JSONs, TODO_LIST.md + │ +Phase 6 ──→ TDD structure validation + │ + ├── Verify → Phase 7 → TDD_COMPLIANCE_REPORT.md + ├── Execute → workflow-execute skill + └── Review → inline display +``` + +--- + +## Session Structure + +``` +.workflow/active/WFS-{session}/ +├── workflow-session.json # Session metadata +├── planning-notes.md # Accumulated context across phases +├── IMPL_PLAN.md # Implementation plan with TDD cycles +├── plan.json # Structured plan overview +├── TODO_LIST.md # Task checklist +├── .task/ # Task definitions with TDD phases +│ ├── IMPL-1.json # Each task has Red-Green-Refactor steps +│ └── IMPL-N.json +└── .process/ + ├── context-package.json # Phase 2 output + ├── test-context-package.json # Phase 3 output + ├── conflict-resolution.json # Phase 4 output (conditional) + └── TDD_COMPLIANCE_REPORT.md # Phase 7 output +``` + +--- + +## Implementation + +### Session Initialization + +```javascript +const getUtc8ISOString = () => new Date(Date.now() + 8 * 60 * 60 * 1000).toISOString() + +// Parse flags +const AUTO_YES = $ARGUMENTS.includes('--yes') || $ARGUMENTS.includes('-y') +const sessionMatch = $ARGUMENTS.match(/--session\s+(\S+)/) +const existingSessionId = sessionMatch ? 
sessionMatch[1] : null + +// Mode detection +const cleanArgs = $ARGUMENTS + .replace(/--yes|-y|--session\s+\S+/g, '').trim() + +let mode = 'plan' +if (cleanArgs.startsWith('verify')) mode = 'verify' + +const taskDescription = cleanArgs + .replace(/^verify\s*/, '') + .replace(/^["']|["']$/g, '') + .trim() + +// Convert to TDD structured format +function toTddStructured(desc) { + const featureName = desc.split(/\s+/).slice(0, 3).join(' ') + return `TDD: ${featureName} +GOAL: ${desc} +SCOPE: Core implementation +CONTEXT: New development +TEST_FOCUS: Unit tests, integration tests, edge cases` +} + +const structuredDesc = toTddStructured(taskDescription) +``` + +--- + +### Phase 1: Session Discovery (Plan Mode) + +**Objective**: Create or find workflow session, initialize planning notes with TDD context. + +```javascript +if (mode !== 'plan') { + // verify: locate existing session + // → Jump to Phase 7 +} + +let sessionId, sessionFolder + +if (existingSessionId) { + sessionId = existingSessionId + sessionFolder = `.workflow/active/${sessionId}` + if (!Bash(`test -d "${sessionFolder}" && echo yes`).trim()) { + console.log(`ERROR: Session ${sessionId} not found`) + return + } +} else { + // Auto-detect from .workflow/active/ or create new + const sessions = Bash(`ls -d .workflow/active/WFS-* 2>/dev/null`).trim().split('\n').filter(Boolean) + + if (sessions.length === 0 || taskDescription) { + // Create new session + const slug = taskDescription.toLowerCase() + .replace(/[^a-z0-9\u4e00-\u9fa5]+/g, '-').substring(0, 40) + sessionId = `WFS-${slug}` + sessionFolder = `.workflow/active/${sessionId}` + Bash(`mkdir -p "${sessionFolder}/.task" "${sessionFolder}/.process"`) + + Write(`${sessionFolder}/workflow-session.json`, JSON.stringify({ + session_id: sessionId, + status: 'planning', + workflow_type: 'tdd', + created_at: getUtc8ISOString(), + task_description: taskDescription + }, null, 2)) + } else if (sessions.length === 1) { + sessionId = sessions[0].split('/').pop() + 
sessionFolder = sessions[0] + } else { + // Multiple sessions — ask user + if (AUTO_YES) { + sessionFolder = sessions[0] + sessionId = sessions[0].split('/').pop() + } else { + const answer = AskUserQuestion({ + questions: [{ + question: "Multiple sessions found. Select one:", + header: "Session", + multiSelect: false, + options: sessions.slice(0, 4).map(s => ({ + label: s.split('/').pop(), + description: s + })) + }] + }) + sessionId = answer.Session + sessionFolder = `.workflow/active/${sessionId}` + } + } +} + +// Initialize planning-notes.md with TDD context +Write(`${sessionFolder}/planning-notes.md`, `# TDD Planning Notes + +## User Intent +${structuredDesc} + +## TDD Principles +- NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST +- Red-Green-Refactor cycle for all tasks +- Test-first forces edge case discovery before implementation +`) + +console.log(`Session: ${sessionId}`) +``` + +--- + +### Phase 2: Context Gathering (spawn_agent) + +**Objective**: Gather project context, assess conflict risk. + +```javascript +console.log(`\n## Phase 2: Context Gathering\n`) + +const ctxAgent = spawn_agent({ + agent: `~/.codex/agents/context-search-agent.md`, + instruction: ` +Gather implementation context for TDD planning. + +**Session**: ${sessionFolder} +**Task**: ${taskDescription} +**Mode**: TDD_PLAN + +### Steps +1. Analyze project structure (package.json, tsconfig, etc.) +2. Search for existing similar implementations +3. Identify integration points and dependencies +4. Assess conflict risk with existing code +5. Generate context package + +### Output +Write context package to: ${sessionFolder}/.process/context-package.json +Format: { + "critical_files": [...], + "patterns": [...], + "dependencies": [...], + "integration_points": [...], + "conflict_risk": "none" | "low" | "medium" | "high", + "conflict_areas": [...], + "constraints": [...] 
+} +` +}) + +wait({ id: ctxAgent }) +close_agent({ id: ctxAgent }) + +// Parse outputs +const contextPkg = JSON.parse(Read(`${sessionFolder}/.process/context-package.json`) || '{}') +const conflictRisk = contextPkg.conflict_risk || 'none' + +// Update planning-notes.md +Edit(`${sessionFolder}/planning-notes.md`, { + oldText: '## User Intent', + newText: `## Context Findings +- Critical files: ${(contextPkg.critical_files || []).join(', ')} +- Conflict risk: ${conflictRisk} +- Constraints: ${(contextPkg.constraints || []).join('; ')} + +## User Intent` +}) + +console.log(` Context gathered. Conflict risk: ${conflictRisk}`) +``` + +--- + +### Phase 3: Test Coverage Analysis (spawn_agent) + +**Objective**: Analyze existing test patterns and coverage. + +```javascript +console.log(`\n## Phase 3: Test Coverage Analysis\n`) + +const testAgent = spawn_agent({ + agent: `~/.codex/agents/cli-explore-agent.md`, + instruction: ` +Analyze test coverage and framework for TDD planning. + +**Session**: ${sessionFolder} +**Context**: ${sessionFolder}/.process/context-package.json + +### Steps +1. Detect test framework (Jest, Vitest, Mocha, etc.) +2. Identify test file patterns and conventions +3. Analyze existing test coverage +4. Identify coverage gaps +5. 
Extract test utilities and helpers + +### Output +Write test context to: ${sessionFolder}/.process/test-context-package.json +Format: { + "test_framework": "jest" | "vitest" | "mocha" | "other", + "test_patterns": { + "unit": "**/*.test.ts", + "integration": "**/*.integration.test.ts" + }, + "coverage_summary": { + "lines": 75.5, + "branches": 68.2, + "functions": 80.1 + }, + "coverage_gaps": [...], + "test_utilities": [...], + "conventions": { + "naming": "describe/it", + "mocking": "jest.mock", + "assertions": "expect" + } +} +` +}) + +wait({ id: testAgent }) +close_agent({ id: testAgent }) + +const testContext = JSON.parse(Read(`${sessionFolder}/.process/test-context-package.json`) || '{}') + +// Update planning-notes +Edit(`${sessionFolder}/planning-notes.md`, { + oldText: '## TDD Principles', + newText: `## Test Context +- Framework: ${testContext.test_framework || 'unknown'} +- Coverage: ${testContext.coverage_summary?.lines || 'N/A'}% lines +- Gaps: ${(testContext.coverage_gaps || []).join(', ')} + +## TDD Principles` +}) + +console.log(` Test framework: ${testContext.test_framework}`) +``` + +--- + +### Phase 4: Conflict Resolution (Conditional) + +**Objective**: Detect and resolve conflicts when risk ≥ medium. + +```javascript +if (['medium', 'high'].includes(conflictRisk)) { + console.log(`\n## Phase 4: Conflict Resolution (risk: ${conflictRisk})\n`) + + Bash({ + command: `ccw cli -p "PURPOSE: Analyze and resolve conflicts between planned changes and existing codebase. 
+TASK: + • Read context package for conflict areas + • Analyze each conflict area in detail + • Propose resolution strategies (refactor, adapt, isolate, defer) + • For each conflict: assess impact and recommend approach +MODE: analysis +CONTEXT: @**/* +EXPECTED: JSON: {conflicts: [{area, severity, description, strategy, impact}], summary: string} +CONSTRAINTS: Focus on ${(contextPkg.conflict_areas || []).join(', ')} + +TASK DESCRIPTION: ${taskDescription}" --tool gemini --mode analysis --rule analysis-diagnose-bug-root-cause`, + run_in_background: true + }) + // Wait for CLI → conflicts[] + + if (!AUTO_YES && conflicts.length > 0) { + // Present conflicts and let user select strategies + console.log(`\n### Conflicts Found: ${conflicts.length}\n`) + conflicts.forEach((c, i) => { + console.log(`${i + 1}. [${c.severity}] ${c.area}: ${c.description}`) + console.log(` Strategy: ${c.strategy} | Impact: ${c.impact}`) + }) + + const answer = AskUserQuestion({ + questions: [{ + question: "Accept conflict resolution strategies?", + header: "Conflicts", + multiSelect: false, + options: [ + { label: "Accept All", description: "Apply all recommended strategies" }, + { label: "Review Each", description: "Approve strategies individually" }, + { label: "Skip", description: "Proceed without resolving" } + ] + }] + }) + } + + // Write resolution + Write(`${sessionFolder}/.process/conflict-resolution.json`, + JSON.stringify({ conflicts, resolved_at: getUtc8ISOString() }, null, 2)) +} else { + console.log(` Conflict risk: ${conflictRisk} — skipping Phase 4`) +} +``` + +--- + +### Phase 5: TDD Task Generation (spawn_agent) + +**Objective**: Generate IMPL_PLAN.md with Red-Green-Refactor cycles, task JSONs, TODO_LIST.md. + +```javascript +console.log(`\n## Phase 5: TDD Task Generation\n`) + +const planAgent = spawn_agent({ + agent: `~/.codex/agents/action-planning-agent.md`, + instruction: ` +Generate TDD implementation plan with Red-Green-Refactor cycles. 
+ +**Session**: ${sessionFolder} +**Task**: ${taskDescription} +**Context**: ${sessionFolder}/.process/context-package.json +**Test Context**: ${sessionFolder}/.process/test-context-package.json +**Planning Notes**: ${sessionFolder}/planning-notes.md +${conflictRisk === 'medium' || conflictRisk === 'high' + ? `**Conflict Resolution**: ${sessionFolder}/.process/conflict-resolution.json` : ''} + +### TDD Requirements +Each task MUST include Red-Green-Refactor cycle: +1. **Red Phase**: Write failing test first + - Define test cases + - Verify test fails (proves test is valid) + - Document expected failure +2. **Green Phase**: Implement minimal code to pass + - Write simplest implementation + - Run tests until passing + - Max 3 test-fix iterations (auto-revert if exceeded) +3. **Refactor Phase**: Improve code quality + - Refactor with tests as safety net + - Maintain passing tests + - Document improvements + +### Output Requirements +1. **IMPL_PLAN.md** at ${sessionFolder}/IMPL_PLAN.md + - Section 1: Requirements Summary + - Section 2: Test Strategy (framework, patterns, coverage goals) + - Section 3: Task Breakdown with TDD cycles + - Section 4: Implementation Strategy + - Section 5: Risk Assessment +2. **plan.json** at ${sessionFolder}/plan.json + - {task_ids[], recommended_execution, complexity, tdd_compliance: true} +3. **Task JSONs** at ${sessionFolder}/.task/IMPL-{N}.json + - Each task has "implementation" array with 3 steps: + [ + {step: 1, tdd_phase: "red", description: "Write failing test", ...}, + {step: 2, tdd_phase: "green", description: "Implement code", test_fix_cycle: {max_iterations: 3, auto_revert: true}}, + {step: 3, tdd_phase: "refactor", description: "Refactor code", ...} + ] +4. 
**TODO_LIST.md** at ${sessionFolder}/TODO_LIST.md + - Checkbox format: - [ ] IMPL-{N}: {title} (TDD) +` +}) + +wait({ id: planAgent }) +close_agent({ id: planAgent }) + +console.log(` TDD tasks generated`) +``` + +--- + +### Phase 6: TDD Structure Validation + +**Objective**: Validate Red-Green-Refactor structure in all tasks. + +```javascript +console.log(`\n## Phase 6: TDD Structure Validation\n`) + +// Read all task JSONs +const taskFiles = Bash(`ls ${sessionFolder}/.task/IMPL-*.json 2>/dev/null`).trim().split('\n').filter(Boolean) +const tasks = taskFiles.map(f => JSON.parse(Read(f))) + +// Validate TDD structure +const validationErrors = [] +for (const task of tasks) { + const impl = task.implementation || [] + + // Check 3-step structure + if (impl.length !== 3) { + validationErrors.push(`${task.id}: Expected 3 steps, found ${impl.length}`) + continue + } + + // Check Red phase + if (impl[0].tdd_phase !== 'red') { + validationErrors.push(`${task.id}: Step 1 must be Red phase`) + } + + // Check Green phase with test-fix-cycle + if (impl[1].tdd_phase !== 'green') { + validationErrors.push(`${task.id}: Step 2 must be Green phase`) + } + if (!impl[1].test_fix_cycle || !impl[1].test_fix_cycle.max_iterations) { + validationErrors.push(`${task.id}: Green phase missing test-fix-cycle config`) + } + + // Check Refactor phase + if (impl[2].tdd_phase !== 'refactor') { + validationErrors.push(`${task.id}: Step 3 must be Refactor phase`) + } +} + +if (validationErrors.length > 0) { + console.log(`\n### TDD Structure Validation Errors:\n`) + validationErrors.forEach(e => console.log(` - ${e}`)) + + if (!AUTO_YES) { + const answer = AskUserQuestion({ + questions: [{ + question: "TDD structure validation failed. 
Continue anyway?", + header: "Validation", + multiSelect: false, + options: [ + { label: "Fix and Retry", description: "Regenerate tasks with correct structure" }, + { label: "Continue", description: "Proceed despite errors" }, + { label: "Abort", description: "Stop planning" } + ] + }] + }) + + if (answer.Validation === "Fix and Retry") { + // Re-run Phase 5 + // → goto Phase 5 + } else if (answer.Validation === "Abort") { + return + } + } +} else { + console.log(` ✓ All tasks have valid Red-Green-Refactor structure`) +} + +// Plan Confirmation Gate +const taskCount = tasks.length +console.log(`\n## Plan Generated\n`) +console.log(` Tasks: ${taskCount}`) +console.log(` Plan: ${sessionFolder}/IMPL_PLAN.md`) + +if (AUTO_YES) { + console.log(` [--yes] Auto-verifying TDD compliance...`) + // → Fall through to Phase 7 +} else { + const nextStep = AskUserQuestion({ + questions: [{ + question: "TDD plan generated. What's next?", + header: "Next Step", + multiSelect: false, + options: [ + { label: "Verify TDD Compliance (Recommended)", description: "Run full TDD compliance verification" }, + { label: "Start Execution", description: "Proceed to workflow-execute" }, + { label: "Review Status", description: "Display plan summary inline" } + ] + }] + }) + + if (nextStep['Next Step'] === 'Start Execution') { + console.log(`\nReady to execute. Run: $workflow-execute --session ${sessionId}`) + return + } + if (nextStep['Next Step'] === 'Review Status') { + const plan = Read(`${sessionFolder}/IMPL_PLAN.md`) + console.log(plan) + return + } + // Verify → continue to Phase 7 +} +``` + +--- + +### Phase 7: TDD Verification (Verify Mode) + +**Objective**: Full TDD compliance verification with quality gate. 
+ +```javascript +if (mode === 'verify' || /* auto-verify from Phase 6 */) { + console.log(`\n## Phase 7: TDD Verification\n`) + + // Find session if in verify mode entry + if (mode === 'verify' && !sessionFolder) { + // Session discovery (same logic as Phase 1) + } + + const verifyAgent = spawn_agent({ + agent: `~/.codex/agents/cli-explore-agent.md`, + instruction: ` +Verify TDD compliance across 4 dimensions. + +**Session**: ${sessionFolder} + +### Verification Dimensions + +**A. Test-First Structure** +- Every task has Red-Green-Refactor cycle +- Red phase defines failing tests +- Green phase implements code +- Refactor phase improves quality + +**B. Test Coverage** +- All critical paths have tests +- Edge cases covered +- Integration points tested +- Coverage meets project standards + +**C. Cycle Integrity** +- Red phase: test fails before implementation +- Green phase: minimal code to pass +- Refactor phase: maintains passing tests +- No production code without failing test first + +**D. Quality Gates** +- Test-fix-cycle configured (max 3 iterations) +- Auto-revert on iteration limit +- Clear acceptance criteria +- Testable convergence conditions + +### Output +Write report to: ${sessionFolder}/.process/TDD_COMPLIANCE_REPORT.md + +Format: +# TDD Compliance Report + +## Summary +- Quality Gate: APPROVED | CONDITIONAL | BLOCKED +- Tasks Analyzed: N +- Compliance Score: X% + +## Dimension Scores +- A. Test-First Structure: PASS/WARN/FAIL +- B. Test Coverage: PASS/WARN/FAIL +- C. Cycle Integrity: PASS/WARN/FAIL +- D. 
Quality Gates: PASS/WARN/FAIL + +## Issues Found +[List specific issues with task IDs] + +## Recommendations +[Actionable recommendations] + +## Quality Gate Decision +APPROVED: All dimensions PASS, ready for execution +CONDITIONAL: Minor warnings, can proceed with caution +BLOCKED: Critical failures, must fix before execution +` + }) + + wait({ id: verifyAgent }) + close_agent({ id: verifyAgent }) + + const report = Read(`${sessionFolder}/.process/TDD_COMPLIANCE_REPORT.md`) + const qualityGate = report.match(/Quality Gate: (\w+)/)?.[1] || 'UNKNOWN' + + console.log(` Quality gate: ${qualityGate}`) + console.log(` Report: ${sessionFolder}/.process/TDD_COMPLIANCE_REPORT.md`) + + if (AUTO_YES && qualityGate === 'APPROVED') { + console.log(` [--yes] TDD compliance verified. Ready for execution.`) + console.log(` Run: $workflow-execute --session ${sessionId}`) + } +} +``` + +--- + +## Error Handling + +| Error | Recovery | +|-------|----------| +| No active session | Guide: run `$workflow-tdd-plan "description"` first | +| Context gathering timeout | Retry with reduced scope | +| Test framework detection failed | Manual specification in test-context-package.json | +| TDD structure validation failed | Regenerate tasks or fix manually | +| Conflict analysis failure | Skip Phase 4, proceed with warning | +| Task generation failure | Retry agent with simplified context | +| Verification failure | Display partial results | +| Session not found (verify) | List available sessions | + +--- + +## TDD Compliance Requirements + +### The Iron Law + +``` +NO PRODUCTION CODE WITHOUT A FAILING TEST FIRST +``` + +### Red-Green-Refactor Cycle + +Every task must have: +1. **Red**: Write failing test (proves test is valid) +2. **Green**: Implement minimal code to pass (with test-fix-cycle) +3. 
**Refactor**: Improve code quality (maintain passing tests) + +### Quality Gates + +- Test-fix-cycle: max 3 iterations +- Auto-revert: triggered when max iterations reached +- Coverage goals: defined per project +- Acceptance criteria: testable and measurable + +--- + +## Compact Protection + +> **COMPACT DIRECTIVE**: If context compression occurs: +> - Re-read `planning-notes.md` for accumulated context +> - Re-read `workflow-session.json` for session state +> - Phase 5 is the most token-intensive — if compressed, re-read task JSONs on demand diff --git a/ccw/frontend/src/App.tsx b/ccw/frontend/src/App.tsx index e928b5dc..f0fd7f26 100644 --- a/ccw/frontend/src/App.tsx +++ b/ccw/frontend/src/App.tsx @@ -11,13 +11,19 @@ import { Toaster } from 'sonner'; import { router } from './router'; import queryClient from './lib/query-client'; import type { Locale } from './lib/i18n'; -import { useWorkflowStore } from '@/stores/workflowStore'; +import { fetchCliSessions, initializeCsrfToken } from './lib/api'; +import { useWorkflowStore, selectProjectPath } from '@/stores/workflowStore'; import { useCliStreamStore } from '@/stores/cliStreamStore'; +import { useCliSessionStore } from '@/stores/cliSessionStore'; import { useExecutionMonitorStore } from '@/stores/executionMonitorStore'; +import { useSessionManagerStore } from '@/stores/sessionManagerStore'; +import { useIssueQueueIntegrationStore } from '@/stores/issueQueueIntegrationStore'; +import { useQueueExecutionStore } from '@/stores/queueExecutionStore'; +import { useQueueSchedulerStore } from '@/stores/queueSchedulerStore'; import { useTerminalPanelStore } from '@/stores/terminalPanelStore'; +import { useTerminalGridStore } from '@/stores/terminalGridStore'; import { useActiveCliExecutions, ACTIVE_CLI_EXECUTIONS_QUERY_KEY } from '@/hooks/useActiveCliExecutions'; import { DialogStyleProvider } from '@/contexts/DialogStyleContext'; -import { initializeCsrfToken } from './lib/api'; interface AppProps { locale: Locale; @@ 
-39,6 +45,7 @@ function App({ locale, messages }: AppProps) { + @@ -59,8 +66,21 @@ function QueryInvalidator() { // Register callback to invalidate all workspace-related queries on workspace switch const callback = () => { useCliStreamStore.getState().resetState(); + useCliSessionStore.getState().resetState(); useExecutionMonitorStore.getState().resetState(); + useSessionManagerStore.getState().resetState(); + useIssueQueueIntegrationStore.getState().resetState(); + useQueueExecutionStore.getState().resetState(); + const queueSchedulerStore = useQueueSchedulerStore.getState(); + queueSchedulerStore.resetState(); + const nextProjectPath = useWorkflowStore.getState().projectPath; + if (nextProjectPath) { + void queueSchedulerStore.loadInitialState().catch((error) => { + console.error('[QueueSchedulerSync] Failed to sync scheduler state:', error); + }); + } useTerminalPanelStore.getState().resetState(); + useTerminalGridStore.getState().resetWorkspaceState(); queryClient.invalidateQueries({ queryKey: ACTIVE_CLI_EXECUTIONS_QUERY_KEY }); queryClient.invalidateQueries({ predicate: (query) => { @@ -86,6 +106,41 @@ function QueryInvalidator() { * CLI Execution Sync component * Syncs active CLI executions in the background to keep the count updated in Header */ +function CliSessionSync() { + const projectPath = useWorkflowStore(selectProjectPath); + const setSessions = useCliSessionStore((state) => state.setSessions); + + useEffect(() => { + let cancelled = false; + + if (!projectPath) { + setSessions([]); + return () => { + cancelled = true; + }; + } + + fetchCliSessions(projectPath) + .then(({ sessions }) => { + if (!cancelled) { + setSessions(sessions); + } + }) + .catch((error) => { + console.error('[CliSessionSync] Failed to sync CLI sessions:', error); + if (!cancelled) { + setSessions([]); + } + }); + + return () => { + cancelled = true; + }; + }, [projectPath, setSessions]); + + return null; +} + function CliExecutionSync() { // Always sync active CLI executions 
with a longer polling interval // This ensures the activeCliCount badge in Header shows correct count on initial load diff --git a/ccw/frontend/src/components/terminal-dashboard/AssociationHighlight.test.tsx b/ccw/frontend/src/components/terminal-dashboard/AssociationHighlight.test.tsx new file mode 100644 index 00000000..ee61b8e0 --- /dev/null +++ b/ccw/frontend/src/components/terminal-dashboard/AssociationHighlight.test.tsx @@ -0,0 +1,48 @@ +// ======================================== +// Association Highlight Tests +// ======================================== + +import { useEffect } from 'react'; +import { render, screen } from '@testing-library/react'; +import { describe, expect, it } from 'vitest'; +import { AssociationHighlightProvider, useAssociationHighlight } from './AssociationHighlight'; + +function Probe({ chain, scopeKey }: { chain: { issueId: string | null; queueItemId: string | null; sessionId: string | null } | null; scopeKey: string }) { + return ( + + + + ); +} + +function ProbeInner({ chain }: { chain: { issueId: string | null; queueItemId: string | null; sessionId: string | null } | null }) { + const { chain: activeChain, setChain } = useAssociationHighlight(); + + useEffect(() => { + setChain(chain); + }, [chain, setChain]); + + return
<div data-testid="chain">{activeChain?.issueId ?? 'none'}</div>
; +} + +describe('AssociationHighlightProvider', () => { + it('clears highlighted chain when scopeKey changes', () => { + const { rerender } = render( + + ); + + expect(screen.getByTestId('chain').textContent).toBe('ISSUE-1'); + + rerender( + + ); + + expect(screen.getByTestId('chain').textContent).toBe('none'); + }); +}); diff --git a/ccw/frontend/src/components/terminal-dashboard/AssociationHighlight.tsx b/ccw/frontend/src/components/terminal-dashboard/AssociationHighlight.tsx index 070bc784..cba41914 100644 --- a/ccw/frontend/src/components/terminal-dashboard/AssociationHighlight.tsx +++ b/ccw/frontend/src/components/terminal-dashboard/AssociationHighlight.tsx @@ -15,7 +15,9 @@ import { useContext, useState, useCallback, + useEffect, useMemo, + useRef, type ReactNode, } from 'react'; import type { AssociationChain } from '@/types/terminal-dashboard'; @@ -37,8 +39,22 @@ const AssociationHighlightContext = createContext(null); + const lastScopeKeyRef = useRef(scopeKey); + + useEffect(() => { + if (lastScopeKeyRef.current !== scopeKey) { + lastScopeKeyRef.current = scopeKey; + setChainState(null); + } + }, [scopeKey]); const setChain = useCallback((nextChain: AssociationChain | null) => { setChainState(nextChain); diff --git a/ccw/frontend/src/components/terminal-dashboard/DashboardToolbar.test.tsx b/ccw/frontend/src/components/terminal-dashboard/DashboardToolbar.test.tsx new file mode 100644 index 00000000..63889390 --- /dev/null +++ b/ccw/frontend/src/components/terminal-dashboard/DashboardToolbar.test.tsx @@ -0,0 +1,127 @@ +// ======================================== +// DashboardToolbar Tests +// ======================================== + +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { renderWithI18n, screen, fireEvent } from '@/test/i18n'; +import { DashboardToolbar } from './DashboardToolbar'; + +const mockState = vi.hoisted(() => ({ + currentProjectPath: 'D:/workspace-a', + resetLayout: vi.fn(), + createSessionAndAssign: vi.fn(), + 
updateTerminalMeta: vi.fn(), + toastError: vi.fn(), +})); + +vi.mock('@/hooks/useIssues', () => ({ + useIssues: () => ({ openCount: 0 }), + useIssueQueue: () => ({ data: { grouped_items: {} } }), +})); + +vi.mock('@/stores/workflowStore', () => ({ + useWorkflowStore: (selector: (state: { projectPath: string | null }) => unknown) => + selector({ projectPath: mockState.currentProjectPath }), + selectProjectPath: (state: { projectPath: string | null }) => state.projectPath, +})); + +vi.mock('@/stores/issueQueueIntegrationStore', () => ({ + useIssueQueueIntegrationStore: (selector: (state: { associationChain: null }) => unknown) => + selector({ associationChain: null }), + selectAssociationChain: (state: { associationChain: null }) => state.associationChain, +})); + +vi.mock('@/stores/terminalGridStore', () => ({ + useTerminalGridStore: (selector: (state: { + resetLayout: typeof mockState.resetLayout; + focusedPaneId: string; + createSessionAndAssign: typeof mockState.createSessionAndAssign; + }) => unknown) => + selector({ + resetLayout: mockState.resetLayout, + focusedPaneId: 'pane-1', + createSessionAndAssign: mockState.createSessionAndAssign, + }), + selectTerminalGridFocusedPaneId: (state: { focusedPaneId: string }) => state.focusedPaneId, +})); + +vi.mock('@/stores/executionMonitorStore', () => ({ + useExecutionMonitorStore: (selector: (state: { count: number }) => unknown) => selector({ count: 0 }), + selectActiveExecutionCount: (state: { count: number }) => state.count, +})); + +vi.mock('@/stores/sessionManagerStore', () => ({ + useSessionManagerStore: (selector: (state: { updateTerminalMeta: typeof mockState.updateTerminalMeta }) => unknown) => + selector({ updateTerminalMeta: mockState.updateTerminalMeta }), +})); + +vi.mock('@/stores/configStore', () => ({ + useConfigStore: (selector: (state: { featureFlags: Record }) => unknown) => + selector({ + featureFlags: { + dashboardQueuePanelEnabled: true, + dashboardInspectorEnabled: true, + 
dashboardExecutionMonitorEnabled: true, + }, + }), +})); + +vi.mock('@/stores/queueSchedulerStore', () => ({ + useQueueSchedulerStore: (selector: (state: { status: string }) => unknown) => selector({ status: 'idle' }), + selectQueueSchedulerStatus: (state: { status: string }) => state.status, +})); + +vi.mock('@/stores/notificationStore', () => ({ + toast: { + error: mockState.toastError, + }, +})); + +vi.mock('./CliConfigModal', () => ({ + CliConfigModal: ({ isOpen }: { isOpen: boolean }) => + isOpen ?
<div data-testid="cli-config-modal">open</div>
: null, +})); + +describe('DashboardToolbar', () => { + beforeEach(() => { + mockState.currentProjectPath = 'D:/workspace-a'; + mockState.resetLayout.mockReset(); + mockState.createSessionAndAssign.mockReset(); + mockState.updateTerminalMeta.mockReset(); + mockState.toastError.mockReset(); + }); + + it('closes the CLI config modal when workspace changes', () => { + const view = renderWithI18n( + undefined} + isFileSidebarOpen + onToggleFileSidebar={() => undefined} + isSessionSidebarOpen + onToggleSessionSidebar={() => undefined} + isFullscreen={false} + onToggleFullscreen={() => undefined} + /> + ); + + fireEvent.click(screen.getByTitle('Click to configure and launch a CLI session')); + expect(screen.getByTestId('cli-config-modal')).toBeInTheDocument(); + + mockState.currentProjectPath = 'D:/workspace-b'; + view.rerender( + undefined} + isFileSidebarOpen + onToggleFileSidebar={() => undefined} + isSessionSidebarOpen + onToggleSessionSidebar={() => undefined} + isFullscreen={false} + onToggleFullscreen={() => undefined} + /> + ); + + expect(screen.queryByTestId('cli-config-modal')).not.toBeInTheDocument(); + }); +}); diff --git a/ccw/frontend/src/components/terminal-dashboard/DashboardToolbar.tsx b/ccw/frontend/src/components/terminal-dashboard/DashboardToolbar.tsx index 01bc805f..b19328b6 100644 --- a/ccw/frontend/src/components/terminal-dashboard/DashboardToolbar.tsx +++ b/ccw/frontend/src/components/terminal-dashboard/DashboardToolbar.tsx @@ -5,7 +5,7 @@ // Provides toggle buttons for floating panels (Issues/Queue/Inspector) // and layout preset controls. Sessions sidebar is always visible. 
-import { useCallback, useMemo, useState } from 'react'; +import { useCallback, useEffect, useMemo, useState } from 'react'; import { useIntl } from 'react-intl'; import { AlertCircle, @@ -124,6 +124,11 @@ export function DashboardToolbar({ activePanel, onTogglePanel, isFileSidebarOpen const [isCreating, setIsCreating] = useState(false); const [isConfigOpen, setIsConfigOpen] = useState(false); + useEffect(() => { + setIsCreating(false); + setIsConfigOpen(false); + }, [projectPath]); + // Helper to get or create a focused pane const getOrCreateFocusedPane = useCallback(() => { if (focusedPaneId) return focusedPaneId; diff --git a/ccw/frontend/src/components/terminal-dashboard/IssuePanel.tsx b/ccw/frontend/src/components/terminal-dashboard/IssuePanel.tsx index 25a8636b..d05a5409 100644 --- a/ccw/frontend/src/components/terminal-dashboard/IssuePanel.tsx +++ b/ccw/frontend/src/components/terminal-dashboard/IssuePanel.tsx @@ -258,6 +258,20 @@ export function IssuePanel() { }; }, []); + useEffect(() => { + if (sentTimerRef.current) clearTimeout(sentTimerRef.current); + if (queuedTimerRef.current) clearTimeout(queuedTimerRef.current); + setSelectedIds(new Set()); + setIsSending(false); + setJustSent(false); + setExecutionMethod('skill-team-issue'); + setIsSendConfigOpen(false); + setCustomPrompt(''); + setIsAddingToQueue(false); + setJustQueued(false); + setQueueMode('write'); + }, [projectPath]); + // Sort: open/in_progress first, then by priority (critical > high > medium > low) const sortedIssues = useMemo(() => { const priorityOrder: Record = { diff --git a/ccw/frontend/src/components/terminal-dashboard/QueuePanel.test.tsx b/ccw/frontend/src/components/terminal-dashboard/QueuePanel.test.tsx new file mode 100644 index 00000000..c46b9a70 --- /dev/null +++ b/ccw/frontend/src/components/terminal-dashboard/QueuePanel.test.tsx @@ -0,0 +1,75 @@ +// ======================================== +// QueuePanel Tests +// ======================================== + +import { describe, 
it, expect, vi, beforeEach } from 'vitest'; +import { renderWithI18n, screen, fireEvent } from '@/test/i18n'; +import { QueuePanel } from './QueuePanel'; + +const mockState = vi.hoisted(() => ({ + currentProjectPath: 'D:/workspace-a', + loadInitialState: vi.fn(), + buildAssociationChain: vi.fn(), +})); + +vi.mock('@/hooks/useIssues', () => ({ + useIssueQueue: () => ({ data: null, isLoading: false, error: null }), +})); + +vi.mock('@/stores/workflowStore', () => ({ + useWorkflowStore: (selector: (state: { projectPath: string | null }) => unknown) => + selector({ projectPath: mockState.currentProjectPath }), + selectProjectPath: (state: { projectPath: string | null }) => state.projectPath, +})); + +vi.mock('@/stores/issueQueueIntegrationStore', () => ({ + useIssueQueueIntegrationStore: (selector: (state: { + associationChain: null; + buildAssociationChain: typeof mockState.buildAssociationChain; + }) => unknown) => + selector({ associationChain: null, buildAssociationChain: mockState.buildAssociationChain }), + selectAssociationChain: (state: { associationChain: null }) => state.associationChain, +})); + +vi.mock('@/stores/queueExecutionStore', () => ({ + useQueueExecutionStore: () => [], + selectByQueueItem: () => () => [], +})); + +vi.mock('@/stores/queueSchedulerStore', () => ({ + useQueueSchedulerStore: (selector: (state: { + status: string; + items: never[]; + loadInitialState: typeof mockState.loadInitialState; + }) => unknown) => + selector({ status: 'idle', items: [], loadInitialState: mockState.loadInitialState }), + selectQueueSchedulerStatus: (state: { status: string }) => state.status, + selectQueueItems: (state: { items: never[] }) => state.items, +})); + +vi.mock('@/stores/orchestratorStore', () => ({ + useOrchestratorStore: (selector: (state: { activePlans: Record; activePlanCount: number }) => unknown) => + selector({ activePlans: {}, activePlanCount: 0 }), + selectActivePlans: (state: { activePlans: Record }) => state.activePlans, + 
selectActivePlanCount: (state: { activePlanCount: number }) => state.activePlanCount, +})); + +describe('QueuePanel', () => { + beforeEach(() => { + mockState.currentProjectPath = 'D:/workspace-a'; + mockState.loadInitialState.mockReset(); + mockState.buildAssociationChain.mockReset(); + }); + + it('resets the active tab back to queue when workspace changes', () => { + const view = renderWithI18n(); + + fireEvent.click(screen.getByRole('button', { name: /orchestrator/i })); + expect(screen.getByText('No active orchestrations')).toBeInTheDocument(); + + mockState.currentProjectPath = 'D:/workspace-b'; + view.rerender(); + + expect(screen.queryByText('No active orchestrations')).not.toBeInTheDocument(); + }); +}); diff --git a/ccw/frontend/src/components/terminal-dashboard/QueuePanel.tsx b/ccw/frontend/src/components/terminal-dashboard/QueuePanel.tsx index f13538d5..9f983886 100644 --- a/ccw/frontend/src/components/terminal-dashboard/QueuePanel.tsx +++ b/ccw/frontend/src/components/terminal-dashboard/QueuePanel.tsx @@ -53,6 +53,7 @@ import { selectActivePlanCount, type OrchestrationRunState, } from '@/stores/orchestratorStore'; +import { useWorkflowStore, selectProjectPath } from '@/stores/workflowStore'; import type { StepStatus, OrchestrationStatus } from '@/types/orchestrator'; import type { QueueItem as ApiQueueItem } from '@/lib/api'; import type { QueueItem as SchedulerQueueItem, QueueItemStatus as SchedulerQueueItemStatus } from '@/types/queue-frontend-types'; @@ -506,6 +507,7 @@ function OrchestratorTabContent() { export function QueuePanel({ embedded = false }: { embedded?: boolean }) { const { formatMessage } = useIntl(); const [activeTab, setActiveTab] = useState('queue'); + const projectPath = useWorkflowStore(selectProjectPath); const orchestratorCount = useOrchestratorStore(selectActivePlanCount); // Scheduler store data for active count @@ -536,6 +538,10 @@ export function QueuePanel({ embedded = false }: { embedded?: boolean }) { return count; }, 
[useSchedulerData, schedulerItems, queueQuery.data]); + useEffect(() => { + setActiveTab('queue'); + }, [projectPath]); + return (
{/* Tab bar */} diff --git a/ccw/frontend/src/components/terminal-dashboard/SessionGroupTree.test.tsx b/ccw/frontend/src/components/terminal-dashboard/SessionGroupTree.test.tsx new file mode 100644 index 00000000..0edca164 --- /dev/null +++ b/ccw/frontend/src/components/terminal-dashboard/SessionGroupTree.test.tsx @@ -0,0 +1,53 @@ +// ======================================== +// SessionGroupTree Tests +// ======================================== + +import { act } from 'react'; +import { beforeEach, describe, expect, it } from 'vitest'; +import { renderWithI18n, screen, fireEvent } from '@/test/i18n'; +import { SessionGroupTree } from './SessionGroupTree'; +import { useCliSessionStore } from '@/stores/cliSessionStore'; +import { useSessionManagerStore } from '@/stores/sessionManagerStore'; +import { useTerminalGridStore } from '@/stores/terminalGridStore'; +import { useWorkflowStore } from '@/stores/workflowStore'; + +describe('SessionGroupTree', () => { + beforeEach(() => { + useCliSessionStore.getState().resetState(); + useSessionManagerStore.getState().resetState(); + useTerminalGridStore.getState().resetLayout('single'); + + act(() => { + useWorkflowStore.setState({ projectPath: 'D:/workspace-a' }); + }); + + useCliSessionStore.getState().setSessions([ + { + sessionKey: 'session-1', + shellKind: 'bash', + workingDir: 'D:/workspace-a', + tool: 'codex', + createdAt: '2026-03-08T12:00:00.000Z', + updatedAt: '2026-03-08T12:00:00.000Z', + isPaused: false, + }, + ]); + useSessionManagerStore.getState().updateTerminalMeta('session-1', { + tag: 'workspace-a-tag', + status: 'active', + }); + }); + + it('collapses expanded tag groups when workspace changes', () => { + renderWithI18n(); + + fireEvent.click(screen.getByRole('button', { name: /workspace-a-tag/i })); + expect(screen.getByText('codex')).toBeInTheDocument(); + + act(() => { + useWorkflowStore.setState({ projectPath: 'D:/workspace-b' }); + }); + + expect(screen.queryByText('codex')).not.toBeInTheDocument(); + 
}); +}); diff --git a/ccw/frontend/src/components/terminal-dashboard/SessionGroupTree.tsx b/ccw/frontend/src/components/terminal-dashboard/SessionGroupTree.tsx index 1f7c65e0..4eede8a7 100644 --- a/ccw/frontend/src/components/terminal-dashboard/SessionGroupTree.tsx +++ b/ccw/frontend/src/components/terminal-dashboard/SessionGroupTree.tsx @@ -4,7 +4,7 @@ // Tree view for CLI sessions grouped by tag. // Sessions are automatically grouped by their tag (e.g., "gemini-143052"). -import { useState, useCallback, useMemo } from 'react'; +import { useState, useCallback, useEffect, useMemo } from 'react'; import { useIntl } from 'react-intl'; import { ChevronRight, @@ -15,6 +15,7 @@ import { cn } from '@/lib/utils'; import { useSessionManagerStore, selectSessionManagerActiveTerminalId, selectTerminalMetas } from '@/stores'; import { useCliSessionStore } from '@/stores/cliSessionStore'; import { useTerminalGridStore, selectTerminalGridPanes } from '@/stores/terminalGridStore'; +import { useWorkflowStore, selectProjectPath } from '@/stores/workflowStore'; import { Badge } from '@/components/ui/Badge'; import type { TerminalStatus } from '@/types/terminal-dashboard'; @@ -44,6 +45,11 @@ export function SessionGroupTree() { const setFocused = useTerminalGridStore((s) => s.setFocused); const [expandedTags, setExpandedTags] = useState>(new Set()); + const projectPath = useWorkflowStore(selectProjectPath); + + useEffect(() => { + setExpandedTags(new Set()); + }, [projectPath]); const toggleTag = useCallback((tag: string) => { setExpandedTags((prev) => { diff --git a/ccw/frontend/src/hooks/useWebSocket.test.tsx b/ccw/frontend/src/hooks/useWebSocket.test.tsx new file mode 100644 index 00000000..dccf1d95 --- /dev/null +++ b/ccw/frontend/src/hooks/useWebSocket.test.tsx @@ -0,0 +1,220 @@ +// ======================================== +// useWebSocket Hook Tests +// ======================================== + +import { act, renderHook } from '@testing-library/react'; +import { afterEach, 
beforeEach, describe, expect, it, vi } from 'vitest'; +import { useWebSocket } from './useWebSocket'; +import { useCliSessionStore } from '@/stores/cliSessionStore'; +import { useExecutionMonitorStore } from '@/stores/executionMonitorStore'; +import { useSessionManagerStore } from '@/stores/sessionManagerStore'; +import { useWorkflowStore } from '@/stores/workflowStore'; + +class MockWebSocket { + static readonly OPEN = 1; + static instances: MockWebSocket[] = []; + + readonly url: string; + readyState = 0; + onopen: ((event: Event) => void) | null = null; + onmessage: ((event: MessageEvent) => void) | null = null; + onclose: ((event: CloseEvent) => void) | null = null; + onerror: ((event: Event) => void) | null = null; + send = vi.fn(); + close = vi.fn(() => { + this.readyState = 3; + this.onclose?.(new CloseEvent('close')); + }); + + constructor(url: string) { + this.url = url; + MockWebSocket.instances.push(this); + } + + open() { + this.readyState = MockWebSocket.OPEN; + this.onopen?.(new Event('open')); + } + + message(payload: unknown) { + this.onmessage?.({ data: JSON.stringify(payload) } as MessageEvent); + } +} + +function createSession(sessionKey: string, workingDir = 'D:/workspace-a') { + return { + sessionKey, + shellKind: 'pwsh', + workingDir, + createdAt: '2026-03-08T12:00:00.000Z', + updatedAt: '2026-03-08T12:00:00.000Z', + isPaused: false, + }; +} + +function connectHook() { + const hook = renderHook(() => useWebSocket({ enabled: true })); + const socket = MockWebSocket.instances[MockWebSocket.instances.length - 1]; + if (!socket) { + throw new Error('Expected WebSocket to be created'); + } + + act(() => { + socket.open(); + }); + + return { ...hook, socket }; +} + +describe('useWebSocket workspace scoping', () => { + beforeEach(() => { + vi.clearAllMocks(); + localStorage.clear(); + MockWebSocket.instances = []; + + useCliSessionStore.getState().resetState(); + useExecutionMonitorStore.getState().resetState(); + 
useSessionManagerStore.getState().resetState(); + useWorkflowStore.setState({ projectPath: 'D:\\workspace-a' }); + + vi.stubGlobal('WebSocket', MockWebSocket as unknown as typeof WebSocket); + }); + + afterEach(() => { + useCliSessionStore.getState().resetState(); + useExecutionMonitorStore.getState().resetState(); + useSessionManagerStore.getState().resetState(); + vi.unstubAllGlobals(); + }); + + it('ignores scoped CLI and execution messages from another workspace', () => { + const { socket } = connectHook(); + + act(() => { + socket.message({ + type: 'CLI_SESSION_CREATED', + payload: { + session: createSession('session-foreign', 'D:/workspace-b'), + timestamp: '2026-03-08T12:00:01.000Z', + projectPath: 'D:/workspace-b', + }, + }); + socket.message({ + type: 'CLI_SESSION_LOCKED', + payload: { + sessionKey: 'session-foreign', + reason: 'Foreign execution', + executionId: 'exec-foreign', + timestamp: '2026-03-08T12:00:02.000Z', + projectPath: 'D:/workspace-b', + }, + }); + socket.message({ + type: 'EXECUTION_STARTED', + payload: { + executionId: 'exec-foreign', + flowId: 'flow-foreign', + sessionKey: 'session-foreign', + stepName: 'Foreign flow', + totalSteps: 2, + timestamp: '2026-03-08T12:00:03.000Z', + projectPath: 'D:/workspace-b', + }, + }); + }); + + expect(useCliSessionStore.getState().sessions['session-foreign']).toBeUndefined(); + expect(useSessionManagerStore.getState().terminalMetas['session-foreign']).toBeUndefined(); + expect(useExecutionMonitorStore.getState().activeExecutions['exec-foreign']).toBeUndefined(); + }); + + it('handles matching scoped messages and legacy messages for known sessions', () => { + const { socket } = connectHook(); + + act(() => { + socket.message({ + type: 'CLI_SESSION_CREATED', + payload: { + session: createSession('session-local', 'D:/workspace-a/subdir'), + timestamp: '2026-03-08T12:00:01.000Z', + projectPath: 'd:/workspace-a', + }, + }); + socket.message({ + type: 'CLI_SESSION_OUTPUT', + payload: { + sessionKey: 
'session-local', + data: 'hello from current workspace', + timestamp: '2026-03-08T12:00:02.000Z', + }, + }); + socket.message({ + type: 'CLI_SESSION_LOCKED', + payload: { + sessionKey: 'session-local', + reason: 'Current execution', + executionId: 'exec-local', + timestamp: '2026-03-08T12:00:03.000Z', + projectPath: 'D:/workspace-a', + }, + }); + socket.message({ + type: 'EXECUTION_STARTED', + payload: { + executionId: 'exec-local', + flowId: 'flow-local', + sessionKey: 'session-local', + stepName: 'Current flow', + totalSteps: 3, + timestamp: '2026-03-08T12:00:04.000Z', + }, + }); + }); + + const cliState = useCliSessionStore.getState(); + expect(cliState.sessions['session-local']?.workingDir).toBe('D:/workspace-a/subdir'); + expect(cliState.outputChunks['session-local']).toEqual([ + { + data: 'hello from current workspace', + timestamp: expect.any(Number), + }, + ]); + + const sessionManagerState = useSessionManagerStore.getState(); + expect(sessionManagerState.terminalMetas['session-local']?.isLocked).toBe(true); + expect(sessionManagerState.terminalMetas['session-local']?.lockedByExecutionId).toBe('exec-local'); + + const executionState = useExecutionMonitorStore.getState(); + expect(executionState.activeExecutions['exec-local']?.sessionKey).toBe('session-local'); + expect(executionState.currentExecutionId).toBe('exec-local'); + }); + + it('ignores legacy unscoped messages when session is unknown', () => { + const { socket } = connectHook(); + + act(() => { + socket.message({ + type: 'CLI_SESSION_OUTPUT', + payload: { + sessionKey: 'session-unknown', + data: 'should be ignored', + timestamp: '2026-03-08T12:00:02.000Z', + }, + }); + socket.message({ + type: 'EXECUTION_STARTED', + payload: { + executionId: 'exec-unknown', + flowId: 'flow-unknown', + sessionKey: 'session-unknown', + stepName: 'Unknown flow', + totalSteps: 1, + timestamp: '2026-03-08T12:00:03.000Z', + }, + }); + }); + + 
expect(useCliSessionStore.getState().outputChunks['session-unknown']).toBeUndefined(); + expect(useExecutionMonitorStore.getState().activeExecutions['exec-unknown']).toBeUndefined(); + }); +}); diff --git a/ccw/frontend/src/hooks/useWebSocket.ts b/ccw/frontend/src/hooks/useWebSocket.ts index 98cfb62c..bc1477c3 100644 --- a/ccw/frontend/src/hooks/useWebSocket.ts +++ b/ccw/frontend/src/hooks/useWebSocket.ts @@ -11,11 +11,13 @@ import { useCliSessionStore } from '@/stores/cliSessionStore'; import { handleSessionLockedMessage, handleSessionUnlockedMessage, + useSessionManagerStore, } from '@/stores/sessionManagerStore'; import { useExecutionMonitorStore, type ExecutionWSMessage, } from '@/stores/executionMonitorStore'; +import { useWorkflowStore, selectProjectPath } from '@/stores/workflowStore'; import { OrchestratorMessageSchema, type OrchestratorWebSocketMessage, @@ -28,6 +30,15 @@ import type { ToolCallKind, ToolCallExecution } from '../types/toolCall'; const RECONNECT_DELAY_BASE = 1000; // 1 second const RECONNECT_DELAY_MAX = 30000; // 30 seconds const RECONNECT_DELAY_MULTIPLIER = 1.5; +const WORKSPACE_SCOPED_CLI_MESSAGE_TYPES = new Set([ + 'CLI_SESSION_CREATED', + 'CLI_SESSION_OUTPUT', + 'CLI_SESSION_CLOSED', + 'CLI_SESSION_PAUSED', + 'CLI_SESSION_RESUMED', + 'CLI_SESSION_LOCKED', + 'CLI_SESSION_UNLOCKED', +]); // Access store state/actions via getState() - avoids calling hooks in callbacks/effects // This is the zustand-recommended pattern for non-rendering store access @@ -71,6 +82,85 @@ function getStoreState() { }; } +function normalizeWorkspacePath(path: string | null | undefined): string | null { + if (typeof path !== 'string') return null; + + const normalized = path.trim().replace(/\\/g, '/').replace(/\/+$/, ''); + if (!normalized) return null; + + return /^[a-z]:/i.test(normalized) ? 
normalized.toLowerCase() : normalized; +} + +function getCurrentWorkspacePath(): string | null { + return normalizeWorkspacePath(selectProjectPath(useWorkflowStore.getState())); +} + +function isProjectPathInCurrentWorkspace(projectPath: string | null | undefined): boolean { + const currentWorkspacePath = getCurrentWorkspacePath(); + if (!currentWorkspacePath) return true; + + return normalizeWorkspacePath(projectPath) === currentWorkspacePath; +} + +function isPathInCurrentWorkspace(candidatePath: string | null | undefined): boolean { + const currentWorkspacePath = getCurrentWorkspacePath(); + if (!currentWorkspacePath) return true; + + const normalizedCandidatePath = normalizeWorkspacePath(candidatePath); + if (!normalizedCandidatePath) return false; + + return ( + normalizedCandidatePath === currentWorkspacePath || + normalizedCandidatePath.startsWith(`${currentWorkspacePath}/`) + ); +} + +function isKnownCliSession(sessionKey: string | null | undefined): boolean { + if (typeof sessionKey !== 'string' || !sessionKey) return false; + + if (sessionKey in useCliSessionStore.getState().sessions) { + return true; + } + + return sessionKey in useSessionManagerStore.getState().terminalMetas; +} + +function shouldHandleCliSessionMessage(data: { type?: string; payload?: Record }): boolean { + const currentWorkspacePath = getCurrentWorkspacePath(); + if (!currentWorkspacePath) return true; + + const payload = data.payload ?? {}; + if (typeof payload.projectPath === 'string') { + return isProjectPathInCurrentWorkspace(payload.projectPath); + } + + if (data.type === 'CLI_SESSION_CREATED') { + const session = payload.session as { workingDir?: string } | undefined; + return isPathInCurrentWorkspace(session?.workingDir); + } + + return isKnownCliSession(typeof payload.sessionKey === 'string' ? 
payload.sessionKey : null); +} + +function shouldHandleExecutionWsMessage(message: ExecutionWSMessage): boolean { + const currentWorkspacePath = getCurrentWorkspacePath(); + if (!currentWorkspacePath) return true; + + if (typeof message.payload.projectPath === 'string') { + return isProjectPathInCurrentWorkspace(message.payload.projectPath); + } + + if (message.payload.executionId in useExecutionMonitorStore.getState().activeExecutions) { + return true; + } + + if (message.type === 'EXECUTION_STARTED') { + return isKnownCliSession(message.payload.sessionKey ?? null); + } + + return false; +} + export interface UseWebSocketOptions { enabled?: boolean; onMessage?: (message: OrchestratorWebSocketMessage) => void; @@ -162,6 +252,13 @@ export function useWebSocket(options: UseWebSocketOptions = {}): UseWebSocketRet // Handle CLI messages if (data.type?.startsWith('CLI_')) { + if ( + WORKSPACE_SCOPED_CLI_MESSAGE_TYPES.has(data.type) && + !shouldHandleCliSessionMessage(data as { type?: string; payload?: Record }) + ) { + return; + } + switch (data.type) { // ========== PTY CLI Sessions ========== case 'CLI_SESSION_CREATED': { @@ -293,8 +390,13 @@ export function useWebSocket(options: UseWebSocketOptions = {}): UseWebSocketRet // Handle EXECUTION messages (from orchestrator execution-in-session) if (data.type?.startsWith('EXECUTION_')) { + const executionMessage = data as ExecutionWSMessage; + if (!shouldHandleExecutionWsMessage(executionMessage)) { + return; + } + const handleExecutionMessage = useExecutionMonitorStore.getState().handleExecutionMessage; - handleExecutionMessage(data as ExecutionWSMessage); + handleExecutionMessage(executionMessage); return; } diff --git a/ccw/frontend/src/pages/TerminalDashboardPage.test.tsx b/ccw/frontend/src/pages/TerminalDashboardPage.test.tsx new file mode 100644 index 00000000..1f190201 --- /dev/null +++ b/ccw/frontend/src/pages/TerminalDashboardPage.test.tsx @@ -0,0 +1,97 @@ +// ======================================== +// 
TerminalDashboardPage Tests +// ======================================== + +import type { ReactNode } from 'react'; +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { renderWithI18n, screen, fireEvent } from '@/test/i18n'; +import { TerminalDashboardPage } from './TerminalDashboardPage'; + +const mockState = vi.hoisted(() => ({ + currentProjectPath: 'D:/workspace-a', + toggleImmersiveMode: vi.fn(), +})); + +vi.mock('allotment', () => { + const Pane = ({ children }: { children: ReactNode }) =>
{children}
; + const Allotment = ({ children }: { children: ReactNode }) =>
{children}
; + Object.assign(Allotment, { Pane }); + return { Allotment }; +}); + +vi.mock('@/components/terminal-dashboard/AssociationHighlight', () => ({ + AssociationHighlightProvider: ({ children }: { children: ReactNode }) => <>{children}, +})); + +vi.mock('@/components/terminal-dashboard/DashboardToolbar', () => ({ + DashboardToolbar: ({ activePanel, onTogglePanel }: { activePanel: string | null; onTogglePanel: (panelId: 'queue') => void }) => ( +
+
{activePanel ?? 'none'}
+ +
+ ), +})); + +vi.mock('@/components/terminal-dashboard/FloatingPanel', () => ({ + FloatingPanel: ({ isOpen, children }: { isOpen: boolean; children: ReactNode }) => + isOpen ?
{children}
: null, +})); + +vi.mock('@/components/terminal-dashboard/TerminalGrid', () => ({ TerminalGrid: () =>
terminal-grid
})); +vi.mock('@/components/terminal-dashboard/SessionGroupTree', () => ({ SessionGroupTree: () =>
session-tree
})); +vi.mock('@/components/terminal-dashboard/IssuePanel', () => ({ IssuePanel: () =>
issue-panel
})); +vi.mock('@/components/terminal-dashboard/QueuePanel', () => ({ QueuePanel: () =>
queue-panel
})); +vi.mock('@/components/terminal-dashboard/QueueListColumn', () => ({ QueueListColumn: () =>
queue-list
})); +vi.mock('@/components/terminal-dashboard/SchedulerPanel', () => ({ SchedulerPanel: () =>
scheduler-panel
})); +vi.mock('@/components/terminal-dashboard/BottomInspector', () => ({ InspectorContent: () =>
inspector-panel
})); +vi.mock('@/components/terminal-dashboard/ExecutionMonitorPanel', () => ({ ExecutionMonitorPanel: () =>
execution-panel
})); +vi.mock('@/components/terminal-dashboard/FileSidebarPanel', () => ({ + FileSidebarPanel: () =>
file-sidebar
, +})); + +vi.mock('@/stores/workflowStore', () => ({ + useWorkflowStore: (selector: (state: { projectPath: string | null }) => unknown) => + selector({ projectPath: mockState.currentProjectPath }), + selectProjectPath: (state: { projectPath: string | null }) => state.projectPath, +})); + +vi.mock('@/stores/appStore', () => ({ + useAppStore: (selector: (state: { isImmersiveMode: boolean; toggleImmersiveMode: () => void }) => unknown) => + selector({ isImmersiveMode: false, toggleImmersiveMode: mockState.toggleImmersiveMode }), + selectIsImmersiveMode: (state: { isImmersiveMode: boolean }) => state.isImmersiveMode, +})); + +vi.mock('@/stores/configStore', () => ({ + useConfigStore: (selector: (state: { featureFlags: Record }) => unknown) => + selector({ + featureFlags: { + dashboardQueuePanelEnabled: true, + dashboardInspectorEnabled: true, + dashboardExecutionMonitorEnabled: true, + }, + }), +})); + +describe('TerminalDashboardPage', () => { + beforeEach(() => { + mockState.currentProjectPath = 'D:/workspace-a'; + mockState.toggleImmersiveMode.mockReset(); + }); + + it('clears the active floating panel when workspace changes', () => { + const view = renderWithI18n(); + + fireEvent.click(screen.getByRole('button', { name: 'open-queue' })); + + expect(screen.getByTestId('active-panel').textContent).toBe('queue'); + expect(screen.getByText('queue-panel')).toBeInTheDocument(); + + mockState.currentProjectPath = 'D:/workspace-b'; + view.rerender(); + + expect(screen.getByTestId('active-panel').textContent).toBe('none'); + expect(screen.queryByText('queue-panel')).not.toBeInTheDocument(); + }); +}); diff --git a/ccw/frontend/src/pages/TerminalDashboardPage.tsx b/ccw/frontend/src/pages/TerminalDashboardPage.tsx index 5c35339a..ed03b8d3 100644 --- a/ccw/frontend/src/pages/TerminalDashboardPage.tsx +++ b/ccw/frontend/src/pages/TerminalDashboardPage.tsx @@ -9,7 +9,7 @@ // Floating panels: Issues, Queue, Inspector, Execution Monitor (overlay, mutually exclusive) // Fullscreen 
mode: Uses global isImmersiveMode to hide app chrome (Header + Sidebar) -import { useState, useCallback } from 'react'; +import { useState, useCallback, useEffect } from 'react'; import { useIntl } from 'react-intl'; import { Allotment } from 'allotment'; import 'allotment/dist/style.css'; @@ -54,9 +54,13 @@ export function TerminalDashboardPage() { setActivePanel(null); }, []); + useEffect(() => { + setActivePanel(null); + }, [projectPath]); + return (
- + {/* Global toolbar */} { + beforeEach(() => { + useCliSessionStore.getState().resetState(); + }); + + it('resetState clears workspace-scoped sessions and output buffers', () => { + const store = useCliSessionStore.getState(); + + store.setSessions([ + { + sessionKey: 'session-1', + shellKind: 'bash', + workingDir: 'D:/workspace-a', + tool: 'codex', + createdAt: '2026-03-08T12:00:00.000Z', + updatedAt: '2026-03-08T12:00:00.000Z', + isPaused: false, + }, + ]); + store.appendOutput('session-1', 'hello world', 1_741_430_000_000); + + expect(useCliSessionStore.getState().sessions['session-1']).toBeDefined(); + expect(useCliSessionStore.getState().outputChunks['session-1']).toHaveLength(1); + + store.resetState(); + + const nextState = useCliSessionStore.getState(); + expect(nextState.sessions).toEqual({}); + expect(nextState.outputChunks).toEqual({}); + expect(nextState.outputBytes).toEqual({}); + }); +}); diff --git a/ccw/frontend/src/stores/cliSessionStore.ts b/ccw/frontend/src/stores/cliSessionStore.ts index b59652a1..7739d074 100644 --- a/ccw/frontend/src/stores/cliSessionStore.ts +++ b/ccw/frontend/src/stores/cliSessionStore.ts @@ -34,6 +34,7 @@ interface CliSessionState { upsertSession: (session: CliSessionMeta) => void; removeSession: (sessionKey: string) => void; updateSessionPausedState: (sessionKey: string, isPaused: boolean) => void; + resetState: () => void; setBuffer: (sessionKey: string, buffer: string) => void; appendOutput: (sessionKey: string, data: string, timestamp?: number) => void; @@ -48,12 +49,16 @@ function utf8ByteLength(value: string): number { return utf8Encoder.encode(value).length; } +const initialState = { + sessions: {}, + outputChunks: {}, + outputBytes: {}, +}; + export const useCliSessionStore = create()( devtools( (set, get) => ({ - sessions: {}, - outputChunks: {}, - outputBytes: {}, + ...initialState, setSessions: (sessions) => set((state) => { @@ -103,6 +108,8 @@ export const useCliSessionStore = create()( }; }), + resetState: 
() => set({ ...initialState }), + setBuffer: (sessionKey, buffer) => set((state) => ({ outputChunks: { diff --git a/ccw/frontend/src/stores/executionMonitorStore.ts b/ccw/frontend/src/stores/executionMonitorStore.ts index b7c72cfa..14d54988 100644 --- a/ccw/frontend/src/stores/executionMonitorStore.ts +++ b/ccw/frontend/src/stores/executionMonitorStore.ts @@ -52,7 +52,8 @@ export interface ExecutionWSMessage { payload: { executionId: string; flowId: string; - sessionKey: string; + sessionKey?: string; + projectPath?: string; stepId?: string; stepName?: string; totalSteps?: number; @@ -117,7 +118,7 @@ export const useExecutionMonitorStore = create()( executionId, flowId, flowName: stepName || 'Workflow', - sessionKey, + sessionKey: sessionKey ?? '', status: 'running', totalSteps: totalSteps || 0, completedSteps: 0, diff --git a/ccw/frontend/src/stores/issueQueueIntegrationStore.test.ts b/ccw/frontend/src/stores/issueQueueIntegrationStore.test.ts new file mode 100644 index 00000000..d6ff92c8 --- /dev/null +++ b/ccw/frontend/src/stores/issueQueueIntegrationStore.test.ts @@ -0,0 +1,45 @@ +// ======================================== +// Issue Queue Integration Store Tests +// ======================================== + +import { beforeEach, describe, expect, it } from 'vitest'; +import { useIssueQueueIntegrationStore } from './issueQueueIntegrationStore'; +import { useQueueExecutionStore } from './queueExecutionStore'; + +describe('issueQueueIntegrationStore', () => { + beforeEach(() => { + useIssueQueueIntegrationStore.getState().resetState(); + useQueueExecutionStore.getState().resetState(); + }); + + it('resetState clears selected issue and association chain', () => { + useQueueExecutionStore.getState().addExecution({ + id: 'queue-exec-1', + queueItemId: 'Q-1', + issueId: 'ISSUE-1', + solutionId: 'SOL-1', + type: 'session', + sessionKey: 'session-1', + tool: 'codex', + mode: 'analysis', + status: 'running', + startedAt: '2026-03-08T12:00:00.000Z', + }); + + const 
store = useIssueQueueIntegrationStore.getState(); + store.buildAssociationChain('ISSUE-1', 'issue'); + + expect(useIssueQueueIntegrationStore.getState().selectedIssueId).toBe('ISSUE-1'); + expect(useIssueQueueIntegrationStore.getState().associationChain).toEqual({ + issueId: 'ISSUE-1', + queueItemId: 'Q-1', + sessionId: 'session-1', + }); + + store.resetState(); + + const nextState = useIssueQueueIntegrationStore.getState(); + expect(nextState.selectedIssueId).toBeNull(); + expect(nextState.associationChain).toBeNull(); + }); +}); diff --git a/ccw/frontend/src/stores/issueQueueIntegrationStore.ts b/ccw/frontend/src/stores/issueQueueIntegrationStore.ts index 9dd61479..dfe70122 100644 --- a/ccw/frontend/src/stores/issueQueueIntegrationStore.ts +++ b/ccw/frontend/src/stores/issueQueueIntegrationStore.ts @@ -85,6 +85,10 @@ export const useIssueQueueIntegrationStore = create( ); }, + resetState: () => { + set({ ...initialState }, false, 'resetState'); + }, + // ========== Queue Status Bridge ========== _updateQueueItemStatus: ( diff --git a/ccw/frontend/src/stores/queueExecutionStore.test.ts b/ccw/frontend/src/stores/queueExecutionStore.test.ts new file mode 100644 index 00000000..830e65f6 --- /dev/null +++ b/ccw/frontend/src/stores/queueExecutionStore.test.ts @@ -0,0 +1,35 @@ +// ======================================== +// Queue Execution Store Tests +// ======================================== + +import { beforeEach, describe, expect, it } from 'vitest'; +import { useQueueExecutionStore } from './queueExecutionStore'; + +describe('queueExecutionStore', () => { + beforeEach(() => { + useQueueExecutionStore.getState().resetState(); + }); + + it('resetState clears workspace-scoped queue execution tracking', () => { + const store = useQueueExecutionStore.getState(); + + store.addExecution({ + id: 'queue-exec-1', + queueItemId: 'Q-1', + issueId: 'ISSUE-1', + solutionId: 'SOL-1', + type: 'session', + sessionKey: 'session-1', + tool: 'codex', + mode: 'analysis', + status: 
'running', + startedAt: '2026-03-08T12:00:00.000Z', + }); + + expect(useQueueExecutionStore.getState().executions['queue-exec-1']).toBeDefined(); + + store.resetState(); + + expect(useQueueExecutionStore.getState().executions).toEqual({}); + }); +}); diff --git a/ccw/frontend/src/stores/queueExecutionStore.ts b/ccw/frontend/src/stores/queueExecutionStore.ts index 4166dd3f..1bbfadc1 100644 --- a/ccw/frontend/src/stores/queueExecutionStore.ts +++ b/ccw/frontend/src/stores/queueExecutionStore.ts @@ -68,6 +68,8 @@ export interface QueueExecutionActions { removeExecution: (id: string) => void; /** Remove all completed and failed executions */ clearCompleted: () => void; + /** Reset workspace-scoped queue execution state */ + resetState: () => void; } export type QueueExecutionStore = QueueExecutionState & QueueExecutionActions; @@ -150,6 +152,10 @@ export const useQueueExecutionStore = create()( 'clearCompleted' ); }, + + resetState: () => { + set({ ...initialState }, false, 'resetState'); + }, }), { name: 'QueueExecutionStore' } ) diff --git a/ccw/frontend/src/stores/queueSchedulerStore.test.ts b/ccw/frontend/src/stores/queueSchedulerStore.test.ts new file mode 100644 index 00000000..79e4c7bc --- /dev/null +++ b/ccw/frontend/src/stores/queueSchedulerStore.test.ts @@ -0,0 +1,131 @@ +// ======================================== +// Queue Scheduler Store Tests +// ======================================== + +import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest'; +import type { QueueSchedulerState } from '@/types/queue-frontend-types'; + +type QueueSchedulerModule = typeof import('./queueSchedulerStore'); + +type Deferred = { + promise: Promise; + resolve: (value: T) => void; + reject: (reason?: unknown) => void; +}; + +function createDeferred(): Deferred { + let resolve!: (value: T) => void; + let reject!: (reason?: unknown) => void; + const promise = new Promise((res, rej) => { + resolve = res; + reject = rej; + }); + return { promise, resolve, 
reject }; +} + +function createState(status: QueueSchedulerState['status'], issueId: string): QueueSchedulerState { + return { + status, + items: [ + { + item_id: `${issueId}-Q1`, + issue_id: issueId, + status: status === 'running' ? 'executing' : 'pending', + tool: 'codex', + prompt: `Handle ${issueId}`, + depends_on: [], + execution_order: 1, + execution_group: 'wave-1', + createdAt: '2026-03-08T12:00:00.000Z', + }, + ], + sessionPool: {}, + config: { + maxConcurrentSessions: 3, + sessionIdleTimeoutMs: 60_000, + resumeKeySessionBindingTimeoutMs: 300_000, + }, + currentConcurrency: status === 'running' ? 1 : 0, + lastActivityAt: '2026-03-08T12:00:00.000Z', + error: undefined, + }; +} + +function createFetchResponse(state: QueueSchedulerState) { + return { + ok: true, + json: vi.fn().mockResolvedValue(state), + }; +} + +describe('queueSchedulerStore', () => { + let useQueueSchedulerStore: QueueSchedulerModule['useQueueSchedulerStore']; + let fetchMock: ReturnType; + const originalFetch = global.fetch; + + beforeAll(async () => { + vi.useFakeTimers(); + fetchMock = vi.fn(); + global.fetch = fetchMock as unknown as typeof fetch; + ({ useQueueSchedulerStore } = await import('./queueSchedulerStore')); + }); + + afterAll(() => { + vi.clearAllTimers(); + vi.useRealTimers(); + global.fetch = originalFetch; + }); + + beforeEach(() => { + vi.clearAllMocks(); + vi.clearAllTimers(); + useQueueSchedulerStore.getState().resetState(); + }); + + it('resetState clears workspace-scoped scheduler state', () => { + useQueueSchedulerStore.getState().handleSchedulerMessage({ + type: 'QUEUE_SCHEDULER_STATE_UPDATE', + state: createState('running', 'ISSUE-1'), + timestamp: '2026-03-08T12:00:00.000Z', + }); + + expect(useQueueSchedulerStore.getState().status).toBe('running'); + expect(useQueueSchedulerStore.getState().items).toHaveLength(1); + + useQueueSchedulerStore.getState().resetState(); + + const nextState = useQueueSchedulerStore.getState(); + expect(nextState.status).toBe('idle'); 
+ expect(nextState.items).toEqual([]); + expect(nextState.sessionPool).toEqual({}); + expect(nextState.currentConcurrency).toBe(0); + expect(nextState.error).toBeNull(); + }); + + it('ignores stale loadInitialState responses after workspace reset', async () => { + const staleResponse = createDeferred>(); + const freshResponse = createDeferred>(); + + fetchMock + .mockImplementationOnce(() => staleResponse.promise) + .mockImplementationOnce(() => freshResponse.promise); + + const firstLoad = useQueueSchedulerStore.getState().loadInitialState(); + + useQueueSchedulerStore.getState().resetState(); + + const secondLoad = useQueueSchedulerStore.getState().loadInitialState(); + + freshResponse.resolve(createFetchResponse(createState('paused', 'ISSUE-NEW'))); + await secondLoad; + + expect(useQueueSchedulerStore.getState().status).toBe('paused'); + expect(useQueueSchedulerStore.getState().items[0]?.issue_id).toBe('ISSUE-NEW'); + + staleResponse.resolve(createFetchResponse(createState('running', 'ISSUE-OLD'))); + await firstLoad; + + expect(useQueueSchedulerStore.getState().status).toBe('paused'); + expect(useQueueSchedulerStore.getState().items[0]?.issue_id).toBe('ISSUE-NEW'); + }); +}); diff --git a/ccw/frontend/src/stores/queueSchedulerStore.ts b/ccw/frontend/src/stores/queueSchedulerStore.ts index 2c20b617..dda5c90d 100644 --- a/ccw/frontend/src/stores/queueSchedulerStore.ts +++ b/ccw/frontend/src/stores/queueSchedulerStore.ts @@ -57,6 +57,8 @@ interface QueueSchedulerActions { stopQueue: () => Promise; /** Reset the queue scheduler via POST /api/queue/scheduler/reset */ resetQueue: () => Promise; + /** Clear workspace-scoped scheduler state and invalidate stale loads */ + resetState: () => void; /** Update scheduler config via POST /api/queue/scheduler/config */ updateConfig: (config: Partial) => Promise; } @@ -75,6 +77,8 @@ const initialState: QueueSchedulerStoreState = { error: null, }; +let loadInitialStateRequestVersion = 0; + // ========== Store ========== export 
const useQueueSchedulerStore = create()( @@ -173,6 +177,8 @@ export const useQueueSchedulerStore = create()( }, loadInitialState: async () => { + const requestVersion = ++loadInitialStateRequestVersion; + try { const response = await fetch('/api/queue/scheduler/state', { credentials: 'same-origin', @@ -181,6 +187,11 @@ export const useQueueSchedulerStore = create()( throw new Error(`Failed to load scheduler state: ${response.statusText}`); } const data: QueueSchedulerState = await response.json(); + + if (requestVersion !== loadInitialStateRequestVersion) { + return; + } + set( { status: data.status, @@ -195,6 +206,10 @@ export const useQueueSchedulerStore = create()( 'loadInitialState' ); } catch (error) { + if (requestVersion !== loadInitialStateRequestVersion) { + return; + } + // Silently ignore network errors (backend not connected) // Only log non-network errors const message = error instanceof Error ? error.message : 'Unknown error'; @@ -287,6 +302,11 @@ export const useQueueSchedulerStore = create()( } }, + resetState: () => { + loadInitialStateRequestVersion += 1; + set({ ...initialState }, false, 'resetState'); + }, + updateConfig: async (config: Partial) => { try { const response = await fetch('/api/queue/scheduler/config', { diff --git a/ccw/frontend/src/stores/sessionManagerStore.test.ts b/ccw/frontend/src/stores/sessionManagerStore.test.ts new file mode 100644 index 00000000..d8f3aecd --- /dev/null +++ b/ccw/frontend/src/stores/sessionManagerStore.test.ts @@ -0,0 +1,37 @@ +// ======================================== +// Session Manager Store Tests +// ======================================== + +import { beforeEach, describe, expect, it } from 'vitest'; +import { useSessionManagerStore } from './sessionManagerStore'; + +describe('sessionManagerStore', () => { + beforeEach(() => { + useSessionManagerStore.getState().resetState(); + }); + + it('resetState clears workspace-scoped terminal metadata and selection', () => { + const store = 
useSessionManagerStore.getState(); + + store.createGroup('Workspace Group'); + store.setActiveTerminal('session-1'); + store.updateTerminalMeta('session-1', { + title: 'Session 1', + status: 'active', + alertCount: 2, + tag: 'workspace-a', + }); + + const activeState = useSessionManagerStore.getState(); + expect(activeState.groups).toHaveLength(1); + expect(activeState.activeTerminalId).toBe('session-1'); + expect(activeState.terminalMetas['session-1']?.status).toBe('active'); + + store.resetState(); + + const nextState = useSessionManagerStore.getState(); + expect(nextState.groups).toEqual([]); + expect(nextState.activeTerminalId).toBeNull(); + expect(nextState.terminalMetas).toEqual({}); + }); +}); diff --git a/ccw/frontend/src/stores/sessionManagerStore.ts b/ccw/frontend/src/stores/sessionManagerStore.ts index 91d95efc..537abecc 100644 --- a/ccw/frontend/src/stores/sessionManagerStore.ts +++ b/ccw/frontend/src/stores/sessionManagerStore.ts @@ -182,6 +182,14 @@ export const useSessionManagerStore = create()( ); }, + resetState: () => { + if (_workerRef) { + _workerRef.terminate(); + _workerRef = null; + } + set({ ...initialState }, false, 'resetState'); + }, + // ========== Layout Management ========== setGroupLayout: (layout: SessionLayout) => { diff --git a/ccw/frontend/src/stores/terminalGridStore.test.ts b/ccw/frontend/src/stores/terminalGridStore.test.ts new file mode 100644 index 00000000..e5bfafd0 --- /dev/null +++ b/ccw/frontend/src/stores/terminalGridStore.test.ts @@ -0,0 +1,38 @@ +// ======================================== +// Terminal Grid Store Tests +// ======================================== + +import { beforeEach, describe, expect, it } from 'vitest'; +import { useTerminalGridStore } from './terminalGridStore'; + +describe('terminalGridStore', () => { + beforeEach(() => { + useTerminalGridStore.getState().resetLayout('single'); + }); + + it('resetWorkspaceState clears pane session bindings while preserving layout', () => { + const store = 
useTerminalGridStore.getState(); + + store.resetLayout('split-h'); + const configuredState = useTerminalGridStore.getState(); + const paneIds = Object.keys(configuredState.panes); + const originalLayout = configuredState.layout; + + store.assignSession(paneIds[0], 'session-a', 'codex'); + store.showFileInPane(paneIds[1], 'D:/workspace-a/file.ts'); + store.setFocused(paneIds[1]); + + store.resetWorkspaceState(); + + const nextState = useTerminalGridStore.getState(); + expect(nextState.layout).toEqual(originalLayout); + expect(Object.keys(nextState.panes)).toEqual(paneIds); + expect(nextState.focusedPaneId).toBe(paneIds[1]); + for (const paneId of paneIds) { + expect(nextState.panes[paneId]?.sessionId).toBeNull(); + expect(nextState.panes[paneId]?.cliTool).toBeNull(); + expect(nextState.panes[paneId]?.displayMode).toBe('terminal'); + expect(nextState.panes[paneId]?.filePath).toBeNull(); + } + }); +}); diff --git a/ccw/frontend/src/stores/terminalGridStore.ts b/ccw/frontend/src/stores/terminalGridStore.ts index d6a85ae9..52bd1837 100644 --- a/ccw/frontend/src/stores/terminalGridStore.ts +++ b/ccw/frontend/src/stores/terminalGridStore.ts @@ -45,6 +45,8 @@ export interface TerminalGridActions { assignSession: (paneId: PaneId, sessionId: string | null, cliTool?: string | null) => void; setFocused: (paneId: PaneId) => void; resetLayout: (preset: 'single' | 'split-h' | 'split-v' | 'grid-2x2') => void; + /** Clear workspace-scoped pane bindings while preserving layout */ + resetWorkspaceState: () => void; /** Create a new CLI session and assign it to a new pane (auto-split from specified pane) */ createSessionAndAssign: ( paneId: PaneId, @@ -302,6 +304,42 @@ export const useTerminalGridStore = create()( ); }, + resetWorkspaceState: () => { + const state = get(); + const paneIds = Object.keys(state.panes) as PaneId[]; + if (paneIds.length === 0) { + set({ ...initialState }, false, 'terminalGrid/resetWorkspaceState'); + return; + } + + const nextPanes = paneIds.reduce>((acc, 
paneId) => { + const pane = state.panes[paneId]; + acc[paneId] = { + ...pane, + sessionId: null, + cliTool: null, + displayMode: 'terminal', + filePath: null, + }; + return acc; + }, {} as Record); + + const nextFocusedPaneId = state.focusedPaneId && nextPanes[state.focusedPaneId] + ? state.focusedPaneId + : paneIds[0] ?? null; + + set( + { + layout: state.layout, + panes: nextPanes, + focusedPaneId: nextFocusedPaneId, + nextPaneIdCounter: state.nextPaneIdCounter, + }, + false, + 'terminalGrid/resetWorkspaceState' + ); + }, + createSessionAndAssign: async (paneId, config, projectPath) => { try { // 1. Create the CLI session via API diff --git a/ccw/frontend/src/types/terminal-dashboard.ts b/ccw/frontend/src/types/terminal-dashboard.ts index 25c20132..decf7611 100644 --- a/ccw/frontend/src/types/terminal-dashboard.ts +++ b/ccw/frontend/src/types/terminal-dashboard.ts @@ -85,6 +85,8 @@ export interface SessionManagerActions { setActiveTerminal: (sessionId: string | null) => void; /** Update metadata for a specific terminal */ updateTerminalMeta: (sessionId: string, meta: Partial) => void; + /** Reset workspace-scoped dashboard session state */ + resetState: () => void; /** Set the terminal grid layout */ setGroupLayout: (layout: SessionLayout) => void; /** Spawn the monitor Web Worker (idempotent) */ @@ -135,6 +137,8 @@ export interface IssueQueueIntegrationActions { setSelectedIssue: (issueId: string | null) => void; /** Build a full association chain from any entity ID (issue, queue item, or session) */ buildAssociationChain: (entityId: string, entityType: 'issue' | 'queue' | 'session') => void; + /** Reset workspace-scoped issue/queue linkage state */ + resetState: () => void; /** Internal: update queue item status bridging to queueExecutionStore */ _updateQueueItemStatus: (queueItemId: string, status: string, sessionId?: string) => void; } diff --git a/ccw/src/core/routes/orchestrator-routes.ts b/ccw/src/core/routes/orchestrator-routes.ts index d0ac8029..cd5c091f 
100644 --- a/ccw/src/core/routes/orchestrator-routes.ts +++ b/ccw/src/core/routes/orchestrator-routes.ts @@ -1232,6 +1232,7 @@ export async function handleOrchestratorRoutes(ctx: RouteContext): Promise [...currentArgs, '--static-graph'], warning: 'CodexLens CLI hit a Typer static-graph option conflict; retried with explicit --static-graph.', }, + { + shouldRetry: shouldRetryWithCentralizedPreference, + transform: (currentArgs: string[]) => [...currentArgs, '--centralized'], + warning: 'CodexLens CLI hit a Typer centralized/distributed option conflict; retried with explicit --centralized.', + }, ]; for (const retry of compatibilityRetries) { diff --git a/ccw/src/tools/smart-search.ts b/ccw/src/tools/smart-search.ts index d9d299ec..df6b4cf5 100644 --- a/ccw/src/tools/smart-search.ts +++ b/ccw/src/tools/smart-search.ts @@ -10,7 +10,8 @@ * - Multi-backend search routing with RRF ranking * * Actions: - * - init: Initialize CodexLens index + * - init: Initialize CodexLens static index + * - embed: Generate semantic/vector embeddings for the index * - search: Intelligent search with fuzzy (default) or semantic mode * - status: Check index status * - update: Incremental index update for changed files @@ -20,15 +21,20 @@ import { z } from 'zod'; import type { ToolSchema, ToolResult } from '../types/tool.js'; import { spawn, execSync } from 'child_process'; -import { statSync } from 'fs'; -import { dirname, resolve } from 'path'; +import { existsSync, readFileSync, statSync } from 'fs'; +import { dirname, join, resolve } from 'path'; import { ensureReady as ensureCodexLensReady, + ensureLiteLLMEmbedderReady, executeCodexLens, + getVenvPythonPath, } from './codex-lens.js'; import type { ProgressInfo } from './codex-lens.js'; import { getProjectRoot } from '../utils/path-validator.js'; +import { getCodexLensDataDir } from '../utils/codexlens-path.js'; import { EXEC_TIMEOUTS } from '../utils/exec-constants.js'; +import { generateRotationEndpoints } from 
'../config/litellm-api-config-manager.js'; +import type { RotationEndpointConfig } from '../config/litellm-api-config-manager.js'; // Timing utilities for performance analysis const TIMING_ENABLED = process.env.SMART_SEARCH_TIMING === '1' || process.env.DEBUG?.includes('timing'); @@ -65,10 +71,10 @@ function createTimer(): { mark: (name: string) => void; getTimings: () => Timing // Define Zod schema for validation const ParamsSchema = z.object({ - // Action: search (content), find_files (path/name pattern), init, init_force, status, update (incremental), watch + // Action: search (content), find_files (path/name pattern), init, init_force, embed, status, update (incremental), watch // Note: search_files is deprecated, use search with output_mode='files_only' - // init: incremental index (skip existing), init_force: force full rebuild (delete and recreate) - action: z.enum(['init', 'init_force', 'search', 'search_files', 'find_files', 'status', 'update', 'watch']).default('search'), + // init: static FTS index by default, embed: generate semantic/vector embeddings, init_force: force full rebuild (delete and recreate) + action: z.enum(['init', 'init_force', 'embed', 'search', 'search_files', 'find_files', 'status', 'update', 'watch']).default('search'), query: z.string().optional().describe('Content search query (for action="search")'), pattern: z.string().optional().describe('Glob pattern for path matching (for action="find_files")'), mode: z.enum(['fuzzy', 'semantic']).default('fuzzy'), @@ -79,6 +85,10 @@ const ParamsSchema = z.object({ maxResults: z.number().default(5), // Default 5 with full content includeHidden: z.boolean().default(false), languages: z.array(z.string()).optional(), + embeddingBackend: z.string().optional().describe('Embedding backend for action="embed": fastembed/local or litellm/api.'), + embeddingModel: z.string().optional().describe('Embedding model/profile for action="embed". 
Examples: "code", "fast", "qwen3-embedding-sf".'), + apiMaxWorkers: z.number().int().min(1).optional().describe('Max concurrent API embedding workers for action="embed". Recommended: 8-16 for litellm/api when multiple endpoints are configured.'), + force: z.boolean().default(false).describe('Force regeneration for action="embed".'), limit: z.number().default(5), // Default 5 with full content extraFilesCount: z.number().default(10), // Additional file-only results maxContentLength: z.number().default(200), // Max content length for truncation (50-2000) @@ -313,6 +323,11 @@ interface SearchMetadata { totalFiles?: number; }; progressHistory?: ProgressInfo[]; + api_max_workers?: number; + endpoint_count?: number; + use_gpu?: boolean; + cascade_strategy?: string; + staged_stage2_mode?: string; } interface SearchResult { @@ -344,6 +359,11 @@ interface CodexLensConfig { reranker_backend?: string; // 'onnx' (local) or 'api' reranker_model?: string; reranker_top_k?: number; + api_max_workers?: number; + api_batch_size?: number; + cascade_strategy?: string; + staged_stage2_mode?: string; + static_graph_enabled?: boolean; } interface IndexStatus { @@ -357,6 +377,39 @@ interface IndexStatus { warning?: string; } +function readCodexLensSettingsSnapshot(): Partial { + const settingsPath = join(getCodexLensDataDir(), 'settings.json'); + if (!existsSync(settingsPath)) { + return {}; + } + + try { + const parsed = JSON.parse(readFileSync(settingsPath, 'utf-8')) as Record; + const embedding = (parsed.embedding ?? {}) as Record; + const reranker = (parsed.reranker ?? {}) as Record; + const api = (parsed.api ?? {}) as Record; + const cascade = (parsed.cascade ?? {}) as Record; + const staged = (parsed.staged ?? {}) as Record; + const indexing = (parsed.indexing ?? {}) as Record; + + return { + embedding_backend: normalizeEmbeddingBackend(typeof embedding.backend === 'string' ? embedding.backend : undefined), + embedding_model: typeof embedding.model === 'string' ? 
embedding.model : undefined, + reranker_enabled: typeof reranker.enabled === 'boolean' ? reranker.enabled : undefined, + reranker_backend: typeof reranker.backend === 'string' ? reranker.backend : undefined, + reranker_model: typeof reranker.model === 'string' ? reranker.model : undefined, + reranker_top_k: typeof reranker.top_k === 'number' ? reranker.top_k : undefined, + api_max_workers: typeof api.max_workers === 'number' ? api.max_workers : undefined, + api_batch_size: typeof api.batch_size === 'number' ? api.batch_size : undefined, + cascade_strategy: typeof cascade.strategy === 'string' ? cascade.strategy : undefined, + staged_stage2_mode: typeof staged.stage2_mode === 'string' ? staged.stage2_mode : undefined, + static_graph_enabled: typeof indexing.static_graph_enabled === 'boolean' ? indexing.static_graph_enabled : undefined, + }; + } catch { + return {}; + } +} + /** * Strip ANSI color codes from string (for JSON parsing) */ @@ -464,6 +517,99 @@ function filterResultsToTargetFile(results: T[], sco return results.filter((result) => normalizeResultFilePath(result.file, scope.workingDirectory) === normalizedTarget); } +function parseCodexLensJsonOutput(output: string | undefined): any | null { + const cleanOutput = stripAnsi(output || '').trim(); + if (!cleanOutput) { + return null; + } + + const candidates = [ + cleanOutput, + ...cleanOutput.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.startsWith('{') || line.startsWith('[')), + ]; + + const firstBrace = cleanOutput.indexOf('{'); + const lastBrace = cleanOutput.lastIndexOf('}'); + if (firstBrace !== -1 && lastBrace > firstBrace) { + candidates.push(cleanOutput.slice(firstBrace, lastBrace + 1)); + } + + const firstBracket = cleanOutput.indexOf('['); + const lastBracket = cleanOutput.lastIndexOf(']'); + if (firstBracket !== -1 && lastBracket > firstBracket) { + candidates.push(cleanOutput.slice(firstBracket, lastBracket + 1)); + } + + for (const candidate of candidates) { + try { + return 
JSON.parse(candidate); + } catch { + continue; + } + } + + return null; +} + +function mapCodexLensSemanticMatches(data: any[], scope: SearchScope, maxContentLength: number): SemanticMatch[] { + return filterResultsToTargetFile(data.map((item: any) => { + const rawScore = item.score || 0; + const similarityScore = rawScore > 0 ? 1 / (1 + rawScore) : 1; + return { + file: item.path || item.file, + score: similarityScore, + content: truncateContent(item.content || item.excerpt, maxContentLength), + symbol: item.symbol || null, + }; + }), scope); +} + +function parsePlainTextFileMatches(output: string | undefined, scope: SearchScope): SemanticMatch[] { + const lines = stripAnsi(output || '') + .split(/\r?\n/) + .map((line) => line.trim()) + .filter(Boolean); + + const fileLines = lines.filter((line) => { + if (line.includes('RuntimeWarning:') || line.startsWith('warn(') || line.startsWith('Warning:')) { + return false; + } + + const resolvedPath = /^[a-zA-Z]:[\\/]|^\//.test(line) + ? line + : resolve(scope.workingDirectory, line); + + try { + return statSync(resolvedPath).isFile(); + } catch { + return false; + } + }); + + return filterResultsToTargetFile( + [...new Set(fileLines)].map((file, index) => ({ + file, + score: Math.max(0.1, 1 - index * 0.05), + content: '', + symbol: null, + })), + scope, + ); +} + +function hasCentralizedVectorArtifacts(indexRoot: unknown): boolean { + if (typeof indexRoot !== 'string' || !indexRoot.trim()) { + return false; + } + + const resolvedRoot = resolve(indexRoot); + return [ + join(resolvedRoot, '_vectors.hnsw'), + join(resolvedRoot, '_vectors_meta.db'), + join(resolvedRoot, '_binary_vectors.mmap'), + ].every((artifactPath) => existsSync(artifactPath)); +} + function collectBackendError( errors: string[], backendName: string, @@ -498,18 +644,20 @@ async function checkIndexStatus(path: string = '.'): Promise { try { // Fetch both status and config in parallel const [statusResult, configResult] = await Promise.all([ - 
executeCodexLens(['status', '--json'], { cwd: scope.workingDirectory }), - executeCodexLens(['config', 'show', '--json'], { cwd: scope.workingDirectory }), + executeCodexLens(['index', 'status', scope.workingDirectory], { cwd: scope.workingDirectory }), + executeCodexLens(['config', '--json'], { cwd: scope.workingDirectory }), ]); // Parse config - let config: CodexLensConfig | null = null; + const settingsConfig = readCodexLensSettingsSnapshot(); + let config: CodexLensConfig | null = Object.keys(settingsConfig).length > 0 ? { ...settingsConfig } : null; if (configResult.success && configResult.output) { try { const cleanConfigOutput = stripAnsi(configResult.output); const parsedConfig = JSON.parse(cleanConfigOutput); const configData = parsedConfig.result || parsedConfig; config = { + ...settingsConfig, config_file: configData.config_file, index_dir: configData.index_dir, embedding_backend: configData.embedding_backend, @@ -540,13 +688,21 @@ async function checkIndexStatus(path: string = '.'): Promise { const parsed = JSON.parse(cleanOutput); // Handle both direct and nested response formats (status returns {success, result: {...}}) const status = parsed.result || parsed; - const indexed = status.projects_count > 0 || status.total_files > 0; // Get embeddings coverage from comprehensive status const embeddingsData = status.embeddings || {}; - const embeddingsCoverage = embeddingsData.coverage_percent || 0; - const has_embeddings = embeddingsCoverage >= 50; // Threshold: 50% - const totalChunks = embeddingsData.total_chunks || 0; + const totalIndexes = Number(embeddingsData.total_indexes || 0); + const indexesWithEmbeddings = Number(embeddingsData.indexes_with_embeddings || 0); + const totalChunks = Number(embeddingsData.total_chunks || 0); + const hasCentralizedVectors = hasCentralizedVectorArtifacts(status.index_root); + let embeddingsCoverage = typeof embeddingsData.coverage_percent === 'number' + ? embeddingsData.coverage_percent + : (totalIndexes > 0 ? 
(indexesWithEmbeddings / totalIndexes) * 100 : 0); + if (hasCentralizedVectors) { + embeddingsCoverage = Math.max(embeddingsCoverage, 100); + } + const indexed = Boolean(status.projects_count > 0 || status.total_files > 0 || status.index_root || totalIndexes > 0 || totalChunks > 0); + const has_embeddings = indexesWithEmbeddings > 0 || embeddingsCoverage > 0 || totalChunks > 0 || hasCentralizedVectors; // Extract model info if available const modelInfoData = embeddingsData.model_info; @@ -563,9 +719,9 @@ async function checkIndexStatus(path: string = '.'): Promise { if (!indexed) { warning = 'No CodexLens index found. Run smart_search(action="init") to create index for better search results.'; } else if (embeddingsCoverage === 0) { - warning = 'Index exists but no embeddings generated. Run: codexlens embeddings-generate --recursive'; + warning = 'Index exists but no embeddings generated. Run smart_search(action="embed") to build the vector index.'; } else if (embeddingsCoverage < 50) { - warning = `Embeddings coverage is ${embeddingsCoverage.toFixed(1)}% (below 50%). Hybrid search will use exact mode. Run: codexlens embeddings-generate --recursive`; + warning = `Embeddings coverage is ${embeddingsCoverage.toFixed(1)}% (below 50%). Hybrid search will degrade. 
Run smart_search(action="embed") to improve vector coverage.`; } return { @@ -777,9 +933,198 @@ function buildRipgrepCommand(params: { return { command: 'rg', args, tokens }; } +function normalizeEmbeddingBackend(backend?: string): string | undefined { + if (!backend) { + return undefined; + } + + const normalized = backend.trim().toLowerCase(); + if (!normalized) { + return undefined; + } + if (normalized === 'api') { + return 'litellm'; + } + if (normalized === 'local') { + return 'fastembed'; + } + return normalized; +} + +const EMBED_PROGRESS_PREFIX = '__CCW_EMBED_PROGRESS__'; + +function resolveEmbeddingEndpoints(backend?: string): RotationEndpointConfig[] { + if (backend !== 'litellm') { + return []; + } + + try { + return generateRotationEndpoints(getProjectRoot()).filter((endpoint) => { + const apiKey = endpoint.api_key?.trim() ?? ''; + return Boolean( + apiKey && + apiKey.length > 8 && + !/^\*+$/.test(apiKey) && + endpoint.api_base?.trim() && + endpoint.model?.trim() + ); + }); + } catch { + return []; + } +} + +function resolveApiWorkerCount( + requestedWorkers: number | undefined, + backend: string | undefined, + endpoints: RotationEndpointConfig[] +): number | undefined { + if (backend !== 'litellm') { + return undefined; + } + + if (typeof requestedWorkers === 'number' && Number.isFinite(requestedWorkers)) { + return Math.max(1, Math.floor(requestedWorkers)); + } + + if (endpoints.length <= 1) { + return 4; + } + + return Math.min(16, Math.max(4, endpoints.length * 2)); +} + +function extractEmbedJsonLine(stdout: string): string | undefined { + const lines = stdout + .split(/\r?\n/) + .map((line) => line.trim()) + .filter(Boolean) + .filter((line) => !line.startsWith(EMBED_PROGRESS_PREFIX)); + + return [...lines].reverse().find((line) => line.startsWith('{') && line.endsWith('}')); +} + +async function executeEmbeddingsViaPython(params: { + projectPath: string; + backend?: string; + model?: string; + force: boolean; + maxWorkers?: number; + endpoints?: 
RotationEndpointConfig[]; +}): Promise<{ success: boolean; error?: string; progressMessages?: string[] }> { + const { projectPath, backend, model, force, maxWorkers, endpoints = [] } = params; + const pythonCode = ` +import json +import sys +from pathlib import Path +from codexlens.storage.registry import RegistryStore +from codexlens.cli.embedding_manager import generate_dense_embeddings_centralized + +target_path = Path(r"__PROJECT_PATH__").expanduser().resolve() +backend = __BACKEND__ +model = __MODEL__ +force = __FORCE__ +max_workers = __MAX_WORKERS__ +endpoints = json.loads(r'''__ENDPOINTS_JSON__''') + +def progress_update(message: str): + print("__CCW_EMBED_PROGRESS__" + str(message), flush=True) + +registry = RegistryStore() +registry.initialize() +try: + project = registry.get_project(target_path) + if project is None: + print(json.dumps({"success": False, "error": f"No index found for: {target_path}"}), flush=True) + sys.exit(1) + + index_root = Path(project.index_root) + result = generate_dense_embeddings_centralized( + index_root, + embedding_backend=backend, + model_profile=model, + force=force, + use_gpu=True, + max_workers=max_workers, + endpoints=endpoints if endpoints else None, + progress_callback=progress_update, + ) + + print(json.dumps(result), flush=True) + if not result.get("success"): + sys.exit(1) +finally: + registry.close() +` + .replace('__PROJECT_PATH__', projectPath.replace(/\\/g, '\\\\')) + .replace('__BACKEND__', backend ? JSON.stringify(backend) : 'None') + .replace('__MODEL__', model ? JSON.stringify(model) : 'None') + .replace('__FORCE__', force ? 'True' : 'False') + .replace('__MAX_WORKERS__', typeof maxWorkers === 'number' ? 
String(Math.max(1, Math.floor(maxWorkers))) : 'None') + .replace('__ENDPOINTS_JSON__', JSON.stringify(endpoints).replace(/\\/g, '\\\\').replace(/'''/g, "\\'\\'\\'")); + + return await new Promise((resolve) => { + const child = spawn(getVenvPythonPath(), ['-c', pythonCode], { + cwd: projectPath, + shell: false, + timeout: 1800000, + env: { ...process.env, PYTHONIOENCODING: 'utf-8' }, + }); + + let stdout = ''; + let stderr = ''; + const progressMessages: string[] = []; + + child.stdout.on('data', (data: Buffer) => { + const chunk = data.toString(); + stdout += chunk; + for (const line of chunk.split(/\r?\n/)) { + if (line.startsWith(EMBED_PROGRESS_PREFIX)) { + progressMessages.push(line.slice(EMBED_PROGRESS_PREFIX.length).trim()); + } + } + }); + + child.stderr.on('data', (data: Buffer) => { + stderr += data.toString(); + }); + + child.on('error', (err) => { + resolve({ success: false, error: `Failed to start embeddings process: ${err.message}`, progressMessages }); + }); + + child.on('close', (code) => { + const jsonLine = extractEmbedJsonLine(stdout); + if (jsonLine) { + try { + const parsed = JSON.parse(jsonLine) as { success?: boolean; error?: string }; + if (parsed.success) { + resolve({ success: true, progressMessages }); + return; + } + resolve({ + success: false, + error: parsed.error || stderr.trim() || stdout.trim() || `Embeddings process exited with code ${code}`, + progressMessages, + }); + return; + } catch { + // Fall through to generic error handling below. + } + } + + resolve({ + success: code === 0, + error: code === 0 ? undefined : (stderr.trim() || stdout.trim() || `Embeddings process exited with code ${code}`), + progressMessages, + }); + }); + }); +} + /** * Action: init - Initialize CodexLens index (FTS only, no embeddings) - * For semantic/vector search, use ccw view dashboard or codexlens CLI directly + * For semantic/vector search, follow with action="embed" to generate vectors. 
* @param params - Search parameters * @param force - If true, force full rebuild (delete existing index first) */ @@ -853,6 +1198,80 @@ async function executeInitAction(params: Params, force: boolean = false): Promis }; } +/** + * Action: embed - Generate semantic/vector embeddings for an indexed project + */ +async function executeEmbedAction(params: Params): Promise { + const { path = '.', embeddingBackend, embeddingModel, apiMaxWorkers, force = false } = params; + const scope = resolveSearchScope(path); + + const readyStatus = await ensureCodexLensReady(); + if (!readyStatus.ready) { + return { + success: false, + error: `CodexLens not available: ${readyStatus.error}. CodexLens will be auto-installed on first use.`, + }; + } + + const currentStatus = await checkIndexStatus(scope.workingDirectory); + const normalizedBackend = normalizeEmbeddingBackend(embeddingBackend) || currentStatus.config?.embedding_backend; + const trimmedModel = embeddingModel?.trim() || currentStatus.config?.embedding_model; + const endpoints = resolveEmbeddingEndpoints(normalizedBackend); + const configuredApiMaxWorkers = currentStatus.config?.api_max_workers; + const effectiveApiMaxWorkers = typeof apiMaxWorkers === 'number' + ? Math.max(1, Math.floor(apiMaxWorkers)) + : (typeof configuredApiMaxWorkers === 'number' + ? Math.max(1, Math.floor(configuredApiMaxWorkers)) + : resolveApiWorkerCount(undefined, normalizedBackend, endpoints)); + + if (normalizedBackend === 'litellm') { + const embedderReady = await ensureLiteLLMEmbedderReady(); + if (!embedderReady.success) { + return { + success: false, + error: embedderReady.error || 'LiteLLM embedder is not ready.', + }; + } + } + + const result = await executeEmbeddingsViaPython({ + projectPath: scope.workingDirectory, + backend: normalizedBackend, + model: trimmedModel, + force, + maxWorkers: effectiveApiMaxWorkers, + endpoints, + }); + + const indexStatus = result.success ? 
await checkIndexStatus(scope.workingDirectory) : currentStatus; + const coverage = indexStatus?.embeddings_coverage_percent; + const coverageText = coverage !== undefined ? ` (${coverage.toFixed(1)}% coverage)` : ''; + const progressMessage = result.progressMessages && result.progressMessages.length > 0 + ? result.progressMessages[result.progressMessages.length - 1] + : undefined; + + return { + success: result.success, + error: result.error, + message: result.success + ? `Embeddings generated for ${path}${coverageText}` + : undefined, + metadata: { + action: 'embed', + path: scope.workingDirectory, + backend: normalizedBackend || indexStatus?.config?.embedding_backend, + embeddings_coverage_percent: coverage, + api_max_workers: effectiveApiMaxWorkers, + endpoint_count: endpoints.length, + use_gpu: true, + cascade_strategy: currentStatus.config?.cascade_strategy, + staged_stage2_mode: currentStatus.config?.staged_stage2_mode, + note: progressMessage, + }, + status: indexStatus, + }; +} + /** * Action: status - Check CodexLens index status */ @@ -885,6 +1304,15 @@ async function executeStatusAction(params: Params): Promise { // Embedding backend info const embeddingType = cfg.embedding_backend === 'litellm' ? 'API' : 'Local'; statusParts.push(`Embedding: ${embeddingType} (${cfg.embedding_model || 'default'})`); + if (typeof cfg.api_max_workers === 'number') { + statusParts.push(`API Workers: ${cfg.api_max_workers}`); + } + if (cfg.cascade_strategy) { + statusParts.push(`Cascade: ${cfg.cascade_strategy}`); + } + if (cfg.staged_stage2_mode) { + statusParts.push(`Stage2: ${cfg.staged_stage2_mode}`); + } // Reranker info if (cfg.reranker_enabled) { @@ -1583,8 +2011,11 @@ async function executeHybridMode(params: Params): Promise { timer.mark('index_status_check'); // Request more results to support split (full content + extra files) + // NOTE: Current CodexLens search CLI in this environment rejects value-taking options + // like --limit/--offset/--method for search. 
Keep the invocation minimal and apply + // pagination/selection in CCW after parsing results. const totalToFetch = maxResults + extraFilesCount; - const args = ['search', query, '--limit', totalToFetch.toString(), '--offset', offset.toString(), '--method', 'dense_rerank', '--json']; + const args = ['search', query, '--json']; if (enrich) { args.push('--enrich'); } @@ -1619,22 +2050,10 @@ async function executeHybridMode(params: Params): Promise { let baselineInfo: { score: number; count: number } | null = null; let initialCount = 0; - try { - const parsed = JSON.parse(stripAnsi(result.output || '{}')); - const data = parsed.result?.results || parsed.results || parsed; - allResults = filterResultsToTargetFile((Array.isArray(data) ? data : []).map((item: any) => { - const rawScore = item.score || 0; - // Hybrid mode returns distance scores (lower is better). - // Convert to similarity scores (higher is better) for consistency. - // Formula: similarity = 1 / (1 + distance) - const similarityScore = rawScore > 0 ? 1 / (1 + rawScore) : 1; - return { - file: item.path || item.file, - score: similarityScore, - content: truncateContent(item.content || item.excerpt, maxContentLength), - symbol: item.symbol || null, - }; - }), scope); + const parsedOutput = parseCodexLensJsonOutput(result.output); + const parsedData = parsedOutput?.result?.results || parsedOutput?.results || parsedOutput; + if (Array.isArray(parsedData)) { + allResults = mapCodexLensSemanticMatches(parsedData, scope, maxContentLength); timer.mark('parse_results'); initialCount = allResults.length; @@ -1655,19 +2074,24 @@ async function executeHybridMode(params: Params): Promise { // 4. 
Re-sort by adjusted scores allResults.sort((a, b) => b.score - a.score); timer.mark('post_processing'); - } catch { - return { - success: true, - results: [], - output: result.output, - metadata: { - mode: 'hybrid', - backend: 'codexlens', - count: 0, - query, - warning: mergeWarnings(indexStatus.warning, result.warning, 'Failed to parse JSON output'), - }, - }; + } else { + allResults = parsePlainTextFileMatches(result.output, scope); + if (allResults.length === 0) { + return { + success: true, + results: [], + output: result.output, + metadata: { + mode: 'hybrid', + backend: 'codexlens', + count: 0, + query, + warning: mergeWarnings(indexStatus.warning, result.warning, 'Failed to parse JSON output'), + }, + }; + } + timer.mark('parse_results'); + initialCount = allResults.length; } // Split results: first N with full content, rest as file paths only @@ -2164,6 +2588,13 @@ Recommended MCP flow: use **action=\"search\"** for lookups, **action=\"init\"** * **init_force**: Force full rebuild (delete and recreate static index). * *path* (string): Directory to index (default: current). +* **embed**: Generate semantic/vector embeddings for an indexed project. + * *path* (string): Directory to embed (default: current). + * *embeddingBackend* (string): 'litellm'/'api' for remote API embeddings, 'fastembed'/'local' for local embeddings. + * *embeddingModel* (string): Embedding model/profile to use. + * *apiMaxWorkers* (number): Max concurrent API embedding workers. Defaults to auto-sizing from the configured endpoint pool. + * *force* (boolean): Regenerate embeddings even if they already exist. + * **status**: Check index status. (No required params) * **update**: Incremental index update. 
@@ -2175,16 +2606,17 @@ Recommended MCP flow: use **action=\"search\"** for lookups, **action=\"init\"** **Examples:** smart_search(query="authentication logic") # Content search (default action) smart_search(query="MyClass", mode="semantic") # Semantic search - smart_search(action="find_files", pattern="*.ts") # Find TypeScript files + smart_search(action=\"embed\", path=\"/project\", embeddingBackend=\"api\", apiMaxWorkers=8) # Build API vector index smart_search(action="init", path="/project") # Build static FTS index + smart_search(action="embed", path="/project", embeddingBackend="api") # Build API vector index smart_search(query="auth", limit=10, offset=0) # Paginated search`, inputSchema: { type: 'object', properties: { action: { type: 'string', - enum: ['init', 'init_force', 'search', 'find_files', 'status', 'update', 'watch', 'search_files'], - description: 'Action: search (content search; default and recommended), find_files (path pattern matching), init (create static FTS index, incremental), init_force (force full rebuild), status (check index), update (incremental refresh), watch (auto-update watcher; opt-in). Note: search_files is deprecated.', + enum: ['init', 'init_force', 'embed', 'search', 'find_files', 'status', 'update', 'watch', 'search_files'], + description: 'Action: search (content search; default and recommended), find_files (path pattern matching), init (create static FTS index, incremental), init_force (force full rebuild), embed (generate semantic/vector embeddings), status (check index), update (incremental refresh), watch (auto-update watcher; opt-in). Note: search_files is deprecated.', default: 'search', }, query: { @@ -2259,6 +2691,23 @@ Recommended MCP flow: use **action=\"search\"** for lookups, **action=\"init\"** items: { type: 'string' }, description: 'Languages to index (for init action). 
Example: ["javascript", "typescript"]', }, + embeddingBackend: { + type: 'string', + description: 'Embedding backend for action="embed": litellm/api (remote API) or fastembed/local (local GPU/CPU).', + }, + embeddingModel: { + type: 'string', + description: 'Embedding model/profile for action="embed". Examples: "code", "fast", "qwen3-embedding-sf".', + }, + apiMaxWorkers: { + type: 'number', + description: 'Max concurrent API embedding workers for action="embed". Defaults to auto-sizing from the configured endpoint pool.', + }, + force: { + type: 'boolean', + description: 'Force regeneration for action="embed".', + default: false, + }, enrich: { type: 'boolean', description: 'Enrich search results with code graph relationships (calls, imports, called_by, imported_by).', @@ -2625,6 +3074,10 @@ export async function handler(params: Record): Promise { assert.equal(props.maxResults.default, 5); assert.equal(props.limit.default, 5); assert.match(schema.description, /static FTS index/i); + assert.match(schema.description, /semantic\/vector embeddings/i); + assert.ok(props.action.enum.includes('embed')); + assert.match(props.embeddingBackend.description, /litellm\/api/i); + assert.match(props.apiMaxWorkers.description, /endpoint pool/i); + assert.match(schema.description, /apiMaxWorkers=8/i); assert.match(props.path.description, /single file path/i); });