feat: add harness skill with hooks install/uninstall support (#156)

Add multi-session autonomous agent harness with progress checkpointing, failure recovery, task dependencies, and post-completion self-reflection.

- Add harness module to config.json (copy_dir with hooks.json)
- Add 7 hook scripts: stop, sessionstart, teammateidle, subagentstop, claim, renew, self-reflect-stop, plus shared _harness_common.py
- Fix self-reflect-stop: trigger only when the harness was initialized (checks that harness-tasks.json exists), not on every session
- Add unmerge_hooks_from_settings() to uninstall.py for clean hook removal
- Add unit tests (57 tests) and an E2E test (100 tasks + 5 self-reflect iterations)

Generated with SWE-Agent.ai
Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-03-02 15:23:16 +08:00 · 2026-03-01 22:14:16 +08:00
parent 62309d1429
commit 683409464c
14 changed files with 3051 additions and 10 deletions
--- a/skills/harness/tests/e2e-100tasks.sh
+++ b/skills/harness/tests/e2e-100tasks.sh
@@ -0,0 +1,178 @@
+#!/usr/bin/env bash
+# E2E test: 100 harness tasks + 5 self-reflection iterations via claude -p
+# Usage: bash e2e-100tasks.sh
+#
+# Abort on any error, unset variable, or failed pipeline stage.
+set -euo pipefail
+
+# Scratch project for this run; nothing in this script deletes it afterwards.
+PROJECT_DIR="$(mktemp -d /tmp/harness-e2e-XXXXXX)"
+LOG_FILE="${PROJECT_DIR}/test-output.log"
+echo "=== Harness E2E Test: 100 tasks + 5 self-reflect ==="
+echo "Project dir: ${PROJECT_DIR}"
+echo ""
+
+# --- 1. Generate harness-tasks.json with 100 trivial tasks ---
+python3 - "${PROJECT_DIR}" <<'PYEOF'
+import json
+import sys
+
+
+def build_task(number):
+    """Return the pending-task record for task-NNN; validation greps its file."""
+    name = f"task-{number:03d}"
+    check = f"test -f {name}.txt && grep -q 'done-{name}' {name}.txt"
+    return {
+        "id": name,
+        "title": f"Create file {name}.txt",
+        "status": "pending",
+        "priority": "P1",
+        "depends_on": [],
+        "attempts": 0,
+        "max_attempts": 3,
+        "started_at_commit": None,
+        "validation": {"command": check, "timeout_seconds": 10},
+        "on_failure": {"cleanup": None},
+        "error_log": [],
+        "checkpoints": [],
+        "completed_at": None,
+    }
+
+
+project_root = sys.argv[1]
+all_tasks = [build_task(n) for n in range(1, 101)]
+harness_state = {
+    "version": 2,
+    "created": "2026-03-01T00:00:00Z",
+    "session_config": {
+        "concurrency_mode": "exclusive",
+        "max_tasks_per_session": 100,
+        "max_sessions": 50,
+        "max_reflect_iterations": 5,
+    },
+    "tasks": all_tasks,
+    "session_count": 0,
+    "last_session": None,
+}
+with open(f"{project_root}/harness-tasks.json", "w") as out:
+    json.dump(harness_state, out, indent=2, ensure_ascii=False)
+print(f"Generated {len(all_tasks)} tasks")
+PYEOF
+
+# --- 2-3. Create progress log and .harness-active marker ---
+touch "${PROJECT_DIR}/harness-progress.txt"
+touch "${PROJECT_DIR}/.harness-active"
+
+# --- 4. Init git repo (required for harness commit tracking) ---
+cd "${PROJECT_DIR}"
+git init -q
+# Fall back to a repo-local identity so commits work on machines without one.
+git config user.name >/dev/null || git config user.name "harness-e2e"
+git config user.email >/dev/null || git config user.email "harness-e2e@example.invalid"
+git add harness-tasks.json harness-progress.txt .harness-active
+git commit -q -m "harness init"
+echo "Setup complete. Running claude -p ..."
+echo ""
+
+# --- 5. Build the prompt ---
+# Quoted delimiter keeps the text literal; read returns non-zero at EOF, hence `|| true`.
+read -r -d '' PROMPT <<'PROMPT_EOF' || true
+You are in a project with a harness setup. Run /harness run to execute all tasks.
+
+The project is at the current working directory. There are 100 tasks in harness-tasks.json.
+Each task requires creating a file: for task-001, create task-001.txt with content "done-task-001".
+
+Execute the harness infinite loop protocol:
+1. Read harness-tasks.json and harness-progress.txt
+2. Pick next eligible task by priority
+3. For each task: create the file with the required content, run validation, mark completed
+4. Continue until all tasks are done
+5. After completion, the self-reflect stop hook will trigger 5 times — complete those iterations
+
+IMPORTANT: Do NOT use any skill tools. Just directly create files and update harness state.
+For efficiency, you can batch multiple file creations in a single command.
+After creating files, update harness-tasks.json to mark them completed.
+Do all work directly — no planning mode, no subagents.
+PROMPT_EOF
+
+# --- 6. Run claude -p ---
+START_TIME=$(date +%s)
+cd "${PROJECT_DIR}"
+unset CLAUDECODE
+# Record a non-zero claude exit instead of letting set -e/pipefail abort before verification.
+CLAUDE_STATUS=0
+REFLECT_MAX_ITERATIONS=5 \
+HARNESS_STATE_ROOT="${PROJECT_DIR}" \
+claude -p "${PROMPT}" \
+  --model sonnet \
+  --dangerously-skip-permissions \
+  --disable-slash-commands \
+  --no-session-persistence \
+  --max-budget-usd 5 \
+  --allowedTools 'Bash(*)' 'Read' 'Write' 'Glob' 'Grep' 'Edit' \
+  2>&1 | tee "${LOG_FILE}" || CLAUDE_STATUS=$?
+END_TIME=$(date +%s)
+ELAPSED=$((END_TIME - START_TIME))
+echo ""
+echo "=== Test Results ==="
+echo "Duration: ${ELAPSED}s"
+echo "claude exit status: ${CLAUDE_STATUS}"
+echo ""
+
+# --- 7. Verify results ---
+# Capture the verifier's status so the log/project paths below are still printed on failure
+# (under `set -e` a bare non-zero exit here would end the script immediately).
+VERIFY_STATUS=0
+python3 - "${PROJECT_DIR}" <<'VERIFY_EOF' || VERIFY_STATUS=$?
+import json
+import sys
+from collections import Counter
+from pathlib import Path
+
+root = Path(sys.argv[1])
+progress_path = root / "harness-progress.txt"
+
+# Count task files that exist and contain their "done-<id>" marker.
+created = 0
+for tid in (f"task-{i:03d}" for i in range(1, 101)):
+    fpath = root / f"{tid}.txt"
+    if fpath.is_file() and f"done-{tid}" in fpath.read_text():
+        created += 1
+
+# Tally the task statuses recorded in harness-tasks.json.
+with (root / "harness-tasks.json").open() as f:
+    state = json.load(f)
+statuses = Counter(t.get("status") for t in state.get("tasks", []))
+completed = statuses["completed"]
+
+# Check .harness-active removed
+marker_removed = not (root / ".harness-active").is_file()
+
+# Count non-blank lines in the progress log (0 when the file is missing).
+progress_lines = 0
+if progress_path.is_file():
+    progress_lines = sum(1 for line in progress_path.read_text().splitlines() if line.strip())
+
+print(f"Files created:     {created}/100")
+print(f"Tasks completed:   {completed}/100")
+print(f"Tasks failed:      {statuses['failed']}")
+print(f"Tasks pending:     {statuses['pending']}")
+print(f"Tasks in_progress: {statuses['in_progress']}")
+print(f"Marker removed:    {marker_removed}")
+print(f"Progress log lines: {progress_lines}")
+print()
+
+# Pass threshold: at least 95 of the 100 tasks both created and marked completed.
+if created >= 95 and completed >= 95:
+    print("PASS: >= 95% tasks completed successfully")
+    sys.exit(0)
+print(f"PARTIAL: {created} files, {completed} completed")
+print("Check the log for details")
+sys.exit(1)
+VERIFY_EOF
+
+echo ""
+echo "Log: ${LOG_FILE}"
+echo "Project: ${PROJECT_DIR}"
+# Propagate the verification verdict as the script's exit status.
+exit "${VERIFY_STATUS}"