feat: add harness skill with hooks install/uninstall support (#156)

Add multi-session autonomous agent harness with progress checkpointing,
failure recovery, task dependencies, and post-completion self-reflection.

- Add harness module to config.json (copy_dir with hooks.json)
- Add 7 hook scripts: stop, sessionstart, teammateidle, subagentstop,
  claim, renew, self-reflect-stop + shared _harness_common.py
- Fix self-reflect-stop: only triggers when harness was initialized
  (checks harness-tasks.json existence), not on every session
- Add unmerge_hooks_from_settings() to uninstall.py for clean hook removal
- Add unit tests (57 tests) and E2E test (100 tasks + 5 self-reflect)

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
This commit is contained in:
cexll
2026-03-01 22:14:16 +08:00
parent 62309d1429
commit 683409464c
14 changed files with 3051 additions and 10 deletions

View File

@@ -0,0 +1,314 @@
#!/usr/bin/env python3
"""Harness Stop hook — blocks Claude from stopping when eligible tasks remain.
Uses `stop_hook_active` field and a consecutive-block counter to prevent
infinite loops. If the hook blocks N times in a row without any task
completing, it allows the stop with a warning.
"""
from __future__ import annotations
import json
import os
import sys
from pathlib import Path
from typing import Any, Optional
MAX_CONSECUTIVE_BLOCKS = 8 # safety valve
def _read_hook_payload() -> dict[str, Any]:
raw = sys.stdin.read()
if not raw.strip():
return {}
try:
data = json.loads(raw)
return data if isinstance(data, dict) else {}
except Exception:
return {"_invalid_json": True}
def _find_harness_root(payload: dict[str, Any]) -> Optional[Path]:
state_root = os.environ.get("HARNESS_STATE_ROOT")
if state_root:
p = Path(state_root)
if (p / "harness-tasks.json").is_file():
try:
return p.resolve()
except Exception:
return p
candidates: list[Path] = []
env_dir = os.environ.get("CLAUDE_PROJECT_DIR")
if env_dir:
candidates.append(Path(env_dir))
cwd = payload.get("cwd") or os.getcwd()
candidates.append(Path(cwd))
seen: set[str] = set()
for base in candidates:
try:
base = base.resolve()
except Exception:
continue
if str(base) in seen:
continue
seen.add(str(base))
for parent in [base, *list(base.parents)[:8]]:
if (parent / "harness-tasks.json").is_file():
return parent
return None
def _load_json(path: Path) -> dict[str, Any]:
with path.open("r", encoding="utf-8") as f:
data = json.load(f)
if not isinstance(data, dict):
raise ValueError(f"{path.name} must be a JSON object")
return data
def _tail_text(path: Path, max_bytes: int = 200_000) -> str:
with path.open("rb") as f:
try:
f.seek(0, os.SEEK_END)
size = f.tell()
f.seek(max(0, size - max_bytes), os.SEEK_SET)
except Exception:
f.seek(0, os.SEEK_SET)
chunk = f.read()
return chunk.decode("utf-8", errors="replace")
def _priority_rank(v: Any) -> int:
return {"P0": 0, "P1": 1, "P2": 2}.get(str(v or ""), 9)
def _deps_completed(t: dict[str, Any], completed: set[str]) -> bool:
deps = t.get("depends_on") or []
if not isinstance(deps, list):
return False
return all(str(d) in completed for d in deps)
def _attempts(t: dict[str, Any]) -> int:
try:
return int(t.get("attempts") or 0)
except Exception:
return 0
def _max_attempts(t: dict[str, Any]) -> int:
try:
v = t.get("max_attempts")
return int(v) if v is not None else 3
except Exception:
return 3
def _pick_next(pending: list[dict[str, Any]], retry: list[dict[str, Any]]) -> Optional[dict[str, Any]]:
def key(t: dict[str, Any]) -> tuple[int, str]:
return (_priority_rank(t.get("priority")), str(t.get("id", "")))
pending.sort(key=key)
retry.sort(key=key)
return pending[0] if pending else (retry[0] if retry else None)
def _block_counter_path(root: Path) -> Path:
return root / ".harness-stop-counter"
def _read_block_counter(root: Path) -> tuple[int, int]:
"""Returns (consecutive_blocks, last_completed_count)."""
p = _block_counter_path(root)
try:
raw = p.read_text("utf-8").strip()
parts = raw.split(",")
return int(parts[0]), int(parts[1]) if len(parts) > 1 else 0
except Exception:
return 0, 0
def _write_block_counter(root: Path, blocks: int, completed: int) -> None:
p = _block_counter_path(root)
tmp = p.with_name(f"{p.name}.tmp.{os.getpid()}")
try:
tmp.write_text(f"{blocks},{completed}", encoding="utf-8")
os.replace(tmp, p)
except Exception:
try:
tmp.unlink(missing_ok=True)
except Exception:
pass
def _reset_block_counter(root: Path) -> None:
p = _block_counter_path(root)
try:
p.unlink(missing_ok=True)
except Exception:
pass
def _is_harness_active(root: Path) -> bool:
"""Check if harness skill is actively running (marker file exists)."""
return (root / ".harness-active").is_file()
def main() -> int:
payload = _read_hook_payload()
# Safety: if stop_hook_active is True, Claude is already continuing
# from a previous Stop hook block. Check if we should allow stop
# to prevent infinite loops.
stop_hook_active = payload.get("stop_hook_active", False)
root = _find_harness_root(payload)
if root is None:
return 0 # no harness project, allow stop
# Guard: only active when harness skill is triggered
if not _is_harness_active(root):
return 0
tasks_path = root / "harness-tasks.json"
progress_path = root / "harness-progress.txt"
try:
state = _load_json(tasks_path)
tasks_raw = state.get("tasks") or []
if not isinstance(tasks_raw, list):
raise ValueError("tasks must be a list")
tasks = [t for t in tasks_raw if isinstance(t, dict)]
except Exception as e:
if stop_hook_active:
sys.stderr.write(
"HARNESS: WARN — harness-tasks.json 无法解析且 stop_hook_active=True"
"为避免无限循环,本次允许停止。\n"
)
return 0
reason = (
"HARNESS: 检测到配置损坏,无法解析 harness-tasks.json。\n"
f"HARNESS: error={e}\n"
"按 SKILL.md 的 JSON corruption 恢复:优先用 harness-tasks.json.bak 还原;无法还原则停止并要求人工修复。"
)
print(json.dumps({"decision": "block", "reason": reason}, ensure_ascii=False))
return 0
session_config = state.get("session_config") or {}
if not isinstance(session_config, dict):
session_config = {}
concurrency_mode = str(session_config.get("concurrency_mode") or "exclusive")
is_concurrent = concurrency_mode == "concurrent"
worker_id = os.environ.get("HARNESS_WORKER_ID") or None
# Check session limits
try:
session_count = int(state.get("session_count") or 0)
except Exception:
session_count = 0
try:
max_sessions = int(session_config.get("max_sessions") or 0)
except Exception:
max_sessions = 0
if max_sessions > 0 and session_count >= max_sessions:
_reset_block_counter(root)
return 0 # session limit reached, allow stop
# Check per-session task limit
try:
max_tasks_per_session = int(session_config.get("max_tasks_per_session") or 0)
except Exception:
max_tasks_per_session = 0
if not is_concurrent and max_tasks_per_session > 0 and session_count > 0 and progress_path.is_file():
tail = _tail_text(progress_path)
tag = f"[SESSION-{session_count}]"
finished = 0
for ln in tail.splitlines():
if tag not in ln:
continue
if " Completed [" in ln or (" ERROR [" in ln and "[task-" in ln):
finished += 1
if finished >= max_tasks_per_session:
_reset_block_counter(root)
return 0 # per-session limit reached, allow stop
# Compute eligible tasks
counts: dict[str, int] = {}
for t in tasks:
s = str(t.get("status") or "pending")
counts[s] = counts.get(s, 0) + 1
completed_ids = {str(t.get("id", "")) for t in tasks if str(t.get("status", "")) == "completed"}
completed_count = len(completed_ids)
pending_eligible = [t for t in tasks if str(t.get("status", "")) == "pending" and _deps_completed(t, completed_ids)]
retryable = [
t for t in tasks
if str(t.get("status", "")) == "failed"
and _attempts(t) < _max_attempts(t)
and _deps_completed(t, completed_ids)
]
in_progress_any = [t for t in tasks if str(t.get("status", "")) == "in_progress"]
if is_concurrent and worker_id:
in_progress_blocking = [
t for t in in_progress_any
if str(t.get("claimed_by") or "") == worker_id or not t.get("claimed_by")
]
else:
in_progress_blocking = in_progress_any
# If nothing left to do, allow stop
if not pending_eligible and not retryable and not in_progress_blocking:
_reset_block_counter(root)
try:
(root / ".harness-active").unlink(missing_ok=True)
except Exception:
pass
return 0
# Safety valve: track consecutive blocks without progress
prev_blocks, prev_completed = _read_block_counter(root)
if completed_count > prev_completed:
# Progress was made, reset counter
prev_blocks = 0
consecutive = prev_blocks + 1
_write_block_counter(root, consecutive, completed_count)
if stop_hook_active and consecutive > MAX_CONSECUTIVE_BLOCKS:
# Too many consecutive blocks without progress — allow stop to prevent infinite loop
_reset_block_counter(root)
sys.stderr.write(
f"HARNESS: WARN — Stop hook blocked {consecutive} times without progress. "
"Allowing stop to prevent infinite loop. Check task definitions and validation commands.\n"
)
return 0
# Block the stop — tasks remain
next_task = _pick_next(pending_eligible, retryable)
next_hint = ""
if next_task is not None:
tid = str(next_task.get("id") or "")
title = str(next_task.get("title") or "").strip()
next_hint = f"next={tid}{(': ' + title) if title else ''}"
summary = (
"HARNESS: 未满足停止条件,继续执行。\n"
+ "HARNESS: "
+ " ".join(f"{k}={v}" for k, v in sorted(counts.items()))
+ f" total={len(tasks)}"
+ (f" {next_hint}" if next_hint else "")
).strip()
reason = (
summary
+ "\n"
+ "请按 SKILL.md 的 Task Selection Algorithm 选择下一个 eligible 任务,并完整执行 Task Execution Cycle"
"Claim → Checkpoint → Validate → Record outcome → STATS如需→ Continue。"
)
print(json.dumps({"decision": "block", "reason": reason}, ensure_ascii=False))
return 0
if __name__ == "__main__":
raise SystemExit(main())