Compare commits

..

1 Commits

Author SHA1 Message Date
cexll
08877af530 fix(harness): replace implicit .harness-active absence with explicit .harness-reflect marker for self-reflect triggering
Fixes two bugs:
- False positive: stale harness-tasks.json from previous runs triggered self-reflect when harness wasn't active
- False negative: self-reflect didn't trigger after harness completed all tasks

Generated with SWE-Agent.ai

Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
2026-03-02 00:30:34 +08:00
3 changed files with 54 additions and 14 deletions

View File

@@ -260,6 +260,11 @@ def main() -> int:
# If nothing left to do, allow stop # If nothing left to do, allow stop
if not pending_eligible and not retryable and not in_progress_blocking: if not pending_eligible and not retryable and not in_progress_blocking:
_reset_block_counter(root) _reset_block_counter(root)
# Signal self-reflect hook BEFORE removing active marker
try:
(root / ".harness-reflect").touch()
except Exception:
pass
try: try:
(root / ".harness-active").unlink(missing_ok=True) (root / ".harness-active").unlink(missing_ok=True)
except Exception: except Exception:

View File

@@ -146,13 +146,15 @@ def main() -> int:
if not session_id: if not session_id:
return 0 # 无 session_id放行 return 0 # 无 session_id放行
# 守卫 1harness 从未初始化过 → 完全不触发自 # 守卫:仅当 harness 完成所有任务后(.harness-reflect 存在)才触发自
# 这避免了两个问题:
# 1. 历史残留的 harness-tasks.json 导致误触发false positive
# 2. harness-stop.py 移除 .harness-active 后 Claude Code 跳过后续 hookfalse negative
root = _find_harness_root(payload) root = _find_harness_root(payload)
if root is None: if root is None:
return 0 # harness 未曾使用,不触发自省 return 0
# 守卫 2harness 仍活跃 → 由 harness-stop.py 全权管理 if not (root / ".harness-reflect").is_file():
if (root / ".harness-active").is_file():
return 0 return 0
# 读取最大迭代次数 # 读取最大迭代次数
@@ -168,8 +170,12 @@ def main() -> int:
# 读取当前计数 # 读取当前计数
count = _read_counter(session_id) count = _read_counter(session_id)
# 超过最大次数,放行 # 超过最大次数,清理 marker 并放行
if count >= max_iter: if count >= max_iter:
try:
(root / ".harness-reflect").unlink(missing_ok=True)
except Exception:
pass
return 0 return 0
# 递增计数 # 递增计数

View File

@@ -200,7 +200,7 @@ class TestStopHookTaskLogic(unittest.TestCase):
return {"cwd": self.tmpdir, **extra} return {"cwd": self.tmpdir, **extra}
def test_all_completed_allows_stop(self): def test_all_completed_allows_stop(self):
"""When all tasks are completed, stop is allowed.""" """When all tasks are completed, stop is allowed and .harness-reflect created."""
write_tasks(self.root, [ write_tasks(self.root, [
{"id": "t1", "status": "completed"}, {"id": "t1", "status": "completed"},
{"id": "t2", "status": "completed"}, {"id": "t2", "status": "completed"},
@@ -209,6 +209,10 @@ class TestStopHookTaskLogic(unittest.TestCase):
self.assertEqual(code, 0) self.assertEqual(code, 0)
self.assertEqual(stdout, "") self.assertEqual(stdout, "")
self.assertFalse((self.root / ".harness-active").exists()) self.assertFalse((self.root / ".harness-active").exists())
self.assertTrue(
(self.root / ".harness-reflect").exists(),
".harness-reflect should be created when all tasks complete",
)
def test_pending_with_unmet_deps_allows_stop(self): def test_pending_with_unmet_deps_allows_stop(self):
"""Pending tasks with unmet dependencies don't block stop.""" """Pending tasks with unmet dependencies don't block stop."""
@@ -644,7 +648,7 @@ REFLECT_HOOK = HOOKS_DIR / "self-reflect-stop.py"
class TestSelfReflectStopHook(unittest.TestCase): class TestSelfReflectStopHook(unittest.TestCase):
"""self-reflect-stop.py must only trigger when harness was used and completed.""" """self-reflect-stop.py must only trigger when .harness-reflect marker exists."""
def setUp(self): def setUp(self):
self.tmpdir = tempfile.mkdtemp() self.tmpdir = tempfile.mkdtemp()
@@ -663,14 +667,18 @@ class TestSelfReflectStopHook(unittest.TestCase):
def _payload(self, session_id="test-reflect-001", **extra): def _payload(self, session_id="test-reflect-001", **extra):
return {"cwd": self.tmpdir, "session_id": session_id, **extra} return {"cwd": self.tmpdir, "session_id": session_id, **extra}
def _set_reflect(self):
"""Create .harness-reflect marker (simulates harness completion)."""
(self.root / ".harness-reflect").touch()
def test_no_harness_root_is_noop(self): def test_no_harness_root_is_noop(self):
"""When harness-tasks.json doesn't exist, hook is a complete no-op.""" """When harness-tasks.json doesn't exist, hook is a complete no-op."""
code, stdout, stderr = run_hook(REFLECT_HOOK, self._payload()) code, stdout, stderr = run_hook(REFLECT_HOOK, self._payload())
self.assertEqual(code, 0) self.assertEqual(code, 0)
self.assertEqual(stdout, "", "Should produce no output when harness never used") self.assertEqual(stdout, "", "Should produce no output when harness never used")
def test_harness_active_defers(self): def test_harness_active_no_reflect_marker(self):
"""When .harness-active exists, hook defers to harness-stop.py.""" """When .harness-active exists but no .harness-reflect, hook is no-op."""
write_tasks(self.root, [ write_tasks(self.root, [
{"id": "t1", "status": "pending", "depends_on": []}, {"id": "t1", "status": "pending", "depends_on": []},
]) ])
@@ -679,12 +687,24 @@ class TestSelfReflectStopHook(unittest.TestCase):
self.assertEqual(code, 0) self.assertEqual(code, 0)
self.assertEqual(stdout, "", "Should not self-reflect while harness is active") self.assertEqual(stdout, "", "Should not self-reflect while harness is active")
def test_harness_completed_triggers_reflection(self): def test_stale_tasks_without_reflect_marker_is_noop(self):
"""When harness-tasks.json exists but .harness-active removed, triggers self-reflection.""" """Stale harness-tasks.json without .harness-reflect does NOT trigger (fixes false positive)."""
write_tasks(self.root, [ write_tasks(self.root, [
{"id": "t1", "status": "completed"}, {"id": "t1", "status": "completed"},
]) ])
deactivate(self.root) deactivate(self.root)
# No .harness-reflect marker — this is a stale file from a previous run
code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id="test-stale"))
self.assertEqual(code, 0)
self.assertEqual(stdout, "", "Stale harness-tasks.json should NOT trigger self-reflect")
def test_harness_completed_triggers_reflection(self):
"""When .harness-reflect marker exists, triggers self-reflection."""
write_tasks(self.root, [
{"id": "t1", "status": "completed"},
])
deactivate(self.root)
self._set_reflect()
sid = "test-reflect-trigger" sid = "test-reflect-trigger"
code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id=sid)) code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id=sid))
self.assertEqual(code, 0) self.assertEqual(code, 0)
@@ -696,6 +716,7 @@ class TestSelfReflectStopHook(unittest.TestCase):
"""Each invocation increments the iteration counter.""" """Each invocation increments the iteration counter."""
write_tasks(self.root, [{"id": "t1", "status": "completed"}]) write_tasks(self.root, [{"id": "t1", "status": "completed"}])
deactivate(self.root) deactivate(self.root)
self._set_reflect()
sid = "test-reflect-counter" sid = "test-reflect-counter"
# First call: iteration 1 # First call: iteration 1
@@ -708,10 +729,11 @@ class TestSelfReflectStopHook(unittest.TestCase):
data = json.loads(stdout) data = json.loads(stdout)
self.assertIn("2/5", data["reason"]) self.assertIn("2/5", data["reason"])
def test_max_iterations_allows_stop(self): def test_max_iterations_allows_stop_and_cleans_marker(self):
"""After max iterations, hook allows stop (no output).""" """After max iterations, hook allows stop and removes .harness-reflect."""
write_tasks(self.root, [{"id": "t1", "status": "completed"}]) write_tasks(self.root, [{"id": "t1", "status": "completed"}])
deactivate(self.root) deactivate(self.root)
self._set_reflect()
sid = "test-reflect-max" sid = "test-reflect-max"
# Write counter at max # Write counter at max
@@ -721,11 +743,16 @@ class TestSelfReflectStopHook(unittest.TestCase):
code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id=sid)) code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id=sid))
self.assertEqual(code, 0) self.assertEqual(code, 0)
self.assertEqual(stdout, "", "Should allow stop after max iterations") self.assertEqual(stdout, "", "Should allow stop after max iterations")
self.assertFalse(
(self.root / ".harness-reflect").exists(),
".harness-reflect should be cleaned up after max iterations",
)
def test_disabled_via_env(self): def test_disabled_via_env(self):
"""REFLECT_MAX_ITERATIONS=0 disables self-reflection.""" """REFLECT_MAX_ITERATIONS=0 disables self-reflection."""
write_tasks(self.root, [{"id": "t1", "status": "completed"}]) write_tasks(self.root, [{"id": "t1", "status": "completed"}])
deactivate(self.root) deactivate(self.root)
self._set_reflect()
code, stdout, _ = run_hook( code, stdout, _ = run_hook(
REFLECT_HOOK, REFLECT_HOOK,
self._payload(session_id="test-reflect-disabled"), self._payload(session_id="test-reflect-disabled"),
@@ -738,6 +765,7 @@ class TestSelfReflectStopHook(unittest.TestCase):
"""Missing session_id makes hook a no-op.""" """Missing session_id makes hook a no-op."""
write_tasks(self.root, [{"id": "t1", "status": "completed"}]) write_tasks(self.root, [{"id": "t1", "status": "completed"}])
deactivate(self.root) deactivate(self.root)
self._set_reflect()
code, stdout, _ = run_hook(REFLECT_HOOK, {"cwd": self.tmpdir}) code, stdout, _ = run_hook(REFLECT_HOOK, {"cwd": self.tmpdir})
self.assertEqual(code, 0) self.assertEqual(code, 0)
self.assertEqual(stdout, "") self.assertEqual(stdout, "")
@@ -745,7 +773,7 @@ class TestSelfReflectStopHook(unittest.TestCase):
def test_empty_stdin_no_crash(self): def test_empty_stdin_no_crash(self):
"""Empty stdin doesn't crash.""" """Empty stdin doesn't crash."""
write_tasks(self.root, [{"id": "t1", "status": "completed"}]) write_tasks(self.root, [{"id": "t1", "status": "completed"}])
activate(self.root) self._set_reflect()
proc = subprocess.run( proc = subprocess.run(
[sys.executable, str(REFLECT_HOOK)], [sys.executable, str(REFLECT_HOOK)],
input="", input="",
@@ -760,6 +788,7 @@ class TestSelfReflectStopHook(unittest.TestCase):
"""HARNESS_STATE_ROOT env var is used for root discovery.""" """HARNESS_STATE_ROOT env var is used for root discovery."""
write_tasks(self.root, [{"id": "t1", "status": "completed"}]) write_tasks(self.root, [{"id": "t1", "status": "completed"}])
deactivate(self.root) deactivate(self.root)
self._set_reflect()
sid = "test-reflect-env" sid = "test-reflect-env"
code, stdout, _ = run_hook( code, stdout, _ = run_hook(
REFLECT_HOOK, REFLECT_HOOK,