diff --git a/skills/harness/hooks/harness-stop.py b/skills/harness/hooks/harness-stop.py index 909c491..58cedd5 100755 --- a/skills/harness/hooks/harness-stop.py +++ b/skills/harness/hooks/harness-stop.py @@ -260,6 +260,11 @@ def main() -> int: # If nothing left to do, allow stop if not pending_eligible and not retryable and not in_progress_blocking: _reset_block_counter(root) + # Signal self-reflect hook BEFORE removing active marker + try: + (root / ".harness-reflect").touch() + except Exception: + pass try: (root / ".harness-active").unlink(missing_ok=True) except Exception: diff --git a/skills/harness/hooks/self-reflect-stop.py b/skills/harness/hooks/self-reflect-stop.py index 94344b3..c07c011 100644 --- a/skills/harness/hooks/self-reflect-stop.py +++ b/skills/harness/hooks/self-reflect-stop.py @@ -146,13 +146,15 @@ def main() -> int: if not session_id: return 0 # 无 session_id,放行 - # 守卫 1:harness 从未初始化过 → 完全不触发自检 + # 守卫:仅当 harness 完成所有任务后(.harness-reflect 存在)才触发自省 + # 这避免了两个问题: + # 1. 历史残留的 harness-tasks.json 导致误触发(false positive) + # 2. harness-stop.py 移除 .harness-active 后 Claude Code 跳过后续 hook(false negative) root = _find_harness_root(payload) if root is None: - return 0 # harness 未曾使用,不触发自省 + return 0 - # 守卫 2:harness 仍活跃 → 由 harness-stop.py 全权管理 - if (root / ".harness-active").is_file(): + if not (root / ".harness-reflect").is_file(): return 0 # 读取最大迭代次数 @@ -168,8 +170,12 @@ def main() -> int: # 读取当前计数 count = _read_counter(session_id) - # 超过最大次数,放行 + # 超过最大次数,清理 marker 并放行 if count >= max_iter: + try: + (root / ".harness-reflect").unlink(missing_ok=True) + except Exception: + pass return 0 # 递增计数 diff --git a/skills/harness/tests/test_hooks.py b/skills/harness/tests/test_hooks.py index 78b4c77..f33d72e 100644 --- a/skills/harness/tests/test_hooks.py +++ b/skills/harness/tests/test_hooks.py @@ -200,7 +200,7 @@ class TestStopHookTaskLogic(unittest.TestCase): return {"cwd": self.tmpdir, **extra} def test_all_completed_allows_stop(self): - """When all tasks are completed, stop is allowed.""" + """When all tasks are completed, stop is allowed and .harness-reflect created.""" write_tasks(self.root, [ {"id": "t1", "status": "completed"}, {"id": "t2", "status": "completed"}, @@ -209,6 +209,10 @@ class TestStopHookTaskLogic(unittest.TestCase): self.assertEqual(code, 0) self.assertEqual(stdout, "") self.assertFalse((self.root / ".harness-active").exists()) + self.assertTrue( + (self.root / ".harness-reflect").exists(), + ".harness-reflect should be created when all tasks complete", + ) def test_pending_with_unmet_deps_allows_stop(self): """Pending tasks with unmet dependencies don't block stop.""" @@ -644,7 +648,7 @@ REFLECT_HOOK = HOOKS_DIR / "self-reflect-stop.py" class TestSelfReflectStopHook(unittest.TestCase): - """self-reflect-stop.py must only trigger when harness was used and completed.""" + """self-reflect-stop.py must only trigger when .harness-reflect marker exists.""" def setUp(self): self.tmpdir = tempfile.mkdtemp() @@ -663,14 +667,18 @@ class TestSelfReflectStopHook(unittest.TestCase): def _payload(self, session_id="test-reflect-001", **extra): return {"cwd": self.tmpdir, "session_id": session_id, **extra} + def _set_reflect(self): + """Create .harness-reflect marker (simulates harness completion).""" + (self.root / ".harness-reflect").touch() + def test_no_harness_root_is_noop(self): """When harness-tasks.json doesn't exist, hook is a complete no-op.""" code, stdout, stderr = run_hook(REFLECT_HOOK, self._payload()) self.assertEqual(code, 0) self.assertEqual(stdout, "", "Should produce no output when harness never used") - def test_harness_active_defers(self): - """When .harness-active exists, hook defers to harness-stop.py.""" + def test_harness_active_no_reflect_marker(self): + """When .harness-active exists but no .harness-reflect, hook is no-op.""" write_tasks(self.root, [ {"id": "t1", "status": "pending", "depends_on": []}, ]) @@ -679,12 +687,24 @@ class TestSelfReflectStopHook(unittest.TestCase): self.assertEqual(code, 0) self.assertEqual(stdout, "", "Should not self-reflect while harness is active") - def test_harness_completed_triggers_reflection(self): - """When harness-tasks.json exists but .harness-active removed, triggers self-reflection.""" + def test_stale_tasks_without_reflect_marker_is_noop(self): + """Stale harness-tasks.json without .harness-reflect does NOT trigger (fixes false positive).""" write_tasks(self.root, [ {"id": "t1", "status": "completed"}, ]) deactivate(self.root) + # No .harness-reflect marker — this is a stale file from a previous run + code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id="test-stale")) + self.assertEqual(code, 0) + self.assertEqual(stdout, "", "Stale harness-tasks.json should NOT trigger self-reflect") + + def test_harness_completed_triggers_reflection(self): + """When .harness-reflect marker exists, triggers self-reflection.""" + write_tasks(self.root, [ + {"id": "t1", "status": "completed"}, + ]) + deactivate(self.root) + self._set_reflect() sid = "test-reflect-trigger" code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id=sid)) self.assertEqual(code, 0) @@ -696,6 +716,7 @@ class TestSelfReflectStopHook(unittest.TestCase): """Each invocation increments the iteration counter.""" write_tasks(self.root, [{"id": "t1", "status": "completed"}]) deactivate(self.root) + self._set_reflect() sid = "test-reflect-counter" # First call: iteration 1 @@ -708,10 +729,11 @@ class TestSelfReflectStopHook(unittest.TestCase): data = json.loads(stdout) self.assertIn("2/5", data["reason"]) - def test_max_iterations_allows_stop(self): - """After max iterations, hook allows stop (no output).""" + def test_max_iterations_allows_stop_and_cleans_marker(self): + """After max iterations, hook allows stop and removes .harness-reflect.""" write_tasks(self.root, [{"id": "t1", "status": "completed"}]) deactivate(self.root) + self._set_reflect() sid = "test-reflect-max" # Write counter at max @@ -721,11 +743,16 @@ class TestSelfReflectStopHook(unittest.TestCase): code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id=sid)) self.assertEqual(code, 0) self.assertEqual(stdout, "", "Should allow stop after max iterations") + self.assertFalse( + (self.root / ".harness-reflect").exists(), + ".harness-reflect should be cleaned up after max iterations", + ) def test_disabled_via_env(self): """REFLECT_MAX_ITERATIONS=0 disables self-reflection.""" write_tasks(self.root, [{"id": "t1", "status": "completed"}]) deactivate(self.root) + self._set_reflect() code, stdout, _ = run_hook( REFLECT_HOOK, self._payload(session_id="test-reflect-disabled"), @@ -738,6 +765,7 @@ class TestSelfReflectStopHook(unittest.TestCase): """Missing session_id makes hook a no-op.""" write_tasks(self.root, [{"id": "t1", "status": "completed"}]) deactivate(self.root) + self._set_reflect() code, stdout, _ = run_hook(REFLECT_HOOK, {"cwd": self.tmpdir}) self.assertEqual(code, 0) self.assertEqual(stdout, "") @@ -745,7 +773,7 @@ class TestSelfReflectStopHook(unittest.TestCase): def test_empty_stdin_no_crash(self): """Empty stdin doesn't crash.""" write_tasks(self.root, [{"id": "t1", "status": "completed"}]) - activate(self.root) + self._set_reflect() proc = subprocess.run( [sys.executable, str(REFLECT_HOOK)], input="", @@ -760,6 +788,7 @@ class TestSelfReflectStopHook(unittest.TestCase): """HARNESS_STATE_ROOT env var is used for root discovery.""" write_tasks(self.root, [{"id": "t1", "status": "completed"}]) deactivate(self.root) + self._set_reflect() sid = "test-reflect-env" code, stdout, _ = run_hook( REFLECT_HOOK,