mirror of
https://github.com/cexll/myclaude.git
synced 2026-03-03 15:33:14 +08:00
Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
64886d36e0 | ||
|
|
08877af530 |
@@ -545,7 +545,7 @@ func runParallelMode(cmd *cobra.Command, args []string, opts *cliOptions, v *vip
|
|||||||
results[i].CoverageNum = extractCoverageNum(results[i].Coverage)
|
results[i].CoverageNum = extractCoverageNum(results[i].Coverage)
|
||||||
results[i].FilesChanged = extractFilesChangedFromLines(lines)
|
results[i].FilesChanged = extractFilesChangedFromLines(lines)
|
||||||
results[i].TestsPassed, results[i].TestsFailed = extractTestResultsFromLines(lines)
|
results[i].TestsPassed, results[i].TestsFailed = extractTestResultsFromLines(lines)
|
||||||
results[i].KeyOutput = extractKeyOutputFromLines(lines, 150)
|
results[i].KeyOutput = extractKeyOutputFromLines(lines, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := writeStructuredOutput(outputPath, results); err != nil {
|
if err := writeStructuredOutput(outputPath, results); err != nil {
|
||||||
|
|||||||
@@ -479,11 +479,19 @@ func extractNumberBefore(s string, idx int) int {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// extractKeyOutputFromLines extracts key output from pre-split lines.
|
// extractKeyOutputFromLines extracts key output from pre-split lines.
|
||||||
|
// maxLen=0 means unlimited; maxLen<0 returns "".
|
||||||
func extractKeyOutputFromLines(lines []string, maxLen int) string {
|
func extractKeyOutputFromLines(lines []string, maxLen int) string {
|
||||||
if len(lines) == 0 || maxLen <= 0 {
|
if len(lines) == 0 || maxLen < 0 {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mayTruncate := func(s string) string {
|
||||||
|
if maxLen == 0 {
|
||||||
|
return s
|
||||||
|
}
|
||||||
|
return safeTruncate(s, maxLen)
|
||||||
|
}
|
||||||
|
|
||||||
// Priority 1: Look for explicit summary lines
|
// Priority 1: Look for explicit summary lines
|
||||||
for _, line := range lines {
|
for _, line := range lines {
|
||||||
line = strings.TrimSpace(line)
|
line = strings.TrimSpace(line)
|
||||||
@@ -498,7 +506,7 @@ func extractKeyOutputFromLines(lines []string, maxLen int) string {
|
|||||||
}
|
}
|
||||||
content = strings.TrimSpace(content)
|
content = strings.TrimSpace(content)
|
||||||
if len(content) > 0 {
|
if len(content) > 0 {
|
||||||
return safeTruncate(content, maxLen)
|
return mayTruncate(content)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -514,10 +522,10 @@ func extractKeyOutputFromLines(lines []string, maxLen int) string {
|
|||||||
if len(line) < 20 {
|
if len(line) < 20 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
return safeTruncate(line, maxLen)
|
return mayTruncate(line)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback: truncate entire message
|
// Fallback: return entire message
|
||||||
clean := strings.TrimSpace(strings.Join(lines, "\n"))
|
clean := strings.TrimSpace(strings.Join(lines, "\n"))
|
||||||
return safeTruncate(clean, maxLen)
|
return mayTruncate(clean)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -122,6 +122,33 @@ func TestSafeTruncate(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestExtractKeyOutputFromLines(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
lines []string
|
||||||
|
maxLen int
|
||||||
|
want string
|
||||||
|
}{
|
||||||
|
{"empty lines returns empty", []string{}, 0, ""},
|
||||||
|
{"negative maxLen returns empty", []string{"hello world"}, -1, ""},
|
||||||
|
{"unlimited: summary prefix returned in full", []string{"Summary: all done with a very long description that exceeds 150 characters and should not be cut off at all"}, 0, "all done with a very long description that exceeds 150 characters and should not be cut off at all"},
|
||||||
|
{"unlimited: first meaningful line returned in full", []string{"this is a meaningful line that is longer than twenty chars and should not be truncated"}, 0, "this is a meaningful line that is longer than twenty chars and should not be truncated"},
|
||||||
|
{"unlimited: skips noise lines", []string{"---", "# heading", "short", "this is a meaningful line longer than twenty chars"}, 0, "this is a meaningful line longer than twenty chars"},
|
||||||
|
{"limited: summary prefix truncated", []string{"Summary: hello world truncated"}, 10, "hello w..."},
|
||||||
|
{"limited: first meaningful line truncated", []string{"this is a long meaningful line that will be truncated"}, 20, "this is a long me..."},
|
||||||
|
{"unlimited: fallback joins all lines", []string{"short", "also short"}, 0, "short\nalso short"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
got := extractKeyOutputFromLines(tt.lines, tt.maxLen)
|
||||||
|
if got != tt.want {
|
||||||
|
t.Fatalf("extractKeyOutputFromLines(%v, %d) = %q, want %q", tt.lines, tt.maxLen, got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestSanitizeOutput(t *testing.T) {
|
func TestSanitizeOutput(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
|
|||||||
@@ -260,6 +260,11 @@ def main() -> int:
|
|||||||
# If nothing left to do, allow stop
|
# If nothing left to do, allow stop
|
||||||
if not pending_eligible and not retryable and not in_progress_blocking:
|
if not pending_eligible and not retryable and not in_progress_blocking:
|
||||||
_reset_block_counter(root)
|
_reset_block_counter(root)
|
||||||
|
# Signal self-reflect hook BEFORE removing active marker
|
||||||
|
try:
|
||||||
|
(root / ".harness-reflect").touch()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
try:
|
try:
|
||||||
(root / ".harness-active").unlink(missing_ok=True)
|
(root / ".harness-active").unlink(missing_ok=True)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|||||||
@@ -146,13 +146,15 @@ def main() -> int:
|
|||||||
if not session_id:
|
if not session_id:
|
||||||
return 0 # 无 session_id,放行
|
return 0 # 无 session_id,放行
|
||||||
|
|
||||||
# 守卫 1:harness 从未初始化过 → 完全不触发自检
|
# 守卫:仅当 harness 完成所有任务后(.harness-reflect 存在)才触发自省
|
||||||
|
# 这避免了两个问题:
|
||||||
|
# 1. 历史残留的 harness-tasks.json 导致误触发(false positive)
|
||||||
|
# 2. harness-stop.py 移除 .harness-active 后 Claude Code 跳过后续 hook(false negative)
|
||||||
root = _find_harness_root(payload)
|
root = _find_harness_root(payload)
|
||||||
if root is None:
|
if root is None:
|
||||||
return 0 # harness 未曾使用,不触发自省
|
return 0
|
||||||
|
|
||||||
# 守卫 2:harness 仍活跃 → 由 harness-stop.py 全权管理
|
if not (root / ".harness-reflect").is_file():
|
||||||
if (root / ".harness-active").is_file():
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
# 读取最大迭代次数
|
# 读取最大迭代次数
|
||||||
@@ -168,8 +170,12 @@ def main() -> int:
|
|||||||
# 读取当前计数
|
# 读取当前计数
|
||||||
count = _read_counter(session_id)
|
count = _read_counter(session_id)
|
||||||
|
|
||||||
# 超过最大次数,放行
|
# 超过最大次数,清理 marker 并放行
|
||||||
if count >= max_iter:
|
if count >= max_iter:
|
||||||
|
try:
|
||||||
|
(root / ".harness-reflect").unlink(missing_ok=True)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
# 递增计数
|
# 递增计数
|
||||||
|
|||||||
@@ -200,7 +200,7 @@ class TestStopHookTaskLogic(unittest.TestCase):
|
|||||||
return {"cwd": self.tmpdir, **extra}
|
return {"cwd": self.tmpdir, **extra}
|
||||||
|
|
||||||
def test_all_completed_allows_stop(self):
|
def test_all_completed_allows_stop(self):
|
||||||
"""When all tasks are completed, stop is allowed."""
|
"""When all tasks are completed, stop is allowed and .harness-reflect created."""
|
||||||
write_tasks(self.root, [
|
write_tasks(self.root, [
|
||||||
{"id": "t1", "status": "completed"},
|
{"id": "t1", "status": "completed"},
|
||||||
{"id": "t2", "status": "completed"},
|
{"id": "t2", "status": "completed"},
|
||||||
@@ -209,6 +209,10 @@ class TestStopHookTaskLogic(unittest.TestCase):
|
|||||||
self.assertEqual(code, 0)
|
self.assertEqual(code, 0)
|
||||||
self.assertEqual(stdout, "")
|
self.assertEqual(stdout, "")
|
||||||
self.assertFalse((self.root / ".harness-active").exists())
|
self.assertFalse((self.root / ".harness-active").exists())
|
||||||
|
self.assertTrue(
|
||||||
|
(self.root / ".harness-reflect").exists(),
|
||||||
|
".harness-reflect should be created when all tasks complete",
|
||||||
|
)
|
||||||
|
|
||||||
def test_pending_with_unmet_deps_allows_stop(self):
|
def test_pending_with_unmet_deps_allows_stop(self):
|
||||||
"""Pending tasks with unmet dependencies don't block stop."""
|
"""Pending tasks with unmet dependencies don't block stop."""
|
||||||
@@ -644,7 +648,7 @@ REFLECT_HOOK = HOOKS_DIR / "self-reflect-stop.py"
|
|||||||
|
|
||||||
|
|
||||||
class TestSelfReflectStopHook(unittest.TestCase):
|
class TestSelfReflectStopHook(unittest.TestCase):
|
||||||
"""self-reflect-stop.py must only trigger when harness was used and completed."""
|
"""self-reflect-stop.py must only trigger when .harness-reflect marker exists."""
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.tmpdir = tempfile.mkdtemp()
|
self.tmpdir = tempfile.mkdtemp()
|
||||||
@@ -663,14 +667,18 @@ class TestSelfReflectStopHook(unittest.TestCase):
|
|||||||
def _payload(self, session_id="test-reflect-001", **extra):
|
def _payload(self, session_id="test-reflect-001", **extra):
|
||||||
return {"cwd": self.tmpdir, "session_id": session_id, **extra}
|
return {"cwd": self.tmpdir, "session_id": session_id, **extra}
|
||||||
|
|
||||||
|
def _set_reflect(self):
|
||||||
|
"""Create .harness-reflect marker (simulates harness completion)."""
|
||||||
|
(self.root / ".harness-reflect").touch()
|
||||||
|
|
||||||
def test_no_harness_root_is_noop(self):
|
def test_no_harness_root_is_noop(self):
|
||||||
"""When harness-tasks.json doesn't exist, hook is a complete no-op."""
|
"""When harness-tasks.json doesn't exist, hook is a complete no-op."""
|
||||||
code, stdout, stderr = run_hook(REFLECT_HOOK, self._payload())
|
code, stdout, stderr = run_hook(REFLECT_HOOK, self._payload())
|
||||||
self.assertEqual(code, 0)
|
self.assertEqual(code, 0)
|
||||||
self.assertEqual(stdout, "", "Should produce no output when harness never used")
|
self.assertEqual(stdout, "", "Should produce no output when harness never used")
|
||||||
|
|
||||||
def test_harness_active_defers(self):
|
def test_harness_active_no_reflect_marker(self):
|
||||||
"""When .harness-active exists, hook defers to harness-stop.py."""
|
"""When .harness-active exists but no .harness-reflect, hook is no-op."""
|
||||||
write_tasks(self.root, [
|
write_tasks(self.root, [
|
||||||
{"id": "t1", "status": "pending", "depends_on": []},
|
{"id": "t1", "status": "pending", "depends_on": []},
|
||||||
])
|
])
|
||||||
@@ -679,12 +687,24 @@ class TestSelfReflectStopHook(unittest.TestCase):
|
|||||||
self.assertEqual(code, 0)
|
self.assertEqual(code, 0)
|
||||||
self.assertEqual(stdout, "", "Should not self-reflect while harness is active")
|
self.assertEqual(stdout, "", "Should not self-reflect while harness is active")
|
||||||
|
|
||||||
def test_harness_completed_triggers_reflection(self):
|
def test_stale_tasks_without_reflect_marker_is_noop(self):
|
||||||
"""When harness-tasks.json exists but .harness-active removed, triggers self-reflection."""
|
"""Stale harness-tasks.json without .harness-reflect does NOT trigger (fixes false positive)."""
|
||||||
write_tasks(self.root, [
|
write_tasks(self.root, [
|
||||||
{"id": "t1", "status": "completed"},
|
{"id": "t1", "status": "completed"},
|
||||||
])
|
])
|
||||||
deactivate(self.root)
|
deactivate(self.root)
|
||||||
|
# No .harness-reflect marker — this is a stale file from a previous run
|
||||||
|
code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id="test-stale"))
|
||||||
|
self.assertEqual(code, 0)
|
||||||
|
self.assertEqual(stdout, "", "Stale harness-tasks.json should NOT trigger self-reflect")
|
||||||
|
|
||||||
|
def test_harness_completed_triggers_reflection(self):
|
||||||
|
"""When .harness-reflect marker exists, triggers self-reflection."""
|
||||||
|
write_tasks(self.root, [
|
||||||
|
{"id": "t1", "status": "completed"},
|
||||||
|
])
|
||||||
|
deactivate(self.root)
|
||||||
|
self._set_reflect()
|
||||||
sid = "test-reflect-trigger"
|
sid = "test-reflect-trigger"
|
||||||
code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id=sid))
|
code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id=sid))
|
||||||
self.assertEqual(code, 0)
|
self.assertEqual(code, 0)
|
||||||
@@ -696,6 +716,7 @@ class TestSelfReflectStopHook(unittest.TestCase):
|
|||||||
"""Each invocation increments the iteration counter."""
|
"""Each invocation increments the iteration counter."""
|
||||||
write_tasks(self.root, [{"id": "t1", "status": "completed"}])
|
write_tasks(self.root, [{"id": "t1", "status": "completed"}])
|
||||||
deactivate(self.root)
|
deactivate(self.root)
|
||||||
|
self._set_reflect()
|
||||||
sid = "test-reflect-counter"
|
sid = "test-reflect-counter"
|
||||||
|
|
||||||
# First call: iteration 1
|
# First call: iteration 1
|
||||||
@@ -708,10 +729,11 @@ class TestSelfReflectStopHook(unittest.TestCase):
|
|||||||
data = json.loads(stdout)
|
data = json.loads(stdout)
|
||||||
self.assertIn("2/5", data["reason"])
|
self.assertIn("2/5", data["reason"])
|
||||||
|
|
||||||
def test_max_iterations_allows_stop(self):
|
def test_max_iterations_allows_stop_and_cleans_marker(self):
|
||||||
"""After max iterations, hook allows stop (no output)."""
|
"""After max iterations, hook allows stop and removes .harness-reflect."""
|
||||||
write_tasks(self.root, [{"id": "t1", "status": "completed"}])
|
write_tasks(self.root, [{"id": "t1", "status": "completed"}])
|
||||||
deactivate(self.root)
|
deactivate(self.root)
|
||||||
|
self._set_reflect()
|
||||||
sid = "test-reflect-max"
|
sid = "test-reflect-max"
|
||||||
|
|
||||||
# Write counter at max
|
# Write counter at max
|
||||||
@@ -721,11 +743,16 @@ class TestSelfReflectStopHook(unittest.TestCase):
|
|||||||
code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id=sid))
|
code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id=sid))
|
||||||
self.assertEqual(code, 0)
|
self.assertEqual(code, 0)
|
||||||
self.assertEqual(stdout, "", "Should allow stop after max iterations")
|
self.assertEqual(stdout, "", "Should allow stop after max iterations")
|
||||||
|
self.assertFalse(
|
||||||
|
(self.root / ".harness-reflect").exists(),
|
||||||
|
".harness-reflect should be cleaned up after max iterations",
|
||||||
|
)
|
||||||
|
|
||||||
def test_disabled_via_env(self):
|
def test_disabled_via_env(self):
|
||||||
"""REFLECT_MAX_ITERATIONS=0 disables self-reflection."""
|
"""REFLECT_MAX_ITERATIONS=0 disables self-reflection."""
|
||||||
write_tasks(self.root, [{"id": "t1", "status": "completed"}])
|
write_tasks(self.root, [{"id": "t1", "status": "completed"}])
|
||||||
deactivate(self.root)
|
deactivate(self.root)
|
||||||
|
self._set_reflect()
|
||||||
code, stdout, _ = run_hook(
|
code, stdout, _ = run_hook(
|
||||||
REFLECT_HOOK,
|
REFLECT_HOOK,
|
||||||
self._payload(session_id="test-reflect-disabled"),
|
self._payload(session_id="test-reflect-disabled"),
|
||||||
@@ -738,6 +765,7 @@ class TestSelfReflectStopHook(unittest.TestCase):
|
|||||||
"""Missing session_id makes hook a no-op."""
|
"""Missing session_id makes hook a no-op."""
|
||||||
write_tasks(self.root, [{"id": "t1", "status": "completed"}])
|
write_tasks(self.root, [{"id": "t1", "status": "completed"}])
|
||||||
deactivate(self.root)
|
deactivate(self.root)
|
||||||
|
self._set_reflect()
|
||||||
code, stdout, _ = run_hook(REFLECT_HOOK, {"cwd": self.tmpdir})
|
code, stdout, _ = run_hook(REFLECT_HOOK, {"cwd": self.tmpdir})
|
||||||
self.assertEqual(code, 0)
|
self.assertEqual(code, 0)
|
||||||
self.assertEqual(stdout, "")
|
self.assertEqual(stdout, "")
|
||||||
@@ -745,7 +773,7 @@ class TestSelfReflectStopHook(unittest.TestCase):
|
|||||||
def test_empty_stdin_no_crash(self):
|
def test_empty_stdin_no_crash(self):
|
||||||
"""Empty stdin doesn't crash."""
|
"""Empty stdin doesn't crash."""
|
||||||
write_tasks(self.root, [{"id": "t1", "status": "completed"}])
|
write_tasks(self.root, [{"id": "t1", "status": "completed"}])
|
||||||
activate(self.root)
|
self._set_reflect()
|
||||||
proc = subprocess.run(
|
proc = subprocess.run(
|
||||||
[sys.executable, str(REFLECT_HOOK)],
|
[sys.executable, str(REFLECT_HOOK)],
|
||||||
input="",
|
input="",
|
||||||
@@ -760,6 +788,7 @@ class TestSelfReflectStopHook(unittest.TestCase):
|
|||||||
"""HARNESS_STATE_ROOT env var is used for root discovery."""
|
"""HARNESS_STATE_ROOT env var is used for root discovery."""
|
||||||
write_tasks(self.root, [{"id": "t1", "status": "completed"}])
|
write_tasks(self.root, [{"id": "t1", "status": "completed"}])
|
||||||
deactivate(self.root)
|
deactivate(self.root)
|
||||||
|
self._set_reflect()
|
||||||
sid = "test-reflect-env"
|
sid = "test-reflect-env"
|
||||||
code, stdout, _ = run_hook(
|
code, stdout, _ = run_hook(
|
||||||
REFLECT_HOOK,
|
REFLECT_HOOK,
|
||||||
|
|||||||
Reference in New Issue
Block a user