fix(codeagent-wrapper): remove 150-char truncation limit for KeyOutput in summary mode

maxLen=0 in extractKeyOutputFromLines now means unlimited, so the orchestrator receives the agent's full concluding summary instead of a 150-character stub. Negative maxLen still returns empty. Structured summary format and log-path-only behaviour are preserved. Generated with SWE-Agent.ai Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
fix(harness): replace implicit .harness-active absence with explicit .harness-reflect marker for self-reflect triggering
2026-03-03 15:33:14 +08:00 · 2026-03-03 11:12:02 +08:00 · 2026-03-02 00:30:34 +08:00
6 changed files with 95 additions and 20 deletions
--- a/codeagent-wrapper/internal/app/cli.go
+++ b/codeagent-wrapper/internal/app/cli.go
@@ -545,7 +545,7 @@ func runParallelMode(cmd *cobra.Command, args []string, opts *cliOptions, v *vip
 		results[i].CoverageNum = extractCoverageNum(results[i].Coverage)
 		results[i].FilesChanged = extractFilesChangedFromLines(lines)
 		results[i].TestsPassed, results[i].TestsFailed = extractTestResultsFromLines(lines)
-		results[i].KeyOutput = extractKeyOutputFromLines(lines, 150)
+		results[i].KeyOutput = extractKeyOutputFromLines(lines, 0)
 	}

 	if err := writeStructuredOutput(outputPath, results); err != nil {
--- a/codeagent-wrapper/internal/app/utils.go
+++ b/codeagent-wrapper/internal/app/utils.go
@@ -479,11 +479,19 @@ func extractNumberBefore(s string, idx int) int {
 }

 // extractKeyOutputFromLines extracts key output from pre-split lines.
+// maxLen=0 means unlimited; maxLen<0 returns "".
 func extractKeyOutputFromLines(lines []string, maxLen int) string {
-	if len(lines) == 0 || maxLen <= 0 {
+	if len(lines) == 0 || maxLen < 0 {
 		return ""
 	}

+	mayTruncate := func(s string) string {
+		if maxLen == 0 {
+			return s
+		}
+		return safeTruncate(s, maxLen)
+	}
+
 	// Priority 1: Look for explicit summary lines
 	for _, line := range lines {
 		line = strings.TrimSpace(line)
@@ -498,7 +506,7 @@ func extractKeyOutputFromLines(lines []string, maxLen int) string {
 			}
 			content = strings.TrimSpace(content)
 			if len(content) > 0 {
-				return safeTruncate(content, maxLen)
+				return mayTruncate(content)
 			}
 		}
 	}
@@ -514,10 +522,10 @@ func extractKeyOutputFromLines(lines []string, maxLen int) string {
 		if len(line) < 20 {
 			continue
 		}
-		return safeTruncate(line, maxLen)
+		return mayTruncate(line)
 	}

-	// Fallback: truncate entire message
+	// Fallback: return entire message
 	clean := strings.TrimSpace(strings.Join(lines, "\n"))
-	return safeTruncate(clean, maxLen)
+	return mayTruncate(clean)
 }
--- a/codeagent-wrapper/internal/app/utils_test.go
+++ b/codeagent-wrapper/internal/app/utils_test.go
@@ -122,6 +122,33 @@ func TestSafeTruncate(t *testing.T) {
 	}
 }

+func TestExtractKeyOutputFromLines(t *testing.T) {
+	tests := []struct {
+		name   string
+		lines  []string
+		maxLen int
+		want   string
+	}{
+		{"empty lines returns empty", []string{}, 0, ""},
+		{"negative maxLen returns empty", []string{"hello world"}, -1, ""},
+		{"unlimited: summary prefix returned in full", []string{"Summary: all done with a very long description that exceeds 150 characters and should not be cut off at all"}, 0, "all done with a very long description that exceeds 150 characters and should not be cut off at all"},
+		{"unlimited: first meaningful line returned in full", []string{"this is a meaningful line that is longer than twenty chars and should not be truncated"}, 0, "this is a meaningful line that is longer than twenty chars and should not be truncated"},
+		{"unlimited: skips noise lines", []string{"---", "# heading", "short", "this is a meaningful line longer than twenty chars"}, 0, "this is a meaningful line longer than twenty chars"},
+		{"limited: summary prefix truncated", []string{"Summary: hello world truncated"}, 10, "hello w..."},
+		{"limited: first meaningful line truncated", []string{"this is a long meaningful line that will be truncated"}, 20, "this is a long me..."},
+		{"unlimited: fallback joins all lines", []string{"short", "also short"}, 0, "short\nalso short"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := extractKeyOutputFromLines(tt.lines, tt.maxLen)
+			if got != tt.want {
+				t.Fatalf("extractKeyOutputFromLines(%v, %d) = %q, want %q", tt.lines, tt.maxLen, got, tt.want)
+			}
+		})
+	}
+}
+
 func TestSanitizeOutput(t *testing.T) {
 	tests := []struct {
 		name string
--- a/skills/harness/hooks/harness-stop.py
+++ b/skills/harness/hooks/harness-stop.py
@@ -260,6 +260,11 @@ def main() -> int:
    # If nothing left to do, allow stop
    if not pending_eligible and not retryable and not in_progress_blocking:
        _reset_block_counter(root)
+        # Signal self-reflect hook BEFORE removing active marker
+        try:
+            (root / ".harness-reflect").touch()
+        except Exception:
+            pass
        try:
            (root / ".harness-active").unlink(missing_ok=True)
        except Exception:
--- a/skills/harness/hooks/self-reflect-stop.py
+++ b/skills/harness/hooks/self-reflect-stop.py
@@ -146,13 +146,15 @@ def main() -> int:
    if not session_id:
        return 0  # 无 session_id，放行

-    # 守卫 1：harness 从未初始化过 → 完全不触发自检
+    # 守卫：仅当 harness 完成所有任务后（.harness-reflect 存在）才触发自省
+    # 这避免了两个问题：
+    #   1. 历史残留的 harness-tasks.json 导致误触发（false positive）
+    #   2. harness-stop.py 移除 .harness-active 后 Claude Code 跳过后续 hook（false negative）
    root = _find_harness_root(payload)
    if root is None:
-        return 0  # harness 未曾使用，不触发自省
+        return 0

-    # 守卫 2：harness 仍活跃 → 由 harness-stop.py 全权管理
-    if (root / ".harness-active").is_file():
+    if not (root / ".harness-reflect").is_file():
        return 0

    # 读取最大迭代次数
@@ -168,8 +170,12 @@ def main() -> int:
    # 读取当前计数
    count = _read_counter(session_id)

-    # 超过最大次数，放行
+    # 超过最大次数，清理 marker 并放行
    if count >= max_iter:
+        try:
+            (root / ".harness-reflect").unlink(missing_ok=True)
+        except Exception:
+            pass
        return 0

    # 递增计数
--- a/skills/harness/tests/test_hooks.py
+++ b/skills/harness/tests/test_hooks.py
@@ -200,7 +200,7 @@ class TestStopHookTaskLogic(unittest.TestCase):
        return {"cwd": self.tmpdir, **extra}

    def test_all_completed_allows_stop(self):
-        """When all tasks are completed, stop is allowed."""
+        """When all tasks are completed, stop is allowed and .harness-reflect created."""
        write_tasks(self.root, [
            {"id": "t1", "status": "completed"},
            {"id": "t2", "status": "completed"},
@@ -209,6 +209,10 @@ class TestStopHookTaskLogic(unittest.TestCase):
        self.assertEqual(code, 0)
        self.assertEqual(stdout, "")
        self.assertFalse((self.root / ".harness-active").exists())
+        self.assertTrue(
+            (self.root / ".harness-reflect").exists(),
+            ".harness-reflect should be created when all tasks complete",
+        )

    def test_pending_with_unmet_deps_allows_stop(self):
        """Pending tasks with unmet dependencies don't block stop."""
@@ -644,7 +648,7 @@ REFLECT_HOOK = HOOKS_DIR / "self-reflect-stop.py"


 class TestSelfReflectStopHook(unittest.TestCase):
-    """self-reflect-stop.py must only trigger when harness was used and completed."""
+    """self-reflect-stop.py must only trigger when .harness-reflect marker exists."""

    def setUp(self):
        self.tmpdir = tempfile.mkdtemp()
@@ -663,14 +667,18 @@ class TestSelfReflectStopHook(unittest.TestCase):
    def _payload(self, session_id="test-reflect-001", **extra):
        return {"cwd": self.tmpdir, "session_id": session_id, **extra}

+    def _set_reflect(self):
+        """Create .harness-reflect marker (simulates harness completion)."""
+        (self.root / ".harness-reflect").touch()
+
    def test_no_harness_root_is_noop(self):
        """When harness-tasks.json doesn't exist, hook is a complete no-op."""
        code, stdout, stderr = run_hook(REFLECT_HOOK, self._payload())
        self.assertEqual(code, 0)
        self.assertEqual(stdout, "", "Should produce no output when harness never used")

-    def test_harness_active_defers(self):
-        """When .harness-active exists, hook defers to harness-stop.py."""
+    def test_harness_active_no_reflect_marker(self):
+        """When .harness-active exists but no .harness-reflect, hook is no-op."""
        write_tasks(self.root, [
            {"id": "t1", "status": "pending", "depends_on": []},
        ])
@@ -679,12 +687,24 @@ class TestSelfReflectStopHook(unittest.TestCase):
        self.assertEqual(code, 0)
        self.assertEqual(stdout, "", "Should not self-reflect while harness is active")

-    def test_harness_completed_triggers_reflection(self):
-        """When harness-tasks.json exists but .harness-active removed, triggers self-reflection."""
+    def test_stale_tasks_without_reflect_marker_is_noop(self):
+        """Stale harness-tasks.json without .harness-reflect does NOT trigger (fixes false positive)."""
        write_tasks(self.root, [
            {"id": "t1", "status": "completed"},
        ])
        deactivate(self.root)
+        # No .harness-reflect marker — this is a stale file from a previous run
+        code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id="test-stale"))
+        self.assertEqual(code, 0)
+        self.assertEqual(stdout, "", "Stale harness-tasks.json should NOT trigger self-reflect")
+
+    def test_harness_completed_triggers_reflection(self):
+        """When .harness-reflect marker exists, triggers self-reflection."""
+        write_tasks(self.root, [
+            {"id": "t1", "status": "completed"},
+        ])
+        deactivate(self.root)
+        self._set_reflect()
        sid = "test-reflect-trigger"
        code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id=sid))
        self.assertEqual(code, 0)
@@ -696,6 +716,7 @@ class TestSelfReflectStopHook(unittest.TestCase):
        """Each invocation increments the iteration counter."""
        write_tasks(self.root, [{"id": "t1", "status": "completed"}])
        deactivate(self.root)
+        self._set_reflect()
        sid = "test-reflect-counter"

        # First call: iteration 1
@@ -708,10 +729,11 @@ class TestSelfReflectStopHook(unittest.TestCase):
        data = json.loads(stdout)
        self.assertIn("2/5", data["reason"])

-    def test_max_iterations_allows_stop(self):
-        """After max iterations, hook allows stop (no output)."""
+    def test_max_iterations_allows_stop_and_cleans_marker(self):
+        """After max iterations, hook allows stop and removes .harness-reflect."""
        write_tasks(self.root, [{"id": "t1", "status": "completed"}])
        deactivate(self.root)
+        self._set_reflect()
        sid = "test-reflect-max"

        # Write counter at max
@@ -721,11 +743,16 @@ class TestSelfReflectStopHook(unittest.TestCase):
        code, stdout, _ = run_hook(REFLECT_HOOK, self._payload(session_id=sid))
        self.assertEqual(code, 0)
        self.assertEqual(stdout, "", "Should allow stop after max iterations")
+        self.assertFalse(
+            (self.root / ".harness-reflect").exists(),
+            ".harness-reflect should be cleaned up after max iterations",
+        )

    def test_disabled_via_env(self):
        """REFLECT_MAX_ITERATIONS=0 disables self-reflection."""
        write_tasks(self.root, [{"id": "t1", "status": "completed"}])
        deactivate(self.root)
+        self._set_reflect()
        code, stdout, _ = run_hook(
            REFLECT_HOOK,
            self._payload(session_id="test-reflect-disabled"),
@@ -738,6 +765,7 @@ class TestSelfReflectStopHook(unittest.TestCase):
        """Missing session_id makes hook a no-op."""
        write_tasks(self.root, [{"id": "t1", "status": "completed"}])
        deactivate(self.root)
+        self._set_reflect()
        code, stdout, _ = run_hook(REFLECT_HOOK, {"cwd": self.tmpdir})
        self.assertEqual(code, 0)
        self.assertEqual(stdout, "")
@@ -745,7 +773,7 @@ class TestSelfReflectStopHook(unittest.TestCase):
    def test_empty_stdin_no_crash(self):
        """Empty stdin doesn't crash."""
        write_tasks(self.root, [{"id": "t1", "status": "completed"}])
-        activate(self.root)
+        self._set_reflect()
        proc = subprocess.run(
            [sys.executable, str(REFLECT_HOOK)],
            input="",
@@ -760,6 +788,7 @@ class TestSelfReflectStopHook(unittest.TestCase):
        """HARNESS_STATE_ROOT env var is used for root discovery."""
        write_tasks(self.root, [{"id": "t1", "status": "completed"}])
        deactivate(self.root)
+        self._set_reflect()
        sid = "test-reflect-env"
        code, stdout, _ = run_hook(
            REFLECT_HOOK,