mirror of
https://github.com/cexll/myclaude.git
synced 2026-02-05 02:30:26 +08:00
Improve backend termination after message and extend timeout (#86)
* Improve backend termination after message and extend timeout
* fix: prevent premature backend termination and revert timeout
Critical fixes for executor.go termination logic:
1. Add onComplete callback to prevent premature termination
- Parser now distinguishes between "any message" (onMessage) and
"terminal event" (onComplete)
- Codex: triggers onComplete on thread.completed
- Claude: triggers onComplete on type:"result"
- Gemini: triggers onComplete on type:"result" + terminal status
2. Fix executor to wait for completion events
- Replace messageSeen termination trigger with completeSeen
- Only start postMessageTerminateDelay after terminal event
- Prevents killing the backend before its final answer arrives in multi-message scenarios
3. Fix terminated flag synchronization
- Only set terminated=true if terminateCommandFn actually succeeds
- Prevents "marked as terminated but not actually terminated" state
4. Simplify timer cleanup logic
- Unified non-blocking drain on messageTimer.C
- Remove dependency on messageTimerCh nil state
5. Revert defaultTimeout from 24h to 2h
- 24h (86400s) → 2h (7200s) to avoid operational risks
- A 12× timeout increase could cause resource exhaustion
- Users needing longer tasks can use CODEX_TIMEOUT env var
All tests pass. Resolves the early-termination bug identified during code review.
Co-authored-by: Codeagent (Codex)
Generated with SWE-Agent.ai
Co-Authored-By: SWE-Agent.ai <noreply@swe-agent.ai>
---------
Co-authored-by: SWE-Agent.ai <noreply@swe-agent.ai>
This commit is contained in:
@@ -16,6 +16,8 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// postMessageTerminateDelay is how long the wait loop lets the backend keep
// running after a completion signal is observed before terminating it.
const postMessageTerminateDelay = 1 * time.Second
|
||||
|
||||
// commandRunner abstracts exec.Cmd for testability
|
||||
type commandRunner interface {
|
||||
Start() error
|
||||
@@ -729,6 +731,7 @@ func runCodexTaskWithContext(parentCtx context.Context, taskSpec TaskSpec, backe
|
||||
// Start parse goroutine BEFORE starting the command to avoid race condition
|
||||
// where fast-completing commands close stdout before parser starts reading
|
||||
messageSeen := make(chan struct{}, 1)
|
||||
completeSeen := make(chan struct{}, 1)
|
||||
parseCh := make(chan parseResult, 1)
|
||||
go func() {
|
||||
msg, tid := parseJSONStreamInternal(stdoutReader, logWarnFn, logInfoFn, func() {
|
||||
@@ -736,6 +739,11 @@ func runCodexTaskWithContext(parentCtx context.Context, taskSpec TaskSpec, backe
|
||||
case messageSeen <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
}, func() {
|
||||
select {
|
||||
case completeSeen <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
})
|
||||
parseCh <- parseResult{message: msg, threadID: tid}
|
||||
}()
|
||||
@@ -773,17 +781,63 @@ func runCodexTaskWithContext(parentCtx context.Context, taskSpec TaskSpec, backe
|
||||
waitCh := make(chan error, 1)
|
||||
go func() { waitCh <- cmd.Wait() }()
|
||||
|
||||
var waitErr error
|
||||
var forceKillTimer *forceKillTimer
|
||||
var ctxCancelled bool
|
||||
var (
|
||||
waitErr error
|
||||
forceKillTimer *forceKillTimer
|
||||
ctxCancelled bool
|
||||
messageTimer *time.Timer
|
||||
messageTimerCh <-chan time.Time
|
||||
forcedAfterComplete bool
|
||||
terminated bool
|
||||
messageSeenObserved bool
|
||||
completeSeenObserved bool
|
||||
)
|
||||
|
||||
select {
|
||||
case waitErr = <-waitCh:
|
||||
case <-ctx.Done():
|
||||
ctxCancelled = true
|
||||
logErrorFn(cancelReason(commandName, ctx))
|
||||
forceKillTimer = terminateCommandFn(cmd)
|
||||
waitErr = <-waitCh
|
||||
waitLoop:
|
||||
for {
|
||||
select {
|
||||
case waitErr = <-waitCh:
|
||||
break waitLoop
|
||||
case <-ctx.Done():
|
||||
ctxCancelled = true
|
||||
logErrorFn(cancelReason(commandName, ctx))
|
||||
if !terminated {
|
||||
if timer := terminateCommandFn(cmd); timer != nil {
|
||||
forceKillTimer = timer
|
||||
terminated = true
|
||||
}
|
||||
}
|
||||
waitErr = <-waitCh
|
||||
break waitLoop
|
||||
case <-messageTimerCh:
|
||||
forcedAfterComplete = true
|
||||
messageTimerCh = nil
|
||||
if !terminated {
|
||||
logWarnFn(fmt.Sprintf("%s output parsed; terminating lingering backend", commandName))
|
||||
if timer := terminateCommandFn(cmd); timer != nil {
|
||||
forceKillTimer = timer
|
||||
terminated = true
|
||||
}
|
||||
}
|
||||
case <-completeSeen:
|
||||
completeSeenObserved = true
|
||||
if messageTimer != nil {
|
||||
continue
|
||||
}
|
||||
messageTimer = time.NewTimer(postMessageTerminateDelay)
|
||||
messageTimerCh = messageTimer.C
|
||||
case <-messageSeen:
|
||||
messageSeenObserved = true
|
||||
}
|
||||
}
|
||||
|
||||
if messageTimer != nil {
|
||||
if !messageTimer.Stop() {
|
||||
select {
|
||||
case <-messageTimer.C:
|
||||
default:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if forceKillTimer != nil {
|
||||
@@ -791,10 +845,14 @@ func runCodexTaskWithContext(parentCtx context.Context, taskSpec TaskSpec, backe
|
||||
}
|
||||
|
||||
var parsed parseResult
|
||||
if ctxCancelled {
|
||||
switch {
|
||||
case ctxCancelled:
|
||||
closeWithReason(stdout, stdoutCloseReasonCtx)
|
||||
parsed = <-parseCh
|
||||
} else {
|
||||
case messageSeenObserved || completeSeenObserved:
|
||||
closeWithReason(stdout, stdoutCloseReasonWait)
|
||||
parsed = <-parseCh
|
||||
default:
|
||||
drainTimer := time.NewTimer(stdoutDrainTimeout)
|
||||
defer drainTimer.Stop()
|
||||
|
||||
@@ -802,6 +860,11 @@ func runCodexTaskWithContext(parentCtx context.Context, taskSpec TaskSpec, backe
|
||||
case parsed = <-parseCh:
|
||||
closeWithReason(stdout, stdoutCloseReasonWait)
|
||||
case <-messageSeen:
|
||||
messageSeenObserved = true
|
||||
closeWithReason(stdout, stdoutCloseReasonWait)
|
||||
parsed = <-parseCh
|
||||
case <-completeSeen:
|
||||
completeSeenObserved = true
|
||||
closeWithReason(stdout, stdoutCloseReasonWait)
|
||||
parsed = <-parseCh
|
||||
case <-drainTimer.C:
|
||||
@@ -822,17 +885,21 @@ func runCodexTaskWithContext(parentCtx context.Context, taskSpec TaskSpec, backe
|
||||
}
|
||||
|
||||
if waitErr != nil {
|
||||
if exitErr, ok := waitErr.(*exec.ExitError); ok {
|
||||
code := exitErr.ExitCode()
|
||||
logErrorFn(fmt.Sprintf("%s exited with status %d", commandName, code))
|
||||
result.ExitCode = code
|
||||
result.Error = attachStderr(fmt.Sprintf("%s exited with status %d", commandName, code))
|
||||
if forcedAfterComplete && parsed.message != "" {
|
||||
logWarnFn(fmt.Sprintf("%s terminated after delivering output", commandName))
|
||||
} else {
|
||||
if exitErr, ok := waitErr.(*exec.ExitError); ok {
|
||||
code := exitErr.ExitCode()
|
||||
logErrorFn(fmt.Sprintf("%s exited with status %d", commandName, code))
|
||||
result.ExitCode = code
|
||||
result.Error = attachStderr(fmt.Sprintf("%s exited with status %d", commandName, code))
|
||||
return result
|
||||
}
|
||||
logErrorFn(commandName + " error: " + waitErr.Error())
|
||||
result.ExitCode = 1
|
||||
result.Error = attachStderr(commandName + " error: " + waitErr.Error())
|
||||
return result
|
||||
}
|
||||
logErrorFn(commandName + " error: " + waitErr.Error())
|
||||
result.ExitCode = 1
|
||||
result.Error = attachStderr(commandName + " error: " + waitErr.Error())
|
||||
return result
|
||||
}
|
||||
|
||||
message := parsed.message
|
||||
|
||||
Reference in New Issue
Block a user