fix: 修复多 backend 并行日志 PID 混乱并移除包装格式 (#74) (#76)

* fix(logger): 修复多 backend 并行日志 PID 混乱并移除包装格式

**问题:**
- logger.go:288 使用 os.Getpid() 导致并行任务日志 PID 混乱
- 日志文件添加时间戳/PID/级别前缀包装,应输出 backend 原始内容

**修复:**
1. Logger 结构体添加 pid 字段,创建时捕获 PID
2. 日志写入使用固定 l.pid 替代 os.Getpid()
3. 移除日志输出格式包装,直接写入原始消息
4. 添加内存缓存 ERROR/WARN 条目,ExtractRecentErrors 从缓存读取
5. 优化 executor.go context 初始化顺序,避免重复创建 logger

**测试:**
- 所有测试通过(23.7s)
- 更新相关测试用例匹配新格式

Closes #74

* fix(logger): 增强并发日志隔离和 task ID 清理

## 核心修复

### 1. Task ID Sanitization (logger.go)
- 新增 sanitizeLogSuffix(): 清理非法字符 (/, \, :, 等)
- 新增 fallbackLogSuffix(): 为空/非法 ID 生成唯一后备名
- 新增 isSafeLogRune(): 仅允许 [A-Za-z0-9._-]
- 路径穿越防护: ../../../etc/passwd → etc-passwd-{hash}.log
- 超长 ID 处理: 截断到 64 字符 + hash 确保唯一性
- 自动创建 TMPDIR (MkdirAll)

### 2. 共享日志标识 (executor.go)
- 新增 taskLoggerHandle 结构: 封装 logger、路径、共享标志
- 新增 newTaskLoggerHandle(): 统一处理 logger 创建和回退
- printTaskStart(): 显示 "Log (shared)" 标识
- generateFinalOutput(): 在 summary 中标记共享日志
- 并发失败时明确标识所有任务使用共享主日志

### 3. 内部标志 (config.go)
- TaskResult.sharedLog: 非导出字段,标识共享日志状态

### 4. Race Detector 修复 (logger.go:209-219)
- Close() 在关闭 channel 前先等待 pendingWG
- 消除 Logger.Close() 与 Logger.log() 之间的竞态条件

## 测试覆盖

### 新增测试 (logger_suffix_test.go)
- TestLoggerWithSuffixSanitizesUnsafeSuffix: 非法字符清理
- TestLoggerWithSuffixReturnsErrorWhenTempDirNotWritable: 只读目录处理

### 新增测试 (executor_concurrent_test.go)
- TestConcurrentTaskLoggerFailure: 多任务失败时共享日志标识
- TestSanitizeTaskID: 并发场景下 task ID 清理验证

## 验证结果

 所有单元测试通过
 Race detector 无竞态 (65.4s)
 路径穿越攻击防护
 并发日志完全隔离
 边界情况正确处理

Resolves: PR #76 review feedback
Co-Authored-By: Codex Review <codex@anthropic.ai>

Generated with swe-agent-bot

Co-Authored-By: swe-agent-bot <agent@swe-agent.ai>

* fix(logger): 修复关键 bug 并优化日志系统 (v5.2.5)

修复 P0 级别问题:
- sanitizeLogSuffix 的 trim 碰撞(防止多 task 日志文件名冲突)
- ExtractRecentErrors 边界检查(防止 slice 越界)
- Logger.Close 阻塞风险(新增可配置超时机制)

代码质量改进:
- 删除无用字段 Logger.pid 和 logEntry.level
- 优化 sharedLog 标记绑定到最终 LogPath
- 移除日志前缀,直接输出 backend 原始内容

测试覆盖增强:
- 新增 4 个测试用例(碰撞防护、边界检查、缓存上限、shared 判定)
- 优化测试注释和逻辑

版本更新:5.2.4 → 5.2.5

Generated with swe-agent-bot

Co-Authored-By: swe-agent-bot <agent@swe-agent.ai>

---------

Co-authored-by: swe-agent-bot <agent@swe-agent.ai>
This commit is contained in:
ben
2025-12-17 10:33:38 +08:00
committed by GitHub
parent 50093036c3
commit fe5508228f
9 changed files with 730 additions and 131 deletions

View File

@@ -139,6 +139,38 @@ func taskLoggerFromContext(ctx context.Context) *Logger {
return logger
}
type taskLoggerHandle struct {
logger *Logger
path string
shared bool
closeFn func()
}
func newTaskLoggerHandle(taskID string) taskLoggerHandle {
taskLogger, err := NewLoggerWithSuffix(taskID)
if err == nil {
return taskLoggerHandle{
logger: taskLogger,
path: taskLogger.Path(),
closeFn: func() { _ = taskLogger.Close() },
}
}
msg := fmt.Sprintf("Failed to create task logger for %s: %v, using main logger", taskID, err)
mainLogger := activeLogger()
if mainLogger != nil {
logWarn(msg)
return taskLoggerHandle{
logger: mainLogger,
path: mainLogger.Path(),
shared: true,
}
}
fmt.Fprintln(os.Stderr, msg)
return taskLoggerHandle{}
}
// defaultRunCodexTaskFn is the default implementation of runCodexTaskFn (exposed for test reset)
func defaultRunCodexTaskFn(task TaskSpec, timeout int) TaskResult {
if task.WorkDir == "" {
@@ -255,7 +287,7 @@ func executeConcurrentWithContext(parentCtx context.Context, layers [][]TaskSpec
var startPrintMu sync.Mutex
bannerPrinted := false
printTaskStart := func(taskID, logPath string) {
printTaskStart := func(taskID, logPath string, shared bool) {
if logPath == "" {
return
}
@@ -264,7 +296,11 @@ func executeConcurrentWithContext(parentCtx context.Context, layers [][]TaskSpec
fmt.Fprintln(os.Stderr, "=== Starting Parallel Execution ===")
bannerPrinted = true
}
fmt.Fprintf(os.Stderr, "Task %s: Log: %s\n", taskID, logPath)
label := "Log"
if shared {
label = "Log (shared)"
}
fmt.Fprintf(os.Stderr, "Task %s: %s: %s\n", taskID, label, logPath)
startPrintMu.Unlock()
}
@@ -334,11 +370,11 @@ func executeConcurrentWithContext(parentCtx context.Context, layers [][]TaskSpec
wg.Add(1)
go func(ts TaskSpec) {
defer wg.Done()
var taskLogger *Logger
var taskLogPath string
handle := taskLoggerHandle{}
defer func() {
if r := recover(); r != nil {
resultsCh <- TaskResult{TaskID: ts.ID, ExitCode: 1, Error: fmt.Sprintf("panic: %v", r), LogPath: taskLogPath}
resultsCh <- TaskResult{TaskID: ts.ID, ExitCode: 1, Error: fmt.Sprintf("panic: %v", r), LogPath: taskLogPath, sharedLog: handle.shared}
}
}()
@@ -355,18 +391,29 @@ func executeConcurrentWithContext(parentCtx context.Context, layers [][]TaskSpec
logConcurrencyState("done", ts.ID, int(after), workerLimit)
}()
if l, err := NewLoggerWithSuffix(ts.ID); err == nil {
taskLogger = l
taskLogPath = l.Path()
defer func() { _ = taskLogger.Close() }()
handle = newTaskLoggerHandle(ts.ID)
taskLogPath = handle.path
if handle.closeFn != nil {
defer handle.closeFn()
}
ts.Context = withTaskLogger(ctx, taskLogger)
printTaskStart(ts.ID, taskLogPath)
taskCtx := ctx
if handle.logger != nil {
taskCtx = withTaskLogger(ctx, handle.logger)
}
ts.Context = taskCtx
printTaskStart(ts.ID, taskLogPath, handle.shared)
res := runCodexTaskFn(ts, timeout)
if res.LogPath == "" && taskLogPath != "" {
res.LogPath = taskLogPath
if taskLogPath != "" {
if res.LogPath == "" || (handle.shared && handle.logger != nil && res.LogPath == handle.logger.Path()) {
res.LogPath = taskLogPath
}
}
// 只有当最终的 LogPath 确实是共享 logger 的路径时才标记为 shared
if handle.shared && handle.logger != nil && res.LogPath == handle.logger.Path() {
res.sharedLog = true
}
resultsCh <- res
}(task)
@@ -444,7 +491,11 @@ func generateFinalOutput(results []TaskResult) string {
sb.WriteString(fmt.Sprintf("Session: %s\n", res.SessionID))
}
if res.LogPath != "" {
sb.WriteString(fmt.Sprintf("Log: %s\n", res.LogPath))
if res.sharedLog {
sb.WriteString(fmt.Sprintf("Log: %s (shared)\n", res.LogPath))
} else {
sb.WriteString(fmt.Sprintf("Log: %s\n", res.LogPath))
}
}
if res.Message != "" {
sb.WriteString(fmt.Sprintf("\n%s\n", res.Message))
@@ -485,6 +536,13 @@ func runCodexProcess(parentCtx context.Context, codexArgs []string, taskText str
}
func runCodexTaskWithContext(parentCtx context.Context, taskSpec TaskSpec, backend Backend, customArgs []string, useCustomArgs bool, silent bool, timeoutSec int) TaskResult {
if parentCtx == nil {
parentCtx = taskSpec.Context
}
if parentCtx == nil {
parentCtx = context.Background()
}
result := TaskResult{TaskID: taskSpec.ID}
injectedLogger := taskLoggerFromContext(parentCtx)
logger := injectedLogger
@@ -595,15 +653,15 @@ func runCodexTaskWithContext(parentCtx context.Context, taskSpec TaskSpec, backe
}
if !silent {
stdoutLogger = newLogWriter("CODEX_STDOUT: ", codexLogLineLimit)
stderrLogger = newLogWriter("CODEX_STDERR: ", codexLogLineLimit)
// Note: Empty prefix ensures backend output is logged as-is without any wrapper format.
// This preserves the original stdout/stderr content from codex/claude/gemini backends.
// Trade-off: Reduces distinguishability between stdout/stderr in logs, but maintains
// output fidelity which is critical for debugging backend-specific issues.
stdoutLogger = newLogWriter("", codexLogLineLimit)
stderrLogger = newLogWriter("", codexLogLineLimit)
}
ctx := parentCtx
if ctx == nil {
ctx = context.Background()
}
ctx, cancel := context.WithTimeout(ctx, time.Duration(timeoutSec)*time.Second)
defer cancel()
ctx, stop := signal.NotifyContext(ctx, syscall.SIGINT, syscall.SIGTERM)