Files
myclaude/codeagent-wrapper/main.go
ben fe5508228f fix: 修复多 backend 并行日志 PID 混乱并移除包装格式 (#74) (#76)
* fix(logger): 修复多 backend 并行日志 PID 混乱并移除包装格式

**问题:**
- logger.go:288 使用 os.Getpid() 导致并行任务日志 PID 混乱
- 日志文件添加时间戳/PID/级别前缀包装,应输出 backend 原始内容

**修复:**
1. Logger 结构体添加 pid 字段,创建时捕获 PID
2. 日志写入使用固定 l.pid 替代 os.Getpid()
3. 移除日志输出格式包装,直接写入原始消息
4. 添加内存缓存 ERROR/WARN 条目,ExtractRecentErrors 从缓存读取
5. 优化 executor.go context 初始化顺序,避免重复创建 logger

**测试:**
- 所有测试通过(23.7s)
- 更新相关测试用例匹配新格式

Closes #74

* fix(logger): 增强并发日志隔离和 task ID 清理

## 核心修复

### 1. Task ID Sanitization (logger.go)
- 新增 sanitizeLogSuffix(): 清理非法字符 (/, \, :, 等)
- 新增 fallbackLogSuffix(): 为空/非法 ID 生成唯一后备名
- 新增 isSafeLogRune(): 仅允许 [A-Za-z0-9._-]
- 路径穿越防护: ../../../etc/passwd → etc-passwd-{hash}.log
- 超长 ID 处理: 截断到 64 字符 + hash 确保唯一性
- 自动创建 TMPDIR (MkdirAll)

### 2. 共享日志标识 (executor.go)
- 新增 taskLoggerHandle 结构: 封装 logger、路径、共享标志
- 新增 newTaskLoggerHandle(): 统一处理 logger 创建和回退
- printTaskStart(): 显示 "Log (shared)" 标识
- generateFinalOutput(): 在 summary 中标记共享日志
- 并发失败时明确标识所有任务使用共享主日志

### 3. 内部标志 (config.go)
- TaskResult.sharedLog: 非导出字段,标识共享日志状态

### 4. Race Detector 修复 (logger.go:209-219)
- Close() 在关闭 channel 前先等待 pendingWG
- 消除 Logger.Close() 与 Logger.log() 之间的竞态条件

## 测试覆盖

### 新增测试 (logger_suffix_test.go)
- TestLoggerWithSuffixSanitizesUnsafeSuffix: 非法字符清理
- TestLoggerWithSuffixReturnsErrorWhenTempDirNotWritable: 只读目录处理

### 新增测试 (executor_concurrent_test.go)
- TestConcurrentTaskLoggerFailure: 多任务失败时共享日志标识
- TestSanitizeTaskID: 并发场景下 task ID 清理验证

## 验证结果

 所有单元测试通过
 Race detector 无竞态 (65.4s)
 路径穿越攻击防护
 并发日志完全隔离
 边界情况正确处理

Resolves: PR #76 review feedback
Co-Authored-By: Codex Review <codex@anthropic.ai>

Generated with swe-agent-bot

Co-Authored-By: swe-agent-bot <agent@swe-agent.ai>

* fix(logger): 修复关键 bug 并优化日志系统 (v5.2.5)

修复 P0 级别问题:
- sanitizeLogSuffix 的 trim 碰撞(防止多 task 日志文件名冲突)
- ExtractRecentErrors 边界检查(防止 slice 越界)
- Logger.Close 阻塞风险(新增可配置超时机制)

代码质量改进:
- 删除无用字段 Logger.pid 和 logEntry.level
- 优化 sharedLog 标记绑定到最终 LogPath
- 移除日志前缀,直接输出 backend 原始内容

测试覆盖增强:
- 新增 4 个测试用例(碰撞防护、边界检查、缓存上限、shared 判定)
- 优化测试注释和逻辑

版本更新:5.2.4 → 5.2.5

Generated with swe-agent-bot

Co-Authored-By: swe-agent-bot <agent@swe-agent.ai>

---------

Co-authored-by: swe-agent-bot <agent@swe-agent.ai>
2025-12-17 10:33:38 +08:00

470 lines
11 KiB
Go

package main
import (
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"os/signal"
"reflect"
"strings"
"sync/atomic"
"time"
)
const (
version = "5.2.5"
defaultWorkdir = "."
defaultTimeout = 7200 // seconds
codexLogLineLimit = 1000
stdinSpecialChars = "\n\\\"'`$"
stderrCaptureLimit = 4 * 1024
defaultBackendName = "codex"
defaultCodexCommand = "codex"
// stdout close reasons
stdoutCloseReasonWait = "wait-done"
stdoutCloseReasonDrain = "drain-timeout"
stdoutCloseReasonCtx = "context-cancel"
stdoutDrainTimeout = 100 * time.Millisecond
)
// Test hooks for dependency injection
var (
stdinReader io.Reader = os.Stdin
isTerminalFn = defaultIsTerminal
codexCommand = defaultCodexCommand
cleanupHook func()
loggerPtr atomic.Pointer[Logger]
buildCodexArgsFn = buildCodexArgs
selectBackendFn = selectBackend
commandContext = exec.CommandContext
jsonMarshal = json.Marshal
cleanupLogsFn = cleanupOldLogs
signalNotifyFn = signal.Notify
signalStopFn = signal.Stop
terminateCommandFn = terminateCommand
defaultBuildArgsFn = buildCodexArgs
runTaskFn = runCodexTask
exitFn = os.Exit
)
var forceKillDelay atomic.Int32
func init() {
forceKillDelay.Store(5) // seconds - default value
}
func runStartupCleanup() {
if cleanupLogsFn == nil {
return
}
defer func() {
if r := recover(); r != nil {
logWarn(fmt.Sprintf("cleanupOldLogs panic: %v", r))
}
}()
if _, err := cleanupLogsFn(); err != nil {
logWarn(fmt.Sprintf("cleanupOldLogs error: %v", err))
}
}
func runCleanupMode() int {
if cleanupLogsFn == nil {
fmt.Fprintln(os.Stderr, "Cleanup failed: log cleanup function not configured")
return 1
}
stats, err := cleanupLogsFn()
if err != nil {
fmt.Fprintf(os.Stderr, "Cleanup failed: %v\n", err)
return 1
}
fmt.Println("Cleanup completed")
fmt.Printf("Files scanned: %d\n", stats.Scanned)
fmt.Printf("Files deleted: %d\n", stats.Deleted)
if len(stats.DeletedFiles) > 0 {
for _, f := range stats.DeletedFiles {
fmt.Printf(" - %s\n", f)
}
}
fmt.Printf("Files kept: %d\n", stats.Kept)
if len(stats.KeptFiles) > 0 {
for _, f := range stats.KeptFiles {
fmt.Printf(" - %s\n", f)
}
}
if stats.Errors > 0 {
fmt.Printf("Deletion errors: %d\n", stats.Errors)
}
return 0
}
func main() {
exitCode := run()
exitFn(exitCode)
}
// run is the main logic, returns exit code for testability
func run() (exitCode int) {
name := currentWrapperName()
// Handle --version and --help first (no logger needed)
if len(os.Args) > 1 {
switch os.Args[1] {
case "--version", "-v":
fmt.Printf("%s version %s\n", name, version)
return 0
case "--help", "-h":
printHelp()
return 0
case "--cleanup":
return runCleanupMode()
}
}
// Initialize logger for all other commands
logger, err := NewLogger()
if err != nil {
fmt.Fprintf(os.Stderr, "ERROR: failed to initialize logger: %v\n", err)
return 1
}
setLogger(logger)
defer func() {
logger := activeLogger()
if logger != nil {
logger.Flush()
}
if err := closeLogger(); err != nil {
fmt.Fprintf(os.Stderr, "ERROR: failed to close logger: %v\n", err)
}
// On failure, extract and display recent errors before removing log
if logger != nil {
if exitCode != 0 {
if errors := logger.ExtractRecentErrors(10); len(errors) > 0 {
fmt.Fprintln(os.Stderr, "\n=== Recent Errors ===")
for _, entry := range errors {
fmt.Fprintln(os.Stderr, entry)
}
fmt.Fprintf(os.Stderr, "Log file: %s (deleted)\n", logger.Path())
}
}
if err := logger.RemoveLogFile(); err != nil && !os.IsNotExist(err) {
// Silently ignore removal errors
}
}
}()
defer runCleanupHook()
// Clean up stale logs from previous runs.
runStartupCleanup()
// Handle remaining commands
if len(os.Args) > 1 {
args := os.Args[1:]
parallelIndex := -1
for i, arg := range args {
if arg == "--parallel" {
parallelIndex = i
break
}
}
if parallelIndex != -1 {
backendName := defaultBackendName
var extras []string
for i := 0; i < len(args); i++ {
arg := args[i]
switch {
case arg == "--parallel":
continue
case arg == "--backend":
if i+1 >= len(args) {
fmt.Fprintln(os.Stderr, "ERROR: --backend flag requires a value")
return 1
}
backendName = args[i+1]
i++
case strings.HasPrefix(arg, "--backend="):
value := strings.TrimPrefix(arg, "--backend=")
if value == "" {
fmt.Fprintln(os.Stderr, "ERROR: --backend flag requires a value")
return 1
}
backendName = value
default:
extras = append(extras, arg)
}
}
if len(extras) > 0 {
fmt.Fprintln(os.Stderr, "ERROR: --parallel reads its task configuration from stdin; only --backend is allowed.")
fmt.Fprintln(os.Stderr, "Usage examples:")
fmt.Fprintf(os.Stderr, " %s --parallel < tasks.txt\n", name)
fmt.Fprintf(os.Stderr, " echo '...' | %s --parallel\n", name)
fmt.Fprintf(os.Stderr, " %s --parallel <<'EOF'\n", name)
return 1
}
backend, err := selectBackendFn(backendName)
if err != nil {
fmt.Fprintf(os.Stderr, "ERROR: %v\n", err)
return 1
}
backendName = backend.Name()
data, err := io.ReadAll(stdinReader)
if err != nil {
fmt.Fprintf(os.Stderr, "ERROR: failed to read stdin: %v\n", err)
return 1
}
cfg, err := parseParallelConfig(data)
if err != nil {
fmt.Fprintf(os.Stderr, "ERROR: %v\n", err)
return 1
}
cfg.GlobalBackend = backendName
for i := range cfg.Tasks {
if strings.TrimSpace(cfg.Tasks[i].Backend) == "" {
cfg.Tasks[i].Backend = backendName
}
}
timeoutSec := resolveTimeout()
layers, err := topologicalSort(cfg.Tasks)
if err != nil {
fmt.Fprintf(os.Stderr, "ERROR: %v\n", err)
return 1
}
results := executeConcurrent(layers, timeoutSec)
fmt.Println(generateFinalOutput(results))
exitCode = 0
for _, res := range results {
if res.ExitCode != 0 {
exitCode = res.ExitCode
}
}
return exitCode
}
}
logInfo("Script started")
cfg, err := parseArgs()
if err != nil {
logError(err.Error())
return 1
}
logInfo(fmt.Sprintf("Parsed args: mode=%s, task_len=%d, backend=%s", cfg.Mode, len(cfg.Task), cfg.Backend))
backend, err := selectBackendFn(cfg.Backend)
if err != nil {
logError(err.Error())
return 1
}
cfg.Backend = backend.Name()
cmdInjected := codexCommand != defaultCodexCommand
argsInjected := buildCodexArgsFn != nil && reflect.ValueOf(buildCodexArgsFn).Pointer() != reflect.ValueOf(defaultBuildArgsFn).Pointer()
// Wire selected backend into runtime hooks for the rest of the execution,
// but preserve any injected test hooks for the default backend.
if backend.Name() != defaultBackendName || !cmdInjected {
codexCommand = backend.Command()
}
if backend.Name() != defaultBackendName || !argsInjected {
buildCodexArgsFn = backend.BuildArgs
}
logInfo(fmt.Sprintf("Selected backend: %s", backend.Name()))
timeoutSec := resolveTimeout()
logInfo(fmt.Sprintf("Timeout: %ds", timeoutSec))
cfg.Timeout = timeoutSec
var taskText string
var piped bool
if cfg.ExplicitStdin {
logInfo("Explicit stdin mode: reading task from stdin")
data, err := io.ReadAll(stdinReader)
if err != nil {
logError("Failed to read stdin: " + err.Error())
return 1
}
taskText = string(data)
if taskText == "" {
logError("Explicit stdin mode requires task input from stdin")
return 1
}
piped = !isTerminal()
} else {
pipedTask, err := readPipedTask()
if err != nil {
logError("Failed to read piped stdin: " + err.Error())
return 1
}
piped = pipedTask != ""
if piped {
taskText = pipedTask
} else {
taskText = cfg.Task
}
}
useStdin := cfg.ExplicitStdin || shouldUseStdin(taskText, piped)
targetArg := taskText
if useStdin {
targetArg = "-"
}
codexArgs := buildCodexArgsFn(cfg, targetArg)
// Print startup information to stderr
fmt.Fprintf(os.Stderr, "[%s]\n", name)
fmt.Fprintf(os.Stderr, " Backend: %s\n", cfg.Backend)
fmt.Fprintf(os.Stderr, " Command: %s %s\n", codexCommand, strings.Join(codexArgs, " "))
fmt.Fprintf(os.Stderr, " PID: %d\n", os.Getpid())
fmt.Fprintf(os.Stderr, " Log: %s\n", logger.Path())
if useStdin {
var reasons []string
if piped {
reasons = append(reasons, "piped input")
}
if cfg.ExplicitStdin {
reasons = append(reasons, "explicit \"-\"")
}
if strings.Contains(taskText, "\n") {
reasons = append(reasons, "newline")
}
if strings.Contains(taskText, "\\") {
reasons = append(reasons, "backslash")
}
if strings.Contains(taskText, "\"") {
reasons = append(reasons, "double-quote")
}
if strings.Contains(taskText, "'") {
reasons = append(reasons, "single-quote")
}
if strings.Contains(taskText, "`") {
reasons = append(reasons, "backtick")
}
if strings.Contains(taskText, "$") {
reasons = append(reasons, "dollar")
}
if len(taskText) > 800 {
reasons = append(reasons, "length>800")
}
if len(reasons) > 0 {
logWarn(fmt.Sprintf("Using stdin mode for task due to: %s", strings.Join(reasons, ", ")))
}
}
logInfo(fmt.Sprintf("%s running...", cfg.Backend))
taskSpec := TaskSpec{
Task: taskText,
WorkDir: cfg.WorkDir,
Mode: cfg.Mode,
SessionID: cfg.SessionID,
UseStdin: useStdin,
}
result := runTaskFn(taskSpec, false, cfg.Timeout)
if result.ExitCode != 0 {
return result.ExitCode
}
fmt.Println(result.Message)
if result.SessionID != "" {
fmt.Printf("\n---\nSESSION_ID: %s\n", result.SessionID)
}
return 0
}
func setLogger(l *Logger) {
loggerPtr.Store(l)
}
func closeLogger() error {
logger := loggerPtr.Swap(nil)
if logger == nil {
return nil
}
return logger.Close()
}
func activeLogger() *Logger {
return loggerPtr.Load()
}
func logInfo(msg string) {
if logger := activeLogger(); logger != nil {
logger.Info(msg)
}
}
func logWarn(msg string) {
if logger := activeLogger(); logger != nil {
logger.Warn(msg)
}
}
func logError(msg string) {
if logger := activeLogger(); logger != nil {
logger.Error(msg)
}
}
func runCleanupHook() {
if logger := activeLogger(); logger != nil {
logger.Flush()
}
if cleanupHook != nil {
cleanupHook()
}
}
func printHelp() {
name := currentWrapperName()
help := fmt.Sprintf(`%[1]s - Go wrapper for AI CLI backends
Usage:
%[1]s "task" [workdir]
%[1]s --backend claude "task" [workdir]
%[1]s - [workdir] Read task from stdin
%[1]s resume <session_id> "task" [workdir]
%[1]s resume <session_id> - [workdir]
%[1]s --parallel Run tasks in parallel (config from stdin)
%[1]s --version
%[1]s --help
Parallel mode examples:
%[1]s --parallel < tasks.txt
echo '...' | %[1]s --parallel
%[1]s --parallel <<'EOF'
Environment Variables:
CODEX_TIMEOUT Timeout in milliseconds (default: 7200000)
Exit Codes:
0 Success
1 General error (missing args, no output)
124 Timeout
127 backend command not found
130 Interrupted (Ctrl+C)
* Passthrough from backend process`, name)
fmt.Println(help)
}