Files
myclaude/codeagent-wrapper/logger.go
swe-agent[bot] e1ad08fcc1 feat(codeagent-wrapper): 完整多后端支持与安全优化
修复 PR #53 中发现的问题,实现完整的多后端功能:

**多后端功能完整性**
- Claude/Gemini 后端支持 workdir (-C) 和 resume (--session-id) 参数
- 并行模式支持全局 --backend 参数和任务级 backend 配置
- 后端参数映射统一,支持 new/resume 两种模式

**安全控制**
- Claude 后端默认启用 --dangerously-skip-permissions 以支持自动化
- 通过 CODEAGENT_SKIP_PERMISSIONS 环境变量控制权限检查
- 不同后端行为区分:Claude 默认跳过,Codex/Gemini 默认启用

**并发控制**
- 新增 CODEAGENT_MAX_PARALLEL_WORKERS 环境变量限制并发数
- 实现 fail-fast context 取消机制
- Worker pool 防止资源耗尽,支持并发监控日志

**向后兼容**
- 版本号统一管理,提供 codex-wrapper 兼容脚本
- 所有默认行为保持不变
- 支持渐进式迁移

**测试覆盖**
- 总体覆盖率 93.4%(超过 90% 要求)
- 新增后端参数、并行模式、并发控制测试用例
- 核心模块覆盖率:backend.go 100%, config.go 97.8%, executor.go 96.4%

**文档更新**
- 更新 skills/codeagent/SKILL.md 反映多后端和安全控制
- 添加 CHANGELOG.md 记录重要变更
- 更新 README 版本说明和安装脚本

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-11 16:09:33 +08:00

504 lines
12 KiB
Go

package main
import (
"bufio"
"context"
"errors"
"fmt"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
)
// Logger writes log messages asynchronously to a temp file.
// It is intentionally minimal: a buffered channel + single worker goroutine
// to avoid contention while keeping ordering guarantees.
type Logger struct {
path string
file *os.File
writer *bufio.Writer
ch chan logEntry
flushReq chan chan struct{}
done chan struct{}
closed atomic.Bool
closeOnce sync.Once
workerWG sync.WaitGroup
pendingWG sync.WaitGroup
flushMu sync.Mutex
}
type logEntry struct {
level string
msg string
}
// CleanupStats captures the outcome of a cleanupOldLogs run.
type CleanupStats struct {
Scanned int
Deleted int
Kept int
Errors int
DeletedFiles []string
KeptFiles []string
}
var (
processRunningCheck = isProcessRunning
processStartTimeFn = getProcessStartTime
removeLogFileFn = os.Remove
globLogFiles = filepath.Glob
fileStatFn = os.Lstat // Use Lstat to detect symlinks
evalSymlinksFn = filepath.EvalSymlinks
)
// NewLogger creates the async logger and starts the worker goroutine.
// The log file is created under os.TempDir() using the required naming scheme.
func NewLogger() (*Logger, error) {
return NewLoggerWithSuffix("")
}
// NewLoggerWithSuffix creates a logger with an optional suffix in the filename.
// Useful for tests that need isolated log files within the same process.
func NewLoggerWithSuffix(suffix string) (*Logger, error) {
filename := fmt.Sprintf("%s-%d", primaryLogPrefix(), os.Getpid())
if suffix != "" {
filename += "-" + suffix
}
filename += ".log"
path := filepath.Clean(filepath.Join(os.TempDir(), filename))
f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o600)
if err != nil {
return nil, err
}
l := &Logger{
path: path,
file: f,
writer: bufio.NewWriterSize(f, 4096),
ch: make(chan logEntry, 1000),
flushReq: make(chan chan struct{}, 1),
done: make(chan struct{}),
}
l.workerWG.Add(1)
go l.run()
return l, nil
}
// Path returns the underlying log file path (useful for tests/inspection).
func (l *Logger) Path() string {
if l == nil {
return ""
}
return l.path
}
// Info logs at INFO level.
func (l *Logger) Info(msg string) { l.log("INFO", msg) }
// Warn logs at WARN level.
func (l *Logger) Warn(msg string) { l.log("WARN", msg) }
// Debug logs at DEBUG level.
func (l *Logger) Debug(msg string) { l.log("DEBUG", msg) }
// Error logs at ERROR level.
func (l *Logger) Error(msg string) { l.log("ERROR", msg) }
// Close stops the worker and syncs the log file.
// The log file is NOT removed, allowing inspection after program exit.
// It is safe to call multiple times.
// Returns after a 5-second timeout if worker doesn't stop gracefully.
func (l *Logger) Close() error {
if l == nil {
return nil
}
var closeErr error
l.closeOnce.Do(func() {
l.closed.Store(true)
close(l.done)
close(l.ch)
// Wait for worker with timeout
workerDone := make(chan struct{})
go func() {
l.workerWG.Wait()
close(workerDone)
}()
select {
case <-workerDone:
// Worker stopped gracefully
case <-time.After(5 * time.Second):
// Worker timeout - proceed with cleanup anyway
closeErr = fmt.Errorf("logger worker timeout during close")
}
if err := l.writer.Flush(); err != nil && closeErr == nil {
closeErr = err
}
if err := l.file.Sync(); err != nil && closeErr == nil {
closeErr = err
}
if err := l.file.Close(); err != nil && closeErr == nil {
closeErr = err
}
// Log file is kept for debugging - NOT removed
// Users can manually clean up /tmp/<wrapper>-*.log files
})
return closeErr
}
// RemoveLogFile removes the log file. Should only be called after Close().
func (l *Logger) RemoveLogFile() error {
if l == nil {
return nil
}
return os.Remove(l.path)
}
// Flush waits for all pending log entries to be written. Primarily for tests.
// Returns after a 5-second timeout to prevent indefinite blocking.
func (l *Logger) Flush() {
if l == nil {
return
}
l.flushMu.Lock()
defer l.flushMu.Unlock()
// Wait for pending entries with timeout
done := make(chan struct{})
go func() {
l.pendingWG.Wait()
close(done)
}()
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
select {
case <-done:
// All pending entries processed
case <-ctx.Done():
// Timeout - return without full flush
return
}
// Trigger writer flush
flushDone := make(chan struct{})
select {
case l.flushReq <- flushDone:
// Wait for flush to complete
select {
case <-flushDone:
// Flush completed
case <-time.After(1 * time.Second):
// Flush timeout
}
case <-l.done:
// Logger is closing
case <-time.After(1 * time.Second):
// Timeout sending flush request
}
}
func (l *Logger) log(level, msg string) {
if l == nil {
return
}
if l.closed.Load() {
return
}
entry := logEntry{level: level, msg: msg}
l.flushMu.Lock()
l.pendingWG.Add(1)
l.flushMu.Unlock()
select {
case l.ch <- entry:
// Successfully sent to channel
case <-l.done:
// Logger is closing, drop this entry
l.pendingWG.Done()
return
}
}
func (l *Logger) run() {
defer l.workerWG.Done()
ticker := time.NewTicker(500 * time.Millisecond)
defer ticker.Stop()
for {
select {
case entry, ok := <-l.ch:
if !ok {
// Channel closed, final flush
_ = l.writer.Flush()
return
}
timestamp := time.Now().Format("2006-01-02 15:04:05.000")
pid := os.Getpid()
fmt.Fprintf(l.writer, "[%s] [PID:%d] %s: %s\n", timestamp, pid, entry.level, entry.msg)
l.pendingWG.Done()
case <-ticker.C:
_ = l.writer.Flush()
case flushDone := <-l.flushReq:
// Explicit flush request - flush writer and sync to disk
_ = l.writer.Flush()
_ = l.file.Sync()
close(flushDone)
}
}
}
// cleanupOldLogs scans os.TempDir() for wrapper log files and removes those
// whose owning process is no longer running (i.e., orphaned logs).
// It includes safety checks for:
// - PID reuse: Compares file modification time with process start time
// - Symlink attacks: Ensures files are within TempDir and not symlinks
func cleanupOldLogs() (CleanupStats, error) {
var stats CleanupStats
tempDir := os.TempDir()
prefixes := logPrefixes()
if len(prefixes) == 0 {
prefixes = []string{defaultWrapperName}
}
seen := make(map[string]struct{})
var matches []string
for _, prefix := range prefixes {
pattern := filepath.Join(tempDir, fmt.Sprintf("%s-*.log", prefix))
found, err := globLogFiles(pattern)
if err != nil {
logWarn(fmt.Sprintf("cleanupOldLogs: failed to list logs: %v", err))
return stats, fmt.Errorf("cleanupOldLogs: %w", err)
}
for _, path := range found {
if _, ok := seen[path]; ok {
continue
}
seen[path] = struct{}{}
matches = append(matches, path)
}
}
var removeErr error
for _, path := range matches {
stats.Scanned++
filename := filepath.Base(path)
// Security check: Verify file is not a symlink and is within tempDir
if shouldSkipFile, reason := isUnsafeFile(path, tempDir); shouldSkipFile {
stats.Kept++
stats.KeptFiles = append(stats.KeptFiles, filename)
if reason != "" {
logWarn(fmt.Sprintf("cleanupOldLogs: skipping %s: %s", filename, reason))
}
continue
}
pid, ok := parsePIDFromLog(path)
if !ok {
stats.Kept++
stats.KeptFiles = append(stats.KeptFiles, filename)
continue
}
// Check if process is running
if !processRunningCheck(pid) {
// Process not running, safe to delete
if err := removeLogFileFn(path); err != nil {
if errors.Is(err, os.ErrNotExist) {
// File already deleted by another process, don't count as success
stats.Kept++
stats.KeptFiles = append(stats.KeptFiles, filename+" (already deleted)")
continue
}
stats.Errors++
logWarn(fmt.Sprintf("cleanupOldLogs: failed to remove %s: %v", filename, err))
removeErr = errors.Join(removeErr, fmt.Errorf("failed to remove %s: %w", filename, err))
continue
}
stats.Deleted++
stats.DeletedFiles = append(stats.DeletedFiles, filename)
continue
}
// Process is running, check for PID reuse
if isPIDReused(path, pid) {
// PID was reused, the log file is orphaned
if err := removeLogFileFn(path); err != nil {
if errors.Is(err, os.ErrNotExist) {
stats.Kept++
stats.KeptFiles = append(stats.KeptFiles, filename+" (already deleted)")
continue
}
stats.Errors++
logWarn(fmt.Sprintf("cleanupOldLogs: failed to remove %s (PID reused): %v", filename, err))
removeErr = errors.Join(removeErr, fmt.Errorf("failed to remove %s: %w", filename, err))
continue
}
stats.Deleted++
stats.DeletedFiles = append(stats.DeletedFiles, filename)
continue
}
// Process is running and owns this log file
stats.Kept++
stats.KeptFiles = append(stats.KeptFiles, filename)
}
if removeErr != nil {
return stats, fmt.Errorf("cleanupOldLogs: %w", removeErr)
}
return stats, nil
}
// isUnsafeFile checks if a file is unsafe to delete (symlink or outside tempDir).
// Returns (true, reason) if the file should be skipped.
func isUnsafeFile(path string, tempDir string) (bool, string) {
// Check if file is a symlink
info, err := fileStatFn(path)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
return true, "" // File disappeared, skip silently
}
return true, fmt.Sprintf("stat failed: %v", err)
}
// Check if it's a symlink
if info.Mode()&os.ModeSymlink != 0 {
return true, "refusing to delete symlink"
}
// Resolve any path traversal and verify it's within tempDir
resolvedPath, err := evalSymlinksFn(path)
if err != nil {
return true, fmt.Sprintf("path resolution failed: %v", err)
}
// Get absolute path of tempDir
absTempDir, err := filepath.Abs(tempDir)
if err != nil {
return true, fmt.Sprintf("tempDir resolution failed: %v", err)
}
// Ensure resolved path is within tempDir
relPath, err := filepath.Rel(absTempDir, resolvedPath)
if err != nil || strings.HasPrefix(relPath, "..") {
return true, "file is outside tempDir"
}
return false, ""
}
// isPIDReused checks if a PID has been reused by comparing file modification time
// with process start time. Returns true if the log file was created by a different
// process that previously had the same PID.
func isPIDReused(logPath string, pid int) bool {
// Get file modification time (when log was last written)
info, err := fileStatFn(logPath)
if err != nil {
// If we can't stat the file, be conservative and keep it
return false
}
fileModTime := info.ModTime()
// Get process start time
procStartTime := processStartTimeFn(pid)
if procStartTime.IsZero() {
// Can't determine process start time
// Check if file is very old (>7 days), likely from a dead process
if time.Since(fileModTime) > 7*24*time.Hour {
return true // File is old enough to be from a different process
}
return false // Be conservative for recent files
}
// If the log file was modified before the process started, PID was reused
// Add a small buffer (1 second) to account for clock skew and file system timing
return fileModTime.Add(1 * time.Second).Before(procStartTime)
}
func parsePIDFromLog(path string) (int, bool) {
name := filepath.Base(path)
prefixes := logPrefixes()
if len(prefixes) == 0 {
prefixes = []string{defaultWrapperName}
}
for _, prefix := range prefixes {
prefixWithDash := fmt.Sprintf("%s-", prefix)
if !strings.HasPrefix(name, prefixWithDash) || !strings.HasSuffix(name, ".log") {
continue
}
core := strings.TrimSuffix(strings.TrimPrefix(name, prefixWithDash), ".log")
if core == "" {
continue
}
pidPart := core
if idx := strings.IndexRune(core, '-'); idx != -1 {
pidPart = core[:idx]
}
if pidPart == "" {
continue
}
pid, err := strconv.Atoi(pidPart)
if err != nil || pid <= 0 {
continue
}
return pid, true
}
return 0, false
}
func logConcurrencyPlanning(limit, total int) {
logger := activeLogger()
if logger == nil {
return
}
logger.Info(fmt.Sprintf("parallel: worker_limit=%s total_tasks=%d", renderWorkerLimit(limit), total))
}
func logConcurrencyState(event, taskID string, active, limit int) {
logger := activeLogger()
if logger == nil {
return
}
logger.Debug(fmt.Sprintf("parallel: %s task=%s active=%d limit=%s", event, taskID, active, renderWorkerLimit(limit)))
}
func renderWorkerLimit(limit int) string {
if limit <= 0 {
return "unbounded"
}
return strconv.Itoa(limit)
}