mirror of
https://github.com/cexll/myclaude.git
synced 2026-02-11 03:23:50 +08:00
* fix(logger): 修复多 backend 并行日志 PID 混乱并移除包装格式 **问题:** - logger.go:288 使用 os.Getpid() 导致并行任务日志 PID 混乱 - 日志文件添加时间戳/PID/级别前缀包装,应输出 backend 原始内容 **修复:** 1. Logger 结构体添加 pid 字段,创建时捕获 PID 2. 日志写入使用固定 l.pid 替代 os.Getpid() 3. 移除日志输出格式包装,直接写入原始消息 4. 添加内存缓存 ERROR/WARN 条目,ExtractRecentErrors 从缓存读取 5. 优化 executor.go context 初始化顺序,避免重复创建 logger **测试:** - 所有测试通过(23.7s) - 更新相关测试用例匹配新格式 Closes #74 * fix(logger): 增强并发日志隔离和 task ID 清理 ## 核心修复 ### 1. Task ID Sanitization (logger.go) - 新增 sanitizeLogSuffix(): 清理非法字符 (/, \, :, 等) - 新增 fallbackLogSuffix(): 为空/非法 ID 生成唯一后备名 - 新增 isSafeLogRune(): 仅允许 [A-Za-z0-9._-] - 路径穿越防护: ../../../etc/passwd → etc-passwd-{hash}.log - 超长 ID 处理: 截断到 64 字符 + hash 确保唯一性 - 自动创建 TMPDIR (MkdirAll) ### 2. 共享日志标识 (executor.go) - 新增 taskLoggerHandle 结构: 封装 logger、路径、共享标志 - 新增 newTaskLoggerHandle(): 统一处理 logger 创建和回退 - printTaskStart(): 显示 "Log (shared)" 标识 - generateFinalOutput(): 在 summary 中标记共享日志 - 并发失败时明确标识所有任务使用共享主日志 ### 3. 内部标志 (config.go) - TaskResult.sharedLog: 非导出字段,标识共享日志状态 ### 4. Race Detector 修复 (logger.go:209-219) - Close() 在关闭 channel 前先等待 pendingWG - 消除 Logger.Close() 与 Logger.log() 之间的竞态条件 ## 测试覆盖 ### 新增测试 (logger_suffix_test.go) - TestLoggerWithSuffixSanitizesUnsafeSuffix: 非法字符清理 - TestLoggerWithSuffixReturnsErrorWhenTempDirNotWritable: 只读目录处理 ### 新增测试 (executor_concurrent_test.go) - TestConcurrentTaskLoggerFailure: 多任务失败时共享日志标识 - TestSanitizeTaskID: 并发场景下 task ID 清理验证 ## 验证结果 ✅ 所有单元测试通过 ✅ Race detector 无竞态 (65.4s) ✅ 路径穿越攻击防护 ✅ 并发日志完全隔离 ✅ 边界情况正确处理 Resolves: PR #76 review feedback Co-Authored-By: Codex Review <codex@anthropic.ai> Generated with swe-agent-bot Co-Authored-By: swe-agent-bot <agent@swe-agent.ai> * fix(logger): 修复关键 bug 并优化日志系统 (v5.2.5) 修复 P0 级别问题: - sanitizeLogSuffix 的 trim 碰撞(防止多 task 日志文件名冲突) - ExtractRecentErrors 边界检查(防止 slice 越界) - Logger.Close 阻塞风险(新增可配置超时机制) 代码质量改进: - 删除无用字段 Logger.pid 和 logEntry.level - 优化 sharedLog 标记绑定到最终 LogPath - 移除日志前缀,直接输出 backend 原始内容 测试覆盖增强: - 新增 4 个测试用例(碰撞防护、边界检查、缓存上限、shared 判定) - 优化测试注释和逻辑 版本更新:5.2.4 → 5.2.5 Generated with swe-agent-bot Co-Authored-By: swe-agent-bot <agent@swe-agent.ai> --------- Co-authored-by: swe-agent-bot <agent@swe-agent.ai>
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash/crc32"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
@@ -18,22 +19,25 @@ import (
|
||||
// It is intentionally minimal: a buffered channel + single worker goroutine
|
||||
// to avoid contention while keeping ordering guarantees.
|
||||
type Logger struct {
|
||||
path string
|
||||
file *os.File
|
||||
writer *bufio.Writer
|
||||
ch chan logEntry
|
||||
flushReq chan chan struct{}
|
||||
done chan struct{}
|
||||
closed atomic.Bool
|
||||
closeOnce sync.Once
|
||||
workerWG sync.WaitGroup
|
||||
pendingWG sync.WaitGroup
|
||||
flushMu sync.Mutex
|
||||
path string
|
||||
file *os.File
|
||||
writer *bufio.Writer
|
||||
ch chan logEntry
|
||||
flushReq chan chan struct{}
|
||||
done chan struct{}
|
||||
closed atomic.Bool
|
||||
closeOnce sync.Once
|
||||
workerWG sync.WaitGroup
|
||||
pendingWG sync.WaitGroup
|
||||
flushMu sync.Mutex
|
||||
workerErr error
|
||||
errorEntries []string // Cache of recent ERROR/WARN entries
|
||||
errorMu sync.Mutex
|
||||
}
|
||||
|
||||
type logEntry struct {
|
||||
level string
|
||||
msg string
|
||||
msg string
|
||||
isError bool // true for ERROR or WARN levels
|
||||
}
|
||||
|
||||
// CleanupStats captures the outcome of a cleanupOldLogs run.
|
||||
@@ -55,6 +59,10 @@ var (
|
||||
evalSymlinksFn = filepath.EvalSymlinks
|
||||
)
|
||||
|
||||
const maxLogSuffixLen = 64
|
||||
|
||||
var logSuffixCounter atomic.Uint64
|
||||
|
||||
// NewLogger creates the async logger and starts the worker goroutine.
|
||||
// The log file is created under os.TempDir() using the required naming scheme.
|
||||
func NewLogger() (*Logger, error) {
|
||||
@@ -64,14 +72,23 @@ func NewLogger() (*Logger, error) {
|
||||
// NewLoggerWithSuffix creates a logger with an optional suffix in the filename.
|
||||
// Useful for tests that need isolated log files within the same process.
|
||||
func NewLoggerWithSuffix(suffix string) (*Logger, error) {
|
||||
filename := fmt.Sprintf("%s-%d", primaryLogPrefix(), os.Getpid())
|
||||
pid := os.Getpid()
|
||||
filename := fmt.Sprintf("%s-%d", primaryLogPrefix(), pid)
|
||||
var safeSuffix string
|
||||
if suffix != "" {
|
||||
filename += "-" + suffix
|
||||
safeSuffix = sanitizeLogSuffix(suffix)
|
||||
}
|
||||
if safeSuffix != "" {
|
||||
filename += "-" + safeSuffix
|
||||
}
|
||||
filename += ".log"
|
||||
|
||||
path := filepath.Clean(filepath.Join(os.TempDir(), filename))
|
||||
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0o700); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o600)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -92,6 +109,73 @@ func NewLoggerWithSuffix(suffix string) (*Logger, error) {
|
||||
return l, nil
|
||||
}
|
||||
|
||||
func sanitizeLogSuffix(raw string) string {
|
||||
trimmed := strings.TrimSpace(raw)
|
||||
if trimmed == "" {
|
||||
return fallbackLogSuffix()
|
||||
}
|
||||
|
||||
var b strings.Builder
|
||||
changed := false
|
||||
for _, r := range trimmed {
|
||||
if isSafeLogRune(r) {
|
||||
b.WriteRune(r)
|
||||
} else {
|
||||
changed = true
|
||||
b.WriteByte('-')
|
||||
}
|
||||
if b.Len() >= maxLogSuffixLen {
|
||||
changed = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
sanitized := strings.Trim(b.String(), "-.")
|
||||
if sanitized != b.String() {
|
||||
changed = true // Mark if trim removed any characters
|
||||
}
|
||||
if sanitized == "" {
|
||||
return fallbackLogSuffix()
|
||||
}
|
||||
|
||||
if changed || len(sanitized) > maxLogSuffixLen {
|
||||
hash := crc32.ChecksumIEEE([]byte(trimmed))
|
||||
hashStr := fmt.Sprintf("%x", hash)
|
||||
|
||||
maxPrefix := maxLogSuffixLen - len(hashStr) - 1
|
||||
if maxPrefix < 1 {
|
||||
maxPrefix = 1
|
||||
}
|
||||
if len(sanitized) > maxPrefix {
|
||||
sanitized = sanitized[:maxPrefix]
|
||||
}
|
||||
|
||||
sanitized = fmt.Sprintf("%s-%s", sanitized, hashStr)
|
||||
}
|
||||
|
||||
return sanitized
|
||||
}
|
||||
|
||||
func fallbackLogSuffix() string {
|
||||
next := logSuffixCounter.Add(1)
|
||||
return fmt.Sprintf("task-%d", next)
|
||||
}
|
||||
|
||||
func isSafeLogRune(r rune) bool {
|
||||
switch {
|
||||
case r >= 'a' && r <= 'z':
|
||||
return true
|
||||
case r >= 'A' && r <= 'Z':
|
||||
return true
|
||||
case r >= '0' && r <= '9':
|
||||
return true
|
||||
case r == '-', r == '_', r == '.':
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Path returns the underlying log file path (useful for tests/inspection).
|
||||
func (l *Logger) Path() string {
|
||||
if l == nil {
|
||||
@@ -112,10 +196,11 @@ func (l *Logger) Debug(msg string) { l.log("DEBUG", msg) }
|
||||
// Error logs at ERROR level.
|
||||
func (l *Logger) Error(msg string) { l.log("ERROR", msg) }
|
||||
|
||||
// Close stops the worker and syncs the log file.
|
||||
// Close signals the worker to flush and close the log file.
|
||||
// The log file is NOT removed, allowing inspection after program exit.
|
||||
// It is safe to call multiple times.
|
||||
// Returns after a 5-second timeout if worker doesn't stop gracefully.
|
||||
// Waits up to CODEAGENT_LOGGER_CLOSE_TIMEOUT_MS (default: 5000) for shutdown; set to 0 to wait indefinitely.
|
||||
// Returns an error if shutdown doesn't complete within the timeout.
|
||||
func (l *Logger) Close() error {
|
||||
if l == nil {
|
||||
return nil
|
||||
@@ -126,42 +211,51 @@ func (l *Logger) Close() error {
|
||||
l.closeOnce.Do(func() {
|
||||
l.closed.Store(true)
|
||||
close(l.done)
|
||||
close(l.ch)
|
||||
|
||||
// Wait for worker with timeout
|
||||
timeout := loggerCloseTimeout()
|
||||
workerDone := make(chan struct{})
|
||||
go func() {
|
||||
l.workerWG.Wait()
|
||||
close(workerDone)
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-workerDone:
|
||||
// Worker stopped gracefully
|
||||
case <-time.After(5 * time.Second):
|
||||
// Worker timeout - proceed with cleanup anyway
|
||||
closeErr = fmt.Errorf("logger worker timeout during close")
|
||||
if timeout > 0 {
|
||||
select {
|
||||
case <-workerDone:
|
||||
// Worker stopped gracefully
|
||||
case <-time.After(timeout):
|
||||
closeErr = fmt.Errorf("logger worker timeout during close")
|
||||
return
|
||||
}
|
||||
} else {
|
||||
<-workerDone
|
||||
}
|
||||
|
||||
if err := l.writer.Flush(); err != nil && closeErr == nil {
|
||||
closeErr = err
|
||||
if l.workerErr != nil && closeErr == nil {
|
||||
closeErr = l.workerErr
|
||||
}
|
||||
|
||||
if err := l.file.Sync(); err != nil && closeErr == nil {
|
||||
closeErr = err
|
||||
}
|
||||
|
||||
if err := l.file.Close(); err != nil && closeErr == nil {
|
||||
closeErr = err
|
||||
}
|
||||
|
||||
// Log file is kept for debugging - NOT removed
|
||||
// Users can manually clean up /tmp/<wrapper>-*.log files
|
||||
})
|
||||
|
||||
return closeErr
|
||||
}
|
||||
|
||||
func loggerCloseTimeout() time.Duration {
|
||||
const defaultTimeout = 5 * time.Second
|
||||
|
||||
raw := strings.TrimSpace(os.Getenv("CODEAGENT_LOGGER_CLOSE_TIMEOUT_MS"))
|
||||
if raw == "" {
|
||||
return defaultTimeout
|
||||
}
|
||||
ms, err := strconv.Atoi(raw)
|
||||
if err != nil {
|
||||
return defaultTimeout
|
||||
}
|
||||
if ms <= 0 {
|
||||
return 0
|
||||
}
|
||||
return time.Duration(ms) * time.Millisecond
|
||||
}
|
||||
|
||||
// RemoveLogFile removes the log file. Should only be called after Close().
|
||||
func (l *Logger) RemoveLogFile() error {
|
||||
if l == nil {
|
||||
@@ -170,34 +264,29 @@ func (l *Logger) RemoveLogFile() error {
|
||||
return os.Remove(l.path)
|
||||
}
|
||||
|
||||
// ExtractRecentErrors reads the log file and returns the most recent ERROR and WARN entries.
|
||||
// ExtractRecentErrors returns the most recent ERROR and WARN entries from memory cache.
|
||||
// Returns up to maxEntries entries in chronological order.
|
||||
func (l *Logger) ExtractRecentErrors(maxEntries int) []string {
|
||||
if l == nil || l.path == "" {
|
||||
if l == nil || maxEntries <= 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
f, err := os.Open(l.path)
|
||||
if err != nil {
|
||||
l.errorMu.Lock()
|
||||
defer l.errorMu.Unlock()
|
||||
|
||||
if len(l.errorEntries) == 0 {
|
||||
return nil
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var entries []string
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if strings.Contains(line, "] ERROR:") || strings.Contains(line, "] WARN:") {
|
||||
entries = append(entries, line)
|
||||
}
|
||||
// Return last N entries
|
||||
start := 0
|
||||
if len(l.errorEntries) > maxEntries {
|
||||
start = len(l.errorEntries) - maxEntries
|
||||
}
|
||||
|
||||
// Keep only the last maxEntries
|
||||
if len(entries) > maxEntries {
|
||||
entries = entries[len(entries)-maxEntries:]
|
||||
}
|
||||
|
||||
return entries
|
||||
result := make([]string, len(l.errorEntries)-start)
|
||||
copy(result, l.errorEntries[start:])
|
||||
return result
|
||||
}
|
||||
|
||||
// Flush waits for all pending log entries to be written. Primarily for tests.
|
||||
@@ -254,7 +343,8 @@ func (l *Logger) log(level, msg string) {
|
||||
return
|
||||
}
|
||||
|
||||
entry := logEntry{level: level, msg: msg}
|
||||
isError := level == "WARN" || level == "ERROR"
|
||||
entry := logEntry{msg: msg, isError: isError}
|
||||
l.flushMu.Lock()
|
||||
l.pendingWG.Add(1)
|
||||
l.flushMu.Unlock()
|
||||
@@ -275,18 +365,42 @@ func (l *Logger) run() {
|
||||
ticker := time.NewTicker(500 * time.Millisecond)
|
||||
defer ticker.Stop()
|
||||
|
||||
writeEntry := func(entry logEntry) {
|
||||
fmt.Fprintf(l.writer, "%s\n", entry.msg)
|
||||
|
||||
// Cache error/warn entries in memory for fast extraction
|
||||
if entry.isError {
|
||||
l.errorMu.Lock()
|
||||
l.errorEntries = append(l.errorEntries, entry.msg)
|
||||
if len(l.errorEntries) > 100 { // Keep last 100
|
||||
l.errorEntries = l.errorEntries[1:]
|
||||
}
|
||||
l.errorMu.Unlock()
|
||||
}
|
||||
|
||||
l.pendingWG.Done()
|
||||
}
|
||||
|
||||
finalize := func() {
|
||||
if err := l.writer.Flush(); err != nil && l.workerErr == nil {
|
||||
l.workerErr = err
|
||||
}
|
||||
if err := l.file.Sync(); err != nil && l.workerErr == nil {
|
||||
l.workerErr = err
|
||||
}
|
||||
if err := l.file.Close(); err != nil && l.workerErr == nil {
|
||||
l.workerErr = err
|
||||
}
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case entry, ok := <-l.ch:
|
||||
if !ok {
|
||||
// Channel closed, final flush
|
||||
_ = l.writer.Flush()
|
||||
finalize()
|
||||
return
|
||||
}
|
||||
timestamp := time.Now().Format("2006-01-02 15:04:05.000")
|
||||
pid := os.Getpid()
|
||||
fmt.Fprintf(l.writer, "[%s] [PID:%d] %s: %s\n", timestamp, pid, entry.level, entry.msg)
|
||||
l.pendingWG.Done()
|
||||
writeEntry(entry)
|
||||
|
||||
case <-ticker.C:
|
||||
_ = l.writer.Flush()
|
||||
@@ -296,6 +410,21 @@ func (l *Logger) run() {
|
||||
_ = l.writer.Flush()
|
||||
_ = l.file.Sync()
|
||||
close(flushDone)
|
||||
|
||||
case <-l.done:
|
||||
for {
|
||||
select {
|
||||
case entry, ok := <-l.ch:
|
||||
if !ok {
|
||||
finalize()
|
||||
return
|
||||
}
|
||||
writeEntry(entry)
|
||||
default:
|
||||
finalize()
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user