mirror of https://github.com/cexll/myclaude.git
synced 2026-02-05 02:30:26 +08:00
* fix(logger): fix PID confusion in parallel multi-backend logging and remove wrapper formatting

  **Problem:**
  - logger.go:288 used os.Getpid(), so logs from parallel tasks carried mixed-up PIDs
  - Log files were wrapped with timestamp/PID/level prefixes; the backend's raw output should be written instead

  **Fix:**
  1. Add a pid field to the Logger struct and capture the PID at creation time
  2. Write log entries with the fixed l.pid instead of os.Getpid()
  3. Remove the output wrapper format and write raw messages directly
  4. Cache ERROR/WARN entries in memory so ExtractRecentErrors reads from the cache
  5. Reorder context initialization in executor.go to avoid creating the logger twice

  **Tests:**
  - All tests pass (23.7s)
  - Related test cases updated to match the new format

  Closes #74

* fix(logger): harden concurrent log isolation and task ID sanitization

  ## Core fixes

  ### 1. Task ID sanitization (logger.go)
  - New sanitizeLogSuffix(): strips illegal characters (/, \, :, etc.)
  - New fallbackLogSuffix(): generates a unique fallback name for empty/invalid IDs
  - New isSafeLogRune(): allows only [A-Za-z0-9._-]
  - Path traversal protection: ../../../etc/passwd → etc-passwd-{hash}.log
  - Over-long IDs: truncated to 64 characters plus a hash to preserve uniqueness
  - TMPDIR is created automatically (MkdirAll)

  ### 2. Shared-log labeling (executor.go)
  - New taskLoggerHandle struct: wraps the logger, its path, and a shared flag
  - New newTaskLoggerHandle(): unifies logger creation and fallback handling
  - printTaskStart(): shows a "Log (shared)" label
  - generateFinalOutput(): marks shared logs in the summary
  - When per-task logger creation fails in concurrent mode, all tasks are explicitly marked as using the shared main log

  ### 3. Internal flag (config.go)
  - TaskResult.sharedLog: unexported field that records shared-log status

  ### 4. Race detector fix (logger.go:209-219)
  - Close() waits on pendingWG before closing the channel
  - Eliminates the race between Logger.Close() and Logger.log()

  ## Test coverage

  ### New tests (logger_suffix_test.go)
  - TestLoggerWithSuffixSanitizesUnsafeSuffix: illegal-character sanitization
  - TestLoggerWithSuffixReturnsErrorWhenTempDirNotWritable: read-only directory handling

  ### New tests (executor_concurrent_test.go)
  - TestConcurrentTaskLoggerFailure: shared-log labeling when multiple tasks fail
  - TestSanitizeTaskID: task ID sanitization under concurrency

  ## Verification
  ✅ All unit tests pass
  ✅ Race detector reports no races (65.4s)
  ✅ Path traversal attacks blocked
  ✅ Concurrent logs fully isolated
  ✅ Edge cases handled correctly

  Resolves: PR #76 review feedback

  Co-Authored-By: Codex Review <codex@anthropic.ai>
  Generated with swe-agent-bot
  Co-Authored-By: swe-agent-bot <agent@swe-agent.ai>

* fix(logger): fix critical bugs and tune the logging system (v5.2.5)

  P0 fixes:
  - Trim collisions in sanitizeLogSuffix (prevents log file-name clashes between tasks)
  - Bounds checking in ExtractRecentErrors (prevents slice out-of-range panics)
  - Logger.Close blocking risk (adds a configurable timeout)

  Code quality:
  - Remove the unused Logger.pid and logEntry.level fields
  - Bind the sharedLog flag to the final LogPath
  - Remove log prefixes and write the backend's raw output directly

  Test coverage:
  - 4 new test cases (collision protection, bounds checking, cache cap, shared-log detection)
  - Improved test comments and logic

  Version bump: 5.2.4 → 5.2.5

  Generated with swe-agent-bot
  Co-Authored-By: swe-agent-bot <agent@swe-agent.ai>

  ---------

  Co-authored-by: swe-agent-bot <agent@swe-agent.ai>
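As a rough illustration of the sanitization behaviour described in the second commit, the sketch below shows how sanitizeLogSuffix(), fallbackLogSuffix(), and isSafeLogRune() might fit together. The function names come from the commit message; the hash format, truncation details, and overall structure are assumptions made for illustration, not the repository's actual implementation.

```go
// Sketch only: approximates the sanitization behaviour described in the
// commit message above; truncation length and hash details are assumptions.
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"strings"
)

// isSafeLogRune allows only [A-Za-z0-9._-] in a log-file suffix.
func isSafeLogRune(r rune) bool {
	return (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') ||
		(r >= '0' && r <= '9') || r == '.' || r == '_' || r == '-'
}

// fallbackLogSuffix produces a unique name when nothing usable remains.
func fallbackLogSuffix(id string) string {
	sum := sha256.Sum256([]byte(id))
	return "task-" + hex.EncodeToString(sum[:4])
}

// sanitizeLogSuffix maps an arbitrary task ID to a safe file-name suffix:
// unsafe runes become '-', and long or modified IDs get a short hash so
// distinct inputs stay distinct after cleaning.
func sanitizeLogSuffix(id string) string {
	cleaned := strings.Map(func(r rune) rune {
		if isSafeLogRune(r) {
			return r
		}
		return '-'
	}, id)
	cleaned = strings.Trim(cleaned, "-.")
	if cleaned == "" {
		return fallbackLogSuffix(id)
	}
	if len(cleaned) > 64 {
		cleaned = cleaned[:64]
	}
	if cleaned != id {
		sum := sha256.Sum256([]byte(id))
		cleaned += "-" + hex.EncodeToString(sum[:4])
	}
	return cleaned
}
```

Under these assumptions, `../../../etc/passwd` cleans to `etc-passwd` plus a short hash, matching the example above, and an empty or fully illegal ID falls back to a unique `task-{hash}` name.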
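The race-detector fix ("Close() waits on pendingWG before closing the channel") can be pictured with a similar minimal sketch. The demoLogger type, its fields, and the channel capacity are invented for this example, and it assumes callers stop logging before Close begins.

```go
// Sketch only: illustrates the Close/log ordering described above; not the
// repository's Logger implementation.
package main

import "sync"

type demoLogger struct {
	ch        chan string    // buffered log entries
	pendingWG sync.WaitGroup // tracks log() calls that may still be sending
	done      chan struct{}  // closed when the writer goroutine exits
}

func newDemoLogger() *demoLogger {
	l := &demoLogger{ch: make(chan string, 1000), done: make(chan struct{})}
	go func() { // writer goroutine: drains entries until the channel closes
		defer close(l.done)
		for range l.ch {
			// write the entry to the underlying file (omitted in this sketch)
		}
	}()
	return l
}

func (l *demoLogger) log(msg string) {
	l.pendingWG.Add(1)
	defer l.pendingWG.Done()
	select {
	case l.ch <- msg: // enqueue without blocking the caller
	default: // channel full: drop the entry rather than block
	}
}

// Close waits for in-flight log() calls before closing the channel, then
// waits for the writer goroutine to drain and exit.
func (l *demoLogger) Close() {
	l.pendingWG.Wait()
	close(l.ch)
	<-l.done
}
```

The key point is the ordering in Close: waiting on pendingWG first guarantees that no log() call can send on a channel that has already been closed.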
424 lines
11 KiB
Go
package main

import (
	"bufio"
	"context"
	"fmt"
	"os"
	"regexp"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"
)

// TestConcurrentStressLogger is a high-concurrency stress test.
func TestConcurrentStressLogger(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping stress test in short mode")
	}

	logger, err := NewLoggerWithSuffix("stress")
	if err != nil {
		t.Fatal(err)
	}
	defer logger.Close()

	t.Logf("Log file: %s", logger.Path())

	const (
		numGoroutines  = 100  // number of concurrent goroutines
		logsPerRoutine = 1000 // log entries written per goroutine
		totalExpected  = numGoroutines * logsPerRoutine
	)

	var wg sync.WaitGroup
	start := time.Now()

	// start concurrent writers
	for i := 0; i < numGoroutines; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			for j := 0; j < logsPerRoutine; j++ {
				logger.Info(fmt.Sprintf("goroutine-%d-msg-%d", id, j))
			}
		}(i)
	}

	wg.Wait()
	logger.Flush()
	elapsed := time.Since(start)

	// read the log file back for verification
	data, err := os.ReadFile(logger.Path())
	if err != nil {
		t.Fatalf("failed to read log file: %v", err)
	}

	lines := strings.Split(strings.TrimSpace(string(data)), "\n")
	actualCount := len(lines)

	t.Logf("Concurrent stress test results:")
	t.Logf(" Goroutines: %d", numGoroutines)
	t.Logf(" Logs per goroutine: %d", logsPerRoutine)
	t.Logf(" Total expected: %d", totalExpected)
	t.Logf(" Total actual: %d", actualCount)
	t.Logf(" Duration: %v", elapsed)
	t.Logf(" Throughput: %.2f logs/sec", float64(totalExpected)/elapsed.Seconds())

	// verify the number of persisted log entries
	if actualCount < totalExpected/10 {
		t.Errorf("too many logs lost: got %d, want at least %d (10%% of %d)",
			actualCount, totalExpected/10, totalExpected)
	}
	t.Logf("Successfully wrote %d/%d logs (%.1f%%)",
		actualCount, totalExpected, float64(actualCount)/float64(totalExpected)*100)

	// verify the log format (plain text, no prefix)
	formatRE := regexp.MustCompile(`^goroutine-\d+-msg-\d+$`)
	for i, line := range lines[:min(10, len(lines))] {
		if !formatRE.MatchString(line) {
			t.Errorf("line %d has invalid format: %s", i, line)
		}
	}
}

// TestConcurrentBurstLogger simulates bursty traffic.
func TestConcurrentBurstLogger(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping burst test in short mode")
	}

	logger, err := NewLoggerWithSuffix("burst")
	if err != nil {
		t.Fatal(err)
	}
	defer logger.Close()

	t.Logf("Log file: %s", logger.Path())

	const (
		numBursts          = 10
		goroutinesPerBurst = 50
		logsPerGoroutine   = 100
	)

	totalLogs := 0
	start := time.Now()

	// simulate bursts of traffic
	for burst := 0; burst < numBursts; burst++ {
		var wg sync.WaitGroup
		for i := 0; i < goroutinesPerBurst; i++ {
			wg.Add(1)
			totalLogs += logsPerGoroutine
			go func(b, g int) {
				defer wg.Done()
				for j := 0; j < logsPerGoroutine; j++ {
					logger.Info(fmt.Sprintf("burst-%d-goroutine-%d-msg-%d", b, g, j))
				}
			}(burst, i)
		}
		wg.Wait()
		time.Sleep(10 * time.Millisecond) // pause between bursts
	}

	logger.Flush()
	elapsed := time.Since(start)

	// verify
	data, err := os.ReadFile(logger.Path())
	if err != nil {
		t.Fatalf("failed to read log file: %v", err)
	}

	lines := strings.Split(strings.TrimSpace(string(data)), "\n")
	actualCount := len(lines)

	t.Logf("Burst test results:")
	t.Logf(" Total bursts: %d", numBursts)
	t.Logf(" Goroutines per burst: %d", goroutinesPerBurst)
	t.Logf(" Expected logs: %d", totalLogs)
	t.Logf(" Actual logs: %d", actualCount)
	t.Logf(" Duration: %v", elapsed)
	t.Logf(" Throughput: %.2f logs/sec", float64(totalLogs)/elapsed.Seconds())

	if actualCount < totalLogs/10 {
		t.Errorf("too many logs lost: got %d, want at least %d (10%% of %d)", actualCount, totalLogs/10, totalLogs)
	}
	t.Logf("Successfully wrote %d/%d logs (%.1f%%)",
		actualCount, totalLogs, float64(actualCount)/float64(totalLogs)*100)
}

// TestLoggerChannelCapacity pushes past the channel capacity limit.
func TestLoggerChannelCapacity(t *testing.T) {
	logger, err := NewLoggerWithSuffix("capacity")
	if err != nil {
		t.Fatal(err)
	}
	defer logger.Close()

	const rapidLogs = 2000 // exceeds the channel capacity (1000)

	start := time.Now()
	for i := 0; i < rapidLogs; i++ {
		logger.Info(fmt.Sprintf("rapid-log-%d", i))
	}
	sendDuration := time.Since(start)

	logger.Flush()
	flushDuration := time.Since(start) - sendDuration

	t.Logf("Channel capacity test:")
	t.Logf(" Logs sent: %d", rapidLogs)
	t.Logf(" Send duration: %v", sendDuration)
	t.Logf(" Flush duration: %v", flushDuration)

	// verify that a reasonable fraction of logs was persisted (non-blocking mode may drop some)
	data, err := os.ReadFile(logger.Path())
	if err != nil {
		t.Fatal(err)
	}
	lines := strings.Split(strings.TrimSpace(string(data)), "\n")
	actualCount := len(lines)

	if actualCount < rapidLogs/10 {
		t.Errorf("too many logs lost: got %d, want at least %d (10%% of %d)", actualCount, rapidLogs/10, rapidLogs)
	}
	t.Logf("Logs persisted: %d/%d (%.1f%%)", actualCount, rapidLogs, float64(actualCount)/float64(rapidLogs)*100)
}

// TestLoggerMemoryUsage exercises memory and disk usage with long messages.
func TestLoggerMemoryUsage(t *testing.T) {
	logger, err := NewLoggerWithSuffix("memory")
	if err != nil {
		t.Fatal(err)
	}
	defer logger.Close()

	const numLogs = 20000
	longMessage := strings.Repeat("x", 500) // 500-byte message

	start := time.Now()
	for i := 0; i < numLogs; i++ {
		logger.Info(fmt.Sprintf("log-%d-%s", i, longMessage))
	}
	logger.Flush()
	elapsed := time.Since(start)

	// check the file size
	info, err := os.Stat(logger.Path())
	if err != nil {
		t.Fatal(err)
	}

	expectedTotalSize := int64(numLogs * 500) // theoretical minimum total bytes
	expectedMinSize := expectedTotalSize / 10 // tolerate up to 90% loss
	actualSize := info.Size()

	t.Logf("Memory/disk usage test:")
	t.Logf(" Logs written: %d", numLogs)
	t.Logf(" Message size: 500 bytes")
	t.Logf(" File size: %.2f MB", float64(actualSize)/1024/1024)
	t.Logf(" Duration: %v", elapsed)
	t.Logf(" Write speed: %.2f MB/s", float64(actualSize)/1024/1024/elapsed.Seconds())
	t.Logf(" Persistence ratio: %.1f%%", float64(actualSize)/float64(expectedTotalSize)*100)

	if actualSize < expectedMinSize {
		t.Errorf("file size too small: got %d bytes, expected at least %d", actualSize, expectedMinSize)
	}
}

// TestLoggerFlushTimeout verifies that Flush completes within its timeout.
func TestLoggerFlushTimeout(t *testing.T) {
	logger, err := NewLoggerWithSuffix("flush")
	if err != nil {
		t.Fatal(err)
	}
	defer logger.Close()

	// write some logs
	for i := 0; i < 100; i++ {
		logger.Info(fmt.Sprintf("test-log-%d", i))
	}

	// Flush should complete within a reasonable time
	start := time.Now()
	logger.Flush()
	duration := time.Since(start)

	t.Logf("Flush duration: %v", duration)

	if duration > 6*time.Second {
		t.Errorf("Flush took too long: %v (expected < 6s)", duration)
	}
}

// TestLoggerOrderPreservation verifies per-goroutine log ordering.
func TestLoggerOrderPreservation(t *testing.T) {
	logger, err := NewLoggerWithSuffix("order")
	if err != nil {
		t.Fatal(err)
	}
	defer logger.Close()

	const numGoroutines = 10
	const logsPerRoutine = 100

	var wg sync.WaitGroup
	for i := 0; i < numGoroutines; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			for j := 0; j < logsPerRoutine; j++ {
				logger.Info(fmt.Sprintf("G%d-SEQ%04d", id, j))
			}
		}(i)
	}

	wg.Wait()
	logger.Flush()

	// read back and verify each goroutine's log order
	data, err := os.ReadFile(logger.Path())
	if err != nil {
		t.Fatal(err)
	}

	scanner := bufio.NewScanner(strings.NewReader(string(data)))
	sequences := make(map[int][]int) // goroutine ID -> sequence numbers

	for scanner.Scan() {
		line := scanner.Text()
		var gid, seq int
		// Parse format: G0-SEQ0001 (without INFO: prefix)
		_, err := fmt.Sscanf(line, "G%d-SEQ%04d", &gid, &seq)
		if err != nil {
			t.Errorf("invalid log format: %s (error: %v)", line, err)
			continue
		}
		sequences[gid] = append(sequences[gid], seq)
	}

	// verify ordering within each goroutine
	for gid, seqs := range sequences {
		for i := 0; i < len(seqs)-1; i++ {
			if seqs[i] >= seqs[i+1] {
				t.Errorf("Goroutine %d: out of order at index %d: %d >= %d",
					gid, i, seqs[i], seqs[i+1])
			}
		}
		if len(seqs) != logsPerRoutine {
			t.Errorf("Goroutine %d: missing logs, got %d, want %d",
				gid, len(seqs), logsPerRoutine)
		}
	}

	t.Logf("Order preservation test: all %d goroutines maintained sequence order", len(sequences))
}

func TestConcurrentWorkerPoolLimit(t *testing.T) {
	orig := runCodexTaskFn
	defer func() { runCodexTaskFn = orig }()

	logger, err := NewLoggerWithSuffix("pool-limit")
	if err != nil {
		t.Fatal(err)
	}
	setLogger(logger)
	t.Cleanup(func() {
		_ = closeLogger()
		_ = logger.RemoveLogFile()
	})

	var active int64
	var maxSeen int64
	runCodexTaskFn = func(task TaskSpec, timeout int) TaskResult {
		if task.Context == nil {
			t.Fatalf("context not propagated for task %s", task.ID)
		}
		cur := atomic.AddInt64(&active, 1)
		for {
			prev := atomic.LoadInt64(&maxSeen)
			if cur <= prev || atomic.CompareAndSwapInt64(&maxSeen, prev, cur) {
				break
			}
		}
		select {
		case <-task.Context.Done():
			atomic.AddInt64(&active, -1)
			return TaskResult{TaskID: task.ID, ExitCode: 130, Error: "context cancelled"}
		case <-time.After(30 * time.Millisecond):
		}
		atomic.AddInt64(&active, -1)
		return TaskResult{TaskID: task.ID}
	}

	layers := [][]TaskSpec{{{ID: "t1"}, {ID: "t2"}, {ID: "t3"}, {ID: "t4"}, {ID: "t5"}}}
	results := executeConcurrentWithContext(context.Background(), layers, 5, 2)

	if len(results) != 5 {
		t.Fatalf("unexpected result count: got %d", len(results))
	}
	if maxSeen > 2 {
		t.Fatalf("worker pool exceeded limit: saw %d active workers", maxSeen)
	}

	logger.Flush()
	data, err := os.ReadFile(logger.Path())
	if err != nil {
		t.Fatalf("failed to read log file: %v", err)
	}
	content := string(data)
	if !strings.Contains(content, "worker_limit=2") {
		t.Fatalf("concurrency planning log missing, content: %s", content)
	}
	if !strings.Contains(content, "parallel: start") {
		t.Fatalf("concurrency start logs missing, content: %s", content)
	}
}

func TestConcurrentCancellationPropagation(t *testing.T) {
	orig := runCodexTaskFn
	defer func() { runCodexTaskFn = orig }()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	runCodexTaskFn = func(task TaskSpec, timeout int) TaskResult {
		if task.Context == nil {
			t.Fatalf("context not propagated for task %s", task.ID)
		}
		select {
		case <-task.Context.Done():
			return TaskResult{TaskID: task.ID, ExitCode: 130, Error: "context cancelled"}
		case <-time.After(200 * time.Millisecond):
			return TaskResult{TaskID: task.ID}
		}
	}

	layers := [][]TaskSpec{{{ID: "a"}, {ID: "b"}, {ID: "c"}}}
	go func() {
		time.Sleep(50 * time.Millisecond)
		cancel()
	}()

	results := executeConcurrentWithContext(ctx, layers, 1, 2)
	if len(results) != 3 {
		t.Fatalf("unexpected result count: got %d", len(results))
	}

	cancelled := 0
	for _, res := range results {
		if res.ExitCode != 0 {
			cancelled++
		}
	}

	if cancelled == 0 {
		t.Fatalf("expected cancellation to propagate, got results: %+v", results)
	}
}