mirror of
https://github.com/cexll/myclaude.git
synced 2026-02-05 02:30:26 +08:00
fix(parser): 修复 bufio.Scanner token too long 错误 (#64)
## 问题 - 执行 rg 等命令时,如果匹配到 minified 文件,单行输出可能超过 10MB - 旧实现使用 bufio.Scanner,遇到超长行会报错并中止整个解析 - 导致后续的 agent_message 无法读取,任务失败 ## 修复 1. **parser.go**: - 移除 bufio.Scanner,改用 bufio.Reader + readLineWithLimit - 超长行(>10MB)会被跳过但继续处理后续事件 - 添加 codexHeader 轻量级解析,只在 agent_message 时完整解析 2. **utils.go**: - 修复 logWriter 内存膨胀问题 - 添加 writeLimited 方法限制缓冲区大小 3. **测试**: - parser_token_too_long_test.go: 验证超长行处理 - log_writer_limit_test.go: 验证日志缓冲限制 ## 测试结果 - ✅ TestParseJSONStream_SkipsOverlongLineAndContinues - ✅ TestLogWriterWriteLimitsBuffer - ✅ 完整测试套件通过 Fixes #64 Generated with swe-agent-bot Co-Authored-By: swe-agent-bot <agent@swe-agent.ai>
This commit is contained in:
@@ -53,9 +53,22 @@ func parseJSONStreamWithLog(r io.Reader, warnFn func(string), infoFn func(string
|
||||
return parseJSONStreamInternal(r, warnFn, infoFn, nil)
|
||||
}
|
||||
|
||||
const (
|
||||
jsonLineReaderSize = 64 * 1024
|
||||
jsonLineMaxBytes = 10 * 1024 * 1024
|
||||
jsonLinePreviewBytes = 256
|
||||
)
|
||||
|
||||
type codexHeader struct {
|
||||
Type string `json:"type"`
|
||||
ThreadID string `json:"thread_id,omitempty"`
|
||||
Item *struct {
|
||||
Type string `json:"type"`
|
||||
} `json:"item,omitempty"`
|
||||
}
|
||||
|
||||
func parseJSONStreamInternal(r io.Reader, warnFn func(string), infoFn func(string), onMessage func()) (message, threadID string) {
|
||||
scanner := bufio.NewScanner(r)
|
||||
scanner.Buffer(make([]byte, 64*1024), 10*1024*1024)
|
||||
reader := bufio.NewReaderSize(r, jsonLineReaderSize)
|
||||
|
||||
if warnFn == nil {
|
||||
warnFn = func(string) {}
|
||||
@@ -78,79 +91,89 @@ func parseJSONStreamInternal(r io.Reader, warnFn func(string), infoFn func(strin
|
||||
geminiBuffer strings.Builder
|
||||
)
|
||||
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" {
|
||||
for {
|
||||
line, tooLong, err := readLineWithLimit(reader, jsonLineMaxBytes, jsonLinePreviewBytes)
|
||||
if err != nil {
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
}
|
||||
warnFn("Read stdout error: " + err.Error())
|
||||
break
|
||||
}
|
||||
|
||||
line = bytes.TrimSpace(line)
|
||||
if len(line) == 0 {
|
||||
continue
|
||||
}
|
||||
totalEvents++
|
||||
|
||||
var raw map[string]json.RawMessage
|
||||
if err := json.Unmarshal([]byte(line), &raw); err != nil {
|
||||
warnFn(fmt.Sprintf("Failed to parse line: %s", truncate(line, 100)))
|
||||
if tooLong {
|
||||
warnFn(fmt.Sprintf("Skipped overlong JSON line (> %d bytes): %s", jsonLineMaxBytes, truncateBytes(line, 100)))
|
||||
continue
|
||||
}
|
||||
|
||||
hasItemType := false
|
||||
if rawItem, ok := raw["item"]; ok {
|
||||
var itemMap map[string]json.RawMessage
|
||||
if err := json.Unmarshal(rawItem, &itemMap); err == nil {
|
||||
if _, ok := itemMap["type"]; ok {
|
||||
hasItemType = true
|
||||
var codex codexHeader
|
||||
if err := json.Unmarshal(line, &codex); err == nil {
|
||||
isCodex := codex.ThreadID != "" || (codex.Item != nil && codex.Item.Type != "")
|
||||
if isCodex {
|
||||
var details []string
|
||||
if codex.ThreadID != "" {
|
||||
details = append(details, fmt.Sprintf("thread_id=%s", codex.ThreadID))
|
||||
}
|
||||
if codex.Item != nil && codex.Item.Type != "" {
|
||||
details = append(details, fmt.Sprintf("item_type=%s", codex.Item.Type))
|
||||
}
|
||||
if len(details) > 0 {
|
||||
infoFn(fmt.Sprintf("Parsed event #%d type=%s (%s)", totalEvents, codex.Type, strings.Join(details, ", ")))
|
||||
} else {
|
||||
infoFn(fmt.Sprintf("Parsed event #%d type=%s", totalEvents, codex.Type))
|
||||
}
|
||||
|
||||
switch codex.Type {
|
||||
case "thread.started":
|
||||
threadID = codex.ThreadID
|
||||
infoFn(fmt.Sprintf("thread.started event thread_id=%s", threadID))
|
||||
case "item.completed":
|
||||
itemType := ""
|
||||
if codex.Item != nil {
|
||||
itemType = codex.Item.Type
|
||||
}
|
||||
|
||||
if itemType == "agent_message" {
|
||||
var event JSONEvent
|
||||
if err := json.Unmarshal(line, &event); err != nil {
|
||||
warnFn(fmt.Sprintf("Failed to parse Codex event: %s", truncateBytes(line, 100)))
|
||||
continue
|
||||
}
|
||||
|
||||
normalized := ""
|
||||
if event.Item != nil {
|
||||
normalized = normalizeText(event.Item.Text)
|
||||
}
|
||||
infoFn(fmt.Sprintf("item.completed event item_type=%s message_len=%d", itemType, len(normalized)))
|
||||
if normalized != "" {
|
||||
codexMessage = normalized
|
||||
notifyMessage()
|
||||
}
|
||||
} else {
|
||||
infoFn(fmt.Sprintf("item.completed event item_type=%s", itemType))
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
isCodex := hasItemType
|
||||
if !isCodex {
|
||||
if _, ok := raw["thread_id"]; ok {
|
||||
isCodex = true
|
||||
}
|
||||
var raw map[string]json.RawMessage
|
||||
if err := json.Unmarshal(line, &raw); err != nil {
|
||||
warnFn(fmt.Sprintf("Failed to parse line: %s", truncateBytes(line, 100)))
|
||||
continue
|
||||
}
|
||||
|
||||
switch {
|
||||
case isCodex:
|
||||
var event JSONEvent
|
||||
if err := json.Unmarshal([]byte(line), &event); err != nil {
|
||||
warnFn(fmt.Sprintf("Failed to parse Codex event: %s", truncate(line, 100)))
|
||||
continue
|
||||
}
|
||||
|
||||
var details []string
|
||||
if event.ThreadID != "" {
|
||||
details = append(details, fmt.Sprintf("thread_id=%s", event.ThreadID))
|
||||
}
|
||||
if event.Item != nil && event.Item.Type != "" {
|
||||
details = append(details, fmt.Sprintf("item_type=%s", event.Item.Type))
|
||||
}
|
||||
if len(details) > 0 {
|
||||
infoFn(fmt.Sprintf("Parsed event #%d type=%s (%s)", totalEvents, event.Type, strings.Join(details, ", ")))
|
||||
} else {
|
||||
infoFn(fmt.Sprintf("Parsed event #%d type=%s", totalEvents, event.Type))
|
||||
}
|
||||
|
||||
switch event.Type {
|
||||
case "thread.started":
|
||||
threadID = event.ThreadID
|
||||
infoFn(fmt.Sprintf("thread.started event thread_id=%s", threadID))
|
||||
case "item.completed":
|
||||
var itemType string
|
||||
var normalized string
|
||||
if event.Item != nil {
|
||||
itemType = event.Item.Type
|
||||
normalized = normalizeText(event.Item.Text)
|
||||
}
|
||||
infoFn(fmt.Sprintf("item.completed event item_type=%s message_len=%d", itemType, len(normalized)))
|
||||
if event.Item != nil && event.Item.Type == "agent_message" && normalized != "" {
|
||||
codexMessage = normalized
|
||||
notifyMessage()
|
||||
}
|
||||
}
|
||||
|
||||
case hasKey(raw, "subtype") || hasKey(raw, "result"):
|
||||
var event ClaudeEvent
|
||||
if err := json.Unmarshal([]byte(line), &event); err != nil {
|
||||
warnFn(fmt.Sprintf("Failed to parse Claude event: %s", truncate(line, 100)))
|
||||
if err := json.Unmarshal(line, &event); err != nil {
|
||||
warnFn(fmt.Sprintf("Failed to parse Claude event: %s", truncateBytes(line, 100)))
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -167,8 +190,8 @@ func parseJSONStreamInternal(r io.Reader, warnFn func(string), infoFn func(strin
|
||||
|
||||
case hasKey(raw, "role") || hasKey(raw, "delta"):
|
||||
var event GeminiEvent
|
||||
if err := json.Unmarshal([]byte(line), &event); err != nil {
|
||||
warnFn(fmt.Sprintf("Failed to parse Gemini event: %s", truncate(line, 100)))
|
||||
if err := json.Unmarshal(line, &event); err != nil {
|
||||
warnFn(fmt.Sprintf("Failed to parse Gemini event: %s", truncateBytes(line, 100)))
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -184,14 +207,10 @@ func parseJSONStreamInternal(r io.Reader, warnFn func(string), infoFn func(strin
|
||||
infoFn(fmt.Sprintf("Parsed Gemini event #%d type=%s role=%s delta=%t status=%s content_len=%d", totalEvents, event.Type, event.Role, event.Delta, event.Status, len(event.Content)))
|
||||
|
||||
default:
|
||||
warnFn(fmt.Sprintf("Unknown event format: %s", truncate(line, 100)))
|
||||
warnFn(fmt.Sprintf("Unknown event format: %s", truncateBytes(line, 100)))
|
||||
}
|
||||
}
|
||||
|
||||
if err := scanner.Err(); err != nil && !errors.Is(err, io.EOF) {
|
||||
warnFn("Read stdout error: " + err.Error())
|
||||
}
|
||||
|
||||
switch {
|
||||
case geminiBuffer.Len() > 0:
|
||||
message = geminiBuffer.String()
|
||||
@@ -236,6 +255,79 @@ func discardInvalidJSON(decoder *json.Decoder, reader *bufio.Reader) (*bufio.Rea
|
||||
return bufio.NewReader(io.MultiReader(bytes.NewReader(remaining), reader)), err
|
||||
}
|
||||
|
||||
func readLineWithLimit(r *bufio.Reader, maxBytes int, previewBytes int) (line []byte, tooLong bool, err error) {
|
||||
if r == nil {
|
||||
return nil, false, errors.New("reader is nil")
|
||||
}
|
||||
if maxBytes <= 0 {
|
||||
return nil, false, errors.New("maxBytes must be > 0")
|
||||
}
|
||||
if previewBytes < 0 {
|
||||
previewBytes = 0
|
||||
}
|
||||
|
||||
part, isPrefix, err := r.ReadLine()
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
if !isPrefix {
|
||||
if len(part) > maxBytes {
|
||||
return part[:min(len(part), previewBytes)], true, nil
|
||||
}
|
||||
return part, false, nil
|
||||
}
|
||||
|
||||
preview := make([]byte, 0, min(previewBytes, len(part)))
|
||||
if previewBytes > 0 {
|
||||
preview = append(preview, part[:min(previewBytes, len(part))]...)
|
||||
}
|
||||
|
||||
buf := make([]byte, 0, min(maxBytes, len(part)*2))
|
||||
total := 0
|
||||
if len(part) > maxBytes {
|
||||
tooLong = true
|
||||
} else {
|
||||
buf = append(buf, part...)
|
||||
total = len(part)
|
||||
}
|
||||
|
||||
for isPrefix {
|
||||
part, isPrefix, err = r.ReadLine()
|
||||
if err != nil {
|
||||
return nil, tooLong, err
|
||||
}
|
||||
|
||||
if previewBytes > 0 && len(preview) < previewBytes {
|
||||
preview = append(preview, part[:min(previewBytes-len(preview), len(part))]...)
|
||||
}
|
||||
|
||||
if !tooLong {
|
||||
if total+len(part) > maxBytes {
|
||||
tooLong = true
|
||||
continue
|
||||
}
|
||||
buf = append(buf, part...)
|
||||
total += len(part)
|
||||
}
|
||||
}
|
||||
|
||||
if tooLong {
|
||||
return preview, true, nil
|
||||
}
|
||||
return buf, false, nil
|
||||
}
|
||||
|
||||
func truncateBytes(b []byte, maxLen int) string {
|
||||
if len(b) <= maxLen {
|
||||
return string(b)
|
||||
}
|
||||
if maxLen < 0 {
|
||||
return ""
|
||||
}
|
||||
return string(b[:maxLen]) + "..."
|
||||
}
|
||||
|
||||
func normalizeText(text interface{}) string {
|
||||
switch v := text.(type) {
|
||||
case string:
|
||||
|
||||
Reference in New Issue
Block a user