package main import ( "bufio" "context" "errors" "fmt" "os" "path/filepath" "strconv" "strings" "sync" "sync/atomic" "time" ) // Logger writes log messages asynchronously to a temp file. // It is intentionally minimal: a buffered channel + single worker goroutine // to avoid contention while keeping ordering guarantees. type Logger struct { path string file *os.File writer *bufio.Writer ch chan logEntry flushReq chan chan struct{} done chan struct{} closed atomic.Bool closeOnce sync.Once workerWG sync.WaitGroup pendingWG sync.WaitGroup flushMu sync.Mutex } type logEntry struct { level string msg string } // CleanupStats captures the outcome of a cleanupOldLogs run. type CleanupStats struct { Scanned int Deleted int Kept int Errors int DeletedFiles []string KeptFiles []string } var ( processRunningCheck = isProcessRunning processStartTimeFn = getProcessStartTime removeLogFileFn = os.Remove globLogFiles = filepath.Glob fileStatFn = os.Lstat // Use Lstat to detect symlinks evalSymlinksFn = filepath.EvalSymlinks ) // NewLogger creates the async logger and starts the worker goroutine. // The log file is created under os.TempDir() using the required naming scheme. func NewLogger() (*Logger, error) { return NewLoggerWithSuffix("") } // NewLoggerWithSuffix creates a logger with an optional suffix in the filename. // Useful for tests that need isolated log files within the same process. func NewLoggerWithSuffix(suffix string) (*Logger, error) { filename := fmt.Sprintf("%s-%d", primaryLogPrefix(), os.Getpid()) if suffix != "" { filename += "-" + suffix } filename += ".log" path := filepath.Clean(filepath.Join(os.TempDir(), filename)) f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o600) if err != nil { return nil, err } l := &Logger{ path: path, file: f, writer: bufio.NewWriterSize(f, 4096), ch: make(chan logEntry, 1000), flushReq: make(chan chan struct{}, 1), done: make(chan struct{}), } l.workerWG.Add(1) go l.run() return l, nil } // Path returns the underlying log file path (useful for tests/inspection). func (l *Logger) Path() string { if l == nil { return "" } return l.path } // Info logs at INFO level. func (l *Logger) Info(msg string) { l.log("INFO", msg) } // Warn logs at WARN level. func (l *Logger) Warn(msg string) { l.log("WARN", msg) } // Debug logs at DEBUG level. func (l *Logger) Debug(msg string) { l.log("DEBUG", msg) } // Error logs at ERROR level. func (l *Logger) Error(msg string) { l.log("ERROR", msg) } // Close stops the worker and syncs the log file. // The log file is NOT removed, allowing inspection after program exit. // It is safe to call multiple times. // Returns after a 5-second timeout if worker doesn't stop gracefully. func (l *Logger) Close() error { if l == nil { return nil } var closeErr error l.closeOnce.Do(func() { l.closed.Store(true) close(l.done) close(l.ch) // Wait for worker with timeout workerDone := make(chan struct{}) go func() { l.workerWG.Wait() close(workerDone) }() select { case <-workerDone: // Worker stopped gracefully case <-time.After(5 * time.Second): // Worker timeout - proceed with cleanup anyway closeErr = fmt.Errorf("logger worker timeout during close") } if err := l.writer.Flush(); err != nil && closeErr == nil { closeErr = err } if err := l.file.Sync(); err != nil && closeErr == nil { closeErr = err } if err := l.file.Close(); err != nil && closeErr == nil { closeErr = err } // Log file is kept for debugging - NOT removed // Users can manually clean up /tmp/-*.log files }) return closeErr } // RemoveLogFile removes the log file. Should only be called after Close(). func (l *Logger) RemoveLogFile() error { if l == nil { return nil } return os.Remove(l.path) } // Flush waits for all pending log entries to be written. Primarily for tests. // Returns after a 5-second timeout to prevent indefinite blocking. func (l *Logger) Flush() { if l == nil { return } l.flushMu.Lock() defer l.flushMu.Unlock() // Wait for pending entries with timeout done := make(chan struct{}) go func() { l.pendingWG.Wait() close(done) }() ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() select { case <-done: // All pending entries processed case <-ctx.Done(): // Timeout - return without full flush return } // Trigger writer flush flushDone := make(chan struct{}) select { case l.flushReq <- flushDone: // Wait for flush to complete select { case <-flushDone: // Flush completed case <-time.After(1 * time.Second): // Flush timeout } case <-l.done: // Logger is closing case <-time.After(1 * time.Second): // Timeout sending flush request } } func (l *Logger) log(level, msg string) { if l == nil { return } if l.closed.Load() { return } entry := logEntry{level: level, msg: msg} l.flushMu.Lock() l.pendingWG.Add(1) l.flushMu.Unlock() select { case l.ch <- entry: // Successfully sent to channel case <-l.done: // Logger is closing, drop this entry l.pendingWG.Done() return } } func (l *Logger) run() { defer l.workerWG.Done() ticker := time.NewTicker(500 * time.Millisecond) defer ticker.Stop() for { select { case entry, ok := <-l.ch: if !ok { // Channel closed, final flush _ = l.writer.Flush() return } timestamp := time.Now().Format("2006-01-02 15:04:05.000") pid := os.Getpid() fmt.Fprintf(l.writer, "[%s] [PID:%d] %s: %s\n", timestamp, pid, entry.level, entry.msg) l.pendingWG.Done() case <-ticker.C: _ = l.writer.Flush() case flushDone := <-l.flushReq: // Explicit flush request - flush writer and sync to disk _ = l.writer.Flush() _ = l.file.Sync() close(flushDone) } } } // cleanupOldLogs scans os.TempDir() for wrapper log files and removes those // whose owning process is no longer running (i.e., orphaned logs). // It includes safety checks for: // - PID reuse: Compares file modification time with process start time // - Symlink attacks: Ensures files are within TempDir and not symlinks func cleanupOldLogs() (CleanupStats, error) { var stats CleanupStats tempDir := os.TempDir() prefixes := logPrefixes() if len(prefixes) == 0 { prefixes = []string{defaultWrapperName} } seen := make(map[string]struct{}) var matches []string for _, prefix := range prefixes { pattern := filepath.Join(tempDir, fmt.Sprintf("%s-*.log", prefix)) found, err := globLogFiles(pattern) if err != nil { logWarn(fmt.Sprintf("cleanupOldLogs: failed to list logs: %v", err)) return stats, fmt.Errorf("cleanupOldLogs: %w", err) } for _, path := range found { if _, ok := seen[path]; ok { continue } seen[path] = struct{}{} matches = append(matches, path) } } var removeErr error for _, path := range matches { stats.Scanned++ filename := filepath.Base(path) // Security check: Verify file is not a symlink and is within tempDir if shouldSkipFile, reason := isUnsafeFile(path, tempDir); shouldSkipFile { stats.Kept++ stats.KeptFiles = append(stats.KeptFiles, filename) if reason != "" { logWarn(fmt.Sprintf("cleanupOldLogs: skipping %s: %s", filename, reason)) } continue } pid, ok := parsePIDFromLog(path) if !ok { stats.Kept++ stats.KeptFiles = append(stats.KeptFiles, filename) continue } // Check if process is running if !processRunningCheck(pid) { // Process not running, safe to delete if err := removeLogFileFn(path); err != nil { if errors.Is(err, os.ErrNotExist) { // File already deleted by another process, don't count as success stats.Kept++ stats.KeptFiles = append(stats.KeptFiles, filename+" (already deleted)") continue } stats.Errors++ logWarn(fmt.Sprintf("cleanupOldLogs: failed to remove %s: %v", filename, err)) removeErr = errors.Join(removeErr, fmt.Errorf("failed to remove %s: %w", filename, err)) continue } stats.Deleted++ stats.DeletedFiles = append(stats.DeletedFiles, filename) continue } // Process is running, check for PID reuse if isPIDReused(path, pid) { // PID was reused, the log file is orphaned if err := removeLogFileFn(path); err != nil { if errors.Is(err, os.ErrNotExist) { stats.Kept++ stats.KeptFiles = append(stats.KeptFiles, filename+" (already deleted)") continue } stats.Errors++ logWarn(fmt.Sprintf("cleanupOldLogs: failed to remove %s (PID reused): %v", filename, err)) removeErr = errors.Join(removeErr, fmt.Errorf("failed to remove %s: %w", filename, err)) continue } stats.Deleted++ stats.DeletedFiles = append(stats.DeletedFiles, filename) continue } // Process is running and owns this log file stats.Kept++ stats.KeptFiles = append(stats.KeptFiles, filename) } if removeErr != nil { return stats, fmt.Errorf("cleanupOldLogs: %w", removeErr) } return stats, nil } // isUnsafeFile checks if a file is unsafe to delete (symlink or outside tempDir). // Returns (true, reason) if the file should be skipped. func isUnsafeFile(path string, tempDir string) (bool, string) { // Check if file is a symlink info, err := fileStatFn(path) if err != nil { if errors.Is(err, os.ErrNotExist) { return true, "" // File disappeared, skip silently } return true, fmt.Sprintf("stat failed: %v", err) } // Check if it's a symlink if info.Mode()&os.ModeSymlink != 0 { return true, "refusing to delete symlink" } // Resolve any path traversal and verify it's within tempDir resolvedPath, err := evalSymlinksFn(path) if err != nil { return true, fmt.Sprintf("path resolution failed: %v", err) } // Get absolute path of tempDir absTempDir, err := filepath.Abs(tempDir) if err != nil { return true, fmt.Sprintf("tempDir resolution failed: %v", err) } // Ensure resolved path is within tempDir relPath, err := filepath.Rel(absTempDir, resolvedPath) if err != nil || strings.HasPrefix(relPath, "..") { return true, "file is outside tempDir" } return false, "" } // isPIDReused checks if a PID has been reused by comparing file modification time // with process start time. Returns true if the log file was created by a different // process that previously had the same PID. func isPIDReused(logPath string, pid int) bool { // Get file modification time (when log was last written) info, err := fileStatFn(logPath) if err != nil { // If we can't stat the file, be conservative and keep it return false } fileModTime := info.ModTime() // Get process start time procStartTime := processStartTimeFn(pid) if procStartTime.IsZero() { // Can't determine process start time // Check if file is very old (>7 days), likely from a dead process if time.Since(fileModTime) > 7*24*time.Hour { return true // File is old enough to be from a different process } return false // Be conservative for recent files } // If the log file was modified before the process started, PID was reused // Add a small buffer (1 second) to account for clock skew and file system timing return fileModTime.Add(1 * time.Second).Before(procStartTime) } func parsePIDFromLog(path string) (int, bool) { name := filepath.Base(path) prefixes := logPrefixes() if len(prefixes) == 0 { prefixes = []string{defaultWrapperName} } for _, prefix := range prefixes { prefixWithDash := fmt.Sprintf("%s-", prefix) if !strings.HasPrefix(name, prefixWithDash) || !strings.HasSuffix(name, ".log") { continue } core := strings.TrimSuffix(strings.TrimPrefix(name, prefixWithDash), ".log") if core == "" { continue } pidPart := core if idx := strings.IndexRune(core, '-'); idx != -1 { pidPart = core[:idx] } if pidPart == "" { continue } pid, err := strconv.Atoi(pidPart) if err != nil || pid <= 0 { continue } return pid, true } return 0, false }