From 408e764b945a9b3b4ebd853d0270cda0d24c48dc Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Fri, 3 Mar 2023 09:42:30 +0100 Subject: [PATCH] events: no duplicates when streaming during a log rotation When streaming events, prevent returning duplicates after a log rotation by marking a beginning and an end for rotated events. Before starting to stream, get a timestamp while holding the event lock. The timestamp allows for detecting whether a rotation event happened while reading the log file and to skip all events between the begin and end rotation event. In an ideal scenario, we could detect rotated events by enforcing a chronological order when reading and skip those detected to not be more recent than the last read event. However, events are not always _written_ in chronological order. While this can be changed, existing event files could not be read correctly anymore. Fixes: #17665 Signed-off-by: Valentin Rothberg --- libpod/events/logfile.go | 145 +++++++++++++++++++++++++++------- libpod/events/logfile_test.go | 18 ++--- test/system/090-events.bats | 81 ++++++++++++++++++- 3 files changed, 202 insertions(+), 42 deletions(-) diff --git a/libpod/events/logfile.go b/libpod/events/logfile.go index 0b59d87f19..17c93e9609 100644 --- a/libpod/events/logfile.go +++ b/libpod/events/logfile.go @@ -57,22 +57,8 @@ func (e EventLogFile) Write(ee Event) error { return err } - rotated, err := rotateLog(e.options.LogFilePath, eventJSONString, e.options.LogFileMaxSize) - if err != nil { - return fmt.Errorf("rotating log file: %w", err) - } - - if rotated { - rEvent := NewEvent(Rotate) - rEvent.Type = System - rEvent.Name = e.options.LogFilePath - rotateJSONString, err := rEvent.ToJSONString() - if err != nil { - return err - } - if err := e.writeString(rotateJSONString); err != nil { - return err - } + if _, err := rotateLog(e.options.LogFilePath, eventJSONString, e.options.LogFileMaxSize); err != nil { + return err } return e.writeString(eventJSONString) @@ -83,21 +69,42 @@ func (e EventLogFile) writeString(s string) error { if err != nil { return err } - if _, err := f.WriteString(s + "\n"); err != nil { - return err - } - return nil + return writeToFile(s, f) +} + +func writeToFile(s string, f *os.File) error { + _, err := f.WriteString(s + "\n") + return err } func (e EventLogFile) getTail(options ReadOptions) (*tail.Tail, error) { - reopen := true seek := tail.SeekInfo{Offset: 0, Whence: io.SeekEnd} if options.FromStart || !options.Stream { seek.Whence = 0 - reopen = false } stream := options.Stream - return tail.TailFile(e.options.LogFilePath, tail.Config{ReOpen: reopen, Follow: stream, Location: &seek, Logger: tail.DiscardingLogger, Poll: true}) + return tail.TailFile(e.options.LogFilePath, tail.Config{ReOpen: stream, Follow: stream, Location: &seek, Logger: tail.DiscardingLogger, Poll: true}) +} + +func (e EventLogFile) readRotateEvent(event *Event) (begin bool, end bool, err error) { + if event.Status != Rotate { + return + } + if event.Details.Attributes == nil { + // may be an old event before storing attributes in the rotate event + return + } + switch event.Details.Attributes[rotateEventAttribute] { + case rotateEventBegin: + begin = true + return + case rotateEventEnd: + end = true + return + default: + err = fmt.Errorf("unknown rotate-event attribute %q", event.Details.Attributes[rotateEventAttribute]) + return + } } // Reads from the log file @@ -125,8 +132,26 @@ func (e EventLogFile) Read(ctx context.Context, options ReadOptions) error { } logrus.Debugf("Reading events from file %q", e.options.LogFilePath) + // Get the time *before* starting to read. Comparing the timestamps + // with events avoids returning events more than once after a log-file + // rotation. + readTime, err := func() (time.Time, error) { + // We need to lock events file + lock, err := lockfile.GetLockFile(e.options.LogFilePath + ".lock") + if err != nil { + return time.Time{}, err + } + lock.Lock() + defer lock.Unlock() + return time.Now(), nil + }() + if err != nil { + return err + } + var line *tail.Line var ok bool + var skipRotate bool for { select { case <-ctx.Done(): @@ -146,11 +171,30 @@ func (e EventLogFile) Read(ctx context.Context, options ReadOptions) error { return err } switch event.Type { - case Image, Volume, Pod, System, Container, Network: - // no-op + case Image, Volume, Pod, Container, Network: + // no-op + case System: + begin, end, err := e.readRotateEvent(event) + if err != nil { + return err + } + if begin && event.Time.After(readTime) { + // If the rotation event happened _after_ we + // started reading, we need to ignore/skip + // subsequent event until the end of the + // rotation. + skipRotate = true + logrus.Debugf("Skipping already read events after log-file rotation: %v", event) + } else if end { + // This rotate event + skipRotate = false + } default: return fmt.Errorf("event type %s is not valid in %s", event.Type.String(), e.options.LogFilePath) } + if skipRotate { + continue + } if applyFilters(event, filterMap) { options.EventChannel <- event } @@ -162,8 +206,43 @@ func (e EventLogFile) String() string { return LogFile.String() } +const ( + rotateEventAttribute = "io.podman.event.rotate" + rotateEventBegin = "begin" + rotateEventEnd = "end" +) + +func writeRotateEvent(f *os.File, logFilePath string, begin bool) error { + rEvent := NewEvent(Rotate) + rEvent.Type = System + rEvent.Name = logFilePath + rEvent.Attributes = make(map[string]string) + if begin { + rEvent.Attributes[rotateEventAttribute] = rotateEventBegin + } else { + rEvent.Attributes[rotateEventAttribute] = rotateEventEnd + } + rotateJSONString, err := rEvent.ToJSONString() + if err != nil { + return err + } + return writeToFile(rotateJSONString, f) +} + // Rotates the log file if the log file size and content exceeds limit func rotateLog(logfile string, content string, limit uint64) (bool, error) { + needsRotation, err := logNeedsRotation(logfile, content, limit) + if err != nil || !needsRotation { + return false, err + } + if err := truncate(logfile); err != nil { + return false, err + } + return true, nil +} + +// logNeedsRotation return true if the log file needs to be rotated. +func logNeedsRotation(logfile string, content string, limit uint64) (bool, error) { if limit == 0 { return false, nil } @@ -181,9 +260,6 @@ func rotateLog(logfile string, content string, limit uint64) (bool, error) { return false, nil } - if err := truncate(logfile); err != nil { - return false, err - } return true, nil } @@ -224,9 +300,16 @@ func truncate(filePath string) error { return err } } + + if err := writeRotateEvent(tmp, filePath, true); err != nil { + return fmt.Errorf("writing rotation event begin marker: %w", err) + } if _, err := reader.WriteTo(tmp); err != nil { return fmt.Errorf("writing truncated contents: %w", err) } + if err := writeRotateEvent(tmp, filePath, false); err != nil { + return fmt.Errorf("writing rotation event end marker: %w", err) + } if err := renameLog(tmp.Name(), filePath); err != nil { return fmt.Errorf("writing back %s to %s: %w", tmp.Name(), filePath, err) @@ -254,6 +337,12 @@ func renameLog(from, to string) error { } defer fFrom.Close() + // Remove the old file to make sure we're not truncating current + // readers. + if err := os.Remove(to); err != nil { + return fmt.Errorf("recreating file %s: %w", to, err) + } + fTo, err := os.Create(to) if err != nil { return err diff --git a/libpod/events/logfile_test.go b/libpod/events/logfile_test.go index 50141168e4..0fb344177c 100644 --- a/libpod/events/logfile_test.go +++ b/libpod/events/logfile_test.go @@ -2,6 +2,7 @@ package events import ( "os" + "strings" "testing" "github.com/stretchr/testify/require" @@ -75,12 +76,6 @@ func TestTruncationOutput(t *testing.T) { 8 9 10 -` - contentAfter := `6 -7 -8 -9 -10 ` // Create dummy file tmp, err := os.CreateTemp("", "log-rotation") @@ -99,11 +94,14 @@ func TestTruncationOutput(t *testing.T) { require.NoError(t, err) afterTruncation, err := os.ReadFile(tmp.Name()) require.NoError(t, err) - - // Test if rotation was successful - require.NoError(t, err, "Log content has changed") + // Content has changed require.NotEqual(t, beforeTruncation, afterTruncation) - require.Equal(t, string(afterTruncation), contentAfter) + split := strings.Split(string(afterTruncation), "\n") + require.Len(t, split, 8) // 2 events + 5 rotated lines + last new line + require.Contains(t, split[0], "\"Attributes\":{\"io.podman.event.rotate\":\"begin\"}") + require.Equal(t, split[1:6], []string{"6", "7", "8", "9", "10"}) + require.Contains(t, split[6], "\"Attributes\":{\"io.podman.event.rotate\":\"end\"}") + require.Contains(t, split[7], "") } func TestRenameLog(t *testing.T) { diff --git a/test/system/090-events.bats b/test/system/090-events.bats index b48314bff5..513b32ce12 100644 --- a/test/system/090-events.bats +++ b/test/system/090-events.bats @@ -209,7 +209,8 @@ EOF expectedContentAfterTruncation=$PODMAN_TMPDIR/truncated.txt run_podman create $IMAGE - CONTAINERS_CONF=$containersConf run_podman rm $output + ctrID=$output + CONTAINERS_CONF=$containersConf run_podman rm $ctrID tail -n52 $eventsFile >> $expectedContentAfterTruncation # Make sure the events file looks as expected. @@ -217,9 +218,81 @@ EOF # Make sure that `podman events` can read the file, and that it returns the # same amount of events. We checked the contents before. - CONTAINERS_CONF=$containersConf run_podman events --stream=false --since="2022-03-06T11:26:42.723667984+02:00" - assert "${#lines[@]}" = 51 "Number of events returned" - is "${lines[-2]}" ".* log-rotation $eventsFile" + CONTAINERS_CONF=$containersConf run_podman events --stream=false --since="2022-03-06T11:26:42.723667984+02:00" --format=json + assert "${#lines[@]}" = 52 "Number of events returned" + is "${lines[0]}" "{\"Name\":\"$eventsFile\",\"Status\":\"log-rotation\",\"Time\":\".*\",\"Type\":\"system\",\"Attributes\":{\"io.podman.event.rotate\":\"begin\"}}" + is "${lines[-2]}" "{\"Name\":\"$eventsFile\",\"Status\":\"log-rotation\",\"Time\":\".*\",\"Type\":\"system\",\"Attributes\":{\"io.podman.event.rotate\":\"end\"}}" + is "${lines[-1]}" "{\"ID\":\"$ctrID\",\"Image\":\"$IMAGE\",\"Name\":\".*\",\"Status\":\"remove\",\"Time\":\".*\",\"Type\":\"container\",\"Attributes\":{.*}}" +} + +@test "events log-file no duplicates" { + skip_if_remote "setting CONTAINERS_CONF logger options does not affect remote client" + + # This test makes sure that events are not returned more than once when + # streaming during a log-file rotation. + eventsFile=$PODMAN_TMPDIR/events.txt + eventsJSON=$PODMAN_TMPDIR/events.json + containersConf=$PODMAN_TMPDIR/containers.conf + cat >$containersConf < $eventsJSON & + + # Now wait for the above podman-events process to write to the eventsJSON + # file, so we know it's reading. + retries=20 + while [[ $retries -gt 0 ]]; do + if [ -s $eventsJSON ]; then + break + fi + retries=$((retries - 1)) + sleep 0.5 + done + assert $retries -gt 0 \ + "Timed out waiting for podman-events to start reading pre-existing events" + + CONTAINERS_CONF=$containersConf run_podman create $IMAGE + ctrID=$output + CONTAINERS_CONF=$containersConf run_podman rm -f $ctrID + + # Now wait for the last event above to be read by the `podman-events` + # process above. + retries=20 + while [[ $retries -gt 0 ]]; do + run grep "\"Status\"\:\"remove\"," $eventsJSON + if [[ $status -eq 0 ]]; then + break + fi + retries=$((retries - 1)) + sleep 0.5 + done + test "$status" = 0 || die "Did not find expected 'Status:remove' line in log" + + # Make sure that the log file has been rotated as expected. + run cat $eventsFile + assert "${#lines[@]}" = 54 "Number of events in $eventsFile" # 49 previous + 2 rotation + pull/create/rm + is "${lines[0]}" "{\"Name\":\"$eventsFile\",\"Status\":\"log-rotation\",\"Time\":\".*\",\"Type\":\"system\",\"Attributes\":{\"io.podman.event.rotate\":\"begin\"}}" + is "${lines[1]}" "{\"Name\":\"busybox\",\"Status\":\"pull\",\"Time\":\"2022-04-06T11:26:42.723667951+02:00\",\"Type\":\"image\",\"Attributes\":null}" + is "${lines[49]}" "{\"Name\":\"busybox\",\"Status\":\"pull\",\"Time\":\"2022-04-06T11:26:42.723667999+02:00\",\"Type\":\"image\",\"Attributes\":null}" + is "${lines[50]}" "{\"Name\":\"$eventsFile\",\"Status\":\"log-rotation\",\"Time\":\".*\",\"Type\":\"system\",\"Attributes\":{\"io.podman.event.rotate\":\"end\"}}" + is "${lines[53]}" "{\"ID\":\"$ctrID\",\"Image\":\"$IMAGE\",\"Name\":\".*\",\"Status\":\"remove\",\"Time\":\".*\",\"Type\":\"container\",\"Attributes\":{.*}}" + + + # Make sure that the JSON stream looks as expected. That means it has all + # events and no duplicates. + run cat $eventsJSON + is "${lines[0]}" "{\"Name\":\"busybox\",\"Status\":\"pull\",\"Time\":\"2022-04-06T11:26:42.7236679+02:00\",\"Type\":\"image\",\"Attributes\":null}" + is "${lines[99]}" "{\"Name\":\"busybox\",\"Status\":\"pull\",\"Time\":\"2022-04-06T11:26:42.723667999+02:00\",\"Type\":\"image\",\"Attributes\":null}" + is "${lines[100]}" "{\"Name\":\"$eventsFile\",\"Status\":\"log-rotation\",\"Time\":\".*\",\"Type\":\"system\",\"Attributes\":{\"io.podman.event.rotate\":\"end\"}}" + is "${lines[103]}" "{\"ID\":\"$ctrID\",\"Image\":\"$IMAGE\",\"Name\":\".*\",\"Status\":\"remove\",\"Time\":\".*\",\"Type\":\"container\",\"Attributes\":{.*}}" } # Prior to #15633, container labels would not appear in 'die' log events