From 0edb3ddd39ec5ed80cf580148b119d0528875fa9 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 22 Mar 2022 17:10:43 +0100 Subject: [PATCH 1/2] podman stats: calc CPU percentage correctly When you run podman stats, the first interval always shows the wrong cpu usage. To calculate cpu percentage we get the cpu time from the cgroup and compare this against the system time between two stats. Since the first time we do not have a previous stats an empty struct is used instead. Thus we do not use the actual running time of the container but the current unix timestamp (time since Jan 1 1970). To fix this we make sure that the previous stats time is set to the container start time, when it is empty. [NO NEW TESTS NEEDED] No idea how I could create a test which would have a predictable cpu usage. See the linked bugzilla for a reproducer. Fixes https://bugzilla.redhat.com/show_bug.cgi?id=2066145 Signed-off-by: Paul Holzinger --- libpod/pod.go | 9 +-------- libpod/stats.go | 12 +++++++++++- pkg/api/handlers/compat/containers_stats.go | 2 +- pkg/domain/infra/abi/containers.go | 7 +------ 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/libpod/pod.go b/libpod/pod.go index 6273ff2478..ed2d97b374 100644 --- a/libpod/pod.go +++ b/libpod/pod.go @@ -422,10 +422,6 @@ type PodContainerStats struct { // GetPodStats returns the stats for each of its containers func (p *Pod) GetPodStats(previousContainerStats map[string]*define.ContainerStats) (map[string]*define.ContainerStats, error) { - var ( - ok bool - prevStat *define.ContainerStats - ) p.lock.Lock() defer p.lock.Unlock() @@ -438,10 +434,7 @@ func (p *Pod) GetPodStats(previousContainerStats map[string]*define.ContainerSta } newContainerStats := make(map[string]*define.ContainerStats) for _, c := range containers { - if prevStat, ok = previousContainerStats[c.ID()]; !ok { - prevStat = &define.ContainerStats{} - } - newStats, err := c.GetContainerStats(prevStat) + newStats, err := c.GetContainerStats(previousContainerStats[c.ID()]) // If the container wasn't running, don't include it // but also suppress the error if err != nil && errors.Cause(err) != define.ErrCtrStateInvalid { diff --git a/libpod/stats.go b/libpod/stats.go index b5d39240d5..988aa4b5c2 100644 --- a/libpod/stats.go +++ b/libpod/stats.go @@ -14,7 +14,9 @@ import ( "github.com/pkg/errors" ) -// GetContainerStats gets the running stats for a given container +// GetContainerStats gets the running stats for a given container. +// The previousStats is used to correctly calculate cpu percentages. You +// should pass nil if there is no previous stat for this container. func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*define.ContainerStats, error) { stats := new(define.ContainerStats) stats.ContainerID = c.ID() @@ -36,6 +38,14 @@ func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*de return stats, define.ErrCtrStateInvalid } + if previousStats == nil { + previousStats = &define.ContainerStats{ + // if we have no prev stats use the container start time as prev time + // otherwise we cannot correctly calculate the CPU percentage + SystemNano: uint64(c.state.StartedTime.UnixNano()), + } + } + cgroupPath, err := c.cGroupPath() if err != nil { return nil, err diff --git a/pkg/api/handlers/compat/containers_stats.go b/pkg/api/handlers/compat/containers_stats.go index 99f14d02fb..77b16b03e7 100644 --- a/pkg/api/handlers/compat/containers_stats.go +++ b/pkg/api/handlers/compat/containers_stats.go @@ -56,7 +56,7 @@ func StatsContainer(w http.ResponseWriter, r *http.Request) { return } - stats, err := ctnr.GetContainerStats(&define.ContainerStats{}) + stats, err := ctnr.GetContainerStats(nil) if err != nil { utils.InternalServerError(w, errors.Wrapf(err, "failed to obtain Container %s stats", name)) return diff --git a/pkg/domain/infra/abi/containers.go b/pkg/domain/infra/abi/containers.go index e6feb7c824..1986228a66 100644 --- a/pkg/domain/infra/abi/containers.go +++ b/pkg/domain/infra/abi/containers.go @@ -1431,12 +1431,7 @@ func (ic *ContainerEngine) ContainerStats(ctx context.Context, namesOrIds []stri reportStats := []define.ContainerStats{} for _, ctr := range containers { - prev, ok := containerStats[ctr.ID()] - if !ok { - prev = &define.ContainerStats{} - } - - stats, err := ctr.GetContainerStats(prev) + stats, err := ctr.GetContainerStats(containerStats[ctr.ID()]) if err != nil { cause := errors.Cause(err) if queryAll && (cause == define.ErrCtrRemoved || cause == define.ErrNoSuchCtr || cause == define.ErrCtrStateInvalid) { From 130bcc3a93d82674eaaf2a62f854983db263a940 Mon Sep 17 00:00:00 2001 From: Paul Holzinger Date: Tue, 22 Mar 2022 17:37:23 +0100 Subject: [PATCH 2/2] podman stats: improve cpu average calc We can just calculate the cpu percent for the time the container is running. There is no need to use datapoints. Signed-off-by: Paul Holzinger --- libpod/define/containerstate.go | 1 - libpod/stats.go | 10 ++-------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/libpod/define/containerstate.go b/libpod/define/containerstate.go index 23ba1f451e..9ad3aec08e 100644 --- a/libpod/define/containerstate.go +++ b/libpod/define/containerstate.go @@ -138,7 +138,6 @@ type ContainerStats struct { CPU float64 CPUNano uint64 CPUSystemNano uint64 - DataPoints int64 SystemNano uint64 MemUsage uint64 MemLimit uint64 diff --git a/libpod/stats.go b/libpod/stats.go index 988aa4b5c2..25baa378d4 100644 --- a/libpod/stats.go +++ b/libpod/stats.go @@ -77,8 +77,8 @@ func (c *Container) GetContainerStats(previousStats *define.ContainerStats) (*de stats.Duration = cgroupStats.CPU.Usage.Total stats.UpTime = time.Duration(stats.Duration) stats.CPU = calculateCPUPercent(cgroupStats, previousCPU, now, previousStats.SystemNano) - stats.AvgCPU = calculateAvgCPU(stats.CPU, previousStats.AvgCPU, previousStats.DataPoints) - stats.DataPoints = previousStats.DataPoints + 1 + // calc the average cpu usage for the time the container is running + stats.AvgCPU = calculateCPUPercent(cgroupStats, 0, now, uint64(c.state.StartedTime.UnixNano())) stats.MemUsage = cgroupStats.Memory.Usage.Usage stats.MemLimit = c.getMemLimit() stats.MemPerc = (float64(stats.MemUsage) / float64(stats.MemLimit)) * 100 @@ -156,9 +156,3 @@ func calculateBlockIO(stats *cgroups.Metrics) (read uint64, write uint64) { } return } - -// calculateAvgCPU calculates the avg CPU percentage given the previous average and the number of data points. -func calculateAvgCPU(statsCPU float64, prevAvg float64, prevData int64) float64 { - avgPer := ((prevAvg * float64(prevData)) + statsCPU) / (float64(prevData) + 1) - return avgPer -}