From c00ea686fef5a382849307d393226971fb1da1f3 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Thu, 7 Jul 2022 14:44:10 -0400 Subject: [PATCH] resource limits for pods added the following flags and handling for podman pod create --memory-swap --cpuset-mems --device-read-bps --device-write-bps --blkio-weight --blkio-weight-device --cpu-shares given the new backend for systemd in c/common, all of these can now be exposed to pod create. most of the heavy lifting (nearly all) is done within c/common. However, some rewiring needed to be done here as well! Signed-off-by: Charlie Doern --- cmd/podman/common/create.go | 112 ++++++++--------- .../markdown/podman-container-clone.1.md | 16 +++ docs/source/markdown/podman-pod-clone.1.md | 69 +++++++++++ docs/source/markdown/podman-pod-create.1.md | 74 +++++++++++- libpod/define/pod_inspect.go | 12 ++ libpod/options.go | 12 ++ libpod/pod.go | 108 +++++++++++------ libpod/pod_api.go | 72 +++++------ libpod/runtime_pod_linux.go | 4 +- pkg/specgen/generate/container.go | 114 +++++++++--------- pkg/specgen/generate/container_create.go | 3 - pkg/specgen/generate/oci.go | 35 ++++-- pkg/specgen/generate/pod_create.go | 37 ++++++ pkg/specgenutil/specgen.go | 12 +- test/system/200-pod.bats | 75 ++++++++---- 15 files changed, 529 insertions(+), 226 deletions(-) diff --git a/cmd/podman/common/create.go b/cmd/podman/common/create.go index d2646aa432..00873b95b5 100644 --- a/cmd/podman/common/create.go +++ b/cmd/podman/common/create.go @@ -56,22 +56,6 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions, ) _ = cmd.RegisterFlagCompletionFunc(authfileFlagName, completion.AutocompleteDefault) - blkioWeightFlagName := "blkio-weight" - createFlags.StringVar( - &cf.BlkIOWeight, - blkioWeightFlagName, "", - "Block IO weight (relative weight) accepts a weight value between 10 and 1000.", - ) - _ = cmd.RegisterFlagCompletionFunc(blkioWeightFlagName, completion.AutocompleteNone) - - blkioWeightDeviceFlagName := "blkio-weight-device" - createFlags.StringSliceVar( - &cf.BlkIOWeightDevice, - blkioWeightDeviceFlagName, []string{}, - "Block IO weight (relative device weight, format: `DEVICE_NAME:WEIGHT`)", - ) - _ = cmd.RegisterFlagCompletionFunc(blkioWeightDeviceFlagName, completion.AutocompleteDefault) - capAddFlagName := "cap-add" createFlags.StringSliceVar( &cf.CapAdd, @@ -127,14 +111,6 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions, ) _ = cmd.RegisterFlagCompletionFunc(deviceReadIopsFlagName, completion.AutocompleteDefault) - deviceWriteBpsFlagName := "device-write-bps" - createFlags.StringSliceVar( - &cf.DeviceWriteBPs, - deviceWriteBpsFlagName, []string{}, - "Limit write rate (bytes per second) to a device (e.g. --device-write-bps=/dev/sda:1mb)", - ) - _ = cmd.RegisterFlagCompletionFunc(deviceWriteBpsFlagName, completion.AutocompleteDefault) - deviceWriteIopsFlagName := "device-write-iops" createFlags.StringSliceVar( &cf.DeviceWriteIOPs, @@ -783,14 +759,6 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions, ) _ = cmd.RegisterFlagCompletionFunc(deviceFlagName, completion.AutocompleteDefault) - deviceReadBpsFlagName := "device-read-bps" - createFlags.StringSliceVar( - &cf.DeviceReadBPs, - deviceReadBpsFlagName, []string{}, - "Limit read rate (bytes per second) from a device (e.g. --device-read-bps=/dev/sda:1mb)", - ) - _ = cmd.RegisterFlagCompletionFunc(deviceReadBpsFlagName, completion.AutocompleteDefault) - volumesFromFlagName := "volumes-from" createFlags.StringArrayVar( &cf.VolumesFrom, @@ -848,22 +816,6 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions, ) _ = cmd.RegisterFlagCompletionFunc(cpuRtRuntimeFlagName, completion.AutocompleteNone) - cpuSharesFlagName := "cpu-shares" - createFlags.Uint64VarP( - &cf.CPUShares, - cpuSharesFlagName, "c", 0, - "CPU shares (relative weight)", - ) - _ = cmd.RegisterFlagCompletionFunc(cpuSharesFlagName, completion.AutocompleteNone) - - cpusetMemsFlagName := "cpuset-mems" - createFlags.StringVar( - &cf.CPUSetMems, - cpusetMemsFlagName, "", - "Memory nodes (MEMs) in which to allow execution (0-3, 0,1). Only effective on NUMA systems.", - ) - _ = cmd.RegisterFlagCompletionFunc(cpusetMemsFlagName, completion.AutocompleteNone) - memoryReservationFlagName := "memory-reservation" createFlags.StringVar( &cf.MemoryReservation, @@ -872,14 +824,6 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions, ) _ = cmd.RegisterFlagCompletionFunc(memoryReservationFlagName, completion.AutocompleteNone) - memorySwapFlagName := "memory-swap" - createFlags.StringVar( - &cf.MemorySwap, - memorySwapFlagName, "", - "Swap limit equal to memory plus swap: '-1' to enable unlimited swap", - ) - _ = cmd.RegisterFlagCompletionFunc(memorySwapFlagName, completion.AutocompleteNone) - memorySwappinessFlagName := "memory-swappiness" createFlags.Int64Var( &cf.MemorySwappiness, @@ -913,4 +857,60 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions, "Memory limit "+sizeWithUnitFormat, ) _ = cmd.RegisterFlagCompletionFunc(memoryFlagName, completion.AutocompleteNone) + + cpuSharesFlagName := "cpu-shares" + createFlags.Uint64VarP( + &cf.CPUShares, + cpuSharesFlagName, "c", 0, + "CPU shares (relative weight)", + ) + _ = cmd.RegisterFlagCompletionFunc(cpuSharesFlagName, completion.AutocompleteNone) + + cpusetMemsFlagName := "cpuset-mems" + createFlags.StringVar( + &cf.CPUSetMems, + cpusetMemsFlagName, "", + "Memory nodes (MEMs) in which to allow execution (0-3, 0,1). Only effective on NUMA systems.", + ) + _ = cmd.RegisterFlagCompletionFunc(cpusetMemsFlagName, completion.AutocompleteNone) + + memorySwapFlagName := "memory-swap" + createFlags.StringVar( + &cf.MemorySwap, + memorySwapFlagName, "", + "Swap limit equal to memory plus swap: '-1' to enable unlimited swap", + ) + _ = cmd.RegisterFlagCompletionFunc(memorySwapFlagName, completion.AutocompleteNone) + + deviceReadBpsFlagName := "device-read-bps" + createFlags.StringSliceVar( + &cf.DeviceReadBPs, + deviceReadBpsFlagName, []string{}, + "Limit read rate (bytes per second) from a device (e.g. --device-read-bps=/dev/sda:1mb)", + ) + _ = cmd.RegisterFlagCompletionFunc(deviceReadBpsFlagName, completion.AutocompleteDefault) + + deviceWriteBpsFlagName := "device-write-bps" + createFlags.StringSliceVar( + &cf.DeviceWriteBPs, + deviceWriteBpsFlagName, []string{}, + "Limit write rate (bytes per second) to a device (e.g. --device-write-bps=/dev/sda:1mb)", + ) + _ = cmd.RegisterFlagCompletionFunc(deviceWriteBpsFlagName, completion.AutocompleteDefault) + + blkioWeightFlagName := "blkio-weight" + createFlags.StringVar( + &cf.BlkIOWeight, + blkioWeightFlagName, "", + "Block IO weight (relative weight) accepts a weight value between 10 and 1000.", + ) + _ = cmd.RegisterFlagCompletionFunc(blkioWeightFlagName, completion.AutocompleteNone) + + blkioWeightDeviceFlagName := "blkio-weight-device" + createFlags.StringSliceVar( + &cf.BlkIOWeightDevice, + blkioWeightDeviceFlagName, []string{}, + "Block IO weight (relative device weight, format: `DEVICE_NAME:WEIGHT`)", + ) + _ = cmd.RegisterFlagCompletionFunc(blkioWeightDeviceFlagName, completion.AutocompleteDefault) } diff --git a/docs/source/markdown/podman-container-clone.1.md b/docs/source/markdown/podman-container-clone.1.md index 3b9d79862f..9baedfd36b 100644 --- a/docs/source/markdown/podman-container-clone.1.md +++ b/docs/source/markdown/podman-container-clone.1.md @@ -11,6 +11,14 @@ podman\-container\-clone - Creates a copy of an existing container ## OPTIONS +#### **--blkio-weight**=*weight* + +Block IO weight (relative weight) accepts a weight value between 10 and 1000. + +#### **--blkio-weight-device**=*weight* + +Block IO weight (relative device weight, format: `DEVICE_NAME:WEIGHT`). + #### **--cpu-period**=*limit* Set the CPU period for the Completely Fair Scheduler (CFS), which is a @@ -126,6 +134,14 @@ If none are specified, the original container's CPU memory nodes are used. Remove the original container that we are cloning once used to mimic the configuration. +#### **--device-read-bps**=*path* + +Limit read rate (bytes per second) from a device (e.g. --device-read-bps=/dev/sda:1mb). + +#### **--device-write-bps**=*path* + +Limit write rate (bytes per second) to a device (e.g. --device-write-bps=/dev/sda:1mb) + #### **--force**, **-f** Force removal of the original container that we are cloning. Can only be used in conjunction with **--destroy**. diff --git a/docs/source/markdown/podman-pod-clone.1.md b/docs/source/markdown/podman-pod-clone.1.md index 49084386c5..5473407b08 100644 --- a/docs/source/markdown/podman-pod-clone.1.md +++ b/docs/source/markdown/podman-pod-clone.1.md @@ -11,10 +11,55 @@ podman\-pod\-clone - Creates a copy of an existing pod ## OPTIONS +#### **--blkio-weight**=*weight* + +Block IO weight (relative weight) accepts a weight value between 10 and 1000. + +#### **--blkio-weight-device**=*weight* + +Block IO weight (relative device weight, format: `DEVICE_NAME:WEIGHT`). + #### **--cgroup-parent**=*path* Path to cgroups under which the cgroup for the pod will be created. If the path is not absolute, the path is considered to be relative to the cgroups path of the init process. Cgroups will be created if they do not already exist. +#### **--cpu-shares**, **-c**=*shares* + +CPU shares (relative weight) + +By default, all containers get the same proportion of CPU cycles. This proportion +can be modified by changing the container's CPU share weighting relative +to the weighting of all other running containers. + +To modify the proportion from the default of 1024, use the **--cpu-shares** +flag to set the weighting to 2 or higher. + +The proportion will only apply when CPU-intensive processes are running. +When tasks in one container are idle, other containers can use the +left-over CPU time. The actual amount of CPU time will vary depending on +the number of containers running on the system. + +For example, consider three containers, one has a cpu-share of 1024 and +two others have a cpu-share setting of 512. When processes in all three +containers attempt to use 100% of CPU, the first container would receive +50% of the total CPU time. If you add a fourth container with a cpu-share +of 1024, the first container only gets 33% of the CPU. The remaining containers +receive 16.5%, 16.5% and 33% of the CPU. + +On a multi-core system, the shares of CPU time are distributed over all CPU +cores. Even if a container is limited to less than 100% of CPU time, it can +use 100% of each individual CPU core. + +For example, consider a system with more than three cores. If you start one +container **{C0}** with **-c=512** running one process, and another container +**{C1}** with **-c=1024** running two processes, this can result in the following +division of CPU shares: + +PID container CPU CPU share +100 {C0} 0 100% of CPU0 +101 {C1} 1 100% of CPU1 +102 {C1} 2 100% of CPU2 + #### **--cpus** Set a number of CPUs for the pod that overrides the original pods CPU limits. If none are specified, the original pod's Nano CPUs are used. @@ -23,6 +68,15 @@ Set a number of CPUs for the pod that overrides the original pods CPU limits. If CPUs in which to allow execution (0-3, 0,1). If none are specified, the original pod's CPUset is used. + +#### **--cpuset-mems**=*nodes* + +Memory nodes (MEMs) in which to allow execution (0-3, 0,1). Only effective on NUMA systems. + +If there are four memory nodes on the system (0-3), use `--cpuset-mems=0,1` +then processes in the container will only use memory from the first +two memory nodes. + #### **--destroy** Remove the original pod that we are cloning once used to mimic the configuration. @@ -48,6 +102,10 @@ device. The devices that Podman will load modules for when necessary are: Limit read rate (bytes per second) from a device (e.g. --device-read-bps=/dev/sda:1mb). +#### **--device-write-bps**=*path* + +Limit write rate (bytes per second) to a device (e.g. --device-write-bps=/dev/sda:1mb) + #### **--gidmap**=*pod_gid:host_gid:amount* GID map for the user namespace. Using this flag will run all containers in the pod with user namespace enabled. It conflicts with the `--userns` and `--subgidname` flags. @@ -90,6 +148,17 @@ RAM. If a limit of 0 is specified (not using **-m**), the container's memory is not limited. The actual limit may be rounded up to a multiple of the operating system's page size (the value would be very large, that's millions of trillions). +#### **--memory-swap**=*limit* + +A limit value equal to memory plus swap. Must be used with the **-m** +(**--memory**) flag. The swap `LIMIT` should always be larger than **-m** +(**--memory**) value. By default, the swap `LIMIT` will be set to double +the value of --memory. + +The format of `LIMIT` is `[]`. Unit can be `b` (bytes), +`k` (kibibytes), `m` (mebibytes), or `g` (gibibytes). If you don't specify a +unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap. + #### **--name**, **-n** Set a custom name for the cloned pod. The default if not specified is of the syntax: **-clone** diff --git a/docs/source/markdown/podman-pod-create.1.md b/docs/source/markdown/podman-pod-create.1.md index de9a34bfa8..660112865c 100644 --- a/docs/source/markdown/podman-pod-create.1.md +++ b/docs/source/markdown/podman-pod-create.1.md @@ -23,6 +23,9 @@ podman generates a UUID for each pod, and if a name is not assigned to the container with **--name** then a random string name will be generated for it. The name is useful any place you need to identify a pod. +Note: resource limit related flags work by setting the limits explicitly in the pod's cgroup +which by default, is the cgroup parent for all containers joining the pod. Containers are still delegated the ability to set their own resource limits when joining a pod meaning that if you run **podman pod create --cpus=5** you can also run **podman container create --pod=`` --cpus=4** and the container will only see the smaller limit. containers do NOT get the pod level cgroup resources if they specify their own cgroup when joining a pod such as **--cgroupns=host** + ## OPTIONS #### **--add-host**=*host:ip* @@ -33,10 +36,55 @@ Add a line to /etc/hosts. The format is hostname:ip. The **--add-host** option can be set multiple times. The /etc/hosts file is shared between all containers in the pod. +#### **--blkio-weight**=*weight* + +Block IO weight (relative weight) accepts a weight value between 10 and 1000. + +#### **--blkio-weight-device**=*weight* + +Block IO weight (relative device weight, format: `DEVICE_NAME:WEIGHT`). + #### **--cgroup-parent**=*path* Path to cgroups under which the cgroup for the pod will be created. If the path is not absolute, the path is considered to be relative to the cgroups path of the init process. Cgroups will be created if they do not already exist. +#### **--cpu-shares**, **-c**=*shares* + +CPU shares (relative weight) + +By default, all containers get the same proportion of CPU cycles. This proportion +can be modified by changing the container's CPU share weighting relative +to the weighting of all other running containers. + +To modify the proportion from the default of 1024, use the **--cpu-shares** +flag to set the weighting to 2 or higher. + +The proportion will only apply when CPU-intensive processes are running. +When tasks in one container are idle, other containers can use the +left-over CPU time. The actual amount of CPU time will vary depending on +the number of containers running on the system. + +For example, consider three containers, one has a cpu-share of 1024 and +two others have a cpu-share setting of 512. When processes in all three +containers attempt to use 100% of CPU, the first container would receive +50% of the total CPU time. If you add a fourth container with a cpu-share +of 1024, the first container only gets 33% of the CPU. The remaining containers +receive 16.5%, 16.5% and 33% of the CPU. + +On a multi-core system, the shares of CPU time are distributed over all CPU +cores. Even if a container is limited to less than 100% of CPU time, it can +use 100% of each individual CPU core. + +For example, consider a system with more than three cores. If you start one +container **{C0}** with **-c=512** running one process, and another container +**{C1}** with **-c=1024** running two processes, this can result in the following +division of CPU shares: + +PID container CPU CPU share +100 {C0} 0 100% of CPU0 +101 {C1} 1 100% of CPU1 +102 {C1} 2 100% of CPU2 + #### **--cpus**=*amount* Set the total number of CPUs delegated to the pod. Default is 0.000 which indicates that there is no limit on computation power. @@ -52,7 +100,15 @@ Examples of the List Format: 0-4,9 # bits 0, 1, 2, 3, 4, and 9 set 0-2,7,12-14 # bits 0, 1, 2, 7, 12, 13, and 14 set -#### **--device**=*host-device[:container-device][:permissions]* +#### **--cpuset-mems**=*nodes* + +Memory nodes (MEMs) in which to allow execution (0-3, 0,1). Only effective on NUMA systems. + +If there are four memory nodes on the system (0-3), use `--cpuset-mems=0,1` +then processes in the container will only use memory from the first +two memory nodes. + +#### **--device**=_host-device_[**:**_container-device_][**:**_permissions_] Add a host device to the pod. Optional *permissions* parameter can be used to specify device permissions. It is a combination of @@ -73,6 +129,10 @@ device. The devices that Podman will load modules for when necessary are: Limit read rate (bytes per second) from a device (e.g. --device-read-bps=/dev/sda:1mb) +#### **--device-write-bps**=*path* + +Limit write rate (bytes per second) to a device (e.g. --device-write-bps=/dev/sda:1mb) + #### **--dns**=*ipaddr* Set custom DNS servers in the /etc/resolv.conf file that will be shared between all containers in the pod. A special option, "none" is allowed which disables creation of /etc/resolv.conf for the pod. @@ -174,6 +234,16 @@ RAM. If a limit of 0 is specified (not using **-m**), the container's memory is not limited. The actual limit may be rounded up to a multiple of the operating system's page size (the value would be very large, that's millions of trillions). +#### **--memory-swap**=*limit* + +A limit value equal to memory plus swap. Must be used with the **-m** +(**--memory**) flag. The swap `LIMIT` should always be larger than **-m** +(**--memory**) value. By default, the swap `LIMIT` will be set to double +the value of --memory. + +The format of `LIMIT` is `[]`. Unit can be `b` (bytes), +`k` (kibibytes), `m` (mebibytes), or `g` (gibibytes). If you don't specify a +unit, `b` is used. Set LIMIT to `-1` to enable unlimited swap. #### **--name**, **-n**=*name* @@ -603,7 +673,7 @@ $ podman pod create --network net1:ip=10.89.1.5 --network net2:ip=10.89.10.10 ``` ## SEE ALSO -**[podman(1)](podman.1.md)**, **[podman-pod(1)](podman-pod.1.md)**, **[podman-kube-play(1)](podman-kube-play.1.md)**, **containers.conf(1)** +**[podman(1)](podman.1.md)**, **[podman-pod(1)](podman-pod.1.md)**, **[podman-kube-play(1)](podman-kube-play.1.md)**, **containers.conf(1)**, **[cgroups(7)](https://man7.org/linux/man-pages/man7/cgroups.7.html)** ## HISTORY diff --git a/libpod/define/pod_inspect.go b/libpod/define/pod_inspect.go index 2afef48c44..d56074882d 100644 --- a/libpod/define/pod_inspect.go +++ b/libpod/define/pod_inspect.go @@ -57,20 +57,32 @@ type InspectPodData struct { CPUPeriod uint64 `json:"cpu_period,omitempty"` // CPUQuota contains the CPU quota of the pod CPUQuota int64 `json:"cpu_quota,omitempty"` + // CPUShares contains the cpu shares for the pod + CPUShares uint64 `json:"cpu_shares,omitempty"` // CPUSetCPUs contains linux specific CPU data for the pod CPUSetCPUs string `json:"cpuset_cpus,omitempty"` + // CPUSetMems contains linux specific CPU data for the pod + CPUSetMems string `json:"cpuset_mems,omitempty"` // Mounts contains volume related information for the pod Mounts []InspectMount `json:"mounts,omitempty"` // Devices contains the specified host devices Devices []InspectDevice `json:"devices,omitempty"` // BlkioDeviceReadBps contains the Read/Access limit for the pod's devices BlkioDeviceReadBps []InspectBlkioThrottleDevice `json:"device_read_bps,omitempty"` + // BlkioDeviceReadBps contains the Read/Access limit for the pod's devices + BlkioDeviceWriteBps []InspectBlkioThrottleDevice `json:"device_write_bps,omitempty"` // VolumesFrom contains the containers that the pod inherits mounts from VolumesFrom []string `json:"volumes_from,omitempty"` // SecurityOpt contains the specified security labels and related SELinux information SecurityOpts []string `json:"security_opt,omitempty"` // MemoryLimit contains the specified cgroup memory limit for the pod MemoryLimit uint64 `json:"memory_limit,omitempty"` + // MemorySwap contains the specified memory swap limit for the pod + MemorySwap uint64 `json:"memory_swap,omitempty"` + // BlkioWeight contains the blkio weight limit for the pod + BlkioWeight uint64 `json:"blkio_weight,omitempty"` + // BlkioWeightDevice contains the blkio weight device limits for the pod + BlkioWeightDevice []InspectBlkioWeightDevice `json:"blkio_weight_device,omitempty"` } // InspectPodInfraConfig contains the configuration of the pod's infra diff --git a/libpod/options.go b/libpod/options.go index f03980017f..b31cb4ab24 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -2145,6 +2145,18 @@ func WithServiceContainer(id string) PodCreateOption { } } +// WithPodResources sets resource limits to be applied to the pod's cgroup +// these will be inherited by all containers unless overridden. +func WithPodResources(resources specs.LinuxResources) PodCreateOption { + return func(pod *Pod) error { + if pod.valid { + return define.ErrPodFinalized + } + pod.config.ResourceLimits = resources + return nil + } +} + // WithVolatile sets the volatile flag for the container storage. // The option can potentially cause data loss when used on a container that must survive a machine reboot. func WithVolatile() CtrCreateOption { diff --git a/libpod/pod.go b/libpod/pod.go index e059c9416e..89f6d6cfe3 100644 --- a/libpod/pod.go +++ b/libpod/pod.go @@ -83,6 +83,9 @@ type PodConfig struct { // ID of the pod's lock LockID uint32 `json:"lockID"` + + // ResourceLimits hold the pod level resource limits + ResourceLimits specs.LinuxResources } // podState represents a pod's state @@ -116,18 +119,7 @@ func (p *Pod) ResourceLim() *specs.LinuxResources { empty := &specs.LinuxResources{ CPU: &specs.LinuxCPU{}, } - infra, err := p.runtime.GetContainer(p.state.InfraContainerID) - if err != nil { - return empty - } - conf := infra.config.Spec - if err != nil { - return empty - } - if conf.Linux == nil || conf.Linux.Resources == nil { - return empty - } - if err = JSONDeepCopy(conf.Linux.Resources, resCopy); err != nil { + if err := JSONDeepCopy(p.config.ResourceLimits, resCopy); err != nil { return nil } if resCopy.CPU != nil { @@ -139,51 +131,91 @@ func (p *Pod) ResourceLim() *specs.LinuxResources { // CPUPeriod returns the pod CPU period func (p *Pod) CPUPeriod() uint64 { - if p.state.InfraContainerID == "" { + resLim := p.ResourceLim() + if resLim.CPU == nil || resLim.CPU.Period == nil { return 0 } - infra, err := p.runtime.GetContainer(p.state.InfraContainerID) - if err != nil { + return *resLim.CPU.Period +} + +// CPUQuota returns the pod CPU quota +func (p *Pod) CPUQuota() int64 { + resLim := p.ResourceLim() + if resLim.CPU == nil || resLim.CPU.Quota == nil { return 0 } - conf := infra.config.Spec - if conf != nil && conf.Linux != nil && conf.Linux.Resources != nil && conf.Linux.Resources.CPU != nil && conf.Linux.Resources.CPU.Period != nil { - return *conf.Linux.Resources.CPU.Period + return *resLim.CPU.Quota +} + +// MemoryLimit returns the pod Memory Limit +func (p *Pod) MemoryLimit() uint64 { + resLim := p.ResourceLim() + if resLim.Memory == nil || resLim.Memory.Limit == nil { + return 0 } - return 0 + return uint64(*resLim.Memory.Limit) } -// CPUQuota returns the pod CPU quota -func (p *Pod) CPUQuota() int64 { - if p.state.InfraContainerID == "" { +// MemorySwap returns the pod Memory swap limit +func (p *Pod) MemorySwap() uint64 { + resLim := p.ResourceLim() + if resLim.Memory == nil || resLim.Memory.Swap == nil { return 0 } - infra, err := p.runtime.GetContainer(p.state.InfraContainerID) - if err != nil { + return uint64(*resLim.Memory.Swap) +} + +// BlkioWeight returns the pod blkio weight +func (p *Pod) BlkioWeight() uint64 { + resLim := p.ResourceLim() + if resLim.BlockIO == nil || resLim.BlockIO.Weight == nil { return 0 } - conf := infra.config.Spec - if conf != nil && conf.Linux != nil && conf.Linux.Resources != nil && conf.Linux.Resources.CPU != nil && conf.Linux.Resources.CPU.Quota != nil { - return *conf.Linux.Resources.CPU.Quota + return uint64(*resLim.BlockIO.Weight) +} + +// CPUSetMems returns the pod CPUSet memory nodes +func (p *Pod) CPUSetMems() string { + resLim := p.ResourceLim() + if resLim.CPU == nil { + return "" } - return 0 + return resLim.CPU.Mems } -// MemoryLimit returns the pod Memory Limit -func (p *Pod) MemoryLimit() uint64 { - if p.state.InfraContainerID == "" { +// CPUShares returns the pod cpu shares +func (p *Pod) CPUShares() uint64 { + resLim := p.ResourceLim() + if resLim.CPU == nil || resLim.CPU.Shares == nil { return 0 } - infra, err := p.runtime.GetContainer(p.state.InfraContainerID) + return *resLim.CPU.Shares +} + +// BlkiThrottleReadBps returns the pod throttle devices +func (p *Pod) BlkiThrottleReadBps() []define.InspectBlkioThrottleDevice { + resLim := p.ResourceLim() + if resLim.BlockIO == nil || resLim.BlockIO.ThrottleReadBpsDevice == nil { + return []define.InspectBlkioThrottleDevice{} + } + devs, err := blkioDeviceThrottle(nil, resLim.BlockIO.ThrottleReadBpsDevice) if err != nil { - return 0 + return []define.InspectBlkioThrottleDevice{} } - conf := infra.config.Spec - if conf != nil && conf.Linux != nil && conf.Linux.Resources != nil && conf.Linux.Resources.Memory != nil && conf.Linux.Resources.Memory.Limit != nil { - val := *conf.Linux.Resources.Memory.Limit - return uint64(val) + return devs +} + +// BlkiThrottleWriteBps returns the pod throttle devices +func (p *Pod) BlkiThrottleWriteBps() []define.InspectBlkioThrottleDevice { + resLim := p.ResourceLim() + if resLim.BlockIO == nil || resLim.BlockIO.ThrottleWriteBpsDevice == nil { + return []define.InspectBlkioThrottleDevice{} + } + devs, err := blkioDeviceThrottle(nil, resLim.BlockIO.ThrottleWriteBpsDevice) + if err != nil { + return []define.InspectBlkioThrottleDevice{} } - return 0 + return devs } // NetworkMode returns the Network mode given by the user ex: pod, private... diff --git a/libpod/pod_api.go b/libpod/pod_api.go index c1d54d55e8..29964ae95a 100644 --- a/libpod/pod_api.go +++ b/libpod/pod_api.go @@ -659,7 +659,6 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) { var infraConfig *define.InspectPodInfraConfig var inspectMounts []define.InspectMount var devices []define.InspectDevice - var deviceLimits []define.InspectBlkioThrottleDevice var infraSecurity []string if p.state.InfraContainerID != "" { infra, err := p.runtime.GetContainer(p.state.InfraContainerID) @@ -683,18 +682,6 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) { if err != nil { return nil, err } - var nodes map[string]string - devices, err = infra.GetDevices(false, *infra.config.Spec, nodes) - if err != nil { - return nil, err - } - spec := infra.config.Spec - if spec.Linux != nil && spec.Linux.Resources != nil && spec.Linux.Resources.BlockIO != nil { - deviceLimits, err = blkioDeviceThrottle(nodes, spec.Linux.Resources.BlockIO.ThrottleReadBpsDevice) - if err != nil { - return nil, err - } - } if len(infra.config.ContainerNetworkConfig.DNSServer) > 0 { infraConfig.DNSServer = make([]string, 0, len(infra.config.ContainerNetworkConfig.DNSServer)) @@ -731,33 +718,38 @@ func (p *Pod) Inspect() (*define.InspectPodData, error) { } inspectData := define.InspectPodData{ - ID: p.ID(), - Name: p.Name(), - Namespace: p.Namespace(), - Created: p.CreatedTime(), - CreateCommand: p.config.CreateCommand, - ExitPolicy: string(p.config.ExitPolicy), - State: podState, - Hostname: p.config.Hostname, - Labels: p.Labels(), - CreateCgroup: p.config.UsePodCgroup, - CgroupParent: p.CgroupParent(), - CgroupPath: p.state.CgroupPath, - CreateInfra: infraConfig != nil, - InfraContainerID: p.state.InfraContainerID, - InfraConfig: infraConfig, - SharedNamespaces: sharesNS, - NumContainers: uint(len(containers)), - Containers: ctrs, - CPUSetCPUs: p.ResourceLim().CPU.Cpus, - CPUPeriod: p.CPUPeriod(), - CPUQuota: p.CPUQuota(), - MemoryLimit: p.MemoryLimit(), - Mounts: inspectMounts, - Devices: devices, - BlkioDeviceReadBps: deviceLimits, - VolumesFrom: p.VolumesFrom(), - SecurityOpts: infraSecurity, + ID: p.ID(), + Name: p.Name(), + Namespace: p.Namespace(), + Created: p.CreatedTime(), + CreateCommand: p.config.CreateCommand, + ExitPolicy: string(p.config.ExitPolicy), + State: podState, + Hostname: p.config.Hostname, + Labels: p.Labels(), + CreateCgroup: p.config.UsePodCgroup, + CgroupParent: p.CgroupParent(), + CgroupPath: p.state.CgroupPath, + CreateInfra: infraConfig != nil, + InfraContainerID: p.state.InfraContainerID, + InfraConfig: infraConfig, + SharedNamespaces: sharesNS, + NumContainers: uint(len(containers)), + Containers: ctrs, + CPUSetCPUs: p.ResourceLim().CPU.Cpus, + CPUPeriod: p.CPUPeriod(), + CPUQuota: p.CPUQuota(), + MemoryLimit: p.MemoryLimit(), + Mounts: inspectMounts, + Devices: devices, + BlkioDeviceReadBps: p.BlkiThrottleReadBps(), + VolumesFrom: p.VolumesFrom(), + SecurityOpts: infraSecurity, + MemorySwap: p.MemorySwap(), + BlkioWeight: p.BlkioWeight(), + CPUSetMems: p.CPUSetMems(), + BlkioDeviceWriteBps: p.BlkiThrottleWriteBps(), + CPUShares: p.CPUShares(), } return &inspectData, nil diff --git a/libpod/runtime_pod_linux.go b/libpod/runtime_pod_linux.go index 75ff24e410..3bb22ec262 100644 --- a/libpod/runtime_pod_linux.go +++ b/libpod/runtime_pod_linux.go @@ -80,7 +80,7 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option p.InfraContainerSpec.CgroupParent = pod.state.CgroupPath // cgroupfs + rootless = permission denied when creating the cgroup. if !rootless.IsRootless() { - res, err := GetLimits(p.InfraContainerSpec.ResourceLimits) + res, err := GetLimits(p.ResourceLimits) if err != nil { return nil, err } @@ -113,7 +113,7 @@ func (r *Runtime) NewPod(ctx context.Context, p specgen.PodSpecGenerator, option // If we are set to use pod cgroups, set the cgroup parent that // all containers in the pod will share if pod.config.UsePodCgroup { - cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID()), p.InfraContainerSpec.ResourceLimits) + cgroupPath, err := systemdSliceFromPath(pod.config.CgroupParent, fmt.Sprintf("libpod_pod_%s", pod.ID()), p.ResourceLimits) if err != nil { return nil, fmt.Errorf("unable to create pod cgroup for pod %s: %w", pod.ID(), err) } diff --git a/pkg/specgen/generate/container.go b/pkg/specgen/generate/container.go index 2248c92351..9bb7caace6 100644 --- a/pkg/specgen/generate/container.go +++ b/pkg/specgen/generate/container.go @@ -302,60 +302,6 @@ func CompleteSpec(ctx context.Context, r *libpod.Runtime, s *specgen.SpecGenerat return warnings, nil } -// FinishThrottleDevices takes the temporary representation of the throttle -// devices in the specgen and looks up the major and major minors. it then -// sets the throttle devices proper in the specgen -func FinishThrottleDevices(s *specgen.SpecGenerator) error { - if bps := s.ThrottleReadBpsDevice; len(bps) > 0 { - for k, v := range bps { - statT := unix.Stat_t{} - if err := unix.Stat(k, &statT); err != nil { - return err - } - v.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert - v.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert - if s.ResourceLimits.BlockIO == nil { - s.ResourceLimits.BlockIO = new(spec.LinuxBlockIO) - } - s.ResourceLimits.BlockIO.ThrottleReadBpsDevice = append(s.ResourceLimits.BlockIO.ThrottleReadBpsDevice, v) - } - } - if bps := s.ThrottleWriteBpsDevice; len(bps) > 0 { - for k, v := range bps { - statT := unix.Stat_t{} - if err := unix.Stat(k, &statT); err != nil { - return err - } - v.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert - v.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert - s.ResourceLimits.BlockIO.ThrottleWriteBpsDevice = append(s.ResourceLimits.BlockIO.ThrottleWriteBpsDevice, v) - } - } - if iops := s.ThrottleReadIOPSDevice; len(iops) > 0 { - for k, v := range iops { - statT := unix.Stat_t{} - if err := unix.Stat(k, &statT); err != nil { - return err - } - v.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert - v.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert - s.ResourceLimits.BlockIO.ThrottleReadIOPSDevice = append(s.ResourceLimits.BlockIO.ThrottleReadIOPSDevice, v) - } - } - if iops := s.ThrottleWriteIOPSDevice; len(iops) > 0 { - for k, v := range iops { - statT := unix.Stat_t{} - if err := unix.Stat(k, &statT); err != nil { - return err - } - v.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert - v.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert - s.ResourceLimits.BlockIO.ThrottleWriteIOPSDevice = append(s.ResourceLimits.BlockIO.ThrottleWriteIOPSDevice, v) - } - } - return nil -} - // ConfigToSpec takes a completed container config and converts it back into a specgenerator for purposes of cloning an existing container func ConfigToSpec(rt *libpod.Runtime, specg *specgen.SpecGenerator, contaierID string) (*libpod.Container, *libpod.InfraInherit, error) { c, err := rt.LookupContainer(contaierID) @@ -540,3 +486,63 @@ func mapSecurityConfig(c *libpod.ContainerConfig, s *specgen.SpecGenerator) { s.Groups = c.Groups s.HostUsers = c.HostUsers } + +// FinishThrottleDevices takes the temporary representation of the throttle +// devices in the specgen and looks up the major and major minors. it then +// sets the throttle devices proper in the specgen +func FinishThrottleDevices(s *specgen.SpecGenerator) error { + if s.ResourceLimits == nil { + s.ResourceLimits = &spec.LinuxResources{} + } + if s.ResourceLimits.BlockIO == nil { + s.ResourceLimits.BlockIO = &spec.LinuxBlockIO{} + } + if bps := s.ThrottleReadBpsDevice; len(bps) > 0 { + for k, v := range bps { + statT := unix.Stat_t{} + if err := unix.Stat(k, &statT); err != nil { + return fmt.Errorf("could not parse throttle device at %s: %w", k, err) + } + v.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert + v.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert + if s.ResourceLimits.BlockIO == nil { + s.ResourceLimits.BlockIO = new(spec.LinuxBlockIO) + } + s.ResourceLimits.BlockIO.ThrottleReadBpsDevice = append(s.ResourceLimits.BlockIO.ThrottleReadBpsDevice, v) + } + } + if bps := s.ThrottleWriteBpsDevice; len(bps) > 0 { + for k, v := range bps { + statT := unix.Stat_t{} + if err := unix.Stat(k, &statT); err != nil { + return fmt.Errorf("could not parse throttle device at %s: %w", k, err) + } + v.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert + v.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert + s.ResourceLimits.BlockIO.ThrottleWriteBpsDevice = append(s.ResourceLimits.BlockIO.ThrottleWriteBpsDevice, v) + } + } + if iops := s.ThrottleReadIOPSDevice; len(iops) > 0 { + for k, v := range iops { + statT := unix.Stat_t{} + if err := unix.Stat(k, &statT); err != nil { + return fmt.Errorf("could not parse throttle device at %s: %w", k, err) + } + v.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert + v.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert + s.ResourceLimits.BlockIO.ThrottleReadIOPSDevice = append(s.ResourceLimits.BlockIO.ThrottleReadIOPSDevice, v) + } + } + if iops := s.ThrottleWriteIOPSDevice; len(iops) > 0 { + for k, v := range iops { + statT := unix.Stat_t{} + if err := unix.Stat(k, &statT); err != nil { + return fmt.Errorf("could not parse throttle device at %s: %w", k, err) + } + v.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert + v.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert + s.ResourceLimits.BlockIO.ThrottleWriteIOPSDevice = append(s.ResourceLimits.BlockIO.ThrottleWriteIOPSDevice, v) + } + } + return nil +} diff --git a/pkg/specgen/generate/container_create.go b/pkg/specgen/generate/container_create.go index 51d290bb4b..389900820c 100644 --- a/pkg/specgen/generate/container_create.go +++ b/pkg/specgen/generate/container_create.go @@ -55,9 +55,6 @@ func MakeContainer(ctx context.Context, rt *libpod.Runtime, s *specgen.SpecGener } } - if err := FinishThrottleDevices(s); err != nil { - return nil, nil, nil, err - } // Set defaults for unset namespaces if s.PidNS.IsDefault() { defaultNS, err := GetDefaultNamespaceMode("pid", rtc, pod) diff --git a/pkg/specgen/generate/oci.go b/pkg/specgen/generate/oci.go index bb5f2d0ec0..f59fe10116 100644 --- a/pkg/specgen/generate/oci.go +++ b/pkg/specgen/generate/oci.go @@ -309,6 +309,17 @@ func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runt } g.Config.Linux.Resources = s.ResourceLimits } + + weightDevices, err := WeightDevices(s.WeightDevice) + if err != nil { + return nil, err + } + if len(weightDevices) > 0 { + for _, dev := range weightDevices { + g.AddLinuxResourcesBlockIOWeightDevice(dev.Major, dev.Minor, *dev.Weight) + } + } + // Devices // set the default rule at the beginning of device configuration if !inUserNS && !s.Privileged { @@ -345,14 +356,6 @@ func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runt } } - for k, v := range s.WeightDevice { - statT := unix.Stat_t{} - if err := unix.Stat(k, &statT); err != nil { - return nil, fmt.Errorf("failed to inspect '%s' in --blkio-weight-device: %w", k, err) - } - g.AddLinuxResourcesBlockIOWeightDevice((int64(unix.Major(uint64(statT.Rdev)))), (int64(unix.Minor(uint64(statT.Rdev)))), *v.Weight) //nolint: unconvert - } - BlockAccessToKernelFilesystems(s.Privileged, s.PidNS.IsHost(), s.Mask, s.Unmask, &g) g.ClearProcessEnv() @@ -413,3 +416,19 @@ func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runt return configSpec, nil } + +func WeightDevices(wtDevices map[string]spec.LinuxWeightDevice) ([]spec.LinuxWeightDevice, error) { + devs := []spec.LinuxWeightDevice{} + for k, v := range wtDevices { + statT := unix.Stat_t{} + if err := unix.Stat(k, &statT); err != nil { + return nil, fmt.Errorf("failed to inspect '%s' in --blkio-weight-device: %w", k, err) + } + dev := new(spec.LinuxWeightDevice) + dev.Major = (int64(unix.Major(uint64(statT.Rdev)))) //nolint: unconvert + dev.Minor = (int64(unix.Minor(uint64(statT.Rdev)))) //nolint: unconvert + dev.Weight = v.Weight + devs = append(devs, *dev) + } + return devs, nil +} diff --git a/pkg/specgen/generate/pod_create.go b/pkg/specgen/generate/pod_create.go index 212d613fe9..4e6362c9b0 100644 --- a/pkg/specgen/generate/pod_create.go +++ b/pkg/specgen/generate/pod_create.go @@ -13,6 +13,7 @@ import ( "github.com/containers/podman/v4/pkg/domain/entities" "github.com/containers/podman/v4/pkg/specgen" "github.com/containers/podman/v4/pkg/specgenutil" + "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" ) @@ -21,6 +22,10 @@ func MakePod(p *entities.PodSpec, rt *libpod.Runtime) (*libpod.Pod, error) { return nil, err } + if p.PodSpecGen.ResourceLimits == nil { + p.PodSpecGen.ResourceLimits = &specs.LinuxResources{} + } + if !p.PodSpecGen.NoInfra { imageName, err := PullOrBuildInfraImage(rt, p.PodSpecGen.InfraImage) if err != nil { @@ -38,10 +43,33 @@ func MakePod(p *entities.PodSpec, rt *libpod.Runtime) (*libpod.Pod, error) { } } + if !p.PodSpecGen.NoInfra { + err := FinishThrottleDevices(p.PodSpecGen.InfraContainerSpec) + if err != nil { + return nil, err + } + if p.PodSpecGen.InfraContainerSpec.ResourceLimits.BlockIO != nil { + p.PodSpecGen.ResourceLimits.BlockIO = p.PodSpecGen.InfraContainerSpec.ResourceLimits.BlockIO + } + + weightDevices, err := WeightDevices(p.PodSpecGen.InfraContainerSpec.WeightDevice) + if err != nil { + return nil, err + } + + if p.PodSpecGen.ResourceLimits != nil && len(weightDevices) > 0 { + if p.PodSpecGen.ResourceLimits.BlockIO == nil { + p.PodSpecGen.ResourceLimits.BlockIO = &specs.LinuxBlockIO{} + } + p.PodSpecGen.ResourceLimits.BlockIO.WeightDevice = weightDevices + } + } + options, err := createPodOptions(&p.PodSpecGen) if err != nil { return nil, err } + pod, err := rt.NewPod(context.Background(), p.PodSpecGen, options...) if err != nil { return nil, err @@ -55,6 +83,11 @@ func MakePod(p *entities.PodSpec, rt *libpod.Runtime) (*libpod.Pod, error) { return nil, err } p.PodSpecGen.InfraContainerSpec.User = "" // infraSpec user will get incorrectly assigned via the container creation process, overwrite here + // infra's resource limits are used as a parsing tool, + // we do not want infra to get these resources in its cgroup + // make sure of that here. + p.PodSpecGen.InfraContainerSpec.ResourceLimits = nil + p.PodSpecGen.InfraContainerSpec.WeightDevice = nil rtSpec, spec, opts, err := MakeContainer(context.Background(), rt, p.PodSpecGen.InfraContainerSpec, false, nil) if err != nil { return nil, err @@ -122,6 +155,10 @@ func createPodOptions(p *specgen.PodSpecGenerator) ([]libpod.PodCreateOption, er options = append(options, libpod.WithPodHostname(p.Hostname)) } + if p.ResourceLimits != nil { + options = append(options, libpod.WithPodResources(*p.ResourceLimits)) + } + options = append(options, libpod.WithPodExitPolicy(p.ExitPolicy)) return options, nil diff --git a/pkg/specgenutil/specgen.go b/pkg/specgenutil/specgen.go index 9a7d50947f..4ab019b5ba 100644 --- a/pkg/specgenutil/specgen.go +++ b/pkg/specgenutil/specgen.go @@ -74,6 +74,12 @@ func getCPULimits(c *entities.ContainerCreateOptions) *specs.LinuxCPU { func getIOLimits(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions) (*specs.LinuxBlockIO, error) { var err error io := &specs.LinuxBlockIO{} + if s.ResourceLimits == nil { + s.ResourceLimits = &specs.LinuxResources{} + } + if s.ResourceLimits.BlockIO == nil { + s.ResourceLimits.BlockIO = &specs.LinuxBlockIO{} + } hasLimits := false if b := c.BlkIOWeight; len(b) > 0 { u, err := strconv.ParseUint(b, 10, 16) @@ -82,6 +88,7 @@ func getIOLimits(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions) ( } nu := uint16(u) io.Weight = &nu + s.ResourceLimits.BlockIO.Weight = &nu hasLimits = true } @@ -96,6 +103,7 @@ func getIOLimits(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions) ( if s.ThrottleReadBpsDevice, err = parseThrottleBPSDevices(bps); err != nil { return nil, err } + hasLimits = true } @@ -123,6 +131,8 @@ func getIOLimits(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions) ( if !hasLimits { return nil, nil } + io = s.ResourceLimits.BlockIO + return io, nil } @@ -509,7 +519,7 @@ func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions return err } } - if s.ResourceLimits.BlockIO == nil || (len(c.BlkIOWeight) != 0 || len(c.BlkIOWeightDevice) != 0) { + if s.ResourceLimits.BlockIO == nil || (len(c.BlkIOWeight) != 0 || len(c.BlkIOWeightDevice) != 0 || len(c.DeviceReadBPs) != 0 || len(c.DeviceWriteBPs) != 0) { s.ResourceLimits.BlockIO, err = getIOLimits(s, c) if err != nil { return err diff --git a/test/system/200-pod.bats b/test/system/200-pod.bats index 7b7f5e8bbf..b9c668993d 100644 --- a/test/system/200-pod.bats +++ b/test/system/200-pod.bats @@ -2,12 +2,17 @@ load helpers +LOOPDEVICE= + # This is a long ugly way to clean up pods and remove the pause image function teardown() { run_podman pod rm -f -t 0 -a run_podman rm -f -t 0 -a run_podman rmi --ignore $(pause_image) basic_teardown + if [[ -n "$LOOPDEVICE" ]]; then + losetup -d $LOOPDEVICE + fi } @@ -474,31 +479,57 @@ spec: @test "pod resource limits" { skip_if_remote "resource limits only implemented on non-remote" - if is_rootless; then + if is_rootless || ! is_cgroupsv2; then skip "only meaningful for rootful" fi - local name1="resources1" - run_podman --cgroup-manager=systemd pod create --name=$name1 --cpus=5 --memory=10m - run_podman --cgroup-manager=systemd pod start $name1 - run_podman pod inspect --format '{{.CgroupPath}}' $name1 - local path1="$output" - local actual1=$(< /sys/fs/cgroup/$path1/cpu.max) - is "$actual1" "500000 100000" "resource limits set properly" - local actual2=$(< /sys/fs/cgroup/$path1/memory.max) - is "$actual2" "10485760" "resource limits set properly" - run_podman pod --cgroup-manager=systemd rm -f $name1 - - local name2="resources2" - run_podman --cgroup-manager=cgroupfs pod create --cpus=5 --memory=10m --name=$name2 - run_podman --cgroup-manager=cgroupfs pod start $name2 - run_podman pod inspect --format '{{.CgroupPath}}' $name2 - local path2="$output" - local actual2=$(< /sys/fs/cgroup/$path2/cpu.max) - is "$actual2" "500000 100000" "resource limits set properly" - local actual2=$(< /sys/fs/cgroup/$path2/memory.max) - is "$actual2" "10485760" "resource limits set properly" - run_podman --cgroup-manager=cgroupfs pod rm $name2 + # create loopback device + lofile=${PODMAN_TMPDIR}/disk.img + fallocate -l 1k ${lofile} + LOOPDEVICE=$(losetup --show -f $lofile) + + # tr needed because losetup seems to use %2d + lomajmin=$(losetup -l --noheadings --output MAJ:MIN $LOOPDEVICE | tr -d ' ') + run grep -w bfq /sys/block/$(basename ${LOOPDEVICE})/queue/scheduler + if [ $status -ne 0 ]; then + skip "BFQ scheduler is not supported on the system" + if [ -f ${lofile} ]; then + run_podman '?' rm -t 0 --all --force --ignore + + while read path dev; do + if [[ "$path" == "$lofile" ]]; then + losetup -d $dev + fi + done < <(losetup -l --noheadings --output BACK-FILE,NAME) + rm ${lofile} + fi + fi + echo bfq > /sys/block/$(basename ${LOOPDEVICE})/queue/scheduler + + expected_limits=" +cpu.max | 500000 100000 +memory.max | 5242880 +memory.swap.max | 1068498944 +io.max | $lomajmin rbps=1048576 wbps=1048576 riops=max wiops=max +" + + for cgm in systemd cgroupfs; do + local name=resources-$cgm + run_podman --cgroup-manager=$cgm pod create --name=$name --cpus=5 --memory=5m --memory-swap=1g --cpu-shares=1000 --cpuset-cpus=0 --cpuset-mems=0 --device-read-bps=${LOOPDEVICE}:1mb --device-write-bps=${LOOPDEVICE}:1mb --blkio-weight-device=${LOOPDEVICE}:123 --blkio-weight=50 + run_podman --cgroup-manager=$cgm pod start $name + run_podman pod inspect --format '{{.CgroupPath}}' $name + local cgroup_path="$output" + + while read unit expect; do + local actual=$(< /sys/fs/cgroup/$cgroup_path/$unit) + is "$actual" "$expect" "resource limit under $cgm: $unit" + done < <(parse_table "$expected_limits") + run_podman --cgroup-manager=$cgm pod rm -f $name + done + + # Clean up, and prevent duplicate cleanup in teardown + losetup -d $LOOPDEVICE + LOOPDEVICE= } @test "podman pod ps doesn't race with pod rm" {