
Adds Windows resource limits support
This allows running Windows Containers to have their resource
limits updated through containerd. CPU resource limit support
has been added for Windows Server 20H2 and newer; on older
versions, hcsshim will raise an Unimplemented error.

Signed-off-by: Claudiu Belu <[email protected]>
claudiubelu committed Sep 25, 2021
1 parent 5b29542 commit 2bc77b8
Showing 10 changed files with 233 additions and 159 deletions.
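Before the per-file diffs, a note on the shape of the change: the CRI UpdateContainerResourcesRequest carries a Windows field alongside the existing Linux one, and this commit wires it through containerd's CRI plugin. A minimal client-side sketch, not part of the commit (ctx, containerID, and runtimeClient are assumed to be in scope):

	req := &runtimeapi.UpdateContainerResourcesRequest{
		ContainerId: containerID,
		Windows: &runtimeapi.WindowsContainerResources{
			CpuCount:           2,                 // CPUs available to the container
			MemoryLimitInBytes: 512 * 1024 * 1024, // 512 MiB
		},
	}
	// Per the commit message, a CPU update like this one is expected to fail
	// with an Unimplemented error from hcsshim on Windows Server older than 20H2.
	_, err := runtimeClient.UpdateContainerResources(ctx, req)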
4 changes: 2 additions & 2 deletions integration/container_update_resources_test.go
@@ -67,7 +67,7 @@ func TestUpdateContainerResources(t *testing.T) {
t.Log("Update container memory limit after created")
err = runtimeService.UpdateContainerResources(cn, &runtime.LinuxContainerResources{
MemoryLimitInBytes: 400 * 1024 * 1024,
})
}, nil)
require.NoError(t, err)

t.Log("Check memory limit in container OCI spec")
@@ -90,7 +90,7 @@ func TestUpdateContainerResources(t *testing.T) {
t.Log("Update container memory limit after started")
err = runtimeService.UpdateContainerResources(cn, &runtime.LinuxContainerResources{
MemoryLimitInBytes: 800 * 1024 * 1024,
})
}, nil)
require.NoError(t, err)

t.Log("Check memory limit in container OCI spec")
2 changes: 1 addition & 1 deletion integration/cri-api/pkg/apis/services.go
@@ -63,7 +63,7 @@ type ContainerManager interface {
// ContainerStatus returns the status of the container.
ContainerStatus(containerID string, opts ...grpc.CallOption) (*runtimeapi.ContainerStatus, error)
// UpdateContainerResources updates the cgroup resources for the container.
UpdateContainerResources(containerID string, resources *runtimeapi.LinuxContainerResources, opts ...grpc.CallOption) error
UpdateContainerResources(containerID string, resources *runtimeapi.LinuxContainerResources, windowsResources *runtimeapi.WindowsContainerResources, opts ...grpc.CallOption) error
// ExecSync executes a command in the container, and returns the stdout output.
// If command exits with a non-zero exit code, an error is returned.
ExecSync(containerID string, cmd []string, timeout time.Duration, opts ...grpc.CallOption) (stdout []byte, stderr []byte, err error)
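Callers of the updated interface pass nil for the platform they are not targeting, as the truncindex test further down does. A hypothetical Windows-side call through this integration wrapper:

	// Linux resources are nil; only the new Windows argument is populated.
	err := runtimeService.UpdateContainerResources(containerID, nil, &runtimeapi.WindowsContainerResources{
		MemoryLimitInBytes: 256 * 1024 * 1024, // 256 MiB
	})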
3 changes: 2 additions & 1 deletion integration/remote/remote_runtime.go
@@ -361,14 +361,15 @@ func (r *RuntimeService) ContainerStatus(containerID string, opts ...grpc.CallOp
}

// UpdateContainerResources updates a containers resource config
func (r *RuntimeService) UpdateContainerResources(containerID string, resources *runtimeapi.LinuxContainerResources, opts ...grpc.CallOption) error {
func (r *RuntimeService) UpdateContainerResources(containerID string, resources *runtimeapi.LinuxContainerResources, windowsResources *runtimeapi.WindowsContainerResources, opts ...grpc.CallOption) error {
klog.V(10).Infof("[RuntimeService] UpdateContainerResources (containerID=%v, timeout=%v)", containerID, r.timeout)
ctx, cancel := getContextWithTimeout(r.timeout)
defer cancel()

_, err := r.runtimeClient.UpdateContainerResources(ctx, &runtimeapi.UpdateContainerResourcesRequest{
ContainerId: containerID,
Linux: resources,
Windows: windowsResources,
}, opts...)
if err != nil {
klog.Errorf("UpdateContainerResources %q from runtime service failed: %v", containerID, err)
9 changes: 6 additions & 3 deletions integration/truncindex_test.go
@@ -111,12 +111,15 @@ func TestTruncIndex(t *testing.T) {
require.NoError(t, err)
assert.Equal(t, cn, cStats.Attributes.Id)

t.Logf("Update container memory limit after started")
if goruntime.GOOS != "windows" {
// TODO(claudiub): remove this when UpdateContainerResources works on running Windows Containers.
// https://github.com/containerd/containerd/issues/5187
t.Logf("Update container memory limit after started")
err = runtimeService.UpdateContainerResources(cnTruncIndex, &runtimeapi.LinuxContainerResources{
MemoryLimitInBytes: 50 * 1024 * 1024,
}, nil)
assert.NoError(t, err)
} else {
err = runtimeService.UpdateContainerResources(cnTruncIndex, nil, &runtimeapi.WindowsContainerResources{
MemoryLimitInBytes: 50 * 1024 * 1024,
})
assert.NoError(t, err)
}
6 changes: 3 additions & 3 deletions pkg/cri/opts/spec_windows.go
@@ -174,9 +174,6 @@ func WithWindowsResources(resources *runtime.WindowsContainerResources) oci.Spec
if s.Windows.Resources == nil {
s.Windows.Resources = &runtimespec.WindowsResources{}
}
if s.Windows.Resources.CPU == nil {
s.Windows.Resources.CPU = &runtimespec.WindowsCPUResources{}
}
if s.Windows.Resources.Memory == nil {
s.Windows.Resources.Memory = &runtimespec.WindowsMemoryResources{}
}
@@ -187,6 +184,9 @@ func WithWindowsResources(resources *runtime.WindowsContainerResources) oci.Spec
max = uint16(resources.GetCpuMaximum())
limit = uint64(resources.GetMemoryLimitInBytes())
)
if s.Windows.Resources.CPU == nil && (count != 0 || shares != 0 || max != 0) {
s.Windows.Resources.CPU = &runtimespec.WindowsCPUResources{}
}
if count != 0 {
s.Windows.Resources.CPU.Count = &count
}
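The reordering above is deliberate, as far as the diff shows: s.Windows.Resources.CPU is now allocated only when at least one CPU field is actually set, so a memory-only update no longer produces an empty WindowsCPUResources block. A behavioral sketch, assuming spec is an oci.Spec for a Windows container and ctx is in scope:

	// Memory-only update: the CPU block stays nil, so no CPU limit change
	// is handed to hcsshim (which cannot apply CPU updates on older Windows).
	err := opts.WithWindowsResources(&runtime.WindowsContainerResources{
		MemoryLimitInBytes: 128 * 1024 * 1024,
	})(ctx, nil, nil, &spec)
	// Afterwards: spec.Windows.Resources.CPU == nil, and Memory.Limit is
	// presumably set to the 128 MiB value (that branch is outside the shown hunks).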
131 changes: 131 additions & 0 deletions pkg/cri/server/container_update_resources.go
@@ -0,0 +1,131 @@
//go:build !darwin && !freebsd
// +build !darwin,!freebsd

/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package server

import (
gocontext "context"

"github.com/containerd/containerd"
"github.com/containerd/containerd/containers"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/log"
"github.com/containerd/typeurl"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"golang.org/x/net/context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"

containerstore "github.com/containerd/containerd/pkg/cri/store/container"
ctrdutil "github.com/containerd/containerd/pkg/cri/util"
)

// UpdateContainerResources updates ContainerConfig of the container.
func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (retRes *runtime.UpdateContainerResourcesResponse, retErr error) {
container, err := c.containerStore.Get(r.GetContainerId())
if err != nil {
return nil, errors.Wrap(err, "failed to find container")
}
// Update resources in status update transaction, so that:
// 1) There won't be race condition with container start.
// 2) There won't be concurrent resource update to the same container.
if err := container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
return status, c.updateContainerResources(ctx, container, r, status)
}); err != nil {
return nil, errors.Wrap(err, "failed to update resources")
}
return &runtime.UpdateContainerResourcesResponse{}, nil
}

func (c *criService) updateContainerResources(ctx context.Context,
cntr containerstore.Container,
r *runtime.UpdateContainerResourcesRequest,
status containerstore.Status) (retErr error) {
id := cntr.ID
// Do not update the container when there is a removal in progress.
if status.Removing {
return errors.Errorf("container %q is in removing state", id)
}

// Update container spec. If the container is not started yet, updating
// spec makes sure that the resource limits are correct when start;
// if the container is already started, updating spec is still required,
// the spec will become our source of truth for resource limits.
oldSpec, err := cntr.Container.Spec(ctx)
if err != nil {
return errors.Wrap(err, "failed to get container spec")
}
newSpec, err := updateOCIResource(ctx, oldSpec, r, c.config)
if err != nil {
return errors.Wrap(err, "failed to update resource in spec")
}

if err := updateContainerSpec(ctx, cntr.Container, newSpec); err != nil {
return err
}
defer func() {
if retErr != nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
// Reset spec on error.
if err := updateContainerSpec(deferCtx, cntr.Container, oldSpec); err != nil {
log.G(ctx).WithError(err).Errorf("Failed to update spec %+v for container %q", oldSpec, id)
}
}
}()

// If container is not running, only update spec is enough, new resource
// limit will be applied when container start.
if status.State() != runtime.ContainerState_CONTAINER_RUNNING {
return nil
}

task, err := cntr.Container.Task(ctx, nil)
if err != nil {
if errdefs.IsNotFound(err) {
// Task exited already.
return nil
}
return errors.Wrap(err, "failed to get task")
}
// newSpec.Linux / newSpec.Windows won't be nil
if err := task.Update(ctx, containerd.WithResources(getResources(newSpec))); err != nil {
if errdefs.IsNotFound(err) {
// Task exited already.
return nil
}
return errors.Wrap(err, "failed to update resources")
}
return nil
}

// updateContainerSpec updates container spec.
func updateContainerSpec(ctx context.Context, cntr containerd.Container, spec *runtimespec.Spec) error {
any, err := typeurl.MarshalAny(spec)
if err != nil {
return errors.Wrapf(err, "failed to marshal spec %+v", spec)
}
if err := cntr.Update(ctx, func(ctx gocontext.Context, client *containerd.Client, c *containers.Container) error {
c.Spec = any
return nil
}); err != nil {
return errors.Wrap(err, "failed to update container spec")
}
return nil
}
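One detail worth flagging in this new platform-generic file: task.Update receives getResources(newSpec), whose concrete type is *runtimespec.LinuxResources or *runtimespec.WindowsResources depending on platform, so containerd.WithResources must now accept an interface{} value. That client-side change is in one of the files not rendered on this page; the following is a plausible sketch only:

	// Sketch, not the verbatim change: accept either platform's resource struct.
	func WithResources(resources interface{}) UpdateTaskOpts {
		return func(ctx context.Context, client *Client, r *UpdateTaskInfo) error {
			switch resources.(type) {
			case *specs.LinuxResources, *specs.WindowsResources:
				r.Resources = resources
				return nil
			default:
				return errors.New("resources must be a *specs.LinuxResources or *specs.WindowsResources")
			}
		}
	}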
116 changes: 9 additions & 107 deletions pkg/cri/server/container_update_resources_linux.go
@@ -17,122 +17,20 @@
package server

import (
gocontext "context"

"github.com/containerd/containerd"
"github.com/containerd/containerd/containers"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/log"
"github.com/containerd/typeurl"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"golang.org/x/net/context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"

criconfig "github.com/containerd/containerd/pkg/cri/config"
"github.com/containerd/containerd/pkg/cri/opts"
containerstore "github.com/containerd/containerd/pkg/cri/store/container"
"github.com/containerd/containerd/pkg/cri/util"
ctrdutil "github.com/containerd/containerd/pkg/cri/util"
)

// UpdateContainerResources updates ContainerConfig of the container.
func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (retRes *runtime.UpdateContainerResourcesResponse, retErr error) {
container, err := c.containerStore.Get(r.GetContainerId())
if err != nil {
return nil, errors.Wrap(err, "failed to find container")
}
// Update resources in status update transaction, so that:
// 1) There won't be race condition with container start.
// 2) There won't be concurrent resource update to the same container.
if err := container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
return status, c.updateContainerResources(ctx, container, r.GetLinux(), status)
}); err != nil {
return nil, errors.Wrap(err, "failed to update resources")
}
return &runtime.UpdateContainerResourcesResponse{}, nil
}

func (c *criService) updateContainerResources(ctx context.Context,
cntr containerstore.Container,
resources *runtime.LinuxContainerResources,
status containerstore.Status) (retErr error) {
id := cntr.ID
// Do not update the container when there is a removal in progress.
if status.Removing {
return errors.Errorf("container %q is in removing state", id)
}

// Update container spec. If the container is not started yet, updating
// spec makes sure that the resource limits are correct when start;
// if the container is already started, updating spec is still required,
// the spec will become our source of truth for resource limits.
oldSpec, err := cntr.Container.Spec(ctx)
if err != nil {
return errors.Wrap(err, "failed to get container spec")
}
newSpec, err := updateOCILinuxResource(ctx, oldSpec, resources,
c.config.TolerateMissingHugetlbController, c.config.DisableHugetlbController)
if err != nil {
return errors.Wrap(err, "failed to update resource in spec")
}

if err := updateContainerSpec(ctx, cntr.Container, newSpec); err != nil {
return err
}
defer func() {
if retErr != nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
// Reset spec on error.
if err := updateContainerSpec(deferCtx, cntr.Container, oldSpec); err != nil {
log.G(ctx).WithError(err).Errorf("Failed to update spec %+v for container %q", oldSpec, id)
}
}
}()
// updateOCIResource updates container resource limit.
func updateOCIResource(ctx context.Context, spec *runtimespec.Spec, r *runtime.UpdateContainerResourcesRequest,
config criconfig.Config) (*runtimespec.Spec, error) {

// If container is not running, only update spec is enough, new resource
// limit will be applied when container start.
if status.State() != runtime.ContainerState_CONTAINER_RUNNING {
return nil
}

task, err := cntr.Container.Task(ctx, nil)
if err != nil {
if errdefs.IsNotFound(err) {
// Task exited already.
return nil
}
return errors.Wrap(err, "failed to get task")
}
// newSpec.Linux won't be nil
if err := task.Update(ctx, containerd.WithResources(newSpec.Linux.Resources)); err != nil {
if errdefs.IsNotFound(err) {
// Task exited already.
return nil
}
return errors.Wrap(err, "failed to update resources")
}
return nil
}

// updateContainerSpec updates container spec.
func updateContainerSpec(ctx context.Context, cntr containerd.Container, spec *runtimespec.Spec) error {
any, err := typeurl.MarshalAny(spec)
if err != nil {
return errors.Wrapf(err, "failed to marshal spec %+v", spec)
}
if err := cntr.Update(ctx, func(ctx gocontext.Context, client *containerd.Client, c *containers.Container) error {
c.Spec = any
return nil
}); err != nil {
return errors.Wrap(err, "failed to update container spec")
}
return nil
}

// updateOCILinuxResource updates container resource limit.
func updateOCILinuxResource(ctx context.Context, spec *runtimespec.Spec, new *runtime.LinuxContainerResources,
tolerateMissingHugetlbController, disableHugetlbController bool) (*runtimespec.Spec, error) {
// Copy to make sure old spec is not changed.
var cloned runtimespec.Spec
if err := util.DeepCopy(&cloned, spec); err != nil {
@@ -141,8 +39,12 @@ func updateOCILinuxResource(ctx context.Context, spec *runtimespec.Spec, new *ru
if cloned.Linux == nil {
cloned.Linux = &runtimespec.Linux{}
}
if err := opts.WithResources(new, tolerateMissingHugetlbController, disableHugetlbController)(ctx, nil, nil, &cloned); err != nil {
if err := opts.WithResources(r.GetLinux(), config.TolerateMissingHugetlbController, config.DisableHugetlbController)(ctx, nil, nil, &cloned); err != nil {
return nil, errors.Wrap(err, "unable to set linux container resources")
}
return &cloned, nil
}

func getResources(spec *runtimespec.Spec) interface{} {
return spec.Linux.Resources
}
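After this commit the Linux file provides only the two platform hooks consumed by the generic path: updateOCIResource and getResources. Its Windows sibling (presumably container_update_resources_windows.go, among the files not rendered below) should mirror it; a sketch under that assumption:

	// updateOCIResource updates container resource limits (Windows sketch).
	func updateOCIResource(ctx context.Context, spec *runtimespec.Spec, r *runtime.UpdateContainerResourcesRequest,
		config criconfig.Config) (*runtimespec.Spec, error) {
		// Copy to make sure the old spec is not changed.
		var cloned runtimespec.Spec
		if err := util.DeepCopy(&cloned, spec); err != nil {
			return nil, errors.Wrap(err, "failed to deep copy")
		}
		if cloned.Windows == nil {
			cloned.Windows = &runtimespec.Windows{}
		}
		if err := opts.WithWindowsResources(r.GetWindows())(ctx, nil, nil, &cloned); err != nil {
			return nil, errors.Wrap(err, "unable to set windows container resources")
		}
		return &cloned, nil
	}

	func getResources(spec *runtimespec.Spec) interface{} {
		return spec.Windows.Resources
	}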
(The remaining changed files were not rendered on this page.)
