From b746b22564298463ebf322d630488236b7277074 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 21 Dec 2021 13:28:04 +0000 Subject: [PATCH 1/2] Update go-criu to v5.3.0 Signed-off-by: Adrian Reber --- go.mod | 2 +- go.sum | 3 +- .../checkpoint-restore/go-criu/v5/Makefile | 9 +++- .../checkpoint-restore/go-criu/v5/features.go | 45 +++++++++++++++++++ .../checkpoint-restore/go-criu/v5/main.go | 14 +++--- vendor/modules.txt | 2 +- 6 files changed, 66 insertions(+), 9 deletions(-) create mode 100644 vendor/github.com/checkpoint-restore/go-criu/v5/features.go diff --git a/go.mod b/go.mod index 4286b1d817..9ae49eff01 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/blang/semver v3.5.1+incompatible github.com/buger/goterm v0.0.0-20181115115552-c206103e1f37 github.com/checkpoint-restore/checkpointctl v0.0.0-20211204171957-54b4ebfdb681 - github.com/checkpoint-restore/go-criu/v5 v5.2.0 + github.com/checkpoint-restore/go-criu/v5 v5.3.0 github.com/container-orchestrated-devices/container-device-interface v0.0.0-20210325223243-f99e8b6c10b9 github.com/containernetworking/cni v1.0.1 github.com/containernetworking/plugins v1.0.1 diff --git a/go.sum b/go.sum index 0394697761..0597ab5c08 100644 --- a/go.sum +++ b/go.sum @@ -155,8 +155,9 @@ github.com/checkpoint-restore/checkpointctl v0.0.0-20211204171957-54b4ebfdb681 h github.com/checkpoint-restore/checkpointctl v0.0.0-20211204171957-54b4ebfdb681/go.mod h1:67kWC1PXQLR3lM/mmNnu3Kzn7K4TSWZAGUuQP1JSngk= github.com/checkpoint-restore/go-criu/v4 v4.1.0/go.mod h1:xUQBLp4RLc5zJtWY++yjOoMoB5lihDt7fai+75m+rGw= github.com/checkpoint-restore/go-criu/v5 v5.0.0/go.mod h1:cfwC0EG7HMUenopBsUf9d89JlCLQIfgVcNsNN0t6T2M= -github.com/checkpoint-restore/go-criu/v5 v5.2.0 h1:QwsRK9EdBr2kQr44DqSdBrP4dULp2+4EkqounYQOnF8= github.com/checkpoint-restore/go-criu/v5 v5.2.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E= +github.com/checkpoint-restore/go-criu/v5 v5.3.0 h1:wpFFOoomK3389ue2lAb0Boag6XPht5QYpipxmSNL4d8= +github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E= github.com/chzyer/logex v1.1.10 h1:Swpa1K6QvQznwJRcfTfQJmTE72DqScAa40E+fbHEXEE= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e h1:fY5BOSpyZCqRo5OhCuC+XN+r/bBCmeuuJtjz+bCNIf8= diff --git a/vendor/github.com/checkpoint-restore/go-criu/v5/Makefile b/vendor/github.com/checkpoint-restore/go-criu/v5/Makefile index 558e61453b..67c43a05b3 100644 --- a/vendor/github.com/checkpoint-restore/go-criu/v5/Makefile +++ b/vendor/github.com/checkpoint-restore/go-criu/v5/Makefile @@ -2,6 +2,11 @@ SHELL = /bin/bash GO ?= go CC ?= gcc COVERAGE_PATH ?= $(shell pwd)/.coverage +CRIU_FEATURE_MEM_TRACK = $(shell if criu check --feature mem_dirty_track > /dev/null; then echo 1; else echo 0; fi) +CRIU_FEATURE_LAZY_PAGES = $(shell if criu check --feature uffd-noncoop > /dev/null; then echo 1; else echo 0; fi) +CRIU_FEATURE_PIDFD_STORE = $(shell if criu check --feature pidfd_store > /dev/null; then echo 1; else echo 0; fi) + +export CRIU_FEATURE_MEM_TRACK CRIU_FEATURE_LAZY_PAGES CRIU_FEATURE_PIDFD_STORE all: build test phaul-test @@ -70,6 +75,8 @@ coverage: $(COVERAGE_BINARIES) $(TEST_PAYLOAD) test/phaul/phaul.coverage -test.coverprofile=coverprofile.integration.$$RANDOM -test.outputdir=${COVERAGE_PATH} COVERAGE $$PID; \ pkill -9 piggie; \ } + echo "mode: set" > .coverage/coverage.out && cat .coverage/coverprofile* | \ + grep -v mode: | sort -r | awk '{if($$1 != last) {print $$0;last=$$1}}' >> .coverage/coverage.out clean: @rm -f $(TEST_BINARIES) $(COVERAGE_BINARIES) codecov @@ -95,6 +102,6 @@ vendor: codecov: curl -Os https://uploader.codecov.io/latest/linux/codecov chmod +x codecov - ./codecov -f '.coverage/*' + ./codecov -f '.coverage/coverage.out' .PHONY: build test phaul-test test-bin clean lint vendor coverage codecov diff --git a/vendor/github.com/checkpoint-restore/go-criu/v5/features.go b/vendor/github.com/checkpoint-restore/go-criu/v5/features.go new file mode 100644 index 0000000000..c7127f9515 --- /dev/null +++ b/vendor/github.com/checkpoint-restore/go-criu/v5/features.go @@ -0,0 +1,45 @@ +package criu + +import ( + "fmt" + + "github.com/checkpoint-restore/go-criu/v5/rpc" +) + +// Feature checking in go-criu is based on the libcriu feature checking function. + +// Feature checking allows the user to check if CRIU supports +// certain features. There are CRIU features which do not depend +// on the version of CRIU but on kernel features or architecture. +// +// One example is memory tracking. Memory tracking can be disabled +// in the kernel or there are architectures which do not support +// it (aarch64 for example). By using the feature check a libcriu +// user can easily query CRIU if a certain feature is available. +// +// The features which should be checked can be marked in the +// structure 'struct criu_feature_check'. Each structure member +// that is set to true will result in CRIU checking for the +// availability of that feature in the current combination of +// CRIU/kernel/architecture. +// +// Available features will be set to true when the function +// returns successfully. Missing features will be set to false. + +func (c *Criu) FeatureCheck(features *rpc.CriuFeatures) (*rpc.CriuFeatures, error) { + resp, err := c.doSwrkWithResp( + rpc.CriuReqType_FEATURE_CHECK, + nil, + nil, + features, + ) + if err != nil { + return nil, err + } + + if resp.GetType() != rpc.CriuReqType_FEATURE_CHECK { + return nil, fmt.Errorf("Unexpected CRIU RPC response") + } + + return features, nil +} diff --git a/vendor/github.com/checkpoint-restore/go-criu/v5/main.go b/vendor/github.com/checkpoint-restore/go-criu/v5/main.go index 78811c309c..88b1b24587 100644 --- a/vendor/github.com/checkpoint-restore/go-criu/v5/main.go +++ b/vendor/github.com/checkpoint-restore/go-criu/v5/main.go @@ -87,19 +87,19 @@ func (c *Criu) sendAndRecv(reqB []byte) ([]byte, int, error) { } func (c *Criu) doSwrk(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy Notify) error { - resp, err := c.doSwrkWithResp(reqType, opts, nfy) + resp, err := c.doSwrkWithResp(reqType, opts, nfy, nil) if err != nil { return err } respType := resp.GetType() if respType != reqType { - return errors.New("unexpected responce") + return errors.New("unexpected CRIU RPC response") } return nil } -func (c *Criu) doSwrkWithResp(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy Notify) (*rpc.CriuResp, error) { +func (c *Criu) doSwrkWithResp(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy Notify, features *rpc.CriuFeatures) (*rpc.CriuResp, error) { var resp *rpc.CriuResp req := rpc.CriuReq{ @@ -111,6 +111,10 @@ func (c *Criu) doSwrkWithResp(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy N opts.NotifyScripts = proto.Bool(true) } + if features != nil { + req.Features = features + } + if c.swrkCmd == nil { err := c.Prepare() if err != nil { @@ -209,7 +213,7 @@ func (c *Criu) StartPageServer(opts *rpc.CriuOpts) error { // StartPageServerChld starts the page server and returns PID and port func (c *Criu) StartPageServerChld(opts *rpc.CriuOpts) (int, int, error) { - resp, err := c.doSwrkWithResp(rpc.CriuReqType_PAGE_SERVER_CHLD, opts, nil) + resp, err := c.doSwrkWithResp(rpc.CriuReqType_PAGE_SERVER_CHLD, opts, nil, nil) if err != nil { return 0, 0, err } @@ -220,7 +224,7 @@ func (c *Criu) StartPageServerChld(opts *rpc.CriuOpts) (int, int, error) { // GetCriuVersion executes the VERSION RPC call and returns the version // as an integer. Major * 10000 + Minor * 100 + SubLevel func (c *Criu) GetCriuVersion() (int, error) { - resp, err := c.doSwrkWithResp(rpc.CriuReqType_VERSION, nil, nil) + resp, err := c.doSwrkWithResp(rpc.CriuReqType_VERSION, nil, nil, nil) if err != nil { return 0, err } diff --git a/vendor/modules.txt b/vendor/modules.txt index c512576130..e07c2c6b1b 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -49,7 +49,7 @@ github.com/cespare/xxhash/v2 # github.com/checkpoint-restore/checkpointctl v0.0.0-20211204171957-54b4ebfdb681 ## explicit github.com/checkpoint-restore/checkpointctl/lib -# github.com/checkpoint-restore/go-criu/v5 v5.2.0 +# github.com/checkpoint-restore/go-criu/v5 v5.3.0 ## explicit github.com/checkpoint-restore/go-criu/v5 github.com/checkpoint-restore/go-criu/v5/magic From d669dbfb9fbb4bae56ba90ce9b58352c793eff6e Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 21 Dec 2021 16:01:53 +0000 Subject: [PATCH 2/2] Error out early if system does not support pre-copy checkpointing CRIU's pre-copy migration support relies on the soft dirty page tracking in the Linux kernel: https://www.kernel.org/doc/Documentation/vm/soft-dirty.txt This functionality is not implemented for all architectures and it can also be turned off in the kernel. CRIU can check if the combination of architecture/kernel/CRIU supports the soft dirty page tracking and exports this feature checking functionality in go-criu. This commit adds an early check if the user selects pre-copy checkpointing to error out if the system does not support it. Signed-off-by: Adrian Reber --- cmd/podman/containers/checkpoint.go | 4 ++++ .../markdown/podman-container-checkpoint.1.md | 12 ++++++++++ go.mod | 1 + pkg/criu/criu.go | 22 +++++++++++++++++++ pkg/criu/criu_unsupported.go | 7 ++++++ test/e2e/checkpoint_test.go | 6 +++++ vendor/modules.txt | 1 + 7 files changed, 53 insertions(+) create mode 100644 pkg/criu/criu_unsupported.go diff --git a/cmd/podman/containers/checkpoint.go b/cmd/podman/containers/checkpoint.go index e8dd25978c..43a1b75e54 100644 --- a/cmd/podman/containers/checkpoint.go +++ b/cmd/podman/containers/checkpoint.go @@ -11,6 +11,7 @@ import ( "github.com/containers/podman/v3/cmd/podman/registry" "github.com/containers/podman/v3/cmd/podman/utils" "github.com/containers/podman/v3/cmd/podman/validate" + "github.com/containers/podman/v3/pkg/criu" "github.com/containers/podman/v3/pkg/domain/entities" "github.com/containers/podman/v3/pkg/rootless" "github.com/containers/storage/pkg/archive" @@ -113,6 +114,9 @@ func checkpoint(cmd *cobra.Command, args []string) error { if checkpointOptions.WithPrevious && checkpointOptions.PreCheckPoint { return errors.Errorf("--with-previous can not be used with --pre-checkpoint") } + if (checkpointOptions.WithPrevious || checkpointOptions.PreCheckPoint) && !criu.MemTrack() { + return errors.New("system (architecture/kernel/CRIU) does not support memory tracking") + } responses, err := registry.ContainerEngine().ContainerCheckpoint(context.Background(), args, checkpointOptions) if err != nil { return err diff --git a/docs/source/markdown/podman-container-checkpoint.1.md b/docs/source/markdown/podman-container-checkpoint.1.md index e54274775e..00d8f70954 100644 --- a/docs/source/markdown/podman-container-checkpoint.1.md +++ b/docs/source/markdown/podman-container-checkpoint.1.md @@ -70,6 +70,13 @@ Dump the *container's* memory information only, leaving the *container* running. operations will supersede prior dumps. It only works on `runc 1.0-rc3` or `higher`.\ The default is **false**. +The functionality to only checkpoint the memory of the container and in a second +checkpoint only write out the memory pages which have changed since the first +checkpoint relies on the Linux kernel's soft-dirty bit, which is not available +on all systems as it depends on the system architecture and the configuration +of the Linux kernel. Podman will verify if the current system supports this +functionality and return an error if the current system does not support it. + #### **--print-stats** Print out statistics about checkpointing the container(s). The output is @@ -126,6 +133,11 @@ Check out the *container* with previous criu image files in pre-dump. It only wo The default is **false**.\ *IMPORTANT: This OPTION is not available with __--pre-checkpoint__*. +This option requires that the option __--pre-checkpoint__ has been used before on the +same container. Without an existing pre-checkpoint, this option will fail. + +Also see __--pre-checkpoint__ for additional information about __--pre-checkpoint__ +availability on different systems. ## EXAMPLES Make a checkpoint for the container "mywebserver". diff --git a/go.mod b/go.mod index 9ae49eff01..2a8902a1ac 100644 --- a/go.mod +++ b/go.mod @@ -66,6 +66,7 @@ require ( golang.org/x/crypto v0.0.0-20210817164053-32db794688a5 golang.org/x/sync v0.0.0-20210220032951-036812b2e83c golang.org/x/sys v0.0.0-20211205182925-97ca703d548d + google.golang.org/protobuf v1.27.1 gopkg.in/fsnotify.v1 v1.4.7 // indirect gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b k8s.io/api v0.22.4 diff --git a/pkg/criu/criu.go b/pkg/criu/criu.go index 2a6805979d..967da0dca8 100644 --- a/pkg/criu/criu.go +++ b/pkg/criu/criu.go @@ -1,7 +1,12 @@ +// +build linux + package criu import ( "github.com/checkpoint-restore/go-criu/v5" + "github.com/checkpoint-restore/go-criu/v5/rpc" + + "google.golang.org/protobuf/proto" ) // MinCriuVersion for Podman at least CRIU 3.11 is required @@ -21,3 +26,20 @@ func CheckForCriu(version int) bool { } return result } + +func MemTrack() bool { + features, err := criu.MakeCriu().FeatureCheck( + &rpc.CriuFeatures{ + MemTrack: proto.Bool(true), + }, + ) + if err != nil { + return false + } + + if features == nil || features.MemTrack == nil { + return false + } + + return *features.MemTrack +} diff --git a/pkg/criu/criu_unsupported.go b/pkg/criu/criu_unsupported.go new file mode 100644 index 0000000000..51cd0c1fd9 --- /dev/null +++ b/pkg/criu/criu_unsupported.go @@ -0,0 +1,7 @@ +// +build !linux + +package criu + +func MemTrack() bool { + return false +} diff --git a/test/e2e/checkpoint_test.go b/test/e2e/checkpoint_test.go index e97503337c..4771f8e580 100644 --- a/test/e2e/checkpoint_test.go +++ b/test/e2e/checkpoint_test.go @@ -939,6 +939,9 @@ var _ = Describe("Podman checkpoint", func() { }) It("podman checkpoint container with --pre-checkpoint", func() { + if !criu.MemTrack() { + Skip("system (architecture/kernel/CRIU) does not support memory tracking") + } if !strings.Contains(podmanTest.OCIRuntime, "runc") { Skip("Test only works on runc 1.0-rc3 or higher.") } @@ -972,6 +975,9 @@ var _ = Describe("Podman checkpoint", func() { It("podman checkpoint container with --pre-checkpoint and export (migration)", func() { SkipIfRemote("--import-previous is not yet supported on the remote client") + if !criu.MemTrack() { + Skip("system (architecture/kernel/CRIU) does not support memory tracking") + } if !strings.Contains(podmanTest.OCIRuntime, "runc") { Skip("Test only works on runc 1.0-rc3 or higher.") } diff --git a/vendor/modules.txt b/vendor/modules.txt index e07c2c6b1b..45209ff265 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -817,6 +817,7 @@ google.golang.org/grpc/stats google.golang.org/grpc/status google.golang.org/grpc/tap # google.golang.org/protobuf v1.27.1 +## explicit google.golang.org/protobuf/encoding/prototext google.golang.org/protobuf/encoding/protowire google.golang.org/protobuf/internal/descfmt