diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0189588c..2aeff09a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -35,6 +35,15 @@ jobs: sudo podman save --format docker-archive --output $ARCHIVE_DIR/docker-archive alpine:latest sudo podman save --format oci-archive --output $ARCHIVE_DIR/oci-archive alpine:latest sudo podman image rm alpine:latest + - name: Setup registries for testing + run: | + sudo bash -c 'cat < /etc/containers/registries.conf + unqualified-search-registries = ["docker.io", "quay.io"] + + [[registry]] + location = "localhost:5000" + insecure = true + EOF' - name: Run Tests run: sudo -E GOPATH=$PWD/build CI=1 env PATH="$PATH" /home/runner/go/bin/gotestsum --junitfile build/test/result.xml -- -timeout=15m . ./api - name: Archive test result diff --git a/CHANGELOG.md b/CHANGELOG.md index ee11ec2d..b57cdf64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ IMPROVEMENTS: * config: Allow setting `pids_limit` option. [[GH-203](https://github.com/hashicorp/nomad-driver-podman/pull/203)] * runtime: Set mount propagation from TaskConfig [[GH-204](https://github.com/hashicorp/nomad-driver-podman/pull/204)] +BUG FIXES: + +* driver: Fixed a bug that caused `image_pull_timeout` to the capped by the value of `client_http_timeout` [[GH-218](https://github.com/hashicorp/nomad-driver-podman/pull/218)] ## 0.4.1 (November 15, 2022) diff --git a/driver.go b/driver.go index 5738b7a1..da59adfc 100644 --- a/driver.go +++ b/driver.go @@ -114,6 +114,10 @@ type Driver struct { // podmanClient encapsulates podman remote calls podman *api.API + // slowPodman is a Podman client with no timeout configured that is used + // for slow operations that may need longer timeouts. + slowPodman *api.API + // SystemInfo collected at first fingerprint query systemInfo api.Info // Queried from systemInfo: is podman running on a cgroupv2 system? @@ -176,23 +180,34 @@ func (d *Driver) SetConfig(cfg *base.Config) error { d.nomadConfig = cfg.AgentConfig.Driver } - clientConfig := api.DefaultClientConfig() - if pluginConfig.SocketPath != "" { - clientConfig.SocketPath = pluginConfig.SocketPath - } - + var timeout time.Duration if pluginConfig.ClientHttpTimeout != "" { t, err := time.ParseDuration(pluginConfig.ClientHttpTimeout) if err != nil { return err } - clientConfig.HttpTimeout = t + timeout = t } - d.podman = api.NewClient(d.logger, clientConfig) + d.podman = d.newPodmanClient(timeout) + d.slowPodman = d.newPodmanClient(0) + return nil } +// newPodmanClient returns Podman client configured with the provided timeout. +// This method must be called after the driver configuration has been loaded. +func (d *Driver) newPodmanClient(timeout time.Duration) *api.API { + clientConfig := api.DefaultClientConfig() + clientConfig.HttpTimeout = timeout + + if d.config.SocketPath != "" { + clientConfig.SocketPath = d.config.SocketPath + } + + return api.NewClient(d.logger, clientConfig) +} + // TaskConfigSchema returns the schema for the driver configuration of the task. func (d *Driver) TaskConfigSchema() (*hclspec.Spec, error) { return taskConfigSpec, nil @@ -902,7 +917,7 @@ func (d *Driver) createImage(image string, auth *AuthConfig, forcePull bool, ima ctx, cancel := context.WithTimeout(context.Background(), imagePullTimeout) defer cancel() - if imageID, err = d.podman.ImagePull(ctx, imageName, imageAuth); err != nil { + if imageID, err = d.slowPodman.ImagePull(ctx, imageName, imageAuth); err != nil { return imageID, fmt.Errorf("failed to start task, unable to pull image %s : %w", imageName, err) } return imageID, nil diff --git a/driver_test.go b/driver_test.go index b732ccd2..698daed7 100644 --- a/driver_test.go +++ b/driver_test.go @@ -7,6 +7,11 @@ import ( "context" "fmt" "math/rand" + "net" + "net/http" + "net/http/httptest" + "net/http/httputil" + "net/url" "os" "os/exec" "path/filepath" @@ -72,6 +77,15 @@ func podmanDriverHarness(t *testing.T, cfg map[string]interface{}) *dtestutil.Dr baseConfig := base.Config{} pluginConfig := PluginConfig{} + + // Set ClientHttpTimeout before calling SetConfig() because it affects the + // HTTP client that is created. + if v, ok := cfg["ClientHttpTimeout"]; ok { + if sv, ok := v.(string); ok { + pluginConfig.ClientHttpTimeout = sv + } + } + if err := base.MsgPackEncode(&baseConfig.PluginConfig, &pluginConfig); err != nil { t.Error("Unable to encode plugin config", err) } @@ -1978,6 +1992,91 @@ func startDestroyInspectImage(t *testing.T, image string, taskName string) { require.Equal(t, imageID, inspectData.Image) } +// TestPodmanDriver_Pull_Timeout verifies that the task image_pull_timeout +// configuration is respected and that it can be set to higher value than the +// driver's client_http_timeout. +// +// This test starts a proxy on port 5000 and requires the machine running the +// test to allow an insecure registry at localhost:5000. To run this test add +// the following to /etc/containers/registries.conf: +// +// [[registry]] +// location = "localhost:5000" +// insecure = true +func TestPodmanDriver_Pull_Timeout(t *testing.T) { + // Check if the machine running the test is properly configured to run this + // test. + expectedRegistry := `[[registry]] +location = "localhost:5000" +insecure = true` + + content, err := os.ReadFile("/etc/containers/registries.conf") + require.NoError(t, err) + if !strings.Contains(string(content), expectedRegistry) { + t.Skip("Skipping test because /etc/containers/registries.conf doesn't have insecure registry config for localhost:5000") + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Create a listener on port 5000. + l, err := net.Listen("tcp", "127.0.0.1:5000") + require.NoError(t, err) + + // Proxy requests to quay.io except when trying to pull a busybox image. + parsedURL, err := url.Parse("https://quay.io") + require.NoError(t, err) + + proxy := httputil.NewSingleHostReverseProxy(parsedURL) + proxy.ModifyResponse = func(resp *http.Response) error { + if strings.Contains(resp.Request.URL.Path, "busybox") { + select { + case <-ctx.Done(): + return nil + case <-time.After(5 * time.Second): + return fmt.Errorf("expected image pull to timeout") + } + } + return nil + } + // Create a new HTTP test server but don't start it so we can use our + // own listener on port 5000. + ts := httptest.NewUnstartedServer(proxy) + ts.Listener.Close() + ts.Listener = l + ts.Start() + defer ts.Close() + + // Create the test harness and a sample task. + d := podmanDriverHarness(t, map[string]interface{}{ + "ClientHttpTimeout": "1s", + }) + task := &drivers.TaskConfig{ + ID: uuid.Generate(), + Name: "test", + AllocID: uuid.Generate(), + Resources: createBasicResources(), + } + + // Time how long it takes to pull the image. + now := time.Now() + + resultCh := make(chan error) + go func() { + // Pull image using our proxy. + image := "localhost:5000/quay/busybox:latest" + _, err = getPodmanDriver(t, d).createImage(image, &AuthConfig{}, true, 3*time.Second, task) + resultCh <- err + }() + + err = <-resultCh + cancel() + + // Verify that the timeout happened after client_http_timeout. + require.Greater(t, time.Now().Sub(now), 2*time.Second) + require.ErrorContains(t, err, "context deadline exceeded") +} + func Test_createImage(t *testing.T) { if !tu.IsCI() { t.Parallel()