Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

driver: fix image_pull_timeout #218

Merged
merged 3 commits into from
Mar 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@ jobs:
sudo podman save --format docker-archive --output $ARCHIVE_DIR/docker-archive alpine:latest
sudo podman save --format oci-archive --output $ARCHIVE_DIR/oci-archive alpine:latest
sudo podman image rm alpine:latest
- name: Setup registries for testing
run: |
sudo bash -c 'cat <<EOF > /etc/containers/registries.conf
unqualified-search-registries = ["docker.io", "quay.io"]

[[registry]]
location = "localhost:5000"
insecure = true
EOF'
- name: Run Tests
run: sudo -E GOPATH=$PWD/build CI=1 env PATH="$PATH" /home/runner/go/bin/gotestsum --junitfile build/test/result.xml -- -timeout=15m . ./api
- name: Archive test result
Expand Down
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ IMPROVEMENTS:
* config: Allow setting `pids_limit` option. [[GH-203](https://github.com/hashicorp/nomad-driver-podman/pull/203)]
* runtime: Set mount propagation from TaskConfig [[GH-204](https://github.com/hashicorp/nomad-driver-podman/pull/204)]

BUG FIXES:

* driver: Fixed a bug that caused `image_pull_timeout` to the capped by the value of `client_http_timeout` [[GH-218](https://github.com/hashicorp/nomad-driver-podman/pull/218)]

## 0.4.1 (November 15, 2022)

Expand Down
31 changes: 23 additions & 8 deletions driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ type Driver struct {
// podmanClient encapsulates podman remote calls
podman *api.API

// slowPodman is a Podman client with no timeout configured that is used
// for slow operations that may need longer timeouts.
slowPodman *api.API

// SystemInfo collected at first fingerprint query
systemInfo api.Info
// Queried from systemInfo: is podman running on a cgroupv2 system?
Expand Down Expand Up @@ -166,23 +170,34 @@ func (d *Driver) SetConfig(cfg *base.Config) error {
d.nomadConfig = cfg.AgentConfig.Driver
}

clientConfig := api.DefaultClientConfig()
if pluginConfig.SocketPath != "" {
clientConfig.SocketPath = pluginConfig.SocketPath
}

var timeout time.Duration
if pluginConfig.ClientHttpTimeout != "" {
t, err := time.ParseDuration(pluginConfig.ClientHttpTimeout)
if err != nil {
return err
}
clientConfig.HttpTimeout = t
timeout = t
}

d.podman = api.NewClient(d.logger, clientConfig)
d.podman = d.newPodmanClient(timeout)
d.slowPodman = d.newPodmanClient(0)

return nil
}

// newPodmanClient returns Podman client configured with the provided timeout.
// This method must be called after the driver configuration has been loaded.
func (d *Driver) newPodmanClient(timeout time.Duration) *api.API {
clientConfig := api.DefaultClientConfig()
clientConfig.HttpTimeout = timeout

if d.config.SocketPath != "" {
clientConfig.SocketPath = d.config.SocketPath
}

return api.NewClient(d.logger, clientConfig)
}

// TaskConfigSchema returns the schema for the driver configuration of the task.
func (d *Driver) TaskConfigSchema() (*hclspec.Spec, error) {
return taskConfigSpec, nil
Expand Down Expand Up @@ -850,7 +865,7 @@ func (d *Driver) createImage(image string, auth *AuthConfig, forcePull bool, ima
ctx, cancel := context.WithTimeout(context.Background(), imagePullTimeout)
defer cancel()

if imageID, err = d.podman.ImagePull(ctx, imageName, imageAuth); err != nil {
if imageID, err = d.slowPodman.ImagePull(ctx, imageName, imageAuth); err != nil {
return imageID, fmt.Errorf("failed to start task, unable to pull image %s : %w", imageName, err)
}
return imageID, nil
Expand Down
99 changes: 99 additions & 0 deletions driver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ import (
"context"
"fmt"
"math/rand"
"net"
"net/http"
"net/http/httptest"
"net/http/httputil"
"net/url"
"os"
"os/exec"
"path/filepath"
Expand Down Expand Up @@ -72,6 +77,15 @@ func podmanDriverHarness(t *testing.T, cfg map[string]interface{}) *dtestutil.Dr

baseConfig := base.Config{}
pluginConfig := PluginConfig{}

// Set ClientHttpTimeout before calling SetConfig() because it affects the
// HTTP client that is created.
if v, ok := cfg["ClientHttpTimeout"]; ok {
if sv, ok := v.(string); ok {
pluginConfig.ClientHttpTimeout = sv
}
}

if err := base.MsgPackEncode(&baseConfig.PluginConfig, &pluginConfig); err != nil {
t.Error("Unable to encode plugin config", err)
}
Expand Down Expand Up @@ -1914,6 +1928,91 @@ func startDestroyInspectImage(t *testing.T, image string, taskName string) {
require.Equal(t, imageID, inspectData.Image)
}

// TestPodmanDriver_Pull_Timeout verifies that the task image_pull_timeout
// configuration is respected and that it can be set to higher value than the
// driver's client_http_timeout.
//
// This test starts a proxy on port 5000 and requires the machine running the
// test to allow an insecure registry at localhost:5000. To run this test add
// the following to /etc/containers/registries.conf:
//
// [[registry]]
// location = "localhost:5000"
// insecure = true
func TestPodmanDriver_Pull_Timeout(t *testing.T) {
lgfa29 marked this conversation as resolved.
Show resolved Hide resolved
// Check if the machine running the test is properly configured to run this
// test.
expectedRegistry := `[[registry]]
location = "localhost:5000"
insecure = true`

content, err := os.ReadFile("/etc/containers/registries.conf")
require.NoError(t, err)
if !strings.Contains(string(content), expectedRegistry) {
t.Skip("Skipping test because /etc/containers/registries.conf doesn't have insecure registry config for localhost:5000")
}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()

// Create a listener on port 5000.
l, err := net.Listen("tcp", "127.0.0.1:5000")
require.NoError(t, err)

// Proxy requests to quay.io except when trying to pull a busybox image.
parsedURL, err := url.Parse("https://quay.io")
require.NoError(t, err)

proxy := httputil.NewSingleHostReverseProxy(parsedURL)
proxy.ModifyResponse = func(resp *http.Response) error {
if strings.Contains(resp.Request.URL.Path, "busybox") {
select {
case <-ctx.Done():
return nil
case <-time.After(5 * time.Second):
return fmt.Errorf("expected image pull to timeout")
}
}
return nil
}
// Create a new HTTP test server but don't start it so we can use our
// own listener on port 5000.
ts := httptest.NewUnstartedServer(proxy)
ts.Listener.Close()
ts.Listener = l
ts.Start()
defer ts.Close()

// Create the test harness and a sample task.
d := podmanDriverHarness(t, map[string]interface{}{
"ClientHttpTimeout": "1s",
})
task := &drivers.TaskConfig{
ID: uuid.Generate(),
Name: "test",
AllocID: uuid.Generate(),
Resources: createBasicResources(),
}

// Time how long it takes to pull the image.
now := time.Now()

resultCh := make(chan error)
go func() {
// Pull image using our proxy.
image := "localhost:5000/quay/busybox:latest"
_, err = getPodmanDriver(t, d).createImage(image, &AuthConfig{}, true, 3*time.Second, task)
resultCh <- err
}()

err = <-resultCh
cancel()

// Verify that the timeout happened after client_http_timeout.
require.Greater(t, time.Now().Sub(now), 2*time.Second)
require.ErrorContains(t, err, "context deadline exceeded")
}

func Test_createImage(t *testing.T) {
if !tu.IsCI() {
t.Parallel()
Expand Down