Skip to content

Commit

Permalink
Make --gpus work with nvidia gpus
Browse files Browse the repository at this point in the history
Somewhat documented here:
https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html
https://stackoverflow.com/questions/25185405/using-gpu-from-a-docker-container

Fixes: #21156

I don't have access to NVIDIA GPUs, so this relies on contributor testing.

Signed-off-by: Daniel J Walsh <[email protected]>
  • Loading branch information
rhatdan committed Jan 12, 2024
1 parent e06abd1 commit 46cfc98
Show file tree
Hide file tree
Showing 10 changed files with 27 additions and 12 deletions.
4 changes: 4 additions & 0 deletions cmd/podman/common/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,10 @@ func DefineCreateFlags(cmd *cobra.Command, cf *entities.ContainerCreateOptions,
)
_ = cmd.RegisterFlagCompletionFunc(gidmapFlagName, completion.AutocompleteNone)

gpuFlagName := "gpus"
createFlags.StringSliceVar(&cf.GPUs, gpuFlagName, []string{}, "GPU devices to add to the container ('all' to pass all GPUs)")
_ = cmd.RegisterFlagCompletionFunc(gpuFlagName, completion.AutocompleteNone)

uidmapFlagName := "uidmap"
createFlags.StringSliceVar(
&cf.UIDMap,
Expand Down
5 changes: 0 additions & 5 deletions cmd/podman/containers/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,6 @@ func runFlags(cmd *cobra.Command) {
flags.StringVar(&runOpts.DetachKeys, detachKeysFlagName, containerConfig.DetachKeys(), "Override the key sequence for detaching a container. Format is a single character `[a-Z]` or a comma separated sequence of `ctrl-<value>`, where `<value>` is one of: `a-cf`, `@`, `^`, `[`, `\\`, `]`, `^` or `_`")
_ = cmd.RegisterFlagCompletionFunc(detachKeysFlagName, common.AutocompleteDetachKeys)

gpuFlagName := "gpus"
flags.String(gpuFlagName, "", "This is a Docker specific option and is a NOOP")
_ = cmd.RegisterFlagCompletionFunc(gpuFlagName, completion.AutocompleteNone)
_ = flags.MarkHidden("gpus")

passwdFlagName := "passwd"
flags.BoolVar(&runOpts.Passwd, passwdFlagName, true, "add entries to /etc/passwd and /etc/group")

Expand Down
8 changes: 8 additions & 0 deletions docs/source/markdown/options/gpus.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
####> This option file is used in:
####> podman create, pod clone, pod create, run
####> If file is edited, make sure the changes
####> are applicable to all of those.
#### **--gpus**=*ENTRY*

GPU devices to add to the container ('all' to pass all GPUs). Currently only
NVIDIA devices are supported.
2 changes: 2 additions & 0 deletions docs/source/markdown/podman-create.1.md.in
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,8 @@ See [**Environment**](#environment) note below for precedence and examples.

@@option gidmap.container

@@option gpus

@@option group-add

@@option group-entry
Expand Down
2 changes: 2 additions & 0 deletions docs/source/markdown/podman-pod-clone.1.md.in
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ Note: the pod implements devices by storing the initial configuration passed by

@@option gidmap.pod

@@option gpus

#### **--help**, **-h**

Print usage statement.
Expand Down
2 changes: 2 additions & 0 deletions docs/source/markdown/podman-pod-create.1.md.in
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ Set the exit policy of the pod when the last container exits. Supported policie

@@option gidmap.pod

@@option gpus

#### **--help**, **-h**

Print usage statement.
Expand Down
2 changes: 2 additions & 0 deletions docs/source/markdown/podman-run.1.md.in
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,8 @@ See [**Environment**](#environment) note below for precedence and examples.

@@option gidmap.container

@@option gpus

@@option group-add

@@option group-entry
Expand Down
1 change: 1 addition & 0 deletions pkg/domain/entities/pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ type ContainerCreateOptions struct {
EnvFile []string
Expose []string
GIDMap []string
GPUs []string
GroupAdd []string
HealthCmd string
HealthInterval string
Expand Down
7 changes: 6 additions & 1 deletion pkg/specgenutil/specgen.go
Original file line number Diff line number Diff line change
Expand Up @@ -784,7 +784,12 @@ func FillOutSpecGen(s *specgen.SpecGenerator, c *entities.ContainerCreateOptions
s.ImageVolumes = imageVolumes
}

for _, dev := range c.Devices {
devices := c.Devices
for _, gpu := range c.GPUs {
devices = append(devices, "nvidia.com/gpu="+gpu)
}

for _, dev := range devices {
s.Devices = append(s.Devices, specs.LinuxDevice{Path: dev})
}

Expand Down
6 changes: 0 additions & 6 deletions test/e2e/run_device_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,6 @@ var _ = Describe("Podman run device", func() {
Expect(session).Should(ExitCleanly())
})

It("podman run --gpus noop", func() {
session := podmanTest.Podman([]string{"run", "--gpus", "all", ALPINE, "true"})
session.WaitWithDefaultTimeout()
Expect(session).Should(ExitCleanly())
})

It("podman run cannot access non default devices", func() {
session := podmanTest.Podman([]string{"run", "-v /dev:/dev-host", ALPINE, "head", "-1", "/dev-host/kmsg"})
session.WaitWithDefaultTimeout()
Expand Down

0 comments on commit 46cfc98

Please sign in to comment.