Skip to content

Commit

Permalink
oci: support legacy --nv bind mode
Browse files Browse the repository at this point in the history
Closes sylabs/singularity#1033

Signed-off-by: Edita Kizinevic <[email protected]>
  • Loading branch information
dtrudg authored and edytuk committed Jun 14, 2023
1 parent 514f940 commit c57eb13
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 7 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ For older changes see the [archived Singularity change log](https://github.com/a
- Container environment variables via `--env`, `--env-file`, and
`APPTAINERENV_` host env vars.
- `--rocm` to bind ROCm GPU libraries and devices into the container.
- `--nv` to bind Nvidia driver / basic CUDA libraries and devices into
the container.

## Changes since last pre-release

Expand Down
41 changes: 40 additions & 1 deletion e2e/gpu/gpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,44 @@ func (c ctx) testNvidiaLegacy(t *testing.T) {
}
}

// ociTestNvidiaLegacy verifies that legacy `--nv` binding works in OCI mode
// by executing `nvidia-smi` inside a stock Ubuntu container under each of the
// OCI user, fakeroot and root profiles, expecting a zero exit status.
func (c ctx) ociTestNvidiaLegacy(t *testing.T) {
	// NOTE(review): presumably skips the test on hosts without Nvidia
	// support; confirm against the require package.
	require.Nvidia(t)

	const imageURL = "docker://ubuntu:20.04"

	type testCase struct {
		profile e2e.Profile
		args    []string
		env     []string
	}

	// The image does not ship `nvidia-smi`; it is expected to be bound in by
	// `--nv` and to be reachable via the container PATH.
	cases := []testCase{
		{
			profile: e2e.OCIUserProfile,
			args:    []string{"--nv", imageURL, "nvidia-smi"},
		},
		{
			profile: e2e.OCIFakerootProfile,
			args:    []string{"--nv", imageURL, "nvidia-smi"},
		},
		{
			profile: e2e.OCIRootProfile,
			args:    []string{"--nv", imageURL, "nvidia-smi"},
		},
	}

	for _, tc := range cases {
		// One subtest per profile, named after the profile itself.
		c.env.RunApptainer(
			t,
			e2e.AsSubtest(tc.profile.String()),
			e2e.WithProfile(tc.profile),
			e2e.WithCommand("exec"),
			e2e.WithArgs(tc.args...),
			e2e.WithEnv(tc.env),
			e2e.ExpectExit(0),
		)
	}
}

func (c ctx) testNvCCLI(t *testing.T) {
require.Nvidia(t)
require.NvCCLI(t)
Expand Down Expand Up @@ -602,6 +640,7 @@ func E2ETests(env e2e.TestEnv) testhelper.Tests {
"build nvccli": c.testBuildNvCCLI,
"build rocm": c.testBuildRocm,
// oci mode
"oci rocm": c.ociTestRocm,
"oci nvidia": c.ociTestNvidiaLegacy,
"oci rocm": c.ociTestRocm,
}
}
6 changes: 0 additions & 6 deletions internal/pkg/runtime/launcher/oci/launcher_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,9 @@ func checkOpts(lo launcher.Options) error {
badOpt = append(badOpt, "NoMount")
}

if lo.Nvidia {
badOpt = append(badOpt, "Nvidia")
}
if lo.NvCCLI {
badOpt = append(badOpt, "NvCCLI")
}
if lo.NoNvidia {
badOpt = append(badOpt, "NoNvidia")
}

if len(lo.ContainLibs) > 0 {
badOpt = append(badOpt, "ContainLibs")
Expand Down
79 changes: 79 additions & 0 deletions internal/pkg/runtime/launcher/oci/mounts_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ func (l *Launcher) getMounts() ([]specs.Mount, error) {
return nil, fmt.Errorf("while configuring ROCm mount(s): %w", err)
}
}
if (l.cfg.Nvidia || l.apptainerConf.AlwaysUseNv) && !l.cfg.NoNvidia {
if err := l.addNvidiaMounts(mounts); err != nil {
return nil, fmt.Errorf("while configuring Nvidia mount(s): %w", err)
}
}

return *mounts, nil
}
Expand Down Expand Up @@ -350,3 +355,77 @@ func (l *Launcher) addRocmMounts(mounts *[]specs.Mount) error {

return nil
}

// addNvidiaMounts appends the bind mounts required for legacy `--nv` GPU
// support to mounts:
//
//   - Nvidia binaries, bound read-only into /usr/bin in the container;
//   - Nvidia libraries, bound read-only into containerLibDir;
//   - Nvidia IPC paths, bound at the same path as on the host;
//   - Nvidia device nodes, bound at the same path as on the host.
//
// Failures while discovering binaries, libraries, IPC paths or devices are
// reported as warnings only, so a host with a partial Nvidia installation
// still gets whatever could be found. An error is returned only when adding
// one of the discovered mounts fails.
func (l *Launcher) addNvidiaMounts(mounts *[]specs.Mount) error {
	if l.apptainerConf.UseNvCCLI {
		sylog.Warningf("--nvccli not yet supported with --oci. Falling back to legacy --nv support.")
	}

	gpuConfFile := filepath.Join(buildcfg.APPTAINER_CONFDIR, "nvliblist.conf")
	libs, bins, err := gpu.NvidiaPaths(gpuConfFile)
	if err != nil {
		sylog.Warningf("While finding Nvidia bind points: %v", err)
	}
	if len(libs) == 0 {
		sylog.Warningf("Could not find any Nvidia libraries on this host!")
	}

	ipcs, err := gpu.NvidiaIpcsPath()
	if err != nil {
		sylog.Warningf("While finding Nvidia IPCs: %v", err)
	}

	// NOTE(review): the meaning of the boolean argument is not visible here;
	// confirm against gpu.NvidiaDevices.
	devs, err := gpu.NvidiaDevices(true)
	if err != nil {
		sylog.Warningf("While finding Nvidia devices: %v", err)
	}
	if len(devs) == 0 {
		// This is the Nvidia code path; the message previously (and
		// misleadingly) referred to ROCm devices.
		sylog.Warningf("Could not find any Nvidia devices on this host!")
	}

	// Binaries keep their base name and are placed in /usr/bin.
	for _, binary := range bins {
		mnt := bind.Path{
			Source:      binary,
			Destination: filepath.Join("/usr/bin", filepath.Base(binary)),
			Options:     map[string]*bind.Option{"ro": {}},
		}
		if err := addBindMount(mounts, mnt); err != nil {
			return err
		}
	}

	// Libraries keep their base name and are placed in containerLibDir.
	for _, lib := range libs {
		mnt := bind.Path{
			Source:      lib,
			Destination: filepath.Join(containerLibDir, filepath.Base(lib)),
			Options:     map[string]*bind.Option{"ro": {}},
		}
		if err := addBindMount(mounts, mnt); err != nil {
			return err
		}
	}

	// IPC paths are bound at the same location as on the host, without the
	// read-only option.
	for _, ipc := range ipcs {
		mnt := bind.Path{
			Source:      ipc,
			Destination: ipc,
		}
		if err := addBindMount(mounts, mnt); err != nil {
			return err
		}
	}

	// Device nodes go through the dedicated device bind helper.
	for _, dev := range devs {
		mnt := bind.Path{
			Source:      dev,
			Destination: dev,
		}
		if err := addDevBindMount(mounts, mnt); err != nil {
			return err
		}
	}

	return nil
}

0 comments on commit c57eb13

Please sign in to comment.