diff --git a/cmd/nvidia-ctk/cdi/generate/generate.go b/cmd/nvidia-ctk/cdi/generate/generate.go index 24ad57a4e..7504ad997 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate.go +++ b/cmd/nvidia-ctk/cdi/generate/generate.go @@ -155,6 +155,7 @@ func (m command) validateFlags(c *cli.Context, opts *options) error { case nvcdi.ModeNvml: case nvcdi.ModeWsl: case nvcdi.ModeManagement: + case nvcdi.ModeVfio: default: return fmt.Errorf("invalid discovery mode: %v", opts.mode) } diff --git a/pkg/nvcdi/api.go b/pkg/nvcdi/api.go index 68bfd8452..88afbab4d 100644 --- a/pkg/nvcdi/api.go +++ b/pkg/nvcdi/api.go @@ -39,6 +39,8 @@ const ( // ModeCSV configures the CDI spec generator to generate a spec based on the contents of CSV // mountspec files. ModeCSV = "csv" + // ModeVfio configures the CDI spec generator to generate a VFIO spec. + ModeVfio = "vfio" ) // Interface defines the API for the nvcdi package diff --git a/pkg/nvcdi/lib-vfio.go b/pkg/nvcdi/lib-vfio.go new file mode 100644 index 000000000..b9361cdcb --- /dev/null +++ b/pkg/nvcdi/lib-vfio.go @@ -0,0 +1,90 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+**/
+
+package nvcdi
+
+import (
+	"fmt"
+
+	"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
+	"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
+	"github.com/container-orchestrated-devices/container-device-interface/specs-go"
+	"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
+)
+
+type vfiolib nvcdilib // nvcdilib viewed as the implementation of Interface used for ModeVfio
+
+var _ Interface = (*vfiolib)(nil) // compile-time check that *vfiolib satisfies Interface
+
+// GetSpec is not supported by vfiolib; it always returns a nil spec and a non-nil error.
+func (l *vfiolib) GetSpec() (spec.Interface, error) {
+	return nil, fmt.Errorf("unexpected call to vfiolib.GetSpec()")
+}
+
+// GetAllDeviceSpecs returns one device spec for each GPU reported by nvpcilib.GetGPUs whose driver is "vfio-pci"; each spec exposes the device node /dev/vfio/<iommu group>. An error from GetGPUs is wrapped and returned.
+func (l *vfiolib) GetAllDeviceSpecs() ([]specs.Device, error) {
+	var deviceSpecs []specs.Device
+
+	devices, err := l.nvpcilib.GetGPUs()
+	if err != nil {
+		return nil, fmt.Errorf("failed getting NVIDIA GPUs: %w", err)
+	}
+
+	for idx, dev := range devices {
+		if dev.Driver == "vfio-pci" {
+			l.logger.Debugf("Found NVIDIA device: address=%s, driver=%s, iommu_group=%d, deviceId=%x",
+				dev.Address, dev.Driver, dev.IommuGroup, dev.Device)
+			deviceSpecs = append(deviceSpecs, specs.Device{
+				Name: fmt.Sprintf("%d", idx), // index in the GetGPUs result, so names may skip GPUs not bound to vfio-pci
+				ContainerEdits: specs.ContainerEdits{
+					DeviceNodes: []*specs.DeviceNode{
+						{
+							Path: fmt.Sprintf("/dev/vfio/%d", dev.IommuGroup),
+						},
+					},
+				},
+			})
+		}
+	}
+
+	return deviceSpecs, nil
+}
+
+// GetCommonEdits returns common edits for ALL devices.
+// Note, currently there are no common edits.
+func (l *vfiolib) GetCommonEdits() (*cdi.ContainerEdits, error) {
+	return &cdi.ContainerEdits{ContainerEdits: &specs.ContainerEdits{}}, nil // empty but non-nil edits
+}
+
+// GetGPUDeviceEdits is not supported by vfiolib; it always returns nil edits and a non-nil error.
+func (l *vfiolib) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) {
+	return nil, fmt.Errorf("unexpected call to vfiolib.GetGPUDeviceEdits()")
+}
+
+// GetGPUDeviceSpecs is not supported by vfiolib; it always returns a nil spec and a non-nil error.
+func (l *vfiolib) GetGPUDeviceSpecs(int, device.Device) (*specs.Device, error) {
+	return nil, fmt.Errorf("unexpected call to vfiolib.GetGPUDeviceSpecs()")
+}
+
+// GetMIGDeviceEdits is not supported by vfiolib; it always returns nil edits and a non-nil error.
+func (l *vfiolib) GetMIGDeviceEdits(device.Device, device.MigDevice) (*cdi.ContainerEdits, error) {
+	return nil, fmt.Errorf("unexpected call to vfiolib.GetMIGDeviceEdits()")
+}
+
+// GetMIGDeviceSpecs is not supported by vfiolib; it always returns a nil spec and a non-nil error.
+func (l *vfiolib) GetMIGDeviceSpecs(int, device.Device, int, device.MigDevice) (*specs.Device, error) {
+	return nil, fmt.Errorf("unexpected call to vfiolib.GetMIGDeviceSpecs()")
+}
diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go index ad5dd2704..53a3e8e6c 100644 --- a/pkg/nvcdi/lib.go +++ b/pkg/nvcdi/lib.go @@ -26,6 +26,8 @@ import ( "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device" "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info" "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml" + + "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvpci" ) type wrapper struct { @@ -53,6 +55,8 @@ type nvcdilib struct { infolib info.Interface + nvpcilib nvpci.Interface + mergedDeviceOptions []transform.MergedDeviceOption } @@ -114,6 +118,14 @@ func New(opts ...Option) (Interface, error) { l.class = "mofed" } lib = (*mofedlib)(l) + case ModeVfio: + if l.class == "" { + l.class = "pgpu" + } + if l.nvpcilib == nil { + l.nvpcilib = nvpci.New() + } + lib = (*vfiolib)(l) default: return nil, fmt.Errorf("unknown mode %q", l.mode) }