diff --git a/internal/guest/runtime/hcsv2/uvm.go b/internal/guest/runtime/hcsv2/uvm.go index 617d2bdf7d..f4fb3be5e6 100644 --- a/internal/guest/runtime/hcsv2/uvm.go +++ b/internal/guest/runtime/hcsv2/uvm.go @@ -1005,6 +1005,7 @@ func modifyMappedVirtualDisk( VerityInfo: verityInfo, EnsureFilesystem: mvd.EnsureFilesystem, Filesystem: mvd.Filesystem, + BlockDev: mvd.BlockDev, } return scsi.Mount(mountCtx, mvd.Controller, mvd.Lun, mvd.Partition, mvd.MountPath, mvd.ReadOnly, mvd.Options, config) @@ -1022,6 +1023,7 @@ func modifyMappedVirtualDisk( VerityInfo: verityInfo, EnsureFilesystem: mvd.EnsureFilesystem, Filesystem: mvd.Filesystem, + BlockDev: mvd.BlockDev, } if err := scsi.Unmount(ctx, mvd.Controller, mvd.Lun, mvd.Partition, mvd.MountPath, config); err != nil { diff --git a/internal/guest/runtime/hcsv2/workload_container.go b/internal/guest/runtime/hcsv2/workload_container.go index 28349de5df..6ea39101e8 100644 --- a/internal/guest/runtime/hcsv2/workload_container.go +++ b/internal/guest/runtime/hcsv2/workload_container.go @@ -5,10 +5,12 @@ package hcsv2 import ( "context" + "fmt" "os" "path/filepath" "strings" + "github.com/opencontainers/runc/libcontainer/devices" oci "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" "go.opencensus.io/trace" @@ -84,6 +86,45 @@ func updateHugePageMounts(sbid string, spec *oci.Spec) error { return nil } +func updateBlockDeviceMounts(spec *oci.Spec) error { + for i, m := range spec.Mounts { + if !strings.HasPrefix(m.Destination, guestpath.BlockDevMountPrefix) { + continue + } + permissions := "rwm" + for _, o := range m.Options { + if o == "ro" { + permissions = "r" + break + } + } + + // For block device mounts, the source will be a symlink. Resolve it first + // before passing to `DeviceFromPath`, which expects a real device path. + rPath, err := os.Readlink(m.Source) + if err != nil { + return fmt.Errorf("failed to readlink %s: %w", m.Source, err) + } + + sourceDevice, err := devices.DeviceFromPath(rPath, permissions) + if err != nil { + return fmt.Errorf("failed to get device from path: %w", err) + } + + deviceCgroup := oci.LinuxDeviceCgroup{ + Allow: true, + Type: string(sourceDevice.Type), + Major: &sourceDevice.Major, + Minor: &sourceDevice.Minor, + Access: string(sourceDevice.Permissions), + } + + spec.Linux.Resources.Devices = append(spec.Linux.Resources.Devices, deviceCgroup) + spec.Mounts[i].Destination = strings.TrimPrefix(m.Destination, guestpath.BlockDevMountPrefix) + } + return nil +} + func specHasGPUDevice(spec *oci.Spec) bool { for _, d := range spec.Windows.Devices { if d.IDType == "gpu" { @@ -115,6 +156,10 @@ func setupWorkloadContainerSpec(ctx context.Context, sbid, id string, spec *oci. return errors.Wrapf(err, "failed to update hugepages mounts for container %v in sandbox %v", id, sbid) } + if err = updateBlockDeviceMounts(spec); err != nil { + return fmt.Errorf("failed to update block device mounts for container %v in sandbox %v: %w", id, sbid, err) + } + // Add default mounts for container networking (e.g. /etc/hostname, /etc/hosts), // if spec didn't override them explicitly. networkingMounts := specInternal.GenerateWorkloadContainerNetworkMounts(sbid, spec) diff --git a/internal/guest/storage/scsi/scsi.go b/internal/guest/storage/scsi/scsi.go index a24946e467..4d13d7c522 100644 --- a/internal/guest/storage/scsi/scsi.go +++ b/internal/guest/storage/scsi/scsi.go @@ -15,6 +15,7 @@ import ( "time" "github.com/pkg/errors" + "github.com/sirupsen/logrus" "go.opencensus.io/trace" "golang.org/x/sys/unix" @@ -34,6 +35,7 @@ import ( var ( osMkdirAll = os.MkdirAll osRemoveAll = os.RemoveAll + osSymlink = os.Symlink unixMount = unix.Mount // mock functions for testing getDevicePath @@ -112,6 +114,7 @@ type Config struct { VerityInfo *guestresource.DeviceVerityInfo EnsureFilesystem bool Filesystem string + BlockDev bool } // Mount creates a mount from the SCSI device on `controller` index `lun` to @@ -163,6 +166,19 @@ func Mount( } } + // create and symlink block device mount target + if config.BlockDev { + parent := filepath.Dir(target) + if err := osMkdirAll(parent, 0700); err != nil { + return err + } + log.G(ctx).WithFields(logrus.Fields{ + "source": source, + "target": target, + }).Trace("creating block device symlink") + return osSymlink(source, target) + } + if err := osMkdirAll(target, 0700); err != nil { return err } @@ -280,6 +296,15 @@ func Unmount( trace.Int64Attribute("partition", int64(partition)), trace.StringAttribute("target", target)) + // skip unmount logic for block devices, since they are just symlinks + if config.BlockDev { + log.G(ctx).WithField("target", target).Trace("removing block device symlink") + if err := osRemoveAll(target); err != nil { + return fmt.Errorf("failed to remove symlink: %w", err) + } + return nil + } + // unmount target if err := storageUnmountPath(ctx, target, true); err != nil { return errors.Wrapf(err, "unmount failed: %s", target) diff --git a/internal/guestpath/paths.go b/internal/guestpath/paths.go index 8ab4ac7159..1ff048c406 100644 --- a/internal/guestpath/paths.go +++ b/internal/guestpath/paths.go @@ -13,6 +13,9 @@ const ( // HugePagesMountPrefix is mount prefix used in container spec to mark a // huge-pages mount HugePagesMountPrefix = "hugepages://" + // BlockDevMountPrefix is mount prefix used in container spec to mark a + // block-device mount. + BlockDevMountPrefix = "blockdev://" // PipePrefix is the mount prefix used in container spec to mark a named pipe PipePrefix = `\\.\pipe` // LCOWMountPathPrefixFmt is the path format in the LCOW UVM where diff --git a/internal/hcsoci/resources_lcow.go b/internal/hcsoci/resources_lcow.go index a99496b9a1..b98493ce4c 100644 --- a/internal/hcsoci/resources_lcow.go +++ b/internal/hcsoci/resources_lcow.go @@ -83,6 +83,8 @@ func allocateLinuxResources(ctx context.Context, coi *createOptionsInternal, r * } l := log.G(ctx).WithField("mount", fmt.Sprintf("%+v", mount)) + + isBlockDev := strings.HasPrefix(mount.Destination, guestpath.BlockDevMountPrefix) if mount.Type == MountTypePhysicalDisk { l.Debug("hcsshim::allocateLinuxResources Hot-adding SCSI physical disk for OCI mount") scsiMount, err := coi.HostingSystem.SCSIManager.AddPhysicalDisk( @@ -90,15 +92,18 @@ func allocateLinuxResources(ctx context.Context, coi *createOptionsInternal, r * hostPath, readOnly, coi.HostingSystem.ID(), - &scsi.MountConfig{Options: mount.Options}, + &scsi.MountConfig{Options: mount.Options, BlockDev: isBlockDev}, ) if err != nil { return errors.Wrapf(err, "adding SCSI physical disk mount %+v", mount) } - uvmPathForFile = scsiMount.GuestPath() r.Add(scsiMount) - coi.Spec.Mounts[i].Type = "none" + mt := "none" + if isBlockDev { + mt = "bind" + } + coi.Spec.Mounts[i].Type = mt } else if mount.Type == MountTypeVirtualDisk { l.Debug("hcsshim::allocateLinuxResources Hot-adding SCSI virtual disk for OCI mount") @@ -109,7 +114,7 @@ func allocateLinuxResources(ctx context.Context, coi *createOptionsInternal, r * hostPath, readOnly, coi.HostingSystem.ID(), - &scsi.MountConfig{Options: mount.Options}, + &scsi.MountConfig{Options: mount.Options, BlockDev: isBlockDev}, ) if err != nil { return errors.Wrapf(err, "adding SCSI virtual disk mount %+v", mount) @@ -117,7 +122,11 @@ func allocateLinuxResources(ctx context.Context, coi *createOptionsInternal, r * uvmPathForFile = scsiMount.GuestPath() r.Add(scsiMount) - coi.Spec.Mounts[i].Type = "none" + mt := "none" + if isBlockDev { + mt = "bind" + } + coi.Spec.Mounts[i].Type = mt } else if strings.HasPrefix(mount.Source, guestpath.SandboxMountPrefix) { // Mounts that map to a path in UVM are specified with 'sandbox://' prefix. // example: sandbox:///a/dirInUvm destination:/b/dirInContainer diff --git a/internal/protocol/guestresource/resources.go b/internal/protocol/guestresource/resources.go index 8b68bc4d7d..1eb695e801 100644 --- a/internal/protocol/guestresource/resources.go +++ b/internal/protocol/guestresource/resources.go @@ -85,6 +85,7 @@ type LCOWMappedVirtualDisk struct { ReadOnly bool `json:"ReadOnly,omitempty"` Encrypted bool `json:"Encrypted,omitempty"` Options []string `json:"Options,omitempty"` + BlockDev bool `json:"BlockDev,omitempty"` // Deprecated: verity info is read by the guest VerityInfo *DeviceVerityInfo `json:"VerityInfo,omitempty"` EnsureFilesystem bool `json:"EnsureFilesystem,omitempty"` diff --git a/internal/uvm/scsi/backend.go b/internal/uvm/scsi/backend.go index 9194b94324..6219a15172 100644 --- a/internal/uvm/scsi/backend.go +++ b/internal/uvm/scsi/backend.go @@ -196,6 +196,7 @@ func mountRequest(controller, lun uint, path string, config *mountConfig, osType Options: config.options, EnsureFilesystem: config.ensureFilesystem, Filesystem: config.filesystem, + BlockDev: config.blockDev, } default: return guestrequest.ModificationRequest{}, fmt.Errorf("unsupported os type: %s", osType) @@ -221,6 +222,7 @@ func unmountRequest(controller, lun uint, path string, config *mountConfig, osTy Lun: uint8(lun), Partition: config.partition, Controller: uint8(controller), + BlockDev: config.blockDev, } default: return guestrequest.ModificationRequest{}, fmt.Errorf("unsupported os type: %s", osType) diff --git a/internal/uvm/scsi/manager.go b/internal/uvm/scsi/manager.go index bcf87c76ca..fd7173b5b1 100644 --- a/internal/uvm/scsi/manager.go +++ b/internal/uvm/scsi/manager.go @@ -83,6 +83,9 @@ type MountConfig struct { // mounted as. // This is only supported for LCOW. Filesystem string + // BlockDev indicates if the device should be mounted as a block device. + // This is only supported for LCOW. + BlockDev bool } // Mount represents a SCSI device that has been attached to a VM, and potentially @@ -157,6 +160,7 @@ func (m *Manager) AddVirtualDisk( options: mc.Options, ensureFilesystem: mc.EnsureFilesystem, filesystem: mc.Filesystem, + blockDev: mc.BlockDev, } } return m.add(ctx, @@ -202,6 +206,7 @@ func (m *Manager) AddPhysicalDisk( options: mc.Options, ensureFilesystem: mc.EnsureFilesystem, filesystem: mc.Filesystem, + blockDev: mc.BlockDev, } } return m.add(ctx, diff --git a/internal/uvm/scsi/mount.go b/internal/uvm/scsi/mount.go index c4bdcc6d81..ed77fa9991 100644 --- a/internal/uvm/scsi/mount.go +++ b/internal/uvm/scsi/mount.go @@ -41,6 +41,7 @@ type mountConfig struct { partition uint64 readOnly bool encrypted bool + blockDev bool options []string ensureFilesystem bool filesystem string