diff --git a/pkg/cloudprovider/provider/kubevirt/provider.go b/pkg/cloudprovider/provider/kubevirt/provider.go index b84a0154a2..5303a5ab1a 100644 --- a/pkg/cloudprovider/provider/kubevirt/provider.go +++ b/pkg/cloudprovider/provider/kubevirt/provider.go @@ -58,6 +58,9 @@ func init() { if err := kubevirtv1.AddToScheme(scheme.Scheme); err != nil { klog.Fatalf("failed to add kubevirtv1 to scheme: %v", err) } + if err := cdiv1beta1.AddToScheme(scheme.Scheme); err != nil { + klog.Fatalf("failed to add cdiv1beta1 to scheme: %v", err) + } } const ( @@ -66,6 +69,8 @@ const ( // machineDeploymentLabelKey defines the label key used to contains as value the MachineDeployment name // which machine comes from. machineDeploymentLabelKey = "md" + // preAllocatedDVAnnotation defines the annotation for preAllocated DataVolume for cloning. + preAllocatedDVAnnotation = "disk-clone.machines.cluster.k8s.io/machine-%s" ) var supportedOS = map[providerconfigtypes.OperatingSystem]*struct{}{ @@ -93,7 +98,7 @@ type Config struct { CPUs string Memory string Namespace string - OsImage OSImage + OSImage OSImage StorageClassName string PVCSize resource.Quantity FlavorName string @@ -148,9 +153,28 @@ type SecondaryDisks struct { StorageClassName string } +// OSImage represents OS-source parsed from rawConfig. type OSImage struct { - URL string + // HTTP represents http source to download VMImage. + HTTP *HTTPSource + // PVCClone represents pvc source for cloning VMImage. + PVCClone *PVCSource +} + +// HTTPSource represents http source to download VMImage. +type HTTPSource struct { + // URL of the http source endpoint. + URL string + // CacheImage represents whether to use cachedImage or not. + CacheImage bool +} + +// PVCSource represents pvc source for cloning VMImage. +type PVCSource struct { + // DataVolumeName represent name of source pvc for cloning. DataVolumeName string + // Namespace represents source-pvc namespace. 
+ Namespace string } type kubeVirtServer struct { @@ -247,15 +271,12 @@ func (p *provider) getConfig(provSpec clusterv1alpha1.ProviderSpec) (*Config, *p return nil, nil, fmt.Errorf(`failed to get value of "memory" field: %w`, err) } config.Namespace = getNamespace() - osImage, err := p.configVarResolver.GetConfigVarStringValue(rawConfig.VirtualMachine.Template.PrimaryDisk.OsImage) + + parseOSImage, err := p.parseOSImage(rawConfig.VirtualMachine.Template.PrimaryDisk, config.Namespace) if err != nil { - return nil, nil, fmt.Errorf(`failed to get value of "sourceURL" field: %w`, err) - } - if _, err = url.ParseRequestURI(osImage); err == nil { - config.OsImage.URL = osImage - } else { - config.OsImage.DataVolumeName = osImage + return nil, nil, fmt.Errorf(`failed to get value of "osImage" field: %w`, err) } + config.OSImage = *parseOSImage pvcSize, err := p.configVarResolver.GetConfigVarStringValue(rawConfig.VirtualMachine.Template.PrimaryDisk.Size) if err != nil { return nil, nil, fmt.Errorf(`failed to get value of "pvcSize" field: %w`, err) @@ -377,6 +398,40 @@ func (p *provider) parseTopologySpreadConstraint(topologyConstraints []kubevirtt return parsedTopologyConstraints, nil } +func (p *provider) parseOSImage(primaryDisk kubevirttypes.PrimaryDisk, nameSpace string) (*OSImage, error) { + osImage, err := p.configVarResolver.GetConfigVarStringValue(primaryDisk.OsImage) + if err != nil { + return nil, fmt.Errorf(`failed to get value of "primaryDisk.osImage" field: %w`, err) + } + + var parsedOSImage *OSImage + // If PrimaryDiskOSSource is specified, parse the config based on new API. 
+ if primaryDisk.Source != nil { + if primaryDisk.Source.HTTP != nil { + preAllocatedDV, _, err := p.configVarResolver.GetConfigVarBoolValue(primaryDisk.Source.HTTP.PreAllocateDataVolume) + if err != nil { + return nil, fmt.Errorf(`failed to get value of "preAllocateDataVolume" field: %w`, err) + } + parsedOSImage = &OSImage{HTTP: &HTTPSource{URL: osImage, CacheImage: preAllocatedDV}} + } else if primaryDisk.Source.PVCClone != nil { + parsedOSImage = &OSImage{PVCClone: &PVCSource{DataVolumeName: osImage, Namespace: nameSpace}} + } + } + // If PrimaryDiskOSSource is not specified, parse the config based on older API for backward compatibility. + if parsedOSImage == nil { + if _, err = url.ParseRequestURI(osImage); err == nil { + parsedOSImage = &OSImage{HTTP: &HTTPSource{URL: osImage}} + } else { + if nameSpaceAndName := strings.Split(osImage, "/"); len(nameSpaceAndName) >= 2 { + parsedOSImage = &OSImage{PVCClone: &PVCSource{Namespace: nameSpaceAndName[0], DataVolumeName: nameSpaceAndName[1]}} + } else { + parsedOSImage = &OSImage{PVCClone: &PVCSource{Namespace: nameSpace, DataVolumeName: osImage}} + } + } + } + return parsedOSImage, nil +} + // getNamespace returns the namespace where the VM is created. // VM is created in a dedicated namespace // which is the namespace where the machine-controller pod is running. 
@@ -509,7 +564,11 @@ func (p *provider) MachineMetricsLabels(machine *clusterv1alpha1.Machine) (map[s if err == nil { labels["cpus"] = c.CPUs labels["memoryMIB"] = c.Memory - labels["osImage"] = c.OsImage.URL + if c.OSImage.HTTP != nil { + labels["osImage"] = c.OSImage.HTTP.URL + } else if c.OSImage.PVCClone != nil { + labels["osImage"] = c.OSImage.PVCClone.DataVolumeName + } } return labels, err @@ -540,8 +599,19 @@ func (p *provider) Create(ctx context.Context, machine *clusterv1alpha1.Machine, userDataSecretName := fmt.Sprintf("userdata-%s-%s", machine.Name, strconv.Itoa(int(time.Now().Unix()))) + var machineDeploymentName string + if mdName, err := machineDeploymentNameAndRevisionForMachineGetter(ctx, machine, data.Client)(); err == nil { + machineDeploymentName = mdName + } + + if c.OSImage.HTTP != nil && c.OSImage.HTTP.CacheImage { + if err = p.createPreAllocatedDataVolume(ctx, c, machine, sigClient, machineDeploymentName); err != nil { + return nil, fmt.Errorf("failed to create preAllocatedDataVolume: %w", err) + } + } + virtualMachine, err := p.newVirtualMachine(ctx, c, pc, machine, userDataSecretName, userdata, - machineDeploymentNameAndRevisionForMachineGetter(ctx, machine, data.Client), randomMacAddressGetter, sigClient) + machineDeploymentName, randomMacAddressGetter, sigClient) if err != nil { return nil, fmt.Errorf("could not create a VirtualMachine manifest %w", err) } @@ -565,7 +635,7 @@ func (p *provider) Create(ctx context.Context, machine *clusterv1alpha1.Machine, } func (p *provider) newVirtualMachine(ctx context.Context, c *Config, pc *providerconfigtypes.Config, machine *clusterv1alpha1.Machine, - userdataSecretName, userdata string, mdNameGetter machineDeploymentNameGetter, macAddressGetter macAddressGetter, sigClient client.Client) (*kubevirtv1.VirtualMachine, error) { + userdataSecretName, userdata string, mdName string, macAddressGetter macAddressGetter, sigClient client.Client) (*kubevirtv1.VirtualMachine, error) { // We add the timestamp 
because the secret name must be different when we recreate the VMI // because its pod got deleted // The secret has an ownerRef on the VMI so garbace collection will take care of cleaning up. @@ -574,9 +644,7 @@ func (p *provider) newVirtualMachine(ctx context.Context, c *Config, pc *provide resourceRequirements := kubevirtv1.ResourceRequirements{} labels := map[string]string{"kubevirt.io/vm": machine.Name} //Add a common label to all VirtualMachines spawned by the same MachineDeployment (= MachineDeployment name). - if mdName, err := mdNameGetter(); err == nil { - labels[machineDeploymentLabelKey] = mdName - } + labels[machineDeploymentLabelKey] = mdName // Priority to instancetype. // if no instancetype and no flavor, resources are from config. @@ -651,13 +719,26 @@ func (p *provider) newVirtualMachine(ctx context.Context, c *Config, pc *provide TopologySpreadConstraints: getTopologySpreadConstraints(c, map[string]string{machineDeploymentLabelKey: labels[machineDeploymentLabelKey]}), }, }, - DataVolumeTemplates: getDataVolumeTemplates(c, dataVolumeName), + DataVolumeTemplates: getDataVolumeTemplates(c, dataVolumeName, getDataVolumeSource(c, mdName)), }, } return virtualMachine, nil } -func (p *provider) Cleanup(ctx context.Context, machine *clusterv1alpha1.Machine, _ *cloudprovidertypes.ProviderData) (bool, error) { +func (p *provider) createPreAllocatedDataVolume(ctx context.Context, c *Config, machine *clusterv1alpha1.Machine, sigClient client.Client, dvName string) error { + existingDV := &cdiv1beta1.DataVolume{} + if err := sigClient.Get(ctx, types.NamespacedName{Namespace: c.Namespace, Name: dvName}, existingDV); err != nil { + if !kerrors.IsNotFound(err) { + return err + } + + return sigClient.Create(ctx, newDataVolume(c, dvName, machine.Name)) + } + existingDV.Annotations[fmt.Sprintf(preAllocatedDVAnnotation, machine.Name)] = "true" + return sigClient.Update(ctx, existingDV) +} + +func (p *provider) Cleanup(ctx context.Context, machine 
*clusterv1alpha1.Machine, data *cloudprovidertypes.ProviderData) (bool, error) { c, _, err := p.getConfig(machine.Spec.ProviderSpec) if err != nil { return false, cloudprovidererrors.TerminalError{ @@ -679,9 +760,37 @@ func (p *provider) Cleanup(ctx context.Context, machine *clusterv1alpha1.Machine return true, nil } + if err := p.cleanupPreAllocatedDataVolume(ctx, sigClient, c, machine, vm.Labels[machineDeploymentLabelKey]); err != nil { + return false, fmt.Errorf("failed to remove PreAllocatedDataVolume %s: %w", vm.Labels[machineDeploymentLabelKey], err) + } + return false, sigClient.Delete(ctx, vm) } +func (p *provider) cleanupPreAllocatedDataVolume(ctx context.Context, sigClient client.Client, config *Config, machine *clusterv1alpha1.Machine, dvName string) error { + if dvName != "" { + dv := &cdiv1beta1.DataVolume{} + if err := sigClient.Get(ctx, types.NamespacedName{Namespace: config.Namespace, Name: dvName}, dv); err != nil { + if !kerrors.IsNotFound(err) { + return err + } + return nil + } + + annotationKey := fmt.Sprintf(preAllocatedDVAnnotation, machine.Name) + if dv.Annotations[annotationKey] == "true" { + // If the Machine-Deployment is scaled to zero or deleted or preAllocateDataVolume is disabled, remove preAllocated DataVolume. + if len(dv.Annotations) <= 1 { + return sigClient.Delete(ctx, dv) + } + // Remove annotation of corresponding VM from preAllocated DataVolume. 
+ delete(dv.Annotations, annotationKey) + return sigClient.Update(ctx, dv) + } + } + return nil +} + func parseResources(cpus, memory string) (*corev1.ResourceList, error) { memoryResource, err := resource.ParseQuantity(memory) if err != nil { @@ -789,8 +898,7 @@ func getVMVolumes(config *Config, dataVolumeName string, userDataSecretName stri return volumes } -func getDataVolumeTemplates(config *Config, dataVolumeName string) []kubevirtv1.DataVolumeTemplateSpec { - dataVolumeSource := getDataVolumeSource(config.OsImage) +func getDataVolumeTemplates(config *Config, dataVolumeName string, dataVolumeSource *cdiv1beta1.DataVolumeSource) []kubevirtv1.DataVolumeTemplateSpec { pvcRequest := corev1.ResourceList{corev1.ResourceStorage: config.PVCSize} dataVolumeTemplates := []kubevirtv1.DataVolumeTemplateSpec{ { @@ -834,16 +942,18 @@ func getDataVolumeTemplates(config *Config, dataVolumeName string) []kubevirtv1. } // getDataVolumeSource returns DataVolumeSource, HTTP or PVC. -func getDataVolumeSource(osImage OSImage) *cdiv1beta1.DataVolumeSource { +func getDataVolumeSource(config *Config, preAllocatedDVName string) *cdiv1beta1.DataVolumeSource { dataVolumeSource := &cdiv1beta1.DataVolumeSource{} - if osImage.URL != "" { - dataVolumeSource.HTTP = &cdiv1beta1.DataVolumeSourceHTTP{URL: osImage.URL} - } else if osImage.DataVolumeName != "" { - if nameSpaceAndName := strings.Split(osImage.DataVolumeName, "/"); len(nameSpaceAndName) >= 2 { - dataVolumeSource.PVC = &cdiv1beta1.DataVolumeSourcePVC{ - Namespace: nameSpaceAndName[0], - Name: nameSpaceAndName[1], - } + if config.OSImage.HTTP != nil { + if config.OSImage.HTTP.CacheImage { + dataVolumeSource.PVC = &cdiv1beta1.DataVolumeSourcePVC{Namespace: config.Namespace, Name: preAllocatedDVName} + } else { + dataVolumeSource.HTTP = &cdiv1beta1.DataVolumeSourceHTTP{URL: config.OSImage.HTTP.URL} + } + } else if config.OSImage.PVCClone != nil { + dataVolumeSource.PVC = &cdiv1beta1.DataVolumeSourcePVC{ + Namespace: 
config.OSImage.PVCClone.Namespace, + Name: config.OSImage.PVCClone.DataVolumeName, } } return dataVolumeSource @@ -915,3 +1025,29 @@ func getTopologySpreadConstraints(config *Config, matchLabels map[string]string) }, } } + +func newDataVolume(config *Config, dataVolumeName string, annotateVMName string) *cdiv1beta1.DataVolume { + return &cdiv1beta1.DataVolume{ + ObjectMeta: metav1.ObjectMeta{ + Name: dataVolumeName, + Namespace: config.Namespace, + Annotations: map[string]string{fmt.Sprintf(preAllocatedDVAnnotation, annotateVMName): "true"}, + }, + Spec: cdiv1beta1.DataVolumeSpec{ + Source: &cdiv1beta1.DataVolumeSource{ + HTTP: &cdiv1beta1.DataVolumeSourceHTTP{ + URL: config.OSImage.HTTP.URL, + }, + }, + PVC: &corev1.PersistentVolumeClaimSpec{ + StorageClassName: utilpointer.StringPtr(config.StorageClassName), + AccessModes: []corev1.PersistentVolumeAccessMode{ + "ReadWriteOnce", + }, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: config.PVCSize}, + }, + }, + }, + } +} diff --git a/pkg/cloudprovider/provider/kubevirt/provider_test.go b/pkg/cloudprovider/provider/kubevirt/provider_test.go index 3af0d314c2..faa15b4f33 100644 --- a/pkg/cloudprovider/provider/kubevirt/provider_test.go +++ b/pkg/cloudprovider/provider/kubevirt/provider_test.go @@ -96,6 +96,7 @@ func getPreset(cpu, memory, presetName string) *kubevirtv1.VirtualMachineInstanc type kubevirtProviderSpecConf struct { OsImageDV string // if OsImage from DV and not from http source + PreAllocatedDataVolume bool Instancetype *kubevirtv1.InstancetypeMatcher Preference *kubevirtv1.PreferenceMatcher Flavor string // to remove when Flavor is deprecated @@ -167,7 +168,16 @@ func (k kubevirtProviderSpecConf) rawProviderSpec(t *testing.T) []byte { "osImage": "http://x.y.z.t/ubuntu.img", {{- end }} "size": "10Gi", + {{- if .PreAllocatedDataVolume }} + "storageClassName": "longhorn", + "source": { + "http": { + "preAllocateDataVolume": true + } + } + {{- else }} 
"storageClassName": "longhorn" + {{- end }} } } } @@ -267,6 +277,10 @@ func TestNewVirtualMachine(t *testing.T) { name: "custom-local-disk", specConf: kubevirtProviderSpecConf{OsImageDV: "ns/dvname"}, }, + { + name: "preAllocated-dataVolume", + specConf: kubevirtProviderSpecConf{PreAllocatedDataVolume: true}, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -289,9 +303,9 @@ func TestNewVirtualMachine(t *testing.T) { c.Namespace = testNamespace // Check the created VirtualMachine - vm, _ := p.newVirtualMachine(context.TODO(), c, pc, machine, "udsn", userdata, fakeMachineDeploymentNameAndRevisionForMachineGetter(), fixedMacAddressGetter, fakeclient) + fakeMD, _ := fakeMachineDeploymentNameAndRevisionForMachineGetter()() + vm, _ := p.newVirtualMachine(context.TODO(), c, pc, machine, "udsn", userdata, fakeMD, fixedMacAddressGetter, fakeclient) vm.TypeMeta.APIVersion, vm.TypeMeta.Kind = kubevirtv1.VirtualMachineGroupVersionKind.ToAPIVersionAndKind() - if !equality.Semantic.DeepEqual(vm, expectedVms[tt.name]) { t.Errorf("Diff %v", diff.ObjectGoPrintDiff(expectedVms[tt.name], vm)) } diff --git a/pkg/cloudprovider/provider/kubevirt/testdata/preAllocated-dataVolume.yaml b/pkg/cloudprovider/provider/kubevirt/testdata/preAllocated-dataVolume.yaml new file mode 100644 index 0000000000..fe398ce7c9 --- /dev/null +++ b/pkg/cloudprovider/provider/kubevirt/testdata/preAllocated-dataVolume.yaml @@ -0,0 +1,73 @@ +apiVersion: kubevirt.io/v1 +kind: VirtualMachine +metadata: + annotations: + labels: + kubevirt.io/vm: preAllocated-dataVolume + md: md-name + name: preAllocated-dataVolume + namespace: test-namespace +spec: + dataVolumeTemplates: + - metadata: + name: preAllocated-dataVolume + spec: + pvc: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + storageClassName: longhorn + source: + pvc: + namespace: test-namespace + name: md-name + running: true + template: + metadata: + creationTimestamp: null + labels: + kubevirt.io/vm: 
preAllocated-dataVolume + md: md-name + spec: + affinity: {} + domain: + devices: + disks: + - disk: + bus: virtio + name: datavolumedisk + - disk: + bus: virtio + name: cloudinitdisk + interfaces: + - macAddress: b6:f5:b4:fe:45:1d + name: default + bridge: {} + resources: + limits: + cpu: "2" + memory: 2Gi + requests: + cpu: "2" + memory: 2Gi + networks: + - name: default + pod: {} + terminationGracePeriodSeconds: 30 + topologyspreadconstraints: + - maxskew: 1 + topologykey: kubernetes.io/hostname + whenunsatisfiable: ScheduleAnyway + labelselector: + matchlabels: + md: md-name + volumes: + - dataVolume: + name: preAllocated-dataVolume + name: datavolumedisk + - cloudInitNoCloud: + secretRef: + name: udsn + name: cloudinitdisk diff --git a/pkg/cloudprovider/provider/kubevirt/types/types.go b/pkg/cloudprovider/provider/kubevirt/types/types.go index 365171d083..9788033091 100644 --- a/pkg/cloudprovider/provider/kubevirt/types/types.go +++ b/pkg/cloudprovider/provider/kubevirt/types/types.go @@ -67,6 +67,25 @@ type Template struct { type PrimaryDisk struct { Disk OsImage providerconfigtypes.ConfigVarString `json:"osImage,omitempty"` + Source *Source `json:"source,omitempty"` +} + +// Source describes the VM Disk Image source. +type Source struct { + // HTTP represents http source for downloading VMDisk Image. + HTTP *HTTPSource `json:"http,omitempty"` + // PVCClone represents the pvc source for cloning of VMDisk Image. + PVCClone *PVCCloneSource `json:"pvcClone,omitempty"` +} + +// HTTPSource represents the HTTP source for downloading the VM disk image. +type HTTPSource struct { + // PreAllocateDataVolume indicates whether to pre-allocate the DataVolume used for cloning the disk image. + PreAllocateDataVolume providerconfigtypes.ConfigVarBool `json:"preAllocateDataVolume,omitempty"` +} + +// PVCCloneSource represents the PVC source for cloning the VM disk image. +type PVCCloneSource struct { } // SecondaryDisks.