diff --git a/cluster-autoscaler/cloudprovider/gce/reserved.go b/cluster-autoscaler/cloudprovider/gce/reserved.go index 115693cddea6..4d901d3ee689 100644 --- a/cluster-autoscaler/cloudprovider/gce/reserved.go +++ b/cluster-autoscaler/cloudprovider/gce/reserved.go @@ -29,6 +29,8 @@ import ( // There should be no imports as it is used standalone in e2e tests const ( + // KiB - KibiByte size (2^10) + KiB = 1024 // MiB - MebiByte size (2^20) MiB = 1024 * 1024 // GiB - GibiByte size (2^30) @@ -228,35 +230,42 @@ var ephemeralStorageOnLocalSSDFilesystemOverheadInKiBByOSAndDiskCount = map[Oper }, } -// mapActualToMeasuredEphemeralStorageLocalSSDCount returns the next local SSD -// count for which we measured a filesystem overhead. We measured all possible -// counts in GKE, but custom Kubernetes on GCE may allow intermediate counts, -// attaching the measured count, but not using it all for ephemeral storage. In -// that case, the difference in overhead between GKE and custom node images may -// be higher than the difference in overhead between two disk counts, so -// interpolating wouldn't make much sense. -func mapActualToMeasuredEphemeralStorageLocalSSDCount(count int64) int64 { - if count <= 8 { - return count - } - if count <= 16 { - return 16 - } - return 24 // max attachable -} - // EphemeralStorageOnLocalSSDFilesystemOverheadInBytes estimates the difference // between the total physical capacity of the local SSDs and the ephemeral -// storage filesystem capacity. It uses experimental values measured in GKE, -// which are good-enough approximations for custom Kubernetes on GCE. +// storage filesystem capacity. It uses experimental values measured for all +// possible disk counts in GKE. Custom Kubernetes on GCE may allow intermediate +// counts, attaching the measured count, but not using it all for ephemeral +// storage. In that case, the difference in overhead between GKE and custom node +// images may be higher than the difference in overhead between two disk counts, +// so interpolating wouldn't make much sense. Instead, we use the next count for +// which we measured a filesystem overhead, which is a safer approximation +// (better to reserve more and not scale up than not enough and not schedule). func EphemeralStorageOnLocalSSDFilesystemOverheadInBytes(diskCount int64, osDistribution OperatingSystemDistribution) int64 { - c := mapActualToMeasuredEphemeralStorageLocalSSDCount(diskCount) - o, ok := ephemeralStorageOnLocalSSDFilesystemOverheadInKiBByOSAndDiskCount[osDistribution] + var measuredCount int64 + if diskCount <= 8 { + measuredCount = diskCount + } else if diskCount <= 16 { + measuredCount = 16 + } else { + measuredCount = 24 // max attachable + } + + // the container runtime doesn't affect filesystem overhead + var measuredOS OperatingSystemDistribution + if osDistribution == OperatingSystemDistributionCOSContainerd { + measuredOS = OperatingSystemDistributionCOS + } else if osDistribution == OperatingSystemDistributionUbuntuContainerd { + measuredOS = OperatingSystemDistributionUbuntu + } else { + measuredOS = osDistribution + } + + o, ok := ephemeralStorageOnLocalSSDFilesystemOverheadInKiBByOSAndDiskCount[measuredOS] if !ok { klog.Errorf("Ephemeral storage backed by local SSDs is not supported for image family %v", osDistribution) return 0 } - return 1024 * o[c] + return o[measuredCount] * KiB } // CalculateOSReservedEphemeralStorage estimates how much ephemeral storage OS will reserve and eviction threshold diff --git a/cluster-autoscaler/cloudprovider/gce/reserved_test.go b/cluster-autoscaler/cloudprovider/gce/reserved_test.go index 9c9039dd3861..74b3b58b3898 100644 --- a/cluster-autoscaler/cloudprovider/gce/reserved_test.go +++ b/cluster-autoscaler/cloudprovider/gce/reserved_test.go @@ -103,3 +103,50 @@ func TestCalculateKernelReservedLinux(t *testing.T) { }) } } + +func TestEphemeralStorageOnLocalSSDFilesystemOverheadInBytes(t *testing.T) { + type testCase struct { + scenario string + diskCount int64 + osDistribution OperatingSystemDistribution + expected int64 + } + testCases := []testCase{ + { + scenario: "measured disk count and OS (cos)", + diskCount: 1, + osDistribution: OperatingSystemDistributionCOS, + expected: 7289472 * KiB, + }, + { + scenario: "measured disk count but OS with different container runtime (cos_containerd)", + diskCount: 1, + osDistribution: OperatingSystemDistributionCOSContainerd, + expected: 7289472 * KiB, // same as COS + }, + { + scenario: "measured disk count and OS (ubuntu)", + diskCount: 1, + osDistribution: OperatingSystemDistributionUbuntu, + expected: 7219840 * KiB, + }, + { + scenario: "measured disk count but OS with different container runtime (ubuntu_containerd)", + diskCount: 1, + osDistribution: OperatingSystemDistributionUbuntuContainerd, + expected: 7219840 * KiB, // same as Ubuntu + }, + { + scenario: "mapped disk count", + diskCount: 10, + osDistribution: OperatingSystemDistributionCOS, + expected: 52837800 * KiB, // value measured for 16 disks + }, + } + for _, tc := range testCases { + t.Run(tc.scenario, func(t *testing.T) { + actual := EphemeralStorageOnLocalSSDFilesystemOverheadInBytes(tc.diskCount, tc.osDistribution) + assert.Equal(t, tc.expected, actual) + }) + } +} diff --git a/cluster-autoscaler/cloudprovider/gce/templates.go b/cluster-autoscaler/cloudprovider/gce/templates.go index ce8a8891ac9b..8fc904f453d5 100644 --- a/cluster-autoscaler/cloudprovider/gce/templates.go +++ b/cluster-autoscaler/cloudprovider/gce/templates.go @@ -40,6 +40,8 @@ import ( // GceTemplateBuilder builds templates for GCE nodes. type GceTemplateBuilder struct{} +const LocalSSDDiskSizeInGiB = 375 + // TODO: This should be imported from sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/common/constants.go // This key is applicable to both GCE and GKE const gceCSITopologyKeyZone = "topology.gke.io/zone" @@ -256,8 +258,6 @@ func ephemeralStorageLocalSSDCount(kubeEnvValue string) int64 { return int64(n) } -const localSSDDiskSizeInGiB = 375 - func getLocalSSDEphemeralStorageFromInstanceTemplateProperties(instanceProperties *gce.InstanceProperties, ssdCount int64) (ephemeralStorage int64, err error) { if instanceProperties.Disks == nil { return 0, fmt.Errorf("instance properties disks is nil") @@ -276,7 +276,7 @@ func getLocalSSDEphemeralStorageFromInstanceTemplateProperties(instancePropertie return 0, fmt.Errorf("actual local SSD count is lower than ephemeral_storage_local_ssd_count") } - return ssdCount * localSSDDiskSizeInGiB * units.GiB, nil + return ssdCount * LocalSSDDiskSizeInGiB * units.GiB, nil } // isBootDiskEphemeralStorageWithInstanceTemplateDisabled will allow bypassing Disk Size of Boot Disk from being diff --git a/cluster-autoscaler/cloudprovider/gce/templates_test.go b/cluster-autoscaler/cloudprovider/gce/templates_test.go index d4a2d33d4a3a..496b880ab3c5 100644 --- a/cluster-autoscaler/cloudprovider/gce/templates_test.go +++ b/cluster-autoscaler/cloudprovider/gce/templates_test.go @@ -170,6 +170,19 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) { attachedLocalSSDCount: 4, expectedErr: false, }, + { + scenario: "ephemeral storage on local SSDs with kube-reserved", + kubeEnv: "AUTOSCALER_ENV_VARS: kube_reserved=cpu=0,memory=0,ephemeral-storage=10Gi;os_distribution=cos;os=linux;ephemeral_storage_local_ssd_count=2\n", + physicalCpu: 8, + physicalMemory: 200 * units.MiB, + ephemeralStorageLocalSSDCount: 2, + kubeReserved: true, + reservedCpu: "0m", + reservedMemory: fmt.Sprintf("%v", 0*units.MiB), + reservedEphemeralStorage: "10Gi", + attachedLocalSSDCount: 4, + expectedErr: false, + }, } for _, tc := range testCases { t.Run(tc.scenario, func(t *testing.T) { @@ -221,7 +234,7 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) { // specifying physicalEphemeralStorageGiB in the testCase struct physicalEphemeralStorageGiB := tc.bootDiskSizeGiB if tc.ephemeralStorageLocalSSDCount > 0 { - physicalEphemeralStorageGiB = tc.ephemeralStorageLocalSSDCount * localSSDDiskSizeInGiB + physicalEphemeralStorageGiB = tc.ephemeralStorageLocalSSDCount * LocalSSDDiskSizeInGiB } else if tc.isEphemeralStorageBlocked { physicalEphemeralStorageGiB = 0 } @@ -416,17 +429,17 @@ func TestParseEvictionHard(t *testing.T) { testCases := []testCase{{ memory: "200Mi", ephemeralStorage: "15%", - memoryExpected: 200 * 1024 * 1024, + memoryExpected: 200 * MiB, ephemeralStorageRatioExpected: 0.15, }, { memory: "2Gi", ephemeralStorage: "11.5%", - memoryExpected: 2 * 1024 * 1024 * 1024, + memoryExpected: 2 * GiB, ephemeralStorageRatioExpected: 0.115, }, { memory: "", ephemeralStorage: "", // empty string, fallback to default - memoryExpected: 100 * 1024 * 1024, + memoryExpected: 100 * MiB, ephemeralStorageRatioExpected: 0.1, }, { memory: "110292", @@ -436,7 +449,7 @@ func TestParseEvictionHard(t *testing.T) { }, { memory: "abcb12", // unparsable, fallback to default ephemeralStorage: "-11%", // negative percentage, should fallback to default - memoryExpected: 100 * 1024 * 1024, + memoryExpected: 100 * MiB, ephemeralStorageRatioExpected: 0.1, }} for _, tc := range testCases {