Skip to content

Commit

Permalink
changes following review
Browse files Browse the repository at this point in the history
- combine mapActualToMeasuredEphemeralStorageLocalSSDCount into EphemeralStorageOnLocalSSDFilesystemOverheadInBytes
- fix EphemeralStorageOnLocalSSDFilesystemOverheadInBytes for containerd OS distributions
- add test for EphemeralStorageOnLocalSSDFilesystemOverheadInBytes
- move localSSDDiskSizeInGiB constant to top of file and export
- add TestBuildNodeFromTemplateSetsResources test case with local SSD and kube-reserved ephemeral storage
  • Loading branch information
adrienjt committed Sep 27, 2021
1 parent 5efd026 commit 41111b0
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 30 deletions.
53 changes: 31 additions & 22 deletions cluster-autoscaler/cloudprovider/gce/reserved.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ import (
// There should be no imports as it is used standalone in e2e tests

const (
// KiB - KibiByte size (2^10)
KiB = 1024
// MiB - MebiByte size (2^20)
MiB = 1024 * 1024
// GiB - GibiByte size (2^30)
Expand Down Expand Up @@ -228,35 +230,42 @@ var ephemeralStorageOnLocalSSDFilesystemOverheadInKiBByOSAndDiskCount = map[Oper
},
}

// mapActualToMeasuredEphemeralStorageLocalSSDCount returns the next local SSD
// count for which we measured a filesystem overhead. We measured all possible
// counts in GKE, but custom Kubernetes on GCE may allow intermediate counts,
// attaching the measured count, but not using it all for ephemeral storage. In
// that case, the difference in overhead between GKE and custom node images may
// be higher than the difference in overhead between two disk counts, so
// interpolating wouldn't make much sense.
func mapActualToMeasuredEphemeralStorageLocalSSDCount(count int64) int64 {
if count <= 8 {
return count
}
if count <= 16 {
return 16
}
return 24 // max attachable
}

// EphemeralStorageOnLocalSSDFilesystemOverheadInBytes estimates the difference
// between the total physical capacity of the local SSDs and the ephemeral
// storage filesystem capacity. It uses experimental values measured in GKE,
// which are good-enough approximations for custom Kubernetes on GCE.
// storage filesystem capacity. It uses experimental values measured for all
// possible disk counts in GKE. Custom Kubernetes on GCE may allow intermediate
// counts, attaching the measured count, but not using it all for ephemeral
// storage. In that case, the difference in overhead between GKE and custom node
// images may be higher than the difference in overhead between two disk counts,
// so interpolating wouldn't make much sense. Instead, we use the next count for
// which we measured a filesystem overhead, which is a safer approximation
// (better to reserve more and not scale up than not enough and not schedule).
func EphemeralStorageOnLocalSSDFilesystemOverheadInBytes(diskCount int64, osDistribution OperatingSystemDistribution) int64 {
c := mapActualToMeasuredEphemeralStorageLocalSSDCount(diskCount)
o, ok := ephemeralStorageOnLocalSSDFilesystemOverheadInKiBByOSAndDiskCount[osDistribution]
var measuredCount int64
if diskCount <= 8 {
measuredCount = diskCount
} else if diskCount <= 16 {
measuredCount = 16
} else {
measuredCount = 24 // max attachable
}

// the container runtime doesn't affect filesystem overhead
var measuredOS OperatingSystemDistribution
if osDistribution == OperatingSystemDistributionCOSContainerd {
measuredOS = OperatingSystemDistributionCOS
} else if osDistribution == OperatingSystemDistributionUbuntuContainerd {
measuredOS = OperatingSystemDistributionUbuntu
} else {
measuredOS = osDistribution
}

o, ok := ephemeralStorageOnLocalSSDFilesystemOverheadInKiBByOSAndDiskCount[measuredOS]
if !ok {
klog.Errorf("Ephemeral storage backed by local SSDs is not supported for image family %v", osDistribution)
return 0
}
return 1024 * o[c]
return o[measuredCount] * KiB
}

// CalculateOSReservedEphemeralStorage estimates how much ephemeral storage OS will reserve and eviction threshold
Expand Down
47 changes: 47 additions & 0 deletions cluster-autoscaler/cloudprovider/gce/reserved_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,50 @@ func TestCalculateKernelReservedLinux(t *testing.T) {
})
}
}

func TestEphemeralStorageOnLocalSSDFilesystemOverheadInBytes(t *testing.T) {
type testCase struct {
scenario string
diskCount int64
osDistribution OperatingSystemDistribution
expected int64
}
testCases := []testCase{
{
scenario: "measured disk count and OS (cos)",
diskCount: 1,
osDistribution: OperatingSystemDistributionCOS,
expected: 7289472 * KiB,
},
{
scenario: "measured disk count but OS with different container runtime (cos_containerd)",
diskCount: 1,
osDistribution: OperatingSystemDistributionCOSContainerd,
expected: 7289472 * KiB, // same as COS
},
{
scenario: "measured disk count and OS (ubuntu)",
diskCount: 1,
osDistribution: OperatingSystemDistributionUbuntu,
expected: 7219840 * KiB,
},
{
scenario: "measured disk count but OS with different container runtime (ubuntu_containerd)",
diskCount: 1,
osDistribution: OperatingSystemDistributionUbuntuContainerd,
expected: 7219840 * KiB, // same as Ubuntu
},
{
scenario: "mapped disk count",
diskCount: 10,
osDistribution: OperatingSystemDistributionCOS,
expected: 52837800 * KiB, // value measured for 16 disks
},
}
for _, tc := range testCases {
t.Run(tc.scenario, func(t *testing.T) {
actual := EphemeralStorageOnLocalSSDFilesystemOverheadInBytes(tc.diskCount, tc.osDistribution)
assert.Equal(t, tc.expected, actual)
})
}
}
6 changes: 3 additions & 3 deletions cluster-autoscaler/cloudprovider/gce/templates.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ import (
// GceTemplateBuilder builds templates for GCE nodes.
type GceTemplateBuilder struct{}

const LocalSSDDiskSizeInGiB = 375

// TODO: This should be imported from sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/common/constants.go
// This key is applicable to both GCE and GKE
const gceCSITopologyKeyZone = "topology.gke.io/zone"
Expand Down Expand Up @@ -256,8 +258,6 @@ func ephemeralStorageLocalSSDCount(kubeEnvValue string) int64 {
return int64(n)
}

const localSSDDiskSizeInGiB = 375

func getLocalSSDEphemeralStorageFromInstanceTemplateProperties(instanceProperties *gce.InstanceProperties, ssdCount int64) (ephemeralStorage int64, err error) {
if instanceProperties.Disks == nil {
return 0, fmt.Errorf("instance properties disks is nil")
Expand All @@ -276,7 +276,7 @@ func getLocalSSDEphemeralStorageFromInstanceTemplateProperties(instancePropertie
return 0, fmt.Errorf("actual local SSD count is lower than ephemeral_storage_local_ssd_count")
}

return ssdCount * localSSDDiskSizeInGiB * units.GiB, nil
return ssdCount * LocalSSDDiskSizeInGiB * units.GiB, nil
}

// isBootDiskEphemeralStorageWithInstanceTemplateDisabled will allow bypassing Disk Size of Boot Disk from being
Expand Down
23 changes: 18 additions & 5 deletions cluster-autoscaler/cloudprovider/gce/templates_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,19 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
attachedLocalSSDCount: 4,
expectedErr: false,
},
{
scenario: "ephemeral storage on local SSDs with kube-reserved",
kubeEnv: "AUTOSCALER_ENV_VARS: kube_reserved=cpu=0,memory=0,ephemeral-storage=10Gi;os_distribution=cos;os=linux;ephemeral_storage_local_ssd_count=2\n",
physicalCpu: 8,
physicalMemory: 200 * units.MiB,
ephemeralStorageLocalSSDCount: 2,
kubeReserved: true,
reservedCpu: "0m",
reservedMemory: fmt.Sprintf("%v", 0*units.MiB),
reservedEphemeralStorage: "10Gi",
attachedLocalSSDCount: 4,
expectedErr: false,
},
}
for _, tc := range testCases {
t.Run(tc.scenario, func(t *testing.T) {
Expand Down Expand Up @@ -221,7 +234,7 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
// specifying physicalEphemeralStorageGiB in the testCase struct
physicalEphemeralStorageGiB := tc.bootDiskSizeGiB
if tc.ephemeralStorageLocalSSDCount > 0 {
physicalEphemeralStorageGiB = tc.ephemeralStorageLocalSSDCount * localSSDDiskSizeInGiB
physicalEphemeralStorageGiB = tc.ephemeralStorageLocalSSDCount * LocalSSDDiskSizeInGiB
} else if tc.isEphemeralStorageBlocked {
physicalEphemeralStorageGiB = 0
}
Expand Down Expand Up @@ -416,17 +429,17 @@ func TestParseEvictionHard(t *testing.T) {
testCases := []testCase{{
memory: "200Mi",
ephemeralStorage: "15%",
memoryExpected: 200 * 1024 * 1024,
memoryExpected: 200 * MiB,
ephemeralStorageRatioExpected: 0.15,
}, {
memory: "2Gi",
ephemeralStorage: "11.5%",
memoryExpected: 2 * 1024 * 1024 * 1024,
memoryExpected: 2 * GiB,
ephemeralStorageRatioExpected: 0.115,
}, {
memory: "",
ephemeralStorage: "", // empty string, fallback to default
memoryExpected: 100 * 1024 * 1024,
memoryExpected: 100 * MiB,
ephemeralStorageRatioExpected: 0.1,
}, {
memory: "110292",
Expand All @@ -436,7 +449,7 @@ func TestParseEvictionHard(t *testing.T) {
}, {
memory: "abcb12", // unparsable, fallback to default
ephemeralStorage: "-11%", // negative percentage, should fallback to default
memoryExpected: 100 * 1024 * 1024,
memoryExpected: 100 * MiB,
ephemeralStorageRatioExpected: 0.1,
}}
for _, tc := range testCases {
Expand Down

0 comments on commit 41111b0

Please sign in to comment.