Skip to content

Commit

Permalink
Add Support for Partition GPU on GKE Nodes (#5315) (#3739)
Browse files Browse the repository at this point in the history
* add support for partitioning gpus

Co-authored-by: upodroid <[email protected]>

* rename field

Signed-off-by: Modular Magician <[email protected]>
  • Loading branch information
modular-magician authored Oct 18, 2021
1 parent 5b5182c commit dba79f7
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 9 deletions.
3 changes: 3 additions & 0 deletions .changelog/5315.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:enhancement
container: added `node_config.0.guest_accelerator.0.gpu_partition_size` field to `google_container_node_pool`
```
12 changes: 10 additions & 2 deletions google-beta/node_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ func schemaNodeConfig() *schema.Schema {
DiffSuppressFunc: compareSelfLinkOrResourceName,
Description: `The accelerator type resource name.`,
},
"gpu_partition_size": {
Type: schema.TypeString,
Optional: true,
ForceNew: true,
Description: `Size of partitions to create on the GPU. Valid values are described in the NVIDIA mig user guide (https://docs.nvidia.com/datacenter/tesla/mig-user-guide/#partitioning)`,
},
},
},
},
Expand Down Expand Up @@ -378,6 +384,7 @@ func expandNodeConfig(v interface{}) *containerBeta.NodeConfig {
guestAccelerators = append(guestAccelerators, &containerBeta.AcceleratorConfig{
AcceleratorCount: int64(data["count"].(int)),
AcceleratorType: data["type"].(string),
GpuPartitionSize: data["gpu_partition_size"].(string),
})
}
nc.Accelerators = guestAccelerators
Expand Down Expand Up @@ -612,8 +619,9 @@ func flattenContainerGuestAccelerators(c []*containerBeta.AcceleratorConfig) []m
result := []map[string]interface{}{}
for _, accel := range c {
result = append(result, map[string]interface{}{
"count": accel.AcceleratorCount,
"type": accel.AcceleratorType,
"count": accel.AcceleratorCount,
"type": accel.AcceleratorType,
"gpu_partition_size": accel.GpuPartitionSize,
})
}
return result
Expand Down
5 changes: 3 additions & 2 deletions google-beta/resource_container_node_pool_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1902,7 +1902,7 @@ resource "google_container_node_pool" "np_with_gpu" {
initial_node_count = 1
node_config {
machine_type = "n1-standard-1" // can't be e2 because of accelerator
machine_type = "a2-highgpu-1g" // can't be e2 because of accelerator
disk_size_gb = 32
oauth_scopes = [
Expand All @@ -1919,7 +1919,8 @@ resource "google_container_node_pool" "np_with_gpu" {
image_type = "COS"
guest_accelerator {
type = "nvidia-tesla-k80"
type = "nvidia-tesla-a100"
gpu_partition_size = "1g.5gb"
count = 1
}
}
Expand Down
3 changes: 1 addition & 2 deletions google-beta/resource_dataproc_cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@ import (
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource"
"github.com/hashicorp/terraform-plugin-sdk/v2/terraform"

"google.golang.org/api/googleapi"

dataproc "google.golang.org/api/dataproc/v1beta2"
"google.golang.org/api/googleapi"
)

func TestDataprocExtractInitTimeout(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion google-beta/resource_gke_hub_feature_membership_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"fmt"
"testing"

"github.com/GoogleCloudPlatform/declarative-resource-client-library/dcl"
dcl "github.com/GoogleCloudPlatform/declarative-resource-client-library/dcl"
gkehub "github.com/GoogleCloudPlatform/declarative-resource-client-library/services/google/gkehub/beta"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource"
"github.com/hashicorp/terraform-plugin-sdk/v2/terraform"
Expand Down
6 changes: 4 additions & 2 deletions website/docs/r/container_cluster.html.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -738,11 +738,13 @@ linux_node_config {

* `count` (Required) - The number of guest accelerator cards exposed to this instance.

<a name="nested_workload_identity_config"></a>The `workload_identity_config` block supports:
* `gpu_partition_size` (Optional) - Size of partitions to create on the GPU. Valid values are described in the NVIDIA MIG [user guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/#partitioning).

<a name="nested_workload_identity_config"></a> The `workload_identity_config` block supports:

* `identity_namespace` (Optional, Deprecated) - Currently, the only supported identity namespace is the project's default.

* `workload_pool` (Optional) - The workload pool to attach all Kubernetes service accounts to. Currently, the only supported identity namespace is the project's default.
* `workload_pool` (Optional) - The workload pool to attach all Kubernetes service accounts to. Currently, the only supported identity namespace is the project of the cluster.

```hcl
workload_identity_config {
Expand Down

0 comments on commit dba79f7

Please sign in to comment.