Add support for partitioning GPUs on GKE nodes (#5315) (#3739)

* add support for partitioning GPUs

  Co-authored-by: upodroid <[email protected]>

* rename field

  Signed-off-by: Modular Magician <[email protected]>
hashicorp · Oct 18, 2021 · dba79f7 · dba79f7
1 parent 5b5182c
commit dba79f7
Show file tree

Hide file tree

Showing 6 changed files with 22 additions and 9 deletions.
diff --git a/.changelog/5315.txt b/.changelog/5315.txt
@@ -0,0 +1,3 @@
+```release-note:enhancement
+container: added `node_config.0.guest_accelerator.0.gpu_partition_size` field to google_container_node_pool
+```
diff --git a/google-beta/node_config.go b/google-beta/node_config.go
@@ -70,6 +70,12 @@ func schemaNodeConfig() *schema.Schema {
 								DiffSuppressFunc: compareSelfLinkOrResourceName,
 								Description:      `The accelerator type resource name.`,
 							},
+							"gpu_partition_size": {
+								Type:        schema.TypeString,
+								Optional:    true,
+								ForceNew:    true,
+								Description: `Size of partitions to create on the GPU. Valid values are described in the NVIDIA mig user guide (https://docs.nvidia.com/datacenter/tesla/mig-user-guide/#partitioning)`,
+							},
 						},
 					},
 				},
@@ -378,6 +384,7 @@ func expandNodeConfig(v interface{}) *containerBeta.NodeConfig {
 			guestAccelerators = append(guestAccelerators, &containerBeta.AcceleratorConfig{
 				AcceleratorCount: int64(data["count"].(int)),
 				AcceleratorType:  data["type"].(string),
+				GpuPartitionSize: data["gpu_partition_size"].(string),
 			})
 		}
 		nc.Accelerators = guestAccelerators
@@ -612,8 +619,9 @@ func flattenContainerGuestAccelerators(c []*containerBeta.AcceleratorConfig) []m
 	result := []map[string]interface{}{}
 	for _, accel := range c {
 		result = append(result, map[string]interface{}{
-			"count": accel.AcceleratorCount,
-			"type":  accel.AcceleratorType,
+			"count":              accel.AcceleratorCount,
+			"type":               accel.AcceleratorType,
+			"gpu_partition_size": accel.GpuPartitionSize,
 		})
 	}
 	return result

diff --git a/google-beta/resource_container_node_pool_test.go b/google-beta/resource_container_node_pool_test.go
@@ -1902,7 +1902,7 @@ resource "google_container_node_pool" "np_with_gpu" {
   initial_node_count = 1
 
   node_config {
-    machine_type = "n1-standard-1"  // can't be e2 because of accelerator
+    machine_type = "a2-highgpu-1g"  // can't be e2 because of accelerator
     disk_size_gb = 32
 
     oauth_scopes = [
@@ -1919,7 +1919,8 @@ resource "google_container_node_pool" "np_with_gpu" {
     image_type      = "COS"
 
     guest_accelerator {
-      type  = "nvidia-tesla-k80"
+      type  = "nvidia-tesla-a100"
+      gpu_partition_size = "1g.5gb"
       count = 1
     }
   }

diff --git a/google-beta/resource_dataproc_cluster_test.go b/google-beta/resource_dataproc_cluster_test.go
@@ -13,9 +13,8 @@ import (
 	"github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource"
 	"github.com/hashicorp/terraform-plugin-sdk/v2/terraform"
 
-	"google.golang.org/api/googleapi"
-
 	dataproc "google.golang.org/api/dataproc/v1beta2"
+	"google.golang.org/api/googleapi"
 )
 
 func TestDataprocExtractInitTimeout(t *testing.T) {

diff --git a/google-beta/resource_gke_hub_feature_membership_test.go b/google-beta/resource_gke_hub_feature_membership_test.go
@@ -5,7 +5,7 @@ import (
 	"fmt"
 	"testing"
 
-	"github.com/GoogleCloudPlatform/declarative-resource-client-library/dcl"
+	dcl "github.com/GoogleCloudPlatform/declarative-resource-client-library/dcl"
 	gkehub "github.com/GoogleCloudPlatform/declarative-resource-client-library/services/google/gkehub/beta"
 	"github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource"
 	"github.com/hashicorp/terraform-plugin-sdk/v2/terraform"

diff --git a/website/docs/r/container_cluster.html.markdown b/website/docs/r/container_cluster.html.markdown
@@ -738,11 +738,13 @@ linux_node_config {
 
 * `count` (Required) - The number of the guest accelerator cards exposed to this instance.
 
-<a name="nested_workload_identity_config"></a>The `workload_identity_config` block supports:
+* `gpu_partition_size` (Optional) - Size of partitions to create on the GPU. Valid values are described in the [NVIDIA MIG user guide](https://docs.nvidia.com/datacenter/tesla/mig-user-guide/#partitioning).
+
+<a name="nested_workload_identity_config"></a> The `workload_identity_config` block supports:
 
 * `identity_namespace` (Optional, Deprecated) - Currently, the only supported identity namespace is the project's default.
 
-* `workload_pool` (Optional) - The workload pool to attach all Kubernetes service accounts to. Currently, the only supported identity namespace is the project's default.
+* `workload_pool` (Optional) - The workload pool to attach all Kubernetes service accounts to. Currently, the only supported identity namespace is the project of the cluster.
 
 ```hcl
 workload_identity_config {