Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add changes for DRA support in 1.31 alpha #11

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions platforms/gke-aiml/playground/configsync.tf
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,31 @@ resource "null_resource" "kueue" {
# }
# }

# NVIDIA DRA DRIVER
###############################################################################
resource "null_resource" "nvidia_dra_driver" {
  # Explicit ordering: the provisioner below only runs after the Config
  # Management feature membership, the git config secret version and the
  # configsync repository module have been applied.
  depends_on = [
    google_gke_hub_feature_membership.cluster_configmanagement,
    google_secret_manager_secret_version.git_config,
    module.configsync_repository,
  ]

  # Runs the NVIDIA DRA driver manifest script on the machine executing
  # Terraform, passing its inputs through environment variables.
  # NOTE(review): the script itself is not visible here; presumably it renders
  # the DRA driver templates into the Config Sync repository - confirm.
  provisioner "local-exec" {
    command = "${path.module}/scripts/nvidia_dra_driver_manifests.sh"
    environment = {
      GIT_CONFIG_SECRET_NAME = local.git_config_secret_name
      GIT_REPOSITORY         = local.git_repository
      MANIFESTS_DIRECTORY    = local.configsync_manifests_directory
      PROJECT_ID             = data.google_project.environment.project_id
      # Fully qualified image reference (registry location, project,
      # environment-named repository, image name and tag).
      MLP_AR_REPO_URL = "${google_artifact_registry_repository.container_images.location}-docker.pkg.dev/${google_artifact_registry_repository.container_images.project}/${var.environment_name}/k8s-dra-driver:v0.1.0"
    }
  }

  # Re-create the resource (and therefore re-run the provisioner) whenever the
  # script or any template file changes.
  triggers = {
    # Hash the CONTENT of every template file with filemd5(). Hashing the path
    # string with md5() would only detect added/removed/renamed files and
    # would miss edits to an existing template.
    md5_files  = md5(join("", [for f in fileset("${path.module}/templates/configsync/templates/_cluster_template/dra/nvidia-dra-drivers", "**") : filemd5("${path.module}/templates/configsync/templates/_cluster_template/dra/nvidia-dra-drivers/${f}")]))
    md5_script = filemd5("${path.module}/scripts/nvidia_dra_driver_manifests.sh")
  }
}


# KUBERAY MANIFESTS
Expand Down
21 changes: 16 additions & 5 deletions platforms/gke-aiml/playground/container_cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ resource "google_container_cluster" "mlp" {
project = data.google_project.environment.project_id
remove_default_node_pool = false
subnetwork = module.create-vpc.subnet-1
enable_kubernetes_alpha = true

addons_config {
gcp_filestore_csi_driver_config {
Expand All @@ -75,14 +76,17 @@ resource "google_container_cluster" "mlp" {

auto_provisioning_defaults {
disk_type = "pd-balanced"
disk_size = 100
image_type = "UBUNTU_CONTAINERD"
oauth_scopes = [
"https://www.googleapis.com/auth/cloud-platform"
"https://www.googleapis.com/auth/cloud-platform",
"https://www.googleapis.com/auth/devstorage.read_only"
]
service_account = google_service_account.cluster.email

management {
auto_repair = true
auto_upgrade = true
auto_repair = false
auto_upgrade = false
}

shielded_instance_config {
Expand Down Expand Up @@ -221,11 +225,17 @@ resource "google_container_cluster" "mlp" {
enable_private_nodes = true
}

management {
auto_repair = false
auto_upgrade = false
}

node_config {
machine_type = "e2-standard-4"
service_account = google_service_account.cluster.email
oauth_scopes = [
"https://www.googleapis.com/auth/cloud-platform"
"https://www.googleapis.com/auth/cloud-platform",
"https://www.googleapis.com/auth/devstorage.read_only"
]

gcfs_config {
Expand All @@ -245,11 +255,12 @@ resource "google_container_cluster" "mlp" {
enabled = true
}
}

}

private_cluster_config {
enable_private_nodes = true
enable_private_endpoint = true
enable_private_endpoint = false
master_ipv4_cidr_block = "172.16.0.32/28"
}

Expand Down
Loading