From 9d76cedd92413bfbccc6d0af0f51bfca0fbf3702 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Thu, 24 Aug 2023 10:38:58 +0200 Subject: [PATCH 1/4] gcp, dask-worker-nodes: pangeo-hubs to use single dask worker node type --- terraform/gcp/projects/pangeo-hubs.tfvars | 45 +---------------------- 1 file changed, 2 insertions(+), 43 deletions(-) diff --git a/terraform/gcp/projects/pangeo-hubs.tfvars b/terraform/gcp/projects/pangeo-hubs.tfvars index bc026f4957..8a6a9055b0 100644 --- a/terraform/gcp/projects/pangeo-hubs.tfvars +++ b/terraform/gcp/projects/pangeo-hubs.tfvars @@ -94,52 +94,11 @@ notebook_nodes = { # A not yet fully established policy is being developed about using a single # node pool, see https://github.com/2i2c-org/infrastructure/issues/2687. # -# TODO: Transition to a single n2-highmem-16 worker node pool to be able to -# provide standardized worker pod config for all daskhubs. -# -# Tracked in https://github.com/2i2c-org/infrastructure/issues/2687 -# -# The node pool to setup should look like this: -# -# "worker" : { -# min : 0, -# max : 100, -# machine_type : "n2-highmem-16", -# }, -# dask_nodes = { - "small" : { - min : 0, - max : 100, - machine_type : "n1-standard-4", - labels : {}, - gpu : { - enabled : false, - type : "", - count : 0 - } - }, - "medium" : { - min : 0, - max : 100, - machine_type : "n1-standard-8", - labels : {}, - gpu : { - enabled : false, - type : "", - count : 0 - } - }, - "large" : { + "worker" : { min : 0, max : 100, - machine_type : "n1-standard-16", - labels : {}, - gpu : { - enabled : false, - type : "", - count : 0 - } + machine_type : "n2-highmem-16", }, } From 83f4fd894768757f0304f92402906695337d2c5f Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Thu, 24 Aug 2023 12:19:04 +0200 Subject: [PATCH 2/4] terraform, gcp: add billing_project_id variable for use by pangeo-hubs --- config/clusters/pangeo-hubs/cluster.yaml | 2 +- terraform/gcp/main.tf | 2 +- terraform/gcp/projects/pangeo-hubs.tfvars | 21 ++++++++++++++------- 
terraform/gcp/variables.tf | 10 ++++++++++ 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/config/clusters/pangeo-hubs/cluster.yaml b/config/clusters/pangeo-hubs/cluster.yaml index ade2f4020d..2cabc368a3 100644 --- a/config/clusters/pangeo-hubs/cluster.yaml +++ b/config/clusters/pangeo-hubs/cluster.yaml @@ -1,5 +1,5 @@ name: pangeo-hubs -provider: gcp # https://console.cloud.google.com/kubernetes/clusters/details/us-central1-b/pangeo-hubs-cluster/nodes?project=columbia +provider: gcp # https://console.cloud.google.com/kubernetes/clusters/details/us-central1-b/pangeo-hubs-cluster/nodes?project=pangeo-integration-te-3eea account: columbia gcp: key: enc-deployer-credentials.secret.json diff --git a/terraform/gcp/main.tf b/terraform/gcp/main.tf index 8ba02960ad..a656ddfad5 100644 --- a/terraform/gcp/main.tf +++ b/terraform/gcp/main.tf @@ -31,7 +31,7 @@ provider "google" { # https://registry.terraform.io/providers/hashicorp/google/latest/docs/guides/provider_reference#user_project_override # user_project_override = true - billing_project = "two-eye-two-see" + billing_project = var.billing_project_id } data "google_client_config" "default" {} diff --git a/terraform/gcp/projects/pangeo-hubs.tfvars b/terraform/gcp/projects/pangeo-hubs.tfvars index 8a6a9055b0..ae793fcc6d 100644 --- a/terraform/gcp/projects/pangeo-hubs.tfvars +++ b/terraform/gcp/projects/pangeo-hubs.tfvars @@ -2,18 +2,25 @@ # ------------------------------------------------------------------------------- # # The terraform state associated with this file is stored in a dedicated GCP -# bucket, so in order to work with this file you need to do the following after -# clearing a local .terraform folder. +# bucket, so a new terraform backend has to be chosen. Also, you will need to +# authenticate with a @columbia.edu account as our @2i2c.org accounts don't have +# access. 
# -# terraform init -backend-config backends/pangeo-backend.hcl -# terraform workspace list -# terraform workspace select <...> +# This can look something like this: # -# The GCP project having the bucket is https://console.cloud.google.com/?project=columbia +# gcloud auth login --update-adc +# +# cd terraform/gcp +# rm -rf .terraform +# +# terraform init -backend-config backends/pangeo-backend.hcl +# terraform workspace select pangeo-hubs +# +# terraform apply --var-file projects/pangeo-hubs.tfvars # - prefix = "pangeo-hubs" project_id = "pangeo-integration-te-3eea" +billing_project_id = "pangeo-integration-te-3eea" zone = "us-central1-b" region = "us-central1" core_node_machine_type = "n2-highmem-4" diff --git a/terraform/gcp/variables.tf b/terraform/gcp/variables.tf index 218aec2ed1..cb472866af 100644 --- a/terraform/gcp/variables.tf +++ b/terraform/gcp/variables.tf @@ -23,6 +23,16 @@ variable "project_id" { EOT } +variable "billing_project_id" { + type = string + default = "two-eye-two-see" + description = <<-EOT + GCP Project ID associated with billing. + + Should be the id, rather than display name of the project. + EOT +} + variable "k8s_version_prefixes" { type = set(string) # Available minor versions are picked from the GKE regular release channel. 
To From 1efa77a7e00bd8e4d17917a6ed2ef372ecc5feea Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Thu, 24 Aug 2023 13:09:04 +0200 Subject: [PATCH 3/4] Add comments to summarize the confusion --- terraform/gcp/main.tf | 10 ++++++++++ terraform/gcp/variables.tf | 8 ++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/terraform/gcp/main.tf b/terraform/gcp/main.tf index a656ddfad5..32d37409bf 100644 --- a/terraform/gcp/main.tf +++ b/terraform/gcp/main.tf @@ -30,6 +30,16 @@ provider "google" { # Configuration reference: # https://registry.terraform.io/providers/hashicorp/google/latest/docs/guides/provider_reference#user_project_override # + # FIXME: Erik concluded that billing_project could be set to var.project_id at + # least for one cluster, but it required that the project where the + # cluster lived first enabled the GCP API: https://console.cloud.google.com/apis/library/cloudresourcemanager.googleapis.com + # + # So, we should probably not reference a new variable here, but enable + # the API for all our existing GCP projects and new GCP projects, and + # then reference var.project_id instead. + # + # But who knows, it's hard to understand what's going on. + # user_project_override = true billing_project = var.billing_project_id } diff --git a/terraform/gcp/variables.tf b/terraform/gcp/variables.tf index cb472866af..45cb956fea 100644 --- a/terraform/gcp/variables.tf +++ b/terraform/gcp/variables.tf @@ -27,9 +27,13 @@ variable "billing_project_id" { type = string default = "two-eye-two-see" description = <<-EOT - GCP Project ID associated with billing. + This should be a GCP Project ID, not a GCP Billing Account ID as the name + indicates. It should be a project that has a GCP API called Cloud Resource + Manager enabled. That can be enabled on a project via the link below: + https://console.cloud.google.com/apis/library/cloudresourcemanager.googleapis.com - Should be the id, rather than display name of the project. 
+ What goes on here is confusing; see the comments about the confusion in main.tf + for more details. EOT } From 3ed2dcf1421e7d5d2b9a8680dc455192ca22c371 Mon Sep 17 00:00:00 2001 From: Georgiana Dolocan Date: Fri, 25 Aug 2023 11:46:40 +0300 Subject: [PATCH 4/4] Update core nodes machine type to match whatever was set from the UI --- terraform/gcp/projects/pangeo-hubs.tfvars | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/gcp/projects/pangeo-hubs.tfvars b/terraform/gcp/projects/pangeo-hubs.tfvars index ae793fcc6d..e45d5755ae 100644 --- a/terraform/gcp/projects/pangeo-hubs.tfvars +++ b/terraform/gcp/projects/pangeo-hubs.tfvars @@ -23,7 +23,7 @@ project_id = "pangeo-integration-te-3eea" billing_project_id = "pangeo-integration-te-3eea" zone = "us-central1-b" region = "us-central1" -core_node_machine_type = "n2-highmem-4" +core_node_machine_type = "n2-highmem-8" enable_private_cluster = true # Multi-tenant cluster, network policy is required to enforce separation between hubs