From f4ec56b2fdd7e151c4b7b79390676c5429ae8a39 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Tue, 4 Jul 2023 12:44:26 +0100 Subject: [PATCH 01/16] Setup nodepool for neurohackademy Reconstruction/reversion of PR #1726 --- terraform/gcp/projects/pilot-hubs.tfvars | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/terraform/gcp/projects/pilot-hubs.tfvars b/terraform/gcp/projects/pilot-hubs.tfvars index 9526532fd4..b1320e6fcd 100644 --- a/terraform/gcp/projects/pilot-hubs.tfvars +++ b/terraform/gcp/projects/pilot-hubs.tfvars @@ -35,6 +35,15 @@ notebook_nodes = { resource_labels : { "community" : "climatematch" } + }, + "neurohackademy": { + # We expect around 120 users + min: 1, + max: 100, + machine_type: "n1-highmem-n16", + labels: { + "2i2c.org/community": "neurohackademy" + }, } } From b53e3eff8f013ea3cfd2a8527e5f361d54c6b6d6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 4 Jul 2023 11:45:51 +0000 Subject: [PATCH 02/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- terraform/gcp/projects/pilot-hubs.tfvars | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/terraform/gcp/projects/pilot-hubs.tfvars b/terraform/gcp/projects/pilot-hubs.tfvars index b1320e6fcd..604dd97d04 100644 --- a/terraform/gcp/projects/pilot-hubs.tfvars +++ b/terraform/gcp/projects/pilot-hubs.tfvars @@ -36,13 +36,13 @@ notebook_nodes = { "community" : "climatematch" } }, - "neurohackademy": { + "neurohackademy" : { # We expect around 120 users - min: 1, - max: 100, - machine_type: "n1-highmem-n16", - labels: { - "2i2c.org/community": "neurohackademy" + min : 1, + max : 100, + machine_type : "n1-highmem-n16", + labels : { + "2i2c.org/community" : "neurohackademy" }, } } From 29e7f0869e64579c1c2b695e73699b88babb110c Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Tue, 4 Jul 2023 12:55:06 +0100 Subject: [PATCH 03/16] Add comment with GitHub issue link --- terraform/gcp/projects/pilot-hubs.tfvars | 1 + 1 file changed, 1 insertion(+) diff --git a/terraform/gcp/projects/pilot-hubs.tfvars b/terraform/gcp/projects/pilot-hubs.tfvars index 604dd97d04..d806d7dd3e 100644 --- a/terraform/gcp/projects/pilot-hubs.tfvars +++ b/terraform/gcp/projects/pilot-hubs.tfvars @@ -36,6 +36,7 @@ notebook_nodes = { "community" : "climatematch" } }, + # Nodepool for neurohackademy. Tracking issue: https://github.com/2i2c-org/infrastructure/issues/2681 "neurohackademy" : { # We expect around 120 users min : 1, From fb6a652a9a92d229214838dafddd8a652a3603b1 Mon Sep 17 00:00:00 2001 From: Sarah Gibson <44771837+sgibson91@users.noreply.github.com> Date: Tue, 4 Jul 2023 13:16:13 +0100 Subject: [PATCH 04/16] Use n2 machines Co-authored-by: Erik Sundell --- terraform/gcp/projects/pilot-hubs.tfvars | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/gcp/projects/pilot-hubs.tfvars b/terraform/gcp/projects/pilot-hubs.tfvars index d806d7dd3e..1628e24251 100644 --- a/terraform/gcp/projects/pilot-hubs.tfvars +++ b/terraform/gcp/projects/pilot-hubs.tfvars @@ -41,7 +41,7 @@ notebook_nodes = { # We expect around 120 users min : 1, max : 100, - machine_type : "n1-highmem-n16", + machine_type : "n2-highmem-16", labels : { "2i2c.org/community" : "neurohackademy" }, From 0d77ee664265f54c858581c2603e5a042b6eb59b Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Tue, 4 Jul 2023 13:25:16 +0100 Subject: [PATCH 05/16] Add comment about machine type with link to motivation --- terraform/gcp/projects/pilot-hubs.tfvars | 2 ++ 1 file changed, 2 insertions(+) diff --git a/terraform/gcp/projects/pilot-hubs.tfvars b/terraform/gcp/projects/pilot-hubs.tfvars index 1628e24251..356349a81c 100644 --- a/terraform/gcp/projects/pilot-hubs.tfvars +++ b/terraform/gcp/projects/pilot-hubs.tfvars @@ -41,6 +41,8 @@ notebook_nodes = { # We expect around 120 users min : 1, max : 100, + # Swapped to n2 machines (from n1) based on investigations in + # https://github.com/2i2c-org/infrastructure/issues/2121#issuecomment-1497387131 machine_type : "n2-highmem-16", labels : { "2i2c.org/community" : "neurohackademy" From 204a6fa5d4c93ce001b7498e0f33af4a176c6bf2 Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Tue, 4 Jul 2023 16:54:49 +0100 Subject: [PATCH 06/16] Define zones for user, worker and climatematch nodepools Resolves errors in https://github.com/2i2c-org/infrastructure/pull/2758#issuecomment-1620474575 --- terraform/gcp/projects/pilot-hubs.tfvars | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/terraform/gcp/projects/pilot-hubs.tfvars b/terraform/gcp/projects/pilot-hubs.tfvars index 356349a81c..86841f28e1 100644 --- a/terraform/gcp/projects/pilot-hubs.tfvars +++ b/terraform/gcp/projects/pilot-hubs.tfvars @@ -19,6 +19,7 @@ notebook_nodes = { min : 0, max : 20, machine_type : "n1-highmem-4", + zones : ["us-central1-b"] }, "climatematch" : { min : 0, @@ -34,7 +35,8 @@ notebook_nodes = { }], resource_labels : { "community" : "climatematch" - } + }, + zones : ["us-central1-b"] }, # Nodepool for neurohackademy. Tracking issue: https://github.com/2i2c-org/infrastructure/issues/2681 "neurohackademy" : { @@ -55,6 +57,7 @@ dask_nodes = { min : 0, max : 100, machine_type : "n1-highmem-4", + zones : ["us-central1-b"] } } From 0057015a46746d08adc1936f0d427109ce25af2e Mon Sep 17 00:00:00 2001 From: Sarah Gibson Date: Tue, 4 Jul 2023 17:01:34 +0100 Subject: [PATCH 07/16] Define zone for neurohackademy nodepool --- terraform/gcp/projects/pilot-hubs.tfvars | 1 + 1 file changed, 1 insertion(+) diff --git a/terraform/gcp/projects/pilot-hubs.tfvars b/terraform/gcp/projects/pilot-hubs.tfvars index 86841f28e1..5f1a53972e 100644 --- a/terraform/gcp/projects/pilot-hubs.tfvars +++ b/terraform/gcp/projects/pilot-hubs.tfvars @@ -49,6 +49,7 @@ notebook_nodes = { labels : { "2i2c.org/community" : "neurohackademy" }, + zones : ["us-central1-b"] } } From 658ec6c7f6eab711e0219beb96cb608c585cd683 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Wed, 5 Jul 2023 16:28:33 -0700 Subject: [PATCH 08/16] Specify node_locations correctly even when not set explicitly --- terraform/gcp/cluster.tf | 2 +- terraform/gcp/variables.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/gcp/cluster.tf b/terraform/gcp/cluster.tf index e270d09007..bba10eba0e 100644 --- a/terraform/gcp/cluster.tf +++ b/terraform/gcp/cluster.tf @@ -243,7 +243,7 @@ resource "google_container_node_pool" "notebook" { project = google_container_cluster.cluster.project version = var.k8s_versions.notebook_nodes_version - node_locations = each.value.zones == null ? google_container_cluster.cluster.node_locations : each.value.zones + node_locations = length(each.value.zones) == 0 ? [ var.zone ] : each.value.zones initial_node_count = each.value.min diff --git a/terraform/gcp/variables.tf b/terraform/gcp/variables.tf index c24c52e090..1e73fc92f7 100644 --- a/terraform/gcp/variables.tf +++ b/terraform/gcp/variables.tf @@ -84,7 +84,7 @@ variable "notebook_nodes" { {} ), resource_labels : optional(map(string), {}), - zones : optional(list(string), null) + zones : optional(list(string), []) })) description = "Notebook node pools to create" default = {} From ce7aa84365498480817b188b686e2f455648477d Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Wed, 5 Jul 2023 16:29:01 -0700 Subject: [PATCH 09/16] Remove explicit mention of node_locations --- terraform/gcp/projects/pilot-hubs.tfvars | 3 --- 1 file changed, 3 deletions(-) diff --git a/terraform/gcp/projects/pilot-hubs.tfvars b/terraform/gcp/projects/pilot-hubs.tfvars index 5f1a53972e..b69266f417 100644 --- a/terraform/gcp/projects/pilot-hubs.tfvars +++ b/terraform/gcp/projects/pilot-hubs.tfvars @@ -19,7 +19,6 @@ notebook_nodes = { min : 0, max : 20, machine_type : "n1-highmem-4", - zones : ["us-central1-b"] }, "climatematch" : { min : 0, @@ -36,7 +35,6 @@ notebook_nodes = { resource_labels : { "community" : "climatematch" }, - zones : ["us-central1-b"] }, # Nodepool for neurohackademy. Tracking issue: https://github.com/2i2c-org/infrastructure/issues/2681 "neurohackademy" : { @@ -49,7 +47,6 @@ notebook_nodes = { labels : { "2i2c.org/community" : "neurohackademy" }, - zones : ["us-central1-b"] } } From d50f3053db33d19e243c03303df5e1412f4b4362 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Wed, 5 Jul 2023 16:31:24 -0700 Subject: [PATCH 10/16] Switch back to using n1-highmem nodes We have no quota for n2-highmem nodes --- terraform/gcp/projects/pilot-hubs.tfvars | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/gcp/projects/pilot-hubs.tfvars b/terraform/gcp/projects/pilot-hubs.tfvars index b69266f417..9e5d1b1756 100644 --- a/terraform/gcp/projects/pilot-hubs.tfvars +++ b/terraform/gcp/projects/pilot-hubs.tfvars @@ -43,7 +43,7 @@ notebook_nodes = { max : 100, # Swapped to n2 machines (from n1) based on investigations in # https://github.com/2i2c-org/infrastructure/issues/2121#issuecomment-1497387131 - machine_type : "n2-highmem-16", + machine_type : "n1-highmem-16", labels : { "2i2c.org/community" : "neurohackademy" }, From 7ceb9f695ce450b034d309d9b240c5029434898d Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Wed, 5 Jul 2023 16:31:54 -0700 Subject: [PATCH 11/16] Setup taints and resource_labels for neurohackademy nodepool Follows https://infrastructure.2i2c.org/howto/features/dedicated-nodepool/ --- terraform/gcp/projects/pilot-hubs.tfvars | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/terraform/gcp/projects/pilot-hubs.tfvars b/terraform/gcp/projects/pilot-hubs.tfvars index 9e5d1b1756..39eb97d3ab 100644 --- a/terraform/gcp/projects/pilot-hubs.tfvars +++ b/terraform/gcp/projects/pilot-hubs.tfvars @@ -47,6 +47,14 @@ notebook_nodes = { labels : { "2i2c.org/community" : "neurohackademy" }, + taints : [{ + key : "2i2c.org/community", + value : "neurohackademy", + effect : "NO_SCHEDULE" + }], + resource_labels : { + "community" : "neurohackademy" + }, } } From ff0644e9c9f72f8cf911a00d6a3508b7cd33ee68 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 5 Jul 2023 23:32:53 +0000 Subject: [PATCH 12/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- terraform/gcp/cluster.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/gcp/cluster.tf b/terraform/gcp/cluster.tf index bba10eba0e..bda1494659 100644 --- a/terraform/gcp/cluster.tf +++ b/terraform/gcp/cluster.tf @@ -243,7 +243,7 @@ resource "google_container_node_pool" "notebook" { project = google_container_cluster.cluster.project version = var.k8s_versions.notebook_nodes_version - node_locations = length(each.value.zones) == 0 ? [ var.zone ] : each.value.zones + node_locations = length(each.value.zones) == 0 ? [var.zone] : each.value.zones initial_node_count = each.value.min From 64939e15bc9b72774e2868767eabaa95349d50e9 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Wed, 5 Jul 2023 16:37:10 -0700 Subject: [PATCH 13/16] Remove unnecessary zone specification in dask pool --- terraform/gcp/projects/pilot-hubs.tfvars | 1 - 1 file changed, 1 deletion(-) diff --git a/terraform/gcp/projects/pilot-hubs.tfvars b/terraform/gcp/projects/pilot-hubs.tfvars index 39eb97d3ab..4633013328 100644 --- a/terraform/gcp/projects/pilot-hubs.tfvars +++ b/terraform/gcp/projects/pilot-hubs.tfvars @@ -63,7 +63,6 @@ dask_nodes = { min : 0, max : 100, machine_type : "n1-highmem-4", - zones : ["us-central1-b"] } } From 0e3af26775a7fa4f9229a27dfb9d6e6ffde6f116 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Wed, 5 Jul 2023 17:49:21 -0700 Subject: [PATCH 14/16] Remove outdated comment about n2- machines --- terraform/gcp/projects/pilot-hubs.tfvars | 2 -- 1 file changed, 2 deletions(-) diff --git a/terraform/gcp/projects/pilot-hubs.tfvars b/terraform/gcp/projects/pilot-hubs.tfvars index 4633013328..dcabae1110 100644 --- a/terraform/gcp/projects/pilot-hubs.tfvars +++ b/terraform/gcp/projects/pilot-hubs.tfvars @@ -41,8 +41,6 @@ notebook_nodes = { # We expect around 120 users min : 1, max : 100, - # Swapped to n2 machines (from n1) based on investigations in - # https://github.com/2i2c-org/infrastructure/issues/2121#issuecomment-1497387131 machine_type : "n1-highmem-16", labels : { "2i2c.org/community" : "neurohackademy" From 2e8c067941e9a4371dff1c0145c9d4a1ce4692e4 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Wed, 5 Jul 2023 17:49:33 -0700 Subject: [PATCH 15/16] Apply node_locations fix to dask pools as well --- terraform/gcp/cluster.tf | 2 +- terraform/gcp/variables.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/gcp/cluster.tf b/terraform/gcp/cluster.tf index bda1494659..eab4d22ac4 100644 --- a/terraform/gcp/cluster.tf +++ b/terraform/gcp/cluster.tf @@ -342,7 +342,7 @@ resource "google_container_node_pool" "dask_worker" { project = google_container_cluster.cluster.project version = var.k8s_versions.dask_nodes_version - node_locations = each.value.zones == null ? google_container_cluster.cluster.node_locations : each.value.zones + node_locations = length(each.value.zones) == 0 ? [var.zone] : each.value.zones # Default to same config as notebook nodepools config for_each = var.dask_nodes diff --git a/terraform/gcp/variables.tf b/terraform/gcp/variables.tf index 1e73fc92f7..4bdfd27d27 100644 --- a/terraform/gcp/variables.tf +++ b/terraform/gcp/variables.tf @@ -111,7 +111,7 @@ variable "dask_nodes" { {} ), resource_labels : optional(map(string), {}), - zones : optional(list(string), null) + zones : optional(list(string), []) })) description = "Dask node pools to create. Defaults to notebook_nodes" default = {} From ee3d7d00757a251884f42e2362aa45c4d562fb15 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Wed, 5 Jul 2023 17:49:52 -0700 Subject: [PATCH 16/16] Set neurohackademy minimum node pool to 0, not 1 --- terraform/gcp/projects/pilot-hubs.tfvars | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/gcp/projects/pilot-hubs.tfvars b/terraform/gcp/projects/pilot-hubs.tfvars index dcabae1110..b43cf9d96c 100644 --- a/terraform/gcp/projects/pilot-hubs.tfvars +++ b/terraform/gcp/projects/pilot-hubs.tfvars @@ -39,7 +39,7 @@ notebook_nodes = { # Nodepool for neurohackademy. Tracking issue: https://github.com/2i2c-org/infrastructure/issues/2681 "neurohackademy" : { # We expect around 120 users - min : 1, + min : 0, max : 100, machine_type : "n1-highmem-16", labels : {