From da12177c07cfc9961074deaa8cff2cc5c4a9f627 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Wed, 22 Mar 2023 19:29:47 +0530 Subject: [PATCH 01/10] Add ability to give users / groups write access to buckets GCS allows individual Google Users as well as Google Groups to have permissions to read / write to GCS buckets (unlike AWS). We can use this to allow community leaders to manage who can read and write to GCS buckets from outside the cloud by managing membership in a Google Group! In this commit, we set up the persistent buckets of the LEAP hubs to have this functionality. Access is managed via a Google Group - I have temporarily created this under the 2i2c org and invited Julius (the community champion) as an administrator. But perhaps it should be just created as a regular google group. Using groups here allows management of this access to not require any 2i2c engineering work. Future work would probably fold the separate variable we have for determining if a bucket is accessible publicly as an attribute as well. 
Ref https://github.com/2i2c-org/infrastructure/issues/2096 --- terraform/gcp/buckets.tf | 16 ++++++++++++++++ terraform/gcp/projects/leap.tfvars | 12 ++++++++---- terraform/gcp/variables.tf | 15 +++++++++++---- 3 files changed, 35 insertions(+), 8 deletions(-) diff --git a/terraform/gcp/buckets.tf b/terraform/gcp/buckets.tf index 8102e24ac0..44cb8b9400 100644 --- a/terraform/gcp/buckets.tf +++ b/terraform/gcp/buckets.tf @@ -36,6 +36,15 @@ locals { } ] ])) + + bucket_extra_admin_members = distinct(flatten([ + for bucket_name, properties in var.user_buckets : [ + for extra_member in properties.extra_admin_members : { + bucket_name = bucket_name + member = extra_member + } + ] + ])) } resource "google_storage_bucket_iam_member" "member" { @@ -45,6 +54,13 @@ resource "google_storage_bucket_iam_member" "member" { member = "serviceAccount:${google_service_account.workload_sa[each.value.hub_name].email}" } +resource "google_storage_bucket_iam_member" "extra_admin_members" { + for_each = { for bm in local.bucket_extra_admin_members : "${bm.bucket_name}.${bm.member}" => bm } + bucket = google_storage_bucket.user_buckets[each.value.bucket_name].name + role = "roles/storage.admin" + member = each.value.member +} + resource "google_storage_default_object_access_control" "public_rule" { for_each = toset(var.bucket_public_access) bucket = google_storage_bucket.user_buckets[each.key].name diff --git a/terraform/gcp/projects/leap.tfvars b/terraform/gcp/projects/leap.tfvars index 2b57c6819c..1c09b4f577 100644 --- a/terraform/gcp/projects/leap.tfvars +++ b/terraform/gcp/projects/leap.tfvars @@ -25,17 +25,21 @@ filestore_capacity_gb = 2048 user_buckets = { "scratch-staging" : { - "delete_after" : 7 + "delete_after" : 7, + "extra_admin_members": [] }, "scratch" : { - "delete_after" : 7 + "delete_after" : 7, + "extra_admin_members": [] } # For https://github.com/2i2c-org/infrastructure/issues/1230#issuecomment-1278183441 "persistent" : { - "delete_after" : null + "delete_after" : null, 
+ "extra_admin_members": ["group:leap-external-bucket-users@2i2c.org"] }, "persistent-staging" : { - "delete_after" : null + "delete_after" : null, + "extra_admin_members": ["group:leap-external-bucket-users@2i2c.org"] } } diff --git a/terraform/gcp/variables.tf b/terraform/gcp/variables.tf index 50ea0c12b7..1e143ca8b5 100644 --- a/terraform/gcp/variables.tf +++ b/terraform/gcp/variables.tf @@ -198,7 +198,7 @@ variable "enable_network_policy" { } variable "user_buckets" { - type = map(object({ delete_after : number })) + type = map(object({ delete_after : number, extra_admin_members: list(string) })) default = {} description = <<-EOT GCS Buckets to be created. @@ -206,9 +206,16 @@ variable "user_buckets" { The key for each entry will be prefixed with {var.prefix}- to form the name of the bucket. - The value is a map, with 'delete_after' the only accepted key in that - map - it lists the number of days after which any content in the - bucket will be deleted. Set to null to not delete data. + The value is a map, accepting the following keys: + + 'delete_after' specifies the number of days after which any content + in the bucket will be deleted. Set to null to not delete data. + + 'extra_admin_members' describes extra identities (user groups, user accounts, + service accounts, etc) that will have *full* access to this bucket. This + is primarily useful for moving data into and out of buckets from outside + the cloud. See https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/storage_bucket_iam#member/members + for the format this would be specified in. EOT } From c0636a01fedc236f6995643ce2d1d4bbe473d93a Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Mon, 3 Apr 2023 21:00:20 +0530 Subject: [PATCH 02/10] Switch to using a regular google group The previous 2i2c.org one could not be managed by users outside the 2i2c.org org it looks like. 
--- terraform/gcp/projects/leap.tfvars | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/gcp/projects/leap.tfvars b/terraform/gcp/projects/leap.tfvars index 1c09b4f577..22763dbf23 100644 --- a/terraform/gcp/projects/leap.tfvars +++ b/terraform/gcp/projects/leap.tfvars @@ -35,11 +35,11 @@ user_buckets = { # For https://github.com/2i2c-org/infrastructure/issues/1230#issuecomment-1278183441 "persistent" : { "delete_after" : null, - "extra_admin_members": ["group:leap-external-bucket-users@2i2c.org"] + "extra_admin_members": ["group:leap-persistent-bucket-writers@googlegroups.com"] }, "persistent-staging" : { "delete_after" : null, - "extra_admin_members": ["group:leap-external-bucket-users@2i2c.org"] + "extra_admin_members": ["group:leap-persistent-bucket-writers@googlegroups.com"] } } From 34753eb148455e4c1c9fffb85ea275fafdc212f8 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Wed, 5 Apr 2023 14:37:47 +0530 Subject: [PATCH 03/10] Add extra_admin_members for m2lines too --- terraform/gcp/projects/m2lines.tfvars | 6 ++++-- terraform/gcp/variables.tf | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/terraform/gcp/projects/m2lines.tfvars b/terraform/gcp/projects/m2lines.tfvars index d4a4b65369..e9317251cd 100644 --- a/terraform/gcp/projects/m2lines.tfvars +++ b/terraform/gcp/projects/m2lines.tfvars @@ -22,10 +22,12 @@ user_buckets = { }, # For https://2i2c.freshdesk.com/a/tickets/218 "persistent": { - "delete_after": null + "delete_after": null, + "extra_admin_members": ["group:m2lines-persistent-bucket-writers@googlegroups.com"] }, "persistent-staging": { - "delete_after": null + "delete_after": null, + "extra_admin_members": ["group:m2lines-persistent-bucket-writers@googlegroups.com"] }, "public-persistent": { "delete_after": null diff --git a/terraform/gcp/variables.tf b/terraform/gcp/variables.tf index 1e143ca8b5..82fdb48ffd 100644 --- a/terraform/gcp/variables.tf +++ b/terraform/gcp/variables.tf @@ -198,7 +198,7 @@ 
variable "enable_network_policy" { } variable "user_buckets" { - type = map(object({ delete_after : number, extra_admin_members: list(string) })) + type = map(object({ delete_after : number, extra_admin_members: optional(list(string), []) })) default = {} description = <<-EOT GCS Buckets to be created. From 3e2827fcd7f720b08bc6c623ff08b4083e377228 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Thu, 6 Apr 2023 00:33:01 +0530 Subject: [PATCH 04/10] Give rights to the public bucket to m2lines group too --- terraform/gcp/projects/m2lines.tfvars | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/terraform/gcp/projects/m2lines.tfvars b/terraform/gcp/projects/m2lines.tfvars index e9317251cd..5d39725d4b 100644 --- a/terraform/gcp/projects/m2lines.tfvars +++ b/terraform/gcp/projects/m2lines.tfvars @@ -30,7 +30,8 @@ user_buckets = { "extra_admin_members": ["group:m2lines-persistent-bucket-writers@googlegroups.com"] }, "public-persistent": { - "delete_after": null + "delete_after": null, + "extra_admin_members": ["group:m2lines-persistent-bucket-writers@googlegroups.com"] }, } From 850c85d6022e29e1bda689831b000aa0c875bd50 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Thu, 18 May 2023 19:55:15 +0530 Subject: [PATCH 05/10] Add support for readonly buckets --- terraform/gcp/buckets.tf | 20 ++++++++++++++++++-- terraform/gcp/projects/leap.tfvars | 11 +++++++++++ terraform/gcp/variables.tf | 9 ++++++++- 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/terraform/gcp/buckets.tf b/terraform/gcp/buckets.tf index 44cb8b9400..dd1caea3a1 100644 --- a/terraform/gcp/buckets.tf +++ b/terraform/gcp/buckets.tf @@ -28,7 +28,7 @@ resource "google_storage_bucket" "user_buckets" { locals { # Nested for loop, thanks to https://www.daveperrett.com/articles/2021/08/19/nested-for-each-with-terraform/ - bucket_permissions = distinct(flatten([ + bucket_admin_permissions = distinct(flatten([ for hub_name, permissions in var.hub_cloud_permissions : [ for bucket_name in 
permissions.bucket_admin_access : { hub_name = hub_name @@ -37,6 +37,15 @@ locals { ] ])) + bucket_readonly_permissions = distinct(flatten([ + for hub_name, permissions in var.hub_cloud_permissions : [ + for bucket_name in permissions.bucket_readonly_access : { + hub_name = hub_name + bucket_name = bucket_name + } + ] + ])) + bucket_extra_admin_members = distinct(flatten([ for bucket_name, properties in var.user_buckets : [ for extra_member in properties.extra_admin_members : { @@ -48,12 +57,19 @@ locals { } resource "google_storage_bucket_iam_member" "member" { - for_each = { for bp in local.bucket_permissions : "${bp.hub_name}.${bp.bucket_name}" => bp } + for_each = { for bp in local.bucket_admin_permissions : "${bp.hub_name}.${bp.bucket_name}" => bp } bucket = google_storage_bucket.user_buckets[each.value.bucket_name].name role = "roles/storage.admin" member = "serviceAccount:${google_service_account.workload_sa[each.value.hub_name].email}" } +resource "google_storage_bucket_iam_member" "member_readonly" { + for_each = { for bp in local.bucket_admin_permissions : "${bp.hub_name}.${bp.bucket_name}" => bp } + bucket = google_storage_bucket.user_buckets[each.value.bucket_name].name + role = "roles/storage.objectViewer" + member = "serviceAccount:${google_service_account.workload_sa[each.value.hub_name].email}" +} + resource "google_storage_bucket_iam_member" "extra_admin_members" { for_each = { for bm in local.bucket_extra_admin_members : "${bm.bucket_name}.${bm.member}" => bm } bucket = google_storage_bucket.user_buckets[each.value.bucket_name].name diff --git a/terraform/gcp/projects/leap.tfvars b/terraform/gcp/projects/leap.tfvars index 22763dbf23..d221997872 100644 --- a/terraform/gcp/projects/leap.tfvars +++ b/terraform/gcp/projects/leap.tfvars @@ -41,17 +41,28 @@ user_buckets = { "delete_after" : null, "extra_admin_members": ["group:leap-persistent-bucket-writers@googlegroups.com"] } + # For 
https://github.com/2i2c-org/infrastructure/issues/1230#issuecomment-1278183441 + "persistent-ro" : { + "delete_after" : null, + "extra_admin_members": ["group:leap-persistent-bucket-writers@googlegroups.com"] + }, + "persistent-ro-staging" : { + "delete_after" : null, + "extra_admin_members": ["group:leap-persistent-bucket-writers@googlegroups.com"] + } } hub_cloud_permissions = { "staging" : { requestor_pays : true, bucket_admin_access : ["scratch-staging", "persistent-staging"], + bucket_readonly_access: ["persistent-ro-staging"], hub_namespace : "staging" }, "prod" : { requestor_pays : true, bucket_admin_access : ["scratch", "persistent"], + bucket_readonly_access: ["persistent-ro"], hub_namespace : "prod" } } diff --git a/terraform/gcp/variables.tf b/terraform/gcp/variables.tf index 82fdb48ffd..49b391c871 100644 --- a/terraform/gcp/variables.tf +++ b/terraform/gcp/variables.tf @@ -323,7 +323,14 @@ variable "max_cpu" { } variable "hub_cloud_permissions" { - type = map(object({ requestor_pays : bool, bucket_admin_access : set(string), hub_namespace : string })) + type = map( + object({ + requestor_pays : bool, + bucket_admin_access : set(string), + bucket_readonly_access: optional(set(string), []), + hub_namespace : string + }) + ) default = {} description = <<-EOT Map of cloud permissions given to a particular hub From 5a514f3668d46a7c2d580e1ed48ea65cd3d0e788 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Fri, 19 May 2023 13:18:30 +0530 Subject: [PATCH 06/10] Fix typo --- terraform/gcp/buckets.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/gcp/buckets.tf b/terraform/gcp/buckets.tf index dd1caea3a1..47be777e28 100644 --- a/terraform/gcp/buckets.tf +++ b/terraform/gcp/buckets.tf @@ -64,7 +64,7 @@ resource "google_storage_bucket_iam_member" "member" { } resource "google_storage_bucket_iam_member" "member_readonly" { - for_each = { for bp in local.bucket_admin_permissions : "${bp.hub_name}.${bp.bucket_name}" => bp } + for_each = { for 
bp in local.bucket_readonly_permissions : "${bp.hub_name}.${bp.bucket_name}" => bp } bucket = google_storage_bucket.user_buckets[each.value.bucket_name].name role = "roles/storage.objectViewer" member = "serviceAccount:${google_service_account.workload_sa[each.value.hub_name].email}" From 58281ceced305590191b4a33daa408b4de0bcb48 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Tue, 13 Jun 2023 22:23:56 +0530 Subject: [PATCH 07/10] Deploy GPU nodes across more zones In https://2i2c.freshdesk.com/a/tickets/764, LEAP users are running out of GPU on the one zone their notebook nodes are in. This just expands that to all possible zones just for GPU nodes, to maximize the amount of GPUs made available to them. This comes at the cost of home directory access maybe being slightly slower, but that's ok. --- terraform/gcp/cluster.tf | 8 +++++--- terraform/gcp/projects/leap.tfvars | 10 +++++++++- terraform/gcp/variables.tf | 24 ++++++++++++++++++++++-- 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/terraform/gcp/cluster.tf b/terraform/gcp/cluster.tf index d52484f500..c63834e359 100644 --- a/terraform/gcp/cluster.tf +++ b/terraform/gcp/cluster.tf @@ -236,13 +236,15 @@ resource "google_container_node_pool" "core" { # resource ref: https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/container_node_pool resource "google_container_node_pool" "notebook" { + for_each = var.notebook_nodes + name = "nb-${each.key}" cluster = google_container_cluster.cluster.name project = google_container_cluster.cluster.project - location = google_container_cluster.cluster.location version = var.k8s_versions.notebook_nodes_version - for_each = var.notebook_nodes + node_locations = each.value.zones == null ? 
google_container_cluster.cluster.node_locations : each.value.zones + initial_node_count = each.value.min autoscaling { @@ -335,9 +337,9 @@ resource "google_container_node_pool" "dask_worker" { name = "dask-${each.key}" cluster = google_container_cluster.cluster.name project = google_container_cluster.cluster.project - location = google_container_cluster.cluster.location version = var.k8s_versions.dask_nodes_version + node_locations = each.value.zones == null ? google_container_cluster.cluster.node_locations : each.value.zones # Default to same config as notebook nodepools config for_each = var.dask_nodes diff --git a/terraform/gcp/projects/leap.tfvars b/terraform/gcp/projects/leap.tfvars index d221997872..43db7f523b 100644 --- a/terraform/gcp/projects/leap.tfvars +++ b/terraform/gcp/projects/leap.tfvars @@ -91,7 +91,15 @@ notebook_nodes = { enabled : true, type : "nvidia-tesla-t4", count : 1 - } + }, + zones: [ + # Get GPUs wherever they are available, as sometimes a single + # zone might be out of GPUs. 
+ "us-central1-a", + "us-central1-b", + "us-central1-c", + "us-central1-f" + ] }, } diff --git a/terraform/gcp/variables.tf b/terraform/gcp/variables.tf index 49b391c871..00e451afc6 100644 --- a/terraform/gcp/variables.tf +++ b/terraform/gcp/variables.tf @@ -55,7 +55,20 @@ variable "k8s_versions" { } variable "notebook_nodes" { - type = map(object({ min : number, max : number, machine_type : string, labels : map(string), gpu : object({ enabled : bool, type : string, count : number }) })) + type = map(object({ + min : number, + max : number, + machine_type : string, + labels : map(string), + taints : optional(list(object({ + key : string, + value : string, + effect : string + })), []) + gpu : object({ enabled : bool, type : string, count : number }), + resource_labels : optional(map(string), {}), + zones : optional(list(string), null) + })) description = "Notebook node pools to create" default = {} } @@ -67,7 +80,14 @@ variable "dask_nodes" { preemptible: optional(bool, true), machine_type : string, labels : map(string), - gpu : object({ enabled : bool, type : string, count : number }) + taints : optional(list(object({ + key : string, + value : string, + effect : string + })), []) + gpu : object({ enabled : bool, type : string, count : number }), + resource_labels : optional(map(string), {}), + zones : optional(list(string), null) })) description = "Dask node pools to create. 
Defaults to notebook_nodes" default = {} From 954057f1f03692fafbcc6e2aab8af13bf086f921 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 27 Jun 2023 02:20:52 +0000 Subject: [PATCH 08/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- terraform/gcp/buckets.tf | 2 +- terraform/gcp/cluster.tf | 16 ++++++++-------- terraform/gcp/projects/leap.tfvars | 18 +++++++++--------- terraform/gcp/projects/m2lines.tfvars | 18 +++++++++--------- terraform/gcp/variables.tf | 6 +++--- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/terraform/gcp/buckets.tf b/terraform/gcp/buckets.tf index 30db3a7c6e..59044a42bc 100644 --- a/terraform/gcp/buckets.tf +++ b/terraform/gcp/buckets.tf @@ -50,7 +50,7 @@ locals { for bucket_name, properties in var.user_buckets : [ for extra_member in properties.extra_admin_members : { bucket_name = bucket_name - member = extra_member + member = extra_member } ] ])) diff --git a/terraform/gcp/cluster.tf b/terraform/gcp/cluster.tf index 69c6571cf5..e270d09007 100644 --- a/terraform/gcp/cluster.tf +++ b/terraform/gcp/cluster.tf @@ -238,10 +238,10 @@ resource "google_container_node_pool" "core" { resource "google_container_node_pool" "notebook" { for_each = var.notebook_nodes - name = "nb-${each.key}" - cluster = google_container_cluster.cluster.name - project = google_container_cluster.cluster.project - version = var.k8s_versions.notebook_nodes_version + name = "nb-${each.key}" + cluster = google_container_cluster.cluster.name + project = google_container_cluster.cluster.project + version = var.k8s_versions.notebook_nodes_version node_locations = each.value.zones == null ? 
google_container_cluster.cluster.node_locations : each.value.zones @@ -337,10 +337,10 @@ resource "google_container_node_pool" "notebook" { # resource ref: https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/container_node_pool resource "google_container_node_pool" "dask_worker" { - name = "dask-${each.key}" - cluster = google_container_cluster.cluster.name - project = google_container_cluster.cluster.project - version = var.k8s_versions.dask_nodes_version + name = "dask-${each.key}" + cluster = google_container_cluster.cluster.name + project = google_container_cluster.cluster.project + version = var.k8s_versions.dask_nodes_version node_locations = each.value.zones == null ? google_container_cluster.cluster.node_locations : each.value.zones # Default to same config as notebook nodepools config diff --git a/terraform/gcp/projects/leap.tfvars b/terraform/gcp/projects/leap.tfvars index 286342b883..0382edd486 100644 --- a/terraform/gcp/projects/leap.tfvars +++ b/terraform/gcp/projects/leap.tfvars @@ -26,29 +26,29 @@ filestore_capacity_gb = 2048 user_buckets = { "scratch-staging" : { "delete_after" : 7, - "extra_admin_members": [] + "extra_admin_members" : [] }, "scratch" : { "delete_after" : 7, - "extra_admin_members": [] + "extra_admin_members" : [] } # For https://github.com/2i2c-org/infrastructure/issues/1230#issuecomment-1278183441 "persistent" : { "delete_after" : null, - "extra_admin_members": ["group:leap-persistent-bucket-writers@googlegroups.com"] + "extra_admin_members" : ["group:leap-persistent-bucket-writers@googlegroups.com"] }, "persistent-staging" : { "delete_after" : null, - "extra_admin_members": ["group:leap-persistent-bucket-writers@googlegroups.com"] + "extra_admin_members" : ["group:leap-persistent-bucket-writers@googlegroups.com"] } # For https://github.com/2i2c-org/infrastructure/issues/1230#issuecomment-1278183441 "persistent-ro" : { "delete_after" : null, - "extra_admin_members": 
["group:leap-persistent-bucket-writers@googlegroups.com"] + "extra_admin_members" : ["group:leap-persistent-bucket-writers@googlegroups.com"] }, "persistent-ro-staging" : { "delete_after" : null, - "extra_admin_members": ["group:leap-persistent-bucket-writers@googlegroups.com"] + "extra_admin_members" : ["group:leap-persistent-bucket-writers@googlegroups.com"] } } @@ -56,13 +56,13 @@ hub_cloud_permissions = { "staging" : { requestor_pays : true, bucket_admin_access : ["scratch-staging", "persistent-staging"], - bucket_readonly_access: ["persistent-ro-staging"], + bucket_readonly_access : ["persistent-ro-staging"], hub_namespace : "staging" }, "prod" : { requestor_pays : true, bucket_admin_access : ["scratch", "persistent"], - bucket_readonly_access: ["persistent-ro"], + bucket_readonly_access : ["persistent-ro"], hub_namespace : "prod" } } @@ -85,7 +85,7 @@ notebook_nodes = { type : "nvidia-tesla-t4", count : 1 }, - zones: [ + zones : [ # Get GPUs wherever they are available, as sometimes a single # zone might be out of GPUs. 
"us-central1-a", diff --git a/terraform/gcp/projects/m2lines.tfvars b/terraform/gcp/projects/m2lines.tfvars index 58433842c0..bf4c67d9ff 100644 --- a/terraform/gcp/projects/m2lines.tfvars +++ b/terraform/gcp/projects/m2lines.tfvars @@ -21,17 +21,17 @@ user_buckets = { "delete_after" : 7 }, # For https://2i2c.freshdesk.com/a/tickets/218 - "persistent": { - "delete_after": null, - "extra_admin_members": ["group:m2lines-persistent-bucket-writers@googlegroups.com"] + "persistent" : { + "delete_after" : null, + "extra_admin_members" : ["group:m2lines-persistent-bucket-writers@googlegroups.com"] }, - "persistent-staging": { - "delete_after": null, - "extra_admin_members": ["group:m2lines-persistent-bucket-writers@googlegroups.com"] + "persistent-staging" : { + "delete_after" : null, + "extra_admin_members" : ["group:m2lines-persistent-bucket-writers@googlegroups.com"] }, - "public-persistent": { - "delete_after": null, - "extra_admin_members": ["group:m2lines-persistent-bucket-writers@googlegroups.com"] + "public-persistent" : { + "delete_after" : null, + "extra_admin_members" : ["group:m2lines-persistent-bucket-writers@googlegroups.com"] }, } diff --git a/terraform/gcp/variables.tf b/terraform/gcp/variables.tf index f11a500c3a..80d674d1db 100644 --- a/terraform/gcp/variables.tf +++ b/terraform/gcp/variables.tf @@ -225,7 +225,7 @@ variable "enable_network_policy" { } variable "user_buckets" { - type = map(object({ delete_after : number, extra_admin_members: optional(list(string), []) })) + type = map(object({ delete_after : number, extra_admin_members : optional(list(string), []) })) default = {} description = <<-EOT GCS Buckets to be created. 
@@ -350,11 +350,11 @@ variable "max_cpu" { } variable "hub_cloud_permissions" { - type = map( + type = map( object({ requestor_pays : bool, bucket_admin_access : set(string), - bucket_readonly_access: optional(set(string), []), + bucket_readonly_access : optional(set(string), []), hub_namespace : string }) ) From e9623e89a066852ad8f381d8621b26e925434e8f Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Mon, 26 Jun 2023 19:52:55 -0700 Subject: [PATCH 09/10] Add docs on GPU setup on GCP --- docs/howto/features/gpu.md | 68 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/docs/howto/features/gpu.md b/docs/howto/features/gpu.md index 77385d5c00..0af5c6039d 100644 --- a/docs/howto/features/gpu.md +++ b/docs/howto/features/gpu.md @@ -6,6 +6,70 @@ GPUs on all major cloud providers. ## Setting up GPU nodes +### GCP + +#### Requesting quota increase + +New GCP projects start with no GPU quota, so we must ask for some to enable +GPUs. + +1. Go to the [GCP Quotas page](https://console.cloud.google.com/apis/api/compute.googleapis.com/quotas), + **and make sure you are in the right project**. + +2. Search for "NVIDIA T4 GPU", and find the entry for the **correct region**. + This is very important, as getting a quota increase in the wrong region means + we have to do this all over again. + +3. Check the box next to the correct quota, and click "Edit Quotas" button + just above the list. + +4. Enter the number of GPUs we want quota for on the right. For a brand new + project, 4 is a good starting number. We can consistently ask for more, + if these get used. GCP requires we provide a description for this quota + increase request - "We need GPUs to work on some ML based research" is + a good start. + +5. Click "Next", and then "Submit Request". + +6. Sometimes the request is immediately granted, other times it takes a few + days. 
+ +#### Setting up GPU nodepools with terraform + +The `notebook_nodes` variable for our GCP terraform accepts a `gpu` +parameter, which can be used to provision a GPU nodepool. An example +would look like: + +```terraform +notebook_nodes = { + "gpu-t4": { + min: 0, + max: 20, + machine_type: "n1-highmem-8", + gpu: { + enabled: true, + type: "nvidia-tesla-t4", + count: 1 + }, + # Optional, in case we run into resource exhaustion in the main zone + zones: [ + "us-central1-a", + "us-central1-b", + "us-central1-c", + "us-central1-f" + ] + } +} +``` + +This provisions a `n1-highmem-8` node, where each node has 1 NVidia +T4 GPU. + +In addition, we could ask for GPU nodes to be spawned in whatever zone +available in the same region, rather than just the same zone as the rest +of our notebook nodes. This should only be used if we run into GPU scarcity +issues in the zone! + ### AWS #### Requesting Quota Increase @@ -78,7 +142,7 @@ AWS, and we can configure a node group there to provide us GPUs. autoscaler should recognize this! `eksctl` will also setup the appropriate driver installer, so you won't have to. -#### Setting up a GPU user profile +## Setting up a GPU user profile Finally, we need to give users the option of using the GPU via a profile. This should be placed in the hub configuration: @@ -141,7 +205,7 @@ jupyterhub: Do a deployment with this config, and then we can test to make sure this works! -#### Testing +## Testing 1. Login to the hub, and start a server with the GPU profile you just set up. 
From a8925d526a635f0b9d907ec70e8a3a3e6db9a666 Mon Sep 17 00:00:00 2001 From: YuviPanda Date: Mon, 26 Jun 2023 20:08:02 -0700 Subject: [PATCH 10/10] Add docs on how to enable extra_admin_members --- docs/howto/features/buckets.md | 44 +++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/docs/howto/features/buckets.md b/docs/howto/features/buckets.md index 2342bb2f4d..dd8efa0617 100644 --- a/docs/howto/features/buckets.md +++ b/docs/howto/features/buckets.md @@ -80,4 +80,46 @@ on why users want this! You can also add other env vars pointing to other buckets users requested. 5. Get this change deployed, and users should now be able to use the buckets! - Currently running users might have to restart their pods for the change to take effect. \ No newline at end of file + Currently running users might have to restart their pods for the change to take effect. + + +## Allowing access to buckets from outside the JupyterHub + +### GCP + +Some hub users want to be able to write to the bucket from outside the hub, +primarily for large data transfer from on-premise systems. Since +[Google Groups](https://groups.google.com) can be used to control access to +GCS buckets, it can be used to allow arbitrary users to write to the bucket! + +1. With your `2i2c.org` google account, go to [Google Groups](https://groups.google.com) and create a new Google Group with the name + "<bucket-name>-writers", where "<bucket-name>" is the name of the bucket + we are going to grant write access to. + +2. Grant "Group Owner" access to the community champion requesting this feature. + They will be able to add / remove users from the group as necessary, and + thus manage access without needing to involve 2i2c engineers. + +3. 
In the `user_buckets` definition for the bucket in question, add the group + name as an `extra_admin_members`: + + ```terraform + user_buckets = { + "persistent": { + "delete_after": null, + "extra_admin_members": [ + "group:<name-of-group>@googlegroups.com" + ] + } + } + ``` + + Apply this terraform change to create the appropriate permissions for members + of the group to have full read/write access to that GCS bucket. + +4. We want the community champions to handle granting / revoking access to + this google group, as well as produce community specific documentation on + how to actually upload data here. We currently do not have a template of + how end users can use this, but something can be stolen from the + [documentation for LEAP users](https://leap-stc.github.io/leap-pangeo/jupyterhub.html#i-have-a-dataset-and-want-to-work-with-it-on-the-hub-how-do-i-upload-it) +