Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

daskhub: provide worker resource options for 16CPU/128GB nodes on GKE/EKS #3344

Merged
merged 8 commits into from
Nov 20, 2023
62 changes: 0 additions & 62 deletions config/clusters/jupyter-meets-the-earth/common.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -284,65 +284,3 @@ dask-gateway:
memory:
request: 2G
limit: 500G

# Note that we are overriding options provided in 2i2c's helm chart that has
# default values for these config entries.
#
extraConfig:
# This configuration represents options that can be presented to users
# that want to create a Dask cluster using dask-gateway. For more
# details, see https://gateway.dask.org/cluster-options.html
#
# The goal is to provide a simple configuration that allows the user some
# flexibility while also fitting well on AWS nodes, which all have a
# 1:4 ratio between CPU and GB of memory. By providing the
# username label, we help administrators to track user pods.
option_handler: |
from dask_gateway_server.options import Options, Select, String, Mapping
def cluster_options(user):
def option_handler(options):
if ":" not in options.image:
raise ValueError("When specifying an image you must also provide a tag")
extra_labels = {}
scheduler_extra_pod_annotations = {
"prometheus.io/scrape": "true",
"prometheus.io/port": "8787",
}
chosen_worker_cpu = int(options.worker_specification.split("CPU")[0])
chosen_worker_memory = 4 * chosen_worker_cpu
# We multiply the requests by a fraction to ensure that the
# workers fit well within a node that needs some resources
# reserved for system pods.
return {
# A default image is suggested via DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE env variable
"image": options.image,
"scheduler_extra_pod_labels": extra_labels,
"scheduler_extra_pod_annotations": scheduler_extra_pod_annotations,
"worker_extra_pod_labels": extra_labels,
"worker_cores": 0.85 * chosen_worker_cpu,
"worker_cores_limit": chosen_worker_cpu,
"worker_memory": "%fG" % (0.85 * chosen_worker_memory),
"worker_memory_limit": "%fG" % chosen_worker_memory,
"environment": options.environment,
}
return Options(
Select(
"worker_specification",
[
"1CPU, 4GB",
"2CPU, 8GB",
"4CPU, 16GB",
"8CPU, 32GB",
"16CPU, 64GB",
"32CPU, 128GB",
"64CPU, 256GB",
],
default="1CPU, 4GB",
label="Worker specification",
),
# The default image is set via DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE env variable
String("image", label="Image"),
Mapping("environment", {}, label="Environment variables"),
handler=option_handler,
)
c.Backend.cluster_options = cluster_options
19 changes: 19 additions & 0 deletions helm-charts/basehub/templates/configmap-cluster-info.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
kind: ConfigMap
apiVersion: v1
metadata:
name: basehub-cluster-info
labels:
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
app.kubernetes.io/name: basehub
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
data:
{{- $k8s_dist := "" }}
{{- if (.Capabilities.KubeVersion.Version | contains "gke") }}
{{- $k8s_dist = "gke" }}
{{- else if (.Capabilities.KubeVersion.Version | contains "eks") }}
{{- $k8s_dist = "eks" }}
{{- else }}
{{- $k8s_dist = "aks" }}
{{- end }}
K8S_DIST: {{ $k8s_dist }}
6 changes: 6 additions & 0 deletions helm-charts/basehub/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,12 @@ jupyterhub:
- value: "/rstudio"
title: RStudio
description: An IDE For R, created by the RStudio company
extraEnv:
BASEHUB_K8S_DIST:
valueFrom:
configMapKeyRef:
name: basehub-cluster-info
key: K8S_DIST
initContainers:
- name: templates-clone
image: alpine/git:2.40.1
Expand Down
142 changes: 126 additions & 16 deletions helm-charts/daskhub/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,32 @@ dask-gateway:
nodeSelector:
# Dask workers get their own pre-emptible pool
k8s.dask.org/node-purpose: worker
env:
- name: BASEHUB_K8S_DIST
valueFrom:
configMapKeyRef:
name: basehub-cluster-info
key: K8S_DIST

# TODO: figure out a replacement for userLimits.
extraConfig:
# This configuration represents options that can be presented to users
# that want to create a Dask cluster using dask-gateway client.
#
# This configuration is meant to enable the user to request dask worker
# pods that fit well on 2i2c's clusters. Currently the only instance
# types used are n2-highmem-16 and r5.4xlarge.
#
# - Documentation about exposing cluster options to users:
# https://gateway.dask.org/cluster-options.html
# - Reference for KubeClusterConfig, which is what can be configured:
# https://gateway.dask.org/api-server.html#kubeclusterconfig.
#
optionHandler: |
from dask_gateway_server.options import Options, Integer, Float, String, Mapping
import os
import string

from dask_gateway_server.options import Integer, Mapping, Options, Select, String

# Escape a string to be dns-safe in the same way that KubeSpawner does it.
# Reference https://github.com/jupyterhub/kubespawner/blob/616f72c4aee26c3d2127c6af6086ec50d6cda383/kubespawner/spawner.py#L1828-L1835
# Adapted from https://github.com/minrk/escapism to avoid installing the package
Expand All @@ -177,40 +196,131 @@ dask-gateway:
chars.append(escaped_hex_char)
return u''.join(chars)

# Decide on available instance types and their resource allocation
# choices to expose based on cloud provider. For each daskhub hub
# managed by 2i2c, there should be these instance types available.
GeorgianaElena marked this conversation as resolved.
Show resolved Hide resolved
#
cloud_provider = os.environ["BASEHUB_K8S_DIST"] # gke, eks, or aks
instance_types = {
"gke": ["n2-highmem-16"],
"eks": ["r5.4xlarge"],
# 2i2c doesn't yet manage any dask-gateway installations on AKS, so
# this hasn't been configured yet. Looking up "aks" below will fail
# with a KeyError - that is intentional, as this must be configured
# before we set up dask-gateway on AKS anyhow.
# aks: [],
}

# NOTE: Data mentioned below comes from manual inspection of data
# collected and currently only available at
# https://github.com/2i2c-org/infrastructure/pull/3337.
#
resource_allocations = {
# n2-highmem-16 nodes in our clusters have 15.89 allocatable cores
# and 116.549Gi allocatable memory, and daemonsets are expected to
# not add more than 400m cores and 800Mi (0.781Gi) memory with some
# margin, so we get 15.49 cores and 115.768Gi available for worker
# pods to request.
consideRatio marked this conversation as resolved.
Show resolved Hide resolved
#
# This is an initial conservative strategy, allowing a slight
# oversubscription of CPU but not any oversubscription of memory.
#
# To work around https://github.com/dask/dask-gateway/issues/765, we
# round worker_cores down from [0.968, 1.936, 3.872, 7.745, 15.49]
# to [0.9, 1.9, 3.8, 7.7, 15.4].
#
"n2-highmem-16": {
"1CPU, 7.2Gi": {"worker_cores": 0.9, "worker_cores_limit": 1, "worker_memory": "7.235G", "worker_memory_limit": "7.235G"},
"2CPU, 14.5Gi": {"worker_cores": 1.9, "worker_cores_limit": 2, "worker_memory": "14.471G", "worker_memory_limit": "14.471G"},
"4CPU, 28.9Gi": {"worker_cores": 3.8, "worker_cores_limit": 4, "worker_memory": "28.942G", "worker_memory_limit": "28.942G"},
"8CPU, 57.9Gi": {"worker_cores": 7.7, "worker_cores_limit": 8, "worker_memory": "57.884G", "worker_memory_limit": "57.884G"},
"16CPU, 115.8Gi": {"worker_cores": 15.4, "worker_cores_limit": 16, "worker_memory": "115.768G", "worker_memory_limit": "115.768G"},
},
# r5.4xlarge nodes in our clusters have 15.89 allocatable cores and
# 121.504Gi allocatable memory, and daemonsets are expected to not
# add more than 400m cores and 800Mi (0.781Gi) memory with some
# margin, so we get 15.49 cores and 120.723Gi available for worker
# pods to request.
#
# This is an initial conservative strategy, allowing a slight
# oversubscription of CPU but not any oversubscription of memory.
#
# To work around https://github.com/dask/dask-gateway/issues/765, we
# round worker_cores down from [0.968, 1.936, 3.872, 7.745, 15.49]
# to [0.9, 1.9, 3.8, 7.7, 15.4].
#
"r5.4xlarge": {
"1CPU, 7.5Gi": {"worker_cores": 0.9, "worker_cores_limit": 1, "worker_memory": "7.545G", "worker_memory_limit": "7.545G"},
"2CPU, 15.1Gi": {"worker_cores": 1.9, "worker_cores_limit": 2, "worker_memory": "15.090G", "worker_memory_limit": "15.090G"},
"4CPU, 30.2Gi": {"worker_cores": 3.8, "worker_cores_limit": 4, "worker_memory": "30.180G", "worker_memory_limit": "30.180G"},
"8CPU, 60.4Gi": {"worker_cores": 7.7, "worker_cores_limit": 8, "worker_memory": "60.361G", "worker_memory_limit": "60.361G"},
"16CPU, 120.7Gi": {"worker_cores": 15.4, "worker_cores_limit": 16, "worker_memory": "120.723G", "worker_memory_limit": "120.723G"},
},
}

# For now we support only one instance type per cluster; listing it
# as an option is a way to help convey how things work a bit better.
it = instance_types[cloud_provider][0]
ra = resource_allocations[it]
ra_keys = list(ra.keys())

def cluster_options(user):
safe_username = escape_string_label_safe(user.name)
def option_handler(options):
if ":" not in options.image:
raise ValueError("When specifying an image you must also provide a tag")
extra_labels = {
"hub.jupyter.org/username": escape_string_label_safe(user.name),
}
scheduler_extra_pod_annotations = {
"hub.jupyter.org/username": safe_username,
"hub.jupyter.org/username": user.name,
"prometheus.io/scrape": "true",
"prometheus.io/port": "8787",
}
extra_labels = {
"hub.jupyter.org/username": safe_username,
worker_extra_pod_annotations = {
"hub.jupyter.org/username": user.name,
}
picked_ra = ra[options.worker_resource_allocation]

return {
"worker_cores_limit": options.worker_cores,
"worker_cores": options.worker_cores,
"worker_memory": "%fG" % options.worker_memory,
# A default image is suggested via DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE env variable
"image": options.image,
"scheduler_extra_pod_annotations": scheduler_extra_pod_annotations,
"scheduler_extra_pod_labels": extra_labels,
"scheduler_extra_pod_annotations": scheduler_extra_pod_annotations,
"worker_extra_pod_labels": extra_labels,
"worker_extra_pod_annotations": worker_extra_pod_annotations,
"worker_cores": picked_ra["worker_cores"],
"worker_cores_limit": picked_ra["worker_cores_limit"],
"worker_memory": picked_ra["worker_memory"],
"worker_memory_limit": picked_ra["worker_memory_limit"],
"environment": options.environment,
"idle_timeout": options.idle_timeout_minutes * 60,
}
return Options(
Integer("worker_cores", 2, min=1, label="Worker Cores"),
Float("worker_memory", 4, min=1, label="Worker Memory (GiB)"),
# The default image is set via DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE env variable
Select(
"instance_type",
[it],
default=it,
label="Instance type running worker containers",
),
Select(
"worker_resource_allocation",
ra_keys,
default=ra_keys[0],
label="Resources per worker container",
),
# The default image is pre-specified by the dask-gateway client
# via the env var DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE set on
# the jupyterhub user pods
String("image", label="Image"),
Mapping("environment", {}, label="Environment Variables"),
Mapping("environment", {}, label="Environment variables (YAML)"),
Integer("idle_timeout_minutes", 30, min=0, label="Idle cluster terminated after (minutes)"),
handler=option_handler,
)
c.Backend.cluster_options = cluster_options
idle: |
# timeout after 30 minutes of inactivity

# timeout after 30 minutes of inactivity by default, keep this in sync
# with the user exposed option idle_timeout_minutes's default value
# configured above
c.KubeClusterConfig.idle_timeout = 1800
prefix: "/services/dask-gateway" # Users connect to the Gateway through the JupyterHub service.
auth:
Expand Down