From 08792cae74dbe005f7c9ab807b176af5e25c786e Mon Sep 17 00:00:00 2001 From: Yicheng-Lu-llll <51814063+Yicheng-Lu-llll@users.noreply.github.com> Date: Mon, 22 May 2023 13:34:13 -0500 Subject: [PATCH] Add a document to outline the default settings for `rayStartParams` in Kuberay (#1057) Add a document to outline the default settings for `rayStartParams` in Kuberay --- docs/guidance/rayStartParams.md | 37 +++++++++++++++++++ .../samples/ray-cluster.autoscaler.large.yaml | 10 +++-- .../samples/ray-cluster.autoscaler.yaml | 9 +++-- .../samples/ray-cluster.complete.large.yaml | 8 +++- .../config/samples/ray-cluster.complete.yaml | 8 +++- .../samples/ray-cluster.external-redis.yaml | 12 +++++- .../samples/ray-cluster.head-command.yaml | 5 ++- .../samples/ray-cluster.heterogeneous.yaml | 18 +++++++-- .../config/samples/ray-cluster.mini.yaml | 5 ++- .../samples/ray-cluster.separate-ingress.yaml | 10 ++++- .../config/samples/ray-cluster.tls.yaml | 6 +++ .../samples/ray-service.autoscaler.yaml | 6 +++ .../config/samples/ray_v1alpha1_rayjob.yaml | 8 +++- .../samples/ray_v1alpha1_rayservice.yaml | 9 +++-- ray-operator/controllers/ray/common/pod.go | 7 ++++ .../controllers/ray/common/pod_test.go | 28 ++++++++++++++ 16 files changed, 159 insertions(+), 27 deletions(-) create mode 100644 docs/guidance/rayStartParams.md diff --git a/docs/guidance/rayStartParams.md b/docs/guidance/rayStartParams.md new file mode 100644 index 00000000000..ff2f13a451a --- /dev/null +++ b/docs/guidance/rayStartParams.md @@ -0,0 +1,37 @@ + +## Default Ray Start Parameters for KubeRay + +This document outlines the default settings for `rayStartParams` in KubeRay. + + +### Options Exclusive to the Head Pod + +- `--dashboard-host`: Host for the dashboard server, either `localhost` (127.0.0.1) or `0.0.0.0`. 
+The latter setting exposes the Ray dashboard outside the Ray cluster, which is required when [ingress](https://github.com/ray-project/kuberay/blob/master/docs/guidance/ingress.md) is utilized for Ray cluster access. +The default value for both Ray and KubeRay 0.5.0 is `localhost`. Please note that this will change for versions of KubeRay later than 0.5.0, where the default setting will be `0.0.0.0`. + +- `--no-monitor`: This option disables the monitor and autoscaler in the **user's container**. It will be automatically set when [autoscaling](https://github.com/ray-project/kuberay/blob/master/docs/guidance/autoscaler.md) is enabled. The autoscaling feature introduces the autoscaler as a sidecar container within the head pod, thereby obviating the need for a monitor and autoscaler in the **user's container**. See [PR #13505](https://github.com/ray-project/ray/pull/13505) for more details. Modification is not recommended. + + +- `--port`: Port for the GCS server. The port is set to `6379` by default. Please ensure that this value matches the `gcs-server` container port in the Ray head container. + +- `--redis-password`: Redis password for an external Redis, necessary when [fault tolerance](https://github.com/ray-project/kuberay/blob/master/docs/guidance/gcs-ft.md) is enabled. +The default value is `""` after Ray 2.3.0. See [#929](https://github.com/ray-project/kuberay/pull/929) for more details. + +### Options Exclusive to the Worker Pods + +- `--address`: Address of the GCS server. Worker pods utilize this address to establish a connection with the Ray cluster. By default, this address takes the form `<FQDN>:<GCS_PORT>`. The `GCS_PORT` corresponds to the value set in the `--port` option. For more insights on Fully Qualified Domain Name (FQDN), refer to [PR #938](https://github.com/ray-project/kuberay/pull/938) and [PR #951](https://github.com/ray-project/kuberay/pull/951). 
+ +### Options Applicable to Both Head and Worker Pods + +- `--block`: This option blocks the ray start command indefinitely. It will be automatically set by KubeRay. See [PR #675](https://github.com/ray-project/kuberay/pull/675) for more details. Modification is not recommended. + +- `--memory`: Amount of memory on this Ray node. Default is determined by Ray container resource limits. Modify Ray container resource limits instead of this option. See [PR #170](https://github.com/ray-project/kuberay/pull/170). + +- `--metrics-export-port`: Port for exposing Ray metrics through a Prometheus endpoint. The port is set to `8080` by default. Please ensure that this value matches the `metrics` container port if you need to customize it. See [PR #954](https://github.com/ray-project/kuberay/pull/954) and [prometheus-grafana doc](https://github.com/ray-project/kuberay/blob/master/docs/guidance/prometheus-grafana.md) for more details. + +- `--num-cpus`: Number of logical CPUs on this Ray node. Default is determined by Ray container resource limits. Modify Ray container resource limits instead of this option. See [PR #170](https://github.com/ray-project/kuberay/pull/170). However, it is sometimes useful to override this autodetected value. For example, setting `num-cpus:"0"` for the Ray head pod will prevent Ray workloads with non-zero CPU requirements from being scheduled on the head. + +- `--num-gpus`: Number of GPUs on this Ray node. Default is determined by Ray container resource limits. Modify Ray container resource limits instead of this option. See [PR #170](https://github.com/ray-project/kuberay/pull/170). 
+ + diff --git a/ray-operator/config/samples/ray-cluster.autoscaler.large.yaml b/ray-operator/config/samples/ray-cluster.autoscaler.large.yaml index 39498490fb7..22fc0a5b040 100644 --- a/ray-operator/config/samples/ray-cluster.autoscaler.large.yaml +++ b/ray-operator/config/samples/ray-cluster.autoscaler.large.yaml @@ -57,11 +57,11 @@ spec: memory: "512Mi" # Ray head pod template headGroupSpec: - # the following params are used to complete the ray start: ray start --head --block --port=6379 ... + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: - # Flag "no-monitor" will be automatically set when autoscaling is enabled. dashboard-host: '0.0.0.0' - # num-cpus: '14' # can be auto-completed from the limits # Use `resources` to optionally specify custom resource annotations for the Ray node. # The value of `resources` is a string-integer mapping. # Currently, `resources` must be provided in the specific format demonstrated below: @@ -112,7 +112,9 @@ spec: # - raycluster-complete-worker-large-group-bdtwh # - raycluster-complete-worker-large-group-hv457 # - raycluster-complete-worker-large-group-k8tj7 - # the following params are used to complete the ray start: ray start --block --node-ip-address= ... + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. 
rayStartParams: {} #pod template template: diff --git a/ray-operator/config/samples/ray-cluster.autoscaler.yaml b/ray-operator/config/samples/ray-cluster.autoscaler.yaml index 19004234bf0..98407fc9e40 100644 --- a/ray-operator/config/samples/ray-cluster.autoscaler.yaml +++ b/ray-operator/config/samples/ray-cluster.autoscaler.yaml @@ -49,10 +49,11 @@ spec: memory: "512Mi" # Ray head pod template headGroupSpec: - # the following params are used to complete the ray start: ray start --head --block ... + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: dashboard-host: '0.0.0.0' - # num-cpus: '1' # can be auto-completed from the limits # Use `resources` to optionally specify custom resource annotations for the Ray node. # The value of `resources` is a string-integer mapping. # Currently, `resources` must be provided in the specific format demonstrated below: @@ -112,7 +113,9 @@ spec: # - raycluster-complete-worker-small-group-bdtwh # - raycluster-complete-worker-small-group-hv457 # - raycluster-complete-worker-small-group-k8tj7 - # the following params are used to complete the ray start: ray start --block ... + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. 
rayStartParams: {} #pod template template: diff --git a/ray-operator/config/samples/ray-cluster.complete.large.yaml b/ray-operator/config/samples/ray-cluster.complete.large.yaml index 9aa30a97ac5..9c830d2af8d 100644 --- a/ray-operator/config/samples/ray-cluster.complete.large.yaml +++ b/ray-operator/config/samples/ray-cluster.complete.large.yaml @@ -22,7 +22,9 @@ spec: # for the head group, replicas should always be 1. # headGroupSpec.replicas is deprecated in KubeRay >= 0.3.0. replicas: 1 - # the following params are used to complete the ray start: ray start --head --block --dashboard-host: '0.0.0.0' ... + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: dashboard-host: '0.0.0.0' # pod template @@ -80,7 +82,9 @@ spec: # - raycluster-complete-worker-large-group-bdtwh # - raycluster-complete-worker-large-group-hv457 # - raycluster-complete-worker-large-group-k8tj7 - # the following params are used to complete the ray start: ray start --block ... + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: {} #pod template template: diff --git a/ray-operator/config/samples/ray-cluster.complete.yaml b/ray-operator/config/samples/ray-cluster.complete.yaml index e5d61c9b716..856c0a2657f 100644 --- a/ray-operator/config/samples/ray-cluster.complete.yaml +++ b/ray-operator/config/samples/ray-cluster.complete.yaml @@ -16,7 +16,9 @@ spec: # Kubernetes Service Type. 
This is an optional field, and the default value is ClusterIP. # Refer to https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types. serviceType: ClusterIP - # the following params are used to complete the ray start: ray start --head --block --dashboard-host: '0.0.0.0' ... + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: dashboard-host: '0.0.0.0' # pod template @@ -80,7 +82,9 @@ spec: # - raycluster-complete-worker-small-group-bdtwh # - raycluster-complete-worker-small-group-hv457 # - raycluster-complete-worker-small-group-k8tj7 - # the following params are used to complete the ray start: ray start --block + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: {} #pod template template: diff --git a/ray-operator/config/samples/ray-cluster.external-redis.yaml b/ray-operator/config/samples/ray-cluster.external-redis.yaml index 87ce75ac335..1c6df5385b0 100644 --- a/ray-operator/config/samples/ray-cluster.external-redis.yaml +++ b/ray-operator/config/samples/ray-cluster.external-redis.yaml @@ -90,9 +90,11 @@ spec: rayVersion: '2.4.0' headGroupSpec: replicas: 1 + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. 
+ # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: dashboard-host: "0.0.0.0" - num-cpus: "1" # can be auto-completed from the limits # redis-password should match "requirepass" in redis.conf in the ConfigMap above. # Ray 2.3.0 changes the default redis password from "5241590000000000" to "". redis-password: $REDIS_PASSWORD @@ -102,6 +104,11 @@ spec: containers: - name: ray-head image: rayproject/ray:2.4.0 + resources: + limits: + cpu: "1" + requests: + cpu: "200m" env: # RAY_REDIS_ADDRESS can force ray to use external redis - name: RAY_REDIS_ADDRESS @@ -131,6 +138,9 @@ spec: maxReplicas: 2 # logical group name, for this called small-group, also can be functional groupName: small-group + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: {} #pod template template: diff --git a/ray-operator/config/samples/ray-cluster.head-command.yaml b/ray-operator/config/samples/ray-cluster.head-command.yaml index 7a6a4070154..44c91970b80 100644 --- a/ray-operator/config/samples/ray-cluster.head-command.yaml +++ b/ray-operator/config/samples/ray-cluster.head-command.yaml @@ -11,10 +11,11 @@ spec: rayVersion: '2.4.0' # should match the Ray version in the image of the containers # Ray head pod template headGroupSpec: - # the following params are used to complete the ray start: ray start --head --block --redis-port=6379 ... + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. 
+ # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: dashboard-host: '0.0.0.0' - num-cpus: '1' # can be auto-completed from the limits #pod template template: spec: diff --git a/ray-operator/config/samples/ray-cluster.heterogeneous.yaml b/ray-operator/config/samples/ray-cluster.heterogeneous.yaml index 57010774b31..f4bcd26e33e 100644 --- a/ray-operator/config/samples/ray-cluster.heterogeneous.yaml +++ b/ray-operator/config/samples/ray-cluster.heterogeneous.yaml @@ -38,16 +38,22 @@ spec: ######################headGroupSpecs################################# # Ray head pod template headGroupSpec: - # the following params are used to complete the ray start: ray start --head --block ... + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: dashboard-host: '0.0.0.0' - num-cpus: '1' # can be auto-completed from Ray container resource limits #pod template template: spec: containers: - name: ray-head image: rayproject/ray:2.4.0 + resources: + limits: + cpu: "1" + requests: + cpu: "200m" volumeMounts: - mountPath: /opt name: config @@ -72,7 +78,9 @@ spec: maxReplicas: 10 # logical group name, for this called small-group, also can be functional groupName: small-group - # the following params are used to complete the ray start: ray start --block ... + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. 
rayStartParams: {} #pod template template: @@ -106,7 +114,9 @@ spec: # workersToDelete: #- raycluster-heterogeneous-worker-medium-group-7bv5h # - worker-4k2ih - # the following params are used to complete the ray start: ray start --block --node-ip-address= ... + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: {} #pod template template: diff --git a/ray-operator/config/samples/ray-cluster.mini.yaml b/ray-operator/config/samples/ray-cluster.mini.yaml index 863bff62524..778ee14846c 100644 --- a/ray-operator/config/samples/ray-cluster.mini.yaml +++ b/ray-operator/config/samples/ray-cluster.mini.yaml @@ -13,10 +13,11 @@ spec: rayVersion: '2.4.0' # should match the Ray version in the image of the containers # Ray head pod template headGroupSpec: - # the following params are used to complete the ray start: ray start --head --block --redis-port=6379 ... + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. 
rayStartParams: dashboard-host: '0.0.0.0' - num-cpus: '1' # can be auto-completed from the limits #pod template template: spec: diff --git a/ray-operator/config/samples/ray-cluster.separate-ingress.yaml b/ray-operator/config/samples/ray-cluster.separate-ingress.yaml index 77484d40e85..f752687592d 100644 --- a/ray-operator/config/samples/ray-cluster.separate-ingress.yaml +++ b/ray-operator/config/samples/ray-cluster.separate-ingress.yaml @@ -11,16 +11,22 @@ spec: headGroupSpec: serviceType: NodePort replicas: 1 + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: - port: '6379' dashboard-host: '0.0.0.0' - num-cpus: '1' # can be auto-completed from the limits #pod template template: spec: containers: - name: ray-head image: rayproject/ray:2.4.0 + resources: + limits: + cpu: "1" + requests: + cpu: "200m" ports: - containerPort: 6379 name: gcs-server diff --git a/ray-operator/config/samples/ray-cluster.tls.yaml b/ray-operator/config/samples/ray-cluster.tls.yaml index 433d607b7df..b89776fe6d7 100644 --- a/ray-operator/config/samples/ray-cluster.tls.yaml +++ b/ray-operator/config/samples/ray-cluster.tls.yaml @@ -8,6 +8,9 @@ spec: rayVersion: '2.4.0' # Ray head pod configuration headGroupSpec: + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. 
rayStartParams: dashboard-host: '0.0.0.0' # pod template @@ -96,6 +99,9 @@ spec: minReplicas: 1 maxReplicas: 10 groupName: small-group + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: {} #pod template template: diff --git a/ray-operator/config/samples/ray-service.autoscaler.yaml b/ray-operator/config/samples/ray-service.autoscaler.yaml index b4d0533ec99..0b9bc2987c0 100644 --- a/ray-operator/config/samples/ray-service.autoscaler.yaml +++ b/ray-operator/config/samples/ray-service.autoscaler.yaml @@ -56,6 +56,9 @@ spec: memory: "1000Mi" ######################headGroupSpecs################################# headGroupSpec: + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: {"num-cpus": "0"} #pod template template: @@ -86,6 +89,9 @@ spec: maxReplicas: 5 # logical group name, for this called small-group, also can be functional groupName: small-group + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. 
rayStartParams: {} #pod template template: diff --git a/ray-operator/config/samples/ray_v1alpha1_rayjob.yaml b/ray-operator/config/samples/ray_v1alpha1_rayjob.yaml index 789b00c506e..94bdba45135 100644 --- a/ray-operator/config/samples/ray_v1alpha1_rayjob.yaml +++ b/ray-operator/config/samples/ray_v1alpha1_rayjob.yaml @@ -19,10 +19,11 @@ spec: rayVersion: '2.4.0' # should match the Ray version in the image of the containers # Ray head pod template headGroupSpec: - # the following params are used to complete the ray start: ray start --head --block --redis-port=6379 ... + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: dashboard-host: '0.0.0.0' - num-cpus: '1' # can be auto-completed from the limits #pod template template: spec: @@ -63,6 +64,9 @@ spec: maxReplicas: 5 # logical group name, for this called small-group, also can be functional groupName: small-group + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: {} #pod template template: diff --git a/ray-operator/config/samples/ray_v1alpha1_rayservice.yaml b/ray-operator/config/samples/ray_v1alpha1_rayservice.yaml index 75d60ee3baa..fcec18b6baa 100644 --- a/ray-operator/config/samples/ray_v1alpha1_rayservice.yaml +++ b/ray-operator/config/samples/ray_v1alpha1_rayservice.yaml @@ -46,11 +46,11 @@ spec: ######################headGroupSpecs################################# # Ray head pod template. 
headGroupSpec: - # the following params are used to complete the ray start: ray start --head --block --redis-port=6379 ... + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: - port: '6379' # should match container port named gcs-server dashboard-host: '0.0.0.0' - num-cpus: '2' # can be auto-completed from the limits #pod template template: spec: @@ -80,6 +80,9 @@ spec: maxReplicas: 5 # logical group name, for this called small-group, also can be functional groupName: small-group + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. rayStartParams: {} #pod template template: diff --git a/ray-operator/controllers/ray/common/pod.go b/ray-operator/controllers/ray/common/pod.go index 0d4cffc498d..ac0feec1ed8 100644 --- a/ray-operator/controllers/ray/common/pod.go +++ b/ray-operator/controllers/ray/common/pod.go @@ -685,6 +685,13 @@ func setMissingRayStartParams(rayStartParams map[string]string, nodeType rayiov1 } } + if nodeType == rayiov1alpha1.HeadNode { + // allow incoming connections from all network interfaces for the dashboard by default. + if _, ok := rayStartParams["dashboard-host"]; !ok { + rayStartParams["dashboard-host"] = "0.0.0.0" + } + } + // add metrics port for expose the metrics to the prometheus. 
if _, ok := rayStartParams["metrics-export-port"]; !ok { rayStartParams["metrics-export-port"] = fmt.Sprint(DefaultMetricsPort) diff --git a/ray-operator/controllers/ray/common/pod_test.go b/ray-operator/controllers/ray/common/pod_test.go index 9ec1b67c8ba..5ca7ed8d3b1 100644 --- a/ray-operator/controllers/ray/common/pod_test.go +++ b/ray-operator/controllers/ray/common/pod_test.go @@ -1023,6 +1023,34 @@ func TestSetMissingRayStartParamsBlock(t *testing.T) { assert.Equal(t, "false", rayStartParams["block"], fmt.Sprintf("Expected `%v` but got `%v`", "false", rayStartParams["block"])) } +func TestSetMissingRayStartParamsDashboardHost(t *testing.T) { + // The dashboard-host option is automatically injected into RayStartParams with a default value of "0.0.0.0" for head only as workers do not have a dashboard server. + // Users can manually set the dashboard-host option to customize the host the dashboard server binds to, either "localhost" (127.0.0.1) or "0.0.0.0" (available from all interfaces). + headPort := "6379" + fqdnRayIP := "raycluster-kuberay-head-svc.default.svc.cluster.local" + + // Case 1: Head node with no dashboard-host option set. + rayStartParams := map[string]string{} + rayStartParams = setMissingRayStartParams(rayStartParams, rayiov1alpha1.HeadNode, headPort, "") + assert.Equal(t, "0.0.0.0", rayStartParams["dashboard-host"], fmt.Sprintf("Expected `%v` but got `%v`", "0.0.0.0", rayStartParams["dashboard-host"])) + + // Case 2: Head node with dashboard-host option set. + rayStartParams = map[string]string{"dashboard-host": "localhost"} + rayStartParams = setMissingRayStartParams(rayStartParams, rayiov1alpha1.HeadNode, headPort, "") + assert.Equal(t, "localhost", rayStartParams["dashboard-host"], fmt.Sprintf("Expected `%v` but got `%v`", "localhost", rayStartParams["dashboard-host"])) + + // Case 3: Worker node with no dashboard-host option set. 
+ rayStartParams = map[string]string{} + rayStartParams = setMissingRayStartParams(rayStartParams, rayiov1alpha1.WorkerNode, headPort, fqdnRayIP) + assert.NotContains(t, rayStartParams, "dashboard-host", "workers should not have an dashboard-host option set.") + + // Case 4: Worker node with dashboard-host option set. + // To maximize user empowerment, this option can be enabled. However, it is important to note that the dashboard is not available on worker nodes. + rayStartParams = map[string]string{"dashboard-host": "localhost"} + rayStartParams = setMissingRayStartParams(rayStartParams, rayiov1alpha1.WorkerNode, headPort, fqdnRayIP) + assert.Equal(t, "localhost", rayStartParams["dashboard-host"], fmt.Sprintf("Expected `%v` but got `%v`", "localhost", rayStartParams["dashboard-host"])) +} + func TestGetCustomWorkerInitImage(t *testing.T) { // cleanup defer os.Unsetenv(EnableInitContainerInjectionEnvKey)