Skip to content

Commit

Permalink
Clean up example samples (ray-project#434)
Browse files Browse the repository at this point in the history
This PR cleans up the "complete" and "autoscaler" sample yamls a bit.
Unnecessary pod spec fields are removed without sacrificing the completeness of the examples.
The idea is to make the configuration look less intimidating.

Signed-off-by: Dmitri Gekhtman <[email protected]>
  • Loading branch information
DmitriGekhtman authored Aug 5, 2022
1 parent 8401e2c commit ed464ac
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 336 deletions.
106 changes: 19 additions & 87 deletions ray-operator/config/samples/ray-cluster.autoscaler.large.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ kind: RayCluster
metadata:
labels:
controller-tools.k8s.io: "1.0"
# An unique identifier for the head node and workers of this cluster.
name: raycluster-autoscaler-large
# A unique identifier for the head node and workers of this cluster.
name: raycluster-autoscaler
spec:
# The version of Ray you are using. Make sure all Ray containers are running this version of Ray.
rayVersion: '1.13.0'
Expand All @@ -38,7 +38,7 @@ spec:
idleTimeoutSeconds: 60
# image optionally overrides the autoscaler's container image.
# If instance.spec.rayVersion is at least "2.0.0", the autoscaler will default to the same image as
# the ray container by default. For older Ray versions, the autoscaler will default to using the Ray 2.0.0 image.
# the ray container. For older Ray versions, the autoscaler will default to using the Ray 2.0.0 image.
## image: "my-repo/my-custom-autoscaler-image:tag"
# imagePullPolicy optionally overrides the autoscaler container's image pull policy.
imagePullPolicy: Always
Expand All @@ -51,7 +51,7 @@ spec:
requests:
cpu: "500m"
memory: "512Mi"
######################headGroupSpecs#################################
######################headGroupSpec#################################
# head group template and specs, (perhaps 'group' is not needed in the name)
headGroupSpec:
# Kubernetes Service Type, valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer'
Expand All @@ -61,16 +61,13 @@ spec:
# rayNodeType: head # Not needed since it is under the headgroup
# the following params are used to complete the ray start: ray start --head --block --port=6379 ...
rayStartParams:
# Flag "no-monitor" must be set when running the autoscaler in
# a sidecar container.
port: '6379'
# Flag "no-monitor" will be automatically set when autoscaling is enabled.
dashboard-host: '0.0.0.0'
node-ip-address: $MY_POD_IP # auto-completed as the head pod IP
block: 'true'
num-cpus: '1' # can be auto-completed from the limits
# num-cpus: '14' # can be auto-completed from the limits
# Use `resources` to optionally specify custom resource annotations for the Ray node.
# The value of `resources` is a string-integer mapping.
# Currently, `resources` must be provided in the unfortunate format demonstrated below:
# Currently, `resources` must be provided in the specific format demonstrated below:
# resources: '"{\"Custom1\": 1, \"Custom2\": 5}"'
#pod template
template:
Expand All @@ -86,36 +83,11 @@ spec:
# resource accounting. K8s requests are not used by Ray.
resources:
limits:
cpu: "14"
memory: "54Gi"
cpu: 14
memory: 54Gi
requests:
cpu: "14"
memory: "54Gi"
env:
- name: CPU_REQUEST
valueFrom:
resourceFieldRef:
containerName: ray-head
resource: requests.cpu
- name: CPU_LIMITS
valueFrom:
resourceFieldRef:
containerName: ray-head
resource: limits.cpu
- name: MEMORY_LIMITS
valueFrom:
resourceFieldRef:
containerName: ray-head
resource: limits.memory
- name: MEMORY_REQUESTS
valueFrom:
resourceFieldRef:
containerName: ray-head
resource: requests.memory
- name: MY_POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
cpu: 14
memory: 54Gi
ports:
- containerPort: 6379
name: gcs
Expand All @@ -132,22 +104,19 @@ spec:
- replicas: 1
minReplicas: 1
maxReplicas: 10
# logical group name, for this called small-group, also can be functional
# logical group name, for this called large-group, also can be functional
groupName: large-group
# if worker pods need to be added, we can simply increment the replicas
# if worker pods need to be removed, we decrement the replicas, and populate the podsToDelete list
# the operator will remove pods from the list until the number of replicas is satisfied
# when a pod is confirmed to be deleted, its name will be removed from the list below
#scaleStrategy:
# workersToDelete:
# - raycluster-complete-worker-small-group-bdtwh
# - raycluster-complete-worker-small-group-hv457
# - raycluster-complete-worker-small-group-k8tj7
# - raycluster-complete-worker-large-group-bdtwh
# - raycluster-complete-worker-large-group-hv457
# - raycluster-complete-worker-large-group-k8tj7
# the following params are used to complete the ray start: ray start --block --node-ip-address= ...
rayStartParams:
#redis-password: '5241590000000000'
redis-password: 'LetMeInRay' # Deprecated since Ray 1.11 due to GCS bootstrapping enabled
node-ip-address: $MY_POD_IP
block: 'true'
#pod template
template:
Expand All @@ -172,48 +141,11 @@ spec:
# resource accounting. K8s requests are not used by Ray.
resources:
limits:
cpu: "14"
memory: "54Gi"
cpu: 14
memory: 54Gi
requests:
cpu: "14"
memory: "54Gi"
# environment variables to set in the container. Optional.
# Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/
env:
- name: RAY_DISABLE_DOCKER_CPU_WARNING
value: "1"
- name: TYPE
value: "worker"
- name: CPU_REQUEST
valueFrom:
resourceFieldRef:
containerName: machine-learning
resource: requests.cpu
- name: CPU_LIMITS
valueFrom:
resourceFieldRef:
containerName: machine-learning
resource: limits.cpu
- name: MEMORY_LIMITS
valueFrom:
resourceFieldRef:
containerName: machine-learning
resource: limits.memory
- name: MEMORY_REQUESTS
valueFrom:
resourceFieldRef:
containerName: machine-learning
resource: requests.memory
- name: MY_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: MY_POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
ports:
- containerPort: 80
cpu: 14
memory: 54Gi
lifecycle:
preStop:
exec:
Expand Down
82 changes: 8 additions & 74 deletions ray-operator/config/samples/ray-cluster.autoscaler.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ kind: RayCluster
metadata:
labels:
controller-tools.k8s.io: "1.0"
# An unique identifier for the head node and workers of this cluster.
# A unique identifier for the head node and workers of this cluster.
name: raycluster-autoscaler
spec:
# The version of Ray you are using. Make sure all Ray containers are running this version of Ray.
Expand All @@ -29,7 +29,7 @@ spec:
idleTimeoutSeconds: 60
# image optionally overrides the autoscaler's container image.
# If instance.spec.rayVersion is at least "2.0.0", the autoscaler will default to the same image as
# the ray container by default. For older Ray versions, the autoscaler will default to using the Ray 2.0.0 image.
# the ray container. For older Ray versions, the autoscaler will default to using the Ray 2.0.0 image.
## image: "my-repo/my-custom-autoscaler-image:tag"
# imagePullPolicy optionally overrides the autoscaler container's image pull policy.
imagePullPolicy: Always
Expand All @@ -42,26 +42,23 @@ spec:
requests:
cpu: "500m"
memory: "512Mi"
######################headGroupSpecs#################################
######################headGroupSpec#################################
# head group template and specs, (perhaps 'group' is not needed in the name)
headGroupSpec:
# Kubernetes Service Type, valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer'
serviceType: ClusterIP
# logical group name, for this called head-group, also can be functional
# pod type head or worker
# rayNodeType: head # Not needed since it is under the headgroup
# the following params are used to complete the ray start: ray start --head --block --port=6379 ...
# the following params are used to complete the ray start: ray start --head --block ...
rayStartParams:
# Flag "no-monitor" must be set when running the autoscaler in
# a sidecar container.
port: '6379'
# Flag "no-monitor" will be automatically set when autoscaling is enabled.
dashboard-host: '0.0.0.0'
node-ip-address: $MY_POD_IP # auto-completed as the head pod IP
block: 'true'
num-cpus: '1' # can be auto-completed from the limits
# num-cpus: '1' # can be auto-completed from the limits
# Use `resources` to optionally specify custom resource annotations for the Ray node.
# The value of `resources` is a string-integer mapping.
# Currently, `resources` must be provided in the unfortunate format demonstrated below:
# Currently, `resources` must be provided in the specific format demonstrated below:
# resources: '"{\"Custom1\": 1, \"Custom2\": 5}"'
#pod template
template:
Expand All @@ -71,31 +68,6 @@ spec:
- name: ray-head
image: rayproject/ray:1.13.0
imagePullPolicy: Always
env:
- name: CPU_REQUEST
valueFrom:
resourceFieldRef:
containerName: ray-head
resource: requests.cpu
- name: CPU_LIMITS
valueFrom:
resourceFieldRef:
containerName: ray-head
resource: limits.cpu
- name: MEMORY_LIMITS
valueFrom:
resourceFieldRef:
containerName: ray-head
resource: limits.memory
- name: MEMORY_REQUESTS
valueFrom:
resourceFieldRef:
containerName: ray-head
resource: requests.memory
- name: MY_POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
ports:
- containerPort: 6379
name: gcs
Expand Down Expand Up @@ -130,11 +102,8 @@ spec:
# - raycluster-complete-worker-small-group-bdtwh
# - raycluster-complete-worker-small-group-hv457
# - raycluster-complete-worker-small-group-k8tj7
# the following params are used to complete the ray start: ray start --block --node-ip-address= ...
# the following params are used to complete the ray start: ray start --block ...
rayStartParams:
#redis-password: '5241590000000000'
redis-password: 'LetMeInRay' # Deprecated since Ray 1.11 due to GCS bootstrapping enabled
node-ip-address: $MY_POD_IP
block: 'true'
#pod template
template:
Expand All @@ -155,41 +124,6 @@ spec:
image: rayproject/ray:1.13.0
# environment variables to set in the container. Optional.
# Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/
env:
- name: RAY_DISABLE_DOCKER_CPU_WARNING
value: "1"
- name: TYPE
value: "worker"
- name: CPU_REQUEST
valueFrom:
resourceFieldRef:
containerName: machine-learning
resource: requests.cpu
- name: CPU_LIMITS
valueFrom:
resourceFieldRef:
containerName: machine-learning
resource: limits.cpu
- name: MEMORY_LIMITS
valueFrom:
resourceFieldRef:
containerName: machine-learning
resource: limits.memory
- name: MEMORY_REQUESTS
valueFrom:
resourceFieldRef:
containerName: machine-learning
resource: requests.memory
- name: MY_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: MY_POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
ports:
- containerPort: 80
lifecycle:
preStop:
exec:
Expand Down
Loading

0 comments on commit ed464ac

Please sign in to comment.